linux_dsm_epyc7002/mm/dmapool.c

539 lines
14 KiB
C
Raw Normal View History

/*
* DMA Pool allocator
*
* Copyright 2001 David Brownell
* Copyright 2007 Intel Corporation
* Author: Matthew Wilcox <willy@linux.intel.com>
*
* This software may be redistributed and/or modified under the terms of
* the GNU General Public License ("GPL") version 2 as published by the
* Free Software Foundation.
*
* This allocator returns small blocks of a given size which are DMA-able by
* the given device. It uses the dma_alloc_coherent page allocator to get
* new pages, then splits them up into blocks of the required size.
* Many older drivers still have their own code to do this.
*
* The current design of this allocator is fairly simple. The pool is
* represented by the 'struct dma_pool' which keeps a doubly-linked list of
* allocated pages. Each page in the page_list is split into blocks of at
* least 'size' bytes. Free blocks are tracked in an unsorted singly-linked
* list of free blocks within the page. Used blocks aren't tracked, but we
* keep a count of how many are currently allocated from each page.
*/
#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/dmapool.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/export.h>
#include <linux/mutex.h>
#include <linux/poison.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/wait.h>
#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB_DEBUG_ON)
#define DMAPOOL_DEBUG 1
#endif
struct dma_pool { /* the pool */
struct list_head page_list;
spinlock_t lock;
size_t size;
struct device *dev;
size_t allocation;
size_t boundary;
char name[32];
struct list_head pools;
};
struct dma_page { /* cacheable header for 'allocation' bytes */
struct list_head page_list;
void *vaddr;
dma_addr_t dma;
unsigned int in_use;
unsigned int offset;
};
static DEFINE_MUTEX(pools_lock);
mm: dmapool: add/remove sysfs file outside of the pool lock lock cat /sys/.../pools followed by removal the device leads to: |====================================================== |[ INFO: possible circular locking dependency detected ] |3.17.0-rc4+ #1498 Not tainted |------------------------------------------------------- |rmmod/2505 is trying to acquire lock: | (s_active#28){++++.+}, at: [<c017f754>] kernfs_remove_by_name_ns+0x3c/0x88 | |but task is already holding lock: | (pools_lock){+.+.+.}, at: [<c011494c>] dma_pool_destroy+0x18/0x17c | |which lock already depends on the new lock. |the existing dependency chain (in reverse order) is: | |-> #1 (pools_lock){+.+.+.}: | [<c0114ae8>] show_pools+0x30/0xf8 | [<c0313210>] dev_attr_show+0x1c/0x48 | [<c0180e84>] sysfs_kf_seq_show+0x88/0x10c | [<c017f960>] kernfs_seq_show+0x24/0x28 | [<c013efc4>] seq_read+0x1b8/0x480 | [<c011e820>] vfs_read+0x8c/0x148 | [<c011ea10>] SyS_read+0x40/0x8c | [<c000e960>] ret_fast_syscall+0x0/0x48 | |-> #0 (s_active#28){++++.+}: | [<c017e9ac>] __kernfs_remove+0x258/0x2ec | [<c017f754>] kernfs_remove_by_name_ns+0x3c/0x88 | [<c0114a7c>] dma_pool_destroy+0x148/0x17c | [<c03ad288>] hcd_buffer_destroy+0x20/0x34 | [<c03a4780>] usb_remove_hcd+0x110/0x1a4 The problem is the lock order of pools_lock and kernfs_mutex in dma_pool_destroy() vs show_pools() call path. This patch breaks out the creation of the sysfs file outside of the pools_lock mutex. The newly added pools_reg_lock ensures that there is no race of create vs destroy code path in terms whether or not the sysfs file has to be deleted (and was it deleted before we try to create a new one) and what to do if device_create_file() failed. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-10 05:28:50 +07:00
static DEFINE_MUTEX(pools_reg_lock);
static ssize_t
show_pools(struct device *dev, struct device_attribute *attr, char *buf)
{
unsigned temp;
unsigned size;
char *next;
struct dma_page *page;
struct dma_pool *pool;
next = buf;
size = PAGE_SIZE;
temp = scnprintf(next, size, "poolinfo - 0.1\n");
size -= temp;
next += temp;
mutex_lock(&pools_lock);
list_for_each_entry(pool, &dev->dma_pools, pools) {
unsigned pages = 0;
unsigned blocks = 0;
spin_lock_irq(&pool->lock);
list_for_each_entry(page, &pool->page_list, page_list) {
pages++;
blocks += page->in_use;
}
spin_unlock_irq(&pool->lock);
/* per-pool info, no real statistics yet */
temp = scnprintf(next, size, "%-16s %4u %4zu %4zu %2u\n",
pool->name, blocks,
pages * (pool->allocation / pool->size),
pool->size, pages);
size -= temp;
next += temp;
}
mutex_unlock(&pools_lock);
return PAGE_SIZE - size;
}
static DEVICE_ATTR(pools, 0444, show_pools, NULL);
/**
* dma_pool_create - Creates a pool of consistent memory blocks, for dma.
* @name: name of pool, for diagnostics
* @dev: device that will be doing the DMA
* @size: size of the blocks in this pool.
* @align: alignment requirement for blocks; must be a power of two
* @boundary: returned blocks won't cross this power of two boundary
* Context: not in_interrupt()
*
* Given one of these pools, dma_pool_alloc()
* may be used to allocate memory. Such memory will all have "consistent"
* DMA mappings, accessible by the device and its driver without using
* cache flushing primitives. The actual size of blocks allocated may be
* larger than requested because of alignment.
*
* If @boundary is nonzero, objects returned from dma_pool_alloc() won't
* cross that size boundary. This is useful for devices which have
* addressing restrictions on individual DMA transfers, such as not crossing
* boundaries of 4KBytes.
*
* Return: a dma allocation pool with the requested characteristics, or
* %NULL if one can't be created.
*/
struct dma_pool *dma_pool_create(const char *name, struct device *dev,
size_t size, size_t align, size_t boundary)
{
struct dma_pool *retval;
size_t allocation;
mm: dmapool: add/remove sysfs file outside of the pool lock lock cat /sys/.../pools followed by removal the device leads to: |====================================================== |[ INFO: possible circular locking dependency detected ] |3.17.0-rc4+ #1498 Not tainted |------------------------------------------------------- |rmmod/2505 is trying to acquire lock: | (s_active#28){++++.+}, at: [<c017f754>] kernfs_remove_by_name_ns+0x3c/0x88 | |but task is already holding lock: | (pools_lock){+.+.+.}, at: [<c011494c>] dma_pool_destroy+0x18/0x17c | |which lock already depends on the new lock. |the existing dependency chain (in reverse order) is: | |-> #1 (pools_lock){+.+.+.}: | [<c0114ae8>] show_pools+0x30/0xf8 | [<c0313210>] dev_attr_show+0x1c/0x48 | [<c0180e84>] sysfs_kf_seq_show+0x88/0x10c | [<c017f960>] kernfs_seq_show+0x24/0x28 | [<c013efc4>] seq_read+0x1b8/0x480 | [<c011e820>] vfs_read+0x8c/0x148 | [<c011ea10>] SyS_read+0x40/0x8c | [<c000e960>] ret_fast_syscall+0x0/0x48 | |-> #0 (s_active#28){++++.+}: | [<c017e9ac>] __kernfs_remove+0x258/0x2ec | [<c017f754>] kernfs_remove_by_name_ns+0x3c/0x88 | [<c0114a7c>] dma_pool_destroy+0x148/0x17c | [<c03ad288>] hcd_buffer_destroy+0x20/0x34 | [<c03a4780>] usb_remove_hcd+0x110/0x1a4 The problem is the lock order of pools_lock and kernfs_mutex in dma_pool_destroy() vs show_pools() call path. This patch breaks out the creation of the sysfs file outside of the pools_lock mutex. The newly added pools_reg_lock ensures that there is no race of create vs destroy code path in terms whether or not the sysfs file has to be deleted (and was it deleted before we try to create a new one) and what to do if device_create_file() failed. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-10 05:28:50 +07:00
bool empty = false;
if (align == 0)
align = 1;
else if (align & (align - 1))
return NULL;
if (size == 0)
return NULL;
else if (size < 4)
size = 4;
if ((size % align) != 0)
size = ALIGN(size, align);
allocation = max_t(size_t, size, PAGE_SIZE);
if (!boundary)
boundary = allocation;
else if ((boundary < size) || (boundary & (boundary - 1)))
return NULL;
retval = kmalloc_node(sizeof(*retval), GFP_KERNEL, dev_to_node(dev));
if (!retval)
return retval;
strlcpy(retval->name, name, sizeof(retval->name));
retval->dev = dev;
INIT_LIST_HEAD(&retval->page_list);
spin_lock_init(&retval->lock);
retval->size = size;
retval->boundary = boundary;
retval->allocation = allocation;
INIT_LIST_HEAD(&retval->pools);
mm: dmapool: add/remove sysfs file outside of the pool lock lock cat /sys/.../pools followed by removal the device leads to: |====================================================== |[ INFO: possible circular locking dependency detected ] |3.17.0-rc4+ #1498 Not tainted |------------------------------------------------------- |rmmod/2505 is trying to acquire lock: | (s_active#28){++++.+}, at: [<c017f754>] kernfs_remove_by_name_ns+0x3c/0x88 | |but task is already holding lock: | (pools_lock){+.+.+.}, at: [<c011494c>] dma_pool_destroy+0x18/0x17c | |which lock already depends on the new lock. |the existing dependency chain (in reverse order) is: | |-> #1 (pools_lock){+.+.+.}: | [<c0114ae8>] show_pools+0x30/0xf8 | [<c0313210>] dev_attr_show+0x1c/0x48 | [<c0180e84>] sysfs_kf_seq_show+0x88/0x10c | [<c017f960>] kernfs_seq_show+0x24/0x28 | [<c013efc4>] seq_read+0x1b8/0x480 | [<c011e820>] vfs_read+0x8c/0x148 | [<c011ea10>] SyS_read+0x40/0x8c | [<c000e960>] ret_fast_syscall+0x0/0x48 | |-> #0 (s_active#28){++++.+}: | [<c017e9ac>] __kernfs_remove+0x258/0x2ec | [<c017f754>] kernfs_remove_by_name_ns+0x3c/0x88 | [<c0114a7c>] dma_pool_destroy+0x148/0x17c | [<c03ad288>] hcd_buffer_destroy+0x20/0x34 | [<c03a4780>] usb_remove_hcd+0x110/0x1a4 The problem is the lock order of pools_lock and kernfs_mutex in dma_pool_destroy() vs show_pools() call path. This patch breaks out the creation of the sysfs file outside of the pools_lock mutex. The newly added pools_reg_lock ensures that there is no race of create vs destroy code path in terms whether or not the sysfs file has to be deleted (and was it deleted before we try to create a new one) and what to do if device_create_file() failed. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-10 05:28:50 +07:00
/*
* pools_lock ensures that the ->dma_pools list does not get corrupted.
* pools_reg_lock ensures that there is not a race between
* dma_pool_create() and dma_pool_destroy() or within dma_pool_create()
* when the first invocation of dma_pool_create() failed on
* device_create_file() and the second assumes that it has been done (I
* know it is a short window).
*/
mutex_lock(&pools_reg_lock);
mutex_lock(&pools_lock);
mm: dmapool: add/remove sysfs file outside of the pool lock lock cat /sys/.../pools followed by removal the device leads to: |====================================================== |[ INFO: possible circular locking dependency detected ] |3.17.0-rc4+ #1498 Not tainted |------------------------------------------------------- |rmmod/2505 is trying to acquire lock: | (s_active#28){++++.+}, at: [<c017f754>] kernfs_remove_by_name_ns+0x3c/0x88 | |but task is already holding lock: | (pools_lock){+.+.+.}, at: [<c011494c>] dma_pool_destroy+0x18/0x17c | |which lock already depends on the new lock. |the existing dependency chain (in reverse order) is: | |-> #1 (pools_lock){+.+.+.}: | [<c0114ae8>] show_pools+0x30/0xf8 | [<c0313210>] dev_attr_show+0x1c/0x48 | [<c0180e84>] sysfs_kf_seq_show+0x88/0x10c | [<c017f960>] kernfs_seq_show+0x24/0x28 | [<c013efc4>] seq_read+0x1b8/0x480 | [<c011e820>] vfs_read+0x8c/0x148 | [<c011ea10>] SyS_read+0x40/0x8c | [<c000e960>] ret_fast_syscall+0x0/0x48 | |-> #0 (s_active#28){++++.+}: | [<c017e9ac>] __kernfs_remove+0x258/0x2ec | [<c017f754>] kernfs_remove_by_name_ns+0x3c/0x88 | [<c0114a7c>] dma_pool_destroy+0x148/0x17c | [<c03ad288>] hcd_buffer_destroy+0x20/0x34 | [<c03a4780>] usb_remove_hcd+0x110/0x1a4 The problem is the lock order of pools_lock and kernfs_mutex in dma_pool_destroy() vs show_pools() call path. This patch breaks out the creation of the sysfs file outside of the pools_lock mutex. The newly added pools_reg_lock ensures that there is no race of create vs destroy code path in terms whether or not the sysfs file has to be deleted (and was it deleted before we try to create a new one) and what to do if device_create_file() failed. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-10 05:28:50 +07:00
if (list_empty(&dev->dma_pools))
empty = true;
list_add(&retval->pools, &dev->dma_pools);
mutex_unlock(&pools_lock);
mm: dmapool: add/remove sysfs file outside of the pool lock lock cat /sys/.../pools followed by removal the device leads to: |====================================================== |[ INFO: possible circular locking dependency detected ] |3.17.0-rc4+ #1498 Not tainted |------------------------------------------------------- |rmmod/2505 is trying to acquire lock: | (s_active#28){++++.+}, at: [<c017f754>] kernfs_remove_by_name_ns+0x3c/0x88 | |but task is already holding lock: | (pools_lock){+.+.+.}, at: [<c011494c>] dma_pool_destroy+0x18/0x17c | |which lock already depends on the new lock. |the existing dependency chain (in reverse order) is: | |-> #1 (pools_lock){+.+.+.}: | [<c0114ae8>] show_pools+0x30/0xf8 | [<c0313210>] dev_attr_show+0x1c/0x48 | [<c0180e84>] sysfs_kf_seq_show+0x88/0x10c | [<c017f960>] kernfs_seq_show+0x24/0x28 | [<c013efc4>] seq_read+0x1b8/0x480 | [<c011e820>] vfs_read+0x8c/0x148 | [<c011ea10>] SyS_read+0x40/0x8c | [<c000e960>] ret_fast_syscall+0x0/0x48 | |-> #0 (s_active#28){++++.+}: | [<c017e9ac>] __kernfs_remove+0x258/0x2ec | [<c017f754>] kernfs_remove_by_name_ns+0x3c/0x88 | [<c0114a7c>] dma_pool_destroy+0x148/0x17c | [<c03ad288>] hcd_buffer_destroy+0x20/0x34 | [<c03a4780>] usb_remove_hcd+0x110/0x1a4 The problem is the lock order of pools_lock and kernfs_mutex in dma_pool_destroy() vs show_pools() call path. This patch breaks out the creation of the sysfs file outside of the pools_lock mutex. The newly added pools_reg_lock ensures that there is no race of create vs destroy code path in terms whether or not the sysfs file has to be deleted (and was it deleted before we try to create a new one) and what to do if device_create_file() failed. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-10 05:28:50 +07:00
if (empty) {
int err;
err = device_create_file(dev, &dev_attr_pools);
if (err) {
mutex_lock(&pools_lock);
list_del(&retval->pools);
mutex_unlock(&pools_lock);
mutex_unlock(&pools_reg_lock);
kfree(retval);
return NULL;
}
}
mutex_unlock(&pools_reg_lock);
return retval;
}
EXPORT_SYMBOL(dma_pool_create);
static void pool_initialise_page(struct dma_pool *pool, struct dma_page *page)
{
unsigned int offset = 0;
unsigned int next_boundary = pool->boundary;
do {
unsigned int next = offset + pool->size;
if (unlikely((next + pool->size) >= next_boundary)) {
next = next_boundary;
next_boundary += pool->boundary;
}
*(int *)(page->vaddr + offset) = next;
offset = next;
} while (offset < pool->allocation);
}
static struct dma_page *pool_alloc_page(struct dma_pool *pool, gfp_t mem_flags)
{
struct dma_page *page;
page = kmalloc(sizeof(*page), mem_flags);
if (!page)
return NULL;
page->vaddr = dma_alloc_coherent(pool->dev, pool->allocation,
&page->dma, mem_flags);
if (page->vaddr) {
#ifdef DMAPOOL_DEBUG
memset(page->vaddr, POOL_POISON_FREED, pool->allocation);
#endif
pool_initialise_page(pool, page);
page->in_use = 0;
page->offset = 0;
} else {
kfree(page);
page = NULL;
}
return page;
}
static inline bool is_page_busy(struct dma_page *page)
{
return page->in_use != 0;
}
static void pool_free_page(struct dma_pool *pool, struct dma_page *page)
{
dma_addr_t dma = page->dma;
#ifdef DMAPOOL_DEBUG
memset(page->vaddr, POOL_POISON_FREED, pool->allocation);
#endif
dma_free_coherent(pool->dev, pool->allocation, page->vaddr, dma);
list_del(&page->page_list);
kfree(page);
}
/**
* dma_pool_destroy - destroys a pool of dma memory blocks.
* @pool: dma pool that will be destroyed
* Context: !in_interrupt()
*
* Caller guarantees that no more memory from the pool is in use,
* and that nothing will try to use the pool after this call.
*/
void dma_pool_destroy(struct dma_pool *pool)
{
mm: dmapool: add/remove sysfs file outside of the pool lock lock cat /sys/.../pools followed by removal the device leads to: |====================================================== |[ INFO: possible circular locking dependency detected ] |3.17.0-rc4+ #1498 Not tainted |------------------------------------------------------- |rmmod/2505 is trying to acquire lock: | (s_active#28){++++.+}, at: [<c017f754>] kernfs_remove_by_name_ns+0x3c/0x88 | |but task is already holding lock: | (pools_lock){+.+.+.}, at: [<c011494c>] dma_pool_destroy+0x18/0x17c | |which lock already depends on the new lock. |the existing dependency chain (in reverse order) is: | |-> #1 (pools_lock){+.+.+.}: | [<c0114ae8>] show_pools+0x30/0xf8 | [<c0313210>] dev_attr_show+0x1c/0x48 | [<c0180e84>] sysfs_kf_seq_show+0x88/0x10c | [<c017f960>] kernfs_seq_show+0x24/0x28 | [<c013efc4>] seq_read+0x1b8/0x480 | [<c011e820>] vfs_read+0x8c/0x148 | [<c011ea10>] SyS_read+0x40/0x8c | [<c000e960>] ret_fast_syscall+0x0/0x48 | |-> #0 (s_active#28){++++.+}: | [<c017e9ac>] __kernfs_remove+0x258/0x2ec | [<c017f754>] kernfs_remove_by_name_ns+0x3c/0x88 | [<c0114a7c>] dma_pool_destroy+0x148/0x17c | [<c03ad288>] hcd_buffer_destroy+0x20/0x34 | [<c03a4780>] usb_remove_hcd+0x110/0x1a4 The problem is the lock order of pools_lock and kernfs_mutex in dma_pool_destroy() vs show_pools() call path. This patch breaks out the creation of the sysfs file outside of the pools_lock mutex. The newly added pools_reg_lock ensures that there is no race of create vs destroy code path in terms whether or not the sysfs file has to be deleted (and was it deleted before we try to create a new one) and what to do if device_create_file() failed. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-10 05:28:50 +07:00
bool empty = false;
if (unlikely(!pool))
return;
mm: dmapool: add/remove sysfs file outside of the pool lock lock cat /sys/.../pools followed by removal the device leads to: |====================================================== |[ INFO: possible circular locking dependency detected ] |3.17.0-rc4+ #1498 Not tainted |------------------------------------------------------- |rmmod/2505 is trying to acquire lock: | (s_active#28){++++.+}, at: [<c017f754>] kernfs_remove_by_name_ns+0x3c/0x88 | |but task is already holding lock: | (pools_lock){+.+.+.}, at: [<c011494c>] dma_pool_destroy+0x18/0x17c | |which lock already depends on the new lock. |the existing dependency chain (in reverse order) is: | |-> #1 (pools_lock){+.+.+.}: | [<c0114ae8>] show_pools+0x30/0xf8 | [<c0313210>] dev_attr_show+0x1c/0x48 | [<c0180e84>] sysfs_kf_seq_show+0x88/0x10c | [<c017f960>] kernfs_seq_show+0x24/0x28 | [<c013efc4>] seq_read+0x1b8/0x480 | [<c011e820>] vfs_read+0x8c/0x148 | [<c011ea10>] SyS_read+0x40/0x8c | [<c000e960>] ret_fast_syscall+0x0/0x48 | |-> #0 (s_active#28){++++.+}: | [<c017e9ac>] __kernfs_remove+0x258/0x2ec | [<c017f754>] kernfs_remove_by_name_ns+0x3c/0x88 | [<c0114a7c>] dma_pool_destroy+0x148/0x17c | [<c03ad288>] hcd_buffer_destroy+0x20/0x34 | [<c03a4780>] usb_remove_hcd+0x110/0x1a4 The problem is the lock order of pools_lock and kernfs_mutex in dma_pool_destroy() vs show_pools() call path. This patch breaks out the creation of the sysfs file outside of the pools_lock mutex. The newly added pools_reg_lock ensures that there is no race of create vs destroy code path in terms whether or not the sysfs file has to be deleted (and was it deleted before we try to create a new one) and what to do if device_create_file() failed. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-10 05:28:50 +07:00
mutex_lock(&pools_reg_lock);
mutex_lock(&pools_lock);
list_del(&pool->pools);
if (pool->dev && list_empty(&pool->dev->dma_pools))
mm: dmapool: add/remove sysfs file outside of the pool lock lock cat /sys/.../pools followed by removal the device leads to: |====================================================== |[ INFO: possible circular locking dependency detected ] |3.17.0-rc4+ #1498 Not tainted |------------------------------------------------------- |rmmod/2505 is trying to acquire lock: | (s_active#28){++++.+}, at: [<c017f754>] kernfs_remove_by_name_ns+0x3c/0x88 | |but task is already holding lock: | (pools_lock){+.+.+.}, at: [<c011494c>] dma_pool_destroy+0x18/0x17c | |which lock already depends on the new lock. |the existing dependency chain (in reverse order) is: | |-> #1 (pools_lock){+.+.+.}: | [<c0114ae8>] show_pools+0x30/0xf8 | [<c0313210>] dev_attr_show+0x1c/0x48 | [<c0180e84>] sysfs_kf_seq_show+0x88/0x10c | [<c017f960>] kernfs_seq_show+0x24/0x28 | [<c013efc4>] seq_read+0x1b8/0x480 | [<c011e820>] vfs_read+0x8c/0x148 | [<c011ea10>] SyS_read+0x40/0x8c | [<c000e960>] ret_fast_syscall+0x0/0x48 | |-> #0 (s_active#28){++++.+}: | [<c017e9ac>] __kernfs_remove+0x258/0x2ec | [<c017f754>] kernfs_remove_by_name_ns+0x3c/0x88 | [<c0114a7c>] dma_pool_destroy+0x148/0x17c | [<c03ad288>] hcd_buffer_destroy+0x20/0x34 | [<c03a4780>] usb_remove_hcd+0x110/0x1a4 The problem is the lock order of pools_lock and kernfs_mutex in dma_pool_destroy() vs show_pools() call path. This patch breaks out the creation of the sysfs file outside of the pools_lock mutex. The newly added pools_reg_lock ensures that there is no race of create vs destroy code path in terms whether or not the sysfs file has to be deleted (and was it deleted before we try to create a new one) and what to do if device_create_file() failed. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-10 05:28:50 +07:00
empty = true;
mutex_unlock(&pools_lock);
mm: dmapool: add/remove sysfs file outside of the pool lock lock cat /sys/.../pools followed by removal the device leads to: |====================================================== |[ INFO: possible circular locking dependency detected ] |3.17.0-rc4+ #1498 Not tainted |------------------------------------------------------- |rmmod/2505 is trying to acquire lock: | (s_active#28){++++.+}, at: [<c017f754>] kernfs_remove_by_name_ns+0x3c/0x88 | |but task is already holding lock: | (pools_lock){+.+.+.}, at: [<c011494c>] dma_pool_destroy+0x18/0x17c | |which lock already depends on the new lock. |the existing dependency chain (in reverse order) is: | |-> #1 (pools_lock){+.+.+.}: | [<c0114ae8>] show_pools+0x30/0xf8 | [<c0313210>] dev_attr_show+0x1c/0x48 | [<c0180e84>] sysfs_kf_seq_show+0x88/0x10c | [<c017f960>] kernfs_seq_show+0x24/0x28 | [<c013efc4>] seq_read+0x1b8/0x480 | [<c011e820>] vfs_read+0x8c/0x148 | [<c011ea10>] SyS_read+0x40/0x8c | [<c000e960>] ret_fast_syscall+0x0/0x48 | |-> #0 (s_active#28){++++.+}: | [<c017e9ac>] __kernfs_remove+0x258/0x2ec | [<c017f754>] kernfs_remove_by_name_ns+0x3c/0x88 | [<c0114a7c>] dma_pool_destroy+0x148/0x17c | [<c03ad288>] hcd_buffer_destroy+0x20/0x34 | [<c03a4780>] usb_remove_hcd+0x110/0x1a4 The problem is the lock order of pools_lock and kernfs_mutex in dma_pool_destroy() vs show_pools() call path. This patch breaks out the creation of the sysfs file outside of the pools_lock mutex. The newly added pools_reg_lock ensures that there is no race of create vs destroy code path in terms whether or not the sysfs file has to be deleted (and was it deleted before we try to create a new one) and what to do if device_create_file() failed. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-10-10 05:28:50 +07:00
if (empty)
device_remove_file(pool->dev, &dev_attr_pools);
mutex_unlock(&pools_reg_lock);
while (!list_empty(&pool->page_list)) {
struct dma_page *page;
page = list_entry(pool->page_list.next,
struct dma_page, page_list);
if (is_page_busy(page)) {
if (pool->dev)
dev_err(pool->dev,
"dma_pool_destroy %s, %p busy\n",
pool->name, page->vaddr);
else
pr_err("dma_pool_destroy %s, %p busy\n",
pool->name, page->vaddr);
/* leak the still-in-use consistent memory */
list_del(&page->page_list);
kfree(page);
} else
pool_free_page(pool, page);
}
kfree(pool);
}
EXPORT_SYMBOL(dma_pool_destroy);
/**
* dma_pool_alloc - get a block of consistent memory
* @pool: dma pool that will produce the block
* @mem_flags: GFP_* bitmask
* @handle: pointer to dma address of block
*
* Return: the kernel virtual address of a currently unused block,
* and reports its dma address through the handle.
* If such a memory block can't be allocated, %NULL is returned.
*/
void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags,
dma_addr_t *handle)
{
unsigned long flags;
struct dma_page *page;
size_t offset;
void *retval;
mm, page_alloc: distinguish between being unable to sleep, unwilling to sleep and avoiding waking kswapd __GFP_WAIT has been used to identify atomic context in callers that hold spinlocks or are in interrupts. They are expected to be high priority and have access one of two watermarks lower than "min" which can be referred to as the "atomic reserve". __GFP_HIGH users get access to the first lower watermark and can be called the "high priority reserve". Over time, callers had a requirement to not block when fallback options were available. Some have abused __GFP_WAIT leading to a situation where an optimisitic allocation with a fallback option can access atomic reserves. This patch uses __GFP_ATOMIC to identify callers that are truely atomic, cannot sleep and have no alternative. High priority users continue to use __GFP_HIGH. __GFP_DIRECT_RECLAIM identifies callers that can sleep and are willing to enter direct reclaim. __GFP_KSWAPD_RECLAIM to identify callers that want to wake kswapd for background reclaim. __GFP_WAIT is redefined as a caller that is willing to enter direct reclaim and wake kswapd for background reclaim. This patch then converts a number of sites o __GFP_ATOMIC is used by callers that are high priority and have memory pools for those requests. GFP_ATOMIC uses this flag. o Callers that have a limited mempool to guarantee forward progress clear __GFP_DIRECT_RECLAIM but keep __GFP_KSWAPD_RECLAIM. bio allocations fall into this category where kswapd will still be woken but atomic reserves are not used as there is a one-entry mempool to guarantee progress. o Callers that are checking if they are non-blocking should use the helper gfpflags_allow_blocking() where possible. This is because checking for __GFP_WAIT as was done historically now can trigger false positives. Some exceptions like dm-crypt.c exist where the code intent is clearer if __GFP_DIRECT_RECLAIM is used instead of the helper due to flag manipulations. o Callers that built their own GFP flags instead of starting with GFP_KERNEL and friends now also need to specify __GFP_KSWAPD_RECLAIM. The first key hazard to watch out for is callers that removed __GFP_WAIT and was depending on access to atomic reserves for inconspicuous reasons. In some cases it may be appropriate for them to use __GFP_HIGH. The second key hazard is callers that assembled their own combination of GFP flags instead of starting with something like GFP_KERNEL. They may now wish to specify __GFP_KSWAPD_RECLAIM. It's almost certainly harmless if it's missed in most cases as other activity will wake kswapd. Signed-off-by: Mel Gorman <mgorman@techsingularity.net> Acked-by: Vlastimil Babka <vbabka@suse.cz> Acked-by: Michal Hocko <mhocko@suse.com> Acked-by: Johannes Weiner <hannes@cmpxchg.org> Cc: Christoph Lameter <cl@linux.com> Cc: David Rientjes <rientjes@google.com> Cc: Vitaly Wool <vitalywool@gmail.com> Cc: Rik van Riel <riel@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-11-07 07:28:21 +07:00
might_sleep_if(gfpflags_allow_blocking(mem_flags));
spin_lock_irqsave(&pool->lock, flags);
list_for_each_entry(page, &pool->page_list, page_list) {
if (page->offset < pool->allocation)
goto ready;
}
/* pool_alloc_page() might sleep, so temporarily drop &pool->lock */
spin_unlock_irqrestore(&pool->lock, flags);
mm: add support for __GFP_ZERO flag to dma_pool_alloc() Currently a call to dma_pool_alloc() with a ___GFP_ZERO flag returns a non-zeroed memory region. This patchset adds support for the __GFP_ZERO flag to dma_pool_alloc(), adds 2 wrapper functions for allocing zeroed memory from a pool, and provides a coccinelle script for finding & replacing instances of dma_pool_alloc() followed by memset(0) with a single dma_pool_zalloc() call. There was some concern that this always calls memset() to zero, instead of passing __GFP_ZERO into the page allocator. [https://lkml.org/lkml/2015/7/15/881] I ran a test on my system to get an idea of how often dma_pool_alloc() calls into pool_alloc_page(). After Boot: [ 30.119863] alloc_calls:541, page_allocs:7 After an hour: [ 3600.951031] alloc_calls:9566, page_allocs:12 After copying 1GB file onto a USB drive: [ 4260.657148] alloc_calls:17225, page_allocs:12 It doesn't look like dma_pool_alloc() calls down to the page allocator very often (at least on my system). This patch (of 4): Currently the __GFP_ZERO flag is ignored by dma_pool_alloc(). Make dma_pool_alloc() zero the memory if this flag is set. Signed-off-by: Sean O. Stalley <sean.stalley@intel.com> Acked-by: David Rientjes <rientjes@google.com> Cc: Vinod Koul <vinod.koul@intel.com> Cc: Bjorn Helgaas <bhelgaas@google.com> Cc: Gilles Muller <Gilles.Muller@lip6.fr> Cc: Nicolas Palix <nicolas.palix@imag.fr> Cc: Michal Marek <mmarek@suse.cz> Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Cc: Jonathan Corbet <corbet@lwn.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-09-09 05:02:24 +07:00
page = pool_alloc_page(pool, mem_flags & (~__GFP_ZERO));
if (!page)
return NULL;
spin_lock_irqsave(&pool->lock, flags);
list_add(&page->page_list, &pool->page_list);
ready:
page->in_use++;
offset = page->offset;
page->offset = *(int *)(page->vaddr + offset);
retval = offset + page->vaddr;
*handle = offset + page->dma;
#ifdef DMAPOOL_DEBUG
{
int i;
u8 *data = retval;
/* page->offset is stored in first 4 bytes */
for (i = sizeof(page->offset); i < pool->size; i++) {
if (data[i] == POOL_POISON_FREED)
continue;
if (pool->dev)
dev_err(pool->dev,
"dma_pool_alloc %s, %p (corrupted)\n",
pool->name, retval);
else
pr_err("dma_pool_alloc %s, %p (corrupted)\n",
pool->name, retval);
/*
* Dump the first 4 bytes even if they are not
* POOL_POISON_FREED
*/
print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 16, 1,
data, pool->size, 1);
break;
}
}
mm: add support for __GFP_ZERO flag to dma_pool_alloc() Currently a call to dma_pool_alloc() with a ___GFP_ZERO flag returns a non-zeroed memory region. This patchset adds support for the __GFP_ZERO flag to dma_pool_alloc(), adds 2 wrapper functions for allocing zeroed memory from a pool, and provides a coccinelle script for finding & replacing instances of dma_pool_alloc() followed by memset(0) with a single dma_pool_zalloc() call. There was some concern that this always calls memset() to zero, instead of passing __GFP_ZERO into the page allocator. [https://lkml.org/lkml/2015/7/15/881] I ran a test on my system to get an idea of how often dma_pool_alloc() calls into pool_alloc_page(). After Boot: [ 30.119863] alloc_calls:541, page_allocs:7 After an hour: [ 3600.951031] alloc_calls:9566, page_allocs:12 After copying 1GB file onto a USB drive: [ 4260.657148] alloc_calls:17225, page_allocs:12 It doesn't look like dma_pool_alloc() calls down to the page allocator very often (at least on my system). This patch (of 4): Currently the __GFP_ZERO flag is ignored by dma_pool_alloc(). Make dma_pool_alloc() zero the memory if this flag is set. Signed-off-by: Sean O. Stalley <sean.stalley@intel.com> Acked-by: David Rientjes <rientjes@google.com> Cc: Vinod Koul <vinod.koul@intel.com> Cc: Bjorn Helgaas <bhelgaas@google.com> Cc: Gilles Muller <Gilles.Muller@lip6.fr> Cc: Nicolas Palix <nicolas.palix@imag.fr> Cc: Michal Marek <mmarek@suse.cz> Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Cc: Jonathan Corbet <corbet@lwn.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-09-09 05:02:24 +07:00
if (!(mem_flags & __GFP_ZERO))
memset(retval, POOL_POISON_ALLOCATED, pool->size);
#endif
spin_unlock_irqrestore(&pool->lock, flags);
mm: add support for __GFP_ZERO flag to dma_pool_alloc() Currently a call to dma_pool_alloc() with a ___GFP_ZERO flag returns a non-zeroed memory region. This patchset adds support for the __GFP_ZERO flag to dma_pool_alloc(), adds 2 wrapper functions for allocing zeroed memory from a pool, and provides a coccinelle script for finding & replacing instances of dma_pool_alloc() followed by memset(0) with a single dma_pool_zalloc() call. There was some concern that this always calls memset() to zero, instead of passing __GFP_ZERO into the page allocator. [https://lkml.org/lkml/2015/7/15/881] I ran a test on my system to get an idea of how often dma_pool_alloc() calls into pool_alloc_page(). After Boot: [ 30.119863] alloc_calls:541, page_allocs:7 After an hour: [ 3600.951031] alloc_calls:9566, page_allocs:12 After copying 1GB file onto a USB drive: [ 4260.657148] alloc_calls:17225, page_allocs:12 It doesn't look like dma_pool_alloc() calls down to the page allocator very often (at least on my system). This patch (of 4): Currently the __GFP_ZERO flag is ignored by dma_pool_alloc(). Make dma_pool_alloc() zero the memory if this flag is set. Signed-off-by: Sean O. Stalley <sean.stalley@intel.com> Acked-by: David Rientjes <rientjes@google.com> Cc: Vinod Koul <vinod.koul@intel.com> Cc: Bjorn Helgaas <bhelgaas@google.com> Cc: Gilles Muller <Gilles.Muller@lip6.fr> Cc: Nicolas Palix <nicolas.palix@imag.fr> Cc: Michal Marek <mmarek@suse.cz> Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Cc: Jonathan Corbet <corbet@lwn.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-09-09 05:02:24 +07:00
if (mem_flags & __GFP_ZERO)
memset(retval, 0, pool->size);
return retval;
}
EXPORT_SYMBOL(dma_pool_alloc);
static struct dma_page *pool_find_page(struct dma_pool *pool, dma_addr_t dma)
{
struct dma_page *page;
list_for_each_entry(page, &pool->page_list, page_list) {
if (dma < page->dma)
continue;
if ((dma - page->dma) < pool->allocation)
return page;
}
return NULL;
}
/**
* dma_pool_free - put block back into dma pool
* @pool: the dma pool holding the block
* @vaddr: virtual address of block
* @dma: dma address of block
*
* Caller promises neither device nor driver will again touch this block
* unless it is first re-allocated.
*/
void dma_pool_free(struct dma_pool *pool, void *vaddr, dma_addr_t dma)
{
struct dma_page *page;
unsigned long flags;
unsigned int offset;
spin_lock_irqsave(&pool->lock, flags);
page = pool_find_page(pool, dma);
if (!page) {
spin_unlock_irqrestore(&pool->lock, flags);
if (pool->dev)
dev_err(pool->dev,
"dma_pool_free %s, %p/%lx (bad dma)\n",
pool->name, vaddr, (unsigned long)dma);
else
pr_err("dma_pool_free %s, %p/%lx (bad dma)\n",
pool->name, vaddr, (unsigned long)dma);
return;
}
offset = vaddr - page->vaddr;
#ifdef DMAPOOL_DEBUG
if ((dma - page->dma) != offset) {
spin_unlock_irqrestore(&pool->lock, flags);
if (pool->dev)
dev_err(pool->dev,
"dma_pool_free %s, %p (bad vaddr)/%pad\n",
pool->name, vaddr, &dma);
else
pr_err("dma_pool_free %s, %p (bad vaddr)/%pad\n",
pool->name, vaddr, &dma);
return;
}
{
unsigned int chain = page->offset;
while (chain < pool->allocation) {
if (chain != offset) {
chain = *(int *)(page->vaddr + chain);
continue;
}
spin_unlock_irqrestore(&pool->lock, flags);
if (pool->dev)
dev_err(pool->dev, "dma_pool_free %s, dma %pad already free\n",
pool->name, &dma);
else
pr_err("dma_pool_free %s, dma %pad already free\n",
pool->name, &dma);
return;
}
}
memset(vaddr, POOL_POISON_FREED, pool->size);
#endif
page->in_use--;
*(int *)vaddr = page->offset;
page->offset = offset;
/*
* Resist a temptation to do
* if (!is_page_busy(page)) pool_free_page(pool, page);
* Better have a few empty pages hang around.
*/
spin_unlock_irqrestore(&pool->lock, flags);
}
EXPORT_SYMBOL(dma_pool_free);
/*
* Managed DMA pool
*/
static void dmam_pool_release(struct device *dev, void *res)
{
struct dma_pool *pool = *(struct dma_pool **)res;
dma_pool_destroy(pool);
}
static int dmam_pool_match(struct device *dev, void *res, void *match_data)
{
return *(struct dma_pool **)res == match_data;
}
/**
* dmam_pool_create - Managed dma_pool_create()
* @name: name of pool, for diagnostics
* @dev: device that will be doing the DMA
* @size: size of the blocks in this pool.
* @align: alignment requirement for blocks; must be a power of two
* @allocation: returned blocks won't cross this boundary (or zero)
*
* Managed dma_pool_create(). DMA pool created with this function is
* automatically destroyed on driver detach.
*
* Return: a managed dma allocation pool with the requested
* characteristics, or %NULL if one can't be created.
*/
struct dma_pool *dmam_pool_create(const char *name, struct device *dev,
size_t size, size_t align, size_t allocation)
{
struct dma_pool **ptr, *pool;
ptr = devres_alloc(dmam_pool_release, sizeof(*ptr), GFP_KERNEL);
if (!ptr)
return NULL;
pool = *ptr = dma_pool_create(name, dev, size, align, allocation);
if (pool)
devres_add(dev, ptr);
else
devres_free(ptr);
return pool;
}
EXPORT_SYMBOL(dmam_pool_create);
/**
* dmam_pool_destroy - Managed dma_pool_destroy()
* @pool: dma pool that will be destroyed
*
* Managed dma_pool_destroy().
*/
void dmam_pool_destroy(struct dma_pool *pool)
{
struct device *dev = pool->dev;
WARN_ON(devres_release(dev, dmam_pool_release, dmam_pool_match, pool));
}
EXPORT_SYMBOL(dmam_pool_destroy);