mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-05 19:06:40 +07:00
0e435ac26e
Fix setting of max_segment_size and seg_boundary mask for stacked md/dm devices. When stacking devices (LVM over MD over SCSI) some of the request queue parameters are not set up correctly in some cases by default, namely max_segment_size and and seg_boundary mask. If you create MD device over SCSI, these attributes are zeroed. Problem become when there is over this mapping next device-mapper mapping - queue attributes are set in DM this way: request_queue max_segment_size seg_boundary_mask SCSI 65536 0xffffffff MD RAID1 0 0 LVM 65536 -1 (64bit) Unfortunately bio_add_page (resp. bio_phys_segments) calculates number of physical segments according to these parameters. During the generic_make_request() is segment cout recalculated and can increase bio->bi_phys_segments count over the allowed limit. (After bio_clone() in stack operation.) Thi is specially problem in CCISS driver, where it produce OOPS here BUG_ON(creq->nr_phys_segments > MAXSGENTRIES); (MAXSEGENTRIES is 31 by default.) Sometimes even this command is enough to cause oops: dd iflag=direct if=/dev/<vg>/<lv> of=/dev/null bs=128000 count=10 This command generates bios with 250 sectors, allocated in 32 4k-pages (last page uses only 1024 bytes). For LVM layer, it allocates bio with 31 segments (still OK for CCISS), unfortunatelly on lower layer it is recalculated to 32 segments and this violates CCISS restriction and triggers BUG_ON(). The patch tries to fix it by: * initializing attributes above in queue request constructor blk_queue_make_request() * make sure that blk_queue_stack_limits() inherits setting (DM uses its own function to set the limits because it blk_queue_stack_limits() was introduced later. It should probably switch to use generic stack limit function too.) * sets the default seg_boundary value in one place (blkdev.h) * use this mask as default in DM (instead of -1, which differs in 64bit) Bugs related to this: https://bugzilla.redhat.com/show_bug.cgi?id=471639 http://bugzilla.kernel.org/show_bug.cgi?id=8672 Signed-off-by: Milan Broz <mbroz@redhat.com> Reviewed-by: Alasdair G Kergon <agk@redhat.com> Cc: Neil Brown <neilb@suse.de> Cc: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp> Cc: Tejun Heo <htejun@gmail.com> Cc: Mike Miller <mike.miller@hp.com> Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
999 lines
21 KiB
C
999 lines
21 KiB
C
/*
|
|
* Copyright (C) 2001 Sistina Software (UK) Limited.
|
|
* Copyright (C) 2004 Red Hat, Inc. All rights reserved.
|
|
*
|
|
* This file is released under the GPL.
|
|
*/
|
|
|
|
#include "dm.h"
|
|
|
|
#include <linux/module.h>
|
|
#include <linux/vmalloc.h>
|
|
#include <linux/blkdev.h>
|
|
#include <linux/namei.h>
|
|
#include <linux/ctype.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/interrupt.h>
|
|
#include <linux/mutex.h>
|
|
#include <asm/atomic.h>
|
|
|
|
#define DM_MSG_PREFIX "table"
|
|
|
|
#define MAX_DEPTH 16
|
|
#define NODE_SIZE L1_CACHE_BYTES
|
|
#define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t))
|
|
#define CHILDREN_PER_NODE (KEYS_PER_NODE + 1)
|
|
|
|
struct dm_table {
|
|
struct mapped_device *md;
|
|
atomic_t holders;
|
|
|
|
/* btree table */
|
|
unsigned int depth;
|
|
unsigned int counts[MAX_DEPTH]; /* in nodes */
|
|
sector_t *index[MAX_DEPTH];
|
|
|
|
unsigned int num_targets;
|
|
unsigned int num_allocated;
|
|
sector_t *highs;
|
|
struct dm_target *targets;
|
|
|
|
/*
|
|
* Indicates the rw permissions for the new logical
|
|
* device. This should be a combination of FMODE_READ
|
|
* and FMODE_WRITE.
|
|
*/
|
|
fmode_t mode;
|
|
|
|
/* a list of devices used by this table */
|
|
struct list_head devices;
|
|
|
|
/*
|
|
* These are optimistic limits taken from all the
|
|
* targets, some targets will need smaller limits.
|
|
*/
|
|
struct io_restrictions limits;
|
|
|
|
/* events get handed up using this callback */
|
|
void (*event_fn)(void *);
|
|
void *event_context;
|
|
};
|
|
|
|
/*
|
|
* Similar to ceiling(log_size(n))
|
|
*/
|
|
static unsigned int int_log(unsigned int n, unsigned int base)
|
|
{
|
|
int result = 0;
|
|
|
|
while (n > 1) {
|
|
n = dm_div_up(n, base);
|
|
result++;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* Returns the minimum that is _not_ zero, unless both are zero.
|
|
*/
|
|
#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))
|
|
|
|
/*
|
|
* Combine two io_restrictions, always taking the lower value.
|
|
*/
|
|
static void combine_restrictions_low(struct io_restrictions *lhs,
|
|
struct io_restrictions *rhs)
|
|
{
|
|
lhs->max_sectors =
|
|
min_not_zero(lhs->max_sectors, rhs->max_sectors);
|
|
|
|
lhs->max_phys_segments =
|
|
min_not_zero(lhs->max_phys_segments, rhs->max_phys_segments);
|
|
|
|
lhs->max_hw_segments =
|
|
min_not_zero(lhs->max_hw_segments, rhs->max_hw_segments);
|
|
|
|
lhs->hardsect_size = max(lhs->hardsect_size, rhs->hardsect_size);
|
|
|
|
lhs->max_segment_size =
|
|
min_not_zero(lhs->max_segment_size, rhs->max_segment_size);
|
|
|
|
lhs->max_hw_sectors =
|
|
min_not_zero(lhs->max_hw_sectors, rhs->max_hw_sectors);
|
|
|
|
lhs->seg_boundary_mask =
|
|
min_not_zero(lhs->seg_boundary_mask, rhs->seg_boundary_mask);
|
|
|
|
lhs->bounce_pfn = min_not_zero(lhs->bounce_pfn, rhs->bounce_pfn);
|
|
|
|
lhs->no_cluster |= rhs->no_cluster;
|
|
}
|
|
|
|
/*
|
|
* Calculate the index of the child node of the n'th node k'th key.
|
|
*/
|
|
static inline unsigned int get_child(unsigned int n, unsigned int k)
|
|
{
|
|
return (n * CHILDREN_PER_NODE) + k;
|
|
}
|
|
|
|
/*
|
|
* Return the n'th node of level l from table t.
|
|
*/
|
|
static inline sector_t *get_node(struct dm_table *t,
|
|
unsigned int l, unsigned int n)
|
|
{
|
|
return t->index[l] + (n * KEYS_PER_NODE);
|
|
}
|
|
|
|
/*
|
|
* Return the highest key that you could lookup from the n'th
|
|
* node on level l of the btree.
|
|
*/
|
|
static sector_t high(struct dm_table *t, unsigned int l, unsigned int n)
|
|
{
|
|
for (; l < t->depth - 1; l++)
|
|
n = get_child(n, CHILDREN_PER_NODE - 1);
|
|
|
|
if (n >= t->counts[l])
|
|
return (sector_t) - 1;
|
|
|
|
return get_node(t, l, n)[KEYS_PER_NODE - 1];
|
|
}
|
|
|
|
/*
|
|
* Fills in a level of the btree based on the highs of the level
|
|
* below it.
|
|
*/
|
|
static int setup_btree_index(unsigned int l, struct dm_table *t)
|
|
{
|
|
unsigned int n, k;
|
|
sector_t *node;
|
|
|
|
for (n = 0U; n < t->counts[l]; n++) {
|
|
node = get_node(t, l, n);
|
|
|
|
for (k = 0U; k < KEYS_PER_NODE; k++)
|
|
node[k] = high(t, l + 1, get_child(n, k));
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size)
|
|
{
|
|
unsigned long size;
|
|
void *addr;
|
|
|
|
/*
|
|
* Check that we're not going to overflow.
|
|
*/
|
|
if (nmemb > (ULONG_MAX / elem_size))
|
|
return NULL;
|
|
|
|
size = nmemb * elem_size;
|
|
addr = vmalloc(size);
|
|
if (addr)
|
|
memset(addr, 0, size);
|
|
|
|
return addr;
|
|
}
|
|
|
|
/*
|
|
* highs, and targets are managed as dynamic arrays during a
|
|
* table load.
|
|
*/
|
|
static int alloc_targets(struct dm_table *t, unsigned int num)
|
|
{
|
|
sector_t *n_highs;
|
|
struct dm_target *n_targets;
|
|
int n = t->num_targets;
|
|
|
|
/*
|
|
* Allocate both the target array and offset array at once.
|
|
* Append an empty entry to catch sectors beyond the end of
|
|
* the device.
|
|
*/
|
|
n_highs = (sector_t *) dm_vcalloc(num + 1, sizeof(struct dm_target) +
|
|
sizeof(sector_t));
|
|
if (!n_highs)
|
|
return -ENOMEM;
|
|
|
|
n_targets = (struct dm_target *) (n_highs + num);
|
|
|
|
if (n) {
|
|
memcpy(n_highs, t->highs, sizeof(*n_highs) * n);
|
|
memcpy(n_targets, t->targets, sizeof(*n_targets) * n);
|
|
}
|
|
|
|
memset(n_highs + n, -1, sizeof(*n_highs) * (num - n));
|
|
vfree(t->highs);
|
|
|
|
t->num_allocated = num;
|
|
t->highs = n_highs;
|
|
t->targets = n_targets;
|
|
|
|
return 0;
|
|
}
|
|
|
|
int dm_table_create(struct dm_table **result, fmode_t mode,
|
|
unsigned num_targets, struct mapped_device *md)
|
|
{
|
|
struct dm_table *t = kzalloc(sizeof(*t), GFP_KERNEL);
|
|
|
|
if (!t)
|
|
return -ENOMEM;
|
|
|
|
INIT_LIST_HEAD(&t->devices);
|
|
atomic_set(&t->holders, 1);
|
|
|
|
if (!num_targets)
|
|
num_targets = KEYS_PER_NODE;
|
|
|
|
num_targets = dm_round_up(num_targets, KEYS_PER_NODE);
|
|
|
|
if (alloc_targets(t, num_targets)) {
|
|
kfree(t);
|
|
t = NULL;
|
|
return -ENOMEM;
|
|
}
|
|
|
|
t->mode = mode;
|
|
t->md = md;
|
|
*result = t;
|
|
return 0;
|
|
}
|
|
|
|
static void free_devices(struct list_head *devices)
|
|
{
|
|
struct list_head *tmp, *next;
|
|
|
|
list_for_each_safe(tmp, next, devices) {
|
|
struct dm_dev_internal *dd =
|
|
list_entry(tmp, struct dm_dev_internal, list);
|
|
kfree(dd);
|
|
}
|
|
}
|
|
|
|
static void table_destroy(struct dm_table *t)
|
|
{
|
|
unsigned int i;
|
|
|
|
/* free the indexes (see dm_table_complete) */
|
|
if (t->depth >= 2)
|
|
vfree(t->index[t->depth - 2]);
|
|
|
|
/* free the targets */
|
|
for (i = 0; i < t->num_targets; i++) {
|
|
struct dm_target *tgt = t->targets + i;
|
|
|
|
if (tgt->type->dtr)
|
|
tgt->type->dtr(tgt);
|
|
|
|
dm_put_target_type(tgt->type);
|
|
}
|
|
|
|
vfree(t->highs);
|
|
|
|
/* free the device list */
|
|
if (t->devices.next != &t->devices) {
|
|
DMWARN("devices still present during destroy: "
|
|
"dm_table_remove_device calls missing");
|
|
|
|
free_devices(&t->devices);
|
|
}
|
|
|
|
kfree(t);
|
|
}
|
|
|
|
void dm_table_get(struct dm_table *t)
|
|
{
|
|
atomic_inc(&t->holders);
|
|
}
|
|
|
|
void dm_table_put(struct dm_table *t)
|
|
{
|
|
if (!t)
|
|
return;
|
|
|
|
if (atomic_dec_and_test(&t->holders))
|
|
table_destroy(t);
|
|
}
|
|
|
|
/*
|
|
* Checks to see if we need to extend highs or targets.
|
|
*/
|
|
static inline int check_space(struct dm_table *t)
|
|
{
|
|
if (t->num_targets >= t->num_allocated)
|
|
return alloc_targets(t, t->num_allocated * 2);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* See if we've already got a device in the list.
|
|
*/
|
|
static struct dm_dev_internal *find_device(struct list_head *l, dev_t dev)
|
|
{
|
|
struct dm_dev_internal *dd;
|
|
|
|
list_for_each_entry (dd, l, list)
|
|
if (dd->dm_dev.bdev->bd_dev == dev)
|
|
return dd;
|
|
|
|
return NULL;
|
|
}
|
|
|
|
/*
|
|
* Open a device so we can use it as a map destination.
|
|
*/
|
|
static int open_dev(struct dm_dev_internal *d, dev_t dev,
|
|
struct mapped_device *md)
|
|
{
|
|
static char *_claim_ptr = "I belong to device-mapper";
|
|
struct block_device *bdev;
|
|
|
|
int r;
|
|
|
|
BUG_ON(d->dm_dev.bdev);
|
|
|
|
bdev = open_by_devnum(dev, d->dm_dev.mode);
|
|
if (IS_ERR(bdev))
|
|
return PTR_ERR(bdev);
|
|
r = bd_claim_by_disk(bdev, _claim_ptr, dm_disk(md));
|
|
if (r)
|
|
blkdev_put(bdev, d->dm_dev.mode);
|
|
else
|
|
d->dm_dev.bdev = bdev;
|
|
return r;
|
|
}
|
|
|
|
/*
|
|
* Close a device that we've been using.
|
|
*/
|
|
static void close_dev(struct dm_dev_internal *d, struct mapped_device *md)
|
|
{
|
|
if (!d->dm_dev.bdev)
|
|
return;
|
|
|
|
bd_release_from_disk(d->dm_dev.bdev, dm_disk(md));
|
|
blkdev_put(d->dm_dev.bdev, d->dm_dev.mode);
|
|
d->dm_dev.bdev = NULL;
|
|
}
|
|
|
|
/*
|
|
* If possible, this checks an area of a destination device is valid.
|
|
*/
|
|
static int check_device_area(struct dm_dev_internal *dd, sector_t start,
|
|
sector_t len)
|
|
{
|
|
sector_t dev_size = dd->dm_dev.bdev->bd_inode->i_size >> SECTOR_SHIFT;
|
|
|
|
if (!dev_size)
|
|
return 1;
|
|
|
|
return ((start < dev_size) && (len <= (dev_size - start)));
|
|
}
|
|
|
|
/*
|
|
* This upgrades the mode on an already open dm_dev. Being
|
|
* careful to leave things as they were if we fail to reopen the
|
|
* device.
|
|
*/
|
|
static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
|
|
struct mapped_device *md)
|
|
{
|
|
int r;
|
|
struct dm_dev_internal dd_copy;
|
|
dev_t dev = dd->dm_dev.bdev->bd_dev;
|
|
|
|
dd_copy = *dd;
|
|
|
|
dd->dm_dev.mode |= new_mode;
|
|
dd->dm_dev.bdev = NULL;
|
|
r = open_dev(dd, dev, md);
|
|
if (!r)
|
|
close_dev(&dd_copy, md);
|
|
else
|
|
*dd = dd_copy;
|
|
|
|
return r;
|
|
}
|
|
|
|
/*
|
|
* Add a device to the list, or just increment the usage count if
|
|
* it's already present.
|
|
*/
|
|
static int __table_get_device(struct dm_table *t, struct dm_target *ti,
|
|
const char *path, sector_t start, sector_t len,
|
|
fmode_t mode, struct dm_dev **result)
|
|
{
|
|
int r;
|
|
dev_t uninitialized_var(dev);
|
|
struct dm_dev_internal *dd;
|
|
unsigned int major, minor;
|
|
|
|
BUG_ON(!t);
|
|
|
|
if (sscanf(path, "%u:%u", &major, &minor) == 2) {
|
|
/* Extract the major/minor numbers */
|
|
dev = MKDEV(major, minor);
|
|
if (MAJOR(dev) != major || MINOR(dev) != minor)
|
|
return -EOVERFLOW;
|
|
} else {
|
|
/* convert the path to a device */
|
|
struct block_device *bdev = lookup_bdev(path);
|
|
|
|
if (IS_ERR(bdev))
|
|
return PTR_ERR(bdev);
|
|
dev = bdev->bd_dev;
|
|
bdput(bdev);
|
|
}
|
|
|
|
dd = find_device(&t->devices, dev);
|
|
if (!dd) {
|
|
dd = kmalloc(sizeof(*dd), GFP_KERNEL);
|
|
if (!dd)
|
|
return -ENOMEM;
|
|
|
|
dd->dm_dev.mode = mode;
|
|
dd->dm_dev.bdev = NULL;
|
|
|
|
if ((r = open_dev(dd, dev, t->md))) {
|
|
kfree(dd);
|
|
return r;
|
|
}
|
|
|
|
format_dev_t(dd->dm_dev.name, dev);
|
|
|
|
atomic_set(&dd->count, 0);
|
|
list_add(&dd->list, &t->devices);
|
|
|
|
} else if (dd->dm_dev.mode != (mode | dd->dm_dev.mode)) {
|
|
r = upgrade_mode(dd, mode, t->md);
|
|
if (r)
|
|
return r;
|
|
}
|
|
atomic_inc(&dd->count);
|
|
|
|
if (!check_device_area(dd, start, len)) {
|
|
DMWARN("device %s too small for target", path);
|
|
dm_put_device(ti, &dd->dm_dev);
|
|
return -EINVAL;
|
|
}
|
|
|
|
*result = &dd->dm_dev;
|
|
|
|
return 0;
|
|
}
|
|
|
|
void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev)
|
|
{
|
|
struct request_queue *q = bdev_get_queue(bdev);
|
|
struct io_restrictions *rs = &ti->limits;
|
|
char b[BDEVNAME_SIZE];
|
|
|
|
if (unlikely(!q)) {
|
|
DMWARN("%s: Cannot set limits for nonexistent device %s",
|
|
dm_device_name(ti->table->md), bdevname(bdev, b));
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Combine the device limits low.
|
|
*
|
|
* FIXME: if we move an io_restriction struct
|
|
* into q this would just be a call to
|
|
* combine_restrictions_low()
|
|
*/
|
|
rs->max_sectors =
|
|
min_not_zero(rs->max_sectors, q->max_sectors);
|
|
|
|
/*
|
|
* Check if merge fn is supported.
|
|
* If not we'll force DM to use PAGE_SIZE or
|
|
* smaller I/O, just to be safe.
|
|
*/
|
|
|
|
if (q->merge_bvec_fn && !ti->type->merge)
|
|
rs->max_sectors =
|
|
min_not_zero(rs->max_sectors,
|
|
(unsigned int) (PAGE_SIZE >> 9));
|
|
|
|
rs->max_phys_segments =
|
|
min_not_zero(rs->max_phys_segments,
|
|
q->max_phys_segments);
|
|
|
|
rs->max_hw_segments =
|
|
min_not_zero(rs->max_hw_segments, q->max_hw_segments);
|
|
|
|
rs->hardsect_size = max(rs->hardsect_size, q->hardsect_size);
|
|
|
|
rs->max_segment_size =
|
|
min_not_zero(rs->max_segment_size, q->max_segment_size);
|
|
|
|
rs->max_hw_sectors =
|
|
min_not_zero(rs->max_hw_sectors, q->max_hw_sectors);
|
|
|
|
rs->seg_boundary_mask =
|
|
min_not_zero(rs->seg_boundary_mask,
|
|
q->seg_boundary_mask);
|
|
|
|
rs->bounce_pfn = min_not_zero(rs->bounce_pfn, q->bounce_pfn);
|
|
|
|
rs->no_cluster |= !test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
|
|
}
|
|
EXPORT_SYMBOL_GPL(dm_set_device_limits);
|
|
|
|
int dm_get_device(struct dm_target *ti, const char *path, sector_t start,
|
|
sector_t len, fmode_t mode, struct dm_dev **result)
|
|
{
|
|
int r = __table_get_device(ti->table, ti, path,
|
|
start, len, mode, result);
|
|
|
|
if (!r)
|
|
dm_set_device_limits(ti, (*result)->bdev);
|
|
|
|
return r;
|
|
}
|
|
|
|
/*
|
|
* Decrement a devices use count and remove it if necessary.
|
|
*/
|
|
void dm_put_device(struct dm_target *ti, struct dm_dev *d)
|
|
{
|
|
struct dm_dev_internal *dd = container_of(d, struct dm_dev_internal,
|
|
dm_dev);
|
|
|
|
if (atomic_dec_and_test(&dd->count)) {
|
|
close_dev(dd, ti->table->md);
|
|
list_del(&dd->list);
|
|
kfree(dd);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Checks to see if the target joins onto the end of the table.
|
|
*/
|
|
static int adjoin(struct dm_table *table, struct dm_target *ti)
|
|
{
|
|
struct dm_target *prev;
|
|
|
|
if (!table->num_targets)
|
|
return !ti->begin;
|
|
|
|
prev = &table->targets[table->num_targets - 1];
|
|
return (ti->begin == (prev->begin + prev->len));
|
|
}
|
|
|
|
/*
|
|
* Used to dynamically allocate the arg array.
|
|
*/
|
|
static char **realloc_argv(unsigned *array_size, char **old_argv)
|
|
{
|
|
char **argv;
|
|
unsigned new_size;
|
|
|
|
new_size = *array_size ? *array_size * 2 : 64;
|
|
argv = kmalloc(new_size * sizeof(*argv), GFP_KERNEL);
|
|
if (argv) {
|
|
memcpy(argv, old_argv, *array_size * sizeof(*argv));
|
|
*array_size = new_size;
|
|
}
|
|
|
|
kfree(old_argv);
|
|
return argv;
|
|
}
|
|
|
|
/*
|
|
* Destructively splits up the argument list to pass to ctr.
|
|
*/
|
|
int dm_split_args(int *argc, char ***argvp, char *input)
|
|
{
|
|
char *start, *end = input, *out, **argv = NULL;
|
|
unsigned array_size = 0;
|
|
|
|
*argc = 0;
|
|
|
|
if (!input) {
|
|
*argvp = NULL;
|
|
return 0;
|
|
}
|
|
|
|
argv = realloc_argv(&array_size, argv);
|
|
if (!argv)
|
|
return -ENOMEM;
|
|
|
|
while (1) {
|
|
start = end;
|
|
|
|
/* Skip whitespace */
|
|
while (*start && isspace(*start))
|
|
start++;
|
|
|
|
if (!*start)
|
|
break; /* success, we hit the end */
|
|
|
|
/* 'out' is used to remove any back-quotes */
|
|
end = out = start;
|
|
while (*end) {
|
|
/* Everything apart from '\0' can be quoted */
|
|
if (*end == '\\' && *(end + 1)) {
|
|
*out++ = *(end + 1);
|
|
end += 2;
|
|
continue;
|
|
}
|
|
|
|
if (isspace(*end))
|
|
break; /* end of token */
|
|
|
|
*out++ = *end++;
|
|
}
|
|
|
|
/* have we already filled the array ? */
|
|
if ((*argc + 1) > array_size) {
|
|
argv = realloc_argv(&array_size, argv);
|
|
if (!argv)
|
|
return -ENOMEM;
|
|
}
|
|
|
|
/* we know this is whitespace */
|
|
if (*end)
|
|
end++;
|
|
|
|
/* terminate the string and put it in the array */
|
|
*out = '\0';
|
|
argv[*argc] = start;
|
|
(*argc)++;
|
|
}
|
|
|
|
*argvp = argv;
|
|
return 0;
|
|
}
|
|
|
|
static void check_for_valid_limits(struct io_restrictions *rs)
|
|
{
|
|
if (!rs->max_sectors)
|
|
rs->max_sectors = SAFE_MAX_SECTORS;
|
|
if (!rs->max_hw_sectors)
|
|
rs->max_hw_sectors = SAFE_MAX_SECTORS;
|
|
if (!rs->max_phys_segments)
|
|
rs->max_phys_segments = MAX_PHYS_SEGMENTS;
|
|
if (!rs->max_hw_segments)
|
|
rs->max_hw_segments = MAX_HW_SEGMENTS;
|
|
if (!rs->hardsect_size)
|
|
rs->hardsect_size = 1 << SECTOR_SHIFT;
|
|
if (!rs->max_segment_size)
|
|
rs->max_segment_size = MAX_SEGMENT_SIZE;
|
|
if (!rs->seg_boundary_mask)
|
|
rs->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
|
|
if (!rs->bounce_pfn)
|
|
rs->bounce_pfn = -1;
|
|
}
|
|
|
|
int dm_table_add_target(struct dm_table *t, const char *type,
|
|
sector_t start, sector_t len, char *params)
|
|
{
|
|
int r = -EINVAL, argc;
|
|
char **argv;
|
|
struct dm_target *tgt;
|
|
|
|
if ((r = check_space(t)))
|
|
return r;
|
|
|
|
tgt = t->targets + t->num_targets;
|
|
memset(tgt, 0, sizeof(*tgt));
|
|
|
|
if (!len) {
|
|
DMERR("%s: zero-length target", dm_device_name(t->md));
|
|
return -EINVAL;
|
|
}
|
|
|
|
tgt->type = dm_get_target_type(type);
|
|
if (!tgt->type) {
|
|
DMERR("%s: %s: unknown target type", dm_device_name(t->md),
|
|
type);
|
|
return -EINVAL;
|
|
}
|
|
|
|
tgt->table = t;
|
|
tgt->begin = start;
|
|
tgt->len = len;
|
|
tgt->error = "Unknown error";
|
|
|
|
/*
|
|
* Does this target adjoin the previous one ?
|
|
*/
|
|
if (!adjoin(t, tgt)) {
|
|
tgt->error = "Gap in table";
|
|
r = -EINVAL;
|
|
goto bad;
|
|
}
|
|
|
|
r = dm_split_args(&argc, &argv, params);
|
|
if (r) {
|
|
tgt->error = "couldn't split parameters (insufficient memory)";
|
|
goto bad;
|
|
}
|
|
|
|
r = tgt->type->ctr(tgt, argc, argv);
|
|
kfree(argv);
|
|
if (r)
|
|
goto bad;
|
|
|
|
t->highs[t->num_targets++] = tgt->begin + tgt->len - 1;
|
|
|
|
/* FIXME: the plan is to combine high here and then have
|
|
* the merge fn apply the target level restrictions. */
|
|
combine_restrictions_low(&t->limits, &tgt->limits);
|
|
return 0;
|
|
|
|
bad:
|
|
DMERR("%s: %s: %s", dm_device_name(t->md), type, tgt->error);
|
|
dm_put_target_type(tgt->type);
|
|
return r;
|
|
}
|
|
|
|
static int setup_indexes(struct dm_table *t)
|
|
{
|
|
int i;
|
|
unsigned int total = 0;
|
|
sector_t *indexes;
|
|
|
|
/* allocate the space for *all* the indexes */
|
|
for (i = t->depth - 2; i >= 0; i--) {
|
|
t->counts[i] = dm_div_up(t->counts[i + 1], CHILDREN_PER_NODE);
|
|
total += t->counts[i];
|
|
}
|
|
|
|
indexes = (sector_t *) dm_vcalloc(total, (unsigned long) NODE_SIZE);
|
|
if (!indexes)
|
|
return -ENOMEM;
|
|
|
|
/* set up internal nodes, bottom-up */
|
|
for (i = t->depth - 2; i >= 0; i--) {
|
|
t->index[i] = indexes;
|
|
indexes += (KEYS_PER_NODE * t->counts[i]);
|
|
setup_btree_index(i, t);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Builds the btree to index the map.
|
|
*/
|
|
int dm_table_complete(struct dm_table *t)
|
|
{
|
|
int r = 0;
|
|
unsigned int leaf_nodes;
|
|
|
|
check_for_valid_limits(&t->limits);
|
|
|
|
/* how many indexes will the btree have ? */
|
|
leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE);
|
|
t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE);
|
|
|
|
/* leaf layer has already been set up */
|
|
t->counts[t->depth - 1] = leaf_nodes;
|
|
t->index[t->depth - 1] = t->highs;
|
|
|
|
if (t->depth >= 2)
|
|
r = setup_indexes(t);
|
|
|
|
return r;
|
|
}
|
|
|
|
static DEFINE_MUTEX(_event_lock);
|
|
void dm_table_event_callback(struct dm_table *t,
|
|
void (*fn)(void *), void *context)
|
|
{
|
|
mutex_lock(&_event_lock);
|
|
t->event_fn = fn;
|
|
t->event_context = context;
|
|
mutex_unlock(&_event_lock);
|
|
}
|
|
|
|
void dm_table_event(struct dm_table *t)
|
|
{
|
|
/*
|
|
* You can no longer call dm_table_event() from interrupt
|
|
* context, use a bottom half instead.
|
|
*/
|
|
BUG_ON(in_interrupt());
|
|
|
|
mutex_lock(&_event_lock);
|
|
if (t->event_fn)
|
|
t->event_fn(t->event_context);
|
|
mutex_unlock(&_event_lock);
|
|
}
|
|
|
|
sector_t dm_table_get_size(struct dm_table *t)
|
|
{
|
|
return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0;
|
|
}
|
|
|
|
struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index)
|
|
{
|
|
if (index >= t->num_targets)
|
|
return NULL;
|
|
|
|
return t->targets + index;
|
|
}
|
|
|
|
/*
|
|
* Search the btree for the correct target.
|
|
*
|
|
* Caller should check returned pointer with dm_target_is_valid()
|
|
* to trap I/O beyond end of device.
|
|
*/
|
|
struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
|
|
{
|
|
unsigned int l, n = 0, k = 0;
|
|
sector_t *node;
|
|
|
|
for (l = 0; l < t->depth; l++) {
|
|
n = get_child(n, k);
|
|
node = get_node(t, l, n);
|
|
|
|
for (k = 0; k < KEYS_PER_NODE; k++)
|
|
if (node[k] >= sector)
|
|
break;
|
|
}
|
|
|
|
return &t->targets[(KEYS_PER_NODE * n) + k];
|
|
}
|
|
|
|
void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q)
|
|
{
|
|
/*
|
|
* Make sure we obey the optimistic sub devices
|
|
* restrictions.
|
|
*/
|
|
blk_queue_max_sectors(q, t->limits.max_sectors);
|
|
q->max_phys_segments = t->limits.max_phys_segments;
|
|
q->max_hw_segments = t->limits.max_hw_segments;
|
|
q->hardsect_size = t->limits.hardsect_size;
|
|
q->max_segment_size = t->limits.max_segment_size;
|
|
q->max_hw_sectors = t->limits.max_hw_sectors;
|
|
q->seg_boundary_mask = t->limits.seg_boundary_mask;
|
|
q->bounce_pfn = t->limits.bounce_pfn;
|
|
|
|
if (t->limits.no_cluster)
|
|
queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
|
|
else
|
|
queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q);
|
|
|
|
}
|
|
|
|
unsigned int dm_table_get_num_targets(struct dm_table *t)
|
|
{
|
|
return t->num_targets;
|
|
}
|
|
|
|
struct list_head *dm_table_get_devices(struct dm_table *t)
|
|
{
|
|
return &t->devices;
|
|
}
|
|
|
|
fmode_t dm_table_get_mode(struct dm_table *t)
|
|
{
|
|
return t->mode;
|
|
}
|
|
|
|
static void suspend_targets(struct dm_table *t, unsigned postsuspend)
|
|
{
|
|
int i = t->num_targets;
|
|
struct dm_target *ti = t->targets;
|
|
|
|
while (i--) {
|
|
if (postsuspend) {
|
|
if (ti->type->postsuspend)
|
|
ti->type->postsuspend(ti);
|
|
} else if (ti->type->presuspend)
|
|
ti->type->presuspend(ti);
|
|
|
|
ti++;
|
|
}
|
|
}
|
|
|
|
void dm_table_presuspend_targets(struct dm_table *t)
|
|
{
|
|
if (!t)
|
|
return;
|
|
|
|
suspend_targets(t, 0);
|
|
}
|
|
|
|
void dm_table_postsuspend_targets(struct dm_table *t)
|
|
{
|
|
if (!t)
|
|
return;
|
|
|
|
suspend_targets(t, 1);
|
|
}
|
|
|
|
int dm_table_resume_targets(struct dm_table *t)
|
|
{
|
|
int i, r = 0;
|
|
|
|
for (i = 0; i < t->num_targets; i++) {
|
|
struct dm_target *ti = t->targets + i;
|
|
|
|
if (!ti->type->preresume)
|
|
continue;
|
|
|
|
r = ti->type->preresume(ti);
|
|
if (r)
|
|
return r;
|
|
}
|
|
|
|
for (i = 0; i < t->num_targets; i++) {
|
|
struct dm_target *ti = t->targets + i;
|
|
|
|
if (ti->type->resume)
|
|
ti->type->resume(ti);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int dm_table_any_congested(struct dm_table *t, int bdi_bits)
|
|
{
|
|
struct dm_dev_internal *dd;
|
|
struct list_head *devices = dm_table_get_devices(t);
|
|
int r = 0;
|
|
|
|
list_for_each_entry(dd, devices, list) {
|
|
struct request_queue *q = bdev_get_queue(dd->dm_dev.bdev);
|
|
char b[BDEVNAME_SIZE];
|
|
|
|
if (likely(q))
|
|
r |= bdi_congested(&q->backing_dev_info, bdi_bits);
|
|
else
|
|
DMWARN_LIMIT("%s: any_congested: nonexistent device %s",
|
|
dm_device_name(t->md),
|
|
bdevname(dd->dm_dev.bdev, b));
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
void dm_table_unplug_all(struct dm_table *t)
|
|
{
|
|
struct dm_dev_internal *dd;
|
|
struct list_head *devices = dm_table_get_devices(t);
|
|
|
|
list_for_each_entry(dd, devices, list) {
|
|
struct request_queue *q = bdev_get_queue(dd->dm_dev.bdev);
|
|
char b[BDEVNAME_SIZE];
|
|
|
|
if (likely(q))
|
|
blk_unplug(q);
|
|
else
|
|
DMWARN_LIMIT("%s: Cannot unplug nonexistent device %s",
|
|
dm_device_name(t->md),
|
|
bdevname(dd->dm_dev.bdev, b));
|
|
}
|
|
}
|
|
|
|
struct mapped_device *dm_table_get_md(struct dm_table *t)
|
|
{
|
|
dm_get(t->md);
|
|
|
|
return t->md;
|
|
}
|
|
|
|
EXPORT_SYMBOL(dm_vcalloc);
|
|
EXPORT_SYMBOL(dm_get_device);
|
|
EXPORT_SYMBOL(dm_put_device);
|
|
EXPORT_SYMBOL(dm_table_event);
|
|
EXPORT_SYMBOL(dm_table_get_size);
|
|
EXPORT_SYMBOL(dm_table_get_mode);
|
|
EXPORT_SYMBOL(dm_table_get_md);
|
|
EXPORT_SYMBOL(dm_table_put);
|
|
EXPORT_SYMBOL(dm_table_get);
|
|
EXPORT_SYMBOL(dm_table_unplug_all);
|