Merge git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm

* git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm: (36 commits)
  dm: set queue ordered mode
  dm: move wait queue declaration
  dm: merge pushback and deferred bio lists
  dm: allow uninterruptible wait for pending io
  dm: merge __flush_deferred_io into caller
  dm: move bio_io_error into __split_and_process_bio
  dm: rename __split_bio
  dm: remove unnecessary struct dm_wq_req
  dm: remove unnecessary work queue context field
  dm: remove unnecessary work queue type field
  dm: bio list add bio_list_add_head
  dm snapshot: persistent fix dtr cleanup
  dm snapshot: move status to exception store
  dm snapshot: move ctr parsing to exception store
  dm snapshot: use DMEMIT macro for status
  dm snapshot: remove dm_snap header
  dm snapshot: remove dm_snap header use
  dm exception store: move cow pointer
  dm exception store: move chunk_fields
  dm exception store: move dm_target pointer
  ...
This commit is contained in:
Linus Torvalds 2009-04-03 10:02:45 -07:00
commit d9b9be024a
19 changed files with 895 additions and 622 deletions

View File

@ -52,6 +52,16 @@ static inline void bio_list_add(struct bio_list *bl, struct bio *bio)
bl->tail = bio;
}
/*
 * Insert @bio at the front of @bl. When the list has no tail yet,
 * @bio becomes the tail as well as the head.
 */
static inline void bio_list_add_head(struct bio_list *bl, struct bio *bio)
{
	struct bio *old_head = bl->head;

	bio->bi_next = old_head;
	if (!bl->tail)
		bl->tail = bio;
	bl->head = bio;
}
static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2)
{
if (!bl2->head)

View File

@ -16,30 +16,56 @@
* functions in this file help the target record and restore the
* original bio state.
*/
/*
 * Saved copy of the length/offset pair of a single bio_vec entry.
 * The fields are 16 bits wide when PAGE_SIZE is below 65536 and plain
 * unsigned otherwise.
 */
struct dm_bio_vec_details {
#if PAGE_SIZE < 65536
__u16 bv_len;
__u16 bv_offset;
#else
unsigned bv_len;
unsigned bv_offset;
#endif
};
/*
 * Snapshot of the bio fields that dm_bio_record() saves and
 * dm_bio_restore() writes back.
 */
struct dm_bio_details {
sector_t bi_sector;
struct block_device *bi_bdev;
unsigned int bi_size;
unsigned short bi_idx;
unsigned long bi_flags;
/* One saved len/offset pair per bio_vec slot, up to BIO_MAX_PAGES. */
struct dm_bio_vec_details bi_io_vec[BIO_MAX_PAGES];
};
static inline void dm_bio_record(struct dm_bio_details *bd, struct bio *bio)
{
unsigned i;
bd->bi_sector = bio->bi_sector;
bd->bi_bdev = bio->bi_bdev;
bd->bi_size = bio->bi_size;
bd->bi_idx = bio->bi_idx;
bd->bi_flags = bio->bi_flags;
for (i = 0; i < bio->bi_vcnt; i++) {
bd->bi_io_vec[i].bv_len = bio->bi_io_vec[i].bv_len;
bd->bi_io_vec[i].bv_offset = bio->bi_io_vec[i].bv_offset;
}
}
static inline void dm_bio_restore(struct dm_bio_details *bd, struct bio *bio)
{
unsigned i;
bio->bi_sector = bd->bi_sector;
bio->bi_bdev = bd->bi_bdev;
bio->bi_size = bd->bi_size;
bio->bi_idx = bd->bi_idx;
bio->bi_flags = bd->bi_flags;
for (i = 0; i < bio->bi_vcnt; i++) {
bio->bi_io_vec[i].bv_len = bd->bi_io_vec[i].bv_len;
bio->bi_io_vec[i].bv_offset = bd->bi_io_vec[i].bv_offset;
}
}
#endif

View File

@ -1156,8 +1156,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
crypto_free_ablkcipher(tfm);
bad_cipher:
/* Must zero key material before freeing */
memset(cc, 0, sizeof(*cc) + cc->key_size * sizeof(u8));
kfree(cc);
kzfree(cc);
return -EINVAL;
}
@ -1183,8 +1182,7 @@ static void crypt_dtr(struct dm_target *ti)
dm_put_device(ti, cc->dev);
/* Must zero key material before freeing */
memset(cc, 0, sizeof(*cc) + cc->key_size * sizeof(u8));
kfree(cc);
kzfree(cc);
}
static int crypt_map(struct dm_target *ti, struct bio *bio,

View File

@ -7,6 +7,7 @@
#include "dm-exception-store.h"
#include <linux/ctype.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
@ -14,6 +15,257 @@
#define DM_MSG_PREFIX "snapshot exception stores"
static LIST_HEAD(_exception_store_types);
static DEFINE_SPINLOCK(_lock);
static struct dm_exception_store_type *__find_exception_store_type(const char *name)
{
struct dm_exception_store_type *type;
list_for_each_entry(type, &_exception_store_types, list)
if (!strcmp(name, type->name))
return type;
return NULL;
}
/*
 * Look up @name under _lock and take a reference on the owning module.
 * Returns NULL when the name is not registered or the module reference
 * cannot be obtained.
 */
static struct dm_exception_store_type *_get_exception_store_type(const char *name)
{
	struct dm_exception_store_type *found;

	spin_lock(&_lock);

	found = __find_exception_store_type(name);
	if (found) {
		if (!try_module_get(found->module))
			found = NULL;
	}

	spin_unlock(&_lock);

	return found;
}
/*
 * get_type
 * @type_name
 *
 * Retrieve a registered dm_exception_store_type by name, attempting to
 * load a module on demand when the name is not registered yet.
 *
 * Autoloadable exstore modules are named "dm-exstore-<type_name>".
 * One module may provide several types, so after requesting the module
 * for the full name the candidate module name is cut at its last '-'
 * and the request is repeated. For "clustered-shared" the modules tried
 * are 'dm-exstore-clustered-shared' and then 'dm-exstore-clustered'.
 * The type lookup itself always uses the full, untruncated 'type_name'.
 *
 * The shorter "dm-exstore-" prefix was chosen over
 * "dm-exception-store-" to keep module file names manageable; modules
 * must follow this convention to be autoloaded.
 *
 * Returns: dm_exception_store_type* on success, NULL on failure
 */
static struct dm_exception_store_type *get_type(const char *type_name)
{
	struct dm_exception_store_type *type;
	char *module_suffix;
	char *dash;

	/* Fast path: the type is already registered. */
	type = _get_exception_store_type(type_name);
	if (type)
		return type;

	module_suffix = kstrdup(type_name, GFP_KERNEL);
	if (!module_suffix) {
		DMERR("No memory left to attempt load for \"%s\"", type_name);
		return NULL;
	}

	for (;;) {
		/* request_module() returns 0 on success. */
		if (!request_module("dm-exstore-%s", module_suffix)) {
			type = _get_exception_store_type(type_name);
			if (type)
				break;
		}

		/* Drop the last '-' component and try a shorter module name. */
		dash = strrchr(module_suffix, '-');
		if (!dash)
			break;
		*dash = '\0';
	}

	if (!type)
		DMWARN("Module for exstore type \"%s\" not found.", type_name);

	kfree(module_suffix);

	return type;
}
/*
 * Release the module reference held for @type. The reference is dropped
 * while holding _lock.
 */
static void put_type(struct dm_exception_store_type *type)
{
	struct module *owner;

	spin_lock(&_lock);
	owner = type->module;
	module_put(owner);
	spin_unlock(&_lock);
}
/*
 * Add @type to the list of known exception store types.
 * Returns 0 on success or -EEXIST if a type with the same name is
 * already registered.
 */
int dm_exception_store_type_register(struct dm_exception_store_type *type)
{
	int r;

	spin_lock(&_lock);
	if (__find_exception_store_type(type->name)) {
		r = -EEXIST;
	} else {
		list_add(&type->list, &_exception_store_types);
		r = 0;
	}
	spin_unlock(&_lock);

	return r;
}
EXPORT_SYMBOL(dm_exception_store_type_register);
/*
 * Remove @type from the list of known exception store types.
 * Returns 0 on success or -EINVAL if no type with that name is
 * registered.
 */
int dm_exception_store_type_unregister(struct dm_exception_store_type *type)
{
	int r = -EINVAL;

	spin_lock(&_lock);
	if (__find_exception_store_type(type->name)) {
		list_del(&type->list);
		r = 0;
	}
	spin_unlock(&_lock);

	return r;
}
EXPORT_SYMBOL(dm_exception_store_type_unregister);
/*
 * Round 'n' up to the next multiple of 'size'.
 * 'size' must be a power of 2 (the result is computed with a bit mask).
 */
static ulong round_up(ulong n, ulong size)
{
	const ulong mask = size - 1;

	return (n + mask) & ~mask;
}
/*
 * Parse 'chunk_size_arg' (decimal) and fill in the chunk_size,
 * chunk_mask and chunk_shift fields of 'store'.
 *
 * A value of 0 leaves all three fields zero. Any other value is rounded
 * up to a multiple of (PAGE_SIZE >> 9), must then be a power of 2, and
 * must be a multiple of the COW device's hardware sector size (>> 9).
 *
 * Returns 0 on success; on failure returns -EINVAL and points *error
 * at a static message.
 */
static int set_chunk_size(struct dm_exception_store *store,
			  const char *chunk_size_arg, char **error)
{
	char *end;
	unsigned long sectors;

	sectors = simple_strtoul(chunk_size_arg, &end, 10);

	/* Reject an empty string and any trailing non-digit characters. */
	if (*chunk_size_arg == '\0' || *end != '\0') {
		*error = "Invalid chunk size";
		return -EINVAL;
	}

	if (sectors == 0) {
		store->chunk_shift = 0;
		store->chunk_mask = 0;
		store->chunk_size = 0;
		return 0;
	}

	/*
	 * Chunk size must be multiple of page size.  Silently
	 * round up if it's not.
	 */
	sectors = round_up(sectors, PAGE_SIZE >> 9);

	/* Check chunk_size is a power of 2 */
	if (!is_power_of_2(sectors)) {
		*error = "Chunk size is not a power of 2";
		return -EINVAL;
	}

	/* Validate the chunk size against the device block size */
	if (sectors % (bdev_hardsect_size(store->cow->bdev) >> 9)) {
		*error = "Chunk size is not a multiple of device blocksize";
		return -EINVAL;
	}

	store->chunk_size = sectors;
	store->chunk_mask = sectors - 1;
	store->chunk_shift = ffs(sectors) - 1;

	return 0;
}
/*
 * Create an exception store from the first three target arguments.
 *
 * @ti:        target being constructed; ti->error is set on failure
 * @argc:      number of entries in @argv (at least 3 are required)
 * @argv:      argv[0] = COW device, argv[1] = store type name
 *             (first character must be P/p or N/n), argv[2] = chunk size
 * @args_used: set to 3 on success
 * @store:     receives the newly created store on success
 *
 * Returns 0 on success or a negative errno. Every failure path releases
 * whatever was acquired before it: the COW device reference, the type's
 * module reference and the store allocation.
 *
 * NOTE(review): the flag check is case-insensitive but get_type() is
 * passed argv[1] unchanged and the lookup is an exact strcmp, so a
 * lower-case "p"/"n" passes the check and then fails the type lookup.
 * Behaviour left unchanged here - confirm what is intended.
 */
int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
			      unsigned *args_used,
			      struct dm_exception_store **store)
{
	int r = 0;
	struct dm_exception_store_type *type;
	struct dm_exception_store *tmp_store;
	char persistent;

	if (argc < 3) {
		ti->error = "Insufficient exception store arguments";
		return -EINVAL;
	}

	tmp_store = kmalloc(sizeof(*tmp_store), GFP_KERNEL);
	if (!tmp_store) {
		ti->error = "Exception store allocation failed";
		return -ENOMEM;
	}

	persistent = toupper(*argv[1]);
	if (persistent != 'P' && persistent != 'N') {
		ti->error = "Persistent flag is not P or N";
		/* tmp_store is already allocated: free it, don't just return. */
		r = -EINVAL;
		goto bad_free_store;
	}

	type = get_type(argv[1]);
	if (!type) {
		ti->error = "Exception store type not recognised";
		r = -EINVAL;
		goto bad_free_store;
	}

	tmp_store->type = type;
	tmp_store->ti = ti;

	r = dm_get_device(ti, argv[0], 0, 0,
			  FMODE_READ | FMODE_WRITE, &tmp_store->cow);
	if (r) {
		ti->error = "Cannot get COW device";
		goto bad_put_type;
	}

	/* From here on the COW device is held and must be put on failure. */
	r = set_chunk_size(tmp_store, argv[2], &ti->error);
	if (r)
		goto bad_put_cow;

	r = type->ctr(tmp_store, 0, NULL);
	if (r) {
		ti->error = "Exception store type constructor failed";
		goto bad_put_cow;
	}

	*args_used = 3;
	*store = tmp_store;
	return 0;

bad_put_cow:
	dm_put_device(ti, tmp_store->cow);
bad_put_type:
	put_type(type);
bad_free_store:
	kfree(tmp_store);
	return r;
}
EXPORT_SYMBOL(dm_exception_store_create);
/*
 * Tear down a store built by dm_exception_store_create(): run the
 * type's destructor, release the COW device, drop the type's module
 * reference and free the store itself.
 */
void dm_exception_store_destroy(struct dm_exception_store *store)
{
	struct dm_target *ti = store->ti;

	store->type->dtr(store);
	dm_put_device(ti, store->cow);
	put_type(store->type);
	kfree(store);
}
EXPORT_SYMBOL(dm_exception_store_destroy);
int dm_exception_store_init(void)
{
int r;

View File

@ -37,11 +37,18 @@ struct dm_snap_exception {
* Abstraction to handle the meta/layout of exception stores (the
* COW device).
*/
struct dm_exception_store {
struct dm_exception_store;
struct dm_exception_store_type {
const char *name;
struct module *module;
int (*ctr) (struct dm_exception_store *store,
unsigned argc, char **argv);
/*
* Destroys this object when you've finished with it.
*/
void (*destroy) (struct dm_exception_store *store);
void (*dtr) (struct dm_exception_store *store);
/*
* The target shouldn't read the COW device until this is
@ -72,8 +79,9 @@ struct dm_exception_store {
*/
void (*drop_snapshot) (struct dm_exception_store *store);
int (*status) (struct dm_exception_store *store, status_type_t status,
char *result, unsigned int maxlen);
unsigned (*status) (struct dm_exception_store *store,
status_type_t status, char *result,
unsigned maxlen);
/*
* Return how full the snapshot is.
@ -82,7 +90,21 @@ struct dm_exception_store {
sector_t *numerator,
sector_t *denominator);
struct dm_snapshot *snap;
/* For internal device-mapper use only. */
struct list_head list;
};
/*
 * One instance of an exception store, created by
 * dm_exception_store_create() and released by
 * dm_exception_store_destroy().
 */
struct dm_exception_store {
/* Operations table of the implementation backing this store. */
struct dm_exception_store_type *type;
/* Target that owns this store; used when releasing the COW device. */
struct dm_target *ti;
/* The COW device. */
struct dm_dev *cow;
/* Size of data blocks saved - must be a power of 2 */
chunk_t chunk_size;
/* chunk_size - 1 */
chunk_t chunk_mask;
/* ffs(chunk_size) - 1 */
chunk_t chunk_shift;
/* Private data of the store implementation (set by its ctr). */
void *context;
};
@ -129,6 +151,28 @@ static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e)
# endif
/*
 * Return the number of sectors in the device: the i_size of the block
 * device's inode shifted right by SECTOR_SHIFT.
 */
static inline sector_t get_dev_size(struct block_device *bdev)
{
return bdev->bd_inode->i_size >> SECTOR_SHIFT;
}
/*
 * Convert a sector number to the number of the chunk containing it:
 * clear the within-chunk bits, then shift down by the chunk shift.
 */
static inline chunk_t sector_to_chunk(struct dm_exception_store *store,
				      sector_t sector)
{
	sector_t chunk_start = sector & ~store->chunk_mask;

	return chunk_start >> store->chunk_shift;
}
int dm_exception_store_type_register(struct dm_exception_store_type *type);
int dm_exception_store_type_unregister(struct dm_exception_store_type *type);
int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
unsigned *args_used,
struct dm_exception_store **store);
void dm_exception_store_destroy(struct dm_exception_store *store);
int dm_exception_store_init(void);
void dm_exception_store_exit(void);
@ -141,8 +185,4 @@ void dm_persistent_snapshot_exit(void);
int dm_transient_snapshot_init(void);
void dm_transient_snapshot_exit(void);
int dm_create_persistent(struct dm_exception_store *store);
int dm_create_transient(struct dm_exception_store *store);
#endif /* _LINUX_DM_EXCEPTION_STORE */

View File

@ -370,16 +370,13 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
while (1) {
set_current_state(TASK_UNINTERRUPTIBLE);
if (!atomic_read(&io.count) || signal_pending(current))
if (!atomic_read(&io.count))
break;
io_schedule();
}
set_current_state(TASK_RUNNING);
if (atomic_read(&io.count))
return -EINTR;
if (error_bits)
*error_bits = io.error_bits;

View File

@ -16,40 +16,29 @@
#define DM_MSG_PREFIX "dirty region log"
struct dm_dirty_log_internal {
struct dm_dirty_log_type *type;
struct list_head list;
long use;
};
static LIST_HEAD(_log_types);
static DEFINE_SPINLOCK(_lock);
static struct dm_dirty_log_internal *__find_dirty_log_type(const char *name)
static struct dm_dirty_log_type *__find_dirty_log_type(const char *name)
{
struct dm_dirty_log_internal *log_type;
struct dm_dirty_log_type *log_type;
list_for_each_entry(log_type, &_log_types, list)
if (!strcmp(name, log_type->type->name))
if (!strcmp(name, log_type->name))
return log_type;
return NULL;
}
static struct dm_dirty_log_internal *_get_dirty_log_type(const char *name)
static struct dm_dirty_log_type *_get_dirty_log_type(const char *name)
{
struct dm_dirty_log_internal *log_type;
struct dm_dirty_log_type *log_type;
spin_lock(&_lock);
log_type = __find_dirty_log_type(name);
if (log_type) {
if (!log_type->use && !try_module_get(log_type->type->module))
log_type = NULL;
else
log_type->use++;
}
if (log_type && !try_module_get(log_type->module))
log_type = NULL;
spin_unlock(&_lock);
@ -76,14 +65,14 @@ static struct dm_dirty_log_internal *_get_dirty_log_type(const char *name)
static struct dm_dirty_log_type *get_type(const char *type_name)
{
char *p, *type_name_dup;
struct dm_dirty_log_internal *log_type;
struct dm_dirty_log_type *log_type;
if (!type_name)
return NULL;
log_type = _get_dirty_log_type(type_name);
if (log_type)
return log_type->type;
return log_type;
type_name_dup = kstrdup(type_name, GFP_KERNEL);
if (!type_name_dup) {
@ -105,56 +94,33 @@ static struct dm_dirty_log_type *get_type(const char *type_name)
kfree(type_name_dup);
return log_type ? log_type->type : NULL;
return log_type;
}
static void put_type(struct dm_dirty_log_type *type)
{
struct dm_dirty_log_internal *log_type;
if (!type)
return;
spin_lock(&_lock);
log_type = __find_dirty_log_type(type->name);
if (!log_type)
if (!__find_dirty_log_type(type->name))
goto out;
if (!--log_type->use)
module_put(type->module);
BUG_ON(log_type->use < 0);
module_put(type->module);
out:
spin_unlock(&_lock);
}
static struct dm_dirty_log_internal *_alloc_dirty_log_type(struct dm_dirty_log_type *type)
{
struct dm_dirty_log_internal *log_type = kzalloc(sizeof(*log_type),
GFP_KERNEL);
if (log_type)
log_type->type = type;
return log_type;
}
int dm_dirty_log_type_register(struct dm_dirty_log_type *type)
{
struct dm_dirty_log_internal *log_type = _alloc_dirty_log_type(type);
int r = 0;
if (!log_type)
return -ENOMEM;
spin_lock(&_lock);
if (!__find_dirty_log_type(type->name))
list_add(&log_type->list, &_log_types);
else {
kfree(log_type);
list_add(&type->list, &_log_types);
else
r = -EEXIST;
}
spin_unlock(&_lock);
return r;
@ -163,25 +129,16 @@ EXPORT_SYMBOL(dm_dirty_log_type_register);
int dm_dirty_log_type_unregister(struct dm_dirty_log_type *type)
{
struct dm_dirty_log_internal *log_type;
spin_lock(&_lock);
log_type = __find_dirty_log_type(type->name);
if (!log_type) {
if (!__find_dirty_log_type(type->name)) {
spin_unlock(&_lock);
return -EINVAL;
}
if (log_type->use) {
spin_unlock(&_lock);
return -ETXTBSY;
}
list_del(&log_type->list);
list_del(&type->list);
spin_unlock(&_lock);
kfree(log_type);
return 0;
}

View File

@ -17,9 +17,7 @@
struct ps_internal {
struct path_selector_type pst;
struct list_head list;
long use;
};
#define pst_to_psi(__pst) container_of((__pst), struct ps_internal, pst)
@ -45,12 +43,8 @@ static struct ps_internal *get_path_selector(const char *name)
down_read(&_ps_lock);
psi = __find_path_selector_type(name);
if (psi) {
if ((psi->use == 0) && !try_module_get(psi->pst.module))
psi = NULL;
else
psi->use++;
}
if (psi && !try_module_get(psi->pst.module))
psi = NULL;
up_read(&_ps_lock);
return psi;
@ -84,11 +78,7 @@ void dm_put_path_selector(struct path_selector_type *pst)
if (!psi)
goto out;
if (--psi->use == 0)
module_put(psi->pst.module);
BUG_ON(psi->use < 0);
module_put(psi->pst.module);
out:
up_read(&_ps_lock);
}
@ -136,11 +126,6 @@ int dm_unregister_path_selector(struct path_selector_type *pst)
return -EINVAL;
}
if (psi->use) {
up_write(&_ps_lock);
return -ETXTBSY;
}
list_del(&psi->list);
up_write(&_ps_lock);

View File

@ -145,6 +145,8 @@ struct dm_raid1_read_record {
struct dm_bio_details details;
};
static struct kmem_cache *_dm_raid1_read_record_cache;
/*
* Every mirror should look like this one.
*/
@ -586,6 +588,9 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
int state;
struct bio *bio;
struct bio_list sync, nosync, recover, *this_list = NULL;
struct bio_list requeue;
struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
region_t region;
if (!writes->head)
return;
@ -596,10 +601,18 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
bio_list_init(&sync);
bio_list_init(&nosync);
bio_list_init(&recover);
bio_list_init(&requeue);
while ((bio = bio_list_pop(writes))) {
state = dm_rh_get_state(ms->rh,
dm_rh_bio_to_region(ms->rh, bio), 1);
region = dm_rh_bio_to_region(ms->rh, bio);
if (log->type->is_remote_recovering &&
log->type->is_remote_recovering(log, region)) {
bio_list_add(&requeue, bio);
continue;
}
state = dm_rh_get_state(ms->rh, region, 1);
switch (state) {
case DM_RH_CLEAN:
case DM_RH_DIRTY:
@ -618,6 +631,16 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
bio_list_add(this_list, bio);
}
/*
* Add bios that are delayed due to remote recovery
* back on to the write queue
*/
if (unlikely(requeue.head)) {
spin_lock_irq(&ms->lock);
bio_list_merge(&ms->writes, &requeue);
spin_unlock_irq(&ms->lock);
}
/*
* Increment the pending counts for any regions that will
* be written to (writes to recover regions are going to
@ -764,9 +787,9 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors,
atomic_set(&ms->suspend, 0);
atomic_set(&ms->default_mirror, DEFAULT_MIRROR);
len = sizeof(struct dm_raid1_read_record);
ms->read_record_pool = mempool_create_kmalloc_pool(MIN_READ_RECORDS,
len);
ms->read_record_pool = mempool_create_slab_pool(MIN_READ_RECORDS,
_dm_raid1_read_record_cache);
if (!ms->read_record_pool) {
ti->error = "Error creating mirror read_record_pool";
kfree(ms);
@ -1279,16 +1302,31 @@ static int __init dm_mirror_init(void)
{
int r;
r = dm_register_target(&mirror_target);
if (r < 0)
DMERR("Failed to register mirror target");
_dm_raid1_read_record_cache = KMEM_CACHE(dm_raid1_read_record, 0);
if (!_dm_raid1_read_record_cache) {
DMERR("Can't allocate dm_raid1_read_record cache");
r = -ENOMEM;
goto bad_cache;
}
r = dm_register_target(&mirror_target);
if (r < 0) {
DMERR("Failed to register mirror target");
goto bad_target;
}
return 0;
bad_target:
kmem_cache_destroy(_dm_raid1_read_record_cache);
bad_cache:
return r;
}
/*
 * Module exit: unregister the mirror target, then destroy the slab
 * cache that backs the read-record mempools.
 */
static void __exit dm_mirror_exit(void)
{
dm_unregister_target(&mirror_target);
kmem_cache_destroy(_dm_raid1_read_record_cache);
}
/* Module hooks */

View File

@ -6,7 +6,6 @@
*/
#include "dm-exception-store.h"
#include "dm-snap.h"
#include <linux/mm.h>
#include <linux/pagemap.h>
@ -89,7 +88,7 @@ struct commit_callback {
* The top level structure for a persistent exception store.
*/
struct pstore {
struct dm_snapshot *snap; /* up pointer to my snapshot */
struct dm_exception_store *store;
int version;
int valid;
uint32_t exceptions_per_area;
@ -141,7 +140,7 @@ static int alloc_area(struct pstore *ps)
int r = -ENOMEM;
size_t len;
len = ps->snap->chunk_size << SECTOR_SHIFT;
len = ps->store->chunk_size << SECTOR_SHIFT;
/*
* Allocate the chunk_size block of memory that will hold
@ -163,9 +162,12 @@ static int alloc_area(struct pstore *ps)
static void free_area(struct pstore *ps)
{
vfree(ps->area);
if (ps->area)
vfree(ps->area);
ps->area = NULL;
vfree(ps->zero_area);
if (ps->zero_area)
vfree(ps->zero_area);
ps->zero_area = NULL;
}
@ -189,9 +191,9 @@ static void do_metadata(struct work_struct *work)
static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata)
{
struct dm_io_region where = {
.bdev = ps->snap->cow->bdev,
.sector = ps->snap->chunk_size * chunk,
.count = ps->snap->chunk_size,
.bdev = ps->store->cow->bdev,
.sector = ps->store->chunk_size * chunk,
.count = ps->store->chunk_size,
};
struct dm_io_request io_req = {
.bi_rw = rw,
@ -247,15 +249,15 @@ static int area_io(struct pstore *ps, int rw)
static void zero_memory_area(struct pstore *ps)
{
memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT);
}
static int zero_disk_area(struct pstore *ps, chunk_t area)
{
struct dm_io_region where = {
.bdev = ps->snap->cow->bdev,
.sector = ps->snap->chunk_size * area_location(ps, area),
.count = ps->snap->chunk_size,
.bdev = ps->store->cow->bdev,
.sector = ps->store->chunk_size * area_location(ps, area),
.count = ps->store->chunk_size,
};
struct dm_io_request io_req = {
.bi_rw = WRITE,
@ -278,15 +280,15 @@ static int read_header(struct pstore *ps, int *new_snapshot)
/*
* Use default chunk size (or hardsect_size, if larger) if none supplied
*/
if (!ps->snap->chunk_size) {
ps->snap->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
bdev_hardsect_size(ps->snap->cow->bdev) >> 9);
ps->snap->chunk_mask = ps->snap->chunk_size - 1;
ps->snap->chunk_shift = ffs(ps->snap->chunk_size) - 1;
if (!ps->store->chunk_size) {
ps->store->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
bdev_hardsect_size(ps->store->cow->bdev) >> 9);
ps->store->chunk_mask = ps->store->chunk_size - 1;
ps->store->chunk_shift = ffs(ps->store->chunk_size) - 1;
chunk_size_supplied = 0;
}
ps->io_client = dm_io_client_create(sectors_to_pages(ps->snap->
ps->io_client = dm_io_client_create(sectors_to_pages(ps->store->
chunk_size));
if (IS_ERR(ps->io_client))
return PTR_ERR(ps->io_client);
@ -317,22 +319,22 @@ static int read_header(struct pstore *ps, int *new_snapshot)
ps->version = le32_to_cpu(dh->version);
chunk_size = le32_to_cpu(dh->chunk_size);
if (!chunk_size_supplied || ps->snap->chunk_size == chunk_size)
if (!chunk_size_supplied || ps->store->chunk_size == chunk_size)
return 0;
DMWARN("chunk size %llu in device metadata overrides "
"table chunk size of %llu.",
(unsigned long long)chunk_size,
(unsigned long long)ps->snap->chunk_size);
(unsigned long long)ps->store->chunk_size);
/* We had a bogus chunk_size. Fix stuff up. */
free_area(ps);
ps->snap->chunk_size = chunk_size;
ps->snap->chunk_mask = chunk_size - 1;
ps->snap->chunk_shift = ffs(chunk_size) - 1;
ps->store->chunk_size = chunk_size;
ps->store->chunk_mask = chunk_size - 1;
ps->store->chunk_shift = ffs(chunk_size) - 1;
r = dm_io_client_resize(sectors_to_pages(ps->snap->chunk_size),
r = dm_io_client_resize(sectors_to_pages(ps->store->chunk_size),
ps->io_client);
if (r)
return r;
@ -349,13 +351,13 @@ static int write_header(struct pstore *ps)
{
struct disk_header *dh;
memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT);
dh = (struct disk_header *) ps->area;
dh->magic = cpu_to_le32(SNAP_MAGIC);
dh->valid = cpu_to_le32(ps->valid);
dh->version = cpu_to_le32(ps->version);
dh->chunk_size = cpu_to_le32(ps->snap->chunk_size);
dh->chunk_size = cpu_to_le32(ps->store->chunk_size);
return chunk_io(ps, 0, WRITE, 1);
}
@ -474,18 +476,25 @@ static struct pstore *get_info(struct dm_exception_store *store)
static void persistent_fraction_full(struct dm_exception_store *store,
sector_t *numerator, sector_t *denominator)
{
*numerator = get_info(store)->next_free * store->snap->chunk_size;
*denominator = get_dev_size(store->snap->cow->bdev);
*numerator = get_info(store)->next_free * store->chunk_size;
*denominator = get_dev_size(store->cow->bdev);
}
static void persistent_destroy(struct dm_exception_store *store)
static void persistent_dtr(struct dm_exception_store *store)
{
struct pstore *ps = get_info(store);
destroy_workqueue(ps->metadata_wq);
dm_io_client_destroy(ps->io_client);
vfree(ps->callbacks);
/* Created in read_header */
if (ps->io_client)
dm_io_client_destroy(ps->io_client);
free_area(ps);
/* Allocated in persistent_read_metadata */
if (ps->callbacks)
vfree(ps->callbacks);
kfree(ps);
}
@ -507,7 +516,7 @@ static int persistent_read_metadata(struct dm_exception_store *store,
/*
* Now we know correct chunk_size, complete the initialisation.
*/
ps->exceptions_per_area = (ps->snap->chunk_size << SECTOR_SHIFT) /
ps->exceptions_per_area = (ps->store->chunk_size << SECTOR_SHIFT) /
sizeof(struct disk_exception);
ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
sizeof(*ps->callbacks));
@ -564,10 +573,10 @@ static int persistent_prepare_exception(struct dm_exception_store *store,
struct pstore *ps = get_info(store);
uint32_t stride;
chunk_t next_free;
sector_t size = get_dev_size(store->snap->cow->bdev);
sector_t size = get_dev_size(store->cow->bdev);
/* Is there enough room ? */
if (size < ((ps->next_free + 1) * store->snap->chunk_size))
if (size < ((ps->next_free + 1) * store->chunk_size))
return -ENOSPC;
e->new_chunk = ps->next_free;
@ -656,16 +665,17 @@ static void persistent_drop_snapshot(struct dm_exception_store *store)
DMWARN("write header failed");
}
int dm_create_persistent(struct dm_exception_store *store)
static int persistent_ctr(struct dm_exception_store *store,
unsigned argc, char **argv)
{
struct pstore *ps;
/* allocate the pstore */
ps = kmalloc(sizeof(*ps), GFP_KERNEL);
ps = kzalloc(sizeof(*ps), GFP_KERNEL);
if (!ps)
return -ENOMEM;
ps->snap = store->snap;
ps->store = store;
ps->valid = 1;
ps->version = SNAPSHOT_DISK_VERSION;
ps->area = NULL;
@ -683,22 +693,77 @@ int dm_create_persistent(struct dm_exception_store *store)
return -ENOMEM;
}
store->destroy = persistent_destroy;
store->read_metadata = persistent_read_metadata;
store->prepare_exception = persistent_prepare_exception;
store->commit_exception = persistent_commit_exception;
store->drop_snapshot = persistent_drop_snapshot;
store->fraction_full = persistent_fraction_full;
store->context = ps;
return 0;
}
/*
 * Emit the persistent store's part of the status line into 'result'.
 * STATUSTYPE_INFO adds nothing; STATUSTYPE_TABLE emits the COW device
 * name, the "P" flag and the chunk size. Returns 'sz', which starts at
 * 0 and is presumably advanced by DMEMIT (macro defined elsewhere).
 */
static unsigned persistent_status(struct dm_exception_store *store,
				  status_type_t status, char *result,
				  unsigned maxlen)
{
	unsigned sz = 0;

	if (status == STATUSTYPE_TABLE) {
		DMEMIT(" %s P %llu", store->cow->name,
		       (unsigned long long)store->chunk_size);
	}

	return sz;
}
/* Persistent exception store, registered under the name "persistent". */
static struct dm_exception_store_type _persistent_type = {
.name = "persistent",
.module = THIS_MODULE,
.ctr = persistent_ctr,
.dtr = persistent_dtr,
.read_metadata = persistent_read_metadata,
.prepare_exception = persistent_prepare_exception,
.commit_exception = persistent_commit_exception,
.drop_snapshot = persistent_drop_snapshot,
.fraction_full = persistent_fraction_full,
.status = persistent_status,
};
/*
 * Old-style alias: the same callbacks as _persistent_type, registered
 * under the single-letter name "P".
 */
static struct dm_exception_store_type _persistent_compat_type = {
.name = "P",
.module = THIS_MODULE,
.ctr = persistent_ctr,
.dtr = persistent_dtr,
.read_metadata = persistent_read_metadata,
.prepare_exception = persistent_prepare_exception,
.commit_exception = persistent_commit_exception,
.drop_snapshot = persistent_drop_snapshot,
.fraction_full = persistent_fraction_full,
.status = persistent_status,
};
int dm_persistent_snapshot_init(void)
{
return 0;
int r;
r = dm_exception_store_type_register(&_persistent_type);
if (r) {
DMERR("Unable to register persistent exception store type");
return r;
}
r = dm_exception_store_type_register(&_persistent_compat_type);
if (r) {
DMERR("Unable to register old-style persistent exception "
"store type");
dm_exception_store_type_unregister(&_persistent_type);
return r;
}
return r;
}
/* Unregister both persistent store types ("persistent" and "P"). */
void dm_persistent_snapshot_exit(void)
{
dm_exception_store_type_unregister(&_persistent_type);
dm_exception_store_type_unregister(&_persistent_compat_type);
}

View File

@ -6,7 +6,6 @@
*/
#include "dm-exception-store.h"
#include "dm-snap.h"
#include <linux/mm.h>
#include <linux/pagemap.h>
@ -23,7 +22,7 @@ struct transient_c {
sector_t next_free;
};
static void transient_destroy(struct dm_exception_store *store)
static void transient_dtr(struct dm_exception_store *store)
{
kfree(store->context);
}
@ -39,14 +38,14 @@ static int transient_read_metadata(struct dm_exception_store *store,
static int transient_prepare_exception(struct dm_exception_store *store,
struct dm_snap_exception *e)
{
struct transient_c *tc = (struct transient_c *) store->context;
sector_t size = get_dev_size(store->snap->cow->bdev);
struct transient_c *tc = store->context;
sector_t size = get_dev_size(store->cow->bdev);
if (size < (tc->next_free + store->snap->chunk_size))
if (size < (tc->next_free + store->chunk_size))
return -1;
e->new_chunk = sector_to_chunk(store->snap, tc->next_free);
tc->next_free += store->snap->chunk_size;
e->new_chunk = sector_to_chunk(store, tc->next_free);
tc->next_free += store->chunk_size;
return 0;
}
@ -64,20 +63,14 @@ static void transient_fraction_full(struct dm_exception_store *store,
sector_t *numerator, sector_t *denominator)
{
*numerator = ((struct transient_c *) store->context)->next_free;
*denominator = get_dev_size(store->snap->cow->bdev);
*denominator = get_dev_size(store->cow->bdev);
}
int dm_create_transient(struct dm_exception_store *store)
static int transient_ctr(struct dm_exception_store *store,
unsigned argc, char **argv)
{
struct transient_c *tc;
store->destroy = transient_destroy;
store->read_metadata = transient_read_metadata;
store->prepare_exception = transient_prepare_exception;
store->commit_exception = transient_commit_exception;
store->drop_snapshot = NULL;
store->fraction_full = transient_fraction_full;
tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL);
if (!tc)
return -ENOMEM;
@ -88,11 +81,70 @@ int dm_create_transient(struct dm_exception_store *store)
return 0;
}
/*
 * Emit the transient store's part of the status line into 'result'.
 * STATUSTYPE_INFO adds nothing; STATUSTYPE_TABLE emits the COW device
 * name, the "N" flag and the chunk size. Returns 'sz', which starts at
 * 0 and is presumably advanced by DMEMIT (macro defined elsewhere).
 */
static unsigned transient_status(struct dm_exception_store *store,
				 status_type_t status, char *result,
				 unsigned maxlen)
{
	unsigned sz = 0;

	if (status == STATUSTYPE_TABLE) {
		DMEMIT(" %s N %llu", store->cow->name,
		       (unsigned long long)store->chunk_size);
	}

	return sz;
}
/*
 * Transient exception store, registered under the name "transient".
 * No drop_snapshot callback is provided.
 */
static struct dm_exception_store_type _transient_type = {
.name = "transient",
.module = THIS_MODULE,
.ctr = transient_ctr,
.dtr = transient_dtr,
.read_metadata = transient_read_metadata,
.prepare_exception = transient_prepare_exception,
.commit_exception = transient_commit_exception,
.fraction_full = transient_fraction_full,
.status = transient_status,
};
/*
 * Old-style alias: the same callbacks as _transient_type, registered
 * under the single-letter name "N".
 */
static struct dm_exception_store_type _transient_compat_type = {
.name = "N",
.module = THIS_MODULE,
.ctr = transient_ctr,
.dtr = transient_dtr,
.read_metadata = transient_read_metadata,
.prepare_exception = transient_prepare_exception,
.commit_exception = transient_commit_exception,
.fraction_full = transient_fraction_full,
.status = transient_status,
};
int dm_transient_snapshot_init(void)
{
return 0;
int r;
r = dm_exception_store_type_register(&_transient_type);
if (r) {
DMWARN("Unable to register transient exception store type");
return r;
}
r = dm_exception_store_type_register(&_transient_compat_type);
if (r) {
DMWARN("Unable to register old-style transient "
"exception store type");
dm_exception_store_type_unregister(&_transient_type);
return r;
}
return r;
}
/* Unregister both transient store types ("transient" and "N"). */
void dm_transient_snapshot_exit(void)
{
dm_exception_store_type_unregister(&_transient_type);
dm_exception_store_type_unregister(&_transient_compat_type);
}

View File

@ -7,7 +7,6 @@
*/
#include <linux/blkdev.h>
#include <linux/ctype.h>
#include <linux/device-mapper.h>
#include <linux/delay.h>
#include <linux/fs.h>
@ -20,9 +19,9 @@
#include <linux/vmalloc.h>
#include <linux/log2.h>
#include <linux/dm-kcopyd.h>
#include <linux/workqueue.h>
#include "dm-exception-store.h"
#include "dm-snap.h"
#include "dm-bio-list.h"
#define DM_MSG_PREFIX "snapshots"
@ -47,9 +46,76 @@
*/
#define MIN_IOS 256
#define DM_TRACKED_CHUNK_HASH_SIZE 16
#define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \
(DM_TRACKED_CHUNK_HASH_SIZE - 1))
/*
 * A table of list heads plus a mask and shift.
 * NOTE(review): presumably the mask/shift select a bucket in 'table';
 * the hashing code is outside this view - confirm.
 */
struct exception_table {
uint32_t hash_mask;
unsigned hash_shift;
struct list_head *table;
};
/*
 * Per-snapshot state. The COW device and chunk geometry are reached
 * through 'store' rather than being held here directly.
 */
struct dm_snapshot {
struct rw_semaphore lock;
struct dm_dev *origin;
/* List of snapshots per Origin */
struct list_head list;
/* You can't use a snapshot if this is 0 (e.g. if full) */
int valid;
/* Origin writes don't trigger exceptions until this is set */
int active;
mempool_t *pending_pool;
atomic_t pending_exceptions_count;
struct exception_table pending;
struct exception_table complete;
/*
* pe_lock protects all pending_exception operations and access
* as well as the snapshot_bios list.
*/
spinlock_t pe_lock;
/* The on disk metadata handler */
struct dm_exception_store *store;
struct dm_kcopyd_client *kcopyd_client;
/* Queue of snapshot writes for ksnapd to flush */
struct bio_list queued_bios;
struct work_struct queued_bios_work;
/* Chunks with outstanding reads */
mempool_t *tracked_chunk_pool;
spinlock_t tracked_chunk_lock;
struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
};
static struct workqueue_struct *ksnapd;
static void flush_queued_bios(struct work_struct *work);
/*
 * Convert a chunk number into a sector number by shifting it left by
 * the exception store's chunk_shift.
 */
static sector_t chunk_to_sector(struct dm_exception_store *store,
				chunk_t chunk)
{
	sector_t first_sector = chunk << store->chunk_shift;

	return first_sector;
}
/*
 * Report whether two block_device pointers name the same device.
 * Returns 1 if they do, 0 otherwise.
 */
static int bdev_equal(struct block_device *lhs, struct block_device *rhs)
{
	/*
	 * A particular block device only ever has one instance, so
	 * pointer identity is a sufficient test.
	 */
	if (lhs != rhs)
		return 0;

	return 1;
}
struct dm_snap_pending_exception {
struct dm_snap_exception e;
@ -476,11 +542,11 @@ static int init_hash_tables(struct dm_snapshot *s)
* Calculate based on the size of the original volume or
* the COW volume...
*/
cow_dev_size = get_dev_size(s->cow->bdev);
cow_dev_size = get_dev_size(s->store->cow->bdev);
origin_dev_size = get_dev_size(s->origin->bdev);
max_buckets = calc_max_buckets();
hash_size = min(origin_dev_size, cow_dev_size) >> s->chunk_shift;
hash_size = min(origin_dev_size, cow_dev_size) >> s->store->chunk_shift;
hash_size = min(hash_size, max_buckets);
hash_size = rounddown_pow_of_two(hash_size);
@ -504,58 +570,6 @@ static int init_hash_tables(struct dm_snapshot *s)
return 0;
}
/*
 * Round 'n' up to the next multiple of 'size'.  'size' must be a
 * power of 2: the rounding is done by adding size - 1 and then
 * masking off the low bits.
 */
static ulong round_up(ulong n, ulong size)
{
	const ulong low_bits = size - 1;

	return (n + low_bits) & ~low_bits;
}
/*
 * Parse the decimal chunk size argument and fill in the snapshot's
 * chunk_size, chunk_mask and chunk_shift.
 *
 * The value is compared against PAGE_SIZE >> 9 and the COW device's
 * hardsect size >> 9, i.e. it is counted in 512-byte units.
 *
 * A value of 0 is accepted and zeroes all three fields.  Otherwise the
 * value is rounded up to a multiple of the page size and must then be
 * a power of 2 and a multiple of the COW device block size.
 *
 * Returns 0 on success.  On failure returns -EINVAL and points *error
 * at a static message.
 */
static int set_chunk_size(struct dm_snapshot *s, const char *chunk_size_arg,
			  char **error)
{
	char *end;
	unsigned long sectors = simple_strtoul(chunk_size_arg, &end, 10);

	/* Reject an empty string or trailing non-numeric characters. */
	if (*chunk_size_arg == '\0' || *end != '\0') {
		*error = "Invalid chunk size";
		return -EINVAL;
	}

	if (sectors == 0) {
		s->chunk_size = s->chunk_mask = s->chunk_shift = 0;
		return 0;
	}

	/*
	 * The chunk size has to be a multiple of the page size; it is
	 * silently rounded up when it is not.
	 */
	sectors = round_up(sectors, PAGE_SIZE >> 9);

	/* Only power-of-2 sizes are usable with the mask/shift scheme. */
	if (!is_power_of_2(sectors)) {
		*error = "Chunk size is not a power of 2";
		return -EINVAL;
	}

	/* The chunk size must also fit the COW device's block size. */
	if (sectors % (bdev_hardsect_size(s->cow->bdev) >> 9)) {
		*error = "Chunk size is not a multiple of device blocksize";
		return -EINVAL;
	}

	s->chunk_size = sectors;
	s->chunk_mask = sectors - 1;
	s->chunk_shift = ffs(sectors) - 1;

	return 0;
}
/*
* Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size>
*/
@ -564,91 +578,68 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
struct dm_snapshot *s;
int i;
int r = -EINVAL;
char persistent;
char *origin_path;
char *cow_path;
struct dm_exception_store *store;
unsigned args_used;
if (argc != 4) {
ti->error = "requires exactly 4 arguments";
r = -EINVAL;
goto bad1;
goto bad_args;
}
origin_path = argv[0];
cow_path = argv[1];
persistent = toupper(*argv[2]);
argv++;
argc--;
if (persistent != 'P' && persistent != 'N') {
ti->error = "Persistent flag is not P or N";
r = dm_exception_store_create(ti, argc, argv, &args_used, &store);
if (r) {
ti->error = "Couldn't create exception store";
r = -EINVAL;
goto bad1;
goto bad_args;
}
argv += args_used;
argc -= args_used;
s = kmalloc(sizeof(*s), GFP_KERNEL);
if (s == NULL) {
if (!s) {
ti->error = "Cannot allocate snapshot context private "
"structure";
r = -ENOMEM;
goto bad1;
goto bad_snap;
}
r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin);
if (r) {
ti->error = "Cannot get origin device";
goto bad2;
goto bad_origin;
}
r = dm_get_device(ti, cow_path, 0, 0,
FMODE_READ | FMODE_WRITE, &s->cow);
if (r) {
dm_put_device(ti, s->origin);
ti->error = "Cannot get COW device";
goto bad2;
}
r = set_chunk_size(s, argv[3], &ti->error);
if (r)
goto bad3;
s->type = persistent;
s->store = store;
s->valid = 1;
s->active = 0;
atomic_set(&s->pending_exceptions_count, 0);
init_rwsem(&s->lock);
spin_lock_init(&s->pe_lock);
s->ti = ti;
/* Allocate hash table for COW data */
if (init_hash_tables(s)) {
ti->error = "Unable to allocate hash table space";
r = -ENOMEM;
goto bad3;
}
s->store.snap = s;
if (persistent == 'P')
r = dm_create_persistent(&s->store);
else
r = dm_create_transient(&s->store);
if (r) {
ti->error = "Couldn't create exception store";
r = -EINVAL;
goto bad4;
goto bad_hash_tables;
}
r = dm_kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client);
if (r) {
ti->error = "Could not create kcopyd client";
goto bad5;
goto bad_kcopyd;
}
s->pending_pool = mempool_create_slab_pool(MIN_IOS, pending_cache);
if (!s->pending_pool) {
ti->error = "Could not allocate mempool for pending exceptions";
goto bad6;
goto bad_pending_pool;
}
s->tracked_chunk_pool = mempool_create_slab_pool(MIN_IOS,
@ -665,7 +656,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
spin_lock_init(&s->tracked_chunk_lock);
/* Metadata must only be loaded into one table at once */
r = s->store.read_metadata(&s->store, dm_add_exception, (void *)s);
r = s->store->type->read_metadata(s->store, dm_add_exception,
(void *)s);
if (r < 0) {
ti->error = "Failed to read snapshot metadata";
goto bad_load_and_register;
@ -686,34 +678,33 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
}
ti->private = s;
ti->split_io = s->chunk_size;
ti->split_io = s->store->chunk_size;
return 0;
bad_load_and_register:
bad_load_and_register:
mempool_destroy(s->tracked_chunk_pool);
bad_tracked_chunk_pool:
bad_tracked_chunk_pool:
mempool_destroy(s->pending_pool);
bad6:
bad_pending_pool:
dm_kcopyd_client_destroy(s->kcopyd_client);
bad5:
s->store.destroy(&s->store);
bad4:
bad_kcopyd:
exit_exception_table(&s->pending, pending_cache);
exit_exception_table(&s->complete, exception_cache);
bad3:
dm_put_device(ti, s->cow);
bad_hash_tables:
dm_put_device(ti, s->origin);
bad2:
bad_origin:
kfree(s);
bad1:
bad_snap:
dm_exception_store_destroy(store);
bad_args:
return r;
}
@ -724,8 +715,6 @@ static void __free_exceptions(struct dm_snapshot *s)
exit_exception_table(&s->pending, pending_cache);
exit_exception_table(&s->complete, exception_cache);
s->store.destroy(&s->store);
}
static void snapshot_dtr(struct dm_target *ti)
@ -761,7 +750,8 @@ static void snapshot_dtr(struct dm_target *ti)
mempool_destroy(s->pending_pool);
dm_put_device(ti, s->origin);
dm_put_device(ti, s->cow);
dm_exception_store_destroy(s->store);
kfree(s);
}
@ -820,12 +810,12 @@ static void __invalidate_snapshot(struct dm_snapshot *s, int err)
else if (err == -ENOMEM)
DMERR("Invalidating snapshot: Unable to allocate exception.");
if (s->store.drop_snapshot)
s->store.drop_snapshot(&s->store);
if (s->store->type->drop_snapshot)
s->store->type->drop_snapshot(s->store);
s->valid = 0;
dm_table_event(s->ti->table);
dm_table_event(s->store->ti->table);
}
static void get_pending_exception(struct dm_snap_pending_exception *pe)
@ -943,8 +933,8 @@ static void copy_callback(int read_err, unsigned long write_err, void *context)
else
/* Update the metadata if we are persistent */
s->store.commit_exception(&s->store, &pe->e, commit_callback,
pe);
s->store->type->commit_exception(s->store, &pe->e,
commit_callback, pe);
}
/*
@ -960,11 +950,11 @@ static void start_copy(struct dm_snap_pending_exception *pe)
dev_size = get_dev_size(bdev);
src.bdev = bdev;
src.sector = chunk_to_sector(s, pe->e.old_chunk);
src.count = min(s->chunk_size, dev_size - src.sector);
src.sector = chunk_to_sector(s->store, pe->e.old_chunk);
src.count = min(s->store->chunk_size, dev_size - src.sector);
dest.bdev = s->cow->bdev;
dest.sector = chunk_to_sector(s, pe->e.new_chunk);
dest.bdev = s->store->cow->bdev;
dest.sector = chunk_to_sector(s->store, pe->e.new_chunk);
dest.count = src.count;
/* Hand over to kcopyd */
@ -972,6 +962,17 @@ static void start_copy(struct dm_snap_pending_exception *pe)
&src, 1, &dest, 0, copy_callback, pe);
}
/*
 * Look 'chunk' up in the snapshot's pending exception table.
 *
 * Returns the dm_snap_pending_exception that embeds the matching
 * exception, or NULL when lookup_exception() finds nothing.
 */
static struct dm_snap_pending_exception *
__lookup_pending_exception(struct dm_snapshot *s, chunk_t chunk)
{
	struct dm_snap_exception *found = lookup_exception(&s->pending, chunk);

	/* The exception is member 'e' of its pending exception. */
	return found ?
		container_of(found, struct dm_snap_pending_exception, e) : NULL;
}
/*
* Looks to see if this snapshot already has a pending exception
* for this chunk, otherwise it allocates a new one and inserts
@ -981,40 +982,15 @@ static void start_copy(struct dm_snap_pending_exception *pe)
* this.
*/
static struct dm_snap_pending_exception *
__find_pending_exception(struct dm_snapshot *s, struct bio *bio)
__find_pending_exception(struct dm_snapshot *s,
struct dm_snap_pending_exception *pe, chunk_t chunk)
{
struct dm_snap_exception *e;
struct dm_snap_pending_exception *pe;
chunk_t chunk = sector_to_chunk(s, bio->bi_sector);
struct dm_snap_pending_exception *pe2;
/*
* Is there a pending exception for this already ?
*/
e = lookup_exception(&s->pending, chunk);
if (e) {
/* cast the exception to a pending exception */
pe = container_of(e, struct dm_snap_pending_exception, e);
goto out;
}
/*
* Create a new pending exception, we don't want
* to hold the lock while we do this.
*/
up_write(&s->lock);
pe = alloc_pending_exception(s);
down_write(&s->lock);
if (!s->valid) {
pe2 = __lookup_pending_exception(s, chunk);
if (pe2) {
free_pending_exception(pe);
return NULL;
}
e = lookup_exception(&s->pending, chunk);
if (e) {
free_pending_exception(pe);
pe = container_of(e, struct dm_snap_pending_exception, e);
goto out;
return pe2;
}
pe->e.old_chunk = chunk;
@ -1024,7 +1000,7 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio)
atomic_set(&pe->ref_count, 0);
pe->started = 0;
if (s->store.prepare_exception(&s->store, &pe->e)) {
if (s->store->type->prepare_exception(s->store, &pe->e)) {
free_pending_exception(pe);
return NULL;
}
@ -1032,17 +1008,18 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio)
get_pending_exception(pe);
insert_exception(&s->pending, &pe->e);
out:
return pe;
}
static void remap_exception(struct dm_snapshot *s, struct dm_snap_exception *e,
struct bio *bio, chunk_t chunk)
{
bio->bi_bdev = s->cow->bdev;
bio->bi_sector = chunk_to_sector(s, dm_chunk_number(e->new_chunk) +
(chunk - e->old_chunk)) +
(bio->bi_sector & s->chunk_mask);
bio->bi_bdev = s->store->cow->bdev;
bio->bi_sector = chunk_to_sector(s->store,
dm_chunk_number(e->new_chunk) +
(chunk - e->old_chunk)) +
(bio->bi_sector &
s->store->chunk_mask);
}
static int snapshot_map(struct dm_target *ti, struct bio *bio,
@ -1054,7 +1031,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
chunk_t chunk;
struct dm_snap_pending_exception *pe = NULL;
chunk = sector_to_chunk(s, bio->bi_sector);
chunk = sector_to_chunk(s->store, bio->bi_sector);
/* Full snapshots are not usable */
/* To get here the table must be live so s->active is always set. */
@ -1083,11 +1060,31 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
* writeable.
*/
if (bio_rw(bio) == WRITE) {
pe = __find_pending_exception(s, bio);
pe = __lookup_pending_exception(s, chunk);
if (!pe) {
__invalidate_snapshot(s, -ENOMEM);
r = -EIO;
goto out_unlock;
up_write(&s->lock);
pe = alloc_pending_exception(s);
down_write(&s->lock);
if (!s->valid) {
free_pending_exception(pe);
r = -EIO;
goto out_unlock;
}
e = lookup_exception(&s->complete, chunk);
if (e) {
free_pending_exception(pe);
remap_exception(s, e, bio, chunk);
goto out_unlock;
}
pe = __find_pending_exception(s, pe, chunk);
if (!pe) {
__invalidate_snapshot(s, -ENOMEM);
r = -EIO;
goto out_unlock;
}
}
remap_exception(s, &pe->e, bio, chunk);
@ -1137,24 +1134,25 @@ static void snapshot_resume(struct dm_target *ti)
static int snapshot_status(struct dm_target *ti, status_type_t type,
char *result, unsigned int maxlen)
{
unsigned sz = 0;
struct dm_snapshot *snap = ti->private;
switch (type) {
case STATUSTYPE_INFO:
if (!snap->valid)
snprintf(result, maxlen, "Invalid");
DMEMIT("Invalid");
else {
if (snap->store.fraction_full) {
if (snap->store->type->fraction_full) {
sector_t numerator, denominator;
snap->store.fraction_full(&snap->store,
&numerator,
&denominator);
snprintf(result, maxlen, "%llu/%llu",
(unsigned long long)numerator,
(unsigned long long)denominator);
snap->store->type->fraction_full(snap->store,
&numerator,
&denominator);
DMEMIT("%llu/%llu",
(unsigned long long)numerator,
(unsigned long long)denominator);
}
else
snprintf(result, maxlen, "Unknown");
DMEMIT("Unknown");
}
break;
@ -1164,10 +1162,9 @@ static int snapshot_status(struct dm_target *ti, status_type_t type,
* to make private copies if the output is to
* make sense.
*/
snprintf(result, maxlen, "%s %s %c %llu",
snap->origin->name, snap->cow->name,
snap->type,
(unsigned long long)snap->chunk_size);
DMEMIT("%s", snap->origin->name);
snap->store->type->status(snap->store, type, result + sz,
maxlen - sz);
break;
}
@ -1196,14 +1193,14 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
goto next_snapshot;
/* Nothing to do if writing beyond end of snapshot */
if (bio->bi_sector >= dm_table_get_size(snap->ti->table))
if (bio->bi_sector >= dm_table_get_size(snap->store->ti->table))
goto next_snapshot;
/*
* Remember, different snapshots can have
* different chunk sizes.
*/
chunk = sector_to_chunk(snap, bio->bi_sector);
chunk = sector_to_chunk(snap->store, bio->bi_sector);
/*
* Check exception table to see if block
@ -1217,10 +1214,28 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
if (e)
goto next_snapshot;
pe = __find_pending_exception(snap, bio);
pe = __lookup_pending_exception(snap, chunk);
if (!pe) {
__invalidate_snapshot(snap, -ENOMEM);
goto next_snapshot;
up_write(&snap->lock);
pe = alloc_pending_exception(snap);
down_write(&snap->lock);
if (!snap->valid) {
free_pending_exception(pe);
goto next_snapshot;
}
e = lookup_exception(&snap->complete, chunk);
if (e) {
free_pending_exception(pe);
goto next_snapshot;
}
pe = __find_pending_exception(snap, pe, chunk);
if (!pe) {
__invalidate_snapshot(snap, -ENOMEM);
goto next_snapshot;
}
}
if (!primary_pe) {
@ -1360,7 +1375,8 @@ static void origin_resume(struct dm_target *ti)
o = __lookup_origin(dev->bdev);
if (o)
list_for_each_entry (snap, &o->snapshots, list)
chunk_size = min_not_zero(chunk_size, snap->chunk_size);
chunk_size = min_not_zero(chunk_size,
snap->store->chunk_size);
up_read(&_origins_lock);
ti->split_io = chunk_size;

View File

@ -1,105 +0,0 @@
/*
* Copyright (C) 2001-2002 Sistina Software (UK) Limited.
*
* This file is released under the GPL.
*/
#ifndef DM_SNAPSHOT_H
#define DM_SNAPSHOT_H
#include <linux/device-mapper.h>
#include "dm-exception-store.h"
#include "dm-bio-list.h"
#include <linux/blkdev.h>
#include <linux/workqueue.h>
/*
 * Hash table of exceptions; struct dm_snapshot embeds one for pending
 * and one for complete exceptions.
 */
struct exception_table {
uint32_t hash_mask;
unsigned hash_shift;
/* NOTE(review): presumably the array of hash buckets - confirm against the allocation code */
struct list_head *table;
};
#define DM_TRACKED_CHUNK_HASH_SIZE 16
#define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \
(DM_TRACKED_CHUNK_HASH_SIZE - 1))
/*
 * Per-snapshot state of the snapshot target.  In this layout the COW
 * device, the chunk geometry and the exception store are all held
 * directly in the snapshot.
 */
struct dm_snapshot {
struct rw_semaphore lock;
/* Owning target; its table is signalled with dm_table_event(s->ti->table) */
struct dm_target *ti;
/* Origin and COW devices, both obtained with dm_get_device() in snapshot_ctr() */
struct dm_dev *origin;
struct dm_dev *cow;
/* List of snapshots per Origin */
struct list_head list;
/* Size of data blocks saved - must be a power of 2 */
chunk_t chunk_size;
/* chunk_size - 1 and ffs(chunk_size) - 1, as set by set_chunk_size() */
chunk_t chunk_mask;
chunk_t chunk_shift;
/* You can't use a snapshot if this is 0 (e.g. if full) */
int valid;
/* Origin writes don't trigger exceptions until this is set */
int active;
/* Used for display of table */
char type;
mempool_t *pending_pool;
atomic_t pending_exceptions_count;
struct exception_table pending;
struct exception_table complete;
/*
* pe_lock protects all pending_exception operations and access
* as well as the snapshot_bios list.
*/
spinlock_t pe_lock;
/* The on disk metadata handler */
struct dm_exception_store store;
struct dm_kcopyd_client *kcopyd_client;
/* Queue of snapshot writes for ksnapd to flush */
struct bio_list queued_bios;
struct work_struct queued_bios_work;
/* Chunks with outstanding reads */
mempool_t *tracked_chunk_pool;
spinlock_t tracked_chunk_lock;
struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
};
/*
 * Size of the block device in sectors: the i_size of the device's
 * inode shifted right by SECTOR_SHIFT.
 */
static inline sector_t get_dev_size(struct block_device *bdev)
{
	sector_t nr_sectors = bdev->bd_inode->i_size >> SECTOR_SHIFT;

	return nr_sectors;
}
/*
 * Map a sector to the number of the chunk containing it: clear the
 * offset-within-chunk bits using chunk_mask, then shift right by
 * chunk_shift.
 */
static inline chunk_t sector_to_chunk(struct dm_snapshot *s, sector_t sector)
{
	chunk_t chunk = (sector & ~s->chunk_mask) >> s->chunk_shift;

	return chunk;
}
/*
 * Convert a chunk number into a sector number by shifting it left by
 * the snapshot's chunk_shift.
 */
static inline sector_t chunk_to_sector(struct dm_snapshot *s, chunk_t chunk)
{
	sector_t first_sector = chunk << s->chunk_shift;

	return first_sector;
}
/*
 * Returns 1 when both pointers refer to the same block device and 0
 * otherwise.  Comparing the pointers is enough because a particular
 * block device only ever has one instance.
 */
static inline int bdev_equal(struct block_device *lhs, struct block_device *rhs)
{
	return (lhs != rhs) ? 0 : 1;
}
#endif

View File

@ -399,28 +399,30 @@ static int check_device_area(struct dm_dev_internal *dd, sector_t start,
}
/*
* This upgrades the mode on an already open dm_dev. Being
* This upgrades the mode on an already open dm_dev, being
* careful to leave things as they were if we fail to reopen the
* device.
* device and not to touch the existing bdev field in case
* it is accessed concurrently inside dm_table_any_congested().
*/
static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
struct mapped_device *md)
{
int r;
struct dm_dev_internal dd_copy;
dev_t dev = dd->dm_dev.bdev->bd_dev;
struct dm_dev_internal dd_new, dd_old;
dd_copy = *dd;
dd_new = dd_old = *dd;
dd_new.dm_dev.mode |= new_mode;
dd_new.dm_dev.bdev = NULL;
r = open_dev(&dd_new, dd->dm_dev.bdev->bd_dev, md);
if (r)
return r;
dd->dm_dev.mode |= new_mode;
dd->dm_dev.bdev = NULL;
r = open_dev(dd, dev, md);
if (!r)
close_dev(&dd_copy, md);
else
*dd = dd_copy;
close_dev(&dd_old, md);
return r;
return 0;
}
/*

View File

@ -14,45 +14,34 @@
#define DM_MSG_PREFIX "target"
/*
 * Registry entry wrapping a registered target type.
 */
struct tt_internal {
/*
 * Copy of the caller's target_type (see alloc_target()).  Must stay
 * the first member: dm_put_target_type() casts a struct target_type *
 * straight back to a struct tt_internal *.
 */
struct target_type tt;
/* Link in the _targets list */
struct list_head list;
/*
 * Number of outstanding references handed out by get_target_type().
 * The module reference is taken when this is 0 and dropped when
 * dm_put_target_type() brings it back to 0.
 */
long use;
};
static LIST_HEAD(_targets);
static DECLARE_RWSEM(_lock);
#define DM_MOD_NAME_SIZE 32
static inline struct tt_internal *__find_target_type(const char *name)
static inline struct target_type *__find_target_type(const char *name)
{
struct tt_internal *ti;
struct target_type *tt;
list_for_each_entry (ti, &_targets, list)
if (!strcmp(name, ti->tt.name))
return ti;
list_for_each_entry(tt, &_targets, list)
if (!strcmp(name, tt->name))
return tt;
return NULL;
}
static struct tt_internal *get_target_type(const char *name)
static struct target_type *get_target_type(const char *name)
{
struct tt_internal *ti;
struct target_type *tt;
down_read(&_lock);
ti = __find_target_type(name);
if (ti) {
if ((ti->use == 0) && !try_module_get(ti->tt.module))
ti = NULL;
else
ti->use++;
}
tt = __find_target_type(name);
if (tt && !try_module_get(tt->module))
tt = NULL;
up_read(&_lock);
return ti;
return tt;
}
static void load_module(const char *name)
@ -62,92 +51,59 @@ static void load_module(const char *name)
struct target_type *dm_get_target_type(const char *name)
{
struct tt_internal *ti = get_target_type(name);
struct target_type *tt = get_target_type(name);
if (!ti) {
if (!tt) {
load_module(name);
ti = get_target_type(name);
tt = get_target_type(name);
}
return ti ? &ti->tt : NULL;
return tt;
}
void dm_put_target_type(struct target_type *t)
void dm_put_target_type(struct target_type *tt)
{
struct tt_internal *ti = (struct tt_internal *) t;
down_read(&_lock);
if (--ti->use == 0)
module_put(ti->tt.module);
BUG_ON(ti->use < 0);
module_put(tt->module);
up_read(&_lock);
return;
}
static struct tt_internal *alloc_target(struct target_type *t)
{
struct tt_internal *ti = kzalloc(sizeof(*ti), GFP_KERNEL);
if (ti)
ti->tt = *t;
return ti;
}
int dm_target_iterate(void (*iter_func)(struct target_type *tt,
void *param), void *param)
{
struct tt_internal *ti;
struct target_type *tt;
down_read(&_lock);
list_for_each_entry (ti, &_targets, list)
iter_func(&ti->tt, param);
list_for_each_entry(tt, &_targets, list)
iter_func(tt, param);
up_read(&_lock);
return 0;
}
int dm_register_target(struct target_type *t)
int dm_register_target(struct target_type *tt)
{
int rv = 0;
struct tt_internal *ti = alloc_target(t);
if (!ti)
return -ENOMEM;
down_write(&_lock);
if (__find_target_type(t->name))
if (__find_target_type(tt->name))
rv = -EEXIST;
else
list_add(&ti->list, &_targets);
list_add(&tt->list, &_targets);
up_write(&_lock);
if (rv)
kfree(ti);
return rv;
}
void dm_unregister_target(struct target_type *t)
void dm_unregister_target(struct target_type *tt)
{
struct tt_internal *ti;
down_write(&_lock);
if (!(ti = __find_target_type(t->name))) {
DMCRIT("Unregistering unrecognised target: %s", t->name);
if (!__find_target_type(tt->name)) {
DMCRIT("Unregistering unrecognised target: %s", tt->name);
BUG();
}
if (ti->use) {
DMCRIT("Attempt to unregister target still in use: %s",
t->name);
BUG();
}
list_del(&ti->list);
kfree(ti);
list_del(&tt->list);
up_write(&_lock);
}
@ -156,17 +112,17 @@ void dm_unregister_target(struct target_type *t)
* io-err: always fails an io, useful for bringing
* up LVs that have holes in them.
*/
static int io_err_ctr(struct dm_target *ti, unsigned int argc, char **args)
static int io_err_ctr(struct dm_target *tt, unsigned int argc, char **args)
{
return 0;
}
static void io_err_dtr(struct dm_target *ti)
static void io_err_dtr(struct dm_target *tt)
{
/* empty */
}
static int io_err_map(struct dm_target *ti, struct bio *bio,
static int io_err_map(struct dm_target *tt, struct bio *bio,
union map_info *map_context)
{
return -EIO;

View File

@ -99,19 +99,9 @@ union map_info *dm_get_mapinfo(struct bio *bio)
/*
* Work processed by per-device workqueue.
*/
/*
 * One request for the per-device workqueue; filled in by dm_wq_queue()
 * and handled by dm_wq_work().
 */
struct dm_wq_req {
/* Kind of work requested; dm_wq_work() switches on this */
enum {
DM_WQ_FLUSH_DEFERRED,
} type;
/* Embedded work item; dm_wq_work() recovers the request via container_of() */
struct work_struct work;
/* Device the work applies to */
struct mapped_device *md;
/* Caller-supplied pointer stored by dm_wq_queue() */
void *context;
};
struct mapped_device {
struct rw_semaphore io_lock;
struct mutex suspend_lock;
spinlock_t pushback_lock;
rwlock_t map_lock;
atomic_t holders;
atomic_t open_count;
@ -129,8 +119,9 @@ struct mapped_device {
*/
atomic_t pending;
wait_queue_head_t wait;
struct work_struct work;
struct bio_list deferred;
struct bio_list pushback;
spinlock_t deferred_lock;
/*
* Processing queue (flush/barriers)
@ -453,7 +444,9 @@ static int queue_io(struct mapped_device *md, struct bio *bio)
return 1;
}
spin_lock_irq(&md->deferred_lock);
bio_list_add(&md->deferred, bio);
spin_unlock_irq(&md->deferred_lock);
up_write(&md->io_lock);
return 0; /* deferred successfully */
@ -537,16 +530,14 @@ static void dec_pending(struct dm_io *io, int error)
if (io->error == DM_ENDIO_REQUEUE) {
/*
* Target requested pushing back the I/O.
* This must be handled before the sleeper on
* suspend queue merges the pushback list.
*/
spin_lock_irqsave(&md->pushback_lock, flags);
spin_lock_irqsave(&md->deferred_lock, flags);
if (__noflush_suspending(md))
bio_list_add(&md->pushback, io->bio);
bio_list_add(&md->deferred, io->bio);
else
/* noflush suspend was interrupted. */
io->error = -EIO;
spin_unlock_irqrestore(&md->pushback_lock, flags);
spin_unlock_irqrestore(&md->deferred_lock, flags);
}
end_io_acct(io);
@ -834,20 +825,22 @@ static int __clone_and_map(struct clone_info *ci)
}
/*
* Split the bio into several clones.
* Split the bio into several clones and submit it to targets.
*/
static int __split_bio(struct mapped_device *md, struct bio *bio)
static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
{
struct clone_info ci;
int error = 0;
ci.map = dm_get_table(md);
if (unlikely(!ci.map))
return -EIO;
if (unlikely(!ci.map)) {
bio_io_error(bio);
return;
}
if (unlikely(bio_barrier(bio) && !dm_table_barrier_ok(ci.map))) {
dm_table_put(ci.map);
bio_endio(bio, -EOPNOTSUPP);
return 0;
return;
}
ci.md = md;
ci.bio = bio;
@ -867,8 +860,6 @@ static int __split_bio(struct mapped_device *md, struct bio *bio)
/* drop the extra reference count */
dec_pending(ci.io, error);
dm_table_put(ci.map);
return 0;
}
/*-----------------------------------------------------------------
* CRUD END
@ -959,8 +950,9 @@ static int dm_request(struct request_queue *q, struct bio *bio)
down_read(&md->io_lock);
}
r = __split_bio(md, bio);
__split_and_process_bio(md, bio);
up_read(&md->io_lock);
return 0;
out_req:
if (r < 0)
@ -1074,6 +1066,8 @@ static int next_free_minor(int *minor)
static struct block_device_operations dm_blk_dops;
static void dm_wq_work(struct work_struct *work);
/*
* Allocate and initialise a blank device with a given minor.
*/
@ -1101,7 +1095,7 @@ static struct mapped_device *alloc_dev(int minor)
init_rwsem(&md->io_lock);
mutex_init(&md->suspend_lock);
spin_lock_init(&md->pushback_lock);
spin_lock_init(&md->deferred_lock);
rwlock_init(&md->map_lock);
atomic_set(&md->holders, 1);
atomic_set(&md->open_count, 0);
@ -1118,6 +1112,7 @@ static struct mapped_device *alloc_dev(int minor)
md->queue->backing_dev_info.congested_fn = dm_any_congested;
md->queue->backing_dev_info.congested_data = md;
blk_queue_make_request(md->queue, dm_request);
blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN, NULL);
blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
md->queue->unplug_fn = dm_unplug_all;
blk_queue_merge_bvec(md->queue, dm_merge_bvec);
@ -1140,6 +1135,7 @@ static struct mapped_device *alloc_dev(int minor)
atomic_set(&md->pending, 0);
init_waitqueue_head(&md->wait);
INIT_WORK(&md->work, dm_wq_work);
init_waitqueue_head(&md->eventq);
md->disk->major = _major;
@ -1379,18 +1375,24 @@ void dm_put(struct mapped_device *md)
}
EXPORT_SYMBOL_GPL(dm_put);
static int dm_wait_for_completion(struct mapped_device *md)
static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
{
int r = 0;
DECLARE_WAITQUEUE(wait, current);
dm_unplug_all(md->queue);
add_wait_queue(&md->wait, &wait);
while (1) {
set_current_state(TASK_INTERRUPTIBLE);
set_current_state(interruptible);
smp_mb();
if (!atomic_read(&md->pending))
break;
if (signal_pending(current)) {
if (interruptible == TASK_INTERRUPTIBLE &&
signal_pending(current)) {
r = -EINTR;
break;
}
@ -1399,67 +1401,40 @@ static int dm_wait_for_completion(struct mapped_device *md)
}
set_current_state(TASK_RUNNING);
remove_wait_queue(&md->wait, &wait);
return r;
}
/*
* Process the deferred bios
*/
static void __flush_deferred_io(struct mapped_device *md)
static void dm_wq_work(struct work_struct *work)
{
struct mapped_device *md = container_of(work, struct mapped_device,
work);
struct bio *c;
while ((c = bio_list_pop(&md->deferred))) {
if (__split_bio(md, c))
bio_io_error(c);
down_write(&md->io_lock);
next_bio:
spin_lock_irq(&md->deferred_lock);
c = bio_list_pop(&md->deferred);
spin_unlock_irq(&md->deferred_lock);
if (c) {
__split_and_process_bio(md, c);
goto next_bio;
}
clear_bit(DMF_BLOCK_IO, &md->flags);
}
/*
 * Clear DMF_NOFLUSH_SUSPENDING and move every bio on md->pushback to
 * the head of md->deferred, leaving md->pushback empty.
 *
 * Everything happens under pushback_lock.  dec_pending() tests
 * __noflush_suspending() under the same lock before adding a bio to
 * the pushback list, so presumably no bio can be pushed back once the
 * flag has been cleared here - confirm against dec_pending().
 */
static void __merge_pushback_list(struct mapped_device *md)
{
unsigned long flags;
spin_lock_irqsave(&md->pushback_lock, flags);
clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
bio_list_merge_head(&md->deferred, &md->pushback);
/* Reset the now-merged source list so it can be reused */
bio_list_init(&md->pushback);
spin_unlock_irqrestore(&md->pushback_lock, flags);
}
static void dm_wq_work(struct work_struct *work)
{
struct dm_wq_req *req = container_of(work, struct dm_wq_req, work);
struct mapped_device *md = req->md;
down_write(&md->io_lock);
switch (req->type) {
case DM_WQ_FLUSH_DEFERRED:
__flush_deferred_io(md);
break;
default:
DMERR("dm_wq_work: unrecognised work type %d", req->type);
BUG();
}
up_write(&md->io_lock);
}
static void dm_wq_queue(struct mapped_device *md, int type, void *context,
struct dm_wq_req *req)
static void dm_queue_flush(struct mapped_device *md)
{
req->type = type;
req->md = md;
req->context = context;
INIT_WORK(&req->work, dm_wq_work);
queue_work(md->wq, &req->work);
}
static void dm_queue_flush(struct mapped_device *md, int type, void *context)
{
struct dm_wq_req req;
dm_wq_queue(md, type, context, &req);
queue_work(md->wq, &md->work);
flush_workqueue(md->wq);
}
@ -1534,7 +1509,6 @@ static void unlock_fs(struct mapped_device *md)
int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
{
struct dm_table *map = NULL;
DECLARE_WAITQUEUE(wait, current);
int r = 0;
int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0;
int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0;
@ -1584,28 +1558,22 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
down_write(&md->io_lock);
set_bit(DMF_BLOCK_IO, &md->flags);
add_wait_queue(&md->wait, &wait);
up_write(&md->io_lock);
/* unplug */
if (map)
dm_table_unplug_all(map);
/*
* Wait for the already-mapped ios to complete.
*/
r = dm_wait_for_completion(md);
r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE);
down_write(&md->io_lock);
remove_wait_queue(&md->wait, &wait);
if (noflush)
__merge_pushback_list(md);
clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
up_write(&md->io_lock);
/* were we interrupted ? */
if (r < 0) {
dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL);
dm_queue_flush(md);
unlock_fs(md);
goto out; /* pushback list is already flushed, so skip flush */
@ -1645,7 +1613,7 @@ int dm_resume(struct mapped_device *md)
if (r)
goto out;
dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL);
dm_queue_flush(md);
unlock_fs(md);

View File

@ -60,7 +60,7 @@ int dm_table_barrier_ok(struct dm_table *t);
int dm_target_init(void);
void dm_target_exit(void);
struct target_type *dm_get_target_type(const char *name);
void dm_put_target_type(struct target_type *t);
void dm_put_target_type(struct target_type *tt);
int dm_target_iterate(void (*iter_func)(struct target_type *tt,
void *param), void *param);

View File

@ -139,6 +139,9 @@ struct target_type {
dm_ioctl_fn ioctl;
dm_merge_fn merge;
dm_busy_fn busy;
/* For internal device-mapper use. */
struct list_head list;
};
struct io_restrictions {

View File

@ -28,6 +28,9 @@ struct dm_dirty_log_type {
const char *name;
struct module *module;
/* For internal device-mapper use */
struct list_head list;
int (*ctr)(struct dm_dirty_log *log, struct dm_target *ti,
unsigned argc, char **argv);
void (*dtr)(struct dm_dirty_log *log);
@ -113,6 +116,16 @@ struct dm_dirty_log_type {
*/
int (*status)(struct dm_dirty_log *log, status_type_t status_type,
char *result, unsigned maxlen);
/*
* is_remote_recovering is necessary for cluster mirroring. It provides
* a way to detect recovery on another node, so we aren't writing
* concurrently. This function is likely to block (when a cluster log
* is used).
*
* Returns: 0, 1
*/
int (*is_remote_recovering)(struct dm_dirty_log *log, region_t region);
};
int dm_dirty_log_type_register(struct dm_dirty_log_type *type);