Improve multipath's retrying mechanism in some defined circumstances

and provide a simple reserve/release mechanism for userspace tools to
 access thin provisioning metadata while the pool is in use.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1.4.11 (GNU/Linux)
 
 iQIcBAABAgAGBQJPyqIdAAoJEK2W1qbAHj1nSPoQAJvAb/6UHufTWC/lufbEyo7t
 ft6uwZZ4S/VV1Gdx8V5YXo3rxkVIZj/CV0hiJIDctmDMKGPMlzup39kCgjD/rOUF
 mzcFAE8sEr3QEavkfjSWw2RHIIlhnJpvqVnb8nu3p/mSgAB4qYGgaDjBpi+W60PV
 aqQSSWgwH1uNhfGDBIxQoJ8OIjjYvKPIf2Ir2FAXam/dNi9chWO9nzFdj3q2LccP
 nZir094BDsFac1BF0FYW3J+rgT1FfPO7RRGAQct6WNJ197IZlYWYjKH3XehxnUHE
 wgiJmjfUO8vrho1hhWmWDOesKJPPWFN67EQnl5FqAu9itP7c7k8bd7Ay4jWgtZQU
 QIx10uiAgAuFUmTdWGK1fLlE8HGKUFINYLp63N5n5NZ4TDJrgo8e7CIID3rvYf/O
 EtmL7HzAyztL9Uc6oaXzCK6TgMUtd/ht8OJCDFhjitzQTNjbrfAGz6m+RHnEZyyj
 dtOVK7WBlmuKEANl2vDFGuVVF0+MwJLTlvPx1/b/ejFvnHI/R5Wuk9EH7t/DO4LB
 nCmiwzB6uWMzU3y3vnZG72AYSF5NTKSvnAl5B8U/0rI1MZU+6PehjeviJNx6ddJN
 2YheHBLU4vbBV/LF4XIpaHK2aiHN1ltaKCp8INo3EKhCwpR4ZdlVvnAGU9ocf9+c
 qoaFTOP7zGD9zgPeGjoG
 =wCpY
 -----END PGP SIGNATURE-----

Merge tag 'dm-3.5-changes-1' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm

Pull device-mapper updates from Alasdair G Kergon:
 "Improve multipath's retrying mechanism in some defined circumstances
  and provide a simple reserve/release mechanism for userspace tools to
  access thin provisioning metadata while the pool is in use."

* tag 'dm-3.5-changes-1' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm:
  dm thin: provide userspace access to pool metadata
  dm thin: use slab mempools
  dm mpath: allow ioctls to trigger pg init
  dm mpath: delay retry of bypassed pg
  dm mpath: reduce size of struct multipath
This commit is contained in:
Linus Torvalds 2012-06-02 17:39:40 -07:00
commit 912afc3616
6 changed files with 323 additions and 91 deletions

View file

@ -287,6 +287,17 @@ iii) Messages
the current transaction id is when you change it with this
compare-and-swap message.
reserve_metadata_snap
Reserve a copy of the data mapping btree for use by userland.
This allows userland to inspect the mappings as they were when
this message was executed. Use the pool's status command to
get the root block associated with the metadata snapshot.
release_metadata_snap
Release a previously reserved copy of the data mapping btree.
'thin' target
-------------

View file

@ -18,6 +18,7 @@
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/workqueue.h>
#include <linux/delay.h>
#include <scsi/scsi_dh.h>
#include <linux/atomic.h>
@ -61,11 +62,11 @@ struct multipath {
struct list_head list;
struct dm_target *ti;
spinlock_t lock;
const char *hw_handler_name;
char *hw_handler_params;
spinlock_t lock;
unsigned nr_priority_groups;
struct list_head priority_groups;
@ -81,16 +82,17 @@ struct multipath {
struct priority_group *next_pg; /* Switch to this PG if set */
unsigned repeat_count; /* I/Os left before calling PS again */
unsigned queue_io; /* Must we queue all I/O? */
unsigned queue_if_no_path; /* Queue I/O if last path fails? */
unsigned saved_queue_if_no_path;/* Saved state during suspension */
unsigned queue_io:1; /* Must we queue all I/O? */
unsigned queue_if_no_path:1; /* Queue I/O if last path fails? */
unsigned saved_queue_if_no_path:1; /* Saved state during suspension */
unsigned pg_init_retries; /* Number of times to retry pg_init */
unsigned pg_init_count; /* Number of times pg_init called */
unsigned pg_init_delay_msecs; /* Number of msecs before pg_init retry */
unsigned queue_size;
struct work_struct process_queued_ios;
struct list_head queued_ios;
unsigned queue_size;
struct work_struct trigger_event;
@ -328,14 +330,18 @@ static void __choose_pgpath(struct multipath *m, size_t nr_bytes)
/*
* Loop through priority groups until we find a valid path.
* First time we skip PGs marked 'bypassed'.
* Second time we only try the ones we skipped.
* Second time we only try the ones we skipped, but set
* pg_init_delay_retry so we do not hammer controllers.
*/
do {
list_for_each_entry(pg, &m->priority_groups, list) {
if (pg->bypassed == bypassed)
continue;
if (!__choose_path_in_pg(m, pg, nr_bytes))
if (!__choose_path_in_pg(m, pg, nr_bytes)) {
if (!bypassed)
m->pg_init_delay_retry = 1;
return;
}
}
} while (bypassed--);
@ -481,9 +487,6 @@ static void process_queued_ios(struct work_struct *work)
spin_lock_irqsave(&m->lock, flags);
if (!m->queue_size)
goto out;
if (!m->current_pgpath)
__choose_pgpath(m, 0);
@ -496,7 +499,6 @@ static void process_queued_ios(struct work_struct *work)
if (m->pg_init_required && !m->pg_init_in_progress && pgpath)
__pg_init_all_paths(m);
out:
spin_unlock_irqrestore(&m->lock, flags);
if (!must_queue)
dispatch_queued_ios(m);
@ -1517,11 +1519,16 @@ out:
static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
unsigned long arg)
{
struct multipath *m = (struct multipath *) ti->private;
struct block_device *bdev = NULL;
fmode_t mode = 0;
struct multipath *m = ti->private;
struct block_device *bdev;
fmode_t mode;
unsigned long flags;
int r = 0;
int r;
again:
bdev = NULL;
mode = 0;
r = 0;
spin_lock_irqsave(&m->lock, flags);
@ -1546,6 +1553,12 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
if (!r && ti->len != i_size_read(bdev->bd_inode) >> SECTOR_SHIFT)
r = scsi_verify_blk_ioctl(NULL, cmd);
if (r == -EAGAIN && !fatal_signal_pending(current)) {
queue_work(kmultipathd, &m->process_queued_ios);
msleep(10);
goto again;
}
return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg);
}
@ -1643,7 +1656,7 @@ out:
*---------------------------------------------------------------*/
static struct target_type multipath_target = {
.name = "multipath",
.version = {1, 3, 0},
.version = {1, 4, 0},
.module = THIS_MODULE,
.ctr = multipath_ctr,
.dtr = multipath_dtr,

View file

@ -1082,15 +1082,139 @@ int dm_pool_get_metadata_transaction_id(struct dm_pool_metadata *pmd,
return 0;
}
static int __get_held_metadata_root(struct dm_pool_metadata *pmd,
dm_block_t *result)
/*
* Reserve a metadata snapshot for userland.
*
* Shadows the superblock, zeroes the space map roots in the shadow copy,
* increments the data mapping and device details roots that the copy
* refers to, and finally stores the copy's block location in the live
* superblock's held_root field.
*
* Returns 0 on success, -EBUSY if the copied superblock already carries a
* non-zero held_root, or the error returned by dm_tm_shadow_block() or
* dm_bm_write_lock().
*
* The visible caller, dm_pool_reserve_metadata_snap(), holds
* pmd->root_lock for write around this call.
*/
static int __reserve_metadata_snap(struct dm_pool_metadata *pmd)
{
int r, inc;
struct thin_disk_superblock *disk_super;
struct dm_block *copy, *sblock;
dm_block_t held_root;
/*
* Copy the superblock.
*
* The extra increment on the superblock location is taken before the
* shadow; presumably this is what makes dm_tm_shadow_block() report
* inc != 0 (asserted below) -- confirm against the transaction manager.
*
* NOTE(review): the return value of dm_sm_inc_block() is not checked,
* and the increment is not undone if dm_tm_shadow_block() fails.
*/
dm_sm_inc_block(pmd->metadata_sm, THIN_SUPERBLOCK_LOCATION);
r = dm_tm_shadow_block(pmd->tm, THIN_SUPERBLOCK_LOCATION,
&sb_validator, &copy, &inc);
if (r)
return r;
BUG_ON(!inc);
/* The location of the shadow copy is what gets published as held_root. */
held_root = dm_block_location(copy);
disk_super = dm_block_data(copy);
/*
* The copy carries the live superblock's held_root; non-zero means a
* snapshot is already reserved, so drop the copy again and refuse.
*/
if (le64_to_cpu(disk_super->held_root)) {
DMWARN("Pool metadata snapshot already exists: release this before taking another.");
dm_tm_dec(pmd->tm, held_root);
dm_tm_unlock(pmd->tm, copy);
pmd->need_commit = 1;
return -EBUSY;
}
/*
* Wipe the spacemap since we're not publishing this.
*/
memset(&disk_super->data_space_map_root, 0,
sizeof(disk_super->data_space_map_root));
memset(&disk_super->metadata_space_map_root, 0,
sizeof(disk_super->metadata_space_map_root));
/*
* Increment the data structures that need to be preserved.
*/
dm_tm_inc(pmd->tm, le64_to_cpu(disk_super->data_mapping_root));
dm_tm_inc(pmd->tm, le64_to_cpu(disk_super->device_details_root));
dm_tm_unlock(pmd->tm, copy);
/*
* Write the held root into the superblock.
*
* NOTE(review): on failure only held_root is decremented here; the two
* dm_tm_inc() calls above are not explicitly reversed. Whether dropping
* held_root is sufficient is not visible from this function -- confirm.
*/
r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
&sb_validator, &sblock);
if (r) {
dm_tm_dec(pmd->tm, held_root);
pmd->need_commit = 1;
return r;
}
disk_super = dm_block_data(sblock);
disk_super->held_root = cpu_to_le64(held_root);
dm_bm_unlock(sblock);
/* Flag the metadata as modified so the change is committed later. */
pmd->need_commit = 1;
return 0;
}
/*
 * Reserve a metadata snapshot on behalf of userland.
 *
 * Runs __reserve_metadata_snap() with pmd->root_lock held for write and
 * passes its result (0 or a negative errno) straight back to the caller.
 */
int dm_pool_reserve_metadata_snap(struct dm_pool_metadata *pmd)
{
	int ret;

	down_write(&pmd->root_lock);
	ret = __reserve_metadata_snap(pmd);
	up_write(&pmd->root_lock);

	return ret;
}
/*
* Release a previously reserved metadata snapshot.
*
* Reads held_root from the live superblock and clears it, then read-locks
* the held copy and decrements the data mapping root, the device details
* root and the held block itself in the metadata space map.
*
* Returns -EINVAL if held_root was zero, the error from
* dm_bm_write_lock() or dm_tm_read_lock() if either fails, and otherwise
* the result of dm_tm_unlock().
*
* The visible caller, dm_pool_release_metadata_snap(), holds
* pmd->root_lock for write around this call.
*/
static int __release_metadata_snap(struct dm_pool_metadata *pmd)
{
int r;
struct thin_disk_superblock *disk_super;
struct dm_block *sblock, *copy;
dm_block_t held_root;
r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
&sb_validator, &sblock);
if (r)
return r;
/*
* Fetch and clear held_root in one pass over the superblock.
*
* NOTE(review): need_commit is set before held_root is checked, so it
* is also set on the -EINVAL path where nothing actually changed.
*/
disk_super = dm_block_data(sblock);
held_root = le64_to_cpu(disk_super->held_root);
disk_super->held_root = cpu_to_le64(0);
pmd->need_commit = 1;
dm_bm_unlock(sblock);
if (!held_root) {
DMWARN("No pool metadata snapshot found: nothing to release.");
return -EINVAL;
}
/*
* NOTE(review): if this read lock fails, held_root has already been
* cleared in the superblock and none of the decrements below run.
*/
r = dm_tm_read_lock(pmd->tm, held_root, &sb_validator, &copy);
if (r)
return r;
/*
* Drop the counts on the two roots recorded in the held copy, then on
* the held copy's own block. Return values of dm_sm_dec_block() are
* not checked.
*/
disk_super = dm_block_data(copy);
dm_sm_dec_block(pmd->metadata_sm, le64_to_cpu(disk_super->data_mapping_root));
dm_sm_dec_block(pmd->metadata_sm, le64_to_cpu(disk_super->device_details_root));
dm_sm_dec_block(pmd->metadata_sm, held_root);
return dm_tm_unlock(pmd->tm, copy);
}
/*
 * Release the metadata snapshot previously reserved for userland.
 *
 * Runs __release_metadata_snap() with pmd->root_lock held for write and
 * passes its result (0 or a negative errno) straight back to the caller.
 */
int dm_pool_release_metadata_snap(struct dm_pool_metadata *pmd)
{
	int ret;

	down_write(&pmd->root_lock);
	ret = __release_metadata_snap(pmd);
	up_write(&pmd->root_lock);

	return ret;
}
static int __get_metadata_snap(struct dm_pool_metadata *pmd,
dm_block_t *result)
{
int r;
struct thin_disk_superblock *disk_super;
struct dm_block *sblock;
r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
&sb_validator, &sblock);
r = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
&sb_validator, &sblock);
if (r)
return r;
@ -1100,13 +1224,13 @@ static int __get_held_metadata_root(struct dm_pool_metadata *pmd,
return dm_bm_unlock(sblock);
}
int dm_pool_get_held_metadata_root(struct dm_pool_metadata *pmd,
dm_block_t *result)
int dm_pool_get_metadata_snap(struct dm_pool_metadata *pmd,
dm_block_t *result)
{
int r;
down_read(&pmd->root_lock);
r = __get_held_metadata_root(pmd, result);
r = __get_metadata_snap(pmd, result);
up_read(&pmd->root_lock);
return r;

View file

@ -90,11 +90,18 @@ int dm_pool_get_metadata_transaction_id(struct dm_pool_metadata *pmd,
/*
* Hold/get root for userspace transaction.
*
* The metadata snapshot is a copy of the current superblock (minus the
* space maps). Userland can access the data structures for READ
* operations only. A small performance hit is incurred by providing this
* copy of the metadata to userland due to extra copy-on-write operations
* on the metadata nodes. Release this as soon as you finish with it.
*/
int dm_pool_hold_metadata_root(struct dm_pool_metadata *pmd);
int dm_pool_reserve_metadata_snap(struct dm_pool_metadata *pmd);
int dm_pool_release_metadata_snap(struct dm_pool_metadata *pmd);
int dm_pool_get_held_metadata_root(struct dm_pool_metadata *pmd,
dm_block_t *result);
int dm_pool_get_metadata_snap(struct dm_pool_metadata *pmd,
dm_block_t *result);
/*
* Actions on a single virtual device.

View file

@ -111,7 +111,7 @@ struct cell_key {
dm_block_t block;
};
struct cell {
struct dm_bio_prison_cell {
struct hlist_node list;
struct bio_prison *prison;
struct cell_key key;
@ -141,6 +141,8 @@ static uint32_t calc_nr_buckets(unsigned nr_cells)
return n;
}
static struct kmem_cache *_cell_cache;
/*
* @nr_cells should be the number of cells you want in use _concurrently_.
* Don't confuse it with the number of distinct keys.
@ -157,8 +159,7 @@ static struct bio_prison *prison_create(unsigned nr_cells)
return NULL;
spin_lock_init(&prison->lock);
prison->cell_pool = mempool_create_kmalloc_pool(nr_cells,
sizeof(struct cell));
prison->cell_pool = mempool_create_slab_pool(nr_cells, _cell_cache);
if (!prison->cell_pool) {
kfree(prison);
return NULL;
@ -194,10 +195,10 @@ static int keys_equal(struct cell_key *lhs, struct cell_key *rhs)
(lhs->block == rhs->block);
}
static struct cell *__search_bucket(struct hlist_head *bucket,
struct cell_key *key)
static struct dm_bio_prison_cell *__search_bucket(struct hlist_head *bucket,
struct cell_key *key)
{
struct cell *cell;
struct dm_bio_prison_cell *cell;
struct hlist_node *tmp;
hlist_for_each_entry(cell, tmp, bucket, list)
@ -214,12 +215,12 @@ static struct cell *__search_bucket(struct hlist_head *bucket,
* Returns 1 if the cell was already held, 0 if @inmate is the new holder.
*/
static int bio_detain(struct bio_prison *prison, struct cell_key *key,
struct bio *inmate, struct cell **ref)
struct bio *inmate, struct dm_bio_prison_cell **ref)
{
int r = 1;
unsigned long flags;
uint32_t hash = hash_key(prison, key);
struct cell *cell, *cell2;
struct dm_bio_prison_cell *cell, *cell2;
BUG_ON(hash > prison->nr_buckets);
@ -273,7 +274,7 @@ out:
/*
* @inmates must have been initialised prior to this call
*/
static void __cell_release(struct cell *cell, struct bio_list *inmates)
static void __cell_release(struct dm_bio_prison_cell *cell, struct bio_list *inmates)
{
struct bio_prison *prison = cell->prison;
@ -287,7 +288,7 @@ static void __cell_release(struct cell *cell, struct bio_list *inmates)
mempool_free(cell, prison->cell_pool);
}
static void cell_release(struct cell *cell, struct bio_list *bios)
static void cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios)
{
unsigned long flags;
struct bio_prison *prison = cell->prison;
@ -303,7 +304,7 @@ static void cell_release(struct cell *cell, struct bio_list *bios)
* bio may be in the cell. This function releases the cell, and also does
* a sanity check.
*/
static void __cell_release_singleton(struct cell *cell, struct bio *bio)
static void __cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio)
{
BUG_ON(cell->holder != bio);
BUG_ON(!bio_list_empty(&cell->bios));
@ -311,7 +312,7 @@ static void __cell_release_singleton(struct cell *cell, struct bio *bio)
__cell_release(cell, NULL);
}
static void cell_release_singleton(struct cell *cell, struct bio *bio)
static void cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio)
{
unsigned long flags;
struct bio_prison *prison = cell->prison;
@ -324,7 +325,8 @@ static void cell_release_singleton(struct cell *cell, struct bio *bio)
/*
* Sometimes we don't want the holder, just the additional bios.
*/
static void __cell_release_no_holder(struct cell *cell, struct bio_list *inmates)
static void __cell_release_no_holder(struct dm_bio_prison_cell *cell,
struct bio_list *inmates)
{
struct bio_prison *prison = cell->prison;
@ -334,7 +336,8 @@ static void __cell_release_no_holder(struct cell *cell, struct bio_list *inmates
mempool_free(cell, prison->cell_pool);
}
static void cell_release_no_holder(struct cell *cell, struct bio_list *inmates)
static void cell_release_no_holder(struct dm_bio_prison_cell *cell,
struct bio_list *inmates)
{
unsigned long flags;
struct bio_prison *prison = cell->prison;
@ -344,7 +347,7 @@ static void cell_release_no_holder(struct cell *cell, struct bio_list *inmates)
spin_unlock_irqrestore(&prison->lock, flags);
}
static void cell_error(struct cell *cell)
static void cell_error(struct dm_bio_prison_cell *cell)
{
struct bio_prison *prison = cell->prison;
struct bio_list bios;
@ -491,7 +494,7 @@ static void build_virtual_key(struct dm_thin_device *td, dm_block_t b,
* also provides the interface for creating and destroying internal
* devices.
*/
struct new_mapping;
struct dm_thin_new_mapping;
struct pool_features {
unsigned zero_new_blocks:1;
@ -537,7 +540,7 @@ struct pool {
struct deferred_set shared_read_ds;
struct deferred_set all_io_ds;
struct new_mapping *next_mapping;
struct dm_thin_new_mapping *next_mapping;
mempool_t *mapping_pool;
mempool_t *endio_hook_pool;
};
@ -630,11 +633,11 @@ static struct pool *__pool_table_lookup_metadata_dev(struct block_device *md_dev
/*----------------------------------------------------------------*/
struct endio_hook {
struct dm_thin_endio_hook {
struct thin_c *tc;
struct deferred_entry *shared_read_entry;
struct deferred_entry *all_io_entry;
struct new_mapping *overwrite_mapping;
struct dm_thin_new_mapping *overwrite_mapping;
};
static void __requeue_bio_list(struct thin_c *tc, struct bio_list *master)
@ -647,7 +650,8 @@ static void __requeue_bio_list(struct thin_c *tc, struct bio_list *master)
bio_list_init(master);
while ((bio = bio_list_pop(&bios))) {
struct endio_hook *h = dm_get_mapinfo(bio)->ptr;
struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr;
if (h->tc == tc)
bio_endio(bio, DM_ENDIO_REQUEUE);
else
@ -736,7 +740,7 @@ static void wake_worker(struct pool *pool)
/*
* Bio endio functions.
*/
struct new_mapping {
struct dm_thin_new_mapping {
struct list_head list;
unsigned quiesced:1;
@ -746,7 +750,7 @@ struct new_mapping {
struct thin_c *tc;
dm_block_t virt_block;
dm_block_t data_block;
struct cell *cell, *cell2;
struct dm_bio_prison_cell *cell, *cell2;
int err;
/*
@ -759,7 +763,7 @@ struct new_mapping {
bio_end_io_t *saved_bi_end_io;
};
static void __maybe_add_mapping(struct new_mapping *m)
static void __maybe_add_mapping(struct dm_thin_new_mapping *m)
{
struct pool *pool = m->tc->pool;
@ -772,7 +776,7 @@ static void __maybe_add_mapping(struct new_mapping *m)
static void copy_complete(int read_err, unsigned long write_err, void *context)
{
unsigned long flags;
struct new_mapping *m = context;
struct dm_thin_new_mapping *m = context;
struct pool *pool = m->tc->pool;
m->err = read_err || write_err ? -EIO : 0;
@ -786,8 +790,8 @@ static void copy_complete(int read_err, unsigned long write_err, void *context)
static void overwrite_endio(struct bio *bio, int err)
{
unsigned long flags;
struct endio_hook *h = dm_get_mapinfo(bio)->ptr;
struct new_mapping *m = h->overwrite_mapping;
struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr;
struct dm_thin_new_mapping *m = h->overwrite_mapping;
struct pool *pool = m->tc->pool;
m->err = err;
@ -811,7 +815,7 @@ static void overwrite_endio(struct bio *bio, int err)
/*
* This sends the bios in the cell back to the deferred_bios list.
*/
static void cell_defer(struct thin_c *tc, struct cell *cell,
static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell,
dm_block_t data_block)
{
struct pool *pool = tc->pool;
@ -828,7 +832,7 @@ static void cell_defer(struct thin_c *tc, struct cell *cell,
* Same as cell_defer above, except it omits one particular detainee,
* a write bio that covers the block and has already been processed.
*/
static void cell_defer_except(struct thin_c *tc, struct cell *cell)
static void cell_defer_except(struct thin_c *tc, struct dm_bio_prison_cell *cell)
{
struct bio_list bios;
struct pool *pool = tc->pool;
@ -843,7 +847,7 @@ static void cell_defer_except(struct thin_c *tc, struct cell *cell)
wake_worker(pool);
}
static void process_prepared_mapping(struct new_mapping *m)
static void process_prepared_mapping(struct dm_thin_new_mapping *m)
{
struct thin_c *tc = m->tc;
struct bio *bio;
@ -886,7 +890,7 @@ static void process_prepared_mapping(struct new_mapping *m)
mempool_free(m, tc->pool->mapping_pool);
}
static void process_prepared_discard(struct new_mapping *m)
static void process_prepared_discard(struct dm_thin_new_mapping *m)
{
int r;
struct thin_c *tc = m->tc;
@ -909,11 +913,11 @@ static void process_prepared_discard(struct new_mapping *m)
}
static void process_prepared(struct pool *pool, struct list_head *head,
void (*fn)(struct new_mapping *))
void (*fn)(struct dm_thin_new_mapping *))
{
unsigned long flags;
struct list_head maps;
struct new_mapping *m, *tmp;
struct dm_thin_new_mapping *m, *tmp;
INIT_LIST_HEAD(&maps);
spin_lock_irqsave(&pool->lock, flags);
@ -957,9 +961,9 @@ static int ensure_next_mapping(struct pool *pool)
return pool->next_mapping ? 0 : -ENOMEM;
}
static struct new_mapping *get_next_mapping(struct pool *pool)
static struct dm_thin_new_mapping *get_next_mapping(struct pool *pool)
{
struct new_mapping *r = pool->next_mapping;
struct dm_thin_new_mapping *r = pool->next_mapping;
BUG_ON(!pool->next_mapping);
@ -971,11 +975,11 @@ static struct new_mapping *get_next_mapping(struct pool *pool)
static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
struct dm_dev *origin, dm_block_t data_origin,
dm_block_t data_dest,
struct cell *cell, struct bio *bio)
struct dm_bio_prison_cell *cell, struct bio *bio)
{
int r;
struct pool *pool = tc->pool;
struct new_mapping *m = get_next_mapping(pool);
struct dm_thin_new_mapping *m = get_next_mapping(pool);
INIT_LIST_HEAD(&m->list);
m->quiesced = 0;
@ -997,7 +1001,8 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
* bio immediately. Otherwise we use kcopyd to clone the data first.
*/
if (io_overwrites_block(pool, bio)) {
struct endio_hook *h = dm_get_mapinfo(bio)->ptr;
struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr;
h->overwrite_mapping = m;
m->bio = bio;
save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
@ -1025,7 +1030,7 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block,
dm_block_t data_origin, dm_block_t data_dest,
struct cell *cell, struct bio *bio)
struct dm_bio_prison_cell *cell, struct bio *bio)
{
schedule_copy(tc, virt_block, tc->pool_dev,
data_origin, data_dest, cell, bio);
@ -1033,18 +1038,18 @@ static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block,
static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block,
dm_block_t data_dest,
struct cell *cell, struct bio *bio)
struct dm_bio_prison_cell *cell, struct bio *bio)
{
schedule_copy(tc, virt_block, tc->origin_dev,
virt_block, data_dest, cell, bio);
}
static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
dm_block_t data_block, struct cell *cell,
dm_block_t data_block, struct dm_bio_prison_cell *cell,
struct bio *bio)
{
struct pool *pool = tc->pool;
struct new_mapping *m = get_next_mapping(pool);
struct dm_thin_new_mapping *m = get_next_mapping(pool);
INIT_LIST_HEAD(&m->list);
m->quiesced = 1;
@ -1065,12 +1070,12 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
process_prepared_mapping(m);
else if (io_overwrites_block(pool, bio)) {
struct endio_hook *h = dm_get_mapinfo(bio)->ptr;
struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr;
h->overwrite_mapping = m;
m->bio = bio;
save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
remap_and_issue(tc, bio, data_block);
} else {
int r;
struct dm_io_region to;
@ -1155,7 +1160,7 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result)
*/
static void retry_on_resume(struct bio *bio)
{
struct endio_hook *h = dm_get_mapinfo(bio)->ptr;
struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr;
struct thin_c *tc = h->tc;
struct pool *pool = tc->pool;
unsigned long flags;
@ -1165,7 +1170,7 @@ static void retry_on_resume(struct bio *bio)
spin_unlock_irqrestore(&pool->lock, flags);
}
static void no_space(struct cell *cell)
static void no_space(struct dm_bio_prison_cell *cell)
{
struct bio *bio;
struct bio_list bios;
@ -1182,11 +1187,11 @@ static void process_discard(struct thin_c *tc, struct bio *bio)
int r;
unsigned long flags;
struct pool *pool = tc->pool;
struct cell *cell, *cell2;
struct dm_bio_prison_cell *cell, *cell2;
struct cell_key key, key2;
dm_block_t block = get_bio_block(tc, bio);
struct dm_thin_lookup_result lookup_result;
struct new_mapping *m;
struct dm_thin_new_mapping *m;
build_virtual_key(tc->td, block, &key);
if (bio_detain(tc->pool->prison, &key, bio, &cell))
@ -1263,7 +1268,7 @@ static void process_discard(struct thin_c *tc, struct bio *bio)
static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block,
struct cell_key *key,
struct dm_thin_lookup_result *lookup_result,
struct cell *cell)
struct dm_bio_prison_cell *cell)
{
int r;
dm_block_t data_block;
@ -1290,7 +1295,7 @@ static void process_shared_bio(struct thin_c *tc, struct bio *bio,
dm_block_t block,
struct dm_thin_lookup_result *lookup_result)
{
struct cell *cell;
struct dm_bio_prison_cell *cell;
struct pool *pool = tc->pool;
struct cell_key key;
@ -1305,7 +1310,7 @@ static void process_shared_bio(struct thin_c *tc, struct bio *bio,
if (bio_data_dir(bio) == WRITE)
break_sharing(tc, bio, block, &key, lookup_result, cell);
else {
struct endio_hook *h = dm_get_mapinfo(bio)->ptr;
struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr;
h->shared_read_entry = ds_inc(&pool->shared_read_ds);
@ -1315,7 +1320,7 @@ static void process_shared_bio(struct thin_c *tc, struct bio *bio,
}
static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block,
struct cell *cell)
struct dm_bio_prison_cell *cell)
{
int r;
dm_block_t data_block;
@ -1363,7 +1368,7 @@ static void process_bio(struct thin_c *tc, struct bio *bio)
{
int r;
dm_block_t block = get_bio_block(tc, bio);
struct cell *cell;
struct dm_bio_prison_cell *cell;
struct cell_key key;
struct dm_thin_lookup_result lookup_result;
@ -1432,7 +1437,7 @@ static void process_deferred_bios(struct pool *pool)
spin_unlock_irqrestore(&pool->lock, flags);
while ((bio = bio_list_pop(&bios))) {
struct endio_hook *h = dm_get_mapinfo(bio)->ptr;
struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr;
struct thin_c *tc = h->tc;
/*
@ -1522,10 +1527,10 @@ static void thin_defer_bio(struct thin_c *tc, struct bio *bio)
wake_worker(pool);
}
static struct endio_hook *thin_hook_bio(struct thin_c *tc, struct bio *bio)
static struct dm_thin_endio_hook *thin_hook_bio(struct thin_c *tc, struct bio *bio)
{
struct pool *pool = tc->pool;
struct endio_hook *h = mempool_alloc(pool->endio_hook_pool, GFP_NOIO);
struct dm_thin_endio_hook *h = mempool_alloc(pool->endio_hook_pool, GFP_NOIO);
h->tc = tc;
h->shared_read_entry = NULL;
@ -1687,6 +1692,9 @@ static void __pool_destroy(struct pool *pool)
kfree(pool);
}
static struct kmem_cache *_new_mapping_cache;
static struct kmem_cache *_endio_hook_cache;
static struct pool *pool_create(struct mapped_device *pool_md,
struct block_device *metadata_dev,
unsigned long block_size, char **error)
@ -1755,16 +1763,16 @@ static struct pool *pool_create(struct mapped_device *pool_md,
ds_init(&pool->all_io_ds);
pool->next_mapping = NULL;
pool->mapping_pool =
mempool_create_kmalloc_pool(MAPPING_POOL_SIZE, sizeof(struct new_mapping));
pool->mapping_pool = mempool_create_slab_pool(MAPPING_POOL_SIZE,
_new_mapping_cache);
if (!pool->mapping_pool) {
*error = "Error creating pool's mapping mempool";
err_p = ERR_PTR(-ENOMEM);
goto bad_mapping_pool;
}
pool->endio_hook_pool =
mempool_create_kmalloc_pool(ENDIO_HOOK_POOL_SIZE, sizeof(struct endio_hook));
pool->endio_hook_pool = mempool_create_slab_pool(ENDIO_HOOK_POOL_SIZE,
_endio_hook_cache);
if (!pool->endio_hook_pool) {
*error = "Error creating pool's endio_hook mempool";
err_p = ERR_PTR(-ENOMEM);
@ -2276,6 +2284,36 @@ static int process_set_transaction_id_mesg(unsigned argc, char **argv, struct po
return 0;
}
/*
 * Handle the "reserve_metadata_snap" pool message.
 *
 * The message takes no arguments beyond its own name, so argc must be 1.
 * Returns the check_arg_count() error if the count is wrong, otherwise
 * the result of dm_pool_reserve_metadata_snap(); a failure of the latter
 * is also logged.
 */
static int process_reserve_metadata_snap_mesg(unsigned argc, char **argv, struct pool *pool)
{
	int err = check_arg_count(argc, 1);

	if (!err) {
		err = dm_pool_reserve_metadata_snap(pool->pmd);
		if (err)
			DMWARN("reserve_metadata_snap message failed.");
	}

	return err;
}
/*
 * Handle the "release_metadata_snap" pool message.
 *
 * The message takes no arguments beyond its own name, so argc must be 1.
 * Returns the check_arg_count() error if the count is wrong, otherwise
 * the result of dm_pool_release_metadata_snap(); a failure of the latter
 * is also logged.
 */
static int process_release_metadata_snap_mesg(unsigned argc, char **argv, struct pool *pool)
{
	int err = check_arg_count(argc, 1);

	if (!err) {
		err = dm_pool_release_metadata_snap(pool->pmd);
		if (err)
			DMWARN("release_metadata_snap message failed.");
	}

	return err;
}
/*
* Messages supported:
* create_thin <dev_id>
@ -2283,6 +2321,8 @@ static int process_set_transaction_id_mesg(unsigned argc, char **argv, struct po
* delete <dev_id>
* trim <dev_id> <new_size_in_sectors>
* set_transaction_id <current_trans_id> <new_trans_id>
* reserve_metadata_snap
* release_metadata_snap
*/
static int pool_message(struct dm_target *ti, unsigned argc, char **argv)
{
@ -2302,6 +2342,12 @@ static int pool_message(struct dm_target *ti, unsigned argc, char **argv)
else if (!strcasecmp(argv[0], "set_transaction_id"))
r = process_set_transaction_id_mesg(argc, argv, pool);
else if (!strcasecmp(argv[0], "reserve_metadata_snap"))
r = process_reserve_metadata_snap_mesg(argc, argv, pool);
else if (!strcasecmp(argv[0], "release_metadata_snap"))
r = process_release_metadata_snap_mesg(argc, argv, pool);
else
DMWARN("Unrecognised thin pool target message received: %s", argv[0]);
@ -2361,7 +2407,7 @@ static int pool_status(struct dm_target *ti, status_type_t type,
if (r)
return r;
r = dm_pool_get_held_metadata_root(pool->pmd, &held_root);
r = dm_pool_get_metadata_snap(pool->pmd, &held_root);
if (r)
return r;
@ -2457,7 +2503,7 @@ static struct target_type pool_target = {
.name = "thin-pool",
.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
DM_TARGET_IMMUTABLE,
.version = {1, 1, 0},
.version = {1, 2, 0},
.module = THIS_MODULE,
.ctr = pool_ctr,
.dtr = pool_dtr,
@ -2613,9 +2659,9 @@ static int thin_endio(struct dm_target *ti,
union map_info *map_context)
{
unsigned long flags;
struct endio_hook *h = map_context->ptr;
struct dm_thin_endio_hook *h = map_context->ptr;
struct list_head work;
struct new_mapping *m, *tmp;
struct dm_thin_new_mapping *m, *tmp;
struct pool *pool = h->tc->pool;
if (h->shared_read_entry) {
@ -2755,7 +2801,32 @@ static int __init dm_thin_init(void)
r = dm_register_target(&pool_target);
if (r)
dm_unregister_target(&thin_target);
goto bad_pool_target;
r = -ENOMEM;
_cell_cache = KMEM_CACHE(dm_bio_prison_cell, 0);
if (!_cell_cache)
goto bad_cell_cache;
_new_mapping_cache = KMEM_CACHE(dm_thin_new_mapping, 0);
if (!_new_mapping_cache)
goto bad_new_mapping_cache;
_endio_hook_cache = KMEM_CACHE(dm_thin_endio_hook, 0);
if (!_endio_hook_cache)
goto bad_endio_hook_cache;
return 0;
bad_endio_hook_cache:
kmem_cache_destroy(_new_mapping_cache);
bad_new_mapping_cache:
kmem_cache_destroy(_cell_cache);
bad_cell_cache:
dm_unregister_target(&pool_target);
bad_pool_target:
dm_unregister_target(&thin_target);
return r;
}
@ -2764,6 +2835,10 @@ static void dm_thin_exit(void)
{
dm_unregister_target(&thin_target);
dm_unregister_target(&pool_target);
kmem_cache_destroy(_cell_cache);
kmem_cache_destroy(_new_mapping_cache);
kmem_cache_destroy(_endio_hook_cache);
}
module_init(dm_thin_init);

View file

@ -249,6 +249,7 @@ int dm_tm_shadow_block(struct dm_transaction_manager *tm, dm_block_t orig,
return r;
}
EXPORT_SYMBOL_GPL(dm_tm_shadow_block);
int dm_tm_read_lock(struct dm_transaction_manager *tm, dm_block_t b,
struct dm_block_validator *v,
@ -259,6 +260,7 @@ int dm_tm_read_lock(struct dm_transaction_manager *tm, dm_block_t b,
return dm_bm_read_lock(tm->bm, b, v, blk);
}
EXPORT_SYMBOL_GPL(dm_tm_read_lock);
int dm_tm_unlock(struct dm_transaction_manager *tm, struct dm_block *b)
{