Allow devices that hold metadata for the device-mapper thin

provisioning target to be extended easily; allow WRITE SAME on
 multipath devices; an assortment of little fixes and clean-ups.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1.4.11 (GNU/Linux)
 
 iQIcBAABAgAGBQJRjPh5AAoJEK2W1qbAHj1nznsP/idIXjHebZbyp0DHLIHcOyR9
 R7wj641TCE72qtIvlxPkpqrFdPQF5alilerS3OpWfrOR5clswQhI8aZOfdXRtvCe
 3QQ2yLpi1aZxm/TjJXo4vlfm+JJQZVsytG+5ou5cNPVt0/rAAhaMS3n2OvSNBiMO
 krtksMZk8oD0LV33M6FLnieV5gZIsEpy1VrPu8eLfTT7cSE28k7h0wsN8MzRlRKq
 uKThwPkA332N9adPZoxr3diRL7USSn6VYo9ygheJfAj5o6UGDxwZZu2BnO0vsaEj
 6q9yt+W6wFa96knBI7pVb19OEtNYvF7fjsYuAu0DRvEZ8cPeU40Mi/fNnN8TAg+Q
 tFZkZ+rlvQCBylIsGEhLZkOjTf9cl2H9I3BfVLQ2givwANLvUjunGtcxPENbsKSq
 kKBwlXyDs2bgUIk1ltTTyenGZxVA7ADOPNbvYMOKWzeh8gsKgQN+34ggzhHYgNQr
 jkljEt3ToPDQ+rqWmz5+NGNTzH6I0FuK/rF9C9TAaFnBgCAuXvfNEUYBR0VnXgp+
 LxijQ+q1CFDNC7pnrlVrKz2UAlpn2dDVJsREMUXZQQr4r06x3TgmMaLcGpUgNK0Q
 iZ1vHPkHMmFTeCyJYJpe62wKRDQotevhjcgyQAEJBiPFOyVcfAn/WqPRz6U/IP00
 A1QR9eBvCWKrcZnYOet9
 =YXMj
 -----END PGP SIGNATURE-----

Merge tag 'dm-3.10-changes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm

Pull device-mapper updates from Alasdair Kergon:
 "Allow devices that hold metadata for the device-mapper thin
  provisioning target to be extended easily; allow WRITE SAME on
  multipath devices; an assortment of little fixes and clean-ups."

* tag 'dm-3.10-changes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm: (21 commits)
  dm cache: set config value
  dm cache: move config fns
  dm thin: generate event when metadata threshold passed
  dm persistent metadata: add space map threshold callback
  dm persistent data: add threshold callback to space map
  dm thin: detect metadata device resizing
  dm persistent data: support space map resizing
  dm thin: open dev read only when possible
  dm thin: refactor data dev resize
  dm cache: replace memcpy with struct assignment
  dm cache: fix typos in comments
  dm cache policy: fix description of lookup fn
  dm: document iterate_devices
  dm persistent data: fix error message typos
  dm cache: tune migration throttling
  dm mpath: enable WRITE SAME support
  dm table: fix write same support
  dm bufio: avoid a possible __vmalloc deadlock
  dm snapshot: fix error return code in snapshot_ctr
  dm cache: fix error return code in cache_create
  ...
This commit is contained in:
Linus Torvalds 2013-05-10 09:02:50 -07:00
commit ec6671589a
15 changed files with 445 additions and 119 deletions

View file

@ -319,6 +319,9 @@ static void __cache_size_refresh(void)
static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
enum data_mode *data_mode)
{
unsigned noio_flag;
void *ptr;
if (c->block_size <= DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT) {
*data_mode = DATA_MODE_SLAB;
return kmem_cache_alloc(DM_BUFIO_CACHE(c), gfp_mask);
@ -332,7 +335,26 @@ static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
}
*data_mode = DATA_MODE_VMALLOC;
return __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
/*
* __vmalloc allocates the data pages and auxiliary structures with
* gfp_flags that were specified, but pagetables are always allocated
* with GFP_KERNEL, no matter what was specified as gfp_mask.
*
* Consequently, we must set per-process flag PF_MEMALLOC_NOIO so that
* all allocations done by this process (including pagetables) are done
* as if GFP_NOIO was specified.
*/
if (gfp_mask & __GFP_NORETRY)
noio_flag = memalloc_noio_save();
ptr = __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
if (gfp_mask & __GFP_NORETRY)
memalloc_noio_restore(noio_flag);
return ptr;
}
/*

View file

@ -1044,7 +1044,7 @@ void dm_cache_metadata_get_stats(struct dm_cache_metadata *cmd,
struct dm_cache_statistics *stats)
{
down_read(&cmd->root_lock);
memcpy(stats, &cmd->stats, sizeof(*stats));
*stats = cmd->stats;
up_read(&cmd->root_lock);
}
@ -1052,7 +1052,7 @@ void dm_cache_metadata_set_stats(struct dm_cache_metadata *cmd,
struct dm_cache_statistics *stats)
{
down_write(&cmd->root_lock);
memcpy(&cmd->stats, stats, sizeof(*stats));
cmd->stats = *stats;
up_write(&cmd->root_lock);
}

View file

@ -130,8 +130,8 @@ struct dm_cache_policy {
*
* Must not block.
*
* Returns 1 iff in cache, 0 iff not, < 0 on error (-EWOULDBLOCK
* would be typical).
* Returns 0 if in cache, -ENOENT if not, < 0 for other errors
* (-EWOULDBLOCK would be typical).
*/
int (*lookup)(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock);

View file

@ -205,7 +205,7 @@ struct per_bio_data {
/*
* writethrough fields. These MUST remain at the end of this
* structure and the 'cache' member must be the first as it
* is used to determine the offsetof the writethrough fields.
* is used to determine the offset of the writethrough fields.
*/
struct cache *cache;
dm_cblock_t cblock;
@ -393,7 +393,7 @@ static int get_cell(struct cache *cache,
return r;
}
/*----------------------------------------------------------------*/
/*----------------------------------------------------------------*/
static bool is_dirty(struct cache *cache, dm_cblock_t b)
{
@ -419,6 +419,7 @@ static void clear_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cbl
}
/*----------------------------------------------------------------*/
static bool block_size_is_power_of_two(struct cache *cache)
{
return cache->sectors_per_block_shift >= 0;
@ -667,7 +668,7 @@ static void writethrough_endio(struct bio *bio, int err)
/*
* We can't issue this bio directly, since we're in interrupt
* context. So it get's put on a bio list for processing by the
* context. So it gets put on a bio list for processing by the
* worker thread.
*/
defer_writethrough_bio(pb->cache, bio);
@ -1445,6 +1446,7 @@ static void do_worker(struct work_struct *ws)
static void do_waker(struct work_struct *ws)
{
struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker);
policy_tick(cache->policy);
wake_worker(cache);
queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD);
}
@ -1809,7 +1811,37 @@ static int parse_cache_args(struct cache_args *ca, int argc, char **argv,
static struct kmem_cache *migration_cache;
static int set_config_values(struct dm_cache_policy *p, int argc, const char **argv)
#define NOT_CORE_OPTION 1
static int process_config_option(struct cache *cache, const char *key, const char *value)
{
unsigned long tmp;
if (!strcasecmp(key, "migration_threshold")) {
if (kstrtoul(value, 10, &tmp))
return -EINVAL;
cache->migration_threshold = tmp;
return 0;
}
return NOT_CORE_OPTION;
}
static int set_config_value(struct cache *cache, const char *key, const char *value)
{
int r = process_config_option(cache, key, value);
if (r == NOT_CORE_OPTION)
r = policy_set_config_value(cache->policy, key, value);
if (r)
DMWARN("bad config value for %s: %s", key, value);
return r;
}
static int set_config_values(struct cache *cache, int argc, const char **argv)
{
int r = 0;
@ -1819,12 +1851,9 @@ static int set_config_values(struct dm_cache_policy *p, int argc, const char **a
}
while (argc) {
r = policy_set_config_value(p, argv[0], argv[1]);
if (r) {
DMWARN("policy_set_config_value failed: key = '%s', value = '%s'",
argv[0], argv[1]);
return r;
}
r = set_config_value(cache, argv[0], argv[1]);
if (r)
break;
argc -= 2;
argv += 2;
@ -1836,8 +1865,6 @@ static int set_config_values(struct dm_cache_policy *p, int argc, const char **a
static int create_cache_policy(struct cache *cache, struct cache_args *ca,
char **error)
{
int r;
cache->policy = dm_cache_policy_create(ca->policy_name,
cache->cache_size,
cache->origin_sectors,
@ -1847,14 +1874,7 @@ static int create_cache_policy(struct cache *cache, struct cache_args *ca,
return -ENOMEM;
}
r = set_config_values(cache->policy, ca->policy_argc, ca->policy_argv);
if (r) {
*error = "Error setting cache policy's config values";
dm_cache_policy_destroy(cache->policy);
cache->policy = NULL;
}
return r;
return 0;
}
/*
@ -1886,7 +1906,7 @@ static sector_t calculate_discard_block_size(sector_t cache_block_size,
return discard_block_size;
}
#define DEFAULT_MIGRATION_THRESHOLD (2048 * 100)
#define DEFAULT_MIGRATION_THRESHOLD 2048
static int cache_create(struct cache_args *ca, struct cache **result)
{
@ -1911,7 +1931,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
ti->discards_supported = true;
ti->discard_zeroes_data_unsupported = true;
memcpy(&cache->features, &ca->features, sizeof(cache->features));
cache->features = ca->features;
ti->per_bio_data_size = get_per_bio_data_size(cache);
cache->callbacks.congested_fn = cache_is_congested;
@ -1948,7 +1968,15 @@ static int cache_create(struct cache_args *ca, struct cache **result)
r = create_cache_policy(cache, ca, error);
if (r)
goto bad;
cache->policy_nr_args = ca->policy_argc;
cache->migration_threshold = DEFAULT_MIGRATION_THRESHOLD;
r = set_config_values(cache, ca->policy_argc, ca->policy_argv);
if (r) {
*error = "Error setting cache policy's config values";
goto bad;
}
cmd = dm_cache_metadata_open(cache->metadata_dev->bdev,
ca->block_size, may_format,
@ -1967,10 +1995,10 @@ static int cache_create(struct cache_args *ca, struct cache **result)
INIT_LIST_HEAD(&cache->quiesced_migrations);
INIT_LIST_HEAD(&cache->completed_migrations);
INIT_LIST_HEAD(&cache->need_commit_migrations);
cache->migration_threshold = DEFAULT_MIGRATION_THRESHOLD;
atomic_set(&cache->nr_migrations, 0);
init_waitqueue_head(&cache->migration_wait);
r = -ENOMEM;
cache->nr_dirty = 0;
cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size));
if (!cache->dirty_bitset) {
@ -2517,23 +2545,6 @@ err:
DMEMIT("Error");
}
#define NOT_CORE_OPTION 1
static int process_config_option(struct cache *cache, char **argv)
{
unsigned long tmp;
if (!strcasecmp(argv[0], "migration_threshold")) {
if (kstrtoul(argv[1], 10, &tmp))
return -EINVAL;
cache->migration_threshold = tmp;
return 0;
}
return NOT_CORE_OPTION;
}
/*
* Supports <key> <value>.
*
@ -2541,17 +2552,12 @@ static int process_config_option(struct cache *cache, char **argv)
*/
static int cache_message(struct dm_target *ti, unsigned argc, char **argv)
{
int r;
struct cache *cache = ti->private;
if (argc != 2)
return -EINVAL;
r = process_config_option(cache, argv);
if (r == NOT_CORE_OPTION)
return policy_set_config_value(cache->policy, argv[0], argv[1]);
return r;
return set_config_value(cache, argv[0], argv[1]);
}
static int cache_iterate_devices(struct dm_target *ti,
@ -2609,7 +2615,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
static struct target_type cache_target = {
.name = "cache",
.version = {1, 1, 0},
.version = {1, 1, 1},
.module = THIS_MODULE,
.ctr = cache_ctr,
.dtr = cache_dtr,

View file

@ -907,6 +907,7 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
ti->num_flush_bios = 1;
ti->num_discard_bios = 1;
ti->num_write_same_bios = 1;
return 0;

View file

@ -1121,6 +1121,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
s->pending_pool = mempool_create_slab_pool(MIN_IOS, pending_cache);
if (!s->pending_pool) {
ti->error = "Could not allocate mempool for pending exceptions";
r = -ENOMEM;
goto bad_pending_pool;
}

View file

@ -94,7 +94,7 @@ static int get_stripe(struct dm_target *ti, struct stripe_c *sc,
static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
struct stripe_c *sc;
sector_t width;
sector_t width, tmp_len;
uint32_t stripes;
uint32_t chunk_size;
int r;
@ -116,18 +116,19 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
}
width = ti->len;
if (sector_div(width, chunk_size)) {
ti->error = "Target length not divisible by "
"chunk size";
return -EINVAL;
}
if (sector_div(width, stripes)) {
ti->error = "Target length not divisible by "
"number of stripes";
return -EINVAL;
}
tmp_len = width;
if (sector_div(tmp_len, chunk_size)) {
ti->error = "Target length not divisible by "
"chunk size";
return -EINVAL;
}
/*
* Do we have enough arguments for that many stripes ?
*/

View file

@ -1442,7 +1442,7 @@ static bool dm_table_supports_write_same(struct dm_table *t)
return false;
if (!ti->type->iterate_devices ||
!ti->type->iterate_devices(ti, device_not_write_same_capable, NULL))
ti->type->iterate_devices(ti, device_not_write_same_capable, NULL))
return false;
}

View file

@ -1645,12 +1645,12 @@ int dm_thin_get_highest_mapped_block(struct dm_thin_device *td,
return r;
}
static int __resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count)
static int __resize_space_map(struct dm_space_map *sm, dm_block_t new_count)
{
int r;
dm_block_t old_count;
r = dm_sm_get_nr_blocks(pmd->data_sm, &old_count);
r = dm_sm_get_nr_blocks(sm, &old_count);
if (r)
return r;
@ -1658,11 +1658,11 @@ static int __resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count)
return 0;
if (new_count < old_count) {
DMERR("cannot reduce size of data device");
DMERR("cannot reduce size of space map");
return -EINVAL;
}
return dm_sm_extend(pmd->data_sm, new_count - old_count);
return dm_sm_extend(sm, new_count - old_count);
}
int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count)
@ -1671,7 +1671,19 @@ int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count)
down_write(&pmd->root_lock);
if (!pmd->fail_io)
r = __resize_data_dev(pmd, new_count);
r = __resize_space_map(pmd->data_sm, new_count);
up_write(&pmd->root_lock);
return r;
}
int dm_pool_resize_metadata_dev(struct dm_pool_metadata *pmd, dm_block_t new_count)
{
int r = -EINVAL;
down_write(&pmd->root_lock);
if (!pmd->fail_io)
r = __resize_space_map(pmd->metadata_sm, new_count);
up_write(&pmd->root_lock);
return r;
@ -1684,3 +1696,17 @@ void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd)
dm_bm_set_read_only(pmd->bm);
up_write(&pmd->root_lock);
}
int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd,
dm_block_t threshold,
dm_sm_threshold_fn fn,
void *context)
{
int r;
down_write(&pmd->root_lock);
r = dm_sm_register_threshold_callback(pmd->metadata_sm, threshold, fn, context);
up_write(&pmd->root_lock);
return r;
}

View file

@ -8,6 +8,7 @@
#define DM_THIN_METADATA_H
#include "persistent-data/dm-block-manager.h"
#include "persistent-data/dm-space-map.h"
#define THIN_METADATA_BLOCK_SIZE 4096
@ -185,6 +186,7 @@ int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result);
* blocks would be lost.
*/
int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_size);
int dm_pool_resize_metadata_dev(struct dm_pool_metadata *pmd, dm_block_t new_size);
/*
* Flicks the underlying block manager into read only mode, so you know
@ -192,6 +194,11 @@ int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_size);
*/
void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd);
int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd,
dm_block_t threshold,
dm_sm_threshold_fn fn,
void *context);
/*----------------------------------------------------------------*/
#endif

View file

@ -922,7 +922,7 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result)
return r;
if (free_blocks <= pool->low_water_blocks && !pool->low_water_triggered) {
DMWARN("%s: reached low water mark, sending event.",
DMWARN("%s: reached low water mark for data device: sending event.",
dm_device_name(pool->pool_md));
spin_lock_irqsave(&pool->lock, flags);
pool->low_water_triggered = 1;
@ -1281,6 +1281,10 @@ static void process_bio_fail(struct thin_c *tc, struct bio *bio)
bio_io_error(bio);
}
/*
* FIXME: should we also commit due to size of transaction, measured in
* metadata blocks?
*/
static int need_commit_due_to_time(struct pool *pool)
{
return jiffies < pool->last_commit_jiffies ||
@ -1909,6 +1913,56 @@ static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf,
return r;
}
static void metadata_low_callback(void *context)
{
struct pool *pool = context;
DMWARN("%s: reached low water mark for metadata device: sending event.",
dm_device_name(pool->pool_md));
dm_table_event(pool->ti->table);
}
static sector_t get_metadata_dev_size(struct block_device *bdev)
{
sector_t metadata_dev_size = i_size_read(bdev->bd_inode) >> SECTOR_SHIFT;
char buffer[BDEVNAME_SIZE];
if (metadata_dev_size > THIN_METADATA_MAX_SECTORS_WARNING) {
DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.",
bdevname(bdev, buffer), THIN_METADATA_MAX_SECTORS);
metadata_dev_size = THIN_METADATA_MAX_SECTORS_WARNING;
}
return metadata_dev_size;
}
static dm_block_t get_metadata_dev_size_in_blocks(struct block_device *bdev)
{
sector_t metadata_dev_size = get_metadata_dev_size(bdev);
sector_div(metadata_dev_size, THIN_METADATA_BLOCK_SIZE >> SECTOR_SHIFT);
return metadata_dev_size;
}
/*
* When a metadata threshold is crossed a dm event is triggered, and
* userland should respond by growing the metadata device. We could let
* userland set the threshold, like we do with the data threshold, but I'm
* not sure they know enough to do this well.
*/
static dm_block_t calc_metadata_threshold(struct pool_c *pt)
{
/*
* 4M is ample for all ops with the possible exception of thin
* device deletion which is harmless if it fails (just retry the
* delete after you've grown the device).
*/
dm_block_t quarter = get_metadata_dev_size_in_blocks(pt->metadata_dev->bdev) / 4;
return min((dm_block_t)1024ULL /* 4M */, quarter);
}
/*
* thin-pool <metadata dev> <data dev>
* <data block size (sectors)>
@ -1931,8 +1985,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
unsigned long block_size;
dm_block_t low_water_blocks;
struct dm_dev *metadata_dev;
sector_t metadata_dev_size;
char b[BDEVNAME_SIZE];
fmode_t metadata_mode;
/*
* FIXME Remove validation from scope of lock.
@ -1944,19 +1997,32 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
r = -EINVAL;
goto out_unlock;
}
as.argc = argc;
as.argv = argv;
r = dm_get_device(ti, argv[0], FMODE_READ | FMODE_WRITE, &metadata_dev);
/*
* Set default pool features.
*/
pool_features_init(&pf);
dm_consume_args(&as, 4);
r = parse_pool_features(&as, &pf, ti);
if (r)
goto out_unlock;
metadata_mode = FMODE_READ | ((pf.mode == PM_READ_ONLY) ? 0 : FMODE_WRITE);
r = dm_get_device(ti, argv[0], metadata_mode, &metadata_dev);
if (r) {
ti->error = "Error opening metadata block device";
goto out_unlock;
}
metadata_dev_size = i_size_read(metadata_dev->bdev->bd_inode) >> SECTOR_SHIFT;
if (metadata_dev_size > THIN_METADATA_MAX_SECTORS_WARNING)
DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.",
bdevname(metadata_dev->bdev, b), THIN_METADATA_MAX_SECTORS);
/*
* Run for the side-effect of possibly issuing a warning if the
* device is too big.
*/
(void) get_metadata_dev_size(metadata_dev->bdev);
r = dm_get_device(ti, argv[1], FMODE_READ | FMODE_WRITE, &data_dev);
if (r) {
@ -1979,16 +2045,6 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
goto out;
}
/*
* Set default pool features.
*/
pool_features_init(&pf);
dm_consume_args(&as, 4);
r = parse_pool_features(&as, &pf, ti);
if (r)
goto out;
pt = kzalloc(sizeof(*pt), GFP_KERNEL);
if (!pt) {
r = -ENOMEM;
@ -2040,6 +2096,13 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
}
ti->private = pt;
r = dm_pool_register_metadata_threshold(pt->pool->pmd,
calc_metadata_threshold(pt),
metadata_low_callback,
pool);
if (r)
goto out_free_pt;
pt->callbacks.congested_fn = pool_is_congested;
dm_table_add_target_callbacks(ti->table, &pt->callbacks);
@ -2079,6 +2142,78 @@ static int pool_map(struct dm_target *ti, struct bio *bio)
return r;
}
static int maybe_resize_data_dev(struct dm_target *ti, bool *need_commit)
{
int r;
struct pool_c *pt = ti->private;
struct pool *pool = pt->pool;
sector_t data_size = ti->len;
dm_block_t sb_data_size;
*need_commit = false;
(void) sector_div(data_size, pool->sectors_per_block);
r = dm_pool_get_data_dev_size(pool->pmd, &sb_data_size);
if (r) {
DMERR("failed to retrieve data device size");
return r;
}
if (data_size < sb_data_size) {
DMERR("pool target (%llu blocks) too small: expected %llu",
(unsigned long long)data_size, sb_data_size);
return -EINVAL;
} else if (data_size > sb_data_size) {
r = dm_pool_resize_data_dev(pool->pmd, data_size);
if (r) {
DMERR("failed to resize data device");
set_pool_mode(pool, PM_READ_ONLY);
return r;
}
*need_commit = true;
}
return 0;
}
static int maybe_resize_metadata_dev(struct dm_target *ti, bool *need_commit)
{
int r;
struct pool_c *pt = ti->private;
struct pool *pool = pt->pool;
dm_block_t metadata_dev_size, sb_metadata_dev_size;
*need_commit = false;
metadata_dev_size = get_metadata_dev_size(pool->md_dev);
r = dm_pool_get_metadata_dev_size(pool->pmd, &sb_metadata_dev_size);
if (r) {
DMERR("failed to retrieve data device size");
return r;
}
if (metadata_dev_size < sb_metadata_dev_size) {
DMERR("metadata device (%llu sectors) too small: expected %llu",
metadata_dev_size, sb_metadata_dev_size);
return -EINVAL;
} else if (metadata_dev_size > sb_metadata_dev_size) {
r = dm_pool_resize_metadata_dev(pool->pmd, metadata_dev_size);
if (r) {
DMERR("failed to resize metadata device");
return r;
}
*need_commit = true;
}
return 0;
}
/*
* Retrieves the number of blocks of the data device from
* the superblock and compares it to the actual device size,
@ -2093,10 +2228,9 @@ static int pool_map(struct dm_target *ti, struct bio *bio)
static int pool_preresume(struct dm_target *ti)
{
int r;
bool need_commit1, need_commit2;
struct pool_c *pt = ti->private;
struct pool *pool = pt->pool;
sector_t data_size = ti->len;
dm_block_t sb_data_size;
/*
* Take control of the pool object.
@ -2105,30 +2239,16 @@ static int pool_preresume(struct dm_target *ti)
if (r)
return r;
(void) sector_div(data_size, pool->sectors_per_block);
r = dm_pool_get_data_dev_size(pool->pmd, &sb_data_size);
if (r) {
DMERR("failed to retrieve data device size");
r = maybe_resize_data_dev(ti, &need_commit1);
if (r)
return r;
}
if (data_size < sb_data_size) {
DMERR("pool target too small, is %llu blocks (expected %llu)",
(unsigned long long)data_size, sb_data_size);
return -EINVAL;
} else if (data_size > sb_data_size) {
r = dm_pool_resize_data_dev(pool->pmd, data_size);
if (r) {
DMERR("failed to resize data device");
/* FIXME Stricter than necessary: Rollback transaction instead here */
set_pool_mode(pool, PM_READ_ONLY);
return r;
}
r = maybe_resize_metadata_dev(ti, &need_commit2);
if (r)
return r;
if (need_commit1 || need_commit2)
(void) commit_or_fallback(pool);
}
return 0;
}
@ -2549,7 +2669,7 @@ static struct target_type pool_target = {
.name = "thin-pool",
.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
DM_TARGET_IMMUTABLE,
.version = {1, 7, 0},
.version = {1, 8, 0},
.module = THIS_MODULE,
.ctr = pool_ctr,
.dtr = pool_dtr,

View file

@ -248,7 +248,8 @@ static struct dm_space_map ops = {
.new_block = sm_disk_new_block,
.commit = sm_disk_commit,
.root_size = sm_disk_root_size,
.copy_root = sm_disk_copy_root
.copy_root = sm_disk_copy_root,
.register_threshold_callback = NULL
};
struct dm_space_map *dm_sm_disk_create(struct dm_transaction_manager *tm,

View file

@ -16,6 +16,55 @@
/*----------------------------------------------------------------*/
/*
* An edge triggered threshold.
*/
struct threshold {
bool threshold_set;
bool value_set;
dm_block_t threshold;
dm_block_t current_value;
dm_sm_threshold_fn fn;
void *context;
};
static void threshold_init(struct threshold *t)
{
t->threshold_set = false;
t->value_set = false;
}
static void set_threshold(struct threshold *t, dm_block_t value,
dm_sm_threshold_fn fn, void *context)
{
t->threshold_set = true;
t->threshold = value;
t->fn = fn;
t->context = context;
}
static bool below_threshold(struct threshold *t, dm_block_t value)
{
return t->threshold_set && value <= t->threshold;
}
static bool threshold_already_triggered(struct threshold *t)
{
return t->value_set && below_threshold(t, t->current_value);
}
static void check_threshold(struct threshold *t, dm_block_t value)
{
if (below_threshold(t, value) &&
!threshold_already_triggered(t))
t->fn(t->context);
t->value_set = true;
t->current_value = value;
}
/*----------------------------------------------------------------*/
/*
* Space map interface.
*
@ -54,6 +103,8 @@ struct sm_metadata {
unsigned allocated_this_transaction;
unsigned nr_uncommitted;
struct block_op uncommitted[MAX_RECURSIVE_ALLOCATIONS];
struct threshold threshold;
};
static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b)
@ -144,12 +195,6 @@ static void sm_metadata_destroy(struct dm_space_map *sm)
kfree(smm);
}
static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
{
DMERR("doesn't support extend");
return -EINVAL;
}
static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
{
struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
@ -335,9 +380,19 @@ static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b)
static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b)
{
dm_block_t count;
struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
int r = sm_metadata_new_block_(sm, b);
if (r)
DMERR("unable to allocate new metadata block");
r = sm_metadata_get_nr_free(sm, &count);
if (r)
DMERR("couldn't get free block count");
check_threshold(&smm->threshold, count);
return r;
}
@ -357,6 +412,18 @@ static int sm_metadata_commit(struct dm_space_map *sm)
return 0;
}
static int sm_metadata_register_threshold_callback(struct dm_space_map *sm,
dm_block_t threshold,
dm_sm_threshold_fn fn,
void *context)
{
struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
set_threshold(&smm->threshold, threshold, fn, context);
return 0;
}
static int sm_metadata_root_size(struct dm_space_map *sm, size_t *result)
{
*result = sizeof(struct disk_sm_root);
@ -382,6 +449,8 @@ static int sm_metadata_copy_root(struct dm_space_map *sm, void *where_le, size_t
return 0;
}
static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks);
static struct dm_space_map ops = {
.destroy = sm_metadata_destroy,
.extend = sm_metadata_extend,
@ -395,7 +464,8 @@ static struct dm_space_map ops = {
.new_block = sm_metadata_new_block,
.commit = sm_metadata_commit,
.root_size = sm_metadata_root_size,
.copy_root = sm_metadata_copy_root
.copy_root = sm_metadata_copy_root,
.register_threshold_callback = sm_metadata_register_threshold_callback
};
/*----------------------------------------------------------------*/
@ -410,7 +480,7 @@ static void sm_bootstrap_destroy(struct dm_space_map *sm)
static int sm_bootstrap_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
{
DMERR("boostrap doesn't support extend");
DMERR("bootstrap doesn't support extend");
return -EINVAL;
}
@ -450,7 +520,7 @@ static int sm_bootstrap_count_is_more_than_one(struct dm_space_map *sm,
static int sm_bootstrap_set_count(struct dm_space_map *sm, dm_block_t b,
uint32_t count)
{
DMERR("boostrap doesn't support set_count");
DMERR("bootstrap doesn't support set_count");
return -EINVAL;
}
@ -491,7 +561,7 @@ static int sm_bootstrap_commit(struct dm_space_map *sm)
static int sm_bootstrap_root_size(struct dm_space_map *sm, size_t *result)
{
DMERR("boostrap doesn't support root_size");
DMERR("bootstrap doesn't support root_size");
return -EINVAL;
}
@ -499,7 +569,7 @@ static int sm_bootstrap_root_size(struct dm_space_map *sm, size_t *result)
static int sm_bootstrap_copy_root(struct dm_space_map *sm, void *where,
size_t max)
{
DMERR("boostrap doesn't support copy_root");
DMERR("bootstrap doesn't support copy_root");
return -EINVAL;
}
@ -517,11 +587,42 @@ static struct dm_space_map bootstrap_ops = {
.new_block = sm_bootstrap_new_block,
.commit = sm_bootstrap_commit,
.root_size = sm_bootstrap_root_size,
.copy_root = sm_bootstrap_copy_root
.copy_root = sm_bootstrap_copy_root,
.register_threshold_callback = NULL
};
/*----------------------------------------------------------------*/
static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
{
int r, i;
enum allocation_event ev;
struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
dm_block_t old_len = smm->ll.nr_blocks;
/*
* Flick into a mode where all blocks get allocated in the new area.
*/
smm->begin = old_len;
memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm));
/*
* Extend.
*/
r = sm_ll_extend(&smm->ll, extra_blocks);
/*
* Switch back to normal behaviour.
*/
memcpy(&smm->sm, &ops, sizeof(smm->sm));
for (i = old_len; !r && i < smm->begin; i++)
r = sm_ll_inc(&smm->ll, i, &ev);
return r;
}
/*----------------------------------------------------------------*/
struct dm_space_map *dm_sm_metadata_init(void)
{
struct sm_metadata *smm;
@ -549,6 +650,7 @@ int dm_sm_metadata_create(struct dm_space_map *sm,
smm->recursion_count = 0;
smm->allocated_this_transaction = 0;
smm->nr_uncommitted = 0;
threshold_init(&smm->threshold);
memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm));
@ -590,6 +692,7 @@ int dm_sm_metadata_open(struct dm_space_map *sm,
smm->recursion_count = 0;
smm->allocated_this_transaction = 0;
smm->nr_uncommitted = 0;
threshold_init(&smm->threshold);
memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
return 0;

View file

@ -9,6 +9,8 @@
#include "dm-block-manager.h"
typedef void (*dm_sm_threshold_fn)(void *context);
/*
* struct dm_space_map keeps a record of how many times each block in a device
* is referenced. It needs to be fixed on disk as part of the transaction.
@ -59,6 +61,15 @@ struct dm_space_map {
*/
int (*root_size)(struct dm_space_map *sm, size_t *result);
int (*copy_root)(struct dm_space_map *sm, void *copy_to_here_le, size_t len);
/*
* You can register one threshold callback which is edge-triggered
* when the free space in the space map drops below the threshold.
*/
int (*register_threshold_callback)(struct dm_space_map *sm,
dm_block_t threshold,
dm_sm_threshold_fn fn,
void *context);
};
/*----------------------------------------------------------------*/
@ -131,4 +142,16 @@ static inline int dm_sm_copy_root(struct dm_space_map *sm, void *copy_to_here_le
return sm->copy_root(sm, copy_to_here_le, len);
}
static inline int dm_sm_register_threshold_callback(struct dm_space_map *sm,
dm_block_t threshold,
dm_sm_threshold_fn fn,
void *context)
{
if (sm->register_threshold_callback)
return sm->register_threshold_callback(sm, threshold, fn, context);
return -EINVAL;
}
#endif /* _LINUX_DM_SPACE_MAP_H */

View file

@ -79,11 +79,26 @@ typedef int (*dm_ioctl_fn) (struct dm_target *ti, unsigned int cmd,
typedef int (*dm_merge_fn) (struct dm_target *ti, struct bvec_merge_data *bvm,
struct bio_vec *biovec, int max_size);
/*
* These iteration functions are typically used to check (and combine)
* properties of underlying devices.
* E.g. Does at least one underlying device support flush?
* Does any underlying device not support WRITE_SAME?
*
* The callout function is called once for each contiguous section of
* an underlying device. State can be maintained in *data.
* Return non-zero to stop iterating through any further devices.
*/
typedef int (*iterate_devices_callout_fn) (struct dm_target *ti,
struct dm_dev *dev,
sector_t start, sector_t len,
void *data);
/*
* This function must iterate through each section of device used by the
* target until it encounters a non-zero return code, which it then returns.
* Returns zero if no callout returned non-zero.
*/
typedef int (*dm_iterate_devices_fn) (struct dm_target *ti,
iterate_devices_callout_fn fn,
void *data);