From 3469ac1aa3a2f1e2586a412923c414779a0af854 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 7 May 2012 15:33:36 -0700 Subject: [PATCH] ceph: drop support for preferred_osd pgs This was an ill-conceived feature that has been removed from Ceph. Do this gracefully: - reject attempts to specify a preferred_osd via the ioctl - stop exposing this information via virtual xattrs - always fill in -1 for requests, in case we talk to an older server - don't calculate preferred_osd placements/pgids Reviewed-by: Alex Elder Signed-off-by: Sage Weil --- drivers/block/rbd.c | 1 - fs/ceph/file.c | 1 - fs/ceph/ioctl.c | 15 ++++-------- fs/ceph/xattr.c | 9 ------- include/linux/ceph/ceph_fs.h | 4 +-- include/linux/ceph/osdmap.h | 2 -- net/ceph/osdmap.c | 47 ++++++++---------------------------- 7 files changed, 17 insertions(+), 62 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index c1f770131654..a67fa63a966b 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -935,7 +935,6 @@ static int rbd_do_request(struct request *rq, layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); layout->fl_stripe_count = cpu_to_le32(1); layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); - layout->fl_pg_preferred = cpu_to_le32(-1); layout->fl_pg_pool = cpu_to_le32(dev->poolid); ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno, req, ops); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index ed72428d9c75..988d4f302e48 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -54,7 +54,6 @@ prepare_open_request(struct super_block *sb, int flags, int create_mode) req->r_fmode = ceph_flags_to_mode(flags); req->r_args.open.flags = cpu_to_le32(flags); req->r_args.open.mode = cpu_to_le32(create_mode); - req->r_args.open.preferred = cpu_to_le32(-1); out: return req; } diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 790914a598dd..4feab52c5bff 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -26,8 +26,7 @@ static long ceph_ioctl_get_layout(struct file *file, void __user *arg) l.stripe_count = ceph_file_layout_stripe_count(ci->i_layout); l.object_size = ceph_file_layout_object_size(ci->i_layout); l.data_pool = le32_to_cpu(ci->i_layout.fl_pg_pool); - l.preferred_osd = - (s32)le32_to_cpu(ci->i_layout.fl_pg_preferred); + l.preferred_osd = (s32)-1; if (copy_to_user(arg, &l, sizeof(l))) return -EFAULT; } @@ -49,6 +48,10 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg) if (copy_from_user(&l, arg, sizeof(l))) return -EFAULT; + /* preferred_osd is no longer supported */ + if (l.preferred_osd != -1) + return -EINVAL; + /* validate changed params against current layout */ err = ceph_do_getattr(file->f_dentry->d_inode, CEPH_STAT_CAP_LAYOUT); if (!err) { @@ -56,8 +59,6 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg) nl.stripe_count = ceph_file_layout_stripe_count(ci->i_layout); nl.object_size = ceph_file_layout_object_size(ci->i_layout); nl.data_pool = le32_to_cpu(ci->i_layout.fl_pg_pool); - nl.preferred_osd = - (s32)le32_to_cpu(ci->i_layout.fl_pg_preferred); } else return err; @@ -69,8 +70,6 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg) nl.object_size = l.object_size; if (l.data_pool) nl.data_pool = l.data_pool; - if (l.preferred_osd) - nl.preferred_osd = l.preferred_osd; if ((nl.object_size & ~PAGE_MASK) || (nl.stripe_unit & ~PAGE_MASK) || @@ -106,8 +105,6 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg) req->r_args.setlayout.layout.fl_object_size = cpu_to_le32(l.object_size); req->r_args.setlayout.layout.fl_pg_pool = cpu_to_le32(l.data_pool); - req->r_args.setlayout.layout.fl_pg_preferred = - cpu_to_le32(l.preferred_osd); parent_inode = ceph_get_dentry_parent_inode(file->f_dentry); err = ceph_mdsc_do_request(mdsc, parent_inode, req); @@ -171,8 +168,6 @@ static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg) cpu_to_le32(l.object_size); req->r_args.setlayout.layout.fl_pg_pool = cpu_to_le32(l.data_pool); - req->r_args.setlayout.layout.fl_pg_preferred = - cpu_to_le32(l.preferred_osd); err = ceph_mdsc_do_request(mdsc, inode, req); ceph_mdsc_put_request(req); diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 35b86331d8a5..785cb3057c95 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -118,15 +118,6 @@ static size_t ceph_vxattrcb_file_layout(struct ceph_inode_info *ci, char *val, (unsigned long long)ceph_file_layout_su(ci->i_layout), (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), (unsigned long long)ceph_file_layout_object_size(ci->i_layout)); - - if (ceph_file_layout_pg_preferred(ci->i_layout) >= 0) { - val += ret; - size -= ret; - ret += snprintf(val, size, "preferred_osd=%lld\n", - (unsigned long long)ceph_file_layout_pg_preferred( - ci->i_layout)); - } - return ret; } diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index b8c60694b2b0..e81ab30d4896 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -65,7 +65,7 @@ struct ceph_file_layout { __le32 fl_object_stripe_unit; /* UNUSED. for per-object parity, if any */ /* object -> pg layout */ - __le32 fl_pg_preferred; /* preferred primary for pg (-1 for none) */ + __le32 fl_unused; /* unused; used to be preferred primary (-1) */ __le32 fl_pg_pool; /* namespace, crush ruleset, rep level */ } __attribute__ ((packed)); @@ -384,7 +384,7 @@ union ceph_mds_request_args { __le32 stripe_count; /* ... */ __le32 object_size; __le32 file_replication; - __le32 preferred; + __le32 unused; /* used to be preferred osd */ } __attribute__ ((packed)) open; struct { __le32 flags; diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index ba4c205cbb01..311ef8d6aa9e 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -65,8 +65,6 @@ struct ceph_osdmap { #define ceph_file_layout_cas_hash(l) ((__s32)le32_to_cpu((l).fl_cas_hash)) #define ceph_file_layout_object_su(l) \ ((__s32)le32_to_cpu((l).fl_object_stripe_unit)) -#define ceph_file_layout_pg_preferred(l) \ - ((__s32)le32_to_cpu((l).fl_pg_preferred)) #define ceph_file_layout_pg_pool(l) \ ((__s32)le32_to_cpu((l).fl_pg_pool)) diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 29ad46ec9dcf..7d39f3cb4947 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -1000,7 +1000,6 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol, { unsigned num, num_mask; struct ceph_pg pgid; - s32 preferred = (s32)le32_to_cpu(fl->fl_pg_preferred); int poolid = le32_to_cpu(fl->fl_pg_pool); struct ceph_pg_pool_info *pool; unsigned ps; @@ -1011,23 +1010,13 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol, if (!pool) return -EIO; ps = ceph_str_hash(pool->v.object_hash, oid, strlen(oid)); - if (preferred >= 0) { - ps += preferred; - num = le32_to_cpu(pool->v.lpg_num); - num_mask = pool->lpg_num_mask; - } else { - num = le32_to_cpu(pool->v.pg_num); - num_mask = pool->pg_num_mask; - } + num = le32_to_cpu(pool->v.pg_num); + num_mask = pool->pg_num_mask; pgid.ps = cpu_to_le16(ps); - pgid.preferred = cpu_to_le16(preferred); + pgid.preferred = cpu_to_le16(-1); pgid.pool = fl->fl_pg_pool; - if (preferred >= 0) - dout("calc_object_layout '%s' pgid %d.%xp%d\n", oid, poolid, ps, - (int)preferred); - else - dout("calc_object_layout '%s' pgid %d.%x\n", oid, poolid, ps); + dout("calc_object_layout '%s' pgid %d.%x\n", oid, poolid, ps); ol->ol_pgid = pgid; ol->ol_stripe_unit = fl->fl_object_stripe_unit; @@ -1046,23 +1035,17 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, struct ceph_pg_pool_info *pool; int ruleno; unsigned poolid, ps, pps, t; - int preferred; poolid = le32_to_cpu(pgid.pool); ps = le16_to_cpu(pgid.ps); - preferred = (s16)le16_to_cpu(pgid.preferred); pool = __lookup_pg_pool(&osdmap->pg_pools, poolid); if (!pool) return NULL; /* pg_temp? */ - if (preferred >= 0) - t = ceph_stable_mod(ps, le32_to_cpu(pool->v.lpg_num), - pool->lpgp_num_mask); - else - t = ceph_stable_mod(ps, le32_to_cpu(pool->v.pg_num), - pool->pgp_num_mask); + t = ceph_stable_mod(ps, le32_to_cpu(pool->v.pg_num), + pool->pgp_num_mask); pgid.ps = cpu_to_le16(t); pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid); if (pg) { @@ -1080,23 +1063,13 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, return NULL; } - /* don't forcefeed bad device ids to crush */ - if (preferred >= osdmap->max_osd || - preferred >= osdmap->crush->max_devices) - preferred = -1; - - if (preferred >= 0) - pps = ceph_stable_mod(ps, - le32_to_cpu(pool->v.lpgp_num), - pool->lpgp_num_mask); - else - pps = ceph_stable_mod(ps, - le32_to_cpu(pool->v.pgp_num), - pool->pgp_num_mask); + pps = ceph_stable_mod(ps, + le32_to_cpu(pool->v.pgp_num), + pool->pgp_num_mask); pps += poolid; *num = crush_do_rule(osdmap->crush, ruleno, pps, osds, min_t(int, pool->v.size, *num), - preferred, osdmap->osd_weight); + -1, osdmap->osd_weight); return osds; }