* [PATCH 00/11] crush kernel updates
@ 2012-05-04 22:53 Sage Weil
From: Sage Weil @ 2012-05-04 22:53 UTC
  To: ceph-devel; +Cc: Sage Weil

We ripped out support for preferred osd placement in ceph since it was a
broken misfeature that nobody used.  When I went to mirror those changes on
the kernel side I found that a whole bunch of old stuff had never been moved
over, including a number of cleanups and an actual bug fix.  Everything is
now in the wip-crush tree on github.com/ceph/ceph-client.

This code is well tested in the sense that it has been this way on the
userland side for ages (with the exception of the first preferred_osd
patch).  That said, I've only done very basic testing of the kernel
code, so the crush callers in particular need review attention.

I'd like to get this into the testing branch soon so we can see if it
sorts out #2022.  A few of these should probably go to Linus before the
next merge window as well.

sage


Sage Weil (11):
  ceph: drop support for preferred_osd pgs
  ceph: refactor SETLAYOUT and SETDIRLAYOUT ioctl checks into common
    helper
  crush: clean up types, const-ness
  crush: adjust local retry threshold
  crush: be more tolerant of nonsensical crush maps
  crush: use a temporary variable to simplify crush_do_rule
  crush: remove forcefeed functionality
  crush: remove parent maps
  crush: fix tree node weight lookup
  crush: fix memory leak when destroying tree buckets
  crush: warn on do_rule failure

 drivers/block/rbd.c          |    1 -
 fs/ceph/file.c               |    1 -
 fs/ceph/ioctl.c              |   89 ++++++++++-------------
 fs/ceph/xattr.c              |    9 ---
 include/linux/ceph/ceph_fs.h |    4 +-
 include/linux/ceph/osdmap.h  |    2 -
 include/linux/crush/crush.h  |   18 ++---
 include/linux/crush/mapper.h |    7 +-
 net/ceph/crush/crush.c       |   45 +++---------
 net/ceph/crush/mapper.c      |  162 +++++++++++++++++------------------------
 net/ceph/osdmap.c            |   67 +++++------------
 11 files changed, 147 insertions(+), 258 deletions(-)

-- 
1.7.9


* [PATCH 01/11] ceph: drop support for preferred_osd pgs
From: Sage Weil @ 2012-05-04 22:53 UTC
  To: ceph-devel; +Cc: Sage Weil

This was an ill-conceived feature that has already been removed from
userland Ceph.  Do the same on the kernel side, gracefully:

 - reject attempts to specify a preferred_osd via the ioctl
 - stop exposing this information via virtual xattrs
 - always fill in -1 for requests, in case we talk to an older server
 - don't calculate preferred_osd placements/pgids
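
To illustrate the user-visible change (an illustrative sketch, not part
of the patch; it assumes the CEPH_IOC_SET_LAYOUT ioctl and struct
ceph_ioctl_layout from fs/ceph/ioctl.h):

	struct ceph_ioctl_layout l = {
		.stripe_unit   = 4194304,
		.stripe_count  = 1,
		.object_size   = 4194304,
		.data_pool     = 0,
		.preferred_osd = 3,	/* anything but -1 ... */
	};

	if (ioctl(fd, CEPH_IOC_SET_LAYOUT, &l) < 0)
		perror("SET_LAYOUT");	/* ... now fails with EINVAL */

Callers that pass preferred_osd = -1 are unaffected.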

Signed-off-by: Sage Weil <sage@newdream.net>
---
 drivers/block/rbd.c          |    1 -
 fs/ceph/file.c               |    1 -
 fs/ceph/ioctl.c              |   15 ++++---------
 fs/ceph/xattr.c              |    9 --------
 include/linux/ceph/ceph_fs.h |    4 +-
 include/linux/ceph/osdmap.h  |    2 -
 net/ceph/osdmap.c            |   47 +++++++++---------------------------------
 7 files changed, 17 insertions(+), 62 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index c1f7701..a67fa63 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -935,7 +935,6 @@ static int rbd_do_request(struct request *rq,
 	layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
 	layout->fl_stripe_count = cpu_to_le32(1);
 	layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
-	layout->fl_pg_preferred = cpu_to_le32(-1);
 	layout->fl_pg_pool = cpu_to_le32(dev->poolid);
 	ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno,
 				req, ops);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index ed72428..988d4f3 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -54,7 +54,6 @@ prepare_open_request(struct super_block *sb, int flags, int create_mode)
 	req->r_fmode = ceph_flags_to_mode(flags);
 	req->r_args.open.flags = cpu_to_le32(flags);
 	req->r_args.open.mode = cpu_to_le32(create_mode);
-	req->r_args.open.preferred = cpu_to_le32(-1);
 out:
 	return req;
 }
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index 790914a59..4feab52 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -26,8 +26,7 @@ static long ceph_ioctl_get_layout(struct file *file, void __user *arg)
 		l.stripe_count = ceph_file_layout_stripe_count(ci->i_layout);
 		l.object_size = ceph_file_layout_object_size(ci->i_layout);
 		l.data_pool = le32_to_cpu(ci->i_layout.fl_pg_pool);
-		l.preferred_osd =
-			(s32)le32_to_cpu(ci->i_layout.fl_pg_preferred);
+		l.preferred_osd = (s32)-1;
 		if (copy_to_user(arg, &l, sizeof(l)))
 			return -EFAULT;
 	}
@@ -49,6 +48,10 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
 	if (copy_from_user(&l, arg, sizeof(l)))
 		return -EFAULT;
 
+	/* preferred_osd is no longer supported */
+	if (l.preferred_osd != -1)
+		return -EINVAL;
+
 	/* validate changed params against current layout */
 	err = ceph_do_getattr(file->f_dentry->d_inode, CEPH_STAT_CAP_LAYOUT);
 	if (!err) {
@@ -56,8 +59,6 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
 		nl.stripe_count = ceph_file_layout_stripe_count(ci->i_layout);
 		nl.object_size = ceph_file_layout_object_size(ci->i_layout);
 		nl.data_pool = le32_to_cpu(ci->i_layout.fl_pg_pool);
-		nl.preferred_osd =
-				(s32)le32_to_cpu(ci->i_layout.fl_pg_preferred);
 	} else
 		return err;
 
@@ -69,8 +70,6 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
 		nl.object_size = l.object_size;
 	if (l.data_pool)
 		nl.data_pool = l.data_pool;
-	if (l.preferred_osd)
-		nl.preferred_osd = l.preferred_osd;
 
 	if ((nl.object_size & ~PAGE_MASK) ||
 	    (nl.stripe_unit & ~PAGE_MASK) ||
@@ -106,8 +105,6 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
 	req->r_args.setlayout.layout.fl_object_size =
 		cpu_to_le32(l.object_size);
 	req->r_args.setlayout.layout.fl_pg_pool = cpu_to_le32(l.data_pool);
-	req->r_args.setlayout.layout.fl_pg_preferred =
-		cpu_to_le32(l.preferred_osd);
 
 	parent_inode = ceph_get_dentry_parent_inode(file->f_dentry);
 	err = ceph_mdsc_do_request(mdsc, parent_inode, req);
@@ -171,8 +168,6 @@ static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg)
 			cpu_to_le32(l.object_size);
 	req->r_args.setlayout.layout.fl_pg_pool =
 			cpu_to_le32(l.data_pool);
-	req->r_args.setlayout.layout.fl_pg_preferred =
-			cpu_to_le32(l.preferred_osd);
 
 	err = ceph_mdsc_do_request(mdsc, inode, req);
 	ceph_mdsc_put_request(req);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 35b8633..785cb30 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -118,15 +118,6 @@ static size_t ceph_vxattrcb_file_layout(struct ceph_inode_info *ci, char *val,
 		(unsigned long long)ceph_file_layout_su(ci->i_layout),
 		(unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
 		(unsigned long long)ceph_file_layout_object_size(ci->i_layout));
-
-	if (ceph_file_layout_pg_preferred(ci->i_layout) >= 0) {
-		val += ret;
-		size -= ret;
-		ret += snprintf(val, size, "preferred_osd=%lld\n",
-			    (unsigned long long)ceph_file_layout_pg_preferred(
-				    ci->i_layout));
-	}
-
 	return ret;
 }
 
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index b8c6069..e81ab30 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -65,7 +65,7 @@ struct ceph_file_layout {
 	__le32 fl_object_stripe_unit;  /* UNUSED.  for per-object parity, if any */
 
 	/* object -> pg layout */
-	__le32 fl_pg_preferred; /* preferred primary for pg (-1 for none) */
+	__le32 fl_unused;       /* unused; used to be preferred primary (-1) */
 	__le32 fl_pg_pool;      /* namespace, crush ruleset, rep level */
 } __attribute__ ((packed));
 
@@ -384,7 +384,7 @@ union ceph_mds_request_args {
 		__le32 stripe_count;         /* ... */
 		__le32 object_size;
 		__le32 file_replication;
-		__le32 preferred;
+		__le32 unused;               /* used to be preferred osd */
 	} __attribute__ ((packed)) open;
 	struct {
 		__le32 flags;
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index ba4c205..311ef8d 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -65,8 +65,6 @@ struct ceph_osdmap {
 #define ceph_file_layout_cas_hash(l) ((__s32)le32_to_cpu((l).fl_cas_hash))
 #define ceph_file_layout_object_su(l) \
 	((__s32)le32_to_cpu((l).fl_object_stripe_unit))
-#define ceph_file_layout_pg_preferred(l) \
-	((__s32)le32_to_cpu((l).fl_pg_preferred))
 #define ceph_file_layout_pg_pool(l) \
 	((__s32)le32_to_cpu((l).fl_pg_pool))
 
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 29ad46e..7d39f3c 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1000,7 +1000,6 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol,
 {
 	unsigned num, num_mask;
 	struct ceph_pg pgid;
-	s32 preferred = (s32)le32_to_cpu(fl->fl_pg_preferred);
 	int poolid = le32_to_cpu(fl->fl_pg_pool);
 	struct ceph_pg_pool_info *pool;
 	unsigned ps;
@@ -1011,23 +1010,13 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol,
 	if (!pool)
 		return -EIO;
 	ps = ceph_str_hash(pool->v.object_hash, oid, strlen(oid));
-	if (preferred >= 0) {
-		ps += preferred;
-		num = le32_to_cpu(pool->v.lpg_num);
-		num_mask = pool->lpg_num_mask;
-	} else {
-		num = le32_to_cpu(pool->v.pg_num);
-		num_mask = pool->pg_num_mask;
-	}
+	num = le32_to_cpu(pool->v.pg_num);
+	num_mask = pool->pg_num_mask;
 
 	pgid.ps = cpu_to_le16(ps);
-	pgid.preferred = cpu_to_le16(preferred);
+	pgid.preferred = cpu_to_le16(-1);
 	pgid.pool = fl->fl_pg_pool;
-	if (preferred >= 0)
-		dout("calc_object_layout '%s' pgid %d.%xp%d\n", oid, poolid, ps,
-		     (int)preferred);
-	else
-		dout("calc_object_layout '%s' pgid %d.%x\n", oid, poolid, ps);
+	dout("calc_object_layout '%s' pgid %d.%x\n", oid, poolid, ps);
 
 	ol->ol_pgid = pgid;
 	ol->ol_stripe_unit = fl->fl_object_stripe_unit;
@@ -1046,23 +1035,17 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
 	struct ceph_pg_pool_info *pool;
 	int ruleno;
 	unsigned poolid, ps, pps, t;
-	int preferred;
 
 	poolid = le32_to_cpu(pgid.pool);
 	ps = le16_to_cpu(pgid.ps);
-	preferred = (s16)le16_to_cpu(pgid.preferred);
 
 	pool = __lookup_pg_pool(&osdmap->pg_pools, poolid);
 	if (!pool)
 		return NULL;
 
 	/* pg_temp? */
-	if (preferred >= 0)
-		t = ceph_stable_mod(ps, le32_to_cpu(pool->v.lpg_num),
-				    pool->lpgp_num_mask);
-	else
-		t = ceph_stable_mod(ps, le32_to_cpu(pool->v.pg_num),
-				    pool->pgp_num_mask);
+	t = ceph_stable_mod(ps, le32_to_cpu(pool->v.pg_num),
+			    pool->pgp_num_mask);
 	pgid.ps = cpu_to_le16(t);
 	pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid);
 	if (pg) {
@@ -1080,23 +1063,13 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
 		return NULL;
 	}
 
-	/* don't forcefeed bad device ids to crush */
-	if (preferred >= osdmap->max_osd ||
-	    preferred >= osdmap->crush->max_devices)
-		preferred = -1;
-
-	if (preferred >= 0)
-		pps = ceph_stable_mod(ps,
-				      le32_to_cpu(pool->v.lpgp_num),
-				      pool->lpgp_num_mask);
-	else
-		pps = ceph_stable_mod(ps,
-				      le32_to_cpu(pool->v.pgp_num),
-				      pool->pgp_num_mask);
+	pps = ceph_stable_mod(ps,
+			      le32_to_cpu(pool->v.pgp_num),
+			      pool->pgp_num_mask);
 	pps += poolid;
 	*num = crush_do_rule(osdmap->crush, ruleno, pps, osds,
 			     min_t(int, pool->v.size, *num),
-			     preferred, osdmap->osd_weight);
+			     -1, osdmap->osd_weight);
 	return osds;
 }
 
-- 
1.7.9


* [PATCH 02/11] ceph: refactor SETLAYOUT and SETDIRLAYOUT ioctl checks into common helper
From: Sage Weil @ 2012-05-04 22:53 UTC
  To: ceph-devel; +Cc: Sage Weil

Both of these methods perform similar checks; move that code to a helper
so that we can ensure the checks are consistent.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/ioctl.c |   82 +++++++++++++++++++++++++-----------------------------
 1 files changed, 38 insertions(+), 44 deletions(-)

diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index 4feab52..0e2f021 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -34,6 +34,36 @@ static long ceph_ioctl_get_layout(struct file *file, void __user *arg)
 	return err;
 }
 
+static long __validate_layout(struct ceph_mds_client *mdsc,
+			      struct ceph_ioctl_layout *l)
+{
+	int i, err;
+
+	/* preferred_osd is no longer supported */
+	if (l->preferred_osd != -1)
+		return -EINVAL;
+
+	/* validate striping parameters */
+	if ((l->object_size & ~PAGE_MASK) ||
+	    (l->stripe_unit & ~PAGE_MASK) ||
+	    ((unsigned)l->object_size % (unsigned)l->stripe_unit))
+		return -EINVAL;
+
+	/* make sure it's a valid data pool */
+	mutex_lock(&mdsc->mutex);
+	err = -EINVAL;
+	for (i = 0; i < mdsc->mdsmap->m_num_data_pg_pools; i++)
+		if (mdsc->mdsmap->m_data_pg_pools[i] == l->data_pool) {
+			err = 0;
+			break;
+		}
+	mutex_unlock(&mdsc->mutex);
+	if (err)
+		return err;
+
+	return 0;
+}
+
 static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
 {
 	struct inode *inode = file->f_dentry->d_inode;
@@ -43,15 +73,11 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
 	struct ceph_ioctl_layout l;
 	struct ceph_inode_info *ci = ceph_inode(file->f_dentry->d_inode);
 	struct ceph_ioctl_layout nl;
-	int err, i;
+	int err;
 
 	if (copy_from_user(&l, arg, sizeof(l)))
 		return -EFAULT;
 
-	/* preferred_osd is no longer supported */
-	if (l.preferred_osd != -1)
-		return -EINVAL;
-
 	/* validate changed params against current layout */
 	err = ceph_do_getattr(file->f_dentry->d_inode, CEPH_STAT_CAP_LAYOUT);
 	if (!err) {
@@ -71,24 +97,9 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
 	if (l.data_pool)
 		nl.data_pool = l.data_pool;
 
-	if ((nl.object_size & ~PAGE_MASK) ||
-	    (nl.stripe_unit & ~PAGE_MASK) ||
-	    ((unsigned)nl.object_size % (unsigned)nl.stripe_unit))
-		return -EINVAL;
-
-	/* make sure it's a valid data pool */
-	if (l.data_pool > 0) {
-		mutex_lock(&mdsc->mutex);
-		err = -EINVAL;
-		for (i = 0; i < mdsc->mdsmap->m_num_data_pg_pools; i++)
-			if (mdsc->mdsmap->m_data_pg_pools[i] == l.data_pool) {
-				err = 0;
-				break;
-			}
-		mutex_unlock(&mdsc->mutex);
-		if (err)
-			return err;
-	}
+	err = __validate_layout(mdsc, &nl);
+	if (err)
+		return err;
 
 	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETLAYOUT,
 				       USE_AUTH_MDS);
@@ -124,33 +135,16 @@ static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg)
 	struct inode *inode = file->f_dentry->d_inode;
 	struct ceph_mds_request *req;
 	struct ceph_ioctl_layout l;
-	int err, i;
+	int err;
 	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
 
 	/* copy and validate */
 	if (copy_from_user(&l, arg, sizeof(l)))
 		return -EFAULT;
 
-	if ((l.object_size & ~PAGE_MASK) ||
-	    (l.stripe_unit & ~PAGE_MASK) ||
-	    !l.stripe_unit ||
-	    (l.object_size &&
-	        (unsigned)l.object_size % (unsigned)l.stripe_unit))
-		return -EINVAL;
-
-	/* make sure it's a valid data pool */
-	if (l.data_pool > 0) {
-		mutex_lock(&mdsc->mutex);
-		err = -EINVAL;
-		for (i = 0; i < mdsc->mdsmap->m_num_data_pg_pools; i++)
-			if (mdsc->mdsmap->m_data_pg_pools[i] == l.data_pool) {
-				err = 0;
-				break;
-			}
-		mutex_unlock(&mdsc->mutex);
-		if (err)
-			return err;
-	}
+	err = __validate_layout(mdsc, &l);
+	if (err)
+		return err;
 
 	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETDIRLAYOUT,
 				       USE_AUTH_MDS);
-- 
1.7.9


* [PATCH 03/11] crush: clean up types, const-ness
From: Sage Weil @ 2012-05-04 22:54 UTC
  To: ceph-devel; +Cc: Sage Weil

Move various types from int -> __u32 (or similar), and add const as
appropriate.

This reflects changes that have been present in the userland implementation
for some time.
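
For example, the new bounds check in crush_get_bucket_item_weight()
leans on C's usual arithmetic conversions; a quick illustration (not
part of the patch):

	int p = -1;
	__u32 size = 8;

	/* (__u32)p >= size: a negative p wraps to a huge unsigned
	 * value (0xffffffff here), so a single comparison rejects
	 * both negative and too-large positions; the explicit cast
	 * documents the promotion and silences -Wsign-compare. */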

Signed-off-by: Sage Weil <sage@newdream.net>
---
 include/linux/crush/crush.h  |    2 +-
 include/linux/crush/mapper.h |    6 ++--
 net/ceph/crush/crush.c       |   14 ++++----
 net/ceph/crush/mapper.c      |   71 +++++++++++++++++++++--------------------
 4 files changed, 47 insertions(+), 46 deletions(-)

diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h
index 97e435b..3f50369 100644
--- a/include/linux/crush/crush.h
+++ b/include/linux/crush/crush.h
@@ -168,7 +168,7 @@ struct crush_map {
 
 
 /* crush.c */
-extern int crush_get_bucket_item_weight(struct crush_bucket *b, int pos);
+extern int crush_get_bucket_item_weight(const struct crush_bucket *b, int pos);
 extern void crush_calc_parents(struct crush_map *map);
 extern void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b);
 extern void crush_destroy_bucket_list(struct crush_bucket_list *b);
diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h
index c46b99c..9322ab8 100644
--- a/include/linux/crush/mapper.h
+++ b/include/linux/crush/mapper.h
@@ -10,11 +10,11 @@
 
 #include "crush.h"
 
-extern int crush_find_rule(struct crush_map *map, int pool, int type, int size);
-extern int crush_do_rule(struct crush_map *map,
+extern int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size);
+extern int crush_do_rule(const struct crush_map *map,
 			 int ruleno,
 			 int x, int *result, int result_max,
 			 int forcefeed,    /* -1 for none */
-			 __u32 *weights);
+			 const __u32 *weights);
 
 #endif
diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c
index d6ebb13..8c2201d 100644
--- a/net/ceph/crush/crush.c
+++ b/net/ceph/crush/crush.c
@@ -23,12 +23,12 @@ const char *crush_bucket_alg_name(int alg)
 
 /**
  * crush_get_bucket_item_weight - Get weight of an item in given bucket
- * @b: bucket pointer
- * @p: item index in bucket
+ * @param b bucket pointer
+ * @param p item index in bucket
  */
-int crush_get_bucket_item_weight(struct crush_bucket *b, int p)
+int crush_get_bucket_item_weight(const struct crush_bucket *b, int p)
 {
-	if (p >= b->size)
+	if ((__u32)p >= b->size)
 		return 0;
 
 	switch (b->alg) {
@@ -120,14 +120,13 @@ void crush_destroy_bucket(struct crush_bucket *b)
 
 /**
  * crush_destroy - Destroy a crush_map
- * @map: crush_map pointer
+ * @param map crush_map pointer
  */
 void crush_destroy(struct crush_map *map)
 {
-	int b;
-
 	/* buckets */
 	if (map->buckets) {
+		__s32 b;
 		for (b = 0; b < map->max_buckets; b++) {
 			if (map->buckets[b] == NULL)
 				continue;
@@ -138,6 +137,7 @@ void crush_destroy(struct crush_map *map)
 
 	/* rules */
 	if (map->rules) {
+		__u32 b;
 		for (b = 0; b < map->max_rules; b++)
 			kfree(map->rules[b]);
 		kfree(map->rules);
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index b79747c..7aa79a4 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -27,14 +27,14 @@
 
 /**
  * crush_find_rule - find a crush_rule id for a given ruleset, type, and size.
- * @map: the crush_map
- * @ruleset: the storage ruleset id (user defined)
- * @type: storage ruleset type (user defined)
- * @size: output set size
+ * @param map the crush_map
+ * @param ruleset the storage ruleset id (user defined)
+ * @param type storage ruleset type (user defined)
+ * @param size output set size
  */
-int crush_find_rule(struct crush_map *map, int ruleset, int type, int size)
+int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size)
 {
-	int i;
+	__u32 i;
 
 	for (i = 0; i < map->max_rules; i++) {
 		if (map->rules[i] &&
@@ -72,7 +72,7 @@ static int bucket_perm_choose(struct crush_bucket *bucket,
 	unsigned i, s;
 
 	/* start a new permutation if @x has changed */
-	if (bucket->perm_x != x || bucket->perm_n == 0) {
+	if (bucket->perm_x != (__u32)x || bucket->perm_n == 0) {
 		dprintk("bucket %d new x=%d\n", bucket->id, x);
 		bucket->perm_x = x;
 
@@ -219,7 +219,7 @@ static int bucket_tree_choose(struct crush_bucket_tree *bucket,
 static int bucket_straw_choose(struct crush_bucket_straw *bucket,
 			       int x, int r)
 {
-	int i;
+	__u32 i;
 	int high = 0;
 	__u64 high_draw = 0;
 	__u64 draw;
@@ -262,7 +262,7 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
  * true if device is marked "out" (failed, fully offloaded)
  * of the cluster
  */
-static int is_out(struct crush_map *map, __u32 *weight, int item, int x)
+static int is_out(const struct crush_map *map, const __u32 *weight, int item, int x)
 {
 	if (weight[item] >= 0x10000)
 		return 0;
@@ -276,27 +276,27 @@ static int is_out(struct crush_map *map, __u32 *weight, int item, int x)
 
 /**
  * crush_choose - choose numrep distinct items of given type
- * @map: the crush_map
- * @bucket: the bucket we are choose an item from
- * @x: crush input value
- * @numrep: the number of items to choose
- * @type: the type of item to choose
- * @out: pointer to output vector
- * @outpos: our position in that vector
- * @firstn: true if choosing "first n" items, false if choosing "indep"
- * @recurse_to_leaf: true if we want one device under each item of given type
- * @out2: second output vector for leaf items (if @recurse_to_leaf)
+ * @param map the crush_map
+ * @param bucket the bucket we are choosing an item from
+ * @param x crush input value
+ * @param numrep the number of items to choose
+ * @param type the type of item to choose
+ * @param out pointer to output vector
+ * @param outpos our position in that vector
+ * @param firstn true if choosing "first n" items, false if choosing "indep"
+ * @param recurse_to_leaf true if we want one device under each item of given type
+ * @param out2 second output vector for leaf items (if @a recurse_to_leaf)
  */
-static int crush_choose(struct crush_map *map,
+static int crush_choose(const struct crush_map *map,
 			struct crush_bucket *bucket,
-			__u32 *weight,
+			const __u32 *weight,
 			int x, int numrep, int type,
 			int *out, int outpos,
 			int firstn, int recurse_to_leaf,
 			int *out2)
 {
 	int rep;
-	int ftotal, flocal;
+	unsigned int ftotal, flocal;
 	int retry_descent, retry_bucket, skip_rep;
 	struct crush_bucket *in = bucket;
 	int r;
@@ -304,7 +304,7 @@ static int crush_choose(struct crush_map *map,
 	int item = 0;
 	int itemtype;
 	int collide, reject;
-	const int orig_tries = 5; /* attempts before we fall back to search */
+	const unsigned int orig_tries = 5; /* attempts before we fall back to search */
 
 	dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "",
 		bucket->id, x, outpos, numrep);
@@ -325,7 +325,7 @@ static int crush_choose(struct crush_map *map,
 				r = rep;
 				if (in->alg == CRUSH_BUCKET_UNIFORM) {
 					/* be careful */
-					if (firstn || numrep >= in->size)
+					if (firstn || (__u32)numrep >= in->size)
 						/* r' = r + f_total */
 						r += ftotal;
 					else if (in->size % numrep == 0)
@@ -425,7 +425,7 @@ reject:
 						/* else give up */
 						skip_rep = 1;
 					dprintk("  reject %d  collide %d  "
-						"ftotal %d  flocal %d\n",
+						"ftotal %u  flocal %u\n",
 						reject, collide, ftotal,
 						flocal);
 				}
@@ -449,16 +449,16 @@ reject:
 
 /**
  * crush_do_rule - calculate a mapping with the given input and rule
- * @map: the crush_map
- * @ruleno: the rule id
- * @x: hash input
- * @result: pointer to result vector
- * @result_max: maximum result size
- * @force: force initial replica choice; -1 for none
+ * @param map the crush_map
+ * @param ruleno the rule id
+ * @param x hash input
+ * @param result pointer to result vector
+ * @param result_max maximum result size
+ * @param force force initial replica choice; -1 for none
  */
-int crush_do_rule(struct crush_map *map,
+int crush_do_rule(const struct crush_map *map,
 		  int ruleno, int x, int *result, int result_max,
-		  int force, __u32 *weight)
+		  int force, const __u32 *weight)
 {
 	int result_len;
 	int force_context[CRUSH_MAX_DEPTH];
@@ -473,7 +473,7 @@ int crush_do_rule(struct crush_map *map,
 	int osize;
 	int *tmp;
 	struct crush_rule *rule;
-	int step;
+	__u32 step;
 	int i, j;
 	int numrep;
 	int firstn;
@@ -488,7 +488,8 @@ int crush_do_rule(struct crush_map *map,
 	/*
 	 * determine hierarchical context of force, if any.  note
 	 * that this may or may not correspond to the specific types
-	 * referenced by the crush rule.
+	 * referenced by the crush rule.  it will also only affect
+	 * the first descent (TAKE).
 	 */
 	if (force >= 0 &&
 	    force < map->max_devices &&
-- 
1.7.9


* [PATCH 04/11] crush: adjust local retry threshold
From: Sage Weil @ 2012-05-04 22:54 UTC
  To: ceph-devel; +Cc: Sage Weil

This small adjustment reflects a change that was made in ceph.git
commit af6a9f30696c900a2a8bd7ae24e8ed15fb4964bb about six months ago.
An N-1 search of an N-item bucket is not exhaustive.  Fixes ceph.git
bug #1594.
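
To spell out the off-by-one (assuming flocal counts failed local
attempts): for a bucket of N items, the old bound

	flocal < N + orig_tries

stops retrying once flocal reaches N + orig_tries - 1, leaving only
N - 1 attempts for the exhaustive phase after the first orig_tries
hashed attempts are used up, so one item in the bucket can never be
reached.  The <= bound allows the Nth attempt.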

Signed-off-by: Sage Weil <sage@newdream.net>
---
 net/ceph/crush/mapper.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index 7aa79a4..4df0607 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -415,7 +415,7 @@ reject:
 					if (collide && flocal < 3)
 						/* retry locally a few times */
 						retry_bucket = 1;
-					else if (flocal < in->size + orig_tries)
+					else if (flocal <= in->size + orig_tries)
 						/* exhaustive bucket search */
 						retry_bucket = 1;
 					else if (ftotal < 20)
-- 
1.7.9


* [PATCH 05/11] crush: be more tolerant of nonsensical crush maps
From: Sage Weil @ 2012-05-04 22:54 UTC
  To: ceph-devel; +Cc: Sage Weil

If we get a map that doesn't make sense, error out or ignore the badness
instead of BUGging out.  This reflects the ceph.git commits
9895f0bff7dc68e9b49b572613d242315fb11b6c and
8ded26472058d5205803f244c2f33cb6cb10de79.
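
With these checks a bad map now degrades to an empty or truncated
result instead of a BUG.  A hedged sketch of what a caller sees for an
out-of-range rule id (using the crush_do_rule() signature as of this
point in the series, before the forcefeed removal in patch 07):

	int osds[CRUSH_MAX_SET];
	int n;

	n = crush_do_rule(map, 9999 /* bogus ruleno */, x, osds,
			  CRUSH_MAX_SET, -1, weights);
	/* n == 0: no devices mapped; the caller sees an empty
	 * result set rather than a crashed kernel */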

Signed-off-by: Sage Weil <sage@newdream.net>
---
 net/ceph/crush/mapper.c |   33 ++++++++++++++++++++++++---------
 1 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index 4df0607..c39bb9c 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -152,8 +152,8 @@ static int bucket_list_choose(struct crush_bucket_list *bucket,
 			return bucket->h.items[i];
 	}
 
-	BUG_ON(1);
-	return 0;
+	dprintk("bad list sums for bucket %d\n", bucket->h.id);
+	return bucket->h.items[0];
 }
 
 
@@ -239,6 +239,7 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket,
 static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
 {
 	dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
+	BUG_ON(in->size == 0);
 	switch (in->alg) {
 	case CRUSH_BUCKET_UNIFORM:
 		return bucket_uniform_choose((struct crush_bucket_uniform *)in,
@@ -253,7 +254,7 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
 		return bucket_straw_choose((struct crush_bucket_straw *)in,
 					   x, r);
 	default:
-		BUG_ON(1);
+		dprintk("unknown bucket %d alg %d\n", in->id, in->alg);
 		return in->items[0];
 	}
 }
@@ -354,7 +355,11 @@ static int crush_choose(const struct crush_map *map,
 					item = bucket_perm_choose(in, x, r);
 				else
 					item = crush_bucket_choose(in, x, r);
-				BUG_ON(item >= map->max_devices);
+				if (item >= map->max_devices) {
+					dprintk("   bad item %d\n", item);
+					skip_rep = 1;
+					break;
+				}
 
 				/* desired type? */
 				if (item < 0)
@@ -365,8 +370,12 @@ static int crush_choose(const struct crush_map *map,
 
 				/* keep going? */
 				if (itemtype != type) {
-					BUG_ON(item >= 0 ||
-					       (-1-item) >= map->max_buckets);
+					if (item >= 0 ||
+					    (-1-item) >= map->max_buckets) {
+						dprintk("   bad item type %d\n", type);
+						skip_rep = 1;
+						break;
+					}
 					in = map->buckets[-1-item];
 					retry_bucket = 1;
 					continue;
@@ -478,7 +487,10 @@ int crush_do_rule(const struct crush_map *map,
 	int numrep;
 	int firstn;
 
-	BUG_ON(ruleno >= map->max_rules);
+	if ((__u32)ruleno >= map->max_rules) {
+		dprintk(" bad ruleno %d\n", ruleno);
+		return 0;
+	}
 
 	rule = map->rules[ruleno];
 	result_len = 0;
@@ -528,7 +540,8 @@ int crush_do_rule(const struct crush_map *map,
 			firstn = 1;
 		case CRUSH_RULE_CHOOSE_LEAF_INDEP:
 		case CRUSH_RULE_CHOOSE_INDEP:
-			BUG_ON(wsize == 0);
+			if (wsize == 0)
+				break;
 
 			recurse_to_leaf =
 				rule->steps[step].op ==
@@ -597,7 +610,9 @@ int crush_do_rule(const struct crush_map *map,
 			break;
 
 		default:
-			BUG_ON(1);
+			dprintk(" unknown op %d at step %d\n",
+				curstep->op, step);
+			break;
 		}
 	}
 	return result_len;
-- 
1.7.9


* [PATCH 06/11] crush: use a temporary variable to simplify crush_do_rule
From: Sage Weil @ 2012-05-04 22:54 UTC
  To: ceph-devel; +Cc: Sage Weil

Use a temporary variable here to avoid repeated array lookups and clean up
the code a bit.

This reflects ceph.git commit 6b5be27634ad307b471a5bf0db85c4f5c834885f.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 net/ceph/crush/mapper.c |   20 +++++++++++---------
 1 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index c39bb9c..9f407c5 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -519,14 +519,15 @@ int crush_do_rule(const struct crush_map *map,
 	}
 
 	for (step = 0; step < rule->len; step++) {
+		struct crush_rule_step *curstep = &rule->steps[step];
+
 		firstn = 0;
-		switch (rule->steps[step].op) {
+		switch (curstep->op) {
 		case CRUSH_RULE_TAKE:
-			w[0] = rule->steps[step].arg1;
+			w[0] = curstep->arg1;
 
 			/* find position in force_context/hierarchy */
-			while (force_pos >= 0 &&
-			       force_context[force_pos] != w[0])
+			while (force_pos >= 0 && force_context[force_pos] != w[0])
 				force_pos--;
 			/* and move past it */
 			if (force_pos >= 0)
@@ -538,15 +539,16 @@ int crush_do_rule(const struct crush_map *map,
 		case CRUSH_RULE_CHOOSE_LEAF_FIRSTN:
 		case CRUSH_RULE_CHOOSE_FIRSTN:
 			firstn = 1;
+			/* fall through */
 		case CRUSH_RULE_CHOOSE_LEAF_INDEP:
 		case CRUSH_RULE_CHOOSE_INDEP:
 			if (wsize == 0)
 				break;
 
 			recurse_to_leaf =
-				rule->steps[step].op ==
+				curstep->op ==
 				 CRUSH_RULE_CHOOSE_LEAF_FIRSTN ||
-				rule->steps[step].op ==
+				curstep->op ==
 				CRUSH_RULE_CHOOSE_LEAF_INDEP;
 
 			/* reset output */
@@ -558,7 +560,7 @@ int crush_do_rule(const struct crush_map *map,
 				 * basically, numrep <= 0 means relative to
 				 * the provided result_max
 				 */
-				numrep = rule->steps[step].arg1;
+				numrep = curstep->arg1;
 				if (numrep <= 0) {
 					numrep += result_max;
 					if (numrep <= 0)
@@ -569,7 +571,7 @@ int crush_do_rule(const struct crush_map *map,
 					/* skip any intermediate types */
 					while (force_pos &&
 					       force_context[force_pos] < 0 &&
-					       rule->steps[step].arg2 !=
+					       curstep->arg2 !=
 					       map->buckets[-1 -
 					       force_context[force_pos]]->type)
 						force_pos--;
@@ -583,7 +585,7 @@ int crush_do_rule(const struct crush_map *map,
 						      map->buckets[-1-w[i]],
 						      weight,
 						      x, numrep,
-						      rule->steps[step].arg2,
+						      curstep->arg2,
 						      o+osize, j,
 						      firstn,
 						      recurse_to_leaf, c+osize);
-- 
1.7.9


* [PATCH 07/11] crush: remove forcefeed functionality
From: Sage Weil @ 2012-05-04 22:54 UTC
  To: ceph-devel; +Cc: Sage Weil

Remove the forcefeed functionality from CRUSH.  It was an ugly
misfeature, mostly useless and entirely unused.

Reflects ceph.git commit ed974b5000f2851207d860a651809af4a1867942.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 include/linux/crush/mapper.h |    1 -
 net/ceph/crush/mapper.c      |   48 +-----------------------------------------
 net/ceph/osdmap.c            |    2 +-
 3 files changed, 2 insertions(+), 49 deletions(-)

diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h
index 9322ab8..71d79f4 100644
--- a/include/linux/crush/mapper.h
+++ b/include/linux/crush/mapper.h
@@ -14,7 +14,6 @@ extern int crush_find_rule(const struct crush_map *map, int ruleset, int type, i
 extern int crush_do_rule(const struct crush_map *map,
 			 int ruleno,
 			 int x, int *result, int result_max,
-			 int forcefeed,    /* -1 for none */
 			 const __u32 *weights);
 
 #endif
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index 9f407c5..1867afb 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -463,15 +463,12 @@ reject:
  * @param x hash input
  * @param result pointer to result vector
 * @param result_max maximum result size
- * @param force force initial replica choice; -1 for none
  */
 int crush_do_rule(const struct crush_map *map,
 		  int ruleno, int x, int *result, int result_max,
-		  int force, const __u32 *weight)
+		  const __u32 *weight)
 {
 	int result_len;
-	int force_context[CRUSH_MAX_DEPTH];
-	int force_pos = -1;
 	int a[CRUSH_MAX_SET];
 	int b[CRUSH_MAX_SET];
 	int c[CRUSH_MAX_SET];
@@ -497,27 +494,6 @@ int crush_do_rule(const struct crush_map *map,
 	w = a;
 	o = b;
 
-	/*
-	 * determine hierarchical context of force, if any.  note
-	 * that this may or may not correspond to the specific types
-	 * referenced by the crush rule.  it will also only affect
-	 * the first descent (TAKE).
-	 */
-	if (force >= 0 &&
-	    force < map->max_devices &&
-	    map->device_parents[force] != 0 &&
-	    !is_out(map, weight, force, x)) {
-		while (1) {
-			force_context[++force_pos] = force;
-			if (force >= 0)
-				force = map->device_parents[force];
-			else
-				force = map->bucket_parents[-1-force];
-			if (force == 0)
-				break;
-		}
-	}
-
 	for (step = 0; step < rule->len; step++) {
 		struct crush_rule_step *curstep = &rule->steps[step];
 
@@ -525,14 +501,6 @@ int crush_do_rule(const struct crush_map *map,
 		switch (curstep->op) {
 		case CRUSH_RULE_TAKE:
 			w[0] = curstep->arg1;
-
-			/* find position in force_context/hierarchy */
-			while (force_pos >= 0 && force_context[force_pos] != w[0])
-				force_pos--;
-			/* and move past it */
-			if (force_pos >= 0)
-				force_pos--;
-
 			wsize = 1;
 			break;
 
@@ -567,20 +535,6 @@ int crush_do_rule(const struct crush_map *map,
 						continue;
 				}
 				j = 0;
-				if (osize == 0 && force_pos >= 0) {
-					/* skip any intermediate types */
-					while (force_pos &&
-					       force_context[force_pos] < 0 &&
-					       curstep->arg2 !=
-					       map->buckets[-1 -
-					       force_context[force_pos]]->type)
-						force_pos--;
-					o[osize] = force_context[force_pos];
-					if (recurse_to_leaf)
-						c[osize] = force_context[0];
-					j++;
-					force_pos--;
-				}
 				osize += crush_choose(map,
 						      map->buckets[-1-w[i]],
 						      weight,
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 7d39f3c..9dda36f 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1069,7 +1069,7 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
 	pps += poolid;
 	*num = crush_do_rule(osdmap->crush, ruleno, pps, osds,
 			     min_t(int, pool->v.size, *num),
-			     -1, osdmap->osd_weight);
+			     osdmap->osd_weight);
 	return osds;
 }
 
-- 
1.7.9


* [PATCH 08/11] crush: remove parent maps
From: Sage Weil @ 2012-05-04 22:54 UTC
  To: ceph-devel; +Cc: Sage Weil

These were used for the ill-fated forcefeed feature.  Remove them.

Reflects ceph.git commit ebdf80edfecfbd5a842b71fbe5732857994380c1.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 include/linux/crush/crush.h |   11 -----------
 net/ceph/crush/crush.c      |   25 -------------------------
 net/ceph/osdmap.c           |    7 -------
 3 files changed, 0 insertions(+), 43 deletions(-)

diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h
index 3f50369..158a4d2 100644
--- a/include/linux/crush/crush.h
+++ b/include/linux/crush/crush.h
@@ -151,16 +151,6 @@ struct crush_map {
 	struct crush_bucket **buckets;
 	struct crush_rule **rules;
 
-	/*
-	 * Parent pointers to identify the parent bucket a device or
-	 * bucket in the hierarchy.  If an item appears more than
-	 * once, this is the _last_ time it appeared (where buckets
-	 * are processed in bucket id order, from -1 on down to
-	 * -max_buckets.
-	 */
-	__u32 *bucket_parents;
-	__u32 *device_parents;
-
 	__s32 max_buckets;
 	__u32 max_rules;
 	__s32 max_devices;
@@ -169,7 +159,6 @@ struct crush_map {
 
 /* crush.c */
 extern int crush_get_bucket_item_weight(const struct crush_bucket *b, int pos);
-extern void crush_calc_parents(struct crush_map *map);
 extern void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b);
 extern void crush_destroy_bucket_list(struct crush_bucket_list *b);
 extern void crush_destroy_bucket_tree(struct crush_bucket_tree *b);
diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c
index 8c2201d..bab3eac 100644
--- a/net/ceph/crush/crush.c
+++ b/net/ceph/crush/crush.c
@@ -46,29 +46,6 @@ int crush_get_bucket_item_weight(const struct crush_bucket *b, int p)
 	return 0;
 }
 
-/**
- * crush_calc_parents - Calculate parent vectors for the given crush map.
- * @map: crush_map pointer
- */
-void crush_calc_parents(struct crush_map *map)
-{
-	int i, b, c;
-
-	for (b = 0; b < map->max_buckets; b++) {
-		if (map->buckets[b] == NULL)
-			continue;
-		for (i = 0; i < map->buckets[b]->size; i++) {
-			c = map->buckets[b]->items[i];
-			BUG_ON(c >= map->max_devices ||
-			       c < -map->max_buckets);
-			if (c >= 0)
-				map->device_parents[c] = map->buckets[b]->id;
-			else
-				map->bucket_parents[-1-c] = map->buckets[b]->id;
-		}
-	}
-}
-
 void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b)
 {
 	kfree(b->h.perm);
@@ -143,8 +120,6 @@ void crush_destroy(struct crush_map *map)
 		kfree(map->rules);
 	}
 
-	kfree(map->bucket_parents);
-	kfree(map->device_parents);
 	kfree(map);
 }
 
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 9dda36f..dac448b 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -161,13 +161,6 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
 	c->max_rules = ceph_decode_32(p);
 	c->max_devices = ceph_decode_32(p);
 
-	c->device_parents = kcalloc(c->max_devices, sizeof(u32), GFP_NOFS);
-	if (c->device_parents == NULL)
-		goto badmem;
-	c->bucket_parents = kcalloc(c->max_buckets, sizeof(u32), GFP_NOFS);
-	if (c->bucket_parents == NULL)
-		goto badmem;
-
 	c->buckets = kcalloc(c->max_buckets, sizeof(*c->buckets), GFP_NOFS);
 	if (c->buckets == NULL)
 		goto badmem;
-- 
1.7.9


* [PATCH 09/11] crush: fix tree node weight lookup
From: Sage Weil @ 2012-05-04 22:54 UTC
  To: ceph-devel; +Cc: Sage Weil

Fix the node weight lookup for tree buckets by using a correct accessor.

Reflects ceph.git commit d287ade5bcbdca82a3aef145b92924cf1e856733.
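
Tree buckets store their weights in a binary-tree array in which the
items (leaves) occupy the odd-numbered nodes; item i lives at node
(i+1)*2 - 1, which is exactly what the new crush_calc_tree_node()
helper computes:

	i = 0  ->  node 1
	i = 1  ->  node 3
	i = 2  ->  node 5

The old code indexed node_weights[] by the raw item position and
returned a weight only when that position happened to be odd, so
lookups for even positions silently returned 0.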

Signed-off-by: Sage Weil <sage@newdream.net>
---
 include/linux/crush/crush.h |    5 +++++
 net/ceph/crush/crush.c      |    4 +---
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h
index 158a4d2..7c47508 100644
--- a/include/linux/crush/crush.h
+++ b/include/linux/crush/crush.h
@@ -166,4 +166,9 @@ extern void crush_destroy_bucket_straw(struct crush_bucket_straw *b);
 extern void crush_destroy_bucket(struct crush_bucket *b);
 extern void crush_destroy(struct crush_map *map);
 
+static inline int crush_calc_tree_node(int i)
+{
+	return ((i+1) << 1)-1;
+}
+
 #endif
diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c
index bab3eac..568adac 100644
--- a/net/ceph/crush/crush.c
+++ b/net/ceph/crush/crush.c
@@ -37,9 +37,7 @@ int crush_get_bucket_item_weight(const struct crush_bucket *b, int p)
 	case CRUSH_BUCKET_LIST:
 		return ((struct crush_bucket_list *)b)->item_weights[p];
 	case CRUSH_BUCKET_TREE:
-		if (p & 1)
-			return ((struct crush_bucket_tree *)b)->node_weights[p];
-		return 0;
+		return ((struct crush_bucket_tree *)b)->node_weights[crush_calc_tree_node(p)];
 	case CRUSH_BUCKET_STRAW:
 		return ((struct crush_bucket_straw *)b)->item_weights[p];
 	}
-- 
1.7.9


* [PATCH 10/11] crush: fix memory leak when destroying tree buckets
From: Sage Weil @ 2012-05-04 22:54 UTC
  To: ceph-devel; +Cc: Sage Weil

crush_destroy_bucket_tree() freed only the node_weights array, leaking
the perm and items arrays in the common bucket header.  Free those too.

Reflects ceph.git commit 46d63d98434b3bc9dad2fc9ab23cbaedc3bcb0e4.

Reported-by: Alexander Lyakas <alex.bolshoy@gmail.com>
Signed-off-by: Sage Weil <sage@newdream.net>
---
 net/ceph/crush/crush.c |    2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c
index 568adac..c17e007 100644
--- a/net/ceph/crush/crush.c
+++ b/net/ceph/crush/crush.c
@@ -62,6 +62,8 @@ void crush_destroy_bucket_list(struct crush_bucket_list *b)
 
 void crush_destroy_bucket_tree(struct crush_bucket_tree *b)
 {
+	kfree(b->h.perm);
+	kfree(b->h.items);
 	kfree(b->node_weights);
 	kfree(b);
 }
-- 
1.7.9


* [PATCH 11/11] crush: warn on do_rule failure
From: Sage Weil @ 2012-05-04 22:54 UTC
  To: ceph-devel; +Cc: Sage Weil

If we get an error code from crush_do_rule(), print an error to the
console.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 net/ceph/osdmap.c |   16 ++++++++++++----
 1 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index dac448b..2592f3c 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1027,7 +1027,8 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
 	struct ceph_pg_mapping *pg;
 	struct ceph_pg_pool_info *pool;
 	int ruleno;
 	unsigned poolid, ps, pps, t;
+	int r;
 
 	poolid = le32_to_cpu(pgid.pool);
 	ps = le16_to_cpu(pgid.ps);
@@ -1060,9 +1061,16 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
 			      le32_to_cpu(pool->v.pgp_num),
 			      pool->pgp_num_mask);
 	pps += poolid;
-	*num = crush_do_rule(osdmap->crush, ruleno, pps, osds,
-			     min_t(int, pool->v.size, *num),
-			     osdmap->osd_weight);
+	r = crush_do_rule(osdmap->crush, ruleno, pps, osds,
+			  min_t(int, pool->v.size, *num),
+			  osdmap->osd_weight);
+	if (r < 0) {
+		pr_err("error %d from crush rule: pool %d ruleset %d type %d"
+		       " size %d\n", r, poolid, pool->v.crush_ruleset,
+		       pool->v.type, pool->v.size);
+		return NULL;
+	}
+	*num = r;
 	return osds;
 }
 
-- 
1.7.9

