All of lore.kernel.org
 help / color / mirror / Atom feed
* [tabled patch 2/4] fix the selection of chunk
@ 2010-05-22  2:19 Pete Zaitcev
  2010-05-25 22:44 ` Jeff Garzik
  0 siblings, 1 reply; 3+ messages in thread
From: Pete Zaitcev @ 2010-05-22  2:19 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: Project Hail List

If a chunkserver goes down, tabled sometimes throws an "object not found".
It happens because we keep hitting the same down node and exhaust the
retries. The existing code calls rand() every time and hopes for the
best, but this is too likely to end poorly.

The fix is to only randomize once before the retry loop, and then
cycle through all available nodes deterministically. The same fix
would apply even if we used a better technique to select an available
chunkserver than just random.

Also, we refactor the code just a little bit, so that the enormous
function object_get_body gets somewhat easier to follow.

Signed-off-by: Pete Zaitcev <zaitcev@redhat.com>

---
 server/object.c |   92 ++++++++++++++++++++++++++++++++--------------
 1 file changed, 64 insertions(+), 28 deletions(-)

commit 1dddd3cf668cd5449781b0d6bda748012873c615
Author: Pete Zaitcev <zaitcev@yahoo.com>
Date:   Fri May 21 19:54:37 2010 -0600

    Eliminate phantom missing objects due to bad selection when a chunk goes down.

diff --git a/server/object.c b/server/object.c
index 24aa4fb..fa9ecd3 100644
--- a/server/object.c
+++ b/server/object.c
@@ -1059,6 +1059,49 @@ static void object_get_event(struct open_chunk *ochunk)
 	cli_write_run_compl();
 }
 
+static int object_node_count_up(struct db_obj_ent *obj)
+{
+	int n;
+	int i;
+	uint32_t nid;
+	struct storage_node *stnode;
+
+	n = 0;
+	for (i = 0; i < MAXWAY; i++) {
+		nid = GUINT32_FROM_LE(obj->d.a.nidv[i]);
+		if (nid) {
+			stnode = stor_node_by_nid(nid);
+			if (stnode) {
+				if (stnode->up)
+					n++;
+				stor_node_put(stnode);
+			}
+		}
+	}
+	return n;
+}
+
+static int object_node_select(int *nx, struct db_obj_ent *obj)
+{
+	int i;
+	uint32_t nid;
+	struct storage_node *stnode;
+
+	for (i = 0; i < MAXWAY; i++) {
+		nid = GUINT32_FROM_LE(obj->d.a.nidv[*nx]);
+		if (nid) {
+			stnode = stor_node_by_nid(nid);
+			if (stnode) {
+				if (stnode->up)
+					return stnode;
+				stor_node_put(stnode);
+			}
+		}
+		*nx = (*nx + 1) % MAXWAY;
+	}
+	return NULL;
+}
+
 static bool object_get_body(struct client *cli, const char *user,
 			    const char *bucket, const char *key, bool want_body)
 {
@@ -1159,48 +1202,37 @@ static bool object_get_body(struct client *cli, const char *user,
 }
 
 	cli->in_objid = GUINT64_FROM_LE(obj->d.a.oid);
+	cli->in_retry = object_node_count_up(obj) * 2;
 
-	n = 0;
-	for (i = 0; i < MAXWAY; i++ ) {
-		uint32_t nid;
-		nid = GUINT32_FROM_LE(obj->d.a.nidv[i]);
-		if (nid)
-			n++;
-	}
-	cli->in_retry = n * 2;
+	/*
+	 * Seed n outside of the retry loop because we definitely want to cycle,
+	 * else users see phantom unavailability of keys when nodes go down.
+	 */
+	n = rand() % MAXWAY;
 
  stnode_open_retry:
 	if (cli->in_retry == 0) {
-		applog(LOG_ERR, "No input nodes for oid %llX", cli->in_objid);
+		applog(LOG_ERR, "No ready nodes for oid %llX", cli->in_objid);
 		goto err_out_str;
 	}
 	--cli->in_retry;
 
-	stnode = NULL;
-	n = rand() % MAXWAY;
-	for (i = 0; i < MAXWAY; i++ ) {
-		uint32_t nid;
-		nid = GUINT32_FROM_LE(obj->d.a.nidv[n]);
-		if (nid) {
-			stnode = stor_node_by_nid(nid);
-			if (stnode) {
-				if (debugging)
-					applog(LOG_DEBUG,
-					       "Selected nid %u for oid %llX",
-					       nid, cli->in_objid);
-				stor_node_put(stnode);
-				break;
-			}
-		}
-		n = (n + 1) % MAXWAY;
+	stnode = object_node_select(&n, obj);
+	if (!stnode) {
+		applog(LOG_ERR, "No known nodes for oid %llX", cli->in_objid);
+		goto err_out_str;
 	}
-	if (!stnode)
-		goto stnode_open_retry;
+
+	if (debugging)
+		applog(LOG_DEBUG, "Selected nid %u for oid %llX",
+		       stnode->id, cli->in_objid);
 
 	rc = stor_open(&cli->in_ce, stnode, tabled_srv.evbase_main);
 	if (rc < 0) {
 		applog(LOG_WARNING, "Cannot open input chunk, nid %u (%d)",
 		       stnode->id, rc);
+		stor_node_put(stnode);
+		n = (n + 1) % MAXWAY;
 		goto stnode_open_retry;
 	}
 
@@ -1210,10 +1242,14 @@ static bool object_get_body(struct client *cli, const char *user,
 		applog(LOG_ERR, "Cannot start nid %u for oid %llX (%d)",
 		       stnode->id, (unsigned long long) cli->in_objid, rc);
 		stor_close(&cli->in_ce);
+		stor_node_put(stnode);
+		n = (n + 1) % MAXWAY;
 		goto stnode_open_retry;
 	}
 	cli->in_ce.cli = cli;
 
+	stor_node_put(stnode);
+
 	hdr = req_hdr(&cli->req, "if-unmodified-since");
 	if (hdr) {
 		time_t t;

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [tabled patch 2/4] fix the selection of chunk
  2010-05-22  2:19 [tabled patch 2/4] fix the selection of chunk Pete Zaitcev
@ 2010-05-25 22:44 ` Jeff Garzik
  2010-05-26  3:25   ` Pete Zaitcev
  0 siblings, 1 reply; 3+ messages in thread
From: Jeff Garzik @ 2010-05-25 22:44 UTC (permalink / raw)
  To: Pete Zaitcev; +Cc: Project Hail List

On 05/21/2010 10:19 PM, Pete Zaitcev wrote:
> +static int object_node_select(int *nx, struct db_obj_ent *obj)
> +{
> +	int i;
> +	uint32_t nid;
> +	struct storage_node *stnode;
> +
> +	for (i = 0; i<  MAXWAY; i++) {
> +		nid = GUINT32_FROM_LE(obj->d.a.nidv[*nx]);
> +		if (nid) {
> +			stnode = stor_node_by_nid(nid);
> +			if (stnode) {
> +				if (stnode->up)
> +					return stnode;
> +				stor_node_put(stnode);
> +			}
> +		}
> +		*nx = (*nx + 1) % MAXWAY;
> +	}
> +	return NULL;
> +}


Did you compile or test this???

object.c: In function ‘object_node_select’:
object.c:1096: warning: return makes integer from pointer without a cast
object.c:1102: warning: return makes integer from pointer without a cast
object.c: In function ‘object_get_body’:
object.c:1220: warning: assignment makes pointer from integer without a cast

The code appears to be wanting to return a pointer.

Dropped patches 2-4, presuming they will be resent after being fixed

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [tabled patch 2/4] fix the selection of chunk
  2010-05-25 22:44 ` Jeff Garzik
@ 2010-05-26  3:25   ` Pete Zaitcev
  0 siblings, 0 replies; 3+ messages in thread
From: Pete Zaitcev @ 2010-05-26  3:25 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: Project Hail List

On Tue, 25 May 2010 18:44:11 -0400
Jeff Garzik <jeff@garzik.org> wrote:

> On 05/21/2010 10:19 PM, Pete Zaitcev wrote:

> > +static int object_node_select(int *nx, struct db_obj_ent *obj)
> > +{
> > +	struct storage_node *stnode;
> > +				if (stnode->up)
> > +					return stnode;

> Did you compile or test this???

I suppose I deserved the tripple question mark for this. I tested it,
of course. The funny thing, everything is mapped below 4GB, way below
actually. As for warnings, I managed to miss them, because I started
running with -Wpointer-arith -Wsign-compare recently and that floods.
There's a lesson in it somewhere.

-- Pete

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2010-05-26  3:25 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-05-22  2:19 [tabled patch 2/4] fix the selection of chunk Pete Zaitcev
2010-05-25 22:44 ` Jeff Garzik
2010-05-26  3:25   ` Pete Zaitcev

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.