* [patch 00/14] sunrpc: Sunrpc cache cleanups and upcall rework
@ 2009-01-08  8:25 Greg Banks
  2009-01-08  8:25 ` [patch 01/14] sunrpc: Use consistent naming for variables of type struct cache_detail* Greg Banks
                   ` (14 more replies)
  0 siblings, 15 replies; 27+ messages in thread
From: Greg Banks @ 2009-01-08  8:25 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Linux NFS ML

G'day,

These patches are a snapshot of in-progress work to fix problems with
the sunrpc upcall mechanism which are triggered under load when using
multi-threaded rpc.mountd.  These are currently a hot issue with one
of SGI's customers (SGI PV988959).

The early patches in the series are cleanups with zero logic changes.

The patches have been lightly tested by mounting and running the
cthon04 tests.  They have not been shipped or used in the field.

-- 
Greg Banks, P.Engineer, SGI Australian Software Group.
the brightly coloured sporks of revolution.
I don't speak for SGI.


* [patch 01/14] sunrpc: Use consistent naming for variables of type struct cache_detail*.
  2009-01-08  8:25 [patch 00/14] sunrpc: Sunrpc cache cleanups and upcall rework Greg Banks
@ 2009-01-08  8:25 ` Greg Banks
  2009-01-08  8:25 ` [patch 02/14] sunrpc: Use consistent naming for variables of type struct cache_head* Greg Banks
                   ` (13 subsequent siblings)
  14 siblings, 0 replies; 27+ messages in thread
From: Greg Banks @ 2009-01-08  8:25 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Linux NFS ML

Signed-off-by: Greg Banks <gnb@sgi.com>
---

 net/sunrpc/cache.c |  118 +++++++++++++++++++++---------------------
 1 file changed, 59 insertions(+), 59 deletions(-)

Index: bfields/net/sunrpc/cache.c
===================================================================
--- bfields.orig/net/sunrpc/cache.c
+++ bfields/net/sunrpc/cache.c
@@ -47,28 +47,28 @@ static void cache_init(struct cache_head
 	h->last_refresh = now;
 }
 
-struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
+struct cache_head *sunrpc_cache_lookup(struct cache_detail *cd,
 				       struct cache_head *key, int hash)
 {
 	struct cache_head **head,  **hp;
 	struct cache_head *new = NULL;
 
-	head = &detail->hash_table[hash];
+	head = &cd->hash_table[hash];
 
-	read_lock(&detail->hash_lock);
+	read_lock(&cd->hash_lock);
 
 	for (hp=head; *hp != NULL ; hp = &(*hp)->next) {
 		struct cache_head *tmp = *hp;
-		if (detail->match(tmp, key)) {
+		if (cd->match(tmp, key)) {
 			cache_get(tmp);
-			read_unlock(&detail->hash_lock);
+			read_unlock(&cd->hash_lock);
 			return tmp;
 		}
 	}
-	read_unlock(&detail->hash_lock);
+	read_unlock(&cd->hash_lock);
 	/* Didn't find anything, insert an empty entry */
 
-	new = detail->alloc();
+	new = cd->alloc();
 	if (!new)
 		return NULL;
 	/* must fully initialise 'new', else
@@ -76,32 +76,32 @@ struct cache_head *sunrpc_cache_lookup(s
 	 * cache_put it soon.
 	 */
 	cache_init(new);
-	detail->init(new, key);
+	cd->init(new, key);
 
-	write_lock(&detail->hash_lock);
+	write_lock(&cd->hash_lock);
 
 	/* check if entry appeared while we slept */
 	for (hp=head; *hp != NULL ; hp = &(*hp)->next) {
 		struct cache_head *tmp = *hp;
-		if (detail->match(tmp, key)) {
+		if (cd->match(tmp, key)) {
 			cache_get(tmp);
-			write_unlock(&detail->hash_lock);
-			cache_put(new, detail);
+			write_unlock(&cd->hash_lock);
+			cache_put(new, cd);
 			return tmp;
 		}
 	}
 	new->next = *head;
 	*head = new;
-	detail->entries++;
+	cd->entries++;
 	cache_get(new);
-	write_unlock(&detail->hash_lock);
+	write_unlock(&cd->hash_lock);
 
 	return new;
 }
 EXPORT_SYMBOL(sunrpc_cache_lookup);
 
 
-static void queue_loose(struct cache_detail *detail, struct cache_head *ch);
+static void queue_loose(struct cache_detail *cd, struct cache_head *ch);
 
 static int cache_fresh_locked(struct cache_head *head, time_t expiry)
 {
@@ -111,17 +111,17 @@ static int cache_fresh_locked(struct cac
 }
 
 static void cache_fresh_unlocked(struct cache_head *head,
-			struct cache_detail *detail, int new)
+			struct cache_detail *cd, int new)
 {
 	if (new)
 		cache_revisit_request(head);
 	if (test_and_clear_bit(CACHE_PENDING, &head->flags)) {
 		cache_revisit_request(head);
-		queue_loose(detail, head);
+		queue_loose(cd, head);
 	}
 }
 
-struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
+struct cache_head *sunrpc_cache_update(struct cache_detail *cd,
 				       struct cache_head *new, struct cache_head *old, int hash)
 {
 	/* The 'old' entry is to be replaced by 'new'.
@@ -133,49 +133,49 @@ struct cache_head *sunrpc_cache_update(s
 	int is_new;
 
 	if (!test_bit(CACHE_VALID, &old->flags)) {
-		write_lock(&detail->hash_lock);
+		write_lock(&cd->hash_lock);
 		if (!test_bit(CACHE_VALID, &old->flags)) {
 			if (test_bit(CACHE_NEGATIVE, &new->flags))
 				set_bit(CACHE_NEGATIVE, &old->flags);
 			else
-				detail->update(old, new);
+				cd->update(old, new);
 			is_new = cache_fresh_locked(old, new->expiry_time);
-			write_unlock(&detail->hash_lock);
-			cache_fresh_unlocked(old, detail, is_new);
+			write_unlock(&cd->hash_lock);
+			cache_fresh_unlocked(old, cd, is_new);
 			return old;
 		}
-		write_unlock(&detail->hash_lock);
+		write_unlock(&cd->hash_lock);
 	}
 	/* We need to insert a new entry */
-	tmp = detail->alloc();
+	tmp = cd->alloc();
 	if (!tmp) {
-		cache_put(old, detail);
+		cache_put(old, cd);
 		return NULL;
 	}
 	cache_init(tmp);
-	detail->init(tmp, old);
-	head = &detail->hash_table[hash];
+	cd->init(tmp, old);
+	head = &cd->hash_table[hash];
 
-	write_lock(&detail->hash_lock);
+	write_lock(&cd->hash_lock);
 	if (test_bit(CACHE_NEGATIVE, &new->flags))
 		set_bit(CACHE_NEGATIVE, &tmp->flags);
 	else
-		detail->update(tmp, new);
+		cd->update(tmp, new);
 	tmp->next = *head;
 	*head = tmp;
-	detail->entries++;
+	cd->entries++;
 	cache_get(tmp);
 	is_new = cache_fresh_locked(tmp, new->expiry_time);
 	cache_fresh_locked(old, 0);
-	write_unlock(&detail->hash_lock);
-	cache_fresh_unlocked(tmp, detail, is_new);
-	cache_fresh_unlocked(old, detail, 0);
-	cache_put(old, detail);
+	write_unlock(&cd->hash_lock);
+	cache_fresh_unlocked(tmp, cd, is_new);
+	cache_fresh_unlocked(old, cd, 0);
+	cache_put(old, cd);
 	return tmp;
 }
 EXPORT_SYMBOL(sunrpc_cache_update);
 
-static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h);
+static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h);
 /*
  * This is the generic cache management routine for all
  * the authentication caches.
@@ -188,7 +188,7 @@ static int cache_make_upcall(struct cach
  * -ETIMEDOUT if upcall failed and should be retried,
  * -ENOENT if cache entry was negative
  */
-int cache_check(struct cache_detail *detail,
+int cache_check(struct cache_detail *cd,
 		    struct cache_head *h, struct cache_req *rqstp)
 {
 	int rv;
@@ -198,7 +198,7 @@ int cache_check(struct cache_detail *det
 	if (!test_bit(CACHE_VALID, &h->flags) ||
 	    h->expiry_time < get_seconds())
 		rv = -EAGAIN;
-	else if (detail->flush_time > h->last_refresh)
+	else if (cd->flush_time > h->last_refresh)
 		rv = -EAGAIN;
 	else {
 		/* entry is valid */
@@ -218,12 +218,12 @@ int cache_check(struct cache_detail *det
 		dprintk("RPC:       Want update, refage=%ld, age=%ld\n",
 				refresh_age, age);
 		if (!test_and_set_bit(CACHE_PENDING, &h->flags)) {
-			switch (cache_make_upcall(detail, h)) {
+			switch (cache_make_upcall(cd, h)) {
 			case -EINVAL:
 				clear_bit(CACHE_PENDING, &h->flags);
 				if (rv == -EAGAIN) {
 					set_bit(CACHE_NEGATIVE, &h->flags);
-					cache_fresh_unlocked(h, detail,
+					cache_fresh_unlocked(h, cd,
 					     cache_fresh_locked(h, get_seconds()+CACHE_NEW_EXPIRY));
 					rv = -ENOENT;
 				}
@@ -242,7 +242,7 @@ int cache_check(struct cache_detail *det
 			rv = -ETIMEDOUT;
 
 	if (rv)
-		cache_put(h, detail);
+		cache_put(h, cd);
 	return rv;
 }
 EXPORT_SYMBOL(cache_check);
@@ -445,7 +445,7 @@ static int cache_clean(void)
 
 	if (current_detail && current_index < current_detail->hash_size) {
 		struct cache_head *ch, **cp;
-		struct cache_detail *d;
+		struct cache_detail *cd;
 
 		write_lock(&current_detail->hash_lock);
 
@@ -473,12 +473,12 @@ static int cache_clean(void)
 			rv = 1;
 		}
 		write_unlock(&current_detail->hash_lock);
-		d = current_detail;
+		cd = current_detail;
 		if (!ch)
 			current_index ++;
 		spin_unlock(&cache_list_lock);
 		if (ch)
-			cache_put(ch, d);
+			cache_put(ch, cd);
 	} else
 		spin_unlock(&cache_list_lock);
 
@@ -516,12 +516,12 @@ void cache_flush(void)
 }
 EXPORT_SYMBOL(cache_flush);
 
-void cache_purge(struct cache_detail *detail)
+void cache_purge(struct cache_detail *cd)
 {
-	detail->flush_time = LONG_MAX;
-	detail->nextcheck = get_seconds();
+	cd->flush_time = LONG_MAX;
+	cd->nextcheck = get_seconds();
 	cache_flush();
-	detail->flush_time = 1;
+	cd->flush_time = 1;
 }
 EXPORT_SYMBOL(cache_purge);
 
@@ -924,11 +924,11 @@ static const struct file_operations cach
 };
 
 
-static void queue_loose(struct cache_detail *detail, struct cache_head *ch)
+static void queue_loose(struct cache_detail *cd, struct cache_head *ch)
 {
 	struct cache_queue *cq;
 	spin_lock(&queue_lock);
-	list_for_each_entry(cq, &detail->queue, list)
+	list_for_each_entry(cq, &cd->queue, list)
 		if (!cq->reader) {
 			struct cache_request *cr = container_of(cq, struct cache_request, q);
 			if (cr->item != ch)
@@ -937,7 +937,7 @@ static void queue_loose(struct cache_det
 				continue;
 			list_del(&cr->q.list);
 			spin_unlock(&queue_lock);
-			cache_put(cr->item, detail);
+			cache_put(cr->item, cd);
 			kfree(cr->buf);
 			kfree(cr);
 			return;
@@ -1019,12 +1019,12 @@ void qword_addhex(char **bpp, int *lp, c
 }
 EXPORT_SYMBOL(qword_addhex);
 
-static void warn_no_listener(struct cache_detail *detail)
+static void warn_no_listener(struct cache_detail *cd)
 {
-	if (detail->last_warn != detail->last_close) {
-		detail->last_warn = detail->last_close;
-		if (detail->warn_no_listener)
-			detail->warn_no_listener(detail);
+	if (cd->last_warn != cd->last_close) {
+		cd->last_warn = cd->last_close;
+		if (cd->warn_no_listener)
+			cd->warn_no_listener(cd);
 	}
 }
 
@@ -1032,7 +1032,7 @@ static void warn_no_listener(struct cach
  * register an upcall request to user-space.
  * Each request is at most one page long.
  */
-static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h)
+static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h)
 {
 
 	char *buf;
@@ -1040,12 +1040,12 @@ static int cache_make_upcall(struct cach
 	char *bp;
 	int len;
 
-	if (detail->cache_request == NULL)
+	if (cd->cache_request == NULL)
 		return -EINVAL;
 
-	if (atomic_read(&detail->readers) == 0 &&
-	    detail->last_close < get_seconds() - 30) {
-			warn_no_listener(detail);
+	if (atomic_read(&cd->readers) == 0 &&
+	    cd->last_close < get_seconds() - 30) {
+			warn_no_listener(cd);
 			return -EINVAL;
 	}
 
@@ -1061,7 +1061,7 @@ static int cache_make_upcall(struct cach
 
 	bp = buf; len = PAGE_SIZE;
 
-	detail->cache_request(detail, h, &bp, &len);
+	cd->cache_request(cd, h, &bp, &len);
 
 	if (len < 0) {
 		kfree(buf);
@@ -1074,7 +1074,7 @@ static int cache_make_upcall(struct cach
 	crq->len = PAGE_SIZE - len;
 	crq->readers = 0;
 	spin_lock(&queue_lock);
-	list_add_tail(&crq->q.list, &detail->queue);
+	list_add_tail(&crq->q.list, &cd->queue);
 	spin_unlock(&queue_lock);
 	wake_up(&queue_wait);
 	return 0;

--
-- 
Greg Banks, P.Engineer, SGI Australian Software Group.
the brightly coloured sporks of revolution.
I don't speak for SGI.


* [patch 02/14] sunrpc: Use consistent naming for variables of type struct cache_head*
  2009-01-08  8:25 [patch 00/14] sunrpc: Sunrpc cache cleanups and upcall rework Greg Banks
  2009-01-08  8:25 ` [patch 01/14] sunrpc: Use consistent naming for variables of type struct cache_detail* Greg Banks
@ 2009-01-08  8:25 ` Greg Banks
  2009-01-08  8:25 ` [patch 03/14] sunrpc: Use consistent naming for variables of type struct cache_request* Greg Banks
                   ` (12 subsequent siblings)
  14 siblings, 0 replies; 27+ messages in thread
From: Greg Banks @ 2009-01-08  8:25 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Linux NFS ML

This makes the cache code easier to read.

Signed-off-by: Greg Banks <gnb@sgi.com>
---

 net/sunrpc/cache.c |  100 +++++++++++++++++++++---------------------
 1 file changed, 50 insertions(+), 50 deletions(-)

Index: bfields/net/sunrpc/cache.c
===================================================================
--- bfields.orig/net/sunrpc/cache.c
+++ bfields/net/sunrpc/cache.c
@@ -34,8 +34,8 @@
 
 #define	 RPCDBG_FACILITY RPCDBG_CACHE
 
-static int cache_defer_req(struct cache_req *req, struct cache_head *item);
-static void cache_revisit_request(struct cache_head *item);
+static int cache_defer_req(struct cache_req *req, struct cache_head *h);
+static void cache_revisit_request(struct cache_head *h);
 
 static void cache_init(struct cache_head *h)
 {
@@ -101,23 +101,23 @@ struct cache_head *sunrpc_cache_lookup(s
 EXPORT_SYMBOL(sunrpc_cache_lookup);
 
 
-static void queue_loose(struct cache_detail *cd, struct cache_head *ch);
+static void queue_loose(struct cache_detail *cd, struct cache_head *h);
 
-static int cache_fresh_locked(struct cache_head *head, time_t expiry)
+static int cache_fresh_locked(struct cache_head *h, time_t expiry)
 {
-	head->expiry_time = expiry;
-	head->last_refresh = get_seconds();
-	return !test_and_set_bit(CACHE_VALID, &head->flags);
+	h->expiry_time = expiry;
+	h->last_refresh = get_seconds();
+	return !test_and_set_bit(CACHE_VALID, &h->flags);
 }
 
-static void cache_fresh_unlocked(struct cache_head *head,
+static void cache_fresh_unlocked(struct cache_head *h,
 			struct cache_detail *cd, int new)
 {
 	if (new)
-		cache_revisit_request(head);
-	if (test_and_clear_bit(CACHE_PENDING, &head->flags)) {
-		cache_revisit_request(head);
-		queue_loose(cd, head);
+		cache_revisit_request(h);
+	if (test_and_clear_bit(CACHE_PENDING, &h->flags)) {
+		cache_revisit_request(h);
+		queue_loose(cd, h);
 	}
 }
 
@@ -444,7 +444,7 @@ static int cache_clean(void)
 	/* find a cleanable entry in the bucket and clean it, or set to next bucket */
 
 	if (current_detail && current_index < current_detail->hash_size) {
-		struct cache_head *ch, **cp;
+		struct cache_head *h, **cp;
 		struct cache_detail *cd;
 
 		write_lock(&current_detail->hash_lock);
@@ -452,33 +452,33 @@ static int cache_clean(void)
 		/* Ok, now to clean this strand */
 
 		cp = & current_detail->hash_table[current_index];
-		ch = *cp;
-		for (; ch; cp= & ch->next, ch= *cp) {
-			if (current_detail->nextcheck > ch->expiry_time)
-				current_detail->nextcheck = ch->expiry_time+1;
-			if (ch->expiry_time >= get_seconds()
-			    && ch->last_refresh >= current_detail->flush_time
+		h = *cp;
+		for (; h; cp= & h->next, h= *cp) {
+			if (current_detail->nextcheck > h->expiry_time)
+				current_detail->nextcheck = h->expiry_time+1;
+			if (h->expiry_time >= get_seconds()
+			    && h->last_refresh >= current_detail->flush_time
 				)
 				continue;
-			if (test_and_clear_bit(CACHE_PENDING, &ch->flags))
-				queue_loose(current_detail, ch);
+			if (test_and_clear_bit(CACHE_PENDING, &h->flags))
+				queue_loose(current_detail, h);
 
-			if (atomic_read(&ch->ref.refcount) == 1)
+			if (atomic_read(&h->ref.refcount) == 1)
 				break;
 		}
-		if (ch) {
-			*cp = ch->next;
-			ch->next = NULL;
+		if (h) {
+			*cp = h->next;
+			h->next = NULL;
 			current_detail->entries--;
 			rv = 1;
 		}
 		write_unlock(&current_detail->hash_lock);
 		cd = current_detail;
-		if (!ch)
+		if (!h)
 			current_index ++;
 		spin_unlock(&cache_list_lock);
-		if (ch)
-			cache_put(ch, cd);
+		if (h)
+			cache_put(h, cd);
 	} else
 		spin_unlock(&cache_list_lock);
 
@@ -551,10 +551,10 @@ static LIST_HEAD(cache_defer_list);
 static struct list_head cache_defer_hash[DFR_HASHSIZE];
 static int cache_defer_cnt;
 
-static int cache_defer_req(struct cache_req *req, struct cache_head *item)
+static int cache_defer_req(struct cache_req *req, struct cache_head *h)
 {
 	struct cache_deferred_req *dreq;
-	int hash = DFR_HASH(item);
+	int hash = DFR_HASH(h);
 
 	if (cache_defer_cnt >= DFR_MAX) {
 		/* too much in the cache, randomly drop this one,
@@ -567,7 +567,7 @@ static int cache_defer_req(struct cache_
 	if (dreq == NULL)
 		return -ETIMEDOUT;
 
-	dreq->item = item;
+	dreq->item = h;
 
 	spin_lock(&cache_defer_lock);
 
@@ -592,20 +592,20 @@ static int cache_defer_req(struct cache_
 		/* there was one too many */
 		dreq->revisit(dreq, 1);
 	}
-	if (!test_bit(CACHE_PENDING, &item->flags)) {
+	if (!test_bit(CACHE_PENDING, &h->flags)) {
 		/* must have just been validated... */
-		cache_revisit_request(item);
+		cache_revisit_request(h);
 	}
 	return 0;
 }
 
-static void cache_revisit_request(struct cache_head *item)
+static void cache_revisit_request(struct cache_head *h)
 {
 	struct cache_deferred_req *dreq;
 	struct list_head pending;
 
 	struct list_head *lp;
-	int hash = DFR_HASH(item);
+	int hash = DFR_HASH(h);
 
 	INIT_LIST_HEAD(&pending);
 	spin_lock(&cache_defer_lock);
@@ -615,7 +615,7 @@ static void cache_revisit_request(struct
 		while (lp != &cache_defer_hash[hash]) {
 			dreq = list_entry(lp, struct cache_deferred_req, hash);
 			lp = lp->next;
-			if (dreq->item == item) {
+			if (dreq->item == h) {
 				list_del(&dreq->hash);
 				list_move(&dreq->recent, &pending);
 				cache_defer_cnt--;
@@ -924,14 +924,14 @@ static const struct file_operations cach
 };
 
 
-static void queue_loose(struct cache_detail *cd, struct cache_head *ch)
+static void queue_loose(struct cache_detail *cd, struct cache_head *h)
 {
 	struct cache_queue *cq;
 	spin_lock(&queue_lock);
 	list_for_each_entry(cq, &cd->queue, list)
 		if (!cq->reader) {
 			struct cache_request *cr = container_of(cq, struct cache_request, q);
-			if (cr->item != ch)
+			if (cr->item != h)
 				continue;
 			if (cr->readers != 0)
 				continue;
@@ -1159,7 +1159,7 @@ static void *c_start(struct seq_file *m,
 {
 	loff_t n = *pos;
 	unsigned hash, entry;
-	struct cache_head *ch;
+	struct cache_head *h;
 	struct cache_detail *cd = ((struct handle*)m->private)->cd;
 
 
@@ -1169,9 +1169,9 @@ static void *c_start(struct seq_file *m,
 	hash = n >> 32;
 	entry = n & ((1LL<<32) - 1);
 
-	for (ch=cd->hash_table[hash]; ch; ch=ch->next)
+	for (h=cd->hash_table[hash]; h; h=h->next)
 		if (!entry--)
-			return ch;
+			return h;
 	n &= ~((1LL<<32) - 1);
 	do {
 		hash++;
@@ -1186,18 +1186,18 @@ static void *c_start(struct seq_file *m,
 
 static void *c_next(struct seq_file *m, void *p, loff_t *pos)
 {
-	struct cache_head *ch = p;
+	struct cache_head *h = p;
 	int hash = (*pos >> 32);
 	struct cache_detail *cd = ((struct handle*)m->private)->cd;
 
 	if (p == SEQ_START_TOKEN)
 		hash = 0;
-	else if (ch->next == NULL) {
+	else if (h->next == NULL) {
 		hash++;
 		*pos += 1LL<<32;
 	} else {
 		++*pos;
-		return ch->next;
+		return h->next;
 	}
 	*pos &= ~((1LL<<32) - 1);
 	while (hash < cd->hash_size &&
@@ -1220,7 +1220,7 @@ static void c_stop(struct seq_file *m, v
 
 static int c_show(struct seq_file *m, void *p)
 {
-	struct cache_head *cp = p;
+	struct cache_head *h = p;
 	struct cache_detail *cd = ((struct handle*)m->private)->cd;
 
 	if (p == SEQ_START_TOKEN)
@@ -1228,15 +1228,15 @@ static int c_show(struct seq_file *m, vo
 
 	ifdebug(CACHE)
 		seq_printf(m, "# expiry=%ld refcnt=%d flags=%lx\n",
-			   cp->expiry_time, atomic_read(&cp->ref.refcount), cp->flags);
-	cache_get(cp);
-	if (cache_check(cd, cp, NULL))
+			   h->expiry_time, atomic_read(&h->ref.refcount), h->flags);
+	cache_get(h);
+	if (cache_check(cd, h, NULL))
 		/* cache_check does a cache_put on failure */
 		seq_printf(m, "# ");
 	else
-		cache_put(cp, cd);
+		cache_put(h, cd);
 
-	return cd->cache_show(m, cd, cp);
+	return cd->cache_show(m, cd, h);
 }
 
 static const struct seq_operations cache_content_op = {

--
-- 
Greg Banks, P.Engineer, SGI Australian Software Group.
the brightly coloured sporks of revolution.
I don't speak for SGI.


* [patch 03/14] sunrpc: Use consistent naming for variables of type struct cache_request*.
  2009-01-08  8:25 [patch 00/14] sunrpc: Sunrpc cache cleanups and upcall rework Greg Banks
  2009-01-08  8:25 ` [patch 01/14] sunrpc: Use consistent naming for variables of type struct cache_detail* Greg Banks
  2009-01-08  8:25 ` [patch 02/14] sunrpc: Use consistent naming for variables of type struct cache_head* Greg Banks
@ 2009-01-08  8:25 ` Greg Banks
  2009-01-08  8:25 ` [patch 04/14] sunrpc: Minor indentation cleanup in cache.c Greg Banks
                   ` (11 subsequent siblings)
  14 siblings, 0 replies; 27+ messages in thread
From: Greg Banks @ 2009-01-08  8:25 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Linux NFS ML

This makes the cache code easier to read.

Signed-off-by: Greg Banks <gnb@sgi.com>
---

 net/sunrpc/cache.c |   38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

Index: bfields/net/sunrpc/cache.c
===================================================================
--- bfields.orig/net/sunrpc/cache.c
+++ bfields/net/sunrpc/cache.c
@@ -846,9 +846,9 @@ cache_ioctl(struct inode *ino, struct fi
 	for (cq= &rp->q; &cq->list != &cd->queue;
 	     cq = list_entry(cq->list.next, struct cache_queue, list))
 		if (!cq->reader) {
-			struct cache_request *cr =
+			struct cache_request *rq =
 				container_of(cq, struct cache_request, q);
-			len = cr->len - rp->offset;
+			len = rq->len - rp->offset;
 			break;
 		}
 	spin_unlock(&queue_lock);
@@ -930,16 +930,16 @@ static void queue_loose(struct cache_det
 	spin_lock(&queue_lock);
 	list_for_each_entry(cq, &cd->queue, list)
 		if (!cq->reader) {
-			struct cache_request *cr = container_of(cq, struct cache_request, q);
-			if (cr->item != h)
+			struct cache_request *rq = container_of(cq, struct cache_request, q);
+			if (rq->item != h)
 				continue;
-			if (cr->readers != 0)
+			if (rq->readers != 0)
 				continue;
-			list_del(&cr->q.list);
+			list_del(&rq->q.list);
 			spin_unlock(&queue_lock);
-			cache_put(cr->item, cd);
-			kfree(cr->buf);
-			kfree(cr);
+			cache_put(rq->item, cd);
+			kfree(rq->buf);
+			kfree(rq);
 			return;
 		}
 	spin_unlock(&queue_lock);
@@ -1036,7 +1036,7 @@ static int cache_make_upcall(struct cach
 {
 
 	char *buf;
-	struct cache_request *crq;
+	struct cache_request *rq;
 	char *bp;
 	int len;
 
@@ -1053,8 +1053,8 @@ static int cache_make_upcall(struct cach
 	if (!buf)
 		return -EAGAIN;
 
-	crq = kmalloc(sizeof (*crq), GFP_KERNEL);
-	if (!crq) {
+	rq = kmalloc(sizeof (*rq), GFP_KERNEL);
+	if (!rq) {
 		kfree(buf);
 		return -EAGAIN;
 	}
@@ -1065,16 +1065,16 @@ static int cache_make_upcall(struct cach
 
 	if (len < 0) {
 		kfree(buf);
-		kfree(crq);
+		kfree(rq);
 		return -EAGAIN;
 	}
-	crq->q.reader = 0;
-	crq->item = cache_get(h);
-	crq->buf = buf;
-	crq->len = PAGE_SIZE - len;
-	crq->readers = 0;
+	rq->q.reader = 0;
+	rq->item = cache_get(h);
+	rq->buf = buf;
+	rq->len = PAGE_SIZE - len;
+	rq->readers = 0;
 	spin_lock(&queue_lock);
-	list_add_tail(&crq->q.list, &cd->queue);
+	list_add_tail(&rq->q.list, &cd->queue);
 	spin_unlock(&queue_lock);
 	wake_up(&queue_wait);
 	return 0;

--
-- 
Greg Banks, P.Engineer, SGI Australian Software Group.
the brightly coloured sporks of revolution.
I don't speak for SGI.


* [patch 04/14] sunrpc: Minor indentation cleanup in cache.c
  2009-01-08  8:25 [patch 00/14] sunrpc: Sunrpc cache cleanups and upcall rework Greg Banks
                   ` (2 preceding siblings ...)
  2009-01-08  8:25 ` [patch 03/14] sunrpc: Use consistent naming for variables of type struct cache_request* Greg Banks
@ 2009-01-08  8:25 ` Greg Banks
  2009-01-08  8:25 ` [patch 05/14] sunrpc: Rename queue_loose() to cache_remove_queued() Greg Banks
                   ` (10 subsequent siblings)
  14 siblings, 0 replies; 27+ messages in thread
From: Greg Banks @ 2009-01-08  8:25 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Linux NFS ML

Signed-off-by: Greg Banks <gnb@sgi.com>
---

 net/sunrpc/cache.c |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

Index: bfields/net/sunrpc/cache.c
===================================================================
--- bfields.orig/net/sunrpc/cache.c
+++ bfields/net/sunrpc/cache.c
@@ -1045,8 +1045,8 @@ static int cache_make_upcall(struct cach
 
 	if (atomic_read(&cd->readers) == 0 &&
 	    cd->last_close < get_seconds() - 30) {
-			warn_no_listener(cd);
-			return -EINVAL;
+		warn_no_listener(cd);
+		return -EINVAL;
 	}
 
 	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);

--
-- 
Greg Banks, P.Engineer, SGI Australian Software Group.
the brightly coloured sporks of revolution.
I don't speak for SGI.


* [patch 05/14] sunrpc: Rename queue_loose() to cache_remove_queued().
  2009-01-08  8:25 [patch 00/14] sunrpc: Sunrpc cache cleanups and upcall rework Greg Banks
                   ` (3 preceding siblings ...)
  2009-01-08  8:25 ` [patch 04/14] sunrpc: Minor indentation cleanup in cache.c Greg Banks
@ 2009-01-08  8:25 ` Greg Banks
  2009-01-08  8:25 ` [patch 06/14] sunrpc: Gather forward declarations of static functions in cache.c Greg Banks
                   ` (9 subsequent siblings)
  14 siblings, 0 replies; 27+ messages in thread
From: Greg Banks @ 2009-01-08  8:25 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Linux NFS ML

The new name is more accurate, as well as being grammatically correct.

Signed-off-by: Greg Banks <gnb@sgi.com>
---

 net/sunrpc/cache.c |    8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

Index: bfields/net/sunrpc/cache.c
===================================================================
--- bfields.orig/net/sunrpc/cache.c
+++ bfields/net/sunrpc/cache.c
@@ -101,7 +101,7 @@ struct cache_head *sunrpc_cache_lookup(s
 EXPORT_SYMBOL(sunrpc_cache_lookup);
 
 
-static void queue_loose(struct cache_detail *cd, struct cache_head *h);
+static void cache_remove_queued(struct cache_detail *cd, struct cache_head *h);
 
 static int cache_fresh_locked(struct cache_head *h, time_t expiry)
 {
@@ -117,7 +117,7 @@ static void cache_fresh_unlocked(struct 
 		cache_revisit_request(h);
 	if (test_and_clear_bit(CACHE_PENDING, &h->flags)) {
 		cache_revisit_request(h);
-		queue_loose(cd, h);
+		cache_remove_queued(cd, h);
 	}
 }
 
@@ -461,7 +461,7 @@ static int cache_clean(void)
 				)
 				continue;
 			if (test_and_clear_bit(CACHE_PENDING, &h->flags))
-				queue_loose(current_detail, h);
+				cache_remove_queued(current_detail, h);
 
 			if (atomic_read(&h->ref.refcount) == 1)
 				break;
@@ -924,7 +924,7 @@ static const struct file_operations cach
 };
 
 
-static void queue_loose(struct cache_detail *cd, struct cache_head *h)
+static void cache_remove_queued(struct cache_detail *cd, struct cache_head *h)
 {
 	struct cache_queue *cq;
 	spin_lock(&queue_lock);

--
-- 
Greg Banks, P.Engineer, SGI Australian Software Group.
the brightly coloured sporks of revolution.
I don't speak for SGI.


* [patch 06/14] sunrpc: Gather forward declarations of static functions in cache.c.
  2009-01-08  8:25 [patch 00/14] sunrpc: Sunrpc cache cleanups and upcall rework Greg Banks
                   ` (4 preceding siblings ...)
  2009-01-08  8:25 ` [patch 05/14] sunrpc: Rename queue_loose() to cache_remove_queued() Greg Banks
@ 2009-01-08  8:25 ` Greg Banks
  2009-01-08  8:25 ` [patch 07/14] sunrpc: Make the global queue_lock per-cache-detail Greg Banks
                   ` (8 subsequent siblings)
  14 siblings, 0 replies; 27+ messages in thread
From: Greg Banks @ 2009-01-08  8:25 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Linux NFS ML

Previously these forward declarations were scattered throughout
the code.  This makes the cache code easier to read.

Signed-off-by: Greg Banks <gnb@sgi.com>
---

 net/sunrpc/cache.c |    6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

Index: bfields/net/sunrpc/cache.c
===================================================================
--- bfields.orig/net/sunrpc/cache.c
+++ bfields/net/sunrpc/cache.c
@@ -36,6 +36,9 @@
 
 static int cache_defer_req(struct cache_req *req, struct cache_head *h);
 static void cache_revisit_request(struct cache_head *h);
+static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h);
+static void cache_remove_queued(struct cache_detail *cd, struct cache_head *h);
+static void do_cache_clean(struct work_struct *work);
 
 static void cache_init(struct cache_head *h)
 {
@@ -101,7 +104,6 @@ struct cache_head *sunrpc_cache_lookup(s
 EXPORT_SYMBOL(sunrpc_cache_lookup);
 
 
-static void cache_remove_queued(struct cache_detail *cd, struct cache_head *h);
 
 static int cache_fresh_locked(struct cache_head *h, time_t expiry)
 {
@@ -175,7 +177,6 @@ struct cache_head *sunrpc_cache_update(s
 }
 EXPORT_SYMBOL(sunrpc_cache_update);
 
-static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h);
 /*
  * This is the generic cache management routine for all
  * the authentication caches.
@@ -288,7 +289,6 @@ static const struct file_operations cach
 static const struct file_operations content_file_operations;
 static const struct file_operations cache_flush_operations;
 
-static void do_cache_clean(struct work_struct *work);
 static DECLARE_DELAYED_WORK(cache_cleaner, do_cache_clean);
 
 static void remove_cache_proc_entries(struct cache_detail *cd)

--
-- 
Greg Banks, P.Engineer, SGI Australian Software Group.
the brightly coloured sporks of revolution.
I don't speak for SGI.


* [patch 07/14] sunrpc: Make the global queue_lock per-cache-detail.
  2009-01-08  8:25 [patch 00/14] sunrpc: Sunrpc cache cleanups and upcall rework Greg Banks
                   ` (5 preceding siblings ...)
  2009-01-08  8:25 ` [patch 06/14] sunrpc: Gather forward declarations of static functions in cache.c Greg Banks
@ 2009-01-08  8:25 ` Greg Banks
  2009-01-08  8:25 ` [patch 08/14] sunrpc: Make the global queue_wait per-cache-detail Greg Banks
                   ` (7 subsequent siblings)
  14 siblings, 0 replies; 27+ messages in thread
From: Greg Banks @ 2009-01-08  8:25 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Linux NFS ML

The data structures it's protecting are all contained by the
cache_detail, so having a global lock is unnecessary and potentially
a performance limitation.

Signed-off-by: Greg Banks <gnb@sgi.com>
---

 include/linux/sunrpc/cache.h |    2 +
 net/sunrpc/cache.c           |   48 ++++++++++++++++----------------
 2 files changed, 26 insertions(+), 24 deletions(-)

Index: bfields/include/linux/sunrpc/cache.h
===================================================================
--- bfields.orig/include/linux/sunrpc/cache.h
+++ bfields/include/linux/sunrpc/cache.h
@@ -16,6 +16,7 @@
 #include <linux/slab.h>
 #include <asm/atomic.h>
 #include <linux/proc_fs.h>
+#include <linux/spinlock.h>
 
 /*
  * Each cache requires:
@@ -95,6 +96,7 @@ struct cache_detail {
 	int			entries;
 
 	/* fields for communication over channel */
+	spinlock_t		queue_lock;
 	struct list_head	queue;
 	struct proc_dir_entry	*proc_ent;
 	struct proc_dir_entry   *flush_ent, *channel_ent, *content_ent;
Index: bfields/net/sunrpc/cache.c
===================================================================
--- bfields.orig/net/sunrpc/cache.c
+++ bfields/net/sunrpc/cache.c
@@ -359,6 +359,7 @@ int cache_register(struct cache_detail *
 	if (ret)
 		return ret;
 	rwlock_init(&cd->hash_lock);
+	spin_lock_init(&cd->queue_lock);
 	INIT_LIST_HEAD(&cd->queue);
 	spin_lock(&cache_list_lock);
 	cd->nextcheck = 0;
@@ -672,7 +673,6 @@ void cache_clean_deferred(void *owner)
  *
  */
 
-static DEFINE_SPINLOCK(queue_lock);
 static DEFINE_MUTEX(queue_io_mutex);
 
 struct cache_queue {
@@ -705,7 +705,7 @@ cache_read(struct file *filp, char __use
 	mutex_lock(&queue_io_mutex); /* protect against multiple concurrent
 			      * readers on this file */
  again:
-	spin_lock(&queue_lock);
+	spin_lock(&cd->queue_lock);
 	/* need to find next request */
 	while (rp->q.list.next != &cd->queue &&
 	       list_entry(rp->q.list.next, struct cache_queue, list)
@@ -714,7 +714,7 @@ cache_read(struct file *filp, char __use
 		list_move(&rp->q.list, next);
 	}
 	if (rp->q.list.next == &cd->queue) {
-		spin_unlock(&queue_lock);
+		spin_unlock(&cd->queue_lock);
 		mutex_unlock(&queue_io_mutex);
 		BUG_ON(rp->offset);
 		return 0;
@@ -723,13 +723,13 @@ cache_read(struct file *filp, char __use
 	BUG_ON(rq->q.reader);
 	if (rp->offset == 0)
 		rq->readers++;
-	spin_unlock(&queue_lock);
+	spin_unlock(&cd->queue_lock);
 
 	if (rp->offset == 0 && !test_bit(CACHE_PENDING, &rq->item->flags)) {
 		err = -EAGAIN;
-		spin_lock(&queue_lock);
+		spin_lock(&cd->queue_lock);
 		list_move(&rp->q.list, &rq->q.list);
-		spin_unlock(&queue_lock);
+		spin_unlock(&cd->queue_lock);
 	} else {
 		if (rp->offset + count > rq->len)
 			count = rq->len - rp->offset;
@@ -739,26 +739,26 @@ cache_read(struct file *filp, char __use
 		rp->offset += count;
 		if (rp->offset >= rq->len) {
 			rp->offset = 0;
-			spin_lock(&queue_lock);
+			spin_lock(&cd->queue_lock);
 			list_move(&rp->q.list, &rq->q.list);
-			spin_unlock(&queue_lock);
+			spin_unlock(&cd->queue_lock);
 		}
 		err = 0;
 	}
  out:
 	if (rp->offset == 0) {
 		/* need to release rq */
-		spin_lock(&queue_lock);
+		spin_lock(&cd->queue_lock);
 		rq->readers--;
 		if (rq->readers == 0 &&
 		    !test_bit(CACHE_PENDING, &rq->item->flags)) {
 			list_del(&rq->q.list);
-			spin_unlock(&queue_lock);
+			spin_unlock(&cd->queue_lock);
 			cache_put(rq->item, cd);
 			kfree(rq->buf);
 			kfree(rq);
 		} else
-			spin_unlock(&queue_lock);
+			spin_unlock(&cd->queue_lock);
 	}
 	if (err == -EAGAIN)
 		goto again;
@@ -814,7 +814,7 @@ cache_poll(struct file *filp, poll_table
 	if (!rp)
 		return mask;
 
-	spin_lock(&queue_lock);
+	spin_lock(&cd->queue_lock);
 
 	for (cq= &rp->q; &cq->list != &cd->queue;
 	     cq = list_entry(cq->list.next, struct cache_queue, list))
@@ -822,7 +822,7 @@ cache_poll(struct file *filp, poll_table
 			mask |= POLLIN | POLLRDNORM;
 			break;
 		}
-	spin_unlock(&queue_lock);
+	spin_unlock(&cd->queue_lock);
 	return mask;
 }
 
@@ -838,7 +838,7 @@ cache_ioctl(struct inode *ino, struct fi
 	if (cmd != FIONREAD || !rp)
 		return -EINVAL;
 
-	spin_lock(&queue_lock);
+	spin_lock(&cd->queue_lock);
 
 	/* only find the length remaining in current request,
 	 * or the length of the next request
@@ -851,7 +851,7 @@ cache_ioctl(struct inode *ino, struct fi
 			len = rq->len - rp->offset;
 			break;
 		}
-	spin_unlock(&queue_lock);
+	spin_unlock(&cd->queue_lock);
 
 	return put_user(len, (int __user *)arg);
 }
@@ -871,9 +871,9 @@ cache_open(struct inode *inode, struct f
 		rp->offset = 0;
 		rp->q.reader = 1;
 		atomic_inc(&cd->readers);
-		spin_lock(&queue_lock);
+		spin_lock(&cd->queue_lock);
 		list_add(&rp->q.list, &cd->queue);
-		spin_unlock(&queue_lock);
+		spin_unlock(&cd->queue_lock);
 	}
 	filp->private_data = rp;
 	return 0;
@@ -886,7 +886,7 @@ cache_release(struct inode *inode, struc
 	struct cache_detail *cd = PDE(inode)->data;
 
 	if (rp) {
-		spin_lock(&queue_lock);
+		spin_lock(&cd->queue_lock);
 		if (rp->offset) {
 			struct cache_queue *cq;
 			for (cq= &rp->q; &cq->list != &cd->queue;
@@ -899,7 +899,7 @@ cache_release(struct inode *inode, struc
 			rp->offset = 0;
 		}
 		list_del(&rp->q.list);
-		spin_unlock(&queue_lock);
+		spin_unlock(&cd->queue_lock);
 
 		filp->private_data = NULL;
 		kfree(rp);
@@ -927,7 +927,7 @@ static const struct file_operations cach
 static void cache_remove_queued(struct cache_detail *cd, struct cache_head *h)
 {
 	struct cache_queue *cq;
-	spin_lock(&queue_lock);
+	spin_lock(&cd->queue_lock);
 	list_for_each_entry(cq, &cd->queue, list)
 		if (!cq->reader) {
 			struct cache_request *rq = container_of(cq, struct cache_request, q);
@@ -936,13 +936,13 @@ static void cache_remove_queued(struct c
 			if (rq->readers != 0)
 				continue;
 			list_del(&rq->q.list);
-			spin_unlock(&queue_lock);
+			spin_unlock(&cd->queue_lock);
 			cache_put(rq->item, cd);
 			kfree(rq->buf);
 			kfree(rq);
 			return;
 		}
-	spin_unlock(&queue_lock);
+	spin_unlock(&cd->queue_lock);
 }
 
 /*
@@ -1073,9 +1073,9 @@ static int cache_make_upcall(struct cach
 	rq->buf = buf;
 	rq->len = PAGE_SIZE - len;
 	rq->readers = 0;
-	spin_lock(&queue_lock);
+	spin_lock(&cd->queue_lock);
 	list_add_tail(&rq->q.list, &cd->queue);
-	spin_unlock(&queue_lock);
+	spin_unlock(&cd->queue_lock);
 	wake_up(&queue_wait);
 	return 0;
 }

--
-- 
Greg Banks, P.Engineer, SGI Australian Software Group.
the brightly coloured sporks of revolution.
I don't speak for SGI.


* [patch 08/14] sunrpc: Make the global queue_wait per-cache-detail.
  2009-01-08  8:25 [patch 00/14] sunrpc: Sunrpc cache cleanups and upcall rework Greg Banks
                   ` (6 preceding siblings ...)
  2009-01-08  8:25 ` [patch 07/14] sunrpc: Make the global queue_lock per-cache-detail Greg Banks
@ 2009-01-08  8:25 ` Greg Banks
  2009-01-08  8:25 ` [patch 09/14] sunrpc: Remove the global lock queue_io_mutex Greg Banks
                   ` (6 subsequent siblings)
  14 siblings, 0 replies; 27+ messages in thread
From: Greg Banks @ 2009-01-08  8:25 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Linux NFS ML

The file descriptor on which we wait is per-cache_detail, so it
makes some sense to put the polling infrastructure there too.
Plus, with multiple wait queues it's easier to pick apart the data
structures in coredumps to figure out which process is polling on
which caches.  This makes a big difference when debugging problems
with the multi-threaded rpc.mountd.

Signed-off-by: Greg Banks <gnb@sgi.com>
---

 include/linux/sunrpc/cache.h |    2 ++
 net/sunrpc/cache.c           |    7 +++----
 2 files changed, 5 insertions(+), 4 deletions(-)

Index: bfields/include/linux/sunrpc/cache.h
===================================================================
--- bfields.orig/include/linux/sunrpc/cache.h
+++ bfields/include/linux/sunrpc/cache.h
@@ -98,6 +98,8 @@ struct cache_detail {
 	/* fields for communication over channel */
 	spinlock_t		queue_lock;
 	struct list_head	queue;
+	wait_queue_head_t	queue_wait;
+
 	struct proc_dir_entry	*proc_ent;
 	struct proc_dir_entry   *flush_ent, *channel_ent, *content_ent;
 
Index: bfields/net/sunrpc/cache.c
===================================================================
--- bfields.orig/net/sunrpc/cache.c
+++ bfields/net/sunrpc/cache.c
@@ -360,6 +360,7 @@ int cache_register(struct cache_detail *
 		return ret;
 	rwlock_init(&cd->hash_lock);
 	spin_lock_init(&cd->queue_lock);
+	init_waitqueue_head(&cd->queue_wait);
 	INIT_LIST_HEAD(&cd->queue);
 	spin_lock(&cache_list_lock);
 	cd->nextcheck = 0;
@@ -796,8 +797,6 @@ cache_write(struct file *filp, const cha
 	return err ? err : count;
 }
 
-static DECLARE_WAIT_QUEUE_HEAD(queue_wait);
-
 static unsigned int
 cache_poll(struct file *filp, poll_table *wait)
 {
@@ -806,7 +805,7 @@ cache_poll(struct file *filp, poll_table
 	struct cache_queue *cq;
 	struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data;
 
-	poll_wait(filp, &queue_wait, wait);
+	poll_wait(filp, &cd->queue_wait, wait);
 
 	/* alway allow write */
 	mask = POLL_OUT | POLLWRNORM;
@@ -1076,7 +1075,7 @@ static int cache_make_upcall(struct cach
 	spin_lock(&cd->queue_lock);
 	list_add_tail(&rq->q.list, &cd->queue);
 	spin_unlock(&cd->queue_lock);
-	wake_up(&queue_wait);
+	wake_up(&cd->queue_wait);
 	return 0;
 }
 

--
-- 
Greg Banks, P.Engineer, SGI Australian Software Group.
the brightly coloured sporks of revolution.
I don't speak for SGI.


* [patch 09/14] sunrpc: Remove the global lock queue_io_mutex.
  2009-01-08  8:25 [patch 00/14] sunrpc: Sunrpc cache cleanups and upcall rework Greg Banks
                   ` (7 preceding siblings ...)
  2009-01-08  8:25 ` [patch 08/14] sunrpc: Make the global queue_wait per-cache-detail Greg Banks
@ 2009-01-08  8:25 ` Greg Banks
  2009-01-08  8:25 ` [patch 10/14] sunrpc: Reorganise the queuing of cache upcalls Greg Banks
                   ` (5 subsequent siblings)
  14 siblings, 0 replies; 27+ messages in thread
From: Greg Banks @ 2009-01-08  8:25 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Linux NFS ML

This lock was used for two different purposes in two places.  The use
in cache_read() is redundant because cd->queue_lock is also taken
and can be removed without harm.  The use in cache_write() protects a
global buffer which can be eliminated and replaced with a temporarily
allocated page; this also removes an unnecessary global bottleneck
when multiple rpc.mountd threads are writing down into the kernel.
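
For illustration only, here is a rough userspace analogue of the
before and after shapes, using pthreads; the names are invented and
the actual parsing is elided:

#include <pthread.h>
#include <stdlib.h>
#include <string.h>

#define WRITE_BUF_SIZE 8192

/* Old shape: one static buffer, so every writer serialises on the
 * same mutex even when writing to different caches. */
static pthread_mutex_t write_mutex = PTHREAD_MUTEX_INITIALIZER;
static char write_buf[WRITE_BUF_SIZE];

static int parse_update_old(const char *data, size_t len)
{
	if (len >= sizeof(write_buf))
		return -1;
	pthread_mutex_lock(&write_mutex);
	memcpy(write_buf, data, len);
	write_buf[len] = '\0';
	/* ... parse write_buf here ... */
	pthread_mutex_unlock(&write_mutex);
	return 0;
}

/* New shape: a buffer allocated per call, so writers never block
 * each other on the copy or the parse. */
static int parse_update_new(const char *data, size_t len)
{
	char *tmp;

	if (len >= WRITE_BUF_SIZE)
		return -1;
	tmp = malloc(WRITE_BUF_SIZE);
	if (!tmp)
		return -1;
	memcpy(tmp, data, len);
	tmp[len] = '\0';
	/* ... parse tmp here ... */
	free(tmp);
	return 0;
}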

Signed-off-by: Greg Banks <gnb@sgi.com>
---

 net/sunrpc/cache.c |   34 +++++++++++++++-------------------
 1 file changed, 15 insertions(+), 19 deletions(-)

Index: bfields/net/sunrpc/cache.c
===================================================================
--- bfields.orig/net/sunrpc/cache.c
+++ bfields/net/sunrpc/cache.c
@@ -674,8 +674,6 @@ void cache_clean_deferred(void *owner)
  *
  */
 
-static DEFINE_MUTEX(queue_io_mutex);
-
 struct cache_queue {
 	struct list_head	list;
 	int			reader;	/* if 0, then request */
@@ -703,8 +701,6 @@ cache_read(struct file *filp, char __use
 	if (count == 0)
 		return 0;
 
-	mutex_lock(&queue_io_mutex); /* protect against multiple concurrent
-			      * readers on this file */
  again:
 	spin_lock(&cd->queue_lock);
 	/* need to find next request */
@@ -716,7 +712,6 @@ cache_read(struct file *filp, char __use
 	}
 	if (rp->q.list.next == &cd->queue) {
 		spin_unlock(&cd->queue_lock);
-		mutex_unlock(&queue_io_mutex);
 		BUG_ON(rp->offset);
 		return 0;
 	}
@@ -763,37 +758,38 @@ cache_read(struct file *filp, char __use
 	}
 	if (err == -EAGAIN)
 		goto again;
-	mutex_unlock(&queue_io_mutex);
 	return err ? err :  count;
 }
 
-static char write_buf[8192]; /* protected by queue_io_mutex */
-
 static ssize_t
 cache_write(struct file *filp, const char __user *buf, size_t count,
 	    loff_t *ppos)
 {
 	int err;
 	struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data;
+	char *tmp;
+	int tmp_size = PAGE_SIZE;
 
 	if (count == 0)
 		return 0;
-	if (count >= sizeof(write_buf))
+	if (count >= tmp_size)
 		return -EINVAL;
 
-	mutex_lock(&queue_io_mutex);
+	tmp = kmalloc(tmp_size, GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
 
-	if (copy_from_user(write_buf, buf, count)) {
-		mutex_unlock(&queue_io_mutex);
-		return -EFAULT;
-	}
-	write_buf[count] = '\0';
+	err = -EFAULT;
+	if (copy_from_user(tmp, buf, count))
+		goto out;
+
+	tmp[count] = '\0';
+	err = -EINVAL;
 	if (cd->cache_parse)
-		err = cd->cache_parse(cd, write_buf, count);
-	else
-		err = -EINVAL;
+		err = cd->cache_parse(cd, tmp, count);
 
-	mutex_unlock(&queue_io_mutex);
+out:
+	kfree(tmp);
 	return err ? err : count;
 }
 

--
-- 
Greg Banks, P.Engineer, SGI Australian Software Group.
the brightly coloured sporks of revolution.
I don't speak for SGI.


* [patch 10/14] sunrpc: Reorganise the queuing of cache upcalls.
  2009-01-08  8:25 [patch 00/14] sunrpc: Sunrpc cache cleanups and upcall rework Greg Banks
                   ` (8 preceding siblings ...)
  2009-01-08  8:25 ` [patch 09/14] sunrpc: Remove the global lock queue_io_mutex Greg Banks
@ 2009-01-08  8:25 ` Greg Banks
  2009-01-08 19:57   ` J. Bruce Fields
  2009-01-08  8:25 ` [patch 11/14] sunrpc: Allocate cache_requests in a single allocation Greg Banks
                   ` (4 subsequent siblings)
  14 siblings, 1 reply; 27+ messages in thread
From: Greg Banks @ 2009-01-08  8:25 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Linux NFS ML

Instead of a single list which confusingly contains a mixture of
cache_request and cache_reader structures in various states, use two
separate lists.

Both new lists contain cache_request structures; the cache_reader
structure is eliminated.  Its only purpose was to hold state which
supports partial reads of upcalls from userspace.  However, the
implementation of partial reads is broken in the presence of the
multi-threaded rpc.mountd, in two different ways.

Firstly, the kernel code assumes that each reader uses a separate
struct file; because rpc.mountd fork()s *after* opening the cache
file descriptor this is not true.  Thus the single struct file and
the single rp->offset field are shared between multiple threads.
Unfortunately rp->offset is not maintained in a safe manner.  This can
lead to the BUG_ON() in cache_read() being tripped.

Secondly, even if the kernel code worked perfectly it's sharing
a single offset between multiple reading rpc.mountd threads.  If a
thread does a partial read, there's no way to match up the remaining
bytes in the upcall to the thread that read the initial part.  So a
partial read will result in any second reading thread that comes
along being given a spurious part of an upcall.  Both threads will
then fail to parse their respective mangled upcalls.  At the very
least this will result in clients seeing NFS calls which triggered
an upcall being randomly dropped under load.
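
To see why a fork()ed reader shares state, here is a small userspace
demonstration of open-then-fork offset sharing; an ordinary file
stands in for the cache channel:

#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/wait.h>

int main(void)
{
	char buf[16];
	int fd = open("/etc/passwd", O_RDONLY);	/* any readable file */

	if (fd < 0)
		return 1;

	if (fork() == 0) {
		/* child: consume some bytes, as one mountd thread would */
		ssize_t n = read(fd, buf, sizeof(buf));
		printf("child read %zd bytes\n", n);
		_exit(0);
	}
	wait(NULL);

	/* parent: the offset did not stay at 0, because open-then-fork
	 * leaves both processes sharing a single struct file */
	printf("parent sees offset %ld\n", (long)lseek(fd, 0, SEEK_CUR));
	close(fd);
	return 0;
}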

The "right" way to fix this would be to implement a primitive such as
recvmsg() that an rpc.mountd thread could use to atomically retrieve an
entire upcall message.  However in this case we know that the size of
the messages is limited by existing code to PAGE_SIZE and by usage to
even less.  We also know that gssd and recent rpc.mountd do 2048 byte
read()s, so partial reads should be unnecessary.  These circumstances
should allow removing support for partial reads.
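
With partial reads gone, a conforming reader only needs to poll the
channel and hand read() a buffer at least as large as the largest
possible message.  A rough sketch of such a reader; the channel path
and buffer size here are illustrative only:

#include <unistd.h>
#include <fcntl.h>
#include <poll.h>

#define UPCALL_BUF_SIZE 8192	/* comfortably larger than any one upcall */

int main(void)
{
	char buf[UPCALL_BUF_SIZE];
	int fd = open("/proc/net/rpc/auth.unix.ip/channel", O_RDWR);

	if (fd < 0)
		return 1;

	for (;;) {
		struct pollfd pfd = { .fd = fd, .events = POLLIN };
		ssize_t n;

		if (poll(&pfd, 1, -1) < 0)
			break;
		n = read(fd, buf, sizeof(buf));	/* one whole upcall, or 0 */
		if (n <= 0)
			continue;
		/* ... parse buf[0..n-1] and write() a reply back ... */
	}
	close(fd);
	return 0;
}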

Having made that decision, we can remove struct cache_reader and
greatly simplify all the code that deals with the upcall queue and
with cache file descriptors.

Further, the old code kept cache_request objects in its single list
in two different states: waiting to be sent up to an rpc.mountd thread
in response to a read(), and waiting for a reply (or pre-emptive reply)
from an rpc.mountd thread which arrives in a write().  The difference
was tracked by some very gnarly code which relied on the relative
position of cache_reader and cache_request objects in the single list.
This is very hard code to understand and debug.  The new code uses
two separate lists and much simpler logic.

Signed-off-by: Greg Banks <gnb@sgi.com>
---

 include/linux/sunrpc/cache.h |    3 
 net/sunrpc/cache.c           |  246 ++++++++++++--------------------
 2 files changed, 97 insertions(+), 152 deletions(-)

Index: bfields/include/linux/sunrpc/cache.h
===================================================================
--- bfields.orig/include/linux/sunrpc/cache.h
+++ bfields/include/linux/sunrpc/cache.h
@@ -97,7 +97,8 @@ struct cache_detail {
 
 	/* fields for communication over channel */
 	spinlock_t		queue_lock;
-	struct list_head	queue;
+	struct list_head	to_read;
+	struct list_head	to_write;
 	wait_queue_head_t	queue_wait;
 
 	struct proc_dir_entry	*proc_ent;
Index: bfields/net/sunrpc/cache.c
===================================================================
--- bfields.orig/net/sunrpc/cache.c
+++ bfields/net/sunrpc/cache.c
@@ -361,7 +361,8 @@ int cache_register(struct cache_detail *
 	rwlock_init(&cd->hash_lock);
 	spin_lock_init(&cd->queue_lock);
 	init_waitqueue_head(&cd->queue_wait);
-	INIT_LIST_HEAD(&cd->queue);
+	INIT_LIST_HEAD(&cd->to_read);
+	INIT_LIST_HEAD(&cd->to_write);
 	spin_lock(&cache_list_lock);
 	cd->nextcheck = 0;
 	cd->entries = 0;
@@ -659,106 +660,91 @@ void cache_clean_deferred(void *owner)
 }
 
 /*
- * communicate with user-space
+ * Caches communicate with user-space.
  *
  * We have a magic /proc file - /proc/sunrpc/<cachename>/channel.
- * On read, you get a full request, or block.
- * On write, an update request is processed.
+ *
+ * On read, you get a full request.  If the length passed
+ * to read() is too short, you get nothing and the message is dropped,
+ * which is bad.  So you should use a sufficiently large length,
+ * for example PAGE_SIZE.  If there are no requests queued,
+ * read() returns 0.
+ *
+ * On write, an update is processed.  This may, as a side effect,
+ * cause a previously queued request to be de-queued and removed.
+ * Userspace can also pre-emptively write updates which the kernel
+ * has not yet requested.
+ *
  * Poll works if anything to read, and always allows write.
  *
- * Implemented by linked list of requests.  Each open file has
- * a ->private that also exists in this list.  New requests are added
- * to the end and may wakeup and preceding readers.
- * New readers are added to the head.  If, on read, an item is found with
- * CACHE_UPCALLING clear, we free it from the list.
+ * The channel is implemented by two linked lists of cache_request
+ * objects.  cd->to_read is requests which have been generated in
+ * the kernel and are waiting for a userspace process to read them.
+ * cd->to_write is requests which have been read by userspace and
+ * are awaiting a reply to be written.
  *
+ * Both lists are protected by cd->queue_lock.
  */
 
-struct cache_queue {
-	struct list_head	list;
-	int			reader;	/* if 0, then request */
-};
 struct cache_request {
-	struct cache_queue	q;
+	struct list_head	list;
 	struct cache_head	*item;
 	char			* buf;
 	int			len;
-	int			readers;
-};
-struct cache_reader {
-	struct cache_queue	q;
-	int			offset;	/* if non-0, we have a refcnt on next request */
 };
 
 static ssize_t
 cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
 {
-	struct cache_reader *rp = filp->private_data;
-	struct cache_request *rq;
+	struct cache_request *rq = NULL;
 	struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data;
 	int err;
 
 	if (count == 0)
 		return 0;
 
- again:
+	/* de-queue the next request which is waiting to be read */
 	spin_lock(&cd->queue_lock);
-	/* need to find next request */
-	while (rp->q.list.next != &cd->queue &&
-	       list_entry(rp->q.list.next, struct cache_queue, list)
-	       ->reader) {
-		struct list_head *next = rp->q.list.next;
-		list_move(&rp->q.list, next);
-	}
-	if (rp->q.list.next == &cd->queue) {
-		spin_unlock(&cd->queue_lock);
-		BUG_ON(rp->offset);
-		return 0;
+	if (!list_empty(&cd->to_read)) {
+		rq = container_of(cd->to_read.next, struct cache_request, list);
+		list_del_init(&rq->list);
 	}
-	rq = container_of(rp->q.list.next, struct cache_request, q.list);
-	BUG_ON(rq->q.reader);
-	if (rp->offset == 0)
-		rq->readers++;
 	spin_unlock(&cd->queue_lock);
 
-	if (rp->offset == 0 && !test_bit(CACHE_PENDING, &rq->item->flags)) {
-		err = -EAGAIN;
-		spin_lock(&cd->queue_lock);
-		list_move(&rp->q.list, &rq->q.list);
-		spin_unlock(&cd->queue_lock);
-	} else {
-		if (rp->offset + count > rq->len)
-			count = rq->len - rp->offset;
-		err = -EFAULT;
-		if (copy_to_user(buf, rq->buf + rp->offset, count))
-			goto out;
-		rp->offset += count;
-		if (rp->offset >= rq->len) {
-			rp->offset = 0;
-			spin_lock(&cd->queue_lock);
-			list_move(&rp->q.list, &rq->q.list);
-			spin_unlock(&cd->queue_lock);
-		}
-		err = 0;
-	}
- out:
-	if (rp->offset == 0) {
-		/* need to release rq */
-		spin_lock(&cd->queue_lock);
-		rq->readers--;
-		if (rq->readers == 0 &&
-		    !test_bit(CACHE_PENDING, &rq->item->flags)) {
-			list_del(&rq->q.list);
-			spin_unlock(&cd->queue_lock);
-			cache_put(rq->item, cd);
-			kfree(rq->buf);
-			kfree(rq);
-		} else
-			spin_unlock(&cd->queue_lock);
-	}
-	if (err == -EAGAIN)
-		goto again;
-	return err ? err :  count;
+	if (rq == NULL)
+		return 0; /* no queued requests */
+
+	err = -EAGAIN;	/* gnb:TODO...this used to cause a loop, wtf */
+	if (!test_bit(CACHE_PENDING, &rq->item->flags))
+		goto error;
+
+	/* gnb:TODO whine to dmesg; stat */
+	err = -EFAULT;
+	if (count < rq->len)
+		goto error; /* We make no pretence at handling short reads */
+	count = rq->len;
+
+	err = -EFAULT;
+	if (copy_to_user(buf, rq->buf, count))
+		goto error;
+
+	/*
+	 * Done reading, append to the list of requests
+	 * which are waiting for a write from userspace.
+	 */
+	spin_lock(&cd->queue_lock);
+	list_add_tail(&rq->list, &cd->to_write);
+	spin_unlock(&cd->queue_lock);
+
+	return count;
+
+error:
+	/* need to release rq */
+	cache_put(rq->item, cd);
+	kfree(rq->buf);
+	kfree(rq);
+
+	return err;
 }
 
 static ssize_t
@@ -796,28 +782,21 @@ out:
 static unsigned int
 cache_poll(struct file *filp, poll_table *wait)
 {
-	unsigned int mask;
-	struct cache_reader *rp = filp->private_data;
-	struct cache_queue *cq;
+	unsigned int mask = 0;
 	struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data;
 
 	poll_wait(filp, &cd->queue_wait, wait);
 
-	/* alway allow write */
-	mask = POLL_OUT | POLLWRNORM;
+	if (filp->f_mode & FMODE_WRITE)
+		mask = POLL_OUT | POLLWRNORM;
 
-	if (!rp)
-		return mask;
-
-	spin_lock(&cd->queue_lock);
-
-	for (cq= &rp->q; &cq->list != &cd->queue;
-	     cq = list_entry(cq->list.next, struct cache_queue, list))
-		if (!cq->reader) {
+	if (filp->f_mode & FMODE_READ) {
+		spin_lock(&cd->queue_lock);
+		if (!list_empty(&cd->to_read))
 			mask |= POLLIN | POLLRDNORM;
-			break;
-		}
-	spin_unlock(&cd->queue_lock);
+		spin_unlock(&cd->queue_lock);
+	}
+
 	return mask;
 }
 
@@ -826,26 +805,23 @@ cache_ioctl(struct inode *ino, struct fi
 	    unsigned int cmd, unsigned long arg)
 {
 	int len = 0;
-	struct cache_reader *rp = filp->private_data;
-	struct cache_queue *cq;
+	struct cache_request *rq;
 	struct cache_detail *cd = PDE(ino)->data;
 
-	if (cmd != FIONREAD || !rp)
+	if (cmd != FIONREAD)
+		return -EINVAL;
+	if (!(filp->f_mode & FMODE_READ))
 		return -EINVAL;
 
 	spin_lock(&cd->queue_lock);
 
-	/* only find the length remaining in current request,
-	 * or the length of the next request
+	/* only find the length of the next request
 	 */
-	for (cq= &rp->q; &cq->list != &cd->queue;
-	     cq = list_entry(cq->list.next, struct cache_queue, list))
-		if (!cq->reader) {
-			struct cache_request *rq =
-				container_of(cq, struct cache_request, q);
-			len = rq->len - rp->offset;
-			break;
-		}
+	if (!list_empty(&cd->to_read)) {
+		rq = container_of(cd->to_read.next, struct cache_request, list);
+		len = rq->len;
+	}
+
 	spin_unlock(&cd->queue_lock);
 
 	return put_user(len, (int __user *)arg);
@@ -854,51 +830,20 @@ cache_ioctl(struct inode *ino, struct fi
 static int
 cache_open(struct inode *inode, struct file *filp)
 {
-	struct cache_reader *rp = NULL;
-
 	nonseekable_open(inode, filp);
 	if (filp->f_mode & FMODE_READ) {
 		struct cache_detail *cd = PDE(inode)->data;
-
-		rp = kmalloc(sizeof(*rp), GFP_KERNEL);
-		if (!rp)
-			return -ENOMEM;
-		rp->offset = 0;
-		rp->q.reader = 1;
 		atomic_inc(&cd->readers);
-		spin_lock(&cd->queue_lock);
-		list_add(&rp->q.list, &cd->queue);
-		spin_unlock(&cd->queue_lock);
 	}
-	filp->private_data = rp;
 	return 0;
 }
 
 static int
 cache_release(struct inode *inode, struct file *filp)
 {
-	struct cache_reader *rp = filp->private_data;
 	struct cache_detail *cd = PDE(inode)->data;
 
-	if (rp) {
-		spin_lock(&cd->queue_lock);
-		if (rp->offset) {
-			struct cache_queue *cq;
-			for (cq= &rp->q; &cq->list != &cd->queue;
-			     cq = list_entry(cq->list.next, struct cache_queue, list))
-				if (!cq->reader) {
-					container_of(cq, struct cache_request, q)
-						->readers--;
-					break;
-				}
-			rp->offset = 0;
-		}
-		list_del(&rp->q.list);
-		spin_unlock(&cd->queue_lock);
-
-		filp->private_data = NULL;
-		kfree(rp);
-
+	if (filp->f_mode & FMODE_READ) {
 		cd->last_close = get_seconds();
 		atomic_dec(&cd->readers);
 	}
@@ -918,26 +863,39 @@ static const struct file_operations cach
 	.release	= cache_release,
 };
 
+static struct cache_request *
+cache_queue_find_locked(struct list_head *listp, struct cache_head *h)
+{
+	struct cache_request *rq;
+
+	list_for_each_entry(rq, listp, list) {
+		if (rq->item == h)
+			return rq;
+	}
+	return NULL;
+}
 
 static void cache_remove_queued(struct cache_detail *cd, struct cache_head *h)
 {
-	struct cache_queue *cq;
+	struct cache_request *rq;
+
+	/* find and de-queue */
 	spin_lock(&cd->queue_lock);
-	list_for_each_entry(cq, &cd->queue, list)
-		if (!cq->reader) {
-			struct cache_request *rq = container_of(cq, struct cache_request, q);
-			if (rq->item != h)
-				continue;
-			if (rq->readers != 0)
-				continue;
-			list_del(&rq->q.list);
-			spin_unlock(&cd->queue_lock);
-			cache_put(rq->item, cd);
-			kfree(rq->buf);
-			kfree(rq);
-			return;
-		}
+
+	rq = cache_queue_find_locked(&cd->to_read, h);
+	if (!rq)
+		rq = cache_queue_find_locked(&cd->to_write, h);
+	if (rq)
+		list_del(&rq->list);
+
 	spin_unlock(&cd->queue_lock);
+
+	/* if found, destroy */
+	if (rq) {
+		cache_put(rq->item, cd);
+		kfree(rq->buf);
+		kfree(rq);
+	}
 }
 
 /*
@@ -1063,13 +1021,11 @@ static int cache_make_upcall(struct cach
 		kfree(rq);
 		return -EAGAIN;
 	}
-	rq->q.reader = 0;
 	rq->item = cache_get(h);
 	rq->buf = buf;
 	rq->len = PAGE_SIZE - len;
-	rq->readers = 0;
 	spin_lock(&cd->queue_lock);
-	list_add_tail(&rq->q.list, &cd->queue);
+	list_add_tail(&rq->list, &cd->to_read);
 	spin_unlock(&cd->queue_lock);
 	wake_up(&cd->queue_wait);
 	return 0;

--
-- 
Greg Banks, P.Engineer, SGI Australian Software Group.
the brightly coloured sporks of revolution.
I don't speak for SGI.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [patch 11/14] sunrpc: Allocate cache_requests in a single allocation.
  2009-01-08  8:25 [patch 00/14] sunrpc: Sunrpc cache cleanups and upcall rework Greg Banks
                   ` (9 preceding siblings ...)
  2009-01-08  8:25 ` [patch 10/14] sunrpc: Reorganise the queuing of cache upcalls Greg Banks
@ 2009-01-08  8:25 ` Greg Banks
  2009-01-08  8:25 ` [patch 12/14] sunrpc: Centralise memory management of cache_requests Greg Banks
                   ` (3 subsequent siblings)
  14 siblings, 0 replies; 27+ messages in thread
From: Greg Banks @ 2009-01-08  8:25 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Linux NFS ML

Allocate the cache_request object and its buffer in a single
contiguous memory allocation instead of two separate ones.
Code trawling shows that none of the users of caches make upcalls
anywhere near even the small x86 PAGE_SIZE, so we can afford to lose
a few bytes.  One less allocation makes the code a little simpler.
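
As an aside, the shape of the combined allocation is just the usual
zero-length-array idiom; roughly (a sketch of the end result, not the
exact hunks below):

    struct cache_request {
            struct list_head  list;
            struct cache_head *item;
            int               len;
            char              buf[0];  /* upcall text follows in the same allocation */
    };

    struct cache_request *rq = kmalloc(PAGE_SIZE, GFP_KERNEL);
    /* header at the front, buffer is whatever remains of the page */
    int buflen = PAGE_SIZE - sizeof(*rq);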

Signed-off-by: Greg Banks <gnb@sgi.com>
---

 net/sunrpc/cache.c |   28 ++++++++--------------------
 1 file changed, 8 insertions(+), 20 deletions(-)

Index: bfields/net/sunrpc/cache.c
===================================================================
--- bfields.orig/net/sunrpc/cache.c
+++ bfields/net/sunrpc/cache.c
@@ -689,8 +689,8 @@ void cache_clean_deferred(void *owner)
 struct cache_request {
 	struct list_head	list;
 	struct cache_head	*item;
-	char			* buf;
 	int			len;
+	char			buf[0];
 };
 
 static ssize_t
@@ -741,7 +741,6 @@ cache_read(struct file *filp, char __use
 error:
 	/* need to release rq */
 	cache_put(rq->item, cd);
-	kfree(rq->buf);
 	kfree(rq);
 
 	return err;
@@ -893,7 +892,6 @@ static void cache_remove_queued(struct c
 	/* if found, destroy */
 	if (rq) {
 		cache_put(rq->item, cd);
-		kfree(rq->buf);
 		kfree(rq);
 	}
 }
@@ -983,15 +981,13 @@ static void warn_no_listener(struct cach
 
 /*
  * register an upcall request to user-space.
- * Each request is at most one page long.
+ * Each request is at most (slightly less than) one page long.
  */
 static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h)
 {
-
-	char *buf;
 	struct cache_request *rq;
 	char *bp;
-	int len;
+	int len;	/* bytes remaining in the buffer */
 
 	if (cd->cache_request == NULL)
 		return -EINVAL;
@@ -1002,28 +998,20 @@ static int cache_make_upcall(struct cach
 		return -EINVAL;
 	}
 
-	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
-	if (!buf)
-		return -EAGAIN;
-
-	rq = kmalloc(sizeof (*rq), GFP_KERNEL);
-	if (!rq) {
-		kfree(buf);
+	rq = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!rq)
 		return -EAGAIN;
-	}
-
-	bp = buf; len = PAGE_SIZE;
 
+	bp = rq->buf;
+	len = PAGE_SIZE - sizeof(*rq);
 	cd->cache_request(cd, h, &bp, &len);
 
 	if (len < 0) {
-		kfree(buf);
 		kfree(rq);
 		return -EAGAIN;
 	}
 	rq->item = cache_get(h);
-	rq->buf = buf;
-	rq->len = PAGE_SIZE - len;
+	rq->len = PAGE_SIZE - sizeof(*rq) - len;
 	spin_lock(&cd->queue_lock);
 	list_add_tail(&rq->list, &cd->to_read);
 	spin_unlock(&cd->queue_lock);

--
-- 
Greg Banks, P.Engineer, SGI Australian Software Group.
the brightly coloured sporks of revolution.
I don't speak for SGI.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [patch 12/14] sunrpc: Centralise memory management of cache_requests.
  2009-01-08  8:25 [patch 00/14] sunrpc: Sunrpc cache cleanups and upcall rework Greg Banks
                   ` (10 preceding siblings ...)
  2009-01-08  8:25 ` [patch 11/14] sunrpc: Allocate cache_requests in a single allocation Greg Banks
@ 2009-01-08  8:25 ` Greg Banks
  2009-01-08  8:25 ` [patch 13/14] sunrpc: Move struct cache_request to linux/sunrpc/cache.h Greg Banks
                   ` (2 subsequent siblings)
  14 siblings, 0 replies; 27+ messages in thread
From: Greg Banks @ 2009-01-08  8:25 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Linux NFS ML

Centralise the allocation and freeing of cache_request objects instead
of doing it in multiple places.  Use a reference count (but not a
kref, that would require adding a backpointer to the cache_detail in
the cache_request).
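
To illustrate the trade-off, a sketch only (the kref variant below, and
the rq->cd backpointer and rq->kref member it would need, are
hypothetical and not in this patch):

    /* with a plain atomic_t the caller can pass the cache_detail in */
    static void cache_request_put(struct cache_request *rq, struct cache_detail *cd)
    {
            if (atomic_dec_and_test(&rq->count)) {
                    cache_put(rq->item, cd);
                    kfree(rq);
            }
    }

    /* a kref release callback receives only the kref itself... */
    static void cache_request_release(struct kref *kref)
    {
            struct cache_request *rq =
                    container_of(kref, struct cache_request, kref);
            cache_put(rq->item, rq->cd);  /* ...so rq would need a ->cd backpointer */
            kfree(rq);
    }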

Signed-off-by: Greg Banks <gnb@sgi.com>
---

 net/sunrpc/cache.c |   46 +++++++++++++++++++++++++++++++-----------
 1 file changed, 35 insertions(+), 11 deletions(-)

Index: bfields/net/sunrpc/cache.c
===================================================================
--- bfields.orig/net/sunrpc/cache.c
+++ bfields/net/sunrpc/cache.c
@@ -688,11 +688,39 @@ void cache_clean_deferred(void *owner)
 
 struct cache_request {
 	struct list_head	list;
+	atomic_t		count;
 	struct cache_head	*item;
 	int			len;
 	char			buf[0];
 };
 
+static void
+cache_request_put(struct cache_request *rq, struct cache_detail *cd)
+{
+	if (atomic_dec_and_test(&rq->count)) {
+		cache_put(rq->item, cd);
+		kfree(rq);
+	}
+}
+
+static struct cache_request *
+cache_request_get(struct cache_head *h)
+{
+	struct cache_request *rq;
+
+	rq = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!rq)
+		return NULL;
+
+	memset(rq, 0, sizeof(*rq));
+	INIT_LIST_HEAD(&rq->list);
+	atomic_set(&rq->count, 1);
+	rq->len = PAGE_SIZE - sizeof(*rq);
+	rq->item = cache_get(h);
+
+	return rq;
+}
+
 static ssize_t
 cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
 {
@@ -740,8 +768,7 @@ cache_read(struct file *filp, char __use
 
 error:
 	/* need to release rq */
-	cache_put(rq->item, cd);
-	kfree(rq);
+	cache_request_put(rq, cd);
 
 	return err;
 }
@@ -890,10 +917,8 @@ static void cache_remove_queued(struct c
 	spin_unlock(&cd->queue_lock);
 
 	/* if found, destroy */
-	if (rq) {
-		cache_put(rq->item, cd);
-		kfree(rq);
-	}
+	if (rq)
+		cache_request_put(rq, cd);
 }
 
 /*
@@ -998,20 +1023,19 @@ static int cache_make_upcall(struct cach
 		return -EINVAL;
 	}
 
-	rq = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	rq = cache_request_get(h);
 	if (!rq)
 		return -EAGAIN;
 
 	bp = rq->buf;
-	len = PAGE_SIZE - sizeof(*rq);
+	len = rq->len;
 	cd->cache_request(cd, h, &bp, &len);
 
 	if (len < 0) {
-		kfree(rq);
+		cache_request_put(rq, cd);
 		return -EAGAIN;
 	}
-	rq->item = cache_get(h);
-	rq->len = PAGE_SIZE - sizeof(*rq) - len;
+	rq->len -= len;
 	spin_lock(&cd->queue_lock);
 	list_add_tail(&rq->list, &cd->to_read);
 	spin_unlock(&cd->queue_lock);

--
-- 
Greg Banks, P.Engineer, SGI Australian Software Group.
the brightly coloured sporks of revolution.
I don't speak for SGI.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [patch 13/14] sunrpc: Move struct cache_request to linux/sunrpc/cache.h
  2009-01-08  8:25 [patch 00/14] sunrpc: Sunrpc cache cleanups and upcall rework Greg Banks
                   ` (11 preceding siblings ...)
  2009-01-08  8:25 ` [patch 12/14] sunrpc: Centralise memory management of cache_requests Greg Banks
@ 2009-01-08  8:25 ` Greg Banks
  2009-01-08  8:25 ` [patch 14/14] sunrpc: Improve the usefulness of debug printks in the sunrpc cache code Greg Banks
  2009-01-08 19:52 ` [patch 00/14] sunrpc: Sunrpc cache cleanups and upcall rework J. Bruce Fields
  14 siblings, 0 replies; 27+ messages in thread
From: Greg Banks @ 2009-01-08  8:25 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Linux NFS ML

Now the definition of struct cache_request is in the same header file
that declares struct cache_detail, instead of being in a .c file.

Signed-off-by: Greg Banks <gnb@sgi.com>
---

 include/linux/sunrpc/cache.h |   35 ++++++++++++++++++++++++++++++++
 net/sunrpc/cache.c           |   35 --------------------------------
 2 files changed, 35 insertions(+), 35 deletions(-)

Index: bfields/include/linux/sunrpc/cache.h
===================================================================
--- bfields.orig/include/linux/sunrpc/cache.h
+++ bfields/include/linux/sunrpc/cache.h
@@ -131,6 +131,41 @@ struct cache_deferred_req {
 					   int too_many);
 };
 
+/*
+ * Caches communicate with user-space.
+ *
+ * We have a magic /proc file - /proc/sunrpc/<cachename>/channel.
+ *
+ * On read, you get a full request.  If the length passed
+ * to read() is too short, you get nothing and the message is dropped,
+ * which is bad.  So you should use a sufficently large length,
+ * for example PAGE_SIZE.  If there are no requests queued,
+ * read() returns 0.
+ *
+ * On write, an update is processed.  This may, as a side effect,
+ * cause a previously queued request to be de-queued and removed.
+ * Userspace can also pre-emptively write updates which the kernel
+ * has not yet requested.
+ *
+ * Poll works if anything to read, and always allows write.
+ *
+ * The channel is implemented by two linked lists of cache_request
+ * objects.  cd->to_read is requests which have been generated in
+ * the kernel and are waiting for a userspace process to read them.
+ * cd->to_write is requests which have been read by userspace and
+ * are awaiting a reply to be written.
+ *
+ * Both lists are protected by cd->queue_lock.
+ */
+
+struct cache_request {
+	struct list_head	list;
+	atomic_t		count;
+	struct cache_head	*item;
+	int			len;
+	char			buf[0];
+};
+
 
 extern struct cache_head *
 sunrpc_cache_lookup(struct cache_detail *detail,
Index: bfields/net/sunrpc/cache.c
===================================================================
--- bfields.orig/net/sunrpc/cache.c
+++ bfields/net/sunrpc/cache.c
@@ -659,41 +659,6 @@ void cache_clean_deferred(void *owner)
 	}
 }
 
-/*
- * Caches communicate with user-space.
- *
- * We have a magic /proc file - /proc/sunrpc/<cachename>/channel.
- *
- * On read, you get a full request.  If the length passed
- * to read() is too short, you get nothing and the message is dropped,
- * which is bad.  So you should use a sufficently large length,
- * for example PAGE_SIZE.  If there are no requests queued,
- * read() returns 0.
- *
- * On write, an update is processed.  This may, as a side effect,
- * cause a previously queued request to be de-queued and removed.
- * Userspace can also pre-emptively write updates which the kernel
- * has not yet requested.
- *
- * Poll works if anything to read, and always allows write.
- *
- * The channel is implemented by two linked lists of cache_request
- * objects.  cd->to_read is requests which have been generated in
- * the kernel and are waiting for a userspace process to read them.
- * cd->to_write is requests which have been read by userspace and
- * are awaiting a reply to be written.
- *
- * Both lists are protected by cd->queue_lock.
- */
-
-struct cache_request {
-	struct list_head	list;
-	atomic_t		count;
-	struct cache_head	*item;
-	int			len;
-	char			buf[0];
-};
-
 static void
 cache_request_put(struct cache_request *rq, struct cache_detail *cd)
 {

--
-- 
Greg Banks, P.Engineer, SGI Australian Software Group.
the brightly coloured sporks of revolution.
I don't speak for SGI.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [patch 14/14] sunrpc: Improve the usefulness of debug printks in the sunrpc cache code.
  2009-01-08  8:25 [patch 00/14] sunrpc: Sunrpc cache cleanups and upcall rework Greg Banks
                   ` (12 preceding siblings ...)
  2009-01-08  8:25 ` [patch 13/14] sunrpc: Move struct cache_request to linux/sunrpc/cache.h Greg Banks
@ 2009-01-08  8:25 ` Greg Banks
  2009-01-08 19:52 ` [patch 00/14] sunrpc: Sunrpc cache cleanups and upcall rework J. Bruce Fields
  14 siblings, 0 replies; 27+ messages in thread
From: Greg Banks @ 2009-01-08  8:25 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Linux NFS ML

Signed-off-by: Greg Banks <gnb@sgi.com>
---

 net/sunrpc/cache.c |  116 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 100 insertions(+), 16 deletions(-)

Index: bfields/net/sunrpc/cache.c
===================================================================
--- bfields.orig/net/sunrpc/cache.c
+++ bfields/net/sunrpc/cache.c
@@ -109,6 +109,8 @@ static int cache_fresh_locked(struct cac
 {
 	h->expiry_time = expiry;
 	h->last_refresh = get_seconds();
+	dprintk("%s: refreshing entry %p, expiry %ld refreshed %ld\n",
+		__func__, h, h->expiry_time, h->last_refresh);
 	return !test_and_set_bit(CACHE_VALID, &h->flags);
 }
 
@@ -194,35 +196,57 @@ int cache_check(struct cache_detail *cd,
 {
 	int rv;
 	long refresh_age, age;
+	long now = get_seconds();
+
+	dprintk("%s: cache %s entry %p\n",
+		__func__, cd->name, h);
 
 	/* First decide return status as best we can */
 	if (!test_bit(CACHE_VALID, &h->flags) ||
-	    h->expiry_time < get_seconds())
+	    h->expiry_time < now) {
+		dprintk("%s: entry %p invalid, flags %lu expiry %ld now %ld\n",
+			__func__, h, h->flags, h->expiry_time, now);
 		rv = -EAGAIN;
-	else if (cd->flush_time > h->last_refresh)
+	} else if (cd->flush_time > h->last_refresh) {
+		dprintk("%s: entry %p flushed: last_refresh %ld before flush_time %ld\n",
+			__func__, h, h->last_refresh, cd->flush_time);
 		rv = -EAGAIN;
-	else {
+	} else {
 		/* entry is valid */
-		if (test_bit(CACHE_NEGATIVE, &h->flags))
+		if (test_bit(CACHE_NEGATIVE, &h->flags)) {
+			dprintk("%s: entry %p negative\n",
+				__func__, h);
 			rv = -ENOENT;
+		}
 		else rv = 0;
 	}
 
 	/* now see if we want to start an upcall */
 	refresh_age = (h->expiry_time - h->last_refresh);
-	age = get_seconds() - h->last_refresh;
+	age = now - h->last_refresh;
+	dprintk("%s: entry %p now=%ld expiry=%ld last_refresh=%ld refage=%ld age=%ld\n",
+		__func__, h, now, h->expiry_time, h->last_refresh, refresh_age, age);
 
 	if (rqstp == NULL) {
-		if (rv == -EAGAIN)
+		if (rv == -EAGAIN) {
+			dprintk("%s: entry %p, needs upcall but cannot defer, returning -ENOENT\n",
+				__func__, h);
 			rv = -ENOENT;
+		}
 	} else if (rv == -EAGAIN || age > refresh_age/2) {
-		dprintk("RPC:       Want update, refage=%ld, age=%ld\n",
-				refresh_age, age);
+		dprintk("%s: entry %p needs upcall\n",
+		        __func__, h);
 		if (!test_and_set_bit(CACHE_PENDING, &h->flags)) {
+			dprintk("%s: entry %p initiating upcall\n",
+				__func__, h);
 			switch (cache_make_upcall(cd, h)) {
 			case -EINVAL:
+				dprintk("%s: entry %p cache_make_upcall returned -EINVAL\n",
+					__func__, h);
 				clear_bit(CACHE_PENDING, &h->flags);
 				if (rv == -EAGAIN) {
+					dprintk("%s: entry %p going NEGATIVE\n",
+						__func__, h);
 					set_bit(CACHE_NEGATIVE, &h->flags);
 					cache_fresh_unlocked(h, cd,
 					     cache_fresh_locked(h, get_seconds()+CACHE_NEW_EXPIRY));
@@ -231,6 +255,8 @@ int cache_check(struct cache_detail *cd,
 				break;
 
 			case -EAGAIN:
+				dprintk("%s: entry %p cache_make_upcall returned -EAGAIN\n",
+					__func__, h);
 				clear_bit(CACHE_PENDING, &h->flags);
 				cache_revisit_request(h);
 				break;
@@ -244,6 +270,9 @@ int cache_check(struct cache_detail *cd,
 
 	if (rv)
 		cache_put(h, cd);
+
+	dprintk("%s: entry %p, returning %d\n",
+		__func__, h, rv);
 	return rv;
 }
 EXPORT_SYMBOL(cache_check);
@@ -559,10 +588,15 @@ static int cache_defer_req(struct cache_
 	struct cache_deferred_req *dreq;
 	int hash = DFR_HASH(h);
 
+	dprintk("%s: deferring entry %p\n",
+		__func__, h);
+
 	if (cache_defer_cnt >= DFR_MAX) {
 		/* too much in the cache, randomly drop this one,
 		 * or continue and drop the oldest below
 		 */
+		dprintk("%s: entry %p, too many deferrals, dropping new\n",
+			__func__, h);
 		if (net_random()&1)
 			return -ETIMEDOUT;
 	}
@@ -585,6 +619,8 @@ static int cache_defer_req(struct cache_
 	if (++cache_defer_cnt > DFR_MAX) {
 		dreq = list_entry(cache_defer_list.prev,
 				  struct cache_deferred_req, recent);
+		dprintk("%s: entry %p, too many deferrals, dropping old %p\n",
+			__func__, h, dreq);
 		list_del(&dreq->recent);
 		list_del(&dreq->hash);
 		cache_defer_cnt--;
@@ -610,6 +646,9 @@ static void cache_revisit_request(struct
 	struct list_head *lp;
 	int hash = DFR_HASH(h);
 
+	dprintk("%s: revisiting entry %p\n",
+		__func__, h);
+
 	INIT_LIST_HEAD(&pending);
 	spin_lock(&cache_defer_lock);
 
@@ -693,6 +732,9 @@ cache_read(struct file *filp, char __use
 	struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data;
 	int err;
 
+	dprintk("%s: cache %s pid %d(%s) reading %zd bytes\n",
+		__func__, cd->name, task_pid_nr(current), current->comm, count);
+
 	if (count == 0)
 		return 0;
 
@@ -704,17 +746,24 @@ cache_read(struct file *filp, char __use
 	}
 	spin_unlock(&cd->queue_lock);
 
-	if (rq == NULL)
+	if (rq == NULL) {
+		dprintk("%s: cache %s no queued requests\n",
+			__func__, cd->name);
 		return 0; /* no queued requests */
+	}
 
 	err = -EAGAIN;	/* gnb:TODO...this used to cause a loop, wtf */
 	if (!test_bit(CACHE_PENDING, &rq->item->flags))
 		goto error;
 
-	/* gnb:TODO whine to dmesg; stat */
+	/* gnb:TODO stat */
 	err = -EFAULT;
-	if (count < rq->len)
+	if (count < rq->len) {
+		dprintk("%s: cache %s short read, message length %d"
+		        " dropping request %p entry %p\n",
+			__func__, cd->name, rq->len, rq, rq->item);
 		goto error; /* We make no pretence at handling short reads */
+	}
 	count = rq->len;
 
 	err = -EFAULT;
@@ -735,6 +784,7 @@ error:
 	/* need to release rq */
 	cache_request_put(rq, cd);
 
+	dprintk("%s: returning %d\n", __func__, err);
 	return err;
 }
 
@@ -747,27 +797,38 @@ cache_write(struct file *filp, const cha
 	char *tmp;
 	int tmp_size = PAGE_SIZE;
 
+	dprintk("%s: cache %s pid %d(%s) writing %zd bytes\n",
+		__func__, cd->name, task_pid_nr(current), current->comm, count);
+
 	if (count == 0)
 		return 0;
+	err = -EINVAL;
 	if (count >= tmp_size)
-		return -EINVAL;
+		goto out2;
 
+	err = -ENOMEM;
 	tmp = kmalloc(tmp_size, GFP_KERNEL);
 	if (!tmp)
-		return -ENOMEM;
+		goto out2;
 
 	err = -EFAULT;
 	if (copy_from_user(tmp, buf, count))
 		goto out;
 
 	tmp[count] = '\0';
+	dprintk("%s: cache %s parsing update text \"%.*s\"\n",
+		__func__, cd->name, (int)count, tmp);
 	err = -EINVAL;
 	if (cd->cache_parse)
 		err = cd->cache_parse(cd, tmp, count);
 
 out:
 	kfree(tmp);
-	return err ? err : count;
+out2:
+	if (!err)
+		err = count;
+	dprintk("%s: returning %d\n", __func__, err);
+	return err;
 }
 
 static unsigned int
@@ -776,6 +837,9 @@ cache_poll(struct file *filp, poll_table
 	unsigned int mask = 0;
 	struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data;
 
+	dprintk("%s: cache %s pid %d(%s) polling on filp %p\n",
+		__func__, cd->name, task_pid_nr(current), current->comm, filp);
+
 	poll_wait(filp, &cd->queue_wait, wait);
 
 	if (filp->f_mode & FMODE_WRITE)
@@ -799,6 +863,9 @@ cache_ioctl(struct inode *ino, struct fi
 	struct cache_request *rq;
 	struct cache_detail *cd = PDE(ino)->data;
 
+	dprintk("%s: cache %s, cmd=%u\n",
+		__func__, cd->name, cmd);
+
 	if (cmd != FIONREAD)
 		return -EINVAL;
 	if (!(filp->f_mode & FMODE_READ))
@@ -811,6 +878,8 @@ cache_ioctl(struct inode *ino, struct fi
 	if (!list_empty(&cd->to_read)) {
 		rq = container_of(cd->to_read.next, struct cache_request, list);
 		len = rq->len;
+		dprintk("%s: cache %s, request %p is length %d\n",
+			__func__, cd->name, rq, rq->len);
 	}
 
 	spin_unlock(&cd->queue_lock);
@@ -870,6 +939,9 @@ static void cache_remove_queued(struct c
 {
 	struct cache_request *rq;
 
+	dprintk("%s: trying to remove entry %p\n",
+		__func__, h);
+
 	/* find and de-queue */
 	spin_lock(&cd->queue_lock);
 
@@ -882,8 +954,11 @@ static void cache_remove_queued(struct c
 	spin_unlock(&cd->queue_lock);
 
 	/* if found, destroy */
-	if (rq)
+	if (rq) {
+		dprintk("%s: dropping reference on entry %p\n",
+			__func__, h);
 		cache_request_put(rq, cd);
+	}
 }
 
 /*
@@ -992,6 +1067,9 @@ static int cache_make_upcall(struct cach
 	if (!rq)
 		return -EAGAIN;
 
+	dprintk("%s: entry %p, request %p\n",
+		__func__, h, rq);
+
 	bp = rq->buf;
 	len = rq->len;
 	cd->cache_request(cd, h, &bp, &len);
@@ -1001,6 +1079,10 @@ static int cache_make_upcall(struct cach
 		return -EAGAIN;
 	}
 	rq->len -= len;
+
+	dprintk("%s: cache %s, entry %p, queueing request %p text \"%.*s\"\n",
+		__func__, cd->name, h, rq, rq->len, rq->buf);
+
 	spin_lock(&cd->queue_lock);
 	list_add_tail(&rq->list, &cd->to_read);
 	spin_unlock(&cd->queue_lock);

--
-- 
Greg Banks, P.Engineer, SGI Australian Software Group.
the brightly coloured sporks of revolution.
I don't speak for SGI.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [patch 00/14] sunrpc: Sunrpc cache cleanups and upcall rework
  2009-01-08  8:25 [patch 00/14] sunrpc: Sunrpc cache cleanups and upcall rework Greg Banks
                   ` (13 preceding siblings ...)
  2009-01-08  8:25 ` [patch 14/14] sunrpc: Improve the usefulness of debug printks in the sunrpc cache code Greg Banks
@ 2009-01-08 19:52 ` J. Bruce Fields
  2009-01-09  1:42   ` Greg Banks
  14 siblings, 1 reply; 27+ messages in thread
From: J. Bruce Fields @ 2009-01-08 19:52 UTC (permalink / raw)
  To: Greg Banks; +Cc: Linux NFS ML

On Thu, Jan 08, 2009 at 07:25:10PM +1100, Greg Banks wrote:
> These patches are a snapshot of in-progress work

That sounds suspiciously like "do not apply!".  I may as well queue up
the first 6 for 2.6.30, though?

> to fix problems with
> the sunrpc upcall mechanism which are triggered under load when using
> multi-threaded rpc.mountd.  These are currently a hot issue with one
> of SGI's customers (SGI PV988959).
> 
> The early patches in the series are cleanups with zero logic changes.
> 
> The patches have been lightly tested, by mounting and running the
> cthon04 tests.  They have not been shipped or used in the field.

OK, thanks.

--b.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [patch 10/14] sunrpc: Reorganise the queuing of cache upcalls.
  2009-01-08  8:25 ` [patch 10/14] sunrpc: Reorganise the queuing of cache upcalls Greg Banks
@ 2009-01-08 19:57   ` J. Bruce Fields
  2009-01-09  2:40     ` Greg Banks
  0 siblings, 1 reply; 27+ messages in thread
From: J. Bruce Fields @ 2009-01-08 19:57 UTC (permalink / raw)
  To: Greg Banks; +Cc: Linux NFS ML

On Thu, Jan 08, 2009 at 07:25:20PM +1100, Greg Banks wrote:
> Instead of a single list which confusingly contains a mixture of
> cache_request and cache_reader structures in various states, use two
> separate lists.
> 
> Both new lists contain cache_request structures, the cache_reader
> structure is eliminated.  Its only purpose was to hold state which
> supports partial reads of upcalls from userspace.  However the
> implementation of partial reads is broken in the presence of the
> multi-threaded rpc.mountd, in two different ways.
> 
> Firstly, the kernel code assumes that each reader uses a separate
> struct file; because rpc.mountd fork()s *after* opening the cache
> file descriptor this is not true.  Thus the single struct file and
> the single rp->offset field are shared between multiple threads.
> Unfortunately rp->offset is not maintained in a safe manner.  This can
> lead to the BUG_ON() in cache_read() being tripped.

Hm, have you seen this happen?

All the current upcalls are small enough (and mountd's default read
buffer large enough) that messages should always be read in one gulp.

> 
> Secondly, even if the kernel code worked perfectly it's sharing
> a single offset between multiple reading rpc.mountd threads.

I made what I suspect is a similar patch a while ago and never got
around to submitting it.  (Arrgh!  Bad me!)  Appended below if it's of
any use to you to compare.  (Happy to apply either your patch or mine
depending which looks better; I haven't tried to compare carefully.)

--b.

commit f948255274fdc8b5932ad4e88d5610c6ea3de9f7
Author: J. Bruce Fields <bfields@citi.umich.edu>
Date:   Wed Dec 5 14:52:51 2007 -0500

    sunrpc: simplify dispatching of upcalls to file descriptors
    
    If you want to handle a lot of upcall requests, especially requests that
    may take a while (e.g. because they require talking to ldap or kerberos
    servers, or spinning up a newly accessed disk), then you need to process
    upcalls in parallel.
    
    When there are multiple requests available for processing, we'd prefer
    not to hand the same request to multiple readers, as that either wastes
    resources or requires upcall-processing to detect such cases itself.
    
    The current code keeps a single list containing all open file
    descriptors and requests.  Each file descriptor is advanced through the
    list until it finds a request.  Multiple requests may find the same
    request at once, and requests are left on the list until they are
    responded to.  Under this scheme the likelihood of two readers getting
    the same request is high.
    
    You can mitigate the problem by reading from a single file descriptor
    that's shared between processes (as mountd currently does), but that
    requires the read to always provide a buffer large enough to get the
    whole request in one atomic read.  That's less practical for gss
    init_sec_context calls, which could vary in size from a few hundred
    bytes to 100k or so.
    
    So, instead move requests that are being read off the common list
    of requests into the file descriptor's private area, and move them to
    the end of the list after they're read.
    
    That makes it a little less likely they'll be re-read by another process
    immediately.
    
    This also simplifies the code.
    
    Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 636c8e0..5512fbb 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -670,39 +670,31 @@ void cache_clean_deferred(void *owner)
  * On write, an update request is processed.
  * Poll works if anything to read, and always allows write.
  *
- * Implemented by linked list of requests.  Each open file has
- * a ->private that also exists in this list.  New requests are added
- * to the end and may wakeup and preceding readers.
- * New readers are added to the head.  If, on read, an item is found with
- * CACHE_UPCALLING clear, we free it from the list.
- *
+ * Keep a list of requests associated with each cache descriptor.
+ * On read from an open file descriptor, pick the request at the
+ * head of the list and move it to the file descriptor's private area.
+ * After it is read, return it to the tail of the list.  Keep requests
+ * on the list until we notice that they have been responded to (at
+ * which point CACHE_PENDING is cleared).
  */
 
 static DEFINE_SPINLOCK(queue_lock);
 static DEFINE_MUTEX(queue_io_mutex);
 
-struct cache_queue {
-	struct list_head	list;
-	int			reader;	/* if 0, then request */
-};
 struct cache_request {
-	struct cache_queue	q;
+	struct list_head	list;
 	struct cache_head	*item;
 	char			* buf;
+	int			offset;
 	int			len;
-	int			readers;
-};
-struct cache_reader {
-	struct cache_queue	q;
-	int			offset;	/* if non-0, we have a refcnt on next request */
 };
 
 static ssize_t
 cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
 {
-	struct cache_reader *rp = filp->private_data;
-	struct cache_request *rq;
+	struct cache_request *rq = filp->private_data;
 	struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data;
+	struct list_head *queue = &cd->queue;
 	int err;
 
 	if (count == 0)
@@ -711,60 +703,45 @@ cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
 	mutex_lock(&queue_io_mutex); /* protect against multiple concurrent
 			      * readers on this file */
  again:
-	spin_lock(&queue_lock);
-	/* need to find next request */
-	while (rp->q.list.next != &cd->queue &&
-	       list_entry(rp->q.list.next, struct cache_queue, list)
-	       ->reader) {
-		struct list_head *next = rp->q.list.next;
-		list_move(&rp->q.list, next);
+	if (rq == NULL) {
+		if (!list_empty(queue)) {
+			spin_lock(&queue_lock);
+			rq = list_first_entry(queue, struct cache_request,
+						list);
+			list_del_init(&rq->list);
+			filp->private_data = rq;
+			spin_unlock(&queue_lock);
+		}
 	}
-	if (rp->q.list.next == &cd->queue) {
-		spin_unlock(&queue_lock);
+	if (rq == NULL) {
 		mutex_unlock(&queue_io_mutex);
-		BUG_ON(rp->offset);
-		return 0;
+		return -EAGAIN;
 	}
-	rq = container_of(rp->q.list.next, struct cache_request, q.list);
-	BUG_ON(rq->q.reader);
-	if (rp->offset == 0)
-		rq->readers++;
-	spin_unlock(&queue_lock);
 
-	if (rp->offset == 0 && !test_bit(CACHE_PENDING, &rq->item->flags)) {
+	if (!test_bit(CACHE_PENDING, &rq->item->flags))
 		err = -EAGAIN;
-		spin_lock(&queue_lock);
-		list_move(&rp->q.list, &rq->q.list);
-		spin_unlock(&queue_lock);
-	} else {
-		if (rp->offset + count > rq->len)
-			count = rq->len - rp->offset;
+	else {
+		if (rq->offset + count > rq->len)
+			count = rq->len - rq->offset;
 		err = -EFAULT;
-		if (copy_to_user(buf, rq->buf + rp->offset, count))
+		if (copy_to_user(buf, rq->buf + rq->offset, count))
 			goto out;
-		rp->offset += count;
-		if (rp->offset >= rq->len) {
-			rp->offset = 0;
-			spin_lock(&queue_lock);
-			list_move(&rp->q.list, &rq->q.list);
-			spin_unlock(&queue_lock);
-		}
+		rq->offset += count;
+		if (rq->offset >= rq->len)
+			rq->offset = 0;
 		err = 0;
 	}
  out:
-	if (rp->offset == 0) {
-		/* need to release rq */
+	if (rq->offset == 0) {
+		filp->private_data = NULL;
 		spin_lock(&queue_lock);
-		rq->readers--;
-		if (rq->readers == 0 &&
-		    !test_bit(CACHE_PENDING, &rq->item->flags)) {
-			list_del(&rq->q.list);
-			spin_unlock(&queue_lock);
+		if (!test_bit(CACHE_PENDING, &rq->item->flags)) {
 			cache_put(rq->item, cd);
 			kfree(rq->buf);
 			kfree(rq);
 		} else
-			spin_unlock(&queue_lock);
+			list_add_tail(&rq->list, queue);
+		spin_unlock(&queue_lock);
 	}
 	if (err == -EAGAIN)
 		goto again;
@@ -808,26 +785,19 @@ static unsigned int
 cache_poll(struct file *filp, poll_table *wait)
 {
 	unsigned int mask;
-	struct cache_reader *rp = filp->private_data;
-	struct cache_queue *cq;
 	struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data;
+	struct list_head *queue = &cd->queue;
 
 	poll_wait(filp, &queue_wait, wait);
 
 	/* alway allow write */
 	mask = POLL_OUT | POLLWRNORM;
 
-	if (!rp)
-		return mask;
-
 	spin_lock(&queue_lock);
 
-	for (cq= &rp->q; &cq->list != &cd->queue;
-	     cq = list_entry(cq->list.next, struct cache_queue, list))
-		if (!cq->reader) {
-			mask |= POLLIN | POLLRDNORM;
-			break;
-		}
+	if (filp->private_data || !list_empty(queue))
+		mask |= POLLIN | POLLRDNORM;
+
 	spin_unlock(&queue_lock);
 	return mask;
 }
@@ -837,11 +807,11 @@ cache_ioctl(struct inode *ino, struct file *filp,
 	    unsigned int cmd, unsigned long arg)
 {
 	int len = 0;
-	struct cache_reader *rp = filp->private_data;
-	struct cache_queue *cq;
+	struct cache_request *rq = filp->private_data;
 	struct cache_detail *cd = PDE(ino)->data;
+	struct list_head *queue = &cd->queue;
 
-	if (cmd != FIONREAD || !rp)
+	if (cmd != FIONREAD)
 		return -EINVAL;
 
 	spin_lock(&queue_lock);
@@ -849,14 +819,11 @@ cache_ioctl(struct inode *ino, struct file *filp,
 	/* only find the length remaining in current request,
 	 * or the length of the next request
 	 */
-	for (cq= &rp->q; &cq->list != &cd->queue;
-	     cq = list_entry(cq->list.next, struct cache_queue, list))
-		if (!cq->reader) {
-			struct cache_request *cr =
-				container_of(cq, struct cache_request, q);
-			len = cr->len - rp->offset;
-			break;
-		}
+	if (!rq)
+		rq = list_first_entry(queue, struct cache_request, list);
+	if (rq)
+		len = rq->len - rq->offset;
+
 	spin_unlock(&queue_lock);
 
 	return put_user(len, (int __user *)arg);
@@ -865,51 +832,33 @@ cache_ioctl(struct inode *ino, struct file *filp,
 static int
 cache_open(struct inode *inode, struct file *filp)
 {
-	struct cache_reader *rp = NULL;
-
 	nonseekable_open(inode, filp);
 	if (filp->f_mode & FMODE_READ) {
 		struct cache_detail *cd = PDE(inode)->data;
 
-		rp = kmalloc(sizeof(*rp), GFP_KERNEL);
-		if (!rp)
-			return -ENOMEM;
-		rp->offset = 0;
-		rp->q.reader = 1;
 		atomic_inc(&cd->readers);
-		spin_lock(&queue_lock);
-		list_add(&rp->q.list, &cd->queue);
-		spin_unlock(&queue_lock);
 	}
-	filp->private_data = rp;
+	filp->private_data = NULL;
 	return 0;
 }
 
 static int
 cache_release(struct inode *inode, struct file *filp)
 {
-	struct cache_reader *rp = filp->private_data;
+	struct cache_request *rq = filp->private_data;
 	struct cache_detail *cd = PDE(inode)->data;
+	struct list_head *queue = &cd->queue;
+
+	filp->private_data = NULL;
 
-	if (rp) {
+	if (rq) {
+		rq->offset = 0;
 		spin_lock(&queue_lock);
-		if (rp->offset) {
-			struct cache_queue *cq;
-			for (cq= &rp->q; &cq->list != &cd->queue;
-			     cq = list_entry(cq->list.next, struct cache_queue, list))
-				if (!cq->reader) {
-					container_of(cq, struct cache_request, q)
-						->readers--;
-					break;
-				}
-			rp->offset = 0;
-		}
-		list_del(&rp->q.list);
+		list_add_tail(&rq->list, queue);
 		spin_unlock(&queue_lock);
 
-		filp->private_data = NULL;
-		kfree(rp);
-
+	}
+	if (filp->f_mode & FMODE_READ) {
 		cd->last_close = get_seconds();
 		atomic_dec(&cd->readers);
 	}
@@ -932,20 +881,15 @@ static const struct file_operations cache_file_operations = {
 
 static void queue_loose(struct cache_detail *detail, struct cache_head *ch)
 {
-	struct cache_queue *cq;
+	struct cache_request *rq;
 	spin_lock(&queue_lock);
-	list_for_each_entry(cq, &detail->queue, list)
-		if (!cq->reader) {
-			struct cache_request *cr = container_of(cq, struct cache_request, q);
-			if (cr->item != ch)
-				continue;
-			if (cr->readers != 0)
-				continue;
-			list_del(&cr->q.list);
+	list_for_each_entry(rq, &detail->queue, list)
+		if (rq->item  == ch) {
+			list_del(&rq->list);
 			spin_unlock(&queue_lock);
-			cache_put(cr->item, detail);
-			kfree(cr->buf);
-			kfree(cr);
+			cache_put(rq->item, detail);
+			kfree(rq->buf);
+			kfree(rq);
 			return;
 		}
 	spin_unlock(&queue_lock);
@@ -1074,13 +1018,12 @@ static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h)
 		kfree(crq);
 		return -EAGAIN;
 	}
-	crq->q.reader = 0;
 	crq->item = cache_get(h);
 	crq->buf = buf;
 	crq->len = PAGE_SIZE - len;
-	crq->readers = 0;
+	crq->offset = 0;
 	spin_lock(&queue_lock);
-	list_add_tail(&crq->q.list, &detail->queue);
+	list_add_tail(&crq->list, &detail->queue);
 	spin_unlock(&queue_lock);
 	wake_up(&queue_wait);
 	return 0;

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [patch 00/14] sunrpc: Sunrpc cache cleanups and upcall rework
  2009-01-08 19:52 ` [patch 00/14] sunrpc: Sunrpc cache cleanups and upcall rework J. Bruce Fields
@ 2009-01-09  1:42   ` Greg Banks
  0 siblings, 0 replies; 27+ messages in thread
From: Greg Banks @ 2009-01-09  1:42 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Linux NFS ML

J. Bruce Fields wrote:
> On Thu, Jan 08, 2009 at 07:25:10PM +1100, Greg Banks wrote:
>   
>> These patches are a snapshot of in-progress work
>>     
>
> That sounds suspiciously like "do not apply!". 
On these patches, it's your call.

When I say "in-progress" I really mean that these haven't shipped in an
SGI product. There are more patches in this same series, not yet posted,
which are definitely in the "do not apply" category and which I will
mark as such when they appear.

For the latter (logic-changing) patches, I can report that they've had
the same amount of testing that most Linux patches claim to have
received.  In other words they worked fine on one architecture, with the
obvious publicly available testsuite, with a small client count, with
light load levels, and no data corruption testing, and no point
regression test for the bug being fixed.  I've seen code checked into
Linux with less testing.

OTOH these patches have not been through an SGI QA cycle, and given the
circumstances it seems unlikely they will.  So I would not ship these in
an SGI product yet.

Your call.  I would be comfortable with these patches going into 2.6.30.

>  I may as well queue up
> the first 6 for 2.6.30, though?
>   
Yes, the first 6 make no logic changes and I believe they are quite
harmless.

-- 
Greg Banks, P.Engineer, SGI Australian Software Group.
the brightly coloured sporks of revolution.
I don't speak for SGI.


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [patch 10/14] sunrpc: Reorganise the queuing of cache upcalls.
  2009-01-08 19:57   ` J. Bruce Fields
@ 2009-01-09  2:40     ` Greg Banks
       [not found]       ` <4966B92F.8060008-cP1dWloDopni96+mSzHFpQC/G2K4zDHf@public.gmane.org>
  0 siblings, 1 reply; 27+ messages in thread
From: Greg Banks @ 2009-01-09  2:40 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Linux NFS ML

J. Bruce Fields wrote:
> On Thu, Jan 08, 2009 at 07:25:20PM +1100, Greg Banks wrote:
>   
>> [...]
>> Firstly, the kernel code assumes that each reader uses a separate
>> struct file; because rpc.mountd fork()s *after* opening the cache
>> file descriptor this is not true.  Thus the single struct file and
>> the single rp->offset field are shared between multiple threads.
>> Unfortunately rp->offset is not maintained in a safe manner.  This can
>> lead to the BUG_ON() in cache_read() being tripped.
>>     
>
> Hm, have you seen this happen?
>   

Yes, at last count at least a half-dozen times with two separate
customers.  For future reference, the SGI bug report is PV 988959. 
Here's a representative crash report:

System dropped to KDB with following crash string:

    <4>kernel BUG at net/sunrpc/cache.c:578!
    <4>rpc.mountd[50696]: bugcheck! 0 [1]
    <4>nfsd: request from insecure port (10.150.3.54:911)!
    <4>Modules linked in: af_packet svcxprt_rdma nfsd exportfs lockd<4>nfsd: request from insecure port (10.150.3.59:838)!
    <4> nfs_acl sunrpc rdma_ucm rdma_cm iw_cm ib_addr ib_uverbs ib_umad iw_cxgb3 cxgb3 mlx4_ib mlx4_core iscsi_trgt crc32c libcrc32c mst_pciconf mst_pci binfmt_misc button ipt_REJECT xt_tcpudp iptable_filter ip_tables x_tables scsi_dump dump_blockdev dump_gzi zlib_deflate dump xfs_quota xfs_dmapi dmapi<4>nfsd: request from insecure port (10.150.3.84:744)!
    <4>nfsd: request from insecure port (10.150.3.68:768)!
    <4> nls_iso8859_1 loop sgi_xvm sgi_os_lib ib_ipoib ib_cm ib_sa ipv6 kdbm_vm kdbm_task kdbm_sched kdbm_pg mmtimer mca_recovery arsess xpmem xp numatools mspec shpchp pci_hotplug ib_mthca ide_cd ib_mad ehci_hcd ohci_hcd cdrom ib_core tg3 usbcore pata_sil680siimage xfs fan thermal processor ide_generic qla2xxx firmware_class mptfc scsi_transport_fc mptspi scsi_transport_spi sg mptsas mptscsih mptbase scsi_transport_sas qla1280 sata_vsc libata sgiioc4 ioc4 sd_mod scsi_mod ide_disk ide_core
    <4>
    <4>Pid: 50696, CPU 1, comm:           rpc.mountd
    <4>psr : 0000101008526010 ifs : 800000000000050d ip  : [<a000000214362120>]    Tainted: P U
    <4>ip is at cache_read+0x2a0/0x840 [sunrpc]
    <4>unat: 0000000000000000 pfs : 000000000000050d rsc : 0000000000000003
    <4>rnat: 0000000000000000 bsps: 0000000000000000 pr  : 95a95666659aa959
    <4>ldrs: 0000000000000000 ccv : 0000000000000000 fpsr: 0009804c8a70033f
    <4>csd : 0000000000000000 ssd : 0000000000000000
    <4>b0  : a000000214362120 b6  : a000000100431640 b7  : a00000010000c320
    <4>f6  : 1003e00000000000000a0 f7  : 1003e20c49ba5e353f7cf
    <4>f8  : 1003e00000000000004e2 f9  : 1003e000000000fa00000
    <4>f10 : 1003e000000003b9aca00 f11 : 1003e431bde82d7b634db
    <4>r1  : a000000100b31db0 r2  : e000006003118e6b r3  : a00000010087fad8
    <4>r8  : 0000000000000029 r9  : ffffffffffff8e68 r10 : ffffffffffff0028
    <4>r11 : 8000020310000380 r12 : e0000171ebb77e20 r13 : e0000171ebb70000
    <4>r14 : 0000000000004000 r15 : a00000010087fad8 r16 : a00000010087fae0
    <4>r17 : e0000171e575fe18 r18 : 0000000000000027 r19 : 0000020000000000
    <4>r20 : 0fd0000000000000 r21 : 80000001fdc00820 r22 : 0000000000000004
    <4>r23 : 0000000000000820 r24 : 80000001fdc00000 r25 : 0000000000000820
    <4>r26 : 0000000000000000 r27 : e000006003118e6b r28 : e000006003118e68
    <4>r29 : ffffffffffff8e68 r30 : e000006003120000 r31 : ffffffffffff0028
    <4>
    <4>Call Trace:
    <4> [<a000000100014ce0>] show_stack+0x40/0xa0
    <4>                                sp=e0000171ebb779b0 bsp=e0000171ebb71478
    <4> [<a0000001000155e0>] show_regs+0x840/0x880
    <4>                                sp=e0000171ebb77b80 bsp=e0000171ebb71420
    <4> [<a000000100039460>] die+0x1c0/0x360
    <4>                                sp=e0000171ebb77b80 bsp=e0000171ebb713d0
    <4> [<a000000100039650>] die_if_kernel+0x50/0x80
    <4>                                sp=e0000171ebb77ba0 bsp=e0000171ebb713a0
    <4> [<a000000100559b80>] ia64_bad_break+0x260/0x540
    <4>                                sp=e0000171ebb77ba0 bsp=e0000171ebb71378
    <4> [<a00000010000cb20>] ia64_leave_kernel+0x0/0x280
    <4>                                sp=e0000171ebb77c50 bsp=e0000171ebb71378
    <4> [<a000000214362120>] cache_read+0x2a0/0x840 [sunrpc]
    <4>                                sp=e0000171ebb77e20 bsp=e0000171ebb71310
    <4> [<a0000001001615a0>] vfs_read+0x240/0x3e0
    <4>                                sp=e0000171ebb77e20 bsp=e0000171ebb712c0
    <4> [<a000000100161e10>] sys_read+0x70/0xe0
    <4>                                sp=e0000171ebb77e20 bsp=e0000171ebb71248
    <4> [<a00000010000c980>] ia64_ret_from_syscall+0x0/0x20
    <4>                                sp=e0000171ebb77e30 bsp=e0000171ebb71248
    <4> [<a000000000010620>] __kernel_syscall_via_break+0x0/0x20
    <4>                                sp=e0000171ebb78000 bsp=e0000171ebb71248


Rpc.mountd is doing a read() on /proc/net/rpc/auth.unix.ip/channel.  The
BUG_ON() tripped is here:

    553 static ssize_t
    554 cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
    555 {
    556         struct cache_reader *rp = filp->private_data;
    557         struct cache_request *rq;
    558         struct cache_detail *cd = PDE(filp->f_dentry->d_inode)->data;
    ...
    567         spin_lock(&queue_lock);
    568         /* need to find next request */
    569         while (rp->q.list.next != &cd->queue &&
    570                list_entry(rp->q.list.next, struct cache_queue, list)
    571                ->reader) {
    572                 struct list_head *next = rp->q.list.next;
    573                 list_move(&rp->q.list, next);
    574         }
    575         if (rp->q.list.next == &cd->queue) {
    576                 spin_unlock(&queue_lock);
    577                 up(&queue_io_sem);
    578                 BUG_ON(rp->offset);  <----- SPLAT!
    579                 return 0;
    580         }


Note there's one obvious problem: the BUG_ON() is testing the condition
(rp->offset) outside any locking.  In at least one dump rp->offset is
actually zero by the time the page got saved to the dump device, so I
suspect a race here.

> All the current upcalls are small enough (and mountd's default read
> buffer large enough) that messages should always be read in one gulp.
>   
Indeed.  This is why SGI shipped a workaround, comprising a backported
commit to mountd to use the larger read buffer size and thus avoid
triggering the partial read path.  Unfortunately this seems not to have
helped.  I *suspect* that the BUG_ON() is racing with another thread
running through this code later in the same function:

    596                 if (copy_to_user(buf, rq->buf + rp->offset, count))
    597                         goto out;
    598                 rp->offset += count;
    599                 if (rp->offset >= rq->len) {
    600                         rp->offset = 0;
    601                         spin_lock(&queue_lock);


Note that even if userspace does full sized reads, rp->offset briefly
becomes nonzero in code which is serialised by queue_io_mutex.  The
BUG_ON() is not serialised by that lock.
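
Concretely, the interleaving I have in mind (line numbers as in the
listings above; both threads share the same struct file and hence rp):

    thread A (holds queue_io_sem)          thread B (just did up(&queue_io_sem))
    -----------------------------          -------------------------------------
    598:  rp->offset += count;
                                           578:  BUG_ON(rp->offset);
                                                 /* sees the transiently nonzero
                                                  * offset and trips */
    600:  rp->offset = 0;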

The two customers to which this is occurring have in common a very large
set of NFS clients (>6000 in one case) all mounting over TCP/IPoIB at
the same time.  This puts a lot of stress on the auth.unix.ip cache :-/

>   
>> Secondly, even if the kernel code worked perfectly it's sharing
>> a single offset between multiple reading rpc.mountd threads.
>>     
>
> I made what I suspect is a similar patch a while ago and never got
> around to submitting it.  (Arrgh!  Bad me!)
Heh, I do way too much of the same thing :-(

>   Appended below if it's of
> any use to you to compare.  (Happy to apply either your patch or mine
> depending which looks better; I haven't tried to compare carefully.)
>   

Ok, I'll take a look at yours.
> [...]
>     
>     You can mitigate the problem by reading from a single file descriptor
>     that's shared between processes (as mountd currently does), but that
>     requires the read to always provide a buffer large enough to get the
>     whole request in one atomic read.  That's less practical for gss
>     init_sec_context calls, which could vary in size from a few hundred
>     bytes to 100k or so.
>   
I'm confused -- doesn't the current cache_make_upcall() code allocate a
buffer of length PAGE_SIZE and not allow it to be resized?
>     [...]
> -struct cache_queue {
> -	struct list_head	list;
> -	int			reader;	/* if 0, then request */
> -};
>  struct cache_request {
> -	struct cache_queue	q;
> +	struct list_head	list;
>  	struct cache_head	*item;
>  	char			* buf;
> +	int			offset;
>  	int			len;
> -	int			readers;
> -};
> -struct cache_reader {
> -	struct cache_queue	q;
> -	int			offset;	/* if non-0, we have a refcnt on next request */
>  };
>  
>   
Looking very similar so far.

>  static ssize_t
>  cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
>  {
> -	struct cache_reader *rp = filp->private_data;
> -	struct cache_request *rq;
> +	struct cache_request *rq = filp->private_data;
>  	struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data;
> +	struct list_head *queue = &cd->queue;
>  	int err;
>  
>  	if (count == 0)
> @@ -711,60 +703,45 @@ cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
>  	mutex_lock(&queue_io_mutex); /* protect against multiple concurrent
>  			      * readers on this file */
>   

Ah, so you still have a single global lock which is serialising all
reads and writes to all caches.

Also, I think you'd want to sample filp->private_data after taking
queue_io_mutex.
> +	if (rq == NULL) {
>  		mutex_unlock(&queue_io_mutex);
> -		BUG_ON(rp->offset);
>   

Good riddance to that BUG_ON().
> -		return 0;
> +		return -EAGAIN;
>  	}
> -	rq = container_of(rp->q.list.next, struct cache_request, q.list);
> -	BUG_ON(rq->q.reader);
> -	if (rp->offset == 0)
> -		rq->readers++;
> -	spin_unlock(&queue_lock);
>  
> -	if (rp->offset == 0 && !test_bit(CACHE_PENDING, &rq->item->flags)) {
> +	if (!test_bit(CACHE_PENDING, &rq->item->flags))
>  		err = -EAGAIN;
> -		spin_lock(&queue_lock);
> -		list_move(&rp->q.list, &rq->q.list);
> -		spin_unlock(&queue_lock);
> -	} else {
> -		if (rp->offset + count > rq->len)
> -			count = rq->len - rp->offset;
> +	else {
> +		if (rq->offset + count > rq->len)
> +			count = rq->len - rq->offset;
>  		err = -EFAULT;
> -		if (copy_to_user(buf, rq->buf + rp->offset, count))
> +		if (copy_to_user(buf, rq->buf + rq->offset, count))
>  			goto out;
>   

Ok, so you try to keep partial read support but move the offset into the
upcall request itself.  Interesting idea.

I think partial reads are Just Too Hard to do properly, i.e. without
risk of racy message corruption under all combinations of message size
and userspace behaviour.  In particular I think your code will corrupt
upcall data if multiple userspace threads race to do partial reads on
the same struct file (as rpc.mountd is doing at SGI's customer sites).

For the same reasons I think the FIONREAD support is utterly pointless
(even though I preserved it).

But I still don't understand how this 100K message size number for gssd
can happen?  If that's really necessary then the design equation changes
considerably.  This seems to be the most significant difference between
our patches.

A smaller issue is that you keep a single list and use the value of the
CACHE_PENDING bit to tell the difference between states.  I think this
could be made to work; however my technique of using two lists makes
most of the code even simpler at the small cost of having to do two list
searches in queue_loose().

-- 
Greg Banks, P.Engineer, SGI Australian Software Group.
the brightly coloured sporks of revolution.
I don't speak for SGI.


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [patch 10/14] sunrpc: Reorganise the queuing of cache upcalls.
       [not found]       ` <4966B92F.8060008-cP1dWloDopni96+mSzHFpQC/G2K4zDHf@public.gmane.org>
@ 2009-01-09  2:57         ` J. Bruce Fields
  2009-01-09  3:12           ` Greg Banks
  2009-01-09 21:29         ` J. Bruce Fields
  1 sibling, 1 reply; 27+ messages in thread
From: J. Bruce Fields @ 2009-01-09  2:57 UTC (permalink / raw)
  To: Greg Banks; +Cc: Linux NFS ML

On Fri, Jan 09, 2009 at 01:40:47PM +1100, Greg Banks wrote:
> J. Bruce Fields wrote:
> > [...]
> >     
> >     You can mitigate the problem by reading from a single file descriptor
> >     that's shared between processes (as mountd currently does), but that
> >     requires the read to always provide a buffer large enough to get the
> >     whole request in one atomic read.  That's less practical for gss
> >     init_sec_context calls, which could vary in size from a few hundred
> >     bytes to 100k or so.
> >   
> I'm confused -- doesn't the current cache_make_upcall() code allocate a
> buffer of length PAGE_SIZE and not allow it to be resized?

Yeah, sorry for the confusion: this was written as cleanup in
preparation for patches to support larger gss init_sec_context calls
needed for spkm3, which I'm told likes to send across entire certificate
trains in the initial NULL calls.  (But the spkm3 work is stalled for
now).

--b.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [patch 10/14] sunrpc: Reorganise the queuing of cache upcalls.
  2009-01-09  2:57         ` J. Bruce Fields
@ 2009-01-09  3:12           ` Greg Banks
       [not found]             ` <4966C0AB.7000604-cP1dWloDopni96+mSzHFpQC/G2K4zDHf@public.gmane.org>
  0 siblings, 1 reply; 27+ messages in thread
From: Greg Banks @ 2009-01-09  3:12 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Linux NFS ML

J. Bruce Fields wrote:
> On Fri, Jan 09, 2009 at 01:40:47PM +1100, Greg Banks wrote:
>   
>> J. Bruce Fields wrote:
>>     
>>> [...]
>>>     
>>>     whole request in one atomic read.  That's less practical for gss
>>>     init_sec_context calls, which could vary in size from a few hundred
>>>     bytes to 100k or so.
>>>   
>>>       
>> I'm confused -- doesn't the current cache_make_upcall() code allocate a
>> buffer of length PAGE_SIZE and not allow it to be resized?
>>     
>
> Yeah, sorry for the confusion: this was written as cleanup in
> preparation for patches to support larger gss init_sec_context calls
> needed for spkm3, which I'm told likes to send across entire certificate
> trains in the initial NULL calls.  (But the spkm3 work is stalled for
> now).
>   
Aha.

So if at some point in the future we actually need to send 100K in an
upcall, I think we have two options:

a) support partial reads but do so properly:
 - track offset in the cache_request
 - also track reader's pid in the cache request so partially read
requests are matched to threads
 - handle multiple requests being in a state where they have been
partially read
 - handle the case where a thread dies after doing a partial read but
before finishing, so the request is left dangling
 - handle the similar case where a thread does a partial read then fails
to ever finish the read without dying
 - handle both the "multiple struct files, 1 thread per struct file" and
"1 struct file, multiple threads" cases cleanly

b) don't support partial reads but require userspace to do larger full
reads.  I don't think 100K is too much to ask.
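
The userspace side of b) is trivial -- something like this sketch
(handle_upcall() is a placeholder, and a real reader would of course
poll() or block rather than spin):

    #include <fcntl.h>
    #include <unistd.h>

    #define UPCALL_BUFSZ (128 * 1024)  /* comfortably more than any expected upcall */

    static void reader_loop(const char *channel)  /* e.g. a /proc/net/rpc/.../channel path */
    {
            static char buf[UPCALL_BUFSZ];
            int fd = open(channel, O_RDWR);
            ssize_t n;

            while (fd >= 0) {
                    n = read(fd, buf, sizeof(buf));  /* whole request in one gulp, or nothing */
                    if (n > 0)
                            handle_upcall(buf, n);   /* placeholder: parse, then write() the reply */
            }
    }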

My patch does most of what we need for option b).  Yours does some of
what we need for option a).  Certainly a) is a lot more complex.

-- 
Greg Banks, P.Engineer, SGI Australian Software Group.
the brightly coloured sporks of revolution.
I don't speak for SGI.


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [patch 10/14] sunrpc: Reorganise the queuing of cache upcalls.
       [not found]             ` <4966C0AB.7000604-cP1dWloDopni96+mSzHFpQC/G2K4zDHf@public.gmane.org>
@ 2009-01-09 16:53               ` Chuck Lever
  2009-01-10  1:28                 ` Greg Banks
  0 siblings, 1 reply; 27+ messages in thread
From: Chuck Lever @ 2009-01-09 16:53 UTC (permalink / raw)
  To: Greg Banks; +Cc: J. Bruce Fields, Linux NFS ML

On Jan 8, 2009, at Jan 8, 2009, 10:12 PM, Greg Banks wrote:
> J. Bruce Fields wrote:
>> On Fri, Jan 09, 2009 at 01:40:47PM +1100, Greg Banks wrote:
>>
>>> J. Bruce Fields wrote:
>>>
>>>> [...]
>>>>
>>>>    whole request in one atomic read.  That's less practical for gss
>>>>    init_sec_context calls, which could vary in size from a few  
>>>> hundred
>>>>    bytes to 100k or so.
>>>>
>>>>
>>> I'm confused -- doesn't the current cache_make_upcall() code  
>>> allocate a
>>> buffer of length PAGE_SIZE and not allow it to be resized?
>>>
>>
>> Yeah, sorry for the confusion: this was written as cleanup in
>> preparation for patches to support larger gss init_sec_context calls
>> needed for spkm3, which I'm told likes to send across entire  
>> certificate
>> trains in the initial NULL calls.  (But the spkm3 work is stalled for
>> now).
>>
> Aha.
>
> So if at some point in the future we actually need to send 100K in an
> upcall, I think we have two options:
>
> a) support partial reads but do so properly:
> - track offset in the cache_request
> - also track reader's pid in the cache request so partially read
> requests are matched to threads
> - handle multiple requests being in a state where they have been
> partially read
> - handle the case where a thread dies after doing a partial read but
> before finishing, so the request is left dangling
> - handle the similar case where a thread does a partial read then  
> fails
> to ever finish the read without dying
> - handle both the "multiple struct files, 1 thread per struct file"  
> and
> "1 struct file, multiple threads" cases cleanly
>
> b) don't support partial reads but require userspace to do larger full
> reads.  I don't think 100K is too much to ask.

How about:

c) Use an mmap-like API to avoid copying 100K of data between user  
space and kernel.

> My patch does most of what we need for option b).  Yours does some of
> what we need for option a).  Certainly a) is a lot more complex.
>
> -- 
> Greg Banks, P.Engineer, SGI Australian Software Group.
> the brightly coloured sporks of revolution.
> I don't speak for SGI.
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs"  
> in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

-- 
Chuck Lever
chuck[dot]lever[at]oracle[dot]com





^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [patch 10/14] sunrpc: Reorganise the queuing of cache upcalls.
       [not found]       ` <4966B92F.8060008-cP1dWloDopni96+mSzHFpQC/G2K4zDHf@public.gmane.org>
  2009-01-09  2:57         ` J. Bruce Fields
@ 2009-01-09 21:29         ` J. Bruce Fields
  2009-01-09 21:41           ` J. Bruce Fields
  2009-01-09 23:29           ` Greg Banks
  1 sibling, 2 replies; 27+ messages in thread
From: J. Bruce Fields @ 2009-01-09 21:29 UTC (permalink / raw)
  To: Greg Banks; +Cc: Linux NFS ML

On Fri, Jan 09, 2009 at 01:40:47PM +1100, Greg Banks wrote:
> J. Bruce Fields wrote:
> >   Appended below if it's of
> > any use to you to compare.  (Happy to apply either your patch or mine
> > depending which looks better; I haven't tried to compare carefully.)
> >   
> 
> Ok, I'll take a look at yours.

Thanks for doing this, by the way.

> >  static ssize_t
> >  cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
> >  {
> > -	struct cache_reader *rp = filp->private_data;
> > -	struct cache_request *rq;
> > +	struct cache_request *rq = filp->private_data;
> >  	struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data;
> > +	struct list_head *queue = &cd->queue;
> >  	int err;
> >  
> >  	if (count == 0)
> > @@ -711,60 +703,45 @@ cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
> >  	mutex_lock(&queue_io_mutex); /* protect against multiple concurrent
> >  			      * readers on this file */
> >   
> 
> Ah, so you still have a single global lock which is serialising all
> reads and writes to all caches.

Yes, making this per-cd seems sensible (though if the problem is
typically a single cache (auth_unix) then I don't know how significant a
help it is).
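
Concretely, I'm imagining something like the following -- a sketch only,
with the field name and placement as assumptions rather than the actual
patch:

/* Sketch only (a fragment in the style of net/sunrpc/cache.c): the global
 * queue_io_mutex becomes a field of struct cache_detail, initialised with
 * mutex_init() wherever the cache_detail is set up.  The field name is
 * illustrative.
 */
static ssize_t
cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
{
	struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data;
	ssize_t ret = 0;

	mutex_lock(&cd->queue_io_mutex);	/* was: the single global mutex */
	/* ... copy one queued request for this cache out to userspace ... */
	mutex_unlock(&cd->queue_io_mutex);
	return ret;
}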

> Also, I think you'd want to sample filp->private_data after taking
> queue_io_mutex.

Whoops, yes.

> > +	if (rq == NULL) {
> >  		mutex_unlock(&queue_io_mutex);
> > -		BUG_ON(rp->offset);
> >   
> 
> Good riddance to that BUG_ON().
> > -		return 0;
> > +		return -EAGAIN;
> >  	}
> > -	rq = container_of(rp->q.list.next, struct cache_request, q.list);
> > -	BUG_ON(rq->q.reader);
> > -	if (rp->offset == 0)
> > -		rq->readers++;
> > -	spin_unlock(&queue_lock);
> >  
> > -	if (rp->offset == 0 && !test_bit(CACHE_PENDING, &rq->item->flags)) {
> > +	if (!test_bit(CACHE_PENDING, &rq->item->flags))
> >  		err = -EAGAIN;
> > -		spin_lock(&queue_lock);
> > -		list_move(&rp->q.list, &rq->q.list);
> > -		spin_unlock(&queue_lock);
> > -	} else {
> > -		if (rp->offset + count > rq->len)
> > -			count = rq->len - rp->offset;
> > +	else {
> > +		if (rq->offset + count > rq->len)
> > +			count = rq->len - rq->offset;
> >  		err = -EFAULT;
> > -		if (copy_to_user(buf, rq->buf + rp->offset, count))
> > +		if (copy_to_user(buf, rq->buf + rq->offset, count))
> >  			goto out;
> >   
> 
> Ok, so you try to keep partial read support but move the offset into the
> upcall request itself.  Interesting idea.
> 
> I think partial reads are Just Too Hard to do properly, i.e. without
> risk of racy message corruption under all combinations of message size
> and userspace behaviour.  In particular I think your code will corrupt
> upcall data if multiple userspace threads race to do partial reads on
> the same struct file (as rpc.mountd is doing at SGI's customer sites).

Yes, but what mountd's doing is just dumb, as far as I can tell; is
there any real reason not to just keep a separate open for each thread?

If we just tell userland to keep a number of opens equal to the number
of concurrent upcalls it wants to handle, then all of this becomes
very easy.

Forget sharing a single struct file between tasks that do too-small
reads: we should make sure that we don't oops, but that's all.
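
To make that concrete, here's a rough userspace sketch of what I mean --
one private open per thread, whole-request reads.  The path, buffer size
and threading details are illustrative, not what mountd actually does:

/* Sketch only: each worker thread keeps its own open of the channel
 * file and always offers a buffer big enough for a whole request.
 */
#include <fcntl.h>
#include <poll.h>
#include <pthread.h>
#include <stdlib.h>
#include <unistd.h>

#define CHANNEL		"/proc/net/rpc/auth.unix.ip/channel"
#define UPCALL_BUFSZ	(128 * 1024)	/* comfortably larger than any request */

static void *worker(void *unused)
{
	int fd = open(CHANNEL, O_RDWR);	/* private fd: no shared read state */
	char *buf = malloc(UPCALL_BUFSZ);
	struct pollfd pfd = { .fd = fd, .events = POLLIN };

	if (fd < 0 || buf == NULL)
		return NULL;

	for (;;) {
		if (poll(&pfd, 1, -1) <= 0)
			continue;
		ssize_t n = read(fd, buf, UPCALL_BUFSZ);	/* one whole request */
		if (n <= 0)
			continue;	/* e.g. EAGAIN: another thread got it */
		/* ... parse the request, then write() the reply on fd ... */
	}
	return NULL;
}

int main(void)
{
	pthread_t t[4];

	for (int i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, worker, NULL);
	for (int i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	return 0;
}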

> For the same reasons I think the FIONREAD support is utterly pointless
> (even though I preserved it).
> 
> But I still don't understand how this 100K message size number for gssd
> can happen?  If that's really necessary then the design equation changes
> considerably.  This seems to be the most significant difference between
> our patches.

So, the somewhat depressing situation with spkm3, which was to be the
public-key-based gss mechanism for nfs: we (citi) implemented it (modulo
this problem and maybe one or two others), but found some problems along
the way that required revising the spec.  I think there might have been
an implementation for one other platform too.  But the ietf seems to
have given up on spkm3, and instead is likely to pursue a new mechanism,
pku2u.  Nobody's implementing that yet.  Consulting my local expert, it
sounds like pku2u will have some more reasonable bound on init-sec-ctx
token sizes.  (Not sure if it'll be under a page, without checking, but
it shouldn't be much more than that, anyway).

So: the immediate pressure for larger upcalls is probably gone.  And
anyway more changes would be required (the ascii-hex encoding and
upcall-downcall matching are very wasteful in this case, etc.), so we'd
likely end up using a different mechanism.

That said, I think it's easy enough to handle just the case of multiple
reads on unshared struct files that it might make sense to keep that
piece.

> A smaller issue is that you keep a single list and use the value of the
> CACHE_PENDING bit to tell the difference between states.  I think this
> could be made to work; however my technique of using two lists makes
> most of the code even simpler at the small cost of having to do two list
> searches in queue_loose().

OK.  When exactly do they get moved between lists?  I'll take a look at
the code.

--b.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [patch 10/14] sunrpc: Reorganise the queuing of cache upcalls.
  2009-01-09 21:29         ` J. Bruce Fields
@ 2009-01-09 21:41           ` J. Bruce Fields
  2009-01-09 23:40             ` Greg Banks
  2009-01-09 23:29           ` Greg Banks
  1 sibling, 1 reply; 27+ messages in thread
From: J. Bruce Fields @ 2009-01-09 21:41 UTC (permalink / raw)
  To: Greg Banks; +Cc: Linux NFS ML

On Fri, Jan 09, 2009 at 04:29:21PM -0500, bfields wrote:
> On Fri, Jan 09, 2009 at 01:40:47PM +1100, Greg Banks wrote:
> > A smaller issue is that you keep a single list and use the value of the
> > CACHE_PENDING bit to tell the difference between states.  I think this
> > could be made to work; however my technique of using two lists makes
> > most of the code even simpler at the small cost of having to do two list
> > searches in queue_loose().
> 
> OK.  When exactly do they get moved between lists?  I'll take a look at
> the code.

The one thing I'd be curious about here would be robustness in the face
of a userland daemon that was restarted: would requests marked as read
but not responded to be stuck there indefinitely after the daemon
went down?  That wouldn't be fatal, since if nothing else the client
will retry eventually, but it might lead to some unnecessary delays if
the admin needed to restart a daemon for some reason.

--b.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [patch 10/14] sunrpc: Reorganise the queuing of cache upcalls.
  2009-01-09 21:29         ` J. Bruce Fields
  2009-01-09 21:41           ` J. Bruce Fields
@ 2009-01-09 23:29           ` Greg Banks
  1 sibling, 0 replies; 27+ messages in thread
From: Greg Banks @ 2009-01-09 23:29 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Linux NFS ML

J. Bruce Fields wrote:
> On Fri, Jan 09, 2009 at 01:40:47PM +1100, Greg Banks wrote:
>   
>> J. Bruce Fields wrote:
>>     
>>
>   
>>>  static ssize_t
>>>  cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
>>>  {
>>> -	struct cache_reader *rp = filp->private_data;
>>> -	struct cache_request *rq;
>>> +	struct cache_request *rq = filp->private_data;
>>>  	struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data;
>>> +	struct list_head *queue = &cd->queue;
>>>  	int err;
>>>  
>>>  	if (count == 0)
>>> @@ -711,60 +703,45 @@ cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
>>>  	mutex_lock(&queue_io_mutex); /* protect against multiple concurrent
>>>  			      * readers on this file */
>>>   
>>>       
>> Ah, so you still have a single global lock which is serialising all
>> reads and writes to all caches.
>>     
>
> Yes, making this per-cd seems sensible (though if the problem is
> typically a single cache (auth_unix) then I don't know how significant a
> help it is).
>   
The usual pattern of traffic I see (on a SLES10 platform with the older
set of export caches) when the first NFS packet arrives during a client
mount is:

 - upcall to mountd via auth.unix.ip cache
 - mountd writes pre-emptively to nfsd.export and nfsd.expkey caches
 - mountd writes the reply to the auth.unix.ip cache

So it's not just a single cache.
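
For illustration, that sequence from mountd's side looks roughly like
this -- the /proc/net/rpc paths are the usual ones, but the payload
formats are deliberately elided rather than guessed at:

/* Sketch only: the three caches touched while answering one mount. */
#include <fcntl.h>
#include <unistd.h>

static void handle_one_mount(void)
{
	int ip_fd     = open("/proc/net/rpc/auth.unix.ip/channel", O_RDWR);
	int export_fd = open("/proc/net/rpc/nfsd.export/channel", O_RDWR);
	int expkey_fd = open("/proc/net/rpc/nfsd.expkey/channel", O_RDWR);
	char req[8192];

	if (ip_fd < 0 || export_fd < 0 || expkey_fd < 0)
		goto out;

	/* 1. kernel upcall arrives via the auth.unix.ip cache */
	if (read(ip_fd, req, sizeof(req)) <= 0)
		goto out;

	/* (forward and reverse DNS lookups happen here, and dominate
	 *  the elapsed time) */

	/* 2. pre-emptively prime the export caches the client will need:
	 *	write(export_fd, export_line, strlen(export_line));
	 *	write(expkey_fd, expkey_line, strlen(expkey_line));
	 */

	/* 3. finally answer the original auth.unix.ip upcall:
	 *	write(ip_fd, ip_reply, strlen(ip_reply));
	 */
out:
	if (ip_fd >= 0)
		close(ip_fd);
	if (export_fd >= 0)
		close(export_fd);
	if (expkey_fd >= 0)
		close(expkey_fd);
}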

However, I have no measurements for any performance improvement.  Based
on earlier experience I believe the elapsed mounting time to be
dominated by the latency of the forward and reverse DNS lookup that
mountd does, so the improvement is probably small.

>   
>>
>> I think partial reads are Just Too Hard to do properly, i.e. without
>> risk of racy message corruption under all combinations of message size
>> and userspace behaviour .  In particular I think your code will corrupt
>> upcall data if multiple userspace threads race to do partial reads on
>> the same struct file (as rpc.mountd is doing at SGI's customer sites).
>>     
>
> Yes, but what mountd's doing is just dumb, as far as I can tell; is
> there any real reason not to just keep a separate open for each thread?
>   
None at all.  The current rpc.mountd behaviour is a historical accident
of the "look, we can put a fork() here and everything will Just Work"
variety.  I was hoping to avoid changes to the current userspace
behaviour to limit deployment hassles with shipping a fix, but
ultimately it can be changed.

> If we just tell userland to keep a number of opens equal to the number
> of concurrent upcalls it wants to handle, then all of this becomes
> very easy.
>   
If we put that requirement on userspace, and partial reads are still
necessary, then your approach of using filp->private_data as a parking
spot for requests currently being read would be the right way to go.
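
Roughly the lifecycle I'd picture for that parking spot is below -- a
sketch only: the dequeue/requeue/put helpers are made-up names, and the
release-time handling is my assumption about what would be needed, not a
claim about either patch.

/* Sketch only (in the style of net/sunrpc/cache.c): one request at a
 * time parked in filp->private_data; a reader that goes away mid-request
 * gives it back.
 */
static ssize_t
cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
{
	struct cache_request *rq = filp->private_data;

	if (rq == NULL) {
		rq = cache_dequeue_request(filp);	/* take one off the queue */
		if (rq == NULL)
			return -EAGAIN;
		filp->private_data = rq;
	}

	if (count > rq->len - rq->offset)
		count = rq->len - rq->offset;
	if (copy_to_user(buf, rq->buf + rq->offset, count))
		return -EFAULT;
	rq->offset += count;

	if (rq->offset == rq->len) {
		/* fully read: stop parking it, leave it waiting for the reply */
		filp->private_data = NULL;
		cache_request_put(rq);
	}
	return count;
}

static int
cache_release(struct inode *inode, struct file *filp)
{
	struct cache_request *rq = filp->private_data;

	if (rq != NULL) {
		/* reader went away mid-request: put it back so the
		 * request isn't left dangling */
		cache_requeue_request(rq);
		filp->private_data = NULL;
	}
	return 0;
}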

> Forget sharing a single struct file between tasks that do too-small
> reads: we should make sure that we don't oops, but that's all.
>   
We should definitely not oops :-)  Consistently delivering correct
messages to userspace would be nice too.

> So, the somewhat depressing situation with spkm3, which was to be the
> public-key-based gss mechanism for nfs: we (citi) implemented it (modulo
> this problem and maybe one or two others), but found some problems along
> the way that required revising the spec.
This is frequently the way with specs :-/

> [...]
> So: the immediate pressure for larger upcalls is probably gone.
Sweet.

> That said, I think it's easy enough to handle just the case of multiple
> reads on unshared struct files that it might make sense to keep that
> piece.
>
>   
>> A smaller issue is that you keep a single list and use the value of the
>> CACHE_PENDING bit to tell the difference between states. [...]
>>     
>
> OK.  When exactly do they get moved between lists?
Immediately after being copied out to userspace in cache_read().
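
In other words, something like this -- a sketch only; the to_read/to_write
list and member names are illustrative, not necessarily what the patch
uses:

/* Sketch only (in the style of net/sunrpc/cache.c): a request waits on
 * cd->to_read until cache_read() copies it out, then moves to
 * cd->to_write until the daemon's downcall arrives.
 */
static ssize_t
cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
{
	struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data;
	struct cache_request *rq = NULL;
	int err;

	spin_lock(&cd->queue_lock);
	if (!list_empty(&cd->to_read)) {
		rq = list_entry(cd->to_read.next, struct cache_request, list);
		list_del_init(&rq->list);
	}
	spin_unlock(&cd->queue_lock);
	if (rq == NULL)
		return -EAGAIN;

	err = -EINVAL;		/* whole-request reads only */
	if (count >= rq->len) {
		err = 0;
		if (copy_to_user(buf, rq->buf, rq->len))
			err = -EFAULT;
	}

	spin_lock(&cd->queue_lock);
	if (err)
		list_add(&rq->list, &cd->to_read);	 /* let someone retry */
	else
		list_add_tail(&rq->list, &cd->to_write); /* awaiting the reply */
	spin_unlock(&cd->queue_lock);

	return err ? err : rq->len;
}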

-- 
Greg Banks, P.Engineer, SGI Australian Software Group.
the brightly coloured sporks of revolution.
I don't speak for SGI.


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [patch 10/14] sunrpc: Reorganise the queuing of cache upcalls.
  2009-01-09 21:41           ` J. Bruce Fields
@ 2009-01-09 23:40             ` Greg Banks
  0 siblings, 0 replies; 27+ messages in thread
From: Greg Banks @ 2009-01-09 23:40 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Linux NFS ML

J. Bruce Fields wrote:
> On Fri, Jan 09, 2009 at 04:29:21PM -0500, bfields wrote:
>   
>> On Fri, Jan 09, 2009 at 01:40:47PM +1100, Greg Banks wrote:
>>     
>>> A smaller issue is that you keep a single list and use the value of the
>>> CACHE_PENDING bit to tell the difference between states.  I think this
>>> could be made to work; however my technique of using two lists makes
>>> most of the code even simpler at the small cost of having to do two list
>>> searches in queue_loose().
>>>       
>> OK.  When exactly do they get moved between lists?  I'll take a look at
>> the code.
>>     
>
> The one thing I'd be curious about here would be robustness in the face
> of a userland daemon that was restarted:
Fair enough.
>  would requests marked as read
> but not responded to be stuck there indefinitely after the daemon
> went down? 
I don't think so.  Requests on the to_write list have the CACHE_PENDING
bit set, and that will trigger a call to cache_remove_queued() in either
of cache_fresh_unlocked() or cache_clean().  The latter will happen when
the cache_head expires or when the .../flush file is written, e.g. the
next exportfs change.  One of those should happen if the daemons are
started normally.
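
Roughly, the cleanup looks like this -- again a sketch, with illustrative
list and helper names rather than the literal patch:

/* Sketch only (in the style of net/sunrpc/cache.c): once an item stops
 * being CACHE_PENDING, drop any upcall requests still queued for it,
 * whether or not they have been read.
 */
static void cache_remove_queued(struct cache_detail *cd, struct cache_head *h)
{
	struct cache_request *rq, *next;

	spin_lock(&cd->queue_lock);
	list_for_each_entry_safe(rq, next, &cd->to_read, list) {
		if (rq->item == h) {
			list_del_init(&rq->list);
			cache_request_put(rq);	/* never read by any daemon */
		}
	}
	list_for_each_entry_safe(rq, next, &cd->to_write, list) {
		if (rq->item == h) {
			list_del_init(&rq->list);
			cache_request_put(rq);	/* read, but no reply arrived */
		}
	}
	spin_unlock(&cd->queue_lock);
}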

-- 
Greg Banks, P.Engineer, SGI Australian Software Group.
the brightly coloured sporks of revolution.
I don't speak for SGI.


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [patch 10/14] sunrpc: Reorganise the queuing of cache upcalls.
  2009-01-09 16:53               ` Chuck Lever
@ 2009-01-10  1:28                 ` Greg Banks
  0 siblings, 0 replies; 27+ messages in thread
From: Greg Banks @ 2009-01-10  1:28 UTC (permalink / raw)
  To: Chuck Lever; +Cc: J. Bruce Fields, Linux NFS ML

Chuck Lever wrote:
> On Jan 8, 2009, at 10:12 PM, Greg Banks wrote:
>>
>> a) support partial reads but do so properly: [...]
>>
>> b) don't support partial reads but require userspace to do larger full
>> reads.  I don't think 100K is too much to ask.
>
> How about:
>
> c) Use an mmap-like API to avoid copying 100K of data between user
> space and kernel.

I think that would be a significant complexity hit, and even at 100K the
size of the data just doesn't warrant it.

-- 
Greg Banks, P.Engineer, SGI Australian Software Group.
the brightly coloured sporks of revolution.
I don't speak for SGI.


^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2009-01-10  1:36 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-01-08  8:25 [patch 00/14] sunrpc: Sunrpc cache cleanups and upcall rework Greg Banks
2009-01-08  8:25 ` [patch 01/14] sunrpc: Use consistent naming for variables of type struct cache_detail* Greg Banks
2009-01-08  8:25 ` [patch 02/14] sunrpc: Use consistent naming for variables of type struct cache_head* Greg Banks
2009-01-08  8:25 ` [patch 03/14] sunrpc: Use consistent naming for variables of type struct cache_request* Greg Banks
2009-01-08  8:25 ` [patch 04/14] sunrpc: Minor indentation cleanup in cache.c Greg Banks
2009-01-08  8:25 ` [patch 05/14] sunrpc: Rename queue_loose() to cache_remove_queued() Greg Banks
2009-01-08  8:25 ` [patch 06/14] sunrpc: Gather forward declarations of static functions in cache.c Greg Banks
2009-01-08  8:25 ` [patch 07/14] sunrpc: Make the global queue_lock per-cache-detail Greg Banks
2009-01-08  8:25 ` [patch 08/14] sunrpc: Make the global queue_wait per-cache-detail Greg Banks
2009-01-08  8:25 ` [patch 09/14] sunrpc: Remove the global lock queue_io_mutex Greg Banks
2009-01-08  8:25 ` [patch 10/14] sunrpc: Reorganise the queuing of cache upcalls Greg Banks
2009-01-08 19:57   ` J. Bruce Fields
2009-01-09  2:40     ` Greg Banks
     [not found]       ` <4966B92F.8060008-cP1dWloDopni96+mSzHFpQC/G2K4zDHf@public.gmane.org>
2009-01-09  2:57         ` J. Bruce Fields
2009-01-09  3:12           ` Greg Banks
     [not found]             ` <4966C0AB.7000604-cP1dWloDopni96+mSzHFpQC/G2K4zDHf@public.gmane.org>
2009-01-09 16:53               ` Chuck Lever
2009-01-10  1:28                 ` Greg Banks
2009-01-09 21:29         ` J. Bruce Fields
2009-01-09 21:41           ` J. Bruce Fields
2009-01-09 23:40             ` Greg Banks
2009-01-09 23:29           ` Greg Banks
2009-01-08  8:25 ` [patch 11/14] sunrpc: Allocate cache_requests in a single allocation Greg Banks
2009-01-08  8:25 ` [patch 12/14] sunrpc: Centralise memory management of cache_requests Greg Banks
2009-01-08  8:25 ` [patch 13/14] sunrpc: Move struct cache_request to linux/sunrpc/cache.h Greg Banks
2009-01-08  8:25 ` [patch 14/14] sunrpc: Improve the usefulness of debug printks in the sunrpc cache code Greg Banks
2009-01-08 19:52 ` [patch 00/14] sunrpc: Sunrpc cache cleanups and upcall rework J. Bruce Fields
2009-01-09  1:42   ` Greg Banks
