All of lore.kernel.org
 help / color / mirror / Atom feed
From: NeilBrown <neilb@suse.com>
To: Oleg Drokin <oleg.drokin@intel.com>,
	Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	James Simmons <jsimmons@infradead.org>,
	Andreas Dilger <andreas.dilger@intel.com>
Cc: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	Lustre Development List <lustre-devel@lists.lustre.org>
Subject: [PATCH 13/20] staging: lustre: lu_object: move retry logic inside htable_lookup
Date: Thu, 12 Apr 2018 07:54:49 +1000	[thread overview]
Message-ID: <152348368899.12394.6341901965661540031.stgit@noble> (raw)
In-Reply-To: <152348312863.12394.11915752362061083241.stgit@noble>

The current retry logic, to wait when a 'dying' object is found,
spans multiple functions.  The process is attached to a waitqueue
and set TASK_UNINTERRUPTIBLE in htable_lookup, and this status
is passed back through lu_object_find_try() to lu_object_find_at()
where schedule() is called and the process is removed from the queue.

This can be simplified by moving all the logic (including
hashtable locking) inside htable_lookup(), which now never returns
EAGAIN.

Note that htable_lookup() is called with the hash bucket lock
held, and will drop and retake it if it needs to schedule.

I made this a 'goto' loop rather than a 'while(1)' loop as the
diff is easier to read.

Signed-off-by: NeilBrown <neilb@suse.com>
---
 drivers/staging/lustre/lustre/obdclass/lu_object.c |   73 +++++++-------------
 1 file changed, 27 insertions(+), 46 deletions(-)

diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c
index 064166843e64..bf505a9463a3 100644
--- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
+++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
@@ -596,16 +596,21 @@ EXPORT_SYMBOL(lu_object_print);
 static struct lu_object *htable_lookup(struct lu_site *s,
 				       struct cfs_hash_bd *bd,
 				       const struct lu_fid *f,
-				       wait_queue_entry_t *waiter,
 				       __u64 *version)
 {
+	struct cfs_hash		*hs = s->ls_obj_hash;
 	struct lu_site_bkt_data *bkt;
 	struct lu_object_header *h;
 	struct hlist_node	*hnode;
-	__u64  ver = cfs_hash_bd_version_get(bd);
+	__u64 ver;
+	wait_queue_entry_t waiter;
 
-	if (*version == ver)
+retry:
+	ver = cfs_hash_bd_version_get(bd);
+
+	if (*version == ver) {
 		return ERR_PTR(-ENOENT);
+	}
 
 	*version = ver;
 	/* cfs_hash_bd_peek_locked is a somehow "internal" function
@@ -638,11 +643,15 @@ static struct lu_object *htable_lookup(struct lu_site *s,
 	 * drained), and moreover, lookup has to wait until object is freed.
 	 */
 
-	init_waitqueue_entry(waiter, current);
-	add_wait_queue(&bkt->lsb_marche_funebre, waiter);
+	init_waitqueue_entry(&waiter, current);
+	add_wait_queue(&bkt->lsb_marche_funebre, &waiter);
 	set_current_state(TASK_UNINTERRUPTIBLE);
 	lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_DEATH_RACE);
-	return ERR_PTR(-EAGAIN);
+	cfs_hash_bd_unlock(hs, bd, 1);
+	schedule();
+	remove_wait_queue(&bkt->lsb_marche_funebre, &waiter);
+	cfs_hash_bd_lock(hs, bd, 1);
+	goto retry;
 }
 
 /**
@@ -706,13 +715,14 @@ static struct lu_object *lu_object_new(const struct lu_env *env,
 }
 
 /**
- * Core logic of lu_object_find*() functions.
+ * Much like lu_object_find(), but top level device of object is specifically
+ * \a dev rather than top level device of the site. This interface allows
+ * objects of different "stacking" to be created within the same site.
  */
-static struct lu_object *lu_object_find_try(const struct lu_env *env,
-					    struct lu_device *dev,
-					    const struct lu_fid *f,
-					    const struct lu_object_conf *conf,
-					    wait_queue_entry_t *waiter)
+struct lu_object *lu_object_find_at(const struct lu_env *env,
+				    struct lu_device *dev,
+				    const struct lu_fid *f,
+				    const struct lu_object_conf *conf)
 {
 	struct lu_object      *o;
 	struct lu_object      *shadow;
@@ -738,17 +748,16 @@ static struct lu_object *lu_object_find_try(const struct lu_env *env,
 	 * It is unnecessary to perform lookup-alloc-lookup-insert, instead,
 	 * just alloc and insert directly.
 	 *
-	 * If dying object is found during index search, add @waiter to the
-	 * site wait-queue and return ERR_PTR(-EAGAIN).
 	 */
 	if (conf && conf->loc_flags & LOC_F_NEW)
 		return lu_object_new(env, dev, f, conf);
 
 	s  = dev->ld_site;
 	hs = s->ls_obj_hash;
-	cfs_hash_bd_get_and_lock(hs, (void *)f, &bd, 1);
-	o = htable_lookup(s, &bd, f, waiter, &version);
-	cfs_hash_bd_unlock(hs, &bd, 1);
+	cfs_hash_bd_get_and_lock(hs, (void *)f, &bd, 0);
+	o = htable_lookup(s, &bd, f, &version);
+	cfs_hash_bd_unlock(hs, &bd, 0);
+
 	if (!IS_ERR(o) || PTR_ERR(o) != -ENOENT)
 		return o;
 
@@ -764,7 +773,7 @@ static struct lu_object *lu_object_find_try(const struct lu_env *env,
 
 	cfs_hash_bd_lock(hs, &bd, 1);
 
-	shadow = htable_lookup(s, &bd, f, waiter, &version);
+	shadow = htable_lookup(s, &bd, f, &version);
 	if (likely(PTR_ERR(shadow) == -ENOENT)) {
 		cfs_hash_bd_add_locked(hs, &bd, &o->lo_header->loh_hash);
 		cfs_hash_bd_unlock(hs, &bd, 1);
@@ -779,34 +788,6 @@ static struct lu_object *lu_object_find_try(const struct lu_env *env,
 	lu_object_free(env, o);
 	return shadow;
 }
-
-/**
- * Much like lu_object_find(), but top level device of object is specifically
- * \a dev rather than top level device of the site. This interface allows
- * objects of different "stacking" to be created within the same site.
- */
-struct lu_object *lu_object_find_at(const struct lu_env *env,
-				    struct lu_device *dev,
-				    const struct lu_fid *f,
-				    const struct lu_object_conf *conf)
-{
-	wait_queue_head_t	*wq;
-	struct lu_object	*obj;
-	wait_queue_entry_t	   wait;
-
-	while (1) {
-		obj = lu_object_find_try(env, dev, f, conf, &wait);
-		if (obj != ERR_PTR(-EAGAIN))
-			return obj;
-		/*
-		 * lu_object_find_try() already added waiter into the
-		 * wait queue.
-		 */
-		schedule();
-		wq = lu_site_wq_from_fid(dev->ld_site, (void *)f);
-		remove_wait_queue(wq, &wait);
-	}
-}
 EXPORT_SYMBOL(lu_object_find_at);
 
 /**

WARNING: multiple messages have this Message-ID (diff)
From: NeilBrown <neilb@suse.com>
To: Oleg Drokin <oleg.drokin@intel.com>,
	Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	James Simmons <jsimmons@infradead.org>,
	Andreas Dilger <andreas.dilger@intel.com>
Cc: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	Lustre Development List <lustre-devel@lists.lustre.org>
Subject: [lustre-devel] [PATCH 13/20] staging: lustre: lu_object: move retry logic inside htable_lookup
Date: Thu, 12 Apr 2018 07:54:49 +1000	[thread overview]
Message-ID: <152348368899.12394.6341901965661540031.stgit@noble> (raw)
In-Reply-To: <152348312863.12394.11915752362061083241.stgit@noble>

The current retry logic, to wait when a 'dying' object is found,
spans multiple functions.  The process is attached to a waitqueue
and set TASK_UNINTERRUPTIBLE in htable_lookup, and this status
is passed back through lu_object_find_try() to lu_object_find_at()
where schedule() is called and the process is removed from the queue.

This can be simplified by moving all the logic (including
hashtable locking) inside htable_lookup(), which now never returns
EAGAIN.

Note that htable_lookup() is called with the hash bucket lock
held, and will drop and retake it if it needs to schedule.

I made this a 'goto' loop rather than a 'while(1)' loop as the
diff is easier to read.

Signed-off-by: NeilBrown <neilb@suse.com>
---
 drivers/staging/lustre/lustre/obdclass/lu_object.c |   73 +++++++-------------
 1 file changed, 27 insertions(+), 46 deletions(-)

diff --git a/drivers/staging/lustre/lustre/obdclass/lu_object.c b/drivers/staging/lustre/lustre/obdclass/lu_object.c
index 064166843e64..bf505a9463a3 100644
--- a/drivers/staging/lustre/lustre/obdclass/lu_object.c
+++ b/drivers/staging/lustre/lustre/obdclass/lu_object.c
@@ -596,16 +596,21 @@ EXPORT_SYMBOL(lu_object_print);
 static struct lu_object *htable_lookup(struct lu_site *s,
 				       struct cfs_hash_bd *bd,
 				       const struct lu_fid *f,
-				       wait_queue_entry_t *waiter,
 				       __u64 *version)
 {
+	struct cfs_hash		*hs = s->ls_obj_hash;
 	struct lu_site_bkt_data *bkt;
 	struct lu_object_header *h;
 	struct hlist_node	*hnode;
-	__u64  ver = cfs_hash_bd_version_get(bd);
+	__u64 ver;
+	wait_queue_entry_t waiter;
 
-	if (*version == ver)
+retry:
+	ver = cfs_hash_bd_version_get(bd);
+
+	if (*version == ver) {
 		return ERR_PTR(-ENOENT);
+	}
 
 	*version = ver;
 	/* cfs_hash_bd_peek_locked is a somehow "internal" function
@@ -638,11 +643,15 @@ static struct lu_object *htable_lookup(struct lu_site *s,
 	 * drained), and moreover, lookup has to wait until object is freed.
 	 */
 
-	init_waitqueue_entry(waiter, current);
-	add_wait_queue(&bkt->lsb_marche_funebre, waiter);
+	init_waitqueue_entry(&waiter, current);
+	add_wait_queue(&bkt->lsb_marche_funebre, &waiter);
 	set_current_state(TASK_UNINTERRUPTIBLE);
 	lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_DEATH_RACE);
-	return ERR_PTR(-EAGAIN);
+	cfs_hash_bd_unlock(hs, bd, 1);
+	schedule();
+	remove_wait_queue(&bkt->lsb_marche_funebre, &waiter);
+	cfs_hash_bd_lock(hs, bd, 1);
+	goto retry;
 }
 
 /**
@@ -706,13 +715,14 @@ static struct lu_object *lu_object_new(const struct lu_env *env,
 }
 
 /**
- * Core logic of lu_object_find*() functions.
+ * Much like lu_object_find(), but top level device of object is specifically
+ * \a dev rather than top level device of the site. This interface allows
+ * objects of different "stacking" to be created within the same site.
  */
-static struct lu_object *lu_object_find_try(const struct lu_env *env,
-					    struct lu_device *dev,
-					    const struct lu_fid *f,
-					    const struct lu_object_conf *conf,
-					    wait_queue_entry_t *waiter)
+struct lu_object *lu_object_find_at(const struct lu_env *env,
+				    struct lu_device *dev,
+				    const struct lu_fid *f,
+				    const struct lu_object_conf *conf)
 {
 	struct lu_object      *o;
 	struct lu_object      *shadow;
@@ -738,17 +748,16 @@ static struct lu_object *lu_object_find_try(const struct lu_env *env,
 	 * It is unnecessary to perform lookup-alloc-lookup-insert, instead,
 	 * just alloc and insert directly.
 	 *
-	 * If dying object is found during index search, add @waiter to the
-	 * site wait-queue and return ERR_PTR(-EAGAIN).
 	 */
 	if (conf && conf->loc_flags & LOC_F_NEW)
 		return lu_object_new(env, dev, f, conf);
 
 	s  = dev->ld_site;
 	hs = s->ls_obj_hash;
-	cfs_hash_bd_get_and_lock(hs, (void *)f, &bd, 1);
-	o = htable_lookup(s, &bd, f, waiter, &version);
-	cfs_hash_bd_unlock(hs, &bd, 1);
+	cfs_hash_bd_get_and_lock(hs, (void *)f, &bd, 0);
+	o = htable_lookup(s, &bd, f, &version);
+	cfs_hash_bd_unlock(hs, &bd, 0);
+
 	if (!IS_ERR(o) || PTR_ERR(o) != -ENOENT)
 		return o;
 
@@ -764,7 +773,7 @@ static struct lu_object *lu_object_find_try(const struct lu_env *env,
 
 	cfs_hash_bd_lock(hs, &bd, 1);
 
-	shadow = htable_lookup(s, &bd, f, waiter, &version);
+	shadow = htable_lookup(s, &bd, f, &version);
 	if (likely(PTR_ERR(shadow) == -ENOENT)) {
 		cfs_hash_bd_add_locked(hs, &bd, &o->lo_header->loh_hash);
 		cfs_hash_bd_unlock(hs, &bd, 1);
@@ -779,34 +788,6 @@ static struct lu_object *lu_object_find_try(const struct lu_env *env,
 	lu_object_free(env, o);
 	return shadow;
 }
-
-/**
- * Much like lu_object_find(), but top level device of object is specifically
- * \a dev rather than top level device of the site. This interface allows
- * objects of different "stacking" to be created within the same site.
- */
-struct lu_object *lu_object_find_at(const struct lu_env *env,
-				    struct lu_device *dev,
-				    const struct lu_fid *f,
-				    const struct lu_object_conf *conf)
-{
-	wait_queue_head_t	*wq;
-	struct lu_object	*obj;
-	wait_queue_entry_t	   wait;
-
-	while (1) {
-		obj = lu_object_find_try(env, dev, f, conf, &wait);
-		if (obj != ERR_PTR(-EAGAIN))
-			return obj;
-		/*
-		 * lu_object_find_try() already added waiter into the
-		 * wait queue.
-		 */
-		schedule();
-		wq = lu_site_wq_from_fid(dev->ld_site, (void *)f);
-		remove_wait_queue(wq, &wait);
-	}
-}
 EXPORT_SYMBOL(lu_object_find_at);
 
 /**

  parent reply	other threads:[~2018-04-11 21:54 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-04-11 21:54 [PATCH 00/20] staging: lustre: convert to rhashtable NeilBrown
2018-04-11 21:54 ` [lustre-devel] " NeilBrown
2018-04-11 21:54 ` [PATCH 03/20] staging: lustre: convert obd uuid hash " NeilBrown
2018-04-11 21:54   ` [lustre-devel] " NeilBrown
2018-04-11 21:54 ` [PATCH 04/20] staging: lustre: convert osc_quota " NeilBrown
2018-04-11 21:54   ` [lustre-devel] " NeilBrown
2018-04-11 21:54 ` [PATCH 05/20] staging: lustre: separate buckets from ldlm hash table NeilBrown
2018-04-11 21:54   ` [lustre-devel] " NeilBrown
2018-04-11 21:54 ` [PATCH 09/20] staging: lustre: convert ldlm_resource hash to rhashtable NeilBrown
2018-04-11 21:54   ` [lustre-devel] " NeilBrown
2018-04-11 21:54 ` [PATCH 07/20] staging: lustre: ldlm: store name directly in namespace NeilBrown
2018-04-11 21:54   ` [lustre-devel] " NeilBrown
2018-04-11 21:54 ` [PATCH 10/20] staging: lustre: make struct lu_site_bkt_data private NeilBrown
2018-04-11 21:54   ` [lustre-devel] " NeilBrown
2018-04-11 21:54 ` [PATCH 02/20] staging: lustre: convert lov_pool to use rhashtable NeilBrown
2018-04-11 21:54   ` [lustre-devel] " NeilBrown
2018-04-11 21:54 ` [PATCH 12/20] staging: lustre: lu_object: factor out extra per-bucket data NeilBrown
2018-04-11 21:54   ` [lustre-devel] " NeilBrown
2018-04-11 21:54 ` [PATCH 08/20] staging: lustre: simplify ldlm_ns_hash_defs[] NeilBrown
2018-04-11 21:54   ` [lustre-devel] " NeilBrown
2018-04-11 21:54 ` [PATCH 01/20] staging: lustre: ptlrpc: convert conn_hash to rhashtable NeilBrown
2018-04-11 21:54   ` [lustre-devel] " NeilBrown
2018-04-11 21:54 ` [PATCH 06/20] staging: lustre: ldlm: add a counter to the per-namespace data NeilBrown
2018-04-11 21:54   ` [lustre-devel] " NeilBrown
2018-04-11 21:54 ` [PATCH 11/20] staging: lustre: lu_object: discard extra lru count NeilBrown
2018-04-11 21:54   ` [lustre-devel] " NeilBrown
2018-04-11 21:54 ` [PATCH 17/20] staging: lustre: use call_rcu() to free lu_object_headers NeilBrown
2018-04-11 21:54   ` [lustre-devel] " NeilBrown
2018-04-11 21:54 ` [PATCH 15/20] staging: lustre: llite: use more private data in dump_pgcache NeilBrown
2018-04-11 21:54   ` [lustre-devel] " NeilBrown
2018-04-11 21:54 ` [PATCH 16/20] staging: lustre: llite: remove redundant lookup " NeilBrown
2018-04-11 21:54   ` [lustre-devel] " NeilBrown
2018-04-11 21:54 ` [PATCH 14/20] staging: lustre: fold lu_object_new() into lu_object_find_at() NeilBrown
2018-04-11 21:54   ` [lustre-devel] " NeilBrown
2018-04-11 21:54 ` NeilBrown [this message]
2018-04-11 21:54   ` [lustre-devel] [PATCH 13/20] staging: lustre: lu_object: move retry logic inside htable_lookup NeilBrown
2018-04-11 21:54 ` [PATCH 18/20] staging: lustre: change how "dump_page_cache" walks a hash table NeilBrown
2018-04-11 21:54   ` [lustre-devel] " NeilBrown
2018-04-11 21:54 ` [PATCH 20/20] staging: lustre: remove cfs_hash resizeable hashtable implementation NeilBrown
2018-04-11 21:54   ` [lustre-devel] " NeilBrown
2018-04-11 21:54 ` [PATCH 19/20] staging: lustre: convert lu_object cache to rhashtable NeilBrown
2018-04-11 21:54   ` [lustre-devel] " NeilBrown
2018-04-17  3:35 ` [PATCH 00/20] staging: lustre: convert " James Simmons
2018-04-17  3:35   ` [lustre-devel] " James Simmons
2018-04-18  3:17   ` NeilBrown
2018-04-18  3:17     ` [lustre-devel] " NeilBrown
2018-04-18 21:56     ` Simmons, James A.
2018-04-18 21:56       ` Simmons, James A.
2018-04-23 13:08 ` Greg Kroah-Hartman
2018-04-23 13:08   ` [lustre-devel] " Greg Kroah-Hartman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=152348368899.12394.6341901965661540031.stgit@noble \
    --to=neilb@suse.com \
    --cc=andreas.dilger@intel.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=jsimmons@infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lustre-devel@lists.lustre.org \
    --cc=oleg.drokin@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.