All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chuck Lever <chuck.lever@oracle.com>
To: linux-nfs@vger.kernel.org
Cc: neilb@suse.de, jlayton@redhat.com
Subject: [PATCH v6 14/14] NFSD: Use rhashtable for managing nfs4_file objects
Date: Thu, 27 Oct 2022 14:53:06 -0400	[thread overview]
Message-ID: <166689678674.90991.8343122135992096162.stgit@klimt.1015granger.net> (raw)
In-Reply-To: <166689625728.90991.15067635142973595248.stgit@klimt.1015granger.net>

fh_match() is costly, especially when filehandles are large (as is
the case for NFSv4). It needs to be used sparingly when searching
data structures. Unfortunately, with common workloads, I see
multiple thousands of objects stored in file_hashtbl[], which has
just 256 buckets, making its bucket hash chains quite lengthy.

Walking long hash chains with the state_lock held blocks other
activity that needs that lock. Sizable hash chains are a common
occurrance once the server has handed out some delegations, for
example -- IIUC, each delegated file is held open on the server by
an nfs4_file object.

To help mitigate the cost of searching with fh_match(), replace the
nfs4_file hash table with an rhashtable, which can dynamically
resize its bucket array to minimize hash chain length.

The result of this modification is an improvement in the latency of
NFSv4 operations, and the reduction of nfsd CPU utilization due to
eliminating the cost of multiple calls to fh_match() and reducing
the CPU cache misses incurred while walking long hash chains in the
nfs4_file hash table.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4state.c |   77 ++++++++++++++++++++++++++-------------------------
 fs/nfsd/state.h     |    4 ---
 2 files changed, 40 insertions(+), 41 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3afb73750d2d..1ded89235111 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -591,11 +591,8 @@ static void nfsd4_free_file_rcu(struct rcu_head *rcu)
 void
 put_nfs4_file(struct nfs4_file *fi)
 {
-	might_lock(&state_lock);
-
-	if (refcount_dec_and_lock(&fi->fi_ref, &state_lock)) {
+	if (refcount_dec_and_test(&fi->fi_ref)) {
 		remove_nfs4_file_locked(fi);
-		spin_unlock(&state_lock);
 		WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate));
 		WARN_ON_ONCE(!list_empty(&fi->fi_delegations));
 		call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu);
@@ -709,20 +706,6 @@ static unsigned int ownerstr_hashval(struct xdr_netobj *ownername)
 	return ret & OWNER_HASH_MASK;
 }
 
-/* hash table for nfs4_file */
-#define FILE_HASH_BITS                   8
-#define FILE_HASH_SIZE                  (1 << FILE_HASH_BITS)
-
-static unsigned int file_hashval(const struct svc_fh *fh)
-{
-	struct inode *inode = d_inode(fh->fh_dentry);
-
-	/* XXX: why not (here & in file cache) use inode? */
-	return (unsigned int)hash_long(inode->i_ino, FILE_HASH_BITS);
-}
-
-static struct hlist_head file_hashtbl[FILE_HASH_SIZE];
-
 static struct rhltable nfs4_file_rhltable ____cacheline_aligned_in_smp;
 
 static const struct rhashtable_params nfs4_file_rhash_params = {
@@ -4686,12 +4669,14 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
 static noinline_for_stack struct nfs4_file *
 find_nfs4_file(const struct svc_fh *fhp)
 {
-	unsigned int hashval = file_hashval(fhp);
+	struct inode *inode = d_inode(fhp->fh_dentry);
+	struct rhlist_head *tmp, *list;
 	struct nfs4_file *fi;
 
 	rcu_read_lock();
-	hlist_for_each_entry_rcu(fi, &file_hashtbl[hashval], fi_hash,
-				 lockdep_is_held(&state_lock)) {
+	list = rhltable_lookup(&nfs4_file_rhltable, &inode,
+			       nfs4_file_rhash_params);
+	rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) {
 		if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) {
 			if (refcount_inc_not_zero(&fi->fi_ref)) {
 				rcu_read_unlock();
@@ -4705,40 +4690,56 @@ find_nfs4_file(const struct svc_fh *fhp)
 
 /*
  * On hash insertion, identify entries with the same inode but
- * distinct filehandles. They will all be in the same hash bucket
- * because nfs4_file's are hashed by the address in the fi_inode
- * field.
+ * distinct filehandles. They will all be on the list returned
+ * by rhltable_lookup().
+ *
+ * inode->i_lock prevents racing insertions from adding an entry
+ * for the same inode/fhp pair twice.
  */
 static noinline_for_stack struct nfs4_file *
 insert_nfs4_file(struct nfs4_file *new, const struct svc_fh *fhp)
 {
-	unsigned int hashval = file_hashval(fhp);
+	struct inode *inode = d_inode(fhp->fh_dentry);
+	struct rhlist_head *tmp, *list;
 	struct nfs4_file *ret = NULL;
 	bool alias_found = false;
 	struct nfs4_file *fi;
+	int err;
 
-	spin_lock(&state_lock);
-	hlist_for_each_entry_rcu(fi, &file_hashtbl[hashval], fi_hash,
-				 lockdep_is_held(&state_lock)) {
+	rcu_read_lock();
+	spin_lock(&inode->i_lock);
+
+	list = rhltable_lookup(&nfs4_file_rhltable, &inode,
+			       nfs4_file_rhash_params);
+	rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) {
 		if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) {
 			if (refcount_inc_not_zero(&fi->fi_ref))
 				ret = fi;
-		} else if (d_inode(fhp->fh_dentry) == fi->fi_inode)
+		} else
 			fi->fi_aliased = alias_found = true;
 	}
-	if (likely(ret == NULL)) {
-		init_nfs4_file(fhp, new);
-		hlist_add_head_rcu(&new->fi_hash, &file_hashtbl[hashval]);
-		new->fi_aliased = alias_found;
-		ret = new;
-	}
-	spin_unlock(&state_lock);
+	if (ret)
+		goto out_unlock;
+
+	init_nfs4_file(fhp, new);
+	err = rhltable_insert(&nfs4_file_rhltable, &new->fi_rlist,
+			      nfs4_file_rhash_params);
+	if (err)
+		goto out_unlock;
+
+	new->fi_aliased = alias_found;
+	ret = new;
+
+out_unlock:
+	spin_unlock(&inode->i_lock);
+	rcu_read_unlock();
 	return ret;
 }
 
 static noinline_for_stack void remove_nfs4_file_locked(struct nfs4_file *fi)
 {
-	hlist_del_rcu(&fi->fi_hash);
+	rhltable_remove(&nfs4_file_rhltable, &fi->fi_rlist,
+			nfs4_file_rhash_params);
 }
 
 /*
@@ -5628,6 +5629,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 	 * If not found, create the nfs4_file struct
 	 */
 	fp = insert_nfs4_file(open->op_file, current_fh);
+	if (unlikely(!fp))
+		return nfserr_jukebox;
 	if (fp != open->op_file) {
 		status = nfs4_check_deleg(cl, open, &dp);
 		if (status)
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 190fc7e418a4..eadd7f465bf5 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -536,16 +536,12 @@ struct nfs4_clnt_odstate {
  * inode can have multiple filehandles associated with it, so there is
  * (potentially) a many to one relationship between this struct and struct
  * inode.
- *
- * These are hashed by filehandle in the file_hashtbl, which is protected by
- * the global state_lock spinlock.
  */
 struct nfs4_file {
 	refcount_t		fi_ref;
 	struct inode *		fi_inode;
 	bool			fi_aliased;
 	spinlock_t		fi_lock;
-	struct hlist_node       fi_hash;	/* hash on fi_fhandle */
 	struct rhlist_head	fi_rlist;
 	struct list_head        fi_stateids;
 	union {



  parent reply	other threads:[~2022-10-27 18:53 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-10-27 18:51 [PATCH v6 00/14] Series short description Chuck Lever
2022-10-27 18:51 ` [PATCH v6 01/14] NFSD: Pass the target nfsd_file to nfsd_commit() Chuck Lever
2022-10-27 18:51 ` [PATCH v6 02/14] NFSD: Revert "NFSD: NFSv4 CLOSE should release an nfsd_file immediately" Chuck Lever
2022-10-27 18:51 ` [PATCH v6 03/14] NFSD: Add an NFSD_FILE_GC flag to enable nfsd_file garbage collection Chuck Lever
2022-10-27 18:52 ` [PATCH v6 04/14] NFSD: Clean up nfs4_preprocess_stateid_op() call sites Chuck Lever
2022-10-27 18:52 ` [PATCH v6 05/14] NFSD: Trace stateids returned via DELEGRETURN Chuck Lever
2022-10-27 18:52 ` [PATCH v6 06/14] NFSD: Trace delegation revocations Chuck Lever
2022-10-27 18:52 ` [PATCH v6 07/14] NFSD: Use const pointers as parameters to fh_ helpers Chuck Lever
2022-10-27 18:52 ` [PATCH v6 08/14] NFSD: Update file_hashtbl() helpers Chuck Lever
2022-10-27 18:52 ` [PATCH v6 09/14] NFSD: Clean up nfsd4_init_file() Chuck Lever
2022-10-27 23:22   ` NeilBrown
2022-10-27 23:46     ` Chuck Lever III
2022-10-28  0:13       ` NeilBrown
2022-10-27 18:52 ` [PATCH v6 10/14] NFSD: Add a remove_nfs4_file() helper Chuck Lever
2022-10-27 18:52 ` [PATCH v6 11/14] NFSD: Clean up find_or_add_file() Chuck Lever
2022-10-27 18:52 ` [PATCH v6 12/14] NFSD: Refactor find_file() Chuck Lever
2022-10-27 18:53 ` [PATCH v6 13/14] NFSD: Allocate an rhashtable for nfs4_file objects Chuck Lever
2022-10-27 23:24   ` NeilBrown
2022-10-27 18:53 ` Chuck Lever [this message]
2022-10-27 23:29 ` [PATCH v6 00/14] Series short description NeilBrown

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=166689678674.90991.8343122135992096162.stgit@klimt.1015granger.net \
    --to=chuck.lever@oracle.com \
    --cc=jlayton@redhat.com \
    --cc=linux-nfs@vger.kernel.org \
    --cc=neilb@suse.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.