All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Howells <dhowells@redhat.com>
To: linux-afs@lists.infradead.org
Cc: dhowells@redhat.com, linux-fsdevel@vger.kernel.org,
	linux-kernel@vger.kernel.org
Subject: [PATCH 09/27] afs: Make callback processing more efficient.
Date: Fri, 29 May 2020 23:01:10 +0100	[thread overview]
Message-ID: <159078967097.679399.13471090664845334315.stgit@warthog.procyon.org.uk> (raw)
In-Reply-To: <159078959973.679399.15496997680826127470.stgit@warthog.procyon.org.uk>

afs_vol_interest objects represent the volume IDs currently being accessed
from a fileserver.  These hold lists of afs_cb_interest objects that
repesent the superblocks using that volume ID on that server.

When a callback notification from the server telling of a modification by
another client arrives, the volume ID specified in the notification is
looked up in the server's afs_vol_interest list.  Through the
afs_cb_interest list, the relevant superblocks can be iterated over and the
specific inode looked up and marked in each one.

Make the following efficiency improvements:

 (1) Hold rcu_read_lock() over the entire processing rather than locking it
     each time.

 (2) Do all the callbacks for each vid together rather than individually.
     Each volume then only needs to be looked up once.

 (3) afs_vol_interest objects are now stored in an rb_tree rather than a
     flat list to reduce the lookup step count.

 (4) afs_vol_interest lookup is now done with RCU, but because it's in an
     rb_tree which may rotate under us, a seqlock is used so that if it
     changes during the walk, we repeat the walk with a lock held.

With this and the preceding patch which adds RCU-based lookups in the inode
cache, target volumes/vnodes can be taken without the need to take any
locks, except on the target itself.

Signed-off-by: David Howells <dhowells@redhat.com>
---

 fs/afs/callback.c |  150 ++++++++++++++++++++++++++++++++++-------------------
 fs/afs/internal.h |    6 +-
 fs/afs/server.c   |    4 +
 3 files changed, 100 insertions(+), 60 deletions(-)

diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 0dcbd40732d1..b16781e1683e 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -28,7 +28,7 @@ static struct afs_cb_interest *afs_create_interest(struct afs_server *server,
 {
 	struct afs_vol_interest *new_vi, *vi;
 	struct afs_cb_interest *new;
-	struct hlist_node **pp;
+	struct rb_node *parent, **pp;
 
 	new_vi = kzalloc(sizeof(struct afs_vol_interest), GFP_KERNEL);
 	if (!new_vi)
@@ -42,7 +42,6 @@ static struct afs_cb_interest *afs_create_interest(struct afs_server *server,
 
 	new_vi->usage = 1;
 	new_vi->vid = vnode->volume->vid;
-	INIT_HLIST_NODE(&new_vi->srv_link);
 	INIT_HLIST_HEAD(&new_vi->cb_interests);
 
 	refcount_set(&new->usage, 1);
@@ -51,31 +50,31 @@ static struct afs_cb_interest *afs_create_interest(struct afs_server *server,
 	new->server = afs_get_server(server, afs_server_trace_get_new_cbi);
 	INIT_HLIST_NODE(&new->cb_vlink);
 
-	write_lock(&server->cb_break_lock);
+	write_seqlock(&server->cb_break_lock);
 
-	for (pp = &server->cb_volumes.first; *pp; pp = &(*pp)->next) {
-		vi = hlist_entry(*pp, struct afs_vol_interest, srv_link);
-		if (vi->vid < new_vi->vid)
-			continue;
-		if (vi->vid > new_vi->vid)
-			break;
-		vi->usage++;
-		goto found_vi;
+	pp = &server->cb_volumes.rb_node;
+	while ((parent = *pp)) {
+		vi = rb_entry(parent, struct afs_vol_interest, srv_node);
+		if (vi->vid < new_vi->vid) {
+			pp = &(*pp)->rb_left;
+		} else if (vi->vid > new_vi->vid) {
+			pp = &(*pp)->rb_right;
+		} else {
+			vi->usage++;
+			goto found_vi;
+		}
 	}
 
-	new_vi->srv_link.pprev = pp;
-	new_vi->srv_link.next = *pp;
-	if (*pp)
-		(*pp)->pprev = &new_vi->srv_link.next;
-	*pp = &new_vi->srv_link;
 	vi = new_vi;
 	new_vi = NULL;
-found_vi:
+	rb_link_node_rcu(&vi->srv_node, parent, pp);
+	rb_insert_color(&vi->srv_node, &server->cb_volumes);
 
+found_vi:
 	new->vol_interest = vi;
 	hlist_add_head(&new->cb_vlink, &vi->cb_interests);
 
-	write_unlock(&server->cb_break_lock);
+	write_sequnlock(&server->cb_break_lock);
 	kfree(new_vi);
 	return new;
 }
@@ -182,17 +181,17 @@ void afs_put_cb_interest(struct afs_net *net, struct afs_cb_interest *cbi)
 
 	if (cbi && refcount_dec_and_test(&cbi->usage)) {
 		if (!hlist_unhashed(&cbi->cb_vlink)) {
-			write_lock(&cbi->server->cb_break_lock);
+			write_seqlock(&cbi->server->cb_break_lock);
 
 			hlist_del_init(&cbi->cb_vlink);
 			vi = cbi->vol_interest;
 			cbi->vol_interest = NULL;
 			if (--vi->usage == 0)
-				hlist_del(&vi->srv_link);
+				rb_erase(&vi->srv_node, &cbi->server->cb_volumes);
 			else
 				vi = NULL;
 
-			write_unlock(&cbi->server->cb_break_lock);
+			write_sequnlock(&cbi->server->cb_break_lock);
 			if (vi)
 				kfree_rcu(vi, rcu);
 			afs_put_server(net, cbi->server, afs_server_trace_put_cbi);
@@ -237,6 +236,45 @@ void afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reason
 	write_sequnlock(&vnode->cb_lock);
 }
 
+/*
+ * Look up a volume interest by volume ID under RCU conditions.
+ */
+static struct afs_vol_interest *afs_lookup_vol_interest_rcu(struct afs_server *server,
+							    afs_volid_t vid)
+{
+	struct afs_vol_interest *vi = NULL;
+	struct rb_node *p;
+	int seq = 0;
+
+	do {
+		/* Unfortunately, rbtree walking doesn't give reliable results
+		 * under just the RCU read lock, so we have to check for
+		 * changes.
+		 */
+		read_seqbegin_or_lock(&server->cb_break_lock, &seq);
+
+		p = rcu_dereference_raw(server->cb_volumes.rb_node);
+		while (p) {
+			vi = rb_entry(p, struct afs_vol_interest, srv_node);
+
+			if (vi->vid < vid)
+				p = rcu_dereference_raw(p->rb_left);
+			else if (vi->vid > vid)
+				p = rcu_dereference_raw(p->rb_right);
+			else
+				break;
+			/* We want to repeat the search, this time with the
+			 * lock properly locked.
+			 */
+			vi = NULL;
+		}
+
+	} while (need_seqretry(&server->cb_break_lock, seq));
+
+	done_seqretry(&server->cb_break_lock, seq);
+	return vi;
+}
+
 /*
  * allow the fileserver to explicitly break one callback
  * - happens when
@@ -244,37 +282,18 @@ void afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reason
  *   - a lock is released
  */
 static void afs_break_one_callback(struct afs_server *server,
-				   struct afs_fid *fid)
+				   struct afs_fid *fid,
+				   struct afs_vol_interest *vi)
 {
-	struct afs_vol_interest *vi;
 	struct afs_cb_interest *cbi;
 	struct afs_iget_data data;
 	struct afs_vnode *vnode;
 	struct inode *inode;
 
-	rcu_read_lock();
-	read_lock(&server->cb_break_lock);
-	hlist_for_each_entry(vi, &server->cb_volumes, srv_link) {
-		if (vi->vid < fid->vid)
-			continue;
-		if (vi->vid > fid->vid) {
-			vi = NULL;
-			break;
-		}
-		//atomic_inc(&vi->usage);
-		break;
-	}
-
-	/* TODO: Find all matching volumes if we couldn't match the server and
-	 * break them anyway.
-	 */
-	if (!vi)
-		goto out;
-
 	/* Step through all interested superblocks.  There may be more than one
 	 * because of cell aliasing.
 	 */
-	hlist_for_each_entry(cbi, &vi->cb_interests, cb_vlink) {
+	hlist_for_each_entry_rcu(cbi, &vi->cb_interests, cb_vlink) {
 		if (fid->vnode == 0 && fid->unique == 0) {
 			/* The callback break applies to an entire volume. */
 			struct afs_super_info *as = AFS_FS_S(cbi->sb);
@@ -303,10 +322,36 @@ static void afs_break_one_callback(struct afs_server *server,
 			}
 		}
 	}
+}
 
-out:
-	read_unlock(&server->cb_break_lock);
-	rcu_read_unlock();
+static void afs_break_some_callbacks(struct afs_server *server,
+				     struct afs_callback_break *cbb,
+				     size_t *_count)
+{
+	struct afs_callback_break *residue = cbb;
+	struct afs_vol_interest *vi;
+	afs_volid_t vid = cbb->fid.vid;
+	size_t i;
+
+	vi = afs_lookup_vol_interest_rcu(server, vid);
+
+	/* TODO: Find all matching volumes if we couldn't match the server and
+	 * break them anyway.
+	 */
+
+	for (i = *_count; i > 0; cbb++, i--) {
+		if (cbb->fid.vid == vid) {
+			_debug("- Fid { vl=%08llx n=%llu u=%u }",
+			       cbb->fid.vid,
+			       cbb->fid.vnode,
+			       cbb->fid.unique);
+			--*_count;
+			if (vi)
+				afs_break_one_callback(server, &cbb->fid, vi);
+		} else {
+			*residue++ = *cbb;
+		}
+	}
 }
 
 /*
@@ -319,17 +364,12 @@ void afs_break_callbacks(struct afs_server *server, size_t count,
 
 	ASSERT(server != NULL);
 
-	/* TODO: Sort the callback break list by volume ID */
+	rcu_read_lock();
 
-	for (; count > 0; callbacks++, count--) {
-		_debug("- Fid { vl=%08llx n=%llu u=%u }",
-		       callbacks->fid.vid,
-		       callbacks->fid.vnode,
-		       callbacks->fid.unique);
-		afs_break_one_callback(server, &callbacks->fid);
-	}
+	while (count > 0)
+		afs_break_some_callbacks(server, callbacks, &count);
 
-	_leave("");
+	rcu_read_unlock();
 	return;
 }
 
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 61320a632e15..b6665fc5d355 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -524,9 +524,9 @@ struct afs_server {
 	rwlock_t		fs_lock;	/* access lock */
 
 	/* callback promise management */
-	struct hlist_head	cb_volumes;	/* List of volume interests on this server */
+	struct rb_root		cb_volumes;	/* List of volume interests on this server */
 	unsigned		cb_s_break;	/* Break-everything counter. */
-	rwlock_t		cb_break_lock;	/* Volume finding lock */
+	seqlock_t		cb_break_lock;	/* Volume finding lock */
 
 	/* Probe state */
 	unsigned long		probed_at;	/* Time last probe was dispatched (jiffies) */
@@ -552,7 +552,7 @@ struct afs_server {
  * Volume collation in the server's callback interest list.
  */
 struct afs_vol_interest {
-	struct hlist_node	srv_link;	/* Link in server->cb_volumes */
+	struct rb_node		srv_node;	/* Link in server->cb_volumes */
 	struct hlist_head	cb_interests;	/* List of callback interests on the server */
 	union {
 		struct rcu_head	rcu;
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 3f707b5ecb62..5ed90f419c54 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -212,8 +212,8 @@ static struct afs_server *afs_alloc_server(struct afs_net *net,
 	server->addr_version = alist->version;
 	server->uuid = *uuid;
 	rwlock_init(&server->fs_lock);
-	INIT_HLIST_HEAD(&server->cb_volumes);
-	rwlock_init(&server->cb_break_lock);
+	server->cb_volumes = RB_ROOT;
+	seqlock_init(&server->cb_break_lock);
 	init_waitqueue_head(&server->probe_wq);
 	INIT_LIST_HEAD(&server->probe_link);
 	spin_lock_init(&server->probe_lock);



  parent reply	other threads:[~2020-05-29 22:01 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-05-29 21:59 [PATCH 00/27] afs: Improvements David Howells
2020-05-29 22:00 ` [PATCH 01/27] vfs, afs, ext4: Make the inode hash table RCU searchable David Howells
2020-05-31 13:09   ` Al Viro
2020-05-31 14:20   ` David Howells
2020-05-29 22:00 ` [PATCH 02/27] rxrpc: Map the EACCES error produced by some ICMP6 to EHOSTUNREACH David Howells
2020-05-29 22:00 ` [PATCH 03/27] rxrpc: Adjust /proc/net/rxrpc/calls to display call->debug_id not user_ID David Howells
2020-05-29 22:00 ` [PATCH 04/27] afs: Always include dir in bulk status fetch from afs_do_lookup() David Howells
2020-05-29 22:00 ` [PATCH 05/27] afs: Use the serverUnique field in the UVLDB record to reduce rpc ops David Howells
2020-05-29 22:00 ` [PATCH 06/27] afs: Split the usage count on struct afs_server David Howells
2020-05-29 22:00 ` [PATCH 07/27] afs: Actively poll fileservers to maintain NAT or firewall openings David Howells
2020-05-29 22:01 ` [PATCH 08/27] afs: Show more information in /proc/net/afs/servers David Howells
2020-05-29 22:01 ` David Howells [this message]
2020-05-29 22:01 ` [PATCH 10/27] afs: Set error flag rather than return error from file status decode David Howells
2020-05-29 22:01 ` [PATCH 11/27] afs: Remove the error argument from afs_protocol_error() David Howells
2020-05-29 22:01 ` [PATCH 12/27] afs: Rename struct afs_fs_cursor to afs_operation David Howells
2020-05-29 22:01 ` [PATCH 13/27] afs: Build an abstraction around an "operation" concept David Howells
2020-05-29 22:01 ` [PATCH 14/27] afs: Don't get epoch from a server because it may be ambiguous David Howells
2020-05-29 22:01 ` [PATCH 15/27] afs: Fix handling of CB.ProbeUuid cache manager op David Howells
2020-05-29 22:02 ` [PATCH 16/27] afs: Retain more of the VLDB record for alias detection David Howells
2020-05-29 22:02 ` [PATCH 17/27] afs: Implement client support for the YFSVL.GetCellName RPC op David Howells
2020-05-29 22:02 ` [PATCH 18/27] afs: Detect cell aliases 1 - Cells with root volumes David Howells
2020-06-06  1:58   ` Kees Cook
2020-05-29 22:02 ` [PATCH 19/27] afs: Detect cell aliases 2 - Cells with no " David Howells
2020-05-29 22:02 ` [PATCH 20/27] afs: Detect cell aliases 3 - YFS Cells with a canonical cell name op David Howells
2020-05-29 22:02 ` [PATCH 21/27] afs: Add a tracepoint to track the lifetime of the afs_volume struct David Howells
2020-05-29 22:02 ` [PATCH 22/27] afs: Reorganise volume and server trees to be rooted on the cell David Howells
2020-05-29 22:02 ` [PATCH 23/27] afs: Fix the by-UUID server tree to allow servers with the same UUID David Howells
2020-05-29 22:02 ` [PATCH 24/27] afs: Fix afs_statfs() to not let the values go below zero David Howells
2020-05-29 22:03 ` [PATCH 25/27] afs: Don't use probe running state to make decisions outside probe code David Howells
2020-05-29 22:03 ` [PATCH 26/27] afs: Show more a bit more server state in /proc/net/afs/servers David Howells
2020-05-29 22:03 ` [PATCH 27/27] afs: Adjust the fileserver rotation algorithm to reprobe/retry more quickly David Howells

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=159078967097.679399.13471090664845334315.stgit@warthog.procyon.org.uk \
    --to=dhowells@redhat.com \
    --cc=linux-afs@lists.infradead.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.