All of lore.kernel.org
 help / color / mirror / Atom feed
From: Alexander Aring <aahringo@redhat.com>
To: linux-nfs@vger.kernel.org
Cc: cluster-devel@redhat.com, ocfs2-devel@lists.linux.dev,
	linux-fsdevel@vger.kernel.org, teigland@redhat.com,
	rpeterso@redhat.com, agruenba@redhat.com,
	trond.myklebust@hammerspace.com, anna@kernel.org,
	chuck.lever@oracle.com, jlayton@kernel.org
Subject: [RFCv2 1/7] lockd: fix race in async lock request handling
Date: Mon, 14 Aug 2023 17:11:10 -0400	[thread overview]
Message-ID: <20230814211116.3224759-2-aahringo@redhat.com> (raw)
In-Reply-To: <20230814211116.3224759-1-aahringo@redhat.com>

This patch fixes a race in async lock request handling between adding
the relevant struct nlm_block to nlm_blocked list after the request was
sent by vfs_lock_file() and nlmsvc_grant_deferred() does a lookup of the
nlm_block in the nlm_blocked list. It could be that the async request is
completed before the nlm_block was added to the list. This would end
in a -ENOENT and a kernel log message of "lockd: grant for unknown
block".

To solve this issue we add the nlm_block before the vfs_lock_file() call
to be sure it has been added when a possible nlmsvc_grant_deferred() is
called. If the vfs_lock_file() results in an case when it wouldn't be
added to nlm_blocked list, the nlm_block struct will be removed from
this list again.

The introducing of the new B_PENDING_CALLBACK nlm_block flag will handle
async lock requests on a pending lock requests as a retry on the caller
level to hit the -EAGAIN case.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
---
 fs/lockd/svclock.c          | 100 ++++++++++++++++++++++++++----------
 include/linux/lockd/lockd.h |   2 +
 2 files changed, 74 insertions(+), 28 deletions(-)

diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index c43ccdf28ed9..7d63524bdb81 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -133,6 +133,7 @@ nlmsvc_remove_block(struct nlm_block *block)
 {
 	if (!list_empty(&block->b_list)) {
 		spin_lock(&nlm_blocked_lock);
+		block->b_flags &= ~B_PENDING_CALLBACK;
 		list_del_init(&block->b_list);
 		spin_unlock(&nlm_blocked_lock);
 		nlmsvc_release_block(block);
@@ -232,6 +233,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_host *host,
 	kref_init(&block->b_count);
 	INIT_LIST_HEAD(&block->b_list);
 	INIT_LIST_HEAD(&block->b_flist);
+	mutex_init(&block->b_cb_mutex);
 
 	if (!nlmsvc_setgrantargs(call, lock))
 		goto failed_free;
@@ -289,6 +291,8 @@ static void nlmsvc_free_block(struct kref *kref)
 
 	dprintk("lockd: freeing block %p...\n", block);
 
+	WARN_ON_ONCE(block->b_flags & B_PENDING_CALLBACK);
+
 	/* Remove block from file's list of blocks */
 	list_del_init(&block->b_flist);
 	mutex_unlock(&file->f_mutex);
@@ -532,6 +536,13 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 		goto out;
 	}
 
+	mutex_lock(&block->b_cb_mutex);
+	if (block->b_flags & B_PENDING_CALLBACK)
+		goto pending_request;
+
+	/* Append to list of blocked */
+	nlmsvc_insert_block(block, NLM_NEVER);
+
 	if (!wait)
 		lock->fl.fl_flags &= ~FL_SLEEP;
 	mode = lock_to_openmode(&lock->fl);
@@ -541,9 +552,13 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 	dprintk("lockd: vfs_lock_file returned %d\n", error);
 	switch (error) {
 		case 0:
+			nlmsvc_remove_block(block);
 			ret = nlm_granted;
-			goto out;
+			goto out_cb_mutex;
 		case -EAGAIN:
+			if (!wait)
+				nlmsvc_remove_block(block);
+pending_request:
 			/*
 			 * If this is a blocking request for an
 			 * already pending lock request then we need
@@ -552,26 +567,29 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 			if (wait)
 				break;
 			ret = async_block ? nlm_lck_blocked : nlm_lck_denied;
-			goto out;
+			goto out_cb_mutex;
 		case FILE_LOCK_DEFERRED:
+			block->b_flags |= B_PENDING_CALLBACK;
+
 			if (wait)
 				break;
 			/* Filesystem lock operation is in progress
 			   Add it to the queue waiting for callback */
 			ret = nlmsvc_defer_lock_rqst(rqstp, block);
-			goto out;
+			goto out_cb_mutex;
 		case -EDEADLK:
+			nlmsvc_remove_block(block);
 			ret = nlm_deadlock;
-			goto out;
+			goto out_cb_mutex;
 		default:			/* includes ENOLCK */
+			nlmsvc_remove_block(block);
 			ret = nlm_lck_denied_nolocks;
-			goto out;
+			goto out_cb_mutex;
 	}
 
 	ret = nlm_lck_blocked;
-
-	/* Append to list of blocked */
-	nlmsvc_insert_block(block, NLM_NEVER);
+out_cb_mutex:
+	mutex_unlock(&block->b_cb_mutex);
 out:
 	mutex_unlock(&file->f_mutex);
 	nlmsvc_release_block(block);
@@ -728,34 +746,60 @@ nlmsvc_update_deferred_block(struct nlm_block *block, int result)
 		block->b_flags |= B_TIMED_OUT;
 }
 
+static int __nlmsvc_grant_deferred(struct nlm_block *block,
+				   struct file_lock *fl,
+				   int result)
+{
+	int rc = 0;
+
+	dprintk("lockd: nlmsvc_notify_blocked block %p flags %d\n",
+					block, block->b_flags);
+	if (block->b_flags & B_QUEUED) {
+		if (block->b_flags & B_TIMED_OUT) {
+			rc = -ENOLCK;
+			goto out;
+		}
+		nlmsvc_update_deferred_block(block, result);
+	} else if (result == 0)
+		block->b_granted = 1;
+
+	nlmsvc_insert_block_locked(block, 0);
+	svc_wake_up(block->b_daemon);
+out:
+	return rc;
+}
+
 static int nlmsvc_grant_deferred(struct file_lock *fl, int result)
 {
-	struct nlm_block *block;
-	int rc = -ENOENT;
+	struct nlm_block *iter, *block = NULL;
+	int rc;
 
 	spin_lock(&nlm_blocked_lock);
-	list_for_each_entry(block, &nlm_blocked, b_list) {
-		if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) {
-			dprintk("lockd: nlmsvc_notify_blocked block %p flags %d\n",
-							block, block->b_flags);
-			if (block->b_flags & B_QUEUED) {
-				if (block->b_flags & B_TIMED_OUT) {
-					rc = -ENOLCK;
-					break;
-				}
-				nlmsvc_update_deferred_block(block, result);
-			} else if (result == 0)
-				block->b_granted = 1;
-
-			nlmsvc_insert_block_locked(block, 0);
-			svc_wake_up(block->b_daemon);
-			rc = 0;
+	list_for_each_entry(iter, &nlm_blocked, b_list) {
+		if (nlm_compare_locks(&iter->b_call->a_args.lock.fl, fl)) {
+			kref_get(&iter->b_count);
+			block = iter;
 			break;
 		}
 	}
 	spin_unlock(&nlm_blocked_lock);
-	if (rc == -ENOENT)
-		printk(KERN_WARNING "lockd: grant for unknown block\n");
+
+	if (!block) {
+		pr_warn("lockd: grant for unknown pending block\n");
+		return -ENOENT;
+	}
+
+	/* don't interfere with nlmsvc_lock() */
+	mutex_lock(&block->b_cb_mutex);
+	block->b_flags &= ~B_PENDING_CALLBACK;
+
+	spin_lock(&nlm_blocked_lock);
+	WARN_ON_ONCE(list_empty(&block->b_list));
+	rc = __nlmsvc_grant_deferred(block, fl, result);
+	spin_unlock(&nlm_blocked_lock);
+	mutex_unlock(&block->b_cb_mutex);
+
+	nlmsvc_release_block(block);
 	return rc;
 }
 
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index f42594a9efe0..91f55458f5fc 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -185,10 +185,12 @@ struct nlm_block {
 	struct nlm_file *	b_file;		/* file in question */
 	struct cache_req *	b_cache_req;	/* deferred request handling */
 	struct cache_deferred_req * b_deferred_req;
+	struct mutex		b_cb_mutex;	/* callback mutex */
 	unsigned int		b_flags;	/* block flags */
 #define B_QUEUED		1	/* lock queued */
 #define B_GOT_CALLBACK		2	/* got lock or conflicting lock */
 #define B_TIMED_OUT		4	/* filesystem too slow to respond */
+#define B_PENDING_CALLBACK	8	/* pending callback for lock request */
 };
 
 /*
-- 
2.31.1


WARNING: multiple messages have this Message-ID (diff)
From: Alexander Aring <aahringo@redhat.com>
To: linux-nfs@vger.kernel.org
Cc: jlayton@kernel.org, cluster-devel@redhat.com,
	ocfs2-devel@lists.linux.dev, chuck.lever@oracle.com,
	anna@kernel.org, linux-fsdevel@vger.kernel.org,
	trond.myklebust@hammerspace.com
Subject: [Cluster-devel] [RFCv2 1/7] lockd: fix race in async lock request handling
Date: Mon, 14 Aug 2023 17:11:10 -0400	[thread overview]
Message-ID: <20230814211116.3224759-2-aahringo@redhat.com> (raw)
In-Reply-To: <20230814211116.3224759-1-aahringo@redhat.com>

This patch fixes a race in async lock request handling between adding
the relevant struct nlm_block to nlm_blocked list after the request was
sent by vfs_lock_file() and nlmsvc_grant_deferred() does a lookup of the
nlm_block in the nlm_blocked list. It could be that the async request is
completed before the nlm_block was added to the list. This would end
in a -ENOENT and a kernel log message of "lockd: grant for unknown
block".

To solve this issue we add the nlm_block before the vfs_lock_file() call
to be sure it has been added when a possible nlmsvc_grant_deferred() is
called. If the vfs_lock_file() results in an case when it wouldn't be
added to nlm_blocked list, the nlm_block struct will be removed from
this list again.

The introducing of the new B_PENDING_CALLBACK nlm_block flag will handle
async lock requests on a pending lock requests as a retry on the caller
level to hit the -EAGAIN case.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
---
 fs/lockd/svclock.c          | 100 ++++++++++++++++++++++++++----------
 include/linux/lockd/lockd.h |   2 +
 2 files changed, 74 insertions(+), 28 deletions(-)

diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index c43ccdf28ed9..7d63524bdb81 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -133,6 +133,7 @@ nlmsvc_remove_block(struct nlm_block *block)
 {
 	if (!list_empty(&block->b_list)) {
 		spin_lock(&nlm_blocked_lock);
+		block->b_flags &= ~B_PENDING_CALLBACK;
 		list_del_init(&block->b_list);
 		spin_unlock(&nlm_blocked_lock);
 		nlmsvc_release_block(block);
@@ -232,6 +233,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_host *host,
 	kref_init(&block->b_count);
 	INIT_LIST_HEAD(&block->b_list);
 	INIT_LIST_HEAD(&block->b_flist);
+	mutex_init(&block->b_cb_mutex);
 
 	if (!nlmsvc_setgrantargs(call, lock))
 		goto failed_free;
@@ -289,6 +291,8 @@ static void nlmsvc_free_block(struct kref *kref)
 
 	dprintk("lockd: freeing block %p...\n", block);
 
+	WARN_ON_ONCE(block->b_flags & B_PENDING_CALLBACK);
+
 	/* Remove block from file's list of blocks */
 	list_del_init(&block->b_flist);
 	mutex_unlock(&file->f_mutex);
@@ -532,6 +536,13 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 		goto out;
 	}
 
+	mutex_lock(&block->b_cb_mutex);
+	if (block->b_flags & B_PENDING_CALLBACK)
+		goto pending_request;
+
+	/* Append to list of blocked */
+	nlmsvc_insert_block(block, NLM_NEVER);
+
 	if (!wait)
 		lock->fl.fl_flags &= ~FL_SLEEP;
 	mode = lock_to_openmode(&lock->fl);
@@ -541,9 +552,13 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 	dprintk("lockd: vfs_lock_file returned %d\n", error);
 	switch (error) {
 		case 0:
+			nlmsvc_remove_block(block);
 			ret = nlm_granted;
-			goto out;
+			goto out_cb_mutex;
 		case -EAGAIN:
+			if (!wait)
+				nlmsvc_remove_block(block);
+pending_request:
 			/*
 			 * If this is a blocking request for an
 			 * already pending lock request then we need
@@ -552,26 +567,29 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 			if (wait)
 				break;
 			ret = async_block ? nlm_lck_blocked : nlm_lck_denied;
-			goto out;
+			goto out_cb_mutex;
 		case FILE_LOCK_DEFERRED:
+			block->b_flags |= B_PENDING_CALLBACK;
+
 			if (wait)
 				break;
 			/* Filesystem lock operation is in progress
 			   Add it to the queue waiting for callback */
 			ret = nlmsvc_defer_lock_rqst(rqstp, block);
-			goto out;
+			goto out_cb_mutex;
 		case -EDEADLK:
+			nlmsvc_remove_block(block);
 			ret = nlm_deadlock;
-			goto out;
+			goto out_cb_mutex;
 		default:			/* includes ENOLCK */
+			nlmsvc_remove_block(block);
 			ret = nlm_lck_denied_nolocks;
-			goto out;
+			goto out_cb_mutex;
 	}
 
 	ret = nlm_lck_blocked;
-
-	/* Append to list of blocked */
-	nlmsvc_insert_block(block, NLM_NEVER);
+out_cb_mutex:
+	mutex_unlock(&block->b_cb_mutex);
 out:
 	mutex_unlock(&file->f_mutex);
 	nlmsvc_release_block(block);
@@ -728,34 +746,60 @@ nlmsvc_update_deferred_block(struct nlm_block *block, int result)
 		block->b_flags |= B_TIMED_OUT;
 }
 
+static int __nlmsvc_grant_deferred(struct nlm_block *block,
+				   struct file_lock *fl,
+				   int result)
+{
+	int rc = 0;
+
+	dprintk("lockd: nlmsvc_notify_blocked block %p flags %d\n",
+					block, block->b_flags);
+	if (block->b_flags & B_QUEUED) {
+		if (block->b_flags & B_TIMED_OUT) {
+			rc = -ENOLCK;
+			goto out;
+		}
+		nlmsvc_update_deferred_block(block, result);
+	} else if (result == 0)
+		block->b_granted = 1;
+
+	nlmsvc_insert_block_locked(block, 0);
+	svc_wake_up(block->b_daemon);
+out:
+	return rc;
+}
+
 static int nlmsvc_grant_deferred(struct file_lock *fl, int result)
 {
-	struct nlm_block *block;
-	int rc = -ENOENT;
+	struct nlm_block *iter, *block = NULL;
+	int rc;
 
 	spin_lock(&nlm_blocked_lock);
-	list_for_each_entry(block, &nlm_blocked, b_list) {
-		if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) {
-			dprintk("lockd: nlmsvc_notify_blocked block %p flags %d\n",
-							block, block->b_flags);
-			if (block->b_flags & B_QUEUED) {
-				if (block->b_flags & B_TIMED_OUT) {
-					rc = -ENOLCK;
-					break;
-				}
-				nlmsvc_update_deferred_block(block, result);
-			} else if (result == 0)
-				block->b_granted = 1;
-
-			nlmsvc_insert_block_locked(block, 0);
-			svc_wake_up(block->b_daemon);
-			rc = 0;
+	list_for_each_entry(iter, &nlm_blocked, b_list) {
+		if (nlm_compare_locks(&iter->b_call->a_args.lock.fl, fl)) {
+			kref_get(&iter->b_count);
+			block = iter;
 			break;
 		}
 	}
 	spin_unlock(&nlm_blocked_lock);
-	if (rc == -ENOENT)
-		printk(KERN_WARNING "lockd: grant for unknown block\n");
+
+	if (!block) {
+		pr_warn("lockd: grant for unknown pending block\n");
+		return -ENOENT;
+	}
+
+	/* don't interfere with nlmsvc_lock() */
+	mutex_lock(&block->b_cb_mutex);
+	block->b_flags &= ~B_PENDING_CALLBACK;
+
+	spin_lock(&nlm_blocked_lock);
+	WARN_ON_ONCE(list_empty(&block->b_list));
+	rc = __nlmsvc_grant_deferred(block, fl, result);
+	spin_unlock(&nlm_blocked_lock);
+	mutex_unlock(&block->b_cb_mutex);
+
+	nlmsvc_release_block(block);
 	return rc;
 }
 
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index f42594a9efe0..91f55458f5fc 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -185,10 +185,12 @@ struct nlm_block {
 	struct nlm_file *	b_file;		/* file in question */
 	struct cache_req *	b_cache_req;	/* deferred request handling */
 	struct cache_deferred_req * b_deferred_req;
+	struct mutex		b_cb_mutex;	/* callback mutex */
 	unsigned int		b_flags;	/* block flags */
 #define B_QUEUED		1	/* lock queued */
 #define B_GOT_CALLBACK		2	/* got lock or conflicting lock */
 #define B_TIMED_OUT		4	/* filesystem too slow to respond */
+#define B_PENDING_CALLBACK	8	/* pending callback for lock request */
 };
 
 /*
-- 
2.31.1


  reply	other threads:[~2023-08-14 21:13 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-08-14 21:11 [RFCv2 0/7] fs: nfs: async lock request changes Alexander Aring
2023-08-14 21:11 ` [Cluster-devel] " Alexander Aring
2023-08-14 21:11 ` Alexander Aring [this message]
2023-08-14 21:11   ` [Cluster-devel] [RFCv2 1/7] lockd: fix race in async lock request handling Alexander Aring
2023-08-15 17:49   ` Jeff Layton
2023-08-15 17:49     ` [Cluster-devel] " Jeff Layton
2023-08-15 18:21     ` Jeff Layton
2023-08-15 18:21       ` [Cluster-devel] " Jeff Layton
2023-08-17 18:39       ` Alexander Aring
2023-08-17 18:39         ` [Cluster-devel] " Alexander Aring
2023-08-17 18:36     ` Alexander Aring
2023-08-17 18:36       ` [Cluster-devel] " Alexander Aring
2023-08-14 21:11 ` [RFCv2 2/7] lockd: FILE_LOCK_DEFERRED only on FL_SLEEP Alexander Aring
2023-08-14 21:11   ` [Cluster-devel] " Alexander Aring
2023-08-16 11:37   ` Jeff Layton
2023-08-16 11:37     ` [Cluster-devel] " Jeff Layton
2023-08-17  1:40     ` Alexander Aring
2023-08-17  1:40       ` Alexander Aring
2023-08-14 21:11 ` [RFCv2 3/7] lockd: introduce safe async lock op Alexander Aring
2023-08-14 21:11   ` [Cluster-devel] " Alexander Aring
2023-08-16 11:43   ` Jeff Layton
2023-08-16 11:43     ` [Cluster-devel] " Jeff Layton
2023-08-14 21:11 ` [RFCv2 4/7] locks: update lock callback documentation Alexander Aring
2023-08-14 21:11   ` [Cluster-devel] " Alexander Aring
2023-08-16 12:01   ` Jeff Layton
2023-08-16 12:01     ` [Cluster-devel] " Jeff Layton
2023-08-17  1:23     ` Alexander Aring
2023-08-17  1:23       ` Alexander Aring
2023-08-14 21:11 ` [RFCv2 5/7] dlm: use fl_owner from lockd Alexander Aring
2023-08-14 21:11   ` [Cluster-devel] " Alexander Aring
2023-08-16 12:02   ` Jeff Layton
2023-08-16 12:02     ` [Cluster-devel] " Jeff Layton
2023-08-14 21:11 ` [RFCv2 6/7] dlm: use FL_SLEEP to check if blocking request Alexander Aring
2023-08-14 21:11   ` [Cluster-devel] " Alexander Aring
2023-08-16 13:07   ` Jeff Layton
2023-08-16 13:07     ` [Cluster-devel] " Jeff Layton
2023-08-17  1:19     ` Alexander Aring
2023-08-17  1:19       ` Alexander Aring
2023-08-17 11:27       ` [Cluster-devel] " Jeff Layton
2023-08-17 11:27         ` Jeff Layton
2023-08-17 13:02         ` Alexander Aring
2023-08-17 13:02           ` [Cluster-devel] " Alexander Aring
2023-08-14 21:11 ` [RFCv2 7/7] dlm: implement EXPORT_OP_SAFE_ASYNC_LOCK Alexander Aring
2023-08-14 21:11   ` [Cluster-devel] " Alexander Aring

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230814211116.3224759-2-aahringo@redhat.com \
    --to=aahringo@redhat.com \
    --cc=agruenba@redhat.com \
    --cc=anna@kernel.org \
    --cc=chuck.lever@oracle.com \
    --cc=cluster-devel@redhat.com \
    --cc=jlayton@kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=ocfs2-devel@lists.linux.dev \
    --cc=rpeterso@redhat.com \
    --cc=teigland@redhat.com \
    --cc=trond.myklebust@hammerspace.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.