[RFC PATCH] cifs: Fix cifsInodeInfo lock_sem deadlock with multiple readers

* [RFC PATCH] cifs: Fix cifsInodeInfo lock_sem deadlock with multiple readers
@ 2019-10-22 20:33 Dave Wysochanski
  2019-10-23  4:50 ` Ronnie Sahlberg
  0 siblings, 1 reply; 6+ messages in thread
From: Dave Wysochanski @ 2019-10-22 20:33 UTC (permalink / raw)
  To: linux-cifs

NOTE: I have verified this fixes the problem but have not run
locking tests yet.

There's a deadlock that is possible that can easily be seen with
multiple readers open/read/close of the same file.  The deadlock
is due to a reader calling down_read(lock_sem) and holding
it across the full IO, even if a network or server disruption
occurs and the session has to be reconnected.  Upon reconnect,
cifs_relock_file is called where down_read(lock_sem) is called
a second time.  Normally this is not a problem, but if there is
another process that calls down_write(lock_sem) in between the
first and second reader call to down_read(lock_sem), this will
cause a deadlock.  The caller of down_write (often either
_cifsFileInfo_put that is just removing and freeing cifsLockInfo
structures from the list of locks, or cifs_new_fileinfo, which
is just attaching cifs_fid_locks to cifsInodeInfo->llist), will
block due to the reader's first down_read(lock_sem) that obtains
the semaphore (read IO in flight).  And then when the server
comes back up, the reader that holds calls down_read(lock_sem)
a second time, and this time is blocked too because of the
blocked in down_write (rw_semaphores would starve writers if
this was not the case).  Interestingly enough, the callers of
down_write in the simple test case was not adding a
conflicting lock at all, just either opening or closing the
file, and modifying the list of locks attached to cifsInodeInfo,
this ends up tripping up the reader process and causing the
deadlock.

The root of the problem is that lock_sem both protects the
cifsInodeInfo fields (such as the lllist - the list of locks),
but is also being re-used to avoid a conflicting lock coming
in while IO is in flight.  Add a new semaphore that tracks
just the IO in flight, and must be obtained before adding
a new lock.  While this does add another layer of complexity
and a semaphore ordering that must be obeyed to avoid new
deadlocks, it does clealy solve the underlying problem.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
---
 fs/cifs/cifsfs.c   |  1 +
 fs/cifs/cifsglob.h |  1 +
 fs/cifs/file.c     | 24 +++++++++++++++++++-----
 3 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index c049c7b3aa87..10f614324e4e 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -1336,6 +1336,7 @@ static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off,
 
 	inode_init_once(&cifsi->vfs_inode);
 	init_rwsem(&cifsi->lock_sem);
+	init_rwsem(&cifsi->io_inflight_sem);
 }
 
 static int __init
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 50dfd9049370..40e8358dc1cc 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1392,6 +1392,7 @@ struct cifsInodeInfo {
 	bool can_cache_brlcks;
 	struct list_head llist;	/* locks helb by this inode */
 	struct rw_semaphore lock_sem;	/* protect the fields above */
+	struct rw_semaphore io_inflight_sem; /* Used to avoid lock conflicts  */
 	/* BB add in lists for dirty pages i.e. write caching info for oplock */
 	struct list_head openFileList;
 	spinlock_t	open_file_lock;	/* protects openFileList */
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 5ad15de2bb4f..417baa7f5dd3 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -621,7 +621,7 @@ int cifs_open(struct inode *inode, struct file *file)
 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
 	int rc = 0;
 
-	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
+	down_read(&cinode->lock_sem);
 	if (cinode->can_cache_brlcks) {
 		/* can cache locks - no need to relock */
 		up_read(&cinode->lock_sem);
@@ -973,6 +973,7 @@ int cifs_closedir(struct inode *inode, struct file *file)
 	struct cifs_fid_locks *cur;
 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
 
+	down_read(&cinode->lock_sem);
 	list_for_each_entry(cur, &cinode->llist, llist) {
 		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
 						 flags, cfile, conf_lock,
@@ -980,6 +981,7 @@ int cifs_closedir(struct inode *inode, struct file *file)
 		if (rc)
 			break;
 	}
+	up_read(&cinode->lock_sem);
 
 	return rc;
 }
@@ -1027,9 +1029,11 @@ int cifs_closedir(struct inode *inode, struct file *file)
 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
 {
 	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
+	down_write(&cinode->io_inflight_sem);
 	down_write(&cinode->lock_sem);
 	list_add_tail(&lock->llist, &cfile->llist->locks);
 	up_write(&cinode->lock_sem);
+	up_write(&cinode->io_inflight_sem);
 }
 
 /*
@@ -1049,6 +1053,7 @@ int cifs_closedir(struct inode *inode, struct file *file)
 
 try_again:
 	exist = false;
+	down_write(&cinode->io_inflight_sem);
 	down_write(&cinode->lock_sem);
 
 	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
@@ -1057,6 +1062,7 @@ int cifs_closedir(struct inode *inode, struct file *file)
 	if (!exist && cinode->can_cache_brlcks) {
 		list_add_tail(&lock->llist, &cfile->llist->locks);
 		up_write(&cinode->lock_sem);
+		up_write(&cinode->io_inflight_sem);
 		return rc;
 	}
 
@@ -1077,6 +1083,7 @@ int cifs_closedir(struct inode *inode, struct file *file)
 	}
 
 	up_write(&cinode->lock_sem);
+	up_write(&cinode->io_inflight_sem);
 	return rc;
 }
 
@@ -1125,14 +1132,17 @@ int cifs_closedir(struct inode *inode, struct file *file)
 		return rc;
 
 try_again:
+	down_write(&cinode->io_inflight_sem);
 	down_write(&cinode->lock_sem);
 	if (!cinode->can_cache_brlcks) {
 		up_write(&cinode->lock_sem);
+		down_write(&cinode->io_inflight_sem);
 		return rc;
 	}
 
 	rc = posix_lock_file(file, flock, NULL);
 	up_write(&cinode->lock_sem);
+	up_write(&cinode->io_inflight_sem);
 	if (rc == FILE_LOCK_DEFERRED) {
 		rc = wait_event_interruptible(flock->fl_wait, !flock->fl_blocker);
 		if (!rc)
@@ -1331,6 +1341,7 @@ struct lock_to_push {
 	int rc = 0;
 
 	/* we are going to update can_cache_brlcks here - need a write access */
+	down_write(&cinode->io_inflight_sem);
 	down_write(&cinode->lock_sem);
 	if (!cinode->can_cache_brlcks) {
 		up_write(&cinode->lock_sem);
@@ -1346,6 +1357,7 @@ struct lock_to_push {
 
 	cinode->can_cache_brlcks = false;
 	up_write(&cinode->lock_sem);
+	up_write(&cinode->io_inflight_sem);
 	return rc;
 }
 
@@ -1522,6 +1534,7 @@ struct lock_to_push {
 	if (!buf)
 		return -ENOMEM;
 
+	down_write(&cinode->io_inflight_sem);
 	down_write(&cinode->lock_sem);
 	for (i = 0; i < 2; i++) {
 		cur = buf;
@@ -1593,6 +1606,7 @@ struct lock_to_push {
 	}
 
 	up_write(&cinode->lock_sem);
+	up_write(&cinode->io_inflight_sem);
 	kfree(buf);
 	return rc;
 }
@@ -3148,7 +3162,7 @@ ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
 	 * We need to hold the sem to be sure nobody modifies lock list
 	 * with a brlock that prevents writing.
 	 */
-	down_read(&cinode->lock_sem);
+	down_read(&cinode->io_inflight_sem);
 
 	rc = generic_write_checks(iocb, from);
 	if (rc <= 0)
@@ -3161,7 +3175,7 @@ ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
 	else
 		rc = -EACCES;
 out:
-	up_read(&cinode->lock_sem);
+	up_read(&cinode->io_inflight_sem);
 	inode_unlock(inode);
 
 	if (rc > 0)
@@ -3887,12 +3901,12 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
 	 * We need to hold the sem to be sure nobody modifies lock list
 	 * with a brlock that prevents reading.
 	 */
-	down_read(&cinode->lock_sem);
+	down_read(&cinode->io_inflight_sem);
 	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
 				     tcon->ses->server->vals->shared_lock_type,
 				     0, NULL, CIFS_READ_OP))
 		rc = generic_file_read_iter(iocb, to);
-	up_read(&cinode->lock_sem);
+	up_read(&cinode->io_inflight_sem);
 	return rc;
 }
 
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread