From: James Simmons <jsimmons@infradead.org>
To: lustre-devel@lists.lustre.org
Subject: [lustre-devel] [PATCH v2 03/29] lustre: llite: replace lli_trunc_sem
Date: Mon, 20 May 2019 08:50:45 -0400
Message-ID: <1558356671-29599-4-git-send-email-jsimmons@infradead.org>
In-Reply-To: <1558356671-29599-1-git-send-email-jsimmons@infradead.org>

From: NeilBrown <neilb@suse.com>

lli_trunc_sem can lead to a deadlock.

vvp_io_read_start can take mmap_sem while holding lli_trunc_sem,
and vvp_io_fault_start will take lli_trunc_sem while holding mmap_sem.

These aren't necessarily the same mmap_sem, but they can be if you
mmap a lustre file and then read from that file into the mapped memory.

These are both 'down_read' calls on lli_trunc_sem so they don't
directly conflict, but if vvp_io_setattr_start() is called to truncate
the file between them, its 'down_write' queues behind the first
reader, the second 'down_read' then queues behind the pending writer,
and a deadlock results.
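
Schematically, with T1 reading from the file into a mapping of that
same file and T2 truncating it, the interleaving looks like this
(reconstructed from the description above, not a literal trace):

  T1: vvp_io_read_start()
        down_read(lli_trunc_sem)      # granted
  T2: vvp_io_setattr_start()
        down_write(lli_trunc_sem)     # queues behind T1's reader
  T1: copy to the user buffer faults on the mapped page
        takes mmap_sem
        vvp_io_fault_start()
          down_read(lli_trunc_sem)    # queues behind T2's pending writer

T1's second down_read() waits on T2, and T2 waits on T1's first
down_read(), which is only released when the read completes: deadlock.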

Solve this by replacing the rw_semaphore with a hand-coded semaphore
built from atomic counters and wait_var_event().
In the vvp_io_fault_start() case, where mmap_sem is already held,
don't wait for a pending writer, only for an active one.  This means
the fault won't wait if vvp_io_read_start() has already started, and
so no deadlock happens.

I'd like there to be a better way to fix this, but I haven't found it
yet.
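
For illustration only, here is a minimal userspace sketch of the same
semantics, using C11 atomics plus a pthread condition variable in
place of wait_var_event()/wake_up_var().  All names are hypothetical
stand-ins for the lli_trunc_readers/lli_trunc_waiters fields the
patch adds; this is a model of the scheme, not the kernel code:

#include <stdatomic.h>
#include <stdbool.h>
#include <pthread.h>

static atomic_int readers;	/* > 0: readers in; 0: idle; -1: writer in */
static atomic_int waiters;	/* writers waiting for the lock */

static pthread_mutex_t lk = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;

/* stands in for wait_var_event(): sleep until cond becomes true */
#define wait_until(cond)					\
	do {							\
		pthread_mutex_lock(&lk);			\
		while (!(cond))					\
			pthread_cond_wait(&cv, &lk);		\
		pthread_mutex_unlock(&lk);			\
	} while (0)

/* stands in for wake_up_var() */
static void wake_all(void)
{
	pthread_mutex_lock(&lk);
	pthread_cond_broadcast(&cv);
	pthread_mutex_unlock(&lk);
}

/* userspace analogue of atomic_inc_unless_negative() */
static bool inc_unless_negative(atomic_int *v)
{
	int old = atomic_load(v);

	while (old >= 0)
		if (atomic_compare_exchange_weak(v, &old, old + 1))
			return true;
	return false;
}

/* read/write start: a reader that also defers to *pending* writers */
static void trunc_read_lock(void)
{
	wait_until(atomic_load(&waiters) == 0 &&
		   inc_unless_negative(&readers));
}

/* fault start: only an *active* writer (readers == -1) blocks us */
static void trunc_read_lock_mmap(void)
{
	wait_until(inc_unless_negative(&readers));
}

static void trunc_read_unlock(void)
{
	/* the last reader out wakes any pending writer */
	if (atomic_fetch_sub(&readers, 1) == 1 && atomic_load(&waiters))
		wake_all();
}

/* setattr start: wait for readers to drain, then claim with -1 */
static void trunc_write_lock(void)
{
	int zero;

	atomic_fetch_add(&waiters, 1);
	wait_until((zero = 0,
		    atomic_compare_exchange_strong(&readers, &zero, -1)));
	atomic_fetch_sub(&waiters, 1);
}

static void trunc_write_unlock(void)
{
	atomic_store(&readers, 0);
	wake_all();
}

The asymmetry is the point of the fix: trunc_read_lock_mmap() ignores
the waiters count, so a fault taken while a read already holds the
semaphore cannot queue behind a pending truncate.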

Signed-off-by: NeilBrown <neilb@suse.com>
---
 fs/lustre/llite/llite_internal.h |  3 ++-
 fs/lustre/llite/llite_lib.c      |  3 ++-
 fs/lustre/llite/vvp_io.c         | 28 +++++++++++++++++++++-------
 3 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/fs/lustre/llite/llite_internal.h b/fs/lustre/llite/llite_internal.h
index 9da59b1..7566b1b 100644
--- a/fs/lustre/llite/llite_internal.h
+++ b/fs/lustre/llite/llite_internal.h
@@ -190,7 +190,8 @@ struct ll_inode_info {
 			 *    struct list_head wait_list;
 			 * }
 			 */
-			struct rw_semaphore		lli_trunc_sem;
+			atomic_t			lli_trunc_readers;
+			atomic_t			lli_trunc_waiters;
 			struct range_lock_tree		lli_write_tree;
 
 			struct rw_semaphore		lli_glimpse_sem;
diff --git a/fs/lustre/llite/llite_lib.c b/fs/lustre/llite/llite_lib.c
index 4e98eb4..ab7c84a 100644
--- a/fs/lustre/llite/llite_lib.c
+++ b/fs/lustre/llite/llite_lib.c
@@ -894,7 +894,8 @@ void ll_lli_init(struct ll_inode_info *lli)
 	} else {
 		mutex_init(&lli->lli_size_mutex);
 		lli->lli_symlink_name = NULL;
-		init_rwsem(&lli->lli_trunc_sem);
+		atomic_set(&lli->lli_trunc_readers, 0);
+		atomic_set(&lli->lli_trunc_waiters, 0);
 		range_lock_tree_init(&lli->lli_write_tree);
 		init_rwsem(&lli->lli_glimpse_sem);
 		lli->lli_glimpse_time = 0;
diff --git a/fs/lustre/llite/vvp_io.c b/fs/lustre/llite/vvp_io.c
index 225a858..a9db530 100644
--- a/fs/lustre/llite/vvp_io.c
+++ b/fs/lustre/llite/vvp_io.c
@@ -667,7 +667,10 @@ static int vvp_io_setattr_start(const struct lu_env *env,
 	struct ll_inode_info *lli = ll_i2info(inode);
 
 	if (cl_io_is_trunc(io)) {
-		down_write(&lli->lli_trunc_sem);
+		atomic_inc(&lli->lli_trunc_waiters);
+		wait_var_event(&lli->lli_trunc_readers,
+			       atomic_cmpxchg(&lli->lli_trunc_readers, 0, -1) == 0);
+		atomic_dec(&lli->lli_trunc_waiters);
 		inode_lock(inode);
 		inode_dio_wait(inode);
 	} else {
@@ -693,7 +696,8 @@ static void vvp_io_setattr_end(const struct lu_env *env,
 		 */
 		vvp_do_vmtruncate(inode, io->u.ci_setattr.sa_attr.lvb_size);
 		inode_unlock(inode);
-		up_write(&lli->lli_trunc_sem);
+		atomic_set(&lli->lli_trunc_readers, 0);
+		wake_up_var(&lli->lli_trunc_readers);
 	} else {
 		inode_unlock(inode);
 	}
@@ -732,7 +736,9 @@ static int vvp_io_read_start(const struct lu_env *env,
 
 	CDEBUG(D_VFSTRACE, "read: -> [%lli, %lli)\n", pos, pos + cnt);
 
-	down_read(&lli->lli_trunc_sem);
+	wait_var_event(&lli->lli_trunc_readers,
+		       atomic_read(&lli->lli_trunc_waiters) == 0 &&
+		       atomic_inc_unless_negative(&lli->lli_trunc_readers));
 
 	if (!can_populate_pages(env, io, inode))
 		return 0;
@@ -965,7 +971,9 @@ static int vvp_io_write_start(const struct lu_env *env,
 	size_t cnt = io->u.ci_wr.wr.crw_count;
 	ssize_t result = 0;
 
-	down_read(&lli->lli_trunc_sem);
+	wait_var_event(&lli->lli_trunc_readers,
+		       atomic_read(&lli->lli_trunc_waiters) == 0 &&
+		       atomic_inc_unless_negative(&lli->lli_trunc_readers));
 
 	if (!can_populate_pages(env, io, inode))
 		return 0;
@@ -1059,7 +1067,9 @@ static void vvp_io_rw_end(const struct lu_env *env,
 	struct inode *inode = vvp_object_inode(ios->cis_obj);
 	struct ll_inode_info *lli = ll_i2info(inode);
 
-	up_read(&lli->lli_trunc_sem);
+	if (atomic_dec_return(&lli->lli_trunc_readers) == 0 &&
+	    atomic_read(&lli->lli_trunc_waiters))
+		wake_up_var(&lli->lli_trunc_readers);
 }
 
 static int vvp_io_kernel_fault(struct vvp_fault_io *cfio)
@@ -1124,7 +1134,8 @@ static int vvp_io_fault_start(const struct lu_env *env,
 	loff_t size;
 	pgoff_t last_index;
 
-	down_read(&lli->lli_trunc_sem);
+	wait_var_event(&lli->lli_trunc_readers,
+		       atomic_inc_unless_negative(&lli->lli_trunc_readers));
 
 	/* offset of the last byte on the page */
 	offset = cl_offset(obj, fio->ft_index + 1) - 1;
@@ -1281,7 +1292,10 @@ static void vvp_io_fault_end(const struct lu_env *env,
 
 	CLOBINVRNT(env, ios->cis_io->ci_obj,
 		   vvp_object_invariant(ios->cis_io->ci_obj));
-	up_read(&lli->lli_trunc_sem);
+
+	if (atomic_dec_return(&lli->lli_trunc_readers) == 0 &&
+	    atomic_read(&lli->lli_trunc_waiters))
+		wake_up_var(&lli->lli_trunc_readers);
 }
 
 static int vvp_io_fsync_start(const struct lu_env *env,
-- 
1.8.3.1
