All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jan Kara <jack@suse.cz>
To: <linux-fsdevel@vger.kernel.org>
Cc: Amir Goldstein <amir73il@gmail.com>,
	Miklos Szeredi <miklos@szeredi.hu>,
	Paul Moore <paul@paul-moore.com>, Jan Kara <jack@suse.cz>
Subject: [PATCH 11/22] fsnotify: Provide framework for dropping SRCU lock in ->handle_event
Date: Fri, 20 Jan 2017 14:21:12 +0100	[thread overview]
Message-ID: <20170120132123.9670-12-jack@suse.cz> (raw)
In-Reply-To: <20170120132123.9670-1-jack@suse.cz>

fanotify wants to drop fsnotify_mark_srcu lock when waiting for response
from userspace so that the whole notification subsystem is not blocked
during that time. This patch provides a framework for safely getting
mark reference for a mark found in the object list which pins the mark
in that list. We can then drop fsnotify_mark_srcu, wait for userspace
response and then safely continue iteration of the object list once we
reacquire fsnotify_mark_srcu.

Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/group.c                |  1 +
 fs/notify/mark.c                 | 86 ++++++++++++++++++++++++++++++++++++++++
 include/linux/fsnotify_backend.h |  8 ++++
 3 files changed, 95 insertions(+)

diff --git a/fs/notify/group.c b/fs/notify/group.c
index 0fb4aadcc19f..79439cdf16e0 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -126,6 +126,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
 	/* set to 0 when there a no external references to this group */
 	atomic_set(&group->refcnt, 1);
 	atomic_set(&group->num_marks, 0);
+	atomic_set(&group->user_waits, 0);
 
 	spin_lock_init(&group->notification_lock);
 	INIT_LIST_HEAD(&group->notification_list);
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index e22e87bbcd84..8800db34e19f 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -109,6 +109,16 @@ void fsnotify_get_mark(struct fsnotify_mark *mark)
 	atomic_inc(&mark->refcnt);
 }
 
+/*
+ * Try to get a mark reference for a mark found via lockless traversal of the
+ * object list. The mark may already be removed from the list and on its way
+ * to destruction once the SRCU period ends. Fails if the refcount is zero.
+ */
+static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark)
+{
+	return atomic_inc_not_zero(&mark->refcnt);
+}
+
 static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
 {
 	u32 new_mask = 0;
@@ -244,6 +254,76 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
 			   FSNOTIFY_REAPER_DELAY);
 }
 
+bool fsnotify_prepare_user_wait(struct fsnotify_mark *inode_mark,
+				struct fsnotify_mark *vfsmount_mark,
+				int *srcu_idx)
+{
+	struct fsnotify_group *group;
+
+	if (WARN_ON_ONCE(!inode_mark && !vfsmount_mark))
+		return false;
+
+	if (inode_mark)
+		group = inode_mark->group;
+	else
+		group = vfsmount_mark->group;
+
+	/*
+	 * Taking a mark reference is an atomic op as well, so this user_waits
+	 * increment is seen before any effect of the mark refcount increment.
+	 */
+	atomic_inc(&group->user_waits);
+
+	if (inode_mark) {
+		/* Fails if mark is being removed (refcount already 0) */
+		if (!fsnotify_get_mark_safe(inode_mark))
+			goto out_wait;
+	}
+	if (vfsmount_mark) {
+		if (!fsnotify_get_mark_safe(vfsmount_mark))
+			goto out_inode;
+	}
+
+	/*
+	 * Now that both marks are pinned by refcount in the inode / vfsmount
+	 * lists, we can drop SRCU lock, and safely resume the list iteration
+	 * once userspace returns.
+	 */
+	srcu_read_unlock(&fsnotify_mark_srcu, *srcu_idx);
+
+	return true;
+out_inode:
+	if (inode_mark)
+		fsnotify_put_mark(inode_mark);
+out_wait:
+	if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
+		wake_up(&group->notification_waitq);
+	return false;
+}
+
+void fsnotify_finish_user_wait(struct fsnotify_mark *inode_mark,
+			       struct fsnotify_mark *vfsmount_mark,
+			       int *srcu_idx)
+{
+	struct fsnotify_group *group = NULL;
+
+	*srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
+	if (inode_mark) {
+		group = inode_mark->group;
+		fsnotify_put_mark(inode_mark);
+	}
+	if (vfsmount_mark) {
+		group = vfsmount_mark->group;
+		fsnotify_put_mark(vfsmount_mark);
+	}
+	/*
+	 * notification_waitq doubles as the shutdown wait for pinned marks.
+	 * NOTE(review): group is still NULL here if both marks were NULL.
+	 */
+	if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
+		wake_up(&group->notification_waitq);
+}
+
 /*
  * Mark mark as detached, remove it from group list. Mark still stays in object
  * list until its last reference is dropped. Note that we rely on mark being
@@ -628,6 +708,12 @@ void fsnotify_detach_group_marks(struct fsnotify_group *group)
 		fsnotify_free_mark(mark);
 		fsnotify_put_mark(mark);
 	}
+	/*
+	 * Some marks can still be pinned when waiting for response from
+	 * userspace. Wait for those now. fsnotify_prepare_user_wait() will
+	 * not succeed now so this wait is race-free.
+	 */
+	wait_event(group->notification_waitq, !atomic_read(&group->user_waits));
 }
 
 /* Destroy all marks attached to inode / vfsmount */
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 64ce249288ad..786dc1cf715c 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -162,6 +162,8 @@ struct fsnotify_group {
 	struct fsnotify_event *overflow_event;	/* Event we queue when the
 						 * notification list is too
 						 * full */
+	atomic_t user_waits;		/* Number of tasks waiting for user
+					 * response */
 
 	/* groups can define private fields here or use the void *private */
 	union {
@@ -367,6 +369,12 @@ extern void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, un
 extern void fsnotify_get_mark(struct fsnotify_mark *mark);
 extern void fsnotify_put_mark(struct fsnotify_mark *mark);
 extern void fsnotify_unmount_inodes(struct super_block *sb);
+extern void fsnotify_finish_user_wait(struct fsnotify_mark *inode_mark,
+				      struct fsnotify_mark *vfsmount_mark,
+				      int *srcu_idx);
+extern bool fsnotify_prepare_user_wait(struct fsnotify_mark *inode_mark,
+				       struct fsnotify_mark *vfsmount_mark,
+				       int *srcu_idx);
 
 /* put here because inotify does some weird stuff when destroying watches */
 extern void fsnotify_init_event(struct fsnotify_event *event,
-- 
2.10.2


  parent reply	other threads:[~2017-01-20 13:36 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-01-20 13:21 [PATCH 0/22 v3] fsnotify: Avoid SRCU stalls with fanotify permission events Jan Kara
2017-01-20 13:21 ` [PATCH 01/22] fsnotify: Remove unnecessary tests when showing fdinfo Jan Kara
2017-01-20 13:21 ` [PATCH 02/22] inotify: Remove inode pointers from debug messages Jan Kara
2017-01-20 13:21 ` [PATCH 03/22] fanotify: Move recalculation of inode / vfsmount mask under mark_mutex Jan Kara
2017-01-20 13:21 ` [PATCH 04/22] audit: Abstract hash key handling Jan Kara
2017-01-20 13:21 ` [PATCH 05/22] fsnotify: Update comments Jan Kara
2017-01-20 13:21 ` [PATCH 06/22] fsnotify: Attach marks to object via dedicated head structure Jan Kara
2017-01-21 15:52   ` Amir Goldstein
2017-01-25  9:41   ` Miklos Szeredi
2017-01-31 15:41     ` Jan Kara
2017-01-20 13:21 ` [PATCH 07/22] inotify: Do not drop mark reference under idr_lock Jan Kara
2017-01-20 13:21 ` [PATCH 08/22] fsnotify: Move queueing of mark for destruction into fsnotify_put_mark() Jan Kara
2017-01-20 13:21 ` [PATCH 09/22] fsnotify: Detach mark from object list when last reference is dropped Jan Kara
2017-01-21 15:50   ` Amir Goldstein
2017-01-20 13:21 ` [PATCH 10/22] fsnotify: Remove special handling of mark destruction on group shutdown Jan Kara
2017-01-20 13:21 ` Jan Kara [this message]
2017-01-20 13:21 ` [PATCH 12/22] fsnotify: Pass SRCU index into handle_event handler Jan Kara
2017-01-20 13:21 ` [PATCH 13/22] fanotify: Release SRCU lock when waiting for userspace response Jan Kara
2017-01-25 15:22   ` Miklos Szeredi
2017-01-31 13:28     ` Jan Kara
2017-01-20 13:21 ` [PATCH 14/22] fsnotify: Remove fsnotify_set_mark_{,ignored_}mask_locked() Jan Kara
2017-01-20 13:21 ` [PATCH 15/22] fsnotify: Remove fsnotify_recalc_{inode|vfsmount}_mask() Jan Kara
2017-01-20 13:21 ` [PATCH 16/22] fsnotify: Inline fsnotify_clear_{inode|vfsmount}_mark_group() Jan Kara
2017-01-20 13:21 ` [PATCH 17/22] fsnotify: Rename fsnotify_clear_marks_by_group_flags() Jan Kara
2017-01-20 13:21 ` [PATCH 18/22] fsnotify: Remove fsnotify_detach_group_marks() Jan Kara
2017-01-20 13:21 ` [PATCH 19/22] fsnotify: Remove fsnotify_find_{inode|vfsmount}_mark() Jan Kara
2017-01-20 13:21 ` [PATCH 20/22] fsnotify: Drop inode_mark.c Jan Kara
2017-01-20 13:21 ` [PATCH 21/22] fsnotify: Add group pointer in fsnotify_init_mark() Jan Kara
2017-01-20 13:21 ` [PATCH 22/22] fsnotify: Move ->free_mark callback to fsnotify_ops Jan Kara
  -- strict thread matches above, loose matches on Subject: below --
2017-01-06 10:43 [PATCH 0/22 v2] fsnotify: Avoid SRCU stalls with fanotify permission events Jan Kara
2017-01-06 10:43 ` [PATCH 11/22] fsnotify: Provide framework for dropping SRCU lock in ->handle_event Jan Kara
2017-01-08  9:02   ` Amir Goldstein
2017-01-10 13:00     ` Jan Kara

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170120132123.9670-12-jack@suse.cz \
    --to=jack@suse.cz \
    --cc=amir73il@gmail.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=miklos@szeredi.hu \
    --cc=paul@paul-moore.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.