All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jan Kara <jack@suse.cz>
To: linux-fsdevel@vger.kernel.org
Cc: Amir Goldstein <amir73il@gmail.com>,
	Paul Moore <paul@paul-moore.com>,
	Miklos Szeredi <miklos@szeredi.hu>, Jan Kara <jack@suse.cz>
Subject: [PATCH 09/22] fsnotify: Detach mark from object list when last reference is dropped
Date: Fri,  6 Jan 2017 11:43:48 +0100	[thread overview]
Message-ID: <20170106104401.5894-10-jack@suse.cz> (raw)
In-Reply-To: <20170106104401.5894-1-jack@suse.cz>

Instead of removing mark from object list from fsnotify_detach_mark(),
remove the mark when last reference to the mark is dropped. This will
allow fanotify to wait for userspace response to event without having to
hold onto fsnotify_mark_srcu.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/mark.c                 | 198 +++++++++++++++++++++++----------------
 include/linux/fsnotify_backend.h |   4 +-
 kernel/audit_tree.c              |  20 +---
 3 files changed, 124 insertions(+), 98 deletions(-)

diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 521c4bd9b497..dc94b5df7627 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -49,7 +49,13 @@
  *
  * A list of notification marks relating to inode / mnt is contained in
  * fsnotify_mark_connector. That structure is alive as long as there are any marks
- * in the list and is also protected by fsnotify_mark_srcu.
+ * in the list and is also protected by fsnotify_mark_srcu. A mark gets
+ * detached from fsnotify_mark_connector when last reference to the mark is dropped.
+ * Thus having mark reference is enough to protect mark->connector pointer
+ * and to make sure fsnotify_mark_connector cannot disappear. Also because we remove
+ * mark from g_list before dropping mark reference associated with that, any
+ * mark found through g_list is guaranteed to have mark->connector set
+ * until we drop group->mark_mutex.
  *
  * LIFETIME:
  * Inode marks survive between when they are added to an inode and when their
@@ -103,17 +109,6 @@ void fsnotify_get_mark(struct fsnotify_mark *mark)
 	atomic_inc(&mark->refcnt);
 }
 
-void fsnotify_put_mark(struct fsnotify_mark *mark)
-{
-	if (atomic_dec_and_test(&mark->refcnt)) {
-		spin_lock(&destroy_lock);
-		list_add(&mark->g_list, &destroy_list);
-		spin_unlock(&destroy_lock);
-		queue_delayed_work(system_unbound_wq, &reaper_work,
-				   FSNOTIFY_REAPER_DELAY);
-	}
-}
-
 static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
 {
 	u32 new_mask = 0;
@@ -168,55 +163,97 @@ static void fsnotify_connector_destroy_workfn(struct work_struct *work)
 	}
 }
 
-static struct inode *fsnotify_detach_from_object(struct fsnotify_mark *mark)
+static struct inode *fsnotify_detach_connector_from_object(
+					struct fsnotify_mark_connector *conn)
 {
-	struct fsnotify_mark_connector *conn;
 	struct inode *inode = NULL;
-	bool free_conn = false;
 
-	conn = mark->connector;
-	spin_lock(&conn->lock);
-	hlist_del_init_rcu(&mark->obj_list);
-	if (hlist_empty(&conn->list)) {
-		if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) {
-			inode = conn->inode;
-			inode->i_fsnotify_marks = NULL;
-			inode->i_fsnotify_mask = 0;
-			conn->inode = NULL;
-			conn->flags &= ~FSNOTIFY_OBJ_TYPE_INODE;
-		} else if (conn->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
-			real_mount(conn->mnt)->mnt_fsnotify_marks = NULL;
-			real_mount(conn->mnt)->mnt_fsnotify_mask = 0;
-			conn->mnt = NULL;
-			conn->flags &= ~FSNOTIFY_OBJ_TYPE_VFSMOUNT;
-		}
-		free_conn = true;
-	} else
-		__fsnotify_recalc_mask(conn);
-	mark->connector = NULL;
-	spin_unlock(&conn->lock);
+	if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) {
+		inode = conn->inode;
+		inode->i_fsnotify_marks = NULL;
+		inode->i_fsnotify_mask = 0;
+		conn->inode = NULL;
+		conn->flags &= ~FSNOTIFY_OBJ_TYPE_INODE;
+	} else if (conn->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
+		real_mount(conn->mnt)->mnt_fsnotify_marks = NULL;
+		real_mount(conn->mnt)->mnt_fsnotify_mask = 0;
+		conn->mnt = NULL;
+		conn->flags &= ~FSNOTIFY_OBJ_TYPE_VFSMOUNT;
+	}
+
+	return inode;
+}
+
+static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark)
+{
+	if (mark->group)
+		fsnotify_put_group(mark->group);
+	mark->free_mark(mark);
+}
 
-	if (free_conn) {
+void fsnotify_put_mark(struct fsnotify_mark *mark)
+{
+	/* Catch marks that were actually never attached to object */
+	if (!mark->connector && atomic_dec_and_test(&mark->refcnt)) {
+		fsnotify_final_mark_destroy(mark);
+		return;
+	}
+
+	/*
+	 * We have to be careful so that traversals of obj_list under lock can
+	 * safely grab mark reference.
+	 */
+	if (atomic_dec_and_lock(&mark->refcnt, &mark->connector->lock)) {
+		struct fsnotify_mark_connector *conn;
+		struct inode *inode = NULL;
+		bool free_conn = false;
+
+		conn = mark->connector;
+		hlist_del_init_rcu(&mark->obj_list);
+		if (hlist_empty(&conn->list)) {
+			inode = fsnotify_detach_connector_from_object(conn);
+			free_conn = true;
+		} else
+			__fsnotify_recalc_mask(conn);
+		mark->connector = NULL;
+		spin_unlock(&conn->lock);
+
+		if (inode)
+			iput(inode);
+
+		if (free_conn) {
+			spin_lock(&destroy_lock);
+			conn->destroy_next = connector_destroy_list;
+			connector_destroy_list = conn;
+			spin_unlock(&destroy_lock);
+			queue_work(system_unbound_wq, &connector_reaper_work);
+		}
+		/*
+		 * Note that we didn't update flags telling whether inode cares
+		 * about what's happening with children. We update these flags
+		 * from __fsnotify_parent() lazily when next event happens on
+		 * one of our children.
+		 */
 		spin_lock(&destroy_lock);
-		conn->destroy_next = connector_destroy_list;
-		connector_destroy_list = conn;
+		list_add(&mark->g_list, &destroy_list);
 		spin_unlock(&destroy_lock);
-		queue_work(system_unbound_wq, &connector_reaper_work);
+		queue_delayed_work(system_unbound_wq, &reaper_work,
+				   FSNOTIFY_REAPER_DELAY);
 	}
-
-	return inode;
 }
 
 /*
- * Remove mark from inode / vfsmount list, group list, drop inode reference
- * if we got one.
+ * Mark mark as dead, remove it from group list. Mark still stays in object
+ * list until its last reference is dropped. Note that we rely on mark being
+ * removed from group list before corresponding reference to it is dropped. In
+ * particular we rely on mark->connector being valid while we hold
+ * group->mark_mutex if we found the mark through g_list.
  *
  * Must be called with group->mark_mutex held. The caller must either hold
  * reference to the mark or be protected by fsnotify_mark_srcu.
  */
 void fsnotify_detach_mark(struct fsnotify_mark *mark)
 {
-	struct inode *inode = NULL;
 	struct fsnotify_group *group = mark->group;
 
 	WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex));
@@ -225,31 +262,15 @@ void fsnotify_detach_mark(struct fsnotify_mark *mark)
 			!!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED));
 
 	spin_lock(&mark->lock);
-
 	/* something else already called this function on this mark */
 	if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
 		spin_unlock(&mark->lock);
 		return;
 	}
-
 	mark->flags &= ~FSNOTIFY_MARK_FLAG_ATTACHED;
-
-	inode = fsnotify_detach_from_object(mark);
-
-	/*
-	 * Note that we didn't update flags telling whether inode cares about
-	 * what's happening with children. We update these flags from
-	 * __fsnotify_parent() lazily when next event happens on one of our
-	 * children.
-	 */
-
 	list_del_init(&mark->g_list);
-
 	spin_unlock(&mark->lock);
 
-	if (inode)
-		iput(inode);
-
 	atomic_dec(&group->num_marks);
 
 	/* Drop mark reference acquired in fsnotify_add_mark_locked() */
@@ -436,7 +457,9 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
 	hlist_for_each_entry(lmark, &conn->list, obj_list) {
 		last = lmark;
 
-		if ((lmark->group == mark->group) && !allow_dups) {
+		if ((lmark->group == mark->group) &&
+		    (lmark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) &&
+		    !allow_dups) {
 			err = -EEXIST;
 			goto out_err;
 		}
@@ -487,7 +510,7 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
 	mark->group = group;
 	list_add(&mark->g_list, &group->marks_list);
 	atomic_inc(&group->num_marks);
-	fsnotify_get_mark(mark); /* for i_list and g_list */
+	fsnotify_get_mark(mark); /* for g_list */
 	spin_unlock(&mark->lock);
 
 	ret = fsnotify_add_mark_list(mark, inode, mnt, allow_dups);
@@ -533,7 +556,8 @@ struct fsnotify_mark *fsnotify_find_mark(struct fsnotify_mark_connector **connp,
 	if (!conn)
 		return NULL;
 	hlist_for_each_entry(mark, &conn->list, obj_list) {
-		if (mark->group == group) {
+		if (mark->group == group &&
+		    (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
 			fsnotify_get_mark(mark);
 			fsnotify_put_connector(conn);
 			return mark;
@@ -613,23 +637,39 @@ void fsnotify_detach_group_marks(struct fsnotify_group *group)
 void fsnotify_destroy_marks(struct fsnotify_mark_connector **connp)
 {
 	struct fsnotify_mark_connector *conn;
-	struct fsnotify_mark *mark;
+	struct fsnotify_mark *mark, *old_mark = NULL;
+	struct inode *inode;
 
-	while ((conn = fsnotify_grab_connector(connp))) {
-		/*
-		 * We have to be careful since we can race with e.g.
-		 * fsnotify_clear_marks_by_group() and once we drop the list
-		 * lock, mark can get removed from the obj_list and destroyed.
-		 * But we are holding mark reference so mark cannot be freed
-		 * and calling fsnotify_destroy_mark() more than once is fine.
-		 */
-		mark = hlist_entry(conn->list.first, struct fsnotify_mark,
-				   obj_list);
+	conn = fsnotify_grab_connector(connp);
+	if (!conn)
+		return;
+	/*
+	 * We have to be careful since we can race with e.g.
+	 * fsnotify_clear_marks_by_group() and once we drop the conn->lock, the
+	 * list can get modified. However we are holding mark reference and
+	 * thus our mark cannot be removed from obj_list so we can continue
+	 * iteration after regaining conn->lock.
+	 */
+	hlist_for_each_entry(mark, &conn->list, obj_list) {
 		fsnotify_get_mark(mark);
-		fsnotify_put_connector(conn);
+		spin_unlock(&conn->lock);
+		if (old_mark)
+			fsnotify_put_mark(old_mark);
+		old_mark = mark;
 		fsnotify_destroy_mark(mark, mark->group);
-		fsnotify_put_mark(mark);
+		spin_lock(&conn->lock);
 	}
+	/*
+	 * Detach list from object now so that we don't pin inode until all
+	 * mark references get dropped. It would lead to strange results such
+	 * as delaying inode deletion or blocking unmount.
+	 */
+	inode = fsnotify_detach_connector_from_object(conn);
+	fsnotify_put_connector(conn);
+	if (inode)
+		iput(inode);
+	if (old_mark)
+		fsnotify_put_mark(old_mark);
 }
 
 /*
@@ -662,9 +702,7 @@ void fsnotify_mark_destroy_list(void)
 
 	list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) {
 		list_del_init(&mark->g_list);
-		if (mark->group)
-			fsnotify_put_group(mark->group);
-		mark->free_mark(mark);
+		fsnotify_final_mark_destroy(mark);
 	}
 }
 
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index f3df68a64ee6..64ce249288ad 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -244,9 +244,9 @@ struct fsnotify_mark {
 	struct list_head g_list;
 	/* Protects inode / mnt pointers, flags, masks */
 	spinlock_t lock;
-	/* List of marks for inode / vfsmount [connector->lock] */
+	/* List of marks for inode / vfsmount [connector->lock, mark ref] */
 	struct hlist_node obj_list;
-	/* Head of list of marks for an object [mark->lock, group->mark_mutex] */
+	/* Head of list of marks for an object [mark ref] */
 	struct fsnotify_mark_connector *connector;
 	/* Events types to ignore [mark->lock, group->mark_mutex] */
 	__u32 ignored_mask;
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index a878a98f55b3..6962443f2216 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -172,27 +172,15 @@ static unsigned long inode_to_key(const struct inode *inode)
 /*
  * Function to return search key in our hash from chunk. Key 0 is special and
  * should never be present in the hash.
- *
- * Must be called with chunk->mark.lock held to protect from connector
- * becoming NULL.
  */
-static unsigned long __chunk_to_key(struct audit_chunk *chunk)
+static unsigned long chunk_to_key(struct audit_chunk *chunk)
 {
-	if (!chunk->mark.connector)
+	/* We have a reference to the mark so it should be attached. */
+	if (WARN_ON_ONCE(!chunk->mark.connector))
 		return 0;
 	return (unsigned long)chunk->mark.connector->inode;
 }
 
-static unsigned long chunk_to_key(struct audit_chunk *chunk)
-{
-	unsigned long key;
-
-	spin_lock(&chunk->mark.lock);
-	key = __chunk_to_key(chunk);
-	spin_unlock(&chunk->mark.lock);
-	return key;
-}
-
 static inline struct list_head *chunk_hash(unsigned long key)
 {
 	unsigned long n = key / L1_CACHE_BYTES;
@@ -202,7 +190,7 @@ static inline struct list_head *chunk_hash(unsigned long key)
 /* hash_lock & entry->lock is held by caller */
 static void insert_hash(struct audit_chunk *chunk)
 {
-	unsigned long key = __chunk_to_key(chunk);
+	unsigned long key = chunk_to_key(chunk);
 	struct list_head *list;
 
 	if (!key)
-- 
2.10.2


  parent reply	other threads:[~2017-01-06 10:44 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-01-06 10:43 [PATCH 0/22 v2] fsnotify: Avoid SRCU stalls with fanotify permission events Jan Kara
2017-01-06 10:43 ` [PATCH 01/22] fsnotify: Remove unnecessary tests when showing fdinfo Jan Kara
2017-01-06 10:43 ` [PATCH 02/22] inotify: Remove inode pointers from debug messages Jan Kara
2017-01-06 10:43 ` [PATCH 03/22] fanotify: Move recalculation of inode / vfsmount mask under mark_mutex Jan Kara
2017-01-06 10:43 ` [PATCH 04/22] audit: Abstract hash key handling Jan Kara
2017-01-06 10:43 ` [PATCH 05/22] fsnotify: Update comments Jan Kara
2017-01-06 10:43 ` [PATCH 06/22] fsnotify: Attach marks to object via dedicated head structure Jan Kara
2017-01-06 13:12   ` Amir Goldstein
2017-01-08 10:18   ` Amir Goldstein
2017-01-10 13:30     ` Jan Kara
2017-01-06 10:43 ` [PATCH 07/22] inotify: Do not drop mark reference under idr_lock Jan Kara
2017-01-06 10:43 ` [PATCH 08/22] fsnotify: Move queueing of mark for destruction into fsnotify_put_mark() Jan Kara
2017-01-08 11:15   ` Amir Goldstein
2017-01-06 10:43 ` Jan Kara [this message]
2017-01-08 12:10   ` [PATCH 09/22] fsnotify: Detach mark from object list when last reference is dropped Amir Goldstein
2017-01-10 13:47     ` Jan Kara
2017-01-06 10:43 ` [PATCH 10/22] fsnotify: Remove special handling of mark destruction on group shutdown Jan Kara
2017-01-06 10:43 ` [PATCH 11/22] fsnotify: Provide framework for dropping SRCU lock in ->handle_event Jan Kara
2017-01-08  9:02   ` Amir Goldstein
2017-01-10 13:00     ` Jan Kara
2017-01-06 10:43 ` [PATCH 12/22] fsnotify: Pass SRCU index into handle_event handler Jan Kara
2017-01-06 10:43 ` [PATCH 13/22] fanotify: Release SRCU lock when waiting for userspace response Jan Kara
2017-01-06 10:43 ` [PATCH 14/22] fsnotify: Remove fsnotify_set_mark_{,ignored_}mask_locked() Jan Kara
2017-01-06 10:43 ` [PATCH 15/22] fsnotify: Remove fsnotify_recalc_{inode|vfsmount}_mask() Jan Kara
2017-01-06 10:43 ` [PATCH 16/22] fsnotify: Inline fsnotify_clear_{inode|vfsmount}_mark_group() Jan Kara
2017-01-06 10:43 ` [PATCH 17/22] fsnotify: Rename fsnotify_clear_marks_by_group_flags() Jan Kara
2017-01-08 12:17   ` Amir Goldstein
2017-01-06 10:43 ` [PATCH 18/22] fsnotify: Remove fsnotify_detach_group_marks() Jan Kara
2017-01-08 12:26   ` Amir Goldstein
2017-01-06 10:43 ` [PATCH 19/22] fsnotify: Remove fsnotify_find_{inode|vfsmount}_mark() Jan Kara
2017-01-06 10:43 ` [PATCH 20/22] fsnotify: Drop inode_mark.c Jan Kara
2017-01-06 10:44 ` [PATCH 21/22] fsnotify: Add group pointer in fsnotify_init_mark() Jan Kara
2017-01-08 12:35   ` Amir Goldstein
2017-01-10 13:48     ` Jan Kara
2017-01-06 10:44 ` [PATCH 22/22] fsnotify: Move ->free_mark callback to fsnotify_ops Jan Kara
2017-01-08 12:50 ` [PATCH 0/22 v2] fsnotify: Avoid SRCU stalls with fanotify permission events Amir Goldstein
2017-01-20 13:21 [PATCH 0/22 v3] " Jan Kara
2017-01-20 13:21 ` [PATCH 09/22] fsnotify: Detach mark from object list when last reference is dropped Jan Kara
2017-01-21 15:50   ` Amir Goldstein

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170106104401.5894-10-jack@suse.cz \
    --to=jack@suse.cz \
    --cc=amir73il@gmail.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=miklos@szeredi.hu \
    --cc=paul@paul-moore.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.