All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/4] fix fanotify issues with the series in v4.12
@ 2017-10-19 13:58 Miklos Szeredi
  2017-10-19 13:58 ` [PATCH 1/4] fsnotify: fix pinning of marks and groups Miklos Szeredi
                   ` (3 more replies)
  0 siblings, 4 replies; 11+ messages in thread
From: Miklos Szeredi @ 2017-10-19 13:58 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Jan Kara, Amir Goldstein, Xiong Zhou, linux-kernel

We discovered some problems in the latest fsnotify/fanotify codebase with
the help of a stress test (Xiong Zhou is working on upstreaming it to fstests).

This series attempts to fix these.  With the patch applied the stress test passes.

Please review/test.

Thanks,
Miklos
---

Miklos Szeredi (4):
  fsnotify: fix pinning of marks and groups
  fsnotify: skip unattached marks
  fanotify: fix fsnotify_prepare_user_wait() failure
  fsnotify: clean up fsnotify()

 fs/notify/fanotify/fanotify.c |  33 ++++++++------
 fs/notify/fsnotify.c          | 102 ++++++++++++++++++++----------------------
 fs/notify/mark.c              | 100 +++++++++++++++++++----------------------
 3 files changed, 113 insertions(+), 122 deletions(-)

-- 
2.5.5

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 1/4] fsnotify: fix pinning of marks and groups
  2017-10-19 13:58 [PATCH 0/4] fix fanotify issues with the series in v4.12 Miklos Szeredi
@ 2017-10-19 13:58 ` Miklos Szeredi
  2017-10-20 11:37   ` Amir Goldstein
  2017-10-19 13:58 ` [PATCH 2/4] fsnotify: skip unattached marks Miklos Szeredi
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 11+ messages in thread
From: Miklos Szeredi @ 2017-10-19 13:58 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Jan Kara, Amir Goldstein, Xiong Zhou, linux-kernel

We may fail to pin one of the marks in fsnotify_prepare_user_wait() when
dropping the srcu read lock, resulting in use after free at the next
iteration.

Solution is to store both marks in iter_info instead of just the one we'll
be sending the event for, as well as pinning the group for both (if they
have the same group: fine, pinnig it twice doesn't hurt).

Also blind increment of group's user_waits is not enough, we could be far
enough in the group's destruction that it isn't taken into account.
Instead we need to check (under lock) that the mark is still attached to
the group after having obtained a ref to the mark.  If not, skip it.

In the process of fixing the above, clean up fsnotify_prepare_user_wait()/
fsnotify_finish_user_wait().

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Fixes: 9385a84d7e1f ("fsnotify: Pass fsnotify_iter_info into handle_event handler")
Cc: <stable@vger.kernel.org> # v4.12
---
 fs/notify/fsnotify.c |   6 ++--
 fs/notify/mark.c     | 100 +++++++++++++++++++++++----------------------------
 2 files changed, 48 insertions(+), 58 deletions(-)

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 0c4583b61717..48ec61f4c4d5 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -336,6 +336,9 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
 			vfsmount_group = vfsmount_mark->group;
 		}
 
+		iter_info.inode_mark = inode_mark;
+		iter_info.vfsmount_mark = vfsmount_mark;
+
 		if (inode_group && vfsmount_group) {
 			int cmp = fsnotify_compare_groups(inode_group,
 							  vfsmount_group);
@@ -348,9 +351,6 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
 			}
 		}
 
-		iter_info.inode_mark = inode_mark;
-		iter_info.vfsmount_mark = vfsmount_mark;
-
 		ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask,
 				    data, data_is, cookie, file_name,
 				    &iter_info);
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 9991f8826734..9a7c8dbeeb59 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -109,16 +109,6 @@ void fsnotify_get_mark(struct fsnotify_mark *mark)
 	atomic_inc(&mark->refcnt);
 }
 
-/*
- * Get mark reference when we found the mark via lockless traversal of object
- * list. Mark can be already removed from the list by now and on its way to be
- * destroyed once SRCU period ends.
- */
-static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark)
-{
-	return atomic_inc_not_zero(&mark->refcnt);
-}
-
 static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
 {
 	u32 new_mask = 0;
@@ -256,32 +246,53 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
 			   FSNOTIFY_REAPER_DELAY);
 }
 
-bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info)
+/*
+ * Get mark reference when we found the mark via lockless traversal of object
+ * list. Mark can be already removed from the list by now and on its way to be
+ * destroyed once SRCU period ends.
+ */
+static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark)
 {
-	struct fsnotify_group *group;
-
-	if (WARN_ON_ONCE(!iter_info->inode_mark && !iter_info->vfsmount_mark))
-		return false;
-
-	if (iter_info->inode_mark)
-		group = iter_info->inode_mark->group;
-	else
-		group = iter_info->vfsmount_mark->group;
+	if (!mark)
+		return true;
+
+	if (atomic_inc_not_zero(&mark->refcnt)) {
+		spin_lock(&mark->lock);
+		if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) {
+			/* mark is attached, group is still alive then */
+			atomic_inc(&mark->group->user_waits);
+			spin_unlock(&mark->lock);
+			return true;
+		}
+		spin_unlock(&mark->lock);
+		fsnotify_put_mark(mark);
+	}
+	return false;
+}
 
-	/*
-	 * Since acquisition of mark reference is an atomic op as well, we can
-	 * be sure this inc is seen before any effect of refcount increment.
-	 */
-	atomic_inc(&group->user_waits);
+static void fsnotify_put_mark_wait(struct fsnotify_mark *mark)
+{
+	if (mark) {
+		struct fsnotify_group *group = mark->group;
 
-	if (iter_info->inode_mark) {
-		/* This can fail if mark is being removed */
-		if (!fsnotify_get_mark_safe(iter_info->inode_mark))
-			goto out_wait;
+		fsnotify_put_mark(mark);
+		/*
+		 * We abuse notification_waitq on group shutdown for waiting for all
+		 * marks pinned when waiting for userspace.
+		 */
+		if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
+			wake_up(&group->notification_waitq);
 	}
-	if (iter_info->vfsmount_mark) {
-		if (!fsnotify_get_mark_safe(iter_info->vfsmount_mark))
-			goto out_inode;
+}
+
+bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info)
+{
+	/* This can fail if mark is being removed */
+	if (!fsnotify_get_mark_safe(iter_info->inode_mark))
+		return false;
+	if (!fsnotify_get_mark_safe(iter_info->vfsmount_mark)) {
+		fsnotify_put_mark_wait(iter_info->inode_mark);
+		return false;
 	}
 
 	/*
@@ -292,34 +303,13 @@ bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info)
 	srcu_read_unlock(&fsnotify_mark_srcu, iter_info->srcu_idx);
 
 	return true;
-out_inode:
-	if (iter_info->inode_mark)
-		fsnotify_put_mark(iter_info->inode_mark);
-out_wait:
-	if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
-		wake_up(&group->notification_waitq);
-	return false;
 }
 
 void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info)
 {
-	struct fsnotify_group *group = NULL;
-
 	iter_info->srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
-	if (iter_info->inode_mark) {
-		group = iter_info->inode_mark->group;
-		fsnotify_put_mark(iter_info->inode_mark);
-	}
-	if (iter_info->vfsmount_mark) {
-		group = iter_info->vfsmount_mark->group;
-		fsnotify_put_mark(iter_info->vfsmount_mark);
-	}
-	/*
-	 * We abuse notification_waitq on group shutdown for waiting for all
-	 * marks pinned when waiting for userspace.
-	 */
-	if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
-		wake_up(&group->notification_waitq);
+	fsnotify_put_mark_wait(iter_info->inode_mark);
+	fsnotify_put_mark_wait(iter_info->vfsmount_mark);
 }
 
 /*
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 2/4] fsnotify: skip unattached marks
  2017-10-19 13:58 [PATCH 0/4] fix fanotify issues with the series in v4.12 Miklos Szeredi
  2017-10-19 13:58 ` [PATCH 1/4] fsnotify: fix pinning of marks and groups Miklos Szeredi
@ 2017-10-19 13:58 ` Miklos Szeredi
  2017-10-20 12:05   ` Amir Goldstein
  2017-10-19 13:58 ` [PATCH 3/4] fanotify: fix fsnotify_prepare_user_wait() failure Miklos Szeredi
  2017-10-19 13:58 ` [PATCH 4/4] fsnotify: clean up fsnotify() Miklos Szeredi
  3 siblings, 1 reply; 11+ messages in thread
From: Miklos Szeredi @ 2017-10-19 13:58 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Jan Kara, Amir Goldstein, Xiong Zhou, linux-kernel

After having gone through a ref-unref for the mark, dereferencing the group
(e.g. in fsnotify_compare_groups()) is wrong since the group may be
completely gone by that time.  So before continuing to traverse the mark
list, check if the mark is still attached.

This is done in the generic case, not just when we go through
fsnotify_prepare_user_wait()/fsnotify_finish_user_wait(), otherwise it
would introduce unnecessary complexity.  And it shouldn't hurt to skip
unattached marks anyway ("flags" is very likely in same cacheline as
neighbouring "ignored_mask", which is pulled in anyway).

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Fixes: 9385a84d7e1f ("fsnotify: Pass fsnotify_iter_info into handle_event handler")
Cc: <stable@vger.kernel.org> # v4.12
---
 fs/notify/fsnotify.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 48ec61f4c4d5..0ab6a7179e4d 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -328,12 +328,16 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
 			inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu),
 						 struct fsnotify_mark, obj_list);
 			inode_group = inode_mark->group;
+			if (!(inode_mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED))
+				goto skip_inode;
 		}
 
 		if (vfsmount_node) {
 			vfsmount_mark = hlist_entry(srcu_dereference(vfsmount_node, &fsnotify_mark_srcu),
 						    struct fsnotify_mark, obj_list);
 			vfsmount_group = vfsmount_mark->group;
+			if (!(vfsmount_mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED))
+				goto skip_vfsmount;
 		}
 
 		iter_info.inode_mark = inode_mark;
@@ -357,10 +361,11 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
 
 		if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS))
 			goto out;
-
+skip_inode:
 		if (inode_group)
 			inode_node = srcu_dereference(inode_node->next,
 						      &fsnotify_mark_srcu);
+skip_vfsmount:
 		if (vfsmount_group)
 			vfsmount_node = srcu_dereference(vfsmount_node->next,
 							 &fsnotify_mark_srcu);
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 3/4] fanotify: fix fsnotify_prepare_user_wait() failure
  2017-10-19 13:58 [PATCH 0/4] fix fanotify issues with the series in v4.12 Miklos Szeredi
  2017-10-19 13:58 ` [PATCH 1/4] fsnotify: fix pinning of marks and groups Miklos Szeredi
  2017-10-19 13:58 ` [PATCH 2/4] fsnotify: skip unattached marks Miklos Szeredi
@ 2017-10-19 13:58 ` Miklos Szeredi
  2017-10-20 12:25   ` Amir Goldstein
  2017-10-19 13:58 ` [PATCH 4/4] fsnotify: clean up fsnotify() Miklos Szeredi
  3 siblings, 1 reply; 11+ messages in thread
From: Miklos Szeredi @ 2017-10-19 13:58 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Jan Kara, Amir Goldstein, Xiong Zhou, linux-kernel

If fsnotify_prepare_user_wait() fails, we leave the event on the
notification list.  Which will result in a warning in
fsnotify_destroy_event() and later use-after-free.

Instead of adding a new helper to remove the event from the list in this
case, I opted to move the prepare/finish up into fanotify_handle_event().

This will allow these to be moved further out into the generic code later,
and perhaps let us move to non-sleeping RCU.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Fixes: 05f0e38724e8 ("fanotify: Release SRCU lock when waiting for userspace response")
Cc: <stable@vger.kernel.org> # v4.12
---
 fs/notify/fanotify/fanotify.c | 33 ++++++++++++++++++++-------------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 2fa99aeaa095..fb7a1339982c 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -64,19 +64,8 @@ static int fanotify_get_response(struct fsnotify_group *group,
 
 	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
-	/*
-	 * fsnotify_prepare_user_wait() fails if we race with mark deletion.
-	 * Just let the operation pass in that case.
-	 */
-	if (!fsnotify_prepare_user_wait(iter_info)) {
-		event->response = FAN_ALLOW;
-		goto out;
-	}
-
 	wait_event(group->fanotify_data.access_waitq, event->response);
 
-	fsnotify_finish_user_wait(iter_info);
-out:
 	/* userspace responded, convert to something usable */
 	switch (event->response) {
 	case FAN_ALLOW:
@@ -211,9 +200,21 @@ static int fanotify_handle_event(struct fsnotify_group *group,
 	pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode,
 		 mask);
 
+#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+	if (mask & FAN_ALL_PERM_EVENTS) {
+		/*
+		 * fsnotify_prepare_user_wait() fails if we race with mark deletion.
+		 * Just let the operation pass in that case.
+		 */
+		if (!fsnotify_prepare_user_wait(iter_info))
+			return 0;
+	}
+#endif
+
 	event = fanotify_alloc_event(inode, mask, data);
+	ret = -ENOMEM;
 	if (unlikely(!event))
-		return -ENOMEM;
+		goto finish;
 
 	fsn_event = &event->fse;
 	ret = fsnotify_add_event(group, fsn_event, fanotify_merge);
@@ -223,7 +224,8 @@ static int fanotify_handle_event(struct fsnotify_group *group,
 		/* Our event wasn't used in the end. Free it. */
 		fsnotify_destroy_event(group, fsn_event);
 
-		return 0;
+		ret = 0;
+		goto finish;
 	}
 
 #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
@@ -232,6 +234,11 @@ static int fanotify_handle_event(struct fsnotify_group *group,
 					    iter_info);
 		fsnotify_destroy_event(group, fsn_event);
 	}
+finish:
+	if (mask & FAN_ALL_PERM_EVENTS)
+		fsnotify_finish_user_wait(iter_info);
+#else
+finish:
 #endif
 	return ret;
 }
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 4/4] fsnotify: clean up fsnotify()
  2017-10-19 13:58 [PATCH 0/4] fix fanotify issues with the series in v4.12 Miklos Szeredi
                   ` (2 preceding siblings ...)
  2017-10-19 13:58 ` [PATCH 3/4] fanotify: fix fsnotify_prepare_user_wait() failure Miklos Szeredi
@ 2017-10-19 13:58 ` Miklos Szeredi
  2017-10-20 12:48   ` Amir Goldstein
  3 siblings, 1 reply; 11+ messages in thread
From: Miklos Szeredi @ 2017-10-19 13:58 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Jan Kara, Amir Goldstein, Xiong Zhou, linux-kernel

Use helpers to get first and next marks from connector.

Also get rid of inode_node/vfsmount_node local variables, which just refers
to the same objects as iter_info.  There was an srcu_dereference() for
foo_node, but that's completely superfluous since we've already done it
when obtaining foo_node.

Also get rid of inode_group/vfsmount_group local variables; checking
against non-NULL for these is the same as checking against non-NULL
inode_mark/vfsmount_mark.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/notify/fsnotify.c | 107 +++++++++++++++++++++++----------------------------
 1 file changed, 48 insertions(+), 59 deletions(-)

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 0ab6a7179e4d..7490edd74883 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -243,6 +243,28 @@ static int send_to_group(struct inode *to_tell,
 					file_name, cookie, iter_info);
 }
 
+static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector **connp)
+{
+	struct fsnotify_mark_connector *conn;
+	struct hlist_node *node = NULL;
+
+	conn = srcu_dereference(*connp, &fsnotify_mark_srcu);
+	if (conn)
+		node = srcu_dereference(conn->list.first, &fsnotify_mark_srcu);
+
+	return hlist_entry_safe(node, struct fsnotify_mark, obj_list);
+}
+
+static struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark)
+{
+	struct hlist_node *node = NULL;
+
+	if (mark)
+		node = srcu_dereference(mark->obj_list.next, &fsnotify_mark_srcu);
+
+	return hlist_entry_safe(node, struct fsnotify_mark, obj_list);
+}
+
 /*
  * This is the main call to fsnotify.  The VFS calls into hook specific functions
  * in linux/fsnotify.h.  Those functions then in turn call here.  Here will call
@@ -252,11 +274,7 @@ static int send_to_group(struct inode *to_tell,
 int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
 	     const unsigned char *file_name, u32 cookie)
 {
-	struct hlist_node *inode_node = NULL, *vfsmount_node = NULL;
-	struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL;
-	struct fsnotify_group *inode_group, *vfsmount_group;
-	struct fsnotify_mark_connector *inode_conn, *vfsmount_conn;
-	struct fsnotify_iter_info iter_info;
+	struct fsnotify_iter_info iter_info = { NULL, NULL };
 	struct mount *mnt;
 	int ret = 0;
 	/* global tests shouldn't care about events on child only the specific event */
@@ -291,26 +309,13 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
 
 	if ((mask & FS_MODIFY) ||
 	    (test_mask & to_tell->i_fsnotify_mask)) {
-		inode_conn = srcu_dereference(to_tell->i_fsnotify_marks,
-					      &fsnotify_mark_srcu);
-		if (inode_conn)
-			inode_node = srcu_dereference(inode_conn->list.first,
-						      &fsnotify_mark_srcu);
+		iter_info.inode_mark = fsnotify_first_mark(&to_tell->i_fsnotify_marks);
 	}
 
 	if (mnt && ((mask & FS_MODIFY) ||
 		    (test_mask & mnt->mnt_fsnotify_mask))) {
-		inode_conn = srcu_dereference(to_tell->i_fsnotify_marks,
-					      &fsnotify_mark_srcu);
-		if (inode_conn)
-			inode_node = srcu_dereference(inode_conn->list.first,
-						      &fsnotify_mark_srcu);
-		vfsmount_conn = srcu_dereference(mnt->mnt_fsnotify_marks,
-					         &fsnotify_mark_srcu);
-		if (vfsmount_conn)
-			vfsmount_node = srcu_dereference(
-						vfsmount_conn->list.first,
-						&fsnotify_mark_srcu);
+		iter_info.inode_mark = fsnotify_first_mark(&to_tell->i_fsnotify_marks);
+		iter_info.vfsmount_mark = fsnotify_first_mark(&mnt->mnt_fsnotify_marks);
 	}
 
 	/*
@@ -318,41 +323,28 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
 	 * ignore masks are properly reflected for mount mark notifications.
 	 * That's why this traversal is so complicated...
 	 */
-	while (inode_node || vfsmount_node) {
-		inode_group = NULL;
-		inode_mark = NULL;
-		vfsmount_group = NULL;
-		vfsmount_mark = NULL;
-
-		if (inode_node) {
-			inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu),
-						 struct fsnotify_mark, obj_list);
-			inode_group = inode_mark->group;
-			if (!(inode_mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED))
-				goto skip_inode;
-		}
+	while (iter_info.inode_mark || iter_info.vfsmount_mark) {
+		struct fsnotify_mark *inode_mark = iter_info.inode_mark;
+		struct fsnotify_mark *vfsmount_mark = iter_info.vfsmount_mark;
 
-		if (vfsmount_node) {
-			vfsmount_mark = hlist_entry(srcu_dereference(vfsmount_node, &fsnotify_mark_srcu),
-						    struct fsnotify_mark, obj_list);
-			vfsmount_group = vfsmount_mark->group;
-			if (!(vfsmount_mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED))
-				goto skip_vfsmount;
+		if (inode_mark &&
+		    !(inode_mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
+			vfsmount_mark = NULL;
+			goto skip;
+		}
+		if (vfsmount_mark &&
+		    !(vfsmount_mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
+			inode_mark = NULL;
+			goto skip;
 		}
 
-		iter_info.inode_mark = inode_mark;
-		iter_info.vfsmount_mark = vfsmount_mark;
-
-		if (inode_group && vfsmount_group) {
-			int cmp = fsnotify_compare_groups(inode_group,
-							  vfsmount_group);
-			if (cmp > 0) {
-				inode_group = NULL;
+		if (inode_mark && vfsmount_mark) {
+			int cmp = fsnotify_compare_groups(inode_mark->group,
+							  vfsmount_mark->group);
+			if (cmp > 0)
 				inode_mark = NULL;
-			} else if (cmp < 0) {
-				vfsmount_group = NULL;
+			else if (cmp < 0)
 				vfsmount_mark = NULL;
-			}
 		}
 
 		ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask,
@@ -361,14 +353,11 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
 
 		if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS))
 			goto out;
-skip_inode:
-		if (inode_group)
-			inode_node = srcu_dereference(inode_node->next,
-						      &fsnotify_mark_srcu);
-skip_vfsmount:
-		if (vfsmount_group)
-			vfsmount_node = srcu_dereference(vfsmount_node->next,
-							 &fsnotify_mark_srcu);
+skip:
+		if (inode_mark)
+			iter_info.inode_mark = fsnotify_next_mark(iter_info.inode_mark);
+		if (vfsmount_mark)
+			iter_info.vfsmount_mark = fsnotify_next_mark(iter_info.vfsmount_mark);
 	}
 	ret = 0;
 out:
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/4] fsnotify: fix pinning of marks and groups
  2017-10-19 13:58 ` [PATCH 1/4] fsnotify: fix pinning of marks and groups Miklos Szeredi
@ 2017-10-20 11:37   ` Amir Goldstein
  2017-10-20 11:56     ` Miklos Szeredi
  0 siblings, 1 reply; 11+ messages in thread
From: Amir Goldstein @ 2017-10-20 11:37 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: linux-fsdevel, Jan Kara, Xiong Zhou, linux-kernel

On Thu, Oct 19, 2017 at 4:58 PM, Miklos Szeredi <mszeredi@redhat.com> wrote:
> We may fail to pin one of the marks in fsnotify_prepare_user_wait() when
> dropping the srcu read lock, resulting in use after free at the next
> iteration.
>
> Solution is to store both marks in iter_info instead of just the one we'll
> be sending the event for, as well as pinning the group for both (if they
> have the same group: fine, pinnig it twice doesn't hurt).
>
> Also blind increment of group's user_waits is not enough, we could be far
> enough in the group's destruction that it isn't taken into account.
> Instead we need to check (under lock) that the mark is still attached to
> the group after having obtained a ref to the mark.  If not, skip it.
>
> In the process of fixing the above, clean up fsnotify_prepare_user_wait()/
> fsnotify_finish_user_wait().
>

Change looks correct, but it was so hard to follow this patch.
The moving of helpers around created a completely garbled diff
where it is impossible to see the logic changed.

Suggest to separate to 3 patches: create helpers (move them around),
pass the 2 marks in iter_info and fix increment of group's user_waits.

> Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> Fixes: 9385a84d7e1f ("fsnotify: Pass fsnotify_iter_info into handle_event handler")
> Cc: <stable@vger.kernel.org> # v4.12
> ---
>  fs/notify/fsnotify.c |   6 ++--
>  fs/notify/mark.c     | 100 +++++++++++++++++++++++----------------------------
>  2 files changed, 48 insertions(+), 58 deletions(-)
>
> diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
> index 0c4583b61717..48ec61f4c4d5 100644
> --- a/fs/notify/fsnotify.c
> +++ b/fs/notify/fsnotify.c
> @@ -336,6 +336,9 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
>                         vfsmount_group = vfsmount_mark->group;
>                 }
>
> +               iter_info.inode_mark = inode_mark;
> +               iter_info.vfsmount_mark = vfsmount_mark;
> +
>                 if (inode_group && vfsmount_group) {
>                         int cmp = fsnotify_compare_groups(inode_group,
>                                                           vfsmount_group);
> @@ -348,9 +351,6 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
>                         }
>                 }
>
> -               iter_info.inode_mark = inode_mark;
> -               iter_info.vfsmount_mark = vfsmount_mark;
> -
>                 ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask,
>                                     data, data_is, cookie, file_name,
>                                     &iter_info);
> diff --git a/fs/notify/mark.c b/fs/notify/mark.c
> index 9991f8826734..9a7c8dbeeb59 100644
> --- a/fs/notify/mark.c
> +++ b/fs/notify/mark.c
> @@ -109,16 +109,6 @@ void fsnotify_get_mark(struct fsnotify_mark *mark)
>         atomic_inc(&mark->refcnt);
>  }
>
> -/*
> - * Get mark reference when we found the mark via lockless traversal of object
> - * list. Mark can be already removed from the list by now and on its way to be
> - * destroyed once SRCU period ends.
> - */
> -static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark)
> -{
> -       return atomic_inc_not_zero(&mark->refcnt);
> -}
> -
>  static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
>  {
>         u32 new_mask = 0;
> @@ -256,32 +246,53 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
>                            FSNOTIFY_REAPER_DELAY);
>  }
>
> -bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info)
> +/*
> + * Get mark reference when we found the mark via lockless traversal of object
> + * list. Mark can be already removed from the list by now and on its way to be
> + * destroyed once SRCU period ends.
> + */
> +static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark)
>  {
> -       struct fsnotify_group *group;
> -
> -       if (WARN_ON_ONCE(!iter_info->inode_mark && !iter_info->vfsmount_mark))
> -               return false;
> -
> -       if (iter_info->inode_mark)
> -               group = iter_info->inode_mark->group;
> -       else
> -               group = iter_info->vfsmount_mark->group;
> +       if (!mark)
> +               return true;
> +
> +       if (atomic_inc_not_zero(&mark->refcnt)) {
> +               spin_lock(&mark->lock);
> +               if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) {
> +                       /* mark is attached, group is still alive then */
> +                       atomic_inc(&mark->group->user_waits);
> +                       spin_unlock(&mark->lock);
> +                       return true;
> +               }
> +               spin_unlock(&mark->lock);
> +               fsnotify_put_mark(mark);
> +       }
> +       return false;
> +}
>
> -       /*
> -        * Since acquisition of mark reference is an atomic op as well, we can
> -        * be sure this inc is seen before any effect of refcount increment.
> -        */
> -       atomic_inc(&group->user_waits);
> +static void fsnotify_put_mark_wait(struct fsnotify_mark *mark)
> +{
> +       if (mark) {
> +               struct fsnotify_group *group = mark->group;
>
> -       if (iter_info->inode_mark) {
> -               /* This can fail if mark is being removed */
> -               if (!fsnotify_get_mark_safe(iter_info->inode_mark))
> -                       goto out_wait;
> +               fsnotify_put_mark(mark);
> +               /*
> +                * We abuse notification_waitq on group shutdown for waiting for all
> +                * marks pinned when waiting for userspace.
> +                */
> +               if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
> +                       wake_up(&group->notification_waitq);
>         }
> -       if (iter_info->vfsmount_mark) {
> -               if (!fsnotify_get_mark_safe(iter_info->vfsmount_mark))
> -                       goto out_inode;
> +}
> +
> +bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info)
> +{
> +       /* This can fail if mark is being removed */
> +       if (!fsnotify_get_mark_safe(iter_info->inode_mark))
> +               return false;
> +       if (!fsnotify_get_mark_safe(iter_info->vfsmount_mark)) {
> +               fsnotify_put_mark_wait(iter_info->inode_mark);
> +               return false;
>         }
>
>         /*
> @@ -292,34 +303,13 @@ bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info)
>         srcu_read_unlock(&fsnotify_mark_srcu, iter_info->srcu_idx);
>
>         return true;
> -out_inode:
> -       if (iter_info->inode_mark)
> -               fsnotify_put_mark(iter_info->inode_mark);
> -out_wait:
> -       if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
> -               wake_up(&group->notification_waitq);
> -       return false;
>  }
>
>  void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info)
>  {
> -       struct fsnotify_group *group = NULL;
> -
>         iter_info->srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
> -       if (iter_info->inode_mark) {
> -               group = iter_info->inode_mark->group;
> -               fsnotify_put_mark(iter_info->inode_mark);
> -       }
> -       if (iter_info->vfsmount_mark) {
> -               group = iter_info->vfsmount_mark->group;
> -               fsnotify_put_mark(iter_info->vfsmount_mark);
> -       }
> -       /*
> -        * We abuse notification_waitq on group shutdown for waiting for all
> -        * marks pinned when waiting for userspace.
> -        */
> -       if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
> -               wake_up(&group->notification_waitq);
> +       fsnotify_put_mark_wait(iter_info->inode_mark);
> +       fsnotify_put_mark_wait(iter_info->vfsmount_mark);
>  }
>
>  /*
> --
> 2.5.5
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/4] fsnotify: fix pinning of marks and groups
  2017-10-20 11:37   ` Amir Goldstein
@ 2017-10-20 11:56     ` Miklos Szeredi
  0 siblings, 0 replies; 11+ messages in thread
From: Miklos Szeredi @ 2017-10-20 11:56 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: linux-fsdevel, Jan Kara, Xiong Zhou, linux-kernel

On Fri, Oct 20, 2017 at 1:37 PM, Amir Goldstein <amir73il@gmail.com> wrote:
> On Thu, Oct 19, 2017 at 4:58 PM, Miklos Szeredi <mszeredi@redhat.com> wrote:
>> We may fail to pin one of the marks in fsnotify_prepare_user_wait() when
>> dropping the srcu read lock, resulting in use after free at the next
>> iteration.
>>
>> Solution is to store both marks in iter_info instead of just the one we'll
>> be sending the event for, as well as pinning the group for both (if they
>> have the same group: fine, pinnig it twice doesn't hurt).

I was thinking about this one and actually pinning only the single
group for the used mark was correct, since the other, unused mark
won't have its group dereferenced while pinned.

Only there doesn't appear to be a way to make this work without adding
complexity :(


>>
>> Also blind increment of group's user_waits is not enough, we could be far
>> enough in the group's destruction that it isn't taken into account.
>> Instead we need to check (under lock) that the mark is still attached to
>> the group after having obtained a ref to the mark.  If not, skip it.
>>
>> In the process of fixing the above, clean up fsnotify_prepare_user_wait()/
>> fsnotify_finish_user_wait().
>>
>
> Change looks correct, but it was so hard to follow this patch.
> The moving of helpers around created a completely garbled diff
> where it is impossible to see the logic changed.
>
> Suggest to separate to 3 patches: create helpers (move them around),
> pass the 2 marks in iter_info and fix increment of group's user_waits.

Yeah, I'll do that.

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/4] fsnotify: skip unattached marks
  2017-10-19 13:58 ` [PATCH 2/4] fsnotify: skip unattached marks Miklos Szeredi
@ 2017-10-20 12:05   ` Amir Goldstein
  0 siblings, 0 replies; 11+ messages in thread
From: Amir Goldstein @ 2017-10-20 12:05 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: linux-fsdevel, Jan Kara, Xiong Zhou, linux-kernel

On Thu, Oct 19, 2017 at 4:58 PM, Miklos Szeredi <mszeredi@redhat.com> wrote:
> After having gone through a ref-unref for the mark, dereferencing the group
> (e.g. in fsnotify_compare_groups()) is wrong since the group may be
> completely gone by that time.  So before continuing to traverse the mark
> list, check if the mark is still attached.
>
> This is done in the generic case, not just when we go through
> fsnotify_prepare_user_wait()/fsnotify_finish_user_wait(), otherwise it
> would introduce unnecessary complexity.  And it shouldn't hurt to skip
> unattached marks anyway ("flags" is very likely in same cacheline as
> neighbouring "ignored_mask", which is pulled in anyway).
>

Looks good.

> Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> Fixes: 9385a84d7e1f ("fsnotify: Pass fsnotify_iter_info into handle_event handler")

I don't know that *this* is the commit that this patch "Fixes"
but for the purpose of determining if fix patch should be applied or
not I guess it will do

> Cc: <stable@vger.kernel.org> # v4.12
> ---
>  fs/notify/fsnotify.c | 7 ++++++-
>  1 file changed, 6 insertions(+), 1 deletion(-)
>
> diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
> index 48ec61f4c4d5..0ab6a7179e4d 100644
> --- a/fs/notify/fsnotify.c
> +++ b/fs/notify/fsnotify.c
> @@ -328,12 +328,16 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
>                         inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu),
>                                                  struct fsnotify_mark, obj_list);
>                         inode_group = inode_mark->group;
> +                       if (!(inode_mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED))
> +                               goto skip_inode;
>                 }
>
>                 if (vfsmount_node) {
>                         vfsmount_mark = hlist_entry(srcu_dereference(vfsmount_node, &fsnotify_mark_srcu),
>                                                     struct fsnotify_mark, obj_list);
>                         vfsmount_group = vfsmount_mark->group;
> +                       if (!(vfsmount_mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED))
> +                               goto skip_vfsmount;
>                 }
>
>                 iter_info.inode_mark = inode_mark;
> @@ -357,10 +361,11 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
>
>                 if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS))
>                         goto out;
> -
> +skip_inode:
>                 if (inode_group)
>                         inode_node = srcu_dereference(inode_node->next,
>                                                       &fsnotify_mark_srcu);
> +skip_vfsmount:
>                 if (vfsmount_group)
>                         vfsmount_node = srcu_dereference(vfsmount_node->next,
>                                                          &fsnotify_mark_srcu);
> --
> 2.5.5
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/4] fanotify: fix fsnotify_prepare_user_wait() failure
  2017-10-19 13:58 ` [PATCH 3/4] fanotify: fix fsnotify_prepare_user_wait() failure Miklos Szeredi
@ 2017-10-20 12:25   ` Amir Goldstein
  0 siblings, 0 replies; 11+ messages in thread
From: Amir Goldstein @ 2017-10-20 12:25 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: linux-fsdevel, Jan Kara, Xiong Zhou, linux-kernel

On Thu, Oct 19, 2017 at 4:58 PM, Miklos Szeredi <mszeredi@redhat.com> wrote:
> If fsnotify_prepare_user_wait() fails, we leave the event on the
> notification list.  Which will result in a warning in
> fsnotify_destroy_event() and later use-after-free.
>
> Instead of adding a new helper to remove the event from the list in this
> case, I opted to move the prepare/finish up into fanotify_handle_event().
>
> This will allow these to be moved further out into the generic code later,
> and perhaps let us move to non-sleeping RCU.

Interesting.
Because all marks are ordered by group priority and there are no permission
events in priority 0, as soon as we see group prio 0 on both inode and vfs mark,
we can maybe change the mark iteration?

>
> Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> Fixes: 05f0e38724e8 ("fanotify: Release SRCU lock when waiting for userspace response")
> Cc: <stable@vger.kernel.org> # v4.12
> ---
>  fs/notify/fanotify/fanotify.c | 33 ++++++++++++++++++++-------------
>  1 file changed, 20 insertions(+), 13 deletions(-)
>
> diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
> index 2fa99aeaa095..fb7a1339982c 100644
> --- a/fs/notify/fanotify/fanotify.c
> +++ b/fs/notify/fanotify/fanotify.c
> @@ -64,19 +64,8 @@ static int fanotify_get_response(struct fsnotify_group *group,
>
>         pr_debug("%s: group=%p event=%p\n", __func__, group, event);
>
> -       /*
> -        * fsnotify_prepare_user_wait() fails if we race with mark deletion.
> -        * Just let the operation pass in that case.
> -        */
> -       if (!fsnotify_prepare_user_wait(iter_info)) {
> -               event->response = FAN_ALLOW;
> -               goto out;
> -       }
> -
>         wait_event(group->fanotify_data.access_waitq, event->response);
>
> -       fsnotify_finish_user_wait(iter_info);
> -out:
>         /* userspace responded, convert to something usable */
>         switch (event->response) {
>         case FAN_ALLOW:
> @@ -211,9 +200,21 @@ static int fanotify_handle_event(struct fsnotify_group *group,
>         pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode,
>                  mask);
>
> +#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
> +       if (mask & FAN_ALL_PERM_EVENTS) {
> +               /*
> +                * fsnotify_prepare_user_wait() fails if we race with mark deletion.
> +                * Just let the operation pass in that case.
> +                */
> +               if (!fsnotify_prepare_user_wait(iter_info))
> +                       return 0;
> +       }
> +#endif

Well you are not the one to introduce ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
to this code, but I have to wonder, what are all those ifdefs doing in
this code?
What are they trying to save?
FAN_ALL_PERM_EVENTS is masked out from valid event flags in fanotify_mark()
I don't really see a reason for any other ifdef in non headers here.

> +
>         event = fanotify_alloc_event(inode, mask, data);
> +       ret = -ENOMEM;
>         if (unlikely(!event))
> -               return -ENOMEM;
> +               goto finish;
>
>         fsn_event = &event->fse;
>         ret = fsnotify_add_event(group, fsn_event, fanotify_merge);
> @@ -223,7 +224,8 @@ static int fanotify_handle_event(struct fsnotify_group *group,
>                 /* Our event wasn't used in the end. Free it. */
>                 fsnotify_destroy_event(group, fsn_event);
>
> -               return 0;
> +               ret = 0;
> +               goto finish;
>         }
>
>  #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
> @@ -232,6 +234,11 @@ static int fanotify_handle_event(struct fsnotify_group *group,
>                                             iter_info);
>                 fsnotify_destroy_event(group, fsn_event);
>         }
> +finish:
> +       if (mask & FAN_ALL_PERM_EVENTS)
> +               fsnotify_finish_user_wait(iter_info);
> +#else
> +finish:
>  #endif
>         return ret;
>  }
> --
> 2.5.5
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 4/4] fsnotify: clean up fsnotify()
  2017-10-19 13:58 ` [PATCH 4/4] fsnotify: clean up fsnotify() Miklos Szeredi
@ 2017-10-20 12:48   ` Amir Goldstein
  2017-10-20 12:56     ` Miklos Szeredi
  0 siblings, 1 reply; 11+ messages in thread
From: Amir Goldstein @ 2017-10-20 12:48 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: linux-fsdevel, Jan Kara, Xiong Zhou, linux-kernel

On Thu, Oct 19, 2017 at 4:58 PM, Miklos Szeredi <mszeredi@redhat.com> wrote:
> Use helpers to get first and next marks from connector.
>
> Also get rid of inode_node/vfsmount_node local variables, which just refers
> to the same objects as iter_info.  There was an srcu_dereference() for
> foo_node, but that's completely superfluous since we've already done it
> when obtaining foo_node.
>
> Also get rid of inode_group/vfsmount_group local variables; checking
> against non-NULL for these is the same as checking against non-NULL
> inode_mark/vfsmount_mark.
>

Nice Cleanup!

> Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> ---
>  fs/notify/fsnotify.c | 107 +++++++++++++++++++++++----------------------------
>  1 file changed, 48 insertions(+), 59 deletions(-)
>
> diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
> index 0ab6a7179e4d..7490edd74883 100644
> --- a/fs/notify/fsnotify.c
> +++ b/fs/notify/fsnotify.c
> @@ -243,6 +243,28 @@ static int send_to_group(struct inode *to_tell,
>                                         file_name, cookie, iter_info);
>  }
>
> +static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector **connp)
> +{
> +       struct fsnotify_mark_connector *conn;
> +       struct hlist_node *node = NULL;
> +
> +       conn = srcu_dereference(*connp, &fsnotify_mark_srcu);
> +       if (conn)
> +               node = srcu_dereference(conn->list.first, &fsnotify_mark_srcu);
> +
> +       return hlist_entry_safe(node, struct fsnotify_mark, obj_list);
> +}
> +
> +static struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark)
> +{
> +       struct hlist_node *node = NULL;
> +
> +       if (mark)
> +               node = srcu_dereference(mark->obj_list.next, &fsnotify_mark_srcu);
> +
> +       return hlist_entry_safe(node, struct fsnotify_mark, obj_list);
> +}
> +
>  /*
>   * This is the main call to fsnotify.  The VFS calls into hook specific functions
>   * in linux/fsnotify.h.  Those functions then in turn call here.  Here will call
> @@ -252,11 +274,7 @@ static int send_to_group(struct inode *to_tell,
>  int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
>              const unsigned char *file_name, u32 cookie)
>  {
> -       struct hlist_node *inode_node = NULL, *vfsmount_node = NULL;
> -       struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL;
> -       struct fsnotify_group *inode_group, *vfsmount_group;
> -       struct fsnotify_mark_connector *inode_conn, *vfsmount_conn;
> -       struct fsnotify_iter_info iter_info;
> +       struct fsnotify_iter_info iter_info = { NULL, NULL };

There was a series of security patches converting those initializers to { }
don't ask me why.

>         struct mount *mnt;
>         int ret = 0;
>         /* global tests shouldn't care about events on child only the specific event */
> @@ -291,26 +309,13 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
>
>         if ((mask & FS_MODIFY) ||
>             (test_mask & to_tell->i_fsnotify_mask)) {
> -               inode_conn = srcu_dereference(to_tell->i_fsnotify_marks,
> -                                             &fsnotify_mark_srcu);
> -               if (inode_conn)
> -                       inode_node = srcu_dereference(inode_conn->list.first,
> -                                                     &fsnotify_mark_srcu);
> +               iter_info.inode_mark = fsnotify_first_mark(&to_tell->i_fsnotify_marks);
>         }
>
>         if (mnt && ((mask & FS_MODIFY) ||
>                     (test_mask & mnt->mnt_fsnotify_mask))) {
> -               inode_conn = srcu_dereference(to_tell->i_fsnotify_marks,
> -                                             &fsnotify_mark_srcu);
> -               if (inode_conn)
> -                       inode_node = srcu_dereference(inode_conn->list.first,
> -                                                     &fsnotify_mark_srcu);
> -               vfsmount_conn = srcu_dereference(mnt->mnt_fsnotify_marks,
> -                                                &fsnotify_mark_srcu);
> -               if (vfsmount_conn)
> -                       vfsmount_node = srcu_dereference(
> -                                               vfsmount_conn->list.first,
> -                                               &fsnotify_mark_srcu);
> +               iter_info.inode_mark = fsnotify_first_mark(&to_tell->i_fsnotify_marks);
> +               iter_info.vfsmount_mark = fsnotify_first_mark(&mnt->mnt_fsnotify_marks);
>         }
>
>         /*
> @@ -318,41 +323,28 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
>          * ignore masks are properly reflected for mount mark notifications.
>          * That's why this traversal is so complicated...
>          */
> -       while (inode_node || vfsmount_node) {
> -               inode_group = NULL;
> -               inode_mark = NULL;
> -               vfsmount_group = NULL;
> -               vfsmount_mark = NULL;
> -
> -               if (inode_node) {
> -                       inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu),
> -                                                struct fsnotify_mark, obj_list);
> -                       inode_group = inode_mark->group;
> -                       if (!(inode_mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED))
> -                               goto skip_inode;
> -               }
> +       while (iter_info.inode_mark || iter_info.vfsmount_mark) {
> +               struct fsnotify_mark *inode_mark = iter_info.inode_mark;
> +               struct fsnotify_mark *vfsmount_mark = iter_info.vfsmount_mark;
>
> -               if (vfsmount_node) {
> -                       vfsmount_mark = hlist_entry(srcu_dereference(vfsmount_node, &fsnotify_mark_srcu),
> -                                                   struct fsnotify_mark, obj_list);
> -                       vfsmount_group = vfsmount_mark->group;
> -                       if (!(vfsmount_mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED))
> -                               goto skip_vfsmount;
> +               if (inode_mark &&
> +                   !(inode_mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
> +                       vfsmount_mark = NULL;
> +                       goto skip;
> +               }
> +               if (vfsmount_mark &&
> +                   !(vfsmount_mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
> +                       inode_mark = NULL;
> +                       goto skip;
>                 }
>
> -               iter_info.inode_mark = inode_mark;
> -               iter_info.vfsmount_mark = vfsmount_mark;
> -
> -               if (inode_group && vfsmount_group) {
> -                       int cmp = fsnotify_compare_groups(inode_group,
> -                                                         vfsmount_group);
> -                       if (cmp > 0) {
> -                               inode_group = NULL;
> +               if (inode_mark && vfsmount_mark) {
> +                       int cmp = fsnotify_compare_groups(inode_mark->group,
> +                                                         vfsmount_mark->group);
> +                       if (cmp > 0)
>                                 inode_mark = NULL;
> -                       } else if (cmp < 0) {
> -                               vfsmount_group = NULL;
> +                       else if (cmp < 0)
>                                 vfsmount_mark = NULL;
> -                       }
>                 }
>
>                 ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask,
> @@ -361,14 +353,11 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
>
>                 if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS))
>                         goto out;
> -skip_inode:
> -               if (inode_group)
> -                       inode_node = srcu_dereference(inode_node->next,
> -                                                     &fsnotify_mark_srcu);
> -skip_vfsmount:
> -               if (vfsmount_group)
> -                       vfsmount_node = srcu_dereference(vfsmount_node->next,
> -                                                        &fsnotify_mark_srcu);
> +skip:
> +               if (inode_mark)
> +                       iter_info.inode_mark = fsnotify_next_mark(iter_info.inode_mark);
else if?
> +               if (vfsmount_mark)
> +                       iter_info.vfsmount_mark = fsnotify_next_mark(iter_info.vfsmount_mark);
>         }
>         ret = 0;
>  out:
> --
> 2.5.5
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 4/4] fsnotify: clean up fsnotify()
  2017-10-20 12:48   ` Amir Goldstein
@ 2017-10-20 12:56     ` Miklos Szeredi
  0 siblings, 0 replies; 11+ messages in thread
From: Miklos Szeredi @ 2017-10-20 12:56 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: linux-fsdevel, Jan Kara, Xiong Zhou, linux-kernel

On Fri, Oct 20, 2017 at 2:48 PM, Amir Goldstein <amir73il@gmail.com> wrote:
> On Thu, Oct 19, 2017 at 4:58 PM, Miklos Szeredi <mszeredi@redhat.com> wrote:
>> Use helpers to get first and next marks from connector.
>>
>> Also get rid of inode_node/vfsmount_node local variables, which just refers
>> to the same objects as iter_info.  There was an srcu_dereference() for
>> foo_node, but that's completely superfluous since we've already done it
>> when obtaining foo_node.
>>
>> Also get rid of inode_group/vfsmount_group local variables; checking
>> against non-NULL for these is the same as checking against non-NULL
>> inode_mark/vfsmount_mark.
>>
>
> Nice Cleanup!
>
>> Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
>> ---
>>  fs/notify/fsnotify.c | 107 +++++++++++++++++++++++----------------------------
>>  1 file changed, 48 insertions(+), 59 deletions(-)
>>
>> diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
>> index 0ab6a7179e4d..7490edd74883 100644
>> --- a/fs/notify/fsnotify.c
>> +++ b/fs/notify/fsnotify.c
>> @@ -243,6 +243,28 @@ static int send_to_group(struct inode *to_tell,
>>                                         file_name, cookie, iter_info);
>>  }
>>
>> +static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector **connp)
>> +{
>> +       struct fsnotify_mark_connector *conn;
>> +       struct hlist_node *node = NULL;
>> +
>> +       conn = srcu_dereference(*connp, &fsnotify_mark_srcu);
>> +       if (conn)
>> +               node = srcu_dereference(conn->list.first, &fsnotify_mark_srcu);
>> +
>> +       return hlist_entry_safe(node, struct fsnotify_mark, obj_list);
>> +}
>> +
>> +static struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark)
>> +{
>> +       struct hlist_node *node = NULL;
>> +
>> +       if (mark)
>> +               node = srcu_dereference(mark->obj_list.next, &fsnotify_mark_srcu);
>> +
>> +       return hlist_entry_safe(node, struct fsnotify_mark, obj_list);
>> +}
>> +
>>  /*
>>   * This is the main call to fsnotify.  The VFS calls into hook specific functions
>>   * in linux/fsnotify.h.  Those functions then in turn call here.  Here will call
>> @@ -252,11 +274,7 @@ static int send_to_group(struct inode *to_tell,
>>  int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
>>              const unsigned char *file_name, u32 cookie)
>>  {
>> -       struct hlist_node *inode_node = NULL, *vfsmount_node = NULL;
>> -       struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL;
>> -       struct fsnotify_group *inode_group, *vfsmount_group;
>> -       struct fsnotify_mark_connector *inode_conn, *vfsmount_conn;
>> -       struct fsnotify_iter_info iter_info;
>> +       struct fsnotify_iter_info iter_info = { NULL, NULL };
>
> There was a series of security patches converting those initializers to { }
> don't ask me why.

Okay.

>
>>         struct mount *mnt;
>>         int ret = 0;
>>         /* global tests shouldn't care about events on child only the specific event */
>> @@ -291,26 +309,13 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
>>
>>         if ((mask & FS_MODIFY) ||
>>             (test_mask & to_tell->i_fsnotify_mask)) {
>> -               inode_conn = srcu_dereference(to_tell->i_fsnotify_marks,
>> -                                             &fsnotify_mark_srcu);
>> -               if (inode_conn)
>> -                       inode_node = srcu_dereference(inode_conn->list.first,
>> -                                                     &fsnotify_mark_srcu);
>> +               iter_info.inode_mark = fsnotify_first_mark(&to_tell->i_fsnotify_marks);
>>         }
>>
>>         if (mnt && ((mask & FS_MODIFY) ||
>>                     (test_mask & mnt->mnt_fsnotify_mask))) {
>> -               inode_conn = srcu_dereference(to_tell->i_fsnotify_marks,
>> -                                             &fsnotify_mark_srcu);
>> -               if (inode_conn)
>> -                       inode_node = srcu_dereference(inode_conn->list.first,
>> -                                                     &fsnotify_mark_srcu);
>> -               vfsmount_conn = srcu_dereference(mnt->mnt_fsnotify_marks,
>> -                                                &fsnotify_mark_srcu);
>> -               if (vfsmount_conn)
>> -                       vfsmount_node = srcu_dereference(
>> -                                               vfsmount_conn->list.first,
>> -                                               &fsnotify_mark_srcu);
>> +               iter_info.inode_mark = fsnotify_first_mark(&to_tell->i_fsnotify_marks);
>> +               iter_info.vfsmount_mark = fsnotify_first_mark(&mnt->mnt_fsnotify_marks);
>>         }
>>
>>         /*
>> @@ -318,41 +323,28 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
>>          * ignore masks are properly reflected for mount mark notifications.
>>          * That's why this traversal is so complicated...
>>          */
>> -       while (inode_node || vfsmount_node) {
>> -               inode_group = NULL;
>> -               inode_mark = NULL;
>> -               vfsmount_group = NULL;
>> -               vfsmount_mark = NULL;
>> -
>> -               if (inode_node) {
>> -                       inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu),
>> -                                                struct fsnotify_mark, obj_list);
>> -                       inode_group = inode_mark->group;
>> -                       if (!(inode_mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED))
>> -                               goto skip_inode;
>> -               }
>> +       while (iter_info.inode_mark || iter_info.vfsmount_mark) {
>> +               struct fsnotify_mark *inode_mark = iter_info.inode_mark;
>> +               struct fsnotify_mark *vfsmount_mark = iter_info.vfsmount_mark;
>>
>> -               if (vfsmount_node) {
>> -                       vfsmount_mark = hlist_entry(srcu_dereference(vfsmount_node, &fsnotify_mark_srcu),
>> -                                                   struct fsnotify_mark, obj_list);
>> -                       vfsmount_group = vfsmount_mark->group;
>> -                       if (!(vfsmount_mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED))
>> -                               goto skip_vfsmount;
>> +               if (inode_mark &&
>> +                   !(inode_mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
>> +                       vfsmount_mark = NULL;
>> +                       goto skip;
>> +               }
>> +               if (vfsmount_mark &&
>> +                   !(vfsmount_mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
>> +                       inode_mark = NULL;
>> +                       goto skip;
>>                 }
>>
>> -               iter_info.inode_mark = inode_mark;
>> -               iter_info.vfsmount_mark = vfsmount_mark;
>> -
>> -               if (inode_group && vfsmount_group) {
>> -                       int cmp = fsnotify_compare_groups(inode_group,
>> -                                                         vfsmount_group);
>> -                       if (cmp > 0) {
>> -                               inode_group = NULL;
>> +               if (inode_mark && vfsmount_mark) {
>> +                       int cmp = fsnotify_compare_groups(inode_mark->group,
>> +                                                         vfsmount_mark->group);
>> +                       if (cmp > 0)
>>                                 inode_mark = NULL;
>> -                       } else if (cmp < 0) {
>> -                               vfsmount_group = NULL;
>> +                       else if (cmp < 0)
>>                                 vfsmount_mark = NULL;
>> -                       }
>>                 }
>>
>>                 ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask,
>> @@ -361,14 +353,11 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
>>
>>                 if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS))
>>                         goto out;
>> -skip_inode:
>> -               if (inode_group)
>> -                       inode_node = srcu_dereference(inode_node->next,
>> -                                                     &fsnotify_mark_srcu);
>> -skip_vfsmount:
>> -               if (vfsmount_group)
>> -                       vfsmount_node = srcu_dereference(vfsmount_node->next,
>> -                                                        &fsnotify_mark_srcu);
>> +skip:
>> +               if (inode_mark)
>> +                       iter_info.inode_mark = fsnotify_next_mark(iter_info.inode_mark);
> else if?

We can have both (see  fsnotify_compare_groups()).

>> +               if (vfsmount_mark)
>> +                       iter_info.vfsmount_mark = fsnotify_next_mark(iter_info.vfsmount_mark);
>>         }
>>         ret = 0;
>>  out:
>> --
>> 2.5.5
>>

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2017-10-20 12:56 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-10-19 13:58 [PATCH 0/4] fix fanotify issues with the series in v4.12 Miklos Szeredi
2017-10-19 13:58 ` [PATCH 1/4] fsnotify: fix pinning of marks and groups Miklos Szeredi
2017-10-20 11:37   ` Amir Goldstein
2017-10-20 11:56     ` Miklos Szeredi
2017-10-19 13:58 ` [PATCH 2/4] fsnotify: skip unattached marks Miklos Szeredi
2017-10-20 12:05   ` Amir Goldstein
2017-10-19 13:58 ` [PATCH 3/4] fanotify: fix fsnotify_prepare_user_wait() failure Miklos Szeredi
2017-10-20 12:25   ` Amir Goldstein
2017-10-19 13:58 ` [PATCH 4/4] fsnotify: clean up fsnotify() Miklos Szeredi
2017-10-20 12:48   ` Amir Goldstein
2017-10-20 12:56     ` Miklos Szeredi

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.