All of lore.kernel.org
 help / color / mirror / Atom feed
* [regression] fsnotify fails stress test since fsnotify_for_v5.15-rc1 merged
@ 2021-09-07  6:33 Murphy Zhou
  2021-09-09 11:00 ` Amir Goldstein
  0 siblings, 1 reply; 6+ messages in thread
From: Murphy Zhou @ 2021-09-07  6:33 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Amir Goldstein

Hi,

Since this commit:

commit ec44610fe2b86daef70f3f53f47d2a2542d7094f
Author: Amir Goldstein <amir73il@gmail.com>
Date:   Tue Aug 10 18:12:19 2021 +0300

    fsnotify: count all objects with attached connectors




Kernel fsnotify can't finish a stress testcase that used to pass quickly.

The kernel hangs at umount. The hung umount cannot be killed; the only way out is restarting the server.

Reproducer text is attached.

Thanks,
Murphy

--- reproducer test.sh start -------------------------------------

#!/bin/bash

# Build both stress programs; stop right away if either fails to compile.
for prog in fanotify_init_stress fanotify_flush_stress
do
	cc $prog.c -o $prog || exit
done

# TIMEOUT is used as a sleep duration and is also passed to each stresser.
export TIMEOUT=10s
# Names of the stress binaries that are paired up against each other.
STRESSES="fanotify_flush_stress fanotify_init_stress"

# Kill every running stresser and wait until none is left.
#
# Loops for as long as ps still lists a fanotify_flush_stress or
# fanotify_init_stress process, running killall on both names and then
# sleeping one second. The matching ps lines are echoed to stdout.
function cleanup_processes()
{
	# grep -E replaces the obsolescent egrep (same extended-regex
	# matching); "grep -v grep" drops the grep process itself.
	while ps jf | grep -E "fanotify_flush_stress|fanotify_init_stress" | grep -v grep ; do
		killall fanotify_init_stress > /dev/null 2>&1
		killall fanotify_flush_stress > /dev/null 2>&1
		sleep 1
	done
}

SCRATCH_MNT=/fsn

# Create a 1G XFS image and loop-mount it on the scratch mount point;
# give up as soon as one of the essential steps fails.
fallocate -l 1G fsn.img || exit
mkfs.xfs -f fsn.img || exit
mkdir -p $SCRATCH_MNT
mount -o loop fsn.img $SCRATCH_MNT || exit

touch $SCRATCH_MNT/testfile

# For every ordered pair of stressers, start each one in the background
# against both the mount point and the test file, let them run for
# TIMEOUT, then kill them all before the next round.
for first in $STRESSES
do
	for second in $STRESSES
	do
		echo testing $first $second
		for prog in $first $second
		do
			for target in $SCRATCH_MNT $SCRATCH_MNT/testfile
			do
				./$prog $target $TIMEOUT > /dev/null 2>&1 &
			done
		done
		sleep $TIMEOUT
		cleanup_processes
	done
done

# Final settle period, then flush and unmount the scratch filesystem.
sleep $TIMEOUT
sync
umount $SCRATCH_MNT
rm -rf fsn* tmp
--- reproducer test.sh end -------------------------------------

--- reproducer fanotify_flush_stress.c start----------------
#define _GNU_SOURCE     /* Needed to get O_LARGEFILE definition */
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <poll.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/fanotify.h>
#include <unistd.h>
#include <string.h>

/*
 * fanotify stress loop: keep a single fanotify group open and endlessly
 * add a mount mark on argv[1] and flush the group's marks again.
 *
 * argv[1] is the path handed to fanotify_mark(); any further arguments are
 * accepted and ignored. The loop never terminates on its own, so the
 * process has to be killed from outside. A failing fanotify_mark() is
 * reported with perror() and the loop carries on.
 *
 * Exits with EXIT_FAILURE when no path is given or fanotify_init() fails.
 */
int main(int argc, char *argv[])
{
	int fd;

	/* argv[1] is printed and marked below, so a path is mandatory. */
	if (argc < 2) {
		fprintf(stderr, "Usage: %s PATH [IGNORED...]\n",
			argc > 0 ? argv[0] : "fanotify_flush_stress");
		exit(EXIT_FAILURE);
	}

	printf("%s on %s\n", argv[0], argv[1]);
	/* Create the file descriptor for accessing the fanotify API */
	fd = fanotify_init(FAN_CLOEXEC | FAN_CLASS_CONTENT | FAN_NONBLOCK,
					   O_RDONLY | O_LARGEFILE);
	if (fd == -1) {
		perror("fanotify_init");
		exit(EXIT_FAILURE);
	}

	/* Loop marking all kinds of events and flush */
	while (1) {

		if (fanotify_mark(fd, FAN_MARK_ADD | FAN_MARK_MOUNT,
			  FAN_ACCESS | FAN_MODIFY | FAN_OPEN_PERM | FAN_CLOSE |
			  FAN_OPEN | FAN_ACCESS_PERM | FAN_ONDIR |
			  FAN_EVENT_ON_CHILD, -1, argv[1]) == -1)

			perror("fanotify_mark add");

		/* First flush: with FAN_MARK_MOUNT set. */
		if (fanotify_mark(fd, FAN_MARK_FLUSH | FAN_MARK_MOUNT,
						0, -1, argv[1]) == -1)
			perror("fanotify_mark flush mount");

		if (fanotify_mark(fd, FAN_MARK_ADD | FAN_MARK_MOUNT,
			  FAN_ACCESS | FAN_MODIFY | FAN_OPEN_PERM | FAN_CLOSE |
			  FAN_OPEN | FAN_ACCESS_PERM | FAN_ONDIR |
			  FAN_EVENT_ON_CHILD, -1, argv[1]) == -1)

			perror("fanotify_mark add");

		/* Second flush: plain FAN_MARK_FLUSH, without FAN_MARK_MOUNT. */
		if (fanotify_mark(fd, FAN_MARK_FLUSH, 0, -1, argv[1]) == -1)
			perror("fanotify_mark flush");
	}

	/* Not reached: the loop above has no exit path. */
	close(fd);
	exit(EXIT_SUCCESS);
}
--- reproducer fanotify_flush_stress.c end ----------------
--- reproducer fanotify_init_stress.c start -----------_
#define _GNU_SOURCE     /* Needed to get O_LARGEFILE definition */
#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <sys/fanotify.h>

/*
 * fanotify stress loop: endlessly create a fanotify group, add a mount
 * mark on argv[1] and close the group again.
 *
 * argv[1] is the path handed to fanotify_mark(); any further arguments are
 * accepted and ignored. The loop never terminates on its own, so the
 * process has to be killed from outside. Failing calls are reported with
 * perror() and the loop carries on.
 *
 * Exits with EXIT_FAILURE when no path is given.
 */
int main(int argc, char *argv[])
{
	int fd;

	/* argv[1] is printed and marked below, so a path is mandatory. */
	if (argc < 2) {
		fprintf(stderr, "Usage: %s PATH [IGNORED...]\n",
			argc > 0 ? argv[0] : "fanotify_init_stress");
		exit(EXIT_FAILURE);
	}

	printf("%s on %s\n", argv[0], argv[1]);
	while (1) {

		/* Create the file descriptor for accessing the fanotify API */
		fd = fanotify_init(FAN_CLOEXEC | FAN_CLASS_CONTENT |
				FAN_NONBLOCK, O_RDONLY | O_LARGEFILE);
		if (fd == -1) {
			perror("fanotify_init");
			/* No group to mark or close; just try again. */
			continue;
		}

		if (fanotify_mark(fd, FAN_MARK_ADD | FAN_MARK_MOUNT,
				FAN_ACCESS | FAN_MODIFY | FAN_OPEN_PERM |
				FAN_CLOSE | FAN_OPEN | FAN_ACCESS_PERM |
				FAN_ONDIR | FAN_EVENT_ON_CHILD, -1,
				argv[1]) == -1)
			perror("fanotify_mark");

		close(fd);
	}

	/* Not reached: the loop above has no exit path. */
	exit(EXIT_SUCCESS);
}
--- reproducer fanotify_init_stress.c end -----------_

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [regression] fsnotify fails stress test since fsnotify_for_v5.15-rc1 merged
  2021-09-07  6:33 [regression] fsnotify fails stress test since fsnotify_for_v5.15-rc1 merged Murphy Zhou
@ 2021-09-09 11:00 ` Amir Goldstein
  2021-09-09 11:11   ` Petr Vorel
  2021-09-10  1:05   ` Murphy Zhou
  0 siblings, 2 replies; 6+ messages in thread
From: Amir Goldstein @ 2021-09-09 11:00 UTC (permalink / raw)
  To: Murphy Zhou; +Cc: linux-fsdevel, Jan Kara, Petr Vorel

[-- Attachment #1: Type: text/plain, Size: 849 bytes --]

On Tue, Sep 7, 2021 at 9:33 AM Murphy Zhou <jencce.kernel@gmail.com> wrote:
>
> Hi,
>
> Since this commit:
>
> commit ec44610fe2b86daef70f3f53f47d2a2542d7094f
> Author: Amir Goldstein <amir73il@gmail.com>
> Date:   Tue Aug 10 18:12:19 2021 +0300
>
>     fsnotify: count all objects with attached connectors
>
>
>
>
> Kernel fsnotify can't finish a stress testcase that used to pass quickly.
>
> Kernel hung at umount. Can not be killed but restarting the server.
>
> Reproducer text is attached.
>

Hi Murphy,

Thank you for the detailed report.
I was able to reproduce the hang and the attached patch fixes it for me.
Could you please verify the fix yourself as well?

This is a good regression test.
Did you consider contributing it to LTP?
I think the LTP team could also help converting your reproducer to
an LTP test (CC: Petr).

Thanks,
Amir.

[-- Attachment #2: fsnotify-fix-sb_connectors-leak.patch --]
[-- Type: text/x-patch, Size: 1198 bytes --]

From 14d3c313062dfbc86b3d2c4d7deec56a096432f7 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 9 Sep 2021 13:46:34 +0300
Subject: [PATCH] fsnotify: fix sb_connectors leak

Fix a leak in s_fsnotify_connectors counter in case of a race between
concurrent add of new fsnotify mark to an object.

The task that lost the race fails to drop the counter before freeing
the unused connector.

Fixes: ec44610fe2b8 ("fsnotify: count all objects with attached connectors")
Reported-by: Murphy Zhou <jencce.kernel@gmail.com>
Link: https://lore.kernel.org/linux-fsdevel/20210907063338.ycaw6wvhzrfsfdlp@xzhoux.usersys.redhat.com/
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
 fs/notify/mark.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 95006d1d29ab..fa1d99101f89 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -531,6 +531,7 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
 		/* Someone else created list structure for us */
 		if (inode)
 			fsnotify_put_inode_ref(inode);
+		fsnotify_put_sb_connectors(conn);
 		kmem_cache_free(fsnotify_mark_connector_cachep, conn);
 	}
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [regression] fsnotify fails stress test since fsnotify_for_v5.15-rc1 merged
  2021-09-09 11:00 ` Amir Goldstein
@ 2021-09-09 11:11   ` Petr Vorel
  2021-09-09 11:14     ` Petr Vorel
  2021-09-10  1:05   ` Murphy Zhou
  1 sibling, 1 reply; 6+ messages in thread
From: Petr Vorel @ 2021-09-09 11:11 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: Murphy Zhou, linux-fsdevel, Jan Kara

> On Tue, Sep 7, 2021 at 9:33 AM Murphy Zhou <jencce.kernel@gmail.com> wrote:

> > Hi,

> > Since this commit:

> > commit ec44610fe2b86daef70f3f53f47d2a2542d7094f
> > Author: Amir Goldstein <amir73il@gmail.com>
> > Date:   Tue Aug 10 18:12:19 2021 +0300

> >     fsnotify: count all objects with attached connectors




> > Kernel fsnotify can't finish a stress testcase that used to pass quickly.

> > Kernel hung at umount. Can not be killed but restarting the server.

> > Reproducer text is attached.


> Hi Murphy,

> Thank you for the detailed report.
> I was able to reproduce the hang and the attached patch fixes it for me.
> Cloud you please verify the fix yourself as well?

> This is a good regression test.
> Did you consider contributing it to LTP?
> I think the LTP team could also help converting your reproducer to
> an LTP test (CC: Petr).

@Murphy: yes, please contribute that to LTP. There are already fanotify tests [1],
here is the C API [2] and shell API [3] (if needed, it should be enough to write
it just in C). If you have any questions, don't hesitate to ask on LTP ML and Cc
me.

@Amir: thanks!

Kind regards,
Petr

[1] https://github.com/linux-test-project/ltp/tree/master/testcases/kernel/syscalls/fanotify/
[2] https://github.com/linux-test-project/ltp/wiki/C-Test-API
[3] https://github.com/linux-test-project/ltp/wiki/Shell-Test-API

> Thanks,
> Amir.

> From 14d3c313062dfbc86b3d2c4d7deec56a096432f7 Mon Sep 17 00:00:00 2001
> From: Amir Goldstein <amir73il@gmail.com>
> Date: Thu, 9 Sep 2021 13:46:34 +0300
> Subject: [PATCH] fsnotify: fix sb_connectors leak

> Fix a leak in s_fsnotify_connectors counter in case of a race between
> concurrent add of new fsnotify mark to an object.

> The task that lost the race fails to drop the counter before freeing
> the unused connector.

> Fixes: ec44610fe2b8 ("fsnotify: count all objects with attached connectors")
> Reported-by: Murphy Zhou <jencce.kernel@gmail.com>
> Link: https://lore.kernel.org/linux-fsdevel/20210907063338.ycaw6wvhzrfsfdlp@xzhoux.usersys.redhat.com/
> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> ---
>  fs/notify/mark.c | 1 +
>  1 file changed, 1 insertion(+)

> diff --git a/fs/notify/mark.c b/fs/notify/mark.c
> index 95006d1d29ab..fa1d99101f89 100644
> --- a/fs/notify/mark.c
> +++ b/fs/notify/mark.c
> @@ -531,6 +531,7 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
>  		/* Someone else created list structure for us */
>  		if (inode)
>  			fsnotify_put_inode_ref(inode);
> +		fsnotify_put_sb_connectors(conn);
>  		kmem_cache_free(fsnotify_mark_connector_cachep, conn);
>  	}

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [regression] fsnotify fails stress test since fsnotify_for_v5.15-rc1 merged
  2021-09-09 11:11   ` Petr Vorel
@ 2021-09-09 11:14     ` Petr Vorel
  2021-09-10  1:06       ` Murphy Zhou
  0 siblings, 1 reply; 6+ messages in thread
From: Petr Vorel @ 2021-09-09 11:14 UTC (permalink / raw)
  To: Amir Goldstein, Murphy Zhou, linux-fsdevel, Jan Kara

> > On Tue, Sep 7, 2021 at 9:33 AM Murphy Zhou <jencce.kernel@gmail.com> wrote:

> > > Hi,

> > > Since this commit:

> > > commit ec44610fe2b86daef70f3f53f47d2a2542d7094f
> > > Author: Amir Goldstein <amir73il@gmail.com>
> > > Date:   Tue Aug 10 18:12:19 2021 +0300

> > >     fsnotify: count all objects with attached connectors




> > > Kernel fsnotify can't finish a stress testcase that used to pass quickly.

> > > Kernel hung at umount. Can not be killed but restarting the server.

> > > Reproducer text is attached.


> > Hi Murphy,

> > Thank you for the detailed report.
> > I was able to reproduce the hang and the attached patch fixes it for me.
> > Cloud you please verify the fix yourself as well?

> > This is a good regression test.
> > Did you consider contributing it to LTP?
> > I think the LTP team could also help converting your reproducer to
> > an LTP test (CC: Petr).

> @Murphy: yes, please contribute that to LTP. There are already fanotify tests [1],
> here is the C API [2] and shell API [3] (if needed, it should be enough to write
> it just in C). If you have any questions, don't hesitate ask on LTP ML and Cc
> me.

I see you've contributed to LTP already :).

Kind regards,
Petr

> @Amir: thanks!

> Kind regards,
> Petr

> [1] https://github.com/linux-test-project/ltp/tree/master/testcases/kernel/syscalls/fanotify/
> [2] https://github.com/linux-test-project/ltp/wiki/C-Test-API
> [3] https://github.com/linux-test-project/ltp/wiki/Shell-Test-API

> > Thanks,
> > Amir.

> > From 14d3c313062dfbc86b3d2c4d7deec56a096432f7 Mon Sep 17 00:00:00 2001
> > From: Amir Goldstein <amir73il@gmail.com>
> > Date: Thu, 9 Sep 2021 13:46:34 +0300
> > Subject: [PATCH] fsnotify: fix sb_connectors leak

> > Fix a leak in s_fsnotify_connectors counter in case of a race between
> > concurrent add of new fsnotify mark to an object.

> > The task that lost the race fails to drop the counter before freeing
> > the unused connector.

> > Fixes: ec44610fe2b8 ("fsnotify: count all objects with attached connectors")
> > Reported-by: Murphy Zhou <jencce.kernel@gmail.com>
> > Link: https://lore.kernel.org/linux-fsdevel/20210907063338.ycaw6wvhzrfsfdlp@xzhoux.usersys.redhat.com/
> > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > ---
> >  fs/notify/mark.c | 1 +
> >  1 file changed, 1 insertion(+)

> > diff --git a/fs/notify/mark.c b/fs/notify/mark.c
> > index 95006d1d29ab..fa1d99101f89 100644
> > --- a/fs/notify/mark.c
> > +++ b/fs/notify/mark.c
> > @@ -531,6 +531,7 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
> >  		/* Someone else created list structure for us */
> >  		if (inode)
> >  			fsnotify_put_inode_ref(inode);
> > +		fsnotify_put_sb_connectors(conn);
> >  		kmem_cache_free(fsnotify_mark_connector_cachep, conn);
> >  	}

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [regression] fsnotify fails stress test since fsnotify_for_v5.15-rc1 merged
  2021-09-09 11:00 ` Amir Goldstein
  2021-09-09 11:11   ` Petr Vorel
@ 2021-09-10  1:05   ` Murphy Zhou
  1 sibling, 0 replies; 6+ messages in thread
From: Murphy Zhou @ 2021-09-10  1:05 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: Murphy Zhou, linux-fsdevel, Jan Kara, Petr Vorel

On Thu, Sep 09, 2021 at 02:00:04PM +0300, Amir Goldstein wrote:
> On Tue, Sep 7, 2021 at 9:33 AM Murphy Zhou <jencce.kernel@gmail.com> wrote:
> >
> > Hi,
> >
> > Since this commit:
> >
> > commit ec44610fe2b86daef70f3f53f47d2a2542d7094f
> > Author: Amir Goldstein <amir73il@gmail.com>
> > Date:   Tue Aug 10 18:12:19 2021 +0300
> >
> >     fsnotify: count all objects with attached connectors
> >
> >
> >
> >
> > Kernel fsnotify can't finish a stress testcase that used to pass quickly.
> >
> > Kernel hung at umount. Can not be killed but restarting the server.
> >
> > Reproducer text is attached.
> >
> 
> Hi Murphy,
> 
> Thank you for the detailed report.
> I was able to reproduce the hang and the attached patch fixes it for me.
> Cloud you please verify the fix yourself as well?

That's quick. You rock Amir. Testing.
> 
> This is a good regression test.
> Did you consider contributing it to LTP?
> I think the LTP team could also help converting your reproducer to
> an LTP test (CC: Petr).

Ya, this is part of a stress test I maintain. Posting it to LTP has been
on my todo list for a long time. I'll work on this.

> 
> Thanks,
> Amir.

-- 
Murphy

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [regression] fsnotify fails stress test since fsnotify_for_v5.15-rc1 merged
  2021-09-09 11:14     ` Petr Vorel
@ 2021-09-10  1:06       ` Murphy Zhou
  0 siblings, 0 replies; 6+ messages in thread
From: Murphy Zhou @ 2021-09-10  1:06 UTC (permalink / raw)
  To: Petr Vorel; +Cc: Amir Goldstein, Murphy Zhou, linux-fsdevel, Jan Kara

On Thu, Sep 09, 2021 at 01:14:05PM +0200, Petr Vorel wrote:
> > > On Tue, Sep 7, 2021 at 9:33 AM Murphy Zhou <jencce.kernel@gmail.com> wrote:
> 
> > > > Hi,
> 
> > > > Since this commit:
> 
> > > > commit ec44610fe2b86daef70f3f53f47d2a2542d7094f
> > > > Author: Amir Goldstein <amir73il@gmail.com>
> > > > Date:   Tue Aug 10 18:12:19 2021 +0300
> 
> > > >     fsnotify: count all objects with attached connectors
> 
> 
> 
> 
> > > > Kernel fsnotify can't finish a stress testcase that used to pass quickly.
> 
> > > > Kernel hung at umount. Can not be killed but restarting the server.
> 
> > > > Reproducer text is attached.
> 
> 
> > > Hi Murphy,
> 
> > > Thank you for the detailed report.
> > > I was able to reproduce the hang and the attached patch fixes it for me.
> > > Cloud you please verify the fix yourself as well?
> 
> > > This is a good regression test.
> > > Did you consider contributing it to LTP?
> > > I think the LTP team could also help converting your reproducer to
> > > an LTP test (CC: Petr).
> 
> > @Murphy: yes, please contribute that to LTP. There are already fanotify tests [1],
> > here is the C API [2] and shell API [3] (if needed, it should be enough to write
> > it just in C). If you have any questions, don't hesitate ask on LTP ML and Cc
> > me.
> 
> I see you've contributed to LTP already :).

Ya :) Thank you for the links. That's very helpful!

> 
> Kind regards,
> Petr
> 
> > @Amir: thanks!
> 
> > Kind regards,
> > Petr
> 
> > [1] https://github.com/linux-test-project/ltp/tree/master/testcases/kernel/syscalls/fanotify/
> > [2] https://github.com/linux-test-project/ltp/wiki/C-Test-API
> > [3] https://github.com/linux-test-project/ltp/wiki/Shell-Test-API
> 
> > > Thanks,
> > > Amir.
> 
> > > From 14d3c313062dfbc86b3d2c4d7deec56a096432f7 Mon Sep 17 00:00:00 2001
> > > From: Amir Goldstein <amir73il@gmail.com>
> > > Date: Thu, 9 Sep 2021 13:46:34 +0300
> > > Subject: [PATCH] fsnotify: fix sb_connectors leak
> 
> > > Fix a leak in s_fsnotify_connectors counter in case of a race between
> > > concurrent add of new fsnotify mark to an object.
> 
> > > The task that lost the race fails to drop the counter before freeing
> > > the unused connector.
> 
> > > Fixes: ec44610fe2b8 ("fsnotify: count all objects with attached connectors")
> > > Reported-by: Murphy Zhou <jencce.kernel@gmail.com>
> > > Link: https://lore.kernel.org/linux-fsdevel/20210907063338.ycaw6wvhzrfsfdlp@xzhoux.usersys.redhat.com/
> > > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > > ---
> > >  fs/notify/mark.c | 1 +
> > >  1 file changed, 1 insertion(+)
> 
> > > diff --git a/fs/notify/mark.c b/fs/notify/mark.c
> > > index 95006d1d29ab..fa1d99101f89 100644
> > > --- a/fs/notify/mark.c
> > > +++ b/fs/notify/mark.c
> > > @@ -531,6 +531,7 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
> > >  		/* Someone else created list structure for us */
> > >  		if (inode)
> > >  			fsnotify_put_inode_ref(inode);
> > > +		fsnotify_put_sb_connectors(conn);
> > >  		kmem_cache_free(fsnotify_mark_connector_cachep, conn);
> > >  	}

-- 
Murphy

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2021-09-10  1:21 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-07  6:33 [regression] fsnotify fails stress test since fsnotify_for_v5.15-rc1 merged Murphy Zhou
2021-09-09 11:00 ` Amir Goldstein
2021-09-09 11:11   ` Petr Vorel
2021-09-09 11:14     ` Petr Vorel
2021-09-10  1:06       ` Murphy Zhou
2021-09-10  1:05   ` Murphy Zhou

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.