All of lore.kernel.org
 help / color / mirror / Atom feed
* [LTP] [PATCH] Terminate leftover subprocesses when main test process crashes
@ 2022-02-10 16:18 Martin Doucha
  2022-02-11  6:47 ` Li Wang
  0 siblings, 1 reply; 9+ messages in thread
From: Martin Doucha @ 2022-02-10 16:18 UTC (permalink / raw)
  To: ltp

When the main test process crashes or gets killed e.g. by OOM killer,
the watchdog process currently does not clean up any remaining child
processes. Fix this by sending SIGKILL to the test process group when
the watchdog process gets notified that the main test process has exited
for any reason.

Fixes #892

Signed-off-by: Martin Doucha <mdoucha@suse.cz>
---
 lib/newlib_tests/runtest.sh               |  3 +-
 lib/newlib_tests/test_children_cleanup.c  | 43 +++++++++++++++++++++++
 lib/newlib_tests/test_children_cleanup.sh | 17 +++++++++
 lib/tst_test.c                            |  9 +++++
 4 files changed, 71 insertions(+), 1 deletion(-)
 create mode 100644 lib/newlib_tests/test_children_cleanup.c
 create mode 100755 lib/newlib_tests/test_children_cleanup.sh

diff --git a/lib/newlib_tests/runtest.sh b/lib/newlib_tests/runtest.sh
index 92fd3860e..327460e7b 100755
--- a/lib/newlib_tests/runtest.sh
+++ b/lib/newlib_tests/runtest.sh
@@ -4,7 +4,8 @@
 LTP_C_API_TESTS="${LTP_C_API_TESTS:-test05 test07 test09 test12 test15 test18
 tst_needs_cmds01 tst_needs_cmds02 tst_needs_cmds03 tst_needs_cmds06
 tst_needs_cmds07 tst_bool_expr test_exec test_timer tst_res_hexd tst_strstatus
-tst_fuzzy_sync03 test_zero_hugepage.sh test_kconfig.sh}"
+tst_fuzzy_sync03 test_zero_hugepage.sh test_kconfig.sh
+test_children_cleanup.sh}"
 
 LTP_SHELL_API_TESTS="${LTP_SHELL_API_TESTS:-shell/tst_check_driver.sh
 shell/tst_check_kconfig0[1-5].sh shell/net/*.sh}"
diff --git a/lib/newlib_tests/test_children_cleanup.c b/lib/newlib_tests/test_children_cleanup.c
new file mode 100644
index 000000000..2b1ca5f9c
--- /dev/null
+++ b/lib/newlib_tests/test_children_cleanup.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2022 SUSE LLC <mdoucha@suse.cz>
+ */
+
+/*\
+ * Test whether the LTP library properly reaps any children left over when
+ * the main test process dies. Run using test_children_cleanup.sh.
+ */
+
+#include <unistd.h>
+#include <signal.h>
+
+#include "tst_test.h"
+
+static void run(void)
+{
+	pid_t child_pid, main_pid = getpid();
+
+	tst_res(TINFO, "Main process %d starting", main_pid);
+
+	/* Check that normal child reaping does not disrupt the test */
+	if (!SAFE_FORK())
+		return;
+
+	SAFE_WAIT(NULL);
+	child_pid = SAFE_FORK();
+
+	/* Start child that will outlive the main test process */
+	if (!child_pid) {
+		sleep(30);
+		return;
+	}
+
+	tst_res(TINFO, "Forked child %d", child_pid);
+	kill(main_pid, SIGKILL);
+}
+
+static struct tst_test test = {
+	.test_all = run,
+	.forks_child = 1,
+	.timeout = 10,
+};
diff --git a/lib/newlib_tests/test_children_cleanup.sh b/lib/newlib_tests/test_children_cleanup.sh
new file mode 100755
index 000000000..d99b9b3a2
--- /dev/null
+++ b/lib/newlib_tests/test_children_cleanup.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (c) 2022 SUSE LLC <mdoucha@suse.cz>
+
+TMPFILE="/tmp/ltp_children_cleanup_$$.log"
+CHILD_PID=`./test_children_cleanup 2>&1 | sed -n 's/^.*Forked child \([0-9]*\)$/\1/p'`
+
+if [ "x$CHILD_PID" = "x" ]; then
+	echo "TFAIL: Child process was not created"
+	exit 1
+elif ! kill -s 0 $CHILD_PID &>/dev/null; then
+	echo "TPASS: Child process was cleaned up"
+	exit 0
+else
+	echo "TFAIL: Child process was left behind"
+	exit 1
+fi
diff --git a/lib/tst_test.c b/lib/tst_test.c
index 191a9deab..09f65ac71 100644
--- a/lib/tst_test.c
+++ b/lib/tst_test.c
@@ -1399,6 +1399,13 @@ static void sigint_handler(int sig LTP_ATTRIBUTE_UNUSED)
 	}
 }
 
+static void sigchild_handler(int sig LTP_ATTRIBUTE_UNUSED)
+{
+	/* If the test process is dead, send SIGKILL to its children */
+	if (kill(test_pid, 0))
+		kill(-test_pid, SIGKILL);
+}
+
 unsigned int tst_timeout_remaining(void)
 {
 	static struct timespec now;
@@ -1481,6 +1488,7 @@ static int fork_testrun(void)
 		tst_disable_oom_protection(0);
 		SAFE_SIGNAL(SIGALRM, SIG_DFL);
 		SAFE_SIGNAL(SIGUSR1, SIG_DFL);
+		SAFE_SIGNAL(SIGCHLD, SIG_DFL);
 		SAFE_SIGNAL(SIGINT, SIG_DFL);
 		SAFE_SETPGID(0, 0);
 		testrun();
@@ -1560,6 +1568,7 @@ void tst_run_tcases(int argc, char *argv[], struct tst_test *self)
 
 	SAFE_SIGNAL(SIGALRM, alarm_handler);
 	SAFE_SIGNAL(SIGUSR1, heartbeat_handler);
+	SAFE_SIGNAL(SIGCHLD, sigchild_handler);
 
 	if (tst_test->test_variants)
 		test_variants = tst_test->test_variants;
-- 
2.34.1


-- 
Mailing list info: https://lists.linux.it/listinfo/ltp

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [LTP] [PATCH] Terminate leftover subprocesses when main test process crashes
  2022-02-10 16:18 [LTP] [PATCH] Terminate leftover subprocesses when main test process crashes Martin Doucha
@ 2022-02-11  6:47 ` Li Wang
  2022-02-11  7:03   ` Jan Stancek
  2022-02-11  9:17   ` Martin Doucha
  0 siblings, 2 replies; 9+ messages in thread
From: Li Wang @ 2022-02-11  6:47 UTC (permalink / raw)
  To: Martin Doucha; +Cc: LTP List


[-- Attachment #1.1: Type: text/plain, Size: 2306 bytes --]

On Fri, Feb 11, 2022 at 12:18 AM Martin Doucha <mdoucha@suse.cz> wrote:

> When the main test process crashes or gets killed e.g. by OOM killer,
> the watchdog process currently does not clean up any remaining child
> processes. Fix this by sending SIGKILL to the test process group when
> the watchdog process gets notified that the main test process has exited
> for any reason.
>


> --- a/lib/tst_test.c
> +++ b/lib/tst_test.c
> @@ -1399,6 +1399,13 @@ static void sigint_handler(int sig
> LTP_ATTRIBUTE_UNUSED)
>         }
>  }
>
> +static void sigchild_handler(int sig LTP_ATTRIBUTE_UNUSED)
> +{
> +       /* If the test process is dead, send SIGKILL to its children */
> +       if (kill(test_pid, 0))
> +               kill(-test_pid, SIGKILL);
> +}
> +
>  unsigned int tst_timeout_remaining(void)
>  {
>         static struct timespec now;
> @@ -1481,6 +1488,7 @@ static int fork_testrun(void)
>                 tst_disable_oom_protection(0);
>                 SAFE_SIGNAL(SIGALRM, SIG_DFL);
>                 SAFE_SIGNAL(SIGUSR1, SIG_DFL);
> +               SAFE_SIGNAL(SIGCHLD, SIG_DFL);
>                 SAFE_SIGNAL(SIGINT, SIG_DFL);
>                 SAFE_SETPGID(0, 0);
>                 testrun();
> @@ -1560,6 +1568,7 @@ void tst_run_tcases(int argc, char *argv[], struct
> tst_test *self)
>
>         SAFE_SIGNAL(SIGALRM, alarm_handler);
>         SAFE_SIGNAL(SIGUSR1, heartbeat_handler);
> +       SAFE_SIGNAL(SIGCHLD, sigchild_handler);
>

Do we really need setup this signal handler for SIGCHILD?

Since we have already called 'SAFE_WAITPID(test_pid, &status, 0)'
in the library process (lib_pid) which rely on SIGCHILD as well.
And even this handler will be called everytime when test exit normally.

Maybe better just add a kill function to cleanup the remain
descendants if main test process exit with abonormal status.

e.g.

--- a/lib/tst_test.c
+++ b/lib/tst_test.c
@@ -1503,6 +1503,8 @@ static int fork_testrun(void)
        if (WIFEXITED(status) && WEXITSTATUS(status))
                return WEXITSTATUS(status);

+       kill(-test_pid, SIGKILL);
+
        if (WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) {
                tst_res(TINFO, "If you are running on slow machine, "
                               "try exporting LTP_TIMEOUT_MUL > 1");

-- 
Regards,
Li Wang

[-- Attachment #1.2: Type: text/html, Size: 3943 bytes --]

[-- Attachment #2: Type: text/plain, Size: 60 bytes --]


-- 
Mailing list info: https://lists.linux.it/listinfo/ltp

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [LTP] [PATCH] Terminate leftover subprocesses when main test process crashes
  2022-02-11  6:47 ` Li Wang
@ 2022-02-11  7:03   ` Jan Stancek
  2022-02-11  7:09     ` Li Wang
  2022-02-11  9:17   ` Martin Doucha
  1 sibling, 1 reply; 9+ messages in thread
From: Jan Stancek @ 2022-02-11  7:03 UTC (permalink / raw)
  To: Li Wang; +Cc: LTP List

On Fri, Feb 11, 2022 at 7:48 AM Li Wang <liwang@redhat.com> wrote:
>
>
>
> On Fri, Feb 11, 2022 at 12:18 AM Martin Doucha <mdoucha@suse.cz> wrote:
>>
>> When the main test process crashes or gets killed e.g. by OOM killer,
>> the watchdog process currently does not clean up any remaining child
>> processes. Fix this by sending SIGKILL to the test process group when
>> the watchdog process gets notified that the main test process has exited
>> for any reason.
>
>
>>
>> --- a/lib/tst_test.c
>> +++ b/lib/tst_test.c
>> @@ -1399,6 +1399,13 @@ static void sigint_handler(int sig LTP_ATTRIBUTE_UNUSED)
>>         }
>>  }
>>
>> +static void sigchild_handler(int sig LTP_ATTRIBUTE_UNUSED)
>> +{
>> +       /* If the test process is dead, send SIGKILL to its children */
>> +       if (kill(test_pid, 0))
>> +               kill(-test_pid, SIGKILL);
>> +}
>> +
>>  unsigned int tst_timeout_remaining(void)
>>  {
>>         static struct timespec now;
>> @@ -1481,6 +1488,7 @@ static int fork_testrun(void)
>>                 tst_disable_oom_protection(0);
>>                 SAFE_SIGNAL(SIGALRM, SIG_DFL);
>>                 SAFE_SIGNAL(SIGUSR1, SIG_DFL);
>> +               SAFE_SIGNAL(SIGCHLD, SIG_DFL);
>>                 SAFE_SIGNAL(SIGINT, SIG_DFL);
>>                 SAFE_SETPGID(0, 0);
>>                 testrun();
>> @@ -1560,6 +1568,7 @@ void tst_run_tcases(int argc, char *argv[], struct tst_test *self)
>>
>>         SAFE_SIGNAL(SIGALRM, alarm_handler);
>>         SAFE_SIGNAL(SIGUSR1, heartbeat_handler);
>> +       SAFE_SIGNAL(SIGCHLD, sigchild_handler);
>
>
> Do we really need setup this signal handler for SIGCHILD?

 I had same question.

>
> Since we have already called 'SAFE_WAITPID(test_pid, &status, 0)'
> in the library process (lib_pid) which rely on SIGCHILD as well.
> And even this handler will be called everytime when test exit normally.
>
> Maybe better just add a kill function to cleanup the remain
> descendants if main test process exit with abonormal status.
>
> e.g.
>
> --- a/lib/tst_test.c
> +++ b/lib/tst_test.c
> @@ -1503,6 +1503,8 @@ static int fork_testrun(void)
>         if (WIFEXITED(status) && WEXITSTATUS(status))
>                 return WEXITSTATUS(status);
>
> +       kill(-test_pid, SIGKILL);

Could we skip the call if forks_child == 0 ?

> +
>         if (WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) {
>                 tst_res(TINFO, "If you are running on slow machine, "
>                                "try exporting LTP_TIMEOUT_MUL > 1");
>
> --
> Regards,
> Li Wang
>
> --
> Mailing list info: https://lists.linux.it/listinfo/ltp


-- 
Mailing list info: https://lists.linux.it/listinfo/ltp

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [LTP] [PATCH] Terminate leftover subprocesses when main test process crashes
  2022-02-11  7:03   ` Jan Stancek
@ 2022-02-11  7:09     ` Li Wang
  0 siblings, 0 replies; 9+ messages in thread
From: Li Wang @ 2022-02-11  7:09 UTC (permalink / raw)
  To: Jan Stancek; +Cc: LTP List


[-- Attachment #1.1: Type: text/plain, Size: 2544 bytes --]

On Fri, Feb 11, 2022 at 3:03 PM Jan Stancek <jstancek@redhat.com> wrote:

> On Fri, Feb 11, 2022 at 7:48 AM Li Wang <liwang@redhat.com> wrote:
> >
> >
> >
> > On Fri, Feb 11, 2022 at 12:18 AM Martin Doucha <mdoucha@suse.cz> wrote:
> >>
> >> When the main test process crashes or gets killed e.g. by OOM killer,
> >> the watchdog process currently does not clean up any remaining child
> >> processes. Fix this by sending SIGKILL to the test process group when
> >> the watchdog process gets notified that the main test process has exited
> >> for any reason.
> >
> >
> >>
> >> --- a/lib/tst_test.c
> >> +++ b/lib/tst_test.c
> >> @@ -1399,6 +1399,13 @@ static void sigint_handler(int sig
> LTP_ATTRIBUTE_UNUSED)
> >>         }
> >>  }
> >>
> >> +static void sigchild_handler(int sig LTP_ATTRIBUTE_UNUSED)
> >> +{
> >> +       /* If the test process is dead, send SIGKILL to its children */
> >> +       if (kill(test_pid, 0))
> >> +               kill(-test_pid, SIGKILL);
> >> +}
> >> +
> >>  unsigned int tst_timeout_remaining(void)
> >>  {
> >>         static struct timespec now;
> >> @@ -1481,6 +1488,7 @@ static int fork_testrun(void)
> >>                 tst_disable_oom_protection(0);
> >>                 SAFE_SIGNAL(SIGALRM, SIG_DFL);
> >>                 SAFE_SIGNAL(SIGUSR1, SIG_DFL);
> >> +               SAFE_SIGNAL(SIGCHLD, SIG_DFL);
> >>                 SAFE_SIGNAL(SIGINT, SIG_DFL);
> >>                 SAFE_SETPGID(0, 0);
> >>                 testrun();
> >> @@ -1560,6 +1568,7 @@ void tst_run_tcases(int argc, char *argv[],
> struct tst_test *self)
> >>
> >>         SAFE_SIGNAL(SIGALRM, alarm_handler);
> >>         SAFE_SIGNAL(SIGUSR1, heartbeat_handler);
> >> +       SAFE_SIGNAL(SIGCHLD, sigchild_handler);
> >
> >
> > Do we really need setup this signal handler for SIGCHILD?
>
>  I had same question.
>
> >
> > Since we have already called 'SAFE_WAITPID(test_pid, &status, 0)'
> > in the library process (lib_pid) which rely on SIGCHILD as well.
> > And even this handler will be called everytime when test exit normally.
> >
> > Maybe better just add a kill function to cleanup the remain
> > descendants if main test process exit with abonormal status.
> >
> > e.g.
> >
> > --- a/lib/tst_test.c
> > +++ b/lib/tst_test.c
> > @@ -1503,6 +1503,8 @@ static int fork_testrun(void)
> >         if (WIFEXITED(status) && WEXITSTATUS(status))
> >                 return WEXITSTATUS(status);
> >
> > +       kill(-test_pid, SIGKILL);
>
> Could we skip the call if forks_child == 0 ?
>

+1 Obviously yes!


-- 
Regards,
Li Wang

[-- Attachment #1.2: Type: text/html, Size: 3870 bytes --]

[-- Attachment #2: Type: text/plain, Size: 60 bytes --]


-- 
Mailing list info: https://lists.linux.it/listinfo/ltp

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [LTP] [PATCH] Terminate leftover subprocesses when main test process crashes
  2022-02-11  6:47 ` Li Wang
  2022-02-11  7:03   ` Jan Stancek
@ 2022-02-11  9:17   ` Martin Doucha
  2022-02-11 10:34     ` Li Wang
  1 sibling, 1 reply; 9+ messages in thread
From: Martin Doucha @ 2022-02-11  9:17 UTC (permalink / raw)
  To: Li Wang; +Cc: LTP List

On 11. 02. 22 7:47, Li Wang wrote:
> On Fri, Feb 11, 2022 at 12:18 AM Martin Doucha <mdoucha@suse.cz
> <mailto:mdoucha@suse.cz>> wrote:
>     @@ -1560,6 +1568,7 @@ void tst_run_tcases(int argc, char *argv[],
>     struct tst_test *self)
> 
>             SAFE_SIGNAL(SIGALRM, alarm_handler);
>             SAFE_SIGNAL(SIGUSR1, heartbeat_handler);
>     +       SAFE_SIGNAL(SIGCHLD, sigchild_handler);
> 
> 
> Do we really need setup this signal handler for SIGCHILD?
> 
> Since we have already called 'SAFE_WAITPID(test_pid, &status, 0)'
> in the library process (lib_pid) which rely on SIGCHILD as well.
> And even this handler will be called everytime when test exit normally.
> 
> Maybe better just add a kill function to cleanup the remain
> descendants if main test process exit with abonormal status.
> 
> e.g.
> 
> --- a/lib/tst_test.c
> +++ b/lib/tst_test.c
> @@ -1503,6 +1503,8 @@ static int fork_testrun(void)
>         if (WIFEXITED(status) && WEXITSTATUS(status))
>                 return WEXITSTATUS(status);
>  
> +       kill(-test_pid, SIGKILL);

This will not work because at this point, the child process was already
destroyed by waitpid() and all its remaining children were moved under
PID 1 (init). The only place where the grandchildren are still reachable
this way is in SIGCHLD handler while the dead child process still exists
in zombie state.

-- 
Martin Doucha   mdoucha@suse.cz
QA Engineer for Software Maintenance
SUSE LINUX, s.r.o.
CORSO IIa
Krizikova 148/34
186 00 Prague 8
Czech Republic

-- 
Mailing list info: https://lists.linux.it/listinfo/ltp

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [LTP] [PATCH] Terminate leftover subprocesses when main test process crashes
  2022-02-11  9:17   ` Martin Doucha
@ 2022-02-11 10:34     ` Li Wang
  2022-02-11 11:01       ` Li Wang
  0 siblings, 1 reply; 9+ messages in thread
From: Li Wang @ 2022-02-11 10:34 UTC (permalink / raw)
  To: Martin Doucha; +Cc: LTP List


[-- Attachment #1.1: Type: text/plain, Size: 1863 bytes --]

On Fri, Feb 11, 2022 at 5:17 PM Martin Doucha <mdoucha@suse.cz> wrote:

> On 11. 02. 22 7:47, Li Wang wrote:
> > On Fri, Feb 11, 2022 at 12:18 AM Martin Doucha <mdoucha@suse.cz
> > <mailto:mdoucha@suse.cz>> wrote:
> >     @@ -1560,6 +1568,7 @@ void tst_run_tcases(int argc, char *argv[],
> >     struct tst_test *self)
> >
> >             SAFE_SIGNAL(SIGALRM, alarm_handler);
> >             SAFE_SIGNAL(SIGUSR1, heartbeat_handler);
> >     +       SAFE_SIGNAL(SIGCHLD, sigchild_handler);
> >
> >
> > Do we really need setup this signal handler for SIGCHILD?
> >
> > Since we have already called 'SAFE_WAITPID(test_pid, &status, 0)'
> > in the library process (lib_pid) which rely on SIGCHILD as well.
> > And even this handler will be called everytime when test exit normally.
> >
> > Maybe better just add a kill function to cleanup the remain
> > descendants if main test process exit with abonormal status.
> >
> > e.g.
> >
> > --- a/lib/tst_test.c
> > +++ b/lib/tst_test.c
> > @@ -1503,6 +1503,8 @@ static int fork_testrun(void)
> >         if (WIFEXITED(status) && WEXITSTATUS(status))
> >                 return WEXITSTATUS(status);
> >
> > +       kill(-test_pid, SIGKILL);
>
> This will not work because at this point, the child process was already
> destroyed by waitpid() and all its remaining children were moved under

PID 1 (init). The only place where the grandchildren are still reachable
> this way is in SIGCHLD handler while the dead child process still exists
> in zombie state.


Signal communicatoin is asynchronous processing, setup SIGCHILD
handler can not 100% garantee the libarary process response
in time as well.

Though the test_pid being moved under PID 1(init), kill(-test_pid, SIGKILL)
still works well for killing them. That beacuse the dead child process still
exists until kernel recliam its all parent.


-- 
Regards,
Li Wang

[-- Attachment #1.2: Type: text/html, Size: 3203 bytes --]

[-- Attachment #2: Type: text/plain, Size: 60 bytes --]


-- 
Mailing list info: https://lists.linux.it/listinfo/ltp

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [LTP] [PATCH] Terminate leftover subprocesses when main test process crashes
  2022-02-11 10:34     ` Li Wang
@ 2022-02-11 11:01       ` Li Wang
  2022-02-11 11:35         ` Martin Doucha
  0 siblings, 1 reply; 9+ messages in thread
From: Li Wang @ 2022-02-11 11:01 UTC (permalink / raw)
  To: Martin Doucha; +Cc: LTP List


[-- Attachment #1.1: Type: text/plain, Size: 3788 bytes --]

On Fri, Feb 11, 2022 at 6:34 PM Li Wang <liwang@redhat.com> wrote:

>
>
> On Fri, Feb 11, 2022 at 5:17 PM Martin Doucha <mdoucha@suse.cz> wrote:
>
>> On 11. 02. 22 7:47, Li Wang wrote:
>> > On Fri, Feb 11, 2022 at 12:18 AM Martin Doucha <mdoucha@suse.cz
>> > <mailto:mdoucha@suse.cz>> wrote:
>> >     @@ -1560,6 +1568,7 @@ void tst_run_tcases(int argc, char *argv[],
>> >     struct tst_test *self)
>> >
>> >             SAFE_SIGNAL(SIGALRM, alarm_handler);
>> >             SAFE_SIGNAL(SIGUSR1, heartbeat_handler);
>> >     +       SAFE_SIGNAL(SIGCHLD, sigchild_handler);
>> >
>> >
>> > Do we really need setup this signal handler for SIGCHILD?
>> >
>> > Since we have already called 'SAFE_WAITPID(test_pid, &status, 0)'
>> > in the library process (lib_pid) which rely on SIGCHILD as well.
>> > And even this handler will be called everytime when test exit normally.
>> >
>> > Maybe better just add a kill function to cleanup the remain
>> > descendants if main test process exit with abonormal status.
>> >
>> > e.g.
>> >
>> > --- a/lib/tst_test.c
>> > +++ b/lib/tst_test.c
>> > @@ -1503,6 +1503,8 @@ static int fork_testrun(void)
>> >         if (WIFEXITED(status) && WEXITSTATUS(status))
>> >                 return WEXITSTATUS(status);
>> >
>> > +       kill(-test_pid, SIGKILL);
>>
>> This will not work because at this point, the child process was already
>> destroyed by waitpid() and all its remaining children were moved under
>
> PID 1 (init). The only place where the grandchildren are still reachable
>> this way is in SIGCHLD handler while the dead child process still exists
>> in zombie state.
>
>
> Signal communicatoin is asynchronous processing, setup SIGCHILD
> handler can not 100% garantee the libarary process response
> in time as well.
>
> Though the test_pid being moved under PID 1(init), kill(-test_pid, SIGKILL)
> still works well for killing them. That beacuse the dead child process
> still
> exists until kernel recliam its all parent.
>


I give 5 seconds sleep before sending SIGKILL in lib-process
and modified the test_children_cleanup.c to print ppid each 1sec
to verify this:

# ./test_children_cleanup
tst_test.c:1452: TINFO: Timeout per run is 0h 00m 10s
test_children_cleanup.c:20: TINFO: Main process 173236 starting
test_children_cleanup.c:39: TINFO: Forked child 173238
test_children_cleanup.c:33: TINFO: ppid is 173236
test_children_cleanup.c:33: TINFO: ppid is 1
test_children_cleanup.c:33: TINFO: ppid is 1
test_children_cleanup.c:33: TINFO: ppid is 1
test_children_cleanup.c:33: TINFO: ppid is 1
tst_test.c:1502: TINFO: If you are running on slow machine, try exporting
LTP_TIMEOUT_MUL > 1
tst_test.c:1504: TBROK: Test killed! (timeout?)

Summary:
passed   0
failed   0
broken   1
skipped  0
warnings 0
=======

--- a/lib/newlib_tests/test_children_cleanup.c
+++ b/lib/newlib_tests/test_children_cleanup.c
@@ -28,7 +28,11 @@ static void run(void)

        /* Start child that will outlive the main test process */
        if (!child_pid) {
-               sleep(30);
+               int i;
+               for (i = 0; i < 30; i++) {
+                       tst_res(TINFO, "ppid is %d", getppid());
+                       sleep(1);
+               }
                return;
        }

diff --git a/lib/tst_test.c b/lib/tst_test.c
index 84ce0a5d3..6f2d93611 100644
--- a/lib/tst_test.c
+++ b/lib/tst_test.c
@@ -1503,6 +1503,9 @@ static int fork_testrun(void)
        if (WIFEXITED(status) && WEXITSTATUS(status))
                return WEXITSTATUS(status);

+       sleep(5);
+       kill(-test_pid, SIGKILL);
+
        if (WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) {
                tst_res(TINFO, "If you are running on slow machine, "
                               "try exporting LTP_TIMEOUT_MUL > 1");


-- 
Regards,
Li Wang

[-- Attachment #1.2: Type: text/html, Size: 6101 bytes --]

[-- Attachment #2: Type: text/plain, Size: 60 bytes --]


-- 
Mailing list info: https://lists.linux.it/listinfo/ltp

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [LTP] [PATCH] Terminate leftover subprocesses when main test process crashes
  2022-02-11 11:01       ` Li Wang
@ 2022-02-11 11:35         ` Martin Doucha
  2022-02-11 12:15           ` Cyril Hrubis
  0 siblings, 1 reply; 9+ messages in thread
From: Martin Doucha @ 2022-02-11 11:35 UTC (permalink / raw)
  To: Li Wang; +Cc: LTP List

On 11. 02. 22 12:01, Li Wang wrote:
> I give 5 seconds sleep before sending SIGKILL in lib-process
> and modified the test_children_cleanup.c to print ppid each 1sec
> to verify this:
> 
> # ./test_children_cleanup
> tst_test.c:1452: TINFO: Timeout per run is 0h 00m 10s
> test_children_cleanup.c:20: TINFO: Main process 173236 starting
> test_children_cleanup.c:39: TINFO: Forked child 173238
> test_children_cleanup.c:33: TINFO: ppid is 173236
> test_children_cleanup.c:33: TINFO: ppid is 1
> test_children_cleanup.c:33: TINFO: ppid is 1
> test_children_cleanup.c:33: TINFO: ppid is 1
> test_children_cleanup.c:33: TINFO: ppid is 1
> tst_test.c:1502: TINFO: If you are running on slow machine, try
> exporting LTP_TIMEOUT_MUL > 1
> tst_test.c:1504: TBROK: Test killed! (timeout?)
> 
> Summary:
> passed   0
> failed   0
> broken   1
> skipped  0
> warnings 0
> =======

Hmm, that's weird, when I tried that approach yesterday, it kept leaving
the child behind. Now it seems to be working. I guess I messed up
somehow and the test program didn't get relinked against new libltp.a...

OK, sending v2 soon since I also need to fix a bug in the libtest shell
script.

-- 
Martin Doucha   mdoucha@suse.cz
QA Engineer for Software Maintenance
SUSE LINUX, s.r.o.
CORSO IIa
Krizikova 148/34
186 00 Prague 8
Czech Republic

-- 
Mailing list info: https://lists.linux.it/listinfo/ltp

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [LTP] [PATCH] Terminate leftover subprocesses when main test process crashes
  2022-02-11 11:35         ` Martin Doucha
@ 2022-02-11 12:15           ` Cyril Hrubis
  0 siblings, 0 replies; 9+ messages in thread
From: Cyril Hrubis @ 2022-02-11 12:15 UTC (permalink / raw)
  To: Martin Doucha; +Cc: LTP List

Hi!
> Hmm, that's weird, when I tried that approach yesterday, it kept leaving
> the child behind. Now it seems to be working. I guess I messed up
> somehow and the test program didn't get relinked against new libltp.a...

Just FYI the process group lifetime ends with the last process exitting
it's group. Otherwise there could be all kind of races in the case when
you send the SIGTERM signal to the whole group, e.g. if the leader got
the signal first the rest of the processes may not be terminated if the
group lifetime ended with the leader...

-- 
Cyril Hrubis
chrubis@suse.cz

-- 
Mailing list info: https://lists.linux.it/listinfo/ltp

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2022-02-11 12:13 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-10 16:18 [LTP] [PATCH] Terminate leftover subprocesses when main test process crashes Martin Doucha
2022-02-11  6:47 ` Li Wang
2022-02-11  7:03   ` Jan Stancek
2022-02-11  7:09     ` Li Wang
2022-02-11  9:17   ` Martin Doucha
2022-02-11 10:34     ` Li Wang
2022-02-11 11:01       ` Li Wang
2022-02-11 11:35         ` Martin Doucha
2022-02-11 12:15           ` Cyril Hrubis

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.