* [PATCH bpf 1/2] bpf/test_run: fix unkillable BPF_PROG_TEST_RUN
@ 2019-02-12 23:42 Stanislav Fomichev
  2019-02-12 23:42 ` [PATCH bpf 2/2] selftests/bpf: make sure signal interrupts BPF_PROG_TEST_RUN Stanislav Fomichev
  2019-02-16  1:17 ` [PATCH bpf 1/2] bpf/test_run: fix unkillable BPF_PROG_TEST_RUN Daniel Borkmann
  0 siblings, 2 replies; 5+ messages in thread
From: Stanislav Fomichev @ 2019-02-12 23:42 UTC
  To: netdev; +Cc: davem, ast, daniel, Stanislav Fomichev, syzbot

Syzbot found out that running BPF_PROG_TEST_RUN with repeat=0xffffffff
makes the process unkillable. The problem is that when CONFIG_PREEMPT is
enabled, we never see need_resched() return true. This is due to the
fact that preempt_enable() (which we do in bpf_test_run_one on each
iteration) now handles resched if it's needed.

Let's disable preemption for the whole run, not per test. In this case
we can properly see whether resched is needed.
Let's also properly return -EINTR to the userspace in case of a signal
interrupt.

See recent discussion:
http://lore.kernel.org/netdev/CAH3MdRWHr4N8jei8jxDppXjmw-Nw=puNDLbu1dQOFQHxfU2onA@mail.gmail.com

I'll follow up with the same fix for bpf_prog_test_run_flow_dissector in
bpf-next.
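
For illustration, here is a simplified, annotated sketch of the old
per-iteration pattern (the code removed by the diff below, with the
timing bookkeeping omitted), showing why the loop cannot be interrupted
on a CONFIG_PREEMPT=y kernel:

	for (i = 0; i < repeat; i++) {
		preempt_disable();
		rcu_read_lock();
		bpf_cgroup_storage_set(storage);
		*ret = BPF_PROG_RUN(prog, ctx);
		rcu_read_unlock();
		preempt_enable();	/* with CONFIG_PREEMPT=y this already
					 * reschedules if TIF_NEED_RESCHED is
					 * set, which clears the flag ...     */
		if (need_resched()) {	/* ... so this never returns true     */
			if (signal_pending(current))	/* and the signal    */
				break;			/* check never runs. */
			cond_resched();
		}
	}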

Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 net/bpf/test_run.c | 45 ++++++++++++++++++++++++---------------------
 1 file changed, 24 insertions(+), 21 deletions(-)

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index fa2644d276ef..e31e1b20f7f4 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -13,27 +13,13 @@
 #include <net/sock.h>
 #include <net/tcp.h>
 
-static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
-		struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
-{
-	u32 ret;
-
-	preempt_disable();
-	rcu_read_lock();
-	bpf_cgroup_storage_set(storage);
-	ret = BPF_PROG_RUN(prog, ctx);
-	rcu_read_unlock();
-	preempt_enable();
-
-	return ret;
-}
-
-static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *ret,
-			u32 *time)
+static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
+			u32 *retval, u32 *time)
 {
 	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { 0 };
 	enum bpf_cgroup_storage_type stype;
 	u64 time_start, time_spent = 0;
+	int ret = 0;
 	u32 i;
 
 	for_each_cgroup_storage_type(stype) {
@@ -48,25 +34,42 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *ret,
 
 	if (!repeat)
 		repeat = 1;
+
+	rcu_read_lock();
+	preempt_disable();
 	time_start = ktime_get_ns();
 	for (i = 0; i < repeat; i++) {
-		*ret = bpf_test_run_one(prog, ctx, storage);
+		bpf_cgroup_storage_set(storage);
+		*retval = BPF_PROG_RUN(prog, ctx);
+
+		if (signal_pending(current)) {
+			ret = -EINTR;
+			break;
+		}
+
 		if (need_resched()) {
-			if (signal_pending(current))
-				break;
 			time_spent += ktime_get_ns() - time_start;
+			preempt_enable();
+			rcu_read_unlock();
+
 			cond_resched();
+
+			rcu_read_lock();
+			preempt_disable();
 			time_start = ktime_get_ns();
 		}
 	}
 	time_spent += ktime_get_ns() - time_start;
+	preempt_enable();
+	rcu_read_unlock();
+
 	do_div(time_spent, repeat);
 	*time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
 
 	for_each_cgroup_storage_type(stype)
 		bpf_cgroup_storage_free(storage[stype]);
 
-	return 0;
+	return ret;
 }
 
 static int bpf_test_finish(const union bpf_attr *kattr,
-- 
2.20.1.791.gb4d0f1c61a-goog


* [PATCH bpf 2/2] selftests/bpf: make sure signal interrupts BPF_PROG_TEST_RUN
  2019-02-12 23:42 [PATCH bpf 1/2] bpf/test_run: fix unkillable BPF_PROG_TEST_RUN Stanislav Fomichev
@ 2019-02-12 23:42 ` Stanislav Fomichev
  2019-02-16  1:17 ` [PATCH bpf 1/2] bpf/test_run: fix unkillable BPF_PROG_TEST_RUN Daniel Borkmann
  1 sibling, 0 replies; 5+ messages in thread
From: Stanislav Fomichev @ 2019-02-12 23:42 UTC
  To: netdev; +Cc: davem, ast, daniel, Stanislav Fomichev

Simple test that I used to reproduce the issue in the previous commit:
do BPF_PROG_TEST_RUN with the maximum number of iterations on a program
of 4096 simple move instructions. Fire an alarm after 0.1 seconds and
check that bpf_prog_test_run is interrupted (i.e. the test doesn't
hang).

Feel free to ignore it if you feel like that's just a one-off fix and it
doesn't require a test going forward.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 tools/testing/selftests/bpf/test_progs.c | 44 ++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 25f0083a9b2e..7842e3749b19 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -11,6 +11,7 @@
 #include <assert.h>
 #include <stdlib.h>
 #include <time.h>
+#include <signal.h>
 
 #include <linux/types.h>
 typedef __u16 __sum16;
@@ -27,6 +28,7 @@ typedef __u16 __sum16;
 #include <sys/ioctl.h>
 #include <sys/wait.h>
 #include <sys/types.h>
+#include <sys/time.h>
 #include <fcntl.h>
 
 #include <linux/bpf.h>
@@ -1912,6 +1914,47 @@ static void test_queue_stack_map(int type)
 	bpf_object__close(obj);
 }
 
+static void sigalrm_handler(int s) {}
+static struct sigaction sigalrm_action = {
+	.sa_handler = sigalrm_handler,
+};
+
+static void test_signal_pending(void)
+{
+	struct bpf_insn prog[4096];
+	struct itimerval timeo = {
+		.it_value.tv_usec = 100000, /* 100ms */
+	};
+	__u32 duration, retval;
+	int prog_fd;
+	int err;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(prog); i++)
+		prog[i] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0);
+	prog[ARRAY_SIZE(prog) - 1] = BPF_EXIT_INSN();
+
+	prog_fd = bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER,
+				   prog, ARRAY_SIZE(prog),
+				   "GPL", 0, NULL, 0);
+	CHECK(prog_fd < 0, "test-run", "errno %d\n", errno);
+
+	err = sigaction(SIGALRM, &sigalrm_action, NULL);
+	CHECK(err, "test-run-signal-sigaction", "errno %d\n", errno);
+
+	err = setitimer(ITIMER_REAL, &timeo, NULL);
+	CHECK(err, "test-run-signal-timer", "errno %d\n", errno);
+
+	err = bpf_prog_test_run(prog_fd, 0xffffffff, &pkt_v4, sizeof(pkt_v4),
+				NULL, NULL, &retval, &duration);
+	CHECK(err != -1 || errno != EINTR || duration > 1000000000,
+	      "test-run-signal-run",
+	      "err %d errno %d retval %d\n",
+	      err, errno, retval);
+
+	signal(SIGALRM, SIG_DFL);
+}
+
 int main(void)
 {
 	srand(time(NULL));
@@ -1939,6 +1982,7 @@ int main(void)
 	test_reference_tracking();
 	test_queue_stack_map(QUEUE);
 	test_queue_stack_map(STACK);
+	test_signal_pending();
 
 	printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
 	return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
-- 
2.20.1.791.gb4d0f1c61a-goog


* Re: [PATCH bpf 1/2] bpf/test_run: fix unkillable BPF_PROG_TEST_RUN
  2019-02-12 23:42 [PATCH bpf 1/2] bpf/test_run: fix unkillable BPF_PROG_TEST_RUN Stanislav Fomichev
  2019-02-12 23:42 ` [PATCH bpf 2/2] selftests/bpf: make sure signal interrupts BPF_PROG_TEST_RUN Stanislav Fomichev
@ 2019-02-16  1:17 ` Daniel Borkmann
  2019-02-18 17:29   ` Stanislav Fomichev
  1 sibling, 1 reply; 5+ messages in thread
From: Daniel Borkmann @ 2019-02-16  1:17 UTC
  To: Stanislav Fomichev, netdev; +Cc: davem, ast, syzbot

On 02/13/2019 12:42 AM, Stanislav Fomichev wrote:
> Syzbot found out that running BPF_PROG_TEST_RUN with repeat=0xffffffff
> makes the process unkillable. The problem is that when CONFIG_PREEMPT is
> enabled, we never see need_resched() return true. This is due to the
> fact that preempt_enable() (which we do in bpf_test_run_one on each
> iteration) now handles resched if it's needed.
> 
> Let's disable preemption for the whole run, not per test. In this case
> we can properly see whether resched is needed.
> Let's also properly return -EINTR to the userspace in case of a signal
> interrupt.
> 
> See recent discussion:
> http://lore.kernel.org/netdev/CAH3MdRWHr4N8jei8jxDppXjmw-Nw=puNDLbu1dQOFQHxfU2onA@mail.gmail.com
> 
> I'll follow up with the same fix for bpf_prog_test_run_flow_dissector in
> bpf-next.
> 
> Reported-by: syzbot <syzkaller@googlegroups.com>
> Signed-off-by: Stanislav Fomichev <sdf@google.com>
> ---
>  net/bpf/test_run.c | 45 ++++++++++++++++++++++++---------------------
>  1 file changed, 24 insertions(+), 21 deletions(-)
> 
> diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
> index fa2644d276ef..e31e1b20f7f4 100644
> --- a/net/bpf/test_run.c
> +++ b/net/bpf/test_run.c
> @@ -13,27 +13,13 @@
>  #include <net/sock.h>
>  #include <net/tcp.h>
>  
> -static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
> -		struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
> -{
> -	u32 ret;
> -
> -	preempt_disable();
> -	rcu_read_lock();
> -	bpf_cgroup_storage_set(storage);
> -	ret = BPF_PROG_RUN(prog, ctx);
> -	rcu_read_unlock();
> -	preempt_enable();
> -
> -	return ret;
> -}
> -
> -static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *ret,
> -			u32 *time)
> +static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
> +			u32 *retval, u32 *time)
>  {
>  	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { 0 };
>  	enum bpf_cgroup_storage_type stype;
>  	u64 time_start, time_spent = 0;
> +	int ret = 0;
>  	u32 i;
>  
>  	for_each_cgroup_storage_type(stype) {
> @@ -48,25 +34,42 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *ret,
>  
>  	if (!repeat)
>  		repeat = 1;
> +
> +	rcu_read_lock();
> +	preempt_disable();
>  	time_start = ktime_get_ns();
>  	for (i = 0; i < repeat; i++) {
> -		*ret = bpf_test_run_one(prog, ctx, storage);
> +		bpf_cgroup_storage_set(storage);
> +		*retval = BPF_PROG_RUN(prog, ctx);
> +
> +		if (signal_pending(current)) {
> +			ret = -EINTR;
> +			break;
> +		}

Wouldn't it be enough to just move the signal_pending() test to
the above as you did to actually fix the unkillable issue? For
CONFIG_PREEMPT the below need_resched() is never triggered since, as
you mention, preempt_enable() handles rescheduling internally in this
situation, so only moving it out should suffice.

The rationale for disabling preemption for the whole run is imho
a bit different, namely that you would not screw up the ktime
measurements due to rescheduling happening in between otherwise.

But then, once preemption is disabled for the whole run, is there
a need to move out the extra signal_pending() test (presumably as
need_resched() does not handle TIF_SIGPENDING but only TIF_NEED_RESCHED,
but we still wouldn't get into an unkillable situation here, no)?

>  		if (need_resched()) {
> -			if (signal_pending(current))
> -				break;
>  			time_spent += ktime_get_ns() - time_start;
> +			preempt_enable();
> +			rcu_read_unlock();
> +
>  			cond_resched();
> +
> +			rcu_read_lock();
> +			preempt_disable();
>  			time_start = ktime_get_ns();
>  		}
>  	}
>  	time_spent += ktime_get_ns() - time_start;
> +	preempt_enable();
> +	rcu_read_unlock();
> +
>  	do_div(time_spent, repeat);
>  	*time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
>  
>  	for_each_cgroup_storage_type(stype)
>  		bpf_cgroup_storage_free(storage[stype]);
>  
> -	return 0;
> +	return ret;
>  }
>  
>  static int bpf_test_finish(const union bpf_attr *kattr,
> 


* Re: [PATCH bpf 1/2] bpf/test_run: fix unkillable BPF_PROG_TEST_RUN
  2019-02-16  1:17 ` [PATCH bpf 1/2] bpf/test_run: fix unkillable BPF_PROG_TEST_RUN Daniel Borkmann
@ 2019-02-18 17:29   ` Stanislav Fomichev
  2019-02-18 23:23     ` Daniel Borkmann
  0 siblings, 1 reply; 5+ messages in thread
From: Stanislav Fomichev @ 2019-02-18 17:29 UTC
  To: Daniel Borkmann; +Cc: Stanislav Fomichev, netdev, davem, ast, syzbot

On 02/16, Daniel Borkmann wrote:
> On 02/13/2019 12:42 AM, Stanislav Fomichev wrote:
> > Syzbot found out that running BPF_PROG_TEST_RUN with repeat=0xffffffff
> > makes the process unkillable. The problem is that when CONFIG_PREEMPT is
> > enabled, we never see need_resched() return true. This is due to the
> > fact that preempt_enable() (which we do in bpf_test_run_one on each
> > iteration) now handles resched if it's needed.
> > 
> > Let's disable preemption for the whole run, not per test. In this case
> > we can properly see whether resched is needed.
> > Let's also properly return -EINTR to the userspace in case of a signal
> > interrupt.
> > 
> > See recent discussion:
> > http://lore.kernel.org/netdev/CAH3MdRWHr4N8jei8jxDppXjmw-Nw=puNDLbu1dQOFQHxfU2onA@mail.gmail.com
> > 
> > I'll follow up with the same fix for bpf_prog_test_run_flow_dissector in
> > bpf-next.
> > 
> > Reported-by: syzbot <syzkaller@googlegroups.com>
> > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > ---
> >  net/bpf/test_run.c | 45 ++++++++++++++++++++++++---------------------
> >  1 file changed, 24 insertions(+), 21 deletions(-)
> > 
> > diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
> > index fa2644d276ef..e31e1b20f7f4 100644
> > --- a/net/bpf/test_run.c
> > +++ b/net/bpf/test_run.c
> > @@ -13,27 +13,13 @@
> >  #include <net/sock.h>
> >  #include <net/tcp.h>
> >  
> > -static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
> > -		struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
> > -{
> > -	u32 ret;
> > -
> > -	preempt_disable();
> > -	rcu_read_lock();
> > -	bpf_cgroup_storage_set(storage);
> > -	ret = BPF_PROG_RUN(prog, ctx);
> > -	rcu_read_unlock();
> > -	preempt_enable();
> > -
> > -	return ret;
> > -}
> > -
> > -static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *ret,
> > -			u32 *time)
> > +static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
> > +			u32 *retval, u32 *time)
> >  {
> >  	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { 0 };
> >  	enum bpf_cgroup_storage_type stype;
> >  	u64 time_start, time_spent = 0;
> > +	int ret = 0;
> >  	u32 i;
> >  
> >  	for_each_cgroup_storage_type(stype) {
> > @@ -48,25 +34,42 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *ret,
> >  
> >  	if (!repeat)
> >  		repeat = 1;
> > +
> > +	rcu_read_lock();
> > +	preempt_disable();
> >  	time_start = ktime_get_ns();
> >  	for (i = 0; i < repeat; i++) {
> > -		*ret = bpf_test_run_one(prog, ctx, storage);
> > +		bpf_cgroup_storage_set(storage);
> > +		*retval = BPF_PROG_RUN(prog, ctx);
> > +
> > +		if (signal_pending(current)) {
> > +			ret = -EINTR;
> > +			break;
> > +		}
> 
> Wouldn't it be enough to just move the signal_pending() test to
> the above as you did to actually fix the unkillable issue? For
> CONFIG_PREEMPT the below need_resched() is never triggered since, as
> you mention, preempt_enable() handles rescheduling internally in this
> situation, so only moving it out should suffice.
> 
> The rationale for disabling preemption for the whole run is imho
> a bit different, namely that you would not screw up the ktime
> measurements due to rescheduling happening in between otherwise.
That's exactly the reason why we need to preempt_disable() the whole
run; we can't preempt on preempt_enable(), it would screw up our
ktime estimation.

> But then, once preemption is disabled for the whole run, is there
> a need to move out the extra signal_pending() test (presumably as
> need_resched() does not handle TIF_SIGPENDING but only TIF_NEED_RESCHED,
> but we still wouldn't get into an unkillable situation here, no)?
I'm not sure; they look like two separate flags, and it feels safer to
handle them separately (and we have a precedent in do_check in
verifier.c). While we do set them both when sending a signal, it looks
like need_resched is for the cases where we wake up a task with a
higher priority. So, in theory, we can have signal_pending without
need_resched. (Also, with a CONFIG_PREEMPT=y kernel, there is another
complication with preempt_count().)
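
(For reference, the precedent in do_check() in kernel/bpf/verifier.c
handles the two conditions independently, roughly like this:)

	if (signal_pending(current))
		return -EAGAIN;

	if (need_resched())
		cond_resched();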

> 
> >  		if (need_resched()) {
> > -			if (signal_pending(current))
> > -				break;
> >  			time_spent += ktime_get_ns() - time_start;
> > +			preempt_enable();
> > +			rcu_read_unlock();
> > +
> >  			cond_resched();
> > +
> > +			rcu_read_lock();
> > +			preempt_disable();
> >  			time_start = ktime_get_ns();
> >  		}
> >  	}
> >  	time_spent += ktime_get_ns() - time_start;
> > +	preempt_enable();
> > +	rcu_read_unlock();
> > +
> >  	do_div(time_spent, repeat);
> >  	*time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
> >  
> >  	for_each_cgroup_storage_type(stype)
> >  		bpf_cgroup_storage_free(storage[stype]);
> >  
> > -	return 0;
> > +	return ret;
> >  }
> >  
> >  static int bpf_test_finish(const union bpf_attr *kattr,
> > 
> 

* Re: [PATCH bpf 1/2] bpf/test_run: fix unkillable BPF_PROG_TEST_RUN
  2019-02-18 17:29   ` Stanislav Fomichev
@ 2019-02-18 23:23     ` Daniel Borkmann
  0 siblings, 0 replies; 5+ messages in thread
From: Daniel Borkmann @ 2019-02-18 23:23 UTC
  To: Stanislav Fomichev; +Cc: Stanislav Fomichev, netdev, davem, ast, syzbot

On 02/18/2019 06:29 PM, Stanislav Fomichev wrote:
> On 02/16, Daniel Borkmann wrote:
>> On 02/13/2019 12:42 AM, Stanislav Fomichev wrote:
>>> Syzbot found out that running BPF_PROG_TEST_RUN with repeat=0xffffffff
>>> makes the process unkillable. The problem is that when CONFIG_PREEMPT is
>>> enabled, we never see need_resched() return true. This is due to the
>>> fact that preempt_enable() (which we do in bpf_test_run_one on each
>>> iteration) now handles resched if it's needed.
>>>
>>> Let's disable preemption for the whole run, not per test. In this case
>>> we can properly see whether resched is needed.
>>> Let's also properly return -EINTR to the userspace in case of a signal
>>> interrupt.
>>>
>>> See recent discussion:
>>> http://lore.kernel.org/netdev/CAH3MdRWHr4N8jei8jxDppXjmw-Nw=puNDLbu1dQOFQHxfU2onA@mail.gmail.com
>>>
>>> I'll follow up with the same fix for bpf_prog_test_run_flow_dissector in
>>> bpf-next.
>>>
>>> Reported-by: syzbot <syzkaller@googlegroups.com>
>>> Signed-off-by: Stanislav Fomichev <sdf@google.com>
>>> ---
>>>  net/bpf/test_run.c | 45 ++++++++++++++++++++++++---------------------
>>>  1 file changed, 24 insertions(+), 21 deletions(-)
>>>
>>> diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
>>> index fa2644d276ef..e31e1b20f7f4 100644
>>> --- a/net/bpf/test_run.c
>>> +++ b/net/bpf/test_run.c
>>> @@ -13,27 +13,13 @@
>>>  #include <net/sock.h>
>>>  #include <net/tcp.h>
>>>  
>>> -static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
>>> -		struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
>>> -{
>>> -	u32 ret;
>>> -
>>> -	preempt_disable();
>>> -	rcu_read_lock();
>>> -	bpf_cgroup_storage_set(storage);
>>> -	ret = BPF_PROG_RUN(prog, ctx);
>>> -	rcu_read_unlock();
>>> -	preempt_enable();
>>> -
>>> -	return ret;
>>> -}
>>> -
>>> -static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *ret,
>>> -			u32 *time)
>>> +static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
>>> +			u32 *retval, u32 *time)
>>>  {
>>>  	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { 0 };
>>>  	enum bpf_cgroup_storage_type stype;
>>>  	u64 time_start, time_spent = 0;
>>> +	int ret = 0;
>>>  	u32 i;
>>>  
>>>  	for_each_cgroup_storage_type(stype) {
>>> @@ -48,25 +34,42 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *ret,
>>>  
>>>  	if (!repeat)
>>>  		repeat = 1;
>>> +
>>> +	rcu_read_lock();
>>> +	preempt_disable();
>>>  	time_start = ktime_get_ns();
>>>  	for (i = 0; i < repeat; i++) {
>>> -		*ret = bpf_test_run_one(prog, ctx, storage);
>>> +		bpf_cgroup_storage_set(storage);
>>> +		*retval = BPF_PROG_RUN(prog, ctx);
>>> +
>>> +		if (signal_pending(current)) {
>>> +			ret = -EINTR;
>>> +			break;
>>> +		}
>>
>> Wouldn't it be enough to just move the signal_pending() test to
>> the above as you did to actually fix the unkillable issue? For
>> CONFIG_PREEMPT the below need_resched() is never triggered since, as
>> you mention, preempt_enable() handles rescheduling internally in this
>> situation, so only moving it out should suffice.
>>
>> The rationale for disabling preemption for the whole run is imho
>> a bit different, namely that you would not screw up the ktime
>> measurements due to rescheduling happening in between otherwise.
> That's exactly the reason why we need to preempt_disable() the whole
> run; we can't preempt on preempt_enable(), it would screw up our
> ktime estimation.
> 
>> But then, once preemption is disabled for the whole run, is there
>> a need to move out the extra signal_pending() test (presumably as
>> need_resched() does not handle TIF_SIGPENDING but only TIF_NEED_RESCHED,
>> but we still wouldn't get into an unkillable situation here, no)?
> I'm not sure; they look like two separate flags, and it feels safer to
> handle them separately (and we have a precedent in do_check in
> verifier.c). While we do set them both when sending a signal, it looks
> like need_resched is for the cases where we wake up a task with a
> higher priority. So, in theory, we can have signal_pending without
> need_resched. (Also, with a CONFIG_PREEMPT=y kernel, there is another
> complication with preempt_count().)

Yeah, given there is no separation, it's better to move it out, agree.
Applied both, thanks!

>>>  		if (need_resched()) {
>>> -			if (signal_pending(current))
>>> -				break;
>>>  			time_spent += ktime_get_ns() - time_start;
>>> +			preempt_enable();
>>> +			rcu_read_unlock();
>>> +
>>>  			cond_resched();
>>> +
>>> +			rcu_read_lock();
>>> +			preempt_disable();
>>>  			time_start = ktime_get_ns();
>>>  		}
>>>  	}
>>>  	time_spent += ktime_get_ns() - time_start;
>>> +	preempt_enable();
>>> +	rcu_read_unlock();
>>> +
>>>  	do_div(time_spent, repeat);
>>>  	*time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
>>>  
>>>  	for_each_cgroup_storage_type(stype)
>>>  		bpf_cgroup_storage_free(storage[stype]);
>>>  
>>> -	return 0;
>>> +	return ret;
>>>  }
>>>  
>>>  static int bpf_test_finish(const union bpf_attr *kattr,
>>>
>>

