* Re: [PATCH bpf-next 1/4] bpf: enable task local storage for tracing programs [not found] ` <20210108231950.3844417-2-songliubraving@fb.com> @ 2021-01-11 6:27 ` Yonghong Song 2021-01-11 10:17 ` KP Singh 2021-01-11 10:14 ` KP Singh ` (2 subsequent siblings) 3 siblings, 1 reply; 29+ messages in thread From: Yonghong Song @ 2021-01-11 6:27 UTC (permalink / raw) To: Song Liu, bpf, netdev, linux-kernel Cc: mingo, peterz, ast, daniel, andrii, john.fastabend, kpsingh, kernel-team, haoluo, kernel test robot On 1/8/21 3:19 PM, Song Liu wrote: > To access per-task data, BPF program typically creates a hash table with > pid as the key. This is not ideal because: > 1. The use need to estimate requires size of the hash table, with may be > inaccurate; > 2. Big hash tables are slow; > 3. To clean up the data properly during task terminations, the user need > to write code. > > Task local storage overcomes these issues and becomes a better option for > these per-task data. Task local storage is only available to BPF_LSM. Now > enable it for tracing programs. 
> > Reported-by: kernel test robot <lkp@intel.com> > Signed-off-by: Song Liu <songliubraving@fb.com> > --- > include/linux/bpf.h | 7 +++++++ > include/linux/bpf_lsm.h | 22 ---------------------- > include/linux/bpf_types.h | 2 +- > include/linux/sched.h | 5 +++++ > kernel/bpf/Makefile | 3 +-- > kernel/bpf/bpf_local_storage.c | 28 +++++++++++++++++----------- > kernel/bpf/bpf_lsm.c | 4 ---- > kernel/bpf/bpf_task_storage.c | 26 ++++++-------------------- > kernel/fork.c | 5 +++++ > kernel/trace/bpf_trace.c | 4 ++++ > 10 files changed, 46 insertions(+), 60 deletions(-) > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > index 07cb5d15e7439..cf16548f28f7b 100644 > --- a/include/linux/bpf.h > +++ b/include/linux/bpf.h > @@ -1480,6 +1480,7 @@ struct bpf_prog *bpf_prog_by_id(u32 id); > struct bpf_link *bpf_link_by_id(u32 id); > > const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id); > +void bpf_task_storage_free(struct task_struct *task); > #else /* !CONFIG_BPF_SYSCALL */ > static inline struct bpf_prog *bpf_prog_get(u32 ufd) > { > @@ -1665,6 +1666,10 @@ bpf_base_func_proto(enum bpf_func_id func_id) > { > return NULL; > } > + > +static inline void bpf_task_storage_free(struct task_struct *task) > +{ > +} > #endif /* CONFIG_BPF_SYSCALL */ > > static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, > @@ -1860,6 +1865,8 @@ extern const struct bpf_func_proto bpf_per_cpu_ptr_proto; > extern const struct bpf_func_proto bpf_this_cpu_ptr_proto; > extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto; > extern const struct bpf_func_proto bpf_sock_from_file_proto; > +extern const struct bpf_func_proto bpf_task_storage_get_proto; > +extern const struct bpf_func_proto bpf_task_storage_delete_proto; > > const struct bpf_func_proto *bpf_tracing_func_proto( > enum bpf_func_id func_id, const struct bpf_prog *prog); > diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h > index 0d1c33ace3987..479c101546ad1 100644 > --- 
a/include/linux/bpf_lsm.h > +++ b/include/linux/bpf_lsm.h > @@ -38,21 +38,9 @@ static inline struct bpf_storage_blob *bpf_inode( > return inode->i_security + bpf_lsm_blob_sizes.lbs_inode; > } > > -static inline struct bpf_storage_blob *bpf_task( > - const struct task_struct *task) > -{ > - if (unlikely(!task->security)) > - return NULL; > - > - return task->security + bpf_lsm_blob_sizes.lbs_task; > -} > - > extern const struct bpf_func_proto bpf_inode_storage_get_proto; > extern const struct bpf_func_proto bpf_inode_storage_delete_proto; > -extern const struct bpf_func_proto bpf_task_storage_get_proto; > -extern const struct bpf_func_proto bpf_task_storage_delete_proto; > void bpf_inode_storage_free(struct inode *inode); > -void bpf_task_storage_free(struct task_struct *task); > > #else /* !CONFIG_BPF_LSM */ > > @@ -73,20 +61,10 @@ static inline struct bpf_storage_blob *bpf_inode( > return NULL; > } > > -static inline struct bpf_storage_blob *bpf_task( > - const struct task_struct *task) > -{ > - return NULL; > -} > - > static inline void bpf_inode_storage_free(struct inode *inode) > { > } > > -static inline void bpf_task_storage_free(struct task_struct *task) > -{ > -} > - > #endif /* CONFIG_BPF_LSM */ > > #endif /* _LINUX_BPF_LSM_H */ > diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h > index 99f7fd657d87a..b9edee336d804 100644 > --- a/include/linux/bpf_types.h > +++ b/include/linux/bpf_types.h > @@ -109,8 +109,8 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops) > #endif > #ifdef CONFIG_BPF_LSM > BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops) > -BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops) > #endif > +BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops) > BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops) > #if defined(CONFIG_XDP_SOCKETS) > BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops) > diff --git a/include/linux/sched.h b/include/linux/sched.h > index 51d535b69bd6f..4a173defa2010 100644 > --- 
a/include/linux/sched.h > +++ b/include/linux/sched.h > @@ -42,6 +42,7 @@ struct audit_context; > struct backing_dev_info; > struct bio_list; > struct blk_plug; > +struct bpf_local_storage; > struct capture_control; > struct cfs_rq; > struct fs_struct; > @@ -1348,6 +1349,10 @@ struct task_struct { > /* Used by LSM modules for access restriction: */ > void *security; > #endif > +#ifdef CONFIG_BPF_SYSCALL > + /* Used by BPF task local storage */ > + struct bpf_local_storage *bpf_storage; > +#endif I remembered there is a discussion where KP initially wanted to put bpf_local_storage in task_struct, but later on changed to use in lsm as his use case mostly for lsm. Did anybody remember the details of the discussion? Just want to be sure what is the concern people has with putting bpf_local_storage in task_struct and whether the use case presented by Song will justify it. > > #ifdef CONFIG_GCC_PLUGIN_STACKLEAK > unsigned long lowest_stack; > diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile > index d1249340fd6ba..ca995fdfa45e7 100644 > --- a/kernel/bpf/Makefile > +++ b/kernel/bpf/Makefile > @@ -8,9 +8,8 @@ CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy) > > obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o > obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o > -obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o > +obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o bpf_task_storage.o > obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o > -obj-${CONFIG_BPF_LSM} += bpf_task_storage.o > obj-$(CONFIG_BPF_SYSCALL) += disasm.o > obj-$(CONFIG_BPF_JIT) += trampoline.o > obj-$(CONFIG_BPF_SYSCALL) += btf.o [...] ^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH bpf-next 1/4] bpf: enable task local storage for tracing programs 2021-01-11 6:27 ` [PATCH bpf-next 1/4] bpf: enable task local storage for tracing programs Yonghong Song @ 2021-01-11 10:17 ` KP Singh 2021-01-11 15:56 ` Yonghong Song 0 siblings, 1 reply; 29+ messages in thread From: KP Singh @ 2021-01-11 10:17 UTC (permalink / raw) To: Yonghong Song Cc: Song Liu, bpf, Networking, open list, mingo, Peter Zijlstra, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko, John Fastabend, Kernel Team, Hao Luo, kernel test robot On Mon, Jan 11, 2021 at 7:27 AM Yonghong Song <yhs@fb.com> wrote: > > > > On 1/8/21 3:19 PM, Song Liu wrote: > > To access per-task data, BPF program typically creates a hash table with > > pid as the key. This is not ideal because: > > 1. The use need to estimate requires size of the hash table, with may be > > inaccurate; > > 2. Big hash tables are slow; > > 3. To clean up the data properly during task terminations, the user need > > to write code. > > > > Task local storage overcomes these issues and becomes a better option for > > these per-task data. Task local storage is only available to BPF_LSM. Now > > enable it for tracing programs. > > > > Reported-by: kernel test robot <lkp@intel.com> > > Signed-off-by: Song Liu <songliubraving@fb.com> > > --- [...] > > struct cfs_rq; > > struct fs_struct; > > @@ -1348,6 +1349,10 @@ struct task_struct { > > /* Used by LSM modules for access restriction: */ > > void *security; > > #endif > > +#ifdef CONFIG_BPF_SYSCALL > > + /* Used by BPF task local storage */ > > + struct bpf_local_storage *bpf_storage; > > +#endif > > I remembered there is a discussion where KP initially wanted to put > bpf_local_storage in task_struct, but later on changed to > use in lsm as his use case mostly for lsm. Did anybody > remember the details of the discussion? 
Just want to be > sure what is the concern people has with putting bpf_local_storage > in task_struct and whether the use case presented by > Song will justify it. > If I recall correctly, the discussion was about inode local storage and it was decided to use the security blob since the use-case was only LSM programs. Since we now plan to use it in tracing, detangling the dependency from CONFIG_BPF_LSM sounds logical to me. > > > > #ifdef CONFIG_GCC_PLUGIN_STACKLEAK > > unsigned long lowest_stack; > > diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile > > index d1249340fd6ba..ca995fdfa45e7 100644 > > --- a/kernel/bpf/Makefile > > +++ b/kernel/bpf/Makefile > > @@ -8,9 +8,8 @@ CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy) > > > > obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o > > obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o > > -obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o > > +obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o bpf_task_storage.o > > obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o > > -obj-${CONFIG_BPF_LSM} += bpf_task_storage.o > > obj-$(CONFIG_BPF_SYSCALL) += disasm.o > > obj-$(CONFIG_BPF_JIT) += trampoline.o > > obj-$(CONFIG_BPF_SYSCALL) += btf.o > [...] ^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH bpf-next 1/4] bpf: enable task local storage for tracing programs 2021-01-11 10:17 ` KP Singh @ 2021-01-11 15:56 ` Yonghong Song 0 siblings, 0 replies; 29+ messages in thread From: Yonghong Song @ 2021-01-11 15:56 UTC (permalink / raw) To: KP Singh Cc: Song Liu, bpf, Networking, open list, mingo, Peter Zijlstra, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko, John Fastabend, Kernel Team, Hao Luo, kernel test robot On 1/11/21 2:17 AM, KP Singh wrote: > On Mon, Jan 11, 2021 at 7:27 AM Yonghong Song <yhs@fb.com> wrote: >> >> >> >> On 1/8/21 3:19 PM, Song Liu wrote: >>> To access per-task data, BPF program typically creates a hash table with >>> pid as the key. This is not ideal because: >>> 1. The use need to estimate requires size of the hash table, with may be >>> inaccurate; >>> 2. Big hash tables are slow; >>> 3. To clean up the data properly during task terminations, the user need >>> to write code. >>> >>> Task local storage overcomes these issues and becomes a better option for >>> these per-task data. Task local storage is only available to BPF_LSM. Now >>> enable it for tracing programs. >>> >>> Reported-by: kernel test robot <lkp@intel.com> >>> Signed-off-by: Song Liu <songliubraving@fb.com> >>> --- > > [...] > >>> struct cfs_rq; >>> struct fs_struct; >>> @@ -1348,6 +1349,10 @@ struct task_struct { >>> /* Used by LSM modules for access restriction: */ >>> void *security; >>> #endif >>> +#ifdef CONFIG_BPF_SYSCALL >>> + /* Used by BPF task local storage */ >>> + struct bpf_local_storage *bpf_storage; >>> +#endif >> >> I remembered there is a discussion where KP initially wanted to put >> bpf_local_storage in task_struct, but later on changed to >> use in lsm as his use case mostly for lsm. Did anybody >> remember the details of the discussion? Just want to be >> sure what is the concern people has with putting bpf_local_storage >> in task_struct and whether the use case presented by >> Song will justify it. 
>> > > If I recall correctly, the discussion was about inode local storage and > it was decided to use the security blob since the use-case was only LSM > programs. Since we now plan to use it in tracing, > detangling the dependency from CONFIG_BPF_LSM > sounds logical to me. Sounds good. Thanks for explanation. > > >>> >>> #ifdef CONFIG_GCC_PLUGIN_STACKLEAK >>> unsigned long lowest_stack; >>> diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile >>> index d1249340fd6ba..ca995fdfa45e7 100644 >>> --- a/kernel/bpf/Makefile >>> +++ b/kernel/bpf/Makefile >>> @@ -8,9 +8,8 @@ CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy) >>> >>> obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o >>> obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o >>> -obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o >>> +obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o bpf_task_storage.o >>> obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o >>> -obj-${CONFIG_BPF_LSM} += bpf_task_storage.o >>> obj-$(CONFIG_BPF_SYSCALL) += disasm.o >>> obj-$(CONFIG_BPF_JIT) += trampoline.o >>> obj-$(CONFIG_BPF_SYSCALL) += btf.o >> [...] ^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH bpf-next 1/4] bpf: enable task local storage for tracing programs [not found] ` <20210108231950.3844417-2-songliubraving@fb.com> 2021-01-11 6:27 ` [PATCH bpf-next 1/4] bpf: enable task local storage for tracing programs Yonghong Song @ 2021-01-11 10:14 ` KP Singh 2021-01-11 23:16 ` Song Liu 2021-01-11 17:16 ` Yonghong Song 2021-01-11 18:56 ` Martin KaFai Lau 3 siblings, 1 reply; 29+ messages in thread From: KP Singh @ 2021-01-11 10:14 UTC (permalink / raw) To: Song Liu Cc: bpf, Networking, open list, mingo, Peter Zijlstra, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko, John Fastabend, Kernel Team, Hao Luo, kernel test robot On Sat, Jan 9, 2021 at 12:35 AM Song Liu <songliubraving@fb.com> wrote: > > To access per-task data, BPF program typically creates a hash table with > pid as the key. This is not ideal because: > 1. The use need to estimate requires size of the hash table, with may be > inaccurate; > 2. Big hash tables are slow; > 3. To clean up the data properly during task terminations, the user need > to write code. > > Task local storage overcomes these issues and becomes a better option for > these per-task data. Task local storage is only available to BPF_LSM. Now > enable it for tracing programs. Also mention here that you change the pointer from being a security blob to a dedicated member in the task struct. I assume this is because you want to use it without CONFIG_BPF_LSM? > Can you also mention the reasons for changing the raw_spin_lock_bh to raw_spin_lock_irqsave in the commit log? 
> Reported-by: kernel test robot <lkp@intel.com> > Signed-off-by: Song Liu <songliubraving@fb.com> > --- > include/linux/bpf.h | 7 +++++++ > include/linux/bpf_lsm.h | 22 ---------------------- > include/linux/bpf_types.h | 2 +- > include/linux/sched.h | 5 +++++ > kernel/bpf/Makefile | 3 +-- > kernel/bpf/bpf_local_storage.c | 28 +++++++++++++++++----------- > kernel/bpf/bpf_lsm.c | 4 ---- > kernel/bpf/bpf_task_storage.c | 26 ++++++-------------------- > kernel/fork.c | 5 +++++ > kernel/trace/bpf_trace.c | 4 ++++ > 10 files changed, 46 insertions(+), 60 deletions(-) > [...] ^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH bpf-next 1/4] bpf: enable task local storage for tracing programs 2021-01-11 10:14 ` KP Singh @ 2021-01-11 23:16 ` Song Liu 0 siblings, 0 replies; 29+ messages in thread From: Song Liu @ 2021-01-11 23:16 UTC (permalink / raw) To: KP Singh Cc: bpf, Networking, open list, Ingo Molnar, Peter Zijlstra, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko, John Fastabend, Kernel Team, Hao Luo, kernel test robot > On Jan 11, 2021, at 2:14 AM, KP Singh <kpsingh@kernel.org> wrote: > > On Sat, Jan 9, 2021 at 12:35 AM Song Liu <songliubraving@fb.com> wrote: >> >> To access per-task data, BPF program typically creates a hash table with >> pid as the key. This is not ideal because: >> 1. The use need to estimate requires size of the hash table, with may be >> inaccurate; >> 2. Big hash tables are slow; >> 3. To clean up the data properly during task terminations, the user need >> to write code. >> >> Task local storage overcomes these issues and becomes a better option for >> these per-task data. Task local storage is only available to BPF_LSM. Now >> enable it for tracing programs. > > Also mention here that you change the pointer from being a security blob to a > dedicated member in the task struct. I assume this is because you want to > use it without CONFIG_BPF_LSM? Yes, exactly. I will add this to the commit log. > >> > > Can you also mention the reasons for changing the > raw_spin_lock_bh to raw_spin_lock_irqsave in the commit log? This is because we will use these in irq context. I will add this to the commit log. Thanks, Song ^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH bpf-next 1/4] bpf: enable task local storage for tracing programs [not found] ` <20210108231950.3844417-2-songliubraving@fb.com> 2021-01-11 6:27 ` [PATCH bpf-next 1/4] bpf: enable task local storage for tracing programs Yonghong Song 2021-01-11 10:14 ` KP Singh @ 2021-01-11 17:16 ` Yonghong Song 2021-01-11 18:56 ` Martin KaFai Lau 3 siblings, 0 replies; 29+ messages in thread From: Yonghong Song @ 2021-01-11 17:16 UTC (permalink / raw) To: Song Liu, bpf, netdev, linux-kernel Cc: mingo, peterz, ast, daniel, andrii, john.fastabend, kpsingh, kernel-team, haoluo, kernel test robot On 1/8/21 3:19 PM, Song Liu wrote: > To access per-task data, BPF program typically creates a hash table with > pid as the key. This is not ideal because: > 1. The use need to estimate requires size of the hash table, with may be > inaccurate; > 2. Big hash tables are slow; > 3. To clean up the data properly during task terminations, the user need > to write code. > > Task local storage overcomes these issues and becomes a better option for > these per-task data. Task local storage is only available to BPF_LSM. Now > enable it for tracing programs. > > Reported-by: kernel test robot <lkp@intel.com> The whole patch is not reported by kernel test robot. I think we should drop this. 
> Signed-off-by: Song Liu <songliubraving@fb.com> > --- > include/linux/bpf.h | 7 +++++++ > include/linux/bpf_lsm.h | 22 ---------------------- > include/linux/bpf_types.h | 2 +- > include/linux/sched.h | 5 +++++ > kernel/bpf/Makefile | 3 +-- > kernel/bpf/bpf_local_storage.c | 28 +++++++++++++++++----------- > kernel/bpf/bpf_lsm.c | 4 ---- > kernel/bpf/bpf_task_storage.c | 26 ++++++-------------------- > kernel/fork.c | 5 +++++ > kernel/trace/bpf_trace.c | 4 ++++ > 10 files changed, 46 insertions(+), 60 deletions(-) > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > index 07cb5d15e7439..cf16548f28f7b 100644 > --- a/include/linux/bpf.h > +++ b/include/linux/bpf.h > @@ -1480,6 +1480,7 @@ struct bpf_prog *bpf_prog_by_id(u32 id); > struct bpf_link *bpf_link_by_id(u32 id); > > const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id); > +void bpf_task_storage_free(struct task_struct *task); > #else /* !CONFIG_BPF_SYSCALL */ > static inline struct bpf_prog *bpf_prog_get(u32 ufd) > { > @@ -1665,6 +1666,10 @@ bpf_base_func_proto(enum bpf_func_id func_id) > { > return NULL; > } > + > +static inline void bpf_task_storage_free(struct task_struct *task) > +{ > +} > #endif /* CONFIG_BPF_SYSCALL */ [...] ^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH bpf-next 1/4] bpf: enable task local storage for tracing programs [not found] ` <20210108231950.3844417-2-songliubraving@fb.com> ` (2 preceding siblings ...) 2021-01-11 17:16 ` Yonghong Song @ 2021-01-11 18:56 ` Martin KaFai Lau 2021-01-11 21:35 ` KP Singh 2021-01-11 23:41 ` Song Liu 3 siblings, 2 replies; 29+ messages in thread From: Martin KaFai Lau @ 2021-01-11 18:56 UTC (permalink / raw) To: Song Liu Cc: bpf, netdev, linux-kernel, mingo, peterz, ast, daniel, andrii, john.fastabend, kpsingh, kernel-team, haoluo, kernel test robot On Fri, Jan 08, 2021 at 03:19:47PM -0800, Song Liu wrote: [ ... ] > diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c > index dd5aedee99e73..9bd47ad2b26f1 100644 > --- a/kernel/bpf/bpf_local_storage.c > +++ b/kernel/bpf/bpf_local_storage.c > @@ -140,17 +140,18 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem) > { > struct bpf_local_storage *local_storage; > bool free_local_storage = false; > + unsigned long flags; > > if (unlikely(!selem_linked_to_storage(selem))) > /* selem has already been unlinked from sk */ > return; > > local_storage = rcu_dereference(selem->local_storage); > - raw_spin_lock_bh(&local_storage->lock); > + raw_spin_lock_irqsave(&local_storage->lock, flags); It will be useful to have a few words in commit message on this change for future reference purpose. Please also remove the in_irq() check from bpf_sk_storage.c to avoid confusion in the future. It probably should be in a separate patch. [ ... ] > diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c > index 4ef1959a78f27..f654b56907b69 100644 > diff --git a/kernel/fork.c b/kernel/fork.c > index 7425b3224891d..3d65c8ebfd594 100644 [ ... 
] > --- a/kernel/fork.c > +++ b/kernel/fork.c > @@ -96,6 +96,7 @@ > #include <linux/kasan.h> > #include <linux/scs.h> > #include <linux/io_uring.h> > +#include <linux/bpf.h> > > #include <asm/pgalloc.h> > #include <linux/uaccess.h> > @@ -734,6 +735,7 @@ void __put_task_struct(struct task_struct *tsk) > cgroup_free(tsk); > task_numa_free(tsk, true); > security_task_free(tsk); > + bpf_task_storage_free(tsk); > exit_creds(tsk); If exit_creds() is traced by a bpf and this bpf is doing bpf_task_storage_get(..., BPF_LOCAL_STORAGE_GET_F_CREATE), new task storage will be created after bpf_task_storage_free(). I recalled there was an earlier discussion with KP and KP mentioned BPF_LSM will not be called with a task that is going away. It seems enabling bpf task storage in bpf tracing will break this assumption and needs to be addressed? ^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH bpf-next 1/4] bpf: enable task local storage for tracing programs 2021-01-11 18:56 ` Martin KaFai Lau @ 2021-01-11 21:35 ` KP Singh 2021-01-11 21:58 ` Martin KaFai Lau 2021-01-11 23:41 ` Song Liu 1 sibling, 1 reply; 29+ messages in thread From: KP Singh @ 2021-01-11 21:35 UTC (permalink / raw) To: Martin KaFai Lau Cc: Song Liu, bpf, Networking, open list, mingo, Peter Zijlstra, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko, John Fastabend, Kernel Team, Hao Luo, kernel test robot On Mon, Jan 11, 2021 at 7:57 PM Martin KaFai Lau <kafai@fb.com> wrote: > > On Fri, Jan 08, 2021 at 03:19:47PM -0800, Song Liu wrote: > > [ ... ] > > > diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c > > index dd5aedee99e73..9bd47ad2b26f1 100644 > > --- a/kernel/bpf/bpf_local_storage.c > > +++ b/kernel/bpf/bpf_local_storage.c > > @@ -140,17 +140,18 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem) > > { > > struct bpf_local_storage *local_storage; > > bool free_local_storage = false; > > + unsigned long flags; > > > > if (unlikely(!selem_linked_to_storage(selem))) > > /* selem has already been unlinked from sk */ > > return; > > > > local_storage = rcu_dereference(selem->local_storage); > > - raw_spin_lock_bh(&local_storage->lock); > > + raw_spin_lock_irqsave(&local_storage->lock, flags); > It will be useful to have a few words in commit message on this change > for future reference purpose. > > Please also remove the in_irq() check from bpf_sk_storage.c > to avoid confusion in the future. It probably should > be in a separate patch. > > [ ... ] > > > diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c > > index 4ef1959a78f27..f654b56907b69 100644 > > diff --git a/kernel/fork.c b/kernel/fork.c > > index 7425b3224891d..3d65c8ebfd594 100644 > [ ... 
] > > > --- a/kernel/fork.c > > +++ b/kernel/fork.c > > @@ -96,6 +96,7 @@ > > #include <linux/kasan.h> > > #include <linux/scs.h> > > #include <linux/io_uring.h> > > +#include <linux/bpf.h> > > > > #include <asm/pgalloc.h> > > #include <linux/uaccess.h> > > @@ -734,6 +735,7 @@ void __put_task_struct(struct task_struct *tsk) > > cgroup_free(tsk); > > task_numa_free(tsk, true); > > security_task_free(tsk); > > + bpf_task_storage_free(tsk); > > exit_creds(tsk); > If exit_creds() is traced by a bpf and this bpf is doing > bpf_task_storage_get(..., BPF_LOCAL_STORAGE_GET_F_CREATE), > new task storage will be created after bpf_task_storage_free(). > > I recalled there was an earlier discussion with KP and KP mentioned > BPF_LSM will not be called with a task that is going away. > It seems enabling bpf task storage in bpf tracing will break > this assumption and needs to be addressed? For tracing programs, I think we will need an allow list where task local storage can be used. ^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH bpf-next 1/4] bpf: enable task local storage for tracing programs 2021-01-11 21:35 ` KP Singh @ 2021-01-11 21:58 ` Martin KaFai Lau 2021-01-11 23:45 ` Song Liu 0 siblings, 1 reply; 29+ messages in thread From: Martin KaFai Lau @ 2021-01-11 21:58 UTC (permalink / raw) To: KP Singh Cc: Song Liu, bpf, Networking, open list, mingo, Peter Zijlstra, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko, John Fastabend, Kernel Team, Hao Luo, kernel test robot On Mon, Jan 11, 2021 at 10:35:43PM +0100, KP Singh wrote: > On Mon, Jan 11, 2021 at 7:57 PM Martin KaFai Lau <kafai@fb.com> wrote: > > > > On Fri, Jan 08, 2021 at 03:19:47PM -0800, Song Liu wrote: > > > > [ ... ] > > > > > diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c > > > index dd5aedee99e73..9bd47ad2b26f1 100644 > > > --- a/kernel/bpf/bpf_local_storage.c > > > +++ b/kernel/bpf/bpf_local_storage.c > > > @@ -140,17 +140,18 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem) > > > { > > > struct bpf_local_storage *local_storage; > > > bool free_local_storage = false; > > > + unsigned long flags; > > > > > > if (unlikely(!selem_linked_to_storage(selem))) > > > /* selem has already been unlinked from sk */ > > > return; > > > > > > local_storage = rcu_dereference(selem->local_storage); > > > - raw_spin_lock_bh(&local_storage->lock); > > > + raw_spin_lock_irqsave(&local_storage->lock, flags); > > It will be useful to have a few words in commit message on this change > > for future reference purpose. > > > > Please also remove the in_irq() check from bpf_sk_storage.c > > to avoid confusion in the future. It probably should > > be in a separate patch. > > > > [ ... ] > > > > > diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c > > > index 4ef1959a78f27..f654b56907b69 100644 > > > diff --git a/kernel/fork.c b/kernel/fork.c > > > index 7425b3224891d..3d65c8ebfd594 100644 > > [ ... 
] > > > > > --- a/kernel/fork.c > > > +++ b/kernel/fork.c > > > @@ -96,6 +96,7 @@ > > > #include <linux/kasan.h> > > > #include <linux/scs.h> > > > #include <linux/io_uring.h> > > > +#include <linux/bpf.h> > > > > > > #include <asm/pgalloc.h> > > > #include <linux/uaccess.h> > > > @@ -734,6 +735,7 @@ void __put_task_struct(struct task_struct *tsk) > > > cgroup_free(tsk); > > > task_numa_free(tsk, true); > > > security_task_free(tsk); > > > + bpf_task_storage_free(tsk); > > > exit_creds(tsk); > > If exit_creds() is traced by a bpf and this bpf is doing > > bpf_task_storage_get(..., BPF_LOCAL_STORAGE_GET_F_CREATE), > > new task storage will be created after bpf_task_storage_free(). > > > > I recalled there was an earlier discussion with KP and KP mentioned > > BPF_LSM will not be called with a task that is going away. > > It seems enabling bpf task storage in bpf tracing will break > > this assumption and needs to be addressed? > > For tracing programs, I think we will need an allow list where > task local storage can be used. Instead of whitelist, can refcount_inc_not_zero(&tsk->usage) be used? ^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH bpf-next 1/4] bpf: enable task local storage for tracing programs 2021-01-11 21:58 ` Martin KaFai Lau @ 2021-01-11 23:45 ` Song Liu 2021-01-12 16:32 ` Yonghong Song 0 siblings, 1 reply; 29+ messages in thread From: Song Liu @ 2021-01-11 23:45 UTC (permalink / raw) To: Martin Lau Cc: KP Singh, bpf, Networking, open list, mingo, Peter Zijlstra, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko, John Fastabend, Kernel Team, Hao Luo, kernel test robot > On Jan 11, 2021, at 1:58 PM, Martin Lau <kafai@fb.com> wrote: > > On Mon, Jan 11, 2021 at 10:35:43PM +0100, KP Singh wrote: >> On Mon, Jan 11, 2021 at 7:57 PM Martin KaFai Lau <kafai@fb.com> wrote: >>> >>> On Fri, Jan 08, 2021 at 03:19:47PM -0800, Song Liu wrote: >>> >>> [ ... ] >>> >>>> diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c >>>> index dd5aedee99e73..9bd47ad2b26f1 100644 >>>> --- a/kernel/bpf/bpf_local_storage.c >>>> +++ b/kernel/bpf/bpf_local_storage.c >>>> @@ -140,17 +140,18 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem) >>>> { >>>> struct bpf_local_storage *local_storage; >>>> bool free_local_storage = false; >>>> + unsigned long flags; >>>> >>>> if (unlikely(!selem_linked_to_storage(selem))) >>>> /* selem has already been unlinked from sk */ >>>> return; >>>> >>>> local_storage = rcu_dereference(selem->local_storage); >>>> - raw_spin_lock_bh(&local_storage->lock); >>>> + raw_spin_lock_irqsave(&local_storage->lock, flags); >>> It will be useful to have a few words in commit message on this change >>> for future reference purpose. >>> >>> Please also remove the in_irq() check from bpf_sk_storage.c >>> to avoid confusion in the future. It probably should >>> be in a separate patch. >>> >>> [ ... ] >>> >>>> diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c >>>> index 4ef1959a78f27..f654b56907b69 100644 >>>> diff --git a/kernel/fork.c b/kernel/fork.c >>>> index 7425b3224891d..3d65c8ebfd594 100644 >>> [ ... 
] >>> >>>> --- a/kernel/fork.c >>>> +++ b/kernel/fork.c >>>> @@ -96,6 +96,7 @@ >>>> #include <linux/kasan.h> >>>> #include <linux/scs.h> >>>> #include <linux/io_uring.h> >>>> +#include <linux/bpf.h> >>>> >>>> #include <asm/pgalloc.h> >>>> #include <linux/uaccess.h> >>>> @@ -734,6 +735,7 @@ void __put_task_struct(struct task_struct *tsk) >>>> cgroup_free(tsk); >>>> task_numa_free(tsk, true); >>>> security_task_free(tsk); >>>> + bpf_task_storage_free(tsk); >>>> exit_creds(tsk); >>> If exit_creds() is traced by a bpf and this bpf is doing >>> bpf_task_storage_get(..., BPF_LOCAL_STORAGE_GET_F_CREATE), >>> new task storage will be created after bpf_task_storage_free(). >>> >>> I recalled there was an earlier discussion with KP and KP mentioned >>> BPF_LSM will not be called with a task that is going away. >>> It seems enabling bpf task storage in bpf tracing will break >>> this assumption and needs to be addressed? >> >> For tracing programs, I think we will need an allow list where >> task local storage can be used. > Instead of whitelist, can refcount_inc_not_zero(&tsk->usage) be used? I think we can put refcount_inc_not_zero() in bpf_task_storage_get, like: diff --git i/kernel/bpf/bpf_task_storage.c w/kernel/bpf/bpf_task_storage.c index f654b56907b69..93d01b0a010e6 100644 --- i/kernel/bpf/bpf_task_storage.c +++ w/kernel/bpf/bpf_task_storage.c @@ -216,6 +216,9 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *, * by an RCU read-side critical section. */ if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) { + if (!refcount_inc_not_zero(&task->usage)) + return -EBUSY; + sdata = bpf_local_storage_update( task, (struct bpf_local_storage_map *)map, value, BPF_NOEXIST); But where shall we add the refcount_dec()? IIUC, we cannot add it to __put_task_struct(). Thanks, Song ^ permalink raw reply related [flat|nested] 29+ messages in thread
* Re: [PATCH bpf-next 1/4] bpf: enable task local storage for tracing programs 2021-01-11 23:45 ` Song Liu @ 2021-01-12 16:32 ` Yonghong Song 2021-01-12 16:53 ` KP Singh 0 siblings, 1 reply; 29+ messages in thread From: Yonghong Song @ 2021-01-12 16:32 UTC (permalink / raw) To: Song Liu, Martin Lau Cc: KP Singh, bpf, Networking, open list, mingo, Peter Zijlstra, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko, John Fastabend, Kernel Team, Hao Luo, kernel test robot On 1/11/21 3:45 PM, Song Liu wrote: > > >> On Jan 11, 2021, at 1:58 PM, Martin Lau <kafai@fb.com> wrote: >> >> On Mon, Jan 11, 2021 at 10:35:43PM +0100, KP Singh wrote: >>> On Mon, Jan 11, 2021 at 7:57 PM Martin KaFai Lau <kafai@fb.com> wrote: >>>> >>>> On Fri, Jan 08, 2021 at 03:19:47PM -0800, Song Liu wrote: >>>> >>>> [ ... ] >>>> >>>>> diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c >>>>> index dd5aedee99e73..9bd47ad2b26f1 100644 >>>>> --- a/kernel/bpf/bpf_local_storage.c >>>>> +++ b/kernel/bpf/bpf_local_storage.c >>>>> @@ -140,17 +140,18 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem) >>>>> { >>>>> struct bpf_local_storage *local_storage; >>>>> bool free_local_storage = false; >>>>> + unsigned long flags; >>>>> >>>>> if (unlikely(!selem_linked_to_storage(selem))) >>>>> /* selem has already been unlinked from sk */ >>>>> return; >>>>> >>>>> local_storage = rcu_dereference(selem->local_storage); >>>>> - raw_spin_lock_bh(&local_storage->lock); >>>>> + raw_spin_lock_irqsave(&local_storage->lock, flags); >>>> It will be useful to have a few words in commit message on this change >>>> for future reference purpose. >>>> >>>> Please also remove the in_irq() check from bpf_sk_storage.c >>>> to avoid confusion in the future. It probably should >>>> be in a separate patch. >>>> >>>> [ ... 
] >>>> >>>>> diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c >>>>> index 4ef1959a78f27..f654b56907b69 100644 >>>>> diff --git a/kernel/fork.c b/kernel/fork.c >>>>> index 7425b3224891d..3d65c8ebfd594 100644 >>>> [ ... ] >>>> >>>>> --- a/kernel/fork.c >>>>> +++ b/kernel/fork.c >>>>> @@ -96,6 +96,7 @@ >>>>> #include <linux/kasan.h> >>>>> #include <linux/scs.h> >>>>> #include <linux/io_uring.h> >>>>> +#include <linux/bpf.h> >>>>> >>>>> #include <asm/pgalloc.h> >>>>> #include <linux/uaccess.h> >>>>> @@ -734,6 +735,7 @@ void __put_task_struct(struct task_struct *tsk) >>>>> cgroup_free(tsk); >>>>> task_numa_free(tsk, true); >>>>> security_task_free(tsk); >>>>> + bpf_task_storage_free(tsk); >>>>> exit_creds(tsk); >>>> If exit_creds() is traced by a bpf and this bpf is doing >>>> bpf_task_storage_get(..., BPF_LOCAL_STORAGE_GET_F_CREATE), >>>> new task storage will be created after bpf_task_storage_free(). >>>> >>>> I recalled there was an earlier discussion with KP and KP mentioned >>>> BPF_LSM will not be called with a task that is going away. >>>> It seems enabling bpf task storage in bpf tracing will break >>>> this assumption and needs to be addressed? >>> >>> For tracing programs, I think we will need an allow list where >>> task local storage can be used. >> Instead of whitelist, can refcount_inc_not_zero(&tsk->usage) be used? > > I think we can put refcount_inc_not_zero() in bpf_task_storage_get, like: > > diff --git i/kernel/bpf/bpf_task_storage.c w/kernel/bpf/bpf_task_storage.c > index f654b56907b69..93d01b0a010e6 100644 > --- i/kernel/bpf/bpf_task_storage.c > +++ w/kernel/bpf/bpf_task_storage.c > @@ -216,6 +216,9 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *, > * by an RCU read-side critical section. 
> */ > if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) { > + if (!refcount_inc_not_zero(&task->usage)) > + return -EBUSY; > + > sdata = bpf_local_storage_update( > task, (struct bpf_local_storage_map *)map, value, > BPF_NOEXIST); > > But where shall we add the refcount_dec()? IIUC, we cannot add it to > __put_task_struct(). Maybe put_task_struct()? > Thanks, > Song > ^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH bpf-next 1/4] bpf: enable task local storage for tracing programs 2021-01-12 16:32 ` Yonghong Song @ 2021-01-12 16:53 ` KP Singh 2021-01-15 23:34 ` Song Liu 0 siblings, 1 reply; 29+ messages in thread From: KP Singh @ 2021-01-12 16:53 UTC (permalink / raw) To: Yonghong Song Cc: Song Liu, Martin Lau, bpf, Networking, open list, mingo, Peter Zijlstra, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko, John Fastabend, Kernel Team, Hao Luo, kernel test robot On Tue, Jan 12, 2021 at 5:32 PM Yonghong Song <yhs@fb.com> wrote: > > > > On 1/11/21 3:45 PM, Song Liu wrote: > > > > > >> On Jan 11, 2021, at 1:58 PM, Martin Lau <kafai@fb.com> wrote: > >> > >> On Mon, Jan 11, 2021 at 10:35:43PM +0100, KP Singh wrote: > >>> On Mon, Jan 11, 2021 at 7:57 PM Martin KaFai Lau <kafai@fb.com> wrote: > >>>> > >>>> On Fri, Jan 08, 2021 at 03:19:47PM -0800, Song Liu wrote: > >>>> > >>>> [ ... ] > >>>> > >>>>> diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c > >>>>> index dd5aedee99e73..9bd47ad2b26f1 100644 > >>>>> --- a/kernel/bpf/bpf_local_storage.c > >>>>> +++ b/kernel/bpf/bpf_local_storage.c [...] > >>>>> +#include <linux/bpf.h> > >>>>> > >>>>> #include <asm/pgalloc.h> > >>>>> #include <linux/uaccess.h> > >>>>> @@ -734,6 +735,7 @@ void __put_task_struct(struct task_struct *tsk) > >>>>> cgroup_free(tsk); > >>>>> task_numa_free(tsk, true); > >>>>> security_task_free(tsk); > >>>>> + bpf_task_storage_free(tsk); > >>>>> exit_creds(tsk); > >>>> If exit_creds() is traced by a bpf and this bpf is doing > >>>> bpf_task_storage_get(..., BPF_LOCAL_STORAGE_GET_F_CREATE), > >>>> new task storage will be created after bpf_task_storage_free(). > >>>> > >>>> I recalled there was an earlier discussion with KP and KP mentioned > >>>> BPF_LSM will not be called with a task that is going away. > >>>> It seems enabling bpf task storage in bpf tracing will break > >>>> this assumption and needs to be addressed? 
> >>> > >>> For tracing programs, I think we will need an allow list where > >>> task local storage can be used. > >> Instead of whitelist, can refcount_inc_not_zero(&tsk->usage) be used? > > > > I think we can put refcount_inc_not_zero() in bpf_task_storage_get, like: > > > > diff --git i/kernel/bpf/bpf_task_storage.c w/kernel/bpf/bpf_task_storage.c > > index f654b56907b69..93d01b0a010e6 100644 > > --- i/kernel/bpf/bpf_task_storage.c > > +++ w/kernel/bpf/bpf_task_storage.c > > @@ -216,6 +216,9 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *, > > * by an RCU read-side critical section. > > */ > > if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) { > > + if (!refcount_inc_not_zero(&task->usage)) > > + return -EBUSY; > > + > > sdata = bpf_local_storage_update( > > task, (struct bpf_local_storage_map *)map, value, > > BPF_NOEXIST); > > > > But where shall we add the refcount_dec()? IIUC, we cannot add it to > > __put_task_struct(). > > Maybe put_task_struct()? Yeah, something like, or if you find a more elegant alternative :) --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -107,13 +107,20 @@ extern void __put_task_struct(struct task_struct *t); static inline void put_task_struct(struct task_struct *t) { - if (refcount_dec_and_test(&t->usage)) + + if (rcu_access_pointer(t->bpf_storage)) { + if (refcount_sub_and_test(2, &t->usage)) + __put_task_struct(t); + } else if (refcount_dec_and_test(&t->usage)) __put_task_struct(t); } static inline void put_task_struct_many(struct task_struct *t, int nr) { - if (refcount_sub_and_test(nr, &t->usage)) + if (rcu_access_pointer(t->bpf_storage)) { + if (refcount_sub_and_test(nr + 1, &t->usage)) + __put_task_struct(t); + } else if (refcount_sub_and_test(nr, &t->usage)) __put_task_struct(t); } I may be missing something but shouldn't bpf_storage be an __rcu member like we have for sk_bpf_storage? 
#ifdef CONFIG_BPF_SYSCALL struct bpf_local_storage __rcu *sk_bpf_storage; #endif > > > Thanks, > > Song > > ^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH bpf-next 1/4] bpf: enable task local storage for tracing programs 2021-01-12 16:53 ` KP Singh @ 2021-01-15 23:34 ` Song Liu 2021-01-16 0:55 ` Yonghong Song 0 siblings, 1 reply; 29+ messages in thread From: Song Liu @ 2021-01-15 23:34 UTC (permalink / raw) To: KP Singh Cc: Yonghong Song, Martin Lau, bpf, Networking, open list, mingo, Peter Zijlstra, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko, John Fastabend, Kernel Team, Hao Luo, kernel test robot > On Jan 12, 2021, at 8:53 AM, KP Singh <kpsingh@kernel.org> wrote: > > On Tue, Jan 12, 2021 at 5:32 PM Yonghong Song <yhs@fb.com> wrote: >> >> >> >> On 1/11/21 3:45 PM, Song Liu wrote: >>> >>> >>>> On Jan 11, 2021, at 1:58 PM, Martin Lau <kafai@fb.com> wrote: >>>> >>>> On Mon, Jan 11, 2021 at 10:35:43PM +0100, KP Singh wrote: >>>>> On Mon, Jan 11, 2021 at 7:57 PM Martin KaFai Lau <kafai@fb.com> wrote: >>>>>> >>>>>> On Fri, Jan 08, 2021 at 03:19:47PM -0800, Song Liu wrote: >>>>>> >>>>>> [ ... ] >>>>>> >>>>>>> diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c >>>>>>> index dd5aedee99e73..9bd47ad2b26f1 100644 >>>>>>> --- a/kernel/bpf/bpf_local_storage.c >>>>>>> +++ b/kernel/bpf/bpf_local_storage.c > > [...] > >>>>>>> +#include <linux/bpf.h> >>>>>>> >>>>>>> #include <asm/pgalloc.h> >>>>>>> #include <linux/uaccess.h> >>>>>>> @@ -734,6 +735,7 @@ void __put_task_struct(struct task_struct *tsk) >>>>>>> cgroup_free(tsk); >>>>>>> task_numa_free(tsk, true); >>>>>>> security_task_free(tsk); >>>>>>> + bpf_task_storage_free(tsk); >>>>>>> exit_creds(tsk); >>>>>> If exit_creds() is traced by a bpf and this bpf is doing >>>>>> bpf_task_storage_get(..., BPF_LOCAL_STORAGE_GET_F_CREATE), >>>>>> new task storage will be created after bpf_task_storage_free(). >>>>>> >>>>>> I recalled there was an earlier discussion with KP and KP mentioned >>>>>> BPF_LSM will not be called with a task that is going away. 
>>>>>> It seems enabling bpf task storage in bpf tracing will break >>>>>> this assumption and needs to be addressed? >>>>> >>>>> For tracing programs, I think we will need an allow list where >>>>> task local storage can be used. >>>> Instead of whitelist, can refcount_inc_not_zero(&tsk->usage) be used? >>> >>> I think we can put refcount_inc_not_zero() in bpf_task_storage_get, like: >>> >>> diff --git i/kernel/bpf/bpf_task_storage.c w/kernel/bpf/bpf_task_storage.c >>> index f654b56907b69..93d01b0a010e6 100644 >>> --- i/kernel/bpf/bpf_task_storage.c >>> +++ w/kernel/bpf/bpf_task_storage.c >>> @@ -216,6 +216,9 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *, >>> * by an RCU read-side critical section. >>> */ >>> if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) { >>> + if (!refcount_inc_not_zero(&task->usage)) >>> + return -EBUSY; >>> + >>> sdata = bpf_local_storage_update( >>> task, (struct bpf_local_storage_map *)map, value, >>> BPF_NOEXIST); >>> >>> But where shall we add the refcount_dec()? IIUC, we cannot add it to >>> __put_task_struct(). >> >> Maybe put_task_struct()? 
> > Yeah, something like, or if you find a more elegant alternative :) > > --- a/include/linux/sched/task.h > +++ b/include/linux/sched/task.h > @@ -107,13 +107,20 @@ extern void __put_task_struct(struct task_struct *t); > > static inline void put_task_struct(struct task_struct *t) > { > - if (refcount_dec_and_test(&t->usage)) > + > + if (rcu_access_pointer(t->bpf_storage)) { > + if (refcount_sub_and_test(2, &t->usage)) > + __put_task_struct(t); > + } else if (refcount_dec_and_test(&t->usage)) > __put_task_struct(t); > } > > static inline void put_task_struct_many(struct task_struct *t, int nr) > { > - if (refcount_sub_and_test(nr, &t->usage)) > + if (rcu_access_pointer(t->bpf_storage)) { > + if (refcount_sub_and_test(nr + 1, &t->usage)) > + __put_task_struct(t); > + } else if (refcount_sub_and_test(nr, &t->usage)) > __put_task_struct(t); > } It is not ideal to leak bpf_storage here. How about we only add the following: diff --git i/kernel/bpf/bpf_task_storage.c w/kernel/bpf/bpf_task_storage.c index f654b56907b69..2811b9fc47233 100644 --- i/kernel/bpf/bpf_task_storage.c +++ w/kernel/bpf/bpf_task_storage.c @@ -216,6 +216,10 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *, * by an RCU read-side critical section. */ if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) { + /* the task_struct is being freed, fail over*/ + if (!refcount_read(&task->usage)) + return -EBUSY; + sdata = bpf_local_storage_update( task, (struct bpf_local_storage_map *)map, value, BPF_NOEXIST); > > > I may be missing something but shouldn't bpf_storage be an __rcu > member like we have for sk_bpf_storage? Good catch! I will fix this in v2. Thanks, Song ^ permalink raw reply related [flat|nested] 29+ messages in thread
* Re: [PATCH bpf-next 1/4] bpf: enable task local storage for tracing programs 2021-01-15 23:34 ` Song Liu @ 2021-01-16 0:55 ` Yonghong Song 2021-01-16 1:12 ` Song Liu 0 siblings, 1 reply; 29+ messages in thread From: Yonghong Song @ 2021-01-16 0:55 UTC (permalink / raw) To: Song Liu, KP Singh Cc: Martin Lau, bpf, Networking, open list, mingo, Peter Zijlstra, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko, John Fastabend, Kernel Team, Hao Luo, kernel test robot On 1/15/21 3:34 PM, Song Liu wrote: > > >> On Jan 12, 2021, at 8:53 AM, KP Singh <kpsingh@kernel.org> wrote: >> >> On Tue, Jan 12, 2021 at 5:32 PM Yonghong Song <yhs@fb.com> wrote: >>> >>> >>> >>> On 1/11/21 3:45 PM, Song Liu wrote: >>>> >>>> >>>>> On Jan 11, 2021, at 1:58 PM, Martin Lau <kafai@fb.com> wrote: >>>>> >>>>> On Mon, Jan 11, 2021 at 10:35:43PM +0100, KP Singh wrote: >>>>>> On Mon, Jan 11, 2021 at 7:57 PM Martin KaFai Lau <kafai@fb.com> wrote: >>>>>>> >>>>>>> On Fri, Jan 08, 2021 at 03:19:47PM -0800, Song Liu wrote: >>>>>>> >>>>>>> [ ... ] >>>>>>> >>>>>>>> diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c >>>>>>>> index dd5aedee99e73..9bd47ad2b26f1 100644 >>>>>>>> --- a/kernel/bpf/bpf_local_storage.c >>>>>>>> +++ b/kernel/bpf/bpf_local_storage.c >> >> [...] >> >>>>>>>> +#include <linux/bpf.h> >>>>>>>> >>>>>>>> #include <asm/pgalloc.h> >>>>>>>> #include <linux/uaccess.h> >>>>>>>> @@ -734,6 +735,7 @@ void __put_task_struct(struct task_struct *tsk) >>>>>>>> cgroup_free(tsk); >>>>>>>> task_numa_free(tsk, true); >>>>>>>> security_task_free(tsk); >>>>>>>> + bpf_task_storage_free(tsk); >>>>>>>> exit_creds(tsk); >>>>>>> If exit_creds() is traced by a bpf and this bpf is doing >>>>>>> bpf_task_storage_get(..., BPF_LOCAL_STORAGE_GET_F_CREATE), >>>>>>> new task storage will be created after bpf_task_storage_free(). >>>>>>> >>>>>>> I recalled there was an earlier discussion with KP and KP mentioned >>>>>>> BPF_LSM will not be called with a task that is going away. 
>>>>>>> It seems enabling bpf task storage in bpf tracing will break >>>>>>> this assumption and needs to be addressed? >>>>>> >>>>>> For tracing programs, I think we will need an allow list where >>>>>> task local storage can be used. >>>>> Instead of whitelist, can refcount_inc_not_zero(&tsk->usage) be used? >>>> >>>> I think we can put refcount_inc_not_zero() in bpf_task_storage_get, like: >>>> >>>> diff --git i/kernel/bpf/bpf_task_storage.c w/kernel/bpf/bpf_task_storage.c >>>> index f654b56907b69..93d01b0a010e6 100644 >>>> --- i/kernel/bpf/bpf_task_storage.c >>>> +++ w/kernel/bpf/bpf_task_storage.c >>>> @@ -216,6 +216,9 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *, >>>> * by an RCU read-side critical section. >>>> */ >>>> if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) { >>>> + if (!refcount_inc_not_zero(&task->usage)) >>>> + return -EBUSY; >>>> + >>>> sdata = bpf_local_storage_update( >>>> task, (struct bpf_local_storage_map *)map, value, >>>> BPF_NOEXIST); >>>> >>>> But where shall we add the refcount_dec()? IIUC, we cannot add it to >>>> __put_task_struct(). >>> >>> Maybe put_task_struct()? 
>> >> Yeah, something like, or if you find a more elegant alternative :) >> >> --- a/include/linux/sched/task.h >> +++ b/include/linux/sched/task.h >> @@ -107,13 +107,20 @@ extern void __put_task_struct(struct task_struct *t); >> >> static inline void put_task_struct(struct task_struct *t) >> { >> - if (refcount_dec_and_test(&t->usage)) >> + >> + if (rcu_access_pointer(t->bpf_storage)) { >> + if (refcount_sub_and_test(2, &t->usage)) >> + __put_task_struct(t); >> + } else if (refcount_dec_and_test(&t->usage)) >> __put_task_struct(t); >> } >> >> static inline void put_task_struct_many(struct task_struct *t, int nr) >> { >> - if (refcount_sub_and_test(nr, &t->usage)) >> + if (rcu_access_pointer(t->bpf_storage)) { >> + if (refcount_sub_and_test(nr + 1, &t->usage)) >> + __put_task_struct(t); >> + } else if (refcount_sub_and_test(nr, &t->usage)) >> __put_task_struct(t); >> } > > It is not ideal to leak bpf_storage here. How about we only add the > following: > > diff --git i/kernel/bpf/bpf_task_storage.c w/kernel/bpf/bpf_task_storage.c > index f654b56907b69..2811b9fc47233 100644 > --- i/kernel/bpf/bpf_task_storage.c > +++ w/kernel/bpf/bpf_task_storage.c > @@ -216,6 +216,10 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *, > * by an RCU read-side critical section. > */ > if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) { > + /* the task_struct is being freed, fail over*/ > + if (!refcount_read(&task->usage)) > + return -EBUSY; This may not work? Even we check here and task->usage is not 0, it could still become 0 immediately after the above refcount_read, right? > + > sdata = bpf_local_storage_update( > task, (struct bpf_local_storage_map *)map, value, > BPF_NOEXIST); > >> >> >> I may be missing something but shouldn't bpf_storage be an __rcu >> member like we have for sk_bpf_storage? > > Good catch! I will fix this in v2. > > Thanks, > Song > ^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH bpf-next 1/4] bpf: enable task local storage for tracing programs 2021-01-16 0:55 ` Yonghong Song @ 2021-01-16 1:12 ` Song Liu 2021-01-16 1:50 ` Yonghong Song 0 siblings, 1 reply; 29+ messages in thread From: Song Liu @ 2021-01-16 1:12 UTC (permalink / raw) To: Yonghong Song Cc: KP Singh, Martin Lau, bpf, Networking, open list, mingo, Peter Zijlstra, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko, John Fastabend, Kernel Team, Hao Luo, kernel test robot > On Jan 15, 2021, at 4:55 PM, Yonghong Song <yhs@fb.com> wrote: > > > > On 1/15/21 3:34 PM, Song Liu wrote: >>> On Jan 12, 2021, at 8:53 AM, KP Singh <kpsingh@kernel.org> wrote: >>> >>> On Tue, Jan 12, 2021 at 5:32 PM Yonghong Song <yhs@fb.com> wrote: >>>> >>>> >>>> >>>> On 1/11/21 3:45 PM, Song Liu wrote: >>>>> >>>>> >>>>>> On Jan 11, 2021, at 1:58 PM, Martin Lau <kafai@fb.com> wrote: >>>>>> >>>>>> On Mon, Jan 11, 2021 at 10:35:43PM +0100, KP Singh wrote: >>>>>>> On Mon, Jan 11, 2021 at 7:57 PM Martin KaFai Lau <kafai@fb.com> wrote: >>>>>>>> >>>>>>>> On Fri, Jan 08, 2021 at 03:19:47PM -0800, Song Liu wrote: >>>>>>>> >>>>>>>> [ ... ] >>>>>>>> >>>>>>>>> diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c >>>>>>>>> index dd5aedee99e73..9bd47ad2b26f1 100644 >>>>>>>>> --- a/kernel/bpf/bpf_local_storage.c >>>>>>>>> +++ b/kernel/bpf/bpf_local_storage.c >>> >>> [...] >>> >>>>>>>>> +#include <linux/bpf.h> >>>>>>>>> >>>>>>>>> #include <asm/pgalloc.h> >>>>>>>>> #include <linux/uaccess.h> >>>>>>>>> @@ -734,6 +735,7 @@ void __put_task_struct(struct task_struct *tsk) >>>>>>>>> cgroup_free(tsk); >>>>>>>>> task_numa_free(tsk, true); >>>>>>>>> security_task_free(tsk); >>>>>>>>> + bpf_task_storage_free(tsk); >>>>>>>>> exit_creds(tsk); >>>>>>>> If exit_creds() is traced by a bpf and this bpf is doing >>>>>>>> bpf_task_storage_get(..., BPF_LOCAL_STORAGE_GET_F_CREATE), >>>>>>>> new task storage will be created after bpf_task_storage_free(). 
>>>>>>>> >>>>>>>> I recalled there was an earlier discussion with KP and KP mentioned >>>>>>>> BPF_LSM will not be called with a task that is going away. >>>>>>>> It seems enabling bpf task storage in bpf tracing will break >>>>>>>> this assumption and needs to be addressed? >>>>>>> >>>>>>> For tracing programs, I think we will need an allow list where >>>>>>> task local storage can be used. >>>>>> Instead of whitelist, can refcount_inc_not_zero(&tsk->usage) be used? >>>>> >>>>> I think we can put refcount_inc_not_zero() in bpf_task_storage_get, like: >>>>> >>>>> diff --git i/kernel/bpf/bpf_task_storage.c w/kernel/bpf/bpf_task_storage.c >>>>> index f654b56907b69..93d01b0a010e6 100644 >>>>> --- i/kernel/bpf/bpf_task_storage.c >>>>> +++ w/kernel/bpf/bpf_task_storage.c >>>>> @@ -216,6 +216,9 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *, >>>>> * by an RCU read-side critical section. >>>>> */ >>>>> if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) { >>>>> + if (!refcount_inc_not_zero(&task->usage)) >>>>> + return -EBUSY; >>>>> + >>>>> sdata = bpf_local_storage_update( >>>>> task, (struct bpf_local_storage_map *)map, value, >>>>> BPF_NOEXIST); >>>>> >>>>> But where shall we add the refcount_dec()? IIUC, we cannot add it to >>>>> __put_task_struct(). >>>> >>>> Maybe put_task_struct()? 
>>> >>> Yeah, something like, or if you find a more elegant alternative :) >>> >>> --- a/include/linux/sched/task.h >>> +++ b/include/linux/sched/task.h >>> @@ -107,13 +107,20 @@ extern void __put_task_struct(struct task_struct *t); >>> >>> static inline void put_task_struct(struct task_struct *t) >>> { >>> - if (refcount_dec_and_test(&t->usage)) >>> + >>> + if (rcu_access_pointer(t->bpf_storage)) { >>> + if (refcount_sub_and_test(2, &t->usage)) >>> + __put_task_struct(t); >>> + } else if (refcount_dec_and_test(&t->usage)) >>> __put_task_struct(t); >>> } >>> >>> static inline void put_task_struct_many(struct task_struct *t, int nr) >>> { >>> - if (refcount_sub_and_test(nr, &t->usage)) >>> + if (rcu_access_pointer(t->bpf_storage)) { >>> + if (refcount_sub_and_test(nr + 1, &t->usage)) >>> + __put_task_struct(t); >>> + } else if (refcount_sub_and_test(nr, &t->usage)) >>> __put_task_struct(t); >>> } >> It is not ideal to leak bpf_storage here. How about we only add the >> following: >> diff --git i/kernel/bpf/bpf_task_storage.c w/kernel/bpf/bpf_task_storage.c >> index f654b56907b69..2811b9fc47233 100644 >> --- i/kernel/bpf/bpf_task_storage.c >> +++ w/kernel/bpf/bpf_task_storage.c >> @@ -216,6 +216,10 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *, >> * by an RCU read-side critical section. >> */ >> if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) { >> + /* the task_struct is being freed, fail over*/ >> + if (!refcount_read(&task->usage)) >> + return -EBUSY; > > This may not work? Even we check here and task->usage is not 0, it could still become 0 immediately after the above refcount_read, right? We call bpf_task_storage_get() with "task" that has valid BTF, so "task" should not go away during the BPF program? Whatever mechanism that triggers the BPF program should either hold a reference to task (usage > 0) or be the only one owning it (usage == 0, in __put_task_struct). Did I miss anything? 
Thanks, Song > >> + >> sdata = bpf_local_storage_update( >> task, (struct bpf_local_storage_map *)map, value, >> BPF_NOEXIST); >>> >>> >>> I may be missing something but shouldn't bpf_storage be an __rcu >>> member like we have for sk_bpf_storage? >> Good catch! I will fix this in v2. >> Thanks, >> Song ^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH bpf-next 1/4] bpf: enable task local storage for tracing programs 2021-01-16 1:12 ` Song Liu @ 2021-01-16 1:50 ` Yonghong Song 0 siblings, 0 replies; 29+ messages in thread From: Yonghong Song @ 2021-01-16 1:50 UTC (permalink / raw) To: Song Liu Cc: KP Singh, Martin Lau, bpf, Networking, open list, mingo, Peter Zijlstra, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko, John Fastabend, Kernel Team, Hao Luo, kernel test robot On 1/15/21 5:12 PM, Song Liu wrote: > > >> On Jan 15, 2021, at 4:55 PM, Yonghong Song <yhs@fb.com> wrote: >> >> >> >> On 1/15/21 3:34 PM, Song Liu wrote: >>>> On Jan 12, 2021, at 8:53 AM, KP Singh <kpsingh@kernel.org> wrote: >>>> >>>> On Tue, Jan 12, 2021 at 5:32 PM Yonghong Song <yhs@fb.com> wrote: >>>>> >>>>> >>>>> >>>>> On 1/11/21 3:45 PM, Song Liu wrote: >>>>>> >>>>>> >>>>>>> On Jan 11, 2021, at 1:58 PM, Martin Lau <kafai@fb.com> wrote: >>>>>>> >>>>>>> On Mon, Jan 11, 2021 at 10:35:43PM +0100, KP Singh wrote: >>>>>>>> On Mon, Jan 11, 2021 at 7:57 PM Martin KaFai Lau <kafai@fb.com> wrote: >>>>>>>>> >>>>>>>>> On Fri, Jan 08, 2021 at 03:19:47PM -0800, Song Liu wrote: >>>>>>>>> >>>>>>>>> [ ... ] >>>>>>>>> >>>>>>>>>> diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c >>>>>>>>>> index dd5aedee99e73..9bd47ad2b26f1 100644 >>>>>>>>>> --- a/kernel/bpf/bpf_local_storage.c >>>>>>>>>> +++ b/kernel/bpf/bpf_local_storage.c >>>> >>>> [...] >>>> >>>>>>>>>> +#include <linux/bpf.h> >>>>>>>>>> >>>>>>>>>> #include <asm/pgalloc.h> >>>>>>>>>> #include <linux/uaccess.h> >>>>>>>>>> @@ -734,6 +735,7 @@ void __put_task_struct(struct task_struct *tsk) >>>>>>>>>> cgroup_free(tsk); >>>>>>>>>> task_numa_free(tsk, true); >>>>>>>>>> security_task_free(tsk); >>>>>>>>>> + bpf_task_storage_free(tsk); >>>>>>>>>> exit_creds(tsk); >>>>>>>>> If exit_creds() is traced by a bpf and this bpf is doing >>>>>>>>> bpf_task_storage_get(..., BPF_LOCAL_STORAGE_GET_F_CREATE), >>>>>>>>> new task storage will be created after bpf_task_storage_free(). 
>>>>>>>>> >>>>>>>>> I recalled there was an earlier discussion with KP and KP mentioned >>>>>>>>> BPF_LSM will not be called with a task that is going away. >>>>>>>>> It seems enabling bpf task storage in bpf tracing will break >>>>>>>>> this assumption and needs to be addressed? >>>>>>>> >>>>>>>> For tracing programs, I think we will need an allow list where >>>>>>>> task local storage can be used. >>>>>>> Instead of whitelist, can refcount_inc_not_zero(&tsk->usage) be used? >>>>>> >>>>>> I think we can put refcount_inc_not_zero() in bpf_task_storage_get, like: >>>>>> >>>>>> diff --git i/kernel/bpf/bpf_task_storage.c w/kernel/bpf/bpf_task_storage.c >>>>>> index f654b56907b69..93d01b0a010e6 100644 >>>>>> --- i/kernel/bpf/bpf_task_storage.c >>>>>> +++ w/kernel/bpf/bpf_task_storage.c >>>>>> @@ -216,6 +216,9 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *, >>>>>> * by an RCU read-side critical section. >>>>>> */ >>>>>> if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) { >>>>>> + if (!refcount_inc_not_zero(&task->usage)) >>>>>> + return -EBUSY; >>>>>> + >>>>>> sdata = bpf_local_storage_update( >>>>>> task, (struct bpf_local_storage_map *)map, value, >>>>>> BPF_NOEXIST); >>>>>> >>>>>> But where shall we add the refcount_dec()? IIUC, we cannot add it to >>>>>> __put_task_struct(). >>>>> >>>>> Maybe put_task_struct()? 
>>>> >>>> Yeah, something like, or if you find a more elegant alternative :) >>>> >>>> --- a/include/linux/sched/task.h >>>> +++ b/include/linux/sched/task.h >>>> @@ -107,13 +107,20 @@ extern void __put_task_struct(struct task_struct *t); >>>> >>>> static inline void put_task_struct(struct task_struct *t) >>>> { >>>> - if (refcount_dec_and_test(&t->usage)) >>>> + >>>> + if (rcu_access_pointer(t->bpf_storage)) { >>>> + if (refcount_sub_and_test(2, &t->usage)) >>>> + __put_task_struct(t); >>>> + } else if (refcount_dec_and_test(&t->usage)) >>>> __put_task_struct(t); >>>> } >>>> >>>> static inline void put_task_struct_many(struct task_struct *t, int nr) >>>> { >>>> - if (refcount_sub_and_test(nr, &t->usage)) >>>> + if (rcu_access_pointer(t->bpf_storage)) { >>>> + if (refcount_sub_and_test(nr + 1, &t->usage)) >>>> + __put_task_struct(t); >>>> + } else if (refcount_sub_and_test(nr, &t->usage)) >>>> __put_task_struct(t); >>>> } >>> It is not ideal to leak bpf_storage here. How about we only add the >>> following: >>> diff --git i/kernel/bpf/bpf_task_storage.c w/kernel/bpf/bpf_task_storage.c >>> index f654b56907b69..2811b9fc47233 100644 >>> --- i/kernel/bpf/bpf_task_storage.c >>> +++ w/kernel/bpf/bpf_task_storage.c >>> @@ -216,6 +216,10 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *, >>> * by an RCU read-side critical section. >>> */ >>> if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) { >>> + /* the task_struct is being freed, fail over*/ >>> + if (!refcount_read(&task->usage)) >>> + return -EBUSY; >> >> This may not work? Even we check here and task->usage is not 0, it could still become 0 immediately after the above refcount_read, right? > > We call bpf_task_storage_get() with "task" that has valid BTF, so "task" > should not go away during the BPF program? Whatever mechanism that Oh, right. this is true. Otherwise, we cannot use task ptr in the helper. 
> triggers the BPF program should either hold a reference to task (usage > 0) > or be the only one owning it (usage == 0, in __put_task_struct). Did I miss > anything? Sorry. I think you are right. Not sure about the LSM requirement. There are two more possible ways to check whether a task is exiting, which happens before __put_task_struct(): . check task->exit_state . check task->flags & PF_EXITING (used in bpf_trace.c) Not sure which condition is the correct one to check. > > Thanks, > Song > >> >>> + >>> sdata = bpf_local_storage_update( >>> task, (struct bpf_local_storage_map *)map, value, >>> BPF_NOEXIST); >>>> >>>> >>>> I may be missing something but shouldn't bpf_storage be an __rcu >>>> member like we have for sk_bpf_storage? >>> Good catch! I will fix this in v2. >>> Thanks, >>> Song ^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH bpf-next 1/4] bpf: enable task local storage for tracing programs 2021-01-11 18:56 ` Martin KaFai Lau 2021-01-11 21:35 ` KP Singh @ 2021-01-11 23:41 ` Song Liu 2021-01-12 18:21 ` Martin KaFai Lau 1 sibling, 1 reply; 29+ messages in thread From: Song Liu @ 2021-01-11 23:41 UTC (permalink / raw) To: Martin Lau Cc: bpf, Networking, open list, mingo, peterz, ast, daniel, andrii, john.fastabend, kpsingh, Kernel Team, haoluo, kernel test robot > On Jan 11, 2021, at 10:56 AM, Martin Lau <kafai@fb.com> wrote: > > On Fri, Jan 08, 2021 at 03:19:47PM -0800, Song Liu wrote: > > [ ... ] > >> diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c >> index dd5aedee99e73..9bd47ad2b26f1 100644 >> --- a/kernel/bpf/bpf_local_storage.c >> +++ b/kernel/bpf/bpf_local_storage.c >> @@ -140,17 +140,18 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem) >> { >> struct bpf_local_storage *local_storage; >> bool free_local_storage = false; >> + unsigned long flags; >> >> if (unlikely(!selem_linked_to_storage(selem))) >> /* selem has already been unlinked from sk */ >> return; >> >> local_storage = rcu_dereference(selem->local_storage); >> - raw_spin_lock_bh(&local_storage->lock); >> + raw_spin_lock_irqsave(&local_storage->lock, flags); > It will be useful to have a few words in commit message on this change > for future reference purpose. > > Please also remove the in_irq() check from bpf_sk_storage.c > to avoid confusion in the future. It probably should > be in a separate patch. Do you mean we allow bpf_sk_storage_get_tracing() and bpf_sk_storage_delete_tracing() in irq context? 
Like diff --git i/net/core/bpf_sk_storage.c w/net/core/bpf_sk_storage.c index 4edd033e899c0..14dd5e3c67402 100644 --- i/net/core/bpf_sk_storage.c +++ w/net/core/bpf_sk_storage.c @@ -425,7 +425,7 @@ BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk, BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map, struct sock *, sk) { - if (in_irq() || in_nmi()) + if (in_nmi()) return -EPERM; return ____bpf_sk_storage_delete(map, sk); [...] ^ permalink raw reply related [flat|nested] 29+ messages in thread
* Re: [PATCH bpf-next 1/4] bpf: enable task local storage for tracing programs 2021-01-11 23:41 ` Song Liu @ 2021-01-12 18:21 ` Martin KaFai Lau 0 siblings, 0 replies; 29+ messages in thread From: Martin KaFai Lau @ 2021-01-12 18:21 UTC (permalink / raw) To: Song Liu Cc: bpf, Networking, open list, mingo, peterz, ast, daniel, andrii, john.fastabend, kpsingh, Kernel Team, haoluo, kernel test robot On Mon, Jan 11, 2021 at 03:41:26PM -0800, Song Liu wrote: > > > > On Jan 11, 2021, at 10:56 AM, Martin Lau <kafai@fb.com> wrote: > > > > On Fri, Jan 08, 2021 at 03:19:47PM -0800, Song Liu wrote: > > > > [ ... ] > > > >> diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c > >> index dd5aedee99e73..9bd47ad2b26f1 100644 > >> --- a/kernel/bpf/bpf_local_storage.c > >> +++ b/kernel/bpf/bpf_local_storage.c > >> @@ -140,17 +140,18 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem) > >> { > >> struct bpf_local_storage *local_storage; > >> bool free_local_storage = false; > >> + unsigned long flags; > >> > >> if (unlikely(!selem_linked_to_storage(selem))) > >> /* selem has already been unlinked from sk */ > >> return; > >> > >> local_storage = rcu_dereference(selem->local_storage); > >> - raw_spin_lock_bh(&local_storage->lock); > >> + raw_spin_lock_irqsave(&local_storage->lock, flags); > > It will be useful to have a few words in commit message on this change > > for future reference purpose. > > > > Please also remove the in_irq() check from bpf_sk_storage.c > > to avoid confusion in the future. It probably should > > be in a separate patch. > > Do you mean we allow bpf_sk_storage_get_tracing() and > bpf_sk_storage_delete_tracing() in irq context? Like Right. However, after another thought, may be lets skip that for now till a use case comes up and a test can be written. ^ permalink raw reply [flat|nested] 29+ messages in thread
[parent not found: <20210108231950.3844417-4-songliubraving@fb.com>]
* Re: [PATCH bpf-next 3/4] bpf: runqslower: prefer use local vmlinux [not found] ` <20210108231950.3844417-4-songliubraving@fb.com> @ 2021-01-11 17:37 ` Yonghong Song 0 siblings, 0 replies; 29+ messages in thread From: Yonghong Song @ 2021-01-11 17:37 UTC (permalink / raw) To: Song Liu, bpf, netdev, linux-kernel Cc: mingo, peterz, ast, daniel, andrii, john.fastabend, kpsingh, kernel-team, haoluo On 1/8/21 3:19 PM, Song Liu wrote: > Update the Makefile to prefer using ../../../vmlinux, which has latest > definitions for vmlinux.h > > Signed-off-by: Song Liu <songliubraving@fb.com> > --- > tools/bpf/runqslower/Makefile | 3 ++- > 1 file changed, 2 insertions(+), 1 deletion(-) > > diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile > index 4d5ca54fcd4c8..306f1ce5a97b2 100644 > --- a/tools/bpf/runqslower/Makefile > +++ b/tools/bpf/runqslower/Makefile > @@ -19,7 +19,8 @@ CFLAGS := -g -Wall > > # Try to detect best kernel BTF source > KERNEL_REL := $(shell uname -r) > -VMLINUX_BTF_PATHS := /sys/kernel/btf/vmlinux /boot/vmlinux-$(KERNEL_REL) > +VMLINUX_BTF_PATHS := ../../../vmlinux /sys/kernel/btf/vmlinux \ > + /boot/vmlinux-$(KERNEL_REL) selftests/bpf Makefile has: VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ ../../../../vmlinux \ /sys/kernel/btf/vmlinux \ /boot/vmlinux-$(shell uname -r) If you intend to add ../../../vmlinux, I think we should also add $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux). > VMLINUX_BTF_PATH := $(or $(VMLINUX_BTF),$(firstword \ > $(wildcard $(VMLINUX_BTF_PATHS)))) > > ^ permalink raw reply [flat|nested] 29+ messages in thread
[parent not found: <20210108231950.3844417-5-songliubraving@fb.com>]
* Re: [PATCH bpf-next 4/4] bpf: runqslower: use task local storage [not found] ` <20210108231950.3844417-5-songliubraving@fb.com> @ 2021-01-11 17:49 ` Yonghong Song 2021-01-11 22:54 ` Song Liu 0 siblings, 1 reply; 29+ messages in thread From: Yonghong Song @ 2021-01-11 17:49 UTC (permalink / raw) To: Song Liu, bpf, netdev, linux-kernel Cc: mingo, peterz, ast, daniel, andrii, john.fastabend, kpsingh, kernel-team, haoluo On 1/8/21 3:19 PM, Song Liu wrote: > Replace hashtab with task local storage in runqslower. This improves the > performance of these BPF programs. The following table summarizes average > runtime of these programs, in nanoseconds: > > task-local hash-prealloc hash-no-prealloc > handle__sched_wakeup 125 340 3124 > handle__sched_wakeup_new 2812 1510 2998 > handle__sched_switch 151 208 991 > > Note that, task local storage gives better performance than hashtab for > handle__sched_wakeup and handle__sched_switch. On the other hand, for > handle__sched_wakeup_new, task local storage is slower than hashtab with > prealloc. This is because handle__sched_wakeup_new accesses the data for > the first time, so it has to allocate the data for task local storage. > Once the initial allocation is done, subsequent accesses, as those in > handle__sched_wakeup, are much faster with task local storage. If we > disable hashtab prealloc, task local storage is much faster for all 3 > functions. 
> > Signed-off-by: Song Liu <songliubraving@fb.com> > --- > tools/bpf/runqslower/runqslower.bpf.c | 26 +++++++++++++++----------- > 1 file changed, 15 insertions(+), 11 deletions(-) > > diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c > index 1f18a409f0443..c4de4179a0a17 100644 > --- a/tools/bpf/runqslower/runqslower.bpf.c > +++ b/tools/bpf/runqslower/runqslower.bpf.c > @@ -11,9 +11,9 @@ const volatile __u64 min_us = 0; > const volatile pid_t targ_pid = 0; > > struct { > - __uint(type, BPF_MAP_TYPE_HASH); > - __uint(max_entries, 10240); > - __type(key, u32); > + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); > + __uint(map_flags, BPF_F_NO_PREALLOC); > + __type(key, int); > __type(value, u64); > } start SEC(".maps"); > > @@ -25,15 +25,19 @@ struct { > > /* record enqueue timestamp */ > __always_inline > -static int trace_enqueue(u32 tgid, u32 pid) > +static int trace_enqueue(struct task_struct *t) > { > - u64 ts; > + u32 pid = t->pid; > + u64 ts, *ptr; > > if (!pid || (targ_pid && targ_pid != pid)) > return 0; > > ts = bpf_ktime_get_ns(); > - bpf_map_update_elem(&start, &pid, &ts, 0); > + ptr = bpf_task_storage_get(&start, t, 0, > + BPF_LOCAL_STORAGE_GET_F_CREATE); > + if (ptr) > + *ptr = ts; > return 0; > } > > @@ -43,7 +47,7 @@ int handle__sched_wakeup(u64 *ctx) > /* TP_PROTO(struct task_struct *p) */ > struct task_struct *p = (void *)ctx[0]; > > - return trace_enqueue(p->tgid, p->pid); > + return trace_enqueue(p); > } > > SEC("tp_btf/sched_wakeup_new") > @@ -52,7 +56,7 @@ int handle__sched_wakeup_new(u64 *ctx) > /* TP_PROTO(struct task_struct *p) */ > struct task_struct *p = (void *)ctx[0]; > > - return trace_enqueue(p->tgid, p->pid); > + return trace_enqueue(p); > } > > SEC("tp_btf/sched_switch") > @@ -70,12 +74,12 @@ int handle__sched_switch(u64 *ctx) > > /* ivcsw: treat like an enqueue event and store timestamp */ > if (prev->state == TASK_RUNNING) > - trace_enqueue(prev->tgid, prev->pid); > + trace_enqueue(prev); > > pid = 
next->pid; > > /* fetch timestamp and calculate delta */ > - tsp = bpf_map_lookup_elem(&start, &pid); > + tsp = bpf_task_storage_get(&start, next, 0, 0); > if (!tsp) > return 0; /* missed enqueue */ Previously, hash table may overflow so we may have missed enqueue. Here with task local storage, is it possible to add additional pid filtering in the beginning of handle__sched_switch such that missed enqueue here can be treated as an error? > > @@ -91,7 +95,7 @@ int handle__sched_switch(u64 *ctx) > bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, > &event, sizeof(event)); > > - bpf_map_delete_elem(&start, &pid); > + bpf_task_storage_delete(&start, next); > return 0; > } > > ^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH bpf-next 4/4] bpf: runqslower: use task local storage 2021-01-11 17:49 ` [PATCH bpf-next 4/4] bpf: runqslower: use task local storage Yonghong Song @ 2021-01-11 22:54 ` Song Liu 2021-01-12 3:24 ` Yonghong Song 0 siblings, 1 reply; 29+ messages in thread From: Song Liu @ 2021-01-11 22:54 UTC (permalink / raw) To: Yonghong Song Cc: bpf, Networking, linux-kernel, mingo, peterz, ast, daniel, andrii, john.fastabend, kpsingh, Kernel Team, haoluo > On Jan 11, 2021, at 9:49 AM, Yonghong Song <yhs@fb.com> wrote: > > > > On 1/8/21 3:19 PM, Song Liu wrote: >> Replace hashtab with task local storage in runqslower. This improves the >> performance of these BPF programs. The following table summarizes average >> runtime of these programs, in nanoseconds: >> task-local hash-prealloc hash-no-prealloc >> handle__sched_wakeup 125 340 3124 >> handle__sched_wakeup_new 2812 1510 2998 >> handle__sched_switch 151 208 991 >> Note that, task local storage gives better performance than hashtab for >> handle__sched_wakeup and handle__sched_switch. On the other hand, for >> handle__sched_wakeup_new, task local storage is slower than hashtab with >> prealloc. This is because handle__sched_wakeup_new accesses the data for >> the first time, so it has to allocate the data for task local storage. >> Once the initial allocation is done, subsequent accesses, as those in >> handle__sched_wakeup, are much faster with task local storage. If we >> disable hashtab prealloc, task local storage is much faster for all 3 >> functions. 
>> Signed-off-by: Song Liu <songliubraving@fb.com> >> --- >> tools/bpf/runqslower/runqslower.bpf.c | 26 +++++++++++++++----------- >> 1 file changed, 15 insertions(+), 11 deletions(-) >> diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c >> index 1f18a409f0443..c4de4179a0a17 100644 >> --- a/tools/bpf/runqslower/runqslower.bpf.c >> +++ b/tools/bpf/runqslower/runqslower.bpf.c >> @@ -11,9 +11,9 @@ const volatile __u64 min_us = 0; >> const volatile pid_t targ_pid = 0; >> struct { >> - __uint(type, BPF_MAP_TYPE_HASH); >> - __uint(max_entries, 10240); >> - __type(key, u32); >> + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); >> + __uint(map_flags, BPF_F_NO_PREALLOC); >> + __type(key, int); >> __type(value, u64); >> } start SEC(".maps"); >> @@ -25,15 +25,19 @@ struct { >> /* record enqueue timestamp */ >> __always_inline >> -static int trace_enqueue(u32 tgid, u32 pid) >> +static int trace_enqueue(struct task_struct *t) >> { >> - u64 ts; >> + u32 pid = t->pid; >> + u64 ts, *ptr; >> if (!pid || (targ_pid && targ_pid != pid)) >> return 0; >> ts = bpf_ktime_get_ns(); >> - bpf_map_update_elem(&start, &pid, &ts, 0); >> + ptr = bpf_task_storage_get(&start, t, 0, >> + BPF_LOCAL_STORAGE_GET_F_CREATE); >> + if (ptr) >> + *ptr = ts; >> return 0; >> } >> @@ -43,7 +47,7 @@ int handle__sched_wakeup(u64 *ctx) >> /* TP_PROTO(struct task_struct *p) */ >> struct task_struct *p = (void *)ctx[0]; >> - return trace_enqueue(p->tgid, p->pid); >> + return trace_enqueue(p); >> } >> SEC("tp_btf/sched_wakeup_new") >> @@ -52,7 +56,7 @@ int handle__sched_wakeup_new(u64 *ctx) >> /* TP_PROTO(struct task_struct *p) */ >> struct task_struct *p = (void *)ctx[0]; >> - return trace_enqueue(p->tgid, p->pid); >> + return trace_enqueue(p); >> } >> SEC("tp_btf/sched_switch") >> @@ -70,12 +74,12 @@ int handle__sched_switch(u64 *ctx) >> /* ivcsw: treat like an enqueue event and store timestamp */ >> if (prev->state == TASK_RUNNING) >> - trace_enqueue(prev->tgid, prev->pid); >> + 
trace_enqueue(prev); >> pid = next->pid; >> /* fetch timestamp and calculate delta */ >> - tsp = bpf_map_lookup_elem(&start, &pid); >> + tsp = bpf_task_storage_get(&start, next, 0, 0); >> if (!tsp) >> return 0; /* missed enqueue */ > > Previously, hash table may overflow so we may have missed enqueue. > Here with task local storage, is it possible to add additional pid > filtering in the beginning of handle__sched_switch such that > missed enqueue here can be treated as an error? IIUC, hashtab overflow is not the only reason of missed enqueue. If the wakeup (which calls trace_enqueue) happens before runqslower starts, we may still get missed enqueue in sched_switch, no? Thanks, Song ^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH bpf-next 4/4] bpf: runqslower: use task local storage 2021-01-11 22:54 ` Song Liu @ 2021-01-12 3:24 ` Yonghong Song 2021-01-12 7:14 ` Andrii Nakryiko 0 siblings, 1 reply; 29+ messages in thread From: Yonghong Song @ 2021-01-12 3:24 UTC (permalink / raw) To: Song Liu Cc: bpf, Networking, linux-kernel, mingo, peterz, ast, daniel, andrii, john.fastabend, kpsingh, Kernel Team, haoluo On 1/11/21 2:54 PM, Song Liu wrote: > > >> On Jan 11, 2021, at 9:49 AM, Yonghong Song <yhs@fb.com> wrote: >> >> >> >> On 1/8/21 3:19 PM, Song Liu wrote: >>> Replace hashtab with task local storage in runqslower. This improves the >>> performance of these BPF programs. The following table summarizes average >>> runtime of these programs, in nanoseconds: >>> task-local hash-prealloc hash-no-prealloc >>> handle__sched_wakeup 125 340 3124 >>> handle__sched_wakeup_new 2812 1510 2998 >>> handle__sched_switch 151 208 991 >>> Note that, task local storage gives better performance than hashtab for >>> handle__sched_wakeup and handle__sched_switch. On the other hand, for >>> handle__sched_wakeup_new, task local storage is slower than hashtab with >>> prealloc. This is because handle__sched_wakeup_new accesses the data for >>> the first time, so it has to allocate the data for task local storage. >>> Once the initial allocation is done, subsequent accesses, as those in >>> handle__sched_wakeup, are much faster with task local storage. If we >>> disable hashtab prealloc, task local storage is much faster for all 3 >>> functions. 
>>> Signed-off-by: Song Liu <songliubraving@fb.com> >>> --- >>> tools/bpf/runqslower/runqslower.bpf.c | 26 +++++++++++++++----------- >>> 1 file changed, 15 insertions(+), 11 deletions(-) >>> diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c >>> index 1f18a409f0443..c4de4179a0a17 100644 >>> --- a/tools/bpf/runqslower/runqslower.bpf.c >>> +++ b/tools/bpf/runqslower/runqslower.bpf.c >>> @@ -11,9 +11,9 @@ const volatile __u64 min_us = 0; >>> const volatile pid_t targ_pid = 0; >>> struct { >>> - __uint(type, BPF_MAP_TYPE_HASH); >>> - __uint(max_entries, 10240); >>> - __type(key, u32); >>> + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); >>> + __uint(map_flags, BPF_F_NO_PREALLOC); >>> + __type(key, int); >>> __type(value, u64); >>> } start SEC(".maps"); >>> @@ -25,15 +25,19 @@ struct { >>> /* record enqueue timestamp */ >>> __always_inline >>> -static int trace_enqueue(u32 tgid, u32 pid) >>> +static int trace_enqueue(struct task_struct *t) >>> { >>> - u64 ts; >>> + u32 pid = t->pid; >>> + u64 ts, *ptr; >>> if (!pid || (targ_pid && targ_pid != pid)) >>> return 0; >>> ts = bpf_ktime_get_ns(); >>> - bpf_map_update_elem(&start, &pid, &ts, 0); >>> + ptr = bpf_task_storage_get(&start, t, 0, >>> + BPF_LOCAL_STORAGE_GET_F_CREATE); >>> + if (ptr) >>> + *ptr = ts; >>> return 0; >>> } >>> @@ -43,7 +47,7 @@ int handle__sched_wakeup(u64 *ctx) >>> /* TP_PROTO(struct task_struct *p) */ >>> struct task_struct *p = (void *)ctx[0]; >>> - return trace_enqueue(p->tgid, p->pid); >>> + return trace_enqueue(p); >>> } >>> SEC("tp_btf/sched_wakeup_new") >>> @@ -52,7 +56,7 @@ int handle__sched_wakeup_new(u64 *ctx) >>> /* TP_PROTO(struct task_struct *p) */ >>> struct task_struct *p = (void *)ctx[0]; >>> - return trace_enqueue(p->tgid, p->pid); >>> + return trace_enqueue(p); >>> } >>> SEC("tp_btf/sched_switch") >>> @@ -70,12 +74,12 @@ int handle__sched_switch(u64 *ctx) >>> /* ivcsw: treat like an enqueue event and store timestamp */ >>> if (prev->state == 
TASK_RUNNING) >>> - trace_enqueue(prev->tgid, prev->pid); >>> + trace_enqueue(prev); >>> pid = next->pid; >>> /* fetch timestamp and calculate delta */ >>> - tsp = bpf_map_lookup_elem(&start, &pid); >>> + tsp = bpf_task_storage_get(&start, next, 0, 0); >>> if (!tsp) >>> return 0; /* missed enqueue */ >> >> Previously, hash table may overflow so we may have missed enqueue. >> Here with task local storage, is it possible to add additional pid >> filtering in the beginning of handle__sched_switch such that >> missed enqueue here can be treated as an error? > > IIUC, hashtab overflow is not the only reason of missed enqueue. If the > wakeup (which calls trace_enqueue) happens before runqslower starts, we > may still get missed enqueue in sched_switch, no? the wakeup won't happen before runqslower starts since runqslower needs to start to do attachment first and then trace_enqueue() can run. For the current implementation trace_enqueue() will happen for any non-0 pid before setting test_progs tgid, and will happen for any non-0 and test_progs tgid if it is set, so this should be okay if we do filtering in handle__sched_switch. Maybe you can do an experiment to prove whether my point is correct or not. > > Thanks, > Song > ^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH bpf-next 4/4] bpf: runqslower: use task local storage 2021-01-12 3:24 ` Yonghong Song @ 2021-01-12 7:14 ` Andrii Nakryiko 2021-01-12 7:33 ` Yonghong Song 0 siblings, 1 reply; 29+ messages in thread From: Andrii Nakryiko @ 2021-01-12 7:14 UTC (permalink / raw) To: Yonghong Song Cc: Song Liu, bpf, Networking, linux-kernel, mingo, peterz, ast, daniel, andrii, john.fastabend, kpsingh, Kernel Team, haoluo On Mon, Jan 11, 2021 at 7:24 PM Yonghong Song <yhs@fb.com> wrote: > > > > On 1/11/21 2:54 PM, Song Liu wrote: > > > > > >> On Jan 11, 2021, at 9:49 AM, Yonghong Song <yhs@fb.com> wrote: > >> > >> > >> > >> On 1/8/21 3:19 PM, Song Liu wrote: > >>> Replace hashtab with task local storage in runqslower. This improves the > >>> performance of these BPF programs. The following table summarizes average > >>> runtime of these programs, in nanoseconds: > >>> task-local hash-prealloc hash-no-prealloc > >>> handle__sched_wakeup 125 340 3124 > >>> handle__sched_wakeup_new 2812 1510 2998 > >>> handle__sched_switch 151 208 991 > >>> Note that, task local storage gives better performance than hashtab for > >>> handle__sched_wakeup and handle__sched_switch. On the other hand, for > >>> handle__sched_wakeup_new, task local storage is slower than hashtab with > >>> prealloc. This is because handle__sched_wakeup_new accesses the data for > >>> the first time, so it has to allocate the data for task local storage. > >>> Once the initial allocation is done, subsequent accesses, as those in > >>> handle__sched_wakeup, are much faster with task local storage. If we > >>> disable hashtab prealloc, task local storage is much faster for all 3 > >>> functions. 
> >>> Signed-off-by: Song Liu <songliubraving@fb.com> > >>> --- > >>> tools/bpf/runqslower/runqslower.bpf.c | 26 +++++++++++++++----------- > >>> 1 file changed, 15 insertions(+), 11 deletions(-) > >>> diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c > >>> index 1f18a409f0443..c4de4179a0a17 100644 > >>> --- a/tools/bpf/runqslower/runqslower.bpf.c > >>> +++ b/tools/bpf/runqslower/runqslower.bpf.c > >>> @@ -11,9 +11,9 @@ const volatile __u64 min_us = 0; > >>> const volatile pid_t targ_pid = 0; > >>> struct { > >>> - __uint(type, BPF_MAP_TYPE_HASH); > >>> - __uint(max_entries, 10240); > >>> - __type(key, u32); > >>> + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); > >>> + __uint(map_flags, BPF_F_NO_PREALLOC); > >>> + __type(key, int); > >>> __type(value, u64); > >>> } start SEC(".maps"); > >>> @@ -25,15 +25,19 @@ struct { > >>> /* record enqueue timestamp */ > >>> __always_inline > >>> -static int trace_enqueue(u32 tgid, u32 pid) > >>> +static int trace_enqueue(struct task_struct *t) > >>> { > >>> - u64 ts; > >>> + u32 pid = t->pid; > >>> + u64 ts, *ptr; > >>> if (!pid || (targ_pid && targ_pid != pid)) > >>> return 0; > >>> ts = bpf_ktime_get_ns(); > >>> - bpf_map_update_elem(&start, &pid, &ts, 0); > >>> + ptr = bpf_task_storage_get(&start, t, 0, > >>> + BPF_LOCAL_STORAGE_GET_F_CREATE); > >>> + if (ptr) > >>> + *ptr = ts; > >>> return 0; > >>> } > >>> @@ -43,7 +47,7 @@ int handle__sched_wakeup(u64 *ctx) > >>> /* TP_PROTO(struct task_struct *p) */ > >>> struct task_struct *p = (void *)ctx[0]; > >>> - return trace_enqueue(p->tgid, p->pid); > >>> + return trace_enqueue(p); > >>> } > >>> SEC("tp_btf/sched_wakeup_new") > >>> @@ -52,7 +56,7 @@ int handle__sched_wakeup_new(u64 *ctx) > >>> /* TP_PROTO(struct task_struct *p) */ > >>> struct task_struct *p = (void *)ctx[0]; > >>> - return trace_enqueue(p->tgid, p->pid); > >>> + return trace_enqueue(p); > >>> } > >>> SEC("tp_btf/sched_switch") > >>> @@ -70,12 +74,12 @@ int 
handle__sched_switch(u64 *ctx) > >>> /* ivcsw: treat like an enqueue event and store timestamp */ > >>> if (prev->state == TASK_RUNNING) > >>> - trace_enqueue(prev->tgid, prev->pid); > >>> + trace_enqueue(prev); > >>> pid = next->pid; > >>> /* fetch timestamp and calculate delta */ > >>> - tsp = bpf_map_lookup_elem(&start, &pid); > >>> + tsp = bpf_task_storage_get(&start, next, 0, 0); > >>> if (!tsp) > >>> return 0; /* missed enqueue */ > >> > >> Previously, hash table may overflow so we may have missed enqueue. > >> Here with task local storage, is it possible to add additional pid > >> filtering in the beginning of handle__sched_switch such that > >> missed enqueue here can be treated as an error? > > > > IIUC, hashtab overflow is not the only reason of missed enqueue. If the > > wakeup (which calls trace_enqueue) happens before runqslower starts, we > > may still get missed enqueue in sched_switch, no? > > the wakeup won't happen before runqslower starts since runqslower needs > to start to do attachment first and then trace_enqueue() can run. I think Song is right. Given wakeup and sched_switch need to be matched, depending at which exact time we attach BPF programs, we can end up missing wakeup, but not missing sched_switch, no? So it's not an error. > > For the current implementation trace_enqueue() will happen for any non-0 > pid before setting test_progs tgid, and will happen for any non-0 and > test_progs tgid if it is set, so this should be okay if we do filtering > in handle__sched_switch. Maybe you can do an experiment to prove whether > my point is correct or not. > > > > > Thanks, > > Song > > ^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH bpf-next 4/4] bpf: runqslower: use task local storage 2021-01-12 7:14 ` Andrii Nakryiko @ 2021-01-12 7:33 ` Yonghong Song 0 siblings, 0 replies; 29+ messages in thread From: Yonghong Song @ 2021-01-12 7:33 UTC (permalink / raw) To: Andrii Nakryiko Cc: Song Liu, bpf, Networking, linux-kernel, mingo, peterz, ast, daniel, andrii, john.fastabend, kpsingh, Kernel Team, haoluo On 1/11/21 11:14 PM, Andrii Nakryiko wrote: > On Mon, Jan 11, 2021 at 7:24 PM Yonghong Song <yhs@fb.com> wrote: >> >> >> >> On 1/11/21 2:54 PM, Song Liu wrote: >>> >>> >>>> On Jan 11, 2021, at 9:49 AM, Yonghong Song <yhs@fb.com> wrote: >>>> >>>> >>>> >>>> On 1/8/21 3:19 PM, Song Liu wrote: >>>>> Replace hashtab with task local storage in runqslower. This improves the >>>>> performance of these BPF programs. The following table summarizes average >>>>> runtime of these programs, in nanoseconds: >>>>> task-local hash-prealloc hash-no-prealloc >>>>> handle__sched_wakeup 125 340 3124 >>>>> handle__sched_wakeup_new 2812 1510 2998 >>>>> handle__sched_switch 151 208 991 >>>>> Note that, task local storage gives better performance than hashtab for >>>>> handle__sched_wakeup and handle__sched_switch. On the other hand, for >>>>> handle__sched_wakeup_new, task local storage is slower than hashtab with >>>>> prealloc. This is because handle__sched_wakeup_new accesses the data for >>>>> the first time, so it has to allocate the data for task local storage. >>>>> Once the initial allocation is done, subsequent accesses, as those in >>>>> handle__sched_wakeup, are much faster with task local storage. If we >>>>> disable hashtab prealloc, task local storage is much faster for all 3 >>>>> functions. 
>>>>> Signed-off-by: Song Liu <songliubraving@fb.com> >>>>> --- >>>>> tools/bpf/runqslower/runqslower.bpf.c | 26 +++++++++++++++----------- >>>>> 1 file changed, 15 insertions(+), 11 deletions(-) >>>>> diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c >>>>> index 1f18a409f0443..c4de4179a0a17 100644 >>>>> --- a/tools/bpf/runqslower/runqslower.bpf.c >>>>> +++ b/tools/bpf/runqslower/runqslower.bpf.c >>>>> @@ -11,9 +11,9 @@ const volatile __u64 min_us = 0; >>>>> const volatile pid_t targ_pid = 0; >>>>> struct { >>>>> - __uint(type, BPF_MAP_TYPE_HASH); >>>>> - __uint(max_entries, 10240); >>>>> - __type(key, u32); >>>>> + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); >>>>> + __uint(map_flags, BPF_F_NO_PREALLOC); >>>>> + __type(key, int); >>>>> __type(value, u64); >>>>> } start SEC(".maps"); >>>>> @@ -25,15 +25,19 @@ struct { >>>>> /* record enqueue timestamp */ >>>>> __always_inline >>>>> -static int trace_enqueue(u32 tgid, u32 pid) >>>>> +static int trace_enqueue(struct task_struct *t) >>>>> { >>>>> - u64 ts; >>>>> + u32 pid = t->pid; >>>>> + u64 ts, *ptr; >>>>> if (!pid || (targ_pid && targ_pid != pid)) >>>>> return 0; >>>>> ts = bpf_ktime_get_ns(); >>>>> - bpf_map_update_elem(&start, &pid, &ts, 0); >>>>> + ptr = bpf_task_storage_get(&start, t, 0, >>>>> + BPF_LOCAL_STORAGE_GET_F_CREATE); >>>>> + if (ptr) >>>>> + *ptr = ts; >>>>> return 0; >>>>> } >>>>> @@ -43,7 +47,7 @@ int handle__sched_wakeup(u64 *ctx) >>>>> /* TP_PROTO(struct task_struct *p) */ >>>>> struct task_struct *p = (void *)ctx[0]; >>>>> - return trace_enqueue(p->tgid, p->pid); >>>>> + return trace_enqueue(p); >>>>> } >>>>> SEC("tp_btf/sched_wakeup_new") >>>>> @@ -52,7 +56,7 @@ int handle__sched_wakeup_new(u64 *ctx) >>>>> /* TP_PROTO(struct task_struct *p) */ >>>>> struct task_struct *p = (void *)ctx[0]; >>>>> - return trace_enqueue(p->tgid, p->pid); >>>>> + return trace_enqueue(p); >>>>> } >>>>> SEC("tp_btf/sched_switch") >>>>> @@ -70,12 +74,12 @@ int 
handle__sched_switch(u64 *ctx) >>>>> /* ivcsw: treat like an enqueue event and store timestamp */ >>>>> if (prev->state == TASK_RUNNING) >>>>> - trace_enqueue(prev->tgid, prev->pid); >>>>> + trace_enqueue(prev); >>>>> pid = next->pid; >>>>> /* fetch timestamp and calculate delta */ >>>>> - tsp = bpf_map_lookup_elem(&start, &pid); >>>>> + tsp = bpf_task_storage_get(&start, next, 0, 0); >>>>> if (!tsp) >>>>> return 0; /* missed enqueue */ >>>> >>>> Previously, hash table may overflow so we may have missed enqueue. >>>> Here with task local storage, is it possible to add additional pid >>>> filtering in the beginning of handle__sched_switch such that >>>> missed enqueue here can be treated as an error? >>> >>> IIUC, hashtab overflow is not the only reason of missed enqueue. If the >>> wakeup (which calls trace_enqueue) happens before runqslower starts, we >>> may still get missed enqueue in sched_switch, no? >> >> the wakeup won't happen before runqslower starts since runqslower needs >> to start to do attachment first and then trace_enqueue() can run. > > I think Song is right. Given wakeup and sched_switch need to be > matched, depending at which exact time we attach BPF programs, we can > end up missing wakeup, but not missing sched_switch, no? So it's not > an error. The current approach works fine. What I suggested is to tighten sched_switch only for target_pid. wakeup (doing queuing) will be more relaxed than sched_switch to ensure task local storage creation is always there for target_pid regardless of attachment timing. I think it should work, but we have to experiment to see actual results... > >> >> For the current implementation trace_enqueue() will happen for any non-0 >> pid before setting test_progs tgid, and will happen for any non-0 and >> test_progs tgid if it is set, so this should be okay if we do filtering >> in handle__sched_switch. Maybe you can do an experiment to prove whether >> my point is correct or not. 
>> >>> >>> Thanks, >>> Song >>> ^ permalink raw reply [flat|nested] 29+ messages in thread
[parent not found: <20210108231950.3844417-3-songliubraving@fb.com>]
* Re: [PATCH bpf-next 2/4] selftests/bpf: add non-BPF_LSM test for task local storage [not found] ` <20210108231950.3844417-3-songliubraving@fb.com> @ 2021-01-11 17:30 ` Yonghong Song 2021-01-11 17:44 ` KP Singh 2021-01-11 22:49 ` Song Liu 2021-01-12 7:06 ` Andrii Nakryiko 1 sibling, 2 replies; 29+ messages in thread From: Yonghong Song @ 2021-01-11 17:30 UTC (permalink / raw) To: Song Liu, bpf, netdev, linux-kernel Cc: mingo, peterz, ast, daniel, andrii, john.fastabend, kpsingh, kernel-team, haoluo On 1/8/21 3:19 PM, Song Liu wrote: > Task local storage is enabled for tracing programs. Add a test for it > without CONFIG_BPF_LSM. > > Signed-off-by: Song Liu <songliubraving@fb.com> > --- > .../bpf/prog_tests/test_task_local_storage.c | 34 +++++++++++++++++ > .../selftests/bpf/progs/task_local_storage.c | 37 +++++++++++++++++++ > 2 files changed, 71 insertions(+) > create mode 100644 tools/testing/selftests/bpf/prog_tests/test_task_local_storage.c > create mode 100644 tools/testing/selftests/bpf/progs/task_local_storage.c > > diff --git a/tools/testing/selftests/bpf/prog_tests/test_task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_task_local_storage.c > new file mode 100644 > index 0000000000000..7de7a154ebbe6 > --- /dev/null > +++ b/tools/testing/selftests/bpf/prog_tests/test_task_local_storage.c > @@ -0,0 +1,34 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* Copyright (c) 2020 Facebook */ 2020 -> 2021 > + > +#include <sys/types.h> > +#include <unistd.h> > +#include <test_progs.h> > +#include "task_local_storage.skel.h" > + > +static unsigned int duration; > + > +void test_test_task_local_storage(void) > +{ > + struct task_local_storage *skel; > + const int count = 10; > + int i, err; > + > + skel = task_local_storage__open_and_load(); > + Extra line is unnecessary here. > + if (CHECK(!skel, "skel_open_and_load", "skeleton open and load failed\n")) > + return; > + > + err = task_local_storage__attach(skel); > + ditto. 
> + if (CHECK(err, "skel_attach", "skeleton attach failed\n")) > + goto out; > + > + for (i = 0; i < count; i++) > + usleep(1000); Will a smaller usleep value work? If so, I recommend using a smaller value here to reduce test_progs running time. > + CHECK(skel->bss->value < count, "task_local_storage_value", > + "task local value too small\n"); > + > +out: > + task_local_storage__destroy(skel); > +} > diff --git a/tools/testing/selftests/bpf/progs/task_local_storage.c b/tools/testing/selftests/bpf/progs/task_local_storage.c > new file mode 100644 > index 0000000000000..807255c5c162d > --- /dev/null > +++ b/tools/testing/selftests/bpf/progs/task_local_storage.c > @@ -0,0 +1,37 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* Copyright (c) 2020 Facebook */ 2020 -> 2021 > + > +#include "vmlinux.h" > +#include <bpf/bpf_helpers.h> > +#include <bpf/bpf_tracing.h> > + > +char _license[] SEC("license") = "GPL"; > + > +struct local_data { > + __u64 val; > +}; > + > +struct { > + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); > + __uint(map_flags, BPF_F_NO_PREALLOC); > + __type(key, int); > + __type(value, struct local_data); > +} task_storage_map SEC(".maps"); > + > +int value = 0; > + > +SEC("tp_btf/sched_switch") > +int BPF_PROG(on_switch, bool preempt, struct task_struct *prev, > + struct task_struct *next) > +{ > + struct local_data *storage; Is it possible to do some filtering based on the test_progs pid so the below bpf_task_storage_get is only called for the test_progs process? This is more targeted and can avoid counter contributions from other unrelated processes and make test_task_local_storage.c result comparison more meaningful. > + > + storage = bpf_task_storage_get(&task_storage_map, > + next, 0, > + BPF_LOCAL_STORAGE_GET_F_CREATE); > + if (storage) { > + storage->val++; > + value = storage->val; > + } > + return 0; > +} > ^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH bpf-next 2/4] selftests/bpf: add non-BPF_LSM test for task local storage 2021-01-11 17:30 ` [PATCH bpf-next 2/4] selftests/bpf: add non-BPF_LSM test for " Yonghong Song @ 2021-01-11 17:44 ` KP Singh 2021-01-11 22:50 ` Song Liu 2021-01-11 22:49 ` Song Liu 1 sibling, 1 reply; 29+ messages in thread From: KP Singh @ 2021-01-11 17:44 UTC (permalink / raw) To: Yonghong Song Cc: Song Liu, bpf, Networking, open list, mingo, Peter Zijlstra, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko, John Fastabend, Kernel Team, Hao Luo On Mon, Jan 11, 2021 at 6:31 PM Yonghong Song <yhs@fb.com> wrote: > > > > On 1/8/21 3:19 PM, Song Liu wrote: > > Task local storage is enabled for tracing programs. Add a test for it > > without CONFIG_BPF_LSM. Can you also explain what the test does in the commit log? It would also be nicer to have a somewhat more realistic selftest which represents a simple tracing + task local storage use case. > > > > Signed-off-by: Song Liu <songliubraving@fb.com> > > --- > > .../bpf/prog_tests/test_task_local_storage.c | 34 +++++++++++++++++ > > .../selftests/bpf/progs/task_local_storage.c | 37 +++++++++++++++++++ > > 2 files changed, 71 insertions(+) > > create mode 100644 tools/testing/selftests/bpf/prog_tests/test_task_local_storage.c > > create mode 100644 tools/testing/selftests/bpf/progs/task_local_storage.c > > > > diff --git a/tools/testing/selftests/bpf/prog_tests/test_task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_task_local_storage.c > > new file mode 100644 > > index 0000000000000..7de7a154ebbe6 > > --- /dev/null > > +++ b/tools/testing/selftests/bpf/prog_tests/test_task_local_storage.c > > @@ -0,0 +1,34 @@ > > +// SPDX-License-Identifier: GPL-2.0 > > +/* Copyright (c) 2020 Facebook */ > > 2020 -> 2021 > > > + > > +#include <sys/types.h> > > +#include <unistd.h> > > +#include <test_progs.h> > > +#include "task_local_storage.skel.h" > > + > > +static unsigned int duration; > > + > > +void 
test_test_task_local_storage(void) > > +{ > > + struct task_local_storage *skel; > > + const int count = 10; > > + int i, err; > > + > > + skel = task_local_storage__open_and_load(); > > + > > Extra line is unnecessary here. > > > + if (CHECK(!skel, "skel_open_and_load", "skeleton open and load failed\n")) > > + return; > > + > > + err = task_local_storage__attach(skel); > > + > > ditto. > > > + if (CHECK(err, "skel_attach", "skeleton attach failed\n")) > > + goto out; > > + > > + for (i = 0; i < count; i++) > > + usleep(1000); > > Does a smaller usleep value will work? If it is, recommend to have a > smaller value here to reduce test_progs running time. > > > + CHECK(skel->bss->value < count, "task_local_storage_value", > > + "task local value too small\n"); [...] > > +// SPDX-License-Identifier: GPL-2.0 > > +/* Copyright (c) 2020 Facebook */ > > 2020 -> 2021 > > > + > > +#include "vmlinux.h" > > +#include <bpf/bpf_helpers.h> > > +#include <bpf/bpf_tracing.h> > > + > > +char _license[] SEC("license") = "GPL"; [...] > > +{ > > + struct local_data *storage; > > If it possible that we do some filtering based on test_progs pid > so below bpf_task_storage_get is only called for test_progs process? > This is more targeted and can avoid counter contributions from > other unrelated processes and make test_task_local_storage.c result > comparison more meaningful. Indeed, have a look at the monitored_pid approach some of the LSM programs do. > > > + > > + storage = bpf_task_storage_get(&task_storage_map, > > + next, 0, > > + BPF_LOCAL_STORAGE_GET_F_CREATE); > > + if (storage) { > > + storage->val++; > > + value = storage->val; > > + } > > + return 0; > > +} > > ^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH bpf-next 2/4] selftests/bpf: add non-BPF_LSM test for task local storage 2021-01-11 17:44 ` KP Singh @ 2021-01-11 22:50 ` Song Liu 0 siblings, 0 replies; 29+ messages in thread From: Song Liu @ 2021-01-11 22:50 UTC (permalink / raw) To: KP Singh Cc: Yonghong Song, bpf, Networking, open list, mingo, Peter Zijlstra, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko, John Fastabend, Kernel Team, Hao Luo > On Jan 11, 2021, at 9:44 AM, KP Singh <kpsingh@kernel.org> wrote: > > On Mon, Jan 11, 2021 at 6:31 PM Yonghong Song <yhs@fb.com> wrote: >> >> >> >> On 1/8/21 3:19 PM, Song Liu wrote: >>> Task local storage is enabled for tracing programs. Add a test for it >>> without CONFIG_BPF_LSM. > > Can you also explain what the test does in the commit log? > > It would also be nicer to have a somewhat more realistic selftest which > represents a simple tracing + task local storage use case. Let me try to make this more realistic. Thanks, Song [...] ^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH bpf-next 2/4] selftests/bpf: add non-BPF_LSM test for task local storage 2021-01-11 17:30 ` [PATCH bpf-next 2/4] selftests/bpf: add non-BPF_LSM test for " Yonghong Song 2021-01-11 17:44 ` KP Singh @ 2021-01-11 22:49 ` Song Liu 1 sibling, 0 replies; 29+ messages in thread From: Song Liu @ 2021-01-11 22:49 UTC (permalink / raw) To: Yonghong Song Cc: bpf, netdev, lkml, Ingo Molnar, Peter Ziljstra, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko, john fastabend, KP Singh, Kernel Team, haoluo > On Jan 11, 2021, at 9:30 AM, Yonghong Song <yhs@fb.com> wrote: > > > > On 1/8/21 3:19 PM, Song Liu wrote: >> Task local storage is enabled for tracing programs. Add a test for it >> without CONFIG_BPF_LSM. >> Signed-off-by: Song Liu <songliubraving@fb.com> >> --- >> .../bpf/prog_tests/test_task_local_storage.c | 34 +++++++++++++++++ >> .../selftests/bpf/progs/task_local_storage.c | 37 +++++++++++++++++++ >> 2 files changed, 71 insertions(+) >> create mode 100644 tools/testing/selftests/bpf/prog_tests/test_task_local_storage.c >> create mode 100644 tools/testing/selftests/bpf/progs/task_local_storage.c >> diff --git a/tools/testing/selftests/bpf/prog_tests/test_task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_task_local_storage.c >> new file mode 100644 >> index 0000000000000..7de7a154ebbe6 >> --- /dev/null >> +++ b/tools/testing/selftests/bpf/prog_tests/test_task_local_storage.c >> @@ -0,0 +1,34 @@ >> +// SPDX-License-Identifier: GPL-2.0 >> +/* Copyright (c) 2020 Facebook */ > > 2020 -> 2021 > >> + >> +#include <sys/types.h> >> +#include <unistd.h> >> +#include <test_progs.h> >> +#include "task_local_storage.skel.h" >> + >> +static unsigned int duration; >> + >> +void test_test_task_local_storage(void) >> +{ >> + struct task_local_storage *skel; >> + const int count = 10; >> + int i, err; >> + >> + skel = task_local_storage__open_and_load(); >> + > > Extra line is unnecessary here. 
> >> + if (CHECK(!skel, "skel_open_and_load", "skeleton open and load failed\n")) >> + return; >> + >> + err = task_local_storage__attach(skel); >> + > > ditto. > >> + if (CHECK(err, "skel_attach", "skeleton attach failed\n")) >> + goto out; >> + >> + for (i = 0; i < count; i++) >> + usleep(1000); > > Does a smaller usleep value will work? If it is, recommend to have a smaller value here to reduce test_progs running time. I thought 10ms total was acceptable. But yeah, smaller value should still work. > >> + CHECK(skel->bss->value < count, "task_local_storage_value", >> + "task local value too small\n"); >> + >> +out: >> + task_local_storage__destroy(skel); >> +} >> diff --git a/tools/testing/selftests/bpf/progs/task_local_storage.c b/tools/testing/selftests/bpf/progs/task_local_storage.c >> new file mode 100644 >> index 0000000000000..807255c5c162d >> --- /dev/null >> +++ b/tools/testing/selftests/bpf/progs/task_local_storage.c >> @@ -0,0 +1,37 @@ >> +// SPDX-License-Identifier: GPL-2.0 >> +/* Copyright (c) 2020 Facebook */ > > 2020 -> 2021 > >> + >> +#include "vmlinux.h" >> +#include <bpf/bpf_helpers.h> >> +#include <bpf/bpf_tracing.h> >> + >> +char _license[] SEC("license") = "GPL"; >> + >> +struct local_data { >> + __u64 val; >> +}; >> + >> +struct { >> + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); >> + __uint(map_flags, BPF_F_NO_PREALLOC); >> + __type(key, int); >> + __type(value, struct local_data); >> +} task_storage_map SEC(".maps"); >> + >> +int value = 0; >> + >> +SEC("tp_btf/sched_switch") >> +int BPF_PROG(on_switch, bool preempt, struct task_struct *prev, >> + struct task_struct *next) >> +{ >> + struct local_data *storage; > > If it possible that we do some filtering based on test_progs pid > so below bpf_task_storage_get is only called for test_progs process? > This is more targeted and can avoid counter contributions from > other unrelated processes and make test_task_local_storage.c result > comparison more meaningful. Make sense. 
Will fix in the next version. > >> + >> + storage = bpf_task_storage_get(&task_storage_map, >> + next, 0, >> + BPF_LOCAL_STORAGE_GET_F_CREATE); >> + if (storage) { >> + storage->val++; >> + value = storage->val; >> + } >> + return 0; >> +} ^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH bpf-next 2/4] selftests/bpf: add non-BPF_LSM test for task local storage [not found] ` <20210108231950.3844417-3-songliubraving@fb.com> 2021-01-11 17:30 ` [PATCH bpf-next 2/4] selftests/bpf: add non-BPF_LSM test for " Yonghong Song @ 2021-01-12 7:06 ` Andrii Nakryiko 1 sibling, 0 replies; 29+ messages in thread From: Andrii Nakryiko @ 2021-01-12 7:06 UTC (permalink / raw) To: Song Liu Cc: bpf, Networking, open list, Ingo Molnar, Peter Ziljstra, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko, john fastabend, KP Singh, Kernel Team, Hao Luo On Fri, Jan 8, 2021 at 3:30 PM Song Liu <songliubraving@fb.com> wrote: > > Task local storage is enabled for tracing programs. Add a test for it > without CONFIG_BPF_LSM. > > Signed-off-by: Song Liu <songliubraving@fb.com> > --- > .../bpf/prog_tests/test_task_local_storage.c | 34 +++++++++++++++++ > .../selftests/bpf/progs/task_local_storage.c | 37 +++++++++++++++++++ > 2 files changed, 71 insertions(+) > create mode 100644 tools/testing/selftests/bpf/prog_tests/test_task_local_storage.c > create mode 100644 tools/testing/selftests/bpf/progs/task_local_storage.c > > diff --git a/tools/testing/selftests/bpf/prog_tests/test_task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_task_local_storage.c > new file mode 100644 > index 0000000000000..7de7a154ebbe6 > --- /dev/null > +++ b/tools/testing/selftests/bpf/prog_tests/test_task_local_storage.c > @@ -0,0 +1,34 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* Copyright (c) 2020 Facebook */ > + > +#include <sys/types.h> > +#include <unistd.h> > +#include <test_progs.h> > +#include "task_local_storage.skel.h" > + > +static unsigned int duration; > + > +void test_test_task_local_storage(void) nit: let's not use "test_test_" tautology. It can be simply test_task_local_storage() and this file itself should be called task_local_storage.c. 
> +{ > + struct task_local_storage *skel; > + const int count = 10; > + int i, err; > + > + skel = task_local_storage__open_and_load(); > + > + if (CHECK(!skel, "skel_open_and_load", "skeleton open and load failed\n")) btw, can you ASSERT_OK_PTR(skel, "skel_open_and_load"); and save yourself a bunch of typing > + return; > + > + err = task_local_storage__attach(skel); > + > + if (CHECK(err, "skel_attach", "skeleton attach failed\n")) > + goto out; similarly, ASSERT_OK(err, "skel_attach") > + > + for (i = 0; i < count; i++) > + usleep(1000); > + CHECK(skel->bss->value < count, "task_local_storage_value", > + "task local value too small\n"); > + > +out: > + task_local_storage__destroy(skel); > +} [...] ^ permalink raw reply [flat|nested] 29+ messages in thread
end of thread, other threads:[~2021-01-16 1:51 UTC | newest] Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- [not found] <20210108231950.3844417-1-songliubraving@fb.com> [not found] ` <20210108231950.3844417-2-songliubraving@fb.com> 2021-01-11 6:27 ` [PATCH bpf-next 1/4] bpf: enable task local storage for tracing programs Yonghong Song 2021-01-11 10:17 ` KP Singh 2021-01-11 15:56 ` Yonghong Song 2021-01-11 10:14 ` KP Singh 2021-01-11 23:16 ` Song Liu 2021-01-11 17:16 ` Yonghong Song 2021-01-11 18:56 ` Martin KaFai Lau 2021-01-11 21:35 ` KP Singh 2021-01-11 21:58 ` Martin KaFai Lau 2021-01-11 23:45 ` Song Liu 2021-01-12 16:32 ` Yonghong Song 2021-01-12 16:53 ` KP Singh 2021-01-15 23:34 ` Song Liu 2021-01-16 0:55 ` Yonghong Song 2021-01-16 1:12 ` Song Liu 2021-01-16 1:50 ` Yonghong Song 2021-01-11 23:41 ` Song Liu 2021-01-12 18:21 ` Martin KaFai Lau [not found] ` <20210108231950.3844417-4-songliubraving@fb.com> 2021-01-11 17:37 ` [PATCH bpf-next 3/4] bpf: runqslower: prefer use local vmlinux Yonghong Song [not found] ` <20210108231950.3844417-5-songliubraving@fb.com> 2021-01-11 17:49 ` [PATCH bpf-next 4/4] bpf: runqslower: use task local storage Yonghong Song 2021-01-11 22:54 ` Song Liu 2021-01-12 3:24 ` Yonghong Song 2021-01-12 7:14 ` Andrii Nakryiko 2021-01-12 7:33 ` Yonghong Song [not found] ` <20210108231950.3844417-3-songliubraving@fb.com> 2021-01-11 17:30 ` [PATCH bpf-next 2/4] selftests/bpf: add non-BPF_LSM test for " Yonghong Song 2021-01-11 17:44 ` KP Singh 2021-01-11 22:50 ` Song Liu 2021-01-11 22:49 ` Song Liu 2021-01-12 7:06 ` Andrii Nakryiko
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as the URLs for its NNTP newsgroup(s).