All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yonghong Song <yhs@fb.com>
To: Martin KaFai Lau <kafai@fb.com>,
	Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: Andrii Nakryiko <andriin@fb.com>, bpf <bpf@vger.kernel.org>,
	Networking <netdev@vger.kernel.org>,
	Alexei Starovoitov <ast@fb.com>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Kernel Team <kernel-team@fb.com>
Subject: Re: [PATCH bpf-next v1 07/19] bpf: create anonymous bpf iterator
Date: Wed, 29 Apr 2020 12:20:05 -0700	[thread overview]
Message-ID: <88bfc829-3c2d-96aa-7d32-4f3ff9b4ad08@fb.com> (raw)
In-Reply-To: <20200429184623.ul7nxelzxeip2ign@kafai-mbp>



On 4/29/20 11:46 AM, Martin KaFai Lau wrote:
> On Wed, Apr 29, 2020 at 11:16:35AM -0700, Andrii Nakryiko wrote:
>> On Wed, Apr 29, 2020 at 12:07 AM Yonghong Song <yhs@fb.com> wrote:
>>>
>>>
>>>
>>> On 4/28/20 11:56 PM, Andrii Nakryiko wrote:
>>>> On Mon, Apr 27, 2020 at 1:19 PM Yonghong Song <yhs@fb.com> wrote:
>>>>>
>>>>> A new bpf command BPF_ITER_CREATE is added.
>>>>>
>>>>> The anonymous bpf iterator is seq_file based.
>>>>> The seq_file private data are referenced by targets.
>>>>> The bpf_iter infrastructure allocated additional space
>>>>> at seq_file->private after the space used by targets
>>>>> to store some meta data, e.g.,
>>>>>     prog:       prog to run
>>>>>     session_id: a unique id for each opened seq_file
>>>>>     seq_num:    how many times bpf programs are queried in this session
>>>>>     has_last:   indicate whether or not bpf_prog has been called after
>>>>>                 all valid objects have been processed
>>>>>
>>>>> A map between file and prog/link is established to help
>>>>> fops->release(). When fops->release() is called, just based on
>>>>> inode and file, the bpf program cannot be located since the target
>>>>> seq_priv_size is not available. This map helps retrieve the prog
>>>>> whose reference count needs to be decremented.
>>>>>
>>>>> Signed-off-by: Yonghong Song <yhs@fb.com>
>>>>> ---
>>>>>    include/linux/bpf.h            |   3 +
>>>>>    include/uapi/linux/bpf.h       |   6 ++
>>>>>    kernel/bpf/bpf_iter.c          | 162 ++++++++++++++++++++++++++++++++-
>>>>>    kernel/bpf/syscall.c           |  27 ++++++
>>>>>    tools/include/uapi/linux/bpf.h |   6 ++
>>>>>    5 files changed, 203 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>>>>> index 4fc39d9b5cd0..0f0cafc65a04 100644
>>>>> --- a/include/linux/bpf.h
>>>>> +++ b/include/linux/bpf.h
>>>>> @@ -1112,6 +1112,8 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd);
>>>>>    int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
>>>>>    int bpf_obj_get_user(const char __user *pathname, int flags);
>>>>>
>>>>> +#define BPF_DUMP_SEQ_NET_PRIVATE       BIT(0)
>>>>> +
>>>>>    struct bpf_iter_reg {
>>>>>           const char *target;
>>>>>           const char *target_func_name;
>>>>> @@ -1133,6 +1135,7 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx);
>>>>>    int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
>>>>>    int bpf_iter_link_replace(struct bpf_link *link, struct bpf_prog *old_prog,
>>>>>                             struct bpf_prog *new_prog);
>>>>> +int bpf_iter_new_fd(struct bpf_link *link);
>>>>>
>>>>>    int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
>>>>>    int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
>>>>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>>>>> index f39b9fec37ab..576651110d16 100644
>>>>> --- a/include/uapi/linux/bpf.h
>>>>> +++ b/include/uapi/linux/bpf.h
>>>>> @@ -113,6 +113,7 @@ enum bpf_cmd {
>>>>>           BPF_MAP_DELETE_BATCH,
>>>>>           BPF_LINK_CREATE,
>>>>>           BPF_LINK_UPDATE,
>>>>> +       BPF_ITER_CREATE,
>>>>>    };
>>>>>
>>>>>    enum bpf_map_type {
>>>>> @@ -590,6 +591,11 @@ union bpf_attr {
>>>>>                   __u32           old_prog_fd;
>>>>>           } link_update;
>>>>>
>>>>> +       struct { /* struct used by BPF_ITER_CREATE command */
>>>>> +               __u32           link_fd;
>>>>> +               __u32           flags;
>>>>> +       } iter_create;
>>>>> +
>>>>>    } __attribute__((aligned(8)));
>>>>>
>>>>>    /* The description below is an attempt at providing documentation to eBPF
>>>>> diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
>>>>> index fc1ce5ee5c3f..1f4e778d1814 100644
>>>>> --- a/kernel/bpf/bpf_iter.c
>>>>> +++ b/kernel/bpf/bpf_iter.c
>>>>> @@ -2,6 +2,7 @@
>>>>>    /* Copyright (c) 2020 Facebook */
>>>>>
>>>>>    #include <linux/fs.h>
>>>>> +#include <linux/anon_inodes.h>
>>>>>    #include <linux/filter.h>
>>>>>    #include <linux/bpf.h>
>>>>>
>>>>> @@ -19,6 +20,19 @@ struct bpf_iter_link {
>>>>>           struct bpf_iter_target_info *tinfo;
>>>>>    };
>>>>>
>>>>> +struct extra_priv_data {
>>>>> +       struct bpf_prog *prog;
>>>>> +       u64 session_id;
>>>>> +       u64 seq_num;
>>>>> +       bool has_last;
>>>>> +};
>>>>> +
>>>>> +struct anon_file_prog_assoc {
>>>>> +       struct list_head list;
>>>>> +       struct file *file;
>>>>> +       struct bpf_prog *prog;
>>>>> +};
>>>>> +
>>>>>    static struct list_head targets;
>>>>>    static struct mutex targets_mutex;
>>>>>    static bool bpf_iter_inited = false;
>>>>> @@ -26,6 +40,50 @@ static bool bpf_iter_inited = false;
>>>>>    /* protect bpf_iter_link.link->prog update */
>>>>>    static struct mutex bpf_iter_mutex;
>>>>>
>>>>> +/* In the anon seq_file release function, the prog cannot
>>>>> + * be retrieved since target seq_priv_size is not available.
>>>>> + * Keep a list of <anon_file, prog> mappings, so that
>>>>> + * at file release stage, the prog can be released properly.
>>>>> + */
>>>>> +static struct list_head anon_iter_info;
>>>>> +static struct mutex anon_iter_info_mutex;
>>>>> +
>>>>> +/* incremented on every opened seq_file */
>>>>> +static atomic64_t session_id;
>>>>> +
>>>>> +static u32 get_total_priv_dsize(u32 old_size)
>>>>> +{
>>>>> +       return roundup(old_size, 8) + sizeof(struct extra_priv_data);
>>>>> +}
>>>>> +
>>>>> +static void *get_extra_priv_dptr(void *old_ptr, u32 old_size)
>>>>> +{
>>>>> +       return old_ptr + roundup(old_size, 8);
>>>>> +}
>>>>> +
>>>>> +static int anon_iter_release(struct inode *inode, struct file *file)
>>>>> +{
>>>>> +       struct anon_file_prog_assoc *finfo;
>>>>> +
>>>>> +       mutex_lock(&anon_iter_info_mutex);
>>>>> +       list_for_each_entry(finfo, &anon_iter_info, list) {
>>>>> +               if (finfo->file == file) {
>>>>
>>>> I'll look at this and other patches more thoroughly tomorrow with
>>>> clear head, but this iteration to find anon_file_prog_assoc is really
>>>> unfortunate.
>>>>
>>>> I think the problem is that you are allowing seq_file infrastructure
>>>> to call directly into target implementation of seq_operations without
>>>> intercepting them. If you change that and put whatever extra info is
>>>> necessary into seq_file->private in front of target's private state,
>>>> then you shouldn't need this, right?
>>>
>>> Yes. This is true. The idea is to minimize the target change.
>>> But maybe this is not a good goal by itself.
>>>
>>> You are right, if I intercept all seq_ops(), I do not need the
>>> above change, I can tailor seq_file private_data right before
>>> calling target one and restore after the target call.
>>>
>>> Originally I only had one interception, show(); now I have
>>> stop() too, to call bpf at the end of iteration. Maybe I can
>>> intercept all four, I think. This way, I can also get rid
>>> of the target feature.
>>
>> If the main goal is to minimize target changes and make them exactly
>> seq_operations implementation, then one easier way to get easy access
>> to our own metadata in seq_file->private is to set it to point
>> **after** our metadata, but before target's metadata. Roughly in
>> pseudo code:
>>
>> struct bpf_iter_seq_file_meta {} __attribute((aligned(8)));
>>
>> void *meta = kmalloc(sizeof(struct bpf_iter_seq_file_meta) +
>> target_private_size);
>> seq_file->private = meta + sizeof(struct bpf_iter_seq_file_meta);
> I have suggested the same thing earlier.  Good to know that we think alike ;)
> 
> Maybe put them in a struct so that container_of etc. can be used:
> struct bpf_iter_private {
>          struct extra_priv_data iter_private;
> 	u8 target_private[] __aligned(8);
> };

This should work, but I would need to intercept all seq_ops() operations
because the target expects the private data to be `target_private` only.
Let me experiment to find the best way to do this.

> 
>>
>>
>> Then to recover bpf_iter_seq_file_meta:
>>
>> struct bpf_iter_seq_file_meta *meta = seq_file->private - sizeof(*meta);
>>
>> /* voila! */
>>
>> This doesn't have a benefit of making targets simpler, but will
>> require no changes to them at all. Plus less indirect calls, so less
>> performance penalty.
>>

  reply	other threads:[~2020-04-29 19:20 UTC|newest]

Thread overview: 85+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-04-27 20:12 [PATCH bpf-next v1 00/19] bpf: implement bpf iterator for kernel data Yonghong Song
2020-04-27 20:12 ` [PATCH bpf-next v1 01/19] net: refactor net assignment for seq_net_private structure Yonghong Song
2020-04-29  5:38   ` Andrii Nakryiko
2020-04-27 20:12 ` [PATCH bpf-next v1 02/19] bpf: implement an interface to register bpf_iter targets Yonghong Song
2020-04-28 16:20   ` Martin KaFai Lau
2020-04-28 16:50     ` Yonghong Song
2020-04-27 20:12 ` [PATCH bpf-next v1 03/19] bpf: add bpf_map iterator Yonghong Song
2020-04-29  0:37   ` Martin KaFai Lau
2020-04-29  0:48     ` Alexei Starovoitov
2020-04-29  1:15       ` Yonghong Song
2020-04-29  2:44         ` Alexei Starovoitov
2020-04-29  5:09           ` Yonghong Song
2020-04-29  6:08             ` Andrii Nakryiko
2020-04-29  6:20               ` Yonghong Song
2020-04-29  6:30                 ` Alexei Starovoitov
2020-04-29  6:40                   ` Andrii Nakryiko
2020-04-29  6:44                     ` Yonghong Song
2020-04-29 15:34                       ` Alexei Starovoitov
2020-04-29 18:14                         ` Yonghong Song
2020-04-29 19:19                         ` Andrii Nakryiko
2020-04-29 20:15                           ` Yonghong Song
2020-04-30  3:06                             ` Alexei Starovoitov
2020-04-30  4:01                               ` Yonghong Song
2020-04-29  6:34                 ` Martin KaFai Lau
2020-04-29  6:51                   ` Yonghong Song
2020-04-29 19:25                     ` Andrii Nakryiko
2020-04-29  1:02     ` Yonghong Song
2020-04-29  6:04   ` Andrii Nakryiko
2020-04-27 20:12 ` [PATCH bpf-next v1 04/19] bpf: allow loading of a bpf_iter program Yonghong Song
2020-04-29  0:54   ` Martin KaFai Lau
2020-04-29  1:27     ` Yonghong Song
2020-04-27 20:12 ` [PATCH bpf-next v1 05/19] bpf: support bpf tracing/iter programs for BPF_LINK_CREATE Yonghong Song
2020-04-29  1:17   ` [Potential Spoof] " Martin KaFai Lau
2020-04-29  6:25   ` Andrii Nakryiko
2020-04-27 20:12 ` [PATCH bpf-next v1 06/19] bpf: support bpf tracing/iter programs for BPF_LINK_UPDATE Yonghong Song
2020-04-29  1:32   ` Martin KaFai Lau
2020-04-29  5:04     ` Yonghong Song
2020-04-29  5:58       ` Martin KaFai Lau
2020-04-29  6:32         ` Andrii Nakryiko
2020-04-29  6:41           ` Martin KaFai Lau
2020-04-27 20:12 ` [PATCH bpf-next v1 07/19] bpf: create anonymous bpf iterator Yonghong Song
2020-04-29  5:39   ` Martin KaFai Lau
2020-04-29  6:56   ` Andrii Nakryiko
2020-04-29  7:06     ` Yonghong Song
2020-04-29 18:16       ` Andrii Nakryiko
2020-04-29 18:46         ` Martin KaFai Lau
2020-04-29 19:20           ` Yonghong Song [this message]
2020-04-29 20:50             ` Martin KaFai Lau
2020-04-29 20:54               ` Yonghong Song
2020-04-29 19:39   ` Andrii Nakryiko
2020-04-27 20:12 ` [PATCH bpf-next v1 08/19] bpf: create file " Yonghong Song
2020-04-29 20:40   ` Andrii Nakryiko
2020-04-30 18:02     ` Yonghong Song
2020-04-27 20:12 ` [PATCH bpf-next v1 09/19] bpf: add PTR_TO_BTF_ID_OR_NULL support Yonghong Song
2020-04-29 20:46   ` Andrii Nakryiko
2020-04-29 20:51     ` Yonghong Song
2020-04-27 20:12 ` [PATCH bpf-next v1 10/19] bpf: add netlink and ipv6_route targets Yonghong Song
2020-04-28 19:49   ` kbuild test robot
2020-04-28 19:49     ` kbuild test robot
2020-04-28 19:50   ` [RFC PATCH] bpf: __bpf_iter__netlink() can be static kbuild test robot
2020-04-28 19:50     ` kbuild test robot
2020-04-27 20:12 ` [PATCH bpf-next v1 11/19] bpf: add task and task/file targets Yonghong Song
2020-04-30  2:08   ` Andrii Nakryiko
2020-05-01 17:23     ` Yonghong Song
2020-05-01 19:01       ` Andrii Nakryiko
2020-04-27 20:12 ` [PATCH bpf-next v1 12/19] bpf: add bpf_seq_printf and bpf_seq_write helpers Yonghong Song
2020-04-28  6:02   ` kbuild test robot
2020-04-28  6:02     ` kbuild test robot
2020-04-28 16:35     ` Yonghong Song
2020-04-28 16:35       ` Yonghong Song
2020-04-30 20:06       ` Andrii Nakryiko
2020-04-27 20:12 ` [PATCH bpf-next v1 13/19] bpf: handle spilled PTR_TO_BTF_ID properly when checking stack_boundary Yonghong Song
2020-04-27 20:12 ` [PATCH bpf-next v1 14/19] bpf: support variable length array in tracing programs Yonghong Song
2020-04-30 20:04   ` Andrii Nakryiko
2020-04-27 20:12 ` [PATCH bpf-next v1 15/19] tools/libbpf: add bpf_iter support Yonghong Song
2020-04-30  1:41   ` Andrii Nakryiko
2020-05-02  7:17     ` Yonghong Song
2020-04-27 20:12 ` [PATCH bpf-next v1 16/19] tools/bpftool: add bpf_iter support for bptool Yonghong Song
2020-04-28  9:27   ` Quentin Monnet
2020-04-28 17:35     ` Yonghong Song
2020-04-29  8:37       ` Quentin Monnet
2020-04-27 20:12 ` [PATCH bpf-next v1 17/19] tools/bpf: selftests: add iterator programs for ipv6_route and netlink Yonghong Song
2020-04-30  2:12   ` Andrii Nakryiko
2020-04-27 20:12 ` [PATCH bpf-next v1 18/19] tools/bpf: selftests: add iter progs for bpf_map/task/task_file Yonghong Song
2020-04-27 20:12 ` [PATCH bpf-next v1 19/19] tools/bpf: selftests: add bpf_iter selftests Yonghong Song

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=88bfc829-3c2d-96aa-7d32-4f3ff9b4ad08@fb.com \
    --to=yhs@fb.com \
    --cc=andrii.nakryiko@gmail.com \
    --cc=andriin@fb.com \
    --cc=ast@fb.com \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=kafai@fb.com \
    --cc=kernel-team@fb.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.