bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Martin KaFai Lau <martin.lau@linux.dev>
To: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: deb.chatterjee@intel.com, anjali.singhai@intel.com,
	namrata.limaye@intel.com, tom@sipanda.io, mleitner@redhat.com,
	Mahesh.Shirshyad@amd.com, Vipin.Jain@amd.com,
	tomasz.osinski@intel.com, jiri@resnulli.us,
	xiyou.wangcong@gmail.com, davem@davemloft.net,
	edumazet@google.com, kuba@kernel.org, pabeni@redhat.com,
	vladbu@nvidia.com, horms@kernel.org, khalidm@nvidia.com,
	toke@redhat.com, daniel@iogearbox.net, victor@mojatatu.com,
	pctammela@mojatatu.com, bpf@vger.kernel.org,
	netdev@vger.kernel.org
Subject: Re: [PATCH net-next v12 14/15] p4tc: add set of P4TC table kfuncs
Date: Sat, 2 Mar 2024 17:32:42 -0800	[thread overview]
Message-ID: <f88b5f65-957e-4b5d-8959-d16e79372658@linux.dev> (raw)
In-Reply-To: <CAM0EoMniOaKn4W_WN9rmQZ1JY3qCugn34mmqCy9UdCTAj_tuTQ@mail.gmail.com>

On 3/1/24 4:31 AM, Jamal Hadi Salim wrote:
> On Fri, Mar 1, 2024 at 1:53 AM Martin KaFai Lau <martin.lau@linux.dev> wrote:
>>
>> On 2/25/24 8:54 AM, Jamal Hadi Salim wrote:
>>> +struct p4tc_table_entry_act_bpf_params {
>>
>> Will this struct be extended in the future?
>>
>>> +     u32 pipeid;
>>> +     u32 tblid;
>>> +};
>>> +
> 
> Not that i can think of. We probably want to have the option to do so
> if needed. Do you see any harm if we were to make changes for whatever
> reason in the future?

It will be useful to add an argument named with "__sz" suffix to the kfunc.
Take a look at how the kfunc in nf_conntrack_bpf.c is handling the "opts" and 
"opts__sz" argument in its kfunc.

> 
>>> +struct p4tc_table_entry_create_bpf_params {
>>> +     u32 profile_id;
>>> +     u32 pipeid;
>>> +     u32 tblid;
>>> +};
>>> +
>>
>> [ ... ]
>>
>>> diff --git a/include/net/tc_act/p4tc.h b/include/net/tc_act/p4tc.h
>>> index c5256d821..155068de0 100644
>>> --- a/include/net/tc_act/p4tc.h
>>> +++ b/include/net/tc_act/p4tc.h
>>> @@ -13,10 +13,26 @@ struct tcf_p4act_params {
>>>        u32 tot_params_sz;
>>>    };
>>>
>>> +#define P4TC_MAX_PARAM_DATA_SIZE 124
>>> +
>>> +struct p4tc_table_entry_act_bpf {
>>> +     u32 act_id;
>>> +     u32 hit:1,
>>> +         is_default_miss_act:1,
>>> +         is_default_hit_act:1;
>>> +     u8 params[P4TC_MAX_PARAM_DATA_SIZE];
>>> +} __packed;
>>> +
>>> +struct p4tc_table_entry_act_bpf_kern {
>>> +     struct rcu_head rcu;
>>> +     struct p4tc_table_entry_act_bpf act_bpf;
>>> +};
>>> +
>>>    struct tcf_p4act {
>>>        struct tc_action common;
>>>        /* Params IDR reference passed during runtime */
>>>        struct tcf_p4act_params __rcu *params;
>>> +     struct p4tc_table_entry_act_bpf_kern __rcu *act_bpf;
>>>        u32 p_id;
>>>        u32 act_id;
>>>        struct list_head node;
>>> @@ -24,4 +40,39 @@ struct tcf_p4act {
>>>
>>>    #define to_p4act(a) ((struct tcf_p4act *)a)
>>>
>>> +static inline struct p4tc_table_entry_act_bpf *
>>> +p4tc_table_entry_act_bpf(struct tc_action *action)
>>> +{
>>> +     struct p4tc_table_entry_act_bpf_kern *act_bpf;
>>> +     struct tcf_p4act *p4act = to_p4act(action);
>>> +
>>> +     act_bpf = rcu_dereference(p4act->act_bpf);
>>> +
>>> +     return &act_bpf->act_bpf;
>>> +}
>>> +
>>> +static inline int
>>> +p4tc_table_entry_act_bpf_change_flags(struct tc_action *action, u32 hit,
>>> +                                   u32 dflt_miss, u32 dflt_hit)
>>> +{
>>> +     struct p4tc_table_entry_act_bpf_kern *act_bpf, *act_bpf_old;
>>> +     struct tcf_p4act *p4act = to_p4act(action);
>>> +
>>> +     act_bpf = kzalloc(sizeof(*act_bpf), GFP_KERNEL);
>>
>>
>> [ ... ]
>>
>>> +__bpf_kfunc static struct p4tc_table_entry_act_bpf *
>>> +bpf_p4tc_tbl_read(struct __sk_buff *skb_ctx,
>>
>> The argument could be "struct sk_buff *skb" instead of __sk_buff. Take a look at
>> commit 2f4643934670.
> 
> We'll make that change.
> 
>>
>>> +               struct p4tc_table_entry_act_bpf_params *params,
>>> +               void *key, const u32 key__sz)
>>> +{
>>> +     struct sk_buff *skb = (struct sk_buff *)skb_ctx;
>>> +     struct net *caller_net;
>>> +
>>> +     caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
>>> +
>>> +     return __bpf_p4tc_tbl_read(caller_net, params, key, key__sz);
>>> +}
>>> +
>>> +__bpf_kfunc static struct p4tc_table_entry_act_bpf *
>>> +xdp_p4tc_tbl_read(struct xdp_md *xdp_ctx,
>>> +               struct p4tc_table_entry_act_bpf_params *params,
>>> +               void *key, const u32 key__sz)
>>> +{
>>> +     struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx;
>>> +     struct net *caller_net;
>>> +
>>> +     caller_net = dev_net(ctx->rxq->dev);
>>> +
>>> +     return __bpf_p4tc_tbl_read(caller_net, params, key, key__sz);
>>> +}
>>> +
>>> +static int
>>> +__bpf_p4tc_entry_create(struct net *net,
>>> +                     struct p4tc_table_entry_create_bpf_params *params,
>>> +                     void *key, const u32 key__sz,
>>> +                     struct p4tc_table_entry_act_bpf *act_bpf)
>>> +{
>>> +     struct p4tc_table_entry_key *entry_key = key;
>>> +     struct p4tc_pipeline *pipeline;
>>> +     struct p4tc_table *table;
>>> +
>>> +     if (!params || !key)
>>> +             return -EINVAL;
>>> +     if (key__sz != P4TC_ENTRY_KEY_SZ_BYTES(entry_key->keysz))
>>> +             return -EINVAL;
>>> +
>>> +     pipeline = p4tc_pipeline_find_byid(net, params->pipeid);
>>> +     if (!pipeline)
>>> +             return -ENOENT;
>>> +
>>> +     table = p4tc_tbl_cache_lookup(net, params->pipeid, params->tblid);
>>> +     if (!table)
>>> +             return -ENOENT;
>>> +
>>> +     if (entry_key->keysz != table->tbl_keysz)
>>> +             return -EINVAL;
>>> +
>>> +     return p4tc_table_entry_create_bpf(pipeline, table, entry_key, act_bpf,
>>> +                                        params->profile_id);
>>
>> My understanding is this kfunc will allocate a "struct
>> p4tc_table_entry_act_bpf_kern" object. If the bpf_p4tc_entry_delete() kfunc is
>> never called and the bpf prog is unloaded, how the act_bpf object will be
>> cleaned up?
>>
> 
> The TC code takes care of this. Unloading the bpf prog does not affect
> the deletion, it is the TC control side that will take care of it. If
> we delete the pipeline otoh then not just this entry but all entries
> will be flushed.

It looks like the "struct p4tc_table_entry_act_bpf_kern" object is allocated by 
the bpf prog through kfunc and will only be useful for the bpf prog but not 
other parts of the kernel. However, if the bpf prog is unloaded, these bpf 
specific objects will be left over in the kernel until the tc pipeline (where 
the act_bpf_kern object resided) is gone.

It is the expectation on bpf prog (not only tc/xdp bpf prog) about resources 
clean up that these bpf objects will be gone after unloading the bpf prog and 
unpinning its bpf map.

[ ... ]

>>> +BTF_SET8_START(p4tc_kfunc_check_tbl_set_skb)
>>
>> This soon will be broken with the latest change in bpf-next. It is replaced by
>> BTF_KFUNCS_START. commit a05e90427ef6.

It has already been included in the latest bpf-next pull-request, so should 
reach net-next soon.

>>
> 
> Ok, this wasnt in net-next when we pushed. We base our changes on
> net-next. When do you plan to merge that into net-next?
> 
>> What is the plan on the selftest ?
>>
> 
> We may need some guidance. How do you see us writing a selftest for this?
> We have extensive testing on the control side which is netlink (not
> part of the current series).

There are examples in tools/testing/selftests/bpf, e.g. the test_bpf_nf.c to 
test the kfuncs in nf_conntrack_bpf mentioned above. There are also selftests 
doing netlink to setup the test. The bpf/test_progs tries to avoid external 
dependency as much as possible, so linking to an extra external library and 
using an extra tool/binary will be unacceptable.
and only the bpf/test_progs binary will be run by bpf CI.

The selftest does not have to be complicated. It can exercise the kfunc and show 
how the new struct (e.g. struct p4tc_table_entry_bpf_*) will be used. There is 
BPF_PROG_RUN for the tc and xdp prog, so should be quite doable.


  reply	other threads:[~2024-03-03  1:32 UTC|newest]

Thread overview: 71+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-25 16:54 [PATCH net-next v12 00/15] Introducing P4TC (series 1) Jamal Hadi Salim
2024-02-25 16:54 ` [PATCH net-next v12 01/15] net: sched: act_api: Introduce P4 actions list Jamal Hadi Salim
2024-02-29 15:05   ` Paolo Abeni
2024-02-29 18:21     ` Jamal Hadi Salim
2024-03-01  7:30       ` Paolo Abeni
2024-03-01 12:39         ` Jamal Hadi Salim
2024-02-25 16:54 ` [PATCH net-next v12 02/15] net/sched: act_api: increase action kind string length Jamal Hadi Salim
2024-02-25 16:54 ` [PATCH net-next v12 03/15] net/sched: act_api: Update tc_action_ops to account for P4 actions Jamal Hadi Salim
2024-02-29 16:19   ` Paolo Abeni
2024-02-29 18:30     ` Jamal Hadi Salim
2024-02-25 16:54 ` [PATCH net-next v12 04/15] net/sched: act_api: add struct p4tc_action_ops as a parameter to lookup callback Jamal Hadi Salim
2024-02-25 16:54 ` [PATCH net-next v12 05/15] net: sched: act_api: Add support for preallocated P4 action instances Jamal Hadi Salim
2024-02-25 16:54 ` [PATCH net-next v12 06/15] p4tc: add P4 data types Jamal Hadi Salim
2024-02-29 15:09   ` Paolo Abeni
2024-02-29 18:31     ` Jamal Hadi Salim
2024-02-25 16:54 ` [PATCH net-next v12 07/15] p4tc: add template API Jamal Hadi Salim
2024-02-25 16:54 ` [PATCH net-next v12 08/15] p4tc: add template pipeline create, get, update, delete Jamal Hadi Salim
2024-02-25 16:54 ` [PATCH net-next v12 09/15] p4tc: add template action create, update, delete, get, flush and dump Jamal Hadi Salim
2024-02-25 16:54 ` [PATCH net-next v12 10/15] p4tc: add runtime action support Jamal Hadi Salim
2024-02-25 16:54 ` [PATCH net-next v12 11/15] p4tc: add template table create, update, delete, get, flush and dump Jamal Hadi Salim
2024-02-25 16:54 ` [PATCH net-next v12 12/15] p4tc: add runtime table entry create and update Jamal Hadi Salim
2024-02-25 16:54 ` [PATCH net-next v12 13/15] p4tc: add runtime table entry get, delete, flush and dump Jamal Hadi Salim
2024-02-25 16:54 ` [PATCH net-next v12 14/15] p4tc: add set of P4TC table kfuncs Jamal Hadi Salim
2024-03-01  6:53   ` Martin KaFai Lau
2024-03-01 12:31     ` Jamal Hadi Salim
2024-03-03  1:32       ` Martin KaFai Lau [this message]
2024-03-03 17:20         ` Jamal Hadi Salim
2024-03-05  7:40           ` Martin KaFai Lau
2024-03-05 12:30             ` Jamal Hadi Salim
2024-03-06  7:58               ` Martin KaFai Lau
2024-03-06 20:22                 ` Jamal Hadi Salim
2024-03-06 22:21                   ` Martin KaFai Lau
2024-03-06 23:19                     ` Jamal Hadi Salim
2024-02-25 16:54 ` [PATCH net-next v12 15/15] p4tc: add P4 classifier Jamal Hadi Salim
2024-02-28 17:11 ` [PATCH net-next v12 00/15] Introducing P4TC (series 1) John Fastabend
2024-02-28 18:23   ` Jamal Hadi Salim
2024-02-28 21:13     ` John Fastabend
2024-03-01  7:02   ` Martin KaFai Lau
2024-03-01 12:36     ` Jamal Hadi Salim
2024-02-29 17:13 ` Paolo Abeni
2024-02-29 18:49   ` Jamal Hadi Salim
2024-02-29 20:52     ` John Fastabend
2024-02-29 21:49   ` Singhai, Anjali
2024-02-29 22:33     ` John Fastabend
2024-02-29 22:48       ` Jamal Hadi Salim
     [not found]         ` <CAOuuhY8qbsYCjdUYUZv8J3jz8HGXmtxLmTDP6LKgN5uRVZwMnQ@mail.gmail.com>
2024-03-01 17:00           ` Jakub Kicinski
2024-03-01 17:39             ` Jamal Hadi Salim
2024-03-02  1:32               ` Jakub Kicinski
2024-03-02  2:20                 ` Tom Herbert
2024-03-03  3:15                   ` Jakub Kicinski
2024-03-03 16:31                     ` Tom Herbert
2024-03-04 20:07                       ` Jakub Kicinski
2024-03-04 20:58                         ` eBPF to implement core functionility WAS " Tom Herbert
2024-03-04 21:19                       ` Stanislav Fomichev
2024-03-04 22:01                         ` Tom Herbert
2024-03-04 23:24                           ` Stanislav Fomichev
2024-03-04 23:50                             ` Tom Herbert
2024-03-02  2:59                 ` Hardware Offload discussion WAS(Re: " Jamal Hadi Salim
2024-03-02 14:36                   ` Jamal Hadi Salim
2024-03-03  3:27                     ` Jakub Kicinski
2024-03-03 17:00                       ` Jamal Hadi Salim
2024-03-03 18:10                         ` Tom Herbert
2024-03-03 19:04                           ` Jamal Hadi Salim
2024-03-04 20:18                             ` Jakub Kicinski
2024-03-04 21:02                               ` Jamal Hadi Salim
2024-03-04 21:23                             ` Stanislav Fomichev
2024-03-04 21:44                               ` Jamal Hadi Salim
2024-03-04 22:23                                 ` Stanislav Fomichev
2024-03-04 22:59                                   ` Jamal Hadi Salim
2024-03-04 23:14                                     ` Stanislav Fomichev
2024-03-01 18:53   ` Chris Sommers

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f88b5f65-957e-4b5d-8959-d16e79372658@linux.dev \
    --to=martin.lau@linux.dev \
    --cc=Mahesh.Shirshyad@amd.com \
    --cc=Vipin.Jain@amd.com \
    --cc=anjali.singhai@intel.com \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=deb.chatterjee@intel.com \
    --cc=edumazet@google.com \
    --cc=horms@kernel.org \
    --cc=jhs@mojatatu.com \
    --cc=jiri@resnulli.us \
    --cc=khalidm@nvidia.com \
    --cc=kuba@kernel.org \
    --cc=mleitner@redhat.com \
    --cc=namrata.limaye@intel.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=pctammela@mojatatu.com \
    --cc=toke@redhat.com \
    --cc=tom@sipanda.io \
    --cc=tomasz.osinski@intel.com \
    --cc=victor@mojatatu.com \
    --cc=vladbu@nvidia.com \
    --cc=xiyou.wangcong@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).