All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] bpf: export the net namespace for bpf_sock_ops
@ 2020-06-05 12:40 Wang Li
  2020-06-05 14:53 ` Jakub Sitnicki
  0 siblings, 1 reply; 6+ messages in thread
From: Wang Li @ 2020-06-05 12:40 UTC (permalink / raw)
  To: bpf, daniel; +Cc: Wang Li, huangxuesen, yangxingwu

Sometimes we need the net namespace as part of the key for BPF_MAP_TYPE_SOCKHASH
to distinguish connections that share the same five-tuple, for example when we
do sock_map acceleration for a proxy that uses 127.0.0.1 to 127.0.0.1
connections in different containers on the same node.
We export the netns inum instead of the real pointer to struct net to avoid a
potential security issue.

Signed-off-by: Wang Li <wangli09@kuaishou.com>
Signed-off-by: huangxuesen <huangxuesen@kuaishou.com>
Signed-off-by: yangxingwu <yangxingwu@kuaishou.com>
---
 include/uapi/linux/bpf.h       |  2 ++
 net/core/filter.c              | 17 +++++++++++++++++
 tools/include/uapi/linux/bpf.h |  2 ++
 3 files changed, 21 insertions(+)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c65b374a5090..0fe7e459f023 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3947,6 +3947,8 @@ struct bpf_sock_ops {
 				 * there is a full socket. If not, the
 				 * fields read as zero.
 				 */
+	__u32 netns_inum;	/* The net namespace this sock belongs to */
+
 	__u32 snd_cwnd;
 	__u32 srtt_us;		/* Averaged RTT << 3 in usecs */
 	__u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */
diff --git a/net/core/filter.c b/net/core/filter.c
index d01a244b5087..bfe448ace25f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -8450,6 +8450,23 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
 					       is_fullsock));
 		break;
 
+	case offsetof(struct bpf_sock_ops, netns_inum):
+#ifdef CONFIG_NET_NS
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+						struct bpf_sock_ops_kern, sk),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct bpf_sock_ops_kern, sk));
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+						struct sock_common, skc_net),
+				      si->dst_reg, si->dst_reg,
+				      offsetof(struct sock_common, skc_net));
+		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+				      offsetof(struct net, ns.inum));
+#else
+		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+#endif
+		break;
+
 	case offsetof(struct bpf_sock_ops, state):
 		BUILD_BUG_ON(sizeof_field(struct sock_common, skc_state) != 1);
 
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index c65b374a5090..0fe7e459f023 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3947,6 +3947,8 @@ struct bpf_sock_ops {
 				 * there is a full socket. If not, the
 				 * fields read as zero.
 				 */
+	__u32 netns_inum;	/* The net namespace this sock belongs to */
+
 	__u32 snd_cwnd;
 	__u32 srtt_us;		/* Averaged RTT << 3 in usecs */
 	__u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] bpf: export the net namespace for bpf_sock_ops
  2020-06-05 12:40 [PATCH] bpf: export the net namespace for bpf_sock_ops Wang Li
@ 2020-06-05 14:53 ` Jakub Sitnicki
  2020-06-05 15:22   ` Daniel Borkmann
  2020-06-08  2:45   ` 王黎
  0 siblings, 2 replies; 6+ messages in thread
From: Jakub Sitnicki @ 2020-06-05 14:53 UTC (permalink / raw)
  To: Wang Li; +Cc: bpf, daniel, Wang Li, huangxuesen, yangxingwu

On Fri, Jun 05, 2020 at 02:40 PM CEST, Wang Li wrote:
> Sometimes we need net namespace as part of the key for BPF_MAP_TYPE_SOCKHASH to
> distinguish the connections with same five-tuples, for example when we do the
> sock_map acceleration for the proxy that uses 127.0.0.1 to 127.0.0.1 connections
> in different containers on same node.
> And we export the netns inum instead of the real pointer of struct net to avoid
> the potential security issue.
>
> Signed-off-by: Wang Li <wangli09@kuaishou.com>
> Signed-off-by: huangxuesen <huangxuesen@kuaishou.com>
> Signed-off-by: yangxingwu <yangxingwu@kuaishou.com>
> ---
>  include/uapi/linux/bpf.h       |  2 ++
>  net/core/filter.c              | 17 +++++++++++++++++
>  tools/include/uapi/linux/bpf.h |  2 ++
>  3 files changed, 21 insertions(+)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index c65b374a5090..0fe7e459f023 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -3947,6 +3947,8 @@ struct bpf_sock_ops {
>  				 * there is a full socket. If not, the
>  				 * fields read as zero.
>  				 */
> +	__u32 netns_inum;	/* The net namespace this sock belongs to */
> +

In uapi/linux/bpf.h we have a field `netns_ino` for storing the net
namespace inode number in a couple of structs (bpf_prog_info,
bpf_map_info). It would be nice to keep the naming consistent.

>  	__u32 snd_cwnd;
>  	__u32 srtt_us;		/* Averaged RTT << 3 in usecs */
>  	__u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */
> diff --git a/net/core/filter.c b/net/core/filter.c
> index d01a244b5087..bfe448ace25f 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -8450,6 +8450,23 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
>  					       is_fullsock));
>  		break;
>
> +	case offsetof(struct bpf_sock_ops, netns_inum):
> +#ifdef CONFIG_NET_NS
> +		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
> +						struct bpf_sock_ops_kern, sk),
> +				      si->dst_reg, si->src_reg,
> +				      offsetof(struct bpf_sock_ops_kern, sk));
> +		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
> +						struct sock_common, skc_net),
> +				      si->dst_reg, si->dst_reg,
> +				      offsetof(struct sock_common, skc_net));
> +		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
> +				      offsetof(struct net, ns.inum));
> +#else
> +		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
> +#endif
> +		break;
> +
>  	case offsetof(struct bpf_sock_ops, state):
>  		BUILD_BUG_ON(sizeof_field(struct sock_common, skc_state) != 1);
>
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index c65b374a5090..0fe7e459f023 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -3947,6 +3947,8 @@ struct bpf_sock_ops {
>  				 * there is a full socket. If not, the
>  				 * fields read as zero.
>  				 */
> +	__u32 netns_inum;	/* The net namespace this sock belongs to */
> +
>  	__u32 snd_cwnd;
>  	__u32 srtt_us;		/* Averaged RTT << 3 in usecs */
>  	__u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] bpf: export the net namespace for bpf_sock_ops
  2020-06-05 14:53 ` Jakub Sitnicki
@ 2020-06-05 15:22   ` Daniel Borkmann
  2020-06-08  2:51     ` 王黎
  2020-06-08  2:45   ` 王黎
  1 sibling, 1 reply; 6+ messages in thread
From: Daniel Borkmann @ 2020-06-05 15:22 UTC (permalink / raw)
  To: Jakub Sitnicki, Wang Li; +Cc: bpf, Wang Li, huangxuesen, yangxingwu

On 6/5/20 4:53 PM, Jakub Sitnicki wrote:
> On Fri, Jun 05, 2020 at 02:40 PM CEST, Wang Li wrote:
>> Sometimes we need net namespace as part of the key for BPF_MAP_TYPE_SOCKHASH to
>> distinguish the connections with same five-tuples, for example when we do the
>> sock_map acceleration for the proxy that uses 127.0.0.1 to 127.0.0.1 connections
>> in different containers on same node.
>> And we export the netns inum instead of the real pointer of struct net to avoid
>> the potential security issue.
>>
>> Signed-off-by: Wang Li <wangli09@kuaishou.com>
>> Signed-off-by: huangxuesen <huangxuesen@kuaishou.com>
>> Signed-off-by: yangxingwu <yangxingwu@kuaishou.com>
>> ---
>>   include/uapi/linux/bpf.h       |  2 ++
>>   net/core/filter.c              | 17 +++++++++++++++++
>>   tools/include/uapi/linux/bpf.h |  2 ++
>>   3 files changed, 21 insertions(+)
>>
>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>> index c65b374a5090..0fe7e459f023 100644
>> --- a/include/uapi/linux/bpf.h
>> +++ b/include/uapi/linux/bpf.h
>> @@ -3947,6 +3947,8 @@ struct bpf_sock_ops {
>>   				 * there is a full socket. If not, the
>>   				 * fields read as zero.
>>   				 */
>> +	__u32 netns_inum;	/* The net namespace this sock belongs to */
>> +
> 
> In uapi/linux/bpf.h we have a field `netns_ino` for storing net
> namespace inode number in a couple structs (bpf_prog_info,
> bpf_map_info). Would be nice to keep the naming constent.

Adding in the middle will break programs. Also, currently we have the
merge window open and as such bpf-next is closed. Check status here [0].

Regarding above, we recently added bpf_get_netns_cookie() helper, have
you tried to enable this one instead?

   [0] http://vger.kernel.org/~davem/net-next.html

>> diff --git a/net/core/filter.c b/net/core/filter.c
>> index d01a244b5087..bfe448ace25f 100644
>> --- a/net/core/filter.c
>> +++ b/net/core/filter.c
>> @@ -8450,6 +8450,23 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
>>   					       is_fullsock));
>>   		break;
>>
>> +	case offsetof(struct bpf_sock_ops, netns_inum):
>> +#ifdef CONFIG_NET_NS
>> +		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
>> +						struct bpf_sock_ops_kern, sk),
>> +				      si->dst_reg, si->src_reg,
>> +				      offsetof(struct bpf_sock_ops_kern, sk));
>> +		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
>> +						struct sock_common, skc_net),
>> +				      si->dst_reg, si->dst_reg,
>> +				      offsetof(struct sock_common, skc_net));
>> +		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
>> +				      offsetof(struct net, ns.inum));
>> +#else
>> +		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
>> +#endif
>> +		break;
>> +
>>   	case offsetof(struct bpf_sock_ops, state):
>>   		BUILD_BUG_ON(sizeof_field(struct sock_common, skc_state) != 1);
>>
>> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
>> index c65b374a5090..0fe7e459f023 100644
>> --- a/tools/include/uapi/linux/bpf.h
>> +++ b/tools/include/uapi/linux/bpf.h
>> @@ -3947,6 +3947,8 @@ struct bpf_sock_ops {
>>   				 * there is a full socket. If not, the
>>   				 * fields read as zero.
>>   				 */
>> +	__u32 netns_inum;	/* The net namespace this sock belongs to */
>> +
>>   	__u32 snd_cwnd;
>>   	__u32 srtt_us;		/* Averaged RTT << 3 in usecs */
>>   	__u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] bpf: export the net namespace for bpf_sock_ops
  2020-06-05 14:53 ` Jakub Sitnicki
  2020-06-05 15:22   ` Daniel Borkmann
@ 2020-06-08  2:45   ` 王黎
  1 sibling, 0 replies; 6+ messages in thread
From: 王黎 @ 2020-06-08  2:45 UTC (permalink / raw)
  To: Jakub Sitnicki
  Cc: Wang Li, bpf, daniel, 黄学森, 杨兴武

Jakub, thanks for your comments.

> 在 2020年6月5日,下午10:53,Jakub Sitnicki <jakub@cloudflare.com> 写道:
> 
> On Fri, Jun 05, 2020 at 02:40 PM CEST, Wang Li wrote:
>> Sometimes we need net namespace as part of the key for BPF_MAP_TYPE_SOCKHASH to
>> distinguish the connections with same five-tuples, for example when we do the
>> sock_map acceleration for the proxy that uses 127.0.0.1 to 127.0.0.1 connections
>> in different containers on same node.
>> And we export the netns inum instead of the real pointer of struct net to avoid
>> the potential security issue.
>> 
>> Signed-off-by: Wang Li <wangli09@kuaishou.com>
>> Signed-off-by: huangxuesen <huangxuesen@kuaishou.com>
>> Signed-off-by: yangxingwu <yangxingwu@kuaishou.com>
>> ---
>> include/uapi/linux/bpf.h       |  2 ++
>> net/core/filter.c              | 17 +++++++++++++++++
>> tools/include/uapi/linux/bpf.h |  2 ++
>> 3 files changed, 21 insertions(+)
>> 
>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>> index c65b374a5090..0fe7e459f023 100644
>> --- a/include/uapi/linux/bpf.h
>> +++ b/include/uapi/linux/bpf.h
>> @@ -3947,6 +3947,8 @@ struct bpf_sock_ops {
>> 				 * there is a full socket. If not, the
>> 				 * fields read as zero.
>> 				 */
>> +	__u32 netns_inum;	/* The net namespace this sock belongs to */
>> +
> 
> In uapi/linux/bpf.h we have a field `netns_ino` for storing net
> namespace inode number in a couple structs (bpf_prog_info,
> bpf_map_info). Would be nice to keep the naming constent.
> 
>> 	__u32 snd_cwnd;
>> 	__u32 srtt_us;		/* Averaged RTT << 3 in usecs */
>> 	__u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */
>> diff --git a/net/core/filter.c b/net/core/filter.c
>> index d01a244b5087..bfe448ace25f 100644
>> --- a/net/core/filter.c
>> +++ b/net/core/filter.c
>> @@ -8450,6 +8450,23 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
>> 					       is_fullsock));
>> 		break;
>> 
>> +	case offsetof(struct bpf_sock_ops, netns_inum):
>> +#ifdef CONFIG_NET_NS
>> +		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
>> +						struct bpf_sock_ops_kern, sk),
>> +				      si->dst_reg, si->src_reg,
>> +				      offsetof(struct bpf_sock_ops_kern, sk));
>> +		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
>> +						struct sock_common, skc_net),
>> +				      si->dst_reg, si->dst_reg,
>> +				      offsetof(struct sock_common, skc_net));
>> +		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
>> +				      offsetof(struct net, ns.inum));
>> +#else
>> +		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
>> +#endif
>> +		break;
>> +
>> 	case offsetof(struct bpf_sock_ops, state):
>> 		BUILD_BUG_ON(sizeof_field(struct sock_common, skc_state) != 1);
>> 
>> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
>> index c65b374a5090..0fe7e459f023 100644
>> --- a/tools/include/uapi/linux/bpf.h
>> +++ b/tools/include/uapi/linux/bpf.h
>> @@ -3947,6 +3947,8 @@ struct bpf_sock_ops {
>> 				 * there is a full socket. If not, the
>> 				 * fields read as zero.
>> 				 */
>> +	__u32 netns_inum;	/* The net namespace this sock belongs to */
>> +
>> 	__u32 snd_cwnd;
>> 	__u32 srtt_us;		/* Averaged RTT << 3 in usecs */
>> 	__u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] bpf: export the net namespace for bpf_sock_ops
  2020-06-05 15:22   ` Daniel Borkmann
@ 2020-06-08  2:51     ` 王黎
  2020-06-08  4:12       ` 王黎
  0 siblings, 1 reply; 6+ messages in thread
From: 王黎 @ 2020-06-08  2:51 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: Jakub Sitnicki, Wang Li, bpf, 黄学森,
	杨兴武

Daniel, thanks for your comments.

bpf_get_netns_cookie_sock really sounds better. 

But I still have another question: how can I get the netns cookie when I invoke the function "msg_redirect_hash"? Its parameter "struct sk_msg_md" does not have any netns information, and there is no "struct sock *ctx" to be used for the helper function "bpf_get_netns_cookie_sock" either. Should I add a field to sk_msg_md, just like I did for bpf_sock_ops?



> 在 2020年6月5日,下午11:22,Daniel Borkmann <daniel@iogearbox.net> 写道:
> 
> On 6/5/20 4:53 PM, Jakub Sitnicki wrote:
>> On Fri, Jun 05, 2020 at 02:40 PM CEST, Wang Li wrote:
>>> Sometimes we need net namespace as part of the key for BPF_MAP_TYPE_SOCKHASH to
>>> distinguish the connections with same five-tuples, for example when we do the
>>> sock_map acceleration for the proxy that uses 127.0.0.1 to 127.0.0.1 connections
>>> in different containers on same node.
>>> And we export the netns inum instead of the real pointer of struct net to avoid
>>> the potential security issue.
>>> 
>>> Signed-off-by: Wang Li <wangli09@kuaishou.com>
>>> Signed-off-by: huangxuesen <huangxuesen@kuaishou.com>
>>> Signed-off-by: yangxingwu <yangxingwu@kuaishou.com>
>>> ---
>>>  include/uapi/linux/bpf.h       |  2 ++
>>>  net/core/filter.c              | 17 +++++++++++++++++
>>>  tools/include/uapi/linux/bpf.h |  2 ++
>>>  3 files changed, 21 insertions(+)
>>> 
>>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>>> index c65b374a5090..0fe7e459f023 100644
>>> --- a/include/uapi/linux/bpf.h
>>> +++ b/include/uapi/linux/bpf.h
>>> @@ -3947,6 +3947,8 @@ struct bpf_sock_ops {
>>>  				 * there is a full socket. If not, the
>>>  				 * fields read as zero.
>>>  				 */
>>> +	__u32 netns_inum;	/* The net namespace this sock belongs to */
>>> +
>> In uapi/linux/bpf.h we have a field `netns_ino` for storing net
>> namespace inode number in a couple structs (bpf_prog_info,
>> bpf_map_info). Would be nice to keep the naming constent.
> 
> Adding in the middle will break programs. Also, currently we have the
> merge window open and as such bpf-next is closed. Check status here [0].
> 
> Regarding above, we recently added bpf_get_netns_cookie() helper, have
> you tried to enable this one instead?
> 
>  [0] http://vger.kernel.org/~davem/net-next.html
> 
>>> diff --git a/net/core/filter.c b/net/core/filter.c
>>> index d01a244b5087..bfe448ace25f 100644
>>> --- a/net/core/filter.c
>>> +++ b/net/core/filter.c
>>> @@ -8450,6 +8450,23 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
>>>  					       is_fullsock));
>>>  		break;
>>> 
>>> +	case offsetof(struct bpf_sock_ops, netns_inum):
>>> +#ifdef CONFIG_NET_NS
>>> +		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
>>> +						struct bpf_sock_ops_kern, sk),
>>> +				      si->dst_reg, si->src_reg,
>>> +				      offsetof(struct bpf_sock_ops_kern, sk));
>>> +		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
>>> +						struct sock_common, skc_net),
>>> +				      si->dst_reg, si->dst_reg,
>>> +				      offsetof(struct sock_common, skc_net));
>>> +		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
>>> +				      offsetof(struct net, ns.inum));
>>> +#else
>>> +		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
>>> +#endif
>>> +		break;
>>> +
>>>  	case offsetof(struct bpf_sock_ops, state):
>>>  		BUILD_BUG_ON(sizeof_field(struct sock_common, skc_state) != 1);
>>> 
>>> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
>>> index c65b374a5090..0fe7e459f023 100644
>>> --- a/tools/include/uapi/linux/bpf.h
>>> +++ b/tools/include/uapi/linux/bpf.h
>>> @@ -3947,6 +3947,8 @@ struct bpf_sock_ops {
>>>  				 * there is a full socket. If not, the
>>>  				 * fields read as zero.
>>>  				 */
>>> +	__u32 netns_inum;	/* The net namespace this sock belongs to */
>>> +
>>>  	__u32 snd_cwnd;
>>>  	__u32 srtt_us;		/* Averaged RTT << 3 in usecs */
>>>  	__u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */
> 


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] bpf: export the net namespace for bpf_sock_ops
  2020-06-08  2:51     ` 王黎
@ 2020-06-08  4:12       ` 王黎
  0 siblings, 0 replies; 6+ messages in thread
From: 王黎 @ 2020-06-08  4:12 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: Jakub Sitnicki, Wang Li, bpf, 黄学森,
	杨兴武

I just saw commit 13d70f5a5ecff367db2fb18ed4ebe433eab8a74c (Sun May 24 09:51:15 2020), which already added "struct bpf_sock *, sk" to sk_msg_md.

So just ignore my question. Thank you all anyway.

> 在 2020年6月8日,上午10:51,王黎 <wangli09@kuaishou.com> 写道:
> 
> Daniel, thanks for your comments.
> 
> bpf_get_netns_cookie_sock really sounds better. 
> 
> But I still have another question that is how can I get the netns cookie when I invoke the function “msg_redirect_hash" because its parameter "struct sk_msg_md” does not have any netns information, and there is no "struct sock * ctx” to be used for the helper function “bpf_get_netns_cookie_sock" too. Add a field for sk_msg_md ? And just like I did for bpf_sock_ops ?
> 
> 
> 
>> 在 2020年6月5日,下午11:22,Daniel Borkmann <daniel@iogearbox.net> 写道:
>> 
>> On 6/5/20 4:53 PM, Jakub Sitnicki wrote:
>>> On Fri, Jun 05, 2020 at 02:40 PM CEST, Wang Li wrote:
>>>> Sometimes we need net namespace as part of the key for BPF_MAP_TYPE_SOCKHASH to
>>>> distinguish the connections with same five-tuples, for example when we do the
>>>> sock_map acceleration for the proxy that uses 127.0.0.1 to 127.0.0.1 connections
>>>> in different containers on same node.
>>>> And we export the netns inum instead of the real pointer of struct net to avoid
>>>> the potential security issue.
>>>> 
>>>> Signed-off-by: Wang Li <wangli09@kuaishou.com>
>>>> Signed-off-by: huangxuesen <huangxuesen@kuaishou.com>
>>>> Signed-off-by: yangxingwu <yangxingwu@kuaishou.com>
>>>> ---
>>>> include/uapi/linux/bpf.h       |  2 ++
>>>> net/core/filter.c              | 17 +++++++++++++++++
>>>> tools/include/uapi/linux/bpf.h |  2 ++
>>>> 3 files changed, 21 insertions(+)
>>>> 
>>>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>>>> index c65b374a5090..0fe7e459f023 100644
>>>> --- a/include/uapi/linux/bpf.h
>>>> +++ b/include/uapi/linux/bpf.h
>>>> @@ -3947,6 +3947,8 @@ struct bpf_sock_ops {
>>>> 				 * there is a full socket. If not, the
>>>> 				 * fields read as zero.
>>>> 				 */
>>>> +	__u32 netns_inum;	/* The net namespace this sock belongs to */
>>>> +
>>> In uapi/linux/bpf.h we have a field `netns_ino` for storing net
>>> namespace inode number in a couple structs (bpf_prog_info,
>>> bpf_map_info). Would be nice to keep the naming constent.
>> 
>> Adding in the middle will break programs. Also, currently we have the
>> merge window open and as such bpf-next is closed. Check status here [0].
>> 
>> Regarding above, we recently added bpf_get_netns_cookie() helper, have
>> you tried to enable this one instead?
>> 
>> [0] http://vger.kernel.org/~davem/net-next.html
>> 
>>>> diff --git a/net/core/filter.c b/net/core/filter.c
>>>> index d01a244b5087..bfe448ace25f 100644
>>>> --- a/net/core/filter.c
>>>> +++ b/net/core/filter.c
>>>> @@ -8450,6 +8450,23 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
>>>> 					       is_fullsock));
>>>> 		break;
>>>> 
>>>> +	case offsetof(struct bpf_sock_ops, netns_inum):
>>>> +#ifdef CONFIG_NET_NS
>>>> +		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
>>>> +						struct bpf_sock_ops_kern, sk),
>>>> +				      si->dst_reg, si->src_reg,
>>>> +				      offsetof(struct bpf_sock_ops_kern, sk));
>>>> +		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
>>>> +						struct sock_common, skc_net),
>>>> +				      si->dst_reg, si->dst_reg,
>>>> +				      offsetof(struct sock_common, skc_net));
>>>> +		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
>>>> +				      offsetof(struct net, ns.inum));
>>>> +#else
>>>> +		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
>>>> +#endif
>>>> +		break;
>>>> +
>>>> 	case offsetof(struct bpf_sock_ops, state):
>>>> 		BUILD_BUG_ON(sizeof_field(struct sock_common, skc_state) != 1);
>>>> 
>>>> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
>>>> index c65b374a5090..0fe7e459f023 100644
>>>> --- a/tools/include/uapi/linux/bpf.h
>>>> +++ b/tools/include/uapi/linux/bpf.h
>>>> @@ -3947,6 +3947,8 @@ struct bpf_sock_ops {
>>>> 				 * there is a full socket. If not, the
>>>> 				 * fields read as zero.
>>>> 				 */
>>>> +	__u32 netns_inum;	/* The net namespace this sock belongs to */
>>>> +
>>>> 	__u32 snd_cwnd;
>>>> 	__u32 srtt_us;		/* Averaged RTT << 3 in usecs */
>>>> 	__u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */
>> 
> 


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2020-06-08  4:14 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-05 12:40 [PATCH] bpf: export the net namespace for bpf_sock_ops Wang Li
2020-06-05 14:53 ` Jakub Sitnicki
2020-06-05 15:22   ` Daniel Borkmann
2020-06-08  2:51     ` 王黎
2020-06-08  4:12       ` 王黎
2020-06-08  2:45   ` 王黎

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.