All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH bpf-next] filter: add BPF_ADJ_ROOM_DATA mode to bpf_skb_adjust_room()
@ 2018-11-08 15:11 Nicolas Dichtel
  2018-11-09 18:51 ` Martin Lau
  0 siblings, 1 reply; 8+ messages in thread
From: Nicolas Dichtel @ 2018-11-08 15:11 UTC (permalink / raw)
  To: ast, daniel, davem; +Cc: netdev, Nicolas Dichtel

This new mode enables to add or remove an l2 header in a programmatic way
with cls_bpf.
For example, it enables to play with mpls headers.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
---
 include/uapi/linux/bpf.h       |  3 ++
 net/core/filter.c              | 54 ++++++++++++++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h |  3 ++
 3 files changed, 60 insertions(+)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 852dc17ab47a..47407fd5162b 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1467,6 +1467,8 @@ union bpf_attr {
  *
  * 		* **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
  * 		  (room space is added or removed below the layer 3 header).
+ * 		* **BPF_ADJ_ROOM_DATA**: Adjust room at the beginning of the
+ * 		  packet (room space is added or removed below skb->data).
  *
  * 		All values for *flags* are reserved for future usage, and must
  * 		be left at zero.
@@ -2408,6 +2410,7 @@ enum bpf_func_id {
 /* Mode for BPF_FUNC_skb_adjust_room helper. */
 enum bpf_adj_room_mode {
 	BPF_ADJ_ROOM_NET,
+	BPF_ADJ_ROOM_DATA,
 };
 
 /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
diff --git a/net/core/filter.c b/net/core/filter.c
index e521c5ebc7d1..e699849b269d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2884,6 +2884,58 @@ static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
 	return ret;
 }
 
+static int bpf_skb_data_shrink(struct sk_buff *skb, u32 len)
+{
+	unsigned short hhlen = skb->dev->header_ops ?
+			       skb->dev->hard_header_len : 0;
+	int ret;
+
+	ret = skb_unclone(skb, GFP_ATOMIC);
+	if (unlikely(ret < 0))
+		return ret;
+
+	__skb_pull(skb, len);
+	skb_reset_mac_header(skb);
+	skb_reset_network_header(skb);
+	skb->network_header += hhlen;
+	skb_reset_transport_header(skb);
+	return 0;
+}
+
+static int bpf_skb_data_grow(struct sk_buff *skb, u32 len)
+{
+	unsigned short hhlen = skb->dev->header_ops ?
+			       skb->dev->hard_header_len : 0;
+	int ret;
+
+	ret = skb_cow(skb, len);
+	if (unlikely(ret < 0))
+		return ret;
+
+	skb_push(skb, len);
+	skb_reset_mac_header(skb);
+	return 0;
+}
+
+static int bpf_skb_adjust_data(struct sk_buff *skb, s32 len_diff)
+{
+	u32 len_diff_abs = abs(len_diff);
+	bool shrink = len_diff < 0;
+	int ret;
+
+	if (unlikely(len_diff_abs > 0xfffU))
+		return -EFAULT;
+
+	if (shrink && len_diff_abs >= skb_headlen(skb))
+		return -EFAULT;
+
+	ret = shrink ? bpf_skb_data_shrink(skb, len_diff_abs) :
+		       bpf_skb_data_grow(skb, len_diff_abs);
+
+	bpf_compute_data_pointers(skb);
+	return ret;
+}
+
 BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
 	   u32, mode, u64, flags)
 {
@@ -2891,6 +2943,8 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
 		return -EINVAL;
 	if (likely(mode == BPF_ADJ_ROOM_NET))
 		return bpf_skb_adjust_net(skb, len_diff);
+	if (likely(mode == BPF_ADJ_ROOM_DATA))
+		return bpf_skb_adjust_data(skb, len_diff);
 
 	return -ENOTSUPP;
 }
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 852dc17ab47a..47407fd5162b 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1467,6 +1467,8 @@ union bpf_attr {
  *
  * 		* **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
  * 		  (room space is added or removed below the layer 3 header).
+ * 		* **BPF_ADJ_ROOM_DATA**: Adjust room at the beginning of the
+ * 		  packet (room space is added or removed below skb->data).
  *
  * 		All values for *flags* are reserved for future usage, and must
  * 		be left at zero.
@@ -2408,6 +2410,7 @@ enum bpf_func_id {
 /* Mode for BPF_FUNC_skb_adjust_room helper. */
 enum bpf_adj_room_mode {
 	BPF_ADJ_ROOM_NET,
+	BPF_ADJ_ROOM_DATA,
 };
 
 /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
-- 
2.18.0

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH bpf-next] filter: add BPF_ADJ_ROOM_DATA mode to bpf_skb_adjust_room()
  2018-11-08 15:11 [PATCH bpf-next] filter: add BPF_ADJ_ROOM_DATA mode to bpf_skb_adjust_room() Nicolas Dichtel
@ 2018-11-09 18:51 ` Martin Lau
  2018-11-10 23:43   ` Nicolas Dichtel
  0 siblings, 1 reply; 8+ messages in thread
From: Martin Lau @ 2018-11-09 18:51 UTC (permalink / raw)
  To: Nicolas Dichtel; +Cc: ast, daniel, davem, netdev

On Thu, Nov 08, 2018 at 04:11:37PM +0100, Nicolas Dichtel wrote:
> This new mode enables to add or remove an l2 header in a programmatic way
> with cls_bpf.
> For example, it enables to play with mpls headers.
> 
> Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
> ---
>  include/uapi/linux/bpf.h       |  3 ++
>  net/core/filter.c              | 54 ++++++++++++++++++++++++++++++++++
>  tools/include/uapi/linux/bpf.h |  3 ++
>  3 files changed, 60 insertions(+)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 852dc17ab47a..47407fd5162b 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -1467,6 +1467,8 @@ union bpf_attr {
>   *
>   * 		* **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
>   * 		  (room space is added or removed below the layer 3 header).
> + * 		* **BPF_ADJ_ROOM_DATA**: Adjust room at the beginning of the
> + * 		  packet (room space is added or removed below skb->data).
>   *
>   * 		All values for *flags* are reserved for future usage, and must
>   * 		be left at zero.
> @@ -2408,6 +2410,7 @@ enum bpf_func_id {
>  /* Mode for BPF_FUNC_skb_adjust_room helper. */
>  enum bpf_adj_room_mode {
>  	BPF_ADJ_ROOM_NET,
> +	BPF_ADJ_ROOM_DATA,
>  };
>  
>  /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
> diff --git a/net/core/filter.c b/net/core/filter.c
> index e521c5ebc7d1..e699849b269d 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -2884,6 +2884,58 @@ static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
>  	return ret;
>  }
>  
> +static int bpf_skb_data_shrink(struct sk_buff *skb, u32 len)
> +{
> +	unsigned short hhlen = skb->dev->header_ops ?
> +			       skb->dev->hard_header_len : 0;
> +	int ret;
> +
> +	ret = skb_unclone(skb, GFP_ATOMIC);
> +	if (unlikely(ret < 0))
> +		return ret;
> +
> +	__skb_pull(skb, len);
> +	skb_reset_mac_header(skb);
> +	skb_reset_network_header(skb);
> +	skb->network_header += hhlen;
> +	skb_reset_transport_header(skb);
hmm...why transport_header does not need += hhlen here
while network_header does?

> +	return 0;
> +}
> +
> +static int bpf_skb_data_grow(struct sk_buff *skb, u32 len)
> +{
> +	unsigned short hhlen = skb->dev->header_ops ?
> +			       skb->dev->hard_header_len : 0;
> +	int ret;
> +
> +	ret = skb_cow(skb, len);
> +	if (unlikely(ret < 0))
> +		return ret;
> +
> +	skb_push(skb, len);
> +	skb_reset_mac_header(skb);
> +	return 0;
> +}
> +
> +static int bpf_skb_adjust_data(struct sk_buff *skb, s32 len_diff)
> +{
> +	u32 len_diff_abs = abs(len_diff);
> +	bool shrink = len_diff < 0;
> +	int ret;
> +
> +	if (unlikely(len_diff_abs > 0xfffU))
> +		return -EFAULT;
> +
> +	if (shrink && len_diff_abs >= skb_headlen(skb))
> +		return -EFAULT;
> +
> +	ret = shrink ? bpf_skb_data_shrink(skb, len_diff_abs) :
> +		       bpf_skb_data_grow(skb, len_diff_abs);
> +
> +	bpf_compute_data_pointers(skb);
> +	return ret;
> +}
> +
>  BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
>  	   u32, mode, u64, flags)
>  {
> @@ -2891,6 +2943,8 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
>  		return -EINVAL;
>  	if (likely(mode == BPF_ADJ_ROOM_NET))
>  		return bpf_skb_adjust_net(skb, len_diff);
> +	if (likely(mode == BPF_ADJ_ROOM_DATA))
> +		return bpf_skb_adjust_data(skb, len_diff);
>  
>  	return -ENOTSUPP;
>  }
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 852dc17ab47a..47407fd5162b 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -1467,6 +1467,8 @@ union bpf_attr {
>   *
>   * 		* **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
>   * 		  (room space is added or removed below the layer 3 header).
> + * 		* **BPF_ADJ_ROOM_DATA**: Adjust room at the beginning of the
> + * 		  packet (room space is added or removed below skb->data).
>   *
>   * 		All values for *flags* are reserved for future usage, and must
>   * 		be left at zero.
> @@ -2408,6 +2410,7 @@ enum bpf_func_id {
>  /* Mode for BPF_FUNC_skb_adjust_room helper. */
>  enum bpf_adj_room_mode {
>  	BPF_ADJ_ROOM_NET,
> +	BPF_ADJ_ROOM_DATA,
>  };
>  
>  /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
> -- 
> 2.18.0
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH bpf-next] filter: add BPF_ADJ_ROOM_DATA mode to bpf_skb_adjust_room()
  2018-11-09 18:51 ` Martin Lau
@ 2018-11-10 23:43   ` Nicolas Dichtel
  2018-11-12 18:39     ` Martin Lau
  0 siblings, 1 reply; 8+ messages in thread
From: Nicolas Dichtel @ 2018-11-10 23:43 UTC (permalink / raw)
  To: Martin Lau; +Cc: ast, daniel, davem, netdev

Le 09/11/2018 à 19:51, Martin Lau a écrit :
> On Thu, Nov 08, 2018 at 04:11:37PM +0100, Nicolas Dichtel wrote:
[snip]
>> +static int bpf_skb_data_shrink(struct sk_buff *skb, u32 len)
>> +{
>> +	unsigned short hhlen = skb->dev->header_ops ?
>> +			       skb->dev->hard_header_len : 0;
>> +	int ret;
>> +
>> +	ret = skb_unclone(skb, GFP_ATOMIC);
>> +	if (unlikely(ret < 0))
>> +		return ret;
>> +
>> +	__skb_pull(skb, len);
>> +	skb_reset_mac_header(skb);
>> +	skb_reset_network_header(skb);
>> +	skb->network_header += hhlen;
>> +	skb_reset_transport_header(skb);
> hmm...why transport_header does not need += hhlen here
> while network_header does?

network_header is mandatory because bpf_redirect(BPF_F_INGRESS) can be called
and network_header is expected to be correctly set in this case.
For transport_header, I choose to not set it, because the stack will set it
later (for example ip_rcv_core()).


Regards,
Nicolas

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH bpf-next] filter: add BPF_ADJ_ROOM_DATA mode to bpf_skb_adjust_room()
  2018-11-10 23:43   ` Nicolas Dichtel
@ 2018-11-12 18:39     ` Martin Lau
  2018-11-13 16:35       ` [PATCH bpf-next v2] " Nicolas Dichtel
  0 siblings, 1 reply; 8+ messages in thread
From: Martin Lau @ 2018-11-12 18:39 UTC (permalink / raw)
  To: Nicolas Dichtel; +Cc: ast, daniel, davem, netdev

On Sun, Nov 11, 2018 at 12:43:27AM +0100, Nicolas Dichtel wrote:
> Le 09/11/2018 à 19:51, Martin Lau a écrit :
> > On Thu, Nov 08, 2018 at 04:11:37PM +0100, Nicolas Dichtel wrote:
> [snip]
> >> +static int bpf_skb_data_shrink(struct sk_buff *skb, u32 len)
> >> +{
> >> +	unsigned short hhlen = skb->dev->header_ops ?
> >> +			       skb->dev->hard_header_len : 0;
> >> +	int ret;
> >> +
> >> +	ret = skb_unclone(skb, GFP_ATOMIC);
> >> +	if (unlikely(ret < 0))
> >> +		return ret;
> >> +
> >> +	__skb_pull(skb, len);
> >> +	skb_reset_mac_header(skb);
> >> +	skb_reset_network_header(skb);
> >> +	skb->network_header += hhlen;
Nit. skb_set_network_header(skb, hhlen);

Othen than that

Acked-by: Martin KaFai Lau <kafai@fb.com>

> >> +	skb_reset_transport_header(skb);
> > hmm...why transport_header does not need += hhlen here
> > while network_header does?
> 
> network_header is mandatory because bpf_redirect(BPF_F_INGRESS) can be called
> and network_header is expected to be correctly set in this case.
> For transport_header, I choose to not set it, because the stack will set it
> later (for example ip_rcv_core()).
ic. make sense.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH bpf-next v2] filter: add BPF_ADJ_ROOM_DATA mode to bpf_skb_adjust_room()
  2018-11-12 18:39     ` Martin Lau
@ 2018-11-13 16:35       ` Nicolas Dichtel
  2018-11-17  4:52         ` Alexei Starovoitov
  2018-11-20  1:06         ` Daniel Borkmann
  0 siblings, 2 replies; 8+ messages in thread
From: Nicolas Dichtel @ 2018-11-13 16:35 UTC (permalink / raw)
  To: kafai; +Cc: ast, daniel, davem, netdev, Nicolas Dichtel

This new mode enables to add or remove an l2 header in a programmatic way
with cls_bpf.
For example, it enables to play with mpls headers.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
---

v2: use skb_set_network_header()

 include/uapi/linux/bpf.h       |  3 ++
 net/core/filter.c              | 53 ++++++++++++++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h |  3 ++
 3 files changed, 59 insertions(+)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 47d606d744cc..9762ef3a536c 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1467,6 +1467,8 @@ union bpf_attr {
  *
  * 		* **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
  * 		  (room space is added or removed below the layer 3 header).
+ * 		* **BPF_ADJ_ROOM_DATA**: Adjust room at the beginning of the
+ * 		  packet (room space is added or removed below skb->data).
  *
  * 		All values for *flags* are reserved for future usage, and must
  * 		be left at zero.
@@ -2412,6 +2414,7 @@ enum bpf_func_id {
 /* Mode for BPF_FUNC_skb_adjust_room helper. */
 enum bpf_adj_room_mode {
 	BPF_ADJ_ROOM_NET,
+	BPF_ADJ_ROOM_DATA,
 };
 
 /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
diff --git a/net/core/filter.c b/net/core/filter.c
index 53d50fb75ea1..e56da3077dca 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2884,6 +2884,57 @@ static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
 	return ret;
 }
 
+static int bpf_skb_data_shrink(struct sk_buff *skb, u32 len)
+{
+	unsigned short hhlen = skb->dev->header_ops ?
+			       skb->dev->hard_header_len : 0;
+	int ret;
+
+	ret = skb_unclone(skb, GFP_ATOMIC);
+	if (unlikely(ret < 0))
+		return ret;
+
+	__skb_pull(skb, len);
+	skb_reset_mac_header(skb);
+	skb_set_network_header(skb, hhlen);
+	skb_reset_transport_header(skb);
+	return 0;
+}
+
+static int bpf_skb_data_grow(struct sk_buff *skb, u32 len)
+{
+	unsigned short hhlen = skb->dev->header_ops ?
+			       skb->dev->hard_header_len : 0;
+	int ret;
+
+	ret = skb_cow(skb, len);
+	if (unlikely(ret < 0))
+		return ret;
+
+	skb_push(skb, len);
+	skb_reset_mac_header(skb);
+	return 0;
+}
+
+static int bpf_skb_adjust_data(struct sk_buff *skb, s32 len_diff)
+{
+	u32 len_diff_abs = abs(len_diff);
+	bool shrink = len_diff < 0;
+	int ret;
+
+	if (unlikely(len_diff_abs > 0xfffU))
+		return -EFAULT;
+
+	if (shrink && len_diff_abs >= skb_headlen(skb))
+		return -EFAULT;
+
+	ret = shrink ? bpf_skb_data_shrink(skb, len_diff_abs) :
+		       bpf_skb_data_grow(skb, len_diff_abs);
+
+	bpf_compute_data_pointers(skb);
+	return ret;
+}
+
 BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
 	   u32, mode, u64, flags)
 {
@@ -2891,6 +2942,8 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
 		return -EINVAL;
 	if (likely(mode == BPF_ADJ_ROOM_NET))
 		return bpf_skb_adjust_net(skb, len_diff);
+	if (likely(mode == BPF_ADJ_ROOM_DATA))
+		return bpf_skb_adjust_data(skb, len_diff);
 
 	return -ENOTSUPP;
 }
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 852dc17ab47a..47407fd5162b 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1467,6 +1467,8 @@ union bpf_attr {
  *
  * 		* **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
  * 		  (room space is added or removed below the layer 3 header).
+ * 		* **BPF_ADJ_ROOM_DATA**: Adjust room at the beginning of the
+ * 		  packet (room space is added or removed below skb->data).
  *
  * 		All values for *flags* are reserved for future usage, and must
  * 		be left at zero.
@@ -2408,6 +2410,7 @@ enum bpf_func_id {
 /* Mode for BPF_FUNC_skb_adjust_room helper. */
 enum bpf_adj_room_mode {
 	BPF_ADJ_ROOM_NET,
+	BPF_ADJ_ROOM_DATA,
 };
 
 /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
-- 
2.18.0

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH bpf-next v2] filter: add BPF_ADJ_ROOM_DATA mode to bpf_skb_adjust_room()
  2018-11-13 16:35       ` [PATCH bpf-next v2] " Nicolas Dichtel
@ 2018-11-17  4:52         ` Alexei Starovoitov
  2018-11-20  1:06         ` Daniel Borkmann
  1 sibling, 0 replies; 8+ messages in thread
From: Alexei Starovoitov @ 2018-11-17  4:52 UTC (permalink / raw)
  To: Nicolas Dichtel; +Cc: kafai, ast, daniel, davem, netdev

On Tue, Nov 13, 2018 at 05:35:17PM +0100, Nicolas Dichtel wrote:
> This new mode enables to add or remove an l2 header in a programmatic way
> with cls_bpf.
> For example, it enables to play with mpls headers.
> 
> Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
> Acked-by: Martin KaFai Lau <kafai@fb.com>

Acked-by: Alexei Starovoitov <ast@kernel.org>

Daniel, thoughts?

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH bpf-next v2] filter: add BPF_ADJ_ROOM_DATA mode to bpf_skb_adjust_room()
  2018-11-13 16:35       ` [PATCH bpf-next v2] " Nicolas Dichtel
  2018-11-17  4:52         ` Alexei Starovoitov
@ 2018-11-20  1:06         ` Daniel Borkmann
  2018-11-21 14:43           ` Nicolas Dichtel
  1 sibling, 1 reply; 8+ messages in thread
From: Daniel Borkmann @ 2018-11-20  1:06 UTC (permalink / raw)
  To: Nicolas Dichtel, kafai; +Cc: ast, davem, netdev

On 11/13/2018 05:35 PM, Nicolas Dichtel wrote:
> This new mode enables to add or remove an l2 header in a programmatic way
> with cls_bpf.
> For example, it enables to play with mpls headers.
> 
> Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
> Acked-by: Martin KaFai Lau <kafai@fb.com>

(Sorry for late reply, swamped due to Plumbers.)

> ---
> 
> v2: use skb_set_network_header()
> 
>  include/uapi/linux/bpf.h       |  3 ++
>  net/core/filter.c              | 53 ++++++++++++++++++++++++++++++++++
>  tools/include/uapi/linux/bpf.h |  3 ++
>  3 files changed, 59 insertions(+)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 47d606d744cc..9762ef3a536c 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -1467,6 +1467,8 @@ union bpf_attr {
>   *
>   * 		* **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
>   * 		  (room space is added or removed below the layer 3 header).
> + * 		* **BPF_ADJ_ROOM_DATA**: Adjust room at the beginning of the
> + * 		  packet (room space is added or removed below skb->data).

Nit: in this case it would probably make more sense to name it BPF_ADJ_ROOM_MAC.
BPF_ADJ_ROOM_DATA reads like we're adjusting payload data.

>   * 		All values for *flags* are reserved for future usage, and must
>   * 		be left at zero.
> @@ -2412,6 +2414,7 @@ enum bpf_func_id {
>  /* Mode for BPF_FUNC_skb_adjust_room helper. */
>  enum bpf_adj_room_mode {
>  	BPF_ADJ_ROOM_NET,
> +	BPF_ADJ_ROOM_DATA,
>  };
>  
>  /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 53d50fb75ea1..e56da3077dca 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -2884,6 +2884,57 @@ static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
>  	return ret;
>  }
>  
> +static int bpf_skb_data_shrink(struct sk_buff *skb, u32 len)
> +{
> +	unsigned short hhlen = skb->dev->header_ops ?
> +			       skb->dev->hard_header_len : 0;
> +	int ret;
> +
> +	ret = skb_unclone(skb, GFP_ATOMIC);
> +	if (unlikely(ret < 0))
> +		return ret;
> +
> +	__skb_pull(skb, len);
> +	skb_reset_mac_header(skb);
> +	skb_set_network_header(skb, hhlen);
> +	skb_reset_transport_header(skb);
> +	return 0;
> +}
> +
> +static int bpf_skb_data_grow(struct sk_buff *skb, u32 len)
> +{
> +	unsigned short hhlen = skb->dev->header_ops ?
> +			       skb->dev->hard_header_len : 0;
> +	int ret;
> +
> +	ret = skb_cow(skb, len);
> +	if (unlikely(ret < 0))
> +		return ret;
> +
> +	skb_push(skb, len);
> +	skb_reset_mac_header(skb);

Looks like this could leak uninitialized mem into the BPF prog as it's
not zeroed out. Can't we just reuse bpf_skb_generic_push() and the
bpf_skb_generic_pop()?

bpf_skb_vlan_push() and bpf_skb_vlan_pop() do a bpf_push_mac_rcsum() /
bpf_pull_mac_rcsum() in order to not screw up things like csum complete.
Wouldn't this be needed here as well?

How does this interact with MPLS GSO?

If the packet gets pushed back into the stack, would AF_MPLS handle it?
Probably good to add test cases to BPF kselftest suite with it.

Don't we need to reject the packet in case of skb->encapsulation?

Looking at push_mpls() and pop_mpls() from OVS implementation, do we
also need to deal with skb inner network header / proto?

Given all this would it rather make sense to add MPLS specific helpers
in similar fashion to the vlan ones we have?

Is there any other use case aside from MPLS?

> +	return 0;
> +}
> +
> +static int bpf_skb_adjust_data(struct sk_buff *skb, s32 len_diff)
> +{
> +	u32 len_diff_abs = abs(len_diff);
> +	bool shrink = len_diff < 0;
> +	int ret;
> +
> +	if (unlikely(len_diff_abs > 0xfffU))
> +		return -EFAULT;
> +
> +	if (shrink && len_diff_abs >= skb_headlen(skb))
> +		return -EFAULT;
> +
> +	ret = shrink ? bpf_skb_data_shrink(skb, len_diff_abs) :
> +		       bpf_skb_data_grow(skb, len_diff_abs);

Hmm, I think this has a bug in that the above two do not adjust
skb->mac_len. Then things like cls_bpf_classify() will __skb_pull()
based on the old mac_len, getting you to a wrong offset in the
packet when this is for example pushed back into ingress, etc.

> +	bpf_compute_data_pointers(skb);
> +	return ret;
> +}
> +
>  BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
>  	   u32, mode, u64, flags)
>  {
> @@ -2891,6 +2942,8 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
>  		return -EINVAL;
>  	if (likely(mode == BPF_ADJ_ROOM_NET))
>  		return bpf_skb_adjust_net(skb, len_diff);
> +	if (likely(mode == BPF_ADJ_ROOM_DATA))
> +		return bpf_skb_adjust_data(skb, len_diff);
>  
>  	return -ENOTSUPP;
>  }
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 852dc17ab47a..47407fd5162b 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -1467,6 +1467,8 @@ union bpf_attr {
>   *
>   * 		* **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
>   * 		  (room space is added or removed below the layer 3 header).
> + * 		* **BPF_ADJ_ROOM_DATA**: Adjust room at the beginning of the
> + * 		  packet (room space is added or removed below skb->data).
>   *
>   * 		All values for *flags* are reserved for future usage, and must
>   * 		be left at zero.
> @@ -2408,6 +2410,7 @@ enum bpf_func_id {
>  /* Mode for BPF_FUNC_skb_adjust_room helper. */
>  enum bpf_adj_room_mode {
>  	BPF_ADJ_ROOM_NET,
> +	BPF_ADJ_ROOM_DATA,
>  };
>  
>  /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH bpf-next v2] filter: add BPF_ADJ_ROOM_DATA mode to bpf_skb_adjust_room()
  2018-11-20  1:06         ` Daniel Borkmann
@ 2018-11-21 14:43           ` Nicolas Dichtel
  0 siblings, 0 replies; 8+ messages in thread
From: Nicolas Dichtel @ 2018-11-21 14:43 UTC (permalink / raw)
  To: Daniel Borkmann; +Cc: kafai, ast, davem, netdev

Le 20/11/2018 à 02:06, Daniel Borkmann a écrit :
> On 11/13/2018 05:35 PM, Nicolas Dichtel wrote:
>> This new mode enables to add or remove an l2 header in a programmatic way
>> with cls_bpf.
>> For example, it enables to play with mpls headers.
>>
>> Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
>> Acked-by: Martin KaFai Lau <kafai@fb.com>
> 
> (Sorry for late reply, swamped due to Plumbers.)
I thought so, no problem ;-)

> 
>> ---
>>
>> v2: use skb_set_network_header()
>>
>>  include/uapi/linux/bpf.h       |  3 ++
>>  net/core/filter.c              | 53 ++++++++++++++++++++++++++++++++++
>>  tools/include/uapi/linux/bpf.h |  3 ++
>>  3 files changed, 59 insertions(+)
>>
>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>> index 47d606d744cc..9762ef3a536c 100644
>> --- a/include/uapi/linux/bpf.h
>> +++ b/include/uapi/linux/bpf.h
>> @@ -1467,6 +1467,8 @@ union bpf_attr {
>>   *
>>   * 		* **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
>>   * 		  (room space is added or removed below the layer 3 header).
>> + * 		* **BPF_ADJ_ROOM_DATA**: Adjust room at the beginning of the
>> + * 		  packet (room space is added or removed below skb->data).
> 
> Nit: in this case it would probably make more sense to name it BPF_ADJ_ROOM_MAC.
> BPF_ADJ_ROOM_DATA reads like we're adjusting payload data.
Yep, I first had in mind skb->data, but you're right.

> 
>>   * 		All values for *flags* are reserved for future usage, and must
>>   * 		be left at zero.
>> @@ -2412,6 +2414,7 @@ enum bpf_func_id {
>>  /* Mode for BPF_FUNC_skb_adjust_room helper. */
>>  enum bpf_adj_room_mode {
>>  	BPF_ADJ_ROOM_NET,
>> +	BPF_ADJ_ROOM_DATA,
>>  };
>>  
>>  /* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
>> diff --git a/net/core/filter.c b/net/core/filter.c
>> index 53d50fb75ea1..e56da3077dca 100644
>> --- a/net/core/filter.c
>> +++ b/net/core/filter.c
>> @@ -2884,6 +2884,57 @@ static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
>>  	return ret;
>>  }
>>  
>> +static int bpf_skb_data_shrink(struct sk_buff *skb, u32 len)
>> +{
>> +	unsigned short hhlen = skb->dev->header_ops ?
>> +			       skb->dev->hard_header_len : 0;
>> +	int ret;
>> +
>> +	ret = skb_unclone(skb, GFP_ATOMIC);
>> +	if (unlikely(ret < 0))
>> +		return ret;
>> +
>> +	__skb_pull(skb, len);
>> +	skb_reset_mac_header(skb);
>> +	skb_set_network_header(skb, hhlen);
>> +	skb_reset_transport_header(skb);
>> +	return 0;
>> +}
>> +
>> +static int bpf_skb_data_grow(struct sk_buff *skb, u32 len)
>> +{
>> +	unsigned short hhlen = skb->dev->header_ops ?
>> +			       skb->dev->hard_header_len : 0;
>> +	int ret;
>> +
>> +	ret = skb_cow(skb, len);
>> +	if (unlikely(ret < 0))
>> +		return ret;
>> +
>> +	skb_push(skb, len);
>> +	skb_reset_mac_header(skb);
> 
> Looks like this could leak uninitialized mem into the BPF prog as it's
> not zeroed out. Can't we just reuse bpf_skb_generic_push() and the
> bpf_skb_generic_pop()?
Hmm, at some point, it was not possible, but it was an intermediate version and
I don't see why now.

> 
> bpf_skb_vlan_push() and bpf_skb_vlan_pop() do a bpf_push_mac_rcsum() /
> bpf_pull_mac_rcsum() in order to not screw up things like csum complete.
> Wouldn't this be needed here as well?
> 
> How does this interact with MPLS GSO?
I overlooked both points.

> 
> If the packet gets pushed back into the stack, would AF_MPLS handle it?
> Probably good to add test cases to BPF kselftest suite with it.
Ok, I will have a look to this.

> 
> Don't we need to reject the packet in case of skb->encapsulation?
> 
> Looking at push_mpls() and pop_mpls() from OVS implementation, do we
> also need to deal with skb inner network header / proto?
> 
> Given all this would it rather make sense to add MPLS specific helpers
> in similar fashion to the vlan ones we have?
> 
> Is there any other use case aside from MPLS?
Yes, I was targeting a generic encap/decap at l2 level. Maybe more complex than
I thought.

> 
>> +	return 0;
>> +}
>> +
>> +static int bpf_skb_adjust_data(struct sk_buff *skb, s32 len_diff)
>> +{
>> +	u32 len_diff_abs = abs(len_diff);
>> +	bool shrink = len_diff < 0;
>> +	int ret;
>> +
>> +	if (unlikely(len_diff_abs > 0xfffU))
>> +		return -EFAULT;
>> +
>> +	if (shrink && len_diff_abs >= skb_headlen(skb))
>> +		return -EFAULT;
>> +
>> +	ret = shrink ? bpf_skb_data_shrink(skb, len_diff_abs) :
>> +		       bpf_skb_data_grow(skb, len_diff_abs);
> 
> Hmm, I think this has a bug in that the above two do not adjust
> skb->mac_len. Then things like cls_bpf_classify() will __skb_pull()
> based on the old mac_len, getting you to a wrong offset in the
> packet when this is for example pushed back into ingress, etc.
Nice catch!


Thank you,
Nicolas

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2018-11-22  1:18 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-11-08 15:11 [PATCH bpf-next] filter: add BPF_ADJ_ROOM_DATA mode to bpf_skb_adjust_room() Nicolas Dichtel
2018-11-09 18:51 ` Martin Lau
2018-11-10 23:43   ` Nicolas Dichtel
2018-11-12 18:39     ` Martin Lau
2018-11-13 16:35       ` [PATCH bpf-next v2] " Nicolas Dichtel
2018-11-17  4:52         ` Alexei Starovoitov
2018-11-20  1:06         ` Daniel Borkmann
2018-11-21 14:43           ` Nicolas Dichtel

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.