All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next 0/2] tun: allow to attach eBPF filter
@ 2017-12-29  2:44 Jason Wang
  2017-12-29  2:44 ` [PATCH net-next 1/2] tuntap: rename struct tun_steering_prog to struct tun_prog Jason Wang
  2017-12-29  2:44 ` [PATCH net-next 2/2] tun: allow to attach ebpf socket filter Jason Wang
  0 siblings, 2 replies; 9+ messages in thread
From: Jason Wang @ 2017-12-29  2:44 UTC (permalink / raw)
  To: netdev, linux-kernel; +Cc: mst, willemb, Jason Wang

Hi all:

This series tries to implement eBPF socket filter for tun. This could
be used for implementing efficient virtio-net receive filter for
vhost-net.

Thanks

Jason Wang (2):
  tuntap: rename struct tun_steering_prog to struct tun_prog
  tun: allow to attach ebpf socket filter

 drivers/net/tun.c           | 58 ++++++++++++++++++++++++++++++++-------------
 include/uapi/linux/if_tun.h |  1 +
 2 files changed, 43 insertions(+), 16 deletions(-)

-- 
2.7.4

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH net-next 1/2] tuntap: rename struct tun_steering_prog to struct tun_prog
  2017-12-29  2:44 [PATCH net-next 0/2] tun: allow to attach eBPF filter Jason Wang
@ 2017-12-29  2:44 ` Jason Wang
  2017-12-29  2:44 ` [PATCH net-next 2/2] tun: allow to attach ebpf socket filter Jason Wang
  1 sibling, 0 replies; 9+ messages in thread
From: Jason Wang @ 2017-12-29  2:44 UTC (permalink / raw)
  To: netdev, linux-kernel; +Cc: mst, willemb, Jason Wang

To be reused by other eBPF program other than queue selection.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/tun.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index e367d631..0853829 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -195,7 +195,7 @@ struct tun_flow_entry {
 
 #define TUN_NUM_FLOW_ENTRIES 1024
 
-struct tun_steering_prog {
+struct tun_prog {
 	struct rcu_head rcu;
 	struct bpf_prog *prog;
 };
@@ -237,7 +237,7 @@ struct tun_struct {
 	u32 rx_batched;
 	struct tun_pcpu_stats __percpu *pcpu_stats;
 	struct bpf_prog __rcu *xdp_prog;
-	struct tun_steering_prog __rcu *steering_prog;
+	struct tun_prog __rcu *steering_prog;
 };
 
 static int tun_napi_receive(struct napi_struct *napi, int budget)
@@ -571,7 +571,7 @@ static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
 
 static u16 tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb)
 {
-	struct tun_steering_prog *prog;
+	struct tun_prog *prog;
 	u16 ret = 0;
 
 	prog = rcu_dereference(tun->steering_prog);
@@ -2027,19 +2027,18 @@ static ssize_t tun_chr_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	return ret;
 }
 
-static void tun_steering_prog_free(struct rcu_head *rcu)
+static void tun_prog_free(struct rcu_head *rcu)
 {
-	struct tun_steering_prog *prog = container_of(rcu,
-					 struct tun_steering_prog, rcu);
+	struct tun_prog *prog = container_of(rcu, struct tun_prog, rcu);
 
 	bpf_prog_destroy(prog->prog);
 	kfree(prog);
 }
 
-static int __tun_set_steering_ebpf(struct tun_struct *tun,
-				   struct bpf_prog *prog)
+static int __tun_set_ebpf(struct tun_struct *tun, struct tun_prog **prog_p,
+			  struct bpf_prog *prog)
 {
-	struct tun_steering_prog *old, *new = NULL;
+	struct tun_prog *old, *new = NULL;
 
 	if (prog) {
 		new = kmalloc(sizeof(*new), GFP_KERNEL);
@@ -2049,13 +2048,13 @@ static int __tun_set_steering_ebpf(struct tun_struct *tun,
 	}
 
 	spin_lock_bh(&tun->lock);
-	old = rcu_dereference_protected(tun->steering_prog,
+	old = rcu_dereference_protected(*prog_p,
 					lockdep_is_held(&tun->lock));
-	rcu_assign_pointer(tun->steering_prog, new);
+	rcu_assign_pointer(*prog_p, new);
 	spin_unlock_bh(&tun->lock);
 
 	if (old)
-		call_rcu(&old->rcu, tun_steering_prog_free);
+		call_rcu(&old->rcu, tun_prog_free);
 
 	return 0;
 }
@@ -2068,7 +2067,7 @@ static void tun_free_netdev(struct net_device *dev)
 	free_percpu(tun->pcpu_stats);
 	tun_flow_uninit(tun);
 	security_tun_dev_free_security(tun->security);
-	__tun_set_steering_ebpf(tun, NULL);
+	__tun_set_ebpf(tun, &tun->steering_prog, NULL);
 }
 
 static void tun_setup(struct net_device *dev)
@@ -2550,7 +2549,8 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr)
 	return ret;
 }
 
-static int tun_set_steering_ebpf(struct tun_struct *tun, void __user *data)
+static int tun_set_ebpf(struct tun_struct *tun, struct tun_prog **prog_p,
+			void __user *data)
 {
 	struct bpf_prog *prog;
 	int fd;
@@ -2566,7 +2566,7 @@ static int tun_set_steering_ebpf(struct tun_struct *tun, void __user *data)
 			return PTR_ERR(prog);
 	}
 
-	return __tun_set_steering_ebpf(tun, prog);
+	return __tun_set_ebpf(tun, prog_p, prog);
 }
 
 static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
@@ -2846,7 +2846,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 		break;
 
 	case TUNSETSTEERINGEBPF:
-		ret = tun_set_steering_ebpf(tun, argp);
+		ret = tun_set_ebpf(tun, &tun->steering_prog, argp);
 		break;
 
 	default:
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH net-next 2/2] tun: allow to attach ebpf socket filter
  2017-12-29  2:44 [PATCH net-next 0/2] tun: allow to attach eBPF filter Jason Wang
  2017-12-29  2:44 ` [PATCH net-next 1/2] tuntap: rename struct tun_steering_prog to struct tun_prog Jason Wang
@ 2017-12-29  2:44 ` Jason Wang
  2017-12-31 10:14   ` Willem de Bruijn
  1 sibling, 1 reply; 9+ messages in thread
From: Jason Wang @ 2017-12-29  2:44 UTC (permalink / raw)
  To: netdev, linux-kernel; +Cc: mst, willemb, Jason Wang

This patch allows userspace to attach eBPF filter to tun. This will
allow to implement VM dataplane filtering in a more efficient way
compared to cBPF filter.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/tun.c           | 26 ++++++++++++++++++++++++++
 include/uapi/linux/if_tun.h |  1 +
 2 files changed, 27 insertions(+)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 0853829..6e9452b 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -238,6 +238,7 @@ struct tun_struct {
 	struct tun_pcpu_stats __percpu *pcpu_stats;
 	struct bpf_prog __rcu *xdp_prog;
 	struct tun_prog __rcu *steering_prog;
+	struct tun_prog __rcu *filter_prog;
 };
 
 static int tun_napi_receive(struct napi_struct *napi, int budget)
@@ -984,12 +985,25 @@ static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb)
 #endif
 }
 
+static unsigned int run_ebpf_filter(struct tun_struct *tun,
+				    struct sk_buff *skb,
+				    int len)
+{
+	struct tun_prog *prog = rcu_dereference(tun->filter_prog);
+
+	if (prog)
+		len = bpf_prog_run_clear_cb(prog->prog, skb);
+
+	return len;
+}
+
 /* Net device start xmit */
 static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct tun_struct *tun = netdev_priv(dev);
 	int txq = skb->queue_mapping;
 	struct tun_file *tfile;
+	int len = skb->len;
 
 	rcu_read_lock();
 	tfile = rcu_dereference(tun->tfiles[txq]);
@@ -1015,9 +1029,16 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 	    sk_filter(tfile->socket.sk, skb))
 		goto drop;
 
+	len = run_ebpf_filter(tun, skb, len);
+	if (!len)
+		goto drop;
+
 	if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
 		goto drop;
 
+	if (pskb_trim(skb, len))
+		goto drop;
+
 	skb_tx_timestamp(skb);
 
 	/* Orphan the skb - required as we might hang on to it
@@ -2068,6 +2089,7 @@ static void tun_free_netdev(struct net_device *dev)
 	tun_flow_uninit(tun);
 	security_tun_dev_free_security(tun->security);
 	__tun_set_ebpf(tun, &tun->steering_prog, NULL);
+	__tun_set_ebpf(tun, &tun->filter_prog, NULL);
 }
 
 static void tun_setup(struct net_device *dev)
@@ -2849,6 +2871,10 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 		ret = tun_set_ebpf(tun, &tun->steering_prog, argp);
 		break;
 
+	case TUNSETFILTEREBPF:
+		ret = tun_set_ebpf(tun, &tun->filter_prog, argp);
+		break;
+
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
index fb38c17..ee432cd 100644
--- a/include/uapi/linux/if_tun.h
+++ b/include/uapi/linux/if_tun.h
@@ -58,6 +58,7 @@
 #define TUNSETVNETBE _IOW('T', 222, int)
 #define TUNGETVNETBE _IOR('T', 223, int)
 #define TUNSETSTEERINGEBPF _IOR('T', 224, int)
+#define TUNSETFILTEREBPF _IOR('T', 225, int)
 
 /* TUNSETIFF ifr flags */
 #define IFF_TUN		0x0001
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next 2/2] tun: allow to attach ebpf socket filter
  2017-12-29  2:44 ` [PATCH net-next 2/2] tun: allow to attach ebpf socket filter Jason Wang
@ 2017-12-31 10:14   ` Willem de Bruijn
  2018-01-02  3:28     ` Jason Wang
  0 siblings, 1 reply; 9+ messages in thread
From: Willem de Bruijn @ 2017-12-31 10:14 UTC (permalink / raw)
  To: Jason Wang
  Cc: Network Development, LKML, Michael S. Tsirkin, Willem de Bruijn

On Fri, Dec 29, 2017 at 3:44 AM, Jason Wang <jasowang@redhat.com> wrote:
> This patch allows userspace to attach eBPF filter to tun. This will
> allow to implement VM dataplane filtering in a more efficient way
> compared to cBPF filter.

Is the idea to allow the trusted hypervisor to install these programs,
or the untrusted guests?

eBPF privilege escalations like those recently described in
https://lwn.net/Articles/742170/ would give me pause to expose
this to guests.

> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
>  drivers/net/tun.c           | 26 ++++++++++++++++++++++++++
>  include/uapi/linux/if_tun.h |  1 +
>  2 files changed, 27 insertions(+)
>
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index 0853829..6e9452b 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -238,6 +238,7 @@ struct tun_struct {
>         struct tun_pcpu_stats __percpu *pcpu_stats;
>         struct bpf_prog __rcu *xdp_prog;
>         struct tun_prog __rcu *steering_prog;
> +       struct tun_prog __rcu *filter_prog;
>  };
>
>  static int tun_napi_receive(struct napi_struct *napi, int budget)
> @@ -984,12 +985,25 @@ static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb)
>  #endif
>  }
>
> +static unsigned int run_ebpf_filter(struct tun_struct *tun,
> +                                   struct sk_buff *skb,
> +                                   int len)
> +{
> +       struct tun_prog *prog = rcu_dereference(tun->filter_prog);
> +
> +       if (prog)
> +               len = bpf_prog_run_clear_cb(prog->prog, skb);
> +
> +       return len;
> +}
> +
>  /* Net device start xmit */
>  static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>  {
>         struct tun_struct *tun = netdev_priv(dev);
>         int txq = skb->queue_mapping;
>         struct tun_file *tfile;
> +       int len = skb->len;
>
>         rcu_read_lock();
>         tfile = rcu_dereference(tun->tfiles[txq]);
> @@ -1015,9 +1029,16 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>             sk_filter(tfile->socket.sk, skb))
>                 goto drop;
>
> +       len = run_ebpf_filter(tun, skb, len);
> +       if (!len)
> +               goto drop;
> +

This adds a second filter step independent of the sk_filter call above.
Perhaps the two filter interfaces can map onto to the same instance.
I imagine that qemu never programs SO_ATTACH_FILTER.

More importantly, should this program just return a boolean pass or
drop. Taking a length and trimming may introduce bugs later on if the
stack parses the packet unconditionally, expecting a minimum size
to be present.

This was the reason for introducing sk_filter_trim_cap and using that
in other sk_filter sites.

A quick scan shows that tun_put_user expects a full vlan tag to exist
if skb_vlan_tag_present(skb), for instance. If trimmed to below this
length the final call to skb_copy_datagram_iter may have negative
length.

This is an issue with the existing sk_filter call as much as with the
new run_ebpf_filter call.

>         if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
>                 goto drop;
>
> +       if (pskb_trim(skb, len))
> +               goto drop;
> +
>         skb_tx_timestamp(skb);

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next 2/2] tun: allow to attach ebpf socket filter
  2017-12-31 10:14   ` Willem de Bruijn
@ 2018-01-02  3:28     ` Jason Wang
  2018-01-02  9:19       ` Willem de Bruijn
  0 siblings, 1 reply; 9+ messages in thread
From: Jason Wang @ 2018-01-02  3:28 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: Network Development, LKML, Michael S. Tsirkin, Willem de Bruijn



On 2017年12月31日 18:14, Willem de Bruijn wrote:
> On Fri, Dec 29, 2017 at 3:44 AM, Jason Wang <jasowang@redhat.com> wrote:
>> This patch allows userspace to attach eBPF filter to tun. This will
>> allow to implement VM dataplane filtering in a more efficient way
>> compared to cBPF filter.
> Is the idea to allow the trusted hypervisor to install these programs,
> or the untrusted guests?

Not from guest (but I'm really considering bpf/XDP offload in the 
future, as you suggested in the pass, we probably need a new type other 
than socket filter), the main motivation is to implement vhost-net 
filtering. The idea is let qemu to attach eBPF socket filter to tun.


>
> eBPF privilege escalations like those recently described in
> https://lwn.net/Articles/742170/ would give me pause to expose
> this to guests.

Right, if underprivileged bpf is disabled by the distribution, it would 
be hard (but still possible):

- Qemu will notify libvirt about rx filter changes of virtio-net
- Libvirt will reprogram the ebpf filter

>
>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>> ---
>>   drivers/net/tun.c           | 26 ++++++++++++++++++++++++++
>>   include/uapi/linux/if_tun.h |  1 +
>>   2 files changed, 27 insertions(+)
>>
>> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
>> index 0853829..6e9452b 100644
>> --- a/drivers/net/tun.c
>> +++ b/drivers/net/tun.c
>> @@ -238,6 +238,7 @@ struct tun_struct {
>>          struct tun_pcpu_stats __percpu *pcpu_stats;
>>          struct bpf_prog __rcu *xdp_prog;
>>          struct tun_prog __rcu *steering_prog;
>> +       struct tun_prog __rcu *filter_prog;
>>   };
>>
>>   static int tun_napi_receive(struct napi_struct *napi, int budget)
>> @@ -984,12 +985,25 @@ static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb)
>>   #endif
>>   }
>>
>> +static unsigned int run_ebpf_filter(struct tun_struct *tun,
>> +                                   struct sk_buff *skb,
>> +                                   int len)
>> +{
>> +       struct tun_prog *prog = rcu_dereference(tun->filter_prog);
>> +
>> +       if (prog)
>> +               len = bpf_prog_run_clear_cb(prog->prog, skb);
>> +
>> +       return len;
>> +}
>> +
>>   /* Net device start xmit */
>>   static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>>   {
>>          struct tun_struct *tun = netdev_priv(dev);
>>          int txq = skb->queue_mapping;
>>          struct tun_file *tfile;
>> +       int len = skb->len;
>>
>>          rcu_read_lock();
>>          tfile = rcu_dereference(tun->tfiles[txq]);
>> @@ -1015,9 +1029,16 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>>              sk_filter(tfile->socket.sk, skb))
>>                  goto drop;
>>
>> +       len = run_ebpf_filter(tun, skb, len);
>> +       if (!len)
>> +               goto drop;
>> +
> This adds a second filter step independent of the sk_filter call above.
> Perhaps the two filter interfaces can map onto to the same instance.
> I imagine that qemu never programs SO_ATTACH_FILTER.

I think you mean TUNATTACHFILTER here (and we could not assume the tun 
is only used by qemu). A quick glance does not give any idea on how to 
reuse it for eBPF or differ eBPF from cBPF.

Btw, there're other differences. TUNATTACHBPF attach the prog to tun 
which means it simplifies lots of things e.g persist devices or queue 
enabling/disabling.  But TUNATTACHFILTER attach the prog to socket.

>
> More importantly, should this program just return a boolean pass or
> drop. Taking a length and trimming may introduce bugs later on if the
> stack parses the packet unconditionally, expecting a minimum size
> to be present.
>
> This was the reason for introducing sk_filter_trim_cap and using that
> in other sk_filter sites.
>
> A quick scan shows that tun_put_user expects a full vlan tag to exist
> if skb_vlan_tag_present(skb), for instance. If trimmed to below this
> length the final call to skb_copy_datagram_iter may have negative
> length.
>
> This is an issue with the existing sk_filter call as much as with the
> new run_ebpf_filter call.

Good point, so consider it was used by sk_filter too, we need to fix it 
anyway. Actually, I've considered the boolean return value but finally I 
decide to obey the style of sk filter. Maybe the trimming has real user. 
e.g high speed header recoding/analysis? Consider it's not hard to fix, 
how about just keep that?

Thanks

>
>>          if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
>>                  goto drop;
>>
>> +       if (pskb_trim(skb, len))
>> +               goto drop;
>> +
>>          skb_tx_timestamp(skb);

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next 2/2] tun: allow to attach ebpf socket filter
  2018-01-02  3:28     ` Jason Wang
@ 2018-01-02  9:19       ` Willem de Bruijn
  2018-01-03  2:53         ` Jason Wang
  2018-01-04  7:28         ` Jason Wang
  0 siblings, 2 replies; 9+ messages in thread
From: Willem de Bruijn @ 2018-01-02  9:19 UTC (permalink / raw)
  To: Jason Wang
  Cc: Network Development, LKML, Michael S. Tsirkin, Willem de Bruijn

>>>   /* Net device start xmit */
>>>   static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device
>>> *dev)
>>>   {
>>>          struct tun_struct *tun = netdev_priv(dev);
>>>          int txq = skb->queue_mapping;
>>>          struct tun_file *tfile;
>>> +       int len = skb->len;
>>>
>>>          rcu_read_lock();
>>>          tfile = rcu_dereference(tun->tfiles[txq]);
>>> @@ -1015,9 +1029,16 @@ static netdev_tx_t tun_net_xmit(struct sk_buff
>>> *skb, struct net_device *dev)
>>>              sk_filter(tfile->socket.sk, skb))
>>>                  goto drop;
>>>
>>> +       len = run_ebpf_filter(tun, skb, len);
>>> +       if (!len)
>>> +               goto drop;
>>> +
>>
>> This adds a second filter step independent of the sk_filter call above.
>> Perhaps the two filter interfaces can map onto to the same instance.
>> I imagine that qemu never programs SO_ATTACH_FILTER.
>
>
> I think you mean TUNATTACHFILTER here (and we could not assume the tun is
> only used by qemu). A quick glance does not give any idea on how to reuse it
> for eBPF or differ eBPF from cBPF.
>
> Btw, there're other differences. TUNATTACHBPF attach the prog to tun which
> means it simplifies lots of things e.g persist devices or queue
> enabling/disabling.  But TUNATTACHFILTER attach the prog to socket.

Sounds good. Thanks for taking a look whether it could be easily
deduplicated.

>>
>> More importantly, should this program just return a boolean pass or
>> drop. Taking a length and trimming may introduce bugs later on if the
>> stack parses the packet unconditionally, expecting a minimum size
>> to be present.
>>
>> This was the reason for introducing sk_filter_trim_cap and using that
>> in other sk_filter sites.
>>
>> A quick scan shows that tun_put_user expects a full vlan tag to exist
>> if skb_vlan_tag_present(skb), for instance. If trimmed to below this
>> length the final call to skb_copy_datagram_iter may have negative
>> length.
>>
>> This is an issue with the existing sk_filter call as much as with the
>> new run_ebpf_filter call.
>
>
> Good point, so consider it was used by sk_filter too, we need to fix it
> anyway. Actually, I've considered the boolean return value but finally I
> decide to obey the style of sk filter. Maybe the trimming has real user. e.g
> high speed header recoding/analysis? Consider it's not hard to fix, how
> about just keep that?

I don't see an obvious use case, but sure. We'll just need to look
at what the minimum trim length needs to be.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next 2/2] tun: allow to attach ebpf socket filter
  2018-01-02  9:19       ` Willem de Bruijn
@ 2018-01-03  2:53         ` Jason Wang
  2018-01-04  7:28         ` Jason Wang
  1 sibling, 0 replies; 9+ messages in thread
From: Jason Wang @ 2018-01-03  2:53 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: Network Development, LKML, Michael S. Tsirkin, Willem de Bruijn



On 2018年01月02日 17:19, Willem de Bruijn wrote:
>>> More importantly, should this program just return a boolean pass or
>>> drop. Taking a length and trimming may introduce bugs later on if the
>>> stack parses the packet unconditionally, expecting a minimum size
>>> to be present.
>>>
>>> This was the reason for introducing sk_filter_trim_cap and using that
>>> in other sk_filter sites.
>>>
>>> A quick scan shows that tun_put_user expects a full vlan tag to exist
>>> if skb_vlan_tag_present(skb), for instance. If trimmed to below this
>>> length the final call to skb_copy_datagram_iter may have negative
>>> length.
>>>
>>> This is an issue with the existing sk_filter call as much as with the
>>> new run_ebpf_filter call.
>> Good point, so consider it was used by sk_filter too, we need to fix it
>> anyway. Actually, I've considered the boolean return value but finally I
>> decide to obey the style of sk filter. Maybe the trimming has real user. e.g
>> high speed header recoding/analysis? Consider it's not hard to fix, how
>> about just keep that?
> I don't see an obvious use case, but sure. We'll just need to look
> at what the minimum trim length needs to be.

It looks to me that the minimum length is:

skb_vlan_tag_present(skb) ? offsetof(struct vlan_ethhdr, h_vlan_proto) : 0

And consider the vlan tag insertion done in tun_put_user(), we need trim 
4 more bytes if vlan tag is present.

Thanks

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next 2/2] tun: allow to attach ebpf socket filter
  2018-01-02  9:19       ` Willem de Bruijn
  2018-01-03  2:53         ` Jason Wang
@ 2018-01-04  7:28         ` Jason Wang
  2018-01-04 15:06           ` Willem de Bruijn
  1 sibling, 1 reply; 9+ messages in thread
From: Jason Wang @ 2018-01-04  7:28 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: Network Development, LKML, Michael S. Tsirkin, Willem de Bruijn



On 2018年01月02日 17:19, Willem de Bruijn wrote:
>>> More importantly, should this program just return a boolean pass or
>>> drop. Taking a length and trimming may introduce bugs later on if the
>>> stack parses the packet unconditionally, expecting a minimum size
>>> to be present.
>>>
>>> This was the reason for introducing sk_filter_trim_cap and using that
>>> in other sk_filter sites.
>>>
>>> A quick scan shows that tun_put_user expects a full vlan tag to exist
>>> if skb_vlan_tag_present(skb), for instance. If trimmed to below this
>>> length the final call to skb_copy_datagram_iter may have negative
>>> length.
>>>
>>> This is an issue with the existing sk_filter call as much as with the
>>> new run_ebpf_filter call.
>> Good point, so consider it was used by sk_filter too, we need to fix it
>> anyway. Actually, I've considered the boolean return value but finally I
>> decide to obey the style of sk filter. Maybe the trimming has real user. e.g
>> high speed header recoding/analysis? Consider it's not hard to fix, how
>> about just keep that?
> I don't see an obvious use case, but sure. We'll just need to look
> at what the minimum trim length needs to be.

Try to reproduce the possible issue, but looks like we are safe since we 
may hit -EFAULT which is returned by skb_copy_datagram_iter() before. So 
in V2, I will keep the code as is except trim 4 more bytes if vlan tag 
is present.

Thanks

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next 2/2] tun: allow to attach ebpf socket filter
  2018-01-04  7:28         ` Jason Wang
@ 2018-01-04 15:06           ` Willem de Bruijn
  0 siblings, 0 replies; 9+ messages in thread
From: Willem de Bruijn @ 2018-01-04 15:06 UTC (permalink / raw)
  To: Jason Wang
  Cc: Network Development, LKML, Michael S. Tsirkin, Willem de Bruijn

On Thu, Jan 4, 2018 at 8:28 AM, Jason Wang <jasowang@redhat.com> wrote:
>
>
> On 2018年01月02日 17:19, Willem de Bruijn wrote:
>>>>
>>>> More importantly, should this program just return a boolean pass or
>>>> drop. Taking a length and trimming may introduce bugs later on if the
>>>> stack parses the packet unconditionally, expecting a minimum size
>>>> to be present.
>>>>
>>>> This was the reason for introducing sk_filter_trim_cap and using that
>>>> in other sk_filter sites.
>>>>
>>>> A quick scan shows that tun_put_user expects a full vlan tag to exist
>>>> if skb_vlan_tag_present(skb), for instance. If trimmed to below this
>>>> length the final call to skb_copy_datagram_iter may have negative
>>>> length.
>>>>
>>>> This is an issue with the existing sk_filter call as much as with the
>>>> new run_ebpf_filter call.
>>>
>>> Good point, so consider it was used by sk_filter too, we need to fix it
>>> anyway. Actually, I've considered the boolean return value but finally I
>>> decide to obey the style of sk filter. Maybe the trimming has real user.
>>> e.g
>>> high speed header recoding/analysis? Consider it's not hard to fix, how
>>> about just keep that?
>>
>> I don't see an obvious use case, but sure. We'll just need to look
>> at what the minimum trim length needs to be.
>
>
> Try to reproduce the possible issue, but looks like we are safe since we may
> hit -EFAULT which is returned by skb_copy_datagram_iter() before.

Ah, indeed, it can handle short lengths. Great, thanks for checking.

> So in V2,
> I will keep the code as is except trim 4 more bytes if vlan tag is present.

This is do not follow. Perhaps the patch will make it clear.

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2018-01-04 15:07 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-12-29  2:44 [PATCH net-next 0/2] tun: allow to attach eBPF filter Jason Wang
2017-12-29  2:44 ` [PATCH net-next 1/2] tuntap: rename struct tun_steering_prog to struct tun_prog Jason Wang
2017-12-29  2:44 ` [PATCH net-next 2/2] tun: allow to attach ebpf socket filter Jason Wang
2017-12-31 10:14   ` Willem de Bruijn
2018-01-02  3:28     ` Jason Wang
2018-01-02  9:19       ` Willem de Bruijn
2018-01-03  2:53         ` Jason Wang
2018-01-04  7:28         ` Jason Wang
2018-01-04 15:06           ` Willem de Bruijn

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.