[net-next,V2,2/2] tun: allow to attach ebpf socket filter
diff mbox series

Message ID 1515124488-396-3-git-send-email-jasowang@redhat.com
State New, archived
Headers show
Series
  • tun: allow to attach eBPF filter
Related show

Commit Message

Jason Wang Jan. 5, 2018, 3:54 a.m. UTC
This patch allows userspace to attach an eBPF filter to tun. This makes
it possible to implement VM dataplane filtering more efficiently than
with a cBPF filter, by letting either qemu or libvirt attach an eBPF
filter to tun.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/tun.c           | 39 +++++++++++++++++++++++++++++++++++----
 include/uapi/linux/if_tun.h |  1 +
 2 files changed, 36 insertions(+), 4 deletions(-)

Comments

Willem de Bruijn Jan. 5, 2018, 4:21 p.m. UTC | #1
On Fri, Jan 5, 2018 at 4:54 AM, Jason Wang <jasowang@redhat.com> wrote:
> This patch allows userspace to attach eBPF filter to tun. This will
> allow to implement VM dataplane filtering in a more efficient way
> compared to cBPF filter by allowing either qemu or libvirt to
> attach eBPF filter to tun.
>
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
>  drivers/net/tun.c           | 39 +++++++++++++++++++++++++++++++++++----
>  include/uapi/linux/if_tun.h |  1 +
>  2 files changed, 36 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index 0853829..9fc8b70 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -238,6 +238,12 @@ struct tun_struct {
>         struct tun_pcpu_stats __percpu *pcpu_stats;
>         struct bpf_prog __rcu *xdp_prog;
>         struct tun_prog __rcu *steering_prog;
> +       struct tun_prog __rcu *filter_prog;
> +};
> +
> +struct veth {
> +       __be16 h_vlan_proto;
> +       __be16 h_vlan_TCI;
>  };
>
>  static int tun_napi_receive(struct napi_struct *napi, int budget)
> @@ -984,12 +990,25 @@ static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb)
>  #endif
>  }
>
> +static unsigned int run_ebpf_filter(struct tun_struct *tun,
> +                                   struct sk_buff *skb,
> +                                   int len)
> +{
> +       struct tun_prog *prog = rcu_dereference(tun->filter_prog);
> +
> +       if (prog)
> +               len = bpf_prog_run_clear_cb(prog->prog, skb);
> +
> +       return len;
> +}
> +
>  /* Net device start xmit */
>  static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>  {
>         struct tun_struct *tun = netdev_priv(dev);
>         int txq = skb->queue_mapping;
>         struct tun_file *tfile;
> +       int len = skb->len;
>
>         rcu_read_lock();
>         tfile = rcu_dereference(tun->tfiles[txq]);
> @@ -1015,6 +1034,16 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>             sk_filter(tfile->socket.sk, skb))
>                 goto drop;
>
> +       len = run_ebpf_filter(tun, skb, len);
> +
> +       /* Trim extra bytes since we may inster vlan proto & TCI

inster -> insert

> +        * in tun_put_user().
> +        */
> +       if (skb_vlan_tag_present(skb))
> +               len -= skb_vlan_tag_present(skb) ? sizeof(struct veth) : 0;

no need for testing skb_vlan_tag_present twice.

more importantly, why trim these bytes unconditionally?

only if the filter trims a packet to a length shorter than the minimum
could this cause problems. sk_filter_trim_cap with a lower bound avoids
that: skb_vlan_tag_present(skb) ? sizeof(struct vlan_ethhdr) : 0;
Jason Wang Jan. 8, 2018, 3:55 a.m. UTC | #2
On 2018年01月06日 00:21, Willem de Bruijn wrote:
> On Fri, Jan 5, 2018 at 4:54 AM, Jason Wang <jasowang@redhat.com> wrote:
>> This patch allows userspace to attach eBPF filter to tun. This will
>> allow to implement VM dataplane filtering in a more efficient way
>> compared to cBPF filter by allowing either qemu or libvirt to
>> attach eBPF filter to tun.
>>
>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>> ---
>>   drivers/net/tun.c           | 39 +++++++++++++++++++++++++++++++++++----
>>   include/uapi/linux/if_tun.h |  1 +
>>   2 files changed, 36 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
>> index 0853829..9fc8b70 100644
>> --- a/drivers/net/tun.c
>> +++ b/drivers/net/tun.c
>> @@ -238,6 +238,12 @@ struct tun_struct {
>>          struct tun_pcpu_stats __percpu *pcpu_stats;
>>          struct bpf_prog __rcu *xdp_prog;
>>          struct tun_prog __rcu *steering_prog;
>> +       struct tun_prog __rcu *filter_prog;
>> +};
>> +
>> +struct veth {
>> +       __be16 h_vlan_proto;
>> +       __be16 h_vlan_TCI;
>>   };
>>
>>   static int tun_napi_receive(struct napi_struct *napi, int budget)
>> @@ -984,12 +990,25 @@ static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb)
>>   #endif
>>   }
>>
>> +static unsigned int run_ebpf_filter(struct tun_struct *tun,
>> +                                   struct sk_buff *skb,
>> +                                   int len)
>> +{
>> +       struct tun_prog *prog = rcu_dereference(tun->filter_prog);
>> +
>> +       if (prog)
>> +               len = bpf_prog_run_clear_cb(prog->prog, skb);
>> +
>> +       return len;
>> +}
>> +
>>   /* Net device start xmit */
>>   static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>>   {
>>          struct tun_struct *tun = netdev_priv(dev);
>>          int txq = skb->queue_mapping;
>>          struct tun_file *tfile;
>> +       int len = skb->len;
>>
>>          rcu_read_lock();
>>          tfile = rcu_dereference(tun->tfiles[txq]);
>> @@ -1015,6 +1034,16 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>>              sk_filter(tfile->socket.sk, skb))
>>                  goto drop;
>>
>> +       len = run_ebpf_filter(tun, skb, len);
>> +
>> +       /* Trim extra bytes since we may inster vlan proto & TCI
> inster -> insert

Will fix.

>
>> +        * in tun_put_user().
>> +        */
>> +       if (skb_vlan_tag_present(skb))
>> +               len -= skb_vlan_tag_present(skb) ? sizeof(struct veth) : 0;
> no need for testing skb_vlan_tag_present twice.

Right.

> more importantly, why trim these bytes unconditionally?
>
> only if the filter trims a packet to a length shorter than the the minimum
> could this cause problems. sk_filter_trim_cap with a lower bound avoids
> that: skb_vlan_tag_present(skb) ? sizeof(struct vlan_ethhdr) : 0;

The problem is, if the filter wants to trim the packet to 50 bytes, we may
get 54 bytes if a vlan tag is present. This seems wrong.

Thanks

Patch
diff mbox series

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 0853829..9fc8b70 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -238,6 +238,12 @@  struct tun_struct {
 	struct tun_pcpu_stats __percpu *pcpu_stats;
 	struct bpf_prog __rcu *xdp_prog;
 	struct tun_prog __rcu *steering_prog;
+	struct tun_prog __rcu *filter_prog;
+};
+
+struct veth {
+	__be16 h_vlan_proto;
+	__be16 h_vlan_TCI;
 };
 
 static int tun_napi_receive(struct napi_struct *napi, int budget)
@@ -984,12 +990,25 @@  static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb)
 #endif
 }
 
+static unsigned int run_ebpf_filter(struct tun_struct *tun,
+				    struct sk_buff *skb,
+				    int len)
+{
+	struct tun_prog *prog = rcu_dereference(tun->filter_prog);
+
+	if (prog)
+		len = bpf_prog_run_clear_cb(prog->prog, skb);
+
+	return len;
+}
+
 /* Net device start xmit */
 static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct tun_struct *tun = netdev_priv(dev);
 	int txq = skb->queue_mapping;
 	struct tun_file *tfile;
+	int len = skb->len;
 
 	rcu_read_lock();
 	tfile = rcu_dereference(tun->tfiles[txq]);
@@ -1015,6 +1034,16 @@  static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 	    sk_filter(tfile->socket.sk, skb))
 		goto drop;
 
+	len = run_ebpf_filter(tun, skb, len);
+
+	/* Trim extra bytes since we may inster vlan proto & TCI
+	 * in tun_put_user().
+	 */
+	if (skb_vlan_tag_present(skb))
+		len -= skb_vlan_tag_present(skb) ? sizeof(struct veth) : 0;
+	if (len <= 0 || pskb_trim(skb, len))
+		goto drop;
+
 	if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
 		goto drop;
 
@@ -1904,10 +1933,7 @@  static ssize_t tun_put_user(struct tun_struct *tun,
 
 	if (vlan_hlen) {
 		int ret;
-		struct {
-			__be16 h_vlan_proto;
-			__be16 h_vlan_TCI;
-		} veth;
+		struct veth veth;
 
 		veth.h_vlan_proto = skb->vlan_proto;
 		veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb));
@@ -2068,6 +2094,7 @@  static void tun_free_netdev(struct net_device *dev)
 	tun_flow_uninit(tun);
 	security_tun_dev_free_security(tun->security);
 	__tun_set_ebpf(tun, &tun->steering_prog, NULL);
+	__tun_set_ebpf(tun, &tun->filter_prog, NULL);
 }
 
 static void tun_setup(struct net_device *dev)
@@ -2849,6 +2876,10 @@  static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 		ret = tun_set_ebpf(tun, &tun->steering_prog, argp);
 		break;
 
+	case TUNSETFILTEREBPF:
+		ret = tun_set_ebpf(tun, &tun->filter_prog, argp);
+		break;
+
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
index fb38c17..ee432cd 100644
--- a/include/uapi/linux/if_tun.h
+++ b/include/uapi/linux/if_tun.h
@@ -58,6 +58,7 @@ 
 #define TUNSETVNETBE _IOW('T', 222, int)
 #define TUNGETVNETBE _IOR('T', 223, int)
 #define TUNSETSTEERINGEBPF _IOR('T', 224, int)
+#define TUNSETFILTEREBPF _IOR('T', 225, int)
 
 /* TUNSETIFF ifr flags */
 #define IFF_TUN		0x0001