Message ID | 1515124488-396-3-git-send-email-jasowang@redhat.com |
---|---|
State | New, archived |
Headers | show |
Series |
|
Related | show |
On Fri, Jan 5, 2018 at 4:54 AM, Jason Wang <jasowang@redhat.com> wrote: > This patch allows userspace to attach eBPF filter to tun. This will > allow to implement VM dataplane filtering in a more efficient way > compared to cBPF filter by allowing either qemu or libvirt to > attach eBPF filter to tun. > > Signed-off-by: Jason Wang <jasowang@redhat.com> > --- > drivers/net/tun.c | 39 +++++++++++++++++++++++++++++++++++---- > include/uapi/linux/if_tun.h | 1 + > 2 files changed, 36 insertions(+), 4 deletions(-) > > diff --git a/drivers/net/tun.c b/drivers/net/tun.c > index 0853829..9fc8b70 100644 > --- a/drivers/net/tun.c > +++ b/drivers/net/tun.c > @@ -238,6 +238,12 @@ struct tun_struct { > struct tun_pcpu_stats __percpu *pcpu_stats; > struct bpf_prog __rcu *xdp_prog; > struct tun_prog __rcu *steering_prog; > + struct tun_prog __rcu *filter_prog; > +}; > + > +struct veth { > + __be16 h_vlan_proto; > + __be16 h_vlan_TCI; > }; > > static int tun_napi_receive(struct napi_struct *napi, int budget) > @@ -984,12 +990,25 @@ static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb) > #endif > } > > +static unsigned int run_ebpf_filter(struct tun_struct *tun, > + struct sk_buff *skb, > + int len) > +{ > + struct tun_prog *prog = rcu_dereference(tun->filter_prog); > + > + if (prog) > + len = bpf_prog_run_clear_cb(prog->prog, skb); > + > + return len; > +} > + > /* Net device start xmit */ > static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) > { > struct tun_struct *tun = netdev_priv(dev); > int txq = skb->queue_mapping; > struct tun_file *tfile; > + int len = skb->len; > > rcu_read_lock(); > tfile = rcu_dereference(tun->tfiles[txq]); > @@ -1015,6 +1034,16 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) > sk_filter(tfile->socket.sk, skb)) > goto drop; > > + len = run_ebpf_filter(tun, skb, len); > + > + /* Trim extra bytes since we may inster vlan proto & TCI inster -> insert > + * in tun_put_user(). 
> + */ > + if (skb_vlan_tag_present(skb)) > + len -= skb_vlan_tag_present(skb) ? sizeof(struct veth) : 0; No need to test skb_vlan_tag_present twice. More importantly, why trim these bytes unconditionally? Only if the filter trims a packet to a length shorter than the minimum could this cause problems. sk_filter_trim_cap with a lower bound avoids that: skb_vlan_tag_present(skb) ? sizeof(struct vlan_ethhdr) : 0;
On 2018年01月06日 00:21, Willem de Bruijn wrote: > On Fri, Jan 5, 2018 at 4:54 AM, Jason Wang <jasowang@redhat.com> wrote: >> This patch allows userspace to attach eBPF filter to tun. This will >> allow to implement VM dataplane filtering in a more efficient way >> compared to cBPF filter by allowing either qemu or libvirt to >> attach eBPF filter to tun. >> >> Signed-off-by: Jason Wang <jasowang@redhat.com> >> --- >> drivers/net/tun.c | 39 +++++++++++++++++++++++++++++++++++---- >> include/uapi/linux/if_tun.h | 1 + >> 2 files changed, 36 insertions(+), 4 deletions(-) >> >> diff --git a/drivers/net/tun.c b/drivers/net/tun.c >> index 0853829..9fc8b70 100644 >> --- a/drivers/net/tun.c >> +++ b/drivers/net/tun.c >> @@ -238,6 +238,12 @@ struct tun_struct { >> struct tun_pcpu_stats __percpu *pcpu_stats; >> struct bpf_prog __rcu *xdp_prog; >> struct tun_prog __rcu *steering_prog; >> + struct tun_prog __rcu *filter_prog; >> +}; >> + >> +struct veth { >> + __be16 h_vlan_proto; >> + __be16 h_vlan_TCI; >> }; >> >> static int tun_napi_receive(struct napi_struct *napi, int budget) >> @@ -984,12 +990,25 @@ static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb) >> #endif >> } >> >> +static unsigned int run_ebpf_filter(struct tun_struct *tun, >> + struct sk_buff *skb, >> + int len) >> +{ >> + struct tun_prog *prog = rcu_dereference(tun->filter_prog); >> + >> + if (prog) >> + len = bpf_prog_run_clear_cb(prog->prog, skb); >> + >> + return len; >> +} >> + >> /* Net device start xmit */ >> static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) >> { >> struct tun_struct *tun = netdev_priv(dev); >> int txq = skb->queue_mapping; >> struct tun_file *tfile; >> + int len = skb->len; >> >> rcu_read_lock(); >> tfile = rcu_dereference(tun->tfiles[txq]); >> @@ -1015,6 +1034,16 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) >> sk_filter(tfile->socket.sk, skb)) >> goto drop; >> >> + len = run_ebpf_filter(tun, skb, len); >> + 
>> + /* Trim extra bytes since we may inster vlan proto & TCI > inster -> insert Will fix. > >> + * in tun_put_user(). >> + */ >> + if (skb_vlan_tag_present(skb)) >> + len -= skb_vlan_tag_present(skb) ? sizeof(struct veth) : 0; > no need for testing skb_vlan_tag_present twice. Right. > more importantly, why trim these bytes unconditionally? > > only if the filter trims a packet to a length shorter than the minimum > could this cause problems. sk_filter_trim_cap with a lower bound avoids > that: skb_vlan_tag_present(skb) ? sizeof(struct vlan_ethhdr) : 0; The problem is, if the filter wants to trim the packet to 50 bytes, we may get 54 bytes if a vlan tag is present. This seems wrong. Thanks
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 0853829..9fc8b70 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -238,6 +238,12 @@ struct tun_struct { struct tun_pcpu_stats __percpu *pcpu_stats; struct bpf_prog __rcu *xdp_prog; struct tun_prog __rcu *steering_prog; + struct tun_prog __rcu *filter_prog; +}; + +struct veth { + __be16 h_vlan_proto; + __be16 h_vlan_TCI; }; static int tun_napi_receive(struct napi_struct *napi, int budget) @@ -984,12 +990,25 @@ static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb) #endif } +static unsigned int run_ebpf_filter(struct tun_struct *tun, + struct sk_buff *skb, + int len) +{ + struct tun_prog *prog = rcu_dereference(tun->filter_prog); + + if (prog) + len = bpf_prog_run_clear_cb(prog->prog, skb); + + return len; +} + /* Net device start xmit */ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); int txq = skb->queue_mapping; struct tun_file *tfile; + int len = skb->len; rcu_read_lock(); tfile = rcu_dereference(tun->tfiles[txq]); @@ -1015,6 +1034,16 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) sk_filter(tfile->socket.sk, skb)) goto drop; + len = run_ebpf_filter(tun, skb, len); + + /* Trim extra bytes since we may inster vlan proto & TCI + * in tun_put_user(). + */ + if (skb_vlan_tag_present(skb)) + len -= skb_vlan_tag_present(skb) ? 
sizeof(struct veth) : 0; + if (len <= 0 || pskb_trim(skb, len)) + goto drop; + if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) goto drop; @@ -1904,10 +1933,7 @@ static ssize_t tun_put_user(struct tun_struct *tun, if (vlan_hlen) { int ret; - struct { - __be16 h_vlan_proto; - __be16 h_vlan_TCI; - } veth; + struct veth veth; veth.h_vlan_proto = skb->vlan_proto; veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb)); @@ -2068,6 +2094,7 @@ static void tun_free_netdev(struct net_device *dev) tun_flow_uninit(tun); security_tun_dev_free_security(tun->security); __tun_set_ebpf(tun, &tun->steering_prog, NULL); + __tun_set_ebpf(tun, &tun->filter_prog, NULL); } static void tun_setup(struct net_device *dev) @@ -2849,6 +2876,10 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, ret = tun_set_ebpf(tun, &tun->steering_prog, argp); break; + case TUNSETFILTEREBPF: + ret = tun_set_ebpf(tun, &tun->filter_prog, argp); + break; + default: ret = -EINVAL; break; diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h index fb38c17..ee432cd 100644 --- a/include/uapi/linux/if_tun.h +++ b/include/uapi/linux/if_tun.h @@ -58,6 +58,7 @@ #define TUNSETVNETBE _IOW('T', 222, int) #define TUNGETVNETBE _IOR('T', 223, int) #define TUNSETSTEERINGEBPF _IOR('T', 224, int) +#define TUNSETFILTEREBPF _IOR('T', 225, int) /* TUNSETIFF ifr flags */ #define IFF_TUN 0x0001
This patch allows userspace to attach an eBPF filter to tun. This makes it possible to implement VM dataplane filtering more efficiently than with a cBPF filter, by letting either qemu or libvirt attach an eBPF filter to tun. Signed-off-by: Jason Wang <jasowang@redhat.com> --- drivers/net/tun.c | 39 +++++++++++++++++++++++++++++++++++---- include/uapi/linux/if_tun.h | 1 + 2 files changed, 36 insertions(+), 4 deletions(-)