All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Ahern <dsa@cumulusnetworks.com>
To: Mahesh Bandewar <mahesh@bandewar.net>, netdev <netdev@vger.kernel.org>
Cc: Eric Dumazet <edumazet@google.com>,
	David Miller <davem@davemloft.net>,
	Mahesh Bandewar <maheshb@google.com>
Subject: Re: [PATCHv2 next 3/3] ipvlan: Introduce l3s mode
Date: Tue, 13 Sep 2016 21:24:05 -0600	[thread overview]
Message-ID: <1177f2c5-6c2d-32e1-1628-f272d6b0b5c6@cumulusnetworks.com> (raw)
In-Reply-To: <1473703312-26757-1-git-send-email-mahesh@bandewar.net>

On 9/12/16 12:01 PM, Mahesh Bandewar wrote:

> +struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
> +			      u16 proto)
> +{
> +	struct ipvl_addr *addr;
> +	struct net_device *sdev;
> +
> +	addr = ipvlan_skb_to_addr(skb, dev);
> +	if (!addr)
> +		goto out;
> +
> +	sdev = addr->master->dev;
> +	switch (proto) {
> +	case AF_INET:
> +	{
> +		int err;
> +		struct iphdr *ip4h = ip_hdr(skb);
> +
> +		err = ip_route_input_noref(skb, ip4h->daddr, ip4h->saddr,
> +					   ip4h->tos, sdev);
> +		if (unlikely(err))
> +			goto out;
> +		break;
> +	}
> +	case AF_INET6:
> +	{
> +		struct dst_entry *dst;
> +		struct ipv6hdr *ip6h = ipv6_hdr(skb);
> +		int flags = RT6_LOOKUP_F_HAS_SADDR;
> +		struct flowi6 fl6 = {
> +			.flowi6_iif   = sdev->ifindex,
> +			.daddr        = ip6h->daddr,
> +			.saddr        = ip6h->saddr,
> +			.flowlabel    = ip6_flowinfo(ip6h),
> +			.flowi6_mark  = skb->mark,
> +			.flowi6_proto = ip6h->nexthdr,
> +		};
> +
> +		skb_dst_drop(skb);
> +		dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, flags);
> +		skb_dst_set(skb, dst);
> +		break;
> +	}
> +	default:
> +		break;
> +	}

Nit: why not put the above in separate per-version functions (ipvlan_ip_rcv and ipvlan_ip6_rcv) similar to what is done for ipvlan_process_outbound?


> +
> +out:
> +	return skb;
> +}
> +
> +unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb,
> +			     const struct nf_hook_state *state)
> +{
> +	struct ipvl_addr *addr;
> +	unsigned int len;
> +
> +	addr = ipvlan_skb_to_addr(skb, skb->dev);
> +	if (!addr)
> +		goto out;
> +
> +	skb->dev = addr->master->dev;
> +	len = skb->len + ETH_HLEN;
> +	ipvlan_count_rx(addr->master, len, true, false);
> +out:
> +	return NF_ACCEPT;
> +}
> diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
> index 18b4e8c7f68a..d02be277e1db 100644
> --- a/drivers/net/ipvlan/ipvlan_main.c
> +++ b/drivers/net/ipvlan/ipvlan_main.c
> @@ -9,24 +9,65 @@
>  
>  #include "ipvlan.h"
>  
> +static struct nf_hook_ops ipvl_nfops[] __read_mostly = {
> +	{
> +		.hook     = ipvlan_nf_input,
> +		.pf       = NFPROTO_IPV4,
> +		.hooknum  = NF_INET_LOCAL_IN,
> +		.priority = INT_MAX,
> +	},
> +	{
> +		.hook     = ipvlan_nf_input,
> +		.pf       = NFPROTO_IPV6,
> +		.hooknum  = NF_INET_LOCAL_IN,
> +		.priority = INT_MAX,
> +	},
> +};
> +
> +static struct l3mdev_ops ipvl_l3mdev_ops __read_mostly = {
> +	.l3mdev_l3_rcv = ipvlan_l3_rcv,
> +};
> +
>  static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev)
>  {
>  	ipvlan->dev->mtu = dev->mtu - ipvlan->mtu_adj;
>  }
>  
> -static void ipvlan_set_port_mode(struct ipvl_port *port, u16 nval)
> +static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval)
>  {
>  	struct ipvl_dev *ipvlan;
> +	int err = 0;
>  
> +	ASSERT_RTNL();
>  	if (port->mode != nval) {
> +		if (nval == IPVLAN_MODE_L3S) {
> +			port->dev->l3mdev_ops = &ipvl_l3mdev_ops;
> +			port->dev->priv_flags |= IFF_L3MDEV_MASTER;
> +			if (!port->ipt_hook_added) {
> +				err = _nf_register_hooks(ipvl_nfops,
> +							ARRAY_SIZE(ipvl_nfops));

That's clever. The hooks are not device based so why do the register for each device? Alternatively, you could use a static dst like VRF does for Tx. In the ipvlan rcv function set the dst input handler to send the packet back to the ipvlan driver via dst->input. From there send the packet through the netfilter hooks and then do the real lookup, update the dst and call its input function. I have working code for VRF driver somewhere that shows how to do this.

 
> +				if (!err)
> +					port->ipt_hook_added = true;
> +				else
> +					return err;
> +			}
> +		} else {
> +			port->dev->priv_flags &= ~IFF_L3MDEV_MASTER;
> +			port->dev->l3mdev_ops = NULL;
> +			if (port->ipt_hook_added)
> +				_nf_unregister_hooks(ipvl_nfops,
> +						     ARRAY_SIZE(ipvl_nfops));
> +			port->ipt_hook_added = false;
> +		}

  reply	other threads:[~2016-09-14  3:24 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-09-12 18:01 [PATCHv2 next 3/3] ipvlan: Introduce l3s mode Mahesh Bandewar
2016-09-14  3:24 ` David Ahern [this message]
2016-09-14 16:30   ` Mahesh Bandewar (महेश बंडेवार)
2016-09-14 18:04     ` David Ahern
2016-09-14 18:47       ` Mahesh Bandewar (महेश बंडेवार)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1177f2c5-6c2d-32e1-1628-f272d6b0b5c6@cumulusnetworks.com \
    --to=dsa@cumulusnetworks.com \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=mahesh@bandewar.net \
    --cc=maheshb@google.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.