* [PATCH net-next 01/12] net: flow: Add l3mdev flow update
2016-08-30 17:34 [PATCH net-next 00/12] net: Convert vrf from dst to tx hook David Ahern
@ 2016-08-30 17:34 ` David Ahern
2016-08-30 17:34 ` [PATCH net-next 02/12] net: l3mdev: Add hook to output path David Ahern
` (11 subsequent siblings)
12 siblings, 0 replies; 15+ messages in thread
From: David Ahern @ 2016-08-30 17:34 UTC (permalink / raw)
To: netdev; +Cc: David Ahern
Add l3mdev hook to set FLOWI_FLAG_SKIP_NH_OIF flag and update oif/iif
in flow struct if its oif or iif points to a device enslaved to an L3
Master device. Only one of the two (oif or iif) needs to be converted
to match the l3mdev FIB rule. This moves the flow adjustment for l3mdev
to a single point
catching all lookups. It is redundant for existing hooks (those are
removed in later patches) but is needed for missed lookups such as
PMTU updates.
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
include/net/l3mdev.h | 6 ++++++
net/ipv4/fib_rules.c | 3 +++
net/ipv6/fib6_rules.c | 3 +++
net/l3mdev/l3mdev.c | 35 +++++++++++++++++++++++++++++++++++
4 files changed, 47 insertions(+)
diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
index e90095091aa0..81e175e80537 100644
--- a/include/net/l3mdev.h
+++ b/include/net/l3mdev.h
@@ -49,6 +49,8 @@ struct l3mdev_ops {
int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
struct fib_lookup_arg *arg);
+void l3mdev_update_flow(struct net *net, struct flowi *fl);
+
int l3mdev_master_ifindex_rcu(const struct net_device *dev);
static inline int l3mdev_master_ifindex(struct net_device *dev)
{
@@ -290,6 +292,10 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
{
return 1;
}
+static inline
+void l3mdev_update_flow(struct net *net, struct flowi *fl)
+{
+}
#endif
#endif /* _NET_L3MDEV_H_ */
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 6e9ea69e5f75..770bebed6b28 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -56,6 +56,9 @@ int __fib_lookup(struct net *net, struct flowi4 *flp,
};
int err;
+ /* update flow if oif or iif point to device enslaved to l3mdev */
+ l3mdev_update_flow(net, flowi4_to_flowi(flp));
+
err = fib_rules_lookup(net->ipv4.rules_ops, flowi4_to_flowi(flp), 0, &arg);
#ifdef CONFIG_IP_ROUTE_CLASSID
if (arg.rule)
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 5857c1fc8b67..eea23b57c6a5 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -38,6 +38,9 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
.flags = FIB_LOOKUP_NOREF,
};
+ /* update flow if oif or iif point to device enslaved to l3mdev */
+ l3mdev_update_flow(net, flowi6_to_flowi(fl6));
+
fib_rules_lookup(net->ipv6.fib6_rules_ops,
flowi6_to_flowi(fl6), flags, &arg);
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index c4a1c3e84e12..43610e5acc4e 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -222,3 +222,38 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
return rc;
}
+
+void l3mdev_update_flow(struct net *net, struct flowi *fl)
+{
+ struct net_device *dev;
+ int ifindex;
+
+ rcu_read_lock();
+
+ if (fl->flowi_oif) {
+ dev = dev_get_by_index_rcu(net, fl->flowi_oif);
+ if (dev) {
+ ifindex = l3mdev_master_ifindex_rcu(dev);
+ if (ifindex) {
+ fl->flowi_oif = ifindex;
+ fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF;
+ goto out;
+ }
+ }
+ }
+
+ if (fl->flowi_iif) {
+ dev = dev_get_by_index_rcu(net, fl->flowi_iif);
+ if (dev) {
+ ifindex = l3mdev_master_ifindex_rcu(dev);
+ if (ifindex) {
+ fl->flowi_iif = ifindex;
+ fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF;
+ }
+ }
+ }
+
+out:
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(l3mdev_update_flow);
--
2.1.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH net-next 02/12] net: l3mdev: Add hook to output path
2016-08-30 17:34 [PATCH net-next 00/12] net: Convert vrf from dst to tx hook David Ahern
2016-08-30 17:34 ` [PATCH net-next 01/12] net: flow: Add l3mdev flow update David Ahern
@ 2016-08-30 17:34 ` David Ahern
2016-08-30 17:34 ` [PATCH net-next 03/12] net: l3mdev: Allow the l3mdev to be a loopback David Ahern
` (10 subsequent siblings)
12 siblings, 0 replies; 15+ messages in thread
From: David Ahern @ 2016-08-30 17:34 UTC (permalink / raw)
To: netdev; +Cc: David Ahern
This patch adds the infrastructure to the output path to pass an skb
to an l3mdev device if it has a hook registered. This is the Tx parallel
to l3mdev_ip{6}_rcv in the receive path and is the basis for removing
the dst based hook.
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
include/net/l3mdev.h | 47 +++++++++++++++++++++++++++++++++++++++++++++++
net/ipv4/ip_output.c | 8 ++++++++
net/ipv6/ip6_output.c | 8 ++++++++
net/ipv6/output_core.c | 7 +++++++
net/ipv6/raw.c | 7 +++++++
5 files changed, 77 insertions(+)
diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
index 81e175e80537..74ffe5aef299 100644
--- a/include/net/l3mdev.h
+++ b/include/net/l3mdev.h
@@ -11,6 +11,7 @@
#ifndef _NET_L3MDEV_H_
#define _NET_L3MDEV_H_
+#include <net/dst.h>
#include <net/fib_rules.h>
/**
@@ -18,6 +19,10 @@
*
* @l3mdev_fib_table: Get FIB table id to use for lookups
*
+ * @l3mdev_l3_rcv: Hook in L3 receive path
+ *
+ * @l3mdev_l3_out: Hook in L3 output path
+ *
* @l3mdev_get_rtable: Get cached IPv4 rtable (dst_entry) for device
*
* @l3mdev_get_saddr: Get source address for a flow
@@ -29,6 +34,9 @@ struct l3mdev_ops {
u32 (*l3mdev_fib_table)(const struct net_device *dev);
struct sk_buff * (*l3mdev_l3_rcv)(struct net_device *dev,
struct sk_buff *skb, u16 proto);
+ struct sk_buff * (*l3mdev_l3_out)(struct net_device *dev,
+ struct sock *sk, struct sk_buff *skb,
+ u16 proto);
/* IPv4 ops */
struct rtable * (*l3mdev_get_rtable)(const struct net_device *dev,
@@ -201,6 +209,33 @@ struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb)
return l3mdev_l3_rcv(skb, AF_INET6);
}
+static inline
+struct sk_buff *l3mdev_l3_out(struct sock *sk, struct sk_buff *skb, u16 proto)
+{
+ struct net_device *dev = skb_dst(skb)->dev;
+ struct net_device *master = NULL;
+
+ if (netif_is_l3_slave(dev)) {
+ master = netdev_master_upper_dev_get_rcu(dev);
+ if (master && master->l3mdev_ops->l3mdev_l3_out)
+ skb = master->l3mdev_ops->l3mdev_l3_out(master, sk,
+ skb, proto);
+ }
+
+ return skb;
+}
+
+static inline
+struct sk_buff *l3mdev_ip_out(struct sock *sk, struct sk_buff *skb)
+{
+ return l3mdev_l3_out(sk, skb, AF_INET);
+}
+
+static inline
+struct sk_buff *l3mdev_ip6_out(struct sock *sk, struct sk_buff *skb)
+{
+ return l3mdev_l3_out(sk, skb, AF_INET6);
+}
#else
static inline int l3mdev_master_ifindex_rcu(const struct net_device *dev)
@@ -287,6 +322,18 @@ struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb)
}
static inline
+struct sk_buff *l3mdev_ip_out(struct sock *sk, struct sk_buff *skb)
+{
+ return skb;
+}
+
+static inline
+struct sk_buff *l3mdev_ip6_out(struct sock *sk, struct sk_buff *skb)
+{
+ return skb;
+}
+
+static inline
int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
struct fib_lookup_arg *arg)
{
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index dde37fb340bf..3c727d4eaba9 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -98,6 +98,14 @@ int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
iph->tot_len = htons(skb->len);
ip_send_check(iph);
+
+ /* if egress device is enslaved to an L3 master device pass the
+ * skb to its handler for processing
+ */
+ skb = l3mdev_ip_out(sk, skb);
+ if (unlikely(!skb))
+ return 0;
+
return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
net, sk, skb, NULL, skb_dst(skb)->dev,
dst_output);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 1dfc402d9ad1..bcec7e73eb0b 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -228,6 +228,14 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_OUT, skb->len);
+
+ /* if egress device is enslaved to an L3 master device pass the
+ * skb to its handler for processing
+ */
+ skb = l3mdev_ip6_out((struct sock *)sk, skb);
+ if (unlikely(!skb))
+ return 0;
+
/* hooks should never assume socket lock is held.
* we promote our socket to non const
*/
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 462f2a76b5c2..7cca8ac66fe9 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -148,6 +148,13 @@ int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
ipv6_hdr(skb)->payload_len = htons(len);
IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
+ /* if egress device is enslaved to an L3 master device pass the
+ * skb to its handler for processing
+ */
+ skb = l3mdev_ip6_out(sk, skb);
+ if (unlikely(!skb))
+ return 0;
+
return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
net, sk, skb, NULL, skb_dst(skb)->dev,
dst_output);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 590dd1f7746f..54404f08efcc 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -653,6 +653,13 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
if (err)
goto error_fault;
+ /* if egress device is enslaved to an L3 master device pass the
+ * skb to its handler for processing
+ */
+ skb = l3mdev_ip6_out(sk, skb);
+ if (unlikely(!skb))
+ return 0;
+
IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
NULL, rt->dst.dev, dst_output);
--
2.1.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH net-next 03/12] net: l3mdev: Allow the l3mdev to be a loopback
2016-08-30 17:34 [PATCH net-next 00/12] net: Convert vrf from dst to tx hook David Ahern
2016-08-30 17:34 ` [PATCH net-next 01/12] net: flow: Add l3mdev flow update David Ahern
2016-08-30 17:34 ` [PATCH net-next 02/12] net: l3mdev: Add hook to output path David Ahern
@ 2016-08-30 17:34 ` David Ahern
2016-08-30 17:34 ` [PATCH net-next 04/12] net: vrf: Flip IPv4 path from dst to out hook David Ahern
` (9 subsequent siblings)
12 siblings, 0 replies; 15+ messages in thread
From: David Ahern @ 2016-08-30 17:34 UTC (permalink / raw)
To: netdev; +Cc: David Ahern
Allow an L3 master device to act as the loopback for that L3 domain.
For IPv4 the device can also have the address 127.0.0.1.
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
include/net/l3mdev.h | 6 +++---
net/ipv4/route.c | 8 ++++++--
net/ipv6/route.c | 12 ++++++++++--
3 files changed, 19 insertions(+), 7 deletions(-)
diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
index 74ffe5aef299..5f03a89bb075 100644
--- a/include/net/l3mdev.h
+++ b/include/net/l3mdev.h
@@ -90,7 +90,7 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex)
}
static inline
-const struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev)
+struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev)
{
/* netdev_master_upper_dev_get_rcu calls
* list_first_or_null_rcu to walk the upper dev list.
@@ -99,7 +99,7 @@ const struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev)
* typecast to remove the const
*/
struct net_device *dev = (struct net_device *)_dev;
- const struct net_device *master;
+ struct net_device *master;
if (!dev)
return NULL;
@@ -253,7 +253,7 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex)
}
static inline
-const struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev)
+struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev)
{
return NULL;
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a1f2830d8110..1119f18fb720 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2016,7 +2016,9 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
return ERR_PTR(-EINVAL);
if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
- if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
+ if (ipv4_is_loopback(fl4->saddr) &&
+ !(dev_out->flags & IFF_LOOPBACK) &&
+ !netif_is_l3_master(dev_out))
return ERR_PTR(-EINVAL);
if (ipv4_is_lbcast(fl4->daddr))
@@ -2300,7 +2302,9 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
else
fl4->saddr = fl4->daddr;
}
- dev_out = net->loopback_dev;
+
+ /* L3 master device is the loopback for that domain */
+ dev_out = l3mdev_master_dev_rcu(dev_out) ? : net->loopback_dev;
fl4->flowi4_oif = dev_out->ifindex;
flags |= RTCF_LOCAL;
goto make_route;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 49817555449e..4a0f77aa49cf 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2556,8 +2556,16 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
{
u32 tb_id;
struct net *net = dev_net(idev->dev);
- struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
- DST_NOCOUNT);
+ struct net_device *dev = net->loopback_dev;
+ struct rt6_info *rt;
+
+ /* use L3 Master device as loopback for host routes if device
+ * is enslaved and address is not link local or multicast
+ */
+ if (!rt6_need_strict(addr))
+ dev = l3mdev_master_dev_rcu(idev->dev) ? : dev;
+
+ rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
if (!rt)
return ERR_PTR(-ENOMEM);
--
2.1.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH net-next 04/12] net: vrf: Flip IPv4 path from dst to out hook
2016-08-30 17:34 [PATCH net-next 00/12] net: Convert vrf from dst to tx hook David Ahern
` (2 preceding siblings ...)
2016-08-30 17:34 ` [PATCH net-next 03/12] net: l3mdev: Allow the l3mdev to be a loopback David Ahern
@ 2016-08-30 17:34 ` David Ahern
2016-08-30 17:34 ` [PATCH net-next 05/12] net: vrf: Flip IPv6 " David Ahern
` (8 subsequent siblings)
12 siblings, 0 replies; 15+ messages in thread
From: David Ahern @ 2016-08-30 17:34 UTC (permalink / raw)
To: netdev; +Cc: David Ahern
Flip the IPv4 output path from use of the vrf dst to the l3mdev tx out
hook.
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
drivers/net/vrf.c | 171 ++++++++++++++++++++----------------------------------
net/ipv4/route.c | 4 --
2 files changed, 64 insertions(+), 111 deletions(-)
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 1ce7420322ee..7517645347c3 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -230,79 +230,28 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
struct net_device *vrf_dev)
{
- struct iphdr *ip4h = ip_hdr(skb);
- int ret = NET_XMIT_DROP;
- struct flowi4 fl4 = {
- /* needed to match OIF rule */
- .flowi4_oif = vrf_dev->ifindex,
- .flowi4_iif = LOOPBACK_IFINDEX,
- .flowi4_tos = RT_TOS(ip4h->tos),
- .flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_L3MDEV_SRC |
- FLOWI_FLAG_SKIP_NH_OIF,
- .daddr = ip4h->daddr,
- };
- struct net *net = dev_net(vrf_dev);
- struct rtable *rt;
-
- rt = ip_route_output_flow(net, &fl4, NULL);
- if (IS_ERR(rt))
- goto err;
-
- if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
- ip_rt_put(rt);
- goto err;
- }
+ struct net_vrf *vrf = netdev_priv(vrf_dev);
+ struct dst_entry *dst = NULL;
+ struct rtable *rth_local;
skb_dst_drop(skb);
- /* if dst.dev is loopback or the VRF device again this is locally
- * originated traffic destined to a local address. Short circuit
- * to Rx path using our local dst
- */
- if (rt->dst.dev == net->loopback_dev || rt->dst.dev == vrf_dev) {
- struct net_vrf *vrf = netdev_priv(vrf_dev);
- struct rtable *rth_local;
- struct dst_entry *dst = NULL;
-
- ip_rt_put(rt);
-
- rcu_read_lock();
-
- rth_local = rcu_dereference(vrf->rth_local);
- if (likely(rth_local)) {
- dst = &rth_local->dst;
- dst_hold(dst);
- }
-
- rcu_read_unlock();
-
- if (unlikely(!dst))
- goto err;
+ rcu_read_lock();
- return vrf_local_xmit(skb, vrf_dev, dst);
+ rth_local = rcu_dereference(vrf->rth_local);
+ if (likely(rth_local)) {
+ dst = &rth_local->dst;
+ dst_hold(dst);
}
- skb_dst_set(skb, &rt->dst);
-
- /* strip the ethernet header added for pass through VRF device */
- __skb_pull(skb, skb_network_offset(skb));
+ rcu_read_unlock();
- if (!ip4h->saddr) {
- ip4h->saddr = inet_select_addr(skb_dst(skb)->dev, 0,
- RT_SCOPE_LINK);
+ if (unlikely(!dst)) {
+ vrf_tx_error(vrf_dev, skb);
+ return NET_XMIT_DROP;
}
- ret = ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
- if (unlikely(net_xmit_eval(ret)))
- vrf_dev->stats.tx_errors++;
- else
- ret = NET_XMIT_SUCCESS;
-
-out:
- return ret;
-err:
- vrf_tx_error(vrf_dev, skb);
- goto out;
+ return vrf_local_xmit(skb, vrf_dev, dst);
}
static netdev_tx_t is_ip_tx_frame(struct sk_buff *skb, struct net_device *dev)
@@ -473,64 +422,71 @@ static int vrf_rt6_create(struct net_device *dev)
}
#endif
-/* modelled after ip_finish_output2 */
+/* run skb through packet sockets for tcpdump with dev set to vrf dev */
static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct dst_entry *dst = skb_dst(skb);
- struct rtable *rt = (struct rtable *)dst;
- struct net_device *dev = dst->dev;
- unsigned int hh_len = LL_RESERVED_SPACE(dev);
- struct neighbour *neigh;
- u32 nexthop;
- int ret = -EINVAL;
-
- /* Be paranoid, rather than too clever. */
- if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
- struct sk_buff *skb2;
-
- skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
- if (!skb2) {
- ret = -ENOMEM;
- goto err;
- }
- if (skb->sk)
- skb_set_owner_w(skb2, skb->sk);
-
- consume_skb(skb);
- skb = skb2;
+ if (likely(skb_headroom(skb) >= ETH_HLEN)) {
+ struct ethhdr *eth = (struct ethhdr *)skb_push(skb, ETH_HLEN);
+
+ ether_addr_copy(eth->h_source, skb->dev->dev_addr);
+ eth_zero_addr(eth->h_dest);
+ eth->h_proto = skb->protocol;
+ dev_queue_xmit_nit(skb, skb->dev);
+ skb_pull(skb, ETH_HLEN);
}
- rcu_read_lock_bh();
-
- nexthop = (__force u32)rt_nexthop(rt, ip_hdr(skb)->daddr);
- neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
- if (unlikely(!neigh))
- neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
- if (!IS_ERR(neigh))
- ret = dst_neigh_output(dst, neigh, skb);
-
- rcu_read_unlock_bh();
-err:
- if (unlikely(ret < 0))
- vrf_tx_error(skb->dev, skb);
- return ret;
+ return 1;
}
static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct net_device *dev = skb_dst(skb)->dev;
-
- IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
-
- skb->dev = dev;
skb->protocol = htons(ETH_P_IP);
return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
- net, sk, skb, NULL, dev,
+ net, sk, skb, NULL, skb->dev,
vrf_finish_output,
!(IPCB(skb)->flags & IPSKB_REROUTED));
}
+static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev,
+ struct sock *sk,
+ struct sk_buff *skb)
+{
+ struct net *net = dev_net(vrf_dev);
+ struct net_device *dev = skb->dev;
+ int err;
+
+ skb->dev = vrf_dev;
+
+ err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk,
+ skb, NULL, vrf_dev, vrf_output);
+ if (likely(err == 1))
+ err = vrf_output(net, sk, skb);
+
+ if (likely(err == 1)) {
+ skb->dev = dev;
+ nf_reset(skb);
+ } else {
+ skb = NULL;
+ }
+
+ return skb;
+}
+
+/* called with rcu lock held */
+static struct sk_buff *vrf_l3_out(struct net_device *vrf_dev,
+ struct sock *sk,
+ struct sk_buff *skb,
+ u16 proto)
+{
+ switch (proto) {
+ case AF_INET:
+ return vrf_ip_out(vrf_dev, sk, skb);
+ }
+
+ return skb;
+}
+
/* holding rtnl */
static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf)
{
@@ -1067,6 +1023,7 @@ static const struct l3mdev_ops vrf_l3mdev_ops = {
.l3mdev_get_rtable = vrf_get_rtable,
.l3mdev_get_saddr = vrf_get_saddr,
.l3mdev_l3_rcv = vrf_l3_rcv,
+ .l3mdev_l3_out = vrf_l3_out,
#if IS_ENABLED(CONFIG_IPV6)
.l3mdev_get_rt6_dst = vrf_get_rt6_dst,
.l3mdev_get_saddr6 = vrf_get_saddr6,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 1119f18fb720..d9936f90a755 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2244,10 +2244,6 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
fl4->saddr = inet_select_addr(dev_out, 0,
RT_SCOPE_HOST);
}
-
- rth = l3mdev_get_rtable(dev_out, fl4);
- if (rth)
- goto out;
}
if (!fl4->daddr) {
--
2.1.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH net-next 05/12] net: vrf: Flip IPv6 path from dst to out hook
2016-08-30 17:34 [PATCH net-next 00/12] net: Convert vrf from dst to tx hook David Ahern
` (3 preceding siblings ...)
2016-08-30 17:34 ` [PATCH net-next 04/12] net: vrf: Flip IPv4 path from dst to out hook David Ahern
@ 2016-08-30 17:34 ` David Ahern
2016-08-30 17:34 ` [PATCH net-next 06/12] net: remove redundant l3mdev calls David Ahern
` (7 subsequent siblings)
12 siblings, 0 replies; 15+ messages in thread
From: David Ahern @ 2016-08-30 17:34 UTC (permalink / raw)
To: netdev; +Cc: David Ahern
Flip the IPv6 output path from use of the vrf dst to the l3mdev tx out
hook.
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
drivers/net/vrf.c | 156 ++++++++++++++++++++------------------------------
net/ipv6/ip6_output.c | 9 ++-
net/ipv6/route.c | 5 --
3 files changed, 70 insertions(+), 100 deletions(-)
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 7517645347c3..df58bc791cfd 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -140,80 +140,42 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev,
static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
struct net_device *dev)
{
- const struct ipv6hdr *iph = ipv6_hdr(skb);
- struct net *net = dev_net(skb->dev);
- struct flowi6 fl6 = {
- /* needed to match OIF rule */
- .flowi6_oif = dev->ifindex,
- .flowi6_iif = LOOPBACK_IFINDEX,
- .daddr = iph->daddr,
- .saddr = iph->saddr,
- .flowlabel = ip6_flowinfo(iph),
- .flowi6_mark = skb->mark,
- .flowi6_proto = iph->nexthdr,
- .flowi6_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF,
- };
- int ret = NET_XMIT_DROP;
- struct dst_entry *dst;
- struct dst_entry *dst_null = &net->ipv6.ip6_null_entry->dst;
-
- dst = ip6_route_output(net, NULL, &fl6);
- if (dst == dst_null)
- goto err;
+ struct net_vrf *vrf = netdev_priv(dev);
+ struct dst_entry *dst = NULL;
+ struct rt6_info *rt6_local;
skb_dst_drop(skb);
- /* if dst.dev is loopback or the VRF device again this is locally
- * originated traffic destined to a local address. Short circuit
- * to Rx path using our local dst
- */
- if (dst->dev == net->loopback_dev || dst->dev == dev) {
- struct net_vrf *vrf = netdev_priv(dev);
- struct rt6_info *rt6_local;
-
- /* release looked up dst and use cached local dst */
- dst_release(dst);
+ rcu_read_lock();
- rcu_read_lock();
+ rt6_local = rcu_dereference(vrf->rt6_local);
+ if (unlikely(!rt6_local)) {
+ rcu_read_unlock();
+ goto err;
+ }
- rt6_local = rcu_dereference(vrf->rt6_local);
- if (unlikely(!rt6_local)) {
+ /* Ordering issue: cached local dst is created on newlink
+ * before the IPv6 initialization. Using the local dst
+ * requires rt6i_idev to be set so make sure it is.
+ */
+ if (unlikely(!rt6_local->rt6i_idev)) {
+ rt6_local->rt6i_idev = in6_dev_get(dev);
+ if (!rt6_local->rt6i_idev) {
rcu_read_unlock();
goto err;
}
-
- /* Ordering issue: cached local dst is created on newlink
- * before the IPv6 initialization. Using the local dst
- * requires rt6i_idev to be set so make sure it is.
- */
- if (unlikely(!rt6_local->rt6i_idev)) {
- rt6_local->rt6i_idev = in6_dev_get(dev);
- if (!rt6_local->rt6i_idev) {
- rcu_read_unlock();
- goto err;
- }
- }
-
- dst = &rt6_local->dst;
- dst_hold(dst);
-
- rcu_read_unlock();
-
- return vrf_local_xmit(skb, dev, &rt6_local->dst);
}
- skb_dst_set(skb, dst);
+ dst = &rt6_local->dst;
+ if (likely(dst))
+ dst_hold(dst);
- /* strip the ethernet header added for pass through VRF device */
- __skb_pull(skb, skb_network_offset(skb));
+ rcu_read_unlock();
- ret = ip6_local_out(net, skb->sk, skb);
- if (unlikely(net_xmit_eval(ret)))
- dev->stats.tx_errors++;
- else
- ret = NET_XMIT_SUCCESS;
+ if (unlikely(!dst))
+ goto err;
- return ret;
+ return vrf_local_xmit(skb, dev, dst);
err:
vrf_tx_error(dev, skb);
return NET_XMIT_DROP;
@@ -286,44 +248,43 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
}
#if IS_ENABLED(CONFIG_IPV6)
-/* modelled after ip6_finish_output2 */
-static int vrf_finish_output6(struct net *net, struct sock *sk,
- struct sk_buff *skb)
-{
- struct dst_entry *dst = skb_dst(skb);
- struct net_device *dev = dst->dev;
- struct neighbour *neigh;
- struct in6_addr *nexthop;
- int ret;
+static int vrf_finish_output(struct net *net, struct sock *sk,
+ struct sk_buff *skb);
+/* modelled after ip6_output */
+static int vrf_output6(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
skb->protocol = htons(ETH_P_IPV6);
- skb->dev = dev;
-
- rcu_read_lock_bh();
- nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
- neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
- if (unlikely(!neigh))
- neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
- if (!IS_ERR(neigh)) {
- ret = dst_neigh_output(dst, neigh, skb);
- rcu_read_unlock_bh();
- return ret;
- }
- rcu_read_unlock_bh();
- IP6_INC_STATS(dev_net(dst->dev),
- ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
- kfree_skb(skb);
- return -EINVAL;
+ return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
+ net, sk, skb, NULL, skb->dev,
+ vrf_finish_output,
+ !(IPCB(skb)->flags & IP6SKB_REROUTED));
}
-/* modelled after ip6_output */
-static int vrf_output6(struct net *net, struct sock *sk, struct sk_buff *skb)
+static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
+ struct sock *sk,
+ struct sk_buff *skb)
{
- return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
- net, sk, skb, NULL, skb_dst(skb)->dev,
- vrf_finish_output6,
- !(IP6CB(skb)->flags & IP6SKB_REROUTED));
+ struct net *net = dev_net(vrf_dev);
+ struct net_device *dev = skb->dev;
+ int err;
+
+ skb->dev = vrf_dev;
+
+ err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk,
+ skb, NULL, vrf_dev, vrf_output6);
+ if (likely(err == 1))
+ err = vrf_output6(net, sk, skb);
+
+ if (likely(err == 1)) {
+ skb->dev = dev;
+ nf_reset(skb);
+ } else {
+ skb = NULL;
+ }
+
+ return skb;
}
/* holding rtnl */
@@ -412,6 +373,13 @@ static int vrf_rt6_create(struct net_device *dev)
return rc;
}
#else
+static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
+ struct sock *sk,
+ struct sk_buff *skb)
+{
+ return skb;
+}
+
static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf)
{
}
@@ -482,6 +450,8 @@ static struct sk_buff *vrf_l3_out(struct net_device *vrf_dev,
switch (proto) {
case AF_INET:
return vrf_ip_out(vrf_dev, sk, skb);
+ case AF_INET6:
+ return vrf_ip6_out(vrf_dev, sk, skb);
}
return skb;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index bcec7e73eb0b..9711f32eedd7 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1054,10 +1054,15 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup);
struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
const struct in6_addr *final_dst)
{
+ struct net *net = sock_net(sk);
struct dst_entry *dst = NULL;
int err;
- err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
+ if (rt6_need_strict(&fl6->daddr) &&
+ netif_index_is_l3_master(net, fl6->flowi6_oif))
+ return ERR_PTR(-ENETUNREACH);
+
+ err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
if (err)
return ERR_PTR(err);
if (final_dst)
@@ -1065,7 +1070,7 @@ struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
if (!fl6->flowi6_oif)
fl6->flowi6_oif = l3mdev_fib_oif(dst->dev);
- return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
+ return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 4a0f77aa49cf..65ee42ad2afd 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1188,13 +1188,8 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table
struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
struct flowi6 *fl6, int flags)
{
- struct dst_entry *dst;
bool any_src;
- dst = l3mdev_get_rt6_dst(net, fl6);
- if (dst)
- return dst;
-
fl6->flowi6_iif = LOOPBACK_IFINDEX;
any_src = ipv6_addr_any(&fl6->saddr);
--
2.1.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH net-next 06/12] net: remove redundant l3mdev calls
2016-08-30 17:34 [PATCH net-next 00/12] net: Convert vrf from dst to tx hook David Ahern
` (4 preceding siblings ...)
2016-08-30 17:34 ` [PATCH net-next 05/12] net: vrf: Flip IPv6 " David Ahern
@ 2016-08-30 17:34 ` David Ahern
2016-08-30 17:34 ` [PATCH net-next 07/12] net: ipv4: Remove l3mdev_get_saddr David Ahern
` (6 subsequent siblings)
12 siblings, 0 replies; 15+ messages in thread
From: David Ahern @ 2016-08-30 17:34 UTC (permalink / raw)
To: netdev; +Cc: David Ahern
A previous patch added the l3mdev flow update, making these hooks redundant.
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
net/ipv4/ip_output.c | 3 +--
net/ipv4/route.c | 12 ++----------
net/ipv4/xfrm4_policy.c | 2 +-
net/ipv6/ip6_output.c | 2 --
net/ipv6/ndisc.c | 11 ++---------
net/ipv6/route.c | 7 +------
net/ipv6/tcp_ipv6.c | 8 ++------
net/ipv6/xfrm6_policy.c | 2 +-
8 files changed, 10 insertions(+), 37 deletions(-)
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 3c727d4eaba9..75f8167615ba 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1574,8 +1574,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
}
oif = arg->bound_dev_if;
- if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
- oif = skb->skb_iif;
+ oif = oif ? : skb->skb_iif;
flowi4_init_output(&fl4, oif,
IP4_REPLY_MARK(net, skb->mark),
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d9936f90a755..ec994380d354 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1829,7 +1829,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
* Now we are ready to route packet.
*/
fl4.flowi4_oif = 0;
- fl4.flowi4_iif = l3mdev_fib_oif_rcu(dev);
+ fl4.flowi4_iif = dev->ifindex;
fl4.flowi4_mark = skb->mark;
fl4.flowi4_tos = tos;
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
@@ -2148,7 +2148,6 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
unsigned int flags = 0;
struct fib_result res;
struct rtable *rth;
- int master_idx;
int orig_oif;
int err = -ENETUNREACH;
@@ -2158,9 +2157,6 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
orig_oif = fl4->flowi4_oif;
- master_idx = l3mdev_master_ifindex_by_index(net, fl4->flowi4_oif);
- if (master_idx)
- fl4->flowi4_oif = master_idx;
fl4->flowi4_iif = LOOPBACK_IFINDEX;
fl4->flowi4_tos = tos & IPTOS_RT_MASK;
fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
@@ -2261,8 +2257,7 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
if (err) {
res.fi = NULL;
res.table = NULL;
- if (fl4->flowi4_oif &&
- !netif_index_is_l3_master(net, fl4->flowi4_oif)) {
+ if (fl4->flowi4_oif) {
/* Apparently, routing tables are wrong. Assume,
that the destination is on link.
@@ -2575,9 +2570,6 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
fl4.flowi4_mark = mark;
- if (netif_index_is_l3_master(net, fl4.flowi4_oif))
- fl4.flowi4_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF;
-
if (iif) {
struct net_device *dev;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index b644a23c3db0..3155ed73d3b3 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -112,7 +112,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
int oif = 0;
if (skb_dst(skb))
- oif = l3mdev_fib_oif(skb_dst(skb)->dev);
+ oif = skb_dst(skb)->dev->ifindex;
memset(fl4, 0, sizeof(struct flowi4));
fl4->flowi4_mark = skb->mark;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 9711f32eedd7..84d1b3feaf2e 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1067,8 +1067,6 @@ struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
return ERR_PTR(err);
if (final_dst)
fl6->daddr = *final_dst;
- if (!fl6->flowi6_oif)
- fl6->flowi6_oif = l3mdev_fib_oif(dst->dev);
return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index fe65cdc28a45..d8e671457d10 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -67,7 +67,6 @@
#include <net/flow.h>
#include <net/ip6_checksum.h>
#include <net/inet_common.h>
-#include <net/l3mdev.h>
#include <linux/proc_fs.h>
#include <linux/netfilter.h>
@@ -457,11 +456,9 @@ static void ndisc_send_skb(struct sk_buff *skb,
if (!dst) {
struct flowi6 fl6;
- int oif = l3mdev_fib_oif(skb->dev);
+ int oif = skb->dev->ifindex;
icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif);
- if (oif != skb->dev->ifindex)
- fl6.flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC;
dst = icmp6_dst_alloc(skb->dev, &fl6);
if (IS_ERR(dst)) {
kfree_skb(skb);
@@ -1538,7 +1535,6 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
int rd_len;
u8 ha_buf[MAX_ADDR_LEN], *ha = NULL,
ops_data_buf[NDISC_OPS_REDIRECT_DATA_SPACE], *ops_data = NULL;
- int oif = l3mdev_fib_oif(dev);
bool ret;
if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
@@ -1555,10 +1551,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
}
icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT,
- &saddr_buf, &ipv6_hdr(skb)->saddr, oif);
-
- if (oif != skb->dev->ifindex)
- fl6.flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC;
+ &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex);
dst = ip6_route_output(net, NULL, &fl6);
if (dst->error) {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 65ee42ad2afd..f21d85cc2e77 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1164,7 +1164,7 @@ void ip6_route_input(struct sk_buff *skb)
int flags = RT6_LOOKUP_F_HAS_SADDR;
struct ip_tunnel_info *tun_info;
struct flowi6 fl6 = {
- .flowi6_iif = l3mdev_fib_oif(skb->dev),
+ .flowi6_iif = skb->dev->ifindex,
.daddr = iph->daddr,
.saddr = iph->saddr,
.flowlabel = ip6_flowinfo(iph),
@@ -3339,11 +3339,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
} else {
fl6.flowi6_oif = oif;
- if (netif_index_is_l3_master(net, oif)) {
- fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
- FLOWI_FLAG_SKIP_NH_OIF;
- }
-
rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 04529a3d42cb..54cf7197c7ab 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -818,12 +818,8 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
fl6.flowi6_proto = IPPROTO_TCP;
if (rt6_need_strict(&fl6.daddr) && !oif)
fl6.flowi6_oif = tcp_v6_iif(skb);
- else {
- if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
- oif = skb->skb_iif;
-
- fl6.flowi6_oif = oif;
- }
+ else
+ fl6.flowi6_oif = oif ? : skb->skb_iif;
fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
fl6.fl6_dport = t1->dest;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 6cc97003e4a9..b7b7e863a2bb 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -134,7 +134,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
nexthdr = nh[nhoff];
if (skb_dst(skb))
- oif = l3mdev_fib_oif(skb_dst(skb)->dev);
+ oif = skb_dst(skb)->dev->ifindex;
memset(fl6, 0, sizeof(struct flowi6));
fl6->flowi6_mark = skb->mark;
--
2.1.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH net-next 07/12] net: ipv4: Remove l3mdev_get_saddr
2016-08-30 17:34 [PATCH net-next 00/12] net: Convert vrf from dst to tx hook David Ahern
` (5 preceding siblings ...)
2016-08-30 17:34 ` [PATCH net-next 06/12] net: remove redundant l3mdev calls David Ahern
@ 2016-08-30 17:34 ` David Ahern
2016-08-30 17:34 ` [PATCH net-next 08/12] net: ipv6: Remove l3mdev_get_saddr6 David Ahern
` (5 subsequent siblings)
12 siblings, 0 replies; 15+ messages in thread
From: David Ahern @ 2016-08-30 17:34 UTC (permalink / raw)
To: netdev; +Cc: David Ahern
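l3mdev_get_saddr is no longer needed. Remove the l3mdev op, its VRF
implementation, and the callers in ip_route_connect, raw_sendmsg and
udp_sendmsg.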
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
drivers/net/vrf.c | 38 --------------------------------------
include/net/l3mdev.h | 12 ------------
include/net/route.h | 10 ----------
net/ipv4/raw.c | 6 ------
net/ipv4/udp.c | 6 ------
net/l3mdev/l3mdev.c | 31 -------------------------------
6 files changed, 103 deletions(-)
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index df58bc791cfd..ec65bf2afcb2 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -668,43 +668,6 @@ static struct rtable *vrf_get_rtable(const struct net_device *dev,
return rth;
}
-/* called under rcu_read_lock */
-static int vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4)
-{
- struct fib_result res = { .tclassid = 0 };
- struct net *net = dev_net(dev);
- u32 orig_tos = fl4->flowi4_tos;
- u8 flags = fl4->flowi4_flags;
- u8 scope = fl4->flowi4_scope;
- u8 tos = RT_FL_TOS(fl4);
- int rc;
-
- if (unlikely(!fl4->daddr))
- return 0;
-
- fl4->flowi4_flags |= FLOWI_FLAG_SKIP_NH_OIF;
- fl4->flowi4_iif = LOOPBACK_IFINDEX;
- /* make sure oif is set to VRF device for lookup */
- fl4->flowi4_oif = dev->ifindex;
- fl4->flowi4_tos = tos & IPTOS_RT_MASK;
- fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
- RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
-
- rc = fib_lookup(net, fl4, &res, 0);
- if (!rc) {
- if (res.type == RTN_LOCAL)
- fl4->saddr = res.fi->fib_prefsrc ? : fl4->daddr;
- else
- fib_select_path(net, &res, fl4, -1);
- }
-
- fl4->flowi4_flags = flags;
- fl4->flowi4_tos = orig_tos;
- fl4->flowi4_scope = scope;
-
- return rc;
-}
-
static int vrf_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
return 0;
@@ -991,7 +954,6 @@ static int vrf_get_saddr6(struct net_device *dev, const struct sock *sk,
static const struct l3mdev_ops vrf_l3mdev_ops = {
.l3mdev_fib_table = vrf_fib_table,
.l3mdev_get_rtable = vrf_get_rtable,
- .l3mdev_get_saddr = vrf_get_saddr,
.l3mdev_l3_rcv = vrf_l3_rcv,
.l3mdev_l3_out = vrf_l3_out,
#if IS_ENABLED(CONFIG_IPV6)
diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
index 5f03a89bb075..8085be19a767 100644
--- a/include/net/l3mdev.h
+++ b/include/net/l3mdev.h
@@ -25,8 +25,6 @@
*
* @l3mdev_get_rtable: Get cached IPv4 rtable (dst_entry) for device
*
- * @l3mdev_get_saddr: Get source address for a flow
- *
* @l3mdev_get_rt6_dst: Get cached IPv6 rt6_info (dst_entry) for device
*/
@@ -41,8 +39,6 @@ struct l3mdev_ops {
/* IPv4 ops */
struct rtable * (*l3mdev_get_rtable)(const struct net_device *dev,
const struct flowi4 *fl4);
- int (*l3mdev_get_saddr)(struct net_device *dev,
- struct flowi4 *fl4);
/* IPv6 ops */
struct dst_entry * (*l3mdev_get_rt6_dst)(const struct net_device *dev,
@@ -175,8 +171,6 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
return rc;
}
-int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4);
-
struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6);
int l3mdev_get_saddr6(struct net *net, const struct sock *sk,
struct flowi6 *fl6);
@@ -291,12 +285,6 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
return false;
}
-static inline int l3mdev_get_saddr(struct net *net, int ifindex,
- struct flowi4 *fl4)
-{
- return 0;
-}
-
static inline
struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6)
{
diff --git a/include/net/route.h b/include/net/route.h
index ad777d79af94..0429d47cad25 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -29,7 +29,6 @@
#include <net/flow.h>
#include <net/inet_sock.h>
#include <net/ip_fib.h>
-#include <net/l3mdev.h>
#include <linux/in_route.h>
#include <linux/rtnetlink.h>
#include <linux/rcupdate.h>
@@ -285,15 +284,6 @@ static inline struct rtable *ip_route_connect(struct flowi4 *fl4,
ip_route_connect_init(fl4, dst, src, tos, oif, protocol,
sport, dport, sk);
- if (!src && oif) {
- int rc;
-
- rc = l3mdev_get_saddr(net, oif, fl4);
- if (rc < 0)
- return ERR_PTR(rc);
-
- src = fl4->saddr;
- }
if (!dst || !src) {
rt = __ip_route_output_key(net, fl4);
if (IS_ERR(rt))
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 438f50c1a676..90a85c955872 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -606,12 +606,6 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
(inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
daddr, saddr, 0, 0);
- if (!saddr && ipc.oif) {
- err = l3mdev_get_saddr(net, ipc.oif, &fl4);
- if (err < 0)
- goto done;
- }
-
if (!inet->hdrincl) {
rfv.msg = msg;
rfv.hlen = 0;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 058c31286ce1..7d96dc2d3d08 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1021,12 +1021,6 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
flow_flags,
faddr, saddr, dport, inet->inet_sport);
- if (!saddr && ipc.oif) {
- err = l3mdev_get_saddr(net, ipc.oif, fl4);
- if (err < 0)
- goto out;
- }
-
security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
rt = ip_route_output_flow(net, fl4, sk);
if (IS_ERR(rt)) {
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index 43610e5acc4e..b30034efccff 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -131,37 +131,6 @@ struct dst_entry *l3mdev_get_rt6_dst(struct net *net,
}
EXPORT_SYMBOL_GPL(l3mdev_get_rt6_dst);
-/**
- * l3mdev_get_saddr - get source address for a flow based on an interface
- * enslaved to an L3 master device
- * @net: network namespace for device index lookup
- * @ifindex: Interface index
- * @fl4: IPv4 flow struct
- */
-
-int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4)
-{
- struct net_device *dev;
- int rc = 0;
-
- if (ifindex) {
- rcu_read_lock();
-
- dev = dev_get_by_index_rcu(net, ifindex);
- if (dev && netif_is_l3_slave(dev))
- dev = netdev_master_upper_dev_get_rcu(dev);
-
- if (dev && netif_is_l3_master(dev) &&
- dev->l3mdev_ops->l3mdev_get_saddr)
- rc = dev->l3mdev_ops->l3mdev_get_saddr(dev, fl4);
-
- rcu_read_unlock();
- }
-
- return rc;
-}
-EXPORT_SYMBOL_GPL(l3mdev_get_saddr);
-
int l3mdev_get_saddr6(struct net *net, const struct sock *sk,
struct flowi6 *fl6)
{
--
2.1.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH net-next 08/12] net: ipv6: Remove l3mdev_get_saddr6
2016-08-30 17:34 [PATCH net-next 00/12] net: Convert vrf from dst to tx hook David Ahern
` (6 preceding siblings ...)
2016-08-30 17:34 ` [PATCH net-next 07/12] net: ipv4: Remove l3mdev_get_saddr David Ahern
@ 2016-08-30 17:34 ` David Ahern
2016-08-30 17:34 ` [PATCH net-next 09/12] net: l3mdev: Remove l3mdev_get_rtable David Ahern
` (4 subsequent siblings)
12 siblings, 0 replies; 15+ messages in thread
From: David Ahern @ 2016-08-30 17:34 UTC (permalink / raw)
To: netdev; +Cc: David Ahern
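l3mdev_get_saddr6 is no longer needed. Remove the l3mdev op, its VRF
implementation, and the call in ip6_dst_lookup_tail.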
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
drivers/net/vrf.c | 41 -----------------------------------------
include/net/l3mdev.h | 11 -----------
net/ipv6/ip6_output.c | 9 +--------
net/l3mdev/l3mdev.c | 24 ------------------------
4 files changed, 1 insertion(+), 84 deletions(-)
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index ec65bf2afcb2..cc18319b4b0d 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -909,46 +909,6 @@ static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev,
return dst;
}
-
-/* called under rcu_read_lock */
-static int vrf_get_saddr6(struct net_device *dev, const struct sock *sk,
- struct flowi6 *fl6)
-{
- struct net *net = dev_net(dev);
- struct dst_entry *dst;
- struct rt6_info *rt;
- int err;
-
- if (rt6_need_strict(&fl6->daddr)) {
- rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif,
- RT6_LOOKUP_F_IFACE);
- if (unlikely(!rt))
- return 0;
-
- dst = &rt->dst;
- } else {
- __u8 flags = fl6->flowi6_flags;
-
- fl6->flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC;
- fl6->flowi6_flags |= FLOWI_FLAG_SKIP_NH_OIF;
-
- dst = ip6_route_output(net, sk, fl6);
- rt = (struct rt6_info *)dst;
-
- fl6->flowi6_flags = flags;
- }
-
- err = dst->error;
- if (!err) {
- err = ip6_route_get_saddr(net, rt, &fl6->daddr,
- sk ? inet6_sk(sk)->srcprefs : 0,
- &fl6->saddr);
- }
-
- dst_release(dst);
-
- return err;
-}
#endif
static const struct l3mdev_ops vrf_l3mdev_ops = {
@@ -958,7 +918,6 @@ static const struct l3mdev_ops vrf_l3mdev_ops = {
.l3mdev_l3_out = vrf_l3_out,
#if IS_ENABLED(CONFIG_IPV6)
.l3mdev_get_rt6_dst = vrf_get_rt6_dst,
- .l3mdev_get_saddr6 = vrf_get_saddr6,
#endif
};
diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
index 8085be19a767..391c46130ef6 100644
--- a/include/net/l3mdev.h
+++ b/include/net/l3mdev.h
@@ -43,9 +43,6 @@ struct l3mdev_ops {
/* IPv6 ops */
struct dst_entry * (*l3mdev_get_rt6_dst)(const struct net_device *dev,
struct flowi6 *fl6);
- int (*l3mdev_get_saddr6)(struct net_device *dev,
- const struct sock *sk,
- struct flowi6 *fl6);
};
#ifdef CONFIG_NET_L3_MASTER_DEV
@@ -172,8 +169,6 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
}
struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6);
-int l3mdev_get_saddr6(struct net *net, const struct sock *sk,
- struct flowi6 *fl6);
static inline
struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto)
@@ -291,12 +286,6 @@ struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6)
return NULL;
}
-static inline int l3mdev_get_saddr6(struct net *net, const struct sock *sk,
- struct flowi6 *fl6)
-{
- return 0;
-}
-
static inline
struct sk_buff *l3mdev_ip_rcv(struct sk_buff *skb)
{
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 84d1b3feaf2e..2d067b0c2f10 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -918,13 +918,6 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
int err;
int flags = 0;
- if (ipv6_addr_any(&fl6->saddr) && fl6->flowi6_oif &&
- (!*dst || !(*dst)->error)) {
- err = l3mdev_get_saddr6(net, sk, fl6);
- if (err)
- goto out_err;
- }
-
/* The correct way to handle this would be to do
* ip6_route_get_saddr, and then ip6_route_output; however,
* the route-specific preferred source forces the
@@ -1016,7 +1009,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
out_err_release:
dst_release(*dst);
*dst = NULL;
-out_err:
+
if (err == -ENETUNREACH)
IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
return err;
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index b30034efccff..998e4dc2e6f9 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -131,30 +131,6 @@ struct dst_entry *l3mdev_get_rt6_dst(struct net *net,
}
EXPORT_SYMBOL_GPL(l3mdev_get_rt6_dst);
-int l3mdev_get_saddr6(struct net *net, const struct sock *sk,
- struct flowi6 *fl6)
-{
- struct net_device *dev;
- int rc = 0;
-
- if (fl6->flowi6_oif) {
- rcu_read_lock();
-
- dev = dev_get_by_index_rcu(net, fl6->flowi6_oif);
- if (dev && netif_is_l3_slave(dev))
- dev = netdev_master_upper_dev_get_rcu(dev);
-
- if (dev && netif_is_l3_master(dev) &&
- dev->l3mdev_ops->l3mdev_get_saddr6)
- rc = dev->l3mdev_ops->l3mdev_get_saddr6(dev, sk, fl6);
-
- rcu_read_unlock();
- }
-
- return rc;
-}
-EXPORT_SYMBOL_GPL(l3mdev_get_saddr6);
-
/**
* l3mdev_fib_rule_match - Determine if flowi references an
* L3 master device
--
2.1.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH net-next 09/12] net: l3mdev: Remove l3mdev_get_rtable
2016-08-30 17:34 [PATCH net-next 00/12] net: Convert vrf from dst to tx hook David Ahern
` (7 preceding siblings ...)
2016-08-30 17:34 ` [PATCH net-next 08/12] net: ipv6: Remove l3mdev_get_saddr6 David Ahern
@ 2016-08-30 17:34 ` David Ahern
2016-08-30 17:34 ` [PATCH net-next 10/12] net: l3mdev: Remove l3mdev_get_rt6_dst David Ahern
` (3 subsequent siblings)
12 siblings, 0 replies; 15+ messages in thread
From: David Ahern @ 2016-08-30 17:34 UTC (permalink / raw)
To: netdev; +Cc: David Ahern
No longer used
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
drivers/net/vrf.c | 47 ++---------------------------------------------
include/net/l3mdev.h | 21 ---------------------
2 files changed, 2 insertions(+), 66 deletions(-)
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index cc18319b4b0d..08103bc7f1f5 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -47,7 +47,6 @@
static bool add_fib_rules = true;
struct net_vrf {
- struct rtable __rcu *rth;
struct rtable __rcu *rth_local;
struct rt6_info __rcu *rt6;
struct rt6_info __rcu *rt6_local;
@@ -460,26 +459,16 @@ static struct sk_buff *vrf_l3_out(struct net_device *vrf_dev,
/* holding rtnl */
static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf)
{
- struct rtable *rth = rtnl_dereference(vrf->rth);
struct rtable *rth_local = rtnl_dereference(vrf->rth_local);
struct net *net = dev_net(dev);
struct dst_entry *dst;
- RCU_INIT_POINTER(vrf->rth, NULL);
RCU_INIT_POINTER(vrf->rth_local, NULL);
synchronize_rcu();
/* move dev in dst's to loopback so this VRF device can be deleted
* - based on dst_ifdown
*/
- if (rth) {
- dst = &rth->dst;
- dev_put(dst->dev);
- dst->dev = net->loopback_dev;
- dev_hold(dst->dev);
- dst_release(dst);
- }
-
if (rth_local) {
dst = &rth_local->dst;
dev_put(dst->dev);
@@ -492,31 +481,20 @@ static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf)
static int vrf_rtable_create(struct net_device *dev)
{
struct net_vrf *vrf = netdev_priv(dev);
- struct rtable *rth, *rth_local;
+ struct rtable *rth_local;
if (!fib_new_table(dev_net(dev), vrf->tb_id))
return -ENOMEM;
- /* create a dst for routing packets out through a VRF device */
- rth = rt_dst_alloc(dev, 0, RTN_UNICAST, 1, 1, 0);
- if (!rth)
- return -ENOMEM;
-
/* create a dst for local ingress routing - packets sent locally
* to local address via the VRF device as a loopback
*/
rth_local = rt_dst_alloc(dev, RTCF_LOCAL, RTN_LOCAL, 1, 1, 0);
- if (!rth_local) {
- dst_release(&rth->dst);
+ if (!rth_local)
return -ENOMEM;
- }
-
- rth->dst.output = vrf_output;
- rth->rt_table_id = vrf->tb_id;
rth_local->rt_table_id = vrf->tb_id;
- rcu_assign_pointer(vrf->rth, rth);
rcu_assign_pointer(vrf->rth_local, rth_local);
return 0;
@@ -648,26 +626,6 @@ static u32 vrf_fib_table(const struct net_device *dev)
return vrf->tb_id;
}
-static struct rtable *vrf_get_rtable(const struct net_device *dev,
- const struct flowi4 *fl4)
-{
- struct rtable *rth = NULL;
-
- if (!(fl4->flowi4_flags & FLOWI_FLAG_L3MDEV_SRC)) {
- struct net_vrf *vrf = netdev_priv(dev);
-
- rcu_read_lock();
-
- rth = rcu_dereference(vrf->rth);
- if (likely(rth))
- dst_hold(&rth->dst);
-
- rcu_read_unlock();
- }
-
- return rth;
-}
-
static int vrf_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
return 0;
@@ -913,7 +871,6 @@ static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev,
static const struct l3mdev_ops vrf_l3mdev_ops = {
.l3mdev_fib_table = vrf_fib_table,
- .l3mdev_get_rtable = vrf_get_rtable,
.l3mdev_l3_rcv = vrf_l3_rcv,
.l3mdev_l3_out = vrf_l3_out,
#if IS_ENABLED(CONFIG_IPV6)
diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
index 391c46130ef6..44ceec61de63 100644
--- a/include/net/l3mdev.h
+++ b/include/net/l3mdev.h
@@ -23,8 +23,6 @@
*
* @l3mdev_l3_out: Hook in L3 output path
*
- * @l3mdev_get_rtable: Get cached IPv4 rtable (dst_entry) for device
- *
* @l3mdev_get_rt6_dst: Get cached IPv6 rt6_info (dst_entry) for device
*/
@@ -36,10 +34,6 @@ struct l3mdev_ops {
struct sock *sk, struct sk_buff *skb,
u16 proto);
- /* IPv4 ops */
- struct rtable * (*l3mdev_get_rtable)(const struct net_device *dev,
- const struct flowi4 *fl4);
-
/* IPv6 ops */
struct dst_entry * (*l3mdev_get_rt6_dst)(const struct net_device *dev,
struct flowi6 *fl6);
@@ -140,15 +134,6 @@ static inline u32 l3mdev_fib_table(const struct net_device *dev)
return tb_id;
}
-static inline struct rtable *l3mdev_get_rtable(const struct net_device *dev,
- const struct flowi4 *fl4)
-{
- if (netif_is_l3_master(dev) && dev->l3mdev_ops->l3mdev_get_rtable)
- return dev->l3mdev_ops->l3mdev_get_rtable(dev, fl4);
-
- return NULL;
-}
-
static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
{
struct net_device *dev;
@@ -269,12 +254,6 @@ static inline u32 l3mdev_fib_table_by_index(struct net *net, int ifindex)
return 0;
}
-static inline struct rtable *l3mdev_get_rtable(const struct net_device *dev,
- const struct flowi4 *fl4)
-{
- return NULL;
-}
-
static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
{
return false;
--
2.1.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH net-next 10/12] net: l3mdev: Remove l3mdev_get_rt6_dst
2016-08-30 17:34 [PATCH net-next 00/12] net: Convert vrf from dst to tx hook David Ahern
` (8 preceding siblings ...)
2016-08-30 17:34 ` [PATCH net-next 09/12] net: l3mdev: Remove l3mdev_get_rtable David Ahern
@ 2016-08-30 17:34 ` David Ahern
2016-08-30 17:34 ` [PATCH net-next 11/12] net: l3mdev: Remove l3mdev_fib_oif David Ahern
` (2 subsequent siblings)
12 siblings, 0 replies; 15+ messages in thread
From: David Ahern @ 2016-08-30 17:34 UTC (permalink / raw)
To: netdev; +Cc: David Ahern
No longer used
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
drivers/net/vrf.c | 92 +++-------------------------------------------------
include/net/l3mdev.h | 14 --------
net/l3mdev/l3mdev.c | 32 ------------------
3 files changed, 4 insertions(+), 134 deletions(-)
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 08103bc7f1f5..23801647c113 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -48,7 +48,6 @@ static bool add_fib_rules = true;
struct net_vrf {
struct rtable __rcu *rth_local;
- struct rt6_info __rcu *rt6;
struct rt6_info __rcu *rt6_local;
u32 tb_id;
};
@@ -289,25 +288,11 @@ static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
/* holding rtnl */
static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf)
{
- struct rt6_info *rt6 = rtnl_dereference(vrf->rt6);
struct rt6_info *rt6_local = rtnl_dereference(vrf->rt6_local);
struct net *net = dev_net(dev);
struct dst_entry *dst;
- RCU_INIT_POINTER(vrf->rt6, NULL);
- RCU_INIT_POINTER(vrf->rt6_local, NULL);
- synchronize_rcu();
-
- /* move dev in dst's to loopback so this VRF device can be deleted
- * - based on dst_ifdown
- */
- if (rt6) {
- dst = &rt6->dst;
- dev_put(dst->dev);
- dst->dev = net->loopback_dev;
- dev_hold(dst->dev);
- dst_release(dst);
- }
+ rcu_assign_pointer(vrf->rt6_local, NULL);
if (rt6_local) {
if (rt6_local->rt6i_idev)
@@ -327,7 +312,7 @@ static int vrf_rt6_create(struct net_device *dev)
struct net_vrf *vrf = netdev_priv(dev);
struct net *net = dev_net(dev);
struct fib6_table *rt6i_table;
- struct rt6_info *rt6, *rt6_local;
+ struct rt6_info *rt6_local;
int rc = -ENOMEM;
/* IPv6 can be CONFIG enabled and then disabled runtime */
@@ -338,24 +323,12 @@ static int vrf_rt6_create(struct net_device *dev)
if (!rt6i_table)
goto out;
- /* create a dst for routing packets out a VRF device */
- rt6 = ip6_dst_alloc(net, dev, flags);
- if (!rt6)
- goto out;
-
- dst_hold(&rt6->dst);
-
- rt6->rt6i_table = rt6i_table;
- rt6->dst.output = vrf_output6;
-
/* create a dst for local routing - packets sent locally
* to local address via the VRF device as a loopback
*/
rt6_local = ip6_dst_alloc(net, dev, flags);
- if (!rt6_local) {
- dst_release(&rt6->dst);
+ if (!rt6_local)
goto out;
- }
dst_hold(&rt6_local->dst);
@@ -364,7 +337,6 @@ static int vrf_rt6_create(struct net_device *dev)
rt6_local->rt6i_table = rt6i_table;
rt6_local->dst.input = ip6_input;
- rcu_assign_pointer(vrf->rt6, rt6);
rcu_assign_pointer(vrf->rt6_local, rt6_local);
rc = 0;
@@ -693,7 +665,7 @@ static struct rt6_info *vrf_ip6_route_lookup(struct net *net,
rcu_read_lock();
/* fib6_table does not have a refcnt and can not be freed */
- rt6 = rcu_dereference(vrf->rt6);
+ rt6 = rcu_dereference(vrf->rt6_local);
if (likely(rt6))
table = rt6->rt6i_table;
@@ -816,66 +788,10 @@ static struct sk_buff *vrf_l3_rcv(struct net_device *vrf_dev,
return skb;
}
-#if IS_ENABLED(CONFIG_IPV6)
-static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev,
- struct flowi6 *fl6)
-{
- bool need_strict = rt6_need_strict(&fl6->daddr);
- struct net_vrf *vrf = netdev_priv(dev);
- struct net *net = dev_net(dev);
- struct dst_entry *dst = NULL;
- struct rt6_info *rt;
-
- /* send to link-local or multicast address */
- if (need_strict) {
- int flags = RT6_LOOKUP_F_IFACE;
-
- /* VRF device does not have a link-local address and
- * sending packets to link-local or mcast addresses over
- * a VRF device does not make sense
- */
- if (fl6->flowi6_oif == dev->ifindex) {
- struct dst_entry *dst = &net->ipv6.ip6_null_entry->dst;
-
- dst_hold(dst);
- return dst;
- }
-
- if (!ipv6_addr_any(&fl6->saddr))
- flags |= RT6_LOOKUP_F_HAS_SADDR;
-
- rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, flags);
- if (rt)
- dst = &rt->dst;
-
- } else if (!(fl6->flowi6_flags & FLOWI_FLAG_L3MDEV_SRC)) {
-
- rcu_read_lock();
-
- rt = rcu_dereference(vrf->rt6);
- if (likely(rt)) {
- dst = &rt->dst;
- dst_hold(dst);
- }
-
- rcu_read_unlock();
- }
-
- /* make sure oif is set to VRF device for lookup */
- if (!need_strict)
- fl6->flowi6_oif = dev->ifindex;
-
- return dst;
-}
-#endif
-
static const struct l3mdev_ops vrf_l3mdev_ops = {
.l3mdev_fib_table = vrf_fib_table,
.l3mdev_l3_rcv = vrf_l3_rcv,
.l3mdev_l3_out = vrf_l3_out,
-#if IS_ENABLED(CONFIG_IPV6)
- .l3mdev_get_rt6_dst = vrf_get_rt6_dst,
-#endif
};
static void vrf_get_drvinfo(struct net_device *dev,
diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
index 44ceec61de63..3c1d71474f55 100644
--- a/include/net/l3mdev.h
+++ b/include/net/l3mdev.h
@@ -22,8 +22,6 @@
* @l3mdev_l3_rcv: Hook in L3 receive path
*
* @l3mdev_l3_out: Hook in L3 output path
- *
- * @l3mdev_get_rt6_dst: Get cached IPv6 rt6_info (dst_entry) for device
*/
struct l3mdev_ops {
@@ -33,10 +31,6 @@ struct l3mdev_ops {
struct sk_buff * (*l3mdev_l3_out)(struct net_device *dev,
struct sock *sk, struct sk_buff *skb,
u16 proto);
-
- /* IPv6 ops */
- struct dst_entry * (*l3mdev_get_rt6_dst)(const struct net_device *dev,
- struct flowi6 *fl6);
};
#ifdef CONFIG_NET_L3_MASTER_DEV
@@ -153,8 +147,6 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
return rc;
}
-struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6);
-
static inline
struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto)
{
@@ -260,12 +252,6 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
}
static inline
-struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6)
-{
- return NULL;
-}
-
-static inline
struct sk_buff *l3mdev_ip_rcv(struct sk_buff *skb)
{
return skb;
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index 998e4dc2e6f9..2ba2a650bae9 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -100,38 +100,6 @@ u32 l3mdev_fib_table_by_index(struct net *net, int ifindex)
EXPORT_SYMBOL_GPL(l3mdev_fib_table_by_index);
/**
- * l3mdev_get_rt6_dst - IPv6 route lookup based on flow. Returns
- * cached route for L3 master device if relevant
- * to flow
- * @net: network namespace for device index lookup
- * @fl6: IPv6 flow struct for lookup
- */
-
-struct dst_entry *l3mdev_get_rt6_dst(struct net *net,
- struct flowi6 *fl6)
-{
- struct dst_entry *dst = NULL;
- struct net_device *dev;
-
- if (fl6->flowi6_oif) {
- rcu_read_lock();
-
- dev = dev_get_by_index_rcu(net, fl6->flowi6_oif);
- if (dev && netif_is_l3_slave(dev))
- dev = netdev_master_upper_dev_get_rcu(dev);
-
- if (dev && netif_is_l3_master(dev) &&
- dev->l3mdev_ops->l3mdev_get_rt6_dst)
- dst = dev->l3mdev_ops->l3mdev_get_rt6_dst(dev, fl6);
-
- rcu_read_unlock();
- }
-
- return dst;
-}
-EXPORT_SYMBOL_GPL(l3mdev_get_rt6_dst);
-
-/**
* l3mdev_fib_rule_match - Determine if flowi references an
* L3 master device
* @net: network namespace for device index lookup
--
2.1.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH net-next 11/12] net: l3mdev: Remove l3mdev_fib_oif
2016-08-30 17:34 [PATCH net-next 00/12] net: Convert vrf from dst to tx hook David Ahern
` (9 preceding siblings ...)
2016-08-30 17:34 ` [PATCH net-next 10/12] net: l3mdev: Remove l3mdev_get_rt6_dst David Ahern
@ 2016-08-30 17:34 ` David Ahern
2016-08-30 17:34 ` [PATCH net-next 12/12] net: flow: Remove FLOWI_FLAG_L3MDEV_SRC flag David Ahern
2016-08-31 23:14 ` [PATCH net-next 00/12] net: Convert vrf from dst to tx hook David Ahern
12 siblings, 0 replies; 15+ messages in thread
From: David Ahern @ 2016-08-30 17:34 UTC (permalink / raw)
To: netdev; +Cc: David Ahern
No longer used
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
include/net/l3mdev.h | 29 -----------------------------
1 file changed, 29 deletions(-)
diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
index 3c1d71474f55..6aae664b427a 100644
--- a/include/net/l3mdev.h
+++ b/include/net/l3mdev.h
@@ -95,26 +95,6 @@ struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev)
return master;
}
-/* get index of an interface to use for FIB lookups. For devices
- * enslaved to an L3 master device FIB lookups are based on the
- * master index
- */
-static inline int l3mdev_fib_oif_rcu(struct net_device *dev)
-{
- return l3mdev_master_ifindex_rcu(dev) ? : dev->ifindex;
-}
-
-static inline int l3mdev_fib_oif(struct net_device *dev)
-{
- int oif;
-
- rcu_read_lock();
- oif = l3mdev_fib_oif_rcu(dev);
- rcu_read_unlock();
-
- return oif;
-}
-
u32 l3mdev_fib_table_rcu(const struct net_device *dev);
u32 l3mdev_fib_table_by_index(struct net *net, int ifindex);
static inline u32 l3mdev_fib_table(const struct net_device *dev)
@@ -224,15 +204,6 @@ struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev)
return NULL;
}
-static inline int l3mdev_fib_oif_rcu(struct net_device *dev)
-{
- return dev ? dev->ifindex : 0;
-}
-static inline int l3mdev_fib_oif(struct net_device *dev)
-{
- return dev ? dev->ifindex : 0;
-}
-
static inline u32 l3mdev_fib_table_rcu(const struct net_device *dev)
{
return 0;
--
2.1.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH net-next 12/12] net: flow: Remove FLOWI_FLAG_L3MDEV_SRC flag
2016-08-30 17:34 [PATCH net-next 00/12] net: Convert vrf from dst to tx hook David Ahern
` (10 preceding siblings ...)
2016-08-30 17:34 ` [PATCH net-next 11/12] net: l3mdev: Remove l3mdev_fib_oif David Ahern
@ 2016-08-30 17:34 ` David Ahern
2016-08-31 23:14 ` [PATCH net-next 00/12] net: Convert vrf from dst to tx hook David Ahern
12 siblings, 0 replies; 15+ messages in thread
From: David Ahern @ 2016-08-30 17:34 UTC (permalink / raw)
To: netdev; +Cc: David Ahern
No longer used
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
include/net/flow.h | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/include/net/flow.h b/include/net/flow.h
index d47ef4bb5423..035aa7716967 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -34,8 +34,7 @@ struct flowi_common {
__u8 flowic_flags;
#define FLOWI_FLAG_ANYSRC 0x01
#define FLOWI_FLAG_KNOWN_NH 0x02
-#define FLOWI_FLAG_L3MDEV_SRC 0x04
-#define FLOWI_FLAG_SKIP_NH_OIF 0x08
+#define FLOWI_FLAG_SKIP_NH_OIF 0x04
__u32 flowic_secid;
struct flowi_tunnel flowic_tun_key;
};
--
2.1.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH net-next 00/12] net: Convert vrf from dst to tx hook
2016-08-30 17:34 [PATCH net-next 00/12] net: Convert vrf from dst to tx hook David Ahern
` (11 preceding siblings ...)
2016-08-30 17:34 ` [PATCH net-next 12/12] net: flow: Remove FLOWI_FLAG_L3MDEV_SRC flag David Ahern
@ 2016-08-31 23:14 ` David Ahern
2016-09-01 3:43 ` David Miller
12 siblings, 1 reply; 15+ messages in thread
From: David Ahern @ 2016-08-31 23:14 UTC (permalink / raw)
To: netdev, David Miller
On 8/30/16 11:34 AM, David Ahern wrote:
> This series fixes this problem by removing the output dst that points
> to the VRF and always doing the actual FIB lookup. This allows the real
> dst to be cached on sockets and used for MSS. Packets are diverted to
> the VRF device on Tx using an l3mdev hook in the output path similar to
> what is done for Rx.
Dave:
Please drop this series. BGP smoke tests triggered a couple of problems that I need to resolve.
^ permalink raw reply [flat|nested] 15+ messages in thread