All of lore.kernel.org
 help / color / mirror / Atom feed
From: Paolo Abeni <pabeni@redhat.com>
To: netdev@vger.kernel.org
Cc: "David S. Miller" <davem@davemloft.net>,
	Willem de Bruijn <willemdebruijn.kernel@gmail.com>,
	Edward Cree <ecree@solarflare.com>
Subject: [PATCH net-next v2 2/2] ipv4: use dst hint for ipv4 list receive
Date: Mon, 18 Nov 2019 12:01:30 +0100	[thread overview]
Message-ID: <592c763828171c414e8927878b1a22027e33dee7.1574071944.git.pabeni@redhat.com> (raw)
In-Reply-To: <cover.1574071944.git.pabeni@redhat.com>

This is similar to the previous change, with some additional ipv4-specific
quirks. Even when using the route hint we still have to perform
additional per-packet checks about source address validity: a new
helper is added to wrap them.

Moreover, the ipv4 route lookup, even in the absence of policy routing,
may depend on the packet's ToS, so we cache that value, too.

Explicitly avoid hints for local broadcast: this simplifies the code,
and broadcasts are a slower path anyway.

UDP flood performance vs recvmmsg() receiver:

vanilla		patched		delta
Kpps		Kpps		%
1683		1833		+8

In the worst case scenario - each packet has a different
destination address - the performance delta is within noise
range.

v1 -> v2:
 - fix build issue with !CONFIG_IP_MULTIPLE_TABLES

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/route.h | 11 +++++++++++
 net/ipv4/ip_input.c | 38 +++++++++++++++++++++++++++++++++-----
 net/ipv4/route.c    | 38 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 82 insertions(+), 5 deletions(-)

diff --git a/include/net/route.h b/include/net/route.h
index 6c516840380d..f7a8a52318cd 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -185,6 +185,17 @@ int ip_route_input_rcu(struct sk_buff *skb, __be32 dst, __be32 src,
 		       u8 tos, struct net_device *devin,
 		       struct fib_result *res);
 
+struct ip_route_input_hint {
+	unsigned long	refdst;	/* _skb_refdst of the skb that set the hint */
+	__be32		daddr;	/* daddr the hinted route was resolved for */
+	u8		tos;	/* iph->tos; unsigned, like the header field */
+	bool		local;	/* hinted route is RTN_LOCAL */
+};
+
+int ip_route_use_hint(struct sk_buff *skb, __be32 dst, __be32 src,
+		      u8 tos, struct net_device *devin,
+		      struct ip_route_input_hint *hint);
+
 static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
 				 u8 tos, struct net_device *devin)
 {
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 24a95126e698..25f6fcc65380 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -305,7 +305,8 @@ static inline bool ip_rcv_options(struct sk_buff *skb, struct net_device *dev)
 INDIRECT_CALLABLE_DECLARE(int udp_v4_early_demux(struct sk_buff *));
 INDIRECT_CALLABLE_DECLARE(int tcp_v4_early_demux(struct sk_buff *));
 static int ip_rcv_finish_core(struct net *net, struct sock *sk,
-			      struct sk_buff *skb, struct net_device *dev)
+			      struct sk_buff *skb, struct net_device *dev,
+			      struct ip_route_input_hint *hint)
 {
 	const struct iphdr *iph = ip_hdr(skb);
 	int (*edemux)(struct sk_buff *skb);
@@ -335,8 +336,12 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
 	 *	how the packet travels inside Linux networking.
 	 */
 	if (!skb_valid_dst(skb)) {
-		err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
-					   iph->tos, dev);
+		if (hint && hint->daddr == iph->daddr && hint->tos == iph->tos)
+			err = ip_route_use_hint(skb, iph->daddr, iph->saddr,
+						iph->tos, dev, hint);
+		else
+			err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
+						   iph->tos, dev);
 		if (unlikely(err))
 			goto drop_error;
 	}
@@ -408,7 +413,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 	if (!skb)
 		return NET_RX_SUCCESS;
 
-	ret = ip_rcv_finish_core(net, sk, skb, dev);
+	ret = ip_rcv_finish_core(net, sk, skb, dev, NULL);
 	if (ret != NET_RX_DROP)
 		ret = dst_input(skb);
 	return ret;
@@ -535,9 +540,20 @@ static void ip_sublist_rcv_finish(struct list_head *head)
 	}
 }
 
+static bool ip_can_cache_route_hint(struct net *net, struct rtable *rt)
+{
+	return rt->rt_type != RTN_BROADCAST &&	/* bcast: always full lookup */
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+	       !net->ipv4.fib_has_custom_rules;	/* IPv4 policy rules in use */
+#else
+	       1;
+#endif
+}
+
 static void ip_list_rcv_finish(struct net *net, struct sock *sk,
 			       struct list_head *head)
 {
+	struct ip_route_input_hint _hint, *hint = NULL;
 	struct dst_entry *curr_dst = NULL;
 	struct sk_buff *skb, *next;
 	struct list_head sublist;
@@ -554,11 +570,23 @@ static void ip_list_rcv_finish(struct net *net, struct sock *sk,
 		skb = l3mdev_ip_rcv(skb);
 		if (!skb)
 			continue;
-		if (ip_rcv_finish_core(net, sk, skb, dev) == NET_RX_DROP)
+		if (ip_rcv_finish_core(net, sk, skb, dev, hint) == NET_RX_DROP)
 			continue;
 
 		dst = skb_dst(skb);
 		if (curr_dst != dst) {
+			struct rtable *rt = (struct rtable *)dst;
+
+			if (ip_can_cache_route_hint(net, rt)) {
+				_hint.refdst = skb->_skb_refdst;
+				_hint.daddr = ip_hdr(skb)->daddr;
+				_hint.tos = ip_hdr(skb)->tos;
+				_hint.local = rt->rt_type == RTN_LOCAL;
+				hint = &_hint;
+			} else {
+				hint = NULL;
+			}
+
 			/* dispatch old sublist */
 			if (!list_empty(&sublist))
 				ip_sublist_rcv_finish(&sublist);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index dcc4fa10138d..b0ddff17db80 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2019,6 +2019,44 @@ static int ip_mkroute_input(struct sk_buff *skb,
 	return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
 }
 
+/* Attach the cached @hint dst to @skb instead of doing a route lookup, after
+ * redoing the per-packet saddr checks of ip_route_input_slow(). Assumes daddr
+ * is valid and not a local broadcast. Returns 0, or -errno with no dst set.
+ */
+int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+		      u8 tos, struct net_device *dev,
+		      struct ip_route_input_hint *hint)
+{
+	struct in_device *in_dev = __in_dev_get_rcu(dev);
+	struct net *net = dev_net(dev);
+	int err = -EINVAL;
+	u32 itag = 0;
+
+	if (!in_dev)	/* every check below, and the martian log, needs it */
+		return -EINVAL;
+
+	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
+	    ipv4_is_zeronet(saddr))
+		goto martian_source;
+
+	if (ipv4_is_loopback(saddr) && !IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
+		goto martian_source;
+
+	if (hint->local) {
+		err = fib_validate_source(skb, saddr, daddr, tos, 0, dev,
+					  in_dev, &itag);
+		if (err < 0)
+			goto martian_source;
+	}
+
+	__skb_dst_copy(skb, hint->refdst);
+	return 0;
+
+martian_source:
+	ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
+	return err;
+}
+
 /*
  *	NOTE. We drop all the packets that has local source
  *	addresses, because every properly looped back packet
-- 
2.21.0


  parent reply	other threads:[~2019-11-18 11:02 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-11-18 11:01 [PATCH net-next v2 0/2] net: introduce and use route hint Paolo Abeni
2019-11-18 11:01 ` [PATCH net-next v2 1/2] ipv6: introduce and uses route look hints for list input Paolo Abeni
2019-11-18 20:29   ` Willem de Bruijn
2019-11-18 21:58     ` Paolo Abeni
2019-11-19 14:10       ` Willem de Bruijn
2019-11-18 11:01 ` Paolo Abeni [this message]
2019-11-18 14:11   ` [PATCH net-next v2 2/2] ipv4: use dst hint for ipv4 list receive kbuild test robot
2019-11-18 14:11     ` kbuild test robot
2019-11-18 16:07   ` David Ahern
2019-11-18 16:31     ` Paolo Abeni
2019-11-18 16:40       ` David Ahern

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=592c763828171c414e8927878b1a22027e33dee7.1574071944.git.pabeni@redhat.com \
    --to=pabeni@redhat.com \
    --cc=davem@davemloft.net \
    --cc=ecree@solarflare.com \
    --cc=netdev@vger.kernel.org \
    --cc=willemdebruijn.kernel@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.