netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH RFC v4 net 0/3]: ipv6: Reduce the number of fib6_lookup() calls from ip6_pol_route()
@ 2014-10-10 18:48 Martin KaFai Lau
  2014-10-10 18:48 ` [PATCH RFC v4 net 1/3] ipv6: Remove BACKTRACK macro Martin KaFai Lau
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Martin KaFai Lau @ 2014-10-10 18:48 UTC (permalink / raw)
  To: netdev

Hi,

This patch set is trying to reduce the number of fib6_lookup()
calls from ip6_pol_route().

I have adapted davem's udpflood and kbench_mod test
(https://git.kernel.org/pub/scm/linux/kernel/git/davem/net_test_tools.git) to
support IPv6 and here is the result:


Before:
[root]# for i in $(seq 1 3); do time ./udpflood -l 20000000 -c 250 2401:face:face:face::2; done

real    0m34.190s
user    0m3.047s
sys     0m31.108s

real    0m34.635s
user    0m3.125s
sys     0m31.475s

real    0m34.517s
user    0m3.034s
sys     0m31.449s

[root]# insmod ip6_route_kbench.ko oif=2 src=2401:face:face:face::1 dst=2401:face:face:face::2
[  660.160976] ip6_route_kbench: ip6_route_output tdiff: 933
[  660.207261] ip6_route_kbench: ip6_route_output tdiff: 988
[  660.253492] ip6_route_kbench: ip6_route_output tdiff: 896
[  660.298862] ip6_route_kbench: ip6_route_output tdiff: 898

After:
[root]# for i in $(seq 1 3); do time ./udpflood -l 20000000 -c 250 2401:face:face:face::2; done

real    0m32.695s
user    0m2.925s
sys     0m29.737s

real    0m32.636s
user    0m3.007s
sys     0m29.596s

real    0m32.797s
user    0m2.866s
sys     0m29.898s

[root]# insmod ip6_route_kbench.ko oif=2 src=2401:face:face:face::1 dst=2401:face:face:face::2
[  881.220793] ip6_route_kbench: ip6_route_output tdiff: 684
[  881.253477] ip6_route_kbench: ip6_route_output tdiff: 640
[  881.286867] ip6_route_kbench: ip6_route_output tdiff: 630
[  881.320749] ip6_route_kbench: ip6_route_output tdiff: 653


/****************************** udpflood.c ******************************/
/* It is an adaptation of the Eric Dumazet's and David Miller's
 * udpflood tool, by adding IPv6 support.
 */

#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <malloc.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <stdint.h>
#include <assert.h>

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

#define _GNU_SOURCE
#include <getopt.h>

typedef uint32_t u32;

static int debug =3D 0;

/* Allow -fstrict-aliasing */
typedef union sa_u {
	struct sockaddr_storage a46;
	struct sockaddr_in a4;
	struct sockaddr_in6 a6;
} sa_u;

static int usage(void)
{
	printf("usage: udpflood [ -l count ] [ -m message_size ] [ -c num_ip_addrs=
 ] IP_ADDRESS\n");
	return -1;
}

static u32 get_last32h(const sa_u *sa)
{
	if (sa->a46.ss_family =3D=3D PF_INET)
		return ntohl(sa->a4.sin_addr.s_addr);
	else
		return ntohl(sa->a6.sin6_addr.s6_addr32[3]);
}

static void set_last32h(sa_u *sa, u32 last32h)
{
	if (sa->a46.ss_family =3D=3D PF_INET)
		sa->a4.sin_addr.s_addr =3D htonl(last32h);
	else
		sa->a6.sin6_addr.s6_addr32[3] =3D htonl(last32h);
}

static void print_saddr(const sa_u *sa, const char *msg)
{
	char buf[64];

	if (!debug)
		return;

	switch (sa->a46.ss_family) {
	case PF_INET:
		inet_ntop(PF_INET, &(sa->a4.sin_addr.s_addr), buf,
			  sizeof(buf));
		break;
	case PF_INET6:
		inet_ntop(PF_INET6, &(sa->a6.sin6_addr), buf, sizeof(buf));
		break;
	}

	printf("%s: %s\n", msg, buf);
}

static int send_packets(const sa_u *sa, size_t num_addrs, int count, int ms=
g_sz)
{
	char *msg =3D malloc(msg_sz);
	sa_u saddr;
	u32 start_addr32h, end_addr32h, cur_addr32h;
	int fd, i, err;

	if (!msg)
		return -ENOMEM;

	memset(msg, 0, msg_sz);

	memcpy(&saddr, sa, sizeof(saddr));
	cur_addr32h =3D start_addr32h =3D get_last32h(&saddr);
	end_addr32h =3D start_addr32h + num_addrs;

	fd =3D socket(saddr.a46.ss_family, SOCK_DGRAM, 0);
	if (fd < 0) {
		perror("socket");
		err =3D fd;
		goto out_nofd;
	}

	/* connect to avoid the kernel spending time in figuring
	 * out the source address (i.e pin the src address)
	 */
	err =3D connect(fd, (struct sockaddr *) &saddr, sizeof(saddr));
	if (err < 0) {
		perror("connect");
		goto out;
	}

	print_saddr(&saddr, "start_addr");
	for (i =3D 0; i < count; i++) {
		print_saddr(&saddr, "sendto");
		err =3D sendto(fd, msg, msg_sz, 0, (struct sockaddr *)&saddr,
			     sizeof(saddr));
		if (err < 0) {
			perror("sendto");
			goto out;
		}

		if (++cur_addr32h >=3D end_addr32h)
			cur_addr32h =3D start_addr32h;
		set_last32h(&saddr, cur_addr32h);
	}

	err =3D 0;
out:
	close(fd);
out_nofd:
	free(msg);
	return err;
}

int main(int argc, char **argv, char **envp)
{
	int port, msg_sz, count, num_addrs, ret;

	sa_u start_addr;

	port =3D 6000;
	msg_sz =3D 32;
	count =3D 10000000;
	num_addrs =3D 1;

	while ((ret =3D getopt(argc, argv, "dl:s:p:c:")) >=3D 0) {
		switch (ret) {
		case 'l':
			sscanf(optarg, "%d", &count);
			break;
		case 's':
			sscanf(optarg, "%d", &msg_sz);
			break;
		case 'p':
			sscanf(optarg, "%d", &port);
			break;
		case 'c':
			sscanf(optarg, "%d", &num_addrs);
			break;
		case 'd':
			debug =3D 1;
			break;
		case '?':
			return usage();
		}
	}

	if (num_addrs < 1)
		return usage();

	if (!argv[optind])
		return usage();

	start_addr.a4.sin_port =3D htons(port);
	if (inet_pton(PF_INET, argv[optind], &start_addr.a4.sin_addr))
		start_addr.a46.ss_family =3D PF_INET;
	else if (inet_pton(PF_INET6, argv[optind], &start_addr.a6.sin6_addr.s6_add=
r))
		start_addr.a46.ss_family =3D PF_INET6;
	else
		return usage();

	return send_packets(&start_addr, num_addrs, count, msg_sz);
}

/****************** ip6_route_kbench_mod.c ******************/
#define pr_fmt(fmt) "ip6_route_kbench: " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/inet.h>
#include <linux/in6.h>

#include <net/route.h>
#include <net/ip6_route.h>

#include <linux/timex.h>
#include <uapi/linux/icmpv6.h>

/* We can't just use "get_cycles()" as on some platforms, such
 * as sparc64, that gives system cycles rather than cpu clock
 * cycles.
 */

#ifdef CONFIG_SPARC64
static inline unsigned long long get_tick(void)
{
	unsigned long long t;

	__asm__ __volatile__("rd %%tick, %0" : "=r" (t));
	return t;
}
#elif defined(CONFIG_X86)
static inline unsigned long long get_tick(void)
{
	unsigned long long t;

	rdtscll(t);

	return t;
}
#elif defined(CONFIG_POWERPC)
static inline unsigned long long get_tick(void)
{
	return get_cycles();
}
#else
#error Unsupported architecture, please implement get_tick()
#endif

#define DEFAULT_WARMUP_COUNT 100000

#define DEFAULT_DST_IP_ADDR	0x4a800001
#define DEFAULT_SRC_IP_ADDR	0x00000000
#define DEFAULT_OIF		0
#define DEFAULT_IIF		0
#define DEFAULT_MARK		0x00000000
#define DEFAULT_TOS		0x00

static int flow_oif = DEFAULT_OIF;
static int flow_iif = DEFAULT_IIF;
static u32 flow_mark = DEFAULT_MARK;
static struct in6_addr flow_dst_ip_addr;
static struct in6_addr flow_src_ip_addr;
static int flow_tos = DEFAULT_TOS;

static char dst_string[64];
static char src_string[64];

module_param_string(dst, dst_string, sizeof(dst_string), 0);
module_param_string(src, src_string, sizeof(src_string), 0);

static int __init flow_setup(void)
{
	if (dst_string[0] &&
	    !in6_pton(dst_string, -1, &flow_dst_ip_addr.s6_addr[0], -1, NULL)) {
		pr_info("cannot parse \"%s\"\n", dst_string);
		return -1;
	}

	if (src_string[0] &&
	    !in6_pton(src_string, -1, &flow_src_ip_addr.s6_addr[0], -1, NULL)) {
		pr_info("cannot parse \"%s\"\n", dst_string);
		return -1;
	}

	return 0;
}

module_param_named(oif, flow_oif, int, 0);
module_param_named(iif, flow_iif, int, 0);
module_param_named(mark, flow_mark, uint, 0);
module_param_named(tos, flow_tos, int, 0);

static int warmup_count = DEFAULT_WARMUP_COUNT;
module_param_named(count, warmup_count, int, 0);

static void flow_init(struct flowi6 *fl6)
{
	memset(fl6, 0, sizeof(*fl6));
	fl6->flowi6_proto = IPPROTO_ICMPV6;
	fl6->flowi6_oif = flow_oif;
	fl6->flowi6_iif = flow_iif;
	fl6->flowi6_mark = flow_mark;
	fl6->flowi6_tos = flow_tos;
	fl6->daddr = flow_dst_ip_addr;
	fl6->saddr = flow_src_ip_addr;
}

static struct sk_buff * fake_skb_get(void)
{
	struct ipv6hdr *hdr;
	struct sk_buff *skb;

	skb = alloc_skb(4096, GFP_KERNEL);
	if (!skb) {
		pr_info("Cannot alloc SKB for test\n");
		return NULL;
	}
	skb->dev = __dev_get_by_index(&init_net, flow_iif);
	if (skb->dev == NULL) {
		pr_info("Input device (%d) does not exist\n", flow_iif);
		goto err;
	}

	skb_reset_mac_header(skb);
	skb_reset_network_header(skb);
	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
	hdr = ipv6_hdr(skb);

	hdr->priority = 0;
	hdr->version = 6;
	memset(hdr->flow_lbl, 0, sizeof(hdr->flow_lbl));
	hdr->payload_len = htons(sizeof(struct icmp6hdr));
	hdr->nexthdr = IPPROTO_ICMPV6;
	hdr->saddr = flow_src_ip_addr;
	hdr->daddr = flow_dst_ip_addr;
	skb->protocol = htons(ETH_P_IPV6);
	skb->mark = flow_mark;

	return skb;
err:
	kfree_skb(skb);
	return NULL;
}

static void do_full_output_lookup_bench(void)
{
	unsigned long long t1, t2, tdiff;
	struct rt6_info *rt;
	struct flowi6 fl6;
	int i;

	rt = NULL;

	for (i = 0; i < warmup_count; i++) {
		flow_init(&fl6);

		rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl6);
		if (IS_ERR(rt))
			break;
		ip6_rt_put(rt);
	}
	if (IS_ERR(rt)) {
		pr_info("ip_route_output_key: err=%ld\n", PTR_ERR(rt));
		return;
	}

	flow_init(&fl6);

	t1 = get_tick();
	rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl6);
	t2 = get_tick();
	if (!IS_ERR(rt))
		ip6_rt_put(rt);

	tdiff = t2 - t1;
	pr_info("ip6_route_output tdiff: %llu\n", tdiff);
}

static void do_full_input_lookup_bench(void)
{
	unsigned long long t1, t2, tdiff;
	struct sk_buff *skb;
	struct rt6_info *rt;
	int err, i;

	skb = fake_skb_get();
	if (skb == NULL)
		goto out_free;

	err = 0;
	local_bh_disable();
	for (i = 0; i < warmup_count; i++) {
		ip6_route_input(skb);
		rt = (struct rt6_info *)skb_dst(skb);
		err = (!rt || rt == init_net.ipv6.ip6_null_entry);
		skb_dst_drop(skb);
		if (err)
			break;
	}
	local_bh_enable();

	if (err) {
		pr_info("Input route lookup fails\n");
		goto out_free;
	}

	local_bh_disable();
	t1 = get_tick();
	ip6_route_input(skb);
	t2 = get_tick();
	local_bh_enable();

	rt = (struct rt6_info *)skb_dst(skb);
	err = (!rt || rt == init_net.ipv6.ip6_null_entry);
	skb_dst_drop(skb);
	if (err) {
		pr_info("Input route lookup fails\n");
		goto out_free;
	}

	tdiff = t2 - t1;
	pr_info("ip6_route_input tdiff: %llu\n", tdiff);

out_free:
	kfree_skb(skb);
}

static void do_full_lookup_bench(void)
{
	if (!flow_iif)
		do_full_output_lookup_bench();
	else
		do_full_input_lookup_bench();
}

static void do_bench(void)
{
	do_full_lookup_bench();
	do_full_lookup_bench();
	do_full_lookup_bench();
	do_full_lookup_bench();
}

static int __init kbench_init(void)
{
	if (flow_setup())
		return -EINVAL;

	pr_info("flow [IIF(%d),OIF(%d),MARK(0x%08x),D("IP6_FMT"),"
		"S("IP6_FMT"),TOS(0x%02x)]\n",
		flow_iif, flow_oif, flow_mark,
		IP6_PRT(flow_dst_ip_addr),
		IP6_PRT(flow_src_ip_addr),
		flow_tos);

#if defined(CONFIG_X86)
	if (!cpu_has_tsc) {
		pr_err("X86 TSC is required, but is unavailable.\n");
		return -EINVAL;
	}
#endif

	pr_info("sizeof(struct rt6_info)==%zu\n", sizeof(struct rt6_info));

	do_bench();

	return -ENODEV;
}

static void __exit kbench_exit(void)
{
}

module_init(kbench_init);
module_exit(kbench_exit);
MODULE_LICENSE("GPL");

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH RFC v4 net 1/3] ipv6: Remove BACKTRACK macro
  2014-10-10 18:48 [PATCH RFC v4 net 0/3]: ipv6: Reduce the number of fib6_lookup() calls from ip6_pol_route() Martin KaFai Lau
@ 2014-10-10 18:48 ` Martin KaFai Lau
  2014-10-14 19:01   ` David Miller
  2014-10-10 18:48 ` [PATCH RFC v4 net 2/3] ipv6: Avoid redoing fib6_lookup() for RTF_CACHE hit case Martin KaFai Lau
  2014-10-10 18:48 ` [PATCH RFC v4 net 3/3] ipv6: Avoid redo-ing fib6_lookup() with reachable = 0 by saving fn Martin KaFai Lau
  2 siblings, 1 reply; 7+ messages in thread
From: Martin KaFai Lau @ 2014-10-10 18:48 UTC (permalink / raw)
  To: netdev; +Cc: David Miller, Hannes Frederic Sowa

It is the prep work to reduce the number of calls to fib6_lookup().

The BACKTRACK macro could be hard-to-read and error-prone due to
its side effects (mainly goto).

This patch is to:
1. Replace BACKTRACK macro with a function (fib6_backtrack) with the following
   return values:
   * If it is backtrack-able, returns next fn for retry.
   * If it reaches the root, returns NULL.
2. The caller needs to decide if a backtrack is needed (by testing
   rt == net->ipv6.ip6_null_entry).
3. Rename the goto labels in ip6_pol_route() to make the next few
   patches easier to read.

Cc: David Miller <davem@davemloft.net>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
 include/net/ip6_fib.h |  3 +++
 net/ipv6/ip6_fib.c    | 17 ++++++++++++++++
 net/ipv6/route.c      | 55 ++++++++++++++++++++++-----------------------------
 3 files changed, 44 insertions(+), 31 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 8eea35d..1f1d7f8 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -288,6 +288,9 @@ struct fib6_node *fib6_locate(struct fib6_node *root,
 			      const struct in6_addr *daddr, int dst_len,
 			      const struct in6_addr *saddr, int src_len);
 
+struct fib6_node *fib6_backtrack(struct fib6_node *fn,
+				 struct in6_addr *saddr);
+
 void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg),
 		    void *arg);
 
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index b2d1838..594d189 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1139,6 +1139,23 @@ struct fib6_node *fib6_locate(struct fib6_node *root,
 	return NULL;
 }
 
+struct fib6_node* fib6_backtrack(struct fib6_node *fn,
+				 struct in6_addr *saddr)
+{
+	struct fib6_node *pn;
+	while (1) {
+		if (fn->fn_flags & RTN_TL_ROOT)
+			return NULL;
+		pn = fn->parent;
+		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
+			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
+		else
+			fn = pn;
+		if (fn->fn_flags & RTN_RTINFO)
+			return fn;
+	}
+}
+
 
 /*
  *	Deletion
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a318dd89..797a6e7 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -772,24 +772,6 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 }
 #endif
 
-#define BACKTRACK(__net, saddr)			\
-do { \
-	if (rt == __net->ipv6.ip6_null_entry) {	\
-		struct fib6_node *pn; \
-		while (1) { \
-			if (fn->fn_flags & RTN_TL_ROOT) \
-				goto out; \
-			pn = fn->parent; \
-			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
-				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
-			else \
-				fn = pn; \
-			if (fn->fn_flags & RTN_RTINFO) \
-				goto restart; \
-		} \
-	} \
-} while (0)
-
 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 					     struct fib6_table *table,
 					     struct flowi6 *fl6, int flags)
@@ -804,8 +786,11 @@ restart:
 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
 	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
 		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
-	BACKTRACK(net, &fl6->saddr);
-out:
+	if (rt == net->ipv6.ip6_null_entry) {
+		fn = fib6_backtrack(fn, &fl6->saddr);
+		if (fn)
+			goto restart;
+	}
 	dst_use(&rt->dst, jiffies);
 	read_unlock_bh(&table->tb6_lock);
 	return rt;
@@ -924,19 +909,25 @@ static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 
 	strict |= flags & RT6_LOOKUP_F_IFACE;
 
-relookup:
+redo_fib6_lookup_lock:
 	read_lock_bh(&table->tb6_lock);
 
-restart_2:
+redo_fib6_lookup:
 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 
-restart:
+redo_rt6_select:
 	rt = rt6_select(fn, oif, strict | reachable);
 	if (rt->rt6i_nsiblings)
 		rt = rt6_multipath_select(rt, fl6, oif, strict | reachable);
-	BACKTRACK(net, &fl6->saddr);
-	if (rt == net->ipv6.ip6_null_entry ||
-	    rt->rt6i_flags & RTF_CACHE)
+	if (rt == net->ipv6.ip6_null_entry) {
+		fn = fib6_backtrack(fn, &fl6->saddr);
+		if (fn)
+			goto redo_rt6_select;
+		else
+			goto out;
+	}
+
+	if (rt->rt6i_flags & RTF_CACHE)
 		goto out;
 
 	dst_hold(&rt->dst);
@@ -967,12 +958,12 @@ restart:
 	 * released someone could insert this route.  Relookup.
 	 */
 	ip6_rt_put(rt);
-	goto relookup;
+	goto redo_fib6_lookup_lock;
 
 out:
 	if (reachable) {
 		reachable = 0;
-		goto restart_2;
+		goto redo_fib6_lookup;
 	}
 	dst_hold(&rt->dst);
 	read_unlock_bh(&table->tb6_lock);
@@ -1235,10 +1226,12 @@ restart:
 		rt = net->ipv6.ip6_null_entry;
 	else if (rt->dst.error) {
 		rt = net->ipv6.ip6_null_entry;
-		goto out;
+	} else if (rt == net->ipv6.ip6_null_entry) {
+		fn = fib6_backtrack(fn, &fl6->saddr);
+		if (fn)
+			goto restart;
 	}
-	BACKTRACK(net, &fl6->saddr);
-out:
+
 	dst_hold(&rt->dst);
 
 	read_unlock_bh(&table->tb6_lock);
-- 
1.8.1

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH RFC v4 net 2/3] ipv6: Avoid redoing fib6_lookup() for RTF_CACHE hit case
  2014-10-10 18:48 [PATCH RFC v4 net 0/3]: ipv6: Reduce the number of fib6_lookup() calls from ip6_pol_route() Martin KaFai Lau
  2014-10-10 18:48 ` [PATCH RFC v4 net 1/3] ipv6: Remove BACKTRACK macro Martin KaFai Lau
@ 2014-10-10 18:48 ` Martin KaFai Lau
  2014-10-10 18:48 ` [PATCH RFC v4 net 3/3] ipv6: Avoid redo-ing fib6_lookup() with reachable = 0 by saving fn Martin KaFai Lau
  2 siblings, 0 replies; 7+ messages in thread
From: Martin KaFai Lau @ 2014-10-10 18:48 UTC (permalink / raw)
  To: netdev; +Cc: David Miller, Hannes Frederic Sowa

When there is a RTF_CACHE hit, no need to redo fib6_lookup()
with reachable=0.

Cc: David Miller <davem@davemloft.net>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
 net/ipv6/route.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 797a6e7..5469ecf 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -927,12 +927,12 @@ redo_rt6_select:
 			goto out;
 	}
 
-	if (rt->rt6i_flags & RTF_CACHE)
-		goto out;
-
 	dst_hold(&rt->dst);
 	read_unlock_bh(&table->tb6_lock);
 
+	if (rt->rt6i_flags & RTF_CACHE)
+		goto out2;
+
 	if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
 		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
 	else if (!(rt->dst.flags & DST_HOST))
-- 
1.8.1

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH RFC v4 net 3/3] ipv6: Avoid redo-ing fib6_lookup() with reachable = 0 by saving fn
  2014-10-10 18:48 [PATCH RFC v4 net 0/3]: ipv6: Reduce the number of fib6_lookup() calls from ip6_pol_route() Martin KaFai Lau
  2014-10-10 18:48 ` [PATCH RFC v4 net 1/3] ipv6: Remove BACKTRACK macro Martin KaFai Lau
  2014-10-10 18:48 ` [PATCH RFC v4 net 2/3] ipv6: Avoid redoing fib6_lookup() for RTF_CACHE hit case Martin KaFai Lau
@ 2014-10-10 18:48 ` Martin KaFai Lau
  2 siblings, 0 replies; 7+ messages in thread
From: Martin KaFai Lau @ 2014-10-10 18:48 UTC (permalink / raw)
  To: netdev; +Cc: David Miller, Hannes Frederic Sowa

This patch saves the fn before doing rt6_backtrack.
Hence, without redo-ing the fib6_lookup(), saved_fn can be used
to redo rt6_select() with RT6_LOOKUP_F_REACHABLE off.

Some minor changes I think make sense to review as a single patch:
* Remove the 'out:' goto label.
* Remove the 'reachable' variable.

After this patch, "failing ip6_ins_rt()" is the only case that
requires a redo of fib6_lookup().

Cc: David Miller <davem@davemloft.net>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
 net/ipv6/route.c | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 5469ecf..a8ecb9f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -900,31 +900,40 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
 				      struct flowi6 *fl6, int flags)
 {
-	struct fib6_node *fn;
+	struct fib6_node *fn, *saved_fn;
 	struct rt6_info *rt, *nrt;
 	int strict = 0;
 	int attempts = 3;
 	int err;
-	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
 
 	strict |= flags & RT6_LOOKUP_F_IFACE;
+	if (net->ipv6.devconf_all->forwarding == 0)
+		strict |= RT6_LOOKUP_F_REACHABLE;
 
 redo_fib6_lookup_lock:
 	read_lock_bh(&table->tb6_lock);
 
-redo_fib6_lookup:
 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
+	saved_fn = fn;
 
 redo_rt6_select:
-	rt = rt6_select(fn, oif, strict | reachable);
+	rt = rt6_select(fn, oif, strict);
 	if (rt->rt6i_nsiblings)
-		rt = rt6_multipath_select(rt, fl6, oif, strict | reachable);
+		rt = rt6_multipath_select(rt, fl6, oif, strict);
 	if (rt == net->ipv6.ip6_null_entry) {
 		fn = fib6_backtrack(fn, &fl6->saddr);
 		if (fn)
 			goto redo_rt6_select;
-		else
-			goto out;
+		else if (strict & RT6_LOOKUP_F_REACHABLE) {
+			/* also consider unreachable route */
+			strict &= ~RT6_LOOKUP_F_REACHABLE;
+			fn = saved_fn;
+			goto redo_rt6_select;
+		} else {
+			dst_hold(&rt->dst);
+			read_unlock_bh(&table->tb6_lock);
+			goto out2;
+		}
 	}
 
 	dst_hold(&rt->dst);
@@ -960,13 +969,6 @@ redo_rt6_select:
 	ip6_rt_put(rt);
 	goto redo_fib6_lookup_lock;
 
-out:
-	if (reachable) {
-		reachable = 0;
-		goto redo_fib6_lookup;
-	}
-	dst_hold(&rt->dst);
-	read_unlock_bh(&table->tb6_lock);
 out2:
 	rt->dst.lastuse = jiffies;
 	rt->dst.__use++;
-- 
1.8.1

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH RFC v4 net 1/3] ipv6: Remove BACKTRACK macro
  2014-10-10 18:48 ` [PATCH RFC v4 net 1/3] ipv6: Remove BACKTRACK macro Martin KaFai Lau
@ 2014-10-14 19:01   ` David Miller
  2014-10-15  0:14     ` Martin Lau
  0 siblings, 1 reply; 7+ messages in thread
From: David Miller @ 2014-10-14 19:01 UTC (permalink / raw)
  To: kafai; +Cc: netdev, hannes

From: Martin KaFai Lau <kafai@fb.com>
Date: Fri, 10 Oct 2014 11:48:06 -0700

> +struct fib6_node *fib6_backtrack(struct fib6_node *fn,
> +				 struct in6_addr *saddr);
> +

I am completely mystified why you did this, could you explain the
logic?  I want to know what drove you to make this exported.

I marked it static in my example patch, and there is no caller outside
of route.c

Doing this also eliminates inlining opportunitites.

Please keep this private inside of route.c

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH RFC v4 net 1/3] ipv6: Remove BACKTRACK macro
  2014-10-14 19:01   ` David Miller
@ 2014-10-15  0:14     ` Martin Lau
  2014-10-15  2:04       ` David Miller
  0 siblings, 1 reply; 7+ messages in thread
From: Martin Lau @ 2014-10-15  0:14 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, hannes

Hi,

> > +struct fib6_node *fib6_backtrack(struct fib6_node *fn,
> > +				 struct in6_addr *saddr);
> > +
> 
> I am completely mystified why you did this, could you explain the
> logic?  I want to know what drove you to make this exported.
>
> I marked it static in my example patch, and there is no caller outside
> of route.c
> 

I was thinking this function only works on 'struct fib6_node', so
it belongs to ip6_fib.c more than route.c.
f.e. like fib6_lookup() whose callers are also only in route.c

> Doing this also eliminates inlining opportunitites.
> 
> Please keep this private inside of route.c
I will keep it private in route.c and re-submit.

Do you have input on the function signature and also the following two
patches?

Thanks,
--Martin

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH RFC v4 net 1/3] ipv6: Remove BACKTRACK macro
  2014-10-15  0:14     ` Martin Lau
@ 2014-10-15  2:04       ` David Miller
  0 siblings, 0 replies; 7+ messages in thread
From: David Miller @ 2014-10-15  2:04 UTC (permalink / raw)
  To: kafai; +Cc: netdev, hannes

From: Martin Lau <kafai@fb.com>
Date: Tue, 14 Oct 2014 17:14:11 -0700

> Do you have input on the function signature and also the following
> two patches?

Sorry, patch review doesn't work this way.

Invest the time into reposting your series with the most glaring
problems fixed, and then reviewers will invest their time into
reviewing the updated patch series.

Thanks.

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2014-10-15  2:04 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-10-10 18:48 [PATCH RFC v4 net 0/3]: ipv6: Reduce the number of fib6_lookup() calls from ip6_pol_route() Martin KaFai Lau
2014-10-10 18:48 ` [PATCH RFC v4 net 1/3] ipv6: Remove BACKTRACK macro Martin KaFai Lau
2014-10-14 19:01   ` David Miller
2014-10-15  0:14     ` Martin Lau
2014-10-15  2:04       ` David Miller
2014-10-10 18:48 ` [PATCH RFC v4 net 2/3] ipv6: Avoid redoing fib6_lookup() for RTF_CACHE hit case Martin KaFai Lau
2014-10-10 18:48 ` [PATCH RFC v4 net 3/3] ipv6: Avoid redo-ing fib6_lookup() with reachable = 0 by saving fn Martin KaFai Lau

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).