linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Edward Cree <ecree@solarflare.com>
To: Nadav Amit <namit@vmware.com>, Josh Poimboeuf <jpoimboe@redhat.com>
Cc: <linux-kernel@vger.kernel.org>, <x86@kernel.org>,
	Paolo Abeni <pabeni@redhat.com>
Subject: [RFC PATCH 2/2] net: core: rather hacky PoC implementation of dynamic calls
Date: Wed, 12 Dec 2018 17:52:43 +0000	[thread overview]
Message-ID: <7ab6063d-92ac-4708-d820-0cf175cf0f92@solarflare.com> (raw)
In-Reply-To: <cf6e9449-f3f6-e0fc-8096-eaba6b5a3b97@solarflare.com>

Uses runtime instrumentation of callees from an indirect call site
 (deliver_skb, and also __netif_receive_skb_one_core()) to populate an
 indirect-call-wrapper branch tree.  Essentially we're doing indirect
 branch prediction in software because the hardware can't be trusted to
 get it right; this is sad.

It's also full of printk()s right now to display what it's doing for
 debugging purposes; obviously those wouldn't be quite the same in a
 finished version.

Signed-off-by: Edward Cree <ecree@solarflare.com>
---
 net/core/dev.c | 222 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 217 insertions(+), 5 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 04a6b7100aac..f69c110c34e3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -145,6 +145,7 @@
 #include <linux/sctp.h>
 #include <net/udp_tunnel.h>
 #include <linux/net_namespace.h>
+#include <linux/static_call.h>
 
 #include "net-sysfs.h"
 
@@ -1935,14 +1936,223 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(dev_forward_skb);
 
-static inline int deliver_skb(struct sk_buff *skb,
-			      struct packet_type *pt_prev,
-			      struct net_device *orig_dev)
+static void deliver_skb_update(struct work_struct *unused);
+
+static DECLARE_WORK(deliver_skb_update_work, deliver_skb_update); /* queued by do_deliver_skb() */
+
+typedef int (*deliver_skb_func)(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *);
+
+struct deliver_skb_candidate {
+	deliver_skb_func func; /* callee being tracked; NULL marks a free slot */
+	unsigned long hit_count; /* number of calls observed for func */
+};
+
+static DEFINE_PER_CPU(struct deliver_skb_candidate[4], deliver_skb_candidates);
+
+static DEFINE_PER_CPU(unsigned long, deliver_skb_miss_count); /* slow-path calls that found no slot */
+
+/* Slow path: profiles callees per-CPU, then makes the plain indirect call.
+ * Used while the fast path is being re-patched, and as its fallback on a miss.
+ */
+static int do_deliver_skb(struct sk_buff *skb,
+			  struct packet_type *pt_prev,
+			  struct net_device *orig_dev)
+{
+	struct deliver_skb_candidate *cands = *this_cpu_ptr(&deliver_skb_candidates);
+	deliver_skb_func func = pt_prev->func;
+	unsigned long total_count;
+	int i;
+
+	for (i = 0; i < 4; i++) /* 4 == slots in deliver_skb_candidates */
+		if (func == cands[i].func) {
+			cands[i].hit_count++;
+			break;
+		}
+	if (i == 4) /* no match */
+		for (i = 0; i < 4; i++)
+			if (!cands[i].func) { /* claim the first free slot */
+				cands[i].func = func;
+				cands[i].hit_count = 1;
+				break;
+			}
+	if (i == 4) /* no space */
+		(*this_cpu_ptr(&deliver_skb_miss_count))++;
+
+	total_count = *this_cpu_ptr(&deliver_skb_miss_count);
+	for (i = 0; i < 4; i++) /* add every tracked callee's hits */
+		total_count += cands[i].hit_count;
+	if (total_count > 1000) /* Arbitrary threshold */
+		schedule_work(&deliver_skb_update_work); /* runs deliver_skb_update() */
+	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+}
+
+DEFINE_STATIC_CALL(dispatch_deliver_skb, do_deliver_skb); /* entry point; starts out on the slow path */
+
+static int dummy_deliver_skb(struct sk_buff *skb, struct net_device *dev,
+			     struct packet_type *pt_prev,
+			     struct net_device *orig_dev)
+{	/* Placeholder initial target for dynamic_deliver_skb_1/_2 */
+	WARN_ON_ONCE(1); /* shouldn't ever actually get here */
+	return do_deliver_skb(skb, pt_prev, orig_dev); /* still deliver the skb if we do */
+}
+
+DEFINE_STATIC_CALL(dynamic_deliver_skb_1, dummy_deliver_skb); /* fast-path branch 1 */
+DEFINE_STATIC_CALL(dynamic_deliver_skb_2, dummy_deliver_skb); /* fast-path branch 2 */
+
+static DEFINE_PER_CPU(unsigned long, dds1_hit_count); /* calls taken via branch 1 */
+static DEFINE_PER_CPU(unsigned long, dds2_hit_count); /* calls taken via branch 2 */
+
+static int dynamic_deliver_skb(struct sk_buff *skb,
+			       struct packet_type *pt_prev,
+			       struct net_device *orig_dev)
+{	/* Fast path: compare the callee against the two patched-in targets */
+	deliver_skb_func func = pt_prev->func;
+
+	if (func == dynamic_deliver_skb_1.func) {
+		(*this_cpu_ptr(&dds1_hit_count))++; /* summed later by deliver_skb_update() */
+		return static_call(dynamic_deliver_skb_1, skb, skb->dev,
+				   pt_prev, orig_dev);
+	}
+	if (func == dynamic_deliver_skb_2.func) {
+		(*this_cpu_ptr(&dds2_hit_count))++; /* summed later by deliver_skb_update() */
+		return static_call(dynamic_deliver_skb_2, skb, skb->dev,
+				   pt_prev, orig_dev);
+	}
+	return do_deliver_skb(skb, pt_prev, orig_dev); /* neither matched: profile + indirect call */
+}
+
+static DEFINE_MUTEX(deliver_skb_update_lock); /* held across each deliver_skb_update() run; file-local */
+
+static void deliver_skb_add_cand(struct deliver_skb_candidate *top,
+				 size_t ncands,
+				 struct deliver_skb_candidate next)
+{
+	struct deliver_skb_candidate displaced;
+	struct deliver_skb_candidate *slot;
+
+	/* Walk top[] from the best score down: whenever next outscores a slot
+	 * it takes that slot, and the displaced entry is carried onward.
+	 */
+	for (slot = top; slot < top + ncands; slot++) {
+		if (next.hit_count <= slot->hit_count)
+			continue;
+		displaced = *slot;
+		*slot = next;
+		next = displaced;
+	}
+}
+
+static void deliver_skb_count_hits(struct deliver_skb_candidate *top,
+				   size_t ncands, struct static_call_key *key,
+				   unsigned long __percpu *hit_count)
+{	/* Total one fast-path hit counter over all online CPUs and rank it */
+	struct deliver_skb_candidate next;
+	int cpu;
+
+	next.func = key->func; /* presumably the callee currently patched in */
+	next.hit_count = 0;
+	for_each_online_cpu(cpu) {
+		next.hit_count += *per_cpu_ptr(hit_count, cpu);
+		*per_cpu_ptr(hit_count, cpu) = 0; /* start a fresh sample */
+	}
+
+	printk(KERN_ERR "hit_count for old %pf: %lu\n", next.func,
+	       next.hit_count);
+
+	deliver_skb_add_cand(top, ncands, next); /* enters top[] only if it outscores an entry */
+}
+
+static void deliver_skb_update(struct work_struct *unused)
+{	/* Work item: rank the observed callees and re-patch the fast path */
+	struct deliver_skb_candidate top[4], next, *cands, *cands2;
+	int cpu, i, cpu2, j;
+
+	memset(top, 0, sizeof(top));
+
+	printk(KERN_ERR "deliver_skb_update called\n");
+	mutex_lock(&deliver_skb_update_lock);
+	printk(KERN_ERR "deliver_skb_update_lock acquired\n");
+	/* Other CPUs keep adding to their counts while this runs; that's fine,
+	 * this is a heuristic anyway so we don't care about perfect accuracy.
+	 */
+	/* First count up the hits on the existing static branches */
+	deliver_skb_count_hits(top, ARRAY_SIZE(top), &dynamic_deliver_skb_1,
+			       &dds1_hit_count);
+	deliver_skb_count_hits(top, ARRAY_SIZE(top), &dynamic_deliver_skb_2,
+			       &dds2_hit_count);
+	/* Next count up the callees seen in the fallback path */
+	for_each_online_cpu(cpu) {
+		cands = *per_cpu_ptr(&deliver_skb_candidates, cpu);
+		printk(KERN_ERR "miss_count for %d: %lu\n", cpu,
+		       *per_cpu_ptr(&deliver_skb_miss_count, cpu));
+		*per_cpu_ptr(&deliver_skb_miss_count, cpu) = 0; /* else stale misses keep re-queueing us */
+		for (i = 0; i < 4; i++) {
+			next = cands[i];
+			if (next.func == NULL)
+				continue;
+			next.hit_count = 0;
+			for_each_online_cpu(cpu2) { /* merge this callee's slots from every CPU */
+				cands2 = *per_cpu_ptr(&deliver_skb_candidates,
+						      cpu2);
+				for (j = 0; j < 4; j++) {
+					if (cands2[j].func == next.func) {
+						next.hit_count += cands2[j].hit_count;
+						cands2[j].hit_count = 0;
+						cands2[j].func = NULL; /* free the slot for the next sample */
+						break;
+					}
+				}
+			}
+			printk(KERN_ERR "candidate %d/%d: %pf %lu\n", cpu, i,
+			       next.func, next.hit_count);
+			deliver_skb_add_cand(top, ARRAY_SIZE(top), next);
+		}
+	}
+	/* Record our results (for debugging) */
+	for (i = 0; i < ARRAY_SIZE(top); i++) {
+		if (i < 2) /* 2 == number of static calls in the branch tree */
+			printk(KERN_ERR "selected [%d] %pf, score %lu\n", i,
+			       top[i].func, top[i].hit_count);
+		else
+			printk(KERN_ERR "runnerup [%d] %pf, score %lu\n", i,
+			       top[i].func, top[i].hit_count);
+	}
+	/* It's possible that we could have picked up multiple pushes of the
+	 * workitem, so someone already collected most of the count.  In that
+	 * case, don't make a decision based on only a small number of calls.
+	 */
+	if (top[0].hit_count > 250) {
+		/* Divert callers away from the fast path */
+		static_call_update(dispatch_deliver_skb, do_deliver_skb);
+		printk(KERN_ERR "patched dds to %pf\n", dispatch_deliver_skb.func);
+		/* Wait for existing fast path callers to finish */
+		synchronize_rcu();
+		/* Patch the chosen callees into the fast path */
+		static_call_update(dynamic_deliver_skb_1, *top[0].func);
+		printk(KERN_ERR "patched dds1 to %pf\n", dynamic_deliver_skb_1.func);
+		static_call_update(dynamic_deliver_skb_2, *top[1].func); /* NOTE(review): top[1].func is NULL if only one callee was seen */
+		printk(KERN_ERR "patched dds2 to %pf\n", dynamic_deliver_skb_2.func);
+		/* Ensure the new fast path is seen before we direct anyone
+		 * into it.  This probably isn't necessary (the binary-patching
+		 * framework probably takes care of it) but let's be paranoid.
+		 */
+		wmb();
+		/* Switch callers back onto the fast path */
+		static_call_update(dispatch_deliver_skb, dynamic_deliver_skb);
+		printk(KERN_ERR "patched dds to %pf\n", dispatch_deliver_skb.func);
+	}
+	mutex_unlock(&deliver_skb_update_lock);
+	printk(KERN_ERR "deliver_skb_update finished\n");
+}
+
+static noinline int deliver_skb(struct sk_buff *skb,
+				struct packet_type *pt_prev,
+				struct net_device *orig_dev)
 {
 	if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
 		return -ENOMEM;
 	refcount_inc(&skb->users);
-	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+	return static_call(dispatch_deliver_skb, skb, pt_prev, orig_dev); /* do_deliver_skb() or dynamic_deliver_skb() */
 }
 
 static inline void deliver_ptype_list_skb(struct sk_buff *skb,
@@ -4951,7 +5161,9 @@ static int __netif_receive_skb_one_core(struct sk_buff *skb, bool pfmemalloc)
 
 	ret = __netif_receive_skb_core(skb, pfmemalloc, &pt_prev);
 	if (pt_prev)
-		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+		/* ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); */
+		/* but (hopefully) faster */
+		ret = static_call(dispatch_deliver_skb, skb, pt_prev, orig_dev);
 	return ret;
 }
 

  parent reply	other threads:[~2018-12-12 17:52 UTC|newest]

Thread overview: 120+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-11-26 13:54 [PATCH v2 0/4] Static calls Josh Poimboeuf
2018-11-26 13:54 ` [PATCH v2 1/4] compiler.h: Make __ADDRESSABLE() symbol truly unique Josh Poimboeuf
2018-11-27  8:49   ` Ard Biesheuvel
2018-11-26 13:54 ` [PATCH v2 2/4] static_call: Add static call infrastructure Josh Poimboeuf
2018-11-26 13:54 ` [PATCH v2 3/4] x86/static_call: Add out-of-line static call implementation Josh Poimboeuf
2018-11-26 15:43   ` Peter Zijlstra
2018-11-26 16:19     ` Steven Rostedt
2018-11-26 13:55 ` [PATCH v2 4/4] x86/static_call: Add inline static call implementation for x86-64 Josh Poimboeuf
2018-11-26 16:02   ` Peter Zijlstra
2018-11-26 17:10     ` Josh Poimboeuf
2018-11-26 17:56       ` Josh Poimboeuf
2018-11-26 20:00         ` Peter Zijlstra
2018-11-26 20:08         ` Peter Zijlstra
2018-11-26 21:26           ` Josh Poimboeuf
2018-11-27  8:43             ` Peter Zijlstra
2018-11-27  8:50               ` Peter Zijlstra
2018-11-29  6:05               ` Andy Lutomirski
2018-11-29  9:42                 ` Peter Zijlstra
2018-11-29 13:11                   ` Josh Poimboeuf
2018-11-29 13:37                   ` Andy Lutomirski
2018-11-29 14:38                     ` Peter Zijlstra
2018-11-29 14:42                       ` Jiri Kosina
2018-11-29 16:33                       ` Josh Poimboeuf
2018-11-29 16:49                         ` Peter Zijlstra
2018-11-29 16:59                           ` Andy Lutomirski
2018-11-29 17:10                             ` Josh Poimboeuf
2018-11-29 22:01                               ` Peter Zijlstra
2018-11-29 22:14                                 ` Josh Poimboeuf
2018-11-29 22:22                                   ` Peter Zijlstra
2018-11-29 22:25                                     ` Andy Lutomirski
2018-11-29 22:30                                       ` Josh Poimboeuf
2018-11-29 17:15                             ` Peter Zijlstra
2018-11-29 17:20                               ` Steven Rostedt
2018-11-29 17:21                                 ` Steven Rostedt
2018-11-29 17:41                                   ` Andy Lutomirski
2018-11-29 17:45                                     ` Josh Poimboeuf
2018-11-29 17:52                                       ` Andy Lutomirski
2018-11-29 17:49                                     ` Steven Rostedt
2018-11-29 18:37                               ` Josh Poimboeuf
2018-11-29 16:50                         ` Linus Torvalds
2018-11-29 16:55                           ` Steven Rostedt
2018-11-29 17:02                           ` Andy Lutomirski
2018-11-29 17:07                             ` Peter Zijlstra
2018-11-29 17:31                               ` Andy Lutomirski
2018-11-29 17:35                                 ` Jiri Kosina
2018-11-29 17:13                             ` Steven Rostedt
2018-11-29 17:35                               ` Linus Torvalds
2018-11-29 17:44                                 ` Steven Rostedt
2018-11-29 17:50                                   ` Linus Torvalds
2018-11-29 17:54                                     ` Linus Torvalds
2018-11-29 17:58                                     ` Steven Rostedt
2018-11-29 18:23                                       ` Linus Torvalds
2018-11-29 18:47                                         ` Steven Rostedt
2018-11-29 18:58                                           ` Linus Torvalds
2018-11-29 19:08                                             ` Linus Torvalds
2018-11-29 19:11                                               ` Linus Torvalds
2018-12-10 23:58                                                 ` Pavel Machek
2018-12-11  1:43                                                   ` Linus Torvalds
2018-11-29 19:12                                               ` Steven Rostedt
2018-11-29 19:27                                               ` Andy Lutomirski
2018-11-29 20:24                                                 ` Josh Poimboeuf
2018-11-29 22:17                                                   ` Josh Poimboeuf
2018-11-29 23:04                                                   ` Linus Torvalds
2018-11-30 16:27                                                     ` Josh Poimboeuf
2018-12-11  9:41                                                       ` David Laight
2018-12-11 17:19                                                         ` Josh Poimboeuf
2018-12-12 18:29                                                     ` Josh Poimboeuf
2018-11-30 16:42                                                   ` Andy Lutomirski
2018-11-30 18:39                                                     ` Josh Poimboeuf
2018-11-30 19:45                                                       ` Linus Torvalds
2018-11-30 20:18                                                         ` Andy Lutomirski
2018-11-30 20:28                                                           ` Steven Rostedt
2018-11-30 20:59                                                             ` Andy Lutomirski
2018-11-30 21:01                                                               ` Steven Rostedt
2018-11-30 21:13                                                               ` Jiri Kosina
2018-11-30 21:10                                                           ` Josh Poimboeuf
2018-11-29 19:16                                             ` Steven Rostedt
2018-11-29 19:22                                               ` Josh Poimboeuf
2018-11-29 19:27                                                 ` Steven Rostedt
2018-11-30 22:16                                                 ` Rasmus Villemoes
2018-11-30 22:24                                                   ` Josh Poimboeuf
2018-11-29 19:24                                               ` Linus Torvalds
2018-11-29 19:28                                                 ` Andy Lutomirski
2018-11-29 19:31                                                 ` Steven Rostedt
2018-11-29 20:12                                             ` Josh Poimboeuf
2018-11-29 18:00                                     ` Andy Lutomirski
2018-11-29 18:42                                       ` Linus Torvalds
2018-11-29 18:55                                       ` Steven Rostedt
2018-11-29 17:29                             ` Linus Torvalds
2018-11-29 17:35                               ` Andy Lutomirski
2018-11-26 18:28       ` Andy Lutomirski
2018-11-26 20:14         ` Josh Poimboeuf
2018-11-27  8:46           ` Peter Zijlstra
2018-11-26 16:08   ` Peter Zijlstra
2018-11-26 16:11     ` Ard Biesheuvel
2018-11-26 16:33       ` Andy Lutomirski
2018-11-26 16:39       ` Peter Zijlstra
2018-11-26 16:44         ` Josh Poimboeuf
2018-11-26 14:01 ` [PATCH v2 0/4] Static calls Josh Poimboeuf
2018-11-26 20:54 ` Steven Rostedt
2018-11-26 22:24   ` Josh Poimboeuf
2018-11-26 22:53     ` Steven Rostedt
2018-12-04 23:08 ` Steven Rostedt
2018-12-04 23:41   ` Andy Lutomirski
2018-12-05 15:04     ` Josh Poimboeuf
2018-12-05 23:36       ` Andy Lutomirski
2018-12-07 16:06 ` Edward Cree
2018-12-07 16:49   ` Edward Cree
2018-12-11 18:05   ` Josh Poimboeuf
2018-12-12  5:59     ` Nadav Amit
2018-12-12 17:11       ` Edward Cree
2018-12-12 17:47         ` [RFC/WIP PATCH 0/2] dynamic calls Edward Cree
2018-12-12 17:50           ` [RFC PATCH 1/2] static_call: fix out-of-line static call implementation Edward Cree
2018-12-12 17:52           ` Edward Cree [this message]
2018-12-12 18:14         ` [PATCH v2 0/4] Static calls Nadav Amit
2018-12-12 18:33           ` Edward Cree
2018-12-12 21:15             ` Nadav Amit
2018-12-12 21:36               ` Edward Cree
2018-12-12 21:45                 ` Nadav Amit
2018-12-10 23:57 ` Pavel Machek

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7ab6063d-92ac-4708-d820-0cf175cf0f92@solarflare.com \
    --to=ecree@solarflare.com \
    --cc=jpoimboe@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=namit@vmware.com \
    --cc=pabeni@redhat.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).