From: Edward Cree <ecree@solarflare.com>
To: Nadav Amit <namit@vmware.com>, Josh Poimboeuf <jpoimboe@redhat.com>
Cc: <linux-kernel@vger.kernel.org>, <x86@kernel.org>,
Paolo Abeni <pabeni@redhat.com>
Subject: [RFC PATCH 2/2] net: core: rather hacky PoC implementation of dynamic calls
Date: Wed, 12 Dec 2018 17:52:43 +0000 [thread overview]
Message-ID: <7ab6063d-92ac-4708-d820-0cf175cf0f92@solarflare.com> (raw)
In-Reply-To: <cf6e9449-f3f6-e0fc-8096-eaba6b5a3b97@solarflare.com>
Uses runtime instrumentation of the callees reached from two indirect
call sites (deliver_skb() and __netif_receive_skb_one_core()) to populate
an indirect-call-wrapper branch tree: the most frequently seen callees
are patched in as static calls, and the plain indirect call is kept as
the fallback. Essentially we're doing indirect branch prediction in
software because the hardware can't be trusted to get it right; this is
sad.
It's also full of printk()s right now to display what it's doing for
debugging purposes; obviously those wouldn't be quite the same in a
finished version.
Signed-off-by: Edward Cree <ecree@solarflare.com>
---
net/core/dev.c | 222 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 217 insertions(+), 5 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index 04a6b7100aac..f69c110c34e3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -145,6 +145,7 @@
#include <linux/sctp.h>
#include <net/udp_tunnel.h>
#include <linux/net_namespace.h>
+#include <linux/static_call.h>
#include "net-sysfs.h"
@@ -1935,14 +1936,223 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
}
EXPORT_SYMBOL_GPL(dev_forward_skb);
-static inline int deliver_skb(struct sk_buff *skb,
- struct packet_type *pt_prev,
- struct net_device *orig_dev)
+/* Work item that re-evaluates which callees are patched into the fast path;
+ * it is queued from do_deliver_skb() once enough calls have been observed.
+ */
+static void deliver_skb_update(struct work_struct *unused);
+
+static DECLARE_WORK(deliver_skb_update_work, deliver_skb_update);
+
+/* Signature of the delivery handler, invoked in this file as
+ * pt_prev->func(skb, skb->dev, pt_prev, orig_dev).
+ */
+typedef int (*deliver_skb_func)(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *);
+
+/* One observed callee and the number of times it has been seen since its
+ * counter was last collected.
+ */
+struct deliver_skb_candidate {
+ deliver_skb_func func;
+ unsigned long hit_count;
+};
+
+/* Per-CPU table of up to four callees seen on the fallback path
+ * (do_deliver_skb()); a NULL func marks a free slot.
+ */
+static DEFINE_PER_CPU(struct deliver_skb_candidate[4], deliver_skb_candidates);
+
+/* Per-CPU count of fallback calls whose callee had neither a matching nor a
+ * free slot in deliver_skb_candidates.
+ */
+static DEFINE_PER_CPU(unsigned long, deliver_skb_miss_count);
+
+/* Instrumented slow path: records the callee pt_prev->func in this CPU's
+ * candidate table and then performs the ordinary indirect call.
+ *
+ * It serves two purposes: callers are routed here while the dynamic version
+ * is being changed, and it is the fallback when none of the patched static
+ * calls match the callee.
+ *
+ * Returns whatever pt_prev->func() returns.
+ */
+static int do_deliver_skb(struct sk_buff *skb,
+			  struct packet_type *pt_prev,
+			  struct net_device *orig_dev)
+{
+	struct deliver_skb_candidate *slots = *this_cpu_ptr(&deliver_skb_candidates);
+	deliver_skb_func callee = pt_prev->func;
+	unsigned long seen;
+	int idx;
+
+	/* Look for a slot that already tracks this callee */
+	for (idx = 0; idx < 4; idx++)
+		if (slots[idx].func == callee)
+			break;
+
+	if (idx < 4) {
+		slots[idx].hit_count++;
+	} else {
+		/* Not tracked yet: claim the first free slot, if there is one */
+		for (idx = 0; idx < 4; idx++)
+			if (slots[idx].func == NULL)
+				break;
+		if (idx < 4) {
+			slots[idx].func = callee;
+			slots[idx].hit_count = 1;
+		} else {
+			/* Table is full; just remember that we missed */
+			(*this_cpu_ptr(&deliver_skb_miss_count))++;
+		}
+	}
+
+	/* Once this CPU has observed enough calls, ask the worker to
+	 * re-evaluate which callees belong in the fast path.
+	 */
+	seen = *this_cpu_ptr(&deliver_skb_miss_count);
+	for (idx = 0; idx < 4; idx++)
+		seen += slots[idx].hit_count;
+	if (seen > 1000) /* Arbitrary threshold */
+		schedule_work(&deliver_skb_update_work);
+
+	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+}
+
+/* Entry point used by deliver_skb() and __netif_receive_skb_one_core().
+ * It starts out targeting the instrumented slow path and is switched between
+ * do_deliver_skb() and dynamic_deliver_skb() by deliver_skb_update().
+ */
+DEFINE_STATIC_CALL(dispatch_deliver_skb, do_deliver_skb);
+
+/* Placeholder target for the two fast-path static calls until real callees
+ * have been selected. dynamic_deliver_skb() only takes a static call whose
+ * current target equals pt_prev->func, so this is not expected to run; if it
+ * does, warn once and fall back to the slow path. The dev argument is
+ * ignored.
+ */
+static int dummy_deliver_skb(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt_prev,
+ struct net_device *orig_dev)
+{
+ WARN_ON_ONCE(1); /* shouldn't ever actually get here */
+ return do_deliver_skb(skb, pt_prev, orig_dev);
+}
+
+/* The two patchable call sites that make up the fast-path branch tree */
+DEFINE_STATIC_CALL(dynamic_deliver_skb_1, dummy_deliver_skb);
+DEFINE_STATIC_CALL(dynamic_deliver_skb_2, dummy_deliver_skb);
+
+/* Per-CPU counts of calls taken through dynamic_deliver_skb_1 and
+ * dynamic_deliver_skb_2; summed and zeroed by deliver_skb_count_hits().
+ */
+static DEFINE_PER_CPU(unsigned long, dds1_hit_count);
+static DEFINE_PER_CPU(unsigned long, dds2_hit_count);
+
+/* Fast path: when pt_prev->func is one of the two callees currently patched
+ * into the static calls, count the hit on this CPU and call it directly;
+ * otherwise fall back to the instrumented indirect call in do_deliver_skb().
+ *
+ * Returns the callee's return value.
+ */
+static int dynamic_deliver_skb(struct sk_buff *skb,
+			       struct packet_type *pt_prev,
+			       struct net_device *orig_dev)
+{
+	deliver_skb_func callee = pt_prev->func;
+	unsigned long *hits;
+
+	if (callee == dynamic_deliver_skb_1.func) {
+		hits = this_cpu_ptr(&dds1_hit_count);
+		++*hits;
+		return static_call(dynamic_deliver_skb_1, skb, skb->dev,
+				   pt_prev, orig_dev);
+	}
+
+	if (callee == dynamic_deliver_skb_2.func) {
+		hits = this_cpu_ptr(&dds2_hit_count);
+		++*hits;
+		return static_call(dynamic_deliver_skb_2, skb, skb->dev,
+				   pt_prev, orig_dev);
+	}
+
+	/* Not one of the predicted callees */
+	return do_deliver_skb(skb, pt_prev, orig_dev);
+}
+
+/* Serialises deliver_skb_update() so that only one re-patching pass runs at
+ * a time. File-local, like the rest of this machinery.
+ */
+static DEFINE_MUTEX(deliver_skb_update_lock);
+
+/* Insert @next into @top, a table of @ncands candidates kept sorted by
+ * descending hit_count; whichever entry falls off the end is dropped.
+ *
+ * If @next.func is already present in @top, its hit count is folded into the
+ * existing entry instead of adding a second entry for the same callee.
+ * Without this, a callee counted once via its static call and once via the
+ * fallback table could take two places in the ranking.
+ *
+ * A candidate with a zero hit_count is never inserted. Ties keep the entry
+ * that was ranked first.
+ */
+static void deliver_skb_add_cand(struct deliver_skb_candidate *top,
+				 size_t ncands,
+				 struct deliver_skb_candidate next)
+{
+	struct deliver_skb_candidate old;
+	size_t i;
+
+	/* A zero score can never displace an existing entry */
+	if (!next.hit_count)
+		return;
+
+	/* Already ranked? Merge the counts and move that entry up as needed */
+	for (i = 0; i < ncands; i++) {
+		if (top[i].func != next.func)
+			continue;
+		top[i].hit_count += next.hit_count;
+		while (i > 0 && top[i].hit_count > top[i - 1].hit_count) {
+			old = top[i - 1];
+			top[i - 1] = top[i];
+			top[i] = old;
+			i--;
+		}
+		return;
+	}
+
+	for (i = 0; i < ncands; i++) {
+		if (next.hit_count > top[i].hit_count) {
+			/* Swap next with top[i], so that the old top[i] can
+			 * shunt along all lower scores
+			 */
+			old = top[i];
+			top[i] = next;
+			next = old;
+		}
+	}
+}
+
+/* Total up, and reset, the per-CPU hit counter belonging to the static call
+ * @key, then offer that static call's current target to the ranking in @top
+ * (of @ncands entries) with the total as its score.
+ *
+ * @hit_count: per-CPU counter associated with @key.
+ */
+static void deliver_skb_count_hits(struct deliver_skb_candidate *top,
+				   size_t ncands, struct static_call_key *key,
+				   unsigned long __percpu *hit_count)
+{
+	struct deliver_skb_candidate entry = {
+		.func = key->func,
+		.hit_count = 0,
+	};
+	unsigned long *slot;
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		slot = per_cpu_ptr(hit_count, cpu);
+		entry.hit_count += *slot;
+		*slot = 0;
+	}
+
+	printk(KERN_ERR "hit_count for old %pf: %lu\n", entry.func,
+	       entry.hit_count);
+
+	deliver_skb_add_cand(top, ncands, entry);
+}
+
+/* Work handler that re-ranks the observed callees and re-patches the fast
+ * path.
+ *
+ * Sums and resets the per-CPU hit counters of the two current static calls
+ * and of the fallback candidate tables, keeping the highest scorers in
+ * top[]. If the winner has more than 250 hits, top[0] and top[1] are patched
+ * into dynamic_deliver_skb_1 and dynamic_deliver_skb_2. While patching,
+ * callers are diverted to do_deliver_skb() through dispatch_deliver_skb.
+ *
+ * The per-CPU miss counters are reset here as well. do_deliver_skb() adds
+ * them to its trigger threshold, so leaving them to grow forever would make
+ * it queue this work on every call once a CPU had missed often enough.
+ *
+ * @unused: the work item; not used.
+ */
+static void deliver_skb_update(struct work_struct *unused)
+{
+	struct deliver_skb_candidate top[4], next, *cands, *cands2;
+	deliver_skb_func second;
+	unsigned long *misses;
+	int cpu, i, cpu2, j;
+
+	memset(top, 0, sizeof(top));
+
+	printk(KERN_ERR "deliver_skb_update called\n");
+	mutex_lock(&deliver_skb_update_lock);
+	printk(KERN_ERR "deliver_skb_update_lock acquired\n");
+	/* We don't stop the other CPUs adding to their counts while this is
+	 * going on; but it doesn't really matter because this is a heuristic
+	 * anyway so we don't care about perfect accuracy.
+	 */
+	/* First count up the hits on the existing static branches */
+	deliver_skb_count_hits(top, ARRAY_SIZE(top), &dynamic_deliver_skb_1,
+			       &dds1_hit_count);
+	deliver_skb_count_hits(top, ARRAY_SIZE(top), &dynamic_deliver_skb_2,
+			       &dds2_hit_count);
+	/* Next count up the callees seen in the fallback path */
+	for_each_online_cpu(cpu) {
+		cands = *per_cpu_ptr(&deliver_skb_candidates, cpu);
+		misses = per_cpu_ptr(&deliver_skb_miss_count, cpu);
+		printk(KERN_ERR "miss_count for %d: %lu\n", cpu, *misses);
+		/* Start a fresh sampling window for this CPU */
+		*misses = 0;
+		for (i = 0; i < 4; i++) {
+			next = cands[i];
+			if (next.func == NULL)
+				continue;
+			next.hit_count = 0;
+			/* Gather this callee's hits from every CPU's table,
+			 * freeing the slots as we go so that it is only
+			 * counted once.
+			 */
+			for_each_online_cpu(cpu2) {
+				cands2 = *per_cpu_ptr(&deliver_skb_candidates,
+						      cpu2);
+				for (j = 0; j < 4; j++) {
+					if (cands2[j].func == next.func) {
+						next.hit_count += cands2[j].hit_count;
+						cands2[j].hit_count = 0;
+						cands2[j].func = NULL;
+						break;
+					}
+				}
+			}
+			printk(KERN_ERR "candidate %d/%d: %pf %lu\n", cpu, i,
+			       next.func, next.hit_count);
+			deliver_skb_add_cand(top, ARRAY_SIZE(top), next);
+		}
+	}
+	/* Record our results (for debugging) */
+	for (i = 0; i < ARRAY_SIZE(top); i++) {
+		if (i < 2) /* 2 == number of static calls in the branch tree */
+			printk(KERN_ERR "selected [%d] %pf, score %lu\n", i,
+			       top[i].func, top[i].hit_count);
+		else
+			printk(KERN_ERR "runnerup [%d] %pf, score %lu\n", i,
+			       top[i].func, top[i].hit_count);
+	}
+	/* It's possible that we could have picked up multiple pushes of the
+	 * workitem, so someone already collected most of the count. In that
+	 * case, don't make a decision based on only a small number of calls.
+	 */
+	if (top[0].hit_count > 250) {
+		/* If only one callee was observed, top[1].func is still NULL.
+		 * Park the second slot on the placeholder rather than
+		 * patching a call to a NULL target.
+		 */
+		second = top[1].func ? top[1].func : dummy_deliver_skb;
+
+		/* Divert callers away from the fast path */
+		static_call_update(dispatch_deliver_skb, do_deliver_skb);
+		printk(KERN_ERR "patched dds to %pf\n", dispatch_deliver_skb.func);
+		/* Wait for existing fast path callers to finish */
+		synchronize_rcu();
+		/* Patch the chosen callees into the fast path */
+		static_call_update(dynamic_deliver_skb_1, *top[0].func);
+		printk(KERN_ERR "patched dds1 to %pf\n", dynamic_deliver_skb_1.func);
+		static_call_update(dynamic_deliver_skb_2, *second);
+		printk(KERN_ERR "patched dds2 to %pf\n", dynamic_deliver_skb_2.func);
+		/* Ensure the new fast path is seen before we direct anyone
+		 * into it. This probably isn't necessary (the binary-patching
+		 * framework probably takes care of it) but let's be paranoid.
+		 */
+		wmb();
+		/* Switch callers back onto the fast path */
+		static_call_update(dispatch_deliver_skb, dynamic_deliver_skb);
+		printk(KERN_ERR "patched dds to %pf\n", dispatch_deliver_skb.func);
+	}
+	mutex_unlock(&deliver_skb_update_lock);
+	printk(KERN_ERR "deliver_skb_update finished\n");
+}
+
+/* Take a reference on skb and pass it to pt_prev's handler through the
+ * dispatch_deliver_skb static call rather than an indirect call via
+ * pt_prev->func. Returns -ENOMEM if skb_orphan_frags_rx() reports failure,
+ * otherwise the value returned through the static call.
+ * NOTE(review): changed from inline to noinline by this patch; presumably to
+ * keep a single static-call site for this path - confirm.
+ */
+static noinline int deliver_skb(struct sk_buff *skb,
+ struct packet_type *pt_prev,
+ struct net_device *orig_dev)
{
if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
return -ENOMEM;
refcount_inc(&skb->users);
- return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+ return static_call(dispatch_deliver_skb, skb, pt_prev, orig_dev);
}
static inline void deliver_ptype_list_skb(struct sk_buff *skb,
@@ -4951,7 +5161,9 @@ static int __netif_receive_skb_one_core(struct sk_buff *skb, bool pfmemalloc)
ret = __netif_receive_skb_core(skb, pfmemalloc, &pt_prev);
if (pt_prev)
- ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+ /* ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); */
+ /* but (hopefully) faster */
+ ret = static_call(dispatch_deliver_skb, skb, pt_prev, orig_dev);
return ret;
}
next prev parent reply other threads:[~2018-12-12 17:52 UTC|newest]
Thread overview: 120+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-11-26 13:54 [PATCH v2 0/4] Static calls Josh Poimboeuf
2018-11-26 13:54 ` [PATCH v2 1/4] compiler.h: Make __ADDRESSABLE() symbol truly unique Josh Poimboeuf
2018-11-27 8:49 ` Ard Biesheuvel
2018-11-26 13:54 ` [PATCH v2 2/4] static_call: Add static call infrastructure Josh Poimboeuf
2018-11-26 13:54 ` [PATCH v2 3/4] x86/static_call: Add out-of-line static call implementation Josh Poimboeuf
2018-11-26 15:43 ` Peter Zijlstra
2018-11-26 16:19 ` Steven Rostedt
2018-11-26 13:55 ` [PATCH v2 4/4] x86/static_call: Add inline static call implementation for x86-64 Josh Poimboeuf
2018-11-26 16:02 ` Peter Zijlstra
2018-11-26 17:10 ` Josh Poimboeuf
2018-11-26 17:56 ` Josh Poimboeuf
2018-11-26 20:00 ` Peter Zijlstra
2018-11-26 20:08 ` Peter Zijlstra
2018-11-26 21:26 ` Josh Poimboeuf
2018-11-27 8:43 ` Peter Zijlstra
2018-11-27 8:50 ` Peter Zijlstra
2018-11-29 6:05 ` Andy Lutomirski
2018-11-29 9:42 ` Peter Zijlstra
2018-11-29 13:11 ` Josh Poimboeuf
2018-11-29 13:37 ` Andy Lutomirski
2018-11-29 14:38 ` Peter Zijlstra
2018-11-29 14:42 ` Jiri Kosina
2018-11-29 16:33 ` Josh Poimboeuf
2018-11-29 16:49 ` Peter Zijlstra
2018-11-29 16:59 ` Andy Lutomirski
2018-11-29 17:10 ` Josh Poimboeuf
2018-11-29 22:01 ` Peter Zijlstra
2018-11-29 22:14 ` Josh Poimboeuf
2018-11-29 22:22 ` Peter Zijlstra
2018-11-29 22:25 ` Andy Lutomirski
2018-11-29 22:30 ` Josh Poimboeuf
2018-11-29 17:15 ` Peter Zijlstra
2018-11-29 17:20 ` Steven Rostedt
2018-11-29 17:21 ` Steven Rostedt
2018-11-29 17:41 ` Andy Lutomirski
2018-11-29 17:45 ` Josh Poimboeuf
2018-11-29 17:52 ` Andy Lutomirski
2018-11-29 17:49 ` Steven Rostedt
2018-11-29 18:37 ` Josh Poimboeuf
2018-11-29 16:50 ` Linus Torvalds
2018-11-29 16:55 ` Steven Rostedt
2018-11-29 17:02 ` Andy Lutomirski
2018-11-29 17:07 ` Peter Zijlstra
2018-11-29 17:31 ` Andy Lutomirski
2018-11-29 17:35 ` Jiri Kosina
2018-11-29 17:13 ` Steven Rostedt
2018-11-29 17:35 ` Linus Torvalds
2018-11-29 17:44 ` Steven Rostedt
2018-11-29 17:50 ` Linus Torvalds
2018-11-29 17:54 ` Linus Torvalds
2018-11-29 17:58 ` Steven Rostedt
2018-11-29 18:23 ` Linus Torvalds
2018-11-29 18:47 ` Steven Rostedt
2018-11-29 18:58 ` Linus Torvalds
2018-11-29 19:08 ` Linus Torvalds
2018-11-29 19:11 ` Linus Torvalds
2018-12-10 23:58 ` Pavel Machek
2018-12-11 1:43 ` Linus Torvalds
2018-11-29 19:12 ` Steven Rostedt
2018-11-29 19:27 ` Andy Lutomirski
2018-11-29 20:24 ` Josh Poimboeuf
2018-11-29 22:17 ` Josh Poimboeuf
2018-11-29 23:04 ` Linus Torvalds
2018-11-30 16:27 ` Josh Poimboeuf
2018-12-11 9:41 ` David Laight
2018-12-11 17:19 ` Josh Poimboeuf
2018-12-12 18:29 ` Josh Poimboeuf
2018-11-30 16:42 ` Andy Lutomirski
2018-11-30 18:39 ` Josh Poimboeuf
2018-11-30 19:45 ` Linus Torvalds
2018-11-30 20:18 ` Andy Lutomirski
2018-11-30 20:28 ` Steven Rostedt
2018-11-30 20:59 ` Andy Lutomirski
2018-11-30 21:01 ` Steven Rostedt
2018-11-30 21:13 ` Jiri Kosina
2018-11-30 21:10 ` Josh Poimboeuf
2018-11-29 19:16 ` Steven Rostedt
2018-11-29 19:22 ` Josh Poimboeuf
2018-11-29 19:27 ` Steven Rostedt
2018-11-30 22:16 ` Rasmus Villemoes
2018-11-30 22:24 ` Josh Poimboeuf
2018-11-29 19:24 ` Linus Torvalds
2018-11-29 19:28 ` Andy Lutomirski
2018-11-29 19:31 ` Steven Rostedt
2018-11-29 20:12 ` Josh Poimboeuf
2018-11-29 18:00 ` Andy Lutomirski
2018-11-29 18:42 ` Linus Torvalds
2018-11-29 18:55 ` Steven Rostedt
2018-11-29 17:29 ` Linus Torvalds
2018-11-29 17:35 ` Andy Lutomirski
2018-11-26 18:28 ` Andy Lutomirski
2018-11-26 20:14 ` Josh Poimboeuf
2018-11-27 8:46 ` Peter Zijlstra
2018-11-26 16:08 ` Peter Zijlstra
2018-11-26 16:11 ` Ard Biesheuvel
2018-11-26 16:33 ` Andy Lutomirski
2018-11-26 16:39 ` Peter Zijlstra
2018-11-26 16:44 ` Josh Poimboeuf
2018-11-26 14:01 ` [PATCH v2 0/4] Static calls Josh Poimboeuf
2018-11-26 20:54 ` Steven Rostedt
2018-11-26 22:24 ` Josh Poimboeuf
2018-11-26 22:53 ` Steven Rostedt
2018-12-04 23:08 ` Steven Rostedt
2018-12-04 23:41 ` Andy Lutomirski
2018-12-05 15:04 ` Josh Poimboeuf
2018-12-05 23:36 ` Andy Lutomirski
2018-12-07 16:06 ` Edward Cree
2018-12-07 16:49 ` Edward Cree
2018-12-11 18:05 ` Josh Poimboeuf
2018-12-12 5:59 ` Nadav Amit
2018-12-12 17:11 ` Edward Cree
2018-12-12 17:47 ` [RFC/WIP PATCH 0/2] dynamic calls Edward Cree
2018-12-12 17:50 ` [RFC PATCH 1/2] static_call: fix out-of-line static call implementation Edward Cree
2018-12-12 17:52 ` Edward Cree [this message]
2018-12-12 18:14 ` [PATCH v2 0/4] Static calls Nadav Amit
2018-12-12 18:33 ` Edward Cree
2018-12-12 21:15 ` Nadav Amit
2018-12-12 21:36 ` Edward Cree
2018-12-12 21:45 ` Nadav Amit
2018-12-10 23:57 ` Pavel Machek
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=7ab6063d-92ac-4708-d820-0cf175cf0f92@solarflare.com \
--to=ecree@solarflare.com \
--cc=jpoimboe@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=namit@vmware.com \
--cc=pabeni@redhat.com \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).