All of lore.kernel.org
 help / color / mirror / Atom feed
From: Nadav Amit <namit@vmware.com>
To: Peter Zijlstra <peterz@infradead.org>
Cc: x86@kernel.org, linux-kernel@vger.kernel.org,
	Thomas Gleixner <tglx@linutronix.de>,
	Thomas Garnier <thgarnie@chromium.org>,
	Ingo Molnar <mingo@redhat.com>, Nadav Amit <namit@vmware.com>
Subject: [PATCH 7/7] x86/current: Aggressive caching of current
Date: Fri, 23 Aug 2019 15:44:24 -0700	[thread overview]
Message-ID: <20190823224424.15296-8-namit@vmware.com> (raw)
In-Reply-To: <20190823224424.15296-1-namit@vmware.com>

The current_task is supposed to be constant in each thread and therefore
does not need to be reread. There is already an attempt to cache it
using inline assembly, using this_cpu_read_stable(), which hides the
dependency on the read memory address.

However, this caching is not working very well. For example,
sync_mm_rss() still reads current_task twice for no reason.

Allow more aggressive caching by aliasing current_task to
into a constant const_current_task and reading from the constant copy.
Doing so requires the compiler to support x86 segment qualifiers.
Hide const_current_task in a different compilation unit to avoid the
compiler from assuming that the value is constant during compilation.

Signed-off-by: Nadav Amit <namit@vmware.com>
---
 arch/x86/include/asm/current.h | 30 ++++++++++++++++++++++++++++++
 arch/x86/kernel/cpu/Makefile   |  1 +
 arch/x86/kernel/cpu/common.c   |  7 +------
 arch/x86/kernel/cpu/current.c  | 16 ++++++++++++++++
 include/linux/compiler.h       |  2 +-
 5 files changed, 49 insertions(+), 7 deletions(-)
 create mode 100644 arch/x86/kernel/cpu/current.c

diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index 3e204e6140b5..7f093e81a647 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -10,11 +10,41 @@ struct task_struct;
 
 DECLARE_PER_CPU(struct task_struct *, current_task);
 
+#if USE_X86_SEG_SUPPORT
+
+/*
+ * Hold a constant alias for current_task, which would allow to avoid caching of
+ * current task.
+ *
+ * We must mark const_current_task with the segment qualifiers, as otherwise gcc
+ * would do redundant reads of const_current_task.
+ */
+DECLARE_PER_CPU(struct task_struct * const __percpu_seg_override, const_current_task);
+
+static __always_inline struct task_struct *get_current(void)
+{
+
+	/*
+	 * GCC is missing functionality of removing segment qualifiers, which
+	 * messes with per-cpu infrastructure that holds local copies. Use
+	 * __raw_cpu_read to avoid holding any copy.
+	 */
+	return __raw_cpu_read(, const_current_task);
+}
+
+#else /* USE_X86_SEG_SUPPORT */
+
+/*
+ * Without segment qualifier support, the per-cpu infrastrucutre is not
+ * suitable for reading constants, so use this_cpu_read_stable() in this case.
+ */
 static __always_inline struct task_struct *get_current(void)
 {
 	return this_cpu_read_stable(current_task);
 }
 
+#endif /* USE_X86_SEG_SUPPORT */
+
 #define current get_current()
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index d7a1e5a9331c..d816f03a37d7 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -19,6 +19,7 @@ CFLAGS_common.o		:= $(nostackp)
 
 obj-y			:= cacheinfo.o scattered.o topology.o
 obj-y			+= common.o
+obj-y			+= current.o
 obj-y			+= rdrand.o
 obj-y			+= match.o
 obj-y			+= bugs.o
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 5cc2d51cc25e..5f7c9ee57802 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1619,13 +1619,8 @@ DEFINE_PER_CPU_FIRST(struct fixed_percpu_data,
 EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data);
 
 /*
- * The following percpu variables are hot.  Align current_task to
- * cacheline size such that they fall in the same cacheline.
+ * The following percpu variables are hot.
  */
-DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
-	&init_task;
-EXPORT_PER_CPU_SYMBOL(current_task);
-
 DEFINE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
 DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
 
diff --git a/arch/x86/kernel/cpu/current.c b/arch/x86/kernel/cpu/current.c
new file mode 100644
index 000000000000..3238c6e34984
--- /dev/null
+++ b/arch/x86/kernel/cpu/current.c
@@ -0,0 +1,16 @@
+#include <linux/sched/task.h>
+#include <asm/current.h>
+
+/*
+ * Align current_task to cacheline size such that they fall in the same
+ * cacheline.
+ */
+DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
+	&init_task;
+EXPORT_PER_CPU_SYMBOL(current_task);
+
+#if USE_X86_SEG_SUPPORT
+DECLARE_PER_CPU(struct task_struct * const __percpu_seg_override, const_current_task)
+	__attribute__((alias("current_task")));
+EXPORT_PER_CPU_SYMBOL(const_current_task);
+#endif
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index f0fd5636fddb..1b6ee9ab6373 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -299,7 +299,7 @@ unsigned long read_word_at_a_time(const void *addr)
  */
 #define __ADDRESSABLE(sym) \
 	static void * __section(".discard.addressable") __used \
-		__PASTE(__addressable_##sym, __LINE__) = (void *)&sym;
+		__PASTE(__addressable_##sym, __LINE__) = (void *)(uintptr_t)&sym;
 
 /**
  * offset_to_ptr - convert a relative memory offset to an absolute pointer
-- 
2.17.1


      parent reply	other threads:[~2019-08-24  6:05 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-23 22:44 [PATCH 0/7] x86/percpu: Use segment qualifiers Nadav Amit
2019-08-23 22:44 ` [PATCH 1/7] compiler: Report x86 segment support Nadav Amit
2019-08-23 22:44 ` [PATCH 2/7] x86/percpu: Use compiler segment prefix qualifier Nadav Amit
2019-08-23 22:44 ` [PATCH 3/7] x86/percpu: Use C for percpu accesses when possible Nadav Amit
2019-08-23 22:44 ` [PATCH 4/7] x86: Fix possible caching of current_task Nadav Amit
2019-08-23 22:44 ` [PATCH 5/7] percpu: Assume preemption is disabled on per_cpu_ptr() Nadav Amit
2019-08-23 22:44 ` [PATCH 6/7] x86/percpu: Optimized arch_raw_cpu_ptr() Nadav Amit
2019-08-23 22:44 ` Nadav Amit [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190823224424.15296-8-namit@vmware.com \
    --to=namit@vmware.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=tglx@linutronix.de \
    --cc=thgarnie@chromium.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.