linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Dave Hansen <dave.hansen@linux.intel.com>
To: linux-kernel@vger.kernel.org
Cc: linux-mm@kvack.org, dave.hansen@linux.intel.com,
	hughd@google.com, moritz.lipp@iaik.tugraz.at,
	daniel.gruss@iaik.tugraz.at, michael.schwarz@iaik.tugraz.at,
	richard.fellner@student.tugraz.at, luto@kernel.org,
	torvalds@linux-foundation.org, keescook@google.com,
	x86@kernel.org
Subject: [PATCH 18/30] x86, kaiser: map virtually-addressed performance monitoring buffers
Date: Wed, 08 Nov 2017 11:47:20 -0800	[thread overview]
Message-ID: <20171108194720.0ADD17E2@viggo.jf.intel.com> (raw)
In-Reply-To: <20171108194646.907A1942@viggo.jf.intel.com>


From: Hugh Dickins <hughd@google.com>
[Dave] Add explicit _PAGE_GLOBAL

The BTS and PEBS buffers both have their virtual addresses programmed
into the hardware.  This means that the hardware accesses them via the
page tables.  The times that the hardware accesses these are entirely
dependent on how the performance monitoring hardware events are set up.
In other words, we have no idea when the hardware might need to access
these buffers, so they must stay mapped in the user page tables too.

Avoid perf crashes: place debug_store in the user-mapped per-cpu area
instead of allocating it, and use the page allocator plus
kaiser_add_mapping() to keep the BTS and PEBS buffers user-mapped (that
is, present in the user mapping, though visible only to kernel and
hardware).  The PEBS fixup buffer does not need this treatment.

The need for a user-mapped struct debug_store showed up before any
deliberate perf testing was done: a couple of kernel paging oopses on
Westmere implicated the debug_store offset of the per-cpu area.

Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Moritz Lipp <moritz.lipp@iaik.tugraz.at>
Cc: Daniel Gruss <daniel.gruss@iaik.tugraz.at>
Cc: Michael Schwarz <michael.schwarz@iaik.tugraz.at>
Cc: Richard Fellner <richard.fellner@student.tugraz.at>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Kees Cook <keescook@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: x86@kernel.org
---

 b/arch/x86/events/intel/ds.c |   57 +++++++++++++++++++++++++++++++++----------
 1 file changed, 45 insertions(+), 12 deletions(-)

diff -puN arch/x86/events/intel/ds.c~kaiser-user-map-virtually-addressed-performance-monitoring-buffers arch/x86/events/intel/ds.c
--- a/arch/x86/events/intel/ds.c~kaiser-user-map-virtually-addressed-performance-monitoring-buffers	2017-11-08 10:45:35.659681379 -0800
+++ b/arch/x86/events/intel/ds.c	2017-11-08 10:45:35.662681379 -0800
@@ -2,11 +2,15 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 
+#include <asm/kaiser.h>
 #include <asm/perf_event.h>
 #include <asm/insn.h>
 
 #include "../perf_event.h"
 
+static
+DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct debug_store, cpu_debug_store);
+
 /* The size of a BTS record in bytes: */
 #define BTS_RECORD_SIZE		24
 
@@ -278,6 +282,39 @@ void fini_debug_store_on_cpu(int cpu)
 
 static DEFINE_PER_CPU(void *, insn_buffer);
 
+static void *dsalloc(size_t size, gfp_t flags, int node)
+{
+#ifdef CONFIG_KAISER
+	unsigned int order = get_order(size);
+	struct page *page;
+	unsigned long addr;
+
+	page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
+	if (!page)
+		return NULL;
+	addr = (unsigned long)page_address(page);
+	if (kaiser_add_mapping(addr, size, __PAGE_KERNEL | _PAGE_GLOBAL) < 0) {
+		__free_pages(page, order);
+		addr = 0;
+	}
+	return (void *)addr;
+#else
+	return kmalloc_node(size, flags | __GFP_ZERO, node);
+#endif
+}
+
+static void dsfree(const void *buffer, size_t size)
+{
+#ifdef CONFIG_KAISER
+	if (!buffer)
+		return;
+	kaiser_remove_mapping((unsigned long)buffer, size);
+	free_pages((unsigned long)buffer, get_order(size));
+#else
+	kfree(buffer);
+#endif
+}
+
 static int alloc_pebs_buffer(int cpu)
 {
 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
@@ -288,7 +325,7 @@ static int alloc_pebs_buffer(int cpu)
 	if (!x86_pmu.pebs)
 		return 0;
 
-	buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
+	buffer = dsalloc(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
 	if (unlikely(!buffer))
 		return -ENOMEM;
 
@@ -299,7 +336,7 @@ static int alloc_pebs_buffer(int cpu)
 	if (x86_pmu.intel_cap.pebs_format < 2) {
 		ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
 		if (!ibuffer) {
-			kfree(buffer);
+			dsfree(buffer, x86_pmu.pebs_buffer_size);
 			return -ENOMEM;
 		}
 		per_cpu(insn_buffer, cpu) = ibuffer;
@@ -325,7 +362,8 @@ static void release_pebs_buffer(int cpu)
 	kfree(per_cpu(insn_buffer, cpu));
 	per_cpu(insn_buffer, cpu) = NULL;
 
-	kfree((void *)(unsigned long)ds->pebs_buffer_base);
+	dsfree((void *)(unsigned long)ds->pebs_buffer_base,
+			x86_pmu.pebs_buffer_size);
 	ds->pebs_buffer_base = 0;
 }
 
@@ -339,7 +377,7 @@ static int alloc_bts_buffer(int cpu)
 	if (!x86_pmu.bts)
 		return 0;
 
-	buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
+	buffer = dsalloc(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
 	if (unlikely(!buffer)) {
 		WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
 		return -ENOMEM;
@@ -365,19 +403,15 @@ static void release_bts_buffer(int cpu)
 	if (!ds || !x86_pmu.bts)
 		return;
 
-	kfree((void *)(unsigned long)ds->bts_buffer_base);
+	dsfree((void *)(unsigned long)ds->bts_buffer_base, BTS_BUFFER_SIZE);
 	ds->bts_buffer_base = 0;
 }
 
 static int alloc_ds_buffer(int cpu)
 {
-	int node = cpu_to_node(cpu);
-	struct debug_store *ds;
-
-	ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
-	if (unlikely(!ds))
-		return -ENOMEM;
+	struct debug_store *ds = per_cpu_ptr(&cpu_debug_store, cpu);
 
+	memset(ds, 0, sizeof(*ds));
 	per_cpu(cpu_hw_events, cpu).ds = ds;
 
 	return 0;
@@ -391,7 +425,6 @@ static void release_ds_buffer(int cpu)
 		return;
 
 	per_cpu(cpu_hw_events, cpu).ds = NULL;
-	kfree(ds);
 }
 
 void release_ds_buffers(void)
_

  parent reply	other threads:[~2017-11-08 19:47 UTC|newest]

Thread overview: 68+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-11-08 19:46 [PATCH 00/30] [v2] KAISER: unmap most of the kernel from userspace page tables Dave Hansen
2017-11-08 19:46 ` [PATCH 01/30] x86, mm: do not set _PAGE_USER for init_mm " Dave Hansen
2017-11-08 19:52   ` Linus Torvalds
2017-11-08 20:11     ` Dave Hansen
2017-11-09 10:29   ` Borislav Petkov
2017-11-08 19:46 ` [PATCH 02/30] x86, tlb: make CR4-based TLB flushes more robust Dave Hansen
2017-11-09 10:48   ` Borislav Petkov
2017-11-09 10:51     ` Thomas Gleixner
2017-11-09 11:02       ` Borislav Petkov
2017-11-08 19:46 ` [PATCH 03/30] x86, mm: document X86_CR4_PGE toggling behavior Dave Hansen
2017-11-09 12:21   ` Borislav Petkov
2017-11-08 19:46 ` [PATCH 04/30] x86, kaiser: disable global pages by default with KAISER Dave Hansen
2017-11-09 12:51   ` Borislav Petkov
2017-11-09 22:19   ` Thomas Gleixner
2017-11-08 19:46 ` [PATCH 05/30] x86, kaiser: prepare assembly for entry/exit CR3 switching Dave Hansen
2017-11-09 13:20   ` Borislav Petkov
2017-11-09 15:34     ` Dave Hansen
2017-11-09 15:59       ` Borislav Petkov
2017-11-08 19:46 ` [PATCH 06/30] x86, kaiser: introduce user-mapped percpu areas Dave Hansen
2017-11-08 19:46 ` [PATCH 07/30] x86, kaiser: mark percpu data structures required for entry/exit Dave Hansen
2017-11-08 19:47 ` [PATCH 08/30] x86, kaiser: unmap kernel from userspace page tables (core patch) Dave Hansen
2017-11-10 12:57   ` Ingo Molnar
2017-11-08 19:47 ` [PATCH 09/30] x86, kaiser: only populate shadow page tables for userspace Dave Hansen
2017-11-08 19:47 ` [PATCH 10/30] x86, kaiser: allow NX to be set in p4d/pgd Dave Hansen
2017-11-08 19:47 ` [PATCH 11/30] x86, kaiser: make sure static PGDs are 8k in size Dave Hansen
2017-11-08 19:47 ` [PATCH 12/30] x86, kaiser: map GDT into user page tables Dave Hansen
2017-11-08 19:47 ` [PATCH 13/30] x86, kaiser: map dynamically-allocated LDTs Dave Hansen
2017-11-08 19:47 ` [PATCH 14/30] x86, kaiser: map espfix structures Dave Hansen
2017-11-08 19:47 ` [PATCH 15/30] x86, kaiser: map entry stack variables Dave Hansen
2017-11-08 19:47 ` [PATCH 16/30] x86, kaiser: map trace interrupt entry Dave Hansen
2017-11-08 19:47 ` [PATCH 17/30] x86, kaiser: map debug IDT tables Dave Hansen
2017-11-08 19:47 ` Dave Hansen [this message]
2017-11-10 12:17   ` [PATCH 18/30] x86, kaiser: map virtually-addressed performance monitoring buffers Peter Zijlstra
2017-11-08 19:47 ` [PATCH 19/30] x86, mm: Move CR3 construction functions Dave Hansen
2017-11-08 19:47 ` [PATCH 20/30] x86, mm: remove hard-coded ASID limit checks Dave Hansen
2017-11-10 12:20   ` Peter Zijlstra
2017-11-10 18:41     ` Dave Hansen
2017-11-08 19:47 ` [PATCH 21/30] x86, mm: put mmu-to-h/w ASID translation in one place Dave Hansen
2017-11-08 19:47 ` [PATCH 22/30] x86, pcid, kaiser: allow flushing for future ASID switches Dave Hansen
2017-11-10 12:25   ` Peter Zijlstra
2017-11-08 19:47 ` [PATCH 23/30] x86, kaiser: use PCID feature to make user and kernel switches faster Dave Hansen
2017-11-08 19:47 ` [PATCH 24/30] x86, kaiser: disable native VSYSCALL Dave Hansen
2017-11-09 19:04   ` Andy Lutomirski
2017-11-09 19:26     ` Dave Hansen
2017-11-10  0:53       ` Andy Lutomirski
2017-11-10  0:57         ` Dave Hansen
2017-11-10  1:04           ` Andy Lutomirski
2017-11-10  1:22             ` Dave Hansen
2017-11-10  2:25               ` Andy Lutomirski
2017-11-10  6:31                 ` Dave Hansen
2017-11-10 22:06                   ` Andy Lutomirski
2017-11-10 23:04                     ` Dave Hansen
2017-11-13  3:52                       ` Andy Lutomirski
2017-11-13 21:07                         ` Dave Hansen
2017-11-14  2:15                           ` Andy Lutomirski
2017-11-08 19:47 ` [PATCH 25/30] x86, kaiser: add debugfs file to turn KAISER on/off at runtime Dave Hansen
2017-11-08 19:47 ` [PATCH 26/30] x86, kaiser: add a function to check for KAISER being enabled Dave Hansen
2017-11-08 19:47 ` [PATCH 27/30] x86, kaiser: un-poison PGDs at runtime Dave Hansen
2017-11-08 19:47 ` [PATCH 28/30] x86, kaiser: allow KAISER to be enabled/disabled " Dave Hansen
2017-11-08 19:47 ` [PATCH 29/30] x86, kaiser: add Kconfig Dave Hansen
2017-11-08 19:47 ` [PATCH 30/30] x86, kaiser, xen: Dynamically disable KAISER when running under Xen PV Dave Hansen
2017-11-09 15:01   ` Juergen Gross
2017-11-10 19:30 [PATCH 00/30] [v3] KAISER: unmap most of the kernel from userspace page tables Dave Hansen
2017-11-10 19:31 ` [PATCH 18/30] x86, kaiser: map virtually-addressed performance monitoring buffers Dave Hansen
2017-11-14 18:20   ` Peter Zijlstra
2017-11-14 18:28     ` Dave Hansen
2017-11-14 19:10       ` Hugh Dickins
2017-11-14 19:24         ` Andy Lutomirski
2017-11-15  9:41         ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20171108194720.0ADD17E2@viggo.jf.intel.com \
    --to=dave.hansen@linux.intel.com \
    --cc=daniel.gruss@iaik.tugraz.at \
    --cc=hughd@google.com \
    --cc=keescook@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=luto@kernel.org \
    --cc=michael.schwarz@iaik.tugraz.at \
    --cc=moritz.lipp@iaik.tugraz.at \
    --cc=richard.fellner@student.tugraz.at \
    --cc=torvalds@linux-foundation.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).