All of lore.kernel.org
 help / color / mirror / Atom feed
From: Pavel Tatashin <pasha.tatashin@oracle.com>
To: steven.sistare@oracle.com, daniel.m.jordan@oracle.com,
	linux-kernel@vger.kernel.org, Alexander.Levin@microsoft.com,
	dan.j.williams@intel.com, sathyanarayanan.kuppuswamy@intel.com,
	pankaj.laxminarayan.bharadiya@intel.com, akuster@mvista.com,
	cminyard@mvista.com, pasha.tatashin@oracle.com,
	gregkh@linuxfoundation.org, stable@vger.kernel.org
Subject: [PATCH 4.1 33/65] kaiser: fix perf crashes
Date: Mon,  5 Mar 2018 19:25:06 -0500	[thread overview]
Message-ID: <20180306002538.1761-34-pasha.tatashin@oracle.com> (raw)
In-Reply-To: <20180306002538.1761-1-pasha.tatashin@oracle.com>

From: Hugh Dickins <hughd@google.com>

Avoid perf crashes: place debug_store in the user-mapped per-cpu area
instead of allocating, and use page allocator plus kaiser_add_mapping()
to keep the BTS and PEBS buffers user-mapped (that is, present in the
user mapping, though visible only to kernel and hardware).  The PEBS
fixup buffer does not need this treatment.

The need for a user-mapped struct debug_store showed up before doing
any conscious perf testing: in a couple of kernel paging oopses on
Westmere, implicating the debug_store offset of the per-cpu area.

Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit 20cbe9a3aa2e341824da57ce0ac6d52cbffaa570)
Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>

Conflicts:
	arch/x86/kernel/cpu/perf_event_intel_ds.c
---
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 57 ++++++++++++++++++++++++-------
 1 file changed, 45 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 7f73b3553e2e..e4f109a7ed9a 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -2,11 +2,15 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 
+#include <asm/kaiser.h>
 #include <asm/perf_event.h>
 #include <asm/insn.h>
 
 #include "perf_event.h"
 
+static
+DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct debug_store, cpu_debug_store);
+
 /* The size of a BTS record in bytes: */
 #define BTS_RECORD_SIZE		24
 
@@ -246,6 +250,39 @@ void fini_debug_store_on_cpu(int cpu)
 
 static DEFINE_PER_CPU(void *, insn_buffer);
 
+static void *dsalloc(size_t size, gfp_t flags, int node)
+{
+#ifdef CONFIG_KAISER
+	unsigned int order = get_order(size);
+	struct page *page;
+	unsigned long addr;
+
+	page = alloc_pages_node(node, flags | __GFP_ZERO, order);
+	if (!page)
+		return NULL;
+	addr = (unsigned long)page_address(page);
+	if (kaiser_add_mapping(addr, size, __PAGE_KERNEL) < 0) {
+		__free_pages(page, order);
+		addr = 0;
+	}
+	return (void *)addr;
+#else
+	return kmalloc_node(size, flags | __GFP_ZERO, node);
+#endif
+}
+
+static void dsfree(const void *buffer, size_t size)
+{
+#ifdef CONFIG_KAISER
+	if (!buffer)
+		return;
+	kaiser_remove_mapping((unsigned long)buffer, size);
+	free_pages((unsigned long)buffer, get_order(size));
+#else
+	kfree(buffer);
+#endif
+}
+
 static int alloc_pebs_buffer(int cpu)
 {
 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
@@ -256,7 +293,7 @@ static int alloc_pebs_buffer(int cpu)
 	if (!x86_pmu.pebs)
 		return 0;
 
-	buffer = kzalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL, node);
+	buffer = dsalloc(PEBS_BUFFER_SIZE, GFP_KERNEL, node);
 	if (unlikely(!buffer))
 		return -ENOMEM;
 
@@ -267,7 +304,7 @@ static int alloc_pebs_buffer(int cpu)
 	if (x86_pmu.intel_cap.pebs_format < 2) {
 		ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
 		if (!ibuffer) {
-			kfree(buffer);
+			dsfree(buffer, PEBS_BUFFER_SIZE);
 			return -ENOMEM;
 		}
 		per_cpu(insn_buffer, cpu) = ibuffer;
@@ -296,7 +333,8 @@ static void release_pebs_buffer(int cpu)
 	kfree(per_cpu(insn_buffer, cpu));
 	per_cpu(insn_buffer, cpu) = NULL;
 
-	kfree((void *)(unsigned long)ds->pebs_buffer_base);
+	dsfree((void *)(unsigned long)ds->pebs_buffer_base,
+			PEBS_BUFFER_SIZE);
 	ds->pebs_buffer_base = 0;
 }
 
@@ -310,7 +348,7 @@ static int alloc_bts_buffer(int cpu)
 	if (!x86_pmu.bts)
 		return 0;
 
-	buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
+	buffer = dsalloc(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
 	if (unlikely(!buffer)) {
 		WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
 		return -ENOMEM;
@@ -336,19 +374,15 @@ static void release_bts_buffer(int cpu)
 	if (!ds || !x86_pmu.bts)
 		return;
 
-	kfree((void *)(unsigned long)ds->bts_buffer_base);
+	dsfree((void *)(unsigned long)ds->bts_buffer_base, BTS_BUFFER_SIZE);
 	ds->bts_buffer_base = 0;
 }
 
 static int alloc_ds_buffer(int cpu)
 {
-	int node = cpu_to_node(cpu);
-	struct debug_store *ds;
-
-	ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
-	if (unlikely(!ds))
-		return -ENOMEM;
+	struct debug_store *ds = per_cpu_ptr(&cpu_debug_store, cpu);
 
+	memset(ds, 0, sizeof(*ds));
 	per_cpu(cpu_hw_events, cpu).ds = ds;
 
 	return 0;
@@ -362,7 +396,6 @@ static void release_ds_buffer(int cpu)
 		return;
 
 	per_cpu(cpu_hw_events, cpu).ds = NULL;
-	kfree(ds);
 }
 
 void release_ds_buffers(void)
-- 
2.16.2

  parent reply	other threads:[~2018-03-06  0:25 UTC|newest]

Thread overview: 67+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-06  0:24 [PATCH 4.1 00/65] page table isolation for stable 4.1 Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 01/65] x86/mm: Add INVPCID helpers Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 02/65] x86/mm: Fix INVPCID asm constraint Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 03/65] x86/mm: Add a 'noinvpcid' boot option to turn off INVPCID Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 04/65] x86/mm: If INVPCID is available, use it to flush global mappings Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 05/65] mm/mmu_context, sched/core: Fix mmu_context.h assumption Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 06/65] sched/core: Add switch_mm_irqs_off() and use it in the scheduler Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 07/65] x86/mm: Build arch/x86/mm/tlb.c even on !SMP Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 08/65] x86/mm, sched/core: Uninline switch_mm() Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 09/65] x86/mm, sched/core: Turn off IRQs in switch_mm() Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 10/65] ARM: Hide finish_arch_post_lock_switch() from modules Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 11/65] sched/core: Idle_task_exit() shouldn't use switch_mm_irqs_off() Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 12/65] x86/irq: Do not substract irq_tlb_count from irq_call_count Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 13/65] x86/vm86/32: Switch to flush_tlb_mm_range() in mark_screen_rdonly() Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 14/65] x86/mm: Remove flush_tlb() and flush_tlb_current_task() Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 15/65] x86/mm: Make flush_tlb_mm_range() more predictable Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 16/65] x86/mm: Reimplement flush_tlb_page() using flush_tlb_mm_range() Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 17/65] x86/mm: Remove the UP asm/tlbflush.h code, always use the (formerly) SMP code Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 18/65] x86/mm: Disable PCID on 32-bit kernels Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 19/65] x86/mm: Add the 'nopcid' boot option to turn off PCID Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 20/65] x86/mm: Enable CR4.PCIDE on supported systems Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 21/65] x86/mm/64: Fix reboot interaction with CR4.PCIDE Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 22/65] x86/boot: Add early cmdline parsing for options with arguments Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 23/65] x86/entry: Stop using PER_CPU_VAR(kernel_stack) Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 24/65] x86/entry: Remove unused 'kernel_stack' per-cpu variable Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 25/65] x86/entry: Define 'cpu_current_top_of_stack' for 64-bit code Pavel Tatashin
2018-03-06  0:24 ` [PATCH 4.1 26/65] KAISER: Kernel Address Isolation Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 27/65] kaiser: merged update Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 28/65] kaiser: do not set _PAGE_NX on pgd_none Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 29/65] kaiser: stack map PAGE_SIZE at THREAD_SIZE-PAGE_SIZE Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 30/65] kaiser: fix build and FIXME in alloc_ldt_struct() Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 31/65] kaiser: KAISER depends on SMP Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 32/65] kaiser: fix regs to do_nmi() ifndef CONFIG_KAISER Pavel Tatashin
2018-03-06  0:25 ` Pavel Tatashin [this message]
2018-03-06  0:25 ` [PATCH 4.1 34/65] kaiser: ENOMEM if kaiser_pagetable_walk() NULL Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 35/65] kaiser: tidied up asm/kaiser.h somewhat Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 36/65] kaiser: tidied up kaiser_add/remove_mapping slightly Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 37/65] kaiser: kaiser_remove_mapping() move along the pgd Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 38/65] kaiser: cleanups while trying for gold link Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 39/65] kaiser: name that 0x1000 KAISER_SHADOW_PGD_OFFSET Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 40/65] kaiser: delete KAISER_REAL_SWITCH option Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 41/65] kaiser: vmstat show NR_KAISERTABLE as nr_overhead Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 42/65] kaiser: enhanced by kernel and user PCIDs Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 43/65] kaiser: load_new_mm_cr3() let SWITCH_USER_CR3 flush user Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 44/65] kaiser: PCID 0 for kernel and 128 for user Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 45/65] kaiser: x86_cr3_pcid_noflush and x86_cr3_pcid_user Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 46/65] kaiser: paranoid_entry pass cr3 need to paranoid_exit Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 47/65] kaiser: _pgd_alloc() without __GFP_REPEAT to avoid stalls Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 48/65] kaiser: fix unlikely error in alloc_ldt_struct() Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 49/65] kaiser: add "nokaiser" boot option, using ALTERNATIVE Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 50/65] " Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 51/65] x86/kaiser: Rename and simplify X86_FEATURE_KAISER handling Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 52/65] x86/kaiser: Check boottime cmdline params Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 53/65] kaiser: use ALTERNATIVE instead of x86_cr3_pcid_noflush Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 54/65] kaiser: drop is_atomic arg to kaiser_pagetable_walk() Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 55/65] kaiser: asm/tlbflush.h handle noPGE at lower level Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 56/65] kaiser: kaiser_flush_tlb_on_return_to_user() check PCID Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 57/65] x86/paravirt: Dont patch flush_tlb_single Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 58/65] x86/kaiser: Reenable PARAVIRT Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 59/65] x86/kaiser: Move feature detection up Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 60/65] KPTI: Rename to PAGE_TABLE_ISOLATION Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 61/65] x86/ldt: fix crash in ldt freeing Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 62/65] PTI: unbreak EFI old_memmap Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 63/65] kpti: Disable when running under Xen PV Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 64/65] pti: Rename X86_FEATURE_KAISER to X86_FEATURE_PTI Pavel Tatashin
2018-03-06  0:25 ` [PATCH 4.1 65/65] x86/pti/efi: broken conversion from efi to kernel page table Pavel Tatashin
2018-03-07 12:55 ` [PATCH 4.1 00/65] page table isolation for stable 4.1 Jiri Kosina

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180306002538.1761-34-pasha.tatashin@oracle.com \
    --to=pasha.tatashin@oracle.com \
    --cc=Alexander.Levin@microsoft.com \
    --cc=akuster@mvista.com \
    --cc=cminyard@mvista.com \
    --cc=dan.j.williams@intel.com \
    --cc=daniel.m.jordan@oracle.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=pankaj.laxminarayan.bharadiya@intel.com \
    --cc=sathyanarayanan.kuppuswamy@intel.com \
    --cc=stable@vger.kernel.org \
    --cc=steven.sistare@oracle.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.