From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: 
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
        id S1753677AbdKXJRh (ORCPT );
        Fri, 24 Nov 2017 04:17:37 -0500
Received: from mail-wm0-f66.google.com ([74.125.82.66]:38766 "EHLO
        mail-wm0-f66.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
        with ESMTP id S1753395AbdKXJPv (ORCPT );
        Fri, 24 Nov 2017 04:15:51 -0500
X-Google-Smtp-Source: AGs4zMbHPWUp0ImdLGzgVcpXWH8X8kqWx1B2uT5zAYiE8gePyZ2t4HFuRC6lesDOTcBS1DAuEKKE7g==
From: Ingo Molnar
To: linux-kernel@vger.kernel.org
Cc: Dave Hansen, Andy Lutomirski, Thomas Gleixner, "H . Peter Anvin",
        Peter Zijlstra, Borislav Petkov, Linus Torvalds
Subject: [PATCH 32/43] x86/mm/kaiser: Map virtually-addressed performance monitoring buffers
Date: Fri, 24 Nov 2017 10:14:37 +0100
Message-Id: <20171124091448.7649-33-mingo@kernel.org>
X-Mailer: git-send-email 2.14.1
In-Reply-To: <20171124091448.7649-1-mingo@kernel.org>
References: <20171124091448.7649-1-mingo@kernel.org>
Sender: linux-kernel-owner@vger.kernel.org
List-ID: 
X-Mailing-List: linux-kernel@vger.kernel.org

From: Hugh Dickins

The BTS and PEBS buffers both have their virtual addresses programmed into
the hardware. This means that any access to them is performed via the page
tables. The times that the hardware accesses these are entirely dependent
on how the performance monitoring hardware events are set up. In other
words, there is no way for the kernel to tell when the hardware might
access these buffers.

To avoid perf crashes, place 'debug_store' in the user-mapped per-cpu area
instead of dynamically allocating it. Also use the page allocator plus
kaiser_add_mapping() to keep the BTS and PEBS buffers user-mapped (that is,
present in the user mapping, though visible only to kernel and hardware).
The PEBS fixup buffer does not need this treatment.

The need for a user-mapped struct debug_store showed up before doing any
conscious perf testing: in a couple of kernel paging oopses on Westmere,
implicating the debug_store offset of the per-cpu area.

Signed-off-by: Hugh Dickins
Signed-off-by: Dave Hansen
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Brian Gerst
Cc: Daniel Gruss
Cc: Denys Vlasenko
Cc: H. Peter Anvin
Cc: Josh Poimboeuf
Cc: Kees Cook
Cc: Linus Torvalds
Cc: Michael Schwarz
Cc: Moritz Lipp
Cc: Peter Zijlstra
Cc: Richard Fellner
Cc: Thomas Gleixner
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/20171123003500.7EC0DB4E@viggo.jf.intel.com
Signed-off-by: Ingo Molnar

x86/mm: Fix Kaiser build on 32-bit, backmerge to:

  x86/mm/kaiser: Map virtually-addressed performance monitoring buffers

Signed-off-by: Ingo Molnar
---
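[ Illustration, not part of the patch: the allocate/release pairing the
  new helpers are meant to be used in. The example_* functions below are
  hypothetical and only restate what alloc_pebs_buffer() and
  release_pebs_buffer() do after this change; 'size' stands in for
  x86_pmu.pebs_buffer_size:

	/* Modelled on alloc_pebs_buffer() as modified below: */
	static int example_alloc_buffer(int cpu, size_t size)
	{
		struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
		void *buffer;

		/*
		 * dsalloc() returns zeroed pages that have also been
		 * entered into the user (shadow) page tables via
		 * kaiser_add_mapping(), so the PMU can dereference this
		 * virtual address no matter which page tables are live:
		 */
		buffer = dsalloc(size, GFP_KERNEL, cpu_to_node(cpu));
		if (unlikely(!buffer))
			return -ENOMEM;

		/* The hardware accesses the buffer through this address: */
		ds->pebs_buffer_base = (u64)(unsigned long)buffer;

		return 0;
	}

	/* Modelled on release_pebs_buffer() as modified below: */
	static void example_release_buffer(int cpu, size_t size)
	{
		struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

		/*
		 * dsfree() removes the user-side mapping with
		 * kaiser_remove_mapping() before freeing the pages; it
		 * is a no-op for a NULL buffer:
		 */
		dsfree((void *)(unsigned long)ds->pebs_buffer_base, size);
		ds->pebs_buffer_base = 0;
	}

  The BTS buffer follows the same pattern with BTS_BUFFER_SIZE. ]
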
 arch/x86/events/intel/ds.c | 49 ++++++++++++++++++++++++++++++++++------------
 1 file changed, 37 insertions(+), 12 deletions(-)

diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 3674a4b6f8bd..61388b01962d 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -3,11 +3,15 @@
 #include <linux/types.h>
 #include <linux/slab.h>
+#include <linux/kaiser.h>
 
 #include <asm/perf_event.h>
 #include <asm/insn.h>
 
 #include "../perf_event.h"
 
+static
+DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct debug_store, cpu_debug_store);
+
 /* The size of a BTS record in bytes: */
 #define BTS_RECORD_SIZE		24
 
@@ -279,6 +283,31 @@ void fini_debug_store_on_cpu(int cpu)
 
 static DEFINE_PER_CPU(void *, insn_buffer);
 
+static void *dsalloc(size_t size, gfp_t flags, int node)
+{
+	unsigned int order = get_order(size);
+	struct page *page;
+	unsigned long addr;
+
+	page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
+	if (!page)
+		return NULL;
+	addr = (unsigned long)page_address(page);
+	if (kaiser_add_mapping(addr, size, __PAGE_KERNEL | _PAGE_GLOBAL) < 0) {
+		__free_pages(page, order);
+		addr = 0;
+	}
+	return (void *)addr;
+}
+
+static void dsfree(const void *buffer, size_t size)
+{
+	if (!buffer)
+		return;
+	kaiser_remove_mapping((unsigned long)buffer, size);
+	free_pages((unsigned long)buffer, get_order(size));
+}
+
 static int alloc_pebs_buffer(int cpu)
 {
 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
@@ -289,7 +318,7 @@ static int alloc_pebs_buffer(int cpu)
 	if (!x86_pmu.pebs)
 		return 0;
 
-	buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
+	buffer = dsalloc(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
 	if (unlikely(!buffer))
 		return -ENOMEM;
 
@@ -300,7 +329,7 @@ static int alloc_pebs_buffer(int cpu)
 	if (x86_pmu.intel_cap.pebs_format < 2) {
 		ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
 		if (!ibuffer) {
-			kfree(buffer);
+			dsfree(buffer, x86_pmu.pebs_buffer_size);
 			return -ENOMEM;
 		}
 		per_cpu(insn_buffer, cpu) = ibuffer;
@@ -326,7 +355,8 @@ static void release_pebs_buffer(int cpu)
 	kfree(per_cpu(insn_buffer, cpu));
 	per_cpu(insn_buffer, cpu) = NULL;
 
-	kfree((void *)(unsigned long)ds->pebs_buffer_base);
+	dsfree((void *)(unsigned long)ds->pebs_buffer_base,
+	       x86_pmu.pebs_buffer_size);
 	ds->pebs_buffer_base = 0;
 }
 
@@ -340,7 +370,7 @@ static int alloc_bts_buffer(int cpu)
 	if (!x86_pmu.bts)
 		return 0;
 
-	buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
+	buffer = dsalloc(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
 	if (unlikely(!buffer)) {
 		WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
 		return -ENOMEM;
@@ -366,19 +396,15 @@ static void release_bts_buffer(int cpu)
 	if (!ds || !x86_pmu.bts)
 		return;
 
-	kfree((void *)(unsigned long)ds->bts_buffer_base);
+	dsfree((void *)(unsigned long)ds->bts_buffer_base, BTS_BUFFER_SIZE);
 	ds->bts_buffer_base = 0;
 }
 
 static int alloc_ds_buffer(int cpu)
 {
-	int node = cpu_to_node(cpu);
-	struct debug_store *ds;
-
-	ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
-	if (unlikely(!ds))
-		return -ENOMEM;
+	struct debug_store *ds = per_cpu_ptr(&cpu_debug_store, cpu);
 
+	memset(ds, 0, sizeof(*ds));
 	per_cpu(cpu_hw_events, cpu).ds = ds;
 
 	return 0;
@@ -392,7 +418,6 @@ static void release_ds_buffer(int cpu)
 		return;
 
 	per_cpu(cpu_hw_events, cpu).ds = NULL;
-	kfree(ds);
 }
 
 void release_ds_buffers(void)
-- 
2.14.1