From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: 
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
        id S1753311AbdLDRAV (ORCPT );
        Mon, 4 Dec 2017 12:00:21 -0500
Received: from Galois.linutronix.de ([146.0.238.70]:60401 "EHLO
        Galois.linutronix.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
        with ESMTP id S1752559AbdLDQvj (ORCPT );
        Mon, 4 Dec 2017 11:51:39 -0500
Message-Id: <20171204150608.437321377@linutronix.de>
User-Agent: quilt/0.63-1
Date: Mon, 04 Dec 2017 15:07:50 +0100
From: Thomas Gleixner
To: LKML
Cc: x86@kernel.org, Linus Torvalds, Andy Lutomirsky, Peter Zijlstra,
        Dave Hansen, Borislav Petkov, Greg KH, keescook@google.com,
        hughd@google.com, Brian Gerst, Josh Poimboeuf, Denys Vlasenko,
        Rik van Riel, Boris Ostrovsky, Juergen Gross, David Laight,
        Eduardo Valentin, aliguori@amazon.com, Will Deacon,
        daniel.gruss@iaik.tugraz.at, Dave Hansen
Subject: [patch 44/60] x86/events/intel/ds: Map debug buffers in fixmap
References: <20171204140706.296109558@linutronix.de>
MIME-Version: 1.0
Content-Type: text/plain; charset=ISO-8859-15
Content-Disposition: inline; filename=x86-events-intel-ds--Map_debug_buffers_in_fixmap.patch
Sender: linux-kernel-owner@vger.kernel.org
List-ID: 
X-Mailing-List: linux-kernel@vger.kernel.org

From: Hugh Dickins

The BTS and PEBS buffers both have their virtual addresses programmed into
the hardware.  This means that any access to them is performed via the page
tables.  The times that the hardware accesses these are entirely dependent
on how the performance monitoring hardware events are set up.  In other
words, there is no way for the kernel to tell when the hardware might
access these buffers.

To avoid perf crashes, allocate the 'debug_store' buffers as whole pages
and map them into the cpu_entry_area fixmap.

The PEBS fixup buffer does not need this treatment.

[ tglx: Got rid of the kaiser_add_mapping() cruft ]

Signed-off-by: Hugh Dickins
Signed-off-by: Dave Hansen
Signed-off-by: Thomas Gleixner
---
 arch/x86/events/intel/ds.c   |  114 ++++++++++++++++++++++++++-----------------
 arch/x86/events/perf_event.h |    2 
 2 files changed, 73 insertions(+), 43 deletions(-)

--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -280,17 +280,46 @@ void fini_debug_store_on_cpu(int cpu)
 
 static DEFINE_PER_CPU(void *, insn_buffer);
 
-static int alloc_pebs_buffer(int cpu)
+static u64 ds_update_fixmap(int idx, void *addr, size_t size, pgprot_t prot)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	phys_addr_t pa, va;
+	size_t msz = 0;
+
+	va = __fix_to_virt(idx);
+	pa = virt_to_phys(addr);
+	for (; msz < size; idx--, msz += PAGE_SIZE, pa += PAGE_SIZE)
+		__set_fixmap(idx, pa, prot);
+	return va;
+}
+
+static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
+{
+	unsigned int order = get_order(size);
 	int node = cpu_to_node(cpu);
-	int max;
+	struct page *page;
+
+	page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
+	return page ? page_address(page) : NULL;
+}
+
+static void dsfree_pages(const void *buffer, size_t size)
+{
+	if (buffer)
+		free_pages((unsigned long)buffer, get_order(size));
+}
+
+static int alloc_pebs_buffer(int cpu)
+{
+	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+	struct debug_store *ds = hwev->ds;
+	size_t bsiz = x86_pmu.pebs_buffer_size;
+	int idx, max, node = cpu_to_node(cpu);
 	void *buffer, *ibuffer;
 
 	if (!x86_pmu.pebs)
 		return 0;
 
-	buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
+	buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
 	if (unlikely(!buffer))
 		return -ENOMEM;
 
@@ -301,25 +330,27 @@ static int alloc_pebs_buffer(int cpu)
 	if (x86_pmu.intel_cap.pebs_format < 2) {
 		ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
 		if (!ibuffer) {
-			kfree(buffer);
+			dsfree_pages(buffer, bsiz);
 			return -ENOMEM;
 		}
 		per_cpu(insn_buffer, cpu) = ibuffer;
 	}
-
-	max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size;
-
-	ds->pebs_buffer_base = (u64)(unsigned long)buffer;
+	hwev->ds_pebs_vaddr = buffer;
+	/* Update the fixmap */
+	idx = get_cpu_entry_area_index(cpu, cpu_debug_buffers.pebs_buffer);
+	ds->pebs_buffer_base = ds_update_fixmap(idx, buffer, bsiz,
+						PAGE_KERNEL);
 	ds->pebs_index = ds->pebs_buffer_base;
-	ds->pebs_absolute_maximum = ds->pebs_buffer_base +
-		max * x86_pmu.pebs_record_size;
-
+	max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size);
+	ds->pebs_absolute_maximum = ds->pebs_buffer_base + max;
 	return 0;
 }
 
 static void release_pebs_buffer(int cpu)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+	struct debug_store *ds = hwev->ds;
+	int idx;
 
 	if (!ds || !x86_pmu.pebs)
 		return;
@@ -327,73 +358,70 @@ static void release_pebs_buffer(int cpu)
 	kfree(per_cpu(insn_buffer, cpu));
 	per_cpu(insn_buffer, cpu) = NULL;
 
-	kfree((void *)(unsigned long)ds->pebs_buffer_base);
+	/* Clear the fixmap */
+	idx = get_cpu_entry_area_index(cpu, cpu_debug_buffers.pebs_buffer);
+	ds_update_fixmap(idx, 0, x86_pmu.pebs_buffer_size, PAGE_NONE);
 	ds->pebs_buffer_base = 0;
+	dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
+	hwev->ds_pebs_vaddr = NULL;
 }
 
 static int alloc_bts_buffer(int cpu)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-	int node = cpu_to_node(cpu);
-	int max, thresh;
+	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+	struct debug_store *ds = hwev->ds;
+	int idx, max;
 	void *buffer;
 
 	if (!x86_pmu.bts)
 		return 0;
 
-	buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
+	buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu);
 	if (unlikely(!buffer)) {
 		WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
 		return -ENOMEM;
 	}
-
-	max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
-	thresh = max / 16;
-
-	ds->bts_buffer_base = (u64)(unsigned long)buffer;
+	hwev->ds_bts_vaddr = buffer;
+	/* Update the fixmap */
+	idx = get_cpu_entry_area_index(cpu, cpu_debug_buffers.bts_buffer);
+	ds->bts_buffer_base = ds_update_fixmap(idx, buffer, BTS_BUFFER_SIZE,
+					       PAGE_KERNEL);
 	ds->bts_index = ds->bts_buffer_base;
-	ds->bts_absolute_maximum = ds->bts_buffer_base +
-		max * BTS_RECORD_SIZE;
-	ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
-		thresh * BTS_RECORD_SIZE;
-
+	max = BTS_RECORD_SIZE * (BTS_BUFFER_SIZE / BTS_RECORD_SIZE);
+	ds->bts_absolute_maximum = ds->bts_buffer_base + max;
+	ds->bts_interrupt_threshold = ds->bts_absolute_maximum - (max / 16);
 	return 0;
 }
 
 static void release_bts_buffer(int cpu)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+	struct debug_store *ds = hwev->ds;
+	int idx;
 
 	if (!ds || !x86_pmu.bts)
 		return;
 
-	kfree((void *)(unsigned long)ds->bts_buffer_base);
+	/* Clear the fixmap */
+	idx = get_cpu_entry_area_index(cpu, cpu_debug_buffers.bts_buffer);
+	ds_update_fixmap(idx, 0, BTS_BUFFER_SIZE, PAGE_NONE);
 	ds->bts_buffer_base = 0;
+	dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
+	hwev->ds_bts_vaddr = NULL;
 }
 
 static int alloc_ds_buffer(int cpu)
 {
-	int node = cpu_to_node(cpu);
-	struct debug_store *ds;
-
-	ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
-	if (unlikely(!ds))
-		return -ENOMEM;
+	struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store;
 
+	memset(ds, 0, sizeof(*ds));
 	per_cpu(cpu_hw_events, cpu).ds = ds;
-
 	return 0;
 }
 
 static void release_ds_buffer(int cpu)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
-	if (!ds)
-		return;
-
 	per_cpu(cpu_hw_events, cpu).ds = NULL;
-	kfree(ds);
 }
 
 void release_ds_buffers(void)
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -199,6 +199,8 @@ struct cpu_hw_events {
 	 * Intel DebugStore bits
 	 */
 	struct debug_store	*ds;
+	void			*ds_pebs_vaddr;
+	void			*ds_bts_vaddr;
 	u64			pebs_enabled;
 	int			n_pebs;
 	int			n_large_pebs;
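
For readers unfamiliar with the x86 fixmap: slot indices grow downward in
virtual address space (__fix_to_virt(idx) is FIXADDR_TOP minus idx pages),
which is why ds_update_fixmap() above decrements idx while advancing the
physical address, leaving the buffer virtually contiguous at the address
returned for the first slot.  The stand-alone C sketch below models only
that index arithmetic; the FIXADDR_TOP value, the map_buffer() helper and
the slot/physical-address numbers are illustrative stand-ins, not the
kernel's implementation.

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1ULL << PAGE_SHIFT)
#define FIXADDR_TOP	0xffffffffff5ff000ULL	/* example constant only */

/* Models __fix_to_virt(): a higher slot index means a lower virtual address */
static uint64_t fix_to_virt(unsigned int idx)
{
	return FIXADDR_TOP - ((uint64_t)idx << PAGE_SHIFT);
}

/*
 * Models the ds_update_fixmap() loop: "map" size bytes starting at physical
 * address pa into consecutive slots, decrementing the slot index so the
 * virtual addresses increase page by page.  Returns the base virtual
 * address that would be programmed into the DS area.
 */
static uint64_t map_buffer(unsigned int idx, uint64_t pa, size_t size)
{
	uint64_t va = fix_to_virt(idx);
	size_t msz;

	for (msz = 0; msz < size; idx--, msz += PAGE_SIZE, pa += PAGE_SIZE)
		printf("slot %3u: va 0x%llx -> pa 0x%llx\n", idx,
		       (unsigned long long)fix_to_virt(idx),
		       (unsigned long long)pa);
	return va;
}

int main(void)
{
	/* A hypothetical 4-page PEBS buffer whose pages start at pa 0x100000 */
	uint64_t base = map_buffer(100, 0x100000, 4 * PAGE_SIZE);

	printf("pebs_buffer_base = 0x%llx\n", (unsigned long long)base);
	return 0;
}

Running it prints four consecutive, increasing virtual addresses for slots
100 down to 97; that virtually contiguous range is what the hardware sees
once the real __set_fixmap() calls have installed the pages.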