From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
stable@vger.kernel.org,
"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>,
Andrew Morton <akpm@linux-foundation.org>,
Huang Ying <ying.huang@intel.com>,
Jonathan Corbet <corbet@lwn.net>,
Linus Torvalds <torvalds@linux-foundation.org>,
Peter Zijlstra <peterz@infradead.org>,
Thomas Gleixner <tglx@linutronix.de>,
Thorsten Leemhuis <regressions@leemhuis.info>,
linux-mm@kvack.org, Ingo Molnar <mingo@kernel.org>,
Sasha Levin <alexander.levin@verizon.com>
Subject: [PATCH 4.14 61/75] mm, x86/mm: Fix performance regression in get_user_pages_fast()
Date: Thu, 7 Dec 2017 14:08:23 +0100 [thread overview]
Message-ID: <20171207130821.277841888@linuxfoundation.org> (raw)
In-Reply-To: <20171207130818.742746317@linuxfoundation.org>
4.14-stable review patch. If anyone has any objections, please let me know.
------------------
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
[ Upstream commit 5b65c4677a57a1d4414212f9995aa0e46a21ff80 ]
The 0-day test bot found a performance regression that was tracked down to
switching x86 to the generic get_user_pages_fast() implementation:
http://lkml.kernel.org/r/20170710024020.GA26389@yexl-desktop
The regression was caused by the fact that we now use local_irq_save() +
local_irq_restore() in get_user_pages_fast() to disable interrupts.
In the x86 implementation local_irq_disable() + local_irq_enable() was used.
The fix is to make get_user_pages_fast() use local_irq_disable(),
leaving local_irq_save() for __get_user_pages_fast() that can be called
with interrupts disabled.
Numbers for pinning a gigabyte of memory, one page at a time, 20 repeats:
Before: Average: 14.91 ms, stddev: 0.45 ms
After: Average: 10.76 ms, stddev: 0.18 ms
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Thorsten Leemhuis <regressions@leemhuis.info>
Cc: linux-mm@kvack.org
Fixes: e585513b76f7 ("x86/mm/gup: Switch GUP to the generic get_user_page_fast() implementation")
Link: http://lkml.kernel.org/r/20170908215603.9189-3-kirill.shutemov@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
mm/gup.c | 97 +++++++++++++++++++++++++++++++++++++--------------------------
1 file changed, 58 insertions(+), 39 deletions(-)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1707,6 +1707,47 @@ static int gup_p4d_range(pgd_t pgd, unsi
return 1;
}
+static void gup_pgd_range(unsigned long addr, unsigned long end,
+ int write, struct page **pages, int *nr)
+{
+ unsigned long next;
+ pgd_t *pgdp;
+
+ pgdp = pgd_offset(current->mm, addr);
+ do {
+ pgd_t pgd = READ_ONCE(*pgdp);
+
+ next = pgd_addr_end(addr, end);
+ if (pgd_none(pgd))
+ return;
+ if (unlikely(pgd_huge(pgd))) {
+ if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
+ pages, nr))
+ return;
+ } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
+ if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
+ PGDIR_SHIFT, next, write, pages, nr))
+ return;
+ } else if (!gup_p4d_range(pgd, addr, next, write, pages, nr))
+ return;
+ } while (pgdp++, addr = next, addr != end);
+}
+
+#ifndef gup_fast_permitted
+/*
+ * Check if it's allowed to use __get_user_pages_fast() for the range, or
+ * we need to fall back to the slow version:
+ */
+bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
+{
+ unsigned long len, end;
+
+ len = (unsigned long) nr_pages << PAGE_SHIFT;
+ end = start + len;
+ return end >= start;
+}
+#endif
+
/*
* Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to
* the regular GUP. It will only return non-negative values.
@@ -1714,10 +1755,8 @@ static int gup_p4d_range(pgd_t pgd, unsi
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages)
{
- struct mm_struct *mm = current->mm;
unsigned long addr, len, end;
- unsigned long next, flags;
- pgd_t *pgdp;
+ unsigned long flags;
int nr = 0;
start &= PAGE_MASK;
@@ -1741,45 +1780,15 @@ int __get_user_pages_fast(unsigned long
* block IPIs that come from THPs splitting.
*/
- local_irq_save(flags);
- pgdp = pgd_offset(mm, addr);
- do {
- pgd_t pgd = READ_ONCE(*pgdp);
-
- next = pgd_addr_end(addr, end);
- if (pgd_none(pgd))
- break;
- if (unlikely(pgd_huge(pgd))) {
- if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
- pages, &nr))
- break;
- } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
- if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
- PGDIR_SHIFT, next, write, pages, &nr))
- break;
- } else if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
- break;
- } while (pgdp++, addr = next, addr != end);
- local_irq_restore(flags);
+ if (gup_fast_permitted(start, nr_pages, write)) {
+ local_irq_save(flags);
+ gup_pgd_range(addr, end, write, pages, &nr);
+ local_irq_restore(flags);
+ }
return nr;
}
-#ifndef gup_fast_permitted
-/*
- * Check if it's allowed to use __get_user_pages_fast() for the range, or
- * we need to fall back to the slow version:
- */
-bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
-{
- unsigned long len, end;
-
- len = (unsigned long) nr_pages << PAGE_SHIFT;
- end = start + len;
- return end >= start;
-}
-#endif
-
/**
* get_user_pages_fast() - pin user pages in memory
* @start: starting user address
@@ -1799,12 +1808,22 @@ bool gup_fast_permitted(unsigned long st
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages)
{
+ unsigned long addr, len, end;
int nr = 0, ret = 0;
start &= PAGE_MASK;
+ addr = start;
+ len = (unsigned long) nr_pages << PAGE_SHIFT;
+ end = start + len;
+
+ if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
+ (void __user *)start, len)))
+ return 0;
if (gup_fast_permitted(start, nr_pages, write)) {
- nr = __get_user_pages_fast(start, nr_pages, write, pages);
+ local_irq_disable();
+ gup_pgd_range(addr, end, write, pages, &nr);
+ local_irq_enable();
ret = nr;
}
next prev parent reply other threads:[~2017-12-07 13:21 UTC|newest]
Thread overview: 85+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-12-07 13:07 [PATCH 4.14 00/75] 4.14.5-stable review Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 01/75] drm/fsl-dcu: avoid disabling pixel clock twice on suspend Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 02/75] drm/fsl-dcu: enable IRQ before drm_atomic_helper_resume() Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 04/75] s390/runtime instrumentation: simplify task exit handling Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 05/75] usbip: fix usbip attach to find a port that matches the requested speed Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 06/75] usbip: Fix USB device hang due to wrong enabling of scatter-gather Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 07/75] uas: Always apply US_FL_NO_ATA_1X quirk to Seagate devices Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 08/75] usb: quirks: Add no-lpm quirk for KY-688 USB 3.1 Type-C Hub Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 09/75] serial: 8250_pci: Add Amazon PCI serial device ID Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 10/75] ANDROID: binder: fix transaction leak Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 11/75] USB: serial: option: add Quectel BG96 id Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 12/75] USB: serial: usb_debug: add new USB device id Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 13/75] serial: 8250_early: Only set divisor if valid clk & baud Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 14/75] MIPS: Add custom serial.h with BASE_BAUD override for generic kernel Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 15/75] ima: fix hash algorithm initialization Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 16/75] s390: vfio-ccw: Do not attempt to free no-op, test and tic cda Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 17/75] PM / Domains: Fix genpd to deal with drivers returning 1 from ->prepare() Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 18/75] s390/pci: do not require AIS facility Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 19/75] selftests/x86/ldt_get: Add a few additional tests for limits Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 20/75] selftests/x86/ldt_gdt: Robustify against set_thread_area() and LAR oddities Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 21/75] staging: greybus: loopback: Fix iteration count on async path Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 22/75] m68k: fix ColdFire node shift size calculation Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 23/75] serial: 8250_fintek: Fix rs485 disablement on invalid ioctl() Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 24/75] staging: rtl8822be: fix wrong dma unmap len Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 25/75] staging: rtl8188eu: avoid a null dereference on pmlmepriv Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 26/75] spi: sh-msiof: Fix DMA transfer size check Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 27/75] spi: spi-axi: fix potential use-after-free after deregistration Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 28/75] mmc: tmio: check mmc_regulator_get_supply return value Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 29/75] mmc: sdhci-msm: fix issue with power irq Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 30/75] hwmon: (pmbus/core) Prevent unintentional setting of page to 0xFF Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 31/75] perf/core: Fix __perf_read_group_add() locking Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 32/75] usb: dwc2: Fix UDC state tracking Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 33/75] usb: dwc2: Error out of dwc2_hsotg_ep_disable() if were in host mode Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 34/75] usb: phy: tahvo: fix error handling in tahvo_usb_probe() Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 35/75] PCI: dra7xx: Create functional dependency between PCIe and PHY Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 36/75] x86/intel_rdt: Initialize bitmask of shareable resource if CDP enabled Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 37/75] x86/intel_rdt: Fix potential deadlock during resctrl mount Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 38/75] serial: 8250: Preserve DLD[7:4] for PORT_XR17V35X Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 39/75] kprobes: Use synchronize_rcu_tasks() for optprobe with CONFIG_PREEMPT=y Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 40/75] x86/entry: Use SYSCALL_DEFINE() macros for sys_modify_ldt() Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 41/75] clocksource/drivers/arm_arch_timer: Validate CNTFRQ after enabling frame Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 42/75] dt-bindings: timer: renesas, cmt: Fix SoC-specific compatible values Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 43/75] EDAC, sb_edac: Fix missing break in switch Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 44/75] usb: mtu3: fix error return code in ssusb_gadget_init() Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 45/75] staging: fsl-dpaa2/eth: Account for Rx FD buffers on error path Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 46/75] staging: rtl8822be: Keep array subscript no lower than zero Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 47/75] ARM: cpuidle: Correct driver unregistration if init fails Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 48/75] usb: xhci: Return error when host is dead in xhci_disable_slot() Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 49/75] sysrq : fix Show Regs call trace on ARM Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 50/75] serial: sh-sci: suppress warning for ports without dma channels Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 51/75] usbip: tools: Install all headers needed for libusbip development Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 53/75] staging: fsl-mc/dpio: Fix incorrect comparison Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 54/75] perf test attr: Fix ignored test case result Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 55/75] perf test attr: Fix python error on empty result Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 56/75] kprobes/x86: Disable preemption in ftrace-based jprobes Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 57/75] locking/refcounts, x86/asm: Use unique .text section for refcount exceptions Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 58/75] s390/ptrace: fix guarded storage regset handling Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 60/75] perf tools: Fix leaking rec_argv in error cases Greg Kroah-Hartman
2017-12-07 13:08 ` Greg Kroah-Hartman [this message]
2017-12-07 13:08 ` [PATCH 4.14 63/75] iio: multiplexer: add NULL check on devm_kzalloc() and devm_kmemdup() return values Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 64/75] locking/refcounts, x86/asm: Enable CONFIG_ARCH_HAS_REFCOUNT Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 65/75] powerpc/jprobes: Disable preemption when triggered through ftrace Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 66/75] powerpc/kprobes: Disable preemption before invoking probe handler for optprobes Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 67/75] dma-buf/sw_sync: force signal all unsignaled fences on dying timeline Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 68/75] staging: ccree: fix leak of import() after init() Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 69/75] usb: hub: Cycle HUB power when initialization fails Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 70/75] USB: ulpi: fix bus-node lookup Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 71/75] xhci: Dont show incorrect WARN message about events for empty rings Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 72/75] usb: xhci: fix panic in xhci_free_virt_devices_depth_first Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 73/75] USB: core: Add type-specific length check of BOS descriptors Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 74/75] USB: usbfs: Filter flags passed in from user space Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 75/75] usb: host: fix incorrect updating of offset Greg Kroah-Hartman
2017-12-07 20:55 ` [PATCH 4.14 00/75] 4.14.5-stable review Guenter Roeck
2017-12-08 10:37 ` Greg Kroah-Hartman
2017-12-08 0:08 ` Shuah Khan
2017-12-08 10:34 ` Greg Kroah-Hartman
2017-12-08 5:35 ` Naresh Kamboju
2017-12-09 3:34 ` Ivan Kozik
2017-12-09 7:45 ` Greg Kroah-Hartman
2017-12-09 7:56 ` Ivan Kozik
2017-12-09 17:13 ` Greg Kroah-Hartman
2017-12-09 17:32 ` Thomas Backlund
2017-12-10 12:36 ` Greg Kroah-Hartman
2017-12-09 18:39 ` Ivan Kozik
[not found] ` <5a29b63a.13bbdf0a.b5a04.7588@mx.google.com>
[not found] ` <7hk1xxkmw9.fsf@baylibre.com>
2017-12-09 16:59 ` Greg Kroah-Hartman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20171207130821.277841888@linuxfoundation.org \
--to=gregkh@linuxfoundation.org \
--cc=akpm@linux-foundation.org \
--cc=alexander.levin@verizon.com \
--cc=corbet@lwn.net \
--cc=kirill.shutemov@linux.intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mingo@kernel.org \
--cc=peterz@infradead.org \
--cc=regressions@leemhuis.info \
--cc=stable@vger.kernel.org \
--cc=tglx@linutronix.de \
--cc=torvalds@linux-foundation.org \
--cc=ying.huang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).