linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org,
	"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Huang Ying <ying.huang@intel.com>,
	Jonathan Corbet <corbet@lwn.net>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Thorsten Leemhuis <regressions@leemhuis.info>,
	linux-mm@kvack.org, Ingo Molnar <mingo@kernel.org>,
	Sasha Levin <alexander.levin@verizon.com>
Subject: [PATCH 4.14 61/75] mm, x86/mm: Fix performance regression in get_user_pages_fast()
Date: Thu,  7 Dec 2017 14:08:23 +0100	[thread overview]
Message-ID: <20171207130821.277841888@linuxfoundation.org> (raw)
In-Reply-To: <20171207130818.742746317@linuxfoundation.org>

4.14-stable review patch.  If anyone has any objections, please let me know.

------------------

From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>


[ Upstream commit 5b65c4677a57a1d4414212f9995aa0e46a21ff80 ]

The 0-day test bot found a performance regression that was tracked down to
switching x86 to the generic get_user_pages_fast() implementation:

  http://lkml.kernel.org/r/20170710024020.GA26389@yexl-desktop

The regression was caused by the fact that we now use local_irq_save() +
local_irq_restore() in get_user_pages_fast() to disable interrupts.
In the x86 implementation, local_irq_disable() + local_irq_enable() were used.

The fix is to make get_user_pages_fast() use local_irq_disable(),
leaving local_irq_save() for __get_user_pages_fast(), which can be called
with interrupts disabled.

Numbers for pinning a gigabyte of memory, one page at a time, 20 repeats:

  Before:  Average: 14.91 ms, stddev: 0.45 ms
  After:   Average: 10.76 ms, stddev: 0.18 ms

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Thorsten Leemhuis <regressions@leemhuis.info>
Cc: linux-mm@kvack.org
Fixes: e585513b76f7 ("x86/mm/gup: Switch GUP to the generic get_user_page_fast() implementation")
Link: http://lkml.kernel.org/r/20170908215603.9189-3-kirill.shutemov@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/gup.c |   97 +++++++++++++++++++++++++++++++++++++--------------------------
 1 file changed, 58 insertions(+), 39 deletions(-)

--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1707,6 +1707,47 @@ static int gup_p4d_range(pgd_t pgd, unsi
 	return 1;
 }
 
+static void gup_pgd_range(unsigned long addr, unsigned long end,
+		int write, struct page **pages, int *nr)
+{
+	unsigned long next;
+	pgd_t *pgdp;
+
+	pgdp = pgd_offset(current->mm, addr);
+	do {
+		pgd_t pgd = READ_ONCE(*pgdp);
+
+		next = pgd_addr_end(addr, end);
+		if (pgd_none(pgd))
+			return;
+		if (unlikely(pgd_huge(pgd))) {
+			if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
+					  pages, nr))
+				return;
+		} else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
+			if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
+					 PGDIR_SHIFT, next, write, pages, nr))
+				return;
+		} else if (!gup_p4d_range(pgd, addr, next, write, pages, nr))
+			return;
+	} while (pgdp++, addr = next, addr != end);
+}
+
+#ifndef gup_fast_permitted
+/*
+ * Check if it's allowed to use __get_user_pages_fast() for the range, or
+ * we need to fall back to the slow version:
+ */
+bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
+{
+	unsigned long len, end;
+
+	len = (unsigned long) nr_pages << PAGE_SHIFT;
+	end = start + len;
+	return end >= start;
+}
+#endif
+
 /*
  * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to
  * the regular GUP. It will only return non-negative values.
@@ -1714,10 +1755,8 @@ static int gup_p4d_range(pgd_t pgd, unsi
 int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			  struct page **pages)
 {
-	struct mm_struct *mm = current->mm;
 	unsigned long addr, len, end;
-	unsigned long next, flags;
-	pgd_t *pgdp;
+	unsigned long flags;
 	int nr = 0;
 
 	start &= PAGE_MASK;
@@ -1741,45 +1780,15 @@ int __get_user_pages_fast(unsigned long
 	 * block IPIs that come from THPs splitting.
 	 */
 
-	local_irq_save(flags);
-	pgdp = pgd_offset(mm, addr);
-	do {
-		pgd_t pgd = READ_ONCE(*pgdp);
-
-		next = pgd_addr_end(addr, end);
-		if (pgd_none(pgd))
-			break;
-		if (unlikely(pgd_huge(pgd))) {
-			if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
-					  pages, &nr))
-				break;
-		} else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
-			if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
-					 PGDIR_SHIFT, next, write, pages, &nr))
-				break;
-		} else if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
-			break;
-	} while (pgdp++, addr = next, addr != end);
-	local_irq_restore(flags);
+	if (gup_fast_permitted(start, nr_pages, write)) {
+		local_irq_save(flags);
+		gup_pgd_range(addr, end, write, pages, &nr);
+		local_irq_restore(flags);
+	}
 
 	return nr;
 }
 
-#ifndef gup_fast_permitted
-/*
- * Check if it's allowed to use __get_user_pages_fast() for the range, or
- * we need to fall back to the slow version:
- */
-bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
-{
-	unsigned long len, end;
-
-	len = (unsigned long) nr_pages << PAGE_SHIFT;
-	end = start + len;
-	return end >= start;
-}
-#endif
-
 /**
  * get_user_pages_fast() - pin user pages in memory
  * @start:	starting user address
@@ -1799,12 +1808,22 @@ bool gup_fast_permitted(unsigned long st
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			struct page **pages)
 {
+	unsigned long addr, len, end;
 	int nr = 0, ret = 0;
 
 	start &= PAGE_MASK;
+	addr = start;
+	len = (unsigned long) nr_pages << PAGE_SHIFT;
+	end = start + len;
+
+	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
+					(void __user *)start, len)))
+		return 0;
 
 	if (gup_fast_permitted(start, nr_pages, write)) {
-		nr = __get_user_pages_fast(start, nr_pages, write, pages);
+		local_irq_disable();
+		gup_pgd_range(addr, end, write, pages, &nr);
+		local_irq_enable();
 		ret = nr;
 	}
 

  parent reply	other threads:[~2017-12-07 13:21 UTC|newest]

Thread overview: 85+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-12-07 13:07 [PATCH 4.14 00/75] 4.14.5-stable review Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 01/75] drm/fsl-dcu: avoid disabling pixel clock twice on suspend Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 02/75] drm/fsl-dcu: enable IRQ before drm_atomic_helper_resume() Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 04/75] s390/runtime instrumentation: simplify task exit handling Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 05/75] usbip: fix usbip attach to find a port that matches the requested speed Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 06/75] usbip: Fix USB device hang due to wrong enabling of scatter-gather Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 07/75] uas: Always apply US_FL_NO_ATA_1X quirk to Seagate devices Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 08/75] usb: quirks: Add no-lpm quirk for KY-688 USB 3.1 Type-C Hub Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 09/75] serial: 8250_pci: Add Amazon PCI serial device ID Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 10/75] ANDROID: binder: fix transaction leak Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 11/75] USB: serial: option: add Quectel BG96 id Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 12/75] USB: serial: usb_debug: add new USB device id Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 13/75] serial: 8250_early: Only set divisor if valid clk & baud Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 14/75] MIPS: Add custom serial.h with BASE_BAUD override for generic kernel Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 15/75] ima: fix hash algorithm initialization Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 16/75] s390: vfio-ccw: Do not attempt to free no-op, test and tic cda Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 17/75] PM / Domains: Fix genpd to deal with drivers returning 1 from ->prepare() Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 18/75] s390/pci: do not require AIS facility Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 19/75] selftests/x86/ldt_get: Add a few additional tests for limits Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 20/75] selftests/x86/ldt_gdt: Robustify against set_thread_area() and LAR oddities Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 21/75] staging: greybus: loopback: Fix iteration count on async path Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 22/75] m68k: fix ColdFire node shift size calculation Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 23/75] serial: 8250_fintek: Fix rs485 disablement on invalid ioctl() Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 24/75] staging: rtl8822be: fix wrong dma unmap len Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 25/75] staging: rtl8188eu: avoid a null dereference on pmlmepriv Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 26/75] spi: sh-msiof: Fix DMA transfer size check Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 27/75] spi: spi-axi: fix potential use-after-free after deregistration Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 28/75] mmc: tmio: check mmc_regulator_get_supply return value Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 29/75] mmc: sdhci-msm: fix issue with power irq Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 30/75] hwmon: (pmbus/core) Prevent unintentional setting of page to 0xFF Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 31/75] perf/core: Fix __perf_read_group_add() locking Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 32/75] usb: dwc2: Fix UDC state tracking Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 33/75] usb: dwc2: Error out of dwc2_hsotg_ep_disable() if were in host mode Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 34/75] usb: phy: tahvo: fix error handling in tahvo_usb_probe() Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 35/75] PCI: dra7xx: Create functional dependency between PCIe and PHY Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 36/75] x86/intel_rdt: Initialize bitmask of shareable resource if CDP enabled Greg Kroah-Hartman
2017-12-07 13:07 ` [PATCH 4.14 37/75] x86/intel_rdt: Fix potential deadlock during resctrl mount Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 38/75] serial: 8250: Preserve DLD[7:4] for PORT_XR17V35X Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 39/75] kprobes: Use synchronize_rcu_tasks() for optprobe with CONFIG_PREEMPT=y Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 40/75] x86/entry: Use SYSCALL_DEFINE() macros for sys_modify_ldt() Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 41/75] clocksource/drivers/arm_arch_timer: Validate CNTFRQ after enabling frame Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 42/75] dt-bindings: timer: renesas, cmt: Fix SoC-specific compatible values Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 43/75] EDAC, sb_edac: Fix missing break in switch Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 44/75] usb: mtu3: fix error return code in ssusb_gadget_init() Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 45/75] staging: fsl-dpaa2/eth: Account for Rx FD buffers on error path Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 46/75] staging: rtl8822be: Keep array subscript no lower than zero Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 47/75] ARM: cpuidle: Correct driver unregistration if init fails Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 48/75] usb: xhci: Return error when host is dead in xhci_disable_slot() Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 49/75] sysrq : fix Show Regs call trace on ARM Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 50/75] serial: sh-sci: suppress warning for ports without dma channels Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 51/75] usbip: tools: Install all headers needed for libusbip development Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 53/75] staging: fsl-mc/dpio: Fix incorrect comparison Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 54/75] perf test attr: Fix ignored test case result Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 55/75] perf test attr: Fix python error on empty result Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 56/75] kprobes/x86: Disable preemption in ftrace-based jprobes Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 57/75] locking/refcounts, x86/asm: Use unique .text section for refcount exceptions Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 58/75] s390/ptrace: fix guarded storage regset handling Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 60/75] perf tools: Fix leaking rec_argv in error cases Greg Kroah-Hartman
2017-12-07 13:08 ` Greg Kroah-Hartman [this message]
2017-12-07 13:08 ` [PATCH 4.14 63/75] iio: multiplexer: add NULL check on devm_kzalloc() and devm_kmemdup() return values Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 64/75] locking/refcounts, x86/asm: Enable CONFIG_ARCH_HAS_REFCOUNT Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 65/75] powerpc/jprobes: Disable preemption when triggered through ftrace Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 66/75] powerpc/kprobes: Disable preemption before invoking probe handler for optprobes Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 67/75] dma-buf/sw_sync: force signal all unsignaled fences on dying timeline Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 68/75] staging: ccree: fix leak of import() after init() Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 69/75] usb: hub: Cycle HUB power when initialization fails Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 70/75] USB: ulpi: fix bus-node lookup Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 71/75] xhci: Dont show incorrect WARN message about events for empty rings Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 72/75] usb: xhci: fix panic in xhci_free_virt_devices_depth_first Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 73/75] USB: core: Add type-specific length check of BOS descriptors Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 74/75] USB: usbfs: Filter flags passed in from user space Greg Kroah-Hartman
2017-12-07 13:08 ` [PATCH 4.14 75/75] usb: host: fix incorrect updating of offset Greg Kroah-Hartman
2017-12-07 20:55 ` [PATCH 4.14 00/75] 4.14.5-stable review Guenter Roeck
2017-12-08 10:37   ` Greg Kroah-Hartman
2017-12-08  0:08 ` Shuah Khan
2017-12-08 10:34   ` Greg Kroah-Hartman
2017-12-08  5:35 ` Naresh Kamboju
2017-12-09  3:34 ` Ivan Kozik
2017-12-09  7:45   ` Greg Kroah-Hartman
2017-12-09  7:56     ` Ivan Kozik
2017-12-09 17:13       ` Greg Kroah-Hartman
2017-12-09 17:32         ` Thomas Backlund
2017-12-10 12:36           ` Greg Kroah-Hartman
2017-12-09 18:39         ` Ivan Kozik
     [not found] ` <5a29b63a.13bbdf0a.b5a04.7588@mx.google.com>
     [not found]   ` <7hk1xxkmw9.fsf@baylibre.com>
2017-12-09 16:59     ` Greg Kroah-Hartman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20171207130821.277841888@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=akpm@linux-foundation.org \
    --cc=alexander.levin@verizon.com \
    --cc=corbet@lwn.net \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=regressions@leemhuis.info \
    --cc=stable@vger.kernel.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    --cc=ying.huang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).