From: Don Zickus <dzickus@redhat.com>
To: Peter Zijlstra <peterz@infradead.org>
Cc: dave.hansen@linux.intel.com, eranian@google.com,
	ak@linux.intel.com, jmario@redhat.com,
	linux-kernel@vger.kernel.org, acme@infradead.org
Subject: Re: x86, perf: throttling issues with long nmi latencies
Date: Tue, 15 Oct 2013 12:22:37 -0400	[thread overview]
Message-ID: <20131015162237.GB227855@redhat.com> (raw)
In-Reply-To: <20131015150736.GZ26785@twins.programming.kicks-ass.net>

On Tue, Oct 15, 2013 at 05:07:36PM +0200, Peter Zijlstra wrote:
> On Tue, Oct 15, 2013 at 04:32:27PM +0200, Peter Zijlstra wrote:
> > 
> > This one seems to actually work and is somewhat simpler.
> 
> And here's some hackery to avoid that atomic page inc frobbery.
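
So if I read this right, the trick is to stop taking a page reference
altogether and instead keep interrupts off across both the lockless
walk and the copy, so the page cannot go away underneath us.  Pieced
together from the hunks below, the loop in copy_from_user_nmi() ends
up looking roughly like this (my reconstruction, declarations and
error handling elided):

	do {
		local_irq_save(flags);
		/* third argument 0: GUPF_GET clear, no atomic refcount taken */
		ret = ___get_user_pages_fast(addr, 1, 0, &page);
		if (!ret) {
			local_irq_restore(flags);
			break;
		}

		offset = addr & (PAGE_SIZE - 1);
		size = min(PAGE_SIZE - offset, n - len);

		map = kmap_atomic(page);
		memcpy(to, map + offset, size);
		kunmap_atomic(map);
		/* the page was only "pinned" by IRQs being off */
		local_irq_restore(flags);

		len  += size;
		to   += size;
		addr += size;
	} while (len < n);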

Hmm, for some reason I did not see much noticeable improvement (over your
second patch) with this patch. :-(  Not to say it doesn't improve things;
I just don't have hard numbers to tell whether it is a small percentage
better or not.
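
If it would help, one way I can think of to get hard numbers is to hack
a cycle count around the call and pull the deltas out of the trace
buffer, something like the below (hypothetical debug-only wrapper; the
name and the trace_printk() format are mine, and I am assuming
copy_from_user_nmi() is declared in <asm/uaccess.h>):

	#include <linux/kernel.h>
	#include <asm/timex.h>
	#include <asm/uaccess.h>

	/*
	 * Debug only: time a single copy_from_user_nmi() call in cycles
	 * so before/after kernels can be compared from the trace buffer.
	 */
	static unsigned long timed_copy_from_user_nmi(void *to,
			const void __user *from, unsigned long n)
	{
		cycles_t t0 = get_cycles();
		unsigned long ret = copy_from_user_nmi(to, from, n);

		/* trace_printk() is usable from NMI context; printk() is not */
		trace_printk("copy_from_user_nmi: %llu cycles for %lu bytes\n",
			     (unsigned long long)(get_cycles() - t0), n);
		return ret;
	}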

Cheers,
Don

> 
> ---
>  arch/x86/lib/usercopy.c |   12 ++++++++--
>  arch/x86/mm/gup.c       |   63 ++++++++++++++++++++++++++++++++++++---------------------
>  2 files changed, 49 insertions(+), 26 deletions(-)
> 
> --- a/arch/x86/lib/usercopy.c
> +++ b/arch/x86/lib/usercopy.c
> @@ -10,6 +10,8 @@
>  #include <asm/word-at-a-time.h>
>  #include <linux/sched.h>
>  
> +extern int ___get_user_pages_fast(unsigned long start, int nr_pages, int flags,
> +			  struct page **pages);
>  /*
>   * best effort, GUP based copy_from_user() that is NMI-safe
>   */
> @@ -18,6 +20,7 @@ copy_from_user_nmi(void *to, const void
>  {
>  	unsigned long offset, addr = (unsigned long)from;
>  	unsigned long size, len = 0;
> +	unsigned long flags;
>  	struct page *page;
>  	void *map;
>  	int ret;
> @@ -26,9 +29,12 @@ copy_from_user_nmi(void *to, const void
>  		return len;
>  
>  	do {
> -		ret = __get_user_pages_fast(addr, 1, 0, &page);
> -		if (!ret)
> +		local_irq_save(flags);
> +		ret = ___get_user_pages_fast(addr, 1, 0, &page);
> +		if (!ret) {
> +			local_irq_restore(flags);
>  			break;
> +		}
>  
>  		offset = addr & (PAGE_SIZE - 1);
>  		size = min(PAGE_SIZE - offset, n - len);
> @@ -36,7 +42,7 @@ copy_from_user_nmi(void *to, const void
>  		map = kmap_atomic(page);
>  		memcpy(to, map+offset, size);
>  		kunmap_atomic(map);
> -		put_page(page);
> +		local_irq_restore(flags);
>  
>  		len  += size;
>  		to   += size;
> --- a/arch/x86/mm/gup.c
> +++ b/arch/x86/mm/gup.c
> @@ -63,19 +63,22 @@ static inline pte_t gup_get_pte(pte_t *p
>  #endif
>  }
>  
> +#define GUPF_GET	0x01
> +#define GUPF_WRITE	0x02
> +
>  /*
>   * The performance critical leaf functions are made noinline otherwise gcc
>   * inlines everything into a single function which results in too much
>   * register pressure.
>   */
>  static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
> -		unsigned long end, int write, struct page **pages, int *nr)
> +		unsigned long end, int flags, struct page **pages, int *nr)
>  {
>  	unsigned long mask;
>  	pte_t *ptep;
>  
>  	mask = _PAGE_PRESENT|_PAGE_USER;
> -	if (write)
> +	if (flags & GUPF_WRITE)
>  		mask |= _PAGE_RW;
>  
>  	ptep = pte_offset_map(&pmd, addr);
> @@ -89,7 +92,8 @@ static noinline int gup_pte_range(pmd_t
>  		}
>  		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
>  		page = pte_page(pte);
> -		get_page(page);
> +		if (flags & GUPF_GET)
> +			get_page(page);
>  		SetPageReferenced(page);
>  		pages[*nr] = page;
>  		(*nr)++;
> @@ -109,7 +113,7 @@ static inline void get_head_page_multipl
>  }
>  
>  static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
> -		unsigned long end, int write, struct page **pages, int *nr)
> +		unsigned long end, int flags, struct page **pages, int *nr)
>  {
>  	unsigned long mask;
>  	pte_t pte = *(pte_t *)&pmd;
> @@ -117,7 +121,7 @@ static noinline int gup_huge_pmd(pmd_t p
>  	int refs;
>  
>  	mask = _PAGE_PRESENT|_PAGE_USER;
> -	if (write)
> +	if (flags & GUPF_WRITE)
>  		mask |= _PAGE_RW;
>  	if ((pte_flags(pte) & mask) != mask)
>  		return 0;
> @@ -131,19 +135,20 @@ static noinline int gup_huge_pmd(pmd_t p
>  	do {
>  		VM_BUG_ON(compound_head(page) != head);
>  		pages[*nr] = page;
> -		if (PageTail(page))
> +		if ((flags & GUPF_GET) && PageTail(page))
>  			get_huge_page_tail(page);
>  		(*nr)++;
>  		page++;
>  		refs++;
>  	} while (addr += PAGE_SIZE, addr != end);
> -	get_head_page_multiple(head, refs);
> +	if (flags & GUPF_GET)
> +		get_head_page_multiple(head, refs);
>  
>  	return 1;
>  }
>  
>  static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
> -		int write, struct page **pages, int *nr)
> +		int flags, struct page **pages, int *nr)
>  {
>  	unsigned long next;
>  	pmd_t *pmdp;
> @@ -167,10 +172,10 @@ static int gup_pmd_range(pud_t pud, unsi
>  		if (pmd_none(pmd) || pmd_trans_splitting(pmd))
>  			return 0;
>  		if (unlikely(pmd_large(pmd))) {
> -			if (!gup_huge_pmd(pmd, addr, next, write, pages, nr))
> +			if (!gup_huge_pmd(pmd, addr, next, flags, pages, nr))
>  				return 0;
>  		} else {
> -			if (!gup_pte_range(pmd, addr, next, write, pages, nr))
> +			if (!gup_pte_range(pmd, addr, next, flags, pages, nr))
>  				return 0;
>  		}
>  	} while (pmdp++, addr = next, addr != end);
> @@ -179,7 +184,7 @@ static int gup_pmd_range(pud_t pud, unsi
>  }
>  
>  static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
> -		unsigned long end, int write, struct page **pages, int *nr)
> +		unsigned long end, int flags, struct page **pages, int *nr)
>  {
>  	unsigned long mask;
>  	pte_t pte = *(pte_t *)&pud;
> @@ -187,7 +192,7 @@ static noinline int gup_huge_pud(pud_t p
>  	int refs;
>  
>  	mask = _PAGE_PRESENT|_PAGE_USER;
> -	if (write)
> +	if (flags & GUPF_WRITE)
>  		mask |= _PAGE_RW;
>  	if ((pte_flags(pte) & mask) != mask)
>  		return 0;
> @@ -201,19 +206,20 @@ static noinline int gup_huge_pud(pud_t p
>  	do {
>  		VM_BUG_ON(compound_head(page) != head);
>  		pages[*nr] = page;
> -		if (PageTail(page))
> +		if ((flags & GUPF_GET) && PageTail(page))
>  			get_huge_page_tail(page);
>  		(*nr)++;
>  		page++;
>  		refs++;
>  	} while (addr += PAGE_SIZE, addr != end);
> -	get_head_page_multiple(head, refs);
> +	if (flags & GUPF_GET)
> +		get_head_page_multiple(head, refs);
>  
>  	return 1;
>  }
>  
>  static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
> -			int write, struct page **pages, int *nr)
> +			int flags, struct page **pages, int *nr)
>  {
>  	unsigned long next;
>  	pud_t *pudp;
> @@ -226,10 +232,10 @@ static int gup_pud_range(pgd_t pgd, unsi
>  		if (pud_none(pud))
>  			return 0;
>  		if (unlikely(pud_large(pud))) {
> -			if (!gup_huge_pud(pud, addr, next, write, pages, nr))
> +			if (!gup_huge_pud(pud, addr, next, flags, pages, nr))
>  				return 0;
>  		} else {
> -			if (!gup_pmd_range(pud, addr, next, write, pages, nr))
> +			if (!gup_pmd_range(pud, addr, next, flags, pages, nr))
>  				return 0;
>  		}
>  	} while (pudp++, addr = next, addr != end);
> @@ -241,13 +247,12 @@ static int gup_pud_range(pgd_t pgd, unsi
>   * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall
>   * back to the regular GUP.
>   */
> -int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
> +int ___get_user_pages_fast(unsigned long start, int nr_pages, int flags,
>  			  struct page **pages)
>  {
>  	struct mm_struct *mm = current->mm;
>  	unsigned long addr, len, end;
>  	unsigned long next;
> -	unsigned long flags;
>  	pgd_t *pgdp;
>  	int nr = 0;
>  
> @@ -255,7 +260,7 @@ int __get_user_pages_fast(unsigned long
>  	addr = start;
>  	len = (unsigned long) nr_pages << PAGE_SHIFT;
>  	end = start + len;
> -	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
> +	if (unlikely(!access_ok((flags & GUPF_WRITE) ? VERIFY_WRITE : VERIFY_READ,
>  					(void __user *)start, len)))
>  		return 0;
>  
> @@ -277,7 +282,6 @@ int __get_user_pages_fast(unsigned long
>  	 * (which we do on x86, with the above PAE exception), we can follow the
>  	 * address down to the page and take a ref on it.
>  	 */
> -	local_irq_save(flags);
>  	pgdp = pgd_offset(mm, addr);
>  	do {
>  		pgd_t pgd = *pgdp;
> @@ -285,14 +289,27 @@ int __get_user_pages_fast(unsigned long
>  		next = pgd_addr_end(addr, end);
>  		if (pgd_none(pgd))
>  			break;
> -		if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
> +		if (!gup_pud_range(pgd, addr, next, flags, pages, &nr))
>  			break;
>  	} while (pgdp++, addr = next, addr != end);
> -	local_irq_restore(flags);
>  
>  	return nr;
>  }
>  
> +int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
> +			  struct page **pages)
> +{
> +	unsigned long flags;
> +	int ret;
> +
> +	local_irq_save(flags);
> +	ret = ___get_user_pages_fast(start, nr_pages,
> +			GUPF_GET | (write ? GUPF_WRITE : 0), pages);
> +	local_irq_restore(flags);
> +
> +	return ret;
> +}
> +
>  /**
>   * get_user_pages_fast() - pin user pages in memory
>   * @start:	starting user address
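
For the archive: as I understand it, the reason dropping the refcount
can be safe at all is the argument the existing comment in gup.c
already makes. x86 frees page-table pages (and, via the mmu_gather
batching, the data pages themselves) only after an IPI-based TLB
shootdown, so a CPU that keeps interrupts disabled across both the
walk and the copy holds off the shootdown and the pages cannot be
freed underneath it. Hence the irq-disable moves out of the fast-GUP
core and into the NMI caller, which clears GUPF_GET to skip the
atomics, while the old __get_user_pages_fast() entry point survives
as a wrapper that passes GUPF_GET and so keeps the take-a-reference
semantics for existing callers.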
