Linux-Fsdevel Archive on lore.kernel.org
 help / color / Atom feed
From: john.hubbard@gmail.com
To: Andrew Morton <akpm@linux-foundation.org>, linux-mm@kvack.org
Cc: Al Viro <viro@zeniv.linux.org.uk>,
	Christian Benvenuti <benve@cisco.com>,
	Christoph Hellwig <hch@infradead.org>,
	Christopher Lameter <cl@linux.com>,
	Dan Williams <dan.j.williams@intel.com>,
	Dave Chinner <david@fromorbit.com>,
	Dennis Dalessandro <dennis.dalessandro@intel.com>,
	Doug Ledford <dledford@redhat.com>, Jan Kara <jack@suse.cz>,
	Jason Gunthorpe <jgg@ziepe.ca>,
	Jerome Glisse <jglisse@redhat.com>,
	Matthew Wilcox <willy@infradead.org>,
	Michal Hocko <mhocko@kernel.org>,
	Mike Rapoport <rppt@linux.ibm.com>,
	Mike Marciniszyn <mike.marciniszyn@intel.com>,
	Ralph Campbell <rcampbell@nvidia.com>,
	Tom Talpey <tom@talpey.com>, LKML <linux-kernel@vger.kernel.org>,
	linux-fsdevel@vger.kernel.org, John Hubbard <jhubbard@nvidia.com>
Subject: [PATCH 5/6] mm/gup: /proc/vmstat support for get/put user pages
Date: Sun,  3 Feb 2019 21:21:34 -0800
Message-ID: <20190204052135.25784-6-jhubbard@nvidia.com> (raw)
In-Reply-To: <20190204052135.25784-1-jhubbard@nvidia.com>

From: John Hubbard <jhubbard@nvidia.com>

Add five new /proc/vmstat items, to provide some
visibility into what get_user_pages() and put_user_page()
are doing.

After booting and running fio (https://github.com/axboe/fio)
a few times on an NVMe device, as a way to get lots of
get_user_pages_fast() calls, the counters look like this:

$ cat /proc/vmstat |grep gup
nr_gup_slow_pages_requested 21319
nr_gup_fast_pages_requested 11533792
nr_gup_fast_page_backoffs 0
nr_gup_page_count_overflows 0
nr_gup_pages_returned 11555104

Interpretation of the above:
   Total gup requests (slow + fast): 11555111
   Total put_user_page calls:        11555104

This shows 7 more calls to get_user_pages(), than to
put_user_page(). That may, or may not, represent a
problem worth investigating.

Normally, those last two numbers should be equal, but a
couple of things may cause them to differ:

1) Inherent race condition in reading /proc/vmstat values.

2) Bugs at any of the get_user_pages*() call sites. Those
sites need to match get_user_pages() and put_user_page() calls.

Signed-off-by: John Hubbard <jhubbard@nvidia.com>
---
 include/linux/mmzone.h |  5 +++++
 mm/gup.c               | 20 ++++++++++++++++++++
 mm/swap.c              |  1 +
 mm/vmstat.c            |  5 +++++
 4 files changed, 31 insertions(+)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 842f9189537b..f20c14958a2b 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -183,6 +183,11 @@ enum node_stat_item {
 	NR_DIRTIED,		/* page dirtyings since bootup */
 	NR_WRITTEN,		/* page writings since bootup */
 	NR_KERNEL_MISC_RECLAIMABLE,	/* reclaimable non-slab kernel pages */
+	NR_GUP_SLOW_PAGES_REQUESTED,	/* via: get_user_pages() */
+	NR_GUP_FAST_PAGES_REQUESTED,	/* via: get_user_pages_fast() */
+	NR_GUP_FAST_PAGE_BACKOFFS,	/* gup_fast() lost to page_mkclean() */
+	NR_GUP_PAGE_COUNT_OVERFLOWS,	/* gup count overflowed: gup() failed */
+	NR_GUP_PAGES_RETURNED,		/* via: put_user_page() */
 	NR_VM_NODE_STAT_ITEMS
 };
 
diff --git a/mm/gup.c b/mm/gup.c
index 3291da342f9c..848ee7899831 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -37,6 +37,8 @@ int get_gup_pin_page(struct page *page)
 	page = compound_head(page);
 
 	if (page_ref_count(page) >= (UINT_MAX - GUP_PIN_COUNTING_BIAS)) {
+		mod_node_page_state(page_pgdat(page),
+				    NR_GUP_PAGE_COUNT_OVERFLOWS, 1);
 		WARN_ONCE(1, "get_user_pages pin count overflowed");
 		return -EOVERFLOW;
 	}
@@ -184,6 +186,8 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
 			page = ERR_PTR(ret);
 			goto out;
 		}
+		mod_node_page_state(page_pgdat(page),
+				    NR_GUP_SLOW_PAGES_REQUESTED, 1);
 	}
 	if (flags & FOLL_TOUCH) {
 		if ((flags & FOLL_WRITE) &&
@@ -527,6 +531,8 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
 	ret = get_gup_pin_page(*page);
 	if (ret)
 		goto unmap;
+
+	mod_node_page_state(page_pgdat(*page), NR_GUP_SLOW_PAGES_REQUESTED, 1);
 out:
 	ret = 0;
 unmap:
@@ -1461,7 +1467,12 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
 		if (!page_cache_gup_pin_speculative(head))
 			goto pte_unmap;
 
+		mod_node_page_state(page_pgdat(head),
+				    NR_GUP_FAST_PAGES_REQUESTED, 1);
+
 		if (unlikely(pte_val(pte) != pte_val(*ptep))) {
+			mod_node_page_state(page_pgdat(head),
+					    NR_GUP_FAST_PAGE_BACKOFFS, 1);
 			put_user_page(head);
 			goto pte_unmap;
 		}
@@ -1522,6 +1533,9 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr,
 			return 0;
 		}
 
+		mod_node_page_state(page_pgdat(page),
+				    NR_GUP_FAST_PAGES_REQUESTED, 1);
+
 		(*nr)++;
 		pfn++;
 	} while (addr += PAGE_SIZE, addr != end);
@@ -1607,6 +1621,8 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
 		return 0;
 	}
 
+	mod_node_page_state(page_pgdat(head), NR_GUP_FAST_PAGES_REQUESTED, 1);
+
 	if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
 		*nr -= refs;
 		put_user_page(head);
@@ -1644,6 +1660,8 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
 		return 0;
 	}
 
+	mod_node_page_state(page_pgdat(head), NR_GUP_FAST_PAGES_REQUESTED, 1);
+
 	if (unlikely(pud_val(orig) != pud_val(*pudp))) {
 		*nr -= refs;
 		put_user_page(head);
@@ -1680,6 +1698,8 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
 		return 0;
 	}
 
+	mod_node_page_state(page_pgdat(head), NR_GUP_FAST_PAGES_REQUESTED, 1);
+
 	if (unlikely(pgd_val(orig) != pgd_val(*pgdp))) {
 		*nr -= refs;
 		put_user_page(head);
diff --git a/mm/swap.c b/mm/swap.c
index 39b0ddd35933..49e192f242d4 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -150,6 +150,7 @@ void put_user_page(struct page *page)
 
 	VM_BUG_ON_PAGE(page_ref_count(page) < GUP_PIN_COUNTING_BIAS, page);
 
+	mod_node_page_state(page_pgdat(page), NR_GUP_PAGES_RETURNED, 1);
 	page_ref_sub(page, GUP_PIN_COUNTING_BIAS);
 }
 EXPORT_SYMBOL(put_user_page);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 83b30edc2f7f..18a1a4a2dd29 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1164,6 +1164,11 @@ const char * const vmstat_text[] = {
 	"nr_dirtied",
 	"nr_written",
 	"nr_kernel_misc_reclaimable",
+	"nr_gup_slow_pages_requested",
+	"nr_gup_fast_pages_requested",
+	"nr_gup_fast_page_backoffs",
+	"nr_gup_page_count_overflows",
+	"nr_gup_pages_returned",
 
 	/* enum writeback_stat_item counters */
 	"nr_dirty_threshold",
-- 
2.20.1


  parent reply index

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-04  5:21 [PATCH 0/6] RFC v2: mm: gup/dma tracking john.hubbard
2019-02-04  5:21 ` [PATCH 1/6] mm: introduce put_user_page*(), placeholder versions john.hubbard
2019-02-04  5:21 ` [PATCH 2/6] infiniband/mm: convert put_page() to put_user_page*() john.hubbard
2019-02-04  5:21 ` [PATCH 3/6] mm: page_cache_add_speculative(): refactoring john.hubbard
2019-02-04  5:21 ` [PATCH 4/6] mm/gup: track gup-pinned pages john.hubbard
2019-02-04 18:19   ` Matthew Wilcox
2019-02-04 19:11     ` John Hubbard
2019-02-20 19:24   ` Ira Weiny
2019-02-20 20:22     ` John Hubbard
2019-02-04  5:21 ` john.hubbard [this message]
2019-02-04  5:21 ` [PATCH 6/6] mm/gup: Documentation/vm/get_user_pages.rst, MAINTAINERS john.hubbard
2019-02-05 16:40   ` Mike Rapoport
2019-02-05 21:53     ` John Hubbard
2019-02-04 16:08 ` [PATCH 0/6] RFC v2: mm: gup/dma tracking Christopher Lameter
2019-02-04 16:12   ` Christoph Hellwig
2019-02-04 16:59     ` Christopher Lameter
2019-02-04 17:14 ` Christopher Lameter
2019-02-04 17:51   ` Jason Gunthorpe
2019-02-04 18:21     ` Christopher Lameter
2019-02-04 19:09       ` Matthew Wilcox
2019-02-04 23:35   ` Ira Weiny
2019-02-05 19:30     ` Christopher Lameter
2019-02-05  1:41 ` Tom Talpey
2019-02-05  8:22   ` John Hubbard
2019-02-05 13:38     ` Tom Talpey
2019-02-05 21:55       ` John Hubbard

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190204052135.25784-6-jhubbard@nvidia.com \
    --to=john.hubbard@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=benve@cisco.com \
    --cc=cl@linux.com \
    --cc=dan.j.williams@intel.com \
    --cc=david@fromorbit.com \
    --cc=dennis.dalessandro@intel.com \
    --cc=dledford@redhat.com \
    --cc=hch@infradead.org \
    --cc=jack@suse.cz \
    --cc=jgg@ziepe.ca \
    --cc=jglisse@redhat.com \
    --cc=jhubbard@nvidia.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@kernel.org \
    --cc=mike.marciniszyn@intel.com \
    --cc=rcampbell@nvidia.com \
    --cc=rppt@linux.ibm.com \
    --cc=tom@talpey.com \
    --cc=viro@zeniv.linux.org.uk \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-Fsdevel Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-fsdevel/0 linux-fsdevel/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-fsdevel linux-fsdevel/ https://lore.kernel.org/linux-fsdevel \
		linux-fsdevel@vger.kernel.org linux-fsdevel@archiver.kernel.org
	public-inbox-index linux-fsdevel


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-fsdevel


AGPL code for this site: git clone https://public-inbox.org/ public-inbox