From: Vlastimil Babka <vbabka@suse.cz>
To: stable@vger.kernel.org
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	Ajay Kaher <akaher@vmware.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Jann Horn <jannh@google.com>,
	Matthew Wilcox <willy@infradead.org>,
	stable@kernel.org, Vlastimil Babka <vbabka@suse.cz>
Subject: [PATCH STABLE 4.4 5/8] mm: prevent get_user_pages() from overflowing page refcount
Date: Fri,  8 Nov 2019 10:38:11 +0100
Message-ID: <20191108093814.16032-6-vbabka@suse.cz>
In-Reply-To: <20191108093814.16032-1-vbabka@suse.cz>

From: Linus Torvalds <torvalds@linux-foundation.org>

commit 8fde12ca79aff9b5ba951fce1a2641901b8d8e64 upstream.

[ 4.4 backport: 4.4 still has get_page_foll(), so add try_get_page()-like
                checks there, enabled by a new parameter that is false where
                the upstream patch does not replace get_page() with
                try_get_page() (the THP and hugetlb callers).
                In gup_pte_range() we don't expect tail pages, so just check
                the page refcount instead of using try_get_compound_head().
                Also patch the arch-specific variants of gup.c for x86 and
                s390, leaving mips, sh and sparc alone.                      ]
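
[ For orientation, the two call modes of the modified get_page_foll(),
  condensed from the mm/gup.c, mm/huge_memory.c and mm/hugetlb.c hunks
  below (a summary sketch, not additional code in this patch):

	/* THP and hugetlb callers keep the old, unchecked behaviour: */
	get_page_foll(page, false);

	/* follow_page_pte() enables the new overflow check: */
	if (flags & FOLL_GET)
		if (!get_page_foll(page, true)) {
			page = ERR_PTR(-ENOMEM);
			goto out;
		}
  ]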

If the page refcount wraps around past zero, it will be freed while
there are still four billion references to it.  One of the possible
avenues for an attacker to try to make this happen is by doing direct IO
on a page multiple times.  This patch makes get_user_pages() refuse to
take a new page reference if there are already more than two billion
references to the page.
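
For reference, the core of the fix is a saturation check before taking a
new reference. A minimal sketch of the try_get_page() helper added by
patch 4/8 of this series (the exact 4.4 form may differ slightly):

	static inline bool try_get_page(struct page *page)
	{
		/*
		 * page_ref_count() returns a signed 32-bit value, so it
		 * goes negative once roughly 2^31 references exist.
		 * Refusing the increment here leaves a margin of about
		 * two billion references before an actual wrap to zero.
		 */
		if (WARN_ON_ONCE(page_ref_count(page) <= 0))
			return false;
		get_page(page);
		return true;
	}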

Reported-by: Jann Horn <jannh@google.com>
Acked-by: Matthew Wilcox <willy@infradead.org>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 arch/s390/mm/gup.c |  6 ++++--
 arch/x86/mm/gup.c  |  9 ++++++++-
 mm/gup.c           | 39 +++++++++++++++++++++++++++++++--------
 mm/huge_memory.c   |  2 +-
 mm/hugetlb.c       | 18 ++++++++++++++++--
 mm/internal.h      | 12 +++++++++---
 6 files changed, 69 insertions(+), 17 deletions(-)

diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 7ad41be8b373..bdaa5f7b652c 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -37,7 +37,8 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
 			return 0;
 		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
 		page = pte_page(pte);
-		if (!page_cache_get_speculative(page))
+		if (unlikely(WARN_ON_ONCE(page_ref_count(page) < 0)
+		    || !page_cache_get_speculative(page)))
 			return 0;
 		if (unlikely(pte_val(pte) != pte_val(*ptep))) {
 			put_page(page);
@@ -76,7 +77,8 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
 		refs++;
 	} while (addr += PAGE_SIZE, addr != end);
 
-	if (!page_cache_add_speculative(head, refs)) {
+	if (unlikely(WARN_ON_ONCE(page_ref_count(head) < 0)
+	    || !page_cache_add_speculative(head, refs))) {
 		*nr -= refs;
 		return 0;
 	}
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 7d2542ad346a..6612d532e42e 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -95,7 +95,10 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
 		}
 		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
 		page = pte_page(pte);
-		get_page(page);
+		if (unlikely(!try_get_page(page))) {
+			pte_unmap(ptep);
+			return 0;
+		}
 		SetPageReferenced(page);
 		pages[*nr] = page;
 		(*nr)++;
@@ -132,6 +135,8 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
 
 	refs = 0;
 	head = pmd_page(pmd);
+	if (WARN_ON_ONCE(page_ref_count(head) <= 0))
+		return 0;
 	page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
 	do {
 		VM_BUG_ON_PAGE(compound_head(page) != head, page);
@@ -208,6 +213,8 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
 
 	refs = 0;
 	head = pud_page(pud);
+	if (WARN_ON_ONCE(page_ref_count(head) <= 0))
+		return 0;
 	page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
 	do {
 		VM_BUG_ON_PAGE(compound_head(page) != head, page);
diff --git a/mm/gup.c b/mm/gup.c
index 71e9d0093a35..fc8e2dca99fc 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -127,7 +127,10 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
 	}
 
 	if (flags & FOLL_GET)
-		get_page_foll(page);
+		if (!get_page_foll(page, true)) {
+			page = ERR_PTR(-ENOMEM);
+			goto out;
+		}
 	if (flags & FOLL_TOUCH) {
 		if ((flags & FOLL_WRITE) &&
 		    !pte_dirty(pte) && !PageDirty(page))
@@ -289,7 +292,10 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
 			goto unmap;
 		*page = pte_page(*pte);
 	}
-	get_page(*page);
+	if (unlikely(!try_get_page(*page))) {
+		ret = -ENOMEM;
+		goto unmap;
+	}
 out:
 	ret = 0;
 unmap:
@@ -1053,6 +1059,20 @@ struct page *get_dump_page(unsigned long addr)
  */
 #ifdef CONFIG_HAVE_GENERIC_RCU_GUP
 
+/*
+ * Return the compound head page with ref appropriately incremented,
+ * or NULL if that failed.
+ */
+static inline struct page *try_get_compound_head(struct page *page, int refs)
+{
+	struct page *head = compound_head(page);
+	if (WARN_ON_ONCE(page_ref_count(head) < 0))
+		return NULL;
+	if (unlikely(!page_cache_add_speculative(head, refs)))
+		return NULL;
+	return head;
+}
+
 #ifdef __HAVE_ARCH_PTE_SPECIAL
 static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
 			 int write, struct page **pages, int *nr)
@@ -1083,6 +1103,9 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
 		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
 		page = pte_page(pte);
 
+		if (WARN_ON_ONCE(page_ref_count(page) < 0))
+			goto pte_unmap;
+
 		if (!page_cache_get_speculative(page))
 			goto pte_unmap;
 
@@ -1139,8 +1162,8 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
 		refs++;
 	} while (addr += PAGE_SIZE, addr != end);
 
-	head = compound_head(pmd_page(orig));
-	if (!page_cache_add_speculative(head, refs)) {
+	head = try_get_compound_head(pmd_page(orig), refs);
+	if (!head) {
 		*nr -= refs;
 		return 0;
 	}
@@ -1185,8 +1208,8 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
 		refs++;
 	} while (addr += PAGE_SIZE, addr != end);
 
-	head = compound_head(pud_page(orig));
-	if (!page_cache_add_speculative(head, refs)) {
+	head = try_get_compound_head(pud_page(orig), refs);
+	if (!head) {
 		*nr -= refs;
 		return 0;
 	}
@@ -1227,8 +1250,8 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
 		refs++;
 	} while (addr += PAGE_SIZE, addr != end);
 
-	head = compound_head(pgd_page(orig));
-	if (!page_cache_add_speculative(head, refs)) {
+	head = try_get_compound_head(pgd_page(orig), refs);
+	if (!head) {
 		*nr -= refs;
 		return 0;
 	}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 465786cd6490..6087277981a6 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1322,7 +1322,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 	page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT;
 	VM_BUG_ON_PAGE(!PageCompound(page), page);
 	if (flags & FOLL_GET)
-		get_page_foll(page);
+		get_page_foll(page, false);
 
 out:
 	return page;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index fd932e7a25dd..b4a8a18fa3a5 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3886,6 +3886,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned long vaddr = *position;
 	unsigned long remainder = *nr_pages;
 	struct hstate *h = hstate_vma(vma);
+	int err = -EFAULT;
 
 	while (vaddr < vma->vm_end && remainder) {
 		pte_t *pte;
@@ -3957,10 +3958,23 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 		pfn_offset = (vaddr & ~huge_page_mask(h)) >> PAGE_SHIFT;
 		page = pte_page(huge_ptep_get(pte));
+
+		/*
+		 * Instead of doing 'try_get_page()' below in the same_page
+		 * loop, just check the count once here.
+		 */
+		if (unlikely(page_count(page) <= 0)) {
+			if (pages) {
+				spin_unlock(ptl);
+				remainder = 0;
+				err = -ENOMEM;
+				break;
+			}
+		}
 same_page:
 		if (pages) {
 			pages[i] = mem_map_offset(page, pfn_offset);
-			get_page_foll(pages[i]);
+			get_page_foll(pages[i], false);
 		}
 
 		if (vmas)
@@ -3983,7 +3997,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	*nr_pages = remainder;
 	*position = vaddr;
 
-	return i ? i : -EFAULT;
+	return i ? i : err;
 }
 
 unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
diff --git a/mm/internal.h b/mm/internal.h
index a6639c72780a..b52041969d06 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -93,23 +93,29 @@ static inline void __get_page_tail_foll(struct page *page,
  * follow_page() and it must be called while holding the proper PT
  * lock while the pte (or pmd_trans_huge) is still mapping the page.
  */
-static inline void get_page_foll(struct page *page)
+static inline bool get_page_foll(struct page *page, bool check)
 {
-	if (unlikely(PageTail(page)))
+	if (unlikely(PageTail(page))) {
 		/*
 		 * This is safe only because
 		 * __split_huge_page_refcount() can't run under
 		 * get_page_foll() because we hold the proper PT lock.
 		 */
+		if (check && WARN_ON_ONCE(
+				page_ref_count(compound_head(page)) <= 0))
+			return false;
 		__get_page_tail_foll(page, true);
-	else {
+	} else {
 		/*
 		 * Getting a normal page or the head of a compound page
 		 * requires to already have an elevated page->_count.
 		 */
 		VM_BUG_ON_PAGE(page_ref_zero_or_close_to_overflow(page), page);
+		if (check && WARN_ON_ONCE(page_ref_count(page) <= 0))
+			return false;
 		atomic_inc(&page->_count);
 	}
+	return true;
 }
 
 extern unsigned long highest_memmap_pfn;
-- 
2.23.0


