Re: [PATCH v2 6/8] mm: prevent get_user_pages() from overflowing page refcount

From: Vlastimil Babka <vbabka@suse.cz>
To: Ajay Kaher <akaher@vmware.com>, gregkh@linuxfoundation.org
Cc: torvalds@linux-foundation.org, punit.agrawal@arm.com,
	akpm@linux-foundation.org, kirill.shutemov@linux.intel.com,
	willy@infradead.org, will.deacon@arm.com, mszeredi@redhat.com,
	stable@vger.kernel.org, linux-mm@kvack.org,
	linux-kernel@vger.kernel.org, srivatsab@vmware.com,
	srivatsa@csail.mit.edu, amakhalov@vmware.com,
	srinidhir@vmware.com, bvikas@vmware.com, anishs@vmware.com,
	vsirnapalli@vmware.com, srostedt@vmware.com, stable@kernel.org,
	Ben Hutchings <ben@decadent.org.uk>
Subject: Re: [PATCH v2 6/8] mm: prevent get_user_pages() from overflowing page refcount
Date: Wed, 9 Oct 2019 15:13:13 +0200	[thread overview]
Message-ID: <f899be71-4bc0-d07b-f650-d85a335cdebb@suse.cz> (raw)
In-Reply-To: <1570581863-12090-7-git-send-email-akaher@vmware.com>

On 10/9/19 2:44 AM, Ajay Kaher wrote:
> From: Linus Torvalds <torvalds@linux-foundation.org>
> 
> commit 8fde12ca79aff9b5ba951fce1a2641901b8d8e64 upstream.
> 
> If the page refcount wraps around past zero, it will be freed while
> there are still four billion references to it.  One of the possible
> avenues for an attacker to try to make this happen is by doing direct IO
> on a page multiple times.  This patch makes get_user_pages() refuse to
> take a new page reference if there are already more than two billion
> references to the page.
> 
> Reported-by: Jann Horn <jannh@google.com>
> Acked-by: Matthew Wilcox <willy@infradead.org>
> Cc: stable@kernel.org
> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
> [ 4.4.y backport notes:
>   Ajay: Added local variable 'err' within follow_hugetlb_page()
>         from 2be7cfed995e, to resolve compilation error
>   Srivatsa: Replaced call to get_page_foll() with try_get_page_foll() ]
> Signed-off-by: Srivatsa S. Bhat (VMware) <srivatsa@csail.mit.edu>
> Signed-off-by: Ajay Kaher <akaher@vmware.com>
> ---
>  mm/gup.c     | 43 ++++++++++++++++++++++++++++++++-----------
>  mm/hugetlb.c | 16 +++++++++++++++-
>  2 files changed, 47 insertions(+), 12 deletions(-)

This seems to have the same issue as the 4.9 stable version [1]: it does
not touch the arch-specific gup.c variants.

[1]
https://lore.kernel.org/lkml/6650323f-dbc9-f069-000b-f6b0f941a065@suse.cz/

> diff --git a/mm/gup.c b/mm/gup.c
> index fae4d1e..171b460 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -126,8 +126,12 @@ retry:
>  		}
>  	}
>  
> -	if (flags & FOLL_GET)
> -		get_page_foll(page);
> +	if (flags & FOLL_GET) {
> +		if (unlikely(!try_get_page_foll(page))) {
> +			page = ERR_PTR(-ENOMEM);
> +			goto out;
> +		}
> +	}
>  	if (flags & FOLL_TOUCH) {
>  		if ((flags & FOLL_WRITE) &&
>  		    !pte_dirty(pte) && !PageDirty(page))
> @@ -289,7 +293,10 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
>  			goto unmap;
>  		*page = pte_page(*pte);
>  	}
> -	get_page(*page);
> +	if (unlikely(!try_get_page(*page))) {
> +		ret = -ENOMEM;
> +		goto unmap;
> +	}
>  out:
>  	ret = 0;
>  unmap:
> @@ -1053,6 +1060,20 @@ struct page *get_dump_page(unsigned long addr)
>   */
>  #ifdef CONFIG_HAVE_GENERIC_RCU_GUP
>  
> +/*
> + * Return the compound head page with ref appropriately incremented,
> + * or NULL if that failed.
> + */
> +static inline struct page *try_get_compound_head(struct page *page, int refs)
> +{
> +	struct page *head = compound_head(page);
> +	if (WARN_ON_ONCE(atomic_read(&head->_count) < 0))
> +		return NULL;
> +	if (unlikely(!page_cache_add_speculative(head, refs)))
> +		return NULL;
> +	return head;
> +}
> +
>  #ifdef __HAVE_ARCH_PTE_SPECIAL
>  static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
>  			 int write, struct page **pages, int *nr)
> @@ -1082,9 +1103,9 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
>  
>  		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
>  		page = pte_page(pte);
> -		head = compound_head(page);
>  
> -		if (!page_cache_get_speculative(head))
> +		head = try_get_compound_head(page, 1);
> +		if (!head)
>  			goto pte_unmap;
>  
>  		if (unlikely(pte_val(pte) != pte_val(*ptep))) {
> @@ -1141,8 +1162,8 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
>  		refs++;
>  	} while (addr += PAGE_SIZE, addr != end);
>  
> -	head = compound_head(pmd_page(orig));
> -	if (!page_cache_add_speculative(head, refs)) {
> +	head = try_get_compound_head(pmd_page(orig), refs);
> +	if (!head) {
>  		*nr -= refs;
>  		return 0;
>  	}
> @@ -1187,8 +1208,8 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
>  		refs++;
>  	} while (addr += PAGE_SIZE, addr != end);
>  
> -	head = compound_head(pud_page(orig));
> -	if (!page_cache_add_speculative(head, refs)) {
> +	head = try_get_compound_head(pud_page(orig), refs);
> +	if (!head) {
>  		*nr -= refs;
>  		return 0;
>  	}
> @@ -1229,8 +1250,8 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
>  		refs++;
>  	} while (addr += PAGE_SIZE, addr != end);
>  
> -	head = compound_head(pgd_page(orig));
> -	if (!page_cache_add_speculative(head, refs)) {
> +	head = try_get_compound_head(pgd_page(orig), refs);
> +	if (!head) {
>  		*nr -= refs;
>  		return 0;
>  	}
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index fd932e7..3a1501e 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -3886,6 +3886,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
>  	unsigned long vaddr = *position;
>  	unsigned long remainder = *nr_pages;
>  	struct hstate *h = hstate_vma(vma);
> +	int err = -EFAULT;
>  
>  	while (vaddr < vma->vm_end && remainder) {
>  		pte_t *pte;
> @@ -3957,6 +3958,19 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
>  
>  		pfn_offset = (vaddr & ~huge_page_mask(h)) >> PAGE_SHIFT;
>  		page = pte_page(huge_ptep_get(pte));
> +
> +		/*
> +		 * Instead of doing 'try_get_page_foll()' below in the same_page
> +		 * loop, just check the count once here.
> +		 */
> +		if (unlikely(page_count(page) <= 0)) {
> +			if (pages) {
> +				spin_unlock(ptl);
> +				remainder = 0;
> +				err = -ENOMEM;
> +				break;
> +			}
> +		}
>  same_page:
>  		if (pages) {
>  			pages[i] = mem_map_offset(page, pfn_offset);
> @@ -3983,7 +3997,7 @@ same_page:
>  	*nr_pages = remainder;
>  	*position = vaddr;
>  
> -	return i ? i : -EFAULT;
> +	return i ? i : err;
>  }
>  
>  unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
>