All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Xu <peterx@redhat.com>
To: David Stevens <stevensd@chromium.org>
Cc: linux-mm@kvack.org, Matthew Wilcox <willy@infradead.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	"Kirill A . Shutemov" <kirill@shutemov.name>,
	Yang Shi <shy828301@gmail.com>,
	David Hildenbrand <david@redhat.com>,
	Hugh Dickins <hughd@google.com>,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH v4 1/3] mm/khugepaged: refactor collapse_file control flow
Date: Tue, 21 Feb 2023 16:54:04 -0500	[thread overview]
Message-ID: <Y/U9fBxVJdhxiZ1v@x1n> (raw)
In-Reply-To: <20230217085439.2826375-2-stevensd@google.com>

On Fri, Feb 17, 2023 at 05:54:37PM +0900, David Stevens wrote:
> From: David Stevens <stevensd@chromium.org>
> 
> Add a rollback label to deal with failure, instead of continuously
> checking for RESULT_SUCCESS, to make it easier to add more failure
> cases. The refactoring also allows the collapse_file tracepoint to
> include hpage on success (instead of NULL).
> 
> Signed-off-by: David Stevens <stevensd@chromium.org>
> ---
>  mm/khugepaged.c | 223 ++++++++++++++++++++++++------------------------
>  1 file changed, 110 insertions(+), 113 deletions(-)
> 
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 8dbc39896811..6a3d6d2e25e0 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -1885,6 +1885,12 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
>  	if (result != SCAN_SUCCEED)
>  		goto out;
>  
> +	__SetPageLocked(hpage);
> +	if (is_shmem)
> +		__SetPageSwapBacked(hpage);
> +	hpage->index = start;
> +	hpage->mapping = mapping;
> +
>  	/*
>  	 * Ensure we have slots for all the pages in the range.  This is
>  	 * almost certainly a no-op because most of the pages must be present
> @@ -1897,16 +1903,10 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
>  		xas_unlock_irq(&xas);
>  		if (!xas_nomem(&xas, GFP_KERNEL)) {
>  			result = SCAN_FAIL;
> -			goto out;
> +			goto rollback;
>  		}
>  	} while (1);
>  
> -	__SetPageLocked(hpage);
> -	if (is_shmem)
> -		__SetPageSwapBacked(hpage);
> -	hpage->index = start;
> -	hpage->mapping = mapping;
> -
>  	/*
>  	 * At this point the hpage is locked and not up-to-date.
>  	 * It's safe to insert it into the page cache, because nobody would
> @@ -2123,131 +2123,128 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
>  	 */
>  	try_to_unmap_flush();
>  
> -	if (result == SCAN_SUCCEED) {
> -		/*
> -		 * Replacing old pages with new one has succeeded, now we
> -		 * attempt to copy the contents.
> -		 */
> -		index = start;
> -		list_for_each_entry(page, &pagelist, lru) {
> -			while (index < page->index) {
> -				clear_highpage(hpage + (index % HPAGE_PMD_NR));
> -				index++;
> -			}
> -			if (copy_mc_page(hpage + (page->index % HPAGE_PMD_NR),
> -					 page) > 0) {
> -				result = SCAN_COPY_MC;
> -				break;
> -			}
> -			index++;
> -		}
> -		while (result == SCAN_SUCCEED && index < end) {
> +	if (result != SCAN_SUCCEED)
> +		goto rollback;
> +
> +	/*
> +	 * Replacing old pages with new one has succeeded, now we
> +	 * attempt to copy the contents.
> +	 */
> +	index = start;
> +	list_for_each_entry(page, &pagelist, lru) {
> +		while (index < page->index) {
>  			clear_highpage(hpage + (index % HPAGE_PMD_NR));
>  			index++;
>  		}
> +		if (copy_mc_page(hpage + (page->index % HPAGE_PMD_NR),
> +				 page) > 0) {
> +			result = SCAN_COPY_MC;
> +			goto rollback;
> +		}
> +		index++;
> +	}
> +	while (index < end) {
> +		clear_highpage(hpage + (index % HPAGE_PMD_NR));
> +		index++;
>  	}
>  
> -	if (result == SCAN_SUCCEED) {
> -		/*
> -		 * Copying old pages to huge one has succeeded, now we
> -		 * need to free the old pages.
> -		 */
> -		list_for_each_entry_safe(page, tmp, &pagelist, lru) {
> -			list_del(&page->lru);
> -			page->mapping = NULL;
> -			page_ref_unfreeze(page, 1);
> -			ClearPageActive(page);
> -			ClearPageUnevictable(page);
> -			unlock_page(page);
> -			put_page(page);
> -		}
> +	/*
> +	 * Copying old pages to huge one has succeeded, now we
> +	 * need to free the old pages.
> +	 */
> +	list_for_each_entry_safe(page, tmp, &pagelist, lru) {
> +		list_del(&page->lru);
> +		page->mapping = NULL;
> +		page_ref_unfreeze(page, 1);
> +		ClearPageActive(page);
> +		ClearPageUnevictable(page);
> +		unlock_page(page);
> +		put_page(page);
> +	}
>  
> -		xas_lock_irq(&xas);
> -		if (is_shmem)
> -			__mod_lruvec_page_state(hpage, NR_SHMEM_THPS, nr);
> -		else
> -			__mod_lruvec_page_state(hpage, NR_FILE_THPS, nr);
> +	xas_lock_irq(&xas);
> +	if (is_shmem)
> +		__mod_lruvec_page_state(hpage, NR_SHMEM_THPS, nr);
> +	else
> +		__mod_lruvec_page_state(hpage, NR_FILE_THPS, nr);
> +
> +	if (nr_none) {
> +		__mod_lruvec_page_state(hpage, NR_FILE_PAGES, nr_none);
> +		/* nr_none is always 0 for non-shmem. */
> +		__mod_lruvec_page_state(hpage, NR_SHMEM, nr_none);
> +	}
> +	/* Join all the small entries into a single multi-index entry. */
> +	xas_set_order(&xas, start, HPAGE_PMD_ORDER);
> +	xas_store(&xas, hpage);
> +	xas_unlock_irq(&xas);
>  
> -		if (nr_none) {
> -			__mod_lruvec_page_state(hpage, NR_FILE_PAGES, nr_none);
> -			/* nr_none is always 0 for non-shmem. */
> -			__mod_lruvec_page_state(hpage, NR_SHMEM, nr_none);
> -		}
> -		/* Join all the small entries into a single multi-index entry. */
> -		xas_set_order(&xas, start, HPAGE_PMD_ORDER);
> -		xas_store(&xas, hpage);
> -		xas_unlock_irq(&xas);
> +	folio = page_folio(hpage);
> +	folio_mark_uptodate(folio);
> +	folio_ref_add(folio, HPAGE_PMD_NR - 1);
>  
> -		folio = page_folio(hpage);
> -		folio_mark_uptodate(folio);
> -		folio_ref_add(folio, HPAGE_PMD_NR - 1);
> +	if (is_shmem)
> +		folio_mark_dirty(folio);
> +	folio_add_lru(folio);
>  
> -		if (is_shmem)
> -			folio_mark_dirty(folio);
> -		folio_add_lru(folio);
> +	/*
> +	 * Remove pte page tables, so we can re-fault the page as huge.
> +	 */
> +	result = retract_page_tables(mapping, start, mm, addr, hpage,
> +				     cc);
> +	unlock_page(hpage);
> +	goto out;
> +
> +rollback:
> +	/* Something went wrong: roll back page cache changes */
> +	xas_lock_irq(&xas);
> +	if (nr_none) {
> +		mapping->nrpages -= nr_none;
> +		shmem_uncharge(mapping->host, nr_none);
> +	}
>  
> -		/*
> -		 * Remove pte page tables, so we can re-fault the page as huge.
> -		 */
> -		result = retract_page_tables(mapping, start, mm, addr, hpage,
> -					     cc);
> -		unlock_page(hpage);
> -		hpage = NULL;
> -	} else {
> -		/* Something went wrong: roll back page cache changes */
> -		xas_lock_irq(&xas);
> -		if (nr_none) {
> -			mapping->nrpages -= nr_none;
> -			shmem_uncharge(mapping->host, nr_none);
> +	xas_set(&xas, start);
> +	xas_for_each(&xas, page, end - 1) {
> +		page = list_first_entry_or_null(&pagelist,
> +				struct page, lru);
> +		if (!page || xas.xa_index < page->index) {
> +			if (!nr_none)
> +				break;
> +			nr_none--;
> +			/* Put holes back where they were */
> +			xas_store(&xas, NULL);
> +			continue;
>  		}
>  
> -		xas_set(&xas, start);
> -		xas_for_each(&xas, page, end - 1) {
> -			page = list_first_entry_or_null(&pagelist,
> -					struct page, lru);
> -			if (!page || xas.xa_index < page->index) {
> -				if (!nr_none)
> -					break;
> -				nr_none--;
> -				/* Put holes back where they were */
> -				xas_store(&xas, NULL);
> -				continue;
> -			}
> +		VM_BUG_ON_PAGE(page->index != xas.xa_index, page);
>  
> -			VM_BUG_ON_PAGE(page->index != xas.xa_index, page);
> +		/* Unfreeze the page. */
> +		list_del(&page->lru);
> +		page_ref_unfreeze(page, 2);
> +		xas_store(&xas, page);
> +		xas_pause(&xas);
> +		xas_unlock_irq(&xas);
> +		unlock_page(page);
> +		putback_lru_page(page);
> +		xas_lock_irq(&xas);
> +	}
> +	VM_BUG_ON(nr_none);
> +	/*
> +	 * Undo the updates of filemap_nr_thps_inc for non-SHMEM file only.
> +	 * This undo is not needed unless failure is due to SCAN_COPY_MC.
> +	 */
> +	if (!is_shmem && result == SCAN_COPY_MC)
> +		filemap_nr_thps_dec(mapping);
>  
> -			/* Unfreeze the page. */
> -			list_del(&page->lru);
> -			page_ref_unfreeze(page, 2);
> -			xas_store(&xas, page);
> -			xas_pause(&xas);
> -			xas_unlock_irq(&xas);
> -			unlock_page(page);
> -			putback_lru_page(page);
> -			xas_lock_irq(&xas);
> -		}
> -		VM_BUG_ON(nr_none);
> -		/*
> -		 * Undo the updates of filemap_nr_thps_inc for non-SHMEM file only.
> -		 * This undo is not needed unless failure is due to SCAN_COPY_MC.
> -		 */
> -		if (!is_shmem && result == SCAN_COPY_MC)
> -			filemap_nr_thps_dec(mapping);
> +	xas_unlock_irq(&xas);
>  
> -		xas_unlock_irq(&xas);
> +	hpage->mapping = NULL;
>  
> -		hpage->mapping = NULL;
> -	}
> +	unlock_page(hpage);
> +	mem_cgroup_uncharge(page_folio(hpage));
> +	put_page(hpage);
>  
> -	if (hpage)
> -		unlock_page(hpage);
>  out:
>  	VM_BUG_ON(!list_empty(&pagelist));
> -	if (hpage) {
> -		mem_cgroup_uncharge(page_folio(hpage));
> -		put_page(hpage);
> -	}

Moving this chunk seems wrong, as it can leak the huge page if
alloc_charge_hpage() failed on mem charging, iiuc.

And I found that keeping it seems wrong either, because if mem charge
failed we'll uncharge even without charging it before.  But that's nothing
about this patch alone.

Posted a patch for this:

https://lore.kernel.org/all/20230221214344.609226-1-peterx@redhat.com/

I _think_ this patch will make sense after rebasing to that fix if that's
correct, but please review it and double check.

Thanks,

> -
>  	trace_mm_khugepaged_collapse_file(mm, hpage, index, is_shmem, addr, file, nr, result);
>  	return result;
>  }
> -- 
> 2.39.2.637.g21b0678d19-goog
> 

-- 
Peter Xu


  parent reply	other threads:[~2023-02-21 21:55 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-02-17  8:54 [PATCH v4 0/3] mm/khugepaged: fix khugepaged+shmem races David Stevens
2023-02-17  8:54 ` [PATCH v4 1/3] mm/khugepaged: refactor collapse_file control flow David Stevens
2023-02-17 23:44   ` Yang Shi
2023-02-21 21:54   ` Peter Xu [this message]
2023-02-21 22:28     ` Yang Shi
2023-02-22  4:08     ` David Stevens
2023-02-22 16:24       ` Peter Xu
2023-02-17  8:54 ` [PATCH v4 2/3] mm/khugepaged: skip shmem with userfaultfd David Stevens
2023-02-21 22:12   ` Peter Xu
2023-02-17  8:54 ` [PATCH v4 3/3] mm/khugepaged: maintain page cache uptodate flag David Stevens
2023-02-21 22:18   ` Peter Xu
2023-02-17 10:37 ` [PATCH v4 0/3] mm/khugepaged: fix khugepaged+shmem races Miko Larsson
2023-03-03 15:35 ` Peter Xu
2023-03-03 15:45   ` Zach O'Keefe
2023-03-03 18:55   ` Yang Shi
2023-03-03 22:52   ` Andrew Morton
2023-03-06  2:44     ` David Stevens
2023-03-06 21:25       ` Andrew Morton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Y/U9fBxVJdhxiZ1v@x1n \
    --to=peterx@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=david@redhat.com \
    --cc=hughd@google.com \
    --cc=kirill@shutemov.name \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=shy828301@gmail.com \
    --cc=stevensd@chromium.org \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.