All of lore.kernel.org
 help / color / mirror / Atom feed
From: Matthew Wilcox <willy@infradead.org>
To: Josef Bacik <josef@toxicpanda.com>
Cc: kernel-team@fb.com, linux-kernel@vger.kernel.org,
	hannes@cmpxchg.org, tj@kernel.org, linux-fsdevel@vger.kernel.org,
	akpm@linux-foundation.org, riel@redhat.com, linux-mm@kvack.org,
	linux-btrfs@vger.kernel.org
Subject: Re: [PATCH 1/9] mm: infrastructure for page fault page caching
Date: Thu, 27 Sep 2018 09:24:43 -0700	[thread overview]
Message-ID: <20180927162442.GC19006@bombadil.infradead.org> (raw)
In-Reply-To: <20180926210856.7895-2-josef@toxicpanda.com>

On Wed, Sep 26, 2018 at 05:08:48PM -0400, Josef Bacik wrote:
> We want to be able to cache the result of a previous loop of a page
> fault in the case that we use VM_FAULT_RETRY, so introduce
> handle_mm_fault_cacheable that will take a struct vm_fault directly, add
> a ->cached_page field to vm_fault, and add helpers to init/cleanup the
> struct vm_fault.
> 
> I've converted x86, other arch's can follow suit if they so wish, it's
> relatively straightforward.

Here's what I did back in January ... feel free to steal any of it if you
like it better.


diff --git a/mm/memory.c b/mm/memory.c
index 5eb3d2524bdc..403934297a3d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3977,36 +3977,28 @@ static int handle_pte_fault(struct vm_fault *vmf)
  * The mmap_sem may have been released depending on flags and our
  * return value.  See filemap_fault() and __lock_page_or_retry().
  */
-static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
-		unsigned int flags)
+static int __handle_mm_fault(struct vm_fault *vmf)
 {
-	struct vm_fault vmf = {
-		.vma = vma,
-		.address = address & PAGE_MASK,
-		.flags = flags,
-		.pgoff = linear_page_index(vma, address),
-		.gfp_mask = __get_fault_gfp_mask(vma),
-	};
-	unsigned int dirty = flags & FAULT_FLAG_WRITE;
-	struct mm_struct *mm = vma->vm_mm;
+	unsigned int dirty = vmf->flags & FAULT_FLAG_WRITE;
+	struct mm_struct *mm = vmf->vma->vm_mm;
 	pgd_t *pgd;
 	p4d_t *p4d;
 	int ret;
 
-	pgd = pgd_offset(mm, address);
-	p4d = p4d_alloc(mm, pgd, address);
+	pgd = pgd_offset(mm, vmf->address);
+	p4d = p4d_alloc(mm, pgd, vmf->address);
 	if (!p4d)
 		return VM_FAULT_OOM;
 
-	vmf.pud = pud_alloc(mm, p4d, address);
-	if (!vmf.pud)
+	vmf->pud = pud_alloc(mm, p4d, vmf->address);
+	if (!vmf->pud)
 		return VM_FAULT_OOM;
-	if (pud_none(*vmf.pud) && transparent_hugepage_enabled(vma)) {
-		ret = create_huge_pud(&vmf);
+	if (pud_none(*vmf->pud) && transparent_hugepage_enabled(vmf->vma)) {
+		ret = create_huge_pud(vmf);
 		if (!(ret & VM_FAULT_FALLBACK))
 			return ret;
 	} else {
-		pud_t orig_pud = *vmf.pud;
+		pud_t orig_pud = *vmf->pud;
 
 		barrier();
 		if (pud_trans_huge(orig_pud) || pud_devmap(orig_pud)) {
@@ -4014,50 +4006,51 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 			/* NUMA case for anonymous PUDs would go here */
 
 			if (dirty && !pud_access_permitted(orig_pud, WRITE)) {
-				ret = wp_huge_pud(&vmf, orig_pud);
+				ret = wp_huge_pud(vmf, orig_pud);
 				if (!(ret & VM_FAULT_FALLBACK))
 					return ret;
 			} else {
-				huge_pud_set_accessed(&vmf, orig_pud);
+				huge_pud_set_accessed(vmf, orig_pud);
 				return 0;
 			}
 		}
 	}
 
-	vmf.pmd = pmd_alloc(mm, vmf.pud, address);
-	if (!vmf.pmd)
+	vmf->pmd = pmd_alloc(mm, vmf->pud, vmf->address);
+	if (!vmf->pmd)
 		return VM_FAULT_OOM;
-	if (pmd_none(*vmf.pmd) && transparent_hugepage_enabled(vma)) {
-		ret = create_huge_pmd(&vmf);
+	if (pmd_none(*vmf->pmd) && transparent_hugepage_enabled(vmf->vma)) {
+		ret = create_huge_pmd(vmf);
 		if (!(ret & VM_FAULT_FALLBACK))
 			return ret;
 	} else {
-		pmd_t orig_pmd = *vmf.pmd;
+		pmd_t orig_pmd = *vmf->pmd;
 
 		barrier();
 		if (unlikely(is_swap_pmd(orig_pmd))) {
 			VM_BUG_ON(thp_migration_supported() &&
 					  !is_pmd_migration_entry(orig_pmd));
 			if (is_pmd_migration_entry(orig_pmd))
-				pmd_migration_entry_wait(mm, vmf.pmd);
+				pmd_migration_entry_wait(mm, vmf->pmd);
 			return 0;
 		}
 		if (pmd_trans_huge(orig_pmd) || pmd_devmap(orig_pmd)) {
-			if (pmd_protnone(orig_pmd) && vma_is_accessible(vma))
-				return do_huge_pmd_numa_page(&vmf, orig_pmd);
+			if (pmd_protnone(orig_pmd) &&
+						vma_is_accessible(vmf->vma))
+				return do_huge_pmd_numa_page(vmf, orig_pmd);
 
 			if (dirty && !pmd_access_permitted(orig_pmd, WRITE)) {
-				ret = wp_huge_pmd(&vmf, orig_pmd);
+				ret = wp_huge_pmd(vmf, orig_pmd);
 				if (!(ret & VM_FAULT_FALLBACK))
 					return ret;
 			} else {
-				huge_pmd_set_accessed(&vmf, orig_pmd);
+				huge_pmd_set_accessed(vmf, orig_pmd);
 				return 0;
 			}
 		}
 	}
 
-	return handle_pte_fault(&vmf);
+	return handle_pte_fault(vmf);
 }
 
 /*
@@ -4066,9 +4059,10 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
  * The mmap_sem may have been released depending on flags and our
  * return value.  See filemap_fault() and __lock_page_or_retry().
  */
-int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
-		unsigned int flags)
+int vm_handle_fault(struct vm_fault *vmf)
 {
+	unsigned int flags = vmf->flags;
+	struct vm_area_struct *vma = vmf->vma;
 	int ret;
 
 	__set_current_state(TASK_RUNNING);
@@ -4092,9 +4086,9 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 		mem_cgroup_oom_enable();
 
 	if (unlikely(is_vm_hugetlb_page(vma)))
-		ret = hugetlb_fault(vma->vm_mm, vma, address, flags);
+		ret = hugetlb_fault(vma->vm_mm, vma, vmf->address, flags);
 	else
-		ret = __handle_mm_fault(vma, address, flags);
+		ret = __handle_mm_fault(vmf);
 
 	if (flags & FAULT_FLAG_USER) {
 		mem_cgroup_oom_disable();
@@ -4110,6 +4104,26 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 
 	return ret;
 }
+
+/*
+ * By the time we get here, we already hold the mm semaphore
+ *
+ * The mmap_sem may have been released depending on flags and our
+ * return value.  See filemap_fault() and __lock_page_or_retry().
+ */
+int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
+		unsigned int flags)
+{
+	struct vm_fault vmf = {
+		.vma = vma,
+		.address = address & PAGE_MASK,
+		.flags = flags,
+		.pgoff = linear_page_index(vma, address),
+		.gfp_mask = __get_fault_gfp_mask(vma),
+	};
+
+	return vm_handle_fault(&vmf);
+}
 EXPORT_SYMBOL_GPL(handle_mm_fault);
 
 #ifndef __PAGETABLE_P4D_FOLDED

  reply	other threads:[~2018-09-27 16:24 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-26 21:08 [RFC][PATCH 0/9][V2] drop the mmap_sem when doing IO in the fault path Josef Bacik
2018-09-26 21:08 ` [PATCH 1/9] mm: infrastructure for page fault page caching Josef Bacik
2018-09-27 16:24   ` Matthew Wilcox [this message]
2018-09-26 21:08 ` [PATCH 2/9] mm: drop mmap_sem for page cache read IO submission Josef Bacik
2018-09-26 21:08 ` [PATCH 3/9] mm: clean up swapcache lookup and creation function names Josef Bacik
2018-09-26 21:08 ` [PATCH 4/9] mm: drop mmap_sem for swap read IO submission Josef Bacik
2018-09-26 21:08 ` [PATCH 5/9] mm: drop the mmap_sem in all read fault cases Josef Bacik
2018-09-26 21:08 ` [PATCH 6/9] mm: use the cached page for filemap_fault Josef Bacik
2018-09-26 21:08 ` [PATCH 7/9] mm: add a flag to indicate we used a cached page Josef Bacik
2018-09-26 21:08 ` [PATCH 8/9] mm: allow ->page_mkwrite to do retries Josef Bacik
2018-09-26 21:08 ` [PATCH 9/9] btrfs: drop mmap_sem in mkwrite for btrfs Josef Bacik

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180927162442.GC19006@bombadil.infradead.org \
    --to=willy@infradead.org \
    --cc=akpm@linux-foundation.org \
    --cc=hannes@cmpxchg.org \
    --cc=josef@toxicpanda.com \
    --cc=kernel-team@fb.com \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=riel@redhat.com \
    --cc=tj@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.