From: Dan Williams <dan.j.williams@intel.com>
To: akpm@linux-foundation.org
Cc: Matthew Wilcox <willy@infradead.org>, Jan Kara <jack@suse.cz>,
	"Darrick J. Wong" <djwong@kernel.org>,
	Jason Gunthorpe <jgg@nvidia.com>, Christoph Hellwig <hch@lst.de>,
	John Hubbard <jhubbard@nvidia.com>,
	linux-fsdevel@vger.kernel.org, nvdimm@lists.linux.dev,
	linux-xfs@vger.kernel.org, linux-mm@kvack.org,
	linux-ext4@vger.kernel.org
Subject: [PATCH v2 14/18] devdax: add PUD support to the DAX mapping infrastructure
Date: Thu, 15 Sep 2022 20:36:31 -0700
Message-ID: <166329939123.2786261.4488002998591622104.stgit@dwillia2-xfh.jf.intel.com>
In-Reply-To: <166329930818.2786261.6086109734008025807.stgit@dwillia2-xfh.jf.intel.com>

In preparation for using the DAX mapping infrastructure for device-dax,
update the helpers to handle PUD entries.
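
For reference, the size bits decode back to a mapping order as in this
minimal sketch (the PAGE/PMD/PUD shift values here are x86-64
assumptions for illustration, not taken from this patch):

	/* Sketch: decoding a DAX entry's size flags into a mapping order.
	 * Shift values assume x86-64: 4K pages, 2M PMDs, 1G PUDs. */
	#define PAGE_SHIFT	12
	#define PMD_SHIFT	21
	#define PUD_SHIFT	30
	#define PMD_ORDER	(PMD_SHIFT - PAGE_SHIFT)	/* 9 */
	#define PUD_ORDER	(PUD_SHIFT - PAGE_SHIFT)	/* 18 */
	#define DAX_PMD		(1UL << 1)
	#define DAX_PUD		(1UL << 2)

	static unsigned int entry_order(unsigned long flags)
	{
		if (flags & DAX_PUD)	/* test the larger size first */
			return PUD_ORDER;
		if (flags & DAX_PMD)
			return PMD_ORDER;
		return 0;		/* PTE-sized entry */
	}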

In practice the @size_downgrade path will go unused for PUD entries:
only devdax creates DAX PUD entries, and devdax enforces aligned
mappings, so a PUD entry never needs to be split into PTE entries. The
conversion is included for completeness.
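
The downgrade path does still need entry-size-aware unmap arithmetic,
sketched here under the same x86-64 assumptions (PG_PUD_COLOUR and
PG_PUD_NR are defined in the dax.h hunk below):

	/* Sketch: range to unmap when replacing a huge zero entry.
	 * With 4K pages and 1G PUDs: colour = 0x3ffff, nr = 262144. */
	unsigned long colour = (PUD_SIZE >> PAGE_SHIFT) - 1; /* low-bits mask */
	unsigned long start = index & ~colour;	/* round down to PUD boundary */
	unsigned long nr = colour + 1;		/* pages covered by one PUD */

	unmap_mapping_pages(mapping, start, nr, false);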

The addition of PUD support to dax_insert_pfn_mkwrite() requires a new
stub for vmf_insert_pfn_pud() in the
CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD=n case, provided by stubbing
out vmf_insert_pfn_pud_prot(), which the vmf_insert_pfn_pud() wrapper
calls.
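
Roughly, that wrapper mirrors the existing PMD variant visible in the
huge_mm.h context below:

	/* Sketch: with the =n stub, this degrades to VM_FAULT_SIGBUS. */
	static inline vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf,
						    pfn_t pfn, bool write)
	{
		return vmf_insert_pfn_pud_prot(vmf, pfn,
					       vmf->vma->vm_page_prot, write);
	}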

Cc: Matthew Wilcox <willy@infradead.org>
Cc: Jan Kara <jack@suse.cz>
Cc: "Darrick J. Wong" <djwong@kernel.org>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dax/mapping.c   |   50 ++++++++++++++++++++++++++++++++++++-----------
 include/linux/dax.h     |   32 ++++++++++++++++++++----------
 include/linux/huge_mm.h |   11 ++++++++--
 3 files changed, 68 insertions(+), 25 deletions(-)

diff --git a/drivers/dax/mapping.c b/drivers/dax/mapping.c
index 5d4b9601f183..b5a5196f8831 100644
--- a/drivers/dax/mapping.c
+++ b/drivers/dax/mapping.c
@@ -13,6 +13,7 @@
 #include <linux/pfn_t.h>
 #include <linux/sizes.h>
 #include <linux/pagemap.h>
+#include <linux/huge_mm.h>
 
 #include "dax-private.h"
 
@@ -56,6 +57,8 @@ static bool dax_is_zapped(void *entry)
 
 static unsigned int dax_entry_order(void *entry)
 {
+	if (xa_to_value(entry) & DAX_PUD)
+		return PUD_ORDER;
 	if (xa_to_value(entry) & DAX_PMD)
 		return PMD_ORDER;
 	return 0;
@@ -66,9 +69,14 @@ static unsigned long dax_is_pmd_entry(void *entry)
 	return xa_to_value(entry) & DAX_PMD;
 }
 
+static unsigned long dax_is_pud_entry(void *entry)
+{
+	return xa_to_value(entry) & DAX_PUD;
+}
+
 static bool dax_is_pte_entry(void *entry)
 {
-	return !(xa_to_value(entry) & DAX_PMD);
+	return !(xa_to_value(entry) & (DAX_PMD|DAX_PUD));
 }
 
 static int dax_is_zero_entry(void *entry)
@@ -277,6 +285,8 @@ static unsigned long dax_entry_size(void *entry)
 		return 0;
 	else if (dax_is_pmd_entry(entry))
 		return PMD_SIZE;
+	else if (dax_is_pud_entry(entry))
+		return PUD_SIZE;
 	else
 		return PAGE_SIZE;
 }
@@ -564,11 +574,11 @@ void *dax_grab_mapping_entry(struct xa_state *xas,
 			     struct address_space *mapping, unsigned int order)
 {
 	unsigned long index = xas->xa_index;
-	bool pmd_downgrade; /* splitting PMD entry into PTE entries? */
+	bool size_downgrade; /* splitting entry into PTE entries? */
 	void *entry;
 
 retry:
-	pmd_downgrade = false;
+	size_downgrade = false;
 	xas_lock_irq(xas);
 	entry = get_unlocked_entry(xas, order);
 
@@ -581,15 +591,25 @@ void *dax_grab_mapping_entry(struct xa_state *xas,
 		}
 
 		if (order == 0) {
-			if (dax_is_pmd_entry(entry) &&
+			if (!dax_is_pte_entry(entry) &&
 			    (dax_is_zero_entry(entry) ||
 			     dax_is_empty_entry(entry))) {
-				pmd_downgrade = true;
+				size_downgrade = true;
 			}
 		}
 	}
 
-	if (pmd_downgrade) {
+	if (size_downgrade) {
+		unsigned long colour, nr;
+
+		if (dax_is_pmd_entry(entry)) {
+			colour = PG_PMD_COLOUR;
+			nr = PG_PMD_NR;
+		} else {
+			colour = PG_PUD_COLOUR;
+			nr = PG_PUD_NR;
+		}
+
 		/*
 		 * Make sure 'entry' remains valid while we drop
 		 * the i_pages lock.
@@ -603,9 +623,8 @@ void *dax_grab_mapping_entry(struct xa_state *xas,
 		 */
 		if (dax_is_zero_entry(entry)) {
 			xas_unlock_irq(xas);
-			unmap_mapping_pages(mapping,
-					    xas->xa_index & ~PG_PMD_COLOUR,
-					    PG_PMD_NR, false);
+			unmap_mapping_pages(mapping, xas->xa_index & ~colour,
+					    nr, false);
 			xas_reset(xas);
 			xas_lock_irq(xas);
 		}
@@ -613,7 +632,7 @@ void *dax_grab_mapping_entry(struct xa_state *xas,
 		dax_disassociate_entry(entry, mapping, false);
 		xas_store(xas, NULL); /* undo the PMD join */
 		dax_wake_entry(xas, entry, WAKE_ALL);
-		mapping->nrpages -= PG_PMD_NR;
+		mapping->nrpages -= nr;
 		entry = NULL;
 		xas_set(xas, index);
 	}
@@ -623,7 +642,9 @@ void *dax_grab_mapping_entry(struct xa_state *xas,
 	} else {
 		unsigned long flags = DAX_EMPTY;
 
-		if (order > 0)
+		if (order == PUD_SHIFT - PAGE_SHIFT)
+			flags |= DAX_PUD;
+		else if (order == PMD_SHIFT - PAGE_SHIFT)
 			flags |= DAX_PMD;
 		entry = dax_make_entry(pfn_to_pfn_t(0), flags);
 		dax_lock_entry(xas, entry);
@@ -846,7 +867,10 @@ vm_fault_t dax_insert_entry(struct xa_state *xas, struct vm_fault *vmf,
 	if (cow || (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE))) {
 		unsigned long index = xas->xa_index;
 		/* we are replacing a zero page with block mapping */
-		if (dax_is_pmd_entry(entry))
+		if (dax_is_pud_entry(entry))
+			unmap_mapping_pages(mapping, index & ~PG_PUD_COLOUR,
+					    PG_PUD_NR, false);
+		else if (dax_is_pmd_entry(entry))
 			unmap_mapping_pages(mapping, index & ~PG_PMD_COLOUR,
 					    PG_PMD_NR, false);
 		else /* pte entry */
@@ -1018,6 +1042,8 @@ vm_fault_t dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn,
 	else if (order == PMD_ORDER)
 		ret = vmf_insert_pfn_pmd(vmf, pfn, FAULT_FLAG_WRITE);
 #endif
+	else if (order == PUD_ORDER)
+		ret = vmf_insert_pfn_pud(vmf, pfn, FAULT_FLAG_WRITE);
 	else
 		ret = VM_FAULT_FALLBACK;
 	dax_unlock_entry(&xas, entry);
diff --git a/include/linux/dax.h b/include/linux/dax.h
index de60a34088bb..3a27fecf072a 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -278,22 +278,25 @@ static inline bool dax_mapping(struct address_space *mapping)
 }
 
 /*
- * DAX pagecache entries use XArray value entries so they can't be mistaken
- * for pages.  We use one bit for locking, one bit for the entry size (PMD)
- * and two more to tell us if the entry is a zero page or an empty entry that
- * is just used for locking.  In total four special bits.
+ * DAX pagecache entries use XArray value entries so they can't be
+ * mistaken for pages.  We use one bit for locking, two bits for the
+ * entry size (PMD, PUD) and two more to tell us if the entry is a zero
+ * page or an empty entry that is just used for locking.  In total 5
+ * special bits which limits the max pfn that can be stored as:
+ * (1UL << 57 - PAGE_SHIFT). 63 - DAX_SHIFT - 1 (for xa_mk_value()).
  *
- * If the PMD bit isn't set the entry has size PAGE_SIZE, and if the ZERO_PAGE
- * and EMPTY bits aren't set the entry is a normal DAX entry with a filesystem
- * block allocation.
+ * If the P{M,U}D bits are not set the entry has size PAGE_SIZE, and if
+ * the ZERO_PAGE and EMPTY bits aren't set the entry is a normal DAX
+ * entry with a filesystem block allocation.
  */
-#define DAX_SHIFT	(5)
+#define DAX_SHIFT	(6)
 #define DAX_MASK	((1UL << DAX_SHIFT) - 1)
 #define DAX_LOCKED	(1UL << 0)
 #define DAX_PMD		(1UL << 1)
-#define DAX_ZERO_PAGE	(1UL << 2)
-#define DAX_EMPTY	(1UL << 3)
-#define DAX_ZAP		(1UL << 4)
+#define DAX_PUD		(1UL << 2)
+#define DAX_ZERO_PAGE	(1UL << 3)
+#define DAX_EMPTY	(1UL << 4)
+#define DAX_ZAP		(1UL << 5)
 
 /*
  * These flags are not conveyed in Xarray value entries, they are just
@@ -316,6 +319,13 @@ int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev,
 /* The order of a PMD entry */
 #define PMD_ORDER (PMD_SHIFT - PAGE_SHIFT)
 
+/* The 'colour' (ie low bits) within a PUD of a page offset.  */
+#define PG_PUD_COLOUR ((PUD_SIZE >> PAGE_SHIFT) - 1)
+#define PG_PUD_NR (PUD_SIZE >> PAGE_SHIFT)
+
+/* The order of a PUD entry */
+#define PUD_ORDER (PUD_SHIFT - PAGE_SHIFT)
+
 static inline unsigned int pe_order(enum page_entry_size pe_size)
 {
 	if (pe_size == PE_SIZE_PTE)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 768e5261fdae..de73f5a16252 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -18,10 +18,19 @@ int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 
 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
 void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud);
+vm_fault_t vmf_insert_pfn_pud_prot(struct vm_fault *vmf, pfn_t pfn,
+				   pgprot_t pgprot, bool write);
 #else
 static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
 {
 }
+
+static inline vm_fault_t vmf_insert_pfn_pud_prot(struct vm_fault *vmf,
+						 pfn_t pfn, pgprot_t pgprot,
+						 bool write)
+{
+	return VM_FAULT_SIGBUS;
+}
 #endif
 
 vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf);
@@ -58,8 +67,6 @@ static inline vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn,
 {
 	return vmf_insert_pfn_pmd_prot(vmf, pfn, vmf->vma->vm_page_prot, write);
 }
-vm_fault_t vmf_insert_pfn_pud_prot(struct vm_fault *vmf, pfn_t pfn,
-				   pgprot_t pgprot, bool write);
 
 /**
  * vmf_insert_pfn_pud - insert a pud size pfn


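For reference, the pfn-capacity math in the updated dax.h comment works
out as follows (a sketch assuming a 64-bit unsigned long):

	/* xa_mk_value() reserves 1 bit and DAX_SHIFT (now 6) flag bits
	 * sit below the pfn, leaving 64 - 1 - 6 = 57 bits of pfn. */
	#define DAX_SHIFT	6
	unsigned long pfn_bits = 64 - 1 - DAX_SHIFT;	/* 57 */
	unsigned long max_pfn = (1UL << pfn_bits) - 1;	/* up to 2^57 - 1 */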
