Linux-Fsdevel Archive on lore.kernel.org
 help / color / Atom feed
From: Matthew Wilcox <willy@infradead.org>
To: Dan Williams <dan.j.williams@intel.com>
Cc: linux-nvdimm@lists.01.org, Jan Kara <jack@suse.cz>,
	linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH] dax: Fix Xarray conversion of dax_unlock_mapping_entry()
Date: Fri, 30 Nov 2018 07:49:02 -0800
Message-ID: <20181130154902.GL10377@bombadil.infradead.org> (raw)
In-Reply-To: <154353682674.1676897.15440708268545845062.stgit@dwillia2-desk3.amr.corp.intel.com>

On Thu, Nov 29, 2018 at 04:13:46PM -0800, Dan Williams wrote:
> Internal to dax_unlock_mapping_entry(), dax_unlock_entry() is used to
> store a replacement entry in the Xarray at the given xas-index with the
> DAX_LOCKED bit clear. When called, dax_unlock_entry() expects the unlocked
> value of the entry relative to the current Xarray state to be specified.
> 
> In most contexts dax_unlock_entry() is operating in the same scope as
> the matched dax_lock_entry(). However, in the dax_unlock_mapping_entry()
> case the implementation needs to recall the original entry. In the case
> where the original entry is a 'pmd' entry it is possible that the pfn
> used to perform the lookup is misaligned to the value retrieved from the
> Xarray.

So far, dax_unlock_mapping_entry only has the one caller.  I'd rather we
returned the 'entry' to the caller, then had them pass it back to the
unlock function.  That matches the flow in the rest of DAX and doesn't
pose an undue burden to the caller.

I plan to reclaim the DAX_LOCK bit (and the DAX_EMPTY bit for that
matter), instead using a special DAX_LOCK value.  DAX is almost free of
assumptions about the other bits in a locked entry, and this will remove
the assumption that there's a PMD bit in the entry.

How does this look?

diff --git a/fs/dax.c b/fs/dax.c
index 9bcce89ea18e..7681429af42f 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -351,20 +351,20 @@ static struct page *dax_busy_page(void *entry)
  * @page: The page whose entry we want to lock
  *
  * Context: Process context.
- * Return: %true if the entry was locked or does not need to be locked.
+ * Return: A cookie to pass to dax_unlock_mapping_entry() or %NULL if the
+ * entry could not be locked.
  */
-bool dax_lock_mapping_entry(struct page *page)
+void *dax_lock_mapping_entry(struct page *page)
 {
 	XA_STATE(xas, NULL, 0);
 	void *entry;
-	bool locked;
 
 	/* Ensure page->mapping isn't freed while we look at it */
 	rcu_read_lock();
 	for (;;) {
 		struct address_space *mapping = READ_ONCE(page->mapping);
 
-		locked = false;
+		entry = NULL;
 		if (!dax_mapping(mapping))
 			break;
 
@@ -375,7 +375,7 @@ bool dax_lock_mapping_entry(struct page *page)
 		 * otherwise we would not have a valid pfn_to_page()
 		 * translation.
 		 */
-		locked = true;
+		entry = (void *)1;
 		if (S_ISCHR(mapping->host->i_mode))
 			break;
 
@@ -400,22 +400,17 @@ bool dax_lock_mapping_entry(struct page *page)
 		break;
 	}
 	rcu_read_unlock();
-	return locked;
+	return entry;
 }
 
-void dax_unlock_mapping_entry(struct page *page)
+void dax_unlock_mapping_entry(struct page *page, void *entry)
 {
 	struct address_space *mapping = page->mapping;
 	XA_STATE(xas, &mapping->i_pages, page->index);
-	void *entry;
 
 	if (S_ISCHR(mapping->host->i_mode))
 		return;
 
-	rcu_read_lock();
-	entry = xas_load(&xas);
-	rcu_read_unlock();
-	entry = dax_make_entry(page_to_pfn_t(page), dax_is_pmd_entry(entry));
 	dax_unlock_entry(&xas, entry);
 }
 
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 450b28db9533..bc143c2d6980 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -88,8 +88,8 @@ int dax_writeback_mapping_range(struct address_space *mapping,
 		struct block_device *bdev, struct writeback_control *wbc);
 
 struct page *dax_layout_busy_page(struct address_space *mapping);
-bool dax_lock_mapping_entry(struct page *page);
-void dax_unlock_mapping_entry(struct page *page);
+void *dax_lock_mapping_entry(struct page *page);
+void dax_unlock_mapping_entry(struct page *page, void *entry);
 #else
 static inline bool bdev_dax_supported(struct block_device *bdev,
 		int blocksize)
@@ -122,14 +122,14 @@ static inline int dax_writeback_mapping_range(struct address_space *mapping,
 	return -EOPNOTSUPP;
 }
 
-static inline bool dax_lock_mapping_entry(struct page *page)
+static inline void *dax_lock_mapping_entry(struct page *page)
 {
 	if (IS_DAX(page->mapping->host))
-		return true;
-	return false;
+		return (void *)1;
+	return NULL;
 }
 
-static inline void dax_unlock_mapping_entry(struct page *page)
+static inline void dax_unlock_mapping_entry(struct page *page, void *entry)
 {
 }
 #endif
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 0cd3de3550f0..3abea1e19902 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1161,6 +1161,7 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
 	LIST_HEAD(tokill);
 	int rc = -EBUSY;
 	loff_t start;
+	void *cookie;
 
 	/*
 	 * Prevent the inode from being freed while we are interrogating
@@ -1169,7 +1170,8 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
 	 * also prevents changes to the mapping of this pfn until
 	 * poison signaling is complete.
 	 */
-	if (!dax_lock_mapping_entry(page))
+	cookie = dax_lock_mapping_entry(page);
+	if (!cookie)
 		goto out;
 
 	if (hwpoison_filter(page)) {
@@ -1220,7 +1222,7 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
 	kill_procs(&tokill, flags & MF_MUST_KILL, !unmap_success, pfn, flags);
 	rc = 0;
 unlock:
-	dax_unlock_mapping_entry(page);
+	dax_unlock_mapping_entry(page, cookie);
 out:
 	/* drop pgmap ref acquired in caller */
 	put_dev_pagemap(pgmap);

  reply index

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-11-30  0:13 Dan Williams
2018-11-30 15:49 ` Matthew Wilcox [this message]
2018-11-30 15:54   ` Dan Williams
2018-11-30 16:24     ` Matthew Wilcox
2018-11-30 16:33       ` Dan Williams
2018-11-30 17:01         ` Dan Williams
2018-11-30 19:50           ` Matthew Wilcox
2018-11-30 20:05             ` Dan Williams
2018-12-04  3:33               ` Dan Williams
2018-12-05  1:34                 ` Matthew Wilcox
2018-12-05  6:11                   ` Dan Williams
2018-12-05  9:22                     ` Jan Kara

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181130154902.GL10377@bombadil.infradead.org \
    --to=willy@infradead.org \
    --cc=dan.j.williams@intel.com \
    --cc=jack@suse.cz \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-nvdimm@lists.01.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-Fsdevel Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-fsdevel/0 linux-fsdevel/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-fsdevel linux-fsdevel/ https://lore.kernel.org/linux-fsdevel \
		linux-fsdevel@vger.kernel.org
	public-inbox-index linux-fsdevel

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-fsdevel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git