From: Dan Williams <dan.j.williams@intel.com> To: linux-nvdimm@lists.01.org Cc: Jan Kara <jack@suse.cz>, linux-kernel@vger.kernel.org, Matthew Wilcox <willy@infradead.org>, linux-mm@kvack.org, linux-fsdevel@vger.kernel.org, Christoph Hellwig <hch@lst.de> Subject: [PATCH v6 09/13] filesystem-dax: Introduce dax_lock_mapping_entry() Date: Fri, 13 Jul 2018 21:50:16 -0700 [thread overview] Message-ID: <153154381675.34503.4471648812866312162.stgit@dwillia2-desk3.amr.corp.intel.com> (raw) In-Reply-To: <153154376846.34503.15480221419473501643.stgit@dwillia2-desk3.amr.corp.intel.com> In preparation for implementing support for memory poison (media error) handling via dax mappings, implement a lock_page() equivalent. Poison error handling requires rmap and needs guarantees that the page->mapping association is maintained / valid (inode not freed) for the duration of the lookup. In the device-dax case it is sufficient to simply hold a dev_pagemap reference. In the filesystem-dax case we need to use the entry lock. Export the entry lock via dax_lock_mapping_entry() that uses rcu_read_lock() to protect against the inode being freed, and revalidates the page->mapping association under xa_lock(). Cc: Christoph Hellwig <hch@lst.de> Cc: Matthew Wilcox <willy@infradead.org> Cc: Ross Zwisler <ross.zwisler@linux.intel.com> Cc: Jan Kara <jack@suse.cz> Signed-off-by: Dan Williams <dan.j.williams@intel.com> --- fs/dax.c | 109 ++++++++++++++++++++++++++++++++++++++++++++++++--- include/linux/dax.h | 13 ++++++ 2 files changed, 116 insertions(+), 6 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index 4de11ed463ce..57ec272038da 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -226,8 +226,8 @@ static inline void *unlock_slot(struct address_space *mapping, void **slot) * * Must be called with the i_pages lock held. */ -static void *get_unlocked_mapping_entry(struct address_space *mapping, - pgoff_t index, void ***slotp) +static void *__get_unlocked_mapping_entry(struct address_space *mapping, + pgoff_t index, void ***slotp, bool (*wait_fn)(void)) { void *entry, **slot; struct wait_exceptional_entry_queue ewait; @@ -237,6 +237,8 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping, ewait.wait.func = wake_exceptional_entry_func; for (;;) { + bool revalidate; + entry = __radix_tree_lookup(&mapping->i_pages, index, NULL, &slot); if (!entry || @@ -251,14 +253,31 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping, prepare_to_wait_exclusive(wq, &ewait.wait, TASK_UNINTERRUPTIBLE); xa_unlock_irq(&mapping->i_pages); - schedule(); + revalidate = wait_fn(); finish_wait(wq, &ewait.wait); xa_lock_irq(&mapping->i_pages); + if (revalidate) + return ERR_PTR(-EAGAIN); } } -static void dax_unlock_mapping_entry(struct address_space *mapping, - pgoff_t index) +static bool entry_wait(void) +{ + schedule(); + /* + * Never return an ERR_PTR() from + * __get_unlocked_mapping_entry(), just keep looping. + */ + return false; +} + +static void *get_unlocked_mapping_entry(struct address_space *mapping, + pgoff_t index, void ***slotp) +{ + return __get_unlocked_mapping_entry(mapping, index, slotp, entry_wait); +} + +static void unlock_mapping_entry(struct address_space *mapping, pgoff_t index) { void *entry, **slot; @@ -277,7 +296,7 @@ static void dax_unlock_mapping_entry(struct address_space *mapping, static void put_locked_mapping_entry(struct address_space *mapping, pgoff_t index) { - dax_unlock_mapping_entry(mapping, index); + unlock_mapping_entry(mapping, index); } /* @@ -374,6 +393,84 @@ static struct page *dax_busy_page(void *entry) return NULL; } +static bool entry_wait_revalidate(void) +{ + rcu_read_unlock(); + schedule(); + rcu_read_lock(); + + /* + * Tell __get_unlocked_mapping_entry() to take a break, we need + * to revalidate page->mapping after dropping locks + */ + return true; +} + +bool dax_lock_mapping_entry(struct page *page) +{ + pgoff_t index; + struct inode *inode; + bool did_lock = false; + void *entry = NULL, **slot; + struct address_space *mapping; + + rcu_read_lock(); + for (;;) { + mapping = READ_ONCE(page->mapping); + + if (!dax_mapping(mapping)) + break; + + /* + * In the device-dax case there's no need to lock, a + * struct dev_pagemap pin is sufficient to keep the + * inode alive, and we assume we have dev_pagemap pin + * otherwise we would not have a valid pfn_to_page() + * translation. + */ + inode = mapping->host; + if (S_ISCHR(inode->i_mode)) { + did_lock = true; + break; + } + + xa_lock_irq(&mapping->i_pages); + if (mapping != page->mapping) { + xa_unlock_irq(&mapping->i_pages); + continue; + } + index = page->index; + + entry = __get_unlocked_mapping_entry(mapping, index, &slot, + entry_wait_revalidate); + if (!entry) { + xa_unlock_irq(&mapping->i_pages); + break; + } else if (IS_ERR(entry)) { + WARN_ON_ONCE(PTR_ERR(entry) != -EAGAIN); + continue; + } + lock_slot(mapping, slot); + did_lock = true; + xa_unlock_irq(&mapping->i_pages); + break; + } + rcu_read_unlock(); + + return did_lock; +} + +void dax_unlock_mapping_entry(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct inode *inode = mapping->host; + + if (S_ISCHR(inode->i_mode)) + return; + + unlock_mapping_entry(mapping, page->index); +} + /* * Find radix tree entry at given index. If it points to an exceptional entry, * return it with the radix tree entry locked. If the radix tree doesn't diff --git a/include/linux/dax.h b/include/linux/dax.h index 3855e3800f48..cf8ac51cf0d7 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -88,6 +88,8 @@ int dax_writeback_mapping_range(struct address_space *mapping, struct block_device *bdev, struct writeback_control *wbc); struct page *dax_layout_busy_page(struct address_space *mapping); +bool dax_lock_mapping_entry(struct page *page); +void dax_unlock_mapping_entry(struct page *page); #else static inline bool bdev_dax_supported(struct block_device *bdev, int blocksize) @@ -119,6 +121,17 @@ static inline int dax_writeback_mapping_range(struct address_space *mapping, { return -EOPNOTSUPP; } + +static inline bool dax_lock_mapping_entry(struct page *page) +{ + if (IS_DAX(page->mapping->host)) + return true; + return false; +} + +static inline void dax_unlock_mapping_entry(struct page *page) +{ +} #endif int dax_read_lock(void); _______________________________________________ Linux-nvdimm mailing list Linux-nvdimm@lists.01.org https://lists.01.org/mailman/listinfo/linux-nvdimm
WARNING: multiple messages have this Message-ID (diff)
From: Dan Williams <dan.j.williams@intel.com> To: linux-nvdimm@lists.01.org Cc: Christoph Hellwig <hch@lst.de>, Matthew Wilcox <willy@infradead.org>, Ross Zwisler <ross.zwisler@linux.intel.com>, Jan Kara <jack@suse.cz>, linux-fsdevel@vger.kernel.org, linux-mm@kvack.org, linux-kernel@vger.kernel.org Subject: [PATCH v6 09/13] filesystem-dax: Introduce dax_lock_mapping_entry() Date: Fri, 13 Jul 2018 21:50:16 -0700 [thread overview] Message-ID: <153154381675.34503.4471648812866312162.stgit@dwillia2-desk3.amr.corp.intel.com> (raw) In-Reply-To: <153154376846.34503.15480221419473501643.stgit@dwillia2-desk3.amr.corp.intel.com> In preparation for implementing support for memory poison (media error) handling via dax mappings, implement a lock_page() equivalent. Poison error handling requires rmap and needs guarantees that the page->mapping association is maintained / valid (inode not freed) for the duration of the lookup. In the device-dax case it is sufficient to simply hold a dev_pagemap reference. In the filesystem-dax case we need to use the entry lock. Export the entry lock via dax_lock_mapping_entry() that uses rcu_read_lock() to protect against the inode being freed, and revalidates the page->mapping association under xa_lock(). Cc: Christoph Hellwig <hch@lst.de> Cc: Matthew Wilcox <willy@infradead.org> Cc: Ross Zwisler <ross.zwisler@linux.intel.com> Cc: Jan Kara <jack@suse.cz> Signed-off-by: Dan Williams <dan.j.williams@intel.com> --- fs/dax.c | 109 ++++++++++++++++++++++++++++++++++++++++++++++++--- include/linux/dax.h | 13 ++++++ 2 files changed, 116 insertions(+), 6 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index 4de11ed463ce..57ec272038da 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -226,8 +226,8 @@ static inline void *unlock_slot(struct address_space *mapping, void **slot) * * Must be called with the i_pages lock held. */ -static void *get_unlocked_mapping_entry(struct address_space *mapping, - pgoff_t index, void ***slotp) +static void *__get_unlocked_mapping_entry(struct address_space *mapping, + pgoff_t index, void ***slotp, bool (*wait_fn)(void)) { void *entry, **slot; struct wait_exceptional_entry_queue ewait; @@ -237,6 +237,8 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping, ewait.wait.func = wake_exceptional_entry_func; for (;;) { + bool revalidate; + entry = __radix_tree_lookup(&mapping->i_pages, index, NULL, &slot); if (!entry || @@ -251,14 +253,31 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping, prepare_to_wait_exclusive(wq, &ewait.wait, TASK_UNINTERRUPTIBLE); xa_unlock_irq(&mapping->i_pages); - schedule(); + revalidate = wait_fn(); finish_wait(wq, &ewait.wait); xa_lock_irq(&mapping->i_pages); + if (revalidate) + return ERR_PTR(-EAGAIN); } } -static void dax_unlock_mapping_entry(struct address_space *mapping, - pgoff_t index) +static bool entry_wait(void) +{ + schedule(); + /* + * Never return an ERR_PTR() from + * __get_unlocked_mapping_entry(), just keep looping. + */ + return false; +} + +static void *get_unlocked_mapping_entry(struct address_space *mapping, + pgoff_t index, void ***slotp) +{ + return __get_unlocked_mapping_entry(mapping, index, slotp, entry_wait); +} + +static void unlock_mapping_entry(struct address_space *mapping, pgoff_t index) { void *entry, **slot; @@ -277,7 +296,7 @@ static void dax_unlock_mapping_entry(struct address_space *mapping, static void put_locked_mapping_entry(struct address_space *mapping, pgoff_t index) { - dax_unlock_mapping_entry(mapping, index); + unlock_mapping_entry(mapping, index); } /* @@ -374,6 +393,84 @@ static struct page *dax_busy_page(void *entry) return NULL; } +static bool entry_wait_revalidate(void) +{ + rcu_read_unlock(); + schedule(); + rcu_read_lock(); + + /* + * Tell __get_unlocked_mapping_entry() to take a break, we need + * to revalidate page->mapping after dropping locks + */ + return true; +} + +bool dax_lock_mapping_entry(struct page *page) +{ + pgoff_t index; + struct inode *inode; + bool did_lock = false; + void *entry = NULL, **slot; + struct address_space *mapping; + + rcu_read_lock(); + for (;;) { + mapping = READ_ONCE(page->mapping); + + if (!dax_mapping(mapping)) + break; + + /* + * In the device-dax case there's no need to lock, a + * struct dev_pagemap pin is sufficient to keep the + * inode alive, and we assume we have dev_pagemap pin + * otherwise we would not have a valid pfn_to_page() + * translation. + */ + inode = mapping->host; + if (S_ISCHR(inode->i_mode)) { + did_lock = true; + break; + } + + xa_lock_irq(&mapping->i_pages); + if (mapping != page->mapping) { + xa_unlock_irq(&mapping->i_pages); + continue; + } + index = page->index; + + entry = __get_unlocked_mapping_entry(mapping, index, &slot, + entry_wait_revalidate); + if (!entry) { + xa_unlock_irq(&mapping->i_pages); + break; + } else if (IS_ERR(entry)) { + WARN_ON_ONCE(PTR_ERR(entry) != -EAGAIN); + continue; + } + lock_slot(mapping, slot); + did_lock = true; + xa_unlock_irq(&mapping->i_pages); + break; + } + rcu_read_unlock(); + + return did_lock; +} + +void dax_unlock_mapping_entry(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct inode *inode = mapping->host; + + if (S_ISCHR(inode->i_mode)) + return; + + unlock_mapping_entry(mapping, page->index); +} + /* * Find radix tree entry at given index. If it points to an exceptional entry, * return it with the radix tree entry locked. If the radix tree doesn't diff --git a/include/linux/dax.h b/include/linux/dax.h index 3855e3800f48..cf8ac51cf0d7 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -88,6 +88,8 @@ int dax_writeback_mapping_range(struct address_space *mapping, struct block_device *bdev, struct writeback_control *wbc); struct page *dax_layout_busy_page(struct address_space *mapping); +bool dax_lock_mapping_entry(struct page *page); +void dax_unlock_mapping_entry(struct page *page); #else static inline bool bdev_dax_supported(struct block_device *bdev, int blocksize) @@ -119,6 +121,17 @@ static inline int dax_writeback_mapping_range(struct address_space *mapping, { return -EOPNOTSUPP; } + +static inline bool dax_lock_mapping_entry(struct page *page) +{ + if (IS_DAX(page->mapping->host)) + return true; + return false; +} + +static inline void dax_unlock_mapping_entry(struct page *page) +{ +} #endif int dax_read_lock(void);
next prev parent reply other threads:[~2018-07-14 5:00 UTC|newest] Thread overview: 58+ messages / expand[flat|nested] mbox.gz Atom feed top 2018-07-14 4:49 [PATCH v6 00/13] mm: Teach memory_failure() about ZONE_DEVICE pages Dan Williams 2018-07-14 4:49 ` Dan Williams 2018-07-14 4:49 ` [PATCH v6 01/13] device-dax: Convert to vmf_insert_mixed and vm_fault_t Dan Williams 2018-07-14 4:49 ` Dan Williams 2018-07-14 4:49 ` [PATCH v6 02/13] device-dax: Enable page_mapping() Dan Williams 2018-07-14 4:49 ` Dan Williams 2018-07-14 4:49 ` [PATCH v6 03/13] device-dax: Set page->index Dan Williams 2018-07-14 4:49 ` Dan Williams 2018-07-14 4:49 ` [PATCH v6 04/13] filesystem-dax: " Dan Williams 2018-07-14 4:49 ` Dan Williams 2018-07-14 4:49 ` [PATCH v6 05/13] mm, madvise_inject_error: Disable MADV_SOFT_OFFLINE for ZONE_DEVICE pages Dan Williams 2018-07-14 4:49 ` Dan Williams 2018-07-17 6:47 ` Naoya Horiguchi 2018-07-17 6:47 ` Naoya Horiguchi 2018-07-14 4:50 ` [PATCH v6 06/13] mm, dev_pagemap: Do not clear ->mapping on final put Dan Williams 2018-07-14 4:50 ` Dan Williams 2018-07-14 4:50 ` Dan Williams 2018-07-23 16:12 ` Dave Jiang 2018-07-23 16:12 ` Dave Jiang 2018-07-23 16:12 ` Dave Jiang 2018-07-23 16:23 ` Jerome Glisse 2018-07-23 16:23 ` Jerome Glisse 2018-07-23 16:23 ` Jerome Glisse 2018-07-23 16:23 ` Jerome Glisse 2018-07-14 4:50 ` [PATCH v6 07/13] mm, madvise_inject_error: Let memory_failure() optionally take a page reference Dan Williams 2018-07-14 4:50 ` Dan Williams 2018-07-17 6:52 ` Naoya Horiguchi 2018-07-14 4:50 ` [PATCH v6 08/13] mm, memory_failure: Collect mapping size in collect_procs() Dan Williams 2018-07-14 4:50 ` Dan Williams 2018-07-14 4:50 ` Dan Williams [this message] 2018-07-14 4:50 ` [PATCH v6 09/13] filesystem-dax: Introduce dax_lock_mapping_entry() Dan Williams 2018-08-06 9:21 ` Jan Kara 2018-08-06 9:21 ` Jan Kara 2018-07-14 4:50 ` [PATCH v6 10/13] mm, memory_failure: Teach memory_failure() about dev_pagemap pages Dan Williams 2018-07-14 4:50 ` Dan Williams 2018-07-14 4:50 ` Dan Williams 2018-08-06 9:27 ` Jan Kara 2018-08-06 9:27 ` Jan Kara 2018-08-06 9:27 ` Jan Kara 2018-08-06 9:27 ` Jan Kara 2018-07-14 4:50 ` [PATCH v6 11/13] x86/mm/pat: Prepare {reserve, free}_memtype() for "decoy" addresses Dan Williams 2018-07-14 4:50 ` [v6,11/13] " Dan Williams 2018-07-14 4:50 ` [PATCH v6 11/13] " Dan Williams 2018-07-24 7:36 ` Ingo Molnar 2018-07-24 7:36 ` [v6,11/13] " Ingo Molnar 2018-07-24 7:36 ` [PATCH v6 11/13] " Ingo Molnar 2018-07-24 15:46 ` Dave Jiang 2018-07-24 15:46 ` [v6,11/13] " Dave Jiang 2018-07-24 15:46 ` [PATCH v6 11/13] " Dave Jiang 2018-07-14 4:50 ` [PATCH v6 12/13] x86/memory_failure: Introduce {set, clear}_mce_nospec() Dan Williams 2018-07-14 4:50 ` [v6,12/13] " Dan Williams 2018-07-14 4:50 ` [PATCH v6 12/13] " Dan Williams 2018-07-14 4:50 ` [PATCH v6 13/13] libnvdimm, pmem: Restore page attributes when clearing errors Dan Williams 2018-07-14 4:50 ` Dan Williams 2018-07-19 17:57 ` [PATCH v6 00/13] mm: Teach memory_failure() about ZONE_DEVICE pages Dave Jiang 2018-07-19 17:57 ` Dave Jiang 2018-07-24 7:39 ` Ingo Molnar 2018-07-24 7:39 ` Ingo Molnar
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=153154381675.34503.4471648812866312162.stgit@dwillia2-desk3.amr.corp.intel.com \ --to=dan.j.williams@intel.com \ --cc=hch@lst.de \ --cc=jack@suse.cz \ --cc=linux-fsdevel@vger.kernel.org \ --cc=linux-kernel@vger.kernel.org \ --cc=linux-mm@kvack.org \ --cc=linux-nvdimm@lists.01.org \ --cc=willy@infradead.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: linkBe sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.