From: Dan Williams <dan.j.williams@intel.com>
To: akpm@linux-foundation.org
Cc: jack@suse.cz, linux-nvdimm@lists.01.org,
	Matthew Wilcox <mawilcox@microsoft.com>,
	linux-xfs@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	hch@lst.de
Subject: [PATCH v4 15/18] mm, fs, dax: use page->mapping to warn if dma collides with truncate
Date: Sat, 23 Dec 2017 16:57:20 -0800	[thread overview]
Message-ID: <151407704063.38751.15403501871756055025.stgit@dwillia2-desk3.amr.corp.intel.com> (raw)
In-Reply-To: <151407695916.38751.2866053440557472361.stgit@dwillia2-desk3.amr.corp.intel.com>

Catch cases where truncate encounters pages that are still under active
dma. This warning is a canary for potential data corruption, as the
truncated blocks could be reallocated to a new file while the device is
still performing i/o.

Here is an example of a collision that this implementation catches:

 WARNING: CPU: 2 PID: 1286 at fs/dax.c:343 dax_disassociate_entry+0x55/0x80
 [..]
 Call Trace:
  __dax_invalidate_mapping_entry+0x6c/0xf0
  dax_delete_mapping_entry+0xf/0x20
  truncate_exceptional_pvec_entries.part.12+0x1af/0x200
  truncate_inode_pages_range+0x268/0x970
  ? tlb_gather_mmu+0x10/0x20
  ? up_write+0x1c/0x40
  ? unmap_mapping_range+0x73/0x140
  xfs_free_file_space+0x1b6/0x5b0 [xfs]
  ? xfs_file_fallocate+0x7f/0x320 [xfs]
  ? down_write_nested+0x40/0x70
  ? xfs_ilock+0x21d/0x2f0 [xfs]
  xfs_file_fallocate+0x162/0x320 [xfs]
  ? rcu_read_lock_sched_held+0x3f/0x70
  ? rcu_sync_lockdep_assert+0x2a/0x50
  ? __sb_start_write+0xd0/0x1b0
  ? vfs_fallocate+0x20c/0x270
  vfs_fallocate+0x154/0x270
  SyS_fallocate+0x43/0x80
  entry_SYSCALL_64_fastpath+0x1f/0x96
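
For reference, a rough user-space sketch of the kind of sequence that can
trip this warning is included below. The file paths, sizes, and the
secondary O_DIRECT source device are illustrative only: an O_DIRECT read
pins the dax file's pages via get_user_pages() while a second thread
punches a hole over the same range (compile with -pthread):

/*
 * Illustrative reproducer sketch; paths and the O_DIRECT source are
 * hypothetical, and the race is not guaranteed to hit on every run.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

#define LEN (2UL << 20)		/* 2MB: large enough to cover a pmd entry */

static void *punch(void *arg)
{
	int fd = *(int *)arg;

	/* truncate blocks that may still be under dma from the read below */
	if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, LEN))
		perror("fallocate(PUNCH_HOLE)");
	return NULL;
}

int main(void)
{
	int fd = open("/mnt/pmem/data", O_CREAT | O_RDWR, 0644);  /* dax fs */
	int src = open("/mnt/scratch/src", O_RDONLY | O_DIRECT);  /* non-dax */
	pthread_t t;
	void *buf;

	if (fd < 0 || src < 0)
		return 1;
	if (fallocate(fd, 0, 0, LEN))	/* allocate blocks to punch later */
		return 1;
	buf = mmap(NULL, LEN, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (buf == MAP_FAILED)
		return 1;

	pthread_create(&t, NULL, punch, &fd);
	/* the dax mapping is the i/o buffer => get_user_pages() pins it */
	if (read(src, buf, LEN) < 0)
		perror("read");
	pthread_join(t, NULL);
	return 0;
}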

Cc: Jan Kara <jack@suse.cz>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/dax.c |   56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/fs/dax.c b/fs/dax.c
index fadc1b13838b..7d9fff8a1195 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -366,6 +366,56 @@ static void put_unlocked_mapping_entry(struct address_space *mapping,
 	dax_wake_mapping_entry_waiter(mapping, index, entry, false);
 }
 
+static unsigned long dax_entry_size(void *entry)
+{
+	if (dax_is_zero_entry(entry))
+		return 0;
+	else if (dax_is_empty_entry(entry))
+		return 0;
+	else if (dax_is_pmd_entry(entry))
+		return HPAGE_SIZE;
+	else
+		return PAGE_SIZE;
+}
+
+#define for_each_entry_pfn(entry, pfn, end_pfn) \
+	for (pfn = dax_radix_pfn(entry), \
+			end_pfn = pfn + dax_entry_size(entry) / PAGE_SIZE; \
+			pfn < end_pfn; \
+			pfn++)
+
+static void dax_associate_entry(void *entry, struct address_space *mapping)
+{
+	unsigned long pfn, end_pfn;
+
+	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
+		return;
+
+	for_each_entry_pfn(entry, pfn, end_pfn) {
+		struct page *page = pfn_to_page(pfn);
+
+		WARN_ON_ONCE(page->mapping);
+		page->mapping = mapping;
+	}
+}
+
+static void dax_disassociate_entry(void *entry, struct address_space *mapping,
+		bool trunc)
+{
+	unsigned long pfn, end_pfn;
+
+	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
+		return;
+
+	for_each_entry_pfn(entry, pfn, end_pfn) {
+		struct page *page = pfn_to_page(pfn);
+
+		WARN_ON_ONCE(trunc && page_ref_count(page) > 1);
+		WARN_ON_ONCE(page->mapping && page->mapping != mapping);
+		page->mapping = NULL;
+	}
+}
+
 /*
  * Find radix tree entry at given index. If it points to an exceptional entry,
  * return it with the radix tree entry locked. If the radix tree doesn't
@@ -472,6 +522,7 @@ static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index,
 		}
 
 		if (pmd_downgrade) {
+			dax_disassociate_entry(entry, mapping, false);
 			radix_tree_delete(&mapping->page_tree, index);
 			mapping->nrexceptional--;
 			dax_wake_mapping_entry_waiter(mapping, index, entry,
@@ -521,6 +572,7 @@ static int __dax_invalidate_mapping_entry(struct address_space *mapping,
 	    (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) ||
 	     radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE)))
 		goto out;
+	dax_disassociate_entry(entry, mapping, trunc);
 	radix_tree_delete(page_tree, index);
 	mapping->nrexceptional--;
 	ret = 1;
@@ -617,6 +669,10 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
 
 	spin_lock_irq(&mapping->tree_lock);
 	new_entry = dax_radix_locked_entry(pfn, flags);
+	if (dax_entry_size(entry) != dax_entry_size(new_entry)) {
+		dax_disassociate_entry(entry, mapping, false);
+		dax_associate_entry(new_entry, mapping);
+	}
 
 	if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
 		/*

Thread overview: 136+ messages
2017-12-24  0:56 [PATCH v4 00/18] dax: fix dma vs truncate/hole-punch Dan Williams
2017-12-24  0:56 ` Dan Williams
2017-12-24  0:56 ` Dan Williams
2017-12-24  0:56 ` [PATCH v4 01/18] mm, dax: introduce pfn_t_special() Dan Williams
2017-12-24  0:56   ` Dan Williams
2018-01-04  8:16   ` Christoph Hellwig
2018-01-04  8:16     ` Christoph Hellwig
2017-12-24  0:56 ` [PATCH v4 02/18] ext4: auto disable dax instead of failing mount Dan Williams
2017-12-24  0:56   ` Dan Williams
2018-01-03 14:20   ` Jan Kara
2018-01-03 14:20     ` Jan Kara
2017-12-24  0:56 ` [PATCH v4 03/18] ext2: " Dan Williams
2017-12-24  0:56   ` Dan Williams
2018-01-03 14:21   ` Jan Kara
2018-01-03 14:21     ` Jan Kara
2017-12-24  0:56 ` [PATCH v4 04/18] dax: require 'struct page' by default for filesystem dax Dan Williams
2017-12-24  0:56   ` Dan Williams
2018-01-03 15:29   ` Jan Kara
2018-01-03 15:29     ` Jan Kara
2018-01-04  8:16   ` Christoph Hellwig
2018-01-04  8:16     ` Christoph Hellwig
2018-01-08 11:58   ` Gerald Schaefer
2018-01-08 11:58     ` Gerald Schaefer
2017-12-24  0:56 ` [PATCH v4 05/18] dax: stop using VM_MIXEDMAP for dax Dan Williams
2017-12-24  0:56   ` Dan Williams
2018-01-03 15:27   ` Jan Kara
2018-01-03 15:27     ` Jan Kara
2017-12-24  0:56 ` [PATCH v4 06/18] dax: stop using VM_HUGEPAGE " Dan Williams
2017-12-24  0:56   ` Dan Williams
2017-12-24  0:56 ` [PATCH v4 07/18] dax: store pfns in the radix Dan Williams
2017-12-24  0:56   ` Dan Williams
2017-12-27  0:17   ` Ross Zwisler
2017-12-27  0:17     ` Ross Zwisler
2018-01-02 20:15     ` Dan Williams
2018-01-02 20:15       ` Dan Williams
2018-01-03 15:39   ` Jan Kara
2018-01-03 15:39     ` Jan Kara
2017-12-24  0:56 ` [PATCH v4 08/18] tools/testing/nvdimm: add 'bio_delay' mechanism Dan Williams
2017-12-24  0:56   ` Dan Williams
2017-12-27 18:08   ` Ross Zwisler
2017-12-27 18:08     ` Ross Zwisler
2018-01-02 20:35     ` Dan Williams
2018-01-02 20:35       ` Dan Williams
2018-01-02 21:44   ` Dave Chinner
2018-01-02 21:44     ` Dave Chinner
2018-01-02 21:51     ` Dan Williams
2018-01-02 21:51       ` Dan Williams
2018-01-03 15:46       ` Jan Kara
2018-01-03 15:46         ` Jan Kara
2018-01-03 20:37         ` Jeff Moyer
2018-01-03 20:37           ` Jeff Moyer
2017-12-24  0:56 ` [PATCH v4 09/18] mm, dax: enable filesystems to trigger dev_pagemap ->page_free callbacks Dan Williams
2017-12-24  0:56   ` Dan Williams
2018-01-04  8:20   ` Christoph Hellwig
2018-01-04  8:20     ` Christoph Hellwig
2017-12-24  0:56 ` [PATCH v4 10/18] mm, dev_pagemap: introduce CONFIG_DEV_PAGEMAP_OPS Dan Williams
2017-12-24  0:56   ` Dan Williams
2018-01-04  8:25   ` Christoph Hellwig
2018-01-04  8:25     ` Christoph Hellwig
2017-12-24  0:56 ` [PATCH v4 11/18] fs, dax: introduce DEFINE_FSDAX_AOPS Dan Williams
2017-12-24  0:56   ` Dan Williams
2017-12-27  5:29   ` Matthew Wilcox
2017-12-27  5:29     ` Matthew Wilcox
2018-01-02 20:21     ` Dan Williams
2018-01-02 20:21       ` Dan Williams
2018-01-03 16:05       ` Jan Kara
2018-01-03 16:05         ` Jan Kara
2018-01-04  8:27         ` Christoph Hellwig
2018-01-04  8:27           ` Christoph Hellwig
2018-01-02 21:41   ` Dave Chinner
2018-01-02 21:41     ` Dave Chinner
2017-12-24  0:57 ` [PATCH v4 12/18] xfs: use DEFINE_FSDAX_AOPS Dan Williams
2017-12-24  0:57   ` Dan Williams
2018-01-02 21:15   ` Darrick J. Wong
2018-01-02 21:15     ` Darrick J. Wong
2018-01-02 21:40     ` Dan Williams
2018-01-02 21:40       ` Dan Williams
2018-01-03 16:09       ` Jan Kara
2018-01-03 16:09         ` Jan Kara
2018-01-04  8:28   ` Christoph Hellwig
2018-01-04  8:28     ` Christoph Hellwig
2017-12-24  0:57 ` [PATCH v4 13/18] ext4: " Dan Williams
2017-12-24  0:57   ` Dan Williams
2017-12-24  0:57   ` Dan Williams
2018-01-04  8:29   ` Christoph Hellwig
2018-01-04  8:29     ` Christoph Hellwig
2018-01-04  8:29     ` Christoph Hellwig
2017-12-24  0:57 ` [PATCH v4 14/18] ext2: " Dan Williams
2017-12-24  0:57   ` Dan Williams
2018-01-04  8:29   ` Christoph Hellwig
2018-01-04  8:29     ` Christoph Hellwig
2017-12-24  0:57 ` Dan Williams [this message]
2017-12-24  0:57   ` [PATCH v4 15/18] mm, fs, dax: use page->mapping to warn if dma collides with truncate Dan Williams
2018-01-04  8:30   ` Christoph Hellwig
2018-01-04  8:30     ` Christoph Hellwig
2018-01-04  9:39   ` Jan Kara
2018-01-04  9:39     ` Jan Kara
2017-12-24  0:57 ` [PATCH v4 16/18] wait_bit: introduce {wait_on,wake_up}_atomic_one Dan Williams
2017-12-24  0:57   ` Dan Williams
2018-01-04  8:30   ` Christoph Hellwig
2018-01-04  8:30     ` Christoph Hellwig
2017-12-24  0:57 ` [PATCH v4 17/18] mm, fs, dax: dax_flush_dma, handle dma vs block-map-change collisions Dan Williams
2017-12-24  0:57   ` Dan Williams
2018-01-04  8:31   ` Christoph Hellwig
2018-01-04  8:31     ` Christoph Hellwig
2018-01-04 11:12   ` Jan Kara
2018-01-04 11:12     ` Jan Kara
2018-01-07 21:58     ` Dan Williams
2018-01-07 21:58       ` Dan Williams
2018-01-08 13:50       ` Jan Kara
2018-01-08 13:50         ` Jan Kara
2018-03-08 17:02         ` Dan Williams
2018-03-08 17:02           ` Dan Williams
2018-03-09 12:56           ` Jan Kara
2018-03-09 12:56             ` Jan Kara
2018-03-09 16:15             ` Dan Williams
2018-03-09 16:15               ` Dan Williams
2018-03-09 17:26               ` Dan Williams
2018-03-09 17:26                 ` Dan Williams
2017-12-24  0:57 ` [PATCH v4 18/18] xfs, dax: wire up dax_flush_dma support via a new xfs_sync_dma helper Dan Williams
2017-12-24  0:57   ` Dan Williams
2018-01-02 21:07   ` Darrick J. Wong
2018-01-02 21:07     ` Darrick J. Wong
2018-01-02 23:00   ` Dave Chinner
2018-01-02 23:00     ` Dave Chinner
2018-01-03  2:21     ` Dan Williams
2018-01-03  2:21       ` Dan Williams
2018-01-03  7:51       ` Dave Chinner
2018-01-03  7:51         ` Dave Chinner
2018-01-04  8:34         ` Christoph Hellwig
2018-01-04  8:34           ` Christoph Hellwig
2018-01-04  8:33     ` Christoph Hellwig
2018-01-04  8:33       ` Christoph Hellwig
2018-01-04  8:17 ` [PATCH v4 00/18] dax: fix dma vs truncate/hole-punch Christoph Hellwig
2018-01-04  8:17   ` Christoph Hellwig
2018-01-04  8:17   ` Christoph Hellwig
