All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ross Zwisler <ross.zwisler@linux.intel.com>
To: linux-kernel@vger.kernel.org
Cc: Theodore Ts'o <tytso@mit.edu>,
	Andrew Morton <akpm@linux-foundation.org>,
	linux-nvdimm@lists.01.org,
	Matthew Wilcox <mawilcox@microsoft.com>,
	Dave Chinner <david@fromorbit.com>,
	linux-mm@kvack.org, Andreas Dilger <adilger.kernel@dilger.ca>,
	Alexander Viro <viro@zeniv.linux.org.uk>,
	Jan Kara <jack@suse.com>,
	linux-fsdevel@vger.kernel.org, linux-ext4@vger.kernel.org
Subject: [PATCH v2 7/9] dax: coordinate locking for offsets in PMD range
Date: Tue, 23 Aug 2016 16:04:17 -0600	[thread overview]
Message-ID: <20160823220419.11717-8-ross.zwisler@linux.intel.com> (raw)
In-Reply-To: <20160823220419.11717-1-ross.zwisler@linux.intel.com>

DAX radix tree locking currently locks entries based on the unique
combination of the 'mapping' pointer and the pgoff_t 'index' for the entry.
This works for PTEs, but as we move to PMDs we will need all the offsets
within the range covered by the PMD to map to the same bit lock.
To accomplish this, for ranges covered by a PMD entry we will instead lock
based on the page offset of the beginning of the PMD entry.  The 'mapping'
pointer is still used in the same way.

Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
---
 fs/dax.c            | 37 ++++++++++++++++++++++++-------------
 include/linux/dax.h |  2 +-
 mm/filemap.c        |  2 +-
 3 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 0e3f462..955e184 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -62,10 +62,17 @@ static int __init init_dax_wait_table(void)
 }
 fs_initcall(init_dax_wait_table);
 
+static pgoff_t dax_entry_start(pgoff_t index, void *entry)
+{
+	if (RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD)
+		index &= (PMD_MASK >> PAGE_SHIFT);
+	return index;
+}
+
 static wait_queue_head_t *dax_entry_waitqueue(struct address_space *mapping,
-					      pgoff_t index)
+					      pgoff_t entry_start)
 {
-	unsigned long hash = hash_long((unsigned long)mapping ^ index,
+	unsigned long hash = hash_long((unsigned long)mapping ^ entry_start,
 				       DAX_WAIT_TABLE_BITS);
 	return wait_table + hash;
 }
@@ -283,7 +290,7 @@ EXPORT_SYMBOL_GPL(dax_do_io);
  */
 struct exceptional_entry_key {
 	struct address_space *mapping;
-	unsigned long index;
+	pgoff_t entry_start;
 };
 
 struct wait_exceptional_entry_queue {
@@ -299,7 +306,7 @@ static int wake_exceptional_entry_func(wait_queue_t *wait, unsigned int mode,
 		container_of(wait, struct wait_exceptional_entry_queue, wait);
 
 	if (key->mapping != ewait->key.mapping ||
-	    key->index != ewait->key.index)
+	    key->entry_start != ewait->key.entry_start)
 		return 0;
 	return autoremove_wake_function(wait, mode, sync, NULL);
 }
@@ -357,12 +364,10 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
 {
 	void *entry, **slot;
 	struct wait_exceptional_entry_queue ewait;
-	wait_queue_head_t *wq = dax_entry_waitqueue(mapping, index);
+	wait_queue_head_t *wq;
 
 	init_wait(&ewait.wait);
 	ewait.wait.func = wake_exceptional_entry_func;
-	ewait.key.mapping = mapping;
-	ewait.key.index = index;
 
 	for (;;) {
 		entry = __radix_tree_lookup(&mapping->page_tree, index, NULL,
@@ -373,6 +378,11 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
 				*slotp = slot;
 			return entry;
 		}
+
+		wq = dax_entry_waitqueue(mapping,
+				dax_entry_start(index, entry));
+		ewait.key.mapping = mapping;
+		ewait.key.entry_start = dax_entry_start(index, entry);
 		prepare_to_wait_exclusive(wq, &ewait.wait,
 					  TASK_UNINTERRUPTIBLE);
 		spin_unlock_irq(&mapping->tree_lock);
@@ -445,10 +455,11 @@ restart:
 	return entry;
 }
 
-void dax_wake_mapping_entry_waiter(struct address_space *mapping,
+void dax_wake_mapping_entry_waiter(void *entry, struct address_space *mapping,
 				   pgoff_t index, bool wake_all)
 {
-	wait_queue_head_t *wq = dax_entry_waitqueue(mapping, index);
+	wait_queue_head_t *wq = dax_entry_waitqueue(mapping,
+						dax_entry_start(index, entry));
 
 	/*
 	 * Checking for locked entry and prepare_to_wait_exclusive() happens
@@ -460,7 +471,7 @@ void dax_wake_mapping_entry_waiter(struct address_space *mapping,
 		struct exceptional_entry_key key;
 
 		key.mapping = mapping;
-		key.index = index;
+		key.entry_start = dax_entry_start(index, entry);
 		__wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key);
 	}
 }
@@ -478,7 +489,7 @@ void dax_unlock_mapping_entry(struct address_space *mapping, pgoff_t index)
 	}
 	unlock_slot(mapping, slot);
 	spin_unlock_irq(&mapping->tree_lock);
-	dax_wake_mapping_entry_waiter(mapping, index, false);
+	dax_wake_mapping_entry_waiter(entry, mapping, index, false);
 }
 
 static void put_locked_mapping_entry(struct address_space *mapping,
@@ -503,7 +514,7 @@ static void put_unlocked_mapping_entry(struct address_space *mapping,
 		return;
 
 	/* We have to wake up next waiter for the radix tree entry lock */
-	dax_wake_mapping_entry_waiter(mapping, index, false);
+	dax_wake_mapping_entry_waiter(entry, mapping, index, false);
 }
 
 /*
@@ -530,7 +541,7 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
 	radix_tree_delete(&mapping->page_tree, index);
 	mapping->nrexceptional--;
 	spin_unlock_irq(&mapping->tree_lock);
-	dax_wake_mapping_entry_waiter(mapping, index, true);
+	dax_wake_mapping_entry_waiter(entry, mapping, index, true);
 
 	return 1;
 }
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 9c6dc77..f6cab31 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -15,7 +15,7 @@ int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
 int dax_truncate_page(struct inode *, loff_t from, get_block_t);
 int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t);
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
-void dax_wake_mapping_entry_waiter(struct address_space *mapping,
+void dax_wake_mapping_entry_waiter(void *entry, struct address_space *mapping,
 				   pgoff_t index, bool wake_all);
 
 #ifdef CONFIG_FS_DAX
diff --git a/mm/filemap.c b/mm/filemap.c
index 8a287df..35e880d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -617,7 +617,7 @@ static int page_cache_tree_insert(struct address_space *mapping,
 			if (node)
 				workingset_node_pages_dec(node);
 			/* Wakeup waiters for exceptional entry lock */
-			dax_wake_mapping_entry_waiter(mapping, page->index,
+			dax_wake_mapping_entry_waiter(p, mapping, page->index,
 						      false);
 		}
 	}
-- 
2.9.0

_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm

WARNING: multiple messages have this Message-ID (diff)
From: Ross Zwisler <ross.zwisler@linux.intel.com>
To: linux-kernel@vger.kernel.org
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>,
	"Theodore Ts'o" <tytso@mit.edu>,
	Alexander Viro <viro@zeniv.linux.org.uk>,
	Andreas Dilger <adilger.kernel@dilger.ca>,
	Andrew Morton <akpm@linux-foundation.org>,
	Dan Williams <dan.j.williams@intel.com>,
	Dave Chinner <david@fromorbit.com>, Jan Kara <jack@suse.com>,
	linux-ext4@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-mm@kvack.org, linux-nvdimm@ml01.01.org,
	Matthew Wilcox <mawilcox@microsoft.com>
Subject: [PATCH v2 7/9] dax: coordinate locking for offsets in PMD range
Date: Tue, 23 Aug 2016 16:04:17 -0600	[thread overview]
Message-ID: <20160823220419.11717-8-ross.zwisler@linux.intel.com> (raw)
In-Reply-To: <20160823220419.11717-1-ross.zwisler@linux.intel.com>

DAX radix tree locking currently locks entries based on the unique
combination of the 'mapping' pointer and the pgoff_t 'index' for the entry.
This works for PTEs, but as we move to PMDs we will need all the offsets
within the range covered by the PMD to map to the same bit lock.
To accomplish this, for ranges covered by a PMD entry we will instead lock
based on the page offset of the beginning of the PMD entry.  The 'mapping'
pointer is still used in the same way.

Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
---
 fs/dax.c            | 37 ++++++++++++++++++++++++-------------
 include/linux/dax.h |  2 +-
 mm/filemap.c        |  2 +-
 3 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 0e3f462..955e184 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -62,10 +62,17 @@ static int __init init_dax_wait_table(void)
 }
 fs_initcall(init_dax_wait_table);
 
+static pgoff_t dax_entry_start(pgoff_t index, void *entry)
+{
+	if (RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD)
+		index &= (PMD_MASK >> PAGE_SHIFT);
+	return index;
+}
+
 static wait_queue_head_t *dax_entry_waitqueue(struct address_space *mapping,
-					      pgoff_t index)
+					      pgoff_t entry_start)
 {
-	unsigned long hash = hash_long((unsigned long)mapping ^ index,
+	unsigned long hash = hash_long((unsigned long)mapping ^ entry_start,
 				       DAX_WAIT_TABLE_BITS);
 	return wait_table + hash;
 }
@@ -283,7 +290,7 @@ EXPORT_SYMBOL_GPL(dax_do_io);
  */
 struct exceptional_entry_key {
 	struct address_space *mapping;
-	unsigned long index;
+	pgoff_t entry_start;
 };
 
 struct wait_exceptional_entry_queue {
@@ -299,7 +306,7 @@ static int wake_exceptional_entry_func(wait_queue_t *wait, unsigned int mode,
 		container_of(wait, struct wait_exceptional_entry_queue, wait);
 
 	if (key->mapping != ewait->key.mapping ||
-	    key->index != ewait->key.index)
+	    key->entry_start != ewait->key.entry_start)
 		return 0;
 	return autoremove_wake_function(wait, mode, sync, NULL);
 }
@@ -357,12 +364,10 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
 {
 	void *entry, **slot;
 	struct wait_exceptional_entry_queue ewait;
-	wait_queue_head_t *wq = dax_entry_waitqueue(mapping, index);
+	wait_queue_head_t *wq;
 
 	init_wait(&ewait.wait);
 	ewait.wait.func = wake_exceptional_entry_func;
-	ewait.key.mapping = mapping;
-	ewait.key.index = index;
 
 	for (;;) {
 		entry = __radix_tree_lookup(&mapping->page_tree, index, NULL,
@@ -373,6 +378,11 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
 				*slotp = slot;
 			return entry;
 		}
+
+		wq = dax_entry_waitqueue(mapping,
+				dax_entry_start(index, entry));
+		ewait.key.mapping = mapping;
+		ewait.key.entry_start = dax_entry_start(index, entry);
 		prepare_to_wait_exclusive(wq, &ewait.wait,
 					  TASK_UNINTERRUPTIBLE);
 		spin_unlock_irq(&mapping->tree_lock);
@@ -445,10 +455,11 @@ restart:
 	return entry;
 }
 
-void dax_wake_mapping_entry_waiter(struct address_space *mapping,
+void dax_wake_mapping_entry_waiter(void *entry, struct address_space *mapping,
 				   pgoff_t index, bool wake_all)
 {
-	wait_queue_head_t *wq = dax_entry_waitqueue(mapping, index);
+	wait_queue_head_t *wq = dax_entry_waitqueue(mapping,
+						dax_entry_start(index, entry));
 
 	/*
 	 * Checking for locked entry and prepare_to_wait_exclusive() happens
@@ -460,7 +471,7 @@ void dax_wake_mapping_entry_waiter(struct address_space *mapping,
 		struct exceptional_entry_key key;
 
 		key.mapping = mapping;
-		key.index = index;
+		key.entry_start = dax_entry_start(index, entry);
 		__wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key);
 	}
 }
@@ -478,7 +489,7 @@ void dax_unlock_mapping_entry(struct address_space *mapping, pgoff_t index)
 	}
 	unlock_slot(mapping, slot);
 	spin_unlock_irq(&mapping->tree_lock);
-	dax_wake_mapping_entry_waiter(mapping, index, false);
+	dax_wake_mapping_entry_waiter(entry, mapping, index, false);
 }
 
 static void put_locked_mapping_entry(struct address_space *mapping,
@@ -503,7 +514,7 @@ static void put_unlocked_mapping_entry(struct address_space *mapping,
 		return;
 
 	/* We have to wake up next waiter for the radix tree entry lock */
-	dax_wake_mapping_entry_waiter(mapping, index, false);
+	dax_wake_mapping_entry_waiter(entry, mapping, index, false);
 }
 
 /*
@@ -530,7 +541,7 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
 	radix_tree_delete(&mapping->page_tree, index);
 	mapping->nrexceptional--;
 	spin_unlock_irq(&mapping->tree_lock);
-	dax_wake_mapping_entry_waiter(mapping, index, true);
+	dax_wake_mapping_entry_waiter(entry, mapping, index, true);
 
 	return 1;
 }
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 9c6dc77..f6cab31 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -15,7 +15,7 @@ int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
 int dax_truncate_page(struct inode *, loff_t from, get_block_t);
 int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t);
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
-void dax_wake_mapping_entry_waiter(struct address_space *mapping,
+void dax_wake_mapping_entry_waiter(void *entry, struct address_space *mapping,
 				   pgoff_t index, bool wake_all);
 
 #ifdef CONFIG_FS_DAX
diff --git a/mm/filemap.c b/mm/filemap.c
index 8a287df..35e880d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -617,7 +617,7 @@ static int page_cache_tree_insert(struct address_space *mapping,
 			if (node)
 				workingset_node_pages_dec(node);
 			/* Wakeup waiters for exceptional entry lock */
-			dax_wake_mapping_entry_waiter(mapping, page->index,
+			dax_wake_mapping_entry_waiter(p, mapping, page->index,
 						      false);
 		}
 	}
-- 
2.9.0

WARNING: multiple messages have this Message-ID (diff)
From: Ross Zwisler <ross.zwisler@linux.intel.com>
To: linux-kernel@vger.kernel.org
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>,
	"Theodore Ts'o" <tytso@mit.edu>,
	Alexander Viro <viro@zeniv.linux.org.uk>,
	Andreas Dilger <adilger.kernel@dilger.ca>,
	Andrew Morton <akpm@linux-foundation.org>,
	Dan Williams <dan.j.williams@intel.com>,
	Dave Chinner <david@fromorbit.com>, Jan Kara <jack@suse.com>,
	linux-ext4@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-mm@kvack.org, linux-nvdimm@lists.01.org,
	Matthew Wilcox <mawilcox@microsoft.com>
Subject: [PATCH v2 7/9] dax: coordinate locking for offsets in PMD range
Date: Tue, 23 Aug 2016 16:04:17 -0600	[thread overview]
Message-ID: <20160823220419.11717-8-ross.zwisler@linux.intel.com> (raw)
In-Reply-To: <20160823220419.11717-1-ross.zwisler@linux.intel.com>

DAX radix tree locking currently locks entries based on the unique
combination of the 'mapping' pointer and the pgoff_t 'index' for the entry.
This works for PTEs, but as we move to PMDs we will need all the offsets
within the range covered by the PMD to map to the same bit lock.
To accomplish this, for ranges covered by a PMD entry we will instead lock
based on the page offset of the beginning of the PMD entry.  The 'mapping'
pointer is still used in the same way.

Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
---
 fs/dax.c            | 37 ++++++++++++++++++++++++-------------
 include/linux/dax.h |  2 +-
 mm/filemap.c        |  2 +-
 3 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 0e3f462..955e184 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -62,10 +62,17 @@ static int __init init_dax_wait_table(void)
 }
 fs_initcall(init_dax_wait_table);
 
+static pgoff_t dax_entry_start(pgoff_t index, void *entry)
+{
+	if (RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD)
+		index &= (PMD_MASK >> PAGE_SHIFT);
+	return index;
+}
+
 static wait_queue_head_t *dax_entry_waitqueue(struct address_space *mapping,
-					      pgoff_t index)
+					      pgoff_t entry_start)
 {
-	unsigned long hash = hash_long((unsigned long)mapping ^ index,
+	unsigned long hash = hash_long((unsigned long)mapping ^ entry_start,
 				       DAX_WAIT_TABLE_BITS);
 	return wait_table + hash;
 }
@@ -283,7 +290,7 @@ EXPORT_SYMBOL_GPL(dax_do_io);
  */
 struct exceptional_entry_key {
 	struct address_space *mapping;
-	unsigned long index;
+	pgoff_t entry_start;
 };
 
 struct wait_exceptional_entry_queue {
@@ -299,7 +306,7 @@ static int wake_exceptional_entry_func(wait_queue_t *wait, unsigned int mode,
 		container_of(wait, struct wait_exceptional_entry_queue, wait);
 
 	if (key->mapping != ewait->key.mapping ||
-	    key->index != ewait->key.index)
+	    key->entry_start != ewait->key.entry_start)
 		return 0;
 	return autoremove_wake_function(wait, mode, sync, NULL);
 }
@@ -357,12 +364,10 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
 {
 	void *entry, **slot;
 	struct wait_exceptional_entry_queue ewait;
-	wait_queue_head_t *wq = dax_entry_waitqueue(mapping, index);
+	wait_queue_head_t *wq;
 
 	init_wait(&ewait.wait);
 	ewait.wait.func = wake_exceptional_entry_func;
-	ewait.key.mapping = mapping;
-	ewait.key.index = index;
 
 	for (;;) {
 		entry = __radix_tree_lookup(&mapping->page_tree, index, NULL,
@@ -373,6 +378,11 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
 				*slotp = slot;
 			return entry;
 		}
+
+		wq = dax_entry_waitqueue(mapping,
+				dax_entry_start(index, entry));
+		ewait.key.mapping = mapping;
+		ewait.key.entry_start = dax_entry_start(index, entry);
 		prepare_to_wait_exclusive(wq, &ewait.wait,
 					  TASK_UNINTERRUPTIBLE);
 		spin_unlock_irq(&mapping->tree_lock);
@@ -445,10 +455,11 @@ restart:
 	return entry;
 }
 
-void dax_wake_mapping_entry_waiter(struct address_space *mapping,
+void dax_wake_mapping_entry_waiter(void *entry, struct address_space *mapping,
 				   pgoff_t index, bool wake_all)
 {
-	wait_queue_head_t *wq = dax_entry_waitqueue(mapping, index);
+	wait_queue_head_t *wq = dax_entry_waitqueue(mapping,
+						dax_entry_start(index, entry));
 
 	/*
 	 * Checking for locked entry and prepare_to_wait_exclusive() happens
@@ -460,7 +471,7 @@ void dax_wake_mapping_entry_waiter(struct address_space *mapping,
 		struct exceptional_entry_key key;
 
 		key.mapping = mapping;
-		key.index = index;
+		key.entry_start = dax_entry_start(index, entry);
 		__wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key);
 	}
 }
@@ -478,7 +489,7 @@ void dax_unlock_mapping_entry(struct address_space *mapping, pgoff_t index)
 	}
 	unlock_slot(mapping, slot);
 	spin_unlock_irq(&mapping->tree_lock);
-	dax_wake_mapping_entry_waiter(mapping, index, false);
+	dax_wake_mapping_entry_waiter(entry, mapping, index, false);
 }
 
 static void put_locked_mapping_entry(struct address_space *mapping,
@@ -503,7 +514,7 @@ static void put_unlocked_mapping_entry(struct address_space *mapping,
 		return;
 
 	/* We have to wake up next waiter for the radix tree entry lock */
-	dax_wake_mapping_entry_waiter(mapping, index, false);
+	dax_wake_mapping_entry_waiter(entry, mapping, index, false);
 }
 
 /*
@@ -530,7 +541,7 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
 	radix_tree_delete(&mapping->page_tree, index);
 	mapping->nrexceptional--;
 	spin_unlock_irq(&mapping->tree_lock);
-	dax_wake_mapping_entry_waiter(mapping, index, true);
+	dax_wake_mapping_entry_waiter(entry, mapping, index, true);
 
 	return 1;
 }
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 9c6dc77..f6cab31 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -15,7 +15,7 @@ int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
 int dax_truncate_page(struct inode *, loff_t from, get_block_t);
 int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t);
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
-void dax_wake_mapping_entry_waiter(struct address_space *mapping,
+void dax_wake_mapping_entry_waiter(void *entry, struct address_space *mapping,
 				   pgoff_t index, bool wake_all);
 
 #ifdef CONFIG_FS_DAX
diff --git a/mm/filemap.c b/mm/filemap.c
index 8a287df..35e880d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -617,7 +617,7 @@ static int page_cache_tree_insert(struct address_space *mapping,
 			if (node)
 				workingset_node_pages_dec(node);
 			/* Wakeup waiters for exceptional entry lock */
-			dax_wake_mapping_entry_waiter(mapping, page->index,
+			dax_wake_mapping_entry_waiter(p, mapping, page->index,
 						      false);
 		}
 	}
-- 
2.9.0

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

WARNING: multiple messages have this Message-ID (diff)
From: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
To: linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Cc: Theodore Ts'o <tytso-3s7WtUTddSA@public.gmane.org>,
	Andrew Morton
	<akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org>,
	linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw@public.gmane.org,
	Matthew Wilcox <mawilcox-0li6OtcxBFHby3iVrkZq2A@public.gmane.org>,
	Dave Chinner <david-FqsqvQoI3Ljby3iVrkZq2A@public.gmane.org>,
	linux-mm-Bw31MaZKKs3YtjvyW6yDsg@public.gmane.org,
	Andreas Dilger
	<adilger.kernel-m1MBpc4rdrD3fQ9qLvQP4Q@public.gmane.org>,
	Alexander Viro
	<viro-RmSDqhL/yNMiFSDQTTA3OLVCufUGDwFn@public.gmane.org>,
	Jan Kara <jack-IBi9RG/b67k@public.gmane.org>,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-ext4-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Subject: [PATCH v2 7/9] dax: coordinate locking for offsets in PMD range
Date: Tue, 23 Aug 2016 16:04:17 -0600	[thread overview]
Message-ID: <20160823220419.11717-8-ross.zwisler@linux.intel.com> (raw)
In-Reply-To: <20160823220419.11717-1-ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>

DAX radix tree locking currently locks entries based on the unique
combination of the 'mapping' pointer and the pgoff_t 'index' for the entry.
This works for PTEs, but as we move to PMDs we will need all the offsets
within the range covered by the PMD to map to the same bit lock.
To accomplish this, for ranges covered by a PMD entry we will instead lock
based on the page offset of the beginning of the PMD entry.  The 'mapping'
pointer is still used in the same way.

Signed-off-by: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
---
 fs/dax.c            | 37 ++++++++++++++++++++++++-------------
 include/linux/dax.h |  2 +-
 mm/filemap.c        |  2 +-
 3 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 0e3f462..955e184 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -62,10 +62,17 @@ static int __init init_dax_wait_table(void)
 }
 fs_initcall(init_dax_wait_table);
 
+static pgoff_t dax_entry_start(pgoff_t index, void *entry)
+{
+	if (RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD)
+		index &= (PMD_MASK >> PAGE_SHIFT);
+	return index;
+}
+
 static wait_queue_head_t *dax_entry_waitqueue(struct address_space *mapping,
-					      pgoff_t index)
+					      pgoff_t entry_start)
 {
-	unsigned long hash = hash_long((unsigned long)mapping ^ index,
+	unsigned long hash = hash_long((unsigned long)mapping ^ entry_start,
 				       DAX_WAIT_TABLE_BITS);
 	return wait_table + hash;
 }
@@ -283,7 +290,7 @@ EXPORT_SYMBOL_GPL(dax_do_io);
  */
 struct exceptional_entry_key {
 	struct address_space *mapping;
-	unsigned long index;
+	pgoff_t entry_start;
 };
 
 struct wait_exceptional_entry_queue {
@@ -299,7 +306,7 @@ static int wake_exceptional_entry_func(wait_queue_t *wait, unsigned int mode,
 		container_of(wait, struct wait_exceptional_entry_queue, wait);
 
 	if (key->mapping != ewait->key.mapping ||
-	    key->index != ewait->key.index)
+	    key->entry_start != ewait->key.entry_start)
 		return 0;
 	return autoremove_wake_function(wait, mode, sync, NULL);
 }
@@ -357,12 +364,10 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
 {
 	void *entry, **slot;
 	struct wait_exceptional_entry_queue ewait;
-	wait_queue_head_t *wq = dax_entry_waitqueue(mapping, index);
+	wait_queue_head_t *wq;
 
 	init_wait(&ewait.wait);
 	ewait.wait.func = wake_exceptional_entry_func;
-	ewait.key.mapping = mapping;
-	ewait.key.index = index;
 
 	for (;;) {
 		entry = __radix_tree_lookup(&mapping->page_tree, index, NULL,
@@ -373,6 +378,11 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
 				*slotp = slot;
 			return entry;
 		}
+
+		wq = dax_entry_waitqueue(mapping,
+				dax_entry_start(index, entry));
+		ewait.key.mapping = mapping;
+		ewait.key.entry_start = dax_entry_start(index, entry);
 		prepare_to_wait_exclusive(wq, &ewait.wait,
 					  TASK_UNINTERRUPTIBLE);
 		spin_unlock_irq(&mapping->tree_lock);
@@ -445,10 +455,11 @@ restart:
 	return entry;
 }
 
-void dax_wake_mapping_entry_waiter(struct address_space *mapping,
+void dax_wake_mapping_entry_waiter(void *entry, struct address_space *mapping,
 				   pgoff_t index, bool wake_all)
 {
-	wait_queue_head_t *wq = dax_entry_waitqueue(mapping, index);
+	wait_queue_head_t *wq = dax_entry_waitqueue(mapping,
+						dax_entry_start(index, entry));
 
 	/*
 	 * Checking for locked entry and prepare_to_wait_exclusive() happens
@@ -460,7 +471,7 @@ void dax_wake_mapping_entry_waiter(struct address_space *mapping,
 		struct exceptional_entry_key key;
 
 		key.mapping = mapping;
-		key.index = index;
+		key.entry_start = dax_entry_start(index, entry);
 		__wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key);
 	}
 }
@@ -478,7 +489,7 @@ void dax_unlock_mapping_entry(struct address_space *mapping, pgoff_t index)
 	}
 	unlock_slot(mapping, slot);
 	spin_unlock_irq(&mapping->tree_lock);
-	dax_wake_mapping_entry_waiter(mapping, index, false);
+	dax_wake_mapping_entry_waiter(entry, mapping, index, false);
 }
 
 static void put_locked_mapping_entry(struct address_space *mapping,
@@ -503,7 +514,7 @@ static void put_unlocked_mapping_entry(struct address_space *mapping,
 		return;
 
 	/* We have to wake up next waiter for the radix tree entry lock */
-	dax_wake_mapping_entry_waiter(mapping, index, false);
+	dax_wake_mapping_entry_waiter(entry, mapping, index, false);
 }
 
 /*
@@ -530,7 +541,7 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
 	radix_tree_delete(&mapping->page_tree, index);
 	mapping->nrexceptional--;
 	spin_unlock_irq(&mapping->tree_lock);
-	dax_wake_mapping_entry_waiter(mapping, index, true);
+	dax_wake_mapping_entry_waiter(entry, mapping, index, true);
 
 	return 1;
 }
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 9c6dc77..f6cab31 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -15,7 +15,7 @@ int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
 int dax_truncate_page(struct inode *, loff_t from, get_block_t);
 int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t);
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
-void dax_wake_mapping_entry_waiter(struct address_space *mapping,
+void dax_wake_mapping_entry_waiter(void *entry, struct address_space *mapping,
 				   pgoff_t index, bool wake_all);
 
 #ifdef CONFIG_FS_DAX
diff --git a/mm/filemap.c b/mm/filemap.c
index 8a287df..35e880d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -617,7 +617,7 @@ static int page_cache_tree_insert(struct address_space *mapping,
 			if (node)
 				workingset_node_pages_dec(node);
 			/* Wakeup waiters for exceptional entry lock */
-			dax_wake_mapping_entry_waiter(mapping, page->index,
+			dax_wake_mapping_entry_waiter(p, mapping, page->index,
 						      false);
 		}
 	}
-- 
2.9.0

  parent reply	other threads:[~2016-08-23 22:04 UTC|newest]

Thread overview: 135+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-08-23 22:04 [PATCH v2 0/9] re-enable DAX PMD support Ross Zwisler
2016-08-23 22:04 ` Ross Zwisler
2016-08-23 22:04 ` Ross Zwisler
2016-08-23 22:04 ` Ross Zwisler
2016-08-23 22:04 ` [PATCH v2 1/9] ext4: allow DAX writeback for hole punch Ross Zwisler
2016-08-23 22:04   ` Ross Zwisler
2016-08-23 22:04   ` Ross Zwisler
2016-08-23 22:04   ` Ross Zwisler
2016-09-21 15:22   ` Ross Zwisler
2016-09-21 15:22     ` Ross Zwisler
2016-09-21 15:22     ` Ross Zwisler
2016-09-21 15:22     ` Ross Zwisler
2016-09-21 15:22     ` Ross Zwisler
2016-09-22  6:59     ` Jan Kara
2016-09-22  6:59       ` Jan Kara
2016-09-22  6:59       ` Jan Kara
2016-09-22  6:59       ` Jan Kara
2016-09-22 15:51     ` Theodore Ts'o
2016-09-22 15:51       ` Theodore Ts'o
2016-08-23 22:04 ` [PATCH v2 2/9] ext2: tell DAX the size of allocation holes Ross Zwisler
2016-08-23 22:04   ` Ross Zwisler
2016-08-23 22:04   ` Ross Zwisler
     [not found]   ` <20160823220419.11717-3-ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
2016-08-25  7:57     ` Christoph Hellwig
2016-08-25  7:57       ` Christoph Hellwig
2016-08-25  7:57       ` Christoph Hellwig
     [not found]       ` <20160825075728.GA11235-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org>
2016-08-25 19:25         ` Ross Zwisler
2016-08-25 19:25           ` Ross Zwisler
2016-08-25 19:25           ` Ross Zwisler
2016-08-26 21:29         ` Ross Zwisler
2016-08-26 21:29           ` Ross Zwisler
2016-08-26 21:29           ` Ross Zwisler
2016-08-29  0:42           ` Dave Chinner
2016-08-29  0:42             ` Dave Chinner
     [not found]           ` <20160826212934.GA11265-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
2016-08-29  7:41             ` Christoph Hellwig
2016-08-29  7:41               ` Christoph Hellwig
2016-08-29  7:41               ` Christoph Hellwig
     [not found]               ` <20160829074116.GA16491-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org>
2016-08-29 12:57                 ` Theodore Ts'o
2016-08-29 12:57                   ` Theodore Ts'o
2016-08-29 12:57                   ` Theodore Ts'o
     [not found]                   ` <20160829125741.cdnbb2uaditcmnw2-AKGzg7BKzIDYtjvyW6yDsg@public.gmane.org>
2016-08-30  7:21                     ` Christoph Hellwig
2016-08-30  7:21                       ` Christoph Hellwig
2016-08-30  7:21                       ` Christoph Hellwig
2016-09-09 16:48                     ` Ross Zwisler
2016-09-09 16:48                       ` Ross Zwisler
2016-09-09 16:48                       ` Ross Zwisler
2016-09-09 20:35                       ` Matthew Wilcox
2016-09-09 20:35                         ` Matthew Wilcox
     [not found]                         ` <DM2PR21MB0089BCA980B67D8C53B25A1BCBFA0-B2pw06WL+/BAVFCO9/lqPs1VXTxX1y3OvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2016-09-09 22:34                           ` Dan Williams
2016-09-09 22:34                             ` Dan Williams
2016-09-09 22:34                             ` Dan Williams
     [not found]                             ` <CAPcyv4hjna08+Yw23w_V2f-RbBE6ar220+YGCuBVA-TACKWNug-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2016-09-10  7:31                               ` Christoph Hellwig
2016-09-10  7:31                                 ` Christoph Hellwig
2016-09-10  7:31                                 ` Christoph Hellwig
2016-09-10  7:50                                 ` Matthew Wilcox
2016-09-10  7:50                                   ` Matthew Wilcox
2016-09-10 17:49                                 ` Theodore Ts'o
2016-09-10 17:49                                   ` Theodore Ts'o
     [not found]                                   ` <20160910174910.yyirb7smiob7evt5-AKGzg7BKzIDYtjvyW6yDsg@public.gmane.org>
2016-09-11  0:42                                     ` Matthew Wilcox
2016-09-11  0:42                                       ` Matthew Wilcox
2016-09-11  0:42                                       ` Matthew Wilcox
2016-09-10  8:15                               ` Matthew Wilcox
2016-09-10  8:15                                 ` Matthew Wilcox
2016-09-10  8:15                                 ` Matthew Wilcox
2016-09-10 14:56                                 ` Dan Williams
2016-09-10 14:56                                   ` Dan Williams
2016-09-10  7:30                         ` Christoph Hellwig
2016-09-10  7:30                           ` Christoph Hellwig
2016-09-10  7:33                           ` Matthew Wilcox
2016-09-10  7:33                             ` Matthew Wilcox
2016-09-10  7:42                             ` Christoph Hellwig
2016-09-10  7:42                               ` Christoph Hellwig
2016-09-10  7:52                               ` Matthew Wilcox
2016-09-10  7:52                                 ` Matthew Wilcox
     [not found]                                 ` <DM2PR21MB0089C20EF469AA91A916867CCBFD0-B2pw06WL+/BAVFCO9/lqPs1VXTxX1y3OvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2016-09-11 12:47                                   ` Christoph Hellwig
2016-09-11 12:47                                     ` Christoph Hellwig
2016-09-11 12:47                                     ` Christoph Hellwig
2016-09-11 22:57                                     ` Ross Zwisler
2016-09-11 22:57                                       ` Ross Zwisler
2016-09-10 15:55                           ` Matthew Wilcox
2016-09-10 15:55                             ` Matthew Wilcox
2016-09-15 20:09   ` Ross Zwisler
2016-09-15 20:09     ` Ross Zwisler
2016-09-15 20:09     ` Ross Zwisler
2016-08-23 22:04 ` [PATCH v2 3/9] ext4: " Ross Zwisler
2016-08-23 22:04   ` Ross Zwisler
2016-08-23 22:04   ` Ross Zwisler
2016-08-23 22:04   ` Ross Zwisler
2016-08-23 22:04 ` [PATCH v2 4/9] dax: remove buffer_size_valid() Ross Zwisler
2016-08-23 22:04   ` Ross Zwisler
2016-08-23 22:04   ` Ross Zwisler
2016-08-23 22:04   ` Ross Zwisler
2016-08-23 22:04   ` Ross Zwisler
2016-08-23 22:04 ` [PATCH v2 5/9] dax: make 'wait_table' global variable static Ross Zwisler
2016-08-23 22:04   ` Ross Zwisler
2016-08-23 22:04   ` Ross Zwisler
2016-08-23 22:04   ` Ross Zwisler
2016-08-23 22:04   ` Ross Zwisler
2016-08-23 22:04 ` [PATCH v2 6/9] dax: consistent variable naming for DAX entries Ross Zwisler
2016-08-23 22:04   ` Ross Zwisler
2016-08-23 22:04   ` Ross Zwisler
2016-08-23 22:04   ` Ross Zwisler
2016-08-23 22:04 ` Ross Zwisler [this message]
2016-08-23 22:04   ` [PATCH v2 7/9] dax: coordinate locking for offsets in PMD range Ross Zwisler
2016-08-23 22:04   ` Ross Zwisler
2016-08-23 22:04   ` Ross Zwisler
2016-08-23 22:04 ` [PATCH v2 8/9] dax: re-enable DAX PMD support Ross Zwisler
2016-08-23 22:04   ` Ross Zwisler
2016-08-23 22:04   ` Ross Zwisler
2016-08-23 22:04   ` Ross Zwisler
2016-08-23 22:04 ` [PATCH v2 9/9] dax: remove "depends on BROKEN" from FS_DAX_PMD Ross Zwisler
2016-08-23 22:04   ` Ross Zwisler
2016-08-23 22:04   ` Ross Zwisler
2016-08-23 22:04   ` Ross Zwisler
2016-08-23 22:04   ` Ross Zwisler
2016-08-30 23:01 ` [PATCH v2 0/9] re-enable DAX PMD support Ross Zwisler
2016-08-30 23:01   ` Ross Zwisler
2016-08-30 23:01   ` Ross Zwisler
2016-08-30 23:01   ` Ross Zwisler
2016-08-31 20:20   ` Kani, Toshimitsu
2016-08-31 20:20     ` Kani, Toshimitsu
2016-08-31 20:20     ` Kani, Toshimitsu
2016-08-31 21:36     ` Ross Zwisler
2016-08-31 21:36       ` Ross Zwisler
2016-08-31 21:36       ` Ross Zwisler
2016-08-31 21:36       ` Ross Zwisler
2016-08-31 21:36       ` Ross Zwisler
2016-08-31 22:08       ` Kani, Toshimitsu
2016-08-31 22:08         ` Kani, Toshimitsu
2016-08-31 22:08         ` Kani, Toshimitsu
2016-08-31 22:08         ` Kani, Toshimitsu
2016-09-01 16:21         ` Ross Zwisler
2016-09-01 16:21           ` Ross Zwisler
2016-09-01 16:21           ` Ross Zwisler
2016-09-01 16:21           ` Ross Zwisler
2016-09-01 16:21           ` Ross Zwisler

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160823220419.11717-8-ross.zwisler@linux.intel.com \
    --to=ross.zwisler@linux.intel.com \
    --cc=adilger.kernel@dilger.ca \
    --cc=akpm@linux-foundation.org \
    --cc=david@fromorbit.com \
    --cc=jack@suse.com \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-nvdimm@lists.01.org \
    --cc=mawilcox@microsoft.com \
    --cc=tytso@mit.edu \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.