All of lore.kernel.org
 help / color / mirror / Atom feed
From: Shiyang Ruan <ruansy.fnst@fujitsu.com>
To: <linux-kernel@vger.kernel.org>, <linux-xfs@vger.kernel.org>,
	<nvdimm@lists.linux.dev>, <linux-mm@kvack.org>,
	<linux-fsdevel@vger.kernel.org>
Cc: <djwong@kernel.org>, <dan.j.williams@intel.com>,
	<david@fromorbit.com>, <hch@infradead.org>, <jane.chu@oracle.com>
Subject: [PATCH v9 01/10] dax: Use percpu rwsem for dax_{read,write}_lock()
Date: Sun, 26 Dec 2021 22:34:30 +0800	[thread overview]
Message-ID: <20211226143439.3985960-2-ruansy.fnst@fujitsu.com> (raw)
In-Reply-To: <20211226143439.3985960-1-ruansy.fnst@fujitsu.com>

In order to introduce dax holder registration, we need a write lock for
dax.  The write operation is a per-dax-device job, so the current global
(SRCU-based) lock is not suitable.  Replace it with a per-device
percpu_rw_semaphore and introduce a write lock for registration.

Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
---
 drivers/dax/device.c       | 11 +++++-----
 drivers/dax/super.c        | 42 ++++++++++++++++++++++++++------------
 drivers/md/dm-writecache.c |  7 +++----
 fs/dax.c                   | 31 ++++++++++++++--------------
 fs/fuse/dax.c              |  6 +++---
 include/linux/dax.h        | 19 ++++++++++++-----
 6 files changed, 69 insertions(+), 47 deletions(-)

diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index dd8222a42808..041345f9956d 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -198,7 +198,6 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
 	struct file *filp = vmf->vma->vm_file;
 	unsigned long fault_size;
 	vm_fault_t rc = VM_FAULT_SIGBUS;
-	int id;
 	pfn_t pfn;
 	struct dev_dax *dev_dax = filp->private_data;
 
@@ -206,7 +205,7 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
 			(vmf->flags & FAULT_FLAG_WRITE) ? "write" : "read",
 			vmf->vma->vm_start, vmf->vma->vm_end, pe_size);
 
-	id = dax_read_lock();
+	dax_read_lock(dev_dax->dax_dev);
 	switch (pe_size) {
 	case PE_SIZE_PTE:
 		fault_size = PAGE_SIZE;
@@ -246,7 +245,7 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
 			page->index = pgoff + i;
 		}
 	}
-	dax_read_unlock(id);
+	dax_read_unlock(dev_dax->dax_dev);
 
 	return rc;
 }
@@ -284,7 +283,7 @@ static const struct vm_operations_struct dax_vm_ops = {
 static int dax_mmap(struct file *filp, struct vm_area_struct *vma)
 {
 	struct dev_dax *dev_dax = filp->private_data;
-	int rc, id;
+	int rc;
 
 	dev_dbg(&dev_dax->dev, "trace\n");
 
@@ -292,9 +291,9 @@ static int dax_mmap(struct file *filp, struct vm_area_struct *vma)
 	 * We lock to check dax_dev liveness and will re-check at
 	 * fault time.
 	 */
-	id = dax_read_lock();
+	dax_read_lock(dev_dax->dax_dev);
 	rc = check_vma(dev_dax, vma, __func__);
-	dax_read_unlock(id);
+	dax_read_unlock(dev_dax->dax_dev);
 	if (rc)
 		return rc;
 
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index e7152a6c4cc4..c46f56e33d40 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -26,29 +26,41 @@ struct dax_device {
 	struct inode inode;
 	struct cdev cdev;
 	void *private;
+	struct percpu_rw_semaphore rwsem;
 	unsigned long flags;
 	const struct dax_operations *ops;
 };
 
 static dev_t dax_devt;
-DEFINE_STATIC_SRCU(dax_srcu);
 static struct vfsmount *dax_mnt;
 static DEFINE_IDA(dax_minor_ida);
 static struct kmem_cache *dax_cache __read_mostly;
 static struct super_block *dax_superblock __read_mostly;
 
-int dax_read_lock(void)
+void dax_read_lock(struct dax_device *dax_dev)
 {
-	return srcu_read_lock(&dax_srcu);
+	percpu_down_read(&dax_dev->rwsem);
 }
 EXPORT_SYMBOL_GPL(dax_read_lock);
 
-void dax_read_unlock(int id)
+void dax_read_unlock(struct dax_device *dax_dev)
 {
-	srcu_read_unlock(&dax_srcu, id);
+	percpu_up_read(&dax_dev->rwsem);
 }
 EXPORT_SYMBOL_GPL(dax_read_unlock);
 
+void dax_write_lock(struct dax_device *dax_dev)
+{
+	percpu_down_write(&dax_dev->rwsem);
+}
+EXPORT_SYMBOL_GPL(dax_write_lock);
+
+void dax_write_unlock(struct dax_device *dax_dev)
+{
+	percpu_up_write(&dax_dev->rwsem);
+}
+EXPORT_SYMBOL_GPL(dax_write_unlock);
+
 #if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX)
 #include <linux/blkdev.h>
 
@@ -75,7 +87,7 @@ struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev, u64 *start_off)
 {
 	struct dax_device *dax_dev;
 	u64 part_size;
-	int id;
+	bool not_found;
 
 	if (!blk_queue_dax(bdev->bd_disk->queue))
 		return NULL;
@@ -87,11 +99,14 @@ struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev, u64 *start_off)
 		return NULL;
 	}
 
-	id = dax_read_lock();
 	dax_dev = xa_load(&dax_hosts, (unsigned long)bdev->bd_disk);
-	if (!dax_dev || !dax_alive(dax_dev) || !igrab(&dax_dev->inode))
-		dax_dev = NULL;
-	dax_read_unlock(id);
+	if (dax_dev) {
+		dax_read_lock(dax_dev);
+		not_found = !dax_alive(dax_dev) || !igrab(&dax_dev->inode);
+		dax_read_unlock(dax_dev);
+		if (not_found)
+			dax_dev = NULL;
+	}
 
 	return dax_dev;
 }
@@ -222,7 +237,7 @@ EXPORT_SYMBOL_GPL(__set_dax_synchronous);
 
 bool dax_alive(struct dax_device *dax_dev)
 {
-	lockdep_assert_held(&dax_srcu);
+	lockdep_assert_held(&dax_dev->rwsem);
 	return test_bit(DAXDEV_ALIVE, &dax_dev->flags);
 }
 EXPORT_SYMBOL_GPL(dax_alive);
@@ -237,9 +252,9 @@ void kill_dax(struct dax_device *dax_dev)
 {
 	if (!dax_dev)
 		return;
-
+	dax_write_lock(dax_dev);
 	clear_bit(DAXDEV_ALIVE, &dax_dev->flags);
-	synchronize_srcu(&dax_srcu);
+	dax_write_unlock(dax_dev);
 }
 EXPORT_SYMBOL_GPL(kill_dax);
 
@@ -366,6 +381,7 @@ struct dax_device *alloc_dax(void *private, const struct dax_operations *ops,
 
 	dax_dev->ops = ops;
 	dax_dev->private = private;
+	percpu_init_rwsem(&dax_dev->rwsem);
 	if (flags & DAXDEV_F_SYNC)
 		set_dax_synchronous(dax_dev);
 
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 4f31591d2d25..ebe1ec345d1d 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -260,7 +260,6 @@ static int persistent_memory_claim(struct dm_writecache *wc)
 	loff_t s;
 	long p, da;
 	pfn_t pfn;
-	int id;
 	struct page **pages;
 	sector_t offset;
 
@@ -284,7 +283,7 @@ static int persistent_memory_claim(struct dm_writecache *wc)
 	}
 	offset >>= PAGE_SHIFT - 9;
 
-	id = dax_read_lock();
+	dax_read_lock(wc->ssd_dev->dax_dev);
 
 	da = dax_direct_access(wc->ssd_dev->dax_dev, offset, p, &wc->memory_map, &pfn);
 	if (da < 0) {
@@ -334,7 +333,7 @@ static int persistent_memory_claim(struct dm_writecache *wc)
 		wc->memory_vmapped = true;
 	}
 
-	dax_read_unlock(id);
+	dax_read_unlock(wc->ssd_dev->dax_dev);
 
 	wc->memory_map += (size_t)wc->start_sector << SECTOR_SHIFT;
 	wc->memory_map_size -= (size_t)wc->start_sector << SECTOR_SHIFT;
@@ -343,7 +342,7 @@ static int persistent_memory_claim(struct dm_writecache *wc)
 err3:
 	kvfree(pages);
 err2:
-	dax_read_unlock(id);
+	dax_read_unlock(wc->ssd_dev->dax_dev);
 err1:
 	return r;
 }
diff --git a/fs/dax.c b/fs/dax.c
index e0eecd8e3a8f..1f46810d4b68 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -717,20 +717,20 @@ static pgoff_t dax_iomap_pgoff(const struct iomap *iomap, loff_t pos)
 static int copy_cow_page_dax(struct vm_fault *vmf, const struct iomap_iter *iter)
 {
 	pgoff_t pgoff = dax_iomap_pgoff(&iter->iomap, iter->pos);
+	struct dax_device *dax_dev = iter->iomap.dax_dev;
 	void *vto, *kaddr;
 	long rc;
-	int id;
 
-	id = dax_read_lock();
-	rc = dax_direct_access(iter->iomap.dax_dev, pgoff, 1, &kaddr, NULL);
+	dax_read_lock(dax_dev);
+	rc = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL);
 	if (rc < 0) {
-		dax_read_unlock(id);
+		dax_read_unlock(dax_dev);
 		return rc;
 	}
 	vto = kmap_atomic(vmf->cow_page);
 	copy_user_page(vto, kaddr, vmf->address, vmf->cow_page);
 	kunmap_atomic(vto);
-	dax_read_unlock(id);
+	dax_read_unlock(dax_dev);
 	return 0;
 }
 
@@ -1009,10 +1009,10 @@ static int dax_iomap_pfn(const struct iomap *iomap, loff_t pos, size_t size,
 			 pfn_t *pfnp)
 {
 	pgoff_t pgoff = dax_iomap_pgoff(iomap, pos);
-	int id, rc;
+	int rc;
 	long length;
 
-	id = dax_read_lock();
+	dax_read_lock(iomap->dax_dev);
 	length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size),
 				   NULL, pfnp);
 	if (length < 0) {
@@ -1029,7 +1029,7 @@ static int dax_iomap_pfn(const struct iomap *iomap, loff_t pos, size_t size,
 		goto out;
 	rc = 0;
 out:
-	dax_read_unlock(id);
+	dax_read_unlock(iomap->dax_dev);
 	return rc;
 }
 
@@ -1135,6 +1135,7 @@ static s64 dax_zero_iter(struct iomap_iter *iter, bool *did_zero)
 {
 	const struct iomap *iomap = &iter->iomap;
 	const struct iomap *srcmap = iomap_iter_srcmap(iter);
+	struct dax_device *dax_dev = iomap->dax_dev;
 	loff_t pos = iter->pos;
 	u64 length = iomap_length(iter);
 	s64 written = 0;
@@ -1148,14 +1149,13 @@ static s64 dax_zero_iter(struct iomap_iter *iter, bool *did_zero)
 		unsigned size = min_t(u64, PAGE_SIZE - offset, length);
 		pgoff_t pgoff = dax_iomap_pgoff(iomap, pos);
 		long rc;
-		int id;
 
-		id = dax_read_lock();
+		dax_read_lock(dax_dev);
 		if (IS_ALIGNED(pos, PAGE_SIZE) && size == PAGE_SIZE)
-			rc = dax_zero_page_range(iomap->dax_dev, pgoff, 1);
+			rc = dax_zero_page_range(dax_dev, pgoff, 1);
 		else
-			rc = dax_memzero(iomap->dax_dev, pgoff, offset, size);
-		dax_read_unlock(id);
+			rc = dax_memzero(dax_dev, pgoff, offset, size);
+		dax_read_unlock(dax_dev);
 
 		if (rc < 0)
 			return rc;
@@ -1209,7 +1209,6 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
 	loff_t end = pos + length, done = 0;
 	ssize_t ret = 0;
 	size_t xfer;
-	int id;
 
 	if (iov_iter_rw(iter) == READ) {
 		end = min(end, i_size_read(iomi->inode));
@@ -1234,7 +1233,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
 					      (end - 1) >> PAGE_SHIFT);
 	}
 
-	id = dax_read_lock();
+	dax_read_lock(dax_dev);
 	while (pos < end) {
 		unsigned offset = pos & (PAGE_SIZE - 1);
 		const size_t size = ALIGN(length + offset, PAGE_SIZE);
@@ -1281,7 +1280,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
 		if (xfer < map_len)
 			break;
 	}
-	dax_read_unlock(id);
+	dax_read_unlock(dax_dev);
 
 	return done ? done : ret;
 }
diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c
index 713818d74de6..00a419acab79 100644
--- a/fs/fuse/dax.c
+++ b/fs/fuse/dax.c
@@ -1231,7 +1231,7 @@ static int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd)
 {
 	long nr_pages, nr_ranges;
 	struct fuse_dax_mapping *range;
-	int ret, id;
+	int ret;
 	size_t dax_size = -1;
 	unsigned long i;
 
@@ -1240,10 +1240,10 @@ static int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd)
 	INIT_LIST_HEAD(&fcd->busy_ranges);
 	INIT_DELAYED_WORK(&fcd->free_work, fuse_dax_free_mem_worker);
 
-	id = dax_read_lock();
+	dax_read_lock(fcd->dev);
 	nr_pages = dax_direct_access(fcd->dev, 0, PHYS_PFN(dax_size), NULL,
 				     NULL);
-	dax_read_unlock(id);
+	dax_read_unlock(fcd->dev);
 	if (nr_pages < 0) {
 		pr_debug("dax_direct_access() returned %ld\n", nr_pages);
 		return nr_pages;
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 87ae4c9b1d65..a146bfb80804 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -178,15 +178,24 @@ int dax_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
 		const struct iomap_ops *ops);
 
 #if IS_ENABLED(CONFIG_DAX)
-int dax_read_lock(void);
-void dax_read_unlock(int id);
+void dax_read_lock(struct dax_device *dax_dev);
+void dax_read_unlock(struct dax_device *dax_dev);
+void dax_write_lock(struct dax_device *dax_dev);
+void dax_write_unlock(struct dax_device *dax_dev);
 #else
-static inline int dax_read_lock(void)
+static inline void dax_read_lock(struct dax_device *dax_dev)
+{
+}
+
+static inline void dax_read_unlock(struct dax_device *dax_dev)
+{
+}
+
+static inline void dax_write_lock(struct dax_device *dax_dev)
 {
-	return 0;
 }
 
-static inline void dax_read_unlock(int id)
+static inline void dax_write_unlock(struct dax_device *dax_dev)
 {
 }
 #endif /* CONFIG_DAX */
-- 
2.34.1




  reply	other threads:[~2021-12-26 14:35 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-12-26 14:34 [PATCH v9 00/10] fsdax: introduce fs query to support reflink Shiyang Ruan
2021-12-26 14:34 ` Shiyang Ruan [this message]
2022-01-04 22:44   ` [PATCH v9 01/10] dax: Use percpu rwsem for dax_{read,write}_lock() Dan Williams
2022-01-05 17:45     ` Darrick J. Wong
2022-01-06 11:06     ` Shiyang Ruan
2021-12-26 14:34 ` [PATCH v9 02/10] dax: Introduce holder for dax_device Shiyang Ruan
2022-01-05 18:12   ` Darrick J. Wong
2022-01-05 18:23     ` Dan Williams
2022-01-05 18:56       ` Darrick J. Wong
2022-01-05 19:20         ` Dan Williams
2022-01-05 22:47           ` Darrick J. Wong
2022-01-05 23:01             ` Dan Williams
2022-01-05 23:54               ` Darrick J. Wong
2022-01-06  0:12                 ` Dan Williams
2022-01-20  8:46                   ` Christoph Hellwig
2022-01-21  1:26                     ` Shiyang Ruan
2022-01-21  2:22                       ` Darrick J. Wong
2022-01-21  7:17                         ` Christoph Hellwig
2022-02-15 21:51                         ` Dan Williams
2021-12-26 14:34 ` [PATCH v9 03/10] mm: factor helpers for memory_failure_dev_pagemap Shiyang Ruan
2021-12-26 14:34 ` [PATCH v9 04/10] pagemap,pmem: Introduce ->memory_failure() Shiyang Ruan
2022-01-05 19:06   ` Darrick J. Wong
2021-12-26 14:34 ` [PATCH v9 05/10] fsdax: fix function description Shiyang Ruan
2022-01-05 17:50   ` Darrick J. Wong
2022-01-20  8:47   ` Christoph Hellwig
2021-12-26 14:34 ` [PATCH v9 06/10] fsdax: Introduce dax_lock_mapping_entry() Shiyang Ruan
2022-01-04 22:55   ` Dan Williams
2021-12-26 14:34 ` [PATCH v9 07/10] mm: move pgoff_address() to vma_pgoff_address() Shiyang Ruan
2022-01-20  8:47   ` Christoph Hellwig
2021-12-26 14:34 ` [PATCH v9 08/10] mm: Introduce mf_dax_kill_procs() for fsdax case Shiyang Ruan
2022-01-20  8:55   ` Christoph Hellwig
2021-12-26 14:34 ` [PATCH v9 09/10] xfs: Implement ->notify_failure() for XFS Shiyang Ruan
2022-01-05 18:53   ` Darrick J. Wong
2022-01-05 21:17     ` Dan Williams
2021-12-26 14:34 ` [PATCH v9 10/10] fsdax: set a CoW flag when associate reflink mappings Shiyang Ruan
2022-01-20  8:59   ` Christoph Hellwig
2022-01-21  2:33     ` Shiyang Ruan
2022-01-21  7:16       ` Christoph Hellwig
2022-01-21  8:34         ` Shiyang Ruan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211226143439.3985960-2-ruansy.fnst@fujitsu.com \
    --to=ruansy.fnst@fujitsu.com \
    --cc=dan.j.williams@intel.com \
    --cc=david@fromorbit.com \
    --cc=djwong@kernel.org \
    --cc=hch@infradead.org \
    --cc=jane.chu@oracle.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=nvdimm@lists.linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.