All of lore.kernel.org
 help / color / mirror / Atom feed
* [Virtio-fs] [PATCH 0/3] virtio-fs: Create a read-write mapping only if needed
@ 2019-07-24 21:07 Vivek Goyal
  2019-07-24 21:07 ` [Virtio-fs] [PATCH 1/3] fuse: Get rid of file parameter from setup mapping Vivek Goyal
                   ` (2 more replies)
  0 siblings, 3 replies; 15+ messages in thread
From: Vivek Goyal @ 2019-07-24 21:07 UTC (permalink / raw)
  To: virtio-fs

This should apply on top of.

https://gitlab.com/virtio-fs/linux/commits/virtio-fs-dev

Do not always create a read-write mapping. Create it read-write only if
caller asked for it. Otherwise create a read-only mapping and upgrade
it to read-write later when needed.

Vivek Goyal (3):
  fuse: Get rid of file parameter from setup mapping
  fuse: Move new mapping setup code in a function
  fuse: Add logic to upgrade a read-only mapping to read-write

 fs/fuse/file.c   | 246 +++++++++++++++++++++++++++++------------------
 fs/fuse/fuse_i.h |   3 +
 2 files changed, 155 insertions(+), 94 deletions(-)

-- 
2.17.2


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [Virtio-fs] [PATCH 1/3] fuse: Get rid of file parameter from setup mapping
  2019-07-24 21:07 [Virtio-fs] [PATCH 0/3] virtio-fs: Create a read-write mapping only if needed Vivek Goyal
@ 2019-07-24 21:07 ` Vivek Goyal
  2019-07-26  1:23   ` Liu Bo
  2019-07-24 21:07 ` [Virtio-fs] [PATCH 2/3] fuse: Move new mapping setup code in a function Vivek Goyal
  2019-07-24 21:07 ` [Virtio-fs] [PATCH 3/3] fuse: Add logic to upgrade a read-only mapping to read-write Vivek Goyal
  2 siblings, 1 reply; 15+ messages in thread
From: Vivek Goyal @ 2019-07-24 21:07 UTC (permalink / raw)
  To: virtio-fs

There is only one caller of fuse_setup_one_mapping() and that passes file
argument as NULL. So get rid of this parameter as there are no callers.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
---
 fs/fuse/file.c | 30 +++++++-----------------------
 1 file changed, 7 insertions(+), 23 deletions(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index fc40e0f44578..93f8e62e2b5b 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -264,41 +264,26 @@ static void dmap_add_to_free_pool(struct fuse_conn *fc,
 }
 
 /* offset passed in should be aligned to FUSE_DAX_MEM_RANGE_SZ */
-static int fuse_setup_one_mapping(struct inode *inode,
-				struct file *file, loff_t offset,
-				struct fuse_dax_mapping *dmap)
+static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
+				  struct fuse_dax_mapping *dmap)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_inode *fi = get_fuse_inode(inode);
-	struct fuse_file *ff = NULL;
 	struct fuse_setupmapping_in inarg;
 	FUSE_ARGS(args);
 	ssize_t err;
 
-	if (file)
-		ff = file->private_data;
-
 	WARN_ON(offset % FUSE_DAX_MEM_RANGE_SZ);
 	WARN_ON(fc->nr_free_ranges < 0);
 
 	/* Ask fuse daemon to setup mapping */
 	memset(&inarg, 0, sizeof(inarg));
 	inarg.foffset = offset;
-	if (ff)
-		inarg.fh = ff->fh;
-	else
-		inarg.fh = -1;
+	inarg.fh = -1;
 	inarg.moffset = dmap->window_offset;
 	inarg.len = FUSE_DAX_MEM_RANGE_SZ;
-	if (file) {
-		inarg.flags |= (file->f_mode & FMODE_WRITE) ?
-				FUSE_SETUPMAPPING_FLAG_WRITE : 0;
-		inarg.flags |= (file->f_mode & FMODE_READ) ?
-				FUSE_SETUPMAPPING_FLAG_READ : 0;
-	} else {
-		inarg.flags |= FUSE_SETUPMAPPING_FLAG_READ;
-		inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
-	}
+	inarg.flags |= FUSE_SETUPMAPPING_FLAG_READ;
+	inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
 	args.in.h.opcode = FUSE_SETUPMAPPING;
 	args.in.h.nodeid = fi->nodeid;
 	args.in.numargs = 1;
@@ -1985,9 +1970,8 @@ static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
 		}
 
 		/* Setup one mapping */
-		ret = fuse_setup_one_mapping(inode, NULL,
-				ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ),
-				alloc_dmap);
+		ret = fuse_setup_one_mapping(inode,
+			ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ), alloc_dmap);
 		if (ret < 0) {
 			printk("fuse_setup_one_mapping() failed. err=%d"
 				" pos=0x%llx\n", ret, pos);
-- 
2.17.2


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [Virtio-fs] [PATCH 2/3] fuse: Move new mapping setup code in a function
  2019-07-24 21:07 [Virtio-fs] [PATCH 0/3] virtio-fs: Create a read-write mapping only if needed Vivek Goyal
  2019-07-24 21:07 ` [Virtio-fs] [PATCH 1/3] fuse: Get rid of file parameter from setup mapping Vivek Goyal
@ 2019-07-24 21:07 ` Vivek Goyal
  2019-07-26  1:34   ` Liu Bo
  2019-07-24 21:07 ` [Virtio-fs] [PATCH 3/3] fuse: Add logic to upgrade a read-only mapping to read-write Vivek Goyal
  2 siblings, 1 reply; 15+ messages in thread
From: Vivek Goyal @ 2019-07-24 21:07 UTC (permalink / raw)
  To: virtio-fs

Move new mapping setup code in a separate function. More code will come
in fuse_iomap_begin() and its becoming too big.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
---
 fs/fuse/file.c | 116 +++++++++++++++++++++++++++----------------------
 1 file changed, 64 insertions(+), 52 deletions(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 93f8e62e2b5b..a2c19e4a28b5 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1882,6 +1882,67 @@ static void fuse_fill_iomap(struct inode *inode, loff_t pos, loff_t length,
 	}
 }
 
+static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
+					 loff_t length, unsigned flags,
+					 struct iomap *iomap)
+{
+	struct fuse_inode *fi = get_fuse_inode(inode);
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_dax_mapping *dmap, *alloc_dmap = NULL;
+	int ret;
+
+	/* Can't do reclaim in fault path yet due to lock ordering.
+	 * Read path takes shared inode lock and that's not sufficient
+	 * for inline range reclaim. Caller needs to drop lock, wait
+	 * and retry.
+	 */
+	if (flags & IOMAP_FAULT || !(flags & IOMAP_WRITE)) {
+		alloc_dmap = alloc_dax_mapping(fc);
+		if (!alloc_dmap)
+			return -ENOSPC;
+	} else {
+		alloc_dmap = alloc_dax_mapping_reclaim(fc, inode);
+		if (IS_ERR(alloc_dmap))
+			return PTR_ERR(alloc_dmap);
+	}
+
+	/* If we are here, we should have memory allocated */
+	if (WARN_ON(!alloc_dmap))
+		return -EBUSY;
+
+	/*
+	 * Drop read lock and take write lock so that only one
+	 * caller can try to setup mapping and other waits
+	 */
+	down_write(&fi->i_dmap_sem);
+	/*
+	 * We dropped lock. Check again if somebody else setup
+	 * mapping already.
+	 */
+	dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, pos,
+						pos);
+	if (dmap) {
+		fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
+		dmap_add_to_free_pool(fc, alloc_dmap);
+		up_write(&fi->i_dmap_sem);
+		return 0;
+	}
+
+	/* Setup one mapping */
+	ret = fuse_setup_one_mapping(inode,
+		ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ), alloc_dmap);
+	if (ret < 0) {
+		printk("fuse_setup_one_mapping() failed. err=%d"
+			" pos=0x%llx\n", ret, pos);
+		dmap_add_to_free_pool(fc, alloc_dmap);
+		up_write(&fi->i_dmap_sem);
+		return ret;
+	}
+	fuse_fill_iomap(inode, pos, length, iomap, alloc_dmap, flags);
+	up_write(&fi->i_dmap_sem);
+	return 0;
+}
+
 /* This is just for DAX and the mapping is ephemeral, do not use it for other
  * purposes since there is no block device with a permanent mapping.
  */
@@ -1890,8 +1951,7 @@ static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
 {
 	struct fuse_inode *fi = get_fuse_inode(inode);
 	struct fuse_conn *fc = get_fuse_conn(inode);
-	struct fuse_dax_mapping *dmap, *alloc_dmap = NULL;
-	int ret;
+	struct fuse_dax_mapping *dmap;
 
 	/* We don't support FIEMAP */
 	BUG_ON(flags & IOMAP_REPORT);
@@ -1932,56 +1992,8 @@ static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
 		if (pos >= i_size_read(inode))
 			goto iomap_hole;
 
-		/* Can't do reclaim in fault path yet due to lock ordering.
-		 * Read path takes shared inode lock and that's not sufficient
-		 * for inline range reclaim. Caller needs to drop lock, wait
-		 * and retry.
-		 */
-		if (flags & IOMAP_FAULT || !(flags & IOMAP_WRITE)) {
-			alloc_dmap = alloc_dax_mapping(fc);
-			if (!alloc_dmap)
-				return -ENOSPC;
-		} else {
-			alloc_dmap = alloc_dax_mapping_reclaim(fc, inode);
-			if (IS_ERR(alloc_dmap))
-				return PTR_ERR(alloc_dmap);
-		}
-
-		/* If we are here, we should have memory allocated */
-		if (WARN_ON(!alloc_dmap))
-			return -EBUSY;
-
-		/*
-		 * Drop read lock and take write lock so that only one
-		 * caller can try to setup mapping and other waits
-		 */
-		down_write(&fi->i_dmap_sem);
-		/*
-		 * We dropped lock. Check again if somebody else setup
-		 * mapping already.
-		 */
-		dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, pos,
-							pos);
-		if (dmap) {
-			fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
-			dmap_add_to_free_pool(fc, alloc_dmap);
-			up_write(&fi->i_dmap_sem);
-			return 0;
-		}
-
-		/* Setup one mapping */
-		ret = fuse_setup_one_mapping(inode,
-			ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ), alloc_dmap);
-		if (ret < 0) {
-			printk("fuse_setup_one_mapping() failed. err=%d"
-				" pos=0x%llx\n", ret, pos);
-			dmap_add_to_free_pool(fc, alloc_dmap);
-			up_write(&fi->i_dmap_sem);
-			return ret;
-		}
-		fuse_fill_iomap(inode, pos, length, iomap, alloc_dmap, flags);
-		up_write(&fi->i_dmap_sem);
-		return 0;
+		return iomap_begin_setup_new_mapping(inode, pos, length, flags,
+						    iomap);
 	}
 
 	/*
-- 
2.17.2


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [Virtio-fs] [PATCH 3/3] fuse: Add logic to upgrade a read-only mapping to read-write
  2019-07-24 21:07 [Virtio-fs] [PATCH 0/3] virtio-fs: Create a read-write mapping only if needed Vivek Goyal
  2019-07-24 21:07 ` [Virtio-fs] [PATCH 1/3] fuse: Get rid of file parameter from setup mapping Vivek Goyal
  2019-07-24 21:07 ` [Virtio-fs] [PATCH 2/3] fuse: Move new mapping setup code in a function Vivek Goyal
@ 2019-07-24 21:07 ` Vivek Goyal
  2019-07-25  0:08   ` Liu Bo
  2 siblings, 1 reply; 15+ messages in thread
From: Vivek Goyal @ 2019-07-24 21:07 UTC (permalink / raw)
  To: virtio-fs

Do not always map a dax mapping read-write. There are use cases like
executing a file where we want to map file read-only. virtio-fs dir on
host might be backed by overlayfs. We don't want to open file read-write
on overlayfs otherwise it will unnecessariliy be copied-up nullyifying
the advantages of sharing page cache between vms for unmodified files.

Hence, create a read-only mapping if user did not ask for writable mapping.
Later upgrade it to read-write mapping when users requests it.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
---
 fs/fuse/file.c   | 114 ++++++++++++++++++++++++++++++++++++-----------
 fs/fuse/fuse_i.h |   3 ++
 2 files changed, 91 insertions(+), 26 deletions(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index a2c19e4a28b5..bcd8ddcc0fdf 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -265,7 +265,8 @@ static void dmap_add_to_free_pool(struct fuse_conn *fc,
 
 /* offset passed in should be aligned to FUSE_DAX_MEM_RANGE_SZ */
 static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
-				  struct fuse_dax_mapping *dmap)
+				  struct fuse_dax_mapping *dmap, bool writable,
+				  bool upgrade)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_inode *fi = get_fuse_inode(inode);
@@ -283,7 +284,8 @@ static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
 	inarg.moffset = dmap->window_offset;
 	inarg.len = FUSE_DAX_MEM_RANGE_SZ;
 	inarg.flags |= FUSE_SETUPMAPPING_FLAG_READ;
-	inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
+	if (writable)
+		inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
 	args.in.h.opcode = FUSE_SETUPMAPPING;
 	args.in.h.nodeid = fi->nodeid;
 	args.in.numargs = 1;
@@ -296,26 +298,30 @@ static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
 		return err;
 	}
 
-	pr_debug("fuse_setup_one_mapping() succeeded. offset=0x%llx err=%zd\n", offset, err);
+	pr_debug("fuse_setup_one_mapping() succeeded. offset=0x%llx writable=%d"
+		 " err=%zd\n", offset, writable, err);
 
-	/*
-	 * We don't take a refernce on inode. inode is valid right now and
-	 * when inode is going away, cleanup logic should first cleanup
-	 * dmap entries.
-	 *
-	 * TODO: Do we need to ensure that we are holding inode lock
-	 * as well.
-	 */
-	dmap->inode = inode;
-	dmap->start = offset;
-	dmap->end = offset + FUSE_DAX_MEM_RANGE_SZ - 1;
-	/* Protected by fi->i_dmap_sem */
-	fuse_dax_interval_tree_insert(dmap, &fi->dmap_tree);
-	fi->nr_dmaps++;
-	spin_lock(&fc->lock);
-	list_add_tail(&dmap->busy_list, &fc->busy_ranges);
-	fc->nr_busy_ranges++;
-	spin_unlock(&fc->lock);
+	dmap->writable = writable;
+	if (!upgrade) {
+		/*
+		 * We don't take a refernce on inode. inode is valid right now
+		 * and when inode is going away, cleanup logic should first
+		 * cleanup dmap entries.
+		 *
+		 * TODO: Do we need to ensure that we are holding inode lock
+		 * as well.
+		 */
+		dmap->inode = inode;
+		dmap->start = offset;
+		dmap->end = offset + FUSE_DAX_MEM_RANGE_SZ - 1;
+		/* Protected by fi->i_dmap_sem */
+		fuse_dax_interval_tree_insert(dmap, &fi->dmap_tree);
+		fi->nr_dmaps++;
+		spin_lock(&fc->lock);
+		list_add_tail(&dmap->busy_list, &fc->busy_ranges);
+		fc->nr_busy_ranges++;
+		spin_unlock(&fc->lock);
+	}
 	return 0;
 }
 
@@ -1890,6 +1896,7 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_dax_mapping *dmap, *alloc_dmap = NULL;
 	int ret;
+	bool writable = flags & IOMAP_WRITE;
 
 	/* Can't do reclaim in fault path yet due to lock ordering.
 	 * Read path takes shared inode lock and that's not sufficient
@@ -1930,10 +1937,11 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
 
 	/* Setup one mapping */
 	ret = fuse_setup_one_mapping(inode,
-		ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ), alloc_dmap);
+				     ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ),
+				     alloc_dmap, writable, false);
 	if (ret < 0) {
 		printk("fuse_setup_one_mapping() failed. err=%d"
-			" pos=0x%llx\n", ret, pos);
+			" pos=0x%llx, writable=%d\n", ret, pos, writable);
 		dmap_add_to_free_pool(fc, alloc_dmap);
 		up_write(&fi->i_dmap_sem);
 		return ret;
@@ -1943,6 +1951,45 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
 	return 0;
 }
 
+static int iomap_begin_upgrade_mapping(struct inode *inode, loff_t pos,
+					 loff_t length, unsigned flags,
+					 struct iomap *iomap)
+{
+	struct fuse_inode *fi = get_fuse_inode(inode);
+	struct fuse_dax_mapping *dmap;
+	int ret;
+
+	/*
+	 * Take exclusive lock so that only one caller can try to setup
+	 * mapping and others wait.
+	 */
+	down_write(&fi->i_dmap_sem);
+	dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, pos, pos);
+
+	/* We are holding either inode lock or i_mmap_sem, and that should
+	 * ensure that dmap can't reclaimed or truncated and it should still
+	 * be there in tree despite the fact we dropped and re-acquired the
+	 * lock.
+	 */
+	if (WARN_ON(!dmap))
+		return -EIO;
+
+	WARN_ON(dmap->writable);
+	ret = fuse_setup_one_mapping(inode,
+				     ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ),
+				     dmap, true, true);
+	if (ret < 0) {
+		printk("fuse_setup_one_mapping() failed. err=%d pos=0x%llx\n",
+		       ret, pos);
+		up_write(&fi->i_dmap_sem);
+		return ret;
+	}
+
+	fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
+	up_write(&fi->i_dmap_sem);
+	return 0;
+}
+
 /* This is just for DAX and the mapping is ephemeral, do not use it for other
  * purposes since there is no block device with a permanent mapping.
  */
@@ -1952,6 +1999,7 @@ static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
 	struct fuse_inode *fi = get_fuse_inode(inode);
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_dax_mapping *dmap;
+	bool writable = flags & IOMAP_WRITE;
 
 	/* We don't support FIEMAP */
 	BUG_ON(flags & IOMAP_REPORT);
@@ -1982,9 +2030,23 @@ static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
 	dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, pos, pos);
 
 	if (dmap) {
-		fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
-		up_read(&fi->i_dmap_sem);
-		return 0;
+		if (writable && !dmap->writable) {
+			/* Upgrade read-only mapping to read-write. This will
+			 * require exclusive i_dmap_sem lock as we don't want
+			 * two threads to be trying to this simultaneously
+			 * for same dmap. So drop shared lock and acquire
+			 * exclusive lock.
+			 */
+			up_read(&fi->i_dmap_sem);
+			pr_debug("%s: Upgrading mapping at offset 0x%llx"
+				 " length 0x%llx\n", __func__, pos, length);
+			return iomap_begin_upgrade_mapping(inode, pos, length,
+							   flags, iomap);
+		} else {
+			fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
+			up_read(&fi->i_dmap_sem);
+			return 0;
+		}
 	} else {
 		up_read(&fi->i_dmap_sem);
 		pr_debug("%s: no mapping at offset 0x%llx length 0x%llx\n",
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index e899a06e29d7..34ca8b90a1e1 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -134,6 +134,9 @@ struct fuse_dax_mapping {
 
 	/** Length of mapping, in bytes */
 	loff_t length;
+
+	/* Is this mapping read-only or read-write */
+	bool writable;
 };
 
 /** FUSE inode */
-- 
2.17.2


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [Virtio-fs] [PATCH 3/3] fuse: Add logic to upgrade a read-only mapping to read-write
  2019-07-24 21:07 ` [Virtio-fs] [PATCH 3/3] fuse: Add logic to upgrade a read-only mapping to read-write Vivek Goyal
@ 2019-07-25  0:08   ` Liu Bo
  2019-07-25 15:35     ` Vivek Goyal
  0 siblings, 1 reply; 15+ messages in thread
From: Liu Bo @ 2019-07-25  0:08 UTC (permalink / raw)
  To: Vivek Goyal; +Cc: virtio-fs

On Wed, Jul 24, 2019 at 05:07:21PM -0400, Vivek Goyal wrote:
> Do not always map a dax mapping read-write. There are use cases like
> executing a file where we want to map file read-only. virtio-fs dir on
> host might be backed by overlayfs. We don't want to open file read-write
> on overlayfs otherwise it will unnecessariliy be copied-up nullyifying
> the advantages of sharing page cache between vms for unmodified files.
> 
> Hence, create a read-only mapping if user did not ask for writable mapping.
> Later upgrade it to read-write mapping when users requests it.
> 
> Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
> ---
>  fs/fuse/file.c   | 114 ++++++++++++++++++++++++++++++++++++-----------
>  fs/fuse/fuse_i.h |   3 ++
>  2 files changed, 91 insertions(+), 26 deletions(-)
> 
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index a2c19e4a28b5..bcd8ddcc0fdf 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -265,7 +265,8 @@ static void dmap_add_to_free_pool(struct fuse_conn *fc,
>  
>  /* offset passed in should be aligned to FUSE_DAX_MEM_RANGE_SZ */
>  static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
> -				  struct fuse_dax_mapping *dmap)
> +				  struct fuse_dax_mapping *dmap, bool writable,
> +				  bool upgrade)
>  {
>  	struct fuse_conn *fc = get_fuse_conn(inode);
>  	struct fuse_inode *fi = get_fuse_inode(inode);
> @@ -283,7 +284,8 @@ static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
>  	inarg.moffset = dmap->window_offset;
>  	inarg.len = FUSE_DAX_MEM_RANGE_SZ;
>  	inarg.flags |= FUSE_SETUPMAPPING_FLAG_READ;
> -	inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
> +	if (writable)
> +		inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
>  	args.in.h.opcode = FUSE_SETUPMAPPING;
>  	args.in.h.nodeid = fi->nodeid;
>  	args.in.numargs = 1;
> @@ -296,26 +298,30 @@ static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
>  		return err;
>  	}
>  
> -	pr_debug("fuse_setup_one_mapping() succeeded. offset=0x%llx err=%zd\n", offset, err);
> +	pr_debug("fuse_setup_one_mapping() succeeded. offset=0x%llx writable=%d"
> +		 " err=%zd\n", offset, writable, err);
>  
> -	/*
> -	 * We don't take a refernce on inode. inode is valid right now and
> -	 * when inode is going away, cleanup logic should first cleanup
> -	 * dmap entries.
> -	 *
> -	 * TODO: Do we need to ensure that we are holding inode lock
> -	 * as well.
> -	 */
> -	dmap->inode = inode;
> -	dmap->start = offset;
> -	dmap->end = offset + FUSE_DAX_MEM_RANGE_SZ - 1;
> -	/* Protected by fi->i_dmap_sem */
> -	fuse_dax_interval_tree_insert(dmap, &fi->dmap_tree);
> -	fi->nr_dmaps++;
> -	spin_lock(&fc->lock);
> -	list_add_tail(&dmap->busy_list, &fc->busy_ranges);
> -	fc->nr_busy_ranges++;
> -	spin_unlock(&fc->lock);
> +	dmap->writable = writable;
> +	if (!upgrade) {
> +		/*
> +		 * We don't take a refernce on inode. inode is valid right now
> +		 * and when inode is going away, cleanup logic should first
> +		 * cleanup dmap entries.
> +		 *
> +		 * TODO: Do we need to ensure that we are holding inode lock
> +		 * as well.
> +		 */
> +		dmap->inode = inode;
> +		dmap->start = offset;
> +		dmap->end = offset + FUSE_DAX_MEM_RANGE_SZ - 1;
> +		/* Protected by fi->i_dmap_sem */
> +		fuse_dax_interval_tree_insert(dmap, &fi->dmap_tree);
> +		fi->nr_dmaps++;
> +		spin_lock(&fc->lock);
> +		list_add_tail(&dmap->busy_list, &fc->busy_ranges);
> +		fc->nr_busy_ranges++;
> +		spin_unlock(&fc->lock);
> +	}
>  	return 0;
>  }
>  
> @@ -1890,6 +1896,7 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
>  	struct fuse_conn *fc = get_fuse_conn(inode);
>  	struct fuse_dax_mapping *dmap, *alloc_dmap = NULL;
>  	int ret;
> +	bool writable = flags & IOMAP_WRITE;
>  
>  	/* Can't do reclaim in fault path yet due to lock ordering.
>  	 * Read path takes shared inode lock and that's not sufficient
> @@ -1930,10 +1937,11 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
>  
>  	/* Setup one mapping */
>  	ret = fuse_setup_one_mapping(inode,
> -		ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ), alloc_dmap);
> +				     ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ),
> +				     alloc_dmap, writable, false);
>  	if (ret < 0) {
>  		printk("fuse_setup_one_mapping() failed. err=%d"
> -			" pos=0x%llx\n", ret, pos);
> +			" pos=0x%llx, writable=%d\n", ret, pos, writable);
>  		dmap_add_to_free_pool(fc, alloc_dmap);
>  		up_write(&fi->i_dmap_sem);
>  		return ret;
> @@ -1943,6 +1951,45 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
>  	return 0;
>  }
>  
> +static int iomap_begin_upgrade_mapping(struct inode *inode, loff_t pos,
> +					 loff_t length, unsigned flags,
> +					 struct iomap *iomap)
> +{
> +	struct fuse_inode *fi = get_fuse_inode(inode);
> +	struct fuse_dax_mapping *dmap;
> +	int ret;
> +
> +	/*
> +	 * Take exclusive lock so that only one caller can try to setup
> +	 * mapping and others wait.
> +	 */
> +	down_write(&fi->i_dmap_sem);
> +	dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, pos, pos);
> +
> +	/* We are holding either inode lock or i_mmap_sem, and that should
> +	 * ensure that dmap can't reclaimed or truncated and it should still
> +	 * be there in tree despite the fact we dropped and re-acquired the
> +	 * lock.
> +	 */
> +	if (WARN_ON(!dmap))
> +		return -EIO;

up_write() is missed here.

And I posted a similar patch[1] but with inode_lock being dropped in
mind, anything wrong with that patch?

[1]
virtiofs: add dmap flags to differentiate write mapping from read mapping

thanks,
-liubo

> +
> +	WARN_ON(dmap->writable);
> +	ret = fuse_setup_one_mapping(inode,
> +				     ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ),
> +				     dmap, true, true);
> +	if (ret < 0) {
> +		printk("fuse_setup_one_mapping() failed. err=%d pos=0x%llx\n",
> +		       ret, pos);
> +		up_write(&fi->i_dmap_sem);
> +		return ret;
> +	}
> +
> +	fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
> +	up_write(&fi->i_dmap_sem);
> +	return 0;
> +}
> +
>  /* This is just for DAX and the mapping is ephemeral, do not use it for other
>   * purposes since there is no block device with a permanent mapping.
>   */
> @@ -1952,6 +1999,7 @@ static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
>  	struct fuse_inode *fi = get_fuse_inode(inode);
>  	struct fuse_conn *fc = get_fuse_conn(inode);
>  	struct fuse_dax_mapping *dmap;
> +	bool writable = flags & IOMAP_WRITE;
>  
>  	/* We don't support FIEMAP */
>  	BUG_ON(flags & IOMAP_REPORT);
> @@ -1982,9 +2030,23 @@ static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
>  	dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, pos, pos);
>  
>  	if (dmap) {
> -		fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
> -		up_read(&fi->i_dmap_sem);
> -		return 0;
> +		if (writable && !dmap->writable) {
> +			/* Upgrade read-only mapping to read-write. This will
> +			 * require exclusive i_dmap_sem lock as we don't want
> +			 * two threads to be trying to this simultaneously
> +			 * for same dmap. So drop shared lock and acquire
> +			 * exclusive lock.
> +			 */
> +			up_read(&fi->i_dmap_sem);
> +			pr_debug("%s: Upgrading mapping at offset 0x%llx"
> +				 " length 0x%llx\n", __func__, pos, length);
> +			return iomap_begin_upgrade_mapping(inode, pos, length,
> +							   flags, iomap);
> +		} else {
> +			fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
> +			up_read(&fi->i_dmap_sem);
> +			return 0;
> +		}
>  	} else {
>  		up_read(&fi->i_dmap_sem);
>  		pr_debug("%s: no mapping at offset 0x%llx length 0x%llx\n",
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index e899a06e29d7..34ca8b90a1e1 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -134,6 +134,9 @@ struct fuse_dax_mapping {
>  
>  	/** Length of mapping, in bytes */
>  	loff_t length;
> +
> +	/* Is this mapping read-only or read-write */
> +	bool writable;
>  };
>  
>  /** FUSE inode */
> -- 
> 2.17.2


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Virtio-fs] [PATCH 3/3] fuse: Add logic to upgrade a read-only mapping to read-write
  2019-07-25  0:08   ` Liu Bo
@ 2019-07-25 15:35     ` Vivek Goyal
  2019-07-25 19:32       ` Liu Bo
  0 siblings, 1 reply; 15+ messages in thread
From: Vivek Goyal @ 2019-07-25 15:35 UTC (permalink / raw)
  To: Liu Bo; +Cc: virtio-fs

On Wed, Jul 24, 2019 at 05:08:58PM -0700, Liu Bo wrote:
> On Wed, Jul 24, 2019 at 05:07:21PM -0400, Vivek Goyal wrote:
> > Do not always map a dax mapping read-write. There are use cases like
> > executing a file where we want to map file read-only. virtio-fs dir on
> > host might be backed by overlayfs. We don't want to open file read-write
> > on overlayfs otherwise it will unnecessariliy be copied-up nullyifying
> > the advantages of sharing page cache between vms for unmodified files.
> > 
> > Hence, create a read-only mapping if user did not ask for writable mapping.
> > Later upgrade it to read-write mapping when users requests it.
> > 
> > Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
> > ---
> >  fs/fuse/file.c   | 114 ++++++++++++++++++++++++++++++++++++-----------
> >  fs/fuse/fuse_i.h |   3 ++
> >  2 files changed, 91 insertions(+), 26 deletions(-)
> > 
> > diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> > index a2c19e4a28b5..bcd8ddcc0fdf 100644
> > --- a/fs/fuse/file.c
> > +++ b/fs/fuse/file.c
> > @@ -265,7 +265,8 @@ static void dmap_add_to_free_pool(struct fuse_conn *fc,
> >  
> >  /* offset passed in should be aligned to FUSE_DAX_MEM_RANGE_SZ */
> >  static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
> > -				  struct fuse_dax_mapping *dmap)
> > +				  struct fuse_dax_mapping *dmap, bool writable,
> > +				  bool upgrade)
> >  {
> >  	struct fuse_conn *fc = get_fuse_conn(inode);
> >  	struct fuse_inode *fi = get_fuse_inode(inode);
> > @@ -283,7 +284,8 @@ static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
> >  	inarg.moffset = dmap->window_offset;
> >  	inarg.len = FUSE_DAX_MEM_RANGE_SZ;
> >  	inarg.flags |= FUSE_SETUPMAPPING_FLAG_READ;
> > -	inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
> > +	if (writable)
> > +		inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
> >  	args.in.h.opcode = FUSE_SETUPMAPPING;
> >  	args.in.h.nodeid = fi->nodeid;
> >  	args.in.numargs = 1;
> > @@ -296,26 +298,30 @@ static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
> >  		return err;
> >  	}
> >  
> > -	pr_debug("fuse_setup_one_mapping() succeeded. offset=0x%llx err=%zd\n", offset, err);
> > +	pr_debug("fuse_setup_one_mapping() succeeded. offset=0x%llx writable=%d"
> > +		 " err=%zd\n", offset, writable, err);
> >  
> > -	/*
> > -	 * We don't take a refernce on inode. inode is valid right now and
> > -	 * when inode is going away, cleanup logic should first cleanup
> > -	 * dmap entries.
> > -	 *
> > -	 * TODO: Do we need to ensure that we are holding inode lock
> > -	 * as well.
> > -	 */
> > -	dmap->inode = inode;
> > -	dmap->start = offset;
> > -	dmap->end = offset + FUSE_DAX_MEM_RANGE_SZ - 1;
> > -	/* Protected by fi->i_dmap_sem */
> > -	fuse_dax_interval_tree_insert(dmap, &fi->dmap_tree);
> > -	fi->nr_dmaps++;
> > -	spin_lock(&fc->lock);
> > -	list_add_tail(&dmap->busy_list, &fc->busy_ranges);
> > -	fc->nr_busy_ranges++;
> > -	spin_unlock(&fc->lock);
> > +	dmap->writable = writable;
> > +	if (!upgrade) {
> > +		/*
> > +		 * We don't take a refernce on inode. inode is valid right now
> > +		 * and when inode is going away, cleanup logic should first
> > +		 * cleanup dmap entries.
> > +		 *
> > +		 * TODO: Do we need to ensure that we are holding inode lock
> > +		 * as well.
> > +		 */
> > +		dmap->inode = inode;
> > +		dmap->start = offset;
> > +		dmap->end = offset + FUSE_DAX_MEM_RANGE_SZ - 1;
> > +		/* Protected by fi->i_dmap_sem */
> > +		fuse_dax_interval_tree_insert(dmap, &fi->dmap_tree);
> > +		fi->nr_dmaps++;
> > +		spin_lock(&fc->lock);
> > +		list_add_tail(&dmap->busy_list, &fc->busy_ranges);
> > +		fc->nr_busy_ranges++;
> > +		spin_unlock(&fc->lock);
> > +	}
> >  	return 0;
> >  }
> >  
> > @@ -1890,6 +1896,7 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
> >  	struct fuse_conn *fc = get_fuse_conn(inode);
> >  	struct fuse_dax_mapping *dmap, *alloc_dmap = NULL;
> >  	int ret;
> > +	bool writable = flags & IOMAP_WRITE;
> >  
> >  	/* Can't do reclaim in fault path yet due to lock ordering.
> >  	 * Read path takes shared inode lock and that's not sufficient
> > @@ -1930,10 +1937,11 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
> >  
> >  	/* Setup one mapping */
> >  	ret = fuse_setup_one_mapping(inode,
> > -		ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ), alloc_dmap);
> > +				     ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ),
> > +				     alloc_dmap, writable, false);
> >  	if (ret < 0) {
> >  		printk("fuse_setup_one_mapping() failed. err=%d"
> > -			" pos=0x%llx\n", ret, pos);
> > +			" pos=0x%llx, writable=%d\n", ret, pos, writable);
> >  		dmap_add_to_free_pool(fc, alloc_dmap);
> >  		up_write(&fi->i_dmap_sem);
> >  		return ret;
> > @@ -1943,6 +1951,45 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
> >  	return 0;
> >  }
> >  
> > +static int iomap_begin_upgrade_mapping(struct inode *inode, loff_t pos,
> > +					 loff_t length, unsigned flags,
> > +					 struct iomap *iomap)
> > +{
> > +	struct fuse_inode *fi = get_fuse_inode(inode);
> > +	struct fuse_dax_mapping *dmap;
> > +	int ret;
> > +
> > +	/*
> > +	 * Take exclusive lock so that only one caller can try to setup
> > +	 * mapping and others wait.
> > +	 */
> > +	down_write(&fi->i_dmap_sem);
> > +	dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, pos, pos);
> > +
> > +	/* We are holding either inode lock or i_mmap_sem, and that should
> > +	 * ensure that dmap can't reclaimed or truncated and it should still
> > +	 * be there in tree despite the fact we dropped and re-acquired the
> > +	 * lock.
> > +	 */
> > +	if (WARN_ON(!dmap))
> > +		return -EIO;
> 
> up_write() is missed here.

Good catch. Will fix it.

> 
> And I posted a similar patch[1] but with inode_lock being dropped in
> mind, anything wrong with that patch?

It was an old posting so did not pay much attention to it. Looking at
it now and few things I notice.

- That patch will allocate and possibly wait for memory range and free
  it up when upgrading from read-only to read-write. That's not required.

- dmap->flags is not required at this point of time. If we add more
  states later, we can convert it to a flag.

- Code structure seems little simpler in this patch (in
  fuse_iomap_begin()).

W.r.t dropping inode lock, I think we should still be able to do with this
patch and take reference on dmap under shared lock. And drop that
reference later. Taking that reference should make sure dmap will not
be reclaimed when we don't have i_dmap_sem. And truncate/punch_hole
can't reclaim anyway because of inode_lock/i_mmap_sem lock.

Thanks
Vivek


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Virtio-fs] [PATCH 3/3] fuse: Add logic to upgrade a read-only mapping to read-write
  2019-07-25 15:35     ` Vivek Goyal
@ 2019-07-25 19:32       ` Liu Bo
  2019-07-25 20:30         ` Vivek Goyal
  0 siblings, 1 reply; 15+ messages in thread
From: Liu Bo @ 2019-07-25 19:32 UTC (permalink / raw)
  To: Vivek Goyal; +Cc: virtio-fs

On Thu, Jul 25, 2019 at 11:35:21AM -0400, Vivek Goyal wrote:
> On Wed, Jul 24, 2019 at 05:08:58PM -0700, Liu Bo wrote:
> > On Wed, Jul 24, 2019 at 05:07:21PM -0400, Vivek Goyal wrote:
> > > Do not always map a dax mapping read-write. There are use cases like
> > > executing a file where we want to map file read-only. virtio-fs dir on
> > > host might be backed by overlayfs. We don't want to open file read-write
> > > on overlayfs otherwise it will unnecessariliy be copied-up nullyifying
> > > the advantages of sharing page cache between vms for unmodified files.
> > > 
> > > Hence, create a read-only mapping if user did not ask for writable mapping.
> > > Later upgrade it to read-write mapping when users requests it.
> > > 
> > > Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
> > > ---
> > >  fs/fuse/file.c   | 114 ++++++++++++++++++++++++++++++++++++-----------
> > >  fs/fuse/fuse_i.h |   3 ++
> > >  2 files changed, 91 insertions(+), 26 deletions(-)
> > > 
> > > diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> > > index a2c19e4a28b5..bcd8ddcc0fdf 100644
> > > --- a/fs/fuse/file.c
> > > +++ b/fs/fuse/file.c
> > > @@ -265,7 +265,8 @@ static void dmap_add_to_free_pool(struct fuse_conn *fc,
> > >  
> > >  /* offset passed in should be aligned to FUSE_DAX_MEM_RANGE_SZ */
> > >  static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
> > > -				  struct fuse_dax_mapping *dmap)
> > > +				  struct fuse_dax_mapping *dmap, bool writable,
> > > +				  bool upgrade)
> > >  {
> > >  	struct fuse_conn *fc = get_fuse_conn(inode);
> > >  	struct fuse_inode *fi = get_fuse_inode(inode);
> > > @@ -283,7 +284,8 @@ static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
> > >  	inarg.moffset = dmap->window_offset;
> > >  	inarg.len = FUSE_DAX_MEM_RANGE_SZ;
> > >  	inarg.flags |= FUSE_SETUPMAPPING_FLAG_READ;
> > > -	inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
> > > +	if (writable)
> > > +		inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
> > >  	args.in.h.opcode = FUSE_SETUPMAPPING;
> > >  	args.in.h.nodeid = fi->nodeid;
> > >  	args.in.numargs = 1;
> > > @@ -296,26 +298,30 @@ static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
> > >  		return err;
> > >  	}
> > >  
> > > -	pr_debug("fuse_setup_one_mapping() succeeded. offset=0x%llx err=%zd\n", offset, err);
> > > +	pr_debug("fuse_setup_one_mapping() succeeded. offset=0x%llx writable=%d"
> > > +		 " err=%zd\n", offset, writable, err);
> > >  
> > > -	/*
> > > -	 * We don't take a refernce on inode. inode is valid right now and
> > > -	 * when inode is going away, cleanup logic should first cleanup
> > > -	 * dmap entries.
> > > -	 *
> > > -	 * TODO: Do we need to ensure that we are holding inode lock
> > > -	 * as well.
> > > -	 */
> > > -	dmap->inode = inode;
> > > -	dmap->start = offset;
> > > -	dmap->end = offset + FUSE_DAX_MEM_RANGE_SZ - 1;
> > > -	/* Protected by fi->i_dmap_sem */
> > > -	fuse_dax_interval_tree_insert(dmap, &fi->dmap_tree);
> > > -	fi->nr_dmaps++;
> > > -	spin_lock(&fc->lock);
> > > -	list_add_tail(&dmap->busy_list, &fc->busy_ranges);
> > > -	fc->nr_busy_ranges++;
> > > -	spin_unlock(&fc->lock);
> > > +	dmap->writable = writable;
> > > +	if (!upgrade) {
> > > +		/*
> > > +		 * We don't take a refernce on inode. inode is valid right now
> > > +		 * and when inode is going away, cleanup logic should first
> > > +		 * cleanup dmap entries.
> > > +		 *
> > > +		 * TODO: Do we need to ensure that we are holding inode lock
> > > +		 * as well.
> > > +		 */
> > > +		dmap->inode = inode;
> > > +		dmap->start = offset;
> > > +		dmap->end = offset + FUSE_DAX_MEM_RANGE_SZ - 1;
> > > +		/* Protected by fi->i_dmap_sem */
> > > +		fuse_dax_interval_tree_insert(dmap, &fi->dmap_tree);
> > > +		fi->nr_dmaps++;
> > > +		spin_lock(&fc->lock);
> > > +		list_add_tail(&dmap->busy_list, &fc->busy_ranges);
> > > +		fc->nr_busy_ranges++;
> > > +		spin_unlock(&fc->lock);
> > > +	}
> > >  	return 0;
> > >  }
> > >  
> > > @@ -1890,6 +1896,7 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
> > >  	struct fuse_conn *fc = get_fuse_conn(inode);
> > >  	struct fuse_dax_mapping *dmap, *alloc_dmap = NULL;
> > >  	int ret;
> > > +	bool writable = flags & IOMAP_WRITE;
> > >  
> > >  	/* Can't do reclaim in fault path yet due to lock ordering.
> > >  	 * Read path takes shared inode lock and that's not sufficient
> > > @@ -1930,10 +1937,11 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
> > >  
> > >  	/* Setup one mapping */
> > >  	ret = fuse_setup_one_mapping(inode,
> > > -		ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ), alloc_dmap);
> > > +				     ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ),
> > > +				     alloc_dmap, writable, false);
> > >  	if (ret < 0) {
> > >  		printk("fuse_setup_one_mapping() failed. err=%d"
> > > -			" pos=0x%llx\n", ret, pos);
> > > +			" pos=0x%llx, writable=%d\n", ret, pos, writable);
> > >  		dmap_add_to_free_pool(fc, alloc_dmap);
> > >  		up_write(&fi->i_dmap_sem);
> > >  		return ret;
> > > @@ -1943,6 +1951,45 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
> > >  	return 0;
> > >  }
> > >  
> > > +static int iomap_begin_upgrade_mapping(struct inode *inode, loff_t pos,
> > > +					 loff_t length, unsigned flags,
> > > +					 struct iomap *iomap)
> > > +{
> > > +	struct fuse_inode *fi = get_fuse_inode(inode);
> > > +	struct fuse_dax_mapping *dmap;
> > > +	int ret;
> > > +
> > > +	/*
> > > +	 * Take exclusive lock so that only one caller can try to setup
> > > +	 * mapping and others wait.
> > > +	 */
> > > +	down_write(&fi->i_dmap_sem);
> > > +	dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, pos, pos);
> > > +
> > > +	/* We are holding either inode lock or i_mmap_sem, and that should
> > > +	 * ensure that dmap can't reclaimed or truncated and it should still
> > > +	 * be there in tree despite the fact we dropped and re-acquired the
> > > +	 * lock.
> > > +	 */
> > > +	if (WARN_ON(!dmap))
> > > +		return -EIO;
> > 
> > up_write() is missed here.
> 
> Good catch. Will fix it.
> 
> > 
> > And I posted a similar patch[1] but with inode_lock being dropped in
> > mind, anything wrong with that patch?
> 
> It was an old posting so did not pay much attention to it. Looking at
> it now and few things I notice.
> 
> - That patch will allocate and possibly wait for memory range and free
>   it up when upgrading from read-only to read-write. That's not required.
>

What I was trying to say is that the above assumption about "dmap is still in
tree despite dropping-acquiring lock" is doubtful if inode lock is not held by
reclaim code.

There is a window between dropping and reacquiring lock, if it gets reclaimed,
we're supposed to continue on the allocation path.


> - dmap->flags is not required at this point of time. If we add more
>   states later, we can convert it to a flag.
> 
> - Code structure seems little simpler in this patch (in
>   fuse_iomap_begin()).

I like the cleanups.

thanks,
-liubo

> 
> W.r.t dropping inode lock, I think we should still be able to do with this
> patch and take reference on dmap under shared lock. And drop that
> reference later. Taking that reference should make sure dmap will not
> be reclaimed when we don't have i_dmap_sem. And truncate/punch_hole
> can't reclaim anyway because of inode_lock/i_mmap_sem lock.
> 
> Thanks
> Vivek


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Virtio-fs] [PATCH 3/3] fuse: Add logic to upgrade a read-only mapping to read-write
  2019-07-25 19:32       ` Liu Bo
@ 2019-07-25 20:30         ` Vivek Goyal
  2019-07-25 21:57           ` Liu Bo
  0 siblings, 1 reply; 15+ messages in thread
From: Vivek Goyal @ 2019-07-25 20:30 UTC (permalink / raw)
  To: Liu Bo; +Cc: virtio-fs

On Thu, Jul 25, 2019 at 12:32:50PM -0700, Liu Bo wrote:
> On Thu, Jul 25, 2019 at 11:35:21AM -0400, Vivek Goyal wrote:
> > On Wed, Jul 24, 2019 at 05:08:58PM -0700, Liu Bo wrote:
> > > On Wed, Jul 24, 2019 at 05:07:21PM -0400, Vivek Goyal wrote:
> > > > Do not always map a dax mapping read-write. There are use cases like
> > > > executing a file where we want to map file read-only. virtio-fs dir on
> > > > host might be backed by overlayfs. We don't want to open file read-write
> > > > on overlayfs otherwise it will unnecessariliy be copied-up nullyifying
> > > > the advantages of sharing page cache between vms for unmodified files.
> > > > 
> > > > Hence, create a read-only mapping if user did not ask for writable mapping.
> > > > Later upgrade it to read-write mapping when users requests it.
> > > > 
> > > > Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
> > > > ---
> > > >  fs/fuse/file.c   | 114 ++++++++++++++++++++++++++++++++++++-----------
> > > >  fs/fuse/fuse_i.h |   3 ++
> > > >  2 files changed, 91 insertions(+), 26 deletions(-)
> > > > 
> > > > diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> > > > index a2c19e4a28b5..bcd8ddcc0fdf 100644
> > > > --- a/fs/fuse/file.c
> > > > +++ b/fs/fuse/file.c
> > > > @@ -265,7 +265,8 @@ static void dmap_add_to_free_pool(struct fuse_conn *fc,
> > > >  
> > > >  /* offset passed in should be aligned to FUSE_DAX_MEM_RANGE_SZ */
> > > >  static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
> > > > -				  struct fuse_dax_mapping *dmap)
> > > > +				  struct fuse_dax_mapping *dmap, bool writable,
> > > > +				  bool upgrade)
> > > >  {
> > > >  	struct fuse_conn *fc = get_fuse_conn(inode);
> > > >  	struct fuse_inode *fi = get_fuse_inode(inode);
> > > > @@ -283,7 +284,8 @@ static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
> > > >  	inarg.moffset = dmap->window_offset;
> > > >  	inarg.len = FUSE_DAX_MEM_RANGE_SZ;
> > > >  	inarg.flags |= FUSE_SETUPMAPPING_FLAG_READ;
> > > > -	inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
> > > > +	if (writable)
> > > > +		inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
> > > >  	args.in.h.opcode = FUSE_SETUPMAPPING;
> > > >  	args.in.h.nodeid = fi->nodeid;
> > > >  	args.in.numargs = 1;
> > > > @@ -296,26 +298,30 @@ static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
> > > >  		return err;
> > > >  	}
> > > >  
> > > > -	pr_debug("fuse_setup_one_mapping() succeeded. offset=0x%llx err=%zd\n", offset, err);
> > > > +	pr_debug("fuse_setup_one_mapping() succeeded. offset=0x%llx writable=%d"
> > > > +		 " err=%zd\n", offset, writable, err);
> > > >  
> > > > -	/*
> > > > -	 * We don't take a refernce on inode. inode is valid right now and
> > > > -	 * when inode is going away, cleanup logic should first cleanup
> > > > -	 * dmap entries.
> > > > -	 *
> > > > -	 * TODO: Do we need to ensure that we are holding inode lock
> > > > -	 * as well.
> > > > -	 */
> > > > -	dmap->inode = inode;
> > > > -	dmap->start = offset;
> > > > -	dmap->end = offset + FUSE_DAX_MEM_RANGE_SZ - 1;
> > > > -	/* Protected by fi->i_dmap_sem */
> > > > -	fuse_dax_interval_tree_insert(dmap, &fi->dmap_tree);
> > > > -	fi->nr_dmaps++;
> > > > -	spin_lock(&fc->lock);
> > > > -	list_add_tail(&dmap->busy_list, &fc->busy_ranges);
> > > > -	fc->nr_busy_ranges++;
> > > > -	spin_unlock(&fc->lock);
> > > > +	dmap->writable = writable;
> > > > +	if (!upgrade) {
> > > > +		/*
> > > > +		 * We don't take a refernce on inode. inode is valid right now
> > > > +		 * and when inode is going away, cleanup logic should first
> > > > +		 * cleanup dmap entries.
> > > > +		 *
> > > > +		 * TODO: Do we need to ensure that we are holding inode lock
> > > > +		 * as well.
> > > > +		 */
> > > > +		dmap->inode = inode;
> > > > +		dmap->start = offset;
> > > > +		dmap->end = offset + FUSE_DAX_MEM_RANGE_SZ - 1;
> > > > +		/* Protected by fi->i_dmap_sem */
> > > > +		fuse_dax_interval_tree_insert(dmap, &fi->dmap_tree);
> > > > +		fi->nr_dmaps++;
> > > > +		spin_lock(&fc->lock);
> > > > +		list_add_tail(&dmap->busy_list, &fc->busy_ranges);
> > > > +		fc->nr_busy_ranges++;
> > > > +		spin_unlock(&fc->lock);
> > > > +	}
> > > >  	return 0;
> > > >  }
> > > >  
> > > > @@ -1890,6 +1896,7 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
> > > >  	struct fuse_conn *fc = get_fuse_conn(inode);
> > > >  	struct fuse_dax_mapping *dmap, *alloc_dmap = NULL;
> > > >  	int ret;
> > > > +	bool writable = flags & IOMAP_WRITE;
> > > >  
> > > >  	/* Can't do reclaim in fault path yet due to lock ordering.
> > > >  	 * Read path takes shared inode lock and that's not sufficient
> > > > @@ -1930,10 +1937,11 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
> > > >  
> > > >  	/* Setup one mapping */
> > > >  	ret = fuse_setup_one_mapping(inode,
> > > > -		ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ), alloc_dmap);
> > > > +				     ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ),
> > > > +				     alloc_dmap, writable, false);
> > > >  	if (ret < 0) {
> > > >  		printk("fuse_setup_one_mapping() failed. err=%d"
> > > > -			" pos=0x%llx\n", ret, pos);
> > > > +			" pos=0x%llx, writable=%d\n", ret, pos, writable);
> > > >  		dmap_add_to_free_pool(fc, alloc_dmap);
> > > >  		up_write(&fi->i_dmap_sem);
> > > >  		return ret;
> > > > @@ -1943,6 +1951,45 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
> > > >  	return 0;
> > > >  }
> > > >  
> > > > +static int iomap_begin_upgrade_mapping(struct inode *inode, loff_t pos,
> > > > +					 loff_t length, unsigned flags,
> > > > +					 struct iomap *iomap)
> > > > +{
> > > > +	struct fuse_inode *fi = get_fuse_inode(inode);
> > > > +	struct fuse_dax_mapping *dmap;
> > > > +	int ret;
> > > > +
> > > > +	/*
> > > > +	 * Take exclusive lock so that only one caller can try to setup
> > > > +	 * mapping and others wait.
> > > > +	 */
> > > > +	down_write(&fi->i_dmap_sem);
> > > > +	dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, pos, pos);
> > > > +
> > > > +	/* We are holding either inode lock or i_mmap_sem, and that should
> > > > +	 * ensure that dmap can't reclaimed or truncated and it should still
> > > > +	 * be there in tree despite the fact we dropped and re-acquired the
> > > > +	 * lock.
> > > > +	 */
> > > > +	if (WARN_ON(!dmap))
> > > > +		return -EIO;
> > > 
> > > up_write() is missed here.
> > 
> > Good catch. Will fix it.
> > 
> > > 

[..]
> > > And I posted a similar patch[1] but with inode_lock being dropped in
> > > mind, anything wrong with that patch?
> > 
> > It was an old posting so did not pay much attention to it. Looking at
> > it now and few things I notice.
> > 
> > - That patch will allocate and possibly wait for memory range and free
> >   it up when upgrading from read-only to read-write. That's not required.
> >
> 
> What I was trying to say is that the above assumption about "dmap is still in
> tree despite dropping-acquiring lock" is doubtful if inode lock is not held by
> reclaim code.

In that case we will take reference on dmap before dropping the lock to
make sure dmap is not reclaimed in the time we dropped the lock and
re-acquired it? And that means there is no need to allocate dmap for
upgrade case.

> 
> There is a window between dropping and reacquiring lock, if it gets reclaimed,
> we're supposed to continue on the allocation path.

Thanks
Vivek


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Virtio-fs] [PATCH 3/3] fuse: Add logic to upgrade a read-only mapping to read-write
  2019-07-25 20:30         ` Vivek Goyal
@ 2019-07-25 21:57           ` Liu Bo
  0 siblings, 0 replies; 15+ messages in thread
From: Liu Bo @ 2019-07-25 21:57 UTC (permalink / raw)
  To: Vivek Goyal; +Cc: virtio-fs

On Thu, Jul 25, 2019 at 04:30:13PM -0400, Vivek Goyal wrote:
> On Thu, Jul 25, 2019 at 12:32:50PM -0700, Liu Bo wrote:
> > On Thu, Jul 25, 2019 at 11:35:21AM -0400, Vivek Goyal wrote:
> > > On Wed, Jul 24, 2019 at 05:08:58PM -0700, Liu Bo wrote:
> > > > On Wed, Jul 24, 2019 at 05:07:21PM -0400, Vivek Goyal wrote:
> > > > > Do not always map a dax mapping read-write. There are use cases like
> > > > > executing a file where we want to map file read-only. virtio-fs dir on
> > > > > host might be backed by overlayfs. We don't want to open file read-write
> > > > > on overlayfs otherwise it will unnecessariliy be copied-up nullyifying
> > > > > the advantages of sharing page cache between vms for unmodified files.
> > > > > 
> > > > > Hence, create a read-only mapping if user did not ask for writable mapping.
> > > > > Later upgrade it to read-write mapping when users requests it.
> > > > > 
> > > > > Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
> > > > > ---
> > > > >  fs/fuse/file.c   | 114 ++++++++++++++++++++++++++++++++++++-----------
> > > > >  fs/fuse/fuse_i.h |   3 ++
> > > > >  2 files changed, 91 insertions(+), 26 deletions(-)
> > > > > 
> > > > > diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> > > > > index a2c19e4a28b5..bcd8ddcc0fdf 100644
> > > > > --- a/fs/fuse/file.c
> > > > > +++ b/fs/fuse/file.c
> > > > > @@ -265,7 +265,8 @@ static void dmap_add_to_free_pool(struct fuse_conn *fc,
> > > > >  
> > > > >  /* offset passed in should be aligned to FUSE_DAX_MEM_RANGE_SZ */
> > > > >  static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
> > > > > -				  struct fuse_dax_mapping *dmap)
> > > > > +				  struct fuse_dax_mapping *dmap, bool writable,
> > > > > +				  bool upgrade)
> > > > >  {
> > > > >  	struct fuse_conn *fc = get_fuse_conn(inode);
> > > > >  	struct fuse_inode *fi = get_fuse_inode(inode);
> > > > > @@ -283,7 +284,8 @@ static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
> > > > >  	inarg.moffset = dmap->window_offset;
> > > > >  	inarg.len = FUSE_DAX_MEM_RANGE_SZ;
> > > > >  	inarg.flags |= FUSE_SETUPMAPPING_FLAG_READ;
> > > > > -	inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
> > > > > +	if (writable)
> > > > > +		inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
> > > > >  	args.in.h.opcode = FUSE_SETUPMAPPING;
> > > > >  	args.in.h.nodeid = fi->nodeid;
> > > > >  	args.in.numargs = 1;
> > > > > @@ -296,26 +298,30 @@ static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
> > > > >  		return err;
> > > > >  	}
> > > > >  
> > > > > -	pr_debug("fuse_setup_one_mapping() succeeded. offset=0x%llx err=%zd\n", offset, err);
> > > > > +	pr_debug("fuse_setup_one_mapping() succeeded. offset=0x%llx writable=%d"
> > > > > +		 " err=%zd\n", offset, writable, err);
> > > > >  
> > > > > -	/*
> > > > > -	 * We don't take a refernce on inode. inode is valid right now and
> > > > > -	 * when inode is going away, cleanup logic should first cleanup
> > > > > -	 * dmap entries.
> > > > > -	 *
> > > > > -	 * TODO: Do we need to ensure that we are holding inode lock
> > > > > -	 * as well.
> > > > > -	 */
> > > > > -	dmap->inode = inode;
> > > > > -	dmap->start = offset;
> > > > > -	dmap->end = offset + FUSE_DAX_MEM_RANGE_SZ - 1;
> > > > > -	/* Protected by fi->i_dmap_sem */
> > > > > -	fuse_dax_interval_tree_insert(dmap, &fi->dmap_tree);
> > > > > -	fi->nr_dmaps++;
> > > > > -	spin_lock(&fc->lock);
> > > > > -	list_add_tail(&dmap->busy_list, &fc->busy_ranges);
> > > > > -	fc->nr_busy_ranges++;
> > > > > -	spin_unlock(&fc->lock);
> > > > > +	dmap->writable = writable;
> > > > > +	if (!upgrade) {
> > > > > +		/*
> > > > > +		 * We don't take a refernce on inode. inode is valid right now
> > > > > +		 * and when inode is going away, cleanup logic should first
> > > > > +		 * cleanup dmap entries.
> > > > > +		 *
> > > > > +		 * TODO: Do we need to ensure that we are holding inode lock
> > > > > +		 * as well.
> > > > > +		 */
> > > > > +		dmap->inode = inode;
> > > > > +		dmap->start = offset;
> > > > > +		dmap->end = offset + FUSE_DAX_MEM_RANGE_SZ - 1;
> > > > > +		/* Protected by fi->i_dmap_sem */
> > > > > +		fuse_dax_interval_tree_insert(dmap, &fi->dmap_tree);
> > > > > +		fi->nr_dmaps++;
> > > > > +		spin_lock(&fc->lock);
> > > > > +		list_add_tail(&dmap->busy_list, &fc->busy_ranges);
> > > > > +		fc->nr_busy_ranges++;
> > > > > +		spin_unlock(&fc->lock);
> > > > > +	}
> > > > >  	return 0;
> > > > >  }
> > > > >  
> > > > > @@ -1890,6 +1896,7 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
> > > > >  	struct fuse_conn *fc = get_fuse_conn(inode);
> > > > >  	struct fuse_dax_mapping *dmap, *alloc_dmap = NULL;
> > > > >  	int ret;
> > > > > +	bool writable = flags & IOMAP_WRITE;
> > > > >  
> > > > >  	/* Can't do reclaim in fault path yet due to lock ordering.
> > > > >  	 * Read path takes shared inode lock and that's not sufficient
> > > > > @@ -1930,10 +1937,11 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
> > > > >  
> > > > >  	/* Setup one mapping */
> > > > >  	ret = fuse_setup_one_mapping(inode,
> > > > > -		ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ), alloc_dmap);
> > > > > +				     ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ),
> > > > > +				     alloc_dmap, writable, false);
> > > > >  	if (ret < 0) {
> > > > >  		printk("fuse_setup_one_mapping() failed. err=%d"
> > > > > -			" pos=0x%llx\n", ret, pos);
> > > > > +			" pos=0x%llx, writable=%d\n", ret, pos, writable);
> > > > >  		dmap_add_to_free_pool(fc, alloc_dmap);
> > > > >  		up_write(&fi->i_dmap_sem);
> > > > >  		return ret;
> > > > > @@ -1943,6 +1951,45 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
> > > > >  	return 0;
> > > > >  }
> > > > >  
> > > > > +static int iomap_begin_upgrade_mapping(struct inode *inode, loff_t pos,
> > > > > +					 loff_t length, unsigned flags,
> > > > > +					 struct iomap *iomap)
> > > > > +{
> > > > > +	struct fuse_inode *fi = get_fuse_inode(inode);
> > > > > +	struct fuse_dax_mapping *dmap;
> > > > > +	int ret;
> > > > > +
> > > > > +	/*
> > > > > +	 * Take exclusive lock so that only one caller can try to setup
> > > > > +	 * mapping and others wait.
> > > > > +	 */
> > > > > +	down_write(&fi->i_dmap_sem);
> > > > > +	dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, pos, pos);
> > > > > +
> > > > > +	/* We are holding either inode lock or i_mmap_sem, and that should
> > > > > +	 * ensure that dmap can't reclaimed or truncated and it should still
> > > > > +	 * be there in tree despite the fact we dropped and re-acquired the
> > > > > +	 * lock.
> > > > > +	 */
> > > > > +	if (WARN_ON(!dmap))
> > > > > +		return -EIO;
> > > > 
> > > > up_write() is missed here.
> > > 
> > > Good catch. Will fix it.
> > > 
> > > > 
> 
> [..]
> > > > And I posted a similar patch[1] but with inode_lock being dropped in
> > > > mind, anything wrong with that patch?
> > > 
> > > It was an old posting so did not pay much attention to it. Looking at
> > > it now and few things I notice.
> > > 
> > > - That patch will allocate and possibly wait for memory range and free
> > >   it up when upgrading from read-only to read-write. That's not required.
> > >
> > 
> > What I was trying to say is that the above assumption about "dmap is still in
> > tree despite dropping-acquiring lock" is doubtful if inode lock is not held by
> > reclaim code.
> 
> In that case we will take reference on dmap before dropping the lock to
> make sure dmap is not reclaimed in the time we dropped the lock and
> re-acquired it? And that means there is no need to allocate dmap for
> upgrade case.

Ah makes sense, once we pin it, it won't go away underneath.

Other than this, feel free to add

Reviewed-by: Liu Bo <bo.liu@linux.alibaba.com>

thanks,
-liubo
> 
> > 
> > There is a window between dropping and reacquiring lock, if it gets reclaimed,
> > we're supposed to continue on the allocation path.
> 
> Thanks
> Vivek


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Virtio-fs] [PATCH 1/3] fuse: Get rid of file parameter from setup mapping
  2019-07-24 21:07 ` [Virtio-fs] [PATCH 1/3] fuse: Get rid of file parameter from setup mapping Vivek Goyal
@ 2019-07-26  1:23   ` Liu Bo
  0 siblings, 0 replies; 15+ messages in thread
From: Liu Bo @ 2019-07-26  1:23 UTC (permalink / raw)
  To: Vivek Goyal; +Cc: virtio-fs

On Wed, Jul 24, 2019 at 05:07:19PM -0400, Vivek Goyal wrote:
> There is only one caller of fuse_setup_one_mapping() and that passes file
> argument as NULL. So get rid of this parameter as there are no callers.
> 

Reviewed-by: Liu Bo <bo.liu@linux.alibaba.com>

thanks,
-liubo

> Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
> ---
>  fs/fuse/file.c | 30 +++++++-----------------------
>  1 file changed, 7 insertions(+), 23 deletions(-)
> 
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index fc40e0f44578..93f8e62e2b5b 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -264,41 +264,26 @@ static void dmap_add_to_free_pool(struct fuse_conn *fc,
>  }
>  
>  /* offset passed in should be aligned to FUSE_DAX_MEM_RANGE_SZ */
> -static int fuse_setup_one_mapping(struct inode *inode,
> -				struct file *file, loff_t offset,
> -				struct fuse_dax_mapping *dmap)
> +static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
> +				  struct fuse_dax_mapping *dmap)
>  {
>  	struct fuse_conn *fc = get_fuse_conn(inode);
>  	struct fuse_inode *fi = get_fuse_inode(inode);
> -	struct fuse_file *ff = NULL;
>  	struct fuse_setupmapping_in inarg;
>  	FUSE_ARGS(args);
>  	ssize_t err;
>  
> -	if (file)
> -		ff = file->private_data;
> -
>  	WARN_ON(offset % FUSE_DAX_MEM_RANGE_SZ);
>  	WARN_ON(fc->nr_free_ranges < 0);
>  
>  	/* Ask fuse daemon to setup mapping */
>  	memset(&inarg, 0, sizeof(inarg));
>  	inarg.foffset = offset;
> -	if (ff)
> -		inarg.fh = ff->fh;
> -	else
> -		inarg.fh = -1;
> +	inarg.fh = -1;
>  	inarg.moffset = dmap->window_offset;
>  	inarg.len = FUSE_DAX_MEM_RANGE_SZ;
> -	if (file) {
> -		inarg.flags |= (file->f_mode & FMODE_WRITE) ?
> -				FUSE_SETUPMAPPING_FLAG_WRITE : 0;
> -		inarg.flags |= (file->f_mode & FMODE_READ) ?
> -				FUSE_SETUPMAPPING_FLAG_READ : 0;
> -	} else {
> -		inarg.flags |= FUSE_SETUPMAPPING_FLAG_READ;
> -		inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
> -	}
> +	inarg.flags |= FUSE_SETUPMAPPING_FLAG_READ;
> +	inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
>  	args.in.h.opcode = FUSE_SETUPMAPPING;
>  	args.in.h.nodeid = fi->nodeid;
>  	args.in.numargs = 1;
> @@ -1985,9 +1970,8 @@ static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
>  		}
>  
>  		/* Setup one mapping */
> -		ret = fuse_setup_one_mapping(inode, NULL,
> -				ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ),
> -				alloc_dmap);
> +		ret = fuse_setup_one_mapping(inode,
> +			ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ), alloc_dmap);
>  		if (ret < 0) {
>  			printk("fuse_setup_one_mapping() failed. err=%d"
>  				" pos=0x%llx\n", ret, pos);
> -- 
> 2.17.2


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Virtio-fs] [PATCH 2/3] fuse: Move new mapping setup code in a function
  2019-07-24 21:07 ` [Virtio-fs] [PATCH 2/3] fuse: Move new mapping setup code in a function Vivek Goyal
@ 2019-07-26  1:34   ` Liu Bo
  0 siblings, 0 replies; 15+ messages in thread
From: Liu Bo @ 2019-07-26  1:34 UTC (permalink / raw)
  To: Vivek Goyal; +Cc: virtio-fs

On Wed, Jul 24, 2019 at 05:07:20PM -0400, Vivek Goyal wrote:
> Move new mapping setup code in a separate function. More code will come
> in fuse_iomap_begin() and its becoming too big.

Reviewed-by: Liu Bo <bo.liu@linux.alibaba.com>

thanks,
-liubo
> 
> Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
> ---
>  fs/fuse/file.c | 116 +++++++++++++++++++++++++++----------------------
>  1 file changed, 64 insertions(+), 52 deletions(-)
> 
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index 93f8e62e2b5b..a2c19e4a28b5 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -1882,6 +1882,67 @@ static void fuse_fill_iomap(struct inode *inode, loff_t pos, loff_t length,
>  	}
>  }
>  
> +static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
> +					 loff_t length, unsigned flags,
> +					 struct iomap *iomap)
> +{
> +	struct fuse_inode *fi = get_fuse_inode(inode);
> +	struct fuse_conn *fc = get_fuse_conn(inode);
> +	struct fuse_dax_mapping *dmap, *alloc_dmap = NULL;
> +	int ret;
> +
> +	/* Can't do reclaim in fault path yet due to lock ordering.
> +	 * Read path takes shared inode lock and that's not sufficient
> +	 * for inline range reclaim. Caller needs to drop lock, wait
> +	 * and retry.
> +	 */
> +	if (flags & IOMAP_FAULT || !(flags & IOMAP_WRITE)) {
> +		alloc_dmap = alloc_dax_mapping(fc);
> +		if (!alloc_dmap)
> +			return -ENOSPC;
> +	} else {
> +		alloc_dmap = alloc_dax_mapping_reclaim(fc, inode);
> +		if (IS_ERR(alloc_dmap))
> +			return PTR_ERR(alloc_dmap);
> +	}
> +
> +	/* If we are here, we should have memory allocated */
> +	if (WARN_ON(!alloc_dmap))
> +		return -EBUSY;
> +
> +	/*
> +	 * Drop read lock and take write lock so that only one
> +	 * caller can try to setup mapping and other waits
> +	 */
> +	down_write(&fi->i_dmap_sem);
> +	/*
> +	 * We dropped lock. Check again if somebody else setup
> +	 * mapping already.
> +	 */
> +	dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, pos,
> +						pos);
> +	if (dmap) {
> +		fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
> +		dmap_add_to_free_pool(fc, alloc_dmap);
> +		up_write(&fi->i_dmap_sem);
> +		return 0;
> +	}
> +
> +	/* Setup one mapping */
> +	ret = fuse_setup_one_mapping(inode,
> +		ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ), alloc_dmap);
> +	if (ret < 0) {
> +		printk("fuse_setup_one_mapping() failed. err=%d"
> +			" pos=0x%llx\n", ret, pos);
> +		dmap_add_to_free_pool(fc, alloc_dmap);
> +		up_write(&fi->i_dmap_sem);
> +		return ret;
> +	}
> +	fuse_fill_iomap(inode, pos, length, iomap, alloc_dmap, flags);
> +	up_write(&fi->i_dmap_sem);
> +	return 0;
> +}
> +
>  /* This is just for DAX and the mapping is ephemeral, do not use it for other
>   * purposes since there is no block device with a permanent mapping.
>   */
> @@ -1890,8 +1951,7 @@ static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
>  {
>  	struct fuse_inode *fi = get_fuse_inode(inode);
>  	struct fuse_conn *fc = get_fuse_conn(inode);
> -	struct fuse_dax_mapping *dmap, *alloc_dmap = NULL;
> -	int ret;
> +	struct fuse_dax_mapping *dmap;
>  
>  	/* We don't support FIEMAP */
>  	BUG_ON(flags & IOMAP_REPORT);
> @@ -1932,56 +1992,8 @@ static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
>  		if (pos >= i_size_read(inode))
>  			goto iomap_hole;
>  
> -		/* Can't do reclaim in fault path yet due to lock ordering.
> -		 * Read path takes shared inode lock and that's not sufficient
> -		 * for inline range reclaim. Caller needs to drop lock, wait
> -		 * and retry.
> -		 */
> -		if (flags & IOMAP_FAULT || !(flags & IOMAP_WRITE)) {
> -			alloc_dmap = alloc_dax_mapping(fc);
> -			if (!alloc_dmap)
> -				return -ENOSPC;
> -		} else {
> -			alloc_dmap = alloc_dax_mapping_reclaim(fc, inode);
> -			if (IS_ERR(alloc_dmap))
> -				return PTR_ERR(alloc_dmap);
> -		}
> -
> -		/* If we are here, we should have memory allocated */
> -		if (WARN_ON(!alloc_dmap))
> -			return -EBUSY;
> -
> -		/*
> -		 * Drop read lock and take write lock so that only one
> -		 * caller can try to setup mapping and other waits
> -		 */
> -		down_write(&fi->i_dmap_sem);
> -		/*
> -		 * We dropped lock. Check again if somebody else setup
> -		 * mapping already.
> -		 */
> -		dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, pos,
> -							pos);
> -		if (dmap) {
> -			fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
> -			dmap_add_to_free_pool(fc, alloc_dmap);
> -			up_write(&fi->i_dmap_sem);
> -			return 0;
> -		}
> -
> -		/* Setup one mapping */
> -		ret = fuse_setup_one_mapping(inode,
> -			ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ), alloc_dmap);
> -		if (ret < 0) {
> -			printk("fuse_setup_one_mapping() failed. err=%d"
> -				" pos=0x%llx\n", ret, pos);
> -			dmap_add_to_free_pool(fc, alloc_dmap);
> -			up_write(&fi->i_dmap_sem);
> -			return ret;
> -		}
> -		fuse_fill_iomap(inode, pos, length, iomap, alloc_dmap, flags);
> -		up_write(&fi->i_dmap_sem);
> -		return 0;
> +		return iomap_begin_setup_new_mapping(inode, pos, length, flags,
> +						    iomap);
>  	}
>  
>  	/*
> -- 
> 2.17.2


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Virtio-fs] [PATCH 3/3] fuse: Add logic to upgrade a read-only mapping to read-write
  2019-08-05 12:25     ` Vivek Goyal
@ 2019-08-05 12:34       ` piaojun
  0 siblings, 0 replies; 15+ messages in thread
From: piaojun @ 2019-08-05 12:34 UTC (permalink / raw)
  To: Vivek Goyal; +Cc: virtio-fs

Hi Vivek,

On 2019/8/5 20:25, Vivek Goyal wrote:
> On Sun, Aug 04, 2019 at 09:56:55AM +0800, piaojun wrote:
>> Hi Vivek,
>>
>> Shall we downgrade dax mapping from rw to read-only, if current user
>> open with O_RDONLY, and last user opened with O_RDWR? This will minimize
>> the access authority.
>>
>> I wonder if my concern is necessary. And if it will cause frequent dax
>> mapping?
> 
> There is a chance that memory mapping will be reclaimed and setup as
> read-only again.

I got it, and thanks for your reply.

Thanks,
Jun

> 
> I will not worry about this at this point of time.
> 
> Thanks
> Vivek
> 
>>
>> Thanks,
>> Jun
>>
>> On 2019/7/26 23:49, Vivek Goyal wrote:
>>> Do not always map a dax mapping read-write. There are use cases like
>>> executing a file where we want to map file read-only. virtio-fs dir on
>>> host might be backed by overlayfs. We don't want to open file read-write
>>> on overlayfs otherwise it will unnecessariliy be copied-up nullyifying
>>> the advantages of sharing page cache between vms for unmodified files.
>>>
>>> Hence, create a read-only mapping if user did not ask for writable mapping.
>>> Later upgrade it to read-write mapping when users requests it.
>>>
>>> Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
>>> ---
>>>  fs/fuse/file.c   | 121 +++++++++++++++++++++++++++++++++++++----------
>>>  fs/fuse/fuse_i.h |   3 ++
>>>  2 files changed, 98 insertions(+), 26 deletions(-)
>>>
>>> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
>>> index a2c19e4a28b5..5277de7028a6 100644
>>> --- a/fs/fuse/file.c
>>> +++ b/fs/fuse/file.c
>>> @@ -265,7 +265,8 @@ static void dmap_add_to_free_pool(struct fuse_conn *fc,
>>>  
>>>  /* offset passed in should be aligned to FUSE_DAX_MEM_RANGE_SZ */
>>>  static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
>>> -				  struct fuse_dax_mapping *dmap)
>>> +				  struct fuse_dax_mapping *dmap, bool writable,
>>> +				  bool upgrade)
>>>  {
>>>  	struct fuse_conn *fc = get_fuse_conn(inode);
>>>  	struct fuse_inode *fi = get_fuse_inode(inode);
>>> @@ -283,7 +284,8 @@ static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
>>>  	inarg.moffset = dmap->window_offset;
>>>  	inarg.len = FUSE_DAX_MEM_RANGE_SZ;
>>>  	inarg.flags |= FUSE_SETUPMAPPING_FLAG_READ;
>>> -	inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
>>> +	if (writable)
>>> +		inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
>>>  	args.in.h.opcode = FUSE_SETUPMAPPING;
>>>  	args.in.h.nodeid = fi->nodeid;
>>>  	args.in.numargs = 1;
>>> @@ -296,26 +298,30 @@ static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
>>>  		return err;
>>>  	}
>>>  
>>> -	pr_debug("fuse_setup_one_mapping() succeeded. offset=0x%llx err=%zd\n", offset, err);
>>> +	pr_debug("fuse_setup_one_mapping() succeeded. offset=0x%llx writable=%d"
>>> +		 " err=%zd\n", offset, writable, err);
>>>  
>>> -	/*
>>> -	 * We don't take a refernce on inode. inode is valid right now and
>>> -	 * when inode is going away, cleanup logic should first cleanup
>>> -	 * dmap entries.
>>> -	 *
>>> -	 * TODO: Do we need to ensure that we are holding inode lock
>>> -	 * as well.
>>> -	 */
>>> -	dmap->inode = inode;
>>> -	dmap->start = offset;
>>> -	dmap->end = offset + FUSE_DAX_MEM_RANGE_SZ - 1;
>>> -	/* Protected by fi->i_dmap_sem */
>>> -	fuse_dax_interval_tree_insert(dmap, &fi->dmap_tree);
>>> -	fi->nr_dmaps++;
>>> -	spin_lock(&fc->lock);
>>> -	list_add_tail(&dmap->busy_list, &fc->busy_ranges);
>>> -	fc->nr_busy_ranges++;
>>> -	spin_unlock(&fc->lock);
>>> +	dmap->writable = writable;
>>> +	if (!upgrade) {
>>> +		/*
>>> +		 * We don't take a refernce on inode. inode is valid right now
>>> +		 * and when inode is going away, cleanup logic should first
>>> +		 * cleanup dmap entries.
>>> +		 *
>>> +		 * TODO: Do we need to ensure that we are holding inode lock
>>> +		 * as well.
>>> +		 */
>>> +		dmap->inode = inode;
>>> +		dmap->start = offset;
>>> +		dmap->end = offset + FUSE_DAX_MEM_RANGE_SZ - 1;
>>> +		/* Protected by fi->i_dmap_sem */
>>> +		fuse_dax_interval_tree_insert(dmap, &fi->dmap_tree);
>>> +		fi->nr_dmaps++;
>>> +		spin_lock(&fc->lock);
>>> +		list_add_tail(&dmap->busy_list, &fc->busy_ranges);
>>> +		fc->nr_busy_ranges++;
>>> +		spin_unlock(&fc->lock);
>>> +	}
>>>  	return 0;
>>>  }
>>>  
>>> @@ -1890,6 +1896,7 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
>>>  	struct fuse_conn *fc = get_fuse_conn(inode);
>>>  	struct fuse_dax_mapping *dmap, *alloc_dmap = NULL;
>>>  	int ret;
>>> +	bool writable = flags & IOMAP_WRITE;
>>>  
>>>  	/* Can't do reclaim in fault path yet due to lock ordering.
>>>  	 * Read path takes shared inode lock and that's not sufficient
>>> @@ -1930,10 +1937,11 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
>>>  
>>>  	/* Setup one mapping */
>>>  	ret = fuse_setup_one_mapping(inode,
>>> -		ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ), alloc_dmap);
>>> +				     ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ),
>>> +				     alloc_dmap, writable, false);
>>>  	if (ret < 0) {
>>>  		printk("fuse_setup_one_mapping() failed. err=%d"
>>> -			" pos=0x%llx\n", ret, pos);
>>> +			" pos=0x%llx, writable=%d\n", ret, pos, writable);
>>>  		dmap_add_to_free_pool(fc, alloc_dmap);
>>>  		up_write(&fi->i_dmap_sem);
>>>  		return ret;
>>> @@ -1943,6 +1951,52 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
>>>  	return 0;
>>>  }
>>>  
>>> +static int iomap_begin_upgrade_mapping(struct inode *inode, loff_t pos,
>>> +					 loff_t length, unsigned flags,
>>> +					 struct iomap *iomap)
>>> +{
>>> +	struct fuse_inode *fi = get_fuse_inode(inode);
>>> +	struct fuse_dax_mapping *dmap;
>>> +	int ret;
>>> +
>>> +	/*
>>> +	 * Take exclusive lock so that only one caller can try to setup
>>> +	 * mapping and others wait.
>>> +	 */
>>> +	down_write(&fi->i_dmap_sem);
>>> +	dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, pos, pos);
>>> +
>>> +	/* We are holding either inode lock or i_mmap_sem, and that should
>>> +	 * ensure that dmap can't reclaimed or truncated and it should still
>>> +	 * be there in tree despite the fact we dropped and re-acquired the
>>> +	 * lock.
>>> +	 */
>>> +	ret = -EIO;
>>> +	if (WARN_ON(!dmap))
>>> +		goto out_err;
>>> +
>>> +	/* Maybe another thread already upgraded mapping while we were not
>>> +	 * holding lock.
>>> +	 */
>>> +	if (dmap->writable)
>>> +		goto out_fill_iomap;
>>> +
>>> +	ret = fuse_setup_one_mapping(inode,
>>> +				     ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ),
>>> +				     dmap, true, true);
>>> +	if (ret < 0) {
>>> +		printk("fuse_setup_one_mapping() failed. err=%d pos=0x%llx\n",
>>> +		       ret, pos);
>>> +		goto out_err;
>>> +	}
>>> +
>>> +out_fill_iomap:
>>> +	fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
>>> +out_err:
>>> +	up_write(&fi->i_dmap_sem);
>>> +	return ret;
>>> +}
>>> +
>>>  /* This is just for DAX and the mapping is ephemeral, do not use it for other
>>>   * purposes since there is no block device with a permanent mapping.
>>>   */
>>> @@ -1952,6 +2006,7 @@ static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
>>>  	struct fuse_inode *fi = get_fuse_inode(inode);
>>>  	struct fuse_conn *fc = get_fuse_conn(inode);
>>>  	struct fuse_dax_mapping *dmap;
>>> +	bool writable = flags & IOMAP_WRITE;
>>>  
>>>  	/* We don't support FIEMAP */
>>>  	BUG_ON(flags & IOMAP_REPORT);
>>> @@ -1982,9 +2037,23 @@ static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
>>>  	dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, pos, pos);
>>>  
>>>  	if (dmap) {
>>> -		fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
>>> -		up_read(&fi->i_dmap_sem);
>>> -		return 0;
>>> +		if (writable && !dmap->writable) {
>>> +			/* Upgrade read-only mapping to read-write. This will
>>> +			 * require exclusive i_dmap_sem lock as we don't want
>>> +			 * two threads to be trying to this simultaneously
>>> +			 * for same dmap. So drop shared lock and acquire
>>> +			 * exclusive lock.
>>> +			 */
>>> +			up_read(&fi->i_dmap_sem);
>>> +			pr_debug("%s: Upgrading mapping at offset 0x%llx"
>>> +				 " length 0x%llx\n", __func__, pos, length);
>>> +			return iomap_begin_upgrade_mapping(inode, pos, length,
>>> +							   flags, iomap);
>>> +		} else {
>>> +			fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
>>> +			up_read(&fi->i_dmap_sem);
>>> +			return 0;
>>> +		}
>>>  	} else {
>>>  		up_read(&fi->i_dmap_sem);
>>>  		pr_debug("%s: no mapping at offset 0x%llx length 0x%llx\n",
>>> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
>>> index e899a06e29d7..34ca8b90a1e1 100644
>>> --- a/fs/fuse/fuse_i.h
>>> +++ b/fs/fuse/fuse_i.h
>>> @@ -134,6 +134,9 @@ struct fuse_dax_mapping {
>>>  
>>>  	/** Length of mapping, in bytes */
>>>  	loff_t length;
>>> +
>>> +	/* Is this mapping read-only or read-write */
>>> +	bool writable;
>>>  };
>>>  
>>>  /** FUSE inode */
>>>
> .
> 


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Virtio-fs] [PATCH 3/3] fuse: Add logic to upgrade a read-only mapping to read-write
  2019-08-04  1:56   ` piaojun
@ 2019-08-05 12:25     ` Vivek Goyal
  2019-08-05 12:34       ` piaojun
  0 siblings, 1 reply; 15+ messages in thread
From: Vivek Goyal @ 2019-08-05 12:25 UTC (permalink / raw)
  To: piaojun; +Cc: virtio-fs

On Sun, Aug 04, 2019 at 09:56:55AM +0800, piaojun wrote:
> Hi Vivek,
> 
> Shall we downgrade dax mapping from rw to read-only, if current user
> open with O_RDONLY, and last user opened with O_RDWR? This will minimize
> the access authority.
> 
> I wonder if my concern is necessary. And if it will cause frequent dax
> mapping?

There is a chance that memory mapping will be reclaimed and setup as
read-only again.

I will not worry about this at this point of time.

Thanks
Vivek

> 
> Thanks,
> Jun
> 
> On 2019/7/26 23:49, Vivek Goyal wrote:
> > Do not always map a dax mapping read-write. There are use cases like
> > executing a file where we want to map file read-only. virtio-fs dir on
> > host might be backed by overlayfs. We don't want to open file read-write
> > on overlayfs otherwise it will unnecessariliy be copied-up nullyifying
> > the advantages of sharing page cache between vms for unmodified files.
> > 
> > Hence, create a read-only mapping if user did not ask for writable mapping.
> > Later upgrade it to read-write mapping when users requests it.
> > 
> > Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
> > ---
> >  fs/fuse/file.c   | 121 +++++++++++++++++++++++++++++++++++++----------
> >  fs/fuse/fuse_i.h |   3 ++
> >  2 files changed, 98 insertions(+), 26 deletions(-)
> > 
> > diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> > index a2c19e4a28b5..5277de7028a6 100644
> > --- a/fs/fuse/file.c
> > +++ b/fs/fuse/file.c
> > @@ -265,7 +265,8 @@ static void dmap_add_to_free_pool(struct fuse_conn *fc,
> >  
> >  /* offset passed in should be aligned to FUSE_DAX_MEM_RANGE_SZ */
> >  static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
> > -				  struct fuse_dax_mapping *dmap)
> > +				  struct fuse_dax_mapping *dmap, bool writable,
> > +				  bool upgrade)
> >  {
> >  	struct fuse_conn *fc = get_fuse_conn(inode);
> >  	struct fuse_inode *fi = get_fuse_inode(inode);
> > @@ -283,7 +284,8 @@ static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
> >  	inarg.moffset = dmap->window_offset;
> >  	inarg.len = FUSE_DAX_MEM_RANGE_SZ;
> >  	inarg.flags |= FUSE_SETUPMAPPING_FLAG_READ;
> > -	inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
> > +	if (writable)
> > +		inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
> >  	args.in.h.opcode = FUSE_SETUPMAPPING;
> >  	args.in.h.nodeid = fi->nodeid;
> >  	args.in.numargs = 1;
> > @@ -296,26 +298,30 @@ static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
> >  		return err;
> >  	}
> >  
> > -	pr_debug("fuse_setup_one_mapping() succeeded. offset=0x%llx err=%zd\n", offset, err);
> > +	pr_debug("fuse_setup_one_mapping() succeeded. offset=0x%llx writable=%d"
> > +		 " err=%zd\n", offset, writable, err);
> >  
> > -	/*
> > -	 * We don't take a refernce on inode. inode is valid right now and
> > -	 * when inode is going away, cleanup logic should first cleanup
> > -	 * dmap entries.
> > -	 *
> > -	 * TODO: Do we need to ensure that we are holding inode lock
> > -	 * as well.
> > -	 */
> > -	dmap->inode = inode;
> > -	dmap->start = offset;
> > -	dmap->end = offset + FUSE_DAX_MEM_RANGE_SZ - 1;
> > -	/* Protected by fi->i_dmap_sem */
> > -	fuse_dax_interval_tree_insert(dmap, &fi->dmap_tree);
> > -	fi->nr_dmaps++;
> > -	spin_lock(&fc->lock);
> > -	list_add_tail(&dmap->busy_list, &fc->busy_ranges);
> > -	fc->nr_busy_ranges++;
> > -	spin_unlock(&fc->lock);
> > +	dmap->writable = writable;
> > +	if (!upgrade) {
> > +		/*
> > +		 * We don't take a refernce on inode. inode is valid right now
> > +		 * and when inode is going away, cleanup logic should first
> > +		 * cleanup dmap entries.
> > +		 *
> > +		 * TODO: Do we need to ensure that we are holding inode lock
> > +		 * as well.
> > +		 */
> > +		dmap->inode = inode;
> > +		dmap->start = offset;
> > +		dmap->end = offset + FUSE_DAX_MEM_RANGE_SZ - 1;
> > +		/* Protected by fi->i_dmap_sem */
> > +		fuse_dax_interval_tree_insert(dmap, &fi->dmap_tree);
> > +		fi->nr_dmaps++;
> > +		spin_lock(&fc->lock);
> > +		list_add_tail(&dmap->busy_list, &fc->busy_ranges);
> > +		fc->nr_busy_ranges++;
> > +		spin_unlock(&fc->lock);
> > +	}
> >  	return 0;
> >  }
> >  
> > @@ -1890,6 +1896,7 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
> >  	struct fuse_conn *fc = get_fuse_conn(inode);
> >  	struct fuse_dax_mapping *dmap, *alloc_dmap = NULL;
> >  	int ret;
> > +	bool writable = flags & IOMAP_WRITE;
> >  
> >  	/* Can't do reclaim in fault path yet due to lock ordering.
> >  	 * Read path takes shared inode lock and that's not sufficient
> > @@ -1930,10 +1937,11 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
> >  
> >  	/* Setup one mapping */
> >  	ret = fuse_setup_one_mapping(inode,
> > -		ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ), alloc_dmap);
> > +				     ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ),
> > +				     alloc_dmap, writable, false);
> >  	if (ret < 0) {
> >  		printk("fuse_setup_one_mapping() failed. err=%d"
> > -			" pos=0x%llx\n", ret, pos);
> > +			" pos=0x%llx, writable=%d\n", ret, pos, writable);
> >  		dmap_add_to_free_pool(fc, alloc_dmap);
> >  		up_write(&fi->i_dmap_sem);
> >  		return ret;
> > @@ -1943,6 +1951,52 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
> >  	return 0;
> >  }
> >  
> > +static int iomap_begin_upgrade_mapping(struct inode *inode, loff_t pos,
> > +					 loff_t length, unsigned flags,
> > +					 struct iomap *iomap)
> > +{
> > +	struct fuse_inode *fi = get_fuse_inode(inode);
> > +	struct fuse_dax_mapping *dmap;
> > +	int ret;
> > +
> > +	/*
> > +	 * Take exclusive lock so that only one caller can try to setup
> > +	 * mapping and others wait.
> > +	 */
> > +	down_write(&fi->i_dmap_sem);
> > +	dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, pos, pos);
> > +
> > +	/* We are holding either inode lock or i_mmap_sem, and that should
> > +	 * ensure that dmap can't reclaimed or truncated and it should still
> > +	 * be there in tree despite the fact we dropped and re-acquired the
> > +	 * lock.
> > +	 */
> > +	ret = -EIO;
> > +	if (WARN_ON(!dmap))
> > +		goto out_err;
> > +
> > +	/* Maybe another thread already upgraded mapping while we were not
> > +	 * holding lock.
> > +	 */
> > +	if (dmap->writable)
> > +		goto out_fill_iomap;
> > +
> > +	ret = fuse_setup_one_mapping(inode,
> > +				     ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ),
> > +				     dmap, true, true);
> > +	if (ret < 0) {
> > +		printk("fuse_setup_one_mapping() failed. err=%d pos=0x%llx\n",
> > +		       ret, pos);
> > +		goto out_err;
> > +	}
> > +
> > +out_fill_iomap:
> > +	fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
> > +out_err:
> > +	up_write(&fi->i_dmap_sem);
> > +	return ret;
> > +}
> > +
> >  /* This is just for DAX and the mapping is ephemeral, do not use it for other
> >   * purposes since there is no block device with a permanent mapping.
> >   */
> > @@ -1952,6 +2006,7 @@ static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
> >  	struct fuse_inode *fi = get_fuse_inode(inode);
> >  	struct fuse_conn *fc = get_fuse_conn(inode);
> >  	struct fuse_dax_mapping *dmap;
> > +	bool writable = flags & IOMAP_WRITE;
> >  
> >  	/* We don't support FIEMAP */
> >  	BUG_ON(flags & IOMAP_REPORT);
> > @@ -1982,9 +2037,23 @@ static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
> >  	dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, pos, pos);
> >  
> >  	if (dmap) {
> > -		fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
> > -		up_read(&fi->i_dmap_sem);
> > -		return 0;
> > +		if (writable && !dmap->writable) {
> > +			/* Upgrade read-only mapping to read-write. This will
> > +			 * require exclusive i_dmap_sem lock as we don't want
> > +			 * two threads to be trying to this simultaneously
> > +			 * for same dmap. So drop shared lock and acquire
> > +			 * exclusive lock.
> > +			 */
> > +			up_read(&fi->i_dmap_sem);
> > +			pr_debug("%s: Upgrading mapping at offset 0x%llx"
> > +				 " length 0x%llx\n", __func__, pos, length);
> > +			return iomap_begin_upgrade_mapping(inode, pos, length,
> > +							   flags, iomap);
> > +		} else {
> > +			fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
> > +			up_read(&fi->i_dmap_sem);
> > +			return 0;
> > +		}
> >  	} else {
> >  		up_read(&fi->i_dmap_sem);
> >  		pr_debug("%s: no mapping at offset 0x%llx length 0x%llx\n",
> > diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> > index e899a06e29d7..34ca8b90a1e1 100644
> > --- a/fs/fuse/fuse_i.h
> > +++ b/fs/fuse/fuse_i.h
> > @@ -134,6 +134,9 @@ struct fuse_dax_mapping {
> >  
> >  	/** Length of mapping, in bytes */
> >  	loff_t length;
> > +
> > +	/* Is this mapping read-only or read-write */
> > +	bool writable;
> >  };
> >  
> >  /** FUSE inode */
> > 


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Virtio-fs] [PATCH 3/3] fuse: Add logic to upgrade a read-only mapping to read-write
  2019-07-26 15:49 ` [Virtio-fs] [PATCH 3/3] fuse: Add logic to upgrade a read-only mapping to read-write Vivek Goyal
@ 2019-08-04  1:56   ` piaojun
  2019-08-05 12:25     ` Vivek Goyal
  0 siblings, 1 reply; 15+ messages in thread
From: piaojun @ 2019-08-04  1:56 UTC (permalink / raw)
  To: Vivek Goyal, virtio-fs

Hi Vivek,

Shall we downgrade dax mapping from rw to read-only, if current user
open with O_RDONLY, and last user opened with O_RDWR? This will minimize
the access authority.

I wonder if my concern is necessary. And if it will cause frequent dax
mapping?

Thanks,
Jun

On 2019/7/26 23:49, Vivek Goyal wrote:
> Do not always map a dax mapping read-write. There are use cases like
> executing a file where we want to map file read-only. virtio-fs dir on
> host might be backed by overlayfs. We don't want to open file read-write
> on overlayfs otherwise it will unnecessariliy be copied-up nullyifying
> the advantages of sharing page cache between vms for unmodified files.
> 
> Hence, create a read-only mapping if user did not ask for writable mapping.
> Later upgrade it to read-write mapping when users requests it.
> 
> Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
> ---
>  fs/fuse/file.c   | 121 +++++++++++++++++++++++++++++++++++++----------
>  fs/fuse/fuse_i.h |   3 ++
>  2 files changed, 98 insertions(+), 26 deletions(-)
> 
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index a2c19e4a28b5..5277de7028a6 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -265,7 +265,8 @@ static void dmap_add_to_free_pool(struct fuse_conn *fc,
>  
>  /* offset passed in should be aligned to FUSE_DAX_MEM_RANGE_SZ */
>  static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
> -				  struct fuse_dax_mapping *dmap)
> +				  struct fuse_dax_mapping *dmap, bool writable,
> +				  bool upgrade)
>  {
>  	struct fuse_conn *fc = get_fuse_conn(inode);
>  	struct fuse_inode *fi = get_fuse_inode(inode);
> @@ -283,7 +284,8 @@ static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
>  	inarg.moffset = dmap->window_offset;
>  	inarg.len = FUSE_DAX_MEM_RANGE_SZ;
>  	inarg.flags |= FUSE_SETUPMAPPING_FLAG_READ;
> -	inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
> +	if (writable)
> +		inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
>  	args.in.h.opcode = FUSE_SETUPMAPPING;
>  	args.in.h.nodeid = fi->nodeid;
>  	args.in.numargs = 1;
> @@ -296,26 +298,30 @@ static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
>  		return err;
>  	}
>  
> -	pr_debug("fuse_setup_one_mapping() succeeded. offset=0x%llx err=%zd\n", offset, err);
> +	pr_debug("fuse_setup_one_mapping() succeeded. offset=0x%llx writable=%d"
> +		 " err=%zd\n", offset, writable, err);
>  
> -	/*
> -	 * We don't take a refernce on inode. inode is valid right now and
> -	 * when inode is going away, cleanup logic should first cleanup
> -	 * dmap entries.
> -	 *
> -	 * TODO: Do we need to ensure that we are holding inode lock
> -	 * as well.
> -	 */
> -	dmap->inode = inode;
> -	dmap->start = offset;
> -	dmap->end = offset + FUSE_DAX_MEM_RANGE_SZ - 1;
> -	/* Protected by fi->i_dmap_sem */
> -	fuse_dax_interval_tree_insert(dmap, &fi->dmap_tree);
> -	fi->nr_dmaps++;
> -	spin_lock(&fc->lock);
> -	list_add_tail(&dmap->busy_list, &fc->busy_ranges);
> -	fc->nr_busy_ranges++;
> -	spin_unlock(&fc->lock);
> +	dmap->writable = writable;
> +	if (!upgrade) {
> +		/*
> +		 * We don't take a refernce on inode. inode is valid right now
> +		 * and when inode is going away, cleanup logic should first
> +		 * cleanup dmap entries.
> +		 *
> +		 * TODO: Do we need to ensure that we are holding inode lock
> +		 * as well.
> +		 */
> +		dmap->inode = inode;
> +		dmap->start = offset;
> +		dmap->end = offset + FUSE_DAX_MEM_RANGE_SZ - 1;
> +		/* Protected by fi->i_dmap_sem */
> +		fuse_dax_interval_tree_insert(dmap, &fi->dmap_tree);
> +		fi->nr_dmaps++;
> +		spin_lock(&fc->lock);
> +		list_add_tail(&dmap->busy_list, &fc->busy_ranges);
> +		fc->nr_busy_ranges++;
> +		spin_unlock(&fc->lock);
> +	}
>  	return 0;
>  }
>  
> @@ -1890,6 +1896,7 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
>  	struct fuse_conn *fc = get_fuse_conn(inode);
>  	struct fuse_dax_mapping *dmap, *alloc_dmap = NULL;
>  	int ret;
> +	bool writable = flags & IOMAP_WRITE;
>  
>  	/* Can't do reclaim in fault path yet due to lock ordering.
>  	 * Read path takes shared inode lock and that's not sufficient
> @@ -1930,10 +1937,11 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
>  
>  	/* Setup one mapping */
>  	ret = fuse_setup_one_mapping(inode,
> -		ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ), alloc_dmap);
> +				     ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ),
> +				     alloc_dmap, writable, false);
>  	if (ret < 0) {
>  		printk("fuse_setup_one_mapping() failed. err=%d"
> -			" pos=0x%llx\n", ret, pos);
> +			" pos=0x%llx, writable=%d\n", ret, pos, writable);
>  		dmap_add_to_free_pool(fc, alloc_dmap);
>  		up_write(&fi->i_dmap_sem);
>  		return ret;
> @@ -1943,6 +1951,52 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
>  	return 0;
>  }
>  
> +static int iomap_begin_upgrade_mapping(struct inode *inode, loff_t pos,
> +					 loff_t length, unsigned flags,
> +					 struct iomap *iomap)
> +{
> +	struct fuse_inode *fi = get_fuse_inode(inode);
> +	struct fuse_dax_mapping *dmap;
> +	int ret;
> +
> +	/*
> +	 * Take exclusive lock so that only one caller can try to setup
> +	 * mapping and others wait.
> +	 */
> +	down_write(&fi->i_dmap_sem);
> +	dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, pos, pos);
> +
> +	/* We are holding either inode lock or i_mmap_sem, and that should
> +	 * ensure that dmap can't reclaimed or truncated and it should still
> +	 * be there in tree despite the fact we dropped and re-acquired the
> +	 * lock.
> +	 */
> +	ret = -EIO;
> +	if (WARN_ON(!dmap))
> +		goto out_err;
> +
> +	/* Maybe another thread already upgraded mapping while we were not
> +	 * holding lock.
> +	 */
> +	if (dmap->writable)
> +		goto out_fill_iomap;
> +
> +	ret = fuse_setup_one_mapping(inode,
> +				     ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ),
> +				     dmap, true, true);
> +	if (ret < 0) {
> +		printk("fuse_setup_one_mapping() failed. err=%d pos=0x%llx\n",
> +		       ret, pos);
> +		goto out_err;
> +	}
> +
> +out_fill_iomap:
> +	fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
> +out_err:
> +	up_write(&fi->i_dmap_sem);
> +	return ret;
> +}
> +
>  /* This is just for DAX and the mapping is ephemeral, do not use it for other
>   * purposes since there is no block device with a permanent mapping.
>   */
> @@ -1952,6 +2006,7 @@ static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
>  	struct fuse_inode *fi = get_fuse_inode(inode);
>  	struct fuse_conn *fc = get_fuse_conn(inode);
>  	struct fuse_dax_mapping *dmap;
> +	bool writable = flags & IOMAP_WRITE;
>  
>  	/* We don't support FIEMAP */
>  	BUG_ON(flags & IOMAP_REPORT);
> @@ -1982,9 +2037,23 @@ static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
>  	dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, pos, pos);
>  
>  	if (dmap) {
> -		fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
> -		up_read(&fi->i_dmap_sem);
> -		return 0;
> +		if (writable && !dmap->writable) {
> +			/* Upgrade read-only mapping to read-write. This will
> +			 * require exclusive i_dmap_sem lock as we don't want
> +			 * two threads to be trying to this simultaneously
> +			 * for same dmap. So drop shared lock and acquire
> +			 * exclusive lock.
> +			 */
> +			up_read(&fi->i_dmap_sem);
> +			pr_debug("%s: Upgrading mapping at offset 0x%llx"
> +				 " length 0x%llx\n", __func__, pos, length);
> +			return iomap_begin_upgrade_mapping(inode, pos, length,
> +							   flags, iomap);
> +		} else {
> +			fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
> +			up_read(&fi->i_dmap_sem);
> +			return 0;
> +		}
>  	} else {
>  		up_read(&fi->i_dmap_sem);
>  		pr_debug("%s: no mapping at offset 0x%llx length 0x%llx\n",
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index e899a06e29d7..34ca8b90a1e1 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -134,6 +134,9 @@ struct fuse_dax_mapping {
>  
>  	/** Length of mapping, in bytes */
>  	loff_t length;
> +
> +	/* Is this mapping read-only or read-write */
> +	bool writable;
>  };
>  
>  /** FUSE inode */
> 


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [Virtio-fs] [PATCH 3/3] fuse: Add logic to upgrade a read-only mapping to read-write
  2019-07-26 15:49 [Virtio-fs] [PATCH 0/3][V2] virtio-fs: Create a read-write mapping only if needed Vivek Goyal
@ 2019-07-26 15:49 ` Vivek Goyal
  2019-08-04  1:56   ` piaojun
  0 siblings, 1 reply; 15+ messages in thread
From: Vivek Goyal @ 2019-07-26 15:49 UTC (permalink / raw)
  To: virtio-fs

Do not always map a dax mapping read-write. There are use cases like
executing a file where we want to map file read-only. virtio-fs dir on
host might be backed by overlayfs. We don't want to open file read-write
on overlayfs otherwise it will unnecessariliy be copied-up nullyifying
the advantages of sharing page cache between vms for unmodified files.

Hence, create a read-only mapping if user did not ask for writable mapping.
Later upgrade it to read-write mapping when users requests it.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
---
 fs/fuse/file.c   | 121 +++++++++++++++++++++++++++++++++++++----------
 fs/fuse/fuse_i.h |   3 ++
 2 files changed, 98 insertions(+), 26 deletions(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index a2c19e4a28b5..5277de7028a6 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -265,7 +265,8 @@ static void dmap_add_to_free_pool(struct fuse_conn *fc,
 
 /* offset passed in should be aligned to FUSE_DAX_MEM_RANGE_SZ */
 static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
-				  struct fuse_dax_mapping *dmap)
+				  struct fuse_dax_mapping *dmap, bool writable,
+				  bool upgrade)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_inode *fi = get_fuse_inode(inode);
@@ -283,7 +284,8 @@ static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
 	inarg.moffset = dmap->window_offset;
 	inarg.len = FUSE_DAX_MEM_RANGE_SZ;
 	inarg.flags |= FUSE_SETUPMAPPING_FLAG_READ;
-	inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
+	if (writable)
+		inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE;
 	args.in.h.opcode = FUSE_SETUPMAPPING;
 	args.in.h.nodeid = fi->nodeid;
 	args.in.numargs = 1;
@@ -296,26 +298,30 @@ static int fuse_setup_one_mapping(struct inode *inode, loff_t offset,
 		return err;
 	}
 
-	pr_debug("fuse_setup_one_mapping() succeeded. offset=0x%llx err=%zd\n", offset, err);
+	pr_debug("fuse_setup_one_mapping() succeeded. offset=0x%llx writable=%d"
+		 " err=%zd\n", offset, writable, err);
 
-	/*
-	 * We don't take a refernce on inode. inode is valid right now and
-	 * when inode is going away, cleanup logic should first cleanup
-	 * dmap entries.
-	 *
-	 * TODO: Do we need to ensure that we are holding inode lock
-	 * as well.
-	 */
-	dmap->inode = inode;
-	dmap->start = offset;
-	dmap->end = offset + FUSE_DAX_MEM_RANGE_SZ - 1;
-	/* Protected by fi->i_dmap_sem */
-	fuse_dax_interval_tree_insert(dmap, &fi->dmap_tree);
-	fi->nr_dmaps++;
-	spin_lock(&fc->lock);
-	list_add_tail(&dmap->busy_list, &fc->busy_ranges);
-	fc->nr_busy_ranges++;
-	spin_unlock(&fc->lock);
+	dmap->writable = writable;
+	if (!upgrade) {
+		/*
+		 * We don't take a refernce on inode. inode is valid right now
+		 * and when inode is going away, cleanup logic should first
+		 * cleanup dmap entries.
+		 *
+		 * TODO: Do we need to ensure that we are holding inode lock
+		 * as well.
+		 */
+		dmap->inode = inode;
+		dmap->start = offset;
+		dmap->end = offset + FUSE_DAX_MEM_RANGE_SZ - 1;
+		/* Protected by fi->i_dmap_sem */
+		fuse_dax_interval_tree_insert(dmap, &fi->dmap_tree);
+		fi->nr_dmaps++;
+		spin_lock(&fc->lock);
+		list_add_tail(&dmap->busy_list, &fc->busy_ranges);
+		fc->nr_busy_ranges++;
+		spin_unlock(&fc->lock);
+	}
 	return 0;
 }
 
@@ -1890,6 +1896,7 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_dax_mapping *dmap, *alloc_dmap = NULL;
 	int ret;
+	bool writable = flags & IOMAP_WRITE;
 
 	/* Can't do reclaim in fault path yet due to lock ordering.
 	 * Read path takes shared inode lock and that's not sufficient
@@ -1930,10 +1937,11 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
 
 	/* Setup one mapping */
 	ret = fuse_setup_one_mapping(inode,
-		ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ), alloc_dmap);
+				     ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ),
+				     alloc_dmap, writable, false);
 	if (ret < 0) {
 		printk("fuse_setup_one_mapping() failed. err=%d"
-			" pos=0x%llx\n", ret, pos);
+			" pos=0x%llx, writable=%d\n", ret, pos, writable);
 		dmap_add_to_free_pool(fc, alloc_dmap);
 		up_write(&fi->i_dmap_sem);
 		return ret;
@@ -1943,6 +1951,52 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos,
 	return 0;
 }
 
+static int iomap_begin_upgrade_mapping(struct inode *inode, loff_t pos,
+					 loff_t length, unsigned flags,
+					 struct iomap *iomap)
+{
+	struct fuse_inode *fi = get_fuse_inode(inode);
+	struct fuse_dax_mapping *dmap;
+	int ret;
+
+	/*
+	 * Take exclusive lock so that only one caller can try to setup
+	 * mapping and others wait.
+	 */
+	down_write(&fi->i_dmap_sem);
+	dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, pos, pos);
+
+	/* We are holding either inode lock or i_mmap_sem, and that should
+	 * ensure that dmap can't reclaimed or truncated and it should still
+	 * be there in tree despite the fact we dropped and re-acquired the
+	 * lock.
+	 */
+	ret = -EIO;
+	if (WARN_ON(!dmap))
+		goto out_err;
+
+	/* Maybe another thread already upgraded mapping while we were not
+	 * holding lock.
+	 */
+	if (dmap->writable)
+		goto out_fill_iomap;
+
+	ret = fuse_setup_one_mapping(inode,
+				     ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ),
+				     dmap, true, true);
+	if (ret < 0) {
+		printk("fuse_setup_one_mapping() failed. err=%d pos=0x%llx\n",
+		       ret, pos);
+		goto out_err;
+	}
+
+out_fill_iomap:
+	fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
+out_err:
+	up_write(&fi->i_dmap_sem);
+	return ret;
+}
+
 /* This is just for DAX and the mapping is ephemeral, do not use it for other
  * purposes since there is no block device with a permanent mapping.
  */
@@ -1952,6 +2006,7 @@ static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
 	struct fuse_inode *fi = get_fuse_inode(inode);
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_dax_mapping *dmap;
+	bool writable = flags & IOMAP_WRITE;
 
 	/* We don't support FIEMAP */
 	BUG_ON(flags & IOMAP_REPORT);
@@ -1982,9 +2037,23 @@ static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
 	dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, pos, pos);
 
 	if (dmap) {
-		fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
-		up_read(&fi->i_dmap_sem);
-		return 0;
+		if (writable && !dmap->writable) {
+			/* Upgrade read-only mapping to read-write. This will
+			 * require exclusive i_dmap_sem lock as we don't want
+			 * two threads to be trying to this simultaneously
+			 * for same dmap. So drop shared lock and acquire
+			 * exclusive lock.
+			 */
+			up_read(&fi->i_dmap_sem);
+			pr_debug("%s: Upgrading mapping at offset 0x%llx"
+				 " length 0x%llx\n", __func__, pos, length);
+			return iomap_begin_upgrade_mapping(inode, pos, length,
+							   flags, iomap);
+		} else {
+			fuse_fill_iomap(inode, pos, length, iomap, dmap, flags);
+			up_read(&fi->i_dmap_sem);
+			return 0;
+		}
 	} else {
 		up_read(&fi->i_dmap_sem);
 		pr_debug("%s: no mapping at offset 0x%llx length 0x%llx\n",
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index e899a06e29d7..34ca8b90a1e1 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -134,6 +134,9 @@ struct fuse_dax_mapping {
 
 	/** Length of mapping, in bytes */
 	loff_t length;
+
+	/* Is this mapping read-only or read-write */
+	bool writable;
 };
 
 /** FUSE inode */
-- 
2.17.2


^ permalink raw reply related	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2019-08-05 12:34 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-07-24 21:07 [Virtio-fs] [PATCH 0/3] virtio-fs: Create a read-write mapping only if needed Vivek Goyal
2019-07-24 21:07 ` [Virtio-fs] [PATCH 1/3] fuse: Get rid of file parameter from setup mapping Vivek Goyal
2019-07-26  1:23   ` Liu Bo
2019-07-24 21:07 ` [Virtio-fs] [PATCH 2/3] fuse: Move new mapping setup code in a function Vivek Goyal
2019-07-26  1:34   ` Liu Bo
2019-07-24 21:07 ` [Virtio-fs] [PATCH 3/3] fuse: Add logic to upgrade a read-only mapping to read-write Vivek Goyal
2019-07-25  0:08   ` Liu Bo
2019-07-25 15:35     ` Vivek Goyal
2019-07-25 19:32       ` Liu Bo
2019-07-25 20:30         ` Vivek Goyal
2019-07-25 21:57           ` Liu Bo
2019-07-26 15:49 [Virtio-fs] [PATCH 0/3][V2] virtio-fs: Create a read-write mapping only if needed Vivek Goyal
2019-07-26 15:49 ` [Virtio-fs] [PATCH 3/3] fuse: Add logic to upgrade a read-only mapping to read-write Vivek Goyal
2019-08-04  1:56   ` piaojun
2019-08-05 12:25     ` Vivek Goyal
2019-08-05 12:34       ` piaojun

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.