All of lore.kernel.org
 help / color / mirror / Atom feed
* [Virtio-fs] [PATCH-v3 0/1] virtiofs: FUSE_REMOVEMAPPING remove multiple entries in one call
@ 2019-06-05  3:02 Peng Tao
  2019-06-05  3:02 ` [Virtio-fs] [PATCH] " Peng Tao
  2019-06-05 20:30 ` [Virtio-fs] [PATCH-v3 0/1] " Vivek Goyal
  0 siblings, 2 replies; 7+ messages in thread
From: Peng Tao @ 2019-06-05  3:02 UTC (permalink / raw)
  To: virtio-fs; +Cc: Peng Tao, Vivek Goyal

Hi Vivek,

This works with corresponding qemu virtiofsd patch:
"[PATCH-v3] virtiofsd: make FUSE_REMOVEMAPPING support multiple entries".

v1-v2: make fuse_removemapping_in count fuse_removemapping_one
v2->v3:
1. fold in Vivek's cleanup and rename all forget_one to remove_one
2. assert dmap->list is not used by anyone when adding to temporary list
3. list_del() dmap->list when removing it from to_remove list

Please add your SOB line as I don't see it with your cleanup and thus
did not add it to the patch when folding your change.

Thanks,
Tao

Peng Tao (1):
  virtiofs: FUSE_REMOVEMAPPING remove multiple entries in one call

 fs/fuse/file.c            | 205 +++++++++++++++++++++++++++-----------
 fs/fuse/fuse_i.h          |   6 +-
 fs/fuse/inode.c           |   2 +-
 include/uapi/linux/fuse.h |   7 ++
 4 files changed, 160 insertions(+), 60 deletions(-)

-- 
2.17.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [Virtio-fs] [PATCH] virtiofs: FUSE_REMOVEMAPPING remove multiple entries in one call
  2019-06-05  3:02 [Virtio-fs] [PATCH-v3 0/1] virtiofs: FUSE_REMOVEMAPPING remove multiple entries in one call Peng Tao
@ 2019-06-05  3:02 ` Peng Tao
  2019-06-05 18:06   ` Liu Bo
  2019-06-05 20:30 ` [Virtio-fs] [PATCH-v3 0/1] " Vivek Goyal
  1 sibling, 1 reply; 7+ messages in thread
From: Peng Tao @ 2019-06-05  3:02 UTC (permalink / raw)
  To: virtio-fs; +Cc: Peng Tao, Vivek Goyal

Enhance FUSE_REMOVEMAPPING wire protocol to remove multiple mappings
in one call. So that fuse_removemapping() can send just one fuse request
for many dmap entries.

Signed-off-by: Peng Tao <tao.peng@linux.alibaba.com>
---
 fs/fuse/file.c            | 205 +++++++++++++++++++++++++++-----------
 fs/fuse/fuse_i.h          |   6 +-
 fs/fuse/inode.c           |   2 +-
 include/uapi/linux/fuse.h |   7 ++
 4 files changed, 160 insertions(+), 60 deletions(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index d0979dc32f08..5a0b80719815 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -27,6 +27,9 @@ INTERVAL_TREE_DEFINE(struct fuse_dax_mapping, rb, __u64, __subtree_last,
 
 static struct fuse_dax_mapping *alloc_dax_mapping_reclaim(struct fuse_conn *fc,
 				struct inode *inode);
+static void fuse_dax_free_mappings_range(struct fuse_conn *fc,
+				struct inode *inode, loff_t start, loff_t end);
+
 static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
 			  int opcode, struct fuse_open_out *outargp)
 {
@@ -319,75 +322,87 @@ static int fuse_setup_one_mapping(struct inode *inode,
 	return 0;
 }
 
-static int fuse_removemapping_one(struct inode *inode,
-					struct fuse_dax_mapping *dmap)
+static int
+fuse_send_removemapping(struct inode *inode,
+			struct fuse_removemapping_in *inargp,
+			struct fuse_removemapping_one *remove_one)
 {
 	struct fuse_inode *fi = get_fuse_inode(inode);
 	struct fuse_conn *fc = get_fuse_conn(inode);
-	struct fuse_removemapping_in inarg;
 	FUSE_ARGS(args);
 
-	memset(&inarg, 0, sizeof(inarg));
-	inarg.moffset = dmap->window_offset;
-	inarg.len = dmap->length;
 	args.in.h.opcode = FUSE_REMOVEMAPPING;
 	args.in.h.nodeid = fi->nodeid;
-	args.in.numargs = 1;
-	args.in.args[0].size = sizeof(inarg);
-	args.in.args[0].value = &inarg;
+	args.in.numargs = 2;
+	args.in.args[0].size = sizeof(*inargp);
+	args.in.args[0].value = inargp;
+	args.in.args[1].size = inargp->count * sizeof(*remove_one);
+	args.in.args[1].value = remove_one;
 	return fuse_simple_request(fc, &args);
 }
 
-/*
- * It is called from evict_inode() and by that time inode is going away. So
- * this function does not take any locks like fi->i_dmap_sem for traversing
- * that fuse inode interval tree. If that lock is taken then lock validator
- * complains of deadlock situation w.r.t fs_reclaim lock.
- */
-void fuse_removemapping(struct inode *inode)
+static int dmap_list_send_removemappings(struct inode *inode, unsigned num,
+					 struct list_head *to_remove)
 {
-	struct fuse_conn *fc = get_fuse_conn(inode);
-	struct fuse_inode *fi = get_fuse_inode(inode);
-	ssize_t err;
+	struct fuse_removemapping_one *remove_one, *ptr;
+	struct fuse_removemapping_in inarg;
 	struct fuse_dax_mapping *dmap;
+	int ret, i = 0, nr_alloc;
 
-	/* Clear the mappings list */
-	while (true) {
-		WARN_ON(fi->nr_dmaps < 0);
+	nr_alloc = min_t(unsigned int, num, FUSE_REMOVEMAPPING_MAX_ENTRY);
+	remove_one = kmalloc_array(nr_alloc, sizeof(*remove_one), GFP_NOIO);
+	if (!remove_one)
+		return -ENOMEM;
 
-		dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, 0,
-								-1);
-		if (dmap) {
-			fuse_dax_interval_tree_remove(dmap, &fi->dmap_tree);
-			fi->nr_dmaps--;
-			dmap_remove_busy_list(fc, dmap);
+	ptr = remove_one;
+	list_for_each_entry(dmap, to_remove, list) {
+		ptr->moffset = dmap->window_offset;
+		ptr->len = dmap->length;
+		ptr++;
+		i++;
+		num--;
+		if (i >= nr_alloc || num == 0) {
+			memset(&inarg, 0, sizeof(inarg));
+			inarg.count = i;
+			ret = fuse_send_removemapping(inode, &inarg,
+						      remove_one);
+			if (ret)
+				goto out;
+			ptr = remove_one;
+			i = 0;
 		}
+	}
+out:
+	kfree(remove_one);
+	return ret;
+}
 
-		if (!dmap)
-			break;
+static int fuse_removemapping_one(struct inode *inode,
+					struct fuse_dax_mapping *dmap)
+{
+	struct fuse_removemapping_in inarg;
+	struct fuse_removemapping_one remove_one;
 
-		/*
-		 * During umount/shutdown, fuse connection is dropped first
-		 * and later evict_inode() is called later. That means any
-		 * removemapping messages are going to fail. Send messages
-		 * only if connection is up. Otherwise fuse daemon is
-		 * responsible for cleaning up any leftover references and
-		 * mappings.
-		 */
-		if (fc->connected) {
-			err = fuse_removemapping_one(inode, dmap);
-			if (err) {
-				pr_warn("Failed to removemapping. offset=0x%llx"
-					" len=0x%llx\n", dmap->window_offset,
-					dmap->length);
-			}
-		}
+	memset(&inarg, 0, sizeof(inarg));
+	/* TODO: fill in inarg.fh when available */
+	inarg.count = 1;
+	memset(&remove_one, 0, sizeof(remove_one));
+	remove_one.moffset = dmap->window_offset;
+	remove_one.len = dmap->length;
 
-		dmap->inode = NULL;
+	return fuse_send_removemapping(inode, &inarg, &remove_one);
+}
 
-		/* Add it back to free ranges list */
-		free_dax_mapping(fc, dmap);
-	}
+/*
+ * It is called from evict_inode() and by that time inode is going away. So
+ * this function does not take any locks like fi->i_dmap_sem for traversing
+ * that fuse inode interval tree. If that lock is taken then lock validator
+ * complains of deadlock situation w.r.t fs_reclaim lock.
+ */
+void fuse_cleanup_inode_mappings(struct inode *inode)
+{
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	fuse_dax_free_mappings_range(fc, inode, 0, -1);
 }
 
 void fuse_finish_open(struct inode *inode, struct file *file)
@@ -3980,6 +3995,88 @@ static struct fuse_dax_mapping *alloc_dax_mapping_reclaim(struct fuse_conn *fc,
 	}
 }
 
+/*
+ * Cleanup dmap entry and add back to free list. This should be called with
+ * fc->lock held.
+ */
+static void fuse_dax_do_free_mapping_locked(struct fuse_conn *fc,
+					    struct fuse_dax_mapping *dmap)
+{
+	__dmap_remove_busy_list(fc, dmap);
+	dmap->inode = NULL;
+	dmap->start = dmap->end = 0;
+	__free_dax_mapping(fc, dmap);
+	pr_debug("fuse: freed memory range start=0x%llx end=0x%llx "
+		"window_offset=0x%llx length=0x%llx\n", dmap->start,
+		dmap->end, dmap->window_offset, dmap->length);
+}
+
+/*
+ * Free inode dmap entries whose range falls entirely inside [start, end].
+ * Does not take any locks. Caller must take care of any lock requirements.
+ * Lock ordering follows fuse_dax_free_one_mapping().
+ * inode->i_rwsem, fuse_inode->i_mmap_sem and fuse_inode->i_dmap_sem must be
+ * held exclusively, unless it is called from evict_inode() where no one else
+ * is accessing the inode.
+ */
+static void fuse_dax_free_mappings_range(struct fuse_conn *fc,
+					 struct inode *inode, loff_t start,
+					 loff_t end)
+{
+	struct fuse_inode *fi = get_fuse_inode(inode);
+	struct fuse_dax_mapping *dmap, *n;
+	int err, num = 0;
+	LIST_HEAD(to_remove);
+
+	pr_debug("fuse: %s: start=0x%llx, end=0x%llx\n", __func__, start, end);
+
+	/*
+	 * Interval tree search matches intersecting entries. Adjust the range
+	 * to avoid dropping partial valid entries.
+	 */
+	start = ALIGN(start, FUSE_DAX_MEM_RANGE_SZ);
+	end = ALIGN_DOWN(end, FUSE_DAX_MEM_RANGE_SZ);
+
+	while (1) {
+		dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, start,
+							 end);
+		if (!dmap)
+			break;
+		fuse_dax_interval_tree_remove(dmap, &fi->dmap_tree);
+		num++;
+		WARN_ON(!list_empty(&dmap->list));
+		list_add_tail(&dmap->list, &to_remove);
+	}
+
+	/* Nothing to remove */
+	if (list_empty(&to_remove))
+		return;
+
+	WARN_ON(fi->nr_dmaps < num);
+	fi->nr_dmaps -= num;
+	/*
+	 * During umount/shutdown, fuse connection is dropped first
+	 * and evict_inode() is called later. That means any
+	 * removemapping messages are going to fail. Send messages
+	 * only if connection is up. Otherwise fuse daemon is
+	 * responsible for cleaning up any leftover references and
+	 * mappings.
+	 */
+	if (fc->connected) {
+		err = dmap_list_send_removemappings(inode, num, &to_remove);
+		if (err) {
+			pr_warn("Failed to removemappings. start=0x%llx"
+				" end=0x%llx\n", start, end);
+		}
+	}
+	spin_lock(&fc->lock);
+	list_for_each_entry_safe(dmap, n, &to_remove, list) {
+		list_del(&dmap->list);
+		fuse_dax_do_free_mapping_locked(fc, dmap);
+	}
+	spin_unlock(&fc->lock);
+}
+
 int fuse_dax_free_one_mapping_locked(struct fuse_conn *fc, struct inode *inode,
 				u64 dmap_start)
 {
@@ -4003,15 +4100,9 @@ int fuse_dax_free_one_mapping_locked(struct fuse_conn *fc, struct inode *inode,
 
 	/* Cleanup dmap entry and add back to free list */
 	spin_lock(&fc->lock);
-	__dmap_remove_busy_list(fc, dmap);
-	dmap->inode = NULL;
-	dmap->start = dmap->end = 0;
-	__free_dax_mapping(fc, dmap);
+	fuse_dax_do_free_mapping_locked(fc, dmap);
 	spin_unlock(&fc->lock);
 
-	pr_debug("fuse: freed memory range window_offset=0x%llx,"
-				" length=0x%llx\n", dmap->window_offset,
-				dmap->length);
 	return ret;
 }
 
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 7ac7f9a0b81b..17784dbd49a7 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -117,7 +117,9 @@ struct fuse_dax_mapping {
 	/* Pointer to inode where this memory range is mapped */
 	struct inode *inode;
 
-	/* Will connect in fc->free_ranges to keep track of free memory */
+	/* Will connect in fc->free_ranges to keep track of free memory.
+	 * When the mapping is in fc->busy_ranges, the field can also be
+	 * used by temporary lists */
 	struct list_head list;
 
 	/* For interval tree in file/inode */
@@ -1286,6 +1288,6 @@ unsigned fuse_len_args(unsigned numargs, struct fuse_arg *args);
  */
 u64 fuse_get_unique(struct fuse_iqueue *fiq);
 void fuse_dax_free_mem_worker(struct work_struct *work);
-void fuse_removemapping(struct inode *inode);
+void fuse_cleanup_inode_mappings(struct inode *inode);
 
 #endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 302f7e04b645..4277324a7c3b 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -123,7 +123,7 @@ static void fuse_evict_inode(struct inode *inode)
 		struct fuse_conn *fc = get_fuse_conn(inode);
 		struct fuse_inode *fi = get_fuse_inode(inode);
 		if (IS_DAX(inode)) {
-			fuse_removemapping(inode);
+			fuse_cleanup_inode_mappings(inode);
 			WARN_ON(fi->nr_dmaps);
 		}
 		fuse_queue_forget(fc, fi->forget, fi->nodeid, fi->nlookup);
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 5042e227e8a8..b588a028f099 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -850,10 +850,17 @@ struct fuse_setupmapping_out {
 struct fuse_removemapping_in {
         /* An already open handle */
         uint64_t	fh;
+	/* number of fuse_removemapping_one follows */
+	uint32_t	count;
+};
+
+struct fuse_removemapping_one {
 	/* Offset into the dax window start the unmapping */
 	uint64_t        moffset;
         /* Length of mapping required */
         uint64_t	len;
 };
+#define FUSE_REMOVEMAPPING_MAX_ENTRY	\
+		(PAGE_SIZE / sizeof(struct fuse_removemapping_one))
 
 #endif /* _LINUX_FUSE_H */
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [Virtio-fs] [PATCH] virtiofs: FUSE_REMOVEMAPPING remove multiple entries in one call
  2019-06-05  3:02 ` [Virtio-fs] [PATCH] " Peng Tao
@ 2019-06-05 18:06   ` Liu Bo
  2019-06-05 18:35     ` Vivek Goyal
  0 siblings, 1 reply; 7+ messages in thread
From: Liu Bo @ 2019-06-05 18:06 UTC (permalink / raw)
  To: Peng Tao; +Cc: virtio-fs, Vivek Goyal

On Wed, Jun 05, 2019 at 11:02:33AM +0800, Peng Tao wrote:
> Enhance FUSE_REMOVEMAPPING wire protocol to remove multiple mappings
> in one call. So that fuse_removemapping() can send just one fuse request
> for many dmap entries.
> 
> Signed-off-by: Peng Tao <tao.peng@linux.alibaba.com>
> ---
>  fs/fuse/file.c            | 205 +++++++++++++++++++++++++++-----------
>  fs/fuse/fuse_i.h          |   6 +-
>  fs/fuse/inode.c           |   2 +-
>  include/uapi/linux/fuse.h |   7 ++
>  4 files changed, 160 insertions(+), 60 deletions(-)
> 
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index d0979dc32f08..5a0b80719815 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -27,6 +27,9 @@ INTERVAL_TREE_DEFINE(struct fuse_dax_mapping, rb, __u64, __subtree_last,
>  
>  static struct fuse_dax_mapping *alloc_dax_mapping_reclaim(struct fuse_conn *fc,
>  				struct inode *inode);
> +static void fuse_dax_free_mappings_range(struct fuse_conn *fc,
> +				struct inode *inode, loff_t start, loff_t end);
> +
>  static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
>  			  int opcode, struct fuse_open_out *outargp)
>  {
> @@ -319,75 +322,87 @@ static int fuse_setup_one_mapping(struct inode *inode,
>  	return 0;
>  }
>  
> -static int fuse_removemapping_one(struct inode *inode,
> -					struct fuse_dax_mapping *dmap)
> +static int
> +fuse_send_removemapping(struct inode *inode,
> +			struct fuse_removemapping_in *inargp,
> +			struct fuse_removemapping_one *remove_one)
>  {
>  	struct fuse_inode *fi = get_fuse_inode(inode);
>  	struct fuse_conn *fc = get_fuse_conn(inode);
> -	struct fuse_removemapping_in inarg;
>  	FUSE_ARGS(args);
>  
> -	memset(&inarg, 0, sizeof(inarg));
> -	inarg.moffset = dmap->window_offset;
> -	inarg.len = dmap->length;
>  	args.in.h.opcode = FUSE_REMOVEMAPPING;
>  	args.in.h.nodeid = fi->nodeid;
> -	args.in.numargs = 1;
> -	args.in.args[0].size = sizeof(inarg);
> -	args.in.args[0].value = &inarg;
> +	args.in.numargs = 2;
> +	args.in.args[0].size = sizeof(*inargp);
> +	args.in.args[0].value = inargp;
> +	args.in.args[1].size = inargp->count * sizeof(*remove_one);
> +	args.in.args[1].value = remove_one;
>  	return fuse_simple_request(fc, &args);
>  }
>  
> -/*
> - * It is called from evict_inode() and by that time inode is going away. So
> - * this function does not take any locks like fi->i_dmap_sem for traversing
> - * that fuse inode interval tree. If that lock is taken then lock validator
> - * complains of deadlock situation w.r.t fs_reclaim lock.
> - */
> -void fuse_removemapping(struct inode *inode)
> +static int dmap_list_send_removemappings(struct inode *inode, unsigned num,
> +					 struct list_head *to_remove)
>  {
> -	struct fuse_conn *fc = get_fuse_conn(inode);
> -	struct fuse_inode *fi = get_fuse_inode(inode);
> -	ssize_t err;
> +	struct fuse_removemapping_one *remove_one, *ptr;
> +	struct fuse_removemapping_in inarg;
>  	struct fuse_dax_mapping *dmap;
> +	int ret, i = 0, nr_alloc;
>  
> -	/* Clear the mappings list */
> -	while (true) {
> -		WARN_ON(fi->nr_dmaps < 0);
> +	nr_alloc = min_t(unsigned int, num, FUSE_REMOVEMAPPING_MAX_ENTRY);
> +	remove_one = kmalloc_array(nr_alloc, sizeof(*remove_one), GFP_NOIO);

GFP_NOIO's comments implies that memalloc_noio_{save,restore} are preferred,
also would GFP_NOFS be better?

> +	if (!remove_one)
> +		return -ENOMEM;
>  
> -		dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, 0,
> -								-1);
> -		if (dmap) {
> -			fuse_dax_interval_tree_remove(dmap, &fi->dmap_tree);
> -			fi->nr_dmaps--;
> -			dmap_remove_busy_list(fc, dmap);
> +	ptr = remove_one;
> +	list_for_each_entry(dmap, to_remove, list) {
> +		ptr->moffset = dmap->window_offset;
> +		ptr->len = dmap->length;
> +		ptr++;
> +		i++;
> +		num--;
> +		if (i >= nr_alloc || num == 0) {
> +			memset(&inarg, 0, sizeof(inarg));
> +			inarg.count = i;
> +			ret = fuse_send_removemapping(inode, &inarg,
> +						      remove_one);
> +			if (ret)
> +				goto out;
> +			ptr = remove_one;
> +			i = 0;
>  		}
> +	}
> +out:
> +	kfree(remove_one);
> +	return ret;
> +}
>  
> -		if (!dmap)
> -			break;
> +static int fuse_removemapping_one(struct inode *inode,
> +					struct fuse_dax_mapping *dmap)
> +{
> +	struct fuse_removemapping_in inarg;
> +	struct fuse_removemapping_one remove_one;
>  
> -		/*
> -		 * During umount/shutdown, fuse connection is dropped first
> -		 * and later evict_inode() is called later. That means any
> -		 * removemapping messages are going to fail. Send messages
> -		 * only if connection is up. Otherwise fuse daemon is
> -		 * responsible for cleaning up any leftover references and
> -		 * mappings.
> -		 */
> -		if (fc->connected) {
> -			err = fuse_removemapping_one(inode, dmap);
> -			if (err) {
> -				pr_warn("Failed to removemapping. offset=0x%llx"
> -					" len=0x%llx\n", dmap->window_offset,
> -					dmap->length);
> -			}
> -		}
> +	memset(&inarg, 0, sizeof(inarg));
> +	/* TODO: fill in inarg.fh when available */
> +	inarg.count = 1;
> +	memset(&remove_one, 0, sizeof(remove_one));
> +	remove_one.moffset = dmap->window_offset;
> +	remove_one.len = dmap->length;
>  
> -		dmap->inode = NULL;
> +	return fuse_send_removemapping(inode, &inarg, &remove_one);
> +}
>  
> -		/* Add it back to free ranges list */
> -		free_dax_mapping(fc, dmap);
> -	}
> +/*
> + * It is called from evict_inode() and by that time inode is going away. So
> + * this function does not take any locks like fi->i_dmap_sem for traversing
> + * that fuse inode interval tree. If that lock is taken then lock validator
> + * complains of deadlock situation w.r.t fs_reclaim lock.
> + */
> +void fuse_cleanup_inode_mappings(struct inode *inode)
> +{
> +	struct fuse_conn *fc = get_fuse_conn(inode);
> +	fuse_dax_free_mappings_range(fc, inode, 0, -1);
>  }
>  
>  void fuse_finish_open(struct inode *inode, struct file *file)
> @@ -3980,6 +3995,88 @@ static struct fuse_dax_mapping *alloc_dax_mapping_reclaim(struct fuse_conn *fc,
>  	}
>  }
>  
> +/*
> + * Cleanup dmap entry and add back to free list. This should be called with
> + * fc->lock held.
> + */
> +static void fuse_dax_do_free_mapping_locked(struct fuse_conn *fc,
> +					    struct fuse_dax_mapping *dmap)
> +{
> +	__dmap_remove_busy_list(fc, dmap);
> +	dmap->inode = NULL;
> +	dmap->start = dmap->end = 0;
> +	__free_dax_mapping(fc, dmap);
> +	pr_debug("fuse: freed memory range start=0x%llx end=0x%llx "
> +		"window_offset=0x%llx length=0x%llx\n", dmap->start,
> +		dmap->end, dmap->window_offset, dmap->length);

pr_debug() needs to be placed at the beginning as dmap->start & end have been
zero'd.

Other parts look good to me.
Reviewed-by: Liu Bo <bo.liu@linux.alibaba.com>

thanks,
-liubo
> +}
> +
> +/*
> + * Free inode dmap entries whose range falls entirely inside [start, end].
> + * Does not take any locks. Caller must take care of any lock requirements.
> + * Lock ordering follows fuse_dax_free_one_mapping().
> + * inode->i_rwsem, fuse_inode->i_mmap_sem and fuse_inode->i_dmap_sem must be
> + * held exclusively, unless it is called from evict_inode() where no one else
> + * is accessing the inode.
> + */
> +static void fuse_dax_free_mappings_range(struct fuse_conn *fc,
> +					 struct inode *inode, loff_t start,
> +					 loff_t end)
> +{
> +	struct fuse_inode *fi = get_fuse_inode(inode);
> +	struct fuse_dax_mapping *dmap, *n;
> +	int err, num = 0;
> +	LIST_HEAD(to_remove);
> +
> +	pr_debug("fuse: %s: start=0x%llx, end=0x%llx\n", __func__, start, end);
> +
> +	/*
> +	 * Interval tree search matches intersecting entries. Adjust the range
> +	 * to avoid dropping partial valid entries.
> +	 */
> +	start = ALIGN(start, FUSE_DAX_MEM_RANGE_SZ);
> +	end = ALIGN_DOWN(end, FUSE_DAX_MEM_RANGE_SZ);
> +
> +	while (1) {
> +		dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, start,
> +							 end);
> +		if (!dmap)
> +			break;
> +		fuse_dax_interval_tree_remove(dmap, &fi->dmap_tree);
> +		num++;
> +		WARN_ON(!list_empty(&dmap->list));
> +		list_add_tail(&dmap->list, &to_remove);
> +	}
> +
> +	/* Nothing to remove */
> +	if (list_empty(&to_remove))
> +		return;
> +
> +	WARN_ON(fi->nr_dmaps < num);
> +	fi->nr_dmaps -= num;
> +	/*
> +	 * During umount/shutdown, fuse connection is dropped first
> +	 * and evict_inode() is called later. That means any
> +	 * removemapping messages are going to fail. Send messages
> +	 * only if connection is up. Otherwise fuse daemon is
> +	 * responsible for cleaning up any leftover references and
> +	 * mappings.
> +	 */
> +	if (fc->connected) {
> +		err = dmap_list_send_removemappings(inode, num, &to_remove);
> +		if (err) {
> +			pr_warn("Failed to removemappings. start=0x%llx"
> +				" end=0x%llx\n", start, end);
> +		}
> +	}
> +	spin_lock(&fc->lock);
> +	list_for_each_entry_safe(dmap, n, &to_remove, list) {
> +		list_del(&dmap->list);
> +		fuse_dax_do_free_mapping_locked(fc, dmap);
> +	}
> +	spin_unlock(&fc->lock);
> +}
> +
>  int fuse_dax_free_one_mapping_locked(struct fuse_conn *fc, struct inode *inode,
>  				u64 dmap_start)
>  {
> @@ -4003,15 +4100,9 @@ int fuse_dax_free_one_mapping_locked(struct fuse_conn *fc, struct inode *inode,
>  
>  	/* Cleanup dmap entry and add back to free list */
>  	spin_lock(&fc->lock);
> -	__dmap_remove_busy_list(fc, dmap);
> -	dmap->inode = NULL;
> -	dmap->start = dmap->end = 0;
> -	__free_dax_mapping(fc, dmap);
> +	fuse_dax_do_free_mapping_locked(fc, dmap);
>  	spin_unlock(&fc->lock);
>  
> -	pr_debug("fuse: freed memory range window_offset=0x%llx,"
> -				" length=0x%llx\n", dmap->window_offset,
> -				dmap->length);
>  	return ret;
>  }
>  
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index 7ac7f9a0b81b..17784dbd49a7 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -117,7 +117,9 @@ struct fuse_dax_mapping {
>  	/* Pointer to inode where this memory range is mapped */
>  	struct inode *inode;
>  
> -	/* Will connect in fc->free_ranges to keep track of free memory */
> +	/* Will connect in fc->free_ranges to keep track of free memory.
> +	 * When the mapping is in fc->busy_ranges, the field can also be
> +	 * used by temporary lists */
>  	struct list_head list;
>  
>  	/* For interval tree in file/inode */
> @@ -1286,6 +1288,6 @@ unsigned fuse_len_args(unsigned numargs, struct fuse_arg *args);
>   */
>  u64 fuse_get_unique(struct fuse_iqueue *fiq);
>  void fuse_dax_free_mem_worker(struct work_struct *work);
> -void fuse_removemapping(struct inode *inode);
> +void fuse_cleanup_inode_mappings(struct inode *inode);
>  
>  #endif /* _FS_FUSE_I_H */
> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> index 302f7e04b645..4277324a7c3b 100644
> --- a/fs/fuse/inode.c
> +++ b/fs/fuse/inode.c
> @@ -123,7 +123,7 @@ static void fuse_evict_inode(struct inode *inode)
>  		struct fuse_conn *fc = get_fuse_conn(inode);
>  		struct fuse_inode *fi = get_fuse_inode(inode);
>  		if (IS_DAX(inode)) {
> -			fuse_removemapping(inode);
> +			fuse_cleanup_inode_mappings(inode);
>  			WARN_ON(fi->nr_dmaps);
>  		}
>  		fuse_queue_forget(fc, fi->forget, fi->nodeid, fi->nlookup);
> diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
> index 5042e227e8a8..b588a028f099 100644
> --- a/include/uapi/linux/fuse.h
> +++ b/include/uapi/linux/fuse.h
> @@ -850,10 +850,17 @@ struct fuse_setupmapping_out {
>  struct fuse_removemapping_in {
>          /* An already open handle */
>          uint64_t	fh;
> +	/* number of fuse_removemapping_one follows */
> +	uint32_t	count;
> +};
> +
> +struct fuse_removemapping_one {
>  	/* Offset into the dax window start the unmapping */
>  	uint64_t        moffset;
>          /* Length of mapping required */
>          uint64_t	len;
>  };
> +#define FUSE_REMOVEMAPPING_MAX_ENTRY	\
> +		(PAGE_SIZE / sizeof(struct fuse_removemapping_one))
>  
>  #endif /* _LINUX_FUSE_H */
> -- 
> 2.17.1
> 
> _______________________________________________
> Virtio-fs mailing list
> Virtio-fs@redhat.com
> https://www.redhat.com/mailman/listinfo/virtio-fs


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Virtio-fs] [PATCH] virtiofs: FUSE_REMOVEMAPPING remove multiple entries in one call
  2019-06-05 18:06   ` Liu Bo
@ 2019-06-05 18:35     ` Vivek Goyal
  2019-06-05 18:50       ` Liu Bo
  0 siblings, 1 reply; 7+ messages in thread
From: Vivek Goyal @ 2019-06-05 18:35 UTC (permalink / raw)
  To: Liu Bo; +Cc: virtio-fs, Peng Tao

On Wed, Jun 05, 2019 at 11:06:46AM -0700, Liu Bo wrote:

[..]
> > -/*
> > - * It is called from evict_inode() and by that time inode is going away. So
> > - * this function does not take any locks like fi->i_dmap_sem for traversing
> > - * that fuse inode interval tree. If that lock is taken then lock validator
> > - * complains of deadlock situation w.r.t fs_reclaim lock.
> > - */
> > -void fuse_removemapping(struct inode *inode)
> > +static int dmap_list_send_removemappings(struct inode *inode, unsigned num,
> > +					 struct list_head *to_remove)
> >  {
> > -	struct fuse_conn *fc = get_fuse_conn(inode);
> > -	struct fuse_inode *fi = get_fuse_inode(inode);
> > -	ssize_t err;
> > +	struct fuse_removemapping_one *remove_one, *ptr;
> > +	struct fuse_removemapping_in inarg;
> >  	struct fuse_dax_mapping *dmap;
> > +	int ret, i = 0, nr_alloc;
> >  
> > -	/* Clear the mappings list */
> > -	while (true) {
> > -		WARN_ON(fi->nr_dmaps < 0);
> > +	nr_alloc = min_t(unsigned int, num, FUSE_REMOVEMAPPING_MAX_ENTRY);
> > +	remove_one = kmalloc_array(nr_alloc, sizeof(*remove_one), GFP_NOIO);
> 
> GFP_NOIO's comments implies that memalloc_noio_{save,restore} are preferred,
> also would GFP_NOFS be better?

GFP_NOFS sounds reasonable. I am not sure how memalloc_noio_{save,restore}
API is better as opposed to using GFP_NOFS.

[..]
> > +/*
> > + * Cleanup dmap entry and add back to free list. This should be called with
> > + * fc->lock held.
> > + */
> > +static void fuse_dax_do_free_mapping_locked(struct fuse_conn *fc,
> > +					    struct fuse_dax_mapping *dmap)
> > +{
> > +	__dmap_remove_busy_list(fc, dmap);
> > +	dmap->inode = NULL;
> > +	dmap->start = dmap->end = 0;
> > +	__free_dax_mapping(fc, dmap);
> > +	pr_debug("fuse: freed memory range start=0x%llx end=0x%llx "
> > +		"window_offset=0x%llx length=0x%llx\n", dmap->start,
> > +		dmap->end, dmap->window_offset, dmap->length);
> 
> pr_debug() needs to be placed at the beginning as dmap->start & end have been
> zero'd.
> 

Good point. Will fix.

Vivek


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Virtio-fs] [PATCH] virtiofs: FUSE_REMOVEMAPPING remove multiple entries in one call
  2019-06-05 18:35     ` Vivek Goyal
@ 2019-06-05 18:50       ` Liu Bo
  0 siblings, 0 replies; 7+ messages in thread
From: Liu Bo @ 2019-06-05 18:50 UTC (permalink / raw)
  To: Vivek Goyal; +Cc: virtio-fs, Peng Tao

On Wed, Jun 05, 2019 at 02:35:14PM -0400, Vivek Goyal wrote:
> On Wed, Jun 05, 2019 at 11:06:46AM -0700, Liu Bo wrote:
> 
> [..]
> > > -/*
> > > - * It is called from evict_inode() and by that time inode is going away. So
> > > - * this function does not take any locks like fi->i_dmap_sem for traversing
> > > - * that fuse inode interval tree. If that lock is taken then lock validator
> > > - * complains of deadlock situation w.r.t fs_reclaim lock.
> > > - */
> > > -void fuse_removemapping(struct inode *inode)
> > > +static int dmap_list_send_removemappings(struct inode *inode, unsigned num,
> > > +					 struct list_head *to_remove)
> > >  {
> > > -	struct fuse_conn *fc = get_fuse_conn(inode);
> > > -	struct fuse_inode *fi = get_fuse_inode(inode);
> > > -	ssize_t err;
> > > +	struct fuse_removemapping_one *remove_one, *ptr;
> > > +	struct fuse_removemapping_in inarg;
> > >  	struct fuse_dax_mapping *dmap;
> > > +	int ret, i = 0, nr_alloc;
> > >  
> > > -	/* Clear the mappings list */
> > > -	while (true) {
> > > -		WARN_ON(fi->nr_dmaps < 0);
> > > +	nr_alloc = min_t(unsigned int, num, FUSE_REMOVEMAPPING_MAX_ENTRY);
> > > +	remove_one = kmalloc_array(nr_alloc, sizeof(*remove_one), GFP_NOIO);
> > 
> > GFP_NOIO's comments implies that memalloc_noio_{save,restore} are preferred,
> > also would GFP_NOFS be better?
> 
> GFP_NOFS sounds reasonable. I am not sure how memalloc_noio_{save,restore}
> API is better as opposed to using GFP_NOFS.
>

mm devs need memalloc_nofs_{save,restore} to encourage fs to declare a
scope that all children callees inherit the nofs attribute.

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=7dea19f9ee636cb244109a4dba426bbb3e5304b7

thanks,
-liubo

> [..]
> > > +/*
> > > + * Cleanup dmap entry and add back to free list. This should be called with
> > > + * fc->lock held.
> > > + */
> > > +static void fuse_dax_do_free_mapping_locked(struct fuse_conn *fc,
> > > +					    struct fuse_dax_mapping *dmap)
> > > +{
> > > +	__dmap_remove_busy_list(fc, dmap);
> > > +	dmap->inode = NULL;
> > > +	dmap->start = dmap->end = 0;
> > > +	__free_dax_mapping(fc, dmap);
> > > +	pr_debug("fuse: freed memory range start=0x%llx end=0x%llx "
> > > +		"window_offset=0x%llx length=0x%llx\n", dmap->start,
> > > +		dmap->end, dmap->window_offset, dmap->length);
> > 
> > pr_debug() needs to be placed at the beginning as dmap->start & end have been
> > zero'd.
> > 
> 
> Good point. Will fix.
> 
> Vivek


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Virtio-fs] [PATCH-v3 0/1] virtiofs: FUSE_REMOVEMAPPING remove multiple entries in one call
  2019-06-05  3:02 [Virtio-fs] [PATCH-v3 0/1] virtiofs: FUSE_REMOVEMAPPING remove multiple entries in one call Peng Tao
  2019-06-05  3:02 ` [Virtio-fs] [PATCH] " Peng Tao
@ 2019-06-05 20:30 ` Vivek Goyal
  2019-06-06  6:51   ` Peng Tao
  1 sibling, 1 reply; 7+ messages in thread
From: Vivek Goyal @ 2019-06-05 20:30 UTC (permalink / raw)
  To: Peng Tao; +Cc: virtio-fs

Hi Peng Tao,

I have squashed this patch and forget request wait patches into existing
pathces and pushed latest patches here.

https://github.com/rhvgoyal/linux/commits/virtio-fs-dev-5.1

Please give it a try.

I have made more function name changes into existing patches. There were
so many functions with name fuse_dax_free* that I was finding it hard
to remember which function is doing what.

Thanks
Vivek

On Wed, Jun 05, 2019 at 11:02:32AM +0800, Peng Tao wrote:
> Hi Vivek,
> 
> This works with corresponding qemu virtiofsd patch:
> "[PATCH-v3] virtiofsd: make FUSE_REMOVEMAPPING support multiple entries".
> 
> v1-v2: make fuse_removemapping_in count fuse_removemapping_one
> v2->v3:
> 1. fold in Vivek's cleanup and rename all forget_one to remove_one
> 2. assert dmap->list is not used by anyone when adding to temporary list
> 3. list_del() dmap->list when removing it from to_remove list
> 
> Please add your SOB line as I don't see it with your cleanup and thus
> did not add it to the patch when folding your change.
> 
> Thanks,
> Tao
> 
> Peng Tao (1):
>   virtiofs: FUSE_REMOVEMAPPING remove multiple entries in one call
> 
>  fs/fuse/file.c            | 205 +++++++++++++++++++++++++++-----------
>  fs/fuse/fuse_i.h          |   6 +-
>  fs/fuse/inode.c           |   2 +-
>  include/uapi/linux/fuse.h |   7 ++
>  4 files changed, 160 insertions(+), 60 deletions(-)
> 
> -- 
> 2.17.1
> 


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Virtio-fs] [PATCH-v3 0/1] virtiofs: FUSE_REMOVEMAPPING remove multiple entries in one call
  2019-06-05 20:30 ` [Virtio-fs] [PATCH-v3 0/1] " Vivek Goyal
@ 2019-06-06  6:51   ` Peng Tao
  0 siblings, 0 replies; 7+ messages in thread
From: Peng Tao @ 2019-06-06  6:51 UTC (permalink / raw)
  To: Vivek Goyal; +Cc: virtio-fs



On 2019/6/6 04:30, Vivek Goyal wrote:
> Hi Peng Tao,
> 
> I have squashed this patch and forget request wait patches into existing
> pathces and pushed latest patches here.
> 
> https://github.com/rhvgoyal/linux/commits/virtio-fs-dev-5.1
> 
> Please give it a try.
Both look good and work as expected. Thanks!

Cheers,
Tao

> 
> I have made more function name changes into existing patches. There were
> so many functions with name fuse_dax_free* that I was finding it hard
> to remember which function is doing what.
> 
> Thanks
> Vivek
> 
> On Wed, Jun 05, 2019 at 11:02:32AM +0800, Peng Tao wrote:
>> Hi Vivek,
>>
>> This works with corresponding qemu virtiofsd patch:
>> "[PATCH-v3] virtiofsd: make FUSE_REMOVEMAPPING support multiple entries".
>>
>> v1-v2: make fuse_removemapping_in count fuse_removemapping_one
>> v2->v3:
>> 1. fold in Vivek's cleanup and rename all forget_one to remove_one
>> 2. assert dmap->list is not used by anyone when adding to temporary list
>> 3. list_del() dmap->list when removing it from to_remove list
>>
>> Please add your SOB line as I don't see it with your cleanup and thus
>> did not add it to the patch when folding your change.
>>
>> Thanks,
>> Tao
>>
>> Peng Tao (1):
>>    virtiofs: FUSE_REMOVEMAPPING remove multiple entries in one call
>>
>>   fs/fuse/file.c            | 205 +++++++++++++++++++++++++++-----------
>>   fs/fuse/fuse_i.h          |   6 +-
>>   fs/fuse/inode.c           |   2 +-
>>   include/uapi/linux/fuse.h |   7 ++
>>   4 files changed, 160 insertions(+), 60 deletions(-)
>>
>> -- 
>> 2.17.1
>>

-- 
Into something rich and strange.


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2019-06-06  6:51 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-06-05  3:02 [Virtio-fs] [PATCH-v3 0/1] virtiofs: FUSE_REMOVEMAPPING remove multiple entries in one call Peng Tao
2019-06-05  3:02 ` [Virtio-fs] [PATCH] " Peng Tao
2019-06-05 18:06   ` Liu Bo
2019-06-05 18:35     ` Vivek Goyal
2019-06-05 18:50       ` Liu Bo
2019-06-05 20:30 ` [Virtio-fs] [PATCH-v3 0/1] " Vivek Goyal
2019-06-06  6:51   ` Peng Tao

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.