All of lore.kernel.org
 help / color / mirror / Atom feed
* [Virtio-fs] [PATCH 0/2] virtiofs: remove dmap when truncating inode
@ 2019-05-10 10:29 Peng Tao
  2019-05-10 10:29 ` [Virtio-fs] [PATCH 1/2] fuse: remove unnecessary allocated_ranges Peng Tao
  0 siblings, 1 reply; 8+ messages in thread
From: Peng Tao @ 2019-05-10 10:29 UTC (permalink / raw)
  To: virtio-fs; +Cc: Peng Tao

Hi,

The first patch is a simple cleanup. The second patch removes
dmaps when truncating a file since they are no longer valid
and we can return them to free pool directly. Otherwise they are
cleaned up by the reclaim code that might try to reclaim valid
dmaps first.


Peng Tao (2):
  fuse: remove unnecessary allocated_ranges
  fuse: remove dmap when truncating inode

 fs/fuse/dir.c    |  5 ++++
 fs/fuse/file.c   | 69 +++++++++++++++++++++++++++++++++++++++---------
 fs/fuse/fuse_i.h |  2 ++
 fs/fuse/inode.c  |  7 +++--
 4 files changed, 66 insertions(+), 17 deletions(-)

-- 
2.17.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [Virtio-fs] [PATCH 1/2] fuse: remove unnecessary allocated_ranges
  2019-05-10 10:29 [Virtio-fs] [PATCH 0/2] virtiofs: remove dmap when truncating inode Peng Tao
@ 2019-05-10 10:29 ` Peng Tao
  2019-05-10 10:29   ` [Virtio-fs] [PATCH 2/2] fuse: remove dmap when truncating inode Peng Tao
  2019-05-10 15:24   ` [Virtio-fs] [PATCH 1/2] fuse: remove unnecessary allocated_ranges Vivek Goyal
  0 siblings, 2 replies; 8+ messages in thread
From: Peng Tao @ 2019-05-10 10:29 UTC (permalink / raw)
  To: virtio-fs; +Cc: Peng Tao

It is always nr_ranges.

Signed-off-by: Peng Tao <tao.peng@linux.alibaba.com>
---
 fs/fuse/inode.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index dd16c7f6a561..cb56572c4b26 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -640,7 +640,7 @@ static int fuse_dax_mem_range_init(struct fuse_conn *fc,
 	phys_addr_t phys_addr;
 	int ret = 0, id;
 	size_t dax_size = -1;
-	unsigned long allocated_ranges = 0, i;
+	unsigned long i;
 
 	id = dax_read_lock();
 	nr_pages = dax_direct_access(dax_dev, 0, PHYS_PFN(dax_size), &kaddr,
@@ -670,12 +670,11 @@ static int fuse_dax_mem_range_init(struct fuse_conn *fc,
 		range->length = FUSE_DAX_MEM_RANGE_SZ;
 		list_add_tail(&range->list, &mem_ranges);
 		INIT_LIST_HEAD(&range->busy_list);
-		allocated_ranges++;
 	}
 
 	list_replace_init(&mem_ranges, &fc->free_ranges);
-	fc->nr_free_ranges = allocated_ranges;
-	fc->nr_ranges = allocated_ranges;
+	fc->nr_free_ranges = nr_ranges;
+	fc->nr_ranges = nr_ranges;
 	return 0;
 out_err:
 	/* Free All allocated elements */
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [Virtio-fs] [PATCH 2/2] fuse: remove dmap when truncating inode
  2019-05-10 10:29 ` [Virtio-fs] [PATCH 1/2] fuse: remove unnecessary allocated_ranges Peng Tao
@ 2019-05-10 10:29   ` Peng Tao
  2019-05-10 15:23     ` Vivek Goyal
  2019-05-10 15:24   ` [Virtio-fs] [PATCH 1/2] fuse: remove unnecessary allocated_ranges Vivek Goyal
  1 sibling, 1 reply; 8+ messages in thread
From: Peng Tao @ 2019-05-10 10:29 UTC (permalink / raw)
  To: virtio-fs; +Cc: Peng Tao

The obseleted dmap entries can be put back to global free list
immediately and we don't have to rely on reclaim code to free them.

Signed-off-by: Peng Tao <tao.peng@linux.alibaba.com>
---
 fs/fuse/dir.c    |  5 ++++
 fs/fuse/file.c   | 69 +++++++++++++++++++++++++++++++++++++++---------
 fs/fuse/fuse_i.h |  2 ++
 3 files changed, 63 insertions(+), 13 deletions(-)

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 3f923fe7841a..233c3ed391f1 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1751,6 +1751,11 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
 		down_write(&fi->i_mmap_sem);
 		truncate_pagecache(inode, outarg.attr.size);
 		invalidate_inode_pages2(inode->i_mapping);
+		// Free dmap beyond i_size
+		if (IS_DAX(inode) && oldsize > outarg.attr.size)
+			fuse_dax_free_mappings_range(fc, inode,
+						     outarg.attr.size, -1);
+
 		up_write(&fi->i_mmap_sem);
 	}
 
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 51faed351c7c..6d130f2f3a23 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -30,6 +30,8 @@ static long __fuse_file_fallocate(struct file *file, int mode,
 					loff_t offset, loff_t length);
 static struct fuse_dax_mapping *alloc_dax_mapping_reclaim(struct fuse_conn *fc,
 					struct inode *inode);
+static void fuse_dax_do_remove_mapping_locked(struct fuse_inode *fi,
+					struct fuse_dax_mapping *dmap);
 
 static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
 			  int opcode, struct fuse_open_out *outargp)
@@ -3752,9 +3754,7 @@ int fuse_dax_reclaim_dmap_locked(struct fuse_conn *fc, struct inode *inode,
 		return ret;
 	}
 
-	/* Remove dax mapping from inode interval tree now */
-	fuse_dax_interval_tree_remove(dmap, &fi->dmap_tree);
-	fi->nr_dmaps--;
+	fuse_dax_do_remove_mapping_locked(fi, dmap);
 	return 0;
 }
 
@@ -3831,6 +3831,58 @@ static struct fuse_dax_mapping *alloc_dax_mapping_reclaim(struct fuse_conn *fc,
 	}
 }
 
+/* Cleanup dmap entry and add back to free list */
+static void fuse_dax_do_free_mapping_locked(struct fuse_conn *fc, struct fuse_dax_mapping *dmap)
+{
+	pr_debug("fuse: freed memory range start=0x%llx end=0x%llx "
+		"window_offset=0x%llx length=0x%llx\n", dmap->start,
+		dmap->end, dmap->window_offset, dmap->length);
+	spin_lock(&fc->lock);
+	__dmap_remove_busy_list(fc, dmap);
+	dmap->inode = NULL;
+	dmap->start = dmap->end = 0;
+	__free_dax_mapping(fc, dmap);
+	spin_unlock(&fc->lock);
+}
+
+/* Remove dax mapping from inode interval tree */
+static void fuse_dax_do_remove_mapping_locked(struct fuse_inode *fi, struct fuse_dax_mapping *dmap)
+{
+	fuse_dax_interval_tree_remove(dmap, &fi->dmap_tree);
+	fi->nr_dmaps--;
+}
+
+/*
+ * Free inode dmap entries whose range falls entirely inside [start, end].
+ * Called with inode->i_rwsem and fuse_inode->i_mmap_sem held.
+ *
+ * No need to send FUSE_REMOVEMAPPING to userspace because the userspace mappings
+ * will be overridden next time dmap is used -- same logic is applied in
+ * fuse_dax_reclaim_first_mapping().
+ */
+void fuse_dax_free_mappings_range(struct fuse_conn *fc, struct inode *inode, loff_t start, loff_t end)
+{
+	struct fuse_inode *fi = get_fuse_inode(inode);
+	struct fuse_dax_mapping *dmap;
+
+	WARN_ON(!inode_is_locked(inode));
+	WARN_ON(!rwsem_is_locked(&fi->i_mmap_sem));
+
+	/* interval tree search matches intersecting entries.
+	 * Adjust the range to avoid dropping partial valid entries. */
+	start = ALIGN(start, FUSE_DAX_MEM_RANGE_SZ);
+	end = ALIGN_DOWN(end, FUSE_DAX_MEM_RANGE_SZ);
+
+	pr_debug("fuse: fuse_dax_free_mappings_range start=0x%llx, end=0x%llx\n", start, end);
+	/* Lock ordering follows fuse_dax_free_one_mapping() */
+	down_write(&fi->i_dmap_sem);
+	while ((dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, start, end))) {
+		fuse_dax_do_remove_mapping_locked(fi, dmap);
+		fuse_dax_do_free_mapping_locked(fc, dmap);
+	}
+	up_write(&fi->i_dmap_sem);
+}
+
 int fuse_dax_free_one_mapping_locked(struct fuse_conn *fc, struct inode *inode,
 				u64 dmap_start)
 {
@@ -3852,17 +3904,8 @@ int fuse_dax_free_one_mapping_locked(struct fuse_conn *fc, struct inode *inode,
 	if (ret < 0)
 		return ret;
 
-	/* Cleanup dmap entry and add back to free list */
-	spin_lock(&fc->lock);
-	__dmap_remove_busy_list(fc, dmap);
-	dmap->inode = NULL;
-	dmap->start = dmap->end = 0;
-	__free_dax_mapping(fc, dmap);
-	spin_unlock(&fc->lock);
+	fuse_dax_do_free_mapping_locked(fc, dmap);
 
-	pr_debug("fuse: freed memory range window_offset=0x%llx,"
-				" length=0x%llx\n", dmap->window_offset,
-				dmap->length);
 	return ret;
 }
 
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 1149281ab1e8..e273f1209f5b 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -1189,5 +1189,7 @@ unsigned fuse_len_args(unsigned numargs, struct fuse_arg *args);
 u64 fuse_get_unique(struct fuse_iqueue *fiq);
 void fuse_dax_free_mem_worker(struct work_struct *work);
 void fuse_removemapping(struct inode *inode);
+void fuse_dax_free_mappings_range(struct fuse_conn *fc, struct inode *inode,
+			loff_t start, loff_t end);
 
 #endif /* _FS_FUSE_I_H */
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [Virtio-fs] [PATCH 2/2] fuse: remove dmap when truncating inode
  2019-05-10 10:29   ` [Virtio-fs] [PATCH 2/2] fuse: remove dmap when truncating inode Peng Tao
@ 2019-05-10 15:23     ` Vivek Goyal
  2019-05-10 16:16       ` Tao Peng
  0 siblings, 1 reply; 8+ messages in thread
From: Vivek Goyal @ 2019-05-10 15:23 UTC (permalink / raw)
  To: Peng Tao; +Cc: virtio-fs

On Fri, May 10, 2019 at 06:29:57PM +0800, Peng Tao wrote:
> The obseleted dmap entries can be put back to global free list
> immediately and we don't have to rely on reclaim code to free them.
> 
> Signed-off-by: Peng Tao <tao.peng@linux.alibaba.com>
> ---
>  fs/fuse/dir.c    |  5 ++++
>  fs/fuse/file.c   | 69 +++++++++++++++++++++++++++++++++++++++---------
>  fs/fuse/fuse_i.h |  2 ++
>  3 files changed, 63 insertions(+), 13 deletions(-)
> 
> diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
> index 3f923fe7841a..233c3ed391f1 100644
> --- a/fs/fuse/dir.c
> +++ b/fs/fuse/dir.c
> @@ -1751,6 +1751,11 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
>  		down_write(&fi->i_mmap_sem);
>  		truncate_pagecache(inode, outarg.attr.size);
>  		invalidate_inode_pages2(inode->i_mapping);
> +		// Free dmap beyond i_size
> +		if (IS_DAX(inode) && oldsize > outarg.attr.size)
> +			fuse_dax_free_mappings_range(fc, inode,
> +						     outarg.attr.size, -1);
> +
>  		up_write(&fi->i_mmap_sem);
>  	}
>  
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index 51faed351c7c..6d130f2f3a23 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -30,6 +30,8 @@ static long __fuse_file_fallocate(struct file *file, int mode,
>  					loff_t offset, loff_t length);
>  static struct fuse_dax_mapping *alloc_dax_mapping_reclaim(struct fuse_conn *fc,
>  					struct inode *inode);
> +static void fuse_dax_do_remove_mapping_locked(struct fuse_inode *fi,
> +					struct fuse_dax_mapping *dmap);
>  
>  static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
>  			  int opcode, struct fuse_open_out *outargp)
> @@ -3752,9 +3754,7 @@ int fuse_dax_reclaim_dmap_locked(struct fuse_conn *fc, struct inode *inode,
>  		return ret;
>  	}
>  
> -	/* Remove dax mapping from inode interval tree now */
> -	fuse_dax_interval_tree_remove(dmap, &fi->dmap_tree);
> -	fi->nr_dmaps--;
> +	fuse_dax_do_remove_mapping_locked(fi, dmap);
>  	return 0;
>  }
>  
> @@ -3831,6 +3831,58 @@ static struct fuse_dax_mapping *alloc_dax_mapping_reclaim(struct fuse_conn *fc,
>  	}
>  }
>  
> +/* Cleanup dmap entry and add back to free list */
> +static void fuse_dax_do_free_mapping_locked(struct fuse_conn *fc, struct fuse_dax_mapping *dmap)
> +{
> +	pr_debug("fuse: freed memory range start=0x%llx end=0x%llx "
> +		"window_offset=0x%llx length=0x%llx\n", dmap->start,
> +		dmap->end, dmap->window_offset, dmap->length);
> +	spin_lock(&fc->lock);
> +	__dmap_remove_busy_list(fc, dmap);
> +	dmap->inode = NULL;
> +	dmap->start = dmap->end = 0;
> +	__free_dax_mapping(fc, dmap);
> +	spin_unlock(&fc->lock);
> +}
> +
> +/* Remove dax mapping from inode interval tree */
> +static void fuse_dax_do_remove_mapping_locked(struct fuse_inode *fi, struct fuse_dax_mapping *dmap)
> +{
> +	fuse_dax_interval_tree_remove(dmap, &fi->dmap_tree);
> +	fi->nr_dmaps--;
> +}

this is just a two line funciton. I think lets open code it and we don't
need a separate function for this.


> +
> +/*
> + * Free inode dmap entries whose range falls entirely inside [start, end].
> + * Called with inode->i_rwsem and fuse_inode->i_mmap_sem held.
> + *
> + * No need to send FUSE_REMOVEMAPPING to userspace because the userspace mappings
> + * will be overridden next time dmap is used -- same logic is applied in
> + * fuse_dax_reclaim_first_mapping().

This is true that sending FUSE_REMOVEMAPPING is not strictly needed. But,
thinking more about it, what if range does not get used soon. Then it
will keep resources unnecessarily busy on host? (fd, file, inode, dentry,
vma etc).

I think it probably is a good idea to send FUSE_REMOVEMAPPING to daemon
when range is being freed. Only case where we are reclaiming a range
and reusing immediately, we can skip sending FUSE_REMOVEMAPPING.

This will especially be more meaniningful if dax window is large and
has potential to create lots of mappings.


Thanks
Vivek

> + */
> +void fuse_dax_free_mappings_range(struct fuse_conn *fc, struct inode *inode, loff_t start, loff_t end)
> +{
> +	struct fuse_inode *fi = get_fuse_inode(inode);
> +	struct fuse_dax_mapping *dmap;
> +
> +	WARN_ON(!inode_is_locked(inode));
> +	WARN_ON(!rwsem_is_locked(&fi->i_mmap_sem));
> +
> +	/* interval tree search matches intersecting entries.
> +	 * Adjust the range to avoid dropping partial valid entries. */
> +	start = ALIGN(start, FUSE_DAX_MEM_RANGE_SZ);
> +	end = ALIGN_DOWN(end, FUSE_DAX_MEM_RANGE_SZ);
> +
> +	pr_debug("fuse: fuse_dax_free_mappings_range start=0x%llx, end=0x%llx\n", start, end);
> +	/* Lock ordering follows fuse_dax_free_one_mapping() */
> +	down_write(&fi->i_dmap_sem);
> +	while ((dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, start, end))) {
> +		fuse_dax_do_remove_mapping_locked(fi, dmap);
> +		fuse_dax_do_free_mapping_locked(fc, dmap);
> +	}
> +	up_write(&fi->i_dmap_sem);
> +}
> +
>  int fuse_dax_free_one_mapping_locked(struct fuse_conn *fc, struct inode *inode,
>  				u64 dmap_start)
>  {
> @@ -3852,17 +3904,8 @@ int fuse_dax_free_one_mapping_locked(struct fuse_conn *fc, struct inode *inode,
>  	if (ret < 0)
>  		return ret;
>  
> -	/* Cleanup dmap entry and add back to free list */
> -	spin_lock(&fc->lock);
> -	__dmap_remove_busy_list(fc, dmap);
> -	dmap->inode = NULL;
> -	dmap->start = dmap->end = 0;
> -	__free_dax_mapping(fc, dmap);
> -	spin_unlock(&fc->lock);
> +	fuse_dax_do_free_mapping_locked(fc, dmap);
>  
> -	pr_debug("fuse: freed memory range window_offset=0x%llx,"
> -				" length=0x%llx\n", dmap->window_offset,
> -				dmap->length);
>  	return ret;
>  }
>  
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index 1149281ab1e8..e273f1209f5b 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -1189,5 +1189,7 @@ unsigned fuse_len_args(unsigned numargs, struct fuse_arg *args);
>  u64 fuse_get_unique(struct fuse_iqueue *fiq);
>  void fuse_dax_free_mem_worker(struct work_struct *work);
>  void fuse_removemapping(struct inode *inode);
> +void fuse_dax_free_mappings_range(struct fuse_conn *fc, struct inode *inode,
> +			loff_t start, loff_t end);
>  
>  #endif /* _FS_FUSE_I_H */
> -- 
> 2.17.1
> 


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Virtio-fs] [PATCH 1/2] fuse: remove unnecessary allocated_ranges
  2019-05-10 10:29 ` [Virtio-fs] [PATCH 1/2] fuse: remove unnecessary allocated_ranges Peng Tao
  2019-05-10 10:29   ` [Virtio-fs] [PATCH 2/2] fuse: remove dmap when truncating inode Peng Tao
@ 2019-05-10 15:24   ` Vivek Goyal
  1 sibling, 0 replies; 8+ messages in thread
From: Vivek Goyal @ 2019-05-10 15:24 UTC (permalink / raw)
  To: Peng Tao; +Cc: virtio-fs

On Fri, May 10, 2019 at 06:29:56PM +0800, Peng Tao wrote:
> It is always nr_ranges.

This looks like a good cleanup. Will squash it with existing patches.

Vivek

> 
> Signed-off-by: Peng Tao <tao.peng@linux.alibaba.com>
> ---
>  fs/fuse/inode.c | 7 +++----
>  1 file changed, 3 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> index dd16c7f6a561..cb56572c4b26 100644
> --- a/fs/fuse/inode.c
> +++ b/fs/fuse/inode.c
> @@ -640,7 +640,7 @@ static int fuse_dax_mem_range_init(struct fuse_conn *fc,
>  	phys_addr_t phys_addr;
>  	int ret = 0, id;
>  	size_t dax_size = -1;
> -	unsigned long allocated_ranges = 0, i;
> +	unsigned long i;
>  
>  	id = dax_read_lock();
>  	nr_pages = dax_direct_access(dax_dev, 0, PHYS_PFN(dax_size), &kaddr,
> @@ -670,12 +670,11 @@ static int fuse_dax_mem_range_init(struct fuse_conn *fc,
>  		range->length = FUSE_DAX_MEM_RANGE_SZ;
>  		list_add_tail(&range->list, &mem_ranges);
>  		INIT_LIST_HEAD(&range->busy_list);
> -		allocated_ranges++;
>  	}
>  
>  	list_replace_init(&mem_ranges, &fc->free_ranges);
> -	fc->nr_free_ranges = allocated_ranges;
> -	fc->nr_ranges = allocated_ranges;
> +	fc->nr_free_ranges = nr_ranges;
> +	fc->nr_ranges = nr_ranges;
>  	return 0;
>  out_err:
>  	/* Free All allocated elements */
> -- 
> 2.17.1
> 


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Virtio-fs] [PATCH 2/2] fuse: remove dmap when truncating inode
  2019-05-10 15:23     ` Vivek Goyal
@ 2019-05-10 16:16       ` Tao Peng
  2019-05-10 17:31         ` Vivek Goyal
  0 siblings, 1 reply; 8+ messages in thread
From: Tao Peng @ 2019-05-10 16:16 UTC (permalink / raw)
  To: Vivek Goyal; +Cc: virtio-fs, Peng Tao

On Fri, May 10, 2019 at 11:23 PM Vivek Goyal <vgoyal@redhat.com> wrote:
>
> On Fri, May 10, 2019 at 06:29:57PM +0800, Peng Tao wrote:
> > The obseleted dmap entries can be put back to global free list
> > immediately and we don't have to rely on reclaim code to free them.
> >
> > Signed-off-by: Peng Tao <tao.peng@linux.alibaba.com>
> > ---
> >  fs/fuse/dir.c    |  5 ++++
> >  fs/fuse/file.c   | 69 +++++++++++++++++++++++++++++++++++++++---------
> >  fs/fuse/fuse_i.h |  2 ++
> >  3 files changed, 63 insertions(+), 13 deletions(-)
> >
> > diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
> > index 3f923fe7841a..233c3ed391f1 100644
> > --- a/fs/fuse/dir.c
> > +++ b/fs/fuse/dir.c
> > @@ -1751,6 +1751,11 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
> >               down_write(&fi->i_mmap_sem);
> >               truncate_pagecache(inode, outarg.attr.size);
> >               invalidate_inode_pages2(inode->i_mapping);
> > +             // Free dmap beyond i_size
> > +             if (IS_DAX(inode) && oldsize > outarg.attr.size)
> > +                     fuse_dax_free_mappings_range(fc, inode,
> > +                                                  outarg.attr.size, -1);
> > +
> >               up_write(&fi->i_mmap_sem);
> >       }
> >
> > diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> > index 51faed351c7c..6d130f2f3a23 100644
> > --- a/fs/fuse/file.c
> > +++ b/fs/fuse/file.c
> > @@ -30,6 +30,8 @@ static long __fuse_file_fallocate(struct file *file, int mode,
> >                                       loff_t offset, loff_t length);
> >  static struct fuse_dax_mapping *alloc_dax_mapping_reclaim(struct fuse_conn *fc,
> >                                       struct inode *inode);
> > +static void fuse_dax_do_remove_mapping_locked(struct fuse_inode *fi,
> > +                                     struct fuse_dax_mapping *dmap);
> >
> >  static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
> >                         int opcode, struct fuse_open_out *outargp)
> > @@ -3752,9 +3754,7 @@ int fuse_dax_reclaim_dmap_locked(struct fuse_conn *fc, struct inode *inode,
> >               return ret;
> >       }
> >
> > -     /* Remove dax mapping from inode interval tree now */
> > -     fuse_dax_interval_tree_remove(dmap, &fi->dmap_tree);
> > -     fi->nr_dmaps--;
> > +     fuse_dax_do_remove_mapping_locked(fi, dmap);
> >       return 0;
> >  }
> >
> > @@ -3831,6 +3831,58 @@ static struct fuse_dax_mapping *alloc_dax_mapping_reclaim(struct fuse_conn *fc,
> >       }
> >  }
> >
> > +/* Cleanup dmap entry and add back to free list */
> > +static void fuse_dax_do_free_mapping_locked(struct fuse_conn *fc, struct fuse_dax_mapping *dmap)
> > +{
> > +     pr_debug("fuse: freed memory range start=0x%llx end=0x%llx "
> > +             "window_offset=0x%llx length=0x%llx\n", dmap->start,
> > +             dmap->end, dmap->window_offset, dmap->length);
> > +     spin_lock(&fc->lock);
> > +     __dmap_remove_busy_list(fc, dmap);
> > +     dmap->inode = NULL;
> > +     dmap->start = dmap->end = 0;
> > +     __free_dax_mapping(fc, dmap);
> > +     spin_unlock(&fc->lock);
> > +}
> > +
> > +/* Remove dax mapping from inode interval tree */
> > +static void fuse_dax_do_remove_mapping_locked(struct fuse_inode *fi, struct fuse_dax_mapping *dmap)
> > +{
> > +     fuse_dax_interval_tree_remove(dmap, &fi->dmap_tree);
> > +     fi->nr_dmaps--;
> > +}
>
> this is just a two line funciton. I think lets open code it and we don't
> need a separate function for this.
ok.

>
>
> > +
> > +/*
> > + * Free inode dmap entries whose range falls entirely inside [start, end].
> > + * Called with inode->i_rwsem and fuse_inode->i_mmap_sem held.
> > + *
> > + * No need to send FUSE_REMOVEMAPPING to userspace because the userspace mappings
> > + * will be overridden next time dmap is used -- same logic is applied in
> > + * fuse_dax_reclaim_first_mapping().
>
> This is true that sending FUSE_REMOVEMAPPING is not strictly needed. But,
> thinking more about it, what if range does not get used soon. Then it
> will keep resources unnecessarily busy on host? (fd, file, inode, dentry,
> vma etc).
>
> I think it probably is a good idea to send FUSE_REMOVEMAPPING to daemon
> when range is being freed.
We are under inode->i_rwsem and i_mmap_sem here. It can be very
expensive if we wait for userspace holding locks. How about pushing it
to a work queue? Still we at least need to grab the dmap lock but it
seems less expensive and doesn't block truncate.

Cheers,
Tao

> Only case where we are reclaiming a range
> and reusing immediately, we can skip sending FUSE_REMOVEMAPPING.
>
> This will especially be more meaniningful if dax window is large and
> has potential to create lots of mappings.
>
>
> Thanks
> Vivek
>
> > + */
> > +void fuse_dax_free_mappings_range(struct fuse_conn *fc, struct inode *inode, loff_t start, loff_t end)
> > +{
> > +     struct fuse_inode *fi = get_fuse_inode(inode);
> > +     struct fuse_dax_mapping *dmap;
> > +
> > +     WARN_ON(!inode_is_locked(inode));
> > +     WARN_ON(!rwsem_is_locked(&fi->i_mmap_sem));
> > +
> > +     /* interval tree search matches intersecting entries.
> > +      * Adjust the range to avoid dropping partial valid entries. */
> > +     start = ALIGN(start, FUSE_DAX_MEM_RANGE_SZ);
> > +     end = ALIGN_DOWN(end, FUSE_DAX_MEM_RANGE_SZ);
> > +
> > +     pr_debug("fuse: fuse_dax_free_mappings_range start=0x%llx, end=0x%llx\n", start, end);
> > +     /* Lock ordering follows fuse_dax_free_one_mapping() */
> > +     down_write(&fi->i_dmap_sem);
> > +     while ((dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, start, end))) {
> > +             fuse_dax_do_remove_mapping_locked(fi, dmap);
> > +             fuse_dax_do_free_mapping_locked(fc, dmap);
> > +     }
> > +     up_write(&fi->i_dmap_sem);
> > +}
> > +
> >  int fuse_dax_free_one_mapping_locked(struct fuse_conn *fc, struct inode *inode,
> >                               u64 dmap_start)
> >  {
> > @@ -3852,17 +3904,8 @@ int fuse_dax_free_one_mapping_locked(struct fuse_conn *fc, struct inode *inode,
> >       if (ret < 0)
> >               return ret;
> >
> > -     /* Cleanup dmap entry and add back to free list */
> > -     spin_lock(&fc->lock);
> > -     __dmap_remove_busy_list(fc, dmap);
> > -     dmap->inode = NULL;
> > -     dmap->start = dmap->end = 0;
> > -     __free_dax_mapping(fc, dmap);
> > -     spin_unlock(&fc->lock);
> > +     fuse_dax_do_free_mapping_locked(fc, dmap);
> >
> > -     pr_debug("fuse: freed memory range window_offset=0x%llx,"
> > -                             " length=0x%llx\n", dmap->window_offset,
> > -                             dmap->length);
> >       return ret;
> >  }
> >
> > diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> > index 1149281ab1e8..e273f1209f5b 100644
> > --- a/fs/fuse/fuse_i.h
> > +++ b/fs/fuse/fuse_i.h
> > @@ -1189,5 +1189,7 @@ unsigned fuse_len_args(unsigned numargs, struct fuse_arg *args);
> >  u64 fuse_get_unique(struct fuse_iqueue *fiq);
> >  void fuse_dax_free_mem_worker(struct work_struct *work);
> >  void fuse_removemapping(struct inode *inode);
> > +void fuse_dax_free_mappings_range(struct fuse_conn *fc, struct inode *inode,
> > +                     loff_t start, loff_t end);
> >
> >  #endif /* _FS_FUSE_I_H */
> > --
> > 2.17.1
> >
>
> _______________________________________________
> Virtio-fs mailing list
> Virtio-fs@redhat.com
> https://www.redhat.com/mailman/listinfo/virtio-fs



-- 
bergwolf@hyper.sh


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Virtio-fs] [PATCH 2/2] fuse: remove dmap when truncating inode
  2019-05-10 16:16       ` Tao Peng
@ 2019-05-10 17:31         ` Vivek Goyal
  2019-05-11 15:24           ` Tao Peng
  0 siblings, 1 reply; 8+ messages in thread
From: Vivek Goyal @ 2019-05-10 17:31 UTC (permalink / raw)
  To: Tao Peng; +Cc: virtio-fs, Peng Tao

On Sat, May 11, 2019 at 12:16:23AM +0800, Tao Peng wrote:
> On Fri, May 10, 2019 at 11:23 PM Vivek Goyal <vgoyal@redhat.com> wrote:
> >
> > On Fri, May 10, 2019 at 06:29:57PM +0800, Peng Tao wrote:
> > > The obseleted dmap entries can be put back to global free list
> > > immediately and we don't have to rely on reclaim code to free them.
> > >
> > > Signed-off-by: Peng Tao <tao.peng@linux.alibaba.com>
> > > ---
> > >  fs/fuse/dir.c    |  5 ++++
> > >  fs/fuse/file.c   | 69 +++++++++++++++++++++++++++++++++++++++---------
> > >  fs/fuse/fuse_i.h |  2 ++
> > >  3 files changed, 63 insertions(+), 13 deletions(-)
> > >
> > > diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
> > > index 3f923fe7841a..233c3ed391f1 100644
> > > --- a/fs/fuse/dir.c
> > > +++ b/fs/fuse/dir.c
> > > @@ -1751,6 +1751,11 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
> > >               down_write(&fi->i_mmap_sem);
> > >               truncate_pagecache(inode, outarg.attr.size);
> > >               invalidate_inode_pages2(inode->i_mapping);
> > > +             // Free dmap beyond i_size
> > > +             if (IS_DAX(inode) && oldsize > outarg.attr.size)
> > > +                     fuse_dax_free_mappings_range(fc, inode,
> > > +                                                  outarg.attr.size, -1);
> > > +
> > >               up_write(&fi->i_mmap_sem);
> > >       }
> > >
> > > diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> > > index 51faed351c7c..6d130f2f3a23 100644
> > > --- a/fs/fuse/file.c
> > > +++ b/fs/fuse/file.c
> > > @@ -30,6 +30,8 @@ static long __fuse_file_fallocate(struct file *file, int mode,
> > >                                       loff_t offset, loff_t length);
> > >  static struct fuse_dax_mapping *alloc_dax_mapping_reclaim(struct fuse_conn *fc,
> > >                                       struct inode *inode);
> > > +static void fuse_dax_do_remove_mapping_locked(struct fuse_inode *fi,
> > > +                                     struct fuse_dax_mapping *dmap);
> > >
> > >  static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
> > >                         int opcode, struct fuse_open_out *outargp)
> > > @@ -3752,9 +3754,7 @@ int fuse_dax_reclaim_dmap_locked(struct fuse_conn *fc, struct inode *inode,
> > >               return ret;
> > >       }
> > >
> > > -     /* Remove dax mapping from inode interval tree now */
> > > -     fuse_dax_interval_tree_remove(dmap, &fi->dmap_tree);
> > > -     fi->nr_dmaps--;
> > > +     fuse_dax_do_remove_mapping_locked(fi, dmap);
> > >       return 0;
> > >  }
> > >
> > > @@ -3831,6 +3831,58 @@ static struct fuse_dax_mapping *alloc_dax_mapping_reclaim(struct fuse_conn *fc,
> > >       }
> > >  }
> > >
> > > +/* Cleanup dmap entry and add back to free list */
> > > +static void fuse_dax_do_free_mapping_locked(struct fuse_conn *fc, struct fuse_dax_mapping *dmap)
> > > +{
> > > +     pr_debug("fuse: freed memory range start=0x%llx end=0x%llx "
> > > +             "window_offset=0x%llx length=0x%llx\n", dmap->start,
> > > +             dmap->end, dmap->window_offset, dmap->length);
> > > +     spin_lock(&fc->lock);
> > > +     __dmap_remove_busy_list(fc, dmap);
> > > +     dmap->inode = NULL;
> > > +     dmap->start = dmap->end = 0;
> > > +     __free_dax_mapping(fc, dmap);
> > > +     spin_unlock(&fc->lock);
> > > +}
> > > +
> > > +/* Remove dax mapping from inode interval tree */
> > > +static void fuse_dax_do_remove_mapping_locked(struct fuse_inode *fi, struct fuse_dax_mapping *dmap)
> > > +{
> > > +     fuse_dax_interval_tree_remove(dmap, &fi->dmap_tree);
> > > +     fi->nr_dmaps--;
> > > +}
> >
> > this is just a two line funciton. I think lets open code it and we don't
> > need a separate function for this.
> ok.
> 
> >
> >
> > > +
> > > +/*
> > > + * Free inode dmap entries whose range falls entirely inside [start, end].
> > > + * Called with inode->i_rwsem and fuse_inode->i_mmap_sem held.
> > > + *
> > > + * No need to send FUSE_REMOVEMAPPING to userspace because the userspace mappings
> > > + * will be overridden next time dmap is used -- same logic is applied in
> > > + * fuse_dax_reclaim_first_mapping().
> >
> > This is true that sending FUSE_REMOVEMAPPING is not strictly needed. But,
> > thinking more about it, what if range does not get used soon. Then it
> > will keep resources unnecessarily busy on host? (fd, file, inode, dentry,
> > vma etc).
> >
> > I think it probably is a good idea to send FUSE_REMOVEMAPPING to daemon
> > when range is being freed.
> We are under inode->i_rwsem and i_mmap_sem here. It can be very
> expensive if we wait for userspace holding locks. How about pushing it
> to a work queue? Still we at least need to grab the dmap lock but it
> seems less expensive and doesn't block truncate.

How about freeing multiple ranges in single removemapping call. I think
that's allowed. We can look into making it async also at some point of
time, but right now it probably is best to keep code as simple as
possible. Do typical workloads do truncate at high frequencey?

Vivek


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [Virtio-fs] [PATCH 2/2] fuse: remove dmap when truncating inode
  2019-05-10 17:31         ` Vivek Goyal
@ 2019-05-11 15:24           ` Tao Peng
  0 siblings, 0 replies; 8+ messages in thread
From: Tao Peng @ 2019-05-11 15:24 UTC (permalink / raw)
  To: Vivek Goyal; +Cc: virtio-fs, Peng Tao

On Sat, May 11, 2019 at 1:31 AM Vivek Goyal <vgoyal@redhat.com> wrote:
>
> On Sat, May 11, 2019 at 12:16:23AM +0800, Tao Peng wrote:
> > On Fri, May 10, 2019 at 11:23 PM Vivek Goyal <vgoyal@redhat.com> wrote:
> > >
> > > On Fri, May 10, 2019 at 06:29:57PM +0800, Peng Tao wrote:
> > > > The obseleted dmap entries can be put back to global free list
> > > > immediately and we don't have to rely on reclaim code to free them.
> > > >
> > > > Signed-off-by: Peng Tao <tao.peng@linux.alibaba.com>
> > > > ---
> > > >  fs/fuse/dir.c    |  5 ++++
> > > >  fs/fuse/file.c   | 69 +++++++++++++++++++++++++++++++++++++++---------
> > > >  fs/fuse/fuse_i.h |  2 ++
> > > >  3 files changed, 63 insertions(+), 13 deletions(-)
> > > >
> > > > diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
> > > > index 3f923fe7841a..233c3ed391f1 100644
> > > > --- a/fs/fuse/dir.c
> > > > +++ b/fs/fuse/dir.c
> > > > @@ -1751,6 +1751,11 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
> > > >               down_write(&fi->i_mmap_sem);
> > > >               truncate_pagecache(inode, outarg.attr.size);
> > > >               invalidate_inode_pages2(inode->i_mapping);
> > > > +             // Free dmap beyond i_size
> > > > +             if (IS_DAX(inode) && oldsize > outarg.attr.size)
> > > > +                     fuse_dax_free_mappings_range(fc, inode,
> > > > +                                                  outarg.attr.size, -1);
> > > > +
> > > >               up_write(&fi->i_mmap_sem);
> > > >       }
> > > >
> > > > diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> > > > index 51faed351c7c..6d130f2f3a23 100644
> > > > --- a/fs/fuse/file.c
> > > > +++ b/fs/fuse/file.c
> > > > @@ -30,6 +30,8 @@ static long __fuse_file_fallocate(struct file *file, int mode,
> > > >                                       loff_t offset, loff_t length);
> > > >  static struct fuse_dax_mapping *alloc_dax_mapping_reclaim(struct fuse_conn *fc,
> > > >                                       struct inode *inode);
> > > > +static void fuse_dax_do_remove_mapping_locked(struct fuse_inode *fi,
> > > > +                                     struct fuse_dax_mapping *dmap);
> > > >
> > > >  static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
> > > >                         int opcode, struct fuse_open_out *outargp)
> > > > @@ -3752,9 +3754,7 @@ int fuse_dax_reclaim_dmap_locked(struct fuse_conn *fc, struct inode *inode,
> > > >               return ret;
> > > >       }
> > > >
> > > > -     /* Remove dax mapping from inode interval tree now */
> > > > -     fuse_dax_interval_tree_remove(dmap, &fi->dmap_tree);
> > > > -     fi->nr_dmaps--;
> > > > +     fuse_dax_do_remove_mapping_locked(fi, dmap);
> > > >       return 0;
> > > >  }
> > > >
> > > > @@ -3831,6 +3831,58 @@ static struct fuse_dax_mapping *alloc_dax_mapping_reclaim(struct fuse_conn *fc,
> > > >       }
> > > >  }
> > > >
> > > > +/* Cleanup dmap entry and add back to free list */
> > > > +static void fuse_dax_do_free_mapping_locked(struct fuse_conn *fc, struct fuse_dax_mapping *dmap)
> > > > +{
> > > > +     pr_debug("fuse: freed memory range start=0x%llx end=0x%llx "
> > > > +             "window_offset=0x%llx length=0x%llx\n", dmap->start,
> > > > +             dmap->end, dmap->window_offset, dmap->length);
> > > > +     spin_lock(&fc->lock);
> > > > +     __dmap_remove_busy_list(fc, dmap);
> > > > +     dmap->inode = NULL;
> > > > +     dmap->start = dmap->end = 0;
> > > > +     __free_dax_mapping(fc, dmap);
> > > > +     spin_unlock(&fc->lock);
> > > > +}
> > > > +
> > > > +/* Remove dax mapping from inode interval tree */
> > > > +static void fuse_dax_do_remove_mapping_locked(struct fuse_inode *fi, struct fuse_dax_mapping *dmap)
> > > > +{
> > > > +     fuse_dax_interval_tree_remove(dmap, &fi->dmap_tree);
> > > > +     fi->nr_dmaps--;
> > > > +}
> > >
> > > this is just a two line funciton. I think lets open code it and we don't
> > > need a separate function for this.
> > ok.
> >
> > >
> > >
> > > > +
> > > > +/*
> > > > + * Free inode dmap entries whose range falls entirely inside [start, end].
> > > > + * Called with inode->i_rwsem and fuse_inode->i_mmap_sem held.
> > > > + *
> > > > + * No need to send FUSE_REMOVEMAPPING to userspace because the userspace mappings
> > > > + * will be overridden next time dmap is used -- same logic is applied in
> > > > + * fuse_dax_reclaim_first_mapping().
> > >
> > > This is true that sending FUSE_REMOVEMAPPING is not strictly needed. But,
> > > thinking more about it, what if range does not get used soon. Then it
> > > will keep resources unnecessarily busy on host? (fd, file, inode, dentry,
> > > vma etc).
> > >
> > > I think it probably is a good idea to send FUSE_REMOVEMAPPING to daemon
> > > when range is being freed.
> > We are under inode->i_rwsem and i_mmap_sem here. It can be very
> > expensive if we wait for userspace holding locks. How about pushing it
> > to a work queue? Still we at least need to grab the dmap lock but it
> > seems less expensive and doesn't block truncate.
>
> How about freeing multiple ranges in single removemapping call. I think
> that's allowed. We can look into making it async also at some point of
> time, but right now it probably is best to keep code as simple as
> possible. Do typical workloads do truncate at high frequencey?
>
Good point! I'll change to do it. Thanks!

Cheers,
Tao
-- 
bergwolf@hyper.sh


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2019-05-11 15:24 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-05-10 10:29 [Virtio-fs] [PATCH 0/2] virtiofs: remove dmap when truncating inode Peng Tao
2019-05-10 10:29 ` [Virtio-fs] [PATCH 1/2] fuse: remove unnecessary allocated_ranges Peng Tao
2019-05-10 10:29   ` [Virtio-fs] [PATCH 2/2] fuse: remove dmap when truncating inode Peng Tao
2019-05-10 15:23     ` Vivek Goyal
2019-05-10 16:16       ` Tao Peng
2019-05-10 17:31         ` Vivek Goyal
2019-05-11 15:24           ` Tao Peng
2019-05-10 15:24   ` [Virtio-fs] [PATCH 1/2] fuse: remove unnecessary allocated_ranges Vivek Goyal

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.