All of lore.kernel.org
 help / color / mirror / Atom feed
From: Vivek Goyal <vgoyal@redhat.com>
To: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
	kvm@vger.kernel.org
Cc: vgoyal@redhat.com, miklos@szeredi.hu, stefanha@redhat.com,
	dgilbert@redhat.com, sweil@redhat.com, swhiteho@redhat.com
Subject: [PATCH 34/52] fuse: Add logic to free up a memory range
Date: Mon, 10 Dec 2018 12:13:00 -0500	[thread overview]
Message-ID: <20181210171318.16998-35-vgoyal@redhat.com> (raw)
In-Reply-To: <20181210171318.16998-1-vgoyal@redhat.com>

Add logic to free up a busy memory range. Freed memory range will be
returned to free pool. Add a worker which can be started to select
and free some busy memory ranges.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
---
 fs/fuse/file.c   | 148 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 fs/fuse/fuse_i.h |  10 ++++
 fs/fuse/inode.c  |   2 +
 3 files changed, 159 insertions(+), 1 deletion(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 73068289f62e..17becdff3014 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -272,7 +272,15 @@ static int fuse_setup_one_mapping(struct inode *inode,
 
 	pr_debug("fuse_setup_one_mapping() succeeded. offset=0x%llx err=%zd\n", offset, err);
 
-	/* TODO: What locking is required here. For now, using fc->lock */
+	/*
+	 * We don't take a reference on inode. inode is valid right now and
+	 * when inode is going away, cleanup logic should first cleanup
+	 * dmap entries.
+	 *
+	 * TODO: Do we need to ensure that we are holding inode lock
+	 * as well.
+	 */
+	dmap->inode = inode;
 	dmap->start = offset;
 	dmap->end = offset + FUSE_DAX_MEM_RANGE_SZ - 1;
 	/* Protected by fi->i_dmap_sem */
@@ -347,6 +355,8 @@ void fuse_removemapping(struct inode *inode)
 			continue;
 		}
 
+		dmap->inode = NULL;
+
 		/* Add it back to free ranges list */
 		free_dax_mapping(fc, dmap);
 	}
@@ -3694,3 +3704,139 @@ void fuse_init_file_inode(struct inode *inode)
 		inode->i_data.a_ops = &fuse_dax_file_aops;
 	}
 }
+
+/*
+ * Write back and invalidate the busy range of @inode covering file offset
+ * @dmap_start, then return it to fc's free list. Returns 0 or an error.
+ */
+int fuse_dax_free_one_mapping_locked(struct fuse_conn *fc, struct inode *inode,
+				u64 dmap_start)
+{
+	int ret;
+	struct fuse_inode *fi = get_fuse_inode(inode);
+	struct fuse_dax_mapping *dmap;
+
+	WARN_ON(!inode_is_locked(inode));
+
+	/* Look up the dax mapping that covers file offset dmap_start */
+	dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, dmap_start,
+							dmap_start);
+
+	/* Range already got cleaned up by somebody else */
+	if (!dmap)
+		return 0;
+
+	ret = filemap_fdatawrite_range(inode->i_mapping, dmap->start, dmap->end);
+	if (ret) {
+		pr_err("filemap_fdatawrite_range() failed. err=%d start=0x%llx,"
+			" end=0x%llx\n", ret, dmap->start, dmap->end);
+		return ret;
+	}
+
+	ret = invalidate_inode_pages2_range(inode->i_mapping,
+					dmap->start >> PAGE_SHIFT,
+					dmap->end >> PAGE_SHIFT);
+	/* TODO: What to do if above fails? For now, leave the range in place. */
+	if (ret) {
+		pr_err("invalidate_inode_pages2_range() failed err=%d\n", ret);
+		return ret;
+	}
+
+	/* Remove dax mapping from inode interval tree now */
+	fuse_dax_interval_tree_remove(dmap, &fi->dmap_tree);
+	fi->nr_dmaps--;
+
+	/* Cleanup dmap entry and add back to free list */
+	spin_lock(&fc->lock);
+	list_del_init(&dmap->busy_list);
+	WARN_ON(fc->nr_busy_ranges == 0);
+	fc->nr_busy_ranges--;
+	dmap->inode = NULL;
+	dmap->start = dmap->end = 0;
+	__free_dax_mapping(fc, dmap);
+	spin_unlock(&fc->lock);
+
+	pr_debug("fuse: freed memory range window_offset=0x%llx,"
+		 " length=0x%llx\n", dmap->window_offset, dmap->length);
+	return 0;
+}
+
+/*
+ * Free the memory range of @inode at @dmap_start; returns 0 or an error.
+ * Locking, in acquisition order:
+ * 1. Take inode->i_rwsem to prevent further read/write.
+ * 2. Take fuse_inode->i_mmap_sem to block dax faults.
+ * 3. Take fuse_inode->i_dmap_sem to protect interval tree. It might not
+ *    be strictly necessary as lock 1 and 2 seem sufficient.
+ */
+int fuse_dax_free_one_mapping(struct fuse_conn *fc, struct inode *inode,
+				u64 dmap_start)
+{
+	int ret;
+	struct fuse_inode *fi = get_fuse_inode(inode);
+
+	inode_lock(inode);
+	down_write(&fi->i_mmap_sem);
+	down_write(&fi->i_dmap_sem);
+	ret = fuse_dax_free_one_mapping_locked(fc, inode, dmap_start);
+	up_write(&fi->i_dmap_sem);
+	up_write(&fi->i_mmap_sem);
+	inode_unlock(inode);
+	return ret;
+}
+
+int fuse_dax_free_memory(struct fuse_conn *fc, unsigned long nr_to_free)
+{
+	struct fuse_dax_mapping *dmap, *pos;
+	int ret, i;
+	u64 dmap_start = 0, window_offset = 0;
+	struct inode *inode = NULL;
+
+	/* Pick first busy range and free it for now*/
+	for (i = 0; i < nr_to_free; i++) {
+		dmap = NULL;
+		spin_lock(&fc->lock);
+
+		list_for_each_entry(pos, &fc->busy_ranges, busy_list) {
+			dmap = pos;
+			inode = igrab(dmap->inode);
+			/*
+			 * This inode is going away. That will free
+			 * up all the ranges anyway, continue to
+			 * next range.
+			 */
+			if (!inode)
+				continue;
+			dmap_start = dmap->start;
+			window_offset = dmap->window_offset;
+			break;
+		}
+		spin_unlock(&fc->lock);
+		if (!dmap)
+			return 0;
+
+		ret = fuse_dax_free_one_mapping(fc, inode, dmap_start);
+		iput(inode);
+		if (ret) {
+			printk("%s(window_offset=0x%llx) failed. err=%d\n",
+				__func__, window_offset, ret);
+			return ret;
+		}
+	}
+	return 0;
+}
+
+/* Delayed-work handler: reclaims up to FUSE_DAX_RECLAIM_CHUNK busy ranges. */
+/* TODO: This probably should go in inode.c */
+void fuse_dax_free_mem_worker(struct work_struct *work)
+{
+	int ret;
+	struct fuse_conn *fc = container_of(work, struct fuse_conn,
+						dax_free_work.work);
+	pr_debug("fuse: Worker to free memory called. nr_free_ranges=%lu"
+		 " nr_busy_ranges=%lu\n", fc->nr_free_ranges,
+		 fc->nr_busy_ranges);
+	ret = fuse_dax_free_memory(fc, FUSE_DAX_RECLAIM_CHUNK);
+	if (ret)
+		pr_debug("fuse: fuse_dax_free_memory() failed with err=%d\n", ret);
+}
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 280f717deb57..383deaf0ecf1 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -50,6 +50,9 @@
 #define FUSE_DAX_MEM_RANGE_SZ	(2*1024*1024)
 #define FUSE_DAX_MEM_RANGE_PAGES	(FUSE_DAX_MEM_RANGE_SZ/PAGE_SIZE)
 
+/* Number of ranges reclaimer will try to free in one invocation */
+#define FUSE_DAX_RECLAIM_CHUNK		(10)
+
 /** List of active connections */
 extern struct list_head fuse_conn_list;
 
@@ -102,6 +105,9 @@ struct fuse_forget_link {
 
 /** Translation information for file offsets to DAX window offsets */
 struct fuse_dax_mapping {
+	/* Pointer to inode where this memory range is mapped */
+	struct inode *inode;
+
 	/* Will connect in fc->free_ranges to keep track of free memory */
 	struct list_head list;
 
@@ -870,6 +876,9 @@ struct fuse_conn {
 	unsigned long nr_busy_ranges;
 	struct list_head busy_ranges;
 
+	/* Worker to free up memory ranges */
+	struct delayed_work dax_free_work;
+
 	/*
 	 * DAX Window Free Ranges. TODO: This might not be best place to store
 	 * this free list
@@ -1244,6 +1253,7 @@ unsigned fuse_len_args(unsigned numargs, struct fuse_arg *args);
  * Get the next unique ID for a request
  */
 u64 fuse_get_unique(struct fuse_iqueue *fiq);
+void fuse_dax_free_mem_worker(struct work_struct *work);
 void fuse_removemapping(struct inode *inode);
 
 #endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 59fc5a7a18fc..44f7bc44e319 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -713,6 +713,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns,
 	fc->user_ns = get_user_ns(user_ns);
 	INIT_LIST_HEAD(&fc->free_ranges);
 	INIT_LIST_HEAD(&fc->busy_ranges);
+	INIT_DELAYED_WORK(&fc->dax_free_work, fuse_dax_free_mem_worker);
 }
 EXPORT_SYMBOL_GPL(fuse_conn_init);
 
@@ -721,6 +722,7 @@ void fuse_conn_put(struct fuse_conn *fc)
 	if (refcount_dec_and_test(&fc->count)) {
 		if (fc->destroy_req)
 			fuse_request_free(fc->destroy_req);
+		flush_delayed_work(&fc->dax_free_work);
 		if (fc->dax_dev)
 			fuse_free_dax_mem_ranges(&fc->free_ranges);
 		put_pid_ns(fc->pid_ns);
-- 
2.13.6


  parent reply	other threads:[~2018-12-10 17:14 UTC|newest]

Thread overview: 98+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-12-10 17:12 [PATCH 00/52] [RFC] virtio-fs: shared file system for virtual machines Vivek Goyal
2018-12-10 17:12 ` [PATCH 01/52] fuse: add skeleton virtio_fs.ko module Vivek Goyal
2018-12-10 17:12 ` [PATCH 02/52] fuse: add probe/remove virtio driver Vivek Goyal
2018-12-10 17:12 ` [PATCH 03/52] fuse: rely on mutex_unlock() barrier instead of fput() Vivek Goyal
2018-12-10 17:12 ` [PATCH 04/52] fuse: extract fuse_fill_super_common() Vivek Goyal
2018-12-10 17:12 ` [PATCH 05/52] virtio_fs: get mount working Vivek Goyal
2018-12-10 17:12 ` [PATCH 06/52] fuse: export fuse_end_request() Vivek Goyal
2018-12-10 17:12 ` [PATCH 07/52] fuse: export fuse_len_args() Vivek Goyal
2018-12-10 17:12 ` [PATCH 08/52] fuse: add fuse_iqueue_ops callbacks Vivek Goyal
2018-12-10 17:12 ` [PATCH 09/52] fuse: process requests queues Vivek Goyal
2018-12-10 17:12 ` [PATCH 10/52] fuse: export fuse_get_unique() Vivek Goyal
2018-12-10 17:12 ` [PATCH 11/52] fuse: implement FUSE_FORGET for virtio-fs Vivek Goyal
2018-12-10 17:12 ` [PATCH 12/52] virtio_fs: Set up dax_device Vivek Goyal
2018-12-10 17:12 ` [PATCH 13/52] dax: remove block device dependencies Vivek Goyal
2018-12-10 17:12 ` [PATCH 14/52] fuse: add fuse_conn->dax_dev field Vivek Goyal
2018-12-10 17:12 ` [PATCH 15/52] fuse: map virtio_fs DAX window BAR Vivek Goyal
2018-12-12 16:37   ` Christian Borntraeger
2018-12-13 11:55     ` Stefan Hajnoczi
2018-12-13 16:06   ` kbuild test robot
2018-12-13 19:55   ` Dan Williams
2018-12-13 20:09     ` Dr. David Alan Gilbert
2018-12-13 20:15       ` Dan Williams
2018-12-13 20:40         ` Vivek Goyal
2018-12-13 21:18           ` Vivek Goyal
2018-12-14 10:09             ` Dr. David Alan Gilbert
2018-12-10 17:12 ` [PATCH 16/52] virtio-fs: Add VIRTIO_PCI_CAP_SHARED_MEMORY_CFG and utility to find them Vivek Goyal
2018-12-12 16:36   ` [PATCH] virtio-fs: fix semicolon.cocci warnings kbuild test robot
2018-12-12 16:36   ` [PATCH 16/52] virtio-fs: Add VIRTIO_PCI_CAP_SHARED_MEMORY_CFG and utility to find them kbuild test robot
2018-12-10 17:12 ` [PATCH 17/52] virtio-fs: Retrieve shm capabilities for cache Vivek Goyal
2018-12-10 17:12 ` [PATCH 18/52] virtio-fs: Map cache using the values from the capabilities Vivek Goyal
2018-12-13  9:10   ` David Hildenbrand
2018-12-13  9:13     ` Dr. David Alan Gilbert
2018-12-13  9:34       ` David Hildenbrand
2018-12-13 10:00         ` Dr. David Alan Gilbert
2018-12-13 11:26           ` David Hildenbrand
2018-12-13 12:15             ` Dr. David Alan Gilbert
2018-12-13 12:24               ` David Hildenbrand
2018-12-13 12:38                 ` Cornelia Huck
2018-12-14 13:44                   ` Stefan Hajnoczi
2018-12-14 13:50                     ` Cornelia Huck
2018-12-14 14:06                       ` Dr. David Alan Gilbert
2018-12-17 11:25                       ` Stefan Hajnoczi
2018-12-17 10:53                     ` David Hildenbrand
2018-12-17 14:56                       ` Stefan Hajnoczi
2018-12-18 17:13                         ` Cornelia Huck
2018-12-18 17:25                           ` David Hildenbrand
2019-01-02 10:24                             ` Stefan Hajnoczi
2019-03-17  0:33   ` Liu Bo
2019-03-20 10:42     ` Dr. David Alan Gilbert
2019-03-17  0:35   ` [PATCH] virtio-fs: fix multiple tag support Liu Bo
2019-03-19 20:26     ` Vivek Goyal
2019-03-20  2:04       ` Liu Bo
2018-12-10 17:12 ` [PATCH 19/52] virito-fs: Make dax optional Vivek Goyal
2018-12-10 17:12 ` [PATCH 20/52] Limit number of pages returned by direct_access() Vivek Goyal
2018-12-10 17:12 ` [PATCH 21/52] fuse: Introduce fuse_dax_mapping Vivek Goyal
2018-12-10 17:12 ` [PATCH 22/52] Create a list of free memory ranges Vivek Goyal
2018-12-11 17:44   ` kbuild test robot
2018-12-15 19:22   ` kbuild test robot
2018-12-10 17:12 ` [PATCH 23/52] fuse: simplify fuse_fill_super_common() calling Vivek Goyal
2018-12-10 17:12 ` [PATCH 24/52] fuse: Introduce setupmapping/removemapping commands Vivek Goyal
2018-12-10 17:12 ` [PATCH 25/52] Introduce interval tree basic data structures Vivek Goyal
2018-12-10 17:12 ` [PATCH 26/52] fuse: Implement basic DAX read/write support commands Vivek Goyal
2018-12-10 17:12 ` [PATCH 27/52] fuse: Maintain a list of busy elements Vivek Goyal
2018-12-10 17:12 ` [PATCH 28/52] Do fallocate() to grow file before mapping for file growing writes Vivek Goyal
2018-12-11  6:13   ` kbuild test robot
2018-12-11  6:20   ` kbuild test robot
2018-12-10 17:12 ` [PATCH 29/52] fuse: add DAX mmap support Vivek Goyal
2018-12-10 17:12 ` [PATCH 30/52] fuse: delete dentry if timeout is zero Vivek Goyal
2018-12-10 17:12 ` [PATCH 31/52] dax: Pass dax_dev to dax_writeback_mapping_range() Vivek Goyal
2018-12-11  6:12   ` kbuild test robot
2018-12-11 17:38   ` kbuild test robot
2018-12-10 17:12 ` [PATCH 32/52] fuse: Define dax address space operations Vivek Goyal
2018-12-10 17:12 ` [PATCH 33/52] fuse, dax: Take ->i_mmap_sem lock during dax page fault Vivek Goyal
2018-12-10 17:13 ` Vivek Goyal [this message]
2018-12-10 17:13 ` [PATCH 35/52] fuse: Add logic to do direct reclaim of memory Vivek Goyal
2018-12-10 17:13 ` [PATCH 36/52] fuse: Kick worker when free memory drops below 20% of total ranges Vivek Goyal
2018-12-10 17:13 ` [PATCH 37/52] fuse: multiplex cached/direct_io/dax file operations Vivek Goyal
2018-12-10 17:13 ` [PATCH 38/52] Dispatch FORGET requests later instead of dropping them Vivek Goyal
2018-12-10 17:13 ` [PATCH 39/52] Release file in process context Vivek Goyal
2018-12-10 17:13 ` [PATCH 40/52] fuse: Do not block on inode lock while freeing memory range Vivek Goyal
2018-12-10 17:13 ` [PATCH 41/52] fuse: Reschedule dax free work if too many EAGAIN attempts Vivek Goyal
2018-12-10 17:13 ` [PATCH 42/52] fuse: Wait for memory ranges to become free Vivek Goyal
2018-12-10 17:13 ` [PATCH 43/52] fuse: Take inode lock for dax inode truncation Vivek Goyal
2018-12-10 17:13 ` [PATCH 44/52] fuse: Clear setuid bit even in direct I/O path Vivek Goyal
2018-12-10 17:13 ` [PATCH 45/52] virtio: Free fuse devices on umount Vivek Goyal
2018-12-10 17:13 ` [PATCH 46/52] virtio-fs: Retrieve shm capabilities for version table Vivek Goyal
2018-12-10 17:13 ` [PATCH 47/52] virtio-fs: Map using the values from the capabilities Vivek Goyal
2018-12-10 17:13 ` [PATCH 48/52] virtio-fs: pass version table pointer to fuse Vivek Goyal
2018-12-10 17:13 ` [PATCH 49/52] fuse: don't crash if version table is NULL Vivek Goyal
2018-12-10 17:13 ` [PATCH 50/52] fuse: add shared version support (virtio-fs only) Vivek Goyal
2018-12-10 17:13 ` [PATCH 51/52] fuse: shared version cleanups Vivek Goyal
2018-12-10 17:13 ` [PATCH 52/52] fuse: fix fuse_permission() for the default_permissions case Vivek Goyal
2018-12-19 21:25   ` kbuild test robot
2018-12-11 12:54 ` [PATCH 00/52] [RFC] virtio-fs: shared file system for virtual machines Stefan Hajnoczi
2018-12-12 20:30 ` Konrad Rzeszutek Wilk
2018-12-12 21:22   ` Vivek Goyal
2019-02-12 15:56 ` Aneesh Kumar K.V
2019-02-12 18:57   ` Vivek Goyal

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181210171318.16998-35-vgoyal@redhat.com \
    --to=vgoyal@redhat.com \
    --cc=dgilbert@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=miklos@szeredi.hu \
    --cc=stefanha@redhat.com \
    --cc=sweil@redhat.com \
    --cc=swhiteho@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.