LKML Archive on lore.kernel.org
 help / color / Atom feed
From: Vivek Goyal <vgoyal@redhat.com>
To: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
	kvm@vger.kernel.org
Cc: vgoyal@redhat.com, miklos@szeredi.hu, stefanha@redhat.com,
	dgilbert@redhat.com, sweil@redhat.com, swhiteho@redhat.com
Subject: [PATCH 34/52] fuse: Add logic to free up a memory range
Date: Mon, 10 Dec 2018 12:13:00 -0500
Message-ID: <20181210171318.16998-35-vgoyal@redhat.com> (raw)
In-Reply-To: <20181210171318.16998-1-vgoyal@redhat.com>

Add logic to free up a busy memory range. A freed memory range is
returned to the free pool. Add a worker which can be started to select
and free some busy memory ranges.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
---
 fs/fuse/file.c   | 148 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 fs/fuse/fuse_i.h |  10 ++++
 fs/fuse/inode.c  |   2 +
 3 files changed, 159 insertions(+), 1 deletion(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 73068289f62e..17becdff3014 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -272,7 +272,15 @@ static int fuse_setup_one_mapping(struct inode *inode,
 
 	pr_debug("fuse_setup_one_mapping() succeeded. offset=0x%llx err=%zd\n", offset, err);
 
-	/* TODO: What locking is required here. For now, using fc->lock */
+	/*
+	 * We don't take a reference on inode. inode is valid right now and
+	 * when inode is going away, cleanup logic should first cleanup
+	 * dmap entries.
+	 *
+	 * TODO: Do we need to ensure that we are holding inode lock
+	 * as well.
+	 */
+	dmap->inode = inode;
 	dmap->start = offset;
 	dmap->end = offset + FUSE_DAX_MEM_RANGE_SZ - 1;
 	/* Protected by fi->i_dmap_sem */
@@ -347,6 +355,8 @@ void fuse_removemapping(struct inode *inode)
 			continue;
 		}
 
+		dmap->inode = NULL;
+
 		/* Add it back to free ranges list */
 		free_dax_mapping(fc, dmap);
 	}
@@ -3694,3 +3704,139 @@ void fuse_init_file_inode(struct inode *inode)
 		inode->i_data.a_ops = &fuse_dax_file_aops;
 	}
 }
+
+/*
+ * Flush and drop @inode's dax range at @dmap_start, then free its dmap.
+ */
+int fuse_dax_free_one_mapping_locked(struct fuse_conn *fc, struct inode *inode,
+				u64 dmap_start)
+{
+	int ret;
+	struct fuse_inode *fi = get_fuse_inode(inode);
+	struct fuse_dax_mapping *dmap;
+
+	WARN_ON(!inode_is_locked(inode));
+
+	/* Look up the dax mapping covering file offset dmap_start */
+	dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, dmap_start,
+							dmap_start);
+
+	/* Range already got cleaned up by somebody else */
+	if (!dmap)
+		return 0;
+
+	ret = filemap_fdatawrite_range(inode->i_mapping, dmap->start, dmap->end);
+	if (ret) {
+		printk("filemap_fdatawrite_range() failed. err=%d start=0x%llx,"
+			" end=0x%llx\n", ret, dmap->start, dmap->end);
+		return ret;
+	}
+
+	ret = invalidate_inode_pages2_range(inode->i_mapping,
+					dmap->start >> PAGE_SHIFT,
+					dmap->end >> PAGE_SHIFT);
+	/* TODO: What if the above fails? For now, leave the range in place. */
+	if (ret) {
+		printk("invalidate_inode_pages2_range() failed err=%d\n", ret);
+		return ret;
+	}
+
+	fuse_dax_interval_tree_remove(dmap, &fi->dmap_tree);
+	fi->nr_dmaps--;
+
+	spin_lock(&fc->lock);
+	list_del_init(&dmap->busy_list);
+	WARN_ON(fc->nr_busy_ranges == 0);
+	fc->nr_busy_ranges--;
+	dmap->inode = NULL;
+	dmap->start = dmap->end = 0;
+	__free_dax_mapping(fc, dmap);
+	spin_unlock(&fc->lock);
+
+	/* NOTE(review): reads dmap after __free_dax_mapping(); confirm safe */
+	pr_debug("fuse: freed memory range window_offset=0x%llx,"
+				" length=0x%llx\n", dmap->window_offset,
+				dmap->length);
+
+	return ret;
+}
+
+/*
+ * Free the dax range of @inode at file offset @dmap_start; returns the
+ * result of fuse_dax_free_one_mapping_locked(). Locks, in the order taken:
+ * 1. Take inode->i_rwsem to prevent further read/write.
+ * 2. Take fuse_inode->i_mmap_sem to block dax faults.
+ * 3. Take fuse_inode->i_dmap_sem to protect interval tree. It might not
+ *    be strictly necessary as lock 1 and 2 seem sufficient.
+ */
+int fuse_dax_free_one_mapping(struct fuse_conn *fc, struct inode *inode,
+				u64 dmap_start)
+{
+	int ret;
+	struct fuse_inode *fi = get_fuse_inode(inode);
+
+	inode_lock(inode);
+	down_write(&fi->i_mmap_sem);
+	down_write(&fi->i_dmap_sem);
+	ret = fuse_dax_free_one_mapping_locked(fc, inode, dmap_start);
+	up_write(&fi->i_dmap_sem);
+	up_write(&fi->i_mmap_sem);
+	inode_unlock(inode);
+	return ret;
+}
+
+/*
+ * Free up to @nr_to_free busy ranges, each time taking the first entry of
+ * fc->busy_ranges whose inode can be pinned. Returns 0 or the first error.
+ */
+int fuse_dax_free_memory(struct fuse_conn *fc, unsigned long nr_to_free)
+{
+	struct fuse_dax_mapping *dmap, *pos;
+	unsigned long i;
+	int ret;
+	u64 dmap_start = 0, window_offset = 0;
+	struct inode *inode = NULL;
+
+	for (i = 0; i < nr_to_free; i++) {
+		dmap = NULL;
+		spin_lock(&fc->lock);
+		list_for_each_entry(pos, &fc->busy_ranges, busy_list) {
+			/* Inode going away; its ranges get freed anyway */
+			inode = igrab(pos->inode);
+			if (!inode)
+				continue;
+			/* Select the range only once its inode is pinned */
+			dmap = pos;
+			dmap_start = dmap->start;
+			window_offset = dmap->window_offset;
+			break;
+		}
+		spin_unlock(&fc->lock);
+		if (!dmap)
+			return 0;
+
+		ret = fuse_dax_free_one_mapping(fc, inode, dmap_start);
+		iput(inode);
+		if (ret) {
+			printk("%s(window_offset=0x%llx) failed. err=%d\n",
+				__func__, window_offset, ret);
+			return ret;
+		}
+	}
+	return 0;
+}
+
+/* Delayed-work handler: frees up to FUSE_DAX_RECLAIM_CHUNK busy ranges.
+ * TODO: This probably should go in inode.c */
+void fuse_dax_free_mem_worker(struct work_struct *work)
+{
+	int ret;
+	struct fuse_conn *fc = container_of(work, struct fuse_conn,
+						dax_free_work.work);
+	pr_debug("fuse: Worker to free memory called. nr_free_ranges=%lu"
+		 " nr_busy_ranges=%lu\n", fc->nr_free_ranges,
+		 fc->nr_busy_ranges);
+	ret = fuse_dax_free_memory(fc, FUSE_DAX_RECLAIM_CHUNK);
+	if (ret)
+		pr_debug("fuse: fuse_dax_free_memory() failed with err=%d\n", ret);
+}
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 280f717deb57..383deaf0ecf1 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -50,6 +50,9 @@
 #define FUSE_DAX_MEM_RANGE_SZ	(2*1024*1024)
 #define FUSE_DAX_MEM_RANGE_PAGES	(FUSE_DAX_MEM_RANGE_SZ/PAGE_SIZE)
 
+/* Number of ranges reclaimer will try to free in one invocation */
+#define FUSE_DAX_RECLAIM_CHUNK		(10)
+
 /** List of active connections */
 extern struct list_head fuse_conn_list;
 
@@ -102,6 +105,9 @@ struct fuse_forget_link {
 
 /** Translation information for file offsets to DAX window offsets */
 struct fuse_dax_mapping {
+	/* Pointer to inode where this memory range is mapped */
+	struct inode *inode;
+
 	/* Will connect in fc->free_ranges to keep track of free memory */
 	struct list_head list;
 
@@ -870,6 +876,9 @@ struct fuse_conn {
 	unsigned long nr_busy_ranges;
 	struct list_head busy_ranges;
 
+	/* Worker to free up memory ranges */
+	struct delayed_work dax_free_work;
+
 	/*
 	 * DAX Window Free Ranges. TODO: This might not be best place to store
 	 * this free list
@@ -1244,6 +1253,7 @@ unsigned fuse_len_args(unsigned numargs, struct fuse_arg *args);
  * Get the next unique ID for a request
  */
 u64 fuse_get_unique(struct fuse_iqueue *fiq);
+void fuse_dax_free_mem_worker(struct work_struct *work);
 void fuse_removemapping(struct inode *inode);
 
 #endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 59fc5a7a18fc..44f7bc44e319 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -713,6 +713,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns,
 	fc->user_ns = get_user_ns(user_ns);
 	INIT_LIST_HEAD(&fc->free_ranges);
 	INIT_LIST_HEAD(&fc->busy_ranges);
+	INIT_DELAYED_WORK(&fc->dax_free_work, fuse_dax_free_mem_worker);
 }
 EXPORT_SYMBOL_GPL(fuse_conn_init);
 
@@ -721,6 +722,7 @@ void fuse_conn_put(struct fuse_conn *fc)
 	if (refcount_dec_and_test(&fc->count)) {
 		if (fc->destroy_req)
 			fuse_request_free(fc->destroy_req);
+		flush_delayed_work(&fc->dax_free_work);
 		if (fc->dax_dev)
 			fuse_free_dax_mem_ranges(&fc->free_ranges);
 		put_pid_ns(fc->pid_ns);
-- 
2.13.6


  parent reply index

Thread overview: 98+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-12-10 17:12 [PATCH 00/52] [RFC] virtio-fs: shared file system for virtual machines Vivek Goyal
2018-12-10 17:12 ` [PATCH 01/52] fuse: add skeleton virtio_fs.ko module Vivek Goyal
2018-12-10 17:12 ` [PATCH 02/52] fuse: add probe/remove virtio driver Vivek Goyal
2018-12-10 17:12 ` [PATCH 03/52] fuse: rely on mutex_unlock() barrier instead of fput() Vivek Goyal
2018-12-10 17:12 ` [PATCH 04/52] fuse: extract fuse_fill_super_common() Vivek Goyal
2018-12-10 17:12 ` [PATCH 05/52] virtio_fs: get mount working Vivek Goyal
2018-12-10 17:12 ` [PATCH 06/52] fuse: export fuse_end_request() Vivek Goyal
2018-12-10 17:12 ` [PATCH 07/52] fuse: export fuse_len_args() Vivek Goyal
2018-12-10 17:12 ` [PATCH 08/52] fuse: add fuse_iqueue_ops callbacks Vivek Goyal
2018-12-10 17:12 ` [PATCH 09/52] fuse: process requests queues Vivek Goyal
2018-12-10 17:12 ` [PATCH 10/52] fuse: export fuse_get_unique() Vivek Goyal
2018-12-10 17:12 ` [PATCH 11/52] fuse: implement FUSE_FORGET for virtio-fs Vivek Goyal
2018-12-10 17:12 ` [PATCH 12/52] virtio_fs: Set up dax_device Vivek Goyal
2018-12-10 17:12 ` [PATCH 13/52] dax: remove block device dependencies Vivek Goyal
2018-12-10 17:12 ` [PATCH 14/52] fuse: add fuse_conn->dax_dev field Vivek Goyal
2018-12-10 17:12 ` [PATCH 15/52] fuse: map virtio_fs DAX window BAR Vivek Goyal
2018-12-12 16:37   ` Christian Borntraeger
2018-12-13 11:55     ` Stefan Hajnoczi
2018-12-13 16:06   ` kbuild test robot
2018-12-13 19:55   ` Dan Williams
2018-12-13 20:09     ` Dr. David Alan Gilbert
2018-12-13 20:15       ` Dan Williams
2018-12-13 20:40         ` Vivek Goyal
2018-12-13 21:18           ` Vivek Goyal
2018-12-14 10:09             ` Dr. David Alan Gilbert
2018-12-10 17:12 ` [PATCH 16/52] virtio-fs: Add VIRTIO_PCI_CAP_SHARED_MEMORY_CFG and utility to find them Vivek Goyal
2018-12-12 16:36   ` [PATCH] virtio-fs: fix semicolon.cocci warnings kbuild test robot
2018-12-12 16:36   ` [PATCH 16/52] virtio-fs: Add VIRTIO_PCI_CAP_SHARED_MEMORY_CFG and utility to find them kbuild test robot
2018-12-10 17:12 ` [PATCH 17/52] virtio-fs: Retrieve shm capabilities for cache Vivek Goyal
2018-12-10 17:12 ` [PATCH 18/52] virtio-fs: Map cache using the values from the capabilities Vivek Goyal
2018-12-13  9:10   ` David Hildenbrand
2018-12-13  9:13     ` Dr. David Alan Gilbert
2018-12-13  9:34       ` David Hildenbrand
2018-12-13 10:00         ` Dr. David Alan Gilbert
2018-12-13 11:26           ` David Hildenbrand
2018-12-13 12:15             ` Dr. David Alan Gilbert
2018-12-13 12:24               ` David Hildenbrand
2018-12-13 12:38                 ` Cornelia Huck
2018-12-14 13:44                   ` Stefan Hajnoczi
2018-12-14 13:50                     ` Cornelia Huck
2018-12-14 14:06                       ` Dr. David Alan Gilbert
2018-12-17 11:25                       ` Stefan Hajnoczi
2018-12-17 10:53                     ` David Hildenbrand
2018-12-17 14:56                       ` Stefan Hajnoczi
2018-12-18 17:13                         ` Cornelia Huck
2018-12-18 17:25                           ` David Hildenbrand
2019-01-02 10:24                             ` Stefan Hajnoczi
2019-03-17  0:33   ` Liu Bo
2019-03-20 10:42     ` Dr. David Alan Gilbert
2019-03-17  0:35   ` [PATCH] virtio-fs: fix multiple tag support Liu Bo
2019-03-19 20:26     ` Vivek Goyal
2019-03-20  2:04       ` Liu Bo
2018-12-10 17:12 ` [PATCH 19/52] virito-fs: Make dax optional Vivek Goyal
2018-12-10 17:12 ` [PATCH 20/52] Limit number of pages returned by direct_access() Vivek Goyal
2018-12-10 17:12 ` [PATCH 21/52] fuse: Introduce fuse_dax_mapping Vivek Goyal
2018-12-10 17:12 ` [PATCH 22/52] Create a list of free memory ranges Vivek Goyal
2018-12-11 17:44   ` kbuild test robot
2018-12-15 19:22   ` kbuild test robot
2018-12-10 17:12 ` [PATCH 23/52] fuse: simplify fuse_fill_super_common() calling Vivek Goyal
2018-12-10 17:12 ` [PATCH 24/52] fuse: Introduce setupmapping/removemapping commands Vivek Goyal
2018-12-10 17:12 ` [PATCH 25/52] Introduce interval tree basic data structures Vivek Goyal
2018-12-10 17:12 ` [PATCH 26/52] fuse: Implement basic DAX read/write support commands Vivek Goyal
2018-12-10 17:12 ` [PATCH 27/52] fuse: Maintain a list of busy elements Vivek Goyal
2018-12-10 17:12 ` [PATCH 28/52] Do fallocate() to grow file before mapping for file growing writes Vivek Goyal
2018-12-11  6:13   ` kbuild test robot
2018-12-11  6:20   ` kbuild test robot
2018-12-10 17:12 ` [PATCH 29/52] fuse: add DAX mmap support Vivek Goyal
2018-12-10 17:12 ` [PATCH 30/52] fuse: delete dentry if timeout is zero Vivek Goyal
2018-12-10 17:12 ` [PATCH 31/52] dax: Pass dax_dev to dax_writeback_mapping_range() Vivek Goyal
2018-12-11  6:12   ` kbuild test robot
2018-12-11 17:38   ` kbuild test robot
2018-12-10 17:12 ` [PATCH 32/52] fuse: Define dax address space operations Vivek Goyal
2018-12-10 17:12 ` [PATCH 33/52] fuse, dax: Take ->i_mmap_sem lock during dax page fault Vivek Goyal
2018-12-10 17:13 ` Vivek Goyal [this message]
2018-12-10 17:13 ` [PATCH 35/52] fuse: Add logic to do direct reclaim of memory Vivek Goyal
2018-12-10 17:13 ` [PATCH 36/52] fuse: Kick worker when free memory drops below 20% of total ranges Vivek Goyal
2018-12-10 17:13 ` [PATCH 37/52] fuse: multiplex cached/direct_io/dax file operations Vivek Goyal
2018-12-10 17:13 ` [PATCH 38/52] Dispatch FORGET requests later instead of dropping them Vivek Goyal
2018-12-10 17:13 ` [PATCH 39/52] Release file in process context Vivek Goyal
2018-12-10 17:13 ` [PATCH 40/52] fuse: Do not block on inode lock while freeing memory range Vivek Goyal
2018-12-10 17:13 ` [PATCH 41/52] fuse: Reschedule dax free work if too many EAGAIN attempts Vivek Goyal
2018-12-10 17:13 ` [PATCH 42/52] fuse: Wait for memory ranges to become free Vivek Goyal
2018-12-10 17:13 ` [PATCH 43/52] fuse: Take inode lock for dax inode truncation Vivek Goyal
2018-12-10 17:13 ` [PATCH 44/52] fuse: Clear setuid bit even in direct I/O path Vivek Goyal
2018-12-10 17:13 ` [PATCH 45/52] virtio: Free fuse devices on umount Vivek Goyal
2018-12-10 17:13 ` [PATCH 46/52] virtio-fs: Retrieve shm capabilities for version table Vivek Goyal
2018-12-10 17:13 ` [PATCH 47/52] virtio-fs: Map using the values from the capabilities Vivek Goyal
2018-12-10 17:13 ` [PATCH 48/52] virtio-fs: pass version table pointer to fuse Vivek Goyal
2018-12-10 17:13 ` [PATCH 49/52] fuse: don't crash if version table is NULL Vivek Goyal
2018-12-10 17:13 ` [PATCH 50/52] fuse: add shared version support (virtio-fs only) Vivek Goyal
2018-12-10 17:13 ` [PATCH 51/52] fuse: shared version cleanups Vivek Goyal
2018-12-10 17:13 ` [PATCH 52/52] fuse: fix fuse_permission() for the default_permissions case Vivek Goyal
2018-12-19 21:25   ` kbuild test robot
2018-12-11 12:54 ` [PATCH 00/52] [RFC] virtio-fs: shared file system for virtual machines Stefan Hajnoczi
2018-12-12 20:30 ` Konrad Rzeszutek Wilk
2018-12-12 21:22   ` Vivek Goyal
2019-02-12 15:56 ` Aneesh Kumar K.V
2019-02-12 18:57   ` Vivek Goyal

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181210171318.16998-35-vgoyal@redhat.com \
    --to=vgoyal@redhat.com \
    --cc=dgilbert@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=miklos@szeredi.hu \
    --cc=stefanha@redhat.com \
    --cc=sweil@redhat.com \
    --cc=swhiteho@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git
	git clone --mirror https://lore.kernel.org/lkml/7 lkml/git/7.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org linux-kernel@archiver.kernel.org
	public-inbox-index lkml


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/ public-inbox