From: Vivek Goyal <vgoyal@redhat.com>
To: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
kvm@vger.kernel.org
Cc: vgoyal@redhat.com, miklos@szeredi.hu, stefanha@redhat.com,
dgilbert@redhat.com, sweil@redhat.com, swhiteho@redhat.com
Subject: [PATCH 34/52] fuse: Add logic to free up a memory range
Date: Mon, 10 Dec 2018 12:13:00 -0500 [thread overview]
Message-ID: <20181210171318.16998-35-vgoyal@redhat.com> (raw)
In-Reply-To: <20181210171318.16998-1-vgoyal@redhat.com>
Add logic to free up a busy memory range. Freed memory range will be
returned to free pool. Add a worker which can be started to select
and free some busy memory ranges.
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
---
fs/fuse/file.c | 148 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
fs/fuse/fuse_i.h | 10 ++++
fs/fuse/inode.c | 2 +
3 files changed, 159 insertions(+), 1 deletion(-)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 73068289f62e..17becdff3014 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -272,7 +272,15 @@ static int fuse_setup_one_mapping(struct inode *inode,
pr_debug("fuse_setup_one_mapping() succeeded. offset=0x%llx err=%zd\n", offset, err);
- /* TODO: What locking is required here. For now, using fc->lock */
+ /*
+ * We don't take a reference on inode. inode is valid right now and
+ * when inode is going away, cleanup logic should first cleanup
+ * dmap entries.
+ *
+ * TODO: Do we need to ensure that we are holding inode lock
+ * as well.
+ */
+ dmap->inode = inode;
dmap->start = offset;
dmap->end = offset + FUSE_DAX_MEM_RANGE_SZ - 1;
/* Protected by fi->i_dmap_sem */
@@ -347,6 +355,8 @@ void fuse_removemapping(struct inode *inode)
continue;
}
+ dmap->inode = NULL;
+
/* Add it back to free ranges list */
free_dax_mapping(fc, dmap);
}
@@ -3694,3 +3704,139 @@ void fuse_init_file_inode(struct inode *inode)
inode->i_data.a_ops = &fuse_dax_file_aops;
}
}
+
+/*
+ * Free the DAX mapping of @inode that covers file offset @dmap_start and
+ * return the memory range to the free pool of @fc.
+ *
+ * Writeback is started for the range and the page cache for the range is
+ * invalidated before the mapping is removed from the inode's interval tree
+ * and taken off fc->busy_ranges (under fc->lock).
+ *
+ * The function only warns if the inode lock is not held; see
+ * fuse_dax_free_one_mapping() for the locks its caller takes.
+ *
+ * Returns 0 on success or if no mapping covers @dmap_start, otherwise the
+ * error returned by filemap_fdatawrite_range() or
+ * invalidate_inode_pages2_range(). On error the mapping is left in place.
+ */
+int fuse_dax_free_one_mapping_locked(struct fuse_conn *fc, struct inode *inode,
+ u64 dmap_start)
+{
+ int ret;
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_dax_mapping *dmap;
+
+ WARN_ON(!inode_is_locked(inode));
+
+ /* Find the fuse dax mapping covering file offset dmap_start in inode. */
+ dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, dmap_start,
+ dmap_start);
+
+ /* Range already got cleaned up by somebody else */
+ if (!dmap)
+ return 0;
+
+ /* Push out dirty data in the range before the mapping goes away */
+ ret = filemap_fdatawrite_range(inode->i_mapping, dmap->start, dmap->end);
+ if (ret) {
+ printk("filemap_fdatawrite_range() failed. err=%d start=0x%llx,"
+ " end=0x%llx\n", ret, dmap->start, dmap->end);
+ return ret;
+ }
+
+ /* Byte offsets are converted to page indexes for the invalidate call */
+ ret = invalidate_inode_pages2_range(inode->i_mapping,
+ dmap->start >> PAGE_SHIFT,
+ dmap->end >> PAGE_SHIFT);
+ /* TODO: What to do if above fails? For now,
+ * leave the range in place.
+ */
+ if (ret) {
+ printk("invalidate_inode_pages2_range() failed err=%d\n", ret);
+ return ret;
+ }
+
+ /* Remove dax mapping from inode interval tree now */
+ fuse_dax_interval_tree_remove(dmap, &fi->dmap_tree);
+ fi->nr_dmaps--;
+
+ /* Cleanup dmap entry and add back to free list */
+ spin_lock(&fc->lock);
+ list_del_init(&dmap->busy_list);
+ WARN_ON(fc->nr_busy_ranges == 0);
+ fc->nr_busy_ranges--;
+ dmap->inode = NULL;
+ dmap->start = dmap->end = 0;
+ __free_dax_mapping(fc, dmap);
+ spin_unlock(&fc->lock);
+
+ /*
+ * NOTE(review): dmap is read here after it was handed back to the free
+ * pool and fc->lock was dropped. Presumably window_offset and length
+ * never change for a given range - confirm.
+ */
+ pr_debug("fuse: freed memory range window_offset=0x%llx,"
+ " length=0x%llx\n", dmap->window_offset,
+ dmap->length);
+
+ /* ret is 0 here; both error paths returned earlier */
+ return ret;
+}
+
+/*
+ * Free a range of memory.
+ *
+ * Takes the locks listed below, calls fuse_dax_free_one_mapping_locked() to
+ * free the mapping of @inode that covers file offset @dmap_start, and
+ * returns whatever that function returns.
+ *
+ * Locking, in acquisition order (released in reverse order):
+ * 1. Take inode->i_rwsem to prevent further read/write.
+ * 2. Take fuse_inode->i_mmap_sem to block dax faults.
+ * 3. Take fuse_inode->i_dmap_sem to protect interval tree. It might not
+ * be strictly necessary as lock 1 and 2 seem sufficient.
+ */
+int fuse_dax_free_one_mapping(struct fuse_conn *fc, struct inode *inode,
+ u64 dmap_start)
+{
+ int ret;
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ inode_lock(inode);
+ down_write(&fi->i_mmap_sem);
+ down_write(&fi->i_dmap_sem);
+ ret = fuse_dax_free_one_mapping_locked(fc, inode, dmap_start);
+ up_write(&fi->i_dmap_sem);
+ up_write(&fi->i_mmap_sem);
+ inode_unlock(inode);
+ return ret;
+}
+
+/*
+ * Try to free up to @nr_to_free busy memory ranges of @fc.
+ *
+ * Each iteration picks the first range on fc->busy_ranges whose inode can
+ * still be grabbed with igrab(), drops fc->lock, and frees that range via
+ * fuse_dax_free_one_mapping(). The inode reference is held only for the
+ * duration of that call.
+ *
+ * Returns 0 if all requested ranges were freed or if no freeable busy range
+ * is left, otherwise the first error from fuse_dax_free_one_mapping().
+ */
+int fuse_dax_free_memory(struct fuse_conn *fc, unsigned long nr_to_free)
+{
+	struct fuse_dax_mapping *dmap, *pos;
+	int ret;
+	unsigned long i;
+	u64 dmap_start = 0, window_offset = 0;
+	struct inode *inode = NULL;
+
+	/* Pick first busy range and free it for now */
+	for (i = 0; i < nr_to_free; i++) {
+		dmap = NULL;
+		spin_lock(&fc->lock);
+
+		list_for_each_entry(pos, &fc->busy_ranges, busy_list) {
+			inode = igrab(pos->inode);
+			/*
+			 * This inode is going away. That will free
+			 * up all the ranges anyway, continue to
+			 * next range.
+			 */
+			if (!inode)
+				continue;
+			/*
+			 * Only record the range once we hold an inode
+			 * reference. Otherwise a list made up solely of
+			 * dying inodes would leave dmap set with a NULL
+			 * inode.
+			 */
+			dmap = pos;
+			dmap_start = dmap->start;
+			window_offset = dmap->window_offset;
+			break;
+		}
+		spin_unlock(&fc->lock);
+
+		/* Nothing busy, or nothing whose inode is still alive */
+		if (!dmap)
+			return 0;
+
+		ret = fuse_dax_free_one_mapping(fc, inode, dmap_start);
+		iput(inode);
+		if (ret) {
+			printk("%s(window_offset=0x%llx) failed. err=%d\n",
+			       __func__, window_offset, ret);
+			return ret;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Delayed-work handler that reclaims DAX memory ranges.
+ *
+ * Recovers the fuse_conn from the embedded dax_free_work item and asks
+ * fuse_dax_free_memory() to free up to FUSE_DAX_RECLAIM_CHUNK busy ranges.
+ * A failure is only reported through pr_debug().
+ *
+ * TODO: This probably should go in inode.c
+ */
+void fuse_dax_free_mem_worker(struct work_struct *work)
+{
+	int ret;
+	struct fuse_conn *fc = container_of(work, struct fuse_conn,
+					    dax_free_work.work);
+
+	/* Counters are read without fc->lock; they are only for debugging */
+	pr_debug("fuse: Worker to free memory called. nr_free_ranges=%lu"
+		 " nr_busy_ranges=%lu\n", fc->nr_free_ranges,
+		 fc->nr_busy_ranges);
+
+	ret = fuse_dax_free_memory(fc, FUSE_DAX_RECLAIM_CHUNK);
+	if (ret)
+		pr_debug("fuse: fuse_dax_free_memory() failed with err=%d\n", ret);
+}
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 280f717deb57..383deaf0ecf1 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -50,6 +50,9 @@
#define FUSE_DAX_MEM_RANGE_SZ (2*1024*1024)
#define FUSE_DAX_MEM_RANGE_PAGES (FUSE_DAX_MEM_RANGE_SZ/PAGE_SIZE)
+/* Number of ranges reclaimer will try to free in one invocation */
+#define FUSE_DAX_RECLAIM_CHUNK (10)
+
/** List of active connections */
extern struct list_head fuse_conn_list;
@@ -102,6 +105,9 @@ struct fuse_forget_link {
/** Translation information for file offsets to DAX window offsets */
struct fuse_dax_mapping {
+ /* Pointer to inode where this memory range is mapped */
+ struct inode *inode;
+
/* Will connect in fc->free_ranges to keep track of free memory */
struct list_head list;
@@ -870,6 +876,9 @@ struct fuse_conn {
unsigned long nr_busy_ranges;
struct list_head busy_ranges;
+ /* Worker to free up memory ranges */
+ struct delayed_work dax_free_work;
+
/*
* DAX Window Free Ranges. TODO: This might not be best place to store
* this free list
@@ -1244,6 +1253,7 @@ unsigned fuse_len_args(unsigned numargs, struct fuse_arg *args);
* Get the next unique ID for a request
*/
u64 fuse_get_unique(struct fuse_iqueue *fiq);
+void fuse_dax_free_mem_worker(struct work_struct *work);
void fuse_removemapping(struct inode *inode);
#endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 59fc5a7a18fc..44f7bc44e319 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -713,6 +713,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns,
fc->user_ns = get_user_ns(user_ns);
INIT_LIST_HEAD(&fc->free_ranges);
INIT_LIST_HEAD(&fc->busy_ranges);
+ INIT_DELAYED_WORK(&fc->dax_free_work, fuse_dax_free_mem_worker);
}
EXPORT_SYMBOL_GPL(fuse_conn_init);
@@ -721,6 +722,7 @@ void fuse_conn_put(struct fuse_conn *fc)
if (refcount_dec_and_test(&fc->count)) {
if (fc->destroy_req)
fuse_request_free(fc->destroy_req);
+ flush_delayed_work(&fc->dax_free_work);
if (fc->dax_dev)
fuse_free_dax_mem_ranges(&fc->free_ranges);
put_pid_ns(fc->pid_ns);
--
2.13.6
next prev parent reply other threads:[~2018-12-10 17:14 UTC|newest]
Thread overview: 98+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-12-10 17:12 [PATCH 00/52] [RFC] virtio-fs: shared file system for virtual machines Vivek Goyal
2018-12-10 17:12 ` [PATCH 01/52] fuse: add skeleton virtio_fs.ko module Vivek Goyal
2018-12-10 17:12 ` [PATCH 02/52] fuse: add probe/remove virtio driver Vivek Goyal
2018-12-10 17:12 ` [PATCH 03/52] fuse: rely on mutex_unlock() barrier instead of fput() Vivek Goyal
2018-12-10 17:12 ` [PATCH 04/52] fuse: extract fuse_fill_super_common() Vivek Goyal
2018-12-10 17:12 ` [PATCH 05/52] virtio_fs: get mount working Vivek Goyal
2018-12-10 17:12 ` [PATCH 06/52] fuse: export fuse_end_request() Vivek Goyal
2018-12-10 17:12 ` [PATCH 07/52] fuse: export fuse_len_args() Vivek Goyal
2018-12-10 17:12 ` [PATCH 08/52] fuse: add fuse_iqueue_ops callbacks Vivek Goyal
2018-12-10 17:12 ` [PATCH 09/52] fuse: process requests queues Vivek Goyal
2018-12-10 17:12 ` [PATCH 10/52] fuse: export fuse_get_unique() Vivek Goyal
2018-12-10 17:12 ` [PATCH 11/52] fuse: implement FUSE_FORGET for virtio-fs Vivek Goyal
2018-12-10 17:12 ` [PATCH 12/52] virtio_fs: Set up dax_device Vivek Goyal
2018-12-10 17:12 ` [PATCH 13/52] dax: remove block device dependencies Vivek Goyal
2018-12-10 17:12 ` [PATCH 14/52] fuse: add fuse_conn->dax_dev field Vivek Goyal
2018-12-10 17:12 ` [PATCH 15/52] fuse: map virtio_fs DAX window BAR Vivek Goyal
2018-12-12 16:37 ` Christian Borntraeger
2018-12-13 11:55 ` Stefan Hajnoczi
2018-12-13 16:06 ` kbuild test robot
2018-12-13 19:55 ` Dan Williams
2018-12-13 20:09 ` Dr. David Alan Gilbert
2018-12-13 20:15 ` Dan Williams
2018-12-13 20:40 ` Vivek Goyal
2018-12-13 21:18 ` Vivek Goyal
2018-12-14 10:09 ` Dr. David Alan Gilbert
2018-12-10 17:12 ` [PATCH 16/52] virtio-fs: Add VIRTIO_PCI_CAP_SHARED_MEMORY_CFG and utility to find them Vivek Goyal
2018-12-12 16:36 ` [PATCH] virtio-fs: fix semicolon.cocci warnings kbuild test robot
2018-12-12 16:36 ` [PATCH 16/52] virtio-fs: Add VIRTIO_PCI_CAP_SHARED_MEMORY_CFG and utility to find them kbuild test robot
2018-12-10 17:12 ` [PATCH 17/52] virtio-fs: Retrieve shm capabilities for cache Vivek Goyal
2018-12-10 17:12 ` [PATCH 18/52] virtio-fs: Map cache using the values from the capabilities Vivek Goyal
2018-12-13 9:10 ` David Hildenbrand
2018-12-13 9:13 ` Dr. David Alan Gilbert
2018-12-13 9:34 ` David Hildenbrand
2018-12-13 10:00 ` Dr. David Alan Gilbert
2018-12-13 11:26 ` David Hildenbrand
2018-12-13 12:15 ` Dr. David Alan Gilbert
2018-12-13 12:24 ` David Hildenbrand
2018-12-13 12:38 ` Cornelia Huck
2018-12-14 13:44 ` Stefan Hajnoczi
2018-12-14 13:50 ` Cornelia Huck
2018-12-14 14:06 ` Dr. David Alan Gilbert
2018-12-17 11:25 ` Stefan Hajnoczi
2018-12-17 10:53 ` David Hildenbrand
2018-12-17 14:56 ` Stefan Hajnoczi
2018-12-18 17:13 ` Cornelia Huck
2018-12-18 17:25 ` David Hildenbrand
2019-01-02 10:24 ` Stefan Hajnoczi
2019-03-17 0:33 ` Liu Bo
2019-03-20 10:42 ` Dr. David Alan Gilbert
2019-03-17 0:35 ` [PATCH] virtio-fs: fix multiple tag support Liu Bo
2019-03-19 20:26 ` Vivek Goyal
2019-03-20 2:04 ` Liu Bo
2018-12-10 17:12 ` [PATCH 19/52] virito-fs: Make dax optional Vivek Goyal
2018-12-10 17:12 ` [PATCH 20/52] Limit number of pages returned by direct_access() Vivek Goyal
2018-12-10 17:12 ` [PATCH 21/52] fuse: Introduce fuse_dax_mapping Vivek Goyal
2018-12-10 17:12 ` [PATCH 22/52] Create a list of free memory ranges Vivek Goyal
2018-12-11 17:44 ` kbuild test robot
2018-12-15 19:22 ` kbuild test robot
2018-12-10 17:12 ` [PATCH 23/52] fuse: simplify fuse_fill_super_common() calling Vivek Goyal
2018-12-10 17:12 ` [PATCH 24/52] fuse: Introduce setupmapping/removemapping commands Vivek Goyal
2018-12-10 17:12 ` [PATCH 25/52] Introduce interval tree basic data structures Vivek Goyal
2018-12-10 17:12 ` [PATCH 26/52] fuse: Implement basic DAX read/write support commands Vivek Goyal
2018-12-10 17:12 ` [PATCH 27/52] fuse: Maintain a list of busy elements Vivek Goyal
2018-12-10 17:12 ` [PATCH 28/52] Do fallocate() to grow file before mapping for file growing writes Vivek Goyal
2018-12-11 6:13 ` kbuild test robot
2018-12-11 6:20 ` kbuild test robot
2018-12-10 17:12 ` [PATCH 29/52] fuse: add DAX mmap support Vivek Goyal
2018-12-10 17:12 ` [PATCH 30/52] fuse: delete dentry if timeout is zero Vivek Goyal
2018-12-10 17:12 ` [PATCH 31/52] dax: Pass dax_dev to dax_writeback_mapping_range() Vivek Goyal
2018-12-11 6:12 ` kbuild test robot
2018-12-11 17:38 ` kbuild test robot
2018-12-10 17:12 ` [PATCH 32/52] fuse: Define dax address space operations Vivek Goyal
2018-12-10 17:12 ` [PATCH 33/52] fuse, dax: Take ->i_mmap_sem lock during dax page fault Vivek Goyal
2018-12-10 17:13 ` Vivek Goyal [this message]
2018-12-10 17:13 ` [PATCH 35/52] fuse: Add logic to do direct reclaim of memory Vivek Goyal
2018-12-10 17:13 ` [PATCH 36/52] fuse: Kick worker when free memory drops below 20% of total ranges Vivek Goyal
2018-12-10 17:13 ` [PATCH 37/52] fuse: multiplex cached/direct_io/dax file operations Vivek Goyal
2018-12-10 17:13 ` [PATCH 38/52] Dispatch FORGET requests later instead of dropping them Vivek Goyal
2018-12-10 17:13 ` [PATCH 39/52] Release file in process context Vivek Goyal
2018-12-10 17:13 ` [PATCH 40/52] fuse: Do not block on inode lock while freeing memory range Vivek Goyal
2018-12-10 17:13 ` [PATCH 41/52] fuse: Reschedule dax free work if too many EAGAIN attempts Vivek Goyal
2018-12-10 17:13 ` [PATCH 42/52] fuse: Wait for memory ranges to become free Vivek Goyal
2018-12-10 17:13 ` [PATCH 43/52] fuse: Take inode lock for dax inode truncation Vivek Goyal
2018-12-10 17:13 ` [PATCH 44/52] fuse: Clear setuid bit even in direct I/O path Vivek Goyal
2018-12-10 17:13 ` [PATCH 45/52] virtio: Free fuse devices on umount Vivek Goyal
2018-12-10 17:13 ` [PATCH 46/52] virtio-fs: Retrieve shm capabilities for version table Vivek Goyal
2018-12-10 17:13 ` [PATCH 47/52] virtio-fs: Map using the values from the capabilities Vivek Goyal
2018-12-10 17:13 ` [PATCH 48/52] virtio-fs: pass version table pointer to fuse Vivek Goyal
2018-12-10 17:13 ` [PATCH 49/52] fuse: don't crash if version table is NULL Vivek Goyal
2018-12-10 17:13 ` [PATCH 50/52] fuse: add shared version support (virtio-fs only) Vivek Goyal
2018-12-10 17:13 ` [PATCH 51/52] fuse: shared version cleanups Vivek Goyal
2018-12-10 17:13 ` [PATCH 52/52] fuse: fix fuse_permission() for the default_permissions case Vivek Goyal
2018-12-19 21:25 ` kbuild test robot
2018-12-11 12:54 ` [PATCH 00/52] [RFC] virtio-fs: shared file system for virtual machines Stefan Hajnoczi
2018-12-12 20:30 ` Konrad Rzeszutek Wilk
2018-12-12 21:22 ` Vivek Goyal
2019-02-12 15:56 ` Aneesh Kumar K.V
2019-02-12 18:57 ` Vivek Goyal
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20181210171318.16998-35-vgoyal@redhat.com \
--to=vgoyal@redhat.com \
--cc=dgilbert@redhat.com \
--cc=kvm@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=miklos@szeredi.hu \
--cc=stefanha@redhat.com \
--cc=sweil@redhat.com \
--cc=swhiteho@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).