All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dan Williams <dan.j.williams@intel.com>
To: linux-nvdimm@lists.01.org
Cc: "J. Bruce Fields" <bfields@fieldses.org>, Jan Kara <jack@suse.cz>,
	"Darrick J. Wong" <darrick.wong@oracle.com>,
	linux-rdma@vger.kernel.org, linux-api@vger.kernel.org,
	Dave Chinner <david@fromorbit.com>,
	linux-xfs@vger.kernel.org, linux-mm@kvack.org,
	linux-fsdevel@vger.kernel.org,
	Jeff Layton <jlayton@poochiereds.net>,
	Christoph Hellwig <hch@lst.de>
Subject: [PATCH v7 04/12] fs: MAP_DIRECT core
Date: Fri, 06 Oct 2017 15:35:38 -0700	[thread overview]
Message-ID: <150732933864.22363.2459100387849051724.stgit@dwillia2-desk3.amr.corp.intel.com> (raw)
In-Reply-To: <150732931273.22363.8436792888326501071.stgit@dwillia2-desk3.amr.corp.intel.com>

Introduce a set of helper APIs for filesystems to establish FL_LAYOUT
leases to protect against writes and block map updates while a
MAP_DIRECT mapping is established. While the lease protects against the
syscall write path and fallocate, it does not protect against allocating
write-faults, so this relies on i_mapdcount to disable block map updates
from write faults.

As in the pNFS case, MAP_DIRECT does its own timeout of the lease, since
we need a process context for running map_direct_invalidate().

Cc: Jan Kara <jack@suse.cz>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Chinner <david@fromorbit.com>
Cc: "Darrick J. Wong" <darrick.wong@oracle.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Jeff Layton <jlayton@poochiereds.net>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/Makefile               |    2 
 fs/mapdirect.c            |  232 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mapdirect.h |   45 +++++++++
 3 files changed, 278 insertions(+), 1 deletion(-)
 create mode 100644 fs/mapdirect.c
 create mode 100644 include/linux/mapdirect.h

diff --git a/fs/Makefile b/fs/Makefile
index 7bbaca9c67b1..c0e791d235d8 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -29,7 +29,7 @@ obj-$(CONFIG_TIMERFD)		+= timerfd.o
 obj-$(CONFIG_EVENTFD)		+= eventfd.o
 obj-$(CONFIG_USERFAULTFD)	+= userfaultfd.o
 obj-$(CONFIG_AIO)               += aio.o
-obj-$(CONFIG_FS_DAX)		+= dax.o
+obj-$(CONFIG_FS_DAX)		+= dax.o mapdirect.o
 obj-$(CONFIG_FS_ENCRYPTION)	+= crypto/
 obj-$(CONFIG_FILE_LOCKING)      += locks.o
 obj-$(CONFIG_COMPAT)		+= compat.o compat_ioctl.o
diff --git a/fs/mapdirect.c b/fs/mapdirect.c
new file mode 100644
index 000000000000..9ac7c1d946a2
--- /dev/null
+++ b/fs/mapdirect.c
@@ -0,0 +1,232 @@
+/*
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/mapdirect.h>
+#include <linux/workqueue.h>
+#include <linux/signal.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+
+#define MAPDIRECT_BREAK 0
+#define MAPDIRECT_VALID 1
+
+struct map_direct_state {
+	atomic_t mds_ref;
+	atomic_t mds_vmaref;
+	unsigned long mds_state;
+	struct inode *mds_inode;
+	struct delayed_work mds_work;
+	struct fasync_struct *mds_fa;
+	struct vm_area_struct *mds_vma;
+};
+
+bool is_map_direct_valid(struct map_direct_state *mds)
+{
+	return test_bit(MAPDIRECT_VALID, &mds->mds_state);
+}
+EXPORT_SYMBOL_GPL(is_map_direct_valid);
+
+static void put_map_direct(struct map_direct_state *mds)
+{
+	if (!atomic_dec_and_test(&mds->mds_ref))
+		return;
+	kfree(mds);
+}
+
+int put_map_direct_vma(struct map_direct_state *mds)
+{
+	struct vm_area_struct *vma = mds->mds_vma;
+	struct file *file = vma->vm_file;
+	struct inode *inode = file_inode(file);
+	void *owner = mds;
+
+	if (!atomic_dec_and_test(&mds->mds_vmaref))
+		return 0;
+
+	/*
+	 * Flush in-flight+forced lm_break events that may be
+	 * referencing this dying vma.
+	 */
+	mds->mds_vma = NULL;
+	set_bit(MAPDIRECT_BREAK, &mds->mds_state);
+	vfs_setlease(vma->vm_file, F_UNLCK, NULL, &owner);
+	flush_delayed_work(&mds->mds_work);
+	iput(inode);
+
+	put_map_direct(mds);
+	return 1;
+}
+EXPORT_SYMBOL_GPL(put_map_direct_vma);
+
+void get_map_direct_vma(struct map_direct_state *mds)
+{
+	atomic_inc(&mds->mds_vmaref);
+}
+EXPORT_SYMBOL_GPL(get_map_direct_vma);
+
+static void map_direct_invalidate(struct work_struct *work)
+{
+	struct map_direct_state *mds;
+	struct vm_area_struct *vma;
+	struct inode *inode;
+	void *owner;
+
+	mds = container_of(work, typeof(*mds), mds_work.work);
+	/* From here on is_map_direct_valid() reports false for this mds */
+	clear_bit(MAPDIRECT_VALID, &mds->mds_state);
+	/* mds_vma is NULL once put_map_direct_vma() has begun teardown */
+	vma = ACCESS_ONCE(mds->mds_vma);
+	inode = mds->mds_inode;
+	if (vma) {
+		unsigned long len = vma->vm_end - vma->vm_start;
+		loff_t start = (loff_t) vma->vm_pgoff * PAGE_SIZE;
+
+		unmap_mapping_range(inode->i_mapping, start, len, 1);
+		owner = mds; /* teardown path unlocks the lease itself */
+		vfs_setlease(vma->vm_file, F_UNLCK, NULL, &owner);
+	}
+
+	put_map_direct(mds);
+}
+
+static bool map_direct_lm_break(struct file_lock *fl)
+{
+	struct map_direct_state *mds = fl->fl_owner;
+
+	/*
+	 * Given that we need to take sleeping locks to invalidate the
+	 * mapping we schedule that work with the original timeout set
+	 * by the file-locks core. Then we tell the core to hold off on
+	 * continuing with the lease break until the delayed work
+	 * completes the invalidation and the lease unlock.
+	 *
+	 * Note that this assumes that i_mapdcount is protecting against
+	 * block-map modifying write-faults since we are unable to use
+	 * leases in that path due to locking constraints.
+	 */
+	if (!test_and_set_bit(MAPDIRECT_BREAK, &mds->mds_state)) {
+		schedule_delayed_work(&mds->mds_work, lease_break_time * HZ);
+		kill_fasync(&fl->fl_fasync, SIGIO, POLL_MSG);
+	}
+
+	/* Tell the core lease code to wait for delayed work completion */
+	fl->fl_break_time = 0;
+
+	return false;
+}
+
+static int map_direct_lm_change(struct file_lock *fl, int arg,
+		struct list_head *dispose)
+{
+	struct map_direct_state *mds = fl->fl_owner;
+
+	WARN_ON(!(arg & F_UNLCK));
+
+	i_mapdcount_dec(mds->mds_inode);
+	return lease_modify(fl, arg, dispose);
+}
+
+static void map_direct_lm_setup(struct file_lock *fl, void **priv)
+{
+	struct file *file = fl->fl_file;
+	struct map_direct_state *mds = *priv;
+	struct fasync_struct *fa = mds->mds_fa;
+
+	/*
+	 * Comment copied from lease_setup():
+	 * fasync_insert_entry() returns the old entry if any. If there was no
+	 * old entry, then it used "priv" and inserted it into the fasync list.
+	 * Clear the pointer to indicate that it shouldn't be freed.
+	 */
+	if (!fasync_insert_entry(fa->fa_fd, file, &fl->fl_fasync, fa))
+		*priv = NULL;
+
+	__f_setown(file, task_pid(current), PIDTYPE_PID, 0);
+}
+
+static const struct lock_manager_operations map_direct_lm_ops = {
+	.lm_break = map_direct_lm_break,
+	.lm_change = map_direct_lm_change,
+	.lm_setup = map_direct_lm_setup,
+};
+
+struct map_direct_state *map_direct_register(int fd, struct vm_area_struct *vma)
+{
+	struct map_direct_state *mds = kzalloc(sizeof(*mds), GFP_KERNEL);
+	struct file *file = vma->vm_file;
+	struct inode *inode = file_inode(file);
+	struct fasync_struct *fa;
+	struct file_lock *fl;
+	void *owner = mds;
+	int rc = -ENOMEM;
+
+	if (!mds)
+		return ERR_PTR(-ENOMEM);
+
+	mds->mds_vma = vma;
+	atomic_set(&mds->mds_ref, 1);
+	atomic_set(&mds->mds_vmaref, 1);
+	set_bit(MAPDIRECT_VALID, &mds->mds_state);
+	mds->mds_inode = inode;
+	ihold(inode);
+	INIT_DELAYED_WORK(&mds->mds_work, map_direct_invalidate);
+
+	fa = fasync_alloc();
+	if (!fa)
+		goto err_fasync_alloc;
+	mds->mds_fa = fa;
+	fa->fa_fd = fd;
+
+	fl = locks_alloc_lock();
+	if (!fl)
+		goto err_lock_alloc;
+
+	locks_init_lock(fl);
+	fl->fl_lmops = &map_direct_lm_ops;
+	fl->fl_flags = FL_LAYOUT;
+	fl->fl_type = F_RDLCK;
+	fl->fl_end = OFFSET_MAX;
+	fl->fl_owner = mds;
+	atomic_inc(&mds->mds_ref);
+	fl->fl_pid = current->tgid;
+	fl->fl_file = file;
+
+	rc = vfs_setlease(file, fl->fl_type, &fl, &owner);
+	if (rc)
+		goto err_setlease;
+	if (fl) {
+		WARN_ON(1);
+		owner = mds;
+		vfs_setlease(file, F_UNLCK, NULL, &owner);
+		owner = NULL;
+		rc = -ENXIO;
+		goto err_setlease;
+	}
+
+	i_mapdcount_inc(inode);
+	return mds;
+
+err_setlease:
+	locks_free_lock(fl);
+err_lock_alloc:
+	/* if owner is NULL then the lease machinery is responsible for @fa */
+	if (owner)
+		fasync_free(fa);
+err_fasync_alloc:
+	iput(inode);
+	kfree(mds);
+	return ERR_PTR(rc);
+}
+EXPORT_SYMBOL_GPL(map_direct_register);
diff --git a/include/linux/mapdirect.h b/include/linux/mapdirect.h
new file mode 100644
index 000000000000..724e27d8615e
--- /dev/null
+++ b/include/linux/mapdirect.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#ifndef __MAPDIRECT_H__
+#define __MAPDIRECT_H__
+#include <linux/err.h>
+
+struct inode;
+struct work_struct;
+struct vm_area_struct;
+struct map_direct_state;
+
+#if IS_ENABLED(CONFIG_FS_DAX)
+struct map_direct_state *map_direct_register(int fd, struct vm_area_struct *vma);
+int put_map_direct_vma(struct map_direct_state *mds);
+void get_map_direct_vma(struct map_direct_state *mds);
+bool is_map_direct_valid(struct map_direct_state *mds);
+#else
+static inline struct map_direct_state *map_direct_register(int fd,
+		struct vm_area_struct *vma)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+static inline int put_map_direct_vma(struct map_direct_state *mds)
+{
+	return 0; /* !CONFIG_FS_DAX stub: nothing to release */
+}
+static inline void get_map_direct_vma(struct map_direct_state *mds)
+{
+}
+static inline bool is_map_direct_valid(struct map_direct_state *mds)
+{
+	return false; /* !CONFIG_FS_DAX stub: no mapping is ever valid */
+}
+#endif
+#endif /* __MAPDIRECT_H__ */

_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm

WARNING: multiple messages have this Message-ID (diff)
From: Dan Williams <dan.j.williams-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
To: linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw@public.gmane.org
Cc: linux-xfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Jan Kara <jack-AlSwsSmVLrQ@public.gmane.org>,
	"Darrick J. Wong"
	<darrick.wong-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-api-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Dave Chinner <david-FqsqvQoI3Ljby3iVrkZq2A@public.gmane.org>,
	Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>,
	"J. Bruce Fields"
	<bfields-uC3wQj2KruNg9hUCZPvPmw@public.gmane.org>,
	linux-mm-Bw31MaZKKs3YtjvyW6yDsg@public.gmane.org,
	Jeff Moyer <jmoyer-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Jeff Layton <jlayton-vpEMnDpepFuMZCB2o+C8xQ@public.gmane.org>,
	Ross Zwisler
	<ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
Subject: [PATCH v7 04/12] fs: MAP_DIRECT core
Date: Fri, 06 Oct 2017 15:35:38 -0700	[thread overview]
Message-ID: <150732933864.22363.2459100387849051724.stgit@dwillia2-desk3.amr.corp.intel.com> (raw)
In-Reply-To: <150732931273.22363.8436792888326501071.stgit-p8uTFz9XbKj2zm6wflaqv1nYeNYlB/vhral2JQCrhuEAvxtiuMwx3w@public.gmane.org>

Introduce a set of helper APIs for filesystems to establish FL_LAYOUT
leases to protect against writes and block map updates while a
MAP_DIRECT mapping is established. While the lease protects against the
syscall write path and fallocate, it does not protect against allocating
write-faults, so this relies on i_mapdcount to disable block map updates
from write faults.

As in the pNFS case, MAP_DIRECT does its own timeout of the lease, since
we need a process context for running map_direct_invalidate().

Cc: Jan Kara <jack-AlSwsSmVLrQ@public.gmane.org>
Cc: Jeff Moyer <jmoyer-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
Cc: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
Cc: Dave Chinner <david-FqsqvQoI3Ljby3iVrkZq2A@public.gmane.org>
Cc: "Darrick J. Wong" <darrick.wong-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
Cc: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
Cc: Jeff Layton <jlayton-vpEMnDpepFuMZCB2o+C8xQ@public.gmane.org>
Cc: "J. Bruce Fields" <bfields-uC3wQj2KruNg9hUCZPvPmw@public.gmane.org>
Signed-off-by: Dan Williams <dan.j.williams-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
 fs/Makefile               |    2 
 fs/mapdirect.c            |  232 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mapdirect.h |   45 +++++++++
 3 files changed, 278 insertions(+), 1 deletion(-)
 create mode 100644 fs/mapdirect.c
 create mode 100644 include/linux/mapdirect.h

diff --git a/fs/Makefile b/fs/Makefile
index 7bbaca9c67b1..c0e791d235d8 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -29,7 +29,7 @@ obj-$(CONFIG_TIMERFD)		+= timerfd.o
 obj-$(CONFIG_EVENTFD)		+= eventfd.o
 obj-$(CONFIG_USERFAULTFD)	+= userfaultfd.o
 obj-$(CONFIG_AIO)               += aio.o
-obj-$(CONFIG_FS_DAX)		+= dax.o
+obj-$(CONFIG_FS_DAX)		+= dax.o mapdirect.o
 obj-$(CONFIG_FS_ENCRYPTION)	+= crypto/
 obj-$(CONFIG_FILE_LOCKING)      += locks.o
 obj-$(CONFIG_COMPAT)		+= compat.o compat_ioctl.o
diff --git a/fs/mapdirect.c b/fs/mapdirect.c
new file mode 100644
index 000000000000..9ac7c1d946a2
--- /dev/null
+++ b/fs/mapdirect.c
@@ -0,0 +1,232 @@
+/*
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/mapdirect.h>
+#include <linux/workqueue.h>
+#include <linux/signal.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+
+#define MAPDIRECT_BREAK 0
+#define MAPDIRECT_VALID 1
+
+struct map_direct_state {
+	atomic_t mds_ref;
+	atomic_t mds_vmaref;
+	unsigned long mds_state;
+	struct inode *mds_inode;
+	struct delayed_work mds_work;
+	struct fasync_struct *mds_fa;
+	struct vm_area_struct *mds_vma;
+};
+
+bool is_map_direct_valid(struct map_direct_state *mds)
+{
+	return test_bit(MAPDIRECT_VALID, &mds->mds_state);
+}
+EXPORT_SYMBOL_GPL(is_map_direct_valid);
+
+static void put_map_direct(struct map_direct_state *mds)
+{
+	if (!atomic_dec_and_test(&mds->mds_ref))
+		return;
+	kfree(mds);
+}
+
+int put_map_direct_vma(struct map_direct_state *mds)
+{
+	struct vm_area_struct *vma = mds->mds_vma;
+	struct file *file = vma->vm_file;
+	struct inode *inode = file_inode(file);
+	void *owner = mds;
+
+	if (!atomic_dec_and_test(&mds->mds_vmaref))
+		return 0;
+
+	/*
+	 * Flush in-flight+forced lm_break events that may be
+	 * referencing this dying vma.
+	 */
+	mds->mds_vma = NULL;
+	set_bit(MAPDIRECT_BREAK, &mds->mds_state);
+	vfs_setlease(vma->vm_file, F_UNLCK, NULL, &owner);
+	flush_delayed_work(&mds->mds_work);
+	iput(inode);
+
+	put_map_direct(mds);
+	return 1;
+}
+EXPORT_SYMBOL_GPL(put_map_direct_vma);
+
+void get_map_direct_vma(struct map_direct_state *mds)
+{
+	atomic_inc(&mds->mds_vmaref);
+}
+EXPORT_SYMBOL_GPL(get_map_direct_vma);
+
+static void map_direct_invalidate(struct work_struct *work)
+{
+	struct map_direct_state *mds;
+	struct vm_area_struct *vma;
+	struct inode *inode;
+	void *owner;
+
+	mds = container_of(work, typeof(*mds), mds_work.work);
+	/* From here on is_map_direct_valid() reports false for this mds */
+	clear_bit(MAPDIRECT_VALID, &mds->mds_state);
+	/* mds_vma is NULL once put_map_direct_vma() has begun teardown */
+	vma = ACCESS_ONCE(mds->mds_vma);
+	inode = mds->mds_inode;
+	if (vma) {
+		unsigned long len = vma->vm_end - vma->vm_start;
+		loff_t start = (loff_t) vma->vm_pgoff * PAGE_SIZE;
+
+		unmap_mapping_range(inode->i_mapping, start, len, 1);
+		owner = mds; /* teardown path unlocks the lease itself */
+		vfs_setlease(vma->vm_file, F_UNLCK, NULL, &owner);
+	}
+
+	put_map_direct(mds);
+}
+
+static bool map_direct_lm_break(struct file_lock *fl)
+{
+	struct map_direct_state *mds = fl->fl_owner;
+
+	/*
+	 * Given that we need to take sleeping locks to invalidate the
+	 * mapping we schedule that work with the original timeout set
+	 * by the file-locks core. Then we tell the core to hold off on
+	 * continuing with the lease break until the delayed work
+	 * completes the invalidation and the lease unlock.
+	 *
+	 * Note that this assumes that i_mapdcount is protecting against
+	 * block-map modifying write-faults since we are unable to use
+	 * leases in that path due to locking constraints.
+	 */
+	if (!test_and_set_bit(MAPDIRECT_BREAK, &mds->mds_state)) {
+		schedule_delayed_work(&mds->mds_work, lease_break_time * HZ);
+		kill_fasync(&fl->fl_fasync, SIGIO, POLL_MSG);
+	}
+
+	/* Tell the core lease code to wait for delayed work completion */
+	fl->fl_break_time = 0;
+
+	return false;
+}
+
+static int map_direct_lm_change(struct file_lock *fl, int arg,
+		struct list_head *dispose)
+{
+	struct map_direct_state *mds = fl->fl_owner;
+
+	WARN_ON(!(arg & F_UNLCK));
+
+	i_mapdcount_dec(mds->mds_inode);
+	return lease_modify(fl, arg, dispose);
+}
+
+static void map_direct_lm_setup(struct file_lock *fl, void **priv)
+{
+	struct file *file = fl->fl_file;
+	struct map_direct_state *mds = *priv;
+	struct fasync_struct *fa = mds->mds_fa;
+
+	/*
+	 * Comment copied from lease_setup():
+	 * fasync_insert_entry() returns the old entry if any. If there was no
+	 * old entry, then it used "priv" and inserted it into the fasync list.
+	 * Clear the pointer to indicate that it shouldn't be freed.
+	 */
+	if (!fasync_insert_entry(fa->fa_fd, file, &fl->fl_fasync, fa))
+		*priv = NULL;
+
+	__f_setown(file, task_pid(current), PIDTYPE_PID, 0);
+}
+
+static const struct lock_manager_operations map_direct_lm_ops = {
+	.lm_break = map_direct_lm_break,
+	.lm_change = map_direct_lm_change,
+	.lm_setup = map_direct_lm_setup,
+};
+
+struct map_direct_state *map_direct_register(int fd, struct vm_area_struct *vma)
+{
+	struct map_direct_state *mds = kzalloc(sizeof(*mds), GFP_KERNEL);
+	struct file *file = vma->vm_file;
+	struct inode *inode = file_inode(file);
+	struct fasync_struct *fa;
+	struct file_lock *fl;
+	void *owner = mds;
+	int rc = -ENOMEM;
+
+	if (!mds)
+		return ERR_PTR(-ENOMEM);
+
+	mds->mds_vma = vma;
+	atomic_set(&mds->mds_ref, 1);
+	atomic_set(&mds->mds_vmaref, 1);
+	set_bit(MAPDIRECT_VALID, &mds->mds_state);
+	mds->mds_inode = inode;
+	ihold(inode);
+	INIT_DELAYED_WORK(&mds->mds_work, map_direct_invalidate);
+
+	fa = fasync_alloc();
+	if (!fa)
+		goto err_fasync_alloc;
+	mds->mds_fa = fa;
+	fa->fa_fd = fd;
+
+	fl = locks_alloc_lock();
+	if (!fl)
+		goto err_lock_alloc;
+
+	locks_init_lock(fl);
+	fl->fl_lmops = &map_direct_lm_ops;
+	fl->fl_flags = FL_LAYOUT;
+	fl->fl_type = F_RDLCK;
+	fl->fl_end = OFFSET_MAX;
+	fl->fl_owner = mds;
+	atomic_inc(&mds->mds_ref);
+	fl->fl_pid = current->tgid;
+	fl->fl_file = file;
+
+	rc = vfs_setlease(file, fl->fl_type, &fl, &owner);
+	if (rc)
+		goto err_setlease;
+	if (fl) {
+		WARN_ON(1);
+		owner = mds;
+		vfs_setlease(file, F_UNLCK, NULL, &owner);
+		owner = NULL;
+		rc = -ENXIO;
+		goto err_setlease;
+	}
+
+	i_mapdcount_inc(inode);
+	return mds;
+
+err_setlease:
+	locks_free_lock(fl);
+err_lock_alloc:
+	/* if owner is NULL then the lease machinery is responsible for @fa */
+	if (owner)
+		fasync_free(fa);
+err_fasync_alloc:
+	iput(inode);
+	kfree(mds);
+	return ERR_PTR(rc);
+}
+EXPORT_SYMBOL_GPL(map_direct_register);
diff --git a/include/linux/mapdirect.h b/include/linux/mapdirect.h
new file mode 100644
index 000000000000..724e27d8615e
--- /dev/null
+++ b/include/linux/mapdirect.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#ifndef __MAPDIRECT_H__
+#define __MAPDIRECT_H__
+#include <linux/err.h>
+
+struct inode;
+struct work_struct;
+struct vm_area_struct;
+struct map_direct_state;
+
+#if IS_ENABLED(CONFIG_FS_DAX)
+struct map_direct_state *map_direct_register(int fd, struct vm_area_struct *vma);
+int put_map_direct_vma(struct map_direct_state *mds);
+void get_map_direct_vma(struct map_direct_state *mds);
+bool is_map_direct_valid(struct map_direct_state *mds);
+#else
+static inline struct map_direct_state *map_direct_register(int fd,
+		struct vm_area_struct *vma)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+static inline int put_map_direct_vma(struct map_direct_state *mds)
+{
+	return 0; /* !CONFIG_FS_DAX stub: nothing to release */
+}
+static inline void get_map_direct_vma(struct map_direct_state *mds)
+{
+}
+static inline bool is_map_direct_valid(struct map_direct_state *mds)
+{
+	return false; /* !CONFIG_FS_DAX stub: no mapping is ever valid */
+}
+#endif
+#endif /* __MAPDIRECT_H__ */

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

WARNING: multiple messages have this Message-ID (diff)
From: Dan Williams <dan.j.williams@intel.com>
To: linux-nvdimm@lists.01.org
Cc: linux-xfs@vger.kernel.org, Jan Kara <jack@suse.cz>,
	"Darrick J. Wong" <darrick.wong@oracle.com>,
	linux-rdma@vger.kernel.org, linux-api@vger.kernel.org,
	Dave Chinner <david@fromorbit.com>,
	Christoph Hellwig <hch@lst.de>,
	"J. Bruce Fields" <bfields@fieldses.org>,
	linux-mm@kvack.org, Jeff Moyer <jmoyer@redhat.com>,
	linux-fsdevel@vger.kernel.org,
	Jeff Layton <jlayton@poochiereds.net>,
	Ross Zwisler <ross.zwisler@linux.intel.com>
Subject: [PATCH v7 04/12] fs: MAP_DIRECT core
Date: Fri, 06 Oct 2017 15:35:38 -0700	[thread overview]
Message-ID: <150732933864.22363.2459100387849051724.stgit@dwillia2-desk3.amr.corp.intel.com> (raw)
In-Reply-To: <150732931273.22363.8436792888326501071.stgit@dwillia2-desk3.amr.corp.intel.com>

Introduce a set of helper APIs for filesystems to establish FL_LAYOUT
leases to protect against writes and block map updates while a
MAP_DIRECT mapping is established. While the lease protects against the
syscall write path and fallocate, it does not protect against allocating
write-faults, so this relies on i_mapdcount to disable block map updates
from write faults.

As in the pNFS case, MAP_DIRECT does its own timeout of the lease, since
we need a process context for running map_direct_invalidate().

Cc: Jan Kara <jack@suse.cz>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Chinner <david@fromorbit.com>
Cc: "Darrick J. Wong" <darrick.wong@oracle.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Jeff Layton <jlayton@poochiereds.net>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/Makefile               |    2 
 fs/mapdirect.c            |  232 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mapdirect.h |   45 +++++++++
 3 files changed, 278 insertions(+), 1 deletion(-)
 create mode 100644 fs/mapdirect.c
 create mode 100644 include/linux/mapdirect.h

diff --git a/fs/Makefile b/fs/Makefile
index 7bbaca9c67b1..c0e791d235d8 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -29,7 +29,7 @@ obj-$(CONFIG_TIMERFD)		+= timerfd.o
 obj-$(CONFIG_EVENTFD)		+= eventfd.o
 obj-$(CONFIG_USERFAULTFD)	+= userfaultfd.o
 obj-$(CONFIG_AIO)               += aio.o
-obj-$(CONFIG_FS_DAX)		+= dax.o
+obj-$(CONFIG_FS_DAX)		+= dax.o mapdirect.o
 obj-$(CONFIG_FS_ENCRYPTION)	+= crypto/
 obj-$(CONFIG_FILE_LOCKING)      += locks.o
 obj-$(CONFIG_COMPAT)		+= compat.o compat_ioctl.o
diff --git a/fs/mapdirect.c b/fs/mapdirect.c
new file mode 100644
index 000000000000..9ac7c1d946a2
--- /dev/null
+++ b/fs/mapdirect.c
@@ -0,0 +1,232 @@
+/*
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/mapdirect.h>
+#include <linux/workqueue.h>
+#include <linux/signal.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+
+#define MAPDIRECT_BREAK 0
+#define MAPDIRECT_VALID 1
+
+struct map_direct_state {
+	atomic_t mds_ref;
+	atomic_t mds_vmaref;
+	unsigned long mds_state;
+	struct inode *mds_inode;
+	struct delayed_work mds_work;
+	struct fasync_struct *mds_fa;
+	struct vm_area_struct *mds_vma;
+};
+
+bool is_map_direct_valid(struct map_direct_state *mds)
+{
+	return test_bit(MAPDIRECT_VALID, &mds->mds_state);
+}
+EXPORT_SYMBOL_GPL(is_map_direct_valid);
+
+static void put_map_direct(struct map_direct_state *mds)
+{
+	if (!atomic_dec_and_test(&mds->mds_ref))
+		return;
+	kfree(mds);
+}
+
+int put_map_direct_vma(struct map_direct_state *mds)
+{
+	struct vm_area_struct *vma = mds->mds_vma;
+	struct file *file = vma->vm_file;
+	struct inode *inode = file_inode(file);
+	void *owner = mds;
+
+	if (!atomic_dec_and_test(&mds->mds_vmaref))
+		return 0;
+
+	/*
+	 * Flush in-flight+forced lm_break events that may be
+	 * referencing this dying vma.
+	 */
+	mds->mds_vma = NULL;
+	set_bit(MAPDIRECT_BREAK, &mds->mds_state);
+	vfs_setlease(vma->vm_file, F_UNLCK, NULL, &owner);
+	flush_delayed_work(&mds->mds_work);
+	iput(inode);
+
+	put_map_direct(mds);
+	return 1;
+}
+EXPORT_SYMBOL_GPL(put_map_direct_vma);
+
+void get_map_direct_vma(struct map_direct_state *mds)
+{
+	atomic_inc(&mds->mds_vmaref);
+}
+EXPORT_SYMBOL_GPL(get_map_direct_vma);
+
+static void map_direct_invalidate(struct work_struct *work)
+{
+	struct map_direct_state *mds;
+	struct vm_area_struct *vma;
+	struct inode *inode;
+	void *owner;
+
+	mds = container_of(work, typeof(*mds), mds_work.work);
+	/* From here on is_map_direct_valid() reports false for this mds */
+	clear_bit(MAPDIRECT_VALID, &mds->mds_state);
+	/* mds_vma is NULL once put_map_direct_vma() has begun teardown */
+	vma = ACCESS_ONCE(mds->mds_vma);
+	inode = mds->mds_inode;
+	if (vma) {
+		unsigned long len = vma->vm_end - vma->vm_start;
+		loff_t start = (loff_t) vma->vm_pgoff * PAGE_SIZE;
+
+		unmap_mapping_range(inode->i_mapping, start, len, 1);
+		owner = mds; /* teardown path unlocks the lease itself */
+		vfs_setlease(vma->vm_file, F_UNLCK, NULL, &owner);
+	}
+
+	put_map_direct(mds);
+}
+
+static bool map_direct_lm_break(struct file_lock *fl)
+{
+	struct map_direct_state *mds = fl->fl_owner;
+
+	/*
+	 * Given that we need to take sleeping locks to invalidate the
+	 * mapping we schedule that work with the original timeout set
+	 * by the file-locks core. Then we tell the core to hold off on
+	 * continuing with the lease break until the delayed work
+	 * completes the invalidation and the lease unlock.
+	 *
+	 * Note that this assumes that i_mapdcount is protecting against
+	 * block-map modifying write-faults since we are unable to use
+	 * leases in that path due to locking constraints.
+	 */
+	if (!test_and_set_bit(MAPDIRECT_BREAK, &mds->mds_state)) {
+		schedule_delayed_work(&mds->mds_work, lease_break_time * HZ);
+		kill_fasync(&fl->fl_fasync, SIGIO, POLL_MSG);
+	}
+
+	/* Tell the core lease code to wait for delayed work completion */
+	fl->fl_break_time = 0;
+
+	return false;
+}
+
+static int map_direct_lm_change(struct file_lock *fl, int arg,
+		struct list_head *dispose)
+{
+	struct map_direct_state *mds = fl->fl_owner;
+
+	WARN_ON(!(arg & F_UNLCK));
+
+	i_mapdcount_dec(mds->mds_inode);
+	return lease_modify(fl, arg, dispose);
+}
+
+static void map_direct_lm_setup(struct file_lock *fl, void **priv)
+{
+	struct file *file = fl->fl_file;
+	struct map_direct_state *mds = *priv;
+	struct fasync_struct *fa = mds->mds_fa;
+
+	/*
+	 * Comment copied from lease_setup():
+	 * fasync_insert_entry() returns the old entry if any. If there was no
+	 * old entry, then it used "priv" and inserted it into the fasync list.
+	 * Clear the pointer to indicate that it shouldn't be freed.
+	 */
+	if (!fasync_insert_entry(fa->fa_fd, file, &fl->fl_fasync, fa))
+		*priv = NULL;
+
+	__f_setown(file, task_pid(current), PIDTYPE_PID, 0);
+}
+
+static const struct lock_manager_operations map_direct_lm_ops = {
+	.lm_break = map_direct_lm_break,
+	.lm_change = map_direct_lm_change,
+	.lm_setup = map_direct_lm_setup,
+};
+
+struct map_direct_state *map_direct_register(int fd, struct vm_area_struct *vma)
+{
+	struct map_direct_state *mds = kzalloc(sizeof(*mds), GFP_KERNEL);
+	struct file *file = vma->vm_file;
+	struct inode *inode = file_inode(file);
+	struct fasync_struct *fa;
+	struct file_lock *fl;
+	void *owner = mds;
+	int rc = -ENOMEM;
+
+	if (!mds)
+		return ERR_PTR(-ENOMEM);
+
+	mds->mds_vma = vma;
+	atomic_set(&mds->mds_ref, 1);
+	atomic_set(&mds->mds_vmaref, 1);
+	set_bit(MAPDIRECT_VALID, &mds->mds_state);
+	mds->mds_inode = inode;
+	ihold(inode);
+	INIT_DELAYED_WORK(&mds->mds_work, map_direct_invalidate);
+
+	fa = fasync_alloc();
+	if (!fa)
+		goto err_fasync_alloc;
+	mds->mds_fa = fa;
+	fa->fa_fd = fd;
+
+	fl = locks_alloc_lock();
+	if (!fl)
+		goto err_lock_alloc;
+
+	locks_init_lock(fl);
+	fl->fl_lmops = &map_direct_lm_ops;
+	fl->fl_flags = FL_LAYOUT;
+	fl->fl_type = F_RDLCK;
+	fl->fl_end = OFFSET_MAX;
+	fl->fl_owner = mds;
+	atomic_inc(&mds->mds_ref);
+	fl->fl_pid = current->tgid;
+	fl->fl_file = file;
+
+	rc = vfs_setlease(file, fl->fl_type, &fl, &owner);
+	if (rc)
+		goto err_setlease;
+	if (fl) {
+		WARN_ON(1);
+		owner = mds;
+		vfs_setlease(file, F_UNLCK, NULL, &owner);
+		owner = NULL;
+		rc = -ENXIO;
+		goto err_setlease;
+	}
+
+	i_mapdcount_inc(inode);
+	return mds;
+
+err_setlease:
+	locks_free_lock(fl);
+err_lock_alloc:
+	/* if owner is NULL then the lease machinery is responsible for @fa */
+	if (owner)
+		fasync_free(fa);
+err_fasync_alloc:
+	iput(inode);
+	kfree(mds);
+	return ERR_PTR(rc);
+}
+EXPORT_SYMBOL_GPL(map_direct_register);
diff --git a/include/linux/mapdirect.h b/include/linux/mapdirect.h
new file mode 100644
index 000000000000..724e27d8615e
--- /dev/null
+++ b/include/linux/mapdirect.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#ifndef __MAPDIRECT_H__
+#define __MAPDIRECT_H__
+#include <linux/err.h>
+
+struct inode;
+struct work_struct;
+struct vm_area_struct;
+struct map_direct_state;
+
+/*
+ * MAP_DIRECT helper API. The real implementations are only declared when
+ * CONFIG_FS_DAX is enabled; otherwise the stubs below are used.
+ *
+ * Every stub must be 'static inline': a plain function definition in a
+ * header produces one external definition per including translation unit
+ * and breaks the link with multiple-definition errors.
+ */
+#if IS_ENABLED(CONFIG_FS_DAX)
+struct map_direct_state *map_direct_register(int fd, struct vm_area_struct *vma);
+int put_map_direct_vma(struct map_direct_state *mds);
+void get_map_direct_vma(struct map_direct_state *mds);
+bool is_map_direct_valid(struct map_direct_state *mds);
+#else
+/* Stub: registration always fails with -EOPNOTSUPP. */
+static inline struct map_direct_state *map_direct_register(int fd,
+		struct vm_area_struct *vma)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+/* Stub: nothing to release; always returns 0. */
+static inline int put_map_direct_vma(struct map_direct_state *mds)
+{
+	return 0;
+}
+/* Stub: no reference counting to do. */
+static inline void get_map_direct_vma(struct map_direct_state *mds)
+{
+}
+/* Stub: always reports the registration as not valid. */
+static inline bool is_map_direct_valid(struct map_direct_state *mds)
+{
+	return false;
+}
+#endif
+#endif /* __MAPDIRECT_H__ */

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

WARNING: multiple messages have this Message-ID (diff)
From: Dan Williams <dan.j.williams@intel.com>
To: linux-nvdimm@lists.01.org
Cc: linux-xfs@vger.kernel.org, Jan Kara <jack@suse.cz>,
	"Darrick J. Wong" <darrick.wong@oracle.com>,
	linux-rdma@vger.kernel.org, linux-api@vger.kernel.org,
	Dave Chinner <david@fromorbit.com>,
	Christoph Hellwig <hch@lst.de>,
	"J. Bruce Fields" <bfields@fieldses.org>,
	linux-mm@kvack.org, Jeff Moyer <jmoyer@redhat.com>,
	linux-fsdevel@vger.kernel.org,
	Jeff Layton <jlayton@poochiereds.net>,
	Ross Zwisler <ross.zwisler@linux.intel.com>
Subject: [PATCH v7 04/12] fs: MAP_DIRECT core
Date: Fri, 06 Oct 2017 15:35:38 -0700	[thread overview]
Message-ID: <150732933864.22363.2459100387849051724.stgit@dwillia2-desk3.amr.corp.intel.com> (raw)
In-Reply-To: <150732931273.22363.8436792888326501071.stgit@dwillia2-desk3.amr.corp.intel.com>

Introduce a set of helper APIs for filesystems to establish FL_LAYOUT
leases to protect against writes and block map updates while a
MAP_DIRECT mapping is established. While the lease protects against the
syscall write path and fallocate, it does not protect against allocating
write-faults, so this relies on i_mapdcount to disable block map updates
from write faults.

Like the pNFS case, MAP_DIRECT does its own timeout of the lease since we
need to have a process context for running map_direct_invalidate().

Cc: Jan Kara <jack@suse.cz>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Chinner <david@fromorbit.com>
Cc: "Darrick J. Wong" <darrick.wong@oracle.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Jeff Layton <jlayton@poochiereds.net>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/Makefile               |    2 
 fs/mapdirect.c            |  232 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mapdirect.h |   45 +++++++++
 3 files changed, 278 insertions(+), 1 deletion(-)
 create mode 100644 fs/mapdirect.c
 create mode 100644 include/linux/mapdirect.h

diff --git a/fs/Makefile b/fs/Makefile
index 7bbaca9c67b1..c0e791d235d8 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -29,7 +29,7 @@ obj-$(CONFIG_TIMERFD)		+= timerfd.o
 obj-$(CONFIG_EVENTFD)		+= eventfd.o
 obj-$(CONFIG_USERFAULTFD)	+= userfaultfd.o
 obj-$(CONFIG_AIO)               += aio.o
-obj-$(CONFIG_FS_DAX)		+= dax.o
+obj-$(CONFIG_FS_DAX)		+= dax.o mapdirect.o
 obj-$(CONFIG_FS_ENCRYPTION)	+= crypto/
 obj-$(CONFIG_FILE_LOCKING)      += locks.o
 obj-$(CONFIG_COMPAT)		+= compat.o compat_ioctl.o
diff --git a/fs/mapdirect.c b/fs/mapdirect.c
new file mode 100644
index 000000000000..9ac7c1d946a2
--- /dev/null
+++ b/fs/mapdirect.c
@@ -0,0 +1,232 @@
+/*
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/mapdirect.h>
+#include <linux/workqueue.h>
+#include <linux/signal.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+
+#define MAPDIRECT_BREAK 0	/* mds_state bit: set by lm_break and by the final vma put */
+#define MAPDIRECT_VALID 1	/* mds_state bit: set at register, cleared by invalidate */
+
+struct map_direct_state {
+	atomic_t mds_ref;		/* object lifetime; kfree() when it hits zero */
+	atomic_t mds_vmaref;		/* vma users; the last put unlocks the lease */
+	unsigned long mds_state;	/* MAPDIRECT_BREAK / MAPDIRECT_VALID bits */
+	struct inode *mds_inode;	/* inode of the mapped file */
+	struct delayed_work mds_work;	/* runs map_direct_invalidate() */
+	struct fasync_struct *mds_fa;	/* fasync entry offered to the lease in lm_setup */
+	struct vm_area_struct *mds_vma;	/* the mapping; NULL after the final vma put */
+};
+
+/**
+ * is_map_direct_valid - test whether a MAP_DIRECT registration is still valid
+ * @mds: state returned by map_direct_register()
+ *
+ * Return: true while MAPDIRECT_VALID is set in @mds->mds_state, i.e. until
+ * map_direct_invalidate() has cleared it.
+ */
+bool is_map_direct_valid(struct map_direct_state *mds)
+{
+	const unsigned long *state = &mds->mds_state;
+
+	return test_bit(MAPDIRECT_VALID, state);
+}
+EXPORT_SYMBOL_GPL(is_map_direct_valid);
+
+/* Drop one mds_ref reference; whoever drops the last one frees @mds. */
+static void put_map_direct(struct map_direct_state *mds)
+{
+	if (atomic_dec_and_test(&mds->mds_ref))
+		kfree(mds);
+}
+
+/**
+ * put_map_direct_vma - drop a vma reference on a MAP_DIRECT registration
+ * @mds: state returned by map_direct_register()
+ *
+ * When the last vma reference goes away the lease is unlocked, any pending
+ * invalidation work is flushed, the inode reference is released and one
+ * mds_ref reference is dropped.
+ *
+ * Return: 1 if this call dropped the final vma reference, 0 otherwise.
+ */
+int put_map_direct_vma(struct map_direct_state *mds)
+{
+	struct vm_area_struct *dying = mds->mds_vma;
+	struct file *filp = dying->vm_file;
+	struct inode *ino = file_inode(filp);
+	void *lease_owner = mds;
+
+	if (atomic_dec_and_test(&mds->mds_vmaref)) {
+		/*
+		 * Hide the dying vma from map_direct_invalidate(), stop
+		 * lm_break from queueing new work, drop the lease, then wait
+		 * for any work that was already queued.
+		 */
+		mds->mds_vma = NULL;
+		set_bit(MAPDIRECT_BREAK, &mds->mds_state);
+		vfs_setlease(filp, F_UNLCK, NULL, &lease_owner);
+		flush_delayed_work(&mds->mds_work);
+		iput(ino);
+
+		put_map_direct(mds);
+		return 1;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(put_map_direct_vma);
+
+/**
+ * get_map_direct_vma - take an additional vma reference on @mds
+ * @mds: state returned by map_direct_register()
+ *
+ * Each call is balanced by a later put_map_direct_vma().
+ */
+void get_map_direct_vma(struct map_direct_state *mds)
+{
+	atomic_t *vma_refs = &mds->mds_vmaref;
+
+	atomic_inc(vma_refs);
+}
+EXPORT_SYMBOL_GPL(get_map_direct_vma);
+
+/*
+ * map_direct_invalidate - delayed work that revokes a MAP_DIRECT mapping
+ * @work: the work_struct embedded in map_direct_state::mds_work
+ *
+ * Clears MAPDIRECT_VALID, unmaps the file range covered by the vma (if one
+ * is still attached), unlocks the lease and drops one mds_ref reference.
+ *
+ * put_map_direct_vma() sets mds_vma to NULL and unlocks the lease itself
+ * before flushing this work, so when no vma is attached there is no file to
+ * unlock through and nothing left to unlock; dereferencing the NULL vma in
+ * that case would oops.
+ */
+static void map_direct_invalidate(struct work_struct *work)
+{
+	struct map_direct_state *mds;
+	struct vm_area_struct *vma;
+	struct inode *inode;
+	void *owner;
+
+	mds = container_of(work, typeof(*mds), mds_work.work);
+
+	clear_bit(MAPDIRECT_VALID, &mds->mds_state);
+
+	vma = ACCESS_ONCE(mds->mds_vma);
+	inode = mds->mds_inode;
+	if (vma) {
+		unsigned long len = vma->vm_end - vma->vm_start;
+		loff_t start = (loff_t) vma->vm_pgoff * PAGE_SIZE;
+
+		unmap_mapping_range(inode->i_mapping, start, len, 1);
+
+		/* Only reachable with a live vma: vm_file is needed here. */
+		owner = mds;
+		vfs_setlease(vma->vm_file, F_UNLCK, NULL, &owner);
+	}
+
+	put_map_direct(mds);
+}
+
+/*
+ * map_direct_lm_break - lease-break callback for MAP_DIRECT leases
+ * @fl: the lease being broken; @fl->fl_owner is the map_direct_state
+ *
+ * Invalidating the mapping requires sleeping locks, so the first break
+ * request queues map_direct_invalidate() as delayed work, using the
+ * file-locks core timeout (lease_break_time seconds), and notifies the
+ * lease holder with SIGIO. Repeated break requests do nothing further.
+ *
+ * This relies on i_mapdcount to fend off block-map modifying write-faults,
+ * because leases cannot be used in that path due to locking constraints.
+ *
+ * Always returns false, after zeroing fl_break_time so that the core waits
+ * for the delayed work to finish the invalidation and the lease unlock.
+ */
+static bool map_direct_lm_break(struct file_lock *fl)
+{
+	struct map_direct_state *mds = fl->fl_owner;
+	bool already_breaking;
+
+	already_breaking = test_and_set_bit(MAPDIRECT_BREAK, &mds->mds_state);
+	if (!already_breaking) {
+		schedule_delayed_work(&mds->mds_work, lease_break_time * HZ);
+		kill_fasync(&fl->fl_fasync, SIGIO, POLL_MSG);
+	}
+
+	/* Tell the core lease code to wait for delayed work completion */
+	fl->fl_break_time = 0;
+
+	return false;
+}
+
+/*
+ * map_direct_lm_change - lease-change callback for MAP_DIRECT leases
+ * @fl: the lease being changed; @fl->fl_owner is the map_direct_state
+ * @arg: requested lease type; only F_UNLCK is expected (warns otherwise)
+ * @dispose: list handed through to lease_modify()
+ *
+ * Decrements the inode's i_mapdcount and lets lease_modify() do the rest.
+ *
+ * Return: whatever lease_modify() returns.
+ */
+static int map_direct_lm_change(struct file_lock *fl, int arg,
+		struct list_head *dispose)
+{
+	struct map_direct_state *mds = fl->fl_owner;
+	struct inode *inode = mds->mds_inode;
+
+	WARN_ON((arg & F_UNLCK) == 0);
+	i_mapdcount_dec(inode);
+
+	return lease_modify(fl, arg, dispose);
+}
+
+/*
+ * map_direct_lm_setup - lease-setup callback for MAP_DIRECT leases
+ * @fl: the lease being set up
+ * @priv: in: pointer to the owning map_direct_state; out: set to NULL once
+ *        the state's fasync entry has been consumed
+ *
+ * Hooks the pre-allocated fasync entry into the lease and makes the current
+ * task the owner of the file for signal delivery.
+ */
+static void map_direct_lm_setup(struct file_lock *fl, void **priv)
+{
+	struct map_direct_state *mds = *priv;
+	struct fasync_struct *new_fa = mds->mds_fa;
+	struct file *filp = fl->fl_file;
+	struct fasync_struct *old_fa;
+
+	/*
+	 * As in lease_setup(): fasync_insert_entry() hands back the old
+	 * entry if there was one. With no old entry it has inserted
+	 * @new_fa into the fasync list, so clear *priv to signal that the
+	 * caller must not free it.
+	 */
+	old_fa = fasync_insert_entry(new_fa->fa_fd, filp, &fl->fl_fasync,
+			new_fa);
+	if (!old_fa)
+		*priv = NULL;
+
+	__f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
+}
+
+/* Lease callbacks attached to the FL_LAYOUT lease set up by map_direct_register(). */
+static const struct lock_manager_operations map_direct_lm_ops = {
+	.lm_setup	= map_direct_lm_setup,
+	.lm_change	= map_direct_lm_change,
+	.lm_break	= map_direct_lm_break,
+};
+
+/**
+ * map_direct_register - create MAP_DIRECT tracking state and take its lease
+ * @fd: file descriptor recorded in the fasync entry used for SIGIO delivery
+ * @vma: mapping being established; @vma->vm_file supplies the file and inode
+ *
+ * Allocates a map_direct_state, pins the inode with ihold(), and requests an
+ * FL_LAYOUT / F_RDLCK lease owned by the new state via vfs_setlease(). On
+ * success the inode's i_mapdcount is incremented.
+ *
+ * Return: the new state on success; ERR_PTR(-ENOMEM) if any allocation
+ * fails; ERR_PTR() of the vfs_setlease() error; or ERR_PTR(-ENXIO) if @fl is
+ * unexpectedly still set after vfs_setlease() reported success.
+ */
+struct map_direct_state *map_direct_register(int fd, struct vm_area_struct *vma)
+{
+	struct file *file = vma->vm_file;
+	struct inode *inode = file_inode(file);
+	struct map_direct_state *mds;
+	struct fasync_struct *fa;
+	struct file_lock *fl;
+	void *owner;
+	int rc;
+
+	mds = kzalloc(sizeof(*mds), GFP_KERNEL);
+	if (!mds)
+		return ERR_PTR(-ENOMEM);
+	owner = mds;
+
+	/* Initial state: one object reference, one vma reference, valid. */
+	ihold(inode);
+	mds->mds_inode = inode;
+	mds->mds_vma = vma;
+	atomic_set(&mds->mds_ref, 1);
+	atomic_set(&mds->mds_vmaref, 1);
+	set_bit(MAPDIRECT_VALID, &mds->mds_state);
+	INIT_DELAYED_WORK(&mds->mds_work, map_direct_invalidate);
+
+	rc = -ENOMEM;
+	fa = fasync_alloc();
+	if (!fa)
+		goto err_fasync_alloc;
+	fa->fa_fd = fd;
+	mds->mds_fa = fa;
+
+	fl = locks_alloc_lock();
+	if (!fl)
+		goto err_lock_alloc;
+
+	locks_init_lock(fl);
+	fl->fl_file = file;
+	fl->fl_pid = current->tgid;
+	/* The lease points back at @mds, so it holds its own reference. */
+	fl->fl_owner = mds;
+	atomic_inc(&mds->mds_ref);
+	fl->fl_end = OFFSET_MAX;
+	fl->fl_type = F_RDLCK;
+	fl->fl_flags = FL_LAYOUT;
+	fl->fl_lmops = &map_direct_lm_ops;
+
+	rc = vfs_setlease(file, fl->fl_type, &fl, &owner);
+	if (rc)
+		goto err_setlease;
+	if (!fl) {
+		i_mapdcount_inc(inode);
+		return mds;
+	}
+
+	/* Not expected: @fl is still ours after a successful setlease. */
+	WARN_ON(1);
+	owner = mds;
+	vfs_setlease(file, F_UNLCK, NULL, &owner);
+	owner = NULL;
+	rc = -ENXIO;
+
+err_setlease:
+	locks_free_lock(fl);
+err_lock_alloc:
+	/* if owner is NULL then the lease machinery is responsible for @fa */
+	if (owner)
+		fasync_free(fa);
+err_fasync_alloc:
+	iput(inode);
+	kfree(mds);
+	return ERR_PTR(rc);
+}
+EXPORT_SYMBOL_GPL(map_direct_register);
diff --git a/include/linux/mapdirect.h b/include/linux/mapdirect.h
new file mode 100644
index 000000000000..724e27d8615e
--- /dev/null
+++ b/include/linux/mapdirect.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#ifndef __MAPDIRECT_H__
+#define __MAPDIRECT_H__
+#include <linux/err.h>
+
+struct inode;
+struct work_struct;
+struct vm_area_struct;
+struct map_direct_state;
+
+/*
+ * MAP_DIRECT helper API. The real implementations are only declared when
+ * CONFIG_FS_DAX is enabled; otherwise the stubs below are used.
+ *
+ * Every stub must be 'static inline': a plain function definition in a
+ * header produces one external definition per including translation unit
+ * and breaks the link with multiple-definition errors.
+ */
+#if IS_ENABLED(CONFIG_FS_DAX)
+struct map_direct_state *map_direct_register(int fd, struct vm_area_struct *vma);
+int put_map_direct_vma(struct map_direct_state *mds);
+void get_map_direct_vma(struct map_direct_state *mds);
+bool is_map_direct_valid(struct map_direct_state *mds);
+#else
+/* Stub: registration always fails with -EOPNOTSUPP. */
+static inline struct map_direct_state *map_direct_register(int fd,
+		struct vm_area_struct *vma)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+/* Stub: nothing to release; always returns 0. */
+static inline int put_map_direct_vma(struct map_direct_state *mds)
+{
+	return 0;
+}
+/* Stub: no reference counting to do. */
+static inline void get_map_direct_vma(struct map_direct_state *mds)
+{
+}
+/* Stub: always reports the registration as not valid. */
+static inline bool is_map_direct_valid(struct map_direct_state *mds)
+{
+	return false;
+}
+#endif
+#endif /* __MAPDIRECT_H__ */


  parent reply	other threads:[~2017-10-06 22:38 UTC|newest]

Thread overview: 158+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-10-06 22:35 [PATCH v7 00/12] MAP_DIRECT for DAX RDMA and userspace flush Dan Williams
2017-10-06 22:35 ` Dan Williams
2017-10-06 22:35 ` Dan Williams
2017-10-06 22:35 ` Dan Williams
2017-10-06 22:35 ` [PATCH v7 01/12] mm: introduce MAP_SHARED_VALIDATE, a mechanism to safely define new mmap flags Dan Williams
2017-10-06 22:35   ` Dan Williams
2017-10-06 22:35   ` Dan Williams
2017-10-06 22:35 ` [PATCH v7 02/12] fs, mm: pass fd to ->mmap_validate() Dan Williams
2017-10-06 22:35   ` Dan Williams
2017-10-06 22:35   ` Dan Williams
2017-10-06 22:35   ` Dan Williams
2017-10-06 22:35 ` [PATCH v7 03/12] fs: introduce i_mapdcount Dan Williams
2017-10-06 22:35   ` Dan Williams
2017-10-06 22:35   ` Dan Williams
2017-10-09  3:08   ` Dave Chinner
2017-10-09  3:08     ` Dave Chinner
2017-10-09  3:08     ` Dave Chinner
2017-10-09  3:08     ` Dave Chinner
2017-10-06 22:35 ` Dan Williams [this message]
2017-10-06 22:35   ` [PATCH v7 04/12] fs: MAP_DIRECT core Dan Williams
2017-10-06 22:35   ` Dan Williams
2017-10-06 22:35   ` Dan Williams
2017-10-06 22:35 ` [PATCH v7 05/12] xfs: prepare xfs_break_layouts() for reuse with MAP_DIRECT Dan Williams
2017-10-06 22:35   ` Dan Williams
2017-10-06 22:35   ` Dan Williams
2017-10-06 22:35 ` [PATCH v7 06/12] xfs: wire up MAP_DIRECT Dan Williams
2017-10-06 22:35   ` Dan Williams
2017-10-06 22:35   ` Dan Williams
2017-10-09  3:40   ` Dave Chinner
2017-10-09  3:40     ` Dave Chinner
2017-10-09  3:40     ` Dave Chinner
2017-10-09 17:08     ` Dan Williams
2017-10-09 17:08       ` Dan Williams
2017-10-09 17:08       ` Dan Williams
2017-10-09 22:50       ` Dave Chinner
2017-10-09 22:50         ` Dave Chinner
2017-10-06 22:35 ` [PATCH v7 07/12] dma-mapping: introduce dma_has_iommu() Dan Williams
2017-10-06 22:35   ` Dan Williams
2017-10-06 22:35   ` Dan Williams
2017-10-06 22:45   ` David Woodhouse
2017-10-06 22:45     ` David Woodhouse
2017-10-06 22:45     ` David Woodhouse
2017-10-06 22:52     ` Dan Williams
2017-10-06 22:52       ` Dan Williams
2017-10-06 22:52       ` Dan Williams
2017-10-06 22:52       ` Dan Williams
2017-10-06 23:10       ` David Woodhouse
2017-10-06 23:10         ` David Woodhouse
2017-10-06 23:10         ` David Woodhouse
2017-10-06 23:15         ` Dan Williams
2017-10-06 23:15           ` Dan Williams
2017-10-06 23:15           ` Dan Williams
2017-10-06 23:15           ` Dan Williams
2017-10-07 11:08           ` David Woodhouse
2017-10-07 11:08             ` David Woodhouse
2017-10-07 23:33             ` Dan Williams
2017-10-07 23:33               ` Dan Williams
2017-10-07 23:33               ` Dan Williams
2017-10-07 23:33               ` Dan Williams
2017-10-06 23:12       ` Dan Williams
2017-10-06 23:12         ` Dan Williams
2017-10-08  3:45   ` [PATCH v8] dma-mapping: introduce dma_get_iommu_domain() Dan Williams
2017-10-08  3:45     ` Dan Williams
2017-10-08  3:45     ` Dan Williams
2017-10-09 10:37     ` Robin Murphy
2017-10-09 10:37       ` Robin Murphy
2017-10-09 10:37       ` Robin Murphy
2017-10-09 10:37       ` Robin Murphy
2017-10-09 17:32       ` Dan Williams
2017-10-09 17:32         ` Dan Williams
2017-10-10 14:40     ` Raj, Ashok
2017-10-10 14:40       ` Raj, Ashok
2017-10-09 18:58   ` [PATCH v7 07/12] dma-mapping: introduce dma_has_iommu() Jason Gunthorpe
2017-10-09 18:58     ` Jason Gunthorpe
2017-10-09 18:58     ` Jason Gunthorpe
2017-10-09 18:58     ` Jason Gunthorpe
2017-10-09 19:05     ` Dan Williams
2017-10-09 19:05       ` Dan Williams
2017-10-09 19:18       ` Jason Gunthorpe
2017-10-09 19:18         ` Jason Gunthorpe
2017-10-09 19:18         ` Jason Gunthorpe
2017-10-09 19:18         ` Jason Gunthorpe
2017-10-09 19:28         ` Dan Williams
2017-10-09 19:28           ` Dan Williams
2017-10-09 19:28           ` Dan Williams
2017-10-09 19:28           ` Dan Williams
2017-10-10 17:25           ` Jason Gunthorpe
2017-10-10 17:25             ` Jason Gunthorpe
2017-10-10 17:25             ` Jason Gunthorpe
2017-10-10 17:25             ` Jason Gunthorpe
2017-10-10 17:39             ` Dan Williams
2017-10-10 17:39               ` Dan Williams
2017-10-10 17:39               ` Dan Williams
2017-10-10 17:39               ` Dan Williams
2017-10-10 18:05               ` Jason Gunthorpe
2017-10-10 18:05                 ` Jason Gunthorpe
2017-10-10 18:05                 ` Jason Gunthorpe
2017-10-10 18:05                 ` Jason Gunthorpe
2017-10-10 20:17                 ` Dan Williams
2017-10-10 20:17                   ` Dan Williams
2017-10-10 20:17                   ` Dan Williams
2017-10-12 18:27                   ` Jason Gunthorpe
2017-10-12 18:27                     ` Jason Gunthorpe
2017-10-12 18:27                     ` Jason Gunthorpe
2017-10-12 20:10                     ` Dan Williams
2017-10-12 20:10                       ` Dan Williams
2017-10-13  6:50                       ` Christoph Hellwig
2017-10-13  6:50                         ` Christoph Hellwig
2017-10-13  6:50                         ` Christoph Hellwig
2017-10-13 15:03                         ` Jason Gunthorpe
2017-10-13 15:03                           ` Jason Gunthorpe
2017-10-13 15:03                           ` Jason Gunthorpe
2017-10-13 15:03                           ` Jason Gunthorpe
2017-10-15 15:14                           ` Matan Barak
2017-10-15 15:14                             ` Matan Barak
2017-10-15 15:14                             ` Matan Barak
2017-10-15 15:14                             ` Matan Barak
2017-10-15 15:21                             ` Dan Williams
2017-10-15 15:21                               ` Dan Williams
2017-10-15 15:21                               ` Dan Williams
2017-10-13  7:09         ` Christoph Hellwig
2017-10-13  7:09           ` Christoph Hellwig
2017-10-13  7:09           ` Christoph Hellwig
2017-10-06 22:36 ` [PATCH v7 08/12] fs, mapdirect: introduce ->lease_direct() Dan Williams
2017-10-06 22:36   ` Dan Williams
2017-10-06 22:36   ` Dan Williams
2017-10-06 22:36   ` Dan Williams
2017-10-06 22:36 ` [PATCH v7 09/12] xfs: wire up ->lease_direct() Dan Williams
2017-10-06 22:36   ` Dan Williams
2017-10-06 22:36   ` Dan Williams
2017-10-09  3:45   ` Dave Chinner
2017-10-09  3:45     ` Dave Chinner
2017-10-09  3:45     ` Dave Chinner
2017-10-09  3:45     ` Dave Chinner
2017-10-09 17:10     ` Dan Williams
2017-10-09 17:10       ` Dan Williams
2017-10-06 22:36 ` [PATCH v7 10/12] device-dax: " Dan Williams
2017-10-06 22:36   ` Dan Williams
2017-10-06 22:36   ` Dan Williams
2017-10-06 22:36   ` Dan Williams
2017-10-06 22:36 ` [PATCH v7 11/12] IB/core: use MAP_DIRECT to fix / enable RDMA to DAX mappings Dan Williams
2017-10-06 22:36   ` Dan Williams
2017-10-06 22:36   ` Dan Williams
2017-10-08  4:02   ` [PATCH v8 1/2] iommu: up-level sg_num_pages() from amd-iommu Dan Williams
2017-10-08  4:02     ` Dan Williams
2017-10-08  4:02     ` Dan Williams
2017-10-08  4:04   ` [PATCH v8 2/2] IB/core: use MAP_DIRECT to fix / enable RDMA to DAX mappings Dan Williams
2017-10-08  4:04     ` Dan Williams
2017-10-08  4:04     ` Dan Williams
2017-10-08  6:45     ` kbuild test robot
2017-10-08  6:45       ` kbuild test robot
2017-10-08  6:45       ` kbuild test robot
2017-10-08 15:49       ` Dan Williams
2017-10-08 15:49         ` Dan Williams
2017-10-08 15:49         ` Dan Williams
2017-10-06 22:36 ` [PATCH v7 12/12] tools/testing/nvdimm: enable rdma unit tests Dan Williams
2017-10-06 22:36   ` Dan Williams
2017-10-06 22:36   ` Dan Williams

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=150732933864.22363.2459100387849051724.stgit@dwillia2-desk3.amr.corp.intel.com \
    --to=dan.j.williams@intel.com \
    --cc=bfields@fieldses.org \
    --cc=darrick.wong@oracle.com \
    --cc=david@fromorbit.com \
    --cc=hch@lst.de \
    --cc=jack@suse.cz \
    --cc=jlayton@poochiereds.net \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-nvdimm@lists.01.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.