linux-ext4.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: ira.weiny@intel.com
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Jason Gunthorpe <jgg@ziepe.ca>,
	Dan Williams <dan.j.williams@intel.com>,
	Matthew Wilcox <willy@infradead.org>, Jan Kara <jack@suse.cz>,
	"Theodore Ts'o" <tytso@mit.edu>,
	John Hubbard <jhubbard@nvidia.com>,
	Michal Hocko <mhocko@suse.com>,
	Dave Chinner <david@fromorbit.com>,
	linux-xfs@vger.kernel.org, linux-rdma@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-nvdimm@lists.01.org, linux-ext4@vger.kernel.org,
	linux-mm@kvack.org, Ira Weiny <ira.weiny@intel.com>
Subject: [RFC PATCH v2 13/19] {mm,file}: Add file_pins objects
Date: Fri,  9 Aug 2019 15:58:27 -0700	[thread overview]
Message-ID: <20190809225833.6657-14-ira.weiny@intel.com> (raw)
In-Reply-To: <20190809225833.6657-1-ira.weiny@intel.com>

From: Ira Weiny <ira.weiny@intel.com>

User page pins (aka GUP) need to track file information for the files being
pinned by those calls.  Depending on the needs of the caller, this
information is stored in one of two ways.

1) Some subsystems like RDMA associate GUP pins with file descriptors
   which can be passed around to other processes.  In this case a file
   being pinned must be associated with an owning file object (which can
   then be resolved back to any of the processes which have a file
   descriptor 'pointing' to that file object).

2) Other subsystems do not have an owning file and can therefore
   associate the file pin directly with the mm of the process which
   created it.

This patch introduces the new file pin structures and ensures struct
file and struct mm_struct are prepared to store them.

In subsequent patches the required information will be passed into new
pin page calls and procfs is enhanced to show this information to the user.

Signed-off-by: Ira Weiny <ira.weiny@intel.com>
---
 fs/file_table.c          |  4 ++++
 include/linux/file.h     | 49 ++++++++++++++++++++++++++++++++++++++++
 include/linux/fs.h       |  2 ++
 include/linux/mm_types.h |  2 ++
 kernel/fork.c            |  3 +++
 5 files changed, 60 insertions(+)

diff --git a/fs/file_table.c b/fs/file_table.c
index b07b53f24ff5..38947b9a4769 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -46,6 +46,7 @@ static void file_free_rcu(struct rcu_head *head)
 {
 	struct file *f = container_of(head, struct file, f_u.fu_rcuhead);
 
+	WARN_ON(!list_empty(&f->file_pins));
 	put_cred(f->f_cred);
 	kmem_cache_free(filp_cachep, f);
 }
@@ -118,6 +119,9 @@ static struct file *__alloc_file(int flags, const struct cred *cred)
 	f->f_mode = OPEN_FMODE(flags);
 	/* f->f_version: 0 */
 
+	INIT_LIST_HEAD(&f->file_pins);
+	spin_lock_init(&f->fp_lock);
+
 	return f;
 }
 
diff --git a/include/linux/file.h b/include/linux/file.h
index 3fcddff56bc4..cd79adad5b23 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -9,6 +9,7 @@
 #include <linux/compiler.h>
 #include <linux/types.h>
 #include <linux/posix_types.h>
+#include <linux/kref.h>
 
 struct file;
 
@@ -91,4 +92,52 @@ extern void fd_install(unsigned int fd, struct file *file);
 extern void flush_delayed_fput(void);
 extern void __fput_sync(struct file *);
 
+/**
+ * struct file_file_pin - associate a pinned file with another file owner
+ *
+ * Subsystems such as RDMA have the ability to pin memory which is associated
+ * with a file descriptor.  That file descriptor can be passed to other
+ * processes without the pinned memory necessarily being accessed in the
+ * remote process's address space.
+ *
+ * Such pins are tracked against the owning file (@f_owner).
+ *
+ * @file: file backing the memory which was pinned by a GUP caller
+ * @f_owner: the file representing the GUP owner
+ * @list: list of all file pins this owner has,
+ *        (struct file *)->file_pins
+ * @ref: number of times this pin was taken (roughly the number of pages
+ *       pinned in the file)
+ */
+struct file_file_pin {
+	struct file *file;
+	struct file *f_owner;
+	struct list_head list;
+	struct kref ref;
+};
+
+/**
+ * struct mm_file_pin - file pin accounted for in the mm of the GUP caller
+ *
+ * Some GUP callers do not have an "owning" file.  Those pins are accounted for
+ * in the mm of the process that called GUP.
+ *
+ * The tuple {file, inode} is used to track this as a unique file pin and to
+ * track when this pin has been removed.
+ *
+ * @file: file backing the memory which was pinned by a GUP caller
+ * @mm: back pointer to the owning mm
+ * @inode: inode backing the file
+ * @list: list of all file pins this owner has,
+ *        (struct mm_struct *)->file_pins
+ * @ref: number of times this pin was taken
+ */
+struct mm_file_pin {
+	struct file *file;
+	struct mm_struct *mm;
+	struct inode *inode;
+	struct list_head list;
+	struct kref ref;
+};
+
 #endif /* __LINUX_FILE_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2e41ce547913..d2e08feb9737 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -963,6 +963,8 @@ struct file {
 #endif /* #ifdef CONFIG_EPOLL */
 	struct address_space	*f_mapping;
 	errseq_t		f_wb_err;
+	struct list_head        file_pins;
+	spinlock_t              fp_lock;
 } __randomize_layout
   __attribute__((aligned(4)));	/* lest something weird decides that 2 is OK */
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 6a7a1083b6fb..4f6ea4acddbd 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -516,6 +516,8 @@ struct mm_struct {
 		/* HMM needs to track a few things per mm */
 		struct hmm *hmm;
 #endif
+		struct list_head file_pins;
+		spinlock_t fp_lock; /* lock file_pins */
 	} __randomize_layout;
 
 	/*
diff --git a/kernel/fork.c b/kernel/fork.c
index 0e2f9a2c132c..093f2f2fce1a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -675,6 +675,7 @@ void __mmdrop(struct mm_struct *mm)
 	BUG_ON(mm == &init_mm);
 	WARN_ON_ONCE(mm == current->mm);
 	WARN_ON_ONCE(mm == current->active_mm);
+	WARN_ON(!list_empty(&mm->file_pins));
 	mm_free_pgd(mm);
 	destroy_context(mm);
 	mmu_notifier_mm_destroy(mm);
@@ -1013,6 +1014,8 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	mm->pmd_huge_pte = NULL;
 #endif
 	mm_init_uprobes_state(mm);
+	INIT_LIST_HEAD(&mm->file_pins);
+	spin_lock_init(&mm->fp_lock);
 
 	if (current->mm) {
 		mm->flags = current->mm->flags & MMF_INIT_MASK;
-- 
2.20.1


  parent reply	other threads:[~2019-08-09 22:59 UTC|newest]

Thread overview: 110+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-09 22:58 [RFC PATCH v2 00/19] RDMA/FS DAX truncate proposal V1,000,002 ;-) ira.weiny
2019-08-09 22:58 ` [RFC PATCH v2 01/19] fs/locks: Export F_LAYOUT lease to user space ira.weiny
2019-08-09 23:52   ` Dave Chinner
2019-08-12 17:36     ` Ira Weiny
2019-08-14  8:05       ` Dave Chinner
2019-08-14 11:21         ` Jeff Layton
2019-08-14 11:38           ` Dave Chinner
2019-08-09 22:58 ` [RFC PATCH v2 02/19] fs/locks: Add Exclusive flag to user Layout lease ira.weiny
2019-08-14 14:15   ` Jeff Layton
2019-08-14 21:56     ` Dave Chinner
2019-08-26 10:41       ` Jeff Layton
2019-08-29 23:34         ` Ira Weiny
2019-09-04 12:52           ` Jeff Layton
2019-09-04 23:12   ` John Hubbard
2019-08-09 22:58 ` [RFC PATCH v2 03/19] mm/gup: Pass flags down to __gup_device_huge* calls ira.weiny
2019-08-09 22:58 ` [RFC PATCH v2 04/19] mm/gup: Ensure F_LAYOUT lease is held prior to GUP'ing pages ira.weiny
2019-08-09 22:58 ` [RFC PATCH v2 05/19] fs/ext4: Teach ext4 to break layout leases ira.weiny
2019-08-09 22:58 ` [RFC PATCH v2 06/19] fs/ext4: Teach dax_layout_busy_page() to operate on a sub-range ira.weiny
2019-08-23 15:18   ` Vivek Goyal
2019-08-29 18:52     ` Ira Weiny
2019-08-09 22:58 ` [RFC PATCH v2 07/19] fs/xfs: Teach xfs to use new dax_layout_busy_page() ira.weiny
2019-08-09 23:30   ` Dave Chinner
2019-08-12 18:05     ` Ira Weiny
2019-08-14  8:04       ` Dave Chinner
2019-08-09 22:58 ` [RFC PATCH v2 08/19] fs/xfs: Fail truncate if page lease can't be broken ira.weiny
2019-08-09 23:22   ` Dave Chinner
2019-08-12 18:08     ` Ira Weiny
2019-08-09 22:58 ` [RFC PATCH v2 09/19] mm/gup: Introduce vaddr_pin structure ira.weiny
2019-08-10  0:06   ` John Hubbard
2019-08-09 22:58 ` [RFC PATCH v2 10/19] mm/gup: Pass a NULL vaddr_pin through GUP fast ira.weiny
2019-08-10  0:06   ` John Hubbard
2019-08-09 22:58 ` [RFC PATCH v2 11/19] mm/gup: Pass follow_page_context further down the call stack ira.weiny
2019-08-10  0:18   ` John Hubbard
2019-08-12 19:01     ` Ira Weiny
2019-08-09 22:58 ` [RFC PATCH v2 12/19] mm/gup: Prep put_user_pages() to take an vaddr_pin struct ira.weiny
2019-08-10  0:30   ` John Hubbard
2019-08-12 20:46     ` Ira Weiny
2019-08-09 22:58 ` ira.weiny [this message]
2019-08-09 22:58 ` [RFC PATCH v2 14/19] fs/locks: Associate file pins while performing GUP ira.weiny
2019-08-09 22:58 ` [RFC PATCH v2 15/19] mm/gup: Introduce vaddr_pin_pages() ira.weiny
2019-08-10  0:09   ` John Hubbard
2019-08-12 21:00     ` Ira Weiny
2019-08-12 21:20       ` John Hubbard
2019-08-11 23:07   ` John Hubbard
2019-08-12 21:01     ` Ira Weiny
2019-08-12 12:28   ` Jason Gunthorpe
2019-08-12 21:48     ` Ira Weiny
2019-08-13 11:47       ` Jason Gunthorpe
2019-08-13 17:46         ` Ira Weiny
2019-08-13 17:56           ` John Hubbard
2019-08-09 22:58 ` [RFC PATCH v2 16/19] RDMA/uverbs: Add back pointer to system file object ira.weiny
2019-08-12 13:00   ` Jason Gunthorpe
2019-08-12 17:28     ` Ira Weiny
2019-08-12 17:56       ` Jason Gunthorpe
2019-08-12 21:15         ` Ira Weiny
2019-08-13 11:48           ` Jason Gunthorpe
2019-08-13 17:41             ` Ira Weiny
2019-08-13 18:00               ` Jason Gunthorpe
2019-08-13 20:38                 ` Ira Weiny
2019-08-14 12:23                   ` Jason Gunthorpe
2019-08-14 17:50                     ` Ira Weiny
2019-08-14 18:15                       ` Jason Gunthorpe
2019-09-04 22:25                     ` Ira Weiny
2019-09-11  8:19                       ` Jason Gunthorpe
2019-08-09 22:58 ` [RFC PATCH v2 17/19] RDMA/umem: Convert to vaddr_[pin|unpin]* operations ira.weiny
2019-08-09 22:58 ` [RFC PATCH v2 18/19] {mm,procfs}: Add display file_pins proc ira.weiny
2019-08-09 22:58 ` [RFC PATCH v2 19/19] mm/gup: Remove FOLL_LONGTERM DAX exclusion ira.weiny
2019-08-14 10:17 ` [RFC PATCH v2 00/19] RDMA/FS DAX truncate proposal V1,000,002 ;-) Jan Kara
2019-08-14 18:08   ` Ira Weiny
2019-08-15 13:05     ` Jan Kara
2019-08-16 19:05       ` Ira Weiny
2019-08-16 23:20         ` [RFC PATCH v2 00/19] RDMA/FS DAX truncate proposal V1,000,002 ; -) Ira Weiny
2019-08-19  6:36           ` Jan Kara
2019-08-17  2:26         ` [RFC PATCH v2 00/19] RDMA/FS DAX truncate proposal V1,000,002 ;-) Dave Chinner
2019-08-19  6:34           ` Jan Kara
2019-08-19  9:24             ` Dave Chinner
2019-08-19 12:38               ` Jason Gunthorpe
2019-08-19 21:53                 ` Ira Weiny
2019-08-20  1:12                 ` Dave Chinner
2019-08-20 11:55                   ` Jason Gunthorpe
2019-08-21 18:02                     ` Ira Weiny
2019-08-21 18:13                       ` Jason Gunthorpe
2019-08-21 18:22                         ` John Hubbard
2019-08-21 18:57                         ` Ira Weiny
2019-08-21 19:06                           ` Ira Weiny
2019-08-21 19:48                           ` Jason Gunthorpe
2019-08-21 20:44                             ` Ira Weiny
2019-08-21 23:49                               ` Jason Gunthorpe
2019-08-23  3:23                               ` Dave Chinner
2019-08-23 12:04                                 ` Jason Gunthorpe
2019-08-24  0:11                                   ` Dave Chinner
2019-08-24  5:08                                     ` Ira Weiny
2019-08-26  5:55                                       ` Dave Chinner
2019-08-29  2:02                                         ` Ira Weiny
2019-08-29  3:27                                           ` John Hubbard
2019-08-29 16:16                                             ` Ira Weiny
2019-09-02 22:26                                           ` Dave Chinner
2019-09-04 16:54                                             ` Ira Weiny
2019-08-25 19:39                                     ` Jason Gunthorpe
2019-08-24  4:49                                 ` Ira Weiny
2019-08-25 19:40                                   ` Jason Gunthorpe
2019-08-23  0:59                       ` Dave Chinner
2019-08-23 17:15                         ` Ira Weiny
2019-08-24  0:18                           ` Dave Chinner
2019-08-20  0:05               ` John Hubbard
2019-08-20  1:20                 ` Dave Chinner
2019-08-20  3:09                   ` John Hubbard
2019-08-20  3:36                     ` Dave Chinner
2019-08-21 18:43                       ` John Hubbard
2019-08-21 19:09                         ` Ira Weiny

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190809225833.6657-14-ira.weiny@intel.com \
    --to=ira.weiny@intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=dan.j.williams@intel.com \
    --cc=david@fromorbit.com \
    --cc=jack@suse.cz \
    --cc=jgg@ziepe.ca \
    --cc=jhubbard@nvidia.com \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-nvdimm@lists.01.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=mhocko@suse.com \
    --cc=tytso@mit.edu \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).