All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mel Gorman <mgorman@suse.de>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Linux-MM <linux-mm@kvack.org>,
	Linux-Netdev <netdev@vger.kernel.org>,
	Linux-NFS <linux-nfs@vger.kernel.org>,
	LKML <linux-kernel@vger.kernel.org>,
	David Miller <davem@davemloft.net>,
	Trond Myklebust <Trond.Myklebust@netapp.com>,
	Neil Brown <neilb@suse.de>, Christoph Hellwig <hch@infradead.org>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Mike Christie <michaelc@cs.wisc.edu>,
	Eric B Munson <emunson@mgebm.net>,
	Sebastian Andrzej Siewior <sebastian@breakpoint.cc>,
	Mel Gorman <mgorman@suse.de>
Subject: [PATCH 06/12] mm: Add get_kernel_page[s] for pinning of kernel addresses for I/O
Date: Fri, 29 Jun 2012 14:33:19 +0100	[thread overview]
Message-ID: <1340976805-5799-7-git-send-email-mgorman@suse.de> (raw)
In-Reply-To: <1340976805-5799-1-git-send-email-mgorman@suse.de>

This patch adds two new APIs get_kernel_pages() and get_kernel_page()
that may be used to pin a vector of kernel addresses for IO. The initial
user is expected to be NFS for allowing pages to be written to swap
using aops->direct_IO(). Strictly speaking, swap-over-NFS only needs
to pin one page for IO but it makes sense to express the API in terms
of a vector and add a helper for pinning single pages.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
---
 include/linux/blk_types.h |    2 ++
 include/linux/fs.h        |    2 ++
 include/linux/mm.h        |    4 ++++
 mm/memory.c               |   53 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 61 insertions(+)

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 0edb65d..7b7ac9c 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -160,6 +160,7 @@ enum rq_flag_bits {
 	__REQ_FLUSH_SEQ,	/* request for flush sequence */
 	__REQ_IO_STAT,		/* account I/O stat */
 	__REQ_MIXED_MERGE,	/* merge of different types, fail separately */
+	__REQ_KERNEL, 		/* direct IO to kernel pages */
 	__REQ_NR_BITS,		/* stops here */
 };
 
@@ -201,5 +202,6 @@ enum rq_flag_bits {
 #define REQ_IO_STAT		(1 << __REQ_IO_STAT)
 #define REQ_MIXED_MERGE		(1 << __REQ_MIXED_MERGE)
 #define REQ_SECURE		(1 << __REQ_SECURE)
+#define REQ_KERNEL		(1 << __REQ_KERNEL)
 
 #endif /* __LINUX_BLK_TYPES_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 006aa85..c2a4554 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -165,6 +165,8 @@ struct inodes_stat_t {
 #define READ			0
 #define WRITE			RW_MASK
 #define READA			RWA_MASK
+#define KERNEL_READ		(READ|REQ_KERNEL)
+#define KERNEL_WRITE		(WRITE|REQ_KERNEL)
 
 #define READ_SYNC		(READ | REQ_SYNC)
 #define WRITE_SYNC		(WRITE | REQ_SYNC | REQ_NOIDLE)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b3d4cd9..bbb3167 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1019,6 +1019,10 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			struct page **pages, struct vm_area_struct **vmas);
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			struct page **pages);
+struct kvec;
+int get_kernel_pages(const struct kvec *iov, int nr_pages, int write,
+			struct page **pages);
+int get_kernel_page(unsigned long start, int write, struct page **pages);
 struct page *get_dump_page(unsigned long addr);
 
 extern int try_to_release_page(struct page * page, gfp_t gfp_mask);
diff --git a/mm/memory.c b/mm/memory.c
index 6322d36..c8153f5 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1843,6 +1843,59 @@ next_page:
 EXPORT_SYMBOL(__get_user_pages);
 
 /*
+ * get_kernel_pages() - pin kernel pages in memory
+ * @kiov:	An array of struct kvec structures
+ * @nr_segs:	number of segments to pin
+ * @write:	pinning for read/write, currently ignored
+ * @pages:	array that receives pointers to the pages pinned.
+ *		Should be at least nr_segs long.
+ *
+ * Returns number of pages pinned. This may be fewer than the number
+ * requested. If nr_pages is 0 or negative, returns 0. If no pages
+ * were pinned, returns -errno. Each page returned must be released
+ * with a put_page() call when it is finished with.
+ */
+int get_kernel_pages(const struct kvec *kiov, int nr_segs, int write,
+		struct page **pages)
+{
+	int seg;
+
+	for (seg = 0; seg < nr_segs; seg++) {
+		if (WARN_ON(kiov[seg].iov_len != PAGE_SIZE))
+			return seg;
+
+		/* virt_to_page sanity checks the PFN */
+		pages[seg] = virt_to_page(kiov[seg].iov_base);
+		page_cache_get(pages[seg]);
+	}
+
+	return seg;
+}
+EXPORT_SYMBOL_GPL(get_kernel_pages);
+
+/*
+ * get_kernel_page() - pin a kernel page in memory
+ * @start:	starting kernel address
+ * @write:	pinning for read/write, currently ignored
+ * @pages:	array that receives pointer to the page pinned.
+ *		Must be at least nr_segs long.
+ *
+ * Returns 1 if page is pinned. If the page was not pinned, returns
+ * -errno. The page returned must be released with a put_page() call
+ * when it is finished with.
+ */
+int get_kernel_page(unsigned long start, int write, struct page **pages)
+{
+	const struct kvec kiov = {
+		.iov_base = (void *)start,
+		.iov_len = PAGE_SIZE
+	};
+
+	return get_kernel_pages(&kiov, 1, write, pages);
+}
+EXPORT_SYMBOL_GPL(get_kernel_page);
+
+/*
  * fixup_user_fault() - manually resolve a user page fault
  * @tsk:	the task_struct to use for page fault accounting, or
  *		NULL if faults are not to be recorded.
-- 
1.7.9.2


WARNING: multiple messages have this Message-ID (diff)
From: Mel Gorman <mgorman@suse.de>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Linux-MM <linux-mm@kvack.org>,
	Linux-Netdev <netdev@vger.kernel.org>,
	Linux-NFS <linux-nfs@vger.kernel.org>,
	LKML <linux-kernel@vger.kernel.org>,
	David Miller <davem@davemloft.net>,
	Trond Myklebust <Trond.Myklebust@netapp.com>,
	Neil Brown <neilb@suse.de>, Christoph Hellwig <hch@infradead.org>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Mike Christie <michaelc@cs.wisc.edu>,
	Eric B Munson <emunson@mgebm.net>,
	Sebastian Andrzej Siewior <sebastian@breakpoint.cc>,
	Mel Gorman <mgorman@suse.de>
Subject: [PATCH 06/12] mm: Add get_kernel_page[s] for pinning of kernel addresses for I/O
Date: Fri, 29 Jun 2012 14:33:19 +0100	[thread overview]
Message-ID: <1340976805-5799-7-git-send-email-mgorman@suse.de> (raw)
In-Reply-To: <1340976805-5799-1-git-send-email-mgorman@suse.de>

This patch adds two new APIs get_kernel_pages() and get_kernel_page()
that may be used to pin a vector of kernel addresses for IO. The initial
user is expected to be NFS for allowing pages to be written to swap
using aops->direct_IO(). Strictly speaking, swap-over-NFS only needs
to pin one page for IO but it makes sense to express the API in terms
of a vector and add a helper for pinning single pages.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
---
 include/linux/blk_types.h |    2 ++
 include/linux/fs.h        |    2 ++
 include/linux/mm.h        |    4 ++++
 mm/memory.c               |   53 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 61 insertions(+)

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 0edb65d..7b7ac9c 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -160,6 +160,7 @@ enum rq_flag_bits {
 	__REQ_FLUSH_SEQ,	/* request for flush sequence */
 	__REQ_IO_STAT,		/* account I/O stat */
 	__REQ_MIXED_MERGE,	/* merge of different types, fail separately */
+	__REQ_KERNEL, 		/* direct IO to kernel pages */
 	__REQ_NR_BITS,		/* stops here */
 };
 
@@ -201,5 +202,6 @@ enum rq_flag_bits {
 #define REQ_IO_STAT		(1 << __REQ_IO_STAT)
 #define REQ_MIXED_MERGE		(1 << __REQ_MIXED_MERGE)
 #define REQ_SECURE		(1 << __REQ_SECURE)
+#define REQ_KERNEL		(1 << __REQ_KERNEL)
 
 #endif /* __LINUX_BLK_TYPES_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 006aa85..c2a4554 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -165,6 +165,8 @@ struct inodes_stat_t {
 #define READ			0
 #define WRITE			RW_MASK
 #define READA			RWA_MASK
+#define KERNEL_READ		(READ|REQ_KERNEL)
+#define KERNEL_WRITE		(WRITE|REQ_KERNEL)
 
 #define READ_SYNC		(READ | REQ_SYNC)
 #define WRITE_SYNC		(WRITE | REQ_SYNC | REQ_NOIDLE)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b3d4cd9..bbb3167 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1019,6 +1019,10 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			struct page **pages, struct vm_area_struct **vmas);
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			struct page **pages);
+struct kvec;
+int get_kernel_pages(const struct kvec *iov, int nr_pages, int write,
+			struct page **pages);
+int get_kernel_page(unsigned long start, int write, struct page **pages);
 struct page *get_dump_page(unsigned long addr);
 
 extern int try_to_release_page(struct page * page, gfp_t gfp_mask);
diff --git a/mm/memory.c b/mm/memory.c
index 6322d36..c8153f5 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1843,6 +1843,59 @@ next_page:
 EXPORT_SYMBOL(__get_user_pages);
 
 /*
+ * get_kernel_pages() - pin kernel pages in memory
+ * @kiov:	An array of struct kvec structures
+ * @nr_segs:	number of segments to pin
+ * @write:	pinning for read/write, currently ignored
+ * @pages:	array that receives pointers to the pages pinned.
+ *		Should be at least nr_segs long.
+ *
+ * Returns number of pages pinned. This may be fewer than the number
+ * requested. If nr_pages is 0 or negative, returns 0. If no pages
+ * were pinned, returns -errno. Each page returned must be released
+ * with a put_page() call when it is finished with.
+ */
+int get_kernel_pages(const struct kvec *kiov, int nr_segs, int write,
+		struct page **pages)
+{
+	int seg;
+
+	for (seg = 0; seg < nr_segs; seg++) {
+		if (WARN_ON(kiov[seg].iov_len != PAGE_SIZE))
+			return seg;
+
+		/* virt_to_page sanity checks the PFN */
+		pages[seg] = virt_to_page(kiov[seg].iov_base);
+		page_cache_get(pages[seg]);
+	}
+
+	return seg;
+}
+EXPORT_SYMBOL_GPL(get_kernel_pages);
+
+/*
+ * get_kernel_page() - pin a kernel page in memory
+ * @start:	starting kernel address
+ * @write:	pinning for read/write, currently ignored
+ * @pages:	array that receives pointer to the page pinned.
+ *		Must be at least nr_segs long.
+ *
+ * Returns 1 if page is pinned. If the page was not pinned, returns
+ * -errno. The page returned must be released with a put_page() call
+ * when it is finished with.
+ */
+int get_kernel_page(unsigned long start, int write, struct page **pages)
+{
+	const struct kvec kiov = {
+		.iov_base = (void *)start,
+		.iov_len = PAGE_SIZE
+	};
+
+	return get_kernel_pages(&kiov, 1, write, pages);
+}
+EXPORT_SYMBOL_GPL(get_kernel_page);
+
+/*
  * fixup_user_fault() - manually resolve a user page fault
  * @tsk:	the task_struct to use for page fault accounting, or
  *		NULL if faults are not to be recorded.
-- 
1.7.9.2

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2012-06-29 13:36 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-06-29 13:33 [PATCH 00/12] Swap-over-NFS without deadlocking V8 Mel Gorman
2012-06-29 13:33 ` Mel Gorman
2012-06-29 13:33 ` [PATCH 01/12] netvm: Prevent a stream-specific deadlock Mel Gorman
2012-06-29 13:33   ` Mel Gorman
2012-06-29 13:33 ` [PATCH 02/12] selinux: tag avc cache alloc as non-critical Mel Gorman
2012-06-29 13:33   ` Mel Gorman
2012-06-29 13:33 ` [PATCH 03/12] mm: Methods for teaching filesystems about PG_swapcache pages Mel Gorman
2012-06-29 13:33   ` Mel Gorman
2012-06-29 13:33 ` [PATCH 04/12] mm: Add support for a filesystem to activate swap files and use direct_IO for writing swap pages Mel Gorman
2012-06-29 13:33   ` Mel Gorman
2012-06-29 13:33 ` [PATCH 05/12] mm: swap: Implement generic handler for swap_activate Mel Gorman
2012-06-29 13:33   ` Mel Gorman
2012-06-29 13:33 ` Mel Gorman [this message]
2012-06-29 13:33   ` [PATCH 06/12] mm: Add get_kernel_page[s] for pinning of kernel addresses for I/O Mel Gorman
2012-06-29 13:33 ` [PATCH 07/12] mm: Add support for direct_IO to highmem pages Mel Gorman
2012-06-29 13:33   ` Mel Gorman
2012-06-29 13:33 ` [PATCH 08/12] nfs: teach the NFS client how to treat PG_swapcache pages Mel Gorman
2012-06-29 13:33   ` Mel Gorman
2012-06-29 13:33 ` [PATCH 09/12] nfs: disable data cache revalidation for swapfiles Mel Gorman
2012-06-29 13:33   ` Mel Gorman
2012-06-29 13:33 ` [PATCH 10/12] nfs: enable swap on NFS Mel Gorman
2012-06-29 13:33   ` Mel Gorman
2012-06-29 13:33 ` [PATCH 11/12] nfs: Prevent page allocator recursions with swap over NFS Mel Gorman
2012-06-29 13:33   ` Mel Gorman
2012-06-29 13:33 ` [PATCH 12/12] Avoid dereferencing bd_disk during swap_entry_free for network storage Mel Gorman
2012-06-29 13:33   ` Mel Gorman
2012-07-01 17:22 ` [PATCH 00/12] Swap-over-NFS without deadlocking V8 Eric B Munson
2012-07-02 14:35   ` Mel Gorman
2012-07-02 14:35     ` Mel Gorman
2012-07-03  0:10     ` Eric B Munson
2012-07-03  8:58       ` Mel Gorman
2012-07-03  8:58         ` Mel Gorman
  -- strict thread matches above, loose matches on Subject: below --
2012-07-12  6:40 [PATCH 00/12] Swap-over-NFS without deadlocking V9 Mel Gorman
2012-07-12  6:41 ` [PATCH 06/12] mm: Add get_kernel_page[s] for pinning of kernel addresses for I/O Mel Gorman
2012-07-12  6:41   ` Mel Gorman
2012-06-22 14:30 [PATCH 00/12] Swap-over-NFS without deadlocking V7 Mel Gorman
2012-06-22 14:31 ` [PATCH 06/12] mm: Add get_kernel_page[s] for pinning of kernel addresses for I/O Mel Gorman
2012-06-22 14:31   ` Mel Gorman
2012-06-20  9:37 [PATCH 00/12] Swap-over-NFS without deadlocking V6 Mel Gorman
2012-06-20  9:37 ` [PATCH 06/12] mm: Add get_kernel_page[s] for pinning of kernel addresses for I/O Mel Gorman
2012-06-20  9:37   ` Mel Gorman
2012-06-20 16:11   ` Rik van Riel
2012-06-20 16:11     ` Rik van Riel
2012-05-17 14:51 [PATCH 00/12] Swap-over-NFS without deadlocking V5 Mel Gorman
2012-05-17 14:51 ` [PATCH 06/12] mm: Add get_kernel_page[s] for pinning of kernel addresses for I/O Mel Gorman
2012-05-17 14:51   ` Mel Gorman
2012-05-10 13:54 [PATCH 00/12] Swap-over-NFS without deadlocking V4 Mel Gorman
2012-05-10 13:54 ` [PATCH 06/12] mm: Add get_kernel_page[s] for pinning of kernel addresses for I/O Mel Gorman
2012-05-10 13:54   ` Mel Gorman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1340976805-5799-7-git-send-email-mgorman@suse.de \
    --to=mgorman@suse.de \
    --cc=Trond.Myklebust@netapp.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=akpm@linux-foundation.org \
    --cc=davem@davemloft.net \
    --cc=emunson@mgebm.net \
    --cc=hch@infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=michaelc@cs.wisc.edu \
    --cc=neilb@suse.de \
    --cc=netdev@vger.kernel.org \
    --cc=sebastian@breakpoint.cc \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.