All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Howells <dhowells@redhat.com>
To: Jens Axboe <axboe@kernel.dk>, Al Viro <viro@zeniv.linux.org.uk>,
	Christoph Hellwig <hch@infradead.org>
Cc: David Howells <dhowells@redhat.com>,
	Matthew Wilcox <willy@infradead.org>, Jan Kara <jack@suse.cz>,
	Jeff Layton <jlayton@kernel.org>,
	David Hildenbrand <david@redhat.com>,
	Jason Gunthorpe <jgg@nvidia.com>,
	Logan Gunthorpe <logang@deltatee.com>,
	Hillf Danton <hdanton@sina.com>,
	Christian Brauner <brauner@kernel.org>,
	linux-fsdevel@vger.kernel.org, linux-block@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	Dominique Martinet <asmadeus@codewreck.org>,
	Eric Van Hensbergen <ericvh@gmail.com>,
	Latchesar Ionkov <lucho@ionkov.net>,
	Christian Schoenebeck <linux_oss@crudebyte.com>,
	v9fs-developer@lists.sourceforge.net
Subject: [RFC PATCH 10/11] 9p: Pin pages rather than ref'ing if appropriate
Date: Fri, 30 Jun 2023 16:25:23 +0100	[thread overview]
Message-ID: <20230630152524.661208-11-dhowells@redhat.com> (raw)
In-Reply-To: <20230630152524.661208-1-dhowells@redhat.com>

Convert the 9p filesystem to use iov_iter_extract_pages() instead of
iov_iter_get_pages().  This will pin pages or leave them unaltered rather
than getting a ref on them as appropriate to the iterator.

The pages need to be pinned for DIO-read rather than having refs taken on
them to prevent VM copy-on-write from malfunctioning during a concurrent
fork() (the result of the I/O would otherwise end up only visible to the
child process and not the parent).

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Dominique Martinet <asmadeus@codewreck.org>
cc: Eric Van Hensbergen <ericvh@gmail.com>
cc: Latchesar Ionkov <lucho@ionkov.net>
cc: Christian Schoenebeck <linux_oss@crudebyte.com>
cc: v9fs-developer@lists.sourceforge.net
---
 net/9p/trans_common.c |  8 ++--
 net/9p/trans_common.h |  2 +-
 net/9p/trans_virtio.c | 92 ++++++++++++++-----------------------------
 3 files changed, 34 insertions(+), 68 deletions(-)

diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c
index c827f694551c..4342de18f08b 100644
--- a/net/9p/trans_common.c
+++ b/net/9p/trans_common.c
@@ -9,16 +9,16 @@
 #include "trans_common.h"
 
 /**
- * p9_release_pages - Release pages after the transaction.
+ * p9_unpin_pages - Unpin pages after the transaction.
  * @pages: array of pages to be put
  * @nr_pages: size of array
  */
-void p9_release_pages(struct page **pages, int nr_pages)
+void p9_unpin_pages(struct page **pages, int nr_pages)
 {
 	int i;
 
 	for (i = 0; i < nr_pages; i++)
 		if (pages[i])
-			put_page(pages[i]);
+			unpin_user_page(pages[i]);
 }
-EXPORT_SYMBOL(p9_release_pages);
+EXPORT_SYMBOL(p9_unpin_pages);
diff --git a/net/9p/trans_common.h b/net/9p/trans_common.h
index 32134db6abf3..fd94c48aba5b 100644
--- a/net/9p/trans_common.h
+++ b/net/9p/trans_common.h
@@ -4,4 +4,4 @@
  * Author Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
  */
 
-void p9_release_pages(struct page **pages, int nr_pages);
+void p9_unpin_pages(struct page **pages, int nr_pages);
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 3c27ffb781e3..93569de2bdba 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -310,71 +310,35 @@ static int p9_get_mapped_pages(struct virtio_chan *chan,
 			       struct iov_iter *data,
 			       int count,
 			       size_t *offs,
-			       int *need_drop)
+			       bool *need_unpin,
+			       iov_iter_extraction_t extraction_flags)
 {
 	int nr_pages;
 	int err;
+	int n;
 
 	if (!iov_iter_count(data))
 		return 0;
 
-	if (!iov_iter_is_kvec(data)) {
-		int n;
-		/*
-		 * We allow only p9_max_pages pinned. We wait for the
-		 * Other zc request to finish here
-		 */
-		if (atomic_read(&vp_pinned) >= chan->p9_max_pages) {
-			err = wait_event_killable(vp_wq,
-			      (atomic_read(&vp_pinned) < chan->p9_max_pages));
-			if (err == -ERESTARTSYS)
-				return err;
-		}
-		n = iov_iter_get_pages_alloc2(data, pages, count, offs);
-		if (n < 0)
-			return n;
-		*need_drop = 1;
-		nr_pages = DIV_ROUND_UP(n + *offs, PAGE_SIZE);
-		atomic_add(nr_pages, &vp_pinned);
-		return n;
-	} else {
-		/* kernel buffer, no need to pin pages */
-		int index;
-		size_t len;
-		void *p;
-
-		/* we'd already checked that it's non-empty */
-		while (1) {
-			len = iov_iter_single_seg_count(data);
-			if (likely(len)) {
-				p = data->kvec->iov_base + data->iov_offset;
-				break;
-			}
-			iov_iter_advance(data, 0);
-		}
-		if (len > count)
-			len = count;
-
-		nr_pages = DIV_ROUND_UP((unsigned long)p + len, PAGE_SIZE) -
-			   (unsigned long)p / PAGE_SIZE;
-
-		*pages = kmalloc_array(nr_pages, sizeof(struct page *),
-				       GFP_NOFS);
-		if (!*pages)
-			return -ENOMEM;
-
-		*need_drop = 0;
-		p -= (*offs = offset_in_page(p));
-		for (index = 0; index < nr_pages; index++) {
-			if (is_vmalloc_addr(p))
-				(*pages)[index] = vmalloc_to_page(p);
-			else
-				(*pages)[index] = kmap_to_page(p);
-			p += PAGE_SIZE;
-		}
-		iov_iter_advance(data, len);
-		return len;
+	/*
+	 * We allow only p9_max_pages pinned. We wait for the
+	 * Other zc request to finish here
+	 */
+	if (atomic_read(&vp_pinned) >= chan->p9_max_pages) {
+		err = wait_event_killable(vp_wq,
+					  (atomic_read(&vp_pinned) < chan->p9_max_pages));
+		if (err == -ERESTARTSYS)
+			return err;
 	}
+
+	n = iov_iter_extract_pages(data, pages, count, INT_MAX,
+				   extraction_flags, offs);
+	if (n < 0)
+		return n;
+	*need_unpin = iov_iter_extract_will_pin(data);
+	nr_pages = DIV_ROUND_UP(n + *offs, PAGE_SIZE);
+	atomic_add(nr_pages, &vp_pinned);
+	return n;
 }
 
 static void handle_rerror(struct p9_req_t *req, int in_hdr_len,
@@ -429,7 +393,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
 	struct virtio_chan *chan = client->trans;
 	struct scatterlist *sgs[4];
 	size_t offs;
-	int need_drop = 0;
+	bool need_unpin;
 	int kicked = 0;
 
 	p9_debug(P9_DEBUG_TRANS, "virtio request\n");
@@ -437,7 +401,8 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
 	if (uodata) {
 		__le32 sz;
 		int n = p9_get_mapped_pages(chan, &out_pages, uodata,
-					    outlen, &offs, &need_drop);
+					    outlen, &offs, &need_unpin,
+					    WRITE_FROM_ITER);
 		if (n < 0) {
 			err = n;
 			goto err_out;
@@ -456,7 +421,8 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
 		memcpy(&req->tc.sdata[0], &sz, sizeof(sz));
 	} else if (uidata) {
 		int n = p9_get_mapped_pages(chan, &in_pages, uidata,
-					    inlen, &offs, &need_drop);
+					    inlen, &offs, &need_unpin,
+					    READ_INTO_ITER);
 		if (n < 0) {
 			err = n;
 			goto err_out;
@@ -542,13 +508,13 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
 	 * Non kernel buffers are pinned, unpin them
 	 */
 err_out:
-	if (need_drop) {
+	if (need_unpin) {
 		if (in_pages) {
-			p9_release_pages(in_pages, in_nr_pages);
+			p9_unpin_pages(in_pages, in_nr_pages);
 			atomic_sub(in_nr_pages, &vp_pinned);
 		}
 		if (out_pages) {
-			p9_release_pages(out_pages, out_nr_pages);
+			p9_unpin_pages(out_pages, out_nr_pages);
 			atomic_sub(out_nr_pages, &vp_pinned);
 		}
 		/* wakeup anybody waiting for slots to pin pages */


  parent reply	other threads:[~2023-06-30 15:28 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-06-30 15:25 [RFC PATCH 00/11] iov_iter: Use I/O direction from kiocb, iomap & request rather than iov_iter David Howells
2023-06-30 15:25 ` [RFC PATCH 01/11] iov_iter: Fix comment refs to iov_iter_get_pages/pages_alloc() David Howells
2023-07-06 15:21   ` Christoph Hellwig
2023-06-30 15:25 ` [RFC PATCH 02/11] vfs: Set IOCB_WRITE in iocbs that we're going to write from David Howells
2023-07-06 15:22   ` Christoph Hellwig
2023-06-30 15:25 ` [RFC PATCH 03/11] vfs: Use init_kiocb() to initialise new IOCBs David Howells
2023-06-30 15:39   ` Jens Axboe
2023-06-30 16:00   ` David Howells
2023-06-30 16:05     ` Jens Axboe
2023-07-06 15:29   ` Christoph Hellwig
2023-06-30 15:25 ` [RFC PATCH 04/11] iov_iter: Use IOCB_WRITE rather than iterator direction David Howells
2023-06-30 15:25 ` [RFC PATCH 05/11] iov_iter: Use IOMAP_WRITE " David Howells
2023-07-06 15:30   ` Christoph Hellwig
2023-06-30 15:25 ` [RFC PATCH 06/11] iov_iter: Use op_is_write() " David Howells
2023-07-06 15:30   ` Christoph Hellwig
2023-06-30 15:25 ` [RFC PATCH 07/11] cifs: Drop the check using iov_iter_rw() David Howells
2023-06-30 15:25 ` [RFC PATCH 08/11] iov_iter: Drop iov_iter_rw() and fold in last user David Howells
2023-07-06 15:31   ` Christoph Hellwig
2023-06-30 15:25 ` [RFC PATCH 09/11] iov_iter: Use I/O dir flags with iov_iter_extract_pages() David Howells
2023-06-30 15:25 ` David Howells [this message]
2023-06-30 15:25 ` [RFC PATCH 11/11] scsi: Use extract_iter_to_sg() David Howells

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230630152524.661208-11-dhowells@redhat.com \
    --to=dhowells@redhat.com \
    --cc=asmadeus@codewreck.org \
    --cc=axboe@kernel.dk \
    --cc=brauner@kernel.org \
    --cc=david@redhat.com \
    --cc=ericvh@gmail.com \
    --cc=hch@infradead.org \
    --cc=hdanton@sina.com \
    --cc=jack@suse.cz \
    --cc=jgg@nvidia.com \
    --cc=jlayton@kernel.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux_oss@crudebyte.com \
    --cc=logang@deltatee.com \
    --cc=lucho@ionkov.net \
    --cc=v9fs-developer@lists.sourceforge.net \
    --cc=viro@zeniv.linux.org.uk \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.