All of lore.kernel.org
 help / color / mirror / Atom feed
From: jglisse@redhat.com
To: linux-kernel@vger.kernel.org
Cc: "Jérôme Glisse" <jglisse@redhat.com>,
	linux-fsdevel@vger.kernel.org, linux-block@vger.kernel.org,
	linux-mm@kvack.org, "John Hubbard" <jhubbard@nvidia.com>,
	"Jan Kara" <jack@suse.cz>,
	"Dan Williams" <dan.j.williams@intel.com>,
	"Alexander Viro" <viro@zeniv.linux.org.uk>,
	"Johannes Thumshirn" <jthumshirn@suse.de>,
	"Christoph Hellwig" <hch@lst.de>, "Jens Axboe" <axboe@kernel.dk>,
	"Ming Lei" <ming.lei@redhat.com>,
	"Dave Chinner" <david@fromorbit.com>,
	"Jason Gunthorpe" <jgg@ziepe.ca>,
	"Matthew Wilcox" <willy@infradead.org>,
	"Ernesto A . Fernández" <ernesto.mnd.fernandez@gmail.com>,
	"Jeff Moyer" <jmoyer@redhat.com>
Subject: [PATCH v1 12/15] fs/direct-io: keep track of wether a page is coming from GUP or not
Date: Thu, 11 Apr 2019 17:08:31 -0400	[thread overview]
Message-ID: <20190411210834.4105-13-jglisse@redhat.com> (raw)
In-Reply-To: <20190411210834.4105-1-jglisse@redhat.com>

From: Jérôme Glisse <jglisse@redhat.com>

We want to keep track of how we got a reference on page when doing DIO,
ie wether the page was reference through GUP (get_user_page*) or not.
For that this patch rework the way page reference is taken and handed
over between DIO code and BIO. Instead of taking a reference for page
that have been successfuly added to a BIO we just steal the reference
we have when we lookup the page (either through GUP or for ZERO_PAGE).

So this patch keep track of wether the reference has been stolen by the
BIO or not. This avoids a bunch of get_page()/put_page() so this limit
the number of atomic operations.

Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-block@vger.kernel.org
Cc: linux-mm@kvack.org
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Ernesto A. Fernández <ernesto.mnd.fernandez@gmail.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
---
 fs/direct-io.c | 82 ++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 60 insertions(+), 22 deletions(-)

diff --git a/fs/direct-io.c b/fs/direct-io.c
index b8b5d8e31aeb..ef9fc7703a78 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -100,6 +100,7 @@ struct dio_submit {
 	unsigned cur_page_len;		/* Nr of bytes at cur_page_offset */
 	sector_t cur_page_block;	/* Where it starts */
 	loff_t cur_page_fs_offset;	/* Offset in file */
+	bool cur_page_from_gup;		/* Current page is coming from GUP */
 
 	struct iov_iter *iter;
 	/*
@@ -148,6 +149,8 @@ struct dio {
 		struct page *pages[DIO_PAGES];	/* page buffer */
 		struct work_struct complete_work;/* deferred AIO completion */
 	};
+
+	bool gup;			/* pages are coming from GUP */
 } ____cacheline_aligned_in_smp;
 
 static struct kmem_cache *dio_cache __read_mostly;
@@ -167,6 +170,7 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
 {
 	ssize_t ret;
 
+	dio->gup = iov_iter_get_pages_use_gup(sdio->iter);
 	ret = iov_iter_get_pages(sdio->iter, dio->pages, LONG_MAX, DIO_PAGES,
 				&sdio->from);
 
@@ -181,6 +185,7 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
 			dio->page_errors = ret;
 		get_page(page);
 		dio->pages[0] = page;
+		dio->gup = false;
 		sdio->head = 0;
 		sdio->tail = 1;
 		sdio->from = 0;
@@ -490,8 +495,12 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
  */
 static inline void dio_cleanup(struct dio *dio, struct dio_submit *sdio)
 {
-	while (sdio->head < sdio->tail)
-		put_page(dio->pages[sdio->head++]);
+	while (sdio->head < sdio->tail) {
+		if (dio->gup)
+			put_user_page(dio->pages[sdio->head++]);
+		else
+			put_page(dio->pages[sdio->head++]);
+	}
 }
 
 /*
@@ -760,15 +769,19 @@ static inline int dio_bio_add_page(struct dio_submit *sdio)
 {
 	int ret;
 
-	ret = bio_add_page(sdio->bio, sdio->cur_page,
-			sdio->cur_page_len, sdio->cur_page_offset, false);
+	/*
+	 * The bio is stealing the page reference and that is fine we can add a
+	 * page only once ie when dio_send_cur_page() is call and each call to
+	 * dio_send_cur_page() clear the cur_page (on success).
+	 */
+	ret = bio_add_page(sdio->bio, sdio->cur_page, sdio->cur_page_len,
+			 sdio->cur_page_offset, sdio->cur_page_from_gup);
 	if (ret == sdio->cur_page_len) {
 		/*
 		 * Decrement count only, if we are done with this page
 		 */
 		if ((sdio->cur_page_len + sdio->cur_page_offset) == PAGE_SIZE)
 			sdio->pages_in_io--;
-		get_page(sdio->cur_page);
 		sdio->final_block_in_bio = sdio->cur_page_block +
 			(sdio->cur_page_len >> sdio->blkbits);
 		ret = 0;
@@ -828,9 +841,14 @@ static inline int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio,
 		ret = dio_new_bio(dio, sdio, sdio->cur_page_block, map_bh);
 		if (ret == 0) {
 			ret = dio_bio_add_page(sdio);
+			if (!ret)
+				/* Clear the current page. */
+				sdio->cur_page = NULL;
 			BUG_ON(ret != 0);
 		}
-	}
+	} else
+		/* Clear the current page. */
+		sdio->cur_page = NULL;
 out:
 	return ret;
 }
@@ -855,7 +873,7 @@ static inline int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio,
 static inline int
 submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
 		    unsigned offset, unsigned len, sector_t blocknr,
-		    struct buffer_head *map_bh)
+		    struct buffer_head *map_bh, bool gup)
 {
 	int ret = 0;
 
@@ -882,14 +900,13 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
 	 */
 	if (sdio->cur_page) {
 		ret = dio_send_cur_page(dio, sdio, map_bh);
-		put_page(sdio->cur_page);
-		sdio->cur_page = NULL;
 		if (ret)
 			return ret;
 	}
 
-	get_page(page);		/* It is in dio */
+	/* Steal page reference and GUP flag */
 	sdio->cur_page = page;
+	sdio->cur_page_from_gup = gup;
 	sdio->cur_page_offset = offset;
 	sdio->cur_page_len = len;
 	sdio->cur_page_block = blocknr;
@@ -903,8 +920,6 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
 		ret = dio_send_cur_page(dio, sdio, map_bh);
 		if (sdio->bio)
 			dio_bio_submit(dio, sdio);
-		put_page(sdio->cur_page);
-		sdio->cur_page = NULL;
 	}
 	return ret;
 }
@@ -946,13 +961,29 @@ static inline void dio_zero_block(struct dio *dio, struct dio_submit *sdio,
 	this_chunk_bytes = this_chunk_blocks << sdio->blkbits;
 
 	page = ZERO_PAGE(0);
+	get_page(page);
 	if (submit_page_section(dio, sdio, page, 0, this_chunk_bytes,
-				sdio->next_block_for_io, map_bh))
+				sdio->next_block_for_io, map_bh, false)) {
+		put_page(page);
 		return;
+	}
 
 	sdio->next_block_for_io += this_chunk_blocks;
 }
 
+static inline void dio_put_page(const struct dio *dio, bool stolen,
+				struct page *page)
+{
+	/* If page reference was stolen then nothing to do. */
+	if (stolen)
+		return;
+
+	if (dio->gup)
+		put_user_page(page);
+	else
+		put_page(page);
+}
+
 /*
  * Walk the user pages, and the file, mapping blocks to disk and generating
  * a sequence of (page,offset,len,block) mappings.  These mappings are injected
@@ -977,6 +1008,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
 	int ret = 0;
 
 	while (sdio->block_in_file < sdio->final_block_in_request) {
+		bool stolen = false;
 		struct page *page;
 		size_t from, to;
 
@@ -1003,7 +1035,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
 
 				ret = get_more_blocks(dio, sdio, map_bh);
 				if (ret) {
-					put_page(page);
+					dio_put_page(dio, stolen, page);
 					goto out;
 				}
 				if (!buffer_mapped(map_bh))
@@ -1048,7 +1080,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
 
 				/* AKPM: eargh, -ENOTBLK is a hack */
 				if (dio->op == REQ_OP_WRITE) {
-					put_page(page);
+					dio_put_page(dio, stolen, page);
 					return -ENOTBLK;
 				}
 
@@ -1061,7 +1093,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
 				if (sdio->block_in_file >=
 						i_size_aligned >> blkbits) {
 					/* We hit eof */
-					put_page(page);
+					dio_put_page(dio, stolen, page);
 					goto out;
 				}
 				zero_user(page, from, 1 << blkbits);
@@ -1099,11 +1131,13 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
 						  from,
 						  this_chunk_bytes,
 						  sdio->next_block_for_io,
-						  map_bh);
+						  map_bh, dio->gup);
 			if (ret) {
-				put_page(page);
+				dio_put_page(dio, stolen, page);
 				goto out;
-			}
+			} else
+				/* The page reference has been  stolen ... */
+				stolen = true;
 			sdio->next_block_for_io += this_chunk_blocks;
 
 			sdio->block_in_file += this_chunk_blocks;
@@ -1117,7 +1151,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
 		}
 
 		/* Drop the ref which was taken in get_user_pages() */
-		put_page(page);
+		dio_put_page(dio, stolen, page);
 	}
 out:
 	return ret;
@@ -1356,8 +1390,12 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 		ret2 = dio_send_cur_page(dio, &sdio, &map_bh);
 		if (retval == 0)
 			retval = ret2;
-		put_page(sdio.cur_page);
-		sdio.cur_page = NULL;
+		else {
+			if (sdio.cur_page_from_gup)
+				put_user_page(sdio.cur_page);
+			else
+				put_page(sdio.cur_page);
+		}
 	}
 	if (sdio.bio)
 		dio_bio_submit(dio, &sdio);
-- 
2.20.1


  parent reply	other threads:[~2019-04-11 21:09 UTC|newest]

Thread overview: 75+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-04-11 21:08 [PATCH v1 00/15] Keep track of GUPed pages in fs and block jglisse
2019-04-11 21:08 ` jglisse
2019-04-11 21:08 ` [PATCH v1 01/15] fs/direct-io: fix trailing whitespace issues jglisse
2019-04-11 21:08 ` [PATCH v1 02/15] iov_iter: add helper to test if an iter would use GUP jglisse
2019-04-11 21:08 ` [PATCH v1 03/15] block: introduce bvec_page()/bvec_set_page() to get/set bio_vec.bv_page jglisse
2019-04-11 21:08 ` [PATCH v1 04/15] block: introduce BIO_VEC_INIT() macro to initialize bio_vec structure jglisse
2019-04-11 21:08   ` jglisse
2019-04-11 21:08 ` [PATCH v1 05/15] block: replace all bio_vec->bv_page by bvec_page()/bvec_set_page() jglisse
2019-04-11 21:08 ` [PATCH v1 06/15] block: convert bio_vec.bv_page to bv_pfn to store pfn and not page jglisse
2019-04-11 21:08 ` [PATCH v1 07/15] block: add bvec_put_page_dirty*() to replace put_page(bvec_page()) jglisse
2019-04-11 21:08 ` [PATCH v1 08/15] block: use bvec_put_page() instead of put_page(bvec_page()) jglisse
2019-04-11 21:08 ` [PATCH v1 09/15] block: bvec_put_page_dirty* instead of set_page_dirty* and bvec_put_page jglisse
2019-04-11 21:08 ` [PATCH v1 10/15] block: add gup flag to bio_add_page()/bio_add_pc_page()/__bio_add_page() jglisse
2019-04-15 14:59   ` Jan Kara
2019-04-15 15:24     ` Jerome Glisse
2019-04-16 16:46       ` Jan Kara
2019-04-16 16:54         ` Dan Williams
2019-04-16 16:54           ` Dan Williams
2019-04-16 17:07         ` Jerome Glisse
2019-04-16  0:22     ` Jerome Glisse
2019-04-16 16:52       ` Jan Kara
2019-04-16 18:32         ` Jerome Glisse
2019-04-11 21:08 ` [PATCH v1 11/15] block: make sure bio_add_page*() knows page that are coming from GUP jglisse
2019-04-11 21:08 ` jglisse [this message]
2019-04-11 23:14   ` [PATCH v1 12/15] fs/direct-io: keep track of wether a page is coming from GUP or not Dave Chinner
2019-04-12  0:08     ` Jerome Glisse
2019-04-11 21:08 ` [PATCH v1 13/15] fs/splice: use put_user_page() when appropriate jglisse
2019-04-11 21:08 ` [PATCH v1 14/15] fs: use bvec_set_gup_page() where appropriate jglisse
2019-04-11 21:08 ` [PATCH v1 15/15] ceph: use put_user_pages() instead of ceph_put_page_vector() jglisse
2019-04-15  7:46   ` Yan, Zheng
2019-04-15 15:11     ` Jerome Glisse
2019-04-16  0:00 ` [PATCH v1 00/15] Keep track of GUPed pages in fs and block Dave Chinner
2019-04-16  0:00   ` Dave Chinner
2019-04-16 18:35 ` Boaz Harrosh
2019-04-16 18:47   ` Jerome Glisse
2019-04-16 18:47     ` Jerome Glisse
2019-04-16 19:14     ` Boaz Harrosh
2019-04-16 18:59   ` Kent Overstreet
2019-04-16 18:59     ` Kent Overstreet
2019-04-16 19:12     ` Dan Williams
2019-04-16 19:12       ` Dan Williams
2019-04-16 19:28       ` Boaz Harrosh
2019-04-16 19:57         ` Jerome Glisse
2019-04-16 19:57           ` Jerome Glisse
2019-04-16 22:09           ` Boaz Harrosh
2019-04-16 23:16             ` Jerome Glisse
2019-04-16 23:16               ` Jerome Glisse
2019-04-17  1:11               ` Boaz Harrosh
2019-04-17  2:03                 ` Jerome Glisse
2019-04-17  2:03                   ` Jerome Glisse
2019-04-17 21:19                   ` Jerome Glisse
2019-04-17 21:19                     ` Jerome Glisse
2019-04-16 23:34             ` Jerome Glisse
2019-04-16 23:34               ` Jerome Glisse
2019-04-17 21:54         ` Dan Williams
2019-04-17 21:54           ` Dan Williams
2019-04-18 15:56           ` Boaz Harrosh
2019-04-16 19:49       ` Jerome Glisse
2019-04-16 19:49         ` Jerome Glisse
2019-04-17 21:53         ` Dan Williams
2019-04-17 21:53           ` Dan Williams
2019-04-17 22:28           ` Jerome Glisse
2019-04-17 22:28             ` Jerome Glisse
2019-04-17 23:32             ` Dan Williams
2019-04-17 23:32               ` Dan Williams
2019-04-18 10:42             ` Jan Kara
2019-04-18 10:42               ` Jan Kara
2019-04-18 14:27               ` Jerome Glisse
2019-04-18 14:27                 ` Jerome Glisse
2019-04-18 15:30                 ` Jan Kara
2019-04-18 15:30                   ` Jan Kara
2019-04-18 15:36                   ` Jerome Glisse
2019-04-18 15:36                     ` Jerome Glisse
2019-04-18 18:03               ` Dan Williams
2019-04-18 18:03                 ` Dan Williams

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190411210834.4105-13-jglisse@redhat.com \
    --to=jglisse@redhat.com \
    --cc=axboe@kernel.dk \
    --cc=dan.j.williams@intel.com \
    --cc=david@fromorbit.com \
    --cc=ernesto.mnd.fernandez@gmail.com \
    --cc=hch@lst.de \
    --cc=jack@suse.cz \
    --cc=jgg@ziepe.ca \
    --cc=jhubbard@nvidia.com \
    --cc=jmoyer@redhat.com \
    --cc=jthumshirn@suse.de \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=ming.lei@redhat.com \
    --cc=viro@zeniv.linux.org.uk \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.