linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: jglisse@redhat.com
To: linux-kernel@vger.kernel.org
Cc: "Jérôme Glisse" <jglisse@redhat.com>,
	linux-fsdevel@vger.kernel.org, linux-block@vger.kernel.org,
	linux-mm@kvack.org, "John Hubbard" <jhubbard@nvidia.com>,
	"Jan Kara" <jack@suse.cz>,
	"Dan Williams" <dan.j.williams@intel.com>,
	"Alexander Viro" <viro@zeniv.linux.org.uk>,
	"Johannes Thumshirn" <jthumshirn@suse.de>,
	"Christoph Hellwig" <hch@lst.de>, "Jens Axboe" <axboe@kernel.dk>,
	"Ming Lei" <ming.lei@redhat.com>,
	"Dave Chinner" <david@fromorbit.com>,
	"Jason Gunthorpe" <jgg@ziepe.ca>,
	"Matthew Wilcox" <willy@infradead.org>,
	"Ernesto A . Fernández" <ernesto.mnd.fernandez@gmail.com>,
	"Jeff Moyer" <jmoyer@redhat.com>
Subject: [PATCH v1 12/15] fs/direct-io: keep track of wether a page is coming from GUP or not
Date: Thu, 11 Apr 2019 17:08:31 -0400	[thread overview]
Message-ID: <20190411210834.4105-13-jglisse@redhat.com> (raw)
In-Reply-To: <20190411210834.4105-1-jglisse@redhat.com>

From: Jérôme Glisse <jglisse@redhat.com>

We want to keep track of how we got a reference on a page when doing DIO,
i.e. whether the page was referenced through GUP (get_user_page*) or not.
To that end, this patch reworks the way the page reference is taken and
handed over between the DIO code and the BIO. Instead of taking a new
reference for pages that have been successfully added to a BIO, we just
steal the reference we already hold from when we looked up the page
(either through GUP or for ZERO_PAGE).

So this patch keeps track of whether the reference has been stolen by the
BIO or not. This avoids a bunch of get_page()/put_page() calls, which
limits the number of atomic operations.

Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-block@vger.kernel.org
Cc: linux-mm@kvack.org
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Ernesto A. Fernández <ernesto.mnd.fernandez@gmail.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
---
 fs/direct-io.c | 82 ++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 60 insertions(+), 22 deletions(-)

diff --git a/fs/direct-io.c b/fs/direct-io.c
index b8b5d8e31aeb..ef9fc7703a78 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -100,6 +100,7 @@ struct dio_submit {
 	unsigned cur_page_len;		/* Nr of bytes at cur_page_offset */
 	sector_t cur_page_block;	/* Where it starts */
 	loff_t cur_page_fs_offset;	/* Offset in file */
+	bool cur_page_from_gup;		/* Current page is coming from GUP */
 
 	struct iov_iter *iter;
 	/*
@@ -148,6 +149,8 @@ struct dio {
 		struct page *pages[DIO_PAGES];	/* page buffer */
 		struct work_struct complete_work;/* deferred AIO completion */
 	};
+
+	bool gup;			/* pages are coming from GUP */
 } ____cacheline_aligned_in_smp;
 
 static struct kmem_cache *dio_cache __read_mostly;
@@ -167,6 +170,7 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
 {
 	ssize_t ret;
 
+	dio->gup = iov_iter_get_pages_use_gup(sdio->iter);
 	ret = iov_iter_get_pages(sdio->iter, dio->pages, LONG_MAX, DIO_PAGES,
 				&sdio->from);
 
@@ -181,6 +185,7 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
 			dio->page_errors = ret;
 		get_page(page);
 		dio->pages[0] = page;
+		dio->gup = false;
 		sdio->head = 0;
 		sdio->tail = 1;
 		sdio->from = 0;
@@ -490,8 +495,12 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
  */
 static inline void dio_cleanup(struct dio *dio, struct dio_submit *sdio)
 {
-	while (sdio->head < sdio->tail)
-		put_page(dio->pages[sdio->head++]);
+	while (sdio->head < sdio->tail) {
+		if (dio->gup)
+			put_user_page(dio->pages[sdio->head++]);
+		else
+			put_page(dio->pages[sdio->head++]);
+	}
 }
 
 /*
@@ -760,15 +769,19 @@ static inline int dio_bio_add_page(struct dio_submit *sdio)
 {
 	int ret;
 
-	ret = bio_add_page(sdio->bio, sdio->cur_page,
-			sdio->cur_page_len, sdio->cur_page_offset, false);
+	/*
+	 * The bio steals the page reference. That is fine because a page is
+	 * added only once, i.e. when dio_send_cur_page() is called, and each
+	 * call to dio_send_cur_page() clears cur_page (on success).
+	 */
+	ret = bio_add_page(sdio->bio, sdio->cur_page, sdio->cur_page_len,
+			 sdio->cur_page_offset, sdio->cur_page_from_gup);
 	if (ret == sdio->cur_page_len) {
 		/*
 		 * Decrement count only, if we are done with this page
 		 */
 		if ((sdio->cur_page_len + sdio->cur_page_offset) == PAGE_SIZE)
 			sdio->pages_in_io--;
-		get_page(sdio->cur_page);
 		sdio->final_block_in_bio = sdio->cur_page_block +
 			(sdio->cur_page_len >> sdio->blkbits);
 		ret = 0;
@@ -828,9 +841,14 @@ static inline int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio,
 		ret = dio_new_bio(dio, sdio, sdio->cur_page_block, map_bh);
 		if (ret == 0) {
 			ret = dio_bio_add_page(sdio);
+			if (!ret)
+				/* Clear the current page. */
+				sdio->cur_page = NULL;
 			BUG_ON(ret != 0);
 		}
-	}
+	} else
+		/* Clear the current page. */
+		sdio->cur_page = NULL;
 out:
 	return ret;
 }
@@ -855,7 +873,7 @@ static inline int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio,
 static inline int
 submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
 		    unsigned offset, unsigned len, sector_t blocknr,
-		    struct buffer_head *map_bh)
+		    struct buffer_head *map_bh, bool gup)
 {
 	int ret = 0;
 
@@ -882,14 +900,13 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
 	 */
 	if (sdio->cur_page) {
 		ret = dio_send_cur_page(dio, sdio, map_bh);
-		put_page(sdio->cur_page);
-		sdio->cur_page = NULL;
 		if (ret)
 			return ret;
 	}
 
-	get_page(page);		/* It is in dio */
+	/* Steal page reference and GUP flag */
 	sdio->cur_page = page;
+	sdio->cur_page_from_gup = gup;
 	sdio->cur_page_offset = offset;
 	sdio->cur_page_len = len;
 	sdio->cur_page_block = blocknr;
@@ -903,8 +920,6 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
 		ret = dio_send_cur_page(dio, sdio, map_bh);
 		if (sdio->bio)
 			dio_bio_submit(dio, sdio);
-		put_page(sdio->cur_page);
-		sdio->cur_page = NULL;
 	}
 	return ret;
 }
@@ -946,13 +961,29 @@ static inline void dio_zero_block(struct dio *dio, struct dio_submit *sdio,
 	this_chunk_bytes = this_chunk_blocks << sdio->blkbits;
 
 	page = ZERO_PAGE(0);
+	get_page(page);
 	if (submit_page_section(dio, sdio, page, 0, this_chunk_bytes,
-				sdio->next_block_for_io, map_bh))
+				sdio->next_block_for_io, map_bh, false)) {
+		put_page(page);
 		return;
+	}
 
 	sdio->next_block_for_io += this_chunk_blocks;
 }
 
+static inline void dio_put_page(const struct dio *dio, bool stolen,
+				struct page *page)
+{
+	/* If page reference was stolen then nothing to do. */
+	if (stolen)
+		return;
+
+	if (dio->gup)
+		put_user_page(page);
+	else
+		put_page(page);
+}
+
 /*
  * Walk the user pages, and the file, mapping blocks to disk and generating
  * a sequence of (page,offset,len,block) mappings.  These mappings are injected
@@ -977,6 +1008,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
 	int ret = 0;
 
 	while (sdio->block_in_file < sdio->final_block_in_request) {
+		bool stolen = false;
 		struct page *page;
 		size_t from, to;
 
@@ -1003,7 +1035,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
 
 				ret = get_more_blocks(dio, sdio, map_bh);
 				if (ret) {
-					put_page(page);
+					dio_put_page(dio, stolen, page);
 					goto out;
 				}
 				if (!buffer_mapped(map_bh))
@@ -1048,7 +1080,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
 
 				/* AKPM: eargh, -ENOTBLK is a hack */
 				if (dio->op == REQ_OP_WRITE) {
-					put_page(page);
+					dio_put_page(dio, stolen, page);
 					return -ENOTBLK;
 				}
 
@@ -1061,7 +1093,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
 				if (sdio->block_in_file >=
 						i_size_aligned >> blkbits) {
 					/* We hit eof */
-					put_page(page);
+					dio_put_page(dio, stolen, page);
 					goto out;
 				}
 				zero_user(page, from, 1 << blkbits);
@@ -1099,11 +1131,13 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
 						  from,
 						  this_chunk_bytes,
 						  sdio->next_block_for_io,
-						  map_bh);
+						  map_bh, dio->gup);
 			if (ret) {
-				put_page(page);
+				dio_put_page(dio, stolen, page);
 				goto out;
-			}
+			} else
+				/* The page reference has been  stolen ... */
+				stolen = true;
 			sdio->next_block_for_io += this_chunk_blocks;
 
 			sdio->block_in_file += this_chunk_blocks;
@@ -1117,7 +1151,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
 		}
 
 		/* Drop the ref which was taken in get_user_pages() */
-		put_page(page);
+		dio_put_page(dio, stolen, page);
 	}
 out:
 	return ret;
@@ -1356,8 +1390,12 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 		ret2 = dio_send_cur_page(dio, &sdio, &map_bh);
 		if (retval == 0)
 			retval = ret2;
-		put_page(sdio.cur_page);
-		sdio.cur_page = NULL;
+		else {
+			if (sdio.cur_page_from_gup)
+				put_user_page(sdio.cur_page);
+			else
+				put_page(sdio.cur_page);
+		}
 	}
 	if (sdio.bio)
 		dio_bio_submit(dio, &sdio);
-- 
2.20.1


  parent reply	other threads:[~2019-04-11 21:09 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-04-11 21:08 [PATCH v1 00/15] Keep track of GUPed pages in fs and block jglisse
2019-04-11 21:08 ` [PATCH v1 01/15] fs/direct-io: fix trailing whitespace issues jglisse
2019-04-11 21:08 ` [PATCH v1 02/15] iov_iter: add helper to test if an iter would use GUP jglisse
2019-04-11 21:08 ` [PATCH v1 03/15] block: introduce bvec_page()/bvec_set_page() to get/set bio_vec.bv_page jglisse
2019-04-11 21:08 ` [PATCH v1 04/15] block: introduce BIO_VEC_INIT() macro to initialize bio_vec structure jglisse
2019-04-11 21:08 ` [PATCH v1 05/15] block: replace all bio_vec->bv_page by bvec_page()/bvec_set_page() jglisse
2019-04-11 21:08 ` [PATCH v1 06/15] block: convert bio_vec.bv_page to bv_pfn to store pfn and not page jglisse
2019-04-11 21:08 ` [PATCH v1 07/15] block: add bvec_put_page_dirty*() to replace put_page(bvec_page()) jglisse
2019-04-11 21:08 ` [PATCH v1 08/15] block: use bvec_put_page() instead of put_page(bvec_page()) jglisse
2019-04-11 21:08 ` [PATCH v1 09/15] block: bvec_put_page_dirty* instead of set_page_dirty* and bvec_put_page jglisse
2019-04-11 21:08 ` [PATCH v1 10/15] block: add gup flag to bio_add_page()/bio_add_pc_page()/__bio_add_page() jglisse
2019-04-15 14:59   ` Jan Kara
2019-04-15 15:24     ` Jerome Glisse
2019-04-16 16:46       ` Jan Kara
2019-04-16 16:54         ` Dan Williams
2019-04-16 17:07         ` Jerome Glisse
2019-04-16  0:22     ` Jerome Glisse
2019-04-16 16:52       ` Jan Kara
2019-04-16 18:32         ` Jerome Glisse
2019-04-11 21:08 ` [PATCH v1 11/15] block: make sure bio_add_page*() knows page that are coming from GUP jglisse
2019-04-11 21:08 ` jglisse [this message]
2019-04-11 23:14   ` [PATCH v1 12/15] fs/direct-io: keep track of wether a page is coming from GUP or not Dave Chinner
2019-04-12  0:08     ` Jerome Glisse
2019-04-11 21:08 ` [PATCH v1 13/15] fs/splice: use put_user_page() when appropriate jglisse
2019-04-11 21:08 ` [PATCH v1 14/15] fs: use bvec_set_gup_page() where appropriate jglisse
2019-04-11 21:08 ` [PATCH v1 15/15] ceph: use put_user_pages() instead of ceph_put_page_vector() jglisse
2019-04-15  7:46   ` Yan, Zheng
2019-04-15 15:11     ` Jerome Glisse
2019-04-16  0:00 ` [PATCH v1 00/15] Keep track of GUPed pages in fs and block Dave Chinner
     [not found] ` <2c124cc4-b97e-ee28-2926-305bc6bc74bd@plexistor.com>
2019-04-16 18:47   ` Jerome Glisse
2019-04-16 18:59   ` Kent Overstreet
2019-04-16 19:12     ` Dan Williams
2019-04-16 19:49       ` Jerome Glisse
2019-04-17 21:53         ` Dan Williams
2019-04-17 22:28           ` Jerome Glisse
2019-04-17 23:32             ` Dan Williams
2019-04-18 10:42             ` Jan Kara
2019-04-18 14:27               ` Jerome Glisse
2019-04-18 15:30                 ` Jan Kara
2019-04-18 15:36                   ` Jerome Glisse
2019-04-18 18:03               ` Dan Williams
     [not found]       ` <ccac6c5a-7120-0455-88de-ca321b01e825@plexistor.com>
2019-04-16 19:57         ` Jerome Glisse
     [not found]           ` <41e2d7e1-104b-a006-2824-015ca8c76cc8@gmail.com>
2019-04-16 23:16             ` Jerome Glisse
     [not found]               ` <fa00a2ff-3664-3165-7af8-9d9c53238245@plexistor.com>
2019-04-17  2:03                 ` Jerome Glisse
2019-04-17 21:19                   ` Jerome Glisse
2019-04-16 23:34             ` Jerome Glisse
2019-04-17 21:54         ` Dan Williams

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190411210834.4105-13-jglisse@redhat.com \
    --to=jglisse@redhat.com \
    --cc=axboe@kernel.dk \
    --cc=dan.j.williams@intel.com \
    --cc=david@fromorbit.com \
    --cc=ernesto.mnd.fernandez@gmail.com \
    --cc=hch@lst.de \
    --cc=jack@suse.cz \
    --cc=jgg@ziepe.ca \
    --cc=jhubbard@nvidia.com \
    --cc=jmoyer@redhat.com \
    --cc=jthumshirn@suse.de \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=ming.lei@redhat.com \
    --cc=viro@zeniv.linux.org.uk \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).