From: jglisse@redhat.com
To: linux-kernel@vger.kernel.org
Cc: "Jérôme Glisse" <jglisse@redhat.com>,
linux-fsdevel@vger.kernel.org, linux-block@vger.kernel.org,
linux-mm@kvack.org, "John Hubbard" <jhubbard@nvidia.com>,
"Jan Kara" <jack@suse.cz>,
"Dan Williams" <dan.j.williams@intel.com>,
"Alexander Viro" <viro@zeniv.linux.org.uk>,
"Johannes Thumshirn" <jthumshirn@suse.de>,
"Christoph Hellwig" <hch@lst.de>, "Jens Axboe" <axboe@kernel.dk>,
"Ming Lei" <ming.lei@redhat.com>,
"Dave Chinner" <david@fromorbit.com>,
"Jason Gunthorpe" <jgg@ziepe.ca>,
"Matthew Wilcox" <willy@infradead.org>,
"Ernesto A . Fernández" <ernesto.mnd.fernandez@gmail.com>,
"Jeff Moyer" <jmoyer@redhat.com>
Subject: [PATCH v1 12/15] fs/direct-io: keep track of wether a page is coming from GUP or not
Date: Thu, 11 Apr 2019 17:08:31 -0400 [thread overview]
Message-ID: <20190411210834.4105-13-jglisse@redhat.com> (raw)
In-Reply-To: <20190411210834.4105-1-jglisse@redhat.com>
From: Jérôme Glisse <jglisse@redhat.com>
We want to keep track of how we got a reference on a page when doing DIO,
i.e. whether the page was referenced through GUP (get_user_page*) or not.
To that end, this patch reworks the way the page reference is taken and
handed over between the DIO code and the BIO. Instead of taking an extra
reference on a page that has been successfully added to a BIO, we just steal
the reference we got when we looked up the page (either through GUP or for
ZERO_PAGE).
The patch therefore keeps track of whether the reference has been stolen by
the BIO or not. This avoids a number of get_page()/put_page() calls, which
reduces the number of atomic operations.
Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-block@vger.kernel.org
Cc: linux-mm@kvack.org
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Ernesto A. Fernández <ernesto.mnd.fernandez@gmail.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
---
fs/direct-io.c | 82 ++++++++++++++++++++++++++++++++++++--------------
1 file changed, 60 insertions(+), 22 deletions(-)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index b8b5d8e31aeb..ef9fc7703a78 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -100,6 +100,7 @@ struct dio_submit {
unsigned cur_page_len; /* Nr of bytes at cur_page_offset */
sector_t cur_page_block; /* Where it starts */
loff_t cur_page_fs_offset; /* Offset in file */
+ bool cur_page_from_gup; /* Current page is coming from GUP */
struct iov_iter *iter;
/*
@@ -148,6 +149,8 @@ struct dio {
struct page *pages[DIO_PAGES]; /* page buffer */
struct work_struct complete_work;/* deferred AIO completion */
};
+
+ bool gup; /* pages are coming from GUP */
} ____cacheline_aligned_in_smp;
static struct kmem_cache *dio_cache __read_mostly;
@@ -167,6 +170,7 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
{
ssize_t ret;
+ dio->gup = iov_iter_get_pages_use_gup(sdio->iter);
ret = iov_iter_get_pages(sdio->iter, dio->pages, LONG_MAX, DIO_PAGES,
&sdio->from);
@@ -181,6 +185,7 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
dio->page_errors = ret;
get_page(page);
dio->pages[0] = page;
+ dio->gup = false;
sdio->head = 0;
sdio->tail = 1;
sdio->from = 0;
@@ -490,8 +495,12 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
*/
static inline void dio_cleanup(struct dio *dio, struct dio_submit *sdio)
{
- while (sdio->head < sdio->tail)
- put_page(dio->pages[sdio->head++]);
+ while (sdio->head < sdio->tail) {
+ if (dio->gup)
+ put_user_page(dio->pages[sdio->head++]);
+ else
+ put_page(dio->pages[sdio->head++]);
+ }
}
/*
@@ -760,15 +769,19 @@ static inline int dio_bio_add_page(struct dio_submit *sdio)
{
int ret;
- ret = bio_add_page(sdio->bio, sdio->cur_page,
- sdio->cur_page_len, sdio->cur_page_offset, false);
+ /*
+ * The bio is stealing the page reference and that is fine we can add a
+ * page only once ie when dio_send_cur_page() is call and each call to
+ * dio_send_cur_page() clear the cur_page (on success).
+ */
+ ret = bio_add_page(sdio->bio, sdio->cur_page, sdio->cur_page_len,
+ sdio->cur_page_offset, sdio->cur_page_from_gup);
if (ret == sdio->cur_page_len) {
/*
* Decrement count only, if we are done with this page
*/
if ((sdio->cur_page_len + sdio->cur_page_offset) == PAGE_SIZE)
sdio->pages_in_io--;
- get_page(sdio->cur_page);
sdio->final_block_in_bio = sdio->cur_page_block +
(sdio->cur_page_len >> sdio->blkbits);
ret = 0;
@@ -828,9 +841,14 @@ static inline int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio,
ret = dio_new_bio(dio, sdio, sdio->cur_page_block, map_bh);
if (ret == 0) {
ret = dio_bio_add_page(sdio);
+ if (!ret)
+ /* Clear the current page. */
+ sdio->cur_page = NULL;
BUG_ON(ret != 0);
}
- }
+ } else
+ /* Clear the current page. */
+ sdio->cur_page = NULL;
out:
return ret;
}
@@ -855,7 +873,7 @@ static inline int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio,
static inline int
submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
unsigned offset, unsigned len, sector_t blocknr,
- struct buffer_head *map_bh)
+ struct buffer_head *map_bh, bool gup)
{
int ret = 0;
@@ -882,14 +900,13 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
*/
if (sdio->cur_page) {
ret = dio_send_cur_page(dio, sdio, map_bh);
- put_page(sdio->cur_page);
- sdio->cur_page = NULL;
if (ret)
return ret;
}
- get_page(page); /* It is in dio */
+ /* Steal page reference and GUP flag */
sdio->cur_page = page;
+ sdio->cur_page_from_gup = gup;
sdio->cur_page_offset = offset;
sdio->cur_page_len = len;
sdio->cur_page_block = blocknr;
@@ -903,8 +920,6 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
ret = dio_send_cur_page(dio, sdio, map_bh);
if (sdio->bio)
dio_bio_submit(dio, sdio);
- put_page(sdio->cur_page);
- sdio->cur_page = NULL;
}
return ret;
}
@@ -946,13 +961,29 @@ static inline void dio_zero_block(struct dio *dio, struct dio_submit *sdio,
this_chunk_bytes = this_chunk_blocks << sdio->blkbits;
page = ZERO_PAGE(0);
+ get_page(page);
if (submit_page_section(dio, sdio, page, 0, this_chunk_bytes,
- sdio->next_block_for_io, map_bh))
+ sdio->next_block_for_io, map_bh, false)) {
+ put_page(page);
return;
+ }
sdio->next_block_for_io += this_chunk_blocks;
}
+static inline void dio_put_page(const struct dio *dio, bool stolen,
+ struct page *page)
+{
+ /* If page reference was stolen then nothing to do. */
+ if (stolen)
+ return;
+
+ if (dio->gup)
+ put_user_page(page);
+ else
+ put_page(page);
+}
+
/*
* Walk the user pages, and the file, mapping blocks to disk and generating
* a sequence of (page,offset,len,block) mappings. These mappings are injected
@@ -977,6 +1008,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
int ret = 0;
while (sdio->block_in_file < sdio->final_block_in_request) {
+ bool stolen = false;
struct page *page;
size_t from, to;
@@ -1003,7 +1035,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
ret = get_more_blocks(dio, sdio, map_bh);
if (ret) {
- put_page(page);
+ dio_put_page(dio, stolen, page);
goto out;
}
if (!buffer_mapped(map_bh))
@@ -1048,7 +1080,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
/* AKPM: eargh, -ENOTBLK is a hack */
if (dio->op == REQ_OP_WRITE) {
- put_page(page);
+ dio_put_page(dio, stolen, page);
return -ENOTBLK;
}
@@ -1061,7 +1093,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
if (sdio->block_in_file >=
i_size_aligned >> blkbits) {
/* We hit eof */
- put_page(page);
+ dio_put_page(dio, stolen, page);
goto out;
}
zero_user(page, from, 1 << blkbits);
@@ -1099,11 +1131,13 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
from,
this_chunk_bytes,
sdio->next_block_for_io,
- map_bh);
+ map_bh, dio->gup);
if (ret) {
- put_page(page);
+ dio_put_page(dio, stolen, page);
goto out;
- }
+ } else
+ /* The page reference has been stolen ... */
+ stolen = true;
sdio->next_block_for_io += this_chunk_blocks;
sdio->block_in_file += this_chunk_blocks;
@@ -1117,7 +1151,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
}
/* Drop the ref which was taken in get_user_pages() */
- put_page(page);
+ dio_put_page(dio, stolen, page);
}
out:
return ret;
@@ -1356,8 +1390,12 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
ret2 = dio_send_cur_page(dio, &sdio, &map_bh);
if (retval == 0)
retval = ret2;
- put_page(sdio.cur_page);
- sdio.cur_page = NULL;
+ else {
+ if (sdio.cur_page_from_gup)
+ put_user_page(sdio.cur_page);
+ else
+ put_page(sdio.cur_page);
+ }
}
if (sdio.bio)
dio_bio_submit(dio, &sdio);
--
2.20.1
next prev parent reply other threads:[~2019-04-11 21:09 UTC|newest]
Thread overview: 47+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-04-11 21:08 [PATCH v1 00/15] Keep track of GUPed pages in fs and block jglisse
2019-04-11 21:08 ` [PATCH v1 01/15] fs/direct-io: fix trailing whitespace issues jglisse
2019-04-11 21:08 ` [PATCH v1 02/15] iov_iter: add helper to test if an iter would use GUP jglisse
2019-04-11 21:08 ` [PATCH v1 03/15] block: introduce bvec_page()/bvec_set_page() to get/set bio_vec.bv_page jglisse
2019-04-11 21:08 ` [PATCH v1 04/15] block: introduce BIO_VEC_INIT() macro to initialize bio_vec structure jglisse
2019-04-11 21:08 ` [PATCH v1 05/15] block: replace all bio_vec->bv_page by bvec_page()/bvec_set_page() jglisse
2019-04-11 21:08 ` [PATCH v1 06/15] block: convert bio_vec.bv_page to bv_pfn to store pfn and not page jglisse
2019-04-11 21:08 ` [PATCH v1 07/15] block: add bvec_put_page_dirty*() to replace put_page(bvec_page()) jglisse
2019-04-11 21:08 ` [PATCH v1 08/15] block: use bvec_put_page() instead of put_page(bvec_page()) jglisse
2019-04-11 21:08 ` [PATCH v1 09/15] block: bvec_put_page_dirty* instead of set_page_dirty* and bvec_put_page jglisse
2019-04-11 21:08 ` [PATCH v1 10/15] block: add gup flag to bio_add_page()/bio_add_pc_page()/__bio_add_page() jglisse
2019-04-15 14:59 ` Jan Kara
2019-04-15 15:24 ` Jerome Glisse
2019-04-16 16:46 ` Jan Kara
2019-04-16 16:54 ` Dan Williams
2019-04-16 17:07 ` Jerome Glisse
2019-04-16 0:22 ` Jerome Glisse
2019-04-16 16:52 ` Jan Kara
2019-04-16 18:32 ` Jerome Glisse
2019-04-11 21:08 ` [PATCH v1 11/15] block: make sure bio_add_page*() knows page that are coming from GUP jglisse
2019-04-11 21:08 ` jglisse [this message]
2019-04-11 23:14 ` [PATCH v1 12/15] fs/direct-io: keep track of wether a page is coming from GUP or not Dave Chinner
2019-04-12 0:08 ` Jerome Glisse
2019-04-11 21:08 ` [PATCH v1 13/15] fs/splice: use put_user_page() when appropriate jglisse
2019-04-11 21:08 ` [PATCH v1 14/15] fs: use bvec_set_gup_page() where appropriate jglisse
2019-04-11 21:08 ` [PATCH v1 15/15] ceph: use put_user_pages() instead of ceph_put_page_vector() jglisse
2019-04-15 7:46 ` Yan, Zheng
2019-04-15 15:11 ` Jerome Glisse
2019-04-16 0:00 ` [PATCH v1 00/15] Keep track of GUPed pages in fs and block Dave Chinner
[not found] ` <2c124cc4-b97e-ee28-2926-305bc6bc74bd@plexistor.com>
2019-04-16 18:47 ` Jerome Glisse
2019-04-16 18:59 ` Kent Overstreet
2019-04-16 19:12 ` Dan Williams
2019-04-16 19:49 ` Jerome Glisse
2019-04-17 21:53 ` Dan Williams
2019-04-17 22:28 ` Jerome Glisse
2019-04-17 23:32 ` Dan Williams
2019-04-18 10:42 ` Jan Kara
2019-04-18 14:27 ` Jerome Glisse
2019-04-18 15:30 ` Jan Kara
2019-04-18 15:36 ` Jerome Glisse
2019-04-18 18:03 ` Dan Williams
[not found] ` <ccac6c5a-7120-0455-88de-ca321b01e825@plexistor.com>
2019-04-16 19:57 ` Jerome Glisse
[not found] ` <41e2d7e1-104b-a006-2824-015ca8c76cc8@gmail.com>
2019-04-16 23:16 ` Jerome Glisse
[not found] ` <fa00a2ff-3664-3165-7af8-9d9c53238245@plexistor.com>
2019-04-17 2:03 ` Jerome Glisse
2019-04-17 21:19 ` Jerome Glisse
2019-04-16 23:34 ` Jerome Glisse
2019-04-17 21:54 ` Dan Williams
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190411210834.4105-13-jglisse@redhat.com \
--to=jglisse@redhat.com \
--cc=axboe@kernel.dk \
--cc=dan.j.williams@intel.com \
--cc=david@fromorbit.com \
--cc=ernesto.mnd.fernandez@gmail.com \
--cc=hch@lst.de \
--cc=jack@suse.cz \
--cc=jgg@ziepe.ca \
--cc=jhubbard@nvidia.com \
--cc=jmoyer@redhat.com \
--cc=jthumshirn@suse.de \
--cc=linux-block@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=ming.lei@redhat.com \
--cc=viro@zeniv.linux.org.uk \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).