linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jan Kara <jack@suse.cz>
To: LKML <linux-kernel@vger.kernel.org>
Cc: linux-fsdevel@vger.kernel.org, linux-mm@kvack.org,
	Jan Kara <jack@suse.cz>
Subject: [PATCH 5/6] fs: Take mapping lock during direct IO
Date: Thu, 31 Jan 2013 22:49:53 +0100	[thread overview]
Message-ID: <1359668994-13433-6-git-send-email-jack@suse.cz> (raw)
In-Reply-To: <1359668994-13433-1-git-send-email-jack@suse.cz>

Make the direct IO code grab the mapping range lock for the range under IO
just before the DIO is submitted, and release the lock once the IO is complete.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/direct-io.c |   67 +++++++++++++++++++++++++++++++++++++++++++++++--------
 1 files changed, 57 insertions(+), 10 deletions(-)

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 3a430f3..1127ca5 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -56,10 +56,13 @@
  * blocksize.
  */
 
+struct dio_bio_data;
+
 /* dio_state only used in the submission path */
 
 struct dio_submit {
 	struct bio *bio;		/* bio under assembly */
+	struct dio_bio_data *bio_data;	/* structure to be attached to the bio */
 	unsigned blkbits;		/* doesn't change */
 	unsigned blkfactor;		/* When we're using an alignment which
 					   is finer than the filesystem's soft
@@ -143,7 +146,17 @@ struct dio {
 	struct page *pages[DIO_PAGES];	/* page buffer */
 } ____cacheline_aligned_in_smp;
 
+/*
+ * Structure associated with each submitted bio to provide back pointer and
+ * lock for the range accessed by the bio.
+ */
+struct dio_bio_data {
+	struct dio *dio;
+	struct range_lock lock;
+};
+
 static struct kmem_cache *dio_cache __read_mostly;
+static struct kmem_cache *dio_bio_data_cache __read_mostly;
 
 /*
  * How many pages are in the queue?
@@ -275,10 +288,13 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio);
  */
 static void dio_bio_end_aio(struct bio *bio, int error)
 {
-	struct dio *dio = bio->bi_private;
+	struct dio_bio_data *bio_data = bio->bi_private;
+	struct dio *dio = bio_data->dio;
 	unsigned long remaining;
 	unsigned long flags;
 
+	range_unlock(&dio->inode->i_mapping->mapping_lock, &bio_data->lock);
+	kmem_cache_free(dio_bio_data_cache, bio_data);
 	/* cleanup the bio */
 	dio_bio_complete(dio, bio);
 
@@ -298,14 +314,17 @@ static void dio_bio_end_aio(struct bio *bio, int error)
  * The BIO completion handler simply queues the BIO up for the process-context
  * handler.
  *
- * During I/O bi_private points at the dio.  After I/O, bi_private is used to
- * implement a singly-linked list of completed BIOs, at dio->bio_list.
+ * During I/O bi_private points at the dio_bio_data.  After I/O, bi_private is
+ * used to implement a singly-linked list of completed BIOs, at dio->bio_list.
  */
 static void dio_bio_end_io(struct bio *bio, int error)
 {
-	struct dio *dio = bio->bi_private;
+	struct dio_bio_data *bio_data = bio->bi_private;
+	struct dio *dio = bio_data->dio;
 	unsigned long flags;
 
+	range_unlock(&dio->inode->i_mapping->mapping_lock, &bio_data->lock);
+	kmem_cache_free(dio_bio_data_cache, bio_data);
 	spin_lock_irqsave(&dio->bio_lock, flags);
 	bio->bi_private = dio->bio_list;
 	dio->bio_list = bio;
@@ -325,7 +344,8 @@ static void dio_bio_end_io(struct bio *bio, int error)
  */
 void dio_end_io(struct bio *bio, int error)
 {
-	struct dio *dio = bio->bi_private;
+	struct dio_bio_data *bio_data = bio->bi_private;
+	struct dio *dio = bio_data->dio;
 
 	if (dio->is_async)
 		dio_bio_end_aio(bio, error);
@@ -369,8 +389,7 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
 {
 	struct bio *bio = sdio->bio;
 	unsigned long flags;
-
-	bio->bi_private = dio;
+	loff_t start = sdio->logical_offset_in_bio;
 
 	spin_lock_irqsave(&dio->bio_lock, flags);
 	dio->refcount++;
@@ -380,10 +399,30 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
 		bio_set_pages_dirty(bio);
 
 	if (sdio->submit_io)
-		sdio->submit_io(dio->rw, bio, dio->inode,
-			       sdio->logical_offset_in_bio);
-	else
+		sdio->submit_io(dio->rw, bio, dio->inode, start);
+	else {
+		struct address_space *mapping = dio->inode->i_mapping;
+		loff_t end = sdio->logical_offset_in_bio + bio->bi_size - 1;
+
+		sdio->bio_data->dio = dio;
+		range_lock_init(&sdio->bio_data->lock,
+			start >> PAGE_CACHE_SHIFT, end >> PAGE_CACHE_SHIFT);
+		range_lock(&mapping->mapping_lock, &sdio->bio_data->lock);
+		/*
+		 * Once we hold the mapping range lock, writeout and
+		 * invalidation cannot race with page faults or buffered IO.
+		 */
+		filemap_write_and_wait_range(mapping, start, end);
+		if (dio->rw == WRITE && mapping->nrpages) {
+			invalidate_inode_pages2_range(mapping,
+				start >> PAGE_CACHE_SHIFT,
+				end >> PAGE_CACHE_SHIFT);
+		}
+		bio->bi_private = sdio->bio_data;
+		sdio->bio_data = NULL;
+
 		submit_bio(dio->rw, bio);
+	}
 
 	sdio->bio = NULL;
 	sdio->boundary = 0;
@@ -397,6 +436,8 @@ static inline void dio_cleanup(struct dio *dio, struct dio_submit *sdio)
 {
 	while (dio_pages_present(sdio))
 		page_cache_release(dio_get_page(dio, sdio));
+	if (sdio->bio_data)
+		kmem_cache_free(dio_bio_data_cache, sdio->bio_data);
 }
 
 /*
@@ -600,6 +641,11 @@ static inline int dio_new_bio(struct dio *dio, struct dio_submit *sdio,
 	nr_pages = min(sdio->pages_in_io, bio_get_nr_vecs(map_bh->b_bdev));
 	nr_pages = min(nr_pages, BIO_MAX_PAGES);
 	BUG_ON(nr_pages <= 0);
+	sdio->bio_data = kmem_cache_alloc(dio_bio_data_cache, GFP_KERNEL);
+	if (!sdio->bio_data) {
+		ret = -ENOMEM;
+		goto out;
+	}
 	dio_bio_alloc(dio, sdio, map_bh->b_bdev, sector, nr_pages);
 	sdio->boundary = 0;
 out:
@@ -1307,6 +1353,7 @@ EXPORT_SYMBOL(__blockdev_direct_IO);
 static __init int dio_init(void)
 {
 	dio_cache = KMEM_CACHE(dio, SLAB_PANIC);
+	dio_bio_data_cache = KMEM_CACHE(dio_bio_data, SLAB_PANIC);
 	return 0;
 }
 module_init(dio_init)
-- 
1.7.1


  parent reply	other threads:[~2013-01-31 21:51 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-01-31 21:49 [PATCH 0/6 RFC] Mapping range lock Jan Kara
2013-01-31 21:49 ` [PATCH 1/6] lib: Implement range locks Jan Kara
2013-01-31 23:57   ` Andrew Morton
2013-02-04 16:41     ` Jan Kara
2013-02-11  5:42   ` Michel Lespinasse
2013-02-11 10:27     ` Jan Kara
2013-02-11 11:03       ` Michel Lespinasse
2013-02-11 12:58         ` Jan Kara
2013-01-31 21:49 ` [PATCH 2/6] fs: Take mapping lock in generic read paths Jan Kara
2013-01-31 23:59   ` Andrew Morton
2013-02-04 12:47     ` Jan Kara
2013-02-08 14:59       ` Jan Kara
2013-01-31 21:49 ` [PATCH 3/6] fs: Provide function to take mapping lock in buffered write path Jan Kara
2013-01-31 21:49 ` [PATCH 4/6] fs: Don't call dio_cleanup() before submitting all bios Jan Kara
2013-01-31 21:49 ` Jan Kara [this message]
2013-01-31 21:49 ` [PATCH 6/6] ext3: Convert ext3 to use mapping lock Jan Kara
2013-02-01  0:07 ` [PATCH 0/6 RFC] Mapping range lock Andrew Morton
2013-02-04  9:29   ` Zheng Liu
2013-02-04 12:38   ` Jan Kara
2013-02-05 23:25     ` Dave Chinner
2013-02-06 19:25       ` Jan Kara
2013-02-07  2:43         ` Dave Chinner
2013-02-07 11:06           ` Jan Kara

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1359668994-13433-6-git-send-email-jack@suse.cz \
    --to=jack@suse.cz \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).