* [Cluster-devel] [GFS2 RFC PATCH 0/3] Locating jhead using a pool of bios
@ 2018-09-25  5:38 Abhi Das
  2018-09-25  5:38 ` [Cluster-devel] [GFS2 RFC PATCH 1/3] gfs2: add more timing info to the journal recovery process Abhi Das
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Abhi Das @ 2018-09-25  5:38 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Based on Andreas' suggestions, this patchset takes a somewhat different
approach from the previous attempts to locate the journal head.

It uses a pool of bios to maintain a readahead queue of sorts, which allows
us to process the completed bios in sequential order to locate the jhead.
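
For illustration, here is a self-contained userspace model of the circular
scheme (POOL_SIZE, slot[] and the printf are stand-ins for the real pool
size, bios and jhead search; the actual kernel code is in patch 3/3):

#include <stdio.h>

#define POOL_SIZE 4	/* stand-in for the pool-size tunable */

int main(void)
{
	int slot[POOL_SIZE];	/* simulated in-flight bios */
	unsigned int cur = 0, wait = 0, inflight = 0;
	int blk;

	for (blk = 0; blk < 10; blk++) {
		if (inflight == POOL_SIZE) {
			/* pool full: wait on the *oldest* submission so
			 * completed reads are consumed in disk order */
			printf("process block %d\n", slot[wait]);
			wait = (wait + 1) % POOL_SIZE;
			inflight--;
		}
		slot[cur] = blk;	/* "submit" the next read */
		cur = (cur + 1) % POOL_SIZE;
		inflight++;
	}
	while (inflight) {	/* drain the tail, still in order */
		printf("process block %d\n", slot[wait]);
		wait = (wait + 1) % POOL_SIZE;
		inflight--;
	}
	return 0;
}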

I've done some initial testing and it seems to be holding up so far;
more testing is planned.

I haven't done a performance analysis against the old method yet, so I
don't know how well this performs. There may be some optimizations we
can do w.r.t. repeated allocations and such.

Abhi Das (3):
  gfs2: add more timing info to the journal recovery process
  gfs2: add a helper function to get_log_header that can be used
    elsewhere
  gfs2: introduce bio_pool to readahead journal to find jhead

 fs/gfs2/bmap.c       |   8 +-
 fs/gfs2/incore.h     |   3 +
 fs/gfs2/lops.c       | 359 +++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/gfs2/lops.h       |   1 +
 fs/gfs2/ops_fstype.c |   2 +
 fs/gfs2/recovery.c   | 171 ++++++------------------
 fs/gfs2/recovery.h   |   2 +
 fs/gfs2/sys.c        |  27 ++--
 8 files changed, 433 insertions(+), 140 deletions(-)

-- 
2.4.11




* [Cluster-devel] [GFS2 RFC PATCH 1/3] gfs2: add more timing info to the journal recovery process
  2018-09-25  5:38 [Cluster-devel] [GFS2 RFC PATCH 0/3] Locating jhead using a pool of bios Abhi Das
@ 2018-09-25  5:38 ` Abhi Das
  2018-09-25  5:38 ` [Cluster-devel] [GFS2 RFC PATCH 2/3] gfs2: add a helper function to get_log_header that can be used elsewhere Abhi Das
  2018-09-25  5:38 ` [Cluster-devel] [GFS2 RFC PATCH 3/3] gfs2: introduce bio_pool to readahead journal to find jhead Abhi Das
  2 siblings, 0 replies; 5+ messages in thread
From: Abhi Das @ 2018-09-25  5:38 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Time the gfs2_map_journal_extents() function and the journal head
lookup, and report the elapsed times.

Signed-off-by: Abhi Das <adas@redhat.com>
---
 fs/gfs2/bmap.c     | 8 ++++++--
 fs/gfs2/recovery.c | 2 ++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 03128ed..dddb5a4 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -14,6 +14,7 @@
 #include <linux/gfs2_ondisk.h>
 #include <linux/crc32.h>
 #include <linux/iomap.h>
+#include <linux/ktime.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -2248,7 +2249,9 @@ int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd)
 	unsigned int shift = sdp->sd_sb.sb_bsize_shift;
 	u64 size;
 	int rc;
+	ktime_t start, end;
 
+	start = ktime_get();
 	lblock_stop = i_size_read(jd->jd_inode) >> shift;
 	size = (lblock_stop - lblock) << shift;
 	jd->nr_extents = 0;
@@ -2268,8 +2271,9 @@ int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd)
 		lblock += (bh.b_size >> ip->i_inode.i_blkbits);
 	} while(size > 0);
 
-	fs_info(sdp, "journal %d mapped with %u extents\n", jd->jd_jid,
-		jd->nr_extents);
+	end = ktime_get();
+	fs_info(sdp, "journal %d mapped with %u extents in %lldms\n", jd->jd_jid,
+		jd->nr_extents, ktime_ms_delta(end, start));
 	return 0;
 
 fail:
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 0f501f9..b0717a0 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -460,6 +460,8 @@ void gfs2_recover_func(struct work_struct *work)
 	if (error)
 		goto fail_gunlock_ji;
 	t_jhd = ktime_get();
+	fs_info(sdp, "jid=%u: Journal head lookup took %lldms\n", jd->jd_jid,
+		ktime_ms_delta(t_jhd, t_jlck));
 
 	if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
 		fs_info(sdp, "jid=%u: Acquiring the transaction lock...\n",
-- 
2.4.11




* [Cluster-devel] [GFS2 RFC PATCH 2/3] gfs2: add a helper function to get_log_header that can be used elsewhere
  2018-09-25  5:38 [Cluster-devel] [GFS2 RFC PATCH 0/3] Locating jhead using a pool of bios Abhi Das
  2018-09-25  5:38 ` [Cluster-devel] [GFS2 RFC PATCH 1/3] gfs2: add more timing info to the journal recovery process Abhi Das
@ 2018-09-25  5:38 ` Abhi Das
  2018-09-25  5:38 ` [Cluster-devel] [GFS2 RFC PATCH 3/3] gfs2: introduce bio_pool to readahead journal to find jhead Abhi Das
  2 siblings, 0 replies; 5+ messages in thread
From: Abhi Das @ 2018-09-25  5:38 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Move and re-order the error checks and hash/crc computations into a new
function, __get_log_header(), so that it can be used where the log
header is not being read through a buffer_head.
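
A minimal sketch of the intended use, where the log header has been read
into a page rather than a buffer_head (the page here is hypothetical; the
real caller arrives in patch 3/3):

	struct gfs2_log_header_host host;

	/* Passing blkno == 0 skips the block-number check, for callers
	 * that don't know which on-disk block the buffer came from. */
	if (!__get_log_header(sdp, page_address(page), 0, &host))
		pr_info("valid log header, seq %llu\n",
			(unsigned long long)host.lh_sequence);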

Signed-off-by: Abhi Das <adas@redhat.com>
---
 fs/gfs2/recovery.c | 53 ++++++++++++++++++++++++++++++++---------------------
 fs/gfs2/recovery.h |  2 ++
 2 files changed, 34 insertions(+), 21 deletions(-)

diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index b0717a0..2dac430 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -120,6 +120,35 @@ void gfs2_revoke_clean(struct gfs2_jdesc *jd)
 	}
 }
 
+int __get_log_header(struct gfs2_sbd *sdp, const struct gfs2_log_header *lh,
+		     unsigned int blkno, struct gfs2_log_header_host *head)
+{
+	u32 hash, crc;
+
+	if (lh->lh_header.mh_magic != cpu_to_be32(GFS2_MAGIC) ||
+	    lh->lh_header.mh_type != cpu_to_be32(GFS2_METATYPE_LH) ||
+	    (blkno && be32_to_cpu(lh->lh_blkno) != blkno))
+		return 1;
+
+	hash = crc32(~0, lh, LH_V1_SIZE - 4);
+	hash = ~crc32_le_shift(hash, 4); /* assume lh_hash is zero */
+
+	if (be32_to_cpu(lh->lh_hash) != hash)
+		return 1;
+
+	crc = crc32c(~0, (void *)lh + LH_V1_SIZE + 4,
+		     sdp->sd_sb.sb_bsize - LH_V1_SIZE - 4);
+
+	if (lh->lh_crc != 0 && be32_to_cpu(lh->lh_crc) != crc)
+		return 1;
+
+	head->lh_sequence = be64_to_cpu(lh->lh_sequence);
+	head->lh_flags = be32_to_cpu(lh->lh_flags);
+	head->lh_tail = be32_to_cpu(lh->lh_tail);
+	head->lh_blkno = be32_to_cpu(lh->lh_blkno);
+
+	return 0;
+}
 /**
  * get_log_header - read the log header for a given segment
  * @jd: the journal
@@ -137,36 +166,18 @@ void gfs2_revoke_clean(struct gfs2_jdesc *jd)
 static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
 			  struct gfs2_log_header_host *head)
 {
-	struct gfs2_log_header *lh;
+	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 	struct buffer_head *bh;
-	u32 hash, crc;
 	int error;
 
 	error = gfs2_replay_read_block(jd, blk, &bh);
 	if (error)
 		return error;
-	lh = (void *)bh->b_data;
-
-	hash = crc32(~0, lh, LH_V1_SIZE - 4);
-	hash = ~crc32_le_shift(hash, 4);  /* assume lh_hash is zero */
-
-	crc = crc32c(~0, (void *)lh + LH_V1_SIZE + 4,
-		     bh->b_size - LH_V1_SIZE - 4);
-
-	error = lh->lh_header.mh_magic != cpu_to_be32(GFS2_MAGIC) ||
-		lh->lh_header.mh_type != cpu_to_be32(GFS2_METATYPE_LH) ||
-		be32_to_cpu(lh->lh_blkno) != blk ||
-		be32_to_cpu(lh->lh_hash) != hash ||
-		(lh->lh_crc != 0 && be32_to_cpu(lh->lh_crc) != crc);
 
+	error = __get_log_header(sdp, (const struct gfs2_log_header *)bh->b_data,
+				 blk, head);
 	brelse(bh);
 
-	if (!error) {
-		head->lh_sequence = be64_to_cpu(lh->lh_sequence);
-		head->lh_flags = be32_to_cpu(lh->lh_flags);
-		head->lh_tail = be32_to_cpu(lh->lh_tail);
-		head->lh_blkno = be32_to_cpu(lh->lh_blkno);
-	}
 	return error;
 }
 
diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h
index 11fdfab..943a67c 100644
--- a/fs/gfs2/recovery.h
+++ b/fs/gfs2/recovery.h
@@ -31,6 +31,8 @@ extern int gfs2_find_jhead(struct gfs2_jdesc *jd,
 		    struct gfs2_log_header_host *head);
 extern int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd, bool wait);
 extern void gfs2_recover_func(struct work_struct *work);
+extern int __get_log_header(struct gfs2_sbd *sdp, const struct gfs2_log_header *lh,
+			    unsigned int blkno, struct gfs2_log_header_host *head);
 
 #endif /* __RECOVERY_DOT_H__ */
 
-- 
2.4.11




* [Cluster-devel] [GFS2 RFC PATCH 3/3] gfs2: introduce bio_pool to readahead journal to find jhead
  2018-09-25  5:38 [Cluster-devel] [GFS2 RFC PATCH 0/3] Locating jhead using a pool of bios Abhi Das
  2018-09-25  5:38 ` [Cluster-devel] [GFS2 RFC PATCH 1/3] gfs2: add more timing info to the journal recovery process Abhi Das
  2018-09-25  5:38 ` [Cluster-devel] [GFS2 RFC PATCH 2/3] gfs2: add a helper function to get_log_header that can be used elsewhere Abhi Das
@ 2018-09-25  5:38 ` Abhi Das
  2018-09-25  9:42   ` Steven Whitehouse
  2 siblings, 1 reply; 5+ messages in thread
From: Abhi Das @ 2018-09-25  5:38 UTC (permalink / raw)
  To: cluster-devel.redhat.com

This patch adds a new data structure called bio_pool. This is
basically a dynamically allocated array of struct bio* and
associated variables to manage this data structure.

The array is used in a circular fashion until the entire array
has bios that are in flight. i.e. they need to be waited on and
consumed upon completion, in order to make room for more. To
locate the journal head, we read the journal sequentially from
the beginning, creating bios and submitting them as necessary.

We wait for these inflight bios in the order we submit them even
though the block layer may complete them out of order. This strict
ordering allows us to determine the journal head without having
to do extra reads.

A tunable allows us to configure the size of the bio_pool.

Signed-off-by: Abhi Das <adas@redhat.com>
---
 fs/gfs2/incore.h     |   3 +
 fs/gfs2/lops.c       | 359 +++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/gfs2/lops.h       |   1 +
 fs/gfs2/ops_fstype.c |   2 +
 fs/gfs2/recovery.c   | 116 ++---------------
 fs/gfs2/sys.c        |  27 ++--
 6 files changed, 391 insertions(+), 117 deletions(-)

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index b96d39c..424687f 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -542,6 +542,8 @@ struct gfs2_jdesc {
 	int jd_recover_error;
 	/* Replay stuff */
 
+	struct gfs2_log_header_host jd_jhead;
+	struct mutex jd_jh_mutex;
 	unsigned int jd_found_blocks;
 	unsigned int jd_found_revokes;
 	unsigned int jd_replayed_blocks;
@@ -610,6 +612,7 @@ struct gfs2_tune {
 	unsigned int gt_complain_secs;
 	unsigned int gt_statfs_quantum;
 	unsigned int gt_statfs_slow;
+	unsigned int gt_bio_pool_size; /* Number of bios to use for the bio_pool */
 };
 
 enum {
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index f2567f9..69fc058 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -18,6 +18,7 @@
 #include <linux/fs.h>
 #include <linux/list_sort.h>
 
+#include "bmap.h"
 #include "dir.h"
 #include "gfs2.h"
 #include "incore.h"
@@ -370,6 +371,364 @@ void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page)
 		       gfs2_log_bmap(sdp));
 }
 
+/*
+ * The bio_pool structure is an array of bios of length 'size'.
+ * 'cur' is the index of the next bio to be submitted for I/O.
+ * 'wait' is the index of bio we need to wait on for I/O completion.
+ * 'inflight' is the number of bios submitted, but not yet completed.
+ */
+struct bio_pool {
+	struct bio **bios;
+	unsigned int size;
+	unsigned int cur;
+	unsigned int wait;
+	unsigned int inflight;
+};
+typedef int (search_bio_t) (struct gfs2_jdesc *jd, const void *ptr);
+
+/**
+ * bio_pool_submit_bio - Submit the current bio in the pool
+ *
+ * @pool: The bio pool
+ *
+ * Submit the current bio (pool->bios[pool->cur]) and update internal pool
+ * management variables. If pool->inflight == pool->size, we've maxed out all
+ * the bios in our pool and the caller needs to wait on some bios, process and
+ * free them so new ones can be added.
+ *
+ * Returns: 1 if we maxed out our bios, 0 otherwise
+ */
+
+static int bio_pool_submit_bio(struct bio_pool *pool)
+{
+	int ret = 0;
+	BUG_ON(!pool || !pool->bios || !pool->bios[pool->cur]);
+
+	bio_set_op_attrs(pool->bios[pool->cur], REQ_OP_READ, 0);
+	submit_bio(pool->bios[pool->cur]);
+	pool->cur = pool->cur == pool->size - 1 ? 0 : pool->cur + 1;
+	pool->inflight++;
+	if (pool->inflight == pool->size)
+		ret = 1;
+	return ret;
+}
+
+/**
+ * bio_pool_get_cur - Do what's necessary to get a valid bio for the caller.
+ *
+ * @pool: The bio pool
+ * @sdp: The gfs2 superblock
+ * @blkno: The block number we wish to add to a bio
+ * @end_io: The end_io completion callback
+ *
+ * If there's no currently active bio, we allocate one for the blkno and return.
+ *
+ * If there's an active bio at pool->bios[pool->cur], we check if the requested
+ * block may be tacked onto it. If so, we do nothing and return.
+ *
+ * If the block can't be added (non-contiguous), we submit the current bio.
+ * pool->cur, pool->inflight will change and we fall through to allocate a new
+ * bio and return. In this case, it is possible that submitting the current bio
+ * has maxed out our readahead (bio_pool_submit_bio() returns 1). We pass this
+ * error code back to the caller.
+ *
+ * Returns: 1 if bio_pool_submit_bio() maxed readahead, else 0.
+ */
+
+static int bio_pool_get_cur(struct bio_pool *pool, struct gfs2_sbd *sdp,
+			    u64 blkno, bio_end_io_t end_io, void *private)
+{
+	struct super_block *sb = sdp->sd_vfs;
+	struct bio *bio;
+	int ret = 0;
+
+	BUG_ON(!pool || !pool->bios);
+
+	if (pool->bios[pool->cur]) {
+		u64 nblk;
+		nblk = bio_end_sector(pool->bios[pool->cur]);
+		nblk >>= sdp->sd_fsb2bb_shift;
+		if (blkno == nblk)
+			return 0;
+		ret = bio_pool_submit_bio(pool);
+	}
+	bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
+	bio->bi_iter.bi_sector = blkno * (sb->s_blocksize >> 9);
+	bio_set_dev(bio, sb->s_bdev);
+	bio->bi_end_io = end_io;
+	bio->bi_private = private;
+	pool->bios[pool->cur] = bio;
+
+	return ret;
+}
+
+/**
+ * gfs2_jhead_search - search a block for the journal head
+ *
+ * @jd: The journal descriptor
+ * @ptr: Pointer to the block data
+ *
+ * Valid log headers have monotonically increasing sequence numbers; we
+ * track the largest one seen, and a smaller one means the head was found.
+ *
+ * Returns: 1 if the jhead was found, 0 otherwise.
+ */
+
+int gfs2_jhead_search(struct gfs2_jdesc *jd, const void *ptr)
+{
+	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
+	struct gfs2_log_header_host uninitialized_var(lh);
+	int ret = 0;
+
+	if (!__get_log_header(sdp, ptr, 0, &lh)) {
+		if (lh.lh_sequence > jd->jd_jhead.lh_sequence)
+			jd->jd_jhead = lh;
+		else
+			ret = 1;
+	}
+	return ret;
+}
+
+/**
+ * gfs2_bio_process - search a bio
+ *
+ * @jd: The journal descriptor
+ * @bio: The bio to process
+ * @search: The search function
+ *
+ * For each page in the bio, call the 'search' function to look for the journal
+ * head. Note that the bio and its pages are cleaned up in this function, so
+ * the 'search' function ptr can be NULL and the result of this function would
+ * simply be a cleanup of the bio.
+ *
+ * Returns: 1 if jhead was found, 0 otherwise.
+ */
+
+int gfs2_bio_process(struct gfs2_jdesc *jd, struct bio *bio,
+		     search_bio_t search)
+{
+	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
+	struct page *page;
+	struct bio_vec *bvec;
+	int i, found = 0;
+
+	if (bio->bi_status) {
+		fs_err(sdp, "Error %d reading from journal, jid=%u\n",
+		       bio->bi_status, jd->jd_jid);
+	}
+
+	bio_for_each_segment_all(bvec, bio, i) {
+		page = bvec->bv_page;
+		if (search && !found)
+			found = search(jd, page_address(page));
+		mempool_free(page, gfs2_page_pool);
+	}
+
+	bio_put(bio);
+	return found;
+}
+
+static void gfs2_bio_wait(struct bio *bio)
+{
+	while (1) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		if (!READ_ONCE(bio->bi_private))
+			break;
+		io_schedule();
+	}
+	__set_current_state(TASK_RUNNING);
+}
+
+static void gfs2_bio_end_io(struct bio *bio)
+{
+	struct task_struct *waiter = bio->bi_private;
+
+	WRITE_ONCE(bio->bi_private, NULL);
+	wake_up_process(waiter);
+}
+
+/**
+ * bio_pool_wait_process - wait on the next bio and process the completed bio.
+ *
+ * @pool: The bio pool
+ * @jd: The journal descriptor
+ * @search: The function to pass to gfs2_bio_process() to process the bio.
+ *
+ * Wait on the next bio indexed by pool->wait. Upon completion, call
+ * gfs2_bio_process() to process the bio. Update the internal pool management
+ * variables.
+ *
+ * Returns: 1 if gfs2_bio_process() found the jhead, 0 otherwise.
+ */
+
+static int bio_pool_wait_process(struct bio_pool *pool, struct gfs2_jdesc *jd,
+				search_bio_t search)
+{
+	int ret;
+	BUG_ON(!pool || !pool->bios || !pool->bios[pool->wait]);
+	gfs2_bio_wait(pool->bios[pool->wait]);
+	ret = gfs2_bio_process(jd, pool->bios[pool->wait], search);
+	pool->bios[pool->wait] = NULL;
+	pool->inflight--;
+	pool->wait = pool->wait == pool->size - 1 ? 0 : pool->wait + 1;
+	return ret;
+}
+
+/**
+ * bio_pool_process_page - Add a page to the pool and flush bios, wait
+ *                         for completion and process as necessary.
+ * @pool: The bio_pool
+ * @page: The page to be added
+ * @jd: The journal descriptor
+ * @blkno: The block corresponding to the page
+ *
+ * As a general rule, we wait on the next bio if we submitted enough bios such
+ * that pool.inflight == pool.size.
+ *
+ * 1. Figure out which bio in the pool is able to take the page. This may mean
+ *    submitting a previous bio. This may also involve waiting on a bio if the
+ *    number of inflight bios is maxed out.
+ * 2. Add the page to the current bio. On successful addition, we simply return
+ *    unless this was the last page. If so, we submit this bio and wait for *all*
+ *    the inflight bios to complete and look for the jhead in them. We do this
+ *    because this function won't be called back again after the last page.
+ *    If adding the page fails because the current bio is full, go to step 3.
+ * 3. Submit the current bio and wait on the next bio if needed. Try to add
+ *    the page again to a new bio.
+ *
+ * Returns: 0 if the page was queued for search, 1 if found, errno otherwise
+ */
+
+static int bio_pool_process_page(struct bio_pool *pool, struct page *page,
+				 struct gfs2_jdesc *jd, u64 blkno)
+{
+	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
+	struct super_block *sb = sdp->sd_vfs;
+	int ret, last = page->private;
+
+	BUG_ON(!pool || !pool->bios);
+
+try_again:
+	ret = bio_pool_get_cur(pool, sdp, blkno, gfs2_bio_end_io, current);
+	if (ret) { /* We had to submit current bio and maxed out, so we wait */
+		ret = bio_pool_wait_process(pool, jd, gfs2_jhead_search);
+		if (ret)
+			goto out;
+	}
+
+	ret = bio_add_page(pool->bios[pool->cur], page, sb->s_blocksize, 0);
+	if (ret > 0) { /* successfully added */
+		ret = 0;
+		goto out;
+	}
+	/* bio is full, need to submit it */
+	ret = bio_pool_submit_bio(pool);
+	if (ret) {
+		ret = bio_pool_wait_process(pool, jd, gfs2_jhead_search);
+		if (ret)
+			goto out;
+	}
+	goto try_again;
+
+out:
+	if (ret == 0 && last) { /* look for jhead in the last parts */
+		bio_pool_submit_bio(pool);
+		while (pool->inflight) {
+			ret = bio_pool_wait_process(pool, jd, gfs2_jhead_search);
+			if (ret)
+				break;
+		}
+	}
+	return ret;
+}
+
+static int bio_pool_init(struct bio_pool *pool, unsigned int size)
+{
+	BUG_ON(!pool || size < 2 || size > 64);
+	pool->bios = kcalloc(size, sizeof(struct bio *), GFP_NOFS);
+	if (!pool->bios)
+		return -ENOMEM;
+
+	pool->size = size;
+	pool->cur = 0;
+	pool->wait = 0;
+	pool->inflight = 0;
+	return 0;
+}
+
+static void bio_pool_cleanup(struct bio_pool *pool, struct gfs2_jdesc *jd)
+{
+	int i;
+
+	for (i = 0; i < pool->size; i++) {
+		if (!pool->bios[i])
+			continue;
+		gfs2_bio_wait(pool->bios[i]);
+		gfs2_bio_process(jd, pool->bios[i], NULL);
+	}
+}
+
+static void bio_pool_uninit(struct bio_pool *pool, struct gfs2_jdesc *jd)
+{
+	if (!pool)
+		return;
+	if (pool->bios) {
+		bio_pool_cleanup(pool, jd);
+		kfree(pool->bios);
+		memset(pool, 0, sizeof(struct bio_pool));
+	}
+}
+
+/**
+ * gfs2_log_jh_lookup - Use a pool of bios to read in the journal and locate
+ *                      the journal head
+ * @jd: The journal descriptor
+ *
+ * Use the pool of bios for readahead. When enough bios are inflight (i.e.
+ * submitted), we wait for the earliest submitted bio to complete before
+ * creating another. This way, we can get some readahead going as well as
+ * process the completed bios sequentially.
+ *
+ * We don't submit any more bios once we've found the head.
+ *
+ * Returns: 0 on success (jd->jd_jhead contains the journal head), errno
+ *          otherwise
+ */
+
+int gfs2_log_jh_lookup(struct gfs2_jdesc *jd)
+{
+	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
+	struct gfs2_journal_extent *je;
+	int i, ret = 0;
+
+	struct bio_pool bpool;
+
+	memset(&jd->jd_jhead, 0, sizeof(struct gfs2_log_header_host));
+	ret = bio_pool_init(&bpool, gfs2_tune_get(sdp, gt_bio_pool_size));
+	if (ret)
+		return ret;
+
+	if (list_empty(&jd->extent_list))
+		gfs2_map_journal_extents(sdp, jd);
+
+	list_for_each_entry(je, &jd->extent_list, list) {
+		for (i = 0; i < je->blocks; i++) {
+			struct page *page;
+			page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
+			page_private(page) = (list_is_last(&je->list, &jd->extent_list)
+					      && i == (je->blocks - 1));
+			ret = bio_pool_process_page(&bpool, page, jd, je->dblock + i);
+			if (ret)
+				goto out;
+		}
+	}
+out:
+	bio_pool_uninit(&bpool, jd);
+	if (ret == 1) /* found */
+		return 0;
+	return ret;
+}
+
 static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type,
 				      u32 ld_length, u32 ld_data1)
 {
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
index e494939..10589fd 100644
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@@ -32,6 +32,7 @@ extern void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
 extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page);
 extern void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int op, int op_flags);
 extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
+extern int gfs2_log_jh_lookup(struct gfs2_jdesc *jd);
 
 static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
 {
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index c2469833b..362a9d4 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -61,6 +61,7 @@ static void gfs2_tune_init(struct gfs2_tune *gt)
 	gt->gt_new_files_jdata = 0;
 	gt->gt_max_readahead = BIT(18);
 	gt->gt_complain_secs = 10;
+	gt->gt_bio_pool_size = 16;
 }
 
 static struct gfs2_sbd *init_sbd(struct super_block *sb)
@@ -579,6 +580,7 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
 			break;
 		}
 
+		mutex_init(&jd->jd_jh_mutex);
 		spin_lock(&sdp->sd_jindex_spin);
 		jd->jd_jid = sdp->sd_journals++;
 		list_add_tail(&jd->jd_list, &sdp->sd_jindex_list);
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 2dac430..fe267cf 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -149,6 +149,7 @@ int __get_log_header(struct gfs2_sbd *sdp, const struct gfs2_log_header *lh,
 
 	return 0;
 }
+
 /**
  * get_log_header - read the log header for a given segment
  * @jd: the journal
@@ -182,85 +183,11 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
 }
 
 /**
- * find_good_lh - find a good log header
- * @jd: the journal
- * @blk: the segment to start searching from
- * @lh: the log header to fill in
- * @forward: if true search forward in the log, else search backward
- *
- * Call get_log_header() to get a log header for a segment, but if the
- * segment is bad, either scan forward or backward until we find a good one.
- *
- * Returns: errno
- */
-
-static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
-			struct gfs2_log_header_host *head)
-{
-	unsigned int orig_blk = *blk;
-	int error;
-
-	for (;;) {
-		error = get_log_header(jd, *blk, head);
-		if (error <= 0)
-			return error;
-
-		if (++*blk == jd->jd_blocks)
-			*blk = 0;
-
-		if (*blk == orig_blk) {
-			gfs2_consist_inode(GFS2_I(jd->jd_inode));
-			return -EIO;
-		}
-	}
-}
-
-/**
- * jhead_scan - make sure we've found the head of the log
- * @jd: the journal
- * @head: this is filled in with the log descriptor of the head
- *
- * At this point, seg and lh should be either the head of the log or just
- * before.  Scan forward until we find the head.
- *
- * Returns: errno
- */
-
-static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
-{
-	unsigned int blk = head->lh_blkno;
-	struct gfs2_log_header_host lh;
-	int error;
-
-	for (;;) {
-		if (++blk == jd->jd_blocks)
-			blk = 0;
-
-		error = get_log_header(jd, blk, &lh);
-		if (error < 0)
-			return error;
-		if (error == 1)
-			continue;
-
-		if (lh.lh_sequence == head->lh_sequence) {
-			gfs2_consist_inode(GFS2_I(jd->jd_inode));
-			return -EIO;
-		}
-		if (lh.lh_sequence < head->lh_sequence)
-			break;
-
-		*head = lh;
-	}
-
-	return 0;
-}
-
-/**
  * gfs2_find_jhead - find the head of a log
  * @jd: the journal
  * @head: the log descriptor for the head of the log is returned here
  *
- * Do a binary search of a journal and find the valid log entry with the
+ * Do a search of a journal and find the valid log entry with the
  * highest sequence number.  (i.e. the log head)
  *
  * Returns: errno
@@ -268,40 +195,15 @@ static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
 
 int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
 {
-	struct gfs2_log_header_host lh_1, lh_m;
-	u32 blk_1, blk_2, blk_m;
-	int error;
-
-	blk_1 = 0;
-	blk_2 = jd->jd_blocks - 1;
+	int ret;
 
-	for (;;) {
-		blk_m = (blk_1 + blk_2) / 2;
+	mutex_lock(&jd->jd_jh_mutex);
+	ret = gfs2_log_jh_lookup(jd);
+	if (ret == 0)
+		*head = jd->jd_jhead;
+	mutex_unlock(&jd->jd_jh_mutex);
 
-		error = find_good_lh(jd, &blk_1, &lh_1);
-		if (error)
-			return error;
-
-		error = find_good_lh(jd, &blk_m, &lh_m);
-		if (error)
-			return error;
-
-		if (blk_1 == blk_m || blk_m == blk_2)
-			break;
-
-		if (lh_1.lh_sequence <= lh_m.lh_sequence)
-			blk_1 = blk_m;
-		else
-			blk_2 = blk_m;
-	}
-
-	error = jhead_scan(jd, &lh_1);
-	if (error)
-		return error;
-
-	*head = lh_1;
-
-	return error;
+	return ret;
 }
 
 /**
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 1787d29..a8a9307 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -573,7 +573,8 @@ static ssize_t quota_scale_store(struct gfs2_sbd *sdp, const char *buf,
 }
 
 static ssize_t tune_set(struct gfs2_sbd *sdp, unsigned int *field,
-			int check_zero, const char *buf, size_t len)
+			int check_zero, int check_range, unsigned int low,
+			unsigned int high, const char *buf, size_t len)
 {
 	struct gfs2_tune *gt = &sdp->sd_tune;
 	unsigned int x;
@@ -589,6 +590,9 @@ static ssize_t tune_set(struct gfs2_sbd *sdp, unsigned int *field,
 	if (check_zero && !x)
 		return -EINVAL;
 
+	if (check_range && (x < low || x > high))
+		return -EINVAL;
+
 	spin_lock(&gt->gt_spin);
 	*field = x;
 	spin_unlock(&gt->gt_spin);
@@ -605,20 +609,22 @@ static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf)                   \
 }                                                                             \
 TUNE_ATTR_3(name, name##_show, store)
 
-#define TUNE_ATTR(name, check_zero)                                           \
+#define TUNE_ATTR(name, check_zero, check_range, low, high)		      \
 static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\
 {                                                                             \
-	return tune_set(sdp, &sdp->sd_tune.gt_##name, check_zero, buf, len);  \
+	return tune_set(sdp, &sdp->sd_tune.gt_##name, check_zero, check_range,\
+			low, high, buf, len);                                 \
 }                                                                             \
 TUNE_ATTR_2(name, name##_store)
 
-TUNE_ATTR(quota_warn_period, 0);
-TUNE_ATTR(quota_quantum, 0);
-TUNE_ATTR(max_readahead, 0);
-TUNE_ATTR(complain_secs, 0);
-TUNE_ATTR(statfs_slow, 0);
-TUNE_ATTR(new_files_jdata, 0);
-TUNE_ATTR(statfs_quantum, 1);
+TUNE_ATTR(quota_warn_period, 0, 0, 0, 0);
+TUNE_ATTR(quota_quantum, 0, 0, 0, 0);
+TUNE_ATTR(max_readahead, 0, 0, 0, 0);
+TUNE_ATTR(complain_secs, 0, 0, 0, 0);
+TUNE_ATTR(statfs_slow, 0, 0, 0, 0);
+TUNE_ATTR(new_files_jdata, 0, 0, 0, 0);
+TUNE_ATTR(statfs_quantum, 1, 0, 0, 0);
+TUNE_ATTR(bio_pool_size, 1, 1, 2, 64);
 TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store);
 
 static struct attribute *tune_attrs[] = {
@@ -630,6 +636,7 @@ static struct attribute *tune_attrs[] = {
 	&tune_attr_statfs_quantum.attr,
 	&tune_attr_quota_scale.attr,
 	&tune_attr_new_files_jdata.attr,
+	&tune_attr_bio_pool_size.attr,
 	NULL,
 };
 
-- 
2.4.11




* [Cluster-devel] [GFS2 RFC PATCH 3/3] gfs2: introduce bio_pool to readahead journal to find jhead
  2018-09-25  5:38 ` [Cluster-devel] [GFS2 RFC PATCH 3/3] gfs2: introduce bio_pool to readahead journal to find jhead Abhi Das
@ 2018-09-25  9:42   ` Steven Whitehouse
  0 siblings, 0 replies; 5+ messages in thread
From: Steven Whitehouse @ 2018-09-25  9:42 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Hi,



On 25/09/18 06:38, Abhi Das wrote:
> This patch adds a new data structure called bio_pool. This is
> basically a dynamically allocated array of struct bio* and
> associated variables to manage this data structure.
>
> The array is used in a circular fashion until the entire array
> has bios that are in flight. i.e. they need to be waited on and
> consumed upon completion, in order to make room for more. To
> locate the journal head, we read the journal sequentially from
> the beginning, creating bios and submitting them as necessary.
>
> We wait for these inflight bios in the order we submit them even
> though the block layer may complete them out of order. This strict
> ordering allows us to determine the journal head without having
> to do extra reads.
>
> A tunable allows us to configure the size of the bio_pool.
I'd rather not introduce a new tunable. What size should the pool be? Is
there any reason that we even need the array to keep track of the bios?
If the pages are in the page cache (i.e. the address space of the journal
inode), then we should be able to simply wait on the pages in order, I
think, without needing a separate list.
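
To illustrate, a rough, untested sketch of that idea (nr_pages and the
details below are assumptions, the readahead that would keep the device
busy is elided, and gfs2_jhead_search() is the scanner from patch 3/3):

	struct address_space *mapping = jd->jd_inode->i_mapping;
	pgoff_t index;
	int found = 0;

	for (index = 0; index < nr_pages && !found; index++) {
		/* read_mapping_page() returns an uptodate page (or an
		 * error); waiting on pages in index order gives the same
		 * sequential processing without a side array of bios.
		 * Highmem/kmap handling is ignored for brevity. */
		struct page *page = read_mapping_page(mapping, index, NULL);

		if (IS_ERR(page))
			return PTR_ERR(page);
		found = gfs2_jhead_search(jd, page_address(page));
		put_page(page);
	}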

Steve.

> [rest of quoted patch snipped]
