From: Coly Li <colyli@suse.de>
To: axboe@kernel.dk
Cc: linux-bcache@vger.kernel.org, linux-block@vger.kernel.org,
Coly Li <colyli@suse.de>, Jianpeng Ma <jianpeng.ma@intel.com>,
Qiaowei Ren <qiaowei.ren@intel.com>
Subject: [PATCH 18/20] bcache: read jset from NVDIMM pages for journal replay
Date: Wed, 10 Feb 2021 13:07:40 +0800 [thread overview]
Message-ID: <20210210050742.31237-19-colyli@suse.de> (raw)
In-Reply-To: <20210210050742.31237-1-colyli@suse.de>
This patch implements two methods to read jset from media for journal
replay,
- __jnl_rd_bkt() for block device
This is the legacy method to read jset via block device interface.
- __jnl_rd_nvm_bkt() for NVDIMM
This is the method to read jset through the NVDIMM memory interface,
i.e. memcpy() from NVDIMM pages to DRAM pages.
If BCH_FEATURE_INCOMPAT_NVDIMM_META is set in the incompat feature set,
then when running the cache set, journal_read_bucket() will read the
journal content from NVDIMM via __jnl_rd_nvm_bkt(). The linear addresses
of the NVDIMM pages to read jset from are stored in
sb.d[SB_JOURNAL_BUCKETS]; they were initialized and maintained in
previous runs of the cache set.
Note that when bch_journal_read() is called, the linear addresses of
the NVDIMM pages are not loaded and initialized yet, so it is necessary
to call __bch_journal_nvdimm_init() before reading the jset from the
NVDIMM pages.
Signed-off-by: Coly Li <colyli@suse.de>
Cc: Jianpeng Ma <jianpeng.ma@intel.com>
Cc: Qiaowei Ren <qiaowei.ren@intel.com>
---
drivers/md/bcache/journal.c | 81 ++++++++++++++++++++++++++-----------
1 file changed, 57 insertions(+), 24 deletions(-)
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index b242fcb47ce2..8d08627f5a89 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -34,60 +34,84 @@ static void journal_read_endio(struct bio *bio)
closure_put(cl);
}
+static struct jset *__jnl_rd_bkt(struct cache *ca, unsigned int bkt_idx,
+ unsigned int len, unsigned int offset,
+ struct closure *cl)
+{
+ sector_t bucket = bucket_to_sector(ca->set, ca->sb.d[bkt_idx]);
+ struct bio *bio = &ca->journal.bio;
+ struct jset *data = ca->set->journal.w[0].data;
+
+ bio_reset(bio);
+ bio->bi_iter.bi_sector = bucket + offset;
+ bio_set_dev(bio, ca->bdev);
+ bio->bi_iter.bi_size = len << 9;
+ bio->bi_end_io = journal_read_endio;
+ bio->bi_private = cl;
+ bio_set_op_attrs(bio, REQ_OP_READ, 0);
+ bch_bio_map(bio, data);
+
+ closure_bio_submit(ca->set, bio, cl);
+ closure_sync(cl);
+
+ /* Indeed journal.w[0].data */
+ return data;
+}
+
+static struct jset *__jnl_rd_nvm_bkt(struct cache *ca, unsigned int bkt_idx,
+ unsigned int len, unsigned int offset)
+{
+ void *jset_addr = (void *)ca->sb.d[bkt_idx] + (offset << 9);
+ struct jset *data = ca->set->journal.w[0].data;
+
+ memcpy(data, jset_addr, len << 9);
+
+ /* Indeed journal.w[0].data */
+ return data;
+}
+
static int journal_read_bucket(struct cache *ca, struct list_head *list,
- unsigned int bucket_index)
+ unsigned int bucket_idx)
{
struct journal_device *ja = &ca->journal;
- struct bio *bio = &ja->bio;
struct journal_replay *i;
- struct jset *j, *data = ca->set->journal.w[0].data;
+ struct jset *j;
struct closure cl;
unsigned int len, left, offset = 0;
int ret = 0;
- sector_t bucket = bucket_to_sector(ca->set, ca->sb.d[bucket_index]);
closure_init_stack(&cl);
- pr_debug("reading %u\n", bucket_index);
+ pr_debug("reading %u\n", bucket_idx);
while (offset < ca->sb.bucket_size) {
reread: left = ca->sb.bucket_size - offset;
len = min_t(unsigned int, left, PAGE_SECTORS << JSET_BITS);
- bio_reset(bio);
- bio->bi_iter.bi_sector = bucket + offset;
- bio_set_dev(bio, ca->bdev);
- bio->bi_iter.bi_size = len << 9;
-
- bio->bi_end_io = journal_read_endio;
- bio->bi_private = &cl;
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
- bch_bio_map(bio, data);
-
- closure_bio_submit(ca->set, bio, &cl);
- closure_sync(&cl);
+ if (!bch_has_feature_nvdimm_meta(&ca->sb))
+ j = __jnl_rd_bkt(ca, bucket_idx, len, offset, &cl);
+ else
+ j = __jnl_rd_nvm_bkt(ca, bucket_idx, len, offset);
/* This function could be simpler now since we no longer write
* journal entries that overlap bucket boundaries; this means
* the start of a bucket will always have a valid journal entry
* if it has any journal entries at all.
*/
-
- j = data;
while (len) {
struct list_head *where;
size_t blocks, bytes = set_bytes(j);
if (j->magic != jset_magic(&ca->sb)) {
- pr_debug("%u: bad magic\n", bucket_index);
+ pr_debug("%u: bad magic\n", bucket_idx);
return ret;
}
if (bytes > left << 9 ||
bytes > PAGE_SIZE << JSET_BITS) {
pr_info("%u: too big, %zu bytes, offset %u\n",
- bucket_index, bytes, offset);
+ bucket_idx, bytes, offset);
return ret;
}
@@ -96,7 +120,7 @@ reread: left = ca->sb.bucket_size - offset;
if (j->csum != csum_set(j)) {
pr_info("%u: bad csum, %zu bytes, offset %u\n",
- bucket_index, bytes, offset);
+ bucket_idx, bytes, offset);
return ret;
}
@@ -158,8 +182,8 @@ reread: left = ca->sb.bucket_size - offset;
list_add(&i->list, where);
ret = 1;
- if (j->seq > ja->seq[bucket_index])
- ja->seq[bucket_index] = j->seq;
+ if (j->seq > ja->seq[bucket_idx])
+ ja->seq[bucket_idx] = j->seq;
next_set:
offset += blocks * ca->sb.block_size;
len -= blocks * ca->sb.block_size;
@@ -170,6 +194,8 @@ reread: left = ca->sb.bucket_size - offset;
return ret;
}
+static int __bch_journal_nvdimm_init(struct cache *ca);
+
int bch_journal_read(struct cache_set *c, struct list_head *list)
{
#define read_bucket(b) \
@@ -188,6 +214,13 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
unsigned int i, l, r, m;
uint64_t seq;
+ /*
+ * Linear addresses of NVDIMM pages for journaling is not
+ * initialized yet, do it before read jset from NVDIMM pages.
+ */
+ if (bch_has_feature_nvdimm_meta(&ca->sb))
+ __bch_journal_nvdimm_init(ca);
+
bitmap_zero(bitmap, SB_JOURNAL_BUCKETS);
pr_debug("%u journal buckets\n", ca->sb.njournal_buckets);
--
2.26.2
next prev parent reply other threads:[~2021-02-10 5:10 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-02-10 5:07 [PATCH 00/20] bcache patches for Linux v5.12 Coly Li
2021-02-10 5:07 ` [PATCH 01/20] bcache: consider the fragmentation when update the writeback rate Coly Li
2021-02-10 5:07 ` [PATCH 02/20] bcache: Fix register_device_aync typo Coly Li
2021-02-10 5:07 ` [PATCH 03/20] Revert "bcache: Kill btree_io_wq" Coly Li
2021-02-10 5:07 ` [PATCH 04/20] bcache: Give btree_io_wq correct semantics again Coly Li
2021-02-10 5:07 ` [PATCH 05/20] bcache: Move journal work to new flush wq Coly Li
2021-02-10 5:07 ` [PATCH 06/20] bcache: Avoid comma separated statements Coly Li
2021-02-10 5:07 ` [PATCH 07/20] bcache: add initial data structures for nvm pages Coly Li
2021-02-10 15:09 ` Jens Axboe
2021-02-11 3:58 ` Coly Li
2021-02-10 5:07 ` [PATCH 08/20] bcache: initialize the nvm pages allocator Coly Li
2021-02-10 5:07 ` [PATCH 09/20] bcache: initialization of the buddy Coly Li
2021-02-10 5:07 ` [PATCH 10/20] bcache: bch_nvm_alloc_pages() " Coly Li
2021-02-10 5:07 ` [PATCH 11/20] bcache: bch_nvm_free_pages() " Coly Li
2021-02-10 5:07 ` [PATCH 12/20] bcache: get allocated pages from specific owner Coly Li
2021-02-10 5:07 ` [PATCH 13/20] bcache: persist owner info when alloc/free pages Coly Li
2021-02-10 5:07 ` [PATCH 14/20] bcache: use bucket index for SET_GC_MARK() in bch_btree_gc_finish() Coly Li
2021-02-10 5:07 ` [PATCH 15/20] bcache: add BCH_FEATURE_INCOMPAT_NVDIMM_META into incompat feature set Coly Li
2021-02-10 5:07 ` [PATCH 16/20] bcache: initialize bcache journal for NVDIMM meta device Coly Li
2021-02-10 5:07 ` [PATCH 17/20] bcache: support storing bcache journal into " Coly Li
2021-02-18 21:21 ` Nix
2021-02-10 5:07 ` Coly Li [this message]
2021-02-10 5:07 ` [PATCH 19/20] bcache: add sysfs interface register_nvdimm_meta to register " Coly Li
2021-02-10 5:07 ` [PATCH 20/20] bcache: only initialize nvm-pages allocator when CONFIG_BCACHE_NVM_PAGES configured Coly Li
2021-02-10 15:11 ` [PATCH 00/20] bcache patches for Linux v5.12 Jens Axboe
2021-02-12 16:09 ` Coly Li
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210210050742.31237-19-colyli@suse.de \
--to=colyli@suse.de \
--cc=axboe@kernel.dk \
--cc=jianpeng.ma@intel.com \
--cc=linux-bcache@vger.kernel.org \
--cc=linux-block@vger.kernel.org \
--cc=qiaowei.ren@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).