From: Coly Li <colyli@suse.de>
To: axboe@kernel.dk
Cc: linux-bcache@vger.kernel.org, linux-block@vger.kernel.org,
jianpeng.ma@intel.com, qiaowei.ren@intel.com,
Coly Li <colyli@suse.de>
Subject: [PATCH 11/13] bcache: read jset from NVDIMM pages for journal replay
Date: Wed, 14 Apr 2021 13:46:46 +0800 [thread overview]
Message-ID: <20210414054648.24098-12-colyli@suse.de> (raw)
In-Reply-To: <20210414054648.24098-1-colyli@suse.de>
This patch implements two methods to read a jset from media for journal
replay:
- __jnl_rd_bkt() for block device
This is the legacy method to read jset via the block device interface.
- __jnl_rd_nvm_bkt() for NVDIMM
This is the method to read jset via the NVDIMM memory interface, i.e.
memcpy() from NVDIMM pages to DRAM pages.
If BCH_FEATURE_INCOMPAT_NVDIMM_META is set in the incompat feature set,
then while running the cache set, journal_read_bucket() will read the
journal content from NVDIMM via __jnl_rd_nvm_bkt(). The linear addresses
of the NVDIMM pages to read jset from are stored in
sb.d[SB_JOURNAL_BUCKETS]; these addresses were initialized and maintained
in previous runs of the cache set.
Note that when bch_journal_read() is called, the linear addresses of the
NVDIMM pages are not loaded and initialized yet, so it is necessary to
call __bch_journal_nvdimm_init() before reading the jset from NVDIMM
pages.
Signed-off-by: Coly Li <colyli@suse.de>
Cc: Jianpeng Ma <jianpeng.ma@intel.com>
Cc: Qiaowei Ren <qiaowei.ren@intel.com>
---
drivers/md/bcache/journal.c | 93 +++++++++++++++++++++++++++----------
1 file changed, 69 insertions(+), 24 deletions(-)
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index e3785da10434..e5f7166e5d9b 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -34,60 +34,96 @@ static void journal_read_endio(struct bio *bio)
closure_put(cl);
}
+static struct jset *__jnl_rd_bkt(struct cache *ca, unsigned int bkt_idx,
+ unsigned int len, unsigned int offset,
+ struct closure *cl)
+{
+ sector_t bucket = bucket_to_sector(ca->set, ca->sb.d[bkt_idx]);
+ struct bio *bio = &ca->journal.bio;
+ struct jset *data = ca->set->journal.w[0].data;
+
+ bio_reset(bio);
+ bio->bi_iter.bi_sector = bucket + offset;
+ bio_set_dev(bio, ca->bdev);
+ bio->bi_iter.bi_size = len << 9;
+ bio->bi_end_io = journal_read_endio;
+ bio->bi_private = cl;
+ bio_set_op_attrs(bio, REQ_OP_READ, 0);
+ bch_bio_map(bio, data);
+
+ closure_bio_submit(ca->set, bio, cl);
+ closure_sync(cl);
+
+ /* Indeed journal.w[0].data */
+ return data;
+}
+
+#ifdef CONFIG_BCACHE_NVM_PAGES
+
+static struct jset *__jnl_rd_nvm_bkt(struct cache *ca, unsigned int bkt_idx,
+ unsigned int len, unsigned int offset)
+{
+ void *jset_addr = (void *)ca->sb.d[bkt_idx] + (offset << 9);
+ struct jset *data = ca->set->journal.w[0].data;
+
+ memcpy(data, jset_addr, len << 9);
+
+ /* Indeed journal.w[0].data */
+ return data;
+}
+
+#else /* CONFIG_BCACHE_NVM_PAGES */
+
+static struct jset *__jnl_rd_nvm_bkt(struct cache *ca, unsigned int bkt_idx,
+ unsigned int len, unsigned int offset)
+{
+ return NULL;
+}
+
+#endif /* CONFIG_BCACHE_NVM_PAGES */
+
static int journal_read_bucket(struct cache *ca, struct list_head *list,
- unsigned int bucket_index)
+ unsigned int bucket_idx)
{
struct journal_device *ja = &ca->journal;
- struct bio *bio = &ja->bio;
struct journal_replay *i;
- struct jset *j, *data = ca->set->journal.w[0].data;
+ struct jset *j;
struct closure cl;
unsigned int len, left, offset = 0;
int ret = 0;
- sector_t bucket = bucket_to_sector(ca->set, ca->sb.d[bucket_index]);
closure_init_stack(&cl);
- pr_debug("reading %u\n", bucket_index);
+ pr_debug("reading %u\n", bucket_idx);
while (offset < ca->sb.bucket_size) {
reread: left = ca->sb.bucket_size - offset;
len = min_t(unsigned int, left, PAGE_SECTORS << JSET_BITS);
- bio_reset(bio);
- bio->bi_iter.bi_sector = bucket + offset;
- bio_set_dev(bio, ca->bdev);
- bio->bi_iter.bi_size = len << 9;
-
- bio->bi_end_io = journal_read_endio;
- bio->bi_private = &cl;
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
- bch_bio_map(bio, data);
-
- closure_bio_submit(ca->set, bio, &cl);
- closure_sync(&cl);
+ if (!bch_has_feature_nvdimm_meta(&ca->sb))
+ j = __jnl_rd_bkt(ca, bucket_idx, len, offset, &cl);
+ else
+ j = __jnl_rd_nvm_bkt(ca, bucket_idx, len, offset);
/* This function could be simpler now since we no longer write
* journal entries that overlap bucket boundaries; this means
* the start of a bucket will always have a valid journal entry
* if it has any journal entries at all.
*/
-
- j = data;
while (len) {
struct list_head *where;
size_t blocks, bytes = set_bytes(j);
if (j->magic != jset_magic(&ca->sb)) {
- pr_debug("%u: bad magic\n", bucket_index);
+ pr_debug("%u: bad magic\n", bucket_idx);
return ret;
}
if (bytes > left << 9 ||
bytes > PAGE_SIZE << JSET_BITS) {
pr_info("%u: too big, %zu bytes, offset %u\n",
- bucket_index, bytes, offset);
+ bucket_idx, bytes, offset);
return ret;
}
@@ -96,7 +132,7 @@ reread: left = ca->sb.bucket_size - offset;
if (j->csum != csum_set(j)) {
pr_info("%u: bad csum, %zu bytes, offset %u\n",
- bucket_index, bytes, offset);
+ bucket_idx, bytes, offset);
return ret;
}
@@ -158,8 +194,8 @@ reread: left = ca->sb.bucket_size - offset;
list_add(&i->list, where);
ret = 1;
- if (j->seq > ja->seq[bucket_index])
- ja->seq[bucket_index] = j->seq;
+ if (j->seq > ja->seq[bucket_idx])
+ ja->seq[bucket_idx] = j->seq;
next_set:
offset += blocks * ca->sb.block_size;
len -= blocks * ca->sb.block_size;
@@ -170,6 +206,8 @@ reread: left = ca->sb.bucket_size - offset;
return ret;
}
+static int __bch_journal_nvdimm_init(struct cache *ca);
+
int bch_journal_read(struct cache_set *c, struct list_head *list)
{
#define read_bucket(b) \
@@ -188,6 +226,13 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
unsigned int i, l, r, m;
uint64_t seq;
+ /*
+ * Linear addresses of NVDIMM pages for journaling is not
+ * initialized yet, do it before read jset from NVDIMM pages.
+ */
+ if (bch_has_feature_nvdimm_meta(&ca->sb))
+ __bch_journal_nvdimm_init(ca);
+
bitmap_zero(bitmap, SB_JOURNAL_BUCKETS);
pr_debug("%u journal buckets\n", ca->sb.njournal_buckets);
--
2.26.2
next prev parent reply other threads:[~2021-04-14 5:47 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-04-14 5:46 [PATCH 00/13] bcache patches for Linux v5.13 -- 2nd wave Coly Li
2021-04-14 5:46 ` [PATCH 01/13] bcache: add initial data structures for nvm pages Coly Li
2021-04-14 5:46 ` [PATCH 02/13] bcache: initialize the nvm pages allocator Coly Li
2021-04-14 5:46 ` [PATCH 03/13] bcache: initialization of the buddy Coly Li
2021-04-14 5:46 ` [PATCH 04/13] bcache: bch_nvm_alloc_pages() " Coly Li
2021-04-14 5:46 ` [PATCH 05/13] bcache: bch_nvm_free_pages() " Coly Li
2021-04-14 5:46 ` [PATCH 06/13] bcache: get allocated pages from specific owner Coly Li
2021-04-14 5:46 ` [PATCH 07/13] bcache: use bucket index to set GC_MARK_METADATA for journal buckets in bch_btree_gc_finish() Coly Li
2021-04-14 5:46 ` [PATCH 08/13] bcache: add BCH_FEATURE_INCOMPAT_NVDIMM_META into incompat feature set Coly Li
2021-04-14 5:46 ` [PATCH 09/13] bcache: initialize bcache journal for NVDIMM meta device Coly Li
2021-04-14 5:46 ` [PATCH 10/13] bcache: support storing bcache journal into " Coly Li
2021-04-14 5:46 ` Coly Li [this message]
2021-04-14 5:46 ` [PATCH 12/13] bcache: add sysfs interface register_nvdimm_meta to register " Coly Li
2021-04-14 5:46 ` [PATCH 13/13] bcache: use div_u64() in init_owner_info() Coly Li
2021-04-16 12:01 ` Jens Axboe
2021-04-16 3:55 ` [PATCH 00/13] bcache patches for Linux v5.13 -- 2nd wave Coly Li
2021-04-16 12:02 ` Jens Axboe
2021-04-16 12:26 ` Coly Li
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210414054648.24098-12-colyli@suse.de \
--to=colyli@suse.de \
--cc=axboe@kernel.dk \
--cc=jianpeng.ma@intel.com \
--cc=linux-bcache@vger.kernel.org \
--cc=linux-block@vger.kernel.org \
--cc=qiaowei.ren@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).