From: Hannes Reinecke <hare@suse.de>
To: Coly Li <colyli@suse.de>, axboe@kernel.dk
Cc: linux-bcache@vger.kernel.org, linux-block@vger.kernel.org,
Jianpeng Ma <jianpeng.ma@intel.com>,
Qiaowei Ren <qiaowei.ren@intel.com>
Subject: Re: [PATCH 12/14] bcache: support storing bcache journal into NVDIMM meta device
Date: Tue, 22 Jun 2021 13:03:16 +0200 [thread overview]
Message-ID: <e27c6d67-7085-ec35-7ad4-f391ac5a2454@suse.de> (raw)
In-Reply-To: <20210615054921.101421-13-colyli@suse.de>
On 6/15/21 7:49 AM, Coly Li wrote:
> This patch implements two methods to store bcache journal to,
> 1) __journal_write_unlocked() for block interface device
> The legacy method to compose bio and issue the jset bio to cache
> device (e.g. SSD). c->journal.key.ptr[0] indicates the LBA on cache
> device to store the journal jset.
> 2) __journal_nvdimm_write_unlocked() for memory interface NVDIMM
> Use memory interface to access NVDIMM pages and store the jset by
> memcpy_flushcache(). c->journal.key.ptr[0] indicates the linear
> address from the NVDIMM pages to store the journal jset.
>
> For lagency configuration without NVDIMM meta device, journal I/O is
Typo: s/lagency/legacy/ ?
> handled by __journal_write_unlocked() with existing code logic. If the
> NVDIMM meta device is used (by bcache-tools), the journal I/O will
> be handled by __journal_nvdimm_write_unlocked() and go into the NVDIMM
> pages.
>
> And when NVDIMM meta device is used, sb.d[] stores the linear addresses
> from NVDIMM pages (no more bucket index), in journal_reclaim() the
> journaling location in c->journal.key.ptr[0] should also be updated by
> linear address from NVDIMM pages (no more LBA combined by sectors offset
> and bucket index).
>
> Signed-off-by: Coly Li <colyli@suse.de>
> Cc: Jianpeng Ma <jianpeng.ma@intel.com>
> Cc: Qiaowei Ren <qiaowei.ren@intel.com>
> ---
> drivers/md/bcache/journal.c | 119 ++++++++++++++++++++++++----------
> drivers/md/bcache/nvm-pages.h | 1 +
> drivers/md/bcache/super.c | 28 +++++++-
> 3 files changed, 110 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
> index 32599d2ff5d2..03ecedf813b0 100644
> --- a/drivers/md/bcache/journal.c
> +++ b/drivers/md/bcache/journal.c
> @@ -596,6 +596,8 @@ static void do_journal_discard(struct cache *ca)
> return;
> }
>
> + BUG_ON(bch_has_feature_nvdimm_meta(&ca->sb));
> +
> switch (atomic_read(&ja->discard_in_flight)) {
> case DISCARD_IN_FLIGHT:
> return;
> @@ -661,9 +663,13 @@ static void journal_reclaim(struct cache_set *c)
> goto out;
>
> ja->cur_idx = next;
> - k->ptr[0] = MAKE_PTR(0,
> - bucket_to_sector(c, ca->sb.d[ja->cur_idx]),
> - ca->sb.nr_this_dev);
> + if (!bch_has_feature_nvdimm_meta(&ca->sb))
> + k->ptr[0] = MAKE_PTR(0,
> + bucket_to_sector(c, ca->sb.d[ja->cur_idx]),
> + ca->sb.nr_this_dev);
> + else
> + k->ptr[0] = ca->sb.d[ja->cur_idx];
> +
> atomic_long_inc(&c->reclaimed_journal_buckets);
>
> bkey_init(k);
> @@ -729,46 +735,21 @@ static void journal_write_unlock(struct closure *cl)
> spin_unlock(&c->journal.lock);
> }
>
> -static void journal_write_unlocked(struct closure *cl)
> +
> +static void __journal_write_unlocked(struct cache_set *c)
> __releases(c->journal.lock)
> {
> - struct cache_set *c = container_of(cl, struct cache_set, journal.io);
> - struct cache *ca = c->cache;
> - struct journal_write *w = c->journal.cur;
> struct bkey *k = &c->journal.key;
> - unsigned int i, sectors = set_blocks(w->data, block_bytes(ca)) *
> - ca->sb.block_size;
> -
> + struct journal_write *w = c->journal.cur;
> + struct closure *cl = &c->journal.io;
> + struct cache *ca = c->cache;
> struct bio *bio;
> struct bio_list list;
> + unsigned int i, sectors = set_blocks(w->data, block_bytes(ca)) *
> + ca->sb.block_size;
>
> bio_list_init(&list);
>
> - if (!w->need_write) {
> - closure_return_with_destructor(cl, journal_write_unlock);
> - return;
> - } else if (journal_full(&c->journal)) {
> - journal_reclaim(c);
> - spin_unlock(&c->journal.lock);
> -
> - btree_flush_write(c);
> - continue_at(cl, journal_write, bch_journal_wq);
> - return;
> - }
> -
> - c->journal.blocks_free -= set_blocks(w->data, block_bytes(ca));
> -
> - w->data->btree_level = c->root->level;
> -
> - bkey_copy(&w->data->btree_root, &c->root->key);
> - bkey_copy(&w->data->uuid_bucket, &c->uuid_bucket);
> -
> - w->data->prio_bucket[ca->sb.nr_this_dev] = ca->prio_buckets[0];
> - w->data->magic = jset_magic(&ca->sb);
> - w->data->version = BCACHE_JSET_VERSION;
> - w->data->last_seq = last_seq(&c->journal);
> - w->data->csum = csum_set(w->data);
> -
> for (i = 0; i < KEY_PTRS(k); i++) {
> ca = c->cache;
> bio = &ca->journal.bio;
> @@ -793,7 +774,6 @@ static void journal_write_unlocked(struct closure *cl)
>
> ca->journal.seq[ca->journal.cur_idx] = w->data->seq;
> }
> -
> /* If KEY_PTRS(k) == 0, this jset gets lost in air */
> BUG_ON(i == 0);
>
> @@ -805,6 +785,73 @@ static void journal_write_unlocked(struct closure *cl)
>
> while ((bio = bio_list_pop(&list)))
> closure_bio_submit(c, bio, cl);
> +}
> +
> +#if defined(CONFIG_BCACHE_NVM_PAGES)
> +
> +static void __journal_nvdimm_write_unlocked(struct cache_set *c)
> + __releases(c->journal.lock)
> +{
> + struct journal_write *w = c->journal.cur;
> + struct cache *ca = c->cache;
> + unsigned int sectors;
> +
> + sectors = set_blocks(w->data, block_bytes(ca)) * ca->sb.block_size;
> + atomic_long_add(sectors, &ca->meta_sectors_written);
> +
> + memcpy_flushcache((void *)c->journal.key.ptr[0], w->data, sectors << 9);
> +
> + c->journal.key.ptr[0] += sectors << 9;
> + ca->journal.seq[ca->journal.cur_idx] = w->data->seq;
> +
> + atomic_dec_bug(&fifo_back(&c->journal.pin));
> + bch_journal_next(&c->journal);
> + journal_reclaim(c);
> +
> + spin_unlock(&c->journal.lock);
> +}
> +
> +#else /* CONFIG_BCACHE_NVM_PAGES */
> +
> +static void __journal_nvdimm_write_unlocked(struct cache_set *c) { }
> +
> +#endif /* CONFIG_BCACHE_NVM_PAGES */
> +
> +static void journal_write_unlocked(struct closure *cl)
> +{
> + struct cache_set *c = container_of(cl, struct cache_set, journal.io);
> + struct cache *ca = c->cache;
> + struct journal_write *w = c->journal.cur;
> +
> + if (!w->need_write) {
> + closure_return_with_destructor(cl, journal_write_unlock);
> + return;
> + } else if (journal_full(&c->journal)) {
> + journal_reclaim(c);
> + spin_unlock(&c->journal.lock);
> +
> + btree_flush_write(c);
> + continue_at(cl, journal_write, bch_journal_wq);
> + return;
> + }
> +
> + c->journal.blocks_free -= set_blocks(w->data, block_bytes(ca));
> +
> + w->data->btree_level = c->root->level;
> +
> + bkey_copy(&w->data->btree_root, &c->root->key);
> + bkey_copy(&w->data->uuid_bucket, &c->uuid_bucket);
> +
> + w->data->prio_bucket[ca->sb.nr_this_dev] = ca->prio_buckets[0];
> + w->data->magic = jset_magic(&ca->sb);
> + w->data->version = BCACHE_JSET_VERSION;
> + w->data->last_seq = last_seq(&c->journal);
> + w->data->csum = csum_set(w->data);
> +
> + if (!bch_has_feature_nvdimm_meta(&ca->sb))
> + __journal_write_unlocked(c);
> + else
> + __journal_nvdimm_write_unlocked(c);
>
> continue_at(cl, journal_write_done, NULL);
> }
> diff --git a/drivers/md/bcache/nvm-pages.h b/drivers/md/bcache/nvm-pages.h
> index c763bf2e2721..736a661777b7 100644
> --- a/drivers/md/bcache/nvm-pages.h
> +++ b/drivers/md/bcache/nvm-pages.h
> @@ -5,6 +5,7 @@
>
> #if defined(CONFIG_BCACHE_NVM_PAGES)
> #include <linux/bcache-nvm.h>
> +#include <linux/libnvdimm.h>
> #endif /* CONFIG_BCACHE_NVM_PAGES */
>
> /*
> diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
> index cce0f6bf0944..4d6666d03aa7 100644
> --- a/drivers/md/bcache/super.c
> +++ b/drivers/md/bcache/super.c
> @@ -1686,7 +1686,32 @@ void bch_cache_set_release(struct kobject *kobj)
> static void cache_set_free(struct closure *cl)
> {
> struct cache_set *c = container_of(cl, struct cache_set, cl);
> - struct cache *ca;
> + struct cache *ca = c->cache;
> +
> +#if defined(CONFIG_BCACHE_NVM_PAGES)
> + /* Flush cache if journal stored in NVDIMM */
> + if (ca && bch_has_feature_nvdimm_meta(&ca->sb)) {
> + unsigned long bucket_size = ca->sb.bucket_size;
> + int i;
> +
> + for (i = 0; i < ca->sb.keys; i++) {
> + unsigned long offset = 0;
> + unsigned int len = round_down(UINT_MAX, 2);
> +
> + if ((void *)ca->sb.d[i] == NULL)
> + continue;
> +
> + while (bucket_size > 0) {
> + if (len > bucket_size)
> + len = bucket_size;
> + arch_invalidate_pmem(
> + (void *)(ca->sb.d[i] + offset), len);
> + offset += len;
> + bucket_size -= len;
> + }
> + }
> + }
> +#endif /* CONFIG_BCACHE_NVM_PAGES */
>
> debugfs_remove(c->debug);
>
> @@ -1698,7 +1723,6 @@ static void cache_set_free(struct closure *cl)
> bch_bset_sort_state_free(&c->sort);
> free_pages((unsigned long) c->uuids, ilog2(meta_bucket_pages(&c->cache->sb)));
>
> - ca = c->cache;
> if (ca) {
> ca->set = NULL;
> c->cache = NULL;
>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Cheers,
Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
hare@suse.de +49 911 74053 688
SUSE Software Solutions Germany GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), GF: Felix Imendörffer
next prev parent reply other threads:[~2021-06-22 11:03 UTC|newest]
Thread overview: 60+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-06-15 5:49 [PATCH 00/14] bcache patches for Linux v5.14 Coly Li
2021-06-15 5:49 ` [PATCH 01/14] bcache: fix error info in register_bcache() Coly Li
2021-06-22 9:47 ` Hannes Reinecke
2021-06-15 5:49 ` [PATCH 02/14] md: bcache: Fix spelling of 'acquire' Coly Li
2021-06-22 10:03 ` Hannes Reinecke
2021-06-15 5:49 ` [PATCH 03/14] bcache: add initial data structures for nvm pages Coly Li
2021-06-21 16:17 ` Ask help for code review (was Re: [PATCH 03/14] bcache: add initial data structures for nvm pages) Coly Li
2021-06-22 8:41 ` Huang, Ying
2021-06-23 4:32 ` Coly Li
2021-06-23 6:53 ` Huang, Ying
2021-06-23 7:04 ` Christoph Hellwig
2021-06-23 7:19 ` Coly Li
2021-06-23 7:21 ` Christoph Hellwig
2021-06-23 10:05 ` Coly Li
2021-06-23 11:16 ` Coly Li
2021-06-23 11:49 ` Christoph Hellwig
2021-06-23 12:09 ` Coly Li
2021-06-22 10:19 ` [PATCH 03/14] bcache: add initial data structures for nvm pages Hannes Reinecke
2021-06-23 7:09 ` Coly Li
2021-06-15 5:49 ` [PATCH 04/14] bcache: initialize the nvm pages allocator Coly Li
2021-06-22 10:39 ` Hannes Reinecke
2021-06-23 5:26 ` Coly Li
2021-06-23 9:16 ` Hannes Reinecke
2021-06-23 9:34 ` Coly Li
2021-06-15 5:49 ` [PATCH 05/14] bcache: initialization of the buddy Coly Li
2021-06-22 10:45 ` Hannes Reinecke
2021-06-23 5:35 ` Coly Li
2021-06-23 5:46 ` Re[2]: " Pavel Goran
2021-06-23 6:03 ` Coly Li
2021-06-15 5:49 ` [PATCH 06/14] bcache: bch_nvm_alloc_pages() " Coly Li
2021-06-22 10:51 ` Hannes Reinecke
2021-06-23 6:02 ` Coly Li
2021-06-15 5:49 ` [PATCH 07/14] bcache: bch_nvm_free_pages() " Coly Li
2021-06-22 10:53 ` Hannes Reinecke
2021-06-23 6:06 ` Coly Li
2021-06-15 5:49 ` [PATCH 08/14] bcache: get allocated pages from specific owner Coly Li
2021-06-22 10:54 ` Hannes Reinecke
2021-06-23 6:08 ` Coly Li
2021-06-15 5:49 ` [PATCH 09/14] bcache: use bucket index to set GC_MARK_METADATA for journal buckets in bch_btree_gc_finish() Coly Li
2021-06-22 10:55 ` Hannes Reinecke
2021-06-23 6:09 ` Coly Li
2021-06-15 5:49 ` [PATCH 10/14] bcache: add BCH_FEATURE_INCOMPAT_NVDIMM_META into incompat feature set Coly Li
2021-06-22 10:59 ` Hannes Reinecke
2021-06-23 6:09 ` Coly Li
2021-06-15 5:49 ` [PATCH 11/14] bcache: initialize bcache journal for NVDIMM meta device Coly Li
2021-06-22 11:01 ` Hannes Reinecke
2021-06-23 6:17 ` Coly Li
2021-06-23 9:20 ` Hannes Reinecke
2021-06-23 10:14 ` Coly Li
2021-06-15 5:49 ` [PATCH 12/14] bcache: support storing bcache journal into " Coly Li
2021-06-22 11:03 ` Hannes Reinecke [this message]
2021-06-23 6:19 ` Coly Li
2021-06-15 5:49 ` [PATCH 13/14] bcache: read jset from NVDIMM pages for journal replay Coly Li
2021-06-22 11:04 ` Hannes Reinecke
2021-06-23 6:21 ` Coly Li
2021-06-15 5:49 ` [PATCH 14/14] bcache: add sysfs interface register_nvdimm_meta to register NVDIMM meta device Coly Li
2021-06-22 11:04 ` Hannes Reinecke
2021-06-21 15:14 ` [PATCH 00/14] bcache patches for Linux v5.14 Jens Axboe
2021-06-21 15:25 ` Coly Li
2021-06-21 15:27 ` Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=e27c6d67-7085-ec35-7ad4-f391ac5a2454@suse.de \
--to=hare@suse.de \
--cc=axboe@kernel.dk \
--cc=colyli@suse.de \
--cc=jianpeng.ma@intel.com \
--cc=linux-bcache@vger.kernel.org \
--cc=linux-block@vger.kernel.org \
--cc=qiaowei.ren@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).