linux-bcache.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Coly Li <colyli@suse.de>
To: linux-bcache@vger.kernel.org
Cc: linux-block@vger.kernel.org, linux-nvdimm@lists.linux.dev,
	axboe@kernel.dk, hare@suse.com, jack@suse.cz,
	dan.j.williams@intel.com, hch@lst.de, ying.huang@intel.com,
	Coly Li <colyli@suse.de>, Hannes Reinecke <hare@suse.de>,
	Jianpeng Ma <jianpeng.ma@intel.com>,
	Qiaowei Ren <qiaowei.ren@intel.com>
Subject: [PATCH v12 09/12] bcache: initialize bcache journal for NVDIMM meta device
Date: Thu, 12 Aug 2021 01:02:21 +0800	[thread overview]
Message-ID: <20210811170224.42837-10-colyli@suse.de> (raw)
In-Reply-To: <20210811170224.42837-1-colyli@suse.de>

The nvm-pages allocator may store and index the NVDIMM pages allocated
for the bcache journal. This patch adds the initialization that places
the bcache journal space on NVDIMM pages if the
BCH_FEATURE_INCOMPAT_NVDIMM_META bit is set by bcache-tools.

If BCH_FEATURE_INCOMPAT_NVDIMM_META is set, get_journal_nvmpg_space()
returns the linear address of the NVDIMM pages for the bcache journal:
- If there is previously allocated space, find it in the nvm-pages owner
  list and return it to bch_journal_init().
- If there is no previously allocated space, request a new NVDIMM range
  from the nvm-pages allocator, and return it to bch_journal_init().

Then in bch_journal_init(), each sb.d[i] stores the nvm-pages offset of
the NVDIMM space for the corresponding journal bucket, where 'i' is the
bucket index used to iterate over all journal buckets.

Later, when the bcache journaling code stores a journal jset, the target
NVDIMM location stored (and updated) in sb.d[i] can be used directly for
the memory copy from DRAM pages into NVDIMM pages.

Signed-off-by: Coly Li <colyli@suse.de>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jianpeng Ma <jianpeng.ma@intel.com>
Cc: Qiaowei Ren <qiaowei.ren@intel.com>
---
 drivers/md/bcache/journal.c   | 117 ++++++++++++++++++++++++++++++++++
 drivers/md/bcache/journal.h   |   2 +-
 drivers/md/bcache/nvm-pages.c |   9 +++
 drivers/md/bcache/nvm-pages.h |   1 +
 drivers/md/bcache/super.c     |  18 +++---
 5 files changed, 136 insertions(+), 11 deletions(-)

diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 61bd79babf7a..9fe6c1abfd84 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -9,6 +9,8 @@
 #include "btree.h"
 #include "debug.h"
 #include "extents.h"
+#include "nvm-pages.h"
+#include "features.h"
 
 #include <trace/events/bcache.h>
 
@@ -982,3 +984,118 @@ int bch_journal_alloc(struct cache_set *c)
 
 	return 0;
 }
+
+#if defined(CONFIG_BCACHE_NVM_PAGES)
+
+static void *find_journal_nvmpg_base(struct bch_nvmpg_head *nvmpg_head,
+				   struct cache *ca)
+{
+	void *addr = NULL;
+	unsigned long jnl_offset, jnl_pgoff, jnl_ns_id;
+	int i;
+
+	jnl_offset = (unsigned long)ca->sb.d[0];
+	jnl_ns_id = BCH_NVMPG_GET_NS_ID(jnl_offset);
+	jnl_pgoff = BCH_NVMPG_GET_OFFSET(jnl_offset) >> PAGE_SHIFT;
+
+	for (i = 0; i < BCH_NVMPG_NS_MAX; i++) {
+		struct bch_nvmpg_recs *recs;
+		struct bch_nvmpg_rec *rec;
+		unsigned long recs_offset = 0;
+		int j;
+
+		recs_offset = nvmpg_head->recs_offset[i];
+		recs = bch_nvmpg_offset_to_ptr(recs_offset);
+		while (recs) {
+			for (j = 0; j < recs->size; j++) {
+				rec = &recs->recs[j];
+				if ((rec->pgoff != jnl_pgoff) ||
+				    (rec->ns_id != jnl_ns_id))
+					continue;
+
+				addr = bch_nvmpg_offset_to_ptr(jnl_offset);
+				goto out;
+			}
+			recs_offset = recs->next_offset;
+			recs = bch_nvmpg_offset_to_ptr(recs_offset);
+		}
+	}
+
+out:
+	return addr;
+}
+
+static void *get_journal_nvmpg_space(struct cache *ca)
+{
+	struct bch_nvmpg_head *head = NULL;
+	void *ret = NULL;
+	int order;
+
+	head = bch_get_nvmpg_head(ca->sb.set_uuid);
+	if (head) {
+		ret = find_journal_nvmpg_base(head, ca);
+		if (ret)
+			goto found;
+	}
+
+	order = ilog2((ca->sb.bucket_size *
+		       ca->sb.njournal_buckets) / PAGE_SECTORS);
+	ret = bch_nvmpg_alloc_pages(order, ca->sb.set_uuid);
+	if (ret)
+		memset(ret, 0, (1 << order) * PAGE_SIZE);
+found:
+	return ret;
+}
+
+#endif /* CONFIG_BCACHE_NVM_PAGES */
+
+static int __bch_journal_nvdimm_init(struct cache *ca)
+{
+	int ret = -1;
+
+#if defined(CONFIG_BCACHE_NVM_PAGES)
+	int i;
+	void *jnl_base = NULL;
+
+	jnl_base = get_journal_nvmpg_space(ca);
+	if (!jnl_base) {
+		pr_err("Failed to get journal space from nvdimm\n");
+		goto out;
+	}
+
+	/* Initialized and reloaded from on-disk super block already */
+	if (ca->sb.d[0] != 0)
+		goto out;
+
+	for (i = 0; i < ca->sb.keys; i++) {
+		unsigned long jnl_offset;
+
+		jnl_offset = bch_nvmpg_ptr_to_offset(bch_nvmpg_id_to_ns(0),
+					jnl_base + (bucket_bytes(ca) * i));
+		ca->sb.d[i] = jnl_offset;
+	}
+
+	ret = 0;
+out:
+#endif /* CONFIG_BCACHE_NVM_PAGES */
+
+	return ret;
+}
+
+
+int bch_journal_init(struct cache_set *c)
+{
+	int i, ret = 0;
+	struct cache *ca = c->cache;
+
+	ca->sb.keys = clamp_t(int, ca->sb.nbuckets >> 7,
+			      2, SB_JOURNAL_BUCKETS);
+
+	if (!bch_has_feature_nvdimm_meta(&ca->sb)) {
+		for (i = 0; i < ca->sb.keys; i++)
+			ca->sb.d[i] = ca->sb.first_bucket + i;
+	} else
+		ret = __bch_journal_nvdimm_init(ca);
+
+	return ret;
+}
diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h
index f2ea34d5f431..e3a7fa5a8fda 100644
--- a/drivers/md/bcache/journal.h
+++ b/drivers/md/bcache/journal.h
@@ -179,7 +179,7 @@ void bch_journal_mark(struct cache_set *c, struct list_head *list);
 void bch_journal_meta(struct cache_set *c, struct closure *cl);
 int bch_journal_read(struct cache_set *c, struct list_head *list);
 int bch_journal_replay(struct cache_set *c, struct list_head *list);
-
+int bch_journal_init(struct cache_set *c);
 void bch_journal_free(struct cache_set *c);
 int bch_journal_alloc(struct cache_set *c);
 
diff --git a/drivers/md/bcache/nvm-pages.c b/drivers/md/bcache/nvm-pages.c
index 497360c60f26..55f3f9b7fb0c 100644
--- a/drivers/md/bcache/nvm-pages.c
+++ b/drivers/md/bcache/nvm-pages.c
@@ -24,6 +24,15 @@
 
 struct bch_nvmpg_set *global_nvmpg_set;
 
+struct bch_nvmpg_ns *bch_nvmpg_id_to_ns(int ns_id)
+{
+	if ((ns_id >= 0) && (ns_id < BCH_NVMPG_NS_MAX))
+		return global_nvmpg_set->ns_tbl[ns_id];
+
+	pr_emerg("Invalid ns_id: %d\n", ns_id);
+	return NULL;
+}
+
 void *bch_nvmpg_offset_to_ptr(unsigned long offset)
 {
 	int ns_id = BCH_NVMPG_GET_NS_ID(offset);
diff --git a/drivers/md/bcache/nvm-pages.h b/drivers/md/bcache/nvm-pages.h
index 2f6f2ffbfd80..13cc6a532bda 100644
--- a/drivers/md/bcache/nvm-pages.h
+++ b/drivers/md/bcache/nvm-pages.h
@@ -94,6 +94,7 @@ void bch_nvmpg_exit(void);
 void *bch_nvmpg_alloc_pages(int order, const char *uuid);
 void bch_nvmpg_free_pages(void *addr, int order, const char *uuid);
 struct bch_nvmpg_head *bch_get_nvmpg_head(const char *uuid);
+struct bch_nvmpg_ns *bch_nvmpg_id_to_ns(int ns_id);
 
 #else
 
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 4326ffa0d21f..e66e1d6ef260 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -147,9 +147,11 @@ static const char *read_super_common(struct cache_sb *sb,  struct block_device *
 		goto err;
 
 	err = "Journal buckets not sequential";
-	for (i = 0; i < sb->keys; i++)
-		if (sb->d[i] != sb->first_bucket + i)
-			goto err;
+	if (!bch_has_feature_nvdimm_meta(sb)) {
+		for (i = 0; i < sb->keys; i++)
+			if (sb->d[i] != sb->first_bucket + i)
+				goto err;
+	}
 
 	err = "Too many journal buckets";
 	if (sb->first_bucket + sb->keys > sb->nbuckets)
@@ -2065,14 +2067,10 @@ static int run_cache_set(struct cache_set *c)
 		if (bch_journal_replay(c, &journal))
 			goto err;
 	} else {
-		unsigned int j;
-
 		pr_notice("invalidating existing data\n");
-		ca->sb.keys = clamp_t(int, ca->sb.nbuckets >> 7,
-					2, SB_JOURNAL_BUCKETS);
-
-		for (j = 0; j < ca->sb.keys; j++)
-			ca->sb.d[j] = ca->sb.first_bucket + j;
+		err = "error initializing journal";
+		if (bch_journal_init(c))
+			goto err;
 
 		bch_initial_gc_finish(c);
 
-- 
2.26.2


  parent reply	other threads:[~2021-08-11 17:04 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-11 17:02 [PATCH v12 00/12] bcache: support NVDIMM for journaling Coly Li
2021-08-11 17:02 ` [PATCH v12 01/12] bcache: add initial data structures for nvm pages Coly Li
2021-08-11 17:02 ` [PATCH v12 02/12] bcache: initialize the nvm pages allocator Coly Li
2021-08-12  5:43   ` Dan Williams
2021-08-12  8:26     ` Coly Li
2021-08-11 17:02 ` [PATCH v12 03/12] bcache: initialization of the buddy Coly Li
2021-08-11 17:02 ` [PATCH v12 04/12] bcache: bch_nvmpg_alloc_pages() " Coly Li
2021-08-11 17:02 ` [PATCH v12 05/12] bcache: bch_nvmpg_free_pages() of the buddy allocator Coly Li
2021-08-11 17:02 ` [PATCH v12 06/12] bcache: get recs list head for allocated pages by specific uuid Coly Li
2021-08-11 17:02 ` [PATCH v12 07/12] bcache: use bucket index to set GC_MARK_METADATA for journal buckets in bch_btree_gc_finish() Coly Li
2021-08-11 17:02 ` [PATCH v12 08/12] bcache: add BCH_FEATURE_INCOMPAT_NVDIMM_META into incompat feature set Coly Li
2021-08-11 17:02 ` Coly Li [this message]
2021-08-11 17:02 ` [PATCH v12 10/12] bcache: support storing bcache journal into NVDIMM meta device Coly Li
2021-08-11 17:02 ` [PATCH v12 11/12] bcache: read jset from NVDIMM pages for journal replay Coly Li
2021-08-11 17:02 ` [PATCH v12 12/12] bcache: add sysfs interface register_nvdimm_meta to register NVDIMM meta device Coly Li
2021-08-15 16:21 ` [PATCH v12 00/12] bcache: support NVDIMM for journaling Coly Li

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210811170224.42837-10-colyli@suse.de \
    --to=colyli@suse.de \
    --cc=axboe@kernel.dk \
    --cc=dan.j.williams@intel.com \
    --cc=hare@suse.com \
    --cc=hare@suse.de \
    --cc=hch@lst.de \
    --cc=jack@suse.cz \
    --cc=jianpeng.ma@intel.com \
    --cc=linux-bcache@vger.kernel.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-nvdimm@lists.linux.dev \
    --cc=qiaowei.ren@intel.com \
    --cc=ying.huang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).