All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/5] lightnvm: pblk: Flexible metadata
@ 2018-10-05 13:34 Igor Konopko
  2018-10-05 13:34 ` [PATCH 1/5] lightnvm: pblk: Do not reuse DMA memory on partial read Igor Konopko
                   ` (4 more replies)
  0 siblings, 5 replies; 22+ messages in thread
From: Igor Konopko @ 2018-10-05 13:34 UTC (permalink / raw)
  To: mb, javier; +Cc: linux-block, marcin.dziegielewski, igor.j.konopko

This series of patches extends the way in which pblk can
store L2P sector metadata. After this set of changes,
any size of NVMe metadata (including 0) is supported.

Igor Konopko (5):
  lightnvm: pblk: Do not reuse DMA memory on partial read
  lightnvm: pblk: Helpers for OOB metadata
  lightnvm: Flexible DMA pool entry size
  lightnvm: Disable interleaved metadata
  lightnvm: pblk: Support for packed metadata

 drivers/lightnvm/core.c          | 33 ++++++++++----
 drivers/lightnvm/pblk-core.c     | 77 +++++++++++++++++++++++++-------
 drivers/lightnvm/pblk-init.c     | 54 +++++++++++++++++++++-
 drivers/lightnvm/pblk-map.c      | 21 ++++++---
 drivers/lightnvm/pblk-rb.c       |  3 ++
 drivers/lightnvm/pblk-read.c     | 56 +++++++++++------------
 drivers/lightnvm/pblk-recovery.c | 28 +++++++-----
 drivers/lightnvm/pblk-sysfs.c    |  7 +++
 drivers/lightnvm/pblk-write.c    | 14 ++++--
 drivers/lightnvm/pblk.h          | 55 +++++++++++++++++++++--
 drivers/nvme/host/lightnvm.c     |  7 ++-
 include/linux/lightnvm.h         |  9 ++--
 12 files changed, 278 insertions(+), 86 deletions(-)

-- 
2.17.1

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH 1/5] lightnvm: pblk: Do not reuse DMA memory on partial read
  2018-10-05 13:34 [PATCH 0/5] lightnvm: pblk: Flexible metadata Igor Konopko
@ 2018-10-05 13:34 ` Igor Konopko
  2018-10-12 11:32   ` Javier Gonzalez
  2018-10-05 13:34 ` [PATCH 2/5] lightnvm: pblk: Helpers for OOB metadata Igor Konopko
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 22+ messages in thread
From: Igor Konopko @ 2018-10-05 13:34 UTC (permalink / raw)
  To: mb, javier; +Cc: linux-block, marcin.dziegielewski, igor.j.konopko

Currently, DMA-allocated memory is reused on the partial read
path for some internal pblk structs. In preparation for
dynamic DMA pool sizes, we need to change it to kmalloc-allocated
memory.

Signed-off-by: Igor Konopko <igor.j.konopko@intel.com>
---
 drivers/lightnvm/pblk-read.c | 20 +++++---------------
 drivers/lightnvm/pblk.h      |  2 ++
 2 files changed, 7 insertions(+), 15 deletions(-)

diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
index d340dece1d00..08f6ebd4bc48 100644
--- a/drivers/lightnvm/pblk-read.c
+++ b/drivers/lightnvm/pblk-read.c
@@ -224,7 +224,6 @@ static void pblk_end_partial_read(struct nvm_rq *rqd)
 	unsigned long *read_bitmap = pr_ctx->bitmap;
 	int nr_secs = pr_ctx->orig_nr_secs;
 	int nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs);
-	__le64 *lba_list_mem, *lba_list_media;
 	void *src_p, *dst_p;
 	int hole, i;
 
@@ -237,13 +236,9 @@ static void pblk_end_partial_read(struct nvm_rq *rqd)
 		rqd->ppa_list[0] = ppa;
 	}
 
-	/* Re-use allocated memory for intermediate lbas */
-	lba_list_mem = (((void *)rqd->ppa_list) + pblk_dma_ppa_size);
-	lba_list_media = (((void *)rqd->ppa_list) + 2 * pblk_dma_ppa_size);
-
 	for (i = 0; i < nr_secs; i++) {
-		lba_list_media[i] = meta_list[i].lba;
-		meta_list[i].lba = lba_list_mem[i];
+		pr_ctx->lba_list_media[i] = meta_list[i].lba;
+		meta_list[i].lba = pr_ctx->lba_list_mem[i];
 	}
 
 	/* Fill the holes in the original bio */
@@ -255,7 +250,7 @@ static void pblk_end_partial_read(struct nvm_rq *rqd)
 		line = pblk_ppa_to_line(pblk, rqd->ppa_list[i]);
 		kref_put(&line->ref, pblk_line_put);
 
-		meta_list[hole].lba = lba_list_media[i];
+		meta_list[hole].lba = pr_ctx->lba_list_media[i];
 
 		src_bv = new_bio->bi_io_vec[i++];
 		dst_bv = bio->bi_io_vec[bio_init_idx + hole];
@@ -295,13 +290,9 @@ static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
 	struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
 	struct pblk_pr_ctx *pr_ctx;
 	struct bio *new_bio, *bio = r_ctx->private;
-	__le64 *lba_list_mem;
 	int nr_secs = rqd->nr_ppas;
 	int i;
 
-	/* Re-use allocated memory for intermediate lbas */
-	lba_list_mem = (((void *)rqd->ppa_list) + pblk_dma_ppa_size);
-
 	new_bio = bio_alloc(GFP_KERNEL, nr_holes);
 
 	if (pblk_bio_add_pages(pblk, new_bio, GFP_KERNEL, nr_holes))
@@ -312,12 +303,12 @@ static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
 		goto fail_free_pages;
 	}
 
-	pr_ctx = kmalloc(sizeof(struct pblk_pr_ctx), GFP_KERNEL);
+	pr_ctx = kzalloc(sizeof(struct pblk_pr_ctx), GFP_KERNEL);
 	if (!pr_ctx)
 		goto fail_free_pages;
 
 	for (i = 0; i < nr_secs; i++)
-		lba_list_mem[i] = meta_list[i].lba;
+		pr_ctx->lba_list_mem[i] = meta_list[i].lba;
 
 	new_bio->bi_iter.bi_sector = 0; /* internal bio */
 	bio_set_op_attrs(new_bio, REQ_OP_READ, 0);
@@ -325,7 +316,6 @@ static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
 	rqd->bio = new_bio;
 	rqd->nr_ppas = nr_holes;
 
-	pr_ctx->ppa_ptr = NULL;
 	pr_ctx->orig_bio = bio;
 	bitmap_copy(pr_ctx->bitmap, read_bitmap, NVM_MAX_VLBA);
 	pr_ctx->bio_init_idx = bio_init_idx;
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index 0f98ea24ee59..aea09879636f 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -132,6 +132,8 @@ struct pblk_pr_ctx {
 	unsigned int bio_init_idx;
 	void *ppa_ptr;
 	dma_addr_t dma_ppa_list;
+	__le64 lba_list_mem[NVM_MAX_VLBA];
+	__le64 lba_list_media[NVM_MAX_VLBA];
 };
 
 /* Pad context */
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 2/5] lightnvm: pblk: Helpers for OOB metadata
  2018-10-05 13:34 [PATCH 0/5] lightnvm: pblk: Flexible metadata Igor Konopko
  2018-10-05 13:34 ` [PATCH 1/5] lightnvm: pblk: Do not reuse DMA memory on partial read Igor Konopko
@ 2018-10-05 13:34 ` Igor Konopko
  2018-10-09  9:02   ` Hans Holmberg
  2018-10-12 11:52   ` Javier Gonzalez
  2018-10-05 13:34 ` [PATCH 3/5] lightnvm: Flexible DMA pool entry size Igor Konopko
                   ` (2 subsequent siblings)
  4 siblings, 2 replies; 22+ messages in thread
From: Igor Konopko @ 2018-10-05 13:34 UTC (permalink / raw)
  To: mb, javier; +Cc: linux-block, marcin.dziegielewski, igor.j.konopko

Currently pblk assumes that the size of OOB metadata on the drive is
always equal to the size of the pblk_sec_meta struct. This commit adds
helpers which will allow handling different sizes of OOB metadata on
the drive.

Signed-off-by: Igor Konopko <igor.j.konopko@intel.com>
---
 drivers/lightnvm/pblk-core.c     |  6 ++---
 drivers/lightnvm/pblk-map.c      | 21 ++++++++++------
 drivers/lightnvm/pblk-read.c     | 41 +++++++++++++++++++-------------
 drivers/lightnvm/pblk-recovery.c | 14 ++++++-----
 drivers/lightnvm/pblk.h          | 37 +++++++++++++++++++++++++++-
 5 files changed, 86 insertions(+), 33 deletions(-)

diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 6944aac43b01..7cb39d84c833 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -743,7 +743,6 @@ int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line)
 	rqd.opcode = NVM_OP_PREAD;
 	rqd.nr_ppas = lm->smeta_sec;
 	rqd.is_seq = 1;
-
 	for (i = 0; i < lm->smeta_sec; i++, paddr++)
 		rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
 
@@ -796,10 +795,11 @@ static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line,
 	rqd.is_seq = 1;
 
 	for (i = 0; i < lm->smeta_sec; i++, paddr++) {
-		struct pblk_sec_meta *meta_list = rqd.meta_list;
+		void *meta_list = rqd.meta_list;
 
 		rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
-		meta_list[i].lba = lba_list[paddr] = addr_empty;
+		pblk_set_meta_lba(pblk, meta_list, i, ADDR_EMPTY);
+		lba_list[paddr] = addr_empty;
 	}
 
 	ret = pblk_submit_io_sync_sem(pblk, &rqd);
diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c
index 6dcbd44e3acb..4c7a9909308e 100644
--- a/drivers/lightnvm/pblk-map.c
+++ b/drivers/lightnvm/pblk-map.c
@@ -22,7 +22,7 @@
 static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
 			      struct ppa_addr *ppa_list,
 			      unsigned long *lun_bitmap,
-			      struct pblk_sec_meta *meta_list,
+			      void *meta_list,
 			      unsigned int valid_secs)
 {
 	struct pblk_line *line = pblk_line_get_data(pblk);
@@ -68,14 +68,15 @@ static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
 			kref_get(&line->ref);
 			w_ctx = pblk_rb_w_ctx(&pblk->rwb, sentry + i);
 			w_ctx->ppa = ppa_list[i];
-			meta_list[i].lba = cpu_to_le64(w_ctx->lba);
+			pblk_set_meta_lba(pblk, meta_list, i, w_ctx->lba);
 			lba_list[paddr] = cpu_to_le64(w_ctx->lba);
 			if (lba_list[paddr] != addr_empty)
 				line->nr_valid_lbas++;
 			else
 				atomic64_inc(&pblk->pad_wa);
 		} else {
-			lba_list[paddr] = meta_list[i].lba = addr_empty;
+			lba_list[paddr] = addr_empty;
+			pblk_set_meta_lba(pblk, meta_list, i, ADDR_EMPTY);
 			__pblk_map_invalidate(pblk, line, paddr);
 		}
 	}
@@ -88,7 +89,7 @@ void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
 		 unsigned long *lun_bitmap, unsigned int valid_secs,
 		 unsigned int off)
 {
-	struct pblk_sec_meta *meta_list = rqd->meta_list;
+	void *meta_list = rqd->meta_list;
 	struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
 	unsigned int map_secs;
 	int min = pblk->min_write_pgs;
@@ -97,7 +98,10 @@ void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
 	for (i = off; i < rqd->nr_ppas; i += min) {
 		map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
 		if (pblk_map_page_data(pblk, sentry + i, &ppa_list[i],
-					lun_bitmap, &meta_list[i], map_secs)) {
+					lun_bitmap,
+					pblk_get_meta_buffer(pblk,
+							     meta_list, i),
+					map_secs)) {
 			bio_put(rqd->bio);
 			pblk_free_rqd(pblk, rqd, PBLK_WRITE);
 			pblk_pipeline_stop(pblk);
@@ -113,7 +117,7 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
 	struct nvm_tgt_dev *dev = pblk->dev;
 	struct nvm_geo *geo = &dev->geo;
 	struct pblk_line_meta *lm = &pblk->lm;
-	struct pblk_sec_meta *meta_list = rqd->meta_list;
+	void *meta_list = rqd->meta_list;
 	struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
 	struct pblk_line *e_line, *d_line;
 	unsigned int map_secs;
@@ -123,7 +127,10 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
 	for (i = 0; i < rqd->nr_ppas; i += min) {
 		map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
 		if (pblk_map_page_data(pblk, sentry + i, &ppa_list[i],
-					lun_bitmap, &meta_list[i], map_secs)) {
+					lun_bitmap,
+					pblk_get_meta_buffer(pblk,
+							     meta_list, i),
+					map_secs)) {
 			bio_put(rqd->bio);
 			pblk_free_rqd(pblk, rqd, PBLK_WRITE);
 			pblk_pipeline_stop(pblk);
diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
index 08f6ebd4bc48..6584a2588f61 100644
--- a/drivers/lightnvm/pblk-read.c
+++ b/drivers/lightnvm/pblk-read.c
@@ -43,7 +43,7 @@ static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
 				 struct bio *bio, sector_t blba,
 				 unsigned long *read_bitmap)
 {
-	struct pblk_sec_meta *meta_list = rqd->meta_list;
+	void *meta_list = rqd->meta_list;
 	struct ppa_addr ppas[NVM_MAX_VLBA];
 	int nr_secs = rqd->nr_ppas;
 	bool advanced_bio = false;
@@ -58,7 +58,7 @@ static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
 retry:
 		if (pblk_ppa_empty(p)) {
 			WARN_ON(test_and_set_bit(i, read_bitmap));
-			meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
+			pblk_set_meta_lba(pblk, meta_list, i, ADDR_EMPTY);
 
 			if (unlikely(!advanced_bio)) {
 				bio_advance(bio, (i) * PBLK_EXPOSED_PAGE_SIZE);
@@ -78,7 +78,7 @@ static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
 				goto retry;
 			}
 			WARN_ON(test_and_set_bit(i, read_bitmap));
-			meta_list[i].lba = cpu_to_le64(lba);
+			pblk_set_meta_lba(pblk, meta_list, i, lba);
 			advanced_bio = true;
 #ifdef CONFIG_NVM_PBLK_DEBUG
 			atomic_long_inc(&pblk->cache_reads);
@@ -105,12 +105,15 @@ static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
 static void pblk_read_check_seq(struct pblk *pblk, struct nvm_rq *rqd,
 				sector_t blba)
 {
-	struct pblk_sec_meta *meta_lba_list = rqd->meta_list;
+	void *meta_list = rqd->meta_list;
 	int nr_lbas = rqd->nr_ppas;
 	int i;
 
+	if (!pblk_is_oob_meta_supported(pblk))
+		return;
+
 	for (i = 0; i < nr_lbas; i++) {
-		u64 lba = le64_to_cpu(meta_lba_list[i].lba);
+		u64 lba = pblk_get_meta_lba(pblk, meta_list, i);
 
 		if (lba == ADDR_EMPTY)
 			continue;
@@ -134,9 +137,12 @@ static void pblk_read_check_seq(struct pblk *pblk, struct nvm_rq *rqd,
 static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd,
 				 u64 *lba_list, int nr_lbas)
 {
-	struct pblk_sec_meta *meta_lba_list = rqd->meta_list;
+	void *meta_lba_list = rqd->meta_list;
 	int i, j;
 
+	if (!pblk_is_oob_meta_supported(pblk))
+		return;
+
 	for (i = 0, j = 0; i < nr_lbas; i++) {
 		u64 lba = lba_list[i];
 		u64 meta_lba;
@@ -144,7 +150,7 @@ static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd,
 		if (lba == ADDR_EMPTY)
 			continue;
 
-		meta_lba = le64_to_cpu(meta_lba_list[j].lba);
+		meta_lba = pblk_get_meta_lba(pblk, meta_lba_list, j);
 
 		if (lba != meta_lba) {
 #ifdef CONFIG_NVM_PBLK_DEBUG
@@ -219,7 +225,7 @@ static void pblk_end_partial_read(struct nvm_rq *rqd)
 	struct bio *new_bio = rqd->bio;
 	struct bio *bio = pr_ctx->orig_bio;
 	struct bio_vec src_bv, dst_bv;
-	struct pblk_sec_meta *meta_list = rqd->meta_list;
+	void *meta_list = rqd->meta_list;
 	int bio_init_idx = pr_ctx->bio_init_idx;
 	unsigned long *read_bitmap = pr_ctx->bitmap;
 	int nr_secs = pr_ctx->orig_nr_secs;
@@ -237,8 +243,10 @@ static void pblk_end_partial_read(struct nvm_rq *rqd)
 	}
 
 	for (i = 0; i < nr_secs; i++) {
-		pr_ctx->lba_list_media[i] = meta_list[i].lba;
-		meta_list[i].lba = pr_ctx->lba_list_mem[i];
+		pr_ctx->lba_list_media[i] = pblk_get_meta_lba(pblk,
+							meta_list, i);
+		pblk_set_meta_lba(pblk, meta_list, i,
+				  pr_ctx->lba_list_mem[i]);
 	}
 
 	/* Fill the holes in the original bio */
@@ -250,7 +258,8 @@ static void pblk_end_partial_read(struct nvm_rq *rqd)
 		line = pblk_ppa_to_line(pblk, rqd->ppa_list[i]);
 		kref_put(&line->ref, pblk_line_put);
 
-		meta_list[hole].lba = pr_ctx->lba_list_media[i];
+		pblk_set_meta_lba(pblk, meta_list, hole,
+				  pr_ctx->lba_list_media[i]);
 
 		src_bv = new_bio->bi_io_vec[i++];
 		dst_bv = bio->bi_io_vec[bio_init_idx + hole];
@@ -286,7 +295,7 @@ static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
 			    unsigned long *read_bitmap,
 			    int nr_holes)
 {
-	struct pblk_sec_meta *meta_list = rqd->meta_list;
+	void *meta_list = rqd->meta_list;
 	struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
 	struct pblk_pr_ctx *pr_ctx;
 	struct bio *new_bio, *bio = r_ctx->private;
@@ -308,7 +317,7 @@ static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
 		goto fail_free_pages;
 
 	for (i = 0; i < nr_secs; i++)
-		pr_ctx->lba_list_mem[i] = meta_list[i].lba;
+		pr_ctx->lba_list_mem[i] = pblk_get_meta_lba(pblk, meta_list, i);
 
 	new_bio->bi_iter.bi_sector = 0; /* internal bio */
 	bio_set_op_attrs(new_bio, REQ_OP_READ, 0);
@@ -373,7 +382,7 @@ static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
 static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio,
 			 sector_t lba, unsigned long *read_bitmap)
 {
-	struct pblk_sec_meta *meta_list = rqd->meta_list;
+	void *meta_list = rqd->meta_list;
 	struct ppa_addr ppa;
 
 	pblk_lookup_l2p_seq(pblk, &ppa, lba, 1);
@@ -385,7 +394,7 @@ static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio,
 retry:
 	if (pblk_ppa_empty(ppa)) {
 		WARN_ON(test_and_set_bit(0, read_bitmap));
-		meta_list[0].lba = cpu_to_le64(ADDR_EMPTY);
+		pblk_set_meta_lba(pblk, meta_list, 0, ADDR_EMPTY);
 		return;
 	}
 
@@ -399,7 +408,7 @@ static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio,
 		}
 
 		WARN_ON(test_and_set_bit(0, read_bitmap));
-		meta_list[0].lba = cpu_to_le64(lba);
+		pblk_set_meta_lba(pblk, meta_list, 0, lba);
 
 #ifdef CONFIG_NVM_PBLK_DEBUG
 		atomic_long_inc(&pblk->cache_reads);
diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
index 5740b7509bd8..fa63f9fa5ba8 100644
--- a/drivers/lightnvm/pblk-recovery.c
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -124,7 +124,7 @@ static u64 pblk_sec_in_open_line(struct pblk *pblk, struct pblk_line *line)
 
 struct pblk_recov_alloc {
 	struct ppa_addr *ppa_list;
-	struct pblk_sec_meta *meta_list;
+	void *meta_list;
 	struct nvm_rq *rqd;
 	void *data;
 	dma_addr_t dma_ppa_list;
@@ -158,7 +158,7 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 	struct nvm_geo *geo = &dev->geo;
-	struct pblk_sec_meta *meta_list;
+	void *meta_list;
 	struct pblk_pad_rq *pad_rq;
 	struct nvm_rq *rqd;
 	struct bio *bio;
@@ -242,7 +242,8 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
 			dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
 
 			pblk_map_invalidate(pblk, dev_ppa);
-			lba_list[w_ptr] = meta_list[i].lba = addr_empty;
+			lba_list[w_ptr] = addr_empty;
+			pblk_set_meta_lba(pblk, meta_list, i, ADDR_EMPTY);
 			rqd->ppa_list[i] = dev_ppa;
 		}
 	}
@@ -325,6 +326,7 @@ static int pblk_line_wp_is_unbalanced(struct pblk *pblk,
 			return 1;
 		else if (chunk->wp < line_wp)
 			line_wp = chunk->wp;
+
 	}
 
 	return 0;
@@ -336,7 +338,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
 	struct nvm_tgt_dev *dev = pblk->dev;
 	struct nvm_geo *geo = &dev->geo;
 	struct ppa_addr *ppa_list;
-	struct pblk_sec_meta *meta_list;
+	void *meta_list;
 	struct nvm_rq *rqd;
 	struct bio *bio;
 	void *data;
@@ -434,7 +436,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
 	}
 
 	for (i = 0; i < rqd->nr_ppas; i++) {
-		u64 lba = le64_to_cpu(meta_list[i].lba);
+		u64 lba = pblk_get_meta_lba(pblk, meta_list, i);
 
 		lba_list[paddr++] = cpu_to_le64(lba);
 
@@ -463,7 +465,7 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line)
 	struct nvm_geo *geo = &dev->geo;
 	struct nvm_rq *rqd;
 	struct ppa_addr *ppa_list;
-	struct pblk_sec_meta *meta_list;
+	void *meta_list;
 	struct pblk_recov_alloc p;
 	void *data;
 	dma_addr_t dma_ppa_list, dma_meta_list;
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index aea09879636f..53156b6f99a3 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -87,7 +87,6 @@ enum {
 };
 
 struct pblk_sec_meta {
-	u64 reserved;
 	__le64 lba;
 };
 
@@ -1366,4 +1365,40 @@ static inline char *pblk_disk_name(struct pblk *pblk)
 
 	return disk->disk_name;
 }
+
+static inline int pblk_is_oob_meta_supported(struct pblk *pblk)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+
+	/* Pblk uses OOB meta to store LBA of given physical sector.
+	 * The LBA is eventually used in recovery mode and/or for handling
+	 * telemetry events (e.g., relocate sector).
+	 */
+
+	return (geo->sos >= sizeof(struct pblk_sec_meta));
+}
+
+static inline struct pblk_sec_meta *pblk_get_meta_buffer(struct pblk *pblk,
+							 void *meta_ptr,
+							 int index)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+
+	return meta_ptr + max_t(int, geo->sos, sizeof(struct pblk_sec_meta))
+		* index;
+}
+
+static inline void pblk_set_meta_lba(struct pblk *pblk, void *meta_ptr,
+				     int index, u64 lba)
+{
+	pblk_get_meta_buffer(pblk, meta_ptr, index)->lba = cpu_to_le64(lba);
+}
+
+static inline u64 pblk_get_meta_lba(struct pblk *pblk, void *meta_ptr,
+				    int index)
+{
+	return le64_to_cpu(pblk_get_meta_buffer(pblk, meta_ptr, index)->lba);
+}
 #endif /* PBLK_H_ */
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 3/5] lightnvm: Flexible DMA pool entry size
  2018-10-05 13:34 [PATCH 0/5] lightnvm: pblk: Flexible metadata Igor Konopko
  2018-10-05 13:34 ` [PATCH 1/5] lightnvm: pblk: Do not reuse DMA memory on partial read Igor Konopko
  2018-10-05 13:34 ` [PATCH 2/5] lightnvm: pblk: Helpers for OOB metadata Igor Konopko
@ 2018-10-05 13:34 ` Igor Konopko
  2018-10-09  9:16   ` Hans Holmberg
  2018-10-05 13:34 ` [PATCH 4/5] lightnvm: Disable interleaved metadata Igor Konopko
  2018-10-05 13:34 ` [PATCH 5/5] lightnvm: pblk: Support for packed metadata Igor Konopko
  4 siblings, 1 reply; 22+ messages in thread
From: Igor Konopko @ 2018-10-05 13:34 UTC (permalink / raw)
  To: mb, javier; +Cc: linux-block, marcin.dziegielewski, igor.j.konopko

Currently the whole of lightnvm and pblk uses a single DMA pool,
for which the entry size is always equal to PAGE_SIZE.
The PPA list always needs 8b*64, so there is only 56b*64
of space for OOB metadata. Since NVMe OOB metadata can be bigger,
such as 128b, this solution is not robust.

This patch adds the possibility to support OOB metadata above
56b by creating a separate DMA pool for pblk with an entry
size which is big enough to store both the PPA list and such
OOB metadata.

Signed-off-by: Igor Konopko <igor.j.konopko@intel.com>
---
 drivers/lightnvm/core.c          | 33 +++++++++++++++++++++++---------
 drivers/lightnvm/pblk-core.c     | 19 +++++++++---------
 drivers/lightnvm/pblk-init.c     | 11 +++++++++++
 drivers/lightnvm/pblk-read.c     |  3 ++-
 drivers/lightnvm/pblk-recovery.c |  9 +++++----
 drivers/lightnvm/pblk.h          | 11 ++++++++++-
 drivers/nvme/host/lightnvm.c     |  6 ++++--
 include/linux/lightnvm.h         |  8 +++++---
 8 files changed, 71 insertions(+), 29 deletions(-)

diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index efb976a863d2..48db7a096257 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -641,20 +641,33 @@ void nvm_unregister_tgt_type(struct nvm_tgt_type *tt)
 }
 EXPORT_SYMBOL(nvm_unregister_tgt_type);
 
-void *nvm_dev_dma_alloc(struct nvm_dev *dev, gfp_t mem_flags,
-							dma_addr_t *dma_handler)
+void *nvm_dev_dma_alloc(struct nvm_dev *dev, void *pool,
+				gfp_t mem_flags, dma_addr_t *dma_handler)
 {
-	return dev->ops->dev_dma_alloc(dev, dev->dma_pool, mem_flags,
-								dma_handler);
+	return dev->ops->dev_dma_alloc(dev, pool ?: dev->dma_pool,
+						mem_flags, dma_handler);
 }
 EXPORT_SYMBOL(nvm_dev_dma_alloc);
 
-void nvm_dev_dma_free(struct nvm_dev *dev, void *addr, dma_addr_t dma_handler)
+void nvm_dev_dma_free(struct nvm_dev *dev, void *pool,
+				void *addr, dma_addr_t dma_handler)
 {
-	dev->ops->dev_dma_free(dev->dma_pool, addr, dma_handler);
+	dev->ops->dev_dma_free(pool ?: dev->dma_pool, addr, dma_handler);
 }
 EXPORT_SYMBOL(nvm_dev_dma_free);
 
+void *nvm_dev_dma_create(struct nvm_dev *dev, int size, char *name)
+{
+	return dev->ops->create_dma_pool(dev, name, size);
+}
+EXPORT_SYMBOL(nvm_dev_dma_create);
+
+void nvm_dev_dma_destroy(struct nvm_dev *dev, void *pool)
+{
+	dev->ops->destroy_dma_pool(pool);
+}
+EXPORT_SYMBOL(nvm_dev_dma_destroy);
+
 static struct nvm_dev *nvm_find_nvm_dev(const char *name)
 {
 	struct nvm_dev *dev;
@@ -682,7 +695,8 @@ static int nvm_set_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd,
 	}
 
 	rqd->nr_ppas = nr_ppas;
-	rqd->ppa_list = nvm_dev_dma_alloc(dev, GFP_KERNEL, &rqd->dma_ppa_list);
+	rqd->ppa_list = nvm_dev_dma_alloc(dev, NULL, GFP_KERNEL,
+						&rqd->dma_ppa_list);
 	if (!rqd->ppa_list) {
 		pr_err("nvm: failed to allocate dma memory\n");
 		return -ENOMEM;
@@ -708,7 +722,8 @@ static void nvm_free_rqd_ppalist(struct nvm_tgt_dev *tgt_dev,
 	if (!rqd->ppa_list)
 		return;
 
-	nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
+	nvm_dev_dma_free(tgt_dev->parent, NULL, rqd->ppa_list,
+				rqd->dma_ppa_list);
 }
 
 static int nvm_set_flags(struct nvm_geo *geo, struct nvm_rq *rqd)
@@ -1145,7 +1160,7 @@ int nvm_register(struct nvm_dev *dev)
 	if (!dev->q || !dev->ops)
 		return -EINVAL;
 
-	dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist");
+	dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist", PAGE_SIZE);
 	if (!dev->dma_pool) {
 		pr_err("nvm: could not create dma pool\n");
 		return -ENOMEM;
diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 7cb39d84c833..131972b13e27 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -242,16 +242,16 @@ int pblk_alloc_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 
-	rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
-							&rqd->dma_meta_list);
+	rqd->meta_list = nvm_dev_dma_alloc(dev->parent, pblk->dma_pool,
+					   GFP_KERNEL, &rqd->dma_meta_list);
 	if (!rqd->meta_list)
 		return -ENOMEM;
 
 	if (rqd->nr_ppas == 1)
 		return 0;
 
-	rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size;
-	rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size;
+	rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size(pblk);
+	rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size(pblk);
 
 	return 0;
 }
@@ -261,7 +261,7 @@ void pblk_free_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd)
 	struct nvm_tgt_dev *dev = pblk->dev;
 
 	if (rqd->meta_list)
-		nvm_dev_dma_free(dev->parent, rqd->meta_list,
+		nvm_dev_dma_free(dev->parent, pblk->dma_pool, rqd->meta_list,
 				rqd->dma_meta_list);
 }
 
@@ -840,13 +840,13 @@ int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line,
 	int i, j;
 	int ret;
 
-	meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
+	meta_list = nvm_dev_dma_alloc(dev->parent, pblk->dma_pool, GFP_KERNEL,
 							&dma_meta_list);
 	if (!meta_list)
 		return -ENOMEM;
 
-	ppa_list = meta_list + pblk_dma_meta_size;
-	dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
+	ppa_list = meta_list + pblk_dma_meta_size(pblk);
+	dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk);
 
 next_rq:
 	memset(&rqd, 0, sizeof(struct nvm_rq));
@@ -919,7 +919,8 @@ int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line,
 		goto next_rq;
 
 free_rqd_dma:
-	nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
+	nvm_dev_dma_free(dev->parent, pblk->dma_pool,
+			 rqd.meta_list, rqd.dma_meta_list);
 	return ret;
 }
 
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
index e3573880dbda..b794e279da31 100644
--- a/drivers/lightnvm/pblk-init.c
+++ b/drivers/lightnvm/pblk-init.c
@@ -1087,6 +1087,7 @@ static void pblk_free(struct pblk *pblk)
 	pblk_l2p_free(pblk);
 	pblk_rwb_free(pblk);
 	pblk_core_free(pblk);
+	nvm_dev_dma_destroy(pblk->dev->parent, pblk->dma_pool);
 
 	kfree(pblk);
 }
@@ -1178,6 +1179,15 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
 	atomic_long_set(&pblk->write_failed, 0);
 	atomic_long_set(&pblk->erase_failed, 0);
 
+	pblk->dma_pool = nvm_dev_dma_create(dev->parent, (pblk_dma_ppa_size +
+					    pblk_dma_meta_size(pblk)),
+					    tdisk->disk_name);
+	if (!pblk->dma_pool) {
+		pblk_err(pblk, "could not allocate dma pool\n");
+		kfree(pblk);
+		return ERR_PTR(-ENOMEM);
+	}
+
 	ret = pblk_core_init(pblk);
 	if (ret) {
 		pblk_err(pblk, "could not initialize core\n");
@@ -1249,6 +1259,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
 fail_free_core:
 	pblk_core_free(pblk);
 fail:
+	nvm_dev_dma_destroy(dev->parent, pblk->dma_pool);
 	kfree(pblk);
 	return ERR_PTR(ret);
 }
diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
index 6584a2588f61..5d213c4f83c1 100644
--- a/drivers/lightnvm/pblk-read.c
+++ b/drivers/lightnvm/pblk-read.c
@@ -499,7 +499,8 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
 	return NVM_IO_OK;
 
 fail_meta_free:
-	nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
+	nvm_dev_dma_free(dev->parent, pblk->dma_pool,
+			 rqd->meta_list, rqd->dma_meta_list);
 fail_rqd_free:
 	pblk_free_rqd(pblk, rqd, PBLK_READ);
 	return ret;
diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
index fa63f9fa5ba8..4b703877907b 100644
--- a/drivers/lightnvm/pblk-recovery.c
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -471,12 +471,13 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line)
 	dma_addr_t dma_ppa_list, dma_meta_list;
 	int ret = 0;
 
-	meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
+	meta_list = nvm_dev_dma_alloc(dev->parent, pblk->dma_pool,
+					 GFP_KERNEL, &dma_meta_list);
 	if (!meta_list)
 		return -ENOMEM;
 
-	ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
-	dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
+	ppa_list = (void *)(meta_list) + pblk_dma_meta_size(pblk);
+	dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk);
 
 	data = kcalloc(pblk->max_write_pgs, geo->csecs, GFP_KERNEL);
 	if (!data) {
@@ -507,7 +508,7 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line)
 	mempool_free(rqd, &pblk->r_rq_pool);
 	kfree(data);
 free_meta_list:
-	nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
+	nvm_dev_dma_free(dev->parent, pblk->dma_pool, meta_list, dma_meta_list);
 
 	return ret;
 }
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index 53156b6f99a3..6e4a63fd4c49 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -103,7 +103,6 @@ enum {
 	PBLK_RL_LOW = 4
 };
 
-#define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * NVM_MAX_VLBA)
 #define pblk_dma_ppa_size (sizeof(u64) * NVM_MAX_VLBA)
 
 /* write buffer completion context */
@@ -711,6 +710,7 @@ struct pblk {
 	struct timer_list wtimer;
 
 	struct pblk_gc gc;
+	void *dma_pool;
 };
 
 struct pblk_line_ws {
@@ -1401,4 +1401,13 @@ static inline u64 pblk_get_meta_lba(struct pblk *pblk, void *meta_ptr,
 {
 	return le64_to_cpu(pblk_get_meta_buffer(pblk, meta_ptr, index)->lba);
 }
+
+static inline int pblk_dma_meta_size(struct pblk *pblk)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+
+	return max_t(int, sizeof(struct pblk_sec_meta), geo->sos)
+				* NVM_MAX_VLBA;
+}
 #endif /* PBLK_H_ */
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index 986526ff1521..e370793f52d5 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -736,11 +736,13 @@ static int nvme_nvm_submit_io_sync(struct nvm_dev *dev, struct nvm_rq *rqd)
 	return ret;
 }
 
-static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name)
+static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name,
+					int size)
 {
 	struct nvme_ns *ns = nvmdev->q->queuedata;
 
-	return dma_pool_create(name, ns->ctrl->dev, PAGE_SIZE, PAGE_SIZE, 0);
+	size = round_up(size, PAGE_SIZE);
+	return dma_pool_create(name, ns->ctrl->dev, size, PAGE_SIZE, 0);
 }
 
 static void nvme_nvm_destroy_dma_pool(void *pool)
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 36a84180c1e8..c6c998716ee7 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -90,7 +90,7 @@ typedef int (nvm_get_chk_meta_fn)(struct nvm_dev *, sector_t, int,
 							struct nvm_chk_meta *);
 typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
 typedef int (nvm_submit_io_sync_fn)(struct nvm_dev *, struct nvm_rq *);
-typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *);
+typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *, int);
 typedef void (nvm_destroy_dma_pool_fn)(void *);
 typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t,
 								dma_addr_t *);
@@ -668,8 +668,10 @@ struct nvm_tgt_type {
 extern int nvm_register_tgt_type(struct nvm_tgt_type *);
 extern void nvm_unregister_tgt_type(struct nvm_tgt_type *);
 
-extern void *nvm_dev_dma_alloc(struct nvm_dev *, gfp_t, dma_addr_t *);
-extern void nvm_dev_dma_free(struct nvm_dev *, void *, dma_addr_t);
+extern void *nvm_dev_dma_alloc(struct nvm_dev *, void *, gfp_t, dma_addr_t *);
+extern void nvm_dev_dma_free(struct nvm_dev *, void *, void *, dma_addr_t);
+extern void *nvm_dev_dma_create(struct nvm_dev *, int, char *);
+extern void nvm_dev_dma_destroy(struct nvm_dev *, void *);
 
 extern struct nvm_dev *nvm_alloc_dev(int);
 extern int nvm_register(struct nvm_dev *);
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 4/5] lightnvm: Disable interleaved metadata
  2018-10-05 13:34 [PATCH 0/5] lightnvm: pblk: Flexible metadata Igor Konopko
                   ` (2 preceding siblings ...)
  2018-10-05 13:34 ` [PATCH 3/5] lightnvm: Flexible DMA pool entry size Igor Konopko
@ 2018-10-05 13:34 ` Igor Konopko
  2018-10-09  9:30   ` Hans Holmberg
  2018-10-05 13:34 ` [PATCH 5/5] lightnvm: pblk: Support for packed metadata Igor Konopko
  4 siblings, 1 reply; 22+ messages in thread
From: Igor Konopko @ 2018-10-05 13:34 UTC (permalink / raw)
  To: mb, javier; +Cc: linux-block, marcin.dziegielewski, igor.j.konopko

Currently pblk and lightnvm does only check for size
of OOB metadata and does not care wheather this meta
is located in separate buffer or is interleaved with
data in single buffer.

In reality only the first scenario is supported, where
second mode will break pblk functionality during any
IO operation.

The goal of this patch is to block creation of pblk
devices in case of interleaved metadata

Signed-off-by: Igor Konopko <igor.j.konopko@intel.com>
---
 drivers/lightnvm/pblk-init.c | 6 ++++++
 drivers/nvme/host/lightnvm.c | 1 +
 include/linux/lightnvm.h     | 1 +
 3 files changed, 8 insertions(+)

diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
index b794e279da31..1529aa37b30f 100644
--- a/drivers/lightnvm/pblk-init.c
+++ b/drivers/lightnvm/pblk-init.c
@@ -1152,6 +1152,12 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
 		return ERR_PTR(-EINVAL);
 	}
 
+	if (geo->ext) {
+		pblk_err(pblk, "extended metadata not supported\n");
+		kfree(pblk);
+		return ERR_PTR(-EINVAL);
+	}
+
 	spin_lock_init(&pblk->resubmit_lock);
 	spin_lock_init(&pblk->trans_lock);
 	spin_lock_init(&pblk->lock);
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index e370793f52d5..7020e87bcee4 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -989,6 +989,7 @@ void nvme_nvm_update_nvm_info(struct nvme_ns *ns)
 
 	geo->csecs = 1 << ns->lba_shift;
 	geo->sos = ns->ms;
+	geo->ext = ns->ext;
 }
 
 int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node)
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index c6c998716ee7..abd29f50f2a1 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -357,6 +357,7 @@ struct nvm_geo {
 	u32	clba;		/* sectors per chunk */
 	u16	csecs;		/* sector size */
 	u16	sos;		/* out-of-band area size */
+	u16	ext;		/* metadata in extended data buffer */
 
 	/* device write constrains */
 	u32	ws_min;		/* minimum write size */
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 5/5] lightnvm: pblk: Support for packed metadata
  2018-10-05 13:34 [PATCH 0/5] lightnvm: pblk: Flexible metadata Igor Konopko
                   ` (3 preceding siblings ...)
  2018-10-05 13:34 ` [PATCH 4/5] lightnvm: Disable interleaved metadata Igor Konopko
@ 2018-10-05 13:34 ` Igor Konopko
  4 siblings, 0 replies; 22+ messages in thread
From: Igor Konopko @ 2018-10-05 13:34 UTC (permalink / raw)
  To: mb, javier; +Cc: linux-block, marcin.dziegielewski, igor.j.konopko

In current pblk implementation, l2p mapping for not closed lines
is always stored only in OOB metadata and recovered from it.

Such a solution does not provide data integrity when drives does
not have such a OOB metadata space.

The goal of this patch is to add support for so called packed
metadata, which store l2p mapping for open lines in last sector
of every write unit.

Signed-off-by: Igor Konopko <igor.j.konopko@intel.com>
---
 drivers/lightnvm/pblk-core.c     | 52 +++++++++++++++++++++++++++++---
 drivers/lightnvm/pblk-init.c     | 37 +++++++++++++++++++++--
 drivers/lightnvm/pblk-rb.c       |  3 ++
 drivers/lightnvm/pblk-recovery.c |  5 +--
 drivers/lightnvm/pblk-sysfs.c    |  7 +++++
 drivers/lightnvm/pblk-write.c    | 14 ++++++---
 drivers/lightnvm/pblk.h          |  5 ++-
 7 files changed, 110 insertions(+), 13 deletions(-)

diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 131972b13e27..e11a46c05067 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -376,7 +376,7 @@ void pblk_write_should_kick(struct pblk *pblk)
 {
 	unsigned int secs_avail = pblk_rb_read_count(&pblk->rwb);
 
-	if (secs_avail >= pblk->min_write_pgs)
+	if (secs_avail >= pblk->min_write_pgs_data)
 		pblk_write_kick(pblk);
 }
 
@@ -407,7 +407,9 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
 	struct pblk_line_meta *lm = &pblk->lm;
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	struct list_head *move_list = NULL;
-	int vsc = le32_to_cpu(*line->vsc);
+	int packed_meta = (le32_to_cpu(*line->vsc) / pblk->min_write_pgs_data)
+			* (pblk->min_write_pgs - pblk->min_write_pgs_data);
+	int vsc = le32_to_cpu(*line->vsc) + packed_meta;
 
 	lockdep_assert_held(&line->lock);
 
@@ -620,12 +622,15 @@ struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
 }
 
 int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
-		   unsigned long secs_to_flush)
+		   unsigned long secs_to_flush, bool skip_meta)
 {
 	int max = pblk->sec_per_write;
 	int min = pblk->min_write_pgs;
 	int secs_to_sync = 0;
 
+	if (skip_meta && pblk->min_write_pgs_data != pblk->min_write_pgs)
+		min = max = pblk->min_write_pgs_data;
+
 	if (secs_avail >= max)
 		secs_to_sync = max;
 	else if (secs_avail >= min)
@@ -851,7 +856,7 @@ int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line,
 next_rq:
 	memset(&rqd, 0, sizeof(struct nvm_rq));
 
-	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
+	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false);
 	rq_len = rq_ppas * geo->csecs;
 
 	bio = pblk_bio_map_addr(pblk, emeta_buf, rq_ppas, rq_len,
@@ -2161,3 +2166,42 @@ void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
 	}
 	spin_unlock(&pblk->trans_lock);
 }
+
+void pblk_set_packed_meta(struct pblk *pblk, struct nvm_rq *rqd)
+{
+	void *meta_list = rqd->meta_list;
+	void *page;
+	int i = 0;
+
+	if (pblk_is_oob_meta_supported(pblk))
+		return;
+
+	/* We need to zero out metadata corresponding to packed meta page */
+	pblk_set_meta_lba(pblk, meta_list, rqd->nr_ppas - 1, ADDR_EMPTY);
+
+	page = page_to_virt(rqd->bio->bi_io_vec[rqd->bio->bi_vcnt - 1].bv_page);
+	/* We need to fill last page of request (packed metadata)
+	 * with data from oob meta buffer.
+	 */
+	for (; i < rqd->nr_ppas; i++)
+		memcpy(page + (i * sizeof(struct pblk_sec_meta)),
+			pblk_get_meta_buffer(pblk, meta_list, i),
+			sizeof(struct pblk_sec_meta));
+}
+
+void pblk_get_packed_meta(struct pblk *pblk, struct nvm_rq *rqd)
+{
+	void *meta_list = rqd->meta_list;
+	void *page;
+	int i = 0;
+
+	if (pblk_is_oob_meta_supported(pblk))
+		return;
+
+	page = page_to_virt(rqd->bio->bi_io_vec[rqd->bio->bi_vcnt - 1].bv_page);
+	/* We need to fill oob meta buffer with data from packe metadata */
+	for (; i < rqd->nr_ppas; i++)
+		memcpy(pblk_get_meta_buffer(pblk, meta_list, i),
+			page + (i * sizeof(struct pblk_sec_meta)),
+			sizeof(struct pblk_sec_meta));
+}
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
index 1529aa37b30f..d2a63494def6 100644
--- a/drivers/lightnvm/pblk-init.c
+++ b/drivers/lightnvm/pblk-init.c
@@ -407,8 +407,40 @@ static int pblk_core_init(struct pblk *pblk)
 	pblk->min_write_pgs = geo->ws_opt;
 	max_write_ppas = pblk->min_write_pgs * geo->all_luns;
 	pblk->max_write_pgs = min_t(int, max_write_ppas, NVM_MAX_VLBA);
+	pblk->min_write_pgs_data = pblk->min_write_pgs;
 	pblk_set_sec_per_write(pblk, pblk->min_write_pgs);
 
+	if (!pblk_is_oob_meta_supported(pblk)) {
+		/* For drives which does not have OOB metadata feature
+		 * in order to support recovery feature we need to use
+		 * so called packed metadata. Packed metada will store
+		 * the same information as OOB metadata (l2p table mapping,
+		 * but in the form of the single page at the end of
+		 * every write request.
+		 */
+		if (pblk->min_write_pgs
+			* sizeof(struct pblk_sec_meta) > PAGE_SIZE) {
+			/* We want to keep all the packed metadata on single
+			 * page per write requests. So we need to ensure that
+			 * it will fit.
+			 *
+			 * This is more like sanity check, since there is
+			 * no device with such a big minimal write size
+			 * (above 1 metabytes).
+			 */
+			pblk_err(pblk, "Not supported min write size\n");
+			return -EINVAL;
+		}
+		/* For packed meta approach we do some simplification.
+		 * On read path we always issue requests which size
+		 * equal to max_write_pgs, with all pages filled with
+		 * user payload except of last one page which will be
+		 * filled with packed metadata.
+		 */
+		pblk->max_write_pgs = pblk->min_write_pgs;
+		pblk->min_write_pgs_data = pblk->min_write_pgs - 1;
+	}
+
 	pblk->pad_dist = kcalloc(pblk->min_write_pgs - 1, sizeof(atomic64_t),
 								GFP_KERNEL);
 	if (!pblk->pad_dist)
@@ -639,7 +671,7 @@ static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
 	struct pblk_line_meta *lm = &pblk->lm;
 	struct nvm_geo *geo = &dev->geo;
 	sector_t provisioned;
-	int sec_meta, blk_meta;
+	int sec_meta, blk_meta, clba;
 
 	if (geo->op == NVM_TARGET_DEFAULT_OP)
 		pblk->op = PBLK_DEFAULT_OP;
@@ -662,7 +694,8 @@ static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
 	sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines;
 	blk_meta = DIV_ROUND_UP(sec_meta, geo->clba);
 
-	pblk->capacity = (provisioned - blk_meta) * geo->clba;
+	clba = (geo->clba / pblk->min_write_pgs) * pblk->min_write_pgs_data;
+	pblk->capacity = (provisioned - blk_meta) * clba;
 
 	atomic_set(&pblk->rl.free_blocks, nr_free_blks);
 	atomic_set(&pblk->rl.free_user_blocks, nr_free_blks);
diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c
index f653faa6a9ed..b6376a23a271 100644
--- a/drivers/lightnvm/pblk-rb.c
+++ b/drivers/lightnvm/pblk-rb.c
@@ -547,6 +547,9 @@ unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
 		to_read = count;
 	}
 
+	/* Add space for packed metadata if in use*/
+	pad += (pblk->min_write_pgs - pblk->min_write_pgs_data);
+
 	c_ctx->sentry = pos;
 	c_ctx->nr_valid = to_read;
 	c_ctx->nr_padded = pad;
diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
index 4b703877907b..41ba2bf2f817 100644
--- a/drivers/lightnvm/pblk-recovery.c
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -188,7 +188,7 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
 	kref_init(&pad_rq->ref);
 
 next_pad_rq:
-	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
+	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false);
 	if (rq_ppas < pblk->min_write_pgs) {
 		pblk_err(pblk, "corrupted pad line %d\n", line->id);
 		goto fail_free_pad;
@@ -366,7 +366,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
 next_rq:
 	memset(rqd, 0, pblk_g_rq_size);
 
-	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
+	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false);
 	if (!rq_ppas)
 		rq_ppas = pblk->min_write_pgs;
 	rq_len = rq_ppas * geo->csecs;
@@ -435,6 +435,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
 		goto retry_rq;
 	}
 
+	pblk_get_packed_meta(pblk, rqd);
 	for (i = 0; i < rqd->nr_ppas; i++) {
 		u64 lba = pblk_get_meta_lba(pblk, meta_list, i);
 
diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c
index 2d2818155aa8..7d8958df9472 100644
--- a/drivers/lightnvm/pblk-sysfs.c
+++ b/drivers/lightnvm/pblk-sysfs.c
@@ -479,6 +479,13 @@ static ssize_t pblk_sysfs_set_sec_per_write(struct pblk *pblk,
 	if (kstrtouint(page, 0, &sec_per_write))
 		return -EINVAL;
 
+	if (!pblk_is_oob_meta_supported(pblk)) {
+		/* For packed metadata case it is
+		 * not allowed to change sec_per_write.
+		 */
+		return -EINVAL;
+	}
+
 	if (sec_per_write < pblk->min_write_pgs
 				|| sec_per_write > pblk->max_write_pgs
 				|| sec_per_write % pblk->min_write_pgs != 0)
diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
index fa8726493b39..c78dfb996bcb 100644
--- a/drivers/lightnvm/pblk-write.c
+++ b/drivers/lightnvm/pblk-write.c
@@ -332,7 +332,7 @@ static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail,
 {
 	int secs_to_sync;
 
-	secs_to_sync = pblk_calc_secs(pblk, secs_avail, secs_to_flush);
+	secs_to_sync = pblk_calc_secs(pblk, secs_avail, secs_to_flush, true);
 
 #ifdef CONFIG_NVM_PBLK_DEBUG
 	if ((!secs_to_sync && secs_to_flush)
@@ -502,6 +502,11 @@ static int pblk_submit_io_set(struct pblk *pblk, struct nvm_rq *rqd)
 		return NVM_IO_ERR;
 	}
 
+	/* This is the first place when we have write requests mapped
+	 * and we can fill packed metadata with l2p mappings.
+	 */
+	pblk_set_packed_meta(pblk, rqd);
+
 	meta_line = pblk_should_submit_meta_io(pblk, rqd);
 
 	/* Submit data write for current data line */
@@ -553,7 +558,7 @@ static int pblk_submit_write(struct pblk *pblk)
 	struct bio *bio;
 	struct nvm_rq *rqd;
 	unsigned int secs_avail, secs_to_sync, secs_to_com;
-	unsigned int secs_to_flush;
+	unsigned int secs_to_flush, packed_meta_pgs;
 	unsigned long pos;
 	unsigned int resubmit;
 
@@ -589,7 +594,7 @@ static int pblk_submit_write(struct pblk *pblk)
 			return 1;
 
 		secs_to_flush = pblk_rb_flush_point_count(&pblk->rwb);
-		if (!secs_to_flush && secs_avail < pblk->min_write_pgs)
+		if (!secs_to_flush && secs_avail < pblk->min_write_pgs_data)
 			return 1;
 
 		secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail,
@@ -604,7 +609,8 @@ static int pblk_submit_write(struct pblk *pblk)
 		pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com);
 	}
 
-	bio = bio_alloc(GFP_KERNEL, secs_to_sync);
+	packed_meta_pgs = (pblk->min_write_pgs - pblk->min_write_pgs_data);
+	bio = bio_alloc(GFP_KERNEL, secs_to_sync + packed_meta_pgs);
 
 	bio->bi_iter.bi_sector = 0; /* internal bio */
 	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index 6e4a63fd4c49..0da6aabd5988 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -626,6 +626,7 @@ struct pblk {
 	int state;			/* pblk line state */
 
 	int min_write_pgs; /* Minimum amount of pages required by controller */
+	int min_write_pgs_data; /* Minimum amount of payload pages */
 	int max_write_pgs; /* Maximum amount of pages supported by controller */
 
 	sector_t capacity; /* Device capacity when bad blocks are subtracted */
@@ -831,7 +832,7 @@ void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
 u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
 u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
 int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
-		   unsigned long secs_to_flush);
+		   unsigned long secs_to_flush, bool skip_meta);
 void pblk_down_rq(struct pblk *pblk, struct ppa_addr ppa,
 		  unsigned long *lun_bitmap);
 void pblk_down_chunk(struct pblk *pblk, struct ppa_addr ppa);
@@ -855,6 +856,8 @@ void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
 			  u64 *lba_list, int nr_secs);
 void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
 			 sector_t blba, int nr_secs);
+void pblk_set_packed_meta(struct pblk *pblk, struct nvm_rq *rqd);
+void pblk_get_packed_meta(struct pblk *pblk, struct nvm_rq *rqd);
 
 /*
  * pblk user I/O write path
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/5] lightnvm: pblk: Helpers for OOB metadata
  2018-10-05 13:34 ` [PATCH 2/5] lightnvm: pblk: Helpers for OOB metadata Igor Konopko
@ 2018-10-09  9:02   ` Hans Holmberg
  2018-10-09  9:12     ` Matias Bjørling
  2018-10-09  9:31     ` Igor Konopko
  2018-10-12 11:52   ` Javier Gonzalez
  1 sibling, 2 replies; 22+ messages in thread
From: Hans Holmberg @ 2018-10-09  9:02 UTC (permalink / raw)
  To: igor.j.konopko
  Cc: Matias Bjorling, Javier Gonzalez, linux-block, marcin.dziegielewski

Hi Igor!

One important thing: this patch breaks the on-disk-storage format so
that needs to be handled(see my comment on this) and  some additional
nitpicks below.

Thanks,
Hans

On Fri, Oct 5, 2018 at 3:38 PM Igor Konopko <igor.j.konopko@intel.com> wrote:
>
> Currently pblk assumes that size of OOB metadata on drive is always
> equal to size of pblk_sec_meta struct. This commit add helpers which will
> allow to handle different sizes of OOB metadata on drive.
>
> Signed-off-by: Igor Konopko <igor.j.konopko@intel.com>
> ---
>  drivers/lightnvm/pblk-core.c     |  6 ++---
>  drivers/lightnvm/pblk-map.c      | 21 ++++++++++------
>  drivers/lightnvm/pblk-read.c     | 41 +++++++++++++++++++-------------
>  drivers/lightnvm/pblk-recovery.c | 14 ++++++-----
>  drivers/lightnvm/pblk.h          | 37 +++++++++++++++++++++++++++-
>  5 files changed, 86 insertions(+), 33 deletions(-)
>
> diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
> index 6944aac43b01..7cb39d84c833 100644
> --- a/drivers/lightnvm/pblk-core.c
> +++ b/drivers/lightnvm/pblk-core.c
> @@ -743,7 +743,6 @@ int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line)
>         rqd.opcode = NVM_OP_PREAD;
>         rqd.nr_ppas = lm->smeta_sec;
>         rqd.is_seq = 1;
> -
>         for (i = 0; i < lm->smeta_sec; i++, paddr++)
>                 rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
>
> @@ -796,10 +795,11 @@ static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line,
>         rqd.is_seq = 1;
>
>         for (i = 0; i < lm->smeta_sec; i++, paddr++) {
> -               struct pblk_sec_meta *meta_list = rqd.meta_list;
> +               void *meta_list = rqd.meta_list;
>
>                 rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
> -               meta_list[i].lba = lba_list[paddr] = addr_empty;
> +               pblk_set_meta_lba(pblk, meta_list, i, ADDR_EMPTY);
> +               lba_list[paddr] = addr_empty;
>         }
>
>         ret = pblk_submit_io_sync_sem(pblk, &rqd);
> diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c
> index 6dcbd44e3acb..4c7a9909308e 100644
> --- a/drivers/lightnvm/pblk-map.c
> +++ b/drivers/lightnvm/pblk-map.c
> @@ -22,7 +22,7 @@
>  static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
>                               struct ppa_addr *ppa_list,
>                               unsigned long *lun_bitmap,
> -                             struct pblk_sec_meta *meta_list,
> +                             void *meta_list,
>                               unsigned int valid_secs)
>  {
>         struct pblk_line *line = pblk_line_get_data(pblk);
> @@ -68,14 +68,15 @@ static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
>                         kref_get(&line->ref);
>                         w_ctx = pblk_rb_w_ctx(&pblk->rwb, sentry + i);
>                         w_ctx->ppa = ppa_list[i];
> -                       meta_list[i].lba = cpu_to_le64(w_ctx->lba);
> +                       pblk_set_meta_lba(pblk, meta_list, i, w_ctx->lba);
>                         lba_list[paddr] = cpu_to_le64(w_ctx->lba);
>                         if (lba_list[paddr] != addr_empty)
>                                 line->nr_valid_lbas++;
>                         else
>                                 atomic64_inc(&pblk->pad_wa);
>                 } else {
> -                       lba_list[paddr] = meta_list[i].lba = addr_empty;
> +                       lba_list[paddr] = addr_empty;
> +                       pblk_set_meta_lba(pblk, meta_list, i, ADDR_EMPTY);
>                         __pblk_map_invalidate(pblk, line, paddr);
>                 }
>         }
> @@ -88,7 +89,7 @@ void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
>                  unsigned long *lun_bitmap, unsigned int valid_secs,
>                  unsigned int off)
>  {
> -       struct pblk_sec_meta *meta_list = rqd->meta_list;
> +       void *meta_list = rqd->meta_list;
>         struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
>         unsigned int map_secs;
>         int min = pblk->min_write_pgs;
> @@ -97,7 +98,10 @@ void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
>         for (i = off; i < rqd->nr_ppas; i += min) {
>                 map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
>                 if (pblk_map_page_data(pblk, sentry + i, &ppa_list[i],
> -                                       lun_bitmap, &meta_list[i], map_secs)) {
> +                                       lun_bitmap,
> +                                       pblk_get_meta_buffer(pblk,
> +                                                            meta_list, i),
> +                                       map_secs)) {
>                         bio_put(rqd->bio);
>                         pblk_free_rqd(pblk, rqd, PBLK_WRITE);
>                         pblk_pipeline_stop(pblk);
> @@ -113,7 +117,7 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
>         struct nvm_tgt_dev *dev = pblk->dev;
>         struct nvm_geo *geo = &dev->geo;
>         struct pblk_line_meta *lm = &pblk->lm;
> -       struct pblk_sec_meta *meta_list = rqd->meta_list;
> +       void *meta_list = rqd->meta_list;
>         struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
>         struct pblk_line *e_line, *d_line;
>         unsigned int map_secs;
> @@ -123,7 +127,10 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
>         for (i = 0; i < rqd->nr_ppas; i += min) {
>                 map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
>                 if (pblk_map_page_data(pblk, sentry + i, &ppa_list[i],
> -                                       lun_bitmap, &meta_list[i], map_secs)) {
> +                                       lun_bitmap,
> +                                       pblk_get_meta_buffer(pblk,
> +                                                            meta_list, i),
> +                                       map_secs)) {
>                         bio_put(rqd->bio);
>                         pblk_free_rqd(pblk, rqd, PBLK_WRITE);
>                         pblk_pipeline_stop(pblk);
> diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
> index 08f6ebd4bc48..6584a2588f61 100644
> --- a/drivers/lightnvm/pblk-read.c
> +++ b/drivers/lightnvm/pblk-read.c
> @@ -43,7 +43,7 @@ static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
>                                  struct bio *bio, sector_t blba,
>                                  unsigned long *read_bitmap)
>  {
> -       struct pblk_sec_meta *meta_list = rqd->meta_list;
> +       void *meta_list = rqd->meta_list;
>         struct ppa_addr ppas[NVM_MAX_VLBA];
>         int nr_secs = rqd->nr_ppas;
>         bool advanced_bio = false;
> @@ -58,7 +58,7 @@ static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
>  retry:
>                 if (pblk_ppa_empty(p)) {
>                         WARN_ON(test_and_set_bit(i, read_bitmap));
> -                       meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
> +                       pblk_set_meta_lba(pblk, meta_list, i, ADDR_EMPTY);
>
>                         if (unlikely(!advanced_bio)) {
>                                 bio_advance(bio, (i) * PBLK_EXPOSED_PAGE_SIZE);
> @@ -78,7 +78,7 @@ static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
>                                 goto retry;
>                         }
>                         WARN_ON(test_and_set_bit(i, read_bitmap));
> -                       meta_list[i].lba = cpu_to_le64(lba);
> +                       pblk_set_meta_lba(pblk, meta_list, i, lba);
>                         advanced_bio = true;
>  #ifdef CONFIG_NVM_PBLK_DEBUG
>                         atomic_long_inc(&pblk->cache_reads);
> @@ -105,12 +105,15 @@ static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
>  static void pblk_read_check_seq(struct pblk *pblk, struct nvm_rq *rqd,
>                                 sector_t blba)
>  {
> -       struct pblk_sec_meta *meta_lba_list = rqd->meta_list;
> +       void *meta_list = rqd->meta_list;
>         int nr_lbas = rqd->nr_ppas;
>         int i;
>
> +       if (!pblk_is_oob_meta_supported(pblk))
> +               return;
> +
>         for (i = 0; i < nr_lbas; i++) {
> -               u64 lba = le64_to_cpu(meta_lba_list[i].lba);
> +               u64 lba = pblk_get_meta_lba(pblk, meta_list, i);
>
>                 if (lba == ADDR_EMPTY)
>                         continue;
> @@ -134,9 +137,12 @@ static void pblk_read_check_seq(struct pblk *pblk, struct nvm_rq *rqd,
>  static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd,
>                                  u64 *lba_list, int nr_lbas)
>  {
> -       struct pblk_sec_meta *meta_lba_list = rqd->meta_list;
> +       void *meta_lba_list = rqd->meta_list;
>         int i, j;
>
> +       if (!pblk_is_oob_meta_supported(pblk))
> +               return;
> +
>         for (i = 0, j = 0; i < nr_lbas; i++) {
>                 u64 lba = lba_list[i];
>                 u64 meta_lba;
> @@ -144,7 +150,7 @@ static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd,
>                 if (lba == ADDR_EMPTY)
>                         continue;
>
> -               meta_lba = le64_to_cpu(meta_lba_list[j].lba);
> +               meta_lba = pblk_get_meta_lba(pblk, meta_lba_list, j);
>
>                 if (lba != meta_lba) {
>  #ifdef CONFIG_NVM_PBLK_DEBUG
> @@ -219,7 +225,7 @@ static void pblk_end_partial_read(struct nvm_rq *rqd)
>         struct bio *new_bio = rqd->bio;
>         struct bio *bio = pr_ctx->orig_bio;
>         struct bio_vec src_bv, dst_bv;
> -       struct pblk_sec_meta *meta_list = rqd->meta_list;
> +       void *meta_list = rqd->meta_list;
>         int bio_init_idx = pr_ctx->bio_init_idx;
>         unsigned long *read_bitmap = pr_ctx->bitmap;
>         int nr_secs = pr_ctx->orig_nr_secs;
> @@ -237,8 +243,10 @@ static void pblk_end_partial_read(struct nvm_rq *rqd)
>         }
>
>         for (i = 0; i < nr_secs; i++) {
> -               pr_ctx->lba_list_media[i] = meta_list[i].lba;
> -               meta_list[i].lba = pr_ctx->lba_list_mem[i];
> +               pr_ctx->lba_list_media[i] = pblk_get_meta_lba(pblk,
> +                                                       meta_list, i);
> +               pblk_set_meta_lba(pblk, meta_list, i,
> +                                 pr_ctx->lba_list_mem[i]);
>         }
>
>         /* Fill the holes in the original bio */
> @@ -250,7 +258,8 @@ static void pblk_end_partial_read(struct nvm_rq *rqd)
>                 line = pblk_ppa_to_line(pblk, rqd->ppa_list[i]);
>                 kref_put(&line->ref, pblk_line_put);
>
> -               meta_list[hole].lba = pr_ctx->lba_list_media[i];
> +               pblk_set_meta_lba(pblk, meta_list, hole,
> +                                 pr_ctx->lba_list_media[i]);
>
>                 src_bv = new_bio->bi_io_vec[i++];
>                 dst_bv = bio->bi_io_vec[bio_init_idx + hole];
> @@ -286,7 +295,7 @@ static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
>                             unsigned long *read_bitmap,
>                             int nr_holes)
>  {
> -       struct pblk_sec_meta *meta_list = rqd->meta_list;
> +       void *meta_list = rqd->meta_list;
>         struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
>         struct pblk_pr_ctx *pr_ctx;
>         struct bio *new_bio, *bio = r_ctx->private;
> @@ -308,7 +317,7 @@ static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
>                 goto fail_free_pages;
>
>         for (i = 0; i < nr_secs; i++)
> -               pr_ctx->lba_list_mem[i] = meta_list[i].lba;
> +               pr_ctx->lba_list_mem[i] = pblk_get_meta_lba(pblk, meta_list, i);
>
>         new_bio->bi_iter.bi_sector = 0; /* internal bio */
>         bio_set_op_attrs(new_bio, REQ_OP_READ, 0);
> @@ -373,7 +382,7 @@ static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
>  static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio,
>                          sector_t lba, unsigned long *read_bitmap)
>  {
> -       struct pblk_sec_meta *meta_list = rqd->meta_list;
> +       void *meta_list = rqd->meta_list;
>         struct ppa_addr ppa;
>
>         pblk_lookup_l2p_seq(pblk, &ppa, lba, 1);
> @@ -385,7 +394,7 @@ static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio,
>  retry:
>         if (pblk_ppa_empty(ppa)) {
>                 WARN_ON(test_and_set_bit(0, read_bitmap));
> -               meta_list[0].lba = cpu_to_le64(ADDR_EMPTY);
> +               pblk_set_meta_lba(pblk, meta_list, 0, ADDR_EMPTY);
>                 return;
>         }
>
> @@ -399,7 +408,7 @@ static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio,
>                 }
>
>                 WARN_ON(test_and_set_bit(0, read_bitmap));
> -               meta_list[0].lba = cpu_to_le64(lba);
> +               pblk_set_meta_lba(pblk, meta_list, 0, lba);
>
>  #ifdef CONFIG_NVM_PBLK_DEBUG
>                 atomic_long_inc(&pblk->cache_reads);
> diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
> index 5740b7509bd8..fa63f9fa5ba8 100644
> --- a/drivers/lightnvm/pblk-recovery.c
> +++ b/drivers/lightnvm/pblk-recovery.c
> @@ -124,7 +124,7 @@ static u64 pblk_sec_in_open_line(struct pblk *pblk, struct pblk_line *line)
>
>  struct pblk_recov_alloc {
>         struct ppa_addr *ppa_list;
> -       struct pblk_sec_meta *meta_list;
> +       void *meta_list;
>         struct nvm_rq *rqd;
>         void *data;
>         dma_addr_t dma_ppa_list;
> @@ -158,7 +158,7 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
>  {
>         struct nvm_tgt_dev *dev = pblk->dev;
>         struct nvm_geo *geo = &dev->geo;
> -       struct pblk_sec_meta *meta_list;
> +       void *meta_list;
>         struct pblk_pad_rq *pad_rq;
>         struct nvm_rq *rqd;
>         struct bio *bio;
> @@ -242,7 +242,8 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
>                         dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
>
>                         pblk_map_invalidate(pblk, dev_ppa);
> -                       lba_list[w_ptr] = meta_list[i].lba = addr_empty;
> +                       lba_list[w_ptr] = addr_empty;
> +                       pblk_set_meta_lba(pblk, meta_list, i, ADDR_EMPTY);
>                         rqd->ppa_list[i] = dev_ppa;
>                 }
>         }
> @@ -325,6 +326,7 @@ static int pblk_line_wp_is_unbalanced(struct pblk *pblk,
>                         return 1;
>                 else if (chunk->wp < line_wp)
>                         line_wp = chunk->wp;
> +

Please remove the introduced newline

>         }
>
>         return 0;
> @@ -336,7 +338,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
>         struct nvm_tgt_dev *dev = pblk->dev;
>         struct nvm_geo *geo = &dev->geo;
>         struct ppa_addr *ppa_list;
> -       struct pblk_sec_meta *meta_list;
> +       void *meta_list;
>         struct nvm_rq *rqd;
>         struct bio *bio;
>         void *data;
> @@ -434,7 +436,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
>         }
>
>         for (i = 0; i < rqd->nr_ppas; i++) {
> -               u64 lba = le64_to_cpu(meta_list[i].lba);
> +               u64 lba = pblk_get_meta_lba(pblk, meta_list, i);
>
>                 lba_list[paddr++] = cpu_to_le64(lba);
>
> @@ -463,7 +465,7 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line)
>         struct nvm_geo *geo = &dev->geo;
>         struct nvm_rq *rqd;
>         struct ppa_addr *ppa_list;
> -       struct pblk_sec_meta *meta_list;
> +       void *meta_list;
>         struct pblk_recov_alloc p;
>         void *data;
>         dma_addr_t dma_ppa_list, dma_meta_list;
> diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
> index aea09879636f..53156b6f99a3 100644
> --- a/drivers/lightnvm/pblk.h
> +++ b/drivers/lightnvm/pblk.h
> @@ -87,7 +87,6 @@ enum {
>  };
>
>  struct pblk_sec_meta {
> -       u64 reserved;
>         __le64 lba;
>  };

It's nice to reduce the required metadata size, but this silently
breaks pblk the on-disk-storage format. We can't have that.

I suggest breaking out this change as a separate patch that also
increases SMETA_VERSION_MAJOR and instructs the user via the kernel
log to migrate the data manually (or factory reset the decice/pblk
instance).


>
> @@ -1366,4 +1365,40 @@ static inline char *pblk_disk_name(struct pblk *pblk)
>
>         return disk->disk_name;
>  }
> +
> +static inline int pblk_is_oob_meta_supported(struct pblk *pblk)
> +{
> +       struct nvm_tgt_dev *dev = pblk->dev;
> +       struct nvm_geo *geo = &dev->geo;
> +
> +       /* Pblk uses OOB meta to store LBA of given physical sector.
> +        * The LBA is eventually used in recovery mode and/or for handling
> +        * telemetry events (e.g., relocate sector).
> +        */
> +
> +       return (geo->sos >= sizeof(struct pblk_sec_meta));
> +}
> +
> +static inline struct pblk_sec_meta *pblk_get_meta_buffer(struct pblk *pblk,
> +                                                        void *meta_ptr,

_ptr is surperflous, we know it's a pointer already.

> +                                                        int index)

The name of the function is misleading, maybe pblk_get_sec_meta instead?

> +{
> +       struct nvm_tgt_dev *dev = pblk->dev;
> +       struct nvm_geo *geo = &dev->geo;
> +
> +       return meta_ptr + max_t(int, geo->sos, sizeof(struct pblk_sec_meta))
> +               * index;

What is the max_t for? How could pblk_sec_meta ever be bigger than geo->sos?

> +}
> +
> +static inline void pblk_set_meta_lba(struct pblk *pblk, void *meta_ptr,
> +                                    int index, u64 lba)
> +{
> +       pblk_get_meta_buffer(pblk, meta_ptr, index)->lba = cpu_to_le64(lba);
> +}
> +
> +static inline u64 pblk_get_meta_lba(struct pblk *pblk, void *meta_ptr,
> +                                   int index)
> +{
> +       return le64_to_cpu(pblk_get_meta_buffer(pblk, meta_ptr, index)->lba);
> +}

I would prefer having these functions get/set lbas of type __le64
instead, as we always have to update the emeta lba list at the same
time we set the oob metadata, and the emeta lba list is __le64,
It's easy to mess this up and byte-ordering bugs are hard to detect.

>  #endif /* PBLK_H_ */
> --
> 2.17.1
>

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/5] lightnvm: pblk: Helpers for OOB metadata
  2018-10-09  9:02   ` Hans Holmberg
@ 2018-10-09  9:12     ` Matias Bjørling
  2018-10-09 12:39       ` Javier Gonzalez
  2018-10-09  9:31     ` Igor Konopko
  1 sibling, 1 reply; 22+ messages in thread
From: Matias Bjørling @ 2018-10-09  9:12 UTC (permalink / raw)
  To: hans.ml.holmberg, igor.j.konopko
  Cc: javier, linux-block, marcin.dziegielewski

>>   };
>>
>>   struct pblk_sec_meta {
>> -       u64 reserved;
>>          __le64 lba;
>>   };
> 
> It's nice to reduce the required metadata size, but this silently
> breaks the pblk on-disk storage format. We can't have that.
> 
> I suggest breaking out this change as a separate patch that also
> increases SMETA_VERSION_MAJOR and instructs the user via the kernel
> log to migrate the data manually (or factory reset the device/pblk
> instance).
> 

In that case, we should include the initialization values of the target 
somewhere. E.g., line metadata, or some FTL log to be implemented at 
some point.

Another fix for now will be to keep it as 16b, and do the patch that you 
propose later. Igor, I believe the metadata will fit? 16b per LBA should 
be max 1K in the case of 64 LBAs.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/5] lightnvm: Flexible DMA pool entry size
  2018-10-05 13:34 ` [PATCH 3/5] lightnvm: Flexible DMA pool entry size Igor Konopko
@ 2018-10-09  9:16   ` Hans Holmberg
  2018-10-09 10:01     ` Igor Konopko
  0 siblings, 1 reply; 22+ messages in thread
From: Hans Holmberg @ 2018-10-09  9:16 UTC (permalink / raw)
  To: igor.j.konopko
  Cc: Matias Bjorling, Javier Gonzalez, linux-block, marcin.dziegielewski

On Fri, Oct 5, 2018 at 3:38 PM Igor Konopko <igor.j.konopko@intel.com> wrote:
>
> Currently whole lightnvm and pblk uses single DMA pool,
> for which entry size is always equal to PAGE_SIZE.
> PPA list always needs 8b*64, so there is only 56b*64
> space for OOB meta. Since NVMe OOB meta can be bigger,
> such as 128b, this solution is not robust.
>
> This patch adds the possibility to support OOB meta above
> 56b by creating separate DMA pool for PBLK with entry
> size which is big enough to store both PPA list and such
> a OOB metadata.
>
> Signed-off-by: Igor Konopko <igor.j.konopko@intel.com>
> ---
>  drivers/lightnvm/core.c          | 33 +++++++++++++++++++++++---------
>  drivers/lightnvm/pblk-core.c     | 19 +++++++++---------
>  drivers/lightnvm/pblk-init.c     | 11 +++++++++++
>  drivers/lightnvm/pblk-read.c     |  3 ++-
>  drivers/lightnvm/pblk-recovery.c |  9 +++++----
>  drivers/lightnvm/pblk.h          | 11 ++++++++++-
>  drivers/nvme/host/lightnvm.c     |  6 ++++--
>  include/linux/lightnvm.h         |  8 +++++---
>  8 files changed, 71 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
> index efb976a863d2..48db7a096257 100644
> --- a/drivers/lightnvm/core.c
> +++ b/drivers/lightnvm/core.c
> @@ -641,20 +641,33 @@ void nvm_unregister_tgt_type(struct nvm_tgt_type *tt)
>  }
>  EXPORT_SYMBOL(nvm_unregister_tgt_type);
>
> -void *nvm_dev_dma_alloc(struct nvm_dev *dev, gfp_t mem_flags,
> -                                                       dma_addr_t *dma_handler)
> +void *nvm_dev_dma_alloc(struct nvm_dev *dev, void *pool,
> +                               gfp_t mem_flags, dma_addr_t *dma_handler)
>  {
> -       return dev->ops->dev_dma_alloc(dev, dev->dma_pool, mem_flags,
> -                                                               dma_handler);
> +       return dev->ops->dev_dma_alloc(dev, pool ?: dev->dma_pool,
> +                                               mem_flags, dma_handler);
>  }
>  EXPORT_SYMBOL(nvm_dev_dma_alloc);
>
> -void nvm_dev_dma_free(struct nvm_dev *dev, void *addr, dma_addr_t dma_handler)
> +void nvm_dev_dma_free(struct nvm_dev *dev, void *pool,
> +                               void *addr, dma_addr_t dma_handler)
>  {
> -       dev->ops->dev_dma_free(dev->dma_pool, addr, dma_handler);
> +       dev->ops->dev_dma_free(pool ?: dev->dma_pool, addr, dma_handler);
>  }
>  EXPORT_SYMBOL(nvm_dev_dma_free);
>
> +void *nvm_dev_dma_create(struct nvm_dev *dev, int size, char *name)
> +{
> +       return dev->ops->create_dma_pool(dev, name, size);
> +}
> +EXPORT_SYMBOL(nvm_dev_dma_create);
> +
> +void nvm_dev_dma_destroy(struct nvm_dev *dev, void *pool)
> +{
> +       dev->ops->destroy_dma_pool(pool);
> +}
> +EXPORT_SYMBOL(nvm_dev_dma_destroy);
> +
>  static struct nvm_dev *nvm_find_nvm_dev(const char *name)
>  {
>         struct nvm_dev *dev;
> @@ -682,7 +695,8 @@ static int nvm_set_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd,
>         }
>
>         rqd->nr_ppas = nr_ppas;
> -       rqd->ppa_list = nvm_dev_dma_alloc(dev, GFP_KERNEL, &rqd->dma_ppa_list);
> +       rqd->ppa_list = nvm_dev_dma_alloc(dev, NULL, GFP_KERNEL,
> +                                               &rqd->dma_ppa_list);
>         if (!rqd->ppa_list) {
>                 pr_err("nvm: failed to allocate dma memory\n");
>                 return -ENOMEM;
> @@ -708,7 +722,8 @@ static void nvm_free_rqd_ppalist(struct nvm_tgt_dev *tgt_dev,
>         if (!rqd->ppa_list)
>                 return;
>
> -       nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
> +       nvm_dev_dma_free(tgt_dev->parent, NULL, rqd->ppa_list,
> +                               rqd->dma_ppa_list);
>  }
>
>  static int nvm_set_flags(struct nvm_geo *geo, struct nvm_rq *rqd)
> @@ -1145,7 +1160,7 @@ int nvm_register(struct nvm_dev *dev)
>         if (!dev->q || !dev->ops)
>                 return -EINVAL;
>
> -       dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist");
> +       dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist", PAGE_SIZE);
>         if (!dev->dma_pool) {
>                 pr_err("nvm: could not create dma pool\n");
>                 return -ENOMEM;

Why hack the nvm_dev_ interfaces when you are not using the dev pool anyway?
Wouldn't it be more straightforward to use dma_pool_* instead?

> diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
> index 7cb39d84c833..131972b13e27 100644
> --- a/drivers/lightnvm/pblk-core.c
> +++ b/drivers/lightnvm/pblk-core.c
> @@ -242,16 +242,16 @@ int pblk_alloc_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd)
>  {
>         struct nvm_tgt_dev *dev = pblk->dev;
>
> -       rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
> -                                                       &rqd->dma_meta_list);
> +       rqd->meta_list = nvm_dev_dma_alloc(dev->parent, pblk->dma_pool,
> +                                          GFP_KERNEL, &rqd->dma_meta_list);
>         if (!rqd->meta_list)
>                 return -ENOMEM;
>
>         if (rqd->nr_ppas == 1)
>                 return 0;
>
> -       rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size;
> -       rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size;
> +       rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size(pblk);
> +       rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size(pblk);
>
>         return 0;
>  }
> @@ -261,7 +261,7 @@ void pblk_free_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd)
>         struct nvm_tgt_dev *dev = pblk->dev;
>
>         if (rqd->meta_list)
> -               nvm_dev_dma_free(dev->parent, rqd->meta_list,
> +               nvm_dev_dma_free(dev->parent, pblk->dma_pool, rqd->meta_list,
>                                 rqd->dma_meta_list);
>  }
>
> @@ -840,13 +840,13 @@ int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line,
>         int i, j;
>         int ret;
>
> -       meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
> +       meta_list = nvm_dev_dma_alloc(dev->parent, pblk->dma_pool, GFP_KERNEL,
>                                                         &dma_meta_list);
>         if (!meta_list)
>                 return -ENOMEM;
>
> -       ppa_list = meta_list + pblk_dma_meta_size;
> -       dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
> +       ppa_list = meta_list + pblk_dma_meta_size(pblk);
> +       dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk);
>
>  next_rq:
>         memset(&rqd, 0, sizeof(struct nvm_rq));
> @@ -919,7 +919,8 @@ int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line,
>                 goto next_rq;
>
>  free_rqd_dma:
> -       nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
> +       nvm_dev_dma_free(dev->parent, pblk->dma_pool,
> +                        rqd.meta_list, rqd.dma_meta_list);
>         return ret;
>  }
>
> diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
> index e3573880dbda..b794e279da31 100644
> --- a/drivers/lightnvm/pblk-init.c
> +++ b/drivers/lightnvm/pblk-init.c
> @@ -1087,6 +1087,7 @@ static void pblk_free(struct pblk *pblk)
>         pblk_l2p_free(pblk);
>         pblk_rwb_free(pblk);
>         pblk_core_free(pblk);
> +       nvm_dev_dma_destroy(pblk->dev->parent, pblk->dma_pool);
>
>         kfree(pblk);
>  }
> @@ -1178,6 +1179,15 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
>         atomic_long_set(&pblk->write_failed, 0);
>         atomic_long_set(&pblk->erase_failed, 0);
>
> +       pblk->dma_pool = nvm_dev_dma_create(dev->parent, (pblk_dma_ppa_size +
> +                                           pblk_dma_meta_size(pblk)),
> +                                           tdisk->disk_name);
> +       if (!pblk->dma_pool) {
> +               pblk_err(pblk, "could not allocate dma pool\n");
> +               kfree(pblk);
> +               return ERR_PTR(-ENOMEM);
> +       }
> +
>         ret = pblk_core_init(pblk);
>         if (ret) {
>                 pblk_err(pblk, "could not initialize core\n");
> @@ -1249,6 +1259,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
>  fail_free_core:
>         pblk_core_free(pblk);
>  fail:
> +       nvm_dev_dma_destroy(dev->parent, pblk->dma_pool);
>         kfree(pblk);
>         return ERR_PTR(ret);
>  }
> diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
> index 6584a2588f61..5d213c4f83c1 100644
> --- a/drivers/lightnvm/pblk-read.c
> +++ b/drivers/lightnvm/pblk-read.c
> @@ -499,7 +499,8 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
>         return NVM_IO_OK;
>
>  fail_meta_free:
> -       nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
> +       nvm_dev_dma_free(dev->parent, pblk->dma_pool,
> +                        rqd->meta_list, rqd->dma_meta_list);
>  fail_rqd_free:
>         pblk_free_rqd(pblk, rqd, PBLK_READ);
>         return ret;
> diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
> index fa63f9fa5ba8..4b703877907b 100644
> --- a/drivers/lightnvm/pblk-recovery.c
> +++ b/drivers/lightnvm/pblk-recovery.c
> @@ -471,12 +471,13 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line)
>         dma_addr_t dma_ppa_list, dma_meta_list;
>         int ret = 0;
>
> -       meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
> +       meta_list = nvm_dev_dma_alloc(dev->parent, pblk->dma_pool,
> +                                        GFP_KERNEL, &dma_meta_list);
>         if (!meta_list)
>                 return -ENOMEM;
>
> -       ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
> -       dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
> +       ppa_list = (void *)(meta_list) + pblk_dma_meta_size(pblk);
> +       dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk);
>
>         data = kcalloc(pblk->max_write_pgs, geo->csecs, GFP_KERNEL);
>         if (!data) {
> @@ -507,7 +508,7 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line)
>         mempool_free(rqd, &pblk->r_rq_pool);
>         kfree(data);
>  free_meta_list:
> -       nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
> +       nvm_dev_dma_free(dev->parent, pblk->dma_pool, meta_list, dma_meta_list);
>
>         return ret;
>  }
> diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
> index 53156b6f99a3..6e4a63fd4c49 100644
> --- a/drivers/lightnvm/pblk.h
> +++ b/drivers/lightnvm/pblk.h
> @@ -103,7 +103,6 @@ enum {
>         PBLK_RL_LOW = 4
>  };
>
> -#define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * NVM_MAX_VLBA)
>  #define pblk_dma_ppa_size (sizeof(u64) * NVM_MAX_VLBA)
>
>  /* write buffer completion context */
> @@ -711,6 +710,7 @@ struct pblk {
>         struct timer_list wtimer;
>
>         struct pblk_gc gc;
> +       void *dma_pool;
>  };
>
>  struct pblk_line_ws {
> @@ -1401,4 +1401,13 @@ static inline u64 pblk_get_meta_lba(struct pblk *pblk, void *meta_ptr,
>  {
>         return le64_to_cpu(pblk_get_meta_buffer(pblk, meta_ptr, index)->lba);
>  }
> +
> +static inline int pblk_dma_meta_size(struct pblk *pblk)
> +{
> +       struct nvm_tgt_dev *dev = pblk->dev;
> +       struct nvm_geo *geo = &dev->geo;
> +
> +       return max_t(int, sizeof(struct pblk_sec_meta), geo->sos)
> +                               * NVM_MAX_VLBA;
> +}
>  #endif /* PBLK_H_ */
> diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
> index 986526ff1521..e370793f52d5 100644
> --- a/drivers/nvme/host/lightnvm.c
> +++ b/drivers/nvme/host/lightnvm.c
> @@ -736,11 +736,13 @@ static int nvme_nvm_submit_io_sync(struct nvm_dev *dev, struct nvm_rq *rqd)
>         return ret;
>  }
>
> -static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name)
> +static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name,
> +                                       int size)
>  {
>         struct nvme_ns *ns = nvmdev->q->queuedata;
>
> -       return dma_pool_create(name, ns->ctrl->dev, PAGE_SIZE, PAGE_SIZE, 0);
> +       size = round_up(size, PAGE_SIZE);
> +       return dma_pool_create(name, ns->ctrl->dev, size, PAGE_SIZE, 0);
>  }
>
>  static void nvme_nvm_destroy_dma_pool(void *pool)
> diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
> index 36a84180c1e8..c6c998716ee7 100644
> --- a/include/linux/lightnvm.h
> +++ b/include/linux/lightnvm.h
> @@ -90,7 +90,7 @@ typedef int (nvm_get_chk_meta_fn)(struct nvm_dev *, sector_t, int,
>                                                         struct nvm_chk_meta *);
>  typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
>  typedef int (nvm_submit_io_sync_fn)(struct nvm_dev *, struct nvm_rq *);
> -typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *);
> +typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *, int);
>  typedef void (nvm_destroy_dma_pool_fn)(void *);
>  typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t,
>                                                                 dma_addr_t *);
> @@ -668,8 +668,10 @@ struct nvm_tgt_type {
>  extern int nvm_register_tgt_type(struct nvm_tgt_type *);
>  extern void nvm_unregister_tgt_type(struct nvm_tgt_type *);
>
> -extern void *nvm_dev_dma_alloc(struct nvm_dev *, gfp_t, dma_addr_t *);
> -extern void nvm_dev_dma_free(struct nvm_dev *, void *, dma_addr_t);
> +extern void *nvm_dev_dma_alloc(struct nvm_dev *, void *, gfp_t, dma_addr_t *);
> +extern void nvm_dev_dma_free(struct nvm_dev *, void *, void *, dma_addr_t);
> +extern void *nvm_dev_dma_create(struct nvm_dev *, int, char *);
> +extern void nvm_dev_dma_destroy(struct nvm_dev *, void *);
>
>  extern struct nvm_dev *nvm_alloc_dev(int);
>  extern int nvm_register(struct nvm_dev *);
> --
> 2.17.1
>

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 4/5] lightnvm: Disable interleaved metadata
  2018-10-05 13:34 ` [PATCH 4/5] lightnvm: Disable interleaved metadata Igor Konopko
@ 2018-10-09  9:30   ` Hans Holmberg
  0 siblings, 0 replies; 22+ messages in thread
From: Hans Holmberg @ 2018-10-09  9:30 UTC (permalink / raw)
  To: igor.j.konopko
  Cc: Matias Bjorling, Javier Gonzalez, linux-block, marcin.dziegielewski

A couple of nitpicks below.

On Fri, Oct 5, 2018 at 3:38 PM Igor Konopko <igor.j.konopko@intel.com> wrote:
>
> Currently pblk and lightnvm does only check for size
> of OOB metadata and does not care wheather this meta

wheather -> whether

> is located in separate buffer or is interleaved with
> data in single buffer.
>
> In reality only the first scenario is supported, where
> second mode will break pblk functionality during any
> IO operation.
>
> The goal of this patch is to block creation of pblk
> devices in case of interleaved metadata
>
> Signed-off-by: Igor Konopko <igor.j.konopko@intel.com>
> ---
>  drivers/lightnvm/pblk-init.c | 6 ++++++
>  drivers/nvme/host/lightnvm.c | 1 +
>  include/linux/lightnvm.h     | 1 +
>  3 files changed, 8 insertions(+)
>
> diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
> index b794e279da31..1529aa37b30f 100644
> --- a/drivers/lightnvm/pblk-init.c
> +++ b/drivers/lightnvm/pblk-init.c
> @@ -1152,6 +1152,12 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
>                 return ERR_PTR(-EINVAL);
>         }
>
> +       if (geo->ext) {
> +               pblk_err(pblk, "extended metadata not supported\n");
> +               kfree(pblk);
> +               return ERR_PTR(-EINVAL);
> +       }
> +
>         spin_lock_init(&pblk->resubmit_lock);
>         spin_lock_init(&pblk->trans_lock);
>         spin_lock_init(&pblk->lock);
> diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
> index e370793f52d5..7020e87bcee4 100644
> --- a/drivers/nvme/host/lightnvm.c
> +++ b/drivers/nvme/host/lightnvm.c
> @@ -989,6 +989,7 @@ void nvme_nvm_update_nvm_info(struct nvme_ns *ns)
>
>         geo->csecs = 1 << ns->lba_shift;
>         geo->sos = ns->ms;
> +       geo->ext = ns->ext;
>  }
>
>  int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node)
> diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
> index c6c998716ee7..abd29f50f2a1 100644
> --- a/include/linux/lightnvm.h
> +++ b/include/linux/lightnvm.h
> @@ -357,6 +357,7 @@ struct nvm_geo {
>         u32     clba;           /* sectors per chunk */
>         u16     csecs;          /* sector size */
>         u16     sos;            /* out-of-band area size */
> +       u16     ext;            /* metadata in extended data buffer */

ext is a boolean (see struct nvme_ns) so better make the nvm_geo copy
boolean as well.

>
>         /* device write constrains */
>         u32     ws_min;         /* minimum write size */
> --
> 2.17.1
>

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/5] lightnvm: pblk: Helpers for OOB metadata
  2018-10-09  9:02   ` Hans Holmberg
  2018-10-09  9:12     ` Matias Bjørling
@ 2018-10-09  9:31     ` Igor Konopko
  1 sibling, 0 replies; 22+ messages in thread
From: Igor Konopko @ 2018-10-09  9:31 UTC (permalink / raw)
  To: Hans Holmberg
  Cc: Matias Bjorling, Javier Gonzalez, linux-block, marcin.dziegielewski



On 09.10.2018 11:02, Hans Holmberg wrote:
> Hi Igor!
> 
> One important thing: this patch breaks the on-disk-storage format so
> that needs to be handled(see my comment on this) and  some additional
> nitpicks below.
> 
> Thanks,
> Hans
> 
> On Fri, Oct 5, 2018 at 3:38 PM Igor Konopko <igor.j.konopko@intel.com> wrote:
>>
>> Currently pblk assumes that size of OOB metadata on drive is always
>> equal to size of pblk_sec_meta struct. This commit add helpers which will
>> allow to handle different sizes of OOB metadata on drive.
>>
>> Signed-off-by: Igor Konopko <igor.j.konopko@intel.com>
>> ---
>>   drivers/lightnvm/pblk-core.c     |  6 ++---
>>   drivers/lightnvm/pblk-map.c      | 21 ++++++++++------
>>   drivers/lightnvm/pblk-read.c     | 41 +++++++++++++++++++-------------
>>   drivers/lightnvm/pblk-recovery.c | 14 ++++++-----
>>   drivers/lightnvm/pblk.h          | 37 +++++++++++++++++++++++++++-
>>   5 files changed, 86 insertions(+), 33 deletions(-)
>>
>> diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
>> index 6944aac43b01..7cb39d84c833 100644
>> --- a/drivers/lightnvm/pblk-core.c
>> +++ b/drivers/lightnvm/pblk-core.c
>> @@ -743,7 +743,6 @@ int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line)
>>          rqd.opcode = NVM_OP_PREAD;
>>          rqd.nr_ppas = lm->smeta_sec;
>>          rqd.is_seq = 1;
>> -
>>          for (i = 0; i < lm->smeta_sec; i++, paddr++)
>>                  rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
>>
>> @@ -796,10 +795,11 @@ static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line,
>>          rqd.is_seq = 1;
>>
>>          for (i = 0; i < lm->smeta_sec; i++, paddr++) {
>> -               struct pblk_sec_meta *meta_list = rqd.meta_list;
>> +               void *meta_list = rqd.meta_list;
>>
>>                  rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
>> -               meta_list[i].lba = lba_list[paddr] = addr_empty;
>> +               pblk_set_meta_lba(pblk, meta_list, i, ADDR_EMPTY);
>> +               lba_list[paddr] = addr_empty;
>>          }
>>
>>          ret = pblk_submit_io_sync_sem(pblk, &rqd);
>> diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c
>> index 6dcbd44e3acb..4c7a9909308e 100644
>> --- a/drivers/lightnvm/pblk-map.c
>> +++ b/drivers/lightnvm/pblk-map.c
>> @@ -22,7 +22,7 @@
>>   static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
>>                                struct ppa_addr *ppa_list,
>>                                unsigned long *lun_bitmap,
>> -                             struct pblk_sec_meta *meta_list,
>> +                             void *meta_list,
>>                                unsigned int valid_secs)
>>   {
>>          struct pblk_line *line = pblk_line_get_data(pblk);
>> @@ -68,14 +68,15 @@ static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
>>                          kref_get(&line->ref);
>>                          w_ctx = pblk_rb_w_ctx(&pblk->rwb, sentry + i);
>>                          w_ctx->ppa = ppa_list[i];
>> -                       meta_list[i].lba = cpu_to_le64(w_ctx->lba);
>> +                       pblk_set_meta_lba(pblk, meta_list, i, w_ctx->lba);
>>                          lba_list[paddr] = cpu_to_le64(w_ctx->lba);
>>                          if (lba_list[paddr] != addr_empty)
>>                                  line->nr_valid_lbas++;
>>                          else
>>                                  atomic64_inc(&pblk->pad_wa);
>>                  } else {
>> -                       lba_list[paddr] = meta_list[i].lba = addr_empty;
>> +                       lba_list[paddr] = addr_empty;
>> +                       pblk_set_meta_lba(pblk, meta_list, i, ADDR_EMPTY);
>>                          __pblk_map_invalidate(pblk, line, paddr);
>>                  }
>>          }
>> @@ -88,7 +89,7 @@ void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
>>                   unsigned long *lun_bitmap, unsigned int valid_secs,
>>                   unsigned int off)
>>   {
>> -       struct pblk_sec_meta *meta_list = rqd->meta_list;
>> +       void *meta_list = rqd->meta_list;
>>          struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
>>          unsigned int map_secs;
>>          int min = pblk->min_write_pgs;
>> @@ -97,7 +98,10 @@ void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
>>          for (i = off; i < rqd->nr_ppas; i += min) {
>>                  map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
>>                  if (pblk_map_page_data(pblk, sentry + i, &ppa_list[i],
>> -                                       lun_bitmap, &meta_list[i], map_secs)) {
>> +                                       lun_bitmap,
>> +                                       pblk_get_meta_buffer(pblk,
>> +                                                            meta_list, i),
>> +                                       map_secs)) {
>>                          bio_put(rqd->bio);
>>                          pblk_free_rqd(pblk, rqd, PBLK_WRITE);
>>                          pblk_pipeline_stop(pblk);
>> @@ -113,7 +117,7 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
>>          struct nvm_tgt_dev *dev = pblk->dev;
>>          struct nvm_geo *geo = &dev->geo;
>>          struct pblk_line_meta *lm = &pblk->lm;
>> -       struct pblk_sec_meta *meta_list = rqd->meta_list;
>> +       void *meta_list = rqd->meta_list;
>>          struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
>>          struct pblk_line *e_line, *d_line;
>>          unsigned int map_secs;
>> @@ -123,7 +127,10 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
>>          for (i = 0; i < rqd->nr_ppas; i += min) {
>>                  map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
>>                  if (pblk_map_page_data(pblk, sentry + i, &ppa_list[i],
>> -                                       lun_bitmap, &meta_list[i], map_secs)) {
>> +                                       lun_bitmap,
>> +                                       pblk_get_meta_buffer(pblk,
>> +                                                            meta_list, i),
>> +                                       map_secs)) {
>>                          bio_put(rqd->bio);
>>                          pblk_free_rqd(pblk, rqd, PBLK_WRITE);
>>                          pblk_pipeline_stop(pblk);
>> diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
>> index 08f6ebd4bc48..6584a2588f61 100644
>> --- a/drivers/lightnvm/pblk-read.c
>> +++ b/drivers/lightnvm/pblk-read.c
>> @@ -43,7 +43,7 @@ static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
>>                                   struct bio *bio, sector_t blba,
>>                                   unsigned long *read_bitmap)
>>   {
>> -       struct pblk_sec_meta *meta_list = rqd->meta_list;
>> +       void *meta_list = rqd->meta_list;
>>          struct ppa_addr ppas[NVM_MAX_VLBA];
>>          int nr_secs = rqd->nr_ppas;
>>          bool advanced_bio = false;
>> @@ -58,7 +58,7 @@ static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
>>   retry:
>>                  if (pblk_ppa_empty(p)) {
>>                          WARN_ON(test_and_set_bit(i, read_bitmap));
>> -                       meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
>> +                       pblk_set_meta_lba(pblk, meta_list, i, ADDR_EMPTY);
>>
>>                          if (unlikely(!advanced_bio)) {
>>                                  bio_advance(bio, (i) * PBLK_EXPOSED_PAGE_SIZE);
>> @@ -78,7 +78,7 @@ static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
>>                                  goto retry;
>>                          }
>>                          WARN_ON(test_and_set_bit(i, read_bitmap));
>> -                       meta_list[i].lba = cpu_to_le64(lba);
>> +                       pblk_set_meta_lba(pblk, meta_list, i, lba);
>>                          advanced_bio = true;
>>   #ifdef CONFIG_NVM_PBLK_DEBUG
>>                          atomic_long_inc(&pblk->cache_reads);
>> @@ -105,12 +105,15 @@ static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
>>   static void pblk_read_check_seq(struct pblk *pblk, struct nvm_rq *rqd,
>>                                  sector_t blba)
>>   {
>> -       struct pblk_sec_meta *meta_lba_list = rqd->meta_list;
>> +       void *meta_list = rqd->meta_list;
>>          int nr_lbas = rqd->nr_ppas;
>>          int i;
>>
>> +       if (!pblk_is_oob_meta_supported(pblk))
>> +               return;
>> +
>>          for (i = 0; i < nr_lbas; i++) {
>> -               u64 lba = le64_to_cpu(meta_lba_list[i].lba);
>> +               u64 lba = pblk_get_meta_lba(pblk, meta_list, i);
>>
>>                  if (lba == ADDR_EMPTY)
>>                          continue;
>> @@ -134,9 +137,12 @@ static void pblk_read_check_seq(struct pblk *pblk, struct nvm_rq *rqd,
>>   static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd,
>>                                   u64 *lba_list, int nr_lbas)
>>   {
>> -       struct pblk_sec_meta *meta_lba_list = rqd->meta_list;
>> +       void *meta_lba_list = rqd->meta_list;
>>          int i, j;
>>
>> +       if (!pblk_is_oob_meta_supported(pblk))
>> +               return;
>> +
>>          for (i = 0, j = 0; i < nr_lbas; i++) {
>>                  u64 lba = lba_list[i];
>>                  u64 meta_lba;
>> @@ -144,7 +150,7 @@ static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd,
>>                  if (lba == ADDR_EMPTY)
>>                          continue;
>>
>> -               meta_lba = le64_to_cpu(meta_lba_list[j].lba);
>> +               meta_lba = pblk_get_meta_lba(pblk, meta_lba_list, j);
>>
>>                  if (lba != meta_lba) {
>>   #ifdef CONFIG_NVM_PBLK_DEBUG
>> @@ -219,7 +225,7 @@ static void pblk_end_partial_read(struct nvm_rq *rqd)
>>          struct bio *new_bio = rqd->bio;
>>          struct bio *bio = pr_ctx->orig_bio;
>>          struct bio_vec src_bv, dst_bv;
>> -       struct pblk_sec_meta *meta_list = rqd->meta_list;
>> +       void *meta_list = rqd->meta_list;
>>          int bio_init_idx = pr_ctx->bio_init_idx;
>>          unsigned long *read_bitmap = pr_ctx->bitmap;
>>          int nr_secs = pr_ctx->orig_nr_secs;
>> @@ -237,8 +243,10 @@ static void pblk_end_partial_read(struct nvm_rq *rqd)
>>          }
>>
>>          for (i = 0; i < nr_secs; i++) {
>> -               pr_ctx->lba_list_media[i] = meta_list[i].lba;
>> -               meta_list[i].lba = pr_ctx->lba_list_mem[i];
>> +               pr_ctx->lba_list_media[i] = pblk_get_meta_lba(pblk,
>> +                                                       meta_list, i);
>> +               pblk_set_meta_lba(pblk, meta_list, i,
>> +                                 pr_ctx->lba_list_mem[i]);
>>          }
>>
>>          /* Fill the holes in the original bio */
>> @@ -250,7 +258,8 @@ static void pblk_end_partial_read(struct nvm_rq *rqd)
>>                  line = pblk_ppa_to_line(pblk, rqd->ppa_list[i]);
>>                  kref_put(&line->ref, pblk_line_put);
>>
>> -               meta_list[hole].lba = pr_ctx->lba_list_media[i];
>> +               pblk_set_meta_lba(pblk, meta_list, hole,
>> +                                 pr_ctx->lba_list_media[i]);
>>
>>                  src_bv = new_bio->bi_io_vec[i++];
>>                  dst_bv = bio->bi_io_vec[bio_init_idx + hole];
>> @@ -286,7 +295,7 @@ static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
>>                              unsigned long *read_bitmap,
>>                              int nr_holes)
>>   {
>> -       struct pblk_sec_meta *meta_list = rqd->meta_list;
>> +       void *meta_list = rqd->meta_list;
>>          struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
>>          struct pblk_pr_ctx *pr_ctx;
>>          struct bio *new_bio, *bio = r_ctx->private;
>> @@ -308,7 +317,7 @@ static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
>>                  goto fail_free_pages;
>>
>>          for (i = 0; i < nr_secs; i++)
>> -               pr_ctx->lba_list_mem[i] = meta_list[i].lba;
>> +               pr_ctx->lba_list_mem[i] = pblk_get_meta_lba(pblk, meta_list, i);
>>
>>          new_bio->bi_iter.bi_sector = 0; /* internal bio */
>>          bio_set_op_attrs(new_bio, REQ_OP_READ, 0);
>> @@ -373,7 +382,7 @@ static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
>>   static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio,
>>                           sector_t lba, unsigned long *read_bitmap)
>>   {
>> -       struct pblk_sec_meta *meta_list = rqd->meta_list;
>> +       void *meta_list = rqd->meta_list;
>>          struct ppa_addr ppa;
>>
>>          pblk_lookup_l2p_seq(pblk, &ppa, lba, 1);
>> @@ -385,7 +394,7 @@ static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio,
>>   retry:
>>          if (pblk_ppa_empty(ppa)) {
>>                  WARN_ON(test_and_set_bit(0, read_bitmap));
>> -               meta_list[0].lba = cpu_to_le64(ADDR_EMPTY);
>> +               pblk_set_meta_lba(pblk, meta_list, 0, ADDR_EMPTY);
>>                  return;
>>          }
>>
>> @@ -399,7 +408,7 @@ static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio,
>>                  }
>>
>>                  WARN_ON(test_and_set_bit(0, read_bitmap));
>> -               meta_list[0].lba = cpu_to_le64(lba);
>> +               pblk_set_meta_lba(pblk, meta_list, 0, lba);
>>
>>   #ifdef CONFIG_NVM_PBLK_DEBUG
>>                  atomic_long_inc(&pblk->cache_reads);
>> diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
>> index 5740b7509bd8..fa63f9fa5ba8 100644
>> --- a/drivers/lightnvm/pblk-recovery.c
>> +++ b/drivers/lightnvm/pblk-recovery.c
>> @@ -124,7 +124,7 @@ static u64 pblk_sec_in_open_line(struct pblk *pblk, struct pblk_line *line)
>>
>>   struct pblk_recov_alloc {
>>          struct ppa_addr *ppa_list;
>> -       struct pblk_sec_meta *meta_list;
>> +       void *meta_list;
>>          struct nvm_rq *rqd;
>>          void *data;
>>          dma_addr_t dma_ppa_list;
>> @@ -158,7 +158,7 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
>>   {
>>          struct nvm_tgt_dev *dev = pblk->dev;
>>          struct nvm_geo *geo = &dev->geo;
>> -       struct pblk_sec_meta *meta_list;
>> +       void *meta_list;
>>          struct pblk_pad_rq *pad_rq;
>>          struct nvm_rq *rqd;
>>          struct bio *bio;
>> @@ -242,7 +242,8 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
>>                          dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
>>
>>                          pblk_map_invalidate(pblk, dev_ppa);
>> -                       lba_list[w_ptr] = meta_list[i].lba = addr_empty;
>> +                       lba_list[w_ptr] = addr_empty;
>> +                       pblk_set_meta_lba(pblk, meta_list, i, ADDR_EMPTY);
>>                          rqd->ppa_list[i] = dev_ppa;
>>                  }
>>          }
>> @@ -325,6 +326,7 @@ static int pblk_line_wp_is_unbalanced(struct pblk *pblk,
>>                          return 1;
>>                  else if (chunk->wp < line_wp)
>>                          line_wp = chunk->wp;
>> +
> 
> Please remove the introduced newline
> 
>>          }
>>
>>          return 0;
>> @@ -336,7 +338,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
>>          struct nvm_tgt_dev *dev = pblk->dev;
>>          struct nvm_geo *geo = &dev->geo;
>>          struct ppa_addr *ppa_list;
>> -       struct pblk_sec_meta *meta_list;
>> +       void *meta_list;
>>          struct nvm_rq *rqd;
>>          struct bio *bio;
>>          void *data;
>> @@ -434,7 +436,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
>>          }
>>
>>          for (i = 0; i < rqd->nr_ppas; i++) {
>> -               u64 lba = le64_to_cpu(meta_list[i].lba);
>> +               u64 lba = pblk_get_meta_lba(pblk, meta_list, i);
>>
>>                  lba_list[paddr++] = cpu_to_le64(lba);
>>
>> @@ -463,7 +465,7 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line)
>>          struct nvm_geo *geo = &dev->geo;
>>          struct nvm_rq *rqd;
>>          struct ppa_addr *ppa_list;
>> -       struct pblk_sec_meta *meta_list;
>> +       void *meta_list;
>>          struct pblk_recov_alloc p;
>>          void *data;
>>          dma_addr_t dma_ppa_list, dma_meta_list;
>> diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
>> index aea09879636f..53156b6f99a3 100644
>> --- a/drivers/lightnvm/pblk.h
>> +++ b/drivers/lightnvm/pblk.h
>> @@ -87,7 +87,6 @@ enum {
>>   };
>>
>>   struct pblk_sec_meta {
>> -       u64 reserved;
>>          __le64 lba;
>>   };
> 
> It's nice to reduce the required metadata size, but this silently
> breaks pblk the on-disk-storage format. We can't have that.
> 
> I suggest breaking out this change as a separate patch that also
> increases SMETA_VERSION_MAJOR and instructs the user via the kernel
> log to migrate the data manually (or factory reset the device/pblk
> instance).
> 

Sure, for now we can keep 16b of meta. I hadn't thought about breaking 
on-disk-storage format, so thanks for that comment. I will revert that 
part for now.

> 
>>
>> @@ -1366,4 +1365,40 @@ static inline char *pblk_disk_name(struct pblk *pblk)
>>
>>          return disk->disk_name;
>>   }
>> +
>> +static inline int pblk_is_oob_meta_supported(struct pblk *pblk)
>> +{
>> +       struct nvm_tgt_dev *dev = pblk->dev;
>> +       struct nvm_geo *geo = &dev->geo;
>> +
>> +       /* Pblk uses OOB meta to store LBA of given physical sector.
>> +        * The LBA is eventually used in recovery mode and/or for handling
>> +        * telemetry events (e.g., relocate sector).
>> +        */
>> +
>> +       return (geo->sos >= sizeof(struct pblk_sec_meta));
>> +}
>> +
>> +static inline struct pblk_sec_meta *pblk_get_meta_buffer(struct pblk *pblk,
>> +                                                        void *meta_ptr,
> 
> _ptr is superfluous, we know it's a pointer already.

Sure, makes sense, I can change that.

> 
>> +                                                        int index)
> 
> The name of the function is misleading, maybe pblk_get_sec_meta instead?
> 
>> +{
>> +       struct nvm_tgt_dev *dev = pblk->dev;
>> +       struct nvm_geo *geo = &dev->geo;
>> +
>> +       return meta_ptr + max_t(int, geo->sos, sizeof(struct pblk_sec_meta))
>> +               * index;
> 
> What is the max_t for? How could pblk_sec_meta ever be bigger than geo->sos?
> 

In patch 5 of that series I'm implementing handling for case when 
geo->sos is 0, so that's why I use max_t here as preparation for the 
next patch.

>> +}
>> +
>> +static inline void pblk_set_meta_lba(struct pblk *pblk, void *meta_ptr,
>> +                                    int index, u64 lba)
>> +{
>> +       pblk_get_meta_buffer(pblk, meta_ptr, index)->lba = cpu_to_le64(lba);
>> +}
>> +
>> +static inline u64 pblk_get_meta_lba(struct pblk *pblk, void *meta_ptr,
>> +                                   int index)
>> +{
>> +       return le64_to_cpu(pblk_get_meta_buffer(pblk, meta_ptr, index)->lba);
>> +}
> 
> I would prefer having these functions get/set lbas of type __le64
> instead, as we always have to update the emeta lba list at the same
> time we set the oob metadata, and the emeta lba list is __le64,
> It's easy to mess this up and byte-ordering bugs are hard to detect.
> 

Sure, makes sense, I can change that.

>>   #endif /* PBLK_H_ */
>> --
>> 2.17.1
>>

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/5] lightnvm: Flexible DMA pool entry size
  2018-10-09  9:16   ` Hans Holmberg
@ 2018-10-09 10:01     ` Igor Konopko
  2018-10-09 12:10       ` Hans Holmberg
  0 siblings, 1 reply; 22+ messages in thread
From: Igor Konopko @ 2018-10-09 10:01 UTC (permalink / raw)
  To: Hans Holmberg
  Cc: Matias Bjorling, Javier Gonzalez, linux-block, marcin.dziegielewski



On 09.10.2018 11:16, Hans Holmberg wrote:
> On Fri, Oct 5, 2018 at 3:38 PM Igor Konopko <igor.j.konopko@intel.com> wrote:
>>
>> Currently whole lightnvm and pblk uses single DMA pool,
>> for which entry size is always equal to PAGE_SIZE.
>> PPA list always needs 8b*64, so there is only 56b*64
>> space for OOB meta. Since NVMe OOB meta can be bigger,
>> such as 128b, this solution is not robust.
>>
>> This patch adds the possibility to support OOB meta above
>> 56b by creating separate DMA pool for PBLK with entry
>> size which is big enough to store both PPA list and such
>> a OOB metadata.
>>
>> Signed-off-by: Igor Konopko <igor.j.konopko@intel.com>
>> ---
>>   drivers/lightnvm/core.c          | 33 +++++++++++++++++++++++---------
>>   drivers/lightnvm/pblk-core.c     | 19 +++++++++---------
>>   drivers/lightnvm/pblk-init.c     | 11 +++++++++++
>>   drivers/lightnvm/pblk-read.c     |  3 ++-
>>   drivers/lightnvm/pblk-recovery.c |  9 +++++----
>>   drivers/lightnvm/pblk.h          | 11 ++++++++++-
>>   drivers/nvme/host/lightnvm.c     |  6 ++++--
>>   include/linux/lightnvm.h         |  8 +++++---
>>   8 files changed, 71 insertions(+), 29 deletions(-)
>>
>> diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
>> index efb976a863d2..48db7a096257 100644
>> --- a/drivers/lightnvm/core.c
>> +++ b/drivers/lightnvm/core.c
>> @@ -641,20 +641,33 @@ void nvm_unregister_tgt_type(struct nvm_tgt_type *tt)
>>   }
>>   EXPORT_SYMBOL(nvm_unregister_tgt_type);
>>
>> -void *nvm_dev_dma_alloc(struct nvm_dev *dev, gfp_t mem_flags,
>> -                                                       dma_addr_t *dma_handler)
>> +void *nvm_dev_dma_alloc(struct nvm_dev *dev, void *pool,
>> +                               gfp_t mem_flags, dma_addr_t *dma_handler)
>>   {
>> -       return dev->ops->dev_dma_alloc(dev, dev->dma_pool, mem_flags,
>> -                                                               dma_handler);
>> +       return dev->ops->dev_dma_alloc(dev, pool ?: dev->dma_pool,
>> +                                               mem_flags, dma_handler);
>>   }
>>   EXPORT_SYMBOL(nvm_dev_dma_alloc);
>>
>> -void nvm_dev_dma_free(struct nvm_dev *dev, void *addr, dma_addr_t dma_handler)
>> +void nvm_dev_dma_free(struct nvm_dev *dev, void *pool,
>> +                               void *addr, dma_addr_t dma_handler)
>>   {
>> -       dev->ops->dev_dma_free(dev->dma_pool, addr, dma_handler);
>> +       dev->ops->dev_dma_free(pool ?: dev->dma_pool, addr, dma_handler);
>>   }
>>   EXPORT_SYMBOL(nvm_dev_dma_free);
>>
>> +void *nvm_dev_dma_create(struct nvm_dev *dev, int size, char *name)
>> +{
>> +       return dev->ops->create_dma_pool(dev, name, size);
>> +}
>> +EXPORT_SYMBOL(nvm_dev_dma_create);
>> +
>> +void nvm_dev_dma_destroy(struct nvm_dev *dev, void *pool)
>> +{
>> +       dev->ops->destroy_dma_pool(pool);
>> +}
>> +EXPORT_SYMBOL(nvm_dev_dma_destroy);
>> +
>>   static struct nvm_dev *nvm_find_nvm_dev(const char *name)
>>   {
>>          struct nvm_dev *dev;
>> @@ -682,7 +695,8 @@ static int nvm_set_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd,
>>          }
>>
>>          rqd->nr_ppas = nr_ppas;
>> -       rqd->ppa_list = nvm_dev_dma_alloc(dev, GFP_KERNEL, &rqd->dma_ppa_list);
>> +       rqd->ppa_list = nvm_dev_dma_alloc(dev, NULL, GFP_KERNEL,
>> +                                               &rqd->dma_ppa_list);
>>          if (!rqd->ppa_list) {
>>                  pr_err("nvm: failed to allocate dma memory\n");
>>                  return -ENOMEM;
>> @@ -708,7 +722,8 @@ static void nvm_free_rqd_ppalist(struct nvm_tgt_dev *tgt_dev,
>>          if (!rqd->ppa_list)
>>                  return;
>>
>> -       nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
>> +       nvm_dev_dma_free(tgt_dev->parent, NULL, rqd->ppa_list,
>> +                               rqd->dma_ppa_list);
>>   }
>>
>>   static int nvm_set_flags(struct nvm_geo *geo, struct nvm_rq *rqd)
>> @@ -1145,7 +1160,7 @@ int nvm_register(struct nvm_dev *dev)
>>          if (!dev->q || !dev->ops)
>>                  return -EINVAL;
>>
>> -       dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist");
>> +       dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist", PAGE_SIZE);
>>          if (!dev->dma_pool) {
>>                  pr_err("nvm: could not create dma pool\n");
>>                  return -ENOMEM;
> 
> Why hack the nvm_dev_ interfaces when you are not using the dev pool anyway?
> Wouldn't it be more straightforward to use dma_pool_* instead?
> 

In order to call dma_pool_create() I need NVMe device structure, which 
in my understanding is not public, so this is why I decided to reuse 
plumbing which was available in nvm_dev_* interfaces.

If there is some easy way to call dma_pool_create() from pblk module and 
I'm missing that - let me know. I can rewrite this part, if there is 
some better way to do so.

>> diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
>> index 7cb39d84c833..131972b13e27 100644
>> --- a/drivers/lightnvm/pblk-core.c
>> +++ b/drivers/lightnvm/pblk-core.c
>> @@ -242,16 +242,16 @@ int pblk_alloc_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd)
>>   {
>>          struct nvm_tgt_dev *dev = pblk->dev;
>>
>> -       rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
>> -                                                       &rqd->dma_meta_list);
>> +       rqd->meta_list = nvm_dev_dma_alloc(dev->parent, pblk->dma_pool,
>> +                                          GFP_KERNEL, &rqd->dma_meta_list);
>>          if (!rqd->meta_list)
>>                  return -ENOMEM;
>>
>>          if (rqd->nr_ppas == 1)
>>                  return 0;
>>
>> -       rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size;
>> -       rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size;
>> +       rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size(pblk);
>> +       rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size(pblk);
>>
>>          return 0;
>>   }
>> @@ -261,7 +261,7 @@ void pblk_free_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd)
>>          struct nvm_tgt_dev *dev = pblk->dev;
>>
>>          if (rqd->meta_list)
>> -               nvm_dev_dma_free(dev->parent, rqd->meta_list,
>> +               nvm_dev_dma_free(dev->parent, pblk->dma_pool, rqd->meta_list,
>>                                  rqd->dma_meta_list);
>>   }
>>
>> @@ -840,13 +840,13 @@ int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line,
>>          int i, j;
>>          int ret;
>>
>> -       meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
>> +       meta_list = nvm_dev_dma_alloc(dev->parent, pblk->dma_pool, GFP_KERNEL,
>>                                                          &dma_meta_list);
>>          if (!meta_list)
>>                  return -ENOMEM;
>>
>> -       ppa_list = meta_list + pblk_dma_meta_size;
>> -       dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
>> +       ppa_list = meta_list + pblk_dma_meta_size(pblk);
>> +       dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk);
>>
>>   next_rq:
>>          memset(&rqd, 0, sizeof(struct nvm_rq));
>> @@ -919,7 +919,8 @@ int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line,
>>                  goto next_rq;
>>
>>   free_rqd_dma:
>> -       nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
>> +       nvm_dev_dma_free(dev->parent, pblk->dma_pool,
>> +                        rqd.meta_list, rqd.dma_meta_list);
>>          return ret;
>>   }
>>
>> diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
>> index e3573880dbda..b794e279da31 100644
>> --- a/drivers/lightnvm/pblk-init.c
>> +++ b/drivers/lightnvm/pblk-init.c
>> @@ -1087,6 +1087,7 @@ static void pblk_free(struct pblk *pblk)
>>          pblk_l2p_free(pblk);
>>          pblk_rwb_free(pblk);
>>          pblk_core_free(pblk);
>> +       nvm_dev_dma_destroy(pblk->dev->parent, pblk->dma_pool);
>>
>>          kfree(pblk);
>>   }
>> @@ -1178,6 +1179,15 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
>>          atomic_long_set(&pblk->write_failed, 0);
>>          atomic_long_set(&pblk->erase_failed, 0);
>>
>> +       pblk->dma_pool = nvm_dev_dma_create(dev->parent, (pblk_dma_ppa_size +
>> +                                           pblk_dma_meta_size(pblk)),
>> +                                           tdisk->disk_name);
>> +       if (!pblk->dma_pool) {
>> +               pblk_err(pblk, "could not allocate dma pool\n");
>> +               kfree(pblk);
>> +               return ERR_PTR(-ENOMEM);
>> +       }
>> +
>>          ret = pblk_core_init(pblk);
>>          if (ret) {
>>                  pblk_err(pblk, "could not initialize core\n");
>> @@ -1249,6 +1259,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
>>   fail_free_core:
>>          pblk_core_free(pblk);
>>   fail:
>> +       nvm_dev_dma_destroy(dev->parent, pblk->dma_pool);
>>          kfree(pblk);
>>          return ERR_PTR(ret);
>>   }
>> diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
>> index 6584a2588f61..5d213c4f83c1 100644
>> --- a/drivers/lightnvm/pblk-read.c
>> +++ b/drivers/lightnvm/pblk-read.c
>> @@ -499,7 +499,8 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
>>          return NVM_IO_OK;
>>
>>   fail_meta_free:
>> -       nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
>> +       nvm_dev_dma_free(dev->parent, pblk->dma_pool,
>> +                        rqd->meta_list, rqd->dma_meta_list);
>>   fail_rqd_free:
>>          pblk_free_rqd(pblk, rqd, PBLK_READ);
>>          return ret;
>> diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
>> index fa63f9fa5ba8..4b703877907b 100644
>> --- a/drivers/lightnvm/pblk-recovery.c
>> +++ b/drivers/lightnvm/pblk-recovery.c
>> @@ -471,12 +471,13 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line)
>>          dma_addr_t dma_ppa_list, dma_meta_list;
>>          int ret = 0;
>>
>> -       meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
>> +       meta_list = nvm_dev_dma_alloc(dev->parent, pblk->dma_pool,
>> +                                        GFP_KERNEL, &dma_meta_list);
>>          if (!meta_list)
>>                  return -ENOMEM;
>>
>> -       ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
>> -       dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
>> +       ppa_list = (void *)(meta_list) + pblk_dma_meta_size(pblk);
>> +       dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk);
>>
>>          data = kcalloc(pblk->max_write_pgs, geo->csecs, GFP_KERNEL);
>>          if (!data) {
>> @@ -507,7 +508,7 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line)
>>          mempool_free(rqd, &pblk->r_rq_pool);
>>          kfree(data);
>>   free_meta_list:
>> -       nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
>> +       nvm_dev_dma_free(dev->parent, pblk->dma_pool, meta_list, dma_meta_list);
>>
>>          return ret;
>>   }
>> diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
>> index 53156b6f99a3..6e4a63fd4c49 100644
>> --- a/drivers/lightnvm/pblk.h
>> +++ b/drivers/lightnvm/pblk.h
>> @@ -103,7 +103,6 @@ enum {
>>          PBLK_RL_LOW = 4
>>   };
>>
>> -#define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * NVM_MAX_VLBA)
>>   #define pblk_dma_ppa_size (sizeof(u64) * NVM_MAX_VLBA)
>>
>>   /* write buffer completion context */
>> @@ -711,6 +710,7 @@ struct pblk {
>>          struct timer_list wtimer;
>>
>>          struct pblk_gc gc;
>> +       void *dma_pool;
>>   };
>>
>>   struct pblk_line_ws {
>> @@ -1401,4 +1401,13 @@ static inline u64 pblk_get_meta_lba(struct pblk *pblk, void *meta_ptr,
>>   {
>>          return le64_to_cpu(pblk_get_meta_buffer(pblk, meta_ptr, index)->lba);
>>   }
>> +
>> +static inline int pblk_dma_meta_size(struct pblk *pblk)
>> +{
>> +       struct nvm_tgt_dev *dev = pblk->dev;
>> +       struct nvm_geo *geo = &dev->geo;
>> +
>> +       return max_t(int, sizeof(struct pblk_sec_meta), geo->sos)
>> +                               * NVM_MAX_VLBA;
>> +}
>>   #endif /* PBLK_H_ */
>> diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
>> index 986526ff1521..e370793f52d5 100644
>> --- a/drivers/nvme/host/lightnvm.c
>> +++ b/drivers/nvme/host/lightnvm.c
>> @@ -736,11 +736,13 @@ static int nvme_nvm_submit_io_sync(struct nvm_dev *dev, struct nvm_rq *rqd)
>>          return ret;
>>   }
>>
>> -static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name)
>> +static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name,
>> +                                       int size)
>>   {
>>          struct nvme_ns *ns = nvmdev->q->queuedata;
>>
>> -       return dma_pool_create(name, ns->ctrl->dev, PAGE_SIZE, PAGE_SIZE, 0);
>> +       size = round_up(size, PAGE_SIZE);
>> +       return dma_pool_create(name, ns->ctrl->dev, size, PAGE_SIZE, 0);
>>   }
>>
>>   static void nvme_nvm_destroy_dma_pool(void *pool)
>> diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
>> index 36a84180c1e8..c6c998716ee7 100644
>> --- a/include/linux/lightnvm.h
>> +++ b/include/linux/lightnvm.h
>> @@ -90,7 +90,7 @@ typedef int (nvm_get_chk_meta_fn)(struct nvm_dev *, sector_t, int,
>>                                                          struct nvm_chk_meta *);
>>   typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
>>   typedef int (nvm_submit_io_sync_fn)(struct nvm_dev *, struct nvm_rq *);
>> -typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *);
>> +typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *, int);
>>   typedef void (nvm_destroy_dma_pool_fn)(void *);
>>   typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t,
>>                                                                  dma_addr_t *);
>> @@ -668,8 +668,10 @@ struct nvm_tgt_type {
>>   extern int nvm_register_tgt_type(struct nvm_tgt_type *);
>>   extern void nvm_unregister_tgt_type(struct nvm_tgt_type *);
>>
>> -extern void *nvm_dev_dma_alloc(struct nvm_dev *, gfp_t, dma_addr_t *);
>> -extern void nvm_dev_dma_free(struct nvm_dev *, void *, dma_addr_t);
>> +extern void *nvm_dev_dma_alloc(struct nvm_dev *, void *, gfp_t, dma_addr_t *);
>> +extern void nvm_dev_dma_free(struct nvm_dev *, void *, void *, dma_addr_t);
>> +extern void *nvm_dev_dma_create(struct nvm_dev *, int, char *);
>> +extern void nvm_dev_dma_destroy(struct nvm_dev *, void *);
>>
>>   extern struct nvm_dev *nvm_alloc_dev(int);
>>   extern int nvm_register(struct nvm_dev *);
>> --
>> 2.17.1
>>

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/5] lightnvm: Flexible DMA pool entry size
  2018-10-09 10:01     ` Igor Konopko
@ 2018-10-09 12:10       ` Hans Holmberg
  2018-10-09 12:36         ` Javier Gonzalez
  2018-10-09 12:56         ` Matias Bjørling
  0 siblings, 2 replies; 22+ messages in thread
From: Hans Holmberg @ 2018-10-09 12:10 UTC (permalink / raw)
  To: igor.j.konopko, Matias Bjorling
  Cc: Javier Gonzalez, linux-block, marcin.dziegielewski

On Tue, Oct 9, 2018 at 12:03 PM Igor Konopko <igor.j.konopko@intel.com> wrote:
>
>
>
> On 09.10.2018 11:16, Hans Holmberg wrote:
> > On Fri, Oct 5, 2018 at 3:38 PM Igor Konopko <igor.j.konopko@intel.com> wrote:
> >>
> >> Currently whole lightnvm and pblk uses single DMA pool,
> >> for which entry size is always equal to PAGE_SIZE.
> >> PPA list always needs 8b*64, so there is only 56b*64
> >> space for OOB meta. Since NVMe OOB meta can be bigger,
> >> such as 128b, this solution is not robust.
> >>
> >> This patch adds the possibility to support OOB meta above
> >> 56b by creating separate DMA pool for PBLK with entry
> >> size which is big enough to store both PPA list and such
> >> a OOB metadata.
> >>
> >> Signed-off-by: Igor Konopko <igor.j.konopko@intel.com>
> >> ---
> >>   drivers/lightnvm/core.c          | 33 +++++++++++++++++++++++---------
> >>   drivers/lightnvm/pblk-core.c     | 19 +++++++++---------
> >>   drivers/lightnvm/pblk-init.c     | 11 +++++++++++
> >>   drivers/lightnvm/pblk-read.c     |  3 ++-
> >>   drivers/lightnvm/pblk-recovery.c |  9 +++++----
> >>   drivers/lightnvm/pblk.h          | 11 ++++++++++-
> >>   drivers/nvme/host/lightnvm.c     |  6 ++++--
> >>   include/linux/lightnvm.h         |  8 +++++---
> >>   8 files changed, 71 insertions(+), 29 deletions(-)
> >>
> >> diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
> >> index efb976a863d2..48db7a096257 100644
> >> --- a/drivers/lightnvm/core.c
> >> +++ b/drivers/lightnvm/core.c
> >> @@ -641,20 +641,33 @@ void nvm_unregister_tgt_type(struct nvm_tgt_type *tt)
> >>   }
> >>   EXPORT_SYMBOL(nvm_unregister_tgt_type);
> >>
> >> -void *nvm_dev_dma_alloc(struct nvm_dev *dev, gfp_t mem_flags,
> >> -                                                       dma_addr_t *dma_handler)
> >> +void *nvm_dev_dma_alloc(struct nvm_dev *dev, void *pool,
> >> +                               gfp_t mem_flags, dma_addr_t *dma_handler)
> >>   {
> >> -       return dev->ops->dev_dma_alloc(dev, dev->dma_pool, mem_flags,
> >> -                                                               dma_handler);
> >> +       return dev->ops->dev_dma_alloc(dev, pool ?: dev->dma_pool,
> >> +                                               mem_flags, dma_handler);
> >>   }
> >>   EXPORT_SYMBOL(nvm_dev_dma_alloc);
> >>
> >> -void nvm_dev_dma_free(struct nvm_dev *dev, void *addr, dma_addr_t dma_handler)
> >> +void nvm_dev_dma_free(struct nvm_dev *dev, void *pool,
> >> +                               void *addr, dma_addr_t dma_handler)
> >>   {
> >> -       dev->ops->dev_dma_free(dev->dma_pool, addr, dma_handler);
> >> +       dev->ops->dev_dma_free(pool ?: dev->dma_pool, addr, dma_handler);
> >>   }
> >>   EXPORT_SYMBOL(nvm_dev_dma_free);
> >>
> >> +void *nvm_dev_dma_create(struct nvm_dev *dev, int size, char *name)
> >> +{
> >> +       return dev->ops->create_dma_pool(dev, name, size);
> >> +}
> >> +EXPORT_SYMBOL(nvm_dev_dma_create);
> >> +
> >> +void nvm_dev_dma_destroy(struct nvm_dev *dev, void *pool)
> >> +{
> >> +       dev->ops->destroy_dma_pool(pool);
> >> +}
> >> +EXPORT_SYMBOL(nvm_dev_dma_destroy);
> >> +
> >>   static struct nvm_dev *nvm_find_nvm_dev(const char *name)
> >>   {
> >>          struct nvm_dev *dev;
> >> @@ -682,7 +695,8 @@ static int nvm_set_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd,
> >>          }
> >>
> >>          rqd->nr_ppas = nr_ppas;
> >> -       rqd->ppa_list = nvm_dev_dma_alloc(dev, GFP_KERNEL, &rqd->dma_ppa_list);
> >> +       rqd->ppa_list = nvm_dev_dma_alloc(dev, NULL, GFP_KERNEL,
> >> +                                               &rqd->dma_ppa_list);
> >>          if (!rqd->ppa_list) {
> >>                  pr_err("nvm: failed to allocate dma memory\n");
> >>                  return -ENOMEM;
> >> @@ -708,7 +722,8 @@ static void nvm_free_rqd_ppalist(struct nvm_tgt_dev *tgt_dev,
> >>          if (!rqd->ppa_list)
> >>                  return;
> >>
> >> -       nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
> >> +       nvm_dev_dma_free(tgt_dev->parent, NULL, rqd->ppa_list,
> >> +                               rqd->dma_ppa_list);
> >>   }
> >>
> >>   static int nvm_set_flags(struct nvm_geo *geo, struct nvm_rq *rqd)
> >> @@ -1145,7 +1160,7 @@ int nvm_register(struct nvm_dev *dev)
> >>          if (!dev->q || !dev->ops)
> >>                  return -EINVAL;
> >>
> >> -       dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist");
> >> +       dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist", PAGE_SIZE);
> >>          if (!dev->dma_pool) {
> >>                  pr_err("nvm: could not create dma pool\n");
> >>                  return -ENOMEM;
> >
> > Why hack the nvm_dev_ interfaces when you are not using the dev pool anyway?
> > Wouldn't it be more straightforward to use dma_pool_* instead?
> >
>
> In order to call dma_pool_create() I need NVMe device structure, which
> in my understanding is not public, so this is why I decided to reuse
> plumbing which was available in nvm_dev_* interfaces.

Hmm, yes, I see now.

>
> If there is some easy way to call dma_pool_create() from pblk module and
> I'm missing that - let me know. I can rewrite this part, if there is
> some better way to do so.

Create and destroy need to go through dev->ops, but once you have
allocated the pool, there is no need for going through the dev->ops
for allocate/free as far as I can see.
Matias: can't we just replace .dev_dma_alloc / .dev_dma_free  by calls
to dma_pool_alloc/free ?


>
> >> diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
> >> index 7cb39d84c833..131972b13e27 100644
> >> --- a/drivers/lightnvm/pblk-core.c
> >> +++ b/drivers/lightnvm/pblk-core.c
> >> @@ -242,16 +242,16 @@ int pblk_alloc_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd)
> >>   {
> >>          struct nvm_tgt_dev *dev = pblk->dev;
> >>
> >> -       rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
> >> -                                                       &rqd->dma_meta_list);
> >> +       rqd->meta_list = nvm_dev_dma_alloc(dev->parent, pblk->dma_pool,
> >> +                                          GFP_KERNEL, &rqd->dma_meta_list);
> >>          if (!rqd->meta_list)
> >>                  return -ENOMEM;
> >>
> >>          if (rqd->nr_ppas == 1)
> >>                  return 0;
> >>
> >> -       rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size;
> >> -       rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size;
> >> +       rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size(pblk);
> >> +       rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size(pblk);
> >>
> >>          return 0;
> >>   }
> >> @@ -261,7 +261,7 @@ void pblk_free_rqd_meta(struct pblk *pblk, struct nvm_rq *rqd)
> >>          struct nvm_tgt_dev *dev = pblk->dev;
> >>
> >>          if (rqd->meta_list)
> >> -               nvm_dev_dma_free(dev->parent, rqd->meta_list,
> >> +               nvm_dev_dma_free(dev->parent, pblk->dma_pool, rqd->meta_list,
> >>                                  rqd->dma_meta_list);
> >>   }
> >>
> >> @@ -840,13 +840,13 @@ int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line,
> >>          int i, j;
> >>          int ret;
> >>
> >> -       meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
> >> +       meta_list = nvm_dev_dma_alloc(dev->parent, pblk->dma_pool, GFP_KERNEL,
> >>                                                          &dma_meta_list);
> >>          if (!meta_list)
> >>                  return -ENOMEM;
> >>
> >> -       ppa_list = meta_list + pblk_dma_meta_size;
> >> -       dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
> >> +       ppa_list = meta_list + pblk_dma_meta_size(pblk);
> >> +       dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk);
> >>
> >>   next_rq:
> >>          memset(&rqd, 0, sizeof(struct nvm_rq));
> >> @@ -919,7 +919,8 @@ int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line,
> >>                  goto next_rq;
> >>
> >>   free_rqd_dma:
> >> -       nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
> >> +       nvm_dev_dma_free(dev->parent, pblk->dma_pool,
> >> +                        rqd.meta_list, rqd.dma_meta_list);
> >>          return ret;
> >>   }
> >>
> >> diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
> >> index e3573880dbda..b794e279da31 100644
> >> --- a/drivers/lightnvm/pblk-init.c
> >> +++ b/drivers/lightnvm/pblk-init.c
> >> @@ -1087,6 +1087,7 @@ static void pblk_free(struct pblk *pblk)
> >>          pblk_l2p_free(pblk);
> >>          pblk_rwb_free(pblk);
> >>          pblk_core_free(pblk);
> >> +       nvm_dev_dma_destroy(pblk->dev->parent, pblk->dma_pool);
> >>
> >>          kfree(pblk);
> >>   }
> >> @@ -1178,6 +1179,15 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
> >>          atomic_long_set(&pblk->write_failed, 0);
> >>          atomic_long_set(&pblk->erase_failed, 0);
> >>
> >> +       pblk->dma_pool = nvm_dev_dma_create(dev->parent, (pblk_dma_ppa_size +
> >> +                                           pblk_dma_meta_size(pblk)),
> >> +                                           tdisk->disk_name);
> >> +       if (!pblk->dma_pool) {
> >> +               pblk_err(pblk, "could not allocate dma pool\n");
> >> +               kfree(pblk);
> >> +               return ERR_PTR(-ENOMEM);
> >> +       }
> >> +
> >>          ret = pblk_core_init(pblk);
> >>          if (ret) {
> >>                  pblk_err(pblk, "could not initialize core\n");
> >> @@ -1249,6 +1259,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
> >>   fail_free_core:
> >>          pblk_core_free(pblk);
> >>   fail:
> >> +       nvm_dev_dma_destroy(dev->parent, pblk->dma_pool);
> >>          kfree(pblk);
> >>          return ERR_PTR(ret);
> >>   }
> >> diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
> >> index 6584a2588f61..5d213c4f83c1 100644
> >> --- a/drivers/lightnvm/pblk-read.c
> >> +++ b/drivers/lightnvm/pblk-read.c
> >> @@ -499,7 +499,8 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
> >>          return NVM_IO_OK;
> >>
> >>   fail_meta_free:
> >> -       nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
> >> +       nvm_dev_dma_free(dev->parent, pblk->dma_pool,
> >> +                        rqd->meta_list, rqd->dma_meta_list);
> >>   fail_rqd_free:
> >>          pblk_free_rqd(pblk, rqd, PBLK_READ);
> >>          return ret;
> >> diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
> >> index fa63f9fa5ba8..4b703877907b 100644
> >> --- a/drivers/lightnvm/pblk-recovery.c
> >> +++ b/drivers/lightnvm/pblk-recovery.c
> >> @@ -471,12 +471,13 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line)
> >>          dma_addr_t dma_ppa_list, dma_meta_list;
> >>          int ret = 0;
> >>
> >> -       meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
> >> +       meta_list = nvm_dev_dma_alloc(dev->parent, pblk->dma_pool,
> >> +                                        GFP_KERNEL, &dma_meta_list);
> >>          if (!meta_list)
> >>                  return -ENOMEM;
> >>
> >> -       ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
> >> -       dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
> >> +       ppa_list = (void *)(meta_list) + pblk_dma_meta_size(pblk);
> >> +       dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk);
> >>
> >>          data = kcalloc(pblk->max_write_pgs, geo->csecs, GFP_KERNEL);
> >>          if (!data) {
> >> @@ -507,7 +508,7 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line)
> >>          mempool_free(rqd, &pblk->r_rq_pool);
> >>          kfree(data);
> >>   free_meta_list:
> >> -       nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
> >> +       nvm_dev_dma_free(dev->parent, pblk->dma_pool, meta_list, dma_meta_list);
> >>
> >>          return ret;
> >>   }
> >> diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
> >> index 53156b6f99a3..6e4a63fd4c49 100644
> >> --- a/drivers/lightnvm/pblk.h
> >> +++ b/drivers/lightnvm/pblk.h
> >> @@ -103,7 +103,6 @@ enum {
> >>          PBLK_RL_LOW = 4
> >>   };
> >>
> >> -#define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * NVM_MAX_VLBA)
> >>   #define pblk_dma_ppa_size (sizeof(u64) * NVM_MAX_VLBA)
> >>
> >>   /* write buffer completion context */
> >> @@ -711,6 +710,7 @@ struct pblk {
> >>          struct timer_list wtimer;
> >>
> >>          struct pblk_gc gc;
> >> +       void *dma_pool;
> >>   };
> >>
> >>   struct pblk_line_ws {
> >> @@ -1401,4 +1401,13 @@ static inline u64 pblk_get_meta_lba(struct pblk *pblk, void *meta_ptr,
> >>   {
> >>          return le64_to_cpu(pblk_get_meta_buffer(pblk, meta_ptr, index)->lba);
> >>   }
> >> +
> >> +static inline int pblk_dma_meta_size(struct pblk *pblk)
> >> +{
> >> +       struct nvm_tgt_dev *dev = pblk->dev;
> >> +       struct nvm_geo *geo = &dev->geo;
> >> +
> >> +       return max_t(int, sizeof(struct pblk_sec_meta), geo->sos)
> >> +                               * NVM_MAX_VLBA;
> >> +}
> >>   #endif /* PBLK_H_ */
> >> diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
> >> index 986526ff1521..e370793f52d5 100644
> >> --- a/drivers/nvme/host/lightnvm.c
> >> +++ b/drivers/nvme/host/lightnvm.c
> >> @@ -736,11 +736,13 @@ static int nvme_nvm_submit_io_sync(struct nvm_dev *dev, struct nvm_rq *rqd)
> >>          return ret;
> >>   }
> >>
> >> -static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name)
> >> +static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name,
> >> +                                       int size)
> >>   {
> >>          struct nvme_ns *ns = nvmdev->q->queuedata;
> >>
> >> -       return dma_pool_create(name, ns->ctrl->dev, PAGE_SIZE, PAGE_SIZE, 0);
> >> +       size = round_up(size, PAGE_SIZE);
> >> +       return dma_pool_create(name, ns->ctrl->dev, size, PAGE_SIZE, 0);
> >>   }
> >>
> >>   static void nvme_nvm_destroy_dma_pool(void *pool)
> >> diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
> >> index 36a84180c1e8..c6c998716ee7 100644
> >> --- a/include/linux/lightnvm.h
> >> +++ b/include/linux/lightnvm.h
> >> @@ -90,7 +90,7 @@ typedef int (nvm_get_chk_meta_fn)(struct nvm_dev *, sector_t, int,
> >>                                                          struct nvm_chk_meta *);
> >>   typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
> >>   typedef int (nvm_submit_io_sync_fn)(struct nvm_dev *, struct nvm_rq *);
> >> -typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *);
> >> +typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *, int);
> >>   typedef void (nvm_destroy_dma_pool_fn)(void *);
> >>   typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t,
> >>                                                                  dma_addr_t *);
> >> @@ -668,8 +668,10 @@ struct nvm_tgt_type {
> >>   extern int nvm_register_tgt_type(struct nvm_tgt_type *);
> >>   extern void nvm_unregister_tgt_type(struct nvm_tgt_type *);
> >>
> >> -extern void *nvm_dev_dma_alloc(struct nvm_dev *, gfp_t, dma_addr_t *);
> >> -extern void nvm_dev_dma_free(struct nvm_dev *, void *, dma_addr_t);
> >> +extern void *nvm_dev_dma_alloc(struct nvm_dev *, void *, gfp_t, dma_addr_t *);
> >> +extern void nvm_dev_dma_free(struct nvm_dev *, void *, void *, dma_addr_t);
> >> +extern void *nvm_dev_dma_create(struct nvm_dev *, int, char *);
> >> +extern void nvm_dev_dma_destroy(struct nvm_dev *, void *);
> >>
> >>   extern struct nvm_dev *nvm_alloc_dev(int);
> >>   extern int nvm_register(struct nvm_dev *);
> >> --
> >> 2.17.1
> >>

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/5] lightnvm: Flexible DMA pool entry size
  2018-10-09 12:10       ` Hans Holmberg
@ 2018-10-09 12:36         ` Javier Gonzalez
  2018-10-09 12:54           ` Igor Konopko
  2018-10-09 12:56         ` Matias Bjørling
  1 sibling, 1 reply; 22+ messages in thread
From: Javier Gonzalez @ 2018-10-09 12:36 UTC (permalink / raw)
  To: Hans Holmberg
  Cc: Konopko, Igor J, Matias Bjørling, linux-block, marcin.dziegielewski

[-- Attachment #1: Type: text/plain, Size: 6038 bytes --]

> On 9 Oct 2018, at 21.10, Hans Holmberg <hans.ml.holmberg@owltronix.com> wrote:
> 
> On Tue, Oct 9, 2018 at 12:03 PM Igor Konopko <igor.j.konopko@intel.com> wrote:
>> On 09.10.2018 11:16, Hans Holmberg wrote:
>>> On Fri, Oct 5, 2018 at 3:38 PM Igor Konopko <igor.j.konopko@intel.com> wrote:
>>>> Currently whole lightnvm and pblk uses single DMA pool,
>>>> for which entry size is always equal to PAGE_SIZE.
>>>> PPA list always needs 8b*64, so there is only 56b*64
>>>> space for OOB meta. Since NVMe OOB meta can be bigger,
>>>> such as 128b, this solution is not robustness.
>>>> 
>>>> This patch add the possiblity to support OOB meta above
>>>> 56b by creating separate DMA pool for PBLK with entry
>>>> size which is big enough to store both PPA list and such
>>>> a OOB metadata.
>>>> 
>>>> Signed-off-by: Igor Konopko <igor.j.konopko@intel.com>
>>>> ---
>>>>  drivers/lightnvm/core.c          | 33 +++++++++++++++++++++++---------
>>>>  drivers/lightnvm/pblk-core.c     | 19 +++++++++---------
>>>>  drivers/lightnvm/pblk-init.c     | 11 +++++++++++
>>>>  drivers/lightnvm/pblk-read.c     |  3 ++-
>>>>  drivers/lightnvm/pblk-recovery.c |  9 +++++----
>>>>  drivers/lightnvm/pblk.h          | 11 ++++++++++-
>>>>  drivers/nvme/host/lightnvm.c     |  6 ++++--
>>>>  include/linux/lightnvm.h         |  8 +++++---
>>>>  8 files changed, 71 insertions(+), 29 deletions(-)
>>>> 
>>>> diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
>>>> index efb976a863d2..48db7a096257 100644
>>>> --- a/drivers/lightnvm/core.c
>>>> +++ b/drivers/lightnvm/core.c
>>>> @@ -641,20 +641,33 @@ void nvm_unregister_tgt_type(struct nvm_tgt_type *tt)
>>>>  }
>>>>  EXPORT_SYMBOL(nvm_unregister_tgt_type);
>>>> 
>>>> -void *nvm_dev_dma_alloc(struct nvm_dev *dev, gfp_t mem_flags,
>>>> -                                                       dma_addr_t *dma_handler)
>>>> +void *nvm_dev_dma_alloc(struct nvm_dev *dev, void *pool,
>>>> +                               gfp_t mem_flags, dma_addr_t *dma_handler)
>>>>  {
>>>> -       return dev->ops->dev_dma_alloc(dev, dev->dma_pool, mem_flags,
>>>> -                                                               dma_handler);
>>>> +       return dev->ops->dev_dma_alloc(dev, pool ?: dev->dma_pool,
>>>> +                                               mem_flags, dma_handler);
>>>>  }
>>>>  EXPORT_SYMBOL(nvm_dev_dma_alloc);
>>>> 
>>>> -void nvm_dev_dma_free(struct nvm_dev *dev, void *addr, dma_addr_t dma_handler)
>>>> +void nvm_dev_dma_free(struct nvm_dev *dev, void *pool,
>>>> +                               void *addr, dma_addr_t dma_handler)
>>>>  {
>>>> -       dev->ops->dev_dma_free(dev->dma_pool, addr, dma_handler);
>>>> +       dev->ops->dev_dma_free(pool ?: dev->dma_pool, addr, dma_handler);
>>>>  }
>>>>  EXPORT_SYMBOL(nvm_dev_dma_free);
>>>> 
>>>> +void *nvm_dev_dma_create(struct nvm_dev *dev, int size, char *name)
>>>> +{
>>>> +       return dev->ops->create_dma_pool(dev, name, size);
>>>> +}
>>>> +EXPORT_SYMBOL(nvm_dev_dma_create);
>>>> +
>>>> +void nvm_dev_dma_destroy(struct nvm_dev *dev, void *pool)
>>>> +{
>>>> +       dev->ops->destroy_dma_pool(pool);
>>>> +}
>>>> +EXPORT_SYMBOL(nvm_dev_dma_destroy);
>>>> +
>>>>  static struct nvm_dev *nvm_find_nvm_dev(const char *name)
>>>>  {
>>>>         struct nvm_dev *dev;
>>>> @@ -682,7 +695,8 @@ static int nvm_set_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd,
>>>>         }
>>>> 
>>>>         rqd->nr_ppas = nr_ppas;
>>>> -       rqd->ppa_list = nvm_dev_dma_alloc(dev, GFP_KERNEL, &rqd->dma_ppa_list);
>>>> +       rqd->ppa_list = nvm_dev_dma_alloc(dev, NULL, GFP_KERNEL,
>>>> +                                               &rqd->dma_ppa_list);
>>>>         if (!rqd->ppa_list) {
>>>>                 pr_err("nvm: failed to allocate dma memory\n");
>>>>                 return -ENOMEM;
>>>> @@ -708,7 +722,8 @@ static void nvm_free_rqd_ppalist(struct nvm_tgt_dev *tgt_dev,
>>>>         if (!rqd->ppa_list)
>>>>                 return;
>>>> 
>>>> -       nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
>>>> +       nvm_dev_dma_free(tgt_dev->parent, NULL, rqd->ppa_list,
>>>> +                               rqd->dma_ppa_list);
>>>>  }
>>>> 
>>>>  static int nvm_set_flags(struct nvm_geo *geo, struct nvm_rq *rqd)
>>>> @@ -1145,7 +1160,7 @@ int nvm_register(struct nvm_dev *dev)
>>>>         if (!dev->q || !dev->ops)
>>>>                 return -EINVAL;
>>>> 
>>>> -       dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist");
>>>> +       dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist", PAGE_SIZE);
>>>>         if (!dev->dma_pool) {
>>>>                 pr_err("nvm: could not create dma pool\n");
>>>>                 return -ENOMEM;
>>> 
>>> Why hack the nvm_dev_ interfaces when you are not using the dev pool anyway?
>>> Wouldn't it be more straightforward to use dma_pool_* instead?
>> 
>> In order to call dma_pool_create() I need NVMe device structure, which
>> in my understanding is not public, so this is why I decided to reuse
>> plumbing which was available in nvm_dev_* interfaces.
> 
> Hmm, yes, I see now.
> 
>> If there is some easy way to call dma_pool_create() from pblk module and
>> I'm missing that - let me know. I can rewrite this part, if there is
>> some better way to do so.
> 
> Create and destroy needs to go through dev->ops, but once you have
> allocated the pool, there is no need for going through the dev->ops
> for allocate/free as far as I can see.
> Matias: can't we just replace .dev_dma_alloc / .dev_dma_free  by calls
> to dma_pool_alloc/free ?
> 

When we implemented this, we thought we might need some extra driver
specific code, but it has not been the case.

I'm ok with making the allocations directly once you have the pool
provided by the driver lightnvm bits.

Igor: can you integrate this into the patches for the next revision (still
some extra feedback to come though).

Thanks,
Javier

[-- Attachment #2: Message signed with OpenPGP --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/5] lightnvm: pblk: Helpers for OOB metadata
  2018-10-09  9:12     ` Matias Bjørling
@ 2018-10-09 12:39       ` Javier Gonzalez
  0 siblings, 0 replies; 22+ messages in thread
From: Javier Gonzalez @ 2018-10-09 12:39 UTC (permalink / raw)
  To: Matias Bjørling
  Cc: hans.ml.holmberg, Konopko, Igor J, linux-block, marcin.dziegielewski

[-- Attachment #1: Type: text/plain, Size: 1130 bytes --]

> 
> On 9 Oct 2018, at 18.12, Matias Bjørling <mb@lightnvm.io> wrote:
> 
>>>  };
>>> 
>>>  struct pblk_sec_meta {
>>> -       u64 reserved;
>>>         __le64 lba;
>>>  };
>> It's nice to reduce the required metadata size, but this silently
>> breaks pblk the on-disk-storage format. We can't have that.
>> I suggest breaking out this change as a separate patch that also
>> increases SMETA_VERSION_MAJOR and instructs the user via the kernel
>> log to migrate the data manually (or factory reset the decice/pblk
>> instance).
> 
> In that case, we should include the initialization values of the target somewhere. E.g., line metadata, or some FTL log to be implemented at some point.
> 
> Another fix for now will be to keep it as 16b, and do the patch that
> you propose later. Igor, I believe the metadata will fit? 16b per LBA
> should be max 1K in the case of 64 LBAs.

Ideally, we fit this on the smeta structure too, independently of the FTL
log. This way, we can have different line types that are recovered
differently. Since we are changing the format, this is a good time to
add it.

Javier

[-- Attachment #2: Message signed with OpenPGP --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/5] lightnvm: Flexible DMA pool entry size
  2018-10-09 12:36         ` Javier Gonzalez
@ 2018-10-09 12:54           ` Igor Konopko
  0 siblings, 0 replies; 22+ messages in thread
From: Igor Konopko @ 2018-10-09 12:54 UTC (permalink / raw)
  To: Javier Gonzalez, Hans Holmberg
  Cc: Matias Bjørling, linux-block, marcin.dziegielewski



On 09.10.2018 14:36, Javier Gonzalez wrote:
>> On 9 Oct 2018, at 21.10, Hans Holmberg <hans.ml.holmberg@owltronix.com> wrote:
>>
>> On Tue, Oct 9, 2018 at 12:03 PM Igor Konopko <igor.j.konopko@intel.com> wrote:
>>> On 09.10.2018 11:16, Hans Holmberg wrote:
>>>> On Fri, Oct 5, 2018 at 3:38 PM Igor Konopko <igor.j.konopko@intel.com> wrote:
>>>>> Currently whole lightnvm and pblk uses single DMA pool,
>>>>> for which entry size is always equal to PAGE_SIZE.
>>>>> PPA list always needs 8b*64, so there is only 56b*64
>>>>> space for OOB meta. Since NVMe OOB meta can be bigger,
>>>>> such as 128b, this solution is not robustness.
>>>>>
>>>>> This patch add the possiblity to support OOB meta above
>>>>> 56b by creating separate DMA pool for PBLK with entry
>>>>> size which is big enough to store both PPA list and such
>>>>> a OOB metadata.
>>>>>
>>>>> Signed-off-by: Igor Konopko <igor.j.konopko@intel.com>
>>>>> ---
>>>>>   drivers/lightnvm/core.c          | 33 +++++++++++++++++++++++---------
>>>>>   drivers/lightnvm/pblk-core.c     | 19 +++++++++---------
>>>>>   drivers/lightnvm/pblk-init.c     | 11 +++++++++++
>>>>>   drivers/lightnvm/pblk-read.c     |  3 ++-
>>>>>   drivers/lightnvm/pblk-recovery.c |  9 +++++----
>>>>>   drivers/lightnvm/pblk.h          | 11 ++++++++++-
>>>>>   drivers/nvme/host/lightnvm.c     |  6 ++++--
>>>>>   include/linux/lightnvm.h         |  8 +++++---
>>>>>   8 files changed, 71 insertions(+), 29 deletions(-)
>>>>>
>>>>> diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
>>>>> index efb976a863d2..48db7a096257 100644
>>>>> --- a/drivers/lightnvm/core.c
>>>>> +++ b/drivers/lightnvm/core.c
>>>>> @@ -641,20 +641,33 @@ void nvm_unregister_tgt_type(struct nvm_tgt_type *tt)
>>>>>   }
>>>>>   EXPORT_SYMBOL(nvm_unregister_tgt_type);
>>>>>
>>>>> -void *nvm_dev_dma_alloc(struct nvm_dev *dev, gfp_t mem_flags,
>>>>> -                                                       dma_addr_t *dma_handler)
>>>>> +void *nvm_dev_dma_alloc(struct nvm_dev *dev, void *pool,
>>>>> +                               gfp_t mem_flags, dma_addr_t *dma_handler)
>>>>>   {
>>>>> -       return dev->ops->dev_dma_alloc(dev, dev->dma_pool, mem_flags,
>>>>> -                                                               dma_handler);
>>>>> +       return dev->ops->dev_dma_alloc(dev, pool ?: dev->dma_pool,
>>>>> +                                               mem_flags, dma_handler);
>>>>>   }
>>>>>   EXPORT_SYMBOL(nvm_dev_dma_alloc);
>>>>>
>>>>> -void nvm_dev_dma_free(struct nvm_dev *dev, void *addr, dma_addr_t dma_handler)
>>>>> +void nvm_dev_dma_free(struct nvm_dev *dev, void *pool,
>>>>> +                               void *addr, dma_addr_t dma_handler)
>>>>>   {
>>>>> -       dev->ops->dev_dma_free(dev->dma_pool, addr, dma_handler);
>>>>> +       dev->ops->dev_dma_free(pool ?: dev->dma_pool, addr, dma_handler);
>>>>>   }
>>>>>   EXPORT_SYMBOL(nvm_dev_dma_free);
>>>>>
>>>>> +void *nvm_dev_dma_create(struct nvm_dev *dev, int size, char *name)
>>>>> +{
>>>>> +       return dev->ops->create_dma_pool(dev, name, size);
>>>>> +}
>>>>> +EXPORT_SYMBOL(nvm_dev_dma_create);
>>>>> +
>>>>> +void nvm_dev_dma_destroy(struct nvm_dev *dev, void *pool)
>>>>> +{
>>>>> +       dev->ops->destroy_dma_pool(pool);
>>>>> +}
>>>>> +EXPORT_SYMBOL(nvm_dev_dma_destroy);
>>>>> +
>>>>>   static struct nvm_dev *nvm_find_nvm_dev(const char *name)
>>>>>   {
>>>>>          struct nvm_dev *dev;
>>>>> @@ -682,7 +695,8 @@ static int nvm_set_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd,
>>>>>          }
>>>>>
>>>>>          rqd->nr_ppas = nr_ppas;
>>>>> -       rqd->ppa_list = nvm_dev_dma_alloc(dev, GFP_KERNEL, &rqd->dma_ppa_list);
>>>>> +       rqd->ppa_list = nvm_dev_dma_alloc(dev, NULL, GFP_KERNEL,
>>>>> +                                               &rqd->dma_ppa_list);
>>>>>          if (!rqd->ppa_list) {
>>>>>                  pr_err("nvm: failed to allocate dma memory\n");
>>>>>                  return -ENOMEM;
>>>>> @@ -708,7 +722,8 @@ static void nvm_free_rqd_ppalist(struct nvm_tgt_dev *tgt_dev,
>>>>>          if (!rqd->ppa_list)
>>>>>                  return;
>>>>>
>>>>> -       nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
>>>>> +       nvm_dev_dma_free(tgt_dev->parent, NULL, rqd->ppa_list,
>>>>> +                               rqd->dma_ppa_list);
>>>>>   }
>>>>>
>>>>>   static int nvm_set_flags(struct nvm_geo *geo, struct nvm_rq *rqd)
>>>>> @@ -1145,7 +1160,7 @@ int nvm_register(struct nvm_dev *dev)
>>>>>          if (!dev->q || !dev->ops)
>>>>>                  return -EINVAL;
>>>>>
>>>>> -       dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist");
>>>>> +       dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist", PAGE_SIZE);
>>>>>          if (!dev->dma_pool) {
>>>>>                  pr_err("nvm: could not create dma pool\n");
>>>>>                  return -ENOMEM;
>>>>
>>>> Why hack the nvm_dev_ interfaces when you are not using the dev pool anyway?
>>>> Wouldn't it be more straightforward to use dma_pool_* instead?
>>>
>>> In order to call dma_pool_create() I need NVMe device structure, which
>>> in my understanding is not public, so this is why I decided to reuse
>>> plumbing which was available in nvm_dev_* interfaces.
>>
>> Hmm, yes, I see now.
>>
>>> If there is some easy way to call dma_pool_create() from pblk module and
>>> I'm missing that - let me know. I can rewrite this part, if there is
>>> some better way to do so.
>>
>> Create and destroy needs to go through dev->ops, but once you have
>> allocated the pool, there is no need for going through the dev->ops
>> for allocate/free as far as I can see.
>> Matias: can't we just replace .dev_dma_alloc / .dev_dma_free  by calls
>> to dma_pool_alloc/free ?
>>
> 
> When we implemented this, we thought we might need some extra driver
> specific code, but it has not been the case.
> 
> I'm ok with making the allocations directly once your have the pool
> provided by the driver lightnvm bits.
> 
> Igor: can you integrate this on the patches for next revision (still
> some extra feedback to come though).
>

Sure, I will integrate these changes for the next revision and will wait for 
more feedback before publishing V2.

> Thanks,
> Javier
> 

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/5] lightnvm: Flexible DMA pool entry size
  2018-10-09 12:10       ` Hans Holmberg
  2018-10-09 12:36         ` Javier Gonzalez
@ 2018-10-09 12:56         ` Matias Bjørling
  2018-10-09 13:10           ` Javier Gonzalez
  1 sibling, 1 reply; 22+ messages in thread
From: Matias Bjørling @ 2018-10-09 12:56 UTC (permalink / raw)
  To: hans.ml.holmberg, igor.j.konopko
  Cc: javier, linux-block, marcin.dziegielewski

On 10/09/2018 02:10 PM, Hans Holmberg wrote:
> On Tue, Oct 9, 2018 at 12:03 PM Igor Konopko <igor.j.konopko@intel.com> wrote:
>>
>>
>>
>> On 09.10.2018 11:16, Hans Holmberg wrote:
>>> On Fri, Oct 5, 2018 at 3:38 PM Igor Konopko <igor.j.konopko@intel.com> wrote:
>>>>
>>>> Currently whole lightnvm and pblk uses single DMA pool,
>>>> for which entry size is always equal to PAGE_SIZE.
>>>> PPA list always needs 8b*64, so there is only 56b*64
>>>> space for OOB meta. Since NVMe OOB meta can be bigger,
>>>> such as 128b, this solution is not robustness.
>>>>
>>>> This patch add the possiblity to support OOB meta above
>>>> 56b by creating separate DMA pool for PBLK with entry
>>>> size which is big enough to store both PPA list and such
>>>> a OOB metadata.
>>>>
>>>> Signed-off-by: Igor Konopko <igor.j.konopko@intel.com>
>>>> ---
>>>>    drivers/lightnvm/core.c          | 33 +++++++++++++++++++++++---------
>>>>    drivers/lightnvm/pblk-core.c     | 19 +++++++++---------
>>>>    drivers/lightnvm/pblk-init.c     | 11 +++++++++++
>>>>    drivers/lightnvm/pblk-read.c     |  3 ++-
>>>>    drivers/lightnvm/pblk-recovery.c |  9 +++++----
>>>>    drivers/lightnvm/pblk.h          | 11 ++++++++++-
>>>>    drivers/nvme/host/lightnvm.c     |  6 ++++--
>>>>    include/linux/lightnvm.h         |  8 +++++---
>>>>    8 files changed, 71 insertions(+), 29 deletions(-)
>>>>
>>>> diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
>>>> index efb976a863d2..48db7a096257 100644
>>>> --- a/drivers/lightnvm/core.c
>>>> +++ b/drivers/lightnvm/core.c
>>>> @@ -641,20 +641,33 @@ void nvm_unregister_tgt_type(struct nvm_tgt_type *tt)
>>>>    }
>>>>    EXPORT_SYMBOL(nvm_unregister_tgt_type);
>>>>
>>>> -void *nvm_dev_dma_alloc(struct nvm_dev *dev, gfp_t mem_flags,
>>>> -                                                       dma_addr_t *dma_handler)
>>>> +void *nvm_dev_dma_alloc(struct nvm_dev *dev, void *pool,
>>>> +                               gfp_t mem_flags, dma_addr_t *dma_handler)
>>>>    {
>>>> -       return dev->ops->dev_dma_alloc(dev, dev->dma_pool, mem_flags,
>>>> -                                                               dma_handler);
>>>> +       return dev->ops->dev_dma_alloc(dev, pool ?: dev->dma_pool,
>>>> +                                               mem_flags, dma_handler);
>>>>    }
>>>>    EXPORT_SYMBOL(nvm_dev_dma_alloc);
>>>>
>>>> -void nvm_dev_dma_free(struct nvm_dev *dev, void *addr, dma_addr_t dma_handler)
>>>> +void nvm_dev_dma_free(struct nvm_dev *dev, void *pool,
>>>> +                               void *addr, dma_addr_t dma_handler)
>>>>    {
>>>> -       dev->ops->dev_dma_free(dev->dma_pool, addr, dma_handler);
>>>> +       dev->ops->dev_dma_free(pool ?: dev->dma_pool, addr, dma_handler);
>>>>    }
>>>>    EXPORT_SYMBOL(nvm_dev_dma_free);
>>>>
>>>> +void *nvm_dev_dma_create(struct nvm_dev *dev, int size, char *name)
>>>> +{
>>>> +       return dev->ops->create_dma_pool(dev, name, size);
>>>> +}
>>>> +EXPORT_SYMBOL(nvm_dev_dma_create);
>>>> +
>>>> +void nvm_dev_dma_destroy(struct nvm_dev *dev, void *pool)
>>>> +{
>>>> +       dev->ops->destroy_dma_pool(pool);
>>>> +}
>>>> +EXPORT_SYMBOL(nvm_dev_dma_destroy);
>>>> +
>>>>    static struct nvm_dev *nvm_find_nvm_dev(const char *name)
>>>>    {
>>>>           struct nvm_dev *dev;
>>>> @@ -682,7 +695,8 @@ static int nvm_set_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd,
>>>>           }
>>>>
>>>>           rqd->nr_ppas = nr_ppas;
>>>> -       rqd->ppa_list = nvm_dev_dma_alloc(dev, GFP_KERNEL, &rqd->dma_ppa_list);
>>>> +       rqd->ppa_list = nvm_dev_dma_alloc(dev, NULL, GFP_KERNEL,
>>>> +                                               &rqd->dma_ppa_list);
>>>>           if (!rqd->ppa_list) {
>>>>                   pr_err("nvm: failed to allocate dma memory\n");
>>>>                   return -ENOMEM;
>>>> @@ -708,7 +722,8 @@ static void nvm_free_rqd_ppalist(struct nvm_tgt_dev *tgt_dev,
>>>>           if (!rqd->ppa_list)
>>>>                   return;
>>>>
>>>> -       nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
>>>> +       nvm_dev_dma_free(tgt_dev->parent, NULL, rqd->ppa_list,
>>>> +                               rqd->dma_ppa_list);
>>>>    }
>>>>
>>>>    static int nvm_set_flags(struct nvm_geo *geo, struct nvm_rq *rqd)
>>>> @@ -1145,7 +1160,7 @@ int nvm_register(struct nvm_dev *dev)
>>>>           if (!dev->q || !dev->ops)
>>>>                   return -EINVAL;
>>>>
>>>> -       dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist");
>>>> +       dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist", PAGE_SIZE);
>>>>           if (!dev->dma_pool) {
>>>>                   pr_err("nvm: could not create dma pool\n");
>>>>                   return -ENOMEM;
>>>
>>> Why hack the nvm_dev_ interfaces when you are not using the dev pool anyway?
>>> Wouldn't it be more straightforward to use dma_pool_* instead?
>>>
>>
>> In order to call dma_pool_create() I need NVMe device structure, which
>> in my understanding is not public, so this is why I decided to reuse
>> plumbing which was available in nvm_dev_* interfaces.
> 
> Hmm, yes, I see now.
> 
>>
>> If there is some easy way to call dma_pool_create() from pblk module and
>> I'm missing that - let me know. I can rewrite this part, if there is
>> some better way to do so.
> 
> Create and destroy needs to go through dev->ops, but once you have
> allocated the pool, there is no need for going through the dev->ops
> for allocate/free as far as I can see.
> Matias: can't we just replace .dev_dma_alloc / .dev_dma_free  by calls
> to dma_pool_alloc/free ?
> 

Better to make the initial dma alloc better. I don't want targets to 
play around with their own DMA pools.

Something like the below (only compile tested :)):

diff --git i/drivers/lightnvm/core.c w/drivers/lightnvm/core.c
index efb976a863d2..f87696aa6b57 100644
--- i/drivers/lightnvm/core.c
+++ w/drivers/lightnvm/core.c
@@ -1141,11 +1141,17 @@ EXPORT_SYMBOL(nvm_alloc_dev);
  int nvm_register(struct nvm_dev *dev)
  {
         int ret;
+       int meta_sz;

         if (!dev->q || !dev->ops)
                 return -EINVAL;

-       dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist");
+       /* Allow room for both metadata and ppa list in the dma pool */
+       meta_sz = (dev->geo.sos * NVM_MAX_VLBA) +
+                                       (sizeof(uint64_t) * NVM_MAX_VLBA);
+       meta_sz = PAGE_SIZE * (meta_sz >> PAGE_SHIFT);
+
+       dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist", meta_sz);
         if (!dev->dma_pool) {
                 pr_err("nvm: could not create dma pool\n");
                 return -ENOMEM;
diff --git i/drivers/nvme/host/lightnvm.c w/drivers/nvme/host/lightnvm.c
index a4f3b263cd6c..31715c0cd8f2 100644
--- i/drivers/nvme/host/lightnvm.c
+++ w/drivers/nvme/host/lightnvm.c
@@ -731,11 +731,12 @@ static int nvme_nvm_submit_io_sync(struct nvm_dev 
*dev, struct nvm_rq *rq
         return ret;
  }

-static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name)
+static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name,
+                                     int size)
  {
         struct nvme_ns *ns = nvmdev->q->queuedata;

-       return dma_pool_create(name, ns->ctrl->dev, PAGE_SIZE, 
PAGE_SIZE, 0);
+       return dma_pool_create(name, ns->ctrl->dev, size, size, 0);
  }

  static void nvme_nvm_destroy_dma_pool(void *pool)
diff --git i/include/linux/lightnvm.h w/include/linux/lightnvm.h
index 2fdeac1a420d..7afedaddbd15 100644
--- i/include/linux/lightnvm.h
+++ w/include/linux/lightnvm.h
@@ -90,7 +90,7 @@ typedef int (nvm_get_chk_meta_fn)(struct nvm_dev *, 
sector_t, int,
                                                         struct 
nvm_chk_meta *);
  typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
  typedef int (nvm_submit_io_sync_fn)(struct nvm_dev *, struct nvm_rq *);
-typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *);
+typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *, int);
  typedef void (nvm_destroy_dma_pool_fn)(void *);
  typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t,
 
dma_addr_t *);

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/5] lightnvm: Flexible DMA pool entry size
  2018-10-09 12:56         ` Matias Bjørling
@ 2018-10-09 13:10           ` Javier Gonzalez
  2018-10-09 13:23             ` Matias Bjørling
  0 siblings, 1 reply; 22+ messages in thread
From: Javier Gonzalez @ 2018-10-09 13:10 UTC (permalink / raw)
  To: Matias Bjørling
  Cc: Hans Holmberg, Konopko, Igor J, linux-block, marcin.dziegielewski

[-- Attachment #1: Type: text/plain, Size: 7435 bytes --]

> On 9 Oct 2018, at 21.56, Matias Bjørling <mb@lightnvm.io> wrote:
> 
> On 10/09/2018 02:10 PM, Hans Holmberg wrote:
>> On Tue, Oct 9, 2018 at 12:03 PM Igor Konopko <igor.j.konopko@intel.com> wrote:
>>> On 09.10.2018 11:16, Hans Holmberg wrote:
>>>> On Fri, Oct 5, 2018 at 3:38 PM Igor Konopko <igor.j.konopko@intel.com> wrote:
>>>>> Currently whole lightnvm and pblk uses single DMA pool,
>>>>> for which entry size is always equal to PAGE_SIZE.
>>>>> PPA list always needs 8b*64, so there is only 56b*64
>>>>> space for OOB meta. Since NVMe OOB meta can be bigger,
>>>>> such as 128b, this solution is not robust.
>>>>> 
>>>>> This patch adds the possibility to support OOB meta above
>>>>> 56b by creating separate DMA pool for PBLK with entry
>>>>> size which is big enough to store both PPA list and such
>>>>> a OOB metadata.
>>>>> 
>>>>> Signed-off-by: Igor Konopko <igor.j.konopko@intel.com>
>>>>> ---
>>>>>   drivers/lightnvm/core.c          | 33 +++++++++++++++++++++++---------
>>>>>   drivers/lightnvm/pblk-core.c     | 19 +++++++++---------
>>>>>   drivers/lightnvm/pblk-init.c     | 11 +++++++++++
>>>>>   drivers/lightnvm/pblk-read.c     |  3 ++-
>>>>>   drivers/lightnvm/pblk-recovery.c |  9 +++++----
>>>>>   drivers/lightnvm/pblk.h          | 11 ++++++++++-
>>>>>   drivers/nvme/host/lightnvm.c     |  6 ++++--
>>>>>   include/linux/lightnvm.h         |  8 +++++---
>>>>>   8 files changed, 71 insertions(+), 29 deletions(-)
>>>>> 
>>>>> diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
>>>>> index efb976a863d2..48db7a096257 100644
>>>>> --- a/drivers/lightnvm/core.c
>>>>> +++ b/drivers/lightnvm/core.c
>>>>> @@ -641,20 +641,33 @@ void nvm_unregister_tgt_type(struct nvm_tgt_type *tt)
>>>>>   }
>>>>>   EXPORT_SYMBOL(nvm_unregister_tgt_type);
>>>>> 
>>>>> -void *nvm_dev_dma_alloc(struct nvm_dev *dev, gfp_t mem_flags,
>>>>> -                                                       dma_addr_t *dma_handler)
>>>>> +void *nvm_dev_dma_alloc(struct nvm_dev *dev, void *pool,
>>>>> +                               gfp_t mem_flags, dma_addr_t *dma_handler)
>>>>>   {
>>>>> -       return dev->ops->dev_dma_alloc(dev, dev->dma_pool, mem_flags,
>>>>> -                                                               dma_handler);
>>>>> +       return dev->ops->dev_dma_alloc(dev, pool ?: dev->dma_pool,
>>>>> +                                               mem_flags, dma_handler);
>>>>>   }
>>>>>   EXPORT_SYMBOL(nvm_dev_dma_alloc);
>>>>> 
>>>>> -void nvm_dev_dma_free(struct nvm_dev *dev, void *addr, dma_addr_t dma_handler)
>>>>> +void nvm_dev_dma_free(struct nvm_dev *dev, void *pool,
>>>>> +                               void *addr, dma_addr_t dma_handler)
>>>>>   {
>>>>> -       dev->ops->dev_dma_free(dev->dma_pool, addr, dma_handler);
>>>>> +       dev->ops->dev_dma_free(pool ?: dev->dma_pool, addr, dma_handler);
>>>>>   }
>>>>>   EXPORT_SYMBOL(nvm_dev_dma_free);
>>>>> 
>>>>> +void *nvm_dev_dma_create(struct nvm_dev *dev, int size, char *name)
>>>>> +{
>>>>> +       return dev->ops->create_dma_pool(dev, name, size);
>>>>> +}
>>>>> +EXPORT_SYMBOL(nvm_dev_dma_create);
>>>>> +
>>>>> +void nvm_dev_dma_destroy(struct nvm_dev *dev, void *pool)
>>>>> +{
>>>>> +       dev->ops->destroy_dma_pool(pool);
>>>>> +}
>>>>> +EXPORT_SYMBOL(nvm_dev_dma_destroy);
>>>>> +
>>>>>   static struct nvm_dev *nvm_find_nvm_dev(const char *name)
>>>>>   {
>>>>>          struct nvm_dev *dev;
>>>>> @@ -682,7 +695,8 @@ static int nvm_set_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd,
>>>>>          }
>>>>> 
>>>>>          rqd->nr_ppas = nr_ppas;
>>>>> -       rqd->ppa_list = nvm_dev_dma_alloc(dev, GFP_KERNEL, &rqd->dma_ppa_list);
>>>>> +       rqd->ppa_list = nvm_dev_dma_alloc(dev, NULL, GFP_KERNEL,
>>>>> +                                               &rqd->dma_ppa_list);
>>>>>          if (!rqd->ppa_list) {
>>>>>                  pr_err("nvm: failed to allocate dma memory\n");
>>>>>                  return -ENOMEM;
>>>>> @@ -708,7 +722,8 @@ static void nvm_free_rqd_ppalist(struct nvm_tgt_dev *tgt_dev,
>>>>>          if (!rqd->ppa_list)
>>>>>                  return;
>>>>> 
>>>>> -       nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
>>>>> +       nvm_dev_dma_free(tgt_dev->parent, NULL, rqd->ppa_list,
>>>>> +                               rqd->dma_ppa_list);
>>>>>   }
>>>>> 
>>>>>   static int nvm_set_flags(struct nvm_geo *geo, struct nvm_rq *rqd)
>>>>> @@ -1145,7 +1160,7 @@ int nvm_register(struct nvm_dev *dev)
>>>>>          if (!dev->q || !dev->ops)
>>>>>                  return -EINVAL;
>>>>> 
>>>>> -       dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist");
>>>>> +       dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist", PAGE_SIZE);
>>>>>          if (!dev->dma_pool) {
>>>>>                  pr_err("nvm: could not create dma pool\n");
>>>>>                  return -ENOMEM;
>>>> 
>>>> Why hack the nvm_dev_ interfaces when you are not using the dev pool anyway?
>>>> Wouldn't it be more straightforward to use dma_pool_* instead?
>>> 
>>> In order to call dma_pool_create() I need NVMe device structure, which
>>> in my understanding is not public, so this is why I decided to reuse
>>> plumbing which was available in nvm_dev_* interfaces.
>> Hmm, yes, I see now.
>>> If there is some easy way to call dma_pool_create() from pblk module and
>>> I'm missing that - let me know. I can rewrite this part, if there is
>>> some better way to do so.
>> Create and destroy needs to go through dev->ops, but once you have
>> allocated the pool, there is no need for going through the dev->ops
>> for allocate/free as far as I can see.
>> Matias: can't we just replace .dev_dma_alloc / .dev_dma_free  by calls
>> to dma_pool_alloc/free ?
> 
> Better to make the initial dma alloc better. I don't want targets to play around with their own DMA pools.
> 
> Something like the below (only compile tested :)):

Good idea. One comment below, though.

> 
> diff --git i/drivers/lightnvm/core.c w/drivers/lightnvm/core.c
> index efb976a863d2..f87696aa6b57 100644
> --- i/drivers/lightnvm/core.c
> +++ w/drivers/lightnvm/core.c
> @@ -1141,11 +1141,17 @@ EXPORT_SYMBOL(nvm_alloc_dev);
> int nvm_register(struct nvm_dev *dev)
> {
>        int ret;
> +       int meta_sz;
> 
>        if (!dev->q || !dev->ops)
>                return -EINVAL;
> 
> -       dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist");
> +       /* Allow room for both metadata and ppa list in the dma pool */
> +       meta_sz = (dev->geo.sos * NVM_MAX_VLBA) +
> +                                       (sizeof(uint64_t) * NVM_MAX_VLBA);
> +       meta_sz = PAGE_SIZE * (meta_sz >> PAGE_SHIFT);

Can we just send the required min meta_sz and then let the dma
allocation do the alignment? The driver knows if it can fit the
allocation without wasting unnecessary space.

Also, by sharing the ppa_list allocation and the metadata on the same
pool, we risk having unnecessary larger DMA buffers for the ppa_list.
Better have 2 pools, each with each own size. In fact, the only reason
we made the original allocation PAGE_SIZE was to fit the ppa_list and
metadata on the same allocation; now that we are relaxing this, we can
have tighter allocations for the ppa_list as there is a hard limit on 64
lbas (512B).

Javier

[-- Attachment #2: Message signed with OpenPGP --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/5] lightnvm: Flexible DMA pool entry size
  2018-10-09 13:10           ` Javier Gonzalez
@ 2018-10-09 13:23             ` Matias Bjørling
  2018-10-09 13:31               ` Javier Gonzalez
  0 siblings, 1 reply; 22+ messages in thread
From: Matias Bjørling @ 2018-10-09 13:23 UTC (permalink / raw)
  To: javier
  Cc: hans.ml.holmberg, igor.j.konopko, linux-block, marcin.dziegielewski

On 10/09/2018 03:10 PM, Javier Gonzalez wrote:
>> On 9 Oct 2018, at 21.56, Matias Bjørling <mb@lightnvm.io> wrote:
>>
>> On 10/09/2018 02:10 PM, Hans Holmberg wrote:
>>> On Tue, Oct 9, 2018 at 12:03 PM Igor Konopko <igor.j.konopko@intel.com> wrote:
>>>> On 09.10.2018 11:16, Hans Holmberg wrote:
>>>>> On Fri, Oct 5, 2018 at 3:38 PM Igor Konopko <igor.j.konopko@intel.com> wrote:
>>>>>> Currently whole lightnvm and pblk uses single DMA pool,
>>>>>> for which entry size is always equal to PAGE_SIZE.
>>>>>> PPA list always needs 8b*64, so there is only 56b*64
>>>>>> space for OOB meta. Since NVMe OOB meta can be bigger,
>>>>>> such as 128b, this solution is not robust.
>>>>>>
>>>>>> This patch adds the possibility to support OOB meta above
>>>>>> 56b by creating separate DMA pool for PBLK with entry
>>>>>> size which is big enough to store both PPA list and such
>>>>>> a OOB metadata.
>>>>>>
>>>>>> Signed-off-by: Igor Konopko <igor.j.konopko@intel.com>
>>>>>> ---
>>>>>>    drivers/lightnvm/core.c          | 33 +++++++++++++++++++++++---------
>>>>>>    drivers/lightnvm/pblk-core.c     | 19 +++++++++---------
>>>>>>    drivers/lightnvm/pblk-init.c     | 11 +++++++++++
>>>>>>    drivers/lightnvm/pblk-read.c     |  3 ++-
>>>>>>    drivers/lightnvm/pblk-recovery.c |  9 +++++----
>>>>>>    drivers/lightnvm/pblk.h          | 11 ++++++++++-
>>>>>>    drivers/nvme/host/lightnvm.c     |  6 ++++--
>>>>>>    include/linux/lightnvm.h         |  8 +++++---
>>>>>>    8 files changed, 71 insertions(+), 29 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
>>>>>> index efb976a863d2..48db7a096257 100644
>>>>>> --- a/drivers/lightnvm/core.c
>>>>>> +++ b/drivers/lightnvm/core.c
>>>>>> @@ -641,20 +641,33 @@ void nvm_unregister_tgt_type(struct nvm_tgt_type *tt)
>>>>>>    }
>>>>>>    EXPORT_SYMBOL(nvm_unregister_tgt_type);
>>>>>>
>>>>>> -void *nvm_dev_dma_alloc(struct nvm_dev *dev, gfp_t mem_flags,
>>>>>> -                                                       dma_addr_t *dma_handler)
>>>>>> +void *nvm_dev_dma_alloc(struct nvm_dev *dev, void *pool,
>>>>>> +                               gfp_t mem_flags, dma_addr_t *dma_handler)
>>>>>>    {
>>>>>> -       return dev->ops->dev_dma_alloc(dev, dev->dma_pool, mem_flags,
>>>>>> -                                                               dma_handler);
>>>>>> +       return dev->ops->dev_dma_alloc(dev, pool ?: dev->dma_pool,
>>>>>> +                                               mem_flags, dma_handler);
>>>>>>    }
>>>>>>    EXPORT_SYMBOL(nvm_dev_dma_alloc);
>>>>>>
>>>>>> -void nvm_dev_dma_free(struct nvm_dev *dev, void *addr, dma_addr_t dma_handler)
>>>>>> +void nvm_dev_dma_free(struct nvm_dev *dev, void *pool,
>>>>>> +                               void *addr, dma_addr_t dma_handler)
>>>>>>    {
>>>>>> -       dev->ops->dev_dma_free(dev->dma_pool, addr, dma_handler);
>>>>>> +       dev->ops->dev_dma_free(pool ?: dev->dma_pool, addr, dma_handler);
>>>>>>    }
>>>>>>    EXPORT_SYMBOL(nvm_dev_dma_free);
>>>>>>
>>>>>> +void *nvm_dev_dma_create(struct nvm_dev *dev, int size, char *name)
>>>>>> +{
>>>>>> +       return dev->ops->create_dma_pool(dev, name, size);
>>>>>> +}
>>>>>> +EXPORT_SYMBOL(nvm_dev_dma_create);
>>>>>> +
>>>>>> +void nvm_dev_dma_destroy(struct nvm_dev *dev, void *pool)
>>>>>> +{
>>>>>> +       dev->ops->destroy_dma_pool(pool);
>>>>>> +}
>>>>>> +EXPORT_SYMBOL(nvm_dev_dma_destroy);
>>>>>> +
>>>>>>    static struct nvm_dev *nvm_find_nvm_dev(const char *name)
>>>>>>    {
>>>>>>           struct nvm_dev *dev;
>>>>>> @@ -682,7 +695,8 @@ static int nvm_set_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd,
>>>>>>           }
>>>>>>
>>>>>>           rqd->nr_ppas = nr_ppas;
>>>>>> -       rqd->ppa_list = nvm_dev_dma_alloc(dev, GFP_KERNEL, &rqd->dma_ppa_list);
>>>>>> +       rqd->ppa_list = nvm_dev_dma_alloc(dev, NULL, GFP_KERNEL,
>>>>>> +                                               &rqd->dma_ppa_list);
>>>>>>           if (!rqd->ppa_list) {
>>>>>>                   pr_err("nvm: failed to allocate dma memory\n");
>>>>>>                   return -ENOMEM;
>>>>>> @@ -708,7 +722,8 @@ static void nvm_free_rqd_ppalist(struct nvm_tgt_dev *tgt_dev,
>>>>>>           if (!rqd->ppa_list)
>>>>>>                   return;
>>>>>>
>>>>>> -       nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
>>>>>> +       nvm_dev_dma_free(tgt_dev->parent, NULL, rqd->ppa_list,
>>>>>> +                               rqd->dma_ppa_list);
>>>>>>    }
>>>>>>
>>>>>>    static int nvm_set_flags(struct nvm_geo *geo, struct nvm_rq *rqd)
>>>>>> @@ -1145,7 +1160,7 @@ int nvm_register(struct nvm_dev *dev)
>>>>>>           if (!dev->q || !dev->ops)
>>>>>>                   return -EINVAL;
>>>>>>
>>>>>> -       dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist");
>>>>>> +       dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist", PAGE_SIZE);
>>>>>>           if (!dev->dma_pool) {
>>>>>>                   pr_err("nvm: could not create dma pool\n");
>>>>>>                   return -ENOMEM;
>>>>>
>>>>> Why hack the nvm_dev_ interfaces when you are not using the dev pool anyway?
>>>>> Wouldn't it be more straightforward to use dma_pool_* instead?
>>>>
>>>> In order to call dma_pool_create() I need NVMe device structure, which
>>>> in my understanding is not public, so this is why I decided to reuse
>>>> plumbing which was available in nvm_dev_* interfaces.
>>> Hmm, yes, I see now.
>>>> If there is some easy way to call dma_pool_create() from pblk module and
>>>> I'm missing that - let me know. I can rewrite this part, if there is
>>>> some better way to do so.
>>> Create and destroy needs to go through dev->ops, but once you have
>>> allocated the pool, there is no need for going through the dev->ops
>>> for allocate/free as far as I can see.
>>> Matias: can't we just replace .dev_dma_alloc / .dev_dma_free  by calls
>>> to dma_pool_alloc/free ?
>>
>> Better to make the initial dma alloc better. I don't want targets to play around with their own DMA pools.
>>
>> Something like the below (only compile tested :)):
> 
> Good idea. One comment below, though.
> 
>>
>> diff --git i/drivers/lightnvm/core.c w/drivers/lightnvm/core.c
>> index efb976a863d2..f87696aa6b57 100644
>> --- i/drivers/lightnvm/core.c
>> +++ w/drivers/lightnvm/core.c
>> @@ -1141,11 +1141,17 @@ EXPORT_SYMBOL(nvm_alloc_dev);
>> int nvm_register(struct nvm_dev *dev)
>> {
>>         int ret;
>> +       int meta_sz;
>>
>>         if (!dev->q || !dev->ops)
>>                 return -EINVAL;
>>
>> -       dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist");
>> +       /* Allow room for both metadata and ppa list in the dma pool */
>> +       meta_sz = (dev->geo.sos * NVM_MAX_VLBA) +
>> +                                       (sizeof(uint64_t) * NVM_MAX_VLBA);
>> +       meta_sz = PAGE_SIZE * (meta_sz >> PAGE_SHIFT);
> 
> Can we just send the required min meta_sz and then let the dma
> allocation do the alignment? The driver knows if it can fit the
> allocation without wasting unnecessary space.

The alignment must be a power of two, and from what I can see, the size 
will be rounded up internally in the dma_pool_create function. I don't 
have a strong opinion either way.

> 
> Also, by sharing the ppa_list allocation and the metadata on the same
> pool, we risk having unnecessary larger DMA buffers for the ppa_list.
> Better have 2 pools, each with each own size. In fact, the only reason
> we made the original allocation PAGE_SIZE was to fit the ppa_list and
> metadata on the same allocation; now that we are relaxing this, we can
> have tighter allocations for the ppa_list as there is a hard limit on 64
> lbas (512B).
> 

Let's keep it together for now. I want to avoid having if/else's in the 
target code that has to decide on which pool to use. Also, most devices 
will fit within the PAGE_SIZE allocation.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/5] lightnvm: Flexible DMA pool entry size
  2018-10-09 13:23             ` Matias Bjørling
@ 2018-10-09 13:31               ` Javier Gonzalez
  0 siblings, 0 replies; 22+ messages in thread
From: Javier Gonzalez @ 2018-10-09 13:31 UTC (permalink / raw)
  To: Matias Bjørling
  Cc: hans.ml.holmberg, Konopko, Igor J, linux-block, marcin.dziegielewski

[-- Attachment #1: Type: text/plain, Size: 8663 bytes --]

> On 9 Oct 2018, at 22.23, Matias Bjørling <mb@lightnvm.io> wrote:
> 
> On 10/09/2018 03:10 PM, Javier Gonzalez wrote:
>>> On 9 Oct 2018, at 21.56, Matias Bjørling <mb@lightnvm.io> wrote:
>>> 
>>> On 10/09/2018 02:10 PM, Hans Holmberg wrote:
>>>> On Tue, Oct 9, 2018 at 12:03 PM Igor Konopko <igor.j.konopko@intel.com> wrote:
>>>>> On 09.10.2018 11:16, Hans Holmberg wrote:
>>>>>> On Fri, Oct 5, 2018 at 3:38 PM Igor Konopko <igor.j.konopko@intel.com> wrote:
>>>>>>> Currently whole lightnvm and pblk uses single DMA pool,
>>>>>>> for which entry size is always equal to PAGE_SIZE.
>>>>>>> PPA list always needs 8b*64, so there is only 56b*64
>>>>>>> space for OOB meta. Since NVMe OOB meta can be bigger,
>>>>>>> such as 128b, this solution is not robust.
>>>>>>> 
>>>>>>> This patch adds the possibility to support OOB meta above
>>>>>>> 56b by creating separate DMA pool for PBLK with entry
>>>>>>> size which is big enough to store both PPA list and such
>>>>>>> a OOB metadata.
>>>>>>> 
>>>>>>> Signed-off-by: Igor Konopko <igor.j.konopko@intel.com>
>>>>>>> ---
>>>>>>>   drivers/lightnvm/core.c          | 33 +++++++++++++++++++++++---------
>>>>>>>   drivers/lightnvm/pblk-core.c     | 19 +++++++++---------
>>>>>>>   drivers/lightnvm/pblk-init.c     | 11 +++++++++++
>>>>>>>   drivers/lightnvm/pblk-read.c     |  3 ++-
>>>>>>>   drivers/lightnvm/pblk-recovery.c |  9 +++++----
>>>>>>>   drivers/lightnvm/pblk.h          | 11 ++++++++++-
>>>>>>>   drivers/nvme/host/lightnvm.c     |  6 ++++--
>>>>>>>   include/linux/lightnvm.h         |  8 +++++---
>>>>>>>   8 files changed, 71 insertions(+), 29 deletions(-)
>>>>>>> 
>>>>>>> diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
>>>>>>> index efb976a863d2..48db7a096257 100644
>>>>>>> --- a/drivers/lightnvm/core.c
>>>>>>> +++ b/drivers/lightnvm/core.c
>>>>>>> @@ -641,20 +641,33 @@ void nvm_unregister_tgt_type(struct nvm_tgt_type *tt)
>>>>>>>   }
>>>>>>>   EXPORT_SYMBOL(nvm_unregister_tgt_type);
>>>>>>> 
>>>>>>> -void *nvm_dev_dma_alloc(struct nvm_dev *dev, gfp_t mem_flags,
>>>>>>> -                                                       dma_addr_t *dma_handler)
>>>>>>> +void *nvm_dev_dma_alloc(struct nvm_dev *dev, void *pool,
>>>>>>> +                               gfp_t mem_flags, dma_addr_t *dma_handler)
>>>>>>>   {
>>>>>>> -       return dev->ops->dev_dma_alloc(dev, dev->dma_pool, mem_flags,
>>>>>>> -                                                               dma_handler);
>>>>>>> +       return dev->ops->dev_dma_alloc(dev, pool ?: dev->dma_pool,
>>>>>>> +                                               mem_flags, dma_handler);
>>>>>>>   }
>>>>>>>   EXPORT_SYMBOL(nvm_dev_dma_alloc);
>>>>>>> 
>>>>>>> -void nvm_dev_dma_free(struct nvm_dev *dev, void *addr, dma_addr_t dma_handler)
>>>>>>> +void nvm_dev_dma_free(struct nvm_dev *dev, void *pool,
>>>>>>> +                               void *addr, dma_addr_t dma_handler)
>>>>>>>   {
>>>>>>> -       dev->ops->dev_dma_free(dev->dma_pool, addr, dma_handler);
>>>>>>> +       dev->ops->dev_dma_free(pool ?: dev->dma_pool, addr, dma_handler);
>>>>>>>   }
>>>>>>>   EXPORT_SYMBOL(nvm_dev_dma_free);
>>>>>>> 
>>>>>>> +void *nvm_dev_dma_create(struct nvm_dev *dev, int size, char *name)
>>>>>>> +{
>>>>>>> +       return dev->ops->create_dma_pool(dev, name, size);
>>>>>>> +}
>>>>>>> +EXPORT_SYMBOL(nvm_dev_dma_create);
>>>>>>> +
>>>>>>> +void nvm_dev_dma_destroy(struct nvm_dev *dev, void *pool)
>>>>>>> +{
>>>>>>> +       dev->ops->destroy_dma_pool(pool);
>>>>>>> +}
>>>>>>> +EXPORT_SYMBOL(nvm_dev_dma_destroy);
>>>>>>> +
>>>>>>>   static struct nvm_dev *nvm_find_nvm_dev(const char *name)
>>>>>>>   {
>>>>>>>          struct nvm_dev *dev;
>>>>>>> @@ -682,7 +695,8 @@ static int nvm_set_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd,
>>>>>>>          }
>>>>>>> 
>>>>>>>          rqd->nr_ppas = nr_ppas;
>>>>>>> -       rqd->ppa_list = nvm_dev_dma_alloc(dev, GFP_KERNEL, &rqd->dma_ppa_list);
>>>>>>> +       rqd->ppa_list = nvm_dev_dma_alloc(dev, NULL, GFP_KERNEL,
>>>>>>> +                                               &rqd->dma_ppa_list);
>>>>>>>          if (!rqd->ppa_list) {
>>>>>>>                  pr_err("nvm: failed to allocate dma memory\n");
>>>>>>>                  return -ENOMEM;
>>>>>>> @@ -708,7 +722,8 @@ static void nvm_free_rqd_ppalist(struct nvm_tgt_dev *tgt_dev,
>>>>>>>          if (!rqd->ppa_list)
>>>>>>>                  return;
>>>>>>> 
>>>>>>> -       nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
>>>>>>> +       nvm_dev_dma_free(tgt_dev->parent, NULL, rqd->ppa_list,
>>>>>>> +                               rqd->dma_ppa_list);
>>>>>>>   }
>>>>>>> 
>>>>>>>   static int nvm_set_flags(struct nvm_geo *geo, struct nvm_rq *rqd)
>>>>>>> @@ -1145,7 +1160,7 @@ int nvm_register(struct nvm_dev *dev)
>>>>>>>          if (!dev->q || !dev->ops)
>>>>>>>                  return -EINVAL;
>>>>>>> 
>>>>>>> -       dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist");
>>>>>>> +       dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist", PAGE_SIZE);
>>>>>>>          if (!dev->dma_pool) {
>>>>>>>                  pr_err("nvm: could not create dma pool\n");
>>>>>>>                  return -ENOMEM;
>>>>>> 
>>>>>> Why hack the nvm_dev_ interfaces when you are not using the dev pool anyway?
>>>>>> Wouldn't it be more straightforward to use dma_pool_* instead?
>>>>> 
>>>>> In order to call dma_pool_create() I need NVMe device structure, which
>>>>> in my understanding is not public, so this is why I decided to reuse
>>>>> plumbing which was available in nvm_dev_* interfaces.
>>>> Hmm, yes, I see now.
>>>>> If there is some easy way to call dma_pool_create() from pblk module and
>>>>> I'm missing that - let me know. I can rewrite this part, if there is
>>>>> some better way to do so.
>>>> Create and destroy needs to go through dev->ops, but once you have
>>>> allocated the pool, there is no need for going through the dev->ops
>>>> for allocate/free as far as I can see.
>>>> Matias: can't we just replace .dev_dma_alloc / .dev_dma_free  by calls
>>>> to dma_pool_alloc/free ?
>>> 
>>> Better to make the initial dma alloc better. I don't want targets to play around with their own DMA pools.
>>> 
>>> Something like the below (only compile tested :)):
>> Good idea. One comment below, though.
>>> diff --git i/drivers/lightnvm/core.c w/drivers/lightnvm/core.c
>>> index efb976a863d2..f87696aa6b57 100644
>>> --- i/drivers/lightnvm/core.c
>>> +++ w/drivers/lightnvm/core.c
>>> @@ -1141,11 +1141,17 @@ EXPORT_SYMBOL(nvm_alloc_dev);
>>> int nvm_register(struct nvm_dev *dev)
>>> {
>>>        int ret;
>>> +       int meta_sz;
>>> 
>>>        if (!dev->q || !dev->ops)
>>>                return -EINVAL;
>>> 
>>> -       dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist");
>>> +       /* Allow room for both metadata and ppa list in the dma pool */
>>> +       meta_sz = (dev->geo.sos * NVM_MAX_VLBA) +
>>> +                                       (sizeof(uint64_t) * NVM_MAX_VLBA);
>>> +       meta_sz = PAGE_SIZE * (meta_sz >> PAGE_SHIFT);
>> Can we just send the required min meta_sz and then let the dma
>> allocation do the alignment? The driver knows if it can fit the
>> allocation without wasting unnecessary space.
> 
> The alignment must be a power of two, and from what I can see, the
> size will be rounded up internally in the dma_pool_create function. I
> don't have a strong opinion either way.
> 
>> Also, by sharing the ppa_list allocation and the metadata on the same
>> pool, we risk having unnecessary larger DMA buffers for the ppa_list.
>> Better have 2 pools, each with each own size. In fact, the only reason
>> we made the original allocation PAGE_SIZE was to fit the ppa_list and
>> metadata on the same allocation; now that we are relaxing this, we can
>> have tighter allocations for the ppa_list as there is a hard limit on 64
>> lbas (512B).
> 
> Let's keep it together for now. I want to avoid having if/else's in
> the target code that has to decide on which pool to use.

Why if/else? The lba list is allocated from the ppa_list pool and the
metadatada from the meta_pool. From the target perspective it is
different pools for a different purposes, as opposed to the small pool
for 256B PRPs in the nvme driver, which requires a size check.

Am I missing something?

> Also, most devices will fit within the PAGE_SIZE allocation.

We hear requirements for larger OOB areas from different places, so I'm
not sure this is true anymore.

Javier

[-- Attachment #2: Message signed with OpenPGP --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/5] lightnvm: pblk: Do not reuse DMA memory on partial read
  2018-10-05 13:34 ` [PATCH 1/5] lightnvm: pblk: Do not reuse DMA memory on partial read Igor Konopko
@ 2018-10-12 11:32   ` Javier Gonzalez
  0 siblings, 0 replies; 22+ messages in thread
From: Javier Gonzalez @ 2018-10-12 11:32 UTC (permalink / raw)
  To: Igor Konopko; +Cc: mb, linux-block, marcin.dziegielewski

[-- Attachment #1: Type: text/plain, Size: 4110 bytes --]

> On 5 Oct 2018, at 15.34, Igor Konopko <igor.j.konopko@intel.com> wrote:
> 
> Currently DMA allocated memory is reused on partial read
> path for some internal pblk structs. In preparation for
> dynamic DMA pool sizes we need to change it to kmalloc
> allocated memory.
> 
> Signed-off-by: Igor Konopko <igor.j.konopko@intel.com>
> ---
> drivers/lightnvm/pblk-read.c | 20 +++++---------------
> drivers/lightnvm/pblk.h      |  2 ++
> 2 files changed, 7 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
> index d340dece1d00..08f6ebd4bc48 100644
> --- a/drivers/lightnvm/pblk-read.c
> +++ b/drivers/lightnvm/pblk-read.c
> @@ -224,7 +224,6 @@ static void pblk_end_partial_read(struct nvm_rq *rqd)
> 	unsigned long *read_bitmap = pr_ctx->bitmap;
> 	int nr_secs = pr_ctx->orig_nr_secs;
> 	int nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs);
> -	__le64 *lba_list_mem, *lba_list_media;
> 	void *src_p, *dst_p;
> 	int hole, i;
> 
> @@ -237,13 +236,9 @@ static void pblk_end_partial_read(struct nvm_rq *rqd)
> 		rqd->ppa_list[0] = ppa;
> 	}
> 
> -	/* Re-use allocated memory for intermediate lbas */
> -	lba_list_mem = (((void *)rqd->ppa_list) + pblk_dma_ppa_size);
> -	lba_list_media = (((void *)rqd->ppa_list) + 2 * pblk_dma_ppa_size);
> -
> 	for (i = 0; i < nr_secs; i++) {
> -		lba_list_media[i] = meta_list[i].lba;
> -		meta_list[i].lba = lba_list_mem[i];
> +		pr_ctx->lba_list_media[i] = meta_list[i].lba;
> +		meta_list[i].lba = pr_ctx->lba_list_mem[i];
> 	}
> 
> 	/* Fill the holes in the original bio */
> @@ -255,7 +250,7 @@ static void pblk_end_partial_read(struct nvm_rq *rqd)
> 		line = pblk_ppa_to_line(pblk, rqd->ppa_list[i]);
> 		kref_put(&line->ref, pblk_line_put);
> 
> -		meta_list[hole].lba = lba_list_media[i];
> +		meta_list[hole].lba = pr_ctx->lba_list_media[i];
> 
> 		src_bv = new_bio->bi_io_vec[i++];
> 		dst_bv = bio->bi_io_vec[bio_init_idx + hole];
> @@ -295,13 +290,9 @@ static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
> 	struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
> 	struct pblk_pr_ctx *pr_ctx;
> 	struct bio *new_bio, *bio = r_ctx->private;
> -	__le64 *lba_list_mem;
> 	int nr_secs = rqd->nr_ppas;
> 	int i;
> 
> -	/* Re-use allocated memory for intermediate lbas */
> -	lba_list_mem = (((void *)rqd->ppa_list) + pblk_dma_ppa_size);
> -
> 	new_bio = bio_alloc(GFP_KERNEL, nr_holes);
> 
> 	if (pblk_bio_add_pages(pblk, new_bio, GFP_KERNEL, nr_holes))
> @@ -312,12 +303,12 @@ static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
> 		goto fail_free_pages;
> 	}
> 
> -	pr_ctx = kmalloc(sizeof(struct pblk_pr_ctx), GFP_KERNEL);
> +	pr_ctx = kzalloc(sizeof(struct pblk_pr_ctx), GFP_KERNEL);
> 	if (!pr_ctx)
> 		goto fail_free_pages;
> 
> 	for (i = 0; i < nr_secs; i++)
> -		lba_list_mem[i] = meta_list[i].lba;
> +		pr_ctx->lba_list_mem[i] = meta_list[i].lba;
> 
> 	new_bio->bi_iter.bi_sector = 0; /* internal bio */
> 	bio_set_op_attrs(new_bio, REQ_OP_READ, 0);
> @@ -325,7 +316,6 @@ static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
> 	rqd->bio = new_bio;
> 	rqd->nr_ppas = nr_holes;
> 
> -	pr_ctx->ppa_ptr = NULL;
> 	pr_ctx->orig_bio = bio;
> 	bitmap_copy(pr_ctx->bitmap, read_bitmap, NVM_MAX_VLBA);
> 	pr_ctx->bio_init_idx = bio_init_idx;
> diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
> index 0f98ea24ee59..aea09879636f 100644
> --- a/drivers/lightnvm/pblk.h
> +++ b/drivers/lightnvm/pblk.h
> @@ -132,6 +132,8 @@ struct pblk_pr_ctx {
> 	unsigned int bio_init_idx;
> 	void *ppa_ptr;
> 	dma_addr_t dma_ppa_list;
> +	__le64 lba_list_mem[NVM_MAX_VLBA];
> +	__le64 lba_list_media[NVM_MAX_VLBA];
> };
> 
> /* Pad context */
> --
> 2.17.1

The patch looks good but the commit message is a bit confusing, as it
implies that we do specific allocations for the lba and meta lists, when
in reality the patch extends the partial context with a fixed
NVM_MAX_VLBA length.

It would help if you can make this clear on V2

Javier

[-- Attachment #2: Message signed with OpenPGP --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/5] lightnvm: pblk: Helpers for OOB metadata
  2018-10-05 13:34 ` [PATCH 2/5] lightnvm: pblk: Helpers for OOB metadata Igor Konopko
  2018-10-09  9:02   ` Hans Holmberg
@ 2018-10-12 11:52   ` Javier Gonzalez
  1 sibling, 0 replies; 22+ messages in thread
From: Javier Gonzalez @ 2018-10-12 11:52 UTC (permalink / raw)
  To: Igor Konopko; +Cc: mb, linux-block, marcin.dziegielewski

[-- Attachment #1: Type: text/plain, Size: 15557 bytes --]

> On 5 Oct 2018, at 15.34, Igor Konopko <igor.j.konopko@intel.com> wrote:
> 
> Currently pblk assumes that size of OOB metadata on drive is always
> equal to size of pblk_sec_meta struct. This commit add helpers which will
> allow to handle different sizes of OOB metadata on drive.
> 
> Signed-off-by: Igor Konopko <igor.j.konopko@intel.com>
> ---
> drivers/lightnvm/pblk-core.c     |  6 ++---
> drivers/lightnvm/pblk-map.c      | 21 ++++++++++------
> drivers/lightnvm/pblk-read.c     | 41 +++++++++++++++++++-------------
> drivers/lightnvm/pblk-recovery.c | 14 ++++++-----
> drivers/lightnvm/pblk.h          | 37 +++++++++++++++++++++++++++-
> 5 files changed, 86 insertions(+), 33 deletions(-)
> 
> diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
> index 6944aac43b01..7cb39d84c833 100644
> --- a/drivers/lightnvm/pblk-core.c
> +++ b/drivers/lightnvm/pblk-core.c
> @@ -743,7 +743,6 @@ int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line)
> 	rqd.opcode = NVM_OP_PREAD;
> 	rqd.nr_ppas = lm->smeta_sec;
> 	rqd.is_seq = 1;
> -

No need to remove this blank line.

> 	for (i = 0; i < lm->smeta_sec; i++, paddr++)
> 		rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
> 
> @@ -796,10 +795,11 @@ static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line,
> 	rqd.is_seq = 1;
> 
> 	for (i = 0; i < lm->smeta_sec; i++, paddr++) {
> -		struct pblk_sec_meta *meta_list = rqd.meta_list;
> +		void *meta_list = rqd.meta_list;
> 
> 		rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
> -		meta_list[i].lba = lba_list[paddr] = addr_empty;
> +		pblk_set_meta_lba(pblk, meta_list, i, ADDR_EMPTY);
> +		lba_list[paddr] = addr_empty;
> 	}
> 
> 	ret = pblk_submit_io_sync_sem(pblk, &rqd);
> diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c
> index 6dcbd44e3acb..4c7a9909308e 100644
> --- a/drivers/lightnvm/pblk-map.c
> +++ b/drivers/lightnvm/pblk-map.c
> @@ -22,7 +22,7 @@
> static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
> 			      struct ppa_addr *ppa_list,
> 			      unsigned long *lun_bitmap,
> -			      struct pblk_sec_meta *meta_list,
> +			      void *meta_list,
> 			      unsigned int valid_secs)
> {
> 	struct pblk_line *line = pblk_line_get_data(pblk);
> @@ -68,14 +68,15 @@ static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
> 			kref_get(&line->ref);
> 			w_ctx = pblk_rb_w_ctx(&pblk->rwb, sentry + i);
> 			w_ctx->ppa = ppa_list[i];
> -			meta_list[i].lba = cpu_to_le64(w_ctx->lba);
> +			pblk_set_meta_lba(pblk, meta_list, i, w_ctx->lba);
> 			lba_list[paddr] = cpu_to_le64(w_ctx->lba);
> 			if (lba_list[paddr] != addr_empty)
> 				line->nr_valid_lbas++;
> 			else
> 				atomic64_inc(&pblk->pad_wa);
> 		} else {
> -			lba_list[paddr] = meta_list[i].lba = addr_empty;
> +			lba_list[paddr] = addr_empty;
> +			pblk_set_meta_lba(pblk, meta_list, i, ADDR_EMPTY);
> 			__pblk_map_invalidate(pblk, line, paddr);
> 		}
> 	}
> @@ -88,7 +89,7 @@ void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
> 		 unsigned long *lun_bitmap, unsigned int valid_secs,
> 		 unsigned int off)
> {
> -	struct pblk_sec_meta *meta_list = rqd->meta_list;
> +	void *meta_list = rqd->meta_list;
> 	struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
> 	unsigned int map_secs;
> 	int min = pblk->min_write_pgs;
> @@ -97,7 +98,10 @@ void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
> 	for (i = off; i < rqd->nr_ppas; i += min) {
> 		map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
> 		if (pblk_map_page_data(pblk, sentry + i, &ppa_list[i],
> -					lun_bitmap, &meta_list[i], map_secs)) {
> +					lun_bitmap,
> +					pblk_get_meta_buffer(pblk,
> +							     meta_list, i),
> +					map_secs)) {

This part of the code is already difficult to read with so many
parameters. Can you assign the metadata buffer beforehand instead, to
make it clearer?

> 			bio_put(rqd->bio);
> 			pblk_free_rqd(pblk, rqd, PBLK_WRITE);
> 			pblk_pipeline_stop(pblk);
> @@ -113,7 +117,7 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
> 	struct nvm_tgt_dev *dev = pblk->dev;
> 	struct nvm_geo *geo = &dev->geo;
> 	struct pblk_line_meta *lm = &pblk->lm;
> -	struct pblk_sec_meta *meta_list = rqd->meta_list;
> +	void *meta_list = rqd->meta_list;
> 	struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
> 	struct pblk_line *e_line, *d_line;
> 	unsigned int map_secs;
> @@ -123,7 +127,10 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
> 	for (i = 0; i < rqd->nr_ppas; i += min) {
> 		map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
> 		if (pblk_map_page_data(pblk, sentry + i, &ppa_list[i],
> -					lun_bitmap, &meta_list[i], map_secs)) {
> +					lun_bitmap,
> +					pblk_get_meta_buffer(pblk,
> +							     meta_list, i),
> +					map_secs)) {

Same as above

> 			bio_put(rqd->bio);
> 			pblk_free_rqd(pblk, rqd, PBLK_WRITE);
> 			pblk_pipeline_stop(pblk);
> diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
> index 08f6ebd4bc48..6584a2588f61 100644
> --- a/drivers/lightnvm/pblk-read.c
> +++ b/drivers/lightnvm/pblk-read.c
> @@ -43,7 +43,7 @@ static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
> 				 struct bio *bio, sector_t blba,
> 				 unsigned long *read_bitmap)
> {
> -	struct pblk_sec_meta *meta_list = rqd->meta_list;
> +	void *meta_list = rqd->meta_list;
> 	struct ppa_addr ppas[NVM_MAX_VLBA];
> 	int nr_secs = rqd->nr_ppas;
> 	bool advanced_bio = false;
> @@ -58,7 +58,7 @@ static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
> retry:
> 		if (pblk_ppa_empty(p)) {
> 			WARN_ON(test_and_set_bit(i, read_bitmap));
> -			meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
> +			pblk_set_meta_lba(pblk, meta_list, i, ADDR_EMPTY);
> 
> 			if (unlikely(!advanced_bio)) {
> 				bio_advance(bio, (i) * PBLK_EXPOSED_PAGE_SIZE);
> @@ -78,7 +78,7 @@ static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
> 				goto retry;
> 			}
> 			WARN_ON(test_and_set_bit(i, read_bitmap));
> -			meta_list[i].lba = cpu_to_le64(lba);
> +			pblk_set_meta_lba(pblk, meta_list, i, lba);
> 			advanced_bio = true;
> #ifdef CONFIG_NVM_PBLK_DEBUG
> 			atomic_long_inc(&pblk->cache_reads);
> @@ -105,12 +105,15 @@ static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
> static void pblk_read_check_seq(struct pblk *pblk, struct nvm_rq *rqd,
> 				sector_t blba)
> {
> -	struct pblk_sec_meta *meta_lba_list = rqd->meta_list;
> +	void *meta_list = rqd->meta_list;
> 	int nr_lbas = rqd->nr_ppas;
> 	int i;
> 
> +	if (!pblk_is_oob_meta_supported(pblk))
> +		return;
> +

Since this patch is only creating the helpers, I would add this to the
patch when you add support for packed metadata.

> 	for (i = 0; i < nr_lbas; i++) {
> -		u64 lba = le64_to_cpu(meta_lba_list[i].lba);
> +		u64 lba = pblk_get_meta_lba(pblk, meta_list, i);
> 
> 		if (lba == ADDR_EMPTY)
> 			continue;
> @@ -134,9 +137,12 @@ static void pblk_read_check_seq(struct pblk *pblk, struct nvm_rq *rqd,
> static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd,
> 				 u64 *lba_list, int nr_lbas)
> {
> -	struct pblk_sec_meta *meta_lba_list = rqd->meta_list;
> +	void *meta_lba_list = rqd->meta_list;
> 	int i, j;
> 
> +	if (!pblk_is_oob_meta_supported(pblk))
> +		return;

Same as above

> +
> 	for (i = 0, j = 0; i < nr_lbas; i++) {
> 		u64 lba = lba_list[i];
> 		u64 meta_lba;
> @@ -144,7 +150,7 @@ static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd,
> 		if (lba == ADDR_EMPTY)
> 			continue;
> 
> -		meta_lba = le64_to_cpu(meta_lba_list[j].lba);
> +		meta_lba = pblk_get_meta_lba(pblk, meta_lba_list, j);
> 
> 		if (lba != meta_lba) {
> #ifdef CONFIG_NVM_PBLK_DEBUG
> @@ -219,7 +225,7 @@ static void pblk_end_partial_read(struct nvm_rq *rqd)
> 	struct bio *new_bio = rqd->bio;
> 	struct bio *bio = pr_ctx->orig_bio;
> 	struct bio_vec src_bv, dst_bv;
> -	struct pblk_sec_meta *meta_list = rqd->meta_list;
> +	void *meta_list = rqd->meta_list;
> 	int bio_init_idx = pr_ctx->bio_init_idx;
> 	unsigned long *read_bitmap = pr_ctx->bitmap;
> 	int nr_secs = pr_ctx->orig_nr_secs;
> @@ -237,8 +243,10 @@ static void pblk_end_partial_read(struct nvm_rq *rqd)
> 	}
> 
> 	for (i = 0; i < nr_secs; i++) {
> -		pr_ctx->lba_list_media[i] = meta_list[i].lba;
> -		meta_list[i].lba = pr_ctx->lba_list_mem[i];
> +		pr_ctx->lba_list_media[i] = pblk_get_meta_lba(pblk,
> +							meta_list, i);
> +		pblk_set_meta_lba(pblk, meta_list, i,
> +				  pr_ctx->lba_list_mem[i]);

This fits in a single line

> 	}
> 
> 	/* Fill the holes in the original bio */
> @@ -250,7 +258,8 @@ static void pblk_end_partial_read(struct nvm_rq *rqd)
> 		line = pblk_ppa_to_line(pblk, rqd->ppa_list[i]);
> 		kref_put(&line->ref, pblk_line_put);
> 
> -		meta_list[hole].lba = pr_ctx->lba_list_media[i];
> +		pblk_set_meta_lba(pblk, meta_list, hole,
> +				  pr_ctx->lba_list_media[i]);
> 
> 		src_bv = new_bio->bi_io_vec[i++];
> 		dst_bv = bio->bi_io_vec[bio_init_idx + hole];
> @@ -286,7 +295,7 @@ static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
> 			    unsigned long *read_bitmap,
> 			    int nr_holes)
> {
> -	struct pblk_sec_meta *meta_list = rqd->meta_list;
> +	void *meta_list = rqd->meta_list;
> 	struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
> 	struct pblk_pr_ctx *pr_ctx;
> 	struct bio *new_bio, *bio = r_ctx->private;
> @@ -308,7 +317,7 @@ static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
> 		goto fail_free_pages;
> 
> 	for (i = 0; i < nr_secs; i++)
> -		pr_ctx->lba_list_mem[i] = meta_list[i].lba;
> +		pr_ctx->lba_list_mem[i] = pblk_get_meta_lba(pblk, meta_list, i);
> 
> 	new_bio->bi_iter.bi_sector = 0; /* internal bio */
> 	bio_set_op_attrs(new_bio, REQ_OP_READ, 0);
> @@ -373,7 +382,7 @@ static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
> static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio,
> 			 sector_t lba, unsigned long *read_bitmap)
> {
> -	struct pblk_sec_meta *meta_list = rqd->meta_list;
> +	void *meta_list = rqd->meta_list;
> 	struct ppa_addr ppa;
> 
> 	pblk_lookup_l2p_seq(pblk, &ppa, lba, 1);
> @@ -385,7 +394,7 @@ static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio,
> retry:
> 	if (pblk_ppa_empty(ppa)) {
> 		WARN_ON(test_and_set_bit(0, read_bitmap));
> -		meta_list[0].lba = cpu_to_le64(ADDR_EMPTY);
> +		pblk_set_meta_lba(pblk, meta_list, 0, ADDR_EMPTY);
> 		return;
> 	}
> 
> @@ -399,7 +408,7 @@ static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio,
> 		}
> 
> 		WARN_ON(test_and_set_bit(0, read_bitmap));
> -		meta_list[0].lba = cpu_to_le64(lba);
> +		pblk_set_meta_lba(pblk, meta_list, 0, lba);
> 
> #ifdef CONFIG_NVM_PBLK_DEBUG
> 		atomic_long_inc(&pblk->cache_reads);
> diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
> index 5740b7509bd8..fa63f9fa5ba8 100644
> --- a/drivers/lightnvm/pblk-recovery.c
> +++ b/drivers/lightnvm/pblk-recovery.c
> @@ -124,7 +124,7 @@ static u64 pblk_sec_in_open_line(struct pblk *pblk, struct pblk_line *line)
> 
> struct pblk_recov_alloc {
> 	struct ppa_addr *ppa_list;
> -	struct pblk_sec_meta *meta_list;
> +	void *meta_list;
> 	struct nvm_rq *rqd;
> 	void *data;
> 	dma_addr_t dma_ppa_list;
> @@ -158,7 +158,7 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
> {
> 	struct nvm_tgt_dev *dev = pblk->dev;
> 	struct nvm_geo *geo = &dev->geo;
> -	struct pblk_sec_meta *meta_list;
> +	void *meta_list;
> 	struct pblk_pad_rq *pad_rq;
> 	struct nvm_rq *rqd;
> 	struct bio *bio;
> @@ -242,7 +242,8 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
> 			dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
> 
> 			pblk_map_invalidate(pblk, dev_ppa);
> -			lba_list[w_ptr] = meta_list[i].lba = addr_empty;
> +			lba_list[w_ptr] = addr_empty;
> +			pblk_set_meta_lba(pblk, meta_list, i, ADDR_EMPTY);
> 			rqd->ppa_list[i] = dev_ppa;
> 		}
> 	}
> @@ -325,6 +326,7 @@ static int pblk_line_wp_is_unbalanced(struct pblk *pblk,
> 			return 1;
> 		else if (chunk->wp < line_wp)
> 			line_wp = chunk->wp;
> +
> 	}
> 
> 	return 0;
> @@ -336,7 +338,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
> 	struct nvm_tgt_dev *dev = pblk->dev;
> 	struct nvm_geo *geo = &dev->geo;
> 	struct ppa_addr *ppa_list;
> -	struct pblk_sec_meta *meta_list;
> +	void *meta_list;
> 	struct nvm_rq *rqd;
> 	struct bio *bio;
> 	void *data;
> @@ -434,7 +436,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
> 	}
> 
> 	for (i = 0; i < rqd->nr_ppas; i++) {
> -		u64 lba = le64_to_cpu(meta_list[i].lba);
> +		u64 lba = pblk_get_meta_lba(pblk, meta_list, i);
> 
> 		lba_list[paddr++] = cpu_to_le64(lba);
> 
> @@ -463,7 +465,7 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line)
> 	struct nvm_geo *geo = &dev->geo;
> 	struct nvm_rq *rqd;
> 	struct ppa_addr *ppa_list;
> -	struct pblk_sec_meta *meta_list;
> +	void *meta_list;
> 	struct pblk_recov_alloc p;
> 	void *data;
> 	dma_addr_t dma_ppa_list, dma_meta_list;
> diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
> index aea09879636f..53156b6f99a3 100644
> --- a/drivers/lightnvm/pblk.h
> +++ b/drivers/lightnvm/pblk.h
> @@ -87,7 +87,6 @@ enum {
> };
> 
> struct pblk_sec_meta {
> -	u64 reserved;
> 	__le64 lba;
> };

I would move this to a separate patch. I would suggest doing it
together with the patch that changes the device format, adding
information on the supported OOB area and making this part of smeta and
emeta. When we push the ftl log, we will take this into account too.

> 
> @@ -1366,4 +1365,40 @@ static inline char *pblk_disk_name(struct pblk *pblk)
> 
> 	return disk->disk_name;
> }
> +
> +static inline int pblk_is_oob_meta_supported(struct pblk *pblk)
> +{
> +	struct nvm_tgt_dev *dev = pblk->dev;
> +	struct nvm_geo *geo = &dev->geo;
> +
> +	/* Pblk uses OOB meta to store LBA of given physical sector.
> +	 * The LBA is eventually used in recovery mode and/or for handling
> +	 * telemetry events (e.g., relocate sector).
> +	 */
> +

If you want to keep this comment, I think it is more informative on top
of struct pblk_sec_meta. There you can explain when metadata is packed
and when it is stored in the OOB area.

> +	return (geo->sos >= sizeof(struct pblk_sec_meta));
> +}
> +
> +static inline struct pblk_sec_meta *pblk_get_meta_buffer(struct pblk *pblk,
> +							 void *meta_ptr,
> +							 int index)
> +{
> +	struct nvm_tgt_dev *dev = pblk->dev;
> +	struct nvm_geo *geo = &dev->geo;
> +
> +	return meta_ptr + max_t(int, geo->sos, sizeof(struct pblk_sec_meta))
> +		* index;
> +}
> +
> +static inline void pblk_set_meta_lba(struct pblk *pblk, void *meta_ptr,
> +				     int index, u64 lba)
> +{
> +	pblk_get_meta_buffer(pblk, meta_ptr, index)->lba = cpu_to_le64(lba);
> +}
> +
> +static inline u64 pblk_get_meta_lba(struct pblk *pblk, void *meta_ptr,
> +				    int index)
> +{
> +	return le64_to_cpu(pblk_get_meta_buffer(pblk, meta_ptr, index)->lba);
> +}
> #endif /* PBLK_H_ */
> --
> 2.17.1

Apart from the comments above, the patch looks good to me.

Javier

[-- Attachment #2: Message signed with OpenPGP --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2018-10-12 19:24 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-10-05 13:34 [PATCH 0/5] lightnvm: pblk: Flexible metadata Igor Konopko
2018-10-05 13:34 ` [PATCH 1/5] lightnvm: pblk: Do not reuse DMA memory on partial read Igor Konopko
2018-10-12 11:32   ` Javier Gonzalez
2018-10-05 13:34 ` [PATCH 2/5] lightnvm: pblk: Helpers for OOB metadata Igor Konopko
2018-10-09  9:02   ` Hans Holmberg
2018-10-09  9:12     ` Matias Bjørling
2018-10-09 12:39       ` Javier Gonzalez
2018-10-09  9:31     ` Igor Konopko
2018-10-12 11:52   ` Javier Gonzalez
2018-10-05 13:34 ` [PATCH 3/5] lightnvm: Flexible DMA pool entry size Igor Konopko
2018-10-09  9:16   ` Hans Holmberg
2018-10-09 10:01     ` Igor Konopko
2018-10-09 12:10       ` Hans Holmberg
2018-10-09 12:36         ` Javier Gonzalez
2018-10-09 12:54           ` Igor Konopko
2018-10-09 12:56         ` Matias Bjørling
2018-10-09 13:10           ` Javier Gonzalez
2018-10-09 13:23             ` Matias Bjørling
2018-10-09 13:31               ` Javier Gonzalez
2018-10-05 13:34 ` [PATCH 4/5] lightnvm: Disable interleaved metadata Igor Konopko
2018-10-09  9:30   ` Hans Holmberg
2018-10-05 13:34 ` [PATCH 5/5] lightnvm: pblk: Support for packed metadata Igor Konopko

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.