* [PATCH] lightnvm: pblk: check lba sanity on read path
@ 2017-09-15 12:35 Javier González
From: Javier González @ 2017-09-15 12:35 UTC (permalink / raw)
  To: linux-block; +Cc: linux-kernel, Javier González

As part of pblk's recovery scheme, we store the lba mapped to each
physical sector in the device's out-of-band (OOB) area.

On the read path, we can use this information to validate that the data
being delivered to the upper layers corresponds to the lba being
requested. The cost of this check is one extra copy within the
request's DMA region and one extra comparison on the host, given that
(i) the OOB area is read from the media together with the data anyway,
and (ii) the DMA region allocated for the ppa list can be reused for
the metadata stored in the OOB area.

Signed-off-by: Javier González <javier@cnexlabs.com>
---
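Note: the reuse in (ii) works because the OOB metadata, the ppa list
and two scratch lba lists all live in one contiguous per-request DMA
allocation. A minimal sketch of the layout this relies on (the overall
allocation size is our assumption for illustration; only the two
offsets below appear in the diff):

	/*
	 * Per-request DMA buffer, as this patch uses it:
	 *
	 *   rqd->meta_list              OOB metadata, pblk_dma_meta_size
	 *   rqd->ppa_list               ppa list, pblk_dma_ppa_size
	 *   + pblk_dma_ppa_size         scratch: lba_list_mem
	 *   + 2 * pblk_dma_ppa_size     scratch: lba_list_media
	 */
	__le64 *lba_list_mem, *lba_list_media;

	lba_list_mem   = (void *)rqd->ppa_list + pblk_dma_ppa_size;
	lba_list_media = (void *)rqd->ppa_list + 2 * pblk_dma_ppa_size;
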
 drivers/lightnvm/pblk-read.c | 51 +++++++++++++++++++++++++++++++++++++++++---
 drivers/lightnvm/pblk.h      |  4 +++-
 2 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
index 0299fc08291d..a465d9980df4 100644
--- a/drivers/lightnvm/pblk-read.c
+++ b/drivers/lightnvm/pblk-read.c
@@ -41,6 +41,7 @@ static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio,
 static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
 				 sector_t blba, unsigned long *read_bitmap)
 {
+	struct pblk_sec_meta *meta_list = rqd->meta_list;
 	struct bio *bio = rqd->bio;
 	struct ppa_addr ppas[PBLK_MAX_REQ_ADDRS];
 	int nr_secs = rqd->nr_ppas;
@@ -56,6 +57,7 @@ static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
 retry:
 		if (pblk_ppa_empty(p)) {
 			WARN_ON(test_and_set_bit(i, read_bitmap));
+			meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
 
 			if (unlikely(!advanced_bio)) {
 				bio_advance(bio, (i) * PBLK_EXPOSED_PAGE_SIZE);
@@ -75,6 +77,7 @@ static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
 				goto retry;
 			}
 			WARN_ON(test_and_set_bit(i, read_bitmap));
+			meta_list[i].lba = cpu_to_le64(lba);
 			advanced_bio = true;
 #ifdef CONFIG_NVM_DEBUG
 			atomic_long_inc(&pblk->cache_reads);
@@ -110,10 +113,26 @@ static int pblk_submit_read_io(struct pblk *pblk, struct nvm_rq *rqd)
 	return NVM_IO_OK;
 }
 
+static void pblk_read_check(struct pblk *pblk, struct nvm_rq *rqd,
+			   sector_t blba)
+{
+	struct pblk_sec_meta *meta_list = rqd->meta_list;
+	int nr_lbas = rqd->nr_ppas;
+	int i;
+
+	for (i = 0; i < nr_lbas; i++) {
+		u64 lba = le64_to_cpu(meta_list[i].lba);
+
+		if (lba == ADDR_EMPTY)
+			continue;
+
+		WARN(lba != blba + i, "pblk: corrupted read LBA\n");
+	}
+}
+
 static void pblk_end_io_read(struct nvm_rq *rqd)
 {
 	struct pblk *pblk = rqd->private;
-	struct nvm_tgt_dev *dev = pblk->dev;
 	struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
 	struct bio *bio = rqd->bio;
 
@@ -124,6 +143,8 @@ static void pblk_end_io_read(struct nvm_rq *rqd)
 		WARN_ONCE(bio->bi_status, "pblk: corrupted read error\n");
 #endif
 
+	pblk_read_check(pblk, rqd, r_ctx->lba);
+
 	bio_put(bio);
 	if (r_ctx->private) {
 		struct bio *orig_bio = r_ctx->private;
@@ -149,15 +170,21 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
 				      unsigned long *read_bitmap)
 {
 	struct bio *new_bio, *bio = rqd->bio;
+	struct pblk_sec_meta *meta_list = rqd->meta_list;
 	struct bio_vec src_bv, dst_bv;
 	void *ppa_ptr = NULL;
 	void *src_p, *dst_p;
 	dma_addr_t dma_ppa_list = 0;
+	__le64 *lba_list_mem, *lba_list_media;
 	int nr_secs = rqd->nr_ppas;
 	int nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs);
 	int i, ret, hole;
 	DECLARE_COMPLETION_ONSTACK(wait);
 
+	/* Re-use allocated memory for intermediate lbas */
+	lba_list_mem = (((void *)rqd->ppa_list) + pblk_dma_ppa_size);
+	lba_list_media = (((void *)rqd->ppa_list) + 2 * pblk_dma_ppa_size);
+
 	new_bio = bio_alloc(GFP_KERNEL, nr_holes);
 
 	if (pblk_bio_add_pages(pblk, new_bio, GFP_KERNEL, nr_holes))
@@ -168,6 +195,9 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
 		goto err;
 	}
 
+	for (i = 0; i < nr_secs; i++)
+		lba_list_mem[i] = meta_list[i].lba;
+
 	new_bio->bi_iter.bi_sector = 0; /* internal bio */
 	bio_set_op_attrs(new_bio, REQ_OP_READ, 0);
 
@@ -207,10 +237,17 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
 		rqd->dma_ppa_list = dma_ppa_list;
 	}
 
+	for (i = 0; i < nr_secs; i++) {
+		lba_list_media[i] = meta_list[i].lba;
+		meta_list[i].lba = lba_list_mem[i];
+	}
+
 	/* Fill the holes in the original bio */
 	i = 0;
 	hole = find_first_zero_bit(read_bitmap, nr_secs);
 	do {
+		meta_list[hole].lba = lba_list_media[i];
+
 		src_bv = new_bio->bi_io_vec[i++];
 		dst_bv = bio->bi_io_vec[bio_init_idx + hole];
 
@@ -251,6 +288,7 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
 static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd,
 			 sector_t lba, unsigned long *read_bitmap)
 {
+	struct pblk_sec_meta *meta_list = rqd->meta_list;
 	struct bio *bio = rqd->bio;
 	struct ppa_addr ppa;
 
@@ -263,6 +301,7 @@ static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd,
 retry:
 	if (pblk_ppa_empty(ppa)) {
 		WARN_ON(test_and_set_bit(0, read_bitmap));
+		meta_list[0].lba = cpu_to_le64(ADDR_EMPTY);
 		return;
 	}
 
@@ -274,6 +313,9 @@ static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd,
 			pblk_lookup_l2p_seq(pblk, &ppa, lba, 1);
 			goto retry;
 		}
+
+		meta_list[0].lba = cpu_to_le64(lba);
+
 		WARN_ON(test_and_set_bit(0, read_bitmap));
 #ifdef CONFIG_NVM_DEBUG
 			atomic_long_inc(&pblk->cache_reads);
@@ -290,9 +332,10 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
 	struct nvm_tgt_dev *dev = pblk->dev;
 	sector_t blba = pblk_get_lba(bio);
 	unsigned int nr_secs = pblk_get_secs(bio);
+	struct pblk_g_ctx *r_ctx;
 	struct nvm_rq *rqd;
-	unsigned long read_bitmap; /* Max 64 ppas per request */
 	unsigned int bio_init_idx;
+	unsigned long read_bitmap; /* Max 64 ppas per request */
 	int ret = NVM_IO_ERR;
 
 	/* logic error: lba out-of-bounds. Ignore read request */
@@ -312,6 +355,9 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
 	rqd->private = pblk;
 	rqd->end_io = pblk_end_io_read;
 
+	r_ctx = nvm_rq_to_pdu(rqd);
+	r_ctx->lba = blba;
+
 	/* Save the index for this bio's start. This is needed in case
 	 * we need to fill a partial read.
 	 */
@@ -344,7 +390,6 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
 	/* All sectors are to be read from the device */
 	if (bitmap_empty(&read_bitmap, rqd->nr_ppas)) {
 		struct bio *int_bio = NULL;
-		struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
 
 		/* Clone read bio to deal with read errors internally */
 		int_bio = bio_clone_fast(bio, GFP_KERNEL, pblk_bio_set);
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index adae8620e298..ac65e778b40d 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -99,6 +99,7 @@ enum {
 };
 
 #define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * PBLK_MAX_REQ_ADDRS)
+#define pblk_dma_ppa_size (sizeof(u64) * PBLK_MAX_REQ_ADDRS)
 
 /* write buffer completion context */
 struct pblk_c_ctx {
@@ -110,9 +111,10 @@ struct pblk_c_ctx {
 	unsigned int nr_padded;
 };
 
-/* generic context */
+/* read context */
 struct pblk_g_ctx {
 	void *private;
+	u64 lba;
 };
 
 /* Pad context */
-- 
2.7.4

* [PATCH] lightnvm: pblk: guarantee line integrity on reads
@ 2017-09-15 12:35 Javier González
From: Javier González @ 2017-09-15 12:35 UTC (permalink / raw)
  To: linux-block; +Cc: linux-kernel, Javier González

When a line is recycled during garbage collection, reads can still be
issued to the line. If the line is freed in the middle of this process,
data corruption might occur.

This patch guarantees that a line is not freed while reads targeting it
are still in flight. Specifically, we take the existing per-line
reference for every sector being read and use it to decide when the
line is eligible to be freed after the recycle process.

Signed-off-by: Javier González <javier@cnexlabs.com>
---
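A condensed sketch of the reference flow the diff below implements
(that the final put must be deferred because the completion path
cannot sleep is our reading of the code, not stated above):

	/* Submit side, under trans_lock: take one reference on the
	 * line backing every sector resolved from the L2P table.
	 */
	ppa = pblk_trans_map_get(pblk, blba + i);
	if (!pblk_ppa_empty(ppa) && !pblk_addr_in_cache(ppa))
		kref_get(&pblk->lines[pblk_dev_ppa_to_line(ppa)].ref);

	/* Completion side: drop it again. The final put may free the
	 * line, so pblk_end_io_read() bounces it to the new
	 * pblk->r_end_wq workqueue via pblk_line_put_wq().
	 */
	kref_put(&line->ref, pblk_line_put_wq);
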
 drivers/lightnvm/pblk-core.c | 56 ++++++++++++++++++++++++++++++----
 drivers/lightnvm/pblk-init.c | 14 +++++++--
 drivers/lightnvm/pblk-read.c | 71 +++++++++++++++++++++++++++++++++-----------
 drivers/lightnvm/pblk.h      |  2 ++
 4 files changed, 118 insertions(+), 25 deletions(-)

diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 5e6a881e3434..d2587e37034f 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -1458,10 +1458,8 @@ void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
 	line->emeta = NULL;
 }
 
-void pblk_line_put(struct kref *ref)
+static void __pblk_line_put(struct pblk *pblk, struct pblk_line *line)
 {
-	struct pblk_line *line = container_of(ref, struct pblk_line, ref);
-	struct pblk *pblk = line->pblk;
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 
 	spin_lock(&line->lock);
@@ -1479,6 +1477,43 @@ void pblk_line_put(struct kref *ref)
 	pblk_rl_free_lines_inc(&pblk->rl, line);
 }
 
+static void pblk_line_put_ws(struct work_struct *work)
+{
+	struct pblk_line_ws *line_put_ws = container_of(work,
+						struct pblk_line_ws, ws);
+	struct pblk *pblk = line_put_ws->pblk;
+	struct pblk_line *line = line_put_ws->line;
+
+	__pblk_line_put(pblk, line);
+	mempool_free(line_put_ws, pblk->gen_ws_pool);
+}
+
+void pblk_line_put(struct kref *ref)
+{
+	struct pblk_line *line = container_of(ref, struct pblk_line, ref);
+	struct pblk *pblk = line->pblk;
+
+	__pblk_line_put(pblk, line);
+}
+
+void pblk_line_put_wq(struct kref *ref)
+{
+	struct pblk_line *line = container_of(ref, struct pblk_line, ref);
+	struct pblk *pblk = line->pblk;
+	struct pblk_line_ws *line_put_ws;
+
+	line_put_ws = mempool_alloc(pblk->gen_ws_pool, GFP_ATOMIC);
+	if (!line_put_ws)
+		return;
+
+	line_put_ws->pblk = pblk;
+	line_put_ws->line = line;
+	line_put_ws->priv = NULL;
+
+	INIT_WORK(&line_put_ws->ws, pblk_line_put_ws);
+	queue_work(pblk->r_end_wq, &line_put_ws->ws);
+}
+
 int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa)
 {
 	struct nvm_rq *rqd;
@@ -1886,8 +1921,19 @@ void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
 	int i;
 
 	spin_lock(&pblk->trans_lock);
-	for (i = 0; i < nr_secs; i++)
-		ppas[i] = pblk_trans_map_get(pblk, blba + i);
+	for (i = 0; i < nr_secs; i++) {
+		struct ppa_addr ppa;
+
+		ppa = ppas[i] = pblk_trans_map_get(pblk, blba + i);
+
+		/* If the L2P entry maps to a line, the reference is valid */
+		if (!pblk_ppa_empty(ppa) && !pblk_addr_in_cache(ppa)) {
+			int line_id = pblk_dev_ppa_to_line(ppa);
+			struct pblk_line *line = &pblk->lines[line_id];
+
+			kref_get(&line->ref);
+		}
+	}
 	spin_unlock(&pblk->trans_lock);
 }
 
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
index df86cf98ef4d..670e5858611e 100644
--- a/drivers/lightnvm/pblk-init.c
+++ b/drivers/lightnvm/pblk-init.c
@@ -271,15 +271,22 @@ static int pblk_core_init(struct pblk *pblk)
 	if (!pblk->bb_wq)
 		goto free_close_wq;
 
+	pblk->r_end_wq = alloc_workqueue("pblk-read-end-wq",
+			WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
+	if (!pblk->r_end_wq)
+		goto free_bb_wq;
+
 	if (pblk_set_ppaf(pblk))
-		goto free_bb_wq;
+		goto free_r_end_wq;
 
 	if (pblk_rwb_init(pblk))
-		goto free_bb_wq;
+		goto free_r_end_wq;
 
 	INIT_LIST_HEAD(&pblk->compl_list);
 	return 0;
 
+free_r_end_wq:
+	destroy_workqueue(pblk->r_end_wq);
 free_bb_wq:
 	destroy_workqueue(pblk->bb_wq);
 free_close_wq:
@@ -304,6 +311,9 @@ static void pblk_core_free(struct pblk *pblk)
 	if (pblk->close_wq)
 		destroy_workqueue(pblk->close_wq);
 
+	if (pblk->r_end_wq)
+		destroy_workqueue(pblk->r_end_wq);
+
 	if (pblk->bb_wq)
 		destroy_workqueue(pblk->bb_wq);
 
diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
index a465d9980df4..402f8eff6a2e 100644
--- a/drivers/lightnvm/pblk-read.c
+++ b/drivers/lightnvm/pblk-read.c
@@ -130,9 +130,34 @@ static void pblk_read_check(struct pblk *pblk, struct nvm_rq *rqd,
 	}
 }
 
-static void pblk_end_io_read(struct nvm_rq *rqd)
+static void pblk_read_put_rqd_kref(struct pblk *pblk, struct nvm_rq *rqd)
+{
+	struct ppa_addr *ppa_list;
+	int i;
+
+	ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr;
+
+	for (i = 0; i < rqd->nr_ppas; i++) {
+		struct ppa_addr ppa = ppa_list[i];
+		struct pblk_line *line;
+
+		line = &pblk->lines[pblk_dev_ppa_to_line(ppa)];
+		kref_put(&line->ref, pblk_line_put_wq);
+	}
+}
+
+static void pblk_end_user_read(struct bio *bio)
+{
+#ifdef CONFIG_NVM_DEBUG
+	WARN_ONCE(bio->bi_status, "pblk: corrupted read bio\n");
+#endif
+	bio_endio(bio);
+	bio_put(bio);
+}
+
+static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd,
+			       bool put_line)
 {
-	struct pblk *pblk = rqd->private;
 	struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
 	struct bio *bio = rqd->bio;
 
@@ -146,15 +171,11 @@ static void pblk_end_io_read(struct nvm_rq *rqd)
 	pblk_read_check(pblk, rqd, r_ctx->lba);
 
 	bio_put(bio);
-	if (r_ctx->private) {
-		struct bio *orig_bio = r_ctx->private;
+	if (r_ctx->private)
+		pblk_end_user_read((struct bio *)r_ctx->private);
 
-#ifdef CONFIG_NVM_DEBUG
-		WARN_ONCE(orig_bio->bi_status, "pblk: corrupted read bio\n");
-#endif
-		bio_endio(orig_bio);
-		bio_put(orig_bio);
-	}
+	if (put_line)
+		pblk_read_put_rqd_kref(pblk, rqd);
 
 #ifdef CONFIG_NVM_DEBUG
 	atomic_long_add(rqd->nr_ppas, &pblk->sync_reads);
@@ -165,6 +186,13 @@ static void pblk_end_io_read(struct nvm_rq *rqd)
 	atomic_dec(&pblk->inflight_io);
 }
 
+static void pblk_end_io_read(struct nvm_rq *rqd)
+{
+	struct pblk *pblk = rqd->private;
+
+	__pblk_end_io_read(pblk, rqd, true);
+}
+
 static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
 				      unsigned int bio_init_idx,
 				      unsigned long *read_bitmap)
@@ -233,8 +261,12 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
 	}
 
 	if (unlikely(nr_secs > 1 && nr_holes == 1)) {
+		struct ppa_addr ppa;
+
+		ppa = rqd->ppa_addr;
 		rqd->ppa_list = ppa_ptr;
 		rqd->dma_ppa_list = dma_ppa_list;
+		rqd->ppa_list[0] = ppa;
 	}
 
 	for (i = 0; i < nr_secs; i++) {
@@ -246,6 +278,11 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
 	i = 0;
 	hole = find_first_zero_bit(read_bitmap, nr_secs);
 	do {
+		int line_id = pblk_dev_ppa_to_line(rqd->ppa_list[i]);
+		struct pblk_line *line = &pblk->lines[line_id];
+
+		kref_put(&line->ref, pblk_line_put);
+
 		meta_list[hole].lba = lba_list_media[i];
 
 		src_bv = new_bio->bi_io_vec[i++];
@@ -269,19 +306,17 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
 	bio_put(new_bio);
 
 	/* Complete the original bio and associated request */
+	bio_endio(bio);
 	rqd->bio = bio;
 	rqd->nr_ppas = nr_secs;
-	rqd->private = pblk;
 
-	bio_endio(bio);
-	pblk_end_io_read(rqd);
+	__pblk_end_io_read(pblk, rqd, false);
 	return NVM_IO_OK;
 
 err:
 	/* Free allocated pages in new bio */
 	pblk_bio_free_pages(pblk, bio, 0, new_bio->bi_vcnt);
-	rqd->private = pblk;
-	pblk_end_io_read(rqd);
+	__pblk_end_io_read(pblk, rqd, false);
 	return NVM_IO_ERR;
 }
 
@@ -314,11 +349,11 @@ static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd,
 			goto retry;
 		}
 
+		WARN_ON(test_and_set_bit(0, read_bitmap));
 		meta_list[0].lba = cpu_to_le64(lba);
 
-		WARN_ON(test_and_set_bit(0, read_bitmap));
 #ifdef CONFIG_NVM_DEBUG
-			atomic_long_inc(&pblk->cache_reads);
+		atomic_long_inc(&pblk->cache_reads);
 #endif
 	} else {
 		rqd->ppa_addr = ppa;
@@ -383,7 +418,7 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
 	if (bitmap_full(&read_bitmap, nr_secs)) {
 		bio_endio(bio);
 		atomic_inc(&pblk->inflight_io);
-		pblk_end_io_read(rqd);
+		__pblk_end_io_read(pblk, rqd, false);
 		return NVM_IO_OK;
 	}
 
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index ac65e778b40d..8e9ae213b707 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -635,6 +635,7 @@ struct pblk {
 
 	struct workqueue_struct *close_wq;
 	struct workqueue_struct *bb_wq;
+	struct workqueue_struct *r_end_wq;
 
 	struct timer_list wtimer;
 
@@ -740,6 +741,7 @@ int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line,
 			 void *emeta_buf);
 int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr erase_ppa);
 void pblk_line_put(struct kref *ref);
+void pblk_line_put_wq(struct kref *ref);
 struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line);
 u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line);
 void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
-- 
2.7.4

* [PATCH] lightnvm: pblk: remove redundant check on read path
@ 2017-09-15 12:35 Javier González
From: Javier González @ 2017-09-15 12:35 UTC (permalink / raw)
  To: linux-block; +Cc: linux-kernel, Javier González

A partial read I/O in pblk is an I/O where some sectors reside in the
write buffer in main memory and some are persisted on the device. Such
an I/O always contains at least 2 lbas (at least one cached and at
least one on the device), so nr_secs > 1 holds throughout the
partial-read path and the explicit check for it is unnecessary.

Signed-off-by: Javier González <javier@cnexlabs.com>
---
 drivers/lightnvm/pblk-read.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
index 402f8eff6a2e..71c58503f1a4 100644
--- a/drivers/lightnvm/pblk-read.c
+++ b/drivers/lightnvm/pblk-read.c
@@ -235,7 +235,7 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
 	rqd->end_io = pblk_end_io_sync;
 	rqd->private = &wait;
 
-	if (unlikely(nr_secs > 1 && nr_holes == 1)) {
+	if (unlikely(nr_holes == 1)) {
 		ppa_ptr = rqd->ppa_list;
 		dma_ppa_list = rqd->dma_ppa_list;
 		rqd->ppa_addr = rqd->ppa_list[0];
@@ -260,7 +260,7 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
 #endif
 	}
 
-	if (unlikely(nr_secs > 1 && nr_holes == 1)) {
+	if (unlikely(nr_holes == 1)) {
 		struct ppa_addr ppa;
 
 		ppa = rqd->ppa_addr;
-- 
2.7.4

* [PATCH] lightnvm: pblk: remove I/O dependency on write path
@ 2017-09-15 12:35 Javier González
From: Javier González @ 2017-09-15 12:35 UTC (permalink / raw)
  To: linux-block; +Cc: linux-kernel, Javier González

pblk schedules user I/O, metadata I/O and erases on the write path in
order to minimize collisions at the media level. Until now, there has
been a dependency between user and metadata I/Os that could lead to a
deadlock, as both take the per-LUN semaphore to schedule submission.

This patch removes this dependency and guarantees forward progress at a
per-I/O granularity.

Signed-off-by: Javier González <javier@cnexlabs.com>
---
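The resulting submission order, condensed from the diff below (error
handling omitted):

	/* 1. Map lbas to ppas and populate the write request. */
	err = pblk_setup_w_rq(pblk, rqd, &erase_ppa);

	/* 2. Pick a candidate metadata line, but do not submit yet. */
	meta_line = pblk_should_submit_meta_io(pblk, rqd);

	/* 3. The data write for the current line always goes out
	 *    first, so it never waits behind a metadata I/O for the
	 *    per-LUN semaphore.
	 */
	err = pblk_submit_io(pblk, rqd);

	/* 4. Erase for the next data line, if one is pending. */
	if (!ppa_empty(erase_ppa))
		pblk_blk_erase_async(pblk, erase_ppa);

	/* 5. Metadata write for the previous data line, last. */
	if (meta_line)
		err = pblk_submit_meta_io(pblk, meta_line);
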
 drivers/lightnvm/pblk-write.c | 145 +++++++++++++++++++-----------------------
 1 file changed, 65 insertions(+), 80 deletions(-)

diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
index cec526759547..89a1b46ec728 100644
--- a/drivers/lightnvm/pblk-write.c
+++ b/drivers/lightnvm/pblk-write.c
@@ -223,15 +223,16 @@ static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
 }
 
 static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
-			   struct pblk_c_ctx *c_ctx, struct ppa_addr *erase_ppa)
+			   struct ppa_addr *erase_ppa)
 {
 	struct pblk_line_meta *lm = &pblk->lm;
 	struct pblk_line *e_line = pblk_line_get_erase(pblk);
+	struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
 	unsigned int valid = c_ctx->nr_valid;
 	unsigned int padded = c_ctx->nr_padded;
 	unsigned int nr_secs = valid + padded;
 	unsigned long *lun_bitmap;
-	int ret = 0;
+	int ret;
 
 	lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL);
 	if (!lun_bitmap)
@@ -297,55 +298,6 @@ static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail,
 	return secs_to_sync;
 }
 
-static inline int pblk_valid_meta_ppa(struct pblk *pblk,
-				      struct pblk_line *meta_line,
-				      struct ppa_addr *ppa_list, int nr_ppas)
-{
-	struct nvm_tgt_dev *dev = pblk->dev;
-	struct nvm_geo *geo = &dev->geo;
-	struct pblk_line *data_line;
-	struct ppa_addr ppa, ppa_opt;
-	u64 paddr;
-	int i;
-
-	data_line = &pblk->lines[pblk_dev_ppa_to_line(ppa_list[0])];
-	paddr = pblk_lookup_page(pblk, meta_line);
-	ppa = addr_to_gen_ppa(pblk, paddr, 0);
-
-	if (test_bit(pblk_ppa_to_pos(geo, ppa), data_line->blk_bitmap))
-		return 1;
-
-	/* Schedule a metadata I/O that is half the distance from the data I/O
-	 * with regards to the number of LUNs forming the pblk instance. This
-	 * balances LUN conflicts across every I/O.
-	 *
-	 * When the LUN configuration changes (e.g., due to GC), this distance
-	 * can align, which would result on a LUN deadlock. In this case, modify
-	 * the distance to not be optimal, but allow metadata I/Os to succeed.
-	 */
-	ppa_opt = addr_to_gen_ppa(pblk, paddr + data_line->meta_distance, 0);
-	if (unlikely(ppa_opt.ppa == ppa.ppa)) {
-		data_line->meta_distance--;
-		return 0;
-	}
-
-	for (i = 0; i < nr_ppas; i += pblk->min_write_pgs)
-		if (ppa_list[i].g.ch == ppa_opt.g.ch &&
-					ppa_list[i].g.lun == ppa_opt.g.lun)
-			return 1;
-
-	if (test_bit(pblk_ppa_to_pos(geo, ppa_opt), data_line->blk_bitmap)) {
-		for (i = 0; i < nr_ppas; i += pblk->min_write_pgs)
-			if (ppa_list[i].g.ch == ppa.g.ch &&
-						ppa_list[i].g.lun == ppa.g.lun)
-				return 0;
-
-		return 1;
-	}
-
-	return 0;
-}
-
 int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
@@ -424,8 +376,44 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
 	return ret;
 }
 
-static int pblk_sched_meta_io(struct pblk *pblk, struct ppa_addr *prev_list,
-			       int prev_n)
+static inline bool pblk_valid_meta_ppa(struct pblk *pblk,
+				       struct pblk_line *meta_line,
+				       struct nvm_rq *data_rqd)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_c_ctx *data_c_ctx = nvm_rq_to_pdu(data_rqd);
+	struct pblk_line *data_line = pblk_line_get_data(pblk);
+	struct ppa_addr ppa, ppa_opt;
+	u64 paddr;
+	int pos_opt;
+
+	/* Schedule a metadata I/O that is half the distance from the data I/O
+	 * with regards to the number of LUNs forming the pblk instance. This
+	 * balances LUN conflicts across every I/O.
+	 *
+	 * When the LUN configuration changes (e.g., due to GC), this distance
+	 * can align, which would result on metadata and data I/Os colliding. In
+	 * this case, modify the distance to not be optimal, but move the
+	 * optimal in the right direction.
+	 */
+	paddr = pblk_lookup_page(pblk, meta_line);
+	ppa = addr_to_gen_ppa(pblk, paddr, 0);
+	ppa_opt = addr_to_gen_ppa(pblk, paddr + data_line->meta_distance, 0);
+	pos_opt = pblk_ppa_to_pos(geo, ppa_opt);
+
+	if (test_bit(pos_opt, data_c_ctx->lun_bitmap) ||
+				test_bit(pos_opt, data_line->blk_bitmap))
+		return true;
+
+	if (unlikely(pblk_ppa_comp(ppa_opt, ppa)))
+		data_line->meta_distance--;
+
+	return false;
+}
+
+static struct pblk_line *pblk_should_submit_meta_io(struct pblk *pblk,
+						    struct nvm_rq *data_rqd)
 {
 	struct pblk_line_meta *lm = &pblk->lm;
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
@@ -435,57 +423,45 @@ static int pblk_sched_meta_io(struct pblk *pblk, struct ppa_addr *prev_list,
 retry:
 	if (list_empty(&l_mg->emeta_list)) {
 		spin_unlock(&l_mg->close_lock);
-		return 0;
+		return NULL;
 	}
 	meta_line = list_first_entry(&l_mg->emeta_list, struct pblk_line, list);
 	if (meta_line->emeta->mem >= lm->emeta_len[0])
 		goto retry;
 	spin_unlock(&l_mg->close_lock);
 
-	if (!pblk_valid_meta_ppa(pblk, meta_line, prev_list, prev_n))
-		return 0;
+	if (!pblk_valid_meta_ppa(pblk, meta_line, data_rqd))
+		return NULL;
 
-	return pblk_submit_meta_io(pblk, meta_line);
+	return meta_line;
 }
 
 static int pblk_submit_io_set(struct pblk *pblk, struct nvm_rq *rqd)
 {
-	struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
 	struct ppa_addr erase_ppa;
+	struct pblk_line *meta_line;
 	int err;
 
 	ppa_set_empty(&erase_ppa);
 
 	/* Assign lbas to ppas and populate request structure */
-	err = pblk_setup_w_rq(pblk, rqd, c_ctx, &erase_ppa);
+	err = pblk_setup_w_rq(pblk, rqd, &erase_ppa);
 	if (err) {
 		pr_err("pblk: could not setup write request: %d\n", err);
 		return NVM_IO_ERR;
 	}
 
-	if (likely(ppa_empty(erase_ppa))) {
-		/* Submit metadata write for previous data line */
-		err = pblk_sched_meta_io(pblk, rqd->ppa_list, rqd->nr_ppas);
-		if (err) {
-			pr_err("pblk: metadata I/O submission failed: %d", err);
-			return NVM_IO_ERR;
-		}
+	meta_line = pblk_should_submit_meta_io(pblk, rqd);
 
-		/* Submit data write for current data line */
-		err = pblk_submit_io(pblk, rqd);
-		if (err) {
-			pr_err("pblk: data I/O submission failed: %d\n", err);
-			return NVM_IO_ERR;
-		}
-	} else {
-		/* Submit data write for current data line */
-		err = pblk_submit_io(pblk, rqd);
-		if (err) {
-			pr_err("pblk: data I/O submission failed: %d\n", err);
-			return NVM_IO_ERR;
-		}
+	/* Submit data write for current data line */
+	err = pblk_submit_io(pblk, rqd);
+	if (err) {
+		pr_err("pblk: data I/O submission failed: %d\n", err);
+		return NVM_IO_ERR;
+	}
 
-		/* Submit available erase for next data line */
+	if (!ppa_empty(erase_ppa)) {
+		/* Submit erase for next data line */
 		if (pblk_blk_erase_async(pblk, erase_ppa)) {
 			struct pblk_line *e_line = pblk_line_get_erase(pblk);
 			struct nvm_tgt_dev *dev = pblk->dev;
@@ -498,6 +474,15 @@ static int pblk_submit_io_set(struct pblk *pblk, struct nvm_rq *rqd)
 		}
 	}
 
+	if (meta_line) {
+		/* Submit metadata write for previous data line */
+		err = pblk_submit_meta_io(pblk, meta_line);
+		if (err) {
+			pr_err("pblk: metadata I/O submission failed: %d", err);
+			return NVM_IO_ERR;
+		}
+	}
+
 	return NVM_IO_OK;
 }
 
-- 
2.7.4

* [PATCH] lightnvm: pblk: enable 1 LUN configuration
@ 2017-09-15 12:35 Javier González
From: Javier González @ 2017-09-15 12:35 UTC (permalink / raw)
  To: linux-block; +Cc: linux-kernel, Javier González

Metadata I/Os are scheduled to minimize their impact on user data I/Os.
When enough LUNs are instantiated (i.e., enough bandwidth), it is easy
to interleave metadata and data one after the other so that metadata
I/Os are the ones being blocked, not vice versa.

We do this by calculating the distance between the I/Os in terms of the
LUNs that are not in use, and selecting a free LUN that satisfies the
simple heuristic that metadata is scheduled behind. The per-LUN
semaphores guarantee consistency. This works fine on configurations
with more than one LUN. However, when a single LUN is instantiated,
this design leads to a deadlock, where metadata waits forever to be
scheduled on a free LUN.

This patch implements the 1 LUN case by simply scheduling the metadata
I/O after the data I/O. In the process, we refactor the way a line is
replaced to ensure that metadata writes are submitted after data writes
in order to guarantee block sequentiality. Note that, since there is
only one LUN, both I/Os will block each other by design. However, such
a configuration only pursues tight read latencies, not write bandwidth.

Signed-off-by: Javier González <javier@cnexlabs.com>
---
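The key reordering sits in pblk_map_page_data(): a full line is now
replaced before the mapping instead of after it, so the previous
line's metadata is closed only once all of its data has been mapped.
Condensed from the diff below:

	if (pblk_line_is_full(line)) {
		struct pblk_line *prev_line = line;

		/* Switch to the new data line first, then close the
		 * previous line's metadata. With a single LUN the two
		 * I/Os serialize on the LUN semaphore instead of
		 * metadata deadlocking while waiting for a LUN that
		 * will never become free.
		 */
		line = pblk_line_replace_data(pblk);
		pblk_line_close_meta(pblk, prev_line);
	}
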
 drivers/lightnvm/pblk-core.c | 17 ++++++++++-------
 drivers/lightnvm/pblk-init.c |  8 ++++++--
 drivers/lightnvm/pblk-map.c  | 21 ++++++++++++---------
 drivers/lightnvm/pblk.h      |  2 +-
 4 files changed, 29 insertions(+), 19 deletions(-)

diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index d2587e37034f..64a6a255514e 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -1370,17 +1370,17 @@ void pblk_pipeline_stop(struct pblk *pblk)
 	spin_unlock(&l_mg->free_lock);
 }
 
-void pblk_line_replace_data(struct pblk *pblk)
+struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
 {
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
-	struct pblk_line *cur, *new;
+	struct pblk_line *cur, *new = NULL;
 	unsigned int left_seblks;
 	int is_next = 0;
 
 	cur = l_mg->data_line;
 	new = l_mg->data_next;
 	if (!new)
-		return;
+		goto out;
 	l_mg->data_line = new;
 
 	spin_lock(&l_mg->free_lock);
@@ -1388,7 +1388,7 @@ void pblk_line_replace_data(struct pblk *pblk)
 		l_mg->data_line = NULL;
 		l_mg->data_next = NULL;
 		spin_unlock(&l_mg->free_lock);
-		return;
+		goto out;
 	}
 
 	pblk_line_setup_metadata(new, l_mg, &pblk->lm);
@@ -1400,7 +1400,7 @@ void pblk_line_replace_data(struct pblk *pblk)
 		/* If line is not fully erased, erase it */
 		if (atomic_read(&new->left_eblks)) {
 			if (pblk_line_erase(pblk, new))
-				return;
+				goto out;
 		} else {
 			io_schedule();
 		}
@@ -1411,7 +1411,7 @@ void pblk_line_replace_data(struct pblk *pblk)
 	if (!pblk_line_init_metadata(pblk, new, cur)) {
 		new = pblk_line_retry(pblk, new);
 		if (!new)
-			return;
+			goto out;
 
 		goto retry_setup;
 	}
@@ -1419,7 +1419,7 @@ void pblk_line_replace_data(struct pblk *pblk)
 	if (!pblk_line_init_bb(pblk, new, 1)) {
 		new = pblk_line_retry(pblk, new);
 		if (!new)
-			return;
+			goto out;
 
 		goto retry_setup;
 	}
@@ -1443,6 +1443,9 @@ void pblk_line_replace_data(struct pblk *pblk)
 
 	if (is_next)
 		pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
+
+out:
+	return new;
 }
 
 void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
index 670e5858611e..9d9adcf3e141 100644
--- a/drivers/lightnvm/pblk-init.c
+++ b/drivers/lightnvm/pblk-init.c
@@ -708,8 +708,12 @@ static int pblk_lines_init(struct pblk *pblk)
 	}
 
 	lm->emeta_bb = geo->nr_luns - i;
-	lm->min_blk_line = 1 + DIV_ROUND_UP(lm->smeta_sec + lm->emeta_sec[0],
-							geo->sec_per_blk);
+
+	lm->min_blk_line = 1;
+	if (geo->nr_luns > 1)
+		lm->min_blk_line += DIV_ROUND_UP(lm->smeta_sec +
+					lm->emeta_sec[0], geo->sec_per_blk);
+
 	if (lm->min_blk_line > lm->blk_per_line) {
 		pr_err("pblk: config. not supported. Min. LUN in line:%d\n",
 							lm->blk_per_line);
diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c
index fddb924f6dde..3bc4c94f9cf2 100644
--- a/drivers/lightnvm/pblk-map.c
+++ b/drivers/lightnvm/pblk-map.c
@@ -25,13 +25,23 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
 			       unsigned int valid_secs)
 {
 	struct pblk_line *line = pblk_line_get_data(pblk);
-	struct pblk_emeta *emeta = line->emeta;
+	struct pblk_emeta *emeta;
 	struct pblk_w_ctx *w_ctx;
-	__le64 *lba_list = emeta_to_lbas(pblk, emeta->buf);
+	__le64 *lba_list;
 	u64 paddr;
 	int nr_secs = pblk->min_write_pgs;
 	int i;
 
+	if (pblk_line_is_full(line)) {
+		struct pblk_line *prev_line = line;
+
+		line = pblk_line_replace_data(pblk);
+		pblk_line_close_meta(pblk, prev_line);
+	}
+
+	emeta = line->emeta;
+	lba_list = emeta_to_lbas(pblk, emeta->buf);
+
 	paddr = pblk_alloc_page(pblk, line, nr_secs);
 
 	for (i = 0; i < nr_secs; i++, paddr++) {
@@ -60,13 +70,6 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
 		}
 	}
 
-	if (pblk_line_is_full(line)) {
-		struct pblk_line *prev_line = line;
-
-		pblk_line_replace_data(pblk);
-		pblk_line_close_meta(pblk, prev_line);
-	}
-
 	pblk_down_rq(pblk, ppa_list, nr_secs, lun_bitmap);
 }
 
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index 8e9ae213b707..eaf539715d71 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -718,7 +718,7 @@ struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
 			      int alloc_type, gfp_t gfp_mask);
 struct pblk_line *pblk_line_get(struct pblk *pblk);
 struct pblk_line *pblk_line_get_first_data(struct pblk *pblk);
-void pblk_line_replace_data(struct pblk *pblk);
+struct pblk_line *pblk_line_replace_data(struct pblk *pblk);
 int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line);
 void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line);
 struct pblk_line *pblk_line_get_data(struct pblk *pblk);
-- 
2.7.4

* [PATCH] lightnvm: pblk: ensure right bad block calculation
@ 2017-09-15 12:35 Javier González
From: Javier González @ 2017-09-15 12:35 UTC (permalink / raw)
  To: linux-block; +Cc: linux-kernel, Javier González

Make sure that the variable controlling the block threshold for
allocating extra metadata sectors on lines with bad blocks never takes
a negative value. Otherwise, the line would be marked as corrupted and
wasted.

Signed-off-by: Javier González <javier@cnexlabs.com>
---
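The one-line fix below, restated with the failure mode spelled out (a
sketch; the precise meaning of i follows from the emeta sizing loop in
pblk_lines_init()):

	/* On small geometries the emeta sizing loop can leave i >=
	 * nr_luns. Clamp at zero so the subtraction cannot go negative
	 * (or wrap) and cause otherwise-good lines to be marked as
	 * corrupted and wasted.
	 */
	lm->emeta_bb = geo->nr_luns > i ? geo->nr_luns - i : 0;
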
 drivers/lightnvm/pblk-init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
index 9d9adcf3e141..7cf4b536d899 100644
--- a/drivers/lightnvm/pblk-init.c
+++ b/drivers/lightnvm/pblk-init.c
@@ -707,7 +707,7 @@ static int pblk_lines_init(struct pblk *pblk)
 		goto add_emeta_page;
 	}
 
-	lm->emeta_bb = geo->nr_luns - i;
+	lm->emeta_bb = geo->nr_luns > i ? geo->nr_luns - i : 0;
 
 	lm->min_blk_line = 1;
 	if (geo->nr_luns > 1)
-- 
2.7.4
