From: "Javier González" <jg@lightnvm.io>
To: mb@lightnvm.io
Cc: linux-kernel@vger.kernel.org, linux-block@vger.kernel.org,
	"Javier González" <javier@cnexlabs.com>
Subject: [RFC 3/4] lightnvm: read from rrpc write buffer if possible
Date: Thu,  4 Feb 2016 14:08:18 +0100
Message-ID: <1454591299-30305-4-git-send-email-javier@javigon.com>
In-Reply-To: <1454591299-30305-1-git-send-email-javier@javigon.com>

Since writes are buffered in memory, incoming reads must be served from
the buffered pages instead of submitting the I/O to the media.

This patch implements that logic. When a read bio arrives at rrpc, any
pages still held in the in-memory write buffer of an open flash block
are copied directly. If the bio is left with "holes", an intermediate
bio is submitted to the media to retrieve the missing pages, and the
original bio is then completed accordingly.
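
For context, here is a minimal, self-contained userspace sketch of the
idea (it is not rrpc code and not part of the patch): buffered pages are
copied first, hits are tracked in a bitmap, and a single follow-up read
then fetches only the holes from the media. buf_lookup() and
media_read() are hypothetical stand-ins for the write-buffer lookup and
the intermediate bio.

/*
 * Sketch only: mimics the two-pass read path with a hit bitmap.
 */
#include <stdio.h>
#include <string.h>

#define PAGE_SZ   32   /* tiny pages keep the demo readable; rrpc uses 4KB */
#define NR_PAGES   4   /* the real code tracks up to 64 ppas per request */

/* toy "write buffer": pages 1 and 2 are still buffered in memory */
static const char *buf_lookup(unsigned long laddr)
{
	static const char * const buffered[NR_PAGES] = {
		NULL, "buffered page 1", "buffered page 2", NULL,
	};

	return laddr < NR_PAGES ? buffered[laddr] : NULL;
}

/* toy "media read": what the intermediate bio would bring back */
static void media_read(unsigned long laddr, char *dst)
{
	snprintf(dst, PAGE_SZ, "media page %lu", laddr);
}

int main(void)
{
	char data[NR_PAGES][PAGE_SZ] = { { 0 } };
	unsigned long read_bitmap = 0;  /* bit i set => page i came from buffer */
	int left = NR_PAGES;
	int i;

	/* first pass: serve whatever the write buffer already holds */
	for (i = 0; i < NR_PAGES; i++) {
		const char *cached = buf_lookup(i);

		if (cached) {
			strncpy(data[i], cached, PAGE_SZ - 1);
			read_bitmap |= 1UL << i;
			left--;
		}
	}

	/* second pass: fetch only the holes from the media */
	if (left) {
		for (i = 0; i < NR_PAGES; i++)
			if (!(read_bitmap & (1UL << i)))
				media_read(i, data[i]);
	}

	for (i = 0; i < NR_PAGES; i++)
		printf("page %d: %s\n", i, data[i]);

	return 0;
}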

Signed-off-by: Javier González <javier@cnexlabs.com>
---
 drivers/lightnvm/rrpc.c  | 451 ++++++++++++++++++++++++++++++++++++-----------
 include/linux/lightnvm.h |   1 +
 2 files changed, 346 insertions(+), 106 deletions(-)

diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index e9fb19d..6348d52 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -827,10 +827,13 @@ static void rrpc_end_io(struct nvm_rq *rqd)
 	struct rrpc *rrpc = container_of(rqd->ins, struct rrpc, instance);
 	uint8_t nr_pages = rqd->nr_pages;
 
-	if (bio_data_dir(rqd->bio) == WRITE)
+	if (bio_data_dir(rqd->bio) == WRITE) {
 		rrpc_end_io_write(rrpc, rqd, nr_pages);
-	else
+	} else {
+		if (rqd->flags & NVM_IOTYPE_SYNC)
+			return;
 		rrpc_end_io_read(rrpc, rqd, nr_pages);
+	}
 
 	bio_put(rqd->bio);
 
@@ -842,83 +845,6 @@ static void rrpc_end_io(struct nvm_rq *rqd)
 	mempool_free(rqd, rrpc->rq_pool);
 }
 
-static int rrpc_read_ppalist_rq(struct rrpc *rrpc, struct bio *bio,
-			struct nvm_rq *rqd, struct rrpc_buf_rq *brrqd,
-			unsigned long flags, int nr_pages)
-{
-	struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
-	struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rrqd);
-	struct rrpc_addr *gp;
-	sector_t laddr = rrpc_get_laddr(bio);
-	int is_gc = flags & NVM_IOTYPE_GC;
-	int i;
-
-	if (!is_gc && rrpc_lock_rq(rrpc, bio, rrqd)) {
-		nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, rqd->dma_ppa_list);
-		mempool_free(rrqd, rrpc->rrq_pool);
-		mempool_free(rqd, rrpc->rq_pool);
-		return NVM_IO_REQUEUE;
-	}
-
-	for (i = 0; i < nr_pages; i++) {
-		/* We assume that mapping occurs at 4KB granularity */
-		BUG_ON(!(laddr + i >= 0 && laddr + i < rrpc->nr_sects));
-		gp = &rrpc->trans_map[laddr + i];
-
-		if (gp->rblk) {
-			rqd->ppa_list[i] = rrpc_ppa_to_gaddr(rrpc->dev,
-								gp->addr);
-		} else {
-			BUG_ON(is_gc);
-			rrpc_unlock_laddr(rrpc, r);
-			nvm_dev_dma_free(rrpc->dev, rqd->ppa_list,
-							rqd->dma_ppa_list);
-			mempool_free(rrqd, rrpc->rrq_pool);
-			mempool_free(rqd, rrpc->rq_pool);
-			return NVM_IO_DONE;
-		}
-
-		brrqd[i].addr = gp;
-	}
-
-	rqd->opcode = NVM_OP_HBREAD;
-
-	return NVM_IO_OK;
-}
-
-static int rrpc_read_rq(struct rrpc *rrpc, struct bio *bio, struct nvm_rq *rqd,
-							unsigned long flags)
-{
-	struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
-	int is_gc = flags & NVM_IOTYPE_GC;
-	sector_t laddr = rrpc_get_laddr(bio);
-	struct rrpc_addr *gp;
-
-	if (!is_gc && rrpc_lock_rq(rrpc, bio, rrqd)) {
-		mempool_free(rrqd, rrpc->rrq_pool);
-		mempool_free(rqd, rrpc->rq_pool);
-		return NVM_IO_REQUEUE;
-	}
-
-	BUG_ON(!(laddr >= 0 && laddr < rrpc->nr_sects));
-	gp = &rrpc->trans_map[laddr];
-
-	if (gp->rblk) {
-		rqd->ppa_addr = rrpc_ppa_to_gaddr(rrpc->dev, gp->addr);
-	} else {
-		BUG_ON(is_gc);
-		rrpc_unlock_rq(rrpc, rrqd);
-		mempool_free(rrqd, rrpc->rrq_pool);
-		mempool_free(rqd, rrpc->rq_pool);
-		return NVM_IO_DONE;
-	}
-
-	rqd->opcode = NVM_OP_HBREAD;
-	rrqd->addr = gp;
-
-	return NVM_IO_OK;
-}
-
 /*
  * Copy data from current bio to block write buffer. This is necessary
  * to guarantee durability if a flash block becomes bad before all pages
@@ -1051,14 +977,335 @@ static int rrpc_write_rq(struct rrpc *rrpc, struct bio *bio,
 	return NVM_IO_DONE;
 }
 
+static int rrpc_buffer_write(struct rrpc *rrpc, struct bio *bio,
+				struct rrpc_rq *rrqd, unsigned long flags)
+{
+	uint8_t nr_pages = rrpc_get_pages(bio);
+
+	rrqd->nr_pages = nr_pages;
+
+	if (nr_pages > 1)
+		return rrpc_write_ppalist_rq(rrpc, bio, rrqd, flags, nr_pages);
+	else
+		return rrpc_write_rq(rrpc, bio, rrqd, flags);
+}
+
+static int rrpc_read_ppalist_rq(struct rrpc *rrpc, struct bio *bio,
+			struct nvm_rq *rqd, struct rrpc_buf_rq *brrqd,
+			unsigned long flags, int nr_pages)
+{
+	struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
+	struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rrqd);
+	struct rrpc_addr *gp;
+	sector_t laddr = rrpc_get_laddr(bio);
+	int is_gc = flags & NVM_IOTYPE_GC;
+	int i;
+
+	if (!is_gc && rrpc_lock_rq(rrpc, bio, rrqd)) {
+		nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, rqd->dma_ppa_list);
+		return NVM_IO_REQUEUE;
+	}
+
+	for (i = 0; i < nr_pages; i++) {
+		/* We assume that mapping occurs at 4KB granularity */
+		BUG_ON(!(laddr + i >= 0 && laddr + i < rrpc->nr_sects));
+		gp = &rrpc->trans_map[laddr + i];
+
+		if (gp->rblk) {
+			rqd->ppa_list[i] = rrpc_ppa_to_gaddr(rrpc->dev,
+								gp->addr);
+		} else {
+			BUG_ON(is_gc);
+			rrpc_unlock_laddr(rrpc, r);
+			nvm_dev_dma_free(rrpc->dev, rqd->ppa_list,
+							rqd->dma_ppa_list);
+			return NVM_IO_DONE;
+		}
+
+		brrqd[i].addr = gp;
+	}
+
+	rqd->opcode = NVM_OP_HBREAD;
+
+	return NVM_IO_OK;
+}
+
+static int rrpc_read_rq(struct rrpc *rrpc, struct bio *bio, struct nvm_rq *rqd,
+							unsigned long flags)
+{
+	struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
+	int is_gc = flags & NVM_IOTYPE_GC;
+	sector_t laddr = rrpc_get_laddr(bio);
+	struct rrpc_addr *gp;
+
+	if (!is_gc && rrpc_lock_rq(rrpc, bio, rrqd))
+		return NVM_IO_REQUEUE;
+
+	BUG_ON(!(laddr >= 0 && laddr < rrpc->nr_sects));
+	gp = &rrpc->trans_map[laddr];
+
+	if (gp->rblk) {
+		rqd->ppa_addr = rrpc_ppa_to_gaddr(rrpc->dev, gp->addr);
+	} else {
+		BUG_ON(is_gc);
+		rrpc_unlock_rq(rrpc, rrqd);
+		return NVM_IO_DONE;
+	}
+
+	rqd->opcode = NVM_OP_HBREAD;
+	rrqd->addr = gp;
+
+	return NVM_IO_OK;
+}
+
+static int rrpc_read_w_buf_entry(struct bio *bio, struct rrpc_block *rblk,
+					struct bvec_iter iter, int entry)
+{
+	struct buf_entry *read_entry;
+	struct bio_vec bv;
+	struct page *page;
+	void *kaddr;
+	void *data;
+	int read = 0;
+
+	lockdep_assert_held(&rblk->w_buf.s_lock);
+
+	spin_lock(&rblk->w_buf.w_lock);
+	if (entry >= rblk->w_buf.cur_mem) {
+		spin_unlock(&rblk->w_buf.w_lock);
+		goto out;
+	}
+	spin_unlock(&rblk->w_buf.w_lock);
+
+	read_entry = &rblk->w_buf.entries[entry];
+	data = read_entry->data;
+
+	bv = bio_iter_iovec(bio, iter);
+	page = bv.bv_page;
+	kaddr = kmap_atomic(page);
+	memcpy(kaddr + bv.bv_offset, data, RRPC_EXPOSED_PAGE_SIZE);
+	kunmap_atomic(kaddr);
+	read++;
+
+out:
+	return read;
+}
+
+static int rrpc_read_from_w_buf(struct rrpc *rrpc, struct nvm_rq *rqd,
+			struct rrpc_buf_rq *brrqd, unsigned long *read_bitmap)
+{
+	struct nvm_dev *dev = rrpc->dev;
+	struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
+	struct rrpc_addr *addr;
+	struct bio *bio = rqd->bio;
+	struct bvec_iter iter = bio->bi_iter;
+	struct rrpc_block *rblk;
+	unsigned long blk_id;
+	int nr_pages = rqd->nr_pages;
+	int left = nr_pages;
+	int read = 0;
+	int entry;
+	int i;
+
+	if (nr_pages != bio->bi_vcnt)
+		goto out;
+
+	if (nr_pages == 1) {
+		rblk = rrqd->addr->rblk;
+
+		/* If the write buffer exists, the block is open in memory */
+		spin_lock(&rblk->w_buf.s_lock);
+		atomic_inc(&rblk->w_buf.refs);
+		if (rblk->w_buf.entries) {
+			blk_id = rblk->parent->id;
+			entry = rrqd->addr->addr -
+				(blk_id * dev->sec_per_pg * dev->pgs_per_blk);
+
+			read = rrpc_read_w_buf_entry(bio, rblk, iter, entry);
+
+			left -= read;
+			WARN_ON(test_and_set_bit(0, read_bitmap));
+		}
+		bio_advance_iter(bio, &iter, RRPC_EXPOSED_PAGE_SIZE);
+
+		atomic_dec(&rblk->w_buf.refs);
+		spin_unlock(&rblk->w_buf.s_lock);
+
+		goto out;
+	}
+
+	/* Iterate through all pages and copy those that are found in the write
+	 * buffer. We will complete the holes (if any) with an intermediate bio
+	 * later on.
+	 */
+	for (i = 0; i < nr_pages; i++) {
+		addr = brrqd[i].addr;
+		rblk = addr->rblk;
+
+		/* If the write buffer exists, the block is open in memory */
+		spin_lock(&rblk->w_buf.s_lock);
+		atomic_inc(&rblk->w_buf.refs);
+		if (rblk->w_buf.entries) {
+			blk_id = rblk->parent->id;
+			entry = addr->addr - (blk_id * dev->sec_per_pg *
+							dev->pgs_per_blk);
+
+			read = rrpc_read_w_buf_entry(bio, rblk, iter, entry);
+
+			left -= read;
+			WARN_ON(test_and_set_bit(i, read_bitmap));
+		}
+		bio_advance_iter(bio, &iter, RRPC_EXPOSED_PAGE_SIZE);
+
+		atomic_dec(&rblk->w_buf.refs);
+		spin_unlock(&rblk->w_buf.s_lock);
+	}
+
+out:
+	return left;
+}
+
+static int rrpc_submit_read_io(struct rrpc *rrpc, struct bio *bio,
+				struct nvm_rq *rqd, unsigned long flags)
+{
+	struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
+	int err;
+
+	err = nvm_submit_io(rrpc->dev, rqd);
+	if (err) {
+		pr_err("rrpc: I/O submission failed: %d\n", err);
+		bio_put(bio);
+		if (!(flags & NVM_IOTYPE_GC)) {
+			rrpc_unlock_rq(rrpc, rrqd);
+			if (rqd->nr_pages > 1)
+				nvm_dev_dma_free(rrpc->dev,
+			rqd->ppa_list, rqd->dma_ppa_list);
+		}
+		return NVM_IO_ERR;
+	}
+
+	return NVM_IO_OK;
+}
+
+static int rrpc_fill_partial_read_bio(struct rrpc *rrpc, struct bio *bio,
+				unsigned long *read_bitmap, struct nvm_rq *rqd,
+				struct rrpc_buf_rq *brrqd, uint8_t nr_pages)
+{
+	struct bio *new_bio;
+	struct page *page;
+	struct bio_vec src_bv, dst_bv;
+	void *src_p, *dst_p;
+	int nr_holes = nr_pages - bitmap_weight(read_bitmap, nr_pages);
+	int hole;
+	int i = 0;
+	int ret;
+	DECLARE_COMPLETION_ONSTACK(wait);
+
+	new_bio = bio_alloc(GFP_KERNEL, nr_holes);
+	if (!new_bio) {
+		pr_err("nvm: rrpc: could not alloc read bio\n");
+		return NVM_IO_ERR;
+	}
+
+	hole = find_first_zero_bit(read_bitmap, nr_pages);
+	do {
+		page = mempool_alloc(rrpc->page_pool, GFP_KERNEL);
+		if (!page) {
+			bio_put(new_bio);
+			pr_err("nvm: rrpc: could not alloc read page\n");
+			goto err;
+		}
+
+		ret = bio_add_page(new_bio, page, RRPC_EXPOSED_PAGE_SIZE, 0);
+		if (ret != RRPC_EXPOSED_PAGE_SIZE) {
+			pr_err("nvm: rrpc: could not add page to bio\n");
+			mempool_free(page, rrpc->page_pool);
+			goto err;
+		}
+
+		rqd->ppa_list[i] = rrpc_ppa_to_gaddr(rrpc->dev,
+							brrqd[hole].addr->addr);
+
+		i++;
+		hole = find_next_zero_bit(read_bitmap, nr_pages, hole + 1);
+	} while (hole != nr_pages);
+
+	if (nr_holes != new_bio->bi_vcnt) {
+		pr_err("rrpc: malformed bio\n");
+		goto err;
+	}
+
+	new_bio->bi_iter.bi_sector = bio->bi_iter.bi_sector;
+	new_bio->bi_rw = READ;
+	new_bio->bi_private = &wait;
+	new_bio->bi_end_io = rrpc_end_sync_bio;
+
+	rqd->flags |= NVM_IOTYPE_SYNC;
+	rqd->bio = new_bio;
+	rqd->nr_pages = nr_holes;
+
+	rrpc_submit_read_io(rrpc, new_bio, rqd, rqd->flags);
+	wait_for_completion_io(&wait);
+
+	if (new_bio->bi_error)
+		goto err;
+
+	/* Fill the holes in the original bio */
+	i = 0;
+	hole = find_first_zero_bit(read_bitmap, nr_pages);
+	do {
+		src_bv = new_bio->bi_io_vec[i];
+		dst_bv = bio->bi_io_vec[hole];
+
+		src_p = kmap_atomic(src_bv.bv_page);
+		dst_p = kmap_atomic(dst_bv.bv_page);
+
+		memcpy(dst_p + dst_bv.bv_offset,
+			src_p + src_bv.bv_offset,
+			RRPC_EXPOSED_PAGE_SIZE);
+
+		kunmap_atomic(src_p);
+		kunmap_atomic(dst_p);
+
+		mempool_free(src_bv.bv_page, rrpc->page_pool);
+
+		i++;
+		hole = find_next_zero_bit(read_bitmap, nr_pages, hole + 1);
+	} while (hole != nr_pages);
+
+	bio_put(new_bio);
+
+	/* Complete the original bio and associated request */
+	rqd->flags &= ~NVM_IOTYPE_SYNC;
+	rqd->bio = bio;
+	rqd->nr_pages = nr_pages;
+
+	bio_endio(bio);
+	rrpc_end_io(rqd);
+	return NVM_IO_OK;
+
+err:
+	/* Free allocated pages in new bio */
+	for (i = 0; i < new_bio->bi_vcnt; i++) {
+		src_bv = new_bio->bi_io_vec[i];
+		mempool_free(src_bv.bv_page, rrpc->page_pool);
+	}
+	bio_endio(new_bio);
+	return NVM_IO_ERR;
+}
+
 static int rrpc_submit_read(struct rrpc *rrpc, struct bio *bio,
 				struct rrpc_rq *rrqd, unsigned long flags)
 {
 	struct nvm_rq *rqd;
 	struct rrpc_buf_rq brrqd[rrpc->max_write_pgs];
+	unsigned long read_bitmap; /* Max 64 ppas per request */
+	int left;
 	uint8_t nr_pages = rrpc_get_pages(bio);
 	int err;
 
+	bitmap_zero(&read_bitmap, nr_pages);
+
 	rqd = mempool_alloc(rrpc->rq_pool, GFP_KERNEL);
 	if (!rqd) {
 		pr_err_ratelimited("rrpc: not able to queue bio.");
@@ -1073,22 +1320,25 @@ static int rrpc_submit_read(struct rrpc *rrpc, struct bio *bio,
 						&rqd->dma_ppa_list);
 		if (!rqd->ppa_list) {
 			pr_err("rrpc: not able to allocate ppa list\n");
-			mempool_free(rrqd, rrpc->rrq_pool);
 			mempool_free(rqd, rrpc->rq_pool);
+			mempool_free(rrqd, rrpc->rrq_pool);
 			return NVM_IO_ERR;
 		}
 
 		err = rrpc_read_ppalist_rq(rrpc, bio, rqd, brrqd, flags,
 								nr_pages);
 		if (err) {
-			mempool_free(rrqd, rrpc->rrq_pool);
 			mempool_free(rqd, rrpc->rq_pool);
+			mempool_free(rrqd, rrpc->rrq_pool);
 			return err;
 		}
 	} else {
 		err = rrpc_read_rq(rrpc, bio, rqd, flags);
-		if (err)
+		if (err) {
+			mempool_free(rrqd, rrpc->rrq_pool);
+			mempool_free(rqd, rrpc->rq_pool);
 			return err;
+		}
 	}
 
 	bio_get(bio);
@@ -1097,33 +1347,22 @@ static int rrpc_submit_read(struct rrpc *rrpc, struct bio *bio,
 	rqd->nr_pages = rrqd->nr_pages = nr_pages;
 	rqd->flags = flags;
 
-	err = nvm_submit_io(rrpc->dev, rqd);
-	if (err) {
-		pr_err("rrpc: I/O submission failed: %d\n", err);
-		bio_put(bio);
-		if (!(flags & NVM_IOTYPE_GC)) {
-			rrpc_unlock_rq(rrpc, rrqd);
-			if (rqd->nr_pages > 1)
-				nvm_dev_dma_free(rrpc->dev,
-			rqd->ppa_list, rqd->dma_ppa_list);
-		}
+	left = rrpc_read_from_w_buf(rrpc, rqd, brrqd, &read_bitmap);
+	if (left == 0) {
+		bio_endio(bio);
+		rrpc_end_io(rqd);
+		return NVM_IO_OK;
+	} else if (left < 0)
 		return NVM_IO_ERR;
-	}
 
-	return NVM_IO_OK;
-}
+	if (bitmap_empty(&read_bitmap, nr_pages))
+		return rrpc_submit_read_io(rrpc, bio, rqd, flags);
 
-static int rrpc_buffer_write(struct rrpc *rrpc, struct bio *bio,
-				struct rrpc_rq *rrqd, unsigned long flags)
-{
-	uint8_t nr_pages = rrpc_get_pages(bio);
-
-	rrqd->nr_pages = nr_pages;
-
-	if (nr_pages > 1)
-		return rrpc_write_ppalist_rq(rrpc, bio, rrqd, flags, nr_pages);
-	else
-		return rrpc_write_rq(rrpc, bio, rrqd, flags);
+	/* The read bio could not be completely read from the write buffer. This
+	 * case only occurs when several pages are sent in a single bio
+	 */
+	return rrpc_fill_partial_read_bio(rrpc, bio, &read_bitmap, rqd, brrqd,
+								nr_pages);
 }
 
 static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio,
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index eda9743..ae26ced 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -11,6 +11,7 @@ enum {
 
 	NVM_IOTYPE_NONE = 0,
 	NVM_IOTYPE_GC = 1,
+	NVM_IOTYPE_SYNC = 2,
 };
 
 #define NVM_BLK_BITS (16)
-- 
2.1.4


Thread overview: 9+ messages
2016-02-04 13:08 [RFC 0/4] lightnvm: add write buffering to rrpc Javier González
2016-02-04 13:08 ` [RFC 1/4] lightnvm: precalculate controller write boundaries Javier González
2016-02-05 14:53   ` Matias Bjørling
2016-02-04 13:08 ` [RFC 2/4] lightnvm: add write buffering for rrpc Javier González
2016-02-05 14:52   ` Matias Bjørling
2016-02-08  7:31     ` Javier González
2016-02-04 13:08 ` Javier González [this message]
2016-02-05 14:54   ` [RFC 3/4] lightnvm: read from rrpc write buffer if possible Matias Bjørling
2016-02-04 13:08 ` [RFC 4/4] lightnvm: add debug info for rrpc target Javier González
