* [RFC v2 0/2] virtio-pmem: Asynchronous flush
@ 2021-07-26  6:08 Pankaj Gupta
  2021-07-26  6:08 ` [RFC v2 1/2] virtio-pmem: Async virtio-pmem flush Pankaj Gupta
                   ` (2 more replies)
  0 siblings, 3 replies; 21+ messages in thread
From: Pankaj Gupta @ 2021-07-26  6:08 UTC
  To: nvdimm, linux-kernel
  Cc: dan.j.williams, jmoyer, david, mst, cohuck, vishal.l.verma,
	dave.jiang, ira.weiny, pankaj.gupta.linux, Pankaj Gupta

From: Pankaj Gupta <pankaj.gupta@ionos.com>

 Jeff reported a preflush ordering issue with the existing
 implementation of virtio pmem preflush. Dan suggested[1] implementing
 asynchronous flush for virtio pmem using a work queue, as done in
 md/RAID. This patch series solves the preflush ordering issue and
 also makes the flush asynchronous for the submitting thread.

 Submitting this patch series for review. Sorry, it took me a long
 time to come back to this due to personal reasons.

 RFC v1 -> RFC v2
 - More testing and bug fixes.

 [1] https://marc.info/?l=linux-kernel&m=157446316409937&w=2
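
 In outline, the flow patch 1 implements is (a simplified sketch built
 from the fields the patch adds; the real locking and bio handling are
 in the patch itself):

	/* submitter side: coalesce, then hand off to the workqueue */
	spin_lock_irq(&vpmem->lock);
	wait_event_lock_irq(vpmem->sb_wait,
			    !vpmem->flush_bio ||
			    ktime_before(req_start, vpmem->prev_flush_start),
			    vpmem->lock);
	if (ktime_after(req_start, vpmem->prev_flush_start)) {
		vpmem->flush_bio = bio;	/* becomes the coalesced flush */
		queue_work(vpmem->pmem_wq, &vpmem->flush_work);
	}
	/* else: a flush that began after req_start already completed,
	 * so the bio can proceed without another device flush */
	spin_unlock_irq(&vpmem->lock);

	/* worker side: one host flush covers every coalesced waiter */
	vpmem->start_flush = ktime_get_boottime();
	bio->bi_status = errno_to_blk_status(virtio_pmem_flush(vpmem->nd_region));
	vpmem->prev_flush_start = vpmem->start_flush;
	vpmem->flush_bio = NULL;
	wake_up(&vpmem->sb_wait);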

Pankaj Gupta (2):
  virtio-pmem: Async virtio-pmem flush
  pmem: enable pmem_submit_bio for asynchronous flush

 drivers/nvdimm/nd_virtio.c   | 72 ++++++++++++++++++++++++++++--------
 drivers/nvdimm/pmem.c        | 17 ++++++---
 drivers/nvdimm/virtio_pmem.c | 10 ++++-
 drivers/nvdimm/virtio_pmem.h | 14 +++++++
 4 files changed, 91 insertions(+), 22 deletions(-)

-- 
2.25.1



* [RFC v2 1/2] virtio-pmem: Async virtio-pmem flush
  2021-07-26  6:08 [RFC v2 0/2] virtio-pmem: Asynchronous flush Pankaj Gupta
@ 2021-07-26  6:08 ` Pankaj Gupta
  2021-08-25 17:25     ` Dan Williams
  2021-07-26  6:08 ` [RFC v2 2/2] pmem: Enable pmem_submit_bio for asynchronous flush Pankaj Gupta
  2021-08-19 11:08   ` Pankaj Gupta
  2 siblings, 1 reply; 21+ messages in thread
From: Pankaj Gupta @ 2021-07-26  6:08 UTC
  To: nvdimm, linux-kernel
  Cc: dan.j.williams, jmoyer, david, mst, cohuck, vishal.l.verma,
	dave.jiang, ira.weiny, pankaj.gupta.linux, Pankaj Gupta

From: Pankaj Gupta <pankaj.gupta@ionos.com>

Implement asynchronous flush for virtio pmem using a work queue
to solve the preflush ordering issue. Also, coalesce flush
requests when a flush is already in progress.

Signed-off-by: Pankaj Gupta <pankaj.gupta@ionos.com>
---
 drivers/nvdimm/nd_virtio.c   | 72 ++++++++++++++++++++++++++++--------
 drivers/nvdimm/virtio_pmem.c | 10 ++++-
 drivers/nvdimm/virtio_pmem.h | 14 +++++++
 3 files changed, 79 insertions(+), 17 deletions(-)

diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c
index 10351d5b49fa..61b655b583be 100644
--- a/drivers/nvdimm/nd_virtio.c
+++ b/drivers/nvdimm/nd_virtio.c
@@ -97,29 +97,69 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
 	return err;
 };
 
+static void submit_async_flush(struct work_struct *ws);
+
 /* The asynchronous flush callback function */
 int async_pmem_flush(struct nd_region *nd_region, struct bio *bio)
 {
-	/*
-	 * Create child bio for asynchronous flush and chain with
-	 * parent bio. Otherwise directly call nd_region flush.
+	/* queue asynchronous flush and coalesce the flush requests */
+	struct virtio_device *vdev = nd_region->provider_data;
+	struct virtio_pmem *vpmem  = vdev->priv;
+	ktime_t req_start = ktime_get_boottime();
+
+	spin_lock_irq(&vpmem->lock);
+	/* flush requests wait until ongoing flush completes,
+	 * hence coalescing all the pending requests.
 	 */
-	if (bio && bio->bi_iter.bi_sector != -1) {
-		struct bio *child = bio_alloc(GFP_ATOMIC, 0);
-
-		if (!child)
-			return -ENOMEM;
-		bio_copy_dev(child, bio);
-		child->bi_opf = REQ_PREFLUSH;
-		child->bi_iter.bi_sector = -1;
-		bio_chain(child, bio);
-		submit_bio(child);
-		return 0;
+	wait_event_lock_irq(vpmem->sb_wait,
+			    !vpmem->flush_bio ||
+			    ktime_before(req_start, vpmem->prev_flush_start),
+			    vpmem->lock);
+	/* new request after previous flush is completed */
+	if (ktime_after(req_start, vpmem->prev_flush_start)) {
+		WARN_ON(vpmem->flush_bio);
+		vpmem->flush_bio = bio;
+		bio = NULL;
+	}
+	spin_unlock_irq(&vpmem->lock);
+
+	if (!bio) {
+		INIT_WORK(&vpmem->flush_work, submit_async_flush);
+		queue_work(vpmem->pmem_wq, &vpmem->flush_work);
+		return 1;
+	}
+
+	/* flush completed in other context while we waited */
+	if (bio && (bio->bi_opf & REQ_PREFLUSH)) {
+		bio->bi_opf &= ~REQ_PREFLUSH;
+		submit_bio(bio);
+	} else if (bio && (bio->bi_opf & REQ_FUA)) {
+		bio->bi_opf &= ~REQ_FUA;
+		bio_endio(bio);
 	}
-	if (virtio_pmem_flush(nd_region))
-		return -EIO;
 
 	return 0;
 };
 EXPORT_SYMBOL_GPL(async_pmem_flush);
+
+static void submit_async_flush(struct work_struct *ws)
+{
+	struct virtio_pmem *vpmem = container_of(ws, struct virtio_pmem, flush_work);
+	struct bio *bio = vpmem->flush_bio;
+
+	vpmem->start_flush = ktime_get_boottime();
+	bio->bi_status = errno_to_blk_status(virtio_pmem_flush(vpmem->nd_region));
+	vpmem->prev_flush_start = vpmem->start_flush;
+	vpmem->flush_bio = NULL;
+	wake_up(&vpmem->sb_wait);
+
+	/* Submit parent bio only for PREFLUSH */
+	if (bio && (bio->bi_opf & REQ_PREFLUSH)) {
+		bio->bi_opf &= ~REQ_PREFLUSH;
+		submit_bio(bio);
+	} else if (bio && (bio->bi_opf & REQ_FUA)) {
+		bio->bi_opf &= ~REQ_FUA;
+		bio_endio(bio);
+	}
+}
 MODULE_LICENSE("GPL");
diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c
index 726c7354d465..56780a6140c7 100644
--- a/drivers/nvdimm/virtio_pmem.c
+++ b/drivers/nvdimm/virtio_pmem.c
@@ -24,6 +24,7 @@ static int init_vq(struct virtio_pmem *vpmem)
 		return PTR_ERR(vpmem->req_vq);
 
 	spin_lock_init(&vpmem->pmem_lock);
+	spin_lock_init(&vpmem->lock);
 	INIT_LIST_HEAD(&vpmem->req_list);
 
 	return 0;
@@ -57,7 +58,12 @@ static int virtio_pmem_probe(struct virtio_device *vdev)
 		dev_err(&vdev->dev, "failed to initialize virtio pmem vq's\n");
 		goto out_err;
 	}
-
+	vpmem->pmem_wq = alloc_workqueue("vpmem_wq", WQ_MEM_RECLAIM, 0);
+	if (!vpmem->pmem_wq) {
+		err = -ENOMEM;
+		goto out_err;
+	}
+	init_waitqueue_head(&vpmem->sb_wait);
 	virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
 			start, &vpmem->start);
 	virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
@@ -90,10 +96,12 @@ static int virtio_pmem_probe(struct virtio_device *vdev)
 		goto out_nd;
 	}
 	nd_region->provider_data = dev_to_virtio(nd_region->dev.parent->parent);
+	vpmem->nd_region = nd_region;
 	return 0;
 out_nd:
 	nvdimm_bus_unregister(vpmem->nvdimm_bus);
 out_vq:
+	destroy_workqueue(vpmem->pmem_wq);
 	vdev->config->del_vqs(vdev);
 out_err:
 	return err;
diff --git a/drivers/nvdimm/virtio_pmem.h b/drivers/nvdimm/virtio_pmem.h
index 0dddefe594c4..d9abc8d052b6 100644
--- a/drivers/nvdimm/virtio_pmem.h
+++ b/drivers/nvdimm/virtio_pmem.h
@@ -35,9 +35,23 @@ struct virtio_pmem {
 	/* Virtio pmem request queue */
 	struct virtqueue *req_vq;
 
+	struct bio *flush_bio;
+	/* prev_flush_start is when the last completed flush was started */
+	ktime_t prev_flush_start, start_flush;
+
+	/* work queue for deferred flush */
+	struct work_struct flush_work;
+	struct workqueue_struct *pmem_wq;
+
+	/* Synchronize flush wait queue data */
+	spinlock_t lock;
+	/* for waiting for previous flush to complete */
+	wait_queue_head_t sb_wait;
+
 	/* nvdimm bus registers virtio pmem device */
 	struct nvdimm_bus *nvdimm_bus;
 	struct nvdimm_bus_descriptor nd_desc;
+	struct nd_region *nd_region;
 
 	/* List to store deferred work if virtqueue is full */
 	struct list_head req_list;
-- 
2.25.1



* [RFC v2 2/2] pmem: Enable pmem_submit_bio for asynchronous flush
  2021-07-26  6:08 [RFC v2 0/2] virtio-pmem: Asynchronous flush Pankaj Gupta
  2021-07-26  6:08 ` [RFC v2 1/2] virtio-pmem: Async virtio-pmem flush Pankaj Gupta
@ 2021-07-26  6:08 ` Pankaj Gupta
  2021-07-26 13:25   ` kernel test robot
  2021-07-26 13:25   ` kernel test robot
  2021-08-19 11:08   ` Pankaj Gupta
  2 siblings, 2 replies; 21+ messages in thread
From: Pankaj Gupta @ 2021-07-26  6:08 UTC
  To: nvdimm, linux-kernel
  Cc: dan.j.williams, jmoyer, david, mst, cohuck, vishal.l.verma,
	dave.jiang, ira.weiny, pankaj.gupta.linux, Pankaj Gupta

From: Pankaj Gupta <pankaj.gupta@ionos.com>

Return early from pmem_submit_bio() when an asynchronous flush is
in progress in another context.

Signed-off-by: Pankaj Gupta <pankaj.gupta@ionos.com>
---
 drivers/nvdimm/pmem.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 1e0615b8565e..3ca1fa88a5e7 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -201,8 +201,13 @@ static blk_qc_t pmem_submit_bio(struct bio *bio)
 	struct pmem_device *pmem = bio->bi_bdev->bd_disk->private_data;
 	struct nd_region *nd_region = to_region(pmem);
 
-	if (bio->bi_opf & REQ_PREFLUSH)
-		ret = nvdimm_flush(nd_region, bio);
+	if ((bio->bi_opf & REQ_PREFLUSH) &&
+		nvdimm_flush(nd_region, bio)) {
+
+		/* asynchronous flush completes in other context */
+		if (nd_region->flush)
+			return BLK_QC_T_NONE;
+	}
 
 	do_acct = blk_queue_io_stat(bio->bi_bdev->bd_disk->queue);
 	if (do_acct)
@@ -222,11 +227,13 @@ static blk_qc_t pmem_submit_bio(struct bio *bio)
 	if (do_acct)
 		bio_end_io_acct(bio, start);
 
-	if (bio->bi_opf & REQ_FUA)
+	if (bio->bi_opf & REQ_FUA)  {
 		ret = nvdimm_flush(nd_region, bio);
 
-	if (ret)
-		bio->bi_status = errno_to_blk_status(ret);
+		/* asynchronous flush completes in other context */
+		if (nd_region->flush)
+			return BLK_QC_T_NONE;
+	}
 
 	bio_endio(bio);
 	return BLK_QC_T_NONE;
-- 
2.25.1



* Re: [RFC v2 2/2] pmem: Enable pmem_submit_bio for asynchronous flush
  2021-07-26  6:08 ` [RFC v2 2/2] pmem: Enable pmem_submit_bio for asynchronous flush Pankaj Gupta
@ 2021-07-26 13:25   ` kernel test robot
  2021-07-26 13:58     ` Pankaj Gupta
  2021-07-26 13:25   ` kernel test robot
  1 sibling, 1 reply; 21+ messages in thread
From: kernel test robot @ 2021-07-26 13:25 UTC
  To: kbuild-all


Hi Pankaj,

[FYI, it's a private test report for your RFC patch.]
[auto build test WARNING on linux/master]
[also build test WARNING on linus/master linux-nvdimm/libnvdimm-for-next v5.14-rc3 next-20210723]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Pankaj-Gupta/virtio-pmem-Async-virtio-pmem-flush/20210726-150913
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 349a2d52ffe59b7a0c5876fa7ee9f3eaf188b830
config: arc-allyesconfig (attached as .config)
compiler: arceb-elf-gcc (GCC) 10.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/e6040aad1930816dc183a36f6ff5eae1412a9713
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Pankaj-Gupta/virtio-pmem-Async-virtio-pmem-flush/20210726-150913
        git checkout e6040aad1930816dc183a36f6ff5eae1412a9713
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-10.3.0 make.cross ARCH=arc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   drivers/nvdimm/pmem.c: In function 'pmem_submit_bio':
>> drivers/nvdimm/pmem.c:195:6: warning: variable 'ret' set but not used [-Wunused-but-set-variable]
     195 |  int ret = 0;
         |      ^~~


vim +/ret +195 drivers/nvdimm/pmem.c

9e853f2313e5eb drivers/block/pmem.c  Ross Zwisler      2015-04-01  192  
c62b37d96b6eb3 drivers/nvdimm/pmem.c Christoph Hellwig 2020-07-01  193  static blk_qc_t pmem_submit_bio(struct bio *bio)
9e853f2313e5eb drivers/block/pmem.c  Ross Zwisler      2015-04-01  194  {
c5d4355d10d414 drivers/nvdimm/pmem.c Pankaj Gupta      2019-07-05 @195  	int ret = 0;
4e4cbee93d5613 drivers/nvdimm/pmem.c Christoph Hellwig 2017-06-03  196  	blk_status_t rc = 0;
f0dc089ce217e7 drivers/nvdimm/pmem.c Dan Williams      2015-05-16  197  	bool do_acct;
f0dc089ce217e7 drivers/nvdimm/pmem.c Dan Williams      2015-05-16  198  	unsigned long start;
9e853f2313e5eb drivers/block/pmem.c  Ross Zwisler      2015-04-01  199  	struct bio_vec bvec;
9e853f2313e5eb drivers/block/pmem.c  Ross Zwisler      2015-04-01  200  	struct bvec_iter iter;
309dca309fc39a drivers/nvdimm/pmem.c Christoph Hellwig 2021-01-24  201  	struct pmem_device *pmem = bio->bi_bdev->bd_disk->private_data;
7e267a8c790edf drivers/nvdimm/pmem.c Dan Williams      2016-06-01  202  	struct nd_region *nd_region = to_region(pmem);
7e267a8c790edf drivers/nvdimm/pmem.c Dan Williams      2016-06-01  203  
e6040aad193081 drivers/nvdimm/pmem.c Pankaj Gupta      2021-07-26  204  	if ((bio->bi_opf & REQ_PREFLUSH) &&
e6040aad193081 drivers/nvdimm/pmem.c Pankaj Gupta      2021-07-26  205  		nvdimm_flush(nd_region, bio)) {
e6040aad193081 drivers/nvdimm/pmem.c Pankaj Gupta      2021-07-26  206  
e6040aad193081 drivers/nvdimm/pmem.c Pankaj Gupta      2021-07-26  207  		/* asynchronous flush completes in other context */
e6040aad193081 drivers/nvdimm/pmem.c Pankaj Gupta      2021-07-26  208  		if (nd_region->flush)
e6040aad193081 drivers/nvdimm/pmem.c Pankaj Gupta      2021-07-26  209  			return BLK_QC_T_NONE;
e6040aad193081 drivers/nvdimm/pmem.c Pankaj Gupta      2021-07-26  210  	}
9e853f2313e5eb drivers/block/pmem.c  Ross Zwisler      2015-04-01  211  
309dca309fc39a drivers/nvdimm/pmem.c Christoph Hellwig 2021-01-24  212  	do_acct = blk_queue_io_stat(bio->bi_bdev->bd_disk->queue);
0fd92f89a44d3b drivers/nvdimm/pmem.c Christoph Hellwig 2020-05-27  213  	if (do_acct)
0fd92f89a44d3b drivers/nvdimm/pmem.c Christoph Hellwig 2020-05-27  214  		start = bio_start_io_acct(bio);
e10624f8c09710 drivers/nvdimm/pmem.c Dan Williams      2016-01-06  215  	bio_for_each_segment(bvec, bio, iter) {
5d64efe79703c4 drivers/nvdimm/pmem.c Vivek Goyal       2020-02-28  216  		if (op_is_write(bio_op(bio)))
5d64efe79703c4 drivers/nvdimm/pmem.c Vivek Goyal       2020-02-28  217  			rc = pmem_do_write(pmem, bvec.bv_page, bvec.bv_offset,
5d64efe79703c4 drivers/nvdimm/pmem.c Vivek Goyal       2020-02-28  218  				iter.bi_sector, bvec.bv_len);
5d64efe79703c4 drivers/nvdimm/pmem.c Vivek Goyal       2020-02-28  219  		else
5d64efe79703c4 drivers/nvdimm/pmem.c Vivek Goyal       2020-02-28  220  			rc = pmem_do_read(pmem, bvec.bv_page, bvec.bv_offset,
5d64efe79703c4 drivers/nvdimm/pmem.c Vivek Goyal       2020-02-28  221  				iter.bi_sector, bvec.bv_len);
e10624f8c09710 drivers/nvdimm/pmem.c Dan Williams      2016-01-06  222  		if (rc) {
4e4cbee93d5613 drivers/nvdimm/pmem.c Christoph Hellwig 2017-06-03  223  			bio->bi_status = rc;
e10624f8c09710 drivers/nvdimm/pmem.c Dan Williams      2016-01-06  224  			break;
e10624f8c09710 drivers/nvdimm/pmem.c Dan Williams      2016-01-06  225  		}
e10624f8c09710 drivers/nvdimm/pmem.c Dan Williams      2016-01-06  226  	}
f0dc089ce217e7 drivers/nvdimm/pmem.c Dan Williams      2015-05-16  227  	if (do_acct)
0fd92f89a44d3b drivers/nvdimm/pmem.c Christoph Hellwig 2020-05-27  228  		bio_end_io_acct(bio, start);
61031952f4c89d drivers/nvdimm/pmem.c Ross Zwisler      2015-06-25  229  
e6040aad193081 drivers/nvdimm/pmem.c Pankaj Gupta      2021-07-26  230  	if (bio->bi_opf & REQ_FUA)  {
c5d4355d10d414 drivers/nvdimm/pmem.c Pankaj Gupta      2019-07-05  231  		ret = nvdimm_flush(nd_region, bio);
c5d4355d10d414 drivers/nvdimm/pmem.c Pankaj Gupta      2019-07-05  232  
e6040aad193081 drivers/nvdimm/pmem.c Pankaj Gupta      2021-07-26  233  		/* asynchronous flush completes in other context */
e6040aad193081 drivers/nvdimm/pmem.c Pankaj Gupta      2021-07-26  234  		if (nd_region->flush)
e6040aad193081 drivers/nvdimm/pmem.c Pankaj Gupta      2021-07-26  235  			return BLK_QC_T_NONE;
e6040aad193081 drivers/nvdimm/pmem.c Pankaj Gupta      2021-07-26  236  	}
61031952f4c89d drivers/nvdimm/pmem.c Ross Zwisler      2015-06-25  237  
4246a0b63bd8f5 drivers/nvdimm/pmem.c Christoph Hellwig 2015-07-20  238  	bio_endio(bio);
dece16353ef47d drivers/nvdimm/pmem.c Jens Axboe        2015-11-05  239  	return BLK_QC_T_NONE;
9e853f2313e5eb drivers/block/pmem.c  Ross Zwisler      2015-04-01  240  }
9e853f2313e5eb drivers/block/pmem.c  Ross Zwisler      2015-04-01  241  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org


* Re: [RFC v2 2/2] pmem: Enable pmem_submit_bio for asynchronous flush
  2021-07-26  6:08 ` [RFC v2 2/2] pmem: Enable pmem_submit_bio for asynchronous flush Pankaj Gupta
  2021-07-26 13:25   ` kernel test robot
@ 2021-07-26 13:25   ` kernel test robot
  1 sibling, 0 replies; 21+ messages in thread
From: kernel test robot @ 2021-07-26 13:25 UTC
  To: kbuild-all


Hi Pankaj,

[FYI, it's a private test report for your RFC patch.]
[auto build test WARNING on linux/master]
[also build test WARNING on linus/master linux-nvdimm/libnvdimm-for-next v5.14-rc3 next-20210723]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Pankaj-Gupta/virtio-pmem-Async-virtio-pmem-flush/20210726-150913
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 349a2d52ffe59b7a0c5876fa7ee9f3eaf188b830
config: x86_64-randconfig-a013-20210726 (attached as .config)
compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project c63dbd850182797bc4b76124d08e1c320ab2365d)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install x86_64 cross compiling tool for clang build
        # apt-get install binutils-x86-64-linux-gnu
        # https://github.com/0day-ci/linux/commit/e6040aad1930816dc183a36f6ff5eae1412a9713
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Pankaj-Gupta/virtio-pmem-Async-virtio-pmem-flush/20210726-150913
        git checkout e6040aad1930816dc183a36f6ff5eae1412a9713
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> drivers/nvdimm/pmem.c:195:6: warning: variable 'ret' set but not used [-Wunused-but-set-variable]
           int ret = 0;
               ^
   1 warning generated.


vim +/ret +195 drivers/nvdimm/pmem.c

9e853f2313e5eb drivers/block/pmem.c  Ross Zwisler      2015-04-01  192  
c62b37d96b6eb3 drivers/nvdimm/pmem.c Christoph Hellwig 2020-07-01  193  static blk_qc_t pmem_submit_bio(struct bio *bio)
9e853f2313e5eb drivers/block/pmem.c  Ross Zwisler      2015-04-01  194  {
c5d4355d10d414 drivers/nvdimm/pmem.c Pankaj Gupta      2019-07-05 @195  	int ret = 0;
4e4cbee93d5613 drivers/nvdimm/pmem.c Christoph Hellwig 2017-06-03  196  	blk_status_t rc = 0;
f0dc089ce217e7 drivers/nvdimm/pmem.c Dan Williams      2015-05-16  197  	bool do_acct;
f0dc089ce217e7 drivers/nvdimm/pmem.c Dan Williams      2015-05-16  198  	unsigned long start;
9e853f2313e5eb drivers/block/pmem.c  Ross Zwisler      2015-04-01  199  	struct bio_vec bvec;
9e853f2313e5eb drivers/block/pmem.c  Ross Zwisler      2015-04-01  200  	struct bvec_iter iter;
309dca309fc39a drivers/nvdimm/pmem.c Christoph Hellwig 2021-01-24  201  	struct pmem_device *pmem = bio->bi_bdev->bd_disk->private_data;
7e267a8c790edf drivers/nvdimm/pmem.c Dan Williams      2016-06-01  202  	struct nd_region *nd_region = to_region(pmem);
7e267a8c790edf drivers/nvdimm/pmem.c Dan Williams      2016-06-01  203  
e6040aad193081 drivers/nvdimm/pmem.c Pankaj Gupta      2021-07-26  204  	if ((bio->bi_opf & REQ_PREFLUSH) &&
e6040aad193081 drivers/nvdimm/pmem.c Pankaj Gupta      2021-07-26  205  		nvdimm_flush(nd_region, bio)) {
e6040aad193081 drivers/nvdimm/pmem.c Pankaj Gupta      2021-07-26  206  
e6040aad193081 drivers/nvdimm/pmem.c Pankaj Gupta      2021-07-26  207  		/* asynchronous flush completes in other context */
e6040aad193081 drivers/nvdimm/pmem.c Pankaj Gupta      2021-07-26  208  		if (nd_region->flush)
e6040aad193081 drivers/nvdimm/pmem.c Pankaj Gupta      2021-07-26  209  			return BLK_QC_T_NONE;
e6040aad193081 drivers/nvdimm/pmem.c Pankaj Gupta      2021-07-26  210  	}
9e853f2313e5eb drivers/block/pmem.c  Ross Zwisler      2015-04-01  211  
309dca309fc39a drivers/nvdimm/pmem.c Christoph Hellwig 2021-01-24  212  	do_acct = blk_queue_io_stat(bio->bi_bdev->bd_disk->queue);
0fd92f89a44d3b drivers/nvdimm/pmem.c Christoph Hellwig 2020-05-27  213  	if (do_acct)
0fd92f89a44d3b drivers/nvdimm/pmem.c Christoph Hellwig 2020-05-27  214  		start = bio_start_io_acct(bio);
e10624f8c09710 drivers/nvdimm/pmem.c Dan Williams      2016-01-06  215  	bio_for_each_segment(bvec, bio, iter) {
5d64efe79703c4 drivers/nvdimm/pmem.c Vivek Goyal       2020-02-28  216  		if (op_is_write(bio_op(bio)))
5d64efe79703c4 drivers/nvdimm/pmem.c Vivek Goyal       2020-02-28  217  			rc = pmem_do_write(pmem, bvec.bv_page, bvec.bv_offset,
5d64efe79703c4 drivers/nvdimm/pmem.c Vivek Goyal       2020-02-28  218  				iter.bi_sector, bvec.bv_len);
5d64efe79703c4 drivers/nvdimm/pmem.c Vivek Goyal       2020-02-28  219  		else
5d64efe79703c4 drivers/nvdimm/pmem.c Vivek Goyal       2020-02-28  220  			rc = pmem_do_read(pmem, bvec.bv_page, bvec.bv_offset,
5d64efe79703c4 drivers/nvdimm/pmem.c Vivek Goyal       2020-02-28  221  				iter.bi_sector, bvec.bv_len);
e10624f8c09710 drivers/nvdimm/pmem.c Dan Williams      2016-01-06  222  		if (rc) {
4e4cbee93d5613 drivers/nvdimm/pmem.c Christoph Hellwig 2017-06-03  223  			bio->bi_status = rc;
e10624f8c09710 drivers/nvdimm/pmem.c Dan Williams      2016-01-06  224  			break;
e10624f8c09710 drivers/nvdimm/pmem.c Dan Williams      2016-01-06  225  		}
e10624f8c09710 drivers/nvdimm/pmem.c Dan Williams      2016-01-06  226  	}
f0dc089ce217e7 drivers/nvdimm/pmem.c Dan Williams      2015-05-16  227  	if (do_acct)
0fd92f89a44d3b drivers/nvdimm/pmem.c Christoph Hellwig 2020-05-27  228  		bio_end_io_acct(bio, start);
61031952f4c89d drivers/nvdimm/pmem.c Ross Zwisler      2015-06-25  229  
e6040aad193081 drivers/nvdimm/pmem.c Pankaj Gupta      2021-07-26  230  	if (bio->bi_opf & REQ_FUA)  {
c5d4355d10d414 drivers/nvdimm/pmem.c Pankaj Gupta      2019-07-05  231  		ret = nvdimm_flush(nd_region, bio);
c5d4355d10d414 drivers/nvdimm/pmem.c Pankaj Gupta      2019-07-05  232  
e6040aad193081 drivers/nvdimm/pmem.c Pankaj Gupta      2021-07-26  233  		/* asynchronous flush completes in other context */
e6040aad193081 drivers/nvdimm/pmem.c Pankaj Gupta      2021-07-26  234  		if (nd_region->flush)
e6040aad193081 drivers/nvdimm/pmem.c Pankaj Gupta      2021-07-26  235  			return BLK_QC_T_NONE;
e6040aad193081 drivers/nvdimm/pmem.c Pankaj Gupta      2021-07-26  236  	}
61031952f4c89d drivers/nvdimm/pmem.c Ross Zwisler      2015-06-25  237  
4246a0b63bd8f5 drivers/nvdimm/pmem.c Christoph Hellwig 2015-07-20  238  	bio_endio(bio);
dece16353ef47d drivers/nvdimm/pmem.c Jens Axboe        2015-11-05  239  	return BLK_QC_T_NONE;
9e853f2313e5eb drivers/block/pmem.c  Ross Zwisler      2015-04-01  240  }
9e853f2313e5eb drivers/block/pmem.c  Ross Zwisler      2015-04-01  241  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org


* Re: [RFC v2 2/2] pmem: Enable pmem_submit_bio for asynchronous flush
  2021-07-26 13:25   ` kernel test robot
@ 2021-07-26 13:58     ` Pankaj Gupta
  0 siblings, 0 replies; 21+ messages in thread
From: Pankaj Gupta @ 2021-07-26 13:58 UTC
  To: kbuild-all


> Hi Pankaj,
>
> [FYI, it's a private test report for your RFC patch.]
> [auto build test WARNING on linux/master]
> [also build test WARNING on linus/master linux-nvdimm/libnvdimm-for-next v5.14-rc3 next-20210723]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
> https://git-scm.com/docs/git-format-patch]
>
> url:    https://github.com/0day-ci/linux/commits/Pankaj-Gupta/virtio-pmem-Async-virtio-pmem-flush/20210726-150913
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 349a2d52ffe59b7a0c5876fa7ee9f3eaf188b830
> config: arc-allyesconfig (attached as .config)
> compiler: arceb-elf-gcc (GCC) 10.3.0
> reproduce (this is a W=1 build):
>         wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
>         chmod +x ~/bin/make.cross
>         # https://github.com/0day-ci/linux/commit/e6040aad1930816dc183a36f6ff5eae1412a9713
>         git remote add linux-review https://github.com/0day-ci/linux
>         git fetch --no-tags linux-review Pankaj-Gupta/virtio-pmem-Async-virtio-pmem-flush/20210726-150913
>         git checkout e6040aad1930816dc183a36f6ff5eae1412a9713
>         # save the attached .config to linux build tree
>         COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-10.3.0 make.cross ARCH=arc
>
> If you fix the issue, kindly add following tag as appropriate
> Reported-by: kernel test robot <lkp@intel.com>
>
> All warnings (new ones prefixed by >>):
>
>    drivers/nvdimm/pmem.c: In function 'pmem_submit_bio':
> >> drivers/nvdimm/pmem.c:195:6: warning: variable 'ret' set but not used [-Wunused-but-set-variable]
>      195 |  int ret = 0;
>          |      ^~~

Thank you, will fix this.
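
One possible shape for the fix (hypothetical, not the posted code) is
to consume ret on the synchronous path, as the pre-patch code did:

	if (bio->bi_opf & REQ_FUA)  {
		ret = nvdimm_flush(nd_region, bio);

		/* asynchronous flush completes in other context */
		if (nd_region->flush)
			return BLK_QC_T_NONE;

		/* synchronous path: propagate any flush error */
		if (ret)
			bio->bi_status = errno_to_blk_status(ret);
	}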

>
>
> vim +/ret +195 drivers/nvdimm/pmem.c
>
> 9e853f2313e5eb drivers/block/pmem.c  Ross Zwisler      2015-04-01  192
> c62b37d96b6eb3 drivers/nvdimm/pmem.c Christoph Hellwig 2020-07-01  193  static blk_qc_t pmem_submit_bio(struct bio *bio)
> 9e853f2313e5eb drivers/block/pmem.c  Ross Zwisler      2015-04-01  194  {
> c5d4355d10d414 drivers/nvdimm/pmem.c Pankaj Gupta      2019-07-05 @195          int ret = 0;
> 4e4cbee93d5613 drivers/nvdimm/pmem.c Christoph Hellwig 2017-06-03  196          blk_status_t rc = 0;
> f0dc089ce217e7 drivers/nvdimm/pmem.c Dan Williams      2015-05-16  197          bool do_acct;
> f0dc089ce217e7 drivers/nvdimm/pmem.c Dan Williams      2015-05-16  198          unsigned long start;
> 9e853f2313e5eb drivers/block/pmem.c  Ross Zwisler      2015-04-01  199          struct bio_vec bvec;
> 9e853f2313e5eb drivers/block/pmem.c  Ross Zwisler      2015-04-01  200          struct bvec_iter iter;
> 309dca309fc39a drivers/nvdimm/pmem.c Christoph Hellwig 2021-01-24  201          struct pmem_device *pmem = bio->bi_bdev->bd_disk->private_data;
> 7e267a8c790edf drivers/nvdimm/pmem.c Dan Williams      2016-06-01  202          struct nd_region *nd_region = to_region(pmem);
> 7e267a8c790edf drivers/nvdimm/pmem.c Dan Williams      2016-06-01  203
> e6040aad193081 drivers/nvdimm/pmem.c Pankaj Gupta      2021-07-26  204          if ((bio->bi_opf & REQ_PREFLUSH) &&
> e6040aad193081 drivers/nvdimm/pmem.c Pankaj Gupta      2021-07-26  205                  nvdimm_flush(nd_region, bio)) {
> e6040aad193081 drivers/nvdimm/pmem.c Pankaj Gupta      2021-07-26  206
> e6040aad193081 drivers/nvdimm/pmem.c Pankaj Gupta      2021-07-26  207                  /* asynchronous flush completes in other context */
> e6040aad193081 drivers/nvdimm/pmem.c Pankaj Gupta      2021-07-26  208                  if (nd_region->flush)
> e6040aad193081 drivers/nvdimm/pmem.c Pankaj Gupta      2021-07-26  209                          return BLK_QC_T_NONE;
> e6040aad193081 drivers/nvdimm/pmem.c Pankaj Gupta      2021-07-26  210          }
> 9e853f2313e5eb drivers/block/pmem.c  Ross Zwisler      2015-04-01  211
> 309dca309fc39a drivers/nvdimm/pmem.c Christoph Hellwig 2021-01-24  212          do_acct = blk_queue_io_stat(bio->bi_bdev->bd_disk->queue);
> 0fd92f89a44d3b drivers/nvdimm/pmem.c Christoph Hellwig 2020-05-27  213          if (do_acct)
> 0fd92f89a44d3b drivers/nvdimm/pmem.c Christoph Hellwig 2020-05-27  214                  start = bio_start_io_acct(bio);
> e10624f8c09710 drivers/nvdimm/pmem.c Dan Williams      2016-01-06  215          bio_for_each_segment(bvec, bio, iter) {
> 5d64efe79703c4 drivers/nvdimm/pmem.c Vivek Goyal       2020-02-28  216                  if (op_is_write(bio_op(bio)))
> 5d64efe79703c4 drivers/nvdimm/pmem.c Vivek Goyal       2020-02-28  217                          rc = pmem_do_write(pmem, bvec.bv_page, bvec.bv_offset,
> 5d64efe79703c4 drivers/nvdimm/pmem.c Vivek Goyal       2020-02-28  218                                  iter.bi_sector, bvec.bv_len);
> 5d64efe79703c4 drivers/nvdimm/pmem.c Vivek Goyal       2020-02-28  219                  else
> 5d64efe79703c4 drivers/nvdimm/pmem.c Vivek Goyal       2020-02-28  220                          rc = pmem_do_read(pmem, bvec.bv_page, bvec.bv_offset,
> 5d64efe79703c4 drivers/nvdimm/pmem.c Vivek Goyal       2020-02-28  221                                  iter.bi_sector, bvec.bv_len);
> e10624f8c09710 drivers/nvdimm/pmem.c Dan Williams      2016-01-06  222                  if (rc) {
> 4e4cbee93d5613 drivers/nvdimm/pmem.c Christoph Hellwig 2017-06-03  223                          bio->bi_status = rc;
> e10624f8c09710 drivers/nvdimm/pmem.c Dan Williams      2016-01-06  224                          break;
> e10624f8c09710 drivers/nvdimm/pmem.c Dan Williams      2016-01-06  225                  }
> e10624f8c09710 drivers/nvdimm/pmem.c Dan Williams      2016-01-06  226          }
> f0dc089ce217e7 drivers/nvdimm/pmem.c Dan Williams      2015-05-16  227          if (do_acct)
> 0fd92f89a44d3b drivers/nvdimm/pmem.c Christoph Hellwig 2020-05-27  228                  bio_end_io_acct(bio, start);
> 61031952f4c89d drivers/nvdimm/pmem.c Ross Zwisler      2015-06-25  229
> e6040aad193081 drivers/nvdimm/pmem.c Pankaj Gupta      2021-07-26  230          if (bio->bi_opf & REQ_FUA)  {
> c5d4355d10d414 drivers/nvdimm/pmem.c Pankaj Gupta      2019-07-05  231                  ret = nvdimm_flush(nd_region, bio);
> c5d4355d10d414 drivers/nvdimm/pmem.c Pankaj Gupta      2019-07-05  232
> e6040aad193081 drivers/nvdimm/pmem.c Pankaj Gupta      2021-07-26  233                  /* asynchronous flush completes in other context */
> e6040aad193081 drivers/nvdimm/pmem.c Pankaj Gupta      2021-07-26  234                  if (nd_region->flush)
> e6040aad193081 drivers/nvdimm/pmem.c Pankaj Gupta      2021-07-26  235                          return BLK_QC_T_NONE;
> e6040aad193081 drivers/nvdimm/pmem.c Pankaj Gupta      2021-07-26  236          }
> 61031952f4c89d drivers/nvdimm/pmem.c Ross Zwisler      2015-06-25  237
> 4246a0b63bd8f5 drivers/nvdimm/pmem.c Christoph Hellwig 2015-07-20  238          bio_endio(bio);
> dece16353ef47d drivers/nvdimm/pmem.c Jens Axboe        2015-11-05  239          return BLK_QC_T_NONE;
> 9e853f2313e5eb drivers/block/pmem.c  Ross Zwisler      2015-04-01  240  }
> 9e853f2313e5eb drivers/block/pmem.c  Ross Zwisler      2015-04-01  241
>
> ---
> 0-DAY CI Kernel Test Service, Intel Corporation
> https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org


* Re: [RFC v2 0/2] virtio-pmem: Asynchronous flush
  2021-07-26  6:08 [RFC v2 0/2] virtio-pmem: Asynchronous flush Pankaj Gupta
@ 2021-08-19 11:08   ` Pankaj Gupta
  2021-07-26  6:08 ` [RFC v2 2/2] pmem: Enable pmem_submit_bio for asynchronous flush Pankaj Gupta
  2021-08-19 11:08   ` Pankaj Gupta
  2 siblings, 0 replies; 21+ messages in thread
From: Pankaj Gupta @ 2021-08-19 11:08 UTC
  To: nvdimm, LKML
  Cc: Dan Williams, jmoyer, David Hildenbrand, Michael S . Tsirkin,
	Cornelia Huck, Vishal Verma, Dave Jiang, Ira Weiny, Pankaj Gupta

Gentle ping.

>
>  Jeff reported a preflush ordering issue with the existing
>  implementation of virtio pmem preflush. Dan suggested[1] implementing
>  asynchronous flush for virtio pmem using a work queue, as done in
>  md/RAID. This patch series solves the preflush ordering issue and
>  also makes the flush asynchronous for the submitting thread.
>
>  Submitting this patch series for review. Sorry, it took me a long
>  time to come back to this due to personal reasons.
>
>  RFC v1 -> RFC v2
>  - More testing and bug fixes.
>
>  [1] https://marc.info/?l=linux-kernel&m=157446316409937&w=2
>
> Pankaj Gupta (2):
>   virtio-pmem: Async virtio-pmem flush
>   pmem: enable pmem_submit_bio for asynchronous flush
>
>  drivers/nvdimm/nd_virtio.c   | 72 ++++++++++++++++++++++++++++--------
>  drivers/nvdimm/pmem.c        | 17 ++++++---
>  drivers/nvdimm/virtio_pmem.c | 10 ++++-
>  drivers/nvdimm/virtio_pmem.h | 14 +++++++
>  4 files changed, 91 insertions(+), 22 deletions(-)
>
> --
> 2.25.1
>


* Re: [RFC v2 1/2] virtio-pmem: Async virtio-pmem flush
  2021-07-26  6:08 ` [RFC v2 1/2] virtio-pmem: Async virtio-pmem flush Pankaj Gupta
@ 2021-08-25 17:25     ` Dan Williams
  0 siblings, 0 replies; 21+ messages in thread
From: Dan Williams @ 2021-08-25 17:25 UTC
  To: Pankaj Gupta
  Cc: Linux NVDIMM, Linux Kernel Mailing List, jmoyer,
	David Hildenbrand, Michael S. Tsirkin, Cornelia Huck,
	Vishal L Verma, Dave Jiang, Weiny, Ira, Pankaj Gupta

On Sun, Jul 25, 2021 at 11:09 PM Pankaj Gupta
<pankaj.gupta.linux@gmail.com> wrote:
>
> From: Pankaj Gupta <pankaj.gupta@ionos.com>
>
> Implement asynchronous flush for virtio pmem using a work queue
> to solve the preflush ordering issue. Also, coalesce flush
> requests when a flush is already in progress.
>
> Signed-off-by: Pankaj Gupta <pankaj.gupta@ionos.com>
> ---
>  drivers/nvdimm/nd_virtio.c   | 72 ++++++++++++++++++++++++++++--------
>  drivers/nvdimm/virtio_pmem.c | 10 ++++-
>  drivers/nvdimm/virtio_pmem.h | 14 +++++++
>  3 files changed, 79 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c
> index 10351d5b49fa..61b655b583be 100644
> --- a/drivers/nvdimm/nd_virtio.c
> +++ b/drivers/nvdimm/nd_virtio.c
> @@ -97,29 +97,69 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
>         return err;
>  };
>
> +static void submit_async_flush(struct work_struct *ws);
> +
>  /* The asynchronous flush callback function */
>  int async_pmem_flush(struct nd_region *nd_region, struct bio *bio)
>  {
> -       /*
> -        * Create child bio for asynchronous flush and chain with
> -        * parent bio. Otherwise directly call nd_region flush.
> +       /* queue asynchronous flush and coalesce the flush requests */
> +       struct virtio_device *vdev = nd_region->provider_data;
> +       struct virtio_pmem *vpmem  = vdev->priv;
> +       ktime_t req_start = ktime_get_boottime();
> +
> +       spin_lock_irq(&vpmem->lock);
> +       /* flush requests wait until ongoing flush completes,
> +        * hence coalescing all the pending requests.
>          */
> -       if (bio && bio->bi_iter.bi_sector != -1) {
> -               struct bio *child = bio_alloc(GFP_ATOMIC, 0);
> -
> -               if (!child)
> -                       return -ENOMEM;
> -               bio_copy_dev(child, bio);
> -               child->bi_opf = REQ_PREFLUSH;
> -               child->bi_iter.bi_sector = -1;
> -               bio_chain(child, bio);
> -               submit_bio(child);
> -               return 0;
> +       wait_event_lock_irq(vpmem->sb_wait,
> +                           !vpmem->flush_bio ||
> +                           ktime_before(req_start, vpmem->prev_flush_start),
> +                           vpmem->lock);
> +       /* new request after previous flush is completed */
> +       if (ktime_after(req_start, vpmem->prev_flush_start)) {
> +               WARN_ON(vpmem->flush_bio);
> +               vpmem->flush_bio = bio;
> +               bio = NULL;
> +       }

Why the dance with ->prev_flush_start vs just calling queue_work()
again? queue_work() is naturally coalescing in that if the last work
request has not started execution, another queue attempt will be
dropped.
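
For reference, queue_work() even reports the coalescing: it returns
false when the work item was already pending, e.g.:

	/* illustration only: queue_work()'s built-in coalescing */
	if (!queue_work(vpmem->pmem_wq, &vpmem->flush_work))
		pr_debug("flush request coalesced with pending work\n");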

> +       spin_unlock_irq(&vpmem->lock);
> +
> +       if (!bio) {
> +               INIT_WORK(&vpmem->flush_work, submit_async_flush);

I expect this only needs to be initialized once at driver init time.

> +               queue_work(vpmem->pmem_wq, &vpmem->flush_work);
> +               return 1;
> +       }
> +
> +       /* flush completed in other context while we waited */
> +       if (bio && (bio->bi_opf & REQ_PREFLUSH)) {
> +               bio->bi_opf &= ~REQ_PREFLUSH;
> +               submit_bio(bio);
> +       } else if (bio && (bio->bi_opf & REQ_FUA)) {
> +               bio->bi_opf &= ~REQ_FUA;
> +               bio_endio(bio);

It's not clear to me how this happens; shouldn't all flush completions
be driven from the work completion?

>         }
> -       if (virtio_pmem_flush(nd_region))
> -               return -EIO;
>
>         return 0;
>  };
>  EXPORT_SYMBOL_GPL(async_pmem_flush);
> +
> +static void submit_async_flush(struct work_struct *ws)
> +{
> +       struct virtio_pmem *vpmem = container_of(ws, struct virtio_pmem, flush_work);
> +       struct bio *bio = vpmem->flush_bio;
> +
> +       vpmem->start_flush = ktime_get_boottime();
> +       bio->bi_status = errno_to_blk_status(virtio_pmem_flush(vpmem->nd_region));
> +       vpmem->prev_flush_start = vpmem->start_flush;
> +       vpmem->flush_bio = NULL;
> +       wake_up(&vpmem->sb_wait);
> +
> +       /* Submit parent bio only for PREFLUSH */
> +       if (bio && (bio->bi_opf & REQ_PREFLUSH)) {
> +               bio->bi_opf &= ~REQ_PREFLUSH;
> +               submit_bio(bio);
> +       } else if (bio && (bio->bi_opf & REQ_FUA)) {
> +               bio->bi_opf &= ~REQ_FUA;
> +               bio_endio(bio);
> +       }

Shouldn't the wait_event_lock_irq() be here rather than in
async_pmem_flush()? That will cause the workqueue to back up and flush
requests to coalesce.
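
For concreteness, a rough sketch of that shape, combined with the
queue_work() point above (the flush_bios bio_list is a hypothetical
replacement for the single flush_bio pointer; untested):

	static void submit_async_flush(struct work_struct *ws)
	{
		struct virtio_pmem *vpmem =
			container_of(ws, struct virtio_pmem, flush_work);
		struct bio_list todo = BIO_EMPTY_LIST;
		struct bio *bio;
		int err;

		spin_lock_irq(&vpmem->lock);
		/* take everything queued so far; one device flush
		 * covers all of it, and later arrivals re-queue the
		 * work item */
		bio_list_merge(&todo, &vpmem->flush_bios);
		bio_list_init(&vpmem->flush_bios);
		spin_unlock_irq(&vpmem->lock);

		err = virtio_pmem_flush(vpmem->nd_region);
		while ((bio = bio_list_pop(&todo))) {
			bio->bi_status = errno_to_blk_status(err);
			if (bio->bi_opf & REQ_PREFLUSH) {
				bio->bi_opf &= ~REQ_PREFLUSH;
				submit_bio(bio);
			} else if (bio->bi_opf & REQ_FUA) {
				bio->bi_opf &= ~REQ_FUA;
				bio_endio(bio);
			}
		}
	}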

> +}
>  MODULE_LICENSE("GPL");
> diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c
> index 726c7354d465..56780a6140c7 100644
> --- a/drivers/nvdimm/virtio_pmem.c
> +++ b/drivers/nvdimm/virtio_pmem.c
> @@ -24,6 +24,7 @@ static int init_vq(struct virtio_pmem *vpmem)
>                 return PTR_ERR(vpmem->req_vq);
>
>         spin_lock_init(&vpmem->pmem_lock);
> +       spin_lock_init(&vpmem->lock);

Why 2 locks?


* Re: [RFC v2 1/2] virtio-pmem: Async virtio-pmem flush
  2021-08-25 17:25     ` Dan Williams
@ 2021-08-25 20:01       ` Pankaj Gupta
  -1 siblings, 0 replies; 21+ messages in thread
From: Pankaj Gupta @ 2021-08-25 20:01 UTC
  To: Dan Williams
  Cc: Linux NVDIMM, Linux Kernel Mailing List, jmoyer,
	David Hildenbrand, Michael S. Tsirkin, Cornelia Huck,
	Vishal L Verma, Dave Jiang, Weiny, Ira, Pankaj Gupta

Hi Dan,

Thank you for the review. Please see my reply inline.

> > Implement asynchronous flush for virtio pmem using a work queue
> > to solve the preflush ordering issue. Also, coalesce flush
> > requests when a flush is already in progress.
> >
> > Signed-off-by: Pankaj Gupta <pankaj.gupta@ionos.com>
> > ---
> >  drivers/nvdimm/nd_virtio.c   | 72 ++++++++++++++++++++++++++++--------
> >  drivers/nvdimm/virtio_pmem.c | 10 ++++-
> >  drivers/nvdimm/virtio_pmem.h | 14 +++++++
> >  3 files changed, 79 insertions(+), 17 deletions(-)
> >
> > diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c
> > index 10351d5b49fa..61b655b583be 100644
> > --- a/drivers/nvdimm/nd_virtio.c
> > +++ b/drivers/nvdimm/nd_virtio.c
> > @@ -97,29 +97,69 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
> >         return err;
> >  };
> >
> > +static void submit_async_flush(struct work_struct *ws);
> > +
> >  /* The asynchronous flush callback function */
> >  int async_pmem_flush(struct nd_region *nd_region, struct bio *bio)
> >  {
> > -       /*
> > -        * Create child bio for asynchronous flush and chain with
> > -        * parent bio. Otherwise directly call nd_region flush.
> > +       /* queue asynchronous flush and coalesce the flush requests */
> > +       struct virtio_device *vdev = nd_region->provider_data;
> > +       struct virtio_pmem *vpmem  = vdev->priv;
> > +       ktime_t req_start = ktime_get_boottime();
> > +
> > +       spin_lock_irq(&vpmem->lock);
> > +       /* flush requests wait until ongoing flush completes,
> > +        * hence coalescing all the pending requests.
> >          */
> > -       if (bio && bio->bi_iter.bi_sector != -1) {
> > -               struct bio *child = bio_alloc(GFP_ATOMIC, 0);
> > -
> > -               if (!child)
> > -                       return -ENOMEM;
> > -               bio_copy_dev(child, bio);
> > -               child->bi_opf = REQ_PREFLUSH;
> > -               child->bi_iter.bi_sector = -1;
> > -               bio_chain(child, bio);
> > -               submit_bio(child);
> > -               return 0;
> > +       wait_event_lock_irq(vpmem->sb_wait,
> > +                           !vpmem->flush_bio ||
> > +                           ktime_before(req_start, vpmem->prev_flush_start),
> > +                           vpmem->lock);
> > +       /* new request after previous flush is completed */
> > +       if (ktime_after(req_start, vpmem->prev_flush_start)) {
> > +               WARN_ON(vpmem->flush_bio);
> > +               vpmem->flush_bio = bio;
> > +               bio = NULL;
> > +       }
>
> Why the dance with ->prev_flush_start vs just calling queue_work()
> again. queue_work() is naturally coalescing in that if the last work
> request has not started execution another queue attempt will be
> dropped.

How will the parent flush request know when the corresponding flush
has completed?

>
> > +       spin_unlock_irq(&vpmem->lock);
> > +
> > +       if (!bio) {
> > +               INIT_WORK(&vpmem->flush_work, submit_async_flush);
>
> I expect this only needs to be initialized once at driver init time.

Yes, will fix this.
>
> > +               queue_work(vpmem->pmem_wq, &vpmem->flush_work);
> > +               return 1;
> > +       }
> > +
> > +       /* flush completed in other context while we waited */
> > +       if (bio && (bio->bi_opf & REQ_PREFLUSH)) {
> > +               bio->bi_opf &= ~REQ_PREFLUSH;
> > +               submit_bio(bio);
> > +       } else if (bio && (bio->bi_opf & REQ_FUA)) {
> > +               bio->bi_opf &= ~REQ_FUA;
> > +               bio_endio(bio);
>
> It's not clear to me how this happens, shouldn't all flush completions
> be driven from the work completion?

Requests should make progress after being notified by the ongoing
flush's completion event.

>
> >         }
> > -       if (virtio_pmem_flush(nd_region))
> > -               return -EIO;
> >
> >         return 0;
> >  };
> >  EXPORT_SYMBOL_GPL(async_pmem_flush);
> > +
> > +static void submit_async_flush(struct work_struct *ws)
> > +{
> > +       struct virtio_pmem *vpmem = container_of(ws, struct virtio_pmem, flush_work);
> > +       struct bio *bio = vpmem->flush_bio;
> > +
> > +       vpmem->start_flush = ktime_get_boottime();
> > +       bio->bi_status = errno_to_blk_status(virtio_pmem_flush(vpmem->nd_region));
> > +       vpmem->prev_flush_start = vpmem->start_flush;
> > +       vpmem->flush_bio = NULL;
> > +       wake_up(&vpmem->sb_wait);
> > +
> > +       /* Submit parent bio only for PREFLUSH */
> > +       if (bio && (bio->bi_opf & REQ_PREFLUSH)) {
> > +               bio->bi_opf &= ~REQ_PREFLUSH;
> > +               submit_bio(bio);
> > +       } else if (bio && (bio->bi_opf & REQ_FUA)) {
> > +               bio->bi_opf &= ~REQ_FUA;
> > +               bio_endio(bio);
> > +       }
>
> Shouldn't the wait_event_lock_irq() be here rather than in
> async_pmem_flush()? That will cause the workqueue to back up and flush
> requests to coalesce.

But this is the coalesced flush request?

> > +}
> >  MODULE_LICENSE("GPL");
> > diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c
> > index 726c7354d465..56780a6140c7 100644
> > --- a/drivers/nvdimm/virtio_pmem.c
> > +++ b/drivers/nvdimm/virtio_pmem.c
> > @@ -24,6 +24,7 @@ static int init_vq(struct virtio_pmem *vpmem)
> >                 return PTR_ERR(vpmem->req_vq);
> >
> >         spin_lock_init(&vpmem->pmem_lock);
> > +       spin_lock_init(&vpmem->lock);
>
> Why 2 locks?

One lock is for the work queue and the other for virtio flush completion.

Thanks,
Pankaj

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC v2 1/2] virtio-pmem: Async virtio-pmem flush
@ 2021-08-25 20:01       ` Pankaj Gupta
  0 siblings, 0 replies; 21+ messages in thread
From: Pankaj Gupta @ 2021-08-25 20:01 UTC (permalink / raw)
  To: Dan Williams
  Cc: Linux NVDIMM, Linux Kernel Mailing List, jmoyer,
	David Hildenbrand, Michael S. Tsirkin, Cornelia Huck,
	Vishal L Verma, Dave Jiang, Weiny, Ira, Pankaj Gupta

Hi Dan,

Thank you for the review. Please see my reply inline.

> > Implement asynchronous flush for virtio pmem using work queue
> > to solve the preflush ordering issue. Also, coalesce the flush
> > requests when a flush is already in process.
> >
> > Signed-off-by: Pankaj Gupta <pankaj.gupta@ionos.com>
> > ---
> >  drivers/nvdimm/nd_virtio.c   | 72 ++++++++++++++++++++++++++++--------
> >  drivers/nvdimm/virtio_pmem.c | 10 ++++-
> >  drivers/nvdimm/virtio_pmem.h | 14 +++++++
> >  3 files changed, 79 insertions(+), 17 deletions(-)
> >
> > diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c
> > index 10351d5b49fa..61b655b583be 100644
> > --- a/drivers/nvdimm/nd_virtio.c
> > +++ b/drivers/nvdimm/nd_virtio.c
> > @@ -97,29 +97,69 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
> >         return err;
> >  };
> >
> > +static void submit_async_flush(struct work_struct *ws);
> > +
> >  /* The asynchronous flush callback function */
> >  int async_pmem_flush(struct nd_region *nd_region, struct bio *bio)
> >  {
> > -       /*
> > -        * Create child bio for asynchronous flush and chain with
> > -        * parent bio. Otherwise directly call nd_region flush.
> > +       /* queue asynchronous flush and coalesce the flush requests */
> > +       struct virtio_device *vdev = nd_region->provider_data;
> > +       struct virtio_pmem *vpmem  = vdev->priv;
> > +       ktime_t req_start = ktime_get_boottime();
> > +
> > +       spin_lock_irq(&vpmem->lock);
> > +       /* flush requests wait until ongoing flush completes,
> > +        * hence coalescing all the pending requests.
> >          */
> > -       if (bio && bio->bi_iter.bi_sector != -1) {
> > -               struct bio *child = bio_alloc(GFP_ATOMIC, 0);
> > -
> > -               if (!child)
> > -                       return -ENOMEM;
> > -               bio_copy_dev(child, bio);
> > -               child->bi_opf = REQ_PREFLUSH;
> > -               child->bi_iter.bi_sector = -1;
> > -               bio_chain(child, bio);
> > -               submit_bio(child);
> > -               return 0;
> > +       wait_event_lock_irq(vpmem->sb_wait,
> > +                           !vpmem->flush_bio ||
> > +                           ktime_before(req_start, vpmem->prev_flush_start),
> > +                           vpmem->lock);
> > +       /* new request after previous flush is completed */
> > +       if (ktime_after(req_start, vpmem->prev_flush_start)) {
> > +               WARN_ON(vpmem->flush_bio);
> > +               vpmem->flush_bio = bio;
> > +               bio = NULL;
> > +       }
>
> Why the dance with ->prev_flush_start vs just calling queue_work()
> again. queue_work() is naturally coalescing in that if the last work
> request has not started execution another queue attempt will be
> dropped.

How parent flush request will know when corresponding flush is completed?

>
> > +       spin_unlock_irq(&vpmem->lock);
> > +
> > +       if (!bio) {
> > +               INIT_WORK(&vpmem->flush_work, submit_async_flush);
>
> I expect this only needs to be initialized once at driver init time.

yes, will fix this.
>
> > +               queue_work(vpmem->pmem_wq, &vpmem->flush_work);
> > +               return 1;
> > +       }
> > +
> > +       /* flush completed in other context while we waited */
> > +       if (bio && (bio->bi_opf & REQ_PREFLUSH)) {
> > +               bio->bi_opf &= ~REQ_PREFLUSH;
> > +               submit_bio(bio);
> > +       } else if (bio && (bio->bi_opf & REQ_FUA)) {
> > +               bio->bi_opf &= ~REQ_FUA;
> > +               bio_endio(bio);
>
> It's not clear to me how this happens, shouldn't all flush completions
> be driven from the work completion?

Requests should progress after notified by ongoing flush completion
event.

>
> >         }
> > -       if (virtio_pmem_flush(nd_region))
> > -               return -EIO;
> >
> >         return 0;
> >  };
> >  EXPORT_SYMBOL_GPL(async_pmem_flush);
> > +
> > +static void submit_async_flush(struct work_struct *ws)
> > +{
> > +       struct virtio_pmem *vpmem = container_of(ws, struct virtio_pmem, flush_work);
> > +       struct bio *bio = vpmem->flush_bio;
> > +
> > +       vpmem->start_flush = ktime_get_boottime();
> > +       bio->bi_status = errno_to_blk_status(virtio_pmem_flush(vpmem->nd_region));
> > +       vpmem->prev_flush_start = vpmem->start_flush;
> > +       vpmem->flush_bio = NULL;
> > +       wake_up(&vpmem->sb_wait);
> > +
> > +       /* Submit parent bio only for PREFLUSH */
> > +       if (bio && (bio->bi_opf & REQ_PREFLUSH)) {
> > +               bio->bi_opf &= ~REQ_PREFLUSH;
> > +               submit_bio(bio);
> > +       } else if (bio && (bio->bi_opf & REQ_FUA)) {
> > +               bio->bi_opf &= ~REQ_FUA;
> > +               bio_endio(bio);
> > +       }
>
> Shouldn't the wait_event_lock_irq() be here rather than in
> async_pmem_flush()? That will cause the workqueue to back up and flush
> requests to coalesce.

But isn't this already the coalesced flush request?

> > +}
> >  MODULE_LICENSE("GPL");
> > diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c
> > index 726c7354d465..56780a6140c7 100644
> > --- a/drivers/nvdimm/virtio_pmem.c
> > +++ b/drivers/nvdimm/virtio_pmem.c
> > @@ -24,6 +24,7 @@ static int init_vq(struct virtio_pmem *vpmem)
> >                 return PTR_ERR(vpmem->req_vq);
> >
> >         spin_lock_init(&vpmem->pmem_lock);
> > +       spin_lock_init(&vpmem->lock);
>
> Why 2 locks?

One lock is for the work queue and the other is for virtio flush completion.
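
Roughly (field comments are mine, not from the patch):

	struct virtio_pmem {
		...
		/* pmem_lock: synchronizes virtqueue access for flush commands */
		spinlock_t pmem_lock;
		/* lock: protects flush_bio/prev_flush_start, pairs with sb_wait */
		spinlock_t lock;
		...
	};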

Thanks,
Pankaj

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC v2 1/2] virtio-pmem: Async virtio-pmem flush
  2021-08-25 20:01       ` Pankaj Gupta
@ 2021-08-25 21:50         ` Dan Williams
  0 siblings, 0 replies; 21+ messages in thread
From: Dan Williams @ 2021-08-25 21:50 UTC (permalink / raw)
  To: Pankaj Gupta
  Cc: Linux NVDIMM, Linux Kernel Mailing List, jmoyer,
	David Hildenbrand, Michael S. Tsirkin, Cornelia Huck,
	Vishal L Verma, Dave Jiang, Weiny, Ira, Pankaj Gupta

On Wed, Aug 25, 2021 at 1:02 PM Pankaj Gupta
<pankaj.gupta.linux@gmail.com> wrote:
>
> Hi Dan,
>
> Thank you for the review. Please see my reply inline.
>
> > > Implement asynchronous flush for virtio pmem using work queue
> > > to solve the preflush ordering issue. Also, coalesce the flush
> > > requests when a flush is already in process.
> > >
> > > Signed-off-by: Pankaj Gupta <pankaj.gupta@ionos.com>
> > > ---
> > >  drivers/nvdimm/nd_virtio.c   | 72 ++++++++++++++++++++++++++++--------
> > >  drivers/nvdimm/virtio_pmem.c | 10 ++++-
> > >  drivers/nvdimm/virtio_pmem.h | 14 +++++++
> > >  3 files changed, 79 insertions(+), 17 deletions(-)
> > >
> > > diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c
> > > index 10351d5b49fa..61b655b583be 100644
> > > --- a/drivers/nvdimm/nd_virtio.c
> > > +++ b/drivers/nvdimm/nd_virtio.c
> > > @@ -97,29 +97,69 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
> > >         return err;
> > >  };
> > >
> > > +static void submit_async_flush(struct work_struct *ws);
> > > +
> > >  /* The asynchronous flush callback function */
> > >  int async_pmem_flush(struct nd_region *nd_region, struct bio *bio)
> > >  {
> > > -       /*
> > > -        * Create child bio for asynchronous flush and chain with
> > > -        * parent bio. Otherwise directly call nd_region flush.
> > > +       /* queue asynchronous flush and coalesce the flush requests */
> > > +       struct virtio_device *vdev = nd_region->provider_data;
> > > +       struct virtio_pmem *vpmem  = vdev->priv;
> > > +       ktime_t req_start = ktime_get_boottime();
> > > +
> > > +       spin_lock_irq(&vpmem->lock);
> > > +       /* flush requests wait until ongoing flush completes,
> > > +        * hence coalescing all the pending requests.
> > >          */
> > > -       if (bio && bio->bi_iter.bi_sector != -1) {
> > > -               struct bio *child = bio_alloc(GFP_ATOMIC, 0);
> > > -
> > > -               if (!child)
> > > -                       return -ENOMEM;
> > > -               bio_copy_dev(child, bio);
> > > -               child->bi_opf = REQ_PREFLUSH;
> > > -               child->bi_iter.bi_sector = -1;
> > > -               bio_chain(child, bio);
> > > -               submit_bio(child);
> > > -               return 0;
> > > +       wait_event_lock_irq(vpmem->sb_wait,
> > > +                           !vpmem->flush_bio ||
> > > +                           ktime_before(req_start, vpmem->prev_flush_start),
> > > +                           vpmem->lock);
> > > +       /* new request after previous flush is completed */
> > > +       if (ktime_after(req_start, vpmem->prev_flush_start)) {
> > > +               WARN_ON(vpmem->flush_bio);
> > > +               vpmem->flush_bio = bio;
> > > +               bio = NULL;
> > > +       }
> >
> > Why the dance with ->prev_flush_start vs just calling queue_work()
> > again. queue_work() is naturally coalescing in that if the last work
> > request has not started execution another queue attempt will be
> > dropped.
>
> How will the parent flush request know when the corresponding flush has completed?

The eventual bio_endio() is what signals upper layers that the flush
completed...


Hold on... it's been so long that I forgot that you are copying
md_flush_request() here. It would help immensely if that were mentioned
in the changelog, with at a minimum a comment in the code that this
was copied from md. In fact it would be extra helpful if you
refactored a common helper that bio-based block drivers could share
for implementing flush handling, but that can come later.
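
For anyone following along, the md logic in question has roughly this
shape (paraphrased from drivers/md/md.c, not verbatim):

	bool md_flush_request(struct mddev *mddev, struct bio *bio)
	{
		ktime_t req_start = ktime_get_boottime();

		spin_lock_irq(&mddev->lock);
		/* wait while an older flush that may not cover us is in flight */
		wait_event_lock_irq(mddev->sb_wait,
				    !mddev->flush_bio ||
				    ktime_before(req_start, mddev->prev_flush_start),
				    mddev->lock);
		/* new request after the previous flush completed */
		if (ktime_after(req_start, mddev->prev_flush_start)) {
			WARN_ON(mddev->flush_bio);
			mddev->flush_bio = bio;
			bio = NULL;
		}
		spin_unlock_irq(&mddev->lock);

		if (!bio) {
			INIT_WORK(&mddev->flush_work, submit_flushes);
			queue_work(md_wq, &mddev->flush_work);
		} else {
			/* flush was performed for some other bio while we waited */
			if (bio->bi_iter.bi_size == 0)
				/* an empty barrier - all done */
				bio_endio(bio);
			else {
				bio->bi_opf &= ~REQ_PREFLUSH;
				md_handle_request(mddev, bio);
			}
		}
		return true;
	}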

Let me go re-review this with respect to whether the md case is fully
applicable here.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC v2 1/2] virtio-pmem: Async virtio-pmem flush
  2021-08-25 21:50         ` Dan Williams
@ 2021-08-25 22:00           ` Pankaj Gupta
  0 siblings, 0 replies; 21+ messages in thread
From: Pankaj Gupta @ 2021-08-25 22:00 UTC (permalink / raw)
  To: Dan Williams
  Cc: Pankaj Gupta, Linux NVDIMM, Linux Kernel Mailing List, jmoyer,
	David Hildenbrand, Michael S. Tsirkin, Cornelia Huck,
	Vishal L Verma, Dave Jiang, Weiny, Ira

> > Hi Dan,
> >
> > Thank you for the review. Please see my reply inline.
> >
> > > > Implement asynchronous flush for virtio pmem using work queue
> > > > to solve the preflush ordering issue. Also, coalesce the flush
> > > > requests when a flush is already in process.
> > > >
> > > > Signed-off-by: Pankaj Gupta <pankaj.gupta@ionos.com>
> > > > ---
> > > >  drivers/nvdimm/nd_virtio.c   | 72 ++++++++++++++++++++++++++++--------
> > > >  drivers/nvdimm/virtio_pmem.c | 10 ++++-
> > > >  drivers/nvdimm/virtio_pmem.h | 14 +++++++
> > > >  3 files changed, 79 insertions(+), 17 deletions(-)
> > > >
> > > > diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c
> > > > index 10351d5b49fa..61b655b583be 100644
> > > > --- a/drivers/nvdimm/nd_virtio.c
> > > > +++ b/drivers/nvdimm/nd_virtio.c
> > > > @@ -97,29 +97,69 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
> > > >         return err;
> > > >  };
> > > >
> > > > +static void submit_async_flush(struct work_struct *ws);
> > > > +
> > > >  /* The asynchronous flush callback function */
> > > >  int async_pmem_flush(struct nd_region *nd_region, struct bio *bio)
> > > >  {
> > > > -       /*
> > > > -        * Create child bio for asynchronous flush and chain with
> > > > -        * parent bio. Otherwise directly call nd_region flush.
> > > > +       /* queue asynchronous flush and coalesce the flush requests */
> > > > +       struct virtio_device *vdev = nd_region->provider_data;
> > > > +       struct virtio_pmem *vpmem  = vdev->priv;
> > > > +       ktime_t req_start = ktime_get_boottime();
> > > > +
> > > > +       spin_lock_irq(&vpmem->lock);
> > > > +       /* flush requests wait until ongoing flush completes,
> > > > +        * hence coalescing all the pending requests.
> > > >          */
> > > > -       if (bio && bio->bi_iter.bi_sector != -1) {
> > > > -               struct bio *child = bio_alloc(GFP_ATOMIC, 0);
> > > > -
> > > > -               if (!child)
> > > > -                       return -ENOMEM;
> > > > -               bio_copy_dev(child, bio);
> > > > -               child->bi_opf = REQ_PREFLUSH;
> > > > -               child->bi_iter.bi_sector = -1;
> > > > -               bio_chain(child, bio);
> > > > -               submit_bio(child);
> > > > -               return 0;
> > > > +       wait_event_lock_irq(vpmem->sb_wait,
> > > > +                           !vpmem->flush_bio ||
> > > > +                           ktime_before(req_start, vpmem->prev_flush_start),
> > > > +                           vpmem->lock);
> > > > +       /* new request after previous flush is completed */
> > > > +       if (ktime_after(req_start, vpmem->prev_flush_start)) {
> > > > +               WARN_ON(vpmem->flush_bio);
> > > > +               vpmem->flush_bio = bio;
> > > > +               bio = NULL;
> > > > +       }
> > >
> > > Why the dance with ->prev_flush_start vs just calling queue_work()
> > > again. queue_work() is naturally coalescing in that if the last work
> > > request has not started execution another queue attempt will be
> > > dropped.
> >
> > How will the parent flush request know when the corresponding flush has completed?
>
> The eventual bio_endio() is what signals upper layers that the flush
> completed...
>
>
> Hold on... it's been so long that I forgot that you are copying
> md_flush_request() here. It would help immensely if that were mentioned
> in the changelog, with at a minimum a comment in the code that this
> was copied from md. In fact it would be extra helpful if you

My bad. I only mentioned this in the cover letter.

> refactored a common helper that bio-based block drivers could share
> for implementing flush handling, but that can come later.

Sure.
>
> Let me go re-review this with respect to whether the md case is fully
> applicable here.

o.k.

Best regards,
Pankaj

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC v2 1/2] virtio-pmem: Async virtio-pmem flush
  2021-08-25 22:00           ` Pankaj Gupta
@ 2021-08-25 22:08             ` Dan Williams
  0 siblings, 0 replies; 21+ messages in thread
From: Dan Williams @ 2021-08-25 22:08 UTC (permalink / raw)
  To: Pankaj Gupta
  Cc: Pankaj Gupta, Linux NVDIMM, Linux Kernel Mailing List, jmoyer,
	David Hildenbrand, Michael S. Tsirkin, Cornelia Huck,
	Vishal L Verma, Dave Jiang, Weiny, Ira

On Wed, Aug 25, 2021 at 3:01 PM Pankaj Gupta <pankaj.gupta@ionos.com> wrote:
>
> > > Hi Dan,
> > >
> > > Thank you for the review. Please see my reply inline.
> > >
> > > > > Implement asynchronous flush for virtio pmem using work queue
> > > > > to solve the preflush ordering issue. Also, coalesce the flush
> > > > > requests when a flush is already in process.
> > > > >
> > > > > Signed-off-by: Pankaj Gupta <pankaj.gupta@ionos.com>
> > > > > ---
> > > > >  drivers/nvdimm/nd_virtio.c   | 72 ++++++++++++++++++++++++++++--------
> > > > >  drivers/nvdimm/virtio_pmem.c | 10 ++++-
> > > > >  drivers/nvdimm/virtio_pmem.h | 14 +++++++
> > > > >  3 files changed, 79 insertions(+), 17 deletions(-)
> > > > >
> > > > > diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c
> > > > > index 10351d5b49fa..61b655b583be 100644
> > > > > --- a/drivers/nvdimm/nd_virtio.c
> > > > > +++ b/drivers/nvdimm/nd_virtio.c
> > > > > @@ -97,29 +97,69 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
> > > > >         return err;
> > > > >  };
> > > > >
> > > > > +static void submit_async_flush(struct work_struct *ws);
> > > > > +
> > > > >  /* The asynchronous flush callback function */
> > > > >  int async_pmem_flush(struct nd_region *nd_region, struct bio *bio)
> > > > >  {
> > > > > -       /*
> > > > > -        * Create child bio for asynchronous flush and chain with
> > > > > -        * parent bio. Otherwise directly call nd_region flush.
> > > > > +       /* queue asynchronous flush and coalesce the flush requests */
> > > > > +       struct virtio_device *vdev = nd_region->provider_data;
> > > > > +       struct virtio_pmem *vpmem  = vdev->priv;
> > > > > +       ktime_t req_start = ktime_get_boottime();
> > > > > +
> > > > > +       spin_lock_irq(&vpmem->lock);
> > > > > +       /* flush requests wait until ongoing flush completes,
> > > > > +        * hence coalescing all the pending requests.
> > > > >          */
> > > > > -       if (bio && bio->bi_iter.bi_sector != -1) {
> > > > > -               struct bio *child = bio_alloc(GFP_ATOMIC, 0);
> > > > > -
> > > > > -               if (!child)
> > > > > -                       return -ENOMEM;
> > > > > -               bio_copy_dev(child, bio);
> > > > > -               child->bi_opf = REQ_PREFLUSH;
> > > > > -               child->bi_iter.bi_sector = -1;
> > > > > -               bio_chain(child, bio);
> > > > > -               submit_bio(child);
> > > > > -               return 0;
> > > > > +       wait_event_lock_irq(vpmem->sb_wait,
> > > > > +                           !vpmem->flush_bio ||
> > > > > +                           ktime_before(req_start, vpmem->prev_flush_start),
> > > > > +                           vpmem->lock);
> > > > > +       /* new request after previous flush is completed */
> > > > > +       if (ktime_after(req_start, vpmem->prev_flush_start)) {
> > > > > +               WARN_ON(vpmem->flush_bio);
> > > > > +               vpmem->flush_bio = bio;
> > > > > +               bio = NULL;
> > > > > +       }
> > > >
> > > > Why the dance with ->prev_flush_start vs just calling queue_work()
> > > > again. queue_work() is naturally coalescing in that if the last work
> > > > request has not started execution another queue attempt will be
> > > > dropped.
> > >
> > > How will the parent flush request know when the corresponding flush has completed?
> >
> > The eventual bio_endio() is what signals upper layers that the flush
> > completed...
> >
> >
> > Hold on... it's been so long that I forgot that you are copying
> > md_flush_request() here. It would help immensely if that were mentioned
> > in the changelog, with at a minimum a comment in the code that this
> > was copied from md. In fact it would be extra helpful if you
>
> My bad. I only mentioned this in the cover letter.

Yeah, sorry about that. Having come back to this after so long, I just
decided to jump straight into the patches, but even if I had read the
cover letter I still would have given the feedback that the md_flush_request()
heritage should also be noted with a comment in the code.
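
Something as small as this above the coalescing logic would do (wording
is just a suggestion):

	/*
	 * Flush coalescing adapted from md_flush_request(); see
	 * drivers/md/md.c for the original and the rationale behind the
	 * prev_flush_start timestamp check.
	 */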

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC v2 1/2] virtio-pmem: Async virtio-pmem flush
  2021-08-25 22:08             ` Dan Williams
@ 2021-08-27 12:39               ` Pankaj Gupta
  0 siblings, 0 replies; 21+ messages in thread
From: Pankaj Gupta @ 2021-08-27 12:39 UTC (permalink / raw)
  To: Dan Williams
  Cc: Pankaj Gupta, Linux NVDIMM, Linux Kernel Mailing List, jmoyer,
	David Hildenbrand, Michael S. Tsirkin, Cornelia Huck,
	Vishal L Verma, Dave Jiang, Weiny, Ira

> > > > > > Implement asynchronous flush for virtio pmem using work queue
> > > > > > to solve the preflush ordering issue. Also, coalesce the flush
> > > > > > requests when a flush is already in process.
> > > > > >
> > > > > > Signed-off-by: Pankaj Gupta <pankaj.gupta@ionos.com>
> > > > > > ---
> > > > > >  drivers/nvdimm/nd_virtio.c   | 72 ++++++++++++++++++++++++++++--------
> > > > > >  drivers/nvdimm/virtio_pmem.c | 10 ++++-
> > > > > >  drivers/nvdimm/virtio_pmem.h | 14 +++++++
> > > > > >  3 files changed, 79 insertions(+), 17 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c
> > > > > > index 10351d5b49fa..61b655b583be 100644
> > > > > > --- a/drivers/nvdimm/nd_virtio.c
> > > > > > +++ b/drivers/nvdimm/nd_virtio.c
> > > > > > @@ -97,29 +97,69 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
> > > > > >         return err;
> > > > > >  };
> > > > > >
> > > > > > +static void submit_async_flush(struct work_struct *ws);
> > > > > > +
> > > > > >  /* The asynchronous flush callback function */
> > > > > >  int async_pmem_flush(struct nd_region *nd_region, struct bio *bio)
> > > > > >  {
> > > > > > -       /*
> > > > > > -        * Create child bio for asynchronous flush and chain with
> > > > > > -        * parent bio. Otherwise directly call nd_region flush.
> > > > > > +       /* queue asynchronous flush and coalesce the flush requests */
> > > > > > +       struct virtio_device *vdev = nd_region->provider_data;
> > > > > > +       struct virtio_pmem *vpmem  = vdev->priv;
> > > > > > +       ktime_t req_start = ktime_get_boottime();
> > > > > > +
> > > > > > +       spin_lock_irq(&vpmem->lock);
> > > > > > +       /* flush requests wait until ongoing flush completes,
> > > > > > +        * hence coalescing all the pending requests.
> > > > > >          */
> > > > > > -       if (bio && bio->bi_iter.bi_sector != -1) {
> > > > > > -               struct bio *child = bio_alloc(GFP_ATOMIC, 0);
> > > > > > -
> > > > > > -               if (!child)
> > > > > > -                       return -ENOMEM;
> > > > > > -               bio_copy_dev(child, bio);
> > > > > > -               child->bi_opf = REQ_PREFLUSH;
> > > > > > -               child->bi_iter.bi_sector = -1;
> > > > > > -               bio_chain(child, bio);
> > > > > > -               submit_bio(child);
> > > > > > -               return 0;
> > > > > > +       wait_event_lock_irq(vpmem->sb_wait,
> > > > > > +                           !vpmem->flush_bio ||
> > > > > > +                           ktime_before(req_start, vpmem->prev_flush_start),
> > > > > > +                           vpmem->lock);
> > > > > > +       /* new request after previous flush is completed */
> > > > > > +       if (ktime_after(req_start, vpmem->prev_flush_start)) {
> > > > > > +               WARN_ON(vpmem->flush_bio);
> > > > > > +               vpmem->flush_bio = bio;
> > > > > > +               bio = NULL;
> > > > > > +       }
> > > > >
> > > > > Why the dance with ->prev_flush_start vs just calling queue_work()
> > > > > again. queue_work() is naturally coalescing in that if the last work
> > > > > request has not started execution another queue attempt will be
> > > > > dropped.
> > > >
> > > > How will the parent flush request know when the corresponding flush has completed?
> > >
> > > The eventual bio_endio() is what signals upper layers that the flush
> > > completed...
> > >
> > >
> > > Hold on... it's been so long that I forgot that you are copying
> > > md_flush_request() here. It would help immensely if that were mentioned
> > > in the changelog, with at a minimum a comment in the code that this
> > > was copied from md. In fact it would be extra helpful if you
> >
> > My bad. I only mentioned this in the cover letter.
>
> Yeah, sorry about that. Having come back to this after so long, I just
> decided to jump straight into the patches, but even if I had read the
> cover letter I still would have given the feedback that the md_flush_request()
> heritage should also be noted with a comment in the code.

Sure.

Thanks,
Pankaj

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [RFC v2 0/2] virtio-pmem: Asynchronous flush
  2021-08-19 11:08   ` Pankaj Gupta
@ 2021-10-16  8:23   ` Pankaj Gupta
  0 siblings, 0 replies; 21+ messages in thread
From: Pankaj Gupta @ 2021-10-16  8:23 UTC (permalink / raw)
  To: Linux NVDIMM, LKML
  Cc: Dan Williams, jmoyer, David Hildenbrand, Michael S . Tsirkin,
	Cornelia Huck, Vishal Verma, Dave Jiang, Ira Weiny, Pankaj Gupta

Friendly ping!

Thanks,
Pankaj

On Thu, 19 Aug 2021 at 13:08, Pankaj Gupta <pankaj.gupta.linux@gmail.com> wrote:
>
> Gentle ping.
>
> >
> >  Jeff reported preflush order issue with the existing implementation
> >  of virtio pmem preflush. Dan suggested[1] to implement asynchronous flush
> >  for virtio pmem using work queue as done in md/RAID. This patch series
> >  intends to solve the preflush ordering issue and also makes the flush
> >  asynchronous for the submitting thread.
> >
> >  Submitting this patch series for review. Sorry, It took me long time to
> >  come back to this due to some personal reasons.
> >
> >  RFC v1 -> RFC v2
> >  - More testing and bug fix.
> >
> >  [1] https://marc.info/?l=linux-kernel&m=157446316409937&w=2
> >
> > Pankaj Gupta (2):
> >   virtio-pmem: Async virtio-pmem flush
> >   pmem: enable pmem_submit_bio for asynchronous flush
> >
> >  drivers/nvdimm/nd_virtio.c   | 72 ++++++++++++++++++++++++++++--------
> >  drivers/nvdimm/pmem.c        | 17 ++++++---
> >  drivers/nvdimm/virtio_pmem.c | 10 ++++-
> >  drivers/nvdimm/virtio_pmem.h | 14 +++++++
> >  4 files changed, 91 insertions(+), 22 deletions(-)
> >
> > --
> > 2.25.1
> >

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread

Thread overview: 21+ messages
2021-07-26  6:08 [RFC v2 0/2] virtio-pmem: Asynchronous flush Pankaj Gupta
2021-07-26  6:08 ` [RFC v2 1/2] virtio-pmem: Async virtio-pmem flush Pankaj Gupta
2021-08-25 17:25   ` Dan Williams
2021-08-25 20:01     ` Pankaj Gupta
2021-08-25 21:50       ` Dan Williams
2021-08-25 22:00         ` Pankaj Gupta
2021-08-25 22:08           ` Dan Williams
2021-08-27 12:39             ` Pankaj Gupta
2021-07-26  6:08 ` [RFC v2 2/2] pmem: Enable pmem_submit_bio for asynchronous flush Pankaj Gupta
2021-07-26 13:25   ` kernel test robot
2021-07-26 13:58     ` Pankaj Gupta
2021-07-26 13:25   ` kernel test robot
2021-08-19 11:08 ` [RFC v2 0/2] virtio-pmem: Asynchronous flush Pankaj Gupta
2021-10-16  8:23   ` Pankaj Gupta
