From: Shivaprasad G Bhat <sbhat@linux.ibm.com>
To: ellerman@au1.ibm.com
Cc: aneesh.kumar@linux.ibm.com, linux-nvdimm@lists.01.org,
	linuxppc-dev@lists.ozlabs.org, kvm-ppc@vger.kernel.org
Subject: [RFC PATCH] powerpc/papr_scm: Implement scm async flush
Date: Tue, 01 Dec 2020 06:18:43 -0600	[thread overview]
Message-ID: <160682501436.2579014.14501834468510806255.stgit@lep8c.aus.stglabs.ibm.com> (raw)

The patch implements the SCM async-flush hcall and sets the
ND_REGION_ASYNC capability when the platform device tree
has "ibm,async-flush-required" set.

The demonstration below shows the map_sync behavior when
ibm,async-flush-required is present in the device tree.
(https://github.com/avocado-framework-tests/avocado-misc-tests/blob/master/memory/ndctl.py.data/map_sync.c)

Here pmem0 is from an nvdimm without async-flush-required,
and pmem1 is from an nvdimm with async-flush-required, mounted as:
/dev/pmem0 on /mnt1 type xfs (rw,relatime,attr2,dax=always,inode64,logbufs=8,logbsize=32k,noquota)
/dev/pmem1 on /mnt2 type xfs (rw,relatime,attr2,dax=always,inode64,logbufs=8,logbsize=32k,noquota)

#./mapsync /mnt1/newfile    ----> Without async-flush-required
#./mapsync /mnt2/newfile    ----> With async-flush-required
Failed to mmap  with Operation not supported
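
For reference, the check the test performs is essentially a MAP_SYNC mmap()
of a file on the dax mount. The snippet below is a minimal, hypothetical
reconstruction of that check (the actual test is at the URL above, and the
page size, file name handling and fallback MAP_* defines here are
assumptions), not the test's verbatim source:

/* Minimal sketch of the MAP_SYNC check. mmap() with
 * MAP_SYNC | MAP_SHARED_VALIDATE succeeds only when the region supports
 * synchronous flush semantics; on the async-flush-required region it is
 * expected to fail with EOPNOTSUPP ("Operation not supported").
 */
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

#ifndef MAP_SHARED_VALIDATE
#define MAP_SHARED_VALIDATE	0x03
#endif
#ifndef MAP_SYNC
#define MAP_SYNC		0x80000
#endif

int main(int argc, char **argv)
{
	size_t len = 4096;
	void *addr;
	int fd;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <file-on-dax-mount>\n", argv[0]);
		return 1;
	}

	fd = open(argv[1], O_RDWR | O_CREAT, 0644);
	if (fd < 0 || ftruncate(fd, len) < 0) {
		perror("open/ftruncate");
		return 1;
	}

	/* Fails with EOPNOTSUPP when synchronous flush is not guaranteed */
	addr = mmap(NULL, len, PROT_READ | PROT_WRITE,
		    MAP_SHARED_VALIDATE | MAP_SYNC, fd, 0);
	if (addr == MAP_FAILED) {
		printf("Failed to mmap with %s\n", strerror(errno));
		close(fd);
		return 1;
	}

	munmap(addr, len);
	close(fd);
	return 0;
}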

Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>
---
The HCALL semantics are in review, not final.

 Documentation/powerpc/papr_hcalls.rst     |   14 ++++++++++
 arch/powerpc/include/asm/hvcall.h         |    3 +-
 arch/powerpc/platforms/pseries/papr_scm.c |   39 +++++++++++++++++++++++++++++
 3 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/Documentation/powerpc/papr_hcalls.rst b/Documentation/powerpc/papr_hcalls.rst
index 48fcf1255a33..cc310814f24c 100644
--- a/Documentation/powerpc/papr_hcalls.rst
+++ b/Documentation/powerpc/papr_hcalls.rst
@@ -275,6 +275,20 @@ Health Bitmap Flags:
 Given a DRC Index collect the performance statistics for NVDIMM and copy them
 to the resultBuffer.
 
+**H_SCM_ASYNC_FLUSH**
+
+| Input: *drcIndex*
+| Out: *continue-token*
+| Return Value: *H_SUCCESS, H_Parameter, H_P2, H_BUSY*
+
+Given a DRC Index, flush the data to the backend NVDIMM device.
+
+The hcall returns H_BUSY when the flush takes a long time and the hcall needs
+to be issued multiple times in order to be completely serviced. The
+*continue-token* from the output must be passed in the argument list of
+subsequent hcalls to the hypervisor until the hcall is completely serviced,
+at which point H_SUCCESS is returned by the hypervisor.
+
 References
 ==========
 .. [1] "Power Architecture Platform Reference"
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index c1fbccb04390..4a13074bc782 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -306,7 +306,8 @@
 #define H_SCM_HEALTH            0x400
 #define H_SCM_PERFORMANCE_STATS 0x418
 #define H_RPT_INVALIDATE	0x448
-#define MAX_HCALL_OPCODE	H_RPT_INVALIDATE
+#define H_SCM_ASYNC_FLUSH	0x4A0
+#define MAX_HCALL_OPCODE	H_SCM_ASYNC_FLUSH
 
 /* Scope args for H_SCM_UNBIND_ALL */
 #define H_UNBIND_SCOPE_ALL (0x1)
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index 835163f54244..1f8c5153cb3d 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -93,6 +93,7 @@ struct papr_scm_priv {
 	uint64_t block_size;
 	int metadata_size;
 	bool is_volatile;
+	bool async_flush_required;
 
 	uint64_t bound_addr;
 
@@ -117,6 +118,38 @@ struct papr_scm_priv {
 	size_t stat_buffer_len;
 };
 
+static int papr_scm_pmem_flush(struct nd_region *nd_region, struct bio *bio)
+{
+	unsigned long ret[PLPAR_HCALL_BUFSIZE];
+	struct papr_scm_priv *p = nd_region_provider_data(nd_region);
+	int64_t rc;
+	uint64_t token = 0;
+
+	do {
+		rc = plpar_hcall(H_SCM_ASYNC_FLUSH, ret, p->drc_index, token);
+
+		/* Check if we are stalled for some time */
+		token = ret[0];
+		if (H_IS_LONG_BUSY(rc)) {
+			msleep(get_longbusy_msecs(rc));
+			rc = H_BUSY;
+		} else if (rc == H_BUSY) {
+			cond_resched();
+		}
+
+	} while (rc == H_BUSY);
+
+	if (rc)
+		dev_err(&p->pdev->dev, "flush error: %lld\n", rc);
+	else
+		dev_dbg(&p->pdev->dev, "flush drc 0x%x complete\n",
+			p->drc_index);
+
+	dev_dbg(&p->pdev->dev, "Flush call complete\n");
+
+	return rc;
+}
+
 static LIST_HEAD(papr_nd_regions);
 static DEFINE_MUTEX(papr_ndr_lock);
 
@@ -943,6 +976,11 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
 	ndr_desc.num_mappings = 1;
 	ndr_desc.nd_set = &p->nd_set;
 
+	if (p->async_flush_required) {
+		set_bit(ND_REGION_ASYNC, &ndr_desc.flags);
+		ndr_desc.flush = papr_scm_pmem_flush;
+	}
+
 	if (p->is_volatile)
 		p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc);
 	else {
@@ -1088,6 +1126,7 @@ static int papr_scm_probe(struct platform_device *pdev)
 	p->block_size = block_size;
 	p->blocks = blocks;
 	p->is_volatile = !of_property_read_bool(dn, "ibm,cache-flush-required");
+	p->async_flush_required = of_property_read_bool(dn, "ibm,async-flush-required");
 
 	/* We just need to ensure that set cookies are unique across */
 	uuid_parse(uuid_str, (uuid_t *) uuid);


Thread overview: 12+ messages
2020-12-01 12:18 Shivaprasad G Bhat [this message]
2020-12-01 12:47 ` Pankaj Gupta
2020-12-01 12:57   ` Aneesh Kumar K.V
2020-12-01 13:09     ` Aneesh Kumar K.V
2020-12-01 13:02     ` Pankaj Gupta
