Linux-Doc Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH v3] powerpc/papr_scm: Implement support for H_SCM_FLUSH hcall
@ 2021-03-29 17:36 Shivaprasad G Bhat
  2021-03-30  4:56 ` Aneesh Kumar K.V
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Shivaprasad G Bhat @ 2021-03-29 17:36 UTC (permalink / raw)
  To: sbhat, linuxppc-dev, kvm-ppc, linux-nvdimm, aneesh.kumar, ellerman
  Cc: linux-doc, vaibhav

Add support for ND_REGION_ASYNC capability if the device tree
indicates 'ibm,hcall-flush-required' property in the NVDIMM node.
Flush is done by issuing H_SCM_FLUSH hcall to the hypervisor.

If the flush request failed, the hypervisor is expected to
to reflect the problem in the subsequent nvdimm H_SCM_HEALTH call.

This patch prevents mmap of namespaces with MAP_SYNC flag if the
nvdimm requires an explicit flush[1].

References:
[1] https://github.com/avocado-framework-tests/avocado-misc-tests/blob/master/memory/ndctl.py.data/map_sync.c

Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>
---
v2 - https://www.spinics.net/lists/kvm-ppc/msg18799.html
Changes from v2:
       - Fixed the commit message.
       - Add dev_dbg before the H_SCM_FLUSH hcall

v1 - https://www.spinics.net/lists/kvm-ppc/msg18272.html
Changes from v1:
       - Hcall semantics finalized, all changes are to accomodate them.

 Documentation/powerpc/papr_hcalls.rst     |   14 ++++++++++
 arch/powerpc/include/asm/hvcall.h         |    3 +-
 arch/powerpc/platforms/pseries/papr_scm.c |   40 +++++++++++++++++++++++++++++
 3 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/Documentation/powerpc/papr_hcalls.rst b/Documentation/powerpc/papr_hcalls.rst
index 48fcf1255a33..648f278eea8f 100644
--- a/Documentation/powerpc/papr_hcalls.rst
+++ b/Documentation/powerpc/papr_hcalls.rst
@@ -275,6 +275,20 @@ Health Bitmap Flags:
 Given a DRC Index collect the performance statistics for NVDIMM and copy them
 to the resultBuffer.
 
+**H_SCM_FLUSH**
+
+| Input: *drcIndex, continue-token*
+| Out: *continue-token*
+| Return Value: *H_SUCCESS, H_Parameter, H_P2, H_BUSY*
+
+Given a DRC Index Flush the data to backend NVDIMM device.
+
+The hcall returns H_BUSY when the flush takes longer time and the hcall needs
+to be issued multiple times in order to be completely serviced. The
+*continue-token* from the output to be passed in the argument list of
+subsequent hcalls to the hypervisor until the hcall is completely serviced
+at which point H_SUCCESS or other error is returned by the hypervisor.
+
 References
 ==========
 .. [1] "Power Architecture Platform Reference"
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index ed6086d57b22..9f7729a97ebd 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -315,7 +315,8 @@
 #define H_SCM_HEALTH            0x400
 #define H_SCM_PERFORMANCE_STATS 0x418
 #define H_RPT_INVALIDATE	0x448
-#define MAX_HCALL_OPCODE	H_RPT_INVALIDATE
+#define H_SCM_FLUSH		0x44C
+#define MAX_HCALL_OPCODE	H_SCM_FLUSH
 
 /* Scope args for H_SCM_UNBIND_ALL */
 #define H_UNBIND_SCOPE_ALL (0x1)
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index 835163f54244..b7a47fcc5aa5 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -93,6 +93,7 @@ struct papr_scm_priv {
 	uint64_t block_size;
 	int metadata_size;
 	bool is_volatile;
+	bool hcall_flush_required;
 
 	uint64_t bound_addr;
 
@@ -117,6 +118,39 @@ struct papr_scm_priv {
 	size_t stat_buffer_len;
 };
 
+static int papr_scm_pmem_flush(struct nd_region *nd_region,
+			       struct bio *bio __maybe_unused)
+{
+	struct papr_scm_priv *p = nd_region_provider_data(nd_region);
+	unsigned long ret_buf[PLPAR_HCALL_BUFSIZE];
+	uint64_t token = 0;
+	int64_t rc;
+
+	dev_dbg(&p->pdev->dev, "flush drc 0x%x", p->drc_index);
+
+	do {
+		rc = plpar_hcall(H_SCM_FLUSH, ret_buf, p->drc_index, token);
+		token = ret_buf[0];
+
+		/* Check if we are stalled for some time */
+		if (H_IS_LONG_BUSY(rc)) {
+			msleep(get_longbusy_msecs(rc));
+			rc = H_BUSY;
+		} else if (rc == H_BUSY) {
+			cond_resched();
+		}
+	} while (rc == H_BUSY);
+
+	if (rc) {
+		dev_err(&p->pdev->dev, "flush error: %lld", rc);
+		rc = -EIO;
+	} else {
+		dev_dbg(&p->pdev->dev, "flush drc 0x%x complete", p->drc_index);
+	}
+
+	return rc;
+}
+
 static LIST_HEAD(papr_nd_regions);
 static DEFINE_MUTEX(papr_ndr_lock);
 
@@ -943,6 +977,11 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
 	ndr_desc.num_mappings = 1;
 	ndr_desc.nd_set = &p->nd_set;
 
+	if (p->hcall_flush_required) {
+		set_bit(ND_REGION_ASYNC, &ndr_desc.flags);
+		ndr_desc.flush = papr_scm_pmem_flush;
+	}
+
 	if (p->is_volatile)
 		p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc);
 	else {
@@ -1088,6 +1127,7 @@ static int papr_scm_probe(struct platform_device *pdev)
 	p->block_size = block_size;
 	p->blocks = blocks;
 	p->is_volatile = !of_property_read_bool(dn, "ibm,cache-flush-required");
+	p->hcall_flush_required = of_property_read_bool(dn, "ibm,hcall-flush-required");
 
 	/* We just need to ensure that set cookies are unique across */
 	uuid_parse(uuid_str, (uuid_t *) uuid);



^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3] powerpc/papr_scm: Implement support for H_SCM_FLUSH hcall
  2021-03-29 17:36 [PATCH v3] powerpc/papr_scm: Implement support for H_SCM_FLUSH hcall Shivaprasad G Bhat
@ 2021-03-30  4:56 ` Aneesh Kumar K.V
  2021-03-31 10:20   ` Michael Ellerman
  2021-04-13 14:13 ` Vaibhav Jain
  2021-04-19  3:59 ` Michael Ellerman
  2 siblings, 1 reply; 8+ messages in thread
From: Aneesh Kumar K.V @ 2021-03-30  4:56 UTC (permalink / raw)
  To: Shivaprasad G Bhat, sbhat, linuxppc-dev, kvm-ppc, linux-nvdimm, ellerman
  Cc: linux-doc, vaibhav

Shivaprasad G Bhat <sbhat@linux.ibm.com> writes:

> Add support for ND_REGION_ASYNC capability if the device tree
> indicates 'ibm,hcall-flush-required' property in the NVDIMM node.
> Flush is done by issuing H_SCM_FLUSH hcall to the hypervisor.
>
> If the flush request failed, the hypervisor is expected to
> to reflect the problem in the subsequent nvdimm H_SCM_HEALTH call.
>
> This patch prevents mmap of namespaces with MAP_SYNC flag if the
> nvdimm requires an explicit flush[1].
>
> References:
> [1] https://github.com/avocado-framework-tests/avocado-misc-tests/blob/master/memory/ndctl.py.data/map_sync.c


Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>

>
> Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>
> ---
> v2 - https://www.spinics.net/lists/kvm-ppc/msg18799.html
> Changes from v2:
>        - Fixed the commit message.
>        - Add dev_dbg before the H_SCM_FLUSH hcall
>
> v1 - https://www.spinics.net/lists/kvm-ppc/msg18272.html
> Changes from v1:
>        - Hcall semantics finalized, all changes are to accomodate them.
>
>  Documentation/powerpc/papr_hcalls.rst     |   14 ++++++++++
>  arch/powerpc/include/asm/hvcall.h         |    3 +-
>  arch/powerpc/platforms/pseries/papr_scm.c |   40 +++++++++++++++++++++++++++++
>  3 files changed, 56 insertions(+), 1 deletion(-)
>
> diff --git a/Documentation/powerpc/papr_hcalls.rst b/Documentation/powerpc/papr_hcalls.rst
> index 48fcf1255a33..648f278eea8f 100644
> --- a/Documentation/powerpc/papr_hcalls.rst
> +++ b/Documentation/powerpc/papr_hcalls.rst
> @@ -275,6 +275,20 @@ Health Bitmap Flags:
>  Given a DRC Index collect the performance statistics for NVDIMM and copy them
>  to the resultBuffer.
>  
> +**H_SCM_FLUSH**
> +
> +| Input: *drcIndex, continue-token*
> +| Out: *continue-token*
> +| Return Value: *H_SUCCESS, H_Parameter, H_P2, H_BUSY*
> +
> +Given a DRC Index Flush the data to backend NVDIMM device.
> +
> +The hcall returns H_BUSY when the flush takes longer time and the hcall needs
> +to be issued multiple times in order to be completely serviced. The
> +*continue-token* from the output to be passed in the argument list of
> +subsequent hcalls to the hypervisor until the hcall is completely serviced
> +at which point H_SUCCESS or other error is returned by the hypervisor.
> +
>  References
>  ==========
>  .. [1] "Power Architecture Platform Reference"
> diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
> index ed6086d57b22..9f7729a97ebd 100644
> --- a/arch/powerpc/include/asm/hvcall.h
> +++ b/arch/powerpc/include/asm/hvcall.h
> @@ -315,7 +315,8 @@
>  #define H_SCM_HEALTH            0x400
>  #define H_SCM_PERFORMANCE_STATS 0x418
>  #define H_RPT_INVALIDATE	0x448
> -#define MAX_HCALL_OPCODE	H_RPT_INVALIDATE
> +#define H_SCM_FLUSH		0x44C
> +#define MAX_HCALL_OPCODE	H_SCM_FLUSH
>  
>  /* Scope args for H_SCM_UNBIND_ALL */
>  #define H_UNBIND_SCOPE_ALL (0x1)
> diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
> index 835163f54244..b7a47fcc5aa5 100644
> --- a/arch/powerpc/platforms/pseries/papr_scm.c
> +++ b/arch/powerpc/platforms/pseries/papr_scm.c
> @@ -93,6 +93,7 @@ struct papr_scm_priv {
>  	uint64_t block_size;
>  	int metadata_size;
>  	bool is_volatile;
> +	bool hcall_flush_required;
>  
>  	uint64_t bound_addr;
>  
> @@ -117,6 +118,39 @@ struct papr_scm_priv {
>  	size_t stat_buffer_len;
>  };
>  
> +static int papr_scm_pmem_flush(struct nd_region *nd_region,
> +			       struct bio *bio __maybe_unused)
> +{
> +	struct papr_scm_priv *p = nd_region_provider_data(nd_region);
> +	unsigned long ret_buf[PLPAR_HCALL_BUFSIZE];
> +	uint64_t token = 0;
> +	int64_t rc;
> +
> +	dev_dbg(&p->pdev->dev, "flush drc 0x%x", p->drc_index);
> +
> +	do {
> +		rc = plpar_hcall(H_SCM_FLUSH, ret_buf, p->drc_index, token);
> +		token = ret_buf[0];
> +
> +		/* Check if we are stalled for some time */
> +		if (H_IS_LONG_BUSY(rc)) {
> +			msleep(get_longbusy_msecs(rc));
> +			rc = H_BUSY;
> +		} else if (rc == H_BUSY) {
> +			cond_resched();
> +		}
> +	} while (rc == H_BUSY);
> +
> +	if (rc) {
> +		dev_err(&p->pdev->dev, "flush error: %lld", rc);
> +		rc = -EIO;
> +	} else {
> +		dev_dbg(&p->pdev->dev, "flush drc 0x%x complete", p->drc_index);
> +	}
> +
> +	return rc;
> +}
> +
>  static LIST_HEAD(papr_nd_regions);
>  static DEFINE_MUTEX(papr_ndr_lock);
>  
> @@ -943,6 +977,11 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
>  	ndr_desc.num_mappings = 1;
>  	ndr_desc.nd_set = &p->nd_set;
>  
> +	if (p->hcall_flush_required) {
> +		set_bit(ND_REGION_ASYNC, &ndr_desc.flags);
> +		ndr_desc.flush = papr_scm_pmem_flush;
> +	}
> +
>  	if (p->is_volatile)
>  		p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc);
>  	else {
> @@ -1088,6 +1127,7 @@ static int papr_scm_probe(struct platform_device *pdev)
>  	p->block_size = block_size;
>  	p->blocks = blocks;
>  	p->is_volatile = !of_property_read_bool(dn, "ibm,cache-flush-required");
> +	p->hcall_flush_required = of_property_read_bool(dn, "ibm,hcall-flush-required");
>  
>  	/* We just need to ensure that set cookies are unique across */
>  	uuid_parse(uuid_str, (uuid_t *) uuid);
>
> _______________________________________________
> Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
> To unsubscribe send an email to linux-nvdimm-leave@lists.01.org

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3] powerpc/papr_scm: Implement support for H_SCM_FLUSH hcall
  2021-03-30  4:56 ` Aneesh Kumar K.V
@ 2021-03-31 10:20   ` Michael Ellerman
  2021-04-05  3:47     ` Aneesh Kumar K.V
  0 siblings, 1 reply; 8+ messages in thread
From: Michael Ellerman @ 2021-03-31 10:20 UTC (permalink / raw)
  To: Aneesh Kumar K.V, Shivaprasad G Bhat, sbhat, linuxppc-dev,
	kvm-ppc, linux-nvdimm
  Cc: linux-doc, vaibhav

"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:
> Shivaprasad G Bhat <sbhat@linux.ibm.com> writes:
>
>> Add support for ND_REGION_ASYNC capability if the device tree
>> indicates 'ibm,hcall-flush-required' property in the NVDIMM node.
>> Flush is done by issuing H_SCM_FLUSH hcall to the hypervisor.
>>
>> If the flush request failed, the hypervisor is expected to
>> to reflect the problem in the subsequent nvdimm H_SCM_HEALTH call.
>>
>> This patch prevents mmap of namespaces with MAP_SYNC flag if the
>> nvdimm requires an explicit flush[1].
>>
>> References:
>> [1] https://github.com/avocado-framework-tests/avocado-misc-tests/blob/master/memory/ndctl.py.data/map_sync.c
>
>
> Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>

Do we need an ack from nvdimm folks on this?

Or is it entirely powerpc internal (seems like it from the diffstat)?

cheers

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3] powerpc/papr_scm: Implement support for H_SCM_FLUSH hcall
  2021-03-31 10:20   ` Michael Ellerman
@ 2021-04-05  3:47     ` Aneesh Kumar K.V
  0 siblings, 0 replies; 8+ messages in thread
From: Aneesh Kumar K.V @ 2021-04-05  3:47 UTC (permalink / raw)
  To: Michael Ellerman, Shivaprasad G Bhat, sbhat, linuxppc-dev,
	kvm-ppc, linux-nvdimm
  Cc: linux-doc, vaibhav

On 3/31/21 3:50 PM, Michael Ellerman wrote:
> "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:
>> Shivaprasad G Bhat <sbhat@linux.ibm.com> writes:
>>
>>> Add support for ND_REGION_ASYNC capability if the device tree
>>> indicates 'ibm,hcall-flush-required' property in the NVDIMM node.
>>> Flush is done by issuing H_SCM_FLUSH hcall to the hypervisor.
>>>
>>> If the flush request failed, the hypervisor is expected to
>>> to reflect the problem in the subsequent nvdimm H_SCM_HEALTH call.
>>>
>>> This patch prevents mmap of namespaces with MAP_SYNC flag if the
>>> nvdimm requires an explicit flush[1].
>>>
>>> References:
>>> [1] https://github.com/avocado-framework-tests/avocado-misc-tests/blob/master/memory/ndctl.py.data/map_sync.c
>>
>>
>> Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> 
> Do we need an ack from nvdimm folks on this?
> 
> Or is it entirely powerpc internal (seems like it from the diffstat)?
> 

This is within powerpc and we are implementing details w.r.t PAPR spec. 
There is a Qemu implementation that is getting reviewed here
https://lore.kernel.org/linux-nvdimm/161650723087.2959.8703728357980727008.stgit@6532096d84d3

But with respect to this patch, we can take that independent of the Qemu 
backend implementation.

-aneesh


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3] powerpc/papr_scm: Implement support for H_SCM_FLUSH hcall
  2021-03-29 17:36 [PATCH v3] powerpc/papr_scm: Implement support for H_SCM_FLUSH hcall Shivaprasad G Bhat
  2021-03-30  4:56 ` Aneesh Kumar K.V
@ 2021-04-13 14:13 ` Vaibhav Jain
  2021-04-14  5:20   ` Aneesh Kumar K.V
  2021-04-19  3:59 ` Michael Ellerman
  2 siblings, 1 reply; 8+ messages in thread
From: Vaibhav Jain @ 2021-04-13 14:13 UTC (permalink / raw)
  To: Shivaprasad G Bhat, sbhat, linuxppc-dev, kvm-ppc, linux-nvdimm,
	aneesh.kumar, ellerman
  Cc: linux-doc

Hi Shiva,

Apologies for a late review but something just caught my eye while
working on a different patch.

Shivaprasad G Bhat <sbhat@linux.ibm.com> writes:

> Add support for ND_REGION_ASYNC capability if the device tree
> indicates 'ibm,hcall-flush-required' property in the NVDIMM node.
> Flush is done by issuing H_SCM_FLUSH hcall to the hypervisor.
>
> If the flush request failed, the hypervisor is expected to
> to reflect the problem in the subsequent nvdimm H_SCM_HEALTH call.
>
> This patch prevents mmap of namespaces with MAP_SYNC flag if the
> nvdimm requires an explicit flush[1].
>
> References:
> [1] https://github.com/avocado-framework-tests/avocado-misc-tests/blob/master/memory/ndctl.py.data/map_sync.c
>
> Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>
> ---
> v2 - https://www.spinics.net/lists/kvm-ppc/msg18799.html
> Changes from v2:
>        - Fixed the commit message.
>        - Add dev_dbg before the H_SCM_FLUSH hcall
>
> v1 - https://www.spinics.net/lists/kvm-ppc/msg18272.html
> Changes from v1:
>        - Hcall semantics finalized, all changes are to accomodate them.
>
>  Documentation/powerpc/papr_hcalls.rst     |   14 ++++++++++
>  arch/powerpc/include/asm/hvcall.h         |    3 +-
>  arch/powerpc/platforms/pseries/papr_scm.c |   40 +++++++++++++++++++++++++++++
>  3 files changed, 56 insertions(+), 1 deletion(-)
>
> diff --git a/Documentation/powerpc/papr_hcalls.rst b/Documentation/powerpc/papr_hcalls.rst
> index 48fcf1255a33..648f278eea8f 100644
> --- a/Documentation/powerpc/papr_hcalls.rst
> +++ b/Documentation/powerpc/papr_hcalls.rst
> @@ -275,6 +275,20 @@ Health Bitmap Flags:
>  Given a DRC Index collect the performance statistics for NVDIMM and copy them
>  to the resultBuffer.
>  
> +**H_SCM_FLUSH**
> +
> +| Input: *drcIndex, continue-token*
> +| Out: *continue-token*
> +| Return Value: *H_SUCCESS, H_Parameter, H_P2, H_BUSY*
> +
> +Given a DRC Index Flush the data to backend NVDIMM device.
> +
> +The hcall returns H_BUSY when the flush takes longer time and the hcall needs
> +to be issued multiple times in order to be completely serviced. The
> +*continue-token* from the output to be passed in the argument list of
> +subsequent hcalls to the hypervisor until the hcall is completely serviced
> +at which point H_SUCCESS or other error is returned by the hypervisor.
> +
Does the hcall semantic also include measures to trigger a barrier/fence
(like pm_wmb()) so that all the stores before the hcall are gauranteed
to have hit the pmem controller ?

If not then the papr_scm_pmem_flush() introduced below should issue a
fence like pm_wmb() before issuing the flush hcall.

If yes then this behaviour should also be documented with the hcall
semantics above.

>  References
>  ==========
>  .. [1] "Power Architecture Platform Reference"
> diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
> index ed6086d57b22..9f7729a97ebd 100644
> --- a/arch/powerpc/include/asm/hvcall.h
> +++ b/arch/powerpc/include/asm/hvcall.h
> @@ -315,7 +315,8 @@
>  #define H_SCM_HEALTH            0x400
>  #define H_SCM_PERFORMANCE_STATS 0x418
>  #define H_RPT_INVALIDATE	0x448
> -#define MAX_HCALL_OPCODE	H_RPT_INVALIDATE
> +#define H_SCM_FLUSH		0x44C
> +#define MAX_HCALL_OPCODE	H_SCM_FLUSH
>  
>  /* Scope args for H_SCM_UNBIND_ALL */
>  #define H_UNBIND_SCOPE_ALL (0x1)
> diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
> index 835163f54244..b7a47fcc5aa5 100644
> --- a/arch/powerpc/platforms/pseries/papr_scm.c
> +++ b/arch/powerpc/platforms/pseries/papr_scm.c
> @@ -93,6 +93,7 @@ struct papr_scm_priv {
>  	uint64_t block_size;
>  	int metadata_size;
>  	bool is_volatile;
> +	bool hcall_flush_required;
>  
>  	uint64_t bound_addr;
>  
> @@ -117,6 +118,39 @@ struct papr_scm_priv {
>  	size_t stat_buffer_len;
>  };
>  
> +static int papr_scm_pmem_flush(struct nd_region *nd_region,
> +			       struct bio *bio __maybe_unused)
> +{
> +	struct papr_scm_priv *p = nd_region_provider_data(nd_region);
> +	unsigned long ret_buf[PLPAR_HCALL_BUFSIZE];
> +	uint64_t token = 0;
> +	int64_t rc;
> +
> +	dev_dbg(&p->pdev->dev, "flush drc 0x%x", p->drc_index);
> +
> +	do {
> +		rc = plpar_hcall(H_SCM_FLUSH, ret_buf, p->drc_index, token);
> +		token = ret_buf[0];
> +
> +		/* Check if we are stalled for some time */
> +		if (H_IS_LONG_BUSY(rc)) {
> +			msleep(get_longbusy_msecs(rc));
> +			rc = H_BUSY;
> +		} else if (rc == H_BUSY) {
> +			cond_resched();
> +		}
> +	} while (rc == H_BUSY);
> +
> +	if (rc) {
> +		dev_err(&p->pdev->dev, "flush error: %lld", rc);
> +		rc = -EIO;
> +	} else {
> +		dev_dbg(&p->pdev->dev, "flush drc 0x%x complete", p->drc_index);
> +	}
> +
> +	return rc;
> +}
> +
>  static LIST_HEAD(papr_nd_regions);
>  static DEFINE_MUTEX(papr_ndr_lock);
>  
> @@ -943,6 +977,11 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
>  	ndr_desc.num_mappings = 1;
>  	ndr_desc.nd_set = &p->nd_set;
>  
> +	if (p->hcall_flush_required) {
> +		set_bit(ND_REGION_ASYNC, &ndr_desc.flags);
> +		ndr_desc.flush = papr_scm_pmem_flush;
> +	}
> +
>  	if (p->is_volatile)
>  		p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc);
>  	else {
> @@ -1088,6 +1127,7 @@ static int papr_scm_probe(struct platform_device *pdev)
>  	p->block_size = block_size;
>  	p->blocks = blocks;
>  	p->is_volatile = !of_property_read_bool(dn, "ibm,cache-flush-required");
> +	p->hcall_flush_required = of_property_read_bool(dn, "ibm,hcall-flush-required");
>  
>  	/* We just need to ensure that set cookies are unique across */
>  	uuid_parse(uuid_str, (uuid_t *) uuid);
>
>

-- 
Cheers
~ Vaibhav

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3] powerpc/papr_scm: Implement support for H_SCM_FLUSH hcall
  2021-04-13 14:13 ` Vaibhav Jain
@ 2021-04-14  5:20   ` Aneesh Kumar K.V
  2021-04-14  9:21     ` Vaibhav Jain
  0 siblings, 1 reply; 8+ messages in thread
From: Aneesh Kumar K.V @ 2021-04-14  5:20 UTC (permalink / raw)
  To: Vaibhav Jain, Shivaprasad G Bhat, sbhat, linuxppc-dev, kvm-ppc,
	linux-nvdimm, ellerman
  Cc: linux-doc

Vaibhav Jain <vaibhav@linux.ibm.com> writes:

> Hi Shiva,
>
> Apologies for a late review but something just caught my eye while
> working on a different patch.
>
> Shivaprasad G Bhat <sbhat@linux.ibm.com> writes:
>
>> Add support for ND_REGION_ASYNC capability if the device tree
>> indicates 'ibm,hcall-flush-required' property in the NVDIMM node.
>> Flush is done by issuing H_SCM_FLUSH hcall to the hypervisor.
>>
>> If the flush request failed, the hypervisor is expected to
>> to reflect the problem in the subsequent nvdimm H_SCM_HEALTH call.
>>
>> This patch prevents mmap of namespaces with MAP_SYNC flag if the
>> nvdimm requires an explicit flush[1].
>>
>> References:
>> [1] https://github.com/avocado-framework-tests/avocado-misc-tests/blob/master/memory/ndctl.py.data/map_sync.c
>>
>> Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>
>> ---
>> v2 - https://www.spinics.net/lists/kvm-ppc/msg18799.html
>> Changes from v2:
>>        - Fixed the commit message.
>>        - Add dev_dbg before the H_SCM_FLUSH hcall
>>
>> v1 - https://www.spinics.net/lists/kvm-ppc/msg18272.html
>> Changes from v1:
>>        - Hcall semantics finalized, all changes are to accomodate them.
>>
>>  Documentation/powerpc/papr_hcalls.rst     |   14 ++++++++++
>>  arch/powerpc/include/asm/hvcall.h         |    3 +-
>>  arch/powerpc/platforms/pseries/papr_scm.c |   40 +++++++++++++++++++++++++++++
>>  3 files changed, 56 insertions(+), 1 deletion(-)
>>
>> diff --git a/Documentation/powerpc/papr_hcalls.rst b/Documentation/powerpc/papr_hcalls.rst
>> index 48fcf1255a33..648f278eea8f 100644
>> --- a/Documentation/powerpc/papr_hcalls.rst
>> +++ b/Documentation/powerpc/papr_hcalls.rst
>> @@ -275,6 +275,20 @@ Health Bitmap Flags:
>>  Given a DRC Index collect the performance statistics for NVDIMM and copy them
>>  to the resultBuffer.
>>  
>> +**H_SCM_FLUSH**
>> +
>> +| Input: *drcIndex, continue-token*
>> +| Out: *continue-token*
>> +| Return Value: *H_SUCCESS, H_Parameter, H_P2, H_BUSY*
>> +
>> +Given a DRC Index Flush the data to backend NVDIMM device.
>> +
>> +The hcall returns H_BUSY when the flush takes longer time and the hcall needs
>> +to be issued multiple times in order to be completely serviced. The
>> +*continue-token* from the output to be passed in the argument list of
>> +subsequent hcalls to the hypervisor until the hcall is completely serviced
>> +at which point H_SUCCESS or other error is returned by the hypervisor.
>> +
> Does the hcall semantic also include measures to trigger a barrier/fence
> (like pm_wmb()) so that all the stores before the hcall are gauranteed
> to have hit the pmem controller ?

It is upto the hypervisor to implement the right sequence to ensure the
guarantee. The hcall doesn't expect the store to reach the platform
buffers.


>
> If not then the papr_scm_pmem_flush() introduced below should issue a
> fence like pm_wmb() before issuing the flush hcall.
>
> If yes then this behaviour should also be documented with the hcall
> semantics above.
>

IIUC fdatasync on the backend file is enough to ensure the
guaraantee. Such a request will have REQ_PREFLUSH and REQ_FUA which will
do the necessary barrier on the backing device holding the backend file.

-aneesh

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3] powerpc/papr_scm: Implement support for H_SCM_FLUSH hcall
  2021-04-14  5:20   ` Aneesh Kumar K.V
@ 2021-04-14  9:21     ` Vaibhav Jain
  0 siblings, 0 replies; 8+ messages in thread
From: Vaibhav Jain @ 2021-04-14  9:21 UTC (permalink / raw)
  To: Aneesh Kumar K.V, Shivaprasad G Bhat, sbhat, linuxppc-dev,
	kvm-ppc, linux-nvdimm, ellerman
  Cc: linux-doc

"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:

> Vaibhav Jain <vaibhav@linux.ibm.com> writes:
>
>> Hi Shiva,
>>
>> Apologies for a late review but something just caught my eye while
>> working on a different patch.
>>
>> Shivaprasad G Bhat <sbhat@linux.ibm.com> writes:
>>
>>> Add support for ND_REGION_ASYNC capability if the device tree
>>> indicates 'ibm,hcall-flush-required' property in the NVDIMM node.
>>> Flush is done by issuing H_SCM_FLUSH hcall to the hypervisor.
>>>
>>> If the flush request failed, the hypervisor is expected to
>>> to reflect the problem in the subsequent nvdimm H_SCM_HEALTH call.
>>>
>>> This patch prevents mmap of namespaces with MAP_SYNC flag if the
>>> nvdimm requires an explicit flush[1].
>>>
>>> References:
>>> [1] https://github.com/avocado-framework-tests/avocado-misc-tests/blob/master/memory/ndctl.py.data/map_sync.c
>>>
>>> Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>
>>> ---
>>> v2 - https://www.spinics.net/lists/kvm-ppc/msg18799.html
>>> Changes from v2:
>>>        - Fixed the commit message.
>>>        - Add dev_dbg before the H_SCM_FLUSH hcall
>>>
>>> v1 - https://www.spinics.net/lists/kvm-ppc/msg18272.html
>>> Changes from v1:
>>>        - Hcall semantics finalized, all changes are to accomodate them.
>>>
>>>  Documentation/powerpc/papr_hcalls.rst     |   14 ++++++++++
>>>  arch/powerpc/include/asm/hvcall.h         |    3 +-
>>>  arch/powerpc/platforms/pseries/papr_scm.c |   40 +++++++++++++++++++++++++++++
>>>  3 files changed, 56 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/Documentation/powerpc/papr_hcalls.rst b/Documentation/powerpc/papr_hcalls.rst
>>> index 48fcf1255a33..648f278eea8f 100644
>>> --- a/Documentation/powerpc/papr_hcalls.rst
>>> +++ b/Documentation/powerpc/papr_hcalls.rst
>>> @@ -275,6 +275,20 @@ Health Bitmap Flags:
>>>  Given a DRC Index collect the performance statistics for NVDIMM and copy them
>>>  to the resultBuffer.
>>>  
>>> +**H_SCM_FLUSH**
>>> +
>>> +| Input: *drcIndex, continue-token*
>>> +| Out: *continue-token*
>>> +| Return Value: *H_SUCCESS, H_Parameter, H_P2, H_BUSY*
>>> +
>>> +Given a DRC Index Flush the data to backend NVDIMM device.
>>> +
>>> +The hcall returns H_BUSY when the flush takes longer time and the hcall needs
>>> +to be issued multiple times in order to be completely serviced. The
>>> +*continue-token* from the output to be passed in the argument list of
>>> +subsequent hcalls to the hypervisor until the hcall is completely serviced
>>> +at which point H_SUCCESS or other error is returned by the hypervisor.
>>> +
>> Does the hcall semantic also include measures to trigger a barrier/fence
>> (like pm_wmb()) so that all the stores before the hcall are gauranteed
>> to have hit the pmem controller ?
>
> It is upto the hypervisor to implement the right sequence to ensure the
> guarantee. The hcall doesn't expect the store to reach the platform
> buffers.

Since the asyc_flush function is also called for performing
deep_flush from libnvdimm and as the hcall doesnt gaurantee stores to
reach the platform buffers, hence papr_scm_pmem_flush() should issue
pm_wmb() before the hcall.

This would ensure papr_scm_pmem_flush() semantics are consistent that to
generic_nvdimm_flush().

Also, adding the statement "The hcall doesn't expect the store to reach
the platform buffers" to the hcall documentation would be good to have.

>
>
>>
>> If not then the papr_scm_pmem_flush() introduced below should issue a
>> fence like pm_wmb() before issuing the flush hcall.
>>
>> If yes then this behaviour should also be documented with the hcall
>> semantics above.
>>
>
> IIUC fdatasync on the backend file is enough to ensure the
> guaraantee. Such a request will have REQ_PREFLUSH and REQ_FUA which will
> do the necessary barrier on the backing device holding the backend file.
>
Right, but thats assuming nvdimm is backed by a regular file in
hypervisor which may not always be the case.


> -aneesh

-- 
Cheers
~ Vaibhav

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3] powerpc/papr_scm: Implement support for H_SCM_FLUSH hcall
  2021-03-29 17:36 [PATCH v3] powerpc/papr_scm: Implement support for H_SCM_FLUSH hcall Shivaprasad G Bhat
  2021-03-30  4:56 ` Aneesh Kumar K.V
  2021-04-13 14:13 ` Vaibhav Jain
@ 2021-04-19  3:59 ` Michael Ellerman
  2 siblings, 0 replies; 8+ messages in thread
From: Michael Ellerman @ 2021-04-19  3:59 UTC (permalink / raw)
  To: linuxppc-dev, ellerman, linux-nvdimm, sbhat, Shivaprasad G Bhat,
	kvm-ppc, aneesh.kumar
  Cc: vaibhav, linux-doc

On Mon, 29 Mar 2021 13:36:43 -0400, Shivaprasad G Bhat wrote:
> Add support for ND_REGION_ASYNC capability if the device tree
> indicates 'ibm,hcall-flush-required' property in the NVDIMM node.
> Flush is done by issuing H_SCM_FLUSH hcall to the hypervisor.
> 
> If the flush request failed, the hypervisor is expected to
> to reflect the problem in the subsequent nvdimm H_SCM_HEALTH call.
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc/papr_scm: Implement support for H_SCM_FLUSH hcall
      https://git.kernel.org/powerpc/c/75b7c05ebf902632f7f540c3eb0a8945c2d74aab

cheers

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, back to index

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-29 17:36 [PATCH v3] powerpc/papr_scm: Implement support for H_SCM_FLUSH hcall Shivaprasad G Bhat
2021-03-30  4:56 ` Aneesh Kumar K.V
2021-03-31 10:20   ` Michael Ellerman
2021-04-05  3:47     ` Aneesh Kumar K.V
2021-04-13 14:13 ` Vaibhav Jain
2021-04-14  5:20   ` Aneesh Kumar K.V
2021-04-14  9:21     ` Vaibhav Jain
2021-04-19  3:59 ` Michael Ellerman

Linux-Doc Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-doc/0 linux-doc/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-doc linux-doc/ https://lore.kernel.org/linux-doc \
		linux-doc@vger.kernel.org
	public-inbox-index linux-doc

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-doc


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git