nvdimm.lists.linux.dev archive mirror
 help / color / mirror / Atom feed
* PAPR SCM support
@ 2018-10-10  6:08 Oliver O'Halloran
  2018-10-10  6:08 ` [PATCH 1/2] powerpc/pseries: PAPR persistent memory support Oliver O'Halloran
  2018-10-10  6:08 ` [PATCH 2/2] powerpc/pseries: Add driver for PAPR SCM regions Oliver O'Halloran
  0 siblings, 2 replies; 11+ messages in thread
From: Oliver O'Halloran @ 2018-10-10  6:08 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: nfont, linux-nvdimm

This series adds support for the para-virtualised storage class memory
interface defined by the Power Architecture Platform Reference.

Patch 1 implements the pseries device discovery (via DT) and hotplug
support (via RTAS hotplug interrupt).

Patch 2 implements a driver that binds to the platform devices, does the
necessary H-calls to activate the region and configures the libnvdimm
interface. It also adds an NDCTL implementation to allow the "metadata"
space associated with an SCM region to be used as label space.

This should go in via the ppc tree, but an ack from Dan for patch two
would be appreciated.

_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 1/2] powerpc/pseries: PAPR persistent memory support
  2018-10-10  6:08 PAPR SCM support Oliver O'Halloran
@ 2018-10-10  6:08 ` Oliver O'Halloran
  2018-10-10 16:36   ` Nathan Fontenot
  2018-10-10  6:08 ` [PATCH 2/2] powerpc/pseries: Add driver for PAPR SCM regions Oliver O'Halloran
  1 sibling, 1 reply; 11+ messages in thread
From: Oliver O'Halloran @ 2018-10-10  6:08 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: nfont, linux-nvdimm

This patch implements support for discovering storage class memory
devices at boot and for handling hotplug of new regions via RTAS
hotplug events.

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
 arch/powerpc/include/asm/firmware.h       |  3 ++-
 arch/powerpc/include/asm/hvcall.h         | 10 +++++++++-
 arch/powerpc/include/asm/rtas.h           |  2 ++
 arch/powerpc/kernel/rtasd.c               |  2 ++
 arch/powerpc/platforms/pseries/Makefile   |  2 +-
 arch/powerpc/platforms/pseries/dlpar.c    |  4 ++++
 arch/powerpc/platforms/pseries/firmware.c |  1 +
 arch/powerpc/platforms/pseries/pseries.h  |  5 +++++
 arch/powerpc/platforms/pseries/ras.c      |  3 ++-
 9 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
index 7a051bd21f87..113c64d5d394 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -52,6 +52,7 @@
 #define FW_FEATURE_PRRN		ASM_CONST(0x0000000200000000)
 #define FW_FEATURE_DRMEM_V2	ASM_CONST(0x0000000400000000)
 #define FW_FEATURE_DRC_INFO	ASM_CONST(0x0000000800000000)
+#define FW_FEATURE_PAPR_SCM 	ASM_CONST(0x0000001000000000)
 
 #ifndef __ASSEMBLY__
 
@@ -69,7 +70,7 @@ enum {
 		FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY |
 		FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN |
 		FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 |
-		FW_FEATURE_DRC_INFO,
+		FW_FEATURE_DRC_INFO | FW_FEATURE_PAPR_SCM,
 	FW_FEATURE_PSERIES_ALWAYS = 0,
 	FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL,
 	FW_FEATURE_POWERNV_ALWAYS = 0,
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index a0b17f9f1ea4..0e81ef83b35a 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -295,7 +295,15 @@
 #define H_INT_ESB               0x3C8
 #define H_INT_SYNC              0x3CC
 #define H_INT_RESET             0x3D0
-#define MAX_HCALL_OPCODE	H_INT_RESET
+#define H_SCM_READ_METADATA     0x3E4
+#define H_SCM_WRITE_METADATA    0x3E8
+#define H_SCM_BIND_MEM          0x3EC
+#define H_SCM_UNBIND_MEM        0x3F0
+#define H_SCM_QUERY_BLOCK_MEM_BINDING 0x3F4
+#define H_SCM_QUERY_LOGICAL_MEM_BINDING 0x3F8
+#define H_SCM_MEM_QUERY	        0x3FC
+#define H_SCM_BLOCK_CLEAR       0x400
+#define MAX_HCALL_OPCODE	H_SCM_BLOCK_CLEAR
 
 /* H_VIOCTL functions */
 #define H_GET_VIOA_DUMP_SIZE	0x01
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 71e393c46a49..1e81f3d55457 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -125,6 +125,7 @@ struct rtas_suspend_me_data {
 #define RTAS_TYPE_INFO			0xE2
 #define RTAS_TYPE_DEALLOC		0xE3
 #define RTAS_TYPE_DUMP			0xE4
+#define RTAS_TYPE_HOTPLUG		0xE5
 /* I don't add PowerMGM events right now, this is a different topic */ 
 #define RTAS_TYPE_PMGM_POWER_SW_ON	0x60
 #define RTAS_TYPE_PMGM_POWER_SW_OFF	0x61
@@ -316,6 +317,7 @@ struct pseries_hp_errorlog {
 #define PSERIES_HP_ELOG_RESOURCE_MEM	2
 #define PSERIES_HP_ELOG_RESOURCE_SLOT	3
 #define PSERIES_HP_ELOG_RESOURCE_PHB	4
+#define PSERIES_HP_ELOG_RESOURCE_PMEM   6
 
 #define PSERIES_HP_ELOG_ACTION_ADD	1
 #define PSERIES_HP_ELOG_ACTION_REMOVE	2
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index 6fafc82c04b0..fad0baddfcba 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -91,6 +91,8 @@ static char *rtas_event_type(int type)
 			return "Dump Notification Event";
 		case RTAS_TYPE_PRRN:
 			return "Platform Resource Reassignment Event";
+		case RTAS_TYPE_HOTPLUG:
+			return "Hotplug Event";
 	}
 
 	return rtas_type[0];
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 7e89d5c47068..892b27ced973 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -13,7 +13,7 @@ obj-$(CONFIG_KEXEC_CORE)	+= kexec.o
 obj-$(CONFIG_PSERIES_ENERGY)	+= pseries_energy.o
 
 obj-$(CONFIG_HOTPLUG_CPU)	+= hotplug-cpu.o
-obj-$(CONFIG_MEMORY_HOTPLUG)	+= hotplug-memory.o
+obj-$(CONFIG_MEMORY_HOTPLUG)	+= hotplug-memory.o pmem.o
 
 obj-$(CONFIG_HVC_CONSOLE)	+= hvconsole.o
 obj-$(CONFIG_HVCS)		+= hvcserver.o
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index a0b20c03f078..795996fefdb9 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -357,6 +357,10 @@ static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
 	case PSERIES_HP_ELOG_RESOURCE_CPU:
 		rc = dlpar_cpu(hp_elog);
 		break;
+	case PSERIES_HP_ELOG_RESOURCE_PMEM:
+		rc = dlpar_hp_pmem(hp_elog);
+		break;
+
 	default:
 		pr_warn_ratelimited("Invalid resource (%d) specified\n",
 				    hp_elog->resource);
diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c
index a3bbeb43689e..4927de57d8ee 100644
--- a/arch/powerpc/platforms/pseries/firmware.c
+++ b/arch/powerpc/platforms/pseries/firmware.c
@@ -65,6 +65,7 @@ hypertas_fw_features_table[] = {
 	{FW_FEATURE_SET_MODE,		"hcall-set-mode"},
 	{FW_FEATURE_BEST_ENERGY,	"hcall-best-energy-1*"},
 	{FW_FEATURE_HPT_RESIZE,		"hcall-hpt-resize"},
+	{FW_FEATURE_PAPR_SCM,		"hcall-scm"},
 };
 
 /* Build up the firmware features bitmask using the contents of
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 60db2ee511fb..d0829677c896 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -63,11 +63,16 @@ void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog,
 			 struct completion *hotplug_done, int *rc);
 #ifdef CONFIG_MEMORY_HOTPLUG
 int dlpar_memory(struct pseries_hp_errorlog *hp_elog);
+int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog);
 #else
 static inline int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
 {
 	return -EOPNOTSUPP;
 }
+int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog)
+{
+	return -EOPNOTSUPP;
+};
 #endif
 
 #ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 851ce326874a..ae22fc007276 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -237,7 +237,8 @@ static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id)
 	 * hotplug events on the ras_log_buf to be handled by rtas_errd.
 	 */
 	if (hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_MEM ||
-	    hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU)
+	    hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU ||
+	    hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_PMEM)
 		queue_hotplug_event(hp_elog, NULL, NULL);
 	else
 		log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
-- 
2.9.5

_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 2/2] powerpc/pseries: Add driver for PAPR SCM regions
  2018-10-10  6:08 PAPR SCM support Oliver O'Halloran
  2018-10-10  6:08 ` [PATCH 1/2] powerpc/pseries: PAPR persistent memory support Oliver O'Halloran
@ 2018-10-10  6:08 ` Oliver O'Halloran
  2018-10-10 23:59   ` Michael Ellerman
  2018-10-12 22:36   ` Dan Williams
  1 sibling, 2 replies; 11+ messages in thread
From: Oliver O'Halloran @ 2018-10-10  6:08 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: nfont, linux-nvdimm

Adds a driver that implements support for enabling and accessing PAPR
SCM regions. Unfortunately due to how the PAPR interface works we can't
use the existing of_pmem driver (yet) because:

 a) The guest is required to use the H_SCM_BIND_MEM h-call to add
    add the SCM region to it's physical address space, and
 b) There is currently no mechanism for relating a bare of_pmem region
    to the backing DIMM (or not-a-DIMM for our case).

Both of these are easily handled by rolling the functionality into a
seperate driver so here we are...

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
The alternative implementation here is that we have the pseries code
do the h-calls and craft a pmem-region@<addr> node based on that.
However, that doesn't solve b) and mpe has expressed his dislike of
adding new stuff to the DT at runtime so i'd say that's a non-starter.
---
 arch/powerpc/platforms/pseries/Kconfig    |   7 +
 arch/powerpc/platforms/pseries/Makefile   |   1 +
 arch/powerpc/platforms/pseries/papr_scm.c | 340 ++++++++++++++++++++++++++++++
 3 files changed, 348 insertions(+)
 create mode 100644 arch/powerpc/platforms/pseries/papr_scm.c

diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 0c698fd6d491..4b0fcb80efe5 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -140,3 +140,10 @@ config IBMEBUS
 	bool "Support for GX bus based adapters"
 	help
 	  Bus device driver for GX bus based adapters.
+
+config PAPR_SCM
+	depends on PPC_PSERIES && MEMORY_HOTPLUG
+	select LIBNVDIMM
+	tristate "Support for the PAPR Storage Class Memory interface"
+	help
+	  Enable access to hypervisor provided storage class memory.
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 892b27ced973..a43ec843c8e2 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -24,6 +24,7 @@ obj-$(CONFIG_IO_EVENT_IRQ)	+= io_event_irq.o
 obj-$(CONFIG_LPARCFG)		+= lparcfg.o
 obj-$(CONFIG_IBMVIO)		+= vio.o
 obj-$(CONFIG_IBMEBUS)		+= ibmebus.o
+obj-$(CONFIG_PAPR_SCM)		+= papr_scm.o
 
 ifdef CONFIG_PPC_PSERIES
 obj-$(CONFIG_SUSPEND)		+= suspend.o
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
new file mode 100644
index 000000000000..87d4dbc18845
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -0,0 +1,340 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define pr_fmt(fmt)	"papr-scm: " fmt
+
+#include <linux/of.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+#include <linux/ndctl.h>
+#include <linux/libnvdimm.h>
+#include <linux/platform_device.h>
+
+#include <asm/plpar_wrappers.h>
+
+#define BIND_ANY_ADDR (~0ul)
+
+#define PAPR_SCM_DIMM_CMD_MASK \
+	((1ul << ND_CMD_GET_CONFIG_SIZE) | \
+	 (1ul << ND_CMD_GET_CONFIG_DATA) | \
+	 (1ul << ND_CMD_SET_CONFIG_DATA))
+
+struct papr_scm_priv {
+	struct platform_device *pdev;
+	struct device_node *dn;
+	uint32_t drc_index;
+	uint64_t blocks;
+	uint64_t block_size;
+	int metadata_size;
+
+	uint64_t bound_addr;
+
+	struct nvdimm_bus_descriptor bus_desc;
+	struct nvdimm_bus *bus;
+	struct nvdimm *nvdimm;
+	struct resource res;
+	struct nd_region *region;
+	struct nd_interleave_set nd_set;
+};
+
+static int drc_pmem_bind(struct papr_scm_priv *p)
+{
+	unsigned long ret[PLPAR_HCALL_BUFSIZE];
+	uint64_t rc, token;
+
+	/*
+	 * When the hypervisor cannot map all the requested memory in a single
+	 * hcall it returns H_BUSY and we call again with the token until
+	 * we get H_SUCCESS. Aborting the retry loop before getting H_SUCCESS
+	 * leave the system in an undefined state, so we wait.
+	 */
+	token = 0;
+
+	do {
+		rc = plpar_hcall(H_SCM_BIND_MEM, ret, p->drc_index, 0,
+				p->blocks, BIND_ANY_ADDR, token);
+		token = be64_to_cpu(ret[0]);
+	} while (rc == H_BUSY);
+
+	if (rc) {
+		dev_err(&p->pdev->dev, "bind err: %lld\n", rc);
+		return -ENXIO;
+	}
+
+	p->bound_addr = be64_to_cpu(ret[1]);
+
+	dev_dbg(&p->pdev->dev, "bound drc %x to %pR\n", p->drc_index, &p->res);
+
+	return 0;
+}
+
+static int drc_pmem_unbind(struct papr_scm_priv *p)
+{
+	unsigned long ret[PLPAR_HCALL_BUFSIZE];
+	uint64_t rc, token;
+
+	token = 0;
+
+	/* NB: unbind has the same retry requirements mentioned above */
+	do {
+		rc = plpar_hcall(H_SCM_UNBIND_MEM, ret, p->drc_index,
+				p->bound_addr, p->blocks, token);
+		token = be64_to_cpu(ret);
+	} while (rc == H_BUSY);
+
+	if (rc)
+		dev_err(&p->pdev->dev, "unbind error: %lld\n", rc);
+
+	return !!rc;
+}
+
+static int papr_scm_meta_get(struct papr_scm_priv *p,
+			struct nd_cmd_get_config_data_hdr *hdr)
+{
+	unsigned long data[PLPAR_HCALL_BUFSIZE];
+	int64_t ret;
+
+	if (hdr->in_offset >= p->metadata_size || hdr->in_length != 1)
+		return -EINVAL;
+
+	ret = plpar_hcall(H_SCM_READ_METADATA, data, p->drc_index,
+			hdr->in_offset, 1);
+
+	if (ret == H_PARAMETER) /* bad DRC index */
+		return -ENODEV;
+	if (ret)
+		return -EINVAL; /* other invalid parameter */
+
+	hdr->out_buf[0] = data[0] & 0xff;
+
+	return 0;
+}
+
+static int papr_scm_meta_set(struct papr_scm_priv *p,
+			struct nd_cmd_set_config_hdr *hdr)
+{
+	int64_t ret;
+
+	if (hdr->in_offset >= p->metadata_size || hdr->in_length != 1)
+		return -EINVAL;
+
+	ret = plpar_hcall_norets(H_SCM_WRITE_METADATA,
+			p->drc_index, hdr->in_offset, hdr->in_buf[0], 1);
+
+	if (ret == H_PARAMETER) /* bad DRC index */
+		return -ENODEV;
+	if (ret)
+		return -EINVAL; /* other invalid parameter */
+
+	return 0;
+}
+
+int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
+		unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc)
+{
+	struct nd_cmd_get_config_size *get_size_hdr;
+	struct papr_scm_priv *p;
+
+	/* Only dimm-specific calls are supported atm */
+	if (!nvdimm)
+		return -EINVAL;
+
+	p = nvdimm_provider_data(nvdimm);
+
+	switch (cmd) {
+	case ND_CMD_GET_CONFIG_SIZE:
+		get_size_hdr = buf;
+
+		get_size_hdr->status = 0;
+		get_size_hdr->max_xfer = 1;
+		get_size_hdr->config_size = p->metadata_size;
+		*cmd_rc = 0;
+		break;
+
+	case ND_CMD_GET_CONFIG_DATA:
+		*cmd_rc = papr_scm_meta_get(p, buf);
+		break;
+
+	case ND_CMD_SET_CONFIG_DATA:
+		*cmd_rc = papr_scm_meta_set(p, buf);
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	dev_dbg(&p->pdev->dev, "returned with cmd_rc = %d\n", *cmd_rc);
+
+	return 0;
+}
+
+static const struct attribute_group *region_attr_groups[] = {
+	&nd_region_attribute_group,
+	&nd_device_attribute_group,
+	&nd_mapping_attribute_group,
+	&nd_numa_attribute_group,
+	NULL,
+};
+
+static const struct attribute_group *bus_attr_groups[] = {
+	&nvdimm_bus_attribute_group,
+	NULL,
+};
+
+static const struct attribute_group *papr_scm_dimm_groups[] = {
+	&nvdimm_attribute_group,
+	&nd_device_attribute_group,
+	NULL,
+};
+
+static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
+{
+	struct device *dev = &p->pdev->dev;
+	struct nd_mapping_desc mapping;
+	struct nd_region_desc ndr_desc;
+	unsigned long dimm_flags;
+
+	p->bus_desc.ndctl = papr_scm_ndctl;
+	p->bus_desc.module = THIS_MODULE;
+	p->bus_desc.of_node = p->pdev->dev.of_node;
+	p->bus_desc.attr_groups = bus_attr_groups;
+	p->bus_desc.provider_name = kstrdup(p->pdev->name, GFP_KERNEL);
+
+	if (!p->bus_desc.provider_name)
+		return -ENOMEM;
+
+	p->bus = nvdimm_bus_register(NULL, &p->bus_desc);
+	if (!p->bus) {
+		dev_err(dev, "Error creating nvdimm bus %pOF\n", p->dn);
+		return -ENXIO;
+	}
+
+	dimm_flags = 0;
+	set_bit(NDD_ALIASING, &dimm_flags);
+
+	p->nvdimm = nvdimm_create(p->bus, p, papr_scm_dimm_groups,
+				dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL);
+	if (!p->nvdimm) {
+		dev_err(dev, "Error creating DIMM object for %pOF\n", p->dn);
+		goto err;
+	}
+
+	/* now add the region */
+
+	memset(&mapping, 0, sizeof(mapping));
+	mapping.nvdimm = p->nvdimm;
+	mapping.start = p->res.start;
+	mapping.size = p->blocks * p->block_size; // XXX: potential overflow?
+
+	memset(&ndr_desc, 0, sizeof(ndr_desc));
+	ndr_desc.attr_groups = region_attr_groups;
+	ndr_desc.numa_node = dev_to_node(&p->pdev->dev);
+	ndr_desc.res = &p->res;
+	ndr_desc.of_node = p->dn;
+	ndr_desc.provider_data = p;
+	ndr_desc.mapping = &mapping;
+	ndr_desc.num_mappings = 1;
+	ndr_desc.nd_set = &p->nd_set;
+	set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
+
+	p->region = nvdimm_pmem_region_create(p->bus, &ndr_desc);
+	if (!p->region) {
+		dev_err(dev, "Error registering region %pR from %pOF\n",
+				ndr_desc.res, p->dn);
+		goto err;
+	}
+
+	return 0;
+
+err:	nvdimm_bus_unregister(p->bus);
+	kfree(p->bus_desc.provider_name);
+	return -ENXIO;
+}
+
+static int papr_scm_probe(struct platform_device *pdev)
+{
+	uint32_t drc_index, metadata_size, unit_cap[2];
+	struct device_node *dn = pdev->dev.of_node;
+	struct papr_scm_priv *p;
+	int rc;
+
+	/* check we have all the required DT properties */
+	if (of_property_read_u32(dn, "ibm,my-drc-index", &drc_index)) {
+		dev_err(&pdev->dev, "%pOF: missing drc-index!\n", dn);
+		return -ENODEV;
+	}
+
+	if (of_property_read_u32_array(dn, "ibm,unit-capacity", unit_cap, 2)) {
+		dev_err(&pdev->dev, "%pOF: missing unit-capacity!\n", dn);
+		return -ENODEV;
+	}
+
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	/* optional DT properties */
+	of_property_read_u32(dn, "ibm,metadata-size", &metadata_size);
+
+	p->dn = dn;
+	p->drc_index = drc_index;
+	p->block_size = unit_cap[0];
+	p->blocks     = unit_cap[1];
+
+	/* might be zero */
+	p->metadata_size = metadata_size;
+	p->pdev = pdev;
+
+	/* request the hypervisor to bind this region to somewhere in memory */
+	rc = drc_pmem_bind(p);
+	if (rc)
+		goto err;
+
+	/* setup the resource for the newly bound range */
+	p->res.start = p->bound_addr;
+	p->res.end   = p->bound_addr + p->blocks * p->block_size;
+	p->res.name  = pdev->name;
+	p->res.flags = IORESOURCE_MEM;
+
+	rc = papr_scm_nvdimm_init(p);
+	if (rc)
+		goto err2;
+
+	return 0;
+
+err2:	drc_pmem_unbind(p);
+err:	kfree(p);
+	return rc;
+}
+
+static int papr_scm_remove(struct platform_device *pdev)
+{
+	struct papr_scm_priv *p = platform_get_drvdata(pdev);
+
+	nvdimm_bus_unregister(p->bus);
+	drc_pmem_unbind(p);
+	kfree(p);
+
+	return 0;
+}
+
+static const struct of_device_id papr_scm_match[] = {
+	{ .compatible = "ibm,pmemory" },
+	{ },
+};
+
+static struct platform_driver papr_scm_driver = {
+	.probe = papr_scm_probe,
+	.remove = papr_scm_remove,
+	.driver = {
+		.name = "papr_scm",
+		.owner = THIS_MODULE,
+		.of_match_table = papr_scm_match,
+	},
+};
+
+module_platform_driver(papr_scm_driver);
+MODULE_DEVICE_TABLE(of, papr_scm_match);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("IBM Corporation");
-- 
2.9.5

_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] powerpc/pseries: PAPR persistent memory support
  2018-10-10  6:08 ` [PATCH 1/2] powerpc/pseries: PAPR persistent memory support Oliver O'Halloran
@ 2018-10-10 16:36   ` Nathan Fontenot
  2018-10-10 22:24     ` Oliver
  0 siblings, 1 reply; 11+ messages in thread
From: Nathan Fontenot @ 2018-10-10 16:36 UTC (permalink / raw)
  To: Oliver O'Halloran, linuxppc-dev; +Cc: linux-nvdimm

On 10/10/2018 01:08 AM, Oliver O'Halloran wrote:
> This patch implements support for discovering storage class memory
> devices at boot and for handling hotplug of new regions via RTAS
> hotplug events.
> 
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
> ---
>  arch/powerpc/include/asm/firmware.h       |  3 ++-
>  arch/powerpc/include/asm/hvcall.h         | 10 +++++++++-
>  arch/powerpc/include/asm/rtas.h           |  2 ++
>  arch/powerpc/kernel/rtasd.c               |  2 ++
>  arch/powerpc/platforms/pseries/Makefile   |  2 +-
>  arch/powerpc/platforms/pseries/dlpar.c    |  4 ++++
>  arch/powerpc/platforms/pseries/firmware.c |  1 +
>  arch/powerpc/platforms/pseries/pseries.h  |  5 +++++
>  arch/powerpc/platforms/pseries/ras.c      |  3 ++-
>  9 files changed, 28 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
> index 7a051bd21f87..113c64d5d394 100644
> --- a/arch/powerpc/include/asm/firmware.h
> +++ b/arch/powerpc/include/asm/firmware.h
> @@ -52,6 +52,7 @@
>  #define FW_FEATURE_PRRN		ASM_CONST(0x0000000200000000)
>  #define FW_FEATURE_DRMEM_V2	ASM_CONST(0x0000000400000000)
>  #define FW_FEATURE_DRC_INFO	ASM_CONST(0x0000000800000000)
> +#define FW_FEATURE_PAPR_SCM 	ASM_CONST(0x0000001000000000)
> 
>  #ifndef __ASSEMBLY__
> 
> @@ -69,7 +70,7 @@ enum {
>  		FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY |
>  		FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN |
>  		FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 |
> -		FW_FEATURE_DRC_INFO,
> +		FW_FEATURE_DRC_INFO | FW_FEATURE_PAPR_SCM,
>  	FW_FEATURE_PSERIES_ALWAYS = 0,
>  	FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL,
>  	FW_FEATURE_POWERNV_ALWAYS = 0,
> diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
> index a0b17f9f1ea4..0e81ef83b35a 100644
> --- a/arch/powerpc/include/asm/hvcall.h
> +++ b/arch/powerpc/include/asm/hvcall.h
> @@ -295,7 +295,15 @@
>  #define H_INT_ESB               0x3C8
>  #define H_INT_SYNC              0x3CC
>  #define H_INT_RESET             0x3D0
> -#define MAX_HCALL_OPCODE	H_INT_RESET
> +#define H_SCM_READ_METADATA     0x3E4
> +#define H_SCM_WRITE_METADATA    0x3E8
> +#define H_SCM_BIND_MEM          0x3EC
> +#define H_SCM_UNBIND_MEM        0x3F0
> +#define H_SCM_QUERY_BLOCK_MEM_BINDING 0x3F4
> +#define H_SCM_QUERY_LOGICAL_MEM_BINDING 0x3F8
> +#define H_SCM_MEM_QUERY	        0x3FC
> +#define H_SCM_BLOCK_CLEAR       0x400
> +#define MAX_HCALL_OPCODE	H_SCM_BLOCK_CLEAR
> 
>  /* H_VIOCTL functions */
>  #define H_GET_VIOA_DUMP_SIZE	0x01
> diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
> index 71e393c46a49..1e81f3d55457 100644
> --- a/arch/powerpc/include/asm/rtas.h
> +++ b/arch/powerpc/include/asm/rtas.h
> @@ -125,6 +125,7 @@ struct rtas_suspend_me_data {
>  #define RTAS_TYPE_INFO			0xE2
>  #define RTAS_TYPE_DEALLOC		0xE3
>  #define RTAS_TYPE_DUMP			0xE4
> +#define RTAS_TYPE_HOTPLUG		0xE5
>  /* I don't add PowerMGM events right now, this is a different topic */ 
>  #define RTAS_TYPE_PMGM_POWER_SW_ON	0x60
>  #define RTAS_TYPE_PMGM_POWER_SW_OFF	0x61
> @@ -316,6 +317,7 @@ struct pseries_hp_errorlog {
>  #define PSERIES_HP_ELOG_RESOURCE_MEM	2
>  #define PSERIES_HP_ELOG_RESOURCE_SLOT	3
>  #define PSERIES_HP_ELOG_RESOURCE_PHB	4
> +#define PSERIES_HP_ELOG_RESOURCE_PMEM   6
> 
>  #define PSERIES_HP_ELOG_ACTION_ADD	1
>  #define PSERIES_HP_ELOG_ACTION_REMOVE	2
> diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
> index 6fafc82c04b0..fad0baddfcba 100644
> --- a/arch/powerpc/kernel/rtasd.c
> +++ b/arch/powerpc/kernel/rtasd.c
> @@ -91,6 +91,8 @@ static char *rtas_event_type(int type)
>  			return "Dump Notification Event";
>  		case RTAS_TYPE_PRRN:
>  			return "Platform Resource Reassignment Event";
> +		case RTAS_TYPE_HOTPLUG:
> +			return "Hotplug Event";
>  	}
> 
>  	return rtas_type[0];
> diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
> index 7e89d5c47068..892b27ced973 100644
> --- a/arch/powerpc/platforms/pseries/Makefile
> +++ b/arch/powerpc/platforms/pseries/Makefile
> @@ -13,7 +13,7 @@ obj-$(CONFIG_KEXEC_CORE)	+= kexec.o
>  obj-$(CONFIG_PSERIES_ENERGY)	+= pseries_energy.o
> 
>  obj-$(CONFIG_HOTPLUG_CPU)	+= hotplug-cpu.o
> -obj-$(CONFIG_MEMORY_HOTPLUG)	+= hotplug-memory.o
> +obj-$(CONFIG_MEMORY_HOTPLUG)	+= hotplug-memory.o pmem.o
> 
>  obj-$(CONFIG_HVC_CONSOLE)	+= hvconsole.o
>  obj-$(CONFIG_HVCS)		+= hvcserver.o
> diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
> index a0b20c03f078..795996fefdb9 100644
> --- a/arch/powerpc/platforms/pseries/dlpar.c
> +++ b/arch/powerpc/platforms/pseries/dlpar.c
> @@ -357,6 +357,10 @@ static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
>  	case PSERIES_HP_ELOG_RESOURCE_CPU:
>  		rc = dlpar_cpu(hp_elog);
>  		break;
> +	case PSERIES_HP_ELOG_RESOURCE_PMEM:
> +		rc = dlpar_hp_pmem(hp_elog);

You reference a dlpar handler for pmem here but I am not finding a handler
defined when CONFIG_MEMORY_HOTPLUG is defined. The update to pseries.h below
only provides a definition when it is not defined.

Also, do we need to handle dlpar of pmem memory? There have been discussion of
supporting this in the future but I did not thin it was currently supported.

-Nathan

> +		break;
> +
>  	default:
>  		pr_warn_ratelimited("Invalid resource (%d) specified\n",
>  				    hp_elog->resource);
> diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c
> index a3bbeb43689e..4927de57d8ee 100644
> --- a/arch/powerpc/platforms/pseries/firmware.c
> +++ b/arch/powerpc/platforms/pseries/firmware.c
> @@ -65,6 +65,7 @@ hypertas_fw_features_table[] = {
>  	{FW_FEATURE_SET_MODE,		"hcall-set-mode"},
>  	{FW_FEATURE_BEST_ENERGY,	"hcall-best-energy-1*"},
>  	{FW_FEATURE_HPT_RESIZE,		"hcall-hpt-resize"},
> +	{FW_FEATURE_PAPR_SCM,		"hcall-scm"},
>  };
> 
>  /* Build up the firmware features bitmask using the contents of
> diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
> index 60db2ee511fb..d0829677c896 100644
> --- a/arch/powerpc/platforms/pseries/pseries.h
> +++ b/arch/powerpc/platforms/pseries/pseries.h
> @@ -63,11 +63,16 @@ void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog,
>  			 struct completion *hotplug_done, int *rc);
>  #ifdef CONFIG_MEMORY_HOTPLUG
>  int dlpar_memory(struct pseries_hp_errorlog *hp_elog);
> +int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog);
>  #else
>  static inline int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
>  {
>  	return -EOPNOTSUPP;
>  }
> +int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog)
> +{
> +	return -EOPNOTSUPP;
> +};
>  #endif
> 
>  #ifdef CONFIG_HOTPLUG_CPU
> diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
> index 851ce326874a..ae22fc007276 100644
> --- a/arch/powerpc/platforms/pseries/ras.c
> +++ b/arch/powerpc/platforms/pseries/ras.c
> @@ -237,7 +237,8 @@ static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id)
>  	 * hotplug events on the ras_log_buf to be handled by rtas_errd.
>  	 */
>  	if (hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_MEM ||
> -	    hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU)
> +	    hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU ||
> +	    hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_PMEM)
>  		queue_hotplug_event(hp_elog, NULL, NULL);
>  	else
>  		log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
> 

_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] powerpc/pseries: PAPR persistent memory support
  2018-10-10 16:36   ` Nathan Fontenot
@ 2018-10-10 22:24     ` Oliver
  0 siblings, 0 replies; 11+ messages in thread
From: Oliver @ 2018-10-10 22:24 UTC (permalink / raw)
  To: Nathan Fontenot; +Cc: linuxppc-dev, linux-nvdimm

On Thu, Oct 11, 2018 at 3:36 AM Nathan Fontenot
<nfont@linux.vnet.ibm.com> wrote:
>
> On 10/10/2018 01:08 AM, Oliver O'Halloran wrote:
> > This patch implements support for discovering storage class memory
> > devices at boot and for handling hotplug of new regions via RTAS
> > hotplug events.
> >
> > Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
> > ---
> >  arch/powerpc/include/asm/firmware.h       |  3 ++-
> >  arch/powerpc/include/asm/hvcall.h         | 10 +++++++++-
> >  arch/powerpc/include/asm/rtas.h           |  2 ++
> >  arch/powerpc/kernel/rtasd.c               |  2 ++
> >  arch/powerpc/platforms/pseries/Makefile   |  2 +-
> >  arch/powerpc/platforms/pseries/dlpar.c    |  4 ++++
> >  arch/powerpc/platforms/pseries/firmware.c |  1 +
> >  arch/powerpc/platforms/pseries/pseries.h  |  5 +++++
> >  arch/powerpc/platforms/pseries/ras.c      |  3 ++-
> >  9 files changed, 28 insertions(+), 4 deletions(-)
> >
> > diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
> > index 7a051bd21f87..113c64d5d394 100644
> > --- a/arch/powerpc/include/asm/firmware.h
> > +++ b/arch/powerpc/include/asm/firmware.h
> > @@ -52,6 +52,7 @@
> >  #define FW_FEATURE_PRRN              ASM_CONST(0x0000000200000000)
> >  #define FW_FEATURE_DRMEM_V2  ASM_CONST(0x0000000400000000)
> >  #define FW_FEATURE_DRC_INFO  ASM_CONST(0x0000000800000000)
> > +#define FW_FEATURE_PAPR_SCM  ASM_CONST(0x0000001000000000)
> >
> >  #ifndef __ASSEMBLY__
> >
> > @@ -69,7 +70,7 @@ enum {
> >               FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY |
> >               FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN |
> >               FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 |
> > -             FW_FEATURE_DRC_INFO,
> > +             FW_FEATURE_DRC_INFO | FW_FEATURE_PAPR_SCM,
> >       FW_FEATURE_PSERIES_ALWAYS = 0,
> >       FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL,
> >       FW_FEATURE_POWERNV_ALWAYS = 0,
> > diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
> > index a0b17f9f1ea4..0e81ef83b35a 100644
> > --- a/arch/powerpc/include/asm/hvcall.h
> > +++ b/arch/powerpc/include/asm/hvcall.h
> > @@ -295,7 +295,15 @@
> >  #define H_INT_ESB               0x3C8
> >  #define H_INT_SYNC              0x3CC
> >  #define H_INT_RESET             0x3D0
> > -#define MAX_HCALL_OPCODE     H_INT_RESET
> > +#define H_SCM_READ_METADATA     0x3E4
> > +#define H_SCM_WRITE_METADATA    0x3E8
> > +#define H_SCM_BIND_MEM          0x3EC
> > +#define H_SCM_UNBIND_MEM        0x3F0
> > +#define H_SCM_QUERY_BLOCK_MEM_BINDING 0x3F4
> > +#define H_SCM_QUERY_LOGICAL_MEM_BINDING 0x3F8
> > +#define H_SCM_MEM_QUERY              0x3FC
> > +#define H_SCM_BLOCK_CLEAR       0x400
> > +#define MAX_HCALL_OPCODE     H_SCM_BLOCK_CLEAR
> >
> >  /* H_VIOCTL functions */
> >  #define H_GET_VIOA_DUMP_SIZE 0x01
> > diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
> > index 71e393c46a49..1e81f3d55457 100644
> > --- a/arch/powerpc/include/asm/rtas.h
> > +++ b/arch/powerpc/include/asm/rtas.h
> > @@ -125,6 +125,7 @@ struct rtas_suspend_me_data {
> >  #define RTAS_TYPE_INFO                       0xE2
> >  #define RTAS_TYPE_DEALLOC            0xE3so we might as well
> >  #define RTAS_TYPE_DUMP                       0xE4
> > +#define RTAS_TYPE_HOTPLUG            0xE5
> >  /* I don't add PowerMGM events right now, this is a different topic */
> >  #define RTAS_TYPE_PMGM_POWER_SW_ON   0x60
> >  #define RTAS_TYPE_PMGM_POWER_SW_OFF  0x61
> > @@ -316,6 +317,7 @@ struct pseries_hp_errorlog {
> >  #define PSERIES_HP_ELOG_RESOURCE_MEM 2
> >  #define PSERIES_HP_ELOG_RESOURCE_SLOT        3
> >  #define PSERIES_HP_ELOG_RESOURCE_PHB 4
> > +#define PSERIES_HP_ELOG_RESOURCE_PMEM   6
> >
> >  #define PSERIES_HP_ELOG_ACTION_ADD   1
> >  #define PSERIES_HP_ELOG_ACTION_REMOVE        2
> > diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
> > index 6fafc82c04b0..fad0baddfcba 100644
> > --- a/arch/powerpc/kernel/rtasd.c
> > +++ b/arch/powerpc/kernel/rtasd.c
> > @@ -91,6 +91,8 @@ static char *rtas_event_type(int type)
> >                       return "Dump Notification Event";
> >               case RTAS_TYPE_PRRN:
> >                       return "Platform Resource Reassignment Event";
> > +             case RTAS_TYPE_HOTPLUG:
> > +                     return "Hotplug Event";
> >       }
> >
> >       return rtas_type[0];
> > diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
> > index 7e89d5c47068..892b27ced973 100644
> > --- a/arch/powerpc/platforms/pseries/Makefile
> > +++ b/arch/powerpc/platforms/pseries/Makefile
> > @@ -13,7 +13,7 @@ obj-$(CONFIG_KEXEC_CORE)    += kexec.o
> >  obj-$(CONFIG_PSERIES_ENERGY) += pseries_energy.o
> >
> >  obj-$(CONFIG_HOTPLUG_CPU)    += hotplug-cpu.o
> > -obj-$(CONFIG_MEMORY_HOTPLUG) += hotplug-memory.o
> > +obj-$(CONFIG_MEMORY_HOTPLUG) += hotplug-memory.o pmem.o
> >
> >  obj-$(CONFIG_HVC_CONSOLE)    += hvconsole.o
> >  obj-$(CONFIG_HVCS)           += hvcserver.o
> > diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
> > index a0b20c03f078..795996fefdb9 100644
> > --- a/arch/powerpc/platforms/pseries/dlpar.c
> > +++ b/arch/powerpc/platforms/pseries/dlpar.c
> > @@ -357,6 +357,10 @@ static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
> >       case PSERIES_HP_ELOG_RESOURCE_CPU:
> >               rc = dlpar_cpu(hp_elog);
> >               break;
> > +     case PSERIES_HP_ELOG_RESOURCE_PMEM:
> > +             rc = dlpar_hp_pmem(hp_elog);
>
> You reference a dlpar handler for pmem here but I am not finding a handler
> defined when CONFIG_MEMORY_HOTPLUG is defined. The update to pseries.h below
> only provides a definition when it is not defined.

Ah crap. The file that adds it went missing when I rebased. I'll post a respin.

> Also, do we need to handle dlpar of pmem memory? There have been discussion of
> supporting this in the future but I did not thin it was currently supported.

I think it's something that we're going to need and Barata has already
implemented support for it inside of qemu. The implementation is more
or less the same in the hot and coldplug cases so it's not much work
to support it.

> -Nathan
>
> > +             break;
> > +
> >       default:
> >               pr_warn_ratelimited("Invalid resource (%d) specified\n",
> >                                   hp_elog->resource);
> > diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c
> > index a3bbeb43689e..4927de57d8ee 100644
> > --- a/arch/powerpc/platforms/pseries/firmware.c
> > +++ b/arch/powerpc/platforms/pseries/firmware.c
> > @@ -65,6 +65,7 @@ hypertas_fw_features_table[] = {
> >       {FW_FEATURE_SET_MODE,           "hcall-set-mode"},
> >       {FW_FEATURE_BEST_ENERGY,        "hcall-best-energy-1*"},
> >       {FW_FEATURE_HPT_RESIZE,         "hcall-hpt-resize"},
> > +     {FW_FEATURE_PAPR_SCM,           "hcall-scm"},
> >  };
> >
> >  /* Build up the firmware features bitmask using the contents of
> > diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
> > index 60db2ee511fb..d0829677c896 100644
> > --- a/arch/powerpc/platforms/pseries/pseries.h
> > +++ b/arch/powerpc/platforms/pseries/pseries.h
> > @@ -63,11 +63,16 @@ void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog,
> >                        struct completion *hotplug_done, int *rc);
> >  #ifdef CONFIG_MEMORY_HOTPLUG
> >  int dlpar_memory(struct pseries_hp_errorlog *hp_elog);
> > +int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog);
> >  #else
> >  static inline int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
> >  {
> >       return -EOPNOTSUPP;
> >  }
> > +int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog)
> > +{
> > +     return -EOPNOTSUPP;
> > +};
> >  #endif
> >
> >  #ifdef CONFIG_HOTPLUG_CPU
> > diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
> > index 851ce326874a..ae22fc007276 100644
> > --- a/arch/powerpc/platforms/pseries/ras.c
> > +++ b/arch/powerpc/platforms/pseries/ras.c
> > @@ -237,7 +237,8 @@ static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id)
> >        * hotplug events on the ras_log_buf to be handled by rtas_errd.
> >        */
> >       if (hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_MEM ||
> > -         hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU)
> > +         hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU ||
> > +         hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_PMEM)
> >               queue_hotplug_event(hp_elog, NULL, NULL);
> >       else
> >               log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
> >
>
_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] powerpc/pseries: Add driver for PAPR SCM regions
  2018-10-10  6:08 ` [PATCH 2/2] powerpc/pseries: Add driver for PAPR SCM regions Oliver O'Halloran
@ 2018-10-10 23:59   ` Michael Ellerman
  2018-10-12 22:36   ` Dan Williams
  1 sibling, 0 replies; 11+ messages in thread
From: Michael Ellerman @ 2018-10-10 23:59 UTC (permalink / raw)
  To: Oliver O'Halloran, linuxppc-dev; +Cc: nfont, linux-nvdimm

Oliver O'Halloran <oohall@gmail.com> writes:

> Adds a driver that implements support for enabling and accessing PAPR
> SCM regions. Unfortunately due to how the PAPR interface works we can't
> use the existing of_pmem driver (yet) because:
>
>  a) The guest is required to use the H_SCM_BIND_MEM h-call to add
>     add the SCM region to it's physical address space, and
>  b) There is currently no mechanism for relating a bare of_pmem region
>     to the backing DIMM (or not-a-DIMM for our case).
>
> Both of these are easily handled by rolling the functionality into a
> seperate driver so here we are...
>
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
> ---
> The alternative implementation here is that we have the pseries code
> do the h-calls and craft a pmem-region@<addr> node based on that.
> However, that doesn't solve b) and mpe has expressed his dislike of
> adding new stuff to the DT at runtime so i'd say that's a non-starter.

Hmm, from memory you yelled something at me across the office about
that, so my response may not have been entirely well considered.

I'm not quite sure what the state of the OF overlays support is, but
that would be The Right Way to do that sort of modification to the
device tree at runtime.

If we merged this and then later got the of_pmem driver to work for us
would there be any user-visible change?

cheers
_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] powerpc/pseries: Add driver for PAPR SCM regions
  2018-10-10  6:08 ` [PATCH 2/2] powerpc/pseries: Add driver for PAPR SCM regions Oliver O'Halloran
  2018-10-10 23:59   ` Michael Ellerman
@ 2018-10-12 22:36   ` Dan Williams
  2018-10-13 12:08     ` Michael Ellerman
  2018-10-14 23:14     ` Oliver
  1 sibling, 2 replies; 11+ messages in thread
From: Dan Williams @ 2018-10-12 22:36 UTC (permalink / raw)
  To: Oliver O'Halloran; +Cc: Nathan Fontenot, linuxppc-dev, linux-nvdimm

On Tue, Oct 9, 2018 at 11:21 PM Oliver O'Halloran <oohall@gmail.com> wrote:
>
> Adds a driver that implements support for enabling and accessing PAPR
> SCM regions. Unfortunately due to how the PAPR interface works we can't
> use the existing of_pmem driver (yet) because:
>
>  a) The guest is required to use the H_SCM_BIND_MEM h-call to add
>     add the SCM region to it's physical address space, and
>  b) There is currently no mechanism for relating a bare of_pmem region
>     to the backing DIMM (or not-a-DIMM for our case).
>
> Both of these are easily handled by rolling the functionality into a
> seperate driver so here we are...
>
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
> ---
> The alternative implementation here is that we have the pseries code
> do the h-calls and craft a pmem-region@<addr> node based on that.
> However, that doesn't solve b) and mpe has expressed his dislike of
> adding new stuff to the DT at runtime so i'd say that's a non-starter.
> ---
>  arch/powerpc/platforms/pseries/Kconfig    |   7 +
>  arch/powerpc/platforms/pseries/Makefile   |   1 +
>  arch/powerpc/platforms/pseries/papr_scm.c | 340 ++++++++++++++++++++++++++++++
>  3 files changed, 348 insertions(+)
>  create mode 100644 arch/powerpc/platforms/pseries/papr_scm.c
>
> diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
> index 0c698fd6d491..4b0fcb80efe5 100644
> --- a/arch/powerpc/platforms/pseries/Kconfig
> +++ b/arch/powerpc/platforms/pseries/Kconfig
> @@ -140,3 +140,10 @@ config IBMEBUS
>         bool "Support for GX bus based adapters"
>         help
>           Bus device driver for GX bus based adapters.
> +
> +config PAPR_SCM
> +       depends on PPC_PSERIES && MEMORY_HOTPLUG
> +       select LIBNVDIMM
> +       tristate "Support for the PAPR Storage Class Memory interface"
> +       help
> +         Enable access to hypervisor provided storage class memory.
> diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
> index 892b27ced973..a43ec843c8e2 100644
> --- a/arch/powerpc/platforms/pseries/Makefile
> +++ b/arch/powerpc/platforms/pseries/Makefile
> @@ -24,6 +24,7 @@ obj-$(CONFIG_IO_EVENT_IRQ)    += io_event_irq.o
>  obj-$(CONFIG_LPARCFG)          += lparcfg.o
>  obj-$(CONFIG_IBMVIO)           += vio.o
>  obj-$(CONFIG_IBMEBUS)          += ibmebus.o
> +obj-$(CONFIG_PAPR_SCM)         += papr_scm.o
>
>  ifdef CONFIG_PPC_PSERIES
>  obj-$(CONFIG_SUSPEND)          += suspend.o
> diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
> new file mode 100644
> index 000000000000..87d4dbc18845
> --- /dev/null
> +++ b/arch/powerpc/platforms/pseries/papr_scm.c
> @@ -0,0 +1,340 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#define pr_fmt(fmt)    "papr-scm: " fmt
> +
> +#include <linux/of.h>
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/ioport.h>
> +#include <linux/slab.h>
> +#include <linux/ndctl.h>
> +#include <linux/libnvdimm.h>
> +#include <linux/platform_device.h>
> +
> +#include <asm/plpar_wrappers.h>
> +
> +#define BIND_ANY_ADDR (~0ul)
> +
> +#define PAPR_SCM_DIMM_CMD_MASK \
> +       ((1ul << ND_CMD_GET_CONFIG_SIZE) | \
> +        (1ul << ND_CMD_GET_CONFIG_DATA) | \
> +        (1ul << ND_CMD_SET_CONFIG_DATA))
> +
> +struct papr_scm_priv {
> +       struct platform_device *pdev;
> +       struct device_node *dn;
> +       uint32_t drc_index;
> +       uint64_t blocks;
> +       uint64_t block_size;
> +       int metadata_size;
> +
> +       uint64_t bound_addr;
> +
> +       struct nvdimm_bus_descriptor bus_desc;
> +       struct nvdimm_bus *bus;
> +       struct nvdimm *nvdimm;
> +       struct resource res;
> +       struct nd_region *region;
> +       struct nd_interleave_set nd_set;
> +};
> +
> +static int drc_pmem_bind(struct papr_scm_priv *p)
> +{
> +       unsigned long ret[PLPAR_HCALL_BUFSIZE];
> +       uint64_t rc, token;
> +
> +       /*
> +        * When the hypervisor cannot map all the requested memory in a single
> +        * hcall it returns H_BUSY and we call again with the token until
> +        * we get H_SUCCESS. Aborting the retry loop before getting H_SUCCESS
> +        * leave the system in an undefined state, so we wait.
> +        */
> +       token = 0;
> +
> +       do {
> +               rc = plpar_hcall(H_SCM_BIND_MEM, ret, p->drc_index, 0,
> +                               p->blocks, BIND_ANY_ADDR, token);
> +               token = be64_to_cpu(ret[0]);

Does plpar_hcall sleep? Should there be a cond_resched() here if not?

> +       } while (rc == H_BUSY);
> +
> +       if (rc) {
> +               dev_err(&p->pdev->dev, "bind err: %lld\n", rc);
> +               return -ENXIO;
> +       }
> +
> +       p->bound_addr = be64_to_cpu(ret[1]);
> +
> +       dev_dbg(&p->pdev->dev, "bound drc %x to %pR\n", p->drc_index, &p->res);
> +
> +       return 0;
> +}
> +
> +static int drc_pmem_unbind(struct papr_scm_priv *p)
> +{
> +       unsigned long ret[PLPAR_HCALL_BUFSIZE];
> +       uint64_t rc, token;
> +
> +       token = 0;
> +
> +       /* NB: unbind has the same retry requirements mentioned above */
> +       do {
> +               rc = plpar_hcall(H_SCM_UNBIND_MEM, ret, p->drc_index,
> +                               p->bound_addr, p->blocks, token);
> +               token = be64_to_cpu(ret);

...same busy loop comment.

> +       } while (rc == H_BUSY);
> +
> +       if (rc)
> +               dev_err(&p->pdev->dev, "unbind error: %lld\n", rc);
> +
> +       return !!rc;
> +}
> +
> +static int papr_scm_meta_get(struct papr_scm_priv *p,
> +                       struct nd_cmd_get_config_data_hdr *hdr)
> +{
> +       unsigned long data[PLPAR_HCALL_BUFSIZE];
> +       int64_t ret;
> +
> +       if (hdr->in_offset >= p->metadata_size || hdr->in_length != 1)
> +               return -EINVAL;
> +
> +       ret = plpar_hcall(H_SCM_READ_METADATA, data, p->drc_index,
> +                       hdr->in_offset, 1);
> +
> +       if (ret == H_PARAMETER) /* bad DRC index */
> +               return -ENODEV;
> +       if (ret)
> +               return -EINVAL; /* other invalid parameter */
> +
> +       hdr->out_buf[0] = data[0] & 0xff;
> +
> +       return 0;
> +}
> +
> +static int papr_scm_meta_set(struct papr_scm_priv *p,
> +                       struct nd_cmd_set_config_hdr *hdr)
> +{
> +       int64_t ret;
> +
> +       if (hdr->in_offset >= p->metadata_size || hdr->in_length != 1)
> +               return -EINVAL;
> +
> +       ret = plpar_hcall_norets(H_SCM_WRITE_METADATA,
> +                       p->drc_index, hdr->in_offset, hdr->in_buf[0], 1);
> +
> +       if (ret == H_PARAMETER) /* bad DRC index */
> +               return -ENODEV;
> +       if (ret)
> +               return -EINVAL; /* other invalid parameter */
> +
> +       return 0;
> +}
> +
> +int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
> +               unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc)
> +{
> +       struct nd_cmd_get_config_size *get_size_hdr;
> +       struct papr_scm_priv *p;
> +
> +       /* Only dimm-specific calls are supported atm */
> +       if (!nvdimm)
> +               return -EINVAL;
> +
> +       p = nvdimm_provider_data(nvdimm);
> +
> +       switch (cmd) {
> +       case ND_CMD_GET_CONFIG_SIZE:
> +               get_size_hdr = buf;
> +
> +               get_size_hdr->status = 0;
> +               get_size_hdr->max_xfer = 1;
> +               get_size_hdr->config_size = p->metadata_size;
> +               *cmd_rc = 0;
> +               break;
> +
> +       case ND_CMD_GET_CONFIG_DATA:
> +               *cmd_rc = papr_scm_meta_get(p, buf);
> +               break;
> +
> +       case ND_CMD_SET_CONFIG_DATA:
> +               *cmd_rc = papr_scm_meta_set(p, buf);
> +               break;
> +
> +       default:
> +               return -EINVAL;
> +       }
> +
> +       dev_dbg(&p->pdev->dev, "returned with cmd_rc = %d\n", *cmd_rc);
> +
> +       return 0;
> +}
> +
> +static const struct attribute_group *region_attr_groups[] = {
> +       &nd_region_attribute_group,
> +       &nd_device_attribute_group,
> +       &nd_mapping_attribute_group,
> +       &nd_numa_attribute_group,
> +       NULL,
> +};
> +
> +static const struct attribute_group *bus_attr_groups[] = {
> +       &nvdimm_bus_attribute_group,
> +       NULL,
> +};
> +
> +static const struct attribute_group *papr_scm_dimm_groups[] = {
> +       &nvdimm_attribute_group,
> +       &nd_device_attribute_group,
> +       NULL,
> +};
> +
> +static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
> +{
> +       struct device *dev = &p->pdev->dev;
> +       struct nd_mapping_desc mapping;
> +       struct nd_region_desc ndr_desc;
> +       unsigned long dimm_flags;
> +
> +       p->bus_desc.ndctl = papr_scm_ndctl;
> +       p->bus_desc.module = THIS_MODULE;
> +       p->bus_desc.of_node = p->pdev->dev.of_node;
> +       p->bus_desc.attr_groups = bus_attr_groups;
> +       p->bus_desc.provider_name = kstrdup(p->pdev->name, GFP_KERNEL);
> +
> +       if (!p->bus_desc.provider_name)
> +               return -ENOMEM;
> +
> +       p->bus = nvdimm_bus_register(NULL, &p->bus_desc);
> +       if (!p->bus) {
> +               dev_err(dev, "Error creating nvdimm bus %pOF\n", p->dn);
> +               return -ENXIO;
> +       }
> +
> +       dimm_flags = 0;
> +       set_bit(NDD_ALIASING, &dimm_flags);
> +
> +       p->nvdimm = nvdimm_create(p->bus, p, papr_scm_dimm_groups,
> +                               dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL);

Looks good, although I'm just about to push out commits that change
this function signature to take a 'security_ops' pointer. If you need
a stable branch to base this on, let me know.

> +       if (!p->nvdimm) {
> +               dev_err(dev, "Error creating DIMM object for %pOF\n", p->dn);
> +               goto err;
> +       }
> +
> +       /* now add the region */
> +
> +       memset(&mapping, 0, sizeof(mapping));
> +       mapping.nvdimm = p->nvdimm;
> +       mapping.start = p->res.start;
> +       mapping.size = p->blocks * p->block_size; // XXX: potential overflow?

Ideally, should be nvdimm relative addresses. Since you have just a
single DIMM per-region I would expect this to be 0 to
nvdimm->region_size. That said I don't think anything breaks if you
make the nvdimm address equal to the system-physical address, just
wanted to point out the intent. This only matters when you have
multiple DIMMs per region and need to record the per-DIMM contribution
in the labels on each DIMM.

Other than that looks ok to me:

Acked-by: Dan Williams <dan.j.williams@intel.com>

...just the matter of what to do about function signature change.
_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] powerpc/pseries: Add driver for PAPR SCM regions
  2018-10-12 22:36   ` Dan Williams
@ 2018-10-13 12:08     ` Michael Ellerman
  2018-10-13 16:17       ` Dan Williams
  2018-10-14 23:14     ` Oliver
  1 sibling, 1 reply; 11+ messages in thread
From: Michael Ellerman @ 2018-10-13 12:08 UTC (permalink / raw)
  To: Dan Williams, Oliver O'Halloran
  Cc: Nathan Fontenot, linuxppc-dev, linux-nvdimm

Dan Williams <dan.j.williams@intel.com> writes:
> On Tue, Oct 9, 2018 at 11:21 PM Oliver O'Halloran <oohall@gmail.com> wrote:
>>
>> Adds a driver that implements support for enabling and accessing PAPR
>> SCM regions. Unfortunately due to how the PAPR interface works we can't
>> use the existing of_pmem driver (yet) because:
>>
...
>> +
>> +static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
>> +{
>> +       struct device *dev = &p->pdev->dev;
>> +       struct nd_mapping_desc mapping;
>> +       struct nd_region_desc ndr_desc;
>> +       unsigned long dimm_flags;
>> +
>> +       p->bus_desc.ndctl = papr_scm_ndctl;
>> +       p->bus_desc.module = THIS_MODULE;
>> +       p->bus_desc.of_node = p->pdev->dev.of_node;
>> +       p->bus_desc.attr_groups = bus_attr_groups;
>> +       p->bus_desc.provider_name = kstrdup(p->pdev->name, GFP_KERNEL);
>> +
>> +       if (!p->bus_desc.provider_name)
>> +               return -ENOMEM;
>> +
>> +       p->bus = nvdimm_bus_register(NULL, &p->bus_desc);
>> +       if (!p->bus) {
>> +               dev_err(dev, "Error creating nvdimm bus %pOF\n", p->dn);
>> +               return -ENXIO;
>> +       }
>> +
>> +       dimm_flags = 0;
>> +       set_bit(NDD_ALIASING, &dimm_flags);
>> +
>> +       p->nvdimm = nvdimm_create(p->bus, p, papr_scm_dimm_groups,
>> +                               dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL);
>
> Looks good, although I'm just about to push out commits that change
> this function signature to take a 'security_ops' pointer. If you need
> a stable branch to base this on, let me know.
...
>
> Other than that looks ok to me:
>
> Acked-by: Dan Williams <dan.j.williams@intel.com>
>
> ...just the matter of what to do about function signature change.

Yeah that's a bit of a bother.

The ideal for me would be that you put the commit that changes the
signature by itself in a branch based on 4.19-rc3 (or earlier), and then
we could both just merge that.

But not sure if that will work with whatever else you're trying to sync
up with.

cheers
_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] powerpc/pseries: Add driver for PAPR SCM regions
  2018-10-13 12:08     ` Michael Ellerman
@ 2018-10-13 16:17       ` Dan Williams
  2018-10-15  0:55         ` Michael Ellerman
  0 siblings, 1 reply; 11+ messages in thread
From: Dan Williams @ 2018-10-13 16:17 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: Nathan Fontenot, linuxppc-dev, linux-nvdimm

On Sat, Oct 13, 2018 at 5:08 AM Michael Ellerman <mpe@ellerman.id.au> wrote:
>
> Dan Williams <dan.j.williams@intel.com> writes:
> > On Tue, Oct 9, 2018 at 11:21 PM Oliver O'Halloran <oohall@gmail.com> wrote:
> >>
> >> Adds a driver that implements support for enabling and accessing PAPR
> >> SCM regions. Unfortunately due to how the PAPR interface works we can't
> >> use the existing of_pmem driver (yet) because:
> >>
> ...
> >> +
> >> +static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
> >> +{
> >> +       struct device *dev = &p->pdev->dev;
> >> +       struct nd_mapping_desc mapping;
> >> +       struct nd_region_desc ndr_desc;
> >> +       unsigned long dimm_flags;
> >> +
> >> +       p->bus_desc.ndctl = papr_scm_ndctl;
> >> +       p->bus_desc.module = THIS_MODULE;
> >> +       p->bus_desc.of_node = p->pdev->dev.of_node;
> >> +       p->bus_desc.attr_groups = bus_attr_groups;
> >> +       p->bus_desc.provider_name = kstrdup(p->pdev->name, GFP_KERNEL);
> >> +
> >> +       if (!p->bus_desc.provider_name)
> >> +               return -ENOMEM;
> >> +
> >> +       p->bus = nvdimm_bus_register(NULL, &p->bus_desc);
> >> +       if (!p->bus) {
> >> +               dev_err(dev, "Error creating nvdimm bus %pOF\n", p->dn);
> >> +               return -ENXIO;
> >> +       }
> >> +
> >> +       dimm_flags = 0;
> >> +       set_bit(NDD_ALIASING, &dimm_flags);
> >> +
> >> +       p->nvdimm = nvdimm_create(p->bus, p, papr_scm_dimm_groups,
> >> +                               dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL);
> >
> > Looks good, although I'm just about to push out commits that change
> > this function signature to take a 'security_ops' pointer. If you need
> > a stable branch to base this on, let me know.
> ...
> >
> > Other than that looks ok to me:
> >
> > Acked-by: Dan Williams <dan.j.williams@intel.com>
> >
> > ...just the matter of what to do about function signature change.
>
> Yeah that's a bit of a bother.
>
> The ideal for me would be that you put the commit that changes the
> signature by itself in a branch based on 4.19-rc3 (or earlier), and then
> we could both just merge that.
>
> But not sure if that will work with whatever else you're trying to sync
> up with.

How about this, I'll move the new signature to an 'advanced':

    __nvdimm_create()

...and make the existing:

   nvdimm_create()

...a simple wrapper around the new functionality. That way no matter
what merge order we should be ok.
_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] powerpc/pseries: Add driver for PAPR SCM regions
  2018-10-12 22:36   ` Dan Williams
  2018-10-13 12:08     ` Michael Ellerman
@ 2018-10-14 23:14     ` Oliver
  1 sibling, 0 replies; 11+ messages in thread
From: Oliver @ 2018-10-14 23:14 UTC (permalink / raw)
  To: Dan Williams; +Cc: Nathan Fontenot, linuxppc-dev, linux-nvdimm

On Sat, Oct 13, 2018 at 9:37 AM Dan Williams <dan.j.williams@intel.com> wrote:
>
> On Tue, Oct 9, 2018 at 11:21 PM Oliver O'Halloran <oohall@gmail.com> wrote:
> >
> > Adds a driver that implements support for enabling and accessing PAPR
> > SCM regions. Unfortunately due to how the PAPR interface works we can't
> > use the existing of_pmem driver (yet) because:
> >
> >  a) The guest is required to use the H_SCM_BIND_MEM h-call to add
> >     add the SCM region to it's physical address space, and
> >  b) There is currently no mechanism for relating a bare of_pmem region
> >     to the backing DIMM (or not-a-DIMM for our case).
> >
> > Both of these are easily handled by rolling the functionality into a
> > seperate driver so here we are...
> >
> > Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
> > ---
> > The alternative implementation here is that we have the pseries code
> > do the h-calls and craft a pmem-region@<addr> node based on that.
> > However, that doesn't solve b) and mpe has expressed his dislike of
> > adding new stuff to the DT at runtime so i'd say that's a non-starter.
> > ---
> >  arch/powerpc/platforms/pseries/Kconfig    |   7 +
> >  arch/powerpc/platforms/pseries/Makefile   |   1 +
> >  arch/powerpc/platforms/pseries/papr_scm.c | 340 ++++++++++++++++++++++++++++++
> >  3 files changed, 348 insertions(+)
> >  create mode 100644 arch/powerpc/platforms/pseries/papr_scm.c
> >
> > diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
> > index 0c698fd6d491..4b0fcb80efe5 100644
> > --- a/arch/powerpc/platforms/pseries/Kconfig
> > +++ b/arch/powerpc/platforms/pseries/Kconfig
> > @@ -140,3 +140,10 @@ config IBMEBUS
> >         bool "Support for GX bus based adapters"
> >         help
> >           Bus device driver for GX bus based adapters.
> > +
> > +config PAPR_SCM
> > +       depends on PPC_PSERIES && MEMORY_HOTPLUG
> > +       select LIBNVDIMM
> > +       tristate "Support for the PAPR Storage Class Memory interface"
> > +       help
> > +         Enable access to hypervisor provided storage class memory.
> > diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
> > index 892b27ced973..a43ec843c8e2 100644
> > --- a/arch/powerpc/platforms/pseries/Makefile
> > +++ b/arch/powerpc/platforms/pseries/Makefile
> > @@ -24,6 +24,7 @@ obj-$(CONFIG_IO_EVENT_IRQ)    += io_event_irq.o
> >  obj-$(CONFIG_LPARCFG)          += lparcfg.o
> >  obj-$(CONFIG_IBMVIO)           += vio.o
> >  obj-$(CONFIG_IBMEBUS)          += ibmebus.o
> > +obj-$(CONFIG_PAPR_SCM)         += papr_scm.o
> >
> >  ifdef CONFIG_PPC_PSERIES
> >  obj-$(CONFIG_SUSPEND)          += suspend.o
> > diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
> > new file mode 100644
> > index 000000000000..87d4dbc18845
> > --- /dev/null
> > +++ b/arch/powerpc/platforms/pseries/papr_scm.c
> > @@ -0,0 +1,340 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +
> > +#define pr_fmt(fmt)    "papr-scm: " fmt
> > +
> > +#include <linux/of.h>
> > +#include <linux/kernel.h>
> > +#include <linux/module.h>
> > +#include <linux/ioport.h>
> > +#include <linux/slab.h>
> > +#include <linux/ndctl.h>
> > +#include <linux/libnvdimm.h>
> > +#include <linux/platform_device.h>
> > +
> > +#include <asm/plpar_wrappers.h>
> > +
> > +#define BIND_ANY_ADDR (~0ul)
> > +
> > +#define PAPR_SCM_DIMM_CMD_MASK \
> > +       ((1ul << ND_CMD_GET_CONFIG_SIZE) | \
> > +        (1ul << ND_CMD_GET_CONFIG_DATA) | \
> > +        (1ul << ND_CMD_SET_CONFIG_DATA))
> > +
> > +struct papr_scm_priv {
> > +       struct platform_device *pdev;
> > +       struct device_node *dn;
> > +       uint32_t drc_index;
> > +       uint64_t blocks;
> > +       uint64_t block_size;
> > +       int metadata_size;
> > +
> > +       uint64_t bound_addr;
> > +
> > +       struct nvdimm_bus_descriptor bus_desc;
> > +       struct nvdimm_bus *bus;
> > +       struct nvdimm *nvdimm;
> > +       struct resource res;
> > +       struct nd_region *region;
> > +       struct nd_interleave_set nd_set;
> > +};
> > +
> > +static int drc_pmem_bind(struct papr_scm_priv *p)
> > +{
> > +       unsigned long ret[PLPAR_HCALL_BUFSIZE];
> > +       uint64_t rc, token;
> > +
> > +       /*
> > +        * When the hypervisor cannot map all the requested memory in a single
> > +        * hcall it returns H_BUSY and we call again with the token until
> > +        * we get H_SUCCESS. Aborting the retry loop before getting H_SUCCESS
> > +        * leave the system in an undefined state, so we wait.
> > +        */
> > +       token = 0;
> > +
> > +       do {
> > +               rc = plpar_hcall(H_SCM_BIND_MEM, ret, p->drc_index, 0,
> > +                               p->blocks, BIND_ANY_ADDR, token);
> > +               token = be64_to_cpu(ret[0]);
>
> Does plpar_hcall sleep? Should there be a cond_resched() here if not?

That's probably a good idea. The implementations I have to test with
never return H_BUSY and I had figured it just indicated that

>
> > +       } while (rc == H_BUSY);
> > +
> > +       if (rc) {
> > +               dev_err(&p->pdev->dev, "bind err: %lld\n", rc);
> > +               return -ENXIO;
> > +       }
> > +
> > +       p->bound_addr = be64_to_cpu(ret[1]);
> > +
> > +       dev_dbg(&p->pdev->dev, "bound drc %x to %pR\n", p->drc_index, &p->res);
> > +
> > +       return 0;
> > +}
> > +
> > +static int drc_pmem_unbind(struct papr_scm_priv *p)
> > +{
> > +       unsigned long ret[PLPAR_HCALL_BUFSIZE];
> > +       uint64_t rc, token;
> > +
> > +       token = 0;
> > +
> > +       /* NB: unbind has the same retry requirements mentioned above */
> > +       do {
> > +               rc = plpar_hcall(H_SCM_UNBIND_MEM, ret, p->drc_index,
> > +                               p->bound_addr, p->blocks, token);
> > +               token = be64_to_cpu(ret);
>
> ...same busy loop comment.
>
> > +       } while (rc == H_BUSY);
> > +
> > +       if (rc)
> > +               dev_err(&p->pdev->dev, "unbind error: %lld\n", rc);
> > +
> > +       return !!rc;
> > +}
> > +
> > +static int papr_scm_meta_get(struct papr_scm_priv *p,
> > +                       struct nd_cmd_get_config_data_hdr *hdr)
> > +{
> > +       unsigned long data[PLPAR_HCALL_BUFSIZE];
> > +       int64_t ret;
> > +
> > +       if (hdr->in_offset >= p->metadata_size || hdr->in_length != 1)
> > +               return -EINVAL;
> > +
> > +       ret = plpar_hcall(H_SCM_READ_METADATA, data, p->drc_index,
> > +                       hdr->in_offset, 1);
> > +
> > +       if (ret == H_PARAMETER) /* bad DRC index */
> > +               return -ENODEV;
> > +       if (ret)
> > +               return -EINVAL; /* other invalid parameter */
> > +
> > +       hdr->out_buf[0] = data[0] & 0xff;
> > +
> > +       return 0;
> > +}
> > +
> > +static int papr_scm_meta_set(struct papr_scm_priv *p,
> > +                       struct nd_cmd_set_config_hdr *hdr)
> > +{
> > +       int64_t ret;
> > +
> > +       if (hdr->in_offset >= p->metadata_size || hdr->in_length != 1)
> > +               return -EINVAL;
> > +
> > +       ret = plpar_hcall_norets(H_SCM_WRITE_METADATA,
> > +                       p->drc_index, hdr->in_offset, hdr->in_buf[0], 1);
> > +
> > +       if (ret == H_PARAMETER) /* bad DRC index */
> > +               return -ENODEV;
> > +       if (ret)
> > +               return -EINVAL; /* other invalid parameter */
> > +
> > +       return 0;
> > +}
> > +
> > +int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
> > +               unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc)
> > +{
> > +       struct nd_cmd_get_config_size *get_size_hdr;
> > +       struct papr_scm_priv *p;
> > +
> > +       /* Only dimm-specific calls are supported atm */
> > +       if (!nvdimm)
> > +               return -EINVAL;
> > +
> > +       p = nvdimm_provider_data(nvdimm);
> > +
> > +       switch (cmd) {
> > +       case ND_CMD_GET_CONFIG_SIZE:
> > +               get_size_hdr = buf;
> > +
> > +               get_size_hdr->status = 0;
> > +               get_size_hdr->max_xfer = 1;
> > +               get_size_hdr->config_size = p->metadata_size;
> > +               *cmd_rc = 0;
> > +               break;
> > +
> > +       case ND_CMD_GET_CONFIG_DATA:
> > +               *cmd_rc = papr_scm_meta_get(p, buf);
> > +               break;
> > +
> > +       case ND_CMD_SET_CONFIG_DATA:
> > +               *cmd_rc = papr_scm_meta_set(p, buf);
> > +               break;
> > +
> > +       default:
> > +               return -EINVAL;
> > +       }
> > +
> > +       dev_dbg(&p->pdev->dev, "returned with cmd_rc = %d\n", *cmd_rc);
> > +
> > +       return 0;
> > +}
> > +
> > +static const struct attribute_group *region_attr_groups[] = {
> > +       &nd_region_attribute_group,
> > +       &nd_device_attribute_group,
> > +       &nd_mapping_attribute_group,
> > +       &nd_numa_attribute_group,
> > +       NULL,
> > +};
> > +
> > +static const struct attribute_group *bus_attr_groups[] = {
> > +       &nvdimm_bus_attribute_group,
> > +       NULL,
> > +};
> > +
> > +static const struct attribute_group *papr_scm_dimm_groups[] = {
> > +       &nvdimm_attribute_group,
> > +       &nd_device_attribute_group,
> > +       NULL,
> > +};
> > +
> > +static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
> > +{
> > +       struct device *dev = &p->pdev->dev;
> > +       struct nd_mapping_desc mapping;
> > +       struct nd_region_desc ndr_desc;
> > +       unsigned long dimm_flags;
> > +
> > +       p->bus_desc.ndctl = papr_scm_ndctl;
> > +       p->bus_desc.module = THIS_MODULE;
> > +       p->bus_desc.of_node = p->pdev->dev.of_node;
> > +       p->bus_desc.attr_groups = bus_attr_groups;
> > +       p->bus_desc.provider_name = kstrdup(p->pdev->name, GFP_KERNEL);
> > +
> > +       if (!p->bus_desc.provider_name)
> > +               return -ENOMEM;
> > +
> > +       p->bus = nvdimm_bus_register(NULL, &p->bus_desc);
> > +       if (!p->bus) {
> > +               dev_err(dev, "Error creating nvdimm bus %pOF\n", p->dn);
> > +               return -ENXIO;
> > +       }
> > +
> > +       dimm_flags = 0;
> > +       set_bit(NDD_ALIASING, &dimm_flags);
> > +
> > +       p->nvdimm = nvdimm_create(p->bus, p, papr_scm_dimm_groups,
> > +                               dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL);
>
> Looks good, although I'm just about to push out commits that change
> this function signature to take a 'security_ops' pointer. If you need
> a stable branch to base this on, let me know.
>
> > +       if (!p->nvdimm) {
> > +               dev_err(dev, "Error creating DIMM object for %pOF\n", p->dn);
> > +               goto err;
> > +       }
> > +
> > +       /* now add the region */
> > +
> > +       memset(&mapping, 0, sizeof(mapping));
> > +       mapping.nvdimm = p->nvdimm;
> > +       mapping.start = p->res.start;
> > +       mapping.size = p->blocks * p->block_size; // XXX: potential overflow?
>
> Ideally, should be nvdimm relative addresses. Since you have just a
> single DIMM per-region I would expect this to be 0 to
> nvdimm->region_size. That said I don't think anything breaks if you
> make the nvdimm address equal to the system-physical address, just
> wanted to point out the intent. This only matters when you have
> multiple DIMMs per region and need to record the per-DIMM contribution
> in the labels on each DIMM.

bleh

That's what I assumed originally. I wasn't sure so I looked at what
the NFIT driver did and confused myself by assuming the address field
of acpi_nfit_memory_map was an SPA.

>
> Other than that looks ok to me:
>
> Acked-by: Dan Williams <dan.j.williams@intel.com>
>
> ...just the matter of what to do about function signature change.
_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] powerpc/pseries: Add driver for PAPR SCM regions
  2018-10-13 16:17       ` Dan Williams
@ 2018-10-15  0:55         ` Michael Ellerman
  0 siblings, 0 replies; 11+ messages in thread
From: Michael Ellerman @ 2018-10-15  0:55 UTC (permalink / raw)
  To: Dan Williams; +Cc: Nathan Fontenot, linuxppc-dev, linux-nvdimm

Dan Williams <dan.j.williams@intel.com> writes:
> On Sat, Oct 13, 2018 at 5:08 AM Michael Ellerman <mpe@ellerman.id.au> wrote:
>>
>> Dan Williams <dan.j.williams@intel.com> writes:
>> > On Tue, Oct 9, 2018 at 11:21 PM Oliver O'Halloran <oohall@gmail.com> wrote:
>> >>
>> >> Adds a driver that implements support for enabling and accessing PAPR
>> >> SCM regions. Unfortunately due to how the PAPR interface works we can't
>> >> use the existing of_pmem driver (yet) because:
>> >>
>> ...
>> >> +
>> >> +static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
>> >> +{
>> >> +       struct device *dev = &p->pdev->dev;
>> >> +       struct nd_mapping_desc mapping;
>> >> +       struct nd_region_desc ndr_desc;
>> >> +       unsigned long dimm_flags;
>> >> +
>> >> +       p->bus_desc.ndctl = papr_scm_ndctl;
>> >> +       p->bus_desc.module = THIS_MODULE;
>> >> +       p->bus_desc.of_node = p->pdev->dev.of_node;
>> >> +       p->bus_desc.attr_groups = bus_attr_groups;
>> >> +       p->bus_desc.provider_name = kstrdup(p->pdev->name, GFP_KERNEL);
>> >> +
>> >> +       if (!p->bus_desc.provider_name)
>> >> +               return -ENOMEM;
>> >> +
>> >> +       p->bus = nvdimm_bus_register(NULL, &p->bus_desc);
>> >> +       if (!p->bus) {
>> >> +               dev_err(dev, "Error creating nvdimm bus %pOF\n", p->dn);
>> >> +               return -ENXIO;
>> >> +       }
>> >> +
>> >> +       dimm_flags = 0;
>> >> +       set_bit(NDD_ALIASING, &dimm_flags);
>> >> +
>> >> +       p->nvdimm = nvdimm_create(p->bus, p, papr_scm_dimm_groups,
>> >> +                               dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL);
>> >
>> > Looks good, although I'm just about to push out commits that change
>> > this function signature to take a 'security_ops' pointer. If you need
>> > a stable branch to base this on, let me know.
>> ...
>> >
>> > Other than that looks ok to me:
>> >
>> > Acked-by: Dan Williams <dan.j.williams@intel.com>
>> >
>> > ...just the matter of what to do about function signature change.
>>
>> Yeah that's a bit of a bother.
>>
>> The ideal for me would be that you put the commit that changes the
>> signature by itself in a branch based on 4.19-rc3 (or earlier), and then
>> we could both just merge that.
>>
>> But not sure if that will work with whatever else you're trying to sync
>> up with.
>
> How about this, I'll move the new signature to an 'advanced':
>
>     __nvdimm_create()
>
> ...and make the existing:
>
>    nvdimm_create()
>
> ...a simple wrapper around the new functionality. That way no matter
> what merge order we should be ok.

Yep that's much easier, thanks.

cheers
_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2018-10-15  0:55 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-10-10  6:08 PAPR SCM support Oliver O'Halloran
2018-10-10  6:08 ` [PATCH 1/2] powerpc/pseries: PAPR persistent memory support Oliver O'Halloran
2018-10-10 16:36   ` Nathan Fontenot
2018-10-10 22:24     ` Oliver
2018-10-10  6:08 ` [PATCH 2/2] powerpc/pseries: Add driver for PAPR SCM regions Oliver O'Halloran
2018-10-10 23:59   ` Michael Ellerman
2018-10-12 22:36   ` Dan Williams
2018-10-13 12:08     ` Michael Ellerman
2018-10-13 16:17       ` Dan Williams
2018-10-15  0:55         ` Michael Ellerman
2018-10-14 23:14     ` Oliver

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).