All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] cxl: DVSEC Range emulation fixups
@ 2023-02-22  1:51 Dan Williams
  2023-02-22  1:51 ` [PATCH 1/2] cxl/hdm: Fix double allocation of @cxlhdm Dan Williams
                   ` (2 more replies)
  0 siblings, 3 replies; 15+ messages in thread
From: Dan Williams @ 2023-02-22  1:51 UTC (permalink / raw)
  To: linux-cxl; +Cc: Jonathan Cameron, dave.jiang

Jonathan points out that the kernel is too agressive in assuming that
DVSEC range registers are in use, reliably skip emulation if
'mem_enabled' is not set. The helper devm_cxl_setup_emulated_hdm() is
needlessly redoing an allocation, clean that up.

---

Dan Williams (2):
      cxl/hdm: Fix double allocation of @cxlhdm
      cxl/hdm: Skip emulation when driver manages mem_enable


 drivers/cxl/core/hdm.c |   65 ++++++++++++++++++------------------------------
 drivers/cxl/cxl.h      |    4 ++-
 drivers/cxl/port.c     |    2 +
 3 files changed, 28 insertions(+), 43 deletions(-)

base-commit: 23c198e3dfaabbc891681aecb0855b9e0ac791e1

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 1/2] cxl/hdm: Fix double allocation of @cxlhdm
  2023-02-22  1:51 [PATCH 0/2] cxl: DVSEC Range emulation fixups Dan Williams
@ 2023-02-22  1:51 ` Dan Williams
  2023-02-22 12:53   ` Jonathan Cameron
  2023-02-22 16:57   ` Dave Jiang
  2023-02-22  1:51 ` [PATCH 2/2] cxl/hdm: Skip emulation when driver manages mem_enable Dan Williams
  2023-02-24  1:14 ` [PATCH 0/2] cxl: DVSEC Range emulation fixups Gregory Price
  2 siblings, 2 replies; 15+ messages in thread
From: Dan Williams @ 2023-02-22  1:51 UTC (permalink / raw)
  To: linux-cxl; +Cc: dave.jiang

devm_cxl_setup_emulated_hdm() reallocates an instance of @cxlhdm that
was already allocated at the start of devm_cxl_setup_hdm(). Only one is
needed and devm_cxl_setup_emulated_hdm() does not do enough to warrant
being an explicit helper.

Fixes: 4474ce565ee4 ("cxl/hdm: Create emulated cxl_hdm for devices that do not have HDM decoders")
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/cxl/core/hdm.c |   34 ++++++----------------------------
 1 file changed, 6 insertions(+), 28 deletions(-)

diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index 45deda18ed32..520814130928 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -101,27 +101,6 @@ static int map_hdm_decoder_regs(struct cxl_port *port, void __iomem *crb,
 				      BIT(CXL_CM_CAP_CAP_ID_HDM));
 }
 
-static struct cxl_hdm *devm_cxl_setup_emulated_hdm(struct cxl_port *port,
-						   struct cxl_endpoint_dvsec_info *info)
-{
-	struct device *dev = &port->dev;
-	struct cxl_hdm *cxlhdm;
-
-	if (!info->mem_enabled)
-		return ERR_PTR(-ENODEV);
-
-	cxlhdm = devm_kzalloc(dev, sizeof(*cxlhdm), GFP_KERNEL);
-	if (!cxlhdm)
-		return ERR_PTR(-ENOMEM);
-
-	cxlhdm->port = port;
-	cxlhdm->decoder_count = info->ranges;
-	cxlhdm->target_count = info->ranges;
-	dev_set_drvdata(&port->dev, cxlhdm);
-
-	return cxlhdm;
-}
-
 /**
  * devm_cxl_setup_hdm - map HDM decoder component registers
  * @port: cxl_port to map
@@ -138,13 +117,14 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port,
 	cxlhdm = devm_kzalloc(dev, sizeof(*cxlhdm), GFP_KERNEL);
 	if (!cxlhdm)
 		return ERR_PTR(-ENOMEM);
-
 	cxlhdm->port = port;
-	crb = ioremap(port->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE);
-	if (!crb) {
-		if (info && info->mem_enabled)
-			return devm_cxl_setup_emulated_hdm(port, info);
+	dev_set_drvdata(dev, cxlhdm);
 
+	crb = ioremap(port->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE);
+	if (!crb && info && info->mem_enabled) {
+		cxlhdm->decoder_count = info->ranges;
+		cxlhdm->target_count = info->ranges;
+	} else if (!crb) {
 		dev_err(dev, "No component registers mapped\n");
 		return ERR_PTR(-ENXIO);
 	}
@@ -160,8 +140,6 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port,
 		return ERR_PTR(-ENXIO);
 	}
 
-	dev_set_drvdata(dev, cxlhdm);
-
 	return cxlhdm;
 }
 EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_hdm, CXL);


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 2/2] cxl/hdm: Skip emulation when driver manages mem_enable
  2023-02-22  1:51 [PATCH 0/2] cxl: DVSEC Range emulation fixups Dan Williams
  2023-02-22  1:51 ` [PATCH 1/2] cxl/hdm: Fix double allocation of @cxlhdm Dan Williams
@ 2023-02-22  1:51 ` Dan Williams
  2023-02-22 13:22   ` Jonathan Cameron
                     ` (2 more replies)
  2023-02-24  1:14 ` [PATCH 0/2] cxl: DVSEC Range emulation fixups Gregory Price
  2 siblings, 3 replies; 15+ messages in thread
From: Dan Williams @ 2023-02-22  1:51 UTC (permalink / raw)
  To: linux-cxl; +Cc: Jonathan Cameron, dave.jiang

If the driver is allowed to enable memory operation itself then it can
also turn on HDM decoder support at will.

With this the second call to cxl_setup_hdm_decoder_from_dvsec(), when
an HDM decoder is not committed, is not needed.

Fixes: b777e9bec960 ("cxl/hdm: Emulate HDM decoder from DVSEC range registers")
Link: http://lore.kernel.org/r/20230220113657.000042e1@huawei.com
Reported-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/cxl/core/hdm.c |   31 ++++++++++++++++++-------------
 drivers/cxl/cxl.h      |    4 +++-
 drivers/cxl/port.c     |    2 +-
 3 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index 520814130928..5293fe13fce3 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -717,19 +717,29 @@ static int cxl_setup_hdm_decoder_from_dvsec(struct cxl_port *port,
 	return 0;
 }
 
-static bool should_emulate_decoders(struct cxl_port *port)
+static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
 {
-	struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);
-	void __iomem *hdm = cxlhdm->regs.hdm_decoder;
+	struct cxl_hdm *cxlhdm;
+	void __iomem *hdm;
 	u32 ctrl;
 	int i;
 
-	if (!is_cxl_endpoint(cxlhdm->port))
+	if (!info)
 		return false;
 
+	cxlhdm = dev_get_drvdata(&info->port->dev);
+	hdm = cxlhdm->regs.hdm_decoder;
+
 	if (!hdm)
 		return true;
 
+	/*
+	 * If HDM decoders are present and the driver is in control of
+	 * Mem_Enable skip DVSEC based emulation
+	 */
+	if (!info->mem_enabled)
+		return false;
+
 	/*
 	 * If any decoders are committed already, there should not be any
 	 * emulated DVSEC decoders.
@@ -747,7 +757,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
 			    int *target_map, void __iomem *hdm, int which,
 			    u64 *dpa_base, struct cxl_endpoint_dvsec_info *info)
 {
-	struct cxl_endpoint_decoder *cxled = NULL;
+	struct cxl_endpoint_decoder *cxled;
 	u64 size, base, skip, dpa_size;
 	bool committed;
 	u32 remainder;
@@ -758,12 +768,9 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
 		unsigned char target_id[8];
 	} target_list;
 
-	if (should_emulate_decoders(port))
+	if (should_emulate_decoders(info))
 		return cxl_setup_hdm_decoder_from_dvsec(port, cxld, which, info);
 
-	if (is_endpoint_decoder(&cxld->dev))
-		cxled = to_cxl_endpoint_decoder(&cxld->dev);
-
 	ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(which));
 	base = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(which));
 	size = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which));
@@ -784,9 +791,6 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
 		.end = base + size - 1,
 	};
 
-	if (cxled && !committed && range_len(&info->dvsec_range[which]))
-		return cxl_setup_hdm_decoder_from_dvsec(port, cxld, which, info);
-
 	/* decoders are enabled if committed */
 	if (committed) {
 		cxld->flags |= CXL_DECODER_F_ENABLE;
@@ -824,7 +828,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
 	if (rc)
 		return rc;
 
-	if (!cxled) {
+	if (!info) {
 		target_list.value =
 			ioread64_hi_lo(hdm + CXL_HDM_DECODER0_TL_LOW(which));
 		for (i = 0; i < cxld->interleave_ways; i++)
@@ -844,6 +848,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
 		return -ENXIO;
 	}
 	skip = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_SKIP_LOW(which));
+	cxled = to_cxl_endpoint_decoder(&cxld->dev);
 	rc = devm_cxl_dpa_reserve(cxled, *dpa_base + skip, dpa_size, skip);
 	if (rc) {
 		dev_err(&port->dev,
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index d853a0238ad7..dd4b7a729419 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -695,13 +695,15 @@ int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint)
 
 /**
  * struct cxl_endpoint_dvsec_info - Cached DVSEC info
- * @mem_enabled: cached value of mem_enabled in the DVSEC, PCIE_DEVICE
+ * @mem_enabled: cached value of mem_enabled in the DVSEC at init time
  * @ranges: Number of active HDM ranges this device uses.
+ * @port: endpoint port associated with this info instance
  * @dvsec_range: cached attributes of the ranges in the DVSEC, PCIE_DEVICE
  */
 struct cxl_endpoint_dvsec_info {
 	bool mem_enabled;
 	int ranges;
+	struct cxl_port *port;
 	struct range dvsec_range[2];
 };
 
diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
index 1049bb5ea496..9c8f46ed336b 100644
--- a/drivers/cxl/port.c
+++ b/drivers/cxl/port.c
@@ -78,8 +78,8 @@ static int cxl_switch_port_probe(struct cxl_port *port)
 
 static int cxl_endpoint_port_probe(struct cxl_port *port)
 {
+	struct cxl_endpoint_dvsec_info info = { .port = port };
 	struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport);
-	struct cxl_endpoint_dvsec_info info = { 0 };
 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
 	struct cxl_hdm *cxlhdm;
 	struct cxl_port *root;


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/2] cxl/hdm: Fix double allocation of @cxlhdm
  2023-02-22  1:51 ` [PATCH 1/2] cxl/hdm: Fix double allocation of @cxlhdm Dan Williams
@ 2023-02-22 12:53   ` Jonathan Cameron
  2023-02-22 16:57   ` Dave Jiang
  1 sibling, 0 replies; 15+ messages in thread
From: Jonathan Cameron @ 2023-02-22 12:53 UTC (permalink / raw)
  To: Dan Williams; +Cc: linux-cxl, dave.jiang

On Tue, 21 Feb 2023 17:51:19 -0800
Dan Williams <dan.j.williams@intel.com> wrote:

> devm_cxl_setup_emulated_hdm() reallocates an instance of @cxlhdm that
> was already allocated at the start of devm_cxl_setup_hdm(). Only one is
> needed and devm_cxl_setup_emulated_hdm() does not do enough to warrant
> being an explicit helper.
> 
> Fixes: 4474ce565ee4 ("cxl/hdm: Create emulated cxl_hdm for devices that do not have HDM decoders")
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>

LGTM
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

> ---
>  drivers/cxl/core/hdm.c |   34 ++++++----------------------------
>  1 file changed, 6 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
> index 45deda18ed32..520814130928 100644
> --- a/drivers/cxl/core/hdm.c
> +++ b/drivers/cxl/core/hdm.c
> @@ -101,27 +101,6 @@ static int map_hdm_decoder_regs(struct cxl_port *port, void __iomem *crb,
>  				      BIT(CXL_CM_CAP_CAP_ID_HDM));
>  }
>  
> -static struct cxl_hdm *devm_cxl_setup_emulated_hdm(struct cxl_port *port,
> -						   struct cxl_endpoint_dvsec_info *info)
> -{
> -	struct device *dev = &port->dev;
> -	struct cxl_hdm *cxlhdm;
> -
> -	if (!info->mem_enabled)
> -		return ERR_PTR(-ENODEV);
> -
> -	cxlhdm = devm_kzalloc(dev, sizeof(*cxlhdm), GFP_KERNEL);
> -	if (!cxlhdm)
> -		return ERR_PTR(-ENOMEM);
> -

> -	cxlhdm->decoder_count = info->ranges;
> -	cxlhdm->target_count = info->ranges;
> -	dev_set_drvdata(&port->dev, cxlhdm);
> -
> -	return cxlhdm;
> -}
> -
>  /**
>   * devm_cxl_setup_hdm - map HDM decoder component registers
>   * @port: cxl_port to map
> @@ -138,13 +117,14 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port,
>  	cxlhdm = devm_kzalloc(dev, sizeof(*cxlhdm), GFP_KERNEL);
>  	if (!cxlhdm)
>  		return ERR_PTR(-ENOMEM);

> -	crb = ioremap(port->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE);
> -	if (!crb) {
> -		if (info && info->mem_enabled)
> -			return devm_cxl_setup_emulated_hdm(port, info);
> +	dev_set_drvdata(dev, cxlhdm);
>  
> +	crb = ioremap(port->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE);
> +	if (!crb && info && info->mem_enabled) {
> +		cxlhdm->decoder_count = info->ranges;
> +		cxlhdm->target_count = info->ranges;
> +	} else if (!crb) {
>  		dev_err(dev, "No component registers mapped\n");
>  		return ERR_PTR(-ENXIO);
>  	}
> @@ -160,8 +140,6 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port,
>  		return ERR_PTR(-ENXIO);
>  	}
>  
> -	dev_set_drvdata(dev, cxlhdm);
> -
>  	return cxlhdm;
>  }
>  EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_hdm, CXL);
> 


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/2] cxl/hdm: Skip emulation when driver manages mem_enable
  2023-02-22  1:51 ` [PATCH 2/2] cxl/hdm: Skip emulation when driver manages mem_enable Dan Williams
@ 2023-02-22 13:22   ` Jonathan Cameron
  2023-02-23  5:05     ` Dan Williams
  2023-02-22 16:59   ` Dave Jiang
  2023-03-31 16:33   ` Fan Ni
  2 siblings, 1 reply; 15+ messages in thread
From: Jonathan Cameron @ 2023-02-22 13:22 UTC (permalink / raw)
  To: Dan Williams; +Cc: linux-cxl, dave.jiang

On Tue, 21 Feb 2023 17:51:24 -0800
Dan Williams <dan.j.williams@intel.com> wrote:

> If the driver is allowed to enable memory operation itself then it can
> also turn on HDM decoder support at will.
> 
> With this the second call to cxl_setup_hdm_decoder_from_dvsec(), when
> an HDM decoder is not committed, is not needed.
> 
> Fixes: b777e9bec960 ("cxl/hdm: Emulate HDM decoder from DVSEC range registers")
> Link: http://lore.kernel.org/r/20230220113657.000042e1@huawei.com
> Reported-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>

For both
Tested-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

I could have sworn that mem_enabled was set when I was debugging this before
(hence the odd dance in my proposal)

Meh, doesn't seem to be now so I clearly did something wrong!

Trivial comment below.

I still need to add more tests cases, but this solves the one that
caused the original report.

> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index d853a0238ad7..dd4b7a729419 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -695,13 +695,15 @@ int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint)
>  
>  /**
>   * struct cxl_endpoint_dvsec_info - Cached DVSEC info
> - * @mem_enabled: cached value of mem_enabled in the DVSEC, PCIE_DEVICE
> + * @mem_enabled: cached value of mem_enabled in the DVSEC at init time

I guess could rename this to make the meaning more obvious, but would make for
a messier fix.

>   * @ranges: Number of active HDM ranges this device uses.
> + * @port: endpoint port associated with this info instance
>   * @dvsec_range: cached attributes of the ranges in the DVSEC, PCIE_DEVICE
>   */
>  struct cxl_endpoint_dvsec_info {
>  	bool mem_enabled;
>  	int ranges;
> +	struct cxl_port *port;
>  	struct range dvsec_range[2];
>  };
>  

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/2] cxl/hdm: Fix double allocation of @cxlhdm
  2023-02-22  1:51 ` [PATCH 1/2] cxl/hdm: Fix double allocation of @cxlhdm Dan Williams
  2023-02-22 12:53   ` Jonathan Cameron
@ 2023-02-22 16:57   ` Dave Jiang
  1 sibling, 0 replies; 15+ messages in thread
From: Dave Jiang @ 2023-02-22 16:57 UTC (permalink / raw)
  To: Dan Williams, linux-cxl



On 2/21/23 6:51 PM, Dan Williams wrote:
> devm_cxl_setup_emulated_hdm() reallocates an instance of @cxlhdm that
> was already allocated at the start of devm_cxl_setup_hdm(). Only one is
> needed and devm_cxl_setup_emulated_hdm() does not do enough to warrant
> being an explicit helper.
> 
> Fixes: 4474ce565ee4 ("cxl/hdm: Create emulated cxl_hdm for devices that do not have HDM decoders")
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>

Reviewed-by: Dave Jiang <dave.jiang@intel.com>

> ---
>   drivers/cxl/core/hdm.c |   34 ++++++----------------------------
>   1 file changed, 6 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
> index 45deda18ed32..520814130928 100644
> --- a/drivers/cxl/core/hdm.c
> +++ b/drivers/cxl/core/hdm.c
> @@ -101,27 +101,6 @@ static int map_hdm_decoder_regs(struct cxl_port *port, void __iomem *crb,
>   				      BIT(CXL_CM_CAP_CAP_ID_HDM));
>   }
>   
> -static struct cxl_hdm *devm_cxl_setup_emulated_hdm(struct cxl_port *port,
> -						   struct cxl_endpoint_dvsec_info *info)
> -{
> -	struct device *dev = &port->dev;
> -	struct cxl_hdm *cxlhdm;
> -
> -	if (!info->mem_enabled)
> -		return ERR_PTR(-ENODEV);
> -
> -	cxlhdm = devm_kzalloc(dev, sizeof(*cxlhdm), GFP_KERNEL);
> -	if (!cxlhdm)
> -		return ERR_PTR(-ENOMEM);
> -
> -	cxlhdm->port = port;
> -	cxlhdm->decoder_count = info->ranges;
> -	cxlhdm->target_count = info->ranges;
> -	dev_set_drvdata(&port->dev, cxlhdm);
> -
> -	return cxlhdm;
> -}
> -
>   /**
>    * devm_cxl_setup_hdm - map HDM decoder component registers
>    * @port: cxl_port to map
> @@ -138,13 +117,14 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port,
>   	cxlhdm = devm_kzalloc(dev, sizeof(*cxlhdm), GFP_KERNEL);
>   	if (!cxlhdm)
>   		return ERR_PTR(-ENOMEM);
> -
>   	cxlhdm->port = port;
> -	crb = ioremap(port->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE);
> -	if (!crb) {
> -		if (info && info->mem_enabled)
> -			return devm_cxl_setup_emulated_hdm(port, info);
> +	dev_set_drvdata(dev, cxlhdm);
>   
> +	crb = ioremap(port->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE);
> +	if (!crb && info && info->mem_enabled) {
> +		cxlhdm->decoder_count = info->ranges;
> +		cxlhdm->target_count = info->ranges;
> +	} else if (!crb) {
>   		dev_err(dev, "No component registers mapped\n");
>   		return ERR_PTR(-ENXIO);
>   	}
> @@ -160,8 +140,6 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port,
>   		return ERR_PTR(-ENXIO);
>   	}
>   
> -	dev_set_drvdata(dev, cxlhdm);
> -
>   	return cxlhdm;
>   }
>   EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_hdm, CXL);
> 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/2] cxl/hdm: Skip emulation when driver manages mem_enable
  2023-02-22  1:51 ` [PATCH 2/2] cxl/hdm: Skip emulation when driver manages mem_enable Dan Williams
  2023-02-22 13:22   ` Jonathan Cameron
@ 2023-02-22 16:59   ` Dave Jiang
  2023-03-31 16:33   ` Fan Ni
  2 siblings, 0 replies; 15+ messages in thread
From: Dave Jiang @ 2023-02-22 16:59 UTC (permalink / raw)
  To: Dan Williams, linux-cxl; +Cc: Jonathan Cameron



On 2/21/23 6:51 PM, Dan Williams wrote:
> If the driver is allowed to enable memory operation itself then it can
> also turn on HDM decoder support at will.
> 
> With this the second call to cxl_setup_hdm_decoder_from_dvsec(), when
> an HDM decoder is not committed, is not needed.
> 
> Fixes: b777e9bec960 ("cxl/hdm: Emulate HDM decoder from DVSEC range registers")
> Link: http://lore.kernel.org/r/20230220113657.000042e1@huawei.com
> Reported-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>

Reviewed-by: Dave Jiang <dave.jiang@intel.com>

> ---
>   drivers/cxl/core/hdm.c |   31 ++++++++++++++++++-------------
>   drivers/cxl/cxl.h      |    4 +++-
>   drivers/cxl/port.c     |    2 +-
>   3 files changed, 22 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
> index 520814130928..5293fe13fce3 100644
> --- a/drivers/cxl/core/hdm.c
> +++ b/drivers/cxl/core/hdm.c
> @@ -717,19 +717,29 @@ static int cxl_setup_hdm_decoder_from_dvsec(struct cxl_port *port,
>   	return 0;
>   }
>   
> -static bool should_emulate_decoders(struct cxl_port *port)
> +static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
>   {
> -	struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);
> -	void __iomem *hdm = cxlhdm->regs.hdm_decoder;
> +	struct cxl_hdm *cxlhdm;
> +	void __iomem *hdm;
>   	u32 ctrl;
>   	int i;
>   
> -	if (!is_cxl_endpoint(cxlhdm->port))
> +	if (!info)
>   		return false;
>   
> +	cxlhdm = dev_get_drvdata(&info->port->dev);
> +	hdm = cxlhdm->regs.hdm_decoder;
> +
>   	if (!hdm)
>   		return true;
>   
> +	/*
> +	 * If HDM decoders are present and the driver is in control of
> +	 * Mem_Enable skip DVSEC based emulation
> +	 */
> +	if (!info->mem_enabled)
> +		return false;
> +
>   	/*
>   	 * If any decoders are committed already, there should not be any
>   	 * emulated DVSEC decoders.
> @@ -747,7 +757,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
>   			    int *target_map, void __iomem *hdm, int which,
>   			    u64 *dpa_base, struct cxl_endpoint_dvsec_info *info)
>   {
> -	struct cxl_endpoint_decoder *cxled = NULL;
> +	struct cxl_endpoint_decoder *cxled;
>   	u64 size, base, skip, dpa_size;
>   	bool committed;
>   	u32 remainder;
> @@ -758,12 +768,9 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
>   		unsigned char target_id[8];
>   	} target_list;
>   
> -	if (should_emulate_decoders(port))
> +	if (should_emulate_decoders(info))
>   		return cxl_setup_hdm_decoder_from_dvsec(port, cxld, which, info);
>   
> -	if (is_endpoint_decoder(&cxld->dev))
> -		cxled = to_cxl_endpoint_decoder(&cxld->dev);
> -
>   	ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(which));
>   	base = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(which));
>   	size = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which));
> @@ -784,9 +791,6 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
>   		.end = base + size - 1,
>   	};
>   
> -	if (cxled && !committed && range_len(&info->dvsec_range[which]))
> -		return cxl_setup_hdm_decoder_from_dvsec(port, cxld, which, info);
> -
>   	/* decoders are enabled if committed */
>   	if (committed) {
>   		cxld->flags |= CXL_DECODER_F_ENABLE;
> @@ -824,7 +828,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
>   	if (rc)
>   		return rc;
>   
> -	if (!cxled) {
> +	if (!info) {
>   		target_list.value =
>   			ioread64_hi_lo(hdm + CXL_HDM_DECODER0_TL_LOW(which));
>   		for (i = 0; i < cxld->interleave_ways; i++)
> @@ -844,6 +848,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
>   		return -ENXIO;
>   	}
>   	skip = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_SKIP_LOW(which));
> +	cxled = to_cxl_endpoint_decoder(&cxld->dev);
>   	rc = devm_cxl_dpa_reserve(cxled, *dpa_base + skip, dpa_size, skip);
>   	if (rc) {
>   		dev_err(&port->dev,
> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index d853a0238ad7..dd4b7a729419 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -695,13 +695,15 @@ int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint)
>   
>   /**
>    * struct cxl_endpoint_dvsec_info - Cached DVSEC info
> - * @mem_enabled: cached value of mem_enabled in the DVSEC, PCIE_DEVICE
> + * @mem_enabled: cached value of mem_enabled in the DVSEC at init time
>    * @ranges: Number of active HDM ranges this device uses.
> + * @port: endpoint port associated with this info instance
>    * @dvsec_range: cached attributes of the ranges in the DVSEC, PCIE_DEVICE
>    */
>   struct cxl_endpoint_dvsec_info {
>   	bool mem_enabled;
>   	int ranges;
> +	struct cxl_port *port;
>   	struct range dvsec_range[2];
>   };
>   
> diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
> index 1049bb5ea496..9c8f46ed336b 100644
> --- a/drivers/cxl/port.c
> +++ b/drivers/cxl/port.c
> @@ -78,8 +78,8 @@ static int cxl_switch_port_probe(struct cxl_port *port)
>   
>   static int cxl_endpoint_port_probe(struct cxl_port *port)
>   {
> +	struct cxl_endpoint_dvsec_info info = { .port = port };
>   	struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport);
> -	struct cxl_endpoint_dvsec_info info = { 0 };
>   	struct cxl_dev_state *cxlds = cxlmd->cxlds;
>   	struct cxl_hdm *cxlhdm;
>   	struct cxl_port *root;
> 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/2] cxl/hdm: Skip emulation when driver manages mem_enable
  2023-02-22 13:22   ` Jonathan Cameron
@ 2023-02-23  5:05     ` Dan Williams
  0 siblings, 0 replies; 15+ messages in thread
From: Dan Williams @ 2023-02-23  5:05 UTC (permalink / raw)
  To: Jonathan Cameron, Dan Williams; +Cc: linux-cxl, dave.jiang

Jonathan Cameron wrote:
> On Tue, 21 Feb 2023 17:51:24 -0800
> Dan Williams <dan.j.williams@intel.com> wrote:
> 
> > If the driver is allowed to enable memory operation itself then it can
> > also turn on HDM decoder support at will.
> > 
> > With this the second call to cxl_setup_hdm_decoder_from_dvsec(), when
> > an HDM decoder is not committed, is not needed.
> > 
> > Fixes: b777e9bec960 ("cxl/hdm: Emulate HDM decoder from DVSEC range registers")
> > Link: http://lore.kernel.org/r/20230220113657.000042e1@huawei.com
> > Reported-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> > Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> 
> For both
> Tested-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> 
> I could have sworn that mem_enabled was set when I was debugging this before
> (hence the odd dance in my proposal)
> 
> Meh, doesn't seem to be now so I clearly did something wrong!
> 
> Trivial comment below.
> 
> I still need to add more tests cases, but this solves the one that
> caused the original report.

Good to hear.

> 
> > diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> > index d853a0238ad7..dd4b7a729419 100644
> > --- a/drivers/cxl/cxl.h
> > +++ b/drivers/cxl/cxl.h
> > @@ -695,13 +695,15 @@ int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint)
> >  
> >  /**
> >   * struct cxl_endpoint_dvsec_info - Cached DVSEC info
> > - * @mem_enabled: cached value of mem_enabled in the DVSEC, PCIE_DEVICE
> > + * @mem_enabled: cached value of mem_enabled in the DVSEC at init time
> 
> I guess could rename this to make the meaning more obvious, but would make for
> a messier fix.

Yeah, maybe a rename as a follow-on, but for now keep the fix smaller
seems prudent.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 0/2] cxl: DVSEC Range emulation fixups
  2023-02-22  1:51 [PATCH 0/2] cxl: DVSEC Range emulation fixups Dan Williams
  2023-02-22  1:51 ` [PATCH 1/2] cxl/hdm: Fix double allocation of @cxlhdm Dan Williams
  2023-02-22  1:51 ` [PATCH 2/2] cxl/hdm: Skip emulation when driver manages mem_enable Dan Williams
@ 2023-02-24  1:14 ` Gregory Price
  2023-03-01 18:46   ` Dan Williams
  2 siblings, 1 reply; 15+ messages in thread
From: Gregory Price @ 2023-02-24  1:14 UTC (permalink / raw)
  To: Dan Williams; +Cc: linux-cxl, Jonathan Cameron, dave.jiang

On Tue, Feb 21, 2023 at 05:51:13PM -0800, Dan Williams wrote:
> Jonathan points out that the kernel is too agressive in assuming that
> DVSEC range registers are in use, reliably skip emulation if
> 'mem_enabled' is not set. The helper devm_cxl_setup_emulated_hdm() is
> needlessly redoing an allocation, clean that up.
> 
> ---
> 
> Dan Williams (2):
>       cxl/hdm: Fix double allocation of @cxlhdm
>       cxl/hdm: Skip emulation when driver manages mem_enable
> 
> 
>  drivers/cxl/core/hdm.c |   65 ++++++++++++++++++------------------------------
>  drivers/cxl/cxl.h      |    4 ++-
>  drivers/cxl/port.c     |    2 +
>  3 files changed, 28 insertions(+), 43 deletions(-)
> 
> base-commit: 23c198e3dfaabbc891681aecb0855b9e0ac791e1


not *quite* sure what to make of this yet, but i get stack trace on boot
on real hardware with this patch.  I'm debugging other issues with this
hardware, so i'm not sure if it's related or not, but prior to this patch
I did not have a stack trace.


I think there's two issues here:

1) The system I'm on fails to register a CFMW/root port decoder.  I'm
   not entirely sure why, other than during cxl_decoder_add(), the
   target map contains "[0,]" as the target id's, and the only
   registered ports/decoders are the endpoints.

   I don't know whether this is because the hardware just doesn't have a
   root decoder, or what.  But it makes the volatile region patches
   non-functional, and i have to revert back to static configuration to
   use the real cxl device (i.e. don't mark it EFI_MEMORY_SP).

2) Per the second bit - there's no component registers being registered
   for this cxl device (plus some spurious DOE error).


The no root decoder thing has been throwing me for a loop, if you can
help me shed some light on this i'd greatly appreciate it.  If a socket
has no decoders, should we expect memory expanders to be managable via
the volatile region system in the driver?


relevant dmesg info

[   21.928436] cxl root0: Failed to populate active decoder targets
[   21.929077] cxl_acpi ACPI0017:00: Failed to add decode range [0x1050000000 - 0x304fffffff]
[   21.933150]  pci0000:3f: host supports CXL (restricted)
[... snip ...]
[   21.965126] cxl_pci 0000:3f:00.0: No component registers (-19)
[   22.001597] cxl_pci 0000:3f:00.0: DOE: [d80] failed to cache protocols : -5
[   22.002351] cxl_pci 0000:3f:00.0: Failed to create MB object for MB @ d80
[   22.003265] cxl_pci 0000:3f:00.0: Failed to request region 0x0000000000001fff-0x000000000010201e
[... snip ...]
[   22.339973] BUG: unable to handle page fault for address: 0000000000001000
[   22.340584] #PF: supervisor read access in kernel mode
[   22.346801] #PF: error_code(0x0000) - not-present page
[   22.349059] PGD 1339ec067 P4D 0
[   22.350877] Oops: 0000 [#1] PREEMPT SMP NOPTI
[   22.354558] CPU: 45 PID: 1351 Comm: systemd-udevd Not tainted 6.2.0+ #7
[   22.358357] RIP: 0010:cxl_probe_component_regs+0x23/0x180 [cxl_core]
[   22.361571] Code: 90 90 90 90 90 90 90 0f 1f 44 00 00 41 57 31 c0 b9 06 00 00 00 41 56 41 55 41 54 49 89 fc 48 89 d7 55 53 48 83 ec 10 f3 48 ab <8b> 86 00 10 00 00 66 83 f8 01 0f 85 30 01 00 00 c1 e8 18 0f 84 93
[   22.367000] RSP: 0018:ff4fc33844ce7830 EFLAGS: 00010286
[   22.369016] RAX: 0000000000000000 RBX: ff4f28d002dba000 RCX: 0000000000000000
[   22.372736] RDX: ff4fc33844ce7898 RSI: 0000000000000000 RDI: ff4fc33844ce78c8
[   22.372739] RBP: ff4f28d0baaf1268 R08: 0000000000000000 R09: 0000000000000001
[   22.375842] R10: 0000000000000001 R11: 00000000e0690b8e R12: ff4f28d002dba000
[   22.375844] R13: ff4fc33844ce7920 R14: ff4f28d000f9fc28 R15: ff4f28d028b45810
[   22.375845] FS:  00007fbb75a07580(0000) GS:ff4f28df09c00000(0000) knlGS:0000000000000000
[   22.375847] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   22.375848] CR2: 0000000000001000 CR3: 0000000133ab0003 CR4: 0000000000771ee0
[   22.375849] PKRU: 55555554
[   22.375850] Call Trace:
[   22.375854]  <TASK>
[   22.375858]  map_hdm_decoder_regs+0x46/0x90 [cxl_core]
[   22.398038]  devm_cxl_setup_hdm+0x95/0x120 [cxl_core]
[   22.398111]  cxl_port_probe+0xbe/0x190 [cxl_port]
[   22.398118]  cxl_bus_probe+0x14/0x50 [cxl_core]


~Gregory

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 0/2] cxl: DVSEC Range emulation fixups
  2023-03-01 18:46   ` Dan Williams
@ 2023-02-26  7:28     ` Gregory Price
  2023-03-03 16:43     ` Gregory Price
  2023-03-21 17:17     ` Gregory Price
  2 siblings, 0 replies; 15+ messages in thread
From: Gregory Price @ 2023-02-26  7:28 UTC (permalink / raw)
  To: Dan Williams; +Cc: linux-cxl, Jonathan Cameron, dave.jiang

On Wed, Mar 01, 2023 at 10:46:29AM -0800, Dan Williams wrote:
> Hi Gregory,
[...]
> Would be interesting to know if decoder_populate_targets() is returning
> -EINVAL or -ENXIO.

It returns -ENXIO.  We have one attached device to the complex, and that
device appears to populate as expected, just not any root decoder
device.

> > [... snip ...]
> > [   21.965126] cxl_pci 0000:3f:00.0: No component registers (-19)
> > [   22.001597] cxl_pci 0000:3f:00.0: DOE: [d80] failed to cache protocols : -5
> > [   22.002351] cxl_pci 0000:3f:00.0: Failed to create MB object for MB @ d80
> > [   22.003265] cxl_pci 0000:3f:00.0: Failed to request region 0x0000000000001fff-0x000000000010201e
> > [... snip ...]
> > [   22.339973] BUG: unable to handle page fault for address: 0000000000001000
> > [   22.340584] #PF: supervisor read access in kernel mode
> > [   22.346801] #PF: error_code(0x0000) - not-present page
> > [   22.349059] PGD 1339ec067 P4D 0
> > [   22.350877] Oops: 0000 [#1] PREEMPT SMP NOPTI
> > [   22.354558] CPU: 45 PID: 1351 Comm: systemd-udevd Not tainted 6.2.0+ #7
> > [   22.358357] RIP: 0010:cxl_probe_component_regs+0x23/0x180 [cxl_core]
> 
> Can you send the output of:
> 
> scripts/faddr2line drivers/cxl/core/cxl_core.ko cxl_probe_component_regs+0x23
> 
> ...from your kernel build directory?
> 
> I suspect this crash can be avoided with an explicit earlier check for
> missing component registers, but that's not really a fix for this
> failure.
> 
> Can you also send the log without these patches applied for comparison?

I will have to get back to you with this, we had to pivot the box to
another test and I have some personal things creeping up on me.  Likely
sometime next week at earliest.


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 0/2] cxl: DVSEC Range emulation fixups
  2023-02-24  1:14 ` [PATCH 0/2] cxl: DVSEC Range emulation fixups Gregory Price
@ 2023-03-01 18:46   ` Dan Williams
  2023-02-26  7:28     ` Gregory Price
                       ` (2 more replies)
  0 siblings, 3 replies; 15+ messages in thread
From: Dan Williams @ 2023-03-01 18:46 UTC (permalink / raw)
  To: Gregory Price, Dan Williams; +Cc: linux-cxl, Jonathan Cameron, dave.jiang

Hi Gregory,

Gregory Price wrote:
> On Tue, Feb 21, 2023 at 05:51:13PM -0800, Dan Williams wrote:
> > Jonathan points out that the kernel is too agressive in assuming that
> > DVSEC range registers are in use, reliably skip emulation if
> > 'mem_enabled' is not set. The helper devm_cxl_setup_emulated_hdm() is
> > needlessly redoing an allocation, clean that up.
> > 
> > ---
> > 
> > Dan Williams (2):
> >       cxl/hdm: Fix double allocation of @cxlhdm
> >       cxl/hdm: Skip emulation when driver manages mem_enable
> > 
> > 
> >  drivers/cxl/core/hdm.c |   65 ++++++++++++++++++------------------------------
> >  drivers/cxl/cxl.h      |    4 ++-
> >  drivers/cxl/port.c     |    2 +
> >  3 files changed, 28 insertions(+), 43 deletions(-)
> > 
> > base-commit: 23c198e3dfaabbc891681aecb0855b9e0ac791e1
> 
> 
> not *quite* sure what to make of this yet, but i get stack trace on boot
> on real hardware with this patch.  I'm debugging other issues with this
> hardware, so i'm not sure if it's related or not, but prior to this patch
> I did not have a stack trace.
> 
> 
> I think there's two issues here:
> 
> 1) The system I'm on fails to register a CFMW/root port decoder.  I'm
>    not entirely sure why, other than during cxl_decoder_add(), the
>    target map contains "[0,]" as the target id's, and the only
>    registered ports/decoders are the endpoints.
> 
>    I don't know whether this is because the hardware just doesn't have a
>    root decoder, or what.  But it makes the volatile region patches
>    non-functional, and i have to revert back to static configuration to
>    use the real cxl device (i.e. don't mark it EFI_MEMORY_SP).

It looks like the BIOS is trying to report something in the CEDT.CFMWS
but it looks

> 2) Per the second bit - there's no component registers being registered
>    for this cxl device (plus some spurious DOE error).

If the CEDT is broken then for RCH topologies the device component
registers will also be missing.

> 
> 
> The no root decoder thing has been throwing me for a loop, if you can
> help me shed some light on this i'd greatly appreciate it.  If a socket
> has no decoders, should we expect memory expanders to be managable via
> the volatile region system in the driver?
> 
> 
> relevant dmesg info
> 
> [   21.928436] cxl root0: Failed to populate active decoder targets

Would be interesting to know if decoder_populate_targets() is returning
-EINVAL or -ENXIO.

> [   21.929077] cxl_acpi ACPI0017:00: Failed to add decode range [0x1050000000 - 0x304fffffff]
> [   21.933150]  pci0000:3f: host supports CXL (restricted)

This signals this is an RCH topology.

> [... snip ...]
> [   21.965126] cxl_pci 0000:3f:00.0: No component registers (-19)
> [   22.001597] cxl_pci 0000:3f:00.0: DOE: [d80] failed to cache protocols : -5
> [   22.002351] cxl_pci 0000:3f:00.0: Failed to create MB object for MB @ d80
> [   22.003265] cxl_pci 0000:3f:00.0: Failed to request region 0x0000000000001fff-0x000000000010201e
> [... snip ...]
> [   22.339973] BUG: unable to handle page fault for address: 0000000000001000
> [   22.340584] #PF: supervisor read access in kernel mode
> [   22.346801] #PF: error_code(0x0000) - not-present page
> [   22.349059] PGD 1339ec067 P4D 0
> [   22.350877] Oops: 0000 [#1] PREEMPT SMP NOPTI
> [   22.354558] CPU: 45 PID: 1351 Comm: systemd-udevd Not tainted 6.2.0+ #7
> [   22.358357] RIP: 0010:cxl_probe_component_regs+0x23/0x180 [cxl_core]

Can you send the output of:

scripts/faddr2line drivers/cxl/core/cxl_core.ko cxl_probe_component_regs+0x23

...from your kernel build directory?

I suspect this crash can be avoided with an explicit earlier check for
missing component registers, but that's not really a fix for this
failure.

Can you also send the log without these patches applied for comparison?

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 0/2] cxl: DVSEC Range emulation fixups
  2023-03-01 18:46   ` Dan Williams
  2023-02-26  7:28     ` Gregory Price
@ 2023-03-03 16:43     ` Gregory Price
  2023-03-21 17:17     ` Gregory Price
  2 siblings, 0 replies; 15+ messages in thread
From: Gregory Price @ 2023-03-03 16:43 UTC (permalink / raw)
  To: Dan Williams; +Cc: linux-cxl, Jonathan Cameron, dave.jiang

On Wed, Mar 01, 2023 at 10:46:29AM -0800, Dan Williams wrote:
> Hi Gregory,
> 
> > 1) The system I'm on fails to register a CFMW/root port decoder.  I'm
> >    not entirely sure why, other than during cxl_decoder_add(), the
> >    target map contains "[0,]" as the target id's, and the only
> >    registered ports/decoders are the endpoints.
> > 
> >    I don't know whether this is because the hardware just doesn't have a
> >    root decoder, or what.  But it makes the volatile region patches
> >    non-functional, and i have to revert back to static configuration to
> >    use the real cxl device (i.e. don't mark it EFI_MEMORY_SP).
> 
> It looks like the BIOS is trying to report something in the CEDT.CFMWS
> but it looks
> 
> > 2) Per the second bit - there's no component registers being registered
> >    for this cxl device (plus some spurious DOE error).
> 
> If the CEDT is broken then for RCH topologies the device component
> registers will also be missing.
> 
> > 
> > 
> > The no root decoder thing has been throwing me for a loop, if you can
> > help me shed some light on this i'd greatly appreciate it.  If a socket
> > has no decoders, should we expect memory expanders to be managable via
> > the volatile region system in the driver?
> > 
> > 
> > relevant dmesg info
> > 
> > [   21.928436] cxl root0: Failed to populate active decoder targets
> 
> Would be interesting to know if decoder_populate_targets() is returning
> -EINVAL or -ENXIO.
>

This is returning ENXIO.  Most specifically, the path here goes through

cxl_parse_cfmws
  cxl_root_decoder_alloc:
  	rp: 0xd85ea62a
	ways: 1
	hb: calc_hb_module
	succeeds

	decoder:
                flags: 41
                hpa_range.start: 1342177280 - 0x50000000
                hpa_range.end:   1342177279 - 0x4fffffff <- suspect?
                interleave ways: 1

  cxl_decoder_add
    cxl_decoder_add_locked
      // cxl decoder allocated by cxl_root_decoder_alloc
      cxld == &cxlrd->cxlsd.cxl
      cxld->dev.parent: 0000000096ae77e1

      // parent device port?
      port == to_cxl_port(cxld->dev.parent)	

      port->id: 0
      port->hb: 0000000000000000
      port->parent_dport: 0000000000000000
      port->nr_dports: 1 
      port->cdat_avail: 0

      target_map[0]: 0

      decoder_populate_targets(cxlsd, port, target_map)
        find_dport(port, 0) // target_map[0] == 0
	  // iterating through ports:
          [   22.614465]  find port: dport->port_id 4 - searching for 0
        [   22.657306] dport not found
        -> ENXIO

So the root port has a parent port... but the parent port doesn't have a
dport to the root port we just allocated? That's confusing.

The only dport for the parent is 4, and that might belong to the other
decoder on the system?

[user@host0 ~]# ls /sys/bus/cxl/devices/
decoder1.0  endpoint1  mem0  root0

Since this doesn't happen on QEMU, I imagine we would expect to find
this linkage, but it isn't happening for some reason.

I'll have to see if i can get the full topology out at some point.

> > [... snip ...]
> > [   21.965126] cxl_pci 0000:3f:00.0: No component registers (-19)
> > [   22.001597] cxl_pci 0000:3f:00.0: DOE: [d80] failed to cache protocols : -5
> > [   22.002351] cxl_pci 0000:3f:00.0: Failed to create MB object for MB @ d80
> > [   22.003265] cxl_pci 0000:3f:00.0: Failed to request region 0x0000000000001fff-0x000000000010201e
> > [... snip ...]
> > [   22.339973] BUG: unable to handle page fault for address: 0000000000001000
> > [   22.340584] #PF: supervisor read access in kernel mode
> > [   22.346801] #PF: error_code(0x0000) - not-present page
> > [   22.349059] PGD 1339ec067 P4D 0
> > [   22.350877] Oops: 0000 [#1] PREEMPT SMP NOPTI
> > [   22.354558] CPU: 45 PID: 1351 Comm: systemd-udevd Not tainted 6.2.0+ #7
> > [   22.358357] RIP: 0010:cxl_probe_component_regs+0x23/0x180 [cxl_core]
> 
> Can you send the output of:
> 
> scripts/faddr2line drivers/cxl/core/cxl_core.ko cxl_probe_component_regs+0x23
> 
> ...from your kernel build directory?

cxl_probe_component_regs at /data/cxl/drivers/cxl/core/regs.c:51

partial stack trace:

readl at /data/cxl/./arch/x86/include/asm/io.h:61
cxl_probe_component_regs at /data/cxl/drivers/cxl/core/regs.c:51
map_hdm_decoder_regs at /data/cxl/drivers/cxl/core/hdm.c:95
devm_cxl_setup_hdm at /data/cxl/drivers/cxl/core/hdm.c:134
cxl_endpoint_port_probe at /data/cxl/drivers/cxl/port.c:92
(inlined by) cxl_port_probe at /data/cxl/drivers/cxl/port.c:139

> 
> I suspect this crash can be avoided with an explicit earlier check for
> missing component registers, but that's not really a fix for this
> failure.
>
> Can you also send the log without these patches applied for comparison?

attempted to trim to just relevant stuff but i can send the raw logs if
really needed.  Note - pci 0000:3f is a memory expander device.

the issue seems to appear after the first patch

>crb = ioremap(port->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE);
>if (!crb && info && info->mem_enabled) {
>	cxlhdm->decoder_count = info->ranges;
>	cxlhdm->target_count = info->ranges;
>} else if (!crb) {
>	dev_err(dev, "No component registers mapped\n");
>	return ERR_PTR(-ENXIO);
>}
>
>rc = map_hdm_decoder_regs(port, crb, &cxlhdm->regs);

we're reaching map_hdm_decoder_regs with !crb, and that leads to a
null+offset dereference in cxl_probe_component_regs:

cap_array = readl(base + CXL_CM_CAP_HDR_OFFSET);


before patches:


[    0.000000] BIOS-e820: [mem 0x0000000100000000-0x000000104dbbffff] usable
[    0.000000] BIOS-e820: [mem 0x0000001050000000-0x000000304fffffff] usable
[...]
[    0.000000] reserve setup_data: [mem 0x0000000100000000-0x000000104dbbffff] usable < dram
[    0.000000] reserve setup_data: [mem 0x0000001050000000-0x000000304fffffff] usable < memexp
[...]
[    0.001000] Early memory node ranges
[    0.001000] Initmem setup node 0 [mem 0x0000000000001000-0x000000104dbbffff]
[    0.001000] Initmem setup node 1 [mem 0x0000001050000000-0x000000304fffffff]
[...]
[    2.109081] ACPI: PCI Root Bridge [CX00] (domain 0000 [bus 3f])
[    2.109097] acpi ACPI0016:00: _OSC: OS supports [ExtendedConfig ASPM ClockPM Segments MSI EDR HPX-Type3]
[    2.109116] acpi ACPI0016:00: _OSC: OS supports [CXL11PortRegAccess CXL20PortDevRegAccess CXLProtocolErrorReporting CXLNativeHot]
[    2.109474] acpi ACPI0016:00: _OSC: platform does not support [AER LTR DPC]
[    2.110128] acpi ACPI0016:00: _OSC: OS now controls [PCIeHotplug SHPCHotplug PME PCIeCapability]
[    2.110145] acpi ACPI0016:00: _OSC: OS now controls [CXLMemErrorReporting]
[    2.110447] PCI host bridge to bus 0000:3f
[    2.110457] pci_bus 0000:3f: Unknown NUMA node; performance will be reduced
[    2.110471] pci_bus 0000:3f: root bus resource [mem 0x24000000000-0x280801fffff window]
[    2.110487] pci_bus 0000:3f: root bus resource [bus 3f]
[    2.110516] pci 0000:3f:00.0: [1b00:c000] type 00 class 0x050210
[    2.110540] pci 0000:3f:00.0: reg 0x10: [mem 0x28000000000-0x28000ffffff 64bit pref]
[    2.110564] pci 0000:3f:00.0: reg 0x18: [mem 0x26000000000-0x27fffffffff 64bit pref]
[    2.110838] pci 0000:3f:00.1: [1b00:c000] type 00 class 0x000000
[    2.110861] pci 0000:3f:00.1: reg 0x10: [mem 0x28001000000-0x280010fffff 64bit pref]
[    2.114296] acpi/hmat: Memory Flags:0001 Processor Domain:0 Memory Domain:0
[    2.114313] acpi/hmat: Memory Flags:0000 Processor Domain:1 Memory Domain:1
[    2.114328] acpi/hmat: Locality: Flags:00 Type:Access Latency Initiator Domains:1 Target Domains:2 Base:1000
[    2.114347] acpi/hmat:   Initiator-Target[0-0]:110 nsec
[    2.114358] acpi/hmat:   Initiator-Target[0-1]:510 nsec
[    2.114368] acpi/hmat: Locality: Flags:00 Type:Access Bandwidth Initiator Domains:1 Target Domains:2 Base:100
[    2.114386] acpi/hmat:   Initiator-Target[0-0]:28800 MB/s
[    2.114396] acpi/hmat:   Initiator-Target[0-1]:5000 MB/
[...]
[    2.283144] pci 0000:40:07.1: PCI bridge to [bus 41]
[    2.283159] pci 0000:40:07.1:   bridge window [mem 0x380b0100000-0x380b01fffff 64bit pref]
[    2.283187] pci_bus 0000:40: resource 4 [io  0x0000-0x02e7 window]
[    2.283199] pci_bus 0000:40: resource 5 [io  0x0300-0x03af window]
[    2.283211] pci_bus 0000:40: resource 6 [io  0x0400-0x0cf7 window]
[    2.283222] pci_bus 0000:40: resource 7 [io  0x4000-0x6fff window]
[    2.283234] pci_bus 0000:40: resource 8 [mem 0x000c0000-0x000dffff window]
[    2.283247] pci_bus 0000:40: resource 9 [mem 0xb0000000-0xb1ffffff window]
[    2.283260] pci_bus 0000:40: resource 10 [mem 0x280b0200000-0x380b01fffff window]
[    2.283274] pci_bus 0000:41: resource 2 [mem 0x380b0100000-0x380b01fffff 64bit pref]
[    2.283370] pci_bus 0000:3f: resource 4 [mem 0x24000000000-0x280801fffff window]
[...]
[    2.321401] pci 0000:3f:00.0: Adding to iommu group 30
[    2.321486] pci 0000:3f:00.1: Adding to iommu group 30
[...]
[   22.882894] cxl_pci 0000:3f:00.0: No component registers (-19)
[   22.893368] cxl root0: Failed to populate active decoder targets
[   22.894495] cxl_acpi ACPI0017:00: Failed to add decode range [0x1050000000 - 0x304fffffff]
[   22.895253]  pci0000:3f: host supports CXL (restricted)
[   22.911081] cxl_pci 0000:3f:00.0: DOE: [d80] failed to cache protocols : -5
[   22.912145] cxl_pci 0000:3f:00.0: Failed to create MB object for MB @ d80
[   22.912604] cxl_pci 0000:3f:00.0: Failed to request region 0x0000000000001fff-0x000000000010201e
[...]



after patch 1:

cxl_probe_component_regs+0x23/0x180:

readl at /data/cxl/./arch/x86/include/asm/io.h:61
cxl_probe_component_regs at /data/cxl/drivers/cxl/core/regs.c:51
map_hdm_decoder_regs at /data/cxl/drivers/cxl/core/hdm.c:95
devm_cxl_setup_hdm at /data/cxl/drivers/cxl/core/hdm.c:134
cxl_endpoint_port_probe at /data/cxl/drivers/cxl/port.c:92
(inlined by) cxl_port_probe at /data/cxl/drivers/cxl/port.c:139



[   22.782228] cxl root0: Failed to populate active decoder targets
[   22.782864] cxl_acpi ACPI0017:00: Failed to add decode range [0x1050000000 - 0x304fffffff]
[   22.784735]  pci0000:3f: host supports CXL (restricted)
[   22.835657] cxl_pci 0000:3f:00.0: No component registers (-19)
[   22.854734] piix4_smbus 0000:00:14.0: SMBus Host Controller at 0xb00, revision 0
[   22.855072] piix4_smbus 0000:00:14.0: Using register 0x02 for SMBus port selection
[   22.865626] piix4_smbus 0000:00:14.0: Auxiliary SMBus Host Controller at 0xb20
[   22.869447] cxl_pci 0000:3f:00.0: DOE: [d80] failed to cache protocols : -5
[   22.875385] cxl_pci 0000:3f:00.0: Failed to create MB object for MB @ d80
[   22.876136] cxl_pci 0000:3f:00.0: Failed to request region 0x0000000000001fff-0x000000000010201e
[... snip a bunch of ipmi stuff ...]
[   23.388179] BUG: unable to handle page fault for address: 0000000000001000
[   23.388809] #PF: supervisor read access in kernel mode
[   23.389177] #PF: error_code(0x0000) - not-present page
[   23.389566] PGD 12df78067 P4D 0
[   23.390017] Oops: 0000 [#1] PREEMPT SMP NOPTI
[   23.390405] CPU: 13 PID: 1316 Comm: systemd-udevd Not tainted 6.2.0+ #10
[   23.391098] RIP: 0010:cxl_probe_component_regs+0x23/0x180 [cxl_core]
[   23.391468] Code: 90 90 90 90 90 90 90 0f 1f 44 00 00 41 57 31 c0 b9 06 00 00 00 41 56 41 55 41 54 49 89 fc 48 89 d7 55 53 48 83 ec 10 f3 48 ab <8b> 86 00 10 00 00 66 83 f8 01 0f 85 30 01 00 00 c1 e8 18 0f 84 93
[   23.392103] RSP: 0018:ff801efeb0da78a8 EFLAGS: 00010282
[   23.392408] RAX: 0000000000000000 RBX: ff4f082287578800 RCX: 0000000000000000
[   23.392721] RDX: ff801efeb0da7910 RSI: 0000000000000000 RDI: ff801efeb0da7940
[   23.393029] RBP: ff4f08233a9eeec8 R08: 0000000000000000 R09: 0000000000000001
[   23.393326] R10: 0000000000000001 R11: 000000000c1c20e0 R12: ff4f082287578800
[   23.393613] R13: ff801efeb0da7998 R14: ff4f08233f0f0428 R15: ff4f08320032a010
[   23.393965] FS:  00007f6682548580(0000) GS:ff4f083181e00000(0000) knlGS:0000000000000000
[   23.394307] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   23.394647] CR2: 0000000000001000 CR3: 000000012df04001 CR4: 0000000000771ee0
[   23.395003] PKRU: 55555554
[   23.395324] Call Trace:
[   23.395637]  <TASK>
[   23.395933]  map_hdm_decoder_regs+0x46/0x90 [cxl_core]
[   23.396340]  devm_cxl_setup_hdm+0x95/0x120 [cxl_core]
[   23.396664]  cxl_port_probe+0xdc/0x1b0 [cxl_port]
[   23.396967]  cxl_bus_probe+0x14/0x50 [cxl_core]
[   23.397271]  really_probe+0x1b6/0x410
[   23.397557]  __driver_probe_device+0x78/0x170
[   23.397855]  driver_probe_device+0x1f/0x90
[   23.398165]  __device_attach_driver+0x85/0x110
[   23.398440]  ? __pfx___device_attach_driver+0x10/0x10
[   23.398778]  bus_for_each_drv+0x77/0xb0
[   23.399096]  __device_attach+0xb3/0x1d0
[   23.399376]  bus_probe_device+0x9f/0xc0
[   23.399656]  device_add+0x41e/0x9b0
[   23.399935]  ? kobject_set_name_vargs+0x6d/0x90
[   23.400199]  ? dev_set_name+0x4b/0x60
[   23.400445]  devm_cxl_add_port+0x345/0x570 [cxl_core]
[   23.400750]  cxl_mem_probe+0x1ca/0x310 [cxl_mem]
[   23.400995]  cxl_bus_probe+0x14/0x50 [cxl_core]
[   23.401258]  really_probe+0x1b6/0x410
[   23.401498]  __driver_probe_device+0x78/0x170
[   23.401792]  driver_probe_device+0x1f/0x90
[   23.402032]  __driver_attach+0xd2/0x1c0
[   23.402268]  ? __pfx___driver_attach+0x10/0x10
[   23.402482]  bus_for_each_dev+0x73/0xa0
[   23.402695]  bus_add_driver+0x141/0x230
[   23.402896]  driver_register+0x77/0x120
[   23.403097]  ? __pfx_init_module+0x10/0x10 [cxl_mem]
[   23.403832]  do_one_initcall+0x6b/0x350
[   23.405789]  do_init_module+0x4a/0x220
[   23.408580]  __do_sys_finit_module+0x93/0xf0
[   23.411445]  do_syscall_64+0x58/0x80
[   23.414885]  ? do_syscall_64+0x67/0x80
[   23.418410]  ? do_syscall_64+0x67/0x80
[   23.422373]  ? do_syscall_64+0x67/0x80
[   23.425381]  ? do_syscall_64+0x67/0x80
[   23.428484]  ? lockdep_hardirqs_on+0x7d/0x100
[   23.431170]  ? do_syscall_64+0x67/0x80
[   23.434292]  ? asm_exc_page_fault+0x22/0x30
[   23.437575]  ? lockdep_hardirqs_on+0x7d/0x100
[   23.440641]  entry_SYSCALL_64_after_hwframe+0x72/0xdc
[   23.443242] RIP: 0033:0x7f6681f0afbd
[   23.445806] Code: 5d c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 33 ce 0e 00 f7 d8 64 89 01 48
[   23.450708] RSP: 002b:00007fff40383f78 EFLAGS: 00000246 ORIG_RAX: 0000000000000139
[   23.453096] RAX: ffffffffffffffda RBX: 0000559037cb6d70 RCX: 00007f6681f0afbd
[   23.453110] RDX: 0000000000000000 RSI: 00007f668270443c RDI: 000000000000000e
[   23.453113] RBP: 00007f668270443c R08: 0000000000000000 R09: 0000559037cc51a0
[   23.453117] R10: 000000000000000e R11: 0000000000000246 R12: 0000000000020000
[   23.453119] R13: 0000559037cbef10 R14: 0000000000000000 R15: 0000559037cc5220
[   23.453131]  </TASK>
[   23.463696] Modules linked in: cxl_mem(+) cxl_port irqbypass rapl wmi_bmof pcspkr dax_hmem ipmi_ssif acpi_ipmi ipmi_si ipmi_devintf cxl_pci k10temp i2c_piix4 ipmi_msghandler cxl_acpi cxl_core acpi_cpufreq fuse zram xfs crct10dif_pclmul crc32_pclmul crc32c_intel polyval_clmulni polyval_generic ghash_clmulni_intel sha512_ssse3 nvme ast tg3 nvme_core i2c_algo_bit nvme_common ccp sp5100_tco wmi
[   23.463784] CR2: 0000000000001000
[   23.463799] ---[ end trace 0000000000000000 ]---
[   23.463804] RIP: 0010:cxl_probe_component_regs+0x23/0x180 [cxl_core]
[   23.464017] Code: 90 90 90 90 90 90 90 0f 1f 44 00 00 41 57 31 c0 b9 06 00 00 00 41 56 41 55 41 54 49 89 fc 48 89 d7 55 53 48 83 ec 10 f3 48 ab <8b> 86 00 10 00 00 66 83 f8 01 0f 85 30 01 00 00 c1 e8 18 0f 84 93
[   23.464021] RSP: 0018:ff801efeb0da78a8 EFLAGS: 00010282
[   23.464024] RAX: 0000000000000000 RBX: ff4f082287578800 RCX: 0000000000000000
[   23.464026] RDX: ff801efeb0da7910 RSI: 0000000000000000 RDI: ff801efeb0da7940
[   23.464028] RBP: ff4f08233a9eeec8 R08: 0000000000000000 R09: 0000000000000001
[   23.464030] R10: 0000000000000001 R11: 000000000c1c20e0 R12: ff4f082287578800
[   23.464031] R13: ff801efeb0da7998 R14: ff4f08233f0f0428 R15: ff4f08320032a010
[   23.464034] FS:  00007f6682548580(0000) GS:ff4f083181e00000(0000) knlGS:0000000000000000
[   23.464036] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   23.464037] CR2: 0000000000001000 CR3: 000000012df04001 CR4: 0000000000771ee0


after patch 2:
same results

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 0/2] cxl: DVSEC Range emulation fixups
  2023-03-01 18:46   ` Dan Williams
  2023-02-26  7:28     ` Gregory Price
  2023-03-03 16:43     ` Gregory Price
@ 2023-03-21 17:17     ` Gregory Price
  2023-03-23 17:56       ` Dan Williams
  2 siblings, 1 reply; 15+ messages in thread
From: Gregory Price @ 2023-03-21 17:17 UTC (permalink / raw)
  To: Dan Williams; +Cc: linux-cxl, Jonathan Cameron, dave.jiang

On Wed, Mar 01, 2023 at 10:46:29AM -0800, Dan Williams wrote:
> Hi Gregory,
> 
> Gregory Price wrote:
> > On Tue, Feb 21, 2023 at 05:51:13PM -0800, Dan Williams wrote:
> > > Jonathan points out that the kernel is too agressive in assuming that
> > > DVSEC range registers are in use, reliably skip emulation if
> > > 'mem_enabled' is not set. The helper devm_cxl_setup_emulated_hdm() is
> > > needlessly redoing an allocation, clean that up.
> > > 
> > > ---
> > > 
> > > Dan Williams (2):
> > >       cxl/hdm: Fix double allocation of @cxlhdm
> > >       cxl/hdm: Skip emulation when driver manages mem_enable
> > > 
> > > 
> > >  drivers/cxl/core/hdm.c |   65 ++++++++++++++++++------------------------------
> > >  drivers/cxl/cxl.h      |    4 ++-
> > >  drivers/cxl/port.c     |    2 +
> > >  3 files changed, 28 insertions(+), 43 deletions(-)
> > > 
> > > base-commit: 23c198e3dfaabbc891681aecb0855b9e0ac791e1
> > 
> > 
> > not *quite* sure what to make of this yet, but i get stack trace on boot
> > on real hardware with this patch.  I'm debugging other issues with this
> > hardware, so i'm not sure if it's related or not, but prior to this patch
> > I did not have a stack trace.
> > 
> > 
> > I think there's two issues here:
> > 
> > 1) The system I'm on fails to register a CFMW/root port decoder.  I'm
> >    not entirely sure why, other than during cxl_decoder_add(), the
> >    target map contains "[0,]" as the target id's, and the only
> >    registered ports/decoders are the endpoints.
> > 
> >    I don't know whether this is because the hardware just doesn't have a
> >    root decoder, or what.  But it makes the volatile region patches
> >    non-functional, and i have to revert back to static configuration to
> >    use the real cxl device (i.e. don't mark it EFI_MEMORY_SP).
> 
> It looks like the BIOS is trying to report something in the CEDT.CFMWS
> but it looks
> 
> > 2) Per the second bit - there's no component registers being registered
> >    for this cxl device (plus some spurious DOE error).
> 
> If the CEDT is broken then for RCH topologies the device component
> registers will also be missing.
> 

Just following up, you were correct that the CEDT.CFMWS was broken.

There are other issues that appear, but I will wait for updated BIOS and
then push a bug report once i validate it in a more sane environment.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 0/2] cxl: DVSEC Range emulation fixups
  2023-03-21 17:17     ` Gregory Price
@ 2023-03-23 17:56       ` Dan Williams
  0 siblings, 0 replies; 15+ messages in thread
From: Dan Williams @ 2023-03-23 17:56 UTC (permalink / raw)
  To: Gregory Price, Dan Williams; +Cc: linux-cxl, Jonathan Cameron, dave.jiang

Gregory Price wrote:
[..]
> > If the CEDT is broken then for RCH topologies the device component
> > registers will also be missing.
> > 
> 
> Just following up, you were correct that the CEDT.CFMWS was broken.
> 
> There are other issues that appear, but I will wait for updated BIOS and
> then push a bug report once i validate it in a more sane environment.

Ah, thanks for that follow up. I had set these aside, but will go ahead
and queue them up for 6.3-rc.



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/2] cxl/hdm: Skip emulation when driver manages mem_enable
  2023-02-22  1:51 ` [PATCH 2/2] cxl/hdm: Skip emulation when driver manages mem_enable Dan Williams
  2023-02-22 13:22   ` Jonathan Cameron
  2023-02-22 16:59   ` Dave Jiang
@ 2023-03-31 16:33   ` Fan Ni
  2 siblings, 0 replies; 15+ messages in thread
From: Fan Ni @ 2023-03-31 16:33 UTC (permalink / raw)
  To: Dan Williams; +Cc: linux-cxl, Jonathan Cameron, dave.jiang, a.manzanares, dave

The 02/21/2023 17:51, Dan Williams wrote:
> If the driver is allowed to enable memory operation itself then it can
> also turn on HDM decoder support at will.
> 
> With this the second call to cxl_setup_hdm_decoder_from_dvsec(), when
> an HDM decoder is not committed, is not needed.
> 
> Fixes: b777e9bec960 ("cxl/hdm: Emulate HDM decoder from DVSEC range registers")
> Link: http://lore.kernel.org/r/20230220113657.000042e1@huawei.com
> Reported-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>

Reviewed-by: Fan Ni <fan.ni@samsung.com>

Confirming this patch fixed the issue we hit when creating a region with
cxl create-region command and failed to find a free decoder as the
hpa_range is not correctly initialized as should_emulate_decoders
returns true instead of false when loading modules and init_hdm_decoder
is called.

> ---
>  drivers/cxl/core/hdm.c |   31 ++++++++++++++++++-------------
>  drivers/cxl/cxl.h      |    4 +++-
>  drivers/cxl/port.c     |    2 +-
>  3 files changed, 22 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
> index 520814130928..5293fe13fce3 100644
> --- a/drivers/cxl/core/hdm.c
> +++ b/drivers/cxl/core/hdm.c
> @@ -717,19 +717,29 @@ static int cxl_setup_hdm_decoder_from_dvsec(struct cxl_port *port,
>  	return 0;
>  }
>  
> -static bool should_emulate_decoders(struct cxl_port *port)
> +static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
>  {
> -	struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);
> -	void __iomem *hdm = cxlhdm->regs.hdm_decoder;
> +	struct cxl_hdm *cxlhdm;
> +	void __iomem *hdm;
>  	u32 ctrl;
>  	int i;
>  
> -	if (!is_cxl_endpoint(cxlhdm->port))
> +	if (!info)
>  		return false;
>  
> +	cxlhdm = dev_get_drvdata(&info->port->dev);
> +	hdm = cxlhdm->regs.hdm_decoder;
> +
>  	if (!hdm)
>  		return true;
>  
> +	/*
> +	 * If HDM decoders are present and the driver is in control of
> +	 * Mem_Enable skip DVSEC based emulation
> +	 */
> +	if (!info->mem_enabled)
> +		return false;
> +
>  	/*
>  	 * If any decoders are committed already, there should not be any
>  	 * emulated DVSEC decoders.
> @@ -747,7 +757,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
>  			    int *target_map, void __iomem *hdm, int which,
>  			    u64 *dpa_base, struct cxl_endpoint_dvsec_info *info)
>  {
> -	struct cxl_endpoint_decoder *cxled = NULL;
> +	struct cxl_endpoint_decoder *cxled;
>  	u64 size, base, skip, dpa_size;
>  	bool committed;
>  	u32 remainder;
> @@ -758,12 +768,9 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
>  		unsigned char target_id[8];
>  	} target_list;
>  
> -	if (should_emulate_decoders(port))
> +	if (should_emulate_decoders(info))
>  		return cxl_setup_hdm_decoder_from_dvsec(port, cxld, which, info);
>  
> -	if (is_endpoint_decoder(&cxld->dev))
> -		cxled = to_cxl_endpoint_decoder(&cxld->dev);
> -
>  	ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(which));
>  	base = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(which));
>  	size = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which));
> @@ -784,9 +791,6 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
>  		.end = base + size - 1,
>  	};
>  
> -	if (cxled && !committed && range_len(&info->dvsec_range[which]))
> -		return cxl_setup_hdm_decoder_from_dvsec(port, cxld, which, info);
> -
>  	/* decoders are enabled if committed */
>  	if (committed) {
>  		cxld->flags |= CXL_DECODER_F_ENABLE;
> @@ -824,7 +828,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
>  	if (rc)
>  		return rc;
>  
> -	if (!cxled) {
> +	if (!info) {
>  		target_list.value =
>  			ioread64_hi_lo(hdm + CXL_HDM_DECODER0_TL_LOW(which));
>  		for (i = 0; i < cxld->interleave_ways; i++)
> @@ -844,6 +848,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
>  		return -ENXIO;
>  	}
>  	skip = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_SKIP_LOW(which));
> +	cxled = to_cxl_endpoint_decoder(&cxld->dev);
>  	rc = devm_cxl_dpa_reserve(cxled, *dpa_base + skip, dpa_size, skip);
>  	if (rc) {
>  		dev_err(&port->dev,
> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index d853a0238ad7..dd4b7a729419 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -695,13 +695,15 @@ int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint)
>  
>  /**
>   * struct cxl_endpoint_dvsec_info - Cached DVSEC info
> - * @mem_enabled: cached value of mem_enabled in the DVSEC, PCIE_DEVICE
> + * @mem_enabled: cached value of mem_enabled in the DVSEC at init time
>   * @ranges: Number of active HDM ranges this device uses.
> + * @port: endpoint port associated with this info instance
>   * @dvsec_range: cached attributes of the ranges in the DVSEC, PCIE_DEVICE
>   */
>  struct cxl_endpoint_dvsec_info {
>  	bool mem_enabled;
>  	int ranges;
> +	struct cxl_port *port;
>  	struct range dvsec_range[2];
>  };
>  
> diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
> index 1049bb5ea496..9c8f46ed336b 100644
> --- a/drivers/cxl/port.c
> +++ b/drivers/cxl/port.c
> @@ -78,8 +78,8 @@ static int cxl_switch_port_probe(struct cxl_port *port)
>  
>  static int cxl_endpoint_port_probe(struct cxl_port *port)
>  {
> +	struct cxl_endpoint_dvsec_info info = { .port = port };
>  	struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport);
> -	struct cxl_endpoint_dvsec_info info = { 0 };
>  	struct cxl_dev_state *cxlds = cxlmd->cxlds;
>  	struct cxl_hdm *cxlhdm;
>  	struct cxl_port *root;
> 

-- 
John Smith
My name is not generic at all.

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2023-03-31 16:38 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-02-22  1:51 [PATCH 0/2] cxl: DVSEC Range emulation fixups Dan Williams
2023-02-22  1:51 ` [PATCH 1/2] cxl/hdm: Fix double allocation of @cxlhdm Dan Williams
2023-02-22 12:53   ` Jonathan Cameron
2023-02-22 16:57   ` Dave Jiang
2023-02-22  1:51 ` [PATCH 2/2] cxl/hdm: Skip emulation when driver manages mem_enable Dan Williams
2023-02-22 13:22   ` Jonathan Cameron
2023-02-23  5:05     ` Dan Williams
2023-02-22 16:59   ` Dave Jiang
2023-03-31 16:33   ` Fan Ni
2023-02-24  1:14 ` [PATCH 0/2] cxl: DVSEC Range emulation fixups Gregory Price
2023-03-01 18:46   ` Dan Williams
2023-02-26  7:28     ` Gregory Price
2023-03-03 16:43     ` Gregory Price
2023-03-21 17:17     ` Gregory Price
2023-03-23 17:56       ` Dan Williams

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.