linux-edac.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Daniel Ferguson <danielf@os.amperecomputing.com>
To: shiju.jose@huawei.com, linux-cxl@vger.kernel.org,
	linux-acpi@vger.kernel.org, linux-mm@kvack.org,
	dan.j.williams@intel.com, dave@stgolabs.net,
	jonathan.cameron@huawei.com, dave.jiang@intel.com,
	alison.schofield@intel.com, vishal.l.verma@intel.com,
	ira.weiny@intel.com
Cc: linux-edac@vger.kernel.org, linux-kernel@vger.kernel.org,
	david@redhat.com, Vilas.Sridharan@amd.com, leo.duran@amd.com,
	Yazen.Ghannam@amd.com, rientjes@google.com, jiaqiyan@google.com,
	tony.luck@intel.com, Jon.Grimm@amd.com,
	dave.hansen@linux.intel.com, rafael@kernel.org, lenb@kernel.org,
	naoya.horiguchi@nec.com, james.morse@arm.com,
	jthoughton@google.com, somasundaram.a@hpe.com,
	erdemaktas@google.com, pgonda@google.com, duenwen@google.com,
	mike.malvestuto@intel.com, gthelen@google.com,
	wschwartz@amperecomputing.com, dferguson@amperecomputing.com,
	tanxiaofei@huawei.com, prime.zeng@hisilicon.com,
	kangkang.shen@futurewei.com, wanghuiqiang@huawei.com,
	linuxarm@huawei.com, wbs@os.amperecomputing.com
Subject: Re: [RFC PATCH v7 12/12] memory: RAS2: Add memory RAS2 driver
Date: Thu, 28 Mar 2024 16:41:36 -0700	[thread overview]
Message-ID: <78d11760-bb43-42a1-a302-3e2d3bf40c48@os.amperecomputing.com> (raw)
In-Reply-To: <20240223143723.1574-13-shiju.jose@huawei.com>

> +/*
> + * The below functions are exposed to OSPM, to query, configure and
> + * initiate memory patrol scrub.
> + */
> +static int ras2_is_patrol_scrub_support(struct ras2_context *ras2_ctx)
> +{
> +	int ret;
> +	struct acpi_ras2_shared_memory  __iomem *generic_comm_base;
> +
> +	if (!ras2_ctx || !ras2_ctx->pcc_comm_addr)
> +		return -EFAULT;
> +
> +	generic_comm_base = ras2_ctx->pcc_comm_addr;
> +	guard(spinlock_irqsave)(&ras2_ctx->spinlock);
> +	generic_comm_base->set_capabilities[0] = 0;
> +
> +	/* send command for reading RAS2 capabilities */
> +	ret = ras2_send_pcc_cmd(ras2_ctx, RAS2_PCC_CMD_EXEC);
> +	if (ret) {
> +		dev_err(ras2_ctx->dev,
> +			"%s: ras2_send_pcc_cmd failed\n", __func__);
> +		return ret;
> +	}
> +
> +	return generic_comm_base->features[0] & RAS2_SUPPORT_HW_PARTOL_SCRUB;

Since firmware populates the feature bitmask on initialization, it would 
seem
that we do not need to send a PCC CMD EXEC to read RAS2 capabilities.
> +}
> +
> +static int ras2_get_patrol_scrub_params(struct ras2_context *ras2_ctx,
> +					struct ras2_scrub_params *params)
> +{
> +	int ret = 0;
> +	u8  min_supp_scrub_rate, max_supp_scrub_rate;
> +	struct acpi_ras2_shared_memory  __iomem *generic_comm_base;
> +	struct acpi_ras2_patrol_scrub_parameter __iomem *patrol_scrub_params;
> +
> +	if (!ras2_ctx || !ras2_ctx->pcc_comm_addr)
> +		return -EFAULT;
> +
> +	generic_comm_base = ras2_ctx->pcc_comm_addr;
> +	patrol_scrub_params = ras2_ctx->pcc_comm_addr + sizeof(*generic_comm_base);
> +
> +	guard(spinlock_irqsave)(&ras2_ctx->spinlock);
> +	generic_comm_base->set_capabilities[0] = RAS2_SUPPORT_HW_PARTOL_SCRUB;
> +	/* send command for reading RAS2 capabilities */
> +	ret = ras2_send_pcc_cmd(ras2_ctx, RAS2_PCC_CMD_EXEC);
> +	if (ret) {
> +		dev_err(ras2_ctx->dev,
> +			"%s: ras2_send_pcc_cmd failed\n", __func__);
> +		return ret;
> +	}

Similarly, since firmware populates the feature bitmask on
initialization, it would seem that we do not need to send
a PCC CMD EXEC to read RAS2 capabilities.
> +
> +	if (!(generic_comm_base->features[0] & RAS2_SUPPORT_HW_PARTOL_SCRUB) ||
> +	    !(generic_comm_base->num_parameter_blocks)) {
> +		dev_err(ras2_ctx->dev,
> +			"%s: Platform does not support HW Patrol Scrubber\n", __func__);
> +		return -EOPNOTSUPP;
> +	}
> +
> +	if (!patrol_scrub_params->requested_address_range[1]) {
> +		dev_err(ras2_ctx->dev,
> +			"%s: Invalid requested address range, \
> +			requested_address_range[0]=0x%llx \
> +			requested_address_range[1]=0x%llx\n",
> +			__func__,
> +			patrol_scrub_params->requested_address_range[0],
> +			patrol_scrub_params->requested_address_range[1]);
> +		return -EOPNOTSUPP;
> +	}
> +
> +	generic_comm_base->set_capabilities[0] = RAS2_SUPPORT_HW_PARTOL_SCRUB;
> +	patrol_scrub_params->header.type = RAS2_TYPE_PATROL_SCRUB;

header.type should already be populated by firmware. Is assigning it
here necessary?
> +	patrol_scrub_params->patrol_scrub_command = RAS2_GET_PATROL_PARAMETERS;
> +
> +	/* send command for reading the HW patrol scrub parameters */
> +	ret = ras2_send_pcc_cmd(ras2_ctx, RAS2_PCC_CMD_EXEC);
> +	if (ret) {
> +		dev_err(ras2_ctx->dev,
> +			"%s: failed to read HW patrol scrub parameters\n",
> +			__func__);
> +		return ret;
> +	}
> +
> +	/* copy output scrub parameters */
> +	params->addr_base = patrol_scrub_params->actual_address_range[0];
> +	params->addr_size = patrol_scrub_params->actual_address_range[1];
> +	params->flags = patrol_scrub_params->flags;
> +	params->rate = FIELD_GET(RAS2_PATROL_SCRUB_RATE_OUT_MASK,
> +				 patrol_scrub_params->scrub_params_out);
> +	min_supp_scrub_rate = FIELD_GET(RAS2_PATROL_SCRUB_MIN_RATE_OUT_MASK,
> +					patrol_scrub_params->scrub_params_out);
> +	max_supp_scrub_rate = FIELD_GET(RAS2_PATROL_SCRUB_MAX_RATE_OUT_MASK,
> +					patrol_scrub_params->scrub_params_out);
> +	snprintf(params->rate_avail, RAS2_MAX_RATE_RANGE_LENGTH,
> +		 "%d-%d", min_supp_scrub_rate, max_supp_scrub_rate);
> +
> +	return 0;
> +}
> +
> +static int ras2_enable_patrol_scrub(struct ras2_context *ras2_ctx, bool enable)
> +{
> +	int ret = 0;
> +	struct ras2_scrub_params params;
> +	struct acpi_ras2_shared_memory  __iomem *generic_comm_base;
> +	u8 scrub_rate_to_set, min_supp_scrub_rate, max_supp_scrub_rate;
> +	struct acpi_ras2_patrol_scrub_parameter __iomem *patrol_scrub_params;
> +
> +	if (!ras2_ctx || !ras2_ctx->pcc_comm_addr)
> +		return -EFAULT;
> +
> +	generic_comm_base = ras2_ctx->pcc_comm_addr;
> +	patrol_scrub_params = ras2_ctx->pcc_comm_addr + sizeof(*generic_comm_base);
> +
> +	if (enable) {
> +		ret = ras2_get_patrol_scrub_params(ras2_ctx, &params);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	guard(spinlock_irqsave)(&ras2_ctx->spinlock);
> +	generic_comm_base->set_capabilities[0] = RAS2_SUPPORT_HW_PARTOL_SCRUB;
> +	patrol_scrub_params->header.type = RAS2_TYPE_PATROL_SCRUB;

header.type should already be populated by firmware. Is assigning it
here necessary?
> +
> +	if (enable) {
> +		patrol_scrub_params->patrol_scrub_command = RAS2_START_PATROL_SCRUBBER;
> +		patrol_scrub_params->requested_address_range[0] = params.addr_base;
> +		patrol_scrub_params->requested_address_range[1] = params.addr_size;
> +
> +		scrub_rate_to_set = FIELD_GET(RAS2_PATROL_SCRUB_RATE_IN_MASK,
> +					      patrol_scrub_params->scrub_params_in);
> +		min_supp_scrub_rate = FIELD_GET(RAS2_PATROL_SCRUB_MIN_RATE_OUT_MASK,
> +						patrol_scrub_params->scrub_params_out);
> +		max_supp_scrub_rate = FIELD_GET(RAS2_PATROL_SCRUB_MAX_RATE_OUT_MASK,
> +						patrol_scrub_params->scrub_params_out);
> +		if (scrub_rate_to_set < min_supp_scrub_rate ||
> +		    scrub_rate_to_set > max_supp_scrub_rate) {
> +			dev_warn(ras2_ctx->dev,
> +				 "patrol scrub rate to set is out of the supported range\n");
> +			dev_warn(ras2_ctx->dev,
> +				 "min_supp_scrub_rate=%d max_supp_scrub_rate=%d\n",
> +				 min_supp_scrub_rate, max_supp_scrub_rate);
> +			return -EINVAL;
> +		}
> +	} else {
> +		patrol_scrub_params->patrol_scrub_command = RAS2_STOP_PATROL_SCRUBBER;
> +	}
> +
> +	/* send command for enable/disable HW patrol scrub */
> +	ret = ras2_send_pcc_cmd(ras2_ctx, RAS2_PCC_CMD_EXEC);
> +	if (ret) {
> +		pr_err("%s: failed to enable/disable the HW patrol scrub\n", __func__);
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +static int ras2_enable_background_scrub(struct ras2_context *ras2_ctx, bool enable)
> +{
> +	int ret;
> +	struct acpi_ras2_shared_memory  __iomem *generic_comm_base;
> +	struct acpi_ras2_patrol_scrub_parameter __iomem *patrol_scrub_params;
> +
> +	if (!ras2_ctx || !ras2_ctx->pcc_comm_addr)
> +		return -EFAULT;
> +
> +	generic_comm_base = ras2_ctx->pcc_comm_addr;
> +	patrol_scrub_params = ras2_ctx->pcc_comm_addr + sizeof(*generic_comm_base);
> +
> +	guard(spinlock_irqsave)(&ras2_ctx->spinlock);
> +	generic_comm_base->set_capabilities[0] = RAS2_SUPPORT_HW_PARTOL_SCRUB;
> +	patrol_scrub_params->header.type = RAS2_TYPE_PATROL_SCRUB;

header.type should already be populated by firmware. Is assigning it
here necessary?
> +	patrol_scrub_params->patrol_scrub_command = RAS2_START_PATROL_SCRUBBER;
> +
> +	patrol_scrub_params->scrub_params_in &= ~RAS2_PATROL_SCRUB_EN_BACKGROUND;
> +	patrol_scrub_params->scrub_params_in |= FIELD_PREP(RAS2_PATROL_SCRUB_EN_BACKGROUND,
> +							   enable);
> +
> +	/* send command for enable/disable HW patrol scrub */
> +	ret = ras2_send_pcc_cmd(ras2_ctx, RAS2_PCC_CMD_EXEC);
> +	if (ret) {
> +		dev_err(ras2_ctx->dev,
> +			"%s: failed to enable/disable background patrol scrubbing\n",
> +			__func__);
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +static int ras2_set_patrol_scrub_params(struct ras2_context *ras2_ctx,
> +					struct ras2_scrub_params *params, u8 param_type)
> +{
> +	struct acpi_ras2_shared_memory  __iomem *generic_comm_base;
> +	struct acpi_ras2_patrol_scrub_parameter __iomem *patrol_scrub_params;
> +
> +	if (!ras2_ctx || !ras2_ctx->pcc_comm_addr)
> +		return -EFAULT;
> +
> +	generic_comm_base = ras2_ctx->pcc_comm_addr;
> +	patrol_scrub_params = ras2_ctx->pcc_comm_addr + sizeof(*generic_comm_base);
> +
> +	guard(spinlock_irqsave)(&ras2_ctx->spinlock);
> +	patrol_scrub_params->header.type = RAS2_TYPE_PATROL_SCRUB;
> +	if (param_type == RAS2_MEM_SCRUB_PARAM_ADDR_BASE && params->addr_base) {
> +		patrol_scrub_params->requested_address_range[0] = params->addr_base;
> +	} else if (param_type == RAS2_MEM_SCRUB_PARAM_ADDR_SIZE && params->addr_size) {
> +		patrol_scrub_params->requested_address_range[1] = params->addr_size;
> +	} else if (param_type == RAS2_MEM_SCRUB_PARAM_RATE) {
> +		patrol_scrub_params->scrub_params_in &= ~RAS2_PATROL_SCRUB_RATE_IN_MASK;
> +		patrol_scrub_params->scrub_params_in |= FIELD_PREP(RAS2_PATROL_SCRUB_RATE_IN_MASK,
> +								   params->rate);
> +	} else {
> +		dev_err(ras2_ctx->dev, "Invalid patrol scrub parameter to set\n");
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
> +static const struct ras2_hw_scrub_ops ras2_hw_ops = {
> +	.enable_scrub = ras2_enable_patrol_scrub,
> +	.enable_background_scrub = ras2_enable_background_scrub,
> +	.get_scrub_params = ras2_get_patrol_scrub_params,
> +	.set_scrub_params = ras2_set_patrol_scrub_params,
> +};
> +
> +static const struct scrub_ops ras2_scrub_ops = {
> +	.is_visible = ras2_hw_scrub_is_visible,
> +	.read = ras2_hw_scrub_read,
> +	.write = ras2_hw_scrub_write,
> +	.read_string = ras2_hw_scrub_read_strings,
> +};
> +
> +static DEFINE_IDA(ras2_ida);
> +
> +static void devm_ras2_release(void *ctx)
> +{
> +	struct ras2_context *ras2_ctx = ctx;
> +
> +	ida_free(&ras2_ida, ras2_ctx->id);
> +	ras2_unregister_pcc_channel(ras2_ctx);
> +}
> +
> +static int ras2_probe(struct platform_device *pdev)
> +{
> +	int ret, id;
> +	struct mbox_client *cl;
> +	struct device *hw_scrub_dev;
> +	struct ras2_context *ras2_ctx;
> +	char scrub_name[RAS2_MAX_NAME_LENGTH];
> +
> +	ras2_ctx = devm_kzalloc(&pdev->dev, sizeof(*ras2_ctx), GFP_KERNEL);
> +	if (!ras2_ctx)
> +		return -ENOMEM;
> +
> +	ras2_ctx->dev = &pdev->dev;
> +	ras2_ctx->ops = &ras2_hw_ops;
> +	spin_lock_init(&ras2_ctx->spinlock);
> +	platform_set_drvdata(pdev, ras2_ctx);
> +
> +	cl = &ras2_ctx->mbox_client;
> +	/* Request mailbox channel */
> +	cl->dev = &pdev->dev;
> +	cl->tx_done = ras2_tx_done;
> +	cl->knows_txdone = true;
> +	ras2_ctx->pcc_subspace_idx = *((int *)pdev->dev.platform_data);
> +	dev_dbg(&pdev->dev, "pcc-subspace-id=%d\n", ras2_ctx->pcc_subspace_idx);
> +	ret = ras2_register_pcc_channel(ras2_ctx);

In our enabling activities, we have found a challenge here.
Our hardware has a single PCC channel corresponding to a single
platform-wide scrub interface. This driver, following the ACPI spec,
will create a new scrub node for each NUMA node. However, for us,
this means that each scrub device will try to map the same PCC channel,
and this causes an error.
> +	if (ret < 0)
> +		return ret;
> +
> +	ret = devm_add_action_or_reset(&pdev->dev, devm_ras2_release, ras2_ctx);
> +	if (ret < 0)
> +		return ret;
> +
> +	if (ras2_is_patrol_scrub_support(ras2_ctx)) {
> +		id = ida_alloc(&ras2_ida, GFP_KERNEL);
> +		if (id < 0)
> +			return id;
> +		ras2_ctx->id = id;
> +		snprintf(scrub_name, sizeof(scrub_name), "%s%d", RAS2_SCRUB, id);
> +		dev_set_name(&pdev->dev, RAS2_ID_FORMAT, id);
> +		hw_scrub_dev = devm_scrub_device_register(&pdev->dev, scrub_name,
> +							  ras2_ctx, &ras2_scrub_ops,
> +							  0, NULL);
> +		if (PTR_ERR_OR_ZERO(hw_scrub_dev))
> +			return PTR_ERR_OR_ZERO(hw_scrub_dev);
> +	}
> +	ras2_ctx->scrub_dev = hw_scrub_dev;
> +
> +	return 0;
> +}










  parent reply	other threads:[~2024-03-28 23:41 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-23 14:37 [RFC PATCH v7 00/12] memory: scrub: introduce subsystem + CXL/ACPI-RAS2 drivers shiju.jose
2024-02-23 14:37 ` [RFC PATCH v7 01/12] cxl/mbox: Add GET_SUPPORTED_FEATURES mailbox command shiju.jose
2024-02-23 14:37 ` [RFC PATCH v7 02/12] cxl/mbox: Add GET_FEATURE " shiju.jose
2024-02-23 14:37 ` [RFC PATCH v7 03/12] cxl/mbox: Add SET_FEATURE " shiju.jose
2024-03-11 21:20   ` fan
2024-03-12  9:41     ` Shiju Jose
2024-02-23 14:37 ` [RFC PATCH v7 04/12] cxl/memscrub: Add CXL device patrol scrub control feature shiju.jose
2024-02-23 14:37 ` [RFC PATCH v7 05/12] cxl/memscrub: Add CXL device ECS " shiju.jose
2024-02-23 14:37 ` [RFC PATCH v7 06/12] memory: scrub: Add scrub subsystem driver supports configuring memory scrubs in the system shiju.jose
2024-02-23 14:37 ` [RFC PATCH v7 07/12] cxl/memscrub: Register CXL device patrol scrub with scrub subsystem driver shiju.jose
2024-02-23 14:37 ` [RFC PATCH v7 08/12] cxl/memscrub: Register CXL device ECS " shiju.jose
2024-02-23 14:37 ` [RFC PATCH v7 09/12] ACPICA: ACPI 6.5: Add support for RAS2 table shiju.jose
2024-02-23 14:37 ` [RFC PATCH v7 10/12] ACPI:RAS2: Add common library for RAS2 PCC interfaces shiju.jose
2024-03-12 18:32   ` fan
2024-03-28 23:40   ` Daniel Ferguson
2024-02-23 14:37 ` [RFC PATCH v7 11/12] ACPI:RAS2: Add driver for ACPI RAS2 feature table (RAS2) shiju.jose
2024-03-28 23:41   ` Daniel Ferguson
2024-02-23 14:37 ` [RFC PATCH v7 12/12] memory: RAS2: Add memory RAS2 driver shiju.jose
2024-03-28 15:23   ` Yazen Ghannam
2024-04-02 10:17     ` Jonathan Cameron
2024-03-28 23:41   ` Daniel Ferguson [this message]
2024-04-03 14:03     ` Shiju Jose
2024-02-23 15:42 ` [RFC PATCH v7 00/12] memory: scrub: introduce subsystem + CXL/ACPI-RAS2 drivers Borislav Petkov
2024-02-23 16:25   ` Jonathan Cameron
2024-02-23 17:51     ` Borislav Petkov
2024-02-23 17:57     ` Duran, Leo
2024-03-28 23:39 ` Daniel Ferguson
2024-04-03 13:52   ` Shiju Jose

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=78d11760-bb43-42a1-a302-3e2d3bf40c48@os.amperecomputing.com \
    --to=danielf@os.amperecomputing.com \
    --cc=Jon.Grimm@amd.com \
    --cc=Vilas.Sridharan@amd.com \
    --cc=Yazen.Ghannam@amd.com \
    --cc=alison.schofield@intel.com \
    --cc=dan.j.williams@intel.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=dave.jiang@intel.com \
    --cc=dave@stgolabs.net \
    --cc=david@redhat.com \
    --cc=dferguson@amperecomputing.com \
    --cc=duenwen@google.com \
    --cc=erdemaktas@google.com \
    --cc=gthelen@google.com \
    --cc=ira.weiny@intel.com \
    --cc=james.morse@arm.com \
    --cc=jiaqiyan@google.com \
    --cc=jonathan.cameron@huawei.com \
    --cc=jthoughton@google.com \
    --cc=kangkang.shen@futurewei.com \
    --cc=lenb@kernel.org \
    --cc=leo.duran@amd.com \
    --cc=linux-acpi@vger.kernel.org \
    --cc=linux-cxl@vger.kernel.org \
    --cc=linux-edac@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linuxarm@huawei.com \
    --cc=mike.malvestuto@intel.com \
    --cc=naoya.horiguchi@nec.com \
    --cc=pgonda@google.com \
    --cc=prime.zeng@hisilicon.com \
    --cc=rafael@kernel.org \
    --cc=rientjes@google.com \
    --cc=shiju.jose@huawei.com \
    --cc=somasundaram.a@hpe.com \
    --cc=tanxiaofei@huawei.com \
    --cc=tony.luck@intel.com \
    --cc=vishal.l.verma@intel.com \
    --cc=wanghuiqiang@huawei.com \
    --cc=wbs@os.amperecomputing.com \
    --cc=wschwartz@amperecomputing.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).