All of lore.kernel.org
 help / color / mirror / Atom feed
From: Daniel Ferguson <danielf@os.amperecomputing.com>
To: shiju.jose@huawei.com, linux-cxl@vger.kernel.org,
	linux-acpi@vger.kernel.org, linux-mm@kvack.org,
	dan.j.williams@intel.com, dave@stgolabs.net,
	jonathan.cameron@huawei.com, dave.jiang@intel.com,
	alison.schofield@intel.com, vishal.l.verma@intel.com,
	ira.weiny@intel.com
Cc: linux-edac@vger.kernel.org, linux-kernel@vger.kernel.org,
	david@redhat.com, Vilas.Sridharan@amd.com, leo.duran@amd.com,
	Yazen.Ghannam@amd.com, rientjes@google.com, jiaqiyan@google.com,
	tony.luck@intel.com, Jon.Grimm@amd.com,
	dave.hansen@linux.intel.com, rafael@kernel.org, lenb@kernel.org,
	naoya.horiguchi@nec.com, james.morse@arm.com,
	jthoughton@google.com, somasundaram.a@hpe.com,
	erdemaktas@google.com, pgonda@google.com, duenwen@google.com,
	mike.malvestuto@intel.com, gthelen@google.com,
	wschwartz@amperecomputing.com, dferguson@amperecomputing.com,
	tanxiaofei@huawei.com, prime.zeng@hisilicon.com,
	kangkang.shen@futurewei.com, wanghuiqiang@huawei.com,
	linuxarm@huawei.com, wbs@os.amperecomputing.com
Subject: Re: [RFC PATCH v7 12/12] memory: RAS2: Add memory RAS2 driver
Date: Thu, 28 Mar 2024 16:41:36 -0700	[thread overview]
Message-ID: <78d11760-bb43-42a1-a302-3e2d3bf40c48@os.amperecomputing.com> (raw)
In-Reply-To: <20240223143723.1574-13-shiju.jose@huawei.com>

> +/*
> + * The below functions are exposed to OSPM, to query, configure and
> + * initiate memory patrol scrub.
> + */
> +static int ras2_is_patrol_scrub_support(struct ras2_context *ras2_ctx)
> +{
> +	int ret;
> +	struct acpi_ras2_shared_memory  __iomem *generic_comm_base;
> +
> +	if (!ras2_ctx || !ras2_ctx->pcc_comm_addr)
> +		return -EFAULT;
> +
> +	generic_comm_base = ras2_ctx->pcc_comm_addr;
> +	guard(spinlock_irqsave)(&ras2_ctx->spinlock);
> +	generic_comm_base->set_capabilities[0] = 0;
> +
> +	/* send command for reading RAS2 capabilities */
> +	ret = ras2_send_pcc_cmd(ras2_ctx, RAS2_PCC_CMD_EXEC);
> +	if (ret) {
> +		dev_err(ras2_ctx->dev,
> +			"%s: ras2_send_pcc_cmd failed\n", __func__);
> +		return ret;
> +	}
> +
> +	return generic_comm_base->features[0] & RAS2_SUPPORT_HW_PARTOL_SCRUB;

Since firmware populates the feature bitmask on initialization, it would 
seem
that we do not need to send a PCC CMD EXEC to read RAS2 capabilities.
> +}
> +
> +static int ras2_get_patrol_scrub_params(struct ras2_context *ras2_ctx,
> +					struct ras2_scrub_params *params)
> +{
> +	int ret = 0;
> +	u8  min_supp_scrub_rate, max_supp_scrub_rate;
> +	struct acpi_ras2_shared_memory  __iomem *generic_comm_base;
> +	struct acpi_ras2_patrol_scrub_parameter __iomem *patrol_scrub_params;
> +
> +	if (!ras2_ctx || !ras2_ctx->pcc_comm_addr)
> +		return -EFAULT;
> +
> +	generic_comm_base = ras2_ctx->pcc_comm_addr;
> +	patrol_scrub_params = ras2_ctx->pcc_comm_addr + sizeof(*generic_comm_base);
> +
> +	guard(spinlock_irqsave)(&ras2_ctx->spinlock);
> +	generic_comm_base->set_capabilities[0] = RAS2_SUPPORT_HW_PARTOL_SCRUB;
> +	/* send command for reading RAS2 capabilities */
> +	ret = ras2_send_pcc_cmd(ras2_ctx, RAS2_PCC_CMD_EXEC);
> +	if (ret) {
> +		dev_err(ras2_ctx->dev,
> +			"%s: ras2_send_pcc_cmd failed\n", __func__);
> +		return ret;
> +	}

Similarly, since firmware populates the feature bitmask on
initialization, it would seem that we do not need to send
a PCC CMD EXEC to read RAS2 capabilities.
> +
> +	if (!(generic_comm_base->features[0] & RAS2_SUPPORT_HW_PARTOL_SCRUB) ||
> +	    !(generic_comm_base->num_parameter_blocks)) {
> +		dev_err(ras2_ctx->dev,
> +			"%s: Platform does not support HW Patrol Scrubber\n", __func__);
> +		return -EOPNOTSUPP;
> +	}
> +
> +	if (!patrol_scrub_params->requested_address_range[1]) {
> +		dev_err(ras2_ctx->dev,
> +			"%s: Invalid requested address range, \
> +			requested_address_range[0]=0x%llx \
> +			requested_address_range[1]=0x%llx\n",
> +			__func__,
> +			patrol_scrub_params->requested_address_range[0],
> +			patrol_scrub_params->requested_address_range[1]);
> +		return -EOPNOTSUPP;
> +	}
> +
> +	generic_comm_base->set_capabilities[0] = RAS2_SUPPORT_HW_PARTOL_SCRUB;
> +	patrol_scrub_params->header.type = RAS2_TYPE_PATROL_SCRUB;

header.type should already be populated by firmware. Is assigning it
here necessary?
> +	patrol_scrub_params->patrol_scrub_command = RAS2_GET_PATROL_PARAMETERS;
> +
> +	/* send command for reading the HW patrol scrub parameters */
> +	ret = ras2_send_pcc_cmd(ras2_ctx, RAS2_PCC_CMD_EXEC);
> +	if (ret) {
> +		dev_err(ras2_ctx->dev,
> +			"%s: failed to read HW patrol scrub parameters\n",
> +			__func__);
> +		return ret;
> +	}
> +
> +	/* copy output scrub parameters */
> +	params->addr_base = patrol_scrub_params->actual_address_range[0];
> +	params->addr_size = patrol_scrub_params->actual_address_range[1];
> +	params->flags = patrol_scrub_params->flags;
> +	params->rate = FIELD_GET(RAS2_PATROL_SCRUB_RATE_OUT_MASK,
> +				 patrol_scrub_params->scrub_params_out);
> +	min_supp_scrub_rate = FIELD_GET(RAS2_PATROL_SCRUB_MIN_RATE_OUT_MASK,
> +					patrol_scrub_params->scrub_params_out);
> +	max_supp_scrub_rate = FIELD_GET(RAS2_PATROL_SCRUB_MAX_RATE_OUT_MASK,
> +					patrol_scrub_params->scrub_params_out);
> +	snprintf(params->rate_avail, RAS2_MAX_RATE_RANGE_LENGTH,
> +		 "%d-%d", min_supp_scrub_rate, max_supp_scrub_rate);
> +
> +	return 0;
> +}
> +
> +static int ras2_enable_patrol_scrub(struct ras2_context *ras2_ctx, bool enable)
> +{
> +	int ret = 0;
> +	struct ras2_scrub_params params;
> +	struct acpi_ras2_shared_memory  __iomem *generic_comm_base;
> +	u8 scrub_rate_to_set, min_supp_scrub_rate, max_supp_scrub_rate;
> +	struct acpi_ras2_patrol_scrub_parameter __iomem *patrol_scrub_params;
> +
> +	if (!ras2_ctx || !ras2_ctx->pcc_comm_addr)
> +		return -EFAULT;
> +
> +	generic_comm_base = ras2_ctx->pcc_comm_addr;
> +	patrol_scrub_params = ras2_ctx->pcc_comm_addr + sizeof(*generic_comm_base);
> +
> +	if (enable) {
> +		ret = ras2_get_patrol_scrub_params(ras2_ctx, &params);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	guard(spinlock_irqsave)(&ras2_ctx->spinlock);
> +	generic_comm_base->set_capabilities[0] = RAS2_SUPPORT_HW_PARTOL_SCRUB;
> +	patrol_scrub_params->header.type = RAS2_TYPE_PATROL_SCRUB;

header.type should already be populated by firmware. Is assigning it
here necessary?
> +
> +	if (enable) {
> +		patrol_scrub_params->patrol_scrub_command = RAS2_START_PATROL_SCRUBBER;
> +		patrol_scrub_params->requested_address_range[0] = params.addr_base;
> +		patrol_scrub_params->requested_address_range[1] = params.addr_size;
> +
> +		scrub_rate_to_set = FIELD_GET(RAS2_PATROL_SCRUB_RATE_IN_MASK,
> +					      patrol_scrub_params->scrub_params_in);
> +		min_supp_scrub_rate = FIELD_GET(RAS2_PATROL_SCRUB_MIN_RATE_OUT_MASK,
> +						patrol_scrub_params->scrub_params_out);
> +		max_supp_scrub_rate = FIELD_GET(RAS2_PATROL_SCRUB_MAX_RATE_OUT_MASK,
> +						patrol_scrub_params->scrub_params_out);
> +		if (scrub_rate_to_set < min_supp_scrub_rate ||
> +		    scrub_rate_to_set > max_supp_scrub_rate) {
> +			dev_warn(ras2_ctx->dev,
> +				 "patrol scrub rate to set is out of the supported range\n");
> +			dev_warn(ras2_ctx->dev,
> +				 "min_supp_scrub_rate=%d max_supp_scrub_rate=%d\n",
> +				 min_supp_scrub_rate, max_supp_scrub_rate);
> +			return -EINVAL;
> +		}
> +	} else {
> +		patrol_scrub_params->patrol_scrub_command = RAS2_STOP_PATROL_SCRUBBER;
> +	}
> +
> +	/* send command for enable/disable HW patrol scrub */
> +	ret = ras2_send_pcc_cmd(ras2_ctx, RAS2_PCC_CMD_EXEC);
> +	if (ret) {
> +		pr_err("%s: failed to enable/disable the HW patrol scrub\n", __func__);
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +static int ras2_enable_background_scrub(struct ras2_context *ras2_ctx, bool enable)
> +{
> +	int ret;
> +	struct acpi_ras2_shared_memory  __iomem *generic_comm_base;
> +	struct acpi_ras2_patrol_scrub_parameter __iomem *patrol_scrub_params;
> +
> +	if (!ras2_ctx || !ras2_ctx->pcc_comm_addr)
> +		return -EFAULT;
> +
> +	generic_comm_base = ras2_ctx->pcc_comm_addr;
> +	patrol_scrub_params = ras2_ctx->pcc_comm_addr + sizeof(*generic_comm_base);
> +
> +	guard(spinlock_irqsave)(&ras2_ctx->spinlock);
> +	generic_comm_base->set_capabilities[0] = RAS2_SUPPORT_HW_PARTOL_SCRUB;
> +	patrol_scrub_params->header.type = RAS2_TYPE_PATROL_SCRUB;

header.type should already be populated by firmware. Is assigning it
here necessary?
> +	patrol_scrub_params->patrol_scrub_command = RAS2_START_PATROL_SCRUBBER;
> +
> +	patrol_scrub_params->scrub_params_in &= ~RAS2_PATROL_SCRUB_EN_BACKGROUND;
> +	patrol_scrub_params->scrub_params_in |= FIELD_PREP(RAS2_PATROL_SCRUB_EN_BACKGROUND,
> +							   enable);
> +
> +	/* send command for enable/disable HW patrol scrub */
> +	ret = ras2_send_pcc_cmd(ras2_ctx, RAS2_PCC_CMD_EXEC);
> +	if (ret) {
> +		dev_err(ras2_ctx->dev,
> +			"%s: failed to enable/disable background patrol scrubbing\n",
> +			__func__);
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +static int ras2_set_patrol_scrub_params(struct ras2_context *ras2_ctx,
> +					struct ras2_scrub_params *params, u8 param_type)
> +{
> +	struct acpi_ras2_shared_memory  __iomem *generic_comm_base;
> +	struct acpi_ras2_patrol_scrub_parameter __iomem *patrol_scrub_params;
> +
> +	if (!ras2_ctx || !ras2_ctx->pcc_comm_addr)
> +		return -EFAULT;
> +
> +	generic_comm_base = ras2_ctx->pcc_comm_addr;
> +	patrol_scrub_params = ras2_ctx->pcc_comm_addr + sizeof(*generic_comm_base);
> +
> +	guard(spinlock_irqsave)(&ras2_ctx->spinlock);
> +	patrol_scrub_params->header.type = RAS2_TYPE_PATROL_SCRUB;
> +	if (param_type == RAS2_MEM_SCRUB_PARAM_ADDR_BASE && params->addr_base) {
> +		patrol_scrub_params->requested_address_range[0] = params->addr_base;
> +	} else if (param_type == RAS2_MEM_SCRUB_PARAM_ADDR_SIZE && params->addr_size) {
> +		patrol_scrub_params->requested_address_range[1] = params->addr_size;
> +	} else if (param_type == RAS2_MEM_SCRUB_PARAM_RATE) {
> +		patrol_scrub_params->scrub_params_in &= ~RAS2_PATROL_SCRUB_RATE_IN_MASK;
> +		patrol_scrub_params->scrub_params_in |= FIELD_PREP(RAS2_PATROL_SCRUB_RATE_IN_MASK,
> +								   params->rate);
> +	} else {
> +		dev_err(ras2_ctx->dev, "Invalid patrol scrub parameter to set\n");
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
> +static const struct ras2_hw_scrub_ops ras2_hw_ops = {
> +	.enable_scrub = ras2_enable_patrol_scrub,
> +	.enable_background_scrub = ras2_enable_background_scrub,
> +	.get_scrub_params = ras2_get_patrol_scrub_params,
> +	.set_scrub_params = ras2_set_patrol_scrub_params,
> +};
> +
> +static const struct scrub_ops ras2_scrub_ops = {
> +	.is_visible = ras2_hw_scrub_is_visible,
> +	.read = ras2_hw_scrub_read,
> +	.write = ras2_hw_scrub_write,
> +	.read_string = ras2_hw_scrub_read_strings,
> +};
> +
> +static DEFINE_IDA(ras2_ida);
> +
> +static void devm_ras2_release(void *ctx)
> +{
> +	struct ras2_context *ras2_ctx = ctx;
> +
> +	ida_free(&ras2_ida, ras2_ctx->id);
> +	ras2_unregister_pcc_channel(ras2_ctx);
> +}
> +
> +static int ras2_probe(struct platform_device *pdev)
> +{
> +	int ret, id;
> +	struct mbox_client *cl;
> +	struct device *hw_scrub_dev;
> +	struct ras2_context *ras2_ctx;
> +	char scrub_name[RAS2_MAX_NAME_LENGTH];
> +
> +	ras2_ctx = devm_kzalloc(&pdev->dev, sizeof(*ras2_ctx), GFP_KERNEL);
> +	if (!ras2_ctx)
> +		return -ENOMEM;
> +
> +	ras2_ctx->dev = &pdev->dev;
> +	ras2_ctx->ops = &ras2_hw_ops;
> +	spin_lock_init(&ras2_ctx->spinlock);
> +	platform_set_drvdata(pdev, ras2_ctx);
> +
> +	cl = &ras2_ctx->mbox_client;
> +	/* Request mailbox channel */
> +	cl->dev = &pdev->dev;
> +	cl->tx_done = ras2_tx_done;
> +	cl->knows_txdone = true;
> +	ras2_ctx->pcc_subspace_idx = *((int *)pdev->dev.platform_data);
> +	dev_dbg(&pdev->dev, "pcc-subspace-id=%d\n", ras2_ctx->pcc_subspace_idx);
> +	ret = ras2_register_pcc_channel(ras2_ctx);

In our enabling activities, we have found a challenge here.
Our hardware has a single PCC channel corresponding to a single
platform-wide scrub interface. This driver, following the ACPI spec,
will create a new scrub node for each NUMA node. However, for us,
this means that each scrub device will try to map the same PCC channel,
and this causes an error.
> +	if (ret < 0)
> +		return ret;
> +
> +	ret = devm_add_action_or_reset(&pdev->dev, devm_ras2_release, ras2_ctx);
> +	if (ret < 0)
> +		return ret;
> +
> +	if (ras2_is_patrol_scrub_support(ras2_ctx)) {
> +		id = ida_alloc(&ras2_ida, GFP_KERNEL);
> +		if (id < 0)
> +			return id;
> +		ras2_ctx->id = id;
> +		snprintf(scrub_name, sizeof(scrub_name), "%s%d", RAS2_SCRUB, id);
> +		dev_set_name(&pdev->dev, RAS2_ID_FORMAT, id);
> +		hw_scrub_dev = devm_scrub_device_register(&pdev->dev, scrub_name,
> +							  ras2_ctx, &ras2_scrub_ops,
> +							  0, NULL);
> +		if (PTR_ERR_OR_ZERO(hw_scrub_dev))
> +			return PTR_ERR_OR_ZERO(hw_scrub_dev);
> +	}
> +	ras2_ctx->scrub_dev = hw_scrub_dev;
> +
> +	return 0;
> +}










  parent reply	other threads:[~2024-03-28 23:41 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-23 14:37 [RFC PATCH v7 00/12] memory: scrub: introduce subsystem + CXL/ACPI-RAS2 drivers shiju.jose
2024-02-23 14:37 ` [RFC PATCH v7 01/12] cxl/mbox: Add GET_SUPPORTED_FEATURES mailbox command shiju.jose
2024-02-23 14:37 ` [RFC PATCH v7 02/12] cxl/mbox: Add GET_FEATURE " shiju.jose
2024-02-23 14:37 ` [RFC PATCH v7 03/12] cxl/mbox: Add SET_FEATURE " shiju.jose
2024-03-11 21:20   ` fan
2024-03-12  9:41     ` Shiju Jose
2024-02-23 14:37 ` [RFC PATCH v7 04/12] cxl/memscrub: Add CXL device patrol scrub control feature shiju.jose
2024-02-23 14:37 ` [RFC PATCH v7 05/12] cxl/memscrub: Add CXL device ECS " shiju.jose
2024-02-23 14:37 ` [RFC PATCH v7 06/12] memory: scrub: Add scrub subsystem driver supports configuring memory scrubs in the system shiju.jose
2024-02-23 14:37 ` [RFC PATCH v7 07/12] cxl/memscrub: Register CXL device patrol scrub with scrub subsystem driver shiju.jose
2024-02-23 14:37 ` [RFC PATCH v7 08/12] cxl/memscrub: Register CXL device ECS " shiju.jose
2024-02-23 14:37 ` [RFC PATCH v7 09/12] ACPICA: ACPI 6.5: Add support for RAS2 table shiju.jose
2024-02-23 14:37 ` [RFC PATCH v7 10/12] ACPI:RAS2: Add common library for RAS2 PCC interfaces shiju.jose
2024-03-12 18:32   ` fan
2024-03-28 23:40   ` Daniel Ferguson
2024-02-23 14:37 ` [RFC PATCH v7 11/12] ACPI:RAS2: Add driver for ACPI RAS2 feature table (RAS2) shiju.jose
2024-03-28 23:41   ` Daniel Ferguson
2024-02-23 14:37 ` [RFC PATCH v7 12/12] memory: RAS2: Add memory RAS2 driver shiju.jose
2024-03-28 15:23   ` Yazen Ghannam
2024-04-02 10:17     ` Jonathan Cameron
2024-03-28 23:41   ` Daniel Ferguson [this message]
2024-04-03 14:03     ` Shiju Jose
2024-02-23 15:42 ` [RFC PATCH v7 00/12] memory: scrub: introduce subsystem + CXL/ACPI-RAS2 drivers Borislav Petkov
2024-02-23 16:25   ` Jonathan Cameron
2024-02-23 17:51     ` Borislav Petkov
2024-02-23 17:57     ` Duran, Leo
2024-03-28 23:39 ` Daniel Ferguson
2024-04-03 13:52   ` Shiju Jose

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=78d11760-bb43-42a1-a302-3e2d3bf40c48@os.amperecomputing.com \
    --to=danielf@os.amperecomputing.com \
    --cc=Jon.Grimm@amd.com \
    --cc=Vilas.Sridharan@amd.com \
    --cc=Yazen.Ghannam@amd.com \
    --cc=alison.schofield@intel.com \
    --cc=dan.j.williams@intel.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=dave.jiang@intel.com \
    --cc=dave@stgolabs.net \
    --cc=david@redhat.com \
    --cc=dferguson@amperecomputing.com \
    --cc=duenwen@google.com \
    --cc=erdemaktas@google.com \
    --cc=gthelen@google.com \
    --cc=ira.weiny@intel.com \
    --cc=james.morse@arm.com \
    --cc=jiaqiyan@google.com \
    --cc=jonathan.cameron@huawei.com \
    --cc=jthoughton@google.com \
    --cc=kangkang.shen@futurewei.com \
    --cc=lenb@kernel.org \
    --cc=leo.duran@amd.com \
    --cc=linux-acpi@vger.kernel.org \
    --cc=linux-cxl@vger.kernel.org \
    --cc=linux-edac@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linuxarm@huawei.com \
    --cc=mike.malvestuto@intel.com \
    --cc=naoya.horiguchi@nec.com \
    --cc=pgonda@google.com \
    --cc=prime.zeng@hisilicon.com \
    --cc=rafael@kernel.org \
    --cc=rientjes@google.com \
    --cc=shiju.jose@huawei.com \
    --cc=somasundaram.a@hpe.com \
    --cc=tanxiaofei@huawei.com \
    --cc=tony.luck@intel.com \
    --cc=vishal.l.verma@intel.com \
    --cc=wanghuiqiang@huawei.com \
    --cc=wbs@os.amperecomputing.com \
    --cc=wschwartz@amperecomputing.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.