From: Kajol Jain <kjain@linux.ibm.com>
To: mpe@ellerman.id.au, linuxppc-dev@lists.ozlabs.org,
nvdimm@lists.linux.dev, linux-kernel@vger.kernel.org,
peterz@infradead.org, dan.j.williams@intel.com,
ira.weiny@intel.com, vishal.l.verma@intel.com
Cc: maddy@linux.ibm.com, santosh@fossix.org,
aneesh.kumar@linux.ibm.com, vaibhav@linux.ibm.com,
atrajeev@linux.vnet.ibm.com, tglx@linutronix.de,
kjain@linux.ibm.com, rnsastry@linux.ibm.com
Subject: [RESEND PATCH v4 2/4] drivers/nvdimm: Add perf interface to expose nvdimm performance stats
Date: Fri, 3 Sep 2021 10:39:12 +0530 [thread overview]
Message-ID: <20210903050914.273525-3-kjain@linux.ibm.com> (raw)
In-Reply-To: <20210903050914.273525-1-kjain@linux.ibm.com>
Add a common interface that provides performance stats reporting
support for nvdimm devices. The interface includes support for
pmu register/unregister functions, cpu hotplug, and pmu event
functions such as event_init/add/read/del.
Users can use the standard perf tool to access the perf
events exposed via the pmu.
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Tested-by: Nageswara R Sastry <rnsastry@linux.ibm.com>
Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
---
drivers/nvdimm/Makefile | 1 +
drivers/nvdimm/nd_perf.c | 230 +++++++++++++++++++++++++++++++++++++++
include/linux/nd.h | 3 +
3 files changed, 234 insertions(+)
create mode 100644 drivers/nvdimm/nd_perf.c
diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile
index 29203f3d3069..25dba6095612 100644
--- a/drivers/nvdimm/Makefile
+++ b/drivers/nvdimm/Makefile
@@ -18,6 +18,7 @@ nd_e820-y := e820.o
libnvdimm-y := core.o
libnvdimm-y += bus.o
libnvdimm-y += dimm_devs.o
+libnvdimm-y += nd_perf.o
libnvdimm-y += dimm.o
libnvdimm-y += region_devs.o
libnvdimm-y += region.o
diff --git a/drivers/nvdimm/nd_perf.c b/drivers/nvdimm/nd_perf.c
new file mode 100644
index 000000000000..4c49d1bc2a3c
--- /dev/null
+++ b/drivers/nvdimm/nd_perf.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * nd_perf.c: NVDIMM Device Performance Monitoring Unit support
+ *
+ * Perf interface to expose nvdimm performance stats.
+ *
+ * Copyright (C) 2021 IBM Corporation
+ */
+
+#define pr_fmt(fmt) "nvdimm_pmu: " fmt
+
+#include <linux/nd.h>
+
+/*
+ * sysfs show handler for the "cpumask" attribute.
+ *
+ * Looks up the nvdimm_pmu that embeds the pmu stored as @dev's driver data
+ * and prints the single cpu currently designated for counter access
+ * (nd_pmu->cpu) into @buf.
+ *
+ * Returns whatever cpumap_print_to_pagebuf() returns.
+ */
+static ssize_t nvdimm_pmu_cpumask_show(struct device *dev,
+				       struct device_attribute *attr, char *buf)
+{
+	struct pmu *drv_pmu = dev_get_drvdata(dev);
+	struct nvdimm_pmu *owner = container_of(drv_pmu, struct nvdimm_pmu, pmu);
+	const struct cpumask *designated = cpumask_of(owner->cpu);
+
+	return cpumap_print_to_pagebuf(true, buf, designated);
+}
+
+/*
+ * CPU hotplug "offline" callback for one nvdimm pmu instance.
+ *
+ * @cpu:  the cpu that is going offline
+ * @node: hotplug list node embedded in the struct nvdimm_pmu
+ *
+ * Drops @cpu from nd_pmu->arch_cpumask and, if @cpu was the cpu designated
+ * for counter access, picks a replacement and migrates the perf context to
+ * it. Always returns 0.
+ */
+static int nvdimm_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
+{
+	struct nvdimm_pmu *nd_pmu;
+	u32 target;
+	int nodeid;
+	const struct cpumask *cpumask;
+
+	nd_pmu = hlist_entry_safe(node, struct nvdimm_pmu, node);
+
+	/* Clear it, in case the given cpu is set in nd_pmu->arch_cpumask */
+	cpumask_test_and_clear_cpu(cpu, &nd_pmu->arch_cpumask);
+
+	/*
+	 * If the given cpu is not the current designated cpu for
+	 * counter access, there is nothing to hand over.
+	 */
+	if (cpu != nd_pmu->cpu)
+		return 0;
+
+	/* Check for any remaining cpu in nd_pmu->arch_cpumask */
+	target = cpumask_any(&nd_pmu->arch_cpumask);
+
+	/*
+	 * In case we don't have any cpu left in nd_pmu->arch_cpumask,
+	 * look in the given cpu's numa node cpumask instead.
+	 */
+	if (target >= nr_cpu_ids) {
+		nodeid = cpu_to_node(cpu);
+		cpumask = cpumask_of_node(nodeid);
+		target = cpumask_any_but(cpumask, cpu);
+	}
+
+	/*
+	 * Stored even when no valid cpu was found: an out-of-range value is
+	 * what nvdimm_pmu_cpu_online() tests for to adopt the next cpu.
+	 */
+	nd_pmu->cpu = target;
+
+	/*
+	 * Migrate nvdimm pmu events to the new target cpu if valid.
+	 * target is unsigned, so only the upper bound needs checking.
+	 */
+	if (target < nr_cpu_ids)
+		perf_pmu_migrate_context(&nd_pmu->pmu, cpu, target);
+
+	return 0;
+}
+
+/*
+ * CPU hotplug "online" callback for one nvdimm pmu instance.
+ *
+ * If the pmu currently has no valid designated cpu (nd_pmu->cpu is out of
+ * range), the cpu coming online takes over that role. Always returns 0.
+ */
+static int nvdimm_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
+{
+	struct nvdimm_pmu *nd_pmu = hlist_entry_safe(node, struct nvdimm_pmu, node);
+
+	/* A valid designated cpu already exists: keep it. */
+	if (nd_pmu->cpu < nr_cpu_ids)
+		return 0;
+
+	nd_pmu->cpu = cpu;
+	return 0;
+}
+
+/*
+ * Allocate and install the "cpumask" sysfs attribute group for @nd_pmu.
+ *
+ * Three objects are allocated: the attribute itself, a NULL-terminated
+ * attribute pointer array, and the attribute group. On success the group is
+ * stored in nd_pmu->attr_groups[NVDIMM_PMU_CPUMASK_ATTR].
+ *
+ * Returns 0 on success or -ENOMEM if any allocation fails, in which case
+ * everything allocated so far is released.
+ */
+static int create_cpumask_attr_group(struct nvdimm_pmu *nd_pmu)
+{
+	struct perf_pmu_events_attr *attr;
+	struct attribute **attrs;
+	struct attribute_group *nvdimm_pmu_cpumask_group;
+
+	attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+	if (!attr)
+		return -ENOMEM;
+
+	/* One slot for the cpumask attribute plus the NULL terminator. */
+	attrs = kcalloc(2, sizeof(*attrs), GFP_KERNEL);
+	if (!attrs)
+		goto free_attr;
+
+	/* Allocate memory for cpumask attribute group */
+	nvdimm_pmu_cpumask_group = kzalloc(sizeof(*nvdimm_pmu_cpumask_group), GFP_KERNEL);
+	if (!nvdimm_pmu_cpumask_group)
+		goto free_attrs;
+
+	sysfs_attr_init(&attr->attr.attr);
+	attr->attr.attr.name = "cpumask";
+	attr->attr.attr.mode = 0444;
+	attr->attr.show = nvdimm_pmu_cpumask_show;
+	attrs[0] = &attr->attr.attr;
+	attrs[1] = NULL;
+
+	nvdimm_pmu_cpumask_group->attrs = attrs;
+	nd_pmu->attr_groups[NVDIMM_PMU_CPUMASK_ATTR] = nvdimm_pmu_cpumask_group;
+	return 0;
+
+free_attrs:
+	kfree(attrs);
+free_attr:
+	kfree(attr);
+	return -ENOMEM;
+}
+
+/*
+ * Set up cpu hotplug handling for @nd_pmu.
+ *
+ * Steps, in order:
+ *   1. choose the cpu designated for counter access,
+ *   2. allocate a dynamic multi-instance hotplug state,
+ *   3. add this pmu as an instance of that state,
+ *   4. create the cpumask sysfs attribute group.
+ *
+ * Returns 0 on success. On failure the steps already completed are undone
+ * and the failing call's error code is returned.
+ */
+static int nvdimm_pmu_cpu_hotplug_init(struct nvdimm_pmu *nd_pmu)
+{
+	const struct cpumask *node_cpus;
+	int node, err;
+
+	if (cpumask_empty(&nd_pmu->arch_cpumask)) {
+		/* No arch-provided mask: use the device's numa node cpus. */
+		node = dev_to_node(nd_pmu->dev);
+		node_cpus = cpumask_of_node(node);
+		nd_pmu->cpu = cpumask_any(node_cpus);
+	} else {
+		/*
+		 * Arch code may supply a cpumask even when it does not
+		 * handle hotplug itself; take the designated cpu from it.
+		 */
+		nd_pmu->cpu = cpumask_any(&nd_pmu->arch_cpumask);
+	}
+
+	err = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "perf/nvdimm:online",
+				      nvdimm_pmu_cpu_online, nvdimm_pmu_cpu_offline);
+	if (err < 0)
+		return err;
+
+	/* A non-negative return is stored and used as the state id below. */
+	nd_pmu->cpuhp_state = err;
+
+	/* Register the pmu instance for cpu hotplug */
+	err = cpuhp_state_add_instance_nocalls(nd_pmu->cpuhp_state, &nd_pmu->node);
+	if (err)
+		goto remove_state;
+
+	/* Create cpumask attribute group */
+	err = create_cpumask_attr_group(nd_pmu);
+	if (err)
+		goto remove_instance;
+
+	return 0;
+
+remove_instance:
+	cpuhp_state_remove_instance_nocalls(nd_pmu->cpuhp_state, &nd_pmu->node);
+remove_state:
+	cpuhp_remove_multi_state(nd_pmu->cpuhp_state);
+	return err;
+}
+
+/*
+ * Undo nvdimm_pmu_cpu_hotplug_init(): remove the hotplug instance and state,
+ * then free everything create_cpumask_attr_group() allocated (the attribute,
+ * the attribute pointer array and the group itself).
+ *
+ * The attr_groups slot is cleared afterwards so no stale pointer is left
+ * behind. The function is file-local: nothing outside this file declares it.
+ */
+static void nvdimm_pmu_free_hotplug_memory(struct nvdimm_pmu *nd_pmu)
+{
+	const struct attribute_group *group;
+	struct device_attribute *dev_attr;
+
+	cpuhp_state_remove_instance_nocalls(nd_pmu->cpuhp_state, &nd_pmu->node);
+	cpuhp_remove_multi_state(nd_pmu->cpuhp_state);
+
+	group = nd_pmu->attr_groups[NVDIMM_PMU_CPUMASK_ATTR];
+	if (!group)
+		return;
+
+	if (group->attrs) {
+		/*
+		 * attrs[0] points into the perf_pmu_events_attr allocated by
+		 * create_cpumask_attr_group(); walk back to the start of that
+		 * allocation so it can be freed too.
+		 */
+		if (group->attrs[0]) {
+			dev_attr = container_of(group->attrs[0],
+						struct device_attribute, attr);
+			kfree(container_of(dev_attr,
+					   struct perf_pmu_events_attr, attr));
+		}
+		kfree(group->attrs);
+	}
+	kfree(group);
+	nd_pmu->attr_groups[NVDIMM_PMU_CPUMASK_ATTR] = NULL;
+}
+
+/*
+ * register_nvdimm_pmu() - register an nvdimm pmu with the perf core
+ * @nd_pmu: pmu description; event_init, add, del and read must all be set
+ * @pdev:   platform device whose struct device is stored in nd_pmu->dev
+ *
+ * Copies the callbacks, name and attribute groups from @nd_pmu into the
+ * embedded struct pmu. If the caller did not supply a cpumask attribute
+ * group, cpu hotplug handling and the cpumask attribute are set up here.
+ * The pmu is then registered with perf.
+ *
+ * Returns 0 on success, -EINVAL for a NULL argument or a missing callback,
+ * or the error returned by hotplug setup or perf_pmu_register().
+ */
+int register_nvdimm_pmu(struct nvdimm_pmu *nd_pmu, struct platform_device *pdev)
+{
+	bool hotplug_set_up = false;
+	int rc;
+
+	if (!nd_pmu || !pdev)
+		return -EINVAL;
+
+	/* event functions like add/del/read/event_init should not be NULL */
+	if (WARN_ON_ONCE(!(nd_pmu->event_init && nd_pmu->add && nd_pmu->del && nd_pmu->read)))
+		return -EINVAL;
+
+	nd_pmu->pmu.task_ctx_nr = perf_invalid_context;
+	nd_pmu->pmu.name = nd_pmu->name;
+	nd_pmu->pmu.event_init = nd_pmu->event_init;
+	nd_pmu->pmu.add = nd_pmu->add;
+	nd_pmu->pmu.del = nd_pmu->del;
+	nd_pmu->pmu.read = nd_pmu->read;
+
+	nd_pmu->pmu.attr_groups = nd_pmu->attr_groups;
+	nd_pmu->pmu.capabilities = PERF_PMU_CAP_NO_INTERRUPT |
+				   PERF_PMU_CAP_NO_EXCLUDE;
+
+	/*
+	 * Add platform_device->dev pointer to nvdimm_pmu to access
+	 * device data in events functions.
+	 */
+	nd_pmu->dev = &pdev->dev;
+
+	/*
+	 * In case the cpumask attribute is already set, cpu hotplug is
+	 * handled by the arch specific code and hotplug_init is skipped.
+	 */
+	if (!nd_pmu->attr_groups[NVDIMM_PMU_CPUMASK_ATTR]) {
+		/* init cpuhotplug */
+		rc = nvdimm_pmu_cpu_hotplug_init(nd_pmu);
+		if (rc) {
+			pr_info("cpu hotplug feature failed for device: %s\n", nd_pmu->name);
+			return rc;
+		}
+		hotplug_set_up = true;
+	}
+
+	rc = perf_pmu_register(&nd_pmu->pmu, nd_pmu->name, -1);
+	if (rc) {
+		/*
+		 * Only undo what this function set up. When the caller
+		 * supplied the cpumask group, no hotplug instance was
+		 * registered here and the group is not ours to free.
+		 */
+		if (hotplug_set_up)
+			nvdimm_pmu_free_hotplug_memory(nd_pmu);
+		return rc;
+	}
+
+	pr_info("%s NVDIMM performance monitor support registered\n",
+		nd_pmu->name);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(register_nvdimm_pmu);
+
+/*
+ * unregister_nvdimm_pmu() - remove an nvdimm pmu registered earlier
+ * @nd_pmu: the pmu to unregister
+ *
+ * Unregisters the embedded struct pmu from perf, then removes the cpu
+ * hotplug instance/state and frees the cpumask attribute group.
+ *
+ * NOTE(review): the hotplug teardown runs unconditionally, although
+ * register_nvdimm_pmu() skips hotplug setup when the caller supplies its own
+ * cpumask attribute group - confirm that case cannot reach this path.
+ */
+void unregister_nvdimm_pmu(struct nvdimm_pmu *nd_pmu)
+{
+ /* nd_pmu itself is not freed here; that is left to arch specific code */
+ perf_pmu_unregister(&nd_pmu->pmu);
+ nvdimm_pmu_free_hotplug_memory(nd_pmu);
+}
+EXPORT_SYMBOL_GPL(unregister_nvdimm_pmu);
diff --git a/include/linux/nd.h b/include/linux/nd.h
index 712499cf7335..7d8b4f7d277d 100644
--- a/include/linux/nd.h
+++ b/include/linux/nd.h
@@ -66,6 +66,9 @@ struct nvdimm_pmu {
struct cpumask arch_cpumask;
};
+/*
+ * Register @nvdimm with perf on behalf of @pdev. Returns 0 on success,
+ * -EINVAL if either argument is NULL or a required event callback is
+ * missing, or another non-zero error code if setup fails.
+ */
+int register_nvdimm_pmu(struct nvdimm_pmu *nvdimm, struct platform_device *pdev);
+/* Unregister @nd_pmu from perf; the caller remains responsible for freeing it. */
+void unregister_nvdimm_pmu(struct nvdimm_pmu *nd_pmu);
+
struct nd_device_driver {
struct device_driver drv;
unsigned long type;
--
2.26.2
next prev parent reply other threads:[~2021-09-03 5:11 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-09-03 5:09 [RESEND PATCH v4 0/4] Add perf interface to expose nvdimm Kajol Jain
2021-09-03 5:09 ` [RESEND PATCH v4 1/4] drivers/nvdimm: Add nvdimm pmu structure Kajol Jain
2021-09-07 21:59 ` Dan Williams
2021-09-09 7:55 ` kajoljain
2021-09-15 4:08 ` Dan Williams
2021-09-15 4:11 ` Dan Williams
2021-09-03 5:09 ` Kajol Jain [this message]
2021-09-03 12:32 ` [RESEND PATCH v4 2/4] drivers/nvdimm: Add perf interface to expose nvdimm performance stats kernel test robot
2021-09-03 15:19 ` kernel test robot
2021-09-04 6:38 ` kajoljain
2021-09-03 15:19 ` [RFC PATCH] drivers/nvdimm: nvdimm_pmu_free_hotplug_memory() can be static kernel test robot
2021-09-04 6:39 ` kajoljain
2021-09-03 5:09 ` [RESEND PATCH v4 3/4] powerpc/papr_scm: Add perf interface support Kajol Jain
2021-09-03 5:09 ` [RESEND PATCH v4 4/4] powerpc/papr_scm: Document papr_scm sysfs event format entries Kajol Jain
2021-09-08 1:03 ` Dan Williams
2021-09-09 8:03 ` kajoljain
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210903050914.273525-3-kjain@linux.ibm.com \
--to=kjain@linux.ibm.com \
--cc=aneesh.kumar@linux.ibm.com \
--cc=atrajeev@linux.vnet.ibm.com \
--cc=dan.j.williams@intel.com \
--cc=ira.weiny@intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=maddy@linux.ibm.com \
--cc=mpe@ellerman.id.au \
--cc=nvdimm@lists.linux.dev \
--cc=peterz@infradead.org \
--cc=rnsastry@linux.ibm.com \
--cc=santosh@fossix.org \
--cc=tglx@linutronix.de \
--cc=vaibhav@linux.ibm.com \
--cc=vishal.l.verma@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).