linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: roman.sudarikov@linux.intel.com
To: peterz@infradead.org, mingo@redhat.com, acme@kernel.org,
	mark.rutland@arm.com, alexander.shishkin@linux.intel.com,
	jolsa@redhat.com, namhyung@kernel.org,
	linux-kernel@vger.kernel.org, eranian@google.com,
	bgregg@netflix.com, ak@linux.intel.com,
	kan.liang@linux.intel.com, gregkh@linuxfoundation.org
Cc: alexander.antonov@intel.com, roman.sudarikov@linux.intel.com
Subject: [PATCH v4 2/2] perf x86: Exposing an Uncore unit to PMON for Intel Xeon® server platform
Date: Fri, 17 Jan 2020 16:37:59 +0300	[thread overview]
Message-ID: <20200117133759.5729-3-roman.sudarikov@linux.intel.com> (raw)
In-Reply-To: <20200117133759.5729-1-roman.sudarikov@linux.intel.com>

From: Roman Sudarikov <roman.sudarikov@linux.intel.com>

The current version supports server platforms starting with the Intel® Xeon®
Processor Scalable Family and introduces mapping for IIO Uncore units only.
Other units can be added on demand.

IIO stack to PMON mapping is exposed through:
    /sys/devices/uncore_iio_<pmu_idx>/mapping
    in the following format: domain:bus

For example, on a 4-die Intel Xeon® server platform:
    $ cat /sys/devices/uncore_iio_0/mapping
    0000:00,0000:40,0000:80,0000:c0

Which means:
IIO PMON block 0 on die 0 belongs to IIO stack on bus 0x00, domain 0x0000
IIO PMON block 0 on die 1 belongs to IIO stack on bus 0x40, domain 0x0000
IIO PMON block 0 on die 2 belongs to IIO stack on bus 0x80, domain 0x0000
IIO PMON block 0 on die 3 belongs to IIO stack on bus 0xc0, domain 0x0000

Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
Co-developed-by: Alexander Antonov <alexander.antonov@intel.com>
Signed-off-by: Alexander Antonov <alexander.antonov@intel.com>
Signed-off-by: Roman Sudarikov <roman.sudarikov@linux.intel.com>
---
 arch/x86/events/intel/uncore_snbep.c | 160 +++++++++++++++++++++++++++
 1 file changed, 160 insertions(+)

diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index b10a5ec79e48..813009b48a0f 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -273,6 +273,30 @@
 #define SKX_CPUNODEID			0xc0
 #define SKX_GIDNIDMAP			0xd4
 
+/*
+ * The CPU_BUS_NUMBER MSR returns the values of the respective CPUBUSNO CSR
+ * that BIOS programmed. The MSR has package scope.
+ * |  Bit  |  Default  |  Description
+ * | [63]  |    00h    | VALID - When set, indicates the CPU bus
+ *                       numbers have been initialized. (RO)
+ * |[62:48]|    ---    | Reserved
+ * |[47:40]|    00h    | BUS_NUM_5 — Return the bus number BIOS assigned
+ *                       CPUBUSNO(5). (RO)
+ * |[39:32]|    00h    | BUS_NUM_4 — Return the bus number BIOS assigned
+ *                       CPUBUSNO(4). (RO)
+ * |[31:24]|    00h    | BUS_NUM_3 — Return the bus number BIOS assigned
+ *                       CPUBUSNO(3). (RO)
+ * |[23:16]|    00h    | BUS_NUM_2 — Return the bus number BIOS assigned
+ *                       CPUBUSNO(2). (RO)
+ * |[15:8] |    00h    | BUS_NUM_1 — Return the bus number BIOS assigned
+ *                       CPUBUSNO(1). (RO)
+ * | [7:0] |    00h    | BUS_NUM_0 — Return the bus number BIOS assigned
+ *                       CPUBUSNO(0). (RO)
+ */
+#define SKX_MSR_CPU_BUS_NUMBER		0x300
+#define SKX_MSR_CPU_BUS_VALID_BIT	(1ULL << 63)	/* bit 63: VALID */
+#define BUS_NUM_STRIDE			8	/* bits per BUS_NUM_n field */
+
+
 /* SKX CHA */
 #define SKX_CHA_MSR_PMON_BOX_FILTER_TID		(0x1ffULL << 0)
 #define SKX_CHA_MSR_PMON_BOX_FILTER_LINK	(0xfULL << 9)
@@ -3580,6 +3604,9 @@ static struct intel_uncore_ops skx_uncore_iio_ops = {
 	.read_counter		= uncore_msr_read_counter,
 };
 
+static int skx_iio_get_topology(struct intel_uncore_type *type, int max_dies);
+static int skx_iio_set_mapping(struct intel_uncore_type *type, int max_dies);
+
 static struct intel_uncore_type skx_uncore_iio = {
 	.name			= "iio",
 	.num_counters		= 4,
@@ -3594,6 +3621,8 @@ static struct intel_uncore_type skx_uncore_iio = {
 	.constraints		= skx_uncore_iio_constraints,
 	.ops			= &skx_uncore_iio_ops,
 	.format_group		= &skx_uncore_iio_format_group,
+	.get_topology		= skx_iio_get_topology,
+	.set_mapping		= skx_iio_set_mapping,
 };
 
 enum perf_uncore_iio_freerunning_type_id {
@@ -3780,6 +3809,137 @@ static int skx_count_chabox(void)
 	return hweight32(val);
 }
 
+/* Read CPU_BUS_NUMBER on @cpu into *@topology; -ENXIO if unreadable/invalid. */
+static inline int skx_msr_cpu_bus_read(int cpu, u64 *topology)
+{
+	u64 val;
+
+	if (rdmsrl_on_cpu(cpu, SKX_MSR_CPU_BUS_NUMBER, &val))
+		return -ENXIO;
+	if (!(val & SKX_MSR_CPU_BUS_VALID_BIT))
+		return -ENXIO;
+	*topology = val;
+	return 0;
+}
+
+/*
+ * Snapshot CPU_BUS_NUMBER for each online die into type->topology[die].
+ * Returns 0, -EPERM on multi-segment PCI, -ENOMEM, or -ENXIO (MSR read).
+ */
+static int skx_iio_get_topology(struct intel_uncore_type *type, int max_dies)
+{
+	int ret = 0, cpu, die, current_die;
+	struct pci_bus *bus = NULL;
+
+	/* Only single-segment (PCI domain 0) platforms are verified for now. */
+	while ((bus = pci_find_next_bus(bus)) && !pci_domain_nr(bus))
+		;
+	if (bus)
+		return -EPERM;
+
+	type->topology = kcalloc(max_dies, sizeof(u64), GFP_KERNEL);
+	if (!type->topology)
+		return -ENOMEM;
+
+	/* Hold the hotplug lock so no CPU goes down while we walk the mask. */
+	cpus_read_lock();
+	/* -1 matches no die, so the first online CPU is always read. */
+	current_die = -1;
+	for_each_online_cpu(cpu) {
+		die = topology_logical_die_id(cpu);
+		if (current_die == die)
+			continue;
+		ret = skx_msr_cpu_bus_read(cpu, &type->topology[die]);
+		if (ret) {
+			kfree(type->topology);
+			/* No dangling pointer: set_mapping also kfree()s it. */
+			type->topology = NULL;
+			break;
+		}
+		current_die = die;
+	}
+	cpus_read_unlock();
+
+	return ret;
+}
+
+/* Bus number BIOS assigned to the IIO stack that @pmu monitors on @die. */
+static inline u8 skx_iio_stack_bus(struct intel_uncore_pmu *pmu, int die)
+{
+	return pmu->type->topology[die] >> (pmu->pmu_idx * BUS_NUM_STRIDE);
+}
+
+/*
+ * Build pmu->mapping: one "domain:bus" entry per die, comma separated, or
+ * just "0" for a non-zero pmu_idx whose die 0 stack bus reads as zero.
+ * Returns 0, -EINVAL if there are no dies, or -ENOMEM.
+ */
+static int skx_iio_set_box_mapping(struct intel_uncore_pmu *pmu, int max_dies)
+{
+	const int size = max_dies * 8;	/* "%04x:%02x" + ',' or NUL, per die */
+	int len = 0;
+	int die;
+
+	/* Without die 0 there is no topology entry to read or print. */
+	if (max_dies < 1)
+		return -EINVAL;
+
+	/* Root bus 0x00 is valid only for die 0 AND pmu_idx = 0. */
+	if (!skx_iio_stack_bus(pmu, 0) && pmu->pmu_idx) {
+		pmu->mapping = kstrdup("0", GFP_KERNEL);
+		return pmu->mapping ? 0 : -ENOMEM;
+	}
+
+	pmu->mapping = kzalloc(size, GFP_KERNEL);
+	if (!pmu->mapping)
+		return -ENOMEM;
+
+	/* scnprintf() returns what was really stored, so len stays in bounds. */
+	for (die = 0; die < max_dies; die++) {
+		len += scnprintf(pmu->mapping + len, size - len, "%s%04x:%02x",
+				 die ? "," : "", 0, skx_iio_stack_bus(pmu, die));
+	}
+
+	return 0;
+}
+
+/*
+ * Each IIO stack (PCIe root port) has its own IIO PMON block, so each
+ * "mapping" holds the bus number(s) of the PCIe root port(s) that PMON
+ * block can monitor, one "domain:bus" entry per die.
+ *
+ * For example, on a 4-die Xeon platform with up to 6 IIO stacks per die:
+ *
+ * $ cat /sys/devices/uncore_iio_0/mapping
+ * 0000:00,0000:40,0000:80,0000:c0
+ *
+ * i.e. IIO PMON 0 on dies 0, 1, 2 and 3 belongs to the PCIe RP on bus
+ * 0x00, 0x40, 0x80 and 0xc0 respectively, all in domain 0x0000.
+ *
+ * Frees type->topology. Returns 0 or the first per-box error, in which
+ * case every mapping built so far is freed again.
+ */
+static int skx_iio_set_mapping(struct intel_uncore_type *type, int max_dies)
+{
+	int ret = 0, i;
+
+	for (i = 0; i < type->num_boxes; i++) {
+		ret = skx_iio_set_box_mapping(&type->pmus[i], max_dies);
+		if (!ret)
+			continue;
+		/* Unwind boxes i..0, box 0 included; kfree(NULL) is a no-op. */
+		for (; i >= 0; i--) {
+			kfree(type->pmus[i].mapping);
+			type->pmus[i].mapping = NULL;
+		}
+		break;
+	}
+
+	kfree(type->topology);
+	type->topology = NULL;
+	return ret;
+}
+
 void skx_uncore_cpu_init(void)
 {
 	skx_uncore_chabox.num_boxes = skx_count_chabox();
-- 
2.19.1


  parent reply	other threads:[~2020-01-17 13:38 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-01-17 13:37 [PATCH v4 0/2] perf x86: Exposing IO stack to IO PMON mapping through sysfs roman.sudarikov
2020-01-17 13:37 ` [PATCH v4 1/2] perf x86: Infrastructure for exposing an Uncore unit to PMON mapping roman.sudarikov
2020-01-17 14:16   ` Greg KH
2020-01-17 13:37 ` roman.sudarikov [this message]
2020-01-17 14:19   ` [PATCH v4 2/2] perf x86: Exposing an Uncore unit to PMON for Intel Xeon® server platform Greg KH
2020-01-17 16:23     ` Andi Kleen
2020-01-17 16:54       ` Greg KH
2020-01-17 17:27         ` Andi Kleen
2020-01-17 18:42           ` Greg KH
2020-01-17 19:12             ` Andi Kleen
2020-01-17 23:03               ` Greg KH
2020-01-17 23:21                 ` Andi Kleen
2020-01-21 16:15         ` Sudarikov, Roman
2020-01-21 17:15           ` Greg KH
2020-01-28 14:55             ` Sudarikov, Roman
2020-01-28 20:19               ` Liang, Kan
2020-01-17 14:14 ` [PATCH v4 0/2] perf x86: Exposing IO stack to IO PMON mapping through sysfs Greg KH

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200117133759.5729-3-roman.sudarikov@linux.intel.com \
    --to=roman.sudarikov@linux.intel.com \
    --cc=acme@kernel.org \
    --cc=ak@linux.intel.com \
    --cc=alexander.antonov@intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=bgregg@netflix.com \
    --cc=eranian@google.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=jolsa@redhat.com \
    --cc=kan.liang@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).