linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: kan.liang@linux.intel.com
To: peterz@infradead.org, acme@kernel.org, mingo@redhat.com,
	linux-kernel@vger.kernel.org
Cc: tglx@linutronix.de, jolsa@kernel.org, eranian@google.com,
	alexander.shishkin@linux.intel.com, ak@linux.intel.com,
	Kan Liang <kan.liang@linux.intel.com>
Subject: [PATCH V4 01/23] perf/x86: Support outputting XMM registers
Date: Tue, 26 Mar 2019 09:08:39 -0700	[thread overview]
Message-ID: <20190326160901.4887-2-kan.liang@linux.intel.com> (raw)
In-Reply-To: <20190326160901.4887-1-kan.liang@linux.intel.com>

From: Kan Liang <kan.liang@linux.intel.com>

Starting from Icelake, XMM registers can be collected in a PEBS record.
But the current code only outputs the pt_regs.

Add a new struct x86_perf_regs for both pt_regs and xmm_regs.
XMM registers are 128 bits wide. To simplify the code, each one is
handled like two separate 64-bit registers, which means setting two bits
in the register bitmap. This also allows sampling only the lower 64 bits
of an XMM register.

The index of XMM registers starts from 32. There are 16 XMM registers,
each occupying two indexes, so all of the previously reserved space for
regs is used. Remove REG_RESERVED.

Add PERF_REG_X86_XMM_MAX, which stands for the max number of all x86
regs including both GPRs and XMM.

XMM is not supported on all platforms. Add has_xmm_regs to indicate
whether the platform supports it. Also add checks in x86_pmu_hw_config()
to reject invalid configs of sample_regs_user and sample_regs_intr.

Add REG_NOSUPPORT for 32bit to exclude unsupported registers.

Originally-by: Andi Kleen <ak@linux.intel.com>
Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
---

Changes since V3:
- Keep the old names for GPRs. Rename PERF_REG_X86_MAX to
  PERF_REG_X86_XMM_MAX
- Remove unnecessary REG_RESERVED
- Add REG_NOSUPPORT for 32bit

 arch/x86/events/core.c                | 10 ++++++++++
 arch/x86/events/perf_event.h          |  2 ++
 arch/x86/include/asm/perf_event.h     |  5 +++++
 arch/x86/include/uapi/asm/perf_regs.h | 23 ++++++++++++++++++++++-
 arch/x86/kernel/perf_regs.c           | 27 ++++++++++++++++++++-------
 5 files changed, 59 insertions(+), 8 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index e2b1447192a8..9378c6b2128f 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -560,6 +560,16 @@ int x86_pmu_hw_config(struct perf_event *event)
 			return -EINVAL;
 	}
 
+	if (event->attr.sample_regs_user & ~PEBS_REGS)
+		return -EINVAL;
+	/*
+	 * Besides the general purpose registers, XMM registers may
+	 * be collected in PEBS on some platforms, e.g. Icelake
+	 */
+	if ((event->attr.sample_regs_intr & ~PEBS_REGS) &&
+	    (!x86_pmu.has_xmm_regs || !event->attr.precise_ip))
+		return -EINVAL;
+
 	return x86_setup_perfctr(event);
 }
 
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index a75955741c50..6428941a5073 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -657,6 +657,8 @@ struct x86_pmu {
 	 * Check period value for PERF_EVENT_IOC_PERIOD ioctl.
 	 */
 	int (*check_period) (struct perf_event *event, u64 period);
+
+	unsigned int	has_xmm_regs : 1; /* support XMM regs */
 };
 
 struct x86_perf_task_context {
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 8bdf74902293..d9f5bbe44b3c 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -248,6 +248,11 @@ extern void perf_events_lapic_init(void);
 #define PERF_EFLAGS_VM		(1UL << 5)
 
 struct pt_regs;
+struct x86_perf_regs {
+	struct pt_regs	regs;
+	u64		*xmm_regs;
+};
+
 extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
 extern unsigned long perf_misc_flags(struct pt_regs *regs);
 #define perf_misc_flags(regs)	perf_misc_flags(regs)
diff --git a/arch/x86/include/uapi/asm/perf_regs.h b/arch/x86/include/uapi/asm/perf_regs.h
index f3329cabce5c..ac67bbea10ca 100644
--- a/arch/x86/include/uapi/asm/perf_regs.h
+++ b/arch/x86/include/uapi/asm/perf_regs.h
@@ -27,8 +27,29 @@ enum perf_event_x86_regs {
 	PERF_REG_X86_R13,
 	PERF_REG_X86_R14,
 	PERF_REG_X86_R15,
-
+	/* These are the limits for the GPRs. */
 	PERF_REG_X86_32_MAX = PERF_REG_X86_GS + 1,
 	PERF_REG_X86_64_MAX = PERF_REG_X86_R15 + 1,
+
+	/* These all need two bits set because they are 128bit */
+	PERF_REG_X86_XMM0  = 32,
+	PERF_REG_X86_XMM1  = 34,
+	PERF_REG_X86_XMM2  = 36,
+	PERF_REG_X86_XMM3  = 38,
+	PERF_REG_X86_XMM4  = 40,
+	PERF_REG_X86_XMM5  = 42,
+	PERF_REG_X86_XMM6  = 44,
+	PERF_REG_X86_XMM7  = 46,
+	PERF_REG_X86_XMM8  = 48,
+	PERF_REG_X86_XMM9  = 50,
+	PERF_REG_X86_XMM10 = 52,
+	PERF_REG_X86_XMM11 = 54,
+	PERF_REG_X86_XMM12 = 56,
+	PERF_REG_X86_XMM13 = 58,
+	PERF_REG_X86_XMM14 = 60,
+	PERF_REG_X86_XMM15 = 62,
+
+	/* These include both GPRs and XMM registers */
+	PERF_REG_X86_XMM_MAX = PERF_REG_X86_XMM15 + 2,
 };
 #endif /* _ASM_X86_PERF_REGS_H */
diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
index c06c4c16c6b6..07c30ee17425 100644
--- a/arch/x86/kernel/perf_regs.c
+++ b/arch/x86/kernel/perf_regs.c
@@ -59,18 +59,34 @@ static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = {
 
 u64 perf_reg_value(struct pt_regs *regs, int idx)
 {
+	struct x86_perf_regs *perf_regs;
+
+	if (idx >= PERF_REG_X86_XMM0 && idx < PERF_REG_X86_XMM_MAX) {
+		perf_regs = container_of(regs, struct x86_perf_regs, regs);
+		if (!perf_regs->xmm_regs)
+			return 0;
+		return perf_regs->xmm_regs[idx - PERF_REG_X86_XMM0];
+	}
+
 	if (WARN_ON_ONCE(idx >= ARRAY_SIZE(pt_regs_offset)))
 		return 0;
 
 	return regs_get_register(regs, pt_regs_offset[idx]);
 }
 
-#define REG_RESERVED (~((1ULL << PERF_REG_X86_MAX) - 1ULL))
-
 #ifdef CONFIG_X86_32
+#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_R8) | \
+		       (1ULL << PERF_REG_X86_R9) | \
+		       (1ULL << PERF_REG_X86_R10) | \
+		       (1ULL << PERF_REG_X86_R11) | \
+		       (1ULL << PERF_REG_X86_R12) | \
+		       (1ULL << PERF_REG_X86_R13) | \
+		       (1ULL << PERF_REG_X86_R14) | \
+		       (1ULL << PERF_REG_X86_R15))
+
 int perf_reg_validate(u64 mask)
 {
-	if (!mask || mask & REG_RESERVED)
+	if (!mask || (mask & REG_NOSUPPORT))
 		return -EINVAL;
 
 	return 0;
@@ -96,10 +112,7 @@ void perf_get_regs_user(struct perf_regs *regs_user,
 
 int perf_reg_validate(u64 mask)
 {
-	if (!mask || mask & REG_RESERVED)
-		return -EINVAL;
-
-	if (mask & REG_NOSUPPORT)
+	if (!mask || (mask & REG_NOSUPPORT))
 		return -EINVAL;
 
 	return 0;
-- 
2.17.1


  reply	other threads:[~2019-03-26 16:11 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-03-26 16:08 [PATCH V4 00/23] perf: Add Icelake support kan.liang
2019-03-26 16:08 ` kan.liang [this message]
2019-04-01 19:18   ` [PATCH V4 01/23] perf/x86: Support outputting XMM registers Stephane Eranian
2019-04-01 19:54     ` Liang, Kan
2019-04-01 21:11       ` Stephane Eranian
2019-04-01 22:33         ` Liang, Kan
2019-03-26 16:08 ` [PATCH V4 02/23] perf/x86/intel: Extract memory code PEBS parser for reuse kan.liang
2019-03-26 16:08 ` [PATCH V4 03/23] perf/x86/intel/ds: Extract code of event update in short period kan.liang
2019-03-26 16:08 ` [PATCH V4 04/23] perf/x86/intel: Support adaptive PEBSv4 kan.liang
2019-03-26 22:24   ` Andi Kleen
2019-03-27 14:25     ` Liang, Kan
2019-03-27 14:42       ` Andi Kleen
2019-03-26 16:08 ` [PATCH V4 05/23] perf/x86/lbr: Avoid reading the LBRs when adaptive PEBS handles them kan.liang
2019-03-26 16:08 ` [PATCH V4 06/23] perf/x86: Support constraint ranges kan.liang
2019-03-26 16:08 ` [PATCH V4 07/23] perf/x86/intel: Add Icelake support kan.liang
2019-03-26 16:08 ` [PATCH V4 08/23] perf/x86/intel/cstate: " kan.liang
2019-03-26 16:08 ` [PATCH V4 09/23] perf/x86/intel/rapl: " kan.liang
2019-03-26 16:08 ` [PATCH V4 10/23] perf/x86/msr: " kan.liang
2019-03-26 16:08 ` [PATCH V4 11/23] perf/x86/intel/uncore: Add Intel Icelake uncore support kan.liang
2019-03-26 16:08 ` [PATCH V4 12/23] perf/core: Support a REMOVE transaction kan.liang
2019-03-26 16:08 ` [PATCH V4 13/23] perf/x86/intel: Basic support for metrics counters kan.liang
2019-03-26 16:08 ` [PATCH V4 14/23] perf/x86/intel: Support overflows on SLOTS kan.liang
2019-03-26 16:08 ` [PATCH V4 15/23] perf/x86/intel: Support hardware TopDown metrics kan.liang
2019-03-26 16:08 ` [PATCH V4 16/23] perf/x86/intel: Set correct weight for topdown subevent counters kan.liang
2019-03-26 16:08 ` [PATCH V4 17/23] perf/x86/intel: Export new top down events for Icelake kan.liang
2019-03-26 16:08 ` [PATCH V4 18/23] perf/x86/intel: Disable sampling read slots and topdown kan.liang
2019-03-26 16:08 ` [PATCH V4 19/23] perf/x86/intel: Support CPUID 10.ECX to disable fixed counters kan.liang
2019-03-26 16:08 ` [PATCH V4 20/23] perf, tools: Add support for recording and printing XMM registers kan.liang
2019-03-26 16:08 ` [PATCH V4 21/23] perf, tools, stat: Support new per thread TopDown metrics kan.liang
2019-03-26 16:09 ` [PATCH V4 22/23] perf, tools: Add documentation for topdown metrics kan.liang
2019-03-26 16:09 ` [PATCH V4 23/23] perf vendor events intel: Add JSON files for Icelake kan.liang
2019-04-01 13:01 ` [PATCH V4 00/23] perf: Add Icelake support Liang, Kan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190326160901.4887-2-kan.liang@linux.intel.com \
    --to=kan.liang@linux.intel.com \
    --cc=acme@kernel.org \
    --cc=ak@linux.intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=eranian@google.com \
    --cc=jolsa@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).