[V4,01/23] perf/x86: Support outputting XMM registers
diff mbox series

Message ID 20190326160901.4887-2-kan.liang@linux.intel.com
State New
Headers show
Series
  • perf: Add Icelake support
Related show

Commit Message

Liang, Kan March 26, 2019, 4:08 p.m. UTC
From: Kan Liang <kan.liang@linux.intel.com>

Starting from Icelake, XMM registers can be collected in PEBS record.
But the current code only outputs the pt_regs.

Add a new struct x86_perf_regs for both pt_regs and xmm_regs.
XMM registers are 128 bit. To simplify the code, they are handled like
two different registers, which means setting two bits in the register
bitmap. This also allows sampling only the lower 64 bits of an XMM register.

The index of XMM registers starts from 32. There are 16 XMM registers.
So all reserved space for regs are used. Remove REG_RESERVED.

Add PERF_REG_X86_XMM_MAX, which stands for the max number of all x86
regs including both GPRs and XMM.

XMM is not supported on all platforms. Add has_xmm_regs to indicate
the specific platform. Also add checks in x86_pmu_hw_config() to reject
invalid config of regs_user and regs_intr.

Add REG_NOSUPPORT for 32bit to exclude unsupported registers.

Originally-by: Andi Kleen <ak@linux.intel.com>
Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
---

Changes since V3:
- Keep the old names for GPRs. Rename PERF_REG_X86_MAX to
  PERF_REG_X86_XMM_MAX
- Remove unnecessary REG_RESERVED
- Add REG_NOSUPPORT for 32bit

 arch/x86/events/core.c                | 10 ++++++++++
 arch/x86/events/perf_event.h          |  2 ++
 arch/x86/include/asm/perf_event.h     |  5 +++++
 arch/x86/include/uapi/asm/perf_regs.h | 23 ++++++++++++++++++++++-
 arch/x86/kernel/perf_regs.c           | 27 ++++++++++++++++++++-------
 5 files changed, 59 insertions(+), 8 deletions(-)

Comments

Stephane Eranian April 1, 2019, 7:18 p.m. UTC | #1
On Tue, Mar 26, 2019 at 9:11 AM <kan.liang@linux.intel.com> wrote:
>
> From: Kan Liang <kan.liang@linux.intel.com>
>
> Starting from Icelake, XMM registers can be collected in PEBS record.
> But current code only output the pt_regs.
>
> Add a new struct x86_perf_regs for both pt_regs and xmm_regs.
> XMM registers are 128 bit. To simplify the code, they are handled like
> two different registers, which means setting two bits in the register
> bitmap. This also allows only sampling the lower 64bit bits in XMM.
>
You are adding this new x86_perf_regs struct but the patch does not
include how it is allocated.
I don't see from this patch where x86_perf_regs->xmm_regs is actually allocated.

> The index of XMM registers starts from 32. There are 16 XMM registers.
> So all reserved space for regs are used. Remove REG_RESERVED.
>
> Add PERF_REG_X86_XMM_MAX, which stands for the max number of all x86
> regs including both GPRs and XMM.
>
> XMM is not supported on all platforms. Adding has_xmm_regs to indicate
> the specific platform. Also add checks in x86_pmu_hw_config() to reject
> invalid config of regs_user and regs_intr.
>
> Add REG_NOSUPPORT for 32bit to exclude unsupported registers.
>
> Originally-by: Andi Kleen <ak@linux.intel.com>
> Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
> ---
>
> Changes since V3:
> - Keep the old names for GPRs. Rename PERF_REG_X86_MAX to
>   PERF_REG_X86_XMM_MAX
> - Remove unnecessary REG_RESERVED
> - Add REG_NOSUPPORT for 32bit
>
>  arch/x86/events/core.c                | 10 ++++++++++
>  arch/x86/events/perf_event.h          |  2 ++
>  arch/x86/include/asm/perf_event.h     |  5 +++++
>  arch/x86/include/uapi/asm/perf_regs.h | 23 ++++++++++++++++++++++-
>  arch/x86/kernel/perf_regs.c           | 27 ++++++++++++++++++++-------
>  5 files changed, 59 insertions(+), 8 deletions(-)
>
> diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
> index e2b1447192a8..9378c6b2128f 100644
> --- a/arch/x86/events/core.c
> +++ b/arch/x86/events/core.c
> @@ -560,6 +560,16 @@ int x86_pmu_hw_config(struct perf_event *event)
>                         return -EINVAL;
>         }
>
> +       if (event->attr.sample_regs_user & ~PEBS_REGS)
> +               return -EINVAL;
> +       /*
> +        * Besides the general purpose registers, XMM registers may
> +        * be collected in PEBS on some platforms, e.g. Icelake
> +        */
> +       if ((event->attr.sample_regs_intr & ~PEBS_REGS) &&
> +           (!x86_pmu.has_xmm_regs || !event->attr.precise_ip))
> +               return -EINVAL;
> +
Shouldn't you be testing against PEBS_REGS only if the user is asking for
PEBS sampling?
That is, just because PEBS may not capture a register does not mean the
kernel could not capture it without PEBS.

>         return x86_setup_perfctr(event);
>  }
>
> diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
> index a75955741c50..6428941a5073 100644
> --- a/arch/x86/events/perf_event.h
> +++ b/arch/x86/events/perf_event.h
> @@ -657,6 +657,8 @@ struct x86_pmu {
>          * Check period value for PERF_EVENT_IOC_PERIOD ioctl.
>          */
>         int (*check_period) (struct perf_event *event, u64 period);
> +
> +       unsigned int    has_xmm_regs : 1; /* support XMM regs */
>  };
Is this an Intel specific field? If so, then say intel_has_xmm_regs,
just like amd_nb_constraints above.
If not, then define what it is supposed to mean. Because I am sure
there is another way to detect if
the CPU supports XMM regs, like cpufeatures?

>
>  struct x86_perf_task_context {
> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
> index 8bdf74902293..d9f5bbe44b3c 100644
> --- a/arch/x86/include/asm/perf_event.h
> +++ b/arch/x86/include/asm/perf_event.h
> @@ -248,6 +248,11 @@ extern void perf_events_lapic_init(void);
>  #define PERF_EFLAGS_VM         (1UL << 5)
>
>  struct pt_regs;
> +struct x86_perf_regs {
> +       struct pt_regs  regs;
> +       u64             *xmm_regs;
> +};
> +
>  extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
>  extern unsigned long perf_misc_flags(struct pt_regs *regs);
>  #define perf_misc_flags(regs)  perf_misc_flags(regs)
> diff --git a/arch/x86/include/uapi/asm/perf_regs.h b/arch/x86/include/uapi/asm/perf_regs.h
> index f3329cabce5c..ac67bbea10ca 100644
> --- a/arch/x86/include/uapi/asm/perf_regs.h
> +++ b/arch/x86/include/uapi/asm/perf_regs.h
> @@ -27,8 +27,29 @@ enum perf_event_x86_regs {
>         PERF_REG_X86_R13,
>         PERF_REG_X86_R14,
>         PERF_REG_X86_R15,
> -
> +       /* These are the limits for the GPRs. */
>         PERF_REG_X86_32_MAX = PERF_REG_X86_GS + 1,
>         PERF_REG_X86_64_MAX = PERF_REG_X86_R15 + 1,
> +
> +       /* These all need two bits set because they are 128bit */
> +       PERF_REG_X86_XMM0  = 32,
> +       PERF_REG_X86_XMM1  = 34,
> +       PERF_REG_X86_XMM2  = 36,
> +       PERF_REG_X86_XMM3  = 38,
> +       PERF_REG_X86_XMM4  = 40,
> +       PERF_REG_X86_XMM5  = 42,
> +       PERF_REG_X86_XMM6  = 44,
> +       PERF_REG_X86_XMM7  = 46,
> +       PERF_REG_X86_XMM8  = 48,
> +       PERF_REG_X86_XMM9  = 50,
> +       PERF_REG_X86_XMM10 = 52,
> +       PERF_REG_X86_XMM11 = 54,
> +       PERF_REG_X86_XMM12 = 56,
> +       PERF_REG_X86_XMM13 = 58,
> +       PERF_REG_X86_XMM14 = 60,
> +       PERF_REG_X86_XMM15 = 62,
> +
> +       /* These include both GPRs and XMMX registers */
> +       PERF_REG_X86_XMM_MAX = PERF_REG_X86_XMM15 + 2,
>  };
>  #endif /* _ASM_X86_PERF_REGS_H */
> diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
> index c06c4c16c6b6..07c30ee17425 100644
> --- a/arch/x86/kernel/perf_regs.c
> +++ b/arch/x86/kernel/perf_regs.c
> @@ -59,18 +59,34 @@ static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = {
>
>  u64 perf_reg_value(struct pt_regs *regs, int idx)
>  {
> +       struct x86_perf_regs *perf_regs;
> +
> +       if (idx >= PERF_REG_X86_XMM0 && idx < PERF_REG_X86_XMM_MAX) {
> +               perf_regs = container_of(regs, struct x86_perf_regs, regs);
> +               if (!perf_regs->xmm_regs)
> +                       return 0;
> +               return perf_regs->xmm_regs[idx - PERF_REG_X86_XMM0];
> +       }
> +
>         if (WARN_ON_ONCE(idx >= ARRAY_SIZE(pt_regs_offset)))
>                 return 0;
>
>         return regs_get_register(regs, pt_regs_offset[idx]);
>  }
>
> -#define REG_RESERVED (~((1ULL << PERF_REG_X86_MAX) - 1ULL))
> -
>  #ifdef CONFIG_X86_32
> +#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_R8) | \
> +                      (1ULL << PERF_REG_X86_R9) | \
> +                      (1ULL << PERF_REG_X86_R10) | \
> +                      (1ULL << PERF_REG_X86_R11) | \
> +                      (1ULL << PERF_REG_X86_R12) | \
> +                      (1ULL << PERF_REG_X86_R13) | \
> +                      (1ULL << PERF_REG_X86_R14) | \
> +                      (1ULL << PERF_REG_X86_R15))
> +
>  int perf_reg_validate(u64 mask)
>  {
> -       if (!mask || mask & REG_RESERVED)
> +       if (!mask || (mask & REG_NOSUPPORT))
>                 return -EINVAL;
>
>         return 0;
> @@ -96,10 +112,7 @@ void perf_get_regs_user(struct perf_regs *regs_user,
>
>  int perf_reg_validate(u64 mask)
>  {
> -       if (!mask || mask & REG_RESERVED)
> -               return -EINVAL;
> -
> -       if (mask & REG_NOSUPPORT)
> +       if (!mask || (mask & REG_NOSUPPORT))
>                 return -EINVAL;
>
>         return 0;
> --
> 2.17.1
>
Liang, Kan April 1, 2019, 7:54 p.m. UTC | #2
On 4/1/2019 3:18 PM, Stephane Eranian wrote:
> On Tue, Mar 26, 2019 at 9:11 AM <kan.liang@linux.intel.com> wrote:
>>
>> From: Kan Liang <kan.liang@linux.intel.com>
>>
>> Starting from Icelake, XMM registers can be collected in PEBS record.
>> But current code only output the pt_regs.
>>
>> Add a new struct x86_perf_regs for both pt_regs and xmm_regs.
>> XMM registers are 128 bit. To simplify the code, they are handled like
>> two different registers, which means setting two bits in the register
>> bitmap. This also allows only sampling the lower 64bit bits in XMM.
>>
> You are adding this new x86_perf_regs struct but the patch does not
> include how it is allocated.
> I don't see from this patch where x86_perf_regs->xmm_regs is actually allocated.
>

The x86_perf_regs->xmm_regs saves the pointer to the PEBS record.
We don't allocate space for it.
The related code can be found at 04/23 "perf/x86/intel: Support adaptive 
PEBSv4"

+	if (format_size & PEBS_DATACFG_XMMS) {
+		struct pebs_xmm *xmm = next_record;
+
+		next_record = xmm + 1;
+		perf_regs->xmm_regs = xmm->xmm;
+	}

This patch only include the generic support for x86_perf_regs.

>> The index of XMM registers starts from 32. There are 16 XMM registers.
>> So all reserved space for regs are used. Remove REG_RESERVED.
>>
>> Add PERF_REG_X86_XMM_MAX, which stands for the max number of all x86
>> regs including both GPRs and XMM.
>>
>> XMM is not supported on all platforms. Adding has_xmm_regs to indicate
>> the specific platform. Also add checks in x86_pmu_hw_config() to reject
>> invalid config of regs_user and regs_intr.
>>
>> Add REG_NOSUPPORT for 32bit to exclude unsupported registers.
>>
>> Originally-by: Andi Kleen <ak@linux.intel.com>
>> Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
>> Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
>> ---
>>
>> Changes since V3:
>> - Keep the old names for GPRs. Rename PERF_REG_X86_MAX to
>>    PERF_REG_X86_XMM_MAX
>> - Remove unnecessary REG_RESERVED
>> - Add REG_NOSUPPORT for 32bit
>>
>>   arch/x86/events/core.c                | 10 ++++++++++
>>   arch/x86/events/perf_event.h          |  2 ++
>>   arch/x86/include/asm/perf_event.h     |  5 +++++
>>   arch/x86/include/uapi/asm/perf_regs.h | 23 ++++++++++++++++++++++-
>>   arch/x86/kernel/perf_regs.c           | 27 ++++++++++++++++++++-------
>>   5 files changed, 59 insertions(+), 8 deletions(-)
>>
>> diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
>> index e2b1447192a8..9378c6b2128f 100644
>> --- a/arch/x86/events/core.c
>> +++ b/arch/x86/events/core.c
>> @@ -560,6 +560,16 @@ int x86_pmu_hw_config(struct perf_event *event)
>>                          return -EINVAL;
>>          }
>>
>> +       if (event->attr.sample_regs_user & ~PEBS_REGS)
>> +               return -EINVAL;
>> +       /*
>> +        * Besides the general purpose registers, XMM registers may
>> +        * be collected in PEBS on some platforms, e.g. Icelake
>> +        */
>> +       if ((event->attr.sample_regs_intr & ~PEBS_REGS) &&
>> +           (!x86_pmu.has_xmm_regs || !event->attr.precise_ip))
>> +               return -EINVAL;
>> +
> Shouldn't you be testing on PEBS_REGS only if the user is asking for
> PEBS sampling?
> That is not because PEBS may not capture a register that the kernel
> could not do it
> without PEBS.

I will add is_sampling_event() check as below.

if (is_sampling_event(event) &&
     (event->attr.sample_regs_user & ~PEBS_REGS))
          return -EINVAL;
if (is_sampling_event(event) &&
     (event->attr.sample_regs_intr & ~PEBS_REGS) &&
     (!x86_pmu.has_xmm_regs || !event->attr.precise_ip))
         return -EINVAL;


> 
>>          return x86_setup_perfctr(event);
>>   }
>>
>> diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
>> index a75955741c50..6428941a5073 100644
>> --- a/arch/x86/events/perf_event.h
>> +++ b/arch/x86/events/perf_event.h
>> @@ -657,6 +657,8 @@ struct x86_pmu {
>>           * Check period value for PERF_EVENT_IOC_PERIOD ioctl.
>>           */
>>          int (*check_period) (struct perf_event *event, u64 period);
>> +
>> +       unsigned int    has_xmm_regs : 1; /* support XMM regs */
>>   };
> Is this an Intel specific field? If so, then say intel_has_xmm_regs,
> just like amd_nb_constraints above.

I'm not familiar with AMD. I just google it. It looks like AMD also has 
XMM registers.

> If not, then define what is is supposed to mean? Because I am sure
> there is another way to detect if
> the CPU support XMM regs, like cpufeatures?

It means that XMM registers can be collected in PEBS record.
How about the name "pebs_has_xmm_regs"?


Thanks,
Kan

> 
>>
>>   struct x86_perf_task_context {
>> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
>> index 8bdf74902293..d9f5bbe44b3c 100644
>> --- a/arch/x86/include/asm/perf_event.h
>> +++ b/arch/x86/include/asm/perf_event.h
>> @@ -248,6 +248,11 @@ extern void perf_events_lapic_init(void);
>>   #define PERF_EFLAGS_VM         (1UL << 5)
>>
>>   struct pt_regs;
>> +struct x86_perf_regs {
>> +       struct pt_regs  regs;
>> +       u64             *xmm_regs;
>> +};
>> +
>>   extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
>>   extern unsigned long perf_misc_flags(struct pt_regs *regs);
>>   #define perf_misc_flags(regs)  perf_misc_flags(regs)
>> diff --git a/arch/x86/include/uapi/asm/perf_regs.h b/arch/x86/include/uapi/asm/perf_regs.h
>> index f3329cabce5c..ac67bbea10ca 100644
>> --- a/arch/x86/include/uapi/asm/perf_regs.h
>> +++ b/arch/x86/include/uapi/asm/perf_regs.h
>> @@ -27,8 +27,29 @@ enum perf_event_x86_regs {
>>          PERF_REG_X86_R13,
>>          PERF_REG_X86_R14,
>>          PERF_REG_X86_R15,
>> -
>> +       /* These are the limits for the GPRs. */
>>          PERF_REG_X86_32_MAX = PERF_REG_X86_GS + 1,
>>          PERF_REG_X86_64_MAX = PERF_REG_X86_R15 + 1,
>> +
>> +       /* These all need two bits set because they are 128bit */
>> +       PERF_REG_X86_XMM0  = 32,
>> +       PERF_REG_X86_XMM1  = 34,
>> +       PERF_REG_X86_XMM2  = 36,
>> +       PERF_REG_X86_XMM3  = 38,
>> +       PERF_REG_X86_XMM4  = 40,
>> +       PERF_REG_X86_XMM5  = 42,
>> +       PERF_REG_X86_XMM6  = 44,
>> +       PERF_REG_X86_XMM7  = 46,
>> +       PERF_REG_X86_XMM8  = 48,
>> +       PERF_REG_X86_XMM9  = 50,
>> +       PERF_REG_X86_XMM10 = 52,
>> +       PERF_REG_X86_XMM11 = 54,
>> +       PERF_REG_X86_XMM12 = 56,
>> +       PERF_REG_X86_XMM13 = 58,
>> +       PERF_REG_X86_XMM14 = 60,
>> +       PERF_REG_X86_XMM15 = 62,
>> +
>> +       /* These include both GPRs and XMMX registers */
>> +       PERF_REG_X86_XMM_MAX = PERF_REG_X86_XMM15 + 2,
>>   };
>>   #endif /* _ASM_X86_PERF_REGS_H */
>> diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
>> index c06c4c16c6b6..07c30ee17425 100644
>> --- a/arch/x86/kernel/perf_regs.c
>> +++ b/arch/x86/kernel/perf_regs.c
>> @@ -59,18 +59,34 @@ static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = {
>>
>>   u64 perf_reg_value(struct pt_regs *regs, int idx)
>>   {
>> +       struct x86_perf_regs *perf_regs;
>> +
>> +       if (idx >= PERF_REG_X86_XMM0 && idx < PERF_REG_X86_XMM_MAX) {
>> +               perf_regs = container_of(regs, struct x86_perf_regs, regs);
>> +               if (!perf_regs->xmm_regs)
>> +                       return 0;
>> +               return perf_regs->xmm_regs[idx - PERF_REG_X86_XMM0];
>> +       }
>> +
>>          if (WARN_ON_ONCE(idx >= ARRAY_SIZE(pt_regs_offset)))
>>                  return 0;
>>
>>          return regs_get_register(regs, pt_regs_offset[idx]);
>>   }
>>
>> -#define REG_RESERVED (~((1ULL << PERF_REG_X86_MAX) - 1ULL))
>> -
>>   #ifdef CONFIG_X86_32
>> +#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_R8) | \
>> +                      (1ULL << PERF_REG_X86_R9) | \
>> +                      (1ULL << PERF_REG_X86_R10) | \
>> +                      (1ULL << PERF_REG_X86_R11) | \
>> +                      (1ULL << PERF_REG_X86_R12) | \
>> +                      (1ULL << PERF_REG_X86_R13) | \
>> +                      (1ULL << PERF_REG_X86_R14) | \
>> +                      (1ULL << PERF_REG_X86_R15))
>> +
>>   int perf_reg_validate(u64 mask)
>>   {
>> -       if (!mask || mask & REG_RESERVED)
>> +       if (!mask || (mask & REG_NOSUPPORT))
>>                  return -EINVAL;
>>
>>          return 0;
>> @@ -96,10 +112,7 @@ void perf_get_regs_user(struct perf_regs *regs_user,
>>
>>   int perf_reg_validate(u64 mask)
>>   {
>> -       if (!mask || mask & REG_RESERVED)
>> -               return -EINVAL;
>> -
>> -       if (mask & REG_NOSUPPORT)
>> +       if (!mask || (mask & REG_NOSUPPORT))
>>                  return -EINVAL;
>>
>>          return 0;
>> --
>> 2.17.1
>>
Stephane Eranian April 1, 2019, 9:11 p.m. UTC | #3
On Mon, Apr 1, 2019 at 12:54 PM Liang, Kan <kan.liang@linux.intel.com> wrote:
>
>
>
> On 4/1/2019 3:18 PM, Stephane Eranian wrote:
> > On Tue, Mar 26, 2019 at 9:11 AM <kan.liang@linux.intel.com> wrote:
> >>
> >> From: Kan Liang <kan.liang@linux.intel.com>
> >>
> >> Starting from Icelake, XMM registers can be collected in PEBS record.
> >> But current code only output the pt_regs.
> >>
> >> Add a new struct x86_perf_regs for both pt_regs and xmm_regs.
> >> XMM registers are 128 bit. To simplify the code, they are handled like
> >> two different registers, which means setting two bits in the register
> >> bitmap. This also allows only sampling the lower 64bit bits in XMM.
> >>
> > You are adding this new x86_perf_regs struct but the patch does not
> > include how it is allocated.
> > I don't see from this patch where x86_perf_regs->xmm_regs is actually allocated.
> >
>
> The x86_perf_regs->xmm_regs saves the pointer to PEBS record.
> We don't allocate space for it.
> The related code can be found at 04/23 "perf/x86/intel: Support adaptive
> PEBSv4"

Ok, I will look at it next.

>
> +       if (format_size & PEBS_DATACFG_XMMS) {
> +               struct pebs_xmm *xmm = next_record;
> +
> +               next_record = xmm + 1;
> +               perf_regs->xmm_regs = xmm->xmm;
> +       }
>
> This patch only include the generic support for x86_perf_regs.
>
> >> The index of XMM registers starts from 32. There are 16 XMM registers.
> >> So all reserved space for regs are used. Remove REG_RESERVED.
> >>
> >> Add PERF_REG_X86_XMM_MAX, which stands for the max number of all x86
> >> regs including both GPRs and XMM.
> >>
> >> XMM is not supported on all platforms. Adding has_xmm_regs to indicate
> >> the specific platform. Also add checks in x86_pmu_hw_config() to reject
> >> invalid config of regs_user and regs_intr.
> >>
> >> Add REG_NOSUPPORT for 32bit to exclude unsupported registers.
> >>
> >> Originally-by: Andi Kleen <ak@linux.intel.com>
> >> Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> >> Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
> >> ---
> >>
> >> Changes since V3:
> >> - Keep the old names for GPRs. Rename PERF_REG_X86_MAX to
> >>    PERF_REG_X86_XMM_MAX
> >> - Remove unnecessary REG_RESERVED
> >> - Add REG_NOSUPPORT for 32bit
> >>
> >>   arch/x86/events/core.c                | 10 ++++++++++
> >>   arch/x86/events/perf_event.h          |  2 ++
> >>   arch/x86/include/asm/perf_event.h     |  5 +++++
> >>   arch/x86/include/uapi/asm/perf_regs.h | 23 ++++++++++++++++++++++-
> >>   arch/x86/kernel/perf_regs.c           | 27 ++++++++++++++++++++-------
> >>   5 files changed, 59 insertions(+), 8 deletions(-)
> >>
> >> diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
> >> index e2b1447192a8..9378c6b2128f 100644
> >> --- a/arch/x86/events/core.c
> >> +++ b/arch/x86/events/core.c
> >> @@ -560,6 +560,16 @@ int x86_pmu_hw_config(struct perf_event *event)
> >>                          return -EINVAL;
> >>          }
> >>
> >> +       if (event->attr.sample_regs_user & ~PEBS_REGS)
> >> +               return -EINVAL;
> >> +       /*
> >> +        * Besides the general purpose registers, XMM registers may
> >> +        * be collected in PEBS on some platforms, e.g. Icelake
> >> +        */
> >> +       if ((event->attr.sample_regs_intr & ~PEBS_REGS) &&
> >> +           (!x86_pmu.has_xmm_regs || !event->attr.precise_ip))
> >> +               return -EINVAL;
> >> +
> > Shouldn't you be testing on PEBS_REGS only if the user is asking for
> > PEBS sampling?
> > That is not because PEBS may not capture a register that the kernel
> > could not do it
> > without PEBS.
>
> I will add is_sampling_event() check as below.
>
> if (is_sampling_event(event) &&
>      (event->attr.sample_regs_user & ~PEBS_REGS))
>           return -EINVAL;
> if (is_sampling_event(event) &&
>      (event->attr.sample_regs_intr & ~PEBS_REGS) &&
>      (!x86_pmu.has_xmm_regs || !event->attr.precise_ip))
>          return -EINVAL;
>
That is not enough. I can be sampling without PEBS and thus why I am comparing
to PEBS_REGS? If I recall by the time the kernel gets to this code,
the sample_regs_* has
already been validated to contain only supported registers. So you
need this extra check
to make sure that WHEN you are sampling with PEBS, then they are also
covered by PEBS.

Also if I sample with sample_regs_users != 0 and sample_regs_intr != 0
and PEBS, and
I get a kernel sample, I wonder how sample_regs_users can be updated from PEBS.
I think you can update from PEBS it ONLY when the sample was for a
user-level instruction
in which case both sample_regs_user and sample_regs_intr can be served
from the PEBS
machine state.

>
> >
> >>          return x86_setup_perfctr(event);
> >>   }
> >>
> >> diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
> >> index a75955741c50..6428941a5073 100644
> >> --- a/arch/x86/events/perf_event.h
> >> +++ b/arch/x86/events/perf_event.h
> >> @@ -657,6 +657,8 @@ struct x86_pmu {
> >>           * Check period value for PERF_EVENT_IOC_PERIOD ioctl.
> >>           */
> >>          int (*check_period) (struct perf_event *event, u64 period);
> >> +
> >> +       unsigned int    has_xmm_regs : 1; /* support XMM regs */
> >>   };
> > Is this an Intel specific field? If so, then say intel_has_xmm_regs,
> > just like amd_nb_constraints above.
>
> I'm not familiar with AMD. I just google it. It looks like AMD also has
> XMM registers.
>
> > If not, then define what is is supposed to mean? Because I am sure
> > there is another way to detect if
> > the CPU support XMM regs, like cpufeatures?
>
> It means that XMM registers can be collected in PEBS record.
> How about the name "pebs_has_xmm_regs"?
>
>
> Thanks,
> Kan
>
> >
> >>
> >>   struct x86_perf_task_context {
> >> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
> >> index 8bdf74902293..d9f5bbe44b3c 100644
> >> --- a/arch/x86/include/asm/perf_event.h
> >> +++ b/arch/x86/include/asm/perf_event.h
> >> @@ -248,6 +248,11 @@ extern void perf_events_lapic_init(void);
> >>   #define PERF_EFLAGS_VM         (1UL << 5)
> >>
> >>   struct pt_regs;
> >> +struct x86_perf_regs {
> >> +       struct pt_regs  regs;
> >> +       u64             *xmm_regs;
> >> +};
> >> +
> >>   extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
> >>   extern unsigned long perf_misc_flags(struct pt_regs *regs);
> >>   #define perf_misc_flags(regs)  perf_misc_flags(regs)
> >> diff --git a/arch/x86/include/uapi/asm/perf_regs.h b/arch/x86/include/uapi/asm/perf_regs.h
> >> index f3329cabce5c..ac67bbea10ca 100644
> >> --- a/arch/x86/include/uapi/asm/perf_regs.h
> >> +++ b/arch/x86/include/uapi/asm/perf_regs.h
> >> @@ -27,8 +27,29 @@ enum perf_event_x86_regs {
> >>          PERF_REG_X86_R13,
> >>          PERF_REG_X86_R14,
> >>          PERF_REG_X86_R15,
> >> -
> >> +       /* These are the limits for the GPRs. */
> >>          PERF_REG_X86_32_MAX = PERF_REG_X86_GS + 1,
> >>          PERF_REG_X86_64_MAX = PERF_REG_X86_R15 + 1,
> >> +
> >> +       /* These all need two bits set because they are 128bit */
> >> +       PERF_REG_X86_XMM0  = 32,
> >> +       PERF_REG_X86_XMM1  = 34,
> >> +       PERF_REG_X86_XMM2  = 36,
> >> +       PERF_REG_X86_XMM3  = 38,
> >> +       PERF_REG_X86_XMM4  = 40,
> >> +       PERF_REG_X86_XMM5  = 42,
> >> +       PERF_REG_X86_XMM6  = 44,
> >> +       PERF_REG_X86_XMM7  = 46,
> >> +       PERF_REG_X86_XMM8  = 48,
> >> +       PERF_REG_X86_XMM9  = 50,
> >> +       PERF_REG_X86_XMM10 = 52,
> >> +       PERF_REG_X86_XMM11 = 54,
> >> +       PERF_REG_X86_XMM12 = 56,
> >> +       PERF_REG_X86_XMM13 = 58,
> >> +       PERF_REG_X86_XMM14 = 60,
> >> +       PERF_REG_X86_XMM15 = 62,
> >> +
> >> +       /* These include both GPRs and XMMX registers */
> >> +       PERF_REG_X86_XMM_MAX = PERF_REG_X86_XMM15 + 2,
> >>   };
> >>   #endif /* _ASM_X86_PERF_REGS_H */
> >> diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
> >> index c06c4c16c6b6..07c30ee17425 100644
> >> --- a/arch/x86/kernel/perf_regs.c
> >> +++ b/arch/x86/kernel/perf_regs.c
> >> @@ -59,18 +59,34 @@ static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = {
> >>
> >>   u64 perf_reg_value(struct pt_regs *regs, int idx)
> >>   {
> >> +       struct x86_perf_regs *perf_regs;
> >> +
> >> +       if (idx >= PERF_REG_X86_XMM0 && idx < PERF_REG_X86_XMM_MAX) {
> >> +               perf_regs = container_of(regs, struct x86_perf_regs, regs);
> >> +               if (!perf_regs->xmm_regs)
> >> +                       return 0;
> >> +               return perf_regs->xmm_regs[idx - PERF_REG_X86_XMM0];
> >> +       }
> >> +
> >>          if (WARN_ON_ONCE(idx >= ARRAY_SIZE(pt_regs_offset)))
> >>                  return 0;
> >>
> >>          return regs_get_register(regs, pt_regs_offset[idx]);
> >>   }
> >>
> >> -#define REG_RESERVED (~((1ULL << PERF_REG_X86_MAX) - 1ULL))
> >> -
> >>   #ifdef CONFIG_X86_32
> >> +#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_R8) | \
> >> +                      (1ULL << PERF_REG_X86_R9) | \
> >> +                      (1ULL << PERF_REG_X86_R10) | \
> >> +                      (1ULL << PERF_REG_X86_R11) | \
> >> +                      (1ULL << PERF_REG_X86_R12) | \
> >> +                      (1ULL << PERF_REG_X86_R13) | \
> >> +                      (1ULL << PERF_REG_X86_R14) | \
> >> +                      (1ULL << PERF_REG_X86_R15))
> >> +
> >>   int perf_reg_validate(u64 mask)
> >>   {
> >> -       if (!mask || mask & REG_RESERVED)
> >> +       if (!mask || (mask & REG_NOSUPPORT))
> >>                  return -EINVAL;
> >>
> >>          return 0;
> >> @@ -96,10 +112,7 @@ void perf_get_regs_user(struct perf_regs *regs_user,
> >>
> >>   int perf_reg_validate(u64 mask)
> >>   {
> >> -       if (!mask || mask & REG_RESERVED)
> >> -               return -EINVAL;
> >> -
> >> -       if (mask & REG_NOSUPPORT)
> >> +       if (!mask || (mask & REG_NOSUPPORT))
> >>                  return -EINVAL;
> >>
> >>          return 0;
> >> --
> >> 2.17.1
> >>
Liang, Kan April 1, 2019, 10:33 p.m. UTC | #4
On 4/1/2019 5:11 PM, Stephane Eranian wrote:
>>>> diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
>>>> index e2b1447192a8..9378c6b2128f 100644
>>>> --- a/arch/x86/events/core.c
>>>> +++ b/arch/x86/events/core.c
>>>> @@ -560,6 +560,16 @@ int x86_pmu_hw_config(struct perf_event *event)
>>>>                           return -EINVAL;
>>>>           }
>>>>
>>>> +       if (event->attr.sample_regs_user & ~PEBS_REGS)
>>>> +               return -EINVAL;
>>>> +       /*
>>>> +        * Besides the general purpose registers, XMM registers may
>>>> +        * be collected in PEBS on some platforms, e.g. Icelake
>>>> +        */
>>>> +       if ((event->attr.sample_regs_intr & ~PEBS_REGS) &&
>>>> +           (!x86_pmu.has_xmm_regs || !event->attr.precise_ip))
>>>> +               return -EINVAL;
>>>> +
>>> Shouldn't you be testing on PEBS_REGS only if the user is asking for
>>> PEBS sampling?
>>> That is not because PEBS may not capture a register that the kernel
>>> could not do it
>>> without PEBS.
>> I will add is_sampling_event() check as below.
>>
>> if (is_sampling_event(event) &&
>>       (event->attr.sample_regs_user & ~PEBS_REGS))
>>            return -EINVAL;
>> if (is_sampling_event(event) &&
>>       (event->attr.sample_regs_intr & ~PEBS_REGS) &&
>>       (!x86_pmu.has_xmm_regs || !event->attr.precise_ip))
>>           return -EINVAL;
>>
> That is not enough. I can be sampling without PEBS and thus why I am comparing
> to PEBS_REGS? If I recall by the time the kernel gets to this code,
> the sample_regs_* has
> already been validated to contain only supported registers. So you
> need this extra check
> to make sure that WHEN you are sampling with PEBS, then they are also
> covered by PEBS.

Yes, the common code still validate the supported registers. However, it 
cannot check model specific registers, e.g. XMM.
The extra check here is only for XMM registers. If it's non-PEBS | 
non-sampling | pre-icl and XMM bit is set for sample_regs_intr, it 
should error out.

It looks like PEBS_REGS is a very confusing name. I can rename it to
PEBS_GPRS_REGS, and add a new name for PEBS_XMM_REGS.
How about the code as below? (not test yet)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index e2b1447192a8..e93c43e54c75 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -560,6 +560,21 @@ int x86_pmu_hw_config(struct perf_event *event)
  			return -EINVAL;
  	}

+	/* sample_regs_user never support XMM registers */
+	if (unlikely(event->attr.sample_regs_user & PEBS_XMM_REGS))
+		return -EINVAL;
+	/*
+	 * Besides the general purpose registers, XMM registers may
+	 * be collected in PEBS on some platforms, e.g. Icelake
+	 */
+	if (unlikely(event->attr.sample_regs_intr & PEBS_XMM_REGS)) {
+		if (!is_sampling_event(event) ||
+		    !event->attr.precise_ip ||
+		    x86_pmu.pebs_no_xmm_regs)
+			return -EINVAL;
+
+	}
+
  	return x86_setup_perfctr(event);
  }

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 8baa441d8000..a9721457f187 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3131,7 +3131,7 @@ static unsigned long 
intel_pmu_large_pebs_flags(struct perf_event *event)
  		flags &= ~PERF_SAMPLE_TIME;
  	if (!event->attr.exclude_kernel)
  		flags &= ~PERF_SAMPLE_REGS_USER;
-	if (event->attr.sample_regs_user & ~PEBS_REGS)
+	if (event->attr.sample_regs_user & ~PEBS_GPRS_REGS)
  		flags &= ~(PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR);
  	return flags;
  }
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 10c99ce1fead..f57e6cb7fd99 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1628,8 +1628,10 @@ void __init intel_ds_init(void)
  	x86_pmu.bts  = boot_cpu_has(X86_FEATURE_BTS);
  	x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
  	x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
-	if (x86_pmu.version <= 4)
+	if (x86_pmu.version <= 4) {
  		x86_pmu.pebs_no_isolation = 1;
+		x86_pmu.pebs_no_xmm_regs = 1;
+	}
  	if (x86_pmu.pebs) {
  		char pebs_type = x86_pmu.intel_cap.pebs_trap ?  '+' : '-';
  		int format = x86_pmu.intel_cap.pebs_format;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index a75955741c50..3b195435b386 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -96,7 +96,7 @@ struct amd_nb {
  	PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER | \
  	PERF_SAMPLE_PERIOD)

-#define PEBS_REGS \
+#define PEBS_GPRS_REGS \
  	(PERF_REG_X86_AX | \
  	 PERF_REG_X86_BX | \
  	 PERF_REG_X86_CX | \
@@ -116,6 +116,24 @@ struct amd_nb {
  	 PERF_REG_X86_R14 | \
  	 PERF_REG_X86_R15)

+#define PEBS_XMM_REGS         \
+	(PERF_REG_X86_XMM0  | \
+	 PERF_REG_X86_XMM1  | \
+	 PERF_REG_X86_XMM2  | \
+	 PERF_REG_X86_XMM3  | \
+	 PERF_REG_X86_XMM4  | \
+	 PERF_REG_X86_XMM5  | \
+	 PERF_REG_X86_XMM6  | \
+	 PERF_REG_X86_XMM7  | \
+	 PERF_REG_X86_XMM8  | \
+	 PERF_REG_X86_XMM9  | \
+	 PERF_REG_X86_XMM10 | \
+	 PERF_REG_X86_XMM11 | \
+	 PERF_REG_X86_XMM12 | \
+	 PERF_REG_X86_XMM13 | \
+	 PERF_REG_X86_XMM14 | \
+	 PERF_REG_X86_XMM15)
+
  /*
   * Per register state.
   */
@@ -613,7 +631,8 @@ struct x86_pmu {
  			pebs_broken		:1,
  			pebs_prec_dist		:1,
  			pebs_no_tlb		:1,
-			pebs_no_isolation	:1;
+			pebs_no_isolation	:1,
+			pebs_no_xmm_regs	:1;
  	int		pebs_record_size;
  	int		pebs_buffer_size;
  	void		(*drain_pebs)(struct pt_regs *regs);
> 
> Also if I sample with sample_regs_users != 0 and sample_regs_intr != 0
> and PEBS, and
> I get a kernel sample, I wonder how sample_regs_users can be updated from PEBS.
> I think you can update from PEBS it ONLY when the sample was for a
> user-level instruction
> in which case both sample_regs_user and sample_regs_intr can be served
> from the PEBS
> machine state.
> 

AFAIK, sample_regs_user is not populated from PEBS, so nothing is
changed for sample_regs_user. It doesn't support XMM registers.

Thanks,
Kan

Patch
diff mbox series

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index e2b1447192a8..9378c6b2128f 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -560,6 +560,16 @@  int x86_pmu_hw_config(struct perf_event *event)
 			return -EINVAL;
 	}
 
+	if (event->attr.sample_regs_user & ~PEBS_REGS)
+		return -EINVAL;
+	/*
+	 * Besides the general purpose registers, XMM registers may
+	 * be collected in PEBS on some platforms, e.g. Icelake
+	 */
+	if ((event->attr.sample_regs_intr & ~PEBS_REGS) &&
+	    (!x86_pmu.has_xmm_regs || !event->attr.precise_ip))
+		return -EINVAL;
+
 	return x86_setup_perfctr(event);
 }
 
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index a75955741c50..6428941a5073 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -657,6 +657,8 @@  struct x86_pmu {
 	 * Check period value for PERF_EVENT_IOC_PERIOD ioctl.
 	 */
 	int (*check_period) (struct perf_event *event, u64 period);
+
+	unsigned int	has_xmm_regs : 1; /* support XMM regs */
 };
 
 struct x86_perf_task_context {
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 8bdf74902293..d9f5bbe44b3c 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -248,6 +248,11 @@  extern void perf_events_lapic_init(void);
 #define PERF_EFLAGS_VM		(1UL << 5)
 
 struct pt_regs;
+struct x86_perf_regs {
+	struct pt_regs	regs;
+	u64		*xmm_regs;
+};
+
 extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
 extern unsigned long perf_misc_flags(struct pt_regs *regs);
 #define perf_misc_flags(regs)	perf_misc_flags(regs)
diff --git a/arch/x86/include/uapi/asm/perf_regs.h b/arch/x86/include/uapi/asm/perf_regs.h
index f3329cabce5c..ac67bbea10ca 100644
--- a/arch/x86/include/uapi/asm/perf_regs.h
+++ b/arch/x86/include/uapi/asm/perf_regs.h
@@ -27,8 +27,29 @@  enum perf_event_x86_regs {
 	PERF_REG_X86_R13,
 	PERF_REG_X86_R14,
 	PERF_REG_X86_R15,
-
+	/* These are the limits for the GPRs. */
 	PERF_REG_X86_32_MAX = PERF_REG_X86_GS + 1,
 	PERF_REG_X86_64_MAX = PERF_REG_X86_R15 + 1,
+
+	/* These all need two bits set because they are 128bit */
+	PERF_REG_X86_XMM0  = 32,
+	PERF_REG_X86_XMM1  = 34,
+	PERF_REG_X86_XMM2  = 36,
+	PERF_REG_X86_XMM3  = 38,
+	PERF_REG_X86_XMM4  = 40,
+	PERF_REG_X86_XMM5  = 42,
+	PERF_REG_X86_XMM6  = 44,
+	PERF_REG_X86_XMM7  = 46,
+	PERF_REG_X86_XMM8  = 48,
+	PERF_REG_X86_XMM9  = 50,
+	PERF_REG_X86_XMM10 = 52,
+	PERF_REG_X86_XMM11 = 54,
+	PERF_REG_X86_XMM12 = 56,
+	PERF_REG_X86_XMM13 = 58,
+	PERF_REG_X86_XMM14 = 60,
+	PERF_REG_X86_XMM15 = 62,
+
+	/* These include both GPRs and XMMX registers */
+	PERF_REG_X86_XMM_MAX = PERF_REG_X86_XMM15 + 2,
 };
 #endif /* _ASM_X86_PERF_REGS_H */
diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
index c06c4c16c6b6..07c30ee17425 100644
--- a/arch/x86/kernel/perf_regs.c
+++ b/arch/x86/kernel/perf_regs.c
@@ -59,18 +59,34 @@  static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = {
 
 u64 perf_reg_value(struct pt_regs *regs, int idx)
 {
+	struct x86_perf_regs *perf_regs;
+
+	if (idx >= PERF_REG_X86_XMM0 && idx < PERF_REG_X86_XMM_MAX) {
+		perf_regs = container_of(regs, struct x86_perf_regs, regs);
+		if (!perf_regs->xmm_regs)
+			return 0;
+		return perf_regs->xmm_regs[idx - PERF_REG_X86_XMM0];
+	}
+
 	if (WARN_ON_ONCE(idx >= ARRAY_SIZE(pt_regs_offset)))
 		return 0;
 
 	return regs_get_register(regs, pt_regs_offset[idx]);
 }
 
-#define REG_RESERVED (~((1ULL << PERF_REG_X86_MAX) - 1ULL))
-
 #ifdef CONFIG_X86_32
+#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_R8) | \
+		       (1ULL << PERF_REG_X86_R9) | \
+		       (1ULL << PERF_REG_X86_R10) | \
+		       (1ULL << PERF_REG_X86_R11) | \
+		       (1ULL << PERF_REG_X86_R12) | \
+		       (1ULL << PERF_REG_X86_R13) | \
+		       (1ULL << PERF_REG_X86_R14) | \
+		       (1ULL << PERF_REG_X86_R15))
+
 int perf_reg_validate(u64 mask)
 {
-	if (!mask || mask & REG_RESERVED)
+	if (!mask || (mask & REG_NOSUPPORT))
 		return -EINVAL;
 
 	return 0;
@@ -96,10 +112,7 @@  void perf_get_regs_user(struct perf_regs *regs_user,
 
 int perf_reg_validate(u64 mask)
 {
-	if (!mask || mask & REG_RESERVED)
-		return -EINVAL;
-
-	if (mask & REG_NOSUPPORT)
+	if (!mask || (mask & REG_NOSUPPORT))
 		return -EINVAL;
 
 	return 0;