[V2,1/3] perf/x86: Disable non generic regs for software/probe events
diff mbox series

Message ID 1558984077-7773-1-git-send-email-kan.liang@linux.intel.com
State New
Headers show
Series
  • [V2,1/3] perf/x86: Disable non generic regs for software/probe events
Related show

Commit Message

Liang, Kan May 27, 2019, 7:07 p.m. UTC
From: Kan Liang <kan.liang@linux.intel.com>

The perf fuzzer caused a Skylake machine to crash.

[ 9680.085831] Call Trace:
[ 9680.088301]  <IRQ>
[ 9680.090363]  perf_output_sample_regs+0x43/0xa0
[ 9680.094928]  perf_output_sample+0x3aa/0x7a0
[ 9680.099181]  perf_event_output_forward+0x53/0x80
[ 9680.103917]  __perf_event_overflow+0x52/0xf0
[ 9680.108266]  ? perf_trace_run_bpf_submit+0xc0/0xc0
[ 9680.113108]  perf_swevent_hrtimer+0xe2/0x150
[ 9680.117475]  ? check_preempt_wakeup+0x181/0x230
[ 9680.122091]  ? check_preempt_curr+0x62/0x90
[ 9680.126361]  ? ttwu_do_wakeup+0x19/0x140
[ 9680.130355]  ? try_to_wake_up+0x54/0x460
[ 9680.134366]  ? reweight_entity+0x15b/0x1a0
[ 9680.138559]  ? __queue_work+0x103/0x3f0
[ 9680.142472]  ? update_dl_rq_load_avg+0x1cd/0x270
[ 9680.147194]  ? timerqueue_del+0x1e/0x40
[ 9680.151092]  ? __remove_hrtimer+0x35/0x70
[ 9680.155191]  __hrtimer_run_queues+0x100/0x280
[ 9680.159658]  hrtimer_interrupt+0x100/0x220
[ 9680.163835]  smp_apic_timer_interrupt+0x6a/0x140
[ 9680.168555]  apic_timer_interrupt+0xf/0x20
[ 9680.172756]  </IRQ>

The XMM registers can only be collected by hardware events, not
software/probe events.

Add has_non_generic_regs() to check whether non-generic registers, e.g. XMM
on x86, are requested for software/probe events. If so, return
-EOPNOTSUPP.

The generic code defines the mask of non-generic registers as 0 if the arch
headers haven't overridden it.

Fixes: 878068ea270e ("perf/x86: Support outputting XMM registers")
Reported-by: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
---

Changes since V1:
- Use macro PERF_REG_NON_GENERIC_MASK to replace function
- Avoid unnecessary abbreviations in comments
- Remove unnecessary bracket for return.

 arch/x86/include/uapi/asm/perf_regs.h |  3 +++
 include/linux/perf_regs.h             |  8 ++++++++
 kernel/events/core.c                  | 30 ++++++++++++++++++++++++++++++
 3 files changed, 41 insertions(+)

Comments

Peter Zijlstra May 28, 2019, 8:56 a.m. UTC | #1
On Mon, May 27, 2019 at 12:07:55PM -0700, kan.liang@linux.intel.com wrote:
> diff --git a/arch/x86/include/uapi/asm/perf_regs.h b/arch/x86/include/uapi/asm/perf_regs.h
> index ac67bbe..3a96971 100644
> --- a/arch/x86/include/uapi/asm/perf_regs.h
> +++ b/arch/x86/include/uapi/asm/perf_regs.h
> @@ -52,4 +52,7 @@ enum perf_event_x86_regs {
>  	/* These include both GPRs and XMMX registers */
>  	PERF_REG_X86_XMM_MAX = PERF_REG_X86_XMM15 + 2,
>  };
> +
> +#define PERF_REG_NON_GENERIC_MASK	(~((1ULL << PERF_REG_X86_XMM0) - 1))
> +
>  #endif /* _ASM_X86_PERF_REGS_H */
> diff --git a/include/linux/perf_regs.h b/include/linux/perf_regs.h
> index 4767474..1d794355 100644
> --- a/include/linux/perf_regs.h
> +++ b/include/linux/perf_regs.h
> @@ -11,6 +11,11 @@ struct perf_regs {
>  
>  #ifdef CONFIG_HAVE_PERF_REGS
>  #include <asm/perf_regs.h>
> +
> +#ifndef PERF_REG_NON_GENERIC_MASK
> +#define PERF_REG_NON_GENERIC_MASK	0
> +#endif
> +
>  u64 perf_reg_value(struct pt_regs *regs, int idx);
>  int perf_reg_validate(u64 mask);
>  u64 perf_reg_abi(struct task_struct *task);
> @@ -18,6 +23,9 @@ void perf_get_regs_user(struct perf_regs *regs_user,
>  			struct pt_regs *regs,
>  			struct pt_regs *regs_user_copy);
>  #else
> +
> +#define PERF_REG_NON_GENERIC_MASK	0
> +
>  static inline u64 perf_reg_value(struct pt_regs *regs, int idx)
>  {
>  	return 0;

Much better than the last version; however..

> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index abbd4b3..4865bdf 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -8457,6 +8457,12 @@ static void sw_perf_event_destroy(struct perf_event *event)
>  	swevent_hlist_put();
>  }
>  
> +static inline bool has_non_generic_regs(struct perf_event *event)
> +{
> +	return (event->attr.sample_regs_user & PERF_REG_NON_GENERIC_MASK) ||
> +	       (event->attr.sample_regs_intr & PERF_REG_NON_GENERIC_MASK);
> +}
> +
>  static int perf_swevent_init(struct perf_event *event)
>  {
>  	u64 event_id = event->attr.config;
> @@ -8470,6 +8476,10 @@ static int perf_swevent_init(struct perf_event *event)
>  	if (has_branch_stack(event))
>  		return -EOPNOTSUPP;
>  
> +	/* Only support generic registers */
> +	if (has_non_generic_regs(event))
> +		return -EOPNOTSUPP;
> +
>  	switch (event_id) {
>  	case PERF_COUNT_SW_CPU_CLOCK:
>  	case PERF_COUNT_SW_TASK_CLOCK:
> @@ -8633,6 +8643,10 @@ static int perf_tp_event_init(struct perf_event *event)
>  	if (has_branch_stack(event))
>  		return -EOPNOTSUPP;
>  
> +	/* Only support generic registers */
> +	if (has_non_generic_regs(event))
> +		return -EOPNOTSUPP;
> +
>  	err = perf_trace_init(event);
>  	if (err)
>  		return err;
> @@ -8722,6 +8736,10 @@ static int perf_kprobe_event_init(struct perf_event *event)
>  	if (has_branch_stack(event))
>  		return -EOPNOTSUPP;
>  
> +	/* Only support generic registers */
> +	if (has_non_generic_regs(event))
> +		return -EOPNOTSUPP;
> +
>  	is_retprobe = event->attr.config & PERF_PROBE_CONFIG_IS_RETPROBE;
>  	err = perf_kprobe_init(event, is_retprobe);
>  	if (err)
> @@ -8782,6 +8800,10 @@ static int perf_uprobe_event_init(struct perf_event *event)
>  	if (has_branch_stack(event))
>  		return -EOPNOTSUPP;
>  
> +	/* Only support generic registers */
> +	if (has_non_generic_regs(event))
> +		return -EOPNOTSUPP;
> +
>  	is_retprobe = event->attr.config & PERF_PROBE_CONFIG_IS_RETPROBE;
>  	ref_ctr_offset = event->attr.config >> PERF_UPROBE_REF_CTR_OFFSET_SHIFT;
>  	err = perf_uprobe_init(event, ref_ctr_offset, is_retprobe);
> @@ -9562,6 +9584,10 @@ static int cpu_clock_event_init(struct perf_event *event)
>  	if (has_branch_stack(event))
>  		return -EOPNOTSUPP;
>  
> +	/* Only support generic registers */
> +	if (has_non_generic_regs(event))
> +		return -EOPNOTSUPP;
> +
>  	perf_swevent_init_hrtimer(event);
>  
>  	return 0;
> @@ -9643,6 +9669,10 @@ static int task_clock_event_init(struct perf_event *event)
>  	if (has_branch_stack(event))
>  		return -EOPNOTSUPP;
>  
> +	/* Only support generic registers */
> +	if (has_non_generic_regs(event))
> +		return -EOPNOTSUPP;
> +
>  	perf_swevent_init_hrtimer(event);
>  
>  	return 0;

I don't think this is anywhere near sufficient. What happens if we
request XMM regs for an uncore PMU ?

I'm thinking you want something along these lines...

---
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index a5436cee20b1..3ef1c2e0b177 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3281,7 +3281,13 @@ static int intel_pmu_hw_config(struct perf_event *event)
 
 		if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
 			event->attr.sample_type |= __PERF_SAMPLE_CALLCHAIN_EARLY;
-	}
+
+		/* we only support extended (XMM) registers for sample_regs_intr */
+		if (event->attr.sample_regs_user & PERF_REGS_EXTENDED_MASK)
+			return -EOPNOTSUPP;
+
+	} else if (has_extended_regs(event))
+		return -EOPNOTSUPP;
 
 	if (needs_branch_stack(event)) {
 		ret = intel_pmu_setup_lbr_filter(event);
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 5e9bb246b3a6..4fae37f8c7c2 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -2020,6 +2020,7 @@ void __init intel_ds_init(void)
 					PERF_SAMPLE_TIME;
 				x86_pmu.flags |= PMU_FL_PEBS_ALL;
 				pebs_qual = "-baseline";
+				x86_get_pmu()->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
 			} else {
 				/* Only basic record supported */
 				x86_pmu.pebs_no_xmm_regs = 1;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 0ab99c7b652d..2bca72f3028b 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -241,6 +241,7 @@ struct perf_event;
 #define PERF_PMU_CAP_NO_INTERRUPT		0x01
 #define PERF_PMU_CAP_NO_NMI			0x02
 #define PERF_PMU_CAP_AUX_NO_SG			0x04
+#define PERF_PMU_CAP_EXTENDED_REGS		0x08
 #define PERF_PMU_CAP_EXCLUSIVE			0x10
 #define PERF_PMU_CAP_ITRACE			0x20
 #define PERF_PMU_CAP_HETEROGENEOUS_CPUS		0x40
diff --git a/kernel/events/core.c b/kernel/events/core.c
index abbd4b3b96c2..0c4872426b70 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -10064,12 +10064,16 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
 		perf_event_ctx_unlock(event->group_leader, ctx);
 
 	if (!ret) {
+		if (!(pmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS) &&
+		    has_extended_regs(event))
+			ret = -EOPNOTSUPP;
+
 		if (pmu->capabilities & PERF_PMU_CAP_NO_EXCLUDE &&
-				event_has_any_exclude_flag(event)) {
-			if (event->destroy)
-				event->destroy(event);
+				event_has_any_exclude_flag(event))
 			ret = -EINVAL;
-		}
+
+		if (ret && event->destroy)
+			event->destroy(event);
 	}
 
 	if (ret)
Liang, Kan May 28, 2019, 1:33 p.m. UTC | #2
On 5/28/2019 4:56 AM, Peter Zijlstra wrote:
> On Mon, May 27, 2019 at 12:07:55PM -0700, kan.liang@linux.intel.com wrote:
>> diff --git a/arch/x86/include/uapi/asm/perf_regs.h b/arch/x86/include/uapi/asm/perf_regs.h
>> index ac67bbe..3a96971 100644
>> --- a/arch/x86/include/uapi/asm/perf_regs.h
>> +++ b/arch/x86/include/uapi/asm/perf_regs.h
>> @@ -52,4 +52,7 @@ enum perf_event_x86_regs {
>>   	/* These include both GPRs and XMMX registers */
>>   	PERF_REG_X86_XMM_MAX = PERF_REG_X86_XMM15 + 2,
>>   };
>> +
>> +#define PERF_REG_NON_GENERIC_MASK	(~((1ULL << PERF_REG_X86_XMM0) - 1))
>> +
>>   #endif /* _ASM_X86_PERF_REGS_H */
>> diff --git a/include/linux/perf_regs.h b/include/linux/perf_regs.h
>> index 4767474..1d794355 100644
>> --- a/include/linux/perf_regs.h
>> +++ b/include/linux/perf_regs.h
>> @@ -11,6 +11,11 @@ struct perf_regs {
>>   
>>   #ifdef CONFIG_HAVE_PERF_REGS
>>   #include <asm/perf_regs.h>
>> +
>> +#ifndef PERF_REG_NON_GENERIC_MASK
>> +#define PERF_REG_NON_GENERIC_MASK	0
>> +#endif
>> +
>>   u64 perf_reg_value(struct pt_regs *regs, int idx);
>>   int perf_reg_validate(u64 mask);
>>   u64 perf_reg_abi(struct task_struct *task);
>> @@ -18,6 +23,9 @@ void perf_get_regs_user(struct perf_regs *regs_user,
>>   			struct pt_regs *regs,
>>   			struct pt_regs *regs_user_copy);
>>   #else
>> +
>> +#define PERF_REG_NON_GENERIC_MASK	0
>> +
>>   static inline u64 perf_reg_value(struct pt_regs *regs, int idx)
>>   {
>>   	return 0;
> 
> Much better than the last version; however..
> 
>> diff --git a/kernel/events/core.c b/kernel/events/core.c
>> index abbd4b3..4865bdf 100644
>> --- a/kernel/events/core.c
>> +++ b/kernel/events/core.c
>> @@ -8457,6 +8457,12 @@ static void sw_perf_event_destroy(struct perf_event *event)
>>   	swevent_hlist_put();
>>   }
>>   
>> +static inline bool has_non_generic_regs(struct perf_event *event)
>> +{
>> +	return (event->attr.sample_regs_user & PERF_REG_NON_GENERIC_MASK) ||
>> +	       (event->attr.sample_regs_intr & PERF_REG_NON_GENERIC_MASK);
>> +}
>> +
>>   static int perf_swevent_init(struct perf_event *event)
>>   {
>>   	u64 event_id = event->attr.config;
>> @@ -8470,6 +8476,10 @@ static int perf_swevent_init(struct perf_event *event)
>>   	if (has_branch_stack(event))
>>   		return -EOPNOTSUPP;
>>   
>> +	/* Only support generic registers */
>> +	if (has_non_generic_regs(event))
>> +		return -EOPNOTSUPP;
>> +
>>   	switch (event_id) {
>>   	case PERF_COUNT_SW_CPU_CLOCK:
>>   	case PERF_COUNT_SW_TASK_CLOCK:
>> @@ -8633,6 +8643,10 @@ static int perf_tp_event_init(struct perf_event *event)
>>   	if (has_branch_stack(event))
>>   		return -EOPNOTSUPP;
>>   
>> +	/* Only support generic registers */
>> +	if (has_non_generic_regs(event))
>> +		return -EOPNOTSUPP;
>> +
>>   	err = perf_trace_init(event);
>>   	if (err)
>>   		return err;
>> @@ -8722,6 +8736,10 @@ static int perf_kprobe_event_init(struct perf_event *event)
>>   	if (has_branch_stack(event))
>>   		return -EOPNOTSUPP;
>>   
>> +	/* Only support generic registers */
>> +	if (has_non_generic_regs(event))
>> +		return -EOPNOTSUPP;
>> +
>>   	is_retprobe = event->attr.config & PERF_PROBE_CONFIG_IS_RETPROBE;
>>   	err = perf_kprobe_init(event, is_retprobe);
>>   	if (err)
>> @@ -8782,6 +8800,10 @@ static int perf_uprobe_event_init(struct perf_event *event)
>>   	if (has_branch_stack(event))
>>   		return -EOPNOTSUPP;
>>   
>> +	/* Only support generic registers */
>> +	if (has_non_generic_regs(event))
>> +		return -EOPNOTSUPP;
>> +
>>   	is_retprobe = event->attr.config & PERF_PROBE_CONFIG_IS_RETPROBE;
>>   	ref_ctr_offset = event->attr.config >> PERF_UPROBE_REF_CTR_OFFSET_SHIFT;
>>   	err = perf_uprobe_init(event, ref_ctr_offset, is_retprobe);
>> @@ -9562,6 +9584,10 @@ static int cpu_clock_event_init(struct perf_event *event)
>>   	if (has_branch_stack(event))
>>   		return -EOPNOTSUPP;
>>   
>> +	/* Only support generic registers */
>> +	if (has_non_generic_regs(event))
>> +		return -EOPNOTSUPP;
>> +
>>   	perf_swevent_init_hrtimer(event);
>>   
>>   	return 0;
>> @@ -9643,6 +9669,10 @@ static int task_clock_event_init(struct perf_event *event)
>>   	if (has_branch_stack(event))
>>   		return -EOPNOTSUPP;
>>   
>> +	/* Only support generic registers */
>> +	if (has_non_generic_regs(event))
>> +		return -EOPNOTSUPP;
>> +
>>   	perf_swevent_init_hrtimer(event);
>>   
>>   	return 0;
> 
> I don't think this is anywhere near sufficient. What happens if we
> request XMM regs for an uncore PMU ?
>

Uncore PMU doesn't support sampling. It will return -EINVAL.
There is no regs support for counting. The request will be ignored.

I think current check for uncore is good enough.

Thanks,
Kan

> I'm thinking you want something along these lines...
> 
> ---
> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
> index a5436cee20b1..3ef1c2e0b177 100644
> --- a/arch/x86/events/intel/core.c
> +++ b/arch/x86/events/intel/core.c
> @@ -3281,7 +3281,13 @@ static int intel_pmu_hw_config(struct perf_event *event)
>   
>   		if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
>   			event->attr.sample_type |= __PERF_SAMPLE_CALLCHAIN_EARLY;
> -	}
> +
> +		/* we only support extended (XMM) registers for sample_regs_intr */
> +		if (event->attr.sample_regs_user & PERF_REGS_EXTENDED_MASK)
> +			return -EOPNOTSUPP;
> +
> +	} else if (has_extended_regs(event))
> +		return -EOPNOTSUPP;
>   
>   	if (needs_branch_stack(event)) {
>   		ret = intel_pmu_setup_lbr_filter(event);
> diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
> index 5e9bb246b3a6..4fae37f8c7c2 100644
> --- a/arch/x86/events/intel/ds.c
> +++ b/arch/x86/events/intel/ds.c
> @@ -2020,6 +2020,7 @@ void __init intel_ds_init(void)
>   					PERF_SAMPLE_TIME;
>   				x86_pmu.flags |= PMU_FL_PEBS_ALL;
>   				pebs_qual = "-baseline";
> +				x86_get_pmu()->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
>   			} else {
>   				/* Only basic record supported */
>   				x86_pmu.pebs_no_xmm_regs = 1;
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 0ab99c7b652d..2bca72f3028b 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -241,6 +241,7 @@ struct perf_event;
>   #define PERF_PMU_CAP_NO_INTERRUPT		0x01
>   #define PERF_PMU_CAP_NO_NMI			0x02
>   #define PERF_PMU_CAP_AUX_NO_SG			0x04
> +#define PERF_PMU_CAP_EXTENDED_REGS		0x08
>   #define PERF_PMU_CAP_EXCLUSIVE			0x10
>   #define PERF_PMU_CAP_ITRACE			0x20
>   #define PERF_PMU_CAP_HETEROGENEOUS_CPUS		0x40
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index abbd4b3b96c2..0c4872426b70 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -10064,12 +10064,16 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
>   		perf_event_ctx_unlock(event->group_leader, ctx);
>   
>   	if (!ret) {
> +		if (!(pmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS) &&
> +		    has_extended_regs(event))
> +			ret = -EOPNOTSUPP;
> +
>   		if (pmu->capabilities & PERF_PMU_CAP_NO_EXCLUDE &&
> -				event_has_any_exclude_flag(event)) {
> -			if (event->destroy)
> -				event->destroy(event);
> +				event_has_any_exclude_flag(event))
>   			ret = -EINVAL;
> -		}
> +
> +		if (ret && event->destroy)
> +			event->destroy(event);
>   	}
>   
>   	if (ret)
>
Peter Zijlstra May 28, 2019, 2:05 p.m. UTC | #3
On Tue, May 28, 2019 at 09:33:40AM -0400, Liang, Kan wrote:
> Uncore PMU doesn't support sampling. It will return -EINVAL.
> There is no regs support for counting. The request will be ignored.
> 
> I think current check for uncore is good enough.

breakpoints then.. There's also no guarantee you covered all software
events, and the core rewrite will allow other per-task/sampling PMUs
too.

The approach you take is just not complete, don't do that.
Vince Weaver May 28, 2019, 2:14 p.m. UTC | #4
On Tue, 28 May 2019, Peter Zijlstra wrote:

> On Tue, May 28, 2019 at 09:33:40AM -0400, Liang, Kan wrote:
> > Uncore PMU doesn't support sampling. It will return -EINVAL.
> > There is no regs support for counting. The request will be ignored.
> > 
> > I think current check for uncore is good enough.
> 
> breakpoints then.. There's also no guarantee you covered all software
> events, and the core rewrite will allow other per-task/sampling PMUs
> too.

possibly related, even with the patches applied, the skylake machine 
eventually did still crash while fuzzing:

[133621.333101] BUG: unable to handle page fault for address: 00000001000000c8
[133621.333102] #PF: supervisor read access in kernel mode
[133621.333103] #PF: error_code(0x0000) - not-present page
[133621.333104] PGD 0 P4D 0 
[133621.333106] Oops: 0000 [#1] SMP PTI
[133621.333108] CPU: 4 PID: 22203 Comm: perf_fuzzer Tainted: G        W         5.2.0-rc1+ #39
[133621.333109] Hardware name: LENOVO 10FY0017US/SKYBAY, BIOS FWKT53A   06/06/2016
[133621.333109] RIP: 0010:perf_reg_value+0x1e/0x50
[133621.333111] Code: 00 48 b8 00 00 00 00 ff ff ff ff c3 0f 1f 44 00 00 8d 46 e0 83 f8 1f 77 1d 48 8b 97 a8 00 00 00 31 c0 48 85 d2 74 0e 48 63 f6 <48> 8b 84 f2 00 ff ff ff c3 31 c0 c3 83 fe 17 77 16 48 63 f6 8b 04
[133621.333112] RSP: 0000:fffffe00000d5a80 EFLAGS: 00010006
[133621.333113] RAX: 0000000000000000 RBX: 0000000000000039 RCX: 0000000000000039
[133621.333114] RDX: 0000000100000000 RSI: 0000000000000039 RDI: fffffe00000d5c88
[133621.333115] RBP: fffffe00000d5b38 R08: 0000000000000000 R09: 0000000000000000
[133621.333116] R10: 00000000bffffff0 R11: 0000000000000012 R12: fffffe00000d5c88
[133621.333117] R13: ffff99883253ed10 R14: 0000000000000050 R15: 0000000000000000
[133621.333118] FS:  00007fb9741d3540(0000) GS:ffff998835b00000(0000) knlGS:0000000000000000
[133621.333119] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[133621.333119] CR2: 00000001000000c8 CR3: 000000023333c004 CR4: 00000000003607e0
[133621.333120] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[133621.333121] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000600
[133621.333122] Call Trace:
[133621.333122]  <NMI>
[133621.333123]  perf_output_sample_regs+0x43/0xa0
[133621.333124]  perf_output_sample+0x3aa/0x7a0
[133621.333125]  perf_event_output_forward+0x53/0x80
[133621.333125]  __perf_event_overflow+0x52/0xf0
[133621.333126]  handle_pmi_common+0x1b3/0x240
[133621.333127]  ? visit_groups_merge+0xeb/0x180
[133621.333127]  ? native_write_msr+0xb/0x20
[133621.333128]  ? native_write_msr+0x1a/0x20
[133621.333129]  ? native_write_msr+0xc/0x20
[133621.333129]  ? intel_pmu_lbr_read+0x29f/0x3d0
[133621.333130]  ? intel_pmu_lbr_filter+0x7f/0x1f0
[133621.333131]  intel_pmu_handle_irq+0xbf/0x160
[133621.333132]  perf_event_nmi_handler+0x2d/0x50
[133621.333132]  nmi_handle+0x63/0x110
[133621.333133]  default_do_nmi+0x4e/0x100
[133621.333134]  do_nmi+0x14d/0x1b0
[133621.333134]  end_repeat_nmi+0x16/0x50
[133621.333135] RIP: 0010:visit_groups_merge+0xeb/0x180
[133621.333137] Code: c0 75 73 48 8d 7b 30 e8 c3 de 55 00 48 85 c0 0f 84 9a 00 00 00 48 89 c2 48 83 ea 30 74 10 8b b3 74 02 00 00 39 b0 44 02 00 00 <49> 0f 45 d7 48 89 55 00 48 8b 04 24 48 8b 5c 24 08 48 85 c0 48 89
[133621.333137] RSP: 0000:ffffb28f4c897e10 EFLAGS: 00000046
[133621.333139] RAX: ffff998833315830 RBX: ffff99882a17a000 RCX: 0000000000000001
[133621.333140] RDX: ffff998833315800 RSI: 0000000000000004 RDI: ffff99882a17a030
[133621.333140] RBP: ffffb28f4c897e18 R08: 0000000000000000 R09: ffff998835b26a80
[133621.333141] R10: ffff99882a17a000 R11: 0000000000000001 R12: ffffffffb6bb93b0
[133621.333142] R13: ffffb28f4c897e68 R14: ffffb28f4c897e10 R15: 0000000000000000
[133621.333143]  ? __perf_event_disable+0x160/0x160
[133621.333144]  ? visit_groups_merge+0xeb/0x180
[133621.333144]  ? visit_groups_merge+0xeb/0x180
[133621.333145]  </NMI>
[133621.333145]  ctx_sched_in+0xb7/0x180
[133621.333146]  __perf_event_task_sched_in+0x16e/0x1c0
[133621.333147]  ? __switch_to_asm+0x40/0x70
[133621.333147]  ? __switch_to_asm+0x34/0x70
[133621.333148]  ? __switch_to_asm+0x40/0x70
[133621.333149]  ? __switch_to_asm+0x34/0x70
[133621.333149]  finish_task_switch+0xcd/0x270
[133621.333150]  schedule_tail+0xb/0x50
[133621.333151]  ret_from_fork+0x8/0x40
[133621.333151] Modules linked in: intel_rapl x86_pkg_temp_thermal intel_powerclamp coretemp kvm irqbypass snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic crct10dif_pclmul crc32_pclmul ledtrig_audio ghash_clmulni_intel snd_hda_intel snd_hda_codec aesni_intel snd_hda_core aes_x86_64 crypto_simd snd_hwdep cryptd snd_pcm mei_me glue_helper snd_timer snd sg mei iTCO_wdt iTCO_vendor_support soundcore wmi_bmof tpm_tis evdev tpm_tis_core acpi_pad tpm rng_core pcspkr pcc_cpufreq fuse parport_pc sunrpc ppdev lp parport ip_tables x_tables autofs4 ext4 crc32c_generic crc16 mbcache jbd2 sr_mod sd_mod cdrom i915 i2c_algo_bit ahci libahci crc32c_intel xhci_pci libata drm_kms_helper i2c_i801 xhci_hcd e1000e drm scsi_mod usbcore fan thermal wmi video button
[133621.333183] CR2: 00000001000000c8
[133621.743913] ---[ end trace 7a151c3de6b000fc ]---
[133621.743913] RIP: 0010:perf_reg_value+0x1e/0x50
[133621.743913] Code: 00 48 b8 00 00 00 00 ff ff ff ff c3 0f 1f 44 00 00 8d 46 e0 83 f8 1f 77 1d 48 8b 97 a8 00 00 00 31 c0 48 85 d2 74 0e 48 63 f6 <48> 8b 84 f2 00 ff ff ff c3 31 c0 c3 83 fe 17 77 16 48 63 f6 8b 04
[133621.743913] RSP: 0000:fffffe00000d5a80 EFLAGS: 00010006
[133621.743914] RAX: 0000000000000000 RBX: 0000000000000039 RCX: 0000000000000039
[133621.743914] RDX: 0000000100000000 RSI: 0000000000000039 RDI: fffffe00000d5c88
[133621.743914] RBP: fffffe00000d5b38 R08: 0000000000000000 R09: 0000000000000000
[133621.743914] R10: 00000000bffffff0 R11: 0000000000000012 R12: fffffe00000d5c88
[133621.743915] R13: ffff99883253ed10 R14: 0000000000000050 R15: 0000000000000000
[133621.743915] FS:  00007fb9741d3540(0000) GS:ffff998835b00000(0000) knlGS:0000000000000000
[133621.743915] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[133621.743915] CR2: 00000001000000c8 CR3: 000000023333c004 CR4: 00000000003607e0
[133621.743915] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[133621.743916] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000600
[133621.743916] Kernel panic - not syncing: Fatal exception in interrupt
[133622.777346] Shutting down cpus with NMI
[133622.777347] Kernel Offset: 0x35a00000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff)
Liang, Kan May 28, 2019, 2:26 p.m. UTC | #5
On 5/28/2019 10:05 AM, Peter Zijlstra wrote:
> On Tue, May 28, 2019 at 09:33:40AM -0400, Liang, Kan wrote:
>> Uncore PMU doesn't support sampling. It will return -EINVAL.
>> There is no regs support for counting. The request will be ignored.
>>
>> I think current check for uncore is good enough.
> 
> breakpoints then.. There's also no guarantee you covered all software
> events, and the core rewrite will allow other per-task/sampling PMUs
> too.
> 
> The approach you take is just not complete, don't do that.
> 

OK. I will send V3 based on your proposed patches.

Thanks,
Kan

Patch
diff mbox series

diff --git a/arch/x86/include/uapi/asm/perf_regs.h b/arch/x86/include/uapi/asm/perf_regs.h
index ac67bbe..3a96971 100644
--- a/arch/x86/include/uapi/asm/perf_regs.h
+++ b/arch/x86/include/uapi/asm/perf_regs.h
@@ -52,4 +52,7 @@  enum perf_event_x86_regs {
 	/* These include both GPRs and XMMX registers */
 	PERF_REG_X86_XMM_MAX = PERF_REG_X86_XMM15 + 2,
 };
+
+#define PERF_REG_NON_GENERIC_MASK	(~((1ULL << PERF_REG_X86_XMM0) - 1))
+
 #endif /* _ASM_X86_PERF_REGS_H */
diff --git a/include/linux/perf_regs.h b/include/linux/perf_regs.h
index 4767474..1d794355 100644
--- a/include/linux/perf_regs.h
+++ b/include/linux/perf_regs.h
@@ -11,6 +11,11 @@  struct perf_regs {
 
 #ifdef CONFIG_HAVE_PERF_REGS
 #include <asm/perf_regs.h>
+
+#ifndef PERF_REG_NON_GENERIC_MASK
+#define PERF_REG_NON_GENERIC_MASK	0
+#endif
+
 u64 perf_reg_value(struct pt_regs *regs, int idx);
 int perf_reg_validate(u64 mask);
 u64 perf_reg_abi(struct task_struct *task);
@@ -18,6 +23,9 @@  void perf_get_regs_user(struct perf_regs *regs_user,
 			struct pt_regs *regs,
 			struct pt_regs *regs_user_copy);
 #else
+
+#define PERF_REG_NON_GENERIC_MASK	0
+
 static inline u64 perf_reg_value(struct pt_regs *regs, int idx)
 {
 	return 0;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index abbd4b3..4865bdf 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8457,6 +8457,12 @@  static void sw_perf_event_destroy(struct perf_event *event)
 	swevent_hlist_put();
 }
 
+static inline bool has_non_generic_regs(struct perf_event *event)
+{
+	return (event->attr.sample_regs_user & PERF_REG_NON_GENERIC_MASK) ||
+	       (event->attr.sample_regs_intr & PERF_REG_NON_GENERIC_MASK);
+}
+
 static int perf_swevent_init(struct perf_event *event)
 {
 	u64 event_id = event->attr.config;
@@ -8470,6 +8476,10 @@  static int perf_swevent_init(struct perf_event *event)
 	if (has_branch_stack(event))
 		return -EOPNOTSUPP;
 
+	/* Only support generic registers */
+	if (has_non_generic_regs(event))
+		return -EOPNOTSUPP;
+
 	switch (event_id) {
 	case PERF_COUNT_SW_CPU_CLOCK:
 	case PERF_COUNT_SW_TASK_CLOCK:
@@ -8633,6 +8643,10 @@  static int perf_tp_event_init(struct perf_event *event)
 	if (has_branch_stack(event))
 		return -EOPNOTSUPP;
 
+	/* Only support generic registers */
+	if (has_non_generic_regs(event))
+		return -EOPNOTSUPP;
+
 	err = perf_trace_init(event);
 	if (err)
 		return err;
@@ -8722,6 +8736,10 @@  static int perf_kprobe_event_init(struct perf_event *event)
 	if (has_branch_stack(event))
 		return -EOPNOTSUPP;
 
+	/* Only support generic registers */
+	if (has_non_generic_regs(event))
+		return -EOPNOTSUPP;
+
 	is_retprobe = event->attr.config & PERF_PROBE_CONFIG_IS_RETPROBE;
 	err = perf_kprobe_init(event, is_retprobe);
 	if (err)
@@ -8782,6 +8800,10 @@  static int perf_uprobe_event_init(struct perf_event *event)
 	if (has_branch_stack(event))
 		return -EOPNOTSUPP;
 
+	/* Only support generic registers */
+	if (has_non_generic_regs(event))
+		return -EOPNOTSUPP;
+
 	is_retprobe = event->attr.config & PERF_PROBE_CONFIG_IS_RETPROBE;
 	ref_ctr_offset = event->attr.config >> PERF_UPROBE_REF_CTR_OFFSET_SHIFT;
 	err = perf_uprobe_init(event, ref_ctr_offset, is_retprobe);
@@ -9562,6 +9584,10 @@  static int cpu_clock_event_init(struct perf_event *event)
 	if (has_branch_stack(event))
 		return -EOPNOTSUPP;
 
+	/* Only support generic registers */
+	if (has_non_generic_regs(event))
+		return -EOPNOTSUPP;
+
 	perf_swevent_init_hrtimer(event);
 
 	return 0;
@@ -9643,6 +9669,10 @@  static int task_clock_event_init(struct perf_event *event)
 	if (has_branch_stack(event))
 		return -EOPNOTSUPP;
 
+	/* Only support generic registers */
+	if (has_non_generic_regs(event))
+		return -EOPNOTSUPP;
+
 	perf_swevent_init_hrtimer(event);
 
 	return 0;