* [MODERATED] [PATCH 1/4] walnut 1
2019-02-19 15:58 [MODERATED] [PATCH 0/4] walnut 0 Peter Zijlstra
@ 2019-02-19 15:58 ` Peter Zijlstra
2019-02-19 15:58 ` [MODERATED] [PATCH 2/4] walnut 2 Peter Zijlstra
` (4 subsequent siblings)
5 siblings, 0 replies; 17+ messages in thread
From: Peter Zijlstra @ 2019-02-19 15:58 UTC (permalink / raw)
To: speck
The cpuc data structure allocation is different between fake and real
cpuc's; use the same code to init/free both.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
arch/x86/events/core.c | 13 +++++--------
arch/x86/events/intel/core.c | 29 ++++++++++++++++++-----------
arch/x86/events/perf_event.h | 11 ++++++++---
3 files changed, 31 insertions(+), 22 deletions(-)
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1995,7 +1995,7 @@ static int x86_pmu_commit_txn(struct pmu
*/
static void free_fake_cpuc(struct cpu_hw_events *cpuc)
{
- kfree(cpuc->shared_regs);
+ intel_cpuc_finish(cpuc);
kfree(cpuc);
}
@@ -2007,14 +2007,11 @@ static struct cpu_hw_events *allocate_fa
cpuc = kzalloc(sizeof(*cpuc), GFP_KERNEL);
if (!cpuc)
return ERR_PTR(-ENOMEM);
-
- /* only needed, if we have extra_regs */
- if (x86_pmu.extra_regs) {
- cpuc->shared_regs = allocate_shared_regs(cpu);
- if (!cpuc->shared_regs)
- goto error;
- }
cpuc->is_fake = 1;
+
+ if (intel_cpuc_prepare(cpuc, cpu))
+ goto error;
+
return cpuc;
error:
free_fake_cpuc(cpuc);
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3410,7 +3410,7 @@ ssize_t intel_event_sysfs_show(char *pag
return x86_event_sysfs_show(page, config, event);
}
-struct intel_shared_regs *allocate_shared_regs(int cpu)
+static struct intel_shared_regs *allocate_shared_regs(int cpu)
{
struct intel_shared_regs *regs;
int i;
@@ -3442,10 +3442,9 @@ static struct intel_excl_cntrs *allocate
return c;
}
-static int intel_pmu_cpu_prepare(int cpu)
-{
- struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu)
+{
if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) {
cpuc->shared_regs = allocate_shared_regs(cpu);
if (!cpuc->shared_regs)
@@ -3455,7 +3454,7 @@ static int intel_pmu_cpu_prepare(int cpu
if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);
- cpuc->constraint_list = kzalloc(sz, GFP_KERNEL);
+ cpuc->constraint_list = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
if (!cpuc->constraint_list)
goto err_shared_regs;
@@ -3480,6 +3479,11 @@ static int intel_pmu_cpu_prepare(int cpu
return -ENOMEM;
}
+static int intel_pmu_cpu_prepare(int cpu)
+{
+ return intel_cpuc_prepare(&per_cpu(cpu_hw_events, cpu), cpu);
+}
+
static void flip_smm_bit(void *data)
{
unsigned long set = *(unsigned long *)data;
@@ -3554,9 +3558,8 @@ static void intel_pmu_cpu_starting(int c
}
}
-static void free_excl_cntrs(int cpu)
+static void free_excl_cntrs(struct cpu_hw_events *cpuc)
{
- struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
struct intel_excl_cntrs *c;
c = cpuc->excl_cntrs;
@@ -3577,9 +3580,8 @@ static void intel_pmu_cpu_dying(int cpu)
disable_counter_freeze();
}
-static void intel_pmu_cpu_dead(int cpu)
+void intel_cpuc_finish(struct cpu_hw_events *cpuc)
{
- struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
struct intel_shared_regs *pc;
pc = cpuc->shared_regs;
@@ -3589,7 +3591,12 @@ static void intel_pmu_cpu_dead(int cpu)
cpuc->shared_regs = NULL;
}
- free_excl_cntrs(cpu);
+ free_excl_cntrs(cpuc);
+}
+
+static void intel_pmu_cpu_dead(int cpu)
+{
+ intel_cpuc_finish(&per_cpu(cpu_hw_events, cpu));
}
static void intel_pmu_sched_task(struct perf_event_context *ctx,
@@ -4758,7 +4765,7 @@ static __init int fixup_ht_bug(void)
hardlockup_detector_perf_restart();
for_each_online_cpu(c)
- free_excl_cntrs(c);
+ free_excl_cntrs(&per_cpu(cpu_hw_events, c));
cpus_read_unlock();
pr_info("PMU erratum BJ122, BV98, HSD29 workaround disabled, HT off\n");
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -890,7 +890,8 @@ struct event_constraint *
x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event);
-struct intel_shared_regs *allocate_shared_regs(int cpu);
+extern int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu);
+extern void intel_cpuc_finish(struct cpu_hw_events *cpuc);
int intel_pmu_init(void);
@@ -1026,9 +1027,13 @@ static inline int intel_pmu_init(void)
return 0;
}
-static inline struct intel_shared_regs *allocate_shared_regs(int cpu)
+static inline int intel_cpuc_prepare(struct cpu_hw_event *cpuc, int cpu)
+{
+ return 0;
+}
+
+static inline void intel_cpuc_finish(struct cpu_hw_event *cpuc)
{
- return NULL;
}
static inline int is_ht_workaround_enabled(void)
^ permalink raw reply [flat|nested] 17+ messages in thread
* [MODERATED] [PATCH 2/4] walnut 2
2019-02-19 15:58 [MODERATED] [PATCH 0/4] walnut 0 Peter Zijlstra
2019-02-19 15:58 ` [MODERATED] [PATCH 1/4] walnut 1 Peter Zijlstra
@ 2019-02-19 15:58 ` Peter Zijlstra
2019-02-19 15:58 ` [MODERATED] [PATCH 3/4] walnut 3 Peter Zijlstra
` (3 subsequent siblings)
5 siblings, 0 replies; 17+ messages in thread
From: Peter Zijlstra @ 2019-02-19 15:58 UTC (permalink / raw)
To: speck
Such that we can re-use it.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
arch/x86/events/intel/core.c | 51 +++++++++++++++++++++++++------------------
1 file changed, 30 insertions(+), 21 deletions(-)
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2770,6 +2770,35 @@ intel_stop_scheduling(struct cpu_hw_even
}
static struct event_constraint *
+dyn_constraint(struct cpu_hw_events *cpuc, struct event_constraint *c, int idx)
+{
+ WARN_ON_ONCE(!cpuc->constraint_list);
+
+ if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) {
+ struct event_constraint *cx;
+
+ /*
+ * grab pre-allocated constraint entry
+ */
+ cx = &cpuc->constraint_list[idx];
+
+ /*
+ * initialize dynamic constraint
+ * with static constraint
+ */
+ *cx = *c;
+
+ /*
+ * mark constraint as dynamic
+ */
+ cx->flags |= PERF_X86_EVENT_DYNAMIC;
+ c = cx;
+ }
+
+ return c;
+}
+
+static struct event_constraint *
intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
int idx, struct event_constraint *c)
{
@@ -2799,27 +2828,7 @@ intel_get_excl_constraints(struct cpu_hw
* only needed when constraint has not yet
* been cloned (marked dynamic)
*/
- if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) {
- struct event_constraint *cx;
-
- /*
- * grab pre-allocated constraint entry
- */
- cx = &cpuc->constraint_list[idx];
-
- /*
- * initialize dynamic constraint
- * with static constraint
- */
- *cx = *c;
-
- /*
- * mark constraint as dynamic, so we
- * can free it later on
- */
- cx->flags |= PERF_X86_EVENT_DYNAMIC;
- c = cx;
- }
+ c = dyn_constraint(cpuc, c, idx);
/*
* From here on, the constraint is dynamic.
^ permalink raw reply [flat|nested] 17+ messages in thread
* [MODERATED] [PATCH 3/4] walnut 3
2019-02-19 15:58 [MODERATED] [PATCH 0/4] walnut 0 Peter Zijlstra
2019-02-19 15:58 ` [MODERATED] [PATCH 1/4] walnut 1 Peter Zijlstra
2019-02-19 15:58 ` [MODERATED] [PATCH 2/4] walnut 2 Peter Zijlstra
@ 2019-02-19 15:58 ` Peter Zijlstra
2019-02-19 15:58 ` [MODERATED] [PATCH 4/4] walnut 4 Peter Zijlstra
` (2 subsequent siblings)
5 siblings, 0 replies; 17+ messages in thread
From: Peter Zijlstra @ 2019-02-19 15:58 UTC (permalink / raw)
To: speck
Skylake systems will receive a microcode update to address a TSX
errata. This microcode will (by default) clobber PMC3 when TSX
instructions are (speculatively or not) executed.
It also provides an MSR to cause all TSX transaction to abort and
preserve PMC3.
Add the CPUID enumeration and MSR definition.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
arch/x86/include/asm/cpufeatures.h | 1 +
arch/x86/include/asm/msr-index.h | 6 ++++++
2 files changed, 7 insertions(+)
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -344,6 +344,7 @@
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */
#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -666,6 +666,12 @@
#define MSR_IA32_TSC_DEADLINE 0x000006E0
+
+#define MSR_TSX_FORCE_ABORT 0x0000010F
+
+#define MSR_TFA_RTM_FORCE_ABORT_BIT 0
+#define MSR_TFA_RTM_FORCE_ABORT BIT_ULL(MSR_TFA_RTM_FORCE_ABORT_BIT)
+
/* P4/Xeon+ specific */
#define MSR_IA32_MCG_EAX 0x00000180
#define MSR_IA32_MCG_EBX 0x00000181
^ permalink raw reply [flat|nested] 17+ messages in thread
* [MODERATED] [PATCH 4/4] walnut 4
2019-02-19 15:58 [MODERATED] [PATCH 0/4] walnut 0 Peter Zijlstra
` (2 preceding siblings ...)
2019-02-19 15:58 ` [MODERATED] [PATCH 3/4] walnut 3 Peter Zijlstra
@ 2019-02-19 15:58 ` Peter Zijlstra
2019-02-19 16:10 ` [MODERATED] " Peter Zijlstra
` (2 more replies)
2019-02-20 18:10 ` [MODERATED] Re: [PATCH 0/4] walnut 0 mark gross
2019-03-01 18:59 ` mark gross
5 siblings, 3 replies; 17+ messages in thread
From: Peter Zijlstra @ 2019-02-19 15:58 UTC (permalink / raw)
To: speck
On Skylake systems that have TSX Force Abort (TFA), provide a system
wide knob to allow (by default) or disallow the usage of TFA.
When TFA is allowed (default); the MSR gets set when PMC3 gets
scheduled and cleared when, after scheduling, PMC3 is unused.
When TFA is not allowed; clear PMC3 from all constraints such that it
will not get used.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
arch/x86/events/intel/core.c | 74 +++++++++++++++++++++++++++++++++++++++++--
arch/x86/events/perf_event.h | 6 +++
2 files changed, 77 insertions(+), 3 deletions(-)
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2000,6 +2000,39 @@ static void intel_pmu_nhm_enable_all(int
intel_pmu_enable_all(added);
}
+static void intel_set_tfa(struct cpu_hw_events *cpuc, bool on)
+{
+ u64 val = on ? MSR_TFA_RTM_FORCE_ABORT : 0;
+
+ if (cpuc->tfa_shadow != val) {
+ cpuc->tfa_shadow = val;
+ wrmsrl(MSR_TSX_FORCE_ABORT, val);
+ }
+}
+
+static void intel_skl_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
+{
+ /*
+ * We're going to use PMC3, make sure TFA is set before we touch it.
+ */
+ if (cntr == 3 && !cpuc->is_fake)
+ intel_set_tfa(cpuc, true);
+}
+
+static void intel_skl_pmu_enable_all(int added)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ /*
+ * If we find PMC3 is no longer used when we enable the PMU, we can
+ * clear TFA.
+ */
+ if (!test_bit(3, cpuc->active_mask))
+ intel_set_tfa(cpuc, false);
+
+ intel_pmu_enable_all(added);
+}
+
static void enable_counter_freeze(void)
{
update_debugctlmsr(get_debugctlmsr() |
@@ -3366,6 +3399,26 @@ glp_get_event_constraints(struct cpu_hw_
return c;
}
+static bool allow_tsx_force_abort = true;
+
+static struct event_constraint *
+skl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+ struct perf_event *event)
+{
+ struct event_constraint *c = hsw_get_event_constraints(cpuc, idx, event);
+
+ /*
+ * Without TFA we must not use PMC3.
+ */
+ if (!allow_tsx_force_abort && test_bit(3, c->idxmsk)) {
+ c = dyn_constraint(cpuc, c, idx);
+ c->idxmsk64 &= ~(1ULL << 3);
+ c->weight = c->idxmsk64 - 1;
+ }
+
+ return c;
+}
+
/*
* Broadwell:
*
@@ -3460,13 +3513,15 @@ int intel_cpuc_prepare(struct cpu_hw_eve
goto err;
}
- if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
+ if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA)) {
size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);
cpuc->constraint_list = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
if (!cpuc->constraint_list)
goto err_shared_regs;
+ }
+ if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
cpuc->excl_cntrs = allocate_excl_cntrs(cpu);
if (!cpuc->excl_cntrs)
goto err_constraint_list;
@@ -3576,9 +3631,10 @@ static void free_excl_cntrs(struct cpu_h
if (c->core_id == -1 || --c->refcnt == 0)
kfree(c);
cpuc->excl_cntrs = NULL;
- kfree(cpuc->constraint_list);
- cpuc->constraint_list = NULL;
}
+
+ kfree(cpuc->constraint_list);
+ cpuc->constraint_list = NULL;
}
static void intel_pmu_cpu_dying(int cpu)
@@ -4123,8 +4179,11 @@ static struct attribute *intel_pmu_caps_
NULL
};
+DEVICE_BOOL_ATTR(allow_tsx_force_abort, 0644, allow_tsx_force_abort);
+
static struct attribute *intel_pmu_attrs[] = {
&dev_attr_freeze_on_smi.attr,
+ NULL, /* &dev_attr_allow_tsx_force_abort.attr.attr */
NULL,
};
@@ -4623,6 +4682,15 @@ __init int intel_pmu_init(void)
tsx_attr = hsw_tsx_events_attrs;
intel_pmu_pebs_data_source_skl(
boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X);
+
+ if (boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) {
+ x86_pmu.flags |= PMU_FL_TFA;
+ x86_pmu.get_event_constraints = skl_get_event_constraints;
+ x86_pmu.enable_all = intel_skl_pmu_enable_all;
+ x86_pmu.commit_scheduling = intel_skl_commit_scheduling;
+ intel_pmu_attrs[1] = &dev_attr_allow_tsx_force_abort.attr.attr;
+ }
+
pr_cont("Skylake events, ");
name = "skylake";
break;
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -243,6 +243,11 @@ struct cpu_hw_events {
int excl_thread_id; /* 0 or 1 */
/*
+ * SKL TSX_FORCE_ABORT shadow
+ */
+ u64 tfa_shadow;
+
+ /*
* AMD specific bits
*/
struct amd_nb *amd_nb;
@@ -682,6 +687,7 @@ do { \
#define PMU_FL_EXCL_CNTRS 0x4 /* has exclusive counter requirements */
#define PMU_FL_EXCL_ENABLED 0x8 /* exclusive counter active */
#define PMU_FL_PEBS_ALL 0x10 /* all events are valid PEBS events */
+#define PMU_FL_TFA 0x20 /* deal with TSX force abort */
#define EVENT_VAR(_id) event_attr_##_id
#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
^ permalink raw reply [flat|nested] 17+ messages in thread
* [MODERATED] Re: [PATCH 4/4] walnut 4
2019-02-19 15:58 ` [MODERATED] [PATCH 4/4] walnut 4 Peter Zijlstra
@ 2019-02-19 16:10 ` Peter Zijlstra
2019-02-20 22:31 ` Nelson D'Souza
2019-02-19 18:49 ` [MODERATED] Linus Torvalds
2019-03-07 13:56 ` [MODERATED] Re: [PATCH 4/4] walnut 4 Greg KH
2 siblings, 1 reply; 17+ messages in thread
From: Peter Zijlstra @ 2019-02-19 16:10 UTC (permalink / raw)
To: speck
On Tue, Feb 19, 2019 at 04:58:11PM +0100, speck for Peter Zijlstra wrote:
> + if (!allow_tsx_force_abort && test_bit(3, c->idxmsk)) {
> + c = dyn_constraint(cpuc, c, idx);
> + c->idxmsk64 &= ~(1ULL << 3);
> + c->weight = c->idxmsk64 - 1;
Damn; that should be:
c->weight--;
^ permalink raw reply [flat|nested] 17+ messages in thread
* [MODERATED] Re: [PATCH 4/4] walnut 4
2019-02-19 16:10 ` [MODERATED] " Peter Zijlstra
@ 2019-02-20 22:31 ` Nelson D'Souza
2019-02-22 22:58 ` [MODERATED] Fwd: " Nelson D'Souza
0 siblings, 1 reply; 17+ messages in thread
From: Nelson D'Souza @ 2019-02-20 22:31 UTC (permalink / raw)
To: speck
[-- Attachment #1.1: Type: text/plain, Size: 554 bytes --]
Hi Peter,
I have attached the initial test logs from your latest patchset.
The perf command does not crash when /sys/devices/cpu/allow_tsx_force_abort is set to 0.
Thanks,
Nelson
On 2/19/19 8:10 AM, speck for Peter Zijlstra wrote:
> On Tue, Feb 19, 2019 at 04:58:11PM +0100, speck for Peter Zijlstra wrote:
>> + if (!allow_tsx_force_abort && test_bit(3, c->idxmsk)) {
>> + c = dyn_constraint(cpuc, c, idx);
>> + c->idxmsk64 &= ~(1ULL << 3);
>> + c->weight = c->idxmsk64 - 1;
>
> Damn; that should be:
>
> c->weight--;
>
[-- Attachment #1.2: tsx_force_abort_0.3counters --]
[-- Type: text/plain, Size: 395 bytes --]
# started on Thu Feb 21 15:23:52 2019
Performance counter stats for 'system wide':
89,033 cache-misses # 21.591 % of all cache refs
412,353 cache-references
4,630,472,209 branches
1.005154418 seconds time elapsed
[-- Attachment #1.3: tsx_force_abort_0.4counters --]
[-- Type: text/plain, Size: 481 bytes --]
# started on Thu Feb 21 15:24:10 2019
Performance counter stats for 'system wide':
<not counted> cache-misses
<not counted> cache-references
<not counted> branches
<not supported> branch-misses
1.005054423 seconds time elapsed
[-- Attachment #1.4: tsx_force_abort_1.3counters --]
[-- Type: text/plain, Size: 395 bytes --]
# started on Thu Feb 21 15:21:37 2019
Performance counter stats for 'system wide':
58,484 cache-misses # 14.165 % of all cache refs
412,875 cache-references
4,627,163,916 branches
1.005123883 seconds time elapsed
[-- Attachment #1.5: tsx_force_abort_1.4counters --]
[-- Type: text/plain, Size: 481 bytes --]
# started on Thu Feb 21 15:21:04 2019
Performance counter stats for 'system wide':
71,146 cache-misses # 11.203 % of all cache refs
635,039 cache-references
4,616,150,556 branches
84,552 branch-misses # 0.00% of all branches
1.005141562 seconds time elapsed
^ permalink raw reply [flat|nested] 17+ messages in thread
* [MODERATED] Fwd: [PATCH 4/4] walnut 4
2019-02-20 22:31 ` Nelson D'Souza
@ 2019-02-22 22:58 ` Nelson D'Souza
2019-02-22 23:19 ` Nelson D'Souza
0 siblings, 1 reply; 17+ messages in thread
From: Nelson D'Souza @ 2019-02-22 22:58 UTC (permalink / raw)
To: speck
[-- Attachment #1.1: Type: text/plain, Size: 558 bytes --]
Hi Peter,
I have attached the initial test logs from your latest patchset.
The perf command does not crash when /sys/devices/cpu/allow_tsx_force_abort is set to 0.
Thanks,
Nelson
On 2/19/19 8:10 AM, speck for Peter Zijlstra wrote:
> On Tue, Feb 19, 2019 at 04:58:11PM +0100, speck for Peter Zijlstra wrote:
>> + if (!allow_tsx_force_abort && test_bit(3, c->idxmsk)) {
>> + c = dyn_constraint(cpuc, c, idx);
>> + c->idxmsk64 &= ~(1ULL << 3);
>> + c->weight = c->idxmsk64 - 1;
>
> Damn; that should be:
>
> c->weight--;
>
[-- Attachment #1.2: tsx_force_abort_0.3counters --]
[-- Type: text/plain, Size: 397 bytes --]
# started on Thu Feb 21 15:23:52 2019
Performance counter stats for 'system wide':
89,033 cache-misses # 21.591 % of all cache refs
412,353 cache-references
4,630,472,209 branches
1.005154418 seconds time elapsed
[-- Attachment #1.3: tsx_force_abort_0.4counters --]
[-- Type: text/plain, Size: 483 bytes --]
# started on Thu Feb 21 15:24:10 2019
Performance counter stats for 'system wide':
<not counted> cache-misses
<not counted> cache-references
<not counted> branches
<not supported> branch-misses
1.005054423 seconds time elapsed
[-- Attachment #1.4: tsx_force_abort_1.3counters --]
[-- Type: text/plain, Size: 397 bytes --]
# started on Thu Feb 21 15:21:37 2019
Performance counter stats for 'system wide':
58,484 cache-misses # 14.165 % of all cache refs
412,875 cache-references
4,627,163,916 branches
1.005123883 seconds time elapsed
[-- Attachment #1.5: tsx_force_abort_1.4counters --]
[-- Type: text/plain, Size: 483 bytes --]
# started on Thu Feb 21 15:21:04 2019
Performance counter stats for 'system wide':
71,146 cache-misses # 11.203 % of all cache refs
635,039 cache-references
4,616,150,556 branches
84,552 branch-misses # 0.00% of all branches
1.005141562 seconds time elapsed
^ permalink raw reply [flat|nested] 17+ messages in thread
* [MODERATED] Fwd: [PATCH 4/4] walnut 4
2019-02-22 22:58 ` [MODERATED] Fwd: " Nelson D'Souza
@ 2019-02-22 23:19 ` Nelson D'Souza
0 siblings, 0 replies; 17+ messages in thread
From: Nelson D'Souza @ 2019-02-22 23:19 UTC (permalink / raw)
To: speck
[-- Attachment #1.1: Type: text/plain, Size: 562 bytes --]
Hi Peter,
I have attached the initial test logs from your latest patchset.
The perf command does not crash when /sys/devices/cpu/allow_tsx_force_abort is set to 0.
Thanks,
Nelson
On 2/19/19 8:10 AM, speck for Peter Zijlstra wrote:
> On Tue, Feb 19, 2019 at 04:58:11PM +0100, speck for Peter Zijlstra wrote:
>> + if (!allow_tsx_force_abort && test_bit(3, c->idxmsk)) {
>> + c = dyn_constraint(cpuc, c, idx);
>> + c->idxmsk64 &= ~(1ULL << 3);
>> + c->weight = c->idxmsk64 - 1;
>
> Damn; that should be:
>
> c->weight--;
>
[-- Attachment #1.2: tsx_force_abort_0.3counters --]
[-- Type: text/plain, Size: 399 bytes --]
# started on Thu Feb 21 15:23:52 2019
Performance counter stats for 'system wide':
89,033 cache-misses # 21.591 % of all cache refs
412,353 cache-references
4,630,472,209 branches
1.005154418 seconds time elapsed
[-- Attachment #1.3: tsx_force_abort_0.4counters --]
[-- Type: text/plain, Size: 485 bytes --]
# started on Thu Feb 21 15:24:10 2019
Performance counter stats for 'system wide':
<not counted> cache-misses
<not counted> cache-references
<not counted> branches
<not supported> branch-misses
1.005054423 seconds time elapsed
[-- Attachment #1.4: tsx_force_abort_1.3counters --]
[-- Type: text/plain, Size: 399 bytes --]
# started on Thu Feb 21 15:21:37 2019
Performance counter stats for 'system wide':
58,484 cache-misses # 14.165 % of all cache refs
412,875 cache-references
4,627,163,916 branches
1.005123883 seconds time elapsed
[-- Attachment #1.5: tsx_force_abort_1.4counters --]
[-- Type: text/plain, Size: 485 bytes --]
# started on Thu Feb 21 15:21:04 2019
Performance counter stats for 'system wide':
71,146 cache-misses # 11.203 % of all cache refs
635,039 cache-references
4,616,150,556 branches
84,552 branch-misses # 0.00% of all branches
1.005141562 seconds time elapsed
^ permalink raw reply [flat|nested] 17+ messages in thread
* [MODERATED] Re:
2019-02-19 15:58 ` [MODERATED] [PATCH 4/4] walnut 4 Peter Zijlstra
2019-02-19 16:10 ` [MODERATED] " Peter Zijlstra
@ 2019-02-19 18:49 ` Linus Torvalds
2019-02-20 14:37 ` Peter Zijlstra
2019-03-07 13:56 ` [MODERATED] Re: [PATCH 4/4] walnut 4 Greg KH
2 siblings, 1 reply; 17+ messages in thread
From: Linus Torvalds @ 2019-02-19 18:49 UTC (permalink / raw)
To: speck
On Tue, Feb 19, 2019 at 8:03 AM speck for Peter Zijlstra
<speck@linutronix.de> wrote:
>
> When TFA is allowed (default); the MSR gets set when PMC3 gets
> scheduled and cleared when, after scheduling, PMC3 is unused.
>
> When TFA is not allowed; clear PMC3 from all constraints such that it
> will not get used.
I wonder if somebody wants a "abort always" mode for testing?
IOW, have a mode where even if PCM3 is not used, set the TSX_FORCE_ABORT bit.
That way people can actually verify that their code works even if it
sees TSX enabled and uses it (but it always aborts). Basically
verifying their fallback code.
Linus
^ permalink raw reply [flat|nested] 17+ messages in thread
* [MODERATED] Re:
2019-02-19 18:49 ` [MODERATED] Linus Torvalds
@ 2019-02-20 14:37 ` Peter Zijlstra
0 siblings, 0 replies; 17+ messages in thread
From: Peter Zijlstra @ 2019-02-20 14:37 UTC (permalink / raw)
To: speck
On Tue, Feb 19, 2019 at 10:49:41AM -0800, speck for Linus Torvalds wrote:
> On Tue, Feb 19, 2019 at 8:03 AM speck for Peter Zijlstra
> <speck@linutronix.de> wrote:
> >
> > When TFA is allowed (default); the MSR gets set when PMC3 gets
> > scheduled and cleared when, after scheduling, PMC3 is unused.
> >
> > When TFA is not allowed; clear PMC3 from all constraints such that it
> > will not get used.
>
> I wonder if somebody wants a "abort always" mode for testing?
>
> IOW, have a mode where even if PCM3 is not used, set the TSX_FORCE_ABORT bit.
>
> That way people can actually verify that their code works even if it
> sees TSX enabled and uses it (but it always aborts). Basically
> verifying their fallback code.
Something like the below would make that happen. Not sure it's worth it
to backport this to everything though.
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2000,6 +2000,8 @@ static void intel_pmu_nhm_enable_all(int
intel_pmu_enable_all(added);
}
+static int allow_tsx_force_abort = 1;
+
static void intel_set_tfa(struct cpu_hw_events *cpuc, bool on)
{
u64 val = on ? MSR_TFA_RTM_FORCE_ABORT : 0;
@@ -2027,7 +2029,7 @@ static void intel_skl_pmu_enable_all(int
* If we find PMC3 is no longer used when we enable the PMU, we can
* clear TFA.
*/
- if (!test_bit(3, cpuc->active_mask))
+ if (!test_bit(3, cpuc->active_mask) && allow_tsx_force_abort < 2)
intel_set_tfa(cpuc, false);
intel_pmu_enable_all(added);
@@ -3399,8 +3401,6 @@ glp_get_event_constraints(struct cpu_hw_
return c;
}
-static bool allow_tsx_force_abort = true;
-
static struct event_constraint *
skl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
@@ -4115,7 +4115,7 @@ static ssize_t freeze_on_smi_show(struct
return sprintf(buf, "%lu\n", x86_pmu.attr_freeze_on_smi);
}
-static DEFINE_MUTEX(freeze_on_smi_mutex);
+static DEFINE_MUTEX(sysfs_mutex);
static ssize_t freeze_on_smi_store(struct device *cdev,
struct device_attribute *attr,
@@ -4131,7 +4131,7 @@ static ssize_t freeze_on_smi_store(struc
if (val > 1)
return -EINVAL;
- mutex_lock(&freeze_on_smi_mutex);
+ mutex_lock(&sysfs_mutex);
if (x86_pmu.attr_freeze_on_smi == val)
goto done;
@@ -4142,7 +4142,7 @@ static ssize_t freeze_on_smi_store(struc
on_each_cpu(flip_smm_bit, &val, 1);
put_online_cpus();
done:
- mutex_unlock(&freeze_on_smi_mutex);
+ mutex_unlock(&sysfs_mutex);
return count;
}
@@ -4179,11 +4179,59 @@ static struct attribute *intel_pmu_caps_
NULL
};
-DEVICE_BOOL_ATTR(allow_tsx_force_abort, 0644, allow_tsx_force_abort);
+static ssize_t allow_tsx_force_abort_show(struct device *cdev
+ struct device_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%d\n", allow_tsx_force_abort);
+}
+
+static void flip_tfa_bit(void *data)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ int val = *(int *)data;
+ bool set;
+
+ lockdep_assert_irqs_disabled();
+
+ if (val == 2)
+ set = true;
+ else
+ set = test_bit(3, cpuc->active_mask);
+
+ intel_set_tfa(cpuc, set);
+}
+
+static ssize_t allow_tsx_force_abort_store(struct device *cdev,
+ struct device_attribute *attr,
+ const char *buf, ssize_t count)
+{
+ ssize_t ret;
+ int val;
+
+ ret = kstrtoint(buf, 0, &val);
+ if (ret)
+ return ret;
+
+ if ((unsigned int)val > 2)
+ return -EINVAL;
+
+ mutex_lock(&sysfs_mutex);
+ if (allow_tsx_force_abort != val) {
+ allow_tsx_force_abort = val;
+ cpus_read_lock();
+ on_each_cpu(flip_tfa_bit, &val, 1);
+ cpus_read_unlock();
+ }
+ mutex_unlock(&sysfs_mutex);
+ return ret;
+}
+
+DEVICE_ATTR_RW(allow_tsx_force_abort);
static struct attribute *intel_pmu_attrs[] = {
&dev_attr_freeze_on_smi.attr,
- NULL, /* &dev_attr_allow_tsx_force_abort.attr.attr */
+ NULL, /* &dev_attr_allow_tsx_force_abort.attr */
NULL,
};
@@ -4688,7 +4736,7 @@ __init int intel_pmu_init(void)
x86_pmu.get_event_constraints = skl_get_event_constraints;
x86_pmu.enable_all = intel_skl_pmu_enable_all;
x86_pmu.commit_scheduling = intel_skl_commit_scheduling;
- intel_pmu_attrs[1] = &dev_attr_allow_tsx_force_abort.attr.attr;
+ intel_pmu_attrs[1] = &dev_attr_allow_tsx_force_abort.attr;
}
pr_cont("Skylake events, ");
^ permalink raw reply [flat|nested] 17+ messages in thread
* [MODERATED] Re: [PATCH 4/4] walnut 4
2019-02-19 15:58 ` [MODERATED] [PATCH 4/4] walnut 4 Peter Zijlstra
2019-02-19 16:10 ` [MODERATED] " Peter Zijlstra
2019-02-19 18:49 ` [MODERATED] Linus Torvalds
@ 2019-03-07 13:56 ` Greg KH
2019-03-07 14:34 ` Peter Zijlstra
2019-03-07 15:09 ` mark gross
2 siblings, 2 replies; 17+ messages in thread
From: Greg KH @ 2019-03-07 13:56 UTC (permalink / raw)
To: speck
I was working on the backport to 4.9.y and ran across merge issues with
this patch. I understand the constraints on which you are working with
here, but please, let me do a tiny rant about how you all are doing
"extra work" with regards to the creation of sysfs files:
On Tue, Feb 19, 2019 at 04:58:11PM +0100, speck for Peter Zijlstra wrote:
> @@ -4123,8 +4179,11 @@ static struct attribute *intel_pmu_caps_
> NULL
> };
>
> +DEVICE_BOOL_ATTR(allow_tsx_force_abort, 0644, allow_tsx_force_abort);
> +
> static struct attribute *intel_pmu_attrs[] = {
> &dev_attr_freeze_on_smi.attr,
> + NULL, /* &dev_attr_allow_tsx_force_abort.attr.attr */
> NULL,
> };
>
> @@ -4623,6 +4682,15 @@ __init int intel_pmu_init(void)
> tsx_attr = hsw_tsx_events_attrs;
> intel_pmu_pebs_data_source_skl(
> boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X);
> +
> + if (boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) {
> + x86_pmu.flags |= PMU_FL_TFA;
> + x86_pmu.get_event_constraints = skl_get_event_constraints;
> + x86_pmu.enable_all = intel_skl_pmu_enable_all;
> + x86_pmu.commit_scheduling = intel_skl_commit_scheduling;
> + intel_pmu_attrs[1] = &dev_attr_allow_tsx_force_abort.attr.attr;
> + }
> +
> pr_cont("Skylake events, ");
> name = "skylake";
> break;
sysfs files have the ability to be "shown" or not, on their own. There
is a "is_visable()" callback for every sysfs group.
This allows you to set a whole bunch of groups to a device, and then,
when the device is created, the kobject core will call back to you and
ask "should I show this file?" At that point in time, you can do your
check for "boot_cpu_has(...)" and the like to see if you really should
be showing the file or not.
This saves you all the horrid mess of the merge_attr() function, trying
to overload NULL pointers here in this attribute list, and other such
hacks.
I guess no one stopped to think why _only_ the perf cpu code has a
function like merge_attr(), and how the rest of the kernel could get
away without needing a hack like that :(
Anyway, rant over. I'll see if I can squeze this into the 4.9.y tree as
it was before the major revamp of the perf cpu sysfs files.
In the future, someone should just switch all of this to attribute
groups, like the rest of the kernel uses, so no such crazyness is
needed.
thanks,
greg k-h
^ permalink raw reply [flat|nested] 17+ messages in thread
* [MODERATED] Re: [PATCH 4/4] walnut 4
2019-03-07 13:56 ` [MODERATED] Re: [PATCH 4/4] walnut 4 Greg KH
@ 2019-03-07 14:34 ` Peter Zijlstra
2019-03-07 15:09 ` mark gross
1 sibling, 0 replies; 17+ messages in thread
From: Peter Zijlstra @ 2019-03-07 14:34 UTC (permalink / raw)
To: speck
On Thu, Mar 07, 2019 at 02:56:31PM +0100, speck for Greg KH wrote:
> sysfs files have the ability to be "shown" or not, on their own. There
> is a "is_visable()" callback for every sysfs group.
>
> This allows you to set a whole bunch of groups to a device, and then,
> when the device is created, the kobject core will call back to you and
> ask "should I show this file?" At that point in time, you can do your
> check for "boot_cpu_has(...)" and the like to see if you really should
> be showing the file or not.
>
> This saves you all the horrid mess of the merge_attr() function, trying
> to overload NULL pointers here in this attribute list, and other such
> hacks.
>
> I guess no one stopped to think why _only_ the perf cpu code has a
> function like merge_attr(), and how the rest of the kernel could get
> away without needing a hack like that :(
>
> Anyway, rant over. I'll see if I can squeze this into the 4.9.y tree as
> it was before the major revamp of the perf cpu sysfs files.
>
> In the future, someone should just switch all of this to attribute
> groups, like the rest of the kernel uses, so no such crazyness is
> needed.
Right; that would be a really nice cleanup. I suppose nobody who touched
all that (very much including me) knew about that is_visible() thing.
I'll try and not forget and make someone clean that up.
^ permalink raw reply [flat|nested] 17+ messages in thread
* [MODERATED] Re: [PATCH 4/4] walnut 4
2019-03-07 13:56 ` [MODERATED] Re: [PATCH 4/4] walnut 4 Greg KH
2019-03-07 14:34 ` Peter Zijlstra
@ 2019-03-07 15:09 ` mark gross
1 sibling, 0 replies; 17+ messages in thread
From: mark gross @ 2019-03-07 15:09 UTC (permalink / raw)
To: speck
On Thu, Mar 07, 2019 at 02:56:31PM +0100, speck for Greg KH wrote:
> I was working on the backport to 4.9.y and ran across merge issues with
> this patch. I understand the constraints on which you are working with
> here, but please, let me do a tiny rant about how you all are doing
> "extra work" with regards to the creation of sysfs files:
>
> On Tue, Feb 19, 2019 at 04:58:11PM +0100, speck for Peter Zijlstra wrote:
> > @@ -4123,8 +4179,11 @@ static struct attribute *intel_pmu_caps_
> > NULL
> > };
> >
> > +DEVICE_BOOL_ATTR(allow_tsx_force_abort, 0644, allow_tsx_force_abort);
> > +
> > static struct attribute *intel_pmu_attrs[] = {
> > &dev_attr_freeze_on_smi.attr,
> > + NULL, /* &dev_attr_allow_tsx_force_abort.attr.attr */
> > NULL,
> > };
> >
> > @@ -4623,6 +4682,15 @@ __init int intel_pmu_init(void)
> > tsx_attr = hsw_tsx_events_attrs;
> > intel_pmu_pebs_data_source_skl(
> > boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X);
> > +
> > + if (boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) {
> > + x86_pmu.flags |= PMU_FL_TFA;
> > + x86_pmu.get_event_constraints = skl_get_event_constraints;
> > + x86_pmu.enable_all = intel_skl_pmu_enable_all;
> > + x86_pmu.commit_scheduling = intel_skl_commit_scheduling;
> > + intel_pmu_attrs[1] = &dev_attr_allow_tsx_force_abort.attr.attr;
> > + }
> > +
> > pr_cont("Skylake events, ");
> > name = "skylake";
> > break;
>
> sysfs files have the ability to be "shown" or not, on their own. There
> is a "is_visable()" callback for every sysfs group.
>
> This allows you to set a whole bunch of groups to a device, and then,
> when the device is created, the kobject core will call back to you and
> ask "should I show this file?" At that point in time, you can do your
> check for "boot_cpu_has(...)" and the like to see if you really should
> be showing the file or not.
>
> This saves you all the horrid mess of the merge_attr() function, trying
> to overload NULL pointers here in this attribute list, and other such
> hacks.
>
> I guess no one stopped to think why _only_ the perf cpu code has a
> function like merge_attr(), and how the rest of the kernel could get
> away without needing a hack like that :(
>
> Anyway, rant over. I'll see if I can squeze this into the 4.9.y tree as
> it was before the major revamp of the perf cpu sysfs files.
>
> In the future, someone should just switch all of this to attribute
> groups, like the rest of the kernel uses, so no such crazyness is
> needed.
If peterz doesn't beat me to it I want to mentor one of the more Jr engineers
I'm working with to do this. (and if I can't convince one of them to do this
with my help then I'll do it by the end of next week.)
But, its fine if someone beats me to it too. There will be other opportunities
;)
--mark
^ permalink raw reply [flat|nested] 17+ messages in thread
* [MODERATED] Re: [PATCH 0/4] walnut 0
2019-02-19 15:58 [MODERATED] [PATCH 0/4] walnut 0 Peter Zijlstra
` (3 preceding siblings ...)
2019-02-19 15:58 ` [MODERATED] [PATCH 4/4] walnut 4 Peter Zijlstra
@ 2019-02-20 18:10 ` mark gross
2019-02-22 19:20 ` Nelson D'Souza
2019-03-01 18:59 ` mark gross
5 siblings, 1 reply; 17+ messages in thread
From: mark gross @ 2019-02-20 18:10 UTC (permalink / raw)
To: speck
On Tue, Feb 19, 2019 at 04:58:07PM +0100, speck for Peter Zijlstra wrote:
> From: Peter Zijlstra <peterz@infradead.org>
> Subject: [PATCH 0/4] walnut
^cute^
>
> nicely split
^ permalink raw reply [flat|nested] 17+ messages in thread
* [MODERATED] Re: [PATCH 0/4] walnut 0
2019-02-19 15:58 [MODERATED] [PATCH 0/4] walnut 0 Peter Zijlstra
` (4 preceding siblings ...)
2019-02-20 18:10 ` [MODERATED] Re: [PATCH 0/4] walnut 0 mark gross
@ 2019-03-01 18:59 ` mark gross
5 siblings, 0 replies; 17+ messages in thread
From: mark gross @ 2019-03-01 18:59 UTC (permalink / raw)
To: speck
[-- Attachment #1: Type: text/plain, Size: 812 bytes --]
On Tue, Feb 19, 2019 at 04:58:07PM +0100, speck for Peter Zijlstra wrote:
> From: Peter Zijlstra <peterz@infradead.org>
> Subject: [PATCH 0/4] walnut
>
> nicely split
Peter, We have been testing your patch set named after a nut posted Feb 19.
We tested it on bare metal and it looks good. There was some worry that a KVM
guest would have issues with multiplexing if the host had your patch and
allow_tsx_abort=0.
What we saw away the 4th counter in the guest showed zero. Which IMO seems ok.
Attached are the tests that Nelson ran for both host and kvm guest. The
behavior between host and guest behavior with 4 counters when allow_tsx_abort=0
looks minor to me.
We are eager to see a more final version of this patchset so we can be ready
for the public disclosure coming up March 12.
Thanks! --mark
[-- Attachment #2: patchedhost.txt --]
[-- Type: text/plain, Size: 2859 bytes --]
/* ALLOW_TSX_FORCE_ABORT = 0 */
// Four Counters
root@jf1-otc-3AR3-33:/home/labuser/ocean/tools/perf# taskset -c 0 ./perf stat -e cs,'{cache-misses,cache-references,branches,branch-misses}' --no-merge ./rtm-success-long
rtm success 100.00%, aborted 155
Performance counter stats for './rtm-success-long':
154 cs
<not counted> cache-misses
<not counted> cache-references
<not counted> branches
<not supported> branch-misses
6.749371782 seconds time elapsed
4.458202000 seconds user
0.000000000 seconds sys
root@jf1-otc-3AR3-33:/home/labuser/ocean/tools/perf# taskset -c 0 ./rtm-success-long
rtm success 100.00%, aborted 193
// Three Counters
root@jf1-otc-3AR3-33:/home/labuser/ocean/tools/perf# taskset -c 0 ./perf stat -e cs,'{cache-misses,cache-references,branches}' --no-merge ./rtm-success-long
rtm success 100.00%, aborted 171
Performance counter stats for './rtm-success-long':
155 cs
5,332 cache-misses # 16.190 % of all cache refs
32,934 cache-references
201,012,996 branches
6.808987753 seconds time elapsed
4.466809000 seconds user
0.000000000 seconds sys
root@jf1-otc-3AR3-33:/home/labuser/ocean/tools/perf# taskset -c 0 ./rtm-success-long
rtm success 100.00%, aborted 166
/* ALLOW_TSX_FORCE_ABORT = 1 */
//Four Counters
root@jf1-otc-3AR3-33:/home/labuser/ocean/tools/perf# taskset -c 0 ./perf stat -e cs,'{cache-misses,cache-references,branches,branch-misses}' --no-merge ./rtm-success-long
rtm success 0.00%, aborted 100000000
Performance counter stats for './rtm-success-long':
145 cs
42,505 cache-misses # 48.776 % of all cache refs
87,144 cache-references
301,075,199 branches
100,040,329 branch-misses # 33.23% of all branches
6.496993416 seconds time elapsed
4.347726000 seconds user
0.000000000 seconds sys
root@jf1-otc-3AR3-33:/home/labuser/ocean/tools/perf# taskset -c 0 ./rtm-success-long
rtm success 100.00%, aborted 589
//Three Counters
root@jf1-otc-3AR3-33:/home/labuser/ocean/tools/perf# taskset -c 0 ./perf stat -e cs,'{cache-misses,cache-references,branches}' --no-merge ./rtm-success-long
rtm success 100.00%, aborted 162
Performance counter stats for './rtm-success-long':
145 cs
5,488 cache-misses # 17.551 % of all cache refs
31,268 cache-references
201,016,636 branches
6.593972485 seconds time elapsed
4.462666000 seconds user
0.000000000 seconds sys
root@jf1-otc-3AR3-33:/home/labuser/ocean/tools/perf# taskset -c 0 ./rtm-success-long
rtm success 100.00%, aborted 180
[-- Attachment #3: patchedkvm_guest.txt --]
[-- Type: text/plain, Size: 2992 bytes --]
/* ALLOW_TSX_FORCE_ABORT = 0 */
// Four Counters
kvm-virtual@kvmvirtual-Standard-PC-i440FX-PIIX-1996:~$ taskset -c 0 ./perf stat -e cs,'{cache-misses,cache-references,branches,branch-misses}' --no-merge ./rtm-success-long
rtm success 100.00%, aborted 919
Performance counter stats for './rtm-success-long':
0 cs:u
3,318 cache-misses:u # 44.753 % of all cache refs
7,414 cache-references:u
200,038,222 branches:u
0 branch-misses:u # 0.00% of all branches
16.023427318 seconds time elapsed
6.446820000 seconds user
2.420087000 seconds sys
kvm-virtual@kvmvirtual-Standard-PC-i440FX-PIIX-1996:~$ taskset -c 0 ./rtm-success-long
rtm success 100.00%, aborted 701
// Three Counters
kvm-virtual@kvmvirtual-Standard-PC-i440FX-PIIX-1996:~$ taskset -c 0 ./perf stat -e cs,'{cache-misses,cache-references,branches}' --no-merge ./rtm-success-long
rtm success 100.00%, aborted 830
Performance counter stats for './rtm-success-long':
0 cs:u
3,311 cache-misses:u # 47.354 % of all cache refs
6,992 cache-references:u
200,038,067 branches:u
13.679651620 seconds time elapsed
6.123687000 seconds user
1.926116000 seconds sys
kvm-virtual@kvmvirtual-Standard-PC-i440FX-PIIX-1996:~$ taskset -c 0 ./rtm-success-long
rtm success 100.00%, aborted 704
/* ALLOW_TSX_FORCE_ABORT = 1 */
// Four Counters
kvm-virtual@kvmvirtual-Standard-PC-i440FX-PIIX-1996:~$ taskset -c 0 ./perf stat -e cs,'{cache-misses,cache-references,branches,branch-misses}' --no-merge ./rtm-success-long
rtm success 0.00%, aborted 100000000
Performance counter stats for './rtm-success-long':
0 cs:u
2,742 cache-misses:u # 29.685 % of all cache refs
9,237 cache-references:u
300,037,068 branches:u
100,003,687 branch-misses:u # 33.33% of all branches
17.318943108 seconds time elapsed
6.415269000 seconds user
2.753311000 seconds sys
kvm-virtual@kvmvirtual-Standard-PC-i440FX-PIIX-1996:~$ taskset -c 0 ./rtm-success-long
rtm success 100.00%, aborted 1501
//Three Counters
kvm-virtual@kvmvirtual-Standard-PC-i440FX-PIIX-1996:~$ taskset -c 0 ./perf stat -e cs,'{cache-misses,cache-references,branches}' --no-merge ./rtm-success-long
rtm success 100.00%, aborted 772
Performance counter stats for './rtm-success-long':
0 cs:u
2,892 cache-misses:u # 40.228 % of all cache refs
7,189 cache-references:u
200,038,116 branches:u
17.638868052 seconds time elapsed
6.710811000 seconds user
2.761211000 seconds sys
kvm-virtual@kvmvirtual-Standard-PC-i440FX-PIIX-1996:~$ taskset -c 0 ./rtm-success-long
rtm success 100.00%, aborted 830
^ permalink raw reply [flat|nested] 17+ messages in thread