linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/3] perf: Add macros to specify onchip L2/L3 accesses
@ 2021-09-04  6:49 Kajol Jain
  2021-09-04  6:49 ` [PATCH 2/3] " Kajol Jain
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Kajol Jain @ 2021-09-04  6:49 UTC (permalink / raw)
  To: mpe, linuxppc-dev, linux-kernel, peterz, mingo, acme, jolsa,
	namhyung, linux-perf-users, ak
  Cc: maddy, atrajeev, kjain, rnsastry, yao.jin, ast, daniel,
	songliubraving, kan.liang, mark.rutland, alexander.shishkin,
	paulus

Add a couple of new macros to represent onchip L2 and onchip L3 accesses.

Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
---
 include/uapi/linux/perf_event.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index f92880a15645..030b3e990ac3 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -1265,7 +1265,9 @@ union perf_mem_data_src {
 #define PERF_MEM_LVLNUM_L2	0x02 /* L2 */
 #define PERF_MEM_LVLNUM_L3	0x03 /* L3 */
 #define PERF_MEM_LVLNUM_L4	0x04 /* L4 */
-/* 5-0xa available */
+#define PERF_MEM_LVLNUM_OC_L2	0x05 /* On Chip L2 */
+#define PERF_MEM_LVLNUM_OC_L3	0x06 /* On Chip L3 */
+/* 7-0xa available */
 #define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */
 #define PERF_MEM_LVLNUM_LFB	0x0c /* LFB */
 #define PERF_MEM_LVLNUM_RAM	0x0d /* RAM */
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 2/3] perf: Add macros to specify onchip L2/L3 accesses
  2021-09-04  6:49 [PATCH 1/3] perf: Add macros to specify onchip L2/L3 accesses Kajol Jain
@ 2021-09-04  6:49 ` Kajol Jain
  2021-09-04  6:49 ` [PATCH 3/3] powerpc/perf: Fix data source encodings for power10 Kajol Jain
  2021-09-08  7:17 ` [PATCH 1/3] perf: Add macros to specify onchip L2/L3 accesses Michael Ellerman
  2 siblings, 0 replies; 10+ messages in thread
From: Kajol Jain @ 2021-09-04  6:49 UTC (permalink / raw)
  To: mpe, linuxppc-dev, linux-kernel, peterz, mingo, acme, jolsa,
	namhyung, linux-perf-users, ak
  Cc: maddy, atrajeev, kjain, rnsastry, yao.jin, ast, daniel,
	songliubraving, kan.liang, mark.rutland, alexander.shishkin,
	paulus

Add a couple of new macros to represent onchip L2 and onchip L3 accesses.
This patch also adds the decoding strings in the mem_lvlnum data structure.

Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
---
 tools/include/uapi/linux/perf_event.h | 4 +++-
 tools/perf/util/mem-events.c          | 2 ++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index f92880a15645..030b3e990ac3 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -1265,7 +1265,9 @@ union perf_mem_data_src {
 #define PERF_MEM_LVLNUM_L2	0x02 /* L2 */
 #define PERF_MEM_LVLNUM_L3	0x03 /* L3 */
 #define PERF_MEM_LVLNUM_L4	0x04 /* L4 */
-/* 5-0xa available */
+#define PERF_MEM_LVLNUM_OC_L2	0x05 /* On Chip L2 */
+#define PERF_MEM_LVLNUM_OC_L3	0x06 /* On Chip L3 */
+/* 7-0xa available */
 #define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */
 #define PERF_MEM_LVLNUM_LFB	0x0c /* LFB */
 #define PERF_MEM_LVLNUM_RAM	0x0d /* RAM */
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index f0e75df72b80..f846a91220c2 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -294,6 +294,8 @@ static const char * const mem_lvl[] = {
 };
 
 static const char * const mem_lvlnum[] = {
+	[PERF_MEM_LVLNUM_OC_L2] = "OnChip L2",
+	[PERF_MEM_LVLNUM_OC_L3] = "OnChip L3",
 	[PERF_MEM_LVLNUM_ANY_CACHE] = "Any cache",
 	[PERF_MEM_LVLNUM_LFB] = "LFB",
 	[PERF_MEM_LVLNUM_RAM] = "RAM",
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 3/3] powerpc/perf: Fix data source encodings for power10
  2021-09-04  6:49 [PATCH 1/3] perf: Add macros to specify onchip L2/L3 accesses Kajol Jain
  2021-09-04  6:49 ` [PATCH 2/3] " Kajol Jain
@ 2021-09-04  6:49 ` Kajol Jain
  2021-09-08  7:17 ` [PATCH 1/3] perf: Add macros to specify onchip L2/L3 accesses Michael Ellerman
  2 siblings, 0 replies; 10+ messages in thread
From: Kajol Jain @ 2021-09-04  6:49 UTC (permalink / raw)
  To: mpe, linuxppc-dev, linux-kernel, peterz, mingo, acme, jolsa,
	namhyung, linux-perf-users, ak
  Cc: maddy, atrajeev, kjain, rnsastry, yao.jin, ast, daniel,
	songliubraving, kan.liang, mark.rutland, alexander.shishkin,
	paulus

Fix the data source encodings for power10 to represent
onchip L2/L3 accesses properly.

Fixes: 79e96f8f930d ("powerpc/perf: Export memory hierarchy info to user space")
Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
---
 arch/powerpc/perf/isa207-common.c | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
index f92bf5f6b74f..9630a17c5da4 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -238,11 +238,22 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx)
 		ret |= P(SNOOP, HIT);
 		break;
 	case 5:
-		ret = PH(LVL, REM_CCE1);
-		if ((sub_idx == 0) || (sub_idx == 2) || (sub_idx == 4))
-			ret |= P(SNOOP, HIT);
-		else if ((sub_idx == 1) || (sub_idx == 3) || (sub_idx == 5))
-			ret |= P(SNOOP, HITM);
+		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+			if (sub_idx == 0 || sub_idx == 4)
+				ret = PH(LVLNUM, OC_L2) | P(SNOOP, HIT);
+			else if (sub_idx == 1 || sub_idx == 5)
+				ret = PH(LVLNUM, OC_L2) | P(SNOOP, HITM);
+			else if (sub_idx == 2 || sub_idx == 6)
+				ret = PH(LVLNUM, OC_L3) | P(SNOOP, HIT);
+			else if (sub_idx == 3 || sub_idx == 7)
+				ret = PH(LVLNUM, OC_L3) | P(SNOOP, HITM);
+		} else {
+			ret = PH(LVL, REM_CCE1);
+			if ((sub_idx == 0) || (sub_idx == 2) || (sub_idx == 4))
+				ret |= P(SNOOP, HIT);
+			else if ((sub_idx == 1) || (sub_idx == 3) || (sub_idx == 5))
+				ret |= P(SNOOP, HITM);
+		}
 		break;
 	case 6:
 		ret = PH(LVL, REM_CCE2);
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/3] perf: Add macros to specify onchip L2/L3 accesses
  2021-09-04  6:49 [PATCH 1/3] perf: Add macros to specify onchip L2/L3 accesses Kajol Jain
  2021-09-04  6:49 ` [PATCH 2/3] " Kajol Jain
  2021-09-04  6:49 ` [PATCH 3/3] powerpc/perf: Fix data source encodings for power10 Kajol Jain
@ 2021-09-08  7:17 ` Michael Ellerman
  2021-09-08  9:26   ` Peter Zijlstra
  2 siblings, 1 reply; 10+ messages in thread
From: Michael Ellerman @ 2021-09-08  7:17 UTC (permalink / raw)
  To: Kajol Jain, linuxppc-dev, linux-kernel, peterz, mingo, acme,
	jolsa, namhyung, linux-perf-users, ak
  Cc: maddy, atrajeev, kjain, rnsastry, yao.jin, ast, daniel,
	songliubraving, kan.liang, mark.rutland, alexander.shishkin,
	paulus

Kajol Jain <kjain@linux.ibm.com> writes:
> Add couple of new macros to represent onchip L2 and onchip L3 accesses.

It would be "on chip". But I think this needs much more explanation,
this is a generic header so these definitions need to make sense, and
have an understood meaning, across all architectures.

I think most people are going to read "on chip" as differentiating
between an L2/L3 that is "on chip" vs "off chip".

But the case you're trying to express is "another core's L2/L3 on the
same chip as the CPU", vs "the current CPU's L2/L3".


> diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
> index f92880a15645..030b3e990ac3 100644
> --- a/include/uapi/linux/perf_event.h
> +++ b/include/uapi/linux/perf_event.h
> @@ -1265,7 +1265,9 @@ union perf_mem_data_src {
>  #define PERF_MEM_LVLNUM_L2	0x02 /* L2 */
>  #define PERF_MEM_LVLNUM_L3	0x03 /* L3 */
>  #define PERF_MEM_LVLNUM_L4	0x04 /* L4 */
> -/* 5-0xa available */
> +#define PERF_MEM_LVLNUM_OC_L2	0x05 /* On Chip L2 */
> +#define PERF_MEM_LVLNUM_OC_L3	0x06 /* On Chip L3 */

The obvious use for 5 is for "L5" and so on.

I'm not sure adding new levels is the best idea, because these don't fit
neatly into the hierarchy, they are off to the side.


I wonder if we should use the remote field.

ie. for another core's L2 we set:

  mem_lvl = PERF_MEM_LVL_L2
  mem_remote = 1

Which would mean "remote L2", but not remote enough to be
lvl = PERF_MEM_LVL_REM_CCE1.

It would be printed by the existing tools/perf code as "Remote L2", vs
"Remote cache (1 hop)", which seems OK.


ie. we'd be able to express:

  Current core's L2: LVL_L2
  Other core's L2:   LVL_L2 | REMOTE
  Other chip's L2:   LVL_REM_CCE1 | REMOTE

And similarly for L3.

I think that makes sense? Unless people think remote should be reserved
to mean on another chip, though we already have REM_CCE1 for that.

cheers



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/3] perf: Add macros to specify onchip L2/L3 accesses
  2021-09-08  7:17 ` [PATCH 1/3] perf: Add macros to specify onchip L2/L3 accesses Michael Ellerman
@ 2021-09-08  9:26   ` Peter Zijlstra
  2021-09-09 12:45     ` Michael Ellerman
  0 siblings, 1 reply; 10+ messages in thread
From: Peter Zijlstra @ 2021-09-08  9:26 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Kajol Jain, linuxppc-dev, linux-kernel, mingo, acme, jolsa,
	namhyung, linux-perf-users, ak, maddy, atrajeev, rnsastry,
	yao.jin, ast, daniel, songliubraving, kan.liang, mark.rutland,
	alexander.shishkin, paulus

On Wed, Sep 08, 2021 at 05:17:53PM +1000, Michael Ellerman wrote:
> Kajol Jain <kjain@linux.ibm.com> writes:

> > diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
> > index f92880a15645..030b3e990ac3 100644
> > --- a/include/uapi/linux/perf_event.h
> > +++ b/include/uapi/linux/perf_event.h
> > @@ -1265,7 +1265,9 @@ union perf_mem_data_src {
> >  #define PERF_MEM_LVLNUM_L2	0x02 /* L2 */
> >  #define PERF_MEM_LVLNUM_L3	0x03 /* L3 */
> >  #define PERF_MEM_LVLNUM_L4	0x04 /* L4 */
> > -/* 5-0xa available */
> > +#define PERF_MEM_LVLNUM_OC_L2	0x05 /* On Chip L2 */
> > +#define PERF_MEM_LVLNUM_OC_L3	0x06 /* On Chip L3 */
> 
> The obvious use for 5 is for "L5" and so on.
> 
> I'm not sure adding new levels is the best idea, because these don't fit
> neatly into the hierarchy, they are off to the side.
> 
> 
> I wonder if we should use the remote field.
> 
> ie. for another core's L2 we set:
> 
>   mem_lvl = PERF_MEM_LVL_L2
>   mem_remote = 1

This mixes APIs (see below), IIUC the correct usage would be something
like: lvl_num=L2 remote=1

> Which would mean "remote L2", but not remote enough to be
> lvl = PERF_MEM_LVL_REM_CCE1.
> 
> It would be printed by the existing tools/perf code as "Remote L2", vs
> "Remote cache (1 hop)", which seems OK.
> 
> 
> ie. we'd be able to express:
> 
>   Current core's L2: LVL_L2
>   Other core's L2:   LVL_L2 | REMOTE
>   Other chip's L2:   LVL_REM_CCE1 | REMOTE
> 
> And similarly for L3.
> 
> I think that makes sense? Unless people think remote should be reserved
> to mean on another chip, though we already have REM_CCE1 for that.

IIRC the PERF_MEM_LVL_* namespace is somewhat deprecated in favour of
the newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_} fields. Of
course, ABIs being what they are, we get to support both :/ But I'm not
sure mixing them is a great idea.

Also, clearly this could use a comment...

The 'new' composite doesn't have a hops field because the hardware that
necessitated that change doesn't report it, but we could easily add a
field there.

Suppose we add, mem_hops:3 (would 6 hops be too small?) and the
corresponding PERF_MEM_HOPS_{NA, 0..6}

Then I suppose you can encode things like:

	L2			- local L2
	L2 | REMOTE		- remote L2 at an unspecified distance (NA)
	L2 | REMOTE | HOPS_0	- remote L2 on the same node
	L2 | REMOTE | HOPS_1	- remote L2 on a node 1 removed

Would that work?

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/3] perf: Add macros to specify onchip L2/L3 accesses
  2021-09-08  9:26   ` Peter Zijlstra
@ 2021-09-09 12:45     ` Michael Ellerman
  2021-09-09 14:36       ` Peter Zijlstra
  0 siblings, 1 reply; 10+ messages in thread
From: Michael Ellerman @ 2021-09-09 12:45 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Kajol Jain, linuxppc-dev, linux-kernel, mingo, acme, jolsa,
	namhyung, linux-perf-users, ak, maddy, atrajeev, rnsastry,
	yao.jin, ast, daniel, songliubraving, kan.liang, mark.rutland,
	alexander.shishkin, paulus

Peter Zijlstra <peterz@infradead.org> writes:
> On Wed, Sep 08, 2021 at 05:17:53PM +1000, Michael Ellerman wrote:
>> Kajol Jain <kjain@linux.ibm.com> writes:
>
>> > diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
>> > index f92880a15645..030b3e990ac3 100644
>> > --- a/include/uapi/linux/perf_event.h
>> > +++ b/include/uapi/linux/perf_event.h
>> > @@ -1265,7 +1265,9 @@ union perf_mem_data_src {
>> >  #define PERF_MEM_LVLNUM_L2	0x02 /* L2 */
>> >  #define PERF_MEM_LVLNUM_L3	0x03 /* L3 */
>> >  #define PERF_MEM_LVLNUM_L4	0x04 /* L4 */
>> > -/* 5-0xa available */
>> > +#define PERF_MEM_LVLNUM_OC_L2	0x05 /* On Chip L2 */
>> > +#define PERF_MEM_LVLNUM_OC_L3	0x06 /* On Chip L3 */
>> 
>> The obvious use for 5 is for "L5" and so on.
>> 
>> I'm not sure adding new levels is the best idea, because these don't fit
>> neatly into the hierarchy, they are off to the side.
>> 
>> 
>> I wonder if we should use the remote field.
>> 
>> ie. for another core's L2 we set:
>> 
>>   mem_lvl = PERF_MEM_LVL_L2
>>   mem_remote = 1
>
> This mixes APIs (see below), IIUC the correct usage would be something
> like: lvl_num=L2 remote=1

Aha, I was wondering how lvl and lvl_num were supposed to interact.

>> Which would mean "remote L2", but not remote enough to be
>> lvl = PERF_MEM_LVL_REM_CCE1.
>> 
>> It would be printed by the existing tools/perf code as "Remote L2", vs
>> "Remote cache (1 hop)", which seems OK.
>> 
>> 
>> ie. we'd be able to express:
>> 
>>   Current core's L2: LVL_L2
>>   Other core's L2:   LVL_L2 | REMOTE
>>   Other chip's L2:   LVL_REM_CCE1 | REMOTE
>> 
>> And similarly for L3.
>> 
>> I think that makes sense? Unless people think remote should be reserved
>> to mean on another chip, though we already have REM_CCE1 for that.
>
> IIRC the PERF_MEM_LVL_* namespace is somewhat depricated in favour of
> the newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_} fields. Of
> course, ABIs being what they are, we get to support both :/ But I'm not
> sure mixing them is a great idea.

OK.

> Also, clearly this could use a comment...
>
> The 'new' composite doesnt have a hops field because the hardware that
> nessecitated that change doesn't report it, but we could easily add a
> field there.
>
> Suppose we add, mem_hops:3 (would 6 hops be too small?) and the
> corresponding PERF_MEM_HOPS_{NA, 0..6}

It's really 7 if we use remote && hop = 0 to mean the first hop.

If we're wanting to use some of the hop levels to represent
intra-chip/package hops then we could possibly use them all on a really
big system.

eg. you could imagine something like:

 L2 | 		        - local L2
 L2 | REMOTE | HOPS_0	- L2 of neighbour core
 L2 | REMOTE | HOPS_1	- L2 of near core on same chip (same 1/2 of chip)
 L2 | REMOTE | HOPS_2	- L2 of far core on same chip (other 1/2 of chip)
 L2 | REMOTE | HOPS_3	- L2 of sibling chip in same package
 L2 | REMOTE | HOPS_4	- L2 on separate package 1 hop away
 L2 | REMOTE | HOPS_5	- L2 on separate package 2 hops away
 L2 | REMOTE | HOPS_6	- L2 on separate package 3 hops away


Whether it's useful to represent all those levels I'm not sure, but it's
probably good if we have the ability.

I guess I'm 50/50 on whether that's enough levels, or whether we want
another bit to allow for future growth.

> Then I suppose you can encode things like:
> 
> 	L2			- local L2
> 	L2 | REMOTE		- remote L2 at an unspecified distance (NA)
> 	L2 | REMOTE | HOPS_0	- remote L2 on the same node
> 	L2 | REMOTE | HOPS_1	- remote L2 on a node 1 removed
> 
> Would that work?

Yeah that looks good to me.

cheers

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/3] perf: Add macros to specify onchip L2/L3 accesses
  2021-09-09 12:45     ` Michael Ellerman
@ 2021-09-09 14:36       ` Peter Zijlstra
  2021-09-14 10:40         ` Michael Ellerman
  0 siblings, 1 reply; 10+ messages in thread
From: Peter Zijlstra @ 2021-09-09 14:36 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Kajol Jain, linuxppc-dev, linux-kernel, mingo, acme, jolsa,
	namhyung, linux-perf-users, ak, maddy, atrajeev, rnsastry,
	yao.jin, ast, daniel, songliubraving, kan.liang, mark.rutland,
	alexander.shishkin, paulus

On Thu, Sep 09, 2021 at 10:45:54PM +1000, Michael Ellerman wrote:

> > The 'new' composite doesnt have a hops field because the hardware that
> > nessecitated that change doesn't report it, but we could easily add a
> > field there.
> >
> > Suppose we add, mem_hops:3 (would 6 hops be too small?) and the
> > corresponding PERF_MEM_HOPS_{NA, 0..6}
> 
> It's really 7 if we use remote && hop = 0 to mean the first hop.

I don't think we can do that, because of backward compat. Currently:

  lvl_num=DRAM, remote=1

denotes: "Remote DRAM of any distance". Effectively it would have the new
hops field filled with zeros though, so if you then decode with the hops
field added it suddenly becomes:

 lvl_num=DRAM, remote=1, hops=0

and reads like: "Remote DRAM of 0 hops" which is quite daft. Therefore 0
really must denote a 'N/A'.

> If we're wanting to use some of the hop levels to represent
> intra-chip/package hops then we could possibly use them all on a really
> big system.
> 
> eg. you could imagine something like:
> 
>  L2 | 		        - local L2
>  L2 | REMOTE | HOPS_0	- L2 of neighbour core
>  L2 | REMOTE | HOPS_1	- L2 of near core on same chip (same 1/2 of chip)
>  L2 | REMOTE | HOPS_2	- L2 of far core on same chip (other 1/2 of chip)
>  L2 | REMOTE | HOPS_3	- L2 of sibling chip in same package
>  L2 | REMOTE | HOPS_4	- L2 on separate package 1 hop away
>  L2 | REMOTE | HOPS_5	- L2 on separate package 2 hops away
>  L2 | REMOTE | HOPS_6	- L2 on separate package 3 hops away
> 
> 
> Whether it's useful to represent all those levels I'm not sure, but it's
> probably good if we have the ability.

I'm thinking we ought to keep hops as steps along the NUMA fabric, with
0 hops being the local node. That only gets us:

 L2, remote=0, hops=HOPS_0 -- our L2
 L2, remote=1, hops=HOPS_0 -- L2 on the local node but not ours
 L2, remote=1, hops!=HOPS_0 -- L2 on a remote node

> I guess I'm 50/50 on whether that's enough levels, or whether we want
> another bit to allow for future growth.

Right, possibly safer to add one extra bit while we can.... I suppose.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/3] perf: Add macros to specify onchip L2/L3 accesses
  2021-09-09 14:36       ` Peter Zijlstra
@ 2021-09-14 10:40         ` Michael Ellerman
  2021-09-14 11:49           ` Peter Zijlstra
  0 siblings, 1 reply; 10+ messages in thread
From: Michael Ellerman @ 2021-09-14 10:40 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Kajol Jain, linuxppc-dev, linux-kernel, mingo, acme, jolsa,
	namhyung, linux-perf-users, ak, maddy, atrajeev, rnsastry,
	yao.jin, ast, daniel, songliubraving, kan.liang, mark.rutland,
	alexander.shishkin, paulus

Peter Zijlstra <peterz@infradead.org> writes:
> On Thu, Sep 09, 2021 at 10:45:54PM +1000, Michael Ellerman wrote:
>
>> > The 'new' composite doesnt have a hops field because the hardware that
>> > nessecitated that change doesn't report it, but we could easily add a
>> > field there.
>> >
>> > Suppose we add, mem_hops:3 (would 6 hops be too small?) and the
>> > corresponding PERF_MEM_HOPS_{NA, 0..6}
>> 
>> It's really 7 if we use remote && hop = 0 to mean the first hop.
>
> I don't think we can do that, becaus of backward compat. Currently:
>
>   lvl_num=DRAM, remote=1
>
> denites: "Remote DRAM of any distance". Effectively it would have the new
> hops field filled with zeros though, so if you then decode with the hops
> field added it suddenly becomes:
>
>  lvl_num=DRAM, remote=1, hops=0
>
> and reads like: "Remote DRAM of 0 hops" which is quite daft. Therefore 0
> really must denote a 'N/A'.

Ah yeah, duh, it needs to be backward compatible.

>> If we're wanting to use some of the hop levels to represent
>> intra-chip/package hops then we could possibly use them all on a really
>> big system.
>> 
>> eg. you could imagine something like:
>> 
>>  L2 | 		        - local L2
>>  L2 | REMOTE | HOPS_0	- L2 of neighbour core
>>  L2 | REMOTE | HOPS_1	- L2 of near core on same chip (same 1/2 of chip)
>>  L2 | REMOTE | HOPS_2	- L2 of far core on same chip (other 1/2 of chip)
>>  L2 | REMOTE | HOPS_3	- L2 of sibling chip in same package
>>  L2 | REMOTE | HOPS_4	- L2 on separate package 1 hop away
>>  L2 | REMOTE | HOPS_5	- L2 on separate package 2 hops away
>>  L2 | REMOTE | HOPS_6	- L2 on separate package 3 hops away
>> 
>> 
>> Whether it's useful to represent all those levels I'm not sure, but it's
>> probably good if we have the ability.
>
> I'm thinking we ought to keep hops as steps along the NUMA fabric, with
> 0 hops being the local node. That only gets us:
>
>  L2, remote=0, hops=HOPS_0 -- our L2
>  L2, remote=1, hops=HOPS_0 -- L2 on the local node but not ours
>  L2, remote=1, hops!=HOPS_0 -- L2 on a remote node

Hmm. I'm not sure about tying it directly to NUMA hops. I worry we're
going to see more and more systems where there's a hierarchy within the
chip/package, in addition to the traditional NUMA hierarchy.

Although then I guess it becomes a question of what exactly is a NUMA
hop, maybe the answer is that on those future systems those
intra-chip/package hops should be represented as NUMA hops.

It's not like we have a hard definition of what a NUMA hop is?

>> I guess I'm 50/50 on whether that's enough levels, or whether we want
>> another bit to allow for future growth.
>
> Right, possibly safer to add one extra bit while we can.... I suppose.

Equally it's not _that_ hard to add another bit later (if there's still
one free), makes the API a little uglier to use, but not the end of the
world.

cheers

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/3] perf: Add macros to specify onchip L2/L3 accesses
  2021-09-14 10:40         ` Michael Ellerman
@ 2021-09-14 11:49           ` Peter Zijlstra
  2021-09-16 10:57             ` Michael Ellerman
  0 siblings, 1 reply; 10+ messages in thread
From: Peter Zijlstra @ 2021-09-14 11:49 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Kajol Jain, linuxppc-dev, linux-kernel, mingo, acme, jolsa,
	namhyung, linux-perf-users, ak, maddy, atrajeev, rnsastry,
	yao.jin, ast, daniel, songliubraving, kan.liang, mark.rutland,
	alexander.shishkin, paulus

On Tue, Sep 14, 2021 at 08:40:38PM +1000, Michael Ellerman wrote:
> Peter Zijlstra <peterz@infradead.org> writes:

> > I'm thinking we ought to keep hops as steps along the NUMA fabric, with
> > 0 hops being the local node. That only gets us:
> >
> >  L2, remote=0, hops=HOPS_0 -- our L2
> >  L2, remote=1, hops=HOPS_0 -- L2 on the local node but not ours
> >  L2, remote=1, hops!=HOPS_0 -- L2 on a remote node
> 
> Hmm. I'm not sure about tying it directly to NUMA hops. I worry we're
> going to see more and more systems where there's a hierarchy within the
> chip/package, in addition to the traditional NUMA hierarchy.
> 
> Although then I guess it becomes a question of what exactly is a NUMA
> hop, maybe the answer is that on those future systems those
> intra-chip/package hops should be represented as NUMA hops.
> 
> It's not like we have a hard definition of what a NUMA hop is?

Not really, typically whatever the BIOS/DT/whatever tables tell us. I
think in case of Power you're mostly making things up in software :-)

But yeah, I think we have plenty wriggle room there.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/3] perf: Add macros to specify onchip L2/L3 accesses
  2021-09-14 11:49           ` Peter Zijlstra
@ 2021-09-16 10:57             ` Michael Ellerman
  0 siblings, 0 replies; 10+ messages in thread
From: Michael Ellerman @ 2021-09-16 10:57 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Kajol Jain, linuxppc-dev, linux-kernel, mingo, acme, jolsa,
	namhyung, linux-perf-users, ak, maddy, atrajeev, rnsastry,
	yao.jin, ast, daniel, songliubraving, kan.liang, mark.rutland,
	alexander.shishkin, paulus

Peter Zijlstra <peterz@infradead.org> writes:
> On Tue, Sep 14, 2021 at 08:40:38PM +1000, Michael Ellerman wrote:
>> Peter Zijlstra <peterz@infradead.org> writes:
>
>> > I'm thinking we ought to keep hops as steps along the NUMA fabric, with
>> > 0 hops being the local node. That only gets us:
>> >
>> >  L2, remote=0, hops=HOPS_0 -- our L2
>> >  L2, remote=1, hops=HOPS_0 -- L2 on the local node but not ours
>> >  L2, remote=1, hops!=HOPS_0 -- L2 on a remote node
>> 
>> Hmm. I'm not sure about tying it directly to NUMA hops. I worry we're
>> going to see more and more systems where there's a hierarchy within the
>> chip/package, in addition to the traditional NUMA hierarchy.
>> 
>> Although then I guess it becomes a question of what exactly is a NUMA
>> hop, maybe the answer is that on those future systems those
>> intra-chip/package hops should be represented as NUMA hops.
>> 
>> It's not like we have a hard definition of what a NUMA hop is?
>
> Not really, typically whatever the BIOS/DT/whatever tables tell us. I
> think in case of Power you're mostly making things up in software :-)

Firmware is software so yes :)

> But yeah, I think we have plenty wriggle room there.

OK.

cheers

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2021-09-16 10:57 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-04  6:49 [PATCH 1/3] perf: Add macros to specify onchip L2/L3 accesses Kajol Jain
2021-09-04  6:49 ` [PATCH 2/3] " Kajol Jain
2021-09-04  6:49 ` [PATCH 3/3] powerpc/perf: Fix data source encodings for power10 Kajol Jain
2021-09-08  7:17 ` [PATCH 1/3] perf: Add macros to specify onchip L2/L3 accesses Michael Ellerman
2021-09-08  9:26   ` Peter Zijlstra
2021-09-09 12:45     ` Michael Ellerman
2021-09-09 14:36       ` Peter Zijlstra
2021-09-14 10:40         ` Michael Ellerman
2021-09-14 11:49           ` Peter Zijlstra
2021-09-16 10:57             ` Michael Ellerman

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).