All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] perf/x86/ibs: Set mem_lvl_num, mem_remote and mem_hops for data_src
@ 2023-03-27 13:08 Ravi Bangoria
  2023-03-29  7:05 ` Namhyung Kim
  0 siblings, 1 reply; 5+ messages in thread
From: Ravi Bangoria @ 2023-03-27 13:08 UTC (permalink / raw)
  To: peterz, namhyung
  Cc: ravi.bangoria, mingo, acme, eranian, kan.liang, jolsa, irogers,
	linux-perf-users, linux-kernel, sandipan.das, ananth.narayan,
	santosh.shukla

From: Namhyung Kim <namhyung@kernel.org>

Kernel IBS driver wasn't using new PERF_MEM_* APIs due to few of its
limitations. Mainly:

- mem_lvl_num doesn't allow setting multiple sources whereas old API
  allows it. Setting multiple data sources is useful because IBS on
  pre-zen4 uarch doesn't provide fine granular DataSrc details (there
  is only one such DataSrc(2h) though).
- perf mem sorting logic (sort__lvl_cmp()) ignores mem_lvl_num. perf
  c2c (c2c_decode_stats()) does not use mem_lvl_num at all. perf mem
  prints mem_lvl and mem_lvl_num both if both are set, which is ugly.

1st one can be handled by using ANY_CACHE with HOPS_0. 2nd one is
purely perf tool specific issue and should be fixed separately.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
---
v1: https://lore.kernel.org/r/20230311000642.1270971-1-namhyung@kernel.org
v1->v2:
- In addition to setting new API fields, convert all individual field
  assignments to compile time wrapper macros built using PERF_MEM_S().
  Also convert DataSrc conditional code to array lookups.

 arch/x86/events/amd/ibs.c | 155 +++++++++++++++++---------------------
 1 file changed, 68 insertions(+), 87 deletions(-)

diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 64582954b5f6..b46e0b725fe5 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -703,9 +703,41 @@ static u8 perf_ibs_data_src(union ibs_op_data2 *op_data2)
 	return op_data2->data_src_lo;
 }
 
-static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
-				 union ibs_op_data3 *op_data3,
-				 struct perf_sample_data *data)
+#define	L(x)		(PERF_MEM_S(LVL, x) | PERF_MEM_S(LVL, HIT))
+#define	LN(x)		PERF_MEM_S(LVLNUM, x)
+#define	REM		PERF_MEM_S(REMOTE, REMOTE)
+#define	HOPS(x)		PERF_MEM_S(HOPS, x)
+
+static u64 g_data_src[8] = {
+	[IBS_DATA_SRC_LOC_CACHE]	  = L(L3) | L(REM_CCE1) | LN(ANY_CACHE) | HOPS(0),
+	[IBS_DATA_SRC_DRAM]		  = L(LOC_RAM) | LN(RAM),
+	[IBS_DATA_SRC_REM_CACHE]	  = L(REM_CCE2) | LN(ANY_CACHE) | REM | HOPS(1),
+	[IBS_DATA_SRC_IO]		  = L(IO) | LN(IO),
+};
+
+#define RMT_NODE_BITS			((1 << IBS_DATA_SRC_DRAM) | \
+					 (1 << IBS_DATA_SRC_IO))
+#define RMT_NODE_APPLICABLE(x)		(RMT_NODE_BITS & (1 << x))
+
+static u64 g_zen4_data_src[32] = {
+	[IBS_DATA_SRC_EXT_LOC_CACHE]	  = L(L3) | LN(L3),
+	[IBS_DATA_SRC_EXT_NEAR_CCX_CACHE] = L(REM_CCE1) | LN(ANY_CACHE) | REM | HOPS(0),
+	[IBS_DATA_SRC_EXT_DRAM]		  = L(LOC_RAM) | LN(RAM),
+	[IBS_DATA_SRC_EXT_FAR_CCX_CACHE]  = L(REM_CCE2) | LN(ANY_CACHE) | REM | HOPS(1),
+	[IBS_DATA_SRC_EXT_PMEM]		  = LN(PMEM),
+	[IBS_DATA_SRC_EXT_IO]		  = L(IO) | LN(IO),
+	[IBS_DATA_SRC_EXT_EXT_MEM]	  = LN(CXL),
+};
+
+#define ZEN4_RMT_NODE_BITS		((1 << IBS_DATA_SRC_EXT_DRAM) | \
+					 (1 << IBS_DATA_SRC_EXT_PMEM) | \
+					 (1 << IBS_DATA_SRC_EXT_IO) |\
+					 (1 << IBS_DATA_SRC_EXT_EXT_MEM))
+#define ZEN4_RMT_NODE_APPLICABLE(x)	(RMT_NODE_BITS & (1 << x))
+
+static __u64 perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
+				  union ibs_op_data3 *op_data3,
+				  struct perf_sample_data *data)
 {
 	union perf_mem_data_src *data_src = &data->data_src;
 	u8 ibs_data_src = perf_ibs_data_src(op_data2);
@@ -716,25 +748,19 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
 	 * DcMiss, L2Miss, DataSrc, DcMissLat etc. are all invalid for Uncached
 	 * memory accesses. So, check DcUcMemAcc bit early.
 	 */
-	if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO) {
-		data_src->mem_lvl = PERF_MEM_LVL_UNC | PERF_MEM_LVL_HIT;
-		return;
-	}
+	if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO)
+		return L(UNC);
 
 	/* L1 Hit */
-	if (op_data3->dc_miss == 0) {
-		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
-		return;
-	}
+	if (op_data3->dc_miss == 0)
+		return L(L1) | LN(L1);
 
 	/* L2 Hit */
 	if (op_data3->l2_miss == 0) {
 		/* Erratum #1293 */
 		if (boot_cpu_data.x86 != 0x19 || boot_cpu_data.x86_model > 0xF ||
-		    !(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) {
-			data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
-			return;
-		}
+		    !(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc))
+			return L(L2) | LN(L2);
 	}
 
 	/*
@@ -744,82 +770,36 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
 	if (data_src->mem_op != PERF_MEM_OP_LOAD)
 		goto check_mab;
 
-	/* L3 Hit */
 	if (ibs_caps & IBS_CAPS_ZEN4) {
-		if (ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE) {
-			data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
-			return;
-		}
-	} else {
-		if (ibs_data_src == IBS_DATA_SRC_LOC_CACHE) {
-			data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_REM_CCE1 |
-					    PERF_MEM_LVL_HIT;
-			return;
-		}
-	}
+		u64 val = g_zen4_data_src[ibs_data_src];
 
-	/* A peer cache in a near CCX */
-	if (ibs_caps & IBS_CAPS_ZEN4 &&
-	    ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE) {
-		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
-		return;
-	}
+		if (!val)
+			goto check_mab;
 
-	/* A peer cache in a far CCX */
-	if (ibs_caps & IBS_CAPS_ZEN4) {
-		if (ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE) {
-			data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT;
-			return;
+		/* HOPS_1 because IBS doesn't provide remote socket detail */
+		if (op_data2->rmt_node && ZEN4_RMT_NODE_APPLICABLE(ibs_data_src)) {
+			if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM)
+				val = L(REM_RAM1) | LN(RAM) | REM | HOPS(1);
+			else
+				val |= REM | HOPS(1);
 		}
-	} else {
-		if (ibs_data_src == IBS_DATA_SRC_REM_CACHE) {
-			data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT;
-			return;
-		}
-	}
 
-	/* DRAM */
-	if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM) {
-		if (op_data2->rmt_node == 0)
-			data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
-		else
-			data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT;
-		return;
-	}
+		return val;
+	} else {
+		u64 val = g_data_src[ibs_data_src];
 
-	/* PMEM */
-	if (ibs_caps & IBS_CAPS_ZEN4 && ibs_data_src == IBS_DATA_SRC_EXT_PMEM) {
-		data_src->mem_lvl_num = PERF_MEM_LVLNUM_PMEM;
-		if (op_data2->rmt_node) {
-			data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
-			/* IBS doesn't provide Remote socket detail */
-			data_src->mem_hops = PERF_MEM_HOPS_1;
-		}
-		return;
-	}
+		if (!val)
+			goto check_mab;
 
-	/* Extension Memory */
-	if (ibs_caps & IBS_CAPS_ZEN4 &&
-	    ibs_data_src == IBS_DATA_SRC_EXT_EXT_MEM) {
-		data_src->mem_lvl_num = PERF_MEM_LVLNUM_CXL;
-		if (op_data2->rmt_node) {
-			data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
-			/* IBS doesn't provide Remote socket detail */
-			data_src->mem_hops = PERF_MEM_HOPS_1;
+		/* HOPS_1 because IBS doesn't provide remote socket detail */
+		if (op_data2->rmt_node && RMT_NODE_APPLICABLE(ibs_data_src)) {
+			if (ibs_data_src == IBS_DATA_SRC_DRAM)
+				val = L(REM_RAM1) | LN(RAM) | REM | HOPS(1);
+			else
+				val |= REM | HOPS(1);
 		}
-		return;
-	}
 
-	/* IO */
-	if (ibs_data_src == IBS_DATA_SRC_EXT_IO) {
-		data_src->mem_lvl = PERF_MEM_LVL_IO;
-		data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO;
-		if (op_data2->rmt_node) {
-			data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
-			/* IBS doesn't provide Remote socket detail */
-			data_src->mem_hops = PERF_MEM_HOPS_1;
-		}
-		return;
+		return val;
 	}
 
 check_mab:
@@ -830,12 +810,11 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
 	 * DataSrc simultaneously. Prioritize DataSrc over MAB, i.e. set
 	 * MAB only when IBS fails to provide DataSrc.
 	 */
-	if (op_data3->dc_miss_no_mab_alloc) {
-		data_src->mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT;
-		return;
-	}
+	if (op_data3->dc_miss_no_mab_alloc)
+		return L(LFB) | LN(LFB);
 
 	data_src->mem_lvl = PERF_MEM_LVL_NA;
+	return 0;
 }
 
 static bool perf_ibs_cache_hit_st_valid(void)
@@ -925,7 +904,9 @@ static void perf_ibs_get_data_src(struct perf_ibs_data *ibs_data,
 				  union ibs_op_data2 *op_data2,
 				  union ibs_op_data3 *op_data3)
 {
-	perf_ibs_get_mem_lvl(op_data2, op_data3, data);
+	union perf_mem_data_src *data_src = &data->data_src;
+
+	data_src->val |= perf_ibs_get_mem_lvl(op_data2, op_data3, data);
 	perf_ibs_get_mem_snoop(op_data2, data);
 	perf_ibs_get_tlb_lvl(op_data3, data);
 	perf_ibs_get_mem_lock(op_data3, data);
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] perf/x86/ibs: Set mem_lvl_num, mem_remote and mem_hops for data_src
  2023-03-27 13:08 [PATCH v2] perf/x86/ibs: Set mem_lvl_num, mem_remote and mem_hops for data_src Ravi Bangoria
@ 2023-03-29  7:05 ` Namhyung Kim
  2023-03-29  9:45   ` Ravi Bangoria
  0 siblings, 1 reply; 5+ messages in thread
From: Namhyung Kim @ 2023-03-29  7:05 UTC (permalink / raw)
  To: Ravi Bangoria
  Cc: peterz, mingo, acme, eranian, kan.liang, jolsa, irogers,
	linux-perf-users, linux-kernel, sandipan.das, ananth.narayan,
	santosh.shukla

Hi Ravi,

On Mon, Mar 27, 2023 at 6:09 AM Ravi Bangoria <ravi.bangoria@amd.com> wrote:
>
> From: Namhyung Kim <namhyung@kernel.org>
>
> Kernel IBS driver wasn't using new PERF_MEM_* APIs due to few of its
> limitations. Mainly:
>
> - mem_lvl_num doesn't allow setting multiple sources whereas old API
>   allows it. Setting multiple data sources is useful because IBS on
>   pre-zen4 uarch doesn't provide fine granular DataSrc details (there
>   is only one such DataSrc(2h) though).
> - perf mem sorting logic (sort__lvl_cmp()) ignores mem_lvl_num. perf
>   c2c (c2c_decode_stats()) does not use mem_lvl_num at all. perf mem
>   prints mem_lvl and mem_lvl_num both if both are set, which is ugly.
>
> 1st one can be handled by using ANY_CACHE with HOPS_0. 2nd one is
> purely perf tool specific issue and should be fixed separately.
>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
> ---
> v1: https://lore.kernel.org/r/20230311000642.1270971-1-namhyung@kernel.org
> v1->v2:
> - In addition to setting new API fields, convert all individual field
>   assignments to compile time wrapper macros built using PERF_MEM_S().
>   Also convert DataSrc conditional code to array lookups.
>
>  arch/x86/events/amd/ibs.c | 155 +++++++++++++++++---------------------
>  1 file changed, 68 insertions(+), 87 deletions(-)
>
> diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
> index 64582954b5f6..b46e0b725fe5 100644
> --- a/arch/x86/events/amd/ibs.c
> +++ b/arch/x86/events/amd/ibs.c
> @@ -703,9 +703,41 @@ static u8 perf_ibs_data_src(union ibs_op_data2 *op_data2)
>         return op_data2->data_src_lo;
>  }
>
> -static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
> -                                union ibs_op_data3 *op_data3,
> -                                struct perf_sample_data *data)
> +#define        L(x)            (PERF_MEM_S(LVL, x) | PERF_MEM_S(LVL, HIT))
> +#define        LN(x)           PERF_MEM_S(LVLNUM, x)
> +#define        REM             PERF_MEM_S(REMOTE, REMOTE)
> +#define        HOPS(x)         PERF_MEM_S(HOPS, x)
> +
> +static u64 g_data_src[8] = {
> +       [IBS_DATA_SRC_LOC_CACHE]          = L(L3) | L(REM_CCE1) | LN(ANY_CACHE) | HOPS(0),
> +       [IBS_DATA_SRC_DRAM]               = L(LOC_RAM) | LN(RAM),
> +       [IBS_DATA_SRC_REM_CACHE]          = L(REM_CCE2) | LN(ANY_CACHE) | REM | HOPS(1),
> +       [IBS_DATA_SRC_IO]                 = L(IO) | LN(IO),
> +};
> +
> +#define RMT_NODE_BITS                  ((1 << IBS_DATA_SRC_DRAM) | \
> +                                        (1 << IBS_DATA_SRC_IO))
> +#define RMT_NODE_APPLICABLE(x)         (RMT_NODE_BITS & (1 << x))
> +
> +static u64 g_zen4_data_src[32] = {
> +       [IBS_DATA_SRC_EXT_LOC_CACHE]      = L(L3) | LN(L3),
> +       [IBS_DATA_SRC_EXT_NEAR_CCX_CACHE] = L(REM_CCE1) | LN(ANY_CACHE) | REM | HOPS(0),
> +       [IBS_DATA_SRC_EXT_DRAM]           = L(LOC_RAM) | LN(RAM),
> +       [IBS_DATA_SRC_EXT_FAR_CCX_CACHE]  = L(REM_CCE2) | LN(ANY_CACHE) | REM | HOPS(1),
> +       [IBS_DATA_SRC_EXT_PMEM]           = LN(PMEM),
> +       [IBS_DATA_SRC_EXT_IO]             = L(IO) | LN(IO),
> +       [IBS_DATA_SRC_EXT_EXT_MEM]        = LN(CXL),
> +};
> +
> +#define ZEN4_RMT_NODE_BITS             ((1 << IBS_DATA_SRC_EXT_DRAM) | \
> +                                        (1 << IBS_DATA_SRC_EXT_PMEM) | \
> +                                        (1 << IBS_DATA_SRC_EXT_IO) |\
> +                                        (1 << IBS_DATA_SRC_EXT_EXT_MEM))
> +#define ZEN4_RMT_NODE_APPLICABLE(x)    (RMT_NODE_BITS & (1 << x))
> +
> +static __u64 perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
> +                                 union ibs_op_data3 *op_data3,
> +                                 struct perf_sample_data *data)
>  {
>         union perf_mem_data_src *data_src = &data->data_src;
>         u8 ibs_data_src = perf_ibs_data_src(op_data2);
> @@ -716,25 +748,19 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
>          * DcMiss, L2Miss, DataSrc, DcMissLat etc. are all invalid for Uncached
>          * memory accesses. So, check DcUcMemAcc bit early.
>          */
> -       if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO) {
> -               data_src->mem_lvl = PERF_MEM_LVL_UNC | PERF_MEM_LVL_HIT;
> -               return;
> -       }
> +       if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO)
> +               return L(UNC);

Hmm.. it seems we don't have PERF_MEM_LVLNUM_UNC.

>
>         /* L1 Hit */
> -       if (op_data3->dc_miss == 0) {
> -               data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
> -               return;
> -       }
> +       if (op_data3->dc_miss == 0)
> +               return L(L1) | LN(L1);
>
>         /* L2 Hit */
>         if (op_data3->l2_miss == 0) {
>                 /* Erratum #1293 */
>                 if (boot_cpu_data.x86 != 0x19 || boot_cpu_data.x86_model > 0xF ||
> -                   !(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) {
> -                       data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
> -                       return;
> -               }
> +                   !(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc))
> +                       return L(L2) | LN(L2);
>         }
>
>         /*
> @@ -744,82 +770,36 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
>         if (data_src->mem_op != PERF_MEM_OP_LOAD)
>                 goto check_mab;
>
> -       /* L3 Hit */
>         if (ibs_caps & IBS_CAPS_ZEN4) {
> -               if (ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE) {
> -                       data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
> -                       return;
> -               }
> -       } else {
> -               if (ibs_data_src == IBS_DATA_SRC_LOC_CACHE) {
> -                       data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_REM_CCE1 |
> -                                           PERF_MEM_LVL_HIT;
> -                       return;
> -               }
> -       }
> +               u64 val = g_zen4_data_src[ibs_data_src];
>
> -       /* A peer cache in a near CCX */
> -       if (ibs_caps & IBS_CAPS_ZEN4 &&
> -           ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE) {
> -               data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
> -               return;
> -       }
> +               if (!val)
> +                       goto check_mab;
>
> -       /* A peer cache in a far CCX */
> -       if (ibs_caps & IBS_CAPS_ZEN4) {
> -               if (ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE) {
> -                       data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT;
> -                       return;
> +               /* HOPS_1 because IBS doesn't provide remote socket detail */
> +               if (op_data2->rmt_node && ZEN4_RMT_NODE_APPLICABLE(ibs_data_src)) {
> +                       if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM)
> +                               val = L(REM_RAM1) | LN(RAM) | REM | HOPS(1);
> +                       else
> +                               val |= REM | HOPS(1);
>                 }
> -       } else {
> -               if (ibs_data_src == IBS_DATA_SRC_REM_CACHE) {
> -                       data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT;
> -                       return;
> -               }
> -       }
>
> -       /* DRAM */
> -       if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM) {
> -               if (op_data2->rmt_node == 0)
> -                       data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
> -               else
> -                       data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT;
> -               return;
> -       }
> +               return val;
> +       } else {
> +               u64 val = g_data_src[ibs_data_src];
>
> -       /* PMEM */
> -       if (ibs_caps & IBS_CAPS_ZEN4 && ibs_data_src == IBS_DATA_SRC_EXT_PMEM) {
> -               data_src->mem_lvl_num = PERF_MEM_LVLNUM_PMEM;
> -               if (op_data2->rmt_node) {
> -                       data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
> -                       /* IBS doesn't provide Remote socket detail */
> -                       data_src->mem_hops = PERF_MEM_HOPS_1;
> -               }
> -               return;
> -       }
> +               if (!val)
> +                       goto check_mab;
>
> -       /* Extension Memory */
> -       if (ibs_caps & IBS_CAPS_ZEN4 &&
> -           ibs_data_src == IBS_DATA_SRC_EXT_EXT_MEM) {
> -               data_src->mem_lvl_num = PERF_MEM_LVLNUM_CXL;
> -               if (op_data2->rmt_node) {
> -                       data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
> -                       /* IBS doesn't provide Remote socket detail */
> -                       data_src->mem_hops = PERF_MEM_HOPS_1;
> +               /* HOPS_1 because IBS doesn't provide remote socket detail */
> +               if (op_data2->rmt_node && RMT_NODE_APPLICABLE(ibs_data_src)) {
> +                       if (ibs_data_src == IBS_DATA_SRC_DRAM)
> +                               val = L(REM_RAM1) | LN(RAM) | REM | HOPS(1);
> +                       else
> +                               val |= REM | HOPS(1);
>                 }
> -               return;
> -       }
>
> -       /* IO */
> -       if (ibs_data_src == IBS_DATA_SRC_EXT_IO) {
> -               data_src->mem_lvl = PERF_MEM_LVL_IO;
> -               data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO;
> -               if (op_data2->rmt_node) {
> -                       data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
> -                       /* IBS doesn't provide Remote socket detail */
> -                       data_src->mem_hops = PERF_MEM_HOPS_1;
> -               }
> -               return;
> +               return val;
>         }
>
>  check_mab:
> @@ -830,12 +810,11 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
>          * DataSrc simultaneously. Prioritize DataSrc over MAB, i.e. set
>          * MAB only when IBS fails to provide DataSrc.
>          */
> -       if (op_data3->dc_miss_no_mab_alloc) {
> -               data_src->mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT;
> -               return;
> -       }
> +       if (op_data3->dc_miss_no_mab_alloc)
> +               return L(LFB) | LN(LFB);
>
>         data_src->mem_lvl = PERF_MEM_LVL_NA;
> +       return 0;

Wouldn't it be 'return L(NA) | LN(NA);' ?

Thanks,
Namhyung


>  }
>
>  static bool perf_ibs_cache_hit_st_valid(void)
> @@ -925,7 +904,9 @@ static void perf_ibs_get_data_src(struct perf_ibs_data *ibs_data,
>                                   union ibs_op_data2 *op_data2,
>                                   union ibs_op_data3 *op_data3)
>  {
> -       perf_ibs_get_mem_lvl(op_data2, op_data3, data);
> +       union perf_mem_data_src *data_src = &data->data_src;
> +
> +       data_src->val |= perf_ibs_get_mem_lvl(op_data2, op_data3, data);
>         perf_ibs_get_mem_snoop(op_data2, data);
>         perf_ibs_get_tlb_lvl(op_data3, data);
>         perf_ibs_get_mem_lock(op_data3, data);
> --
> 2.39.2
>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] perf/x86/ibs: Set mem_lvl_num, mem_remote and mem_hops for data_src
  2023-03-29  7:05 ` Namhyung Kim
@ 2023-03-29  9:45   ` Ravi Bangoria
  2023-03-29 23:47     ` Namhyung Kim
  0 siblings, 1 reply; 5+ messages in thread
From: Ravi Bangoria @ 2023-03-29  9:45 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: peterz, mingo, acme, eranian, kan.liang, jolsa, irogers,
	linux-perf-users, linux-kernel, sandipan.das, ananth.narayan,
	santosh.shukla, Ravi Bangoria

Hi Namhyung,

>> @@ -716,25 +748,19 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
>>          * DcMiss, L2Miss, DataSrc, DcMissLat etc. are all invalid for Uncached
>>          * memory accesses. So, check DcUcMemAcc bit early.
>>          */
>> -       if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO) {
>> -               data_src->mem_lvl = PERF_MEM_LVL_UNC | PERF_MEM_LVL_HIT;
>> -               return;
>> -       }
>> +       if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO)
>> +               return L(UNC);
> 
> Hmm.. it seems we don't have PERF_MEM_LVLNUM_UNC.

Right. Is it worth to introduce one?

On a side note, I came to know that IBS OpData2[RmtNode] is not applicable
when DataSrc=7 (I/O). So, I need to respin this patch with that change.

[...]

>>  check_mab:
>> @@ -830,12 +810,11 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
>>          * DataSrc simultaneously. Prioritize DataSrc over MAB, i.e. set
>>          * MAB only when IBS fails to provide DataSrc.
>>          */
>> -       if (op_data3->dc_miss_no_mab_alloc) {
>> -               data_src->mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT;
>> -               return;
>> -       }
>> +       if (op_data3->dc_miss_no_mab_alloc)
>> +               return L(LFB) | LN(LFB);
>>
>>         data_src->mem_lvl = PERF_MEM_LVL_NA;
>> +       return 0;
> 
> Wouldn't it be 'return L(NA) | LN(NA);' ?

IBS has no instruction type filtering, i.e. it tags whatever instruction it
sees at overflow. When IBS tags non-load/store instruction, data_src->val is
set to PERF_MEM_NA, which does not initialize mem_lvl_num (Shall we change
that?). If I set both LVL_NA and LVL_NUM_NA for load/store with no DataSrc
info, perf mem output becomes funny:

  $ sudo ./perf mem report -F sample,mem --stdio
  #      Samples  Memory access
  # ............  .......................................
  #
            1914  N/A                   <====== Non-LS
             905  L1 or L1 hit
              19  L3 or L3 hit
              16  L2 or L2 hit
               6  N/A or N/A hit        <====== LS with no DataSrc info
               6  Local RAM or RAM hit
               4  Remote node, same socket RAM hit
               3  Remote core, same node Any cache hit
               2  Remote node, same socket Any cache hit

Also, L(NA) is PERF_MEM_LVL_NA | PERF_MEM_LVL_HIT. If I just return L(NA),
perf tools shows it as "N/A hit".

So, until tool code gets refactored, setting mem_lvl = NA here is hiding
tool's dumbness :(. Maybe I should refactor perf_mem__snp_scnprintf() as
part of this patchset.

Thanks,
Ravi

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] perf/x86/ibs: Set mem_lvl_num, mem_remote and mem_hops for data_src
  2023-03-29  9:45   ` Ravi Bangoria
@ 2023-03-29 23:47     ` Namhyung Kim
  2023-03-30  4:38       ` Ravi Bangoria
  0 siblings, 1 reply; 5+ messages in thread
From: Namhyung Kim @ 2023-03-29 23:47 UTC (permalink / raw)
  To: Ravi Bangoria
  Cc: peterz, mingo, acme, eranian, kan.liang, jolsa, irogers,
	linux-perf-users, linux-kernel, sandipan.das, ananth.narayan,
	santosh.shukla

On Wed, Mar 29, 2023 at 2:45 AM Ravi Bangoria <ravi.bangoria@amd.com> wrote:
>
> Hi Namhyung,
>
> >> @@ -716,25 +748,19 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
> >>          * DcMiss, L2Miss, DataSrc, DcMissLat etc. are all invalid for Uncached
> >>          * memory accesses. So, check DcUcMemAcc bit early.
> >>          */
> >> -       if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO) {
> >> -               data_src->mem_lvl = PERF_MEM_LVL_UNC | PERF_MEM_LVL_HIT;
> >> -               return;
> >> -       }
> >> +       if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO)
> >> +               return L(UNC);
> >
> > Hmm.. it seems we don't have PERF_MEM_LVLNUM_UNC.
>
> Right. Is it worth to introduce one?

I think MEM_LVLNUM should express every memory level in MEM_LVL.

>
> On a side note, I came to know that IBS OpData2[RmtNode] is not applicable
> when DataSrc=7 (I/O). So, I need to respin this patch with that change.
>
> [...]
>
> >>  check_mab:
> >> @@ -830,12 +810,11 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
> >>          * DataSrc simultaneously. Prioritize DataSrc over MAB, i.e. set
> >>          * MAB only when IBS fails to provide DataSrc.
> >>          */
> >> -       if (op_data3->dc_miss_no_mab_alloc) {
> >> -               data_src->mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT;
> >> -               return;
> >> -       }
> >> +       if (op_data3->dc_miss_no_mab_alloc)
> >> +               return L(LFB) | LN(LFB);
> >>
> >>         data_src->mem_lvl = PERF_MEM_LVL_NA;
> >> +       return 0;
> >
> > Wouldn't it be 'return L(NA) | LN(NA);' ?
>
> IBS has no instruction type filtering, i.e. it tags whatever instruction it
> sees at overflow. When IBS tags non-load/store instruction, data_src->val is
> set to PERF_MEM_NA, which does not initialize mem_lvl_num (Shall we change
> that?). If I set both LVL_NA and LVL_NUM_NA for load/store with no DataSrc
> info, perf mem output becomes funny:

Probably worth changing PERF_MEM_NA.

>
>   $ sudo ./perf mem report -F sample,mem --stdio
>   #      Samples  Memory access
>   # ............  .......................................
>   #
>             1914  N/A                   <====== Non-LS
>              905  L1 or L1 hit
>               19  L3 or L3 hit
>               16  L2 or L2 hit
>                6  N/A or N/A hit        <====== LS with no DataSrc info
>                6  Local RAM or RAM hit
>                4  Remote node, same socket RAM hit
>                3  Remote core, same node Any cache hit
>                2  Remote node, same socket Any cache hit

Maybe that's better to differentiate them :)

>
> Also, L(NA) is PERF_MEM_LVL_NA | PERF_MEM_LVL_HIT. If I just return L(NA),
> perf tools shows it as "N/A hit".
>
> So, until tool code gets refactored, setting mem_lvl = NA here is hiding
> tool's dumbness :(. Maybe I should refactor perf_mem__snp_scnprintf() as
> part of this patchset.

I think we can change the tool independently - preferring MEM_LVLNUM
if present but you might want to add an option to override.
Given IBS didn't set it so far, the output would remain mostly the same.

Thanks,
Namhyung

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] perf/x86/ibs: Set mem_lvl_num, mem_remote and mem_hops for data_src
  2023-03-29 23:47     ` Namhyung Kim
@ 2023-03-30  4:38       ` Ravi Bangoria
  0 siblings, 0 replies; 5+ messages in thread
From: Ravi Bangoria @ 2023-03-30  4:38 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: peterz, mingo, acme, eranian, kan.liang, jolsa, irogers,
	linux-perf-users, linux-kernel, sandipan.das, ananth.narayan,
	santosh.shukla, Ravi Bangoria

On 30-Mar-23 5:17 AM, Namhyung Kim wrote:
> On Wed, Mar 29, 2023 at 2:45 AM Ravi Bangoria <ravi.bangoria@amd.com> wrote:
>>
>> Hi Namhyung,
>>
>>>> @@ -716,25 +748,19 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
>>>>          * DcMiss, L2Miss, DataSrc, DcMissLat etc. are all invalid for Uncached
>>>>          * memory accesses. So, check DcUcMemAcc bit early.
>>>>          */
>>>> -       if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO) {
>>>> -               data_src->mem_lvl = PERF_MEM_LVL_UNC | PERF_MEM_LVL_HIT;
>>>> -               return;
>>>> -       }
>>>> +       if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO)
>>>> +               return L(UNC);
>>>
>>> Hmm.. it seems we don't have PERF_MEM_LVLNUM_UNC.
>>
>> Right. Is it worth to introduce one?
> 
> I think MEM_LVLNUM should express every memory level in MEM_LVL.

Ok.

> 
>>
>> On a side note, I came to know that IBS OpData2[RmtNode] is not applicable
>> when DataSrc=7 (I/O). So, I need to respin this patch with that change.
>>
>> [...]
>>
>>>>  check_mab:
>>>> @@ -830,12 +810,11 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
>>>>          * DataSrc simultaneously. Prioritize DataSrc over MAB, i.e. set
>>>>          * MAB only when IBS fails to provide DataSrc.
>>>>          */
>>>> -       if (op_data3->dc_miss_no_mab_alloc) {
>>>> -               data_src->mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT;
>>>> -               return;
>>>> -       }
>>>> +       if (op_data3->dc_miss_no_mab_alloc)
>>>> +               return L(LFB) | LN(LFB);
>>>>
>>>>         data_src->mem_lvl = PERF_MEM_LVL_NA;
>>>> +       return 0;
>>>
>>> Wouldn't it be 'return L(NA) | LN(NA);' ?
>>
>> IBS has no instruction type filtering, i.e. it tags whatever instruction it
>> sees at overflow. When IBS tags non-load/store instruction, data_src->val is
>> set to PERF_MEM_NA, which does not initialize mem_lvl_num (Shall we change
>> that?). If I set both LVL_NA and LVL_NUM_NA for load/store with no DataSrc
>> info, perf mem output becomes funny:
> 
> Probably worth changing PERF_MEM_NA.

Yeah seems so.

> 
>>
>>   $ sudo ./perf mem report -F sample,mem --stdio
>>   #      Samples  Memory access
>>   # ............  .......................................
>>   #
>>             1914  N/A                   <====== Non-LS
>>              905  L1 or L1 hit
>>               19  L3 or L3 hit
>>               16  L2 or L2 hit
>>                6  N/A or N/A hit        <====== LS with no DataSrc info
>>                6  Local RAM or RAM hit
>>                4  Remote node, same socket RAM hit
>>                3  Remote core, same node Any cache hit
>>                2  Remote node, same socket Any cache hit
> 
> Maybe that's better to differentiate them :)

:). I would rather add instruction type column and print non-LS, L, S or LS.

> 
>>
>> Also, L(NA) is PERF_MEM_LVL_NA | PERF_MEM_LVL_HIT. If I just return L(NA),
>> perf tools shows it as "N/A hit".
>>
>> So, until tool code gets refactored, setting mem_lvl = NA here is hiding
>> tool's dumbness :(. Maybe I should refactor perf_mem__snp_scnprintf() as
>> part of this patchset.
> 
> I think we can change the tool independently - preferring MEM_LVLNUM
> if present but you might want to add an option to override.
> Given IBS didn't set it so far, the output would remain mostly the same.

Sure.

Thanks,
Ravi

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2023-03-30  4:38 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-03-27 13:08 [PATCH v2] perf/x86/ibs: Set mem_lvl_num, mem_remote and mem_hops for data_src Ravi Bangoria
2023-03-29  7:05 ` Namhyung Kim
2023-03-29  9:45   ` Ravi Bangoria
2023-03-29 23:47     ` Namhyung Kim
2023-03-30  4:38       ` Ravi Bangoria

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.