All of lore.kernel.org
 help / color / mirror / Atom feed
From: Namhyung Kim <namhyung@kernel.org>
To: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: peterz@infradead.org, mingo@kernel.org, acme@kernel.org,
	eranian@google.com, kan.liang@linux.intel.com, jolsa@kernel.org,
	irogers@google.com, linux-perf-users@vger.kernel.org,
	linux-kernel@vger.kernel.org, sandipan.das@amd.com,
	ananth.narayan@amd.com, santosh.shukla@amd.com
Subject: Re: [PATCH v2] perf/x86/ibs: Set mem_lvl_num, mem_remote and mem_hops for data_src
Date: Wed, 29 Mar 2023 00:05:21 -0700	[thread overview]
Message-ID: <CAM9d7ciQLXAcVwFJWeJDHwkwUQ-rnHvdetYsQn7w83kkOsFKWg@mail.gmail.com> (raw)
In-Reply-To: <20230327130851.1565-1-ravi.bangoria@amd.com>

Hi Ravi,

On Mon, Mar 27, 2023 at 6:09 AM Ravi Bangoria <ravi.bangoria@amd.com> wrote:
>
> From: Namhyung Kim <namhyung@kernel.org>
>
> Kernel IBS driver wasn't using new PERF_MEM_* APIs due to few of its
> limitations. Mainly:
>
> - mem_lvl_num doesn't allow setting multiple sources whereas old API
>   allows it. Setting multiple data sources is useful because IBS on
>   pre-zen4 uarch doesn't provide fine granular DataSrc details (there
>   is only one such DataSrc(2h) though).
> - perf mem sorting logic (sort__lvl_cmp()) ignores mem_lvl_num. perf
>   c2c (c2c_decode_stats()) does not use mem_lvl_num at all. perf mem
>   prints mem_lvl and mem_lvl_num both if both are set, which is ugly.
>
> 1st one can be handled by using ANY_CACHE with HOPS_0. 2nd one is
> purely perf tool specific issue and should be fixed separately.
>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
> ---
> v1: https://lore.kernel.org/r/20230311000642.1270971-1-namhyung@kernel.org
> v1->v2:
> - In addition to setting new API fields, convert all individual field
>   assignments to compile time wrapper macros built using PERF_MEM_S().
>   Also convert DataSrc conditional code to array lookups.
>
>  arch/x86/events/amd/ibs.c | 155 +++++++++++++++++---------------------
>  1 file changed, 68 insertions(+), 87 deletions(-)
>
> diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
> index 64582954b5f6..b46e0b725fe5 100644
> --- a/arch/x86/events/amd/ibs.c
> +++ b/arch/x86/events/amd/ibs.c
> @@ -703,9 +703,41 @@ static u8 perf_ibs_data_src(union ibs_op_data2 *op_data2)
>         return op_data2->data_src_lo;
>  }
>
> -static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
> -                                union ibs_op_data3 *op_data3,
> -                                struct perf_sample_data *data)
> +#define        L(x)            (PERF_MEM_S(LVL, x) | PERF_MEM_S(LVL, HIT))
> +#define        LN(x)           PERF_MEM_S(LVLNUM, x)
> +#define        REM             PERF_MEM_S(REMOTE, REMOTE)
> +#define        HOPS(x)         PERF_MEM_S(HOPS, x)
> +
> +static u64 g_data_src[8] = {
> +       [IBS_DATA_SRC_LOC_CACHE]          = L(L3) | L(REM_CCE1) | LN(ANY_CACHE) | HOPS(0),
> +       [IBS_DATA_SRC_DRAM]               = L(LOC_RAM) | LN(RAM),
> +       [IBS_DATA_SRC_REM_CACHE]          = L(REM_CCE2) | LN(ANY_CACHE) | REM | HOPS(1),
> +       [IBS_DATA_SRC_IO]                 = L(IO) | LN(IO),
> +};
> +
> +#define RMT_NODE_BITS                  ((1 << IBS_DATA_SRC_DRAM) | \
> +                                        (1 << IBS_DATA_SRC_IO))
> +#define RMT_NODE_APPLICABLE(x)         (RMT_NODE_BITS & (1 << x))
> +
> +static u64 g_zen4_data_src[32] = {
> +       [IBS_DATA_SRC_EXT_LOC_CACHE]      = L(L3) | LN(L3),
> +       [IBS_DATA_SRC_EXT_NEAR_CCX_CACHE] = L(REM_CCE1) | LN(ANY_CACHE) | REM | HOPS(0),
> +       [IBS_DATA_SRC_EXT_DRAM]           = L(LOC_RAM) | LN(RAM),
> +       [IBS_DATA_SRC_EXT_FAR_CCX_CACHE]  = L(REM_CCE2) | LN(ANY_CACHE) | REM | HOPS(1),
> +       [IBS_DATA_SRC_EXT_PMEM]           = LN(PMEM),
> +       [IBS_DATA_SRC_EXT_IO]             = L(IO) | LN(IO),
> +       [IBS_DATA_SRC_EXT_EXT_MEM]        = LN(CXL),
> +};
> +
> +#define ZEN4_RMT_NODE_BITS             ((1 << IBS_DATA_SRC_EXT_DRAM) | \
> +                                        (1 << IBS_DATA_SRC_EXT_PMEM) | \
> +                                        (1 << IBS_DATA_SRC_EXT_IO) |\
> +                                        (1 << IBS_DATA_SRC_EXT_EXT_MEM))
> +#define ZEN4_RMT_NODE_APPLICABLE(x)    (RMT_NODE_BITS & (1 << x))
> +
> +static __u64 perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
> +                                 union ibs_op_data3 *op_data3,
> +                                 struct perf_sample_data *data)
>  {
>         union perf_mem_data_src *data_src = &data->data_src;
>         u8 ibs_data_src = perf_ibs_data_src(op_data2);
> @@ -716,25 +748,19 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
>          * DcMiss, L2Miss, DataSrc, DcMissLat etc. are all invalid for Uncached
>          * memory accesses. So, check DcUcMemAcc bit early.
>          */
> -       if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO) {
> -               data_src->mem_lvl = PERF_MEM_LVL_UNC | PERF_MEM_LVL_HIT;
> -               return;
> -       }
> +       if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO)
> +               return L(UNC);

Hmm.. it seems we don't have PERF_MEM_LVLNUM_UNC.

>
>         /* L1 Hit */
> -       if (op_data3->dc_miss == 0) {
> -               data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
> -               return;
> -       }
> +       if (op_data3->dc_miss == 0)
> +               return L(L1) | LN(L1);
>
>         /* L2 Hit */
>         if (op_data3->l2_miss == 0) {
>                 /* Erratum #1293 */
>                 if (boot_cpu_data.x86 != 0x19 || boot_cpu_data.x86_model > 0xF ||
> -                   !(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) {
> -                       data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
> -                       return;
> -               }
> +                   !(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc))
> +                       return L(L2) | LN(L2);
>         }
>
>         /*
> @@ -744,82 +770,36 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
>         if (data_src->mem_op != PERF_MEM_OP_LOAD)
>                 goto check_mab;
>
> -       /* L3 Hit */
>         if (ibs_caps & IBS_CAPS_ZEN4) {
> -               if (ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE) {
> -                       data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
> -                       return;
> -               }
> -       } else {
> -               if (ibs_data_src == IBS_DATA_SRC_LOC_CACHE) {
> -                       data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_REM_CCE1 |
> -                                           PERF_MEM_LVL_HIT;
> -                       return;
> -               }
> -       }
> +               u64 val = g_zen4_data_src[ibs_data_src];
>
> -       /* A peer cache in a near CCX */
> -       if (ibs_caps & IBS_CAPS_ZEN4 &&
> -           ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE) {
> -               data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
> -               return;
> -       }
> +               if (!val)
> +                       goto check_mab;
>
> -       /* A peer cache in a far CCX */
> -       if (ibs_caps & IBS_CAPS_ZEN4) {
> -               if (ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE) {
> -                       data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT;
> -                       return;
> +               /* HOPS_1 because IBS doesn't provide remote socket detail */
> +               if (op_data2->rmt_node && ZEN4_RMT_NODE_APPLICABLE(ibs_data_src)) {
> +                       if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM)
> +                               val = L(REM_RAM1) | LN(RAM) | REM | HOPS(1);
> +                       else
> +                               val |= REM | HOPS(1);
>                 }
> -       } else {
> -               if (ibs_data_src == IBS_DATA_SRC_REM_CACHE) {
> -                       data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT;
> -                       return;
> -               }
> -       }
>
> -       /* DRAM */
> -       if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM) {
> -               if (op_data2->rmt_node == 0)
> -                       data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
> -               else
> -                       data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT;
> -               return;
> -       }
> +               return val;
> +       } else {
> +               u64 val = g_data_src[ibs_data_src];
>
> -       /* PMEM */
> -       if (ibs_caps & IBS_CAPS_ZEN4 && ibs_data_src == IBS_DATA_SRC_EXT_PMEM) {
> -               data_src->mem_lvl_num = PERF_MEM_LVLNUM_PMEM;
> -               if (op_data2->rmt_node) {
> -                       data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
> -                       /* IBS doesn't provide Remote socket detail */
> -                       data_src->mem_hops = PERF_MEM_HOPS_1;
> -               }
> -               return;
> -       }
> +               if (!val)
> +                       goto check_mab;
>
> -       /* Extension Memory */
> -       if (ibs_caps & IBS_CAPS_ZEN4 &&
> -           ibs_data_src == IBS_DATA_SRC_EXT_EXT_MEM) {
> -               data_src->mem_lvl_num = PERF_MEM_LVLNUM_CXL;
> -               if (op_data2->rmt_node) {
> -                       data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
> -                       /* IBS doesn't provide Remote socket detail */
> -                       data_src->mem_hops = PERF_MEM_HOPS_1;
> +               /* HOPS_1 because IBS doesn't provide remote socket detail */
> +               if (op_data2->rmt_node && RMT_NODE_APPLICABLE(ibs_data_src)) {
> +                       if (ibs_data_src == IBS_DATA_SRC_DRAM)
> +                               val = L(REM_RAM1) | LN(RAM) | REM | HOPS(1);
> +                       else
> +                               val |= REM | HOPS(1);
>                 }
> -               return;
> -       }
>
> -       /* IO */
> -       if (ibs_data_src == IBS_DATA_SRC_EXT_IO) {
> -               data_src->mem_lvl = PERF_MEM_LVL_IO;
> -               data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO;
> -               if (op_data2->rmt_node) {
> -                       data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
> -                       /* IBS doesn't provide Remote socket detail */
> -                       data_src->mem_hops = PERF_MEM_HOPS_1;
> -               }
> -               return;
> +               return val;
>         }
>
>  check_mab:
> @@ -830,12 +810,11 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
>          * DataSrc simultaneously. Prioritize DataSrc over MAB, i.e. set
>          * MAB only when IBS fails to provide DataSrc.
>          */
> -       if (op_data3->dc_miss_no_mab_alloc) {
> -               data_src->mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT;
> -               return;
> -       }
> +       if (op_data3->dc_miss_no_mab_alloc)
> +               return L(LFB) | LN(LFB);
>
>         data_src->mem_lvl = PERF_MEM_LVL_NA;
> +       return 0;

Wouldn't it be 'return L(NA) | LN(NA);' ?

Thanks,
Namhyung


>  }
>
>  static bool perf_ibs_cache_hit_st_valid(void)
> @@ -925,7 +904,9 @@ static void perf_ibs_get_data_src(struct perf_ibs_data *ibs_data,
>                                   union ibs_op_data2 *op_data2,
>                                   union ibs_op_data3 *op_data3)
>  {
> -       perf_ibs_get_mem_lvl(op_data2, op_data3, data);
> +       union perf_mem_data_src *data_src = &data->data_src;
> +
> +       data_src->val |= perf_ibs_get_mem_lvl(op_data2, op_data3, data);
>         perf_ibs_get_mem_snoop(op_data2, data);
>         perf_ibs_get_tlb_lvl(op_data3, data);
>         perf_ibs_get_mem_lock(op_data3, data);
> --
> 2.39.2
>

  reply	other threads:[~2023-03-29  7:05 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-03-27 13:08 [PATCH v2] perf/x86/ibs: Set mem_lvl_num, mem_remote and mem_hops for data_src Ravi Bangoria
2023-03-29  7:05 ` Namhyung Kim [this message]
2023-03-29  9:45   ` Ravi Bangoria
2023-03-29 23:47     ` Namhyung Kim
2023-03-30  4:38       ` Ravi Bangoria

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAM9d7ciQLXAcVwFJWeJDHwkwUQ-rnHvdetYsQn7w83kkOsFKWg@mail.gmail.com \
    --to=namhyung@kernel.org \
    --cc=acme@kernel.org \
    --cc=ananth.narayan@amd.com \
    --cc=eranian@google.com \
    --cc=irogers@google.com \
    --cc=jolsa@kernel.org \
    --cc=kan.liang@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=ravi.bangoria@amd.com \
    --cc=sandipan.das@amd.com \
    --cc=santosh.shukla@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.