From: Borislav Petkov <bp@alien8.de>
To: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
Cc: linux-kernel@vger.kernel.org, iommu@lists.linux-foundation.org,
joro@8bytes.org, peterz@infradead.org, mingo@redhat.com
Subject: Re: [PATCH v8 9/9] perf/amd/iommu: Enable support for multiple IOMMUs
Date: Sun, 22 Jan 2017 20:55:31 +0100 [thread overview]
Message-ID: <20170122195531.5y4ufm7pm5qkz3yx@pd.tnic> (raw)
In-Reply-To: <1484551416-5440-10-git-send-email-Suravee.Suthikulpanit@amd.com>
On Mon, Jan 16, 2017 at 01:23:36AM -0600, Suravee Suthikulpanit wrote:
> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
>
> Add multi-IOMMU support for perf by exposing an AMD IOMMU PMU
> for each IOMMU found in the system via:
>
> /bus/event_source/devices/amd_iommu_x
>
> where x is the IOMMU index. This allows users to specify
> different events to be programed onto performance counters
"programmed"
Please introduce a spellchecker into your patch creation workflow.
> of each IOMMU.
>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Borislav Petkov <bp@alien8.de>
> Signed-off-by: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
> ---
> arch/x86/events/amd/iommu.c | 114 ++++++++++++++++++++++++++------------------
> 1 file changed, 67 insertions(+), 47 deletions(-)
>
> diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c
> index 223c01d..38eafbf 100644
> --- a/arch/x86/events/amd/iommu.c
> +++ b/arch/x86/events/amd/iommu.c
> @@ -35,16 +35,21 @@
> #define _GET_PASID_MASK(ev) ((ev->hw.extra_reg.config >> 16) & 0xFFFFULL)
> #define _GET_DOMID_MASK(ev) ((ev->hw.extra_reg.config >> 32) & 0xFFFFULL)
>
> -static struct perf_amd_iommu __perf_iommu;
> +#define PERF_AMD_IOMMU_NAME_SZ 16
AMD_IOMMU_PMU_NAME_SIZE
sounds more to the point to me.
> struct perf_amd_iommu {
> + struct list_head list;
> struct pmu pmu;
> + unsigned int idx;
> + char name[PERF_AMD_IOMMU_NAME_SZ];
> u8 max_banks;
> u8 max_counters;
> u64 cntr_assign_mask;
> raw_spinlock_t lock;
> };
...
> @@ -253,30 +248,34 @@ static void perf_iommu_enable_event(struct perf_event *ev)
> u64 reg = 0ULL;
>
> reg = csource;
> - amd_iommu_pc_set_reg(0, bank, cntr,
> + amd_iommu_pc_set_reg(hwc->idx, bank, cntr,
> IOMMU_PC_COUNTER_SRC_REG, &reg);
>
> reg = devid | (_GET_DEVID_MASK(ev) << 32);
> if (reg)
> reg |= BIT(31);
> - amd_iommu_pc_set_reg(0, bank, cntr, IOMMU_PC_DEVID_MATCH_REG, &reg);
> + amd_iommu_pc_set_reg(hwc->idx, bank, cntr,
> IOMMU_PC_DEVID_MATCH_REG, &reg);
>
> reg = _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32);
> if (reg)
> reg |= BIT(31);
> - amd_iommu_pc_set_reg(0, bank, cntr, IOMMU_PC_PASID_MATCH_REG, &reg);
> + amd_iommu_pc_set_reg(hwc->idx, bank, cntr,
> IOMMU_PC_PASID_MATCH_REG, &reg);
>
> reg = _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32);
> if (reg)
> reg |= BIT(31);
> - amd_iommu_pc_set_reg(0, bank, cntr, IOMMU_PC_DOMID_MATCH_REG, &reg);
> + amd_iommu_pc_set_reg(hwc->idx, bank, cntr,
> IOMMU_PC_DOMID_MATCH_REG, &reg);
You can let those stick out - the 80 cols rule is not a strict one:
reg = csource;
amd_iommu_pc_set_reg(hwc->idx, bank, cntr, IOMMU_PC_COUNTER_SRC_REG, &reg);
reg = devid | (_GET_DEVID_MASK(ev) << 32);
if (reg)
reg |= BIT(31);
amd_iommu_pc_set_reg(hwc->idx, bank, cntr, IOMMU_PC_DEVID_MATCH_REG, &reg);
reg = _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32);
if (reg)
reg |= BIT(31);
amd_iommu_pc_set_reg(hwc->idx, bank, cntr, IOMMU_PC_PASID_MATCH_REG, &reg);
reg = _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32);
if (reg)
reg |= BIT(31);
amd_iommu_pc_set_reg(hwc->idx, bank, cntr, IOMMU_PC_DOMID_MATCH_REG, &reg);
}
> static void perf_iommu_disable_event(struct perf_event *event)
> {
> + struct hw_perf_event *hwc = &event->hw;
> u64 reg = 0ULL;
>
> - amd_iommu_pc_set_reg(0, _GET_BANK(event), _GET_CNTR(event),
> + amd_iommu_pc_set_reg(hwc->idx, _GET_BANK(event), _GET_CNTR(event),
> IOMMU_PC_COUNTER_SRC_REG, &reg);
> }
>
> @@ -295,7 +294,7 @@ static void perf_iommu_start(struct perf_event *event, int flags)
> return;
>
> val = local64_read(&hwc->prev_count) & GENMASK_ULL(48, 0);
> - if (amd_iommu_pc_set_reg(0, _GET_BANK(event), _GET_CNTR(event),
> + if (amd_iommu_pc_set_reg(hwc->idx, _GET_BANK(event), _GET_CNTR(event),
> IOMMU_PC_COUNTER_REG, &val))
> return;
>
> @@ -309,7 +308,7 @@ static void perf_iommu_read(struct perf_event *event)
> s64 delta;
> struct hw_perf_event *hwc = &event->hw;
>
> - if (amd_iommu_pc_get_reg(0, _GET_BANK(event), _GET_CNTR(event),
> + if (amd_iommu_pc_get_reg(hwc->idx, _GET_BANK(event), _GET_CNTR(event),
> IOMMU_PC_COUNTER_REG, &count))
> return;
>
> @@ -407,6 +406,13 @@ static __init int _init_events_attrs(void)
>
> static __init void amd_iommu_pc_exit(void)
> {
> + struct perf_amd_iommu *pi, *next;
> +
> + list_for_each_entry_safe(pi, next, &perf_amd_iommu_list, list) {
> + list_del(&pi->list);
> + kfree(pi);
> + }
> +
> if (amd_iommu_events_group.attrs) {
> kfree(amd_iommu_events_group.attrs);
> amd_iommu_events_group.attrs = NULL;
> @@ -421,46 +427,46 @@ static __init void amd_iommu_pc_exit(void)
> };
>
> static __init int
> -_init_perf_amd_iommu(struct perf_amd_iommu *perf_iommu, char *name)
> +init_one_perf_amd_iommu(struct perf_amd_iommu *perf_iommu, unsigned int idx)
> {
> int ret;
>
> raw_spin_lock_init(&perf_iommu->lock);
>
> - /* Init cpumask attributes to only core 0 */
> - cpumask_set_cpu(0, &iommu_cpumask);
> -
> - perf_iommu->max_banks = amd_iommu_pc_get_max_banks(0);
> - perf_iommu->max_counters = amd_iommu_pc_get_max_counters(0);
> + perf_iommu->idx = idx;
> + perf_iommu->max_banks = amd_iommu_pc_get_max_banks(idx);
> + perf_iommu->max_counters = amd_iommu_pc_get_max_counters(idx);
> if (!perf_iommu->max_banks || !perf_iommu->max_counters)
> return -EINVAL;
>
> + snprintf(perf_iommu->name, PERF_AMD_IOMMU_NAME_SZ, "amd_iommu_%u", idx);
> +
> + perf_iommu->pmu.event_init = perf_iommu_event_init,
> + perf_iommu->pmu.add = perf_iommu_add,
> + perf_iommu->pmu.del = perf_iommu_del,
> + perf_iommu->pmu.start = perf_iommu_start,
> + perf_iommu->pmu.stop = perf_iommu_stop,
> + perf_iommu->pmu.read = perf_iommu_read,
This compiles but it is yucky.
You should do that instead:
static struct pmu amd_iommu_pmu = {
.event_init = perf_iommu_event_init,
.add = perf_iommu_add,
.del = perf_iommu_del,
.start = perf_iommu_start,
.stop = perf_iommu_stop,
.read = perf_iommu_read,
.task_ctx_nr = perf_invalid_context,
.attr_groups = amd_iommu_attr_groups,
};
...
ret = perf_pmu_register(&amd_iommu_pmu, perf_iommu->name, -1);
Because otherwise you're carrying a struct pmu in each struct
perf_amd_iommu which has identical contents.
Now, you need to access the struct perf_amd_iommu pointer for each
IOMMU PMU in some of the functions like perf_iommu_event_init(), for
example. But for that you only need the index and to iterate the
perf_amd_iommu_list.
I wasn't able to find a good way to do that from a quick stare but
PeterZ might have a better idea...
> + perf_iommu->pmu.task_ctx_nr = perf_invalid_context;
> perf_iommu->pmu.attr_groups = amd_iommu_attr_groups;
> - ret = perf_pmu_register(&perf_iommu->pmu, name, -1);
> +
> + ret = perf_pmu_register(&perf_iommu->pmu, perf_iommu->name, -1);
> if (ret)
> pr_err("Error initializing AMD IOMMU perf counters.\n");
> else
> - pr_info("perf: amd_iommu: Detected. (%d banks, %d counters/bank)\n",
> - amd_iommu_pc_get_max_banks(0),
> - amd_iommu_pc_get_max_counters(0));
> + pr_info("Detected AMD IOMMU #%d (%d banks, %d counters/bank)\n",
> + idx, amd_iommu_pc_get_max_banks(idx),
> + amd_iommu_pc_get_max_counters(idx));
> return ret;
> }
>
> -static struct perf_amd_iommu __perf_iommu = {
> - .pmu = {
> - .task_ctx_nr = perf_invalid_context,
> - .event_init = perf_iommu_event_init,
> - .add = perf_iommu_add,
> - .del = perf_iommu_del,
> - .start = perf_iommu_start,
> - .stop = perf_iommu_stop,
> - .read = perf_iommu_read,
> - },
> -};
> -
> static __init int amd_iommu_pc_init(void)
> {
> int ret;
> + unsigned int i;
> +
> + /* Init cpumask attributes to only core 0 */
> + cpumask_set_cpu(0, &iommu_cpumask);
>
> /* Make sure the IOMMU PC resource is available */
> if (!amd_iommu_pc_supported())
> @@ -470,7 +476,21 @@ static __init int amd_iommu_pc_init(void)
> if (ret)
> goto err_out;
>
> - ret = _init_perf_amd_iommu(&__perf_iommu, "amd_iommu");
> + for (i = 0 ; i < amd_iommu_get_num_iommus(); i++) {
> + struct perf_amd_iommu *pi;
> +
> + pi = kzalloc(sizeof(struct perf_amd_iommu), GFP_KERNEL);
> + if (!pi) {
> + ret = -ENOMEM;
> + break;
> + }
> +
> + list_add_tail(&pi->list, &perf_amd_iommu_list);
> + ret = init_one_perf_amd_iommu(pi, i);
You need to init *first* and iff you succeed, only *then* add to the
list.
--
Regards/Gruss,
Boris.
Good mailing practices for 400: avoid top-posting and trim the reply.
next prev parent reply other threads:[~2017-01-22 19:55 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-01-16 7:23 [PATCH v8 0/9] perf/amd/iommu: Enable multi-IOMMU support Suravee Suthikulpanit
2017-01-16 7:23 ` [PATCH v8 1/9] perf/amd/iommu: Declare pr_fmt and remove unnecessary pr_debug Suravee Suthikulpanit
2017-01-16 7:23 ` [PATCH v8 2/9] perf/amd/iommu: Clean up perf_iommu_enable_event Suravee Suthikulpanit
2017-01-18 18:20 ` Borislav Petkov
2017-01-16 7:23 ` [PATCH v8 3/9] perf/amd/iommu: Misc fix up perf_iommu_read Suravee Suthikulpanit
2017-01-19 10:01 ` Borislav Petkov
2017-01-23 12:33 ` Peter Zijlstra
2017-02-07 4:50 ` Suravee Suthikulpanit
2017-01-16 7:23 ` [PATCH v8 4/9] iommu/amd: Introduce amd_iommu_get_num_iommus() Suravee Suthikulpanit
2017-01-19 18:41 ` Borislav Petkov
2017-01-16 7:23 ` [PATCH v8 5/9] perf/amd/iommu: Modify functions to query max banks and counters Suravee Suthikulpanit
2017-01-22 19:53 ` Borislav Petkov
2017-01-16 7:23 ` [PATCH v8 6/9] perf/amd/iommu: Modify amd_iommu_pc_get_set_reg_val() API to allow specifying IOMMU index Suravee Suthikulpanit
2017-01-22 19:53 ` Borislav Petkov
2017-01-16 7:23 ` [PATCH v8 7/9] perf/amd/iommu: Check return value when set and get counter value Suravee Suthikulpanit
2017-01-22 19:53 ` Borislav Petkov
2017-01-23 12:31 ` Peter Zijlstra
2017-01-16 7:23 ` [PATCH v8 8/9] perf/amd/iommu: Fix sysfs perf attribute groups Suravee Suthikulpanit
2017-01-22 19:54 ` Borislav Petkov
2017-01-16 7:23 ` [PATCH v8 9/9] perf/amd/iommu: Enable support for multiple IOMMUs Suravee Suthikulpanit
2017-01-22 19:55 ` Borislav Petkov [this message]
2017-02-07 1:42 ` Suravee Suthikulpanit
2017-01-25 9:46 ` Peter Zijlstra
2017-01-25 9:55 ` Borislav Petkov
2017-02-07 1:58 ` Suravee Suthikulpanit
2017-02-07 1:57 ` Suravee Suthikulpanit
2017-02-14 12:31 ` Peter Zijlstra
2017-02-23 17:43 ` Suravee Suthikulpanit
2017-02-23 18:11 ` Peter Zijlstra
2017-02-23 18:20 ` Suravee Suthikulpanit
2017-01-17 15:36 ` [PATCH v8 0/9] perf/amd/iommu: Enable multi-IOMMU support Joerg Roedel
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170122195531.5y4ufm7pm5qkz3yx@pd.tnic \
--to=bp@alien8.de \
--cc=Suravee.Suthikulpanit@amd.com \
--cc=iommu@lists.linux-foundation.org \
--cc=joro@8bytes.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=peterz@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).