Re: [PATCH V2 2/3] perf tools: Replace aggregation ID with a struct

From: James Clark <james.clark@arm.com>
To: John Garry <john.garry@huawei.com>,
	linux-perf-users@vger.kernel.org, jolsa@redhat.com
Subject: Re: [PATCH V2 2/3] perf tools: Replace aggregation ID with a struct
Date: Tue, 17 Nov 2020 17:48:57 +0200	[thread overview]
Message-ID: <fcc716b1-34f0-c03a-57aa-8ad4a1c753a4@arm.com> (raw)
In-Reply-To: <5ca5f80d-cae1-3756-13f7-21db6da89840@arm.com>

On 17/11/2020 17:05, James Clark wrote:
> 
> Hi John,
> 
> Sorry I missed your review comments here. Replies below:
> 
> On 12/11/2020 17:18, John Garry wrote:
>>
>>>   +static void cpu_aggr_map__delete(struct cpu_aggr_map *map)
>>> +{
>>> +    if (map) {
>>
>> is this check just paranoia?
>>
>>> +        WARN_ONCE(refcount_read(&map->refcnt) != 0,
>>> +              "cpu_aggr_map refcnt unbalanced\n");
>>
>> and this?
>>
>>> +        free(map);
>>> +    }
>>> +}
>>> +
> 
> The cpu_aggr_map__delete and cpu_aggr_map__put functions were direct
> copies of cpu_map__delete and cpu_map__put. I suppose there is more
> control over the usages of the new ones so the check could possibly be avoided.
> 
> It all depends on whether perf_stat__exit_aggr_mode() is only ever called
> once or not. But I think it might make sense to leave the checks for
> consistency and in case the maps are used somewhere else in the future.
> 
> 
>>> +static void cpu_aggr_map__put(struct cpu_aggr_map *map)
>>> +{
>>> +    if (map && refcount_dec_and_test(&map->refcnt))
>>> +        cpu_aggr_map__delete(map);
>>> +}
>>> +
>>>   static void perf_stat__exit_aggr_mode(void)
>>
>> ...
>>
>>>   +struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr)
>>> +{
>>> +    struct cpu_aggr_map *cpus = malloc(sizeof(*cpus) + sizeof(struct aggr_cpu_id) * nr);
>>> +
>>
>> if (!cpus)
>>     return NULL
>>
>> cpus->nr = nr;
>> ...
>>
>> this avoids extra indentation and {}
>>
> 
> Do you think I should also make this change to the existing perf_cpu_map__empty_new() function
> above for consistency?
> 
> 
>>> +    if (cpus != NULL) {
>>> +        int i;
>>> +
>>> +        cpus->nr = nr;
>>> +        for (i = 0; i < nr; i++)
>>> +            cpus->map[i] = cpu_map__empty_aggr_cpu_id();
>>> +
>>> +        refcount_set(&cpus->refcnt, 1);
>>> +    }
>>> +
>>> +    return cpus;
>>> +}
>>> +
>>>   static int cpu__get_topology_int(int cpu, const char *name, int *value)
>>>   {
>>>       char path[PATH_MAX];
>>> @@ -111,40 +128,47 @@ int cpu_map__get_socket_id(int cpu)
>>>       return ret ?: value;
>>>   }
>>>   -int cpu_map__get_socket(struct perf_cpu_map *map, int idx, void *data __maybe_unused)
>>> +struct aggr_cpu_id cpu_map__get_socket(struct perf_cpu_map *map, int idx,
>>> +                    void *data __maybe_unused)
>>>   {
>>>       int cpu;
>>> +    struct aggr_cpu_id socket = cpu_map__empty_aggr_cpu_id();
>>>         if (idx > map->nr)
>>> -        return -1;
>>> +        return cpu_map__empty_aggr_cpu_id();
>>>         cpu = map->map[idx];
>>>   -    return cpu_map__get_socket_id(cpu);
>>> +    socket.id = cpu_map__get_socket_id(cpu);
>>> +    return socket;
>>>   }
>>>   -static int cmp_ids(const void *a, const void *b)
>>> +static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer)
>>>   {
>>> -    return *(int *)a - *(int *)b;
>>> +    struct aggr_cpu_id *a = (struct aggr_cpu_id *)a_pointer;
>>> +    struct aggr_cpu_id *b = (struct aggr_cpu_id *)b_pointer;
>>> +
>>> +    return a->id - b->id;
>>>   }
>>>   -int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res,
>>> -               int (*f)(struct perf_cpu_map *map, int cpu, void *data),
>>> +int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res,
>>> +               struct aggr_cpu_id (*f)(struct perf_cpu_map *map, int cpu, void *data),
>>>                  void *data)
>>>   {
>>> -    struct perf_cpu_map *c;
>>> +    struct cpu_aggr_map *c;
>>>       int nr = cpus->nr;
>>> -    int cpu, s1, s2;
>>> +    int cpu, s2;
>>> +    struct aggr_cpu_id s1;
>>>         /* allocate as much as possible */
>>> -    c = calloc(1, sizeof(*c) + nr * sizeof(int));
>>> +    c = calloc(1, sizeof(*c) + nr * sizeof(struct aggr_cpu_id));
>>>       if (!c)
>>>           return -1;
>>>         for (cpu = 0; cpu < nr; cpu++) {
>>>           s1 = f(cpus, cpu, data);
>>>           for (s2 = 0; s2 < c->nr; s2++) {
>>> -            if (s1 == c->map[s2])
>>> +            if (cpu_map__compare_aggr_cpu_id(s1, c->map[s2]))
>>>                   break;
>>>           }
>>>           if (s2 == c->nr) {
>>> @@ -153,7 +177,7 @@ int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res,
>>>           }
>>>       }
>>>       /* ensure we process id in increasing order */
>>> -    qsort(c->map, c->nr, sizeof(int), cmp_ids);
>>> +    qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), cmp_aggr_cpu_id);
>>>         refcount_set(&c->refcnt, 1);
>>>       *res = c;
>>> @@ -167,23 +191,24 @@ int cpu_map__get_die_id(int cpu)
>>>       return ret ?: value;
>>>   }
>>>   -int cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data)
>>> +struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data)
>>>   {
>>> -    int cpu, die_id, s;
>>> +    int cpu, s;
>>> +    struct aggr_cpu_id die_id = cpu_map__empty_aggr_cpu_id();
>>>         if (idx > map->nr)
>>> -        return -1;
>>> +        return cpu_map__empty_aggr_cpu_id();
>>>         cpu = map->map[idx];
>>>   -    die_id = cpu_map__get_die_id(cpu);
>>> +    die_id.id = cpu_map__get_die_id(cpu);
>>>       /* There is no die_id on legacy system. */
>>> -    if (die_id == -1)
>>> -        die_id = 0;
>>> +    if (die_id.id == -1)
>>> +        die_id.id = 0;
>>>   -    s = cpu_map__get_socket(map, idx, data);
>>> +    s = cpu_map__get_socket(map, idx, data).id;
>>>       if (s == -1)
>>> -        return -1;
>>> +        return cpu_map__empty_aggr_cpu_id();
>>>         /*
>>>        * Encode socket in bit range 15:8
>>> @@ -191,13 +216,14 @@ int cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data)
>>>        * we need a global id. So we combine
>>>        * socket + die id
>>>        */
>>> -    if (WARN_ONCE(die_id >> 8, "The die id number is too big.\n"))
>>> -        return -1;
>>> +    if (WARN_ONCE(die_id.id >> 8, "The die id number is too big.\n"))
>>> +        return cpu_map__empty_aggr_cpu_id();
>>>         if (WARN_ONCE(s >> 8, "The socket id number is too big.\n"))
>>> -        return -1;
>>> +        return cpu_map__empty_aggr_cpu_id();
>>>   -    return (s << 8) | (die_id & 0xff);
>>> +    die_id.id = (s << 8) | (die_id.id & 0xff);
>>> +    return die_id;
>>>   }
>>>     int cpu_map__get_core_id(int cpu)
>>> @@ -211,21 +237,22 @@ int cpu_map__get_node_id(int cpu)
>>>       return cpu__get_node(cpu);
>>>   }
>>>   -int cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data)
>>> +struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data)
>>>   {
>>> -    int cpu, s_die;
>>> +    int cpu;
>>> +    struct aggr_cpu_id core = cpu_map__empty_aggr_cpu_id();
>>>         if (idx > map->nr)
>>
>> should pre-existing code be idx >= map->nr? I didn't check the code any deeper
> 
> I think you might be right. But there is a mixture of > and >= throughout the file.
> So either the same mistake has been made several times or it's not zero indexed.
> 
> I will look into it.

Hi Garry,

Yes > is an issue and it should be >=. It probably hasn't caused a problem because
the function is never called with idx out of bounds.

I think I'd like to fix this in a separate patchset after this one as it's unrelated
to my change. Although it might have to wait until it's merged otherwise there would
probably be an annoying conflict.

What do you think?

Thanks
James

> 
> Thanks
> James
>>
>>> -        return -1;
>>> +        return cpu_map__empty_aggr_cpu_id();
>>>         cpu = map->map[idx];
>>>