Re: [PATCH V2 2/3] perf tools: Replace aggregation ID with a struct

From: James Clark <james.clark@arm.com>
To: John Garry <john.garry@huawei.com>,
	linux-perf-users@vger.kernel.org, jolsa@redhat.com
Cc: Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>,
	Arnaldo Carvalho de Melo <acme@kernel.org>,
	Mark Rutland <mark.rutland@arm.com>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Namhyung Kim <namhyung@kernel.org>,
	Tommi Rantala <tommi.t.rantala@nokia.com>,
	Christophe JAILLET <christophe.jaillet@wanadoo.fr>,
	Andi Kleen <ak@linux.intel.com>,
	Jin Yao <yao.jin@linux.intel.com>,
	Kajol Jain <kjain@linux.ibm.com>,
	Thomas Richter <tmricht@linux.ibm.com>,
	Kan Liang <kan.liang@linux.intel.com>,
	Alexey Budankov <alexey.budankov@linux.intel.com>
Subject: Re: [PATCH V2 2/3] perf tools: Replace aggregation ID with a struct
Date: Tue, 17 Nov 2020 17:05:28 +0200	[thread overview]
Message-ID: <5ca5f80d-cae1-3756-13f7-21db6da89840@arm.com> (raw)
In-Reply-To: <0024879f-d326-966d-86b0-8cda91483bfe@huawei.com>

Hi John,

Sorry I missed your review comments here. Replies below:

On 12/11/2020 17:18, John Garry wrote:
> 
>>   +static void cpu_aggr_map__delete(struct cpu_aggr_map *map)
>> +{
>> +    if (map) {
> 
> is this check just paranoia?
> 
>> +        WARN_ONCE(refcount_read(&map->refcnt) != 0,
>> +              "cpu_aggr_map refcnt unbalanced\n");
> 
> and this?
> 
>> +        free(map);
>> +    }
>> +}
>> +

The cpu_aggr_map__delete and cpu_aggr_map__put functions were direct
copies of cpu_map__delete and cpu_map__put. I suppose there is more
control over the usages of the new ones, so the checks could possibly be avoided.

It all depends on whether perf_stat__exit_aggr_mode() is only ever called
once or not. But I think it might make sense to leave the checks for
consistency and in case the maps are used somewhere else in the future.

>> +static void cpu_aggr_map__put(struct cpu_aggr_map *map)
>> +{
>> +    if (map && refcount_dec_and_test(&map->refcnt))
>> +        cpu_aggr_map__delete(map);
>> +}
>> +
>>   static void perf_stat__exit_aggr_mode(void)
> 
> ...
> 
>>   +struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr)
>> +{
>> +    struct cpu_aggr_map *cpus = malloc(sizeof(*cpus) + sizeof(struct aggr_cpu_id) * nr);
>> +
> 
> if (!cpus)
>     return NULL
> 
> cpus->nr = nr;
> ...
> 
> this avoids extra indentation and {}
> 

Do you think I should also make this change to the existing perf_cpu_map__empty_new() function
above for consistency?

>> +    if (cpus != NULL) {
>> +        int i;
>> +
>> +        cpus->nr = nr;
>> +        for (i = 0; i < nr; i++)
>> +            cpus->map[i] = cpu_map__empty_aggr_cpu_id();
>> +
>> +        refcount_set(&cpus->refcnt, 1);
>> +    }
>> +
>> +    return cpus;
>> +}
>> +
>>   static int cpu__get_topology_int(int cpu, const char *name, int *value)
>>   {
>>       char path[PATH_MAX];
>> @@ -111,40 +128,47 @@ int cpu_map__get_socket_id(int cpu)
>>       return ret ?: value;
>>   }
>>   -int cpu_map__get_socket(struct perf_cpu_map *map, int idx, void *data __maybe_unused)
>> +struct aggr_cpu_id cpu_map__get_socket(struct perf_cpu_map *map, int idx,
>> +                    void *data __maybe_unused)
>>   {
>>       int cpu;
>> +    struct aggr_cpu_id socket = cpu_map__empty_aggr_cpu_id();
>>         if (idx > map->nr)
>> -        return -1;
>> +        return cpu_map__empty_aggr_cpu_id();
>>         cpu = map->map[idx];
>>   -    return cpu_map__get_socket_id(cpu);
>> +    socket.id = cpu_map__get_socket_id(cpu);
>> +    return socket;
>>   }
>>   -static int cmp_ids(const void *a, const void *b)
>> +static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer)
>>   {
>> -    return *(int *)a - *(int *)b;
>> +    struct aggr_cpu_id *a = (struct aggr_cpu_id *)a_pointer;
>> +    struct aggr_cpu_id *b = (struct aggr_cpu_id *)b_pointer;
>> +
>> +    return a->id - b->id;
>>   }
>>   -int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res,
>> -               int (*f)(struct perf_cpu_map *map, int cpu, void *data),
>> +int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res,
>> +               struct aggr_cpu_id (*f)(struct perf_cpu_map *map, int cpu, void *data),
>>                  void *data)
>>   {
>> -    struct perf_cpu_map *c;
>> +    struct cpu_aggr_map *c;
>>       int nr = cpus->nr;
>> -    int cpu, s1, s2;
>> +    int cpu, s2;
>> +    struct aggr_cpu_id s1;
>>         /* allocate as much as possible */
>> -    c = calloc(1, sizeof(*c) + nr * sizeof(int));
>> +    c = calloc(1, sizeof(*c) + nr * sizeof(struct aggr_cpu_id));
>>       if (!c)
>>           return -1;
>>         for (cpu = 0; cpu < nr; cpu++) {
>>           s1 = f(cpus, cpu, data);
>>           for (s2 = 0; s2 < c->nr; s2++) {
>> -            if (s1 == c->map[s2])
>> +            if (cpu_map__compare_aggr_cpu_id(s1, c->map[s2]))
>>                   break;
>>           }
>>           if (s2 == c->nr) {
>> @@ -153,7 +177,7 @@ int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res,
>>           }
>>       }
>>       /* ensure we process id in increasing order */
>> -    qsort(c->map, c->nr, sizeof(int), cmp_ids);
>> +    qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), cmp_aggr_cpu_id);
>>         refcount_set(&c->refcnt, 1);
>>       *res = c;
>> @@ -167,23 +191,24 @@ int cpu_map__get_die_id(int cpu)
>>       return ret ?: value;
>>   }
>>   -int cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data)
>> +struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data)
>>   {
>> -    int cpu, die_id, s;
>> +    int cpu, s;
>> +    struct aggr_cpu_id die_id = cpu_map__empty_aggr_cpu_id();
>>         if (idx > map->nr)
>> -        return -1;
>> +        return cpu_map__empty_aggr_cpu_id();
>>         cpu = map->map[idx];
>>   -    die_id = cpu_map__get_die_id(cpu);
>> +    die_id.id = cpu_map__get_die_id(cpu);
>>       /* There is no die_id on legacy system. */
>> -    if (die_id == -1)
>> -        die_id = 0;
>> +    if (die_id.id == -1)
>> +        die_id.id = 0;
>>   -    s = cpu_map__get_socket(map, idx, data);
>> +    s = cpu_map__get_socket(map, idx, data).id;
>>       if (s == -1)
>> -        return -1;
>> +        return cpu_map__empty_aggr_cpu_id();
>>         /*
>>        * Encode socket in bit range 15:8
>> @@ -191,13 +216,14 @@ int cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data)
>>        * we need a global id. So we combine
>>        * socket + die id
>>        */
>> -    if (WARN_ONCE(die_id >> 8, "The die id number is too big.\n"))
>> -        return -1;
>> +    if (WARN_ONCE(die_id.id >> 8, "The die id number is too big.\n"))
>> +        return cpu_map__empty_aggr_cpu_id();
>>         if (WARN_ONCE(s >> 8, "The socket id number is too big.\n"))
>> -        return -1;
>> +        return cpu_map__empty_aggr_cpu_id();
>>   -    return (s << 8) | (die_id & 0xff);
>> +    die_id.id = (s << 8) | (die_id.id & 0xff);
>> +    return die_id;
>>   }
>>     int cpu_map__get_core_id(int cpu)
>> @@ -211,21 +237,22 @@ int cpu_map__get_node_id(int cpu)
>>       return cpu__get_node(cpu);
>>   }
>>   -int cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data)
>> +struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data)
>>   {
>> -    int cpu, s_die;
>> +    int cpu;
>> +    struct aggr_cpu_id core = cpu_map__empty_aggr_cpu_id();
>>         if (idx > map->nr)
> 
> should pre-existing code be idx >= map->nr? I didn't check the code any deeper

I think you might be right, but there is a mixture of > and >= throughout the file.
So either the same mistake has been made several times, or the index is not zero-based.

I will look into it.

Thanks
James
> 
>> -        return -1;
>> +        return cpu_map__empty_aggr_cpu_id();
>>         cpu = map->map[idx];
>>