Linux-perf-users Archive on lore.kernel.org
 help / color / Atom feed
* Re: [PATCH V2 2/3] perf tools: Replace aggregation ID with a struct
       [not found]   ` <20201028094311.8563-3-james.clark@arm.com>
@ 2020-11-12 15:18     ` John Garry
  2020-11-17 15:05       ` James Clark
  0 siblings, 1 reply; 6+ messages in thread
From: John Garry @ 2020-11-12 15:18 UTC (permalink / raw)
  To: James Clark, linux-perf-users, jolsa
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Namhyung Kim, Tommi Rantala,
	Christophe JAILLET, Andi Kleen, Jin Yao, Kajol Jain,
	Thomas Richter, Kan Liang, Alexey Budankov


>   
> +static void cpu_aggr_map__delete(struct cpu_aggr_map *map)
> +{
> +	if (map) {

is this check just paranoia?

> +		WARN_ONCE(refcount_read(&map->refcnt) != 0,
> +			  "cpu_aggr_map refcnt unbalanced\n");

and this?

> +		free(map);
> +	}
> +}
> +
> +static void cpu_aggr_map__put(struct cpu_aggr_map *map)
> +{
> +	if (map && refcount_dec_and_test(&map->refcnt))
> +		cpu_aggr_map__delete(map);
> +}
> +
>   static void perf_stat__exit_aggr_mode(void)

...

>   
> +struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr)
> +{
> +	struct cpu_aggr_map *cpus = malloc(sizeof(*cpus) + sizeof(struct aggr_cpu_id) * nr);
> +

if (!cpus)
	return NULL

cpus->nr = nr;
...

this avoids extra indentation and {}

> +	if (cpus != NULL) {
> +		int i;
> +
> +		cpus->nr = nr;
> +		for (i = 0; i < nr; i++)
> +			cpus->map[i] = cpu_map__empty_aggr_cpu_id();
> +
> +		refcount_set(&cpus->refcnt, 1);
> +	}
> +
> +	return cpus;
> +}
> +
>   static int cpu__get_topology_int(int cpu, const char *name, int *value)
>   {
>   	char path[PATH_MAX];
> @@ -111,40 +128,47 @@ int cpu_map__get_socket_id(int cpu)
>   	return ret ?: value;
>   }
>   
> -int cpu_map__get_socket(struct perf_cpu_map *map, int idx, void *data __maybe_unused)
> +struct aggr_cpu_id cpu_map__get_socket(struct perf_cpu_map *map, int idx,
> +					void *data __maybe_unused)
>   {
>   	int cpu;
> +	struct aggr_cpu_id socket = cpu_map__empty_aggr_cpu_id();
>   
>   	if (idx > map->nr)
> -		return -1;
> +		return cpu_map__empty_aggr_cpu_id();
>   
>   	cpu = map->map[idx];
>   
> -	return cpu_map__get_socket_id(cpu);
> +	socket.id = cpu_map__get_socket_id(cpu);
> +	return socket;
>   }
>   
> -static int cmp_ids(const void *a, const void *b)
> +static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer)
>   {
> -	return *(int *)a - *(int *)b;
> +	struct aggr_cpu_id *a = (struct aggr_cpu_id *)a_pointer;
> +	struct aggr_cpu_id *b = (struct aggr_cpu_id *)b_pointer;
> +
> +	return a->id - b->id;
>   }
>   
> -int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res,
> -		       int (*f)(struct perf_cpu_map *map, int cpu, void *data),
> +int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res,
> +		       struct aggr_cpu_id (*f)(struct perf_cpu_map *map, int cpu, void *data),
>   		       void *data)
>   {
> -	struct perf_cpu_map *c;
> +	struct cpu_aggr_map *c;
>   	int nr = cpus->nr;
> -	int cpu, s1, s2;
> +	int cpu, s2;
> +	struct aggr_cpu_id s1;
>   
>   	/* allocate as much as possible */
> -	c = calloc(1, sizeof(*c) + nr * sizeof(int));
> +	c = calloc(1, sizeof(*c) + nr * sizeof(struct aggr_cpu_id));
>   	if (!c)
>   		return -1;
>   
>   	for (cpu = 0; cpu < nr; cpu++) {
>   		s1 = f(cpus, cpu, data);
>   		for (s2 = 0; s2 < c->nr; s2++) {
> -			if (s1 == c->map[s2])
> +			if (cpu_map__compare_aggr_cpu_id(s1, c->map[s2]))
>   				break;
>   		}
>   		if (s2 == c->nr) {
> @@ -153,7 +177,7 @@ int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res,
>   		}
>   	}
>   	/* ensure we process id in increasing order */
> -	qsort(c->map, c->nr, sizeof(int), cmp_ids);
> +	qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), cmp_aggr_cpu_id);
>   
>   	refcount_set(&c->refcnt, 1);
>   	*res = c;
> @@ -167,23 +191,24 @@ int cpu_map__get_die_id(int cpu)
>   	return ret ?: value;
>   }
>   
> -int cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data)
> +struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data)
>   {
> -	int cpu, die_id, s;
> +	int cpu, s;
> +	struct aggr_cpu_id die_id = cpu_map__empty_aggr_cpu_id();
>   
>   	if (idx > map->nr)
> -		return -1;
> +		return cpu_map__empty_aggr_cpu_id();
>   
>   	cpu = map->map[idx];
>   
> -	die_id = cpu_map__get_die_id(cpu);
> +	die_id.id = cpu_map__get_die_id(cpu);
>   	/* There is no die_id on legacy system. */
> -	if (die_id == -1)
> -		die_id = 0;
> +	if (die_id.id == -1)
> +		die_id.id = 0;
>   
> -	s = cpu_map__get_socket(map, idx, data);
> +	s = cpu_map__get_socket(map, idx, data).id;
>   	if (s == -1)
> -		return -1;
> +		return cpu_map__empty_aggr_cpu_id();
>   
>   	/*
>   	 * Encode socket in bit range 15:8
> @@ -191,13 +216,14 @@ int cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data)
>   	 * we need a global id. So we combine
>   	 * socket + die id
>   	 */
> -	if (WARN_ONCE(die_id >> 8, "The die id number is too big.\n"))
> -		return -1;
> +	if (WARN_ONCE(die_id.id >> 8, "The die id number is too big.\n"))
> +		return cpu_map__empty_aggr_cpu_id();
>   
>   	if (WARN_ONCE(s >> 8, "The socket id number is too big.\n"))
> -		return -1;
> +		return cpu_map__empty_aggr_cpu_id();
>   
> -	return (s << 8) | (die_id & 0xff);
> +	die_id.id = (s << 8) | (die_id.id & 0xff);
> +	return die_id;
>   }
>   
>   int cpu_map__get_core_id(int cpu)
> @@ -211,21 +237,22 @@ int cpu_map__get_node_id(int cpu)
>   	return cpu__get_node(cpu);
>   }
>   
> -int cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data)
> +struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data)
>   {
> -	int cpu, s_die;
> +	int cpu;
> +	struct aggr_cpu_id core = cpu_map__empty_aggr_cpu_id();
>   
>   	if (idx > map->nr)

should pre-existing code be idx >= map->nr? I didn't check the code any 
deeper

> -		return -1;
> +		return cpu_map__empty_aggr_cpu_id();
>   
>   	cpu = map->map[idx];
>   

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH V2 3/3] perf tools: fix perf stat with large socket IDs
       [not found]     ` <20201108124412.GA196289@krava>
@ 2020-11-12 19:42       ` James Clark
  2020-11-12 21:28         ` Jiri Olsa
  0 siblings, 1 reply; 6+ messages in thread
From: James Clark @ 2020-11-12 19:42 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: linux-perf-users, Peter Zijlstra, Ingo Molnar,
	Arnaldo Carvalho de Melo, Mark Rutland, Alexander Shishkin,
	Namhyung Kim, Tommi Rantala, Christophe JAILLET, Andi Kleen,
	Jin Yao, Kajol Jain, Thomas Richter, Kan Liang, Alexey Budankov



On 08/11/2020 14:44, Jiri Olsa wrote:
> On Wed, Oct 28, 2020 at 09:43:11AM +0000, James Clark wrote:
> 
> SNIP
> 
>> -		 * Encode socket in bit range 31:24
>> -		 * encode die id in bit range 23:16
>>  		 * core_id is relative to socket and die,
>>  		 * we need a global id. So we combine
>>  		 * socket + die id + core id
>>  		 */
>> -		if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n"))
>> -			return cpu_map__empty_aggr_cpu_id();
>> -
>> -		if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n"))
>> -			return cpu_map__empty_aggr_cpu_id();
>> -
>> -		if (WARN_ONCE(env->cpu[cpu].core_id >> 16, "The core id number is too big.\n"))
>> -			return cpu_map__empty_aggr_cpu_id();
>> -
>> -		core.id = (env->cpu[cpu].socket_id << 24) |
>> -		       (env->cpu[cpu].die_id << 16) |
>> -		       (env->cpu[cpu].core_id & 0xffff);
>> +		core.socket_id = env->cpu[cpu].socket_id;
>> +		core.die_id = env->cpu[cpu].die_id;
>> +		core.core_id = env->cpu[cpu].core_id;
> 
> all 3 functions above look like they could be united just
> to single one, because they are no longer steping on ech
> other data
> 
> perhaps best as separate patch, so the change is clean
> 
> there are also the 'system retrieval counter' functions,
> where it's possible better to keep it separated from
> performance reasons, but here it's just value assigment

Hi Jiri,

I have a new patchset ready without this change, and I started working on this, but I came
across some issues:

For example if all of the members are filled in regardless of the aggregation type,
then the comparison function that actually decides whether cores should be aggregated or
not will need to be modified to accept the current aggregation mode.

This would be possible, but would have a knock on effect when the values are read
from the system if the code is to be shared.

Also there is a second issue where some of the current functions have different failure
conditions. For example if socket can't be read, then that mode will fail, but other
aggregation modes will still work. If we make the change to unify all the functions then
that could get a bit more complicated.

I just wanted to get your opinion whether to submit the broken up set without this extra
change or carry on trying to work out a solution?


Thanks
James

> 
> 
> SNIP
> 
>>  struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data)
>>  {
>> -	int cpu, s;
>> +	int cpu;
>>  	struct aggr_cpu_id die_id = cpu_map__empty_aggr_cpu_id();
>>  
>>  	if (idx > map->nr)
>> @@ -201,28 +210,20 @@ struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *dat
>>  
>>  	cpu = map->map[idx];
>>  
>> -	die_id.id = cpu_map__get_die_id(cpu);
>> +	die_id.die_id = cpu_map__get_die_id(cpu);
>>  	/* There is no die_id on legacy system. */
>> -	if (die_id.id == -1)
>> -		die_id.id = 0;
>> -
>> -	s = cpu_map__get_socket(map, idx, data).id;
>> -	if (s == -1)
>> -		return cpu_map__empty_aggr_cpu_id();
>> +	if (die_id.die_id == -1)
>> +		die_id.die_id = 0;
> 
> please call the local variable just 'id',
> there are other places like this for socket/core..
> 
> SNIP
> 
>> diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
>> index f654a825dcfb..ba068f245809 100644
>> --- a/tools/perf/util/cpumap.h
>> +++ b/tools/perf/util/cpumap.h
>> @@ -8,7 +8,11 @@
>>  #include <perf/cpumap.h>
>>  
>>  struct aggr_cpu_id {
>> -	int id;
>> +	int socket_id;
>> +	int die_id;
>> +	int core_id;
>> +	int node_id;
>> +	int thread_id;
> 
> please don't use the _id suffix in here, there's already
> id in the struct name
> 
> 
>>  };
>>  
>>  struct cpu_aggr_map {
>> @@ -21,6 +25,7 @@ struct perf_record_cpu_map_data;
>>  
>>  struct perf_cpu_map *perf_cpu_map__empty_new(int nr);
>>  struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr);
>> +
>>  struct perf_cpu_map *cpu_map__new_data(struct perf_record_cpu_map_data *data);
>>  size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size);
>>  size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size);
>> @@ -46,21 +51,6 @@ static inline int cpu_map__socket(struct perf_cpu_map *sock, int s)
>>  	return sock->map[s];
>>  }
> 
> SNIP
> 

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH V2 3/3] perf tools: fix perf stat with large socket IDs
  2020-11-12 19:42       ` [PATCH V2 3/3] perf tools: fix perf stat with large socket IDs James Clark
@ 2020-11-12 21:28         ` Jiri Olsa
  0 siblings, 0 replies; 6+ messages in thread
From: Jiri Olsa @ 2020-11-12 21:28 UTC (permalink / raw)
  To: James Clark
  Cc: linux-perf-users, Peter Zijlstra, Ingo Molnar,
	Arnaldo Carvalho de Melo, Mark Rutland, Alexander Shishkin,
	Namhyung Kim, Tommi Rantala, Christophe JAILLET, Andi Kleen,
	Jin Yao, Kajol Jain, Thomas Richter, Kan Liang, Alexey Budankov

On Thu, Nov 12, 2020 at 09:42:42PM +0200, James Clark wrote:
> 
> 
> On 08/11/2020 14:44, Jiri Olsa wrote:
> > On Wed, Oct 28, 2020 at 09:43:11AM +0000, James Clark wrote:
> > 
> > SNIP
> > 
> >> -		 * Encode socket in bit range 31:24
> >> -		 * encode die id in bit range 23:16
> >>  		 * core_id is relative to socket and die,
> >>  		 * we need a global id. So we combine
> >>  		 * socket + die id + core id
> >>  		 */
> >> -		if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n"))
> >> -			return cpu_map__empty_aggr_cpu_id();
> >> -
> >> -		if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n"))
> >> -			return cpu_map__empty_aggr_cpu_id();
> >> -
> >> -		if (WARN_ONCE(env->cpu[cpu].core_id >> 16, "The core id number is too big.\n"))
> >> -			return cpu_map__empty_aggr_cpu_id();
> >> -
> >> -		core.id = (env->cpu[cpu].socket_id << 24) |
> >> -		       (env->cpu[cpu].die_id << 16) |
> >> -		       (env->cpu[cpu].core_id & 0xffff);
> >> +		core.socket_id = env->cpu[cpu].socket_id;
> >> +		core.die_id = env->cpu[cpu].die_id;
> >> +		core.core_id = env->cpu[cpu].core_id;
> > 
> > all 3 functions above look like they could be united just
> > to single one, because they are no longer steping on ech
> > other data
> > 
> > perhaps best as separate patch, so the change is clean
> > 
> > there are also the 'system retrieval counter' functions,
> > where it's possible better to keep it separated from
> > performance reasons, but here it's just value assigment
> 
> Hi Jiri,
> 
> I have a new patchset ready without this change, and I started working on this, but I came
> across some issues:
> 
> For example if all of the members are filled in regardless of the aggregation type,
> then the comparison function that actually decides whether cores should be aggregated or
> not will need to be modified to accept the current aggregation mode.

ah, because we compare all the values regardless and normaly there'd be -1

> 
> This would be possible, but would have a knock on effect when the values are read
> from the system if the code is to be shared.
> 
> Also there is a second issue where some of the current functions have different failure
> conditions. For example if socket can't be read, then that mode will fail, but other
> aggregation modes will still work. If we make the change to unify all the functions then
> that could get a bit more complicated.
> 
> I just wanted to get your opinion whether to submit the broken up set without this extra
> change or carry on trying to work out a solution?

ok forget it then, seems like possible follow up changes if it's even worthy

thanks for checking on that

jirka


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH V2 2/3] perf tools: Replace aggregation ID with a struct
  2020-11-12 15:18     ` [PATCH V2 2/3] perf tools: Replace aggregation ID with a struct John Garry
@ 2020-11-17 15:05       ` James Clark
  2020-11-17 15:48         ` James Clark
  0 siblings, 1 reply; 6+ messages in thread
From: James Clark @ 2020-11-17 15:05 UTC (permalink / raw)
  To: John Garry, linux-perf-users, jolsa
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Namhyung Kim, Tommi Rantala,
	Christophe JAILLET, Andi Kleen, Jin Yao, Kajol Jain,
	Thomas Richter, Kan Liang, Alexey Budankov


Hi John,

Sorry I missed your review comments here. Replies below:

On 12/11/2020 17:18, John Garry wrote:
> 
>>   +static void cpu_aggr_map__delete(struct cpu_aggr_map *map)
>> +{
>> +    if (map) {
> 
> is this check just paranoia?
> 
>> +        WARN_ONCE(refcount_read(&map->refcnt) != 0,
>> +              "cpu_aggr_map refcnt unbalanced\n");
> 
> and this?
> 
>> +        free(map);
>> +    }
>> +}
>> +

The cpu_aggr_map__delete and cpu_aggr_map__put functions were direct
copies of cpu_map__delete and cpu_map__put. I suppose there is more
control over the usages of the new ones so the check could possibly be avoided.

It all depends on whether perf_stat__exit_aggr_mode() is only ever called
once or not. But I think it might make sense to leave the checks for
consistency and in case the maps are used somewhere else in the future.


>> +static void cpu_aggr_map__put(struct cpu_aggr_map *map)
>> +{
>> +    if (map && refcount_dec_and_test(&map->refcnt))
>> +        cpu_aggr_map__delete(map);
>> +}
>> +
>>   static void perf_stat__exit_aggr_mode(void)
> 
> ...
> 
>>   +struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr)
>> +{
>> +    struct cpu_aggr_map *cpus = malloc(sizeof(*cpus) + sizeof(struct aggr_cpu_id) * nr);
>> +
> 
> if (!cpus)
>     return NULL
> 
> cpus->nr = nr;
> ...
> 
> this avoids extra indentation and {}
> 

Do you think I should also make this change to the existing perf_cpu_map__empty_new() function
above for consistency?


>> +    if (cpus != NULL) {
>> +        int i;
>> +
>> +        cpus->nr = nr;
>> +        for (i = 0; i < nr; i++)
>> +            cpus->map[i] = cpu_map__empty_aggr_cpu_id();
>> +
>> +        refcount_set(&cpus->refcnt, 1);
>> +    }
>> +
>> +    return cpus;
>> +}
>> +
>>   static int cpu__get_topology_int(int cpu, const char *name, int *value)
>>   {
>>       char path[PATH_MAX];
>> @@ -111,40 +128,47 @@ int cpu_map__get_socket_id(int cpu)
>>       return ret ?: value;
>>   }
>>   -int cpu_map__get_socket(struct perf_cpu_map *map, int idx, void *data __maybe_unused)
>> +struct aggr_cpu_id cpu_map__get_socket(struct perf_cpu_map *map, int idx,
>> +                    void *data __maybe_unused)
>>   {
>>       int cpu;
>> +    struct aggr_cpu_id socket = cpu_map__empty_aggr_cpu_id();
>>         if (idx > map->nr)
>> -        return -1;
>> +        return cpu_map__empty_aggr_cpu_id();
>>         cpu = map->map[idx];
>>   -    return cpu_map__get_socket_id(cpu);
>> +    socket.id = cpu_map__get_socket_id(cpu);
>> +    return socket;
>>   }
>>   -static int cmp_ids(const void *a, const void *b)
>> +static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer)
>>   {
>> -    return *(int *)a - *(int *)b;
>> +    struct aggr_cpu_id *a = (struct aggr_cpu_id *)a_pointer;
>> +    struct aggr_cpu_id *b = (struct aggr_cpu_id *)b_pointer;
>> +
>> +    return a->id - b->id;
>>   }
>>   -int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res,
>> -               int (*f)(struct perf_cpu_map *map, int cpu, void *data),
>> +int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res,
>> +               struct aggr_cpu_id (*f)(struct perf_cpu_map *map, int cpu, void *data),
>>                  void *data)
>>   {
>> -    struct perf_cpu_map *c;
>> +    struct cpu_aggr_map *c;
>>       int nr = cpus->nr;
>> -    int cpu, s1, s2;
>> +    int cpu, s2;
>> +    struct aggr_cpu_id s1;
>>         /* allocate as much as possible */
>> -    c = calloc(1, sizeof(*c) + nr * sizeof(int));
>> +    c = calloc(1, sizeof(*c) + nr * sizeof(struct aggr_cpu_id));
>>       if (!c)
>>           return -1;
>>         for (cpu = 0; cpu < nr; cpu++) {
>>           s1 = f(cpus, cpu, data);
>>           for (s2 = 0; s2 < c->nr; s2++) {
>> -            if (s1 == c->map[s2])
>> +            if (cpu_map__compare_aggr_cpu_id(s1, c->map[s2]))
>>                   break;
>>           }
>>           if (s2 == c->nr) {
>> @@ -153,7 +177,7 @@ int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res,
>>           }
>>       }
>>       /* ensure we process id in increasing order */
>> -    qsort(c->map, c->nr, sizeof(int), cmp_ids);
>> +    qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), cmp_aggr_cpu_id);
>>         refcount_set(&c->refcnt, 1);
>>       *res = c;
>> @@ -167,23 +191,24 @@ int cpu_map__get_die_id(int cpu)
>>       return ret ?: value;
>>   }
>>   -int cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data)
>> +struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data)
>>   {
>> -    int cpu, die_id, s;
>> +    int cpu, s;
>> +    struct aggr_cpu_id die_id = cpu_map__empty_aggr_cpu_id();
>>         if (idx > map->nr)
>> -        return -1;
>> +        return cpu_map__empty_aggr_cpu_id();
>>         cpu = map->map[idx];
>>   -    die_id = cpu_map__get_die_id(cpu);
>> +    die_id.id = cpu_map__get_die_id(cpu);
>>       /* There is no die_id on legacy system. */
>> -    if (die_id == -1)
>> -        die_id = 0;
>> +    if (die_id.id == -1)
>> +        die_id.id = 0;
>>   -    s = cpu_map__get_socket(map, idx, data);
>> +    s = cpu_map__get_socket(map, idx, data).id;
>>       if (s == -1)
>> -        return -1;
>> +        return cpu_map__empty_aggr_cpu_id();
>>         /*
>>        * Encode socket in bit range 15:8
>> @@ -191,13 +216,14 @@ int cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data)
>>        * we need a global id. So we combine
>>        * socket + die id
>>        */
>> -    if (WARN_ONCE(die_id >> 8, "The die id number is too big.\n"))
>> -        return -1;
>> +    if (WARN_ONCE(die_id.id >> 8, "The die id number is too big.\n"))
>> +        return cpu_map__empty_aggr_cpu_id();
>>         if (WARN_ONCE(s >> 8, "The socket id number is too big.\n"))
>> -        return -1;
>> +        return cpu_map__empty_aggr_cpu_id();
>>   -    return (s << 8) | (die_id & 0xff);
>> +    die_id.id = (s << 8) | (die_id.id & 0xff);
>> +    return die_id;
>>   }
>>     int cpu_map__get_core_id(int cpu)
>> @@ -211,21 +237,22 @@ int cpu_map__get_node_id(int cpu)
>>       return cpu__get_node(cpu);
>>   }
>>   -int cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data)
>> +struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data)
>>   {
>> -    int cpu, s_die;
>> +    int cpu;
>> +    struct aggr_cpu_id core = cpu_map__empty_aggr_cpu_id();
>>         if (idx > map->nr)
> 
> should pre-existing code be idx >= map->nr? I didn't check the code any deeper

I think you might be right. But there is a mixture of > and >= throughout the file.
So either the same mistake has been made several times or it's not zero indexed.

I will look into it.

Thanks
James
> 
>> -        return -1;
>> +        return cpu_map__empty_aggr_cpu_id();
>>         cpu = map->map[idx];
>>   

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH V2 2/3] perf tools: Replace aggregation ID with a struct
  2020-11-17 15:05       ` James Clark
@ 2020-11-17 15:48         ` James Clark
  2020-11-17 15:53           ` James Clark
  0 siblings, 1 reply; 6+ messages in thread
From: James Clark @ 2020-11-17 15:48 UTC (permalink / raw)
  To: John Garry, linux-perf-users, jolsa



On 17/11/2020 17:05, James Clark wrote:
> 
> Hi John,
> 
> Sorry I missed your review comments here. Replies below:
> 
> On 12/11/2020 17:18, John Garry wrote:
>>
>>>   +static void cpu_aggr_map__delete(struct cpu_aggr_map *map)
>>> +{
>>> +    if (map) {
>>
>> is this check just paranoia?
>>
>>> +        WARN_ONCE(refcount_read(&map->refcnt) != 0,
>>> +              "cpu_aggr_map refcnt unbalanced\n");
>>
>> and this?
>>
>>> +        free(map);
>>> +    }
>>> +}
>>> +
> 
> The cpu_aggr_map__delete and cpu_aggr_map__put functions were direct
> copies of cpu_map__delete and cpu_map__put. I suppose there is more
> control over the usages of the new ones so the check could possibly be avoided.
> 
> It all depends on whether perf_stat__exit_aggr_mode() is only ever called
> once or not. But I think it might make sense to leave the checks for
> consistency and in case the maps are used somewhere else in the future.
> 
> 
>>> +static void cpu_aggr_map__put(struct cpu_aggr_map *map)
>>> +{
>>> +    if (map && refcount_dec_and_test(&map->refcnt))
>>> +        cpu_aggr_map__delete(map);
>>> +}
>>> +
>>>   static void perf_stat__exit_aggr_mode(void)
>>
>> ...
>>
>>>   +struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr)
>>> +{
>>> +    struct cpu_aggr_map *cpus = malloc(sizeof(*cpus) + sizeof(struct aggr_cpu_id) * nr);
>>> +
>>
>> if (!cpus)
>>     return NULL
>>
>> cpus->nr = nr;
>> ...
>>
>> this avoids extra indentation and {}
>>
> 
> Do you think I should also make this change to the existing perf_cpu_map__empty_new() function
> above for consistency?
> 
> 
>>> +    if (cpus != NULL) {
>>> +        int i;
>>> +
>>> +        cpus->nr = nr;
>>> +        for (i = 0; i < nr; i++)
>>> +            cpus->map[i] = cpu_map__empty_aggr_cpu_id();
>>> +
>>> +        refcount_set(&cpus->refcnt, 1);
>>> +    }
>>> +
>>> +    return cpus;
>>> +}
>>> +
>>>   static int cpu__get_topology_int(int cpu, const char *name, int *value)
>>>   {
>>>       char path[PATH_MAX];
>>> @@ -111,40 +128,47 @@ int cpu_map__get_socket_id(int cpu)
>>>       return ret ?: value;
>>>   }
>>>   -int cpu_map__get_socket(struct perf_cpu_map *map, int idx, void *data __maybe_unused)
>>> +struct aggr_cpu_id cpu_map__get_socket(struct perf_cpu_map *map, int idx,
>>> +                    void *data __maybe_unused)
>>>   {
>>>       int cpu;
>>> +    struct aggr_cpu_id socket = cpu_map__empty_aggr_cpu_id();
>>>         if (idx > map->nr)
>>> -        return -1;
>>> +        return cpu_map__empty_aggr_cpu_id();
>>>         cpu = map->map[idx];
>>>   -    return cpu_map__get_socket_id(cpu);
>>> +    socket.id = cpu_map__get_socket_id(cpu);
>>> +    return socket;
>>>   }
>>>   -static int cmp_ids(const void *a, const void *b)
>>> +static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer)
>>>   {
>>> -    return *(int *)a - *(int *)b;
>>> +    struct aggr_cpu_id *a = (struct aggr_cpu_id *)a_pointer;
>>> +    struct aggr_cpu_id *b = (struct aggr_cpu_id *)b_pointer;
>>> +
>>> +    return a->id - b->id;
>>>   }
>>>   -int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res,
>>> -               int (*f)(struct perf_cpu_map *map, int cpu, void *data),
>>> +int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res,
>>> +               struct aggr_cpu_id (*f)(struct perf_cpu_map *map, int cpu, void *data),
>>>                  void *data)
>>>   {
>>> -    struct perf_cpu_map *c;
>>> +    struct cpu_aggr_map *c;
>>>       int nr = cpus->nr;
>>> -    int cpu, s1, s2;
>>> +    int cpu, s2;
>>> +    struct aggr_cpu_id s1;
>>>         /* allocate as much as possible */
>>> -    c = calloc(1, sizeof(*c) + nr * sizeof(int));
>>> +    c = calloc(1, sizeof(*c) + nr * sizeof(struct aggr_cpu_id));
>>>       if (!c)
>>>           return -1;
>>>         for (cpu = 0; cpu < nr; cpu++) {
>>>           s1 = f(cpus, cpu, data);
>>>           for (s2 = 0; s2 < c->nr; s2++) {
>>> -            if (s1 == c->map[s2])
>>> +            if (cpu_map__compare_aggr_cpu_id(s1, c->map[s2]))
>>>                   break;
>>>           }
>>>           if (s2 == c->nr) {
>>> @@ -153,7 +177,7 @@ int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res,
>>>           }
>>>       }
>>>       /* ensure we process id in increasing order */
>>> -    qsort(c->map, c->nr, sizeof(int), cmp_ids);
>>> +    qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), cmp_aggr_cpu_id);
>>>         refcount_set(&c->refcnt, 1);
>>>       *res = c;
>>> @@ -167,23 +191,24 @@ int cpu_map__get_die_id(int cpu)
>>>       return ret ?: value;
>>>   }
>>>   -int cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data)
>>> +struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data)
>>>   {
>>> -    int cpu, die_id, s;
>>> +    int cpu, s;
>>> +    struct aggr_cpu_id die_id = cpu_map__empty_aggr_cpu_id();
>>>         if (idx > map->nr)
>>> -        return -1;
>>> +        return cpu_map__empty_aggr_cpu_id();
>>>         cpu = map->map[idx];
>>>   -    die_id = cpu_map__get_die_id(cpu);
>>> +    die_id.id = cpu_map__get_die_id(cpu);
>>>       /* There is no die_id on legacy system. */
>>> -    if (die_id == -1)
>>> -        die_id = 0;
>>> +    if (die_id.id == -1)
>>> +        die_id.id = 0;
>>>   -    s = cpu_map__get_socket(map, idx, data);
>>> +    s = cpu_map__get_socket(map, idx, data).id;
>>>       if (s == -1)
>>> -        return -1;
>>> +        return cpu_map__empty_aggr_cpu_id();
>>>         /*
>>>        * Encode socket in bit range 15:8
>>> @@ -191,13 +216,14 @@ int cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data)
>>>        * we need a global id. So we combine
>>>        * socket + die id
>>>        */
>>> -    if (WARN_ONCE(die_id >> 8, "The die id number is too big.\n"))
>>> -        return -1;
>>> +    if (WARN_ONCE(die_id.id >> 8, "The die id number is too big.\n"))
>>> +        return cpu_map__empty_aggr_cpu_id();
>>>         if (WARN_ONCE(s >> 8, "The socket id number is too big.\n"))
>>> -        return -1;
>>> +        return cpu_map__empty_aggr_cpu_id();
>>>   -    return (s << 8) | (die_id & 0xff);
>>> +    die_id.id = (s << 8) | (die_id.id & 0xff);
>>> +    return die_id;
>>>   }
>>>     int cpu_map__get_core_id(int cpu)
>>> @@ -211,21 +237,22 @@ int cpu_map__get_node_id(int cpu)
>>>       return cpu__get_node(cpu);
>>>   }
>>>   -int cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data)
>>> +struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data)
>>>   {
>>> -    int cpu, s_die;
>>> +    int cpu;
>>> +    struct aggr_cpu_id core = cpu_map__empty_aggr_cpu_id();
>>>         if (idx > map->nr)
>>
>> should pre-existing code be idx >= map->nr? I didn't check the code any deeper
> 
> I think you might be right. But there is a mixture of > and >= throughout the file.
> So either the same mistake has been made several times or it's not zero indexed.
> 
> I will look into it.

Hi Garry,

Yes > is an issue and it should be >=. It probably hasn't caused a problem because
the function is never called with idx out of bounds.

I think I'd like to fix this in a separate patchset after this one as it's unrelated
to my change. Although it might have to wait until it's merged otherwise there would
probably be an annoying conflict.

What do you think?


Thanks
James

> 
> Thanks
> James
>>
>>> -        return -1;
>>> +        return cpu_map__empty_aggr_cpu_id();
>>>         cpu = map->map[idx];
>>>   

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH V2 2/3] perf tools: Replace aggregation ID with a struct
  2020-11-17 15:48         ` James Clark
@ 2020-11-17 15:53           ` James Clark
  0 siblings, 0 replies; 6+ messages in thread
From: James Clark @ 2020-11-17 15:53 UTC (permalink / raw)
  To: John Garry, linux-perf-users, jolsa



On 17/11/2020 17:48, James Clark wrote:
> 
> 
> On 17/11/2020 17:05, James Clark wrote:
>>
>> Hi John,
>>
>> Sorry I missed your review comments here. Replies below:
>>
>> On 12/11/2020 17:18, John Garry wrote:
>>>
>>>>   +static void cpu_aggr_map__delete(struct cpu_aggr_map *map)
>>>> +{
>>>> +    if (map) {
>>>
>>> is this check just paranoia?
>>>
>>>> +        WARN_ONCE(refcount_read(&map->refcnt) != 0,
>>>> +              "cpu_aggr_map refcnt unbalanced\n");
>>>
>>> and this?
>>>
>>>> +        free(map);
>>>> +    }
>>>> +}
>>>> +
>>
>> The cpu_aggr_map__delete and cpu_aggr_map__put functions were direct
>> copies of cpu_map__delete and cpu_map__put. I suppose there is more
>> control over the usages of the new ones so the check could possibly be avoided.
>>
>> It all depends on whether perf_stat__exit_aggr_mode() is only ever called
>> once or not. But I think it might make sense to leave the checks for
>> consistency and in case the maps are used somewhere else in the future.
>>
>>
>>>> +static void cpu_aggr_map__put(struct cpu_aggr_map *map)
>>>> +{
>>>> +    if (map && refcount_dec_and_test(&map->refcnt))
>>>> +        cpu_aggr_map__delete(map);
>>>> +}
>>>> +
>>>>   static void perf_stat__exit_aggr_mode(void)
>>>
>>> ...
>>>
>>>>   +struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr)
>>>> +{
>>>> +    struct cpu_aggr_map *cpus = malloc(sizeof(*cpus) + sizeof(struct aggr_cpu_id) * nr);
>>>> +
>>>
>>> if (!cpus)
>>>     return NULL
>>>
>>> cpus->nr = nr;
>>> ...
>>>
>>> this avoids extra indentation and {}
>>>
>>
>> Do you think I should also make this change to the existing perf_cpu_map__empty_new() function
>> above for consistency?
>>
>>
>>>> +    if (cpus != NULL) {
>>>> +        int i;
>>>> +
>>>> +        cpus->nr = nr;
>>>> +        for (i = 0; i < nr; i++)
>>>> +            cpus->map[i] = cpu_map__empty_aggr_cpu_id();
>>>> +
>>>> +        refcount_set(&cpus->refcnt, 1);
>>>> +    }
>>>> +
>>>> +    return cpus;
>>>> +}
>>>> +
>>>>   static int cpu__get_topology_int(int cpu, const char *name, int *value)
>>>>   {
>>>>       char path[PATH_MAX];
>>>> @@ -111,40 +128,47 @@ int cpu_map__get_socket_id(int cpu)
>>>>       return ret ?: value;
>>>>   }
>>>>   -int cpu_map__get_socket(struct perf_cpu_map *map, int idx, void *data __maybe_unused)
>>>> +struct aggr_cpu_id cpu_map__get_socket(struct perf_cpu_map *map, int idx,
>>>> +                    void *data __maybe_unused)
>>>>   {
>>>>       int cpu;It looks like
>>>> +    struct aggr_cpu_id socket = cpu_map__empty_aggr_cpu_id();
>>>>         if (idx > map->nr)
>>>> -        return -1;
>>>> +        return cpu_map__empty_aggr_cpu_id();
>>>>         cpu = map->map[idx];
>>>>   -    return cpu_map__get_socket_id(cpu);
>>>> +    socket.id = cpu_map__get_socket_id(cpu);
>>>> +    return socket;
>>>>   }
>>>>   -static int cmp_ids(const void *a, const void *b)
>>>> +static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer)
>>>>   {
>>>> -    return *(int *)a - *(int *)b;
>>>> +    struct aggr_cpu_id *a = (struct aggr_cpu_id *)a_pointer;
>>>> +    struct aggr_cpu_id *b = (struct aggr_cpu_id *)b_pointer;
>>>> +
>>>> +    return a->id - b->id;
>>>>   }
>>>>   -int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res,
>>>> -               int (*f)(struct perf_cpu_map *map, int cpu, void *data),
>>>> +int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res,
>>>> +               struct aggr_cpu_id (*f)(struct perf_cpu_map *map, int cpu, void *data),
>>>>                  void *data)
>>>>   {
>>>> -    struct perf_cpu_map *c;
>>>> +    struct cpu_aggr_map *c;
>>>>       int nr = cpus->nr;
>>>> -    int cpu, s1, s2;
>>>> +    int cpu, s2;
>>>> +    struct aggr_cpu_id s1;
>>>>         /* allocate as much as possible */
>>>> -    c = calloc(1, sizeof(*c) + nr * sizeof(int));
>>>> +    c = calloc(1, sizeof(*c) + nr * sizeof(struct aggr_cpu_id));
>>>>       if (!c)
>>>>           return -1;
>>>>         for (cpu = 0; cpu < nr; cpu++) {
>>>>           s1 = f(cpus, cpu, data);
>>>>           for (s2 = 0; s2 < c->nr; s2++) {
>>>> -            if (s1 == c->map[s2])
>>>> +            if (cpu_map__compare_aggr_cpu_id(s1, c->map[s2]))
>>>>                   break;
>>>>           }
>>>>           if (s2 == c->nr) {
>>>> @@ -153,7 +177,7 @@ int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res,
>>>>           }
>>>>       }
>>>>       /* ensure we process id in increasing order */
>>>> -    qsort(c->map, c->nr, sizeof(int), cmp_ids);
>>>> +    qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), cmp_aggr_cpu_id);
>>>>         refcount_set(&c->refcnt, 1);
>>>>       *res = c;
>>>> @@ -167,23 +191,24 @@ int cpu_map__get_die_id(int cpu)
>>>>       return ret ?: value;
>>>>   }
>>>>   -int cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data)
>>>> +struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data)
>>>>   {
>>>> -    int cpu, die_id, s;
>>>> +    int cpu, s;
>>>> +    struct aggr_cpu_id die_id = cpu_map__empty_aggr_cpu_id();
>>>>         if (idx > map->nr)
>>>> -        return -1;
>>>> +        return cpu_map__empty_aggr_cpu_id();
>>>>         cpu = map->map[idx];
>>>>   -    die_id = cpu_map__get_die_id(cpu);
>>>> +    die_id.id = cpu_map__get_die_id(cpu);
>>>>       /* There is no die_id on legacy system. */
>>>> -    if (die_id == -1)
>>>> -        die_id = 0;
>>>> +    if (die_id.id == -1)
>>>> +        die_id.id = 0;
>>>>   -    s = cpu_map__get_socket(map, idx, data);
>>>> +    s = cpu_map__get_socket(map, idx, data).id;
>>>>       if (s == -1)
>>>> -        return -1;
>>>> +        return cpu_map__empty_aggr_cpu_id();
>>>>         /*
>>>>        * Encode socket in bit range 15:8
>>>> @@ -191,13 +216,14 @@ int cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data)
>>>>        * we need a global id. So we combine
>>>>        * socket + die id
>>>>        */
>>>> -    if (WARN_ONCE(die_id >> 8, "The die id number is too big.\n"))
>>>> -        return -1;
>>>> +    if (WARN_ONCE(die_id.id >> 8, "The die id number is too big.\n"))
>>>> +        return cpu_map__empty_aggr_cpu_id();
>>>>         if (WARN_ONCE(s >> 8, "The socket id number is too big.\n"))
>>>> -        return -1;
>>>> +        return cpu_map__empty_aggr_cpu_id();
>>>>   -    return (s << 8) | (die_id & 0xff);
>>>> +    die_id.id = (s << 8) | (die_id.id & 0xff);
>>>> +    return die_id;
>>>>   }
>>>>     int cpu_map__get_core_id(int cpu)
>>>> @@ -211,21 +237,22 @@ int cpu_map__get_node_id(int cpu)
>>>>       return cpu__get_node(cpu);
>>>>   }
>>>>   -int cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data)
>>>> +struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data)
>>>>   {
>>>> -    int cpu, s_die;
>>>> +    int cpu;
>>>> +    struct aggr_cpu_id core = cpu_map__empty_aggr_cpu_id();
>>>>         if (idx > map->nr)
>>>
>>> should pre-existing code be idx >= map->nr? I didn't check the code any deeper
>>
>> I think you might be right. But there is a mixture of > and >= throughout the file.
>> So either the same mistake has been made several times or it's not zero indexed.
>>
>> I will look into it.
> 
> Hi Garry,
> 

Sorry, John, not Garry!

> Yes > is an issue and it should be >=. It probably hasn't caused a problem because
> the function is never called with idx out of bounds.
> 
> I think I'd like to fix this in a separate patchset after this one as it's unrelated
> to my change. Although it might have to wait until it's merged otherwise there would
> probably be an annoying conflict.
> 
> What do you think?
> 
> 
> Thanks
> James
> 
>>
>> Thanks
>> James
>>>
>>>> -        return -1;
>>>> +        return cpu_map__empty_aggr_cpu_id();
>>>>         cpu = map->map[idx];
>>>>   

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, back to index

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20201004203545.GB217601@krava>
     [not found] ` <20201028094311.8563-1-james.clark@arm.com>
     [not found]   ` <20201028094311.8563-3-james.clark@arm.com>
2020-11-12 15:18     ` [PATCH V2 2/3] perf tools: Replace aggregation ID with a struct John Garry
2020-11-17 15:05       ` James Clark
2020-11-17 15:48         ` James Clark
2020-11-17 15:53           ` James Clark
     [not found]   ` <20201028094311.8563-4-james.clark@arm.com>
     [not found]     ` <20201108124412.GA196289@krava>
2020-11-12 19:42       ` [PATCH V2 3/3] perf tools: fix perf stat with large socket IDs James Clark
2020-11-12 21:28         ` Jiri Olsa

Linux-perf-users Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-perf-users/0 linux-perf-users/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-perf-users linux-perf-users/ https://lore.kernel.org/linux-perf-users \
		linux-perf-users@vger.kernel.org
	public-inbox-index linux-perf-users

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-perf-users


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git