* [PATCH v4 0/3] perf: topology on systems with offline/absent CPUs
@ 2017-02-20 9:33 Jan Stancek
2017-02-20 9:33 ` [PATCH v4 1/3] perf cpu_map: Add cpu__max_present_cpu() Jan Stancek
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: Jan Stancek @ 2017-02-20 9:33 UTC (permalink / raw)
To: linux-kernel
Cc: peterz, mingo, acme, alexander.shishkin, jolsa, mhiramat, jstancek
Changes in v4:
- rebase on top of 4.10
- fix style of commit logs
- add comment to process_cpu_topology()
This series aims to address issues on systems with offline/absent CPUs:
1. cpu_topology_map size
Allocation based on _SC_NPROCESSORS_CONF is too small,
there can be CPU IDs that are higher, which causes invalid reads.
2. build_cpu_topology fails for offline/absent CPUs
It is trying to parse sysfs entries, that do not exist.
3. perf topology test fails
Changes affect HEADER_NRCPUS and HEADER_CPU_TOPOLOGY:
For example on a system like this:
_SC_NPROCESSORS_CONF == 16
available: 2 nodes (0-1)
node 0 cpus: 0 6 8 10 16 22 24 26
node 0 size: 12004 MB
node 0 free: 9470 MB
node 1 cpus: 1 7 9 11 23 25 27
node 1 size: 12093 MB
node 1 free: 9406 MB
node distances:
node 0 1
0: 10 20
1: 20 10
HEADER_NRCPUS before this series:
nr_cpus_online = 15
nr_cpus_available = 16
HEADER_CPU_TOPOLOGY before this series:
core_sib_nr: 2
core_siblings: 0,6,8,10,16,22,24,26
core_siblings: 1,7,9,11,23,25,27
thread_sib_nr: 8
thread_siblings: 0,16
thread_siblings: 1
thread_siblings: 6,22
thread_siblings: 7,23
thread_siblings: 8,24
thread_siblings: 9,25
thread_siblings: 10,26
thread_siblings: 11,27
core_id: 0, socket_id: 0
core_id: 0, socket_id: 1
core_id: -1, socket_id: -1
core_id: -1, socket_id: -1
core_id: -1, socket_id: -1
core_id: -1, socket_id: -1
core_id: 10, socket_id: 0
core_id: 10, socket_id: 1
core_id: 1, socket_id: 0
core_id: 1, socket_id: 1
core_id: 9, socket_id: 0
core_id: 9, socket_id: 1
core_id: -1, socket_id: -1
core_id: -1, socket_id: -1
core_id: -1, socket_id: -1
core_id: -1, socket_id: -1
HEADER_NRCPUS with this series applied:
nr_cpus_online = 15
nr_cpus_available = 28
HEADER_CPU_TOPOLOGY with this series applied:
core_sib_nr: 2
core_siblings: 0,6,8,10,16,22,24,26
core_siblings: 1,7,9,11,23,25,27
thread_sib_nr: 8
thread_siblings: 0,16
thread_siblings: 1
thread_siblings: 6,22
thread_siblings: 7,23
thread_siblings: 8,24
thread_siblings: 9,25
thread_siblings: 10,26
thread_siblings: 11,27
core_id: 0, socket_id: 0
core_id: 0, socket_id: 1
core_id: -1, socket_id: -1
core_id: -1, socket_id: -1
core_id: -1, socket_id: -1
core_id: -1, socket_id: -1
core_id: 10, socket_id: 0
core_id: 10, socket_id: 1
core_id: 1, socket_id: 0
core_id: 1, socket_id: 1
core_id: 9, socket_id: 0
core_id: 9, socket_id: 1
core_id: -1, socket_id: -1
core_id: -1, socket_id: -1
core_id: -1, socket_id: -1
core_id: -1, socket_id: -1
core_id: 0, socket_id: 0
core_id: -1, socket_id: -1
core_id: -1, socket_id: -1
core_id: -1, socket_id: -1
core_id: -1, socket_id: -1
core_id: -1, socket_id: -1
core_id: 10, socket_id: 0
core_id: 10, socket_id: 1
core_id: 1, socket_id: 0
core_id: 1, socket_id: 1
core_id: 9, socket_id: 0
core_id: 9, socket_id: 1
Jan Stancek (3):
perf cpu_map: Add cpu__max_present_cpu()
perf header: Make build_cpu_topology skip offline/absent CPUs
perf: Replace _SC_NPROCESSORS_CONF with max_present_cpu in
cpu_topology_map
tools/perf/builtin-stat.c | 2 +-
tools/perf/tests/topology.c | 4 +++-
tools/perf/util/cpumap.c | 22 ++++++++++++++++++++++
tools/perf/util/cpumap.h | 1 +
tools/perf/util/env.c | 2 +-
tools/perf/util/header.c | 37 ++++++++++++++++++++++++-------------
6 files changed, 52 insertions(+), 16 deletions(-)
--
1.8.3.1
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH v4 1/3] perf cpu_map: Add cpu__max_present_cpu()
2017-02-20 9:33 [PATCH v4 0/3] perf: topology on systems with offline/absent CPUs Jan Stancek
@ 2017-02-20 9:33 ` Jan Stancek
2017-02-20 9:33 ` [PATCH v4 2/3] perf header: Make build_cpu_topology skip offline/absent CPUs Jan Stancek
2017-02-20 9:33 ` [PATCH v4 3/3] perf: Replace _SC_NPROCESSORS_CONF with max_present_cpu in cpu_topology_map Jan Stancek
2 siblings, 0 replies; 4+ messages in thread
From: Jan Stancek @ 2017-02-20 9:33 UTC (permalink / raw)
To: linux-kernel
Cc: peterz, mingo, acme, alexander.shishkin, jolsa, mhiramat, jstancek
Similar to cpu__max_cpu() (which returns max possible CPU),
returns max present CPU.
Signed-off-by: Jan Stancek <jstancek@redhat.com>
---
tools/perf/util/cpumap.c | 22 ++++++++++++++++++++++
tools/perf/util/cpumap.h | 1 +
2 files changed, 23 insertions(+)
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 2c0b52264a46..8c7504939113 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -9,6 +9,7 @@
#include "asm/bug.h"
static int max_cpu_num;
+static int max_present_cpu_num;
static int max_node_num;
static int *cpunode_map;
@@ -442,6 +443,7 @@ static void set_max_cpu_num(void)
/* set up default */
max_cpu_num = 4096;
+ max_present_cpu_num = 4096;
mnt = sysfs__mountpoint();
if (!mnt)
@@ -455,6 +457,17 @@ static void set_max_cpu_num(void)
}
ret = get_max_num(path, &max_cpu_num);
+ if (ret)
+ goto out;
+
+ /* get the highest present cpu number for a sparse allocation */
+ ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/present", mnt);
+ if (ret == PATH_MAX) {
+ pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
+ goto out;
+ }
+
+ ret = get_max_num(path, &max_present_cpu_num);
out:
if (ret)
@@ -505,6 +518,15 @@ int cpu__max_cpu(void)
return max_cpu_num;
}
+int cpu__max_present_cpu(void)
+{
+ if (unlikely(!max_present_cpu_num))
+ set_max_cpu_num();
+
+ return max_present_cpu_num;
+}
+
+
int cpu__get_node(int cpu)
{
if (unlikely(cpunode_map == NULL)) {
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 06bd689f5989..1a0549af8f5c 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -62,6 +62,7 @@ static inline bool cpu_map__empty(const struct cpu_map *map)
int cpu__max_node(void);
int cpu__max_cpu(void);
+int cpu__max_present_cpu(void);
int cpu__get_node(int cpu);
int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res,
--
1.8.3.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH v4 2/3] perf header: Make build_cpu_topology skip offline/absent CPUs
2017-02-20 9:33 [PATCH v4 0/3] perf: topology on systems with offline/absent CPUs Jan Stancek
2017-02-20 9:33 ` [PATCH v4 1/3] perf cpu_map: Add cpu__max_present_cpu() Jan Stancek
@ 2017-02-20 9:33 ` Jan Stancek
2017-02-20 9:33 ` [PATCH v4 3/3] perf: Replace _SC_NPROCESSORS_CONF with max_present_cpu in cpu_topology_map Jan Stancek
2 siblings, 0 replies; 4+ messages in thread
From: Jan Stancek @ 2017-02-20 9:33 UTC (permalink / raw)
To: linux-kernel
Cc: peterz, mingo, acme, alexander.shishkin, jolsa, mhiramat, jstancek
When build_cpu_topo() encounters offline/absent CPUs,
it fails to find any sysfs entries and returns failure.
This leads to build_cpu_topology() and write_cpu_topology()
failing as well.
Because HEADER_CPU_TOPOLOGY has not been written, read leaves
cpu_topology_map NULL and we get NULL ptr deref at:
...
cmd_test
__cmd_test
test_and_print
run_test
test_session_topology
check_cpu_topology
# perf test 36
36: Session topology :
--- start ---
test child forked, pid 14902
templ file: /tmp/perf-test-4CKocW
failed to write feature HEADER_CPU_TOPOLOGY
perf: Segmentation fault
Obtained 9 stack frames.
./perf(sighandler_dump_stack+0x41) [0x5095f1]
/lib64/libc.so.6(+0x35250) [0x7f4b7c3c9250]
./perf(test_session_topology+0x1db) [0x490ceb]
./perf() [0x475b68]
./perf(cmd_test+0x5b9) [0x4763c9]
./perf() [0x4945a3]
./perf(main+0x69f) [0x427e8f]
/lib64/libc.so.6(__libc_start_main+0xf5) [0x7f4b7c3b5b35]
./perf() [0x427fb9]
test child interrupted
---- end ----
Session topology: FAILED!
This patch makes build_cpu_topology() skip offline/absent CPUs,
by checking their presence against cpu_map built from online CPUs.
Signed-off-by: Jan Stancek <jstancek@redhat.com>
---
| 19 ++++++++++++++++---
1 file changed, 16 insertions(+), 3 deletions(-)
--git a/tools/perf/util/header.c b/tools/perf/util/header.c
index d89c9c7ef4e5..ca0f12fb5fd3 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -503,24 +503,31 @@ static void free_cpu_topo(struct cpu_topo *tp)
static struct cpu_topo *build_cpu_topology(void)
{
- struct cpu_topo *tp;
+ struct cpu_topo *tp = NULL;
void *addr;
u32 nr, i;
size_t sz;
long ncpus;
int ret = -1;
+ struct cpu_map *map;
ncpus = sysconf(_SC_NPROCESSORS_CONF);
if (ncpus < 0)
return NULL;
+ /* build online CPU map */
+ map = cpu_map__new(NULL);
+ if (map == NULL) {
+ pr_debug("failed to get system cpumap\n");
+ return NULL;
+ }
+
nr = (u32)(ncpus & UINT_MAX);
sz = nr * sizeof(char *);
-
addr = calloc(1, sizeof(*tp) + 2 * sz);
if (!addr)
- return NULL;
+ goto out_free;
tp = addr;
tp->cpu_nr = nr;
@@ -530,10 +537,16 @@ static struct cpu_topo *build_cpu_topology(void)
tp->thread_siblings = addr;
for (i = 0; i < nr; i++) {
+ if (!cpu_map__has(map, i))
+ continue;
+
ret = build_cpu_topo(tp, i);
if (ret < 0)
break;
}
+
+out_free:
+ cpu_map__put(map);
if (ret) {
free_cpu_topo(tp);
tp = NULL;
--
1.8.3.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH v4 3/3] perf: Replace _SC_NPROCESSORS_CONF with max_present_cpu in cpu_topology_map
2017-02-20 9:33 [PATCH v4 0/3] perf: topology on systems with offline/absent CPUs Jan Stancek
2017-02-20 9:33 ` [PATCH v4 1/3] perf cpu_map: Add cpu__max_present_cpu() Jan Stancek
2017-02-20 9:33 ` [PATCH v4 2/3] perf header: Make build_cpu_topology skip offline/absent CPUs Jan Stancek
@ 2017-02-20 9:33 ` Jan Stancek
2 siblings, 0 replies; 4+ messages in thread
From: Jan Stancek @ 2017-02-20 9:33 UTC (permalink / raw)
To: linux-kernel
Cc: peterz, mingo, acme, alexander.shishkin, jolsa, mhiramat, jstancek
There are 2 problems wrt. cpu_topology_map on systems with sparse CPUs:
1. offline/absent CPUs will have their socket_id and core_id set to -1
which triggers:
"socket_id number is too big.You may need to upgrade the perf tool."
2. size of cpu_topology_map (perf_env.cpu[]) is allocated based on
_SC_NPROCESSORS_CONF, but can be indexed with CPU ids going above.
Users of perf_env.cpu[] are using CPU id as index. This can lead
to read beyond what was allocated:
==19991== Invalid read of size 4
==19991== at 0x490CEB: check_cpu_topology (topology.c:69)
==19991== by 0x490CEB: test_session_topology (topology.c:106)
...
For example:
_SC_NPROCESSORS_CONF == 16
available: 2 nodes (0-1)
node 0 cpus: 0 6 8 10 16 22 24 26
node 0 size: 12004 MB
node 0 free: 9470 MB
node 1 cpus: 1 7 9 11 23 25 27
node 1 size: 12093 MB
node 1 free: 9406 MB
node distances:
node 0 1
0: 10 20
1: 20 10
This patch changes HEADER_NRCPUS.nr_cpus_available from _SC_NPROCESSORS_CONF
to max_present_cpu and updates any user of cpu_topology_map to iterate
with nr_cpus_avail. As consequence HEADER_CPU_TOPOLOGY core_id and
socket_id lists get longer, but maintain compatibility with pre-patch
state - index to cpu_topology_map is CPU id.
# ./perf test 36 -v
36: Session topology :
--- start ---
test child forked, pid 22211
templ file: /tmp/perf-test-gmdX5i
CPU 0, core 0, socket 0
CPU 1, core 0, socket 1
CPU 6, core 10, socket 0
CPU 7, core 10, socket 1
CPU 8, core 1, socket 0
CPU 9, core 1, socket 1
CPU 10, core 9, socket 0
CPU 11, core 9, socket 1
CPU 16, core 0, socket 0
CPU 22, core 10, socket 0
CPU 23, core 10, socket 1
CPU 24, core 1, socket 0
CPU 25, core 1, socket 1
CPU 26, core 9, socket 0
CPU 27, core 9, socket 1
test child finished with 0
---- end ----
Session topology: Ok
Signed-off-by: Jan Stancek <jstancek@redhat.com>
---
tools/perf/builtin-stat.c | 2 +-
tools/perf/tests/topology.c | 4 +++-
tools/perf/util/env.c | 2 +-
| 20 +++++++++-----------
4 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a02f2e965628..9e2be1c52fbd 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1765,7 +1765,7 @@ static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, i
cpu = map->map[idx];
- if (cpu >= env->nr_cpus_online)
+ if (cpu >= env->nr_cpus_avail)
return -1;
return cpu;
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index 98fe69ac553c..803f893550d6 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -65,7 +65,9 @@ static int check_cpu_topology(char *path, struct cpu_map *map)
session = perf_session__new(&file, false, NULL);
TEST_ASSERT_VAL("can't get session", session);
- for (i = 0; i < session->header.env.nr_cpus_online; i++) {
+ for (i = 0; i < session->header.env.nr_cpus_avail; i++) {
+ if (!cpu_map__has(map, i))
+ continue;
pr_debug("CPU %d, core %d, socket %d\n", i,
session->header.env.cpu[i].core_id,
session->header.env.cpu[i].socket_id);
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index bb964e86b09d..075fc77286bf 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -66,7 +66,7 @@ int perf_env__read_cpu_topology_map(struct perf_env *env)
return 0;
if (env->nr_cpus_avail == 0)
- env->nr_cpus_avail = sysconf(_SC_NPROCESSORS_CONF);
+ env->nr_cpus_avail = cpu__max_present_cpu();
nr_cpus = env->nr_cpus_avail;
if (nr_cpus == -1)
--git a/tools/perf/util/header.c b/tools/perf/util/header.c
index ca0f12fb5fd3..e97cbf78ef21 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -293,11 +293,7 @@ static int write_nrcpus(int fd, struct perf_header *h __maybe_unused,
u32 nrc, nra;
int ret;
- nr = sysconf(_SC_NPROCESSORS_CONF);
- if (nr < 0)
- return -1;
-
- nrc = (u32)(nr & UINT_MAX);
+ nrc = cpu__max_present_cpu();
nr = sysconf(_SC_NPROCESSORS_ONLN);
if (nr < 0)
@@ -511,9 +507,7 @@ static struct cpu_topo *build_cpu_topology(void)
int ret = -1;
struct cpu_map *map;
- ncpus = sysconf(_SC_NPROCESSORS_CONF);
- if (ncpus < 0)
- return NULL;
+ ncpus = cpu__max_present_cpu();
/* build online CPU map */
map = cpu_map__new(NULL);
@@ -1137,7 +1131,7 @@ static void print_cpu_topology(struct perf_header *ph, int fd __maybe_unused,
{
int nr, i;
char *str;
- int cpu_nr = ph->env.nr_cpus_online;
+ int cpu_nr = ph->env.nr_cpus_avail;
nr = ph->env.nr_sibling_cores;
str = ph->env.sibling_cores;
@@ -1792,7 +1786,7 @@ static int process_cpu_topology(struct perf_file_section *section,
u32 nr, i;
char *str;
struct strbuf sb;
- int cpu_nr = ph->env.nr_cpus_online;
+ int cpu_nr = ph->env.nr_cpus_avail;
u64 size = 0;
ph->env.cpu = calloc(cpu_nr, sizeof(*ph->env.cpu));
@@ -1873,7 +1867,11 @@ static int process_cpu_topology(struct perf_file_section *section,
if (ph->needs_swap)
nr = bswap_32(nr);
- if (nr > (u32)cpu_nr) {
+ /*
+ * Offline/absent CPUs have their socket_id set to -1,
+ * don't treat that as error.
+ */
+ if (nr != (u32)-1 && nr > (u32)cpu_nr) {
pr_debug("socket_id number is too big."
"You may need to upgrade the perf tool.\n");
goto free_cpu;
--
1.8.3.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
end of thread, other threads:[~2017-02-20 9:34 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-20 9:33 [PATCH v4 0/3] perf: topology on systems with offline/absent CPUs Jan Stancek
2017-02-20 9:33 ` [PATCH v4 1/3] perf cpu_map: Add cpu__max_present_cpu() Jan Stancek
2017-02-20 9:33 ` [PATCH v4 2/3] perf header: Make build_cpu_topology skip offline/absent CPUs Jan Stancek
2017-02-20 9:33 ` [PATCH v4 3/3] perf: Replace _SC_NPROCESSORS_CONF with max_present_cpu in cpu_topology_map Jan Stancek
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).