From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-10.1 required=3.0 tests=DKIMWL_WL_HIGH,DKIM_SIGNED, DKIM_VALID,DKIM_VALID_AU,INCLUDES_PATCH,MAILING_LIST_MULTI,SIGNED_OFF_BY, SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 6A538C432C0 for ; Tue, 3 Dec 2019 13:56:42 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 31954207DD for ; Tue, 3 Dec 2019 13:56:42 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=default; t=1575381402; bh=7Od0qyHVYqaaoPS+f60Ri+bTTON/JcFL+9z5cv5MDVI=; h=From:To:Cc:Subject:Date:In-Reply-To:References:List-ID:From; b=MsECOI6D7EeQn2KIOFIygY+jfkoCVPvt7uTzKzuNGKCxyyAnaLTNHCwW6fXeOO6Br QLOVopYnoccEjLD/r+8o8wSNMh3LiwUEuBiF8aNc6k3qUYCubgFTSEDmZOc8mRZCSB in5Fyz7Xpiy64Apw5q2j8x1nAAVmzQqh5SIoUKqo= Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726915AbfLCN4k (ORCPT ); Tue, 3 Dec 2019 08:56:40 -0500 Received: from mail.kernel.org ([198.145.29.99]:34610 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726806AbfLCN4h (ORCPT ); Tue, 3 Dec 2019 08:56:37 -0500 Received: from quaco.ghostprotocols.net (unknown [179.97.35.50]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPSA id 83B512080F; Tue, 3 Dec 2019 13:56:34 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=default; t=1575381396; bh=7Od0qyHVYqaaoPS+f60Ri+bTTON/JcFL+9z5cv5MDVI=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=oMH6ZC40OM+r/bcN9wLdG6OaQdZf4ynGBQHG48xa4ZXuYERrea3e00c2bPiC81jRl dVZ9mBCin+/p5pHEMdxL9KJ8MDrgkc1WuOe/Yu4aU64nOW13iOuo/xkFU4eQJaOWoN lt1xyEmds4FQ6N3YajpDAPyMHEXEFKBJEZrNT7L0= From: Arnaldo Carvalho de Melo To: Ingo Molnar , Thomas Gleixner Cc: Jiri Olsa , Namhyung Kim , Clark Williams , linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org, Andi Kleen , Arnaldo Carvalho de Melo Subject: [PATCH 08/23] perf stat: Use affinity for reading Date: Tue, 3 Dec 2019 10:55:51 -0300 Message-Id: <20191203135606.24902-9-acme@kernel.org> X-Mailer: git-send-email 2.21.0 In-Reply-To: <20191203135606.24902-1-acme@kernel.org> References: <20191203135606.24902-1-acme@kernel.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Sender: linux-kernel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org From: Andi Kleen Restructure event reading to use affinity to minimize the number of IPIs needed. Before on a large test case with 94 CPUs: % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ---------------- 3.16 0.106079 4 22082 read After: 3.43 0.081295 3 22082 read Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lore.kernel.org/lkml/20191121001522.180827-11-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 97 ++++++++++++++++++++++----------------- tools/perf/util/evsel.h | 1 + 2 files changed, 57 insertions(+), 41 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index cf8516e701e2..a098c2ebf4ea 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -266,15 +266,10 @@ static int read_single_counter(struct evsel *counter, int cpu, * Read out the results of a single counter: * do not aggregate counts across CPUs in system-wide mode */ -static int read_counter(struct evsel *counter, struct timespec *rs) +static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu) { int nthreads = perf_thread_map__nr(evsel_list->core.threads); - int ncpus, cpu, thread; - - if (target__has_cpu(&target) && !target__has_per_thread(&target)) - ncpus = perf_evsel__nr_cpus(counter); - else - ncpus = 1; + int thread; if (!counter->supported) return -ENOENT; @@ -283,40 +278,38 @@ static int read_counter(struct evsel *counter, struct timespec *rs) nthreads = 1; for (thread = 0; thread < nthreads; thread++) { - for (cpu = 0; cpu < ncpus; cpu++) { - struct perf_counts_values *count; - - count = perf_counts(counter->counts, cpu, thread); - - /* - * The leader's group read loads data into its group members - * (via perf_evsel__read_counter) and sets threir count->loaded. - */ - if (!perf_counts__is_loaded(counter->counts, cpu, thread) && - read_single_counter(counter, cpu, thread, rs)) { - counter->counts->scaled = -1; - perf_counts(counter->counts, cpu, thread)->ena = 0; - perf_counts(counter->counts, cpu, thread)->run = 0; - return -1; - } + struct perf_counts_values *count; - perf_counts__set_loaded(counter->counts, cpu, thread, false); + count = perf_counts(counter->counts, cpu, thread); - if (STAT_RECORD) { - if (perf_evsel__write_stat_event(counter, cpu, thread, count)) { - pr_err("failed to write stat event\n"); - return -1; - } - } + /* + * The leader's group read loads data into its group members + * (via perf_evsel__read_counter()) and sets their count->loaded. + */ + if (!perf_counts__is_loaded(counter->counts, cpu, thread) && + read_single_counter(counter, cpu, thread, rs)) { + counter->counts->scaled = -1; + perf_counts(counter->counts, cpu, thread)->ena = 0; + perf_counts(counter->counts, cpu, thread)->run = 0; + return -1; + } + + perf_counts__set_loaded(counter->counts, cpu, thread, false); - if (verbose > 1) { - fprintf(stat_config.output, - "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", - perf_evsel__name(counter), - cpu, - count->val, count->ena, count->run); + if (STAT_RECORD) { + if (perf_evsel__write_stat_event(counter, cpu, thread, count)) { + pr_err("failed to write stat event\n"); + return -1; } } + + if (verbose > 1) { + fprintf(stat_config.output, + "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", + perf_evsel__name(counter), + cpu, + count->val, count->ena, count->run); + } } return 0; @@ -325,15 +318,37 @@ static int read_counter(struct evsel *counter, struct timespec *rs) static void read_counters(struct timespec *rs) { struct evsel *counter; - int ret; + struct affinity affinity; + int i, ncpus, cpu; + + if (affinity__setup(&affinity) < 0) + return; + + ncpus = perf_cpu_map__nr(evsel_list->core.all_cpus); + if (!target__has_cpu(&target) || target__has_per_thread(&target)) + ncpus = 1; + evlist__for_each_cpu(evsel_list, i, cpu) { + if (i >= ncpus) + break; + affinity__set(&affinity, cpu); + + evlist__for_each_entry(evsel_list, counter) { + if (evsel__cpu_iter_skip(counter, cpu)) + continue; + if (!counter->err) { + counter->err = read_counter_cpu(counter, rs, + counter->cpu_iter - 1); + } + } + } + affinity__cleanup(&affinity); evlist__for_each_entry(evsel_list, counter) { - ret = read_counter(counter, rs); - if (ret) + if (counter->err) pr_debug("failed to read counter %s\n", counter->name); - - if (ret == 0 && perf_stat_process_counter(&stat_config, counter)) + if (counter->err == 0 && perf_stat_process_counter(&stat_config, counter)) pr_warning("failed to process counter %s\n", counter->name); + counter->err = 0; } } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index ca82a93960cd..c8af4bc23f8f 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -86,6 +86,7 @@ struct evsel { struct list_head config_terms; struct bpf_object *bpf_obj; int bpf_fd; + int err; bool auto_merge_stats; bool merged_stat; const char * metric_expr; -- 2.21.0