From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754061AbbGRCxn (ORCPT ); Fri, 17 Jul 2015 22:53:43 -0400 Received: from mail-wi0-f182.google.com ([209.85.212.182]:37248 "EHLO mail-wi0-f182.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751900AbbGRCxl (ORCPT ); Fri, 17 Jul 2015 22:53:41 -0400 Date: Sat, 18 Jul 2015 04:53:34 +0200 From: Ingo Molnar To: Linus Torvalds Cc: linux-kernel@vger.kernel.org, Arnaldo Carvalho de Melo , Peter Zijlstra , Thomas Gleixner , Andrew Morton Subject: [GIT PULL] perf fixes Message-ID: <20150718025334.GA15153@gmail.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline User-Agent: Mutt/1.5.23 (2014-03-12) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Linus, Please pull the latest perf-urgent-for-linus git tree from: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus # HEAD: a6acd6a41168dc304738e84c8360cda9a6b86887 Merge tag 'perf-urgent-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent Mostly tooling fixes, plus a static key fix fixing /sys/devices/cpu/rdpmc. Thanks, Ingo ------------------> Adrian Hunter (2): perf tools: Fix lockup using 32-bit compat vdso perf auxtrace: Fix misplaced check for HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT Alexey Brodkin (1): perf tools: Really allow to specify custom CC, AR or LD Arnaldo Carvalho de Melo (5): perf thread_map: Fix the sizeof() calculation for map entries perf tools: Fix the detached tarball wrt rbtree copy tools: Copy lib/hweight.c from the kernel sources perf symbols: Store if there is a filter in place perf hists browser: Take the --comm, --dsos, etc filters into account Jiri Olsa (1): perf stat: Fix shadow declaration of close Peter Zijlstra (1): x86, perf: Fix static_key bug in load_mm_cr4() Riku Voipio (1): tools lib: Improve clean target arch/x86/include/asm/mmu_context.h | 2 +- tools/lib/api/Makefile | 2 +- tools/lib/hweight.c | 62 ++++++++++++++++++++++++++++++++++++++ tools/lib/traceevent/Makefile | 2 +- tools/perf/MANIFEST | 2 +- tools/perf/Makefile.perf | 19 ++++++++++-- tools/perf/builtin-stat.c | 4 +-- tools/perf/ui/browsers/hists.c | 2 +- tools/perf/util/Build | 2 +- tools/perf/util/auxtrace.c | 10 +++--- tools/perf/util/python-ext-sources | 4 +-- tools/perf/util/symbol.c | 2 ++ tools/perf/util/symbol.h | 3 +- tools/perf/util/thread_map.c | 3 +- tools/perf/util/vdso.c | 8 ++--- 15 files changed, 101 insertions(+), 26 deletions(-) create mode 100644 tools/lib/hweight.c diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 5e8daee7c5c9..804a3a6030ca 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -23,7 +23,7 @@ extern struct static_key rdpmc_always_available; static inline void load_mm_cr4(struct mm_struct *mm) { - if (static_key_true(&rdpmc_always_available) || + if (static_key_false(&rdpmc_always_available) || atomic_read(&mm->context.perf_rdpmc_allowed)) cr4_set_bits(X86_CR4_PCE); else diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index 8bd960658463..fe1b02c2c95b 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -36,7 +36,7 @@ $(LIBFILE): $(API_IN) clean: $(call QUIET_CLEAN, libapi) $(RM) $(LIBFILE); \ - find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o | xargs $(RM) + find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM) FORCE: diff --git a/tools/lib/hweight.c b/tools/lib/hweight.c new file mode 100644 index 000000000000..0b859b884339 --- /dev/null +++ b/tools/lib/hweight.c @@ -0,0 +1,62 @@ +#include +#include + +/** + * hweightN - returns the hamming weight of a N-bit word + * @x: the word to weigh + * + * The Hamming Weight of a number is the total number of bits set in it. + */ + +unsigned int __sw_hweight32(unsigned int w) +{ +#ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER + w -= (w >> 1) & 0x55555555; + w = (w & 0x33333333) + ((w >> 2) & 0x33333333); + w = (w + (w >> 4)) & 0x0f0f0f0f; + return (w * 0x01010101) >> 24; +#else + unsigned int res = w - ((w >> 1) & 0x55555555); + res = (res & 0x33333333) + ((res >> 2) & 0x33333333); + res = (res + (res >> 4)) & 0x0F0F0F0F; + res = res + (res >> 8); + return (res + (res >> 16)) & 0x000000FF; +#endif +} + +unsigned int __sw_hweight16(unsigned int w) +{ + unsigned int res = w - ((w >> 1) & 0x5555); + res = (res & 0x3333) + ((res >> 2) & 0x3333); + res = (res + (res >> 4)) & 0x0F0F; + return (res + (res >> 8)) & 0x00FF; +} + +unsigned int __sw_hweight8(unsigned int w) +{ + unsigned int res = w - ((w >> 1) & 0x55); + res = (res & 0x33) + ((res >> 2) & 0x33); + return (res + (res >> 4)) & 0x0F; +} + +unsigned long __sw_hweight64(__u64 w) +{ +#if BITS_PER_LONG == 32 + return __sw_hweight32((unsigned int)(w >> 32)) + + __sw_hweight32((unsigned int)w); +#elif BITS_PER_LONG == 64 +#ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER + w -= (w >> 1) & 0x5555555555555555ul; + w = (w & 0x3333333333333333ul) + ((w >> 2) & 0x3333333333333333ul); + w = (w + (w >> 4)) & 0x0f0f0f0f0f0f0f0ful; + return (w * 0x0101010101010101ul) >> 56; +#else + __u64 res = w - ((w >> 1) & 0x5555555555555555ul); + res = (res & 0x3333333333333333ul) + ((res >> 2) & 0x3333333333333333ul); + res = (res + (res >> 4)) & 0x0F0F0F0F0F0F0F0Ful; + res = res + (res >> 8); + res = res + (res >> 16); + return (res + (res >> 32)) & 0x00000000000000FFul; +#endif +#endif +} diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile index 6daaff652aff..7851df1490e0 100644 --- a/tools/lib/traceevent/Makefile +++ b/tools/lib/traceevent/Makefile @@ -268,7 +268,7 @@ install: install_lib clean: $(call QUIET_CLEAN, libtraceevent) \ - $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d \ + $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d .*.cmd \ $(RM) TRACEEVENT-CFLAGS tags TAGS PHONY += force plugins diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 09dc0aabb515..d01a0aad5a01 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -18,6 +18,7 @@ tools/arch/x86/include/asm/atomic.h tools/arch/x86/include/asm/rmwcc.h tools/lib/traceevent tools/lib/api +tools/lib/hweight.c tools/lib/rbtree.c tools/lib/symbol/kallsyms.c tools/lib/symbol/kallsyms.h @@ -57,7 +58,6 @@ include/linux/perf_event.h include/linux/list.h include/linux/hash.h include/linux/stringify.h -lib/hweight.c include/linux/swab.h arch/*/include/asm/unistd*.h arch/*/include/uapi/asm/unistd*.h diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 7a4b549214e3..bba34636b733 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -109,9 +109,22 @@ $(OUTPUT)PERF-VERSION-FILE: ../../.git/HEAD $(Q)$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) $(Q)touch $(OUTPUT)PERF-VERSION-FILE -CC = $(CROSS_COMPILE)gcc -LD ?= $(CROSS_COMPILE)ld -AR = $(CROSS_COMPILE)ar +# Makefiles suck: This macro sets a default value of $(2) for the +# variable named by $(1), unless the variable has been set by +# environment or command line. This is necessary for CC and AR +# because make sets default values, so the simpler ?= approach +# won't work as expected. +define allow-override + $(if $(or $(findstring environment,$(origin $(1))),\ + $(findstring command line,$(origin $(1)))),,\ + $(eval $(1) = $(2))) +endef + +# Allow setting CC and AR and LD, or setting CROSS_COMPILE as a prefix. +$(call allow-override,CC,$(CROSS_COMPILE)gcc) +$(call allow-override,AR,$(CROSS_COMPILE)ar) +$(call allow-override,LD,$(CROSS_COMPILE)ld) + PKG_CONFIG = $(CROSS_COMPILE)pkg-config RM = rm -f diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 37e301a32f43..d99d850e1444 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -343,7 +343,7 @@ static int read_counter(struct perf_evsel *counter) return 0; } -static void read_counters(bool close) +static void read_counters(bool close_counters) { struct perf_evsel *counter; @@ -354,7 +354,7 @@ static void read_counters(bool close) if (process_counter(counter)) pr_warning("failed to process counter %s\n", counter->name); - if (close) { + if (close_counters) { perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), thread_map__nr(evsel_list->threads)); } diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 7629bef2fd79..fa67613976a8 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -48,7 +48,7 @@ static struct rb_node *hists__filter_entries(struct rb_node *nd, static bool hist_browser__has_filter(struct hist_browser *hb) { - return hists__has_filter(hb->hists) || hb->min_pcnt; + return hists__has_filter(hb->hists) || hb->min_pcnt || symbol_conf.has_filter; } static int hist_browser__get_folding(struct hist_browser *browser) diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 601d11440596..d2d318c59b37 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -143,6 +143,6 @@ $(OUTPUT)util/rbtree.o: ../lib/rbtree.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) -$(OUTPUT)util/hweight.o: ../../lib/hweight.c FORCE +$(OUTPUT)util/hweight.o: ../lib/hweight.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 7e7405c9b936..83d9dd96fe08 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -53,11 +53,6 @@ int auxtrace_mmap__mmap(struct auxtrace_mmap *mm, { struct perf_event_mmap_page *pc = userpg; -#if BITS_PER_LONG != 64 && !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT) - pr_err("Cannot use AUX area tracing mmaps\n"); - return -1; -#endif - WARN_ONCE(mm->base, "Uninitialized auxtrace_mmap\n"); mm->userpg = userpg; @@ -73,6 +68,11 @@ int auxtrace_mmap__mmap(struct auxtrace_mmap *mm, return 0; } +#if BITS_PER_LONG != 64 && !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT) + pr_err("Cannot use AUX area tracing mmaps\n"); + return -1; +#endif + pc->aux_offset = mp->offset; pc->aux_size = mp->len; diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index e23ded40c79e..0766d98c5da5 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -10,7 +10,7 @@ util/ctype.c util/evlist.c util/evsel.c util/cpumap.c -../../lib/hweight.c +../lib/hweight.c util/thread_map.c util/util.c util/xyarray.c @@ -19,5 +19,5 @@ util/rblist.c util/stat.c util/strlist.c util/trace-event.c -../../lib/rbtree.c +../lib/rbtree.c util/string.c diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 48b588c6951a..60f11414bb5c 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1911,6 +1911,8 @@ int setup_list(struct strlist **list, const char *list_str, pr_err("problems parsing %s list\n", list_name); return -1; } + + symbol_conf.has_filter = true; return 0; } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index bef47ead1d9b..b98ce51af142 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -105,7 +105,8 @@ struct symbol_conf { demangle_kernel, filter_relative, show_hist_headers, - branch_callstack; + branch_callstack, + has_filter; const char *vmlinux_name, *kallsyms_name, *source_prefix, diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index da7646d767fe..292ae2c90e06 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -136,8 +136,7 @@ struct thread_map *thread_map__new_by_uid(uid_t uid) if (grow) { struct thread_map *tmp; - tmp = realloc(threads, (sizeof(*threads) + - max_threads * sizeof(pid_t))); + tmp = thread_map__realloc(threads, max_threads); if (tmp == NULL) goto out_free_namelist; diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c index 4b89118f158d..44d440da15dc 100644 --- a/tools/perf/util/vdso.c +++ b/tools/perf/util/vdso.c @@ -236,18 +236,16 @@ static struct dso *__machine__findnew_compat(struct machine *machine, const char *file_name; struct dso *dso; - pthread_rwlock_wrlock(&machine->dsos.lock); dso = __dsos__find(&machine->dsos, vdso_file->dso_name, true); if (dso) - goto out_unlock; + goto out; file_name = vdso__get_compat_file(vdso_file); if (!file_name) - goto out_unlock; + goto out; dso = __machine__addnew_vdso(machine, vdso_file->dso_name, file_name); -out_unlock: - pthread_rwlock_unlock(&machine->dsos.lock); +out: return dso; }