* [GIT PULL] perf fixes
@ 2019-01-11  7:44 Ingo Molnar
From: Ingo Molnar @ 2019-01-11  7:44 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Arnaldo Carvalho de Melo, Peter Zijlstra,
	Thomas Gleixner, Andrew Morton

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: 4e72ee8872279a70ebe973172133b98e8acbf54e Merge tag 'perf-core-for-mingo-5.0-20190110' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent

Tooling changes only: fixes and a few stray improvements.

Most of the diffstat is dominated by a PowerPC-related fix of system call 
trace output beautification that allows us to (again) use the UAPI header 
version and sync up with the kernel's version of the PowerPC system call 
names in the arch/powerpc/kernel/syscalls/syscall.tbl table.
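
For reference, the reworked mksyscalltbl script (included in the diff below) 
turns the new tools/perf copy of syscall.tbl into a plain C array indexed by 
syscall number. A trimmed, illustrative sketch of the generated 64-bit table 
(not part of the patch itself) looks roughly like this:

	/* sketch of mksyscalltbl output, trimmed for brevity */
	static const char *syscalltbl_powerpc_64[] = {
		[0] = "restart_syscall",
		[1] = "exit",
		[2] = "fork",
		[3] = "read",
		[4] = "write",
	};
	#define SYSCALLTBL_POWERPC_64_MAX_ID 4 /* much larger in the full table */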

 Thanks,

	Ingo

------------------>
Arnaldo Carvalho de Melo (19):
      tools headers x86: Sync disabled-features.h
      tools headers uapi: Sync prctl.h with the kernel sources
      tools beauty: Make the prctl option table generator catch all PR_ options
      tools headers uapi: Update i915_drm.h
      tools headers uapi: Sync linux/in.h copy from the kernel sources
      tools headers uapi: Sync linux/kvm.h with the kernel sources
      tools headers uapi: Sync copy of asm-generic/unistd.h with the kernel sources
      tools headers x86: Sync asm/cpufeatures.h copy with the kernel sources
      perf python: Make sure the python binding output directory is in place
      perf test shell: Use a fallback to get the pathname in vfs_getname
      perf trace: Fix ')' placement in "interrupted" syscall lines
      perf trace: Fix alignment for [continued] lines
      perf top: Lift restriction on using callchains without "sym" in --sort
      tools include uapi: Grab a copy of linux/mount.h
      perf beauty: Switch from using uapi/linux/fs.h to uapi/linux/mount.h
      tools include uapi: Sync linux/fs.h copy with the kernel sources
      tools include uapi: Sync linux/vhost.h with the kernel sources
      tools include uapi: Sync linux/if_link.h copy with the kernel sources
      perf symbols: Add 'arch_cpu_idle' to the list of kernel idle symbols

Florian Fainelli (2):
      perf tools: Make find_vdso_map() more modular
      perf tests: Add a test for the ARM 32-bit [vectors] page

Ivan Krylov (1):
      perf annotate: Pass filename to objdump via execl

Jin Yao (2):
      perf stat: Fix endless wait for child process
      perf report: Fix wrong iteration count in --branch-history

Jiri Olsa (2):
      tools thermal tmon: Use -O3 instead of -O1 if available
      tools iio: Override CFLAGS assignments

Mattias Jacobsson (1):
      perf strbuf: Remove redundant va_end() in strbuf_addv()

Ravi Bangoria (2):
      perf powerpc: Rework syscall table generation
      tools headers powerpc: Remove unistd.h

Tzvetomir Stoyanov (7):
      tools lib traceevent: Introduce new libtracevent API: tep_override_comm()
      tools lib traceevent: Initialize host_bigendian at tep_handle allocation
      tools lib traceevent: Rename struct cmdline to struct tep_cmdline
      tools lib traceevent: Changed return logic of trace_seq_printf() and trace_seq_vprintf() APIs
      tools lib traceevent: Changed return logic of tep_register_event_handler() API
      tools lib traceevent: Rename tep_is_file_bigendian() to tep_file_bigendian()
      tools lib traceevent: Remove tep_data_event_from_type() API


 tools/arch/powerpc/include/uapi/asm/unistd.h       | 404 -------------------
 tools/arch/x86/include/asm/cpufeatures.h           |   2 +
 tools/arch/x86/include/asm/disabled-features.h     |   8 +-
 tools/include/uapi/asm-generic/unistd.h            |   4 +-
 tools/include/uapi/drm/i915_drm.h                  |   8 +
 tools/include/uapi/linux/fs.h                      |  60 +--
 tools/include/uapi/linux/if_link.h                 |  19 +
 tools/include/uapi/linux/in.h                      |  10 +-
 tools/include/uapi/linux/kvm.h                     |  19 +
 tools/include/uapi/linux/mount.h                   |  58 +++
 tools/include/uapi/linux/prctl.h                   |   8 +
 tools/include/uapi/linux/vhost.h                   | 113 +-----
 tools/lib/traceevent/event-parse-api.c             |   4 +-
 tools/lib/traceevent/event-parse-local.h           |   4 +-
 tools/lib/traceevent/event-parse.c                 | 129 ++++---
 tools/lib/traceevent/event-parse.h                 |  17 +-
 tools/lib/traceevent/plugin_kvm.c                  |   2 +-
 tools/lib/traceevent/trace-seq.c                   |  17 +-
 tools/perf/Makefile.perf                           |   8 +-
 tools/perf/arch/arm/tests/Build                    |   1 +
 tools/perf/arch/arm/tests/arch-tests.c             |   4 +
 tools/perf/arch/arm/tests/vectors-page.c           |  24 ++
 tools/perf/arch/powerpc/Makefile                   |  15 +-
 .../perf/arch/powerpc/entry/syscalls/mksyscalltbl  |  22 +-
 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl | 427 +++++++++++++++++++++
 tools/perf/builtin-stat.c                          |   3 +-
 tools/perf/builtin-top.c                           |   7 +-
 tools/perf/builtin-trace.c                         |  15 +-
 tools/perf/check-headers.sh                        |   2 +-
 tools/perf/perf-read-vdso.c                        |   6 +-
 tools/perf/tests/shell/lib/probe_vfs_getname.sh    |   3 +-
 tools/perf/tests/tests.h                           |   5 +
 tools/perf/trace/beauty/mount_flags.sh             |   4 +-
 tools/perf/trace/beauty/prctl_option.sh            |   2 +-
 tools/perf/util/annotate.c                         |   8 +-
 tools/perf/util/callchain.c                        |  32 +-
 tools/perf/util/callchain.h                        |   1 +
 tools/perf/util/{find-vdso-map.c => find-map.c}    |   7 +-
 tools/perf/util/machine.c                          |   2 +-
 tools/perf/util/strbuf.c                           |   1 -
 tools/perf/util/symbol.c                           |   1 +
 tools/perf/util/vdso.c                             |   6 +-
 tools/thermal/tmon/Makefile                        |   2 +-
 43 files changed, 797 insertions(+), 697 deletions(-)
 delete mode 100644 tools/arch/powerpc/include/uapi/asm/unistd.h
 create mode 100644 tools/include/uapi/linux/mount.h
 create mode 100644 tools/perf/arch/arm/tests/vectors-page.c
 create mode 100644 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
 rename tools/perf/util/{find-vdso-map.c => find-map.c} (71%)
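
Also for reference, a minimal usage sketch (illustrative only, not part of the 
patch) of the new libtraceevent interfaces introduced below: tep_override_comm() 
and the TEP_REGISTER_SUCCESS* return values of tep_register_event_handler():

	/* illustrative sketch only -- exercises the new libtraceevent APIs */
	#include <stdio.h>
	#include "event-parse.h"

	static int dummy_handler(struct trace_seq *s, struct tep_record *record,
				 struct tep_event *event, void *context)
	{
		return 0;
	}

	int main(void)
	{
		struct tep_handle *tep = tep_alloc(); /* now also sets host_bigendian */
		int ret;

		if (!tep)
			return 1;

		/* tep_register_comm() fails with EEXIST once the pid is known;
		 * tep_override_comm() replaces the stored comm instead. */
		tep_register_comm(tep, "old-comm", 1234);
		tep_override_comm(tep, "new-comm", 1234);

		/* The handler API now distinguishes a fresh registration from
		 * overwriting an existing handler. */
		ret = tep_register_event_handler(tep, -1, "sched", "sched_switch",
						 dummy_handler, NULL);
		if (ret == TEP_REGISTER_SUCCESS)
			printf("new handler registered\n");
		else if (ret == TEP_REGISTER_SUCCESS_OVERWRITE)
			printf("existing handler overwritten\n");

		tep_free(tep);
		return 0;
	}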

diff --git a/tools/arch/powerpc/include/uapi/asm/unistd.h b/tools/arch/powerpc/include/uapi/asm/unistd.h
deleted file mode 100644
index 985534d0b448..000000000000
--- a/tools/arch/powerpc/include/uapi/asm/unistd.h
+++ /dev/null
@@ -1,404 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
-/*
- * This file contains the system call numbers.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#ifndef _UAPI_ASM_POWERPC_UNISTD_H_
-#define _UAPI_ASM_POWERPC_UNISTD_H_
-
-
-#define __NR_restart_syscall	  0
-#define __NR_exit		  1
-#define __NR_fork		  2
-#define __NR_read		  3
-#define __NR_write		  4
-#define __NR_open		  5
-#define __NR_close		  6
-#define __NR_waitpid		  7
-#define __NR_creat		  8
-#define __NR_link		  9
-#define __NR_unlink		 10
-#define __NR_execve		 11
-#define __NR_chdir		 12
-#define __NR_time		 13
-#define __NR_mknod		 14
-#define __NR_chmod		 15
-#define __NR_lchown		 16
-#define __NR_break		 17
-#define __NR_oldstat		 18
-#define __NR_lseek		 19
-#define __NR_getpid		 20
-#define __NR_mount		 21
-#define __NR_umount		 22
-#define __NR_setuid		 23
-#define __NR_getuid		 24
-#define __NR_stime		 25
-#define __NR_ptrace		 26
-#define __NR_alarm		 27
-#define __NR_oldfstat		 28
-#define __NR_pause		 29
-#define __NR_utime		 30
-#define __NR_stty		 31
-#define __NR_gtty		 32
-#define __NR_access		 33
-#define __NR_nice		 34
-#define __NR_ftime		 35
-#define __NR_sync		 36
-#define __NR_kill		 37
-#define __NR_rename		 38
-#define __NR_mkdir		 39
-#define __NR_rmdir		 40
-#define __NR_dup		 41
-#define __NR_pipe		 42
-#define __NR_times		 43
-#define __NR_prof		 44
-#define __NR_brk		 45
-#define __NR_setgid		 46
-#define __NR_getgid		 47
-#define __NR_signal		 48
-#define __NR_geteuid		 49
-#define __NR_getegid		 50
-#define __NR_acct		 51
-#define __NR_umount2		 52
-#define __NR_lock		 53
-#define __NR_ioctl		 54
-#define __NR_fcntl		 55
-#define __NR_mpx		 56
-#define __NR_setpgid		 57
-#define __NR_ulimit		 58
-#define __NR_oldolduname	 59
-#define __NR_umask		 60
-#define __NR_chroot		 61
-#define __NR_ustat		 62
-#define __NR_dup2		 63
-#define __NR_getppid		 64
-#define __NR_getpgrp		 65
-#define __NR_setsid		 66
-#define __NR_sigaction		 67
-#define __NR_sgetmask		 68
-#define __NR_ssetmask		 69
-#define __NR_setreuid		 70
-#define __NR_setregid		 71
-#define __NR_sigsuspend		 72
-#define __NR_sigpending		 73
-#define __NR_sethostname	 74
-#define __NR_setrlimit		 75
-#define __NR_getrlimit		 76
-#define __NR_getrusage		 77
-#define __NR_gettimeofday	 78
-#define __NR_settimeofday	 79
-#define __NR_getgroups		 80
-#define __NR_setgroups		 81
-#define __NR_select		 82
-#define __NR_symlink		 83
-#define __NR_oldlstat		 84
-#define __NR_readlink		 85
-#define __NR_uselib		 86
-#define __NR_swapon		 87
-#define __NR_reboot		 88
-#define __NR_readdir		 89
-#define __NR_mmap		 90
-#define __NR_munmap		 91
-#define __NR_truncate		 92
-#define __NR_ftruncate		 93
-#define __NR_fchmod		 94
-#define __NR_fchown		 95
-#define __NR_getpriority	 96
-#define __NR_setpriority	 97
-#define __NR_profil		 98
-#define __NR_statfs		 99
-#define __NR_fstatfs		100
-#define __NR_ioperm		101
-#define __NR_socketcall		102
-#define __NR_syslog		103
-#define __NR_setitimer		104
-#define __NR_getitimer		105
-#define __NR_stat		106
-#define __NR_lstat		107
-#define __NR_fstat		108
-#define __NR_olduname		109
-#define __NR_iopl		110
-#define __NR_vhangup		111
-#define __NR_idle		112
-#define __NR_vm86		113
-#define __NR_wait4		114
-#define __NR_swapoff		115
-#define __NR_sysinfo		116
-#define __NR_ipc		117
-#define __NR_fsync		118
-#define __NR_sigreturn		119
-#define __NR_clone		120
-#define __NR_setdomainname	121
-#define __NR_uname		122
-#define __NR_modify_ldt		123
-#define __NR_adjtimex		124
-#define __NR_mprotect		125
-#define __NR_sigprocmask	126
-#define __NR_create_module	127
-#define __NR_init_module	128
-#define __NR_delete_module	129
-#define __NR_get_kernel_syms	130
-#define __NR_quotactl		131
-#define __NR_getpgid		132
-#define __NR_fchdir		133
-#define __NR_bdflush		134
-#define __NR_sysfs		135
-#define __NR_personality	136
-#define __NR_afs_syscall	137 /* Syscall for Andrew File System */
-#define __NR_setfsuid		138
-#define __NR_setfsgid		139
-#define __NR__llseek		140
-#define __NR_getdents		141
-#define __NR__newselect		142
-#define __NR_flock		143
-#define __NR_msync		144
-#define __NR_readv		145
-#define __NR_writev		146
-#define __NR_getsid		147
-#define __NR_fdatasync		148
-#define __NR__sysctl		149
-#define __NR_mlock		150
-#define __NR_munlock		151
-#define __NR_mlockall		152
-#define __NR_munlockall		153
-#define __NR_sched_setparam		154
-#define __NR_sched_getparam		155
-#define __NR_sched_setscheduler		156
-#define __NR_sched_getscheduler		157
-#define __NR_sched_yield		158
-#define __NR_sched_get_priority_max	159
-#define __NR_sched_get_priority_min	160
-#define __NR_sched_rr_get_interval	161
-#define __NR_nanosleep		162
-#define __NR_mremap		163
-#define __NR_setresuid		164
-#define __NR_getresuid		165
-#define __NR_query_module	166
-#define __NR_poll		167
-#define __NR_nfsservctl		168
-#define __NR_setresgid		169
-#define __NR_getresgid		170
-#define __NR_prctl		171
-#define __NR_rt_sigreturn	172
-#define __NR_rt_sigaction	173
-#define __NR_rt_sigprocmask	174
-#define __NR_rt_sigpending	175
-#define __NR_rt_sigtimedwait	176
-#define __NR_rt_sigqueueinfo	177
-#define __NR_rt_sigsuspend	178
-#define __NR_pread64		179
-#define __NR_pwrite64		180
-#define __NR_chown		181
-#define __NR_getcwd		182
-#define __NR_capget		183
-#define __NR_capset		184
-#define __NR_sigaltstack	185
-#define __NR_sendfile		186
-#define __NR_getpmsg		187	/* some people actually want streams */
-#define __NR_putpmsg		188	/* some people actually want streams */
-#define __NR_vfork		189
-#define __NR_ugetrlimit		190	/* SuS compliant getrlimit */
-#define __NR_readahead		191
-#ifndef __powerpc64__			/* these are 32-bit only */
-#define __NR_mmap2		192
-#define __NR_truncate64		193
-#define __NR_ftruncate64	194
-#define __NR_stat64		195
-#define __NR_lstat64		196
-#define __NR_fstat64		197
-#endif
-#define __NR_pciconfig_read	198
-#define __NR_pciconfig_write	199
-#define __NR_pciconfig_iobase	200
-#define __NR_multiplexer	201
-#define __NR_getdents64		202
-#define __NR_pivot_root		203
-#ifndef __powerpc64__
-#define __NR_fcntl64		204
-#endif
-#define __NR_madvise		205
-#define __NR_mincore		206
-#define __NR_gettid		207
-#define __NR_tkill		208
-#define __NR_setxattr		209
-#define __NR_lsetxattr		210
-#define __NR_fsetxattr		211
-#define __NR_getxattr		212
-#define __NR_lgetxattr		213
-#define __NR_fgetxattr		214
-#define __NR_listxattr		215
-#define __NR_llistxattr		216
-#define __NR_flistxattr		217
-#define __NR_removexattr	218
-#define __NR_lremovexattr	219
-#define __NR_fremovexattr	220
-#define __NR_futex		221
-#define __NR_sched_setaffinity	222
-#define __NR_sched_getaffinity	223
-/* 224 currently unused */
-#define __NR_tuxcall		225
-#ifndef __powerpc64__
-#define __NR_sendfile64		226
-#endif
-#define __NR_io_setup		227
-#define __NR_io_destroy		228
-#define __NR_io_getevents	229
-#define __NR_io_submit		230
-#define __NR_io_cancel		231
-#define __NR_set_tid_address	232
-#define __NR_fadvise64		233
-#define __NR_exit_group		234
-#define __NR_lookup_dcookie	235
-#define __NR_epoll_create	236
-#define __NR_epoll_ctl		237
-#define __NR_epoll_wait		238
-#define __NR_remap_file_pages	239
-#define __NR_timer_create	240
-#define __NR_timer_settime	241
-#define __NR_timer_gettime	242
-#define __NR_timer_getoverrun	243
-#define __NR_timer_delete	244
-#define __NR_clock_settime	245
-#define __NR_clock_gettime	246
-#define __NR_clock_getres	247
-#define __NR_clock_nanosleep	248
-#define __NR_swapcontext	249
-#define __NR_tgkill		250
-#define __NR_utimes		251
-#define __NR_statfs64		252
-#define __NR_fstatfs64		253
-#ifndef __powerpc64__
-#define __NR_fadvise64_64	254
-#endif
-#define __NR_rtas		255
-#define __NR_sys_debug_setcontext 256
-/* Number 257 is reserved for vserver */
-#define __NR_migrate_pages	258
-#define __NR_mbind		259
-#define __NR_get_mempolicy	260
-#define __NR_set_mempolicy	261
-#define __NR_mq_open		262
-#define __NR_mq_unlink		263
-#define __NR_mq_timedsend	264
-#define __NR_mq_timedreceive	265
-#define __NR_mq_notify		266
-#define __NR_mq_getsetattr	267
-#define __NR_kexec_load		268
-#define __NR_add_key		269
-#define __NR_request_key	270
-#define __NR_keyctl		271
-#define __NR_waitid		272
-#define __NR_ioprio_set		273
-#define __NR_ioprio_get		274
-#define __NR_inotify_init	275
-#define __NR_inotify_add_watch	276
-#define __NR_inotify_rm_watch	277
-#define __NR_spu_run		278
-#define __NR_spu_create		279
-#define __NR_pselect6		280
-#define __NR_ppoll		281
-#define __NR_unshare		282
-#define __NR_splice		283
-#define __NR_tee		284
-#define __NR_vmsplice		285
-#define __NR_openat		286
-#define __NR_mkdirat		287
-#define __NR_mknodat		288
-#define __NR_fchownat		289
-#define __NR_futimesat		290
-#ifdef __powerpc64__
-#define __NR_newfstatat		291
-#else
-#define __NR_fstatat64		291
-#endif
-#define __NR_unlinkat		292
-#define __NR_renameat		293
-#define __NR_linkat		294
-#define __NR_symlinkat		295
-#define __NR_readlinkat		296
-#define __NR_fchmodat		297
-#define __NR_faccessat		298
-#define __NR_get_robust_list	299
-#define __NR_set_robust_list	300
-#define __NR_move_pages		301
-#define __NR_getcpu		302
-#define __NR_epoll_pwait	303
-#define __NR_utimensat		304
-#define __NR_signalfd		305
-#define __NR_timerfd_create	306
-#define __NR_eventfd		307
-#define __NR_sync_file_range2	308
-#define __NR_fallocate		309
-#define __NR_subpage_prot	310
-#define __NR_timerfd_settime	311
-#define __NR_timerfd_gettime	312
-#define __NR_signalfd4		313
-#define __NR_eventfd2		314
-#define __NR_epoll_create1	315
-#define __NR_dup3		316
-#define __NR_pipe2		317
-#define __NR_inotify_init1	318
-#define __NR_perf_event_open	319
-#define __NR_preadv		320
-#define __NR_pwritev		321
-#define __NR_rt_tgsigqueueinfo	322
-#define __NR_fanotify_init	323
-#define __NR_fanotify_mark	324
-#define __NR_prlimit64		325
-#define __NR_socket		326
-#define __NR_bind		327
-#define __NR_connect		328
-#define __NR_listen		329
-#define __NR_accept		330
-#define __NR_getsockname	331
-#define __NR_getpeername	332
-#define __NR_socketpair		333
-#define __NR_send		334
-#define __NR_sendto		335
-#define __NR_recv		336
-#define __NR_recvfrom		337
-#define __NR_shutdown		338
-#define __NR_setsockopt		339
-#define __NR_getsockopt		340
-#define __NR_sendmsg		341
-#define __NR_recvmsg		342
-#define __NR_recvmmsg		343
-#define __NR_accept4		344
-#define __NR_name_to_handle_at	345
-#define __NR_open_by_handle_at	346
-#define __NR_clock_adjtime	347
-#define __NR_syncfs		348
-#define __NR_sendmmsg		349
-#define __NR_setns		350
-#define __NR_process_vm_readv	351
-#define __NR_process_vm_writev	352
-#define __NR_finit_module	353
-#define __NR_kcmp		354
-#define __NR_sched_setattr	355
-#define __NR_sched_getattr	356
-#define __NR_renameat2		357
-#define __NR_seccomp		358
-#define __NR_getrandom		359
-#define __NR_memfd_create	360
-#define __NR_bpf		361
-#define __NR_execveat		362
-#define __NR_switch_endian	363
-#define __NR_userfaultfd	364
-#define __NR_membarrier		365
-#define __NR_mlock2		378
-#define __NR_copy_file_range	379
-#define __NR_preadv2		380
-#define __NR_pwritev2		381
-#define __NR_kexec_file_load	382
-#define __NR_statx		383
-#define __NR_pkey_alloc		384
-#define __NR_pkey_free		385
-#define __NR_pkey_mprotect	386
-#define __NR_rseq		387
-#define __NR_io_pgetevents	388
-
-#endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 28c4a502b419..6d6122524711 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -281,9 +281,11 @@
 #define X86_FEATURE_CLZERO		(13*32+ 0) /* CLZERO instruction */
 #define X86_FEATURE_IRPERF		(13*32+ 1) /* Instructions Retired Count */
 #define X86_FEATURE_XSAVEERPTR		(13*32+ 2) /* Always save/restore FP error pointers */
+#define X86_FEATURE_WBNOINVD		(13*32+ 9) /* WBNOINVD instruction */
 #define X86_FEATURE_AMD_IBPB		(13*32+12) /* "" Indirect Branch Prediction Barrier */
 #define X86_FEATURE_AMD_IBRS		(13*32+14) /* "" Indirect Branch Restricted Speculation */
 #define X86_FEATURE_AMD_STIBP		(13*32+15) /* "" Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_AMD_STIBP_ALWAYS_ON	(13*32+17) /* "" Single Thread Indirect Branch Predictors always-on preferred */
 #define X86_FEATURE_AMD_SSBD		(13*32+24) /* "" Speculative Store Bypass Disable */
 #define X86_FEATURE_VIRT_SSBD		(13*32+25) /* Virtualized Speculative Store Bypass Disable */
 #define X86_FEATURE_AMD_SSB_NO		(13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index 33833d1909af..a5ea841cc6d2 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -16,6 +16,12 @@
 # define DISABLE_MPX	(1<<(X86_FEATURE_MPX & 31))
 #endif
 
+#ifdef CONFIG_X86_SMAP
+# define DISABLE_SMAP	0
+#else
+# define DISABLE_SMAP	(1<<(X86_FEATURE_SMAP & 31))
+#endif
+
 #ifdef CONFIG_X86_INTEL_UMIP
 # define DISABLE_UMIP	0
 #else
@@ -68,7 +74,7 @@
 #define DISABLED_MASK6	0
 #define DISABLED_MASK7	(DISABLE_PTI)
 #define DISABLED_MASK8	0
-#define DISABLED_MASK9	(DISABLE_MPX)
+#define DISABLED_MASK9	(DISABLE_MPX|DISABLE_SMAP)
 #define DISABLED_MASK10	0
 #define DISABLED_MASK11	0
 #define DISABLED_MASK12	0
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index c7f3321fbe43..d90127298f12 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -738,9 +738,11 @@ __SYSCALL(__NR_statx,     sys_statx)
 __SC_COMP(__NR_io_pgetevents, sys_io_pgetevents, compat_sys_io_pgetevents)
 #define __NR_rseq 293
 __SYSCALL(__NR_rseq, sys_rseq)
+#define __NR_kexec_file_load 294
+__SYSCALL(__NR_kexec_file_load,     sys_kexec_file_load)
 
 #undef __NR_syscalls
-#define __NR_syscalls 294
+#define __NR_syscalls 295
 
 /*
  * 32 bit systems traditionally used different
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index a4446f452040..298b2e197744 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -412,6 +412,14 @@ typedef struct drm_i915_irq_wait {
 	int irq_seq;
 } drm_i915_irq_wait_t;
 
+/*
+ * Different modes of per-process Graphics Translation Table,
+ * see I915_PARAM_HAS_ALIASING_PPGTT
+ */
+#define I915_GEM_PPGTT_NONE	0
+#define I915_GEM_PPGTT_ALIASING	1
+#define I915_GEM_PPGTT_FULL	2
+
 /* Ioctl to query kernel params:
  */
 #define I915_PARAM_IRQ_ACTIVE            1
diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h
index a441ea1bfe6d..121e82ce296b 100644
--- a/tools/include/uapi/linux/fs.h
+++ b/tools/include/uapi/linux/fs.h
@@ -14,6 +14,11 @@
 #include <linux/ioctl.h>
 #include <linux/types.h>
 
+/* Use of MS_* flags within the kernel is restricted to core mount(2) code. */
+#if !defined(__KERNEL__)
+#include <linux/mount.h>
+#endif
+
 /*
  * It's silly to have NR_OPEN bigger than NR_FILE, but you can change
  * the file limit at runtime and only root can increase the per-process
@@ -101,57 +106,6 @@ struct inodes_stat_t {
 
 #define NR_FILE  8192	/* this can well be larger on a larger system */
 
-
-/*
- * These are the fs-independent mount-flags: up to 32 flags are supported
- */
-#define MS_RDONLY	 1	/* Mount read-only */
-#define MS_NOSUID	 2	/* Ignore suid and sgid bits */
-#define MS_NODEV	 4	/* Disallow access to device special files */
-#define MS_NOEXEC	 8	/* Disallow program execution */
-#define MS_SYNCHRONOUS	16	/* Writes are synced at once */
-#define MS_REMOUNT	32	/* Alter flags of a mounted FS */
-#define MS_MANDLOCK	64	/* Allow mandatory locks on an FS */
-#define MS_DIRSYNC	128	/* Directory modifications are synchronous */
-#define MS_NOATIME	1024	/* Do not update access times. */
-#define MS_NODIRATIME	2048	/* Do not update directory access times */
-#define MS_BIND		4096
-#define MS_MOVE		8192
-#define MS_REC		16384
-#define MS_VERBOSE	32768	/* War is peace. Verbosity is silence.
-				   MS_VERBOSE is deprecated. */
-#define MS_SILENT	32768
-#define MS_POSIXACL	(1<<16)	/* VFS does not apply the umask */
-#define MS_UNBINDABLE	(1<<17)	/* change to unbindable */
-#define MS_PRIVATE	(1<<18)	/* change to private */
-#define MS_SLAVE	(1<<19)	/* change to slave */
-#define MS_SHARED	(1<<20)	/* change to shared */
-#define MS_RELATIME	(1<<21)	/* Update atime relative to mtime/ctime. */
-#define MS_KERNMOUNT	(1<<22) /* this is a kern_mount call */
-#define MS_I_VERSION	(1<<23) /* Update inode I_version field */
-#define MS_STRICTATIME	(1<<24) /* Always perform atime updates */
-#define MS_LAZYTIME	(1<<25) /* Update the on-disk [acm]times lazily */
-
-/* These sb flags are internal to the kernel */
-#define MS_SUBMOUNT     (1<<26)
-#define MS_NOREMOTELOCK	(1<<27)
-#define MS_NOSEC	(1<<28)
-#define MS_BORN		(1<<29)
-#define MS_ACTIVE	(1<<30)
-#define MS_NOUSER	(1<<31)
-
-/*
- * Superblock flags that can be altered by MS_REMOUNT
- */
-#define MS_RMT_MASK	(MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION|\
-			 MS_LAZYTIME)
-
-/*
- * Old magic mount flag and mask
- */
-#define MS_MGC_VAL 0xC0ED0000
-#define MS_MGC_MSK 0xffff0000
-
 /*
  * Structure for FS_IOC_FSGETXATTR[A] and FS_IOC_FSSETXATTR.
  */
@@ -269,7 +223,8 @@ struct fsxattr {
 #define FS_POLICY_FLAGS_PAD_16		0x02
 #define FS_POLICY_FLAGS_PAD_32		0x03
 #define FS_POLICY_FLAGS_PAD_MASK	0x03
-#define FS_POLICY_FLAGS_VALID		0x03
+#define FS_POLICY_FLAG_DIRECT_KEY	0x04	/* use master key directly */
+#define FS_POLICY_FLAGS_VALID		0x07
 
 /* Encryption algorithms */
 #define FS_ENCRYPTION_MODE_INVALID		0
@@ -281,6 +236,7 @@ struct fsxattr {
 #define FS_ENCRYPTION_MODE_AES_128_CTS		6
 #define FS_ENCRYPTION_MODE_SPECK128_256_XTS	7 /* Removed, do not use. */
 #define FS_ENCRYPTION_MODE_SPECK128_256_CTS	8 /* Removed, do not use. */
+#define FS_ENCRYPTION_MODE_ADIANTUM		9
 
 struct fscrypt_policy {
 	__u8 version;
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index 1debfa42cba1..d6533828123a 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -288,6 +288,7 @@ enum {
 	IFLA_BR_MCAST_IGMP_VERSION,
 	IFLA_BR_MCAST_MLD_VERSION,
 	IFLA_BR_VLAN_STATS_PER_PORT,
+	IFLA_BR_MULTI_BOOLOPT,
 	__IFLA_BR_MAX,
 };
 
@@ -533,6 +534,7 @@ enum {
 	IFLA_VXLAN_LABEL,
 	IFLA_VXLAN_GPE,
 	IFLA_VXLAN_TTL_INHERIT,
+	IFLA_VXLAN_DF,
 	__IFLA_VXLAN_MAX
 };
 #define IFLA_VXLAN_MAX	(__IFLA_VXLAN_MAX - 1)
@@ -542,6 +544,14 @@ struct ifla_vxlan_port_range {
 	__be16	high;
 };
 
+enum ifla_vxlan_df {
+	VXLAN_DF_UNSET = 0,
+	VXLAN_DF_SET,
+	VXLAN_DF_INHERIT,
+	__VXLAN_DF_END,
+	VXLAN_DF_MAX = __VXLAN_DF_END - 1,
+};
+
 /* GENEVE section */
 enum {
 	IFLA_GENEVE_UNSPEC,
@@ -557,10 +567,19 @@ enum {
 	IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
 	IFLA_GENEVE_LABEL,
 	IFLA_GENEVE_TTL_INHERIT,
+	IFLA_GENEVE_DF,
 	__IFLA_GENEVE_MAX
 };
 #define IFLA_GENEVE_MAX	(__IFLA_GENEVE_MAX - 1)
 
+enum ifla_geneve_df {
+	GENEVE_DF_UNSET = 0,
+	GENEVE_DF_SET,
+	GENEVE_DF_INHERIT,
+	__GENEVE_DF_END,
+	GENEVE_DF_MAX = __GENEVE_DF_END - 1,
+};
+
 /* PPP section */
 enum {
 	IFLA_PPP_UNSPEC,
diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h
index 48e8a225b985..f6052e70bf40 100644
--- a/tools/include/uapi/linux/in.h
+++ b/tools/include/uapi/linux/in.h
@@ -266,10 +266,14 @@ struct sockaddr_in {
 
 #define	IN_CLASSD(a)		((((long int) (a)) & 0xf0000000) == 0xe0000000)
 #define	IN_MULTICAST(a)		IN_CLASSD(a)
-#define IN_MULTICAST_NET	0xF0000000
+#define	IN_MULTICAST_NET	0xe0000000
 
-#define	IN_EXPERIMENTAL(a)	((((long int) (a)) & 0xf0000000) == 0xf0000000)
-#define	IN_BADCLASS(a)		IN_EXPERIMENTAL((a))
+#define	IN_BADCLASS(a)		((((long int) (a) ) == 0xffffffff)
+#define	IN_EXPERIMENTAL(a)	IN_BADCLASS((a))
+
+#define	IN_CLASSE(a)		((((long int) (a)) & 0xf0000000) == 0xf0000000)
+#define	IN_CLASSE_NET		0xffffffff
+#define	IN_CLASSE_NSHIFT	0
 
 /* Address to accept any incoming messages. */
 #define	INADDR_ANY		((unsigned long int) 0x00000000)
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 2b7a652c9fa4..6d4ea4b6c922 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -492,6 +492,17 @@ struct kvm_dirty_log {
 	};
 };
 
+/* for KVM_CLEAR_DIRTY_LOG */
+struct kvm_clear_dirty_log {
+	__u32 slot;
+	__u32 num_pages;
+	__u64 first_page;
+	union {
+		void __user *dirty_bitmap; /* one bit per page */
+		__u64 padding2;
+	};
+};
+
 /* for KVM_SET_SIGNAL_MASK */
 struct kvm_signal_mask {
 	__u32 len;
@@ -975,6 +986,8 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163
 #define KVM_CAP_EXCEPTION_PAYLOAD 164
 #define KVM_CAP_ARM_VM_IPA_SIZE 165
+#define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166
+#define KVM_CAP_HYPERV_CPUID 167
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1421,6 +1434,12 @@ struct kvm_enc_region {
 #define KVM_GET_NESTED_STATE         _IOWR(KVMIO, 0xbe, struct kvm_nested_state)
 #define KVM_SET_NESTED_STATE         _IOW(KVMIO,  0xbf, struct kvm_nested_state)
 
+/* Available with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT */
+#define KVM_CLEAR_DIRTY_LOG          _IOWR(KVMIO, 0xc0, struct kvm_clear_dirty_log)
+
+/* Available with KVM_CAP_HYPERV_CPUID */
+#define KVM_GET_SUPPORTED_HV_CPUID _IOWR(KVMIO, 0xc1, struct kvm_cpuid2)
+
 /* Secure Encrypted Virtualization command */
 enum sev_cmd_id {
 	/* Guest initialization commands */
diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h
new file mode 100644
index 000000000000..3f9ec42510b0
--- /dev/null
+++ b/tools/include/uapi/linux/mount.h
@@ -0,0 +1,58 @@
+#ifndef _UAPI_LINUX_MOUNT_H
+#define _UAPI_LINUX_MOUNT_H
+
+/*
+ * These are the fs-independent mount-flags: up to 32 flags are supported
+ *
+ * Usage of these is restricted within the kernel to core mount(2) code and
+ * callers of sys_mount() only.  Filesystems should be using the SB_*
+ * equivalent instead.
+ */
+#define MS_RDONLY	 1	/* Mount read-only */
+#define MS_NOSUID	 2	/* Ignore suid and sgid bits */
+#define MS_NODEV	 4	/* Disallow access to device special files */
+#define MS_NOEXEC	 8	/* Disallow program execution */
+#define MS_SYNCHRONOUS	16	/* Writes are synced at once */
+#define MS_REMOUNT	32	/* Alter flags of a mounted FS */
+#define MS_MANDLOCK	64	/* Allow mandatory locks on an FS */
+#define MS_DIRSYNC	128	/* Directory modifications are synchronous */
+#define MS_NOATIME	1024	/* Do not update access times. */
+#define MS_NODIRATIME	2048	/* Do not update directory access times */
+#define MS_BIND		4096
+#define MS_MOVE		8192
+#define MS_REC		16384
+#define MS_VERBOSE	32768	/* War is peace. Verbosity is silence.
+				   MS_VERBOSE is deprecated. */
+#define MS_SILENT	32768
+#define MS_POSIXACL	(1<<16)	/* VFS does not apply the umask */
+#define MS_UNBINDABLE	(1<<17)	/* change to unbindable */
+#define MS_PRIVATE	(1<<18)	/* change to private */
+#define MS_SLAVE	(1<<19)	/* change to slave */
+#define MS_SHARED	(1<<20)	/* change to shared */
+#define MS_RELATIME	(1<<21)	/* Update atime relative to mtime/ctime. */
+#define MS_KERNMOUNT	(1<<22) /* this is a kern_mount call */
+#define MS_I_VERSION	(1<<23) /* Update inode I_version field */
+#define MS_STRICTATIME	(1<<24) /* Always perform atime updates */
+#define MS_LAZYTIME	(1<<25) /* Update the on-disk [acm]times lazily */
+
+/* These sb flags are internal to the kernel */
+#define MS_SUBMOUNT     (1<<26)
+#define MS_NOREMOTELOCK	(1<<27)
+#define MS_NOSEC	(1<<28)
+#define MS_BORN		(1<<29)
+#define MS_ACTIVE	(1<<30)
+#define MS_NOUSER	(1<<31)
+
+/*
+ * Superblock flags that can be altered by MS_REMOUNT
+ */
+#define MS_RMT_MASK	(MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION|\
+			 MS_LAZYTIME)
+
+/*
+ * Old magic mount flag and mask
+ */
+#define MS_MGC_VAL 0xC0ED0000
+#define MS_MGC_MSK 0xffff0000
+
+#endif /* _UAPI_LINUX_MOUNT_H */
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index b17201edfa09..b4875a93363a 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -220,4 +220,12 @@ struct prctl_mm_map {
 # define PR_SPEC_DISABLE		(1UL << 2)
 # define PR_SPEC_FORCE_DISABLE		(1UL << 3)
 
+/* Reset arm64 pointer authentication keys */
+#define PR_PAC_RESET_KEYS		54
+# define PR_PAC_APIAKEY			(1UL << 0)
+# define PR_PAC_APIBKEY			(1UL << 1)
+# define PR_PAC_APDAKEY			(1UL << 2)
+# define PR_PAC_APDBKEY			(1UL << 3)
+# define PR_PAC_APGAKEY			(1UL << 4)
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h
index 84c3de89696a..40d028eed645 100644
--- a/tools/include/uapi/linux/vhost.h
+++ b/tools/include/uapi/linux/vhost.h
@@ -11,94 +11,9 @@
  * device configuration.
  */
 
+#include <linux/vhost_types.h>
 #include <linux/types.h>
-#include <linux/compiler.h>
 #include <linux/ioctl.h>
-#include <linux/virtio_config.h>
-#include <linux/virtio_ring.h>
-
-struct vhost_vring_state {
-	unsigned int index;
-	unsigned int num;
-};
-
-struct vhost_vring_file {
-	unsigned int index;
-	int fd; /* Pass -1 to unbind from file. */
-
-};
-
-struct vhost_vring_addr {
-	unsigned int index;
-	/* Option flags. */
-	unsigned int flags;
-	/* Flag values: */
-	/* Whether log address is valid. If set enables logging. */
-#define VHOST_VRING_F_LOG 0
-
-	/* Start of array of descriptors (virtually contiguous) */
-	__u64 desc_user_addr;
-	/* Used structure address. Must be 32 bit aligned */
-	__u64 used_user_addr;
-	/* Available structure address. Must be 16 bit aligned */
-	__u64 avail_user_addr;
-	/* Logging support. */
-	/* Log writes to used structure, at offset calculated from specified
-	 * address. Address must be 32 bit aligned. */
-	__u64 log_guest_addr;
-};
-
-/* no alignment requirement */
-struct vhost_iotlb_msg {
-	__u64 iova;
-	__u64 size;
-	__u64 uaddr;
-#define VHOST_ACCESS_RO      0x1
-#define VHOST_ACCESS_WO      0x2
-#define VHOST_ACCESS_RW      0x3
-	__u8 perm;
-#define VHOST_IOTLB_MISS           1
-#define VHOST_IOTLB_UPDATE         2
-#define VHOST_IOTLB_INVALIDATE     3
-#define VHOST_IOTLB_ACCESS_FAIL    4
-	__u8 type;
-};
-
-#define VHOST_IOTLB_MSG 0x1
-#define VHOST_IOTLB_MSG_V2 0x2
-
-struct vhost_msg {
-	int type;
-	union {
-		struct vhost_iotlb_msg iotlb;
-		__u8 padding[64];
-	};
-};
-
-struct vhost_msg_v2 {
-	__u32 type;
-	__u32 reserved;
-	union {
-		struct vhost_iotlb_msg iotlb;
-		__u8 padding[64];
-	};
-};
-
-struct vhost_memory_region {
-	__u64 guest_phys_addr;
-	__u64 memory_size; /* bytes */
-	__u64 userspace_addr;
-	__u64 flags_padding; /* No flags are currently specified. */
-};
-
-/* All region addresses and sizes must be 4K aligned. */
-#define VHOST_PAGE_SIZE 0x1000
-
-struct vhost_memory {
-	__u32 nregions;
-	__u32 padding;
-	struct vhost_memory_region regions[0];
-};
 
 /* ioctls */
 
@@ -186,31 +101,7 @@ struct vhost_memory {
  * device.  This can be used to stop the ring (e.g. for migration). */
 #define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file)
 
-/* Feature bits */
-/* Log all write descriptors. Can be changed while device is active. */
-#define VHOST_F_LOG_ALL 26
-/* vhost-net should add virtio_net_hdr for RX, and strip for TX packets. */
-#define VHOST_NET_F_VIRTIO_NET_HDR 27
-
-/* VHOST_SCSI specific definitions */
-
-/*
- * Used by QEMU userspace to ensure a consistent vhost-scsi ABI.
- *
- * ABI Rev 0: July 2012 version starting point for v3.6-rc merge candidate +
- *            RFC-v2 vhost-scsi userspace.  Add GET_ABI_VERSION ioctl usage
- * ABI Rev 1: January 2013. Ignore vhost_tpgt filed in struct vhost_scsi_target.
- *            All the targets under vhost_wwpn can be seen and used by guset.
- */
-
-#define VHOST_SCSI_ABI_VERSION	1
-
-struct vhost_scsi_target {
-	int abi_version;
-	char vhost_wwpn[224]; /* TRANSPORT_IQN_LEN */
-	unsigned short vhost_tpgt;
-	unsigned short reserved;
-};
+/* VHOST_SCSI specific defines */
 
 #define VHOST_SCSI_SET_ENDPOINT _IOW(VHOST_VIRTIO, 0x40, struct vhost_scsi_target)
 #define VHOST_SCSI_CLEAR_ENDPOINT _IOW(VHOST_VIRTIO, 0x41, struct vhost_scsi_target)
diff --git a/tools/lib/traceevent/event-parse-api.c b/tools/lib/traceevent/event-parse-api.c
index 8b31c0e00ba3..d463761a58f4 100644
--- a/tools/lib/traceevent/event-parse-api.c
+++ b/tools/lib/traceevent/event-parse-api.c
@@ -194,13 +194,13 @@ void tep_set_page_size(struct tep_handle *pevent, int _page_size)
 }
 
 /**
- * tep_is_file_bigendian - get if the file is in big endian order
+ * tep_file_bigendian - get if the file is in big endian order
  * @pevent: a handle to the tep_handle
  *
  * This returns if the file is in big endian order
  * If @pevent is NULL, 0 is returned.
  */
-int tep_is_file_bigendian(struct tep_handle *pevent)
+int tep_file_bigendian(struct tep_handle *pevent)
 {
 	if(pevent)
 		return pevent->file_bigendian;
diff --git a/tools/lib/traceevent/event-parse-local.h b/tools/lib/traceevent/event-parse-local.h
index 9a092dd4a86d..35833ee32d6c 100644
--- a/tools/lib/traceevent/event-parse-local.h
+++ b/tools/lib/traceevent/event-parse-local.h
@@ -7,7 +7,7 @@
 #ifndef _PARSE_EVENTS_INT_H
 #define _PARSE_EVENTS_INT_H
 
-struct cmdline;
+struct tep_cmdline;
 struct cmdline_list;
 struct func_map;
 struct func_list;
@@ -36,7 +36,7 @@ struct tep_handle {
 	int long_size;
 	int page_size;
 
-	struct cmdline *cmdlines;
+	struct tep_cmdline *cmdlines;
 	struct cmdline_list *cmdlist;
 	int cmdline_count;
 
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index 69a96e39f0ab..abd4fa5d3088 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -124,15 +124,15 @@ struct tep_print_arg *alloc_arg(void)
 	return calloc(1, sizeof(struct tep_print_arg));
 }
 
-struct cmdline {
+struct tep_cmdline {
 	char *comm;
 	int pid;
 };
 
 static int cmdline_cmp(const void *a, const void *b)
 {
-	const struct cmdline *ca = a;
-	const struct cmdline *cb = b;
+	const struct tep_cmdline *ca = a;
+	const struct tep_cmdline *cb = b;
 
 	if (ca->pid < cb->pid)
 		return -1;
@@ -152,7 +152,7 @@ static int cmdline_init(struct tep_handle *pevent)
 {
 	struct cmdline_list *cmdlist = pevent->cmdlist;
 	struct cmdline_list *item;
-	struct cmdline *cmdlines;
+	struct tep_cmdline *cmdlines;
 	int i;
 
 	cmdlines = malloc(sizeof(*cmdlines) * pevent->cmdline_count);
@@ -179,8 +179,8 @@ static int cmdline_init(struct tep_handle *pevent)
 
 static const char *find_cmdline(struct tep_handle *pevent, int pid)
 {
-	const struct cmdline *comm;
-	struct cmdline key;
+	const struct tep_cmdline *comm;
+	struct tep_cmdline key;
 
 	if (!pid)
 		return "<idle>";
@@ -208,8 +208,8 @@ static const char *find_cmdline(struct tep_handle *pevent, int pid)
  */
 int tep_pid_is_registered(struct tep_handle *pevent, int pid)
 {
-	const struct cmdline *comm;
-	struct cmdline key;
+	const struct tep_cmdline *comm;
+	struct tep_cmdline key;
 
 	if (!pid)
 		return 1;
@@ -232,11 +232,13 @@ int tep_pid_is_registered(struct tep_handle *pevent, int pid)
  * we must add this pid. This is much slower than when cmdlines
  * are added before the array is initialized.
  */
-static int add_new_comm(struct tep_handle *pevent, const char *comm, int pid)
+static int add_new_comm(struct tep_handle *pevent,
+			const char *comm, int pid, bool override)
 {
-	struct cmdline *cmdlines = pevent->cmdlines;
-	const struct cmdline *cmdline;
-	struct cmdline key;
+	struct tep_cmdline *cmdlines = pevent->cmdlines;
+	struct tep_cmdline *cmdline;
+	struct tep_cmdline key;
+	char *new_comm;
 
 	if (!pid)
 		return 0;
@@ -247,8 +249,19 @@ static int add_new_comm(struct tep_handle *pevent, const char *comm, int pid)
 	cmdline = bsearch(&key, pevent->cmdlines, pevent->cmdline_count,
 		       sizeof(*pevent->cmdlines), cmdline_cmp);
 	if (cmdline) {
-		errno = EEXIST;
-		return -1;
+		if (!override) {
+			errno = EEXIST;
+			return -1;
+		}
+		new_comm = strdup(comm);
+		if (!new_comm) {
+			errno = ENOMEM;
+			return -1;
+		}
+		free(cmdline->comm);
+		cmdline->comm = new_comm;
+
+		return 0;
 	}
 
 	cmdlines = realloc(cmdlines, sizeof(*cmdlines) * (pevent->cmdline_count + 1));
@@ -275,21 +288,13 @@ static int add_new_comm(struct tep_handle *pevent, const char *comm, int pid)
 	return 0;
 }
 
-/**
- * tep_register_comm - register a pid / comm mapping
- * @pevent: handle for the pevent
- * @comm: the command line to register
- * @pid: the pid to map the command line to
- *
- * This adds a mapping to search for command line names with
- * a given pid. The comm is duplicated.
- */
-int tep_register_comm(struct tep_handle *pevent, const char *comm, int pid)
+static int _tep_register_comm(struct tep_handle *pevent,
+			      const char *comm, int pid, bool override)
 {
 	struct cmdline_list *item;
 
 	if (pevent->cmdlines)
-		return add_new_comm(pevent, comm, pid);
+		return add_new_comm(pevent, comm, pid, override);
 
 	item = malloc(sizeof(*item));
 	if (!item)
@@ -312,6 +317,40 @@ int tep_register_comm(struct tep_handle *pevent, const char *comm, int pid)
 	return 0;
 }
 
+/**
+ * tep_register_comm - register a pid / comm mapping
+ * @pevent: handle for the pevent
+ * @comm: the command line to register
+ * @pid: the pid to map the command line to
+ *
+ * This adds a mapping to search for command line names with
+ * a given pid. The comm is duplicated. If a command with the same pid
+ * already exist, -1 is returned and errno is set to EEXIST
+ */
+int tep_register_comm(struct tep_handle *pevent, const char *comm, int pid)
+{
+	return _tep_register_comm(pevent, comm, pid, false);
+}
+
+/**
+ * tep_override_comm - register a pid / comm mapping
+ * @pevent: handle for the pevent
+ * @comm: the command line to register
+ * @pid: the pid to map the command line to
+ *
+ * This adds a mapping to search for command line names with
+ * a given pid. The comm is duplicated. If a command with the same pid
+ * already exist, the command string is udapted with the new one
+ */
+int tep_override_comm(struct tep_handle *pevent, const char *comm, int pid)
+{
+	if (!pevent->cmdlines && cmdline_init(pevent)) {
+		errno = ENOMEM;
+		return -1;
+	}
+	return _tep_register_comm(pevent, comm, pid, true);
+}
+
 int tep_register_trace_clock(struct tep_handle *pevent, const char *trace_clock)
 {
 	pevent->trace_clock = strdup(trace_clock);
@@ -5226,18 +5265,6 @@ int tep_data_type(struct tep_handle *pevent, struct tep_record *rec)
 	return trace_parse_common_type(pevent, rec->data);
 }
 
-/**
- * tep_data_event_from_type - find the event by a given type
- * @pevent: a handle to the pevent
- * @type: the type of the event.
- *
- * This returns the event form a given @type;
- */
-struct tep_event *tep_data_event_from_type(struct tep_handle *pevent, int type)
-{
-	return tep_find_event(pevent, type);
-}
-
 /**
  * tep_data_pid - parse the PID from record
  * @pevent: a handle to the pevent
@@ -5292,8 +5319,8 @@ const char *tep_data_comm_from_pid(struct tep_handle *pevent, int pid)
 	return comm;
 }
 
-static struct cmdline *
-pid_from_cmdlist(struct tep_handle *pevent, const char *comm, struct cmdline *next)
+static struct tep_cmdline *
+pid_from_cmdlist(struct tep_handle *pevent, const char *comm, struct tep_cmdline *next)
 {
 	struct cmdline_list *cmdlist = (struct cmdline_list *)next;
 
@@ -5305,7 +5332,7 @@ pid_from_cmdlist(struct tep_handle *pevent, const char *comm, struct cmdline *ne
 	while (cmdlist && strcmp(cmdlist->comm, comm) != 0)
 		cmdlist = cmdlist->next;
 
-	return (struct cmdline *)cmdlist;
+	return (struct tep_cmdline *)cmdlist;
 }
 
 /**
@@ -5321,10 +5348,10 @@ pid_from_cmdlist(struct tep_handle *pevent, const char *comm, struct cmdline *ne
  * next pid.
  * Also, it does a linear search, so it may be slow.
  */
-struct cmdline *tep_data_pid_from_comm(struct tep_handle *pevent, const char *comm,
-				       struct cmdline *next)
+struct tep_cmdline *tep_data_pid_from_comm(struct tep_handle *pevent, const char *comm,
+					   struct tep_cmdline *next)
 {
-	struct cmdline *cmdline;
+	struct tep_cmdline *cmdline;
 
 	/*
 	 * If the cmdlines have not been converted yet, then use
@@ -5363,7 +5390,7 @@ struct cmdline *tep_data_pid_from_comm(struct tep_handle *pevent, const char *co
  * Returns the pid for a give cmdline. If @cmdline is NULL, then
  * -1 is returned.
  */
-int tep_cmdline_pid(struct tep_handle *pevent, struct cmdline *cmdline)
+int tep_cmdline_pid(struct tep_handle *pevent, struct tep_cmdline *cmdline)
 {
 	struct cmdline_list *cmdlist = (struct cmdline_list *)cmdline;
 
@@ -6593,6 +6620,12 @@ static struct tep_event *search_event(struct tep_handle *pevent, int id,
  *
  * If @id is >= 0, then it is used to find the event.
  * else @sys_name and @event_name are used.
+ *
+ * Returns:
+ *  TEP_REGISTER_SUCCESS_OVERWRITE if an existing handler is overwritten
+ *  TEP_REGISTER_SUCCESS if a new handler is registered successfully
+ *  negative TEP_ERRNO_... in case of an error
+ *
  */
 int tep_register_event_handler(struct tep_handle *pevent, int id,
 			       const char *sys_name, const char *event_name,
@@ -6610,7 +6643,7 @@ int tep_register_event_handler(struct tep_handle *pevent, int id,
 
 	event->handler = func;
 	event->context = context;
-	return 0;
+	return TEP_REGISTER_SUCCESS_OVERWRITE;
 
  not_found:
 	/* Save for later use. */
@@ -6640,7 +6673,7 @@ int tep_register_event_handler(struct tep_handle *pevent, int id,
 	pevent->handlers = handle;
 	handle->context = context;
 
-	return -1;
+	return TEP_REGISTER_SUCCESS;
 }
 
 static int handle_matches(struct event_handler *handler, int id,
@@ -6723,8 +6756,10 @@ struct tep_handle *tep_alloc(void)
 {
 	struct tep_handle *pevent = calloc(1, sizeof(*pevent));
 
-	if (pevent)
+	if (pevent) {
 		pevent->ref_count = 1;
+		pevent->host_bigendian = tep_host_bigendian();
+	}
 
 	return pevent;
 }
diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h
index 35d37087d3c5..aec48f2aea8a 100644
--- a/tools/lib/traceevent/event-parse.h
+++ b/tools/lib/traceevent/event-parse.h
@@ -432,6 +432,7 @@ int tep_set_function_resolver(struct tep_handle *pevent,
 			      tep_func_resolver_t *func, void *priv);
 void tep_reset_function_resolver(struct tep_handle *pevent);
 int tep_register_comm(struct tep_handle *pevent, const char *comm, int pid);
+int tep_override_comm(struct tep_handle *pevent, const char *comm, int pid);
 int tep_register_trace_clock(struct tep_handle *pevent, const char *trace_clock);
 int tep_register_function(struct tep_handle *pevent, char *name,
 			  unsigned long long addr, char *mod);
@@ -484,6 +485,11 @@ int tep_print_func_field(struct trace_seq *s, const char *fmt,
 			 struct tep_event *event, const char *name,
 			 struct tep_record *record, int err);
 
+enum tep_reg_handler {
+	TEP_REGISTER_SUCCESS = 0,
+	TEP_REGISTER_SUCCESS_OVERWRITE,
+};
+
 int tep_register_event_handler(struct tep_handle *pevent, int id,
 			       const char *sys_name, const char *event_name,
 			       tep_event_handler_func func, void *context);
@@ -520,15 +526,14 @@ tep_find_event_by_record(struct tep_handle *pevent, struct tep_record *record);
 void tep_data_lat_fmt(struct tep_handle *pevent,
 		      struct trace_seq *s, struct tep_record *record);
 int tep_data_type(struct tep_handle *pevent, struct tep_record *rec);
-struct tep_event *tep_data_event_from_type(struct tep_handle *pevent, int type);
 int tep_data_pid(struct tep_handle *pevent, struct tep_record *rec);
 int tep_data_preempt_count(struct tep_handle *pevent, struct tep_record *rec);
 int tep_data_flags(struct tep_handle *pevent, struct tep_record *rec);
 const char *tep_data_comm_from_pid(struct tep_handle *pevent, int pid);
-struct cmdline;
-struct cmdline *tep_data_pid_from_comm(struct tep_handle *pevent, const char *comm,
-				       struct cmdline *next);
-int tep_cmdline_pid(struct tep_handle *pevent, struct cmdline *cmdline);
+struct tep_cmdline;
+struct tep_cmdline *tep_data_pid_from_comm(struct tep_handle *pevent, const char *comm,
+					   struct tep_cmdline *next);
+int tep_cmdline_pid(struct tep_handle *pevent, struct tep_cmdline *cmdline);
 
 void tep_print_field(struct trace_seq *s, void *data,
 		     struct tep_format_field *field);
@@ -553,7 +558,7 @@ int tep_get_long_size(struct tep_handle *pevent);
 void tep_set_long_size(struct tep_handle *pevent, int long_size);
 int tep_get_page_size(struct tep_handle *pevent);
 void tep_set_page_size(struct tep_handle *pevent, int _page_size);
-int tep_is_file_bigendian(struct tep_handle *pevent);
+int tep_file_bigendian(struct tep_handle *pevent);
 void tep_set_file_bigendian(struct tep_handle *pevent, enum tep_endian endian);
 int tep_is_host_bigendian(struct tep_handle *pevent);
 void tep_set_host_bigendian(struct tep_handle *pevent, enum tep_endian endian);
diff --git a/tools/lib/traceevent/plugin_kvm.c b/tools/lib/traceevent/plugin_kvm.c
index 754050eea467..64b9c25a1fd3 100644
--- a/tools/lib/traceevent/plugin_kvm.c
+++ b/tools/lib/traceevent/plugin_kvm.c
@@ -389,7 +389,7 @@ static int kvm_mmu_print_role(struct trace_seq *s, struct tep_record *record,
 	 * We can only use the structure if file is of the same
 	 * endianness.
 	 */
-	if (tep_is_file_bigendian(event->pevent) ==
+	if (tep_file_bigendian(event->pevent) ==
 	    tep_is_host_bigendian(event->pevent)) {
 
 		trace_seq_printf(s, "%u q%u%s %s%s %spae %snxe %swp%s%s%s",
diff --git a/tools/lib/traceevent/trace-seq.c b/tools/lib/traceevent/trace-seq.c
index 8ff1d55954d1..8d5ecd2bf877 100644
--- a/tools/lib/traceevent/trace-seq.c
+++ b/tools/lib/traceevent/trace-seq.c
@@ -100,7 +100,8 @@ static void expand_buffer(struct trace_seq *s)
  * @fmt: printf format string
  *
  * It returns 0 if the trace oversizes the buffer's free
- * space, 1 otherwise.
+ * space, the number of characters printed, or a negative
+ * value in case of an error.
  *
  * The tracer may use either sequence operations or its own
  * copy to user routines. To simplify formating of a trace
@@ -129,9 +130,10 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
 		goto try_again;
 	}
 
-	s->len += ret;
+	if (ret > 0)
+		s->len += ret;
 
-	return 1;
+	return ret;
 }
 
 /**
@@ -139,6 +141,10 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
  * @s: trace sequence descriptor
  * @fmt: printf format string
  *
+ * It returns 0 if the trace oversizes the buffer's free
+ * space, the number of characters printed, or a negative
+ * value in case of an error.
+ * *
  * The tracer may use either sequence operations or its own
  * copy to user routines. To simplify formating of a trace
  * trace_seq_printf is used to store strings into a special
@@ -163,9 +169,10 @@ trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
 		goto try_again;
 	}
 
-	s->len += ret;
+	if (ret > 0)
+		s->len += ret;
 
-	return len;
+	return ret;
 }
 
 /**
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index ff29c3372ec3..0ee6795d82cc 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -524,12 +524,14 @@ $(arch_errno_name_array): $(arch_errno_tbl)
 
 all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS)
 
+# Create python binding output directory if not already present
+_dummy := $(shell [ -d '$(OUTPUT)python' ] || mkdir -p '$(OUTPUT)python')
+
 $(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST)
 	$(QUIET_GEN)LDSHARED="$(CC) -pthread -shared" \
         CFLAGS='$(CFLAGS)' LDFLAGS='$(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS)' \
 	  $(PYTHON_WORD) util/setup.py \
 	  --quiet build_ext; \
-	mkdir -p $(OUTPUT)python && \
 	cp $(PYTHON_EXTBUILD_LIB)perf*.so $(OUTPUT)python/
 
 please_set_SHELL_PATH_to_a_more_modern_shell:
@@ -660,12 +662,12 @@ $(OUTPUT)perf-%: %.o $(PERFLIBS)
 	$(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $(LDFLAGS) $(filter %.o,$^) $(LIBS)
 
 ifndef NO_PERF_READ_VDSO32
-$(OUTPUT)perf-read-vdso32: perf-read-vdso.c util/find-vdso-map.c
+$(OUTPUT)perf-read-vdso32: perf-read-vdso.c util/find-map.c
 	$(QUIET_CC)$(CC) -m32 $(filter -static,$(LDFLAGS)) -Wall -Werror -o $@ perf-read-vdso.c
 endif
 
 ifndef NO_PERF_READ_VDSOX32
-$(OUTPUT)perf-read-vdsox32: perf-read-vdso.c util/find-vdso-map.c
+$(OUTPUT)perf-read-vdsox32: perf-read-vdso.c util/find-map.c
 	$(QUIET_CC)$(CC) -mx32 $(filter -static,$(LDFLAGS)) -Wall -Werror -o $@ perf-read-vdso.c
 endif
 
diff --git a/tools/perf/arch/arm/tests/Build b/tools/perf/arch/arm/tests/Build
index 883c57ff0c08..d9ae2733f9cc 100644
--- a/tools/perf/arch/arm/tests/Build
+++ b/tools/perf/arch/arm/tests/Build
@@ -1,4 +1,5 @@
 libperf-y += regs_load.o
 libperf-y += dwarf-unwind.o
+libperf-y += vectors-page.o
 
 libperf-y += arch-tests.o
diff --git a/tools/perf/arch/arm/tests/arch-tests.c b/tools/perf/arch/arm/tests/arch-tests.c
index 5b1543c98022..6848101a855f 100644
--- a/tools/perf/arch/arm/tests/arch-tests.c
+++ b/tools/perf/arch/arm/tests/arch-tests.c
@@ -10,6 +10,10 @@ struct test arch_tests[] = {
 		.func = test__dwarf_unwind,
 	},
 #endif
+	{
+		.desc = "Vectors page",
+		.func = test__vectors_page,
+	},
 	{
 		.func = NULL,
 	},
diff --git a/tools/perf/arch/arm/tests/vectors-page.c b/tools/perf/arch/arm/tests/vectors-page.c
new file mode 100644
index 000000000000..7ffdd79971c8
--- /dev/null
+++ b/tools/perf/arch/arm/tests/vectors-page.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+#include <linux/compiler.h>
+
+#include "debug.h"
+#include "tests/tests.h"
+#include "util/find-map.c"
+
+#define VECTORS__MAP_NAME "[vectors]"
+
+int test__vectors_page(struct test *test __maybe_unused,
+		       int subtest __maybe_unused)
+{
+	void *start, *end;
+
+	if (find_map(&start, &end, VECTORS__MAP_NAME)) {
+		pr_err("%s not found, is CONFIG_KUSER_HELPERS enabled?\n",
+		       VECTORS__MAP_NAME);
+		return TEST_FAIL;
+	}
+
+	return TEST_OK;
+}
diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile
index a111239df182..e58d00d62f02 100644
--- a/tools/perf/arch/powerpc/Makefile
+++ b/tools/perf/arch/powerpc/Makefile
@@ -14,18 +14,25 @@ PERF_HAVE_JITDUMP := 1
 out    := $(OUTPUT)arch/powerpc/include/generated/asm
 header32 := $(out)/syscalls_32.c
 header64 := $(out)/syscalls_64.c
-sysdef := $(srctree)/tools/arch/powerpc/include/uapi/asm/unistd.h
-sysprf := $(srctree)/tools/perf/arch/powerpc/entry/syscalls/
+syskrn := $(srctree)/arch/powerpc/kernel/syscalls/syscall.tbl
+sysprf := $(srctree)/tools/perf/arch/powerpc/entry/syscalls
+sysdef := $(sysprf)/syscall.tbl
 systbl := $(sysprf)/mksyscalltbl
 
 # Create output directory if not already present
 _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
 
 $(header64): $(sysdef) $(systbl)
-	$(Q)$(SHELL) '$(systbl)' '64' '$(CC)' $(sysdef) > $@
+	@(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \
+	(diff -B $(sysdef) $(syskrn) >/dev/null) \
+	|| echo "Warning: Kernel ABI header at '$(sysdef)' differs from latest version at '$(syskrn)'" >&2 )) || true
+	$(Q)$(SHELL) '$(systbl)' '64' $(sysdef) > $@
 
 $(header32): $(sysdef) $(systbl)
-	$(Q)$(SHELL) '$(systbl)' '32' '$(CC)' $(sysdef) > $@
+	@(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \
+	(diff -B $(sysdef) $(syskrn) >/dev/null) \
+	|| echo "Warning: Kernel ABI header at '$(sysdef)' differs from latest version at '$(syskrn)'" >&2 )) || true
+	$(Q)$(SHELL) '$(systbl)' '32' $(sysdef) > $@
 
 clean::
 	$(call QUIET_CLEAN, powerpc) $(RM) $(header32) $(header64)
diff --git a/tools/perf/arch/powerpc/entry/syscalls/mksyscalltbl b/tools/perf/arch/powerpc/entry/syscalls/mksyscalltbl
index ef52e1dd694b..6c58060aa03b 100755
--- a/tools/perf/arch/powerpc/entry/syscalls/mksyscalltbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/mksyscalltbl
@@ -9,10 +9,9 @@
 # Changed by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
 
 wordsize=$1
-gcc=$2
-input=$3
+SYSCALL_TBL=$2
 
-if ! test -r $input; then
+if ! test -r $SYSCALL_TBL; then
 	echo "Could not read input file" >&2
 	exit 1
 fi
@@ -20,18 +19,21 @@ fi
 create_table()
 {
 	local wordsize=$1
-	local max_nr
+	local max_nr nr abi sc discard
+	max_nr=-1
+	nr=0
 
 	echo "static const char *syscalltbl_powerpc_${wordsize}[] = {"
-	while read sc nr; do
-		printf '\t[%d] = "%s",\n' $nr $sc
-		max_nr=$nr
+	while read nr abi sc discard; do
+		if [ "$max_nr" -lt "$nr" ]; then
+			printf '\t[%d] = "%s",\n' $nr $sc
+			max_nr=$nr
+		fi
 	done
 	echo '};'
 	echo "#define SYSCALLTBL_POWERPC_${wordsize}_MAX_ID $max_nr"
 }
 
-$gcc -m${wordsize} -E -dM -x c  $input	       \
-	|sed -ne 's/^#define __NR_//p' \
-	|sort -t' ' -k2 -nu	       \
+grep -E "^[[:digit:]]+[[:space:]]+(common|spu|nospu|${wordsize})" $SYSCALL_TBL \
+	|sort -k1 -n                                                           \
 	|create_table ${wordsize}
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
new file mode 100644
index 000000000000..db3bbb8744af
--- /dev/null
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -0,0 +1,427 @@
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+#
+# system call numbers and entry vectors for powerpc
+#
+# The format is:
+# <number> <abi> <name> <entry point> <compat entry point>
+#
+# The <abi> can be common, spu, nospu, 64, or 32 for this file.
+#
+0	nospu	restart_syscall			sys_restart_syscall
+1	nospu	exit				sys_exit
+2	nospu	fork				ppc_fork
+3	common	read				sys_read
+4	common	write				sys_write
+5	common	open				sys_open			compat_sys_open
+6	common	close				sys_close
+7	common	waitpid				sys_waitpid
+8	common	creat				sys_creat
+9	common	link				sys_link
+10	common	unlink				sys_unlink
+11	nospu	execve				sys_execve			compat_sys_execve
+12	common	chdir				sys_chdir
+13	common	time				sys_time			compat_sys_time
+14	common	mknod				sys_mknod
+15	common	chmod				sys_chmod
+16	common	lchown				sys_lchown
+17	common	break				sys_ni_syscall
+18	32	oldstat				sys_stat			sys_ni_syscall
+18	64	oldstat				sys_ni_syscall
+18	spu	oldstat				sys_ni_syscall
+19	common	lseek				sys_lseek			compat_sys_lseek
+20	common	getpid				sys_getpid
+21	nospu	mount				sys_mount			compat_sys_mount
+22	32	umount				sys_oldumount
+22	64	umount				sys_ni_syscall
+22	spu	umount				sys_ni_syscall
+23	common	setuid				sys_setuid
+24	common	getuid				sys_getuid
+25	common	stime				sys_stime			compat_sys_stime
+26	nospu	ptrace				sys_ptrace			compat_sys_ptrace
+27	common	alarm				sys_alarm
+28	32	oldfstat			sys_fstat			sys_ni_syscall
+28	64	oldfstat			sys_ni_syscall
+28	spu	oldfstat			sys_ni_syscall
+29	nospu	pause				sys_pause
+30	nospu	utime				sys_utime			compat_sys_utime
+31	common	stty				sys_ni_syscall
+32	common	gtty				sys_ni_syscall
+33	common	access				sys_access
+34	common	nice				sys_nice
+35	common	ftime				sys_ni_syscall
+36	common	sync				sys_sync
+37	common	kill				sys_kill
+38	common	rename				sys_rename
+39	common	mkdir				sys_mkdir
+40	common	rmdir				sys_rmdir
+41	common	dup				sys_dup
+42	common	pipe				sys_pipe
+43	common	times				sys_times			compat_sys_times
+44	common	prof				sys_ni_syscall
+45	common	brk				sys_brk
+46	common	setgid				sys_setgid
+47	common	getgid				sys_getgid
+48	nospu	signal				sys_signal
+49	common	geteuid				sys_geteuid
+50	common	getegid				sys_getegid
+51	nospu	acct				sys_acct
+52	nospu	umount2				sys_umount
+53	common	lock				sys_ni_syscall
+54	common	ioctl				sys_ioctl			compat_sys_ioctl
+55	common	fcntl				sys_fcntl			compat_sys_fcntl
+56	common	mpx				sys_ni_syscall
+57	common	setpgid				sys_setpgid
+58	common	ulimit				sys_ni_syscall
+59	32	oldolduname			sys_olduname
+59	64	oldolduname			sys_ni_syscall
+59	spu	oldolduname			sys_ni_syscall
+60	common	umask				sys_umask
+61	common	chroot				sys_chroot
+62	nospu	ustat				sys_ustat			compat_sys_ustat
+63	common	dup2				sys_dup2
+64	common	getppid				sys_getppid
+65	common	getpgrp				sys_getpgrp
+66	common	setsid				sys_setsid
+67	32	sigaction			sys_sigaction			compat_sys_sigaction
+67	64	sigaction			sys_ni_syscall
+67	spu	sigaction			sys_ni_syscall
+68	common	sgetmask			sys_sgetmask
+69	common	ssetmask			sys_ssetmask
+70	common	setreuid			sys_setreuid
+71	common	setregid			sys_setregid
+72	32	sigsuspend			sys_sigsuspend
+72	64	sigsuspend			sys_ni_syscall
+72	spu	sigsuspend			sys_ni_syscall
+73	32	sigpending			sys_sigpending			compat_sys_sigpending
+73	64	sigpending			sys_ni_syscall
+73	spu	sigpending			sys_ni_syscall
+74	common	sethostname			sys_sethostname
+75	common	setrlimit			sys_setrlimit			compat_sys_setrlimit
+76	32	getrlimit			sys_old_getrlimit		compat_sys_old_getrlimit
+76	64	getrlimit			sys_ni_syscall
+76	spu	getrlimit			sys_ni_syscall
+77	common	getrusage			sys_getrusage			compat_sys_getrusage
+78	common	gettimeofday			sys_gettimeofday		compat_sys_gettimeofday
+79	common	settimeofday			sys_settimeofday		compat_sys_settimeofday
+80	common	getgroups			sys_getgroups
+81	common	setgroups			sys_setgroups
+82	32	select				ppc_select			sys_ni_syscall
+82	64	select				sys_ni_syscall
+82	spu	select				sys_ni_syscall
+83	common	symlink				sys_symlink
+84	32	oldlstat			sys_lstat			sys_ni_syscall
+84	64	oldlstat			sys_ni_syscall
+84	spu	oldlstat			sys_ni_syscall
+85	common	readlink			sys_readlink
+86	nospu	uselib				sys_uselib
+87	nospu	swapon				sys_swapon
+88	nospu	reboot				sys_reboot
+89	32	readdir				sys_old_readdir			compat_sys_old_readdir
+89	64	readdir				sys_ni_syscall
+89	spu	readdir				sys_ni_syscall
+90	common	mmap				sys_mmap
+91	common	munmap				sys_munmap
+92	common	truncate			sys_truncate			compat_sys_truncate
+93	common	ftruncate			sys_ftruncate			compat_sys_ftruncate
+94	common	fchmod				sys_fchmod
+95	common	fchown				sys_fchown
+96	common	getpriority			sys_getpriority
+97	common	setpriority			sys_setpriority
+98	common	profil				sys_ni_syscall
+99	nospu	statfs				sys_statfs			compat_sys_statfs
+100	nospu	fstatfs				sys_fstatfs			compat_sys_fstatfs
+101	common	ioperm				sys_ni_syscall
+102	common	socketcall			sys_socketcall			compat_sys_socketcall
+103	common	syslog				sys_syslog
+104	common	setitimer			sys_setitimer			compat_sys_setitimer
+105	common	getitimer			sys_getitimer			compat_sys_getitimer
+106	common	stat				sys_newstat			compat_sys_newstat
+107	common	lstat				sys_newlstat			compat_sys_newlstat
+108	common	fstat				sys_newfstat			compat_sys_newfstat
+109	32	olduname			sys_uname
+109	64	olduname			sys_ni_syscall
+109	spu	olduname			sys_ni_syscall
+110	common	iopl				sys_ni_syscall
+111	common	vhangup				sys_vhangup
+112	common	idle				sys_ni_syscall
+113	common	vm86				sys_ni_syscall
+114	common	wait4				sys_wait4			compat_sys_wait4
+115	nospu	swapoff				sys_swapoff
+116	common	sysinfo				sys_sysinfo			compat_sys_sysinfo
+117	nospu	ipc				sys_ipc				compat_sys_ipc
+118	common	fsync				sys_fsync
+119	32	sigreturn			sys_sigreturn			compat_sys_sigreturn
+119	64	sigreturn			sys_ni_syscall
+119	spu	sigreturn			sys_ni_syscall
+120	nospu	clone				ppc_clone
+121	common	setdomainname			sys_setdomainname
+122	common	uname				sys_newuname
+123	common	modify_ldt			sys_ni_syscall
+124	common	adjtimex			sys_adjtimex			compat_sys_adjtimex
+125	common	mprotect			sys_mprotect
+126	32	sigprocmask			sys_sigprocmask			compat_sys_sigprocmask
+126	64	sigprocmask			sys_ni_syscall
+126	spu	sigprocmask			sys_ni_syscall
+127	common	create_module			sys_ni_syscall
+128	nospu	init_module			sys_init_module
+129	nospu	delete_module			sys_delete_module
+130	common	get_kernel_syms			sys_ni_syscall
+131	nospu	quotactl			sys_quotactl
+132	common	getpgid				sys_getpgid
+133	common	fchdir				sys_fchdir
+134	common	bdflush				sys_bdflush
+135	common	sysfs				sys_sysfs
+136	32	personality			sys_personality			ppc64_personality
+136	64	personality			ppc64_personality
+136	spu	personality			ppc64_personality
+137	common	afs_syscall			sys_ni_syscall
+138	common	setfsuid			sys_setfsuid
+139	common	setfsgid			sys_setfsgid
+140	common	_llseek				sys_llseek
+141	common	getdents			sys_getdents			compat_sys_getdents
+142	common	_newselect			sys_select			compat_sys_select
+143	common	flock				sys_flock
+144	common	msync				sys_msync
+145	common	readv				sys_readv			compat_sys_readv
+146	common	writev				sys_writev			compat_sys_writev
+147	common	getsid				sys_getsid
+148	common	fdatasync			sys_fdatasync
+149	nospu	_sysctl				sys_sysctl			compat_sys_sysctl
+150	common	mlock				sys_mlock
+151	common	munlock				sys_munlock
+152	common	mlockall			sys_mlockall
+153	common	munlockall			sys_munlockall
+154	common	sched_setparam			sys_sched_setparam
+155	common	sched_getparam			sys_sched_getparam
+156	common	sched_setscheduler		sys_sched_setscheduler
+157	common	sched_getscheduler		sys_sched_getscheduler
+158	common	sched_yield			sys_sched_yield
+159	common	sched_get_priority_max		sys_sched_get_priority_max
+160	common	sched_get_priority_min		sys_sched_get_priority_min
+161	common	sched_rr_get_interval		sys_sched_rr_get_interval	compat_sys_sched_rr_get_interval
+162	common	nanosleep			sys_nanosleep			compat_sys_nanosleep
+163	common	mremap				sys_mremap
+164	common	setresuid			sys_setresuid
+165	common	getresuid			sys_getresuid
+166	common	query_module			sys_ni_syscall
+167	common	poll				sys_poll
+168	common	nfsservctl			sys_ni_syscall
+169	common	setresgid			sys_setresgid
+170	common	getresgid			sys_getresgid
+171	common	prctl				sys_prctl
+172	nospu	rt_sigreturn			sys_rt_sigreturn		compat_sys_rt_sigreturn
+173	nospu	rt_sigaction			sys_rt_sigaction		compat_sys_rt_sigaction
+174	nospu	rt_sigprocmask			sys_rt_sigprocmask		compat_sys_rt_sigprocmask
+175	nospu	rt_sigpending			sys_rt_sigpending		compat_sys_rt_sigpending
+176	nospu	rt_sigtimedwait			sys_rt_sigtimedwait		compat_sys_rt_sigtimedwait
+177	nospu 	rt_sigqueueinfo			sys_rt_sigqueueinfo		compat_sys_rt_sigqueueinfo
+178	nospu 	rt_sigsuspend			sys_rt_sigsuspend		compat_sys_rt_sigsuspend
+179	common	pread64				sys_pread64			compat_sys_pread64
+180	common	pwrite64			sys_pwrite64			compat_sys_pwrite64
+181	common	chown				sys_chown
+182	common	getcwd				sys_getcwd
+183	common	capget				sys_capget
+184	common	capset				sys_capset
+185	nospu	sigaltstack			sys_sigaltstack			compat_sys_sigaltstack
+186	32	sendfile			sys_sendfile			compat_sys_sendfile
+186	64	sendfile			sys_sendfile64
+186	spu	sendfile			sys_sendfile64
+187	common	getpmsg				sys_ni_syscall
+188	common 	putpmsg				sys_ni_syscall
+189	nospu	vfork				ppc_vfork
+190	common	ugetrlimit			sys_getrlimit			compat_sys_getrlimit
+191	common	readahead			sys_readahead			compat_sys_readahead
+192	32	mmap2				sys_mmap2			compat_sys_mmap2
+193	32	truncate64			sys_truncate64			compat_sys_truncate64
+194	32	ftruncate64			sys_ftruncate64			compat_sys_ftruncate64
+195	32	stat64				sys_stat64
+196	32	lstat64				sys_lstat64
+197	32	fstat64				sys_fstat64
+198	nospu 	pciconfig_read			sys_pciconfig_read
+199	nospu 	pciconfig_write			sys_pciconfig_write
+200	nospu 	pciconfig_iobase		sys_pciconfig_iobase
+201	common 	multiplexer			sys_ni_syscall
+202	common	getdents64			sys_getdents64
+203	common	pivot_root			sys_pivot_root
+204	32	fcntl64				sys_fcntl64			compat_sys_fcntl64
+205	common	madvise				sys_madvise
+206	common	mincore				sys_mincore
+207	common	gettid				sys_gettid
+208	common	tkill				sys_tkill
+209	common	setxattr			sys_setxattr
+210	common	lsetxattr			sys_lsetxattr
+211	common	fsetxattr			sys_fsetxattr
+212	common	getxattr			sys_getxattr
+213	common	lgetxattr			sys_lgetxattr
+214	common	fgetxattr			sys_fgetxattr
+215	common	listxattr			sys_listxattr
+216	common	llistxattr			sys_llistxattr
+217	common	flistxattr			sys_flistxattr
+218	common	removexattr			sys_removexattr
+219	common	lremovexattr			sys_lremovexattr
+220	common	fremovexattr			sys_fremovexattr
+221	common	futex				sys_futex			compat_sys_futex
+222	common	sched_setaffinity		sys_sched_setaffinity		compat_sys_sched_setaffinity
+223	common	sched_getaffinity		sys_sched_getaffinity		compat_sys_sched_getaffinity
+# 224 unused
+225	common	tuxcall				sys_ni_syscall
+226	32	sendfile64			sys_sendfile64			compat_sys_sendfile64
+227	common	io_setup			sys_io_setup			compat_sys_io_setup
+228	common	io_destroy			sys_io_destroy
+229	common	io_getevents			sys_io_getevents		compat_sys_io_getevents
+230	common	io_submit			sys_io_submit			compat_sys_io_submit
+231	common	io_cancel			sys_io_cancel
+232	nospu	set_tid_address			sys_set_tid_address
+233	common	fadvise64			sys_fadvise64			ppc32_fadvise64
+234	nospu	exit_group			sys_exit_group
+235	nospu	lookup_dcookie			sys_lookup_dcookie		compat_sys_lookup_dcookie
+236	common	epoll_create			sys_epoll_create
+237	common	epoll_ctl			sys_epoll_ctl
+238	common	epoll_wait			sys_epoll_wait
+239	common	remap_file_pages		sys_remap_file_pages
+240	common	timer_create			sys_timer_create		compat_sys_timer_create
+241	common	timer_settime			sys_timer_settime		compat_sys_timer_settime
+242	common	timer_gettime			sys_timer_gettime		compat_sys_timer_gettime
+243	common	timer_getoverrun		sys_timer_getoverrun
+244	common	timer_delete			sys_timer_delete
+245	common	clock_settime			sys_clock_settime		compat_sys_clock_settime
+246	common	clock_gettime			sys_clock_gettime		compat_sys_clock_gettime
+247	common	clock_getres			sys_clock_getres		compat_sys_clock_getres
+248	common	clock_nanosleep			sys_clock_nanosleep		compat_sys_clock_nanosleep
+249	32	swapcontext			ppc_swapcontext			ppc32_swapcontext
+249	64	swapcontext			ppc64_swapcontext
+249	spu	swapcontext			sys_ni_syscall
+250	common	tgkill				sys_tgkill
+251	common	utimes				sys_utimes			compat_sys_utimes
+252	common	statfs64			sys_statfs64			compat_sys_statfs64
+253	common	fstatfs64			sys_fstatfs64			compat_sys_fstatfs64
+254	32	fadvise64_64			ppc_fadvise64_64
+254	spu	fadvise64_64			sys_ni_syscall
+255	common	rtas				sys_rtas
+256	32	sys_debug_setcontext		sys_debug_setcontext		sys_ni_syscall
+256	64	sys_debug_setcontext		sys_ni_syscall
+256	spu	sys_debug_setcontext		sys_ni_syscall
+# 257 reserved for vserver
+258	nospu	migrate_pages			sys_migrate_pages		compat_sys_migrate_pages
+259	nospu	mbind				sys_mbind			compat_sys_mbind
+260	nospu	get_mempolicy			sys_get_mempolicy		compat_sys_get_mempolicy
+261	nospu	set_mempolicy			sys_set_mempolicy		compat_sys_set_mempolicy
+262	nospu	mq_open				sys_mq_open			compat_sys_mq_open
+263	nospu	mq_unlink			sys_mq_unlink
+264	nospu	mq_timedsend			sys_mq_timedsend		compat_sys_mq_timedsend
+265	nospu	mq_timedreceive			sys_mq_timedreceive		compat_sys_mq_timedreceive
+266	nospu	mq_notify			sys_mq_notify			compat_sys_mq_notify
+267	nospu	mq_getsetattr			sys_mq_getsetattr		compat_sys_mq_getsetattr
+268	nospu	kexec_load			sys_kexec_load			compat_sys_kexec_load
+269	nospu	add_key				sys_add_key
+270	nospu	request_key			sys_request_key
+271	nospu	keyctl				sys_keyctl			compat_sys_keyctl
+272	nospu	waitid				sys_waitid			compat_sys_waitid
+273	nospu	ioprio_set			sys_ioprio_set
+274	nospu	ioprio_get			sys_ioprio_get
+275	nospu	inotify_init			sys_inotify_init
+276	nospu	inotify_add_watch		sys_inotify_add_watch
+277	nospu	inotify_rm_watch		sys_inotify_rm_watch
+278	nospu	spu_run				sys_spu_run
+279	nospu	spu_create			sys_spu_create
+280	nospu	pselect6			sys_pselect6			compat_sys_pselect6
+281	nospu	ppoll				sys_ppoll			compat_sys_ppoll
+282	common	unshare				sys_unshare
+283	common	splice				sys_splice
+284	common	tee				sys_tee
+285	common	vmsplice			sys_vmsplice			compat_sys_vmsplice
+286	common	openat				sys_openat			compat_sys_openat
+287	common	mkdirat				sys_mkdirat
+288	common	mknodat				sys_mknodat
+289	common	fchownat			sys_fchownat
+290	common	futimesat			sys_futimesat			compat_sys_futimesat
+291	32	fstatat64			sys_fstatat64
+291	64	newfstatat			sys_newfstatat
+291	spu	newfstatat			sys_newfstatat
+292	common	unlinkat			sys_unlinkat
+293	common	renameat			sys_renameat
+294	common	linkat				sys_linkat
+295	common	symlinkat			sys_symlinkat
+296	common	readlinkat			sys_readlinkat
+297	common	fchmodat			sys_fchmodat
+298	common	faccessat			sys_faccessat
+299	common	get_robust_list			sys_get_robust_list		compat_sys_get_robust_list
+300	common	set_robust_list			sys_set_robust_list		compat_sys_set_robust_list
+301	common	move_pages			sys_move_pages			compat_sys_move_pages
+302	common	getcpu				sys_getcpu
+303	nospu	epoll_pwait			sys_epoll_pwait			compat_sys_epoll_pwait
+304	common	utimensat			sys_utimensat			compat_sys_utimensat
+305	common	signalfd			sys_signalfd			compat_sys_signalfd
+306	common	timerfd_create			sys_timerfd_create
+307	common	eventfd				sys_eventfd
+308	common	sync_file_range2		sys_sync_file_range2		compat_sys_sync_file_range2
+309	nospu	fallocate			sys_fallocate			compat_sys_fallocate
+310	nospu	subpage_prot			sys_subpage_prot
+311	common	timerfd_settime			sys_timerfd_settime		compat_sys_timerfd_settime
+312	common	timerfd_gettime			sys_timerfd_gettime		compat_sys_timerfd_gettime
+313	common	signalfd4			sys_signalfd4			compat_sys_signalfd4
+314	common	eventfd2			sys_eventfd2
+315	common	epoll_create1			sys_epoll_create1
+316	common	dup3				sys_dup3
+317	common	pipe2				sys_pipe2
+318	nospu	inotify_init1			sys_inotify_init1
+319	common	perf_event_open			sys_perf_event_open
+320	common	preadv				sys_preadv			compat_sys_preadv
+321	common	pwritev				sys_pwritev			compat_sys_pwritev
+322	nospu	rt_tgsigqueueinfo		sys_rt_tgsigqueueinfo		compat_sys_rt_tgsigqueueinfo
+323	nospu	fanotify_init			sys_fanotify_init
+324	nospu	fanotify_mark			sys_fanotify_mark		compat_sys_fanotify_mark
+325	common	prlimit64			sys_prlimit64
+326	common	socket				sys_socket
+327	common	bind				sys_bind
+328	common	connect				sys_connect
+329	common	listen				sys_listen
+330	common	accept				sys_accept
+331	common	getsockname			sys_getsockname
+332	common	getpeername			sys_getpeername
+333	common	socketpair			sys_socketpair
+334	common	send				sys_send
+335	common	sendto				sys_sendto
+336	common	recv				sys_recv			compat_sys_recv
+337	common	recvfrom			sys_recvfrom			compat_sys_recvfrom
+338	common	shutdown			sys_shutdown
+339	common	setsockopt			sys_setsockopt			compat_sys_setsockopt
+340	common	getsockopt			sys_getsockopt			compat_sys_getsockopt
+341	common	sendmsg				sys_sendmsg			compat_sys_sendmsg
+342	common	recvmsg				sys_recvmsg			compat_sys_recvmsg
+343	common	recvmmsg			sys_recvmmsg			compat_sys_recvmmsg
+344	common	accept4				sys_accept4
+345	common	name_to_handle_at		sys_name_to_handle_at
+346	common	open_by_handle_at		sys_open_by_handle_at		compat_sys_open_by_handle_at
+347	common	clock_adjtime			sys_clock_adjtime		compat_sys_clock_adjtime
+348	common	syncfs				sys_syncfs
+349	common	sendmmsg			sys_sendmmsg			compat_sys_sendmmsg
+350	common	setns				sys_setns
+351	nospu	process_vm_readv		sys_process_vm_readv		compat_sys_process_vm_readv
+352	nospu	process_vm_writev		sys_process_vm_writev		compat_sys_process_vm_writev
+353	nospu	finit_module			sys_finit_module
+354	nospu	kcmp				sys_kcmp
+355	common	sched_setattr			sys_sched_setattr
+356	common	sched_getattr			sys_sched_getattr
+357	common	renameat2			sys_renameat2
+358	common	seccomp				sys_seccomp
+359	common	getrandom			sys_getrandom
+360	common	memfd_create			sys_memfd_create
+361	common	bpf				sys_bpf
+362	nospu	execveat			sys_execveat			compat_sys_execveat
+363	32	switch_endian			sys_ni_syscall
+363	64	switch_endian			ppc_switch_endian
+363	spu	switch_endian			sys_ni_syscall
+364	common	userfaultfd			sys_userfaultfd
+365	common	membarrier			sys_membarrier
+378	nospu	mlock2				sys_mlock2
+379	nospu	copy_file_range			sys_copy_file_range
+380	common	preadv2				sys_preadv2			compat_sys_preadv2
+381	common	pwritev2			sys_pwritev2			compat_sys_pwritev2
+382	nospu	kexec_file_load			sys_kexec_file_load
+383	nospu	statx				sys_statx
+384	nospu	pkey_alloc			sys_pkey_alloc
+385	nospu	pkey_free			sys_pkey_free
+386	nospu	pkey_mprotect			sys_pkey_mprotect
+387	nospu	rseq				sys_rseq
+388	nospu	io_pgetevents			sys_io_pgetevents		compat_sys_io_pgetevents
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 1410d66192f7..63a3afc7f32b 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -561,7 +561,8 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
 					break;
 			}
 		}
-		wait4(child_pid, &status, 0, &stat_config.ru_data);
+		if (child_pid != -1)
+			wait4(child_pid, &status, 0, &stat_config.ru_data);
 
 		if (workload_exec_errno) {
 			const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index fe3ecfb2e64b..f64e312db787 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1028,12 +1028,7 @@ static int perf_top__start_counters(struct perf_top *top)
 
 static int callchain_param__setup_sample_type(struct callchain_param *callchain)
 {
-	if (!perf_hpp_list.sym) {
-		if (callchain->enabled) {
-			ui__error("Selected -g but \"sym\" not present in --sort/-s.");
-			return -EINVAL;
-		}
-	} else if (callchain->mode != CHAIN_NONE) {
+	if (callchain->mode != CHAIN_NONE) {
 		if (callchain_register_param(callchain) < 0) {
 			ui__error("Can't register callchain params.\n");
 			return -EINVAL;
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index adbf28183560..ed4583128b9c 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1758,6 +1758,7 @@ static int trace__printf_interrupted_entry(struct trace *trace)
 {
 	struct thread_trace *ttrace;
 	size_t printed;
+	int len;
 
 	if (trace->failure_only || trace->current == NULL)
 		return 0;
@@ -1768,9 +1769,14 @@ static int trace__printf_interrupted_entry(struct trace *trace)
 		return 0;
 
 	printed  = trace__fprintf_entry_head(trace, trace->current, 0, false, ttrace->entry_time, trace->output);
-	printed += fprintf(trace->output, ")%-*s ...\n", trace->args_alignment, ttrace->entry_str);
-	ttrace->entry_pending = false;
+	printed += len = fprintf(trace->output, "%s)", ttrace->entry_str);
+
+	if (len < trace->args_alignment - 4)
+		printed += fprintf(trace->output, "%-*s", trace->args_alignment - 4 - len, " ");
 
+	printed += fprintf(trace->output, " ...\n");
+
+	ttrace->entry_pending = false;
 	++trace->nr_events_printed;
 
 	return printed;
@@ -2026,9 +2032,10 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
 	if (ttrace->entry_pending) {
 		printed = fprintf(trace->output, "%s", ttrace->entry_str);
 	} else {
-		fprintf(trace->output, " ... [");
+		printed += fprintf(trace->output, " ... [");
 		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
-		fprintf(trace->output, "]: %s()", sc->name);
+		printed += 9;
+		printed += fprintf(trace->output, "]: %s()", sc->name);
 	}
 
 	printed++; /* the closing ')' */
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index 6cb98f8570a2..7b55613924de 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -10,6 +10,7 @@ include/uapi/linux/fs.h
 include/uapi/linux/kcmp.h
 include/uapi/linux/kvm.h
 include/uapi/linux/in.h
+include/uapi/linux/mount.h
 include/uapi/linux/perf_event.h
 include/uapi/linux/prctl.h
 include/uapi/linux/sched.h
@@ -49,7 +50,6 @@ arch/parisc/include/uapi/asm/errno.h
 arch/powerpc/include/uapi/asm/errno.h
 arch/sparc/include/uapi/asm/errno.h
 arch/x86/include/uapi/asm/errno.h
-arch/powerpc/include/uapi/asm/unistd.h
 include/asm-generic/bitops/arch_hweight.h
 include/asm-generic/bitops/const_hweight.h
 include/asm-generic/bitops/__fls.h
diff --git a/tools/perf/perf-read-vdso.c b/tools/perf/perf-read-vdso.c
index 8c0ca0cc428f..aaa5210ea84a 100644
--- a/tools/perf/perf-read-vdso.c
+++ b/tools/perf/perf-read-vdso.c
@@ -5,17 +5,17 @@
 #define VDSO__MAP_NAME "[vdso]"
 
 /*
- * Include definition of find_vdso_map() also used in util/vdso.c for
+ * Include definition of find_map() also used in util/vdso.c for
  * building perf.
  */
-#include "util/find-vdso-map.c"
+#include "util/find-map.c"
 
 int main(void)
 {
 	void *start, *end;
 	size_t size, written;
 
-	if (find_vdso_map(&start, &end))
+	if (find_map(&start, &end, VDSO__MAP_NAME))
 		return 1;
 
 	size = end - start;
diff --git a/tools/perf/tests/shell/lib/probe_vfs_getname.sh b/tools/perf/tests/shell/lib/probe_vfs_getname.sh
index 1c16e56cd93e..7cb99b433888 100644
--- a/tools/perf/tests/shell/lib/probe_vfs_getname.sh
+++ b/tools/perf/tests/shell/lib/probe_vfs_getname.sh
@@ -13,7 +13,8 @@ add_probe_vfs_getname() {
 	local verbose=$1
 	if [ $had_vfs_getname -eq 1 ] ; then
 		line=$(perf probe -L getname_flags 2>&1 | egrep 'result.*=.*filename;' | sed -r 's/[[:space:]]+([[:digit:]]+)[[:space:]]+result->uptr.*/\1/')
-		perf probe $verbose "vfs_getname=getname_flags:${line} pathname=result->name:string"
+		perf probe -q       "vfs_getname=getname_flags:${line} pathname=result->name:string" || \
+		perf probe $verbose "vfs_getname=getname_flags:${line} pathname=filename:string"
 	fi
 }
 
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index b82f55fcc294..399f18ca71a3 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -119,4 +119,9 @@ int test__arch_unwind_sample(struct perf_sample *sample,
 			     struct thread *thread);
 #endif
 #endif
+
+#if defined(__arm__)
+int test__vectors_page(struct test *test, int subtest);
+#endif
+
 #endif /* TESTS_H */
diff --git a/tools/perf/trace/beauty/mount_flags.sh b/tools/perf/trace/beauty/mount_flags.sh
index 45547573a1db..847850b2ef6c 100755
--- a/tools/perf/trace/beauty/mount_flags.sh
+++ b/tools/perf/trace/beauty/mount_flags.sh
@@ -5,11 +5,11 @@
 
 printf "static const char *mount_flags[] = {\n"
 regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MS_([[:alnum:]_]+)[[:space:]]+([[:digit:]]+)[[:space:]]*.*'
-egrep $regex ${header_dir}/fs.h | egrep -v '(MSK|VERBOSE|MGC_VAL)\>' | \
+egrep $regex ${header_dir}/mount.h | egrep -v '(MSK|VERBOSE|MGC_VAL)\>' | \
 	sed -r "s/$regex/\2 \2 \1/g" | sort -n | \
 	xargs printf "\t[%s ? (ilog2(%s) + 1) : 0] = \"%s\",\n"
 regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MS_([[:alnum:]_]+)[[:space:]]+\(1<<([[:digit:]]+)\)[[:space:]]*.*'
-egrep $regex ${header_dir}/fs.h | \
+egrep $regex ${header_dir}/mount.h | \
 	sed -r "s/$regex/\2 \1/g" | \
 	xargs printf "\t[%s + 1] = \"%s\",\n"
 printf "};\n"
diff --git a/tools/perf/trace/beauty/prctl_option.sh b/tools/perf/trace/beauty/prctl_option.sh
index d32f8f1124af..3109d7b05e11 100755
--- a/tools/perf/trace/beauty/prctl_option.sh
+++ b/tools/perf/trace/beauty/prctl_option.sh
@@ -4,7 +4,7 @@
 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
 
 printf "static const char *prctl_options[] = {\n"
-regex='^#define[[:space:]]+PR_([GS]ET\w+)[[:space:]]*([[:xdigit:]]+).*'
+regex='^#define[[:space:]]+PR_(\w+)[[:space:]]*([[:xdigit:]]+).*'
 egrep $regex ${header_dir}/prctl.h | grep -v PR_SET_PTRACER | \
 	sed -r "s/$regex/\2 \1/g"	| \
 	sort -n | xargs printf "\t[%s] = \"%s\",\n"
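
The old capture only matched PR_GET_*/PR_SET_* names; broadening it to
PR_(\w+) also picks up options such as PR_CAPBSET_READ, PR_MCE_KILL or
PR_CAP_AMBIENT (named here only as examples of non-GET/SET prctl options,
not as an exhaustive list of what the wider pattern adds).
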
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index ac9805e0bc76..70de8f6b3aee 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -1723,15 +1723,14 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
 	err = asprintf(&command,
 		 "%s %s%s --start-address=0x%016" PRIx64
 		 " --stop-address=0x%016" PRIx64
-		 " -l -d %s %s -C \"%s\" 2>/dev/null|grep -v \"%s:\"|expand",
+		 " -l -d %s %s -C \"$1\" 2>/dev/null|grep -v \"$1:\"|expand",
 		 opts->objdump_path ?: "objdump",
 		 opts->disassembler_style ? "-M " : "",
 		 opts->disassembler_style ?: "",
 		 map__rip_2objdump(map, sym->start),
 		 map__rip_2objdump(map, sym->end),
 		 opts->show_asm_raw ? "" : "--no-show-raw",
-		 opts->annotate_src ? "-S" : "",
-		 symfs_filename, symfs_filename);
+		 opts->annotate_src ? "-S" : "");
 
 	if (err < 0) {
 		pr_err("Failure allocating memory for the command to run\n");
@@ -1756,7 +1755,8 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
 		close(stdout_fd[0]);
 		dup2(stdout_fd[1], 1);
 		close(stdout_fd[1]);
-		execl("/bin/sh", "sh", "-c", command, NULL);
+		execl("/bin/sh", "sh", "-c", command, "--", symfs_filename,
+		      NULL);
 		perror(command);
 		exit(-1);
 	}
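
The execl() change above leans on plain POSIX sh behaviour: with
"sh -c <string> <arg0> <arg1> ...", the first argument after the command
string becomes $0 and the next one $1, so the filename reaches the embedded
"$1" without being re-parsed by the shell. A minimal standalone sketch of the
same pattern (the path below is made up for illustration):

  #include <stdio.h>
  #include <unistd.h>

  int main(void)
  {
  	/* "--" becomes $0 and the path becomes $1 inside the -c string,
  	 * so spaces and shell metacharacters pass through unmangled. */
  	const char *path = "/tmp/demo lib (v1).so";

  	execl("/bin/sh", "sh", "-c", "ls -l \"$1\"", "--", path, (char *)NULL);
  	perror("execl");	/* only reached if execl() itself fails */
  	return 1;
  }
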
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 32ef7bdca1cf..dc2212e12184 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -766,6 +766,7 @@ static enum match_result match_chain(struct callchain_cursor_node *node,
 			cnode->cycles_count += node->branch_flags.cycles;
 			cnode->iter_count += node->nr_loop_iter;
 			cnode->iter_cycles += node->iter_cycles;
+			cnode->from_count++;
 		}
 	}
 
@@ -1345,10 +1346,10 @@ static int branch_to_str(char *bf, int bfsize,
 static int branch_from_str(char *bf, int bfsize,
 			   u64 branch_count,
 			   u64 cycles_count, u64 iter_count,
-			   u64 iter_cycles)
+			   u64 iter_cycles, u64 from_count)
 {
 	int printed = 0, i = 0;
-	u64 cycles;
+	u64 cycles, v = 0;
 
 	cycles = cycles_count / branch_count;
 	if (cycles) {
@@ -1357,14 +1358,16 @@ static int branch_from_str(char *bf, int bfsize,
 				bf + printed, bfsize - printed);
 	}
 
-	if (iter_count) {
-		printed += count_pri64_printf(i++, "iter",
-				iter_count,
-				bf + printed, bfsize - printed);
+	if (iter_count && from_count) {
+		v = iter_count / from_count;
+		if (v) {
+			printed += count_pri64_printf(i++, "iter",
+					v, bf + printed, bfsize - printed);
 
-		printed += count_pri64_printf(i++, "avg_cycles",
-				iter_cycles / iter_count,
-				bf + printed, bfsize - printed);
+			printed += count_pri64_printf(i++, "avg_cycles",
+					iter_cycles / iter_count,
+					bf + printed, bfsize - printed);
+		}
 	}
 
 	if (i)
@@ -1377,6 +1380,7 @@ static int counts_str_build(char *bf, int bfsize,
 			     u64 branch_count, u64 predicted_count,
 			     u64 abort_count, u64 cycles_count,
 			     u64 iter_count, u64 iter_cycles,
+			     u64 from_count,
 			     struct branch_type_stat *brtype_stat)
 {
 	int printed;
@@ -1389,7 +1393,8 @@ static int counts_str_build(char *bf, int bfsize,
 				predicted_count, abort_count, brtype_stat);
 	} else {
 		printed = branch_from_str(bf, bfsize, branch_count,
-				cycles_count, iter_count, iter_cycles);
+				cycles_count, iter_count, iter_cycles,
+				from_count);
 	}
 
 	if (!printed)
@@ -1402,13 +1407,14 @@ static int callchain_counts_printf(FILE *fp, char *bf, int bfsize,
 				   u64 branch_count, u64 predicted_count,
 				   u64 abort_count, u64 cycles_count,
 				   u64 iter_count, u64 iter_cycles,
+				   u64 from_count,
 				   struct branch_type_stat *brtype_stat)
 {
 	char str[256];
 
 	counts_str_build(str, sizeof(str), branch_count,
 			 predicted_count, abort_count, cycles_count,
-			 iter_count, iter_cycles, brtype_stat);
+			 iter_count, iter_cycles, from_count, brtype_stat);
 
 	if (fp)
 		return fprintf(fp, "%s", str);
@@ -1422,6 +1428,7 @@ int callchain_list_counts__printf_value(struct callchain_list *clist,
 	u64 branch_count, predicted_count;
 	u64 abort_count, cycles_count;
 	u64 iter_count, iter_cycles;
+	u64 from_count;
 
 	branch_count = clist->branch_count;
 	predicted_count = clist->predicted_count;
@@ -1429,11 +1436,12 @@ int callchain_list_counts__printf_value(struct callchain_list *clist,
 	cycles_count = clist->cycles_count;
 	iter_count = clist->iter_count;
 	iter_cycles = clist->iter_cycles;
+	from_count = clist->from_count;
 
 	return callchain_counts_printf(fp, bf, bfsize, branch_count,
 				       predicted_count, abort_count,
 				       cycles_count, iter_count, iter_cycles,
-				       &clist->brtype_stat);
+				       from_count, &clist->brtype_stat);
 }
 
 static void free_callchain_node(struct callchain_node *node)
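
As a made-up example of the counting change: if a given branch source is hit
4 times (from_count == 4) and accumulates 12 loop iterations over those hits
(iter_count == 12), the "iter" value printed above is now 12 / 4 = 3
iterations per occurrence instead of the raw, ever-growing 12.
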
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 154560b1eb65..99d38ac019b8 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -118,6 +118,7 @@ struct callchain_list {
 		bool		has_children;
 	};
 	u64			branch_count;
+	u64			from_count;
 	u64			predicted_count;
 	u64			abort_count;
 	u64			cycles_count;
diff --git a/tools/perf/util/find-vdso-map.c b/tools/perf/util/find-map.c
similarity index 71%
rename from tools/perf/util/find-vdso-map.c
rename to tools/perf/util/find-map.c
index d7823e3508fc..7b2300588ece 100644
--- a/tools/perf/util/find-vdso-map.c
+++ b/tools/perf/util/find-map.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-static int find_vdso_map(void **start, void **end)
+static int find_map(void **start, void **end, const char *name)
 {
 	FILE *maps;
 	char line[128];
@@ -7,7 +7,7 @@ static int find_vdso_map(void **start, void **end)
 
 	maps = fopen("/proc/self/maps", "r");
 	if (!maps) {
-		fprintf(stderr, "vdso: cannot open maps\n");
+		fprintf(stderr, "cannot open maps\n");
 		return -1;
 	}
 
@@ -21,8 +21,7 @@ static int find_vdso_map(void **start, void **end)
 		if (m < 0)
 			continue;
 
-		if (!strncmp(&line[m], VDSO__MAP_NAME,
-			     sizeof(VDSO__MAP_NAME) - 1))
+		if (!strncmp(&line[m], name, strlen(name)))
 			found = 1;
 	}
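
A rough usage sketch of the generalized helper (assuming it is built from
inside tools/perf so the relative include resolves, the same way
perf-read-vdso.c above pulls it in; "[vectors]" is the name the ARM 32-bit
vectors page carries in /proc/self/maps):

  #include <stdio.h>
  #include <string.h>
  #include "util/find-map.c"	/* defines find_map() */

  int main(void)
  {
  	void *start, *end;

  	/* pass "[vdso]" instead to locate the vDSO mapping */
  	if (find_map(&start, &end, "[vectors]"))
  		return 1;

  	printf("[vectors]: %p-%p\n", start, end);
  	return 0;
  }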
 
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 6fcb3bce0442..143f7057d581 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2005,7 +2005,7 @@ static void save_iterations(struct iterations *iter,
 {
 	int i;
 
-	iter->nr_loop_iter = nr;
+	iter->nr_loop_iter++;
 	iter->cycles = 0;
 
 	for (i = 0; i < nr; i++)
diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c
index 9005fbe0780e..23092fd6451d 100644
--- a/tools/perf/util/strbuf.c
+++ b/tools/perf/util/strbuf.c
@@ -109,7 +109,6 @@ static int strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap)
 			return ret;
 		}
 		len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap_saved);
-		va_end(ap_saved);
 		if (len > strbuf_avail(sb)) {
 			pr_debug("this should not happen, your vsnprintf is broken");
 			va_end(ap_saved);
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 01f2c7385e38..48efad6d0f90 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -614,6 +614,7 @@ int modules__parse(const char *filename, void *arg,
 static bool symbol__is_idle(const char *name)
 {
 	const char * const idle_symbols[] = {
+		"arch_cpu_idle",
 		"cpu_idle",
 		"cpu_startup_entry",
 		"intel_idle",
diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c
index 741af209b19d..3702cba11d7d 100644
--- a/tools/perf/util/vdso.c
+++ b/tools/perf/util/vdso.c
@@ -18,10 +18,10 @@
 #include "debug.h"
 
 /*
- * Include definition of find_vdso_map() also used in perf-read-vdso.c for
+ * Include definition of find_map() also used in perf-read-vdso.c for
  * building perf-read-vdso32 and perf-read-vdsox32.
  */
-#include "find-vdso-map.c"
+#include "find-map.c"
 
 #define VDSO__TEMP_FILE_NAME "/tmp/perf-vdso.so-XXXXXX"
 
@@ -76,7 +76,7 @@ static char *get_file(struct vdso_file *vdso_file)
 	if (vdso_file->found)
 		return vdso_file->temp_file_name;
 
-	if (vdso_file->error || find_vdso_map(&start, &end))
+	if (vdso_file->error || find_map(&start, &end, VDSO__MAP_NAME))
 		return NULL;
 
 	size = end - start;
diff --git a/tools/thermal/tmon/Makefile b/tools/thermal/tmon/Makefile
index 89a2444c1df2..59e417ec3e13 100644
--- a/tools/thermal/tmon/Makefile
+++ b/tools/thermal/tmon/Makefile
@@ -6,7 +6,7 @@ VERSION = 1.0
 
 BINDIR=usr/bin
 WARNFLAGS=-Wall -Wshadow -W -Wformat -Wimplicit-function-declaration -Wimplicit-int
-override CFLAGS+= -O1 ${WARNFLAGS}
+override CFLAGS+= $(call cc-option,-O3,-O1) ${WARNFLAGS}
 # Add "-fstack-protector" only if toolchain supports it.
 override CFLAGS+= $(call cc-option,-fstack-protector-strong)
 CC?= $(CROSS_COMPILE)gcc


* Re: [GIT PULL] perf fixes
  2019-01-11  7:44 [GIT PULL] perf fixes Ingo Molnar
@ 2019-01-11 18:00 ` pr-tracker-bot
  0 siblings, 0 replies; 316+ messages in thread
From: pr-tracker-bot @ 2019-01-11 18:00 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Arnaldo Carvalho de Melo,
	Peter Zijlstra, Thomas Gleixner, Andrew Morton

The pull request you sent on Fri, 11 Jan 2019 08:44:18 +0100:

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/40a31da414c39e3cd8c4137c1ceedf59b7ffd4ce

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker


* Re: [GIT PULL] perf fixes
  2019-02-17 10:10 Ingo Molnar
@ 2019-02-17 16:50 ` pr-tracker-bot
  0 siblings, 0 replies; 316+ messages in thread
From: pr-tracker-bot @ 2019-02-17 16:50 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Peter Zijlstra,
	Arnaldo Carvalho de Melo, Thomas Gleixner, Andrew Morton

The pull request you sent on Sun, 17 Feb 2019 11:10:51 +0100:

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/dd6f29da695dbfe5a8ff84ebfdd2110d68e8f511

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker


* [GIT PULL] perf fixes
@ 2019-02-17 10:10 Ingo Molnar
  2019-02-17 16:50 ` pr-tracker-bot
  0 siblings, 1 reply; 316+ messages in thread
From: Ingo Molnar @ 2019-02-17 10:10 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Thomas Gleixner, Andrew Morton

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: 528871b456026e6127d95b1b2bd8e3a003dc1614 perf/core: Fix impossible ring-buffer sizes warning

Two fixes on the kernel side: fix an over-eager sanity check that made 
larger perf ring-buffer allocations fail, plus fix crashes in the Intel BTS 
code in a corner case found by fuzzing.

 Thanks,

	Ingo

------------------>
Ingo Molnar (1):
      perf/core: Fix impossible ring-buffer sizes warning

Jiri Olsa (1):
      perf/x86: Add check_period PMU callback


 arch/x86/events/core.c       | 14 ++++++++++++++
 arch/x86/events/intel/core.c |  9 +++++++++
 arch/x86/events/perf_event.h | 16 ++++++++++++++--
 include/linux/perf_event.h   |  5 +++++
 kernel/events/core.c         | 16 ++++++++++++++++
 kernel/events/ring_buffer.c  |  2 +-
 6 files changed, 59 insertions(+), 3 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 374a19712e20..b684f0294f35 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2278,6 +2278,19 @@ void perf_check_microcode(void)
 		x86_pmu.check_microcode();
 }
 
+static int x86_pmu_check_period(struct perf_event *event, u64 value)
+{
+	if (x86_pmu.check_period && x86_pmu.check_period(event, value))
+		return -EINVAL;
+
+	if (value && x86_pmu.limit_period) {
+		if (x86_pmu.limit_period(event, value) > value)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
 static struct pmu pmu = {
 	.pmu_enable		= x86_pmu_enable,
 	.pmu_disable		= x86_pmu_disable,
@@ -2302,6 +2315,7 @@ static struct pmu pmu = {
 	.event_idx		= x86_pmu_event_idx,
 	.sched_task		= x86_pmu_sched_task,
 	.task_ctx_size          = sizeof(struct x86_perf_task_context),
+	.check_period		= x86_pmu_check_period,
 };
 
 void arch_perf_update_userpage(struct perf_event *event,
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index daafb893449b..730978dff63f 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3587,6 +3587,11 @@ static void intel_pmu_sched_task(struct perf_event_context *ctx,
 	intel_pmu_lbr_sched_task(ctx, sched_in);
 }
 
+static int intel_pmu_check_period(struct perf_event *event, u64 value)
+{
+	return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0;
+}
+
 PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
 
 PMU_FORMAT_ATTR(ldlat, "config1:0-15");
@@ -3667,6 +3672,8 @@ static __initconst const struct x86_pmu core_pmu = {
 	.cpu_starting		= intel_pmu_cpu_starting,
 	.cpu_dying		= intel_pmu_cpu_dying,
 	.cpu_dead		= intel_pmu_cpu_dead,
+
+	.check_period		= intel_pmu_check_period,
 };
 
 static struct attribute *intel_pmu_attrs[];
@@ -3711,6 +3718,8 @@ static __initconst const struct x86_pmu intel_pmu = {
 
 	.guest_get_msrs		= intel_guest_get_msrs,
 	.sched_task		= intel_pmu_sched_task,
+
+	.check_period		= intel_pmu_check_period,
 };
 
 static __init void intel_clovertown_quirk(void)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 78d7b7031bfc..d46fd6754d92 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -646,6 +646,11 @@ struct x86_pmu {
 	 * Intel host/guest support (KVM)
 	 */
 	struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
+
+	/*
+	 * Check period value for PERF_EVENT_IOC_PERIOD ioctl.
+	 */
+	int (*check_period) (struct perf_event *event, u64 period);
 };
 
 struct x86_perf_task_context {
@@ -857,7 +862,7 @@ static inline int amd_pmu_init(void)
 
 #ifdef CONFIG_CPU_SUP_INTEL
 
-static inline bool intel_pmu_has_bts(struct perf_event *event)
+static inline bool intel_pmu_has_bts_period(struct perf_event *event, u64 period)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	unsigned int hw_event, bts_event;
@@ -868,7 +873,14 @@ static inline bool intel_pmu_has_bts(struct perf_event *event)
 	hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
 	bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
 
-	return hw_event == bts_event && hwc->sample_period == 1;
+	return hw_event == bts_event && period == 1;
+}
+
+static inline bool intel_pmu_has_bts(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	return intel_pmu_has_bts_period(event, hwc->sample_period);
 }
 
 int intel_pmu_save_and_restart(struct perf_event *event);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 1d5c551a5add..e1a051724f7e 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -447,6 +447,11 @@ struct pmu {
 	 * Filter events for PMU-specific reasons.
 	 */
 	int (*filter_match)		(struct perf_event *event); /* optional */
+
+	/*
+	 * Check period value for PERF_EVENT_IOC_PERIOD ioctl.
+	 */
+	int (*check_period)		(struct perf_event *event, u64 value); /* optional */
 };
 
 enum perf_addr_filter_action_t {
diff --git a/kernel/events/core.c b/kernel/events/core.c
index e5ede6918050..26d6edab051a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4963,6 +4963,11 @@ static void __perf_event_period(struct perf_event *event,
 	}
 }
 
+static int perf_event_check_period(struct perf_event *event, u64 value)
+{
+	return event->pmu->check_period(event, value);
+}
+
 static int perf_event_period(struct perf_event *event, u64 __user *arg)
 {
 	u64 value;
@@ -4979,6 +4984,9 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
 	if (event->attr.freq && value > sysctl_perf_event_sample_rate)
 		return -EINVAL;
 
+	if (perf_event_check_period(event, value))
+		return -EINVAL;
+
 	event_function_call(event, __perf_event_period, &value);
 
 	return 0;
@@ -9391,6 +9399,11 @@ static int perf_pmu_nop_int(struct pmu *pmu)
 	return 0;
 }
 
+static int perf_event_nop_int(struct perf_event *event, u64 value)
+{
+	return 0;
+}
+
 static DEFINE_PER_CPU(unsigned int, nop_txn_flags);
 
 static void perf_pmu_start_txn(struct pmu *pmu, unsigned int flags)
@@ -9691,6 +9704,9 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type)
 		pmu->pmu_disable = perf_pmu_nop_void;
 	}
 
+	if (!pmu->check_period)
+		pmu->check_period = perf_event_nop_int;
+
 	if (!pmu->event_idx)
 		pmu->event_idx = perf_event_idx_default;
 
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 309ef5a64af5..5ab4fe3b1dcc 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -734,7 +734,7 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
 	size = sizeof(struct ring_buffer);
 	size += nr_pages * sizeof(void *);
 
-	if (order_base_2(size) >= MAX_ORDER)
+	if (order_base_2(size) >= PAGE_SHIFT+MAX_ORDER)
 		goto fail;
 
 	rb = kzalloc(size, GFP_KERNEL);
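
To see why the previous bound was over-eager, here is a tiny user-space
sketch of the arithmetic (illustrative numbers only: 4 KiB pages, a MAX_ORDER
of 11 and a 64-byte header; none of these figures are taken from the
changelog):

  #include <stdio.h>

  #define PAGE_SHIFT 12
  #define MAX_ORDER  11

  /* smallest o such that (1UL << o) >= n, i.e. ceil(log2(n)) */
  static int order_base_2(unsigned long n)
  {
  	int o = 0;

  	while ((1UL << o) < n)
  		o++;
  	return o;
  }

  int main(void)
  {
  	unsigned long nr_pages = 512;	/* a 2 MiB ring buffer */
  	unsigned long size = 64 + nr_pages * sizeof(void *);

  	printf("metadata bytes: %lu, order: %d\n", size, order_base_2(size));
  	printf("old bound %d rejects it, new bound %d accepts it\n",
  	       MAX_ORDER, PAGE_SHIFT + MAX_ORDER);
  	return 0;
  }

The metadata for that 2 MiB buffer is only about 4 KiB, but its byte order
(13) was being compared against a limit expressed in page orders (11), so
the request failed; PAGE_SHIFT + MAX_ORDER keeps the comparison in byte
units.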


* Re: [GIT PULL] perf fixes
  2019-02-10  9:01 Ingo Molnar
@ 2019-02-10 18:30 ` pr-tracker-bot
  0 siblings, 0 replies; 316+ messages in thread
From: pr-tracker-bot @ 2019-02-10 18:30 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Peter Zijlstra,
	Arnaldo Carvalho de Melo, Thomas Gleixner

The pull request you sent on Sun, 10 Feb 2019 10:01:09 +0100:

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/212146f0800e151bd61b98fb6fe4b8b6778a649a

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker


* [GIT PULL] perf fixes
@ 2019-02-10  9:01 Ingo Molnar
  2019-02-10 18:30 ` pr-tracker-bot
  0 siblings, 1 reply; 316+ messages in thread
From: Ingo Molnar @ 2019-02-10  9:01 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Peter Zijlstra, Arnaldo Carvalho de Melo, Thomas Gleixner

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: 3bb2600657dac78580e5b8fecc202eaaff5d4ced Merge tag 'perf-urgent-for-mingo-5.0-20190205' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent

A couple of kernel side fixes:

 - Fix the Intel uncore driver on certain hardware configurations
 - Fix a CPU hotplug related memory allocation bug
 - Remove a spurious WARN()

Plus a handful of perf tooling fixes.

 Thanks,

	Ingo

------------------>
Arnaldo Carvalho de Melo (4):
      perf clang: Do not use 'return std::move(something)'
      tools headers uapi: Sync linux/in.h copy from the kernel sources
      perf symbols: Add fallback definitions for GELF_ST_VISIBILITY()
      perf trace: Support multiple "vfs_getname" probes

Gustavo A. R. Silva (1):
      perf tests evsel-tp-sched: Fix bitwise operator

Jiri Olsa (1):
      perf symbols: Filter out hidden symbols from labels

Kan Liang (1):
      perf/x86/intel/uncore: Add Node ID mask

Mark Rutland (1):
      perf/core: Don't WARN() for impossible ring-buffer sizes

Peter Zijlstra (1):
      perf/x86/intel: Delay memory deallocation until x86_pmu_dead_cpu()

Ravi Bangoria (1):
      perf mem/c2c: Fix perf_mem_events to support powerpc

Tony Jones (1):
      perf script python: Add Python3 support to tests/attr.py


 arch/x86/events/intel/core.c              | 16 +++++++++++-----
 arch/x86/events/intel/uncore_snbep.c      |  4 +++-
 kernel/events/ring_buffer.c               |  3 +++
 tools/include/uapi/linux/in.h             |  2 +-
 tools/perf/Documentation/perf-c2c.txt     | 16 ++++++++++++----
 tools/perf/Documentation/perf-mem.txt     |  2 +-
 tools/perf/arch/powerpc/util/Build        |  1 +
 tools/perf/arch/powerpc/util/mem-events.c | 11 +++++++++++
 tools/perf/builtin-trace.c                | 25 +++++++++++++++++-------
 tools/perf/tests/attr.py                  | 32 ++++++++++++++++++-------------
 tools/perf/tests/evsel-tp-sched.c         |  2 +-
 tools/perf/util/c++/clang.cpp             |  2 +-
 tools/perf/util/mem-events.c              |  2 +-
 tools/perf/util/symbol-elf.c              | 23 +++++++++++++++++++++-
 14 files changed, 105 insertions(+), 36 deletions(-)
 create mode 100644 tools/perf/arch/powerpc/util/mem-events.c

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 40e12cfc87f6..daafb893449b 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3558,6 +3558,14 @@ static void free_excl_cntrs(int cpu)
 }
 
 static void intel_pmu_cpu_dying(int cpu)
+{
+	fini_debug_store_on_cpu(cpu);
+
+	if (x86_pmu.counter_freezing)
+		disable_counter_freeze();
+}
+
+static void intel_pmu_cpu_dead(int cpu)
 {
 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
 	struct intel_shared_regs *pc;
@@ -3570,11 +3578,6 @@ static void intel_pmu_cpu_dying(int cpu)
 	}
 
 	free_excl_cntrs(cpu);
-
-	fini_debug_store_on_cpu(cpu);
-
-	if (x86_pmu.counter_freezing)
-		disable_counter_freeze();
 }
 
 static void intel_pmu_sched_task(struct perf_event_context *ctx,
@@ -3663,6 +3666,7 @@ static __initconst const struct x86_pmu core_pmu = {
 	.cpu_prepare		= intel_pmu_cpu_prepare,
 	.cpu_starting		= intel_pmu_cpu_starting,
 	.cpu_dying		= intel_pmu_cpu_dying,
+	.cpu_dead		= intel_pmu_cpu_dead,
 };
 
 static struct attribute *intel_pmu_attrs[];
@@ -3703,6 +3707,8 @@ static __initconst const struct x86_pmu intel_pmu = {
 	.cpu_prepare		= intel_pmu_cpu_prepare,
 	.cpu_starting		= intel_pmu_cpu_starting,
 	.cpu_dying		= intel_pmu_cpu_dying,
+	.cpu_dead		= intel_pmu_cpu_dead,
+
 	.guest_get_msrs		= intel_guest_get_msrs,
 	.sched_task		= intel_pmu_sched_task,
 };
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index c07bee31abe8..b10e04387f38 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -1222,6 +1222,8 @@ static struct pci_driver snbep_uncore_pci_driver = {
 	.id_table	= snbep_uncore_pci_ids,
 };
 
+#define NODE_ID_MASK	0x7
+
 /*
  * build pci bus to socket mapping
  */
@@ -1243,7 +1245,7 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool
 		err = pci_read_config_dword(ubox_dev, nodeid_loc, &config);
 		if (err)
 			break;
-		nodeid = config;
+		nodeid = config & NODE_ID_MASK;
 		/* get the Node ID mapping */
 		err = pci_read_config_dword(ubox_dev, idmap_loc, &config);
 		if (err)
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 4a9937076331..309ef5a64af5 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -734,6 +734,9 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
 	size = sizeof(struct ring_buffer);
 	size += nr_pages * sizeof(void *);
 
+	if (order_base_2(size) >= MAX_ORDER)
+		goto fail;
+
 	rb = kzalloc(size, GFP_KERNEL);
 	if (!rb)
 		goto fail;
diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h
index f6052e70bf40..a55cb8b10165 100644
--- a/tools/include/uapi/linux/in.h
+++ b/tools/include/uapi/linux/in.h
@@ -268,7 +268,7 @@ struct sockaddr_in {
 #define	IN_MULTICAST(a)		IN_CLASSD(a)
 #define	IN_MULTICAST_NET	0xe0000000
 
-#define	IN_BADCLASS(a)		((((long int) (a) ) == 0xffffffff)
+#define	IN_BADCLASS(a)		(((long int) (a) ) == (long int)0xffffffff)
 #define	IN_EXPERIMENTAL(a)	IN_BADCLASS((a))
 
 #define	IN_CLASSE(a)		((((long int) (a)) & 0xf0000000) == 0xf0000000)
diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt
index 095aebdc5bb7..e6150f21267d 100644
--- a/tools/perf/Documentation/perf-c2c.txt
+++ b/tools/perf/Documentation/perf-c2c.txt
@@ -19,8 +19,11 @@ C2C stands for Cache To Cache.
 The perf c2c tool provides means for Shared Data C2C/HITM analysis. It allows
 you to track down the cacheline contentions.
 
-The tool is based on x86's load latency and precise store facility events
-provided by Intel CPUs. These events provide:
+On x86, the tool is based on load latency and precise store facility events
+provided by Intel CPUs. On PowerPC, the tool uses random instruction sampling
+with thresholding feature.
+
+These events provide:
   - memory address of the access
   - type of the access (load and store details)
   - latency (in cycles) of the load access
@@ -46,7 +49,7 @@ RECORD OPTIONS
 
 -l::
 --ldlat::
-	Configure mem-loads latency.
+	Configure mem-loads latency. (x86 only)
 
 -k::
 --all-kernel::
@@ -119,11 +122,16 @@ Following perf record options are configured by default:
   -W,-d,--phys-data,--sample-cpu
 
 Unless specified otherwise with '-e' option, following events are monitored by
-default:
+default on x86:
 
   cpu/mem-loads,ldlat=30/P
   cpu/mem-stores/P
 
+and following on PowerPC:
+
+  cpu/mem-loads/
+  cpu/mem-stores/
+
 User can pass any 'perf record' option behind '--' mark, like (to enable
 callchains and system wide monitoring):
 
diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt
index f8d2167cf3e7..199ea0f0a6c0 100644
--- a/tools/perf/Documentation/perf-mem.txt
+++ b/tools/perf/Documentation/perf-mem.txt
@@ -82,7 +82,7 @@ RECORD OPTIONS
 	Be more verbose (show counter open errors, etc)
 
 --ldlat <n>::
-	Specify desired latency for loads event.
+	Specify desired latency for loads event. (x86 only)
 
 In addition, for report all perf report options are valid, and for record
 all perf record options.
diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build
index 2e6595310420..ba98bd006488 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -2,6 +2,7 @@ libperf-y += header.o
 libperf-y += sym-handling.o
 libperf-y += kvm-stat.o
 libperf-y += perf_regs.o
+libperf-y += mem-events.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/mem-events.c b/tools/perf/arch/powerpc/util/mem-events.c
new file mode 100644
index 000000000000..d08311f04e95
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/mem-events.c
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "mem-events.h"
+
+/* PowerPC does not support 'ldlat' parameter. */
+char *perf_mem_events__name(int i)
+{
+	if (i == PERF_MEM_EVENTS__LOAD)
+		return (char *) "cpu/mem-loads/";
+
+	return (char *) "cpu/mem-stores/";
+}
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index ed4583128b9c..b36061cd1ab8 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2514,19 +2514,30 @@ static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
 
 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
 {
-	struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
+	bool found = false;
+	struct perf_evsel *evsel, *tmp;
+	struct parse_events_error err = { .idx = 0, };
+	int ret = parse_events(evlist, "probe:vfs_getname*", &err);
 
-	if (IS_ERR(evsel))
+	if (ret)
 		return false;
 
-	if (perf_evsel__field(evsel, "pathname") == NULL) {
+	evlist__for_each_entry_safe(evlist, evsel, tmp) {
+		if (!strstarts(perf_evsel__name(evsel), "probe:vfs_getname"))
+			continue;
+
+		if (perf_evsel__field(evsel, "pathname")) {
+			evsel->handler = trace__vfs_getname;
+			found = true;
+			continue;
+		}
+
+		list_del_init(&evsel->node);
+		evsel->evlist = NULL;
 		perf_evsel__delete(evsel);
-		return false;
 	}
 
-	evsel->handler = trace__vfs_getname;
-	perf_evlist__add(evlist, evsel);
-	return true;
+	return found;
 }
 
 static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/attr.py
index 44090a9a19f3..e952127e4fb0 100644
--- a/tools/perf/tests/attr.py
+++ b/tools/perf/tests/attr.py
@@ -1,6 +1,8 @@
 #! /usr/bin/python
 # SPDX-License-Identifier: GPL-2.0
 
+from __future__ import print_function
+
 import os
 import sys
 import glob
@@ -8,7 +10,11 @@ import optparse
 import tempfile
 import logging
 import shutil
-import ConfigParser
+
+try:
+    import configparser
+except ImportError:
+    import ConfigParser as configparser
 
 def data_equal(a, b):
     # Allow multiple values in assignment separated by '|'
@@ -100,20 +106,20 @@ class Event(dict):
     def equal(self, other):
         for t in Event.terms:
             log.debug("      [%s] %s %s" % (t, self[t], other[t]));
-            if not self.has_key(t) or not other.has_key(t):
+            if t not in self or t not in other:
                 return False
             if not data_equal(self[t], other[t]):
                 return False
         return True
 
     def optional(self):
-        if self.has_key('optional') and self['optional'] == '1':
+        if 'optional' in self and self['optional'] == '1':
             return True
         return False
 
     def diff(self, other):
         for t in Event.terms:
-            if not self.has_key(t) or not other.has_key(t):
+            if t not in self or t not in other:
                 continue
             if not data_equal(self[t], other[t]):
                 log.warning("expected %s=%s, got %s" % (t, self[t], other[t]))
@@ -134,7 +140,7 @@ class Event(dict):
 #   - expected values assignments
 class Test(object):
     def __init__(self, path, options):
-        parser = ConfigParser.SafeConfigParser()
+        parser = configparser.SafeConfigParser()
         parser.read(path)
 
         log.warning("running '%s'" % path)
@@ -193,7 +199,7 @@ class Test(object):
         return True
 
     def load_events(self, path, events):
-        parser_event = ConfigParser.SafeConfigParser()
+        parser_event = configparser.SafeConfigParser()
         parser_event.read(path)
 
         # The event record section header contains 'event' word,
@@ -207,7 +213,7 @@ class Test(object):
             # Read parent event if there's any
             if (':' in section):
                 base = section[section.index(':') + 1:]
-                parser_base = ConfigParser.SafeConfigParser()
+                parser_base = configparser.SafeConfigParser()
                 parser_base.read(self.test_dir + '/' + base)
                 base_items = parser_base.items('event')
 
@@ -322,9 +328,9 @@ def run_tests(options):
     for f in glob.glob(options.test_dir + '/' + options.test):
         try:
             Test(f, options).run()
-        except Unsup, obj:
+        except Unsup as obj:
             log.warning("unsupp  %s" % obj.getMsg())
-        except Notest, obj:
+        except Notest as obj:
             log.warning("skipped %s" % obj.getMsg())
 
 def setup_log(verbose):
@@ -363,7 +369,7 @@ def main():
     parser.add_option("-p", "--perf",
                       action="store", type="string", dest="perf")
     parser.add_option("-v", "--verbose",
-                      action="count", dest="verbose")
+                      default=0, action="count", dest="verbose")
 
     options, args = parser.parse_args()
     if args:
@@ -373,7 +379,7 @@ def main():
     setup_log(options.verbose)
 
     if not options.test_dir:
-        print 'FAILED no -d option specified'
+        print('FAILED no -d option specified')
         sys.exit(-1)
 
     if not options.test:
@@ -382,8 +388,8 @@ def main():
     try:
         run_tests(options)
 
-    except Fail, obj:
-        print "FAILED %s" % obj.getMsg();
+    except Fail as obj:
+        print("FAILED %s" % obj.getMsg())
         sys.exit(-1)
 
     sys.exit(0)
diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c
index 5f8501c68da4..5cbba70bcdd0 100644
--- a/tools/perf/tests/evsel-tp-sched.c
+++ b/tools/perf/tests/evsel-tp-sched.c
@@ -17,7 +17,7 @@ static int perf_evsel__test_field(struct perf_evsel *evsel, const char *name,
 		return -1;
 	}
 
-	is_signed = !!(field->flags | TEP_FIELD_IS_SIGNED);
+	is_signed = !!(field->flags & TEP_FIELD_IS_SIGNED);
 	if (should_be_signed && !is_signed) {
 		pr_debug("%s: \"%s\" signedness(%d) is wrong, should be %d\n",
 			 evsel->name, name, is_signed, should_be_signed);
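
The one-character change above is the whole fix: OR-ing the non-zero flag
TEP_FIELD_IS_SIGNED into field->flags always yields a non-zero result, so the
old test reported every field as signed.  A minimal stand-alone illustration
(not part of the patch; the #define is only a stand-in for the real
libtraceevent enum value):

        #include <stdio.h>

        #define TEP_FIELD_IS_SIGNED 1   /* stand-in for the libtraceevent flag */

        int main(void)
        {
                unsigned int flags = 0;         /* an unsigned tracepoint field */

                printf("%d\n", !!(flags | TEP_FIELD_IS_SIGNED)); /* old test: 1, always "signed" */
                printf("%d\n", !!(flags & TEP_FIELD_IS_SIGNED)); /* new test: 0, the bit is really checked */
                return 0;
        }
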
diff --git a/tools/perf/util/c++/clang.cpp b/tools/perf/util/c++/clang.cpp
index 89512504551b..39c0004f2886 100644
--- a/tools/perf/util/c++/clang.cpp
+++ b/tools/perf/util/c++/clang.cpp
@@ -160,7 +160,7 @@ getBPFObjectFromModule(llvm::Module *Module)
 	}
 	PM.run(*Module);
 
-	return std::move(Buffer);
+	return Buffer;
 }
 
 }
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index 93f74d8d3cdd..42c3e5a229d2 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -28,7 +28,7 @@ struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
 static char mem_loads_name[100];
 static bool mem_loads_name__init;
 
-char *perf_mem_events__name(int i)
+char * __weak perf_mem_events__name(int i)
 {
 	if (i == PERF_MEM_EVENTS__LOAD) {
 		if (!mem_loads_name__init) {
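
Tagging the generic definition __weak lets another object file supply a strong
definition that the linker picks instead, which is how perf usually lets an
architecture directory override a tools/perf/util helper.  Sketch of the
pattern with a hypothetical function name (not the perf code itself):

        /* util.c: weak default, used when nothing overrides it */
        #define __weak __attribute__((weak))

        const char * __weak backend_name(void)
        {
                return "generic";
        }

        /* arch/foo/util.c: strong definition, replaces the weak one at link time */
        const char *backend_name(void)
        {
                return "foo-specific";
        }
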
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 66a84d5846c8..dca7dfae69ad 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -19,6 +19,20 @@
 #define EM_AARCH64	183  /* ARM 64 bit */
 #endif
 
+#ifndef ELF32_ST_VISIBILITY
+#define ELF32_ST_VISIBILITY(o)	((o) & 0x03)
+#endif
+
+/* For ELF64 the definitions are the same.  */
+#ifndef ELF64_ST_VISIBILITY
+#define ELF64_ST_VISIBILITY(o)	ELF32_ST_VISIBILITY (o)
+#endif
+
+/* How to extract information held in the st_other field.  */
+#ifndef GELF_ST_VISIBILITY
+#define GELF_ST_VISIBILITY(val)	ELF64_ST_VISIBILITY (val)
+#endif
+
 typedef Elf64_Nhdr GElf_Nhdr;
 
 #ifdef HAVE_CPLUS_DEMANGLE_SUPPORT
@@ -87,6 +101,11 @@ static inline uint8_t elf_sym__type(const GElf_Sym *sym)
 	return GELF_ST_TYPE(sym->st_info);
 }
 
+static inline uint8_t elf_sym__visibility(const GElf_Sym *sym)
+{
+	return GELF_ST_VISIBILITY(sym->st_other);
+}
+
 #ifndef STT_GNU_IFUNC
 #define STT_GNU_IFUNC 10
 #endif
@@ -111,7 +130,9 @@ static inline int elf_sym__is_label(const GElf_Sym *sym)
 	return elf_sym__type(sym) == STT_NOTYPE &&
 		sym->st_name != 0 &&
 		sym->st_shndx != SHN_UNDEF &&
-		sym->st_shndx != SHN_ABS;
+		sym->st_shndx != SHN_ABS &&
+		elf_sym__visibility(sym) != STV_HIDDEN &&
+		elf_sym__visibility(sym) != STV_INTERNAL;
 }
 
 static bool elf_sym__filter(GElf_Sym *sym)
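
The new helper just pulls the STV_* visibility bits out of st_other, and the
label check above now skips hidden and internal symbols.  A stand-alone sketch
of the same extraction (symbol_is_exported is a made-up name; GELF_ST_VISIBILITY
and the STV_* constants come from elf.h/gelf.h, with the #ifndef fallbacks added
above covering headers that lack them):

        #include <gelf.h>

        static int symbol_is_exported(const GElf_Sym *sym)
        {
                switch (GELF_ST_VISIBILITY(sym->st_other)) {
                case STV_HIDDEN:
                case STV_INTERNAL:
                        return 0;       /* not visible outside the object */
                default:
                        return 1;       /* STV_DEFAULT or STV_PROTECTED */
                }
        }
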

* Re: [GIT PULL] perf fixes
  2018-11-30  6:25 Ingo Molnar
@ 2018-11-30 21:00 ` pr-tracker-bot
  0 siblings, 0 replies; 316+ messages in thread
From: pr-tracker-bot @ 2018-11-30 21:00 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Peter Zijlstra,
	Arnaldo Carvalho de Melo, Thomas Gleixner, Andrew Morton

The pull request you sent on Fri, 30 Nov 2018 07:25:07 +0100:

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/a1b3cf6d943800059adc262c4d839524c529db2d

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker

* [GIT PULL] perf fixes
@ 2018-11-30  6:25 Ingo Molnar
  2018-11-30 21:00 ` pr-tracker-bot
  0 siblings, 1 reply; 316+ messages in thread
From: Ingo Molnar @ 2018-11-30  6:25 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Thomas Gleixner, Andrew Morton

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: 09d3f015d1e1b4fee7e9bbdcf54201d239393391 uprobes: Fix handle_swbp() vs. unregister() + register() race once more

Misc fixes:

 - a counter freezing related regression fix,
 - an uprobes race fix,
 - an Intel PMU unusual event combination fix,
 - and diverse tooling fixes.

 Thanks,

	Ingo

------------------>
Andrea Parri (1):
      uprobes: Fix handle_swbp() vs. unregister() + register() race once more

Arnaldo Carvalho de Melo (5):
      tools build feature: Check if get_current_dir_name() is available
      tools headers uapi: Synchronize i915_drm.h
      tools arch x86: Update tools's copy of cpufeatures.h
      tools uapi asm-generic: Synchronize ioctls.h
      perf tools beauty ioctl: Support new ISO7816 commands

Jiri Olsa (5):
      perf tools: Fix crash on synthesizing the unit
      perf tools: Restore proper cwd on return from mnt namespace
      perf/x86/intel: Move branch tracing setup to the Intel-specific source file
      perf/x86/intel: Add generic branch tracing check to intel_pmu_has_bts()
      perf/x86/intel: Disallow precise_ip on BTS events

Peter Zijlstra (1):
      perf/x86/intel: Fix regression by default disabling perfmon v4 interrupt handling


 Documentation/admin-guide/kernel-parameters.txt |  3 +-
 arch/x86/events/core.c                          | 20 --------
 arch/x86/events/intel/core.c                    | 68 +++++++++++++++++++------
 arch/x86/events/perf_event.h                    | 13 +++--
 kernel/events/uprobes.c                         | 12 ++++-
 tools/arch/x86/include/asm/cpufeatures.h        |  2 +
 tools/build/Makefile.feature                    |  1 +
 tools/build/feature/Makefile                    |  4 ++
 tools/build/feature/test-all.c                  |  5 ++
 tools/build/feature/test-get_current_dir_name.c | 10 ++++
 tools/include/uapi/asm-generic/ioctls.h         |  2 +
 tools/include/uapi/drm/i915_drm.h               | 22 ++++++++
 tools/perf/Makefile.config                      |  5 ++
 tools/perf/tests/attr/base-record               |  2 +-
 tools/perf/trace/beauty/ioctl.c                 |  1 +
 tools/perf/util/Build                           |  1 +
 tools/perf/util/evsel.c                         |  2 +-
 tools/perf/util/get_current_dir_name.c          | 18 +++++++
 tools/perf/util/namespaces.c                    | 17 ++++++-
 tools/perf/util/namespaces.h                    |  1 +
 tools/perf/util/util.h                          |  4 ++
 21 files changed, 166 insertions(+), 47 deletions(-)
 create mode 100644 tools/build/feature/test-get_current_dir_name.c
 create mode 100644 tools/perf/util/get_current_dir_name.c

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 81d1d5a74728..5463d5a4d85c 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -856,7 +856,8 @@
 			causing system reset or hang due to sending
 			INIT from AP to BSP.
 
-	disable_counter_freezing [HW]
+	perf_v4_pmi=	[X86,INTEL]
+			Format: <bool>
 			Disable Intel PMU counter freezing feature.
 			The feature only exists starting from
 			Arch Perfmon v4 (Skylake and newer).
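
Going by the handler change further down in this patch (disable_counter_freezing
now defaults to true and is set to the negation of the parsed value), the new
option is a plain boolean, roughly:

        perf_v4_pmi=1   -> re-enables the counter-freezing based v4 PMI handler
        perf_v4_pmi=0   -> keeps it disabled (same as the new default)
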
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 106911b603bd..374a19712e20 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -438,26 +438,6 @@ int x86_setup_perfctr(struct perf_event *event)
 	if (config == -1LL)
 		return -EINVAL;
 
-	/*
-	 * Branch tracing:
-	 */
-	if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
-	    !attr->freq && hwc->sample_period == 1) {
-		/* BTS is not supported by this architecture. */
-		if (!x86_pmu.bts_active)
-			return -EOPNOTSUPP;
-
-		/* BTS is currently only allowed for user-mode. */
-		if (!attr->exclude_kernel)
-			return -EOPNOTSUPP;
-
-		/* disallow bts if conflicting events are present */
-		if (x86_add_exclusive(x86_lbr_exclusive_lbr))
-			return -EBUSY;
-
-		event->destroy = hw_perf_lbr_event_destroy;
-	}
-
 	hwc->config |= config;
 
 	return 0;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 273c62e81546..ecc3e34ca955 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2306,14 +2306,18 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
 	return handled;
 }
 
-static bool disable_counter_freezing;
+static bool disable_counter_freezing = true;
 static int __init intel_perf_counter_freezing_setup(char *s)
 {
-	disable_counter_freezing = true;
-	pr_info("Intel PMU Counter freezing feature disabled\n");
+	bool res;
+
+	if (kstrtobool(s, &res))
+		return -EINVAL;
+
+	disable_counter_freezing = !res;
 	return 1;
 }
-__setup("disable_counter_freezing", intel_perf_counter_freezing_setup);
+__setup("perf_v4_pmi=", intel_perf_counter_freezing_setup);
 
 /*
  * Simplified handler for Arch Perfmon v4:
@@ -2470,16 +2474,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 static struct event_constraint *
 intel_bts_constraints(struct perf_event *event)
 {
-	struct hw_perf_event *hwc = &event->hw;
-	unsigned int hw_event, bts_event;
-
-	if (event->attr.freq)
-		return NULL;
-
-	hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
-	bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
-
-	if (unlikely(hw_event == bts_event && hwc->sample_period == 1))
+	if (unlikely(intel_pmu_has_bts(event)))
 		return &bts_constraint;
 
 	return NULL;
@@ -3098,10 +3093,51 @@ static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event)
 	return flags;
 }
 
+static int intel_pmu_bts_config(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+
+	if (unlikely(intel_pmu_has_bts(event))) {
+		/* BTS is not supported by this architecture. */
+		if (!x86_pmu.bts_active)
+			return -EOPNOTSUPP;
+
+		/* BTS is currently only allowed for user-mode. */
+		if (!attr->exclude_kernel)
+			return -EOPNOTSUPP;
+
+		/* BTS is not allowed for precise events. */
+		if (attr->precise_ip)
+			return -EOPNOTSUPP;
+
+		/* disallow bts if conflicting events are present */
+		if (x86_add_exclusive(x86_lbr_exclusive_lbr))
+			return -EBUSY;
+
+		event->destroy = hw_perf_lbr_event_destroy;
+	}
+
+	return 0;
+}
+
+static int core_pmu_hw_config(struct perf_event *event)
+{
+	int ret = x86_pmu_hw_config(event);
+
+	if (ret)
+		return ret;
+
+	return intel_pmu_bts_config(event);
+}
+
 static int intel_pmu_hw_config(struct perf_event *event)
 {
 	int ret = x86_pmu_hw_config(event);
 
+	if (ret)
+		return ret;
+
+	ret = intel_pmu_bts_config(event);
 	if (ret)
 		return ret;
 
@@ -3127,7 +3163,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
 		/*
 		 * BTS is set up earlier in this path, so don't account twice
 		 */
-		if (!intel_pmu_has_bts(event)) {
+		if (!unlikely(intel_pmu_has_bts(event))) {
 			/* disallow lbr if conflicting events are present */
 			if (x86_add_exclusive(x86_lbr_exclusive_lbr))
 				return -EBUSY;
@@ -3596,7 +3632,7 @@ static __initconst const struct x86_pmu core_pmu = {
 	.enable_all		= core_pmu_enable_all,
 	.enable			= core_pmu_enable_event,
 	.disable		= x86_pmu_disable_event,
-	.hw_config		= x86_pmu_hw_config,
+	.hw_config		= core_pmu_hw_config,
 	.schedule_events	= x86_schedule_events,
 	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
 	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index adae087cecdd..78d7b7031bfc 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -859,11 +859,16 @@ static inline int amd_pmu_init(void)
 
 static inline bool intel_pmu_has_bts(struct perf_event *event)
 {
-	if (event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
-	    !event->attr.freq && event->hw.sample_period == 1)
-		return true;
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned int hw_event, bts_event;
+
+	if (event->attr.freq)
+		return false;
+
+	hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
+	bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
 
-	return false;
+	return hw_event == bts_event && hwc->sample_period == 1;
 }
 
 int intel_pmu_save_and_restart(struct perf_event *event);
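
With the helper rewritten this way, "has BTS" means a branch-instructions event
with a fixed sample period of 1 and attr.freq clear; intel_pmu_bts_config()
additionally demands exclude_kernel and now rejects precise_ip.  Roughly, the
attribute that ends up on the BTS path looks like this (illustrative sketch,
not taken from the patch; see <linux/perf_event.h> for the struct):

        struct perf_event_attr attr = {
                .type           = PERF_TYPE_HARDWARE,
                .config         = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
                .sample_period  = 1,    /* and attr.freq stays 0 */
                .exclude_kernel = 1,    /* BTS traces user mode only */
                /* .precise_ip != 0 on such an event now gets -EOPNOTSUPP */
        };
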
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 96d4bee83489..322e97bbb437 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -829,7 +829,7 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
 	BUG_ON((uprobe->offset & ~PAGE_MASK) +
 			UPROBE_SWBP_INSN_SIZE > PAGE_SIZE);
 
-	smp_wmb(); /* pairs with rmb() in find_active_uprobe() */
+	smp_wmb(); /* pairs with the smp_rmb() in handle_swbp() */
 	set_bit(UPROBE_COPY_INSN, &uprobe->flags);
 
  out:
@@ -2178,10 +2178,18 @@ static void handle_swbp(struct pt_regs *regs)
 	 * After we hit the bp, _unregister + _register can install the
 	 * new and not-yet-analyzed uprobe at the same address, restart.
 	 */
-	smp_rmb(); /* pairs with wmb() in install_breakpoint() */
 	if (unlikely(!test_bit(UPROBE_COPY_INSN, &uprobe->flags)))
 		goto out;
 
+	/*
+	 * Pairs with the smp_wmb() in prepare_uprobe().
+	 *
+	 * Guarantees that if we see the UPROBE_COPY_INSN bit set, then
+	 * we must also see the stores to &uprobe->arch performed by the
+	 * prepare_uprobe() call.
+	 */
+	smp_rmb();
+
 	/* Tracing handlers use ->utask to communicate with fetch methods */
 	if (!get_utask())
 		goto out;
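
The pairing the comment describes is the usual publish/consume pattern: the
writer orders all stores to the data before the store that publishes the flag,
and the reader, once it has seen the flag, issues a matching read barrier
before touching the data.  In the abstract (hypothetical names, kernel-style
primitives):

        /* writer (prepare_uprobe() side) */
        setup_payload(&obj->payload);           /* stores to the data ...    */
        smp_wmb();                              /* ... ordered before ...    */
        set_bit(READY_BIT, &obj->flags);        /* ... the flag being set    */

        /* reader (handle_swbp() side) */
        if (test_bit(READY_BIT, &obj->flags)) {
                smp_rmb();                      /* pairs with the smp_wmb() above   */
                use_payload(&obj->payload);     /* guaranteed to see the stores now */
        }
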
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 89a048c2faec..28c4a502b419 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -331,6 +331,8 @@
 #define X86_FEATURE_LA57		(16*32+16) /* 5-level page tables */
 #define X86_FEATURE_RDPID		(16*32+22) /* RDPID instruction */
 #define X86_FEATURE_CLDEMOTE		(16*32+25) /* CLDEMOTE instruction */
+#define X86_FEATURE_MOVDIRI		(16*32+27) /* MOVDIRI instruction */
+#define X86_FEATURE_MOVDIR64B		(16*32+28) /* MOVDIR64B instruction */
 
 /* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
 #define X86_FEATURE_OVERFLOW_RECOV	(17*32+ 0) /* MCA overflow recovery support */
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index f216b2f5c3d7..d74bb9414d7c 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -33,6 +33,7 @@ FEATURE_TESTS_BASIC :=                  \
         dwarf_getlocations              \
         fortify-source                  \
         sync-compare-and-swap           \
+        get_current_dir_name            \
         glibc                           \
         gtk2                            \
         gtk2-infobar                    \
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 0516259be70f..304b984f11b9 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -7,6 +7,7 @@ FILES=                                          \
          test-dwarf_getlocations.bin            \
          test-fortify-source.bin                \
          test-sync-compare-and-swap.bin         \
+         test-get_current_dir_name.bin          \
          test-glibc.bin                         \
          test-gtk2.bin                          \
          test-gtk2-infobar.bin                  \
@@ -101,6 +102,9 @@ $(OUTPUT)test-bionic.bin:
 $(OUTPUT)test-libelf.bin:
 	$(BUILD) -lelf
 
+$(OUTPUT)test-get_current_dir_name.bin:
+	$(BUILD)
+
 $(OUTPUT)test-glibc.bin:
 	$(BUILD)
 
diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c
index 8dc20a61341f..56722bfe6bdd 100644
--- a/tools/build/feature/test-all.c
+++ b/tools/build/feature/test-all.c
@@ -34,6 +34,10 @@
 # include "test-libelf-mmap.c"
 #undef main
 
+#define main main_test_get_current_dir_name
+# include "test-get_current_dir_name.c"
+#undef main
+
 #define main main_test_glibc
 # include "test-glibc.c"
 #undef main
@@ -174,6 +178,7 @@ int main(int argc, char *argv[])
 	main_test_hello();
 	main_test_libelf();
 	main_test_libelf_mmap();
+	main_test_get_current_dir_name();
 	main_test_glibc();
 	main_test_dwarf();
 	main_test_dwarf_getlocations();
diff --git a/tools/build/feature/test-get_current_dir_name.c b/tools/build/feature/test-get_current_dir_name.c
new file mode 100644
index 000000000000..573000f93212
--- /dev/null
+++ b/tools/build/feature/test-get_current_dir_name.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <stdlib.h>
+
+int main(void)
+{
+	free(get_current_dir_name());
+	return 0;
+}
diff --git a/tools/include/uapi/asm-generic/ioctls.h b/tools/include/uapi/asm-generic/ioctls.h
index 040651735662..cdc9f4ca8c27 100644
--- a/tools/include/uapi/asm-generic/ioctls.h
+++ b/tools/include/uapi/asm-generic/ioctls.h
@@ -79,6 +79,8 @@
 #define TIOCGPTLCK	_IOR('T', 0x39, int) /* Get Pty lock state */
 #define TIOCGEXCL	_IOR('T', 0x40, int) /* Get exclusive mode state */
 #define TIOCGPTPEER	_IO('T', 0x41) /* Safely open the slave */
+#define TIOCGISO7816	_IOR('T', 0x42, struct serial_iso7816)
+#define TIOCSISO7816	_IOWR('T', 0x43, struct serial_iso7816)
 
 #define FIONCLEX	0x5450
 #define FIOCLEX		0x5451
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index 7f5634ce8e88..a4446f452040 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -529,6 +529,28 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_CS_TIMESTAMP_FREQUENCY 51
 
+/*
+ * Once upon a time we supposed that writes through the GGTT would be
+ * immediately in physical memory (once flushed out of the CPU path). However,
+ * on a few different processors and chipsets, this is not necessarily the case
+ * as the writes appear to be buffered internally. Thus a read of the backing
+ * storage (physical memory) via a different path (with different physical tags
+ * to the indirect write via the GGTT) will see stale values from before
+ * the GGTT write. Inside the kernel, we can for the most part keep track of
+ * the different read/write domains in use (e.g. set-domain), but the assumption
+ * of coherency is baked into the ABI, hence reporting its true state in this
+ * parameter.
+ *
+ * Reports true when writes via mmap_gtt are immediately visible following an
+ * lfence to flush the WCB.
+ *
+ * Reports false when writes via mmap_gtt are indeterminately delayed in an in
+ * internal buffer and are _not_ immediately visible to third parties accessing
+ * directly via mmap_cpu/mmap_wc. Use of mmap_gtt as part of an IPC
+ * communications channel when reporting false is strongly disadvised.
+ */
+#define I915_PARAM_MMAP_GTT_COHERENT	52
+
 typedef struct drm_i915_getparam {
 	__s32 param;
 	/*
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index e30d20fb482d..a0e8c23f9125 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -299,6 +299,11 @@ ifndef NO_BIONIC
   endif
 endif
 
+ifeq ($(feature-get_current_dir_name), 1)
+  CFLAGS += -DHAVE_GET_CURRENT_DIR_NAME
+endif
+
+
 ifdef NO_LIBELF
   NO_DWARF := 1
   NO_DEMANGLE := 1
diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record
index 37940665f736..efd0157b9d22 100644
--- a/tools/perf/tests/attr/base-record
+++ b/tools/perf/tests/attr/base-record
@@ -9,7 +9,7 @@ size=112
 config=0
 sample_period=*
 sample_type=263
-read_format=0
+read_format=0|4
 disabled=1
 inherit=1
 pinned=0
diff --git a/tools/perf/trace/beauty/ioctl.c b/tools/perf/trace/beauty/ioctl.c
index 5d2a7fd8d407..eae59ad15ce3 100644
--- a/tools/perf/trace/beauty/ioctl.c
+++ b/tools/perf/trace/beauty/ioctl.c
@@ -31,6 +31,7 @@ static size_t ioctl__scnprintf_tty_cmd(int nr, int dir, char *bf, size_t size)
 	"TCSETSW2", "TCSETSF2", "TIOCGRS48", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
 	"TIOCGDEV", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG", "TIOCVHANGUP", "TIOCGPKT",
 	"TIOCGPTLCK", [_IOC_NR(TIOCGEXCL)] = "TIOCGEXCL", "TIOCGPTPEER",
+	"TIOCGISO7816", "TIOCSISO7816",
 	[_IOC_NR(FIONCLEX)] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
 	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
 	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index ecd9f9ceda77..b7bf201fe8a8 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -10,6 +10,7 @@ libperf-y += evlist.o
 libperf-y += evsel.o
 libperf-y += evsel_fprintf.o
 libperf-y += find_bit.o
+libperf-y += get_current_dir_name.o
 libperf-y += kallsyms.o
 libperf-y += levenshtein.o
 libperf-y += llvm-utils.o
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index d37bb1566cd9..dbc0466db368 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1092,7 +1092,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
 		attr->exclude_user   = 1;
 	}
 
-	if (evsel->own_cpus)
+	if (evsel->own_cpus || evsel->unit)
 		evsel->attr.read_format |= PERF_FORMAT_ID;
 
 	/*
diff --git a/tools/perf/util/get_current_dir_name.c b/tools/perf/util/get_current_dir_name.c
new file mode 100644
index 000000000000..267aa609a582
--- /dev/null
+++ b/tools/perf/util/get_current_dir_name.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2018, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+//
+#ifndef HAVE_GET_CURRENT_DIR_NAME
+#include "util.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdlib.h>
+
+/* Android's 'bionic' library, for one, doesn't have this */
+
+char *get_current_dir_name(void)
+{
+	char pwd[PATH_MAX];
+
+	return getcwd(pwd, sizeof(pwd)) == NULL ? NULL : strdup(pwd);
+}
+#endif // HAVE_GET_CURRENT_DIR_NAME
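
As with the glibc function it stands in for, the returned string is heap memory
the caller must free; the one practical difference is that this getcwd()-based
fallback is capped at PATH_MAX, whereas glibc's version is not.  Typical use
(mirrors what nsinfo__mountns_enter() does in the namespaces.c change below):

        char *cwd = get_current_dir_name();

        if (cwd) {
                /* ... work that may change the current directory ... */
                free(cwd);
        }
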
diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c
index cf8bd123cf73..aed170bd4384 100644
--- a/tools/perf/util/namespaces.c
+++ b/tools/perf/util/namespaces.c
@@ -18,6 +18,7 @@
 #include <stdio.h>
 #include <string.h>
 #include <unistd.h>
+#include <asm/bug.h>
 
 struct namespaces *namespaces__new(struct namespaces_event *event)
 {
@@ -186,6 +187,7 @@ void nsinfo__mountns_enter(struct nsinfo *nsi,
 	char curpath[PATH_MAX];
 	int oldns = -1;
 	int newns = -1;
+	char *oldcwd = NULL;
 
 	if (nc == NULL)
 		return;
@@ -199,9 +201,13 @@ void nsinfo__mountns_enter(struct nsinfo *nsi,
 	if (snprintf(curpath, PATH_MAX, "/proc/self/ns/mnt") >= PATH_MAX)
 		return;
 
+	oldcwd = get_current_dir_name();
+	if (!oldcwd)
+		return;
+
 	oldns = open(curpath, O_RDONLY);
 	if (oldns < 0)
-		return;
+		goto errout;
 
 	newns = open(nsi->mntns_path, O_RDONLY);
 	if (newns < 0)
@@ -210,11 +216,13 @@ void nsinfo__mountns_enter(struct nsinfo *nsi,
 	if (setns(newns, CLONE_NEWNS) < 0)
 		goto errout;
 
+	nc->oldcwd = oldcwd;
 	nc->oldns = oldns;
 	nc->newns = newns;
 	return;
 
 errout:
+	free(oldcwd);
 	if (oldns > -1)
 		close(oldns);
 	if (newns > -1)
@@ -223,11 +231,16 @@ void nsinfo__mountns_enter(struct nsinfo *nsi,
 
 void nsinfo__mountns_exit(struct nscookie *nc)
 {
-	if (nc == NULL || nc->oldns == -1 || nc->newns == -1)
+	if (nc == NULL || nc->oldns == -1 || nc->newns == -1 || !nc->oldcwd)
 		return;
 
 	setns(nc->oldns, CLONE_NEWNS);
 
+	if (nc->oldcwd) {
+		WARN_ON_ONCE(chdir(nc->oldcwd));
+		zfree(&nc->oldcwd);
+	}
+
 	if (nc->oldns > -1) {
 		close(nc->oldns);
 		nc->oldns = -1;
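
The cookie now carries the saved working directory alongside the two namespace
fds, so a caller that temporarily enters another process's mount namespace gets
both the original namespace and the original cwd back.  Callers are unchanged;
the pattern is (sketch, not from the patch):

        struct nscookie nc;

        nsinfo__mountns_enter(nsi, &nc);  /* saves cwd + current mnt ns, then setns() */
        /* ... open paths that only resolve inside the target mount namespace ... */
        nsinfo__mountns_exit(&nc);        /* setns() back and chdir() to the saved cwd */
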
diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h
index cae1a9a39722..d5f46c09ea31 100644
--- a/tools/perf/util/namespaces.h
+++ b/tools/perf/util/namespaces.h
@@ -38,6 +38,7 @@ struct nsinfo {
 struct nscookie {
 	int			oldns;
 	int			newns;
+	char			*oldcwd;
 };
 
 int nsinfo__init(struct nsinfo *nsi);
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 14508ee7707a..ece040b799f6 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -59,6 +59,10 @@ int fetch_kernel_version(unsigned int *puint,
 
 const char *perf_tip(const char *dirpath);
 
+#ifndef HAVE_GET_CURRENT_DIR_NAME
+char *get_current_dir_name(void);
+#endif
+
 #ifndef HAVE_SCHED_GETCPU_SUPPORT
 int sched_getcpu(void);
 #endif

* Re: [GIT PULL] perf fixes
  2018-11-17 10:55 Ingo Molnar
@ 2018-11-18 20:05 ` pr-tracker-bot
  0 siblings, 0 replies; 316+ messages in thread
From: pr-tracker-bot @ 2018-11-18 20:05 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Peter Zijlstra,
	Arnaldo Carvalho de Melo, Thomas Gleixner, Andrew Morton

The pull request you sent on Sat, 17 Nov 2018 11:55:18 +0100:

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/b53e27f618b58d50db72375eb8e1b6ddcef7cdb5

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker

^ permalink raw reply	[flat|nested] 316+ messages in thread

* [GIT PULL] perf fixes
@ 2018-11-17 10:55 Ingo Molnar
  2018-11-18 20:05 ` pr-tracker-bot
  0 siblings, 1 reply; 316+ messages in thread
From: Ingo Molnar @ 2018-11-17 10:55 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Thomas Gleixner, Andrew Morton

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: 4d47d6407ac7b4b442a4e717488a3bb137398b6c perf/x86/intel/uncore: Support CoffeeLake 8th CBOX

Fix uncore PMU enumeration for CoffeeLake CPUs.

 Thanks,

	Ingo

------------------>
Kan Liang (2):
      perf/x86/intel/uncore: Add more IMC PCI IDs for KabyLake and CoffeeLake CPUs
      perf/x86/intel/uncore: Support CoffeeLake 8th CBOX


 arch/x86/events/intel/uncore.h     |  33 +++++++---
 arch/x86/events/intel/uncore_snb.c | 121 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 144 insertions(+), 10 deletions(-)

diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index e17ab885b1e9..cb46d602a6b8 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -129,8 +129,15 @@ struct intel_uncore_box {
 	struct intel_uncore_extra_reg shared_regs[0];
 };
 
-#define UNCORE_BOX_FLAG_INITIATED	0
-#define UNCORE_BOX_FLAG_CTL_OFFS8	1 /* event config registers are 8-byte apart */
+/* CFL uncore 8th cbox MSRs */
+#define CFL_UNC_CBO_7_PERFEVTSEL0		0xf70
+#define CFL_UNC_CBO_7_PER_CTR0			0xf76
+
+#define UNCORE_BOX_FLAG_INITIATED		0
+/* event config registers are 8-byte apart */
+#define UNCORE_BOX_FLAG_CTL_OFFS8		1
+/* CFL 8th CBOX has different MSR space */
+#define UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS	2
 
 struct uncore_event_desc {
 	struct kobj_attribute attr;
@@ -297,17 +304,27 @@ unsigned int uncore_freerunning_counter(struct intel_uncore_box *box,
 static inline
 unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx)
 {
-	return box->pmu->type->event_ctl +
-		(box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) +
-		uncore_msr_box_offset(box);
+	if (test_bit(UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS, &box->flags)) {
+		return CFL_UNC_CBO_7_PERFEVTSEL0 +
+		       (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx);
+	} else {
+		return box->pmu->type->event_ctl +
+		       (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) +
+		       uncore_msr_box_offset(box);
+	}
 }
 
 static inline
 unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx)
 {
-	return box->pmu->type->perf_ctr +
-		(box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) +
-		uncore_msr_box_offset(box);
+	if (test_bit(UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS, &box->flags)) {
+		return CFL_UNC_CBO_7_PER_CTR0 +
+		       (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx);
+	} else {
+		return box->pmu->type->perf_ctr +
+		       (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) +
+		       uncore_msr_box_offset(box);
+	}
 }
 
 static inline
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index 8527c3e1038b..2593b0d7aeee 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -15,6 +15,25 @@
 #define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC	0x1910
 #define PCI_DEVICE_ID_INTEL_SKL_SD_IMC	0x190f
 #define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC	0x191f
+#define PCI_DEVICE_ID_INTEL_KBL_Y_IMC	0x590c
+#define PCI_DEVICE_ID_INTEL_KBL_U_IMC	0x5904
+#define PCI_DEVICE_ID_INTEL_KBL_UQ_IMC	0x5914
+#define PCI_DEVICE_ID_INTEL_KBL_SD_IMC	0x590f
+#define PCI_DEVICE_ID_INTEL_KBL_SQ_IMC	0x591f
+#define PCI_DEVICE_ID_INTEL_CFL_2U_IMC	0x3ecc
+#define PCI_DEVICE_ID_INTEL_CFL_4U_IMC	0x3ed0
+#define PCI_DEVICE_ID_INTEL_CFL_4H_IMC	0x3e10
+#define PCI_DEVICE_ID_INTEL_CFL_6H_IMC	0x3ec4
+#define PCI_DEVICE_ID_INTEL_CFL_2S_D_IMC	0x3e0f
+#define PCI_DEVICE_ID_INTEL_CFL_4S_D_IMC	0x3e1f
+#define PCI_DEVICE_ID_INTEL_CFL_6S_D_IMC	0x3ec2
+#define PCI_DEVICE_ID_INTEL_CFL_8S_D_IMC	0x3e30
+#define PCI_DEVICE_ID_INTEL_CFL_4S_W_IMC	0x3e18
+#define PCI_DEVICE_ID_INTEL_CFL_6S_W_IMC	0x3ec6
+#define PCI_DEVICE_ID_INTEL_CFL_8S_W_IMC	0x3e31
+#define PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC	0x3e33
+#define PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC	0x3eca
+#define PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC	0x3e32
 
 /* SNB event control */
 #define SNB_UNC_CTL_EV_SEL_MASK			0x000000ff
@@ -202,6 +221,10 @@ static void skl_uncore_msr_init_box(struct intel_uncore_box *box)
 		wrmsrl(SKL_UNC_PERF_GLOBAL_CTL,
 			SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL);
 	}
+
+	/* The 8th CBOX has different MSR space */
+	if (box->pmu->pmu_idx == 7)
+		__set_bit(UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS, &box->flags);
 }
 
 static void skl_uncore_msr_enable_box(struct intel_uncore_box *box)
@@ -228,7 +251,7 @@ static struct intel_uncore_ops skl_uncore_msr_ops = {
 static struct intel_uncore_type skl_uncore_cbox = {
 	.name		= "cbox",
 	.num_counters   = 4,
-	.num_boxes	= 5,
+	.num_boxes	= 8,
 	.perf_ctr_bits	= 44,
 	.fixed_ctr_bits	= 48,
 	.perf_ctr	= SNB_UNC_CBO_0_PER_CTR0,
@@ -569,7 +592,82 @@ static const struct pci_device_id skl_uncore_pci_ids[] = {
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SQ_IMC),
 		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
 	},
-
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_Y_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_U_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_UQ_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_SD_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_SQ_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_2U_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4U_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4H_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6H_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_2S_D_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_D_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_D_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_D_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_W_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_W_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_W_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
 	{ /* end: all zeroes */ },
 };
 
@@ -618,6 +716,25 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
 	IMC_DEV(SKL_HQ_IMC, &skl_uncore_pci_driver),  /* 6th Gen Core H Quad Core */
 	IMC_DEV(SKL_SD_IMC, &skl_uncore_pci_driver),  /* 6th Gen Core S Dual Core */
 	IMC_DEV(SKL_SQ_IMC, &skl_uncore_pci_driver),  /* 6th Gen Core S Quad Core */
+	IMC_DEV(KBL_Y_IMC, &skl_uncore_pci_driver),  /* 7th Gen Core Y */
+	IMC_DEV(KBL_U_IMC, &skl_uncore_pci_driver),  /* 7th Gen Core U */
+	IMC_DEV(KBL_UQ_IMC, &skl_uncore_pci_driver),  /* 7th Gen Core U Quad Core */
+	IMC_DEV(KBL_SD_IMC, &skl_uncore_pci_driver),  /* 7th Gen Core S Dual Core */
+	IMC_DEV(KBL_SQ_IMC, &skl_uncore_pci_driver),  /* 7th Gen Core S Quad Core */
+	IMC_DEV(CFL_2U_IMC, &skl_uncore_pci_driver),  /* 8th Gen Core U 2 Cores */
+	IMC_DEV(CFL_4U_IMC, &skl_uncore_pci_driver),  /* 8th Gen Core U 4 Cores */
+	IMC_DEV(CFL_4H_IMC, &skl_uncore_pci_driver),  /* 8th Gen Core H 4 Cores */
+	IMC_DEV(CFL_6H_IMC, &skl_uncore_pci_driver),  /* 8th Gen Core H 6 Cores */
+	IMC_DEV(CFL_2S_D_IMC, &skl_uncore_pci_driver),  /* 8th Gen Core S 2 Cores Desktop */
+	IMC_DEV(CFL_4S_D_IMC, &skl_uncore_pci_driver),  /* 8th Gen Core S 4 Cores Desktop */
+	IMC_DEV(CFL_6S_D_IMC, &skl_uncore_pci_driver),  /* 8th Gen Core S 6 Cores Desktop */
+	IMC_DEV(CFL_8S_D_IMC, &skl_uncore_pci_driver),  /* 8th Gen Core S 8 Cores Desktop */
+	IMC_DEV(CFL_4S_W_IMC, &skl_uncore_pci_driver),  /* 8th Gen Core S 4 Cores Work Station */
+	IMC_DEV(CFL_6S_W_IMC, &skl_uncore_pci_driver),  /* 8th Gen Core S 6 Cores Work Station */
+	IMC_DEV(CFL_8S_W_IMC, &skl_uncore_pci_driver),  /* 8th Gen Core S 8 Cores Work Station */
+	IMC_DEV(CFL_4S_S_IMC, &skl_uncore_pci_driver),  /* 8th Gen Core S 4 Cores Server */
+	IMC_DEV(CFL_6S_S_IMC, &skl_uncore_pci_driver),  /* 8th Gen Core S 6 Cores Server */
+	IMC_DEV(CFL_8S_S_IMC, &skl_uncore_pci_driver),  /* 8th Gen Core S 8 Cores Server */
 	{  /* end marker */ }
 };
 

* Re: [GIT PULL] perf fixes
  2018-10-20  8:10 Ingo Molnar
@ 2018-10-20 13:28 ` Greg Kroah-Hartman
  0 siblings, 0 replies; 316+ messages in thread
From: Greg Kroah-Hartman @ 2018-10-20 13:28 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: linux-kernel, Arnaldo Carvalho de Melo, Peter Zijlstra,
	Thomas Gleixner, Andrew Morton

On Sat, Oct 20, 2018 at 10:10:39AM +0200, Ingo Molnar wrote:
> Greg,
> 
> Please pull the latest perf-urgent-for-linus git tree from:
> 
>    git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

Now merged, thanks.

greg k-h

* [GIT PULL] perf fixes
@ 2018-10-20  8:10 Ingo Molnar
  2018-10-20 13:28 ` Greg Kroah-Hartman
  0 siblings, 1 reply; 316+ messages in thread
From: Ingo Molnar @ 2018-10-20  8:10 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: linux-kernel, Arnaldo Carvalho de Melo, Peter Zijlstra,
	Thomas Gleixner, Andrew Morton

Greg,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: 20e8e72d0fa8e26202932c30d592bade73fdc701 Merge tag 'perf-urgent-for-mingo-4.19-20181017' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent

Misc perf tooling fixes.

 Thanks,

	Ingo

------------------>
Arnaldo Carvalho de Melo (3):
      tools arch uapi: Sync the x86 kvm.h copy
      tools headers uapi: Sync kvm.h copy
      perf tools: Stop fallbacking to kallsyms for vdso symbols lookup

David Miller (1):
      perf cpu_map: Align cpu map synthesized events properly.

Jarod Wilson (1):
      perf tools: Fix use of alternatives to find JDIR

Jiri Olsa (5):
      Revert "perf tools: Fix PMU term format max value calculation"
      perf vendor events intel: Fix wrong filter_band* values for uncore events
      perf evsel: Store ids for events with their own cpus perf_event__synthesize_event_update_cpus
      perf tools: Fix tracing_path_mount proper path
      perf tools: Pass build flags to traceevent build

Milian Wolff (1):
      perf report: Don't crash on invalid inline debug information


 tools/arch/x86/include/uapi/asm/kvm.h              |  1 +
 tools/include/uapi/linux/kvm.h                     |  1 +
 tools/lib/api/fs/tracing_path.c                    |  4 ++--
 tools/perf/Makefile.config                         |  2 +-
 tools/perf/Makefile.perf                           |  2 +-
 tools/perf/builtin-report.c                        |  1 +
 .../pmu-events/arch/x86/ivytown/uncore-power.json  | 16 ++++++++--------
 .../pmu-events/arch/x86/jaketown/uncore-power.json | 16 ++++++++--------
 tools/perf/util/event.c                            | 22 +++-------------------
 tools/perf/util/evsel.c                            |  3 +++
 tools/perf/util/pmu.c                              | 13 +++++++------
 tools/perf/util/srcline.c                          |  3 +++
 12 files changed, 39 insertions(+), 45 deletions(-)

diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 86299efa804a..fd23d5778ea1 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -377,6 +377,7 @@ struct kvm_sync_regs {
 
 #define KVM_X86_QUIRK_LINT0_REENABLED	(1 << 0)
 #define KVM_X86_QUIRK_CD_NW_CLEARED	(1 << 1)
+#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE	(1 << 2)
 
 #define KVM_STATE_NESTED_GUEST_MODE	0x00000001
 #define KVM_STATE_NESTED_RUN_PENDING	0x00000002
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 07548de5c988..251be353f950 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -952,6 +952,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_S390_HPAGE_1M 156
 #define KVM_CAP_NESTED_STATE 157
 #define KVM_CAP_ARM_INJECT_SERROR_ESR 158
+#define KVM_CAP_MSR_PLATFORM_INFO 159
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/tools/lib/api/fs/tracing_path.c b/tools/lib/api/fs/tracing_path.c
index 120037496f77..5afb11b30fca 100644
--- a/tools/lib/api/fs/tracing_path.c
+++ b/tools/lib/api/fs/tracing_path.c
@@ -36,7 +36,7 @@ static const char *tracing_path_tracefs_mount(void)
 
 	__tracing_path_set("", mnt);
 
-	return mnt;
+	return tracing_path;
 }
 
 static const char *tracing_path_debugfs_mount(void)
@@ -49,7 +49,7 @@ static const char *tracing_path_debugfs_mount(void)
 
 	__tracing_path_set("tracing/", mnt);
 
-	return mnt;
+	return tracing_path;
 }
 
 const char *tracing_path_mount(void)
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index f6d1a03c7523..e30d20fb482d 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -833,7 +833,7 @@ ifndef NO_JVMTI
     JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | awk '{print $$3}')
   else
     ifneq (,$(wildcard /usr/sbin/alternatives))
-      JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g')
+      JDIR=$(shell /usr/sbin/alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g')
     endif
   endif
   ifndef JDIR
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 5224ade3d5af..0be411695379 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -635,7 +635,7 @@ $(LIBPERF_IN): prepare FORCE
 $(LIB_FILE): $(LIBPERF_IN)
 	$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN) $(LIB_OBJS)
 
-LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ)
+LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) 'EXTRA_CFLAGS=$(EXTRA_CFLAGS)' 'LDFLAGS=$(LDFLAGS)'
 
 $(LIBTRACEEVENT): FORCE
 	$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 76e12bcd1765..b2188e623e22 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -981,6 +981,7 @@ int cmd_report(int argc, const char **argv)
 			.id_index	 = perf_event__process_id_index,
 			.auxtrace_info	 = perf_event__process_auxtrace_info,
 			.auxtrace	 = perf_event__process_auxtrace,
+			.event_update	 = perf_event__process_event_update,
 			.feature	 = process_feature_event,
 			.ordered_events	 = true,
 			.ordering_requires_timestamps = true,
diff --git a/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json b/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json
index d40498f2cb1e..635c09fda1d9 100644
--- a/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json
+++ b/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json
@@ -188,7 +188,7 @@
         "Counter": "0,1,2,3",
         "EventCode": "0xb",
         "EventName": "UNC_P_FREQ_GE_1200MHZ_CYCLES",
-        "Filter": "filter_band0=1200",
+        "Filter": "filter_band0=12",
         "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
         "MetricName": "freq_ge_1200mhz_cycles %",
         "PerPkg": "1",
@@ -199,7 +199,7 @@
         "Counter": "0,1,2,3",
         "EventCode": "0xc",
         "EventName": "UNC_P_FREQ_GE_2000MHZ_CYCLES",
-        "Filter": "filter_band1=2000",
+        "Filter": "filter_band1=20",
         "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
         "MetricName": "freq_ge_2000mhz_cycles %",
         "PerPkg": "1",
@@ -210,7 +210,7 @@
         "Counter": "0,1,2,3",
         "EventCode": "0xd",
         "EventName": "UNC_P_FREQ_GE_3000MHZ_CYCLES",
-        "Filter": "filter_band2=3000",
+        "Filter": "filter_band2=30",
         "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
         "MetricName": "freq_ge_3000mhz_cycles %",
         "PerPkg": "1",
@@ -221,7 +221,7 @@
         "Counter": "0,1,2,3",
         "EventCode": "0xe",
         "EventName": "UNC_P_FREQ_GE_4000MHZ_CYCLES",
-        "Filter": "filter_band3=4000",
+        "Filter": "filter_band3=40",
         "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
         "MetricName": "freq_ge_4000mhz_cycles %",
         "PerPkg": "1",
@@ -232,7 +232,7 @@
         "Counter": "0,1,2,3",
         "EventCode": "0xb",
         "EventName": "UNC_P_FREQ_GE_1200MHZ_TRANSITIONS",
-        "Filter": "edge=1,filter_band0=1200",
+        "Filter": "edge=1,filter_band0=12",
         "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
         "MetricName": "freq_ge_1200mhz_cycles %",
         "PerPkg": "1",
@@ -243,7 +243,7 @@
         "Counter": "0,1,2,3",
         "EventCode": "0xc",
         "EventName": "UNC_P_FREQ_GE_2000MHZ_TRANSITIONS",
-        "Filter": "edge=1,filter_band1=2000",
+        "Filter": "edge=1,filter_band1=20",
         "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
         "MetricName": "freq_ge_2000mhz_cycles %",
         "PerPkg": "1",
@@ -254,7 +254,7 @@
         "Counter": "0,1,2,3",
         "EventCode": "0xd",
         "EventName": "UNC_P_FREQ_GE_3000MHZ_TRANSITIONS",
-        "Filter": "edge=1,filter_band2=4000",
+        "Filter": "edge=1,filter_band2=30",
         "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
         "MetricName": "freq_ge_3000mhz_cycles %",
         "PerPkg": "1",
@@ -265,7 +265,7 @@
         "Counter": "0,1,2,3",
         "EventCode": "0xe",
         "EventName": "UNC_P_FREQ_GE_4000MHZ_TRANSITIONS",
-        "Filter": "edge=1,filter_band3=4000",
+        "Filter": "edge=1,filter_band3=40",
         "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
         "MetricName": "freq_ge_4000mhz_cycles %",
         "PerPkg": "1",
diff --git a/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json b/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json
index 16034bfd06dd..8755693d86c6 100644
--- a/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json
+++ b/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json
@@ -187,7 +187,7 @@
         "Counter": "0,1,2,3",
         "EventCode": "0xb",
         "EventName": "UNC_P_FREQ_GE_1200MHZ_CYCLES",
-        "Filter": "filter_band0=1200",
+        "Filter": "filter_band0=12",
         "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
         "MetricName": "freq_ge_1200mhz_cycles %",
         "PerPkg": "1",
@@ -198,7 +198,7 @@
         "Counter": "0,1,2,3",
         "EventCode": "0xc",
         "EventName": "UNC_P_FREQ_GE_2000MHZ_CYCLES",
-        "Filter": "filter_band1=2000",
+        "Filter": "filter_band1=20",
         "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
         "MetricName": "freq_ge_2000mhz_cycles %",
         "PerPkg": "1",
@@ -209,7 +209,7 @@
         "Counter": "0,1,2,3",
         "EventCode": "0xd",
         "EventName": "UNC_P_FREQ_GE_3000MHZ_CYCLES",
-        "Filter": "filter_band2=3000",
+        "Filter": "filter_band2=30",
         "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
         "MetricName": "freq_ge_3000mhz_cycles %",
         "PerPkg": "1",
@@ -220,7 +220,7 @@
         "Counter": "0,1,2,3",
         "EventCode": "0xe",
         "EventName": "UNC_P_FREQ_GE_4000MHZ_CYCLES",
-        "Filter": "filter_band3=4000",
+        "Filter": "filter_band3=40",
         "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
         "MetricName": "freq_ge_4000mhz_cycles %",
         "PerPkg": "1",
@@ -231,7 +231,7 @@
         "Counter": "0,1,2,3",
         "EventCode": "0xb",
         "EventName": "UNC_P_FREQ_GE_1200MHZ_TRANSITIONS",
-        "Filter": "edge=1,filter_band0=1200",
+        "Filter": "edge=1,filter_band0=12",
         "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
         "MetricName": "freq_ge_1200mhz_cycles %",
         "PerPkg": "1",
@@ -242,7 +242,7 @@
         "Counter": "0,1,2,3",
         "EventCode": "0xc",
         "EventName": "UNC_P_FREQ_GE_2000MHZ_TRANSITIONS",
-        "Filter": "edge=1,filter_band1=2000",
+        "Filter": "edge=1,filter_band1=20",
         "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
         "MetricName": "freq_ge_2000mhz_cycles %",
         "PerPkg": "1",
@@ -253,7 +253,7 @@
         "Counter": "0,1,2,3",
         "EventCode": "0xd",
         "EventName": "UNC_P_FREQ_GE_3000MHZ_TRANSITIONS",
-        "Filter": "edge=1,filter_band2=4000",
+        "Filter": "edge=1,filter_band2=30",
         "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
         "MetricName": "freq_ge_3000mhz_cycles %",
         "PerPkg": "1",
@@ -264,7 +264,7 @@
         "Counter": "0,1,2,3",
         "EventCode": "0xe",
         "EventName": "UNC_P_FREQ_GE_4000MHZ_TRANSITIONS",
-        "Filter": "edge=1,filter_band3=4000",
+        "Filter": "edge=1,filter_band3=40",
         "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
         "MetricName": "freq_ge_4000mhz_cycles %",
         "PerPkg": "1",
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 0cd42150f712..bc646185f8d9 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -1081,6 +1081,7 @@ void *cpu_map_data__alloc(struct cpu_map *map, size_t *size, u16 *type, int *max
 	}
 
 	*size += sizeof(struct cpu_map_data);
+	*size = PERF_ALIGN(*size, sizeof(u64));
 	return zalloc(*size);
 }
 
@@ -1560,26 +1561,9 @@ struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr,
 
 		return NULL;
 	}
-try_again:
+
 	al->map = map_groups__find(mg, al->addr);
-	if (al->map == NULL) {
-		/*
-		 * If this is outside of all known maps, and is a negative
-		 * address, try to look it up in the kernel dso, as it might be
-		 * a vsyscall or vdso (which executes in user-mode).
-		 *
-		 * XXX This is nasty, we should have a symbol list in the
-		 * "[vdso]" dso, but for now lets use the old trick of looking
-		 * in the whole kernel symbol list.
-		 */
-		if (cpumode == PERF_RECORD_MISC_USER && machine &&
-		    mg != &machine->kmaps &&
-		    machine__kernel_ip(machine, al->addr)) {
-			mg = &machine->kmaps;
-			load_map = true;
-			goto try_again;
-		}
-	} else {
+	if (al->map != NULL) {
 		/*
 		 * Kernel maps might be changed when loading symbols so loading
 		 * must be done prior to using kernel maps.
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 1a61628a1c12..e596ae358c4d 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1089,6 +1089,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
 		attr->exclude_user   = 1;
 	}
 
+	if (evsel->own_cpus)
+		evsel->attr.read_format |= PERF_FORMAT_ID;
+
 	/*
 	 * Apply event specific term settings,
 	 * it overloads any global configuration.
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index afd68524ffa9..7799788f662f 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -930,13 +930,14 @@ static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v,
 
 static __u64 pmu_format_max_value(const unsigned long *format)
 {
-	__u64 w = 0;
-	int fbit;
-
-	for_each_set_bit(fbit, format, PERF_PMU_FORMAT_BITS)
-		w |= (1ULL << fbit);
+	int w;
 
-	return w;
+	w = bitmap_weight(format, PERF_PMU_FORMAT_BITS);
+	if (!w)
+		return 0;
+	if (w < 64)
+		return (1ULL << w) - 1;
+	return -1;
 }
 
 /*
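
The restored version derives the maximum from the number of format bits rather
than from their positions.  Worked example: for a format that maps bits 0-7 and
16-23 of the config word (16 bits in total), bitmap_weight() returns 16, so the
accepted maximum becomes (1ULL << 16) - 1 = 0xffff, whereas the removed loop
produced the sparse mask 0x00ff00ff.  The "w < 64" test sidesteps the undefined
1ULL << 64 shift and simply returns an all-ones value for a full 64-bit format.
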
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
index 09d6746e6ec8..e767c4a9d4d2 100644
--- a/tools/perf/util/srcline.c
+++ b/tools/perf/util/srcline.c
@@ -85,6 +85,9 @@ static struct symbol *new_inline_sym(struct dso *dso,
 	struct symbol *inline_sym;
 	char *demangled = NULL;
 
+	if (!funcname)
+		funcname = "??";
+
 	if (dso) {
 		demangled = dso__demangle_sym(dso, 0, funcname);
 		if (demangled)

* Re: [GIT PULL] perf fixes
  2018-10-11  9:12 Ingo Molnar
@ 2018-10-11 12:32 ` Greg Kroah-Hartman
  0 siblings, 0 replies; 316+ messages in thread
From: Greg Kroah-Hartman @ 2018-10-11 12:32 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: linux-kernel, Linus Torvalds, Peter Zijlstra,
	Arnaldo Carvalho de Melo, Thomas Gleixner, Andrew Morton

On Thu, Oct 11, 2018 at 11:12:02AM +0200, Ingo Molnar wrote:
> 
> Greg,
> 
> Please pull the latest perf-urgent-for-linus git tree from:
> 
>    git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

Now merged, thanks.

greg k-h


* [GIT PULL] perf fixes
@ 2018-10-11  9:12 Ingo Molnar
  2018-10-11 12:32 ` Greg Kroah-Hartman
  0 siblings, 1 reply; 316+ messages in thread
From: Ingo Molnar @ 2018-10-11  9:12 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: linux-kernel, Linus Torvalds, Peter Zijlstra,
	Arnaldo Carvalho de Melo, Thomas Gleixner, Andrew Morton


Greg,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: c1883f10cfe05c707cce46d6999411c50a2413ca Merge tag 'perf-urgent-for-mingo-4.19-20181005' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent

Misc perf tooling fixes.

 Thanks,

	Ingo

------------------>
Adrian Hunter (2):
      perf script python: Fix export-to-postgresql.py occasional failure
      perf script python: Fix export-to-sqlite.py sample columns

Arnaldo Carvalho de Melo (1):
      perf python: Use -Wno-redundant-decls to build with PYTHON=python3

Milian Wolff (2):
      perf report: Don't try to map ip to invalid map
      perf record: Use unmapped IP for inline callchain cursors


 tools/perf/scripts/python/export-to-postgresql.py | 9 +++++++++
 tools/perf/scripts/python/export-to-sqlite.py     | 6 +++++-
 tools/perf/util/machine.c                         | 8 +++++---
 tools/perf/util/setup.py                          | 2 +-
 4 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py
index efcaf6cac2eb..e46f51b17513 100644
--- a/tools/perf/scripts/python/export-to-postgresql.py
+++ b/tools/perf/scripts/python/export-to-postgresql.py
@@ -204,14 +204,23 @@ from ctypes import *
 libpq = CDLL("libpq.so.5")
 PQconnectdb = libpq.PQconnectdb
 PQconnectdb.restype = c_void_p
+PQconnectdb.argtypes = [ c_char_p ]
 PQfinish = libpq.PQfinish
+PQfinish.argtypes = [ c_void_p ]
 PQstatus = libpq.PQstatus
+PQstatus.restype = c_int
+PQstatus.argtypes = [ c_void_p ]
 PQexec = libpq.PQexec
 PQexec.restype = c_void_p
+PQexec.argtypes = [ c_void_p, c_char_p ]
 PQresultStatus = libpq.PQresultStatus
+PQresultStatus.restype = c_int
+PQresultStatus.argtypes = [ c_void_p ]
 PQputCopyData = libpq.PQputCopyData
+PQputCopyData.restype = c_int
 PQputCopyData.argtypes = [ c_void_p, c_void_p, c_int ]
 PQputCopyEnd = libpq.PQputCopyEnd
+PQputCopyEnd.restype = c_int
 PQputCopyEnd.argtypes = [ c_void_p, c_void_p ]
 
 sys.path.append(os.environ['PERF_EXEC_PATH'] + \
diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py
index f827bf77e9d2..e4bb82c8aba9 100644
--- a/tools/perf/scripts/python/export-to-sqlite.py
+++ b/tools/perf/scripts/python/export-to-sqlite.py
@@ -440,7 +440,11 @@ def branch_type_table(*x):
 
 def sample_table(*x):
 	if branches:
-		bind_exec(sample_query, 18, x)
+		for xx in x[0:15]:
+			sample_query.addBindValue(str(xx))
+		for xx in x[19:22]:
+			sample_query.addBindValue(str(xx))
+		do_query_(sample_query)
 	else:
 		bind_exec(sample_query, 22, x)
 
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index c4acd2001db0..111ae858cbcb 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2286,7 +2286,8 @@ static int append_inlines(struct callchain_cursor *cursor,
 	if (!symbol_conf.inline_name || !map || !sym)
 		return ret;
 
-	addr = map__rip_2objdump(map, ip);
+	addr = map__map_ip(map, ip);
+	addr = map__rip_2objdump(map, addr);
 
 	inline_node = inlines__tree_find(&map->dso->inlined_nodes, addr);
 	if (!inline_node) {
@@ -2312,7 +2313,7 @@ static int unwind_entry(struct unwind_entry *entry, void *arg)
 {
 	struct callchain_cursor *cursor = arg;
 	const char *srcline = NULL;
-	u64 addr;
+	u64 addr = entry->ip;
 
 	if (symbol_conf.hide_unresolved && entry->sym == NULL)
 		return 0;
@@ -2324,7 +2325,8 @@ static int unwind_entry(struct unwind_entry *entry, void *arg)
 	 * Convert entry->ip from a virtual address to an offset in
 	 * its corresponding binary.
 	 */
-	addr = map__map_ip(entry->map, entry->ip);
+	if (entry->map)
+		addr = map__map_ip(entry->map, entry->ip);
 
 	srcline = callchain_srcline(entry->map, entry->sym, addr);
 	return callchain_cursor_append(cursor, entry->ip,
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index 97efbcad076e..1942f6dd24f6 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -35,7 +35,7 @@ class install_lib(_install_lib):
 
 cflags = getenv('CFLAGS', '').split()
 # switch off several checks (need to be at the end of cflags list)
-cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter' ]
+cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter', '-Wno-redundant-decls' ]
 if cc != "clang":
     cflags += ['-Wno-cast-function-type' ]
 

----- End forwarded message -----

* [GIT PULL] perf fixes
@ 2018-10-11  8:59 Ingo Molnar
  0 siblings, 0 replies; 316+ messages in thread
From: Ingo Molnar @ 2018-10-11  8:59 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: Linus Torvalds, linux-kernel, Peter Zijlstra,
	Arnaldo Carvalho de Melo, Thomas Gleixner

Greg,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: c1883f10cfe05c707cce46d6999411c50a2413ca Merge tag 'perf-urgent-for-mingo-4.19-20181005' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent

Misc perf tooling fixes.

 Thanks,

	Ingo

------------------>
Adrian Hunter (2):
      perf script python: Fix export-to-postgresql.py occasional failure
      perf script python: Fix export-to-sqlite.py sample columns

Arnaldo Carvalho de Melo (1):
      perf python: Use -Wno-redundant-decls to build with PYTHON=python3

Milian Wolff (2):
      perf report: Don't try to map ip to invalid map
      perf record: Use unmapped IP for inline callchain cursors


 tools/perf/scripts/python/export-to-postgresql.py | 9 +++++++++
 tools/perf/scripts/python/export-to-sqlite.py     | 6 +++++-
 tools/perf/util/machine.c                         | 8 +++++---
 tools/perf/util/setup.py                          | 2 +-
 4 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py
index efcaf6cac2eb..e46f51b17513 100644
--- a/tools/perf/scripts/python/export-to-postgresql.py
+++ b/tools/perf/scripts/python/export-to-postgresql.py
@@ -204,14 +204,23 @@ from ctypes import *
 libpq = CDLL("libpq.so.5")
 PQconnectdb = libpq.PQconnectdb
 PQconnectdb.restype = c_void_p
+PQconnectdb.argtypes = [ c_char_p ]
 PQfinish = libpq.PQfinish
+PQfinish.argtypes = [ c_void_p ]
 PQstatus = libpq.PQstatus
+PQstatus.restype = c_int
+PQstatus.argtypes = [ c_void_p ]
 PQexec = libpq.PQexec
 PQexec.restype = c_void_p
+PQexec.argtypes = [ c_void_p, c_char_p ]
 PQresultStatus = libpq.PQresultStatus
+PQresultStatus.restype = c_int
+PQresultStatus.argtypes = [ c_void_p ]
 PQputCopyData = libpq.PQputCopyData
+PQputCopyData.restype = c_int
 PQputCopyData.argtypes = [ c_void_p, c_void_p, c_int ]
 PQputCopyEnd = libpq.PQputCopyEnd
+PQputCopyEnd.restype = c_int
 PQputCopyEnd.argtypes = [ c_void_p, c_void_p ]
 
 sys.path.append(os.environ['PERF_EXEC_PATH'] + \
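
For context, ctypes assumes every foreign function takes and returns C ints unless
told otherwise, so a plain Python integer argument (such as a 64-bit connection or
result handle) can be silently truncated on the way in, and an int-sized return value
on the way out; the restype/argtypes declarations added above spell out the real
prototypes. A minimal standalone sketch of the same idiom, using libc's strlen purely
as a stand-in and assuming a glibc system:

  from ctypes import CDLL, c_char_p, c_size_t

  libc = CDLL("libc.so.6")           # any shared library works the same way
  strlen = libc.strlen
  strlen.restype = c_size_t          # default would be a (possibly truncating) c_int
  strlen.argtypes = [c_char_p]       # lets ctypes check and convert the argument

  print(strlen(b"perf"))             # 4
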
diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py
index f827bf77e9d2..e4bb82c8aba9 100644
--- a/tools/perf/scripts/python/export-to-sqlite.py
+++ b/tools/perf/scripts/python/export-to-sqlite.py
@@ -440,7 +440,11 @@ def branch_type_table(*x):
 
 def sample_table(*x):
 	if branches:
-		bind_exec(sample_query, 18, x)
+		for xx in x[0:15]:
+			sample_query.addBindValue(str(xx))
+		for xx in x[19:22]:
+			sample_query.addBindValue(str(xx))
+		do_query_(sample_query)
 	else:
 		bind_exec(sample_query, 22, x)
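
The branches-mode samples table has fewer columns than the full one, so the fix binds
only the values that actually have columns (the first 15 plus the last three of the 22)
instead of a fixed run of 18. A hypothetical helper expressing the same slicing; the
stub query class is only there to make the snippet runnable on its own:

  class FakeQuery:
      # Stand-in for the Qt QSqlQuery object used above.
      def addBindValue(self, value):
          print("bound:", value)

  def bind_ranges(query, values, ranges):
      # Bind only the slices of positional values the prepared statement
      # expects, e.g. ranges=[(0, 15), (19, 22)] for the branches table.
      for start, stop in ranges:
          for value in values[start:stop]:
              query.addBindValue(str(value))

  bind_ranges(FakeQuery(), list(range(22)), [(0, 15), (19, 22)])
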
 
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index c4acd2001db0..111ae858cbcb 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2286,7 +2286,8 @@ static int append_inlines(struct callchain_cursor *cursor,
 	if (!symbol_conf.inline_name || !map || !sym)
 		return ret;
 
-	addr = map__rip_2objdump(map, ip);
+	addr = map__map_ip(map, ip);
+	addr = map__rip_2objdump(map, addr);
 
 	inline_node = inlines__tree_find(&map->dso->inlined_nodes, addr);
 	if (!inline_node) {
@@ -2312,7 +2313,7 @@ static int unwind_entry(struct unwind_entry *entry, void *arg)
 {
 	struct callchain_cursor *cursor = arg;
 	const char *srcline = NULL;
-	u64 addr;
+	u64 addr = entry->ip;
 
 	if (symbol_conf.hide_unresolved && entry->sym == NULL)
 		return 0;
@@ -2324,7 +2325,8 @@ static int unwind_entry(struct unwind_entry *entry, void *arg)
 	 * Convert entry->ip from a virtual address to an offset in
 	 * its corresponding binary.
 	 */
-	addr = map__map_ip(entry->map, entry->ip);
+	if (entry->map)
+		addr = map__map_ip(entry->map, entry->ip);
 
 	srcline = callchain_srcline(entry->map, entry->sym, addr);
 	return callchain_cursor_append(cursor, entry->ip,
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index 97efbcad076e..1942f6dd24f6 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -35,7 +35,7 @@ class install_lib(_install_lib):
 
 cflags = getenv('CFLAGS', '').split()
 # switch off several checks (need to be at the end of cflags list)
-cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter' ]
+cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter', '-Wno-redundant-decls' ]
 if cc != "clang":
     cflags += ['-Wno-cast-function-type' ]
 

^ permalink raw reply	[flat|nested] 316+ messages in thread

* Re: [GIT PULL] perf fixes
  2018-10-05  9:55 ` Ingo Molnar
@ 2018-10-05 23:30   ` Greg Kroah-Hartman
  0 siblings, 0 replies; 316+ messages in thread
From: Greg Kroah-Hartman @ 2018-10-05 23:30 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: linux-kernel, Linus Torvalds, Peter Zijlstra,
	Arnaldo Carvalho de Melo, Thomas Gleixner, Andrew Morton

On Fri, Oct 05, 2018 at 11:55:24AM +0200, Ingo Molnar wrote:
> 
> * Ingo Molnar <mingo@kernel.org> wrote:
> 
> > Linus,
> 
> ... and Greg as well!! ;-)

Heh, not a big deal :)

Now merged, thanks.

greg k-h

^ permalink raw reply	[flat|nested] 316+ messages in thread

* Re: [GIT PULL] perf fixes
  2018-10-05  9:42 Ingo Molnar
@ 2018-10-05  9:55 ` Ingo Molnar
  2018-10-05 23:30   ` Greg Kroah-Hartman
  0 siblings, 1 reply; 316+ messages in thread
From: Ingo Molnar @ 2018-10-05  9:55 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: linux-kernel, Linus Torvalds, Peter Zijlstra,
	Arnaldo Carvalho de Melo, Thomas Gleixner, Andrew Morton


* Ingo Molnar <mingo@kernel.org> wrote:

> Linus,

... and Greg as well!! ;-)

Thanks,

	Ingo

ps. script fixed.

^ permalink raw reply	[flat|nested] 316+ messages in thread

* [GIT PULL] perf fixes
@ 2018-10-05  9:42 Ingo Molnar
  2018-10-05  9:55 ` Ingo Molnar
  0 siblings, 1 reply; 316+ messages in thread
From: Ingo Molnar @ 2018-10-05  9:42 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: linux-kernel, Linus Torvalds, Peter Zijlstra,
	Arnaldo Carvalho de Melo, Thomas Gleixner, Andrew Morton

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: d7cbbe49a9304520181fb8c9272d1327deec8453 perf/x86/amd/uncore: Set ThreadMask and SliceMask for L3 Cache perf events

Misc fixes:

 - fix a CPU#0 hot unplug bug and a PCI enumeration bug in the x86 Intel uncore PMU driver
 - fix a CPU event enumeration bug in the x86 AMD PMU driver
 - fix a perf ring-buffer corruption bug when using tracepoints
 - fix a PMU unregister locking bug

 Thanks,

	Ingo

------------------>
Jiri Olsa (1):
      perf/ring_buffer: Prevent concurent ring buffer access

Kan Liang (1):
      perf/x86/intel/uncore: Fix PCI BDF address of M3UPI on SKX

Masayoshi Mizuma (1):
      perf/x86/intel/uncore: Use boot_cpu_data.phys_proc_id instead of hardcorded physical package ID 0

Natarajan, Janakarajan (1):
      perf/x86/amd/uncore: Set ThreadMask and SliceMask for L3 Cache perf events

Peter Zijlstra (1):
      perf/core: Fix perf_pmu_unregister() locking


 arch/x86/events/amd/uncore.c         | 10 ++++++++++
 arch/x86/events/intel/uncore_snbep.c | 14 +++++++-------
 arch/x86/include/asm/perf_event.h    |  8 ++++++++
 kernel/events/core.c                 | 11 ++++-------
 4 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index 981ba5e8241b..8671de126eac 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -36,6 +36,7 @@
 
 static int num_counters_llc;
 static int num_counters_nb;
+static bool l3_mask;
 
 static HLIST_HEAD(uncore_unused_list);
 
@@ -209,6 +210,13 @@ static int amd_uncore_event_init(struct perf_event *event)
 	hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;
 	hwc->idx = -1;
 
+	/*
+	 * SliceMask and ThreadMask need to be set for certain L3 events in
+	 * Family 17h. For other events, the two fields do not affect the count.
+	 */
+	if (l3_mask)
+		hwc->config |= (AMD64_L3_SLICE_MASK | AMD64_L3_THREAD_MASK);
+
 	if (event->cpu < 0)
 		return -EINVAL;
 
@@ -525,6 +533,7 @@ static int __init amd_uncore_init(void)
 		amd_llc_pmu.name	  = "amd_l3";
 		format_attr_event_df.show = &event_show_df;
 		format_attr_event_l3.show = &event_show_l3;
+		l3_mask			  = true;
 	} else {
 		num_counters_nb		  = NUM_COUNTERS_NB;
 		num_counters_llc	  = NUM_COUNTERS_L2;
@@ -532,6 +541,7 @@ static int __init amd_uncore_init(void)
 		amd_llc_pmu.name	  = "amd_l2";
 		format_attr_event_df	  = format_attr_event;
 		format_attr_event_l3	  = format_attr_event;
+		l3_mask			  = false;
 	}
 
 	amd_nb_pmu.attr_groups	= amd_uncore_attr_groups_df;
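
The SliceMask and ThreadMask mentioned in the comment are defined later in this patch
(see the perf_event.h hunk below) as a 4-bit field at bit 48 and an 8-bit field at
bit 56 of the event config. A quick arithmetic check, with an illustrative raw event
code:

  # Shift/mask values taken from the perf_event.h hunk below; the event code is made up.
  AMD64_L3_SLICE_SHIFT  = 48
  AMD64_L3_SLICE_MASK   = 0xF << AMD64_L3_SLICE_SHIFT    # 0x000f000000000000
  AMD64_L3_THREAD_SHIFT = 56
  AMD64_L3_THREAD_MASK  = 0xFF << AMD64_L3_THREAD_SHIFT  # 0xff00000000000000

  config = 0x01                                          # illustrative L3 event code
  config |= AMD64_L3_SLICE_MASK | AMD64_L3_THREAD_MASK
  print(hex(config))                                     # 0xff0f000000000001
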
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 51d7c117e3c7..c07bee31abe8 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -3061,7 +3061,7 @@ static struct event_constraint bdx_uncore_pcu_constraints[] = {
 
 void bdx_uncore_cpu_init(void)
 {
-	int pkg = topology_phys_to_logical_pkg(0);
+	int pkg = topology_phys_to_logical_pkg(boot_cpu_data.phys_proc_id);
 
 	if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
 		bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
@@ -3931,16 +3931,16 @@ static const struct pci_device_id skx_uncore_pci_ids[] = {
 		.driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 5, SKX_PCI_UNCORE_M2PCIE, 3),
 	},
 	{ /* M3UPI0 Link 0 */
-		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204C),
-		.driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 0, SKX_PCI_UNCORE_M3UPI, 0),
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D),
+		.driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 1, SKX_PCI_UNCORE_M3UPI, 0),
 	},
 	{ /* M3UPI0 Link 1 */
-		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D),
-		.driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 1, SKX_PCI_UNCORE_M3UPI, 1),
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204E),
+		.driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 2, SKX_PCI_UNCORE_M3UPI, 1),
 	},
 	{ /* M3UPI1 Link 2 */
-		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204C),
-		.driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 4, SKX_PCI_UNCORE_M3UPI, 2),
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D),
+		.driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 5, SKX_PCI_UNCORE_M3UPI, 2),
 	},
 	{ /* end: all zeroes */ }
 };
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 12f54082f4c8..78241b736f2a 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -46,6 +46,14 @@
 #define INTEL_ARCH_EVENT_MASK	\
 	(ARCH_PERFMON_EVENTSEL_UMASK | ARCH_PERFMON_EVENTSEL_EVENT)
 
+#define AMD64_L3_SLICE_SHIFT				48
+#define AMD64_L3_SLICE_MASK				\
+	((0xFULL) << AMD64_L3_SLICE_SHIFT)
+
+#define AMD64_L3_THREAD_SHIFT				56
+#define AMD64_L3_THREAD_MASK				\
+	((0xFFULL) << AMD64_L3_THREAD_SHIFT)
+
 #define X86_RAW_EVENT_MASK		\
 	(ARCH_PERFMON_EVENTSEL_EVENT |	\
 	 ARCH_PERFMON_EVENTSEL_UMASK |	\
diff --git a/kernel/events/core.c b/kernel/events/core.c
index dcb093e7b377..5a97f34bc14c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8314,6 +8314,8 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
 			goto unlock;
 
 		list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
+			if (event->cpu != smp_processor_id())
+				continue;
 			if (event->attr.type != PERF_TYPE_TRACEPOINT)
 				continue;
 			if (event->attr.config != entry->type)
@@ -9431,9 +9433,7 @@ static void free_pmu_context(struct pmu *pmu)
 	if (pmu->task_ctx_nr > perf_invalid_context)
 		return;
 
-	mutex_lock(&pmus_lock);
 	free_percpu(pmu->pmu_cpu_context);
-	mutex_unlock(&pmus_lock);
 }
 
 /*
@@ -9689,12 +9689,8 @@ EXPORT_SYMBOL_GPL(perf_pmu_register);
 
 void perf_pmu_unregister(struct pmu *pmu)
 {
-	int remove_device;
-
 	mutex_lock(&pmus_lock);
-	remove_device = pmu_bus_running;
 	list_del_rcu(&pmu->entry);
-	mutex_unlock(&pmus_lock);
 
 	/*
 	 * We dereference the pmu list under both SRCU and regular RCU, so
@@ -9706,13 +9702,14 @@ void perf_pmu_unregister(struct pmu *pmu)
 	free_percpu(pmu->pmu_disable_count);
 	if (pmu->type >= PERF_TYPE_MAX)
 		idr_remove(&pmu_idr, pmu->type);
-	if (remove_device) {
+	if (pmu_bus_running) {
 		if (pmu->nr_addr_filters)
 			device_remove_file(pmu->dev, &dev_attr_nr_addr_filters);
 		device_del(pmu->dev);
 		put_device(pmu->dev);
 	}
 	free_pmu_context(pmu);
+	mutex_unlock(&pmus_lock);
 }
 EXPORT_SYMBOL_GPL(perf_pmu_unregister);
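
The point of the restructuring above is that pmus_lock is now held across the entire
teardown (and free_pmu_context() no longer takes it itself), so nothing can race with
a PMU while it is being unregistered. A loosely analogous sketch in Python, only to
illustrate the hold-the-lock-across-teardown pattern, not the kernel code itself:

  import threading

  registry_lock = threading.Lock()
  registry = []

  def unregister(entry):
      # Keep the lock held for the whole teardown so a concurrent reader can
      # never observe a half-unregistered entry.
      with registry_lock:
          registry.remove(entry)
          entry.clear()              # stand-in for freeing per-entry state

  entry = {"name": "demo"}
  registry.append(entry)
  unregister(entry)
  print(registry, entry)             # [] {}
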
 

^ permalink raw reply	[flat|nested] 316+ messages in thread

* [GIT PULL] perf fixes
@ 2018-09-15 13:11 Ingo Molnar
  0 siblings, 0 replies; 316+ messages in thread
From: Ingo Molnar @ 2018-09-15 13:11 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Arnaldo Carvalho de Melo, Peter Zijlstra,
	Thomas Gleixner, Andrew Morton, Jiri Olsa

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: cb48b6a26cace226d8b299a48c73e808eb0c4f61 Merge tag 'perf-urgent-for-mingo-4.19-20180912' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent

Mostly tooling fixes, but also breakpoint and x86 PMU driver fixes.

 Thanks,

	Ingo

------------------>
Adrian Hunter (1):
      perf tools: Fix maps__find_symbol_by_name()

Arnaldo Carvalho de Melo (7):
      perf tools: Streamline bpf examples and headers installation
      tools headers uapi: Update tools's copy of linux/perf_event.h
      tools headers uapi: Update tools's copy of asm-generic/unistd.h
      tools headers uapi: Update tools's copy of drm/drm.h
      tools headers uapi: Update tools's copies of kvm headers
      tools headers uapi: Update tools's copy of linux/vhost.h
      tools headers uapi: Update tools's copy of linux/if_link.h

Chris Phlipot (2):
      perf util: Fix bad memory access in trace info.
      perf event-parse: Use fixed size string for comms

Hisao Tanabe (1):
      perf evsel: Fix potential null pointer dereference in perf_evsel__new_idx()

Jacek Tomaka (1):
      perf/x86/intel: Add support/quirk for the MISPREDICT bit on Knights Landing CPUs

Jiri Olsa (5):
      perf tests: Add breakpoint modify tests
      perf/hw_breakpoint: Modify breakpoint even if the new attr has disabled set
      perf/hw_breakpoint: Remove superfluous bp->attr.disabled = 0
      perf/hw_breakpoint: Enable breakpoint in modify_user_hw_breakpoint
      perf/hw_breakpoint: Simplify breakpoint enable in perf_event_modify_breakpoint

Kim Phillips (2):
      perf arm64: Fix include path for asm-generic/unistd.h
      perf annotate: Fix parsing aarch64 branch instructions after objdump update

Martin Liška (1):
      perf annotate: Properly interpret indirect call

Peter Zijlstra (1):
      perf/UAPI: Clearly mark __PERF_SAMPLE_CALLCHAIN_EARLY as internal use

Sandipan Das (1):
      perf probe powerpc: Ignore SyS symbols irrespective of endianness

Yabin Cui (1):
      perf/core: Force USER_DS when recording user stack data


 arch/x86/events/intel/lbr.c                       |   4 +
 include/uapi/linux/perf_event.h                   |   2 +-
 kernel/events/core.c                              |  15 +-
 kernel/events/hw_breakpoint.c                     |  13 +-
 tools/arch/arm/include/uapi/asm/kvm.h             |  13 ++
 tools/arch/arm64/include/uapi/asm/kvm.h           |  13 ++
 tools/arch/s390/include/uapi/asm/kvm.h            |   5 +-
 tools/arch/x86/include/uapi/asm/kvm.h             |  37 ++++
 tools/include/uapi/asm-generic/unistd.h           |   4 +-
 tools/include/uapi/drm/drm.h                      |   9 +
 tools/include/uapi/linux/if_link.h                |  17 ++
 tools/include/uapi/linux/kvm.h                    |   6 +
 tools/include/uapi/linux/perf_event.h             |   2 +-
 tools/include/uapi/linux/vhost.h                  |  18 ++
 tools/perf/Makefile.perf                          |  14 +-
 tools/perf/arch/arm64/Makefile                    |   5 +-
 tools/perf/arch/arm64/entry/syscalls/mksyscalltbl |   6 +-
 tools/perf/arch/powerpc/util/sym-handling.c       |   4 +-
 tools/perf/arch/x86/include/arch-tests.h          |   1 +
 tools/perf/arch/x86/tests/Build                   |   1 +
 tools/perf/arch/x86/tests/arch-tests.c            |   6 +
 tools/perf/arch/x86/tests/bp-modify.c             | 213 ++++++++++++++++++++++
 tools/perf/util/annotate.c                        |  32 +++-
 tools/perf/util/annotate.h                        |   1 +
 tools/perf/util/evsel.c                           |   5 +-
 tools/perf/util/map.c                             |  11 ++
 tools/perf/util/trace-event-info.c                |   2 +-
 tools/perf/util/trace-event-parse.c               |   7 +-
 28 files changed, 423 insertions(+), 43 deletions(-)
 create mode 100644 tools/perf/arch/x86/tests/bp-modify.c

diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index f3e006bed9a7..c88ed39582a1 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -1272,4 +1272,8 @@ void intel_pmu_lbr_init_knl(void)
 
 	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
 	x86_pmu.lbr_sel_map  = snb_lbr_sel_map;
+
+	/* Knights Landing does have MISPREDICT bit */
+	if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_LIP)
+		x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS;
 }
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index eeb787b1c53c..f35eb72739c0 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -144,7 +144,7 @@ enum perf_event_sample_format {
 
 	PERF_SAMPLE_MAX = 1U << 20,		/* non-ABI */
 
-	__PERF_SAMPLE_CALLCHAIN_EARLY		= 1ULL << 63,
+	__PERF_SAMPLE_CALLCHAIN_EARLY		= 1ULL << 63, /* non-ABI; internal use */
 };
 
 /*
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 2a62b96600ad..c80549bf82c6 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2867,16 +2867,11 @@ static int perf_event_modify_breakpoint(struct perf_event *bp,
 	_perf_event_disable(bp);
 
 	err = modify_user_hw_breakpoint_check(bp, attr, true);
-	if (err) {
-		if (!bp->attr.disabled)
-			_perf_event_enable(bp);
 
-		return err;
-	}
-
-	if (!attr->disabled)
+	if (!bp->attr.disabled)
 		_perf_event_enable(bp);
-	return 0;
+
+	return err;
 }
 
 static int perf_event_modify_attr(struct perf_event *event,
@@ -5948,6 +5943,7 @@ perf_output_sample_ustack(struct perf_output_handle *handle, u64 dump_size,
 		unsigned long sp;
 		unsigned int rem;
 		u64 dyn_size;
+		mm_segment_t fs;
 
 		/*
 		 * We dump:
@@ -5965,7 +5961,10 @@ perf_output_sample_ustack(struct perf_output_handle *handle, u64 dump_size,
 
 		/* Data. */
 		sp = perf_user_stack_pointer(regs);
+		fs = get_fs();
+		set_fs(USER_DS);
 		rem = __output_copy_user(handle, (void *) sp, dump_size);
+		set_fs(fs);
 		dyn_size = dump_size - rem;
 
 		perf_output_skip(handle, rem);
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index b3814fce5ecb..d6b56180827c 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -509,6 +509,8 @@ modify_user_hw_breakpoint_check(struct perf_event *bp, struct perf_event_attr *a
  */
 int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
 {
+	int err;
+
 	/*
 	 * modify_user_hw_breakpoint can be invoked with IRQs disabled and hence it
 	 * will not be possible to raise IPIs that invoke __perf_event_disable.
@@ -520,15 +522,12 @@ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *att
 	else
 		perf_event_disable(bp);
 
-	if (!attr->disabled) {
-		int err = modify_user_hw_breakpoint_check(bp, attr, false);
+	err = modify_user_hw_breakpoint_check(bp, attr, false);
 
-		if (err)
-			return err;
+	if (!bp->attr.disabled)
 		perf_event_enable(bp);
-		bp->attr.disabled = 0;
-	}
-	return 0;
+
+	return err;
 }
 EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
 
diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h
index 16e006f708ca..4602464ebdfb 100644
--- a/tools/arch/arm/include/uapi/asm/kvm.h
+++ b/tools/arch/arm/include/uapi/asm/kvm.h
@@ -27,6 +27,7 @@
 #define __KVM_HAVE_GUEST_DEBUG
 #define __KVM_HAVE_IRQ_LINE
 #define __KVM_HAVE_READONLY_MEM
+#define __KVM_HAVE_VCPU_EVENTS
 
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 
@@ -125,6 +126,18 @@ struct kvm_sync_regs {
 struct kvm_arch_memory_slot {
 };
 
+/* for KVM_GET/SET_VCPU_EVENTS */
+struct kvm_vcpu_events {
+	struct {
+		__u8 serror_pending;
+		__u8 serror_has_esr;
+		/* Align it to 8 bytes */
+		__u8 pad[6];
+		__u64 serror_esr;
+	} exception;
+	__u32 reserved[12];
+};
+
 /* If you need to interpret the index values, here is the key: */
 #define KVM_REG_ARM_COPROC_MASK		0x000000000FFF0000
 #define KVM_REG_ARM_COPROC_SHIFT	16
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index 4e76630dd655..97c3478ee6e7 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -39,6 +39,7 @@
 #define __KVM_HAVE_GUEST_DEBUG
 #define __KVM_HAVE_IRQ_LINE
 #define __KVM_HAVE_READONLY_MEM
+#define __KVM_HAVE_VCPU_EVENTS
 
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 
@@ -154,6 +155,18 @@ struct kvm_sync_regs {
 struct kvm_arch_memory_slot {
 };
 
+/* for KVM_GET/SET_VCPU_EVENTS */
+struct kvm_vcpu_events {
+	struct {
+		__u8 serror_pending;
+		__u8 serror_has_esr;
+		/* Align it to 8 bytes */
+		__u8 pad[6];
+		__u64 serror_esr;
+	} exception;
+	__u32 reserved[12];
+};
+
 /* If you need to interpret the index values, here is the key: */
 #define KVM_REG_ARM_COPROC_MASK		0x000000000FFF0000
 #define KVM_REG_ARM_COPROC_SHIFT	16
diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h
index 4cdaa55fabfe..9a50f02b9894 100644
--- a/tools/arch/s390/include/uapi/asm/kvm.h
+++ b/tools/arch/s390/include/uapi/asm/kvm.h
@@ -4,7 +4,7 @@
 /*
  * KVM s390 specific structures and definitions
  *
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008, 2018
  *
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  *               Christian Borntraeger <borntraeger@de.ibm.com>
@@ -225,6 +225,7 @@ struct kvm_guest_debug_arch {
 #define KVM_SYNC_FPRS   (1UL << 8)
 #define KVM_SYNC_GSCB   (1UL << 9)
 #define KVM_SYNC_BPBC   (1UL << 10)
+#define KVM_SYNC_ETOKEN (1UL << 11)
 /* length and alignment of the sdnx as a power of two */
 #define SDNXC 8
 #define SDNXL (1UL << SDNXC)
@@ -258,6 +259,8 @@ struct kvm_sync_regs {
 		struct {
 			__u64 reserved1[2];
 			__u64 gscb[4];
+			__u64 etoken;
+			__u64 etoken_extension;
 		};
 	};
 };
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index c535c2fdea13..86299efa804a 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -378,4 +378,41 @@ struct kvm_sync_regs {
 #define KVM_X86_QUIRK_LINT0_REENABLED	(1 << 0)
 #define KVM_X86_QUIRK_CD_NW_CLEARED	(1 << 1)
 
+#define KVM_STATE_NESTED_GUEST_MODE	0x00000001
+#define KVM_STATE_NESTED_RUN_PENDING	0x00000002
+
+#define KVM_STATE_NESTED_SMM_GUEST_MODE	0x00000001
+#define KVM_STATE_NESTED_SMM_VMXON	0x00000002
+
+struct kvm_vmx_nested_state {
+	__u64 vmxon_pa;
+	__u64 vmcs_pa;
+
+	struct {
+		__u16 flags;
+	} smm;
+};
+
+/* for KVM_CAP_NESTED_STATE */
+struct kvm_nested_state {
+	/* KVM_STATE_* flags */
+	__u16 flags;
+
+	/* 0 for VMX, 1 for SVM.  */
+	__u16 format;
+
+	/* 128 for SVM, 128 + VMCS size for VMX.  */
+	__u32 size;
+
+	union {
+		/* VMXON, VMCS */
+		struct kvm_vmx_nested_state vmx;
+
+		/* Pad the header to 128 bytes.  */
+		__u8 pad[120];
+	};
+
+	__u8 data[0];
+};
+
 #endif /* _ASM_X86_KVM_H */
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 42990676a55e..df4bedb9b01c 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -734,9 +734,11 @@ __SYSCALL(__NR_pkey_free,     sys_pkey_free)
 __SYSCALL(__NR_statx,     sys_statx)
 #define __NR_io_pgetevents 292
 __SC_COMP(__NR_io_pgetevents, sys_io_pgetevents, compat_sys_io_pgetevents)
+#define __NR_rseq 293
+__SYSCALL(__NR_rseq, sys_rseq)
 
 #undef __NR_syscalls
-#define __NR_syscalls 293
+#define __NR_syscalls 294
 
 /*
  * 32 bit systems traditionally used different
diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h
index 9c660e1688ab..300f336633f2 100644
--- a/tools/include/uapi/drm/drm.h
+++ b/tools/include/uapi/drm/drm.h
@@ -687,6 +687,15 @@ struct drm_get_cap {
  */
 #define DRM_CLIENT_CAP_ASPECT_RATIO    4
 
+/**
+ * DRM_CLIENT_CAP_WRITEBACK_CONNECTORS
+ *
+ * If set to 1, the DRM core will expose special connectors to be used for
+ * writing back to memory the scene setup in the commit. Depends on client
+ * also supporting DRM_CLIENT_CAP_ATOMIC
+ */
+#define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS	5
+
 /** DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */
 struct drm_set_client_cap {
 	__u64 capability;
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index cf01b6824244..43391e2d1153 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -164,6 +164,8 @@ enum {
 	IFLA_CARRIER_UP_COUNT,
 	IFLA_CARRIER_DOWN_COUNT,
 	IFLA_NEW_IFINDEX,
+	IFLA_MIN_MTU,
+	IFLA_MAX_MTU,
 	__IFLA_MAX
 };
 
@@ -334,6 +336,7 @@ enum {
 	IFLA_BRPORT_GROUP_FWD_MASK,
 	IFLA_BRPORT_NEIGH_SUPPRESS,
 	IFLA_BRPORT_ISOLATED,
+	IFLA_BRPORT_BACKUP_PORT,
 	__IFLA_BRPORT_MAX
 };
 #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
@@ -459,6 +462,16 @@ enum {
 
 #define IFLA_MACSEC_MAX (__IFLA_MACSEC_MAX - 1)
 
+/* XFRM section */
+enum {
+	IFLA_XFRM_UNSPEC,
+	IFLA_XFRM_LINK,
+	IFLA_XFRM_IF_ID,
+	__IFLA_XFRM_MAX
+};
+
+#define IFLA_XFRM_MAX (__IFLA_XFRM_MAX - 1)
+
 enum macsec_validation_type {
 	MACSEC_VALIDATE_DISABLED = 0,
 	MACSEC_VALIDATE_CHECK = 1,
@@ -920,6 +933,7 @@ enum {
 	XDP_ATTACHED_DRV,
 	XDP_ATTACHED_SKB,
 	XDP_ATTACHED_HW,
+	XDP_ATTACHED_MULTI,
 };
 
 enum {
@@ -928,6 +942,9 @@ enum {
 	IFLA_XDP_ATTACHED,
 	IFLA_XDP_FLAGS,
 	IFLA_XDP_PROG_ID,
+	IFLA_XDP_DRV_PROG_ID,
+	IFLA_XDP_SKB_PROG_ID,
+	IFLA_XDP_HW_PROG_ID,
 	__IFLA_XDP_MAX,
 };
 
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index b6270a3b38e9..07548de5c988 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -949,6 +949,9 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_GET_MSR_FEATURES 153
 #define KVM_CAP_HYPERV_EVENTFD 154
 #define KVM_CAP_HYPERV_TLBFLUSH 155
+#define KVM_CAP_S390_HPAGE_1M 156
+#define KVM_CAP_NESTED_STATE 157
+#define KVM_CAP_ARM_INJECT_SERROR_ESR 158
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1391,6 +1394,9 @@ struct kvm_enc_region {
 /* Available with KVM_CAP_HYPERV_EVENTFD */
 #define KVM_HYPERV_EVENTFD        _IOW(KVMIO,  0xbd, struct kvm_hyperv_eventfd)
 
+/* Available with KVM_CAP_NESTED_STATE */
+#define KVM_GET_NESTED_STATE         _IOWR(KVMIO, 0xbe, struct kvm_nested_state)
+#define KVM_SET_NESTED_STATE         _IOW(KVMIO,  0xbf, struct kvm_nested_state)
 
 /* Secure Encrypted Virtualization command */
 enum sev_cmd_id {
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index eeb787b1c53c..f35eb72739c0 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -144,7 +144,7 @@ enum perf_event_sample_format {
 
 	PERF_SAMPLE_MAX = 1U << 20,		/* non-ABI */
 
-	__PERF_SAMPLE_CALLCHAIN_EARLY		= 1ULL << 63,
+	__PERF_SAMPLE_CALLCHAIN_EARLY		= 1ULL << 63, /* non-ABI; internal use */
 };
 
 /*
diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h
index c51f8e5cc608..84c3de89696a 100644
--- a/tools/include/uapi/linux/vhost.h
+++ b/tools/include/uapi/linux/vhost.h
@@ -65,6 +65,7 @@ struct vhost_iotlb_msg {
 };
 
 #define VHOST_IOTLB_MSG 0x1
+#define VHOST_IOTLB_MSG_V2 0x2
 
 struct vhost_msg {
 	int type;
@@ -74,6 +75,15 @@ struct vhost_msg {
 	};
 };
 
+struct vhost_msg_v2 {
+	__u32 type;
+	__u32 reserved;
+	union {
+		struct vhost_iotlb_msg iotlb;
+		__u8 padding[64];
+	};
+};
+
 struct vhost_memory_region {
 	__u64 guest_phys_addr;
 	__u64 memory_size; /* bytes */
@@ -160,6 +170,14 @@ struct vhost_memory {
 #define VHOST_GET_VRING_BUSYLOOP_TIMEOUT _IOW(VHOST_VIRTIO, 0x24,	\
 					 struct vhost_vring_state)
 
+/* Set or get vhost backend capability */
+
+/* Use message type V2 */
+#define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1
+
+#define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64)
+#define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64)
+
 /* VHOST_NET specific defines */
 
 /* Attach virtio net ring to a raw socket, or tap device.
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index b3d1b12a5081..5224ade3d5af 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -777,14 +777,12 @@ endif
 	$(call QUIET_INSTALL, libexec) \
 		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
 ifndef NO_LIBBPF
-	$(call QUIET_INSTALL, lib) \
-		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'
-	$(call QUIET_INSTALL, include/bpf) \
-		$(INSTALL) include/bpf/*.h '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'
-	$(call QUIET_INSTALL, lib) \
-		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'
-	$(call QUIET_INSTALL, examples/bpf) \
-		$(INSTALL) examples/bpf/*.c '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'
+	$(call QUIET_INSTALL, bpf-headers) \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'; \
+		$(INSTALL) include/bpf/*.h -t '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'
+	$(call QUIET_INSTALL, bpf-examples) \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'; \
+		$(INSTALL) examples/bpf/*.c -t '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'
 endif
 	$(call QUIET_INSTALL, perf-archive) \
 		$(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile
index f013b115dc86..dbef716a1913 100644
--- a/tools/perf/arch/arm64/Makefile
+++ b/tools/perf/arch/arm64/Makefile
@@ -11,7 +11,8 @@ PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
 
 out    := $(OUTPUT)arch/arm64/include/generated/asm
 header := $(out)/syscalls.c
-sysdef := $(srctree)/tools/include/uapi/asm-generic/unistd.h
+incpath := $(srctree)/tools
+sysdef := $(srctree)/tools/arch/arm64/include/uapi/asm/unistd.h
 sysprf := $(srctree)/tools/perf/arch/arm64/entry/syscalls/
 systbl := $(sysprf)/mksyscalltbl
 
@@ -19,7 +20,7 @@ systbl := $(sysprf)/mksyscalltbl
 _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
 
 $(header): $(sysdef) $(systbl)
-	$(Q)$(SHELL) '$(systbl)' '$(CC)' '$(HOSTCC)' $(sysdef) > $@
+	$(Q)$(SHELL) '$(systbl)' '$(CC)' '$(HOSTCC)' $(incpath) $(sysdef) > $@
 
 clean::
 	$(call QUIET_CLEAN, arm64) $(RM) $(header)
diff --git a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl
index 52e197317d3e..2dbb8cade048 100755
--- a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl
+++ b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl
@@ -11,7 +11,8 @@
 
 gcc=$1
 hostcc=$2
-input=$3
+incpath=$3
+input=$4
 
 if ! test -r $input; then
 	echo "Could not read input file" >&2
@@ -28,7 +29,6 @@ create_table_from_c()
 
 	cat <<-_EoHEADER
 		#include <stdio.h>
-		#define __ARCH_WANT_RENAMEAT
 		#include "$input"
 		int main(int argc, char *argv[])
 		{
@@ -42,7 +42,7 @@ create_table_from_c()
 	printf "%s\n" "	printf(\"#define SYSCALLTBL_ARM64_MAX_ID %d\\n\", __NR_$last_sc);"
 	printf "}\n"
 
-	} | $hostcc -o $create_table_exe -x c -
+	} | $hostcc -I $incpath/include/uapi -o $create_table_exe -x c -
 
 	$create_table_exe
 
diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c
index 20e7d74d86cd..10a44e946f77 100644
--- a/tools/perf/arch/powerpc/util/sym-handling.c
+++ b/tools/perf/arch/powerpc/util/sym-handling.c
@@ -22,15 +22,16 @@ bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
 
 #endif
 
-#if !defined(_CALL_ELF) || _CALL_ELF != 2
 int arch__choose_best_symbol(struct symbol *syma,
 			     struct symbol *symb __maybe_unused)
 {
 	char *sym = syma->name;
 
+#if !defined(_CALL_ELF) || _CALL_ELF != 2
 	/* Skip over any initial dot */
 	if (*sym == '.')
 		sym++;
+#endif
 
 	/* Avoid "SyS" kernel syscall aliases */
 	if (strlen(sym) >= 3 && !strncmp(sym, "SyS", 3))
@@ -41,6 +42,7 @@ int arch__choose_best_symbol(struct symbol *syma,
 	return SYMBOL_A;
 }
 
+#if !defined(_CALL_ELF) || _CALL_ELF != 2
 /* Allow matching against dot variants */
 int arch__compare_symbol_names(const char *namea, const char *nameb)
 {
diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h
index c1bd979b957b..613709cfbbd0 100644
--- a/tools/perf/arch/x86/include/arch-tests.h
+++ b/tools/perf/arch/x86/include/arch-tests.h
@@ -9,6 +9,7 @@ struct test;
 int test__rdpmc(struct test *test __maybe_unused, int subtest);
 int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest);
 int test__insn_x86(struct test *test __maybe_unused, int subtest);
+int test__bp_modify(struct test *test, int subtest);
 
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
 struct thread;
diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build
index 8e2c5a38c3b9..586849ff83a0 100644
--- a/tools/perf/arch/x86/tests/Build
+++ b/tools/perf/arch/x86/tests/Build
@@ -5,3 +5,4 @@ libperf-y += arch-tests.o
 libperf-y += rdpmc.o
 libperf-y += perf-time-to-tsc.o
 libperf-$(CONFIG_AUXTRACE) += insn-x86.o
+libperf-$(CONFIG_X86_64) += bp-modify.o
diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c
index cc1802ff5410..d47d3f8e3c8e 100644
--- a/tools/perf/arch/x86/tests/arch-tests.c
+++ b/tools/perf/arch/x86/tests/arch-tests.c
@@ -23,6 +23,12 @@ struct test arch_tests[] = {
 		.desc = "x86 instruction decoder - new instructions",
 		.func = test__insn_x86,
 	},
+#endif
+#if defined(__x86_64__)
+	{
+		.desc = "x86 bp modify",
+		.func = test__bp_modify,
+	},
 #endif
 	{
 		.func = NULL,
diff --git a/tools/perf/arch/x86/tests/bp-modify.c b/tools/perf/arch/x86/tests/bp-modify.c
new file mode 100644
index 000000000000..f53e4406709f
--- /dev/null
+++ b/tools/perf/arch/x86/tests/bp-modify.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/user.h>
+#include <syscall.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ptrace.h>
+#include <asm/ptrace.h>
+#include <errno.h>
+#include "debug.h"
+#include "tests/tests.h"
+#include "arch-tests.h"
+
+static noinline int bp_1(void)
+{
+	pr_debug("in %s\n", __func__);
+	return 0;
+}
+
+static noinline int bp_2(void)
+{
+	pr_debug("in %s\n", __func__);
+	return 0;
+}
+
+static int spawn_child(void)
+{
+	int child = fork();
+
+	if (child == 0) {
+		/*
+		 * The child sets itself for as tracee and
+		 * waits in signal for parent to trace it,
+		 * then it calls bp_1 and quits.
+		 */
+		int err = ptrace(PTRACE_TRACEME, 0, NULL, NULL);
+
+		if (err) {
+			pr_debug("failed to PTRACE_TRACEME\n");
+			exit(1);
+		}
+
+		raise(SIGCONT);
+		bp_1();
+		exit(0);
+	}
+
+	return child;
+}
+
+/*
+ * This tests creates HW breakpoint, tries to
+ * change it and checks it was properly changed.
+ */
+static int bp_modify1(void)
+{
+	pid_t child;
+	int status;
+	unsigned long rip = 0, dr7 = 1;
+
+	child = spawn_child();
+
+	waitpid(child, &status, 0);
+	if (WIFEXITED(status)) {
+		pr_debug("tracee exited prematurely 1\n");
+		return TEST_FAIL;
+	}
+
+	/*
+	 * The parent does following steps:
+	 *  - creates a new breakpoint (id 0) for bp_2 function
+	 *  - changes that breakponit to bp_1 function
+	 *  - waits for the breakpoint to hit and checks
+	 *    it has proper rip of bp_1 function
+	 *  - detaches the child
+	 */
+	if (ptrace(PTRACE_POKEUSER, child,
+		   offsetof(struct user, u_debugreg[0]), bp_2)) {
+		pr_debug("failed to set breakpoint, 1st time: %s\n",
+			 strerror(errno));
+		goto out;
+	}
+
+	if (ptrace(PTRACE_POKEUSER, child,
+		   offsetof(struct user, u_debugreg[0]), bp_1)) {
+		pr_debug("failed to set breakpoint, 2nd time: %s\n",
+			 strerror(errno));
+		goto out;
+	}
+
+	if (ptrace(PTRACE_POKEUSER, child,
+		   offsetof(struct user, u_debugreg[7]), dr7)) {
+		pr_debug("failed to set dr7: %s\n", strerror(errno));
+		goto out;
+	}
+
+	if (ptrace(PTRACE_CONT, child, NULL, NULL)) {
+		pr_debug("failed to PTRACE_CONT: %s\n", strerror(errno));
+		goto out;
+	}
+
+	waitpid(child, &status, 0);
+	if (WIFEXITED(status)) {
+		pr_debug("tracee exited prematurely 2\n");
+		return TEST_FAIL;
+	}
+
+	rip = ptrace(PTRACE_PEEKUSER, child,
+		     offsetof(struct user_regs_struct, rip), NULL);
+	if (rip == (unsigned long) -1) {
+		pr_debug("failed to PTRACE_PEEKUSER: %s\n",
+			 strerror(errno));
+		goto out;
+	}
+
+	pr_debug("rip %lx, bp_1 %p\n", rip, bp_1);
+
+out:
+	if (ptrace(PTRACE_DETACH, child, NULL, NULL)) {
+		pr_debug("failed to PTRACE_DETACH: %s", strerror(errno));
+		return TEST_FAIL;
+	}
+
+	return rip == (unsigned long) bp_1 ? TEST_OK : TEST_FAIL;
+}
+
+/*
+ * This tests creates HW breakpoint, tries to
+ * change it to bogus value and checks the original
+ * breakpoint is hit.
+ */
+static int bp_modify2(void)
+{
+	pid_t child;
+	int status;
+	unsigned long rip = 0, dr7 = 1;
+
+	child = spawn_child();
+
+	waitpid(child, &status, 0);
+	if (WIFEXITED(status)) {
+		pr_debug("tracee exited prematurely 1\n");
+		return TEST_FAIL;
+	}
+
+	/*
+	 * The parent does following steps:
+	 *  - creates a new breakpoint (id 0) for bp_1 function
+	 *  - tries to change that breakpoint to (-1) address
+	 *  - waits for the breakpoint to hit and checks
+	 *    it has proper rip of bp_1 function
+	 *  - detaches the child
+	 */
+	if (ptrace(PTRACE_POKEUSER, child,
+		   offsetof(struct user, u_debugreg[0]), bp_1)) {
+		pr_debug("failed to set breakpoint: %s\n",
+			 strerror(errno));
+		goto out;
+	}
+
+	if (ptrace(PTRACE_POKEUSER, child,
+		   offsetof(struct user, u_debugreg[7]), dr7)) {
+		pr_debug("failed to set dr7: %s\n", strerror(errno));
+		goto out;
+	}
+
+	if (!ptrace(PTRACE_POKEUSER, child,
+		   offsetof(struct user, u_debugreg[0]), (unsigned long) (-1))) {
+		pr_debug("failed, breakpoint set to bogus address\n");
+		goto out;
+	}
+
+	if (ptrace(PTRACE_CONT, child, NULL, NULL)) {
+		pr_debug("failed to PTRACE_CONT: %s\n", strerror(errno));
+		goto out;
+	}
+
+	waitpid(child, &status, 0);
+	if (WIFEXITED(status)) {
+		pr_debug("tracee exited prematurely 2\n");
+		return TEST_FAIL;
+	}
+
+	rip = ptrace(PTRACE_PEEKUSER, child,
+		     offsetof(struct user_regs_struct, rip), NULL);
+	if (rip == (unsigned long) -1) {
+		pr_debug("failed to PTRACE_PEEKUSER: %s\n",
+			 strerror(errno));
+		goto out;
+	}
+
+	pr_debug("rip %lx, bp_1 %p\n", rip, bp_1);
+
+out:
+	if (ptrace(PTRACE_DETACH, child, NULL, NULL)) {
+		pr_debug("failed to PTRACE_DETACH: %s", strerror(errno));
+		return TEST_FAIL;
+	}
+
+	return rip == (unsigned long) bp_1 ? TEST_OK : TEST_FAIL;
+}
+
+int test__bp_modify(struct test *test __maybe_unused,
+		    int subtest __maybe_unused)
+{
+	TEST_ASSERT_VAL("modify test 1 failed\n", !bp_modify1());
+	TEST_ASSERT_VAL("modify test 2 failed\n", !bp_modify2());
+
+	return 0;
+}
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 20061cf42288..28cd6a17491b 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -246,8 +246,14 @@ static int call__parse(struct arch *arch, struct ins_operands *ops, struct map_s
 
 indirect_call:
 	tok = strchr(endptr, '*');
-	if (tok != NULL)
-		ops->target.addr = strtoull(tok + 1, NULL, 16);
+	if (tok != NULL) {
+		endptr++;
+
+		/* Indirect call can use a non-rip register and offset: callq  *0x8(%rbx).
+		 * Do not parse such instruction.  */
+		if (strstr(endptr, "(%r") == NULL)
+			ops->target.addr = strtoull(endptr, NULL, 16);
+	}
 	goto find_target;
 }
 
@@ -276,7 +282,19 @@ bool ins__is_call(const struct ins *ins)
 	return ins->ops == &call_ops || ins->ops == &s390_call_ops;
 }
 
-static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map_symbol *ms)
+/*
+ * Prevents from matching commas in the comment section, e.g.:
+ * ffff200008446e70:       b.cs    ffff2000084470f4 <generic_exec_single+0x314>  // b.hs, b.nlast
+ */
+static inline const char *validate_comma(const char *c, struct ins_operands *ops)
+{
+	if (ops->raw_comment && c > ops->raw_comment)
+		return NULL;
+
+	return c;
+}
+
+static int jump__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms)
 {
 	struct map *map = ms->map;
 	struct symbol *sym = ms->sym;
@@ -285,6 +303,10 @@ static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *op
 	};
 	const char *c = strchr(ops->raw, ',');
 	u64 start, end;
+
+	ops->raw_comment = strchr(ops->raw, arch->objdump.comment_char);
+	c = validate_comma(c, ops);
+
 	/*
 	 * Examples of lines to parse for the _cpp_lex_token@@Base
 	 * function:
@@ -304,6 +326,7 @@ static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *op
 		ops->target.addr = strtoull(c, NULL, 16);
 		if (!ops->target.addr) {
 			c = strchr(c, ',');
+			c = validate_comma(c, ops);
 			if (c++ != NULL)
 				ops->target.addr = strtoull(c, NULL, 16);
 		}
@@ -361,9 +384,12 @@ static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
 		return scnprintf(bf, size, "%-6s %s", ins->name, ops->target.sym->name);
 
 	c = strchr(ops->raw, ',');
+	c = validate_comma(c, ops);
+
 	if (c != NULL) {
 		const char *c2 = strchr(c + 1, ',');
 
+		c2 = validate_comma(c2, ops);
 		/* check for 3-op insn */
 		if (c2 != NULL)
 			c = c2;
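
validate_comma() above simply refuses any comma that falls at or after the start of the
objdump comment, so operand parsing no longer trips over text like "// b.hs, b.nlast".
A small Python rendition of the same rule; the comment marker is per-architecture in
perf, so "//" here is only an example:

  def operand_comma(raw, comment_char="//"):
      # Index of the first comma that is not inside the trailing comment,
      # or None if every comma (if any) belongs to the comment.
      comment = raw.find(comment_char)
      comma = raw.find(",")
      if comma == -1 or (comment != -1 and comma > comment):
          return None
      return comma

  line = "b.cs ffff2000084470f4 <generic_exec_single+0x314>  // b.hs, b.nlast"
  print(operand_comma(line))                              # None
  print(operand_comma("tbnz w0, #26, ffff0000083cd190"))  # 7
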
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 005a5fe8a8c6..5399ba2321bb 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -22,6 +22,7 @@ struct ins {
 
 struct ins_operands {
 	char	*raw;
+	char	*raw_comment;
 	struct {
 		char	*raw;
 		char	*name;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index c980bbff6353..1a61628a1c12 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -251,8 +251,9 @@ struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
 {
 	struct perf_evsel *evsel = zalloc(perf_evsel__object.size);
 
-	if (evsel != NULL)
-		perf_evsel__init(evsel, attr, idx);
+	if (!evsel)
+		return NULL;
+	perf_evsel__init(evsel, attr, idx);
 
 	if (perf_evsel__is_bpf_output(evsel)) {
 		evsel->attr.sample_type |= (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 36d0763311ef..6a6929f208b4 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -576,6 +576,13 @@ struct symbol *map_groups__find_symbol(struct map_groups *mg,
 	return NULL;
 }
 
+static bool map__contains_symbol(struct map *map, struct symbol *sym)
+{
+	u64 ip = map->unmap_ip(map, sym->start);
+
+	return ip >= map->start && ip < map->end;
+}
+
 struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name,
 					 struct map **mapp)
 {
@@ -591,6 +598,10 @@ struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name,
 
 		if (sym == NULL)
 			continue;
+		if (!map__contains_symbol(pos, sym)) {
+			sym = NULL;
+			continue;
+		}
 		if (mapp != NULL)
 			*mapp = pos;
 		goto out;
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index c85d0d1a65ed..7b0ca7cbb7de 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -377,7 +377,7 @@ static int record_ftrace_printk(void)
 
 static int record_saved_cmdline(void)
 {
-	unsigned int size;
+	unsigned long long size;
 	char *path;
 	struct stat st;
 	int ret, err = 0;
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 920b1d58a068..e76214f8d596 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -164,16 +164,15 @@ void parse_ftrace_printk(struct tep_handle *pevent,
 void parse_saved_cmdline(struct tep_handle *pevent,
 			 char *file, unsigned int size __maybe_unused)
 {
-	char *comm;
+	char comm[17]; /* Max comm length in the kernel is 16. */
 	char *line;
 	char *next = NULL;
 	int pid;
 
 	line = strtok_r(file, "\n", &next);
 	while (line) {
-		sscanf(line, "%d %ms", &pid, &comm);
-		tep_register_comm(pevent, comm, pid);
-		free(comm);
+		if (sscanf(line, "%d %16s", &pid, comm) == 2)
+			tep_register_comm(pevent, comm, pid);
 		line = strtok_r(NULL, "\n", &next);
 	}
 }
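
The fix above swaps the dynamically allocating "%ms" conversion for a fixed buffer
(the patch's comment notes the kernel's maximum comm length is 16) and skips lines
that fail to parse instead of registering garbage. A hedged Python version of the same
parsing rule; the function name is made up:

  def parse_saved_cmdlines(text):
      # Each saved_cmdlines line is "<pid> <comm>"; keep at most 16
      # characters of the comm and ignore anything that does not parse.
      comms = {}
      for line in text.splitlines():
          parts = line.split()
          if len(parts) < 2:
              continue
          try:
              pid = int(parts[0])
          except ValueError:
              continue
          comms[pid] = parts[1][:16]
      return comms

  print(parse_saved_cmdlines("1423 perf\n77 kworker/u16:3-events_unbound\n"))
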

^ permalink raw reply	[flat|nested] 316+ messages in thread

* [GIT PULL] perf fixes
@ 2018-07-30 17:53 Ingo Molnar
  0 siblings, 0 replies; 316+ messages in thread
From: Ingo Molnar @ 2018-07-30 17:53 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Thomas Gleixner, Andrew Morton

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: 7f635ff187ab6be0b350b3ec06791e376af238ab perf/core: Fix crash when using HW tracing kernel filters

Misc fixes:

 - an AMD IBS data corruption fix (uncovered by UBSAN)
 - an Intel PEBS entry unwind error fix
 - a HW-tracing crash fix
 - a MAINTAINERS update

 Thanks,

	Ingo

------------------>
Ananth N Mavinakayanahalli (1):
      MAINTAINERS: Add Naveen N. Rao as kprobes co-maintainer

Mathieu Poirier (1):
      perf/core: Fix crash when using HW tracing kernel filters

Peter Zijlstra (1):
      perf/x86/intel: Fix unwind errors from PEBS entries (mk-II)

Thomas Gleixner (1):
      perf/x86/amd/ibs: Don't access non-started event


 MAINTAINERS                     |  2 +-
 arch/x86/events/amd/ibs.c       |  6 +++++-
 arch/x86/events/intel/core.c    |  3 +++
 arch/x86/events/intel/ds.c      | 25 +++++++++++--------------
 include/linux/perf_event.h      |  1 +
 include/uapi/linux/perf_event.h |  2 ++
 kernel/events/core.c            | 10 ++++++++--
 7 files changed, 31 insertions(+), 18 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 0fe4228f78cb..42a884c1b0f7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7984,7 +7984,7 @@ F:	lib/test_kmod.c
 F:	tools/testing/selftests/kmod/
 
 KPROBES
-M:	Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com>
+M:	Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
 M:	Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 M:	"David S. Miller" <davem@davemloft.net>
 M:	Masami Hiramatsu <mhiramat@kernel.org>
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 4b98101209a1..d50bb4dc0650 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -579,7 +579,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
 {
 	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
 	struct perf_event *event = pcpu->event;
-	struct hw_perf_event *hwc = &event->hw;
+	struct hw_perf_event *hwc;
 	struct perf_sample_data data;
 	struct perf_raw_record raw;
 	struct pt_regs regs;
@@ -602,6 +602,10 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
 		return 0;
 	}
 
+	if (WARN_ON_ONCE(!event))
+		goto fail;
+
+	hwc = &event->hw;
 	msr = hwc->config_base;
 	buf = ibs_data.regs;
 	rdmsrl(msr, *buf);
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 707b2a96e516..86f0c15dcc2d 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2997,6 +2997,9 @@ static int intel_pmu_hw_config(struct perf_event *event)
 		}
 		if (x86_pmu.pebs_aliases)
 			x86_pmu.pebs_aliases(event);
+
+		if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
+			event->attr.sample_type |= __PERF_SAMPLE_CALLCHAIN_EARLY;
 	}
 
 	if (needs_branch_stack(event)) {
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 8cf03f101938..8dbba77e0518 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1185,17 +1185,21 @@ static void setup_pebs_sample_data(struct perf_event *event,
 		data->data_src.val = val;
 	}
 
+	/*
+	 * We must however always use iregs for the unwinder to stay sane; the
+	 * record BP,SP,IP can point into thin air when the record is from a
+	 * previous PMI context or an (I)RET happend between the record and
+	 * PMI.
+	 */
+	if (sample_type & PERF_SAMPLE_CALLCHAIN)
+		data->callchain = perf_callchain(event, iregs);
+
 	/*
 	 * We use the interrupt regs as a base because the PEBS record does not
 	 * contain a full regs set, specifically it seems to lack segment
 	 * descriptors, which get used by things like user_mode().
 	 *
 	 * In the simple case fix up only the IP for PERF_SAMPLE_IP.
-	 *
-	 * We must however always use BP,SP from iregs for the unwinder to stay
-	 * sane; the record BP,SP can point into thin air when the record is
-	 * from a previous PMI context or an (I)RET happend between the record
-	 * and PMI.
 	 */
 	*regs = *iregs;
 
@@ -1214,15 +1218,8 @@ static void setup_pebs_sample_data(struct perf_event *event,
 		regs->si = pebs->si;
 		regs->di = pebs->di;
 
-		/*
-		 * Per the above; only set BP,SP if we don't need callchains.
-		 *
-		 * XXX: does this make sense?
-		 */
-		if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) {
-			regs->bp = pebs->bp;
-			regs->sp = pebs->sp;
-		}
+		regs->bp = pebs->bp;
+		regs->sp = pebs->sp;
 
 #ifndef CONFIG_X86_32
 		regs->r8 = pebs->r8;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 1fa12887ec02..87f6db437e4a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1130,6 +1130,7 @@ extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct
 extern struct perf_callchain_entry *
 get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
 		   u32 max_stack, bool crosstask, bool add_mark);
+extern struct perf_callchain_entry *perf_callchain(struct perf_event *event, struct pt_regs *regs);
 extern int get_callchain_buffers(int max_stack);
 extern void put_callchain_buffers(void);
 
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index b8e288a1f740..eeb787b1c53c 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -143,6 +143,8 @@ enum perf_event_sample_format {
 	PERF_SAMPLE_PHYS_ADDR			= 1U << 19,
 
 	PERF_SAMPLE_MAX = 1U << 20,		/* non-ABI */
+
+	__PERF_SAMPLE_CALLCHAIN_EARLY		= 1ULL << 63,
 };
 
 /*
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 8f0434a9951a..eec2d5fb676b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6343,7 +6343,7 @@ static u64 perf_virt_to_phys(u64 virt)
 
 static struct perf_callchain_entry __empty_callchain = { .nr = 0, };
 
-static struct perf_callchain_entry *
+struct perf_callchain_entry *
 perf_callchain(struct perf_event *event, struct pt_regs *regs)
 {
 	bool kernel = !event->attr.exclude_callchain_kernel;
@@ -6382,7 +6382,9 @@ void perf_prepare_sample(struct perf_event_header *header,
 	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
 		int size = 1;
 
-		data->callchain = perf_callchain(event, regs);
+		if (!(sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY))
+			data->callchain = perf_callchain(event, regs);
+
 		size += data->callchain->nr;
 
 		header->size += size * sizeof(u64);
@@ -7335,6 +7337,10 @@ static bool perf_addr_filter_match(struct perf_addr_filter *filter,
 				     struct file *file, unsigned long offset,
 				     unsigned long size)
 {
+	/* d_inode(NULL) won't be equal to any mapped user-space file */
+	if (!filter->path.dentry)
+		return false;
+
 	if (d_inode(filter->path.dentry) != file_inode(file))
 		return false;
 

^ permalink raw reply	[flat|nested] 316+ messages in thread

* [GIT PULL] perf fixes
@ 2018-07-13 19:59 Ingo Molnar
  0 siblings, 0 replies; 316+ messages in thread
From: Ingo Molnar @ 2018-07-13 19:59 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Arnaldo Carvalho de Melo, Peter Zijlstra,
	Thomas Gleixner, Andrew Morton

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: 6e1d33b24a2b4aebcb05782e937ebc9a7a4a3f50 Merge tag 'perf-urgent-for-mingo-4.18-20180711' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent

Misc tooling fixes: python3-related fixes, a gcc8 fix, bashism fixes and other
smaller fixes.

 Thanks,

	Ingo

------------------>
Janne Huttunen (1):
      perf script python: Fix dict reference counting

Jeremy Cline (7):
      perf tools: Generate a Python script compatible with Python 2 and 3
      perf scripts python: Add Python 3 support to Core.py
      perf scripts python: Add Python 3 support to SchedGui.py
      perf scripts python: Add Python 3 support to Util.py
      perf scripts python: Add Python 3 support to sched-migration.py
      perf scripts python: Add Python 3 support to EventClass.py
      perf tools: Use python-config --includes rather than --cflags

Jiri Olsa (2):
      perf tools: Fix compilation errors on gcc8
      perf stat: Fix --interval_clear option

Kim Phillips (4):
      perf test shell: Replace '|&' with '2>&1 |' to work with more shells
      perf test shell: Make perf's inet_pton test more portable
      perf llvm-utils: Remove bashism from kernel include fetch script
      perf test shell: Prevent temporary editor files from being considered test scripts


 tools/perf/Makefile.config                         |  3 +-
 tools/perf/arch/x86/util/perf_regs.c               |  2 +-
 tools/perf/builtin-stat.c                          |  2 +-
 tools/perf/jvmti/jvmti_agent.c                     |  3 +-
 .../python/Perf-Trace-Util/lib/Perf/Trace/Core.py  | 40 +++++++++-------------
 .../Perf-Trace-Util/lib/Perf/Trace/EventClass.py   |  4 ++-
 .../Perf-Trace-Util/lib/Perf/Trace/SchedGui.py     |  2 +-
 .../python/Perf-Trace-Util/lib/Perf/Trace/Util.py  | 11 +++---
 tools/perf/scripts/python/sched-migration.py       | 14 +++++---
 tools/perf/tests/builtin-test.c                    |  2 +-
 .../tests/shell/record+probe_libc_inet_pton.sh     | 37 +++++++++++---------
 tools/perf/tests/shell/trace+probe_vfs_getname.sh  |  2 +-
 tools/perf/util/llvm-utils.c                       |  6 ++--
 .../util/scripting-engines/trace-event-python.c    | 37 +++++++++-----------
 14 files changed, 84 insertions(+), 81 deletions(-)

diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index b5ac356ba323..f5a3b402589e 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -207,8 +207,7 @@ ifdef PYTHON_CONFIG
   PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null)
   PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
   PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil
-  PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
-  PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS))
+  PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --includes 2>/dev/null)
   FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
 endif
 
diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c
index 4b2caf6d48e7..fead6b3b4206 100644
--- a/tools/perf/arch/x86/util/perf_regs.c
+++ b/tools/perf/arch/x86/util/perf_regs.c
@@ -226,7 +226,7 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op)
 		else if (rm[2].rm_so != rm[2].rm_eo)
 			prefix[0] = '+';
 		else
-			strncpy(prefix, "+0", 2);
+			scnprintf(prefix, sizeof(prefix), "+0");
 	}
 
 	/* Rename register */
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 22547a490e1f..05be023c3f0e 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1742,7 +1742,7 @@ static void print_interval(char *prefix, struct timespec *ts)
 		}
 	}
 
-	if ((num_print_interval == 0 && metric_only) || interval_clear)
+	if ((num_print_interval == 0 || interval_clear) && metric_only)
 		print_metric_headers(" ", true);
 	if (++num_print_interval == 25)
 		num_print_interval = 0;
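
For the --interval_clear change: the headers reprinted here are the --metric-only
headers, so (as the corrected condition reads) they should only ever be printed when
metric_only is set. A quick check of the old and new expressions with illustrative
values:

  def old_cond(num_print_interval, metric_only, interval_clear):
      return (num_print_interval == 0 and metric_only) or interval_clear

  def new_cond(num_print_interval, metric_only, interval_clear):
      return (num_print_interval == 0 or interval_clear) and metric_only

  # --interval-clear set but --metric-only off: the old form wrongly asks for
  # metric headers, the corrected one does not.
  print(old_cond(5, False, True))    # True
  print(new_cond(5, False, True))    # False
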
diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c
index 0c6d1002b524..ac1bcdc17dae 100644
--- a/tools/perf/jvmti/jvmti_agent.c
+++ b/tools/perf/jvmti/jvmti_agent.c
@@ -35,6 +35,7 @@
 #include <sys/mman.h>
 #include <syscall.h> /* for gettid() */
 #include <err.h>
+#include <linux/kernel.h>
 
 #include "jvmti_agent.h"
 #include "../util/jitdump.h"
@@ -249,7 +250,7 @@ void *jvmti_open(void)
 	/*
 	 * jitdump file name
 	 */
-	snprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid());
+	scnprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid());
 
 	fd = open(dump_path, O_CREAT|O_TRUNC|O_RDWR, 0666);
 	if (fd == -1)
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
index 38dfb720fb6f..54ace2f6bc36 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
@@ -31,10 +31,8 @@ symbolic_fields = autodict()
     string = ""
 
     if flag_fields[event_name][field_name]:
-	print_delim = 0
-        keys = flag_fields[event_name][field_name]['values'].keys()
-        keys.sort()
-        for idx in keys:
+        print_delim = 0
+        for idx in sorted(flag_fields[event_name][field_name]['values']):
             if not value and not idx:
                 string += flag_fields[event_name][field_name]['values'][idx]
                 break
@@ -51,14 +49,12 @@ symbolic_fields = autodict()
     string = ""
 
     if symbolic_fields[event_name][field_name]:
-        keys = symbolic_fields[event_name][field_name]['values'].keys()
-        keys.sort()
-        for idx in keys:
+        for idx in sorted(symbolic_fields[event_name][field_name]['values']):
             if not value and not idx:
-		string = symbolic_fields[event_name][field_name]['values'][idx]
+                string = symbolic_fields[event_name][field_name]['values'][idx]
                 break
-	    if (value == idx):
-		string = symbolic_fields[event_name][field_name]['values'][idx]
+            if (value == idx):
+                string = symbolic_fields[event_name][field_name]['values'][idx]
                 break
 
     return string
@@ -74,19 +70,17 @@ trace_flags = { 0x00: "NONE", \
     string = ""
     print_delim = 0
 
-    keys = trace_flags.keys()
-
-    for idx in keys:
-	if not value and not idx:
-	    string += "NONE"
-	    break
-
-	if idx and (value & idx) == idx:
-	    if print_delim:
-		string += " | ";
-	    string += trace_flags[idx]
-	    print_delim = 1
-	    value &= ~idx
+    for idx in trace_flags:
+        if not value and not idx:
+            string += "NONE"
+            break
+
+        if idx and (value & idx) == idx:
+            if print_delim:
+                string += " | ";
+            string += trace_flags[idx]
+            print_delim = 1
+            value &= ~idx
 
     return string
 
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py
index 81a56cd2b3c1..21a7a1298094 100755
--- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py
@@ -8,6 +8,7 @@
 # PerfEvent is the base class for all perf event sample, PebsEvent
 # is a HW base Intel x86 PEBS event, and user could add more SW/HW
 # event classes based on requirements.
+from __future__ import print_function
 
 import struct
 
@@ -44,7 +45,8 @@ EVTYPE_IBS      = 3
                 PerfEvent.event_num += 1
 
         def show(self):
-                print "PMU event: name=%12s, symbol=%24s, comm=%8s, dso=%12s" % (self.name, self.symbol, self.comm, self.dso)
+                print("PMU event: name=%12s, symbol=%24s, comm=%8s, dso=%12s" %
+                      (self.name, self.symbol, self.comm, self.dso))
 
 #
 # Basic Intel PEBS (Precise Event-based Sampling) event, whose raw buffer
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py
index fdd92f699055..cac7b2542ee8 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py
@@ -11,7 +11,7 @@
 try:
 	import wx
 except ImportError:
-	raise ImportError, "You need to install the wxpython lib for this script"
+	raise ImportError("You need to install the wxpython lib for this script")
 
 
 class RootFrame(wx.Frame):
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
index f6c84966e4f8..7384dcb628c4 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
@@ -5,6 +5,7 @@
 # This software may be distributed under the terms of the GNU General
 # Public License ("GPL") version 2 as published by the Free Software
 # Foundation.
+from __future__ import print_function
 
 import errno, os
 
@@ -33,7 +34,7 @@ NSECS_PER_SEC    = 1000000000
     return str
 
 def add_stats(dict, key, value):
-	if not dict.has_key(key):
+	if key not in dict:
 		dict[key] = (value, value, value, 1)
 	else:
 		min, max, avg, count = dict[key]
@@ -72,10 +73,10 @@ audit_package_warned = False
 except:
 	if not audit_package_warned:
 		audit_package_warned = True
-		print "Install the audit-libs-python package to get syscall names.\n" \
-                    "For example:\n  # apt-get install python-audit (Ubuntu)" \
-                    "\n  # yum install audit-libs-python (Fedora)" \
-                    "\n  etc.\n"
+		print("Install the audit-libs-python package to get syscall names.\n"
+                    "For example:\n  # apt-get install python-audit (Ubuntu)"
+                    "\n  # yum install audit-libs-python (Fedora)"
+                    "\n  etc.\n")
 
 def syscall_name(id):
 	try:
diff --git a/tools/perf/scripts/python/sched-migration.py b/tools/perf/scripts/python/sched-migration.py
index de66cb3b72c9..3473e7f66081 100644
--- a/tools/perf/scripts/python/sched-migration.py
+++ b/tools/perf/scripts/python/sched-migration.py
@@ -9,13 +9,17 @@
 # This software is distributed under the terms of the GNU General
 # Public License ("GPL") version 2 as published by the Free Software
 # Foundation.
-
+from __future__ import print_function
 
 import os
 import sys
 
 from collections import defaultdict
-from UserList import UserList
+try:
+    from UserList import UserList
+except ImportError:
+    # Python 3: UserList moved to the collections package
+    from collections import UserList
 
 sys.path.append(os.environ['PERF_EXEC_PATH'] + \
 	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
@@ -300,7 +304,7 @@ threads = { 0 : "idle"}
 		if i == -1:
 			return
 
-		for i in xrange(i, len(self.data)):
+		for i in range(i, len(self.data)):
 			timeslice = self.data[i]
 			if timeslice.start > end:
 				return
@@ -336,8 +340,8 @@ threads = { 0 : "idle"}
 		on_cpu_task = self.current_tsk[headers.cpu]
 
 		if on_cpu_task != -1 and on_cpu_task != prev_pid:
-			print "Sched switch event rejected ts: %s cpu: %d prev: %s(%d) next: %s(%d)" % \
-				(headers.ts_format(), headers.cpu, prev_comm, prev_pid, next_comm, next_pid)
+			print("Sched switch event rejected ts: %s cpu: %d prev: %s(%d) next: %s(%d)" % \
+				headers.ts_format(), headers.cpu, prev_comm, prev_pid, next_comm, next_pid)
 
 		threads[prev_pid] = prev_comm
 		threads[next_pid] = next_comm
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 2bde505e2e7e..dd850a26d579 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -422,7 +422,7 @@ static const char *shell_test__description(char *description, size_t size,
 
 #define for_each_shell_test(dir, base, ent)	\
 	while ((ent = readdir(dir)) != NULL)	\
-		if (!is_directory(base, ent))
+		if (!is_directory(base, ent) && ent->d_name[0] != '.')
 
 static const char *shell_tests__dir(char *path, size_t size)
 {
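
For context, the d_name[0] != '.' test added above is what keeps hidden editor
artifacts, e.g. vim's .myscript.sh.swp swap files, from being listed as shell
tests. A stand-alone sketch of the same filter (illustrative only, it leaves
out the is_directory() part):

#include <dirent.h>
#include <stdio.h>

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : ".";
	struct dirent *ent;
	DIR *dir = opendir(path);

	if (!dir) {
		perror(path);
		return 1;
	}

	/* Names starting with '.' (".", "..", editor swap files) are not
	 * considered test scripts. */
	while ((ent = readdir(dir)) != NULL)
		if (ent->d_name[0] != '.')
			printf("candidate test: %s\n", ent->d_name);

	closedir(dir);
	return 0;
}
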
diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
index 263057039693..94e513e62b34 100755
--- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
+++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
@@ -14,35 +14,40 @@ libc=$(grep -w libc /proc/self/maps | head -1 | sed -r 's/.*[[:space:]](\/.*)/\1
 nm -Dg $libc 2>/dev/null | fgrep -q inet_pton || exit 254
 
 trace_libc_inet_pton_backtrace() {
-	idx=0
-	expected[0]="ping[][0-9 \.:]+probe_libc:inet_pton: \([[:xdigit:]]+\)"
-	expected[1]=".*inet_pton\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$"
+
+	expected=`mktemp -u /tmp/expected.XXX`
+
+	echo "ping[][0-9 \.:]+probe_libc:inet_pton: \([[:xdigit:]]+\)" > $expected
+	echo ".*inet_pton\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected
 	case "$(uname -m)" in
 	s390x)
 		eventattr='call-graph=dwarf,max-stack=4'
-		expected[2]="gaih_inet.*\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$"
-		expected[3]="(__GI_)?getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$"
-		expected[4]="main\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$"
+		echo "gaih_inet.*\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected
+		echo "(__GI_)?getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected
+		echo "main\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$" >> $expected
 		;;
 	*)
 		eventattr='max-stack=3'
-		expected[2]="getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc\)$"
-		expected[3]=".*\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$"
+		echo "getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" >> $expected
+		echo ".*\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$" >> $expected
 		;;
 	esac
 
-	file=`mktemp -u /tmp/perf.data.XXX`
+	perf_data=`mktemp -u /tmp/perf.data.XXX`
+	perf_script=`mktemp -u /tmp/perf.script.XXX`
+	perf record -e probe_libc:inet_pton/$eventattr/ -o $perf_data ping -6 -c 1 ::1 > /dev/null 2>&1
+	perf script -i $perf_data > $perf_script
 
-	perf record -e probe_libc:inet_pton/$eventattr/ -o $file ping -6 -c 1 ::1 > /dev/null 2>&1
-	perf script -i $file | while read line ; do
+	exec 3<$perf_script
+	exec 4<$expected
+	while read line <&3 && read -r pattern <&4; do
+		[ -z "$pattern" ] && break
 		echo $line
-		echo "$line" | egrep -q "${expected[$idx]}"
+		echo "$line" | egrep -q "$pattern"
 		if [ $? -ne 0 ] ; then
-			printf "FAIL: expected backtrace entry %d \"%s\" got \"%s\"\n" $idx "${expected[$idx]}" "$line"
+			printf "FAIL: expected backtrace entry \"%s\" got \"%s\"\n" "$pattern" "$line"
 			exit 1
 		fi
-		let idx+=1
-		[ -z "${expected[$idx]}" ] && break
 	done
 
 	# If any statements are executed from this point onwards,
@@ -58,6 +63,6 @@ skip_if_no_perf_probe && \
 perf probe -q $libc inet_pton && \
 trace_libc_inet_pton_backtrace
 err=$?
-rm -f ${file}
+rm -f ${perf_data} ${perf_script} ${expected}
 perf probe -q -d probe_libc:inet_pton
 exit $err
diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh
index 55ad9793d544..4ce276efe6b4 100755
--- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh
+++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh
@@ -17,7 +17,7 @@ skip_if_no_perf_probe || exit 2
 file=$(mktemp /tmp/temporary_file.XXXXX)
 
 trace_open_vfs_getname() {
-	evts=$(echo $(perf list syscalls:sys_enter_open* |& egrep 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/')
+	evts=$(echo $(perf list syscalls:sys_enter_open* 2>&1 | egrep 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/')
 	perf trace -e $evts touch $file 2>&1 | \
 	egrep " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ open(at)?\((dfd: +CWD, +)?filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$"
 }
diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c
index 976e658e38dc..5e94857dfca2 100644
--- a/tools/perf/util/llvm-utils.c
+++ b/tools/perf/util/llvm-utils.c
@@ -266,16 +266,16 @@ static const char *kinc_fetch_script =
 "#!/usr/bin/env sh\n"
 "if ! test -d \"$KBUILD_DIR\"\n"
 "then\n"
-"	exit -1\n"
+"	exit 1\n"
 "fi\n"
 "if ! test -f \"$KBUILD_DIR/include/generated/autoconf.h\"\n"
 "then\n"
-"	exit -1\n"
+"	exit 1\n"
 "fi\n"
 "TMPDIR=`mktemp -d`\n"
 "if test -z \"$TMPDIR\"\n"
 "then\n"
-"    exit -1\n"
+"    exit 1\n"
 "fi\n"
 "cat << EOF > $TMPDIR/Makefile\n"
 "obj-y := dummy.o\n"
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 46e9e19ab1ac..bc32e57d17be 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -908,14 +908,11 @@ static void python_process_tracepoint(struct perf_sample *sample,
 	if (_PyTuple_Resize(&t, n) == -1)
 		Py_FatalError("error resizing Python tuple");
 
-	if (!dict) {
+	if (!dict)
 		call_object(handler, t, handler_name);
-	} else {
+	else
 		call_object(handler, t, default_handler_name);
-		Py_DECREF(dict);
-	}
 
-	Py_XDECREF(all_entries_dict);
 	Py_DECREF(t);
 }
 
@@ -1235,7 +1232,6 @@ static void python_process_general_event(struct perf_sample *sample,
 
 	call_object(handler, t, handler_name);
 
-	Py_DECREF(dict);
 	Py_DECREF(t);
 }
 
@@ -1627,6 +1623,7 @@ static int python_generate_script(struct pevent *pevent, const char *outfile)
 	fprintf(ofp, "# See the perf-script-python Documentation for the list "
 		"of available functions.\n\n");
 
+	fprintf(ofp, "from __future__ import print_function\n\n");
 	fprintf(ofp, "import os\n");
 	fprintf(ofp, "import sys\n\n");
 
@@ -1636,10 +1633,10 @@ static int python_generate_script(struct pevent *pevent, const char *outfile)
 	fprintf(ofp, "from Core import *\n\n\n");
 
 	fprintf(ofp, "def trace_begin():\n");
-	fprintf(ofp, "\tprint \"in trace_begin\"\n\n");
+	fprintf(ofp, "\tprint(\"in trace_begin\")\n\n");
 
 	fprintf(ofp, "def trace_end():\n");
-	fprintf(ofp, "\tprint \"in trace_end\"\n\n");
+	fprintf(ofp, "\tprint(\"in trace_end\")\n\n");
 
 	while ((event = trace_find_next_event(pevent, event))) {
 		fprintf(ofp, "def %s__%s(", event->system, event->name);
@@ -1675,7 +1672,7 @@ static int python_generate_script(struct pevent *pevent, const char *outfile)
 			"common_secs, common_nsecs,\n\t\t\t"
 			"common_pid, common_comm)\n\n");
 
-		fprintf(ofp, "\t\tprint \"");
+		fprintf(ofp, "\t\tprint(\"");
 
 		not_first = 0;
 		count = 0;
@@ -1736,31 +1733,31 @@ static int python_generate_script(struct pevent *pevent, const char *outfile)
 				fprintf(ofp, "%s", f->name);
 		}
 
-		fprintf(ofp, ")\n\n");
+		fprintf(ofp, "))\n\n");
 
-		fprintf(ofp, "\t\tprint 'Sample: {'+"
-			"get_dict_as_string(perf_sample_dict['sample'], ', ')+'}'\n\n");
+		fprintf(ofp, "\t\tprint('Sample: {'+"
+			"get_dict_as_string(perf_sample_dict['sample'], ', ')+'}')\n\n");
 
 		fprintf(ofp, "\t\tfor node in common_callchain:");
 		fprintf(ofp, "\n\t\t\tif 'sym' in node:");
-		fprintf(ofp, "\n\t\t\t\tprint \"\\t[%%x] %%s\" %% (node['ip'], node['sym']['name'])");
+		fprintf(ofp, "\n\t\t\t\tprint(\"\\t[%%x] %%s\" %% (node['ip'], node['sym']['name']))");
 		fprintf(ofp, "\n\t\t\telse:");
-		fprintf(ofp, "\n\t\t\t\tprint \"\t[%%x]\" %% (node['ip'])\n\n");
-		fprintf(ofp, "\t\tprint \"\\n\"\n\n");
+		fprintf(ofp, "\n\t\t\t\tprint(\"\t[%%x]\" %% (node['ip']))\n\n");
+		fprintf(ofp, "\t\tprint()\n\n");
 
 	}
 
 	fprintf(ofp, "def trace_unhandled(event_name, context, "
 		"event_fields_dict, perf_sample_dict):\n");
 
-	fprintf(ofp, "\t\tprint get_dict_as_string(event_fields_dict)\n");
-	fprintf(ofp, "\t\tprint 'Sample: {'+"
-		"get_dict_as_string(perf_sample_dict['sample'], ', ')+'}'\n\n");
+	fprintf(ofp, "\t\tprint(get_dict_as_string(event_fields_dict))\n");
+	fprintf(ofp, "\t\tprint('Sample: {'+"
+		"get_dict_as_string(perf_sample_dict['sample'], ', ')+'}')\n\n");
 
 	fprintf(ofp, "def print_header("
 		"event_name, cpu, secs, nsecs, pid, comm):\n"
-		"\tprint \"%%-20s %%5u %%05u.%%09u %%8u %%-20s \" %% \\\n\t"
-		"(event_name, cpu, secs, nsecs, pid, comm),\n\n");
+		"\tprint(\"%%-20s %%5u %%05u.%%09u %%8u %%-20s \" %% \\\n\t"
+		"(event_name, cpu, secs, nsecs, pid, comm), end=\"\")\n\n");
 
 	fprintf(ofp, "def get_dict_as_string(a_dict, delimiter=' '):\n"
 		"\treturn delimiter.join"


* [GIT PULL] perf fixes
@ 2018-06-30  8:44 Ingo Molnar
  0 siblings, 0 replies; 316+ messages in thread
From: Ingo Molnar @ 2018-06-30  8:44 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Arnaldo Carvalho de Melo, Peter Zijlstra,
	Thomas Gleixner, Andrew Morton

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: 9331510135640429711afbd0c810686100824a79 perf/core: Move inline keyword at the beginning of declaration

Tooling fixes mostly, plus a build warning fix.

 Thanks,

	Ingo

------------------>
Adrian Hunter (1):
      perf intel-pt: Fix packet decoding of CYC packets

Arnaldo Carvalho de Melo (5):
      tools headers uapi: Synchronize drm/drm.h
      perf tools: Update x86's syscall_64.tbl, adding 'io_pgetevents' and 'rseq'
      tools include powerpc: Update arch/powerpc/include/uapi/asm/unistd.h copy to get 'rseq' syscall
      tools include uapi: Update if_link.h to pick IFLA_{BRPORT_ISOLATED,VXLAN_TTL_INHERIT}
      tools include uapi: Synchronize bpf.h with the kernel

Ingo Molnar (1):
      tools/headers: Pick up latest kernel ABIs

Jiri Olsa (3):
      perf tests: Add event parsing error handling to parse events test
      perf tests: Add valid callback for parse-events test
      perf bench: Fix numa report output code

Mathieu Malaterre (1):
      perf/core: Move inline keyword at the beginning of declaration

Ravi Bangoria (3):
      perf script: Add missing output fields in a hint
      perf script: Fix crash because of missing evsel->priv
      perf tools: Fix crash caused by accessing feat_ops[HEADER_LAST_FEATURE]

Sandipan Das (1):
      perf report powerpc: Fix crash if callchain is empty

Thomas Richter (5):
      perf record: Support s390 random socket_id assignment
      perf test session topology: Fix test on s390
      perf alias: Remove trailing newline when reading sysfs files
      perf alias: Rebuild alias expression string to make it comparable
      perf stat: Remove duplicate event counting

Yonghong Song (1):
      perf tools: Fix a clang 7.0 compilation error


 kernel/events/core.c                               |  2 +-
 tools/arch/arm/include/uapi/asm/kvm.h              |  1 +
 tools/arch/arm64/include/uapi/asm/kvm.h            |  1 +
 tools/arch/powerpc/include/uapi/asm/kvm.h          |  1 +
 tools/arch/powerpc/include/uapi/asm/unistd.h       |  1 +
 tools/arch/x86/include/asm/cpufeatures.h           |  2 +
 tools/include/uapi/drm/drm.h                       |  7 ++
 tools/include/uapi/linux/bpf.h                     |  2 +-
 tools/include/uapi/linux/if_link.h                 |  2 +
 tools/include/uapi/linux/kvm.h                     |  1 +
 tools/perf/arch/powerpc/util/skip-callchain-idx.c  |  2 +-
 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl  |  2 +
 tools/perf/bench/numa.c                            |  5 +-
 tools/perf/builtin-annotate.c                      | 11 ++-
 tools/perf/builtin-report.c                        |  3 +-
 tools/perf/builtin-script.c                        | 30 ++++++-
 tools/perf/tests/parse-events.c                    | 25 ++++--
 tools/perf/tests/topology.c                        |  1 +
 tools/perf/util/c++/clang.cpp                      | 11 ++-
 tools/perf/util/header.c                           | 12 ++-
 .../util/intel-pt-decoder/intel-pt-pkt-decoder.c   |  2 +-
 tools/perf/util/pmu.c                              | 99 +++++++++++++++++++++-
 22 files changed, 201 insertions(+), 22 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 80cca2b30c4f..8f0434a9951a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6482,7 +6482,7 @@ void perf_prepare_sample(struct perf_event_header *header,
 		data->phys_addr = perf_virt_to_phys(data->addr);
 }
 
-static void __always_inline
+static __always_inline void
 __perf_event_output(struct perf_event *event,
 		    struct perf_sample_data *data,
 		    struct pt_regs *regs,
diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h
index caae4843cb70..16e006f708ca 100644
--- a/tools/arch/arm/include/uapi/asm/kvm.h
+++ b/tools/arch/arm/include/uapi/asm/kvm.h
@@ -91,6 +91,7 @@ struct kvm_regs {
 #define KVM_VGIC_V3_ADDR_TYPE_DIST	2
 #define KVM_VGIC_V3_ADDR_TYPE_REDIST	3
 #define KVM_VGIC_ITS_ADDR_TYPE		4
+#define KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION	5
 
 #define KVM_VGIC_V3_DIST_SIZE		SZ_64K
 #define KVM_VGIC_V3_REDIST_SIZE		(2 * SZ_64K)
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index 04b3256f8e6d..4e76630dd655 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -91,6 +91,7 @@ struct kvm_regs {
 #define KVM_VGIC_V3_ADDR_TYPE_DIST	2
 #define KVM_VGIC_V3_ADDR_TYPE_REDIST	3
 #define KVM_VGIC_ITS_ADDR_TYPE		4
+#define KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION	5
 
 #define KVM_VGIC_V3_DIST_SIZE		SZ_64K
 #define KVM_VGIC_V3_REDIST_SIZE		(2 * SZ_64K)
diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h
index 833ed9a16adf..1b32b56a03d3 100644
--- a/tools/arch/powerpc/include/uapi/asm/kvm.h
+++ b/tools/arch/powerpc/include/uapi/asm/kvm.h
@@ -633,6 +633,7 @@ struct kvm_ppc_cpu_char {
 #define KVM_REG_PPC_PSSCR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd)
 
 #define KVM_REG_PPC_DEC_EXPIRY	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe)
+#define KVM_REG_PPC_ONLINE	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf)
 
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs
diff --git a/tools/arch/powerpc/include/uapi/asm/unistd.h b/tools/arch/powerpc/include/uapi/asm/unistd.h
index 389c36fd8299..ac5ba55066dd 100644
--- a/tools/arch/powerpc/include/uapi/asm/unistd.h
+++ b/tools/arch/powerpc/include/uapi/asm/unistd.h
@@ -398,5 +398,6 @@
 #define __NR_pkey_alloc		384
 #define __NR_pkey_free		385
 #define __NR_pkey_mprotect	386
+#define __NR_rseq		387
 
 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index fb00a2fca990..5701f5cecd31 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -282,7 +282,9 @@
 #define X86_FEATURE_AMD_IBPB		(13*32+12) /* "" Indirect Branch Prediction Barrier */
 #define X86_FEATURE_AMD_IBRS		(13*32+14) /* "" Indirect Branch Restricted Speculation */
 #define X86_FEATURE_AMD_STIBP		(13*32+15) /* "" Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_AMD_SSBD		(13*32+24) /* "" Speculative Store Bypass Disable */
 #define X86_FEATURE_VIRT_SSBD		(13*32+25) /* Virtualized Speculative Store Bypass Disable */
+#define X86_FEATURE_AMD_SSB_NO		(13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
 
 /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
 #define X86_FEATURE_DTHERM		(14*32+ 0) /* Digital Thermal Sensor */
diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h
index 6fdff5945c8a..9c660e1688ab 100644
--- a/tools/include/uapi/drm/drm.h
+++ b/tools/include/uapi/drm/drm.h
@@ -680,6 +680,13 @@ struct drm_get_cap {
  */
 #define DRM_CLIENT_CAP_ATOMIC	3
 
+/**
+ * DRM_CLIENT_CAP_ASPECT_RATIO
+ *
+ * If set to 1, the DRM core will provide aspect ratio information in modes.
+ */
+#define DRM_CLIENT_CAP_ASPECT_RATIO    4
+
 /** DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */
 struct drm_set_client_cap {
 	__u64 capability;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e0b06784f227..59b19b6a40d7 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2630,7 +2630,7 @@ struct bpf_fib_lookup {
 	union {
 		/* inputs to lookup */
 		__u8	tos;		/* AF_INET  */
-		__be32	flowlabel;	/* AF_INET6 */
+		__be32	flowinfo;	/* AF_INET6, flow_label + priority */
 
 		/* output: metric of fib result (IPv4/IPv6 only) */
 		__u32	rt_metric;
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index 68699f654118..cf01b6824244 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -333,6 +333,7 @@ enum {
 	IFLA_BRPORT_BCAST_FLOOD,
 	IFLA_BRPORT_GROUP_FWD_MASK,
 	IFLA_BRPORT_NEIGH_SUPPRESS,
+	IFLA_BRPORT_ISOLATED,
 	__IFLA_BRPORT_MAX
 };
 #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
@@ -516,6 +517,7 @@ enum {
 	IFLA_VXLAN_COLLECT_METADATA,
 	IFLA_VXLAN_LABEL,
 	IFLA_VXLAN_GPE,
+	IFLA_VXLAN_TTL_INHERIT,
 	__IFLA_VXLAN_MAX
 };
 #define IFLA_VXLAN_MAX	(__IFLA_VXLAN_MAX - 1)
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 39e364c70caf..b6270a3b38e9 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -948,6 +948,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_S390_BPB 152
 #define KVM_CAP_GET_MSR_FEATURES 153
 #define KVM_CAP_HYPERV_EVENTFD 154
+#define KVM_CAP_HYPERV_TLBFLUSH 155
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c
index 3598b8b75d27..ef5d59a5742e 100644
--- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c
+++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c
@@ -243,7 +243,7 @@ int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain)
 	u64 ip;
 	u64 skip_slot = -1;
 
-	if (chain->nr < 3)
+	if (!chain || chain->nr < 3)
 		return skip_slot;
 
 	ip = chain->ips[2];
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index 4dfe42666d0c..f0b1709a5ffb 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -341,6 +341,8 @@
 330	common	pkey_alloc		__x64_sys_pkey_alloc
 331	common	pkey_free		__x64_sys_pkey_free
 332	common	statx			__x64_sys_statx
+333	common	io_pgetevents		__x64_sys_io_pgetevents
+334	common	rseq			__x64_sys_rseq
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index 63eb49082774..44195514b19e 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -1098,7 +1098,7 @@ static void *worker_thread(void *__tdata)
 	u8 *global_data;
 	u8 *process_data;
 	u8 *thread_data;
-	u64 bytes_done;
+	u64 bytes_done, secs;
 	long work_done;
 	u32 l;
 	struct rusage rusage;
@@ -1254,7 +1254,8 @@ static void *worker_thread(void *__tdata)
 	timersub(&stop, &start0, &diff);
 	td->runtime_ns = diff.tv_sec * NSEC_PER_SEC;
 	td->runtime_ns += diff.tv_usec * NSEC_PER_USEC;
-	td->speed_gbs = bytes_done / (td->runtime_ns / NSEC_PER_SEC) / 1e9;
+	secs = td->runtime_ns / NSEC_PER_SEC;
+	td->speed_gbs = secs ? bytes_done / secs / 1e9 : 0;
 
 	getrusage(RUSAGE_THREAD, &rusage);
 	td->system_time_ns = rusage.ru_stime.tv_sec * NSEC_PER_SEC;
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 5eb22cc56363..8180319285af 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -283,6 +283,15 @@ static int process_sample_event(struct perf_tool *tool,
 	return ret;
 }
 
+static int process_feature_event(struct perf_tool *tool,
+				 union perf_event *event,
+				 struct perf_session *session)
+{
+	if (event->feat.feat_id < HEADER_LAST_FEATURE)
+		return perf_event__process_feature(tool, event, session);
+	return 0;
+}
+
 static int hist_entry__tty_annotate(struct hist_entry *he,
 				    struct perf_evsel *evsel,
 				    struct perf_annotate *ann)
@@ -471,7 +480,7 @@ int cmd_annotate(int argc, const char **argv)
 			.attr	= perf_event__process_attr,
 			.build_id = perf_event__process_build_id,
 			.tracing_data   = perf_event__process_tracing_data,
-			.feature	= perf_event__process_feature,
+			.feature	= process_feature_event,
 			.ordered_events = true,
 			.ordering_requires_timestamps = true,
 		},
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index cdb5b6949832..c04dc7b53797 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -217,7 +217,8 @@ static int process_feature_event(struct perf_tool *tool,
 	}
 
 	/*
-	 * All features are received, we can force the
+	 * (feat_id = HEADER_LAST_FEATURE) is the end marker which
+	 * means all features are received, now we can force the
 	 * group if needed.
 	 */
 	setup_forced_leader(rep, session->evlist);
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index a31d7082188e..568ddfac3213 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -1834,6 +1834,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
 	struct perf_evlist *evlist;
 	struct perf_evsel *evsel, *pos;
 	int err;
+	static struct perf_evsel_script *es;
 
 	err = perf_event__process_attr(tool, event, pevlist);
 	if (err)
@@ -1842,6 +1843,19 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
 	evlist = *pevlist;
 	evsel = perf_evlist__last(*pevlist);
 
+	if (!evsel->priv) {
+		if (scr->per_event_dump) {
+			evsel->priv = perf_evsel_script__new(evsel,
+						scr->session->data);
+		} else {
+			es = zalloc(sizeof(*es));
+			if (!es)
+				return -ENOMEM;
+			es->fp = stdout;
+			evsel->priv = es;
+		}
+	}
+
 	if (evsel->attr.type >= PERF_TYPE_MAX &&
 	    evsel->attr.type != PERF_TYPE_SYNTH)
 		return 0;
@@ -3030,6 +3044,15 @@ int process_cpu_map_event(struct perf_tool *tool __maybe_unused,
 	return set_maps(script);
 }
 
+static int process_feature_event(struct perf_tool *tool,
+				 union perf_event *event,
+				 struct perf_session *session)
+{
+	if (event->feat.feat_id < HEADER_LAST_FEATURE)
+		return perf_event__process_feature(tool, event, session);
+	return 0;
+}
+
 #ifdef HAVE_AUXTRACE_SUPPORT
 static int perf_script__process_auxtrace_info(struct perf_tool *tool,
 					      union perf_event *event,
@@ -3074,7 +3097,7 @@ int cmd_script(int argc, const char **argv)
 			.attr		 = process_attr,
 			.event_update   = perf_event__process_event_update,
 			.tracing_data	 = perf_event__process_tracing_data,
-			.feature	 = perf_event__process_feature,
+			.feature	 = process_feature_event,
 			.build_id	 = perf_event__process_build_id,
 			.id_index	 = perf_event__process_id_index,
 			.auxtrace_info	 = perf_script__process_auxtrace_info,
@@ -3125,8 +3148,9 @@ int cmd_script(int argc, const char **argv)
 		     "+field to add and -field to remove."
 		     "Valid types: hw,sw,trace,raw,synth. "
 		     "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
-		     "addr,symoff,period,iregs,uregs,brstack,brstacksym,flags,"
-		     "bpf-output,callindent,insn,insnlen,brstackinsn,synth,phys_addr",
+		     "addr,symoff,srcline,period,iregs,uregs,brstack,"
+		     "brstacksym,flags,bpf-output,brstackinsn,brstackoff,"
+		     "callindent,insn,insnlen,synth,phys_addr,metric,misc",
 		     parse_output_fields),
 	OPT_BOOLEAN('a', "all-cpus", &system_wide,
 		    "system-wide collection from all CPUs"),
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 7d4077068454..61211918bfba 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -1309,6 +1309,11 @@ static int test__checkevent_config_cache(struct perf_evlist *evlist)
 	return 0;
 }
 
+static bool test__intel_pt_valid(void)
+{
+	return !!perf_pmu__find("intel_pt");
+}
+
 static int test__intel_pt(struct perf_evlist *evlist)
 {
 	struct perf_evsel *evsel = perf_evlist__first(evlist);
@@ -1375,6 +1380,7 @@ struct evlist_test {
 	const char *name;
 	__u32 type;
 	const int id;
+	bool (*valid)(void);
 	int (*check)(struct perf_evlist *evlist);
 };
 
@@ -1648,6 +1654,7 @@ static struct evlist_test test__events[] = {
 	},
 	{
 		.name  = "intel_pt//u",
+		.valid = test__intel_pt_valid,
 		.check = test__intel_pt,
 		.id    = 52,
 	},
@@ -1686,17 +1693,24 @@ static struct terms_test test__terms[] = {
 
 static int test_event(struct evlist_test *e)
 {
+	struct parse_events_error err = { .idx = 0, };
 	struct perf_evlist *evlist;
 	int ret;
 
+	if (e->valid && !e->valid()) {
+		pr_debug("... SKIP");
+		return 0;
+	}
+
 	evlist = perf_evlist__new();
 	if (evlist == NULL)
 		return -ENOMEM;
 
-	ret = parse_events(evlist, e->name, NULL);
+	ret = parse_events(evlist, e->name, &err);
 	if (ret) {
-		pr_debug("failed to parse event '%s', err %d\n",
-			 e->name, ret);
+		pr_debug("failed to parse event '%s', err %d, str '%s'\n",
+			 e->name, ret, err.str);
+		parse_events_print_error(&err, e->name);
 	} else {
 		ret = e->check(evlist);
 	}
@@ -1714,10 +1728,11 @@ static int test_events(struct evlist_test *events, unsigned cnt)
 	for (i = 0; i < cnt; i++) {
 		struct evlist_test *e = &events[i];
 
-		pr_debug("running test %d '%s'\n", e->id, e->name);
+		pr_debug("running test %d '%s'", e->id, e->name);
 		ret1 = test_event(e);
 		if (ret1)
 			ret2 = ret1;
+		pr_debug("\n");
 	}
 
 	return ret2;
@@ -1799,7 +1814,7 @@ static int test_pmu_events(void)
 	}
 
 	while (!ret && (ent = readdir(dir))) {
-		struct evlist_test e;
+		struct evlist_test e = { .id = 0, };
 		char name[2 * NAME_MAX + 1 + 12 + 3];
 
 		/* Names containing . are special and cannot be used directly */
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index 40e30a26b23c..9497d02f69e6 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -45,6 +45,7 @@ static int session_write_header(char *path)
 
 	perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY);
 	perf_header__set_feat(&session->header, HEADER_NRCPUS);
+	perf_header__set_feat(&session->header, HEADER_ARCH);
 
 	session->header.data_size += DATA_SIZE;
 
diff --git a/tools/perf/util/c++/clang.cpp b/tools/perf/util/c++/clang.cpp
index bf31ceab33bd..89512504551b 100644
--- a/tools/perf/util/c++/clang.cpp
+++ b/tools/perf/util/c++/clang.cpp
@@ -146,8 +146,15 @@ getBPFObjectFromModule(llvm::Module *Module)
 	raw_svector_ostream ostream(*Buffer);
 
 	legacy::PassManager PM;
-	if (TargetMachine->addPassesToEmitFile(PM, ostream,
-					       TargetMachine::CGFT_ObjectFile)) {
+	bool NotAdded;
+#if CLANG_VERSION_MAJOR < 7
+	NotAdded = TargetMachine->addPassesToEmitFile(PM, ostream,
+						      TargetMachine::CGFT_ObjectFile);
+#else
+	NotAdded = TargetMachine->addPassesToEmitFile(PM, ostream, nullptr,
+						      TargetMachine::CGFT_ObjectFile);
+#endif
+	if (NotAdded) {
 		llvm::errs() << "TargetMachine can't emit a file of this type\n";
 		return std::unique_ptr<llvm::SmallVectorImpl<char>>(nullptr);;
 	}
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 540cd2dcd3e7..653ff65aa2c3 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -2129,6 +2129,7 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused)
 	int cpu_nr = ff->ph->env.nr_cpus_avail;
 	u64 size = 0;
 	struct perf_header *ph = ff->ph;
+	bool do_core_id_test = true;
 
 	ph->env.cpu = calloc(cpu_nr, sizeof(*ph->env.cpu));
 	if (!ph->env.cpu)
@@ -2183,6 +2184,13 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused)
 		return 0;
 	}
 
+	/* On s390 the socket_id number is not related to the numbers of cpus.
+	 * The socket_id number might be higher than the numbers of cpus.
+	 * This depends on the configuration.
+	 */
+	if (ph->env.arch && !strncmp(ph->env.arch, "s390", 4))
+		do_core_id_test = false;
+
 	for (i = 0; i < (u32)cpu_nr; i++) {
 		if (do_read_u32(ff, &nr))
 			goto free_cpu;
@@ -2192,7 +2200,7 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused)
 		if (do_read_u32(ff, &nr))
 			goto free_cpu;
 
-		if (nr != (u32)-1 && nr > (u32)cpu_nr) {
+		if (do_core_id_test && nr != (u32)-1 && nr > (u32)cpu_nr) {
 			pr_debug("socket_id number is too big."
 				 "You may need to upgrade the perf tool.\n");
 			goto free_cpu;
@@ -3456,7 +3464,7 @@ int perf_event__process_feature(struct perf_tool *tool,
 		pr_warning("invalid record type %d in pipe-mode\n", type);
 		return 0;
 	}
-	if (feat == HEADER_RESERVED || feat > HEADER_LAST_FEATURE) {
+	if (feat == HEADER_RESERVED || feat >= HEADER_LAST_FEATURE) {
 		pr_warning("invalid record type %d in pipe-mode\n", type);
 		return -1;
 	}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
index ba4c9dd18643..d426761a549d 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
@@ -366,7 +366,7 @@ static int intel_pt_get_cyc(unsigned int byte, const unsigned char *buf,
 		if (len < offs)
 			return INTEL_PT_NEED_MORE_BYTES;
 		byte = buf[offs++];
-		payload |= (byte >> 1) << shift;
+		payload |= ((uint64_t)byte >> 1) << shift;
 	}
 
 	packet->type = INTEL_PT_CYC;
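
The cast above is an integer promotion fix: byte is a 32-bit value, so
(byte >> 1) << shift is evaluated in 32-bit arithmetic, any payload bits that
belong at position 32 or higher are lost, and the shift itself becomes
undefined once shift reaches 32. A stand-alone sketch of the accumulation
pattern, deliberately not the real CYC packet layout, showing why the operand
is widened before shifting:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Made-up continuation bytes; low bit means "another byte follows". */
	const unsigned char bytes[] = { 0x07, 0x03, 0x03, 0x03, 0x03, 0x02 };
	uint64_t payload = 0;
	unsigned int shift = 0;
	size_t i;

	for (i = 0; i < sizeof(bytes); i++, shift += 7) {
		/*
		 * Widen to 64 bits before shifting; without the cast,
		 * anything destined for bit 32 or above is lost, here the
		 * final byte at shift 35.
		 */
		payload |= ((uint64_t)bytes[i] >> 1) << shift;
	}

	printf("payload = %#llx\n", (unsigned long long)payload);
	return 0;
}
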
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index d2fb597c9a8c..3ba6a1742f91 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -234,6 +234,74 @@ static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias,
 	return 0;
 }
 
+static void perf_pmu_assign_str(char *name, const char *field, char **old_str,
+				char **new_str)
+{
+	if (!*old_str)
+		goto set_new;
+
+	if (*new_str) {	/* Have new string, check with old */
+		if (strcasecmp(*old_str, *new_str))
+			pr_debug("alias %s differs in field '%s'\n",
+				 name, field);
+		zfree(old_str);
+	} else		/* Nothing new --> keep old string */
+		return;
+set_new:
+	*old_str = *new_str;
+	*new_str = NULL;
+}
+
+static void perf_pmu_update_alias(struct perf_pmu_alias *old,
+				  struct perf_pmu_alias *newalias)
+{
+	perf_pmu_assign_str(old->name, "desc", &old->desc, &newalias->desc);
+	perf_pmu_assign_str(old->name, "long_desc", &old->long_desc,
+			    &newalias->long_desc);
+	perf_pmu_assign_str(old->name, "topic", &old->topic, &newalias->topic);
+	perf_pmu_assign_str(old->name, "metric_expr", &old->metric_expr,
+			    &newalias->metric_expr);
+	perf_pmu_assign_str(old->name, "metric_name", &old->metric_name,
+			    &newalias->metric_name);
+	perf_pmu_assign_str(old->name, "value", &old->str, &newalias->str);
+	old->scale = newalias->scale;
+	old->per_pkg = newalias->per_pkg;
+	old->snapshot = newalias->snapshot;
+	memcpy(old->unit, newalias->unit, sizeof(old->unit));
+}
+
+/* Delete an alias entry. */
+static void perf_pmu_free_alias(struct perf_pmu_alias *newalias)
+{
+	zfree(&newalias->name);
+	zfree(&newalias->desc);
+	zfree(&newalias->long_desc);
+	zfree(&newalias->topic);
+	zfree(&newalias->str);
+	zfree(&newalias->metric_expr);
+	zfree(&newalias->metric_name);
+	parse_events_terms__purge(&newalias->terms);
+	free(newalias);
+}
+
+/* Merge an alias, search in alias list. If this name is already
+ * present merge both of them to combine all information.
+ */
+static bool perf_pmu_merge_alias(struct perf_pmu_alias *newalias,
+				 struct list_head *alist)
+{
+	struct perf_pmu_alias *a;
+
+	list_for_each_entry(a, alist, list) {
+		if (!strcasecmp(newalias->name, a->name)) {
+			perf_pmu_update_alias(a, newalias);
+			perf_pmu_free_alias(newalias);
+			return true;
+		}
+	}
+	return false;
+}
+
 static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name,
 				 char *desc, char *val,
 				 char *long_desc, char *topic,
@@ -241,9 +309,11 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name,
 				 char *metric_expr,
 				 char *metric_name)
 {
+	struct parse_events_term *term;
 	struct perf_pmu_alias *alias;
 	int ret;
 	int num;
+	char newval[256];
 
 	alias = malloc(sizeof(*alias));
 	if (!alias)
@@ -262,6 +332,27 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name,
 		return ret;
 	}
 
+	/* Scan event and remove leading zeroes, spaces, newlines, some
+	 * platforms have terms specified as
+	 * event=0x0091 (read from files ../<PMU>/events/<FILE>
+	 * and terms specified as event=0x91 (read from JSON files).
+	 *
+	 * Rebuild string to make alias->str member comparable.
+	 */
+	memset(newval, 0, sizeof(newval));
+	ret = 0;
+	list_for_each_entry(term, &alias->terms, list) {
+		if (ret)
+			ret += scnprintf(newval + ret, sizeof(newval) - ret,
+					 ",");
+		if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM)
+			ret += scnprintf(newval + ret, sizeof(newval) - ret,
+					 "%s=%#x", term->config, term->val.num);
+		else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR)
+			ret += scnprintf(newval + ret, sizeof(newval) - ret,
+					 "%s=%s", term->config, term->val.str);
+	}
+
 	alias->name = strdup(name);
 	if (dir) {
 		/*
@@ -285,9 +376,10 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name,
 		snprintf(alias->unit, sizeof(alias->unit), "%s", unit);
 	}
 	alias->per_pkg = perpkg && sscanf(perpkg, "%d", &num) == 1 && num == 1;
-	alias->str = strdup(val);
+	alias->str = strdup(newval);
 
-	list_add_tail(&alias->list, list);
+	if (!perf_pmu_merge_alias(alias, list))
+		list_add_tail(&alias->list, list);
 
 	return 0;
 }
@@ -303,6 +395,9 @@ static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FI
 
 	buf[ret] = 0;
 
+	/* Remove trailing newline from sysfs file */
+	rtrim(buf);
+
 	return __perf_pmu__new_alias(list, dir, name, NULL, buf, NULL, NULL, NULL,
 				     NULL, NULL, NULL);
 }



* [GIT PULL] perf fixes
@ 2018-06-04  9:04 Ingo Molnar
  0 siblings, 0 replies; 316+ messages in thread
From: Ingo Molnar @ 2018-06-04  9:04 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Arnaldo Carvalho de Melo,
	Peter Zijlstra, Andrew Morton

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: 4e9ae0d3d5bf4e2f1b466ba451bd18f2c5b69845 Merge tag 'perf-urgent-for-mingo-4.17-20180602' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent

Leftover perf tooling fixes from the v4.17 cycle: they sync up updated ABI headers 
with their tooling versions.

 Thanks,

	Ingo

------------------>
Arnaldo Carvalho de Melo (4):
      perf trace beauty prctl: Default header_dir to cwd to work without parms
      tools headers: Synchronize prctl.h ABI header
      tools headers: Sync x86 cpufeatures.h with the kernel sources
      perf tools intel-pt-decoder: Update insn.h from the kernel sources


 tools/arch/x86/include/asm/cpufeatures.h | 20 ++++++++++++++------
 tools/include/uapi/linux/prctl.h         | 12 ++++++++++++
 tools/perf/trace/beauty/prctl_option.sh  |  2 +-
 tools/perf/util/intel-pt-decoder/insn.h  | 18 ++++++++++++++++++
 4 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 578793e97431..fb00a2fca990 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -198,7 +198,6 @@
 #define X86_FEATURE_CAT_L2		( 7*32+ 5) /* Cache Allocation Technology L2 */
 #define X86_FEATURE_CDP_L3		( 7*32+ 6) /* Code and Data Prioritization L3 */
 #define X86_FEATURE_INVPCID_SINGLE	( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
-
 #define X86_FEATURE_HW_PSTATE		( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK	( 7*32+ 9) /* AMD ProcFeedbackInterface */
 #define X86_FEATURE_SME			( 7*32+10) /* AMD Secure Memory Encryption */
@@ -207,13 +206,19 @@
 #define X86_FEATURE_RETPOLINE_AMD	( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
 #define X86_FEATURE_INTEL_PPIN		( 7*32+14) /* Intel Processor Inventory Number */
 #define X86_FEATURE_CDP_L2		( 7*32+15) /* Code and Data Prioritization L2 */
-
+#define X86_FEATURE_MSR_SPEC_CTRL	( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
+#define X86_FEATURE_SSBD		( 7*32+17) /* Speculative Store Bypass Disable */
 #define X86_FEATURE_MBA			( 7*32+18) /* Memory Bandwidth Allocation */
 #define X86_FEATURE_RSB_CTXSW		( 7*32+19) /* "" Fill RSB on context switches */
 #define X86_FEATURE_SEV			( 7*32+20) /* AMD Secure Encrypted Virtualization */
-
 #define X86_FEATURE_USE_IBPB		( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
 #define X86_FEATURE_USE_IBRS_FW		( 7*32+22) /* "" Use IBRS during runtime firmware calls */
+#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE	( 7*32+23) /* "" Disable Speculative Store Bypass. */
+#define X86_FEATURE_LS_CFG_SSBD		( 7*32+24)  /* "" AMD SSBD implementation via LS_CFG MSR */
+#define X86_FEATURE_IBRS		( 7*32+25) /* Indirect Branch Restricted Speculation */
+#define X86_FEATURE_IBPB		( 7*32+26) /* Indirect Branch Prediction Barrier */
+#define X86_FEATURE_STIBP		( 7*32+27) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_ZEN			( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW		( 8*32+ 0) /* Intel TPR Shadow */
@@ -274,9 +279,10 @@
 #define X86_FEATURE_CLZERO		(13*32+ 0) /* CLZERO instruction */
 #define X86_FEATURE_IRPERF		(13*32+ 1) /* Instructions Retired Count */
 #define X86_FEATURE_XSAVEERPTR		(13*32+ 2) /* Always save/restore FP error pointers */
-#define X86_FEATURE_IBPB		(13*32+12) /* Indirect Branch Prediction Barrier */
-#define X86_FEATURE_IBRS		(13*32+14) /* Indirect Branch Restricted Speculation */
-#define X86_FEATURE_STIBP		(13*32+15) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_AMD_IBPB		(13*32+12) /* "" Indirect Branch Prediction Barrier */
+#define X86_FEATURE_AMD_IBRS		(13*32+14) /* "" Indirect Branch Restricted Speculation */
+#define X86_FEATURE_AMD_STIBP		(13*32+15) /* "" Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_VIRT_SSBD		(13*32+25) /* Virtualized Speculative Store Bypass Disable */
 
 /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
 #define X86_FEATURE_DTHERM		(14*32+ 0) /* Digital Thermal Sensor */
@@ -334,6 +340,7 @@
 #define X86_FEATURE_SPEC_CTRL		(18*32+26) /* "" Speculation Control (IBRS + IBPB) */
 #define X86_FEATURE_INTEL_STIBP		(18*32+27) /* "" Single Thread Indirect Branch Predictors */
 #define X86_FEATURE_ARCH_CAPABILITIES	(18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
+#define X86_FEATURE_SPEC_CTRL_SSBD	(18*32+31) /* "" Speculative Store Bypass Disable */
 
 /*
  * BUG word(s)
@@ -363,5 +370,6 @@
 #define X86_BUG_CPU_MELTDOWN		X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
 #define X86_BUG_SPECTRE_V1		X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
 #define X86_BUG_SPECTRE_V2		X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
+#define X86_BUG_SPEC_STORE_BYPASS	X86_BUG(17) /* CPU is affected by speculative store bypass attack */
 
 #endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index af5f8c2df87a..db9f15f5db04 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -207,4 +207,16 @@ struct prctl_mm_map {
 # define PR_SVE_VL_LEN_MASK		0xffff
 # define PR_SVE_VL_INHERIT		(1 << 17) /* inherit across exec */
 
+/* Per task speculation control */
+#define PR_GET_SPECULATION_CTRL		52
+#define PR_SET_SPECULATION_CTRL		53
+/* Speculation control variants */
+# define PR_SPEC_STORE_BYPASS		0
+/* Return and control values for PR_SET/GET_SPECULATION_CTRL */
+# define PR_SPEC_NOT_AFFECTED		0
+# define PR_SPEC_PRCTL			(1UL << 0)
+# define PR_SPEC_ENABLE			(1UL << 1)
+# define PR_SPEC_DISABLE		(1UL << 2)
+# define PR_SPEC_FORCE_DISABLE		(1UL << 3)
+
 #endif /* _LINUX_PRCTL_H */
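
For reference, the speculation control interface synced above is driven from
user space through prctl(2). A minimal sketch of querying, and in the comment
setting, the store bypass mitigation for the current task; the fallback
defines simply mirror the values in the hunk above in case the installed libc
headers predate them:

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_GET_SPECULATION_CTRL
# define PR_GET_SPECULATION_CTRL	52
# define PR_SPEC_STORE_BYPASS		0
# define PR_SPEC_NOT_AFFECTED		0
# define PR_SPEC_PRCTL			(1UL << 0)
# define PR_SPEC_ENABLE			(1UL << 1)
# define PR_SPEC_DISABLE		(1UL << 2)
#endif

int main(void)
{
	long ret = prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, 0, 0, 0);

	if (ret < 0) {
		perror("PR_GET_SPECULATION_CTRL");
		return 1;
	}

	if (ret == PR_SPEC_NOT_AFFECTED) {
		printf("not affected by speculative store bypass\n");
		return 0;
	}

	printf("store bypass %s, per-task control %savailable\n",
	       ret & PR_SPEC_DISABLE ? "disabled" : "enabled",
	       ret & PR_SPEC_PRCTL ? "" : "not ");

	/*
	 * Opting the current task out would then be:
	 *   prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS,
	 *         PR_SPEC_DISABLE, 0, 0);
	 */
	return 0;
}
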
diff --git a/tools/perf/trace/beauty/prctl_option.sh b/tools/perf/trace/beauty/prctl_option.sh
index 0be4138fbe71..f24722146ebe 100755
--- a/tools/perf/trace/beauty/prctl_option.sh
+++ b/tools/perf/trace/beauty/prctl_option.sh
@@ -1,6 +1,6 @@
 #!/bin/sh
 
-header_dir=$1
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
 
 printf "static const char *prctl_options[] = {\n"
 regex='^#define[[:space:]]+PR_([GS]ET\w+)[[:space:]]*([[:xdigit:]]+).*'
diff --git a/tools/perf/util/intel-pt-decoder/insn.h b/tools/perf/util/intel-pt-decoder/insn.h
index e23578c7b1be..2669c9f748e4 100644
--- a/tools/perf/util/intel-pt-decoder/insn.h
+++ b/tools/perf/util/intel-pt-decoder/insn.h
@@ -208,4 +208,22 @@ static inline int insn_offset_immediate(struct insn *insn)
 	return insn_offset_displacement(insn) + insn->displacement.nbytes;
 }
 
+#define POP_SS_OPCODE 0x1f
+#define MOV_SREG_OPCODE 0x8e
+
+/*
+ * Intel SDM Vol.3A 6.8.3 states;
+ * "Any single-step trap that would be delivered following the MOV to SS
+ * instruction or POP to SS instruction (because EFLAGS.TF is 1) is
+ * suppressed."
+ * This function returns true if @insn is MOV SS or POP SS. On these
+ * instructions, single stepping is suppressed.
+ */
+static inline int insn_masking_exception(struct insn *insn)
+{
+	return insn->opcode.bytes[0] == POP_SS_OPCODE ||
+		(insn->opcode.bytes[0] == MOV_SREG_OPCODE &&
+		 X86_MODRM_REG(insn->modrm.bytes[0]) == 2);
+}
+
 #endif /* _ASM_X86_INSN_H */


* [GIT PULL] perf fixes
@ 2018-03-31 10:40 Ingo Molnar
  0 siblings, 0 replies; 316+ messages in thread
From: Ingo Molnar @ 2018-03-31 10:40 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Thomas Gleixner, Peter Zijlstra,
	Arnaldo Carvalho de Melo, Jiri Olsa, Andrew Morton

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: f67b15037a7a50c57f72e69a6d59941ad90a0f0f perf/hwbp: Simplify the perf-hwbp code, fix documentation

Two fixlets.

 Thanks,

	Ingo

------------------>
Linus Torvalds (1):
      perf/hwbp: Simplify the perf-hwbp code, fix documentation

Stephane Eranian (1):
      perf/x86/intel: Fix linear IP of PEBS real_ip on Haswell and later CPUs


 arch/x86/events/intel/ds.c    | 25 +++++++++++++++++--------
 kernel/events/hw_breakpoint.c | 30 +++++++-----------------------
 2 files changed, 24 insertions(+), 31 deletions(-)

diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index d8015235ba76..5e526c54247e 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1153,6 +1153,7 @@ static void setup_pebs_sample_data(struct perf_event *event,
 	if (pebs == NULL)
 		return;
 
+	regs->flags &= ~PERF_EFLAGS_EXACT;
 	sample_type = event->attr.sample_type;
 	dsrc = sample_type & PERF_SAMPLE_DATA_SRC;
 
@@ -1197,7 +1198,6 @@ static void setup_pebs_sample_data(struct perf_event *event,
 	 */
 	*regs = *iregs;
 	regs->flags = pebs->flags;
-	set_linear_ip(regs, pebs->ip);
 
 	if (sample_type & PERF_SAMPLE_REGS_INTR) {
 		regs->ax = pebs->ax;
@@ -1233,13 +1233,22 @@ static void setup_pebs_sample_data(struct perf_event *event,
 #endif
 	}
 
-	if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
-		regs->ip = pebs->real_ip;
-		regs->flags |= PERF_EFLAGS_EXACT;
-	} else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(regs))
-		regs->flags |= PERF_EFLAGS_EXACT;
-	else
-		regs->flags &= ~PERF_EFLAGS_EXACT;
+	if (event->attr.precise_ip > 1) {
+		/* Haswell and later have the eventing IP, so use it: */
+		if (x86_pmu.intel_cap.pebs_format >= 2) {
+			set_linear_ip(regs, pebs->real_ip);
+			regs->flags |= PERF_EFLAGS_EXACT;
+		} else {
+			/* Otherwise use PEBS off-by-1 IP: */
+			set_linear_ip(regs, pebs->ip);
+
+			/* ... and try to fix it up using the LBR entries: */
+			if (intel_pmu_pebs_fixup_ip(regs))
+				regs->flags |= PERF_EFLAGS_EXACT;
+		}
+	} else
+		set_linear_ip(regs, pebs->ip);
+
 
 	if ((sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) &&
 	    x86_pmu.intel_cap.pebs_format >= 1)
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index 3f8cb1e14588..253ae2da13c3 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -427,16 +427,9 @@ EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
  * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
  * @bp: the breakpoint structure to modify
  * @attr: new breakpoint attributes
- * @triggered: callback to trigger when we hit the breakpoint
- * @tsk: pointer to 'task_struct' of the process to which the address belongs
  */
 int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
 {
-	u64 old_addr = bp->attr.bp_addr;
-	u64 old_len = bp->attr.bp_len;
-	int old_type = bp->attr.bp_type;
-	int err = 0;
-
 	/*
 	 * modify_user_hw_breakpoint can be invoked with IRQs disabled and hence it
 	 * will not be possible to raise IPIs that invoke __perf_event_disable.
@@ -451,27 +444,18 @@ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *att
 	bp->attr.bp_addr = attr->bp_addr;
 	bp->attr.bp_type = attr->bp_type;
 	bp->attr.bp_len = attr->bp_len;
+	bp->attr.disabled = 1;
 
-	if (attr->disabled)
-		goto end;
-
-	err = validate_hw_breakpoint(bp);
-	if (!err)
-		perf_event_enable(bp);
+	if (!attr->disabled) {
+		int err = validate_hw_breakpoint(bp);
 
-	if (err) {
-		bp->attr.bp_addr = old_addr;
-		bp->attr.bp_type = old_type;
-		bp->attr.bp_len = old_len;
-		if (!bp->attr.disabled)
-			perf_event_enable(bp);
+		if (err)
+			return err;
 
-		return err;
+		perf_event_enable(bp);
+		bp->attr.disabled = 0;
 	}
 
-end:
-	bp->attr.disabled = attr->disabled;
-
 	return 0;
 }
 EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);


* [GIT PULL] perf fixes
@ 2018-03-25  8:53 Ingo Molnar
  0 siblings, 0 replies; 316+ messages in thread
From: Ingo Molnar @ 2018-03-25  8:53 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Andrew Morton, Thomas Gleixner

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: c917e0f259908e75bd2a65877e25f9d90c22c848 perf/cgroup: Fix child event counting bug

Misc kernel side fixes:

 - a cgroup events counting fix
 - an x86 Intel PMU truncated-parameter fix
 - an x86 RDPMC fix
 - an x86 API naming fix/rename
 - an x86 uncore driver big-hardware PCI enumeration fix
 - an x86 uncore driver filter constraint fix

 Thanks,

	Ingo

------------------>
Dan Carpenter (1):
      perf/x86/intel: Don't accidentally clear high bits in bdw_limit_period()

Kan Liang (3):
      perf/x86/intel: Disable userspace RDPMC usage for large PEBS
      perf/x86/intel: Rename confusing 'freerunning PEBS' API and implementation to 'large PEBS'
      perf/x86/intel/uncore: Fix multi-domain PCI CHA enumeration bug on Skylake servers

Song Liu (1):
      perf/cgroup: Fix child event counting bug

Stephane Eranian (1):
      perf/x86/intel/uncore: Add missing filter constraint for SKX CHA event


 arch/x86/events/core.c               |  3 ++-
 arch/x86/events/intel/core.c         | 14 +++++++-------
 arch/x86/events/intel/ds.c           |  6 +++---
 arch/x86/events/intel/uncore_snbep.c | 32 ++++++++++++++++++--------------
 arch/x86/events/perf_event.h         |  6 +++---
 kernel/events/core.c                 | 21 ++++++++++++++++-----
 6 files changed, 49 insertions(+), 33 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 140d33288e78..88797c80b3e0 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2118,7 +2118,8 @@ static int x86_pmu_event_init(struct perf_event *event)
 			event->destroy(event);
 	}
 
-	if (READ_ONCE(x86_pmu.attr_rdpmc))
+	if (READ_ONCE(x86_pmu.attr_rdpmc) &&
+	    !(event->hw.flags & PERF_X86_EVENT_LARGE_PEBS))
 		event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED;
 
 	return err;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 56457cb73448..1e41d7508d99 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2952,9 +2952,9 @@ static void intel_pebs_aliases_skl(struct perf_event *event)
 	return intel_pebs_aliases_precdist(event);
 }
 
-static unsigned long intel_pmu_free_running_flags(struct perf_event *event)
+static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event)
 {
-	unsigned long flags = x86_pmu.free_running_flags;
+	unsigned long flags = x86_pmu.large_pebs_flags;
 
 	if (event->attr.use_clockid)
 		flags &= ~PERF_SAMPLE_TIME;
@@ -2976,8 +2976,8 @@ static int intel_pmu_hw_config(struct perf_event *event)
 		if (!event->attr.freq) {
 			event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
 			if (!(event->attr.sample_type &
-			      ~intel_pmu_free_running_flags(event)))
-				event->hw.flags |= PERF_X86_EVENT_FREERUNNING;
+			      ~intel_pmu_large_pebs_flags(event)))
+				event->hw.flags |= PERF_X86_EVENT_LARGE_PEBS;
 		}
 		if (x86_pmu.pebs_aliases)
 			x86_pmu.pebs_aliases(event);
@@ -3194,7 +3194,7 @@ static unsigned bdw_limit_period(struct perf_event *event, unsigned left)
 			X86_CONFIG(.event=0xc0, .umask=0x01)) {
 		if (left < 128)
 			left = 128;
-		left &= ~0x3fu;
+		left &= ~0x3fULL;
 	}
 	return left;
 }
@@ -3460,7 +3460,7 @@ static __initconst const struct x86_pmu core_pmu = {
 	.event_map		= intel_pmu_event_map,
 	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
 	.apic			= 1,
-	.free_running_flags	= PEBS_FREERUNNING_FLAGS,
+	.large_pebs_flags	= LARGE_PEBS_FLAGS,
 
 	/*
 	 * Intel PMCs cannot be accessed sanely above 32-bit width,
@@ -3502,7 +3502,7 @@ static __initconst const struct x86_pmu intel_pmu = {
 	.event_map		= intel_pmu_event_map,
 	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
 	.apic			= 1,
-	.free_running_flags	= PEBS_FREERUNNING_FLAGS,
+	.large_pebs_flags	= LARGE_PEBS_FLAGS,
 	/*
 	 * Intel PMCs cannot be accessed sanely above 32 bit width,
 	 * so we install an artificial 1<<31 period regardless of
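
The one-character bdw_limit_period() change above (the "truncated-parameter fix" in the summary) is an integer-promotion fix: ~0x3fu is an unsigned int, so if the AND is (or later becomes) a 64-bit operation, the mask zero-extends and wipes the upper half of the value as well as the six low bits. A stand-alone C sketch of the pitfall (ordinary user-space code, not the kernel function itself):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t left = 0x123456789abcdef0ULL;

            /* Old mask: ~0x3fu == 0xffffffc0 (unsigned int); converted to
             * 64 bits it becomes 0x00000000ffffffc0, so bits 63:32 are
             * cleared along with bits 5:0. */
            uint64_t truncated = left & ~0x3fu;

            /* Fixed mask: ~0x3fULL == 0xffffffffffffffc0, which clears
             * only the low six bits, as intended. */
            uint64_t correct = left & ~0x3fULL;

            printf("%#llx\n", (unsigned long long)truncated); /* 0x9abcdec0 */
            printf("%#llx\n", (unsigned long long)correct);   /* 0x123456789abcdec0 */
            return 0;
    }
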
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 18c25ab28557..d8015235ba76 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -935,7 +935,7 @@ void intel_pmu_pebs_add(struct perf_event *event)
 	bool needed_cb = pebs_needs_sched_cb(cpuc);
 
 	cpuc->n_pebs++;
-	if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
+	if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
 		cpuc->n_large_pebs++;
 
 	pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
@@ -975,7 +975,7 @@ void intel_pmu_pebs_del(struct perf_event *event)
 	bool needed_cb = pebs_needs_sched_cb(cpuc);
 
 	cpuc->n_pebs--;
-	if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
+	if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
 		cpuc->n_large_pebs--;
 
 	pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
@@ -1530,7 +1530,7 @@ void __init intel_ds_init(void)
 			x86_pmu.pebs_record_size =
 						sizeof(struct pebs_record_skl);
 			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
-			x86_pmu.free_running_flags |= PERF_SAMPLE_TIME;
+			x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
 			break;
 
 		default:
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 22ec65bc033a..c98b943e58b4 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -3343,6 +3343,7 @@ static struct extra_reg skx_uncore_cha_extra_regs[] = {
 	SNBEP_CBO_EVENT_EXTRA_REG(0x9134, 0xffff, 0x4),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x35, 0xff, 0x8),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x36, 0xff, 0x8),
+	SNBEP_CBO_EVENT_EXTRA_REG(0x38, 0xff, 0x3),
 	EVENT_EXTRA_END
 };
 
@@ -3562,24 +3563,27 @@ static struct intel_uncore_type *skx_msr_uncores[] = {
 	NULL,
 };
 
+/*
+ * To determine the number of CHAs, it should read bits 27:0 in the CAPID6
+ * register which located at Device 30, Function 3, Offset 0x9C. PCI ID 0x2083.
+ */
+#define SKX_CAPID6		0x9c
+#define SKX_CHA_BIT_MASK	GENMASK(27, 0)
+
 static int skx_count_chabox(void)
 {
-	struct pci_dev *chabox_dev = NULL;
-	int bus, count = 0;
+	struct pci_dev *dev = NULL;
+	u32 val = 0;
 
-	while (1) {
-		chabox_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x208d, chabox_dev);
-		if (!chabox_dev)
-			break;
-		if (count == 0)
-			bus = chabox_dev->bus->number;
-		if (bus != chabox_dev->bus->number)
-			break;
-		count++;
-	}
+	dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x2083, dev);
+	if (!dev)
+		goto out;
 
-	pci_dev_put(chabox_dev);
-	return count;
+	pci_read_config_dword(dev, SKX_CAPID6, &val);
+	val &= SKX_CHA_BIT_MASK;
+out:
+	pci_dev_put(dev);
+	return hweight32(val);
 }
 
 void skx_uncore_cpu_init(void)
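
The rework above replaces the bus-walk based CHA discovery with a single capability-register read: each set bit in bits 27:0 of CAPID6 corresponds to one enabled CHA, so the count is a population count over that mask. A user-space sketch of the same arithmetic (the CAPID6 value is a made-up example; on real hardware it would come from PCI config space of the 0x2083 device at offset 0x9c):

    #include <stdint.h>
    #include <stdio.h>

    static int count_chas(uint32_t capid6)
    {
            uint32_t mask = (1u << 28) - 1;            /* bits 27:0, i.e. GENMASK(27, 0) */
            return __builtin_popcount(capid6 & mask);  /* same idea as hweight32() */
    }

    int main(void)
    {
            printf("%d\n", count_chas(0x0fffffffu));   /* fully populated: 28 CHAs */
            printf("%d\n", count_chas(0x00ffffffu));   /* partly fused off: 24 CHAs */
            return 0;
    }
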
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 78f91ec1056e..39cd0615f04f 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -69,7 +69,7 @@ struct event_constraint {
 #define PERF_X86_EVENT_RDPMC_ALLOWED	0x0100 /* grant rdpmc permission */
 #define PERF_X86_EVENT_EXCL_ACCT	0x0200 /* accounted EXCL event */
 #define PERF_X86_EVENT_AUTO_RELOAD	0x0400 /* use PEBS auto-reload */
-#define PERF_X86_EVENT_FREERUNNING	0x0800 /* use freerunning PEBS */
+#define PERF_X86_EVENT_LARGE_PEBS	0x0800 /* use large PEBS */
 
 
 struct amd_nb {
@@ -88,7 +88,7 @@ struct amd_nb {
  * REGS_USER can be handled for events limited to ring 3.
  *
  */
-#define PEBS_FREERUNNING_FLAGS \
+#define LARGE_PEBS_FLAGS \
 	(PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
 	PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
 	PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
@@ -608,7 +608,7 @@ struct x86_pmu {
 	struct event_constraint *pebs_constraints;
 	void		(*pebs_aliases)(struct perf_event *event);
 	int 		max_pebs_events;
-	unsigned long	free_running_flags;
+	unsigned long	large_pebs_flags;
 
 	/*
 	 * Intel LBR
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 4b838470fac4..709a55b9ad97 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -724,9 +724,15 @@ static inline void __update_cgrp_time(struct perf_cgroup *cgrp)
 
 static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
 {
-	struct perf_cgroup *cgrp_out = cpuctx->cgrp;
-	if (cgrp_out)
-		__update_cgrp_time(cgrp_out);
+	struct perf_cgroup *cgrp = cpuctx->cgrp;
+	struct cgroup_subsys_state *css;
+
+	if (cgrp) {
+		for (css = &cgrp->css; css; css = css->parent) {
+			cgrp = container_of(css, struct perf_cgroup, css);
+			__update_cgrp_time(cgrp);
+		}
+	}
 }
 
 static inline void update_cgrp_time_from_event(struct perf_event *event)
@@ -754,6 +760,7 @@ perf_cgroup_set_timestamp(struct task_struct *task,
 {
 	struct perf_cgroup *cgrp;
 	struct perf_cgroup_info *info;
+	struct cgroup_subsys_state *css;
 
 	/*
 	 * ctx->lock held by caller
@@ -764,8 +771,12 @@ perf_cgroup_set_timestamp(struct task_struct *task,
 		return;
 
 	cgrp = perf_cgroup_from_task(task, ctx);
-	info = this_cpu_ptr(cgrp->info);
-	info->timestamp = ctx->timestamp;
+
+	for (css = &cgrp->css; css; css = css->parent) {
+		cgrp = container_of(css, struct perf_cgroup, css);
+		info = this_cpu_ptr(cgrp->info);
+		info->timestamp = ctx->timestamp;
+	}
 }
 
 static DEFINE_PER_CPU(struct list_head, cgrp_cpuctx_list);
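
Both kernel/events/core.c hunks above replace a single-cgroup time update with a walk over the whole ancestor chain, so that an event attached to an ancestor cgroup also has its time advanced while a descendant is running; the walk recovers each struct perf_cgroup from its embedded css with the usual container_of() idiom. A self-contained illustration of that pattern (plain C with stand-in types, not the kernel structures):

    #include <stddef.h>
    #include <stdio.h>

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct css {                /* stand-in for struct cgroup_subsys_state */
            struct css *parent;
    };

    struct perf_cgrp {          /* stand-in for struct perf_cgroup */
            const char *name;
            struct css css;     /* embedded member, like cgrp->css */
    };

    static void update_time(struct perf_cgrp *cgrp)
    {
            printf("update %s\n", cgrp->name);
    }

    int main(void)
    {
            struct perf_cgrp root  = { "root",  { NULL } };
            struct perf_cgrp child = { "child", { &root.css } };

            /* Same shape as the loop added to update_cgrp_time_from_cpuctx():
             * walk from the current cgroup up to the root, updating each
             * containing perf_cgrp along the way. */
            for (struct css *css = &child.css; css; css = css->parent)
                    update_time(container_of(css, struct perf_cgrp, css));

            return 0;
    }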


* [GIT PULL] perf fixes
@ 2018-02-06 21:29 Ingo Molnar
  0 siblings, 0 replies; 316+ messages in thread
From: Ingo Molnar @ 2018-02-06 21:29 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Arnaldo Carvalho de Melo, Peter Zijlstra,
	Thomas Gleixner, Andrew Morton

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: 59410f5ac70a0949a6f06ba43eecb8f99be31291 Merge tag 'perf-urgent-for-mingo-4.16-20180205' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent

Tooling fixes, plus the addition of missing interval sampling support for certain x86 PEBS events.

 Thanks,

	Ingo

------------------>
Arnaldo Carvalho de Melo (5):
      tools headers: Synchronize sound/asound.h
      tooling headers: Synchronize updated s390 kvm UAPI headers
      tools headers: Sync {tools/,}arch/powerpc/include/uapi/asm/kvm.h
      tools headers: Synchronize uapi/linux/sched.h
      tools headers: Synchoronize x86 features UAPI headers

Jiri Olsa (3):
      perf evsel: Fix period/freq terms setup
      perf record: Fix period option handling
      x86/events/intel/ds: Add PERF_SAMPLE_PERIOD into PEBS_FREERUNNING_FLAGS

Ravi Bangoria (2):
      perf trace: Fix call-graph output
      perf tools: Add trace/beauty/generated/ into .gitignore


 arch/x86/events/perf_event.h                   |  3 ++-
 tools/arch/powerpc/include/uapi/asm/kvm.h      | 25 +++++++++++++++++++++++++
 tools/arch/s390/include/uapi/asm/kvm.h         |  5 ++++-
 tools/arch/x86/include/asm/cpufeatures.h       | 24 ++++++++++++++++++++----
 tools/arch/x86/include/asm/disabled-features.h |  3 ++-
 tools/arch/x86/include/asm/required-features.h |  3 ++-
 tools/include/uapi/linux/kvm.h                 |  4 ++++
 tools/include/uapi/linux/sched.h               |  5 +++++
 tools/include/uapi/sound/asound.h              |  9 +++++++++
 tools/perf/.gitignore                          |  1 +
 tools/perf/builtin-record.c                    |  3 ++-
 tools/perf/builtin-trace.c                     |  5 ++++-
 tools/perf/perf.h                              |  1 +
 tools/perf/util/evsel.c                        | 13 ++++++++++---
 14 files changed, 91 insertions(+), 13 deletions(-)

diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 8e4ea143ed96..78f91ec1056e 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -93,7 +93,8 @@ struct amd_nb {
 	PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
 	PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
 	PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \
-	PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)
+	PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER | \
+	PERF_SAMPLE_PERIOD)
 
 #define PEBS_REGS \
 	(PERF_REG_X86_AX | \
diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h
index 61d6049f4c1e..637b7263cb86 100644
--- a/tools/arch/powerpc/include/uapi/asm/kvm.h
+++ b/tools/arch/powerpc/include/uapi/asm/kvm.h
@@ -443,6 +443,31 @@ struct kvm_ppc_rmmu_info {
 	__u32	ap_encodings[8];
 };
 
+/* For KVM_PPC_GET_CPU_CHAR */
+struct kvm_ppc_cpu_char {
+	__u64	character;		/* characteristics of the CPU */
+	__u64	behaviour;		/* recommended software behaviour */
+	__u64	character_mask;		/* valid bits in character */
+	__u64	behaviour_mask;		/* valid bits in behaviour */
+};
+
+/*
+ * Values for character and character_mask.
+ * These are identical to the values used by H_GET_CPU_CHARACTERISTICS.
+ */
+#define KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31		(1ULL << 63)
+#define KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED	(1ULL << 62)
+#define KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30	(1ULL << 61)
+#define KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2	(1ULL << 60)
+#define KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV	(1ULL << 59)
+#define KVM_PPC_CPU_CHAR_BR_HINT_HONOURED	(1ULL << 58)
+#define KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF	(1ULL << 57)
+#define KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS	(1ULL << 56)
+
+#define KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY	(1ULL << 63)
+#define KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR		(1ULL << 62)
+#define KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR	(1ULL << 61)
+
 /* Per-vcpu XICS interrupt controller state */
 #define KVM_REG_PPC_ICP_STATE	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c)
 
diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h
index 38535a57fef8..4cdaa55fabfe 100644
--- a/tools/arch/s390/include/uapi/asm/kvm.h
+++ b/tools/arch/s390/include/uapi/asm/kvm.h
@@ -224,6 +224,7 @@ struct kvm_guest_debug_arch {
 #define KVM_SYNC_RICCB  (1UL << 7)
 #define KVM_SYNC_FPRS   (1UL << 8)
 #define KVM_SYNC_GSCB   (1UL << 9)
+#define KVM_SYNC_BPBC   (1UL << 10)
 /* length and alignment of the sdnx as a power of two */
 #define SDNXC 8
 #define SDNXL (1UL << SDNXC)
@@ -247,7 +248,9 @@ struct kvm_sync_regs {
 	};
 	__u8  reserved[512];	/* for future vector expansion */
 	__u32 fpc;		/* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */
-	__u8 padding1[52];	/* riccb needs to be 64byte aligned */
+	__u8 bpbc : 1;		/* bp mode */
+	__u8 reserved2 : 7;
+	__u8 padding1[51];	/* riccb needs to be 64byte aligned */
 	__u8 riccb[64];		/* runtime instrumentation controls block */
 	__u8 padding2[192];	/* sdnx needs to be 256byte aligned */
 	union {
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 21ac898df2d8..1d9199e1c2ad 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -13,7 +13,7 @@
 /*
  * Defines x86 CPU feature bits
  */
-#define NCAPINTS			18	   /* N 32-bit words worth of info */
+#define NCAPINTS			19	   /* N 32-bit words worth of info */
 #define NBUGINTS			1	   /* N 32-bit bug flags */
 
 /*
@@ -203,12 +203,15 @@
 #define X86_FEATURE_PROC_FEEDBACK	( 7*32+ 9) /* AMD ProcFeedbackInterface */
 #define X86_FEATURE_SME			( 7*32+10) /* AMD Secure Memory Encryption */
 #define X86_FEATURE_PTI			( 7*32+11) /* Kernel Page Table Isolation enabled */
+#define X86_FEATURE_RETPOLINE		( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_AMD	( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
 #define X86_FEATURE_INTEL_PPIN		( 7*32+14) /* Intel Processor Inventory Number */
-#define X86_FEATURE_INTEL_PT		( 7*32+15) /* Intel Processor Trace */
-#define X86_FEATURE_AVX512_4VNNIW	( 7*32+16) /* AVX-512 Neural Network Instructions */
-#define X86_FEATURE_AVX512_4FMAPS	( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_CDP_L2		( 7*32+15) /* Code and Data Prioritization L2 */
 
 #define X86_FEATURE_MBA			( 7*32+18) /* Memory Bandwidth Allocation */
+#define X86_FEATURE_RSB_CTXSW		( 7*32+19) /* "" Fill RSB on context switches */
+
+#define X86_FEATURE_USE_IBPB		( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW		( 8*32+ 0) /* Intel TPR Shadow */
@@ -243,6 +246,7 @@
 #define X86_FEATURE_AVX512IFMA		( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
 #define X86_FEATURE_CLFLUSHOPT		( 9*32+23) /* CLFLUSHOPT instruction */
 #define X86_FEATURE_CLWB		( 9*32+24) /* CLWB instruction */
+#define X86_FEATURE_INTEL_PT		( 9*32+25) /* Intel Processor Trace */
 #define X86_FEATURE_AVX512PF		( 9*32+26) /* AVX-512 Prefetch */
 #define X86_FEATURE_AVX512ER		( 9*32+27) /* AVX-512 Exponential and Reciprocal */
 #define X86_FEATURE_AVX512CD		( 9*32+28) /* AVX-512 Conflict Detection */
@@ -268,6 +272,9 @@
 #define X86_FEATURE_CLZERO		(13*32+ 0) /* CLZERO instruction */
 #define X86_FEATURE_IRPERF		(13*32+ 1) /* Instructions Retired Count */
 #define X86_FEATURE_XSAVEERPTR		(13*32+ 2) /* Always save/restore FP error pointers */
+#define X86_FEATURE_IBPB		(13*32+12) /* Indirect Branch Prediction Barrier */
+#define X86_FEATURE_IBRS		(13*32+14) /* Indirect Branch Restricted Speculation */
+#define X86_FEATURE_STIBP		(13*32+15) /* Single Thread Indirect Branch Predictors */
 
 /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
 #define X86_FEATURE_DTHERM		(14*32+ 0) /* Digital Thermal Sensor */
@@ -316,6 +323,13 @@
 #define X86_FEATURE_SUCCOR		(17*32+ 1) /* Uncorrectable error containment and recovery */
 #define X86_FEATURE_SMCA		(17*32+ 3) /* Scalable MCA */
 
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
+#define X86_FEATURE_AVX512_4VNNIW	(18*32+ 2) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS	(18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_SPEC_CTRL		(18*32+26) /* "" Speculation Control (IBRS + IBPB) */
+#define X86_FEATURE_INTEL_STIBP		(18*32+27) /* "" Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_ARCH_CAPABILITIES	(18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
+
 /*
  * BUG word(s)
  */
@@ -342,5 +356,7 @@
 #define X86_BUG_MONITOR			X86_BUG(12) /* IPI required to wake up remote CPU */
 #define X86_BUG_AMD_E400		X86_BUG(13) /* CPU is among the affected by Erratum 400 */
 #define X86_BUG_CPU_MELTDOWN		X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
+#define X86_BUG_SPECTRE_V1		X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
+#define X86_BUG_SPECTRE_V2		X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
 
 #endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index b027633e7300..33833d1909af 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -77,6 +77,7 @@
 #define DISABLED_MASK15	0
 #define DISABLED_MASK16	(DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP)
 #define DISABLED_MASK17	0
-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
+#define DISABLED_MASK18	0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
 
 #endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h
index d91ba04dd007..fb3a6de7440b 100644
--- a/tools/arch/x86/include/asm/required-features.h
+++ b/tools/arch/x86/include/asm/required-features.h
@@ -106,6 +106,7 @@
 #define REQUIRED_MASK15	0
 #define REQUIRED_MASK16	(NEED_LA57)
 #define REQUIRED_MASK17	0
-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
+#define REQUIRED_MASK18	0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
 
 #endif /* _ASM_X86_REQUIRED_FEATURES_H */
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 496e59a2738b..8fb90a0819c3 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -932,6 +932,8 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_HYPERV_SYNIC2 148
 #define KVM_CAP_HYPERV_VP_INDEX 149
 #define KVM_CAP_S390_AIS_MIGRATION 150
+#define KVM_CAP_PPC_GET_CPU_CHAR 151
+#define KVM_CAP_S390_BPB 152
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1261,6 +1263,8 @@ struct kvm_s390_ucas_mapping {
 #define KVM_PPC_CONFIGURE_V3_MMU  _IOW(KVMIO,  0xaf, struct kvm_ppc_mmuv3_cfg)
 /* Available with KVM_CAP_PPC_RADIX_MMU */
 #define KVM_PPC_GET_RMMU_INFO	  _IOW(KVMIO,  0xb0, struct kvm_ppc_rmmu_info)
+/* Available with KVM_CAP_PPC_GET_CPU_CHAR */
+#define KVM_PPC_GET_CPU_CHAR	  _IOR(KVMIO,  0xb1, struct kvm_ppc_cpu_char)
 
 /* ioctl for vm fd */
 #define KVM_CREATE_DEVICE	  _IOWR(KVMIO,  0xe0, struct kvm_create_device)
diff --git a/tools/include/uapi/linux/sched.h b/tools/include/uapi/linux/sched.h
index 30a9e51bbb1e..22627f80063e 100644
--- a/tools/include/uapi/linux/sched.h
+++ b/tools/include/uapi/linux/sched.h
@@ -49,5 +49,10 @@
  */
 #define SCHED_FLAG_RESET_ON_FORK	0x01
 #define SCHED_FLAG_RECLAIM		0x02
+#define SCHED_FLAG_DL_OVERRUN		0x04
+
+#define SCHED_FLAG_ALL	(SCHED_FLAG_RESET_ON_FORK	| \
+			 SCHED_FLAG_RECLAIM		| \
+			 SCHED_FLAG_DL_OVERRUN)
 
 #endif /* _UAPI_LINUX_SCHED_H */
diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h
index c227ccba60ae..07d61583fd02 100644
--- a/tools/include/uapi/sound/asound.h
+++ b/tools/include/uapi/sound/asound.h
@@ -214,6 +214,11 @@ typedef int __bitwise snd_pcm_format_t;
 #define	SNDRV_PCM_FORMAT_IMA_ADPCM	((__force snd_pcm_format_t) 22)
 #define	SNDRV_PCM_FORMAT_MPEG		((__force snd_pcm_format_t) 23)
 #define	SNDRV_PCM_FORMAT_GSM		((__force snd_pcm_format_t) 24)
+#define	SNDRV_PCM_FORMAT_S20_LE	((__force snd_pcm_format_t) 25) /* in four bytes, LSB justified */
+#define	SNDRV_PCM_FORMAT_S20_BE	((__force snd_pcm_format_t) 26) /* in four bytes, LSB justified */
+#define	SNDRV_PCM_FORMAT_U20_LE	((__force snd_pcm_format_t) 27) /* in four bytes, LSB justified */
+#define	SNDRV_PCM_FORMAT_U20_BE	((__force snd_pcm_format_t) 28) /* in four bytes, LSB justified */
+/* gap in the numbering for a future standard linear format */
 #define	SNDRV_PCM_FORMAT_SPECIAL	((__force snd_pcm_format_t) 31)
 #define	SNDRV_PCM_FORMAT_S24_3LE	((__force snd_pcm_format_t) 32)	/* in three bytes */
 #define	SNDRV_PCM_FORMAT_S24_3BE	((__force snd_pcm_format_t) 33)	/* in three bytes */
@@ -248,6 +253,8 @@ typedef int __bitwise snd_pcm_format_t;
 #define	SNDRV_PCM_FORMAT_FLOAT		SNDRV_PCM_FORMAT_FLOAT_LE
 #define	SNDRV_PCM_FORMAT_FLOAT64	SNDRV_PCM_FORMAT_FLOAT64_LE
 #define	SNDRV_PCM_FORMAT_IEC958_SUBFRAME SNDRV_PCM_FORMAT_IEC958_SUBFRAME_LE
+#define	SNDRV_PCM_FORMAT_S20		SNDRV_PCM_FORMAT_S20_LE
+#define	SNDRV_PCM_FORMAT_U20		SNDRV_PCM_FORMAT_U20_LE
 #endif
 #ifdef SNDRV_BIG_ENDIAN
 #define	SNDRV_PCM_FORMAT_S16		SNDRV_PCM_FORMAT_S16_BE
@@ -259,6 +266,8 @@ typedef int __bitwise snd_pcm_format_t;
 #define	SNDRV_PCM_FORMAT_FLOAT		SNDRV_PCM_FORMAT_FLOAT_BE
 #define	SNDRV_PCM_FORMAT_FLOAT64	SNDRV_PCM_FORMAT_FLOAT64_BE
 #define	SNDRV_PCM_FORMAT_IEC958_SUBFRAME SNDRV_PCM_FORMAT_IEC958_SUBFRAME_BE
+#define	SNDRV_PCM_FORMAT_S20		SNDRV_PCM_FORMAT_S20_BE
+#define	SNDRV_PCM_FORMAT_U20		SNDRV_PCM_FORMAT_U20_BE
 #endif
 
 typedef int __bitwise snd_pcm_subformat_t;
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index 643cc4ba6872..3e5135dded16 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -31,5 +31,6 @@ config.mak.autogen
 .config-detected
 util/intel-pt-decoder/inat-tables.c
 arch/*/include/generated/
+trace/beauty/generated/
 pmu-events/pmu-events.c
 pmu-events/jevents
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 65681a1a292a..bf4ca749d1ac 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1566,7 +1566,8 @@ static struct option __record_options[] = {
 	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
 			&record.opts.sample_time_set,
 			"Record the sample timestamps"),
-	OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
+	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
+			"Record the sample period"),
 	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
 		    "don't sample"),
 	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 17d11deeb88d..e7f1b182fc15 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1661,9 +1661,12 @@ static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evse
 				    struct callchain_cursor *cursor)
 {
 	struct addr_location al;
+	int max_stack = evsel->attr.sample_max_stack ?
+			evsel->attr.sample_max_stack :
+			trace->max_stack;
 
 	if (machine__resolve(trace->host, &al, sample) < 0 ||
-	    thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, evsel->attr.sample_max_stack))
+	    thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack))
 		return -1;
 
 	return 0;
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 2357f4ccc9c7..cfe46236a5e5 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -50,6 +50,7 @@ struct record_opts {
 	bool	     sample_time_set;
 	bool	     sample_cpu;
 	bool	     period;
+	bool	     period_set;
 	bool	     running_time;
 	bool	     full_auxtrace;
 	bool	     auxtrace_snapshot_mode;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 66fa45198a11..ff359c9ece2e 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -745,12 +745,14 @@ static void apply_config_terms(struct perf_evsel *evsel,
 			if (!(term->weak && opts->user_interval != ULLONG_MAX)) {
 				attr->sample_period = term->val.period;
 				attr->freq = 0;
+				perf_evsel__reset_sample_bit(evsel, PERIOD);
 			}
 			break;
 		case PERF_EVSEL__CONFIG_TERM_FREQ:
 			if (!(term->weak && opts->user_freq != UINT_MAX)) {
 				attr->sample_freq = term->val.freq;
 				attr->freq = 1;
+				perf_evsel__set_sample_bit(evsel, PERIOD);
 			}
 			break;
 		case PERF_EVSEL__CONFIG_TERM_TIME:
@@ -969,9 +971,6 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
 	if (target__has_cpu(&opts->target) || opts->sample_cpu)
 		perf_evsel__set_sample_bit(evsel, CPU);
 
-	if (opts->period)
-		perf_evsel__set_sample_bit(evsel, PERIOD);
-
 	/*
 	 * When the user explicitly disabled time don't force it here.
 	 */
@@ -1073,6 +1072,14 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
 	apply_config_terms(evsel, opts, track);
 
 	evsel->ignore_missing_thread = opts->ignore_missing_thread;
+
+	/* The --period option takes the precedence. */
+	if (opts->period_set) {
+		if (opts->period)
+			perf_evsel__set_sample_bit(evsel, PERIOD);
+		else
+			perf_evsel__reset_sample_bit(evsel, PERIOD);
+	}
 }
 
 static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
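
On the kernel-side hunk at the top of this pull (PERF_SAMPLE_PERIOD added to PEBS_FREERUNNING_FLAGS): that macro lists the sample_type bits compatible with the multi-record PEBS buffer, and for a fixed-period event the period is a constant, so sampling it costs nothing extra and no longer disqualifies the event. A minimal user-space sketch of such an event (assumes perf_event_open() support and PEBS-capable hardware; ring-buffer setup and error handling elided):

    #define _GNU_SOURCE
    #include <linux/perf_event.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(void)
    {
            struct perf_event_attr attr;
            int fd;

            memset(&attr, 0, sizeof(attr));
            attr.size          = sizeof(attr);
            attr.type          = PERF_TYPE_HARDWARE;
            attr.config        = PERF_COUNT_HW_CPU_CYCLES;
            attr.precise_ip    = 1;        /* request a precise (PEBS) event */
            attr.freq          = 0;        /* fixed period, as with 'perf record -c' */
            attr.sample_period = 100003;
            /* Every bit here is in PEBS_FREERUNNING_FLAGS after the patch,
             * so the event remains eligible for the large PEBS buffer. */
            attr.sample_type   = PERF_SAMPLE_IP | PERF_SAMPLE_TID |
                                 PERF_SAMPLE_PERIOD;

            /* Monitor the current task on any CPU. */
            fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
            return fd < 0;
    }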


* [GIT PULL] perf fixes
@ 2017-12-06 22:17 Ingo Molnar
  0 siblings, 0 replies; 316+ messages in thread
From: Ingo Molnar @ 2017-12-06 22:17 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Arnaldo Carvalho de Melo, Peter Zijlstra,
	Thomas Gleixner, Andrew Morton

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: 34c9ca37aaec2e307b837bb099d3b44f0ea04ddc tooling/headers: Synchronize updated s390 and x86 UAPI headers

This tree includes kernel-side fixes for perf namespaces support, plus an accumulated 
set of perf tooling fixes - including UAPI header synchronization that should make 
the perf build less noisy.

 Thanks,

	Ingo

------------------>
Adrian Hunter (1):
      perf intel-pt: Bring instruction decoder files into line with the kernel

Andi Kleen (1):
      perf record: Fix -c/-F options for cpu event aliases

Andrei Vagin (1):
      perf trace: Fix an exit code of trace__symbols_init

Arnaldo Carvalho de Melo (16):
      perf evlist: Set the correct idx when adding dummy events
      perf record: Generate PERF_RECORD_{MMAP,COMM,EXEC} with --delay
      perf evsel: Fix up leftover perf_evsel_stat usage via evsel->priv
      perf script: Fix --per-event-dump for auxtrace synth evsels
      perf machine: Guard against NULL in machine__exit()
      perf evlist: Add helper to check if attr.exclude_kernel is set in all evsels
      perf report: Ignore kptr_restrict when not sampling the kernel
      perf record: Ignore kptr_restrict when not sampling the kernel
      perf top: Ignore kptr_restrict when not sampling the kernel
      tools headers: Synchronize kernel ABI headers wrt SPDX tags
      tools headers: Synchronize perf_event.h header
      tools headers uapi: Synchronize drm/drm.h
      tools headers: Synchronize drm/i915_drm.h
      tools headers: Synchronize KVM arch ABI headers
      tools headers: Synchronize prctl.h ABI header
      tools headers: Syncronize mman.h ABI header

Ingo Molnar (2):
      tools/headers: Synchronize kernel x86 UAPI headers
      tooling/headers: Synchronize updated s390 and x86 UAPI headers

Jiri Olsa (3):
      perf: Fix header.size for namespace events
      perf top: Fix window dimensions change handling
      perf top: Use signal interface for SIGWINCH handler

Namhyung Kim (1):
      perf help: Fix a bug during strstart() conversion

Ravi Bangoria (1):
      perf annotate: Do not truncate instruction names at 6 chars

Satheesh Rajendran (1):
      perf bench numa: Fixup discontiguous/sparse numa nodes

Thomas Richter (3):
      perf test shell: Fix check open filename arg using 'perf trace' on s390x
      perf test shell: Fix test case probe libc's inet_pton on s390x
      perf test: Fix test 21 for s390x

Vasily Averin (1):
      perf/core: Fix memory leak triggered by perf --namespace


 kernel/events/core.c                               |   5 +-
 tools/arch/arm/include/uapi/asm/kvm.h              |   7 +
 tools/arch/arm64/include/uapi/asm/kvm.h            |   7 +
 tools/arch/s390/include/uapi/asm/kvm.h             |   4 -
 tools/arch/s390/include/uapi/asm/kvm_perf.h        |   4 -
 tools/arch/x86/include/asm/cpufeatures.h           | 537 +++++++++++----------
 tools/arch/x86/include/asm/disabled-features.h     |   8 +-
 tools/include/uapi/asm-generic/mman.h              |   1 +
 tools/include/uapi/drm/drm.h                       |  41 ++
 tools/include/uapi/drm/i915_drm.h                  |  33 +-
 tools/include/uapi/linux/kcmp.h                    |   1 +
 tools/include/uapi/linux/kvm.h                     |   1 +
 tools/include/uapi/linux/perf_event.h              |   1 +
 tools/include/uapi/linux/prctl.h                   |  10 +
 tools/perf/bench/numa.c                            |  56 ++-
 tools/perf/builtin-help.c                          |   4 +-
 tools/perf/builtin-record.c                        |  42 +-
 tools/perf/builtin-report.c                        |   3 +
 tools/perf/builtin-script.c                        |  31 +-
 tools/perf/builtin-top.c                           |  36 +-
 tools/perf/builtin-trace.c                         |   6 +-
 .../perf/tests/shell/trace+probe_libc_inet_pton.sh |   7 +-
 tools/perf/tests/shell/trace+probe_vfs_getname.sh  |   6 +-
 tools/perf/tests/task-exit.c                       |   4 +
 tools/perf/trace/beauty/mmap.c                     |   3 +
 tools/perf/util/annotate.c                         |  18 +-
 tools/perf/util/evlist.c                           |  14 +-
 tools/perf/util/evlist.h                           |   2 +
 tools/perf/util/evsel.c                            |  14 +-
 tools/perf/util/evsel.h                            |   1 +
 tools/perf/util/intel-pt-decoder/inat.h            |  10 +
 .../perf/util/intel-pt-decoder/x86-opcode-map.txt  |   2 +-
 tools/perf/util/machine.c                          |   3 +
 tools/perf/util/parse-events.c                     |   2 +
 tools/perf/util/parse-events.h                     |   3 +
 tools/perf/util/pmu.c                              |   5 +
 36 files changed, 594 insertions(+), 338 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 16beab4767e1..5961ef6dfd64 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6639,6 +6639,7 @@ static void perf_event_namespaces_output(struct perf_event *event,
 	struct perf_namespaces_event *namespaces_event = data;
 	struct perf_output_handle handle;
 	struct perf_sample_data sample;
+	u16 header_size = namespaces_event->event_id.header.size;
 	int ret;
 
 	if (!perf_event_namespaces_match(event))
@@ -6649,7 +6650,7 @@ static void perf_event_namespaces_output(struct perf_event *event,
 	ret = perf_output_begin(&handle, event,
 				namespaces_event->event_id.header.size);
 	if (ret)
-		return;
+		goto out;
 
 	namespaces_event->event_id.pid = perf_event_pid(event,
 							namespaces_event->task);
@@ -6661,6 +6662,8 @@ static void perf_event_namespaces_output(struct perf_event *event,
 	perf_event__output_id_sample(event, &handle, &sample);
 
 	perf_output_end(&handle);
+out:
+	namespaces_event->event_id.header.size = header_size;
 }
 
 static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info,
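
On the first hunk above ("perf: Fix header.size for namespace events"): the namespaces_event template is reused for every event iterated in the context, and the ID-sample setup grows event_id.header.size each time, so the original size has to be restored on every exit path, including the early-error return that the patch converts into a 'goto out'. A reduced, runnable sketch of that save/restore shape (stand-in types, not the kernel code):

    #include <stdint.h>
    #include <stdio.h>

    struct header { uint16_t size; };
    struct event  { struct header header; };

    /* Stand-in for perf_event_header__init_id(): enlarges the record by
     * the per-event ID sample. */
    static void init_id(struct header *h) { h->size += 8; }

    /* Stand-in for perf_output_begin(); pretend it can fail. */
    static int output_begin(void) { return -1; }

    static void output_one(struct event *ev)
    {
            uint16_t saved = ev->header.size;   /* save, as the patch adds */

            init_id(&ev->header);
            if (output_begin())
                    goto out;                   /* was a bare 'return' before */
            /* ... emit the record, perf_output_end() ... */
    out:
            ev->header.size = saved;            /* restore on every path */
    }

    int main(void)
    {
            struct event ev = { { 40 } };

            /* The same template is reused for several events in a context;
             * without the restore its size would creep up to 48, 56, ... */
            output_one(&ev);
            output_one(&ev);
            printf("header.size stays at %u\n", (unsigned)ev.header.size);
            return 0;
    }
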
diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h
index 1f57bbe82b6f..6edd177bb1c7 100644
--- a/tools/arch/arm/include/uapi/asm/kvm.h
+++ b/tools/arch/arm/include/uapi/asm/kvm.h
@@ -152,6 +152,12 @@ struct kvm_arch_memory_slot {
 	(__ARM_CP15_REG(op1, 0, crm, 0) | KVM_REG_SIZE_U64)
 #define ARM_CP15_REG64(...) __ARM_CP15_REG64(__VA_ARGS__)
 
+/* PL1 Physical Timer Registers */
+#define KVM_REG_ARM_PTIMER_CTL		ARM_CP15_REG32(0, 14, 2, 1)
+#define KVM_REG_ARM_PTIMER_CNT		ARM_CP15_REG64(0, 14)
+#define KVM_REG_ARM_PTIMER_CVAL		ARM_CP15_REG64(2, 14)
+
+/* Virtual Timer Registers */
 #define KVM_REG_ARM_TIMER_CTL		ARM_CP15_REG32(0, 14, 3, 1)
 #define KVM_REG_ARM_TIMER_CNT		ARM_CP15_REG64(1, 14)
 #define KVM_REG_ARM_TIMER_CVAL		ARM_CP15_REG64(3, 14)
@@ -216,6 +222,7 @@ struct kvm_arch_memory_slot {
 #define   KVM_DEV_ARM_ITS_SAVE_TABLES		1
 #define   KVM_DEV_ARM_ITS_RESTORE_TABLES	2
 #define   KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES	3
+#define   KVM_DEV_ARM_ITS_CTRL_RESET		4
 
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index 51149ec75fe4..9abbf3044654 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -196,6 +196,12 @@ struct kvm_arch_memory_slot {
 
 #define ARM64_SYS_REG(...) (__ARM64_SYS_REG(__VA_ARGS__) | KVM_REG_SIZE_U64)
 
+/* Physical Timer EL0 Registers */
+#define KVM_REG_ARM_PTIMER_CTL		ARM64_SYS_REG(3, 3, 14, 2, 1)
+#define KVM_REG_ARM_PTIMER_CVAL		ARM64_SYS_REG(3, 3, 14, 2, 2)
+#define KVM_REG_ARM_PTIMER_CNT		ARM64_SYS_REG(3, 3, 14, 0, 1)
+
+/* EL0 Virtual Timer Registers */
 #define KVM_REG_ARM_TIMER_CTL		ARM64_SYS_REG(3, 3, 14, 3, 1)
 #define KVM_REG_ARM_TIMER_CNT		ARM64_SYS_REG(3, 3, 14, 3, 2)
 #define KVM_REG_ARM_TIMER_CVAL		ARM64_SYS_REG(3, 3, 14, 0, 2)
@@ -228,6 +234,7 @@ struct kvm_arch_memory_slot {
 #define   KVM_DEV_ARM_ITS_SAVE_TABLES           1
 #define   KVM_DEV_ARM_ITS_RESTORE_TABLES        2
 #define   KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES	3
+#define   KVM_DEV_ARM_ITS_CTRL_RESET		4
 
 /* Device Control API on vcpu fd */
 #define KVM_ARM_VCPU_PMU_V3_CTRL	0
diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h
index 9ad172dcd912..38535a57fef8 100644
--- a/tools/arch/s390/include/uapi/asm/kvm.h
+++ b/tools/arch/s390/include/uapi/asm/kvm.h
@@ -6,10 +6,6 @@
  *
  * Copyright IBM Corp. 2008
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  *               Christian Borntraeger <borntraeger@de.ibm.com>
  */
diff --git a/tools/arch/s390/include/uapi/asm/kvm_perf.h b/tools/arch/s390/include/uapi/asm/kvm_perf.h
index c36c97ffdc6f..84606b8cc49e 100644
--- a/tools/arch/s390/include/uapi/asm/kvm_perf.h
+++ b/tools/arch/s390/include/uapi/asm/kvm_perf.h
@@ -4,10 +4,6 @@
  *
  * Copyright 2014 IBM Corp.
  * Author(s): Alexander Yarygin <yarygin@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
  */
 
 #ifndef __LINUX_KVM_PERF_S390_H
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 793690fbda36..c0b0e9e8aa66 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -13,173 +13,176 @@
 /*
  * Defines x86 CPU feature bits
  */
-#define NCAPINTS	18	/* N 32-bit words worth of info */
-#define NBUGINTS	1	/* N 32-bit bug flags */
+#define NCAPINTS			18	   /* N 32-bit words worth of info */
+#define NBUGINTS			1	   /* N 32-bit bug flags */
 
 /*
  * Note: If the comment begins with a quoted string, that string is used
  * in /proc/cpuinfo instead of the macro name.  If the string is "",
  * this feature bit is not displayed in /proc/cpuinfo at all.
+ *
+ * When adding new features here that depend on other features,
+ * please update the table in kernel/cpu/cpuid-deps.c as well.
  */
 
-/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
-#define X86_FEATURE_FPU		( 0*32+ 0) /* Onboard FPU */
-#define X86_FEATURE_VME		( 0*32+ 1) /* Virtual Mode Extensions */
-#define X86_FEATURE_DE		( 0*32+ 2) /* Debugging Extensions */
-#define X86_FEATURE_PSE		( 0*32+ 3) /* Page Size Extensions */
-#define X86_FEATURE_TSC		( 0*32+ 4) /* Time Stamp Counter */
-#define X86_FEATURE_MSR		( 0*32+ 5) /* Model-Specific Registers */
-#define X86_FEATURE_PAE		( 0*32+ 6) /* Physical Address Extensions */
-#define X86_FEATURE_MCE		( 0*32+ 7) /* Machine Check Exception */
-#define X86_FEATURE_CX8		( 0*32+ 8) /* CMPXCHG8 instruction */
-#define X86_FEATURE_APIC	( 0*32+ 9) /* Onboard APIC */
-#define X86_FEATURE_SEP		( 0*32+11) /* SYSENTER/SYSEXIT */
-#define X86_FEATURE_MTRR	( 0*32+12) /* Memory Type Range Registers */
-#define X86_FEATURE_PGE		( 0*32+13) /* Page Global Enable */
-#define X86_FEATURE_MCA		( 0*32+14) /* Machine Check Architecture */
-#define X86_FEATURE_CMOV	( 0*32+15) /* CMOV instructions */
-					  /* (plus FCMOVcc, FCOMI with FPU) */
-#define X86_FEATURE_PAT		( 0*32+16) /* Page Attribute Table */
-#define X86_FEATURE_PSE36	( 0*32+17) /* 36-bit PSEs */
-#define X86_FEATURE_PN		( 0*32+18) /* Processor serial number */
-#define X86_FEATURE_CLFLUSH	( 0*32+19) /* CLFLUSH instruction */
-#define X86_FEATURE_DS		( 0*32+21) /* "dts" Debug Store */
-#define X86_FEATURE_ACPI	( 0*32+22) /* ACPI via MSR */
-#define X86_FEATURE_MMX		( 0*32+23) /* Multimedia Extensions */
-#define X86_FEATURE_FXSR	( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
-#define X86_FEATURE_XMM		( 0*32+25) /* "sse" */
-#define X86_FEATURE_XMM2	( 0*32+26) /* "sse2" */
-#define X86_FEATURE_SELFSNOOP	( 0*32+27) /* "ss" CPU self snoop */
-#define X86_FEATURE_HT		( 0*32+28) /* Hyper-Threading */
-#define X86_FEATURE_ACC		( 0*32+29) /* "tm" Automatic clock control */
-#define X86_FEATURE_IA64	( 0*32+30) /* IA-64 processor */
-#define X86_FEATURE_PBE		( 0*32+31) /* Pending Break Enable */
+/* Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 */
+#define X86_FEATURE_FPU			( 0*32+ 0) /* Onboard FPU */
+#define X86_FEATURE_VME			( 0*32+ 1) /* Virtual Mode Extensions */
+#define X86_FEATURE_DE			( 0*32+ 2) /* Debugging Extensions */
+#define X86_FEATURE_PSE			( 0*32+ 3) /* Page Size Extensions */
+#define X86_FEATURE_TSC			( 0*32+ 4) /* Time Stamp Counter */
+#define X86_FEATURE_MSR			( 0*32+ 5) /* Model-Specific Registers */
+#define X86_FEATURE_PAE			( 0*32+ 6) /* Physical Address Extensions */
+#define X86_FEATURE_MCE			( 0*32+ 7) /* Machine Check Exception */
+#define X86_FEATURE_CX8			( 0*32+ 8) /* CMPXCHG8 instruction */
+#define X86_FEATURE_APIC		( 0*32+ 9) /* Onboard APIC */
+#define X86_FEATURE_SEP			( 0*32+11) /* SYSENTER/SYSEXIT */
+#define X86_FEATURE_MTRR		( 0*32+12) /* Memory Type Range Registers */
+#define X86_FEATURE_PGE			( 0*32+13) /* Page Global Enable */
+#define X86_FEATURE_MCA			( 0*32+14) /* Machine Check Architecture */
+#define X86_FEATURE_CMOV		( 0*32+15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */
+#define X86_FEATURE_PAT			( 0*32+16) /* Page Attribute Table */
+#define X86_FEATURE_PSE36		( 0*32+17) /* 36-bit PSEs */
+#define X86_FEATURE_PN			( 0*32+18) /* Processor serial number */
+#define X86_FEATURE_CLFLUSH		( 0*32+19) /* CLFLUSH instruction */
+#define X86_FEATURE_DS			( 0*32+21) /* "dts" Debug Store */
+#define X86_FEATURE_ACPI		( 0*32+22) /* ACPI via MSR */
+#define X86_FEATURE_MMX			( 0*32+23) /* Multimedia Extensions */
+#define X86_FEATURE_FXSR		( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
+#define X86_FEATURE_XMM			( 0*32+25) /* "sse" */
+#define X86_FEATURE_XMM2		( 0*32+26) /* "sse2" */
+#define X86_FEATURE_SELFSNOOP		( 0*32+27) /* "ss" CPU self snoop */
+#define X86_FEATURE_HT			( 0*32+28) /* Hyper-Threading */
+#define X86_FEATURE_ACC			( 0*32+29) /* "tm" Automatic clock control */
+#define X86_FEATURE_IA64		( 0*32+30) /* IA-64 processor */
+#define X86_FEATURE_PBE			( 0*32+31) /* Pending Break Enable */
 
 /* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
 /* Don't duplicate feature flags which are redundant with Intel! */
-#define X86_FEATURE_SYSCALL	( 1*32+11) /* SYSCALL/SYSRET */
-#define X86_FEATURE_MP		( 1*32+19) /* MP Capable. */
-#define X86_FEATURE_NX		( 1*32+20) /* Execute Disable */
-#define X86_FEATURE_MMXEXT	( 1*32+22) /* AMD MMX extensions */
-#define X86_FEATURE_FXSR_OPT	( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
-#define X86_FEATURE_GBPAGES	( 1*32+26) /* "pdpe1gb" GB pages */
-#define X86_FEATURE_RDTSCP	( 1*32+27) /* RDTSCP */
-#define X86_FEATURE_LM		( 1*32+29) /* Long Mode (x86-64) */
-#define X86_FEATURE_3DNOWEXT	( 1*32+30) /* AMD 3DNow! extensions */
-#define X86_FEATURE_3DNOW	( 1*32+31) /* 3DNow! */
+#define X86_FEATURE_SYSCALL		( 1*32+11) /* SYSCALL/SYSRET */
+#define X86_FEATURE_MP			( 1*32+19) /* MP Capable */
+#define X86_FEATURE_NX			( 1*32+20) /* Execute Disable */
+#define X86_FEATURE_MMXEXT		( 1*32+22) /* AMD MMX extensions */
+#define X86_FEATURE_FXSR_OPT		( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
+#define X86_FEATURE_GBPAGES		( 1*32+26) /* "pdpe1gb" GB pages */
+#define X86_FEATURE_RDTSCP		( 1*32+27) /* RDTSCP */
+#define X86_FEATURE_LM			( 1*32+29) /* Long Mode (x86-64, 64-bit support) */
+#define X86_FEATURE_3DNOWEXT		( 1*32+30) /* AMD 3DNow extensions */
+#define X86_FEATURE_3DNOW		( 1*32+31) /* 3DNow */
 
 /* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
-#define X86_FEATURE_RECOVERY	( 2*32+ 0) /* CPU in recovery mode */
-#define X86_FEATURE_LONGRUN	( 2*32+ 1) /* Longrun power control */
-#define X86_FEATURE_LRTI	( 2*32+ 3) /* LongRun table interface */
+#define X86_FEATURE_RECOVERY		( 2*32+ 0) /* CPU in recovery mode */
+#define X86_FEATURE_LONGRUN		( 2*32+ 1) /* Longrun power control */
+#define X86_FEATURE_LRTI		( 2*32+ 3) /* LongRun table interface */
 
 /* Other features, Linux-defined mapping, word 3 */
 /* This range is used for feature bits which conflict or are synthesized */
-#define X86_FEATURE_CXMMX	( 3*32+ 0) /* Cyrix MMX extensions */
-#define X86_FEATURE_K6_MTRR	( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
-#define X86_FEATURE_CYRIX_ARR	( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
-#define X86_FEATURE_CENTAUR_MCR	( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
-/* cpu types for specific tunings: */
-#define X86_FEATURE_K8		( 3*32+ 4) /* "" Opteron, Athlon64 */
-#define X86_FEATURE_K7		( 3*32+ 5) /* "" Athlon */
-#define X86_FEATURE_P3		( 3*32+ 6) /* "" P3 */
-#define X86_FEATURE_P4		( 3*32+ 7) /* "" P4 */
-#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
-#define X86_FEATURE_UP		( 3*32+ 9) /* smp kernel running on up */
-#define X86_FEATURE_ART		( 3*32+10) /* Platform has always running timer (ART) */
-#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
-#define X86_FEATURE_PEBS	( 3*32+12) /* Precise-Event Based Sampling */
-#define X86_FEATURE_BTS		( 3*32+13) /* Branch Trace Store */
-#define X86_FEATURE_SYSCALL32	( 3*32+14) /* "" syscall in ia32 userspace */
-#define X86_FEATURE_SYSENTER32	( 3*32+15) /* "" sysenter in ia32 userspace */
-#define X86_FEATURE_REP_GOOD	( 3*32+16) /* rep microcode works well */
-#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
-#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
-#define X86_FEATURE_ACC_POWER	( 3*32+19) /* AMD Accumulated Power Mechanism */
-#define X86_FEATURE_NOPL	( 3*32+20) /* The NOPL (0F 1F) instructions */
-#define X86_FEATURE_ALWAYS	( 3*32+21) /* "" Always-present feature */
-#define X86_FEATURE_XTOPOLOGY	( 3*32+22) /* cpu topology enum extensions */
-#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
-#define X86_FEATURE_NONSTOP_TSC	( 3*32+24) /* TSC does not stop in C states */
-#define X86_FEATURE_CPUID	( 3*32+25) /* CPU has CPUID instruction itself */
-#define X86_FEATURE_EXTD_APICID	( 3*32+26) /* has extended APICID (8 bits) */
-#define X86_FEATURE_AMD_DCM     ( 3*32+27) /* multi-node processor */
-#define X86_FEATURE_APERFMPERF	( 3*32+28) /* APERFMPERF */
-#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
-#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */
+#define X86_FEATURE_CXMMX		( 3*32+ 0) /* Cyrix MMX extensions */
+#define X86_FEATURE_K6_MTRR		( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
+#define X86_FEATURE_CYRIX_ARR		( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
+#define X86_FEATURE_CENTAUR_MCR		( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
+
+/* CPU types for specific tunings: */
+#define X86_FEATURE_K8			( 3*32+ 4) /* "" Opteron, Athlon64 */
+#define X86_FEATURE_K7			( 3*32+ 5) /* "" Athlon */
+#define X86_FEATURE_P3			( 3*32+ 6) /* "" P3 */
+#define X86_FEATURE_P4			( 3*32+ 7) /* "" P4 */
+#define X86_FEATURE_CONSTANT_TSC	( 3*32+ 8) /* TSC ticks at a constant rate */
+#define X86_FEATURE_UP			( 3*32+ 9) /* SMP kernel running on UP */
+#define X86_FEATURE_ART			( 3*32+10) /* Always running timer (ART) */
+#define X86_FEATURE_ARCH_PERFMON	( 3*32+11) /* Intel Architectural PerfMon */
+#define X86_FEATURE_PEBS		( 3*32+12) /* Precise-Event Based Sampling */
+#define X86_FEATURE_BTS			( 3*32+13) /* Branch Trace Store */
+#define X86_FEATURE_SYSCALL32		( 3*32+14) /* "" syscall in IA32 userspace */
+#define X86_FEATURE_SYSENTER32		( 3*32+15) /* "" sysenter in IA32 userspace */
+#define X86_FEATURE_REP_GOOD		( 3*32+16) /* REP microcode works well */
+#define X86_FEATURE_MFENCE_RDTSC	( 3*32+17) /* "" MFENCE synchronizes RDTSC */
+#define X86_FEATURE_LFENCE_RDTSC	( 3*32+18) /* "" LFENCE synchronizes RDTSC */
+#define X86_FEATURE_ACC_POWER		( 3*32+19) /* AMD Accumulated Power Mechanism */
+#define X86_FEATURE_NOPL		( 3*32+20) /* The NOPL (0F 1F) instructions */
+#define X86_FEATURE_ALWAYS		( 3*32+21) /* "" Always-present feature */
+#define X86_FEATURE_XTOPOLOGY		( 3*32+22) /* CPU topology enum extensions */
+#define X86_FEATURE_TSC_RELIABLE	( 3*32+23) /* TSC is known to be reliable */
+#define X86_FEATURE_NONSTOP_TSC		( 3*32+24) /* TSC does not stop in C states */
+#define X86_FEATURE_CPUID		( 3*32+25) /* CPU has CPUID instruction itself */
+#define X86_FEATURE_EXTD_APICID		( 3*32+26) /* Extended APICID (8 bits) */
+#define X86_FEATURE_AMD_DCM		( 3*32+27) /* AMD multi-node processor */
+#define X86_FEATURE_APERFMPERF		( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */
+#define X86_FEATURE_NONSTOP_TSC_S3	( 3*32+30) /* TSC doesn't stop in S3 state */
+#define X86_FEATURE_TSC_KNOWN_FREQ	( 3*32+31) /* TSC has known frequency */
 
-/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
-#define X86_FEATURE_XMM3	( 4*32+ 0) /* "pni" SSE-3 */
-#define X86_FEATURE_PCLMULQDQ	( 4*32+ 1) /* PCLMULQDQ instruction */
-#define X86_FEATURE_DTES64	( 4*32+ 2) /* 64-bit Debug Store */
-#define X86_FEATURE_MWAIT	( 4*32+ 3) /* "monitor" Monitor/Mwait support */
-#define X86_FEATURE_DSCPL	( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
-#define X86_FEATURE_VMX		( 4*32+ 5) /* Hardware virtualization */
-#define X86_FEATURE_SMX		( 4*32+ 6) /* Safer mode */
-#define X86_FEATURE_EST		( 4*32+ 7) /* Enhanced SpeedStep */
-#define X86_FEATURE_TM2		( 4*32+ 8) /* Thermal Monitor 2 */
-#define X86_FEATURE_SSSE3	( 4*32+ 9) /* Supplemental SSE-3 */
-#define X86_FEATURE_CID		( 4*32+10) /* Context ID */
-#define X86_FEATURE_SDBG	( 4*32+11) /* Silicon Debug */
-#define X86_FEATURE_FMA		( 4*32+12) /* Fused multiply-add */
-#define X86_FEATURE_CX16	( 4*32+13) /* CMPXCHG16B */
-#define X86_FEATURE_XTPR	( 4*32+14) /* Send Task Priority Messages */
-#define X86_FEATURE_PDCM	( 4*32+15) /* Performance Capabilities */
-#define X86_FEATURE_PCID	( 4*32+17) /* Process Context Identifiers */
-#define X86_FEATURE_DCA		( 4*32+18) /* Direct Cache Access */
-#define X86_FEATURE_XMM4_1	( 4*32+19) /* "sse4_1" SSE-4.1 */
-#define X86_FEATURE_XMM4_2	( 4*32+20) /* "sse4_2" SSE-4.2 */
-#define X86_FEATURE_X2APIC	( 4*32+21) /* x2APIC */
-#define X86_FEATURE_MOVBE	( 4*32+22) /* MOVBE instruction */
-#define X86_FEATURE_POPCNT      ( 4*32+23) /* POPCNT instruction */
-#define X86_FEATURE_TSC_DEADLINE_TIMER	( 4*32+24) /* Tsc deadline timer */
-#define X86_FEATURE_AES		( 4*32+25) /* AES instructions */
-#define X86_FEATURE_XSAVE	( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
-#define X86_FEATURE_OSXSAVE	( 4*32+27) /* "" XSAVE enabled in the OS */
-#define X86_FEATURE_AVX		( 4*32+28) /* Advanced Vector Extensions */
-#define X86_FEATURE_F16C	( 4*32+29) /* 16-bit fp conversions */
-#define X86_FEATURE_RDRAND	( 4*32+30) /* The RDRAND instruction */
-#define X86_FEATURE_HYPERVISOR	( 4*32+31) /* Running on a hypervisor */
+/* Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 */
+#define X86_FEATURE_XMM3		( 4*32+ 0) /* "pni" SSE-3 */
+#define X86_FEATURE_PCLMULQDQ		( 4*32+ 1) /* PCLMULQDQ instruction */
+#define X86_FEATURE_DTES64		( 4*32+ 2) /* 64-bit Debug Store */
+#define X86_FEATURE_MWAIT		( 4*32+ 3) /* "monitor" MONITOR/MWAIT support */
+#define X86_FEATURE_DSCPL		( 4*32+ 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */
+#define X86_FEATURE_VMX			( 4*32+ 5) /* Hardware virtualization */
+#define X86_FEATURE_SMX			( 4*32+ 6) /* Safer Mode eXtensions */
+#define X86_FEATURE_EST			( 4*32+ 7) /* Enhanced SpeedStep */
+#define X86_FEATURE_TM2			( 4*32+ 8) /* Thermal Monitor 2 */
+#define X86_FEATURE_SSSE3		( 4*32+ 9) /* Supplemental SSE-3 */
+#define X86_FEATURE_CID			( 4*32+10) /* Context ID */
+#define X86_FEATURE_SDBG		( 4*32+11) /* Silicon Debug */
+#define X86_FEATURE_FMA			( 4*32+12) /* Fused multiply-add */
+#define X86_FEATURE_CX16		( 4*32+13) /* CMPXCHG16B instruction */
+#define X86_FEATURE_XTPR		( 4*32+14) /* Send Task Priority Messages */
+#define X86_FEATURE_PDCM		( 4*32+15) /* Perf/Debug Capabilities MSR */
+#define X86_FEATURE_PCID		( 4*32+17) /* Process Context Identifiers */
+#define X86_FEATURE_DCA			( 4*32+18) /* Direct Cache Access */
+#define X86_FEATURE_XMM4_1		( 4*32+19) /* "sse4_1" SSE-4.1 */
+#define X86_FEATURE_XMM4_2		( 4*32+20) /* "sse4_2" SSE-4.2 */
+#define X86_FEATURE_X2APIC		( 4*32+21) /* X2APIC */
+#define X86_FEATURE_MOVBE		( 4*32+22) /* MOVBE instruction */
+#define X86_FEATURE_POPCNT		( 4*32+23) /* POPCNT instruction */
+#define X86_FEATURE_TSC_DEADLINE_TIMER	( 4*32+24) /* TSC deadline timer */
+#define X86_FEATURE_AES			( 4*32+25) /* AES instructions */
+#define X86_FEATURE_XSAVE		( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV instructions */
+#define X86_FEATURE_OSXSAVE		( 4*32+27) /* "" XSAVE instruction enabled in the OS */
+#define X86_FEATURE_AVX			( 4*32+28) /* Advanced Vector Extensions */
+#define X86_FEATURE_F16C		( 4*32+29) /* 16-bit FP conversions */
+#define X86_FEATURE_RDRAND		( 4*32+30) /* RDRAND instruction */
+#define X86_FEATURE_HYPERVISOR		( 4*32+31) /* Running on a hypervisor */
 
 /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
-#define X86_FEATURE_XSTORE	( 5*32+ 2) /* "rng" RNG present (xstore) */
-#define X86_FEATURE_XSTORE_EN	( 5*32+ 3) /* "rng_en" RNG enabled */
-#define X86_FEATURE_XCRYPT	( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
-#define X86_FEATURE_XCRYPT_EN	( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
-#define X86_FEATURE_ACE2	( 5*32+ 8) /* Advanced Cryptography Engine v2 */
-#define X86_FEATURE_ACE2_EN	( 5*32+ 9) /* ACE v2 enabled */
-#define X86_FEATURE_PHE		( 5*32+10) /* PadLock Hash Engine */
-#define X86_FEATURE_PHE_EN	( 5*32+11) /* PHE enabled */
-#define X86_FEATURE_PMM		( 5*32+12) /* PadLock Montgomery Multiplier */
-#define X86_FEATURE_PMM_EN	( 5*32+13) /* PMM enabled */
+#define X86_FEATURE_XSTORE		( 5*32+ 2) /* "rng" RNG present (xstore) */
+#define X86_FEATURE_XSTORE_EN		( 5*32+ 3) /* "rng_en" RNG enabled */
+#define X86_FEATURE_XCRYPT		( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
+#define X86_FEATURE_XCRYPT_EN		( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
+#define X86_FEATURE_ACE2		( 5*32+ 8) /* Advanced Cryptography Engine v2 */
+#define X86_FEATURE_ACE2_EN		( 5*32+ 9) /* ACE v2 enabled */
+#define X86_FEATURE_PHE			( 5*32+10) /* PadLock Hash Engine */
+#define X86_FEATURE_PHE_EN		( 5*32+11) /* PHE enabled */
+#define X86_FEATURE_PMM			( 5*32+12) /* PadLock Montgomery Multiplier */
+#define X86_FEATURE_PMM_EN		( 5*32+13) /* PMM enabled */
 
-/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
-#define X86_FEATURE_LAHF_LM	( 6*32+ 0) /* LAHF/SAHF in long mode */
-#define X86_FEATURE_CMP_LEGACY	( 6*32+ 1) /* If yes HyperThreading not valid */
-#define X86_FEATURE_SVM		( 6*32+ 2) /* Secure virtual machine */
-#define X86_FEATURE_EXTAPIC	( 6*32+ 3) /* Extended APIC space */
-#define X86_FEATURE_CR8_LEGACY	( 6*32+ 4) /* CR8 in 32-bit mode */
-#define X86_FEATURE_ABM		( 6*32+ 5) /* Advanced bit manipulation */
-#define X86_FEATURE_SSE4A	( 6*32+ 6) /* SSE-4A */
-#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
-#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
-#define X86_FEATURE_OSVW	( 6*32+ 9) /* OS Visible Workaround */
-#define X86_FEATURE_IBS		( 6*32+10) /* Instruction Based Sampling */
-#define X86_FEATURE_XOP		( 6*32+11) /* extended AVX instructions */
-#define X86_FEATURE_SKINIT	( 6*32+12) /* SKINIT/STGI instructions */
-#define X86_FEATURE_WDT		( 6*32+13) /* Watchdog timer */
-#define X86_FEATURE_LWP		( 6*32+15) /* Light Weight Profiling */
-#define X86_FEATURE_FMA4	( 6*32+16) /* 4 operands MAC instructions */
-#define X86_FEATURE_TCE		( 6*32+17) /* translation cache extension */
-#define X86_FEATURE_NODEID_MSR	( 6*32+19) /* NodeId MSR */
-#define X86_FEATURE_TBM		( 6*32+21) /* trailing bit manipulations */
-#define X86_FEATURE_TOPOEXT	( 6*32+22) /* topology extensions CPUID leafs */
-#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
-#define X86_FEATURE_PERFCTR_NB  ( 6*32+24) /* NB performance counter extensions */
-#define X86_FEATURE_BPEXT	(6*32+26) /* data breakpoint extension */
-#define X86_FEATURE_PTSC	( 6*32+27) /* performance time-stamp counter */
-#define X86_FEATURE_PERFCTR_LLC	( 6*32+28) /* Last Level Cache performance counter extensions */
-#define X86_FEATURE_MWAITX	( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
+/* More extended AMD flags: CPUID level 0x80000001, ECX, word 6 */
+#define X86_FEATURE_LAHF_LM		( 6*32+ 0) /* LAHF/SAHF in long mode */
+#define X86_FEATURE_CMP_LEGACY		( 6*32+ 1) /* If yes HyperThreading not valid */
+#define X86_FEATURE_SVM			( 6*32+ 2) /* Secure Virtual Machine */
+#define X86_FEATURE_EXTAPIC		( 6*32+ 3) /* Extended APIC space */
+#define X86_FEATURE_CR8_LEGACY		( 6*32+ 4) /* CR8 in 32-bit mode */
+#define X86_FEATURE_ABM			( 6*32+ 5) /* Advanced bit manipulation */
+#define X86_FEATURE_SSE4A		( 6*32+ 6) /* SSE-4A */
+#define X86_FEATURE_MISALIGNSSE		( 6*32+ 7) /* Misaligned SSE mode */
+#define X86_FEATURE_3DNOWPREFETCH	( 6*32+ 8) /* 3DNow prefetch instructions */
+#define X86_FEATURE_OSVW		( 6*32+ 9) /* OS Visible Workaround */
+#define X86_FEATURE_IBS			( 6*32+10) /* Instruction Based Sampling */
+#define X86_FEATURE_XOP			( 6*32+11) /* extended AVX instructions */
+#define X86_FEATURE_SKINIT		( 6*32+12) /* SKINIT/STGI instructions */
+#define X86_FEATURE_WDT			( 6*32+13) /* Watchdog timer */
+#define X86_FEATURE_LWP			( 6*32+15) /* Light Weight Profiling */
+#define X86_FEATURE_FMA4		( 6*32+16) /* 4 operands MAC instructions */
+#define X86_FEATURE_TCE			( 6*32+17) /* Translation Cache Extension */
+#define X86_FEATURE_NODEID_MSR		( 6*32+19) /* NodeId MSR */
+#define X86_FEATURE_TBM			( 6*32+21) /* Trailing Bit Manipulations */
+#define X86_FEATURE_TOPOEXT		( 6*32+22) /* Topology extensions CPUID leafs */
+#define X86_FEATURE_PERFCTR_CORE	( 6*32+23) /* Core performance counter extensions */
+#define X86_FEATURE_PERFCTR_NB		( 6*32+24) /* NB performance counter extensions */
+#define X86_FEATURE_BPEXT		( 6*32+26) /* Data breakpoint extension */
+#define X86_FEATURE_PTSC		( 6*32+27) /* Performance time-stamp counter */
+#define X86_FEATURE_PERFCTR_LLC		( 6*32+28) /* Last Level Cache performance counter extensions */
+#define X86_FEATURE_MWAITX		( 6*32+29) /* MWAIT extension (MONITORX/MWAITX instructions) */
 
 /*
  * Auxiliary flags: Linux defined - For features scattered in various
@@ -187,146 +190,154 @@
  *
  * Reuse free bits when adding new feature flags!
  */
-#define X86_FEATURE_RING3MWAIT	( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */
-#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */
-#define X86_FEATURE_CPB		( 7*32+ 2) /* AMD Core Performance Boost */
-#define X86_FEATURE_EPB		( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
-#define X86_FEATURE_CAT_L3	( 7*32+ 4) /* Cache Allocation Technology L3 */
-#define X86_FEATURE_CAT_L2	( 7*32+ 5) /* Cache Allocation Technology L2 */
-#define X86_FEATURE_CDP_L3	( 7*32+ 6) /* Code and Data Prioritization L3 */
+#define X86_FEATURE_RING3MWAIT		( 7*32+ 0) /* Ring 3 MONITOR/MWAIT instructions */
+#define X86_FEATURE_CPUID_FAULT		( 7*32+ 1) /* Intel CPUID faulting */
+#define X86_FEATURE_CPB			( 7*32+ 2) /* AMD Core Performance Boost */
+#define X86_FEATURE_EPB			( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
+#define X86_FEATURE_CAT_L3		( 7*32+ 4) /* Cache Allocation Technology L3 */
+#define X86_FEATURE_CAT_L2		( 7*32+ 5) /* Cache Allocation Technology L2 */
+#define X86_FEATURE_CDP_L3		( 7*32+ 6) /* Code and Data Prioritization L3 */
 
-#define X86_FEATURE_HW_PSTATE	( 7*32+ 8) /* AMD HW-PState */
-#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
-#define X86_FEATURE_SME		( 7*32+10) /* AMD Secure Memory Encryption */
+#define X86_FEATURE_HW_PSTATE		( 7*32+ 8) /* AMD HW-PState */
+#define X86_FEATURE_PROC_FEEDBACK	( 7*32+ 9) /* AMD ProcFeedbackInterface */
+#define X86_FEATURE_SME			( 7*32+10) /* AMD Secure Memory Encryption */
 
-#define X86_FEATURE_INTEL_PPIN	( 7*32+14) /* Intel Processor Inventory Number */
-#define X86_FEATURE_INTEL_PT	( 7*32+15) /* Intel Processor Trace */
-#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
-#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_INTEL_PPIN		( 7*32+14) /* Intel Processor Inventory Number */
+#define X86_FEATURE_INTEL_PT		( 7*32+15) /* Intel Processor Trace */
+#define X86_FEATURE_AVX512_4VNNIW	( 7*32+16) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS	( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */
 
-#define X86_FEATURE_MBA         ( 7*32+18) /* Memory Bandwidth Allocation */
+#define X86_FEATURE_MBA			( 7*32+18) /* Memory Bandwidth Allocation */
 
 /* Virtualization flags: Linux defined, word 8 */
-#define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
-#define X86_FEATURE_VNMI        ( 8*32+ 1) /* Intel Virtual NMI */
-#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
-#define X86_FEATURE_EPT         ( 8*32+ 3) /* Intel Extended Page Table */
-#define X86_FEATURE_VPID        ( 8*32+ 4) /* Intel Virtual Processor ID */
+#define X86_FEATURE_TPR_SHADOW		( 8*32+ 0) /* Intel TPR Shadow */
+#define X86_FEATURE_VNMI		( 8*32+ 1) /* Intel Virtual NMI */
+#define X86_FEATURE_FLEXPRIORITY	( 8*32+ 2) /* Intel FlexPriority */
+#define X86_FEATURE_EPT			( 8*32+ 3) /* Intel Extended Page Table */
+#define X86_FEATURE_VPID		( 8*32+ 4) /* Intel Virtual Processor ID */
 
-#define X86_FEATURE_VMMCALL     ( 8*32+15) /* Prefer vmmcall to vmcall */
-#define X86_FEATURE_XENPV       ( 8*32+16) /* "" Xen paravirtual guest */
+#define X86_FEATURE_VMMCALL		( 8*32+15) /* Prefer VMMCALL to VMCALL */
+#define X86_FEATURE_XENPV		( 8*32+16) /* "" Xen paravirtual guest */
 
 
-/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
-#define X86_FEATURE_FSGSBASE	( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
-#define X86_FEATURE_TSC_ADJUST	( 9*32+ 1) /* TSC adjustment MSR 0x3b */
-#define X86_FEATURE_BMI1	( 9*32+ 3) /* 1st group bit manipulation extensions */
-#define X86_FEATURE_HLE		( 9*32+ 4) /* Hardware Lock Elision */
-#define X86_FEATURE_AVX2	( 9*32+ 5) /* AVX2 instructions */
-#define X86_FEATURE_SMEP	( 9*32+ 7) /* Supervisor Mode Execution Protection */
-#define X86_FEATURE_BMI2	( 9*32+ 8) /* 2nd group bit manipulation extensions */
-#define X86_FEATURE_ERMS	( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
-#define X86_FEATURE_INVPCID	( 9*32+10) /* Invalidate Processor Context ID */
-#define X86_FEATURE_RTM		( 9*32+11) /* Restricted Transactional Memory */
-#define X86_FEATURE_CQM		( 9*32+12) /* Cache QoS Monitoring */
-#define X86_FEATURE_MPX		( 9*32+14) /* Memory Protection Extension */
-#define X86_FEATURE_RDT_A	( 9*32+15) /* Resource Director Technology Allocation */
-#define X86_FEATURE_AVX512F	( 9*32+16) /* AVX-512 Foundation */
-#define X86_FEATURE_AVX512DQ	( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
-#define X86_FEATURE_RDSEED	( 9*32+18) /* The RDSEED instruction */
-#define X86_FEATURE_ADX		( 9*32+19) /* The ADCX and ADOX instructions */
-#define X86_FEATURE_SMAP	( 9*32+20) /* Supervisor Mode Access Prevention */
-#define X86_FEATURE_AVX512IFMA  ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
-#define X86_FEATURE_CLFLUSHOPT	( 9*32+23) /* CLFLUSHOPT instruction */
-#define X86_FEATURE_CLWB	( 9*32+24) /* CLWB instruction */
-#define X86_FEATURE_AVX512PF	( 9*32+26) /* AVX-512 Prefetch */
-#define X86_FEATURE_AVX512ER	( 9*32+27) /* AVX-512 Exponential and Reciprocal */
-#define X86_FEATURE_AVX512CD	( 9*32+28) /* AVX-512 Conflict Detection */
-#define X86_FEATURE_SHA_NI	( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
-#define X86_FEATURE_AVX512BW	( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
-#define X86_FEATURE_AVX512VL	( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
+#define X86_FEATURE_FSGSBASE		( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
+#define X86_FEATURE_TSC_ADJUST		( 9*32+ 1) /* TSC adjustment MSR 0x3B */
+#define X86_FEATURE_BMI1		( 9*32+ 3) /* 1st group bit manipulation extensions */
+#define X86_FEATURE_HLE			( 9*32+ 4) /* Hardware Lock Elision */
+#define X86_FEATURE_AVX2		( 9*32+ 5) /* AVX2 instructions */
+#define X86_FEATURE_SMEP		( 9*32+ 7) /* Supervisor Mode Execution Protection */
+#define X86_FEATURE_BMI2		( 9*32+ 8) /* 2nd group bit manipulation extensions */
+#define X86_FEATURE_ERMS		( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */
+#define X86_FEATURE_INVPCID		( 9*32+10) /* Invalidate Processor Context ID */
+#define X86_FEATURE_RTM			( 9*32+11) /* Restricted Transactional Memory */
+#define X86_FEATURE_CQM			( 9*32+12) /* Cache QoS Monitoring */
+#define X86_FEATURE_MPX			( 9*32+14) /* Memory Protection Extension */
+#define X86_FEATURE_RDT_A		( 9*32+15) /* Resource Director Technology Allocation */
+#define X86_FEATURE_AVX512F		( 9*32+16) /* AVX-512 Foundation */
+#define X86_FEATURE_AVX512DQ		( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
+#define X86_FEATURE_RDSEED		( 9*32+18) /* RDSEED instruction */
+#define X86_FEATURE_ADX			( 9*32+19) /* ADCX and ADOX instructions */
+#define X86_FEATURE_SMAP		( 9*32+20) /* Supervisor Mode Access Prevention */
+#define X86_FEATURE_AVX512IFMA		( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
+#define X86_FEATURE_CLFLUSHOPT		( 9*32+23) /* CLFLUSHOPT instruction */
+#define X86_FEATURE_CLWB		( 9*32+24) /* CLWB instruction */
+#define X86_FEATURE_AVX512PF		( 9*32+26) /* AVX-512 Prefetch */
+#define X86_FEATURE_AVX512ER		( 9*32+27) /* AVX-512 Exponential and Reciprocal */
+#define X86_FEATURE_AVX512CD		( 9*32+28) /* AVX-512 Conflict Detection */
+#define X86_FEATURE_SHA_NI		( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
+#define X86_FEATURE_AVX512BW		( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
+#define X86_FEATURE_AVX512VL		( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
 
-/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
-#define X86_FEATURE_XSAVEOPT	(10*32+ 0) /* XSAVEOPT */
-#define X86_FEATURE_XSAVEC	(10*32+ 1) /* XSAVEC */
-#define X86_FEATURE_XGETBV1	(10*32+ 2) /* XGETBV with ECX = 1 */
-#define X86_FEATURE_XSAVES	(10*32+ 3) /* XSAVES/XRSTORS */
+/* Extended state features, CPUID level 0x0000000d:1 (EAX), word 10 */
+#define X86_FEATURE_XSAVEOPT		(10*32+ 0) /* XSAVEOPT instruction */
+#define X86_FEATURE_XSAVEC		(10*32+ 1) /* XSAVEC instruction */
+#define X86_FEATURE_XGETBV1		(10*32+ 2) /* XGETBV with ECX = 1 instruction */
+#define X86_FEATURE_XSAVES		(10*32+ 3) /* XSAVES/XRSTORS instructions */
 
-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
-#define X86_FEATURE_CQM_LLC	(11*32+ 1) /* LLC QoS if 1 */
+/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (EDX), word 11 */
+#define X86_FEATURE_CQM_LLC		(11*32+ 1) /* LLC QoS if 1 */
 
-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
-#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
-#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */
-#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */
+/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (EDX), word 12 */
+#define X86_FEATURE_CQM_OCCUP_LLC	(12*32+ 0) /* LLC occupancy monitoring */
+#define X86_FEATURE_CQM_MBM_TOTAL	(12*32+ 1) /* LLC Total MBM monitoring */
+#define X86_FEATURE_CQM_MBM_LOCAL	(12*32+ 2) /* LLC Local MBM monitoring */
 
-/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
-#define X86_FEATURE_CLZERO	(13*32+0) /* CLZERO instruction */
-#define X86_FEATURE_IRPERF	(13*32+1) /* Instructions Retired Count */
+/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
+#define X86_FEATURE_CLZERO		(13*32+ 0) /* CLZERO instruction */
+#define X86_FEATURE_IRPERF		(13*32+ 1) /* Instructions Retired Count */
 
-/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
-#define X86_FEATURE_DTHERM	(14*32+ 0) /* Digital Thermal Sensor */
-#define X86_FEATURE_IDA		(14*32+ 1) /* Intel Dynamic Acceleration */
-#define X86_FEATURE_ARAT	(14*32+ 2) /* Always Running APIC Timer */
-#define X86_FEATURE_PLN		(14*32+ 4) /* Intel Power Limit Notification */
-#define X86_FEATURE_PTS		(14*32+ 6) /* Intel Package Thermal Status */
-#define X86_FEATURE_HWP		(14*32+ 7) /* Intel Hardware P-states */
-#define X86_FEATURE_HWP_NOTIFY	(14*32+ 8) /* HWP Notification */
-#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
-#define X86_FEATURE_HWP_EPP	(14*32+10) /* HWP Energy Perf. Preference */
-#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
+/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
+#define X86_FEATURE_DTHERM		(14*32+ 0) /* Digital Thermal Sensor */
+#define X86_FEATURE_IDA			(14*32+ 1) /* Intel Dynamic Acceleration */
+#define X86_FEATURE_ARAT		(14*32+ 2) /* Always Running APIC Timer */
+#define X86_FEATURE_PLN			(14*32+ 4) /* Intel Power Limit Notification */
+#define X86_FEATURE_PTS			(14*32+ 6) /* Intel Package Thermal Status */
+#define X86_FEATURE_HWP			(14*32+ 7) /* Intel Hardware P-states */
+#define X86_FEATURE_HWP_NOTIFY		(14*32+ 8) /* HWP Notification */
+#define X86_FEATURE_HWP_ACT_WINDOW	(14*32+ 9) /* HWP Activity Window */
+#define X86_FEATURE_HWP_EPP		(14*32+10) /* HWP Energy Perf. Preference */
+#define X86_FEATURE_HWP_PKG_REQ		(14*32+11) /* HWP Package Level Request */
 
-/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */
-#define X86_FEATURE_NPT		(15*32+ 0) /* Nested Page Table support */
-#define X86_FEATURE_LBRV	(15*32+ 1) /* LBR Virtualization support */
-#define X86_FEATURE_SVML	(15*32+ 2) /* "svm_lock" SVM locking MSR */
-#define X86_FEATURE_NRIPS	(15*32+ 3) /* "nrip_save" SVM next_rip save */
-#define X86_FEATURE_TSCRATEMSR  (15*32+ 4) /* "tsc_scale" TSC scaling support */
-#define X86_FEATURE_VMCBCLEAN   (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
-#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
-#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
-#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
-#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
-#define X86_FEATURE_AVIC	(15*32+13) /* Virtual Interrupt Controller */
-#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
-#define X86_FEATURE_VGIF	(15*32+16) /* Virtual GIF */
+/* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */
+#define X86_FEATURE_NPT			(15*32+ 0) /* Nested Page Table support */
+#define X86_FEATURE_LBRV		(15*32+ 1) /* LBR Virtualization support */
+#define X86_FEATURE_SVML		(15*32+ 2) /* "svm_lock" SVM locking MSR */
+#define X86_FEATURE_NRIPS		(15*32+ 3) /* "nrip_save" SVM next_rip save */
+#define X86_FEATURE_TSCRATEMSR		(15*32+ 4) /* "tsc_scale" TSC scaling support */
+#define X86_FEATURE_VMCBCLEAN		(15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
+#define X86_FEATURE_FLUSHBYASID		(15*32+ 6) /* flush-by-ASID support */
+#define X86_FEATURE_DECODEASSISTS	(15*32+ 7) /* Decode Assists support */
+#define X86_FEATURE_PAUSEFILTER		(15*32+10) /* filtered pause intercept */
+#define X86_FEATURE_PFTHRESHOLD		(15*32+12) /* pause filter threshold */
+#define X86_FEATURE_AVIC		(15*32+13) /* Virtual Interrupt Controller */
+#define X86_FEATURE_V_VMSAVE_VMLOAD	(15*32+15) /* Virtual VMSAVE VMLOAD */
+#define X86_FEATURE_VGIF		(15*32+16) /* Virtual GIF */
 
-/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
-#define X86_FEATURE_AVX512VBMI  (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
-#define X86_FEATURE_PKU		(16*32+ 3) /* Protection Keys for Userspace */
-#define X86_FEATURE_OSPKE	(16*32+ 4) /* OS Protection Keys Enable */
-#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
-#define X86_FEATURE_LA57	(16*32+16) /* 5-level page tables */
-#define X86_FEATURE_RDPID	(16*32+22) /* RDPID instruction */
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
+#define X86_FEATURE_AVX512VBMI		(16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
+#define X86_FEATURE_UMIP		(16*32+ 2) /* User Mode Instruction Protection */
+#define X86_FEATURE_PKU			(16*32+ 3) /* Protection Keys for Userspace */
+#define X86_FEATURE_OSPKE		(16*32+ 4) /* OS Protection Keys Enable */
+#define X86_FEATURE_AVX512_VBMI2	(16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
+#define X86_FEATURE_GFNI		(16*32+ 8) /* Galois Field New Instructions */
+#define X86_FEATURE_VAES		(16*32+ 9) /* Vector AES */
+#define X86_FEATURE_VPCLMULQDQ		(16*32+10) /* Carry-Less Multiplication Double Quadword */
+#define X86_FEATURE_AVX512_VNNI		(16*32+11) /* Vector Neural Network Instructions */
+#define X86_FEATURE_AVX512_BITALG	(16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */
+#define X86_FEATURE_AVX512_VPOPCNTDQ	(16*32+14) /* POPCNT for vectors of DW/QW */
+#define X86_FEATURE_LA57		(16*32+16) /* 5-level page tables */
+#define X86_FEATURE_RDPID		(16*32+22) /* RDPID instruction */
 
-/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
-#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */
-#define X86_FEATURE_SUCCOR	(17*32+1) /* Uncorrectable error containment and recovery */
-#define X86_FEATURE_SMCA	(17*32+3) /* Scalable MCA */
+/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
+#define X86_FEATURE_OVERFLOW_RECOV	(17*32+ 0) /* MCA overflow recovery support */
+#define X86_FEATURE_SUCCOR		(17*32+ 1) /* Uncorrectable error containment and recovery */
+#define X86_FEATURE_SMCA		(17*32+ 3) /* Scalable MCA */
 
 /*
  * BUG word(s)
  */
-#define X86_BUG(x)		(NCAPINTS*32 + (x))
+#define X86_BUG(x)			(NCAPINTS*32 + (x))
 
-#define X86_BUG_F00F		X86_BUG(0) /* Intel F00F */
-#define X86_BUG_FDIV		X86_BUG(1) /* FPU FDIV */
-#define X86_BUG_COMA		X86_BUG(2) /* Cyrix 6x86 coma */
-#define X86_BUG_AMD_TLB_MMATCH	X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
-#define X86_BUG_AMD_APIC_C1E	X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
-#define X86_BUG_11AP		X86_BUG(5) /* Bad local APIC aka 11AP */
-#define X86_BUG_FXSAVE_LEAK	X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
-#define X86_BUG_CLFLUSH_MONITOR	X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
-#define X86_BUG_SYSRET_SS_ATTRS	X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
+#define X86_BUG_F00F			X86_BUG(0) /* Intel F00F */
+#define X86_BUG_FDIV			X86_BUG(1) /* FPU FDIV */
+#define X86_BUG_COMA			X86_BUG(2) /* Cyrix 6x86 coma */
+#define X86_BUG_AMD_TLB_MMATCH		X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
+#define X86_BUG_AMD_APIC_C1E		X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
+#define X86_BUG_11AP			X86_BUG(5) /* Bad local APIC aka 11AP */
+#define X86_BUG_FXSAVE_LEAK		X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
+#define X86_BUG_CLFLUSH_MONITOR		X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
+#define X86_BUG_SYSRET_SS_ATTRS		X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
 #ifdef CONFIG_X86_32
 /*
  * 64-bit kernels don't use X86_BUG_ESPFIX.  Make the define conditional
  * to avoid confusion.
  */
-#define X86_BUG_ESPFIX		X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
+#define X86_BUG_ESPFIX			X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
 #endif
-#define X86_BUG_NULL_SEG	X86_BUG(10) /* Nulling a selector preserves the base */
-#define X86_BUG_SWAPGS_FENCE	X86_BUG(11) /* SWAPGS without input dep on GS */
-#define X86_BUG_MONITOR		X86_BUG(12) /* IPI required to wake up remote CPU */
-#define X86_BUG_AMD_E400	X86_BUG(13) /* CPU is among the affected by Erratum 400 */
+#define X86_BUG_NULL_SEG		X86_BUG(10) /* Nulling a selector preserves the base */
+#define X86_BUG_SWAPGS_FENCE		X86_BUG(11) /* SWAPGS without input dep on GS */
+#define X86_BUG_MONITOR			X86_BUG(12) /* IPI required to wake up remote CPU */
+#define X86_BUG_AMD_E400		X86_BUG(13) /* CPU is among the affected by Erratum 400 */
+
 #endif /* _ASM_X86_CPUFEATURES_H */
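
For reference, every flag above is encoded as (word*32 + bit): the word picks
one of the NCAPINTS 32-bit capability words and the remainder picks the bit
inside it, with X86_BUG(x) simply starting past the last capability word. A
minimal sketch of that decoding (the helper names are mine, not from the
header):

	/* e.g. X86_FEATURE_UMIP == (16*32 + 2)  ->  word 16, bit 2 */
	static inline unsigned int feature_word(unsigned int feature)
	{
		return feature / 32;
	}

	static inline unsigned int feature_bit(unsigned int feature)
	{
		return feature % 32;
	}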
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index c10c9128f54e..14d6d5007314 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -16,6 +16,12 @@
 # define DISABLE_MPX	(1<<(X86_FEATURE_MPX & 31))
 #endif
 
+#ifdef CONFIG_X86_INTEL_UMIP
+# define DISABLE_UMIP	0
+#else
+# define DISABLE_UMIP	(1<<(X86_FEATURE_UMIP & 31))
+#endif
+
 #ifdef CONFIG_X86_64
 # define DISABLE_VME		(1<<(X86_FEATURE_VME & 31))
 # define DISABLE_K6_MTRR	(1<<(X86_FEATURE_K6_MTRR & 31))
@@ -63,7 +69,7 @@
 #define DISABLED_MASK13	0
 #define DISABLED_MASK14	0
 #define DISABLED_MASK15	0
-#define DISABLED_MASK16	(DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57)
+#define DISABLED_MASK16	(DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP)
 #define DISABLED_MASK17	0
 #define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
 
diff --git a/tools/include/uapi/asm-generic/mman.h b/tools/include/uapi/asm-generic/mman.h
index 2dffcbf705b3..653687d9771b 100644
--- a/tools/include/uapi/asm-generic/mman.h
+++ b/tools/include/uapi/asm-generic/mman.h
@@ -13,6 +13,7 @@
 #define MAP_NONBLOCK	0x10000		/* do not block on IO */
 #define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
 #define MAP_HUGETLB	0x40000		/* create a huge page mapping */
+#define MAP_SYNC	0x80000		/* perform synchronous page faults for the mapping */
 
 /* Bits [26:31] are reserved, see mman-common.h for MAP_HUGETLB usage */
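
MAP_SYNC is meant to be used together with MAP_SHARED_VALIDATE on files that
live on a DAX-capable filesystem; a hedged usage sketch (the fd, the fallback
path and the availability of the new flag in the installed headers are
assumptions, not part of the patch):

	#include <sys/mman.h>
	#include <errno.h>
	#include <stddef.h>

	/* assumes fd refers to a file on a DAX filesystem and len is page aligned */
	static void *map_sync_or_fallback(int fd, size_t len)
	{
		void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
			       MAP_SHARED_VALIDATE | MAP_SYNC, fd, 0);

		if (p == MAP_FAILED && errno == EOPNOTSUPP)	/* not supported here */
			p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
		return p;
	}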
 
diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h
index 97677cd6964d..6fdff5945c8a 100644
--- a/tools/include/uapi/drm/drm.h
+++ b/tools/include/uapi/drm/drm.h
@@ -737,6 +737,28 @@ struct drm_syncobj_array {
 	__u32 pad;
 };
 
+/* Query current scanout sequence number */
+struct drm_crtc_get_sequence {
+	__u32 crtc_id;		/* requested crtc_id */
+	__u32 active;		/* return: crtc output is active */
+	__u64 sequence;		/* return: most recent vblank sequence */
+	__s64 sequence_ns;	/* return: most recent time of first pixel out */
+};
+
+/* Queue event to be delivered at specified sequence. Time stamp marks
+ * when the first pixel of the refresh cycle leaves the display engine
+ * for the display
+ */
+#define DRM_CRTC_SEQUENCE_RELATIVE		0x00000001	/* sequence is relative to current */
+#define DRM_CRTC_SEQUENCE_NEXT_ON_MISS		0x00000002	/* Use next sequence if we've missed */
+
+struct drm_crtc_queue_sequence {
+	__u32 crtc_id;
+	__u32 flags;
+	__u64 sequence;		/* on input, target sequence. on output, actual sequence */
+	__u64 user_data;	/* user data passed to event */
+};
+
 #if defined(__cplusplus)
 }
 #endif
@@ -819,6 +841,9 @@ extern "C" {
 
 #define DRM_IOCTL_WAIT_VBLANK		DRM_IOWR(0x3a, union drm_wait_vblank)
 
+#define DRM_IOCTL_CRTC_GET_SEQUENCE	DRM_IOWR(0x3b, struct drm_crtc_get_sequence)
+#define DRM_IOCTL_CRTC_QUEUE_SEQUENCE	DRM_IOWR(0x3c, struct drm_crtc_queue_sequence)
+
 #define DRM_IOCTL_UPDATE_DRAW		DRM_IOW(0x3f, struct drm_update_draw)
 
 #define DRM_IOCTL_MODE_GETRESOURCES	DRM_IOWR(0xA0, struct drm_mode_card_res)
@@ -863,6 +888,11 @@ extern "C" {
 #define DRM_IOCTL_SYNCOBJ_RESET		DRM_IOWR(0xC4, struct drm_syncobj_array)
 #define DRM_IOCTL_SYNCOBJ_SIGNAL	DRM_IOWR(0xC5, struct drm_syncobj_array)
 
+#define DRM_IOCTL_MODE_CREATE_LEASE	DRM_IOWR(0xC6, struct drm_mode_create_lease)
+#define DRM_IOCTL_MODE_LIST_LESSEES	DRM_IOWR(0xC7, struct drm_mode_list_lessees)
+#define DRM_IOCTL_MODE_GET_LEASE	DRM_IOWR(0xC8, struct drm_mode_get_lease)
+#define DRM_IOCTL_MODE_REVOKE_LEASE	DRM_IOWR(0xC9, struct drm_mode_revoke_lease)
+
 /**
  * Device specific ioctls should only be in their respective headers
  * The device specific ioctl range is from 0x40 to 0x9f.
@@ -893,6 +923,7 @@ struct drm_event {
 
 #define DRM_EVENT_VBLANK 0x01
 #define DRM_EVENT_FLIP_COMPLETE 0x02
+#define DRM_EVENT_CRTC_SEQUENCE	0x03
 
 struct drm_event_vblank {
 	struct drm_event base;
@@ -903,6 +934,16 @@ struct drm_event_vblank {
 	__u32 crtc_id; /* 0 on older kernels that do not support this */
 };
 
+/* Event delivered at sequence. Time stamp marks when the first pixel
+ * of the refresh cycle leaves the display engine for the display
+ */
+struct drm_event_crtc_sequence {
+	struct drm_event	base;
+	__u64			user_data;
+	__s64			time_ns;
+	__u64			sequence;
+};
+
 /* typedef area */
 #ifndef __KERNEL__
 typedef struct drm_clip_rect drm_clip_rect_t;
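
The new ioctl/event pair above can be exercised roughly as follows; drm_fd,
crtc_id, the chosen user_data and the include path are assumptions, and error
handling is left out:

	#include <sys/ioctl.h>
	#include <drm/drm.h>

	static int queue_one_sequence_event(int drm_fd, __u32 crtc_id)
	{
		struct drm_crtc_queue_sequence qs = {
			.crtc_id   = crtc_id,
			.flags     = DRM_CRTC_SEQUENCE_RELATIVE,	/* relative to "now" */
			.sequence  = 1,					/* next refresh */
			.user_data = 0x1234,	/* echoed back in drm_event_crtc_sequence */
		};

		/* on success, a later read() on drm_fd delivers DRM_EVENT_CRTC_SEQUENCE */
		return ioctl(drm_fd, DRM_IOCTL_CRTC_QUEUE_SEQUENCE, &qs);
	}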
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index 9816590d3ad2..ac3c6503ca27 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -397,10 +397,20 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_MIN_EU_IN_POOL	 39
 #define I915_PARAM_MMAP_GTT_VERSION	 40
 
-/* Query whether DRM_I915_GEM_EXECBUFFER2 supports user defined execution
+/*
+ * Query whether DRM_I915_GEM_EXECBUFFER2 supports user defined execution
  * priorities and the driver will attempt to execute batches in priority order.
+ * The param returns a capability bitmask, nonzero implies that the scheduler
+ * is enabled, with different features present according to the mask.
+ *
+ * The initial priority for each batch is supplied by the context and is
+ * controlled via I915_CONTEXT_PARAM_PRIORITY.
  */
 #define I915_PARAM_HAS_SCHEDULER	 41
+#define   I915_SCHEDULER_CAP_ENABLED	(1ul << 0)
+#define   I915_SCHEDULER_CAP_PRIORITY	(1ul << 1)
+#define   I915_SCHEDULER_CAP_PREEMPTION	(1ul << 2)
+
 #define I915_PARAM_HUC_STATUS		 42
 
 /* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to opt-out of
@@ -1309,14 +1319,16 @@ struct drm_i915_reg_read {
 	 * be specified
 	 */
 	__u64 offset;
+#define I915_REG_READ_8B_WA (1ul << 0)
+
 	__u64 val; /* Return value */
 };
 /* Known registers:
  *
  * Render engine timestamp - 0x2358 + 64bit - gen7+
  * - Note this register returns an invalid value if using the default
- *   single instruction 8byte read, in order to workaround that use
- *   offset (0x2538 | 1) instead.
+ *   single instruction 8byte read, in order to workaround that pass
+ *   flag I915_REG_READ_8B_WA in offset field.
  *
  */
 
@@ -1359,6 +1371,10 @@ struct drm_i915_gem_context_param {
 #define I915_CONTEXT_PARAM_GTT_SIZE	0x3
 #define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE	0x4
 #define I915_CONTEXT_PARAM_BANNABLE	0x5
+#define I915_CONTEXT_PARAM_PRIORITY	0x6
+#define   I915_CONTEXT_MAX_USER_PRIORITY	1023 /* inclusive */
+#define   I915_CONTEXT_DEFAULT_PRIORITY		0
+#define   I915_CONTEXT_MIN_USER_PRIORITY	-1023 /* inclusive */
 	__u64 value;
 };
 
@@ -1510,9 +1526,14 @@ struct drm_i915_perf_oa_config {
 	__u32 n_boolean_regs;
 	__u32 n_flex_regs;
 
-	__u64 __user mux_regs_ptr;
-	__u64 __user boolean_regs_ptr;
-	__u64 __user flex_regs_ptr;
+	/*
+	 * These fields are pointers to tuples of u32 values (register
+	 * address, value). For example the expected length of the buffer
+	 * pointed by mux_regs_ptr is (2 * sizeof(u32) * n_mux_regs).
+	 */
+	__u64 mux_regs_ptr;
+	__u64 boolean_regs_ptr;
+	__u64 flex_regs_ptr;
 };
 
 #if defined(__cplusplus)
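
The scheduler capability mask can be read back through the existing GETPARAM
ioctl; a hedged sketch (the opened DRM fd and the error policy are
assumptions):

	#include <sys/ioctl.h>
	#include <drm/i915_drm.h>

	static int i915_scheduler_caps(int drm_fd)
	{
		int caps = 0;
		drm_i915_getparam_t gp = {
			.param = I915_PARAM_HAS_SCHEDULER,
			.value = &caps,
		};

		if (ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp))
			return 0;	/* treat failure as "no scheduler" */

		return caps;		/* test the I915_SCHEDULER_CAP_* bits */
	}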
diff --git a/tools/include/uapi/linux/kcmp.h b/tools/include/uapi/linux/kcmp.h
index 481e103da78e..ef1305010925 100644
--- a/tools/include/uapi/linux/kcmp.h
+++ b/tools/include/uapi/linux/kcmp.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 #ifndef _UAPI_LINUX_KCMP_H
 #define _UAPI_LINUX_KCMP_H
 
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 7e99999d6236..282d7613fce8 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -931,6 +931,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_PPC_SMT_POSSIBLE 147
 #define KVM_CAP_HYPERV_SYNIC2 148
 #define KVM_CAP_HYPERV_VP_INDEX 149
+#define KVM_CAP_S390_AIS_MIGRATION 150
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 362493a2f950..b9a4953018ed 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -942,6 +942,7 @@ enum perf_callchain_context {
 #define PERF_AUX_FLAG_TRUNCATED		0x01	/* record was truncated to fit */
 #define PERF_AUX_FLAG_OVERWRITE		0x02	/* snapshot from overwrite mode */
 #define PERF_AUX_FLAG_PARTIAL		0x04	/* record contains gaps */
+#define PERF_AUX_FLAG_COLLISION		0x08	/* sample collided with another */
 
 #define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
 #define PERF_FLAG_FD_OUTPUT		(1UL << 1)
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index a8d0759a9e40..af5f8c2df87a 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 #ifndef _LINUX_PRCTL_H
 #define _LINUX_PRCTL_H
 
@@ -197,4 +198,13 @@ struct prctl_mm_map {
 # define PR_CAP_AMBIENT_LOWER		3
 # define PR_CAP_AMBIENT_CLEAR_ALL	4
 
+/* arm64 Scalable Vector Extension controls */
+/* Flag values must be kept in sync with ptrace NT_ARM_SVE interface */
+#define PR_SVE_SET_VL			50	/* set task vector length */
+# define PR_SVE_SET_VL_ONEXEC		(1 << 18) /* defer effect until exec */
+#define PR_SVE_GET_VL			51	/* get task vector length */
+/* Bits common to PR_SVE_SET_VL and PR_SVE_GET_VL */
+# define PR_SVE_VL_LEN_MASK		0xffff
+# define PR_SVE_VL_INHERIT		(1 << 17) /* inherit across exec */
+
 #endif /* _LINUX_PRCTL_H */
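
The new SVE controls follow the usual prctl() calling convention; a minimal
sketch, assuming an arm64 kernel with SVE and headers that already carry the
PR_SVE_* values (the 32-byte request is arbitrary):

	#include <sys/prctl.h>
	#include <stdio.h>

	static void sve_vl_demo(void)
	{
		/* ask for a 32-byte vector length, kept across exec */
		if (prctl(PR_SVE_SET_VL, 32 | PR_SVE_VL_INHERIT) < 0)
			perror("PR_SVE_SET_VL");

		int ret = prctl(PR_SVE_GET_VL);
		if (ret >= 0)
			printf("SVE VL: %d bytes\n", ret & PR_SVE_VL_LEN_MASK);
	}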
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index d95fdcc26f4b..944070e98a2c 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -216,6 +216,47 @@ static const char * const numa_usage[] = {
 	NULL
 };
 
+/*
+ * Get the number of NUMA nodes present.
+ */
+static int nr_numa_nodes(void)
+{
+	int i, nr_nodes = 0;
+
+	for (i = 0; i < g->p.nr_nodes; i++) {
+		if (numa_bitmask_isbitset(numa_nodes_ptr, i))
+			nr_nodes++;
+	}
+
+	return nr_nodes;
+}
+
+/*
+ * Check whether the given NUMA node is present.
+ */
+static int is_node_present(int node)
+{
+	return numa_bitmask_isbitset(numa_nodes_ptr, node);
+}
+
+/*
+ * Check whether the given NUMA node has any CPUs.
+ */
+static bool node_has_cpus(int node)
+{
+	struct bitmask *cpu = numa_allocate_cpumask();
+	unsigned int i;
+
+	if (cpu && !numa_node_to_cpus(node, cpu)) {
+		for (i = 0; i < cpu->size; i++) {
+			if (numa_bitmask_isbitset(cpu, i))
+				return true;
+		}
+	}
+
+	return false; /* let's fall back to "no CPUs" safely */
+}
+
 static cpu_set_t bind_to_cpu(int target_cpu)
 {
 	cpu_set_t orig_mask, mask;
@@ -244,12 +285,12 @@ static cpu_set_t bind_to_cpu(int target_cpu)
 
 static cpu_set_t bind_to_node(int target_node)
 {
-	int cpus_per_node = g->p.nr_cpus/g->p.nr_nodes;
+	int cpus_per_node = g->p.nr_cpus / nr_numa_nodes();
 	cpu_set_t orig_mask, mask;
 	int cpu;
 	int ret;
 
-	BUG_ON(cpus_per_node*g->p.nr_nodes != g->p.nr_cpus);
+	BUG_ON(cpus_per_node * nr_numa_nodes() != g->p.nr_cpus);
 	BUG_ON(!cpus_per_node);
 
 	ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
@@ -649,7 +690,7 @@ static int parse_setup_node_list(void)
 			int i;
 
 			for (i = 0; i < mul; i++) {
-				if (t >= g->p.nr_tasks) {
+				if (t >= g->p.nr_tasks || !node_has_cpus(bind_node)) {
 					printf("\n# NOTE: ignoring bind NODEs starting at NODE#%d\n", bind_node);
 					goto out;
 				}
@@ -964,6 +1005,8 @@ static void calc_convergence(double runtime_ns_max, double *convergence)
 	sum = 0;
 
 	for (node = 0; node < g->p.nr_nodes; node++) {
+		if (!is_node_present(node))
+			continue;
 		nr = nodes[node];
 		nr_min = min(nr, nr_min);
 		nr_max = max(nr, nr_max);
@@ -984,8 +1027,11 @@ static void calc_convergence(double runtime_ns_max, double *convergence)
 	process_groups = 0;
 
 	for (node = 0; node < g->p.nr_nodes; node++) {
-		int processes = count_node_processes(node);
+		int processes;
 
+		if (!is_node_present(node))
+			continue;
+		processes = count_node_processes(node);
 		nr = nodes[node];
 		tprintf(" %2d/%-2d", nr, processes);
 
@@ -1291,7 +1337,7 @@ static void print_summary(void)
 
 	printf("\n ###\n");
 	printf(" # %d %s will execute (on %d nodes, %d CPUs):\n",
-		g->p.nr_tasks, g->p.nr_tasks == 1 ? "task" : "tasks", g->p.nr_nodes, g->p.nr_cpus);
+		g->p.nr_tasks, g->p.nr_tasks == 1 ? "task" : "tasks", nr_numa_nodes(), g->p.nr_cpus);
 	printf(" #      %5dx %5ldMB global  shared mem operations\n",
 			g->p.nr_loops, g->p.bytes_global/1024/1024);
 	printf(" #      %5dx %5ldMB process shared mem operations\n",
diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c
index bd1fedef3d1c..a0f7ed2b869b 100644
--- a/tools/perf/builtin-help.c
+++ b/tools/perf/builtin-help.c
@@ -284,7 +284,7 @@ static int perf_help_config(const char *var, const char *value, void *cb)
 		add_man_viewer(value);
 		return 0;
 	}
-	if (!strstarts(var, "man."))
+	if (strstarts(var, "man."))
 		return add_man_viewer_info(var, value);
 
 	return 0;
@@ -314,7 +314,7 @@ static const char *cmd_to_page(const char *perf_cmd)
 
 	if (!perf_cmd)
 		return "perf";
-	else if (!strstarts(perf_cmd, "perf"))
+	else if (strstarts(perf_cmd, "perf"))
 		return perf_cmd;
 
 	return asprintf(&s, "perf-%s", perf_cmd) < 0 ? NULL : s;
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 3d7f33e19df2..003255910c05 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -339,6 +339,22 @@ static int record__open(struct record *rec)
 	struct perf_evsel_config_term *err_term;
 	int rc = 0;
 
+	/*
+	 * For initial_delay we need to add a dummy event so that we can track
+	 * PERF_RECORD_MMAP while we wait for the initial delay to enable the
+	 * real events, the ones asked by the user.
+	 */
+	if (opts->initial_delay) {
+		if (perf_evlist__add_dummy(evlist))
+			return -ENOMEM;
+
+		pos = perf_evlist__first(evlist);
+		pos->tracking = 0;
+		pos = perf_evlist__last(evlist);
+		pos->tracking = 1;
+		pos->attr.enable_on_exec = 1;
+	}
+
 	perf_evlist__config(evlist, opts, &callchain_param);
 
 	evlist__for_each_entry(evlist, pos) {
@@ -749,17 +765,19 @@ static int record__synthesize(struct record *rec, bool tail)
 			goto out;
 	}
 
-	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
-						 machine);
-	WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
-			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
-			   "Check /proc/kallsyms permission or run as root.\n");
-
-	err = perf_event__synthesize_modules(tool, process_synthesized_event,
-					     machine);
-	WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
-			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
-			   "Check /proc/modules permission or run as root.\n");
+	if (!perf_evlist__exclude_kernel(rec->evlist)) {
+		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
+							 machine);
+		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
+				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
+				   "Check /proc/kallsyms permission or run as root.\n");
+
+		err = perf_event__synthesize_modules(tool, process_synthesized_event,
+						     machine);
+		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
+				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
+				   "Check /proc/modules permission or run as root.\n");
+	}
 
 	if (perf_guest) {
 		machines__process_guests(&session->machines,
@@ -1693,7 +1711,7 @@ int cmd_record(int argc, const char **argv)
 
 	err = -ENOMEM;
 
-	if (symbol_conf.kptr_restrict)
+	if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist))
 		pr_warning(
 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
 "check /proc/sys/kernel/kptr_restrict.\n\n"
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 1394cd8d96f7..af5dd038195e 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -441,6 +441,9 @@ static void report__warn_kptr_restrict(const struct report *rep)
 	struct map *kernel_map = machine__kernel_map(&rep->session->machines.host);
 	struct kmap *kernel_kmap = kernel_map ? map__kmap(kernel_map) : NULL;
 
+	if (perf_evlist__exclude_kernel(rep->session->evlist))
+		return;
+
 	if (kernel_map == NULL ||
 	    (kernel_map->dso->hit &&
 	     (kernel_kmap->ref_reloc_sym == NULL ||
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 68f36dc0344f..9b43bda45a41 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -1955,6 +1955,16 @@ static int perf_script__fopen_per_event_dump(struct perf_script *script)
 	struct perf_evsel *evsel;
 
 	evlist__for_each_entry(script->session->evlist, evsel) {
+		/*
+		 * Already set up? I.e. we may be called twice in cases like
+		 * Intel PT, one for the intel_pt// and dummy events, then
+		 * for the evsels synthesized from the auxtrace info.
+		 *
+		 * See perf_script__process_auxtrace_info.
+		 */
+		if (evsel->priv != NULL)
+			continue;
+
 		evsel->priv = perf_evsel_script__new(evsel, script->session->data);
 		if (evsel->priv == NULL)
 			goto out_err_fclose;
@@ -2838,6 +2848,25 @@ int process_cpu_map_event(struct perf_tool *tool __maybe_unused,
 	return set_maps(script);
 }
 
+#ifdef HAVE_AUXTRACE_SUPPORT
+static int perf_script__process_auxtrace_info(struct perf_tool *tool,
+					      union perf_event *event,
+					      struct perf_session *session)
+{
+	int ret = perf_event__process_auxtrace_info(tool, event, session);
+
+	if (ret == 0) {
+		struct perf_script *script = container_of(tool, struct perf_script, tool);
+
+		ret = perf_script__setup_per_event_dump(script);
+	}
+
+	return ret;
+}
+#else
+#define perf_script__process_auxtrace_info 0
+#endif
+
 int cmd_script(int argc, const char **argv)
 {
 	bool show_full_info = false;
@@ -2866,7 +2895,7 @@ int cmd_script(int argc, const char **argv)
 			.feature	 = perf_event__process_feature,
 			.build_id	 = perf_event__process_build_id,
 			.id_index	 = perf_event__process_id_index,
-			.auxtrace_info	 = perf_event__process_auxtrace_info,
+			.auxtrace_info	 = perf_script__process_auxtrace_info,
 			.auxtrace	 = perf_event__process_auxtrace,
 			.auxtrace_error	 = perf_event__process_auxtrace_error,
 			.stat		 = perf_event__process_stat_event,
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 477a8699f0b5..9e0d2645ae13 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -77,6 +77,7 @@
 #include "sane_ctype.h"
 
 static volatile int done;
+static volatile int resize;
 
 #define HEADER_LINE_NR  5
 
@@ -85,11 +86,13 @@ static void perf_top__update_print_entries(struct perf_top *top)
 	top->print_entries = top->winsize.ws_row - HEADER_LINE_NR;
 }
 
-static void perf_top__sig_winch(int sig __maybe_unused,
-				siginfo_t *info __maybe_unused, void *arg)
+static void winch_sig(int sig __maybe_unused)
 {
-	struct perf_top *top = arg;
+	resize = 1;
+}
 
+static void perf_top__resize(struct perf_top *top)
+{
 	get_term_dimensions(&top->winsize);
 	perf_top__update_print_entries(top);
 }
@@ -473,12 +476,8 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c)
 		case 'e':
 			prompt_integer(&top->print_entries, "Enter display entries (lines)");
 			if (top->print_entries == 0) {
-				struct sigaction act = {
-					.sa_sigaction = perf_top__sig_winch,
-					.sa_flags     = SA_SIGINFO,
-				};
-				perf_top__sig_winch(SIGWINCH, NULL, top);
-				sigaction(SIGWINCH, &act, NULL);
+				perf_top__resize(top);
+				signal(SIGWINCH, winch_sig);
 			} else {
 				signal(SIGWINCH, SIG_DFL);
 			}
@@ -732,14 +731,16 @@ static void perf_event__process_sample(struct perf_tool *tool,
 	if (!machine->kptr_restrict_warned &&
 	    symbol_conf.kptr_restrict &&
 	    al.cpumode == PERF_RECORD_MISC_KERNEL) {
-		ui__warning(
+		if (!perf_evlist__exclude_kernel(top->session->evlist)) {
+			ui__warning(
 "Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
 "Check /proc/sys/kernel/kptr_restrict.\n\n"
 "Kernel%s samples will not be resolved.\n",
 			  al.map && !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ?
 			  " modules" : "");
-		if (use_browser <= 0)
-			sleep(5);
+			if (use_browser <= 0)
+				sleep(5);
+		}
 		machine->kptr_restrict_warned = true;
 	}
 
@@ -1030,6 +1031,11 @@ static int __cmd_top(struct perf_top *top)
 
 		if (hits == top->samples)
 			ret = perf_evlist__poll(top->evlist, 100);
+
+		if (resize) {
+			perf_top__resize(top);
+			resize = 0;
+		}
 	}
 
 	ret = 0;
@@ -1352,12 +1358,8 @@ int cmd_top(int argc, const char **argv)
 
 	get_term_dimensions(&top.winsize);
 	if (top.print_entries == 0) {
-		struct sigaction act = {
-			.sa_sigaction = perf_top__sig_winch,
-			.sa_flags     = SA_SIGINFO,
-		};
 		perf_top__update_print_entries(&top);
-		sigaction(SIGWINCH, &act, NULL);
+		signal(SIGWINCH, winch_sig);
 	}
 
 	status = __cmd_top(&top);
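
The SIGWINCH rework above moves the actual resizing out of the signal handler
and into the main loop, the usual flag-and-poll pattern for work that is not
async-signal-safe; the general shape, with hypothetical helpers:

	#include <signal.h>

	static volatile sig_atomic_t resize;

	static void do_work(void);			/* hypothetical */
	static void recompute_dimensions(void);		/* hypothetical */

	static void winch_sig(int sig)
	{
		(void)sig;
		resize = 1;		/* only set a flag in the handler */
	}

	static void main_loop(void)
	{
		signal(SIGWINCH, winch_sig);
		for (;;) {
			do_work();
			if (resize) {	/* handled outside the signal handler */
				recompute_dimensions();
				resize = 0;
			}
		}
	}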
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index f2757d38c7d7..84debdbad327 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1152,12 +1152,14 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
 	if (trace->host == NULL)
 		return -ENOMEM;
 
-	if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0)
-		return -errno;
+	err = trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr);
+	if (err < 0)
+		goto out;
 
 	err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
 					    evlist->threads, trace__tool_process, false,
 					    trace->opts.proc_map_timeout, 1);
+out:
 	if (err)
 		symbol__exit();
 
diff --git a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh
index 7a84d73324e3..8b3da21a08f1 100755
--- a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh
+++ b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh
@@ -10,8 +10,8 @@
 
 . $(dirname $0)/lib/probe.sh
 
-ld=$(realpath /lib64/ld*.so.* | uniq)
-libc=$(echo $ld | sed 's/ld/libc/g')
+libc=$(grep -w libc /proc/self/maps | head -1 | sed -r 's/.*[[:space:]](\/.*)/\1/g')
+nm -g $libc 2>/dev/null | fgrep -q inet_pton || exit 254
 
 trace_libc_inet_pton_backtrace() {
 	idx=0
@@ -37,6 +37,9 @@ trace_libc_inet_pton_backtrace() {
 	done
 }
 
+# Check for IPv6 interface existence
+ip a sh lo | fgrep -q inet6 || exit 2
+
 skip_if_no_perf_probe && \
 perf probe -q $libc inet_pton && \
 trace_libc_inet_pton_backtrace
diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh
index 2e68c5f120da..2a9ef080efd0 100755
--- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh
+++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh
@@ -17,8 +17,10 @@ skip_if_no_perf_probe || exit 2
 file=$(mktemp /tmp/temporary_file.XXXXX)
 
 trace_open_vfs_getname() {
-	perf trace -e open touch $file 2>&1 | \
-	egrep " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ open\(filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$"
+	test "$(uname -m)" = s390x && { svc="openat"; txt="dfd: +CWD, +"; }
+
+	perf trace -e ${svc:-open} touch $file 2>&1 | \
+	egrep " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ ${svc:-open}\(${txt}filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$"
 }
 
 
diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c
index bc4a7344e274..89c8e1604ca7 100644
--- a/tools/perf/tests/task-exit.c
+++ b/tools/perf/tests/task-exit.c
@@ -84,7 +84,11 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused
 
 	evsel = perf_evlist__first(evlist);
 	evsel->attr.task = 1;
+#ifdef __s390x__
+	evsel->attr.sample_freq = 1000000;
+#else
 	evsel->attr.sample_freq = 1;
+#endif
 	evsel->attr.inherit = 0;
 	evsel->attr.watermark = 0;
 	evsel->attr.wakeup_events = 1;
diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c
index 9e1668b2c5d7..417e3ecfe9d7 100644
--- a/tools/perf/trace/beauty/mmap.c
+++ b/tools/perf/trace/beauty/mmap.c
@@ -62,6 +62,9 @@ static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
 	P_MMAP_FLAG(POPULATE);
 	P_MMAP_FLAG(STACK);
 	P_MMAP_FLAG(UNINITIALIZED);
+#ifdef MAP_SYNC
+	P_MMAP_FLAG(SYNC);
+#endif
 #undef P_MMAP_FLAG
 
 	if (flags)
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index da1c4c4a0dd8..3369c7830260 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -165,7 +165,7 @@ static void ins__delete(struct ins_operands *ops)
 static int ins__raw_scnprintf(struct ins *ins, char *bf, size_t size,
 			      struct ins_operands *ops)
 {
-	return scnprintf(bf, size, "%-6.6s %s", ins->name, ops->raw);
+	return scnprintf(bf, size, "%-6s %s", ins->name, ops->raw);
 }
 
 int ins__scnprintf(struct ins *ins, char *bf, size_t size,
@@ -230,12 +230,12 @@ static int call__scnprintf(struct ins *ins, char *bf, size_t size,
 			   struct ins_operands *ops)
 {
 	if (ops->target.name)
-		return scnprintf(bf, size, "%-6.6s %s", ins->name, ops->target.name);
+		return scnprintf(bf, size, "%-6s %s", ins->name, ops->target.name);
 
 	if (ops->target.addr == 0)
 		return ins__raw_scnprintf(ins, bf, size, ops);
 
-	return scnprintf(bf, size, "%-6.6s *%" PRIx64, ins->name, ops->target.addr);
+	return scnprintf(bf, size, "%-6s *%" PRIx64, ins->name, ops->target.addr);
 }
 
 static struct ins_ops call_ops = {
@@ -299,7 +299,7 @@ static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
 			c++;
 	}
 
-	return scnprintf(bf, size, "%-6.6s %.*s%" PRIx64,
+	return scnprintf(bf, size, "%-6s %.*s%" PRIx64,
 			 ins->name, c ? c - ops->raw : 0, ops->raw,
 			 ops->target.offset);
 }
@@ -372,7 +372,7 @@ static int lock__scnprintf(struct ins *ins, char *bf, size_t size,
 	if (ops->locked.ins.ops == NULL)
 		return ins__raw_scnprintf(ins, bf, size, ops);
 
-	printed = scnprintf(bf, size, "%-6.6s ", ins->name);
+	printed = scnprintf(bf, size, "%-6s ", ins->name);
 	return printed + ins__scnprintf(&ops->locked.ins, bf + printed,
 					size - printed, ops->locked.ops);
 }
@@ -448,7 +448,7 @@ static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map *m
 static int mov__scnprintf(struct ins *ins, char *bf, size_t size,
 			   struct ins_operands *ops)
 {
-	return scnprintf(bf, size, "%-6.6s %s,%s", ins->name,
+	return scnprintf(bf, size, "%-6s %s,%s", ins->name,
 			 ops->source.name ?: ops->source.raw,
 			 ops->target.name ?: ops->target.raw);
 }
@@ -488,7 +488,7 @@ static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops
 static int dec__scnprintf(struct ins *ins, char *bf, size_t size,
 			   struct ins_operands *ops)
 {
-	return scnprintf(bf, size, "%-6.6s %s", ins->name,
+	return scnprintf(bf, size, "%-6s %s", ins->name,
 			 ops->target.name ?: ops->target.raw);
 }
 
@@ -500,7 +500,7 @@ static struct ins_ops dec_ops = {
 static int nop__scnprintf(struct ins *ins __maybe_unused, char *bf, size_t size,
 			  struct ins_operands *ops __maybe_unused)
 {
-	return scnprintf(bf, size, "%-6.6s", "nop");
+	return scnprintf(bf, size, "%-6s", "nop");
 }
 
 static struct ins_ops nop_ops = {
@@ -924,7 +924,7 @@ void disasm_line__free(struct disasm_line *dl)
 int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw)
 {
 	if (raw || !dl->ins.ops)
-		return scnprintf(bf, size, "%-6.6s %s", dl->ins.name, dl->ops.raw);
+		return scnprintf(bf, size, "%-6s %s", dl->ins.name, dl->ops.raw);
 
 	return ins__scnprintf(&dl->ins, bf, size, &dl->ops);
 }
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index c6c891e154a6..b62e523a7035 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -257,7 +257,7 @@ int perf_evlist__add_dummy(struct perf_evlist *evlist)
 		.config = PERF_COUNT_SW_DUMMY,
 		.size	= sizeof(attr), /* to capture ABI version */
 	};
-	struct perf_evsel *evsel = perf_evsel__new(&attr);
+	struct perf_evsel *evsel = perf_evsel__new_idx(&attr, evlist->nr_entries);
 
 	if (evsel == NULL)
 		return -ENOMEM;
@@ -1786,3 +1786,15 @@ void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist,
 state_err:
 	return;
 }
+
+bool perf_evlist__exclude_kernel(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (!evsel->attr.exclude_kernel)
+			return false;
+	}
+
+	return true;
+}
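
The new helper returns true only when every event in the evlist has
attr.exclude_kernel set, e.g. when the user asked for user-space-only events
such as 'perf record -e cycles:u'; a hedged illustration of such an attribute
(not code from the patch):

	#include <linux/perf_event.h>

	/* what a "cycles:u" event looks like; the :u modifier sets exclude_kernel */
	static struct perf_event_attr user_only_cycles = {
		.type		= PERF_TYPE_HARDWARE,
		.config		= PERF_COUNT_HW_CPU_CYCLES,
		.exclude_kernel	= 1,
	};

When every evsel carries exclude_kernel like this, record/report/top can skip
their kptr_restrict warnings and the kernel map/module synthesis, which is how
the helper is used in the hunks above.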
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index e72ae64c11ac..491f69542920 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -312,4 +312,6 @@ perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, const char *str);
 
 struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
 					    union perf_event *event);
+
+bool perf_evlist__exclude_kernel(struct perf_evlist *evlist);
 #endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index f894893c203d..d5fbcf8c7aa7 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -733,12 +733,16 @@ static void apply_config_terms(struct perf_evsel *evsel,
 	list_for_each_entry(term, config_terms, list) {
 		switch (term->type) {
 		case PERF_EVSEL__CONFIG_TERM_PERIOD:
-			attr->sample_period = term->val.period;
-			attr->freq = 0;
+			if (!(term->weak && opts->user_interval != ULLONG_MAX)) {
+				attr->sample_period = term->val.period;
+				attr->freq = 0;
+			}
 			break;
 		case PERF_EVSEL__CONFIG_TERM_FREQ:
-			attr->sample_freq = term->val.freq;
-			attr->freq = 1;
+			if (!(term->weak && opts->user_freq != UINT_MAX)) {
+				attr->sample_freq = term->val.freq;
+				attr->freq = 1;
+			}
 			break;
 		case PERF_EVSEL__CONFIG_TERM_TIME:
 			if (term->val.time)
@@ -1371,7 +1375,7 @@ perf_evsel__process_group_data(struct perf_evsel *leader,
 static int
 perf_evsel__read_group(struct perf_evsel *leader, int cpu, int thread)
 {
-	struct perf_stat_evsel *ps = leader->priv;
+	struct perf_stat_evsel *ps = leader->stats;
 	u64 read_format = leader->attr.read_format;
 	int size = perf_evsel__read_size(leader);
 	u64 *data = ps->group_data;
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 9277df96ffda..157f49e8a772 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -67,6 +67,7 @@ struct perf_evsel_config_term {
 		bool	overwrite;
 		char	*branch;
 	} val;
+	bool weak;
 };
 
 struct perf_stat_evsel;
diff --git a/tools/perf/util/intel-pt-decoder/inat.h b/tools/perf/util/intel-pt-decoder/inat.h
index 125ecd2a300d..52dc8d911173 100644
--- a/tools/perf/util/intel-pt-decoder/inat.h
+++ b/tools/perf/util/intel-pt-decoder/inat.h
@@ -97,6 +97,16 @@
 #define INAT_MAKE_GROUP(grp)	((grp << INAT_GRP_OFFS) | INAT_MODRM)
 #define INAT_MAKE_IMM(imm)	(imm << INAT_IMM_OFFS)
 
+/* Identifiers for segment registers */
+#define INAT_SEG_REG_IGNORE	0
+#define INAT_SEG_REG_DEFAULT	1
+#define INAT_SEG_REG_CS		2
+#define INAT_SEG_REG_SS		3
+#define INAT_SEG_REG_DS		4
+#define INAT_SEG_REG_ES		5
+#define INAT_SEG_REG_FS		6
+#define INAT_SEG_REG_GS		7
+
 /* Attribute search APIs */
 extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode);
 extern int inat_get_last_prefix_id(insn_byte_t last_pfx);
diff --git a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
index 12e377184ee4..c4d55919fac1 100644
--- a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
+++ b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
@@ -896,7 +896,7 @@ EndTable
 
 GrpTable: Grp3_1
 0: TEST Eb,Ib
-1:
+1: TEST Eb,Ib
 2: NOT Eb
 3: NEG Eb
 4: MUL AL,Eb
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 6a8d03c3d9b7..270f3223c6df 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -172,6 +172,9 @@ void machine__exit(struct machine *machine)
 {
 	int i;
 
+	if (machine == NULL)
+		return;
+
 	machine__destroy_kernel_maps(machine);
 	map_groups__exit(&machine->kmaps);
 	dsos__exit(&machine->dsos);
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index a7fcd95961ef..170316795a18 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1116,6 +1116,7 @@ do {								\
 	INIT_LIST_HEAD(&__t->list);				\
 	__t->type       = PERF_EVSEL__CONFIG_TERM_ ## __type;	\
 	__t->val.__name = __val;				\
+	__t->weak	= term->weak;				\
 	list_add_tail(&__t->list, head_terms);			\
 } while (0)
 
@@ -2410,6 +2411,7 @@ static int new_term(struct parse_events_term **_term,
 
 	*term = *temp;
 	INIT_LIST_HEAD(&term->list);
+	term->weak = false;
 
 	switch (term->type_val) {
 	case PARSE_EVENTS__TERM_TYPE_NUM:
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index be337c266697..88108cd11b4c 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -101,6 +101,9 @@ struct parse_events_term {
 	/* error string indexes for within parsed string */
 	int err_term;
 	int err_val;
+
+	/* Coming from implicit alias */
+	bool weak;
 };
 
 struct parse_events_error {
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 07cb2ac041d7..80fb1593913a 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -405,6 +405,11 @@ static int pmu_alias_terms(struct perf_pmu_alias *alias,
 			parse_events_terms__purge(&list);
 			return ret;
 		}
+		/*
+		 * Weak terms don't override command line options,
+		 * which we don't want for implicit terms in aliases.
+		 */
+		cloned->weak = true;
 		list_add_tail(&cloned->list, &list);
 	}
 	list_splice(&list, terms);
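
A term cloned from a PMU alias is flagged 'weak' so that an explicit
-c/--count or -F/--freq on the command line still wins when
apply_config_terms() runs; a standalone sketch of that precedence rule (the
struct and function names are made up for illustration):

	#include <limits.h>
	#include <stdbool.h>

	struct sample_term {
		unsigned long long period;
		bool weak;		/* true when the term came from an alias */
	};

	/* user_interval is ULLONG_MAX unless -c <n> was given on the command line */
	static unsigned long long effective_period(const struct sample_term *t,
						   unsigned long long user_interval)
	{
		if (t->weak && user_interval != ULLONG_MAX)
			return user_interval;	/* explicit -c overrides the alias */

		return t->period;		/* otherwise the alias default applies */
	}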

^ permalink raw reply	[flat|nested] 316+ messages in thread

* [GIT PULL] perf fixes
@ 2017-11-26 12:40 Ingo Molnar
  0 siblings, 0 replies; 316+ messages in thread
From: Ingo Molnar @ 2017-11-26 12:40 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Thomas Gleixner, Andrew Morton

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: 4a31b424ac0656d1bb17520ee861144fe7a19664 perf/core: Fix memory leak triggered by perf --namespace

Misc fixes: two PMU driver fixes and a memory leak fix.

 Thanks,

	Ingo

------------------>
Andi Kleen (1):
      perf/x86/intel: Hide TSX events when RTM is not supported

Kan Liang (1):
      perf/x86/intel/uncore: Add event constraint for BDX PCU

Vasily Averin (1):
      perf/core: Fix memory leak triggered by perf --namespace


 arch/x86/events/intel/core.c         | 35 +++++++++++++++++++++++------------
 arch/x86/events/intel/uncore_snbep.c |  8 ++++++++
 kernel/events/core.c                 |  1 +
 3 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 9fb9a1f1e47b..f94855000d4e 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3730,6 +3730,19 @@ EVENT_ATTR_STR(cycles-t,	cycles_t,	"event=0x3c,in_tx=1");
 EVENT_ATTR_STR(cycles-ct,	cycles_ct,	"event=0x3c,in_tx=1,in_tx_cp=1");
 
 static struct attribute *hsw_events_attrs[] = {
+	EVENT_PTR(mem_ld_hsw),
+	EVENT_PTR(mem_st_hsw),
+	EVENT_PTR(td_slots_issued),
+	EVENT_PTR(td_slots_retired),
+	EVENT_PTR(td_fetch_bubbles),
+	EVENT_PTR(td_total_slots),
+	EVENT_PTR(td_total_slots_scale),
+	EVENT_PTR(td_recovery_bubbles),
+	EVENT_PTR(td_recovery_bubbles_scale),
+	NULL
+};
+
+static struct attribute *hsw_tsx_events_attrs[] = {
 	EVENT_PTR(tx_start),
 	EVENT_PTR(tx_commit),
 	EVENT_PTR(tx_abort),
@@ -3742,18 +3755,16 @@ static struct attribute *hsw_events_attrs[] = {
 	EVENT_PTR(el_conflict),
 	EVENT_PTR(cycles_t),
 	EVENT_PTR(cycles_ct),
-	EVENT_PTR(mem_ld_hsw),
-	EVENT_PTR(mem_st_hsw),
-	EVENT_PTR(td_slots_issued),
-	EVENT_PTR(td_slots_retired),
-	EVENT_PTR(td_fetch_bubbles),
-	EVENT_PTR(td_total_slots),
-	EVENT_PTR(td_total_slots_scale),
-	EVENT_PTR(td_recovery_bubbles),
-	EVENT_PTR(td_recovery_bubbles_scale),
 	NULL
 };
 
+static __init struct attribute **get_hsw_events_attrs(void)
+{
+	return boot_cpu_has(X86_FEATURE_RTM) ?
+		merge_attr(hsw_events_attrs, hsw_tsx_events_attrs) :
+		hsw_events_attrs;
+}
+
 static ssize_t freeze_on_smi_show(struct device *cdev,
 				  struct device_attribute *attr,
 				  char *buf)
@@ -4182,7 +4193,7 @@ __init int intel_pmu_init(void)
 
 		x86_pmu.hw_config = hsw_hw_config;
 		x86_pmu.get_event_constraints = hsw_get_event_constraints;
-		x86_pmu.cpu_events = hsw_events_attrs;
+		x86_pmu.cpu_events = get_hsw_events_attrs();
 		x86_pmu.lbr_double_abort = true;
 		extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
 			hsw_format_attr : nhm_format_attr;
@@ -4221,7 +4232,7 @@ __init int intel_pmu_init(void)
 
 		x86_pmu.hw_config = hsw_hw_config;
 		x86_pmu.get_event_constraints = hsw_get_event_constraints;
-		x86_pmu.cpu_events = hsw_events_attrs;
+		x86_pmu.cpu_events = get_hsw_events_attrs();
 		x86_pmu.limit_period = bdw_limit_period;
 		extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
 			hsw_format_attr : nhm_format_attr;
@@ -4279,7 +4290,7 @@ __init int intel_pmu_init(void)
 		extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
 			hsw_format_attr : nhm_format_attr;
 		extra_attr = merge_attr(extra_attr, skl_format_attr);
-		x86_pmu.cpu_events = hsw_events_attrs;
+		x86_pmu.cpu_events = get_hsw_events_attrs();
 		intel_pmu_pebs_data_source_skl(
 			boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X);
 		pr_cont("Skylake events, ");
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 95cb19f4e06f..f4e4168455a8 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -3035,11 +3035,19 @@ static struct intel_uncore_type *bdx_msr_uncores[] = {
 	NULL,
 };
 
+/* Bit 7 'Use Occupancy' is not available for counter 0 on BDX */
+static struct event_constraint bdx_uncore_pcu_constraints[] = {
+	EVENT_CONSTRAINT(0x80, 0xe, 0x80),
+	EVENT_CONSTRAINT_END
+};
+
 void bdx_uncore_cpu_init(void)
 {
 	if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
 		bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
 	uncore_msr_uncores = bdx_msr_uncores;
+
+	hswep_uncore_pcu.constraints = bdx_uncore_pcu_constraints;
 }
 
 static struct intel_uncore_type bdx_uncore_ha = {
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 10cdb9c26b5d..ab5ac84f82e2 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6756,6 +6756,7 @@ static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info,
 		ns_inode = ns_path.dentry->d_inode;
 		ns_link_info->dev = new_encode_dev(ns_inode->i_sb->s_dev);
 		ns_link_info->ino = ns_inode->i_ino;
+		path_put(&ns_path);
 	}
 }
 

^ permalink raw reply	[flat|nested] 316+ messages in thread

* Re: [GIT PULL] perf fixes
  2017-11-05 14:40 Ingo Molnar
@ 2017-11-09  8:13 ` Markus Trippelsdorf
  0 siblings, 0 replies; 316+ messages in thread
From: Markus Trippelsdorf @ 2017-11-09  8:13 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, Peter Zijlstra,
	Arnaldo Carvalho de Melo, Thomas Gleixner, Andrew Morton,
	Jiri Olsa

On 2017.11.05 at 15:40 +0100, Ingo Molnar wrote:
> Linus,
>
> Please pull the latest perf-urgent-for-linus git tree from:
>
>    git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus
>
> Jiri Olsa (1):
>       perf tools: Unwind properly location after REJECT

The patch above breaks the build for me:

util/parse-events.l: In function ‘parse_events_lex’:
util/parse-events-flex.c:4722:16: error: ‘reject_used_but_not_detected’ undeclared (first use in this function)
  */
                ^
util/parse-events.l:159:2: note: in expansion of macro ‘REJECT’
  REJECT
  ^~~~~~
util/parse-events.l:343:26: note: in expansion of macro ‘USER_REJECT’
 {bpf_source}  { if (!isbpf(yyscanner)) USER_REJECT; return str(yyscanner, PE_BPF_SOURCE); }
                          ^~~~~~~~~~~
util/parse-events-flex.c:4722:16: note: each undeclared identifier is reported only once for each function it appears in
  */
                ^
util/parse-events.l:159:2: note: in expansion of macro ‘REJECT’
  REJECT
  ^~~~~~
util/parse-events.l:343:26: note: in expansion of macro ‘USER_REJECT’
 {bpf_source}  { if (!isbpf(yyscanner)) USER_REJECT; return str(yyscanner, PE_BPF_SOURCE); }
                          ^~~~~~~~~~~
mv: cannot stat 'util/.parse-events-flex.o.tmp': No such file or directory

--
Markus

^ permalink raw reply	[flat|nested] 316+ messages in thread

* [GIT PULL] perf fixes
@ 2017-11-05 14:40 Ingo Molnar
  2017-11-09  8:13 ` Markus Trippelsdorf
  0 siblings, 1 reply; 316+ messages in thread
From: Ingo Molnar @ 2017-11-05 14:40 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Thomas Gleixner, Andrew Morton

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: fb7df12d645cfba6a76a45fdcc7e3f7fbbcda661 tools/headers: Synchronize kernel ABI headers

Various fixes:

 - synchronize kernel and tooling headers

 - cgroup support fix

 - two tooling fixes

 Thanks,

	Ingo

------------------>
Ingo Molnar (1):
      tools/headers: Synchronize kernel ABI headers

Jiri Olsa (1):
      perf tools: Unwind properly location after REJECT

Ravi Bangoria (1):
      perf symbols: Fix memory corruption because of zero length symbols

Tejun Heo (1):
      perf/cgroup: Fix perf cgroup hierarchy support


 include/uapi/sound/asound.h                      |  6 +++---
 kernel/events/core.c                             |  6 ++++--
 tools/arch/x86/include/asm/disabled-features.h   |  1 -
 tools/arch/x86/include/asm/required-features.h   |  1 -
 tools/arch/x86/include/uapi/asm/unistd.h         |  1 +
 tools/arch/x86/lib/memcpy_64.S                   |  5 ++++-
 tools/include/asm-generic/bitops/__fls.h         |  1 +
 tools/include/asm-generic/bitops/arch_hweight.h  |  1 +
 tools/include/asm-generic/bitops/const_hweight.h |  1 +
 tools/include/asm-generic/bitops/fls.h           |  1 +
 tools/include/asm-generic/bitops/fls64.h         |  1 +
 tools/include/asm/export.h                       |  7 +++++++
 tools/include/linux/hash.h                       |  1 -
 tools/include/uapi/asm-generic/ioctls.h          |  1 +
 tools/include/uapi/asm-generic/mman-common.h     |  1 +
 tools/include/uapi/asm-generic/mman.h            |  4 ++--
 tools/include/uapi/linux/bpf.h                   |  4 ++--
 tools/include/uapi/linux/bpf_common.h            |  2 +-
 tools/include/uapi/linux/fcntl.h                 |  2 +-
 tools/include/uapi/linux/hw_breakpoint.h         |  2 +-
 tools/include/uapi/linux/kvm.h                   |  1 +
 tools/include/uapi/linux/mman.h                  |  2 +-
 tools/include/uapi/linux/perf_event.h            |  1 +
 tools/include/uapi/linux/sched.h                 |  1 +
 tools/include/uapi/linux/stat.h                  |  2 +-
 tools/include/uapi/linux/vhost.h                 |  1 +
 tools/include/uapi/sound/asound.h                |  1 +
 tools/perf/util/annotate.c                       | 12 +++++++++++-
 tools/perf/util/parse-events.l                   |  8 ++++++--
 29 files changed, 57 insertions(+), 21 deletions(-)
 create mode 100644 tools/include/asm/export.h

diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h
index 299a822d2c4e..c227ccba60ae 100644
--- a/include/uapi/sound/asound.h
+++ b/include/uapi/sound/asound.h
@@ -94,7 +94,7 @@ enum {
 	SNDRV_HWDEP_IFACE_VX,		/* Digigram VX cards */
 	SNDRV_HWDEP_IFACE_MIXART,	/* Digigram miXart cards */
 	SNDRV_HWDEP_IFACE_USX2Y,	/* Tascam US122, US224 & US428 usb */
-	SNDRV_HWDEP_IFACE_EMUX_WAVETABLE, /* EmuX wavetable */	
+	SNDRV_HWDEP_IFACE_EMUX_WAVETABLE, /* EmuX wavetable */
 	SNDRV_HWDEP_IFACE_BLUETOOTH,	/* Bluetooth audio */
 	SNDRV_HWDEP_IFACE_USX2Y_PCM,	/* Tascam US122, US224 & US428 rawusb pcm */
 	SNDRV_HWDEP_IFACE_PCXHR,	/* Digigram PCXHR */
@@ -384,7 +384,7 @@ struct snd_mask {
 
 struct snd_pcm_hw_params {
 	unsigned int flags;
-	struct snd_mask masks[SNDRV_PCM_HW_PARAM_LAST_MASK - 
+	struct snd_mask masks[SNDRV_PCM_HW_PARAM_LAST_MASK -
 			       SNDRV_PCM_HW_PARAM_FIRST_MASK + 1];
 	struct snd_mask mres[5];	/* reserved masks */
 	struct snd_interval intervals[SNDRV_PCM_HW_PARAM_LAST_INTERVAL -
@@ -857,7 +857,7 @@ typedef int __bitwise snd_ctl_elem_iface_t;
 #define SNDRV_CTL_ELEM_ACCESS_INACTIVE		(1<<8)	/* control does actually nothing, but may be updated */
 #define SNDRV_CTL_ELEM_ACCESS_LOCK		(1<<9)	/* write lock */
 #define SNDRV_CTL_ELEM_ACCESS_OWNER		(1<<10)	/* write lock owner */
-#define SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK	(1<<28)	/* kernel use a TLV callback */ 
+#define SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK	(1<<28)	/* kernel use a TLV callback */
 #define SNDRV_CTL_ELEM_ACCESS_USER		(1<<29) /* user space element */
 /* bits 30 and 31 are obsoleted (for indirect access) */
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 9d93db81fa36..10cdb9c26b5d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -901,9 +901,11 @@ list_update_cgroup_event(struct perf_event *event,
 	cpuctx_entry = &cpuctx->cgrp_cpuctx_entry;
 	/* cpuctx->cgrp is NULL unless a cgroup event is active in this CPU .*/
 	if (add) {
+		struct perf_cgroup *cgrp = perf_cgroup_from_task(current, ctx);
+
 		list_add(cpuctx_entry, this_cpu_ptr(&cgrp_cpuctx_list));
-		if (perf_cgroup_from_task(current, ctx) == event->cgrp)
-			cpuctx->cgrp = event->cgrp;
+		if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup))
+			cpuctx->cgrp = cgrp;
 	} else {
 		list_del(cpuctx_entry);
 		cpuctx->cgrp = NULL;
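
A minimal standalone sketch (not kernel code) of what the cgroup_is_descendant() test above buys over the old pointer-equality check: walking the parent chain also matches events attached to an ancestor cgroup.

#include <stddef.h>

/* Toy cgroup with only a parent pointer; a stand-in for the real struct. */
struct cg {
	struct cg *parent;
};

/* True if c is ancestor itself or sits anywhere below it. */
static int is_descendant(const struct cg *c, const struct cg *ancestor)
{
	for (; c; c = c->parent)
		if (c == ancestor)
			return 1;
	return 0;
}

int main(void)
{
	struct cg root = { .parent = NULL };
	struct cg child = { .parent = &root };

	return !(is_descendant(&child, &root) && !is_descendant(&root, &child));
}
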
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index c1a6d5d0da0d..c10c9128f54e 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -1,4 +1,3 @@
-/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _ASM_X86_DISABLED_FEATURES_H
 #define _ASM_X86_DISABLED_FEATURES_H
 
diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h
index 59ac6baafb6a..d91ba04dd007 100644
--- a/tools/arch/x86/include/asm/required-features.h
+++ b/tools/arch/x86/include/asm/required-features.h
@@ -1,4 +1,3 @@
-/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _ASM_X86_REQUIRED_FEATURES_H
 #define _ASM_X86_REQUIRED_FEATURES_H
 
diff --git a/tools/arch/x86/include/uapi/asm/unistd.h b/tools/arch/x86/include/uapi/asm/unistd.h
index a26df0d75cd0..30d7d04d72d6 100644
--- a/tools/arch/x86/include/uapi/asm/unistd.h
+++ b/tools/arch/x86/include/uapi/asm/unistd.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 #ifndef _UAPI_ASM_X86_UNISTD_H
 #define _UAPI_ASM_X86_UNISTD_H
 
diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S
index ecf2c2067281..9a53a06e5a3e 100644
--- a/tools/arch/x86/lib/memcpy_64.S
+++ b/tools/arch/x86/lib/memcpy_64.S
@@ -1,10 +1,10 @@
-/* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright 2002 Andi Kleen */
 
 #include <linux/linkage.h>
 #include <asm/errno.h>
 #include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
+#include <asm/export.h>
 
 /*
  * We build a jump to memcpy_orig by default which gets NOPped out on
@@ -41,6 +41,8 @@ ENTRY(memcpy)
 	ret
 ENDPROC(memcpy)
 ENDPROC(__memcpy)
+EXPORT_SYMBOL(memcpy)
+EXPORT_SYMBOL(__memcpy)
 
 /*
  * memcpy_erms() - enhanced fast string memcpy. This is faster and
@@ -275,6 +277,7 @@ ENTRY(memcpy_mcsafe_unrolled)
 	xorq %rax, %rax
 	ret
 ENDPROC(memcpy_mcsafe_unrolled)
+EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled)
 
 	.section .fixup, "ax"
 	/* Return -EFAULT for any failure */
diff --git a/tools/include/asm-generic/bitops/__fls.h b/tools/include/asm-generic/bitops/__fls.h
index a60a7ccb6782..03f721a8a2b1 100644
--- a/tools/include/asm-generic/bitops/__fls.h
+++ b/tools/include/asm-generic/bitops/__fls.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _ASM_GENERIC_BITOPS___FLS_H_
 #define _ASM_GENERIC_BITOPS___FLS_H_
 
diff --git a/tools/include/asm-generic/bitops/arch_hweight.h b/tools/include/asm-generic/bitops/arch_hweight.h
index 6a211f40665c..c2705e1d220d 100644
--- a/tools/include/asm-generic/bitops/arch_hweight.h
+++ b/tools/include/asm-generic/bitops/arch_hweight.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _ASM_GENERIC_BITOPS_ARCH_HWEIGHT_H_
 #define _ASM_GENERIC_BITOPS_ARCH_HWEIGHT_H_
 
diff --git a/tools/include/asm-generic/bitops/const_hweight.h b/tools/include/asm-generic/bitops/const_hweight.h
index 0a7e06623470..149faeeeeaf2 100644
--- a/tools/include/asm-generic/bitops/const_hweight.h
+++ b/tools/include/asm-generic/bitops/const_hweight.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _ASM_GENERIC_BITOPS_CONST_HWEIGHT_H_
 #define _ASM_GENERIC_BITOPS_CONST_HWEIGHT_H_
 
diff --git a/tools/include/asm-generic/bitops/fls.h b/tools/include/asm-generic/bitops/fls.h
index 0576d1f42f43..753aecaab641 100644
--- a/tools/include/asm-generic/bitops/fls.h
+++ b/tools/include/asm-generic/bitops/fls.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _ASM_GENERIC_BITOPS_FLS_H_
 #define _ASM_GENERIC_BITOPS_FLS_H_
 
diff --git a/tools/include/asm-generic/bitops/fls64.h b/tools/include/asm-generic/bitops/fls64.h
index b097cf8444e3..866f2b2304ff 100644
--- a/tools/include/asm-generic/bitops/fls64.h
+++ b/tools/include/asm-generic/bitops/fls64.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _ASM_GENERIC_BITOPS_FLS64_H_
 #define _ASM_GENERIC_BITOPS_FLS64_H_
 
diff --git a/tools/include/asm/export.h b/tools/include/asm/export.h
new file mode 100644
index 000000000000..2cb1a0d83035
--- /dev/null
+++ b/tools/include/asm/export.h
@@ -0,0 +1,7 @@
+#ifndef _TOOLS_ASM_EXPORT_H
+#define _TOOLS_ASM_EXPORT_H
+
+#define EXPORT_SYMBOL(x)
+#define EXPORT_SYMBOL_GPL(x)
+
+#endif /* _TOOLS_ASM_EXPORT_H */
diff --git a/tools/include/linux/hash.h b/tools/include/linux/hash.h
index 2c4183bbc504..ad6fa21d977b 100644
--- a/tools/include/linux/hash.h
+++ b/tools/include/linux/hash.h
@@ -1,4 +1,3 @@
-/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _LINUX_HASH_H
 #define _LINUX_HASH_H
 /* Fast hashing routine for ints,  longs and pointers.
diff --git a/tools/include/uapi/asm-generic/ioctls.h b/tools/include/uapi/asm-generic/ioctls.h
index 14baf9f23a14..040651735662 100644
--- a/tools/include/uapi/asm-generic/ioctls.h
+++ b/tools/include/uapi/asm-generic/ioctls.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 #ifndef __ASM_GENERIC_IOCTLS_H
 #define __ASM_GENERIC_IOCTLS_H
 
diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h
index 203268f9231e..6d319c46fd90 100644
--- a/tools/include/uapi/asm-generic/mman-common.h
+++ b/tools/include/uapi/asm-generic/mman-common.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 #ifndef __ASM_GENERIC_MMAN_COMMON_H
 #define __ASM_GENERIC_MMAN_COMMON_H
 
diff --git a/tools/include/uapi/asm-generic/mman.h b/tools/include/uapi/asm-generic/mman.h
index f7c7b4355e56..2dffcbf705b3 100644
--- a/tools/include/uapi/asm-generic/mman.h
+++ b/tools/include/uapi/asm-generic/mman.h
@@ -1,8 +1,8 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 #ifndef __ASM_GENERIC_MMAN_H
 #define __ASM_GENERIC_MMAN_H
 
-#include <uapi/asm-generic/mman-common.h>
+#include <asm-generic/mman-common.h>
 
 #define MAP_GROWSDOWN	0x0100		/* stack-like segment */
 #define MAP_DENYWRITE	0x0800		/* ETXTBSY */
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 01cc7ba39924..30f2ce76b517 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
  *
  * This program is free software; you can redistribute it and/or
@@ -569,10 +570,9 @@ union bpf_attr {
  *     @flags: reserved for future use
  *     Return: 0 on success or negative error code
  *
- * int bpf_sk_redirect_map(skb, map, key, flags)
+ * int bpf_sk_redirect_map(map, key, flags)
  *     Redirect skb to a sock in map using key as a lookup key for the
  *     sock in map.
- *     @skb: pointer to skb
  *     @map: pointer to sockmap
  *     @key: key to lookup sock in map
  *     @flags: reserved for future use
diff --git a/tools/include/uapi/linux/bpf_common.h b/tools/include/uapi/linux/bpf_common.h
index 64ba734aba80..18be90725ab0 100644
--- a/tools/include/uapi/linux/bpf_common.h
+++ b/tools/include/uapi/linux/bpf_common.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 #ifndef _UAPI__LINUX_BPF_COMMON_H__
 #define _UAPI__LINUX_BPF_COMMON_H__
 
diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h
index b6705247afe8..6448cdd9a350 100644
--- a/tools/include/uapi/linux/fcntl.h
+++ b/tools/include/uapi/linux/fcntl.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 #ifndef _UAPI_LINUX_FCNTL_H
 #define _UAPI_LINUX_FCNTL_H
 
diff --git a/tools/include/uapi/linux/hw_breakpoint.h b/tools/include/uapi/linux/hw_breakpoint.h
index 6394ea9d5524..965e4d8606d8 100644
--- a/tools/include/uapi/linux/hw_breakpoint.h
+++ b/tools/include/uapi/linux/hw_breakpoint.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 #ifndef _UAPI_LINUX_HW_BREAKPOINT_H
 #define _UAPI_LINUX_HW_BREAKPOINT_H
 
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 838887587411..7e99999d6236 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 #ifndef __LINUX_KVM_H
 #define __LINUX_KVM_H
 
diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h
index 64d2b4e556e5..bfd5938fede6 100644
--- a/tools/include/uapi/linux/mman.h
+++ b/tools/include/uapi/linux/mman.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 #ifndef _UAPI_LINUX_MMAN_H
 #define _UAPI_LINUX_MMAN_H
 
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 140ae638cfd6..362493a2f950 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  * Performance events:
  *
diff --git a/tools/include/uapi/linux/sched.h b/tools/include/uapi/linux/sched.h
index e2a6c7b3510b..30a9e51bbb1e 100644
--- a/tools/include/uapi/linux/sched.h
+++ b/tools/include/uapi/linux/sched.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 #ifndef _UAPI_LINUX_SCHED_H
 #define _UAPI_LINUX_SCHED_H
 
diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h
index 9eac599afd91..7b35e98d3c58 100644
--- a/tools/include/uapi/linux/stat.h
+++ b/tools/include/uapi/linux/stat.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 #ifndef _UAPI_LINUX_STAT_H
 #define _UAPI_LINUX_STAT_H
 
diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h
index 60180c0b5dc6..c51f8e5cc608 100644
--- a/tools/include/uapi/linux/vhost.h
+++ b/tools/include/uapi/linux/vhost.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 #ifndef _LINUX_VHOST_H
 #define _LINUX_VHOST_H
 /* Userspace interface for in-kernel virtio accelerators. */
diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h
index 87bf30b182df..c227ccba60ae 100644
--- a/tools/include/uapi/sound/asound.h
+++ b/tools/include/uapi/sound/asound.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
 /*
  *  Advanced Linux Sound Architecture - ALSA - Driver
  *  Copyright (c) 1994-2003 by Jaroslav Kysela <perex@perex.cz>,
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 4397a8b6e6cd..aa66791b1bfc 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -606,9 +606,19 @@ static struct arch *arch__find(const char *name)
 int symbol__alloc_hist(struct symbol *sym)
 {
 	struct annotation *notes = symbol__annotation(sym);
-	const size_t size = symbol__size(sym);
+	size_t size = symbol__size(sym);
 	size_t sizeof_sym_hist;
 
+	/*
+	 * Add buffer of one element for zero length symbol.
+	 * When sample is taken from first instruction of
+	 * zero length symbol, perf still resolves it and
+	 * shows symbol name in perf report and allows to
+	 * annotate it.
+	 */
+	if (size == 0)
+		size = 1;
+
 	/* Check for overflow when calculating sizeof_sym_hist */
 	if (size > (SIZE_MAX - sizeof(struct sym_hist)) / sizeof(struct sym_hist_entry))
 		return -1;
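
The overflow guard above follows a common pattern; here is a small standalone sketch of it with stand-in sizes (the struct sizes are made up, not perf's).

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

/* Stand-ins for sizeof(struct sym_hist) and sizeof(struct sym_hist_entry). */
#define HDR_SIZE	((size_t)16)
#define ENTRY_SIZE	((size_t)24)

/* Returns 0 and fills *bytes, or -1 if hdr + nr * entry would overflow. */
static int sizeof_hist(size_t nr, size_t *bytes)
{
	if (nr > (SIZE_MAX - HDR_SIZE) / ENTRY_SIZE)
		return -1;
	*bytes = HDR_SIZE + nr * ENTRY_SIZE;
	return 0;
}

int main(void)
{
	size_t bytes;

	if (sizeof_hist(1, &bytes) == 0)	/* zero-length symbol bumped to 1 */
		printf("%zu bytes\n", bytes);
	return 0;
}
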
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index dcfdafdc2f1c..6680e4fb7967 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -154,6 +154,10 @@ do {							\
 	yycolumn += yyleng;				\
 } while (0);
 
+#define USER_REJECT		\
+	yycolumn -= yyleng;	\
+	REJECT
+
 %}
 
 %x mem
@@ -335,8 +339,8 @@ r{num_raw_hex}		{ return raw(yyscanner); }
 {num_hex}		{ return value(yyscanner, 16); }
 
 {modifier_event}	{ return str(yyscanner, PE_MODIFIER_EVENT); }
-{bpf_object}		{ if (!isbpf(yyscanner)) REJECT; return str(yyscanner, PE_BPF_OBJECT); }
-{bpf_source}		{ if (!isbpf(yyscanner)) REJECT; return str(yyscanner, PE_BPF_SOURCE); }
+{bpf_object}		{ if (!isbpf(yyscanner)) USER_REJECT; return str(yyscanner, PE_BPF_OBJECT); }
+{bpf_source}		{ if (!isbpf(yyscanner)) USER_REJECT; return str(yyscanner, PE_BPF_SOURCE); }
 {name}			{ return pmu_str_check(yyscanner); }
 "/"			{ BEGIN(config); return '/'; }
 -			{ return '-'; }

^ permalink raw reply	[flat|nested] 316+ messages in thread

* [GIT PULL] perf fixes
@ 2017-10-14 16:04 Ingo Molnar
  0 siblings, 0 replies; 316+ messages in thread
From: Ingo Molnar @ 2017-10-14 16:04 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Peter Zijlstra, Arnaldo Carvalho de Melo, Andrew Morton

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: 47a74bdcbfeff543f706dc0e385eebbb5d655ed2 Merge tag 'perf-urgent-for-mingo-4.14-20171010' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent

Some tooling fixes plus three kernel fixes: a memory leak fix, a statistics fix 
and a crash fix.

 Thanks,

	Ingo

------------------>
Arnaldo Carvalho de Melo (1):
      tools include uapi bpf.h: Sync kernel ABI header with tooling header

Colin Ian King (1):
      perf/x86/intel/uncore: Fix memory leaks on allocation failures

Mark Rutland (1):
      perf pmu: Unbreak perf record for arm/arm64 with events with explicit PMU

Mark Santaniello (1):
      perf script: Add missing separator for "-F ip,brstack" (and brstackoff)

Ravi Bangoria (1):
      perf callchain: Compare dsos (as well) for CCKEY_FUNCTION

Will Deacon (1):
      perf/core: Avoid freeing static PMU contexts when PMU is unregistered

leilei.lin (1):
      perf/core: Fix cgroup time when scheduling descendants


 arch/x86/events/intel/uncore.c | 12 +++++++--
 kernel/events/core.c           | 10 +++++++-
 tools/include/uapi/linux/bpf.h |  2 +-
 tools/perf/builtin-script.c    |  4 +--
 tools/perf/util/callchain.c    |  6 ++++-
 tools/perf/util/parse-events.c |  9 ++++---
 tools/perf/util/pmu.c          | 56 +++++++++++++++++++++++++++++++-----------
 tools/perf/util/pmu.h          |  1 +
 8 files changed, 74 insertions(+), 26 deletions(-)

diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 1c5390f1cf09..d45e06346f14 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -822,7 +822,7 @@ static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
 		pmus[i].type	= type;
 		pmus[i].boxes	= kzalloc(size, GFP_KERNEL);
 		if (!pmus[i].boxes)
-			return -ENOMEM;
+			goto err;
 	}
 
 	type->pmus = pmus;
@@ -836,7 +836,7 @@ static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
 		attr_group = kzalloc(sizeof(struct attribute *) * (i + 1) +
 					sizeof(*attr_group), GFP_KERNEL);
 		if (!attr_group)
-			return -ENOMEM;
+			goto err;
 
 		attrs = (struct attribute **)(attr_group + 1);
 		attr_group->name = "events";
@@ -849,7 +849,15 @@ static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
 	}
 
 	type->pmu_group = &uncore_pmu_attr_group;
+
 	return 0;
+
+err:
+	for (i = 0; i < type->num_boxes; i++)
+		kfree(pmus[i].boxes);
+	kfree(pmus);
+
+	return -ENOMEM;
 }
 
 static int __init
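
The error path added above follows the usual goto-unwind idiom; a small userspace sketch of the same shape (names are made up):

#include <stdlib.h>

/* Allocate n boxes; on any failure, free everything allocated so far. */
static int init_boxes(void **boxes, int n, size_t size)
{
	int i;

	for (i = 0; i < n; i++) {
		boxes[i] = calloc(1, size);
		if (!boxes[i])
			goto err;
	}
	return 0;

err:
	while (i--)
		free(boxes[i]);
	return -1;
}

int main(void)
{
	void *boxes[4];
	int i;

	if (init_boxes(boxes, 4, 64))
		return 1;
	for (i = 0; i < 4; i++)
		free(boxes[i]);
	return 0;
}
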
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6bc21e202ae4..9d93db81fa36 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -662,7 +662,7 @@ static inline void update_cgrp_time_from_event(struct perf_event *event)
 	/*
 	 * Do not update time when cgroup is not active
 	 */
-	if (cgrp == event->cgrp)
+       if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup))
 		__update_cgrp_time(event->cgrp);
 }
 
@@ -8955,6 +8955,14 @@ static struct perf_cpu_context __percpu *find_pmu_context(int ctxn)
 
 static void free_pmu_context(struct pmu *pmu)
 {
+	/*
+	 * Static contexts such as perf_sw_context have a global lifetime
+	 * and may be shared between different PMUs. Avoid freeing them
+	 * when a single PMU is going away.
+	 */
+	if (pmu->task_ctx_nr > perf_invalid_context)
+		return;
+
 	mutex_lock(&pmus_lock);
 	free_percpu(pmu->pmu_cpu_context);
 	mutex_unlock(&pmus_lock);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 43ab5c402f98..f90860d1f897 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -312,7 +312,7 @@ union bpf_attr {
  *     jump into another BPF program
  *     @ctx: context pointer passed to next program
  *     @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY
- *     @index: index inside array that selects specific program to run
+ *     @index: 32-bit index inside array that selects specific program to run
  *     Return: 0 on success or negative error
  *
  * int bpf_clone_redirect(skb, ifindex, flags)
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 3d4c3b5e1868..0c977b6e0f8b 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -586,7 +586,7 @@ static void print_sample_brstack(struct perf_sample *sample,
 			thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt);
 		}
 
-		printf("0x%"PRIx64, from);
+		printf(" 0x%"PRIx64, from);
 		if (PRINT_FIELD(DSO)) {
 			printf("(");
 			map__fprintf_dsoname(alf.map, stdout);
@@ -681,7 +681,7 @@ static void print_sample_brstackoff(struct perf_sample *sample,
 		if (alt.map && !alt.map->dso->adjust_symbols)
 			to = map__map_ip(alt.map, to);
 
-		printf("0x%"PRIx64, from);
+		printf(" 0x%"PRIx64, from);
 		if (PRINT_FIELD(DSO)) {
 			printf("(");
 			map__fprintf_dsoname(alf.map, stdout);
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index be09d77cade0..a971caf3759d 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -685,6 +685,8 @@ static enum match_result match_chain(struct callchain_cursor_node *node,
 {
 	struct symbol *sym = node->sym;
 	u64 left, right;
+	struct dso *left_dso = NULL;
+	struct dso *right_dso = NULL;
 
 	if (callchain_param.key == CCKEY_SRCLINE) {
 		enum match_result match = match_chain_srcline(node, cnode);
@@ -696,12 +698,14 @@ static enum match_result match_chain(struct callchain_cursor_node *node,
 	if (cnode->ms.sym && sym && callchain_param.key == CCKEY_FUNCTION) {
 		left = cnode->ms.sym->start;
 		right = sym->start;
+		left_dso = cnode->ms.map->dso;
+		right_dso = node->map->dso;
 	} else {
 		left = cnode->ip;
 		right = node->ip;
 	}
 
-	if (left == right) {
+	if (left == right && left_dso == right_dso) {
 		if (node->branch) {
 			cnode->branch_count++;
 
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index f6257fb4f08c..39b15968eab1 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -309,10 +309,11 @@ static char *get_config_name(struct list_head *head_terms)
 static struct perf_evsel *
 __add_event(struct list_head *list, int *idx,
 	    struct perf_event_attr *attr,
-	    char *name, struct cpu_map *cpus,
+	    char *name, struct perf_pmu *pmu,
 	    struct list_head *config_terms, bool auto_merge_stats)
 {
 	struct perf_evsel *evsel;
+	struct cpu_map *cpus = pmu ? pmu->cpus : NULL;
 
 	event_attr_init(attr);
 
@@ -323,7 +324,7 @@ __add_event(struct list_head *list, int *idx,
 	(*idx)++;
 	evsel->cpus        = cpu_map__get(cpus);
 	evsel->own_cpus    = cpu_map__get(cpus);
-	evsel->system_wide = !!cpus;
+	evsel->system_wide = pmu ? pmu->is_uncore : false;
 	evsel->auto_merge_stats = auto_merge_stats;
 
 	if (name)
@@ -1233,7 +1234,7 @@ static int __parse_events_add_pmu(struct parse_events_state *parse_state,
 
 	if (!head_config) {
 		attr.type = pmu->type;
-		evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu->cpus, NULL, auto_merge_stats);
+		evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu, NULL, auto_merge_stats);
 		return evsel ? 0 : -ENOMEM;
 	}
 
@@ -1254,7 +1255,7 @@ static int __parse_events_add_pmu(struct parse_events_state *parse_state,
 		return -EINVAL;
 
 	evsel = __add_event(list, &parse_state->idx, &attr,
-			    get_config_name(head_config), pmu->cpus,
+			    get_config_name(head_config), pmu,
 			    &config_terms, auto_merge_stats);
 	if (evsel) {
 		evsel->unit = info.unit;
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index ac16a9db1fb5..1c4d7b4e4fb5 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -470,17 +470,36 @@ static void pmu_read_sysfs(void)
 	closedir(dir);
 }
 
+static struct cpu_map *__pmu_cpumask(const char *path)
+{
+	FILE *file;
+	struct cpu_map *cpus;
+
+	file = fopen(path, "r");
+	if (!file)
+		return NULL;
+
+	cpus = cpu_map__read(file);
+	fclose(file);
+	return cpus;
+}
+
+/*
+ * Uncore PMUs have a "cpumask" file under sysfs. CPU PMUs (e.g. on arm/arm64)
+ * may have a "cpus" file.
+ */
+#define CPUS_TEMPLATE_UNCORE	"%s/bus/event_source/devices/%s/cpumask"
+#define CPUS_TEMPLATE_CPU	"%s/bus/event_source/devices/%s/cpus"
+
 static struct cpu_map *pmu_cpumask(const char *name)
 {
-	struct stat st;
 	char path[PATH_MAX];
-	FILE *file;
 	struct cpu_map *cpus;
 	const char *sysfs = sysfs__mountpoint();
 	const char *templates[] = {
-		 "%s/bus/event_source/devices/%s/cpumask",
-		 "%s/bus/event_source/devices/%s/cpus",
-		 NULL
+		CPUS_TEMPLATE_UNCORE,
+		CPUS_TEMPLATE_CPU,
+		NULL
 	};
 	const char **template;
 
@@ -489,20 +508,25 @@ static struct cpu_map *pmu_cpumask(const char *name)
 
 	for (template = templates; *template; template++) {
 		snprintf(path, PATH_MAX, *template, sysfs, name);
-		if (stat(path, &st) == 0)
-			break;
+		cpus = __pmu_cpumask(path);
+		if (cpus)
+			return cpus;
 	}
 
-	if (!*template)
-		return NULL;
+	return NULL;
+}
 
-	file = fopen(path, "r");
-	if (!file)
-		return NULL;
+static bool pmu_is_uncore(const char *name)
+{
+	char path[PATH_MAX];
+	struct cpu_map *cpus;
+	const char *sysfs = sysfs__mountpoint();
 
-	cpus = cpu_map__read(file);
-	fclose(file);
-	return cpus;
+	snprintf(path, PATH_MAX, CPUS_TEMPLATE_UNCORE, sysfs, name);
+	cpus = __pmu_cpumask(path);
+	cpu_map__put(cpus);
+
+	return !!cpus;
 }
 
 /*
@@ -617,6 +641,8 @@ static struct perf_pmu *pmu_lookup(const char *name)
 
 	pmu->cpus = pmu_cpumask(name);
 
+	pmu->is_uncore = pmu_is_uncore(name);
+
 	INIT_LIST_HEAD(&pmu->format);
 	INIT_LIST_HEAD(&pmu->aliases);
 	list_splice(&format, &pmu->format);
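
A userspace sketch of the uncore heuristic introduced above: a PMU whose sysfs directory exposes a "cpumask" file is treated as uncore. The path follows the CPUS_TEMPLATE_UNCORE string in the patch; the PMU name used below is only an example.

#include <stdbool.h>
#include <stdio.h>

static bool pmu_has_cpumask(const char *name)
{
	char path[4096];
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/bus/event_source/devices/%s/cpumask", name);
	f = fopen(path, "r");
	if (!f)
		return false;
	fclose(f);
	return true;
}

int main(void)
{
	printf("uncore_imc_0: %s\n",
	       pmu_has_cpumask("uncore_imc_0") ? "uncore" : "not uncore/absent");
	return 0;
}
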
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 389e9729331f..fe0de0502ce2 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -22,6 +22,7 @@ struct perf_pmu {
 	char *name;
 	__u32 type;
 	bool selectable;
+	bool is_uncore;
 	struct perf_event_attr *default_config;
 	struct cpu_map *cpus;
 	struct list_head format;  /* HEAD struct perf_pmu_format -> list */

^ permalink raw reply	[flat|nested] 316+ messages in thread

* [GIT PULL] perf fixes
@ 2017-09-13 18:00 Ingo Molnar
  0 siblings, 0 replies; 316+ messages in thread
From: Ingo Molnar @ 2017-09-13 18:00 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Andrew Morton, Thomas Gleixner

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: b130a699c07155a1d6ef7d971a5f3bf0e3818d5a Merge tag 'perf-urgent-for-mingo-4.14-20170912' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent

A handful of tooling fixes.

 Thanks,

	Ingo

------------------>
Arnaldo Carvalho de Melo (1):
      tools include linux: Guard against redefinition of some macros

Jiri Olsa (4):
      tools lib api: Fix make DEBUG=1 build
      perf tools: Open perf.data with O_CLOEXEC flag
      perf ui progress: Make sure we always define step value
      perf ui progress: Fix progress update

Milian Wolff (3):
      perf tests: Fix compile when libunwind's unwind.h is available
      perf tools: Support running perf binaries with a dash in their name
      perf stat: Wait for the correct child

Taeung Song (1):
      perf config: Check not only section->from_system_config but also item's


 tools/include/linux/compiler-gcc.h |  9 ++++++---
 tools/lib/api/Makefile             |  8 +++++++-
 tools/perf/builtin-config.c        |  2 +-
 tools/perf/builtin-stat.c          |  2 +-
 tools/perf/perf.c                  | 14 ++++++++++----
 tools/perf/tests/dwarf-unwind.c    |  2 +-
 tools/perf/ui/progress.c           |  9 +++++++--
 tools/perf/util/data.c             | 13 ++++++++++++-
 8 files changed, 45 insertions(+), 14 deletions(-)

diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h
index bd39b2090ad1..3723b9f8f964 100644
--- a/tools/include/linux/compiler-gcc.h
+++ b/tools/include/linux/compiler-gcc.h
@@ -21,11 +21,14 @@
 #define __must_be_array(a)	BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
 
 #define  noinline	__attribute__((noinline))
-
+#ifndef __packed
 #define __packed	__attribute__((packed))
-
+#endif
+#ifndef __noreturn
 #define __noreturn	__attribute__((noreturn))
-
+#endif
+#ifndef __aligned
 #define __aligned(x)	__attribute__((aligned(x)))
+#endif
 #define __printf(a, b)	__attribute__((format(printf, a, b)))
 #define __scanf(a, b)	__attribute__((format(scanf, a, b)))
diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile
index 4563ba7ede6f..1e83e3c07448 100644
--- a/tools/lib/api/Makefile
+++ b/tools/lib/api/Makefile
@@ -17,13 +17,19 @@ MAKEFLAGS += --no-print-directory
 LIBFILE = $(OUTPUT)libapi.a
 
 CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
-CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -fPIC
+CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -U_FORTIFY_SOURCE -fPIC
 
+ifeq ($(DEBUG),0)
 ifeq ($(CC_NO_CLANG), 0)
   CFLAGS += -O3
 else
   CFLAGS += -O6
 endif
+endif
+
+ifeq ($(DEBUG),0)
+  CFLAGS += -D_FORTIFY_SOURCE
+endif
 
 # Treat warnings as errors unless directed not to
 ifneq ($(WERROR),0)
diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c
index 3ddcc6e2abeb..a1d82e33282c 100644
--- a/tools/perf/builtin-config.c
+++ b/tools/perf/builtin-config.c
@@ -59,7 +59,7 @@ static int set_config(struct perf_config_set *set, const char *file_name,
 		fprintf(fp, "[%s]\n", section->name);
 
 		perf_config_items__for_each_entry(&section->items, item) {
-			if (!use_system_config && section->from_system_config)
+			if (!use_system_config && item->from_system_config)
 				continue;
 			if (item->value)
 				fprintf(fp, "\t%s = %s\n",
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 85e992d9215b..69523ed55894 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -707,7 +707,7 @@ static int __run_perf_stat(int argc, const char **argv)
 				process_interval();
 			}
 		}
-		wait(&status);
+		waitpid(child_pid, &status, 0);
 
 		if (workload_exec_errno) {
 			const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
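
A small standalone sketch of the difference the one-liner above makes: wait() reaps whichever child exits first, while waitpid() pins the status to the workload's pid (error handling trimmed).

#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

/* Wait specifically for child_pid instead of any child. */
static int reap_workload(pid_t child_pid)
{
	int status;

	if (waitpid(child_pid, &status, 0) < 0)
		return -1;
	return WIFEXITED(status) ? WEXITSTATUS(status) : -1;
}

int main(void)
{
	pid_t pid = fork();

	if (pid == 0)
		_exit(42);
	return reap_workload(pid) == 42 ? 0 : 1;
}
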
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index e0279babe0c0..2f19e03c5c40 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -467,15 +467,21 @@ int main(int argc, const char **argv)
 	 *  - cannot execute it externally (since it would just do
 	 *    the same thing over again)
 	 *
-	 * So we just directly call the internal command handler, and
-	 * die if that one cannot handle it.
+	 * So we just directly call the internal command handler. If that one
+	 * fails to handle this, then maybe we just run a renamed perf binary
+	 * that contains a dash in its name. To handle this scenario, we just
+	 * fall through and ignore the "xxxx" part of the command string.
 	 */
 	if (strstarts(cmd, "perf-")) {
 		cmd += 5;
 		argv[0] = cmd;
 		handle_internal_command(argc, argv);
-		fprintf(stderr, "cannot handle %s internally", cmd);
-		goto out;
+		/*
+		 * If the command is handled, the above function does not
+		 * return undo changes and fall through in such a case.
+		 */
+		cmd -= 5;
+		argv[0] = cmd;
 	}
 	if (strstarts(cmd, "trace")) {
 #ifdef HAVE_LIBAUDIT_SUPPORT
diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
index 2a7b9b47bbcb..9ba1d216a89f 100644
--- a/tools/perf/tests/dwarf-unwind.c
+++ b/tools/perf/tests/dwarf-unwind.c
@@ -6,7 +6,7 @@
 #include "debug.h"
 #include "machine.h"
 #include "event.h"
-#include "unwind.h"
+#include "../util/unwind.h"
 #include "perf_regs.h"
 #include "map.h"
 #include "thread.h"
diff --git a/tools/perf/ui/progress.c b/tools/perf/ui/progress.c
index a0f24c7115c5..ae91c8148edf 100644
--- a/tools/perf/ui/progress.c
+++ b/tools/perf/ui/progress.c
@@ -1,3 +1,4 @@
+#include <linux/kernel.h>
 #include "../cache.h"
 #include "progress.h"
 
@@ -14,10 +15,14 @@ struct ui_progress_ops *ui_progress__ops = &null_progress__ops;
 
 void ui_progress__update(struct ui_progress *p, u64 adv)
 {
+	u64 last = p->curr;
+
 	p->curr += adv;
 
 	if (p->curr >= p->next) {
-		p->next += p->step;
+		u64 nr = DIV_ROUND_UP(p->curr - last, p->step);
+
+		p->next += nr * p->step;
 		ui_progress__ops->update(p);
 	}
 }
@@ -25,7 +30,7 @@ void ui_progress__update(struct ui_progress *p, u64 adv)
 void ui_progress__init(struct ui_progress *p, u64 total, const char *title)
 {
 	p->curr = 0;
-	p->next = p->step = total / 16;
+	p->next = p->step = total / 16 ?: 1;
 	p->total = total;
 	p->title = title;
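
A worked example of the update rule fixed above, with made-up numbers: one large advance must move 'next' past the current position in whole steps, otherwise the progress bar is redrawn on every subsequent update.

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	unsigned long long step = 16, next = 16, curr = 0, adv = 100;
	unsigned long long last = curr;

	curr += adv;
	if (curr >= next) {
		unsigned long long nr = DIV_ROUND_UP(curr - last, step);

		next += nr * step;	/* 16 + 7 * 16 = 128, past curr = 100 */
	}
	printf("curr=%llu next=%llu\n", curr, next);
	return 0;
}
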
 
diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c
index e84bbc8ec058..263f5a906ba5 100644
--- a/tools/perf/util/data.c
+++ b/tools/perf/util/data.c
@@ -10,6 +10,16 @@
 #include "util.h"
 #include "debug.h"
 
+#ifndef O_CLOEXEC
+#ifdef __sparc__
+#define O_CLOEXEC	0x400000
+#elif defined(__alpha__) || defined(__hppa__)
+#define O_CLOEXEC	010000000
+#else
+#define O_CLOEXEC	02000000
+#endif
+#endif
+
 static bool check_pipe(struct perf_data_file *file)
 {
 	struct stat st;
@@ -96,7 +106,8 @@ static int open_file_write(struct perf_data_file *file)
 	if (check_backup(file))
 		return -1;
 
-	fd = open(file->path, O_CREAT|O_RDWR|O_TRUNC, S_IRUSR|S_IWUSR);
+	fd = open(file->path, O_CREAT|O_RDWR|O_TRUNC|O_CLOEXEC,
+		  S_IRUSR|S_IWUSR);
 
 	if (fd < 0)
 		pr_err("failed to open %s : %s\n", file->path,
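
A standalone sketch of what O_CLOEXEC buys here: the descriptor gets the close-on-exec flag set at open time, so it cannot leak into an exec'ed workload. /dev/null stands in for the perf.data file.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/dev/null", O_RDONLY | O_CLOEXEC);

	if (fd < 0)
		return 1;
	printf("FD_CLOEXEC is %s\n",
	       (fcntl(fd, F_GETFD) & FD_CLOEXEC) ? "set" : "not set");
	close(fd);
	return 0;
}
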

^ permalink raw reply	[flat|nested] 316+ messages in thread

* [GIT PULL] perf fixes
@ 2017-09-12 15:32 Ingo Molnar
  0 siblings, 0 replies; 316+ messages in thread
From: Ingo Molnar @ 2017-09-12 15:32 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Arnaldo Carvalho de Melo, Peter Zijlstra,
	Thomas Gleixner, Andrew Morton

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: 770e96125515daf1c7bc179323f2e0d488dfe6ac Merge tag 'perf-core-for-mingo-4.14-20170901' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent

Perf tooling updates and fixes.

 Thanks,

	Ingo

------------------>
Arnaldo Carvalho de Melo (4):
      perf syscalltbl: Support glob matching on syscall names
      perf trace: Support syscall name globbing
      perf stat: Only auto-merge events that are PMU aliases
      perf annotate browser: Help for cycling thru hottest instructions with TAB/shift+TAB

Jack Henschel (1):
      perf intel-pt: Fix syntax in documentation of config option

Jin Yao (1):
      perf report: Calculate the average cycles of iterations

Kan Liang (5):
      perf tools: Support new sample type for physical address
      perf sort: Add sort option for physical address
      perf mem: Support physical address
      perf script: Support physical address
      perf test: Add test case for PERF_SAMPLE_PHYS_ADDR

Ravi Bangoria (1):
      perf test powerpc: Fix 'Object code reading' test

Sukadev Bhattiprolu (1):
      perf vendor events powerpc: Remove duplicate events


 tools/include/uapi/linux/perf_event.h              |   4 +-
 tools/perf/Documentation/intel-pt.txt              |   2 +-
 tools/perf/Documentation/perf-mem.txt              |   4 +
 tools/perf/Documentation/perf-record.txt           |   5 +-
 tools/perf/Documentation/perf-report.txt           |   1 +
 tools/perf/Documentation/perf-script.txt           |   2 +-
 tools/perf/Documentation/perf-trace.txt            |   2 +-
 tools/perf/builtin-mem.c                           |  97 ++++++++++++-----
 tools/perf/builtin-record.c                        |   2 +
 tools/perf/builtin-script.c                        |  15 ++-
 tools/perf/builtin-stat.c                          |   2 +-
 tools/perf/builtin-trace.c                         |  39 ++++++-
 tools/perf/perf.h                                  |   1 +
 .../pmu-events/arch/powerpc/power9/frontend.json   |   7 +-
 .../perf/pmu-events/arch/powerpc/power9/other.json | 120 ---------------------
 .../pmu-events/arch/powerpc/power9/pipeline.json   |   7 +-
 tools/perf/pmu-events/arch/powerpc/power9/pmc.json |   7 +-
 tools/perf/tests/code-reading.c                    |   5 +
 tools/perf/tests/sample-parsing.c                  |   6 +-
 tools/perf/ui/browsers/annotate.c                  |   3 +-
 tools/perf/ui/browsers/hists.c                     |   8 +-
 tools/perf/ui/stdio/hist.c                         |  10 +-
 tools/perf/util/callchain.c                        |  49 ++++-----
 tools/perf/util/callchain.h                        |   9 +-
 tools/perf/util/event.h                            |   1 +
 tools/perf/util/evsel.c                            |  19 +++-
 tools/perf/util/evsel.h                            |   1 +
 tools/perf/util/hist.c                             |   4 +
 tools/perf/util/hist.h                             |   1 +
 tools/perf/util/machine.c                          |  96 ++++++++++-------
 tools/perf/util/parse-events.c                     |  24 +++--
 tools/perf/util/session.c                          |   3 +
 tools/perf/util/sort.c                             |  42 ++++++++
 tools/perf/util/sort.h                             |   1 +
 tools/perf/util/symbol.h                           |   1 +
 tools/perf/util/syscalltbl.c                       |  33 ++++++
 tools/perf/util/syscalltbl.h                       |   3 +
 37 files changed, 368 insertions(+), 268 deletions(-)

^ permalink raw reply	[flat|nested] 316+ messages in thread

* [GIT PULL] perf fixes
@ 2017-07-21 10:15 Ingo Molnar
  0 siblings, 0 replies; 316+ messages in thread
From: Ingo Molnar @ 2017-07-21 10:15 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Peter Zijlstra, Thomas Gleixner,
	Arnaldo Carvalho de Melo, Jiri Olsa, Andrew Morton

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: df6c3db8d30fb1699ccbc403196b86324f4257af perf/x86/intel: Add proper condition to run sched_task callbacks

Two hw-enablement patches, two race fixes, three fixes for semantics 
regressions, plus a number of tooling fixes.

 Thanks,

	Ingo

------------------>
Alexander Shishkin (1):
      perf/core: Fix scheduling regression of pinned groups

Arnaldo Carvalho de Melo (3):
      perf evsel: Fix attr.exclude_kernel setting for default cycles:p
      perf evsel: State in the default event name if attr.exclude_kernel is set
      perf symbols: Accept zero as the kernel base address

Harry Pan (1):
      perf/x86/intel: Enable C-state residency events for Apollo Lake

Ingo Molnar (1):
      Revert "perf/core: Drop kernel samples even though :u is specified"

Jin Yao (1):
      perf annotate: Fix broken arrow at row 0 connecting jmp instruction to its target

Jiri Olsa (3):
      perf/x86/intel: Fix debug_store reset field for freq events
      perf/core: Fix locking for children siblings group read
      perf/x86/intel: Add proper condition to run sched_task callbacks

Kan Liang (1):
      perf/x86/intel: Add Goldmont Plus CPU PMU support


 arch/x86/events/intel/core.c   | 164 ++++++++++++++++++++++++++++++++++++++++-
 arch/x86/events/intel/cstate.c |  26 +++++--
 arch/x86/events/intel/ds.c     |  22 ++++--
 arch/x86/events/intel/lbr.c    |   4 +
 arch/x86/events/perf_event.h   |   2 +
 kernel/events/core.c           |  33 +++------
 tools/perf/ui/browser.c        |   2 +-
 tools/perf/util/evsel.c        |   8 +-
 tools/perf/util/machine.c      |   2 +-
 9 files changed, 221 insertions(+), 42 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index aa62437d1aa1..98b0f0729527 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -1708,6 +1708,120 @@ static __initconst const u64 glm_hw_cache_extra_regs
 	},
 };
 
+static __initconst const u64 glp_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	[C(L1D)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= 0x81d0,	/* MEM_UOPS_RETIRED.ALL_LOADS */
+			[C(RESULT_MISS)]	= 0x0,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= 0x82d0,	/* MEM_UOPS_RETIRED.ALL_STORES */
+			[C(RESULT_MISS)]	= 0x0,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= 0x0,
+			[C(RESULT_MISS)]	= 0x0,
+		},
+	},
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= 0x0380,	/* ICACHE.ACCESSES */
+			[C(RESULT_MISS)]	= 0x0280,	/* ICACHE.MISSES */
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= -1,
+			[C(RESULT_MISS)]	= -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= 0x0,
+			[C(RESULT_MISS)]	= 0x0,
+		},
+	},
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
+			[C(RESULT_MISS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
+			[C(RESULT_MISS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= 0x0,
+			[C(RESULT_MISS)]	= 0x0,
+		},
+	},
+	[C(DTLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= 0x81d0,	/* MEM_UOPS_RETIRED.ALL_LOADS */
+			[C(RESULT_MISS)]	= 0xe08,	/* DTLB_LOAD_MISSES.WALK_COMPLETED */
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= 0x82d0,	/* MEM_UOPS_RETIRED.ALL_STORES */
+			[C(RESULT_MISS)]	= 0xe49,	/* DTLB_STORE_MISSES.WALK_COMPLETED */
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= 0x0,
+			[C(RESULT_MISS)]	= 0x0,
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= 0x00c0,	/* INST_RETIRED.ANY_P */
+			[C(RESULT_MISS)]	= 0x0481,	/* ITLB.MISS */
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= -1,
+			[C(RESULT_MISS)]	= -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= -1,
+			[C(RESULT_MISS)]	= -1,
+		},
+	},
+	[C(BPU)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= 0x00c4,	/* BR_INST_RETIRED.ALL_BRANCHES */
+			[C(RESULT_MISS)]	= 0x00c5,	/* BR_MISP_RETIRED.ALL_BRANCHES */
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= -1,
+			[C(RESULT_MISS)]	= -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= -1,
+			[C(RESULT_MISS)]	= -1,
+		},
+	},
+};
+
+static __initconst const u64 glp_hw_cache_extra_regs
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= GLM_DEMAND_READ|
+						  GLM_LLC_ACCESS,
+			[C(RESULT_MISS)]	= GLM_DEMAND_READ|
+						  GLM_LLC_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= GLM_DEMAND_WRITE|
+						  GLM_LLC_ACCESS,
+			[C(RESULT_MISS)]	= GLM_DEMAND_WRITE|
+						  GLM_LLC_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= 0x0,
+			[C(RESULT_MISS)]	= 0x0,
+		},
+	},
+};
+
 #define KNL_OT_L2_HITE		BIT_ULL(19) /* Other Tile L2 Hit */
 #define KNL_OT_L2_HITF		BIT_ULL(20) /* Other Tile L2 Hit */
 #define KNL_MCDRAM_LOCAL	BIT_ULL(21)
@@ -3016,6 +3130,9 @@ static int hsw_hw_config(struct perf_event *event)
 	return 0;
 }
 
+static struct event_constraint counter0_constraint =
+			INTEL_ALL_EVENT_CONSTRAINT(0, 0x1);
+
 static struct event_constraint counter2_constraint =
 			EVENT_CONSTRAINT(0, 0x4, 0);
 
@@ -3037,6 +3154,21 @@ hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 	return c;
 }
 
+static struct event_constraint *
+glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+			  struct perf_event *event)
+{
+	struct event_constraint *c;
+
+	/* :ppp means to do reduced skid PEBS which is PMC0 only. */
+	if (event->attr.precise_ip == 3)
+		return &counter0_constraint;
+
+	c = intel_get_event_constraints(cpuc, idx, event);
+
+	return c;
+}
+
 /*
  * Broadwell:
  *
@@ -3265,10 +3397,8 @@ static void intel_pmu_cpu_dying(int cpu)
 static void intel_pmu_sched_task(struct perf_event_context *ctx,
 				 bool sched_in)
 {
-	if (x86_pmu.pebs_active)
-		intel_pmu_pebs_sched_task(ctx, sched_in);
-	if (x86_pmu.lbr_nr)
-		intel_pmu_lbr_sched_task(ctx, sched_in);
+	intel_pmu_pebs_sched_task(ctx, sched_in);
+	intel_pmu_lbr_sched_task(ctx, sched_in);
 }
 
 PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
@@ -3838,6 +3968,32 @@ __init int intel_pmu_init(void)
 		pr_cont("Goldmont events, ");
 		break;
 
+	case INTEL_FAM6_ATOM_GEMINI_LAKE:
+		memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+		memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs,
+		       sizeof(hw_cache_extra_regs));
+
+		intel_pmu_lbr_init_skl();
+
+		x86_pmu.event_constraints = intel_slm_event_constraints;
+		x86_pmu.pebs_constraints = intel_glp_pebs_event_constraints;
+		x86_pmu.extra_regs = intel_glm_extra_regs;
+		/*
+		 * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS
+		 * for precise cycles.
+		 */
+		x86_pmu.pebs_aliases = NULL;
+		x86_pmu.pebs_prec_dist = true;
+		x86_pmu.lbr_pt_coexist = true;
+		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+		x86_pmu.get_event_constraints = glp_get_event_constraints;
+		x86_pmu.cpu_events = glm_events_attrs;
+		/* Goldmont Plus has 4-wide pipeline */
+		event_attr_td_total_slots_scale_glm.event_str = "4";
+		pr_cont("Goldmont plus events, ");
+		break;
+
 	case INTEL_FAM6_WESTMERE:
 	case INTEL_FAM6_WESTMERE_EP:
 	case INTEL_FAM6_WESTMERE_EX:
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 238ae3248ba5..4cf100ff2a37 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -40,16 +40,16 @@
  * Model specific counters:
  *	MSR_CORE_C1_RES: CORE C1 Residency Counter
  *			 perf code: 0x00
- *			 Available model: SLM,AMT
+ *			 Available model: SLM,AMT,GLM
  *			 Scope: Core (each processor core has a MSR)
  *	MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
  *			       perf code: 0x01
- *			       Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ *			       Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,GLM
  *			       Scope: Core
  *	MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter
  *			       perf code: 0x02
  *			       Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW
- *						SKL,KNL
+ *						SKL,KNL,GLM
  *			       Scope: Core
  *	MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
  *			       perf code: 0x03
@@ -57,16 +57,17 @@
  *			       Scope: Core
  *	MSR_PKG_C2_RESIDENCY:  Package C2 Residency Counter.
  *			       perf code: 0x00
- *			       Available model: SNB,IVB,HSW,BDW,SKL,KNL
+ *			       Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM
  *			       Scope: Package (physical package)
  *	MSR_PKG_C3_RESIDENCY:  Package C3 Residency Counter.
  *			       perf code: 0x01
  *			       Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL
+ *						GLM
  *			       Scope: Package (physical package)
  *	MSR_PKG_C6_RESIDENCY:  Package C6 Residency Counter.
  *			       perf code: 0x02
  *			       Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW
- *						SKL,KNL
+ *						SKL,KNL,GLM
  *			       Scope: Package (physical package)
  *	MSR_PKG_C7_RESIDENCY:  Package C7 Residency Counter.
  *			       perf code: 0x03
@@ -82,7 +83,7 @@
  *			       Scope: Package (physical package)
  *	MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
  *			       perf code: 0x06
- *			       Available model: HSW ULT only
+ *			       Available model: HSW ULT, GLM
  *			       Scope: Package (physical package)
  *
  */
@@ -504,6 +505,17 @@ static const struct cstate_model knl_cstates __initconst = {
 };
 
 
+static const struct cstate_model glm_cstates __initconst = {
+	.core_events		= BIT(PERF_CSTATE_CORE_C1_RES) |
+				  BIT(PERF_CSTATE_CORE_C3_RES) |
+				  BIT(PERF_CSTATE_CORE_C6_RES),
+
+	.pkg_events		= BIT(PERF_CSTATE_PKG_C2_RES) |
+				  BIT(PERF_CSTATE_PKG_C3_RES) |
+				  BIT(PERF_CSTATE_PKG_C6_RES) |
+				  BIT(PERF_CSTATE_PKG_C10_RES),
+};
+
 
 #define X86_CSTATES_MODEL(model, states)				\
 	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) }
@@ -546,6 +558,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
 
 	X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNL, knl_cstates),
 	X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates),
+
+	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates),
 	{ },
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index c6d23ffe422d..a322fed5f8ed 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -606,12 +606,6 @@ static inline void intel_pmu_drain_pebs_buffer(void)
 	x86_pmu.drain_pebs(&regs);
 }
 
-void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
-{
-	if (!sched_in)
-		intel_pmu_drain_pebs_buffer();
-}
-
 /*
  * PEBS
  */
@@ -651,6 +645,12 @@ struct event_constraint intel_glm_pebs_event_constraints[] = {
 	EVENT_CONSTRAINT_END
 };
 
+struct event_constraint intel_glp_pebs_event_constraints[] = {
+	/* Allow all events as PEBS with no flags */
+	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
+	EVENT_CONSTRAINT_END
+};
+
 struct event_constraint intel_nehalem_pebs_event_constraints[] = {
 	INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
 	INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
@@ -816,6 +816,14 @@ static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
 	return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
 }
 
+void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	if (!sched_in && pebs_needs_sched_cb(cpuc))
+		intel_pmu_drain_pebs_buffer();
+}
+
 static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
 {
 	struct debug_store *ds = cpuc->ds;
@@ -889,6 +897,8 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 	if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
 		ds->pebs_event_reset[hwc->idx] =
 			(u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
+	} else {
+		ds->pebs_event_reset[hwc->idx] = 0;
 	}
 }
 
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index eb261656a320..955457a30197 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -380,8 +380,12 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
 
 void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
 {
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct x86_perf_task_context *task_ctx;
 
+	if (!cpuc->lbr_users)
+		return;
+
 	/*
 	 * If LBR callstack feature is enabled and the stack was saved when
 	 * the task was scheduled out, restore the stack. Otherwise flush
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 53728eea1bed..476aec3a4cab 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -879,6 +879,8 @@ extern struct event_constraint intel_slm_pebs_event_constraints[];
 
 extern struct event_constraint intel_glm_pebs_event_constraints[];
 
+extern struct event_constraint intel_glp_pebs_event_constraints[];
+
 extern struct event_constraint intel_nehalem_pebs_event_constraints[];
 
 extern struct event_constraint intel_westmere_pebs_event_constraints[];
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 4d2c32f98482..c17c0881fd36 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1452,6 +1452,13 @@ static enum event_type_t get_event_type(struct perf_event *event)
 
 	lockdep_assert_held(&ctx->lock);
 
+	/*
+	 * It's 'group type', really, because if our group leader is
+	 * pinned, so are we.
+	 */
+	if (event->group_leader != event)
+		event = event->group_leader;
+
 	event_type = event->attr.pinned ? EVENT_PINNED : EVENT_FLEXIBLE;
 	if (!ctx->task)
 		event_type |= EVENT_CPU;
@@ -4365,7 +4372,9 @@ EXPORT_SYMBOL_GPL(perf_event_read_value);
 static int __perf_read_group_add(struct perf_event *leader,
 					u64 read_format, u64 *values)
 {
+	struct perf_event_context *ctx = leader->ctx;
 	struct perf_event *sub;
+	unsigned long flags;
 	int n = 1; /* skip @nr */
 	int ret;
 
@@ -4395,12 +4404,15 @@ static int __perf_read_group_add(struct perf_event *leader,
 	if (read_format & PERF_FORMAT_ID)
 		values[n++] = primary_event_id(leader);
 
+	raw_spin_lock_irqsave(&ctx->lock, flags);
+
 	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
 		values[n++] += perf_event_count(sub);
 		if (read_format & PERF_FORMAT_ID)
 			values[n++] = primary_event_id(sub);
 	}
 
+	raw_spin_unlock_irqrestore(&ctx->lock, flags);
 	return 0;
 }
 
@@ -7308,21 +7320,6 @@ int perf_event_account_interrupt(struct perf_event *event)
 	return __perf_event_account_interrupt(event, 1);
 }
 
-static bool sample_is_allowed(struct perf_event *event, struct pt_regs *regs)
-{
-	/*
-	 * Due to interrupt latency (AKA "skid"), we may enter the
-	 * kernel before taking an overflow, even if the PMU is only
-	 * counting user events.
-	 * To avoid leaking information to userspace, we must always
-	 * reject kernel samples when exclude_kernel is set.
-	 */
-	if (event->attr.exclude_kernel && !user_mode(regs))
-		return false;
-
-	return true;
-}
-
 /*
  * Generic event overflow handling, sampling.
  */
@@ -7344,12 +7341,6 @@ static int __perf_event_overflow(struct perf_event *event,
 	ret = __perf_event_account_interrupt(event, throttle);
 
 	/*
-	 * For security, drop the skid kernel samples if necessary.
-	 */
-	if (!sample_is_allowed(event, regs))
-		return ret;
-
-	/*
 	 * XXX event_limit might not quite work as expected on inherited
 	 * events
 	 */
diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c
index a4d3762cd825..83874b0e266c 100644
--- a/tools/perf/ui/browser.c
+++ b/tools/perf/ui/browser.c
@@ -704,7 +704,7 @@ static void __ui_browser__line_arrow_down(struct ui_browser *browser,
 		ui_browser__gotorc(browser, row, column + 1);
 		SLsmg_draw_hline(2);
 
-		if (row++ == 0)
+		if (++row == 0)
 			goto out;
 	} else
 		row = 0;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 87b431886670..413f74df08de 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -273,7 +273,7 @@ struct perf_evsel *perf_evsel__new_cycles(void)
 	struct perf_event_attr attr = {
 		.type	= PERF_TYPE_HARDWARE,
 		.config	= PERF_COUNT_HW_CPU_CYCLES,
-		.exclude_kernel	= 1,
+		.exclude_kernel	= geteuid() != 0,
 	};
 	struct perf_evsel *evsel;
 
@@ -298,8 +298,10 @@ struct perf_evsel *perf_evsel__new_cycles(void)
 		goto out;
 
 	/* use asprintf() because free(evsel) assumes name is allocated */
-	if (asprintf(&evsel->name, "cycles%.*s",
-		     attr.precise_ip ? attr.precise_ip + 1 : 0, ":ppp") < 0)
+	if (asprintf(&evsel->name, "cycles%s%s%.*s",
+		     (attr.precise_ip || attr.exclude_kernel) ? ":" : "",
+		     attr.exclude_kernel ? "u" : "",
+		     attr.precise_ip ? attr.precise_ip + 1 : 0, "ppp") < 0)
 		goto error_free;
 out:
 	return evsel;
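
A worked example of the name string built above, with illustrative flag values: exclude_kernel alone yields "cycles:u", precise_ip 3 alone yields "cycles:ppp", and both together yield "cycles:uppp".

#include <stdio.h>

static void print_name(int exclude_kernel, int precise_ip)
{
	printf("cycles%s%s%.*s\n",
	       (precise_ip || exclude_kernel) ? ":" : "",
	       exclude_kernel ? "u" : "",
	       precise_ip ? precise_ip + 1 : 0, "ppp");
}

int main(void)
{
	print_name(1, 0);	/* cycles:u    */
	print_name(0, 3);	/* cycles:ppp  */
	print_name(1, 3);	/* cycles:uppp */
	return 0;
}
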
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 5de2b86b9880..2e9eb6aa3ce2 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2209,7 +2209,7 @@ int machine__get_kernel_start(struct machine *machine)
 	machine->kernel_start = 1ULL << 63;
 	if (map) {
 		err = map__load(map);
-		if (map->start)
+		if (!err)
 			machine->kernel_start = map->start;
 	}
 	return err;

^ permalink raw reply	[flat|nested] 316+ messages in thread

* [GIT PULL] perf fixes
@ 2017-06-10  8:39 Ingo Molnar
  0 siblings, 0 replies; 316+ messages in thread
From: Ingo Molnar @ 2017-06-10  8:39 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Arnaldo Carvalho de Melo, Jiri Olsa, Namhyung Kim,
	Peter Zijlstra, Thomas Gleixner, Andrew Morton

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: 47c1ded7fef108c730b803cd386241beffcdd15c Merge tag 'perf-urgent-for-mingo-4.12-20170608' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent

This is mostly tooling fixes, plus an instruction pointer filtering fix. There are 
more fixes than usual - Arnaldo got back from a longer vacation and there was a 
backlog.

 Thanks,

	Ingo

------------------>
Andi Kleen (1):
      perf stat: Only print NMI watchdog hint when enabled

Jin Yao (1):
      perf/core: Drop kernel samples even though :u is specified

Jiri Olsa (2):
      perf trace: Add mmap alias for s390
      perf test: Disable breakpoint signal tests for powerpc

Kim Phillips (2):
      perf annotate: Fix branch instruction with multiple operands
      perf annotate: Add missing powerpc triplet

Milian Wolff (2):
      perf report: Include partial stacks unwound with libdw
      perf report: Ensure the perf DSO mapping matches what libdw sees

Namhyung Kim (12):
      perf header: Set proper module name when build-id event found
      perf symbols: Set module info when build-id event found
      perf symbols: Use correct filename for compressed modules in build-id cache
      perf annotate: Fix symbolic link of build-id cache
      perf tools: Fix a memory leak in __open_dso()
      perf tools: Introduce dso__decompress_kmodule_{fd,path}
      perf annotate: Use dso__decompress_kmodule_path()
      perf tools: Decompress kernel module when reading DSO data
      perf tools: Consolidate error path in __open_dso()
      perf tests: Decompress kernel module before objdump
      perf symbols: Keep DSO->symtab_type after decompress
      perf symbols: Kill dso__build_id_is_kmod()

Ravi Bangoria (1):
      perf annotate: Fix failure when filename has special chars

SeongJae Park (6):
      perf probe: Fix examples section of documentation
      perf script: Fix outdated comment for perf-trace-python
      perf script: Fix documentation errors
      perf script python: Fix wrong code snippets in documentation
      perf script python: Updated trace_unhandled() signature
      perf script python: Remove dups in documentation examples


 kernel/events/core.c                               |  21 +++++
 tools/perf/Documentation/perf-probe.txt            |   8 +-
 tools/perf/Documentation/perf-script-perl.txt      |   2 +-
 tools/perf/Documentation/perf-script-python.txt    |  23 ++---
 tools/perf/arch/common.c                           |   1 +
 tools/perf/builtin-stat.c                          |   5 +-
 tools/perf/builtin-trace.c                         |   4 +
 tools/perf/tests/bp_signal.c                       |  14 +++
 tools/perf/tests/builtin-test.c                    |   7 ++
 tools/perf/tests/code-reading.c                    |  20 ++++-
 tools/perf/tests/tests.h                           |   3 +
 tools/perf/util/annotate.c                         |  72 +++++++++------
 tools/perf/util/build-id.c                         |  45 ----------
 tools/perf/util/build-id.h                         |   1 -
 tools/perf/util/dso.c                              | 100 +++++++++++++++++++--
 tools/perf/util/dso.h                              |   9 ++
 tools/perf/util/header.c                           |  12 ++-
 tools/perf/util/machine.c                          |  11 +--
 .../util/scripting-engines/trace-event-python.c    |   2 +-
 tools/perf/util/symbol-elf.c                       |  41 +--------
 tools/perf/util/symbol.c                           |   4 -
 tools/perf/util/unwind-libdw.c                     |  10 ++-
 22 files changed, 259 insertions(+), 156 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6e75a5c9412d..6c4e523dc1e2 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7316,6 +7316,21 @@ int perf_event_account_interrupt(struct perf_event *event)
 	return __perf_event_account_interrupt(event, 1);
 }
 
+static bool sample_is_allowed(struct perf_event *event, struct pt_regs *regs)
+{
+	/*
+	 * Due to interrupt latency (AKA "skid"), we may enter the
+	 * kernel before taking an overflow, even if the PMU is only
+	 * counting user events.
+	 * To avoid leaking information to userspace, we must always
+	 * reject kernel samples when exclude_kernel is set.
+	 */
+	if (event->attr.exclude_kernel && !user_mode(regs))
+		return false;
+
+	return true;
+}
+
 /*
  * Generic event overflow handling, sampling.
  */
@@ -7337,6 +7352,12 @@ static int __perf_event_overflow(struct perf_event *event,
 	ret = __perf_event_account_interrupt(event, throttle);
 
 	/*
+	 * For security, drop the skid kernel samples if necessary.
+	 */
+	if (!sample_is_allowed(event, regs))
+		return ret;
+
+	/*
 	 * XXX event_limit might not quite work as expected on inherited
 	 * events
 	 */
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index e6c9902c6d82..165c2b1d4317 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -240,9 +240,13 @@ EXAMPLES
  or
  ./perf probe --add='schedule:12 cpu'
 
- this will add one or more probes which has the name start with "schedule".
+Add one or more probes which has the name start with "schedule".
 
- Add probes on lines in schedule() function which calls update_rq_clock().
+ ./perf probe schedule*
+ or
+ ./perf probe --add='schedule*'
+
+Add probes on lines in schedule() function which calls update_rq_clock().
 
  ./perf probe 'schedule;update_rq_clock*'
  or
diff --git a/tools/perf/Documentation/perf-script-perl.txt b/tools/perf/Documentation/perf-script-perl.txt
index dfbb506d2c34..142606c0ec9c 100644
--- a/tools/perf/Documentation/perf-script-perl.txt
+++ b/tools/perf/Documentation/perf-script-perl.txt
@@ -39,7 +39,7 @@ EVENT HANDLERS
 When perf script is invoked using a trace script, a user-defined
 'handler function' is called for each event in the trace.  If there's
 no handler function defined for a given event type, the event is
-ignored (or passed to a 'trace_handled' function, see below) and the
+ignored (or passed to a 'trace_unhandled' function, see below) and the
 next event is processed.
 
 Most of the event's field values are passed as arguments to the
diff --git a/tools/perf/Documentation/perf-script-python.txt b/tools/perf/Documentation/perf-script-python.txt
index 54acba221558..51ec2d20068a 100644
--- a/tools/perf/Documentation/perf-script-python.txt
+++ b/tools/perf/Documentation/perf-script-python.txt
@@ -149,10 +149,8 @@ def raw_syscalls__sys_enter(event_name, context, common_cpu,
 		print "id=%d, args=%s\n" % \
 		(id, args),
 
-def trace_unhandled(event_name, context, common_cpu, common_secs, common_nsecs,
-		common_pid, common_comm):
-		print_header(event_name, common_cpu, common_secs, common_nsecs,
-		common_pid, common_comm)
+def trace_unhandled(event_name, context, event_fields_dict):
+		print ' '.join(['%s=%s'%(k,str(v))for k,v in sorted(event_fields_dict.items())])
 
 def print_header(event_name, cpu, secs, nsecs, pid, comm):
 	print "%-20s %5u %05u.%09u %8u %-20s " % \
@@ -321,7 +319,7 @@ So those are the essential steps in writing and running a script.  The
 process can be generalized to any tracepoint or set of tracepoints
 you're interested in - basically find the tracepoint(s) you're
 interested in by looking at the list of available events shown by
-'perf list' and/or look in /sys/kernel/debug/tracing events for
+'perf list' and/or look in /sys/kernel/debug/tracing/events/ for
 detailed event and field info, record the corresponding trace data
 using 'perf record', passing it the list of interesting events,
 generate a skeleton script using 'perf script -g python' and modify the
@@ -334,7 +332,7 @@ right place, you can have your script listed alongside the other
 scripts listed by the 'perf script -l' command e.g.:
 
 ----
-root@tropicana:~# perf script -l
+# perf script -l
 List of available trace scripts:
   wakeup-latency                       system-wide min/max/avg wakeup latency
   rw-by-file <comm>                    r/w activity for a program, by file
@@ -383,8 +381,6 @@ to be located in the perf/scripts/python directory in the kernel
 
 ----
 # ls -al kernel-source/tools/perf/scripts/python
-
-root@tropicana:/home/trz/src/tip# ls -al tools/perf/scripts/python
 total 32
 drwxr-xr-x 4 trz trz 4096 2010-01-26 22:30 .
 drwxr-xr-x 4 trz trz 4096 2010-01-26 22:29 ..
@@ -399,7 +395,7 @@ otherwise your script won't show up at run-time), 'perf script -l'
 should show a new entry for your script:
 
 ----
-root@tropicana:~# perf script -l
+# perf script -l
 List of available trace scripts:
   wakeup-latency                       system-wide min/max/avg wakeup latency
   rw-by-file <comm>                    r/w activity for a program, by file
@@ -437,7 +433,7 @@ EVENT HANDLERS
 When perf script is invoked using a trace script, a user-defined
 'handler function' is called for each event in the trace.  If there's
 no handler function defined for a given event type, the event is
-ignored (or passed to a 'trace_handled' function, see below) and the
+ignored (or passed to a 'trace_unhandled' function, see below) and the
 next event is processed.
 
 Most of the event's field values are passed as arguments to the
@@ -532,7 +528,7 @@ Aside from the event handler functions discussed above, every script
 gives scripts a chance to do setup tasks:
 
 ----
-def trace_begin:
+def trace_begin():
     pass
 ----
 
@@ -541,7 +537,7 @@ Aside from the event handler functions discussed above, every script
  as display results:
 
 ----
-def trace_end:
+def trace_end():
     pass
 ----
 
@@ -550,8 +546,7 @@ Aside from the event handler functions discussed above, every script
  of common arguments are passed into it:
 
 ----
-def trace_unhandled(event_name, context, common_cpu, common_secs,
-        common_nsecs, common_pid, common_comm):
+def trace_unhandled(event_name, context, event_fields_dict):
     pass
 ----
 
diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c
index 837067f48a4c..6b40e9f01740 100644
--- a/tools/perf/arch/common.c
+++ b/tools/perf/arch/common.c
@@ -26,6 +26,7 @@ const char *const arm64_triplets[] = {
 
 const char *const powerpc_triplets[] = {
 	"powerpc-unknown-linux-gnu-",
+	"powerpc-linux-gnu-",
 	"powerpc64-unknown-linux-gnu-",
 	"powerpc64-linux-gnu-",
 	"powerpc64le-linux-gnu-",
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a935b5023732..ad9324d1daf9 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1578,6 +1578,7 @@ static void print_header(int argc, const char **argv)
 static void print_footer(void)
 {
 	FILE *output = stat_config.output;
+	int n;
 
 	if (!null_run)
 		fprintf(output, "\n");
@@ -1590,7 +1591,9 @@ static void print_footer(void)
 	}
 	fprintf(output, "\n\n");
 
-	if (print_free_counters_hint)
+	if (print_free_counters_hint &&
+	    sysctl__read_int("kernel/nmi_watchdog", &n) >= 0 &&
+	    n > 0)
 		fprintf(output,
 "Some events weren't counted. Try disabling the NMI watchdog:\n"
 "	echo 0 > /proc/sys/kernel/nmi_watchdog\n"
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index d014350adc52..4b2a5d298197 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -681,6 +681,10 @@ static struct syscall_fmt {
 	{ .name	    = "mlockall",   .errmsg = true,
 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
 	{ .name	    = "mmap",	    .hexret = true,
+/* The standard mmap maps to old_mmap on s390x */
+#if defined(__s390x__)
+	.alias = "old_mmap",
+#endif
 	  .arg_scnprintf = { [0] = SCA_HEX,	  /* addr */
 			     [2] = SCA_MMAP_PROT, /* prot */
 			     [3] = SCA_MMAP_FLAGS, /* flags */ }, },
diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c
index e7664fe3bd33..8ba2c4618fe9 100644
--- a/tools/perf/tests/bp_signal.c
+++ b/tools/perf/tests/bp_signal.c
@@ -288,3 +288,17 @@ int test__bp_signal(int subtest __maybe_unused)
 	return count1 == 1 && overflows == 3 && count2 == 3 && overflows_2 == 3 && count3 == 2 ?
 		TEST_OK : TEST_FAIL;
 }
+
+bool test__bp_signal_is_supported(void)
+{
+/*
+ * The powerpc so far does not have support to even create
+ * instruction breakpoint using the perf event interface.
+ * Once it's there we can release this.
+ */
+#ifdef __powerpc__
+	return false;
+#else
+	return true;
+#endif
+}
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 9e08d297f1a9..3ccfd58a8c3c 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -97,10 +97,12 @@ static struct test generic_tests[] = {
 	{
 		.desc = "Breakpoint overflow signal handler",
 		.func = test__bp_signal,
+		.is_supported = test__bp_signal_is_supported,
 	},
 	{
 		.desc = "Breakpoint overflow sampling",
 		.func = test__bp_signal_overflow,
+		.is_supported = test__bp_signal_is_supported,
 	},
 	{
 		.desc = "Number of exit events of a simple workload",
@@ -401,6 +403,11 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
 		if (!perf_test__matches(t, curr, argc, argv))
 			continue;
 
+		if (t->is_supported && !t->is_supported()) {
+			pr_debug("%2d: %-*s: Disabled\n", i, width, t->desc);
+			continue;
+		}
+
 		pr_info("%2d: %-*s:", i, width, t->desc);
 
 		if (intlist__find(skiplist, i)) {
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 1f14e7612cbb..94b7c7b02bde 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -229,6 +229,8 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
 	unsigned char buf2[BUFSZ];
 	size_t ret_len;
 	u64 objdump_addr;
+	const char *objdump_name;
+	char decomp_name[KMOD_DECOMP_LEN];
 	int ret;
 
 	pr_debug("Reading object code for memory address: %#"PRIx64"\n", addr);
@@ -289,9 +291,25 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
 		state->done[state->done_cnt++] = al.map->start;
 	}
 
+	objdump_name = al.map->dso->long_name;
+	if (dso__needs_decompress(al.map->dso)) {
+		if (dso__decompress_kmodule_path(al.map->dso, objdump_name,
+						 decomp_name,
+						 sizeof(decomp_name)) < 0) {
+			pr_debug("decompression failed\n");
+			return -1;
+		}
+
+		objdump_name = decomp_name;
+	}
+
 	/* Read the object code using objdump */
 	objdump_addr = map__rip_2objdump(al.map, al.addr);
-	ret = read_via_objdump(al.map->dso->long_name, objdump_addr, buf2, len);
+	ret = read_via_objdump(objdump_name, objdump_addr, buf2, len);
+
+	if (dso__needs_decompress(al.map->dso))
+		unlink(objdump_name);
+
 	if (ret > 0) {
 		/*
 		 * The kernel maps are inaccurate - assume objdump is right in
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 631859629403..577363809c9b 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -34,6 +34,7 @@ struct test {
 		int (*get_nr)(void);
 		const char *(*get_desc)(int subtest);
 	} subtest;
+	bool (*is_supported)(void);
 };
 
 /* Tests */
@@ -99,6 +100,8 @@ const char *test__clang_subtest_get_desc(int subtest);
 int test__clang_subtest_get_nr(void);
 int test__unit_number__scnprint(int subtest);
 
+bool test__bp_signal_is_supported(void);
+
 #if defined(__arm__) || defined(__aarch64__)
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
 struct thread;
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 683f8340460c..ddbd56df9187 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -239,10 +239,20 @@ static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *op
 	const char *s = strchr(ops->raw, '+');
 	const char *c = strchr(ops->raw, ',');
 
-	if (c++ != NULL)
+	/*
+	 * skip over possible up to 2 operands to get to address, e.g.:
+	 * tbnz	 w0, #26, ffff0000083cd190 <security_file_permission+0xd0>
+	 */
+	if (c++ != NULL) {
 		ops->target.addr = strtoull(c, NULL, 16);
-	else
+		if (!ops->target.addr) {
+			c = strchr(c, ',');
+			if (c++ != NULL)
+				ops->target.addr = strtoull(c, NULL, 16);
+		}
+	} else {
 		ops->target.addr = strtoull(ops->raw, NULL, 16);
+	}
 
 	if (s++ != NULL) {
 		ops->target.offset = strtoull(s, NULL, 16);
@@ -257,10 +267,27 @@ static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *op
 static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
 			   struct ins_operands *ops)
 {
+	const char *c = strchr(ops->raw, ',');
+
 	if (!ops->target.addr || ops->target.offset < 0)
 		return ins__raw_scnprintf(ins, bf, size, ops);
 
-	return scnprintf(bf, size, "%-6.6s %" PRIx64, ins->name, ops->target.offset);
+	if (c != NULL) {
+		const char *c2 = strchr(c + 1, ',');
+
+		/* check for 3-op insn */
+		if (c2 != NULL)
+			c = c2;
+		c++;
+
+		/* mirror arch objdump's space-after-comma style */
+		if (*c == ' ')
+			c++;
+	}
+
+	return scnprintf(bf, size, "%-6.6s %.*s%" PRIx64,
+			 ins->name, c ? c - ops->raw : 0, ops->raw,
+			 ops->target.offset);
 }
 
 static struct ins_ops jump_ops = {
@@ -1294,6 +1321,7 @@ static int dso__disassemble_filename(struct dso *dso, char *filename, size_t fil
 	char linkname[PATH_MAX];
 	char *build_id_filename;
 	char *build_id_path = NULL;
+	char *pos;
 
 	if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS &&
 	    !dso__is_kcore(dso))
@@ -1313,7 +1341,14 @@ static int dso__disassemble_filename(struct dso *dso, char *filename, size_t fil
 	if (!build_id_path)
 		return -1;
 
-	dirname(build_id_path);
+	/*
+	 * old style build-id cache has name of XX/XXXXXXX.. while
+	 * new style has XX/XXXXXXX../{elf,kallsyms,vdso}.
+	 * extract the build-id part of dirname in the new style only.
+	 */
+	pos = strrchr(build_id_path, '/');
+	if (pos && strlen(pos) < SBUILD_ID_SIZE - 2)
+		dirname(build_id_path);
 
 	if (dso__is_kcore(dso) ||
 	    readlink(build_id_path, linkname, sizeof(linkname)) < 0 ||
@@ -1396,31 +1431,10 @@ int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_na
 				sizeof(symfs_filename));
 		}
 	} else if (dso__needs_decompress(dso)) {
-		char tmp[PATH_MAX];
-		struct kmod_path m;
-		int fd;
-		bool ret;
-
-		if (kmod_path__parse_ext(&m, symfs_filename))
-			goto out;
-
-		snprintf(tmp, PATH_MAX, "/tmp/perf-kmod-XXXXXX");
-
-		fd = mkstemp(tmp);
-		if (fd < 0) {
-			free(m.ext);
-			goto out;
-		}
-
-		ret = decompress_to_file(m.ext, symfs_filename, fd);
-
-		if (ret)
-			pr_err("Cannot decompress %s %s\n", m.ext, symfs_filename);
-
-		free(m.ext);
-		close(fd);
+		char tmp[KMOD_DECOMP_LEN];
 
-		if (!ret)
+		if (dso__decompress_kmodule_path(dso, symfs_filename,
+						 tmp, sizeof(tmp)) < 0)
 			goto out;
 
 		strcpy(symfs_filename, tmp);
@@ -1429,7 +1443,7 @@ int symbol__disassemble(struct symbol *sym, struct map *map, const char *arch_na
 	snprintf(command, sizeof(command),
 		 "%s %s%s --start-address=0x%016" PRIx64
 		 " --stop-address=0x%016" PRIx64
-		 " -l -d %s %s -C %s 2>/dev/null|grep -v %s:|expand",
+		 " -l -d %s %s -C \"%s\" 2>/dev/null|grep -v \"%s:\"|expand",
 		 objdump_path ? objdump_path : "objdump",
 		 disassembler_style ? "-M " : "",
 		 disassembler_style ? disassembler_style : "",
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 168cc49654e7..e0148b081bdf 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -278,51 +278,6 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size)
 	return bf;
 }
 
-bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size)
-{
-	char *id_name = NULL, *ch;
-	struct stat sb;
-	char sbuild_id[SBUILD_ID_SIZE];
-
-	if (!dso->has_build_id)
-		goto err;
-
-	build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
-	id_name = build_id_cache__linkname(sbuild_id, NULL, 0);
-	if (!id_name)
-		goto err;
-	if (access(id_name, F_OK))
-		goto err;
-	if (lstat(id_name, &sb) == -1)
-		goto err;
-	if ((size_t)sb.st_size > size - 1)
-		goto err;
-	if (readlink(id_name, bf, size - 1) < 0)
-		goto err;
-
-	bf[sb.st_size] = '\0';
-
-	/*
-	 * link should be:
-	 * ../../lib/modules/4.4.0-rc4/kernel/net/ipv4/netfilter/nf_nat_ipv4.ko/a09fe3eb3147dafa4e3b31dbd6257e4d696bdc92
-	 */
-	ch = strrchr(bf, '/');
-	if (!ch)
-		goto err;
-	if (ch - 3 < bf)
-		goto err;
-
-	free(id_name);
-	return strncmp(".ko", ch - 3, 3) == 0;
-err:
-	pr_err("Invalid build id: %s\n", id_name ? :
-					 dso->long_name ? :
-					 dso->short_name ? :
-					 "[unknown]");
-	free(id_name);
-	return false;
-}
-
 #define dsos__for_each_with_build_id(pos, head)	\
 	list_for_each_entry(pos, head, node)	\
 		if (!pos->has_build_id)		\
diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h
index 8a89b195c1fc..96690a55c62c 100644
--- a/tools/perf/util/build-id.h
+++ b/tools/perf/util/build-id.h
@@ -17,7 +17,6 @@ char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf,
 				    size_t size);
 
 char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size);
-bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size);
 
 int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event,
 			   struct perf_sample *sample, struct perf_evsel *evsel,
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index a96a99d2369f..4e7ab611377a 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -248,6 +248,64 @@ bool dso__needs_decompress(struct dso *dso)
 		dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP;
 }
 
+static int decompress_kmodule(struct dso *dso, const char *name, char *tmpbuf)
+{
+	int fd = -1;
+	struct kmod_path m;
+
+	if (!dso__needs_decompress(dso))
+		return -1;
+
+	if (kmod_path__parse_ext(&m, dso->long_name))
+		return -1;
+
+	if (!m.comp)
+		goto out;
+
+	fd = mkstemp(tmpbuf);
+	if (fd < 0) {
+		dso->load_errno = errno;
+		goto out;
+	}
+
+	if (!decompress_to_file(m.ext, name, fd)) {
+		dso->load_errno = DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE;
+		close(fd);
+		fd = -1;
+	}
+
+out:
+	free(m.ext);
+	return fd;
+}
+
+int dso__decompress_kmodule_fd(struct dso *dso, const char *name)
+{
+	char tmpbuf[] = KMOD_DECOMP_NAME;
+	int fd;
+
+	fd = decompress_kmodule(dso, name, tmpbuf);
+	unlink(tmpbuf);
+	return fd;
+}
+
+int dso__decompress_kmodule_path(struct dso *dso, const char *name,
+				 char *pathname, size_t len)
+{
+	char tmpbuf[] = KMOD_DECOMP_NAME;
+	int fd;
+
+	fd = decompress_kmodule(dso, name, tmpbuf);
+	if (fd < 0) {
+		unlink(tmpbuf);
+		return -1;
+	}
+
+	strncpy(pathname, tmpbuf, len);
+	close(fd);
+	return 0;
+}
+
 /*
  * Parses kernel module specified in @path and updates
  * @m argument like:
@@ -335,6 +393,21 @@ int __kmod_path__parse(struct kmod_path *m, const char *path,
 	return 0;
 }
 
+void dso__set_module_info(struct dso *dso, struct kmod_path *m,
+			  struct machine *machine)
+{
+	if (machine__is_host(machine))
+		dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE;
+	else
+		dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE;
+
+	/* _KMODULE_COMP should be next to _KMODULE */
+	if (m->kmod && m->comp)
+		dso->symtab_type++;
+
+	dso__set_short_name(dso, strdup(m->name), true);
+}
+
 /*
  * Global list of open DSOs and the counter.
  */
@@ -381,7 +454,7 @@ static int do_open(char *name)
 
 static int __open_dso(struct dso *dso, struct machine *machine)
 {
-	int fd;
+	int fd = -EINVAL;
 	char *root_dir = (char *)"";
 	char *name = malloc(PATH_MAX);
 
@@ -392,15 +465,30 @@ static int __open_dso(struct dso *dso, struct machine *machine)
 		root_dir = machine->root_dir;
 
 	if (dso__read_binary_type_filename(dso, dso->binary_type,
-					    root_dir, name, PATH_MAX)) {
-		free(name);
-		return -EINVAL;
-	}
+					    root_dir, name, PATH_MAX))
+		goto out;
 
 	if (!is_regular_file(name))
-		return -EINVAL;
+		goto out;
+
+	if (dso__needs_decompress(dso)) {
+		char newpath[KMOD_DECOMP_LEN];
+		size_t len = sizeof(newpath);
+
+		if (dso__decompress_kmodule_path(dso, name, newpath, len) < 0) {
+			fd = -dso->load_errno;
+			goto out;
+		}
+
+		strcpy(name, newpath);
+	}
 
 	fd = do_open(name);
+
+	if (dso__needs_decompress(dso))
+		unlink(name);
+
+out:
 	free(name);
 	return fd;
 }
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 12350b171727..bd061ba7b47c 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -244,6 +244,12 @@ bool is_supported_compression(const char *ext);
 bool is_kernel_module(const char *pathname, int cpumode);
 bool decompress_to_file(const char *ext, const char *filename, int output_fd);
 bool dso__needs_decompress(struct dso *dso);
+int dso__decompress_kmodule_fd(struct dso *dso, const char *name);
+int dso__decompress_kmodule_path(struct dso *dso, const char *name,
+				 char *pathname, size_t len);
+
+#define KMOD_DECOMP_NAME  "/tmp/perf-kmod-XXXXXX"
+#define KMOD_DECOMP_LEN   sizeof(KMOD_DECOMP_NAME)
 
 struct kmod_path {
 	char *name;
@@ -259,6 +265,9 @@ int __kmod_path__parse(struct kmod_path *m, const char *path,
 #define kmod_path__parse_name(__m, __p) __kmod_path__parse(__m, __p, true , false)
 #define kmod_path__parse_ext(__m, __p)  __kmod_path__parse(__m, __p, false, true)
 
+void dso__set_module_info(struct dso *dso, struct kmod_path *m,
+			  struct machine *machine);
+
 /*
  * The dso__data_* external interface provides following functions:
  *   dso__data_get_fd
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 314a07151fb7..5cac8d5e009a 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1469,8 +1469,16 @@ static int __event_process_build_id(struct build_id_event *bev,
 
 		dso__set_build_id(dso, &bev->build_id);
 
-		if (!is_kernel_module(filename, cpumode))
-			dso->kernel = dso_type;
+		if (dso_type != DSO_TYPE_USER) {
+			struct kmod_path m = { .name = NULL, };
+
+			if (!kmod_path__parse_name(&m, filename) && m.kmod)
+				dso__set_module_info(dso, &m, machine);
+			else
+				dso->kernel = dso_type;
+
+			free(m.name);
+		}
 
 		build_id__sprintf(dso->build_id, sizeof(dso->build_id),
 				  sbuild_id);
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index d97e014c3df3..d7f31cb0a4cb 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -572,16 +572,7 @@ static struct dso *machine__findnew_module_dso(struct machine *machine,
 		if (dso == NULL)
 			goto out_unlock;
 
-		if (machine__is_host(machine))
-			dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE;
-		else
-			dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE;
-
-		/* _KMODULE_COMP should be next to _KMODULE */
-		if (m->kmod && m->comp)
-			dso->symtab_type++;
-
-		dso__set_short_name(dso, strdup(m->name), true);
+		dso__set_module_info(dso, m, machine);
 		dso__set_long_name(dso, strdup(filename), true);
 	}
 
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 9d92af7d0718..40de3cb40d21 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -1219,7 +1219,7 @@ static int python_generate_script(struct pevent *pevent, const char *outfile)
 	fprintf(ofp, "# be retrieved using Python functions of the form "
 		"common_*(context).\n");
 
-	fprintf(ofp, "# See the perf-trace-python Documentation for the list "
+	fprintf(ofp, "# See the perf-script-python Documentation for the list "
 		"of available functions.\n\n");
 
 	fprintf(ofp, "import os\n");
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index e7ee47f7377a..502505cf236a 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -637,43 +637,6 @@ static int dso__swap_init(struct dso *dso, unsigned char eidata)
 	return 0;
 }
 
-static int decompress_kmodule(struct dso *dso, const char *name,
-			      enum dso_binary_type type)
-{
-	int fd = -1;
-	char tmpbuf[] = "/tmp/perf-kmod-XXXXXX";
-	struct kmod_path m;
-
-	if (type != DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP &&
-	    type != DSO_BINARY_TYPE__GUEST_KMODULE_COMP &&
-	    type != DSO_BINARY_TYPE__BUILD_ID_CACHE)
-		return -1;
-
-	if (type == DSO_BINARY_TYPE__BUILD_ID_CACHE)
-		name = dso->long_name;
-
-	if (kmod_path__parse_ext(&m, name) || !m.comp)
-		return -1;
-
-	fd = mkstemp(tmpbuf);
-	if (fd < 0) {
-		dso->load_errno = errno;
-		goto out;
-	}
-
-	if (!decompress_to_file(m.ext, name, fd)) {
-		dso->load_errno = DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE;
-		close(fd);
-		fd = -1;
-	}
-
-	unlink(tmpbuf);
-
-out:
-	free(m.ext);
-	return fd;
-}
-
 bool symsrc__possibly_runtime(struct symsrc *ss)
 {
 	return ss->dynsym || ss->opdsec;
@@ -705,9 +668,11 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
 	int fd;
 
 	if (dso__needs_decompress(dso)) {
-		fd = decompress_kmodule(dso, name, type);
+		fd = dso__decompress_kmodule_fd(dso, name);
 		if (fd < 0)
 			return -1;
+
+		type = dso->symtab_type;
 	} else {
 		fd = open(name, O_RDONLY);
 		if (fd < 0) {
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 8f2b068ff756..e7a98dbd2aed 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1562,10 +1562,6 @@ int dso__load(struct dso *dso, struct map *map)
 	if (!runtime_ss && syms_ss)
 		runtime_ss = syms_ss;
 
-	if (syms_ss && syms_ss->type == DSO_BINARY_TYPE__BUILD_ID_CACHE)
-		if (dso__build_id_is_kmod(dso, name, PATH_MAX))
-			kmod = true;
-
 	if (syms_ss)
 		ret = dso__load_sym(dso, map, syms_ss, runtime_ss, kmod);
 	else
diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c
index 943a06291587..da45c4be5fb3 100644
--- a/tools/perf/util/unwind-libdw.c
+++ b/tools/perf/util/unwind-libdw.c
@@ -39,6 +39,14 @@ static int __report_module(struct addr_location *al, u64 ip,
 		return 0;
 
 	mod = dwfl_addrmodule(ui->dwfl, ip);
+	if (mod) {
+		Dwarf_Addr s;
+
+		dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL);
+		if (s != al->map->start)
+			mod = 0;
+	}
+
 	if (!mod)
 		mod = dwfl_report_elf(ui->dwfl, dso->short_name,
 				      dso->long_name, -1, al->map->start,
@@ -224,7 +232,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
 
 	err = dwfl_getthread_frames(ui->dwfl, thread->tid, frame_callback, ui);
 
-	if (err && !ui->max_stack)
+	if (err && ui->max_stack != max_stack)
 		err = 0;
 
 	/*

^ permalink raw reply	[flat|nested] 316+ messages in thread

* [GIT PULL] perf fixes
@ 2017-05-12  7:31 Ingo Molnar
  0 siblings, 0 replies; 316+ messages in thread
From: Ingo Molnar @ 2017-05-12  7:31 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Thomas Gleixner, Andrew Morton

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: 88b0193d9418c00340e45e0a913a0813bc6c8c96 perf/callchain: Force USER_DS when invoking perf_callchain_user()

Mostly tooling updates, but also two kernel fixes: a call chain handling 
robustness fix and an x86 PMU driver event definition fix.

 Thanks,

	Ingo

------------------>

Arnaldo Carvalho de Melo (9):
      perf buildid: Move prototypes from util.h to build-id.h
      perf tools: Move event prototypes from util.h to event.h
      perf memswap: Split the byteswap memory range wrappers from util.[ch]
      perf tools: Move HAS_BOOL define to where perl headers are used
      perf ui gtk: Move gtk .so name to the only place where it is used
      perf units: Move parse_tag_value() to units.[ch]
      tools lib string: Adopt prefixcmp() from perf and subcmd
      perf symbols: Accept symbols starting at address 0
      tools build: Fixup sched_getcpu feature test

Ingo Molnar (2):
      Merge tag 'perf-core-for-mingo-4.12-20170503' of git://git.kernel.org/.../acme/linux into perf/urgent
      Merge tag 'perf-urgent-for-mingo-4.12-20170504' of git://git.kernel.org/.../acme/linux into perf/urgent

Kim Phillips (3):
      perf tools: Fix spelling mistakes
      perf annotate: Fix AArch64 comment char
      perf tests kmod-path: Don't fail if compressed modules aren't supported

Paul Clarke (1):
      perf symbols: Allow user probes on versioned symbols

Taeung Song (1):
      perf config: Refactor a duplicated code for obtaining config file name

Vince Weaver (1):
      perf/x86: Fix Broadwell-EP DRAM RAPL events

Will Deacon (1):
      perf/callchain: Force USER_DS when invoking perf_callchain_user()


 arch/x86/events/intel/rapl.c                         |  2 +-
 kernel/events/callchain.c                            |  6 ++++++
 tools/build/feature/test-sched_getcpu.c              |  2 ++
 tools/include/linux/string.h                         |  2 ++
 tools/lib/string.c                                   |  9 +++++++++
 tools/lib/subcmd/help.c                              |  1 +
 tools/lib/subcmd/parse-options.c                     |  1 +
 tools/lib/subcmd/subcmd-util.h                       |  9 ---------
 tools/perf/Documentation/perf-c2c.txt                |  4 ++--
 tools/perf/Documentation/perf-record.txt             |  2 +-
 tools/perf/Documentation/perf-report.txt             |  6 +++---
 tools/perf/Documentation/perf.data-file-format.txt   |  4 ++--
 tools/perf/Documentation/tips.txt                    |  2 +-
 tools/perf/arch/arm64/annotate/instructions.c        |  2 +-
 tools/perf/arch/powerpc/util/sym-handling.c          | 12 ++++++++++++
 tools/perf/builtin-buildid-cache.c                   | 13 ++++++++-----
 tools/perf/builtin-c2c.c                             |  1 +
 tools/perf/builtin-config.c                          | 19 ++++++++-----------
 tools/perf/builtin-top.c                             |  2 +-
 tools/perf/builtin-trace.c                           |  1 +
 tools/perf/perf.c                                    |  1 +
 tools/perf/tests/hists_cumulate.c                    |  1 +
 tools/perf/tests/hists_filter.c                      |  1 +
 tools/perf/tests/hists_output.c                      |  1 +
 tools/perf/tests/kmod-path.c                         |  2 ++
 tools/perf/tests/sdt.c                               |  1 -
 tools/perf/ui/setup.c                                |  3 +++
 tools/perf/util/Build                                |  1 +
 tools/perf/util/build-id.h                           |  4 ++++
 tools/perf/util/event.c                              | 10 ++++++----
 tools/perf/util/event.h                              | 14 +++++++++++---
 tools/perf/util/evsel.c                              |  1 +
 tools/perf/util/header.c                             |  1 +
 tools/perf/util/intel-pt.c                           |  1 +
 tools/perf/util/machine.c                            | 28 +++++++++++++++++-----------
 tools/perf/util/map.c                                |  5 -----
 tools/perf/util/map.h                                |  5 +++--
 tools/perf/util/memswap.c                            | 24 ++++++++++++++++++++++++
 tools/perf/util/memswap.h                            |  7 +++++++
 tools/perf/util/scripting-engines/trace-event-perl.c |  4 +++-
 tools/perf/util/session.c                            |  1 +
 tools/perf/util/strbuf.c                             |  9 ---------
 tools/perf/util/symbol.c                             | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------
 tools/perf/util/symbol.h                             | 11 +++++++++++
 tools/perf/util/units.c                              | 29 +++++++++++++++++++++++++++++
 tools/perf/util/units.h                              |  7 +++++++
 tools/perf/util/util.c                               | 49 -------------------------------------------------
 tools/perf/util/util.h                               | 26 --------------------------
 48 files changed, 252 insertions(+), 167 deletions(-)
 create mode 100644 tools/perf/util/memswap.c
 create mode 100644 tools/perf/util/memswap.h

^ permalink raw reply	[flat|nested] 316+ messages in thread

* [GIT PULL] perf fixes
@ 2017-03-07 20:30 Ingo Molnar
  0 siblings, 0 replies; 316+ messages in thread
From: Ingo Molnar @ 2017-03-07 20:30 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Thomas Gleixner, Andrew Morton

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: 9d020d33fc1b2faa0eb35859df1381ca5dc94ffe Merge branch 'linus' into perf/urgent, to resolve conflict

This includes a fix for a crash when certain special addresses are kprobed, plus 
a rename of two Kconfig variables whose old names were a minor misnomer.

 Thanks,

	Ingo

------------------>
Anton Blanchard (1):
      perf/core: Rename CONFIG_[UK]PROBE_EVENT to CONFIG_[UK]PROBE_EVENTS

Masami Hiramatsu (1):
      kprobes/x86: Fix kernel panic when certain exception-handling addresses are probed


 Documentation/trace/kprobetrace.txt     |  2 +-
 Documentation/trace/uprobetracer.txt    |  2 +-
 arch/s390/configs/default_defconfig     |  2 +-
 arch/s390/configs/gcov_defconfig        |  2 +-
 arch/s390/configs/performance_defconfig |  2 +-
 arch/s390/defconfig                     |  2 +-
 arch/x86/kernel/kprobes/common.h        |  2 +-
 arch/x86/kernel/kprobes/core.c          |  6 +++---
 arch/x86/kernel/kprobes/opt.c           |  2 +-
 kernel/trace/Kconfig                    |  6 +++---
 kernel/trace/Makefile                   |  4 ++--
 kernel/trace/trace.c                    | 10 +++++-----
 kernel/trace/trace_probe.h              |  4 ++--
 13 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index e4991fb1eedc..41ef9d8efe95 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -12,7 +12,7 @@ kprobes can probe (this means, all functions body except for __kprobes
 functions). Unlike the Tracepoint based event, this can be added and removed
 dynamically, on the fly.
 
-To enable this feature, build your kernel with CONFIG_KPROBE_EVENT=y.
+To enable this feature, build your kernel with CONFIG_KPROBE_EVENTS=y.
 
 Similar to the events tracer, this doesn't need to be activated via
 current_tracer. Instead of that, add probe points via
diff --git a/Documentation/trace/uprobetracer.txt b/Documentation/trace/uprobetracer.txt
index fa7b680ee8a0..bf526a7c5559 100644
--- a/Documentation/trace/uprobetracer.txt
+++ b/Documentation/trace/uprobetracer.txt
@@ -7,7 +7,7 @@
 Overview
 --------
 Uprobe based trace events are similar to kprobe based trace events.
-To enable this feature, build your kernel with CONFIG_UPROBE_EVENT=y.
+To enable this feature, build your kernel with CONFIG_UPROBE_EVENTS=y.
 
 Similar to the kprobe-event tracer, this doesn't need to be activated via
 current_tracer. Instead of that, add probe points via
diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig
index 143b1e00b818..4b176fe83da4 100644
--- a/arch/s390/configs/default_defconfig
+++ b/arch/s390/configs/default_defconfig
@@ -609,7 +609,7 @@ CONFIG_SCHED_TRACER=y
 CONFIG_FTRACE_SYSCALLS=y
 CONFIG_STACK_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_UPROBE_EVENT=y
+CONFIG_UPROBE_EVENTS=y
 CONFIG_FUNCTION_PROFILER=y
 CONFIG_HIST_TRIGGERS=y
 CONFIG_TRACE_ENUM_MAP_FILE=y
diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig
index f05d2d6e1087..0de46cc397f6 100644
--- a/arch/s390/configs/gcov_defconfig
+++ b/arch/s390/configs/gcov_defconfig
@@ -560,7 +560,7 @@ CONFIG_SCHED_TRACER=y
 CONFIG_FTRACE_SYSCALLS=y
 CONFIG_STACK_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_UPROBE_EVENT=y
+CONFIG_UPROBE_EVENTS=y
 CONFIG_FUNCTION_PROFILER=y
 CONFIG_HIST_TRIGGERS=y
 CONFIG_TRACE_ENUM_MAP_FILE=y
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
index 2358bf33c5ef..e167557b434c 100644
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -558,7 +558,7 @@ CONFIG_SCHED_TRACER=y
 CONFIG_FTRACE_SYSCALLS=y
 CONFIG_STACK_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_UPROBE_EVENT=y
+CONFIG_UPROBE_EVENTS=y
 CONFIG_FUNCTION_PROFILER=y
 CONFIG_HIST_TRIGGERS=y
 CONFIG_TRACE_ENUM_MAP_FILE=y
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index 68bfd09f1b02..97189dbaf34b 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -179,7 +179,7 @@ CONFIG_FTRACE_SYSCALLS=y
 CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
 CONFIG_STACK_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_UPROBE_EVENT=y
+CONFIG_UPROBE_EVENTS=y
 CONFIG_FUNCTION_PROFILER=y
 CONFIG_TRACE_ENUM_MAP_FILE=y
 CONFIG_KPROBES_SANITY_TEST=y
diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h
index c6ee63f927ab..d688826e5736 100644
--- a/arch/x86/kernel/kprobes/common.h
+++ b/arch/x86/kernel/kprobes/common.h
@@ -67,7 +67,7 @@
 #endif
 
 /* Ensure if the instruction can be boostable */
-extern int can_boost(kprobe_opcode_t *instruction);
+extern int can_boost(kprobe_opcode_t *instruction, void *addr);
 /* Recover instruction if given address is probed */
 extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf,
 					 unsigned long addr);
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 520b8dfe1640..88b3c942473d 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -166,12 +166,12 @@ NOKPROBE_SYMBOL(skip_prefixes);
  * Returns non-zero if opcode is boostable.
  * RIP relative instructions are adjusted at copying time in 64 bits mode
  */
-int can_boost(kprobe_opcode_t *opcodes)
+int can_boost(kprobe_opcode_t *opcodes, void *addr)
 {
 	kprobe_opcode_t opcode;
 	kprobe_opcode_t *orig_opcodes = opcodes;
 
-	if (search_exception_tables((unsigned long)opcodes))
+	if (search_exception_tables((unsigned long)addr))
 		return 0;	/* Page fault may occur on this address. */
 
 retry:
@@ -416,7 +416,7 @@ static int arch_copy_kprobe(struct kprobe *p)
 	 * __copy_instruction can modify the displacement of the instruction,
 	 * but it doesn't affect boostable check.
 	 */
-	if (can_boost(p->ainsn.insn))
+	if (can_boost(p->ainsn.insn, p->addr))
 		p->ainsn.boostable = 0;
 	else
 		p->ainsn.boostable = -1;
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 3d1bee9d6a72..3e7c6e5a08ff 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -178,7 +178,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src)
 
 	while (len < RELATIVEJUMP_SIZE) {
 		ret = __copy_instruction(dest + len, src + len);
-		if (!ret || !can_boost(dest + len))
+		if (!ret || !can_boost(dest + len, src + len))
 			return -EINVAL;
 		len += ret;
 	}
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index d5038005eb5d..d4a06e714645 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -429,7 +429,7 @@ config BLK_DEV_IO_TRACE
 
 	  If unsure, say N.
 
-config KPROBE_EVENT
+config KPROBE_EVENTS
 	depends on KPROBES
 	depends on HAVE_REGS_AND_STACK_ACCESS_API
 	bool "Enable kprobes-based dynamic events"
@@ -447,7 +447,7 @@ config KPROBE_EVENT
 	  This option is also required by perf-probe subcommand of perf tools.
 	  If you want to use perf tools, this option is strongly recommended.
 
-config UPROBE_EVENT
+config UPROBE_EVENTS
 	bool "Enable uprobes-based dynamic events"
 	depends on ARCH_SUPPORTS_UPROBES
 	depends on MMU
@@ -466,7 +466,7 @@ config UPROBE_EVENT
 
 config BPF_EVENTS
 	depends on BPF_SYSCALL
-	depends on (KPROBE_EVENT || UPROBE_EVENT) && PERF_EVENTS
+	depends on (KPROBE_EVENTS || UPROBE_EVENTS) && PERF_EVENTS
 	bool
 	default y
 	help
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index e57980845549..90f2701d92a7 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -57,7 +57,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
 obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o
 obj-$(CONFIG_HIST_TRIGGERS) += trace_events_hist.o
 obj-$(CONFIG_BPF_EVENTS) += bpf_trace.o
-obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
+obj-$(CONFIG_KPROBE_EVENTS) += trace_kprobe.o
 obj-$(CONFIG_TRACEPOINTS) += power-traces.o
 ifeq ($(CONFIG_PM),y)
 obj-$(CONFIG_TRACEPOINTS) += rpm-traces.o
@@ -66,7 +66,7 @@ ifeq ($(CONFIG_TRACING),y)
 obj-$(CONFIG_KGDB_KDB) += trace_kdb.o
 endif
 obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o
-obj-$(CONFIG_UPROBE_EVENT) += trace_uprobe.o
+obj-$(CONFIG_UPROBE_EVENTS) += trace_uprobe.o
 
 obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 707445ceb7ef..f35109514a01 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4341,22 +4341,22 @@ static const char readme_msg[] =
 	"\t\t\t  traces\n"
 #endif
 #endif /* CONFIG_STACK_TRACER */
-#ifdef CONFIG_KPROBE_EVENT
+#ifdef CONFIG_KPROBE_EVENTS
 	"  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
 #endif
-#ifdef CONFIG_UPROBE_EVENT
+#ifdef CONFIG_UPROBE_EVENTS
 	"  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
 #endif
-#if defined(CONFIG_KPROBE_EVENT) || defined(CONFIG_UPROBE_EVENT)
+#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
 	"\t  accepts: event-definitions (one definition per line)\n"
 	"\t   Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
 	"\t           -:[<group>/]<event>\n"
-#ifdef CONFIG_KPROBE_EVENT
+#ifdef CONFIG_KPROBE_EVENTS
 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
 #endif
-#ifdef CONFIG_UPROBE_EVENT
+#ifdef CONFIG_UPROBE_EVENTS
 	"\t    place: <path>:<offset>\n"
 #endif
 	"\t     args: <name>=fetcharg[:type]\n"
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 0c0ae54d44c6..903273c93e61 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -248,7 +248,7 @@ ASSIGN_FETCH_FUNC(file_offset, ftype),			\
 #define FETCH_TYPE_STRING	0
 #define FETCH_TYPE_STRSIZE	1
 
-#ifdef CONFIG_KPROBE_EVENT
+#ifdef CONFIG_KPROBE_EVENTS
 struct symbol_cache;
 unsigned long update_symbol_cache(struct symbol_cache *sc);
 void free_symbol_cache(struct symbol_cache *sc);
@@ -278,7 +278,7 @@ alloc_symbol_cache(const char *sym, long offset)
 {
 	return NULL;
 }
-#endif /* CONFIG_KPROBE_EVENT */
+#endif /* CONFIG_KPROBE_EVENTS */
 
 struct probe_arg {
 	struct fetch_param	fetch;

^ permalink raw reply	[flat|nested] 316+ messages in thread

* [GIT PULL] perf fixes
@ 2017-02-28  8:01 Ingo Molnar
  0 siblings, 0 replies; 316+ messages in thread
From: Ingo Molnar @ 2017-02-28  8:01 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Thomas Gleixner, Jiri Olsa, Andrew Morton

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: 1572e45a924f254d9570093abde46430c3172e3d perf/core: Fix the perf_cpu_time_max_percent check

Misc fixes on the kernel and tooling side - nothing in particular stands out.

 Thanks,

	Ingo

------------------>
Arnaldo Carvalho de Melo (3):
      tools perf scripting python: clang doesn't have -spec, remove it
      perf python: Filter out -specs=/a/b/c from the python binding cc options
      perf session: Fix DEBUG=1 build with clang

Jan Stancek (3):
      perf cpumap: Add cpu__max_present_cpu()
      perf header: Make build_cpu_topology skip offline/absent CPUs
      perf tools: Replace _SC_NPROCESSORS_CONF with max_present_cpu in cpu_topology_map

Jiri Olsa (5):
      perf build: Add special fixdep cleaning rule
      perf tools: Move new_term arguments into struct parse_events_term template
      perf tools: Fail on using multiple bits long terms without value
      perf stat: Add -a as default target
      perf record: Add -a as default target

Namhyung Kim (6):
      perf utils: Add perf_quiet_option()
      perf utils: Check verbose flag properly
      perf report: Add -q/--quiet option
      perf diff: Add -q/--quiet option
      perf annotate: Add -q/--quiet option
      perf record: Honor --quiet option properly

Peter Zijlstra (2):
      perf/core: Remove confusing comment and move put_ctx()
      perf/core: Fix perf_event_enable_on_exec() timekeeping (again)

Steven Rostedt (VMware) (1):
      tools lib traceevent: It's preempt not prempt

Tan Xiaojun (1):
      perf/core: Fix the perf_cpu_time_max_percent check


 kernel/events/core.c                               | 12 ++--
 tools/build/Makefile                               |  4 +-
 tools/build/Makefile.include                       |  3 +
 tools/lib/traceevent/event-parse.c                 |  4 +-
 tools/lib/traceevent/event-parse.h                 |  2 +-
 tools/perf/Documentation/perf-annotate.txt         |  4 ++
 tools/perf/Documentation/perf-diff.txt             |  4 ++
 tools/perf/Documentation/perf-record.txt           |  2 +-
 tools/perf/Documentation/perf-report.txt           |  4 ++
 tools/perf/Documentation/perf-stat.txt             |  2 +-
 tools/perf/Makefile.config                         |  7 +++
 tools/perf/Makefile.perf                           |  4 +-
 tools/perf/builtin-annotate.c                      |  4 ++
 tools/perf/builtin-diff.c                          | 14 +++--
 tools/perf/builtin-mem.c                           |  4 +-
 tools/perf/builtin-record.c                        |  8 ++-
 tools/perf/builtin-report.c                        | 21 +++++--
 tools/perf/builtin-sched.c                         | 12 ++--
 tools/perf/builtin-stat.c                          | 11 ++--
 tools/perf/builtin-top.c                           |  2 +-
 tools/perf/builtin-trace.c                         |  6 +-
 tools/perf/pmu-events/json.c                       |  2 +-
 tools/perf/tests/attr.c                            |  2 +-
 tools/perf/tests/builtin-test.c                    |  2 +-
 tools/perf/tests/code-reading.c                    |  2 +-
 tools/perf/tests/fdarray.c                         |  2 +-
 tools/perf/tests/llvm.c                            |  2 +-
 tools/perf/tests/parse-events.c                    |  2 +-
 tools/perf/tests/perf-record.c                     |  4 +-
 tools/perf/tests/python-use.c                      |  2 +-
 tools/perf/tests/thread-map.c                      |  6 +-
 tools/perf/tests/topology.c                        |  4 +-
 tools/perf/tests/vmlinux-kallsyms.c                |  2 +-
 tools/perf/ui/browsers/map.c                       |  6 +-
 tools/perf/ui/hist.c                               |  2 +-
 tools/perf/util/annotate.c                         |  2 +-
 tools/perf/util/cpumap.c                           | 22 +++++++
 tools/perf/util/cpumap.h                           |  1 +
 tools/perf/util/debug.c                            | 17 ++++++
 tools/perf/util/debug.h                            |  1 +
 tools/perf/util/dso.c                              |  2 +-
 tools/perf/util/env.c                              |  2 +-
 tools/perf/util/header.c                           | 33 ++++++----
 tools/perf/util/hist.c                             |  6 +-
 tools/perf/util/parse-events.c                     | 71 +++++++++++++---------
 tools/perf/util/parse-events.h                     |  2 +
 tools/perf/util/parse-events.y                     | 14 ++---
 tools/perf/util/pmu.c                              | 21 +++++--
 tools/perf/util/probe-event.c                      |  2 +-
 .../util/scripting-engines/trace-event-python.c    |  4 +-
 tools/perf/util/session.c                          |  2 +-
 tools/perf/util/setup.py                           |  9 ++-
 tools/perf/util/sort.c                             |  8 +--
 tools/perf/util/stat.c                             |  2 +-
 tools/perf/util/symbol-elf.c                       |  2 +-
 55 files changed, 260 insertions(+), 137 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 77a932b54a64..c1c1cdf0b811 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -455,7 +455,7 @@ int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
 				void __user *buffer, size_t *lenp,
 				loff_t *ppos)
 {
-	int ret = proc_dointvec(table, write, buffer, lenp, ppos);
+	int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 
 	if (ret || !write)
 		return ret;
@@ -3522,6 +3522,8 @@ static void perf_event_enable_on_exec(int ctxn)
 	if (enabled) {
 		clone_ctx = unclone_ctx(ctx);
 		ctx_resched(cpuctx, ctx, event_type);
+	} else {
+		ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
 	}
 	perf_ctx_unlock(cpuctx, ctx);
 
@@ -9955,6 +9957,7 @@ SYSCALL_DEFINE5(perf_event_open,
 		 * of swizzling perf_event::ctx.
 		 */
 		perf_remove_from_context(group_leader, 0);
+		put_ctx(gctx);
 
 		list_for_each_entry(sibling, &group_leader->sibling_list,
 				    group_entry) {
@@ -9993,13 +9996,6 @@ SYSCALL_DEFINE5(perf_event_open,
 		perf_event__state_init(group_leader);
 		perf_install_in_context(ctx, group_leader, group_leader->cpu);
 		get_ctx(ctx);
-
-		/*
-		 * Now that all events are installed in @ctx, nothing
-		 * references @gctx anymore, so drop the last reference we have
-		 * on it.
-		 */
-		put_ctx(gctx);
 	}
 
 	/*
diff --git a/tools/build/Makefile b/tools/build/Makefile
index aaf7ed329a45..477f00eda591 100644
--- a/tools/build/Makefile
+++ b/tools/build/Makefile
@@ -35,8 +35,8 @@ all: $(OUTPUT)fixdep
 
 clean:
 	$(call QUIET_CLEAN, fixdep)
-	$(Q)find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
-	$(Q)rm -f fixdep
+	$(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
+	$(Q)rm -f $(OUTPUT)fixdep
 
 $(OUTPUT)fixdep-in.o: FORCE
 	$(Q)$(MAKE) $(build)=fixdep
diff --git a/tools/build/Makefile.include b/tools/build/Makefile.include
index ad22e4e7bc59..d360f39a445b 100644
--- a/tools/build/Makefile.include
+++ b/tools/build/Makefile.include
@@ -3,4 +3,7 @@ build := -f $(srctree)/tools/build/Makefile.build dir=. obj
 fixdep:
 	$(Q)$(MAKE) -C $(srctree)/tools/build CFLAGS= LDFLAGS= $(OUTPUT)fixdep
 
+fixdep-clean:
+	$(Q)$(MAKE) -C $(srctree)/tools/build clean
+
 .PHONY: fixdep
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index 14a4f623c1a5..32171310bf82 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -5204,13 +5204,13 @@ int pevent_data_pid(struct pevent *pevent, struct pevent_record *rec)
 }
 
 /**
- * pevent_data_prempt_count - parse the preempt count from the record
+ * pevent_data_preempt_count - parse the preempt count from the record
  * @pevent: a handle to the pevent
  * @rec: the record to parse
  *
  * This returns the preempt count from a record.
  */
-int pevent_data_prempt_count(struct pevent *pevent, struct pevent_record *rec)
+int pevent_data_preempt_count(struct pevent *pevent, struct pevent_record *rec)
 {
 	return parse_common_pc(pevent, rec->data);
 }
diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h
index 7aae746ec2fe..67daa01260a9 100644
--- a/tools/lib/traceevent/event-parse.h
+++ b/tools/lib/traceevent/event-parse.h
@@ -709,7 +709,7 @@ void pevent_data_lat_fmt(struct pevent *pevent,
 int pevent_data_type(struct pevent *pevent, struct pevent_record *rec);
 struct event_format *pevent_data_event_from_type(struct pevent *pevent, int type);
 int pevent_data_pid(struct pevent *pevent, struct pevent_record *rec);
-int pevent_data_prempt_count(struct pevent *pevent, struct pevent_record *rec);
+int pevent_data_preempt_count(struct pevent *pevent, struct pevent_record *rec);
 int pevent_data_flags(struct pevent *pevent, struct pevent_record *rec);
 const char *pevent_data_comm_from_pid(struct pevent *pevent, int pid);
 struct cmdline;
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt
index 8ffbd272952d..a89273d8e744 100644
--- a/tools/perf/Documentation/perf-annotate.txt
+++ b/tools/perf/Documentation/perf-annotate.txt
@@ -39,6 +39,10 @@ OPTIONS
 --verbose::
         Be more verbose. (Show symbol address, etc)
 
+-q::
+--quiet::
+	Do not show any message.  (Suppress -v)
+
 -D::
 --dump-raw-trace::
         Dump raw trace in ASCII.
diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt
index 66dbe3dee74b..a79c84ae61aa 100644
--- a/tools/perf/Documentation/perf-diff.txt
+++ b/tools/perf/Documentation/perf-diff.txt
@@ -73,6 +73,10 @@ OPTIONS
 	Be verbose, for instance, show the raw counts in addition to the
 	diff.
 
+-q::
+--quiet::
+	Do not show any message.  (Suppress -v)
+
 -f::
 --force::
         Don't do ownership validation.
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 27256bc68eda..b16003ec14a7 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -157,7 +157,7 @@ OPTIONS
 
 -a::
 --all-cpus::
-        System-wide collection from all CPUs.
+        System-wide collection from all CPUs (default if no target is specified).
 
 -p::
 --pid=::
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index f2914f03ae7b..c04cc0647c16 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -25,6 +25,10 @@ OPTIONS
 --verbose::
         Be more verbose. (show symbol address, etc)
 
+-q::
+--quiet::
+	Do not show any message.  (Suppress -v)
+
 -n::
 --show-nr-samples::
 	Show the number of samples for each symbol
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index d96ccd4844df..aecf2a87e7d6 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -63,7 +63,7 @@ OPTIONS
 
 -a::
 --all-cpus::
-        system-wide collection from all CPUs
+        system-wide collection from all CPUs (default if no target is specified)
 
 -c::
 --scale::
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 2b941efadb04..27c9fbca7bd9 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -175,6 +175,10 @@ PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG))
 PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null)
 PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
 
+ifeq ($(CC), clang)
+  PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS))
+endif
+
 FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS)
 FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS)
 FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS)
@@ -601,6 +605,9 @@ else
       PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
       PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil
       PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
+      ifeq ($(CC), clang)
+        PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS))
+      endif
       FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
 
       ifneq ($(feature-libpython), 1)
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 4da19b6ba94a..79fe31f20a17 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -726,13 +726,13 @@ install: install-bin try-install-man
 	$(call QUIET_CLEAN, config)
 	$(Q)$(MAKE) -C $(srctree)/tools/build/feature/ $(if $(OUTPUT),OUTPUT=$(OUTPUT)feature/,) clean >/dev/null
 
-clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean
+clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean fixdep-clean
 	$(call QUIET_CLEAN, core-objs)  $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS)
 	$(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
 	$(Q)$(RM) $(OUTPUT).config-detected
 	$(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)pmu-events/jevents $(OUTPUT)$(LIBJVMTI).so
 	$(call QUIET_CLEAN, core-gen)   $(RM)  *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \
-		$(OUTPUT)util/intel-pt-decoder/inat-tables.c $(OUTPUT)fixdep \
+		$(OUTPUT)util/intel-pt-decoder/inat-tables.c \
 		$(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \
 		$(OUTPUT)pmu-events/pmu-events.c
 	$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index ebb628332a6e..4f52d85f5ebc 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -410,6 +410,7 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"),
 	OPT_INCR('v', "verbose", &verbose,
 		    "be more verbose (show symbol address, etc)"),
+	OPT_BOOLEAN('q', "quiet", &quiet, "do now show any message"),
 	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
 		    "dump raw trace in ASCII"),
 	OPT_BOOLEAN(0, "gtk", &annotate.use_gtk, "Use the GTK interface"),
@@ -463,6 +464,9 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
 		annotate.sym_hist_filter = argv[0];
 	}
 
+	if (quiet)
+		perf_quiet_option();
+
 	file.path  = input_name;
 
 	annotate.session = perf_session__new(&file, false, &annotate.tool);
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 70a289347591..1b96a3122228 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -691,7 +691,7 @@ static void hists__process(struct hists *hists)
 	hists__precompute(hists);
 	hists__output_resort(hists, NULL);
 
-	hists__fprintf(hists, true, 0, 0, 0, stdout,
+	hists__fprintf(hists, !quiet, 0, 0, 0, stdout,
 		       symbol_conf.use_callchain);
 }
 
@@ -739,12 +739,14 @@ static void data_process(void)
 				hists__link(hists_base, hists);
 		}
 
-		fprintf(stdout, "%s# Event '%s'\n#\n", first ? "" : "\n",
-			perf_evsel__name(evsel_base));
+		if (!quiet) {
+			fprintf(stdout, "%s# Event '%s'\n#\n", first ? "" : "\n",
+				perf_evsel__name(evsel_base));
+		}
 
 		first = false;
 
-		if (verbose || data__files_cnt > 2)
+		if (verbose > 0 || ((data__files_cnt > 2) && !quiet))
 			data__fprintf();
 
 		/* Don't sort callchain for perf diff */
@@ -807,6 +809,7 @@ static const char * const diff_usage[] = {
 static const struct option options[] = {
 	OPT_INCR('v', "verbose", &verbose,
 		    "be more verbose (show symbol address, etc)"),
+	OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any message"),
 	OPT_BOOLEAN('b', "baseline-only", &show_baseline_only,
 		    "Show only items with match in baseline"),
 	OPT_CALLBACK('c', "compute", &compute,
@@ -1328,6 +1331,9 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)
 
 	argc = parse_options(argc, argv, options, diff_usage, 0);
 
+	if (quiet)
+		perf_quiet_option();
+
 	if (symbol__init(NULL) < 0)
 		return -1;
 
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index cd7bc4d104e2..6114e07ca613 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -42,8 +42,8 @@ static int parse_record_events(const struct option *opt,
 
 		fprintf(stderr, "%-13s%-*s%s\n",
 			e->tag,
-			verbose ? 25 : 0,
-			verbose ? perf_mem_events__name(j) : "",
+			verbose > 0 ? 25 : 0,
+			verbose > 0 ? perf_mem_events__name(j) : "",
 			e->supported ? ": available" : "");
 	}
 	exit(0);
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 6cd6776052e7..bc84a375295d 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -432,7 +432,7 @@ static int record__open(struct record *rec)
 try_again:
 		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
 			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
-				if (verbose)
+				if (verbose > 0)
 					ui__warning("%s\n", msg);
 				goto try_again;
 			}
@@ -1677,8 +1677,12 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
 
 	argc = parse_options(argc, argv, record_options, record_usage,
 			    PARSE_OPT_STOP_AT_NON_OPTION);
+	if (quiet)
+		perf_quiet_option();
+
+	/* Make system wide (-a) the default target. */
 	if (!argc && target__none(&rec->opts.target))
-		usage_with_options(record_usage, record_options);
+		rec->opts.target.system_wide = true;
 
 	if (nr_cgroups && !rec->opts.target.system_wide) {
 		usage_with_options_msg(record_usage, record_options,
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index dbd7fa028861..0a88670e56f3 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -320,6 +320,9 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report
 	size_t size = sizeof(buf);
 	int socked_id = hists->socket_filter;
 
+	if (quiet)
+		return 0;
+
 	if (symbol_conf.filter_relative) {
 		nr_samples = hists->stats.nr_non_filtered_samples;
 		nr_events = hists->stats.total_non_filtered_period;
@@ -372,7 +375,11 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
 {
 	struct perf_evsel *pos;
 
-	fprintf(stdout, "#\n# Total Lost Samples: %" PRIu64 "\n#\n", evlist->stats.total_lost_samples);
+	if (!quiet) {
+		fprintf(stdout, "#\n# Total Lost Samples: %" PRIu64 "\n#\n",
+			evlist->stats.total_lost_samples);
+	}
+
 	evlist__for_each_entry(evlist, pos) {
 		struct hists *hists = evsel__hists(pos);
 		const char *evname = perf_evsel__name(pos);
@@ -382,7 +389,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
 			continue;
 
 		hists__fprintf_nr_sample_events(hists, rep, evname, stdout);
-		hists__fprintf(hists, true, 0, 0, rep->min_percent, stdout,
+		hists__fprintf(hists, !quiet, 0, 0, rep->min_percent, stdout,
 			       symbol_conf.use_callchain);
 		fprintf(stdout, "\n\n");
 	}
@@ -716,6 +723,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 		    "input file name"),
 	OPT_INCR('v', "verbose", &verbose,
 		    "be more verbose (show symbol address, etc)"),
+	OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any message"),
 	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
 		    "dump raw trace in ASCII"),
 	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
@@ -863,6 +871,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 		report.symbol_filter_str = argv[0];
 	}
 
+	if (quiet)
+		perf_quiet_option();
+
 	if (symbol_conf.vmlinux_name &&
 	    access(symbol_conf.vmlinux_name, R_OK)) {
 		pr_err("Invalid file: %s\n", symbol_conf.vmlinux_name);
@@ -983,14 +994,14 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 		goto error;
 	}
 
-	if (report.header || report.header_only) {
+	if ((report.header || report.header_only) && !quiet) {
 		perf_session__fprintf_info(session, stdout,
 					   report.show_full_info);
 		if (report.header_only) {
 			ret = 0;
 			goto error;
 		}
-	} else if (use_browser == 0) {
+	} else if (use_browser == 0 && !quiet) {
 		fputs("# To display the perf.data header info, please use --header/--header-only options.\n#\n",
 		      stdout);
 	}
@@ -1009,7 +1020,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
  		 * providing it only in verbose mode not to bloat too
  		 * much struct symbol.
  		 */
-		if (verbose) {
+		if (verbose > 0) {
 			/*
 			 * XXX: Need to provide a less kludgy way to ask for
 			 * more space per symbol, the u32 is for the index on
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 270eb2d8ca6b..b94cf0de715a 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -460,7 +460,7 @@ static struct task_desc *register_pid(struct perf_sched *sched,
 	BUG_ON(!sched->tasks);
 	sched->tasks[task->nr] = task;
 
-	if (verbose)
+	if (verbose > 0)
 		printf("registered task #%ld, PID %ld (%s)\n", sched->nr_tasks, pid, comm);
 
 	return task;
@@ -794,7 +794,7 @@ replay_wakeup_event(struct perf_sched *sched,
 	const u32 pid	 = perf_evsel__intval(evsel, sample, "pid");
 	struct task_desc *waker, *wakee;
 
-	if (verbose) {
+	if (verbose > 0) {
 		printf("sched_wakeup event %p\n", evsel);
 
 		printf(" ... pid %d woke up %s/%d\n", sample->tid, comm, pid);
@@ -822,7 +822,7 @@ static int replay_switch_event(struct perf_sched *sched,
 	int cpu = sample->cpu;
 	s64 delta;
 
-	if (verbose)
+	if (verbose > 0)
 		printf("sched_switch event %p\n", evsel);
 
 	if (cpu >= MAX_CPUS || cpu < 0)
@@ -870,7 +870,7 @@ static int replay_fork_event(struct perf_sched *sched,
 		goto out_put;
 	}
 
-	if (verbose) {
+	if (verbose > 0) {
 		printf("fork event\n");
 		printf("... parent: %s/%d\n", thread__comm_str(parent), parent->tid);
 		printf("...  child: %s/%d\n", thread__comm_str(child), child->tid);
@@ -1573,7 +1573,7 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
 
 	timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp));
 	color_fprintf(stdout, color, "  %12s secs ", stimestamp);
-	if (new_shortname || (verbose && sched_in->tid)) {
+	if (new_shortname || (verbose > 0 && sched_in->tid)) {
 		const char *pid_color = color;
 
 		if (thread__has_color(sched_in))
@@ -2050,7 +2050,7 @@ static void save_task_callchain(struct perf_sched *sched,
 
 	if (thread__resolve_callchain(thread, cursor, evsel, sample,
 				      NULL, NULL, sched->max_stack + 2) != 0) {
-		if (verbose)
+		if (verbose > 0)
 			error("Failed to resolve callchain. Skipping\n");
 
 		return;
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index f28719178b51..13b54999ad79 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -573,7 +573,7 @@ static int __run_perf_stat(int argc, const char **argv)
 			if (errno == EINVAL || errno == ENOSYS ||
 			    errno == ENOENT || errno == EOPNOTSUPP ||
 			    errno == ENXIO) {
-				if (verbose)
+				if (verbose > 0)
 					ui__warning("%s event is not supported by the kernel.\n",
 						    perf_evsel__name(counter));
 				counter->supported = false;
@@ -582,7 +582,7 @@ static int __run_perf_stat(int argc, const char **argv)
 				    !(counter->leader->nr_members > 1))
 					continue;
 			} else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
-                                if (verbose)
+                                if (verbose > 0)
                                         ui__warning("%s\n", msg);
                                 goto try_again;
                         }
@@ -1765,7 +1765,7 @@ static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, i
 
 	cpu = map->map[idx];
 
-	if (cpu >= env->nr_cpus_online)
+	if (cpu >= env->nr_cpus_avail)
 		return -1;
 
 	return cpu;
@@ -2445,8 +2445,9 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 	} else if (big_num_opt == 0) /* User passed --no-big-num */
 		big_num = false;
 
+	/* Make system wide (-a) the default target. */
 	if (!argc && target__none(&target))
-		usage_with_options(stat_usage, stat_options);
+		target.system_wide = true;
 
 	if (run_count < 0) {
 		pr_err("Run count must be a positive number\n");
@@ -2538,7 +2539,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 
 	status = 0;
 	for (run_idx = 0; forever || run_idx < run_count; run_idx++) {
-		if (run_count != 1 && verbose)
+		if (run_count != 1 && verbose > 0)
 			fprintf(output, "[ perf stat: executing run #%d ... ]\n",
 				run_idx + 1);
 
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 5a7fd7af3a6d..ab9077915763 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -871,7 +871,7 @@ static int perf_top__start_counters(struct perf_top *top)
 		if (perf_evsel__open(counter, top->evlist->cpus,
 				     top->evlist->threads) < 0) {
 			if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
-				if (verbose)
+				if (verbose > 0)
 					ui__warning("%s\n", msg);
 				goto try_again;
 			}
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 40ef9b293d1b..256f1fac6f7e 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1399,7 +1399,7 @@ static struct syscall *trace__syscall_info(struct trace *trace,
 	return &trace->syscalls.table[id];
 
 out_cant_read:
-	if (verbose) {
+	if (verbose > 0) {
 		fprintf(trace->output, "Problems reading syscall %d", id);
 		if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
 			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
@@ -1801,10 +1801,10 @@ static void print_location(FILE *f, struct perf_sample *sample,
 			   bool print_dso, bool print_sym)
 {
 
-	if ((verbose || print_dso) && al->map)
+	if ((verbose > 0 || print_dso) && al->map)
 		fprintf(f, "%s@", al->map->dso->long_name);
 
-	if ((verbose || print_sym) && al->sym)
+	if ((verbose > 0 || print_sym) && al->sym)
 		fprintf(f, "%s+0x%" PRIx64, al->sym->name,
 			al->addr - al->sym->start);
 	else if (al->map)
diff --git a/tools/perf/pmu-events/json.c b/tools/perf/pmu-events/json.c
index f67bbb0aa36e..0544398d6e2d 100644
--- a/tools/perf/pmu-events/json.c
+++ b/tools/perf/pmu-events/json.c
@@ -49,7 +49,7 @@ static char *mapfile(const char *fn, size_t *size)
 	int err;
 	int fd = open(fn, O_RDONLY);
 
-	if (fd < 0 && verbose && fn) {
+	if (fd < 0 && verbose > 0 && fn) {
 		pr_err("Error opening events file '%s': %s\n", fn,
 				strerror(errno));
 	}
diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c
index 28d1605b0338..88dc51f4c27b 100644
--- a/tools/perf/tests/attr.c
+++ b/tools/perf/tests/attr.c
@@ -144,7 +144,7 @@ static int run_dir(const char *d, const char *perf)
 	int vcnt = min(verbose, (int) sizeof(v) - 1);
 	char cmd[3*PATH_MAX];
 
-	if (verbose)
+	if (verbose > 0)
 		vcnt++;
 
 	snprintf(cmd, 3*PATH_MAX, PYTHON " %s/attr.py -d %s/attr/ -p %s %.*s",
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 37e326bfd2dc..83c4669cbc5b 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -299,7 +299,7 @@ static int run_test(struct test *test, int subtest)
 		if (!dont_fork) {
 			pr_debug("test child forked, pid %d\n", getpid());
 
-			if (!verbose) {
+			if (verbose <= 0) {
 				int nullfd = open("/dev/null", O_WRONLY);
 
 				if (nullfd >= 0) {
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index ff5bc6363a79..d1f693041324 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -599,7 +599,7 @@ static int do_test_code_reading(bool try_kcore)
 				continue;
 			}
 
-			if (verbose) {
+			if (verbose > 0) {
 				char errbuf[512];
 				perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
 				pr_debug("perf_evlist__open() failed!\n%s\n", errbuf);
diff --git a/tools/perf/tests/fdarray.c b/tools/perf/tests/fdarray.c
index a2b5ff9bf83d..bc5982f42dc3 100644
--- a/tools/perf/tests/fdarray.c
+++ b/tools/perf/tests/fdarray.c
@@ -19,7 +19,7 @@ static int fdarray__fprintf_prefix(struct fdarray *fda, const char *prefix, FILE
 {
 	int printed = 0;
 
-	if (!verbose)
+	if (verbose <= 0)
 		return 0;
 
 	printed += fprintf(fp, "\n%s: ", prefix);
diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c
index d357dab72e68..482b5365e68d 100644
--- a/tools/perf/tests/llvm.c
+++ b/tools/perf/tests/llvm.c
@@ -76,7 +76,7 @@ test_llvm__fetch_bpf_obj(void **p_obj_buf,
 	 * Skip this test if user's .perfconfig doesn't set [llvm] section
 	 * and clang is not found in $PATH, and this is not perf test -v
 	 */
-	if (!force && (verbose == 0 &&
+	if (!force && (verbose <= 0 &&
 		       !llvm_param.user_set_param &&
 		       llvm__search_clang())) {
 		pr_debug("No clang and no verbosive, skip this test\n");
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index aa9276bfe3e9..1dc838014422 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -1808,7 +1808,7 @@ static void debug_warn(const char *warn, va_list params)
 {
 	char msg[1024];
 
-	if (!verbose)
+	if (verbose <= 0)
 		return;
 
 	vsnprintf(msg, sizeof(msg), warn, params);
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index 541da7a68f91..87893f3ba5f1 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -172,13 +172,13 @@ int test__PERF_RECORD(int subtest __maybe_unused)
 
 				err = perf_evlist__parse_sample(evlist, event, &sample);
 				if (err < 0) {
-					if (verbose)
+					if (verbose > 0)
 						perf_event__fprintf(event, stderr);
 					pr_debug("Couldn't parse sample\n");
 					goto out_delete_evlist;
 				}
 
-				if (verbose) {
+				if (verbose > 0) {
 					pr_info("%" PRIu64" %d ", sample.time, sample.cpu);
 					perf_event__fprintf(event, stderr);
 				}
diff --git a/tools/perf/tests/python-use.c b/tools/perf/tests/python-use.c
index 7a52834ee0d0..fa79509da535 100644
--- a/tools/perf/tests/python-use.c
+++ b/tools/perf/tests/python-use.c
@@ -15,7 +15,7 @@ int test__python_use(int subtest __maybe_unused)
 	int ret;
 
 	if (asprintf(&cmd, "echo \"import sys ; sys.path.append('%s'); import perf\" | %s %s",
-		     PYTHONPATH, PYTHON, verbose ? "" : "2> /dev/null") < 0)
+		     PYTHONPATH, PYTHON, verbose > 0 ? "" : "2> /dev/null") < 0)
 		return -1;
 
 	ret = system(cmd) ? -1 : 0;
diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c
index a4a4b4625ac3..f2d2e542d0ee 100644
--- a/tools/perf/tests/thread-map.c
+++ b/tools/perf/tests/thread-map.c
@@ -109,7 +109,7 @@ int test__thread_map_remove(int subtest __maybe_unused)
 	TEST_ASSERT_VAL("failed to allocate thread_map",
 			threads);
 
-	if (verbose)
+	if (verbose > 0)
 		thread_map__fprintf(threads, stderr);
 
 	TEST_ASSERT_VAL("failed to remove thread",
@@ -117,7 +117,7 @@ int test__thread_map_remove(int subtest __maybe_unused)
 
 	TEST_ASSERT_VAL("thread_map count != 1", threads->nr == 1);
 
-	if (verbose)
+	if (verbose > 0)
 		thread_map__fprintf(threads, stderr);
 
 	TEST_ASSERT_VAL("failed to remove thread",
@@ -125,7 +125,7 @@ int test__thread_map_remove(int subtest __maybe_unused)
 
 	TEST_ASSERT_VAL("thread_map count != 0", threads->nr == 0);
 
-	if (verbose)
+	if (verbose > 0)
 		thread_map__fprintf(threads, stderr);
 
 	TEST_ASSERT_VAL("failed to not remove thread",
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index 98fe69ac553c..803f893550d6 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -65,7 +65,9 @@ static int check_cpu_topology(char *path, struct cpu_map *map)
 	session = perf_session__new(&file, false, NULL);
 	TEST_ASSERT_VAL("can't get session", session);
 
-	for (i = 0; i < session->header.env.nr_cpus_online; i++) {
+	for (i = 0; i < session->header.env.nr_cpus_avail; i++) {
+		if (!cpu_map__has(map, i))
+			continue;
 		pr_debug("CPU %d, core %d, socket %d\n", i,
 			 session->header.env.cpu[i].core_id,
 			 session->header.env.cpu[i].socket_id);
diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
index a5082331f246..862b043e5924 100644
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -168,7 +168,7 @@ int test__vmlinux_matches_kallsyms(int subtest __maybe_unused)
 		err = -1;
 	}
 
-	if (!verbose)
+	if (verbose <= 0)
 		goto out;
 
 	header_printed = false;
diff --git a/tools/perf/ui/browsers/map.c b/tools/perf/ui/browsers/map.c
index 98a34664bb7e..9ce142de536d 100644
--- a/tools/perf/ui/browsers/map.c
+++ b/tools/perf/ui/browsers/map.c
@@ -73,7 +73,7 @@ static int map_browser__run(struct map_browser *browser)
 
 	if (ui_browser__show(&browser->b, browser->map->dso->long_name,
 			     "Press ESC to exit, %s / to search",
-			     verbose ? "" : "restart with -v to use") < 0)
+			     verbose > 0 ? "" : "restart with -v to use") < 0)
 		return -1;
 
 	while (1) {
@@ -81,7 +81,7 @@ static int map_browser__run(struct map_browser *browser)
 
 		switch (key) {
 		case '/':
-			if (verbose)
+			if (verbose > 0)
 				map_browser__search(browser);
 		default:
 			break;
@@ -117,7 +117,7 @@ int map__browse(struct map *map)
 
 		if (maxaddr < pos->end)
 			maxaddr = pos->end;
-		if (verbose) {
+		if (verbose > 0) {
 			u32 *idx = symbol__browser_index(pos);
 			*idx = mb.b.nr_entries;
 		}
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 18cfcdc90356..5d632dca672a 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -648,7 +648,7 @@ unsigned int hists__sort_list_width(struct hists *hists)
 		ret += fmt->width(fmt, &dummy_hpp, hists);
 	}
 
-	if (verbose && hists__has(hists, sym)) /* Addr + origin */
+	if (verbose > 0 && hists__has(hists, sym)) /* Addr + origin */
 		ret += 3 + BITS_PER_LONG / 4;
 
 	return ret;
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 06cc04e5806a..273f21fa32b5 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -1768,7 +1768,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map,
 	printf("%-*.*s----\n",
 	       graph_dotted_len, graph_dotted_len, graph_dotted_line);
 
-	if (verbose)
+	if (verbose > 0)
 		symbol__annotate_hits(sym, evsel);
 
 	list_for_each_entry(pos, &notes->src->source, node) {
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 2c0b52264a46..8c7504939113 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -9,6 +9,7 @@
 #include "asm/bug.h"
 
 static int max_cpu_num;
+static int max_present_cpu_num;
 static int max_node_num;
 static int *cpunode_map;
 
@@ -442,6 +443,7 @@ static void set_max_cpu_num(void)
 
 	/* set up default */
 	max_cpu_num = 4096;
+	max_present_cpu_num = 4096;
 
 	mnt = sysfs__mountpoint();
 	if (!mnt)
@@ -455,6 +457,17 @@ static void set_max_cpu_num(void)
 	}
 
 	ret = get_max_num(path, &max_cpu_num);
+	if (ret)
+		goto out;
+
+	/* get the highest present cpu number for a sparse allocation */
+	ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/present", mnt);
+	if (ret == PATH_MAX) {
+		pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
+		goto out;
+	}
+
+	ret = get_max_num(path, &max_present_cpu_num);
 
 out:
 	if (ret)
@@ -505,6 +518,15 @@ int cpu__max_cpu(void)
 	return max_cpu_num;
 }
 
+int cpu__max_present_cpu(void)
+{
+	if (unlikely(!max_present_cpu_num))
+		set_max_cpu_num();
+
+	return max_present_cpu_num;
+}
+
+
 int cpu__get_node(int cpu)
 {
 	if (unlikely(cpunode_map == NULL)) {
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 06bd689f5989..1a0549af8f5c 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -62,6 +62,7 @@ int cpu__setup_cpunode_map(void);
 
 int cpu__max_node(void);
 int cpu__max_cpu(void);
+int cpu__max_present_cpu(void);
 int cpu__get_node(int cpu);
 
 int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res,
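
The new cpu__max_present_cpu() is there because the count returned by
_SC_NPROCESSORS_CONF can be smaller than the highest CPU id when CPU
numbering is sparse; the sysfs "present" file is a range list, and its
highest entry plus one is what has to bound per-CPU arrays. A rough,
self-contained sketch of that parsing idea (the helper below is only
illustrative, not perf's get_max_num()):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Return the highest CPU number in a sysfs-style list, plus one. */
static int cpu_list_max_plus_one(char *list)
{
	int max = -1;

	for (char *tok = strtok(list, ","); tok; tok = strtok(NULL, ",")) {
		char *dash = strchr(tok, '-');
		int hi = atoi(dash ? dash + 1 : tok);

		if (hi > max)
			max = hi;
	}
	return max + 1;
}

int main(void)
{
	/* e.g. the contents of /sys/devices/system/cpu/present */
	char present[] = "0-3,8-11";

	printf("%d\n", cpu_list_max_plus_one(present));	/* 12 */
	return 0;
}
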
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index c1838b643108..03eb81f30d0d 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -203,11 +203,28 @@ int perf_debug_option(const char *str)
 		v = (v < 0) || (v > 10) ? 0 : v;
 	}
 
+	if (quiet)
+		v = -1;
+
 	*var->ptr = v;
 	free(s);
 	return 0;
 }
 
+int perf_quiet_option(void)
+{
+	struct debug_variable *var = &debug_variables[0];
+
+	/* disable all debug messages */
+	while (var->name) {
+		*var->ptr = -1;
+		var++;
+	}
+
+	quiet = true;
+	return 0;
+}
+
 #define DEBUG_WRAPPER(__n, __l)				\
 static int pr_ ## __n ## _wrapper(const char *fmt, ...)	\
 {							\
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index d242adc3d5a2..98832f5531d3 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -54,5 +54,6 @@ int veprintf(int level, int var, const char *fmt, va_list args);
 
 int perf_debug_option(const char *str);
 void perf_debug_setup(void);
+int perf_quiet_option(void);
 
 #endif	/* __PERF_DEBUG_H */
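
These two hunks are also why so many hunks elsewhere in the series turn
"if (verbose)" into "if (verbose > 0)": perf_quiet_option() drives every
debug level, including verbose, to -1, and a plain truth test would treat
that as "verbose on". A minimal standalone sketch of the intended
semantics (not the perf sources):

#include <stdio.h>

static int verbose;	/* 0 = default, > 0 with -v, < 0 under --quiet */

static void quiet_option(void)
{
	verbose = -1;		/* also suppress the default messages */
}

static void emit(void)
{
	if (verbose)		/* old check: -1 still counts as "on" */
		puts("would print even under --quiet");
	if (verbose > 0)	/* new check: only explicit -v prints */
		puts("prints only with -v");
}

int main(void)
{
	quiet_option();
	emit();			/* only the old-style check fires here */
	return 0;
}
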
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 28d41e709128..1a03e9e310a4 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -1058,7 +1058,7 @@ int dso__name_len(const struct dso *dso)
 {
 	if (!dso)
 		return strlen("[unknown]");
-	if (verbose)
+	if (verbose > 0)
 		return dso->long_name_len;
 
 	return dso->short_name_len;
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index bb964e86b09d..075fc77286bf 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -66,7 +66,7 @@ int perf_env__read_cpu_topology_map(struct perf_env *env)
 		return 0;
 
 	if (env->nr_cpus_avail == 0)
-		env->nr_cpus_avail = sysconf(_SC_NPROCESSORS_CONF);
+		env->nr_cpus_avail = cpu__max_present_cpu();
 
 	nr_cpus = env->nr_cpus_avail;
 	if (nr_cpus == -1)
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 3d12c16e5103..05714d548584 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -295,11 +295,7 @@ static int write_nrcpus(int fd, struct perf_header *h __maybe_unused,
 	u32 nrc, nra;
 	int ret;
 
-	nr = sysconf(_SC_NPROCESSORS_CONF);
-	if (nr < 0)
-		return -1;
-
-	nrc = (u32)(nr & UINT_MAX);
+	nrc = cpu__max_present_cpu();
 
 	nr = sysconf(_SC_NPROCESSORS_ONLN);
 	if (nr < 0)
@@ -505,24 +501,29 @@ static void free_cpu_topo(struct cpu_topo *tp)
 
 static struct cpu_topo *build_cpu_topology(void)
 {
-	struct cpu_topo *tp;
+	struct cpu_topo *tp = NULL;
 	void *addr;
 	u32 nr, i;
 	size_t sz;
 	long ncpus;
 	int ret = -1;
+	struct cpu_map *map;
 
-	ncpus = sysconf(_SC_NPROCESSORS_CONF);
-	if (ncpus < 0)
+	ncpus = cpu__max_present_cpu();
+
+	/* build online CPU map */
+	map = cpu_map__new(NULL);
+	if (map == NULL) {
+		pr_debug("failed to get system cpumap\n");
 		return NULL;
+	}
 
 	nr = (u32)(ncpus & UINT_MAX);
 
 	sz = nr * sizeof(char *);
-
 	addr = calloc(1, sizeof(*tp) + 2 * sz);
 	if (!addr)
-		return NULL;
+		goto out_free;
 
 	tp = addr;
 	tp->cpu_nr = nr;
@@ -532,10 +533,16 @@ static struct cpu_topo *build_cpu_topology(void)
 	tp->thread_siblings = addr;
 
 	for (i = 0; i < nr; i++) {
+		if (!cpu_map__has(map, i))
+			continue;
+
 		ret = build_cpu_topo(tp, i);
 		if (ret < 0)
 			break;
 	}
+
+out_free:
+	cpu_map__put(map);
 	if (ret) {
 		free_cpu_topo(tp);
 		tp = NULL;
@@ -1126,7 +1133,7 @@ static void print_cpu_topology(struct perf_header *ph, int fd __maybe_unused,
 {
 	int nr, i;
 	char *str;
-	int cpu_nr = ph->env.nr_cpus_online;
+	int cpu_nr = ph->env.nr_cpus_avail;
 
 	nr = ph->env.nr_sibling_cores;
 	str = ph->env.sibling_cores;
@@ -1781,7 +1788,7 @@ static int process_cpu_topology(struct perf_file_section *section,
 	u32 nr, i;
 	char *str;
 	struct strbuf sb;
-	int cpu_nr = ph->env.nr_cpus_online;
+	int cpu_nr = ph->env.nr_cpus_avail;
 	u64 size = 0;
 
 	ph->env.cpu = calloc(cpu_nr, sizeof(*ph->env.cpu));
@@ -1862,7 +1869,7 @@ static int process_cpu_topology(struct perf_file_section *section,
 		if (ph->needs_swap)
 			nr = bswap_32(nr);
 
-		if (nr > (u32)cpu_nr) {
+		if (nr != (u32)-1 && nr > (u32)cpu_nr) {
 			pr_debug("socket_id number is too big."
 				 "You may need to upgrade the perf tool.\n");
 			goto free_cpu;
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 32c6a939e4cc..eaf72a938fb4 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -69,7 +69,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
 	 */
 	if (h->ms.sym) {
 		symlen = h->ms.sym->namelen + 4;
-		if (verbose)
+		if (verbose > 0)
 			symlen += BITS_PER_LONG / 4 + 2 + 3;
 		hists__new_col_len(hists, HISTC_SYMBOL, symlen);
 	} else {
@@ -93,7 +93,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
 	if (h->branch_info) {
 		if (h->branch_info->from.sym) {
 			symlen = (int)h->branch_info->from.sym->namelen + 4;
-			if (verbose)
+			if (verbose > 0)
 				symlen += BITS_PER_LONG / 4 + 2 + 3;
 			hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen);
 
@@ -107,7 +107,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
 
 		if (h->branch_info->to.sym) {
 			symlen = (int)h->branch_info->to.sym->namelen + 4;
-			if (verbose)
+			if (verbose > 0)
 				symlen += BITS_PER_LONG / 4 + 2 + 3;
 			hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen);
 
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 281e44af31e2..67a8aebc67ab 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -2318,24 +2318,20 @@ int parse_events__is_hardcoded_term(struct parse_events_term *term)
 	return term->type_term != PARSE_EVENTS__TERM_TYPE_USER;
 }
 
-static int new_term(struct parse_events_term **_term, int type_val,
-		    int type_term, char *config,
-		    char *str, u64 num, int err_term, int err_val)
+static int new_term(struct parse_events_term **_term,
+		    struct parse_events_term *temp,
+		    char *str, u64 num)
 {
 	struct parse_events_term *term;
 
-	term = zalloc(sizeof(*term));
+	term = malloc(sizeof(*term));
 	if (!term)
 		return -ENOMEM;
 
+	*term = *temp;
 	INIT_LIST_HEAD(&term->list);
-	term->type_val  = type_val;
-	term->type_term = type_term;
-	term->config = config;
-	term->err_term = err_term;
-	term->err_val  = err_val;
 
-	switch (type_val) {
+	switch (term->type_val) {
 	case PARSE_EVENTS__TERM_TYPE_NUM:
 		term->val.num = num;
 		break;
@@ -2353,15 +2349,22 @@ static int new_term(struct parse_events_term **_term, int type_val,
 
 int parse_events_term__num(struct parse_events_term **term,
 			   int type_term, char *config, u64 num,
+			   bool no_value,
 			   void *loc_term_, void *loc_val_)
 {
 	YYLTYPE *loc_term = loc_term_;
 	YYLTYPE *loc_val = loc_val_;
 
-	return new_term(term, PARSE_EVENTS__TERM_TYPE_NUM, type_term,
-			config, NULL, num,
-			loc_term ? loc_term->first_column : 0,
-			loc_val ? loc_val->first_column : 0);
+	struct parse_events_term temp = {
+		.type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
+		.type_term = type_term,
+		.config    = config,
+		.no_value  = no_value,
+		.err_term  = loc_term ? loc_term->first_column : 0,
+		.err_val   = loc_val  ? loc_val->first_column  : 0,
+	};
+
+	return new_term(term, &temp, NULL, num);
 }
 
 int parse_events_term__str(struct parse_events_term **term,
@@ -2371,37 +2374,45 @@ int parse_events_term__str(struct parse_events_term **term,
 	YYLTYPE *loc_term = loc_term_;
 	YYLTYPE *loc_val = loc_val_;
 
-	return new_term(term, PARSE_EVENTS__TERM_TYPE_STR, type_term,
-			config, str, 0,
-			loc_term ? loc_term->first_column : 0,
-			loc_val ? loc_val->first_column : 0);
+	struct parse_events_term temp = {
+		.type_val  = PARSE_EVENTS__TERM_TYPE_STR,
+		.type_term = type_term,
+		.config    = config,
+		.err_term  = loc_term ? loc_term->first_column : 0,
+		.err_val   = loc_val  ? loc_val->first_column  : 0,
+	};
+
+	return new_term(term, &temp, str, 0);
 }
 
 int parse_events_term__sym_hw(struct parse_events_term **term,
 			      char *config, unsigned idx)
 {
 	struct event_symbol *sym;
+	struct parse_events_term temp = {
+		.type_val  = PARSE_EVENTS__TERM_TYPE_STR,
+		.type_term = PARSE_EVENTS__TERM_TYPE_USER,
+		.config    = config ?: (char *) "event",
+	};
 
 	BUG_ON(idx >= PERF_COUNT_HW_MAX);
 	sym = &event_symbols_hw[idx];
 
-	if (config)
-		return new_term(term, PARSE_EVENTS__TERM_TYPE_STR,
-				PARSE_EVENTS__TERM_TYPE_USER, config,
-				(char *) sym->symbol, 0, 0, 0);
-	else
-		return new_term(term, PARSE_EVENTS__TERM_TYPE_STR,
-				PARSE_EVENTS__TERM_TYPE_USER,
-				(char *) "event", (char *) sym->symbol,
-				0, 0, 0);
+	return new_term(term, &temp, (char *) sym->symbol, 0);
 }
 
 int parse_events_term__clone(struct parse_events_term **new,
 			     struct parse_events_term *term)
 {
-	return new_term(new, term->type_val, term->type_term, term->config,
-			term->val.str, term->val.num,
-			term->err_term, term->err_val);
+	struct parse_events_term temp = {
+		.type_val  = term->type_val,
+		.type_term = term->type_term,
+		.config    = term->config,
+		.err_term  = term->err_term,
+		.err_val   = term->err_val,
+	};
+
+	return new_term(new, &temp, term->val.str, term->val.num);
 }
 
 void parse_events_terms__purge(struct list_head *terms)
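
The new_term() rework replaces a long positional argument list with a
template struct filled by designated initializers, which is what lets the
later hunks add the .no_value field without touching every caller. A small
sketch of that pattern with placeholder fields (not the real
parse_events_term layout):

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* Placeholder fields only -- not the real parse_events_term layout. */
struct term_template {
	int type;
	const char *config;
	bool no_value;
	int err_col;
};

static struct term_template *new_term(const struct term_template *temp)
{
	struct term_template *t = malloc(sizeof(*t));

	if (!t)
		return NULL;
	*t = *temp;	/* one struct copy instead of many assignments */
	return t;
}

int main(void)
{
	struct term_template temp = {
		.type     = 1,
		.config   = "event",
		.no_value = true,	/* new field; existing callers unchanged */
	};
	struct term_template *t = new_term(&temp);

	if (t)
		printf("%s no_value=%d err_col=%d\n", t->config, t->no_value, t->err_col);
	free(t);
	return 0;
}
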
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index da246a3ddb69..1af6a267c21b 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -94,6 +94,7 @@ struct parse_events_term {
 	int type_term;
 	struct list_head list;
 	bool used;
+	bool no_value;
 
 	/* error string indexes for within parsed string */
 	int err_term;
@@ -122,6 +123,7 @@ void parse_events__shrink_config_terms(void);
 int parse_events__is_hardcoded_term(struct parse_events_term *term);
 int parse_events_term__num(struct parse_events_term **term,
 			   int type_term, char *config, u64 num,
+			   bool novalue,
 			   void *loc_term, void *loc_val);
 int parse_events_term__str(struct parse_events_term **term,
 			   int type_term, char *config, char *str,
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index a14b47ab3879..30f018ea1370 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -252,7 +252,7 @@ PE_KERNEL_PMU_EVENT sep_dc
 			if (!strcasecmp(alias->name, $1)) {
 				ALLOC_LIST(head);
 				ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
-					$1, 1, &@1, NULL));
+					$1, 1, false, &@1, NULL));
 				list_add_tail(&term->list, head);
 
 				if (!parse_events_add_pmu(data, list,
@@ -282,7 +282,7 @@ PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc
 
 	ALLOC_LIST(head);
 	ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
-					&pmu_name, 1, &@1, NULL));
+					&pmu_name, 1, false, &@1, NULL));
 	list_add_tail(&term->list, head);
 
 	ALLOC_LIST(list);
@@ -548,7 +548,7 @@ PE_NAME '=' PE_VALUE
 	struct parse_events_term *term;
 
 	ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
-					$1, $3, &@1, &@3));
+					$1, $3, false, &@1, &@3));
 	$$ = term;
 }
 |
@@ -566,7 +566,7 @@ PE_NAME
 	struct parse_events_term *term;
 
 	ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
-					$1, 1, &@1, NULL));
+					$1, 1, true, &@1, NULL));
 	$$ = term;
 }
 |
@@ -591,7 +591,7 @@ PE_TERM '=' PE_VALUE
 {
 	struct parse_events_term *term;
 
-	ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, $3, &@1, &@3));
+	ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, $3, false, &@1, &@3));
 	$$ = term;
 }
 |
@@ -599,7 +599,7 @@ PE_TERM
 {
 	struct parse_events_term *term;
 
-	ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1, &@1, NULL));
+	ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1, true, &@1, NULL));
 	$$ = term;
 }
 |
@@ -620,7 +620,7 @@ PE_NAME array '=' PE_VALUE
 	struct parse_events_term *term;
 
 	ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
-					$1, $4, &@1, &@4));
+					$1, $4, false, &@1, &@4));
 	term->array = $2;
 	$$ = term;
 }
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 49bfee0e3d9e..12f84dd2ac5d 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -745,7 +745,7 @@ static int pmu_resolve_param_term(struct parse_events_term *term,
 		}
 	}
 
-	if (verbose)
+	if (verbose > 0)
 		printf("Required parameter '%s' not specified\n", term->config);
 
 	return -1;
@@ -803,7 +803,7 @@ static int pmu_config_term(struct list_head *formats,
 
 	format = pmu_find_format(formats, term->config);
 	if (!format) {
-		if (verbose)
+		if (verbose > 0)
 			printf("Invalid event/parameter '%s'\n", term->config);
 		if (err) {
 			char *pmu_term = pmu_formats_string(formats);
@@ -834,11 +834,20 @@ static int pmu_config_term(struct list_head *formats,
 	 * Either directly use a numeric term, or try to translate string terms
 	 * using event parameters.
 	 */
-	if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM)
+	if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) {
+		if (term->no_value &&
+		    bitmap_weight(format->bits, PERF_PMU_FORMAT_BITS) > 1) {
+			if (err) {
+				err->idx = term->err_val;
+				err->str = strdup("no value assigned for term");
+			}
+			return -EINVAL;
+		}
+
 		val = term->val.num;
-	else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) {
+	} else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) {
 		if (strcmp(term->val.str, "?")) {
-			if (verbose) {
+			if (verbose > 0) {
 				pr_info("Invalid sysfs entry %s=%s\n",
 						term->config, term->val.str);
 			}
@@ -1223,7 +1232,7 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag,
 			printf("%*s", 8, "[");
 			wordwrap(aliases[j].desc, 8, columns, 0);
 			printf("]\n");
-			if (verbose)
+			if (verbose > 0)
 				printf("%*s%s/%s/\n", 8, "", aliases[j].pmu, aliases[j].str);
 		} else
 			printf("  %-50s [Kernel PMU event]\n", aliases[j].name);
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 35f5b7b7715c..28fb62c32678 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -594,7 +594,7 @@ static int find_perf_probe_point_from_dwarf(struct probe_trace_point *tp,
 	pr_debug("try to find information at %" PRIx64 " in %s\n", addr,
 		 tp->module ? : "kernel");
 
-	dinfo = debuginfo_cache__open(tp->module, verbose == 0);
+	dinfo = debuginfo_cache__open(tp->module, verbose <= 0);
 	if (dinfo)
 		ret = debuginfo__find_probe_point(dinfo,
 						 (unsigned long)addr, pp);
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 089438da1f7f..89b532c47cc4 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -368,10 +368,10 @@ static PyObject *python_process_callchain(struct perf_sample *sample,
 		if (node->map) {
 			struct map *map = node->map;
 			const char *dsoname = "[unknown]";
-			if (map && map->dso && (map->dso->name || map->dso->long_name)) {
+			if (map && map->dso) {
 				if (symbol_conf.show_kernel_path && map->dso->long_name)
 					dsoname = map->dso->long_name;
-				else if (map->dso->name)
+				else
 					dsoname = map->dso->name;
 			}
 			pydict_set_item_string_decref(pyelem, "dso",
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 4cdbc8f5f14d..1dd617d116b5 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -932,7 +932,7 @@ static void branch_stack__printf(struct perf_sample *sample)
 
 		printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n",
 			i, e->from, e->to,
-			e->flags.cycles,
+			(unsigned short)e->flags.cycles,
 			e->flags.mispred ? "M" : " ",
 			e->flags.predicted ? "P" : " ",
 			e->flags.abort ? "A" : " ",
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index c8680984d2d6..af415febbc46 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -1,8 +1,15 @@
 #!/usr/bin/python2
 
-from distutils.core import setup, Extension
 from os import getenv
 
+cc = getenv("CC")
+if cc == "clang":
+    from _sysconfigdata import build_time_vars
+    from re import sub
+    build_time_vars["CFLAGS"] = sub("-specs=[^ ]+", "", build_time_vars["CFLAGS"])
+
+from distutils.core import setup, Extension
+
 from distutils.command.build_ext   import build_ext   as _build_ext
 from distutils.command.install_lib import install_lib as _install_lib
 
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index df622f4e301e..0ff622288d24 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -151,7 +151,7 @@ static int64_t _sort__dso_cmp(struct map *map_l, struct map *map_r)
 	if (!dso_l || !dso_r)
 		return cmp_null(dso_r, dso_l);
 
-	if (verbose) {
+	if (verbose > 0) {
 		dso_name_l = dso_l->long_name;
 		dso_name_r = dso_r->long_name;
 	} else {
@@ -172,8 +172,8 @@ static int _hist_entry__dso_snprintf(struct map *map, char *bf,
 				     size_t size, unsigned int width)
 {
 	if (map && map->dso) {
-		const char *dso_name = !verbose ? map->dso->short_name :
-			map->dso->long_name;
+		const char *dso_name = verbose > 0 ? map->dso->long_name :
+			map->dso->short_name;
 		return repsep_snprintf(bf, size, "%-*.*s", width, width, dso_name);
 	}
 
@@ -261,7 +261,7 @@ static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym,
 {
 	size_t ret = 0;
 
-	if (verbose) {
+	if (verbose > 0) {
 		char o = map ? dso__symtab_origin(map->dso) : '!';
 		ret += repsep_snprintf(bf, size, "%-#*llx %c ",
 				       BITS_PER_LONG / 4 + 2, ip, o);
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 39345c2ddfc2..0d51334a9b46 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -344,7 +344,7 @@ int perf_stat_process_counter(struct perf_stat_config *config,
 	for (i = 0; i < 3; i++)
 		update_stats(&ps->res_stats[i], count[i]);
 
-	if (verbose) {
+	if (verbose > 0) {
 		fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
 			perf_evsel__name(counter), count[0], count[1], count[2]);
 	}
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index adbc6c02c3aa..4e59ddeb4eda 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -213,7 +213,7 @@ static bool want_demangle(bool is_kernel_sym)
 
 static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name)
 {
-	int demangle_flags = verbose ? (DMGL_PARAMS | DMGL_ANSI) : DMGL_NO_OPTS;
+	int demangle_flags = verbose > 0 ? (DMGL_PARAMS | DMGL_ANSI) : DMGL_NO_OPTS;
 	char *demangled = NULL;
 
 	/*

^ permalink raw reply	[flat|nested] 316+ messages in thread

* [GIT PULL] perf fixes
@ 2017-02-11 18:12 Ingo Molnar
  0 siblings, 0 replies; 316+ messages in thread
From: Ingo Molnar @ 2017-02-11 18:12 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Thomas Gleixner, Andrew Morton

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: 451d24d1e5f40bad000fa9abe36ddb16fc9928cb perf/core: Fix crash in perf_event_read()

A kernel crash fix plus three tooling fixes.

 Thanks,

	Ingo

------------------>
Krister Johansen (1):
      perf callchain: Reference count maps

Namhyung Kim (2):
      perf diff: Fix segfault on 'perf diff -o N' option
      perf diff: Fix -o/--order option behavior (again)

Peter Zijlstra (1):
      perf/core: Fix crash in perf_event_read()


 kernel/events/core.c        | 25 +++++++++++++++----------
 tools/perf/builtin-diff.c   |  2 +-
 tools/perf/ui/hist.c        | 10 ++++++++++
 tools/perf/util/callchain.c | 11 +++++++++--
 tools/perf/util/callchain.h |  6 ++++++
 tools/perf/util/hist.c      |  7 +++++++
 tools/perf/util/hist.h      |  7 +++++++
 7 files changed, 55 insertions(+), 13 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index e5aaa806702d..e235bb991bdd 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3487,14 +3487,15 @@ struct perf_read_data {
 	int ret;
 };
 
-static int find_cpu_to_read(struct perf_event *event, int local_cpu)
+static int __perf_event_read_cpu(struct perf_event *event, int event_cpu)
 {
-	int event_cpu = event->oncpu;
 	u16 local_pkg, event_pkg;
 
 	if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) {
-		event_pkg =  topology_physical_package_id(event_cpu);
-		local_pkg =  topology_physical_package_id(local_cpu);
+		int local_cpu = smp_processor_id();
+
+		event_pkg = topology_physical_package_id(event_cpu);
+		local_pkg = topology_physical_package_id(local_cpu);
 
 		if (event_pkg == local_pkg)
 			return local_cpu;
@@ -3624,7 +3625,7 @@ u64 perf_event_read_local(struct perf_event *event)
 
 static int perf_event_read(struct perf_event *event, bool group)
 {
-	int ret = 0, cpu_to_read, local_cpu;
+	int event_cpu, ret = 0;
 
 	/*
 	 * If event is enabled and currently active on a CPU, update the
@@ -3637,21 +3638,25 @@ static int perf_event_read(struct perf_event *event, bool group)
 			.ret = 0,
 		};
 
-		local_cpu = get_cpu();
-		cpu_to_read = find_cpu_to_read(event, local_cpu);
-		put_cpu();
+		event_cpu = READ_ONCE(event->oncpu);
+		if ((unsigned)event_cpu >= nr_cpu_ids)
+			return 0;
+
+		preempt_disable();
+		event_cpu = __perf_event_read_cpu(event, event_cpu);
 
 		/*
 		 * Purposely ignore the smp_call_function_single() return
 		 * value.
 		 *
-		 * If event->oncpu isn't a valid CPU it means the event got
+		 * If event_cpu isn't a valid CPU it means the event got
 		 * scheduled out and that will have updated the event count.
 		 *
 		 * Therefore, either way, we'll have an up-to-date event count
 		 * after this.
 		 */
-		(void)smp_call_function_single(cpu_to_read, __perf_event_read, &data, 1);
+		(void)smp_call_function_single(event_cpu, __perf_event_read, &data, 1);
+		preempt_enable();
 		ret = data.ret;
 	} else if (event->state == PERF_EVENT_STATE_INACTIVE) {
 		struct perf_event_context *ctx = event->ctx;
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 9ff0db4e2d0c..933aeec46f4a 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -1199,7 +1199,7 @@ static int ui_init(void)
 		BUG_ON(1);
 	}
 
-	perf_hpp__register_sort_field(fmt);
+	perf_hpp__prepend_sort_field(fmt);
 	return 0;
 }
 
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 37388397b5bc..18cfcdc90356 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -521,6 +521,12 @@ void perf_hpp_list__register_sort_field(struct perf_hpp_list *list,
 	list_add_tail(&format->sort_list, &list->sorts);
 }
 
+void perf_hpp_list__prepend_sort_field(struct perf_hpp_list *list,
+				       struct perf_hpp_fmt *format)
+{
+	list_add(&format->sort_list, &list->sorts);
+}
+
 void perf_hpp__column_unregister(struct perf_hpp_fmt *format)
 {
 	list_del(&format->list);
@@ -560,6 +566,10 @@ void perf_hpp__setup_output_field(struct perf_hpp_list *list)
 	perf_hpp_list__for_each_sort_list(list, fmt) {
 		struct perf_hpp_fmt *pos;
 
+		/* skip sort-only fields ("sort_compute" in perf diff) */
+		if (!fmt->entry && !fmt->color)
+			continue;
+
 		perf_hpp_list__for_each_format(list, pos) {
 			if (fmt_equal(fmt, pos))
 				goto next;
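
The only difference between the new prepend helper and the existing
register helper is list_add() versus list_add_tail(), i.e. whether the
format lands at the head or the tail of the sort list; perf diff switches
to the prepend variant so the field selected with -o is sorted on first. A
minimal stand-in for the two insertions (not <linux/list.h>):

#include <stdio.h>

struct node { const char *name; struct node *next; };

static struct node *head;			/* iteration starts here */

static void add(struct node *n)			/* prepend, like list_add() */
{
	n->next = head;
	head = n;
}

static void add_tail(struct node *n)		/* append, like list_add_tail() */
{
	struct node **p = &head;

	while (*p)
		p = &(*p)->next;
	n->next = NULL;
	*p = n;
}

int main(void)
{
	struct node overhead = { "overhead" }, delta = { "delta" };

	add_tail(&overhead);	/* the usual registration order */
	add(&delta);		/* the prepend: "delta" is sorted on first */

	for (struct node *n = head; n; n = n->next)
		printf("%s\n", n->name);	/* delta, then overhead */
	return 0;
}
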
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 42922512c1c6..8b610dd9e2f6 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -437,7 +437,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
 		}
 		call->ip = cursor_node->ip;
 		call->ms.sym = cursor_node->sym;
-		call->ms.map = cursor_node->map;
+		call->ms.map = map__get(cursor_node->map);
 
 		if (cursor_node->branch) {
 			call->branch_count = 1;
@@ -477,6 +477,7 @@ add_child(struct callchain_node *parent,
 
 		list_for_each_entry_safe(call, tmp, &new->val, list) {
 			list_del(&call->list);
+			map__zput(call->ms.map);
 			free(call);
 		}
 		free(new);
@@ -761,6 +762,7 @@ merge_chain_branch(struct callchain_cursor *cursor,
 					list->ms.map, list->ms.sym,
 					false, NULL, 0, 0);
 		list_del(&list->list);
+		map__zput(list->ms.map);
 		free(list);
 	}
 
@@ -811,7 +813,8 @@ int callchain_cursor_append(struct callchain_cursor *cursor,
 	}
 
 	node->ip = ip;
-	node->map = map;
+	map__zput(node->map);
+	node->map = map__get(map);
 	node->sym = sym;
 	node->branch = branch;
 	node->nr_loop_iter = nr_loop_iter;
@@ -1142,11 +1145,13 @@ static void free_callchain_node(struct callchain_node *node)
 
 	list_for_each_entry_safe(list, tmp, &node->parent_val, list) {
 		list_del(&list->list);
+		map__zput(list->ms.map);
 		free(list);
 	}
 
 	list_for_each_entry_safe(list, tmp, &node->val, list) {
 		list_del(&list->list);
+		map__zput(list->ms.map);
 		free(list);
 	}
 
@@ -1210,6 +1215,7 @@ int callchain_node__make_parent_list(struct callchain_node *node)
 				goto out;
 			*new = *chain;
 			new->has_children = false;
+			map__get(new->ms.map);
 			list_add_tail(&new->list, &head);
 		}
 		parent = parent->parent;
@@ -1230,6 +1236,7 @@ int callchain_node__make_parent_list(struct callchain_node *node)
 out:
 	list_for_each_entry_safe(chain, new, &head, list) {
 		list_del(&chain->list);
+		map__zput(chain->ms.map);
 		free(chain);
 	}
 	return -ENOMEM;
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 35c8e379530f..4f4b60f1558a 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -5,6 +5,7 @@
 #include <linux/list.h>
 #include <linux/rbtree.h>
 #include "event.h"
+#include "map.h"
 #include "symbol.h"
 
 #define HELP_PAD "\t\t\t\t"
@@ -184,8 +185,13 @@ int callchain_merge(struct callchain_cursor *cursor,
  */
 static inline void callchain_cursor_reset(struct callchain_cursor *cursor)
 {
+	struct callchain_cursor_node *node;
+
 	cursor->nr = 0;
 	cursor->last = &cursor->first;
+
+	for (node = cursor->first; node != NULL; node = node->next)
+		map__zput(node->map);
 }
 
 int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip,
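
The callchain fix is a reference-counting discipline: every place a cursor
or callchain node stores a map pointer now takes a reference with
map__get(), and every place that pointer is overwritten or freed drops it
with map__zput(), which also clears the pointer. A compact sketch of that
get/put pairing with stand-in helpers (not the perf map API):

#include <stdio.h>
#include <stdlib.h>

/* Stand-in refcounted object and helpers, not the perf map API. */
struct map { int refcnt; };

static struct map *map_get(struct map *m)
{
	if (m)
		m->refcnt++;
	return m;
}

static void map_put(struct map *m)
{
	if (m && --m->refcnt == 0) {
		puts("map freed");
		free(m);
	}
}

/* "zput": drop the reference and clear the pointer so it cannot drop twice. */
#define map_zput(p) do { map_put(p); (p) = NULL; } while (0)

struct holder { struct map *map; };

static void holder_set_map(struct holder *h, struct map *m)
{
	map_zput(h->map);	/* release whatever was stored before */
	h->map = map_get(m);	/* the holder now owns one reference */
}

int main(void)
{
	struct map *m = calloc(1, sizeof(*m));
	struct holder h = { NULL };

	if (!m)
		return 1;
	m->refcnt = 1;		/* creator's reference */
	holder_set_map(&h, m);	/* refcnt == 2 */
	map_put(m);		/* creator is done; the holder keeps it alive */
	map_zput(h.map);	/* last reference dropped: "map freed" */
	return 0;
}
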
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 6770a9645609..7d1b7d33e644 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1,6 +1,7 @@
 #include "util.h"
 #include "build-id.h"
 #include "hist.h"
+#include "map.h"
 #include "session.h"
 #include "sort.h"
 #include "evlist.h"
@@ -1019,6 +1020,10 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
 			 int max_stack_depth, void *arg)
 {
 	int err, err2;
+	struct map *alm = NULL;
+
+	if (al && al->map)
+		alm = map__get(al->map);
 
 	err = sample__resolve_callchain(iter->sample, &callchain_cursor, &iter->parent,
 					iter->evsel, al, max_stack_depth);
@@ -1058,6 +1063,8 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
 	if (!err)
 		err = err2;
 
+	map__put(alm);
+
 	return err;
 }
 
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index d4b6514eeef5..28c216e3d5b7 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -283,6 +283,8 @@ void perf_hpp_list__column_register(struct perf_hpp_list *list,
 				    struct perf_hpp_fmt *format);
 void perf_hpp_list__register_sort_field(struct perf_hpp_list *list,
 					struct perf_hpp_fmt *format);
+void perf_hpp_list__prepend_sort_field(struct perf_hpp_list *list,
+				       struct perf_hpp_fmt *format);
 
 static inline void perf_hpp__column_register(struct perf_hpp_fmt *format)
 {
@@ -294,6 +296,11 @@ static inline void perf_hpp__register_sort_field(struct perf_hpp_fmt *format)
 	perf_hpp_list__register_sort_field(&perf_hpp_list, format);
 }
 
+static inline void perf_hpp__prepend_sort_field(struct perf_hpp_fmt *format)
+{
+	perf_hpp_list__prepend_sort_field(&perf_hpp_list, format);
+}
+
 #define perf_hpp_list__for_each_format(_list, format) \
 	list_for_each_entry(format, &(_list)->fields, list)
 

^ permalink raw reply	[flat|nested] 316+ messages in thread

* [GIT PULL] perf fixes
@ 2017-02-02 21:01 Ingo Molnar
  0 siblings, 0 replies; 316+ messages in thread
From: Ingo Molnar @ 2017-02-02 21:01 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Thomas Gleixner, Andrew Morton, Jiri Olsa, Alexander Shishkin

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: fff4b87e594ad3d2e4f51e8d3d86a6f9d3d8b654 perf/x86/intel/uncore: Make package handling more robust

Five kernel fixes:

 - an mmap tracing ABI fix for certain mappings
 - a use-after-free fix, found via KASAN
 - three CPU hotplug related x86 PMU driver fixes

 Thanks,

	Ingo

------------------>
Peter Zijlstra (2):
      perf/core: Fix use-after-free bug
      perf/core: Fix PERF_RECORD_MMAP2 prot/flags for anonymous memory

Thomas Gleixner (3):
      perf/x86/intel/rapl: Make package handling more robust
      perf/x86/intel/uncore: Clean up hotplug conversion fallout
      perf/x86/intel/uncore: Make package handling more robust


 arch/x86/events/intel/rapl.c   |  60 +++++------
 arch/x86/events/intel/uncore.c | 232 ++++++++++++++++-------------------------
 include/linux/cpuhotplug.h     |   3 -
 kernel/events/core.c           |  69 ++++++++----
 4 files changed, 163 insertions(+), 201 deletions(-)

diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 17c3564d087a..22ef4f72cf32 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -161,7 +161,13 @@ static u64 rapl_timer_ms;
 
 static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu)
 {
-	return rapl_pmus->pmus[topology_logical_package_id(cpu)];
+	unsigned int pkgid = topology_logical_package_id(cpu);
+
+	/*
+	 * The unsigned check also catches the '-1' return value for non
+	 * existent mappings in the topology map.
+	 */
+	return pkgid < rapl_pmus->maxpkg ? rapl_pmus->pmus[pkgid] : NULL;
 }
 
 static inline u64 rapl_read_counter(struct perf_event *event)
@@ -402,6 +408,8 @@ static int rapl_pmu_event_init(struct perf_event *event)
 
 	/* must be done before validate_group */
 	pmu = cpu_to_rapl_pmu(event->cpu);
+	if (!pmu)
+		return -EINVAL;
 	event->cpu = pmu->cpu;
 	event->pmu_private = pmu;
 	event->hw.event_base = msr;
@@ -585,6 +593,20 @@ static int rapl_cpu_online(unsigned int cpu)
 	struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
 	int target;
 
+	if (!pmu) {
+		pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
+		if (!pmu)
+			return -ENOMEM;
+
+		raw_spin_lock_init(&pmu->lock);
+		INIT_LIST_HEAD(&pmu->active_list);
+		pmu->pmu = &rapl_pmus->pmu;
+		pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
+		rapl_hrtimer_init(pmu);
+
+		rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu;
+	}
+
 	/*
 	 * Check if there is an online cpu in the package which collects rapl
 	 * events already.
@@ -598,27 +620,6 @@ static int rapl_cpu_online(unsigned int cpu)
 	return 0;
 }
 
-static int rapl_cpu_prepare(unsigned int cpu)
-{
-	struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
-
-	if (pmu)
-		return 0;
-
-	pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
-	if (!pmu)
-		return -ENOMEM;
-
-	raw_spin_lock_init(&pmu->lock);
-	INIT_LIST_HEAD(&pmu->active_list);
-	pmu->pmu = &rapl_pmus->pmu;
-	pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
-	pmu->cpu = -1;
-	rapl_hrtimer_init(pmu);
-	rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu;
-	return 0;
-}
-
 static int rapl_check_hw_unit(bool apply_quirk)
 {
 	u64 msr_rapl_power_unit_bits;
@@ -803,29 +804,21 @@ static int __init rapl_pmu_init(void)
 	/*
 	 * Install callbacks. Core will call them for each online cpu.
 	 */
-
-	ret = cpuhp_setup_state(CPUHP_PERF_X86_RAPL_PREP, "perf/x86/rapl:prepare",
-				rapl_cpu_prepare, NULL);
-	if (ret)
-		goto out;
-
 	ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_RAPL_ONLINE,
 				"perf/x86/rapl:online",
 				rapl_cpu_online, rapl_cpu_offline);
 	if (ret)
-		goto out1;
+		goto out;
 
 	ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1);
 	if (ret)
-		goto out2;
+		goto out1;
 
 	rapl_advertise();
 	return 0;
 
-out2:
-	cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE);
 out1:
-	cpuhp_remove_state(CPUHP_PERF_X86_RAPL_PREP);
+	cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE);
 out:
 	pr_warn("Initialization failed (%d), disabled\n", ret);
 	cleanup_rapl_pmus();
@@ -836,7 +829,6 @@ module_init(rapl_pmu_init);
 static void __exit intel_rapl_exit(void)
 {
 	cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE);
-	cpuhp_remove_state_nocalls(CPUHP_PERF_X86_RAPL_PREP);
 	perf_pmu_unregister(&rapl_pmus->pmu);
 	cleanup_rapl_pmus();
 }
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 8c4ccdc3a3f3..1ab45976474d 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -100,7 +100,13 @@ ssize_t uncore_event_show(struct kobject *kobj,
 
 struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
 {
-	return pmu->boxes[topology_logical_package_id(cpu)];
+	unsigned int pkgid = topology_logical_package_id(cpu);
+
+	/*
+	 * The unsigned check also catches the '-1' return value for non
+	 * existent mappings in the topology map.
+	 */
+	return pkgid < max_packages ? pmu->boxes[pkgid] : NULL;
 }
 
 u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
@@ -764,30 +770,6 @@ static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
 	pmu->registered = false;
 }
 
-static void __uncore_exit_boxes(struct intel_uncore_type *type, int cpu)
-{
-	struct intel_uncore_pmu *pmu = type->pmus;
-	struct intel_uncore_box *box;
-	int i, pkg;
-
-	if (pmu) {
-		pkg = topology_physical_package_id(cpu);
-		for (i = 0; i < type->num_boxes; i++, pmu++) {
-			box = pmu->boxes[pkg];
-			if (box)
-				uncore_box_exit(box);
-		}
-	}
-}
-
-static void uncore_exit_boxes(void *dummy)
-{
-	struct intel_uncore_type **types;
-
-	for (types = uncore_msr_uncores; *types; types++)
-		__uncore_exit_boxes(*types++, smp_processor_id());
-}
-
 static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
 {
 	int pkg;
@@ -1058,86 +1040,6 @@ static void uncore_pci_exit(void)
 	}
 }
 
-static int uncore_cpu_dying(unsigned int cpu)
-{
-	struct intel_uncore_type *type, **types = uncore_msr_uncores;
-	struct intel_uncore_pmu *pmu;
-	struct intel_uncore_box *box;
-	int i, pkg;
-
-	pkg = topology_logical_package_id(cpu);
-	for (; *types; types++) {
-		type = *types;
-		pmu = type->pmus;
-		for (i = 0; i < type->num_boxes; i++, pmu++) {
-			box = pmu->boxes[pkg];
-			if (box && atomic_dec_return(&box->refcnt) == 0)
-				uncore_box_exit(box);
-		}
-	}
-	return 0;
-}
-
-static int first_init;
-
-static int uncore_cpu_starting(unsigned int cpu)
-{
-	struct intel_uncore_type *type, **types = uncore_msr_uncores;
-	struct intel_uncore_pmu *pmu;
-	struct intel_uncore_box *box;
-	int i, pkg, ncpus = 1;
-
-	if (first_init) {
-		/*
-		 * On init we get the number of online cpus in the package
-		 * and set refcount for all of them.
-		 */
-		ncpus = cpumask_weight(topology_core_cpumask(cpu));
-	}
-
-	pkg = topology_logical_package_id(cpu);
-	for (; *types; types++) {
-		type = *types;
-		pmu = type->pmus;
-		for (i = 0; i < type->num_boxes; i++, pmu++) {
-			box = pmu->boxes[pkg];
-			if (!box)
-				continue;
-			/* The first cpu on a package activates the box */
-			if (atomic_add_return(ncpus, &box->refcnt) == ncpus)
-				uncore_box_init(box);
-		}
-	}
-
-	return 0;
-}
-
-static int uncore_cpu_prepare(unsigned int cpu)
-{
-	struct intel_uncore_type *type, **types = uncore_msr_uncores;
-	struct intel_uncore_pmu *pmu;
-	struct intel_uncore_box *box;
-	int i, pkg;
-
-	pkg = topology_logical_package_id(cpu);
-	for (; *types; types++) {
-		type = *types;
-		pmu = type->pmus;
-		for (i = 0; i < type->num_boxes; i++, pmu++) {
-			if (pmu->boxes[pkg])
-				continue;
-			/* First cpu of a package allocates the box */
-			box = uncore_alloc_box(type, cpu_to_node(cpu));
-			if (!box)
-				return -ENOMEM;
-			box->pmu = pmu;
-			box->pkgid = pkg;
-			pmu->boxes[pkg] = box;
-		}
-	}
-	return 0;
-}
-
 static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
 				   int new_cpu)
 {
@@ -1177,12 +1079,14 @@ static void uncore_change_context(struct intel_uncore_type **uncores,
 
 static int uncore_event_cpu_offline(unsigned int cpu)
 {
-	int target;
+	struct intel_uncore_type *type, **types = uncore_msr_uncores;
+	struct intel_uncore_pmu *pmu;
+	struct intel_uncore_box *box;
+	int i, pkg, target;
 
 	/* Check if exiting cpu is used for collecting uncore events */
 	if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
-		return 0;
-
+		goto unref;
 	/* Find a new cpu to collect uncore events */
 	target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
 
@@ -1194,12 +1098,82 @@ static int uncore_event_cpu_offline(unsigned int cpu)
 
 	uncore_change_context(uncore_msr_uncores, cpu, target);
 	uncore_change_context(uncore_pci_uncores, cpu, target);
+
+unref:
+	/* Clear the references */
+	pkg = topology_logical_package_id(cpu);
+	for (; *types; types++) {
+		type = *types;
+		pmu = type->pmus;
+		for (i = 0; i < type->num_boxes; i++, pmu++) {
+			box = pmu->boxes[pkg];
+			if (box && atomic_dec_return(&box->refcnt) == 0)
+				uncore_box_exit(box);
+		}
+	}
 	return 0;
 }
 
+static int allocate_boxes(struct intel_uncore_type **types,
+			 unsigned int pkg, unsigned int cpu)
+{
+	struct intel_uncore_box *box, *tmp;
+	struct intel_uncore_type *type;
+	struct intel_uncore_pmu *pmu;
+	LIST_HEAD(allocated);
+	int i;
+
+	/* Try to allocate all required boxes */
+	for (; *types; types++) {
+		type = *types;
+		pmu = type->pmus;
+		for (i = 0; i < type->num_boxes; i++, pmu++) {
+			if (pmu->boxes[pkg])
+				continue;
+			box = uncore_alloc_box(type, cpu_to_node(cpu));
+			if (!box)
+				goto cleanup;
+			box->pmu = pmu;
+			box->pkgid = pkg;
+			list_add(&box->active_list, &allocated);
+		}
+	}
+	/* Install them in the pmus */
+	list_for_each_entry_safe(box, tmp, &allocated, active_list) {
+		list_del_init(&box->active_list);
+		box->pmu->boxes[pkg] = box;
+	}
+	return 0;
+
+cleanup:
+	list_for_each_entry_safe(box, tmp, &allocated, active_list) {
+		list_del_init(&box->active_list);
+		kfree(box);
+	}
+	return -ENOMEM;
+}
+
 static int uncore_event_cpu_online(unsigned int cpu)
 {
-	int target;
+	struct intel_uncore_type *type, **types = uncore_msr_uncores;
+	struct intel_uncore_pmu *pmu;
+	struct intel_uncore_box *box;
+	int i, ret, pkg, target;
+
+	pkg = topology_logical_package_id(cpu);
+	ret = allocate_boxes(types, pkg, cpu);
+	if (ret)
+		return ret;
+
+	for (; *types; types++) {
+		type = *types;
+		pmu = type->pmus;
+		for (i = 0; i < type->num_boxes; i++, pmu++) {
+			box = pmu->boxes[pkg];
+			if (!box && atomic_inc_return(&box->refcnt) == 1)
+				uncore_box_init(box);
+		}
+	}
 
 	/*
 	 * Check if there is an online cpu in the package
@@ -1389,38 +1363,16 @@ static int __init intel_uncore_init(void)
 	if (cret && pret)
 		return -ENODEV;
 
-	/*
-	 * Install callbacks. Core will call them for each online cpu.
-	 *
-	 * The first online cpu of each package allocates and takes
-	 * the refcounts for all other online cpus in that package.
-	 * If msrs are not enabled no allocation is required and
-	 * uncore_cpu_prepare() is not called for each online cpu.
-	 */
-	if (!cret) {
-	       ret = cpuhp_setup_state(CPUHP_PERF_X86_UNCORE_PREP,
-				       "perf/x86/intel/uncore:prepare",
-				       uncore_cpu_prepare, NULL);
-		if (ret)
-			goto err;
-	} else {
-		cpuhp_setup_state_nocalls(CPUHP_PERF_X86_UNCORE_PREP,
-					  "perf/x86/intel/uncore:prepare",
-					  uncore_cpu_prepare, NULL);
-	}
-	first_init = 1;
-	cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_STARTING,
-			  "perf/x86/uncore:starting",
-			  uncore_cpu_starting, uncore_cpu_dying);
-	first_init = 0;
-	cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE,
-			  "perf/x86/uncore:online",
-			  uncore_event_cpu_online, uncore_event_cpu_offline);
+	/* Install hotplug callbacks to setup the targets for each package */
+	ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE,
+				"perf/x86/intel/uncore:online",
+				uncore_event_cpu_online,
+				uncore_event_cpu_offline);
+	if (ret)
+		goto err;
 	return 0;
 
 err:
-	/* Undo box->init_box() */
-	on_each_cpu_mask(&uncore_cpu_mask, uncore_exit_boxes, NULL, 1);
 	uncore_types_exit(uncore_msr_uncores);
 	uncore_pci_exit();
 	return ret;
@@ -1429,9 +1381,7 @@ module_init(intel_uncore_init);
 
 static void __exit intel_uncore_exit(void)
 {
-	cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_UNCORE_ONLINE);
-	cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_UNCORE_STARTING);
-	cpuhp_remove_state_nocalls(CPUHP_PERF_X86_UNCORE_PREP);
+	cpuhp_remove_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE);
 	uncore_types_exit(uncore_msr_uncores);
 	uncore_pci_exit();
 }
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index d936a0021839..921acaaa1601 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -8,9 +8,7 @@ enum cpuhp_state {
 	CPUHP_CREATE_THREADS,
 	CPUHP_PERF_PREPARE,
 	CPUHP_PERF_X86_PREPARE,
-	CPUHP_PERF_X86_UNCORE_PREP,
 	CPUHP_PERF_X86_AMD_UNCORE_PREP,
-	CPUHP_PERF_X86_RAPL_PREP,
 	CPUHP_PERF_BFIN,
 	CPUHP_PERF_POWER,
 	CPUHP_PERF_SUPERH,
@@ -86,7 +84,6 @@ enum cpuhp_state {
 	CPUHP_AP_IRQ_ARMADA_XP_STARTING,
 	CPUHP_AP_IRQ_BCM2836_STARTING,
 	CPUHP_AP_ARM_MVEBU_COHERENCY,
-	CPUHP_AP_PERF_X86_UNCORE_STARTING,
 	CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING,
 	CPUHP_AP_PERF_X86_STARTING,
 	CPUHP_AP_PERF_X86_AMD_IBS_STARTING,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 110b38a58493..e5aaa806702d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1469,7 +1469,6 @@ ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
 static void
 list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 {
-
 	lockdep_assert_held(&ctx->lock);
 
 	WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
@@ -1624,6 +1623,8 @@ static void perf_group_attach(struct perf_event *event)
 {
 	struct perf_event *group_leader = event->group_leader, *pos;
 
+	lockdep_assert_held(&event->ctx->lock);
+
 	/*
 	 * We can have double attach due to group movement in perf_event_open.
 	 */
@@ -1697,6 +1698,8 @@ static void perf_group_detach(struct perf_event *event)
 	struct perf_event *sibling, *tmp;
 	struct list_head *list = NULL;
 
+	lockdep_assert_held(&event->ctx->lock);
+
 	/*
 	 * We can have double detach due to exit/hot-unplug + close.
 	 */
@@ -1895,9 +1898,29 @@ __perf_remove_from_context(struct perf_event *event,
  */
 static void perf_remove_from_context(struct perf_event *event, unsigned long flags)
 {
-	lockdep_assert_held(&event->ctx->mutex);
+	struct perf_event_context *ctx = event->ctx;
+
+	lockdep_assert_held(&ctx->mutex);
 
 	event_function_call(event, __perf_remove_from_context, (void *)flags);
+
+	/*
+	 * The above event_function_call() can NO-OP when it hits
+	 * TASK_TOMBSTONE. In that case we must already have been detached
+	 * from the context (by perf_event_exit_event()) but the grouping
+	 * might still be in-tact.
+	 */
+	WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
+	if ((flags & DETACH_GROUP) &&
+	    (event->attach_state & PERF_ATTACH_GROUP)) {
+		/*
+		 * Since in that case we cannot possibly be scheduled, simply
+		 * detach now.
+		 */
+		raw_spin_lock_irq(&ctx->lock);
+		perf_group_detach(event);
+		raw_spin_unlock_irq(&ctx->lock);
+	}
 }
 
 /*
@@ -6609,6 +6632,27 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 	char *buf = NULL;
 	char *name;
 
+	if (vma->vm_flags & VM_READ)
+		prot |= PROT_READ;
+	if (vma->vm_flags & VM_WRITE)
+		prot |= PROT_WRITE;
+	if (vma->vm_flags & VM_EXEC)
+		prot |= PROT_EXEC;
+
+	if (vma->vm_flags & VM_MAYSHARE)
+		flags = MAP_SHARED;
+	else
+		flags = MAP_PRIVATE;
+
+	if (vma->vm_flags & VM_DENYWRITE)
+		flags |= MAP_DENYWRITE;
+	if (vma->vm_flags & VM_MAYEXEC)
+		flags |= MAP_EXECUTABLE;
+	if (vma->vm_flags & VM_LOCKED)
+		flags |= MAP_LOCKED;
+	if (vma->vm_flags & VM_HUGETLB)
+		flags |= MAP_HUGETLB;
+
 	if (file) {
 		struct inode *inode;
 		dev_t dev;
@@ -6635,27 +6679,6 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 		maj = MAJOR(dev);
 		min = MINOR(dev);
 
-		if (vma->vm_flags & VM_READ)
-			prot |= PROT_READ;
-		if (vma->vm_flags & VM_WRITE)
-			prot |= PROT_WRITE;
-		if (vma->vm_flags & VM_EXEC)
-			prot |= PROT_EXEC;
-
-		if (vma->vm_flags & VM_MAYSHARE)
-			flags = MAP_SHARED;
-		else
-			flags = MAP_PRIVATE;
-
-		if (vma->vm_flags & VM_DENYWRITE)
-			flags |= MAP_DENYWRITE;
-		if (vma->vm_flags & VM_MAYEXEC)
-			flags |= MAP_EXECUTABLE;
-		if (vma->vm_flags & VM_LOCKED)
-			flags |= MAP_LOCKED;
-		if (vma->vm_flags & VM_HUGETLB)
-			flags |= MAP_HUGETLB;
-
 		goto got_name;
 	} else {
 		if (vma->vm_ops && vma->vm_ops->name) {

^ permalink raw reply	[flat|nested] 316+ messages in thread

* [GIT PULL] perf fixes
@ 2017-01-18  9:27 Ingo Molnar
  0 siblings, 0 replies; 316+ messages in thread
From: Ingo Molnar @ 2017-01-18  9:27 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Thomas Gleixner, Andrew Morton

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: 31f5260a7653e6042ac28578db1c61e84f2d7898 Merge tag 'perf-urgent-for-mingo-4.10-20170117' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent

An Intel PMU driver hotplug fix and three 'perf probe' tooling fixes.
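
The module probe fixes below involve computing the module's .text load
adjustment (sh_addr minus sh_offset) from its ELF image. An illustrative,
standalone libelf sketch of just that section lookup -- not the perf code:

/* Build with: gcc -o textoff textoff.c -lelf */
#include <err.h>
#include <fcntl.h>
#include <gelf.h>
#include <libelf.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	Elf_Scn *scn = NULL;
	size_t shstrndx;
	Elf *elf;
	int fd;

	if (argc != 2)
		errx(1, "usage: %s <module.ko>", argv[0]);
	if (elf_version(EV_CURRENT) == EV_NONE)
		errx(1, "libelf initialization failed");

	fd = open(argv[1], O_RDONLY);
	if (fd < 0)
		err(1, "%s", argv[1]);

	elf = elf_begin(fd, ELF_C_READ, NULL);
	if (!elf || elf_getshdrstrndx(elf, &shstrndx))
		errx(1, "%s: not a readable ELF file", argv[1]);

	while ((scn = elf_nextscn(elf, scn)) != NULL) {
		GElf_Shdr shdr;
		const char *name;

		if (!gelf_getshdr(scn, &shdr))
			continue;
		name = elf_strptr(elf, shstrndx, shdr.sh_name);
		if (name && !strcmp(name, ".text")) {
			printf(".text: sh_addr=%#llx sh_offset=%#llx adjust=%#llx\n",
			       (unsigned long long)shdr.sh_addr,
			       (unsigned long long)shdr.sh_offset,
			       (unsigned long long)(shdr.sh_addr - shdr.sh_offset));
			break;
		}
	}

	elf_end(elf);
	close(fd);
	return 0;
}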

 Thanks,

	Ingo

------------------>
Masami Hiramatsu (3):
      perf probe: Fix to show correct locations for events on modules
      perf probe: Add error checks to offline probe post-processing
      perf probe: Fix to probe on gcc generated functions in modules

Zhou Chengming (1):
      perf/x86/intel: Handle exclusive threadid correctly on CPU hotplug


 arch/x86/events/intel/core.c   |  7 +++-
 tools/perf/util/probe-event.c  | 95 +++++++++++++++++++++++++++---------------
 tools/perf/util/probe-finder.c | 15 ++++---
 tools/perf/util/probe-finder.h |  3 ++
 4 files changed, 77 insertions(+), 43 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index d611cab214a6..eb1484c86bb4 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3176,13 +3176,16 @@ static void intel_pmu_cpu_starting(int cpu)
 
 	if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
 		for_each_cpu(i, topology_sibling_cpumask(cpu)) {
+			struct cpu_hw_events *sibling;
 			struct intel_excl_cntrs *c;
 
-			c = per_cpu(cpu_hw_events, i).excl_cntrs;
+			sibling = &per_cpu(cpu_hw_events, i);
+			c = sibling->excl_cntrs;
 			if (c && c->core_id == core_id) {
 				cpuc->kfree_on_online[1] = cpuc->excl_cntrs;
 				cpuc->excl_cntrs = c;
-				cpuc->excl_thread_id = 1;
+				if (!sibling->excl_thread_id)
+					cpuc->excl_thread_id = 1;
 				break;
 			}
 		}
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 4a57c8a60bd9..6a6f44dd594b 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -610,6 +610,33 @@ static int find_perf_probe_point_from_dwarf(struct probe_trace_point *tp,
 	return ret ? : -ENOENT;
 }
 
+/* Adjust symbol name and address */
+static int post_process_probe_trace_point(struct probe_trace_point *tp,
+					   struct map *map, unsigned long offs)
+{
+	struct symbol *sym;
+	u64 addr = tp->address + tp->offset - offs;
+
+	sym = map__find_symbol(map, addr);
+	if (!sym)
+		return -ENOENT;
+
+	if (strcmp(sym->name, tp->symbol)) {
+		/* If we have no realname, use symbol for it */
+		if (!tp->realname)
+			tp->realname = tp->symbol;
+		else
+			free(tp->symbol);
+		tp->symbol = strdup(sym->name);
+		if (!tp->symbol)
+			return -ENOMEM;
+	}
+	tp->offset = addr - sym->start;
+	tp->address -= offs;
+
+	return 0;
+}
+
 /*
  * Rename DWARF symbols to ELF symbols -- gcc sometimes optimizes functions
  * and generate new symbols with suffixes such as .constprop.N or .isra.N
@@ -622,11 +649,9 @@ static int
 post_process_offline_probe_trace_events(struct probe_trace_event *tevs,
 					int ntevs, const char *pathname)
 {
-	struct symbol *sym;
 	struct map *map;
 	unsigned long stext = 0;
-	u64 addr;
-	int i;
+	int i, ret = 0;
 
 	/* Prepare a map for offline binary */
 	map = dso__new_map(pathname);
@@ -636,23 +661,14 @@ post_process_offline_probe_trace_events(struct probe_trace_event *tevs,
 	}
 
 	for (i = 0; i < ntevs; i++) {
-		addr = tevs[i].point.address + tevs[i].point.offset - stext;
-		sym = map__find_symbol(map, addr);
-		if (!sym)
-			continue;
-		if (!strcmp(sym->name, tevs[i].point.symbol))
-			continue;
-		/* If we have no realname, use symbol for it */
-		if (!tevs[i].point.realname)
-			tevs[i].point.realname = tevs[i].point.symbol;
-		else
-			free(tevs[i].point.symbol);
-		tevs[i].point.symbol = strdup(sym->name);
-		tevs[i].point.offset = addr - sym->start;
+		ret = post_process_probe_trace_point(&tevs[i].point,
+						     map, stext);
+		if (ret < 0)
+			break;
 	}
 	map__put(map);
 
-	return 0;
+	return ret;
 }
 
 static int add_exec_to_probe_trace_events(struct probe_trace_event *tevs,
@@ -682,18 +698,31 @@ static int add_exec_to_probe_trace_events(struct probe_trace_event *tevs,
 	return ret;
 }
 
-static int add_module_to_probe_trace_events(struct probe_trace_event *tevs,
-					    int ntevs, const char *module)
+static int
+post_process_module_probe_trace_events(struct probe_trace_event *tevs,
+				       int ntevs, const char *module,
+				       struct debuginfo *dinfo)
 {
+	Dwarf_Addr text_offs = 0;
 	int i, ret = 0;
 	char *mod_name = NULL;
+	struct map *map;
 
 	if (!module)
 		return 0;
 
-	mod_name = find_module_name(module);
+	map = get_target_map(module, false);
+	if (!map || debuginfo__get_text_offset(dinfo, &text_offs, true) < 0) {
+		pr_warning("Failed to get ELF symbols for %s\n", module);
+		return -EINVAL;
+	}
 
+	mod_name = find_module_name(module);
 	for (i = 0; i < ntevs; i++) {
+		ret = post_process_probe_trace_point(&tevs[i].point,
+						map, (unsigned long)text_offs);
+		if (ret < 0)
+			break;
 		tevs[i].point.module =
 			strdup(mod_name ? mod_name : module);
 		if (!tevs[i].point.module) {
@@ -703,6 +732,8 @@ static int add_module_to_probe_trace_events(struct probe_trace_event *tevs,
 	}
 
 	free(mod_name);
+	map__put(map);
+
 	return ret;
 }
 
@@ -760,7 +791,7 @@ arch__post_process_probe_trace_events(struct perf_probe_event *pev __maybe_unuse
 static int post_process_probe_trace_events(struct perf_probe_event *pev,
 					   struct probe_trace_event *tevs,
 					   int ntevs, const char *module,
-					   bool uprobe)
+					   bool uprobe, struct debuginfo *dinfo)
 {
 	int ret;
 
@@ -768,7 +799,8 @@ static int post_process_probe_trace_events(struct perf_probe_event *pev,
 		ret = add_exec_to_probe_trace_events(tevs, ntevs, module);
 	else if (module)
 		/* Currently ref_reloc_sym based probe is not for drivers */
-		ret = add_module_to_probe_trace_events(tevs, ntevs, module);
+		ret = post_process_module_probe_trace_events(tevs, ntevs,
+							     module, dinfo);
 	else
 		ret = post_process_kernel_probe_trace_events(tevs, ntevs);
 
@@ -812,30 +844,27 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
 		}
 	}
 
-	debuginfo__delete(dinfo);
-
 	if (ntevs > 0) {	/* Succeeded to find trace events */
 		pr_debug("Found %d probe_trace_events.\n", ntevs);
 		ret = post_process_probe_trace_events(pev, *tevs, ntevs,
-						pev->target, pev->uprobes);
+					pev->target, pev->uprobes, dinfo);
 		if (ret < 0 || ret == ntevs) {
+			pr_debug("Post processing failed or all events are skipped. (%d)\n", ret);
 			clear_probe_trace_events(*tevs, ntevs);
 			zfree(tevs);
+			ntevs = 0;
 		}
-		if (ret != ntevs)
-			return ret < 0 ? ret : ntevs;
-		ntevs = 0;
-		/* Fall through */
 	}
 
+	debuginfo__delete(dinfo);
+
 	if (ntevs == 0)	{	/* No error but failed to find probe point. */
 		pr_warning("Probe point '%s' not found.\n",
 			   synthesize_perf_probe_point(&pev->point));
 		return -ENOENT;
-	}
-	/* Error path : ntevs < 0 */
-	pr_debug("An error occurred in debuginfo analysis (%d).\n", ntevs);
-	if (ntevs < 0) {
+	} else if (ntevs < 0) {
+		/* Error path : ntevs < 0 */
+		pr_debug("An error occurred in debuginfo analysis (%d).\n", ntevs);
 		if (ntevs == -EBADF)
 			pr_warning("Warning: No dwarf info found in the vmlinux - "
 				"please rebuild kernel with CONFIG_DEBUG_INFO=y.\n");
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index df4debe564da..0d9d6e0803b8 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -1501,7 +1501,8 @@ int debuginfo__find_available_vars_at(struct debuginfo *dbg,
 }
 
 /* For the kernel module, we need a special code to get a DIE */
-static int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs)
+int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs,
+				bool adjust_offset)
 {
 	int n, i;
 	Elf32_Word shndx;
@@ -1530,6 +1531,8 @@ static int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs)
 			if (!shdr)
 				return -ENOENT;
 			*offs = shdr->sh_addr;
+			if (adjust_offset)
+				*offs -= shdr->sh_offset;
 		}
 	}
 	return 0;
@@ -1543,16 +1546,12 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr,
 	Dwarf_Addr _addr = 0, baseaddr = 0;
 	const char *fname = NULL, *func = NULL, *basefunc = NULL, *tmp;
 	int baseline = 0, lineno = 0, ret = 0;
-	bool reloc = false;
 
-retry:
+	/* We always need to relocate the address for aranges */
+	if (debuginfo__get_text_offset(dbg, &baseaddr, false) == 0)
+		addr += baseaddr;
 	/* Find cu die */
 	if (!dwarf_addrdie(dbg->dbg, (Dwarf_Addr)addr, &cudie)) {
-		if (!reloc && debuginfo__get_text_offset(dbg, &baseaddr) == 0) {
-			addr += baseaddr;
-			reloc = true;
-			goto retry;
-		}
 		pr_warning("Failed to find debug information for address %lx\n",
 			   addr);
 		ret = -EINVAL;
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index f1d8558f498e..2956c5198652 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -46,6 +46,9 @@ int debuginfo__find_trace_events(struct debuginfo *dbg,
 int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr,
 				struct perf_probe_point *ppt);
 
+int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs,
+			       bool adjust_offset);
+
 /* Find a line range */
 int debuginfo__find_line_range(struct debuginfo *dbg, struct line_range *lr);
 

^ permalink raw reply	[flat|nested] 316+ messages in thread

* [GIT PULL] perf fixes
@ 2017-01-15  9:59 Ingo Molnar
  0 siblings, 0 replies; 316+ messages in thread
From: Ingo Molnar @ 2017-01-15  9:59 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Alexander Shishkin, Thomas Gleixner, Andrew Morton

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: 18e7a45af91acdde99d3aa1372cc40e1f8142f7b perf/x86: Reject non sampling events with precise_ip

Misc race fixes uncovered by fuzzing efforts, a Sparse fix, two PMU driver fixes, 
plus miscellaneous tooling fixes.
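
The 'move_group' race fix below boils down to a pin/lock/re-validate/retry
dance on the group leader's context. A minimal, illustrative kernel-style
sketch of that pattern -- the type and helpers are made up, it is not the
perf code:

#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct demo_ctx {
	struct kref	ref;
	struct mutex	lock;
};

static void demo_ctx_release(struct kref *ref)
{
	kfree(container_of(ref, struct demo_ctx, ref));
}

/*
 * Pin the object published in *slot, take its mutex, then re-check that
 * the publisher did not swap it while we were blocked on the lock.
 */
static struct demo_ctx *demo_ctx_lock_stable(struct demo_ctx __rcu **slot)
{
	struct demo_ctx *c;

again:
	rcu_read_lock();
	c = rcu_dereference(*slot);
	if (!c || !kref_get_unless_zero(&c->ref)) {
		rcu_read_unlock();
		return NULL;			/* nothing (live) published */
	}
	rcu_read_unlock();

	mutex_lock(&c->lock);
	if (rcu_access_pointer(*slot) != c) {
		/* Lost a race with a concurrent move: undo and start over. */
		mutex_unlock(&c->lock);
		kref_put(&c->ref, demo_ctx_release);
		goto again;
	}
	return c;				/* locked + referenced */
}

The re-check after mutex_lock() is what closes the window in which a
concurrent opener can move the group out from under us.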

 Thanks,

	Ingo

------------------>
Arnaldo Carvalho de Melo (4):
      samples/bpf sock_example: Avoid getting ethhdr from two includes
      samples/bpf trace_output_user: Remove duplicate sys/ioctl.h include
      perf tools: Install tools/lib/traceevent plugins with install-bin
      perf symbols: Robustify reading of build-id from sysfs

Colin King (1):
      perf/x86/intel: Use ULL constant to prevent undefined shift behaviour

Daniel Bristot de Oliveira (1):
      tools lib traceevent: Fix prev/next_prio for deadline tasks

David Carrillo-Cisneros (1):
      perf/x86: Set pmu->module in Intel PMU modules

Jiri Olsa (5):
      tools lib subcmd: Add OPT_STRING_OPTARG_SET option
      perf record: Make __record_options static
      perf record: Fix --switch-output documentation and comment
      perf/x86/intel: Account interrupts for PEBS errors
      perf/x86: Reject non sampling events with precise_ip

Masami Hiramatsu (3):
      perf probe: Fix to get correct modname from elf header
      perf probe: Fix --funcs to show correct symbols for offline module
      perf probe: Fix to probe on gcc generated symbols for offline kernel

Namhyung Kim (1):
      perf sched timehist: Show total scheduling time

Peter Zijlstra (2):
      perf/core: Fix sys_perf_event_open() vs. hotplug
      perf/core: Fix concurrent sys_perf_event_open() vs. 'move_group' race

Prarit Bhargava (1):
      perf/x86/intel/uncore: Fix hardcoded socket 0 assumption in the Haswell init code


 arch/x86/events/core.c                     |   4 +
 arch/x86/events/intel/core.c               |   2 +-
 arch/x86/events/intel/cstate.c             |   2 +
 arch/x86/events/intel/ds.c                 |   6 +-
 arch/x86/events/intel/rapl.c               |   1 +
 arch/x86/events/intel/uncore.c             |   1 +
 arch/x86/events/intel/uncore_snbep.c       |   2 +-
 include/linux/perf_event.h                 |   1 +
 kernel/events/core.c                       | 175 ++++++++++++++++++++++-------
 samples/bpf/sock_example.h                 |   2 +-
 samples/bpf/trace_output_user.c            |   1 -
 tools/lib/subcmd/parse-options.c           |   3 +
 tools/lib/subcmd/parse-options.h           |   5 +
 tools/lib/traceevent/plugin_sched_switch.c |   4 +-
 tools/perf/Documentation/perf-record.txt   |   4 +
 tools/perf/Makefile.perf                   |   4 +-
 tools/perf/builtin-record.c                |   4 +-
 tools/perf/builtin-sched.c                 |  17 ++-
 tools/perf/util/probe-event.c              | 105 +++++++++++------
 tools/perf/util/symbol-elf.c               |   6 +
 20 files changed, 257 insertions(+), 92 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 019c5887b698..1635c0c8df23 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -505,6 +505,10 @@ int x86_pmu_hw_config(struct perf_event *event)
 
 		if (event->attr.precise_ip > precise)
 			return -EOPNOTSUPP;
+
+		/* There's no sense in having PEBS for non sampling events: */
+		if (!is_sampling_event(event))
+			return -EINVAL;
 	}
 	/*
 	 * check that PEBS LBR correction does not conflict with
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 86138267b68a..d611cab214a6 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3987,7 +3987,7 @@ __init int intel_pmu_init(void)
 		     x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC);
 		x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC;
 	}
-	x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
+	x86_pmu.intel_ctrl = (1ULL << x86_pmu.num_counters) - 1;
 
 	if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) {
 		WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index fec8a461bdef..1076c9a77292 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -434,6 +434,7 @@ static struct pmu cstate_core_pmu = {
 	.stop		= cstate_pmu_event_stop,
 	.read		= cstate_pmu_event_update,
 	.capabilities	= PERF_PMU_CAP_NO_INTERRUPT,
+	.module		= THIS_MODULE,
 };
 
 static struct pmu cstate_pkg_pmu = {
@@ -447,6 +448,7 @@ static struct pmu cstate_pkg_pmu = {
 	.stop		= cstate_pmu_event_stop,
 	.read		= cstate_pmu_event_update,
 	.capabilities	= PERF_PMU_CAP_NO_INTERRUPT,
+	.module		= THIS_MODULE,
 };
 
 static const struct cstate_model nhm_cstates __initconst = {
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index be202390bbd3..9dfeeeca0ea8 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1389,9 +1389,13 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 			continue;
 
 		/* log dropped samples number */
-		if (error[bit])
+		if (error[bit]) {
 			perf_log_lost_samples(event, error[bit]);
 
+			if (perf_event_account_interrupt(event))
+				x86_pmu_stop(event, 0);
+		}
+
 		if (counts[bit]) {
 			__intel_pmu_pebs_event(event, iregs, base,
 					       top, bit, counts[bit]);
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index bd34124449b0..17c3564d087a 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -697,6 +697,7 @@ static int __init init_rapl_pmus(void)
 	rapl_pmus->pmu.start		= rapl_pmu_event_start;
 	rapl_pmus->pmu.stop		= rapl_pmu_event_stop;
 	rapl_pmus->pmu.read		= rapl_pmu_event_read;
+	rapl_pmus->pmu.module		= THIS_MODULE;
 	return 0;
 }
 
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 97c246f84dea..8c4ccdc3a3f3 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -733,6 +733,7 @@ static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
 			.start		= uncore_pmu_event_start,
 			.stop		= uncore_pmu_event_stop,
 			.read		= uncore_pmu_event_read,
+			.module		= THIS_MODULE,
 		};
 	} else {
 		pmu->pmu = *pmu->type->pmu;
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index e6832be714bc..dae2fedc1601 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -2686,7 +2686,7 @@ static struct intel_uncore_type *hswep_msr_uncores[] = {
 
 void hswep_uncore_cpu_init(void)
 {
-	int pkg = topology_phys_to_logical_pkg(0);
+	int pkg = boot_cpu_data.logical_proc_id;
 
 	if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
 		hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 4741ecdb9817..78ed8105e64d 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1259,6 +1259,7 @@ extern void perf_event_disable(struct perf_event *event);
 extern void perf_event_disable_local(struct perf_event *event);
 extern void perf_event_disable_inatomic(struct perf_event *event);
 extern void perf_event_task_tick(void);
+extern int perf_event_account_interrupt(struct perf_event *event);
 #else /* !CONFIG_PERF_EVENTS: */
 static inline void *
 perf_aux_output_begin(struct perf_output_handle *handle,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index ab15509fab8c..110b38a58493 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2249,7 +2249,7 @@ static int  __perf_install_in_context(void *info)
 	struct perf_event_context *ctx = event->ctx;
 	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
 	struct perf_event_context *task_ctx = cpuctx->task_ctx;
-	bool activate = true;
+	bool reprogram = true;
 	int ret = 0;
 
 	raw_spin_lock(&cpuctx->ctx.lock);
@@ -2257,27 +2257,26 @@ static int  __perf_install_in_context(void *info)
 		raw_spin_lock(&ctx->lock);
 		task_ctx = ctx;
 
-		/* If we're on the wrong CPU, try again */
-		if (task_cpu(ctx->task) != smp_processor_id()) {
-			ret = -ESRCH;
-			goto unlock;
-		}
+		reprogram = (ctx->task == current);
 
 		/*
-		 * If we're on the right CPU, see if the task we target is
-		 * current, if not we don't have to activate the ctx, a future
-		 * context switch will do that for us.
+		 * If the task is running, it must be running on this CPU,
+		 * otherwise we cannot reprogram things.
+		 *
+		 * If its not running, we don't care, ctx->lock will
+		 * serialize against it becoming runnable.
 		 */
-		if (ctx->task != current)
-			activate = false;
-		else
-			WARN_ON_ONCE(cpuctx->task_ctx && cpuctx->task_ctx != ctx);
+		if (task_curr(ctx->task) && !reprogram) {
+			ret = -ESRCH;
+			goto unlock;
+		}
 
+		WARN_ON_ONCE(reprogram && cpuctx->task_ctx && cpuctx->task_ctx != ctx);
 	} else if (task_ctx) {
 		raw_spin_lock(&task_ctx->lock);
 	}
 
-	if (activate) {
+	if (reprogram) {
 		ctx_sched_out(ctx, cpuctx, EVENT_TIME);
 		add_event_to_ctx(event, ctx);
 		ctx_resched(cpuctx, task_ctx);
@@ -2328,13 +2327,36 @@ perf_install_in_context(struct perf_event_context *ctx,
 	/*
 	 * Installing events is tricky because we cannot rely on ctx->is_active
 	 * to be set in case this is the nr_events 0 -> 1 transition.
+	 *
+	 * Instead we use task_curr(), which tells us if the task is running.
+	 * However, since we use task_curr() outside of rq::lock, we can race
+	 * against the actual state. This means the result can be wrong.
+	 *
+	 * If we get a false positive, we retry, this is harmless.
+	 *
+	 * If we get a false negative, things are complicated. If we are after
+	 * perf_event_context_sched_in() ctx::lock will serialize us, and the
+	 * value must be correct. If we're before, it doesn't matter since
+	 * perf_event_context_sched_in() will program the counter.
+	 *
+	 * However, this hinges on the remote context switch having observed
+	 * our task->perf_event_ctxp[] store, such that it will in fact take
+	 * ctx::lock in perf_event_context_sched_in().
+	 *
+	 * We do this by task_function_call(), if the IPI fails to hit the task
+	 * we know any future context switch of task must see the
+	 * perf_event_ctpx[] store.
 	 */
-again:
+
 	/*
-	 * Cannot use task_function_call() because we need to run on the task's
-	 * CPU regardless of whether its current or not.
+	 * This smp_mb() orders the task->perf_event_ctxp[] store with the
+	 * task_cpu() load, such that if the IPI then does not find the task
+	 * running, a future context switch of that task must observe the
+	 * store.
 	 */
-	if (!cpu_function_call(task_cpu(task), __perf_install_in_context, event))
+	smp_mb();
+again:
+	if (!task_function_call(task, __perf_install_in_context, event))
 		return;
 
 	raw_spin_lock_irq(&ctx->lock);
@@ -2348,12 +2370,16 @@ perf_install_in_context(struct perf_event_context *ctx,
 		raw_spin_unlock_irq(&ctx->lock);
 		return;
 	}
-	raw_spin_unlock_irq(&ctx->lock);
 	/*
-	 * Since !ctx->is_active doesn't mean anything, we must IPI
-	 * unconditionally.
+	 * If the task is not running, ctx->lock will avoid it becoming so,
+	 * thus we can safely install the event.
 	 */
-	goto again;
+	if (task_curr(task)) {
+		raw_spin_unlock_irq(&ctx->lock);
+		goto again;
+	}
+	add_event_to_ctx(event, ctx);
+	raw_spin_unlock_irq(&ctx->lock);
 }
 
 /*
@@ -7034,25 +7060,12 @@ static void perf_log_itrace_start(struct perf_event *event)
 	perf_output_end(&handle);
 }
 
-/*
- * Generic event overflow handling, sampling.
- */
-
-static int __perf_event_overflow(struct perf_event *event,
-				   int throttle, struct perf_sample_data *data,
-				   struct pt_regs *regs)
+static int
+__perf_event_account_interrupt(struct perf_event *event, int throttle)
 {
-	int events = atomic_read(&event->event_limit);
 	struct hw_perf_event *hwc = &event->hw;
-	u64 seq;
 	int ret = 0;
-
-	/*
-	 * Non-sampling counters might still use the PMI to fold short
-	 * hardware counters, ignore those.
-	 */
-	if (unlikely(!is_sampling_event(event)))
-		return 0;
+	u64 seq;
 
 	seq = __this_cpu_read(perf_throttled_seq);
 	if (seq != hwc->interrupts_seq) {
@@ -7080,6 +7093,34 @@ static int __perf_event_overflow(struct perf_event *event,
 			perf_adjust_period(event, delta, hwc->last_period, true);
 	}
 
+	return ret;
+}
+
+int perf_event_account_interrupt(struct perf_event *event)
+{
+	return __perf_event_account_interrupt(event, 1);
+}
+
+/*
+ * Generic event overflow handling, sampling.
+ */
+
+static int __perf_event_overflow(struct perf_event *event,
+				   int throttle, struct perf_sample_data *data,
+				   struct pt_regs *regs)
+{
+	int events = atomic_read(&event->event_limit);
+	int ret = 0;
+
+	/*
+	 * Non-sampling counters might still use the PMI to fold short
+	 * hardware counters, ignore those.
+	 */
+	if (unlikely(!is_sampling_event(event)))
+		return 0;
+
+	ret = __perf_event_account_interrupt(event, throttle);
+
 	/*
 	 * XXX event_limit might not quite work as expected on inherited
 	 * events
@@ -9503,6 +9544,37 @@ static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id)
 	return 0;
 }
 
+/*
+ * Variation on perf_event_ctx_lock_nested(), except we take two context
+ * mutexes.
+ */
+static struct perf_event_context *
+__perf_event_ctx_lock_double(struct perf_event *group_leader,
+			     struct perf_event_context *ctx)
+{
+	struct perf_event_context *gctx;
+
+again:
+	rcu_read_lock();
+	gctx = READ_ONCE(group_leader->ctx);
+	if (!atomic_inc_not_zero(&gctx->refcount)) {
+		rcu_read_unlock();
+		goto again;
+	}
+	rcu_read_unlock();
+
+	mutex_lock_double(&gctx->mutex, &ctx->mutex);
+
+	if (group_leader->ctx != gctx) {
+		mutex_unlock(&ctx->mutex);
+		mutex_unlock(&gctx->mutex);
+		put_ctx(gctx);
+		goto again;
+	}
+
+	return gctx;
+}
+
 /**
  * sys_perf_event_open - open a performance event, associate it to a task/cpu
  *
@@ -9746,12 +9818,31 @@ SYSCALL_DEFINE5(perf_event_open,
 	}
 
 	if (move_group) {
-		gctx = group_leader->ctx;
-		mutex_lock_double(&gctx->mutex, &ctx->mutex);
+		gctx = __perf_event_ctx_lock_double(group_leader, ctx);
+
 		if (gctx->task == TASK_TOMBSTONE) {
 			err = -ESRCH;
 			goto err_locked;
 		}
+
+		/*
+		 * Check if we raced against another sys_perf_event_open() call
+		 * moving the software group underneath us.
+		 */
+		if (!(group_leader->group_caps & PERF_EV_CAP_SOFTWARE)) {
+			/*
+			 * If someone moved the group out from under us, check
+			 * if this new event wound up on the same ctx, if so
+			 * its the regular !move_group case, otherwise fail.
+			 */
+			if (gctx != ctx) {
+				err = -EINVAL;
+				goto err_locked;
+			} else {
+				perf_event_ctx_unlock(group_leader, gctx);
+				move_group = 0;
+			}
+		}
 	} else {
 		mutex_lock(&ctx->mutex);
 	}
@@ -9853,7 +9944,7 @@ SYSCALL_DEFINE5(perf_event_open,
 	perf_unpin_context(ctx);
 
 	if (move_group)
-		mutex_unlock(&gctx->mutex);
+		perf_event_ctx_unlock(group_leader, gctx);
 	mutex_unlock(&ctx->mutex);
 
 	if (task) {
@@ -9879,7 +9970,7 @@ SYSCALL_DEFINE5(perf_event_open,
 
 err_locked:
 	if (move_group)
-		mutex_unlock(&gctx->mutex);
+		perf_event_ctx_unlock(group_leader, gctx);
 	mutex_unlock(&ctx->mutex);
 /* err_file: */
 	fput(event_file);
diff --git a/samples/bpf/sock_example.h b/samples/bpf/sock_example.h
index 09f7fe7e5fd7..d8014065d479 100644
--- a/samples/bpf/sock_example.h
+++ b/samples/bpf/sock_example.h
@@ -4,7 +4,7 @@
 #include <unistd.h>
 #include <string.h>
 #include <errno.h>
-#include <net/ethernet.h>
+#include <linux/if_ether.h>
 #include <net/if.h>
 #include <linux/if_packet.h>
 #include <arpa/inet.h>
diff --git a/samples/bpf/trace_output_user.c b/samples/bpf/trace_output_user.c
index f4fa6af22def..ccca1e348017 100644
--- a/samples/bpf/trace_output_user.c
+++ b/samples/bpf/trace_output_user.c
@@ -9,7 +9,6 @@
 #include <string.h>
 #include <fcntl.h>
 #include <poll.h>
-#include <sys/ioctl.h>
 #include <linux/perf_event.h>
 #include <linux/bpf.h>
 #include <errno.h>
diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c
index 3284bb14ae78..8aad81151d50 100644
--- a/tools/lib/subcmd/parse-options.c
+++ b/tools/lib/subcmd/parse-options.c
@@ -213,6 +213,9 @@ static int get_value(struct parse_opt_ctx_t *p,
 		else
 			err = get_arg(p, opt, flags, (const char **)opt->value);
 
+		if (opt->set)
+			*(bool *)opt->set = true;
+
 		/* PARSE_OPT_NOEMPTY: Allow NULL but disallow empty string. */
 		if (opt->flags & PARSE_OPT_NOEMPTY) {
 			const char *val = *(const char **)opt->value;
diff --git a/tools/lib/subcmd/parse-options.h b/tools/lib/subcmd/parse-options.h
index 8866ac438b34..11c3be3bcce7 100644
--- a/tools/lib/subcmd/parse-options.h
+++ b/tools/lib/subcmd/parse-options.h
@@ -137,6 +137,11 @@ struct option {
 	{ .type = OPTION_STRING,  .short_name = (s), .long_name = (l), \
 	  .value = check_vtype(v, const char **), (a), .help = (h), \
 	  .flags = PARSE_OPT_OPTARG, .defval = (intptr_t)(d) }
+#define OPT_STRING_OPTARG_SET(s, l, v, os, a, h, d) \
+	{ .type = OPTION_STRING, .short_name = (s), .long_name = (l), \
+	  .value = check_vtype(v, const char **), (a), .help = (h), \
+	  .flags = PARSE_OPT_OPTARG, .defval = (intptr_t)(d), \
+	  .set = check_vtype(os, bool *)}
 #define OPT_STRING_NOEMPTY(s, l, v, a, h)   { .type = OPTION_STRING,  .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), (a), .help = (h), .flags = PARSE_OPT_NOEMPTY}
 #define OPT_DATE(s, l, v, h) \
 	{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = "time", .help = (h), .callback = parse_opt_approxidate_cb }
diff --git a/tools/lib/traceevent/plugin_sched_switch.c b/tools/lib/traceevent/plugin_sched_switch.c
index f1ce60065258..ec30c2fcbac0 100644
--- a/tools/lib/traceevent/plugin_sched_switch.c
+++ b/tools/lib/traceevent/plugin_sched_switch.c
@@ -111,7 +111,7 @@ static int sched_switch_handler(struct trace_seq *s,
 	trace_seq_printf(s, "%lld ", val);
 
 	if (pevent_get_field_val(s, event, "prev_prio", record, &val, 0) == 0)
-		trace_seq_printf(s, "[%lld] ", val);
+		trace_seq_printf(s, "[%d] ", (int) val);
 
 	if (pevent_get_field_val(s,  event, "prev_state", record, &val, 0) == 0)
 		write_state(s, val);
@@ -129,7 +129,7 @@ static int sched_switch_handler(struct trace_seq *s,
 	trace_seq_printf(s, "%lld", val);
 
 	if (pevent_get_field_val(s, event, "next_prio", record, &val, 0) == 0)
-		trace_seq_printf(s, " [%lld]", val);
+		trace_seq_printf(s, " [%d]", (int) val);
 
 	return 0;
 }
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 27fc3617c6a4..5054d9147f0f 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -430,6 +430,10 @@ that gets then processed, possibly via a perf script, to decide if that
 particular perf.data snapshot should be kept or not.
 
 Implies --timestamp-filename, --no-buildid and --no-buildid-cache.
+The reason for the latter two is to reduce the data file switching
+overhead. You can still switch them on with:
+
+  --switch-output --no-no-buildid  --no-no-buildid-cache
 
 --dry-run::
 Parse options then exit. --dry-run can be used to detect errors in cmdline
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 8fc24824705e..8bb16aa9d661 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -704,9 +704,9 @@ install-tests: all install-gtk
 		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \
 		$(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'
 
-install-bin: install-tools install-tests
+install-bin: install-tools install-tests install-traceevent-plugins
 
-install: install-bin try-install-man install-traceevent-plugins
+install: install-bin try-install-man
 
 install-python_ext:
 	$(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)'
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 74d6a035133a..4ec10e9427d9 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1405,7 +1405,7 @@ static bool dry_run;
  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
  * using pipes, etc.
  */
-struct option __record_options[] = {
+static struct option __record_options[] = {
 	OPT_CALLBACK('e', "event", &record.evlist, "event",
 		     "event selector. use 'perf list' to list available events",
 		     parse_events_option),
@@ -1636,7 +1636,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
 		 * overhead. Still generate buildid if they are required
 		 * explicitly using
 		 *
-		 *  perf record --signal-trigger --no-no-buildid \
+		 *  perf record --switch-output --no-no-buildid \
 		 *              --no-no-buildid-cache
 		 *
 		 * Following code equals to:
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index d53e706a6f17..5b134b0d1ff3 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -209,6 +209,7 @@ struct perf_sched {
 	u64		skipped_samples;
 	const char	*time_str;
 	struct perf_time_interval ptime;
+	struct perf_time_interval hist_time;
 };
 
 /* per thread run time data */
@@ -2460,6 +2461,11 @@ static int timehist_sched_change_event(struct perf_tool *tool,
 		timehist_print_sample(sched, sample, &al, thread, t);
 
 out:
+	if (sched->hist_time.start == 0 && t >= ptime->start)
+		sched->hist_time.start = t;
+	if (ptime->end == 0 || t <= ptime->end)
+		sched->hist_time.end = t;
+
 	if (tr) {
 		/* time of this sched_switch event becomes last time task seen */
 		tr->last_time = sample->time;
@@ -2624,6 +2630,7 @@ static void timehist_print_summary(struct perf_sched *sched,
 	struct thread *t;
 	struct thread_runtime *r;
 	int i;
+	u64 hist_time = sched->hist_time.end - sched->hist_time.start;
 
 	memset(&totals, 0, sizeof(totals));
 
@@ -2665,7 +2672,7 @@ static void timehist_print_summary(struct perf_sched *sched,
 			totals.sched_count += r->run_stats.n;
 			printf("    CPU %2d idle for ", i);
 			print_sched_time(r->total_run_time, 6);
-			printf(" msec\n");
+			printf(" msec  (%6.2f%%)\n", 100.0 * r->total_run_time / hist_time);
 		} else
 			printf("    CPU %2d idle entire time window\n", i);
 	}
@@ -2701,12 +2708,16 @@ static void timehist_print_summary(struct perf_sched *sched,
 
 	printf("\n"
 	       "    Total number of unique tasks: %" PRIu64 "\n"
-	       "Total number of context switches: %" PRIu64 "\n"
-	       "           Total run time (msec): ",
+	       "Total number of context switches: %" PRIu64 "\n",
 	       totals.task_count, totals.sched_count);
 
+	printf("           Total run time (msec): ");
 	print_sched_time(totals.total_run_time, 2);
 	printf("\n");
+
+	printf("    Total scheduling time (msec): ");
+	print_sched_time(hist_time, 2);
+	printf(" (x %d)\n", sched->max_cpu);
 }
 
 typedef int (*sched_handler)(struct perf_tool *tool,
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index d281ae2b54e8..4a57c8a60bd9 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -163,7 +163,7 @@ static struct map *kernel_get_module_map(const char *module)
 
 	/* A file path -- this is an offline module */
 	if (module && strchr(module, '/'))
-		return machine__findnew_module_map(host_machine, 0, module);
+		return dso__new_map(module);
 
 	if (!module)
 		module = "kernel";
@@ -173,6 +173,7 @@ static struct map *kernel_get_module_map(const char *module)
 		if (strncmp(pos->dso->short_name + 1, module,
 			    pos->dso->short_name_len - 2) == 0 &&
 		    module[pos->dso->short_name_len - 2] == '\0') {
+			map__get(pos);
 			return pos;
 		}
 	}
@@ -188,15 +189,6 @@ struct map *get_target_map(const char *target, bool user)
 		return kernel_get_module_map(target);
 }
 
-static void put_target_map(struct map *map, bool user)
-{
-	if (map && user) {
-		/* Only the user map needs to be released */
-		map__put(map);
-	}
-}
-
-
 static int convert_exec_to_group(const char *exec, char **result)
 {
 	char *ptr1, *ptr2, *exec_copy;
@@ -268,21 +260,6 @@ static bool kprobe_warn_out_range(const char *symbol, unsigned long address)
 }
 
 /*
- * NOTE:
- * '.gnu.linkonce.this_module' section of kernel module elf directly
- * maps to 'struct module' from linux/module.h. This section contains
- * actual module name which will be used by kernel after loading it.
- * But, we cannot use 'struct module' here since linux/module.h is not
- * exposed to user-space. Offset of 'name' has remained same from long
- * time, so hardcoding it here.
- */
-#ifdef __LP64__
-#define MOD_NAME_OFFSET 24
-#else
-#define MOD_NAME_OFFSET 12
-#endif
-
-/*
  * @module can be module name of module file path. In case of path,
  * inspect elf and find out what is actual module name.
  * Caller has to free mod_name after using it.
@@ -296,6 +273,7 @@ static char *find_module_name(const char *module)
 	Elf_Data *data;
 	Elf_Scn *sec;
 	char *mod_name = NULL;
+	int name_offset;
 
 	fd = open(module, O_RDONLY);
 	if (fd < 0)
@@ -317,7 +295,21 @@ static char *find_module_name(const char *module)
 	if (!data || !data->d_buf)
 		goto ret_err;
 
-	mod_name = strdup((char *)data->d_buf + MOD_NAME_OFFSET);
+	/*
+	 * NOTE:
+	 * '.gnu.linkonce.this_module' section of kernel module elf directly
+	 * maps to 'struct module' from linux/module.h. This section contains
+	 * actual module name which will be used by kernel after loading it.
+	 * But, we cannot use 'struct module' here since linux/module.h is not
+	 * exposed to user-space. Offset of 'name' has remained same from long
+	 * time, so hardcoding it here.
+	 */
+	if (ehdr.e_ident[EI_CLASS] == ELFCLASS32)
+		name_offset = 12;
+	else	/* expect ELFCLASS64 by default */
+		name_offset = 24;
+
+	mod_name = strdup((char *)data->d_buf + name_offset);
 
 ret_err:
 	elf_end(elf);
@@ -412,7 +404,7 @@ static int find_alternative_probe_point(struct debuginfo *dinfo,
 	}
 
 out:
-	put_target_map(map, uprobes);
+	map__put(map);
 	return ret;
 
 }
@@ -618,6 +610,51 @@ static int find_perf_probe_point_from_dwarf(struct probe_trace_point *tp,
 	return ret ? : -ENOENT;
 }
 
+/*
+ * Rename DWARF symbols to ELF symbols -- gcc sometimes optimizes functions
+ * and generate new symbols with suffixes such as .constprop.N or .isra.N
+ * etc. Since those symbols are not recorded in DWARF, we have to find
+ * correct generated symbols from offline ELF binary.
+ * For online kernel or uprobes we don't need this because those are
+ * rebased on _text, or already a section relative address.
+ */
+static int
+post_process_offline_probe_trace_events(struct probe_trace_event *tevs,
+					int ntevs, const char *pathname)
+{
+	struct symbol *sym;
+	struct map *map;
+	unsigned long stext = 0;
+	u64 addr;
+	int i;
+
+	/* Prepare a map for offline binary */
+	map = dso__new_map(pathname);
+	if (!map || get_text_start_address(pathname, &stext) < 0) {
+		pr_warning("Failed to get ELF symbols for %s\n", pathname);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < ntevs; i++) {
+		addr = tevs[i].point.address + tevs[i].point.offset - stext;
+		sym = map__find_symbol(map, addr);
+		if (!sym)
+			continue;
+		if (!strcmp(sym->name, tevs[i].point.symbol))
+			continue;
+		/* If we have no realname, use symbol for it */
+		if (!tevs[i].point.realname)
+			tevs[i].point.realname = tevs[i].point.symbol;
+		else
+			free(tevs[i].point.symbol);
+		tevs[i].point.symbol = strdup(sym->name);
+		tevs[i].point.offset = addr - sym->start;
+	}
+	map__put(map);
+
+	return 0;
+}
+
 static int add_exec_to_probe_trace_events(struct probe_trace_event *tevs,
 					  int ntevs, const char *exec)
 {
@@ -679,7 +716,8 @@ post_process_kernel_probe_trace_events(struct probe_trace_event *tevs,
 
 	/* Skip post process if the target is an offline kernel */
 	if (symbol_conf.ignore_vmlinux_buildid)
-		return 0;
+		return post_process_offline_probe_trace_events(tevs, ntevs,
+						symbol_conf.vmlinux_name);
 
 	reloc_sym = kernel_get_ref_reloc_sym();
 	if (!reloc_sym) {
@@ -2869,7 +2907,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
 	}
 
 out:
-	put_target_map(map, pev->uprobes);
+	map__put(map);
 	free(syms);
 	return ret;
 
@@ -3362,10 +3400,7 @@ int show_available_funcs(const char *target, struct strfilter *_filter,
 		return ret;
 
 	/* Get a symbol map */
-	if (user)
-		map = dso__new_map(target);
-	else
-		map = kernel_get_module_map(target);
+	map = get_target_map(target, user);
 	if (!map) {
 		pr_err("Failed to get a map for %s\n", (target) ? : "kernel");
 		return -EINVAL;
@@ -3397,9 +3432,7 @@ int show_available_funcs(const char *target, struct strfilter *_filter,
         }
 
 end:
-	if (user) {
-		map__put(map);
-	}
+	map__put(map);
 	exit_probe_symbol_maps();
 
 	return ret;
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 99400b0e8f2a..adbc6c02c3aa 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -537,6 +537,12 @@ int sysfs__read_build_id(const char *filename, void *build_id, size_t size)
 				break;
 		} else {
 			int n = namesz + descsz;
+
+			if (n > (int)sizeof(bf)) {
+				n = sizeof(bf);
+				pr_debug("%s: truncating reading of build id in sysfs file %s: n_namesz=%u, n_descsz=%u.\n",
+					 __func__, filename, nhdr.n_namesz, nhdr.n_descsz);
+			}
 			if (read(fd, bf, n) != n)
 				break;
 		}

^ permalink raw reply	[flat|nested] 316+ messages in thread

* [GIT PULL] perf fixes
@ 2016-12-23 22:50 Ingo Molnar
  0 siblings, 0 replies; 316+ messages in thread
From: Ingo Molnar @ 2016-12-23 22:50 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Jiri Olsa, Alexander Shishkin, Thomas Gleixner, Andrew Morton

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: 3705b97505bcbf6440f38119c0e7d6058f585b54 Merge tag 'perf-urgent-for-mingo-20161222' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent

On the kernel side there are two x86 PMU driver fixes and a uprobes fix, plus on the 
tooling side there are a number of fixes and some late updates.
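
Among the tooling updates in the shortlog below, libbpf's bpf_create_map()
grows a map_flags argument. An illustrative usage sketch -- it assumes the
tools/lib/bpf headers are on the include path, and the flag value shown in
the comment is only an example:

#include <stdio.h>
#include <unistd.h>
#include <linux/bpf.h>
#include "bpf.h"		/* tools/lib/bpf */

int main(void)
{
	int map_fd = bpf_create_map(BPF_MAP_TYPE_HASH,
				    sizeof(__u32),	/* key size */
				    sizeof(__u64),	/* value size */
				    256,		/* max entries */
				    0);			/* map_flags, e.g. BPF_F_NO_PREALLOC */
	if (map_fd < 0) {
		perror("bpf_create_map");
		return 1;
	}
	printf("map fd = %d\n", map_fd);
	close(map_fd);
	return 0;
}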

 Thanks,

	Ingo

------------------>
Arnaldo Carvalho de Melo (3):
      perf tools: Remove some needless __maybe_unused
      samples/bpf: Make perf_event_read() static
      samples/bpf: Be consistent with bpf_load_program bpf_insn parameter

Davidlohr Bueso (1):
      perf bench futex: Fix lock-pi help string

Jiri Olsa (7):
      perf tools: Move headers check into bash script
      perf mem: Fix --all-user/--all-kernel options
      perf evsel: Use variable instead of repeating lengthy FD macro
      perf thread_map: Add thread_map__remove function
      perf evsel: Allow to ignore missing pid
      perf record: Force ignore_missing_thread for uid option
      perf trace: Check if MAP_32BIT is defined (again)

Joe Stringer (8):
      tools lib bpf: Sync {tools,}/include/uapi/linux/bpf.h
      tools lib bpf: use __u32 from linux/types.h
      tools lib bpf: Add flags to bpf_create_map()
      samples/bpf: Make samples more libbpf-centric
      samples/bpf: Switch over to libbpf
      tools lib bpf: Add bpf_prog_{attach,detach}
      samples/bpf: Remove perf_event_open() declaration
      samples/bpf: Move open_raw_sock to separate header

Kan Liang (1):
      perf diff: Do not overwrite valid build id

Marcin Nowakowski (1):
      uprobes: Fix uprobes on MIPS, allow for a cache flush after ixol breakpoint creation

Namhyung Kim (10):
      perf sched timehist: Split is_idle_sample()
      perf sched timehist: Introduce struct idle_time_data
      perf sched timehist: Save callchain when entering idle
      perf sched timehist: Skip non-idle events when necessary
      perf sched timehist: Add -I/--idle-hist option
      perf sched timehist: Show callchains for idle stat
      perf sched timehist: Honour 'comm_width' when aligning the headers
      perf sched timehist: Enlarge default 'comm_width'
      perf sched timehist: Remove hardcoded 'comm_width' check at print_summary
      perf sched timehist: Fix invalid period calculation

Peter Zijlstra (1):
      perf/x86: Fix overlap counter scheduling bug

Ravi Bangoria (3):
      perf annotate: Support jump instruction with target as second operand
      perf annotate: Fix jump target outside of function address range
      perf annotate: Don't throw error for zero length symbols

Stephane Eranian (1):
      perf/x86/pebs: Fix handling of PEBS buffer overflows


 arch/x86/events/intel/core.c                      |  30 +-
 arch/x86/events/intel/uncore_snbep.c              |   2 +-
 kernel/events/uprobes.c                           |   2 +-
 samples/bpf/Makefile                              |  70 +--
 samples/bpf/README.rst                            |   4 +-
 samples/bpf/bpf_load.c                            |  21 +-
 samples/bpf/bpf_load.h                            |   3 +
 samples/bpf/fds_example.c                         |  13 +-
 samples/bpf/lathist_user.c                        |   2 +-
 samples/bpf/libbpf.c                              | 176 -------
 samples/bpf/libbpf.h                              |  28 +-
 samples/bpf/lwt_len_hist_user.c                   |   6 +-
 samples/bpf/offwaketime_user.c                    |   8 +-
 samples/bpf/sampleip_user.c                       |   7 +-
 samples/bpf/sock_example.c                        |  14 +-
 samples/bpf/sock_example.h                        |  35 ++
 samples/bpf/sockex1_user.c                        |   7 +-
 samples/bpf/sockex2_user.c                        |   5 +-
 samples/bpf/sockex3_user.c                        |   5 +-
 samples/bpf/spintest_user.c                       |   8 +-
 samples/bpf/tc_l2_redirect_user.c                 |   4 +-
 samples/bpf/test_cgrp2_array_pin.c                |   4 +-
 samples/bpf/test_cgrp2_attach.c                   |  12 +-
 samples/bpf/test_cgrp2_attach2.c                  |   8 +-
 samples/bpf/test_cgrp2_sock.c                     |   7 +-
 samples/bpf/test_current_task_under_cgroup_user.c |   8 +-
 samples/bpf/test_lru_dist.c                       |  32 +-
 samples/bpf/test_probe_write_user_user.c          |   2 +-
 samples/bpf/trace_event_user.c                    |  23 +-
 samples/bpf/trace_output_user.c                   |   7 +-
 samples/bpf/tracex2_user.c                        |  10 +-
 samples/bpf/tracex3_user.c                        |   4 +-
 samples/bpf/tracex4_user.c                        |   4 +-
 samples/bpf/tracex6_user.c                        |   5 +-
 samples/bpf/xdp1_user.c                           |   2 +-
 samples/bpf/xdp_tx_iptunnel_user.c                |   6 +-
 tools/include/uapi/linux/bpf.h                    | 593 +++++++++++++---------
 tools/lib/bpf/bpf.c                               |  30 +-
 tools/lib/bpf/bpf.h                               |   9 +-
 tools/lib/bpf/libbpf.c                            |   3 +-
 tools/perf/Documentation/perf-sched.txt           |   4 +
 tools/perf/Makefile.perf                          |  94 +---
 tools/perf/bench/futex-lock-pi.c                  |   2 +-
 tools/perf/builtin-c2c.c                          |  13 +-
 tools/perf/builtin-mem.c                          |   4 +-
 tools/perf/builtin-record.c                       |   3 +
 tools/perf/builtin-report.c                       |   2 +-
 tools/perf/builtin-sched.c                        | 275 ++++++++--
 tools/perf/builtin-stat.c                         |   6 +-
 tools/perf/check-headers.sh                       |  59 +++
 tools/perf/perf.h                                 |   1 +
 tools/perf/tests/builtin-test.c                   |   4 +
 tools/perf/tests/tests.h                          |   1 +
 tools/perf/tests/thread-map.c                     |  44 ++
 tools/perf/trace/beauty/mmap.c                    |   2 +
 tools/perf/ui/browsers/annotate.c                 |   5 +-
 tools/perf/util/annotate.c                        |  23 +-
 tools/perf/util/annotate.h                        |   5 +-
 tools/perf/util/evsel.c                           |  61 ++-
 tools/perf/util/evsel.h                           |   1 +
 tools/perf/util/symbol.c                          |   3 +-
 tools/perf/util/thread_map.c                      |  22 +
 tools/perf/util/thread_map.h                      |   1 +
 63 files changed, 1104 insertions(+), 750 deletions(-)
 delete mode 100644 samples/bpf/libbpf.c
 create mode 100644 samples/bpf/sock_example.h
 create mode 100755 tools/perf/check-headers.sh

^ permalink raw reply	[flat|nested] 316+ messages in thread

* [GIT PULL] perf fixes
@ 2016-12-07 18:45 Ingo Molnar
  0 siblings, 0 replies; 316+ messages in thread
From: Ingo Molnar @ 2016-12-07 18:45 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Peter Zijlstra, Alexander Shishkin,
	Arnaldo Carvalho de Melo, Thomas Gleixner, Andrew Morton

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: 8fc31ce8896fc3cea1d79688c8ff950ad4e73afe perf/core: Remove invalid warning from list_update_cgroup_even()t

A bogus warning fix, a counter width handling fix affecting certain machines, plus 
a one-liner hw-enablement patch for Knights Mill CPUs.

 Thanks,

	Ingo

------------------>
David Carrillo-Cisneros (1):
      perf/core: Remove invalid warning from list_update_cgroup_even()t

Peter Zijlstra (Intel) (1):
      perf/x86: Fix full width counter, counter overflow

Piotr Luc (1):
      perf/x86/intel: Enable C-state residency events for Knights Mill


 arch/x86/events/core.c         |  2 +-
 arch/x86/events/intel/core.c   |  2 +-
 arch/x86/events/intel/cstate.c |  1 +
 kernel/events/core.c           | 19 ++++++++-----------
 4 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 9d4bf3ab049e..6e395c996900 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -69,7 +69,7 @@ u64 x86_perf_event_update(struct perf_event *event)
 	int shift = 64 - x86_pmu.cntval_bits;
 	u64 prev_raw_count, new_raw_count;
 	int idx = hwc->idx;
-	s64 delta;
+	u64 delta;
 
 	if (idx == INTEL_PMC_IDX_FIXED_BTS)
 		return 0;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index a74a2dbc0180..cb8522290e6a 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4034,7 +4034,7 @@ __init int intel_pmu_init(void)
 
 	/* Support full width counters using alternative MSR range */
 	if (x86_pmu.intel_cap.full_width_write) {
-		x86_pmu.max_period = x86_pmu.cntval_mask;
+		x86_pmu.max_period = x86_pmu.cntval_mask >> 1;
 		x86_pmu.perfctr = MSR_IA32_PMC0;
 		pr_cont("full-width counters, ");
 	}
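
For the counter width fix above, here is a rough standalone sketch (plain
user-space C with an invented 48-bit counter width and invented values, not
the kernel code itself) of why the delta has to stay unsigned once periods
may span a large part of the counter range:

   #include <stdint.h>
   #include <stdio.h>

   #define CNTVAL_BITS 48
   #define SHIFT       (64 - CNTVAL_BITS)

   /* What the old code effectively did: a sign-extending arithmetic shift
    * (two's complement wrap assumed, as on the kernel's targets). */
   static int64_t delta_signed(uint64_t prev, uint64_t now)
   {
           int64_t delta = (now << SHIFT) - (prev << SHIFT);

           return delta >> SHIFT;          /* arithmetic shift */
   }

   /* What the first hunk above switches to: a plain logical shift. */
   static uint64_t delta_unsigned(uint64_t prev, uint64_t now)
   {
           uint64_t delta = (now << SHIFT) - (prev << SHIFT);

           return delta >> SHIFT;          /* logical shift */
   }

   int main(void)
   {
           /* A move of more than half the 48-bit range, which a full-range
            * max_period used to allow: bit 47 of the distance is set. */
           uint64_t prev = 0x1000, now = 0x800000001000ULL;

           printf("signed:   %lld\n", (long long)delta_signed(prev, now));
           printf("unsigned: %llu\n", (unsigned long long)delta_unsigned(prev, now));
           return 0;
   }

The signed variant prints a large negative number for these inputs, the
unsigned one the correct distance; the halved max_period in the second hunk
appears to serve the same purpose by keeping programmed periods within half
of the counter range.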
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 4f5ac726335f..da51e5a3e2ff 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -540,6 +540,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
 	X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates),
 
 	X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNL, knl_cstates),
+	X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates),
 	{ },
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6ee1febdf6ff..02c8421f8c01 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -903,17 +903,14 @@ list_update_cgroup_event(struct perf_event *event,
 	 */
 	cpuctx = __get_cpu_context(ctx);
 
-	/* Only set/clear cpuctx->cgrp if current task uses event->cgrp. */
-	if (perf_cgroup_from_task(current, ctx) != event->cgrp) {
-		/*
-		 * We are removing the last cpu event in this context.
-		 * If that event is not active in this cpu, cpuctx->cgrp
-		 * should've been cleared by perf_cgroup_switch.
-		 */
-		WARN_ON_ONCE(!add && cpuctx->cgrp);
-		return;
-	}
-	cpuctx->cgrp = add ? event->cgrp : NULL;
+	/*
+	 * cpuctx->cgrp is NULL until a cgroup event is sched in or
+	 * ctx->nr_cgroup == 0 .
+	 */
+	if (add && perf_cgroup_from_task(current, ctx) == event->cgrp)
+		cpuctx->cgrp = event->cgrp;
+	else if (!add)
+		cpuctx->cgrp = NULL;
 }
 
 #else /* !CONFIG_CGROUP_PERF */

^ permalink raw reply	[flat|nested] 316+ messages in thread

* [GIT PULL] perf fixes
@ 2016-11-23  9:00 Ingo Molnar
  0 siblings, 0 replies; 316+ messages in thread
From: Ingo Molnar @ 2016-11-23  9:00 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Thomas Gleixner, Andrew Morton

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: 033ac60c7f21f9996a0fab2fd04f334afbf77b33 perf/x86/intel/uncore: Allow only a single PMU/box within an events group

Six fixes for bugs that were found via fuzzing, and a trivial hw-enablement patch 
for AMD Family-17h CPU PMUs.

 Thanks,

	Ingo

------------------>
Alexander Shishkin (1):
      perf/core: Fix address filter parser

David Carrillo-Cisneros (1):
      perf/core: Do not set cpuctx->cgrp for unscheduled cgroups

Janakarajan Natarajan (1):
      perf/x86: Add perf support for AMD family-17h processors

Johannes Weiner (1):
      perf/x86: Restore TASK_SIZE check on frame pointer

Kan Liang (1):
      perf/x86/uncore: Fix crash by removing bogus event_list[] handling for SNB client uncore IMC

Peter Zijlstra (2):
      perf/x86/intel: Cure bogus unwind from PEBS entries
      perf/x86/intel/uncore: Allow only a single PMU/box within an events group


 arch/x86/events/amd/core.c         |  8 +++++++-
 arch/x86/events/core.c             | 10 ++--------
 arch/x86/events/intel/ds.c         | 35 +++++++++++++++++++++++------------
 arch/x86/events/intel/uncore.c     |  8 ++++----
 arch/x86/events/intel/uncore_snb.c | 12 ------------
 arch/x86/events/perf_event.h       |  2 +-
 kernel/events/core.c               | 13 +++++++++++++
 7 files changed, 50 insertions(+), 38 deletions(-)

diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index f5f4b3fbbbc2..afb222b63cae 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -662,7 +662,13 @@ static int __init amd_core_pmu_init(void)
 		pr_cont("Fam15h ");
 		x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
 		break;
-
+	case 0x17:
+		pr_cont("Fam17h ");
+		/*
+		 * In family 17h, there are no event constraints in the PMC hardware.
+		 * We fallback to using default amd_get_event_constraints.
+		 */
+		break;
 	default:
 		pr_err("core perfctr but no constraints; unknown hardware!\n");
 		return -ENODEV;
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index d31735f37ed7..9d4bf3ab049e 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2352,7 +2352,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent
 		frame.next_frame     = 0;
 		frame.return_address = 0;
 
-		if (!access_ok(VERIFY_READ, fp, 8))
+		if (!valid_user_frame(fp, sizeof(frame)))
 			break;
 
 		bytes = __copy_from_user_nmi(&frame.next_frame, fp, 4);
@@ -2362,9 +2362,6 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent
 		if (bytes != 0)
 			break;
 
-		if (!valid_user_frame(fp, sizeof(frame)))
-			break;
-
 		perf_callchain_store(entry, cs_base + frame.return_address);
 		fp = compat_ptr(ss_base + frame.next_frame);
 	}
@@ -2413,7 +2410,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
 		frame.next_frame	     = NULL;
 		frame.return_address = 0;
 
-		if (!access_ok(VERIFY_READ, fp, sizeof(*fp) * 2))
+		if (!valid_user_frame(fp, sizeof(frame)))
 			break;
 
 		bytes = __copy_from_user_nmi(&frame.next_frame, fp, sizeof(*fp));
@@ -2423,9 +2420,6 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
 		if (bytes != 0)
 			break;
 
-		if (!valid_user_frame(fp, sizeof(frame)))
-			break;
-
 		perf_callchain_store(entry, frame.return_address);
 		fp = (void __user *)frame.next_frame;
 	}
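
The two callchain hunks above move the frame validation ahead of the user
memory copy. A hedged user-space sketch of the same frame-pointer walk
(the struct layout assumes x86-64; the 64KB window and all names are
invented; build with -fno-omit-frame-pointer so the chain exists):

   #include <stdio.h>

   struct frame {
           struct frame *next;          /* saved frame pointer of the caller */
           unsigned long return_addr;   /* return address stored above it    */
   };

   /* Crude stand-in for valid_user_frame(): the whole frame must sit in a
    * plausible stack window before we dereference any part of it. */
   static int frame_ok(const struct frame *fp, const void *lo, const void *hi)
   {
           return (const void *)fp >= lo && (const void *)(fp + 1) <= hi;
   }

   int main(void)
   {
           struct frame *fp = __builtin_frame_address(0);
           const void *lo = fp;                        /* stack grows down */
           const void *hi = (const char *)fp + 65536;  /* arbitrary bound  */

           for (int depth = 0; depth < 8 && frame_ok(fp, lo, hi); depth++) {
                   printf("depth %d: return address %#lx\n",
                          depth, fp->return_addr);
                   fp = fp->next;        /* follow only after the check */
           }
           return 0;
   }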
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 0319311dbdbb..be202390bbd3 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1108,20 +1108,20 @@ static void setup_pebs_sample_data(struct perf_event *event,
 	}
 
 	/*
-	 * We use the interrupt regs as a base because the PEBS record
-	 * does not contain a full regs set, specifically it seems to
-	 * lack segment descriptors, which get used by things like
-	 * user_mode().
+	 * We use the interrupt regs as a base because the PEBS record does not
+	 * contain a full regs set, specifically it seems to lack segment
+	 * descriptors, which get used by things like user_mode().
 	 *
-	 * In the simple case fix up only the IP and BP,SP regs, for
-	 * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly.
-	 * A possible PERF_SAMPLE_REGS will have to transfer all regs.
+	 * In the simple case fix up only the IP for PERF_SAMPLE_IP.
+	 *
+	 * We must however always use BP,SP from iregs for the unwinder to stay
+	 * sane; the record BP,SP can point into thin air when the record is
+	 * from a previous PMI context or an (I)RET happend between the record
+	 * and PMI.
 	 */
 	*regs = *iregs;
 	regs->flags = pebs->flags;
 	set_linear_ip(regs, pebs->ip);
-	regs->bp = pebs->bp;
-	regs->sp = pebs->sp;
 
 	if (sample_type & PERF_SAMPLE_REGS_INTR) {
 		regs->ax = pebs->ax;
@@ -1130,10 +1130,21 @@ static void setup_pebs_sample_data(struct perf_event *event,
 		regs->dx = pebs->dx;
 		regs->si = pebs->si;
 		regs->di = pebs->di;
-		regs->bp = pebs->bp;
-		regs->sp = pebs->sp;
 
-		regs->flags = pebs->flags;
+		/*
+		 * Per the above; only set BP,SP if we don't need callchains.
+		 *
+		 * XXX: does this make sense?
+		 */
+		if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) {
+			regs->bp = pebs->bp;
+			regs->sp = pebs->sp;
+		}
+
+		/*
+		 * Preserve PERF_EFLAGS_VM from set_linear_ip().
+		 */
+		regs->flags = pebs->flags | (regs->flags & PERF_EFLAGS_VM);
 #ifndef CONFIG_X86_32
 		regs->r8 = pebs->r8;
 		regs->r9 = pebs->r9;
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index efca2685d876..dbaaf7dc8373 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -319,9 +319,9 @@ static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
  */
 static int uncore_pmu_event_init(struct perf_event *event);
 
-static bool is_uncore_event(struct perf_event *event)
+static bool is_box_event(struct intel_uncore_box *box, struct perf_event *event)
 {
-	return event->pmu->event_init == uncore_pmu_event_init;
+	return &box->pmu->pmu == event->pmu;
 }
 
 static int
@@ -340,7 +340,7 @@ uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
 
 	n = box->n_events;
 
-	if (is_uncore_event(leader)) {
+	if (is_box_event(box, leader)) {
 		box->event_list[n] = leader;
 		n++;
 	}
@@ -349,7 +349,7 @@ uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
 		return n;
 
 	list_for_each_entry(event, &leader->sibling_list, group_entry) {
-		if (!is_uncore_event(event) ||
+		if (!is_box_event(box, event) ||
 		    event->state <= PERF_EVENT_STATE_OFF)
 			continue;
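
A small freestanding sketch (structs invented for illustration) of what the
is_box_event() change above buys: an event is only collected into a box when
it belongs to that box's own PMU, not merely to some uncore PMU:

   #include <stdio.h>

   struct pmu   { const char *name; };
   struct event { struct pmu *pmu; };
   struct box   { struct pmu *pmu; };

   static int is_box_event(const struct box *box, const struct event *event)
   {
           return box->pmu == event->pmu;        /* exact PMU match */
   }

   int main(void)
   {
           struct pmu imc = { "uncore_imc" }, cbox = { "uncore_cbox_0" };
           struct box imc_box = { &imc };
           struct event on_imc = { &imc }, on_cbox = { &cbox };

           /* Both are uncore events, but only the first belongs to this box. */
           printf("imc event: %d, cbox event: %d\n",
                  is_box_event(&imc_box, &on_imc),
                  is_box_event(&imc_box, &on_cbox));
           return 0;
   }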
 
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index 81195cca7eae..a3dcc12bef4a 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -490,24 +490,12 @@ static int snb_uncore_imc_event_add(struct perf_event *event, int flags)
 
 	snb_uncore_imc_event_start(event, 0);
 
-	box->n_events++;
-
 	return 0;
 }
 
 static void snb_uncore_imc_event_del(struct perf_event *event, int flags)
 {
-	struct intel_uncore_box *box = uncore_event_to_box(event);
-	int i;
-
 	snb_uncore_imc_event_stop(event, PERF_EF_UPDATE);
-
-	for (i = 0; i < box->n_events; i++) {
-		if (event == box->event_list[i]) {
-			--box->n_events;
-			break;
-		}
-	}
 }
 
 int snb_pci2phy_map_init(int devid)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 5874d8de1f8d..a77ee026643d 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -113,7 +113,7 @@ struct debug_store {
  * Per register state.
  */
 struct er_account {
-	raw_spinlock_t		lock;	/* per-core: protect structure */
+	raw_spinlock_t      lock;	/* per-core: protect structure */
 	u64                 config;	/* extra MSR config */
 	u64                 reg;	/* extra MSR number */
 	atomic_t            ref;	/* reference count */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 0e292132efac..6ee1febdf6ff 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -902,6 +902,17 @@ list_update_cgroup_event(struct perf_event *event,
 	 * this will always be called from the right CPU.
 	 */
 	cpuctx = __get_cpu_context(ctx);
+
+	/* Only set/clear cpuctx->cgrp if current task uses event->cgrp. */
+	if (perf_cgroup_from_task(current, ctx) != event->cgrp) {
+		/*
+		 * We are removing the last cpu event in this context.
+		 * If that event is not active in this cpu, cpuctx->cgrp
+		 * should've been cleared by perf_cgroup_switch.
+		 */
+		WARN_ON_ONCE(!add && cpuctx->cgrp);
+		return;
+	}
 	cpuctx->cgrp = add ? event->cgrp : NULL;
 }
 
@@ -8018,6 +8029,7 @@ static void perf_event_addr_filters_apply(struct perf_event *event)
  * if <size> is not specified, the range is treated as a single address.
  */
 enum {
+	IF_ACT_NONE = -1,
 	IF_ACT_FILTER,
 	IF_ACT_START,
 	IF_ACT_STOP,
@@ -8041,6 +8053,7 @@ static const match_table_t if_tokens = {
 	{ IF_SRC_KERNEL,	"%u/%u" },
 	{ IF_SRC_FILEADDR,	"%u@%s" },
 	{ IF_SRC_KERNELADDR,	"%u" },
+	{ IF_ACT_NONE,		NULL },
 };
 
 /*

----- End forwarded message -----

^ permalink raw reply	[flat|nested] 316+ messages in thread

* [GIT PULL] perf fixes
@ 2016-11-14  7:56 Ingo Molnar
  0 siblings, 0 replies; 316+ messages in thread
From: Ingo Molnar @ 2016-11-14  7:56 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Thomas Gleixner, Andrew Morton

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: ce75632cc4012f1832bd56efd97c2ba75ca964bb Merge tag 'perf-hists-hierarchy-fixes-for-mingo-20161111' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent

An uncore PMU driver hardware enablement change for Intel Skylake uncore PMUs 
(Skylake Y, U, H and S platforms), plus a number of tooling fixes for the 
histogram handling/display code.

 Thanks,

	Ingo

------------------>
Kan Liang (1):
      perf/x86/intel/uncore: Add more Intel uncore IMC PCI IDs for SkyLake

Namhyung Kim (5):
      perf hist browser: Fix hierarchy column counts
      perf hists browser: Fix indentation of folded sign on --hierarchy
      perf hists browser: Show folded sign properly on --hierarchy
      perf hists browser: Fix column indentation on --hierarchy
      perf hists: Fix column length on --hierarchy


 arch/x86/events/intel/uncore_snb.c | 32 +++++++++++++++++++++----
 tools/perf/ui/browsers/hists.c     | 48 +++++++++++++++++++++++++++++---------
 tools/perf/util/hist.c             | 12 +++++-----
 3 files changed, 71 insertions(+), 21 deletions(-)

diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index 5f845eef9a4d..81195cca7eae 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -8,8 +8,12 @@
 #define PCI_DEVICE_ID_INTEL_HSW_IMC	0x0c00
 #define PCI_DEVICE_ID_INTEL_HSW_U_IMC	0x0a04
 #define PCI_DEVICE_ID_INTEL_BDW_IMC	0x1604
-#define PCI_DEVICE_ID_INTEL_SKL_IMC	0x191f
-#define PCI_DEVICE_ID_INTEL_SKL_U_IMC	0x190c
+#define PCI_DEVICE_ID_INTEL_SKL_U_IMC	0x1904
+#define PCI_DEVICE_ID_INTEL_SKL_Y_IMC	0x190c
+#define PCI_DEVICE_ID_INTEL_SKL_HD_IMC	0x1900
+#define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC	0x1910
+#define PCI_DEVICE_ID_INTEL_SKL_SD_IMC	0x190f
+#define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC	0x191f
 
 /* SNB event control */
 #define SNB_UNC_CTL_EV_SEL_MASK			0x000000ff
@@ -616,13 +620,29 @@ static const struct pci_device_id bdw_uncore_pci_ids[] = {
 
 static const struct pci_device_id skl_uncore_pci_ids[] = {
 	{ /* IMC */
-		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_IMC),
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_Y_IMC),
 		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
 	},
 	{ /* IMC */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_U_IMC),
 		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
 	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_HD_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_HQ_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SD_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SQ_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
 
 	{ /* end: all zeroes */ },
 };
@@ -666,8 +686,12 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
 	IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver),    /* 4th Gen Core Processor */
 	IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver),  /* 4th Gen Core ULT Mobile Processor */
 	IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver),    /* 5th Gen Core U */
-	IMC_DEV(SKL_IMC, &skl_uncore_pci_driver),    /* 6th Gen Core */
+	IMC_DEV(SKL_Y_IMC, &skl_uncore_pci_driver),  /* 6th Gen Core Y */
 	IMC_DEV(SKL_U_IMC, &skl_uncore_pci_driver),  /* 6th Gen Core U */
+	IMC_DEV(SKL_HD_IMC, &skl_uncore_pci_driver),  /* 6th Gen Core H Dual Core */
+	IMC_DEV(SKL_HQ_IMC, &skl_uncore_pci_driver),  /* 6th Gen Core H Quad Core */
+	IMC_DEV(SKL_SD_IMC, &skl_uncore_pci_driver),  /* 6th Gen Core S Dual Core */
+	IMC_DEV(SKL_SQ_IMC, &skl_uncore_pci_driver),  /* 6th Gen Core S Quad Core */
 	{  /* end marker */ }
 };
 
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 4ffff7be9299..a53fef0c673b 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -1337,8 +1337,8 @@ static int hist_browser__show_hierarchy_entry(struct hist_browser *browser,
 		}
 
 		if (first) {
-			ui_browser__printf(&browser->b, "%c", folded_sign);
-			width--;
+			ui_browser__printf(&browser->b, "%c ", folded_sign);
+			width -= 2;
 			first = false;
 		} else {
 			ui_browser__printf(&browser->b, "  ");
@@ -1361,8 +1361,10 @@ static int hist_browser__show_hierarchy_entry(struct hist_browser *browser,
 		width -= hpp.buf - s;
 	}
 
-	ui_browser__write_nstring(&browser->b, "", hierarchy_indent);
-	width -= hierarchy_indent;
+	if (!first) {
+		ui_browser__write_nstring(&browser->b, "", hierarchy_indent);
+		width -= hierarchy_indent;
+	}
 
 	if (column >= browser->b.horiz_scroll) {
 		char s[2048];
@@ -1381,7 +1383,13 @@ static int hist_browser__show_hierarchy_entry(struct hist_browser *browser,
 		}
 
 		perf_hpp_list__for_each_format(entry->hpp_list, fmt) {
-			ui_browser__write_nstring(&browser->b, "", 2);
+			if (first) {
+				ui_browser__printf(&browser->b, "%c ", folded_sign);
+				first = false;
+			} else {
+				ui_browser__write_nstring(&browser->b, "", 2);
+			}
+
 			width -= 2;
 
 			/*
@@ -1555,10 +1563,11 @@ static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *brows
 	int indent = hists->nr_hpp_node - 2;
 	bool first_node, first_col;
 
-	ret = scnprintf(buf, size, " ");
+	ret = scnprintf(buf, size, "  ");
 	if (advance_hpp_check(&dummy_hpp, ret))
 		return ret;
 
+	first_node = true;
 	/* the first hpp_list_node is for overhead columns */
 	fmt_node = list_first_entry(&hists->hpp_formats,
 				    struct perf_hpp_list_node, list);
@@ -1573,12 +1582,16 @@ static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *brows
 		ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "  ");
 		if (advance_hpp_check(&dummy_hpp, ret))
 			break;
+
+		first_node = false;
 	}
 
-	ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "%*s",
-			indent * HIERARCHY_INDENT, "");
-	if (advance_hpp_check(&dummy_hpp, ret))
-		return ret;
+	if (!first_node) {
+		ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "%*s",
+				indent * HIERARCHY_INDENT, "");
+		if (advance_hpp_check(&dummy_hpp, ret))
+			return ret;
+	}
 
 	first_node = true;
 	list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) {
@@ -2076,8 +2089,21 @@ void hist_browser__init(struct hist_browser *browser,
 	browser->b.use_navkeypressed	= true;
 	browser->show_headers		= symbol_conf.show_hist_headers;
 
-	hists__for_each_format(hists, fmt)
+	if (symbol_conf.report_hierarchy) {
+		struct perf_hpp_list_node *fmt_node;
+
+		/* count overhead columns (in the first node) */
+		fmt_node = list_first_entry(&hists->hpp_formats,
+					    struct perf_hpp_list_node, list);
+		perf_hpp_list__for_each_format(&fmt_node->hpp, fmt)
+			++browser->b.columns;
+
+		/* add a single column for whole hierarchy sort keys*/
 		++browser->b.columns;
+	} else {
+		hists__for_each_format(hists, fmt)
+			++browser->b.columns;
+	}
 
 	hists__reset_column_width(hists);
 }
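
A minimal sketch of the column arithmetic in the hunk above (counts invented
for illustration): in hierarchy mode the sort keys share a single tree
column, so only the overhead columns are counted individually, plus one:

   #include <stdio.h>

   static int browser_columns(int hierarchy, int overhead_cols, int sort_keys)
   {
           if (hierarchy)
                   return overhead_cols + 1;     /* one column for the whole tree */

           return overhead_cols + sort_keys;     /* one column per sort key */
   }

   int main(void)
   {
           /* e.g. two overhead columns; comm, dso, sym as sort keys */
           printf("flat: %d columns, hierarchy: %d columns\n",
                  browser_columns(0, 2, 3), browser_columns(1, 2, 3));
           return 0;
   }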
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index b02992efb513..a69f027368ef 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1600,18 +1600,18 @@ static void hists__hierarchy_output_resort(struct hists *hists,
 		if (prog)
 			ui_progress__update(prog, 1);
 
+		hists->nr_entries++;
+		if (!he->filtered) {
+			hists->nr_non_filtered_entries++;
+			hists__calc_col_len(hists, he);
+		}
+
 		if (!he->leaf) {
 			hists__hierarchy_output_resort(hists, prog,
 						       &he->hroot_in,
 						       &he->hroot_out,
 						       min_callchain_hits,
 						       use_callchain);
-			hists->nr_entries++;
-			if (!he->filtered) {
-				hists->nr_non_filtered_entries++;
-				hists__calc_col_len(hists, he);
-			}
-
 			continue;
 		}
 

^ permalink raw reply	[flat|nested] 316+ messages in thread

* [GIT PULL] perf fixes
@ 2016-10-28 19:41 Ingo Molnar
  0 siblings, 0 replies; 316+ messages in thread
From: Ingo Molnar @ 2016-10-28 19:41 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Thomas Gleixner, Andrew Morton

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: f92b7604149a55cb601fc0b52911b1e11f0f2514 perf/x86/intel: Honour the CPUID for number of fixed counters in hypervisors

Misc kernel fixes: a virtualization-environment-related fix, an uncore PMU driver 
removal handling fix, a PowerPC fix and new events for Knights Landing.

 Thanks,

	Ingo

------------------>
Imre Palik (1):
      perf/x86/intel: Honour the CPUID for number of fixed counters in hypervisors

Jiri Olsa (2):
      perf/core: Protect PMU device removal with a 'pmu_bus_running' check, to fix CONFIG_DEBUG_TEST_DRIVER_REMOVE=y kernel panic
      perf/powerpc: Don't call perf_event_disable() from atomic context

Lukasz Odzioba (1):
      perf/x86/intel/cstate: Add C-state residency events for Knights Landing


 arch/powerpc/kernel/hw_breakpoint.c |  2 +-
 arch/x86/events/intel/core.c        | 10 +++++++---
 arch/x86/events/intel/cstate.c      | 30 ++++++++++++++++++++++++++----
 include/linux/perf_event.h          |  1 +
 kernel/events/core.c                | 23 +++++++++++++++++------
 5 files changed, 52 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 9781c69eae57..03d089b3ed72 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -275,7 +275,7 @@ int hw_breakpoint_handler(struct die_args *args)
 	if (!stepped) {
 		WARN(1, "Unable to handle hardware breakpoint. Breakpoint at "
 			"0x%lx will be disabled.", info->address);
-		perf_event_disable(bp);
+		perf_event_disable_inatomic(bp);
 		goto out;
 	}
 	/*
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index eab0915f5995..a74a2dbc0180 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3607,10 +3607,14 @@ __init int intel_pmu_init(void)
 
 	/*
 	 * Quirk: v2 perfmon does not report fixed-purpose events, so
-	 * assume at least 3 events:
+	 * assume at least 3 events, when not running in a hypervisor:
 	 */
-	if (version > 1)
-		x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
+	if (version > 1) {
+		int assume = 3 * !boot_cpu_has(X86_FEATURE_HYPERVISOR);
+
+		x86_pmu.num_counters_fixed =
+			max((int)edx.split.num_counters_fixed, assume);
+	}
 
 	if (boot_cpu_has(X86_FEATURE_PDCM)) {
 		u64 capabilities;
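
A short sketch of the "assume" logic added above, with invented inputs: on
bare metal a CPUID report of fewer than three fixed counters is still bumped
to three, while inside a guest the reported value is taken at face value:

   #include <stdio.h>

   static int fixed_counters(int cpuid_reported, int on_hypervisor)
   {
           int assume = 3 * !on_hypervisor;      /* 3 on bare metal, 0 in a guest */

           return cpuid_reported > assume ? cpuid_reported : assume;
   }

   int main(void)
   {
           printf("bare metal, CPUID says 0 -> %d\n", fixed_counters(0, 0));
           printf("guest,      CPUID says 0 -> %d\n", fixed_counters(0, 1));
           printf("guest,      CPUID says 3 -> %d\n", fixed_counters(3, 1));
           return 0;
   }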
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 3ca87b5a8677..4f5ac726335f 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -48,7 +48,8 @@
  *			       Scope: Core
  *	MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter
  *			       perf code: 0x02
- *			       Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ *			       Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW
+ *						SKL,KNL
  *			       Scope: Core
  *	MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
  *			       perf code: 0x03
@@ -56,15 +57,16 @@
  *			       Scope: Core
  *	MSR_PKG_C2_RESIDENCY:  Package C2 Residency Counter.
  *			       perf code: 0x00
- *			       Available model: SNB,IVB,HSW,BDW,SKL
+ *			       Available model: SNB,IVB,HSW,BDW,SKL,KNL
  *			       Scope: Package (physical package)
  *	MSR_PKG_C3_RESIDENCY:  Package C3 Residency Counter.
  *			       perf code: 0x01
- *			       Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ *			       Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL
  *			       Scope: Package (physical package)
  *	MSR_PKG_C6_RESIDENCY:  Package C6 Residency Counter.
  *			       perf code: 0x02
- *			       Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ *			       Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW
+ *						SKL,KNL
  *			       Scope: Package (physical package)
  *	MSR_PKG_C7_RESIDENCY:  Package C7 Residency Counter.
  *			       perf code: 0x03
@@ -118,6 +120,7 @@ struct cstate_model {
 
 /* Quirk flags */
 #define SLM_PKG_C6_USE_C7_MSR	(1UL << 0)
+#define KNL_CORE_C6_MSR		(1UL << 1)
 
 struct perf_cstate_msr {
 	u64	msr;
@@ -488,6 +491,18 @@ static const struct cstate_model slm_cstates __initconst = {
 	.quirks			= SLM_PKG_C6_USE_C7_MSR,
 };
 
+
+static const struct cstate_model knl_cstates __initconst = {
+	.core_events		= BIT(PERF_CSTATE_CORE_C6_RES),
+
+	.pkg_events		= BIT(PERF_CSTATE_PKG_C2_RES) |
+				  BIT(PERF_CSTATE_PKG_C3_RES) |
+				  BIT(PERF_CSTATE_PKG_C6_RES),
+	.quirks			= KNL_CORE_C6_MSR,
+};
+
+
+
 #define X86_CSTATES_MODEL(model, states)				\
 	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) }
 
@@ -523,6 +538,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
 
 	X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE,  snb_cstates),
 	X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates),
+
+	X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNL, knl_cstates),
 	{ },
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
@@ -558,6 +575,11 @@ static int __init cstate_probe(const struct cstate_model *cm)
 	if (cm->quirks & SLM_PKG_C6_USE_C7_MSR)
 		pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY;
 
+	/* KNL has different MSR for CORE C6 */
+	if (cm->quirks & KNL_CORE_C6_MSR)
+		pkg_msr[PERF_CSTATE_CORE_C6_RES].msr = MSR_KNL_CORE_C6_RESIDENCY;
+
+
 	has_cstate_core = cstate_probe_msr(cm->core_events,
 					   PERF_CSTATE_CORE_EVENT_MAX,
 					   core_msr, core_events_attrs);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 060d0ede88df..4741ecdb9817 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1257,6 +1257,7 @@ extern u64 perf_swevent_set_period(struct perf_event *event);
 extern void perf_event_enable(struct perf_event *event);
 extern void perf_event_disable(struct perf_event *event);
 extern void perf_event_disable_local(struct perf_event *event);
+extern void perf_event_disable_inatomic(struct perf_event *event);
 extern void perf_event_task_tick(void);
 #else /* !CONFIG_PERF_EVENTS: */
 static inline void *
diff --git a/kernel/events/core.c b/kernel/events/core.c
index c6e47e97b33f..0e292132efac 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1960,6 +1960,12 @@ void perf_event_disable(struct perf_event *event)
 }
 EXPORT_SYMBOL_GPL(perf_event_disable);
 
+void perf_event_disable_inatomic(struct perf_event *event)
+{
+	event->pending_disable = 1;
+	irq_work_queue(&event->pending);
+}
+
 static void perf_set_shadow_time(struct perf_event *event,
 				 struct perf_event_context *ctx,
 				 u64 tstamp)
@@ -7075,8 +7081,8 @@ static int __perf_event_overflow(struct perf_event *event,
 	if (events && atomic_dec_and_test(&event->event_limit)) {
 		ret = 1;
 		event->pending_kill = POLL_HUP;
-		event->pending_disable = 1;
-		irq_work_queue(&event->pending);
+
+		perf_event_disable_inatomic(event);
 	}
 
 	READ_ONCE(event->overflow_handler)(event, data, regs);
@@ -8855,7 +8861,10 @@ EXPORT_SYMBOL_GPL(perf_pmu_register);
 
 void perf_pmu_unregister(struct pmu *pmu)
 {
+	int remove_device;
+
 	mutex_lock(&pmus_lock);
+	remove_device = pmu_bus_running;
 	list_del_rcu(&pmu->entry);
 	mutex_unlock(&pmus_lock);
 
@@ -8869,10 +8878,12 @@ void perf_pmu_unregister(struct pmu *pmu)
 	free_percpu(pmu->pmu_disable_count);
 	if (pmu->type >= PERF_TYPE_MAX)
 		idr_remove(&pmu_idr, pmu->type);
-	if (pmu->nr_addr_filters)
-		device_remove_file(pmu->dev, &dev_attr_nr_addr_filters);
-	device_del(pmu->dev);
-	put_device(pmu->dev);
+	if (remove_device) {
+		if (pmu->nr_addr_filters)
+			device_remove_file(pmu->dev, &dev_attr_nr_addr_filters);
+		device_del(pmu->dev);
+		put_device(pmu->dev);
+	}
 	free_pmu_context(pmu);
 }
 EXPORT_SYMBOL_GPL(perf_pmu_unregister);

^ permalink raw reply	[flat|nested] 316+ messages in thread

* [GIT PULL] perf fixes
@ 2016-10-18 11:07 Ingo Molnar
  0 siblings, 0 replies; 316+ messages in thread
From: Ingo Molnar @ 2016-10-18 11:07 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Thomas Gleixner, Andrew Morton

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: 0130669966bf337b616d559d5405614c0a4ae313 Merge tag 'perf-urgent-for-mingo-20161017' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent

Four tooling fixes, two kprobes KASAN-related fixes and an x86 PMU driver 
fix/cleanup.

 Thanks,

	Ingo

------------------>
Anton Blanchard (1):
      perf jit: Fix build issue on Ubuntu

Dan Carpenter (1):
      perf/x86/intel: Remove an inconsistent NULL check

Dmitry Vyukov (2):
      kprobes: Avoid false KASAN reports during stack copy
      kprobes: Unpoison stack in jprobe_return() for KASAN

Jiri Olsa (1):
      perf header: Set nr_numa_nodes only when we parsed all the data

Namhyung Kim (1):
      perf top: Fix refreshing hierarchy entries on TUI

Wang Nan (1):
      perf jevents: Handle events including .c and .o


 arch/arm64/kernel/sleep.S      |  2 +-
 arch/x86/events/intel/lbr.c    |  4 ++--
 arch/x86/kernel/kprobes/core.c | 11 ++++++++---
 include/linux/kasan.h          |  2 ++
 mm/kasan/kasan.c               | 22 +++++++++++++++++++---
 tools/perf/jvmti/Makefile      |  2 +-
 tools/perf/ui/browsers/hists.c |  3 ++-
 tools/perf/util/header.c       |  2 +-
 tools/perf/util/parse-events.l |  4 ++--
 9 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index b8799e7c79de..1bec41b5fda3 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -135,7 +135,7 @@ ENTRY(_cpu_resume)
 
 #ifdef CONFIG_KASAN
 	mov	x0, sp
-	bl	kasan_unpoison_remaining_stack
+	bl	kasan_unpoison_task_stack_below
 #endif
 
 	ldp	x19, x20, [x29, #16]
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index fc6cf21c535e..81b321ace8e0 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -458,8 +458,8 @@ void intel_pmu_lbr_del(struct perf_event *event)
 	if (!x86_pmu.lbr_nr)
 		return;
 
-	if (branch_user_callstack(cpuc->br_sel) && event->ctx &&
-					event->ctx->task_ctx_data) {
+	if (branch_user_callstack(cpuc->br_sel) &&
+	    event->ctx->task_ctx_data) {
 		task_ctx = event->ctx->task_ctx_data;
 		task_ctx->lbr_callstack_users--;
 	}
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 28cee019209c..d9d8d16b69db 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -50,6 +50,7 @@
 #include <linux/kallsyms.h>
 #include <linux/ftrace.h>
 #include <linux/frame.h>
+#include <linux/kasan.h>
 
 #include <asm/text-patching.h>
 #include <asm/cacheflush.h>
@@ -1057,9 +1058,10 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	 * tailcall optimization. So, to be absolutely safe
 	 * we also save and restore enough stack bytes to cover
 	 * the argument area.
+	 * Use __memcpy() to avoid KASAN stack out-of-bounds reports as we copy
+	 * raw stack chunk with redzones:
 	 */
-	memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr,
-	       MIN_STACK_SIZE(addr));
+	__memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, MIN_STACK_SIZE(addr));
 	regs->flags &= ~X86_EFLAGS_IF;
 	trace_hardirqs_off();
 	regs->ip = (unsigned long)(jp->entry);
@@ -1080,6 +1082,9 @@ void jprobe_return(void)
 {
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 
+	/* Unpoison stack redzones in the frames we are going to jump over. */
+	kasan_unpoison_stack_above_sp_to(kcb->jprobe_saved_sp);
+
 	asm volatile (
 #ifdef CONFIG_X86_64
 			"       xchg   %%rbx,%%rsp	\n"
@@ -1118,7 +1123,7 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 		/* It's OK to start function graph tracing again */
 		unpause_graph_tracing();
 		*regs = kcb->jprobe_saved_regs;
-		memcpy(saved_sp, kcb->jprobes_stack, MIN_STACK_SIZE(saved_sp));
+		__memcpy(saved_sp, kcb->jprobes_stack, MIN_STACK_SIZE(saved_sp));
 		preempt_enable_no_resched();
 		return 1;
 	}
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index d600303306eb..820c0ad54a01 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -44,6 +44,7 @@ static inline void kasan_disable_current(void)
 void kasan_unpoison_shadow(const void *address, size_t size);
 
 void kasan_unpoison_task_stack(struct task_struct *task);
+void kasan_unpoison_stack_above_sp_to(const void *watermark);
 
 void kasan_alloc_pages(struct page *page, unsigned int order);
 void kasan_free_pages(struct page *page, unsigned int order);
@@ -85,6 +86,7 @@ size_t kasan_metadata_size(struct kmem_cache *cache);
 static inline void kasan_unpoison_shadow(const void *address, size_t size) {}
 
 static inline void kasan_unpoison_task_stack(struct task_struct *task) {}
+static inline void kasan_unpoison_stack_above_sp_to(const void *watermark) {}
 
 static inline void kasan_enable_current(void) {}
 static inline void kasan_disable_current(void) {}
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 88af13c00d3c..70c009741aab 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -34,6 +34,7 @@
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/vmalloc.h>
+#include <linux/bug.h>
 
 #include "kasan.h"
 #include "../slab.h"
@@ -62,7 +63,7 @@ void kasan_unpoison_shadow(const void *address, size_t size)
 	}
 }
 
-static void __kasan_unpoison_stack(struct task_struct *task, void *sp)
+static void __kasan_unpoison_stack(struct task_struct *task, const void *sp)
 {
 	void *base = task_stack_page(task);
 	size_t size = sp - base;
@@ -77,9 +78,24 @@ void kasan_unpoison_task_stack(struct task_struct *task)
 }
 
 /* Unpoison the stack for the current task beyond a watermark sp value. */
-asmlinkage void kasan_unpoison_remaining_stack(void *sp)
+asmlinkage void kasan_unpoison_task_stack_below(const void *watermark)
 {
-	__kasan_unpoison_stack(current, sp);
+	__kasan_unpoison_stack(current, watermark);
+}
+
+/*
+ * Clear all poison for the region between the current SP and a provided
+ * watermark value, as is sometimes required prior to hand-crafted asm function
+ * returns in the middle of functions.
+ */
+void kasan_unpoison_stack_above_sp_to(const void *watermark)
+{
+	const void *sp = __builtin_frame_address(0);
+	size_t size = watermark - sp;
+
+	if (WARN_ON(sp > watermark))
+		return;
+	kasan_unpoison_shadow(sp, size);
 }
 
 /*
diff --git a/tools/perf/jvmti/Makefile b/tools/perf/jvmti/Makefile
index 5ce61a1bda9c..df14e6b67b63 100644
--- a/tools/perf/jvmti/Makefile
+++ b/tools/perf/jvmti/Makefile
@@ -36,7 +36,7 @@ SOLIBEXT=so
 # The following works at least on fedora 23, you may need the next
 # line for other distros.
 ifneq (,$(wildcard /usr/sbin/update-java-alternatives))
-JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | cut -d ' ' -f 3)
+JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | awk '{print $$3}')
 else
   ifneq (,$(wildcard /usr/sbin/alternatives))
     JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g')
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index fb8e42c7507a..4ffff7be9299 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -601,7 +601,8 @@ int hist_browser__run(struct hist_browser *browser, const char *help)
 			u64 nr_entries;
 			hbt->timer(hbt->arg);
 
-			if (hist_browser__has_filter(browser))
+			if (hist_browser__has_filter(browser) ||
+			    symbol_conf.report_hierarchy)
 				hist_browser__update_nr_entries(browser);
 
 			nr_entries = hist_browser__nr_entries(browser);
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 85dd0db0a127..2f3eded54b0c 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1895,7 +1895,6 @@ static int process_numa_topology(struct perf_file_section *section __maybe_unuse
 	if (ph->needs_swap)
 		nr = bswap_32(nr);
 
-	ph->env.nr_numa_nodes = nr;
 	nodes = zalloc(sizeof(*nodes) * nr);
 	if (!nodes)
 		return -ENOMEM;
@@ -1932,6 +1931,7 @@ static int process_numa_topology(struct perf_file_section *section __maybe_unuse
 
 		free(str);
 	}
+	ph->env.nr_numa_nodes = nr;
 	ph->env.numa_nodes = nodes;
 	return 0;
 
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 9f43fda2570f..660fca05bc93 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -136,8 +136,8 @@ do {							\
 group		[^,{}/]*[{][^}]*[}][^,{}/]*
 event_pmu	[^,{}/]+[/][^/]*[/][^,{}/]*
 event		[^,{}/]+
-bpf_object	.*\.(o|bpf)
-bpf_source	.*\.c
+bpf_object	[^,{}]+\.(o|bpf)
+bpf_source	[^,{}]+\.c
 
 num_dec		[0-9]+
 num_hex		0x[a-fA-F0-9]+

^ permalink raw reply	[flat|nested] 316+ messages in thread

* [GIT PULL] perf fixes
@ 2016-09-13 18:14 Ingo Molnar
  0 siblings, 0 replies; 316+ messages in thread
From: Ingo Molnar @ 2016-09-13 18:14 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Alexander Shishkin, Thomas Gleixner, Andrew Morton

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: 8ef9b8455a2a3049efa9e46e8a6402b972a3eb41 perf/x86/intel: Fix PEBSv3 record drain

This contains:

- A set of fixes found by directed-random perf fuzzing efforts by Vince Weaver, 
  Alexander Shishkin and Peter Zijlstra,

- a cqm driver crash fix,

- an AMD uncore driver use-after-free fix.

 Thanks,

	Ingo

------------------>
Alexander Shishkin (5):
      perf/core: Fix a race between mmap_close() and set_output() of AUX events
      perf/core: Fix aux_mmap_count vs aux_refcount order
      perf/x86/intel/bts: Fix confused ordering of PMU callbacks
      perf/x86/intel/bts: Fix BTS PMI detection
      perf/x86/intel/bts: Kill a silly warning

Jiri Olsa (1):
      perf/x86/intel/cqm: Check cqm/mbm enabled state in event init

Peter Zijlstra (2):
      perf/core: Remove WARN from perf_event_read()
      perf/x86/intel: Fix PEBSv3 record drain

Sebastian Andrzej Siewior (1):
      perf/x86/amd/uncore: Prevent use after free


 arch/x86/events/amd/uncore.c |  22 ++++++--
 arch/x86/events/intel/bts.c  | 123 +++++++++++++++++++++++++++++++++----------
 arch/x86/events/intel/cqm.c  |   9 ++++
 arch/x86/events/intel/ds.c   |  19 ++++---
 kernel/events/core.c         |  47 +++++++++++++----
 kernel/events/ring_buffer.c  |  15 ++++--
 6 files changed, 180 insertions(+), 55 deletions(-)

diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index e6131d4454e6..65577f081d07 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -29,6 +29,8 @@
 
 #define COUNTER_SHIFT		16
 
+static HLIST_HEAD(uncore_unused_list);
+
 struct amd_uncore {
 	int id;
 	int refcnt;
@@ -39,7 +41,7 @@ struct amd_uncore {
 	cpumask_t *active_mask;
 	struct pmu *pmu;
 	struct perf_event *events[MAX_COUNTERS];
-	struct amd_uncore *free_when_cpu_online;
+	struct hlist_node node;
 };
 
 static struct amd_uncore * __percpu *amd_uncore_nb;
@@ -306,6 +308,7 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu)
 		uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL;
 		uncore_nb->active_mask = &amd_nb_active_mask;
 		uncore_nb->pmu = &amd_nb_pmu;
+		uncore_nb->id = -1;
 		*per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb;
 	}
 
@@ -319,6 +322,7 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu)
 		uncore_l2->msr_base = MSR_F16H_L2I_PERF_CTL;
 		uncore_l2->active_mask = &amd_l2_active_mask;
 		uncore_l2->pmu = &amd_l2_pmu;
+		uncore_l2->id = -1;
 		*per_cpu_ptr(amd_uncore_l2, cpu) = uncore_l2;
 	}
 
@@ -348,7 +352,7 @@ amd_uncore_find_online_sibling(struct amd_uncore *this,
 			continue;
 
 		if (this->id == that->id) {
-			that->free_when_cpu_online = this;
+			hlist_add_head(&this->node, &uncore_unused_list);
 			this = that;
 			break;
 		}
@@ -388,13 +392,23 @@ static int amd_uncore_cpu_starting(unsigned int cpu)
 	return 0;
 }
 
+static void uncore_clean_online(void)
+{
+	struct amd_uncore *uncore;
+	struct hlist_node *n;
+
+	hlist_for_each_entry_safe(uncore, n, &uncore_unused_list, node) {
+		hlist_del(&uncore->node);
+		kfree(uncore);
+	}
+}
+
 static void uncore_online(unsigned int cpu,
 			  struct amd_uncore * __percpu *uncores)
 {
 	struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
 
-	kfree(uncore->free_when_cpu_online);
-	uncore->free_when_cpu_online = NULL;
+	uncore_clean_online();
 
 	if (cpu == uncore->cpu)
 		cpumask_set_cpu(cpu, uncore->active_mask);
diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
index 0a6e393a2e62..bdcd6510992c 100644
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -31,7 +31,17 @@
 struct bts_ctx {
 	struct perf_output_handle	handle;
 	struct debug_store		ds_back;
-	int				started;
+	int				state;
+};
+
+/* BTS context states: */
+enum {
+	/* no ongoing AUX transactions */
+	BTS_STATE_STOPPED = 0,
+	/* AUX transaction is on, BTS tracing is disabled */
+	BTS_STATE_INACTIVE,
+	/* AUX transaction is on, BTS tracing is running */
+	BTS_STATE_ACTIVE,
 };
 
 static DEFINE_PER_CPU(struct bts_ctx, bts_ctx);
@@ -204,6 +214,15 @@ static void bts_update(struct bts_ctx *bts)
 static int
 bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle);
 
+/*
+ * Ordering PMU callbacks wrt themselves and the PMI is done by means
+ * of bts::state, which:
+ *  - is set when bts::handle::event is valid, that is, between
+ *    perf_aux_output_begin() and perf_aux_output_end();
+ *  - is zero otherwise;
+ *  - is ordered against bts::handle::event with a compiler barrier.
+ */
+
 static void __bts_event_start(struct perf_event *event)
 {
 	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
@@ -221,10 +240,13 @@ static void __bts_event_start(struct perf_event *event)
 
 	/*
 	 * local barrier to make sure that ds configuration made it
-	 * before we enable BTS
+	 * before we enable BTS and bts::state goes ACTIVE
 	 */
 	wmb();
 
+	/* INACTIVE/STOPPED -> ACTIVE */
+	WRITE_ONCE(bts->state, BTS_STATE_ACTIVE);
+
 	intel_pmu_enable_bts(config);
 
 }
@@ -251,9 +273,6 @@ static void bts_event_start(struct perf_event *event, int flags)
 
 	__bts_event_start(event);
 
-	/* PMI handler: this counter is running and likely generating PMIs */
-	ACCESS_ONCE(bts->started) = 1;
-
 	return;
 
 fail_end_stop:
@@ -263,30 +282,34 @@ static void bts_event_start(struct perf_event *event, int flags)
 	event->hw.state = PERF_HES_STOPPED;
 }
 
-static void __bts_event_stop(struct perf_event *event)
+static void __bts_event_stop(struct perf_event *event, int state)
 {
+	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+
+	/* ACTIVE -> INACTIVE(PMI)/STOPPED(->stop()) */
+	WRITE_ONCE(bts->state, state);
+
 	/*
 	 * No extra synchronization is mandated by the documentation to have
 	 * BTS data stores globally visible.
 	 */
 	intel_pmu_disable_bts();
-
-	if (event->hw.state & PERF_HES_STOPPED)
-		return;
-
-	ACCESS_ONCE(event->hw.state) |= PERF_HES_STOPPED;
 }
 
 static void bts_event_stop(struct perf_event *event, int flags)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
-	struct bts_buffer *buf = perf_get_aux(&bts->handle);
+	struct bts_buffer *buf = NULL;
+	int state = READ_ONCE(bts->state);
 
-	/* PMI handler: don't restart this counter */
-	ACCESS_ONCE(bts->started) = 0;
+	if (state == BTS_STATE_ACTIVE)
+		__bts_event_stop(event, BTS_STATE_STOPPED);
 
-	__bts_event_stop(event);
+	if (state != BTS_STATE_STOPPED)
+		buf = perf_get_aux(&bts->handle);
+
+	event->hw.state |= PERF_HES_STOPPED;
 
 	if (flags & PERF_EF_UPDATE) {
 		bts_update(bts);
@@ -296,6 +319,7 @@ static void bts_event_stop(struct perf_event *event, int flags)
 				bts->handle.head =
 					local_xchg(&buf->data_size,
 						   buf->nr_pages << PAGE_SHIFT);
+
 			perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
 					    !!local_xchg(&buf->lost, 0));
 		}
@@ -310,8 +334,20 @@ static void bts_event_stop(struct perf_event *event, int flags)
 void intel_bts_enable_local(void)
 {
 	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+	int state = READ_ONCE(bts->state);
+
+	/*
+	 * Here we transition from INACTIVE to ACTIVE;
+	 * if we instead are STOPPED from the interrupt handler,
+	 * stay that way. Can't be ACTIVE here though.
+	 */
+	if (WARN_ON_ONCE(state == BTS_STATE_ACTIVE))
+		return;
+
+	if (state == BTS_STATE_STOPPED)
+		return;
 
-	if (bts->handle.event && bts->started)
+	if (bts->handle.event)
 		__bts_event_start(bts->handle.event);
 }
 
@@ -319,8 +355,15 @@ void intel_bts_disable_local(void)
 {
 	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
 
+	/*
+	 * Here we transition from ACTIVE to INACTIVE;
+	 * do nothing for STOPPED or INACTIVE.
+	 */
+	if (READ_ONCE(bts->state) != BTS_STATE_ACTIVE)
+		return;
+
 	if (bts->handle.event)
-		__bts_event_stop(bts->handle.event);
+		__bts_event_stop(bts->handle.event, BTS_STATE_INACTIVE);
 }
 
 static int
@@ -335,8 +378,6 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
 		return 0;
 
 	head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1);
-	if (WARN_ON_ONCE(head != local_read(&buf->head)))
-		return -EINVAL;
 
 	phys = &buf->buf[buf->cur_buf];
 	space = phys->offset + phys->displacement + phys->size - head;
@@ -403,22 +444,37 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
 
 int intel_bts_interrupt(void)
 {
+	struct debug_store *ds = this_cpu_ptr(&cpu_hw_events)->ds;
 	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
 	struct perf_event *event = bts->handle.event;
 	struct bts_buffer *buf;
 	s64 old_head;
-	int err;
+	int err = -ENOSPC, handled = 0;
 
-	if (!event || !bts->started)
-		return 0;
+	/*
+	 * The only surefire way of knowing if this NMI is ours is by checking
+	 * the write ptr against the PMI threshold.
+	 */
+	if (ds->bts_index >= ds->bts_interrupt_threshold)
+		handled = 1;
+
+	/*
+	 * this is wrapped in intel_bts_enable_local/intel_bts_disable_local,
+	 * so we can only be INACTIVE or STOPPED
+	 */
+	if (READ_ONCE(bts->state) == BTS_STATE_STOPPED)
+		return handled;
 
 	buf = perf_get_aux(&bts->handle);
+	if (!buf)
+		return handled;
+
 	/*
 	 * Skip snapshot counters: they don't use the interrupt, but
 	 * there's no other way of telling, because the pointer will
 	 * keep moving
 	 */
-	if (!buf || buf->snapshot)
+	if (buf->snapshot)
 		return 0;
 
 	old_head = local_read(&buf->head);
@@ -426,18 +482,27 @@ int intel_bts_interrupt(void)
 
 	/* no new data */
 	if (old_head == local_read(&buf->head))
-		return 0;
+		return handled;
 
 	perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
 			    !!local_xchg(&buf->lost, 0));
 
 	buf = perf_aux_output_begin(&bts->handle, event);
-	if (!buf)
-		return 1;
+	if (buf)
+		err = bts_buffer_reset(buf, &bts->handle);
+
+	if (err) {
+		WRITE_ONCE(bts->state, BTS_STATE_STOPPED);
 
-	err = bts_buffer_reset(buf, &bts->handle);
-	if (err)
-		perf_aux_output_end(&bts->handle, 0, false);
+		if (buf) {
+			/*
+			 * BTS_STATE_STOPPED should be visible before
+			 * cleared handle::event
+			 */
+			barrier();
+			perf_aux_output_end(&bts->handle, 0, false);
+		}
+	}
 
 	return 1;
 }
diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
index 783c49ddef29..8f82b02934fa 100644
--- a/arch/x86/events/intel/cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -458,6 +458,11 @@ static void __intel_cqm_event_count(void *info);
 static void init_mbm_sample(u32 rmid, u32 evt_type);
 static void __intel_mbm_event_count(void *info);
 
+static bool is_cqm_event(int e)
+{
+	return (e == QOS_L3_OCCUP_EVENT_ID);
+}
+
 static bool is_mbm_event(int e)
 {
 	return (e >= QOS_MBM_TOTAL_EVENT_ID && e <= QOS_MBM_LOCAL_EVENT_ID);
@@ -1366,6 +1371,10 @@ static int intel_cqm_event_init(struct perf_event *event)
 	     (event->attr.config > QOS_MBM_LOCAL_EVENT_ID))
 		return -EINVAL;
 
+	if ((is_cqm_event(event->attr.config) && !cqm_enabled) ||
+	    (is_mbm_event(event->attr.config) && !mbm_enabled))
+		return -EINVAL;
+
 	/* unsupported modes and filters */
 	if (event->attr.exclude_user   ||
 	    event->attr.exclude_kernel ||
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 7ce9f3f669e6..9b983a474253 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1274,18 +1274,18 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 		struct pebs_record_nhm *p = at;
 		u64 pebs_status;
 
-		/* PEBS v3 has accurate status bits */
+		pebs_status = p->status & cpuc->pebs_enabled;
+		pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1;
+
+		/* PEBS v3 has more accurate status bits */
 		if (x86_pmu.intel_cap.pebs_format >= 3) {
-			for_each_set_bit(bit, (unsigned long *)&p->status,
-					 MAX_PEBS_EVENTS)
+			for_each_set_bit(bit, (unsigned long *)&pebs_status,
+					 x86_pmu.max_pebs_events)
 				counts[bit]++;
 
 			continue;
 		}
 
-		pebs_status = p->status & cpuc->pebs_enabled;
-		pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1;
-
 		/*
 		 * On some CPUs the PEBS status can be zero when PEBS is
 		 * racing with clearing of GLOBAL_STATUS.
@@ -1333,8 +1333,11 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 			continue;
 
 		event = cpuc->events[bit];
-		WARN_ON_ONCE(!event);
-		WARN_ON_ONCE(!event->attr.precise_ip);
+		if (WARN_ON_ONCE(!event))
+			continue;
+
+		if (WARN_ON_ONCE(!event->attr.precise_ip))
+			continue;
 
 		/* log dropped samples number */
 		if (error[bit])
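
A self-contained illustration (values invented) of the masking that the
first ds.c hunk above applies before iterating: the raw PEBS status is
restricted to the counters actually enabled for PEBS and to the PMU's real
counter count, and only the surviving bits are walked:

   #include <stdint.h>
   #include <stdio.h>

   int main(void)
   {
           uint64_t raw_status   = 0xf5;   /* bits the hardware reported    */
           uint64_t pebs_enabled = 0x0f;   /* counters we enabled for PEBS  */
           int max_pebs_events   = 4;      /* counters this PMU really has  */

           uint64_t status = raw_status & pebs_enabled;

           status &= (1ULL << max_pebs_events) - 1;

           for (int bit = 0; bit < max_pebs_events; bit++)
                   if (status & (1ULL << bit))
                           printf("counter %d has a PEBS record\n", bit);
           return 0;
   }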
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3cfabdf7b942..a54f2c2cdb20 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2496,11 +2496,11 @@ static int __perf_event_stop(void *info)
 	return 0;
 }
 
-static int perf_event_restart(struct perf_event *event)
+static int perf_event_stop(struct perf_event *event, int restart)
 {
 	struct stop_event_data sd = {
 		.event		= event,
-		.restart	= 1,
+		.restart	= restart,
 	};
 	int ret = 0;
 
@@ -3549,10 +3549,18 @@ static int perf_event_read(struct perf_event *event, bool group)
 			.group = group,
 			.ret = 0,
 		};
-		ret = smp_call_function_single(event->oncpu, __perf_event_read, &data, 1);
-		/* The event must have been read from an online CPU: */
-		WARN_ON_ONCE(ret);
-		ret = ret ? : data.ret;
+		/*
+		 * Purposely ignore the smp_call_function_single() return
+		 * value.
+		 *
+		 * If event->oncpu isn't a valid CPU it means the event got
+		 * scheduled out and that will have updated the event count.
+		 *
+		 * Therefore, either way, we'll have an up-to-date event count
+		 * after this.
+		 */
+		(void)smp_call_function_single(event->oncpu, __perf_event_read, &data, 1);
+		ret = data.ret;
 	} else if (event->state == PERF_EVENT_STATE_INACTIVE) {
 		struct perf_event_context *ctx = event->ctx;
 		unsigned long flags;
@@ -4837,6 +4845,19 @@ static void ring_buffer_attach(struct perf_event *event,
 		spin_unlock_irqrestore(&rb->event_lock, flags);
 	}
 
+	/*
+	 * Avoid racing with perf_mmap_close(AUX): stop the event
+	 * before swizzling the event::rb pointer; if it's getting
+	 * unmapped, its aux_mmap_count will be 0 and it won't
+	 * restart. See the comment in __perf_pmu_output_stop().
+	 *
+	 * Data will inevitably be lost when set_output is done in
+	 * mid-air, but then again, whoever does it like this is
+	 * not in for the data anyway.
+	 */
+	if (has_aux(event))
+		perf_event_stop(event, 0);
+
 	rcu_assign_pointer(event->rb, rb);
 
 	if (old_rb) {
@@ -6112,7 +6133,7 @@ static void perf_event_addr_filters_exec(struct perf_event *event, void *data)
 	raw_spin_unlock_irqrestore(&ifh->lock, flags);
 
 	if (restart)
-		perf_event_restart(event);
+		perf_event_stop(event, 1);
 }
 
 void perf_event_exec(void)
@@ -6156,7 +6177,13 @@ static void __perf_event_output_stop(struct perf_event *event, void *data)
 
 	/*
 	 * In case of inheritance, it will be the parent that links to the
-	 * ring-buffer, but it will be the child that's actually using it:
+	 * ring-buffer, but it will be the child that's actually using it.
+	 *
+	 * We are using event::rb to determine if the event should be stopped,
+	 * however this may race with ring_buffer_attach() (through set_output),
+	 * which will make us skip the event that actually needs to be stopped.
+	 * So ring_buffer_attach() has to stop an aux event before re-assigning
+	 * its rb pointer.
 	 */
 	if (rcu_dereference(parent->rb) == rb)
 		ro->err = __perf_event_stop(&sd);
@@ -6670,7 +6697,7 @@ static void __perf_addr_filters_adjust(struct perf_event *event, void *data)
 	raw_spin_unlock_irqrestore(&ifh->lock, flags);
 
 	if (restart)
-		perf_event_restart(event);
+		perf_event_stop(event, 1);
 }
 
 /*
@@ -7859,7 +7886,7 @@ static void perf_event_addr_filters_apply(struct perf_event *event)
 	mmput(mm);
 
 restart:
-	perf_event_restart(event);
+	perf_event_stop(event, 1);
 }
 
 /*
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index ae9b90dc9a5a..257fa460b846 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -330,15 +330,22 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
 	if (!rb)
 		return NULL;
 
-	if (!rb_has_aux(rb) || !atomic_inc_not_zero(&rb->aux_refcount))
+	if (!rb_has_aux(rb))
 		goto err;
 
 	/*
-	 * If rb::aux_mmap_count is zero (and rb_has_aux() above went through),
-	 * the aux buffer is in perf_mmap_close(), about to get freed.
+	 * If aux_mmap_count is zero, the aux buffer is in perf_mmap_close(),
+	 * about to get freed, so we leave immediately.
+	 *
+	 * Checking rb::aux_mmap_count and rb::refcount has to be done in
+	 * the same order, see perf_mmap_close. Otherwise we end up freeing
+	 * aux pages in this path, which is a bug, because in_atomic().
 	 */
 	if (!atomic_read(&rb->aux_mmap_count))
-		goto err_put;
+		goto err;
+
+	if (!atomic_inc_not_zero(&rb->aux_refcount))
+		goto err;
 
 	/*
 	 * Nesting is not supported for AUX area, make sure nested
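
The ordering change above leans on atomic_inc_not_zero(): a reference to the
AUX buffer is taken only while its refcount is still non-zero, so a buffer
that perf_mmap_close() is already tearing down is never revived. As a rough
userspace illustration of that primitive (a C11 sketch, not the kernel's
implementation), the same "increment only if non-zero" behaviour falls out
of a compare-and-swap loop:

  #include <stdatomic.h>
  #include <stdio.h>

  /* Sketch of the atomic_inc_not_zero() idea used above: take a
   * reference only while the count has not already dropped to zero. */
  static int inc_not_zero(atomic_int *refcount)
  {
          int old = atomic_load(refcount);

          while (old != 0) {
                  /* On failure the CAS reloads 'old', so just retry. */
                  if (atomic_compare_exchange_weak(refcount, &old, old + 1))
                          return 1;       /* reference taken */
          }
          return 0;                       /* object is being freed */
  }

  int main(void)
  {
          atomic_int ref = 1;

          printf("live:  got ref = %d\n", inc_not_zero(&ref));
          atomic_store(&ref, 0);
          printf("dying: got ref = %d\n", inc_not_zero(&ref));
          return 0;
  }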


* [GIT PULL] perf fixes
@ 2016-08-18 20:38 Ingo Molnar
  0 siblings, 0 replies; 316+ messages in thread
From: Ingo Molnar @ 2016-08-18 20:38 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Peter Zijlstra, Arnaldo Carvalho de Melo,
	Thomas Gleixner, Andrew Morton

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: 71e7bc2bab77e64882c031c2af943c3256c1adb0 perf/core: Check return value of the perf_event_read() IPI

Mostly tooling fixes, but also fixes related to start/stop filters, a perf event
read() fix, a fix uncovered by fuzzing, and an uprobes leak fix.

 Thanks,

	Ingo

------------------>
Adrian Hunter (2):
      perf intel-pt: Fix ip compression
      perf intel-pt: Fix occasional decoding errors when tracing system-wide

Anton Blanchard (1):
      perf symbols: Fix annotation of objects with debuginfo files

Arnaldo Carvalho de Melo (3):
      perf jitdump: Add the right header to get the major()/minor() definitions
      perf probe: Release resources on error when handling exit paths
      tools: Sync kvm related header files for arm64 and s390

Colin Ian King (1):
      perf probe: Check for dup and fdopen failures

David Carrillo-Cisneros (1):
      perf/core: Check return value of the perf_event_read() IPI

He Kuang (2):
      perf script: Show proper message when failed list scripts
      perf script: Don't disable use_callchain if input is pipe

Jiri Olsa (1):
      perf tools mem: Fix -t store option for record command

Mathieu Poirier (3):
      perf/core: Fix file name handling for start/stop filters
      perf/core: Update filters only on executable mmap
      perf/core: Enable mapping of the stop filters

Oleg Nesterov (1):
      uprobes: Fix the memcg accounting

Peter Zijlstra (1):
      perf/core: Fix event_function_local()

Ravi Bangoria (1):
      perf ppc64le: Fix build failure when libelf is not present


 kernel/events/core.c                               | 95 ++++++++++++++++------
 kernel/events/uprobes.c                            |  5 +-
 tools/arch/arm64/include/uapi/asm/kvm.h            |  2 +
 tools/arch/s390/include/uapi/asm/kvm.h             | 41 ++++++++++
 tools/arch/s390/include/uapi/asm/sie.h             |  1 +
 tools/perf/arch/powerpc/util/sym-handling.c        |  2 +
 tools/perf/arch/x86/util/intel-pt.c                |  6 +-
 tools/perf/builtin-mem.c                           |  3 +
 tools/perf/builtin-script.c                        | 13 ++-
 .../perf/util/intel-pt-decoder/intel-pt-decoder.c  | 44 +++++-----
 .../util/intel-pt-decoder/intel-pt-pkt-decoder.c   | 24 ++++--
 tools/perf/util/jitdump.c                          |  1 +
 tools/perf/util/probe-file.c                       | 36 ++++++--
 tools/perf/util/symbol-elf.c                       |  3 +-
 14 files changed, 207 insertions(+), 69 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1903b8f3a705..5650f5317e0c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -242,18 +242,6 @@ static int event_function(void *info)
 	return ret;
 }
 
-static void event_function_local(struct perf_event *event, event_f func, void *data)
-{
-	struct event_function_struct efs = {
-		.event = event,
-		.func = func,
-		.data = data,
-	};
-
-	int ret = event_function(&efs);
-	WARN_ON_ONCE(ret);
-}
-
 static void event_function_call(struct perf_event *event, event_f func, void *data)
 {
 	struct perf_event_context *ctx = event->ctx;
@@ -303,6 +291,54 @@ static void event_function_call(struct perf_event *event, event_f func, void *da
 	raw_spin_unlock_irq(&ctx->lock);
 }
 
+/*
+ * Similar to event_function_call() + event_function(), but hard assumes IRQs
+ * are already disabled and we're on the right CPU.
+ */
+static void event_function_local(struct perf_event *event, event_f func, void *data)
+{
+	struct perf_event_context *ctx = event->ctx;
+	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+	struct task_struct *task = READ_ONCE(ctx->task);
+	struct perf_event_context *task_ctx = NULL;
+
+	WARN_ON_ONCE(!irqs_disabled());
+
+	if (task) {
+		if (task == TASK_TOMBSTONE)
+			return;
+
+		task_ctx = ctx;
+	}
+
+	perf_ctx_lock(cpuctx, task_ctx);
+
+	task = ctx->task;
+	if (task == TASK_TOMBSTONE)
+		goto unlock;
+
+	if (task) {
+		/*
+		 * We must be either inactive or active and the right task,
+		 * otherwise we're screwed, since we cannot IPI to somewhere
+		 * else.
+		 */
+		if (ctx->is_active) {
+			if (WARN_ON_ONCE(task != current))
+				goto unlock;
+
+			if (WARN_ON_ONCE(cpuctx->task_ctx != ctx))
+				goto unlock;
+		}
+	} else {
+		WARN_ON_ONCE(&cpuctx->ctx != ctx);
+	}
+
+	func(event, cpuctx, ctx, data);
+unlock:
+	perf_ctx_unlock(cpuctx, task_ctx);
+}
+
 #define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\
 		       PERF_FLAG_FD_OUTPUT  |\
 		       PERF_FLAG_PID_CGROUP |\
@@ -3513,9 +3549,10 @@ static int perf_event_read(struct perf_event *event, bool group)
 			.group = group,
 			.ret = 0,
 		};
-		smp_call_function_single(event->oncpu,
-					 __perf_event_read, &data, 1);
-		ret = data.ret;
+		ret = smp_call_function_single(event->oncpu, __perf_event_read, &data, 1);
+		/* The event must have been read from an online CPU: */
+		WARN_ON_ONCE(ret);
+		ret = ret ? : data.ret;
 	} else if (event->state == PERF_EVENT_STATE_INACTIVE) {
 		struct perf_event_context *ctx = event->ctx;
 		unsigned long flags;
@@ -6584,15 +6621,6 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 }
 
 /*
- * Whether this @filter depends on a dynamic object which is not loaded
- * yet or its load addresses are not known.
- */
-static bool perf_addr_filter_needs_mmap(struct perf_addr_filter *filter)
-{
-	return filter->filter && filter->inode;
-}
-
-/*
  * Check whether inode and address range match filter criteria.
  */
 static bool perf_addr_filter_match(struct perf_addr_filter *filter,
@@ -6653,6 +6681,13 @@ static void perf_addr_filters_adjust(struct vm_area_struct *vma)
 	struct perf_event_context *ctx;
 	int ctxn;
 
+	/*
+	 * Data tracing isn't supported yet and as such there is no need
+	 * to keep track of anything that isn't related to executable code:
+	 */
+	if (!(vma->vm_flags & VM_EXEC))
+		return;
+
 	rcu_read_lock();
 	for_each_task_context_nr(ctxn) {
 		ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
@@ -7805,7 +7840,11 @@ static void perf_event_addr_filters_apply(struct perf_event *event)
 	list_for_each_entry(filter, &ifh->list, entry) {
 		event->addr_filters_offs[count] = 0;
 
-		if (perf_addr_filter_needs_mmap(filter))
+		/*
+		 * Adjust base offset if the filter is associated to a binary
+		 * that needs to be mapped:
+		 */
+		if (filter->inode)
 			event->addr_filters_offs[count] =
 				perf_addr_filter_apply(filter, mm);
 
@@ -7936,8 +7975,10 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
 					goto fail;
 			}
 
-			if (token == IF_SRC_FILE) {
-				filename = match_strdup(&args[2]);
+			if (token == IF_SRC_FILE || token == IF_SRC_FILEADDR) {
+				int fpos = filter->range ? 2 : 1;
+
+				filename = match_strdup(&args[fpos]);
 				if (!filename) {
 					ret = -ENOMEM;
 					goto fail;
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index b7a525ab2083..8c50276b60d1 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -172,8 +172,10 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 	err = -EAGAIN;
 	ptep = page_check_address(page, mm, addr, &ptl, 0);
-	if (!ptep)
+	if (!ptep) {
+		mem_cgroup_cancel_charge(kpage, memcg, false);
 		goto unlock;
+	}
 
 	get_page(kpage);
 	page_add_new_anon_rmap(kpage, vma, addr, false);
@@ -200,7 +202,6 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 
 	err = 0;
  unlock:
-	mem_cgroup_cancel_charge(kpage, memcg, false);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 	unlock_page(page);
 	return err;
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index f209ea151dca..3051f86a9b5f 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -87,9 +87,11 @@ struct kvm_regs {
 /* Supported VGICv3 address types  */
 #define KVM_VGIC_V3_ADDR_TYPE_DIST	2
 #define KVM_VGIC_V3_ADDR_TYPE_REDIST	3
+#define KVM_VGIC_ITS_ADDR_TYPE		4
 
 #define KVM_VGIC_V3_DIST_SIZE		SZ_64K
 #define KVM_VGIC_V3_REDIST_SIZE		(2 * SZ_64K)
+#define KVM_VGIC_V3_ITS_SIZE		(2 * SZ_64K)
 
 #define KVM_ARM_VCPU_POWER_OFF		0 /* CPU is started in OFF state */
 #define KVM_ARM_VCPU_EL1_32BIT		1 /* CPU running a 32bit VM */
diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h
index 3b8e99ef9d58..a2ffec4139ad 100644
--- a/tools/arch/s390/include/uapi/asm/kvm.h
+++ b/tools/arch/s390/include/uapi/asm/kvm.h
@@ -93,6 +93,47 @@ struct kvm_s390_vm_cpu_machine {
 	__u64 fac_list[256];
 };
 
+#define KVM_S390_VM_CPU_PROCESSOR_FEAT	2
+#define KVM_S390_VM_CPU_MACHINE_FEAT	3
+
+#define KVM_S390_VM_CPU_FEAT_NR_BITS	1024
+#define KVM_S390_VM_CPU_FEAT_ESOP	0
+#define KVM_S390_VM_CPU_FEAT_SIEF2	1
+#define KVM_S390_VM_CPU_FEAT_64BSCAO	2
+#define KVM_S390_VM_CPU_FEAT_SIIF	3
+#define KVM_S390_VM_CPU_FEAT_GPERE	4
+#define KVM_S390_VM_CPU_FEAT_GSLS	5
+#define KVM_S390_VM_CPU_FEAT_IB		6
+#define KVM_S390_VM_CPU_FEAT_CEI	7
+#define KVM_S390_VM_CPU_FEAT_IBS	8
+#define KVM_S390_VM_CPU_FEAT_SKEY	9
+#define KVM_S390_VM_CPU_FEAT_CMMA	10
+#define KVM_S390_VM_CPU_FEAT_PFMFI	11
+#define KVM_S390_VM_CPU_FEAT_SIGPIF	12
+struct kvm_s390_vm_cpu_feat {
+	__u64 feat[16];
+};
+
+#define KVM_S390_VM_CPU_PROCESSOR_SUBFUNC	4
+#define KVM_S390_VM_CPU_MACHINE_SUBFUNC		5
+/* for "test bit" instructions MSB 0 bit ordering, for "query" raw blocks */
+struct kvm_s390_vm_cpu_subfunc {
+	__u8 plo[32];		/* always */
+	__u8 ptff[16];		/* with TOD-clock steering */
+	__u8 kmac[16];		/* with MSA */
+	__u8 kmc[16];		/* with MSA */
+	__u8 km[16];		/* with MSA */
+	__u8 kimd[16];		/* with MSA */
+	__u8 klmd[16];		/* with MSA */
+	__u8 pckmo[16];		/* with MSA3 */
+	__u8 kmctr[16];		/* with MSA4 */
+	__u8 kmf[16];		/* with MSA4 */
+	__u8 kmo[16];		/* with MSA4 */
+	__u8 pcc[16];		/* with MSA4 */
+	__u8 ppno[16];		/* with MSA5 */
+	__u8 reserved[1824];
+};
+
 /* kvm attributes for crypto */
 #define KVM_S390_VM_CRYPTO_ENABLE_AES_KW	0
 #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW	1
diff --git a/tools/arch/s390/include/uapi/asm/sie.h b/tools/arch/s390/include/uapi/asm/sie.h
index 8fb5d4a6dd25..3ac634368939 100644
--- a/tools/arch/s390/include/uapi/asm/sie.h
+++ b/tools/arch/s390/include/uapi/asm/sie.h
@@ -140,6 +140,7 @@
 	exit_code_ipa0(0xB2, 0x4c, "TAR"),	\
 	exit_code_ipa0(0xB2, 0x50, "CSP"),	\
 	exit_code_ipa0(0xB2, 0x54, "MVPG"),	\
+	exit_code_ipa0(0xB2, 0x56, "STHYI"),	\
 	exit_code_ipa0(0xB2, 0x58, "BSG"),	\
 	exit_code_ipa0(0xB2, 0x5a, "BSA"),	\
 	exit_code_ipa0(0xB2, 0x5f, "CHSC"),	\
diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c
index 8d4dc97d80ba..35745a733100 100644
--- a/tools/perf/arch/powerpc/util/sym-handling.c
+++ b/tools/perf/arch/powerpc/util/sym-handling.c
@@ -97,6 +97,7 @@ void arch__fix_tev_from_maps(struct perf_probe_event *pev,
 	}
 }
 
+#ifdef HAVE_LIBELF_SUPPORT
 void arch__post_process_probe_trace_events(struct perf_probe_event *pev,
 					   int ntevs)
 {
@@ -118,5 +119,6 @@ void arch__post_process_probe_trace_events(struct perf_probe_event *pev,
 		}
 	}
 }
+#endif /* HAVE_LIBELF_SUPPORT */
 
 #endif
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index fb51457ba338..a2412e9d883b 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -501,7 +501,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
 	struct intel_pt_recording *ptr =
 			container_of(itr, struct intel_pt_recording, itr);
 	struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
-	bool have_timing_info;
+	bool have_timing_info, need_immediate = false;
 	struct perf_evsel *evsel, *intel_pt_evsel = NULL;
 	const struct cpu_map *cpus = evlist->cpus;
 	bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
@@ -655,6 +655,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
 				ptr->have_sched_switch = 3;
 			} else {
 				opts->record_switch_events = true;
+				need_immediate = true;
 				if (cpu_wide)
 					ptr->have_sched_switch = 3;
 				else
@@ -700,6 +701,9 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
 		tracking_evsel->attr.freq = 0;
 		tracking_evsel->attr.sample_period = 1;
 
+		if (need_immediate)
+			tracking_evsel->immediate = true;
+
 		/* In per-cpu case, always need the time of mmap events etc */
 		if (!cpu_map__empty(cpus)) {
 			perf_evsel__set_sample_bit(tracking_evsel, TIME);
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index d608a2c9e48c..d1ce29be560e 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -88,6 +88,9 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
 	if (mem->operation & MEM_OPERATION_LOAD)
 		perf_mem_events[PERF_MEM_EVENTS__LOAD].record = true;
 
+	if (mem->operation & MEM_OPERATION_STORE)
+		perf_mem_events[PERF_MEM_EVENTS__STORE].record = true;
+
 	if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record)
 		rec_argv[i++] = "-W";
 
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 9c640a8081c7..c859e59dfe3e 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -371,14 +371,16 @@ static int perf_session__check_output_opt(struct perf_session *session)
 
 	if (!no_callchain) {
 		bool use_callchain = false;
+		bool not_pipe = false;
 
 		evlist__for_each_entry(session->evlist, evsel) {
+			not_pipe = true;
 			if (evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
 				use_callchain = true;
 				break;
 			}
 		}
-		if (!use_callchain)
+		if (not_pipe && !use_callchain)
 			symbol_conf.use_callchain = false;
 	}
 
@@ -1690,8 +1692,13 @@ static int list_available_scripts(const struct option *opt __maybe_unused,
 	snprintf(scripts_path, MAXPATHLEN, "%s/scripts", get_argv_exec_path());
 
 	scripts_dir = opendir(scripts_path);
-	if (!scripts_dir)
-		return -1;
+	if (!scripts_dir) {
+		fprintf(stdout,
+			"open(%s) failed.\n"
+			"Check \"PERF_EXEC_PATH\" env to set scripts dir.\n",
+			scripts_path);
+		exit(-1);
+	}
 
 	for_each_lang(scripts_path, scripts_dir, lang_dirent) {
 		snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path,
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 9c8f15da86ce..8ff6c6a61291 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -123,8 +123,6 @@ struct intel_pt_decoder {
 	bool have_calc_cyc_to_tsc;
 	int exec_mode;
 	unsigned int insn_bytes;
-	uint64_t sign_bit;
-	uint64_t sign_bits;
 	uint64_t period;
 	enum intel_pt_period_type period_type;
 	uint64_t tot_insn_cnt;
@@ -191,9 +189,6 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
 	decoder->data               = params->data;
 	decoder->return_compression = params->return_compression;
 
-	decoder->sign_bit           = (uint64_t)1 << 47;
-	decoder->sign_bits          = ~(((uint64_t)1 << 48) - 1);
-
 	decoder->period             = params->period;
 	decoder->period_type        = params->period_type;
 
@@ -362,21 +357,30 @@ int intel_pt__strerror(int code, char *buf, size_t buflen)
 	return 0;
 }
 
-static uint64_t intel_pt_calc_ip(struct intel_pt_decoder *decoder,
-				 const struct intel_pt_pkt *packet,
+static uint64_t intel_pt_calc_ip(const struct intel_pt_pkt *packet,
 				 uint64_t last_ip)
 {
 	uint64_t ip;
 
 	switch (packet->count) {
-	case 2:
+	case 1:
 		ip = (last_ip & (uint64_t)0xffffffffffff0000ULL) |
 		     packet->payload;
 		break;
-	case 4:
+	case 2:
 		ip = (last_ip & (uint64_t)0xffffffff00000000ULL) |
 		     packet->payload;
 		break;
+	case 3:
+		ip = packet->payload;
+		/* Sign-extend 6-byte ip */
+		if (ip & (uint64_t)0x800000000000ULL)
+			ip |= (uint64_t)0xffff000000000000ULL;
+		break;
+	case 4:
+		ip = (last_ip & (uint64_t)0xffff000000000000ULL) |
+		     packet->payload;
+		break;
 	case 6:
 		ip = packet->payload;
 		break;
@@ -384,16 +388,12 @@ static uint64_t intel_pt_calc_ip(struct intel_pt_decoder *decoder,
 		return 0;
 	}
 
-	if (ip & decoder->sign_bit)
-		return ip | decoder->sign_bits;
-
 	return ip;
 }
 
 static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder)
 {
-	decoder->last_ip = intel_pt_calc_ip(decoder, &decoder->packet,
-					    decoder->last_ip);
+	decoder->last_ip = intel_pt_calc_ip(&decoder->packet, decoder->last_ip);
 }
 
 static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder)
@@ -1657,6 +1657,12 @@ static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
 	}
 }
 
+static inline bool intel_pt_have_ip(struct intel_pt_decoder *decoder)
+{
+	return decoder->last_ip || decoder->packet.count == 0 ||
+	       decoder->packet.count == 3 || decoder->packet.count == 6;
+}
+
 /* Walk PSB+ packets to get in sync. */
 static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
 {
@@ -1677,8 +1683,7 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
 
 		case INTEL_PT_FUP:
 			decoder->pge = true;
-			if (decoder->last_ip || decoder->packet.count == 6 ||
-			    decoder->packet.count == 0) {
+			if (intel_pt_have_ip(decoder)) {
 				uint64_t current_ip = decoder->ip;
 
 				intel_pt_set_ip(decoder);
@@ -1767,8 +1772,7 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
 		case INTEL_PT_TIP_PGE:
 		case INTEL_PT_TIP:
 			decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD;
-			if (decoder->last_ip || decoder->packet.count == 6 ||
-			    decoder->packet.count == 0)
+			if (intel_pt_have_ip(decoder))
 				intel_pt_set_ip(decoder);
 			if (decoder->ip)
 				return 0;
@@ -1776,9 +1780,7 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
 
 		case INTEL_PT_FUP:
 			if (decoder->overflow) {
-				if (decoder->last_ip ||
-				    decoder->packet.count == 6 ||
-				    decoder->packet.count == 0)
+				if (intel_pt_have_ip(decoder))
 					intel_pt_set_ip(decoder);
 				if (decoder->ip)
 					return 0;
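
For reference, the IP-compression handling above reconstructs the full
instruction pointer from the previously seen IP plus a partial payload, and
the new 6-byte case sign-extends from bit 47. A small standalone sketch of
those two computations (the input values below are made up purely for
illustration):

  #include <stdint.h>
  #include <stdio.h>

  int main(void)
  {
          uint64_t last_ip = 0xffffffff81000000ULL;  /* arbitrary example */

          /* 2-byte update (packet->count == 1): the low 16 bits come from
           * the payload, the rest is inherited from the last IP. */
          uint64_t ip2 = (last_ip & 0xffffffffffff0000ULL) | 0x1234ULL;

          /* 6-byte payload (case 3 above): sign-extend from bit 47. */
          uint64_t ip6 = 0x800000abcdefULL;
          if (ip6 & 0x800000000000ULL)
                  ip6 |= 0xffff000000000000ULL;

          printf("2-byte update:        %#llx\n", (unsigned long long)ip2);
          printf("6-byte sign-extended: %#llx\n", (unsigned long long)ip6);
          return 0;
  }
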
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
index b1257c816310..4f7b32020487 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
@@ -292,36 +292,46 @@ static int intel_pt_get_ip(enum intel_pt_pkt_type type, unsigned int byte,
 			   const unsigned char *buf, size_t len,
 			   struct intel_pt_pkt *packet)
 {
-	switch (byte >> 5) {
+	int ip_len;
+
+	packet->count = byte >> 5;
+
+	switch (packet->count) {
 	case 0:
-		packet->count = 0;
+		ip_len = 0;
 		break;
 	case 1:
 		if (len < 3)
 			return INTEL_PT_NEED_MORE_BYTES;
-		packet->count = 2;
+		ip_len = 2;
 		packet->payload = le16_to_cpu(*(uint16_t *)(buf + 1));
 		break;
 	case 2:
 		if (len < 5)
 			return INTEL_PT_NEED_MORE_BYTES;
-		packet->count = 4;
+		ip_len = 4;
 		packet->payload = le32_to_cpu(*(uint32_t *)(buf + 1));
 		break;
 	case 3:
-	case 6:
+	case 4:
 		if (len < 7)
 			return INTEL_PT_NEED_MORE_BYTES;
-		packet->count = 6;
+		ip_len = 6;
 		memcpy_le64(&packet->payload, buf + 1, 6);
 		break;
+	case 6:
+		if (len < 9)
+			return INTEL_PT_NEED_MORE_BYTES;
+		ip_len = 8;
+		packet->payload = le64_to_cpu(*(uint64_t *)(buf + 1));
+		break;
 	default:
 		return INTEL_PT_BAD_PACKET;
 	}
 
 	packet->type = type;
 
-	return packet->count + 1;
+	return ip_len + 1;
 }
 
 static int intel_pt_get_mode(const unsigned char *buf, size_t len,
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index 9f3305f6b6d5..95f0884aae02 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -1,3 +1,4 @@
+#include <sys/sysmacros.h>
 #include <sys/types.h>
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
index 9aed9c332da6..9c3b9ed5b3c3 100644
--- a/tools/perf/util/probe-file.c
+++ b/tools/perf/util/probe-file.c
@@ -133,7 +133,7 @@ int probe_file__open_both(int *kfd, int *ufd, int flag)
 /* Get raw string list of current kprobe_events  or uprobe_events */
 struct strlist *probe_file__get_rawlist(int fd)
 {
-	int ret, idx;
+	int ret, idx, fddup;
 	FILE *fp;
 	char buf[MAX_CMDLEN];
 	char *p;
@@ -143,8 +143,17 @@ struct strlist *probe_file__get_rawlist(int fd)
 		return NULL;
 
 	sl = strlist__new(NULL, NULL);
+	if (sl == NULL)
+		return NULL;
+
+	fddup = dup(fd);
+	if (fddup < 0)
+		goto out_free_sl;
+
+	fp = fdopen(fddup, "r");
+	if (!fp)
+		goto out_close_fddup;
 
-	fp = fdopen(dup(fd), "r");
 	while (!feof(fp)) {
 		p = fgets(buf, MAX_CMDLEN, fp);
 		if (!p)
@@ -156,13 +165,21 @@ struct strlist *probe_file__get_rawlist(int fd)
 		ret = strlist__add(sl, buf);
 		if (ret < 0) {
 			pr_debug("strlist__add failed (%d)\n", ret);
-			strlist__delete(sl);
-			return NULL;
+			goto out_close_fp;
 		}
 	}
 	fclose(fp);
 
 	return sl;
+
+out_close_fp:
+	fclose(fp);
+	goto out_free_sl;
+out_close_fddup:
+	close(fddup);
+out_free_sl:
+	strlist__delete(sl);
+	return NULL;
 }
 
 static struct strlist *__probe_file__get_namelist(int fd, bool include_group)
@@ -447,12 +464,17 @@ static int probe_cache__load(struct probe_cache *pcache)
 {
 	struct probe_cache_entry *entry = NULL;
 	char buf[MAX_CMDLEN], *p;
-	int ret = 0;
+	int ret = 0, fddup;
 	FILE *fp;
 
-	fp = fdopen(dup(pcache->fd), "r");
-	if (!fp)
+	fddup = dup(pcache->fd);
+	if (fddup < 0)
+		return -errno;
+	fp = fdopen(fddup, "r");
+	if (!fp) {
+		close(fddup);
 		return -EINVAL;
+	}
 
 	while (!feof(fp)) {
 		if (!fgets(buf, MAX_CMDLEN, fp))
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index a34321e9b44d..a811c13a74d6 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -837,7 +837,8 @@ int dso__load_sym(struct dso *dso, struct map *map,
 	sec = syms_ss->symtab;
 	shdr = syms_ss->symshdr;
 
-	if (elf_section_by_name(elf, &ehdr, &tshdr, ".text", NULL))
+	if (elf_section_by_name(runtime_ss->elf, &runtime_ss->ehdr, &tshdr,
+				".text", NULL))
 		dso->text_offset = tshdr.sh_addr - tshdr.sh_offset;
 
 	if (runtime_ss->opdsec)


* [GIT PULL] perf fixes
@ 2016-08-12 19:35 Ingo Molnar
  0 siblings, 0 replies; 316+ messages in thread
From: Ingo Molnar @ 2016-08-12 19:35 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Arnaldo Carvalho de Melo, Peter Zijlstra,
	Thomas Gleixner, Andrew Morton

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: 95f3be798472f63b495ca4712af005ea5ac7aa47 perf/x86/intel/uncore: Add enable_box for client MSR uncore

Mostly tooling fixes, plus two uncore-PMU fixes, an uprobes fix, a perf-cgroups
fix, and an AUX events fix.

 Thanks,

	Ingo

------------------>
Arnaldo Carvalho de Melo (4):
      perf hists: Trim libtraceevent trace_seq buffers
      tools: Sync cpufeatures.h and vmx.h with the kernel
      toops: Sync tools/include/uapi/linux/bpf.h with the kernel
      tools: Sync cpufeatures headers with the kernel

Brendan Gregg (1):
      perf script: Add 'bpf-output' field to usage message

David Carrillo-Cisneros (1):
      perf/core: Set cgroup in CPU contexts for new cgroup events

Denys Vlasenko (1):
      uprobes/x86: Fix RIP-relative handling of EVEX-encoded instructions

Kan Liang (2):
      perf/x86/intel/uncore: Fix uncore num_counters
      perf/x86/intel/uncore: Add enable_box for client MSR uncore

Konstantin Khlebnikov (1):
      perf probe: Fix module name matching

Mark Rutland (1):
      perf stat: Avoid skew when reading events

Masami Hiramatsu (1):
      perf probe: Adjust map->reloc offset when finding kernel symbol from map

Naohiro Aota (1):
      perf probe: Support signedness casting

Peter Zijlstra (1):
      perf/core: Fix sideband list-iteration vs. event ordering NULL pointer deference crash

Ravi Bangoria (2):
      perf probe: Add function to post process kernel trace events
      perf probe ppc64le: Fix probe location when using DWARF


 arch/x86/events/intel/uncore_snb.c             | 14 +++++
 arch/x86/events/intel/uncore_snbep.c           | 10 +--
 arch/x86/kernel/uprobes.c                      | 22 +++----
 include/linux/perf_event.h                     |  4 ++
 kernel/events/core.c                           | 77 ++++++++++++++++-------
 tools/arch/x86/include/asm/cpufeatures.h       |  9 +--
 tools/arch/x86/include/asm/disabled-features.h |  2 +
 tools/arch/x86/include/asm/required-features.h |  2 +
 tools/arch/x86/include/uapi/asm/vmx.h          |  4 +-
 tools/include/uapi/linux/bpf.h                 | 86 +++++++++++++++++++++++++-
 tools/perf/Documentation/perf-probe.txt        | 10 ++-
 tools/perf/Documentation/perf-script.txt       |  4 +-
 tools/perf/arch/powerpc/util/sym-handling.c    | 27 ++++++--
 tools/perf/builtin-script.c                    |  2 +-
 tools/perf/builtin-stat.c                      | 31 +++++++---
 tools/perf/util/probe-event.c                  | 60 +++++++++++-------
 tools/perf/util/probe-event.h                  |  6 +-
 tools/perf/util/probe-finder.c                 | 15 ++++-
 tools/perf/util/sort.c                         |  6 +-
 19 files changed, 298 insertions(+), 93 deletions(-)

diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index 97a69dbba649..9d35ec0cb8fc 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -100,6 +100,12 @@ static void snb_uncore_msr_init_box(struct intel_uncore_box *box)
 	}
 }
 
+static void snb_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+	wrmsrl(SNB_UNC_PERF_GLOBAL_CTL,
+		SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL);
+}
+
 static void snb_uncore_msr_exit_box(struct intel_uncore_box *box)
 {
 	if (box->pmu->pmu_idx == 0)
@@ -127,6 +133,7 @@ static struct attribute_group snb_uncore_format_group = {
 
 static struct intel_uncore_ops snb_uncore_msr_ops = {
 	.init_box	= snb_uncore_msr_init_box,
+	.enable_box	= snb_uncore_msr_enable_box,
 	.exit_box	= snb_uncore_msr_exit_box,
 	.disable_event	= snb_uncore_msr_disable_event,
 	.enable_event	= snb_uncore_msr_enable_event,
@@ -192,6 +199,12 @@ static void skl_uncore_msr_init_box(struct intel_uncore_box *box)
 	}
 }
 
+static void skl_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+	wrmsrl(SKL_UNC_PERF_GLOBAL_CTL,
+		SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL);
+}
+
 static void skl_uncore_msr_exit_box(struct intel_uncore_box *box)
 {
 	if (box->pmu->pmu_idx == 0)
@@ -200,6 +213,7 @@ static void skl_uncore_msr_exit_box(struct intel_uncore_box *box)
 
 static struct intel_uncore_ops skl_uncore_msr_ops = {
 	.init_box	= skl_uncore_msr_init_box,
+	.enable_box	= skl_uncore_msr_enable_box,
 	.exit_box	= skl_uncore_msr_exit_box,
 	.disable_event	= snb_uncore_msr_disable_event,
 	.enable_event	= snb_uncore_msr_enable_event,
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 824e54086e07..8aee83bcf71f 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -2626,7 +2626,7 @@ void hswep_uncore_cpu_init(void)
 
 static struct intel_uncore_type hswep_uncore_ha = {
 	.name		= "ha",
-	.num_counters   = 5,
+	.num_counters   = 4,
 	.num_boxes	= 2,
 	.perf_ctr_bits	= 48,
 	SNBEP_UNCORE_PCI_COMMON_INIT(),
@@ -2645,7 +2645,7 @@ static struct uncore_event_desc hswep_uncore_imc_events[] = {
 
 static struct intel_uncore_type hswep_uncore_imc = {
 	.name		= "imc",
-	.num_counters   = 5,
+	.num_counters   = 4,
 	.num_boxes	= 8,
 	.perf_ctr_bits	= 48,
 	.fixed_ctr_bits	= 48,
@@ -2691,7 +2691,7 @@ static struct intel_uncore_type hswep_uncore_irp = {
 
 static struct intel_uncore_type hswep_uncore_qpi = {
 	.name			= "qpi",
-	.num_counters		= 5,
+	.num_counters		= 4,
 	.num_boxes		= 3,
 	.perf_ctr_bits		= 48,
 	.perf_ctr		= SNBEP_PCI_PMON_CTR0,
@@ -2773,7 +2773,7 @@ static struct event_constraint hswep_uncore_r3qpi_constraints[] = {
 
 static struct intel_uncore_type hswep_uncore_r3qpi = {
 	.name		= "r3qpi",
-	.num_counters   = 4,
+	.num_counters   = 3,
 	.num_boxes	= 3,
 	.perf_ctr_bits	= 44,
 	.constraints	= hswep_uncore_r3qpi_constraints,
@@ -2972,7 +2972,7 @@ static struct intel_uncore_type bdx_uncore_ha = {
 
 static struct intel_uncore_type bdx_uncore_imc = {
 	.name		= "imc",
-	.num_counters   = 5,
+	.num_counters   = 4,
 	.num_boxes	= 8,
 	.perf_ctr_bits	= 48,
 	.fixed_ctr_bits	= 48,
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 6c1ff31d99ff..495c776de4b4 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -357,20 +357,22 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
 		*cursor &= 0xfe;
 	}
 	/*
-	 * Similar treatment for VEX3 prefix.
-	 * TODO: add XOP/EVEX treatment when insn decoder supports them
+	 * Similar treatment for VEX3/EVEX prefix.
+	 * TODO: add XOP treatment when insn decoder supports them
 	 */
-	if (insn->vex_prefix.nbytes == 3) {
+	if (insn->vex_prefix.nbytes >= 3) {
 		/*
 		 * vex2:     c5    rvvvvLpp   (has no b bit)
 		 * vex3/xop: c4/8f rxbmmmmm wvvvvLpp
 		 * evex:     62    rxbR00mm wvvvv1pp zllBVaaa
-		 *   (evex will need setting of both b and x since
-		 *   in non-sib encoding evex.x is 4th bit of MODRM.rm)
-		 * Setting VEX3.b (setting because it has inverted meaning):
+		 * Setting VEX3.b (setting because it has inverted meaning).
+		 * Setting EVEX.x since (in non-SIB encoding) EVEX.x
+		 * is the 4th bit of MODRM.rm, and needs the same treatment.
+		 * For VEX3-encoded insns, VEX3.x value has no effect in
+		 * non-SIB encoding, the change is superfluous but harmless.
 		 */
 		cursor = auprobe->insn + insn_offset_vex_prefix(insn) + 1;
-		*cursor |= 0x20;
+		*cursor |= 0x60;
 	}
 
 	/*
@@ -415,12 +417,10 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
 
 	reg = MODRM_REG(insn);	/* Fetch modrm.reg */
 	reg2 = 0xff;		/* Fetch vex.vvvv */
-	if (insn->vex_prefix.nbytes == 2)
-		reg2 = insn->vex_prefix.bytes[1];
-	else if (insn->vex_prefix.nbytes == 3)
+	if (insn->vex_prefix.nbytes)
 		reg2 = insn->vex_prefix.bytes[2];
 	/*
-	 * TODO: add XOP, EXEV vvvv reading.
+	 * TODO: add XOP vvvv reading.
 	 *
 	 * vex.vvvv field is in bits 6-3, bits are inverted.
 	 * But in 32-bit mode, high-order bit may be ignored.
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 8ed4326164cc..2b6b43cc0dd5 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -743,7 +743,9 @@ struct perf_event_context {
 	u64				parent_gen;
 	u64				generation;
 	int				pin_count;
+#ifdef CONFIG_CGROUP_PERF
 	int				nr_cgroups;	 /* cgroup evts */
+#endif
 	void				*task_ctx_data; /* pmu specific data */
 	struct rcu_head			rcu_head;
 };
@@ -769,7 +771,9 @@ struct perf_cpu_context {
 	unsigned int			hrtimer_active;
 
 	struct pmu			*unique_pmu;
+#ifdef CONFIG_CGROUP_PERF
 	struct perf_cgroup		*cgrp;
+#endif
 };
 
 struct perf_output_handle {
diff --git a/kernel/events/core.c b/kernel/events/core.c
index a19550d80ab1..1903b8f3a705 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -843,6 +843,32 @@ perf_cgroup_mark_enabled(struct perf_event *event,
 		}
 	}
 }
+
+/*
+ * Update cpuctx->cgrp so that it is set when first cgroup event is added and
+ * cleared when last cgroup event is removed.
+ */
+static inline void
+list_update_cgroup_event(struct perf_event *event,
+			 struct perf_event_context *ctx, bool add)
+{
+	struct perf_cpu_context *cpuctx;
+
+	if (!is_cgroup_event(event))
+		return;
+
+	if (add && ctx->nr_cgroups++)
+		return;
+	else if (!add && --ctx->nr_cgroups)
+		return;
+	/*
+	 * Because cgroup events are always per-cpu events,
+	 * this will always be called from the right CPU.
+	 */
+	cpuctx = __get_cpu_context(ctx);
+	cpuctx->cgrp = add ? event->cgrp : NULL;
+}
+
 #else /* !CONFIG_CGROUP_PERF */
 
 static inline bool
@@ -920,6 +946,13 @@ perf_cgroup_mark_enabled(struct perf_event *event,
 			 struct perf_event_context *ctx)
 {
 }
+
+static inline void
+list_update_cgroup_event(struct perf_event *event,
+			 struct perf_event_context *ctx, bool add)
+{
+}
+
 #endif
 
 /*
@@ -1392,6 +1425,7 @@ ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
 static void
 list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 {
+
 	lockdep_assert_held(&ctx->lock);
 
 	WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
@@ -1412,8 +1446,7 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 		list_add_tail(&event->group_entry, list);
 	}
 
-	if (is_cgroup_event(event))
-		ctx->nr_cgroups++;
+	list_update_cgroup_event(event, ctx, true);
 
 	list_add_rcu(&event->event_entry, &ctx->event_list);
 	ctx->nr_events++;
@@ -1581,8 +1614,6 @@ static void perf_group_attach(struct perf_event *event)
 static void
 list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 {
-	struct perf_cpu_context *cpuctx;
-
 	WARN_ON_ONCE(event->ctx != ctx);
 	lockdep_assert_held(&ctx->lock);
 
@@ -1594,20 +1625,7 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 
 	event->attach_state &= ~PERF_ATTACH_CONTEXT;
 
-	if (is_cgroup_event(event)) {
-		ctx->nr_cgroups--;
-		/*
-		 * Because cgroup events are always per-cpu events, this will
-		 * always be called from the right CPU.
-		 */
-		cpuctx = __get_cpu_context(ctx);
-		/*
-		 * If there are no more cgroup events then clear cgrp to avoid
-		 * stale pointer in update_cgrp_time_from_cpuctx().
-		 */
-		if (!ctx->nr_cgroups)
-			cpuctx->cgrp = NULL;
-	}
+	list_update_cgroup_event(event, ctx, false);
 
 	ctx->nr_events--;
 	if (event->attr.inherit_stat)
@@ -1716,8 +1734,8 @@ static inline int pmu_filter_match(struct perf_event *event)
 static inline int
 event_filter_match(struct perf_event *event)
 {
-	return (event->cpu == -1 || event->cpu == smp_processor_id())
-	    && perf_cgroup_match(event) && pmu_filter_match(event);
+	return (event->cpu == -1 || event->cpu == smp_processor_id()) &&
+	       perf_cgroup_match(event) && pmu_filter_match(event);
 }
 
 static void
@@ -1737,8 +1755,8 @@ event_sched_out(struct perf_event *event,
 	 * maintained, otherwise bogus information is return
 	 * via read() for time_enabled, time_running:
 	 */
-	if (event->state == PERF_EVENT_STATE_INACTIVE
-	    && !event_filter_match(event)) {
+	if (event->state == PERF_EVENT_STATE_INACTIVE &&
+	    !event_filter_match(event)) {
 		delta = tstamp - event->tstamp_stopped;
 		event->tstamp_running += delta;
 		event->tstamp_stopped = tstamp;
@@ -2236,10 +2254,15 @@ perf_install_in_context(struct perf_event_context *ctx,
 
 	lockdep_assert_held(&ctx->mutex);
 
-	event->ctx = ctx;
 	if (event->cpu != -1)
 		event->cpu = cpu;
 
+	/*
+	 * Ensures that if we can observe event->ctx, both the event and ctx
+	 * will be 'complete'. See perf_iterate_sb_cpu().
+	 */
+	smp_store_release(&event->ctx, ctx);
+
 	if (!task) {
 		cpu_function_call(cpu, __perf_install_in_context, event);
 		return;
@@ -5969,6 +5992,14 @@ static void perf_iterate_sb_cpu(perf_iterate_f output, void *data)
 	struct perf_event *event;
 
 	list_for_each_entry_rcu(event, &pel->list, sb_list) {
+		/*
+		 * Skip events that are not fully formed yet; ensure that
+		 * if we observe event->ctx, both event and ctx will be
+		 * complete enough. See perf_install_in_context().
+		 */
+		if (!smp_load_acquire(&event->ctx))
+			continue;
+
 		if (event->state < PERF_EVENT_STATE_INACTIVE)
 			continue;
 		if (!event_filter_match(event))
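
The smp_store_release()/smp_load_acquire() pair above is the usual
publish/consume pattern: fully initialise the object, publish the pointer
with a release store, and only dereference it after an acquire load. A
minimal userspace analogue using C11 atomics (the struct and field names
here are invented for illustration; the kernel primitives correspond to the
release/acquire orderings shown):

  #include <stdatomic.h>
  #include <pthread.h>
  #include <stdio.h>

  struct ctx { int cpu; };                  /* illustrative stand-in */

  static struct ctx the_ctx;
  static _Atomic(struct ctx *) published;   /* plays the role of event->ctx */

  static void *producer(void *arg)
  {
          the_ctx.cpu = 3;                  /* fully initialise first ...  */
          atomic_store_explicit(&published, &the_ctx,
                                memory_order_release);  /* ... then publish */
          return NULL;
  }

  static void *consumer(void *arg)
  {
          struct ctx *c = atomic_load_explicit(&published,
                                               memory_order_acquire);
          if (!c)
                  return NULL;              /* not fully formed yet: skip */
          printf("cpu=%d\n", c->cpu);       /* guaranteed to observe cpu == 3 */
          return NULL;
  }

  int main(void)
  {
          pthread_t p, c;

          pthread_create(&p, NULL, producer, NULL);
          pthread_create(&c, NULL, consumer, NULL);
          pthread_join(p, NULL);
          pthread_join(c, NULL);
          return 0;
  }
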
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 4a413485f9eb..92a8308b96f6 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -225,7 +225,6 @@
 #define X86_FEATURE_RDSEED	( 9*32+18) /* The RDSEED instruction */
 #define X86_FEATURE_ADX		( 9*32+19) /* The ADCX and ADOX instructions */
 #define X86_FEATURE_SMAP	( 9*32+20) /* Supervisor Mode Access Prevention */
-#define X86_FEATURE_PCOMMIT	( 9*32+22) /* PCOMMIT instruction */
 #define X86_FEATURE_CLFLUSHOPT	( 9*32+23) /* CLFLUSHOPT instruction */
 #define X86_FEATURE_CLWB	( 9*32+24) /* CLWB instruction */
 #define X86_FEATURE_AVX512PF	( 9*32+26) /* AVX-512 Prefetch */
@@ -301,10 +300,6 @@
 #define X86_BUG_FXSAVE_LEAK	X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
 #define X86_BUG_CLFLUSH_MONITOR	X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
 #define X86_BUG_SYSRET_SS_ATTRS	X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
-#define X86_BUG_NULL_SEG	X86_BUG(9) /* Nulling a selector preserves the base */
-#define X86_BUG_SWAPGS_FENCE	X86_BUG(10) /* SWAPGS without input dep on GS */
-
-
 #ifdef CONFIG_X86_32
 /*
  * 64-bit kernels don't use X86_BUG_ESPFIX.  Make the define conditional
@@ -312,5 +307,7 @@
  */
 #define X86_BUG_ESPFIX		X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
 #endif
-
+#define X86_BUG_NULL_SEG	X86_BUG(10) /* Nulling a selector preserves the base */
+#define X86_BUG_SWAPGS_FENCE	X86_BUG(11) /* SWAPGS without input dep on GS */
+#define X86_BUG_MONITOR		X86_BUG(12) /* IPI required to wake up remote CPU */
 #endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index 911e9358ceb1..85599ad4d024 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -56,5 +56,7 @@
 #define DISABLED_MASK14	0
 #define DISABLED_MASK15	0
 #define DISABLED_MASK16	(DISABLE_PKU|DISABLE_OSPKE)
+#define DISABLED_MASK17	0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
 
 #endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h
index 4916144e3c42..fac9a5c0abe9 100644
--- a/tools/arch/x86/include/asm/required-features.h
+++ b/tools/arch/x86/include/asm/required-features.h
@@ -99,5 +99,7 @@
 #define REQUIRED_MASK14	0
 #define REQUIRED_MASK15	0
 #define REQUIRED_MASK16	0
+#define REQUIRED_MASK17	0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
 
 #endif /* _ASM_X86_REQUIRED_FEATURES_H */
diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h
index 5b15d94a33f8..37fee272618f 100644
--- a/tools/arch/x86/include/uapi/asm/vmx.h
+++ b/tools/arch/x86/include/uapi/asm/vmx.h
@@ -78,7 +78,6 @@
 #define EXIT_REASON_PML_FULL            62
 #define EXIT_REASON_XSAVES              63
 #define EXIT_REASON_XRSTORS             64
-#define EXIT_REASON_PCOMMIT             65
 
 #define VMX_EXIT_REASONS \
 	{ EXIT_REASON_EXCEPTION_NMI,         "EXCEPTION_NMI" }, \
@@ -127,8 +126,7 @@
 	{ EXIT_REASON_INVVPID,               "INVVPID" }, \
 	{ EXIT_REASON_INVPCID,               "INVPCID" }, \
 	{ EXIT_REASON_XSAVES,                "XSAVES" }, \
-	{ EXIT_REASON_XRSTORS,               "XRSTORS" }, \
-	{ EXIT_REASON_PCOMMIT,               "PCOMMIT" }
+	{ EXIT_REASON_XRSTORS,               "XRSTORS" }
 
 #define VMX_ABORT_SAVE_GUEST_MSR_FAIL        1
 #define VMX_ABORT_LOAD_HOST_MSR_FAIL         4
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 406459b935a2..da218fec6056 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -84,6 +84,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_PERCPU_HASH,
 	BPF_MAP_TYPE_PERCPU_ARRAY,
 	BPF_MAP_TYPE_STACK_TRACE,
+	BPF_MAP_TYPE_CGROUP_ARRAY,
 };
 
 enum bpf_prog_type {
@@ -93,6 +94,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_SCHED_CLS,
 	BPF_PROG_TYPE_SCHED_ACT,
 	BPF_PROG_TYPE_TRACEPOINT,
+	BPF_PROG_TYPE_XDP,
 };
 
 #define BPF_PSEUDO_MAP_FD	1
@@ -313,6 +315,66 @@ enum bpf_func_id {
 	 */
 	BPF_FUNC_skb_get_tunnel_opt,
 	BPF_FUNC_skb_set_tunnel_opt,
+
+	/**
+	 * bpf_skb_change_proto(skb, proto, flags)
+	 * Change protocol of the skb. Currently supported is
+	 * v4 -> v6, v6 -> v4 transitions. The helper will also
+	 * resize the skb. eBPF program is expected to fill the
+	 * new headers via skb_store_bytes and lX_csum_replace.
+	 * @skb: pointer to skb
+	 * @proto: new skb->protocol type
+	 * @flags: reserved
+	 * Return: 0 on success or negative error
+	 */
+	BPF_FUNC_skb_change_proto,
+
+	/**
+	 * bpf_skb_change_type(skb, type)
+	 * Change packet type of skb.
+	 * @skb: pointer to skb
+	 * @type: new skb->pkt_type type
+	 * Return: 0 on success or negative error
+	 */
+	BPF_FUNC_skb_change_type,
+
+	/**
+	 * bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb
+	 * @skb: pointer to skb
+	 * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
+	 * @index: index of the cgroup in the bpf_map
+	 * Return:
+	 *   == 0 skb failed the cgroup2 descendant test
+	 *   == 1 skb succeeded the cgroup2 descendant test
+	 *    < 0 error
+	 */
+	BPF_FUNC_skb_in_cgroup,
+
+	/**
+	 * bpf_get_hash_recalc(skb)
+	 * Retrieve and possibly recalculate skb->hash.
+	 * @skb: pointer to skb
+	 * Return: hash
+	 */
+	BPF_FUNC_get_hash_recalc,
+
+	/**
+	 * u64 bpf_get_current_task(void)
+	 * Returns current task_struct
+	 * Return: current
+	 */
+	BPF_FUNC_get_current_task,
+
+	/**
+	 * bpf_probe_write_user(void *dst, void *src, int len)
+	 * safely attempt to write to a location
+	 * @dst: destination address in userspace
+	 * @src: source address on stack
+	 * @len: number of bytes to copy
+	 * Return: 0 on success or negative error
+	 */
+	BPF_FUNC_probe_write_user,
+
 	__BPF_FUNC_MAX_ID,
 };
 
@@ -347,9 +409,11 @@ enum bpf_func_id {
 #define BPF_F_ZERO_CSUM_TX		(1ULL << 1)
 #define BPF_F_DONT_FRAGMENT		(1ULL << 2)
 
-/* BPF_FUNC_perf_event_output flags. */
+/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */
 #define BPF_F_INDEX_MASK		0xffffffffULL
 #define BPF_F_CURRENT_CPU		BPF_F_INDEX_MASK
+/* BPF_FUNC_perf_event_output for sk_buff input context. */
+#define BPF_F_CTXLEN_MASK		(0xfffffULL << 32)
 
 /* user accessible mirror of in-kernel sk_buff.
  * new fields can only be added to the end of this structure
@@ -386,4 +450,24 @@ struct bpf_tunnel_key {
 	__u32 tunnel_label;
 };
 
+/* User return codes for XDP prog type.
+ * A valid XDP program must return one of these defined values. All other
+ * return codes are reserved for future use. Unknown return codes will result
+ * in packet drop.
+ */
+enum xdp_action {
+	XDP_ABORTED = 0,
+	XDP_DROP,
+	XDP_PASS,
+	XDP_TX,
+};
+
+/* user accessible metadata for XDP packet hook
+ * new fields must be added to the end of this structure
+ */
+struct xdp_md {
+	__u32 data;
+	__u32 data_end;
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
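
The xdp_md context and xdp_action return codes synced above are what an XDP
program is compiled against. A minimal sketch of such a program (built with
clang -target bpf; the "xdp" section name and the local SEC() macro are
common conventions assumed here, not something defined by the header above):

  #include <linux/bpf.h>

  #define SEC(name) __attribute__((section(name), used))

  /* Drop frames too short to hold an Ethernet header, pass the rest. */
  SEC("xdp")
  int xdp_min_len(struct xdp_md *ctx)
  {
          unsigned char *data     = (unsigned char *)(long)ctx->data;
          unsigned char *data_end = (unsigned char *)(long)ctx->data_end;

          if (data + 14 > data_end)
                  return XDP_DROP;

          return XDP_PASS;
  }

  char _license[] SEC("license") = "GPL";
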
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index 736da44596e4..b303bcdd8ed1 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -176,10 +176,18 @@ Each probe argument follows below syntax.
 
 'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.)
 '$vars' and '$params' special arguments are also available for NAME, '$vars' is expanded to the local variables (including function parameters) which can access at given probe point. '$params' is expanded to only the function parameters.
-'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type.
+'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. Currently, basic types (u8/u16/u32/u64/s8/s16/s32/s64), signedness casting (u/s), "string" and bitfield are supported. (see TYPES for detail)
 
 On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid.
 
+TYPES
+-----
+Basic types (u8/u16/u32/u64/s8/s16/s32/s64) are integer types. Prefix 's' and 'u' means those types are signed and unsigned respectively. Traced arguments are shown in decimal (signed) or hex (unsigned). You can also use 's' or 'u' to specify only signedness and leave its size auto-detected by perf probe.
+String type is a special type, which fetches a "null-terminated" string from kernel space. This means it will fail and store NULL if the string container has been paged out. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type.
+Bitfield is another special type, which takes 3 parameters, bit-width, bit-offset, and container-size (usually 32). The syntax is;
+
+ b<bit-width>@<bit-offset>/<container-size>
+
 LINE SYNTAX
 -----------
 Line range is described by following syntax.
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 1f6c70594f0f..053bbbd84ece 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -116,8 +116,8 @@ OPTIONS
 --fields::
         Comma separated list of fields to print. Options are:
         comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
-	srcline, period, iregs, brstack, brstacksym, flags.
-        Field list can be prepended with the type, trace, sw or hw,
+        srcline, period, iregs, brstack, brstacksym, flags, bpf-output,
+        callindent. Field list can be prepended with the type, trace, sw or hw,
         to indicate to which event type the field list applies.
         e.g., -F sw:comm,tid,time,ip,sym  and -F trace:time,cpu,trace
 
diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c
index c6d0f91731a1..8d4dc97d80ba 100644
--- a/tools/perf/arch/powerpc/util/sym-handling.c
+++ b/tools/perf/arch/powerpc/util/sym-handling.c
@@ -54,10 +54,6 @@ int arch__compare_symbol_names(const char *namea, const char *nameb)
 #endif
 
 #if defined(_CALL_ELF) && _CALL_ELF == 2
-bool arch__prefers_symtab(void)
-{
-	return true;
-}
 
 #ifdef HAVE_LIBELF_SUPPORT
 void arch__sym_update(struct symbol *s, GElf_Sym *sym)
@@ -100,4 +96,27 @@ void arch__fix_tev_from_maps(struct perf_probe_event *pev,
 			tev->point.offset += lep_offset;
 	}
 }
+
+void arch__post_process_probe_trace_events(struct perf_probe_event *pev,
+					   int ntevs)
+{
+	struct probe_trace_event *tev;
+	struct map *map;
+	struct symbol *sym = NULL;
+	struct rb_node *tmp;
+	int i = 0;
+
+	map = get_target_map(pev->target, pev->uprobes);
+	if (!map || map__load(map, NULL) < 0)
+		return;
+
+	for (i = 0; i < ntevs; i++) {
+		tev = &pev->tevs[i];
+		map__for_each_symbol(map, sym, tmp) {
+			if (map->unmap_ip(map, sym->start) == tev->point.address)
+				arch__fix_tev_from_maps(pev, tev, map, sym);
+		}
+	}
+}
+
 #endif
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 971ff91b16cb..9c640a8081c7 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -2116,7 +2116,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
 		     "Valid types: hw,sw,trace,raw. "
 		     "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
 		     "addr,symoff,period,iregs,brstack,brstacksym,flags,"
-		     "callindent", parse_output_fields),
+		     "bpf-output,callindent", parse_output_fields),
 	OPT_BOOLEAN('a', "all-cpus", &system_wide,
 		    "system-wide collection from all CPUs"),
 	OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 0c16d20d7e32..3c7452b39f57 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -331,7 +331,7 @@ static int read_counter(struct perf_evsel *counter)
 	return 0;
 }
 
-static void read_counters(bool close_counters)
+static void read_counters(void)
 {
 	struct perf_evsel *counter;
 
@@ -341,11 +341,6 @@ static void read_counters(bool close_counters)
 
 		if (perf_stat_process_counter(&stat_config, counter))
 			pr_warning("failed to process counter %s\n", counter->name);
-
-		if (close_counters) {
-			perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
-					     thread_map__nr(evsel_list->threads));
-		}
 	}
 }
 
@@ -353,7 +348,7 @@ static void process_interval(void)
 {
 	struct timespec ts, rs;
 
-	read_counters(false);
+	read_counters();
 
 	clock_gettime(CLOCK_MONOTONIC, &ts);
 	diff_timespec(&rs, &ts, &ref_time);
@@ -380,6 +375,17 @@ static void enable_counters(void)
 		perf_evlist__enable(evsel_list);
 }
 
+static void disable_counters(void)
+{
+	/*
+	 * If we don't have tracee (attaching to task or cpu), counters may
+	 * still be running. To get accurate group ratios, we must stop groups
+	 * from counting before reading their constituent counters.
+	 */
+	if (!target__none(&target))
+		perf_evlist__disable(evsel_list);
+}
+
 static volatile int workload_exec_errno;
 
 /*
@@ -657,11 +663,20 @@ static int __run_perf_stat(int argc, const char **argv)
 		}
 	}
 
+	disable_counters();
+
 	t1 = rdclock();
 
 	update_stats(&walltime_nsecs_stats, t1 - t0);
 
-	read_counters(true);
+	/*
+	 * Closing a group leader splits the group, and as we only disable
+	 * group leaders, results in remaining events becoming enabled. To
+	 * avoid arbitrary skew, we must read all counters before closing any
+	 * group leaders.
+	 */
+	read_counters();
+	perf_evlist__close(evsel_list);
 
 	return WEXITSTATUS(status);
 }
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 953dc1ab2ed7..28733962cd80 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -170,15 +170,17 @@ static struct map *kernel_get_module_map(const char *module)
 		module = "kernel";
 
 	for (pos = maps__first(maps); pos; pos = map__next(pos)) {
+		/* short_name is "[module]" */
 		if (strncmp(pos->dso->short_name + 1, module,
-			    pos->dso->short_name_len - 2) == 0) {
+			    pos->dso->short_name_len - 2) == 0 &&
+		    module[pos->dso->short_name_len - 2] == '\0') {
 			return pos;
 		}
 	}
 	return NULL;
 }
 
-static struct map *get_target_map(const char *target, bool user)
+struct map *get_target_map(const char *target, bool user)
 {
 	/* Init maps of given executable or kernel */
 	if (user)
@@ -385,7 +387,7 @@ static int find_alternative_probe_point(struct debuginfo *dinfo,
 		if (uprobes)
 			address = sym->start;
 		else
-			address = map->unmap_ip(map, sym->start);
+			address = map->unmap_ip(map, sym->start) - map->reloc;
 		break;
 	}
 	if (!address) {
@@ -664,22 +666,14 @@ static int add_module_to_probe_trace_events(struct probe_trace_event *tevs,
 	return ret;
 }
 
-/* Post processing the probe events */
-static int post_process_probe_trace_events(struct probe_trace_event *tevs,
-					   int ntevs, const char *module,
-					   bool uprobe)
+static int
+post_process_kernel_probe_trace_events(struct probe_trace_event *tevs,
+				       int ntevs)
 {
 	struct ref_reloc_sym *reloc_sym;
 	char *tmp;
 	int i, skipped = 0;
 
-	if (uprobe)
-		return add_exec_to_probe_trace_events(tevs, ntevs, module);
-
-	/* Note that currently ref_reloc_sym based probe is not for drivers */
-	if (module)
-		return add_module_to_probe_trace_events(tevs, ntevs, module);
-
 	reloc_sym = kernel_get_ref_reloc_sym();
 	if (!reloc_sym) {
 		pr_warning("Relocated base symbol is not found!\n");
@@ -711,6 +705,34 @@ static int post_process_probe_trace_events(struct probe_trace_event *tevs,
 	return skipped;
 }
 
+void __weak
+arch__post_process_probe_trace_events(struct perf_probe_event *pev __maybe_unused,
+				      int ntevs __maybe_unused)
+{
+}
+
+/* Post processing the probe events */
+static int post_process_probe_trace_events(struct perf_probe_event *pev,
+					   struct probe_trace_event *tevs,
+					   int ntevs, const char *module,
+					   bool uprobe)
+{
+	int ret;
+
+	if (uprobe)
+		ret = add_exec_to_probe_trace_events(tevs, ntevs, module);
+	else if (module)
+		/* Currently ref_reloc_sym based probe is not for drivers */
+		ret = add_module_to_probe_trace_events(tevs, ntevs, module);
+	else
+		ret = post_process_kernel_probe_trace_events(tevs, ntevs);
+
+	if (ret >= 0)
+		arch__post_process_probe_trace_events(pev, ntevs);
+
+	return ret;
+}
+
 /* Try to find perf_probe_event with debuginfo */
 static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
 					  struct probe_trace_event **tevs)
@@ -749,7 +771,7 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
 
 	if (ntevs > 0) {	/* Succeeded to find trace events */
 		pr_debug("Found %d probe_trace_events.\n", ntevs);
-		ret = post_process_probe_trace_events(*tevs, ntevs,
+		ret = post_process_probe_trace_events(pev, *tevs, ntevs,
 						pev->target, pev->uprobes);
 		if (ret < 0 || ret == ntevs) {
 			clear_probe_trace_events(*tevs, ntevs);
@@ -2936,8 +2958,6 @@ static int try_to_find_absolute_address(struct perf_probe_event *pev,
 	return err;
 }
 
-bool __weak arch__prefers_symtab(void) { return false; }
-
 /* Concatenate two arrays */
 static void *memcat(void *a, size_t sz_a, void *b, size_t sz_b)
 {
@@ -3158,12 +3178,6 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev,
 	if (ret > 0 || pev->sdt)	/* SDT can be found only in the cache */
 		return ret == 0 ? -ENOENT : ret; /* Found in probe cache */
 
-	if (arch__prefers_symtab() && !perf_probe_event_need_dwarf(pev)) {
-		ret = find_probe_trace_events_from_map(pev, tevs);
-		if (ret > 0)
-			return ret; /* Found in symbol table */
-	}
-
 	/* Convert perf_probe_event with debuginfo */
 	ret = try_to_find_probe_trace_events(pev, tevs);
 	if (ret != 0)
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index e18ea9fe6385..f4f45db77c1c 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -158,7 +158,6 @@ int show_line_range(struct line_range *lr, const char *module, bool user);
 int show_available_vars(struct perf_probe_event *pevs, int npevs,
 			struct strfilter *filter);
 int show_available_funcs(const char *module, struct strfilter *filter, bool user);
-bool arch__prefers_symtab(void);
 void arch__fix_tev_from_maps(struct perf_probe_event *pev,
 			     struct probe_trace_event *tev, struct map *map,
 			     struct symbol *sym);
@@ -173,4 +172,9 @@ int e_snprintf(char *str, size_t size, const char *format, ...)
 int copy_to_probe_trace_arg(struct probe_trace_arg *tvar,
 			    struct perf_probe_arg *pvar);
 
+struct map *get_target_map(const char *target, bool user);
+
+void arch__post_process_probe_trace_events(struct perf_probe_event *pev,
+					   int ntevs);
+
 #endif /*_PROBE_EVENT_H */
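
Not part of the patch: the declarations just added to probe-event.h work together with the empty __weak default in probe-event.c, so that an architecture can supply a strong definition of arch__post_process_probe_trace_events() and have the linker pick it over the no-op. A tiny standalone illustration of that weak/strong override mechanism follows; the file and function names are generic, nothing perf-specific, and perf's __weak annotation is this attribute under the hood.

/* common.c: generic code calls a hook with a no-op weak default */
#include <stdio.h>

void __attribute__((weak)) arch_hook(int nevents)
{
	printf("default hook: nothing to do for %d events\n", nevents);
}

void resolve_events(int nevents)
{
	/* ... resolve the events, then give the arch the last word ... */
	arch_hook(nevents);
}

/* arch.c: a strong definition, which the linker prefers over the weak one */
#include <stdio.h>

void arch_hook(int nevents)
{
	printf("arch hook: fixing up %d events\n", nevents);
}

/* main.c */
void resolve_events(int nevents);

int main(void)
{
	resolve_events(3);	/* prints the arch variant when arch.c is linked in */
	return 0;
}

Building all three files together (e.g. gcc common.c arch.c main.c) runs the strong definition; dropping arch.c falls back to the weak default, which is exactly how the per-arch probe post-processing hook behaves.
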
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index f2d9ff064e2d..5c290c682afe 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -297,10 +297,13 @@ static int convert_variable_type(Dwarf_Die *vr_die,
 	char sbuf[STRERR_BUFSIZE];
 	int bsize, boffs, total;
 	int ret;
+	char sign;
 
 	/* TODO: check all types */
-	if (cast && strcmp(cast, "string") != 0) {
+	if (cast && strcmp(cast, "string") != 0 &&
+	    strcmp(cast, "s") != 0 && strcmp(cast, "u") != 0) {
 		/* Non string type is OK */
+		/* and respect signedness cast */
 		tvar->type = strdup(cast);
 		return (tvar->type == NULL) ? -ENOMEM : 0;
 	}
@@ -361,6 +364,13 @@ static int convert_variable_type(Dwarf_Die *vr_die,
 		return (tvar->type == NULL) ? -ENOMEM : 0;
 	}
 
+	if (cast && (strcmp(cast, "u") == 0))
+		sign = 'u';
+	else if (cast && (strcmp(cast, "s") == 0))
+		sign = 's';
+	else
+		sign = die_is_signed_type(&type) ? 's' : 'u';
+
 	ret = dwarf_bytesize(&type);
 	if (ret <= 0)
 		/* No size ... try to use default type */
@@ -373,8 +383,7 @@ static int convert_variable_type(Dwarf_Die *vr_die,
 			dwarf_diename(&type), MAX_BASIC_TYPE_BITS);
 		ret = MAX_BASIC_TYPE_BITS;
 	}
-	ret = snprintf(buf, 16, "%c%d",
-		       die_is_signed_type(&type) ? 's' : 'u', ret);
+	ret = snprintf(buf, 16, "%c%d", sign, ret);
 
 formatted:
 	if (ret < 0 || ret >= 16) {
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 947d21f38398..3d3cb8392c86 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -588,7 +588,11 @@ static char *get_trace_output(struct hist_entry *he)
 	} else {
 		pevent_event_info(&seq, evsel->tp_format, &rec);
 	}
-	return seq.buffer;
+	/*
+	 * Trim the buffer, it starts at 4KB and we're not going to
+	 * add anything more to this buffer.
+	 */
+	return realloc(seq.buffer, seq.len + 1);
 }
 
 static int64_t
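
Not part of the patch: the sort.c hunk above uses a small but generally useful pattern, shrinking a grow-only buffer to its final size with one realloc() before handing ownership to the caller. A minimal standalone sketch of that pattern, with made-up names:

#include <stdlib.h>

/*
 * Shrink an over-allocated, NUL-terminated string buffer of 'len' used
 * bytes down to exactly what is needed.  realloc() leaves the original
 * block untouched on failure, so falling back to 'buf' is always safe.
 */
static char *trim_buffer(char *buf, size_t len)
{
	char *trimmed = realloc(buf, len + 1);	/* + 1 for the terminating NUL */

	return trimmed ? trimmed : buf;
}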


* [GIT PULL] perf fixes
@ 2016-07-26 14:13 Ingo Molnar
  0 siblings, 0 replies; 316+ messages in thread
From: Ingo Molnar @ 2016-07-26 14:13 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, Stephen Rothwell, Arnaldo Carvalho de Melo,
	Peter Zijlstra, Thomas Gleixner, Jiri Olsa, Andrew Morton

Linus,

Please pull the latest perf-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf-urgent-for-linus

   # HEAD: 674d2d69b14f677a771ceec4b48bfade94a0c5f1 Merge tag 'perf-core-for-mingo-20160725' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

This tree contains tooling fixes plus some additions:

 - fixes to the vdso2c build environment that Stephen Rothwell is using for the 
   linux-next build (Arnaldo Carvalho de Melo)

 - AVX-512 instruction mappings (Adrian Hunter)

 - misc fixes 

 Thanks,

	Ingo

------------------>
Adrian Hunter (4):
      x86/insn: perf tools: Fix vcvtph2ps instruction decoding
      x86/insn: Add AVX-512 support to the instruction decoder
      perf tools: Add AVX-512 support to the instruction decoder used by Intel PT
      perf tools: Add AVX-512 instructions to the new instructions test

Arnaldo Carvalho de Melo (5):
      perf tests kmod-path: Fix build on ubuntu:16.04-x-armhf
      tools build: Add HOSTARCH Makefile variable
      objtool: Use tools/scripts/Makefile.arch to get ARCH and HOSTARCH
      objtool: Always use host headers
      Revert "perf tools: event.h needs asm/perf_regs.h"

Josh Poimboeuf (1):
      tools build: Fix objtool build with ARCH=x86_64

Stephen Rothwell (1):
      x86: Make the vdso2c compiler use the host architecture headers


 arch/x86/entry/vdso/Makefile                       |    2 +-
 arch/x86/include/asm/inat.h                        |   17 +-
 arch/x86/include/asm/insn.h                        |   12 +-
 arch/x86/lib/insn.c                                |   18 +-
 arch/x86/lib/x86-opcode-map.txt                    |  263 ++-
 arch/x86/tools/gen-insn-attr-x86.awk               |   11 +-
 tools/objtool/Build                                |    2 +-
 tools/objtool/Makefile                             |    8 +-
 tools/perf/arch/x86/tests/insn-x86-dat-32.c        | 1018 ++++++++++-
 tools/perf/arch/x86/tests/insn-x86-dat-64.c        |  940 +++++++++-
 tools/perf/arch/x86/tests/insn-x86-dat-src.c       | 1789 ++++++++++++++++++++
 tools/perf/tests/kmod-path.c                       |    1 +
 tools/perf/util/event.h                            |    1 -
 .../util/intel-pt-decoder/gen-insn-attr-x86.awk    |   11 +-
 tools/perf/util/intel-pt-decoder/inat.h            |   17 +-
 tools/perf/util/intel-pt-decoder/insn.c            |   18 +-
 tools/perf/util/intel-pt-decoder/insn.h            |   12 +-
 .../perf/util/intel-pt-decoder/x86-opcode-map.txt  |  263 ++-
 tools/scripts/Makefile.arch                        |   41 +-
 19 files changed, 4221 insertions(+), 223 deletions(-)

diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 253b72eaade6..25e88c030c47 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -55,7 +55,7 @@ VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
 $(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE
 	$(call if_changed,vdso)
 
-HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/x86/include/uapi
+HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/$(SUBARCH)/include/uapi
 hostprogs-y			+= vdso2c
 
 quiet_cmd_vdso2c = VDSO2C  $@
diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h
index 74a2e312e8a2..02aff0867211 100644
--- a/arch/x86/include/asm/inat.h
+++ b/arch/x86/include/asm/inat.h
@@ -48,6 +48,7 @@
 /* AVX VEX prefixes */
 #define INAT_PFX_VEX2	13	/* 2-bytes VEX prefix */
 #define INAT_PFX_VEX3	14	/* 3-bytes VEX prefix */
+#define INAT_PFX_EVEX	15	/* EVEX prefix */
 
 #define INAT_LSTPFX_MAX	3
 #define INAT_LGCPFX_MAX	11
@@ -89,6 +90,7 @@
 #define INAT_VARIANT	(1 << (INAT_FLAG_OFFS + 4))
 #define INAT_VEXOK	(1 << (INAT_FLAG_OFFS + 5))
 #define INAT_VEXONLY	(1 << (INAT_FLAG_OFFS + 6))
+#define INAT_EVEXONLY	(1 << (INAT_FLAG_OFFS + 7))
 /* Attribute making macros for attribute tables */
 #define INAT_MAKE_PREFIX(pfx)	(pfx << INAT_PFX_OFFS)
 #define INAT_MAKE_ESCAPE(esc)	(esc << INAT_ESC_OFFS)
@@ -141,7 +143,13 @@ static inline int inat_last_prefix_id(insn_attr_t attr)
 static inline int inat_is_vex_prefix(insn_attr_t attr)
 {
 	attr &= INAT_PFX_MASK;
-	return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3;
+	return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3 ||
+	       attr == INAT_PFX_EVEX;
+}
+
+static inline int inat_is_evex_prefix(insn_attr_t attr)
+{
+	return (attr & INAT_PFX_MASK) == INAT_PFX_EVEX;
 }
 
 static inline int inat_is_vex3_prefix(insn_attr_t attr)
@@ -216,6 +224,11 @@ static inline int inat_accept_vex(insn_attr_t attr)
 
 static inline int inat_must_vex(insn_attr_t attr)
 {
-	return attr & INAT_VEXONLY;
+	return attr & (INAT_VEXONLY | INAT_EVEXONLY);
+}
+
+static inline int inat_must_evex(insn_attr_t attr)
+{
+	return attr & INAT_EVEXONLY;
 }
 #endif
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index e7814b74caf8..b3e32b010ab1 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -91,6 +91,7 @@ struct insn {
 #define X86_VEX_B(vex)	((vex) & 0x20)	/* VEX3 Byte1 */
 #define X86_VEX_L(vex)	((vex) & 0x04)	/* VEX3 Byte2, VEX2 Byte1 */
 /* VEX bit fields */
+#define X86_EVEX_M(vex)	((vex) & 0x03)		/* EVEX Byte1 */
 #define X86_VEX3_M(vex)	((vex) & 0x1f)		/* VEX3 Byte1 */
 #define X86_VEX2_M	1			/* VEX2.M always 1 */
 #define X86_VEX_V(vex)	(((vex) & 0x78) >> 3)	/* VEX3 Byte2, VEX2 Byte1 */
@@ -133,6 +134,13 @@ static inline int insn_is_avx(struct insn *insn)
 	return (insn->vex_prefix.value != 0);
 }
 
+static inline int insn_is_evex(struct insn *insn)
+{
+	if (!insn->prefixes.got)
+		insn_get_prefixes(insn);
+	return (insn->vex_prefix.nbytes == 4);
+}
+
 /* Ensure this instruction is decoded completely */
 static inline int insn_complete(struct insn *insn)
 {
@@ -144,8 +152,10 @@ static inline insn_byte_t insn_vex_m_bits(struct insn *insn)
 {
 	if (insn->vex_prefix.nbytes == 2)	/* 2 bytes VEX */
 		return X86_VEX2_M;
-	else
+	else if (insn->vex_prefix.nbytes == 3)	/* 3 bytes VEX */
 		return X86_VEX3_M(insn->vex_prefix.bytes[1]);
+	else					/* EVEX */
+		return X86_EVEX_M(insn->vex_prefix.bytes[1]);
 }
 
 static inline insn_byte_t insn_vex_p_bits(struct insn *insn)
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 1a416935bac9..1088eb8f3a5f 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -155,14 +155,24 @@ void insn_get_prefixes(struct insn *insn)
 			/*
 			 * In 32-bits mode, if the [7:6] bits (mod bits of
 			 * ModRM) on the second byte are not 11b, it is
-			 * LDS or LES.
+			 * LDS or LES or BOUND.
 			 */
 			if (X86_MODRM_MOD(b2) != 3)
 				goto vex_end;
 		}
 		insn->vex_prefix.bytes[0] = b;
 		insn->vex_prefix.bytes[1] = b2;
-		if (inat_is_vex3_prefix(attr)) {
+		if (inat_is_evex_prefix(attr)) {
+			b2 = peek_nbyte_next(insn_byte_t, insn, 2);
+			insn->vex_prefix.bytes[2] = b2;
+			b2 = peek_nbyte_next(insn_byte_t, insn, 3);
+			insn->vex_prefix.bytes[3] = b2;
+			insn->vex_prefix.nbytes = 4;
+			insn->next_byte += 4;
+			if (insn->x86_64 && X86_VEX_W(b2))
+				/* VEX.W overrides opnd_size */
+				insn->opnd_bytes = 8;
+		} else if (inat_is_vex3_prefix(attr)) {
 			b2 = peek_nbyte_next(insn_byte_t, insn, 2);
 			insn->vex_prefix.bytes[2] = b2;
 			insn->vex_prefix.nbytes = 3;
@@ -221,7 +231,9 @@ void insn_get_opcode(struct insn *insn)
 		m = insn_vex_m_bits(insn);
 		p = insn_vex_p_bits(insn);
 		insn->attr = inat_get_avx_attribute(op, m, p);
-		if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr))
+		if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) ||
+		    (!inat_accept_vex(insn->attr) &&
+		     !inat_is_group(insn->attr)))
 			insn->attr = 0;	/* This instruction is bad */
 		goto end;	/* VEX has only 1 byte for opcode */
 	}
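
Not from the kernel tree: the insn.c hunk above relies on how the decoder tells the new 4-byte EVEX prefix apart from the legacy BOUND instruction, since both start with byte 0x62 in 32-bit code. A tiny standalone sketch of that check, mirroring the ModRM test visible in the hunk:

#include <stdbool.h>

/*
 * In 64-bit code BOUND does not exist, so 0x62 always starts an EVEX
 * prefix.  In 32-bit code the byte that follows decides: BOUND takes a
 * memory operand, so the mod bits ([7:6]) of its ModRM byte are never
 * 11b, while a valid 32-bit EVEX prefix always has those two bits set
 * (they are inverted register-extension bits that only matter in
 * 64-bit code).
 */
static bool byte_62_is_evex(const unsigned char *insn, bool x86_64)
{
	if (insn[0] != 0x62)
		return false;
	if (x86_64)
		return true;
	return ((insn[1] >> 6) & 0x3) == 0x3;	/* mod == 11b => EVEX, else BOUND */
}
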
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index d388de72eaca..ec378cd7b71e 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -13,12 +13,17 @@
 # opcode: escape # escaped-name
 # EndTable
 #
+# mnemonics that begin with lowercase 'v' accept a VEX or EVEX prefix
+# mnemonics that begin with lowercase 'k' accept a VEX prefix
+#
 #<group maps>
 # GrpTable: GrpXXX
 # reg:  mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...]
 # EndTable
 #
 # AVX Superscripts
+#  (ev): this opcode requires EVEX prefix.
+#  (evo): this opcode is changed by EVEX prefix (EVEX opcode)
 #  (v): this opcode requires VEX prefix.
 #  (v1): this opcode only supports 128bit VEX.
 #
@@ -137,7 +142,7 @@ Table: one byte opcode
 # 0x60 - 0x6f
 60: PUSHA/PUSHAD (i64)
 61: POPA/POPAD (i64)
-62: BOUND Gv,Ma (i64)
+62: BOUND Gv,Ma (i64) | EVEX (Prefix)
 63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64)
 64: SEG=FS (Prefix)
 65: SEG=GS (Prefix)
@@ -399,17 +404,17 @@ AVXcode: 1
 3f:
 # 0x0f 0x40-0x4f
 40: CMOVO Gv,Ev
-41: CMOVNO Gv,Ev
-42: CMOVB/C/NAE Gv,Ev
+41: CMOVNO Gv,Ev | kandw/q Vk,Hk,Uk | kandb/d Vk,Hk,Uk (66)
+42: CMOVB/C/NAE Gv,Ev | kandnw/q Vk,Hk,Uk | kandnb/d Vk,Hk,Uk (66)
 43: CMOVAE/NB/NC Gv,Ev
-44: CMOVE/Z Gv,Ev
-45: CMOVNE/NZ Gv,Ev
-46: CMOVBE/NA Gv,Ev
-47: CMOVA/NBE Gv,Ev
+44: CMOVE/Z Gv,Ev | knotw/q Vk,Uk | knotb/d Vk,Uk (66)
+45: CMOVNE/NZ Gv,Ev | korw/q Vk,Hk,Uk | korb/d Vk,Hk,Uk (66)
+46: CMOVBE/NA Gv,Ev | kxnorw/q Vk,Hk,Uk | kxnorb/d Vk,Hk,Uk (66)
+47: CMOVA/NBE Gv,Ev | kxorw/q Vk,Hk,Uk | kxorb/d Vk,Hk,Uk (66)
 48: CMOVS Gv,Ev
 49: CMOVNS Gv,Ev
-4a: CMOVP/PE Gv,Ev
-4b: CMOVNP/PO Gv,Ev
+4a: CMOVP/PE Gv,Ev | kaddw/q Vk,Hk,Uk | kaddb/d Vk,Hk,Uk (66)
+4b: CMOVNP/PO Gv,Ev | kunpckbw Vk,Hk,Uk (66) | kunpckwd/dq Vk,Hk,Uk
 4c: CMOVL/NGE Gv,Ev
 4d: CMOVNL/GE Gv,Ev
 4e: CMOVLE/NG Gv,Ev
@@ -426,7 +431,7 @@ AVXcode: 1
 58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1)
 59: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1)
 5a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1)
-5b: vcvtdq2ps Vps,Wdq | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3)
+5b: vcvtdq2ps Vps,Wdq | vcvtqq2ps Vps,Wqq (evo) | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3)
 5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1)
 5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1)
 5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1)
@@ -447,7 +452,7 @@ AVXcode: 1
 6c: vpunpcklqdq Vx,Hx,Wx (66),(v1)
 6d: vpunpckhqdq Vx,Hx,Wx (66),(v1)
 6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1)
-6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqu Vx,Wx (F3)
+6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqa32/64 Vx,Wx (66),(evo) | vmovdqu Vx,Wx (F3) | vmovdqu32/64 Vx,Wx (F3),(evo) | vmovdqu8/16 Vx,Wx (F2),(ev)
 # 0x0f 0x70-0x7f
 70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1)
 71: Grp12 (1A)
@@ -458,14 +463,14 @@ AVXcode: 1
 76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1)
 # Note: Remove (v), because vzeroall and vzeroupper becomes emms without VEX.
 77: emms | vzeroupper | vzeroall
-78: VMREAD Ey,Gy
-79: VMWRITE Gy,Ey
-7a:
-7b:
+78: VMREAD Ey,Gy | vcvttps2udq/pd2udq Vx,Wpd (evo) | vcvttsd2usi Gv,Wx (F2),(ev) | vcvttss2usi Gv,Wx (F3),(ev) | vcvttps2uqq/pd2uqq Vx,Wx (66),(ev)
+79: VMWRITE Gy,Ey | vcvtps2udq/pd2udq Vx,Wpd (evo) | vcvtsd2usi Gv,Wx (F2),(ev) | vcvtss2usi Gv,Wx (F3),(ev) | vcvtps2uqq/pd2uqq Vx,Wx (66),(ev)
+7a: vcvtudq2pd/uqq2pd Vpd,Wx (F3),(ev) | vcvtudq2ps/uqq2ps Vpd,Wx (F2),(ev) | vcvttps2qq/pd2qq Vx,Wx (66),(ev)
+7b: vcvtusi2sd Vpd,Hpd,Ev (F2),(ev) | vcvtusi2ss Vps,Hps,Ev (F3),(ev) | vcvtps2qq/pd2qq Vx,Wx (66),(ev)
 7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2)
 7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2)
 7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1)
-7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3)
+7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqa32/64 Wx,Vx (66),(evo) | vmovdqu Wx,Vx (F3) | vmovdqu32/64 Wx,Vx (F3),(evo) | vmovdqu8/16 Wx,Vx (F2),(ev)
 # 0x0f 0x80-0x8f
 # Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
 80: JO Jz (f64)
@@ -485,16 +490,16 @@ AVXcode: 1
 8e: JLE/JNG Jz (f64)
 8f: JNLE/JG Jz (f64)
 # 0x0f 0x90-0x9f
-90: SETO Eb
-91: SETNO Eb
-92: SETB/C/NAE Eb
-93: SETAE/NB/NC Eb
+90: SETO Eb | kmovw/q Vk,Wk | kmovb/d Vk,Wk (66)
+91: SETNO Eb | kmovw/q Mv,Vk | kmovb/d Mv,Vk (66)
+92: SETB/C/NAE Eb | kmovw Vk,Rv | kmovb Vk,Rv (66) | kmovq/d Vk,Rv (F2)
+93: SETAE/NB/NC Eb | kmovw Gv,Uk | kmovb Gv,Uk (66) | kmovq/d Gv,Uk (F2)
 94: SETE/Z Eb
 95: SETNE/NZ Eb
 96: SETBE/NA Eb
 97: SETA/NBE Eb
-98: SETS Eb
-99: SETNS Eb
+98: SETS Eb | kortestw/q Vk,Uk | kortestb/d Vk,Uk (66)
+99: SETNS Eb | ktestw/q Vk,Uk | ktestb/d Vk,Uk (66)
 9a: SETP/PE Eb
 9b: SETNP/PO Eb
 9c: SETL/NGE Eb
@@ -564,11 +569,11 @@ d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1)
 d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1)
 d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1)
 da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1)
-db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1)
+db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) | vpandd/q Vx,Hx,Wx (66),(evo)
 dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1)
 dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1)
 de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1)
-df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1)
+df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) | vpandnd/q Vx,Hx,Wx (66),(evo)
 # 0x0f 0xe0-0xef
 e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1)
 e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1)
@@ -576,16 +581,16 @@ e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1)
 e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1)
 e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1)
 e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1)
-e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtpd2dq Vx,Wpd (F2)
+e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtdq2pd/qq2pd Vx,Wdq (F3),(evo) | vcvtpd2dq Vx,Wpd (F2)
 e7: movntq Mq,Pq | vmovntdq Mx,Vx (66)
 e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1)
 e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1)
 ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1)
-eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1)
+eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) | vpord/q Vx,Hx,Wx (66),(evo)
 ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1)
 ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1)
 ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1)
-ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1)
+ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) | vpxord/q Vx,Hx,Wx (66),(evo)
 # 0x0f 0xf0-0xff
 f0: vlddqu Vx,Mx (F2)
 f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1)
@@ -626,81 +631,105 @@ AVXcode: 2
 0e: vtestps Vx,Wx (66),(v)
 0f: vtestpd Vx,Wx (66),(v)
 # 0x0f 0x38 0x10-0x1f
-10: pblendvb Vdq,Wdq (66)
-11:
-12:
-13: vcvtph2ps Vx,Wx,Ib (66),(v)
-14: blendvps Vdq,Wdq (66)
-15: blendvpd Vdq,Wdq (66)
-16: vpermps Vqq,Hqq,Wqq (66),(v)
+10: pblendvb Vdq,Wdq (66) | vpsrlvw Vx,Hx,Wx (66),(evo) | vpmovuswb Wx,Vx (F3),(ev)
+11: vpmovusdb Wx,Vd (F3),(ev) | vpsravw Vx,Hx,Wx (66),(ev)
+12: vpmovusqb Wx,Vq (F3),(ev) | vpsllvw Vx,Hx,Wx (66),(ev)
+13: vcvtph2ps Vx,Wx (66),(v) | vpmovusdw Wx,Vd (F3),(ev)
+14: blendvps Vdq,Wdq (66) | vpmovusqw Wx,Vq (F3),(ev) | vprorvd/q Vx,Hx,Wx (66),(evo)
+15: blendvpd Vdq,Wdq (66) | vpmovusqd Wx,Vq (F3),(ev) | vprolvd/q Vx,Hx,Wx (66),(evo)
+16: vpermps Vqq,Hqq,Wqq (66),(v) | vpermps/d Vqq,Hqq,Wqq (66),(evo)
 17: vptest Vx,Wx (66)
 18: vbroadcastss Vx,Wd (66),(v)
-19: vbroadcastsd Vqq,Wq (66),(v)
-1a: vbroadcastf128 Vqq,Mdq (66),(v)
-1b:
+19: vbroadcastsd Vqq,Wq (66),(v) | vbroadcastf32x2 Vqq,Wq (66),(evo)
+1a: vbroadcastf128 Vqq,Mdq (66),(v) | vbroadcastf32x4/64x2 Vqq,Wq (66),(evo)
+1b: vbroadcastf32x8/64x4 Vqq,Mdq (66),(ev)
 1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1)
 1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1)
 1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1)
-1f:
+1f: vpabsq Vx,Wx (66),(ev)
 # 0x0f 0x38 0x20-0x2f
-20: vpmovsxbw Vx,Ux/Mq (66),(v1)
-21: vpmovsxbd Vx,Ux/Md (66),(v1)
-22: vpmovsxbq Vx,Ux/Mw (66),(v1)
-23: vpmovsxwd Vx,Ux/Mq (66),(v1)
-24: vpmovsxwq Vx,Ux/Md (66),(v1)
-25: vpmovsxdq Vx,Ux/Mq (66),(v1)
-26:
-27:
-28: vpmuldq Vx,Hx,Wx (66),(v1)
-29: vpcmpeqq Vx,Hx,Wx (66),(v1)
-2a: vmovntdqa Vx,Mx (66),(v1)
+20: vpmovsxbw Vx,Ux/Mq (66),(v1) | vpmovswb Wx,Vx (F3),(ev)
+21: vpmovsxbd Vx,Ux/Md (66),(v1) | vpmovsdb Wx,Vd (F3),(ev)
+22: vpmovsxbq Vx,Ux/Mw (66),(v1) | vpmovsqb Wx,Vq (F3),(ev)
+23: vpmovsxwd Vx,Ux/Mq (66),(v1) | vpmovsdw Wx,Vd (F3),(ev)
+24: vpmovsxwq Vx,Ux/Md (66),(v1) | vpmovsqw Wx,Vq (F3),(ev)
+25: vpmovsxdq Vx,Ux/Mq (66),(v1) | vpmovsqd Wx,Vq (F3),(ev)
+26: vptestmb/w Vk,Hx,Wx (66),(ev) | vptestnmb/w Vk,Hx,Wx (F3),(ev)
+27: vptestmd/q Vk,Hx,Wx (66),(ev) | vptestnmd/q Vk,Hx,Wx (F3),(ev)
+28: vpmuldq Vx,Hx,Wx (66),(v1) | vpmovm2b/w Vx,Uk (F3),(ev)
+29: vpcmpeqq Vx,Hx,Wx (66),(v1) | vpmovb2m/w2m Vk,Ux (F3),(ev)
+2a: vmovntdqa Vx,Mx (66),(v1) | vpbroadcastmb2q Vx,Uk (F3),(ev)
 2b: vpackusdw Vx,Hx,Wx (66),(v1)
-2c: vmaskmovps Vx,Hx,Mx (66),(v)
-2d: vmaskmovpd Vx,Hx,Mx (66),(v)
+2c: vmaskmovps Vx,Hx,Mx (66),(v) | vscalefps/d Vx,Hx,Wx (66),(evo)
+2d: vmaskmovpd Vx,Hx,Mx (66),(v) | vscalefss/d Vx,Hx,Wx (66),(evo)
 2e: vmaskmovps Mx,Hx,Vx (66),(v)
 2f: vmaskmovpd Mx,Hx,Vx (66),(v)
 # 0x0f 0x38 0x30-0x3f
-30: vpmovzxbw Vx,Ux/Mq (66),(v1)
-31: vpmovzxbd Vx,Ux/Md (66),(v1)
-32: vpmovzxbq Vx,Ux/Mw (66),(v1)
-33: vpmovzxwd Vx,Ux/Mq (66),(v1)
-34: vpmovzxwq Vx,Ux/Md (66),(v1)
-35: vpmovzxdq Vx,Ux/Mq (66),(v1)
-36: vpermd Vqq,Hqq,Wqq (66),(v)
+30: vpmovzxbw Vx,Ux/Mq (66),(v1) | vpmovwb Wx,Vx (F3),(ev)
+31: vpmovzxbd Vx,Ux/Md (66),(v1) | vpmovdb Wx,Vd (F3),(ev)
+32: vpmovzxbq Vx,Ux/Mw (66),(v1) | vpmovqb Wx,Vq (F3),(ev)
+33: vpmovzxwd Vx,Ux/Mq (66),(v1) | vpmovdw Wx,Vd (F3),(ev)
+34: vpmovzxwq Vx,Ux/Md (66),(v1) | vpmovqw Wx,Vq (F3),(ev)
+35: vpmovzxdq Vx,Ux/Mq (66),(v1) | vpmovqd Wx,Vq (F3),(ev)
+36: vpermd Vqq,Hqq,Wqq (66),(v) | vpermd/q Vqq,Hqq,Wqq (66),(evo)
 37: vpcmpgtq Vx,Hx,Wx (66),(v1)
-38: vpminsb Vx,Hx,Wx (66),(v1)
-39: vpminsd Vx,Hx,Wx (66),(v1)
-3a: vpminuw Vx,Hx,Wx (66),(v1)
-3b: vpminud Vx,Hx,Wx (66),(v1)
+38: vpminsb Vx,Hx,Wx (66),(v1) | vpmovm2d/q Vx,Uk (F3),(ev)
+39: vpminsd Vx,Hx,Wx (66),(v1) | vpminsd/q Vx,Hx,Wx (66),(evo) | vpmovd2m/q2m Vk,Ux (F3),(ev)
+3a: vpminuw Vx,Hx,Wx (66),(v1) | vpbroadcastmw2d Vx,Uk (F3),(ev)
+3b: vpminud Vx,Hx,Wx (66),(v1) | vpminud/q Vx,Hx,Wx (66),(evo)
 3c: vpmaxsb Vx,Hx,Wx (66),(v1)
-3d: vpmaxsd Vx,Hx,Wx (66),(v1)
+3d: vpmaxsd Vx,Hx,Wx (66),(v1) | vpmaxsd/q Vx,Hx,Wx (66),(evo)
 3e: vpmaxuw Vx,Hx,Wx (66),(v1)
-3f: vpmaxud Vx,Hx,Wx (66),(v1)
+3f: vpmaxud Vx,Hx,Wx (66),(v1) | vpmaxud/q Vx,Hx,Wx (66),(evo)
 # 0x0f 0x38 0x40-0x8f
-40: vpmulld Vx,Hx,Wx (66),(v1)
+40: vpmulld Vx,Hx,Wx (66),(v1) | vpmulld/q Vx,Hx,Wx (66),(evo)
 41: vphminposuw Vdq,Wdq (66),(v1)
-42:
-43:
-44:
+42: vgetexpps/d Vx,Wx (66),(ev)
+43: vgetexpss/d Vx,Hx,Wx (66),(ev)
+44: vplzcntd/q Vx,Wx (66),(ev)
 45: vpsrlvd/q Vx,Hx,Wx (66),(v)
-46: vpsravd Vx,Hx,Wx (66),(v)
+46: vpsravd Vx,Hx,Wx (66),(v) | vpsravd/q Vx,Hx,Wx (66),(evo)
 47: vpsllvd/q Vx,Hx,Wx (66),(v)
-# Skip 0x48-0x57
+# Skip 0x48-0x4b
+4c: vrcp14ps/d Vpd,Wpd (66),(ev)
+4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev)
+4e: vrsqrt14ps/d Vpd,Wpd (66),(ev)
+4f: vrsqrt14ss/d Vsd,Hsd,Wsd (66),(ev)
+# Skip 0x50-0x57
 58: vpbroadcastd Vx,Wx (66),(v)
-59: vpbroadcastq Vx,Wx (66),(v)
-5a: vbroadcasti128 Vqq,Mdq (66),(v)
-# Skip 0x5b-0x77
+59: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx (66),(evo)
+5a: vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo)
+5b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev)
+# Skip 0x5c-0x63
+64: vpblendmd/q Vx,Hx,Wx (66),(ev)
+65: vblendmps/d Vx,Hx,Wx (66),(ev)
+66: vpblendmb/w Vx,Hx,Wx (66),(ev)
+# Skip 0x67-0x74
+75: vpermi2b/w Vx,Hx,Wx (66),(ev)
+76: vpermi2d/q Vx,Hx,Wx (66),(ev)
+77: vpermi2ps/d Vx,Hx,Wx (66),(ev)
 78: vpbroadcastb Vx,Wx (66),(v)
 79: vpbroadcastw Vx,Wx (66),(v)
-# Skip 0x7a-0x7f
+7a: vpbroadcastb Vx,Rv (66),(ev)
+7b: vpbroadcastw Vx,Rv (66),(ev)
+7c: vpbroadcastd/q Vx,Rv (66),(ev)
+7d: vpermt2b/w Vx,Hx,Wx (66),(ev)
+7e: vpermt2d/q Vx,Hx,Wx (66),(ev)
+7f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
 80: INVEPT Gy,Mdq (66)
 81: INVPID Gy,Mdq (66)
 82: INVPCID Gy,Mdq (66)
+83: vpmultishiftqb Vx,Hx,Wx (66),(ev)
+88: vexpandps/d Vpd,Wpd (66),(ev)
+89: vpexpandd/q Vx,Wx (66),(ev)
+8a: vcompressps/d Wx,Vx (66),(ev)
+8b: vpcompressd/q Wx,Vx (66),(ev)
 8c: vpmaskmovd/q Vx,Hx,Mx (66),(v)
+8d: vpermb/w Vx,Hx,Wx (66),(ev)
 8e: vpmaskmovd/q Mx,Vx,Hx (66),(v)
 # 0x0f 0x38 0x90-0xbf (FMA)
-90: vgatherdd/q Vx,Hx,Wx (66),(v)
-91: vgatherqd/q Vx,Hx,Wx (66),(v)
+90: vgatherdd/q Vx,Hx,Wx (66),(v) | vpgatherdd/q Vx,Wx (66),(evo)
+91: vgatherqd/q Vx,Hx,Wx (66),(v) | vpgatherqd/q Vx,Wx (66),(evo)
 92: vgatherdps/d Vx,Hx,Wx (66),(v)
 93: vgatherqps/d Vx,Hx,Wx (66),(v)
 94:
@@ -715,6 +744,10 @@ AVXcode: 2
 9d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
 9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v)
 9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
+a0: vpscatterdd/q Wx,Vx (66),(ev)
+a1: vpscatterqd/q Wx,Vx (66),(ev)
+a2: vscatterdps/d Wx,Vx (66),(ev)
+a3: vscatterqps/d Wx,Vx (66),(ev)
 a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v)
 a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v)
 a8: vfmadd213ps/d Vx,Hx,Wx (66),(v)
@@ -725,6 +758,8 @@ ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v)
 ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
 ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v)
 af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
+b4: vpmadd52luq Vx,Hx,Wx (66),(ev)
+b5: vpmadd52huq Vx,Hx,Wx (66),(ev)
 b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v)
 b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v)
 b8: vfmadd231ps/d Vx,Hx,Wx (66),(v)
@@ -736,12 +771,15 @@ bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
 be: vfnmsub231ps/d Vx,Hx,Wx (66),(v)
 bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
 # 0x0f 0x38 0xc0-0xff
-c8: sha1nexte Vdq,Wdq
+c4: vpconflictd/q Vx,Wx (66),(ev)
+c6: Grp18 (1A)
+c7: Grp19 (1A)
+c8: sha1nexte Vdq,Wdq | vexp2ps/d Vx,Wx (66),(ev)
 c9: sha1msg1 Vdq,Wdq
-ca: sha1msg2 Vdq,Wdq
-cb: sha256rnds2 Vdq,Wdq
-cc: sha256msg1 Vdq,Wdq
-cd: sha256msg2 Vdq,Wdq
+ca: sha1msg2 Vdq,Wdq | vrcp28ps/d Vx,Wx (66),(ev)
+cb: sha256rnds2 Vdq,Wdq | vrcp28ss/d Vx,Hx,Wx (66),(ev)
+cc: sha256msg1 Vdq,Wdq | vrsqrt28ps/d Vx,Wx (66),(ev)
+cd: sha256msg2 Vdq,Wdq | vrsqrt28ss/d Vx,Hx,Wx (66),(ev)
 db: VAESIMC Vdq,Wdq (66),(v1)
 dc: VAESENC Vdq,Hdq,Wdq (66),(v1)
 dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1)
@@ -763,15 +801,15 @@ AVXcode: 3
 00: vpermq Vqq,Wqq,Ib (66),(v)
 01: vpermpd Vqq,Wqq,Ib (66),(v)
 02: vpblendd Vx,Hx,Wx,Ib (66),(v)
-03:
+03: valignd/q Vx,Hx,Wx,Ib (66),(ev)
 04: vpermilps Vx,Wx,Ib (66),(v)
 05: vpermilpd Vx,Wx,Ib (66),(v)
 06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v)
 07:
-08: vroundps Vx,Wx,Ib (66)
-09: vroundpd Vx,Wx,Ib (66)
-0a: vroundss Vss,Wss,Ib (66),(v1)
-0b: vroundsd Vsd,Wsd,Ib (66),(v1)
+08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo)
+09: vroundpd Vx,Wx,Ib (66) | vrndscalepd Vx,Wx,Ib (66),(evo)
+0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo)
+0b: vroundsd Vsd,Wsd,Ib (66),(v1) | vrndscalesd Vx,Hx,Wx,Ib (66),(evo)
 0c: vblendps Vx,Hx,Wx,Ib (66)
 0d: vblendpd Vx,Hx,Wx,Ib (66)
 0e: vpblendw Vx,Hx,Wx,Ib (66),(v1)
@@ -780,26 +818,51 @@ AVXcode: 3
 15: vpextrw Rd/Mw,Vdq,Ib (66),(v1)
 16: vpextrd/q Ey,Vdq,Ib (66),(v1)
 17: vextractps Ed,Vdq,Ib (66),(v1)
-18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v)
-19: vextractf128 Wdq,Vqq,Ib (66),(v)
+18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) | vinsertf32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo)
+19: vextractf128 Wdq,Vqq,Ib (66),(v) | vextractf32x4/64x2 Wdq,Vqq,Ib (66),(evo)
+1a: vinsertf32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev)
+1b: vextractf32x8/64x4 Wdq,Vqq,Ib (66),(ev)
 1d: vcvtps2ph Wx,Vx,Ib (66),(v)
+1e: vpcmpud/q Vk,Hd,Wd,Ib (66),(ev)
+1f: vpcmpd/q Vk,Hd,Wd,Ib (66),(ev)
 20: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1)
 21: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1)
 22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1)
-38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v)
-39: vextracti128 Wdq,Vqq,Ib (66),(v)
+23: vshuff32x4/64x2 Vx,Hx,Wx,Ib (66),(ev)
+25: vpternlogd/q Vx,Hx,Wx,Ib (66),(ev)
+26: vgetmantps/d Vx,Wx,Ib (66),(ev)
+27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev)
+30: kshiftrb/w Vk,Uk,Ib (66),(v)
+31: kshiftrd/q Vk,Uk,Ib (66),(v)
+32: kshiftlb/w Vk,Uk,Ib (66),(v)
+33: kshiftld/q Vk,Uk,Ib (66),(v)
+38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) | vinserti32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo)
+39: vextracti128 Wdq,Vqq,Ib (66),(v) | vextracti32x4/64x2 Wdq,Vqq,Ib (66),(evo)
+3a: vinserti32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev)
+3b: vextracti32x8/64x4 Wdq,Vqq,Ib (66),(ev)
+3e: vpcmpub/w Vk,Hk,Wx,Ib (66),(ev)
+3f: vpcmpb/w Vk,Hk,Wx,Ib (66),(ev)
 40: vdpps Vx,Hx,Wx,Ib (66)
 41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1)
-42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1)
+42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) | vdbpsadbw Vx,Hx,Wx,Ib (66),(evo)
+43: vshufi32x4/64x2 Vx,Hx,Wx,Ib (66),(ev)
 44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1)
 46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v)
 4a: vblendvps Vx,Hx,Wx,Lx (66),(v)
 4b: vblendvpd Vx,Hx,Wx,Lx (66),(v)
 4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1)
+50: vrangeps/d Vx,Hx,Wx,Ib (66),(ev)
+51: vrangess/d Vx,Hx,Wx,Ib (66),(ev)
+54: vfixupimmps/d Vx,Hx,Wx,Ib (66),(ev)
+55: vfixupimmss/d Vx,Hx,Wx,Ib (66),(ev)
+56: vreduceps/d Vx,Wx,Ib (66),(ev)
+57: vreducess/d Vx,Hx,Wx,Ib (66),(ev)
 60: vpcmpestrm Vdq,Wdq,Ib (66),(v1)
 61: vpcmpestri Vdq,Wdq,Ib (66),(v1)
 62: vpcmpistrm Vdq,Wdq,Ib (66),(v1)
 63: vpcmpistri Vdq,Wdq,Ib (66),(v1)
+66: vfpclassps/d Vk,Wx,Ib (66),(ev)
+67: vfpclassss/d Vk,Wx,Ib (66),(ev)
 cc: sha1rnds4 Vdq,Wdq,Ib
 df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1)
 f0: RORX Gy,Ey,Ib (F2),(v)
@@ -927,8 +990,10 @@ GrpTable: Grp12
 EndTable
 
 GrpTable: Grp13
+0: vprord/q Hx,Wx,Ib (66),(ev)
+1: vprold/q Hx,Wx,Ib (66),(ev)
 2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1)
-4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1)
+4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) | vpsrad/q Hx,Ux,Ib (66),(evo)
 6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1)
 EndTable
 
@@ -963,6 +1028,20 @@ GrpTable: Grp17
 3: BLSI By,Ey (v)
 EndTable
 
+GrpTable: Grp18
+1: vgatherpf0dps/d Wx (66),(ev)
+2: vgatherpf1dps/d Wx (66),(ev)
+5: vscatterpf0dps/d Wx (66),(ev)
+6: vscatterpf1dps/d Wx (66),(ev)
+EndTable
+
+GrpTable: Grp19
+1: vgatherpf0qps/d Wx (66),(ev)
+2: vgatherpf1qps/d Wx (66),(ev)
+5: vscatterpf0qps/d Wx (66),(ev)
+6: vscatterpf1qps/d Wx (66),(ev)
+EndTable
+
 # AMD's Prefetch Group
 GrpTable: GrpP
 0: PREFETCH
diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk
index 093a892026f9..a3d2c62fd805 100644
--- a/arch/x86/tools/gen-insn-attr-x86.awk
+++ b/arch/x86/tools/gen-insn-attr-x86.awk
@@ -72,12 +72,14 @@ BEGIN {
 	lprefix_expr = "\\((66|F2|F3)\\)"
 	max_lprefix = 4
 
-	# All opcodes starting with lower-case 'v' or with (v1) superscript
+	# All opcodes starting with lower-case 'v', 'k' or with (v1) superscript
 	# accepts VEX prefix
-	vexok_opcode_expr = "^v.*"
+	vexok_opcode_expr = "^[vk].*"
 	vexok_expr = "\\(v1\\)"
 	# All opcodes with (v) superscript supports *only* VEX prefix
 	vexonly_expr = "\\(v\\)"
+	# All opcodes with (ev) superscript supports *only* EVEX prefix
+	evexonly_expr = "\\(ev\\)"
 
 	prefix_expr = "\\(Prefix\\)"
 	prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
@@ -95,6 +97,7 @@ BEGIN {
 	prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
 	prefix_num["VEX+1byte"] = "INAT_PFX_VEX2"
 	prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"
+	prefix_num["EVEX"] = "INAT_PFX_EVEX"
 
 	clear_vars()
 }
@@ -319,7 +322,9 @@ function convert_operands(count,opnd,       i,j,imm,mod)
 			flags = add_flags(flags, "INAT_MODRM")
 
 		# check VEX codes
-		if (match(ext, vexonly_expr))
+		if (match(ext, evexonly_expr))
+			flags = add_flags(flags, "INAT_VEXOK | INAT_EVEXONLY")
+		else if (match(ext, vexonly_expr))
 			flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
 		else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr))
 			flags = add_flags(flags, "INAT_VEXOK")
diff --git a/tools/objtool/Build b/tools/objtool/Build
index 2457916a3943..d6cdece5e58b 100644
--- a/tools/objtool/Build
+++ b/tools/objtool/Build
@@ -1,4 +1,4 @@
-objtool-y += arch/$(ARCH)/
+objtool-y += arch/$(SRCARCH)/
 objtool-y += builtin-check.o
 objtool-y += elf.o
 objtool-y += special.o
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
index 1f75b0a046cc..0b437700f688 100644
--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile
@@ -1,11 +1,9 @@
 include ../scripts/Makefile.include
+include ../scripts/Makefile.arch
 
-ifndef ($(ARCH))
-ARCH ?= $(shell uname -m)
 ifeq ($(ARCH),x86_64)
 ARCH := x86
 endif
-endif
 
 # always use the host compiler
 CC = gcc
@@ -26,7 +24,7 @@ OBJTOOL_IN := $(OBJTOOL)-in.o
 
 all: $(OBJTOOL)
 
-INCLUDES := -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi
+INCLUDES := -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi
 CFLAGS   += -Wall -Werror $(EXTRA_WARNINGS) -fomit-frame-pointer -O2 -g $(INCLUDES)
 LDFLAGS  += -lelf $(LIBSUBCMD)
 
@@ -35,7 +33,7 @@ elfshdr := $(shell echo '\#include <libelf.h>' | $(CC) $(CFLAGS) -x c -E - | gre
 CFLAGS += $(if $(elfshdr),,-DLIBELF_USE_DEPRECATED)
 
 AWK = awk
-export srctree OUTPUT CFLAGS ARCH AWK
+export srctree OUTPUT CFLAGS SRCARCH AWK
 include $(srctree)/tools/build/Makefile.include
 
 $(OBJTOOL_IN): fixdep FORCE
diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-32.c b/tools/perf/arch/x86/tests/insn-x86-dat-32.c
index 3b491cfe204e..3918dd52e903 100644
--- a/tools/perf/arch/x86/tests/insn-x86-dat-32.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-32.c
@@ -6,6 +6,1016 @@
 
 {{0x0f, 0x31, }, 2, 0, "", "",
 "0f 31                \trdtsc  ",},
+{{0xc4, 0xe2, 0x7d, 0x13, 0xeb, }, 5, 0, "", "",
+"c4 e2 7d 13 eb       \tvcvtph2ps %xmm3,%ymm5",},
+{{0x62, 0x81, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "",
+"62 81 78 56 34 12    \tbound  %eax,0x12345678(%ecx)",},
+{{0x62, 0x88, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "",
+"62 88 78 56 34 12    \tbound  %ecx,0x12345678(%eax)",},
+{{0x62, 0x90, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "",
+"62 90 78 56 34 12    \tbound  %edx,0x12345678(%eax)",},
+{{0x62, 0x98, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "",
+"62 98 78 56 34 12    \tbound  %ebx,0x12345678(%eax)",},
+{{0x62, 0xa0, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "",
+"62 a0 78 56 34 12    \tbound  %esp,0x12345678(%eax)",},
+{{0x62, 0xa8, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "",
+"62 a8 78 56 34 12    \tbound  %ebp,0x12345678(%eax)",},
+{{0x62, 0xb0, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "",
+"62 b0 78 56 34 12    \tbound  %esi,0x12345678(%eax)",},
+{{0x62, 0xb8, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "",
+"62 b8 78 56 34 12    \tbound  %edi,0x12345678(%eax)",},
+{{0x62, 0x08, }, 2, 0, "", "",
+"62 08                \tbound  %ecx,(%eax)",},
+{{0x62, 0x05, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "",
+"62 05 78 56 34 12    \tbound  %eax,0x12345678",},
+{{0x62, 0x14, 0x01, }, 3, 0, "", "",
+"62 14 01             \tbound  %edx,(%ecx,%eax,1)",},
+{{0x62, 0x14, 0x05, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"62 14 05 78 56 34 12 \tbound  %edx,0x12345678(,%eax,1)",},
+{{0x62, 0x14, 0x08, }, 3, 0, "", "",
+"62 14 08             \tbound  %edx,(%eax,%ecx,1)",},
+{{0x62, 0x14, 0xc8, }, 3, 0, "", "",
+"62 14 c8             \tbound  %edx,(%eax,%ecx,8)",},
+{{0x62, 0x50, 0x12, }, 3, 0, "", "",
+"62 50 12             \tbound  %edx,0x12(%eax)",},
+{{0x62, 0x55, 0x12, }, 3, 0, "", "",
+"62 55 12             \tbound  %edx,0x12(%ebp)",},
+{{0x62, 0x54, 0x01, 0x12, }, 4, 0, "", "",
+"62 54 01 12          \tbound  %edx,0x12(%ecx,%eax,1)",},
+{{0x62, 0x54, 0x05, 0x12, }, 4, 0, "", "",
+"62 54 05 12          \tbound  %edx,0x12(%ebp,%eax,1)",},
+{{0x62, 0x54, 0x08, 0x12, }, 4, 0, "", "",
+"62 54 08 12          \tbound  %edx,0x12(%eax,%ecx,1)",},
+{{0x62, 0x54, 0xc8, 0x12, }, 4, 0, "", "",
+"62 54 c8 12          \tbound  %edx,0x12(%eax,%ecx,8)",},
+{{0x62, 0x90, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "",
+"62 90 78 56 34 12    \tbound  %edx,0x12345678(%eax)",},
+{{0x62, 0x95, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "",
+"62 95 78 56 34 12    \tbound  %edx,0x12345678(%ebp)",},
+{{0x62, 0x94, 0x01, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"62 94 01 78 56 34 12 \tbound  %edx,0x12345678(%ecx,%eax,1)",},
+{{0x62, 0x94, 0x05, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"62 94 05 78 56 34 12 \tbound  %edx,0x12345678(%ebp,%eax,1)",},
+{{0x62, 0x94, 0x08, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"62 94 08 78 56 34 12 \tbound  %edx,0x12345678(%eax,%ecx,1)",},
+{{0x62, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"62 94 c8 78 56 34 12 \tbound  %edx,0x12345678(%eax,%ecx,8)",},
+{{0x66, 0x62, 0x81, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"66 62 81 78 56 34 12 \tbound  %ax,0x12345678(%ecx)",},
+{{0x66, 0x62, 0x88, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"66 62 88 78 56 34 12 \tbound  %cx,0x12345678(%eax)",},
+{{0x66, 0x62, 0x90, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"66 62 90 78 56 34 12 \tbound  %dx,0x12345678(%eax)",},
+{{0x66, 0x62, 0x98, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"66 62 98 78 56 34 12 \tbound  %bx,0x12345678(%eax)",},
+{{0x66, 0x62, 0xa0, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"66 62 a0 78 56 34 12 \tbound  %sp,0x12345678(%eax)",},
+{{0x66, 0x62, 0xa8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"66 62 a8 78 56 34 12 \tbound  %bp,0x12345678(%eax)",},
+{{0x66, 0x62, 0xb0, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"66 62 b0 78 56 34 12 \tbound  %si,0x12345678(%eax)",},
+{{0x66, 0x62, 0xb8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"66 62 b8 78 56 34 12 \tbound  %di,0x12345678(%eax)",},
+{{0x66, 0x62, 0x08, }, 3, 0, "", "",
+"66 62 08             \tbound  %cx,(%eax)",},
+{{0x66, 0x62, 0x05, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"66 62 05 78 56 34 12 \tbound  %ax,0x12345678",},
+{{0x66, 0x62, 0x14, 0x01, }, 4, 0, "", "",
+"66 62 14 01          \tbound  %dx,(%ecx,%eax,1)",},
+{{0x66, 0x62, 0x14, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 62 14 05 78 56 34 12 \tbound  %dx,0x12345678(,%eax,1)",},
+{{0x66, 0x62, 0x14, 0x08, }, 4, 0, "", "",
+"66 62 14 08          \tbound  %dx,(%eax,%ecx,1)",},
+{{0x66, 0x62, 0x14, 0xc8, }, 4, 0, "", "",
+"66 62 14 c8          \tbound  %dx,(%eax,%ecx,8)",},
+{{0x66, 0x62, 0x50, 0x12, }, 4, 0, "", "",
+"66 62 50 12          \tbound  %dx,0x12(%eax)",},
+{{0x66, 0x62, 0x55, 0x12, }, 4, 0, "", "",
+"66 62 55 12          \tbound  %dx,0x12(%ebp)",},
+{{0x66, 0x62, 0x54, 0x01, 0x12, }, 5, 0, "", "",
+"66 62 54 01 12       \tbound  %dx,0x12(%ecx,%eax,1)",},
+{{0x66, 0x62, 0x54, 0x05, 0x12, }, 5, 0, "", "",
+"66 62 54 05 12       \tbound  %dx,0x12(%ebp,%eax,1)",},
+{{0x66, 0x62, 0x54, 0x08, 0x12, }, 5, 0, "", "",
+"66 62 54 08 12       \tbound  %dx,0x12(%eax,%ecx,1)",},
+{{0x66, 0x62, 0x54, 0xc8, 0x12, }, 5, 0, "", "",
+"66 62 54 c8 12       \tbound  %dx,0x12(%eax,%ecx,8)",},
+{{0x66, 0x62, 0x90, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"66 62 90 78 56 34 12 \tbound  %dx,0x12345678(%eax)",},
+{{0x66, 0x62, 0x95, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"66 62 95 78 56 34 12 \tbound  %dx,0x12345678(%ebp)",},
+{{0x66, 0x62, 0x94, 0x01, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 62 94 01 78 56 34 12 \tbound  %dx,0x12345678(%ecx,%eax,1)",},
+{{0x66, 0x62, 0x94, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 62 94 05 78 56 34 12 \tbound  %dx,0x12345678(%ebp,%eax,1)",},
+{{0x66, 0x62, 0x94, 0x08, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 62 94 08 78 56 34 12 \tbound  %dx,0x12345678(%eax,%ecx,1)",},
+{{0x66, 0x62, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 62 94 c8 78 56 34 12 \tbound  %dx,0x12345678(%eax,%ecx,8)",},
+{{0x0f, 0x41, 0xd8, }, 3, 0, "", "",
+"0f 41 d8             \tcmovno %eax,%ebx",},
+{{0x0f, 0x41, 0x88, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 41 88 78 56 34 12 \tcmovno 0x12345678(%eax),%ecx",},
+{{0x66, 0x0f, 0x41, 0x88, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 0f 41 88 78 56 34 12 \tcmovno 0x12345678(%eax),%cx",},
+{{0x0f, 0x44, 0xd8, }, 3, 0, "", "",
+"0f 44 d8             \tcmove  %eax,%ebx",},
+{{0x0f, 0x44, 0x88, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 44 88 78 56 34 12 \tcmove  0x12345678(%eax),%ecx",},
+{{0x66, 0x0f, 0x44, 0x88, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 0f 44 88 78 56 34 12 \tcmove  0x12345678(%eax),%cx",},
+{{0x0f, 0x90, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 90 80 78 56 34 12 \tseto   0x12345678(%eax)",},
+{{0x0f, 0x91, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 91 80 78 56 34 12 \tsetno  0x12345678(%eax)",},
+{{0x0f, 0x92, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 92 80 78 56 34 12 \tsetb   0x12345678(%eax)",},
+{{0x0f, 0x92, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 92 80 78 56 34 12 \tsetb   0x12345678(%eax)",},
+{{0x0f, 0x92, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 92 80 78 56 34 12 \tsetb   0x12345678(%eax)",},
+{{0x0f, 0x93, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 93 80 78 56 34 12 \tsetae  0x12345678(%eax)",},
+{{0x0f, 0x93, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 93 80 78 56 34 12 \tsetae  0x12345678(%eax)",},
+{{0x0f, 0x93, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 93 80 78 56 34 12 \tsetae  0x12345678(%eax)",},
+{{0x0f, 0x98, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 98 80 78 56 34 12 \tsets   0x12345678(%eax)",},
+{{0x0f, 0x99, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 99 80 78 56 34 12 \tsetns  0x12345678(%eax)",},
+{{0xc5, 0xcc, 0x41, 0xef, }, 4, 0, "", "",
+"c5 cc 41 ef          \tkandw  %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x41, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 41 ef       \tkandq  %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x41, 0xef, }, 4, 0, "", "",
+"c5 cd 41 ef          \tkandb  %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcd, 0x41, 0xef, }, 5, 0, "", "",
+"c4 e1 cd 41 ef       \tkandd  %k7,%k6,%k5",},
+{{0xc5, 0xcc, 0x42, 0xef, }, 4, 0, "", "",
+"c5 cc 42 ef          \tkandnw %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x42, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 42 ef       \tkandnq %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x42, 0xef, }, 4, 0, "", "",
+"c5 cd 42 ef          \tkandnb %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcd, 0x42, 0xef, }, 5, 0, "", "",
+"c4 e1 cd 42 ef       \tkandnd %k7,%k6,%k5",},
+{{0xc5, 0xf8, 0x44, 0xf7, }, 4, 0, "", "",
+"c5 f8 44 f7          \tknotw  %k7,%k6",},
+{{0xc4, 0xe1, 0xf8, 0x44, 0xf7, }, 5, 0, "", "",
+"c4 e1 f8 44 f7       \tknotq  %k7,%k6",},
+{{0xc5, 0xf9, 0x44, 0xf7, }, 4, 0, "", "",
+"c5 f9 44 f7          \tknotb  %k7,%k6",},
+{{0xc4, 0xe1, 0xf9, 0x44, 0xf7, }, 5, 0, "", "",
+"c4 e1 f9 44 f7       \tknotd  %k7,%k6",},
+{{0xc5, 0xcc, 0x45, 0xef, }, 4, 0, "", "",
+"c5 cc 45 ef          \tkorw   %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x45, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 45 ef       \tkorq   %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x45, 0xef, }, 4, 0, "", "",
+"c5 cd 45 ef          \tkorb   %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcd, 0x45, 0xef, }, 5, 0, "", "",
+"c4 e1 cd 45 ef       \tkord   %k7,%k6,%k5",},
+{{0xc5, 0xcc, 0x46, 0xef, }, 4, 0, "", "",
+"c5 cc 46 ef          \tkxnorw %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x46, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 46 ef       \tkxnorq %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x46, 0xef, }, 4, 0, "", "",
+"c5 cd 46 ef          \tkxnorb %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcd, 0x46, 0xef, }, 5, 0, "", "",
+"c4 e1 cd 46 ef       \tkxnord %k7,%k6,%k5",},
+{{0xc5, 0xcc, 0x47, 0xef, }, 4, 0, "", "",
+"c5 cc 47 ef          \tkxorw  %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x47, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 47 ef       \tkxorq  %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x47, 0xef, }, 4, 0, "", "",
+"c5 cd 47 ef          \tkxorb  %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcd, 0x47, 0xef, }, 5, 0, "", "",
+"c4 e1 cd 47 ef       \tkxord  %k7,%k6,%k5",},
+{{0xc5, 0xcc, 0x4a, 0xef, }, 4, 0, "", "",
+"c5 cc 4a ef          \tkaddw  %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x4a, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 4a ef       \tkaddq  %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x4a, 0xef, }, 4, 0, "", "",
+"c5 cd 4a ef          \tkaddb  %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcd, 0x4a, 0xef, }, 5, 0, "", "",
+"c4 e1 cd 4a ef       \tkaddd  %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x4b, 0xef, }, 4, 0, "", "",
+"c5 cd 4b ef          \tkunpckbw %k7,%k6,%k5",},
+{{0xc5, 0xcc, 0x4b, 0xef, }, 4, 0, "", "",
+"c5 cc 4b ef          \tkunpckwd %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x4b, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 4b ef       \tkunpckdq %k7,%k6,%k5",},
+{{0xc5, 0xf8, 0x90, 0xee, }, 4, 0, "", "",
+"c5 f8 90 ee          \tkmovw  %k6,%k5",},
+{{0xc5, 0xf8, 0x90, 0x29, }, 4, 0, "", "",
+"c5 f8 90 29          \tkmovw  (%ecx),%k5",},
+{{0xc5, 0xf8, 0x90, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, }, 9, 0, "", "",
+"c5 f8 90 ac c8 23 01 00 00 \tkmovw  0x123(%eax,%ecx,8),%k5",},
+{{0xc5, 0xf8, 0x91, 0x29, }, 4, 0, "", "",
+"c5 f8 91 29          \tkmovw  %k5,(%ecx)",},
+{{0xc5, 0xf8, 0x91, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, }, 9, 0, "", "",
+"c5 f8 91 ac c8 23 01 00 00 \tkmovw  %k5,0x123(%eax,%ecx,8)",},
+{{0xc5, 0xf8, 0x92, 0xe8, }, 4, 0, "", "",
+"c5 f8 92 e8          \tkmovw  %eax,%k5",},
+{{0xc5, 0xf8, 0x92, 0xed, }, 4, 0, "", "",
+"c5 f8 92 ed          \tkmovw  %ebp,%k5",},
+{{0xc5, 0xf8, 0x93, 0xc5, }, 4, 0, "", "",
+"c5 f8 93 c5          \tkmovw  %k5,%eax",},
+{{0xc5, 0xf8, 0x93, 0xed, }, 4, 0, "", "",
+"c5 f8 93 ed          \tkmovw  %k5,%ebp",},
+{{0xc4, 0xe1, 0xf8, 0x90, 0xee, }, 5, 0, "", "",
+"c4 e1 f8 90 ee       \tkmovq  %k6,%k5",},
+{{0xc4, 0xe1, 0xf8, 0x90, 0x29, }, 5, 0, "", "",
+"c4 e1 f8 90 29       \tkmovq  (%ecx),%k5",},
+{{0xc4, 0xe1, 0xf8, 0x90, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "",
+"c4 e1 f8 90 ac c8 23 01 00 00 \tkmovq  0x123(%eax,%ecx,8),%k5",},
+{{0xc4, 0xe1, 0xf8, 0x91, 0x29, }, 5, 0, "", "",
+"c4 e1 f8 91 29       \tkmovq  %k5,(%ecx)",},
+{{0xc4, 0xe1, 0xf8, 0x91, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "",
+"c4 e1 f8 91 ac c8 23 01 00 00 \tkmovq  %k5,0x123(%eax,%ecx,8)",},
+{{0xc5, 0xf9, 0x90, 0xee, }, 4, 0, "", "",
+"c5 f9 90 ee          \tkmovb  %k6,%k5",},
+{{0xc5, 0xf9, 0x90, 0x29, }, 4, 0, "", "",
+"c5 f9 90 29          \tkmovb  (%ecx),%k5",},
+{{0xc5, 0xf9, 0x90, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, }, 9, 0, "", "",
+"c5 f9 90 ac c8 23 01 00 00 \tkmovb  0x123(%eax,%ecx,8),%k5",},
+{{0xc5, 0xf9, 0x91, 0x29, }, 4, 0, "", "",
+"c5 f9 91 29          \tkmovb  %k5,(%ecx)",},
+{{0xc5, 0xf9, 0x91, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, }, 9, 0, "", "",
+"c5 f9 91 ac c8 23 01 00 00 \tkmovb  %k5,0x123(%eax,%ecx,8)",},
+{{0xc5, 0xf9, 0x92, 0xe8, }, 4, 0, "", "",
+"c5 f9 92 e8          \tkmovb  %eax,%k5",},
+{{0xc5, 0xf9, 0x92, 0xed, }, 4, 0, "", "",
+"c5 f9 92 ed          \tkmovb  %ebp,%k5",},
+{{0xc5, 0xf9, 0x93, 0xc5, }, 4, 0, "", "",
+"c5 f9 93 c5          \tkmovb  %k5,%eax",},
+{{0xc5, 0xf9, 0x93, 0xed, }, 4, 0, "", "",
+"c5 f9 93 ed          \tkmovb  %k5,%ebp",},
+{{0xc4, 0xe1, 0xf9, 0x90, 0xee, }, 5, 0, "", "",
+"c4 e1 f9 90 ee       \tkmovd  %k6,%k5",},
+{{0xc4, 0xe1, 0xf9, 0x90, 0x29, }, 5, 0, "", "",
+"c4 e1 f9 90 29       \tkmovd  (%ecx),%k5",},
+{{0xc4, 0xe1, 0xf9, 0x90, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "",
+"c4 e1 f9 90 ac c8 23 01 00 00 \tkmovd  0x123(%eax,%ecx,8),%k5",},
+{{0xc4, 0xe1, 0xf9, 0x91, 0x29, }, 5, 0, "", "",
+"c4 e1 f9 91 29       \tkmovd  %k5,(%ecx)",},
+{{0xc4, 0xe1, 0xf9, 0x91, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "",
+"c4 e1 f9 91 ac c8 23 01 00 00 \tkmovd  %k5,0x123(%eax,%ecx,8)",},
+{{0xc5, 0xfb, 0x92, 0xe8, }, 4, 0, "", "",
+"c5 fb 92 e8          \tkmovd  %eax,%k5",},
+{{0xc5, 0xfb, 0x92, 0xed, }, 4, 0, "", "",
+"c5 fb 92 ed          \tkmovd  %ebp,%k5",},
+{{0xc5, 0xfb, 0x93, 0xc5, }, 4, 0, "", "",
+"c5 fb 93 c5          \tkmovd  %k5,%eax",},
+{{0xc5, 0xfb, 0x93, 0xed, }, 4, 0, "", "",
+"c5 fb 93 ed          \tkmovd  %k5,%ebp",},
+{{0xc5, 0xf8, 0x98, 0xee, }, 4, 0, "", "",
+"c5 f8 98 ee          \tkortestw %k6,%k5",},
+{{0xc4, 0xe1, 0xf8, 0x98, 0xee, }, 5, 0, "", "",
+"c4 e1 f8 98 ee       \tkortestq %k6,%k5",},
+{{0xc5, 0xf9, 0x98, 0xee, }, 4, 0, "", "",
+"c5 f9 98 ee          \tkortestb %k6,%k5",},
+{{0xc4, 0xe1, 0xf9, 0x98, 0xee, }, 5, 0, "", "",
+"c4 e1 f9 98 ee       \tkortestd %k6,%k5",},
+{{0xc5, 0xf8, 0x99, 0xee, }, 4, 0, "", "",
+"c5 f8 99 ee          \tktestw %k6,%k5",},
+{{0xc4, 0xe1, 0xf8, 0x99, 0xee, }, 5, 0, "", "",
+"c4 e1 f8 99 ee       \tktestq %k6,%k5",},
+{{0xc5, 0xf9, 0x99, 0xee, }, 4, 0, "", "",
+"c5 f9 99 ee          \tktestb %k6,%k5",},
+{{0xc4, 0xe1, 0xf9, 0x99, 0xee, }, 5, 0, "", "",
+"c4 e1 f9 99 ee       \tktestd %k6,%k5",},
+{{0xc4, 0xe3, 0xf9, 0x30, 0xee, 0x12, }, 6, 0, "", "",
+"c4 e3 f9 30 ee 12    \tkshiftrw $0x12,%k6,%k5",},
+{{0xc4, 0xe3, 0xf9, 0x31, 0xee, 0x5b, }, 6, 0, "", "",
+"c4 e3 f9 31 ee 5b    \tkshiftrq $0x5b,%k6,%k5",},
+{{0xc4, 0xe3, 0xf9, 0x32, 0xee, 0x12, }, 6, 0, "", "",
+"c4 e3 f9 32 ee 12    \tkshiftlw $0x12,%k6,%k5",},
+{{0xc4, 0xe3, 0xf9, 0x33, 0xee, 0x5b, }, 6, 0, "", "",
+"c4 e3 f9 33 ee 5b    \tkshiftlq $0x5b,%k6,%k5",},
+{{0xc5, 0xf8, 0x5b, 0xf5, }, 4, 0, "", "",
+"c5 f8 5b f5          \tvcvtdq2ps %xmm5,%xmm6",},
+{{0x62, 0xf1, 0xfc, 0x4f, 0x5b, 0xf5, }, 6, 0, "", "",
+"62 f1 fc 4f 5b f5    \tvcvtqq2ps %zmm5,%ymm6{%k7}",},
+{{0xc5, 0xf9, 0x5b, 0xf5, }, 4, 0, "", "",
+"c5 f9 5b f5          \tvcvtps2dq %xmm5,%xmm6",},
+{{0xc5, 0xfa, 0x5b, 0xf5, }, 4, 0, "", "",
+"c5 fa 5b f5          \tvcvttps2dq %xmm5,%xmm6",},
+{{0x0f, 0x6f, 0xe0, }, 3, 0, "", "",
+"0f 6f e0             \tmovq   %mm0,%mm4",},
+{{0xc5, 0xfd, 0x6f, 0xf4, }, 4, 0, "", "",
+"c5 fd 6f f4          \tvmovdqa %ymm4,%ymm6",},
+{{0x62, 0xf1, 0x7d, 0x48, 0x6f, 0xf5, }, 6, 0, "", "",
+"62 f1 7d 48 6f f5    \tvmovdqa32 %zmm5,%zmm6",},
+{{0x62, 0xf1, 0xfd, 0x48, 0x6f, 0xf5, }, 6, 0, "", "",
+"62 f1 fd 48 6f f5    \tvmovdqa64 %zmm5,%zmm6",},
+{{0xc5, 0xfe, 0x6f, 0xf4, }, 4, 0, "", "",
+"c5 fe 6f f4          \tvmovdqu %ymm4,%ymm6",},
+{{0x62, 0xf1, 0x7e, 0x48, 0x6f, 0xf5, }, 6, 0, "", "",
+"62 f1 7e 48 6f f5    \tvmovdqu32 %zmm5,%zmm6",},
+{{0x62, 0xf1, 0xfe, 0x48, 0x6f, 0xf5, }, 6, 0, "", "",
+"62 f1 fe 48 6f f5    \tvmovdqu64 %zmm5,%zmm6",},
+{{0x62, 0xf1, 0x7f, 0x48, 0x6f, 0xf5, }, 6, 0, "", "",
+"62 f1 7f 48 6f f5    \tvmovdqu8 %zmm5,%zmm6",},
+{{0x62, 0xf1, 0xff, 0x48, 0x6f, 0xf5, }, 6, 0, "", "",
+"62 f1 ff 48 6f f5    \tvmovdqu16 %zmm5,%zmm6",},
+{{0x0f, 0x78, 0xc3, }, 3, 0, "", "",
+"0f 78 c3             \tvmread %eax,%ebx",},
+{{0x62, 0xf1, 0x7c, 0x48, 0x78, 0xf5, }, 6, 0, "", "",
+"62 f1 7c 48 78 f5    \tvcvttps2udq %zmm5,%zmm6",},
+{{0x62, 0xf1, 0xfc, 0x4f, 0x78, 0xf5, }, 6, 0, "", "",
+"62 f1 fc 4f 78 f5    \tvcvttpd2udq %zmm5,%ymm6{%k7}",},
+{{0x62, 0xf1, 0x7f, 0x08, 0x78, 0xc6, }, 6, 0, "", "",
+"62 f1 7f 08 78 c6    \tvcvttsd2usi %xmm6,%eax",},
+{{0x62, 0xf1, 0x7e, 0x08, 0x78, 0xc6, }, 6, 0, "", "",
+"62 f1 7e 08 78 c6    \tvcvttss2usi %xmm6,%eax",},
+{{0x62, 0xf1, 0x7d, 0x4f, 0x78, 0xf5, }, 6, 0, "", "",
+"62 f1 7d 4f 78 f5    \tvcvttps2uqq %ymm5,%zmm6{%k7}",},
+{{0x62, 0xf1, 0xfd, 0x48, 0x78, 0xf5, }, 6, 0, "", "",
+"62 f1 fd 48 78 f5    \tvcvttpd2uqq %zmm5,%zmm6",},
+{{0x0f, 0x79, 0xd8, }, 3, 0, "", "",
+"0f 79 d8             \tvmwrite %eax,%ebx",},
+{{0x62, 0xf1, 0x7c, 0x48, 0x79, 0xf5, }, 6, 0, "", "",
+"62 f1 7c 48 79 f5    \tvcvtps2udq %zmm5,%zmm6",},
+{{0x62, 0xf1, 0xfc, 0x4f, 0x79, 0xf5, }, 6, 0, "", "",
+"62 f1 fc 4f 79 f5    \tvcvtpd2udq %zmm5,%ymm6{%k7}",},
+{{0x62, 0xf1, 0x7f, 0x08, 0x79, 0xc6, }, 6, 0, "", "",
+"62 f1 7f 08 79 c6    \tvcvtsd2usi %xmm6,%eax",},
+{{0x62, 0xf1, 0x7e, 0x08, 0x79, 0xc6, }, 6, 0, "", "",
+"62 f1 7e 08 79 c6    \tvcvtss2usi %xmm6,%eax",},
+{{0x62, 0xf1, 0x7d, 0x4f, 0x79, 0xf5, }, 6, 0, "", "",
+"62 f1 7d 4f 79 f5    \tvcvtps2uqq %ymm5,%zmm6{%k7}",},
+{{0x62, 0xf1, 0xfd, 0x48, 0x79, 0xf5, }, 6, 0, "", "",
+"62 f1 fd 48 79 f5    \tvcvtpd2uqq %zmm5,%zmm6",},
+{{0x62, 0xf1, 0x7e, 0x4f, 0x7a, 0xf5, }, 6, 0, "", "",
+"62 f1 7e 4f 7a f5    \tvcvtudq2pd %ymm5,%zmm6{%k7}",},
+{{0x62, 0xf1, 0xfe, 0x48, 0x7a, 0xf5, }, 6, 0, "", "",
+"62 f1 fe 48 7a f5    \tvcvtuqq2pd %zmm5,%zmm6",},
+{{0x62, 0xf1, 0x7f, 0x48, 0x7a, 0xf5, }, 6, 0, "", "",
+"62 f1 7f 48 7a f5    \tvcvtudq2ps %zmm5,%zmm6",},
+{{0x62, 0xf1, 0xff, 0x4f, 0x7a, 0xf5, }, 6, 0, "", "",
+"62 f1 ff 4f 7a f5    \tvcvtuqq2ps %zmm5,%ymm6{%k7}",},
+{{0x62, 0xf1, 0x7d, 0x4f, 0x7a, 0xf5, }, 6, 0, "", "",
+"62 f1 7d 4f 7a f5    \tvcvttps2qq %ymm5,%zmm6{%k7}",},
+{{0x62, 0xf1, 0xfd, 0x48, 0x7a, 0xf5, }, 6, 0, "", "",
+"62 f1 fd 48 7a f5    \tvcvttpd2qq %zmm5,%zmm6",},
+{{0x62, 0xf1, 0x57, 0x08, 0x7b, 0xf0, }, 6, 0, "", "",
+"62 f1 57 08 7b f0    \tvcvtusi2sd %eax,%xmm5,%xmm6",},
+{{0x62, 0xf1, 0x56, 0x08, 0x7b, 0xf0, }, 6, 0, "", "",
+"62 f1 56 08 7b f0    \tvcvtusi2ss %eax,%xmm5,%xmm6",},
+{{0x62, 0xf1, 0x7d, 0x4f, 0x7b, 0xf5, }, 6, 0, "", "",
+"62 f1 7d 4f 7b f5    \tvcvtps2qq %ymm5,%zmm6{%k7}",},
+{{0x62, 0xf1, 0xfd, 0x48, 0x7b, 0xf5, }, 6, 0, "", "",
+"62 f1 fd 48 7b f5    \tvcvtpd2qq %zmm5,%zmm6",},
+{{0x0f, 0x7f, 0xc4, }, 3, 0, "", "",
+"0f 7f c4             \tmovq   %mm0,%mm4",},
+{{0xc5, 0xfd, 0x7f, 0xee, }, 4, 0, "", "",
+"c5 fd 7f ee          \tvmovdqa %ymm5,%ymm6",},
+{{0x62, 0xf1, 0x7d, 0x48, 0x7f, 0xee, }, 6, 0, "", "",
+"62 f1 7d 48 7f ee    \tvmovdqa32 %zmm5,%zmm6",},
+{{0x62, 0xf1, 0xfd, 0x48, 0x7f, 0xee, }, 6, 0, "", "",
+"62 f1 fd 48 7f ee    \tvmovdqa64 %zmm5,%zmm6",},
+{{0xc5, 0xfe, 0x7f, 0xee, }, 4, 0, "", "",
+"c5 fe 7f ee          \tvmovdqu %ymm5,%ymm6",},
+{{0x62, 0xf1, 0x7e, 0x48, 0x7f, 0xee, }, 6, 0, "", "",
+"62 f1 7e 48 7f ee    \tvmovdqu32 %zmm5,%zmm6",},
+{{0x62, 0xf1, 0xfe, 0x48, 0x7f, 0xee, }, 6, 0, "", "",
+"62 f1 fe 48 7f ee    \tvmovdqu64 %zmm5,%zmm6",},
+{{0x62, 0xf1, 0x7f, 0x48, 0x7f, 0xee, }, 6, 0, "", "",
+"62 f1 7f 48 7f ee    \tvmovdqu8 %zmm5,%zmm6",},
+{{0x62, 0xf1, 0xff, 0x48, 0x7f, 0xee, }, 6, 0, "", "",
+"62 f1 ff 48 7f ee    \tvmovdqu16 %zmm5,%zmm6",},
+{{0x0f, 0xdb, 0xd1, }, 3, 0, "", "",
+"0f db d1             \tpand   %mm1,%mm2",},
+{{0x66, 0x0f, 0xdb, 0xd1, }, 4, 0, "", "",
+"66 0f db d1          \tpand   %xmm1,%xmm2",},
+{{0xc5, 0xcd, 0xdb, 0xd4, }, 4, 0, "", "",
+"c5 cd db d4          \tvpand  %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf1, 0x55, 0x48, 0xdb, 0xf4, }, 6, 0, "", "",
+"62 f1 55 48 db f4    \tvpandd %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x48, 0xdb, 0xf4, }, 6, 0, "", "",
+"62 f1 d5 48 db f4    \tvpandq %zmm4,%zmm5,%zmm6",},
+{{0x0f, 0xdf, 0xd1, }, 3, 0, "", "",
+"0f df d1             \tpandn  %mm1,%mm2",},
+{{0x66, 0x0f, 0xdf, 0xd1, }, 4, 0, "", "",
+"66 0f df d1          \tpandn  %xmm1,%xmm2",},
+{{0xc5, 0xcd, 0xdf, 0xd4, }, 4, 0, "", "",
+"c5 cd df d4          \tvpandn %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf1, 0x55, 0x48, 0xdf, 0xf4, }, 6, 0, "", "",
+"62 f1 55 48 df f4    \tvpandnd %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x48, 0xdf, 0xf4, }, 6, 0, "", "",
+"62 f1 d5 48 df f4    \tvpandnq %zmm4,%zmm5,%zmm6",},
+{{0xc5, 0xf9, 0xe6, 0xd1, }, 4, 0, "", "",
+"c5 f9 e6 d1          \tvcvttpd2dq %xmm1,%xmm2",},
+{{0xc5, 0xfa, 0xe6, 0xf5, }, 4, 0, "", "",
+"c5 fa e6 f5          \tvcvtdq2pd %xmm5,%xmm6",},
+{{0x62, 0xf1, 0x7e, 0x4f, 0xe6, 0xf5, }, 6, 0, "", "",
+"62 f1 7e 4f e6 f5    \tvcvtdq2pd %ymm5,%zmm6{%k7}",},
+{{0x62, 0xf1, 0xfe, 0x48, 0xe6, 0xf5, }, 6, 0, "", "",
+"62 f1 fe 48 e6 f5    \tvcvtqq2pd %zmm5,%zmm6",},
+{{0xc5, 0xfb, 0xe6, 0xd1, }, 4, 0, "", "",
+"c5 fb e6 d1          \tvcvtpd2dq %xmm1,%xmm2",},
+{{0x0f, 0xeb, 0xf4, }, 3, 0, "", "",
+"0f eb f4             \tpor    %mm4,%mm6",},
+{{0xc5, 0xcd, 0xeb, 0xd4, }, 4, 0, "", "",
+"c5 cd eb d4          \tvpor   %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf1, 0x55, 0x48, 0xeb, 0xf4, }, 6, 0, "", "",
+"62 f1 55 48 eb f4    \tvpord  %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x48, 0xeb, 0xf4, }, 6, 0, "", "",
+"62 f1 d5 48 eb f4    \tvporq  %zmm4,%zmm5,%zmm6",},
+{{0x0f, 0xef, 0xf4, }, 3, 0, "", "",
+"0f ef f4             \tpxor   %mm4,%mm6",},
+{{0xc5, 0xcd, 0xef, 0xd4, }, 4, 0, "", "",
+"c5 cd ef d4          \tvpxor  %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf1, 0x55, 0x48, 0xef, 0xf4, }, 6, 0, "", "",
+"62 f1 55 48 ef f4    \tvpxord %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x48, 0xef, 0xf4, }, 6, 0, "", "",
+"62 f1 d5 48 ef f4    \tvpxorq %zmm4,%zmm5,%zmm6",},
+{{0x66, 0x0f, 0x38, 0x10, 0xc1, }, 5, 0, "", "",
+"66 0f 38 10 c1       \tpblendvb %xmm0,%xmm1,%xmm0",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x10, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 10 f4    \tvpsrlvw %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x10, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 10 ee    \tvpmovuswb %zmm5,%ymm6{%k7}",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x11, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 11 ee    \tvpmovusdb %zmm5,%xmm6{%k7}",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x11, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 11 f4    \tvpsravw %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x12, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 12 ee    \tvpmovusqb %zmm5,%xmm6{%k7}",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x12, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 12 f4    \tvpsllvw %zmm4,%zmm5,%zmm6",},
+{{0xc4, 0xe2, 0x7d, 0x13, 0xeb, }, 5, 0, "", "",
+"c4 e2 7d 13 eb       \tvcvtph2ps %xmm3,%ymm5",},
+{{0x62, 0xf2, 0x7d, 0x4f, 0x13, 0xf5, }, 6, 0, "", "",
+"62 f2 7d 4f 13 f5    \tvcvtph2ps %ymm5,%zmm6{%k7}",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x13, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 13 ee    \tvpmovusdw %zmm5,%ymm6{%k7}",},
+{{0x66, 0x0f, 0x38, 0x14, 0xc1, }, 5, 0, "", "",
+"66 0f 38 14 c1       \tblendvps %xmm0,%xmm1,%xmm0",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x14, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 14 ee    \tvpmovusqw %zmm5,%xmm6{%k7}",},
+{{0x62, 0xf2, 0x55, 0x48, 0x14, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 14 f4    \tvprorvd %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x14, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 14 f4    \tvprorvq %zmm4,%zmm5,%zmm6",},
+{{0x66, 0x0f, 0x38, 0x15, 0xc1, }, 5, 0, "", "",
+"66 0f 38 15 c1       \tblendvpd %xmm0,%xmm1,%xmm0",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x15, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 15 ee    \tvpmovusqd %zmm5,%ymm6{%k7}",},
+{{0x62, 0xf2, 0x55, 0x48, 0x15, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 15 f4    \tvprolvd %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x15, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 15 f4    \tvprolvq %zmm4,%zmm5,%zmm6",},
+{{0xc4, 0xe2, 0x4d, 0x16, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 16 d4       \tvpermps %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf2, 0x4d, 0x2f, 0x16, 0xd4, }, 6, 0, "", "",
+"62 f2 4d 2f 16 d4    \tvpermps %ymm4,%ymm6,%ymm2{%k7}",},
+{{0x62, 0xf2, 0xcd, 0x2f, 0x16, 0xd4, }, 6, 0, "", "",
+"62 f2 cd 2f 16 d4    \tvpermpd %ymm4,%ymm6,%ymm2{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x19, 0xf4, }, 5, 0, "", "",
+"c4 e2 7d 19 f4       \tvbroadcastsd %xmm4,%ymm6",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x19, 0xf7, }, 6, 0, "", "",
+"62 f2 7d 48 19 f7    \tvbroadcastf32x2 %xmm7,%zmm6",},
+{{0xc4, 0xe2, 0x7d, 0x1a, 0x21, }, 5, 0, "", "",
+"c4 e2 7d 1a 21       \tvbroadcastf128 (%ecx),%ymm4",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x1a, 0x31, }, 6, 0, "", "",
+"62 f2 7d 48 1a 31    \tvbroadcastf32x4 (%ecx),%zmm6",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x1a, 0x31, }, 6, 0, "", "",
+"62 f2 fd 48 1a 31    \tvbroadcastf64x2 (%ecx),%zmm6",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x1b, 0x31, }, 6, 0, "", "",
+"62 f2 7d 48 1b 31    \tvbroadcastf32x8 (%ecx),%zmm6",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x1b, 0x31, }, 6, 0, "", "",
+"62 f2 fd 48 1b 31    \tvbroadcastf64x4 (%ecx),%zmm6",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x1f, 0xf4, }, 6, 0, "", "",
+"62 f2 fd 48 1f f4    \tvpabsq %zmm4,%zmm6",},
+{{0xc4, 0xe2, 0x79, 0x20, 0xec, }, 5, 0, "", "",
+"c4 e2 79 20 ec       \tvpmovsxbw %xmm4,%xmm5",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x20, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 20 ee    \tvpmovswb %zmm5,%ymm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x21, 0xf4, }, 5, 0, "", "",
+"c4 e2 7d 21 f4       \tvpmovsxbd %xmm4,%ymm6",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x21, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 21 ee    \tvpmovsdb %zmm5,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x22, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 22 e4       \tvpmovsxbq %xmm4,%ymm4",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x22, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 22 ee    \tvpmovsqb %zmm5,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x23, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 23 e4       \tvpmovsxwd %xmm4,%ymm4",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x23, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 23 ee    \tvpmovsdw %zmm5,%ymm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x24, 0xf4, }, 5, 0, "", "",
+"c4 e2 7d 24 f4       \tvpmovsxwq %xmm4,%ymm6",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x24, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 24 ee    \tvpmovsqw %zmm5,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x25, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 25 e4       \tvpmovsxdq %xmm4,%ymm4",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x25, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 25 ee    \tvpmovsqd %zmm5,%ymm6{%k7}",},
+{{0x62, 0xf2, 0x4d, 0x48, 0x26, 0xed, }, 6, 0, "", "",
+"62 f2 4d 48 26 ed    \tvptestmb %zmm5,%zmm6,%k5",},
+{{0x62, 0xf2, 0xcd, 0x48, 0x26, 0xed, }, 6, 0, "", "",
+"62 f2 cd 48 26 ed    \tvptestmw %zmm5,%zmm6,%k5",},
+{{0x62, 0xf2, 0x56, 0x48, 0x26, 0xec, }, 6, 0, "", "",
+"62 f2 56 48 26 ec    \tvptestnmb %zmm4,%zmm5,%k5",},
+{{0x62, 0xf2, 0xd6, 0x48, 0x26, 0xec, }, 6, 0, "", "",
+"62 f2 d6 48 26 ec    \tvptestnmw %zmm4,%zmm5,%k5",},
+{{0x62, 0xf2, 0x4d, 0x48, 0x27, 0xed, }, 6, 0, "", "",
+"62 f2 4d 48 27 ed    \tvptestmd %zmm5,%zmm6,%k5",},
+{{0x62, 0xf2, 0xcd, 0x48, 0x27, 0xed, }, 6, 0, "", "",
+"62 f2 cd 48 27 ed    \tvptestmq %zmm5,%zmm6,%k5",},
+{{0x62, 0xf2, 0x56, 0x48, 0x27, 0xec, }, 6, 0, "", "",
+"62 f2 56 48 27 ec    \tvptestnmd %zmm4,%zmm5,%k5",},
+{{0x62, 0xf2, 0xd6, 0x48, 0x27, 0xec, }, 6, 0, "", "",
+"62 f2 d6 48 27 ec    \tvptestnmq %zmm4,%zmm5,%k5",},
+{{0xc4, 0xe2, 0x4d, 0x28, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 28 d4       \tvpmuldq %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf2, 0x7e, 0x48, 0x28, 0xf5, }, 6, 0, "", "",
+"62 f2 7e 48 28 f5    \tvpmovm2b %k5,%zmm6",},
+{{0x62, 0xf2, 0xfe, 0x48, 0x28, 0xf5, }, 6, 0, "", "",
+"62 f2 fe 48 28 f5    \tvpmovm2w %k5,%zmm6",},
+{{0xc4, 0xe2, 0x4d, 0x29, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 29 d4       \tvpcmpeqq %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf2, 0x7e, 0x48, 0x29, 0xee, }, 6, 0, "", "",
+"62 f2 7e 48 29 ee    \tvpmovb2m %zmm6,%k5",},
+{{0x62, 0xf2, 0xfe, 0x48, 0x29, 0xee, }, 6, 0, "", "",
+"62 f2 fe 48 29 ee    \tvpmovw2m %zmm6,%k5",},
+{{0xc4, 0xe2, 0x7d, 0x2a, 0x21, }, 5, 0, "", "",
+"c4 e2 7d 2a 21       \tvmovntdqa (%ecx),%ymm4",},
+{{0x62, 0xf2, 0xfe, 0x48, 0x2a, 0xce, }, 6, 0, "", "",
+"62 f2 fe 48 2a ce    \tvpbroadcastmb2q %k6,%zmm1",},
+{{0xc4, 0xe2, 0x5d, 0x2c, 0x31, }, 5, 0, "", "",
+"c4 e2 5d 2c 31       \tvmaskmovps (%ecx),%ymm4,%ymm6",},
+{{0x62, 0xf2, 0x55, 0x48, 0x2c, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 2c f4    \tvscalefps %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x2c, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 2c f4    \tvscalefpd %zmm4,%zmm5,%zmm6",},
+{{0xc4, 0xe2, 0x5d, 0x2d, 0x31, }, 5, 0, "", "",
+"c4 e2 5d 2d 31       \tvmaskmovpd (%ecx),%ymm4,%ymm6",},
+{{0x62, 0xf2, 0x55, 0x0f, 0x2d, 0xf4, }, 6, 0, "", "",
+"62 f2 55 0f 2d f4    \tvscalefss %xmm4,%xmm5,%xmm6{%k7}",},
+{{0x62, 0xf2, 0xd5, 0x0f, 0x2d, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 0f 2d f4    \tvscalefsd %xmm4,%xmm5,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x30, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 30 e4       \tvpmovzxbw %xmm4,%ymm4",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x30, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 30 ee    \tvpmovwb %zmm5,%ymm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x31, 0xf4, }, 5, 0, "", "",
+"c4 e2 7d 31 f4       \tvpmovzxbd %xmm4,%ymm6",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x31, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 31 ee    \tvpmovdb %zmm5,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x32, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 32 e4       \tvpmovzxbq %xmm4,%ymm4",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x32, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 32 ee    \tvpmovqb %zmm5,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x33, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 33 e4       \tvpmovzxwd %xmm4,%ymm4",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x33, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 33 ee    \tvpmovdw %zmm5,%ymm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x34, 0xf4, }, 5, 0, "", "",
+"c4 e2 7d 34 f4       \tvpmovzxwq %xmm4,%ymm6",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x34, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 34 ee    \tvpmovqw %zmm5,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x35, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 35 e4       \tvpmovzxdq %xmm4,%ymm4",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x35, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 35 ee    \tvpmovqd %zmm5,%ymm6{%k7}",},
+{{0xc4, 0xe2, 0x4d, 0x36, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 36 d4       \tvpermd %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf2, 0x4d, 0x2f, 0x36, 0xd4, }, 6, 0, "", "",
+"62 f2 4d 2f 36 d4    \tvpermd %ymm4,%ymm6,%ymm2{%k7}",},
+{{0x62, 0xf2, 0xcd, 0x2f, 0x36, 0xd4, }, 6, 0, "", "",
+"62 f2 cd 2f 36 d4    \tvpermq %ymm4,%ymm6,%ymm2{%k7}",},
+{{0xc4, 0xe2, 0x4d, 0x38, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 38 d4       \tvpminsb %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf2, 0x7e, 0x48, 0x38, 0xf5, }, 6, 0, "", "",
+"62 f2 7e 48 38 f5    \tvpmovm2d %k5,%zmm6",},
+{{0x62, 0xf2, 0xfe, 0x48, 0x38, 0xf5, }, 6, 0, "", "",
+"62 f2 fe 48 38 f5    \tvpmovm2q %k5,%zmm6",},
+{{0xc4, 0xe2, 0x69, 0x39, 0xd9, }, 5, 0, "", "",
+"c4 e2 69 39 d9       \tvpminsd %xmm1,%xmm2,%xmm3",},
+{{0x62, 0xf2, 0x55, 0x48, 0x39, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 39 f4    \tvpminsd %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x39, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 39 f4    \tvpminsq %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x7e, 0x48, 0x39, 0xee, }, 6, 0, "", "",
+"62 f2 7e 48 39 ee    \tvpmovd2m %zmm6,%k5",},
+{{0x62, 0xf2, 0xfe, 0x48, 0x39, 0xee, }, 6, 0, "", "",
+"62 f2 fe 48 39 ee    \tvpmovq2m %zmm6,%k5",},
+{{0xc4, 0xe2, 0x4d, 0x3a, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 3a d4       \tvpminuw %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf2, 0x7e, 0x48, 0x3a, 0xf6, }, 6, 0, "", "",
+"62 f2 7e 48 3a f6    \tvpbroadcastmw2d %k6,%zmm6",},
+{{0xc4, 0xe2, 0x4d, 0x3b, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 3b d4       \tvpminud %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf2, 0x55, 0x48, 0x3b, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 3b f4    \tvpminud %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x3b, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 3b f4    \tvpminuq %zmm4,%zmm5,%zmm6",},
+{{0xc4, 0xe2, 0x4d, 0x3d, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 3d d4       \tvpmaxsd %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf2, 0x55, 0x48, 0x3d, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 3d f4    \tvpmaxsd %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x3d, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 3d f4    \tvpmaxsq %zmm4,%zmm5,%zmm6",},
+{{0xc4, 0xe2, 0x4d, 0x3f, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 3f d4       \tvpmaxud %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf2, 0x55, 0x48, 0x3f, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 3f f4    \tvpmaxud %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x3f, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 3f f4    \tvpmaxuq %zmm4,%zmm5,%zmm6",},
+{{0xc4, 0xe2, 0x4d, 0x40, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 40 d4       \tvpmulld %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf2, 0x55, 0x48, 0x40, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 40 f4    \tvpmulld %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x40, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 40 f4    \tvpmullq %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x42, 0xf5, }, 6, 0, "", "",
+"62 f2 7d 48 42 f5    \tvgetexpps %zmm5,%zmm6",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x42, 0xf5, }, 6, 0, "", "",
+"62 f2 fd 48 42 f5    \tvgetexppd %zmm5,%zmm6",},
+{{0x62, 0xf2, 0x55, 0x0f, 0x43, 0xf4, }, 6, 0, "", "",
+"62 f2 55 0f 43 f4    \tvgetexpss %xmm4,%xmm5,%xmm6{%k7}",},
+{{0x62, 0xf2, 0xe5, 0x0f, 0x43, 0xe2, }, 6, 0, "", "",
+"62 f2 e5 0f 43 e2    \tvgetexpsd %xmm2,%xmm3,%xmm4{%k7}",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x44, 0xf5, }, 6, 0, "", "",
+"62 f2 7d 48 44 f5    \tvplzcntd %zmm5,%zmm6",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x44, 0xf5, }, 6, 0, "", "",
+"62 f2 fd 48 44 f5    \tvplzcntq %zmm5,%zmm6",},
+{{0xc4, 0xe2, 0x4d, 0x46, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 46 d4       \tvpsravd %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf2, 0x55, 0x48, 0x46, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 46 f4    \tvpsravd %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x46, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 46 f4    \tvpsravq %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x4c, 0xf5, }, 6, 0, "", "",
+"62 f2 7d 48 4c f5    \tvrcp14ps %zmm5,%zmm6",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x4c, 0xf5, }, 6, 0, "", "",
+"62 f2 fd 48 4c f5    \tvrcp14pd %zmm5,%zmm6",},
+{{0x62, 0xf2, 0x55, 0x0f, 0x4d, 0xf4, }, 6, 0, "", "",
+"62 f2 55 0f 4d f4    \tvrcp14ss %xmm4,%xmm5,%xmm6{%k7}",},
+{{0x62, 0xf2, 0xd5, 0x0f, 0x4d, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 0f 4d f4    \tvrcp14sd %xmm4,%xmm5,%xmm6{%k7}",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x4e, 0xf5, }, 6, 0, "", "",
+"62 f2 7d 48 4e f5    \tvrsqrt14ps %zmm5,%zmm6",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x4e, 0xf5, }, 6, 0, "", "",
+"62 f2 fd 48 4e f5    \tvrsqrt14pd %zmm5,%zmm6",},
+{{0x62, 0xf2, 0x55, 0x0f, 0x4f, 0xf4, }, 6, 0, "", "",
+"62 f2 55 0f 4f f4    \tvrsqrt14ss %xmm4,%xmm5,%xmm6{%k7}",},
+{{0x62, 0xf2, 0xd5, 0x0f, 0x4f, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 0f 4f f4    \tvrsqrt14sd %xmm4,%xmm5,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x79, 0x59, 0xf4, }, 5, 0, "", "",
+"c4 e2 79 59 f4       \tvpbroadcastq %xmm4,%xmm6",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x59, 0xf7, }, 6, 0, "", "",
+"62 f2 7d 48 59 f7    \tvbroadcasti32x2 %xmm7,%zmm6",},
+{{0xc4, 0xe2, 0x7d, 0x5a, 0x21, }, 5, 0, "", "",
+"c4 e2 7d 5a 21       \tvbroadcasti128 (%ecx),%ymm4",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x5a, 0x31, }, 6, 0, "", "",
+"62 f2 7d 48 5a 31    \tvbroadcasti32x4 (%ecx),%zmm6",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x5a, 0x31, }, 6, 0, "", "",
+"62 f2 fd 48 5a 31    \tvbroadcasti64x2 (%ecx),%zmm6",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x5b, 0x31, }, 6, 0, "", "",
+"62 f2 7d 48 5b 31    \tvbroadcasti32x8 (%ecx),%zmm6",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x5b, 0x31, }, 6, 0, "", "",
+"62 f2 fd 48 5b 31    \tvbroadcasti64x4 (%ecx),%zmm6",},
+{{0x62, 0xf2, 0x55, 0x48, 0x64, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 64 f4    \tvpblendmd %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x64, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 64 f4    \tvpblendmq %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x55, 0x48, 0x65, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 65 f4    \tvblendmps %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x65, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 65 f4    \tvblendmpd %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x55, 0x48, 0x66, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 66 f4    \tvpblendmb %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x66, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 66 f4    \tvpblendmw %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x55, 0x48, 0x75, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 75 f4    \tvpermi2b %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x75, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 75 f4    \tvpermi2w %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x55, 0x48, 0x76, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 76 f4    \tvpermi2d %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x76, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 76 f4    \tvpermi2q %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x55, 0x48, 0x77, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 77 f4    \tvpermi2ps %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x77, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 77 f4    \tvpermi2pd %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x7d, 0x08, 0x7a, 0xd8, }, 6, 0, "", "",
+"62 f2 7d 08 7a d8    \tvpbroadcastb %eax,%xmm3",},
+{{0x62, 0xf2, 0x7d, 0x08, 0x7b, 0xd8, }, 6, 0, "", "",
+"62 f2 7d 08 7b d8    \tvpbroadcastw %eax,%xmm3",},
+{{0x62, 0xf2, 0x7d, 0x08, 0x7c, 0xd8, }, 6, 0, "", "",
+"62 f2 7d 08 7c d8    \tvpbroadcastd %eax,%xmm3",},
+{{0x62, 0xf2, 0x55, 0x48, 0x7d, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 7d f4    \tvpermt2b %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x7d, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 7d f4    \tvpermt2w %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x55, 0x48, 0x7e, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 7e f4    \tvpermt2d %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x7e, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 7e f4    \tvpermt2q %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x55, 0x48, 0x7f, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 7f f4    \tvpermt2ps %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x7f, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 7f f4    \tvpermt2pd %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x83, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 83 f4    \tvpmultishiftqb %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x88, 0x31, }, 6, 0, "", "",
+"62 f2 7d 48 88 31    \tvexpandps (%ecx),%zmm6",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x88, 0x31, }, 6, 0, "", "",
+"62 f2 fd 48 88 31    \tvexpandpd (%ecx),%zmm6",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x89, 0x31, }, 6, 0, "", "",
+"62 f2 7d 48 89 31    \tvpexpandd (%ecx),%zmm6",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x89, 0x31, }, 6, 0, "", "",
+"62 f2 fd 48 89 31    \tvpexpandq (%ecx),%zmm6",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x8a, 0x31, }, 6, 0, "", "",
+"62 f2 7d 48 8a 31    \tvcompressps %zmm6,(%ecx)",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x8a, 0x31, }, 6, 0, "", "",
+"62 f2 fd 48 8a 31    \tvcompresspd %zmm6,(%ecx)",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x8b, 0x31, }, 6, 0, "", "",
+"62 f2 7d 48 8b 31    \tvpcompressd %zmm6,(%ecx)",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x8b, 0x31, }, 6, 0, "", "",
+"62 f2 fd 48 8b 31    \tvpcompressq %zmm6,(%ecx)",},
+{{0x62, 0xf2, 0x55, 0x48, 0x8d, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 8d f4    \tvpermb %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x8d, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 8d f4    \tvpermw %zmm4,%zmm5,%zmm6",},
+{{0xc4, 0xe2, 0x69, 0x90, 0x4c, 0x7d, 0x02, }, 7, 0, "", "",
+"c4 e2 69 90 4c 7d 02 \tvpgatherdd %xmm2,0x2(%ebp,%xmm7,2),%xmm1",},
+{{0xc4, 0xe2, 0xe9, 0x90, 0x4c, 0x7d, 0x04, }, 7, 0, "", "",
+"c4 e2 e9 90 4c 7d 04 \tvpgatherdq %xmm2,0x4(%ebp,%xmm7,2),%xmm1",},
+{{0x62, 0xf2, 0x7d, 0x49, 0x90, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 90 b4 fd 7b 00 00 00 \tvpgatherdd 0x7b(%ebp,%zmm7,8),%zmm6{%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0x90, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 90 b4 fd 7b 00 00 00 \tvpgatherdq 0x7b(%ebp,%ymm7,8),%zmm6{%k1}",},
+{{0xc4, 0xe2, 0x69, 0x91, 0x4c, 0x7d, 0x02, }, 7, 0, "", "",
+"c4 e2 69 91 4c 7d 02 \tvpgatherqd %xmm2,0x2(%ebp,%xmm7,2),%xmm1",},
+{{0xc4, 0xe2, 0xe9, 0x91, 0x4c, 0x7d, 0x02, }, 7, 0, "", "",
+"c4 e2 e9 91 4c 7d 02 \tvpgatherqq %xmm2,0x2(%ebp,%xmm7,2),%xmm1",},
+{{0x62, 0xf2, 0x7d, 0x49, 0x91, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 91 b4 fd 7b 00 00 00 \tvpgatherqd 0x7b(%ebp,%zmm7,8),%ymm6{%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0x91, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 91 b4 fd 7b 00 00 00 \tvpgatherqq 0x7b(%ebp,%zmm7,8),%zmm6{%k1}",},
+{{0x62, 0xf2, 0x7d, 0x49, 0xa0, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 a0 b4 fd 7b 00 00 00 \tvpscatterdd %zmm6,0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0xa0, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 a0 b4 fd 7b 00 00 00 \tvpscatterdq %zmm6,0x7b(%ebp,%ymm7,8){%k1}",},
+{{0x62, 0xf2, 0x7d, 0x49, 0xa1, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 a1 b4 fd 7b 00 00 00 \tvpscatterqd %ymm6,0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xfd, 0x29, 0xa1, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 29 a1 b4 fd 7b 00 00 00 \tvpscatterqq %ymm6,0x7b(%ebp,%ymm7,8){%k1}",},
+{{0x62, 0xf2, 0x7d, 0x49, 0xa2, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 a2 b4 fd 7b 00 00 00 \tvscatterdps %zmm6,0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0xa2, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 a2 b4 fd 7b 00 00 00 \tvscatterdpd %zmm6,0x7b(%ebp,%ymm7,8){%k1}",},
+{{0x62, 0xf2, 0x7d, 0x49, 0xa3, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 a3 b4 fd 7b 00 00 00 \tvscatterqps %ymm6,0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0xa3, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 a3 b4 fd 7b 00 00 00 \tvscatterqpd %zmm6,0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xd5, 0x48, 0xb4, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 b4 f4    \tvpmadd52luq %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0xb5, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 b5 f4    \tvpmadd52huq %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x7d, 0x48, 0xc4, 0xf5, }, 6, 0, "", "",
+"62 f2 7d 48 c4 f5    \tvpconflictd %zmm5,%zmm6",},
+{{0x62, 0xf2, 0xfd, 0x48, 0xc4, 0xf5, }, 6, 0, "", "",
+"62 f2 fd 48 c4 f5    \tvpconflictq %zmm5,%zmm6",},
+{{0x62, 0xf2, 0x7d, 0x48, 0xc8, 0xfe, }, 6, 0, "", "",
+"62 f2 7d 48 c8 fe    \tvexp2ps %zmm6,%zmm7",},
+{{0x62, 0xf2, 0xfd, 0x48, 0xc8, 0xfe, }, 6, 0, "", "",
+"62 f2 fd 48 c8 fe    \tvexp2pd %zmm6,%zmm7",},
+{{0x62, 0xf2, 0x7d, 0x48, 0xca, 0xfe, }, 6, 0, "", "",
+"62 f2 7d 48 ca fe    \tvrcp28ps %zmm6,%zmm7",},
+{{0x62, 0xf2, 0xfd, 0x48, 0xca, 0xfe, }, 6, 0, "", "",
+"62 f2 fd 48 ca fe    \tvrcp28pd %zmm6,%zmm7",},
+{{0x62, 0xf2, 0x4d, 0x0f, 0xcb, 0xfd, }, 6, 0, "", "",
+"62 f2 4d 0f cb fd    \tvrcp28ss %xmm5,%xmm6,%xmm7{%k7}",},
+{{0x62, 0xf2, 0xcd, 0x0f, 0xcb, 0xfd, }, 6, 0, "", "",
+"62 f2 cd 0f cb fd    \tvrcp28sd %xmm5,%xmm6,%xmm7{%k7}",},
+{{0x62, 0xf2, 0x7d, 0x48, 0xcc, 0xfe, }, 6, 0, "", "",
+"62 f2 7d 48 cc fe    \tvrsqrt28ps %zmm6,%zmm7",},
+{{0x62, 0xf2, 0xfd, 0x48, 0xcc, 0xfe, }, 6, 0, "", "",
+"62 f2 fd 48 cc fe    \tvrsqrt28pd %zmm6,%zmm7",},
+{{0x62, 0xf2, 0x4d, 0x0f, 0xcd, 0xfd, }, 6, 0, "", "",
+"62 f2 4d 0f cd fd    \tvrsqrt28ss %xmm5,%xmm6,%xmm7{%k7}",},
+{{0x62, 0xf2, 0xcd, 0x0f, 0xcd, 0xfd, }, 6, 0, "", "",
+"62 f2 cd 0f cd fd    \tvrsqrt28sd %xmm5,%xmm6,%xmm7{%k7}",},
+{{0x62, 0xf3, 0x4d, 0x48, 0x03, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 4d 48 03 fd 12 \tvalignd $0x12,%zmm5,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0xcd, 0x48, 0x03, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 cd 48 03 fd 12 \tvalignq $0x12,%zmm5,%zmm6,%zmm7",},
+{{0xc4, 0xe3, 0x7d, 0x08, 0xd6, 0x05, }, 6, 0, "", "",
+"c4 e3 7d 08 d6 05    \tvroundps $0x5,%ymm6,%ymm2",},
+{{0x62, 0xf3, 0x7d, 0x48, 0x08, 0xf5, 0x12, }, 7, 0, "", "",
+"62 f3 7d 48 08 f5 12 \tvrndscaleps $0x12,%zmm5,%zmm6",},
+{{0xc4, 0xe3, 0x7d, 0x09, 0xd6, 0x05, }, 6, 0, "", "",
+"c4 e3 7d 09 d6 05    \tvroundpd $0x5,%ymm6,%ymm2",},
+{{0x62, 0xf3, 0xfd, 0x48, 0x09, 0xf5, 0x12, }, 7, 0, "", "",
+"62 f3 fd 48 09 f5 12 \tvrndscalepd $0x12,%zmm5,%zmm6",},
+{{0xc4, 0xe3, 0x49, 0x0a, 0xd4, 0x05, }, 6, 0, "", "",
+"c4 e3 49 0a d4 05    \tvroundss $0x5,%xmm4,%xmm6,%xmm2",},
+{{0x62, 0xf3, 0x55, 0x0f, 0x0a, 0xf4, 0x12, }, 7, 0, "", "",
+"62 f3 55 0f 0a f4 12 \tvrndscaless $0x12,%xmm4,%xmm5,%xmm6{%k7}",},
+{{0xc4, 0xe3, 0x49, 0x0b, 0xd4, 0x05, }, 6, 0, "", "",
+"c4 e3 49 0b d4 05    \tvroundsd $0x5,%xmm4,%xmm6,%xmm2",},
+{{0x62, 0xf3, 0xd5, 0x0f, 0x0b, 0xf4, 0x12, }, 7, 0, "", "",
+"62 f3 d5 0f 0b f4 12 \tvrndscalesd $0x12,%xmm4,%xmm5,%xmm6{%k7}",},
+{{0xc4, 0xe3, 0x5d, 0x18, 0xf4, 0x05, }, 6, 0, "", "",
+"c4 e3 5d 18 f4 05    \tvinsertf128 $0x5,%xmm4,%ymm4,%ymm6",},
+{{0x62, 0xf3, 0x55, 0x4f, 0x18, 0xf4, 0x12, }, 7, 0, "", "",
+"62 f3 55 4f 18 f4 12 \tvinsertf32x4 $0x12,%xmm4,%zmm5,%zmm6{%k7}",},
+{{0x62, 0xf3, 0xd5, 0x4f, 0x18, 0xf4, 0x12, }, 7, 0, "", "",
+"62 f3 d5 4f 18 f4 12 \tvinsertf64x2 $0x12,%xmm4,%zmm5,%zmm6{%k7}",},
+{{0xc4, 0xe3, 0x7d, 0x19, 0xe4, 0x05, }, 6, 0, "", "",
+"c4 e3 7d 19 e4 05    \tvextractf128 $0x5,%ymm4,%xmm4",},
+{{0x62, 0xf3, 0x7d, 0x4f, 0x19, 0xee, 0x12, }, 7, 0, "", "",
+"62 f3 7d 4f 19 ee 12 \tvextractf32x4 $0x12,%zmm5,%xmm6{%k7}",},
+{{0x62, 0xf3, 0xfd, 0x4f, 0x19, 0xee, 0x12, }, 7, 0, "", "",
+"62 f3 fd 4f 19 ee 12 \tvextractf64x2 $0x12,%zmm5,%xmm6{%k7}",},
+{{0x62, 0xf3, 0x4d, 0x4f, 0x1a, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 4d 4f 1a fd 12 \tvinsertf32x8 $0x12,%ymm5,%zmm6,%zmm7{%k7}",},
+{{0x62, 0xf3, 0xcd, 0x4f, 0x1a, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 cd 4f 1a fd 12 \tvinsertf64x4 $0x12,%ymm5,%zmm6,%zmm7{%k7}",},
+{{0x62, 0xf3, 0x7d, 0x4f, 0x1b, 0xf7, 0x12, }, 7, 0, "", "",
+"62 f3 7d 4f 1b f7 12 \tvextractf32x8 $0x12,%zmm6,%ymm7{%k7}",},
+{{0x62, 0xf3, 0xfd, 0x4f, 0x1b, 0xf7, 0x12, }, 7, 0, "", "",
+"62 f3 fd 4f 1b f7 12 \tvextractf64x4 $0x12,%zmm6,%ymm7{%k7}",},
+{{0x62, 0xf3, 0x45, 0x48, 0x1e, 0xee, 0x12, }, 7, 0, "", "",
+"62 f3 45 48 1e ee 12 \tvpcmpud $0x12,%zmm6,%zmm7,%k5",},
+{{0x62, 0xf3, 0xc5, 0x48, 0x1e, 0xee, 0x12, }, 7, 0, "", "",
+"62 f3 c5 48 1e ee 12 \tvpcmpuq $0x12,%zmm6,%zmm7,%k5",},
+{{0x62, 0xf3, 0x45, 0x48, 0x1f, 0xee, 0x12, }, 7, 0, "", "",
+"62 f3 45 48 1f ee 12 \tvpcmpd $0x12,%zmm6,%zmm7,%k5",},
+{{0x62, 0xf3, 0xc5, 0x48, 0x1f, 0xee, 0x12, }, 7, 0, "", "",
+"62 f3 c5 48 1f ee 12 \tvpcmpq $0x12,%zmm6,%zmm7,%k5",},
+{{0x62, 0xf3, 0x4d, 0x48, 0x23, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 4d 48 23 fd 12 \tvshuff32x4 $0x12,%zmm5,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0xcd, 0x48, 0x23, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 cd 48 23 fd 12 \tvshuff64x2 $0x12,%zmm5,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0x4d, 0x48, 0x25, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 4d 48 25 fd 12 \tvpternlogd $0x12,%zmm5,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0xcd, 0x48, 0x25, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 cd 48 25 fd 12 \tvpternlogq $0x12,%zmm5,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0x7d, 0x48, 0x26, 0xfe, 0x12, }, 7, 0, "", "",
+"62 f3 7d 48 26 fe 12 \tvgetmantps $0x12,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0xfd, 0x48, 0x26, 0xfe, 0x12, }, 7, 0, "", "",
+"62 f3 fd 48 26 fe 12 \tvgetmantpd $0x12,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0x4d, 0x0f, 0x27, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 4d 0f 27 fd 12 \tvgetmantss $0x12,%xmm5,%xmm6,%xmm7{%k7}",},
+{{0x62, 0xf3, 0xcd, 0x0f, 0x27, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 cd 0f 27 fd 12 \tvgetmantsd $0x12,%xmm5,%xmm6,%xmm7{%k7}",},
+{{0xc4, 0xe3, 0x5d, 0x38, 0xf4, 0x05, }, 6, 0, "", "",
+"c4 e3 5d 38 f4 05    \tvinserti128 $0x5,%xmm4,%ymm4,%ymm6",},
+{{0x62, 0xf3, 0x55, 0x4f, 0x38, 0xf4, 0x12, }, 7, 0, "", "",
+"62 f3 55 4f 38 f4 12 \tvinserti32x4 $0x12,%xmm4,%zmm5,%zmm6{%k7}",},
+{{0x62, 0xf3, 0xd5, 0x4f, 0x38, 0xf4, 0x12, }, 7, 0, "", "",
+"62 f3 d5 4f 38 f4 12 \tvinserti64x2 $0x12,%xmm4,%zmm5,%zmm6{%k7}",},
+{{0xc4, 0xe3, 0x7d, 0x39, 0xe6, 0x05, }, 6, 0, "", "",
+"c4 e3 7d 39 e6 05    \tvextracti128 $0x5,%ymm4,%xmm6",},
+{{0x62, 0xf3, 0x7d, 0x4f, 0x39, 0xee, 0x12, }, 7, 0, "", "",
+"62 f3 7d 4f 39 ee 12 \tvextracti32x4 $0x12,%zmm5,%xmm6{%k7}",},
+{{0x62, 0xf3, 0xfd, 0x4f, 0x39, 0xee, 0x12, }, 7, 0, "", "",
+"62 f3 fd 4f 39 ee 12 \tvextracti64x2 $0x12,%zmm5,%xmm6{%k7}",},
+{{0x62, 0xf3, 0x4d, 0x4f, 0x3a, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 4d 4f 3a fd 12 \tvinserti32x8 $0x12,%ymm5,%zmm6,%zmm7{%k7}",},
+{{0x62, 0xf3, 0xcd, 0x4f, 0x3a, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 cd 4f 3a fd 12 \tvinserti64x4 $0x12,%ymm5,%zmm6,%zmm7{%k7}",},
+{{0x62, 0xf3, 0x7d, 0x4f, 0x3b, 0xf7, 0x12, }, 7, 0, "", "",
+"62 f3 7d 4f 3b f7 12 \tvextracti32x8 $0x12,%zmm6,%ymm7{%k7}",},
+{{0x62, 0xf3, 0xfd, 0x4f, 0x3b, 0xf7, 0x12, }, 7, 0, "", "",
+"62 f3 fd 4f 3b f7 12 \tvextracti64x4 $0x12,%zmm6,%ymm7{%k7}",},
+{{0x62, 0xf3, 0x45, 0x48, 0x3e, 0xee, 0x12, }, 7, 0, "", "",
+"62 f3 45 48 3e ee 12 \tvpcmpub $0x12,%zmm6,%zmm7,%k5",},
+{{0x62, 0xf3, 0xc5, 0x48, 0x3e, 0xee, 0x12, }, 7, 0, "", "",
+"62 f3 c5 48 3e ee 12 \tvpcmpuw $0x12,%zmm6,%zmm7,%k5",},
+{{0x62, 0xf3, 0x45, 0x48, 0x3f, 0xee, 0x12, }, 7, 0, "", "",
+"62 f3 45 48 3f ee 12 \tvpcmpb $0x12,%zmm6,%zmm7,%k5",},
+{{0x62, 0xf3, 0xc5, 0x48, 0x3f, 0xee, 0x12, }, 7, 0, "", "",
+"62 f3 c5 48 3f ee 12 \tvpcmpw $0x12,%zmm6,%zmm7,%k5",},
+{{0xc4, 0xe3, 0x4d, 0x42, 0xd4, 0x05, }, 6, 0, "", "",
+"c4 e3 4d 42 d4 05    \tvmpsadbw $0x5,%ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf3, 0x55, 0x48, 0x42, 0xf4, 0x12, }, 7, 0, "", "",
+"62 f3 55 48 42 f4 12 \tvdbpsadbw $0x12,%zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf3, 0x4d, 0x48, 0x43, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 4d 48 43 fd 12 \tvshufi32x4 $0x12,%zmm5,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0xcd, 0x48, 0x43, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 cd 48 43 fd 12 \tvshufi64x2 $0x12,%zmm5,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0x4d, 0x48, 0x50, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 4d 48 50 fd 12 \tvrangeps $0x12,%zmm5,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0xcd, 0x48, 0x50, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 cd 48 50 fd 12 \tvrangepd $0x12,%zmm5,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0x4d, 0x08, 0x51, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 4d 08 51 fd 12 \tvrangess $0x12,%xmm5,%xmm6,%xmm7",},
+{{0x62, 0xf3, 0xcd, 0x08, 0x51, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 cd 08 51 fd 12 \tvrangesd $0x12,%xmm5,%xmm6,%xmm7",},
+{{0x62, 0xf3, 0x4d, 0x48, 0x54, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 4d 48 54 fd 12 \tvfixupimmps $0x12,%zmm5,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0xcd, 0x48, 0x54, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 cd 48 54 fd 12 \tvfixupimmpd $0x12,%zmm5,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0x4d, 0x0f, 0x55, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 4d 0f 55 fd 12 \tvfixupimmss $0x12,%xmm5,%xmm6,%xmm7{%k7}",},
+{{0x62, 0xf3, 0xcd, 0x0f, 0x55, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 cd 0f 55 fd 12 \tvfixupimmsd $0x12,%xmm5,%xmm6,%xmm7{%k7}",},
+{{0x62, 0xf3, 0x7d, 0x48, 0x56, 0xfe, 0x12, }, 7, 0, "", "",
+"62 f3 7d 48 56 fe 12 \tvreduceps $0x12,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0xfd, 0x48, 0x56, 0xfe, 0x12, }, 7, 0, "", "",
+"62 f3 fd 48 56 fe 12 \tvreducepd $0x12,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0x4d, 0x08, 0x57, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 4d 08 57 fd 12 \tvreducess $0x12,%xmm5,%xmm6,%xmm7",},
+{{0x62, 0xf3, 0xcd, 0x08, 0x57, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 cd 08 57 fd 12 \tvreducesd $0x12,%xmm5,%xmm6,%xmm7",},
+{{0x62, 0xf3, 0x7d, 0x48, 0x66, 0xef, 0x12, }, 7, 0, "", "",
+"62 f3 7d 48 66 ef 12 \tvfpclassps $0x12,%zmm7,%k5",},
+{{0x62, 0xf3, 0xfd, 0x48, 0x66, 0xef, 0x12, }, 7, 0, "", "",
+"62 f3 fd 48 66 ef 12 \tvfpclasspd $0x12,%zmm7,%k5",},
+{{0x62, 0xf3, 0x7d, 0x08, 0x67, 0xef, 0x12, }, 7, 0, "", "",
+"62 f3 7d 08 67 ef 12 \tvfpclassss $0x12,%xmm7,%k5",},
+{{0x62, 0xf3, 0xfd, 0x08, 0x67, 0xef, 0x12, }, 7, 0, "", "",
+"62 f3 fd 08 67 ef 12 \tvfpclasssd $0x12,%xmm7,%k5",},
+{{0x62, 0xf1, 0x4d, 0x48, 0x72, 0xc5, 0x12, }, 7, 0, "", "",
+"62 f1 4d 48 72 c5 12 \tvprord $0x12,%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xcd, 0x48, 0x72, 0xc5, 0x12, }, 7, 0, "", "",
+"62 f1 cd 48 72 c5 12 \tvprorq $0x12,%zmm5,%zmm6",},
+{{0x62, 0xf1, 0x4d, 0x48, 0x72, 0xcd, 0x12, }, 7, 0, "", "",
+"62 f1 4d 48 72 cd 12 \tvprold $0x12,%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xcd, 0x48, 0x72, 0xcd, 0x12, }, 7, 0, "", "",
+"62 f1 cd 48 72 cd 12 \tvprolq $0x12,%zmm5,%zmm6",},
+{{0x0f, 0x72, 0xe6, 0x02, }, 4, 0, "", "",
+"0f 72 e6 02          \tpsrad  $0x2,%mm6",},
+{{0xc5, 0xed, 0x72, 0xe6, 0x05, }, 5, 0, "", "",
+"c5 ed 72 e6 05       \tvpsrad $0x5,%ymm6,%ymm2",},
+{{0x62, 0xf1, 0x6d, 0x48, 0x72, 0xe6, 0x05, }, 7, 0, "", "",
+"62 f1 6d 48 72 e6 05 \tvpsrad $0x5,%zmm6,%zmm2",},
+{{0x62, 0xf1, 0xed, 0x48, 0x72, 0xe6, 0x05, }, 7, 0, "", "",
+"62 f1 ed 48 72 e6 05 \tvpsraq $0x5,%zmm6,%zmm2",},
+{{0x62, 0xf2, 0x7d, 0x49, 0xc6, 0x8c, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 c6 8c fd 7b 00 00 00 \tvgatherpf0dps 0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0xc6, 0x8c, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 c6 8c fd 7b 00 00 00 \tvgatherpf0dpd 0x7b(%ebp,%ymm7,8){%k1}",},
+{{0x62, 0xf2, 0x7d, 0x49, 0xc6, 0x94, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 c6 94 fd 7b 00 00 00 \tvgatherpf1dps 0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0xc6, 0x94, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 c6 94 fd 7b 00 00 00 \tvgatherpf1dpd 0x7b(%ebp,%ymm7,8){%k1}",},
+{{0x62, 0xf2, 0x7d, 0x49, 0xc6, 0xac, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 c6 ac fd 7b 00 00 00 \tvscatterpf0dps 0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0xc6, 0xac, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 c6 ac fd 7b 00 00 00 \tvscatterpf0dpd 0x7b(%ebp,%ymm7,8){%k1}",},
+{{0x62, 0xf2, 0x7d, 0x49, 0xc6, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 c6 b4 fd 7b 00 00 00 \tvscatterpf1dps 0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0xc6, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 c6 b4 fd 7b 00 00 00 \tvscatterpf1dpd 0x7b(%ebp,%ymm7,8){%k1}",},
+{{0x62, 0xf2, 0x7d, 0x49, 0xc7, 0x8c, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 c7 8c fd 7b 00 00 00 \tvgatherpf0qps 0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0xc7, 0x8c, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 c7 8c fd 7b 00 00 00 \tvgatherpf0qpd 0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0x7d, 0x49, 0xc7, 0x94, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 c7 94 fd 7b 00 00 00 \tvgatherpf1qps 0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0xc7, 0x94, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 c7 94 fd 7b 00 00 00 \tvgatherpf1qpd 0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0x7d, 0x49, 0xc7, 0xac, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 c7 ac fd 7b 00 00 00 \tvscatterpf0qps 0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0xc7, 0xac, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 c7 ac fd 7b 00 00 00 \tvscatterpf0qpd 0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0x7d, 0x49, 0xc7, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 c7 b4 fd 7b 00 00 00 \tvscatterpf1qps 0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0xc7, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 c7 b4 fd 7b 00 00 00 \tvscatterpf1qpd 0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf1, 0xd5, 0x48, 0x58, 0xf4, }, 6, 0, "", "",
+"62 f1 d5 48 58 f4    \tvaddpd %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x4f, 0x58, 0xf4, }, 6, 0, "", "",
+"62 f1 d5 4f 58 f4    \tvaddpd %zmm4,%zmm5,%zmm6{%k7}",},
+{{0x62, 0xf1, 0xd5, 0xcf, 0x58, 0xf4, }, 6, 0, "", "",
+"62 f1 d5 cf 58 f4    \tvaddpd %zmm4,%zmm5,%zmm6{%k7}{z}",},
+{{0x62, 0xf1, 0xd5, 0x18, 0x58, 0xf4, }, 6, 0, "", "",
+"62 f1 d5 18 58 f4    \tvaddpd {rn-sae},%zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x58, 0x58, 0xf4, }, 6, 0, "", "",
+"62 f1 d5 58 58 f4    \tvaddpd {ru-sae},%zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x38, 0x58, 0xf4, }, 6, 0, "", "",
+"62 f1 d5 38 58 f4    \tvaddpd {rd-sae},%zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x78, 0x58, 0xf4, }, 6, 0, "", "",
+"62 f1 d5 78 58 f4    \tvaddpd {rz-sae},%zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x48, 0x58, 0x31, }, 6, 0, "", "",
+"62 f1 d5 48 58 31    \tvaddpd (%ecx),%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x48, 0x58, 0xb4, 0xc8, 0x23, 0x01, 0x00, 0x00, }, 11, 0, "", "",
+"62 f1 d5 48 58 b4 c8 23 01 00 00 \tvaddpd 0x123(%eax,%ecx,8),%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x58, 0x58, 0x31, }, 6, 0, "", "",
+"62 f1 d5 58 58 31    \tvaddpd (%ecx){1to8},%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x48, 0x58, 0x72, 0x7f, }, 7, 0, "", "",
+"62 f1 d5 48 58 72 7f \tvaddpd 0x1fc0(%edx),%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x58, 0x58, 0x72, 0x7f, }, 7, 0, "", "",
+"62 f1 d5 58 58 72 7f \tvaddpd 0x3f8(%edx){1to8},%zmm5,%zmm6",},
+{{0x62, 0xf1, 0x4c, 0x58, 0xc2, 0x6a, 0x7f, 0x08, }, 8, 0, "", "",
+"62 f1 4c 58 c2 6a 7f 08 \tvcmpeq_uqps 0x1fc(%edx){1to16},%zmm6,%k5",},
+{{0x62, 0xf1, 0xe7, 0x0f, 0xc2, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, 0x01, }, 12, 0, "", "",
+"62 f1 e7 0f c2 ac c8 23 01 00 00 01 \tvcmpltsd 0x123(%eax,%ecx,8),%xmm3,%k5{%k7}",},
+{{0x62, 0xf1, 0xd7, 0x1f, 0xc2, 0xec, 0x02, }, 7, 0, "", "",
+"62 f1 d7 1f c2 ec 02 \tvcmplesd {sae},%xmm4,%xmm5,%k5{%k7}",},
+{{0x62, 0xf3, 0x5d, 0x0f, 0x27, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, 0x5b, }, 12, 0, "", "",
+"62 f3 5d 0f 27 ac c8 23 01 00 00 5b \tvgetmantss $0x5b,0x123(%eax,%ecx,8),%xmm4,%xmm5{%k7}",},
 {{0xf3, 0x0f, 0x1b, 0x00, }, 4, 0, "", "",
 "f3 0f 1b 00          \tbndmk  (%eax),%bnd0",},
 {{0xf3, 0x0f, 0x1b, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
@@ -309,19 +1319,19 @@
 {{0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
 "0f 1b 84 08 78 56 34 12 \tbndstx %bnd0,0x12345678(%eax,%ecx,1)",},
 {{0xf2, 0xe8, 0xfc, 0xff, 0xff, 0xff, }, 6, 0xfffffffc, "call", "unconditional",
-"f2 e8 fc ff ff ff    \tbnd call 3c3 <main+0x3c3>",},
+"f2 e8 fc ff ff ff    \tbnd call fce <main+0xfce>",},
 {{0xf2, 0xff, 0x10, }, 3, 0, "call", "indirect",
 "f2 ff 10             \tbnd call *(%eax)",},
 {{0xf2, 0xc3, }, 2, 0, "ret", "indirect",
 "f2 c3                \tbnd ret ",},
 {{0xf2, 0xe9, 0xfc, 0xff, 0xff, 0xff, }, 6, 0xfffffffc, "jmp", "unconditional",
-"f2 e9 fc ff ff ff    \tbnd jmp 3ce <main+0x3ce>",},
+"f2 e9 fc ff ff ff    \tbnd jmp fd9 <main+0xfd9>",},
 {{0xf2, 0xe9, 0xfc, 0xff, 0xff, 0xff, }, 6, 0xfffffffc, "jmp", "unconditional",
-"f2 e9 fc ff ff ff    \tbnd jmp 3d4 <main+0x3d4>",},
+"f2 e9 fc ff ff ff    \tbnd jmp fdf <main+0xfdf>",},
 {{0xf2, 0xff, 0x21, }, 3, 0, "jmp", "indirect",
 "f2 ff 21             \tbnd jmp *(%ecx)",},
 {{0xf2, 0x0f, 0x85, 0xfc, 0xff, 0xff, 0xff, }, 7, 0xfffffffc, "jcc", "conditional",
-"f2 0f 85 fc ff ff ff \tbnd jne 3de <main+0x3de>",},
+"f2 0f 85 fc ff ff ff \tbnd jne fe9 <main+0xfe9>",},
 {{0x0f, 0x3a, 0xcc, 0xc1, 0x00, }, 5, 0, "", "",
 "0f 3a cc c1 00       \tsha1rnds4 $0x0,%xmm1,%xmm0",},
 {{0x0f, 0x3a, 0xcc, 0xd7, 0x91, }, 5, 0, "", "",
diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-64.c b/tools/perf/arch/x86/tests/insn-x86-dat-64.c
index 4fe7cce179c4..9c8c61e06d5a 100644
--- a/tools/perf/arch/x86/tests/insn-x86-dat-64.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-64.c
@@ -6,6 +6,938 @@
 
 {{0x0f, 0x31, }, 2, 0, "", "",
 "0f 31                \trdtsc  ",},
+{{0xc4, 0xe2, 0x7d, 0x13, 0xeb, }, 5, 0, "", "",
+"c4 e2 7d 13 eb       \tvcvtph2ps %xmm3,%ymm5",},
+{{0x48, 0x0f, 0x41, 0xd8, }, 4, 0, "", "",
+"48 0f 41 d8          \tcmovno %rax,%rbx",},
+{{0x48, 0x0f, 0x41, 0x88, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"48 0f 41 88 78 56 34 12 \tcmovno 0x12345678(%rax),%rcx",},
+{{0x66, 0x0f, 0x41, 0x88, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 0f 41 88 78 56 34 12 \tcmovno 0x12345678(%rax),%cx",},
+{{0x48, 0x0f, 0x44, 0xd8, }, 4, 0, "", "",
+"48 0f 44 d8          \tcmove  %rax,%rbx",},
+{{0x48, 0x0f, 0x44, 0x88, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"48 0f 44 88 78 56 34 12 \tcmove  0x12345678(%rax),%rcx",},
+{{0x66, 0x0f, 0x44, 0x88, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 0f 44 88 78 56 34 12 \tcmove  0x12345678(%rax),%cx",},
+{{0x0f, 0x90, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 90 80 78 56 34 12 \tseto   0x12345678(%rax)",},
+{{0x0f, 0x91, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 91 80 78 56 34 12 \tsetno  0x12345678(%rax)",},
+{{0x0f, 0x92, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 92 80 78 56 34 12 \tsetb   0x12345678(%rax)",},
+{{0x0f, 0x92, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 92 80 78 56 34 12 \tsetb   0x12345678(%rax)",},
+{{0x0f, 0x92, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 92 80 78 56 34 12 \tsetb   0x12345678(%rax)",},
+{{0x0f, 0x93, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 93 80 78 56 34 12 \tsetae  0x12345678(%rax)",},
+{{0x0f, 0x93, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 93 80 78 56 34 12 \tsetae  0x12345678(%rax)",},
+{{0x0f, 0x93, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 93 80 78 56 34 12 \tsetae  0x12345678(%rax)",},
+{{0x0f, 0x98, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 98 80 78 56 34 12 \tsets   0x12345678(%rax)",},
+{{0x0f, 0x99, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 99 80 78 56 34 12 \tsetns  0x12345678(%rax)",},
+{{0xc5, 0xcc, 0x41, 0xef, }, 4, 0, "", "",
+"c5 cc 41 ef          \tkandw  %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x41, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 41 ef       \tkandq  %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x41, 0xef, }, 4, 0, "", "",
+"c5 cd 41 ef          \tkandb  %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcd, 0x41, 0xef, }, 5, 0, "", "",
+"c4 e1 cd 41 ef       \tkandd  %k7,%k6,%k5",},
+{{0xc5, 0xcc, 0x42, 0xef, }, 4, 0, "", "",
+"c5 cc 42 ef          \tkandnw %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x42, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 42 ef       \tkandnq %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x42, 0xef, }, 4, 0, "", "",
+"c5 cd 42 ef          \tkandnb %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcd, 0x42, 0xef, }, 5, 0, "", "",
+"c4 e1 cd 42 ef       \tkandnd %k7,%k6,%k5",},
+{{0xc5, 0xf8, 0x44, 0xf7, }, 4, 0, "", "",
+"c5 f8 44 f7          \tknotw  %k7,%k6",},
+{{0xc4, 0xe1, 0xf8, 0x44, 0xf7, }, 5, 0, "", "",
+"c4 e1 f8 44 f7       \tknotq  %k7,%k6",},
+{{0xc5, 0xf9, 0x44, 0xf7, }, 4, 0, "", "",
+"c5 f9 44 f7          \tknotb  %k7,%k6",},
+{{0xc4, 0xe1, 0xf9, 0x44, 0xf7, }, 5, 0, "", "",
+"c4 e1 f9 44 f7       \tknotd  %k7,%k6",},
+{{0xc5, 0xcc, 0x45, 0xef, }, 4, 0, "", "",
+"c5 cc 45 ef          \tkorw   %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x45, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 45 ef       \tkorq   %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x45, 0xef, }, 4, 0, "", "",
+"c5 cd 45 ef          \tkorb   %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcd, 0x45, 0xef, }, 5, 0, "", "",
+"c4 e1 cd 45 ef       \tkord   %k7,%k6,%k5",},
+{{0xc5, 0xcc, 0x46, 0xef, }, 4, 0, "", "",
+"c5 cc 46 ef          \tkxnorw %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x46, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 46 ef       \tkxnorq %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x46, 0xef, }, 4, 0, "", "",
+"c5 cd 46 ef          \tkxnorb %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcd, 0x46, 0xef, }, 5, 0, "", "",
+"c4 e1 cd 46 ef       \tkxnord %k7,%k6,%k5",},
+{{0xc5, 0xcc, 0x47, 0xef, }, 4, 0, "", "",
+"c5 cc 47 ef          \tkxorw  %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x47, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 47 ef       \tkxorq  %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x47, 0xef, }, 4, 0, "", "",
+"c5 cd 47 ef          \tkxorb  %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcd, 0x47, 0xef, }, 5, 0, "", "",
+"c4 e1 cd 47 ef       \tkxord  %k7,%k6,%k5",},
+{{0xc5, 0xcc, 0x4a, 0xef, }, 4, 0, "", "",
+"c5 cc 4a ef          \tkaddw  %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x4a, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 4a ef       \tkaddq  %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x4a, 0xef, }, 4, 0, "", "",
+"c5 cd 4a ef          \tkaddb  %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcd, 0x4a, 0xef, }, 5, 0, "", "",
+"c4 e1 cd 4a ef       \tkaddd  %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x4b, 0xef, }, 4, 0, "", "",
+"c5 cd 4b ef          \tkunpckbw %k7,%k6,%k5",},
+{{0xc5, 0xcc, 0x4b, 0xef, }, 4, 0, "", "",
+"c5 cc 4b ef          \tkunpckwd %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x4b, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 4b ef       \tkunpckdq %k7,%k6,%k5",},
+{{0xc5, 0xf8, 0x90, 0xee, }, 4, 0, "", "",
+"c5 f8 90 ee          \tkmovw  %k6,%k5",},
+{{0xc5, 0xf8, 0x90, 0x29, }, 4, 0, "", "",
+"c5 f8 90 29          \tkmovw  (%rcx),%k5",},
+{{0xc4, 0xa1, 0x78, 0x90, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "",
+"c4 a1 78 90 ac f0 23 01 00 00 \tkmovw  0x123(%rax,%r14,8),%k5",},
+{{0xc5, 0xf8, 0x91, 0x29, }, 4, 0, "", "",
+"c5 f8 91 29          \tkmovw  %k5,(%rcx)",},
+{{0xc4, 0xa1, 0x78, 0x91, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "",
+"c4 a1 78 91 ac f0 23 01 00 00 \tkmovw  %k5,0x123(%rax,%r14,8)",},
+{{0xc5, 0xf8, 0x92, 0xe8, }, 4, 0, "", "",
+"c5 f8 92 e8          \tkmovw  %eax,%k5",},
+{{0xc5, 0xf8, 0x92, 0xed, }, 4, 0, "", "",
+"c5 f8 92 ed          \tkmovw  %ebp,%k5",},
+{{0xc4, 0xc1, 0x78, 0x92, 0xed, }, 5, 0, "", "",
+"c4 c1 78 92 ed       \tkmovw  %r13d,%k5",},
+{{0xc5, 0xf8, 0x93, 0xc5, }, 4, 0, "", "",
+"c5 f8 93 c5          \tkmovw  %k5,%eax",},
+{{0xc5, 0xf8, 0x93, 0xed, }, 4, 0, "", "",
+"c5 f8 93 ed          \tkmovw  %k5,%ebp",},
+{{0xc5, 0x78, 0x93, 0xed, }, 4, 0, "", "",
+"c5 78 93 ed          \tkmovw  %k5,%r13d",},
+{{0xc4, 0xe1, 0xf8, 0x90, 0xee, }, 5, 0, "", "",
+"c4 e1 f8 90 ee       \tkmovq  %k6,%k5",},
+{{0xc4, 0xe1, 0xf8, 0x90, 0x29, }, 5, 0, "", "",
+"c4 e1 f8 90 29       \tkmovq  (%rcx),%k5",},
+{{0xc4, 0xa1, 0xf8, 0x90, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "",
+"c4 a1 f8 90 ac f0 23 01 00 00 \tkmovq  0x123(%rax,%r14,8),%k5",},
+{{0xc4, 0xe1, 0xf8, 0x91, 0x29, }, 5, 0, "", "",
+"c4 e1 f8 91 29       \tkmovq  %k5,(%rcx)",},
+{{0xc4, 0xa1, 0xf8, 0x91, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "",
+"c4 a1 f8 91 ac f0 23 01 00 00 \tkmovq  %k5,0x123(%rax,%r14,8)",},
+{{0xc4, 0xe1, 0xfb, 0x92, 0xe8, }, 5, 0, "", "",
+"c4 e1 fb 92 e8       \tkmovq  %rax,%k5",},
+{{0xc4, 0xe1, 0xfb, 0x92, 0xed, }, 5, 0, "", "",
+"c4 e1 fb 92 ed       \tkmovq  %rbp,%k5",},
+{{0xc4, 0xc1, 0xfb, 0x92, 0xed, }, 5, 0, "", "",
+"c4 c1 fb 92 ed       \tkmovq  %r13,%k5",},
+{{0xc4, 0xe1, 0xfb, 0x93, 0xc5, }, 5, 0, "", "",
+"c4 e1 fb 93 c5       \tkmovq  %k5,%rax",},
+{{0xc4, 0xe1, 0xfb, 0x93, 0xed, }, 5, 0, "", "",
+"c4 e1 fb 93 ed       \tkmovq  %k5,%rbp",},
+{{0xc4, 0x61, 0xfb, 0x93, 0xed, }, 5, 0, "", "",
+"c4 61 fb 93 ed       \tkmovq  %k5,%r13",},
+{{0xc5, 0xf9, 0x90, 0xee, }, 4, 0, "", "",
+"c5 f9 90 ee          \tkmovb  %k6,%k5",},
+{{0xc5, 0xf9, 0x90, 0x29, }, 4, 0, "", "",
+"c5 f9 90 29          \tkmovb  (%rcx),%k5",},
+{{0xc4, 0xa1, 0x79, 0x90, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "",
+"c4 a1 79 90 ac f0 23 01 00 00 \tkmovb  0x123(%rax,%r14,8),%k5",},
+{{0xc5, 0xf9, 0x91, 0x29, }, 4, 0, "", "",
+"c5 f9 91 29          \tkmovb  %k5,(%rcx)",},
+{{0xc4, 0xa1, 0x79, 0x91, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "",
+"c4 a1 79 91 ac f0 23 01 00 00 \tkmovb  %k5,0x123(%rax,%r14,8)",},
+{{0xc5, 0xf9, 0x92, 0xe8, }, 4, 0, "", "",
+"c5 f9 92 e8          \tkmovb  %eax,%k5",},
+{{0xc5, 0xf9, 0x92, 0xed, }, 4, 0, "", "",
+"c5 f9 92 ed          \tkmovb  %ebp,%k5",},
+{{0xc4, 0xc1, 0x79, 0x92, 0xed, }, 5, 0, "", "",
+"c4 c1 79 92 ed       \tkmovb  %r13d,%k5",},
+{{0xc5, 0xf9, 0x93, 0xc5, }, 4, 0, "", "",
+"c5 f9 93 c5          \tkmovb  %k5,%eax",},
+{{0xc5, 0xf9, 0x93, 0xed, }, 4, 0, "", "",
+"c5 f9 93 ed          \tkmovb  %k5,%ebp",},
+{{0xc5, 0x79, 0x93, 0xed, }, 4, 0, "", "",
+"c5 79 93 ed          \tkmovb  %k5,%r13d",},
+{{0xc4, 0xe1, 0xf9, 0x90, 0xee, }, 5, 0, "", "",
+"c4 e1 f9 90 ee       \tkmovd  %k6,%k5",},
+{{0xc4, 0xe1, 0xf9, 0x90, 0x29, }, 5, 0, "", "",
+"c4 e1 f9 90 29       \tkmovd  (%rcx),%k5",},
+{{0xc4, 0xa1, 0xf9, 0x90, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "",
+"c4 a1 f9 90 ac f0 23 01 00 00 \tkmovd  0x123(%rax,%r14,8),%k5",},
+{{0xc4, 0xe1, 0xf9, 0x91, 0x29, }, 5, 0, "", "",
+"c4 e1 f9 91 29       \tkmovd  %k5,(%rcx)",},
+{{0xc4, 0xa1, 0xf9, 0x91, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "",
+"c4 a1 f9 91 ac f0 23 01 00 00 \tkmovd  %k5,0x123(%rax,%r14,8)",},
+{{0xc5, 0xfb, 0x92, 0xe8, }, 4, 0, "", "",
+"c5 fb 92 e8          \tkmovd  %eax,%k5",},
+{{0xc5, 0xfb, 0x92, 0xed, }, 4, 0, "", "",
+"c5 fb 92 ed          \tkmovd  %ebp,%k5",},
+{{0xc4, 0xc1, 0x7b, 0x92, 0xed, }, 5, 0, "", "",
+"c4 c1 7b 92 ed       \tkmovd  %r13d,%k5",},
+{{0xc5, 0xfb, 0x93, 0xc5, }, 4, 0, "", "",
+"c5 fb 93 c5          \tkmovd  %k5,%eax",},
+{{0xc5, 0xfb, 0x93, 0xed, }, 4, 0, "", "",
+"c5 fb 93 ed          \tkmovd  %k5,%ebp",},
+{{0xc5, 0x7b, 0x93, 0xed, }, 4, 0, "", "",
+"c5 7b 93 ed          \tkmovd  %k5,%r13d",},
+{{0xc5, 0xf8, 0x98, 0xee, }, 4, 0, "", "",
+"c5 f8 98 ee          \tkortestw %k6,%k5",},
+{{0xc4, 0xe1, 0xf8, 0x98, 0xee, }, 5, 0, "", "",
+"c4 e1 f8 98 ee       \tkortestq %k6,%k5",},
+{{0xc5, 0xf9, 0x98, 0xee, }, 4, 0, "", "",
+"c5 f9 98 ee          \tkortestb %k6,%k5",},
+{{0xc4, 0xe1, 0xf9, 0x98, 0xee, }, 5, 0, "", "",
+"c4 e1 f9 98 ee       \tkortestd %k6,%k5",},
+{{0xc5, 0xf8, 0x99, 0xee, }, 4, 0, "", "",
+"c5 f8 99 ee          \tktestw %k6,%k5",},
+{{0xc4, 0xe1, 0xf8, 0x99, 0xee, }, 5, 0, "", "",
+"c4 e1 f8 99 ee       \tktestq %k6,%k5",},
+{{0xc5, 0xf9, 0x99, 0xee, }, 4, 0, "", "",
+"c5 f9 99 ee          \tktestb %k6,%k5",},
+{{0xc4, 0xe1, 0xf9, 0x99, 0xee, }, 5, 0, "", "",
+"c4 e1 f9 99 ee       \tktestd %k6,%k5",},
+{{0xc4, 0xe3, 0xf9, 0x30, 0xee, 0x12, }, 6, 0, "", "",
+"c4 e3 f9 30 ee 12    \tkshiftrw $0x12,%k6,%k5",},
+{{0xc4, 0xe3, 0xf9, 0x31, 0xee, 0x5b, }, 6, 0, "", "",
+"c4 e3 f9 31 ee 5b    \tkshiftrq $0x5b,%k6,%k5",},
+{{0xc4, 0xe3, 0xf9, 0x32, 0xee, 0x12, }, 6, 0, "", "",
+"c4 e3 f9 32 ee 12    \tkshiftlw $0x12,%k6,%k5",},
+{{0xc4, 0xe3, 0xf9, 0x33, 0xee, 0x5b, }, 6, 0, "", "",
+"c4 e3 f9 33 ee 5b    \tkshiftlq $0x5b,%k6,%k5",},
+{{0xc5, 0xf8, 0x5b, 0xf5, }, 4, 0, "", "",
+"c5 f8 5b f5          \tvcvtdq2ps %xmm5,%xmm6",},
+{{0x62, 0x91, 0xfc, 0x4f, 0x5b, 0xf5, }, 6, 0, "", "",
+"62 91 fc 4f 5b f5    \tvcvtqq2ps %zmm29,%ymm6{%k7}",},
+{{0xc5, 0xf9, 0x5b, 0xf5, }, 4, 0, "", "",
+"c5 f9 5b f5          \tvcvtps2dq %xmm5,%xmm6",},
+{{0xc5, 0xfa, 0x5b, 0xf5, }, 4, 0, "", "",
+"c5 fa 5b f5          \tvcvttps2dq %xmm5,%xmm6",},
+{{0x0f, 0x6f, 0xe0, }, 3, 0, "", "",
+"0f 6f e0             \tmovq   %mm0,%mm4",},
+{{0xc5, 0xfd, 0x6f, 0xf4, }, 4, 0, "", "",
+"c5 fd 6f f4          \tvmovdqa %ymm4,%ymm6",},
+{{0x62, 0x01, 0x7d, 0x48, 0x6f, 0xd1, }, 6, 0, "", "",
+"62 01 7d 48 6f d1    \tvmovdqa32 %zmm25,%zmm26",},
+{{0x62, 0x01, 0xfd, 0x48, 0x6f, 0xd1, }, 6, 0, "", "",
+"62 01 fd 48 6f d1    \tvmovdqa64 %zmm25,%zmm26",},
+{{0xc5, 0xfe, 0x6f, 0xf4, }, 4, 0, "", "",
+"c5 fe 6f f4          \tvmovdqu %ymm4,%ymm6",},
+{{0x62, 0x01, 0x7e, 0x48, 0x6f, 0xf5, }, 6, 0, "", "",
+"62 01 7e 48 6f f5    \tvmovdqu32 %zmm29,%zmm30",},
+{{0x62, 0x01, 0xfe, 0x48, 0x6f, 0xd1, }, 6, 0, "", "",
+"62 01 fe 48 6f d1    \tvmovdqu64 %zmm25,%zmm26",},
+{{0x62, 0x01, 0x7f, 0x48, 0x6f, 0xf5, }, 6, 0, "", "",
+"62 01 7f 48 6f f5    \tvmovdqu8 %zmm29,%zmm30",},
+{{0x62, 0x01, 0xff, 0x48, 0x6f, 0xd1, }, 6, 0, "", "",
+"62 01 ff 48 6f d1    \tvmovdqu16 %zmm25,%zmm26",},
+{{0x0f, 0x78, 0xc3, }, 3, 0, "", "",
+"0f 78 c3             \tvmread %rax,%rbx",},
+{{0x62, 0x01, 0x7c, 0x48, 0x78, 0xd1, }, 6, 0, "", "",
+"62 01 7c 48 78 d1    \tvcvttps2udq %zmm25,%zmm26",},
+{{0x62, 0x91, 0xfc, 0x4f, 0x78, 0xf5, }, 6, 0, "", "",
+"62 91 fc 4f 78 f5    \tvcvttpd2udq %zmm29,%ymm6{%k7}",},
+{{0x62, 0xf1, 0xff, 0x08, 0x78, 0xc6, }, 6, 0, "", "",
+"62 f1 ff 08 78 c6    \tvcvttsd2usi %xmm6,%rax",},
+{{0x62, 0xf1, 0xfe, 0x08, 0x78, 0xc6, }, 6, 0, "", "",
+"62 f1 fe 08 78 c6    \tvcvttss2usi %xmm6,%rax",},
+{{0x62, 0x61, 0x7d, 0x4f, 0x78, 0xd5, }, 6, 0, "", "",
+"62 61 7d 4f 78 d5    \tvcvttps2uqq %ymm5,%zmm26{%k7}",},
+{{0x62, 0x01, 0xfd, 0x48, 0x78, 0xf5, }, 6, 0, "", "",
+"62 01 fd 48 78 f5    \tvcvttpd2uqq %zmm29,%zmm30",},
+{{0x0f, 0x79, 0xd8, }, 3, 0, "", "",
+"0f 79 d8             \tvmwrite %rax,%rbx",},
+{{0x62, 0x01, 0x7c, 0x48, 0x79, 0xd1, }, 6, 0, "", "",
+"62 01 7c 48 79 d1    \tvcvtps2udq %zmm25,%zmm26",},
+{{0x62, 0x91, 0xfc, 0x4f, 0x79, 0xf5, }, 6, 0, "", "",
+"62 91 fc 4f 79 f5    \tvcvtpd2udq %zmm29,%ymm6{%k7}",},
+{{0x62, 0xf1, 0xff, 0x08, 0x79, 0xc6, }, 6, 0, "", "",
+"62 f1 ff 08 79 c6    \tvcvtsd2usi %xmm6,%rax",},
+{{0x62, 0xf1, 0xfe, 0x08, 0x79, 0xc6, }, 6, 0, "", "",
+"62 f1 fe 08 79 c6    \tvcvtss2usi %xmm6,%rax",},
+{{0x62, 0x61, 0x7d, 0x4f, 0x79, 0xd5, }, 6, 0, "", "",
+"62 61 7d 4f 79 d5    \tvcvtps2uqq %ymm5,%zmm26{%k7}",},
+{{0x62, 0x01, 0xfd, 0x48, 0x79, 0xf5, }, 6, 0, "", "",
+"62 01 fd 48 79 f5    \tvcvtpd2uqq %zmm29,%zmm30",},
+{{0x62, 0x61, 0x7e, 0x4f, 0x7a, 0xed, }, 6, 0, "", "",
+"62 61 7e 4f 7a ed    \tvcvtudq2pd %ymm5,%zmm29{%k7}",},
+{{0x62, 0x01, 0xfe, 0x48, 0x7a, 0xd1, }, 6, 0, "", "",
+"62 01 fe 48 7a d1    \tvcvtuqq2pd %zmm25,%zmm26",},
+{{0x62, 0x01, 0x7f, 0x48, 0x7a, 0xf5, }, 6, 0, "", "",
+"62 01 7f 48 7a f5    \tvcvtudq2ps %zmm29,%zmm30",},
+{{0x62, 0x01, 0xff, 0x4f, 0x7a, 0xd1, }, 6, 0, "", "",
+"62 01 ff 4f 7a d1    \tvcvtuqq2ps %zmm25,%ymm26{%k7}",},
+{{0x62, 0x01, 0x7d, 0x4f, 0x7a, 0xd1, }, 6, 0, "", "",
+"62 01 7d 4f 7a d1    \tvcvttps2qq %ymm25,%zmm26{%k7}",},
+{{0x62, 0x01, 0xfd, 0x48, 0x7a, 0xf5, }, 6, 0, "", "",
+"62 01 fd 48 7a f5    \tvcvttpd2qq %zmm29,%zmm30",},
+{{0x62, 0xf1, 0x57, 0x08, 0x7b, 0xf0, }, 6, 0, "", "",
+"62 f1 57 08 7b f0    \tvcvtusi2sd %eax,%xmm5,%xmm6",},
+{{0x62, 0xf1, 0x56, 0x08, 0x7b, 0xf0, }, 6, 0, "", "",
+"62 f1 56 08 7b f0    \tvcvtusi2ss %eax,%xmm5,%xmm6",},
+{{0x62, 0x61, 0x7d, 0x4f, 0x7b, 0xd5, }, 6, 0, "", "",
+"62 61 7d 4f 7b d5    \tvcvtps2qq %ymm5,%zmm26{%k7}",},
+{{0x62, 0x01, 0xfd, 0x48, 0x7b, 0xf5, }, 6, 0, "", "",
+"62 01 fd 48 7b f5    \tvcvtpd2qq %zmm29,%zmm30",},
+{{0x0f, 0x7f, 0xc4, }, 3, 0, "", "",
+"0f 7f c4             \tmovq   %mm0,%mm4",},
+{{0xc5, 0x7d, 0x7f, 0xc6, }, 4, 0, "", "",
+"c5 7d 7f c6          \tvmovdqa %ymm8,%ymm6",},
+{{0x62, 0x01, 0x7d, 0x48, 0x7f, 0xca, }, 6, 0, "", "",
+"62 01 7d 48 7f ca    \tvmovdqa32 %zmm25,%zmm26",},
+{{0x62, 0x01, 0xfd, 0x48, 0x7f, 0xca, }, 6, 0, "", "",
+"62 01 fd 48 7f ca    \tvmovdqa64 %zmm25,%zmm26",},
+{{0xc5, 0x7e, 0x7f, 0xc6, }, 4, 0, "", "",
+"c5 7e 7f c6          \tvmovdqu %ymm8,%ymm6",},
+{{0x62, 0x01, 0x7e, 0x48, 0x7f, 0xca, }, 6, 0, "", "",
+"62 01 7e 48 7f ca    \tvmovdqu32 %zmm25,%zmm26",},
+{{0x62, 0x01, 0xfe, 0x48, 0x7f, 0xca, }, 6, 0, "", "",
+"62 01 fe 48 7f ca    \tvmovdqu64 %zmm25,%zmm26",},
+{{0x62, 0x61, 0x7f, 0x48, 0x7f, 0x31, }, 6, 0, "", "",
+"62 61 7f 48 7f 31    \tvmovdqu8 %zmm30,(%rcx)",},
+{{0x62, 0x01, 0xff, 0x48, 0x7f, 0xca, }, 6, 0, "", "",
+"62 01 ff 48 7f ca    \tvmovdqu16 %zmm25,%zmm26",},
+{{0x0f, 0xdb, 0xd1, }, 3, 0, "", "",
+"0f db d1             \tpand   %mm1,%mm2",},
+{{0x66, 0x0f, 0xdb, 0xd1, }, 4, 0, "", "",
+"66 0f db d1          \tpand   %xmm1,%xmm2",},
+{{0xc5, 0xcd, 0xdb, 0xd4, }, 4, 0, "", "",
+"c5 cd db d4          \tvpand  %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x01, 0x35, 0x40, 0xdb, 0xd0, }, 6, 0, "", "",
+"62 01 35 40 db d0    \tvpandd %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x01, 0xb5, 0x40, 0xdb, 0xd0, }, 6, 0, "", "",
+"62 01 b5 40 db d0    \tvpandq %zmm24,%zmm25,%zmm26",},
+{{0x0f, 0xdf, 0xd1, }, 3, 0, "", "",
+"0f df d1             \tpandn  %mm1,%mm2",},
+{{0x66, 0x0f, 0xdf, 0xd1, }, 4, 0, "", "",
+"66 0f df d1          \tpandn  %xmm1,%xmm2",},
+{{0xc5, 0xcd, 0xdf, 0xd4, }, 4, 0, "", "",
+"c5 cd df d4          \tvpandn %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x01, 0x35, 0x40, 0xdf, 0xd0, }, 6, 0, "", "",
+"62 01 35 40 df d0    \tvpandnd %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x01, 0xb5, 0x40, 0xdf, 0xd0, }, 6, 0, "", "",
+"62 01 b5 40 df d0    \tvpandnq %zmm24,%zmm25,%zmm26",},
+{{0xc5, 0xf9, 0xe6, 0xd1, }, 4, 0, "", "",
+"c5 f9 e6 d1          \tvcvttpd2dq %xmm1,%xmm2",},
+{{0xc5, 0xfa, 0xe6, 0xf5, }, 4, 0, "", "",
+"c5 fa e6 f5          \tvcvtdq2pd %xmm5,%xmm6",},
+{{0x62, 0x61, 0x7e, 0x4f, 0xe6, 0xd5, }, 6, 0, "", "",
+"62 61 7e 4f e6 d5    \tvcvtdq2pd %ymm5,%zmm26{%k7}",},
+{{0x62, 0x01, 0xfe, 0x48, 0xe6, 0xd1, }, 6, 0, "", "",
+"62 01 fe 48 e6 d1    \tvcvtqq2pd %zmm25,%zmm26",},
+{{0xc5, 0xfb, 0xe6, 0xd1, }, 4, 0, "", "",
+"c5 fb e6 d1          \tvcvtpd2dq %xmm1,%xmm2",},
+{{0x0f, 0xeb, 0xf4, }, 3, 0, "", "",
+"0f eb f4             \tpor    %mm4,%mm6",},
+{{0xc5, 0xcd, 0xeb, 0xd4, }, 4, 0, "", "",
+"c5 cd eb d4          \tvpor   %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x01, 0x35, 0x40, 0xeb, 0xd0, }, 6, 0, "", "",
+"62 01 35 40 eb d0    \tvpord  %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x01, 0xb5, 0x40, 0xeb, 0xd0, }, 6, 0, "", "",
+"62 01 b5 40 eb d0    \tvporq  %zmm24,%zmm25,%zmm26",},
+{{0x0f, 0xef, 0xf4, }, 3, 0, "", "",
+"0f ef f4             \tpxor   %mm4,%mm6",},
+{{0xc5, 0xcd, 0xef, 0xd4, }, 4, 0, "", "",
+"c5 cd ef d4          \tvpxor  %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x01, 0x35, 0x40, 0xef, 0xd0, }, 6, 0, "", "",
+"62 01 35 40 ef d0    \tvpxord %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x01, 0xb5, 0x40, 0xef, 0xd0, }, 6, 0, "", "",
+"62 01 b5 40 ef d0    \tvpxorq %zmm24,%zmm25,%zmm26",},
+{{0x66, 0x0f, 0x38, 0x10, 0xc1, }, 5, 0, "", "",
+"66 0f 38 10 c1       \tpblendvb %xmm0,%xmm1,%xmm0",},
+{{0x62, 0x02, 0x9d, 0x40, 0x10, 0xeb, }, 6, 0, "", "",
+"62 02 9d 40 10 eb    \tvpsrlvw %zmm27,%zmm28,%zmm29",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x10, 0xe6, }, 6, 0, "", "",
+"62 62 7e 4f 10 e6    \tvpmovuswb %zmm28,%ymm6{%k7}",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x11, 0xe6, }, 6, 0, "", "",
+"62 62 7e 4f 11 e6    \tvpmovusdb %zmm28,%xmm6{%k7}",},
+{{0x62, 0x02, 0x9d, 0x40, 0x11, 0xeb, }, 6, 0, "", "",
+"62 02 9d 40 11 eb    \tvpsravw %zmm27,%zmm28,%zmm29",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x12, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 12 de    \tvpmovusqb %zmm27,%xmm6{%k7}",},
+{{0x62, 0x02, 0x9d, 0x40, 0x12, 0xeb, }, 6, 0, "", "",
+"62 02 9d 40 12 eb    \tvpsllvw %zmm27,%zmm28,%zmm29",},
+{{0xc4, 0xe2, 0x7d, 0x13, 0xeb, }, 5, 0, "", "",
+"c4 e2 7d 13 eb       \tvcvtph2ps %xmm3,%ymm5",},
+{{0x62, 0x62, 0x7d, 0x4f, 0x13, 0xdd, }, 6, 0, "", "",
+"62 62 7d 4f 13 dd    \tvcvtph2ps %ymm5,%zmm27{%k7}",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x13, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 13 de    \tvpmovusdw %zmm27,%ymm6{%k7}",},
+{{0x66, 0x0f, 0x38, 0x14, 0xc1, }, 5, 0, "", "",
+"66 0f 38 14 c1       \tblendvps %xmm0,%xmm1,%xmm0",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x14, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 14 de    \tvpmovusqw %zmm27,%xmm6{%k7}",},
+{{0x62, 0x02, 0x1d, 0x40, 0x14, 0xeb, }, 6, 0, "", "",
+"62 02 1d 40 14 eb    \tvprorvd %zmm27,%zmm28,%zmm29",},
+{{0x62, 0x02, 0x9d, 0x40, 0x14, 0xeb, }, 6, 0, "", "",
+"62 02 9d 40 14 eb    \tvprorvq %zmm27,%zmm28,%zmm29",},
+{{0x66, 0x0f, 0x38, 0x15, 0xc1, }, 5, 0, "", "",
+"66 0f 38 15 c1       \tblendvpd %xmm0,%xmm1,%xmm0",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x15, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 15 de    \tvpmovusqd %zmm27,%ymm6{%k7}",},
+{{0x62, 0x02, 0x1d, 0x40, 0x15, 0xeb, }, 6, 0, "", "",
+"62 02 1d 40 15 eb    \tvprolvd %zmm27,%zmm28,%zmm29",},
+{{0x62, 0x02, 0x9d, 0x40, 0x15, 0xeb, }, 6, 0, "", "",
+"62 02 9d 40 15 eb    \tvprolvq %zmm27,%zmm28,%zmm29",},
+{{0xc4, 0xe2, 0x4d, 0x16, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 16 d4       \tvpermps %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x82, 0x2d, 0x27, 0x16, 0xf0, }, 6, 0, "", "",
+"62 82 2d 27 16 f0    \tvpermps %ymm24,%ymm26,%ymm22{%k7}",},
+{{0x62, 0x82, 0xad, 0x27, 0x16, 0xf0, }, 6, 0, "", "",
+"62 82 ad 27 16 f0    \tvpermpd %ymm24,%ymm26,%ymm22{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x19, 0xf4, }, 5, 0, "", "",
+"c4 e2 7d 19 f4       \tvbroadcastsd %xmm4,%ymm6",},
+{{0x62, 0x02, 0x7d, 0x48, 0x19, 0xd3, }, 6, 0, "", "",
+"62 02 7d 48 19 d3    \tvbroadcastf32x2 %xmm27,%zmm26",},
+{{0xc4, 0xe2, 0x7d, 0x1a, 0x21, }, 5, 0, "", "",
+"c4 e2 7d 1a 21       \tvbroadcastf128 (%rcx),%ymm4",},
+{{0x62, 0x62, 0x7d, 0x48, 0x1a, 0x11, }, 6, 0, "", "",
+"62 62 7d 48 1a 11    \tvbroadcastf32x4 (%rcx),%zmm26",},
+{{0x62, 0x62, 0xfd, 0x48, 0x1a, 0x11, }, 6, 0, "", "",
+"62 62 fd 48 1a 11    \tvbroadcastf64x2 (%rcx),%zmm26",},
+{{0x62, 0x62, 0x7d, 0x48, 0x1b, 0x19, }, 6, 0, "", "",
+"62 62 7d 48 1b 19    \tvbroadcastf32x8 (%rcx),%zmm27",},
+{{0x62, 0x62, 0xfd, 0x48, 0x1b, 0x11, }, 6, 0, "", "",
+"62 62 fd 48 1b 11    \tvbroadcastf64x4 (%rcx),%zmm26",},
+{{0x62, 0x02, 0xfd, 0x48, 0x1f, 0xe3, }, 6, 0, "", "",
+"62 02 fd 48 1f e3    \tvpabsq %zmm27,%zmm28",},
+{{0xc4, 0xe2, 0x79, 0x20, 0xec, }, 5, 0, "", "",
+"c4 e2 79 20 ec       \tvpmovsxbw %xmm4,%xmm5",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x20, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 20 de    \tvpmovswb %zmm27,%ymm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x21, 0xf4, }, 5, 0, "", "",
+"c4 e2 7d 21 f4       \tvpmovsxbd %xmm4,%ymm6",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x21, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 21 de    \tvpmovsdb %zmm27,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x22, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 22 e4       \tvpmovsxbq %xmm4,%ymm4",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x22, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 22 de    \tvpmovsqb %zmm27,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x23, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 23 e4       \tvpmovsxwd %xmm4,%ymm4",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x23, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 23 de    \tvpmovsdw %zmm27,%ymm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x24, 0xf4, }, 5, 0, "", "",
+"c4 e2 7d 24 f4       \tvpmovsxwq %xmm4,%ymm6",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x24, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 24 de    \tvpmovsqw %zmm27,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x25, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 25 e4       \tvpmovsxdq %xmm4,%ymm4",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x25, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 25 de    \tvpmovsqd %zmm27,%ymm6{%k7}",},
+{{0x62, 0x92, 0x1d, 0x40, 0x26, 0xeb, }, 6, 0, "", "",
+"62 92 1d 40 26 eb    \tvptestmb %zmm27,%zmm28,%k5",},
+{{0x62, 0x92, 0x9d, 0x40, 0x26, 0xeb, }, 6, 0, "", "",
+"62 92 9d 40 26 eb    \tvptestmw %zmm27,%zmm28,%k5",},
+{{0x62, 0x92, 0x26, 0x40, 0x26, 0xea, }, 6, 0, "", "",
+"62 92 26 40 26 ea    \tvptestnmb %zmm26,%zmm27,%k5",},
+{{0x62, 0x92, 0xa6, 0x40, 0x26, 0xea, }, 6, 0, "", "",
+"62 92 a6 40 26 ea    \tvptestnmw %zmm26,%zmm27,%k5",},
+{{0x62, 0x92, 0x1d, 0x40, 0x27, 0xeb, }, 6, 0, "", "",
+"62 92 1d 40 27 eb    \tvptestmd %zmm27,%zmm28,%k5",},
+{{0x62, 0x92, 0x9d, 0x40, 0x27, 0xeb, }, 6, 0, "", "",
+"62 92 9d 40 27 eb    \tvptestmq %zmm27,%zmm28,%k5",},
+{{0x62, 0x92, 0x26, 0x40, 0x27, 0xea, }, 6, 0, "", "",
+"62 92 26 40 27 ea    \tvptestnmd %zmm26,%zmm27,%k5",},
+{{0x62, 0x92, 0xa6, 0x40, 0x27, 0xea, }, 6, 0, "", "",
+"62 92 a6 40 27 ea    \tvptestnmq %zmm26,%zmm27,%k5",},
+{{0xc4, 0xe2, 0x4d, 0x28, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 28 d4       \tvpmuldq %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x62, 0x7e, 0x48, 0x28, 0xe5, }, 6, 0, "", "",
+"62 62 7e 48 28 e5    \tvpmovm2b %k5,%zmm28",},
+{{0x62, 0x62, 0xfe, 0x48, 0x28, 0xe5, }, 6, 0, "", "",
+"62 62 fe 48 28 e5    \tvpmovm2w %k5,%zmm28",},
+{{0xc4, 0xe2, 0x4d, 0x29, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 29 d4       \tvpcmpeqq %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x92, 0x7e, 0x48, 0x29, 0xec, }, 6, 0, "", "",
+"62 92 7e 48 29 ec    \tvpmovb2m %zmm28,%k5",},
+{{0x62, 0x92, 0xfe, 0x48, 0x29, 0xec, }, 6, 0, "", "",
+"62 92 fe 48 29 ec    \tvpmovw2m %zmm28,%k5",},
+{{0xc4, 0xe2, 0x7d, 0x2a, 0x21, }, 5, 0, "", "",
+"c4 e2 7d 2a 21       \tvmovntdqa (%rcx),%ymm4",},
+{{0x62, 0x62, 0xfe, 0x48, 0x2a, 0xf6, }, 6, 0, "", "",
+"62 62 fe 48 2a f6    \tvpbroadcastmb2q %k6,%zmm30",},
+{{0xc4, 0xe2, 0x5d, 0x2c, 0x31, }, 5, 0, "", "",
+"c4 e2 5d 2c 31       \tvmaskmovps (%rcx),%ymm4,%ymm6",},
+{{0x62, 0x02, 0x35, 0x40, 0x2c, 0xd0, }, 6, 0, "", "",
+"62 02 35 40 2c d0    \tvscalefps %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x02, 0xb5, 0x40, 0x2c, 0xd0, }, 6, 0, "", "",
+"62 02 b5 40 2c d0    \tvscalefpd %zmm24,%zmm25,%zmm26",},
+{{0xc4, 0xe2, 0x5d, 0x2d, 0x31, }, 5, 0, "", "",
+"c4 e2 5d 2d 31       \tvmaskmovpd (%rcx),%ymm4,%ymm6",},
+{{0x62, 0x02, 0x35, 0x07, 0x2d, 0xd0, }, 6, 0, "", "",
+"62 02 35 07 2d d0    \tvscalefss %xmm24,%xmm25,%xmm26{%k7}",},
+{{0x62, 0x02, 0xb5, 0x07, 0x2d, 0xd0, }, 6, 0, "", "",
+"62 02 b5 07 2d d0    \tvscalefsd %xmm24,%xmm25,%xmm26{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x30, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 30 e4       \tvpmovzxbw %xmm4,%ymm4",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x30, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 30 de    \tvpmovwb %zmm27,%ymm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x31, 0xf4, }, 5, 0, "", "",
+"c4 e2 7d 31 f4       \tvpmovzxbd %xmm4,%ymm6",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x31, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 31 de    \tvpmovdb %zmm27,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x32, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 32 e4       \tvpmovzxbq %xmm4,%ymm4",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x32, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 32 de    \tvpmovqb %zmm27,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x33, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 33 e4       \tvpmovzxwd %xmm4,%ymm4",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x33, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 33 de    \tvpmovdw %zmm27,%ymm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x34, 0xf4, }, 5, 0, "", "",
+"c4 e2 7d 34 f4       \tvpmovzxwq %xmm4,%ymm6",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x34, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 34 de    \tvpmovqw %zmm27,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x35, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 35 e4       \tvpmovzxdq %xmm4,%ymm4",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x35, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 35 de    \tvpmovqd %zmm27,%ymm6{%k7}",},
+{{0xc4, 0xe2, 0x4d, 0x36, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 36 d4       \tvpermd %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x82, 0x2d, 0x27, 0x36, 0xf0, }, 6, 0, "", "",
+"62 82 2d 27 36 f0    \tvpermd %ymm24,%ymm26,%ymm22{%k7}",},
+{{0x62, 0x82, 0xad, 0x27, 0x36, 0xf0, }, 6, 0, "", "",
+"62 82 ad 27 36 f0    \tvpermq %ymm24,%ymm26,%ymm22{%k7}",},
+{{0xc4, 0xe2, 0x4d, 0x38, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 38 d4       \tvpminsb %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x62, 0x7e, 0x48, 0x38, 0xe5, }, 6, 0, "", "",
+"62 62 7e 48 38 e5    \tvpmovm2d %k5,%zmm28",},
+{{0x62, 0x62, 0xfe, 0x48, 0x38, 0xe5, }, 6, 0, "", "",
+"62 62 fe 48 38 e5    \tvpmovm2q %k5,%zmm28",},
+{{0xc4, 0xe2, 0x69, 0x39, 0xd9, }, 5, 0, "", "",
+"c4 e2 69 39 d9       \tvpminsd %xmm1,%xmm2,%xmm3",},
+{{0x62, 0x02, 0x35, 0x40, 0x39, 0xd0, }, 6, 0, "", "",
+"62 02 35 40 39 d0    \tvpminsd %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x02, 0xb5, 0x40, 0x39, 0xd0, }, 6, 0, "", "",
+"62 02 b5 40 39 d0    \tvpminsq %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x92, 0x7e, 0x48, 0x39, 0xec, }, 6, 0, "", "",
+"62 92 7e 48 39 ec    \tvpmovd2m %zmm28,%k5",},
+{{0x62, 0x92, 0xfe, 0x48, 0x39, 0xec, }, 6, 0, "", "",
+"62 92 fe 48 39 ec    \tvpmovq2m %zmm28,%k5",},
+{{0xc4, 0xe2, 0x4d, 0x3a, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 3a d4       \tvpminuw %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x62, 0x7e, 0x48, 0x3a, 0xe6, }, 6, 0, "", "",
+"62 62 7e 48 3a e6    \tvpbroadcastmw2d %k6,%zmm28",},
+{{0xc4, 0xe2, 0x4d, 0x3b, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 3b d4       \tvpminud %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x02, 0x35, 0x40, 0x3b, 0xd0, }, 6, 0, "", "",
+"62 02 35 40 3b d0    \tvpminud %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x02, 0xb5, 0x40, 0x3b, 0xd0, }, 6, 0, "", "",
+"62 02 b5 40 3b d0    \tvpminuq %zmm24,%zmm25,%zmm26",},
+{{0xc4, 0xe2, 0x4d, 0x3d, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 3d d4       \tvpmaxsd %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x02, 0x35, 0x40, 0x3d, 0xd0, }, 6, 0, "", "",
+"62 02 35 40 3d d0    \tvpmaxsd %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x02, 0xb5, 0x40, 0x3d, 0xd0, }, 6, 0, "", "",
+"62 02 b5 40 3d d0    \tvpmaxsq %zmm24,%zmm25,%zmm26",},
+{{0xc4, 0xe2, 0x4d, 0x3f, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 3f d4       \tvpmaxud %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x02, 0x35, 0x40, 0x3f, 0xd0, }, 6, 0, "", "",
+"62 02 35 40 3f d0    \tvpmaxud %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x02, 0xb5, 0x40, 0x3f, 0xd0, }, 6, 0, "", "",
+"62 02 b5 40 3f d0    \tvpmaxuq %zmm24,%zmm25,%zmm26",},
+{{0xc4, 0xe2, 0x4d, 0x40, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 40 d4       \tvpmulld %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x02, 0x35, 0x40, 0x40, 0xd0, }, 6, 0, "", "",
+"62 02 35 40 40 d0    \tvpmulld %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x02, 0xb5, 0x40, 0x40, 0xd0, }, 6, 0, "", "",
+"62 02 b5 40 40 d0    \tvpmullq %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x02, 0x7d, 0x48, 0x42, 0xd1, }, 6, 0, "", "",
+"62 02 7d 48 42 d1    \tvgetexpps %zmm25,%zmm26",},
+{{0x62, 0x02, 0xfd, 0x48, 0x42, 0xe3, }, 6, 0, "", "",
+"62 02 fd 48 42 e3    \tvgetexppd %zmm27,%zmm28",},
+{{0x62, 0x02, 0x35, 0x07, 0x43, 0xd0, }, 6, 0, "", "",
+"62 02 35 07 43 d0    \tvgetexpss %xmm24,%xmm25,%xmm26{%k7}",},
+{{0x62, 0x02, 0x95, 0x07, 0x43, 0xf4, }, 6, 0, "", "",
+"62 02 95 07 43 f4    \tvgetexpsd %xmm28,%xmm29,%xmm30{%k7}",},
+{{0x62, 0x02, 0x7d, 0x48, 0x44, 0xe3, }, 6, 0, "", "",
+"62 02 7d 48 44 e3    \tvplzcntd %zmm27,%zmm28",},
+{{0x62, 0x02, 0xfd, 0x48, 0x44, 0xe3, }, 6, 0, "", "",
+"62 02 fd 48 44 e3    \tvplzcntq %zmm27,%zmm28",},
+{{0xc4, 0xe2, 0x4d, 0x46, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 46 d4       \tvpsravd %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x02, 0x35, 0x40, 0x46, 0xd0, }, 6, 0, "", "",
+"62 02 35 40 46 d0    \tvpsravd %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x02, 0xb5, 0x40, 0x46, 0xd0, }, 6, 0, "", "",
+"62 02 b5 40 46 d0    \tvpsravq %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x02, 0x7d, 0x48, 0x4c, 0xd1, }, 6, 0, "", "",
+"62 02 7d 48 4c d1    \tvrcp14ps %zmm25,%zmm26",},
+{{0x62, 0x02, 0xfd, 0x48, 0x4c, 0xe3, }, 6, 0, "", "",
+"62 02 fd 48 4c e3    \tvrcp14pd %zmm27,%zmm28",},
+{{0x62, 0x02, 0x35, 0x07, 0x4d, 0xd0, }, 6, 0, "", "",
+"62 02 35 07 4d d0    \tvrcp14ss %xmm24,%xmm25,%xmm26{%k7}",},
+{{0x62, 0x02, 0xb5, 0x07, 0x4d, 0xd0, }, 6, 0, "", "",
+"62 02 b5 07 4d d0    \tvrcp14sd %xmm24,%xmm25,%xmm26{%k7}",},
+{{0x62, 0x02, 0x7d, 0x48, 0x4e, 0xd1, }, 6, 0, "", "",
+"62 02 7d 48 4e d1    \tvrsqrt14ps %zmm25,%zmm26",},
+{{0x62, 0x02, 0xfd, 0x48, 0x4e, 0xe3, }, 6, 0, "", "",
+"62 02 fd 48 4e e3    \tvrsqrt14pd %zmm27,%zmm28",},
+{{0x62, 0x02, 0x35, 0x07, 0x4f, 0xd0, }, 6, 0, "", "",
+"62 02 35 07 4f d0    \tvrsqrt14ss %xmm24,%xmm25,%xmm26{%k7}",},
+{{0x62, 0x02, 0xb5, 0x07, 0x4f, 0xd0, }, 6, 0, "", "",
+"62 02 b5 07 4f d0    \tvrsqrt14sd %xmm24,%xmm25,%xmm26{%k7}",},
+{{0xc4, 0xe2, 0x79, 0x59, 0xf4, }, 5, 0, "", "",
+"c4 e2 79 59 f4       \tvpbroadcastq %xmm4,%xmm6",},
+{{0x62, 0x02, 0x7d, 0x48, 0x59, 0xd3, }, 6, 0, "", "",
+"62 02 7d 48 59 d3    \tvbroadcasti32x2 %xmm27,%zmm26",},
+{{0xc4, 0xe2, 0x7d, 0x5a, 0x21, }, 5, 0, "", "",
+"c4 e2 7d 5a 21       \tvbroadcasti128 (%rcx),%ymm4",},
+{{0x62, 0x62, 0x7d, 0x48, 0x5a, 0x11, }, 6, 0, "", "",
+"62 62 7d 48 5a 11    \tvbroadcasti32x4 (%rcx),%zmm26",},
+{{0x62, 0x62, 0xfd, 0x48, 0x5a, 0x11, }, 6, 0, "", "",
+"62 62 fd 48 5a 11    \tvbroadcasti64x2 (%rcx),%zmm26",},
+{{0x62, 0x62, 0x7d, 0x48, 0x5b, 0x21, }, 6, 0, "", "",
+"62 62 7d 48 5b 21    \tvbroadcasti32x8 (%rcx),%zmm28",},
+{{0x62, 0x62, 0xfd, 0x48, 0x5b, 0x11, }, 6, 0, "", "",
+"62 62 fd 48 5b 11    \tvbroadcasti64x4 (%rcx),%zmm26",},
+{{0x62, 0x02, 0x25, 0x40, 0x64, 0xe2, }, 6, 0, "", "",
+"62 02 25 40 64 e2    \tvpblendmd %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0xa5, 0x40, 0x64, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 64 e2    \tvpblendmq %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0x35, 0x40, 0x65, 0xd0, }, 6, 0, "", "",
+"62 02 35 40 65 d0    \tvblendmps %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x02, 0xa5, 0x40, 0x65, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 65 e2    \tvblendmpd %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0x25, 0x40, 0x66, 0xe2, }, 6, 0, "", "",
+"62 02 25 40 66 e2    \tvpblendmb %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0xa5, 0x40, 0x66, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 66 e2    \tvpblendmw %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0x35, 0x40, 0x75, 0xd0, }, 6, 0, "", "",
+"62 02 35 40 75 d0    \tvpermi2b %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x02, 0xa5, 0x40, 0x75, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 75 e2    \tvpermi2w %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0x25, 0x40, 0x76, 0xe2, }, 6, 0, "", "",
+"62 02 25 40 76 e2    \tvpermi2d %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0xa5, 0x40, 0x76, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 76 e2    \tvpermi2q %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0x25, 0x40, 0x77, 0xe2, }, 6, 0, "", "",
+"62 02 25 40 77 e2    \tvpermi2ps %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0xa5, 0x40, 0x77, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 77 e2    \tvpermi2pd %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x62, 0x7d, 0x08, 0x7a, 0xf0, }, 6, 0, "", "",
+"62 62 7d 08 7a f0    \tvpbroadcastb %eax,%xmm30",},
+{{0x62, 0x62, 0x7d, 0x08, 0x7b, 0xf0, }, 6, 0, "", "",
+"62 62 7d 08 7b f0    \tvpbroadcastw %eax,%xmm30",},
+{{0x62, 0x62, 0x7d, 0x08, 0x7c, 0xf0, }, 6, 0, "", "",
+"62 62 7d 08 7c f0    \tvpbroadcastd %eax,%xmm30",},
+{{0x62, 0x62, 0xfd, 0x48, 0x7c, 0xf0, }, 6, 0, "", "",
+"62 62 fd 48 7c f0    \tvpbroadcastq %rax,%zmm30",},
+{{0x62, 0x02, 0x25, 0x40, 0x7d, 0xe2, }, 6, 0, "", "",
+"62 02 25 40 7d e2    \tvpermt2b %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0xa5, 0x40, 0x7d, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 7d e2    \tvpermt2w %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0x25, 0x40, 0x7e, 0xe2, }, 6, 0, "", "",
+"62 02 25 40 7e e2    \tvpermt2d %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0xa5, 0x40, 0x7e, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 7e e2    \tvpermt2q %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0x25, 0x40, 0x7f, 0xe2, }, 6, 0, "", "",
+"62 02 25 40 7f e2    \tvpermt2ps %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0xa5, 0x40, 0x7f, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 7f e2    \tvpermt2pd %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0xa5, 0x40, 0x83, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 83 e2    \tvpmultishiftqb %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x62, 0x7d, 0x48, 0x88, 0x11, }, 6, 0, "", "",
+"62 62 7d 48 88 11    \tvexpandps (%rcx),%zmm26",},
+{{0x62, 0x62, 0xfd, 0x48, 0x88, 0x21, }, 6, 0, "", "",
+"62 62 fd 48 88 21    \tvexpandpd (%rcx),%zmm28",},
+{{0x62, 0x62, 0x7d, 0x48, 0x89, 0x21, }, 6, 0, "", "",
+"62 62 7d 48 89 21    \tvpexpandd (%rcx),%zmm28",},
+{{0x62, 0x62, 0xfd, 0x48, 0x89, 0x11, }, 6, 0, "", "",
+"62 62 fd 48 89 11    \tvpexpandq (%rcx),%zmm26",},
+{{0x62, 0x62, 0x7d, 0x48, 0x8a, 0x21, }, 6, 0, "", "",
+"62 62 7d 48 8a 21    \tvcompressps %zmm28,(%rcx)",},
+{{0x62, 0x62, 0xfd, 0x48, 0x8a, 0x21, }, 6, 0, "", "",
+"62 62 fd 48 8a 21    \tvcompresspd %zmm28,(%rcx)",},
+{{0x62, 0x62, 0x7d, 0x48, 0x8b, 0x21, }, 6, 0, "", "",
+"62 62 7d 48 8b 21    \tvpcompressd %zmm28,(%rcx)",},
+{{0x62, 0x62, 0xfd, 0x48, 0x8b, 0x11, }, 6, 0, "", "",
+"62 62 fd 48 8b 11    \tvpcompressq %zmm26,(%rcx)",},
+{{0x62, 0x02, 0x25, 0x40, 0x8d, 0xe2, }, 6, 0, "", "",
+"62 02 25 40 8d e2    \tvpermb %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0xa5, 0x40, 0x8d, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 8d e2    \tvpermw %zmm26,%zmm27,%zmm28",},
+{{0xc4, 0xe2, 0x69, 0x90, 0x4c, 0x7d, 0x02, }, 7, 0, "", "",
+"c4 e2 69 90 4c 7d 02 \tvpgatherdd %xmm2,0x2(%rbp,%xmm7,2),%xmm1",},
+{{0xc4, 0xe2, 0xe9, 0x90, 0x4c, 0x7d, 0x04, }, 7, 0, "", "",
+"c4 e2 e9 90 4c 7d 04 \tvpgatherdq %xmm2,0x4(%rbp,%xmm7,2),%xmm1",},
+{{0x62, 0x22, 0x7d, 0x41, 0x90, 0x94, 0xdd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 22 7d 41 90 94 dd 7b 00 00 00 \tvpgatherdd 0x7b(%rbp,%zmm27,8),%zmm26{%k1}",},
+{{0x62, 0x22, 0xfd, 0x41, 0x90, 0x94, 0xdd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 22 fd 41 90 94 dd 7b 00 00 00 \tvpgatherdq 0x7b(%rbp,%ymm27,8),%zmm26{%k1}",},
+{{0xc4, 0xe2, 0x69, 0x91, 0x4c, 0x7d, 0x02, }, 7, 0, "", "",
+"c4 e2 69 91 4c 7d 02 \tvpgatherqd %xmm2,0x2(%rbp,%xmm7,2),%xmm1",},
+{{0xc4, 0xe2, 0xe9, 0x91, 0x4c, 0x7d, 0x02, }, 7, 0, "", "",
+"c4 e2 e9 91 4c 7d 02 \tvpgatherqq %xmm2,0x2(%rbp,%xmm7,2),%xmm1",},
+{{0x62, 0x22, 0x7d, 0x41, 0x91, 0x94, 0xdd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 22 7d 41 91 94 dd 7b 00 00 00 \tvpgatherqd 0x7b(%rbp,%zmm27,8),%ymm26{%k1}",},
+{{0x62, 0x22, 0xfd, 0x41, 0x91, 0x94, 0xdd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 22 fd 41 91 94 dd 7b 00 00 00 \tvpgatherqq 0x7b(%rbp,%zmm27,8),%zmm26{%k1}",},
+{{0x62, 0x22, 0x7d, 0x41, 0xa0, 0xa4, 0xed, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 22 7d 41 a0 a4 ed 7b 00 00 00 \tvpscatterdd %zmm28,0x7b(%rbp,%zmm29,8){%k1}",},
+{{0x62, 0x22, 0xfd, 0x41, 0xa0, 0x94, 0xdd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 22 fd 41 a0 94 dd 7b 00 00 00 \tvpscatterdq %zmm26,0x7b(%rbp,%ymm27,8){%k1}",},
+{{0x62, 0xb2, 0x7d, 0x41, 0xa1, 0xb4, 0xed, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 b2 7d 41 a1 b4 ed 7b 00 00 00 \tvpscatterqd %ymm6,0x7b(%rbp,%zmm29,8){%k1}",},
+{{0x62, 0xb2, 0xfd, 0x21, 0xa1, 0xb4, 0xdd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 b2 fd 21 a1 b4 dd 7b 00 00 00 \tvpscatterqq %ymm6,0x7b(%rbp,%ymm27,8){%k1}",},
+{{0x62, 0x22, 0x7d, 0x41, 0xa2, 0xa4, 0xed, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 22 7d 41 a2 a4 ed 7b 00 00 00 \tvscatterdps %zmm28,0x7b(%rbp,%zmm29,8){%k1}",},
+{{0x62, 0x22, 0xfd, 0x41, 0xa2, 0xa4, 0xdd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 22 fd 41 a2 a4 dd 7b 00 00 00 \tvscatterdpd %zmm28,0x7b(%rbp,%ymm27,8){%k1}",},
+{{0x62, 0xb2, 0x7d, 0x41, 0xa3, 0xb4, 0xed, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 b2 7d 41 a3 b4 ed 7b 00 00 00 \tvscatterqps %ymm6,0x7b(%rbp,%zmm29,8){%k1}",},
+{{0x62, 0x22, 0xfd, 0x41, 0xa3, 0xa4, 0xed, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 22 fd 41 a3 a4 ed 7b 00 00 00 \tvscatterqpd %zmm28,0x7b(%rbp,%zmm29,8){%k1}",},
+{{0x62, 0x02, 0xa5, 0x40, 0xb4, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 b4 e2    \tvpmadd52luq %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0xa5, 0x40, 0xb5, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 b5 e2    \tvpmadd52huq %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0x7d, 0x48, 0xc4, 0xda, }, 6, 0, "", "",
+"62 02 7d 48 c4 da    \tvpconflictd %zmm26,%zmm27",},
+{{0x62, 0x02, 0xfd, 0x48, 0xc4, 0xda, }, 6, 0, "", "",
+"62 02 fd 48 c4 da    \tvpconflictq %zmm26,%zmm27",},
+{{0x62, 0x02, 0x7d, 0x48, 0xc8, 0xf5, }, 6, 0, "", "",
+"62 02 7d 48 c8 f5    \tvexp2ps %zmm29,%zmm30",},
+{{0x62, 0x02, 0xfd, 0x48, 0xc8, 0xda, }, 6, 0, "", "",
+"62 02 fd 48 c8 da    \tvexp2pd %zmm26,%zmm27",},
+{{0x62, 0x02, 0x7d, 0x48, 0xca, 0xf5, }, 6, 0, "", "",
+"62 02 7d 48 ca f5    \tvrcp28ps %zmm29,%zmm30",},
+{{0x62, 0x02, 0xfd, 0x48, 0xca, 0xda, }, 6, 0, "", "",
+"62 02 fd 48 ca da    \tvrcp28pd %zmm26,%zmm27",},
+{{0x62, 0x02, 0x15, 0x07, 0xcb, 0xf4, }, 6, 0, "", "",
+"62 02 15 07 cb f4    \tvrcp28ss %xmm28,%xmm29,%xmm30{%k7}",},
+{{0x62, 0x02, 0xad, 0x07, 0xcb, 0xd9, }, 6, 0, "", "",
+"62 02 ad 07 cb d9    \tvrcp28sd %xmm25,%xmm26,%xmm27{%k7}",},
+{{0x62, 0x02, 0x7d, 0x48, 0xcc, 0xf5, }, 6, 0, "", "",
+"62 02 7d 48 cc f5    \tvrsqrt28ps %zmm29,%zmm30",},
+{{0x62, 0x02, 0xfd, 0x48, 0xcc, 0xda, }, 6, 0, "", "",
+"62 02 fd 48 cc da    \tvrsqrt28pd %zmm26,%zmm27",},
+{{0x62, 0x02, 0x15, 0x07, 0xcd, 0xf4, }, 6, 0, "", "",
+"62 02 15 07 cd f4    \tvrsqrt28ss %xmm28,%xmm29,%xmm30{%k7}",},
+{{0x62, 0x02, 0xad, 0x07, 0xcd, 0xd9, }, 6, 0, "", "",
+"62 02 ad 07 cd d9    \tvrsqrt28sd %xmm25,%xmm26,%xmm27{%k7}",},
+{{0x62, 0x03, 0x15, 0x40, 0x03, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 15 40 03 f4 12 \tvalignd $0x12,%zmm28,%zmm29,%zmm30",},
+{{0x62, 0x03, 0xad, 0x40, 0x03, 0xd9, 0x12, }, 7, 0, "", "",
+"62 03 ad 40 03 d9 12 \tvalignq $0x12,%zmm25,%zmm26,%zmm27",},
+{{0xc4, 0xe3, 0x7d, 0x08, 0xd6, 0x05, }, 6, 0, "", "",
+"c4 e3 7d 08 d6 05    \tvroundps $0x5,%ymm6,%ymm2",},
+{{0x62, 0x03, 0x7d, 0x48, 0x08, 0xd1, 0x12, }, 7, 0, "", "",
+"62 03 7d 48 08 d1 12 \tvrndscaleps $0x12,%zmm25,%zmm26",},
+{{0xc4, 0xe3, 0x7d, 0x09, 0xd6, 0x05, }, 6, 0, "", "",
+"c4 e3 7d 09 d6 05    \tvroundpd $0x5,%ymm6,%ymm2",},
+{{0x62, 0x03, 0xfd, 0x48, 0x09, 0xd1, 0x12, }, 7, 0, "", "",
+"62 03 fd 48 09 d1 12 \tvrndscalepd $0x12,%zmm25,%zmm26",},
+{{0xc4, 0xe3, 0x49, 0x0a, 0xd4, 0x05, }, 6, 0, "", "",
+"c4 e3 49 0a d4 05    \tvroundss $0x5,%xmm4,%xmm6,%xmm2",},
+{{0x62, 0x03, 0x35, 0x07, 0x0a, 0xd0, 0x12, }, 7, 0, "", "",
+"62 03 35 07 0a d0 12 \tvrndscaless $0x12,%xmm24,%xmm25,%xmm26{%k7}",},
+{{0xc4, 0xe3, 0x49, 0x0b, 0xd4, 0x05, }, 6, 0, "", "",
+"c4 e3 49 0b d4 05    \tvroundsd $0x5,%xmm4,%xmm6,%xmm2",},
+{{0x62, 0x03, 0xb5, 0x07, 0x0b, 0xd0, 0x12, }, 7, 0, "", "",
+"62 03 b5 07 0b d0 12 \tvrndscalesd $0x12,%xmm24,%xmm25,%xmm26{%k7}",},
+{{0xc4, 0xe3, 0x5d, 0x18, 0xf4, 0x05, }, 6, 0, "", "",
+"c4 e3 5d 18 f4 05    \tvinsertf128 $0x5,%xmm4,%ymm4,%ymm6",},
+{{0x62, 0x03, 0x35, 0x47, 0x18, 0xd0, 0x12, }, 7, 0, "", "",
+"62 03 35 47 18 d0 12 \tvinsertf32x4 $0x12,%xmm24,%zmm25,%zmm26{%k7}",},
+{{0x62, 0x03, 0xb5, 0x47, 0x18, 0xd0, 0x12, }, 7, 0, "", "",
+"62 03 b5 47 18 d0 12 \tvinsertf64x2 $0x12,%xmm24,%zmm25,%zmm26{%k7}",},
+{{0xc4, 0xe3, 0x7d, 0x19, 0xe4, 0x05, }, 6, 0, "", "",
+"c4 e3 7d 19 e4 05    \tvextractf128 $0x5,%ymm4,%xmm4",},
+{{0x62, 0x03, 0x7d, 0x4f, 0x19, 0xca, 0x12, }, 7, 0, "", "",
+"62 03 7d 4f 19 ca 12 \tvextractf32x4 $0x12,%zmm25,%xmm26{%k7}",},
+{{0x62, 0x03, 0xfd, 0x4f, 0x19, 0xca, 0x12, }, 7, 0, "", "",
+"62 03 fd 4f 19 ca 12 \tvextractf64x2 $0x12,%zmm25,%xmm26{%k7}",},
+{{0x62, 0x03, 0x2d, 0x47, 0x1a, 0xd9, 0x12, }, 7, 0, "", "",
+"62 03 2d 47 1a d9 12 \tvinsertf32x8 $0x12,%ymm25,%zmm26,%zmm27{%k7}",},
+{{0x62, 0x03, 0x95, 0x47, 0x1a, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 95 47 1a f4 12 \tvinsertf64x4 $0x12,%ymm28,%zmm29,%zmm30{%k7}",},
+{{0x62, 0x03, 0x7d, 0x4f, 0x1b, 0xee, 0x12, }, 7, 0, "", "",
+"62 03 7d 4f 1b ee 12 \tvextractf32x8 $0x12,%zmm29,%ymm30{%k7}",},
+{{0x62, 0x03, 0xfd, 0x4f, 0x1b, 0xd3, 0x12, }, 7, 0, "", "",
+"62 03 fd 4f 1b d3 12 \tvextractf64x4 $0x12,%zmm26,%ymm27{%k7}",},
+{{0x62, 0x93, 0x0d, 0x40, 0x1e, 0xed, 0x12, }, 7, 0, "", "",
+"62 93 0d 40 1e ed 12 \tvpcmpud $0x12,%zmm29,%zmm30,%k5",},
+{{0x62, 0x93, 0xa5, 0x40, 0x1e, 0xea, 0x12, }, 7, 0, "", "",
+"62 93 a5 40 1e ea 12 \tvpcmpuq $0x12,%zmm26,%zmm27,%k5",},
+{{0x62, 0x93, 0x0d, 0x40, 0x1f, 0xed, 0x12, }, 7, 0, "", "",
+"62 93 0d 40 1f ed 12 \tvpcmpd $0x12,%zmm29,%zmm30,%k5",},
+{{0x62, 0x93, 0xa5, 0x40, 0x1f, 0xea, 0x12, }, 7, 0, "", "",
+"62 93 a5 40 1f ea 12 \tvpcmpq $0x12,%zmm26,%zmm27,%k5",},
+{{0x62, 0x03, 0x15, 0x40, 0x23, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 15 40 23 f4 12 \tvshuff32x4 $0x12,%zmm28,%zmm29,%zmm30",},
+{{0x62, 0x03, 0xad, 0x40, 0x23, 0xd9, 0x12, }, 7, 0, "", "",
+"62 03 ad 40 23 d9 12 \tvshuff64x2 $0x12,%zmm25,%zmm26,%zmm27",},
+{{0x62, 0x03, 0x15, 0x40, 0x25, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 15 40 25 f4 12 \tvpternlogd $0x12,%zmm28,%zmm29,%zmm30",},
+{{0x62, 0x03, 0x95, 0x40, 0x25, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 95 40 25 f4 12 \tvpternlogq $0x12,%zmm28,%zmm29,%zmm30",},
+{{0x62, 0x03, 0x7d, 0x48, 0x26, 0xda, 0x12, }, 7, 0, "", "",
+"62 03 7d 48 26 da 12 \tvgetmantps $0x12,%zmm26,%zmm27",},
+{{0x62, 0x03, 0xfd, 0x48, 0x26, 0xf5, 0x12, }, 7, 0, "", "",
+"62 03 fd 48 26 f5 12 \tvgetmantpd $0x12,%zmm29,%zmm30",},
+{{0x62, 0x03, 0x2d, 0x07, 0x27, 0xd9, 0x12, }, 7, 0, "", "",
+"62 03 2d 07 27 d9 12 \tvgetmantss $0x12,%xmm25,%xmm26,%xmm27{%k7}",},
+{{0x62, 0x03, 0x95, 0x07, 0x27, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 95 07 27 f4 12 \tvgetmantsd $0x12,%xmm28,%xmm29,%xmm30{%k7}",},
+{{0xc4, 0xe3, 0x5d, 0x38, 0xf4, 0x05, }, 6, 0, "", "",
+"c4 e3 5d 38 f4 05    \tvinserti128 $0x5,%xmm4,%ymm4,%ymm6",},
+{{0x62, 0x03, 0x35, 0x47, 0x38, 0xd0, 0x12, }, 7, 0, "", "",
+"62 03 35 47 38 d0 12 \tvinserti32x4 $0x12,%xmm24,%zmm25,%zmm26{%k7}",},
+{{0x62, 0x03, 0xb5, 0x47, 0x38, 0xd0, 0x12, }, 7, 0, "", "",
+"62 03 b5 47 38 d0 12 \tvinserti64x2 $0x12,%xmm24,%zmm25,%zmm26{%k7}",},
+{{0xc4, 0xe3, 0x7d, 0x39, 0xe6, 0x05, }, 6, 0, "", "",
+"c4 e3 7d 39 e6 05    \tvextracti128 $0x5,%ymm4,%xmm6",},
+{{0x62, 0x03, 0x7d, 0x4f, 0x39, 0xca, 0x12, }, 7, 0, "", "",
+"62 03 7d 4f 39 ca 12 \tvextracti32x4 $0x12,%zmm25,%xmm26{%k7}",},
+{{0x62, 0x03, 0xfd, 0x4f, 0x39, 0xca, 0x12, }, 7, 0, "", "",
+"62 03 fd 4f 39 ca 12 \tvextracti64x2 $0x12,%zmm25,%xmm26{%k7}",},
+{{0x62, 0x03, 0x15, 0x47, 0x3a, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 15 47 3a f4 12 \tvinserti32x8 $0x12,%ymm28,%zmm29,%zmm30{%k7}",},
+{{0x62, 0x03, 0xad, 0x47, 0x3a, 0xd9, 0x12, }, 7, 0, "", "",
+"62 03 ad 47 3a d9 12 \tvinserti64x4 $0x12,%ymm25,%zmm26,%zmm27{%k7}",},
+{{0x62, 0x03, 0x7d, 0x4f, 0x3b, 0xee, 0x12, }, 7, 0, "", "",
+"62 03 7d 4f 3b ee 12 \tvextracti32x8 $0x12,%zmm29,%ymm30{%k7}",},
+{{0x62, 0x03, 0xfd, 0x4f, 0x3b, 0xd3, 0x12, }, 7, 0, "", "",
+"62 03 fd 4f 3b d3 12 \tvextracti64x4 $0x12,%zmm26,%ymm27{%k7}",},
+{{0x62, 0x93, 0x0d, 0x40, 0x3e, 0xed, 0x12, }, 7, 0, "", "",
+"62 93 0d 40 3e ed 12 \tvpcmpub $0x12,%zmm29,%zmm30,%k5",},
+{{0x62, 0x93, 0xa5, 0x40, 0x3e, 0xea, 0x12, }, 7, 0, "", "",
+"62 93 a5 40 3e ea 12 \tvpcmpuw $0x12,%zmm26,%zmm27,%k5",},
+{{0x62, 0x93, 0x0d, 0x40, 0x3f, 0xed, 0x12, }, 7, 0, "", "",
+"62 93 0d 40 3f ed 12 \tvpcmpb $0x12,%zmm29,%zmm30,%k5",},
+{{0x62, 0x93, 0xa5, 0x40, 0x3f, 0xea, 0x12, }, 7, 0, "", "",
+"62 93 a5 40 3f ea 12 \tvpcmpw $0x12,%zmm26,%zmm27,%k5",},
+{{0xc4, 0xe3, 0x4d, 0x42, 0xd4, 0x05, }, 6, 0, "", "",
+"c4 e3 4d 42 d4 05    \tvmpsadbw $0x5,%ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf3, 0x55, 0x48, 0x42, 0xf4, 0x12, }, 7, 0, "", "",
+"62 f3 55 48 42 f4 12 \tvdbpsadbw $0x12,%zmm4,%zmm5,%zmm6",},
+{{0x62, 0x03, 0x2d, 0x40, 0x43, 0xd9, 0x12, }, 7, 0, "", "",
+"62 03 2d 40 43 d9 12 \tvshufi32x4 $0x12,%zmm25,%zmm26,%zmm27",},
+{{0x62, 0x03, 0x95, 0x40, 0x43, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 95 40 43 f4 12 \tvshufi64x2 $0x12,%zmm28,%zmm29,%zmm30",},
+{{0x62, 0x03, 0x2d, 0x40, 0x50, 0xd9, 0x12, }, 7, 0, "", "",
+"62 03 2d 40 50 d9 12 \tvrangeps $0x12,%zmm25,%zmm26,%zmm27",},
+{{0x62, 0x03, 0x95, 0x40, 0x50, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 95 40 50 f4 12 \tvrangepd $0x12,%zmm28,%zmm29,%zmm30",},
+{{0x62, 0x03, 0x2d, 0x00, 0x51, 0xd9, 0x12, }, 7, 0, "", "",
+"62 03 2d 00 51 d9 12 \tvrangess $0x12,%xmm25,%xmm26,%xmm27",},
+{{0x62, 0x03, 0x95, 0x00, 0x51, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 95 00 51 f4 12 \tvrangesd $0x12,%xmm28,%xmm29,%xmm30",},
+{{0x62, 0x03, 0x15, 0x40, 0x54, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 15 40 54 f4 12 \tvfixupimmps $0x12,%zmm28,%zmm29,%zmm30",},
+{{0x62, 0x03, 0xad, 0x40, 0x54, 0xd9, 0x12, }, 7, 0, "", "",
+"62 03 ad 40 54 d9 12 \tvfixupimmpd $0x12,%zmm25,%zmm26,%zmm27",},
+{{0x62, 0x03, 0x15, 0x07, 0x55, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 15 07 55 f4 12 \tvfixupimmss $0x12,%xmm28,%xmm29,%xmm30{%k7}",},
+{{0x62, 0x03, 0xad, 0x07, 0x55, 0xd9, 0x12, }, 7, 0, "", "",
+"62 03 ad 07 55 d9 12 \tvfixupimmsd $0x12,%xmm25,%xmm26,%xmm27{%k7}",},
+{{0x62, 0x03, 0x7d, 0x48, 0x56, 0xda, 0x12, }, 7, 0, "", "",
+"62 03 7d 48 56 da 12 \tvreduceps $0x12,%zmm26,%zmm27",},
+{{0x62, 0x03, 0xfd, 0x48, 0x56, 0xf5, 0x12, }, 7, 0, "", "",
+"62 03 fd 48 56 f5 12 \tvreducepd $0x12,%zmm29,%zmm30",},
+{{0x62, 0x03, 0x2d, 0x00, 0x57, 0xd9, 0x12, }, 7, 0, "", "",
+"62 03 2d 00 57 d9 12 \tvreducess $0x12,%xmm25,%xmm26,%xmm27",},
+{{0x62, 0x03, 0x95, 0x00, 0x57, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 95 00 57 f4 12 \tvreducesd $0x12,%xmm28,%xmm29,%xmm30",},
+{{0x62, 0x93, 0x7d, 0x48, 0x66, 0xeb, 0x12, }, 7, 0, "", "",
+"62 93 7d 48 66 eb 12 \tvfpclassps $0x12,%zmm27,%k5",},
+{{0x62, 0x93, 0xfd, 0x48, 0x66, 0xee, 0x12, }, 7, 0, "", "",
+"62 93 fd 48 66 ee 12 \tvfpclasspd $0x12,%zmm30,%k5",},
+{{0x62, 0x93, 0x7d, 0x08, 0x67, 0xeb, 0x12, }, 7, 0, "", "",
+"62 93 7d 08 67 eb 12 \tvfpclassss $0x12,%xmm27,%k5",},
+{{0x62, 0x93, 0xfd, 0x08, 0x67, 0xee, 0x12, }, 7, 0, "", "",
+"62 93 fd 08 67 ee 12 \tvfpclasssd $0x12,%xmm30,%k5",},
+{{0x62, 0x91, 0x2d, 0x40, 0x72, 0xc1, 0x12, }, 7, 0, "", "",
+"62 91 2d 40 72 c1 12 \tvprord $0x12,%zmm25,%zmm26",},
+{{0x62, 0x91, 0xad, 0x40, 0x72, 0xc1, 0x12, }, 7, 0, "", "",
+"62 91 ad 40 72 c1 12 \tvprorq $0x12,%zmm25,%zmm26",},
+{{0x62, 0x91, 0x0d, 0x40, 0x72, 0xcd, 0x12, }, 7, 0, "", "",
+"62 91 0d 40 72 cd 12 \tvprold $0x12,%zmm29,%zmm30",},
+{{0x62, 0x91, 0x8d, 0x40, 0x72, 0xcd, 0x12, }, 7, 0, "", "",
+"62 91 8d 40 72 cd 12 \tvprolq $0x12,%zmm29,%zmm30",},
+{{0x0f, 0x72, 0xe6, 0x02, }, 4, 0, "", "",
+"0f 72 e6 02          \tpsrad  $0x2,%mm6",},
+{{0xc5, 0xed, 0x72, 0xe6, 0x05, }, 5, 0, "", "",
+"c5 ed 72 e6 05       \tvpsrad $0x5,%ymm6,%ymm2",},
+{{0x62, 0x91, 0x4d, 0x40, 0x72, 0xe2, 0x05, }, 7, 0, "", "",
+"62 91 4d 40 72 e2 05 \tvpsrad $0x5,%zmm26,%zmm22",},
+{{0x62, 0x91, 0xcd, 0x40, 0x72, 0xe2, 0x05, }, 7, 0, "", "",
+"62 91 cd 40 72 e2 05 \tvpsraq $0x5,%zmm26,%zmm22",},
+{{0x62, 0x92, 0x7d, 0x41, 0xc6, 0x8c, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 7d 41 c6 8c fe 7b 00 00 00 \tvgatherpf0dps 0x7b(%r14,%zmm31,8){%k1}",},
+{{0x62, 0x92, 0xfd, 0x41, 0xc6, 0x8c, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 fd 41 c6 8c fe 7b 00 00 00 \tvgatherpf0dpd 0x7b(%r14,%ymm31,8){%k1}",},
+{{0x62, 0x92, 0x7d, 0x41, 0xc6, 0x94, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 7d 41 c6 94 fe 7b 00 00 00 \tvgatherpf1dps 0x7b(%r14,%zmm31,8){%k1}",},
+{{0x62, 0x92, 0xfd, 0x41, 0xc6, 0x94, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 fd 41 c6 94 fe 7b 00 00 00 \tvgatherpf1dpd 0x7b(%r14,%ymm31,8){%k1}",},
+{{0x62, 0x92, 0x7d, 0x41, 0xc6, 0xac, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 7d 41 c6 ac fe 7b 00 00 00 \tvscatterpf0dps 0x7b(%r14,%zmm31,8){%k1}",},
+{{0x62, 0x92, 0xfd, 0x41, 0xc6, 0xac, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 fd 41 c6 ac fe 7b 00 00 00 \tvscatterpf0dpd 0x7b(%r14,%ymm31,8){%k1}",},
+{{0x62, 0x92, 0x7d, 0x41, 0xc6, 0xb4, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 7d 41 c6 b4 fe 7b 00 00 00 \tvscatterpf1dps 0x7b(%r14,%zmm31,8){%k1}",},
+{{0x62, 0x92, 0xfd, 0x41, 0xc6, 0xb4, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 fd 41 c6 b4 fe 7b 00 00 00 \tvscatterpf1dpd 0x7b(%r14,%ymm31,8){%k1}",},
+{{0x62, 0x92, 0x7d, 0x41, 0xc7, 0x8c, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 7d 41 c7 8c fe 7b 00 00 00 \tvgatherpf0qps 0x7b(%r14,%zmm31,8){%k1}",},
+{{0x62, 0x92, 0xfd, 0x41, 0xc7, 0x8c, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 fd 41 c7 8c fe 7b 00 00 00 \tvgatherpf0qpd 0x7b(%r14,%zmm31,8){%k1}",},
+{{0x62, 0x92, 0x7d, 0x41, 0xc7, 0x94, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 7d 41 c7 94 fe 7b 00 00 00 \tvgatherpf1qps 0x7b(%r14,%zmm31,8){%k1}",},
+{{0x62, 0x92, 0xfd, 0x41, 0xc7, 0x94, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 fd 41 c7 94 fe 7b 00 00 00 \tvgatherpf1qpd 0x7b(%r14,%zmm31,8){%k1}",},
+{{0x62, 0x92, 0x7d, 0x41, 0xc7, 0xac, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 7d 41 c7 ac fe 7b 00 00 00 \tvscatterpf0qps 0x7b(%r14,%zmm31,8){%k1}",},
+{{0x62, 0x92, 0xfd, 0x41, 0xc7, 0xac, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 fd 41 c7 ac fe 7b 00 00 00 \tvscatterpf0qpd 0x7b(%r14,%zmm31,8){%k1}",},
+{{0x62, 0x92, 0x7d, 0x41, 0xc7, 0xb4, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 7d 41 c7 b4 fe 7b 00 00 00 \tvscatterpf1qps 0x7b(%r14,%zmm31,8){%k1}",},
+{{0x62, 0x92, 0xfd, 0x41, 0xc7, 0xb4, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 fd 41 c7 b4 fe 7b 00 00 00 \tvscatterpf1qpd 0x7b(%r14,%zmm31,8){%k1}",},
+{{0x62, 0x01, 0x95, 0x40, 0x58, 0xf4, }, 6, 0, "", "",
+"62 01 95 40 58 f4    \tvaddpd %zmm28,%zmm29,%zmm30",},
+{{0x62, 0x01, 0x95, 0x47, 0x58, 0xf4, }, 6, 0, "", "",
+"62 01 95 47 58 f4    \tvaddpd %zmm28,%zmm29,%zmm30{%k7}",},
+{{0x62, 0x01, 0x95, 0xc7, 0x58, 0xf4, }, 6, 0, "", "",
+"62 01 95 c7 58 f4    \tvaddpd %zmm28,%zmm29,%zmm30{%k7}{z}",},
+{{0x62, 0x01, 0x95, 0x10, 0x58, 0xf4, }, 6, 0, "", "",
+"62 01 95 10 58 f4    \tvaddpd {rn-sae},%zmm28,%zmm29,%zmm30",},
+{{0x62, 0x01, 0x95, 0x50, 0x58, 0xf4, }, 6, 0, "", "",
+"62 01 95 50 58 f4    \tvaddpd {ru-sae},%zmm28,%zmm29,%zmm30",},
+{{0x62, 0x01, 0x95, 0x30, 0x58, 0xf4, }, 6, 0, "", "",
+"62 01 95 30 58 f4    \tvaddpd {rd-sae},%zmm28,%zmm29,%zmm30",},
+{{0x62, 0x01, 0x95, 0x70, 0x58, 0xf4, }, 6, 0, "", "",
+"62 01 95 70 58 f4    \tvaddpd {rz-sae},%zmm28,%zmm29,%zmm30",},
+{{0x62, 0x61, 0x95, 0x40, 0x58, 0x31, }, 6, 0, "", "",
+"62 61 95 40 58 31    \tvaddpd (%rcx),%zmm29,%zmm30",},
+{{0x62, 0x21, 0x95, 0x40, 0x58, 0xb4, 0xf0, 0x23, 0x01, 0x00, 0x00, }, 11, 0, "", "",
+"62 21 95 40 58 b4 f0 23 01 00 00 \tvaddpd 0x123(%rax,%r14,8),%zmm29,%zmm30",},
+{{0x62, 0x61, 0x95, 0x50, 0x58, 0x31, }, 6, 0, "", "",
+"62 61 95 50 58 31    \tvaddpd (%rcx){1to8},%zmm29,%zmm30",},
+{{0x62, 0x61, 0x95, 0x40, 0x58, 0x72, 0x7f, }, 7, 0, "", "",
+"62 61 95 40 58 72 7f \tvaddpd 0x1fc0(%rdx),%zmm29,%zmm30",},
+{{0x62, 0x61, 0x95, 0x50, 0x58, 0x72, 0x7f, }, 7, 0, "", "",
+"62 61 95 50 58 72 7f \tvaddpd 0x3f8(%rdx){1to8},%zmm29,%zmm30",},
+{{0x62, 0xf1, 0x0c, 0x50, 0xc2, 0x6a, 0x7f, 0x08, }, 8, 0, "", "",
+"62 f1 0c 50 c2 6a 7f 08 \tvcmpeq_uqps 0x1fc(%rdx){1to16},%zmm30,%k5",},
+{{0x62, 0xb1, 0x97, 0x07, 0xc2, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00, 0x01, }, 12, 0, "", "",
+"62 b1 97 07 c2 ac f0 23 01 00 00 01 \tvcmpltsd 0x123(%rax,%r14,8),%xmm29,%k5{%k7}",},
+{{0x62, 0x91, 0x97, 0x17, 0xc2, 0xec, 0x02, }, 7, 0, "", "",
+"62 91 97 17 c2 ec 02 \tvcmplesd {sae},%xmm28,%xmm29,%k5{%k7}",},
+{{0x62, 0x23, 0x15, 0x07, 0x27, 0xb4, 0xf0, 0x23, 0x01, 0x00, 0x00, 0x5b, }, 12, 0, "", "",
+"62 23 15 07 27 b4 f0 23 01 00 00 5b \tvgetmantss $0x5b,0x123(%rax,%r14,8),%xmm29,%xmm30{%k7}",},
 {{0xf3, 0x0f, 0x1b, 0x00, }, 4, 0, "", "",
 "f3 0f 1b 00          \tbndmk  (%rax),%bnd0",},
 {{0xf3, 0x41, 0x0f, 0x1b, 0x00, }, 5, 0, "", "",
@@ -325,19 +1257,19 @@
 {{0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
 "0f 1b 84 08 78 56 34 12 \tbndstx %bnd0,0x12345678(%rax,%rcx,1)",},
 {{0xf2, 0xe8, 0x00, 0x00, 0x00, 0x00, }, 6, 0, "call", "unconditional",
-"f2 e8 00 00 00 00    \tbnd callq 3f6 <main+0x3f6>",},
+"f2 e8 00 00 00 00    \tbnd callq f22 <main+0xf22>",},
 {{0x67, 0xf2, 0xff, 0x10, }, 4, 0, "call", "indirect",
 "67 f2 ff 10          \tbnd callq *(%eax)",},
 {{0xf2, 0xc3, }, 2, 0, "ret", "indirect",
 "f2 c3                \tbnd retq ",},
 {{0xf2, 0xe9, 0x00, 0x00, 0x00, 0x00, }, 6, 0, "jmp", "unconditional",
-"f2 e9 00 00 00 00    \tbnd jmpq 402 <main+0x402>",},
+"f2 e9 00 00 00 00    \tbnd jmpq f2e <main+0xf2e>",},
 {{0xf2, 0xe9, 0x00, 0x00, 0x00, 0x00, }, 6, 0, "jmp", "unconditional",
-"f2 e9 00 00 00 00    \tbnd jmpq 408 <main+0x408>",},
+"f2 e9 00 00 00 00    \tbnd jmpq f34 <main+0xf34>",},
 {{0x67, 0xf2, 0xff, 0x21, }, 4, 0, "jmp", "indirect",
 "67 f2 ff 21          \tbnd jmpq *(%ecx)",},
 {{0xf2, 0x0f, 0x85, 0x00, 0x00, 0x00, 0x00, }, 7, 0, "jcc", "conditional",
-"f2 0f 85 00 00 00 00 \tbnd jne 413 <main+0x413>",},
+"f2 0f 85 00 00 00 00 \tbnd jne f3f <main+0xf3f>",},
 {{0x0f, 0x3a, 0xcc, 0xc1, 0x00, }, 5, 0, "", "",
 "0f 3a cc c1 00       \tsha1rnds4 $0x0,%xmm1,%xmm0",},
 {{0x0f, 0x3a, 0xcc, 0xd7, 0x91, }, 5, 0, "", "",
diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-src.c b/tools/perf/arch/x86/tests/insn-x86-dat-src.c
index 41b1b1c62660..76e0ec379c8b 100644
--- a/tools/perf/arch/x86/tests/insn-x86-dat-src.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-src.c
@@ -19,8 +19,882 @@ int main(void)
 	/* Following line is a marker for the awk script - do not change */
 	asm volatile("rdtsc"); /* Start here */
 
+	/* Test fix for vcvtph2ps in x86-opcode-map.txt */
+
+	asm volatile("vcvtph2ps %xmm3,%ymm5");
+
 #ifdef __x86_64__
 
+	/* AVX-512: Instructions with the same op codes as Mask Instructions  */
+
+	asm volatile("cmovno %rax,%rbx");
+	asm volatile("cmovno 0x12345678(%rax),%rcx");
+	asm volatile("cmovno 0x12345678(%rax),%cx");
+
+	asm volatile("cmove  %rax,%rbx");
+	asm volatile("cmove 0x12345678(%rax),%rcx");
+	asm volatile("cmove 0x12345678(%rax),%cx");
+
+	asm volatile("seto    0x12345678(%rax)");
+	asm volatile("setno   0x12345678(%rax)");
+	asm volatile("setb    0x12345678(%rax)");
+	asm volatile("setc    0x12345678(%rax)");
+	asm volatile("setnae  0x12345678(%rax)");
+	asm volatile("setae   0x12345678(%rax)");
+	asm volatile("setnb   0x12345678(%rax)");
+	asm volatile("setnc   0x12345678(%rax)");
+	asm volatile("sets    0x12345678(%rax)");
+	asm volatile("setns   0x12345678(%rax)");
+
+	/* AVX-512: Mask Instructions */
+
+	asm volatile("kandw  %k7,%k6,%k5");
+	asm volatile("kandq  %k7,%k6,%k5");
+	asm volatile("kandb  %k7,%k6,%k5");
+	asm volatile("kandd  %k7,%k6,%k5");
+
+	asm volatile("kandnw  %k7,%k6,%k5");
+	asm volatile("kandnq  %k7,%k6,%k5");
+	asm volatile("kandnb  %k7,%k6,%k5");
+	asm volatile("kandnd  %k7,%k6,%k5");
+
+	asm volatile("knotw  %k7,%k6");
+	asm volatile("knotq  %k7,%k6");
+	asm volatile("knotb  %k7,%k6");
+	asm volatile("knotd  %k7,%k6");
+
+	asm volatile("korw  %k7,%k6,%k5");
+	asm volatile("korq  %k7,%k6,%k5");
+	asm volatile("korb  %k7,%k6,%k5");
+	asm volatile("kord  %k7,%k6,%k5");
+
+	asm volatile("kxnorw  %k7,%k6,%k5");
+	asm volatile("kxnorq  %k7,%k6,%k5");
+	asm volatile("kxnorb  %k7,%k6,%k5");
+	asm volatile("kxnord  %k7,%k6,%k5");
+
+	asm volatile("kxorw  %k7,%k6,%k5");
+	asm volatile("kxorq  %k7,%k6,%k5");
+	asm volatile("kxorb  %k7,%k6,%k5");
+	asm volatile("kxord  %k7,%k6,%k5");
+
+	asm volatile("kaddw  %k7,%k6,%k5");
+	asm volatile("kaddq  %k7,%k6,%k5");
+	asm volatile("kaddb  %k7,%k6,%k5");
+	asm volatile("kaddd  %k7,%k6,%k5");
+
+	asm volatile("kunpckbw %k7,%k6,%k5");
+	asm volatile("kunpckwd %k7,%k6,%k5");
+	asm volatile("kunpckdq %k7,%k6,%k5");
+
+	asm volatile("kmovw  %k6,%k5");
+	asm volatile("kmovw  (%rcx),%k5");
+	asm volatile("kmovw  0x123(%rax,%r14,8),%k5");
+	asm volatile("kmovw  %k5,(%rcx)");
+	asm volatile("kmovw  %k5,0x123(%rax,%r14,8)");
+	asm volatile("kmovw  %eax,%k5");
+	asm volatile("kmovw  %ebp,%k5");
+	asm volatile("kmovw  %r13d,%k5");
+	asm volatile("kmovw  %k5,%eax");
+	asm volatile("kmovw  %k5,%ebp");
+	asm volatile("kmovw  %k5,%r13d");
+
+	asm volatile("kmovq  %k6,%k5");
+	asm volatile("kmovq  (%rcx),%k5");
+	asm volatile("kmovq  0x123(%rax,%r14,8),%k5");
+	asm volatile("kmovq  %k5,(%rcx)");
+	asm volatile("kmovq  %k5,0x123(%rax,%r14,8)");
+	asm volatile("kmovq  %rax,%k5");
+	asm volatile("kmovq  %rbp,%k5");
+	asm volatile("kmovq  %r13,%k5");
+	asm volatile("kmovq  %k5,%rax");
+	asm volatile("kmovq  %k5,%rbp");
+	asm volatile("kmovq  %k5,%r13");
+
+	asm volatile("kmovb  %k6,%k5");
+	asm volatile("kmovb  (%rcx),%k5");
+	asm volatile("kmovb  0x123(%rax,%r14,8),%k5");
+	asm volatile("kmovb  %k5,(%rcx)");
+	asm volatile("kmovb  %k5,0x123(%rax,%r14,8)");
+	asm volatile("kmovb  %eax,%k5");
+	asm volatile("kmovb  %ebp,%k5");
+	asm volatile("kmovb  %r13d,%k5");
+	asm volatile("kmovb  %k5,%eax");
+	asm volatile("kmovb  %k5,%ebp");
+	asm volatile("kmovb  %k5,%r13d");
+
+	asm volatile("kmovd  %k6,%k5");
+	asm volatile("kmovd  (%rcx),%k5");
+	asm volatile("kmovd  0x123(%rax,%r14,8),%k5");
+	asm volatile("kmovd  %k5,(%rcx)");
+	asm volatile("kmovd  %k5,0x123(%rax,%r14,8)");
+	asm volatile("kmovd  %eax,%k5");
+	asm volatile("kmovd  %ebp,%k5");
+	asm volatile("kmovd  %r13d,%k5");
+	asm volatile("kmovd  %k5,%eax");
+	asm volatile("kmovd  %k5,%ebp");
+	asm volatile("kmovd %k5,%r13d");
+
+	asm volatile("kortestw %k6,%k5");
+	asm volatile("kortestq %k6,%k5");
+	asm volatile("kortestb %k6,%k5");
+	asm volatile("kortestd %k6,%k5");
+
+	asm volatile("ktestw %k6,%k5");
+	asm volatile("ktestq %k6,%k5");
+	asm volatile("ktestb %k6,%k5");
+	asm volatile("ktestd %k6,%k5");
+
+	asm volatile("kshiftrw $0x12,%k6,%k5");
+	asm volatile("kshiftrq $0x5b,%k6,%k5");
+	asm volatile("kshiftlw $0x12,%k6,%k5");
+	asm volatile("kshiftlq $0x5b,%k6,%k5");
+
+	/* AVX-512: Op code 0f 5b */
+	asm volatile("vcvtdq2ps %xmm5,%xmm6");
+	asm volatile("vcvtqq2ps %zmm29,%ymm6{%k7}");
+	asm volatile("vcvtps2dq %xmm5,%xmm6");
+	asm volatile("vcvttps2dq %xmm5,%xmm6");
+
+	/* AVX-512: Op code 0f 6f */
+
+	asm volatile("movq   %mm0,%mm4");
+	asm volatile("vmovdqa %ymm4,%ymm6");
+	asm volatile("vmovdqa32 %zmm25,%zmm26");
+	asm volatile("vmovdqa64 %zmm25,%zmm26");
+	asm volatile("vmovdqu %ymm4,%ymm6");
+	asm volatile("vmovdqu32 %zmm29,%zmm30");
+	asm volatile("vmovdqu64 %zmm25,%zmm26");
+	asm volatile("vmovdqu8 %zmm29,%zmm30");
+	asm volatile("vmovdqu16 %zmm25,%zmm26");
+
+	/* AVX-512: Op code 0f 78 */
+
+	asm volatile("vmread %rax,%rbx");
+	asm volatile("vcvttps2udq %zmm25,%zmm26");
+	asm volatile("vcvttpd2udq %zmm29,%ymm6{%k7}");
+	asm volatile("vcvttsd2usi %xmm6,%rax");
+	asm volatile("vcvttss2usi %xmm6,%rax");
+	asm volatile("vcvttps2uqq %ymm5,%zmm26{%k7}");
+	asm volatile("vcvttpd2uqq %zmm29,%zmm30");
+
+	/* AVX-512: Op code 0f 79 */
+
+	asm volatile("vmwrite %rax,%rbx");
+	asm volatile("vcvtps2udq %zmm25,%zmm26");
+	asm volatile("vcvtpd2udq %zmm29,%ymm6{%k7}");
+	asm volatile("vcvtsd2usi %xmm6,%rax");
+	asm volatile("vcvtss2usi %xmm6,%rax");
+	asm volatile("vcvtps2uqq %ymm5,%zmm26{%k7}");
+	asm volatile("vcvtpd2uqq %zmm29,%zmm30");
+
+	/* AVX-512: Op code 0f 7a */
+
+	asm volatile("vcvtudq2pd %ymm5,%zmm29{%k7}");
+	asm volatile("vcvtuqq2pd %zmm25,%zmm26");
+	asm volatile("vcvtudq2ps %zmm29,%zmm30");
+	asm volatile("vcvtuqq2ps %zmm25,%ymm26{%k7}");
+	asm volatile("vcvttps2qq %ymm25,%zmm26{%k7}");
+	asm volatile("vcvttpd2qq %zmm29,%zmm30");
+
+	/* AVX-512: Op code 0f 7b */
+
+	asm volatile("vcvtusi2sd %eax,%xmm5,%xmm6");
+	asm volatile("vcvtusi2ss %eax,%xmm5,%xmm6");
+	asm volatile("vcvtps2qq %ymm5,%zmm26{%k7}");
+	asm volatile("vcvtpd2qq %zmm29,%zmm30");
+
+	/* AVX-512: Op code 0f 7f */
+
+	asm volatile("movq.s  %mm0,%mm4");
+	asm volatile("vmovdqa %ymm8,%ymm6");
+	asm volatile("vmovdqa32.s %zmm25,%zmm26");
+	asm volatile("vmovdqa64.s %zmm25,%zmm26");
+	asm volatile("vmovdqu %ymm8,%ymm6");
+	asm volatile("vmovdqu32.s %zmm25,%zmm26");
+	asm volatile("vmovdqu64.s %zmm25,%zmm26");
+	asm volatile("vmovdqu8.s %zmm30,(%rcx)");
+	asm volatile("vmovdqu16.s %zmm25,%zmm26");
+
+	/* AVX-512: Op code 0f db */
+
+	asm volatile("pand  %mm1,%mm2");
+	asm volatile("pand  %xmm1,%xmm2");
+	asm volatile("vpand  %ymm4,%ymm6,%ymm2");
+	asm volatile("vpandd %zmm24,%zmm25,%zmm26");
+	asm volatile("vpandq %zmm24,%zmm25,%zmm26");
+
+	/* AVX-512: Op code 0f df */
+
+	asm volatile("pandn  %mm1,%mm2");
+	asm volatile("pandn  %xmm1,%xmm2");
+	asm volatile("vpandn %ymm4,%ymm6,%ymm2");
+	asm volatile("vpandnd %zmm24,%zmm25,%zmm26");
+	asm volatile("vpandnq %zmm24,%zmm25,%zmm26");
+
+	/* AVX-512: Op code 0f e6 */
+
+	asm volatile("vcvttpd2dq %xmm1,%xmm2");
+	asm volatile("vcvtdq2pd %xmm5,%xmm6");
+	asm volatile("vcvtdq2pd %ymm5,%zmm26{%k7}");
+	asm volatile("vcvtqq2pd %zmm25,%zmm26");
+	asm volatile("vcvtpd2dq %xmm1,%xmm2");
+
+	/* AVX-512: Op code 0f eb */
+
+	asm volatile("por   %mm4,%mm6");
+	asm volatile("vpor   %ymm4,%ymm6,%ymm2");
+	asm volatile("vpord  %zmm24,%zmm25,%zmm26");
+	asm volatile("vporq  %zmm24,%zmm25,%zmm26");
+
+	/* AVX-512: Op code 0f ef */
+
+	asm volatile("pxor   %mm4,%mm6");
+	asm volatile("vpxor  %ymm4,%ymm6,%ymm2");
+	asm volatile("vpxord %zmm24,%zmm25,%zmm26");
+	asm volatile("vpxorq %zmm24,%zmm25,%zmm26");
+
+	/* AVX-512: Op code 0f 38 10 */
+
+	asm volatile("pblendvb %xmm1,%xmm0");
+	asm volatile("vpsrlvw %zmm27,%zmm28,%zmm29");
+	asm volatile("vpmovuswb %zmm28,%ymm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 11 */
+
+	asm volatile("vpmovusdb %zmm28,%xmm6{%k7}");
+	asm volatile("vpsravw %zmm27,%zmm28,%zmm29");
+
+	/* AVX-512: Op code 0f 38 12 */
+
+	asm volatile("vpmovusqb %zmm27,%xmm6{%k7}");
+	asm volatile("vpsllvw %zmm27,%zmm28,%zmm29");
+
+	/* AVX-512: Op code 0f 38 13 */
+
+	asm volatile("vcvtph2ps %xmm3,%ymm5");
+	asm volatile("vcvtph2ps %ymm5,%zmm27{%k7}");
+	asm volatile("vpmovusdw %zmm27,%ymm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 14 */
+
+	asm volatile("blendvps %xmm1,%xmm0");
+	asm volatile("vpmovusqw %zmm27,%xmm6{%k7}");
+	asm volatile("vprorvd %zmm27,%zmm28,%zmm29");
+	asm volatile("vprorvq %zmm27,%zmm28,%zmm29");
+
+	/* AVX-512: Op code 0f 38 15 */
+
+	asm volatile("blendvpd %xmm1,%xmm0");
+	asm volatile("vpmovusqd %zmm27,%ymm6{%k7}");
+	asm volatile("vprolvd %zmm27,%zmm28,%zmm29");
+	asm volatile("vprolvq %zmm27,%zmm28,%zmm29");
+
+	/* AVX-512: Op code 0f 38 16 */
+
+	asm volatile("vpermps %ymm4,%ymm6,%ymm2");
+	asm volatile("vpermps %ymm24,%ymm26,%ymm22{%k7}");
+	asm volatile("vpermpd %ymm24,%ymm26,%ymm22{%k7}");
+
+	/* AVX-512: Op code 0f 38 19 */
+
+	asm volatile("vbroadcastsd %xmm4,%ymm6");
+	asm volatile("vbroadcastf32x2 %xmm27,%zmm26");
+
+	/* AVX-512: Op code 0f 38 1a */
+
+	asm volatile("vbroadcastf128 (%rcx),%ymm4");
+	asm volatile("vbroadcastf32x4 (%rcx),%zmm26");
+	asm volatile("vbroadcastf64x2 (%rcx),%zmm26");
+
+	/* AVX-512: Op code 0f 38 1b */
+
+	asm volatile("vbroadcastf32x8 (%rcx),%zmm27");
+	asm volatile("vbroadcastf64x4 (%rcx),%zmm26");
+
+	/* AVX-512: Op code 0f 38 1f */
+
+	asm volatile("vpabsq %zmm27,%zmm28");
+
+	/* AVX-512: Op code 0f 38 20 */
+
+	asm volatile("vpmovsxbw %xmm4,%xmm5");
+	asm volatile("vpmovswb %zmm27,%ymm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 21 */
+
+	asm volatile("vpmovsxbd %xmm4,%ymm6");
+	asm volatile("vpmovsdb %zmm27,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 22 */
+
+	asm volatile("vpmovsxbq %xmm4,%ymm4");
+	asm volatile("vpmovsqb %zmm27,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 23 */
+
+	asm volatile("vpmovsxwd %xmm4,%ymm4");
+	asm volatile("vpmovsdw %zmm27,%ymm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 24 */
+
+	asm volatile("vpmovsxwq %xmm4,%ymm6");
+	asm volatile("vpmovsqw %zmm27,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 25 */
+
+	asm volatile("vpmovsxdq %xmm4,%ymm4");
+	asm volatile("vpmovsqd %zmm27,%ymm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 26 */
+
+	asm volatile("vptestmb %zmm27,%zmm28,%k5");
+	asm volatile("vptestmw %zmm27,%zmm28,%k5");
+	asm volatile("vptestnmb %zmm26,%zmm27,%k5");
+	asm volatile("vptestnmw %zmm26,%zmm27,%k5");
+
+	/* AVX-512: Op code 0f 38 27 */
+
+	asm volatile("vptestmd %zmm27,%zmm28,%k5");
+	asm volatile("vptestmq %zmm27,%zmm28,%k5");
+	asm volatile("vptestnmd %zmm26,%zmm27,%k5");
+	asm volatile("vptestnmq %zmm26,%zmm27,%k5");
+
+	/* AVX-512: Op code 0f 38 28 */
+
+	asm volatile("vpmuldq %ymm4,%ymm6,%ymm2");
+	asm volatile("vpmovm2b %k5,%zmm28");
+	asm volatile("vpmovm2w %k5,%zmm28");
+
+	/* AVX-512: Op code 0f 38 29 */
+
+	asm volatile("vpcmpeqq %ymm4,%ymm6,%ymm2");
+	asm volatile("vpmovb2m %zmm28,%k5");
+	asm volatile("vpmovw2m %zmm28,%k5");
+
+	/* AVX-512: Op code 0f 38 2a */
+
+	asm volatile("vmovntdqa (%rcx),%ymm4");
+	asm volatile("vpbroadcastmb2q %k6,%zmm30");
+
+	/* AVX-512: Op code 0f 38 2c */
+
+	asm volatile("vmaskmovps (%rcx),%ymm4,%ymm6");
+	asm volatile("vscalefps %zmm24,%zmm25,%zmm26");
+	asm volatile("vscalefpd %zmm24,%zmm25,%zmm26");
+
+	/* AVX-512: Op code 0f 38 2d */
+
+	asm volatile("vmaskmovpd (%rcx),%ymm4,%ymm6");
+	asm volatile("vscalefss %xmm24,%xmm25,%xmm26{%k7}");
+	asm volatile("vscalefsd %xmm24,%xmm25,%xmm26{%k7}");
+
+	/* AVX-512: Op code 0f 38 30 */
+
+	asm volatile("vpmovzxbw %xmm4,%ymm4");
+	asm volatile("vpmovwb %zmm27,%ymm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 31 */
+
+	asm volatile("vpmovzxbd %xmm4,%ymm6");
+	asm volatile("vpmovdb %zmm27,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 32 */
+
+	asm volatile("vpmovzxbq %xmm4,%ymm4");
+	asm volatile("vpmovqb %zmm27,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 33 */
+
+	asm volatile("vpmovzxwd %xmm4,%ymm4");
+	asm volatile("vpmovdw %zmm27,%ymm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 34 */
+
+	asm volatile("vpmovzxwq %xmm4,%ymm6");
+	asm volatile("vpmovqw %zmm27,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 35 */
+
+	asm volatile("vpmovzxdq %xmm4,%ymm4");
+	asm volatile("vpmovqd %zmm27,%ymm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 36 */
+
+	asm volatile("vpermd %ymm4,%ymm6,%ymm2");
+	asm volatile("vpermd %ymm24,%ymm26,%ymm22{%k7}");
+	asm volatile("vpermq %ymm24,%ymm26,%ymm22{%k7}");
+
+	/* AVX-512: Op code 0f 38 38 */
+
+	asm volatile("vpminsb %ymm4,%ymm6,%ymm2");
+	asm volatile("vpmovm2d %k5,%zmm28");
+	asm volatile("vpmovm2q %k5,%zmm28");
+
+	/* AVX-512: Op code 0f 38 39 */
+
+	asm volatile("vpminsd %xmm1,%xmm2,%xmm3");
+	asm volatile("vpminsd %zmm24,%zmm25,%zmm26");
+	asm volatile("vpminsq %zmm24,%zmm25,%zmm26");
+	asm volatile("vpmovd2m %zmm28,%k5");
+	asm volatile("vpmovq2m %zmm28,%k5");
+
+	/* AVX-512: Op code 0f 38 3a */
+
+	asm volatile("vpminuw %ymm4,%ymm6,%ymm2");
+	asm volatile("vpbroadcastmw2d %k6,%zmm28");
+
+	/* AVX-512: Op code 0f 38 3b */
+
+	asm volatile("vpminud %ymm4,%ymm6,%ymm2");
+	asm volatile("vpminud %zmm24,%zmm25,%zmm26");
+	asm volatile("vpminuq %zmm24,%zmm25,%zmm26");
+
+	/* AVX-512: Op code 0f 38 3d */
+
+	asm volatile("vpmaxsd %ymm4,%ymm6,%ymm2");
+	asm volatile("vpmaxsd %zmm24,%zmm25,%zmm26");
+	asm volatile("vpmaxsq %zmm24,%zmm25,%zmm26");
+
+	/* AVX-512: Op code 0f 38 3f */
+
+	asm volatile("vpmaxud %ymm4,%ymm6,%ymm2");
+	asm volatile("vpmaxud %zmm24,%zmm25,%zmm26");
+	asm volatile("vpmaxuq %zmm24,%zmm25,%zmm26");
+
+	/* AVX-512: Op code 0f 38 40 */
+
+	asm volatile("vpmulld %ymm4,%ymm6,%ymm2");
+	asm volatile("vpmulld %zmm24,%zmm25,%zmm26");
+	asm volatile("vpmullq %zmm24,%zmm25,%zmm26");
+
+	/* AVX-512: Op code 0f 38 42 */
+
+	asm volatile("vgetexpps %zmm25,%zmm26");
+	asm volatile("vgetexppd %zmm27,%zmm28");
+
+	/* AVX-512: Op code 0f 38 43 */
+
+	asm volatile("vgetexpss %xmm24,%xmm25,%xmm26{%k7}");
+	asm volatile("vgetexpsd %xmm28,%xmm29,%xmm30{%k7}");
+
+	/* AVX-512: Op code 0f 38 44 */
+
+	asm volatile("vplzcntd %zmm27,%zmm28");
+	asm volatile("vplzcntq %zmm27,%zmm28");
+
+	/* AVX-512: Op code 0f 38 46 */
+
+	asm volatile("vpsravd %ymm4,%ymm6,%ymm2");
+	asm volatile("vpsravd %zmm24,%zmm25,%zmm26");
+	asm volatile("vpsravq %zmm24,%zmm25,%zmm26");
+
+	/* AVX-512: Op code 0f 38 4c */
+
+	asm volatile("vrcp14ps %zmm25,%zmm26");
+	asm volatile("vrcp14pd %zmm27,%zmm28");
+
+	/* AVX-512: Op code 0f 38 4d */
+
+	asm volatile("vrcp14ss %xmm24,%xmm25,%xmm26{%k7}");
+	asm volatile("vrcp14sd %xmm24,%xmm25,%xmm26{%k7}");
+
+	/* AVX-512: Op code 0f 38 4e */
+
+	asm volatile("vrsqrt14ps %zmm25,%zmm26");
+	asm volatile("vrsqrt14pd %zmm27,%zmm28");
+
+	/* AVX-512: Op code 0f 38 4f */
+
+	asm volatile("vrsqrt14ss %xmm24,%xmm25,%xmm26{%k7}");
+	asm volatile("vrsqrt14sd %xmm24,%xmm25,%xmm26{%k7}");
+
+	/* AVX-512: Op code 0f 38 59 */
+
+	asm volatile("vpbroadcastq %xmm4,%xmm6");
+	asm volatile("vbroadcasti32x2 %xmm27,%zmm26");
+
+	/* AVX-512: Op code 0f 38 5a */
+
+	asm volatile("vbroadcasti128 (%rcx),%ymm4");
+	asm volatile("vbroadcasti32x4 (%rcx),%zmm26");
+	asm volatile("vbroadcasti64x2 (%rcx),%zmm26");
+
+	/* AVX-512: Op code 0f 38 5b */
+
+	asm volatile("vbroadcasti32x8 (%rcx),%zmm28");
+	asm volatile("vbroadcasti64x4 (%rcx),%zmm26");
+
+	/* AVX-512: Op code 0f 38 64 */
+
+	asm volatile("vpblendmd %zmm26,%zmm27,%zmm28");
+	asm volatile("vpblendmq %zmm26,%zmm27,%zmm28");
+
+	/* AVX-512: Op code 0f 38 65 */
+
+	asm volatile("vblendmps %zmm24,%zmm25,%zmm26");
+	asm volatile("vblendmpd %zmm26,%zmm27,%zmm28");
+
+	/* AVX-512: Op code 0f 38 66 */
+
+	asm volatile("vpblendmb %zmm26,%zmm27,%zmm28");
+	asm volatile("vpblendmw %zmm26,%zmm27,%zmm28");
+
+	/* AVX-512: Op code 0f 38 75 */
+
+	asm volatile("vpermi2b %zmm24,%zmm25,%zmm26");
+	asm volatile("vpermi2w %zmm26,%zmm27,%zmm28");
+
+	/* AVX-512: Op code 0f 38 76 */
+
+	asm volatile("vpermi2d %zmm26,%zmm27,%zmm28");
+	asm volatile("vpermi2q %zmm26,%zmm27,%zmm28");
+
+	/* AVX-512: Op code 0f 38 77 */
+
+	asm volatile("vpermi2ps %zmm26,%zmm27,%zmm28");
+	asm volatile("vpermi2pd %zmm26,%zmm27,%zmm28");
+
+	/* AVX-512: Op code 0f 38 7a */
+
+	asm volatile("vpbroadcastb %eax,%xmm30");
+
+	/* AVX-512: Op code 0f 38 7b */
+
+	asm volatile("vpbroadcastw %eax,%xmm30");
+
+	/* AVX-512: Op code 0f 38 7c */
+
+	asm volatile("vpbroadcastd %eax,%xmm30");
+	asm volatile("vpbroadcastq %rax,%zmm30");
+
+	/* AVX-512: Op code 0f 38 7d */
+
+	asm volatile("vpermt2b %zmm26,%zmm27,%zmm28");
+	asm volatile("vpermt2w %zmm26,%zmm27,%zmm28");
+
+	/* AVX-512: Op code 0f 38 7e */
+
+	asm volatile("vpermt2d %zmm26,%zmm27,%zmm28");
+	asm volatile("vpermt2q %zmm26,%zmm27,%zmm28");
+
+	/* AVX-512: Op code 0f 38 7f */
+
+	asm volatile("vpermt2ps %zmm26,%zmm27,%zmm28");
+	asm volatile("vpermt2pd %zmm26,%zmm27,%zmm28");
+
+	/* AVX-512: Op code 0f 38 83 */
+
+	asm volatile("vpmultishiftqb %zmm26,%zmm27,%zmm28");
+
+	/* AVX-512: Op code 0f 38 88 */
+
+	asm volatile("vexpandps (%rcx),%zmm26");
+	asm volatile("vexpandpd (%rcx),%zmm28");
+
+	/* AVX-512: Op code 0f 38 89 */
+
+	asm volatile("vpexpandd (%rcx),%zmm28");
+	asm volatile("vpexpandq (%rcx),%zmm26");
+
+	/* AVX-512: Op code 0f 38 8a */
+
+	asm volatile("vcompressps %zmm28,(%rcx)");
+	asm volatile("vcompresspd %zmm28,(%rcx)");
+
+	/* AVX-512: Op code 0f 38 8b */
+
+	asm volatile("vpcompressd %zmm28,(%rcx)");
+	asm volatile("vpcompressq %zmm26,(%rcx)");
+
+	/* AVX-512: Op code 0f 38 8d */
+
+	asm volatile("vpermb %zmm26,%zmm27,%zmm28");
+	asm volatile("vpermw %zmm26,%zmm27,%zmm28");
+
+	/* AVX-512: Op code 0f 38 90 */
+
+	asm volatile("vpgatherdd %xmm2,0x02(%rbp,%xmm7,2),%xmm1");
+	asm volatile("vpgatherdq %xmm2,0x04(%rbp,%xmm7,2),%xmm1");
+	asm volatile("vpgatherdd 0x7b(%rbp,%zmm27,8),%zmm26{%k1}");
+	asm volatile("vpgatherdq 0x7b(%rbp,%ymm27,8),%zmm26{%k1}");
+
+	/* AVX-512: Op code 0f 38 91 */
+
+	asm volatile("vpgatherqd %xmm2,0x02(%rbp,%xmm7,2),%xmm1");
+	asm volatile("vpgatherqq %xmm2,0x02(%rbp,%xmm7,2),%xmm1");
+	asm volatile("vpgatherqd 0x7b(%rbp,%zmm27,8),%ymm26{%k1}");
+	asm volatile("vpgatherqq 0x7b(%rbp,%zmm27,8),%zmm26{%k1}");
+
+	/* AVX-512: Op code 0f 38 a0 */
+
+	asm volatile("vpscatterdd %zmm28,0x7b(%rbp,%zmm29,8){%k1}");
+	asm volatile("vpscatterdq %zmm26,0x7b(%rbp,%ymm27,8){%k1}");
+
+	/* AVX-512: Op code 0f 38 a1 */
+
+	asm volatile("vpscatterqd %ymm6,0x7b(%rbp,%zmm29,8){%k1}");
+	asm volatile("vpscatterqq %ymm6,0x7b(%rbp,%ymm27,8){%k1}");
+
+	/* AVX-512: Op code 0f 38 a2 */
+
+	asm volatile("vscatterdps %zmm28,0x7b(%rbp,%zmm29,8){%k1}");
+	asm volatile("vscatterdpd %zmm28,0x7b(%rbp,%ymm27,8){%k1}");
+
+	/* AVX-512: Op code 0f 38 a3 */
+
+	asm volatile("vscatterqps %ymm6,0x7b(%rbp,%zmm29,8){%k1}");
+	asm volatile("vscatterqpd %zmm28,0x7b(%rbp,%zmm29,8){%k1}");
+
+	/* AVX-512: Op code 0f 38 b4 */
+
+	asm volatile("vpmadd52luq %zmm26,%zmm27,%zmm28");
+
+	/* AVX-512: Op code 0f 38 b5 */
+
+	asm volatile("vpmadd52huq %zmm26,%zmm27,%zmm28");
+
+	/* AVX-512: Op code 0f 38 c4 */
+
+	asm volatile("vpconflictd %zmm26,%zmm27");
+	asm volatile("vpconflictq %zmm26,%zmm27");
+
+	/* AVX-512: Op code 0f 38 c8 */
+
+	asm volatile("vexp2ps %zmm29,%zmm30");
+	asm volatile("vexp2pd %zmm26,%zmm27");
+
+	/* AVX-512: Op code 0f 38 ca */
+
+	asm volatile("vrcp28ps %zmm29,%zmm30");
+	asm volatile("vrcp28pd %zmm26,%zmm27");
+
+	/* AVX-512: Op code 0f 38 cb */
+
+	asm volatile("vrcp28ss %xmm28,%xmm29,%xmm30{%k7}");
+	asm volatile("vrcp28sd %xmm25,%xmm26,%xmm27{%k7}");
+
+	/* AVX-512: Op code 0f 38 cc */
+
+	asm volatile("vrsqrt28ps %zmm29,%zmm30");
+	asm volatile("vrsqrt28pd %zmm26,%zmm27");
+
+	/* AVX-512: Op code 0f 38 cd */
+
+	asm volatile("vrsqrt28ss %xmm28,%xmm29,%xmm30{%k7}");
+	asm volatile("vrsqrt28sd %xmm25,%xmm26,%xmm27{%k7}");
+
+	/* AVX-512: Op code 0f 3a 03 */
+
+	asm volatile("valignd $0x12,%zmm28,%zmm29,%zmm30");
+	asm volatile("valignq $0x12,%zmm25,%zmm26,%zmm27");
+
+	/* AVX-512: Op code 0f 3a 08 */
+
+	asm volatile("vroundps $0x5,%ymm6,%ymm2");
+	asm volatile("vrndscaleps $0x12,%zmm25,%zmm26");
+
+	/* AVX-512: Op code 0f 3a 09 */
+
+	asm volatile("vroundpd $0x5,%ymm6,%ymm2");
+	asm volatile("vrndscalepd $0x12,%zmm25,%zmm26");
+
+	/* AVX-512: Op code 0f 3a 0a */
+
+	asm volatile("vroundss $0x5,%xmm4,%xmm6,%xmm2");
+	asm volatile("vrndscaless $0x12,%xmm24,%xmm25,%xmm26{%k7}");
+
+	/* AVX-512: Op code 0f 3a 0b */
+
+	asm volatile("vroundsd $0x5,%xmm4,%xmm6,%xmm2");
+	asm volatile("vrndscalesd $0x12,%xmm24,%xmm25,%xmm26{%k7}");
+
+	/* AVX-512: Op code 0f 3a 18 */
+
+	asm volatile("vinsertf128 $0x5,%xmm4,%ymm4,%ymm6");
+	asm volatile("vinsertf32x4 $0x12,%xmm24,%zmm25,%zmm26{%k7}");
+	asm volatile("vinsertf64x2 $0x12,%xmm24,%zmm25,%zmm26{%k7}");
+
+	/* AVX-512: Op code 0f 3a 19 */
+
+	asm volatile("vextractf128 $0x5,%ymm4,%xmm4");
+	asm volatile("vextractf32x4 $0x12,%zmm25,%xmm26{%k7}");
+	asm volatile("vextractf64x2 $0x12,%zmm25,%xmm26{%k7}");
+
+	/* AVX-512: Op code 0f 3a 1a */
+
+	asm volatile("vinsertf32x8 $0x12,%ymm25,%zmm26,%zmm27{%k7}");
+	asm volatile("vinsertf64x4 $0x12,%ymm28,%zmm29,%zmm30{%k7}");
+
+	/* AVX-512: Op code 0f 3a 1b */
+
+	asm volatile("vextractf32x8 $0x12,%zmm29,%ymm30{%k7}");
+	asm volatile("vextractf64x4 $0x12,%zmm26,%ymm27{%k7}");
+
+	/* AVX-512: Op code 0f 3a 1e */
+
+	asm volatile("vpcmpud $0x12,%zmm29,%zmm30,%k5");
+	asm volatile("vpcmpuq $0x12,%zmm26,%zmm27,%k5");
+
+	/* AVX-512: Op code 0f 3a 1f */
+
+	asm volatile("vpcmpd $0x12,%zmm29,%zmm30,%k5");
+	asm volatile("vpcmpq $0x12,%zmm26,%zmm27,%k5");
+
+	/* AVX-512: Op code 0f 3a 23 */
+
+	asm volatile("vshuff32x4 $0x12,%zmm28,%zmm29,%zmm30");
+	asm volatile("vshuff64x2 $0x12,%zmm25,%zmm26,%zmm27");
+
+	/* AVX-512: Op code 0f 3a 25 */
+
+	asm volatile("vpternlogd $0x12,%zmm28,%zmm29,%zmm30");
+	asm volatile("vpternlogq $0x12,%zmm28,%zmm29,%zmm30");
+
+	/* AVX-512: Op code 0f 3a 26 */
+
+	asm volatile("vgetmantps $0x12,%zmm26,%zmm27");
+	asm volatile("vgetmantpd $0x12,%zmm29,%zmm30");
+
+	/* AVX-512: Op code 0f 3a 27 */
+
+	asm volatile("vgetmantss $0x12,%xmm25,%xmm26,%xmm27{%k7}");
+	asm volatile("vgetmantsd $0x12,%xmm28,%xmm29,%xmm30{%k7}");
+
+	/* AVX-512: Op code 0f 3a 38 */
+
+	asm volatile("vinserti128 $0x5,%xmm4,%ymm4,%ymm6");
+	asm volatile("vinserti32x4 $0x12,%xmm24,%zmm25,%zmm26{%k7}");
+	asm volatile("vinserti64x2 $0x12,%xmm24,%zmm25,%zmm26{%k7}");
+
+	/* AVX-512: Op code 0f 3a 39 */
+
+	asm volatile("vextracti128 $0x5,%ymm4,%xmm6");
+	asm volatile("vextracti32x4 $0x12,%zmm25,%xmm26{%k7}");
+	asm volatile("vextracti64x2 $0x12,%zmm25,%xmm26{%k7}");
+
+	/* AVX-512: Op code 0f 3a 3a */
+
+	asm volatile("vinserti32x8 $0x12,%ymm28,%zmm29,%zmm30{%k7}");
+	asm volatile("vinserti64x4 $0x12,%ymm25,%zmm26,%zmm27{%k7}");
+
+	/* AVX-512: Op code 0f 3a 3b */
+
+	asm volatile("vextracti32x8 $0x12,%zmm29,%ymm30{%k7}");
+	asm volatile("vextracti64x4 $0x12,%zmm26,%ymm27{%k7}");
+
+	/* AVX-512: Op code 0f 3a 3e */
+
+	asm volatile("vpcmpub $0x12,%zmm29,%zmm30,%k5");
+	asm volatile("vpcmpuw $0x12,%zmm26,%zmm27,%k5");
+
+	/* AVX-512: Op code 0f 3a 3f */
+
+	asm volatile("vpcmpb $0x12,%zmm29,%zmm30,%k5");
+	asm volatile("vpcmpw $0x12,%zmm26,%zmm27,%k5");
+
+	/* AVX-512: Op code 0f 3a 42 */
+
+	asm volatile("vmpsadbw $0x5,%ymm4,%ymm6,%ymm2");
+	asm volatile("vdbpsadbw $0x12,%zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 3a 43 */
+
+	asm volatile("vshufi32x4 $0x12,%zmm25,%zmm26,%zmm27");
+	asm volatile("vshufi64x2 $0x12,%zmm28,%zmm29,%zmm30");
+
+	/* AVX-512: Op code 0f 3a 50 */
+
+	asm volatile("vrangeps $0x12,%zmm25,%zmm26,%zmm27");
+	asm volatile("vrangepd $0x12,%zmm28,%zmm29,%zmm30");
+
+	/* AVX-512: Op code 0f 3a 51 */
+
+	asm volatile("vrangess $0x12,%xmm25,%xmm26,%xmm27");
+	asm volatile("vrangesd $0x12,%xmm28,%xmm29,%xmm30");
+
+	/* AVX-512: Op code 0f 3a 54 */
+
+	asm volatile("vfixupimmps $0x12,%zmm28,%zmm29,%zmm30");
+	asm volatile("vfixupimmpd $0x12,%zmm25,%zmm26,%zmm27");
+
+	/* AVX-512: Op code 0f 3a 55 */
+
+	asm volatile("vfixupimmss $0x12,%xmm28,%xmm29,%xmm30{%k7}");
+	asm volatile("vfixupimmsd $0x12,%xmm25,%xmm26,%xmm27{%k7}");
+
+	/* AVX-512: Op code 0f 3a 56 */
+
+	asm volatile("vreduceps $0x12,%zmm26,%zmm27");
+	asm volatile("vreducepd $0x12,%zmm29,%zmm30");
+
+	/* AVX-512: Op code 0f 3a 57 */
+
+	asm volatile("vreducess $0x12,%xmm25,%xmm26,%xmm27");
+	asm volatile("vreducesd $0x12,%xmm28,%xmm29,%xmm30");
+
+	/* AVX-512: Op code 0f 3a 66 */
+
+	asm volatile("vfpclassps $0x12,%zmm27,%k5");
+	asm volatile("vfpclasspd $0x12,%zmm30,%k5");
+
+	/* AVX-512: Op code 0f 3a 67 */
+
+	asm volatile("vfpclassss $0x12,%xmm27,%k5");
+	asm volatile("vfpclasssd $0x12,%xmm30,%k5");
+
+	/* AVX-512: Op code 0f 72 (Grp13) */
+
+	asm volatile("vprord $0x12,%zmm25,%zmm26");
+	asm volatile("vprorq $0x12,%zmm25,%zmm26");
+	asm volatile("vprold $0x12,%zmm29,%zmm30");
+	asm volatile("vprolq $0x12,%zmm29,%zmm30");
+	asm volatile("psrad  $0x2,%mm6");
+	asm volatile("vpsrad $0x5,%ymm6,%ymm2");
+	asm volatile("vpsrad $0x5,%zmm26,%zmm22");
+	asm volatile("vpsraq $0x5,%zmm26,%zmm22");
+
+	/* AVX-512: Op code 0f 38 c6 (Grp18) */
+
+	asm volatile("vgatherpf0dps 0x7b(%r14,%zmm31,8){%k1}");
+	asm volatile("vgatherpf0dpd 0x7b(%r14,%ymm31,8){%k1}");
+	asm volatile("vgatherpf1dps 0x7b(%r14,%zmm31,8){%k1}");
+	asm volatile("vgatherpf1dpd 0x7b(%r14,%ymm31,8){%k1}");
+	asm volatile("vscatterpf0dps 0x7b(%r14,%zmm31,8){%k1}");
+	asm volatile("vscatterpf0dpd 0x7b(%r14,%ymm31,8){%k1}");
+	asm volatile("vscatterpf1dps 0x7b(%r14,%zmm31,8){%k1}");
+	asm volatile("vscatterpf1dpd 0x7b(%r14,%ymm31,8){%k1}");
+
+	/* AVX-512: Op code 0f 38 c7 (Grp19) */
+
+	asm volatile("vgatherpf0qps 0x7b(%r14,%zmm31,8){%k1}");
+	asm volatile("vgatherpf0qpd 0x7b(%r14,%zmm31,8){%k1}");
+	asm volatile("vgatherpf1qps 0x7b(%r14,%zmm31,8){%k1}");
+	asm volatile("vgatherpf1qpd 0x7b(%r14,%zmm31,8){%k1}");
+	asm volatile("vscatterpf0qps 0x7b(%r14,%zmm31,8){%k1}");
+	asm volatile("vscatterpf0qpd 0x7b(%r14,%zmm31,8){%k1}");
+	asm volatile("vscatterpf1qps 0x7b(%r14,%zmm31,8){%k1}");
+	asm volatile("vscatterpf1qpd 0x7b(%r14,%zmm31,8){%k1}");
+
+	/* AVX-512: Examples */
+
+	asm volatile("vaddpd %zmm28,%zmm29,%zmm30");
+	asm volatile("vaddpd %zmm28,%zmm29,%zmm30{%k7}");
+	asm volatile("vaddpd %zmm28,%zmm29,%zmm30{%k7}{z}");
+	asm volatile("vaddpd {rn-sae},%zmm28,%zmm29,%zmm30");
+	asm volatile("vaddpd {ru-sae},%zmm28,%zmm29,%zmm30");
+	asm volatile("vaddpd {rd-sae},%zmm28,%zmm29,%zmm30");
+	asm volatile("vaddpd {rz-sae},%zmm28,%zmm29,%zmm30");
+	asm volatile("vaddpd (%rcx),%zmm29,%zmm30");
+	asm volatile("vaddpd 0x123(%rax,%r14,8),%zmm29,%zmm30");
+	asm volatile("vaddpd (%rcx){1to8},%zmm29,%zmm30");
+	asm volatile("vaddpd 0x1fc0(%rdx),%zmm29,%zmm30");
+	asm volatile("vaddpd 0x3f8(%rdx){1to8},%zmm29,%zmm30");
+	asm volatile("vcmpeq_uqps 0x1fc(%rdx){1to16},%zmm30,%k5");
+	asm volatile("vcmpltsd 0x123(%rax,%r14,8),%xmm29,%k5{%k7}");
+	asm volatile("vcmplesd {sae},%xmm28,%xmm29,%k5{%k7}");
+	asm volatile("vgetmantss $0x5b,0x123(%rax,%r14,8),%xmm29,%xmm30{%k7}");
+
 	/* bndmk m64, bnd */
 
 	asm volatile("bndmk (%rax), %bnd0");
@@ -471,6 +1345,921 @@ int main(void)
 
 #else  /* #ifdef __x86_64__ */
 
+	/* bound r32, mem (same op code as EVEX prefix) */
+
+	asm volatile("bound %eax, 0x12345678(%ecx)");
+	asm volatile("bound %ecx, 0x12345678(%eax)");
+	asm volatile("bound %edx, 0x12345678(%eax)");
+	asm volatile("bound %ebx, 0x12345678(%eax)");
+	asm volatile("bound %esp, 0x12345678(%eax)");
+	asm volatile("bound %ebp, 0x12345678(%eax)");
+	asm volatile("bound %esi, 0x12345678(%eax)");
+	asm volatile("bound %edi, 0x12345678(%eax)");
+	asm volatile("bound %ecx, (%eax)");
+	asm volatile("bound %eax, (0x12345678)");
+	asm volatile("bound %edx, (%ecx,%eax,1)");
+	asm volatile("bound %edx, 0x12345678(,%eax,1)");
+	asm volatile("bound %edx, (%eax,%ecx,1)");
+	asm volatile("bound %edx, (%eax,%ecx,8)");
+	asm volatile("bound %edx, 0x12(%eax)");
+	asm volatile("bound %edx, 0x12(%ebp)");
+	asm volatile("bound %edx, 0x12(%ecx,%eax,1)");
+	asm volatile("bound %edx, 0x12(%ebp,%eax,1)");
+	asm volatile("bound %edx, 0x12(%eax,%ecx,1)");
+	asm volatile("bound %edx, 0x12(%eax,%ecx,8)");
+	asm volatile("bound %edx, 0x12345678(%eax)");
+	asm volatile("bound %edx, 0x12345678(%ebp)");
+	asm volatile("bound %edx, 0x12345678(%ecx,%eax,1)");
+	asm volatile("bound %edx, 0x12345678(%ebp,%eax,1)");
+	asm volatile("bound %edx, 0x12345678(%eax,%ecx,1)");
+	asm volatile("bound %edx, 0x12345678(%eax,%ecx,8)");
+
+	/* bound r16, mem (same op code as EVEX prefix) */
+
+	asm volatile("bound %ax, 0x12345678(%ecx)");
+	asm volatile("bound %cx, 0x12345678(%eax)");
+	asm volatile("bound %dx, 0x12345678(%eax)");
+	asm volatile("bound %bx, 0x12345678(%eax)");
+	asm volatile("bound %sp, 0x12345678(%eax)");
+	asm volatile("bound %bp, 0x12345678(%eax)");
+	asm volatile("bound %si, 0x12345678(%eax)");
+	asm volatile("bound %di, 0x12345678(%eax)");
+	asm volatile("bound %cx, (%eax)");
+	asm volatile("bound %ax, (0x12345678)");
+	asm volatile("bound %dx, (%ecx,%eax,1)");
+	asm volatile("bound %dx, 0x12345678(,%eax,1)");
+	asm volatile("bound %dx, (%eax,%ecx,1)");
+	asm volatile("bound %dx, (%eax,%ecx,8)");
+	asm volatile("bound %dx, 0x12(%eax)");
+	asm volatile("bound %dx, 0x12(%ebp)");
+	asm volatile("bound %dx, 0x12(%ecx,%eax,1)");
+	asm volatile("bound %dx, 0x12(%ebp,%eax,1)");
+	asm volatile("bound %dx, 0x12(%eax,%ecx,1)");
+	asm volatile("bound %dx, 0x12(%eax,%ecx,8)");
+	asm volatile("bound %dx, 0x12345678(%eax)");
+	asm volatile("bound %dx, 0x12345678(%ebp)");
+	asm volatile("bound %dx, 0x12345678(%ecx,%eax,1)");
+	asm volatile("bound %dx, 0x12345678(%ebp,%eax,1)");
+	asm volatile("bound %dx, 0x12345678(%eax,%ecx,1)");
+	asm volatile("bound %dx, 0x12345678(%eax,%ecx,8)");
+
+	/* AVX-512: Instructions with the same op codes as Mask Instructions  */
+
+	asm volatile("cmovno %eax,%ebx");
+	asm volatile("cmovno 0x12345678(%eax),%ecx");
+	asm volatile("cmovno 0x12345678(%eax),%cx");
+
+	asm volatile("cmove  %eax,%ebx");
+	asm volatile("cmove 0x12345678(%eax),%ecx");
+	asm volatile("cmove 0x12345678(%eax),%cx");
+
+	asm volatile("seto    0x12345678(%eax)");
+	asm volatile("setno   0x12345678(%eax)");
+	asm volatile("setb    0x12345678(%eax)");
+	asm volatile("setc    0x12345678(%eax)");
+	asm volatile("setnae  0x12345678(%eax)");
+	asm volatile("setae   0x12345678(%eax)");
+	asm volatile("setnb   0x12345678(%eax)");
+	asm volatile("setnc   0x12345678(%eax)");
+	asm volatile("sets    0x12345678(%eax)");
+	asm volatile("setns   0x12345678(%eax)");
+
+	/* AVX-512: Mask Instructions */
+
+	asm volatile("kandw  %k7,%k6,%k5");
+	asm volatile("kandq  %k7,%k6,%k5");
+	asm volatile("kandb  %k7,%k6,%k5");
+	asm volatile("kandd  %k7,%k6,%k5");
+
+	asm volatile("kandnw  %k7,%k6,%k5");
+	asm volatile("kandnq  %k7,%k6,%k5");
+	asm volatile("kandnb  %k7,%k6,%k5");
+	asm volatile("kandnd  %k7,%k6,%k5");
+
+	asm volatile("knotw  %k7,%k6");
+	asm volatile("knotq  %k7,%k6");
+	asm volatile("knotb  %k7,%k6");
+	asm volatile("knotd  %k7,%k6");
+
+	asm volatile("korw  %k7,%k6,%k5");
+	asm volatile("korq  %k7,%k6,%k5");
+	asm volatile("korb  %k7,%k6,%k5");
+	asm volatile("kord  %k7,%k6,%k5");
+
+	asm volatile("kxnorw  %k7,%k6,%k5");
+	asm volatile("kxnorq  %k7,%k6,%k5");
+	asm volatile("kxnorb  %k7,%k6,%k5");
+	asm volatile("kxnord  %k7,%k6,%k5");
+
+	asm volatile("kxorw  %k7,%k6,%k5");
+	asm volatile("kxorq  %k7,%k6,%k5");
+	asm volatile("kxorb  %k7,%k6,%k5");
+	asm volatile("kxord  %k7,%k6,%k5");
+
+	asm volatile("kaddw  %k7,%k6,%k5");
+	asm volatile("kaddq  %k7,%k6,%k5");
+	asm volatile("kaddb  %k7,%k6,%k5");
+	asm volatile("kaddd  %k7,%k6,%k5");
+
+	asm volatile("kunpckbw %k7,%k6,%k5");
+	asm volatile("kunpckwd %k7,%k6,%k5");
+	asm volatile("kunpckdq %k7,%k6,%k5");
+
+	asm volatile("kmovw  %k6,%k5");
+	asm volatile("kmovw  (%ecx),%k5");
+	asm volatile("kmovw  0x123(%eax,%ecx,8),%k5");
+	asm volatile("kmovw  %k5,(%ecx)");
+	asm volatile("kmovw  %k5,0x123(%eax,%ecx,8)");
+	asm volatile("kmovw  %eax,%k5");
+	asm volatile("kmovw  %ebp,%k5");
+	asm volatile("kmovw  %k5,%eax");
+	asm volatile("kmovw  %k5,%ebp");
+
+	asm volatile("kmovq  %k6,%k5");
+	asm volatile("kmovq  (%ecx),%k5");
+	asm volatile("kmovq  0x123(%eax,%ecx,8),%k5");
+	asm volatile("kmovq  %k5,(%ecx)");
+	asm volatile("kmovq  %k5,0x123(%eax,%ecx,8)");
+
+	asm volatile("kmovb  %k6,%k5");
+	asm volatile("kmovb  (%ecx),%k5");
+	asm volatile("kmovb  0x123(%eax,%ecx,8),%k5");
+	asm volatile("kmovb  %k5,(%ecx)");
+	asm volatile("kmovb  %k5,0x123(%eax,%ecx,8)");
+	asm volatile("kmovb  %eax,%k5");
+	asm volatile("kmovb  %ebp,%k5");
+	asm volatile("kmovb  %k5,%eax");
+	asm volatile("kmovb  %k5,%ebp");
+
+	asm volatile("kmovd  %k6,%k5");
+	asm volatile("kmovd  (%ecx),%k5");
+	asm volatile("kmovd  0x123(%eax,%ecx,8),%k5");
+	asm volatile("kmovd  %k5,(%ecx)");
+	asm volatile("kmovd  %k5,0x123(%eax,%ecx,8)");
+	asm volatile("kmovd  %eax,%k5");
+	asm volatile("kmovd  %ebp,%k5");
+	asm volatile("kmovd  %k5,%eax");
+	asm volatile("kmovd  %k5,%ebp");
+
+	asm volatile("kortestw %k6,%k5");
+	asm volatile("kortestq %k6,%k5");
+	asm volatile("kortestb %k6,%k5");
+	asm volatile("kortestd %k6,%k5");
+
+	asm volatile("ktestw %k6,%k5");
+	asm volatile("ktestq %k6,%k5");
+	asm volatile("ktestb %k6,%k5");
+	asm volatile("ktestd %k6,%k5");
+
+	asm volatile("kshiftrw $0x12,%k6,%k5");
+	asm volatile("kshiftrq $0x5b,%k6,%k5");
+	asm volatile("kshiftlw $0x12,%k6,%k5");
+	asm volatile("kshiftlq $0x5b,%k6,%k5");
+
+	/* AVX-512: Op code 0f 5b */
+	asm volatile("vcvtdq2ps %xmm5,%xmm6");
+	asm volatile("vcvtqq2ps %zmm5,%ymm6{%k7}");
+	asm volatile("vcvtps2dq %xmm5,%xmm6");
+	asm volatile("vcvttps2dq %xmm5,%xmm6");
+
+	/* AVX-512: Op code 0f 6f */
+
+	asm volatile("movq   %mm0,%mm4");
+	asm volatile("vmovdqa %ymm4,%ymm6");
+	asm volatile("vmovdqa32 %zmm5,%zmm6");
+	asm volatile("vmovdqa64 %zmm5,%zmm6");
+	asm volatile("vmovdqu %ymm4,%ymm6");
+	asm volatile("vmovdqu32 %zmm5,%zmm6");
+	asm volatile("vmovdqu64 %zmm5,%zmm6");
+	asm volatile("vmovdqu8 %zmm5,%zmm6");
+	asm volatile("vmovdqu16 %zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 78 */
+
+	asm volatile("vmread %eax,%ebx");
+	asm volatile("vcvttps2udq %zmm5,%zmm6");
+	asm volatile("vcvttpd2udq %zmm5,%ymm6{%k7}");
+	asm volatile("vcvttsd2usi %xmm6,%eax");
+	asm volatile("vcvttss2usi %xmm6,%eax");
+	asm volatile("vcvttps2uqq %ymm5,%zmm6{%k7}");
+	asm volatile("vcvttpd2uqq %zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 79 */
+
+	asm volatile("vmwrite %eax,%ebx");
+	asm volatile("vcvtps2udq %zmm5,%zmm6");
+	asm volatile("vcvtpd2udq %zmm5,%ymm6{%k7}");
+	asm volatile("vcvtsd2usi %xmm6,%eax");
+	asm volatile("vcvtss2usi %xmm6,%eax");
+	asm volatile("vcvtps2uqq %ymm5,%zmm6{%k7}");
+	asm volatile("vcvtpd2uqq %zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 7a */
+
+	asm volatile("vcvtudq2pd %ymm5,%zmm6{%k7}");
+	asm volatile("vcvtuqq2pd %zmm5,%zmm6");
+	asm volatile("vcvtudq2ps %zmm5,%zmm6");
+	asm volatile("vcvtuqq2ps %zmm5,%ymm6{%k7}");
+	asm volatile("vcvttps2qq %ymm5,%zmm6{%k7}");
+	asm volatile("vcvttpd2qq %zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 7b */
+
+	asm volatile("vcvtusi2sd %eax,%xmm5,%xmm6");
+	asm volatile("vcvtusi2ss %eax,%xmm5,%xmm6");
+	asm volatile("vcvtps2qq %ymm5,%zmm6{%k7}");
+	asm volatile("vcvtpd2qq %zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 7f */
+
+	asm volatile("movq.s  %mm0,%mm4");
+	asm volatile("vmovdqa.s %ymm5,%ymm6");
+	asm volatile("vmovdqa32.s %zmm5,%zmm6");
+	asm volatile("vmovdqa64.s %zmm5,%zmm6");
+	asm volatile("vmovdqu.s %ymm5,%ymm6");
+	asm volatile("vmovdqu32.s %zmm5,%zmm6");
+	asm volatile("vmovdqu64.s %zmm5,%zmm6");
+	asm volatile("vmovdqu8.s %zmm5,%zmm6");
+	asm volatile("vmovdqu16.s %zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f db */
+
+	asm volatile("pand  %mm1,%mm2");
+	asm volatile("pand  %xmm1,%xmm2");
+	asm volatile("vpand  %ymm4,%ymm6,%ymm2");
+	asm volatile("vpandd %zmm4,%zmm5,%zmm6");
+	asm volatile("vpandq %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f df */
+
+	asm volatile("pandn  %mm1,%mm2");
+	asm volatile("pandn  %xmm1,%xmm2");
+	asm volatile("vpandn %ymm4,%ymm6,%ymm2");
+	asm volatile("vpandnd %zmm4,%zmm5,%zmm6");
+	asm volatile("vpandnq %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f e6 */
+
+	asm volatile("vcvttpd2dq %xmm1,%xmm2");
+	asm volatile("vcvtdq2pd %xmm5,%xmm6");
+	asm volatile("vcvtdq2pd %ymm5,%zmm6{%k7}");
+	asm volatile("vcvtqq2pd %zmm5,%zmm6");
+	asm volatile("vcvtpd2dq %xmm1,%xmm2");
+
+	/* AVX-512: Op code 0f eb */
+
+	asm volatile("por   %mm4,%mm6");
+	asm volatile("vpor   %ymm4,%ymm6,%ymm2");
+	asm volatile("vpord  %zmm4,%zmm5,%zmm6");
+	asm volatile("vporq  %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f ef */
+
+	asm volatile("pxor   %mm4,%mm6");
+	asm volatile("vpxor  %ymm4,%ymm6,%ymm2");
+	asm volatile("vpxord %zmm4,%zmm5,%zmm6");
+	asm volatile("vpxorq %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 10 */
+
+	asm volatile("pblendvb %xmm1,%xmm0");
+	asm volatile("vpsrlvw %zmm4,%zmm5,%zmm6");
+	asm volatile("vpmovuswb %zmm5,%ymm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 11 */
+
+	asm volatile("vpmovusdb %zmm5,%xmm6{%k7}");
+	asm volatile("vpsravw %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 12 */
+
+	asm volatile("vpmovusqb %zmm5,%xmm6{%k7}");
+	asm volatile("vpsllvw %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 13 */
+
+	asm volatile("vcvtph2ps %xmm3,%ymm5");
+	asm volatile("vcvtph2ps %ymm5,%zmm6{%k7}");
+	asm volatile("vpmovusdw %zmm5,%ymm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 14 */
+
+	asm volatile("blendvps %xmm1,%xmm0");
+	asm volatile("vpmovusqw %zmm5,%xmm6{%k7}");
+	asm volatile("vprorvd %zmm4,%zmm5,%zmm6");
+	asm volatile("vprorvq %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 15 */
+
+	asm volatile("blendvpd %xmm1,%xmm0");
+	asm volatile("vpmovusqd %zmm5,%ymm6{%k7}");
+	asm volatile("vprolvd %zmm4,%zmm5,%zmm6");
+	asm volatile("vprolvq %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 16 */
+
+	asm volatile("vpermps %ymm4,%ymm6,%ymm2");
+	asm volatile("vpermps %ymm4,%ymm6,%ymm2{%k7}");
+	asm volatile("vpermpd %ymm4,%ymm6,%ymm2{%k7}");
+
+	/* AVX-512: Op code 0f 38 19 */
+
+	asm volatile("vbroadcastsd %xmm4,%ymm6");
+	asm volatile("vbroadcastf32x2 %xmm7,%zmm6");
+
+	/* AVX-512: Op code 0f 38 1a */
+
+	asm volatile("vbroadcastf128 (%ecx),%ymm4");
+	asm volatile("vbroadcastf32x4 (%ecx),%zmm6");
+	asm volatile("vbroadcastf64x2 (%ecx),%zmm6");
+
+	/* AVX-512: Op code 0f 38 1b */
+
+	asm volatile("vbroadcastf32x8 (%ecx),%zmm6");
+	asm volatile("vbroadcastf64x4 (%ecx),%zmm6");
+
+	/* AVX-512: Op code 0f 38 1f */
+
+	asm volatile("vpabsq %zmm4,%zmm6");
+
+	/* AVX-512: Op code 0f 38 20 */
+
+	asm volatile("vpmovsxbw %xmm4,%xmm5");
+	asm volatile("vpmovswb %zmm5,%ymm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 21 */
+
+	asm volatile("vpmovsxbd %xmm4,%ymm6");
+	asm volatile("vpmovsdb %zmm5,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 22 */
+
+	asm volatile("vpmovsxbq %xmm4,%ymm4");
+	asm volatile("vpmovsqb %zmm5,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 23 */
+
+	asm volatile("vpmovsxwd %xmm4,%ymm4");
+	asm volatile("vpmovsdw %zmm5,%ymm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 24 */
+
+	asm volatile("vpmovsxwq %xmm4,%ymm6");
+	asm volatile("vpmovsqw %zmm5,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 25 */
+
+	asm volatile("vpmovsxdq %xmm4,%ymm4");
+	asm volatile("vpmovsqd %zmm5,%ymm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 26 */
+
+	asm volatile("vptestmb %zmm5,%zmm6,%k5");
+	asm volatile("vptestmw %zmm5,%zmm6,%k5");
+	asm volatile("vptestnmb %zmm4,%zmm5,%k5");
+	asm volatile("vptestnmw %zmm4,%zmm5,%k5");
+
+	/* AVX-512: Op code 0f 38 27 */
+
+	asm volatile("vptestmd %zmm5,%zmm6,%k5");
+	asm volatile("vptestmq %zmm5,%zmm6,%k5");
+	asm volatile("vptestnmd %zmm4,%zmm5,%k5");
+	asm volatile("vptestnmq %zmm4,%zmm5,%k5");
+
+	/* AVX-512: Op code 0f 38 28 */
+
+	asm volatile("vpmuldq %ymm4,%ymm6,%ymm2");
+	asm volatile("vpmovm2b %k5,%zmm6");
+	asm volatile("vpmovm2w %k5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 29 */
+
+	asm volatile("vpcmpeqq %ymm4,%ymm6,%ymm2");
+	asm volatile("vpmovb2m %zmm6,%k5");
+	asm volatile("vpmovw2m %zmm6,%k5");
+
+	/* AVX-512: Op code 0f 38 2a */
+
+	asm volatile("vmovntdqa (%ecx),%ymm4");
+	asm volatile("vpbroadcastmb2q %k6,%zmm1");
+
+	/* AVX-512: Op code 0f 38 2c */
+
+	asm volatile("vmaskmovps (%ecx),%ymm4,%ymm6");
+	asm volatile("vscalefps %zmm4,%zmm5,%zmm6");
+	asm volatile("vscalefpd %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 2d */
+
+	asm volatile("vmaskmovpd (%ecx),%ymm4,%ymm6");
+	asm volatile("vscalefss %xmm4,%xmm5,%xmm6{%k7}");
+	asm volatile("vscalefsd %xmm4,%xmm5,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 30 */
+
+	asm volatile("vpmovzxbw %xmm4,%ymm4");
+	asm volatile("vpmovwb %zmm5,%ymm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 31 */
+
+	asm volatile("vpmovzxbd %xmm4,%ymm6");
+	asm volatile("vpmovdb %zmm5,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 32 */
+
+	asm volatile("vpmovzxbq %xmm4,%ymm4");
+	asm volatile("vpmovqb %zmm5,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 33 */
+
+	asm volatile("vpmovzxwd %xmm4,%ymm4");
+	asm volatile("vpmovdw %zmm5,%ymm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 34 */
+
+	asm volatile("vpmovzxwq %xmm4,%ymm6");
+	asm volatile("vpmovqw %zmm5,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 35 */
+
+	asm volatile("vpmovzxdq %xmm4,%ymm4");
+	asm volatile("vpmovqd %zmm5,%ymm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 36 */
+
+	asm volatile("vpermd %ymm4,%ymm6,%ymm2");
+	asm volatile("vpermd %ymm4,%ymm6,%ymm2{%k7}");
+	asm volatile("vpermq %ymm4,%ymm6,%ymm2{%k7}");
+
+	/* AVX-512: Op code 0f 38 38 */
+
+	asm volatile("vpminsb %ymm4,%ymm6,%ymm2");
+	asm volatile("vpmovm2d %k5,%zmm6");
+	asm volatile("vpmovm2q %k5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 39 */
+
+	asm volatile("vpminsd %xmm1,%xmm2,%xmm3");
+	asm volatile("vpminsd %zmm4,%zmm5,%zmm6");
+	asm volatile("vpminsq %zmm4,%zmm5,%zmm6");
+	asm volatile("vpmovd2m %zmm6,%k5");
+	asm volatile("vpmovq2m %zmm6,%k5");
+
+	/* AVX-512: Op code 0f 38 3a */
+
+	asm volatile("vpminuw %ymm4,%ymm6,%ymm2");
+	asm volatile("vpbroadcastmw2d %k6,%zmm6");
+
+	/* AVX-512: Op code 0f 38 3b */
+
+	asm volatile("vpminud %ymm4,%ymm6,%ymm2");
+	asm volatile("vpminud %zmm4,%zmm5,%zmm6");
+	asm volatile("vpminuq %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 3d */
+
+	asm volatile("vpmaxsd %ymm4,%ymm6,%ymm2");
+	asm volatile("vpmaxsd %zmm4,%zmm5,%zmm6");
+	asm volatile("vpmaxsq %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 3f */
+
+	asm volatile("vpmaxud %ymm4,%ymm6,%ymm2");
+	asm volatile("vpmaxud %zmm4,%zmm5,%zmm6");
+	asm volatile("vpmaxuq %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 40 */
+
+	asm volatile("vpmulld %ymm4,%ymm6,%ymm2");
+	asm volatile("vpmulld %zmm4,%zmm5,%zmm6");
+	asm volatile("vpmullq %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 42 */
+
+	asm volatile("vgetexpps %zmm5,%zmm6");
+	asm volatile("vgetexppd %zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 43 */
+
+	asm volatile("vgetexpss %xmm4,%xmm5,%xmm6{%k7}");
+	asm volatile("vgetexpsd %xmm2,%xmm3,%xmm4{%k7}");
+
+	/* AVX-512: Op code 0f 38 44 */
+
+	asm volatile("vplzcntd %zmm5,%zmm6");
+	asm volatile("vplzcntq %zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 46 */
+
+	asm volatile("vpsravd %ymm4,%ymm6,%ymm2");
+	asm volatile("vpsravd %zmm4,%zmm5,%zmm6");
+	asm volatile("vpsravq %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 4c */
+
+	asm volatile("vrcp14ps %zmm5,%zmm6");
+	asm volatile("vrcp14pd %zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 4d */
+
+	asm volatile("vrcp14ss %xmm4,%xmm5,%xmm6{%k7}");
+	asm volatile("vrcp14sd %xmm4,%xmm5,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 4e */
+
+	asm volatile("vrsqrt14ps %zmm5,%zmm6");
+	asm volatile("vrsqrt14pd %zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 4f */
+
+	asm volatile("vrsqrt14ss %xmm4,%xmm5,%xmm6{%k7}");
+	asm volatile("vrsqrt14sd %xmm4,%xmm5,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 38 59 */
+
+	asm volatile("vpbroadcastq %xmm4,%xmm6");
+	asm volatile("vbroadcasti32x2 %xmm7,%zmm6");
+
+	/* AVX-512: Op code 0f 38 5a */
+
+	asm volatile("vbroadcasti128 (%ecx),%ymm4");
+	asm volatile("vbroadcasti32x4 (%ecx),%zmm6");
+	asm volatile("vbroadcasti64x2 (%ecx),%zmm6");
+
+	/* AVX-512: Op code 0f 38 5b */
+
+	asm volatile("vbroadcasti32x8 (%ecx),%zmm6");
+	asm volatile("vbroadcasti64x4 (%ecx),%zmm6");
+
+	/* AVX-512: Op code 0f 38 64 */
+
+	asm volatile("vpblendmd %zmm4,%zmm5,%zmm6");
+	asm volatile("vpblendmq %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 65 */
+
+	asm volatile("vblendmps %zmm4,%zmm5,%zmm6");
+	asm volatile("vblendmpd %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 66 */
+
+	asm volatile("vpblendmb %zmm4,%zmm5,%zmm6");
+	asm volatile("vpblendmw %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 75 */
+
+	asm volatile("vpermi2b %zmm4,%zmm5,%zmm6");
+	asm volatile("vpermi2w %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 76 */
+
+	asm volatile("vpermi2d %zmm4,%zmm5,%zmm6");
+	asm volatile("vpermi2q %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 77 */
+
+	asm volatile("vpermi2ps %zmm4,%zmm5,%zmm6");
+	asm volatile("vpermi2pd %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 7a */
+
+	asm volatile("vpbroadcastb %eax,%xmm3");
+
+	/* AVX-512: Op code 0f 38 7b */
+
+	asm volatile("vpbroadcastw %eax,%xmm3");
+
+	/* AVX-512: Op code 0f 38 7c */
+
+	asm volatile("vpbroadcastd %eax,%xmm3");
+
+	/* AVX-512: Op code 0f 38 7d */
+
+	asm volatile("vpermt2b %zmm4,%zmm5,%zmm6");
+	asm volatile("vpermt2w %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 7e */
+
+	asm volatile("vpermt2d %zmm4,%zmm5,%zmm6");
+	asm volatile("vpermt2q %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 7f */
+
+	asm volatile("vpermt2ps %zmm4,%zmm5,%zmm6");
+	asm volatile("vpermt2pd %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 83 */
+
+	asm volatile("vpmultishiftqb %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 88 */
+
+	asm volatile("vexpandps (%ecx),%zmm6");
+	asm volatile("vexpandpd (%ecx),%zmm6");
+
+	/* AVX-512: Op code 0f 38 89 */
+
+	asm volatile("vpexpandd (%ecx),%zmm6");
+	asm volatile("vpexpandq (%ecx),%zmm6");
+
+	/* AVX-512: Op code 0f 38 8a */
+
+	asm volatile("vcompressps %zmm6,(%ecx)");
+	asm volatile("vcompresspd %zmm6,(%ecx)");
+
+	/* AVX-512: Op code 0f 38 8b */
+
+	asm volatile("vpcompressd %zmm6,(%ecx)");
+	asm volatile("vpcompressq %zmm6,(%ecx)");
+
+	/* AVX-512: Op code 0f 38 8d */
+
+	asm volatile("vpermb %zmm4,%zmm5,%zmm6");
+	asm volatile("vpermw %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 90 */
+
+	asm volatile("vpgatherdd %xmm2,0x02(%ebp,%xmm7,2),%xmm1");
+	asm volatile("vpgatherdq %xmm2,0x04(%ebp,%xmm7,2),%xmm1");
+	asm volatile("vpgatherdd 0x7b(%ebp,%zmm7,8),%zmm6{%k1}");
+	asm volatile("vpgatherdq 0x7b(%ebp,%ymm7,8),%zmm6{%k1}");
+
+	/* AVX-512: Op code 0f 38 91 */
+
+	asm volatile("vpgatherqd %xmm2,0x02(%ebp,%xmm7,2),%xmm1");
+	asm volatile("vpgatherqq %xmm2,0x02(%ebp,%xmm7,2),%xmm1");
+	asm volatile("vpgatherqd 0x7b(%ebp,%zmm7,8),%ymm6{%k1}");
+	asm volatile("vpgatherqq 0x7b(%ebp,%zmm7,8),%zmm6{%k1}");
+
+	/* AVX-512: Op code 0f 38 a0 */
+
+	asm volatile("vpscatterdd %zmm6,0x7b(%ebp,%zmm7,8){%k1}");
+	asm volatile("vpscatterdq %zmm6,0x7b(%ebp,%ymm7,8){%k1}");
+
+	/* AVX-512: Op code 0f 38 a1 */
+
+	asm volatile("vpscatterqd %ymm6,0x7b(%ebp,%zmm7,8){%k1}");
+	asm volatile("vpscatterqq %ymm6,0x7b(%ebp,%ymm7,8){%k1}");
+
+	/* AVX-512: Op code 0f 38 a2 */
+
+	asm volatile("vscatterdps %zmm6,0x7b(%ebp,%zmm7,8){%k1}");
+	asm volatile("vscatterdpd %zmm6,0x7b(%ebp,%ymm7,8){%k1}");
+
+	/* AVX-512: Op code 0f 38 a3 */
+
+	asm volatile("vscatterqps %ymm6,0x7b(%ebp,%zmm7,8){%k1}");
+	asm volatile("vscatterqpd %zmm6,0x7b(%ebp,%zmm7,8){%k1}");
+
+	/* AVX-512: Op code 0f 38 b4 */
+
+	asm volatile("vpmadd52luq %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 b5 */
+
+	asm volatile("vpmadd52huq %zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 c4 */
+
+	asm volatile("vpconflictd %zmm5,%zmm6");
+	asm volatile("vpconflictq %zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 38 c8 */
+
+	asm volatile("vexp2ps %zmm6,%zmm7");
+	asm volatile("vexp2pd %zmm6,%zmm7");
+
+	/* AVX-512: Op code 0f 38 ca */
+
+	asm volatile("vrcp28ps %zmm6,%zmm7");
+	asm volatile("vrcp28pd %zmm6,%zmm7");
+
+	/* AVX-512: Op code 0f 38 cb */
+
+	asm volatile("vrcp28ss %xmm5,%xmm6,%xmm7{%k7}");
+	asm volatile("vrcp28sd %xmm5,%xmm6,%xmm7{%k7}");
+
+	/* AVX-512: Op code 0f 38 cc */
+
+	asm volatile("vrsqrt28ps %zmm6,%zmm7");
+	asm volatile("vrsqrt28pd %zmm6,%zmm7");
+
+	/* AVX-512: Op code 0f 38 cd */
+
+	asm volatile("vrsqrt28ss %xmm5,%xmm6,%xmm7{%k7}");
+	asm volatile("vrsqrt28sd %xmm5,%xmm6,%xmm7{%k7}");
+
+	/* AVX-512: Op code 0f 3a 03 */
+
+	asm volatile("valignd $0x12,%zmm5,%zmm6,%zmm7");
+	asm volatile("valignq $0x12,%zmm5,%zmm6,%zmm7");
+
+	/* AVX-512: Op code 0f 3a 08 */
+
+	asm volatile("vroundps $0x5,%ymm6,%ymm2");
+	asm volatile("vrndscaleps $0x12,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 3a 09 */
+
+	asm volatile("vroundpd $0x5,%ymm6,%ymm2");
+	asm volatile("vrndscalepd $0x12,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 3a 0a */
+
+	asm volatile("vroundss $0x5,%xmm4,%xmm6,%xmm2");
+	asm volatile("vrndscaless $0x12,%xmm4,%xmm5,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 3a 0b */
+
+	asm volatile("vroundsd $0x5,%xmm4,%xmm6,%xmm2");
+	asm volatile("vrndscalesd $0x12,%xmm4,%xmm5,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 3a 18 */
+
+	asm volatile("vinsertf128 $0x5,%xmm4,%ymm4,%ymm6");
+	asm volatile("vinsertf32x4 $0x12,%xmm4,%zmm5,%zmm6{%k7}");
+	asm volatile("vinsertf64x2 $0x12,%xmm4,%zmm5,%zmm6{%k7}");
+
+	/* AVX-512: Op code 0f 3a 19 */
+
+	asm volatile("vextractf128 $0x5,%ymm4,%xmm4");
+	asm volatile("vextractf32x4 $0x12,%zmm5,%xmm6{%k7}");
+	asm volatile("vextractf64x2 $0x12,%zmm5,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 3a 1a */
+
+	asm volatile("vinsertf32x8 $0x12,%ymm5,%zmm6,%zmm7{%k7}");
+	asm volatile("vinsertf64x4 $0x12,%ymm5,%zmm6,%zmm7{%k7}");
+
+	/* AVX-512: Op code 0f 3a 1b */
+
+	asm volatile("vextractf32x8 $0x12,%zmm6,%ymm7{%k7}");
+	asm volatile("vextractf64x4 $0x12,%zmm6,%ymm7{%k7}");
+
+	/* AVX-512: Op code 0f 3a 1e */
+
+	asm volatile("vpcmpud $0x12,%zmm6,%zmm7,%k5");
+	asm volatile("vpcmpuq $0x12,%zmm6,%zmm7,%k5");
+
+	/* AVX-512: Op code 0f 3a 1f */
+
+	asm volatile("vpcmpd $0x12,%zmm6,%zmm7,%k5");
+	asm volatile("vpcmpq $0x12,%zmm6,%zmm7,%k5");
+
+	/* AVX-512: Op code 0f 3a 23 */
+
+	asm volatile("vshuff32x4 $0x12,%zmm5,%zmm6,%zmm7");
+	asm volatile("vshuff64x2 $0x12,%zmm5,%zmm6,%zmm7");
+
+	/* AVX-512: Op code 0f 3a 25 */
+
+	asm volatile("vpternlogd $0x12,%zmm5,%zmm6,%zmm7");
+	asm volatile("vpternlogq $0x12,%zmm5,%zmm6,%zmm7");
+
+	/* AVX-512: Op code 0f 3a 26 */
+
+	asm volatile("vgetmantps $0x12,%zmm6,%zmm7");
+	asm volatile("vgetmantpd $0x12,%zmm6,%zmm7");
+
+	/* AVX-512: Op code 0f 3a 27 */
+
+	asm volatile("vgetmantss $0x12,%xmm5,%xmm6,%xmm7{%k7}");
+	asm volatile("vgetmantsd $0x12,%xmm5,%xmm6,%xmm7{%k7}");
+
+	/* AVX-512: Op code 0f 3a 38 */
+
+	asm volatile("vinserti128 $0x5,%xmm4,%ymm4,%ymm6");
+	asm volatile("vinserti32x4 $0x12,%xmm4,%zmm5,%zmm6{%k7}");
+	asm volatile("vinserti64x2 $0x12,%xmm4,%zmm5,%zmm6{%k7}");
+
+	/* AVX-512: Op code 0f 3a 39 */
+
+	asm volatile("vextracti128 $0x5,%ymm4,%xmm6");
+	asm volatile("vextracti32x4 $0x12,%zmm5,%xmm6{%k7}");
+	asm volatile("vextracti64x2 $0x12,%zmm5,%xmm6{%k7}");
+
+	/* AVX-512: Op code 0f 3a 3a */
+
+	asm volatile("vinserti32x8 $0x12,%ymm5,%zmm6,%zmm7{%k7}");
+	asm volatile("vinserti64x4 $0x12,%ymm5,%zmm6,%zmm7{%k7}");
+
+	/* AVX-512: Op code 0f 3a 3b */
+
+	asm volatile("vextracti32x8 $0x12,%zmm6,%ymm7{%k7}");
+	asm volatile("vextracti64x4 $0x12,%zmm6,%ymm7{%k7}");
+
+	/* AVX-512: Op code 0f 3a 3e */
+
+	asm volatile("vpcmpub $0x12,%zmm6,%zmm7,%k5");
+	asm volatile("vpcmpuw $0x12,%zmm6,%zmm7,%k5");
+
+	/* AVX-512: Op code 0f 3a 3f */
+
+	asm volatile("vpcmpb $0x12,%zmm6,%zmm7,%k5");
+	asm volatile("vpcmpw $0x12,%zmm6,%zmm7,%k5");
+
+	/* AVX-512: Op code 0f 3a 42 */
+
+	asm volatile("vmpsadbw $0x5,%ymm4,%ymm6,%ymm2");
+	asm volatile("vdbpsadbw $0x12,%zmm4,%zmm5,%zmm6");
+
+	/* AVX-512: Op code 0f 3a 43 */
+
+	asm volatile("vshufi32x4 $0x12,%zmm5,%zmm6,%zmm7");
+	asm volatile("vshufi64x2 $0x12,%zmm5,%zmm6,%zmm7");
+
+	/* AVX-512: Op code 0f 3a 50 */
+
+	asm volatile("vrangeps $0x12,%zmm5,%zmm6,%zmm7");
+	asm volatile("vrangepd $0x12,%zmm5,%zmm6,%zmm7");
+
+	/* AVX-512: Op code 0f 3a 51 */
+
+	asm volatile("vrangess $0x12,%xmm5,%xmm6,%xmm7");
+	asm volatile("vrangesd $0x12,%xmm5,%xmm6,%xmm7");
+
+	/* AVX-512: Op code 0f 3a 54 */
+
+	asm volatile("vfixupimmps $0x12,%zmm5,%zmm6,%zmm7");
+	asm volatile("vfixupimmpd $0x12,%zmm5,%zmm6,%zmm7");
+
+	/* AVX-512: Op code 0f 3a 55 */
+
+	asm volatile("vfixupimmss $0x12,%xmm5,%xmm6,%xmm7{%k7}");
+	asm volatile("vfixupimmsd $0x12,%xmm5,%xmm6,%xmm7{%k7}");
+
+	/* AVX-512: Op code 0f 3a 56 */
+
+	asm volatile("vreduceps $0x12,%zmm6,%zmm7");
+	asm volatile("vreducepd $0x12,%zmm6,%zmm7");
+
+	/* AVX-512: Op code 0f 3a 57 */
+
+	asm volatile("vreducess $0x12,%xmm5,%xmm6,%xmm7");
+	asm volatile("vreducesd $0x12,%xmm5,%xmm6,%xmm7");
+
+	/* AVX-512: Op code 0f 3a 66 */
+
+	asm volatile("vfpclassps $0x12,%zmm7,%k5");
+	asm volatile("vfpclasspd $0x12,%zmm7,%k5");
+
+	/* AVX-512: Op code 0f 3a 67 */
+
+	asm volatile("vfpclassss $0x12,%xmm7,%k5");
+	asm volatile("vfpclasssd $0x12,%xmm7,%k5");
+
+	/* AVX-512: Op code 0f 72 (Grp13) */
+
+	asm volatile("vprord $0x12,%zmm5,%zmm6");
+	asm volatile("vprorq $0x12,%zmm5,%zmm6");
+	asm volatile("vprold $0x12,%zmm5,%zmm6");
+	asm volatile("vprolq $0x12,%zmm5,%zmm6");
+	asm volatile("psrad  $0x2,%mm6");
+	asm volatile("vpsrad $0x5,%ymm6,%ymm2");
+	asm volatile("vpsrad $0x5,%zmm6,%zmm2");
+	asm volatile("vpsraq $0x5,%zmm6,%zmm2");
+
+	/* AVX-512: Op code 0f 38 c6 (Grp18) */
+
+	asm volatile("vgatherpf0dps 0x7b(%ebp,%zmm7,8){%k1}");
+	asm volatile("vgatherpf0dpd 0x7b(%ebp,%ymm7,8){%k1}");
+	asm volatile("vgatherpf1dps 0x7b(%ebp,%zmm7,8){%k1}");
+	asm volatile("vgatherpf1dpd 0x7b(%ebp,%ymm7,8){%k1}");
+	asm volatile("vscatterpf0dps 0x7b(%ebp,%zmm7,8){%k1}");
+	asm volatile("vscatterpf0dpd 0x7b(%ebp,%ymm7,8){%k1}");
+	asm volatile("vscatterpf1dps 0x7b(%ebp,%zmm7,8){%k1}");
+	asm volatile("vscatterpf1dpd 0x7b(%ebp,%ymm7,8){%k1}");
+
+	/* AVX-512: Op code 0f 38 c7 (Grp19) */
+
+	asm volatile("vgatherpf0qps 0x7b(%ebp,%zmm7,8){%k1}");
+	asm volatile("vgatherpf0qpd 0x7b(%ebp,%zmm7,8){%k1}");
+	asm volatile("vgatherpf1qps 0x7b(%ebp,%zmm7,8){%k1}");
+	asm volatile("vgatherpf1qpd 0x7b(%ebp,%zmm7,8){%k1}");
+	asm volatile("vscatterpf0qps 0x7b(%ebp,%zmm7,8){%k1}");
+	asm volatile("vscatterpf0qpd 0x7b(%ebp,%zmm7,8){%k1}");
+	asm volatile("vscatterpf1qps 0x7b(%ebp,%zmm7,8){%k1}");
+	asm volatile("vscatterpf1qpd 0x7b(%ebp,%zmm7,8){%k1}");
+
+	/* AVX-512: Examples */
+
+	asm volatile("vaddpd %zmm4,%zmm5,%zmm6");
+	asm volatile("vaddpd %zmm4,%zmm5,%zmm6{%k7}");
+	asm volatile("vaddpd %zmm4,%zmm5,%zmm6{%k7}{z}");
+	asm volatile("vaddpd {rn-sae},%zmm4,%zmm5,%zmm6");
+	asm volatile("vaddpd {ru-sae},%zmm4,%zmm5,%zmm6");
+	asm volatile("vaddpd {rd-sae},%zmm4,%zmm5,%zmm6");
+	asm volatile("vaddpd {rz-sae},%zmm4,%zmm5,%zmm6");
+	asm volatile("vaddpd (%ecx),%zmm5,%zmm6");
+	asm volatile("vaddpd 0x123(%eax,%ecx,8),%zmm5,%zmm6");
+	asm volatile("vaddpd (%ecx){1to8},%zmm5,%zmm6");
+	asm volatile("vaddpd 0x1fc0(%edx),%zmm5,%zmm6");
+	asm volatile("vaddpd 0x3f8(%edx){1to8},%zmm5,%zmm6");
+	asm volatile("vcmpeq_uqps 0x1fc(%edx){1to16},%zmm6,%k5");
+	asm volatile("vcmpltsd 0x123(%eax,%ecx,8),%xmm3,%k5{%k7}");
+	asm volatile("vcmplesd {sae},%xmm4,%xmm5,%k5{%k7}");
+	asm volatile("vgetmantss $0x5b,0x123(%eax,%ecx,8),%xmm4,%xmm5{%k7}");
+
 	/* bndmk m32, bnd */
 
 	asm volatile("bndmk (%eax), %bnd0");
diff --git a/tools/perf/tests/kmod-path.c b/tools/perf/tests/kmod-path.c
index d2af78193153..76f41f249944 100644
--- a/tools/perf/tests/kmod-path.c
+++ b/tools/perf/tests/kmod-path.c
@@ -1,4 +1,5 @@
 #include <stdbool.h>
+#include <stdlib.h>
 #include "tests.h"
 #include "dso.h"
 #include "debug.h"
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index b32464b353aa..8d363d5e65a2 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -8,7 +8,6 @@
 #include "map.h"
 #include "build-id.h"
 #include "perf_regs.h"
-#include <asm/perf_regs.h>
 
 struct mmap_event {
 	struct perf_event_header header;
diff --git a/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk b/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk
index 517567347aac..54e961659514 100644
--- a/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk
+++ b/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk
@@ -72,12 +72,14 @@ BEGIN {
 	lprefix_expr = "\\((66|F2|F3)\\)"
 	max_lprefix = 4
 
-	# All opcodes starting with lower-case 'v' or with (v1) superscript
+	# All opcodes starting with lower-case 'v', 'k' or with (v1) superscript
 	# accepts VEX prefix
-	vexok_opcode_expr = "^v.*"
+	vexok_opcode_expr = "^[vk].*"
 	vexok_expr = "\\(v1\\)"
 	# All opcodes with (v) superscript supports *only* VEX prefix
 	vexonly_expr = "\\(v\\)"
+	# All opcodes with (ev) superscript supports *only* EVEX prefix
+	evexonly_expr = "\\(ev\\)"
 
 	prefix_expr = "\\(Prefix\\)"
 	prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
@@ -95,6 +97,7 @@ BEGIN {
 	prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
 	prefix_num["VEX+1byte"] = "INAT_PFX_VEX2"
 	prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"
+	prefix_num["EVEX"] = "INAT_PFX_EVEX"
 
 	clear_vars()
 }
@@ -319,7 +322,9 @@ function convert_operands(count,opnd,       i,j,imm,mod)
 			flags = add_flags(flags, "INAT_MODRM")
 
 		# check VEX codes
-		if (match(ext, vexonly_expr))
+		if (match(ext, evexonly_expr))
+			flags = add_flags(flags, "INAT_VEXOK | INAT_EVEXONLY")
+		else if (match(ext, vexonly_expr))
 			flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
 		else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr))
 			flags = add_flags(flags, "INAT_VEXOK")
diff --git a/tools/perf/util/intel-pt-decoder/inat.h b/tools/perf/util/intel-pt-decoder/inat.h
index 611645e903a8..125ecd2a300d 100644
--- a/tools/perf/util/intel-pt-decoder/inat.h
+++ b/tools/perf/util/intel-pt-decoder/inat.h
@@ -48,6 +48,7 @@
 /* AVX VEX prefixes */
 #define INAT_PFX_VEX2	13	/* 2-bytes VEX prefix */
 #define INAT_PFX_VEX3	14	/* 3-bytes VEX prefix */
+#define INAT_PFX_EVEX	15	/* EVEX prefix */
 
 #define INAT_LSTPFX_MAX	3
 #define INAT_LGCPFX_MAX	11
@@ -89,6 +90,7 @@
 #define INAT_VARIANT	(1 << (INAT_FLAG_OFFS + 4))
 #define INAT_VEXOK	(1 << (INAT_FLAG_OFFS + 5))
 #define INAT_VEXONLY	(1 << (INAT_FLAG_OFFS + 6))
+#define INAT_EVEXONLY	(1 << (INAT_FLAG_OFFS + 7))
 /* Attribute making macros for attribute tables */
 #define INAT_MAKE_PREFIX(pfx)	(pfx << INAT_PFX_OFFS)
 #define INAT_MAKE_ESCAPE(esc)	(esc << INAT_ESC_OFFS)
@@ -141,7 +143,13 @@ static inline int inat_last_prefix_id(insn_attr_t attr)
 static inline int inat_is_vex_prefix(insn_attr_t attr)
 {
 	attr &= INAT_PFX_MASK;
-	return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3;
+	return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3 ||
+	       attr == INAT_PFX_EVEX;
+}
+
+static inline int inat_is_evex_prefix(insn_attr_t attr)
+{
+	return (attr & INAT_PFX_MASK) == INAT_PFX_EVEX;
 }
 
 static inline int inat_is_vex3_prefix(insn_attr_t attr)
@@ -216,6 +224,11 @@ static inline int inat_accept_vex(insn_attr_t attr)
 
 static inline int inat_must_vex(insn_attr_t attr)
 {
-	return attr & INAT_VEXONLY;
+	return attr & (INAT_VEXONLY | INAT_EVEXONLY);
+}
+
+static inline int inat_must_evex(insn_attr_t attr)
+{
+	return attr & INAT_EVEXONLY;
 }
 #endif
diff --git a/tools/perf/util/intel-pt-decoder/insn.c b/tools/perf/util/intel-pt-decoder/insn.c
index 9f26eae6c9f0..ca983e2bea8b 100644
--- a/tools/perf/util/intel-pt-decoder/insn.c
+++ b/tools/perf/util/intel-pt-decoder/insn.c
@@ -155,14 +155,24 @@ void insn_get_prefixes(struct insn *insn)
 			/*
 			 * In 32-bits mode, if the [7:6] bits (mod bits of
 			 * ModRM) on the second byte are not 11b, it is
-			 * LDS or LES.
+			 * LDS or LES or BOUND.
 			 */
 			if (X86_MODRM_MOD(b2) != 3)
 				goto vex_end;
 		}
 		insn->vex_prefix.bytes[0] = b;
 		insn->vex_prefix.bytes[1] = b2;
-		if (inat_is_vex3_prefix(attr)) {
+		if (inat_is_e