FYI, we noticed the below changes on git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git sched/core commit 1d0dcb3ad9d336e6d6ee020a750a7f8d907e28de ("futex: Implement lockless wakeups") testcase/path_params/tbox_group: will-it-scale/powersave-pthread_mutex1/lituya 7675104990ed255b 1d0dcb3ad9d336e6d6ee020a75 ---------------- -------------------------- %stddev %change %stddev \ | \ 5989 ± 2% +9.6% 6566 ± 2% will-it-scale.time.involuntary_context_switches 136 ± 0% +32.2% 180 ± 0% will-it-scale.time.user_time 31929544 ± 0% +64.7% 52574500 ± 0% will-it-scale.time.voluntary_context_switches 1241 ± 0% -4.8% 1182 ± 0% will-it-scale.time.system_time 453 ± 0% -1.3% 447 ± 0% will-it-scale.time.percent_of_cpu_this_job_got 213800 ± 0% +62.8% 348085 ± 0% vmstat.system.cs 20662 ± 2% +10.3% 22800 ± 1% vmstat.system.in 136 ± 0% +32.2% 180 ± 0% time.user_time 31929544 ± 0% +64.7% 52574500 ± 0% time.voluntary_context_switches 25497500 ± 1% +79.8% 45847592 ± 0% cpuidle.C1-HSW.usage 25909310 ± 9% +40.4% 36385075 ± 10% cpuidle.C1-HSW.time 385 ± 1% -3.9% 370 ± 1% latency_stats.avg.do_wait.SyS_wait4.system_call_fastpath 45 ± 19% +46.4% 66 ± 12% latency_stats.avg.unix_wait_for_peer.unix_dgram_sendmsg.sock_sendmsg.___sys_sendmsg.__sys_sendmsg.SyS_sendmsg.system_call_fastpath 3199870 ± 2% +107.9% 6653184 ± 3% latency_stats.hits.futex_wait_queue_me.futex_wait.do_futex.SyS_futex.system_call_fastpath 4291 ± 5% -26.1% 3172 ± 4% latency_stats.max.pipe_wait.pipe_read.__vfs_read.vfs_read.SyS_read.system_call_fastpath 942123 ± 1% -3.8% 906698 ± 1% latency_stats.sum.do_wait.SyS_wait4.system_call_fastpath 14068 ± 10% -10.3% 12621 ± 3% latency_stats.sum.ep_poll.SyS_epoll_wait.system_call_fastpath 1442334 ± 0% -7.1% 1339318 ± 1% latency_stats.sum.pipe_wait.pipe_read.__vfs_read.vfs_read.SyS_read.system_call_fastpath 4763770 ± 1% +41.4% 6733885 ± 3% latency_stats.sum.futex_wait_queue_me.futex_wait.do_futex.SyS_futex.system_call_fastpath 49 ± 1% -24.7% 37 ± 18% sched_debug.cfs_rq[13]:/.load 52 ± 4% -18.1% 43 ± 20% sched_debug.cfs_rq[13]:/.runnable_load_avg 672 ± 29% -43.0% 383 ± 19% sched_debug.cfs_rq[14]:/.utilization_load_avg 596 ± 12% +16.9% 697 ± 1% sched_debug.cfs_rq[15]:/.tg_runnable_contrib 51 ± 20% -35.9% 33 ± 14% sched_debug.cfs_rq[3]:/.runnable_load_avg 68 ± 27% -35.0% 44 ± 48% sched_debug.cfs_rq[3]:/.load 433123 ± 13% -21.0% 342236 ± 7% sched_debug.cfs_rq[7]:/.min_vruntime 21198 ± 15% -21.0% 16755 ± 1% sched_debug.cfs_rq[7]:/.avg->runnable_avg_sum 35997 ± 20% -29.4% 25419 ± 9% sched_debug.cfs_rq[7]:/.exec_clock 906034 ± 16% +21.5% 1101140 ± 8% sched_debug.cpu#1.sched_goidle 1813964 ± 16% +21.5% 2203255 ± 8% sched_debug.cpu#1.nr_switches 911649 ± 16% +25.8% 1146513 ± 8% sched_debug.cpu#1.ttwu_count 1814079 ± 16% +21.5% 2203379 ± 8% sched_debug.cpu#1.sched_count 1616994 ± 19% +114.4% 3466653 ± 7% sched_debug.cpu#10.nr_switches 7 ± 17% -53.6% 3 ± 25% sched_debug.cpu#10.nr_uninterruptible 427 ± 5% +29.6% 553 ± 9% sched_debug.cpu#10.ttwu_local 822628 ± 20% +115.5% 1772801 ± 7% sched_debug.cpu#10.ttwu_count 1617109 ± 19% +114.4% 3466772 ± 7% sched_debug.cpu#10.sched_count 808217 ± 19% +114.4% 1732959 ± 7% sched_debug.cpu#10.sched_goidle 1818260 ± 10% +56.9% 2851959 ± 16% sched_debug.cpu#11.sched_count 908654 ± 10% +56.9% 1425514 ± 16% sched_debug.cpu#11.sched_goidle 1818138 ± 10% +56.9% 2851840 ± 16% sched_debug.cpu#11.nr_switches 896624 ± 8% +52.5% 1367494 ± 15% sched_debug.cpu#11.ttwu_count 1857 ± 11% +18.6% 2202 ± 8% sched_debug.cpu#12.curr->pid 58 ± 10% -19.8% 46 ± 20% sched_debug.cpu#13.cpu_load[2] 1885115 ± 10% +64.6% 3102536 ± 11% sched_debug.cpu#13.nr_switches 1885155 ± 10% +64.6% 3102576 ± 11% sched_debug.cpu#13.sched_count 942063 ± 10% +64.6% 1550798 ± 11% sched_debug.cpu#13.sched_goidle 920620 ± 9% +62.2% 1493320 ± 9% sched_debug.cpu#13.ttwu_count 56 ± 5% -23.7% 42 ± 15% sched_debug.cpu#13.cpu_load[4] 57 ± 7% -23.1% 44 ± 14% sched_debug.cpu#13.cpu_load[3] 49 ± 1% -24.7% 37 ± 18% sched_debug.cpu#13.load 745920 ± 12% +91.8% 1430771 ± 4% sched_debug.cpu#14.sched_goidle 1492512 ± 12% +91.8% 2862256 ± 4% sched_debug.cpu#14.nr_switches 746237 ± 12% +88.0% 1403001 ± 4% sched_debug.cpu#14.ttwu_count 1492551 ± 12% +91.8% 2862299 ± 4% sched_debug.cpu#14.sched_count 577321 ± 30% +161.5% 1509820 ± 7% sched_debug.cpu#15.sched_goidle 574967 ± 31% +165.3% 1525263 ± 2% sched_debug.cpu#15.ttwu_count 1155549 ± 30% +161.4% 3020803 ± 7% sched_debug.cpu#15.nr_switches 1155589 ± 30% +161.4% 3020846 ± 7% sched_debug.cpu#15.sched_count 1446483 ± 7% +75.4% 2537378 ± 15% sched_debug.cpu#3.nr_switches 720522 ± 9% +71.2% 1233795 ± 18% sched_debug.cpu#3.ttwu_count 722651 ± 7% +75.5% 1268100 ± 15% sched_debug.cpu#3.sched_goidle 1446610 ± 7% +75.4% 2537497 ± 15% sched_debug.cpu#3.sched_count 823639 ± 17% +80.6% 1487383 ± 14% sched_debug.cpu#5.nr_switches 27549 ± 6% +22.7% 33806 ± 16% sched_debug.cpu#5.nr_load_updates 670845 ± 9% -11.3% 595156 ± 5% sched_debug.cpu#5.avg_idle 410777 ± 17% +80.8% 742641 ± 14% sched_debug.cpu#5.sched_goidle 407183 ± 17% +78.5% 726745 ± 15% sched_debug.cpu#5.ttwu_count 823685 ± 17% +80.6% 1487428 ± 14% sched_debug.cpu#5.sched_count 1291878 ± 19% +26.7% 1636521 ± 11% sched_debug.cpu#6.nr_switches 2 ± 35% +150.0% 5 ± 24% sched_debug.cpu#6.nr_uninterruptible 1291922 ± 19% +26.7% 1636561 ± 11% sched_debug.cpu#6.sched_count 644667 ± 19% +26.7% 816931 ± 11% sched_debug.cpu#6.sched_goidle 999303 ± 8% +80.0% 1799127 ± 9% sched_debug.cpu#8.sched_goidle 1999260 ± 8% +80.0% 3599253 ± 9% sched_debug.cpu#8.nr_switches 1019255 ± 4% +77.5% 1809326 ± 8% sched_debug.cpu#8.ttwu_count 1999377 ± 8% +80.0% 3599372 ± 9% sched_debug.cpu#8.sched_count 737902 ± 8% -21.6% 578475 ± 15% sched_debug.cpu#9.avg_idle 1696089 ± 13% +100.9% 3406654 ± 2% sched_debug.cpu#9.sched_count 1695966 ± 13% +100.9% 3406521 ± 2% sched_debug.cpu#9.nr_switches 847698 ± 13% +100.9% 1702912 ± 2% sched_debug.cpu#9.sched_goidle 848268 ± 11% +113.4% 1810338 ± 5% sched_debug.cpu#9.ttwu_count testcase/path_params/tbox_group: will-it-scale/performance-pthread_mutex1/lituya 7675104990ed255b 1d0dcb3ad9d336e6d6ee020a75 ---------------- -------------------------- 135 ± 0% +32.6% 179 ± 0% will-it-scale.time.user_time 31795775 ± 0% +65.2% 52519962 ± 0% will-it-scale.time.voluntary_context_switches 1242 ± 0% -4.8% 1182 ± 0% will-it-scale.time.system_time 452 ± 0% -1.1% 447 ± 0% will-it-scale.time.percent_of_cpu_this_job_got 9 ± 0% -11.1% 8 ± 0% vmstat.procs.r 213068 ± 0% +63.4% 348183 ± 0% vmstat.system.cs 20072 ± 1% +15.4% 23157 ± 1% vmstat.system.in 135 ± 0% +32.6% 179 ± 0% time.user_time 31795775 ± 0% +65.2% 52519962 ± 0% time.voluntary_context_switches 25884654 ± 0% +77.0% 45817600 ± 0% cpuidle.C1-HSW.usage 27193585 ± 9% +50.5% 40917267 ± 9% cpuidle.C1-HSW.time 405 ± 15% +90.6% 772 ± 18% cpuidle.POLL.usage 302 ± 2% -7.3% 280 ± 2% latency_stats.avg.do_wait.SyS_wait4.system_call_fastpath 637 ± 34% +81.5% 1156 ± 36% latency_stats.avg.ep_poll.SyS_epoll_wait.system_call_fastpath 221 ± 1% -8.5% 202 ± 0% latency_stats.avg.pipe_wait.pipe_read.__vfs_read.vfs_read.SyS_read.system_call_fastpath 3365738 ± 2% +99.0% 6698670 ± 3% latency_stats.hits.futex_wait_queue_me.futex_wait.do_futex.SyS_futex.system_call_fastpath 224 ± 25% +94.2% 435 ± 39% latency_stats.max.rpc_wait_bit_killable.__rpc_execute.rpc_execute.rpc_run_task.nfs4_call_sync_sequence.[nfsv4]._nfs4_proc_access.[nfsv4].nfs4_proc_access.[nfsv4].nfs_do_access.nfs_permission.__inode_permission.inode_permission.may_open 741091 ± 3% -7.2% 687939 ± 2% latency_stats.sum.do_wait.SyS_wait4.system_call_fastpath 1150919 ± 1% -10.0% 1035254 ± 1% latency_stats.sum.pipe_wait.pipe_read.__vfs_read.vfs_read.SyS_read.system_call_fastpath 4920864 ± 2% +37.8% 6781129 ± 3% latency_stats.sum.futex_wait_queue_me.futex_wait.do_futex.SyS_futex.system_call_fastpath 5.32 ± 6% -36.1% 3.40 ± 41% perf-profile.cpu-cycles.search_binary_handler.do_execveat_common.sys_execve.return_from_execve 5.42 ± 4% -37.2% 3.40 ± 41% perf-profile.cpu-cycles.load_elf_binary.search_binary_handler.do_execveat_common.sys_execve.return_from_execve 7.46 ± 20% -43.7% 4.20 ± 41% perf-profile.cpu-cycles.return_from_execve 7.46 ± 20% -43.7% 4.20 ± 41% perf-profile.cpu-cycles.do_execveat_common.isra.29.sys_execve.return_from_execve 7.46 ± 20% -43.7% 4.20 ± 41% perf-profile.cpu-cycles.sys_execve.return_from_execve 13 ± 19% -50.9% 6 ± 35% sched_debug.cfs_rq[11]:/.nr_spread_over 23 ± 34% +48.4% 34 ± 19% sched_debug.cfs_rq[4]:/.runnable_load_avg 54 ± 22% -43.8% 30 ± 26% sched_debug.cfs_rq[7]:/.runnable_load_avg 39 ± 26% +47.8% 58 ± 24% sched_debug.cfs_rq[8]:/.runnable_load_avg 31 ± 1% +56.8% 49 ± 2% sched_debug.cfs_rq[9]:/.runnable_load_avg 567 ± 13% +23.2% 699 ± 1% sched_debug.cfs_rq[9]:/.tg_runnable_contrib 31 ± 1% +21.6% 38 ± 17% sched_debug.cfs_rq[9]:/.load 26043 ± 12% +23.3% 32103 ± 0% sched_debug.cfs_rq[9]:/.avg->runnable_avg_sum 522365 ± 32% +75.0% 914370 ± 16% sched_debug.cpu#0.ttwu_count 1020121 ± 32% +76.7% 1802488 ± 17% sched_debug.cpu#0.sched_count 507393 ± 33% +77.1% 898471 ± 18% sched_debug.cpu#0.sched_goidle 1019797 ± 32% +76.7% 1802170 ± 17% sched_debug.cpu#0.nr_switches 49 ± 7% -31.0% 34 ± 2% sched_debug.cpu#1.cpu_load[1] 51 ± 8% -33.7% 34 ± 10% sched_debug.cpu#1.cpu_load[0] 48 ± 11% -28.6% 34 ± 3% sched_debug.cpu#1.cpu_load[4] 477826 ± 16% +42.3% 680096 ± 4% sched_debug.cpu#1.avg_idle 48 ± 10% -29.0% 34 ± 3% sched_debug.cpu#1.cpu_load[3] 48 ± 9% -30.9% 33 ± 1% sched_debug.cpu#1.cpu_load[2] 1499187 ± 31% +87.2% 2806508 ± 21% sched_debug.cpu#10.nr_switches 754505 ± 34% +88.6% 1423293 ± 19% sched_debug.cpu#10.ttwu_count 1499311 ± 31% +87.2% 2806624 ± 21% sched_debug.cpu#10.sched_count 749245 ± 31% +87.2% 1402814 ± 21% sched_debug.cpu#10.sched_goidle 49 ± 10% -19.1% 40 ± 4% sched_debug.cpu#11.cpu_load[1] 52 ± 17% -33.8% 34 ± 13% sched_debug.cpu#11.cpu_load[0] 50 ± 7% -23.0% 38 ± 15% sched_debug.cpu#12.cpu_load[3] 53 ± 10% -26.8% 39 ± 15% sched_debug.cpu#12.cpu_load[2] 646988 ± 35% +123.8% 1448084 ± 19% sched_debug.cpu#12.ttwu_count 47 ± 10% -20.5% 37 ± 18% sched_debug.cpu#12.cpu_load[4] 651640 ± 36% +132.8% 1517162 ± 19% sched_debug.cpu#12.sched_goidle 1303921 ± 36% +132.8% 3035620 ± 19% sched_debug.cpu#12.nr_switches 64 ± 21% -35.0% 41 ± 33% sched_debug.cpu#12.cpu_load[0] 58 ± 16% -30.5% 40 ± 20% sched_debug.cpu#12.cpu_load[1] 1303958 ± 36% +132.8% 3035660 ± 19% sched_debug.cpu#12.sched_count 1642807 ± 22% +78.5% 2932208 ± 9% sched_debug.cpu#13.nr_switches 1642856 ± 22% +78.5% 2932247 ± 9% sched_debug.cpu#13.sched_count 821009 ± 22% +78.5% 1465663 ± 9% sched_debug.cpu#13.sched_goidle 788799 ± 19% +81.5% 1431516 ± 8% sched_debug.cpu#13.ttwu_count 870563 ± 20% +44.4% 1257081 ± 15% sched_debug.cpu#14.sched_goidle 1741973 ± 20% +44.4% 2515088 ± 15% sched_debug.cpu#14.nr_switches 866217 ± 19% +43.8% 1245955 ± 12% sched_debug.cpu#14.ttwu_count 1742015 ± 20% +44.4% 2515126 ± 15% sched_debug.cpu#14.sched_count 737410 ± 10% -16.4% 616336 ± 7% sched_debug.cpu#15.avg_idle 700131 ± 34% +86.8% 1307981 ± 17% sched_debug.cpu#15.sched_goidle 699153 ± 35% +83.6% 1283986 ± 18% sched_debug.cpu#15.ttwu_count 1401279 ± 34% +86.8% 2617150 ± 17% sched_debug.cpu#15.nr_switches 1401320 ± 34% +86.8% 2617190 ± 17% sched_debug.cpu#15.sched_count 1192893 ± 20% +100.2% 2387841 ± 32% sched_debug.cpu#3.nr_switches 601136 ± 20% +97.4% 1186749 ± 29% sched_debug.cpu#3.ttwu_count 595525 ± 20% +100.4% 1193216 ± 32% sched_debug.cpu#3.sched_goidle 1193047 ± 20% +100.2% 2387951 ± 32% sched_debug.cpu#3.sched_count 26 ± 30% +61.0% 42 ± 20% sched_debug.cpu#4.load 25 ± 30% +47.1% 37 ± 18% sched_debug.cpu#4.cpu_load[4] 973362 ± 30% +99.1% 1937649 ± 17% sched_debug.cpu#6.nr_switches 973400 ± 30% +99.1% 1937691 ± 17% sched_debug.cpu#6.sched_count 485548 ± 30% +99.4% 967982 ± 17% sched_debug.cpu#6.sched_goidle 489774 ± 31% +101.8% 988405 ± 21% sched_debug.cpu#6.ttwu_count 1037194 ± 16% +62.4% 1684672 ± 10% sched_debug.cpu#8.sched_goidle 2074962 ± 16% +62.4% 3370325 ± 10% sched_debug.cpu#8.nr_switches 1063289 ± 16% +59.3% 1693545 ± 9% sched_debug.cpu#8.ttwu_count 2075084 ± 16% +62.4% 3370436 ± 10% sched_debug.cpu#8.sched_count 35 ± 18% +45.0% 50 ± 3% sched_debug.cpu#9.cpu_load[2] 1715208 ± 22% +104.6% 3508980 ± 11% sched_debug.cpu#9.sched_count 35 ± 18% +42.0% 50 ± 3% sched_debug.cpu#9.cpu_load[3] 1901 ± 8% +15.2% 2191 ± 0% sched_debug.cpu#9.curr->pid 1715087 ± 22% +104.6% 3508864 ± 11% sched_debug.cpu#9.nr_switches 35 ± 18% +40.7% 49 ± 3% sched_debug.cpu#9.cpu_load[0] 35 ± 18% +42.0% 50 ± 3% sched_debug.cpu#9.cpu_load[4] 35 ± 18% +40.7% 49 ± 1% sched_debug.cpu#9.load 34 ± 15% +47.8% 50 ± 3% sched_debug.cpu#9.cpu_load[1] 857164 ± 22% +104.6% 1754026 ± 11% sched_debug.cpu#9.sched_goidle 868187 ± 22% +112.2% 1842383 ± 14% sched_debug.cpu#9.ttwu_count 478 ± 12% +49.7% 716 ± 21% sched_debug.cpu#9.ttwu_local testcase/path_params/tbox_group: will-it-scale/pthread_mutex1/lkp-sb03 7675104990ed255b 1d0dcb3ad9d336e6d6ee020a75 ---------------- -------------------------- 142 ± 0% +26.7% 180 ± 0% will-it-scale.time.user_time 21289130 ± 1% +60.5% 34178388 ± 1% will-it-scale.time.voluntary_context_switches 2337 ± 0% -2.2% 2285 ± 0% will-it-scale.time.system_time 136738 ± 1% +60.2% 219104 ± 1% vmstat.system.cs 24114 ± 2% +19.0% 28694 ± 5% vmstat.system.in 142 ± 0% +26.7% 180 ± 0% time.user_time 21289130 ± 1% +60.5% 34178388 ± 1% time.voluntary_context_switches 1025 ± 10% +20.3% 1233 ± 8% numa-meminfo.node0.Unevictable 1025 ± 10% +20.3% 1233 ± 8% numa-meminfo.node0.Mlocked 1210 ± 8% -17.2% 1002 ± 10% numa-meminfo.node1.Mlocked 1210 ± 8% -17.2% 1002 ± 10% numa-meminfo.node1.Unevictable 256 ± 10% +20.2% 307 ± 8% numa-vmstat.node0.nr_mlock 256 ± 10% +20.2% 307 ± 8% numa-vmstat.node0.nr_unevictable 302 ± 8% -17.1% 250 ± 10% numa-vmstat.node1.nr_mlock 302 ± 8% -17.1% 250 ± 10% numa-vmstat.node1.nr_unevictable 11992500 ± 1% +64.4% 19712016 ± 2% cpuidle.C1-SNB.usage 9 ± 33% -50.0% 4 ± 27% latency_stats.avg.wait_on_page_bit_killable.__lock_page_or_retry.filemap_fault.__do_fault.handle_pte_fault.handle_mm_fault.__do_page_fault.do_page_fault.page_fault 8981882 ± 2% +31.8% 11837616 ± 2% latency_stats.hits.futex_wait_queue_me.futex_wait.do_futex.SyS_futex.system_call_fastpath 3056125 ± 0% +1.0% 3085848 ± 0% latency_stats.sum.do_wait.SyS_wait4.system_call_fastpath 4855509 ± 0% +1.4% 4923890 ± 0% latency_stats.sum.pipe_wait.pipe_read.__vfs_read.vfs_read.SyS_read.system_call_fastpath 16544499 ± 2% +31.4% 21738877 ± 3% latency_stats.sum.futex_wait_queue_me.futex_wait.do_futex.SyS_futex.system_call_fastpath 0.86 ± 1% +29.4% 1.11 ± 2% perf-profile.cpu-cycles.print_context_stack.dump_trace.save_stack_trace_tsk.__account_scheduler_latency.enqueue_entity 0.00 ± 0% +Inf% 1.44 ± 6% perf-profile.cpu-cycles.ttwu_do_wakeup.ttwu_do_activate.try_to_wake_up.wake_up_process.wake_up_q 0.00 ± 0% +Inf% 10.52 ± 2% perf-profile.cpu-cycles.wake_up_q.futex_wake.do_futex.sys_futex.system_call_fastpath 0.00 ± 0% +Inf% 1.49 ± 3% perf-profile.cpu-cycles.mark_wake_futex.futex_wake.do_futex.sys_futex.system_call_fastpath 0.00 ± 0% +Inf% 10.04 ± 2% perf-profile.cpu-cycles.wake_up_process.wake_up_q.futex_wake.do_futex.sys_futex 5.57 ± 1% -100.0% 0.00 ± 0% perf-profile.cpu-cycles.wake_up_state.wake_futex.futex_wake.do_futex.sys_futex 6.44 ± 1% -100.0% 0.00 ± 0% perf-profile.cpu-cycles.wake_futex.futex_wake.do_futex.sys_futex.system_call_fastpath 52.44 ± 0% -17.7% 43.16 ± 0% perf-profile.cpu-cycles.futex_wake.do_futex.sys_futex.system_call_fastpath 0.56 ± 2% +93.7% 1.07 ± 4% perf-profile.cpu-cycles.tick_program_event.__hrtimer_start_range_ns.hrtimer_start_range_ns.tick_nohz_restart.tick_nohz_idle_exit 2.67 ± 2% -100.0% 0.00 ± 0% perf-profile.cpu-cycles.enqueue_task.activate_task.ttwu_do_activate.try_to_wake_up.wake_up_state 1.60 ± 1% +85.3% 2.96 ± 5% perf-profile.cpu-cycles.__schedule.schedule.schedule_preempt_disabled.cpu_startup_entry.start_secondary 1.55 ± 3% +89.8% 2.94 ± 5% perf-profile.cpu-cycles.tick_nohz_idle_exit.cpu_startup_entry.start_secondary 2.12 ± 1% +59.8% 3.40 ± 1% perf-profile.cpu-cycles.enqueue_entity.enqueue_task_fair.enqueue_task.activate_task.ttwu_do_activate 0.53 ± 6% +79.9% 0.96 ± 3% perf-profile.cpu-cycles.menu_select.cpuidle_select.cpu_startup_entry.start_secondary 1.04 ± 4% +83.8% 1.90 ± 4% perf-profile.cpu-cycles.get_futex_value_locked.futex_wait_setup.futex_wait.do_futex.sys_futex 0.60 ± 5% +82.6% 1.10 ± 2% perf-profile.cpu-cycles.cpuidle_select.cpu_startup_entry.start_secondary 1.12 ± 1% +105.8% 2.30 ± 4% perf-profile.cpu-cycles.hrtimer_start.tick_nohz_stop_sched_tick.__tick_nohz_idle_enter.tick_nohz_idle_enter.cpu_startup_entry 1.08 ± 1% +105.5% 2.22 ± 4% perf-profile.cpu-cycles.__hrtimer_start_range_ns.hrtimer_start.tick_nohz_stop_sched_tick.__tick_nohz_idle_enter.tick_nohz_idle_enter 0.00 ± 0% +Inf% 6.03 ± 1% perf-profile.cpu-cycles.ttwu_do_activate.constprop.87.try_to_wake_up.wake_up_process.wake_up_q.futex_wake 0.00 ± 0% +Inf% 4.55 ± 1% perf-profile.cpu-cycles.activate_task.ttwu_do_activate.try_to_wake_up.wake_up_process.wake_up_q 89.43 ± 0% -11.2% 79.44 ± 0% perf-profile.cpu-cycles.system_call_fastpath 5.54 ± 1% -100.0% 0.00 ± 0% perf-profile.cpu-cycles.try_to_wake_up.wake_up_state.wake_futex.futex_wake.do_futex 2.41 ± 2% +66.9% 4.02 ± 1% perf-profile.cpu-cycles.enqueue_task_fair.enqueue_task.activate_task.ttwu_do_activate.try_to_wake_up 1.31 ± 4% +107.0% 2.72 ± 3% perf-profile.cpu-cycles.dequeue_task.deactivate_task.__schedule.schedule.futex_wait_queue_me 1.32 ± 4% +105.9% 2.72 ± 3% perf-profile.cpu-cycles.deactivate_task.__schedule.schedule.futex_wait_queue_me.futex_wait 1.01 ± 2% +29.6% 1.31 ± 3% perf-profile.cpu-cycles.__account_scheduler_latency.enqueue_entity.enqueue_task_fair.enqueue_task.activate_task 89.24 ± 0% -11.3% 79.12 ± 0% perf-profile.cpu-cycles.sys_futex.system_call_fastpath 89.03 ± 0% -11.6% 78.72 ± 0% perf-profile.cpu-cycles.do_futex.sys_futex.system_call_fastpath 27.10 ± 0% -34.4% 17.78 ± 1% perf-profile.cpu-cycles._raw_spin_lock.futex_wait_setup.futex_wait.do_futex.sys_futex 0.66 ± 4% +108.3% 1.38 ± 0% perf-profile.cpu-cycles.__remove_hrtimer.__hrtimer_start_range_ns.hrtimer_start.tick_nohz_stop_sched_tick.__tick_nohz_idle_enter 0.87 ± 6% +96.3% 1.71 ± 5% perf-profile.cpu-cycles.intel_idle.cpuidle_enter_state.cpuidle_enter.cpu_startup_entry.start_secondary 1.09 ± 5% +92.6% 2.09 ± 4% perf-profile.cpu-cycles.tick_nohz_restart.tick_nohz_idle_exit.cpu_startup_entry.start_secondary 1.23 ± 5% +105.3% 2.52 ± 2% perf-profile.cpu-cycles.dequeue_task_fair.dequeue_task.deactivate_task.__schedule.schedule 2.76 ± 1% +102.0% 5.57 ± 1% perf-profile.cpu-cycles.__schedule.schedule.futex_wait_queue_me.futex_wait.do_futex 0.93 ± 2% +29.0% 1.20 ± 2% perf-profile.cpu-cycles.dump_trace.save_stack_trace_tsk.__account_scheduler_latency.enqueue_entity.enqueue_task_fair 31.56 ± 0% -18.7% 25.64 ± 0% perf-profile.cpu-cycles.futex_wait_setup.futex_wait.do_futex.sys_futex.system_call_fastpath 0.83 ± 3% +87.6% 1.55 ± 5% perf-profile.cpu-cycles.hrtimer_start_range_ns.tick_nohz_restart.tick_nohz_idle_exit.cpu_startup_entry.start_secondary 0.95 ± 2% +28.6% 1.22 ± 2% perf-profile.cpu-cycles.save_stack_trace_tsk.__account_scheduler_latency.enqueue_entity.enqueue_task_fair.enqueue_task 0.53 ± 1% +96.2% 1.04 ± 3% perf-profile.cpu-cycles.clockevents_program_event.tick_program_event.__remove_hrtimer.__hrtimer_start_range_ns.hrtimer_start 1.60 ± 2% +94.8% 3.12 ± 5% perf-profile.cpu-cycles.tick_nohz_stop_sched_tick.__tick_nohz_idle_enter.tick_nohz_idle_enter.cpu_startup_entry.start_secondary 42.33 ± 0% -40.8% 25.08 ± 1% perf-profile.cpu-cycles._raw_spin_lock.futex_wake.do_futex.sys_futex.system_call_fastpath 2.86 ± 1% +100.1% 5.72 ± 1% perf-profile.cpu-cycles.schedule.futex_wait_queue_me.futex_wait.do_futex.sys_futex 0.54 ± 0% +97.7% 1.06 ± 2% perf-profile.cpu-cycles.tick_program_event.__remove_hrtimer.__hrtimer_start_range_ns.hrtimer_start.tick_nohz_stop_sched_tick 1.01 ± 7% +103.5% 2.05 ± 3% perf-profile.cpu-cycles.dequeue_entity.dequeue_task_fair.dequeue_task.deactivate_task.__schedule 1.66 ± 0% +86.1% 3.08 ± 5% perf-profile.cpu-cycles.schedule_preempt_disabled.cpu_startup_entry.start_secondary 2.68 ± 2% -100.0% 0.00 ± 0% perf-profile.cpu-cycles.activate_task.ttwu_do_activate.try_to_wake_up.wake_up_state.wake_futex 1.80 ± 2% +96.0% 3.53 ± 6% perf-profile.cpu-cycles.__tick_nohz_idle_enter.tick_nohz_idle_enter.cpu_startup_entry.start_secondary 0.81 ± 3% +96.6% 1.59 ± 2% perf-profile.cpu-cycles.__hrtimer_start_range_ns.hrtimer_start_range_ns.tick_nohz_restart.tick_nohz_idle_exit.cpu_startup_entry 1.89 ± 2% +95.5% 3.70 ± 5% perf-profile.cpu-cycles.tick_nohz_idle_enter.cpu_startup_entry.start_secondary 3.42 ± 1% -100.0% 0.00 ± 0% perf-profile.cpu-cycles.ttwu_do_activate.constprop.87.try_to_wake_up.wake_up_state.wake_futex.futex_wake 1.05 ± 6% +98.1% 2.08 ± 6% perf-profile.cpu-cycles.cpuidle_enter_state.cpuidle_enter.cpu_startup_entry.start_secondary 0.62 ± 2% +96.8% 1.23 ± 1% perf-profile.cpu-cycles.set_next_entity.pick_next_task_fair.__schedule.schedule.schedule_preempt_disabled 3.46 ± 1% +97.6% 6.83 ± 1% perf-profile.cpu-cycles.futex_wait_queue_me.futex_wait.do_futex.sys_futex.system_call_fastpath 0.00 ± 0% +Inf% 4.54 ± 1% perf-profile.cpu-cycles.enqueue_task.activate_task.ttwu_do_activate.try_to_wake_up.wake_up_process 8.27 ± 1% +91.7% 15.86 ± 4% perf-profile.cpu-cycles.start_secondary 1.03 ± 2% +96.4% 2.03 ± 1% perf-profile.cpu-cycles.pick_next_task_fair.__schedule.schedule.schedule_preempt_disabled.cpu_startup_entry 8.12 ± 1% +91.6% 15.56 ± 4% perf-profile.cpu-cycles.cpu_startup_entry.start_secondary 1.10 ± 6% +100.2% 2.21 ± 5% perf-profile.cpu-cycles.cpuidle_enter.cpu_startup_entry.start_secondary 0.54 ± 2% +94.4% 1.05 ± 3% perf-profile.cpu-cycles.clockevents_program_event.tick_program_event.__hrtimer_start_range_ns.hrtimer_start_range_ns.tick_nohz_restart 1.61 ± 1% +86.8% 3.01 ± 5% perf-profile.cpu-cycles.schedule.schedule_preempt_disabled.cpu_startup_entry.start_secondary 0.00 ± 0% +Inf% 9.90 ± 2% perf-profile.cpu-cycles.try_to_wake_up.wake_up_process.wake_up_q.futex_wake.do_futex 510107 ± 6% -24.1% 387141 ± 14% sched_debug.cfs_rq[12]:/.min_vruntime 19054 ± 8% -26.6% 13995 ± 16% sched_debug.cfs_rq[12]:/.exec_clock 350 ± 31% -36.2% 223 ± 30% sched_debug.cfs_rq[13]:/.tg_runnable_contrib 16072 ± 31% -35.9% 10302 ± 29% sched_debug.cfs_rq[13]:/.avg->runnable_avg_sum 16913 ± 33% -48.9% 8648 ± 3% sched_debug.cfs_rq[14]:/.avg->runnable_avg_sum 20 ± 49% -48.8% 10 ± 31% sched_debug.cfs_rq[14]:/.runnable_load_avg 369 ± 33% -48.8% 188 ± 3% sched_debug.cfs_rq[14]:/.tg_runnable_contrib 210 ± 17% +70.7% 359 ± 2% sched_debug.cfs_rq[15]:/.tg_runnable_contrib 15752 ± 22% +58.6% 24978 ± 21% sched_debug.cfs_rq[15]:/.exec_clock 9620 ± 17% +70.9% 16437 ± 2% sched_debug.cfs_rq[15]:/.avg->runnable_avg_sum 8529 ± 5% -10.4% 7639 ± 7% sched_debug.cfs_rq[1]:/.tg_load_avg 2 ± 47% +136.4% 6 ± 31% sched_debug.cfs_rq[24]:/.nr_spread_over 47788 ± 3% +11.0% 53030 ± 4% sched_debug.cfs_rq[28]:/.exec_clock 8514 ± 5% -10.3% 7634 ± 7% sched_debug.cfs_rq[2]:/.tg_load_avg 17945 ± 18% +14.3% 20504 ± 20% sched_debug.cfs_rq[2]:/.avg->runnable_avg_sum 391 ± 18% +14.6% 448 ± 19% sched_debug.cfs_rq[2]:/.tg_runnable_contrib 360 ± 27% +43.3% 516 ± 0% sched_debug.cfs_rq[30]:/.tg_runnable_contrib 22649 ± 7% -27.4% 16434 ± 2% sched_debug.cfs_rq[31]:/.avg->runnable_avg_sum 493 ± 7% -27.6% 357 ± 2% sched_debug.cfs_rq[31]:/.tg_runnable_contrib 8510 ± 5% -10.3% 7634 ± 7% sched_debug.cfs_rq[3]:/.tg_load_avg 8511 ± 5% -12.3% 7461 ± 10% sched_debug.cfs_rq[4]:/.tg_load_avg 8503 ± 5% -12.3% 7460 ± 10% sched_debug.cfs_rq[5]:/.tg_load_avg 8197 ± 6% -9.2% 7446 ± 10% sched_debug.cfs_rq[6]:/.tg_load_avg 2994 ± 18% -40.2% 1792 ± 11% sched_debug.cpu#10.ttwu_local 620311 ± 16% +19.5% 741285 ± 9% sched_debug.cpu#11.avg_idle 1291 ± 11% +71.4% 2214 ± 17% sched_debug.cpu#12.ttwu_local 44903 ± 2% -10.9% 40028 ± 6% sched_debug.cpu#12.nr_load_updates 18 ± 43% -56.2% 8 ± 29% sched_debug.cpu#14.cpu_load[1] 16 ± 40% -64.1% 5 ± 14% sched_debug.cpu#14.cpu_load[4] 16 ± 39% -64.2% 6 ± 11% sched_debug.cpu#14.cpu_load[3] 20 ± 49% -48.8% 10 ± 31% sched_debug.cpu#14.cpu_load[0] 651720 ± 19% +24.1% 808588 ± 3% sched_debug.cpu#14.avg_idle 17 ± 42% -61.4% 6 ± 16% sched_debug.cpu#14.cpu_load[2] 778639 ± 3% -17.1% 645179 ± 3% sched_debug.cpu#15.avg_idle 54145 ± 40% +98.6% 107550 ± 26% sched_debug.cpu#15.sched_goidle 48079 ± 36% +114.8% 103279 ± 25% sched_debug.cpu#15.ttwu_count 109390 ± 40% +99.3% 218008 ± 26% sched_debug.cpu#15.nr_switches 1187 ± 42% +133.4% 2771 ± 9% sched_debug.cpu#15.curr->pid 349 ± 17% +48.8% 520 ± 8% sched_debug.cpu#16.ttwu_local 904055 ± 18% +61.1% 1456350 ± 22% sched_debug.cpu#18.sched_goidle 1810323 ± 18% +61.1% 2915683 ± 22% sched_debug.cpu#18.sched_count 1809190 ± 18% +61.0% 2913153 ± 22% sched_debug.cpu#18.nr_switches 289 ± 25% +88.0% 543 ± 10% sched_debug.cpu#20.ttwu_local 3334 ± 8% -26.2% 2460 ± 19% sched_debug.cpu#21.curr->pid 85059 ± 35% +58.4% 134729 ± 4% sched_debug.cpu#24.ttwu_count 103842 ± 6% +46.8% 152488 ± 13% sched_debug.cpu#28.sched_goidle 221524 ± 8% +40.9% 312183 ± 13% sched_debug.cpu#28.sched_count 208190 ± 6% +46.8% 305531 ± 13% sched_debug.cpu#28.nr_switches 86918 ± 5% +68.6% 146587 ± 11% sched_debug.cpu#28.ttwu_count 207717 ± 16% +31.4% 272890 ± 6% sched_debug.cpu#29.nr_switches 103534 ± 16% +31.5% 136195 ± 6% sched_debug.cpu#29.sched_goidle 88859 ± 17% +50.8% 134010 ± 8% sched_debug.cpu#29.ttwu_count 207734 ± 16% +31.6% 273286 ± 6% sched_debug.cpu#29.sched_count 954 ± 22% +56.1% 1489 ± 24% sched_debug.cpu#3.ttwu_local 81526 ± 39% +68.8% 137618 ± 16% sched_debug.cpu#30.sched_goidle 163546 ± 39% +68.6% 275753 ± 16% sched_debug.cpu#30.nr_switches 163564 ± 39% +72.4% 281962 ± 13% sched_debug.cpu#30.sched_count 70152 ± 37% +89.1% 132670 ± 19% sched_debug.cpu#30.ttwu_count 497038 ± 1% +32.8% 660184 ± 1% sched_debug.cpu#31.avg_idle 695 ± 25% +49.6% 1040 ± 19% sched_debug.cpu#4.ttwu_local 641 ± 12% +58.6% 1017 ± 20% sched_debug.cpu#6.ttwu_local testcase/path_params/tbox_group: tlbflush/performance-200%-32x-512/lituya 7675104990ed255b 1d0dcb3ad9d336e6d6ee020a75 ---------------- -------------------------- lituya: Grantley Haswell Memory: 16G lkp-sb03: Sandy Bridge-EP Memory: 64G 7.5e+06 ++----------------------------------------------------------------+ 7e+06 ++ O | | O O O | 6.5e+06 O+ O O O O O O | 6e+06 ++ O O O O O O O O | | | 5.5e+06 ++ | 5e+06 ++ | 4.5e+06 ++ | | | 4e+06 ++ | 3.5e+06 ++ | *..*.*..*..*. .*. .*..*.*.. .*.. .*.. .*. .*.*..*..*.*..* 3e+06 ++ *. *. *..*.*. * *. *. | 2.5e+06 ++----------------------------------------------------------------+ 7.5e+06 ++----------------------------------------------------------------+ | | 7e+06 ++ O O O | | O | 6.5e+06 O+ O O O O O O | | O O O O O O O | 6e+06 ++ O | | | 5.5e+06 ++ | | | 5e+06 ++ .* | | .*..*. : *.. .*.. .*. .*..*.*..* 4.5e+06 *+.* : .*.*..*..*.*.. .*. .. * *. *..*.*. | | *. *. * | 4e+06 ++----------------------------------------------------------------+ cpuidle.C1-HSW.usage 5e+07 ++----------------------------------------------------------------+ O | 4.5e+07 ++ O O O O O O O O O O O O O O O O O O | | | | | 4e+07 ++ | | | 3.5e+07 ++ | | | 3e+07 ++ | | | *..*.*..*..*.*..*.*..*..*.*..*..*.*..*..*. .*. .*. .*.. .*..* 2.5e+07 ++ *..*. *. *. * | | | 2e+07 ++----------------------------------------------------------------+ will-it-scale.time.user_time 185 ++--------------------------------------------------------------------+ 180 O+ O O O O O O O O O O O O O O | | O O O O | 175 ++ | 170 ++ | | | 165 ++ | 160 ++ | 155 ++ | | | 150 ++ | 145 ++ | | | 140 *+.*.. .*..*..*..*.*..*..*..*..*.*.. .*.. .*. .*.. .*. | 135 ++----*-*------------------------------*-----*----*--*-----*----*--*--* will-it-scale.time.system_time 1250 ++-------------------------------------------------------------------+ | | 1240 *+.*..*.*..*..*..*.*..*..*..*.*..*..*..*.*..*..*..*.*..*..*..*.*..*..* | | 1230 ++ | | | 1220 ++ | | | 1210 ++ | | | 1200 ++ | | | 1190 ++ | | O O O O O O O O | 1180 O+-O--O----O------------------O--O--O--O-O-----O--O------------------+ will-it-scale.time.percent_of_cpu_this_job_got 453 *+-*--*-*--*--*--*--*-*--*--*--*--*-*--*--*--*--*-*--*--*--*--*-*--*--* | | 452 ++ | | | | | 451 ++ | | | 450 ++ | | | 449 ++ | | | | | 448 ++ O O O O O O O O O | | | 447 O+----O----------O-------------O--O-O--O--O-----O-O-------------------+ will-it-scale.time.voluntary_context_switches 5.5e+07 ++----------------------------------------------------------------+ O O O O O O O O O O O O O O O | | O O O O | 5e+07 ++ | | | | | 4.5e+07 ++ | | | 4e+07 ++ | | | | | 3.5e+07 ++ | | | *..*.*..*..*.*..*.*..*..*.*..*..*.*..*..*.*..*..*.*..*.*..*..*.*..* 3e+07 ++----------------------------------------------------------------+ time.user_time 185 ++--------------------------------------------------------------------+ 180 O+ O O O O O O O O O O O O O O | | O O O O | 175 ++ | 170 ++ | | | 165 ++ | 160 ++ | 155 ++ | | | 150 ++ | 145 ++ | | | 140 *+.*.. .*..*..*..*.*..*..*..*..*.*.. .*.. .*. .*.. .*. | 135 ++----*-*------------------------------*-----*----*--*-----*----*--*--* time.voluntary_context_switches 5.5e+07 ++----------------------------------------------------------------+ O O O O O O O O O O O O O O O | | O O O O | 5e+07 ++ | | | | | 4.5e+07 ++ | | | 4e+07 ++ | | | | | 3.5e+07 ++ | | | *..*.*..*..*.*..*.*..*..*.*..*..*.*..*..*.*..*..*.*..*.*..*..*.*..* 3e+07 ++----------------------------------------------------------------+ vmstat.system.cs 360000 ++-----------------------------------------------------------------+ O O O O O O O O O O O O O O O O O O O | 340000 ++ | 320000 ++ | | | 300000 ++ | | | 280000 ++ | | | 260000 ++ | 240000 ++ | | | 220000 ++ | *..*.*..*..*.*..*..*.*..*..*.*..*..*..*.*..*..*.*..*..*.*..*..*.*..* 200000 ++-----------------------------------------------------------------+ [*] bisect-good sample [O] bisect-bad sample To reproduce: apt-get install ruby git clone git://git.kernel.org/pub/scm/linux/kernel/git/wfg/lkp-tests.git cd lkp-tests bin/setup-local job.yaml # the job file attached in this email bin/run-local job.yaml Disclaimer: Results have been estimated based on internal Intel analysis and are provided for informational purposes only. Any difference in system hardware or software design or configuration may affect actual performance. Thanks, Ying Huang