[v5] pgo: add clang's Profile Guided Optimization infrastructure
diff mbox series

Message ID 20210116094357.3620352-1-morbo@google.com
State New, archived
Headers show
Series
  • [v5] pgo: add clang's Profile Guided Optimization infrastructure
Related show

Commit Message

Bill Wendling Jan. 16, 2021, 9:43 a.m. UTC
From: Sami Tolvanen <samitolvanen@google.com>

Enable the use of clang's Profile-Guided Optimization[1]. To generate a
profile, the kernel is instrumented with PGO counters, a representative
workload is run, and the raw profile data is collected from
/sys/kernel/debug/pgo/profraw.

The raw profile data must be processed by clang's "llvm-profdata" tool
before it can be used during recompilation:

  $ cp /sys/kernel/debug/pgo/profraw vmlinux.profraw
  $ llvm-profdata merge --output=vmlinux.profdata vmlinux.profraw

Multiple raw profiles may be merged during this step.

The data can now be used by the compiler:

  $ make LLVM=1 KCFLAGS=-fprofile-use=vmlinux.profdata ...

This initial submission is restricted to x86, as that's the platform we
know works. This restriction can be lifted once other platforms have
been verified to work with PGO.

Note that this method of profiling the kernel is clang-native, unlike
the clang support in kernel/gcov.

[1] https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization

Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Co-developed-by: Bill Wendling <morbo@google.com>
Signed-off-by: Bill Wendling <morbo@google.com>
---
v2: - Added "__llvm_profile_instrument_memop" based on Nathan Chancellor's
      testing.
    - Corrected documentation, re PGO flags when using LTO, based on Fangrui
      Song's comments.
v3: - Added change log section based on Sedat Dilek's comments.
v4: - Remove non-x86 Makefile changes and use "hweight64" instead of using our
      own popcount implementation, based on Nick Desaulniers's comment.
v5: - Correct padding calculation, discovered by Nathan Chancellor.
---
 Documentation/dev-tools/index.rst     |   1 +
 Documentation/dev-tools/pgo.rst       | 127 +++++++++
 MAINTAINERS                           |   9 +
 Makefile                              |   3 +
 arch/Kconfig                          |   1 +
 arch/x86/Kconfig                      |   1 +
 arch/x86/boot/Makefile                |   1 +
 arch/x86/boot/compressed/Makefile     |   1 +
 arch/x86/crypto/Makefile              |   2 +
 arch/x86/entry/vdso/Makefile          |   1 +
 arch/x86/kernel/vmlinux.lds.S         |   2 +
 arch/x86/platform/efi/Makefile        |   1 +
 arch/x86/purgatory/Makefile           |   1 +
 arch/x86/realmode/rm/Makefile         |   1 +
 arch/x86/um/vdso/Makefile             |   1 +
 drivers/firmware/efi/libstub/Makefile |   1 +
 include/asm-generic/vmlinux.lds.h     |  44 +++
 kernel/Makefile                       |   1 +
 kernel/pgo/Kconfig                    |  35 +++
 kernel/pgo/Makefile                   |   5 +
 kernel/pgo/fs.c                       | 382 ++++++++++++++++++++++++++
 kernel/pgo/instrument.c               | 185 +++++++++++++
 kernel/pgo/pgo.h                      | 206 ++++++++++++++
 scripts/Makefile.lib                  |  10 +
 24 files changed, 1022 insertions(+)
 create mode 100644 Documentation/dev-tools/pgo.rst
 create mode 100644 kernel/pgo/Kconfig
 create mode 100644 kernel/pgo/Makefile
 create mode 100644 kernel/pgo/fs.c
 create mode 100644 kernel/pgo/instrument.c
 create mode 100644 kernel/pgo/pgo.h

Comments

Sedat Dilek Jan. 16, 2021, 5:38 p.m. UTC | #1
On Sat, Jan 16, 2021 at 10:44 AM 'Bill Wendling' via Clang Built Linux
<clang-built-linux@googlegroups.com> wrote:
>
> From: Sami Tolvanen <samitolvanen@google.com>
>
> Enable the use of clang's Profile-Guided Optimization[1]. To generate a
> profile, the kernel is instrumented with PGO counters, a representative
> workload is run, and the raw profile data is collected from
> /sys/kernel/debug/pgo/profraw.
>
> The raw profile data must be processed by clang's "llvm-profdata" tool
> before it can be used during recompilation:
>
>   $ cp /sys/kernel/debug/pgo/profraw vmlinux.profraw
>   $ llvm-profdata merge --output=vmlinux.profdata vmlinux.profraw
>
> Multiple raw profiles may be merged during this step.
>
> The data can now be used by the compiler:
>
>   $ make LLVM=1 KCFLAGS=-fprofile-use=vmlinux.profdata ...
>
> This initial submission is restricted to x86, as that's the platform we
> know works. This restriction can be lifted once other platforms have
> been verified to work with PGO.
>
> Note that this method of profiling the kernel is clang-native, unlike
> the clang support in kernel/gcov.
>
> [1] https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization
>
> Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
> Co-developed-by: Bill Wendling <morbo@google.com>
> Signed-off-by: Bill Wendling <morbo@google.com>
> ---
> v2: - Added "__llvm_profile_instrument_memop" based on Nathan Chancellor's
>       testing.
>     - Corrected documentation, re PGO flags when using LTO, based on Fangrui
>       Song's comments.
> v3: - Added change log section based on Sedat Dilek's comments.
> v4: - Remove non-x86 Makefile changes and use "hweight64" instead of using our
>       own popcount implementation, based on Nick Desaulniers's comment.
> v5: - Correct padding calculation, discovered by Nathan Chancellor.
> ---
>  Documentation/dev-tools/index.rst     |   1 +
>  Documentation/dev-tools/pgo.rst       | 127 +++++++++
>  MAINTAINERS                           |   9 +
>  Makefile                              |   3 +
>  arch/Kconfig                          |   1 +
>  arch/x86/Kconfig                      |   1 +
>  arch/x86/boot/Makefile                |   1 +
>  arch/x86/boot/compressed/Makefile     |   1 +
>  arch/x86/crypto/Makefile              |   2 +
>  arch/x86/entry/vdso/Makefile          |   1 +
>  arch/x86/kernel/vmlinux.lds.S         |   2 +
>  arch/x86/platform/efi/Makefile        |   1 +
>  arch/x86/purgatory/Makefile           |   1 +
>  arch/x86/realmode/rm/Makefile         |   1 +
>  arch/x86/um/vdso/Makefile             |   1 +
>  drivers/firmware/efi/libstub/Makefile |   1 +
>  include/asm-generic/vmlinux.lds.h     |  44 +++
>  kernel/Makefile                       |   1 +
>  kernel/pgo/Kconfig                    |  35 +++
>  kernel/pgo/Makefile                   |   5 +
>  kernel/pgo/fs.c                       | 382 ++++++++++++++++++++++++++
>  kernel/pgo/instrument.c               | 185 +++++++++++++
>  kernel/pgo/pgo.h                      | 206 ++++++++++++++
>  scripts/Makefile.lib                  |  10 +
>  24 files changed, 1022 insertions(+)
>  create mode 100644 Documentation/dev-tools/pgo.rst
>  create mode 100644 kernel/pgo/Kconfig
>  create mode 100644 kernel/pgo/Makefile
>  create mode 100644 kernel/pgo/fs.c
>  create mode 100644 kernel/pgo/instrument.c
>  create mode 100644 kernel/pgo/pgo.h
>
> diff --git a/Documentation/dev-tools/index.rst b/Documentation/dev-tools/index.rst
> index f7809c7b1ba9e..8d6418e858062 100644
> --- a/Documentation/dev-tools/index.rst
> +++ b/Documentation/dev-tools/index.rst
> @@ -26,6 +26,7 @@ whole; patches welcome!
>     kgdb
>     kselftest
>     kunit/index
> +   pgo
>
>
>  .. only::  subproject and html
> diff --git a/Documentation/dev-tools/pgo.rst b/Documentation/dev-tools/pgo.rst
> new file mode 100644
> index 0000000000000..b7f11d8405b73
> --- /dev/null
> +++ b/Documentation/dev-tools/pgo.rst
> @@ -0,0 +1,127 @@
> +.. SPDX-License-Identifier: GPL-2.0
> +
> +===============================
> +Using PGO with the Linux kernel
> +===============================
> +
> +Clang's profiling kernel support (PGO_) enables profiling of the Linux kernel
> +when building with Clang. The profiling data is exported via the ``pgo``
> +debugfs directory.
> +
> +.. _PGO: https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization
> +
> +
> +Preparation
> +===========
> +
> +Configure the kernel with:
> +
> +.. code-block:: make
> +
> +   CONFIG_DEBUG_FS=y
> +   CONFIG_PGO_CLANG=y
> +
> +Note that kernels compiled with profiling flags will be significantly larger
> +and run slower.
> +
> +Profiling data will only become accessible once debugfs has been mounted:
> +
> +.. code-block:: sh
> +
> +   mount -t debugfs none /sys/kernel/debug
> +
> +
> +Customization
> +=============
> +
> +You can enable or disable profiling for individual files and directories by
> +adding a line similar to the following to the respective kernel Makefile:
> +
> +- For a single file (e.g. main.o)
> +
> +  .. code-block:: make
> +
> +     PGO_PROFILE_main.o := y
> +
> +- For all files in one directory
> +
> +  .. code-block:: make
> +
> +     PGO_PROFILE := y
> +
> +To exclude files from being profiled use
> +
> +  .. code-block:: make
> +
> +     PGO_PROFILE_main.o := n
> +
> +and
> +
> +  .. code-block:: make
> +
> +     PGO_PROFILE := n
> +
> +Only files which are linked to the main kernel image or are compiled as kernel
> +modules are supported by this mechanism.
> +
> +
> +Files
> +=====
> +
> +The PGO kernel support creates the following files in debugfs:
> +
> +``/sys/kernel/debug/pgo``
> +       Parent directory for all PGO-related files.
> +
> +``/sys/kernel/debug/pgo/reset``
> +       Global reset file: resets all coverage data to zero when written to.
> +
> +``/sys/kernel/debug/pgo/profraw``
> +       The raw PGO data that must be processed with ``llvm-profdata``.
> +
> +
> +Workflow
> +========
> +
> +The PGO kernel can be run on the host or test machines. The data though should
> +be analyzed with Clang's tools from the same Clang version that the kernel was
> +compiled with. Clang is tolerant of version skew, but it's easier to use the
> +same Clang version.
> +
> +The profiling data is useful for optimizing the kernel, analyzing coverage,
> +etc. Clang offers tools to perform these tasks.
> +
> +Here is an example workflow for profiling an instrumented kernel with PGO and
> +using the result to optimize the kernel:
> +
> +1) Install the kernel on the TEST machine.
> +
> +2) Reset the data counters right before running the load tests
> +
> +   .. code-block:: sh
> +
> +      $ echo 1 > /sys/kernel/debug/pgo/reset
> +

I do not get this...

# mount | grep debugfs
debugfs on /sys/kernel/debug type debugfs (rw,nosuid,nodev,noexec,relatime)

After the load-test...?

echo 0 > /sys/kernel/debug/pgo/reset

> +3) Run the load tests.
> +
> +4) Collect the raw profile data
> +
> +   .. code-block:: sh
> +
> +      $ cp -a /sys/kernel/debug/pgo/profraw /tmp/vmlinux.profraw
> +

This file is only 4,9M in size and, judging by its timestamp, dates from
5 minutes before I ran the echo-1 line.

# ll /sys/kernel/debug/pgo
insgesamt 0
drwxr-xr-x  2 root root 0 16. Jan 17:29 .
drwx------ 41 root root 0 16. Jan 17:29 ..
-rw-------  1 root root 0 16. Jan 17:29 profraw
--w-------  1 root root 0 16. Jan 18:19 reset

# cp -a /sys/kernel/debug/pgo/profraw /tmp/vmlinux.profraw

# ll /tmp/vmlinux.profraw
-rw------- 1 root root 4,9M 16. Jan 17:29 /tmp/vmlinux.profraw

For me there was no prof-data collected from my defconfig kernel-build.

> +5) (Optional) Download the raw profile data to the HOST machine.
> +
> +6) Process the raw profile data
> +
> +   .. code-block:: sh
> +
> +      $ llvm-profdata merge --output=vmlinux.profdata vmlinux.profraw
> +

Is that executed in /path/to/linux/git?

> +   Note that multiple raw profile data files can be merged during this step.
> +
> +7) Rebuild the kernel using the profile data (PGO disabled)
> +
> +   .. code-block:: sh
> +
> +      $ make LLVM=1 KCFLAGS=-fprofile-use=vmlinux.profdata ...

How big is vmlinux.profdata (make defconfig)?

Do I need to do a full defconfig build, or can I stop the build after,
let's say, 10 minutes?

- Sedat -

> diff --git a/MAINTAINERS b/MAINTAINERS
> index 79b400c97059f..cb1f1f2b2baf4 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -13948,6 +13948,15 @@ S:     Maintained
>  F:     include/linux/personality.h
>  F:     include/uapi/linux/personality.h
>
> +PGO BASED KERNEL PROFILING
> +M:     Sami Tolvanen <samitolvanen@google.com>
> +M:     Bill Wendling <wcw@google.com>
> +R:     Nathan Chancellor <natechancellor@gmail.com>
> +R:     Nick Desaulniers <ndesaulniers@google.com>
> +S:     Supported
> +F:     Documentation/dev-tools/pgo.rst
> +F:     kernel/pgo
> +
>  PHOENIX RC FLIGHT CONTROLLER ADAPTER
>  M:     Marcus Folkesson <marcus.folkesson@gmail.com>
>  L:     linux-input@vger.kernel.org
> diff --git a/Makefile b/Makefile
> index 9e73f82e0d863..9128bfe1ccc97 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -659,6 +659,9 @@ endif # KBUILD_EXTMOD
>  # Defaults to vmlinux, but the arch makefile usually adds further targets
>  all: vmlinux
>
> +CFLAGS_PGO_CLANG := -fprofile-generate
> +export CFLAGS_PGO_CLANG
> +
>  CFLAGS_GCOV    := -fprofile-arcs -ftest-coverage \
>         $(call cc-option,-fno-tree-loop-im) \
>         $(call cc-disable-warning,maybe-uninitialized,)
> diff --git a/arch/Kconfig b/arch/Kconfig
> index 24862d15f3a36..f39d3991f6bfe 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -1112,6 +1112,7 @@ config ARCH_SPLIT_ARG64
>            pairs of 32-bit arguments, select this option.
>
>  source "kernel/gcov/Kconfig"
> +source "kernel/pgo/Kconfig"
>
>  source "scripts/gcc-plugins/Kconfig"
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 21f851179ff08..36305ea61dc09 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -96,6 +96,7 @@ config X86
>         select ARCH_SUPPORTS_DEBUG_PAGEALLOC
>         select ARCH_SUPPORTS_NUMA_BALANCING     if X86_64
>         select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP       if NR_CPUS <= 4096
> +       select ARCH_SUPPORTS_PGO_CLANG          if X86_64
>         select ARCH_USE_BUILTIN_BSWAP
>         select ARCH_USE_QUEUED_RWLOCKS
>         select ARCH_USE_QUEUED_SPINLOCKS
> diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
> index fe605205b4ce2..383853e32f673 100644
> --- a/arch/x86/boot/Makefile
> +++ b/arch/x86/boot/Makefile
> @@ -71,6 +71,7 @@ KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
>  KBUILD_CFLAGS  += $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
>  KBUILD_CFLAGS  += -fno-asynchronous-unwind-tables
>  GCOV_PROFILE := n
> +PGO_PROFILE := n
>  UBSAN_SANITIZE := n
>
>  $(obj)/bzImage: asflags-y  := $(SVGA_MODE)
> diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
> index e0bc3988c3faa..ed12ab65f6065 100644
> --- a/arch/x86/boot/compressed/Makefile
> +++ b/arch/x86/boot/compressed/Makefile
> @@ -54,6 +54,7 @@ CFLAGS_sev-es.o += -I$(objtree)/arch/x86/lib/
>
>  KBUILD_AFLAGS  := $(KBUILD_CFLAGS) -D__ASSEMBLY__
>  GCOV_PROFILE := n
> +PGO_PROFILE := n
>  UBSAN_SANITIZE :=n
>
>  KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE)
> diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
> index a31de0c6ccde2..775fa0b368e98 100644
> --- a/arch/x86/crypto/Makefile
> +++ b/arch/x86/crypto/Makefile
> @@ -4,6 +4,8 @@
>
>  OBJECT_FILES_NON_STANDARD := y
>
> +PGO_PROFILE_curve25519-x86_64.o := n
> +
>  obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
>
>  obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
> diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
> index 02e3e42f380bd..26e2b3af0145c 100644
> --- a/arch/x86/entry/vdso/Makefile
> +++ b/arch/x86/entry/vdso/Makefile
> @@ -179,6 +179,7 @@ quiet_cmd_vdso = VDSO    $@
>  VDSO_LDFLAGS = -shared --hash-style=both --build-id=sha1 \
>         $(call ld-option, --eh-frame-hdr) -Bsymbolic
>  GCOV_PROFILE := n
> +PGO_PROFILE := n
>
>  quiet_cmd_vdso_and_check = VDSO    $@
>        cmd_vdso_and_check = $(cmd_vdso); $(cmd_vdso_check)
> diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
> index efd9e9ea17f25..f6cab2316c46a 100644
> --- a/arch/x86/kernel/vmlinux.lds.S
> +++ b/arch/x86/kernel/vmlinux.lds.S
> @@ -184,6 +184,8 @@ SECTIONS
>
>         BUG_TABLE
>
> +       PGO_CLANG_DATA
> +
>         ORC_UNWIND_TABLE
>
>         . = ALIGN(PAGE_SIZE);
> diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile
> index 84b09c230cbd5..5f22b31446ad4 100644
> --- a/arch/x86/platform/efi/Makefile
> +++ b/arch/x86/platform/efi/Makefile
> @@ -2,6 +2,7 @@
>  OBJECT_FILES_NON_STANDARD_efi_thunk_$(BITS).o := y
>  KASAN_SANITIZE := n
>  GCOV_PROFILE := n
> +PGO_PROFILE := n
>
>  obj-$(CONFIG_EFI)              += quirks.o efi.o efi_$(BITS).o efi_stub_$(BITS).o
>  obj-$(CONFIG_EFI_MIXED)                += efi_thunk_$(BITS).o
> diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
> index 95ea17a9d20cb..36f20e99da0bc 100644
> --- a/arch/x86/purgatory/Makefile
> +++ b/arch/x86/purgatory/Makefile
> @@ -23,6 +23,7 @@ targets += purgatory.ro purgatory.chk
>
>  # Sanitizer, etc. runtimes are unavailable and cannot be linked here.
>  GCOV_PROFILE   := n
> +PGO_PROFILE    := n
>  KASAN_SANITIZE := n
>  UBSAN_SANITIZE := n
>  KCSAN_SANITIZE := n
> diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile
> index 83f1b6a56449f..21797192f958f 100644
> --- a/arch/x86/realmode/rm/Makefile
> +++ b/arch/x86/realmode/rm/Makefile
> @@ -76,4 +76,5 @@ KBUILD_CFLAGS := $(REALMODE_CFLAGS) -D_SETUP -D_WAKEUP \
>  KBUILD_AFLAGS  := $(KBUILD_CFLAGS) -D__ASSEMBLY__
>  KBUILD_CFLAGS  += -fno-asynchronous-unwind-tables
>  GCOV_PROFILE := n
> +PGO_PROFILE := n
>  UBSAN_SANITIZE := n
> diff --git a/arch/x86/um/vdso/Makefile b/arch/x86/um/vdso/Makefile
> index 5943387e3f357..54f5768f58530 100644
> --- a/arch/x86/um/vdso/Makefile
> +++ b/arch/x86/um/vdso/Makefile
> @@ -64,6 +64,7 @@ quiet_cmd_vdso = VDSO    $@
>
>  VDSO_LDFLAGS = -fPIC -shared -Wl,--hash-style=sysv
>  GCOV_PROFILE := n
> +PGO_PROFILE := n
>
>  #
>  # Install the unstripped copy of vdso*.so listed in $(vdso-install-y).
> diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
> index 8a94388e38b33..2d81623b33f29 100644
> --- a/drivers/firmware/efi/libstub/Makefile
> +++ b/drivers/firmware/efi/libstub/Makefile
> @@ -40,6 +40,7 @@ KBUILD_CFLAGS                 := $(cflags-y) -Os -DDISABLE_BRANCH_PROFILING \
>  KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
>
>  GCOV_PROFILE                   := n
> +PGO_PROFILE                    := n
>  # Sanitizer runtimes are unavailable and cannot be linked here.
>  KASAN_SANITIZE                 := n
>  KCSAN_SANITIZE                 := n
> diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
> index b2b3d81b1535a..3a591bb18c5fb 100644
> --- a/include/asm-generic/vmlinux.lds.h
> +++ b/include/asm-generic/vmlinux.lds.h
> @@ -316,6 +316,49 @@
>  #define THERMAL_TABLE(name)
>  #endif
>
> +#ifdef CONFIG_PGO_CLANG
> +#define PGO_CLANG_DATA                                                 \
> +       __llvm_prf_data : AT(ADDR(__llvm_prf_data) - LOAD_OFFSET) {     \
> +               . = ALIGN(8);                                           \
> +               __llvm_prf_start = .;                                   \
> +               __llvm_prf_data_start = .;                              \
> +               KEEP(*(__llvm_prf_data))                                \
> +               . = ALIGN(8);                                           \
> +               __llvm_prf_data_end = .;                                \
> +       }                                                               \
> +       __llvm_prf_cnts : AT(ADDR(__llvm_prf_cnts) - LOAD_OFFSET) {     \
> +               . = ALIGN(8);                                           \
> +               __llvm_prf_cnts_start = .;                              \
> +               KEEP(*(__llvm_prf_cnts))                                \
> +               . = ALIGN(8);                                           \
> +               __llvm_prf_cnts_end = .;                                \
> +       }                                                               \
> +       __llvm_prf_names : AT(ADDR(__llvm_prf_names) - LOAD_OFFSET) {   \
> +               . = ALIGN(8);                                           \
> +               __llvm_prf_names_start = .;                             \
> +               KEEP(*(__llvm_prf_names))                               \
> +               . = ALIGN(8);                                           \
> +               __llvm_prf_names_end = .;                               \
> +               . = ALIGN(8);                                           \
> +       }                                                               \
> +       __llvm_prf_vals : AT(ADDR(__llvm_prf_vals) - LOAD_OFFSET) {     \
> +               __llvm_prf_vals_start = .;                              \
> +               KEEP(*(__llvm_prf_vals))                                \
> +               . = ALIGN(8);                                           \
> +               __llvm_prf_vals_end = .;                                \
> +               . = ALIGN(8);                                           \
> +       }                                                               \
> +       __llvm_prf_vnds : AT(ADDR(__llvm_prf_vnds) - LOAD_OFFSET) {     \
> +               __llvm_prf_vnds_start = .;                              \
> +               KEEP(*(__llvm_prf_vnds))                                \
> +               . = ALIGN(8);                                           \
> +               __llvm_prf_vnds_end = .;                                \
> +               __llvm_prf_end = .;                                     \
> +       }
> +#else
> +#define PGO_CLANG_DATA
> +#endif
> +
>  #define KERNEL_DTB()                                                   \
>         STRUCT_ALIGN();                                                 \
>         __dtb_start = .;                                                \
> @@ -1125,6 +1168,7 @@
>                 CONSTRUCTORS                                            \
>         }                                                               \
>         BUG_TABLE                                                       \
> +       PGO_CLANG_DATA
>
>  #define INIT_TEXT_SECTION(inittext_align)                              \
>         . = ALIGN(inittext_align);                                      \
> diff --git a/kernel/Makefile b/kernel/Makefile
> index aa7368c7eabf3..0b34ca228ba46 100644
> --- a/kernel/Makefile
> +++ b/kernel/Makefile
> @@ -111,6 +111,7 @@ obj-$(CONFIG_BPF) += bpf/
>  obj-$(CONFIG_KCSAN) += kcsan/
>  obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o
>  obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call.o
> +obj-$(CONFIG_PGO_CLANG) += pgo/
>
>  obj-$(CONFIG_PERF_EVENTS) += events/
>
> diff --git a/kernel/pgo/Kconfig b/kernel/pgo/Kconfig
> new file mode 100644
> index 0000000000000..76a640b6cf6ed
> --- /dev/null
> +++ b/kernel/pgo/Kconfig
> @@ -0,0 +1,35 @@
> +# SPDX-License-Identifier: GPL-2.0-only
> +menu "Profile Guided Optimization (PGO) (EXPERIMENTAL)"
> +
> +config ARCH_SUPPORTS_PGO_CLANG
> +       bool
> +
> +config PGO_CLANG
> +       bool "Enable clang's PGO-based kernel profiling"
> +       depends on DEBUG_FS
> +       depends on ARCH_SUPPORTS_PGO_CLANG
> +       depends on CC_IS_CLANG && CLANG_VERSION >= 120000
> +       help
> +         This option enables clang's PGO (Profile Guided Optimization) based
> +         code profiling to better optimize the kernel.
> +
> +         If unsure, say N.
> +
> +         Run a representative workload for your application on a kernel
> +         compiled with this option and download the raw profile file from
> +         /sys/kernel/debug/pgo/profraw. This file needs to be processed with
> +         llvm-profdata. It may be merged with other collected raw profiles.
> +
> +         Copy the resulting profile file into vmlinux.profdata, and enable
> +         KCFLAGS=-fprofile-use=vmlinux.profdata to produce an optimized
> +         kernel.
> +
> +         Note that a kernel compiled with profiling flags will be
> +         significantly larger and run slower. Also be sure to exclude files
> +         from profiling which are not linked to the kernel image to prevent
> +         linker errors.
> +
> +         Note that the debugfs filesystem has to be mounted to access
> +         profiling data.
> +
> +endmenu
> diff --git a/kernel/pgo/Makefile b/kernel/pgo/Makefile
> new file mode 100644
> index 0000000000000..41e27cefd9a47
> --- /dev/null
> +++ b/kernel/pgo/Makefile
> @@ -0,0 +1,5 @@
> +# SPDX-License-Identifier: GPL-2.0
> +GCOV_PROFILE   := n
> +PGO_PROFILE    := n
> +
> +obj-y  += fs.o instrument.o
> diff --git a/kernel/pgo/fs.c b/kernel/pgo/fs.c
> new file mode 100644
> index 0000000000000..68b24672be10a
> --- /dev/null
> +++ b/kernel/pgo/fs.c
> @@ -0,0 +1,382 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2019 Google, Inc.
> + *
> + * Author:
> + *     Sami Tolvanen <samitolvanen@google.com>
> + *
> + * This software is licensed under the terms of the GNU General Public
> + * License version 2, as published by the Free Software Foundation, and
> + * may be copied, distributed, and modified under those terms.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + */
> +
> +#define pr_fmt(fmt)    "pgo: " fmt
> +
> +#include <linux/kernel.h>
> +#include <linux/debugfs.h>
> +#include <linux/fs.h>
> +#include <linux/module.h>
> +#include <linux/slab.h>
> +#include <linux/vmalloc.h>
> +#include "pgo.h"
> +
> +static struct dentry *directory;
> +
> +struct prf_private_data {
> +       void *buffer;
> +       unsigned long size;
> +};
> +
> +/*
> + * Raw profile data format:
> + *
> + *     - llvm_prf_header
> + *     - __llvm_prf_data
> + *     - __llvm_prf_cnts
> + *     - __llvm_prf_names
> + *     - zero padding to 8 bytes
> + *     - for each llvm_prf_data in __llvm_prf_data:
> + *             - llvm_prf_value_data
> + *                     - llvm_prf_value_record + site count array
> + *                             - llvm_prf_value_node_data
> + *                             ...
> + *                     ...
> + *             ...
> + */
> +
> +static void prf_fill_header(void **buffer)
> +{
> +       struct llvm_prf_header *header = *(struct llvm_prf_header **)buffer;
> +
> +       header->magic = LLVM_PRF_MAGIC;
> +       header->version = LLVM_PRF_VARIANT_MASK_IR | LLVM_PRF_VERSION;
> +       header->data_size = prf_data_count();
> +       header->padding_bytes_before_counters = 0;
> +       header->counters_size = prf_cnts_count();
> +       header->padding_bytes_after_counters = 0;
> +       header->names_size = prf_names_count();
> +       header->counters_delta = (u64)__llvm_prf_cnts_start;
> +       header->names_delta = (u64)__llvm_prf_names_start;
> +       header->value_kind_last = LLVM_PRF_IPVK_LAST;
> +
> +       *buffer += sizeof(*header);
> +}
> +
> +/*
> + * Copy the source into the buffer, incrementing the pointer into buffer in the
> + * process.
> + */
> +static void prf_copy_to_buffer(void **buffer, void *src, unsigned long size)
> +{
> +       memcpy(*buffer, src, size);
> +       *buffer += size;
> +}
> +
> +static u32 __prf_get_value_size(struct llvm_prf_data *p, u32 *value_kinds)
> +{
> +       struct llvm_prf_value_node **nodes =
> +               (struct llvm_prf_value_node **)p->values;
> +       u32 kinds = 0;
> +       u32 size = 0;
> +       unsigned int kind;
> +       unsigned int n;
> +       unsigned int s = 0;
> +
> +       for (kind = 0; kind < ARRAY_SIZE(p->num_value_sites); kind++) {
> +               unsigned int sites = p->num_value_sites[kind];
> +
> +               if (!sites)
> +                       continue;
> +
> +               /* Record + site count array */
> +               size += prf_get_value_record_size(sites);
> +               kinds++;
> +
> +               if (!nodes)
> +                       continue;
> +
> +               for (n = 0; n < sites; n++) {
> +                       u32 count = 0;
> +                       struct llvm_prf_value_node *site = nodes[s + n];
> +
> +                       while (site && ++count <= U8_MAX)
> +                               site = site->next;
> +
> +                       size += count *
> +                               sizeof(struct llvm_prf_value_node_data);
> +               }
> +
> +               s += sites;
> +       }
> +
> +       if (size)
> +               size += sizeof(struct llvm_prf_value_data);
> +
> +       if (value_kinds)
> +               *value_kinds = kinds;
> +
> +       return size;
> +}
> +
> +static u32 prf_get_value_size(void)
> +{
> +       u32 size = 0;
> +       struct llvm_prf_data *p;
> +
> +       for (p = __llvm_prf_data_start; p < __llvm_prf_data_end; p++)
> +               size += __prf_get_value_size(p, NULL);
> +
> +       return size;
> +}
> +
> +/* Serialize the profiling's value. */
> +static void prf_serialize_value(struct llvm_prf_data *p, void **buffer)
> +{
> +       struct llvm_prf_value_data header;
> +       struct llvm_prf_value_node **nodes =
> +               (struct llvm_prf_value_node **)p->values;
> +       unsigned int kind;
> +       unsigned int n;
> +       unsigned int s = 0;
> +
> +       header.total_size = __prf_get_value_size(p, &header.num_value_kinds);
> +
> +       if (!header.num_value_kinds)
> +               /* Nothing to write. */
> +               return;
> +
> +       prf_copy_to_buffer(buffer, &header, sizeof(header));
> +
> +       for (kind = 0; kind < ARRAY_SIZE(p->num_value_sites); kind++) {
> +               struct llvm_prf_value_record *record;
> +               u8 *counts;
> +               unsigned int sites = p->num_value_sites[kind];
> +
> +               if (!sites)
> +                       continue;
> +
> +               /* Profiling value record. */
> +               record = *(struct llvm_prf_value_record **)buffer;
> +               *buffer += prf_get_value_record_header_size();
> +
> +               record->kind = kind;
> +               record->num_value_sites = sites;
> +
> +               /* Site count array. */
> +               counts = *(u8 **)buffer;
> +               *buffer += prf_get_value_record_site_count_size(sites);
> +
> +               /*
> +                * If we don't have nodes, we can skip updating the site count
> +                * array, because the buffer is zero filled.
> +                */
> +               if (!nodes)
> +                       continue;
> +
> +               for (n = 0; n < sites; n++) {
> +                       u32 count = 0;
> +                       struct llvm_prf_value_node *site = nodes[s + n];
> +
> +                       while (site && ++count <= U8_MAX) {
> +                               prf_copy_to_buffer(buffer, site,
> +                                                  sizeof(struct llvm_prf_value_node_data));
> +                               site = site->next;
> +                       }
> +
> +                       counts[n] = (u8)count;
> +               }
> +
> +               s += sites;
> +       }
> +}
> +
> +static void prf_serialize_values(void **buffer)
> +{
> +       struct llvm_prf_data *p;
> +
> +       for (p = __llvm_prf_data_start; p < __llvm_prf_data_end; p++)
> +               prf_serialize_value(p, buffer);
> +}
> +
> +static inline unsigned long prf_get_padding(unsigned long size)
> +{
> +       return 7 & (8 - size % 8);
> +}
> +
> +static unsigned long prf_buffer_size(void)
> +{
> +       return sizeof(struct llvm_prf_header) +
> +                       prf_data_size() +
> +                       prf_cnts_size() +
> +                       prf_names_size() +
> +                       prf_get_padding(prf_names_size()) +
> +                       prf_get_value_size();
> +}
> +
> +/* Serialize the profiling data into a format LLVM's tools can understand. */
> +static int prf_serialize(struct prf_private_data *p)
> +{
> +       int err = 0;
> +       void *buffer;
> +
> +       p->size = prf_buffer_size();
> +       p->buffer = vzalloc(p->size);
> +
> +       if (!p->buffer) {
> +               err = -ENOMEM;
> +               goto out;
> +       }
> +
> +       buffer = p->buffer;
> +
> +       prf_fill_header(&buffer);
> +       prf_copy_to_buffer(&buffer, __llvm_prf_data_start,  prf_data_size());
> +       prf_copy_to_buffer(&buffer, __llvm_prf_cnts_start,  prf_cnts_size());
> +       prf_copy_to_buffer(&buffer, __llvm_prf_names_start, prf_names_size());
> +       buffer += prf_get_padding(prf_names_size());
> +
> +       prf_serialize_values(&buffer);
> +
> +out:
> +       return err;
> +}
> +
> +/* open() implementation for PGO. Creates a copy of the profiling data set. */
> +static int prf_open(struct inode *inode, struct file *file)
> +{
> +       struct prf_private_data *data;
> +       unsigned long flags;
> +       int err;
> +
> +       data = kzalloc(sizeof(*data), GFP_KERNEL);
> +       if (!data) {
> +               err = -ENOMEM;
> +               goto out;
> +       }
> +
> +       flags = prf_lock();
> +
> +       err = prf_serialize(data);
> +       if (err) {
> +               kfree(data);
> +               goto out_unlock;
> +       }
> +
> +       file->private_data = data;
> +
> +out_unlock:
> +       prf_unlock(flags);
> +out:
> +       return err;
> +}
> +
> +/* read() implementation for PGO. */
> +static ssize_t prf_read(struct file *file, char __user *buf, size_t count,
> +                       loff_t *ppos)
> +{
> +       struct prf_private_data *data = file->private_data;
> +
> +       BUG_ON(!data);
> +
> +       return simple_read_from_buffer(buf, count, ppos, data->buffer,
> +                                      data->size);
> +}
> +
> +/* release() implementation for PGO. Release resources allocated by open(). */
> +static int prf_release(struct inode *inode, struct file *file)
> +{
> +       struct prf_private_data *data = file->private_data;
> +
> +       if (data) {
> +               vfree(data->buffer);
> +               kfree(data);
> +       }
> +
> +       return 0;
> +}
> +
> +static const struct file_operations prf_fops = {
> +       .owner          = THIS_MODULE,
> +       .open           = prf_open,
> +       .read           = prf_read,
> +       .llseek         = default_llseek,
> +       .release        = prf_release
> +};
> +
> +/* write() implementation for resetting PGO's profile data. */
> +static ssize_t reset_write(struct file *file, const char __user *addr,
> +                          size_t len, loff_t *pos)
> +{
> +       struct llvm_prf_data *data;
> +
> +       memset(__llvm_prf_cnts_start, 0, prf_cnts_size());
> +
> +       for (data = __llvm_prf_data_start; data < __llvm_prf_data_end; ++data) {
> +               struct llvm_prf_value_node **vnodes;
> +               u64 current_vsite_count;
> +               u32 i;
> +
> +               if (!data->values)
> +                       continue;
> +
> +               current_vsite_count = 0;
> +               vnodes = (struct llvm_prf_value_node **)data->values;
> +
> +               for (i = LLVM_PRF_IPVK_FIRST; i <= LLVM_PRF_IPVK_LAST; ++i)
> +                       current_vsite_count += data->num_value_sites[i];
> +
> +               for (i = 0; i < current_vsite_count; ++i) {
> +                       struct llvm_prf_value_node *current_vnode = vnodes[i];
> +
> +                       while (current_vnode) {
> +                               current_vnode->count = 0;
> +                               current_vnode = current_vnode->next;
> +                       }
> +               }
> +       }
> +
> +       return len;
> +}
> +
> +static const struct file_operations prf_reset_fops = {
> +       .owner          = THIS_MODULE,
> +       .write          = reset_write,
> +       .llseek         = noop_llseek,
> +};
> +
> +/* Create debugfs entries. */
> +static int __init pgo_init(void)
> +{
> +       directory = debugfs_create_dir("pgo", NULL);
> +       if (!directory)
> +               goto err_remove;
> +
> +       if (!debugfs_create_file("profraw", 0600, directory, NULL,
> +                                &prf_fops))
> +               goto err_remove;
> +
> +       if (!debugfs_create_file("reset", 0200, directory, NULL,
> +                                &prf_reset_fops))
> +               goto err_remove;
> +
> +       return 0;
> +
> +err_remove:
> +       pr_err("initialization failed\n");
> +       return -EIO;
> +}
> +
> +/* Remove debugfs entries. */
> +static void __exit pgo_exit(void)
> +{
> +       debugfs_remove_recursive(directory);
> +}
> +
> +module_init(pgo_init);
> +module_exit(pgo_exit);
> diff --git a/kernel/pgo/instrument.c b/kernel/pgo/instrument.c
> new file mode 100644
> index 0000000000000..6084ff0652e85
> --- /dev/null
> +++ b/kernel/pgo/instrument.c
> @@ -0,0 +1,185 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2019 Google, Inc.
> + *
> + * Author:
> + *     Sami Tolvanen <samitolvanen@google.com>
> + *
> + * This software is licensed under the terms of the GNU General Public
> + * License version 2, as published by the Free Software Foundation, and
> + * may be copied, distributed, and modified under those terms.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + */
> +
> +#define pr_fmt(fmt)    "pgo: " fmt
> +
> +#include <linux/bitops.h>
> +#include <linux/kernel.h>
> +#include <linux/export.h>
> +#include <linux/spinlock.h>
> +#include <linux/types.h>
> +#include "pgo.h"
> +
> +/* Lock guarding value node access and serialization. */
> +static DEFINE_SPINLOCK(pgo_lock);
> +static int current_node;
> +
> +unsigned long prf_lock(void)
> +{
> +       unsigned long flags;
> +
> +       spin_lock_irqsave(&pgo_lock, flags);
> +
> +       return flags;
> +}
> +
> +void prf_unlock(unsigned long flags)
> +{
> +       spin_unlock_irqrestore(&pgo_lock, flags);
> +}
> +
> +/*
> + * Return a newly allocated profiling value node which contains the value
> + * tracked by the value profiler.
> + * Note: caller *must* hold pgo_lock.
> + */
> +static struct llvm_prf_value_node *allocate_node(struct llvm_prf_data *p,
> +                                                u32 index, u64 value)
> +{
> +       if (&__llvm_prf_vnds_start[current_node + 1] >= __llvm_prf_vnds_end)
> +               return NULL; /* Out of nodes */
> +
> +       current_node++;
> +
> +       /* Make sure the node is entirely within the section */
> +       if (&__llvm_prf_vnds_start[current_node] >= __llvm_prf_vnds_end ||
> +           &__llvm_prf_vnds_start[current_node + 1] > __llvm_prf_vnds_end)
> +               return NULL;
> +
> +       return &__llvm_prf_vnds_start[current_node];
> +}
> +
> +/*
> + * Counts the number of times a target value is seen.
> + *
> + * Records the target value for the CounterIndex if not seen before. Otherwise,
> + * increments the counter associated w/ the target value.
> + */
> +void __llvm_profile_instrument_target(u64 target_value, void *data, u32 index);
> +void __llvm_profile_instrument_target(u64 target_value, void *data, u32 index)
> +{
> +       struct llvm_prf_data *p = (struct llvm_prf_data *)data;
> +       struct llvm_prf_value_node **counters;
> +       struct llvm_prf_value_node *curr;
> +       struct llvm_prf_value_node *min = NULL;
> +       struct llvm_prf_value_node *prev = NULL;
> +       u64 min_count = U64_MAX;
> +       u8 values = 0;
> +       unsigned long flags;
> +
> +       if (!p || !p->values)
> +               return;
> +
> +       counters = (struct llvm_prf_value_node **)p->values;
> +       curr = counters[index];
> +
> +       while (curr) {
> +               if (target_value == curr->value) {
> +                       curr->count++;
> +                       return;
> +               }
> +
> +               if (curr->count < min_count) {
> +                       min_count = curr->count;
> +                       min = curr;
> +               }
> +
> +               prev = curr;
> +               curr = curr->next;
> +               values++;
> +       }
> +
> +       if (values >= LLVM_PRF_MAX_NUM_VALS_PER_SITE) {
> +               if (!min->count || !(--min->count)) {
> +                       curr = min;
> +                       curr->value = target_value;
> +                       curr->count++;
> +               }
> +               return;
> +       }
> +
> +       /* Lock when updating the value node structure. */
> +       flags = prf_lock();
> +
> +       curr = allocate_node(p, index, target_value);
> +       if (!curr)
> +               goto out;
> +
> +       curr->value = target_value;
> +       curr->count++;
> +
> +       if (!counters[index])
> +               counters[index] = curr;
> +       else if (prev && !prev->next)
> +               prev->next = curr;
> +
> +out:
> +       prf_unlock(flags);
> +}
> +EXPORT_SYMBOL(__llvm_profile_instrument_target);
> +
> +/* Counts the number of times a range of target values are seen. */
> +void __llvm_profile_instrument_range(u64 target_value, void *data,
> +                                    u32 index, s64 precise_start,
> +                                    s64 precise_last, s64 large_value);
> +void __llvm_profile_instrument_range(u64 target_value, void *data,
> +                                    u32 index, s64 precise_start,
> +                                    s64 precise_last, s64 large_value)
> +{
> +       if (large_value != S64_MIN && (s64)target_value >= large_value)
> +               target_value = large_value;
> +       else if ((s64)target_value < precise_start ||
> +                (s64)target_value > precise_last)
> +               target_value = precise_last + 1;
> +
> +       __llvm_profile_instrument_target(target_value, data, index);
> +}
> +EXPORT_SYMBOL(__llvm_profile_instrument_range);
> +
> +static u64 inst_prof_get_range_rep_value(u64 value)
> +{
> +       if (value <= 8)
> +               /* The first ranges are individually tracked, use it as is. */
> +               return value;
> +       else if (value >= 513)
> +               /* The last range is mapped to its lowest value. */
> +               return 513;
> +       else if (hweight64(value) == 1)
> +               /* If it's a power of two, use it as is. */
> +               return value;
> +
> +       /* Otherwise, take the previous power of two + 1. */
> +       return (1 << (64 - __builtin_clzll(value) - 1)) + 1;
> +}
> +
> +/*
> + * The target values are partitioned into multiple ranges. The range spec is
> + * defined in compiler-rt/include/profile/InstrProfData.inc.
> + */
> +void __llvm_profile_instrument_memop(u64 target_value, void *data,
> +                                    u32 counter_index);
> +void __llvm_profile_instrument_memop(u64 target_value, void *data,
> +                                    u32 counter_index)
> +{
> +       u64 rep_value;
> +
> +       /* Map the target value to the representative value of its range. */
> +       rep_value = inst_prof_get_range_rep_value(target_value);
> +       __llvm_profile_instrument_target(rep_value, data, counter_index);
> +}
> +EXPORT_SYMBOL(__llvm_profile_instrument_memop);
> diff --git a/kernel/pgo/pgo.h b/kernel/pgo/pgo.h
> new file mode 100644
> index 0000000000000..df0aa278f28bd
> --- /dev/null
> +++ b/kernel/pgo/pgo.h
> @@ -0,0 +1,206 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (C) 2019 Google, Inc.
> + *
> + * Author:
> + *     Sami Tolvanen <samitolvanen@google.com>
> + *
> + * This software is licensed under the terms of the GNU General Public
> + * License version 2, as published by the Free Software Foundation, and
> + * may be copied, distributed, and modified under those terms.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + */
> +
> +#ifndef _PGO_H
> +#define _PGO_H
> +
> +/*
> + * Note: These internal LLVM definitions must match the compiler version.
> + * See llvm/include/llvm/ProfileData/InstrProfData.inc in LLVM's source code.
> + */
> +
> +#ifdef CONFIG_64BIT
> +       #define LLVM_PRF_MAGIC          \
> +               ((u64)255 << 56 |       \
> +                (u64)'l' << 48 |       \
> +                (u64)'p' << 40 |       \
> +                (u64)'r' << 32 |       \
> +                (u64)'o' << 24 |       \
> +                (u64)'f' << 16 |       \
> +                (u64)'r' << 8  |       \
> +                (u64)129)
> +#else
> +       #define LLVM_PRF_MAGIC          \
> +               ((u64)255 << 56 |       \
> +                (u64)'l' << 48 |       \
> +                (u64)'p' << 40 |       \
> +                (u64)'r' << 32 |       \
> +                (u64)'o' << 24 |       \
> +                (u64)'f' << 16 |       \
> +                (u64)'R' << 8  |       \
> +                (u64)129)
> +#endif
> +
> +#define LLVM_PRF_VERSION               5
> +#define LLVM_PRF_DATA_ALIGN            8
> +#define LLVM_PRF_IPVK_FIRST            0
> +#define LLVM_PRF_IPVK_LAST             1
> +#define LLVM_PRF_MAX_NUM_VALS_PER_SITE 16
> +
> +#define LLVM_PRF_VARIANT_MASK_IR       (0x1ull << 56)
> +#define LLVM_PRF_VARIANT_MASK_CSIR     (0x1ull << 57)
> +
> +/**
> + * struct llvm_prf_header - represents the raw profile header data structure.
> + * @magic: the magic token for the file format.
> + * @version: the version of the file format.
> + * @data_size: the number of entries in the profile data section.
> + * @padding_bytes_before_counters: the number of padding bytes before the
> + *   counters.
> + * @counters_size: the size in bytes of the LLVM profile section containing the
> + *   counters.
> + * @padding_bytes_after_counters: the number of padding bytes after the
> + *   counters.
> + * @names_size: the size in bytes of the LLVM profile section containing the
> + *   counters' names.
> + * @counters_delta: the beginning of the LLVM profile counters section.
> + * @names_delta: the beginning of the LLVM profile names section.
> + * @value_kind_last: the last profile value kind.
> + */
> +struct llvm_prf_header {
> +       u64 magic;
> +       u64 version;
> +       u64 data_size;
> +       u64 padding_bytes_before_counters;
> +       u64 counters_size;
> +       u64 padding_bytes_after_counters;
> +       u64 names_size;
> +       u64 counters_delta;
> +       u64 names_delta;
> +       u64 value_kind_last;
> +};
> +
> +/**
> + * struct llvm_prf_data - represents the per-function control structure.
> + * @name_ref: the reference to the function's name.
> + * @func_hash: the hash value of the function.
> + * @counter_ptr: a pointer to the profile counter.
> + * @function_ptr: a pointer to the function.
> + * @values: the profiling values associated with this function.
> + * @num_counters: the number of counters in the function.
> + * @num_value_sites: the number of value profile sites.
> + */
> +struct llvm_prf_data {
> +       const u64 name_ref;
> +       const u64 func_hash;
> +       const void *counter_ptr;
> +       const void *function_ptr;
> +       void *values;
> +       const u32 num_counters;
> +       const u16 num_value_sites[LLVM_PRF_IPVK_LAST + 1];
> +} __aligned(LLVM_PRF_DATA_ALIGN);
> +
> +/**
> + * struct llvm_prf_value_node_data - represents the data part of the struct
> + *   llvm_prf_value_node data structure.
> + * @value: the value counters.
> + * @count: the counters' count.
> + */
> +struct llvm_prf_value_node_data {
> +       u64 value;
> +       u64 count;
> +};
> +
> +/**
> + * struct llvm_prf_value_node - represents an internal data structure used by
> + *   the value profiler.
> + * @value: the value counters.
> + * @count: the counters' count.
> + * @next: the next value node.
> + */
> +struct llvm_prf_value_node {
> +       u64 value;
> +       u64 count;
> +       struct llvm_prf_value_node *next;
> +};
> +
> +/**
> + * struct llvm_prf_value_data - represents the value profiling data in indexed
> + *   format.
> + * @total_size: the total size in bytes including this field.
> + * @num_value_kinds: the number of value profile kinds that have value profile
> + *   data.
> + */
> +struct llvm_prf_value_data {
> +       u32 total_size;
> +       u32 num_value_kinds;
> +};
> +
> +/**
> + * struct llvm_prf_value_record - represents the on-disk layout of the value
> + *   profile data of a particular kind for one function.
> + * @kind: the kind of the value profile record.
> + * @num_value_sites: the number of value profile sites.
> + * @site_count_array: the first element of the array that stores the number
> + *   of profiled values for each value site.
> + */
> +struct llvm_prf_value_record {
> +       u32 kind;
> +       u32 num_value_sites;
> +       u8 site_count_array[];
> +};
> +
> +#define prf_get_value_record_header_size()             \
> +       offsetof(struct llvm_prf_value_record, site_count_array)
> +#define prf_get_value_record_site_count_size(sites)    \
> +       roundup((sites), 8)
> +#define prf_get_value_record_size(sites)               \
> +       (prf_get_value_record_header_size() +           \
> +        prf_get_value_record_site_count_size((sites)))
> +
> +/* Data sections */
> +extern struct llvm_prf_data __llvm_prf_data_start[];
> +extern struct llvm_prf_data __llvm_prf_data_end[];
> +
> +extern u64 __llvm_prf_cnts_start[];
> +extern u64 __llvm_prf_cnts_end[];
> +
> +extern char __llvm_prf_names_start[];
> +extern char __llvm_prf_names_end[];
> +
> +extern struct llvm_prf_value_node __llvm_prf_vnds_start[];
> +extern struct llvm_prf_value_node __llvm_prf_vnds_end[];
> +
> +/* Locking for vnodes */
> +extern unsigned long prf_lock(void);
> +extern void prf_unlock(unsigned long flags);
> +
> +#define __DEFINE_PRF_SIZE(s) \
> +       static inline unsigned long prf_ ## s ## _size(void)            \
> +       {                                                               \
> +               unsigned long start =                                   \
> +                       (unsigned long)__llvm_prf_ ## s ## _start;      \
> +               unsigned long end =                                     \
> +                       (unsigned long)__llvm_prf_ ## s ## _end;        \
> +               return roundup(end - start,                             \
> +                               sizeof(__llvm_prf_ ## s ## _start[0])); \
> +       }                                                               \
> +       static inline unsigned long prf_ ## s ## _count(void)           \
> +       {                                                               \
> +               return prf_ ## s ## _size() /                           \
> +                       sizeof(__llvm_prf_ ## s ## _start[0]);          \
> +       }
> +
> +__DEFINE_PRF_SIZE(data);
> +__DEFINE_PRF_SIZE(cnts);
> +__DEFINE_PRF_SIZE(names);
> +__DEFINE_PRF_SIZE(vnds);
> +
> +#undef __DEFINE_PRF_SIZE
> +
> +#endif /* _PGO_H */
> diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
> index 213677a5ed33e..9b218afb5cb87 100644
> --- a/scripts/Makefile.lib
> +++ b/scripts/Makefile.lib
> @@ -143,6 +143,16 @@ _c_flags += $(if $(patsubst n%,, \
>                 $(CFLAGS_GCOV))
>  endif
>
> +#
> +# Enable clang's PGO profiling flags for a file or directory depending on
> +# variables PGO_PROFILE_obj.o and PGO_PROFILE.
> +#
> +ifeq ($(CONFIG_PGO_CLANG),y)
> +_c_flags += $(if $(patsubst n%,, \
> +               $(PGO_PROFILE_$(basetarget).o)$(PGO_PROFILE)y), \
> +               $(CFLAGS_PGO_CLANG))
> +endif
> +
>  #
>  # Enable address sanitizer flags for kernel except some files or directories
>  # we don't want to check (depends on variables KASAN_SANITIZE_obj.o, KASAN_SANITIZE)
> --
> 2.30.0.284.gd98b1dd5eaa7-goog
>
> --
> You received this message because you are subscribed to the Google Groups "Clang Built Linux" group.
> To unsubscribe from this group and stop receiving emails from it, send an email to clang-built-linux+unsubscribe@googlegroups.com.
> To view this discussion on the web visit https://groups.google.com/d/msgid/clang-built-linux/20210116094357.3620352-1-morbo%40google.com.
Sedat Dilek Jan. 16, 2021, 6:36 p.m. UTC | #2
On Sat, Jan 16, 2021 at 6:38 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
>
> On Sat, Jan 16, 2021 at 10:44 AM 'Bill Wendling' via Clang Built Linux
> <clang-built-linux@googlegroups.com> wrote:
> >
> > From: Sami Tolvanen <samitolvanen@google.com>
> >
> > Enable the use of clang's Profile-Guided Optimization[1]. To generate a
> > profile, the kernel is instrumented with PGO counters, a representative
> > workload is run, and the raw profile data is collected from
> > /sys/kernel/debug/pgo/profraw.
> >
> > The raw profile data must be processed by clang's "llvm-profdata" tool
> > before it can be used during recompilation:
> >
> >   $ cp /sys/kernel/debug/pgo/profraw vmlinux.profraw
> >   $ llvm-profdata merge --output=vmlinux.profdata vmlinux.profraw
> >
> > Multiple raw profiles may be merged during this step.
> >
> > The data can now be used by the compiler:
> >
> >   $ make LLVM=1 KCFLAGS=-fprofile-use=vmlinux.profdata ...
> >
> > This initial submission is restricted to x86, as that's the platform we
> > know works. This restriction can be lifted once other platforms have
> > been verified to work with PGO.
> >
> > Note that this method of profiling the kernel is clang-native, unlike
> > the clang support in kernel/gcov.
> >
> > [1] https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization
> >
> > Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
> > Co-developed-by: Bill Wendling <morbo@google.com>
> > Signed-off-by: Bill Wendling <morbo@google.com>
> > ---
> > v2: - Added "__llvm_profile_instrument_memop" based on Nathan Chancellor's
> >       testing.
> >     - Corrected documentation, re PGO flags when using LTO, based on Fangrui
> >       Song's comments.
> > v3: - Added change log section based on Sedat Dilek's comments.
> > v4: - Remove non-x86 Makefile changes and use "hweight64" instead of using our
> >       own popcount implementation, based on Nick Desaulniers's comment.
> > v5: - Correct padding calculation, discovered by Nathan Chancellor.
> > ---
> >  Documentation/dev-tools/index.rst     |   1 +
> >  Documentation/dev-tools/pgo.rst       | 127 +++++++++
> >  MAINTAINERS                           |   9 +
> >  Makefile                              |   3 +
> >  arch/Kconfig                          |   1 +
> >  arch/x86/Kconfig                      |   1 +
> >  arch/x86/boot/Makefile                |   1 +
> >  arch/x86/boot/compressed/Makefile     |   1 +
> >  arch/x86/crypto/Makefile              |   2 +
> >  arch/x86/entry/vdso/Makefile          |   1 +
> >  arch/x86/kernel/vmlinux.lds.S         |   2 +
> >  arch/x86/platform/efi/Makefile        |   1 +
> >  arch/x86/purgatory/Makefile           |   1 +
> >  arch/x86/realmode/rm/Makefile         |   1 +
> >  arch/x86/um/vdso/Makefile             |   1 +
> >  drivers/firmware/efi/libstub/Makefile |   1 +
> >  include/asm-generic/vmlinux.lds.h     |  44 +++
> >  kernel/Makefile                       |   1 +
> >  kernel/pgo/Kconfig                    |  35 +++
> >  kernel/pgo/Makefile                   |   5 +
> >  kernel/pgo/fs.c                       | 382 ++++++++++++++++++++++++++
> >  kernel/pgo/instrument.c               | 185 +++++++++++++
> >  kernel/pgo/pgo.h                      | 206 ++++++++++++++
> >  scripts/Makefile.lib                  |  10 +
> >  24 files changed, 1022 insertions(+)
> >  create mode 100644 Documentation/dev-tools/pgo.rst
> >  create mode 100644 kernel/pgo/Kconfig
> >  create mode 100644 kernel/pgo/Makefile
> >  create mode 100644 kernel/pgo/fs.c
> >  create mode 100644 kernel/pgo/instrument.c
> >  create mode 100644 kernel/pgo/pgo.h
> >
> > diff --git a/Documentation/dev-tools/index.rst b/Documentation/dev-tools/index.rst
> > index f7809c7b1ba9e..8d6418e858062 100644
> > --- a/Documentation/dev-tools/index.rst
> > +++ b/Documentation/dev-tools/index.rst
> > @@ -26,6 +26,7 @@ whole; patches welcome!
> >     kgdb
> >     kselftest
> >     kunit/index
> > +   pgo
> >
> >
> >  .. only::  subproject and html
> > diff --git a/Documentation/dev-tools/pgo.rst b/Documentation/dev-tools/pgo.rst
> > new file mode 100644
> > index 0000000000000..b7f11d8405b73
> > --- /dev/null
> > +++ b/Documentation/dev-tools/pgo.rst
> > @@ -0,0 +1,127 @@
> > +.. SPDX-License-Identifier: GPL-2.0
> > +
> > +===============================
> > +Using PGO with the Linux kernel
> > +===============================
> > +
> > +Clang's profiling kernel support (PGO_) enables profiling of the Linux kernel
> > +when building with Clang. The profiling data is exported via the ``pgo``
> > +debugfs directory.
> > +
> > +.. _PGO: https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization
> > +
> > +
> > +Preparation
> > +===========
> > +
> > +Configure the kernel with:
> > +
> > +.. code-block:: make
> > +
> > +   CONFIG_DEBUG_FS=y
> > +   CONFIG_PGO_CLANG=y
> > +
> > +Note that kernels compiled with profiling flags will be significantly larger
> > +and run slower.
> > +
> > +Profiling data will only become accessible once debugfs has been mounted:
> > +
> > +.. code-block:: sh
> > +
> > +   mount -t debugfs none /sys/kernel/debug
> > +
> > +
> > +Customization
> > +=============
> > +
> > +You can enable or disable profiling for individual files and directories by
> > +adding a line similar to the following to the respective kernel Makefile:
> > +
> > +- For a single file (e.g. main.o)
> > +
> > +  .. code-block:: make
> > +
> > +     PGO_PROFILE_main.o := y
> > +
> > +- For all files in one directory
> > +
> > +  .. code-block:: make
> > +
> > +     PGO_PROFILE := y
> > +
> > +To exclude files from being profiled use
> > +
> > +  .. code-block:: make
> > +
> > +     PGO_PROFILE_main.o := n
> > +
> > +and
> > +
> > +  .. code-block:: make
> > +
> > +     PGO_PROFILE := n
> > +
> > +Only files which are linked to the main kernel image or are compiled as kernel
> > +modules are supported by this mechanism.
> > +
> > +
> > +Files
> > +=====
> > +
> > +The PGO kernel support creates the following files in debugfs:
> > +
> > +``/sys/kernel/debug/pgo``
> > +       Parent directory for all PGO-related files.
> > +
> > +``/sys/kernel/debug/pgo/reset``
> > +       Global reset file: resets all coverage data to zero when written to.
> > +
> > +``/sys/kernel/debug/pgo/profraw``
> > +       The raw PGO data that must be processed with ``llvm-profdata``.
> > +
> > +
> > +Workflow
> > +========
> > +
> > +The PGO kernel can be run on the host or test machines. The data though should
> > +be analyzed with Clang's tools from the same Clang version that the kernel was
> > +compiled with. Clang is tolerant of version skew, but it's easier to use the same
> > +Clang version.
> > +
> > +The profiling data is useful for optimizing the kernel, analyzing coverage,
> > +etc. Clang offers tools to perform these tasks.
> > +
> > +Here is an example workflow for profiling an instrumented kernel with PGO and
> > +using the result to optimize the kernel:
> > +
> > +1) Install the kernel on the TEST machine.
> > +
> > +2) Reset the data counters right before running the load tests
> > +
> > +   .. code-block:: sh
> > +
> > +      $ echo 1 > /sys/kernel/debug/pgo/reset
> > +
>
> I do not get this...
>
> # mount | grep debugfs
> debugfs on /sys/kernel/debug type debugfs (rw,nosuid,nodev,noexec,relatime)
>

I tried:

# umount /sys/kernel/debug

# mount -t debugfs none /sys/kernel/debug

# echo 1 > /sys/kernel/debug/pgo/reset

*** Run load-test ***

Again, the profraw file's timestamp is older than that of the reset file.

# LC_ALL=C ll /sys/kernel/debug/pgo/
total 0
drwxr-xr-x  2 root root 0 Jan 16 17:29 .
drwx------ 41 root root 0 Jan 16 17:29 ..
-rw-------  1 root root 0 Jan 16 19:14 profraw
--w-------  1 root root 0 Jan 16 19:29 reset

Did this really profile my kernel-build?

- Sedat -

> After the load-test...?
>
> echo 0 > /sys/kernel/debug/pgo/reset
>
> > +3) Run the load tests.
> > +
> > +4) Collect the raw profile data
> > +
> > +   .. code-block:: sh
> > +
> > +      $ cp -a /sys/kernel/debug/pgo/profraw /tmp/vmlinux.profraw
> > +
>
> This is only 4,9M small and seen from the date 5mins before I run the
> echo-1 line.
>
> # ll /sys/kernel/debug/pgo
> insgesamt 0
> drwxr-xr-x  2 root root 0 16. Jan 17:29 .
> drwx------ 41 root root 0 16. Jan 17:29 ..
> -rw-------  1 root root 0 16. Jan 17:29 profraw
> --w-------  1 root root 0 16. Jan 18:19 reset
>
> # cp -a /sys/kernel/debug/pgo/profraw /tmp/vmlinux.profraw
>
> # ll /tmp/vmlinux.profraw
> -rw------- 1 root root 4,9M 16. Jan 17:29 /tmp/vmlinux.profraw
>
> For me there was no prof-data collected from my defconfig kernel-build.
>
> > +5) (Optional) Download the raw profile data to the HOST machine.
> > +
> > +6) Process the raw profile data
> > +
> > +   .. code-block:: sh
> > +
> > +      $ llvm-profdata merge --output=vmlinux.profdata vmlinux.profraw
> > +
>
> Is that executed in /path/to/linux/git?
>
> > +   Note that multiple raw profile data files can be merged during this step.
> > +
> > +7) Rebuild the kernel using the profile data (PGO disabled)
> > +
> > +   .. code-block:: sh
> > +
> > +      $ make LLVM=1 KCFLAGS=-fprofile-use=vmlinux.profdata ...
>
> How big is vmlinux.profdata (make defconfig)?
>
> Do I need to do a full defconfig build or can I stop the build after
> let me say 10mins?
>
> - Sedat -
>
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index 79b400c97059f..cb1f1f2b2baf4 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -13948,6 +13948,15 @@ S:     Maintained
> >  F:     include/linux/personality.h
> >  F:     include/uapi/linux/personality.h
> >
> > +PGO BASED KERNEL PROFILING
> > +M:     Sami Tolvanen <samitolvanen@google.com>
> > +M:     Bill Wendling <wcw@google.com>
> > +R:     Nathan Chancellor <natechancellor@gmail.com>
> > +R:     Nick Desaulniers <ndesaulniers@google.com>
> > +S:     Supported
> > +F:     Documentation/dev-tools/pgo.rst
> > +F:     kernel/pgo
> > +
> >  PHOENIX RC FLIGHT CONTROLLER ADAPTER
> >  M:     Marcus Folkesson <marcus.folkesson@gmail.com>
> >  L:     linux-input@vger.kernel.org
> > diff --git a/Makefile b/Makefile
> > index 9e73f82e0d863..9128bfe1ccc97 100644
> > --- a/Makefile
> > +++ b/Makefile
> > @@ -659,6 +659,9 @@ endif # KBUILD_EXTMOD
> >  # Defaults to vmlinux, but the arch makefile usually adds further targets
> >  all: vmlinux
> >
> > +CFLAGS_PGO_CLANG := -fprofile-generate
> > +export CFLAGS_PGO_CLANG
> > +
> >  CFLAGS_GCOV    := -fprofile-arcs -ftest-coverage \
> >         $(call cc-option,-fno-tree-loop-im) \
> >         $(call cc-disable-warning,maybe-uninitialized,)
> > diff --git a/arch/Kconfig b/arch/Kconfig
> > index 24862d15f3a36..f39d3991f6bfe 100644
> > --- a/arch/Kconfig
> > +++ b/arch/Kconfig
> > @@ -1112,6 +1112,7 @@ config ARCH_SPLIT_ARG64
> >            pairs of 32-bit arguments, select this option.
> >
> >  source "kernel/gcov/Kconfig"
> > +source "kernel/pgo/Kconfig"
> >
> >  source "scripts/gcc-plugins/Kconfig"
> >
> > diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> > index 21f851179ff08..36305ea61dc09 100644
> > --- a/arch/x86/Kconfig
> > +++ b/arch/x86/Kconfig
> > @@ -96,6 +96,7 @@ config X86
> >         select ARCH_SUPPORTS_DEBUG_PAGEALLOC
> >         select ARCH_SUPPORTS_NUMA_BALANCING     if X86_64
> >         select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP       if NR_CPUS <= 4096
> > +       select ARCH_SUPPORTS_PGO_CLANG          if X86_64
> >         select ARCH_USE_BUILTIN_BSWAP
> >         select ARCH_USE_QUEUED_RWLOCKS
> >         select ARCH_USE_QUEUED_SPINLOCKS
> > diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
> > index fe605205b4ce2..383853e32f673 100644
> > --- a/arch/x86/boot/Makefile
> > +++ b/arch/x86/boot/Makefile
> > @@ -71,6 +71,7 @@ KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
> >  KBUILD_CFLAGS  += $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
> >  KBUILD_CFLAGS  += -fno-asynchronous-unwind-tables
> >  GCOV_PROFILE := n
> > +PGO_PROFILE := n
> >  UBSAN_SANITIZE := n
> >
> >  $(obj)/bzImage: asflags-y  := $(SVGA_MODE)
> > diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
> > index e0bc3988c3faa..ed12ab65f6065 100644
> > --- a/arch/x86/boot/compressed/Makefile
> > +++ b/arch/x86/boot/compressed/Makefile
> > @@ -54,6 +54,7 @@ CFLAGS_sev-es.o += -I$(objtree)/arch/x86/lib/
> >
> >  KBUILD_AFLAGS  := $(KBUILD_CFLAGS) -D__ASSEMBLY__
> >  GCOV_PROFILE := n
> > +PGO_PROFILE := n
> >  UBSAN_SANITIZE :=n
> >
> >  KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE)
> > diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
> > index a31de0c6ccde2..775fa0b368e98 100644
> > --- a/arch/x86/crypto/Makefile
> > +++ b/arch/x86/crypto/Makefile
> > @@ -4,6 +4,8 @@
> >
> >  OBJECT_FILES_NON_STANDARD := y
> >
> > +PGO_PROFILE_curve25519-x86_64.o := n
> > +
> >  obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
> >
> >  obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
> > diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
> > index 02e3e42f380bd..26e2b3af0145c 100644
> > --- a/arch/x86/entry/vdso/Makefile
> > +++ b/arch/x86/entry/vdso/Makefile
> > @@ -179,6 +179,7 @@ quiet_cmd_vdso = VDSO    $@
> >  VDSO_LDFLAGS = -shared --hash-style=both --build-id=sha1 \
> >         $(call ld-option, --eh-frame-hdr) -Bsymbolic
> >  GCOV_PROFILE := n
> > +PGO_PROFILE := n
> >
> >  quiet_cmd_vdso_and_check = VDSO    $@
> >        cmd_vdso_and_check = $(cmd_vdso); $(cmd_vdso_check)
> > diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
> > index efd9e9ea17f25..f6cab2316c46a 100644
> > --- a/arch/x86/kernel/vmlinux.lds.S
> > +++ b/arch/x86/kernel/vmlinux.lds.S
> > @@ -184,6 +184,8 @@ SECTIONS
> >
> >         BUG_TABLE
> >
> > +       PGO_CLANG_DATA
> > +
> >         ORC_UNWIND_TABLE
> >
> >         . = ALIGN(PAGE_SIZE);
> > diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile
> > index 84b09c230cbd5..5f22b31446ad4 100644
> > --- a/arch/x86/platform/efi/Makefile
> > +++ b/arch/x86/platform/efi/Makefile
> > @@ -2,6 +2,7 @@
> >  OBJECT_FILES_NON_STANDARD_efi_thunk_$(BITS).o := y
> >  KASAN_SANITIZE := n
> >  GCOV_PROFILE := n
> > +PGO_PROFILE := n
> >
> >  obj-$(CONFIG_EFI)              += quirks.o efi.o efi_$(BITS).o efi_stub_$(BITS).o
> >  obj-$(CONFIG_EFI_MIXED)                += efi_thunk_$(BITS).o
> > diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
> > index 95ea17a9d20cb..36f20e99da0bc 100644
> > --- a/arch/x86/purgatory/Makefile
> > +++ b/arch/x86/purgatory/Makefile
> > @@ -23,6 +23,7 @@ targets += purgatory.ro purgatory.chk
> >
> >  # Sanitizer, etc. runtimes are unavailable and cannot be linked here.
> >  GCOV_PROFILE   := n
> > +PGO_PROFILE    := n
> >  KASAN_SANITIZE := n
> >  UBSAN_SANITIZE := n
> >  KCSAN_SANITIZE := n
> > diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile
> > index 83f1b6a56449f..21797192f958f 100644
> > --- a/arch/x86/realmode/rm/Makefile
> > +++ b/arch/x86/realmode/rm/Makefile
> > @@ -76,4 +76,5 @@ KBUILD_CFLAGS := $(REALMODE_CFLAGS) -D_SETUP -D_WAKEUP \
> >  KBUILD_AFLAGS  := $(KBUILD_CFLAGS) -D__ASSEMBLY__
> >  KBUILD_CFLAGS  += -fno-asynchronous-unwind-tables
> >  GCOV_PROFILE := n
> > +PGO_PROFILE := n
> >  UBSAN_SANITIZE := n
> > diff --git a/arch/x86/um/vdso/Makefile b/arch/x86/um/vdso/Makefile
> > index 5943387e3f357..54f5768f58530 100644
> > --- a/arch/x86/um/vdso/Makefile
> > +++ b/arch/x86/um/vdso/Makefile
> > @@ -64,6 +64,7 @@ quiet_cmd_vdso = VDSO    $@
> >
> >  VDSO_LDFLAGS = -fPIC -shared -Wl,--hash-style=sysv
> >  GCOV_PROFILE := n
> > +PGO_PROFILE := n
> >
> >  #
> >  # Install the unstripped copy of vdso*.so listed in $(vdso-install-y).
> > diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
> > index 8a94388e38b33..2d81623b33f29 100644
> > --- a/drivers/firmware/efi/libstub/Makefile
> > +++ b/drivers/firmware/efi/libstub/Makefile
> > @@ -40,6 +40,7 @@ KBUILD_CFLAGS                 := $(cflags-y) -Os -DDISABLE_BRANCH_PROFILING \
> >  KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
> >
> >  GCOV_PROFILE                   := n
> > +PGO_PROFILE                    := n
> >  # Sanitizer runtimes are unavailable and cannot be linked here.
> >  KASAN_SANITIZE                 := n
> >  KCSAN_SANITIZE                 := n
> > diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
> > index b2b3d81b1535a..3a591bb18c5fb 100644
> > --- a/include/asm-generic/vmlinux.lds.h
> > +++ b/include/asm-generic/vmlinux.lds.h
> > @@ -316,6 +316,49 @@
> >  #define THERMAL_TABLE(name)
> >  #endif
> >
> > +#ifdef CONFIG_PGO_CLANG
> > +#define PGO_CLANG_DATA                                                 \
> > +       __llvm_prf_data : AT(ADDR(__llvm_prf_data) - LOAD_OFFSET) {     \
> > +               . = ALIGN(8);                                           \
> > +               __llvm_prf_start = .;                                   \
> > +               __llvm_prf_data_start = .;                              \
> > +               KEEP(*(__llvm_prf_data))                                \
> > +               . = ALIGN(8);                                           \
> > +               __llvm_prf_data_end = .;                                \
> > +       }                                                               \
> > +       __llvm_prf_cnts : AT(ADDR(__llvm_prf_cnts) - LOAD_OFFSET) {     \
> > +               . = ALIGN(8);                                           \
> > +               __llvm_prf_cnts_start = .;                              \
> > +               KEEP(*(__llvm_prf_cnts))                                \
> > +               . = ALIGN(8);                                           \
> > +               __llvm_prf_cnts_end = .;                                \
> > +       }                                                               \
> > +       __llvm_prf_names : AT(ADDR(__llvm_prf_names) - LOAD_OFFSET) {   \
> > +               . = ALIGN(8);                                           \
> > +               __llvm_prf_names_start = .;                             \
> > +               KEEP(*(__llvm_prf_names))                               \
> > +               . = ALIGN(8);                                           \
> > +               __llvm_prf_names_end = .;                               \
> > +               . = ALIGN(8);                                           \
> > +       }                                                               \
> > +       __llvm_prf_vals : AT(ADDR(__llvm_prf_vals) - LOAD_OFFSET) {     \
> > +               __llvm_prf_vals_start = .;                              \
> > +               KEEP(*(__llvm_prf_vals))                                \
> > +               . = ALIGN(8);                                           \
> > +               __llvm_prf_vals_end = .;                                \
> > +               . = ALIGN(8);                                           \
> > +       }                                                               \
> > +       __llvm_prf_vnds : AT(ADDR(__llvm_prf_vnds) - LOAD_OFFSET) {     \
> > +               __llvm_prf_vnds_start = .;                              \
> > +               KEEP(*(__llvm_prf_vnds))                                \
> > +               . = ALIGN(8);                                           \
> > +               __llvm_prf_vnds_end = .;                                \
> > +               __llvm_prf_end = .;                                     \
> > +       }
> > +#else
> > +#define PGO_CLANG_DATA
> > +#endif
> > +
> >  #define KERNEL_DTB()                                                   \
> >         STRUCT_ALIGN();                                                 \
> >         __dtb_start = .;                                                \
> > @@ -1125,6 +1168,7 @@
> >                 CONSTRUCTORS                                            \
> >         }                                                               \
> >         BUG_TABLE                                                       \
> > +       PGO_CLANG_DATA
> >
> >  #define INIT_TEXT_SECTION(inittext_align)                              \
> >         . = ALIGN(inittext_align);                                      \
> > diff --git a/kernel/Makefile b/kernel/Makefile
> > index aa7368c7eabf3..0b34ca228ba46 100644
> > --- a/kernel/Makefile
> > +++ b/kernel/Makefile
> > @@ -111,6 +111,7 @@ obj-$(CONFIG_BPF) += bpf/
> >  obj-$(CONFIG_KCSAN) += kcsan/
> >  obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o
> >  obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call.o
> > +obj-$(CONFIG_PGO_CLANG) += pgo/
> >
> >  obj-$(CONFIG_PERF_EVENTS) += events/
> >
> > diff --git a/kernel/pgo/Kconfig b/kernel/pgo/Kconfig
> > new file mode 100644
> > index 0000000000000..76a640b6cf6ed
> > --- /dev/null
> > +++ b/kernel/pgo/Kconfig
> > @@ -0,0 +1,35 @@
> > +# SPDX-License-Identifier: GPL-2.0-only
> > +menu "Profile Guided Optimization (PGO) (EXPERIMENTAL)"
> > +
> > +config ARCH_SUPPORTS_PGO_CLANG
> > +       bool
> > +
> > +config PGO_CLANG
> > +       bool "Enable clang's PGO-based kernel profiling"
> > +       depends on DEBUG_FS
> > +       depends on ARCH_SUPPORTS_PGO_CLANG
> > +       depends on CC_IS_CLANG && CLANG_VERSION >= 120000
> > +       help
> > +         This option enables clang's PGO (Profile Guided Optimization) based
> > +         code profiling to better optimize the kernel.
> > +
> > +         If unsure, say N.
> > +
> > +         Run a representative workload for your application on a kernel
> > +         compiled with this option and download the raw profile file from
> > +         /sys/kernel/debug/pgo/profraw. This file needs to be processed with
> > +         llvm-profdata. It may be merged with other collected raw profiles.
> > +
> > +         Copy the resulting profile file into vmlinux.profdata, and enable
> > +         KCFLAGS=-fprofile-use=vmlinux.profdata to produce an optimized
> > +         kernel.
> > +
> > +         Note that a kernel compiled with profiling flags will be
> > +         significantly larger and run slower. Also be sure to exclude files
> > +         from profiling which are not linked to the kernel image to prevent
> > +         linker errors.
> > +
> > +         Note that the debugfs filesystem has to be mounted to access
> > +         profiling data.
> > +
> > +endmenu
> > diff --git a/kernel/pgo/Makefile b/kernel/pgo/Makefile
> > new file mode 100644
> > index 0000000000000..41e27cefd9a47
> > --- /dev/null
> > +++ b/kernel/pgo/Makefile
> > @@ -0,0 +1,5 @@
> > +# SPDX-License-Identifier: GPL-2.0
> > +GCOV_PROFILE   := n
> > +PGO_PROFILE    := n
> > +
> > +obj-y  += fs.o instrument.o
> > diff --git a/kernel/pgo/fs.c b/kernel/pgo/fs.c
> > new file mode 100644
> > index 0000000000000..68b24672be10a
> > --- /dev/null
> > +++ b/kernel/pgo/fs.c
> > @@ -0,0 +1,382 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Copyright (C) 2019 Google, Inc.
> > + *
> > + * Author:
> > + *     Sami Tolvanen <samitolvanen@google.com>
> > + *
> > + * This software is licensed under the terms of the GNU General Public
> > + * License version 2, as published by the Free Software Foundation, and
> > + * may be copied, distributed, and modified under those terms.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > + * GNU General Public License for more details.
> > + *
> > + */
> > +
> > +#define pr_fmt(fmt)    "pgo: " fmt
> > +
> > +#include <linux/kernel.h>
> > +#include <linux/debugfs.h>
> > +#include <linux/fs.h>
> > +#include <linux/module.h>
> > +#include <linux/slab.h>
> > +#include <linux/vmalloc.h>
> > +#include "pgo.h"
> > +
> > +static struct dentry *directory;
> > +
> > +struct prf_private_data {
> > +       void *buffer;
> > +       unsigned long size;
> > +};
> > +
> > +/*
> > + * Raw profile data format:
> > + *
> > + *     - llvm_prf_header
> > + *     - __llvm_prf_data
> > + *     - __llvm_prf_cnts
> > + *     - __llvm_prf_names
> > + *     - zero padding to 8 bytes
> > + *     - for each llvm_prf_data in __llvm_prf_data:
> > + *             - llvm_prf_value_data
> > + *                     - llvm_prf_value_record + site count array
> > + *                             - llvm_prf_value_node_data
> > + *                             ...
> > + *                     ...
> > + *             ...
> > + */
> > +
> > +static void prf_fill_header(void **buffer)
> > +{
> > +       struct llvm_prf_header *header = *(struct llvm_prf_header **)buffer;
> > +
> > +       header->magic = LLVM_PRF_MAGIC;
> > +       header->version = LLVM_PRF_VARIANT_MASK_IR | LLVM_PRF_VERSION;
> > +       header->data_size = prf_data_count();
> > +       header->padding_bytes_before_counters = 0;
> > +       header->counters_size = prf_cnts_count();
> > +       header->padding_bytes_after_counters = 0;
> > +       header->names_size = prf_names_count();
> > +       header->counters_delta = (u64)__llvm_prf_cnts_start;
> > +       header->names_delta = (u64)__llvm_prf_names_start;
> > +       header->value_kind_last = LLVM_PRF_IPVK_LAST;
> > +
> > +       *buffer += sizeof(*header);
> > +}
> > +
> > +/*
> > + * Copy the source into the buffer, incrementing the pointer into buffer in the
> > + * process.
> > + */
> > +static void prf_copy_to_buffer(void **buffer, void *src, unsigned long size)
> > +{
> > +       memcpy(*buffer, src, size);
> > +       *buffer += size;
> > +}
> > +
> > +static u32 __prf_get_value_size(struct llvm_prf_data *p, u32 *value_kinds)
> > +{
> > +       struct llvm_prf_value_node **nodes =
> > +               (struct llvm_prf_value_node **)p->values;
> > +       u32 kinds = 0;
> > +       u32 size = 0;
> > +       unsigned int kind;
> > +       unsigned int n;
> > +       unsigned int s = 0;
> > +
> > +       for (kind = 0; kind < ARRAY_SIZE(p->num_value_sites); kind++) {
> > +               unsigned int sites = p->num_value_sites[kind];
> > +
> > +               if (!sites)
> > +                       continue;
> > +
> > +               /* Record + site count array */
> > +               size += prf_get_value_record_size(sites);
> > +               kinds++;
> > +
> > +               if (!nodes)
> > +                       continue;
> > +
> > +               for (n = 0; n < sites; n++) {
> > +                       u32 count = 0;
> > +                       struct llvm_prf_value_node *site = nodes[s + n];
> > +
> > +                       while (site && ++count <= U8_MAX)
> > +                               site = site->next;
> > +
> > +                       size += count *
> > +                               sizeof(struct llvm_prf_value_node_data);
> > +               }
> > +
> > +               s += sites;
> > +       }
> > +
> > +       if (size)
> > +               size += sizeof(struct llvm_prf_value_data);
> > +
> > +       if (value_kinds)
> > +               *value_kinds = kinds;
> > +
> > +       return size;
> > +}
> > +
> > +static u32 prf_get_value_size(void)
> > +{
> > +       u32 size = 0;
> > +       struct llvm_prf_data *p;
> > +
> > +       for (p = __llvm_prf_data_start; p < __llvm_prf_data_end; p++)
> > +               size += __prf_get_value_size(p, NULL);
> > +
> > +       return size;
> > +}
> > +
> > +/* Serialize the value profiling data of a single llvm_prf_data entry. */
> > +static void prf_serialize_value(struct llvm_prf_data *p, void **buffer)
> > +{
> > +       struct llvm_prf_value_data header;
> > +       struct llvm_prf_value_node **nodes =
> > +               (struct llvm_prf_value_node **)p->values;
> > +       unsigned int kind;
> > +       unsigned int n;
> > +       unsigned int s = 0;
> > +
> > +       header.total_size = __prf_get_value_size(p, &header.num_value_kinds);
> > +
> > +       if (!header.num_value_kinds)
> > +               /* Nothing to write. */
> > +               return;
> > +
> > +       prf_copy_to_buffer(buffer, &header, sizeof(header));
> > +
> > +       for (kind = 0; kind < ARRAY_SIZE(p->num_value_sites); kind++) {
> > +               struct llvm_prf_value_record *record;
> > +               u8 *counts;
> > +               unsigned int sites = p->num_value_sites[kind];
> > +
> > +               if (!sites)
> > +                       continue;
> > +
> > +               /* Profiling value record. */
> > +               record = *(struct llvm_prf_value_record **)buffer;
> > +               *buffer += prf_get_value_record_header_size();
> > +
> > +               record->kind = kind;
> > +               record->num_value_sites = sites;
> > +
> > +               /* Site count array. */
> > +               counts = *(u8 **)buffer;
> > +               *buffer += prf_get_value_record_site_count_size(sites);
> > +
> > +               /*
> > +                * If we don't have nodes, we can skip updating the site count
> > +                * array, because the buffer is zero filled.
> > +                */
> > +               if (!nodes)
> > +                       continue;
> > +
> > +               for (n = 0; n < sites; n++) {
> > +                       u32 count = 0;
> > +                       struct llvm_prf_value_node *site = nodes[s + n];
> > +
> > +                       while (site && ++count <= U8_MAX) {
> > +                               prf_copy_to_buffer(buffer, site,
> > +                                                  sizeof(struct llvm_prf_value_node_data));
> > +                               site = site->next;
> > +                       }
> > +
> > +                       counts[n] = (u8)count;
> > +               }
> > +
> > +               s += sites;
> > +       }
> > +}
> > +
> > +static void prf_serialize_values(void **buffer)
> > +{
> > +       struct llvm_prf_data *p;
> > +
> > +       for (p = __llvm_prf_data_start; p < __llvm_prf_data_end; p++)
> > +               prf_serialize_value(p, buffer);
> > +}
> > +
> > +static inline unsigned long prf_get_padding(unsigned long size)
> > +{
> > +       return 7 & (8 - size % 8);
> > +}
> > +
> > +static unsigned long prf_buffer_size(void)
> > +{
> > +       return sizeof(struct llvm_prf_header) +
> > +                       prf_data_size() +
> > +                       prf_cnts_size() +
> > +                       prf_names_size() +
> > +                       prf_get_padding(prf_names_size()) +
> > +                       prf_get_value_size();
> > +}
> > +
> > +/* Serialize the profiling data into a format LLVM's tools can understand. */
> > +static int prf_serialize(struct prf_private_data *p)
> > +{
> > +       int err = 0;
> > +       void *buffer;
> > +
> > +       p->size = prf_buffer_size();
> > +       p->buffer = vzalloc(p->size);
> > +
> > +       if (!p->buffer) {
> > +               err = -ENOMEM;
> > +               goto out;
> > +       }
> > +
> > +       buffer = p->buffer;
> > +
> > +       prf_fill_header(&buffer);
> > +       prf_copy_to_buffer(&buffer, __llvm_prf_data_start,  prf_data_size());
> > +       prf_copy_to_buffer(&buffer, __llvm_prf_cnts_start,  prf_cnts_size());
> > +       prf_copy_to_buffer(&buffer, __llvm_prf_names_start, prf_names_size());
> > +       buffer += prf_get_padding(prf_names_size());
> > +
> > +       prf_serialize_values(&buffer);
> > +
> > +out:
> > +       return err;
> > +}
> > +
> > +/* open() implementation for PGO. Creates a copy of the profiling data set. */
> > +static int prf_open(struct inode *inode, struct file *file)
> > +{
> > +       struct prf_private_data *data;
> > +       unsigned long flags;
> > +       int err;
> > +
> > +       data = kzalloc(sizeof(*data), GFP_KERNEL);
> > +       if (!data) {
> > +               err = -ENOMEM;
> > +               goto out;
> > +       }
> > +
> > +       flags = prf_lock();
> > +
> > +       err = prf_serialize(data);
> > +       if (err) {
> > +               kfree(data);
> > +               goto out_unlock;
> > +       }
> > +
> > +       file->private_data = data;
> > +
> > +out_unlock:
> > +       prf_unlock(flags);
> > +out:
> > +       return err;
> > +}
> > +
> > +/* read() implementation for PGO. */
> > +static ssize_t prf_read(struct file *file, char __user *buf, size_t count,
> > +                       loff_t *ppos)
> > +{
> > +       struct prf_private_data *data = file->private_data;
> > +
> > +       BUG_ON(!data);
> > +
> > +       return simple_read_from_buffer(buf, count, ppos, data->buffer,
> > +                                      data->size);
> > +}
> > +
> > +/* release() implementation for PGO. Release resources allocated by open(). */
> > +static int prf_release(struct inode *inode, struct file *file)
> > +{
> > +       struct prf_private_data *data = file->private_data;
> > +
> > +       if (data) {
> > +               vfree(data->buffer);
> > +               kfree(data);
> > +       }
> > +
> > +       return 0;
> > +}
> > +
> > +static const struct file_operations prf_fops = {
> > +       .owner          = THIS_MODULE,
> > +       .open           = prf_open,
> > +       .read           = prf_read,
> > +       .llseek         = default_llseek,
> > +       .release        = prf_release
> > +};
> > +
> > +/* write() implementation for resetting PGO's profile data. */
> > +static ssize_t reset_write(struct file *file, const char __user *addr,
> > +                          size_t len, loff_t *pos)
> > +{
> > +       struct llvm_prf_data *data;
> > +
> > +       memset(__llvm_prf_cnts_start, 0, prf_cnts_size());
> > +
> > +       for (data = __llvm_prf_data_start; data < __llvm_prf_data_end; ++data) {
> > +               struct llvm_prf_value_node **vnodes;
> > +               u64 current_vsite_count;
> > +               u32 i;
> > +
> > +               if (!data->values)
> > +                       continue;
> > +
> > +               current_vsite_count = 0;
> > +               vnodes = (struct llvm_prf_value_node **)data->values;
> > +
> > +               for (i = LLVM_PRF_IPVK_FIRST; i <= LLVM_PRF_IPVK_LAST; ++i)
> > +                       current_vsite_count += data->num_value_sites[i];
> > +
> > +               for (i = 0; i < current_vsite_count; ++i) {
> > +                       struct llvm_prf_value_node *current_vnode = vnodes[i];
> > +
> > +                       while (current_vnode) {
> > +                               current_vnode->count = 0;
> > +                               current_vnode = current_vnode->next;
> > +                       }
> > +               }
> > +       }
> > +
> > +       return len;
> > +}
> > +
> > +static const struct file_operations prf_reset_fops = {
> > +       .owner          = THIS_MODULE,
> > +       .write          = reset_write,
> > +       .llseek         = noop_llseek,
> > +};
> > +
> > +/* Create debugfs entries. */
> > +static int __init pgo_init(void)
> > +{
> > +       directory = debugfs_create_dir("pgo", NULL);
> > +       if (!directory)
> > +               goto err_remove;
> > +
> > +       if (!debugfs_create_file("profraw", 0600, directory, NULL,
> > +                                &prf_fops))
> > +               goto err_remove;
> > +
> > +       if (!debugfs_create_file("reset", 0200, directory, NULL,
> > +                                &prf_reset_fops))
> > +               goto err_remove;
> > +
> > +       return 0;
> > +
> > +err_remove:
> > +       pr_err("initialization failed\n");
> > +       return -EIO;
> > +}
> > +
> > +/* Remove debugfs entries. */
> > +static void __exit pgo_exit(void)
> > +{
> > +       debugfs_remove_recursive(directory);
> > +}
> > +
> > +module_init(pgo_init);
> > +module_exit(pgo_exit);
> > diff --git a/kernel/pgo/instrument.c b/kernel/pgo/instrument.c
> > new file mode 100644
> > index 0000000000000..6084ff0652e85
> > --- /dev/null
> > +++ b/kernel/pgo/instrument.c
> > @@ -0,0 +1,185 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Copyright (C) 2019 Google, Inc.
> > + *
> > + * Author:
> > + *     Sami Tolvanen <samitolvanen@google.com>
> > + *
> > + * This software is licensed under the terms of the GNU General Public
> > + * License version 2, as published by the Free Software Foundation, and
> > + * may be copied, distributed, and modified under those terms.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > + * GNU General Public License for more details.
> > + *
> > + */
> > +
> > +#define pr_fmt(fmt)    "pgo: " fmt
> > +
> > +#include <linux/bitops.h>
> > +#include <linux/kernel.h>
> > +#include <linux/export.h>
> > +#include <linux/spinlock.h>
> > +#include <linux/types.h>
> > +#include "pgo.h"
> > +
> > +/* Lock guarding value node access and serialization. */
> > +static DEFINE_SPINLOCK(pgo_lock);
> > +static int current_node;
> > +
> > +unsigned long prf_lock(void)
> > +{
> > +       unsigned long flags;
> > +
> > +       spin_lock_irqsave(&pgo_lock, flags);
> > +
> > +       return flags;
> > +}
> > +
> > +void prf_unlock(unsigned long flags)
> > +{
> > +       spin_unlock_irqrestore(&pgo_lock, flags);
> > +}
> > +
> > +/*
> > + * Return a newly allocated profiling value node which contains the value
> > + * tracked by the value profiler.
> > + * Note: caller *must* hold pgo_lock.
> > + */
> > +static struct llvm_prf_value_node *allocate_node(struct llvm_prf_data *p,
> > +                                                u32 index, u64 value)
> > +{
> > +       if (&__llvm_prf_vnds_start[current_node + 1] >= __llvm_prf_vnds_end)
> > +               return NULL; /* Out of nodes */
> > +
> > +       current_node++;
> > +
> > +       /* Make sure the node is entirely within the section */
> > +       if (&__llvm_prf_vnds_start[current_node] >= __llvm_prf_vnds_end ||
> > +           &__llvm_prf_vnds_start[current_node + 1] > __llvm_prf_vnds_end)
> > +               return NULL;
> > +
> > +       return &__llvm_prf_vnds_start[current_node];
> > +}
> > +
> > +/*
> > + * Counts the number of times a target value is seen.
> > + *
> > + * Records the target value for the CounterIndex if not seen before. Otherwise,
> > + * increments the counter associated w/ the target value.
> > + */
> > +void __llvm_profile_instrument_target(u64 target_value, void *data, u32 index);
> > +void __llvm_profile_instrument_target(u64 target_value, void *data, u32 index)
> > +{
> > +       struct llvm_prf_data *p = (struct llvm_prf_data *)data;
> > +       struct llvm_prf_value_node **counters;
> > +       struct llvm_prf_value_node *curr;
> > +       struct llvm_prf_value_node *min = NULL;
> > +       struct llvm_prf_value_node *prev = NULL;
> > +       u64 min_count = U64_MAX;
> > +       u8 values = 0;
> > +       unsigned long flags;
> > +
> > +       if (!p || !p->values)
> > +               return;
> > +
> > +       counters = (struct llvm_prf_value_node **)p->values;
> > +       curr = counters[index];
> > +
> > +       while (curr) {
> > +               if (target_value == curr->value) {
> > +                       curr->count++;
> > +                       return;
> > +               }
> > +
> > +               if (curr->count < min_count) {
> > +                       min_count = curr->count;
> > +                       min = curr;
> > +               }
> > +
> > +               prev = curr;
> > +               curr = curr->next;
> > +               values++;
> > +       }
> > +
> > +       if (values >= LLVM_PRF_MAX_NUM_VALS_PER_SITE) {
> > +               if (!min->count || !(--min->count)) {
> > +                       curr = min;
> > +                       curr->value = target_value;
> > +                       curr->count++;
> > +               }
> > +               return;
> > +       }
> > +
> > +       /* Lock when updating the value node structure. */
> > +       flags = prf_lock();
> > +
> > +       curr = allocate_node(p, index, target_value);
> > +       if (!curr)
> > +               goto out;
> > +
> > +       curr->value = target_value;
> > +       curr->count++;
> > +
> > +       if (!counters[index])
> > +               counters[index] = curr;
> > +       else if (prev && !prev->next)
> > +               prev->next = curr;
> > +
> > +out:
> > +       prf_unlock(flags);
> > +}
> > +EXPORT_SYMBOL(__llvm_profile_instrument_target);
> > +
> > +/* Counts the number of times a range of target values is seen. */
> > +void __llvm_profile_instrument_range(u64 target_value, void *data,
> > +                                    u32 index, s64 precise_start,
> > +                                    s64 precise_last, s64 large_value);
> > +void __llvm_profile_instrument_range(u64 target_value, void *data,
> > +                                    u32 index, s64 precise_start,
> > +                                    s64 precise_last, s64 large_value)
> > +{
> > +       if (large_value != S64_MIN && (s64)target_value >= large_value)
> > +               target_value = large_value;
> > +       else if ((s64)target_value < precise_start ||
> > +                (s64)target_value > precise_last)
> > +               target_value = precise_last + 1;
> > +
> > +       __llvm_profile_instrument_target(target_value, data, index);
> > +}
> > +EXPORT_SYMBOL(__llvm_profile_instrument_range);
> > +
> > +static u64 inst_prof_get_range_rep_value(u64 value)
> > +{
> > +       if (value <= 8)
> > +               /* The first ranges are individually tracked, use it as is. */
> > +               return value;
> > +       else if (value >= 513)
> > +               /* The last range is mapped to its lowest value. */
> > +               return 513;
> > +       else if (hweight64(value) == 1)
> > +               /* If it's a power of two, use it as is. */
> > +               return value;
> > +
> > +       /* Otherwise, map it to the previous power of two + 1. */
> > +       return (1 << (64 - __builtin_clzll(value) - 1)) + 1;
> > +}
> > +
> > +/*
> > + * The target values are partitioned into multiple ranges. The range spec is
> > + * defined in compiler-rt/include/profile/InstrProfData.inc.
> > + */
> > +void __llvm_profile_instrument_memop(u64 target_value, void *data,
> > +                                    u32 counter_index);
> > +void __llvm_profile_instrument_memop(u64 target_value, void *data,
> > +                                    u32 counter_index)
> > +{
> > +       u64 rep_value;
> > +
> > +       /* Map the target value to the representative value of its range. */
> > +       rep_value = inst_prof_get_range_rep_value(target_value);
> > +       __llvm_profile_instrument_target(rep_value, data, counter_index);
> > +}
> > +EXPORT_SYMBOL(__llvm_profile_instrument_memop);
> > diff --git a/kernel/pgo/pgo.h b/kernel/pgo/pgo.h
> > new file mode 100644
> > index 0000000000000..df0aa278f28bd
> > --- /dev/null
> > +++ b/kernel/pgo/pgo.h
> > @@ -0,0 +1,206 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +/*
> > + * Copyright (C) 2019 Google, Inc.
> > + *
> > + * Author:
> > + *     Sami Tolvanen <samitolvanen@google.com>
> > + *
> > + * This software is licensed under the terms of the GNU General Public
> > + * License version 2, as published by the Free Software Foundation, and
> > + * may be copied, distributed, and modified under those terms.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > + * GNU General Public License for more details.
> > + *
> > + */
> > +
> > +#ifndef _PGO_H
> > +#define _PGO_H
> > +
> > +/*
> > + * Note: These internal LLVM definitions must match the compiler version.
> > + * See llvm/include/llvm/ProfileData/InstrProfData.inc in LLVM's source code.
> > + */
> > +
> > +#ifdef CONFIG_64BIT
> > +       #define LLVM_PRF_MAGIC          \
> > +               ((u64)255 << 56 |       \
> > +                (u64)'l' << 48 |       \
> > +                (u64)'p' << 40 |       \
> > +                (u64)'r' << 32 |       \
> > +                (u64)'o' << 24 |       \
> > +                (u64)'f' << 16 |       \
> > +                (u64)'r' << 8  |       \
> > +                (u64)129)
> > +#else
> > +       #define LLVM_PRF_MAGIC          \
> > +               ((u64)255 << 56 |       \
> > +                (u64)'l' << 48 |       \
> > +                (u64)'p' << 40 |       \
> > +                (u64)'r' << 32 |       \
> > +                (u64)'o' << 24 |       \
> > +                (u64)'f' << 16 |       \
> > +                (u64)'R' << 8  |       \
> > +                (u64)129)
> > +#endif
> > +
> > +#define LLVM_PRF_VERSION               5
> > +#define LLVM_PRF_DATA_ALIGN            8
> > +#define LLVM_PRF_IPVK_FIRST            0
> > +#define LLVM_PRF_IPVK_LAST             1
> > +#define LLVM_PRF_MAX_NUM_VALS_PER_SITE 16
> > +
> > +#define LLVM_PRF_VARIANT_MASK_IR       (0x1ull << 56)
> > +#define LLVM_PRF_VARIANT_MASK_CSIR     (0x1ull << 57)
> > +
> > +/**
> > + * struct llvm_prf_header - represents the raw profile header data structure.
> > + * @magic: the magic token for the file format.
> > + * @version: the version of the file format.
> > + * @data_size: the number of entries in the profile data section.
> > + * @padding_bytes_before_counters: the number of padding bytes before the
> > + *   counters.
> > + * @counters_size: the size in bytes of the LLVM profile section containing the
> > + *   counters.
> > + * @padding_bytes_after_counters: the number of padding bytes after the
> > + *   counters.
> > + * @names_size: the size in bytes of the LLVM profile section containing the
> > + *   counters' names.
> > + * @counters_delta: the beginning of the LLVM profile counters section.
> > + * @names_delta: the beginning of the LLVM profile names section.
> > + * @value_kind_last: the last profile value kind.
> > + */
> > +struct llvm_prf_header {
> > +       u64 magic;
> > +       u64 version;
> > +       u64 data_size;
> > +       u64 padding_bytes_before_counters;
> > +       u64 counters_size;
> > +       u64 padding_bytes_after_counters;
> > +       u64 names_size;
> > +       u64 counters_delta;
> > +       u64 names_delta;
> > +       u64 value_kind_last;
> > +};
> > +
> > +/**
> > + * struct llvm_prf_data - represents the per-function control structure.
> > + * @name_ref: the reference to the function's name.
> > + * @func_hash: the hash value of the function.
> > + * @counter_ptr: a pointer to the profile counter.
> > + * @function_ptr: a pointer to the function.
> > + * @values: the profiling values associated with this function.
> > + * @num_counters: the number of counters in the function.
> > + * @num_value_sites: the number of value profile sites.
> > + */
> > +struct llvm_prf_data {
> > +       const u64 name_ref;
> > +       const u64 func_hash;
> > +       const void *counter_ptr;
> > +       const void *function_ptr;
> > +       void *values;
> > +       const u32 num_counters;
> > +       const u16 num_value_sites[LLVM_PRF_IPVK_LAST + 1];
> > +} __aligned(LLVM_PRF_DATA_ALIGN);
> > +
> > +/**
> > + * struct llvm_prf_value_node_data - represents the data part of the struct
> > + *   llvm_prf_value_node data structure.
> > + * @value: the value counters.
> > + * @count: the counters' count.
> > + */
> > +struct llvm_prf_value_node_data {
> > +       u64 value;
> > +       u64 count;
> > +};
> > +
> > +/**
> > + * struct llvm_prf_value_node - represents an internal data structure used by
> > + *   the value profiler.
> > + * @value: the value counters.
> > + * @count: the counters' count.
> > + * @next: the next value node.
> > + */
> > +struct llvm_prf_value_node {
> > +       u64 value;
> > +       u64 count;
> > +       struct llvm_prf_value_node *next;
> > +};
> > +
> > +/**
> > + * struct llvm_prf_value_data - represents the value profiling data in indexed
> > + *   format.
> > + * @total_size: the total size in bytes including this field.
> > + * @num_value_kinds: the number of value profile kinds that have value profile
> > + *   data.
> > + */
> > +struct llvm_prf_value_data {
> > +       u32 total_size;
> > +       u32 num_value_kinds;
> > +};
> > +
> > +/**
> > + * struct llvm_prf_value_record - represents the on-disk layout of the value
> > + *   profile data of a particular kind for one function.
> > + * @kind: the kind of the value profile record.
> > + * @num_value_sites: the number of value profile sites.
> > + * @site_count_array: the first element of the array that stores the number
> > + *   of profiled values for each value site.
> > + */
> > +struct llvm_prf_value_record {
> > +       u32 kind;
> > +       u32 num_value_sites;
> > +       u8 site_count_array[];
> > +};
> > +
> > +#define prf_get_value_record_header_size()             \
> > +       offsetof(struct llvm_prf_value_record, site_count_array)
> > +#define prf_get_value_record_site_count_size(sites)    \
> > +       roundup((sites), 8)
> > +#define prf_get_value_record_size(sites)               \
> > +       (prf_get_value_record_header_size() +           \
> > +        prf_get_value_record_site_count_size((sites)))
> > +
> > +/* Data sections */
> > +extern struct llvm_prf_data __llvm_prf_data_start[];
> > +extern struct llvm_prf_data __llvm_prf_data_end[];
> > +
> > +extern u64 __llvm_prf_cnts_start[];
> > +extern u64 __llvm_prf_cnts_end[];
> > +
> > +extern char __llvm_prf_names_start[];
> > +extern char __llvm_prf_names_end[];
> > +
> > +extern struct llvm_prf_value_node __llvm_prf_vnds_start[];
> > +extern struct llvm_prf_value_node __llvm_prf_vnds_end[];
> > +
> > +/* Locking for vnodes */
> > +extern unsigned long prf_lock(void);
> > +extern void prf_unlock(unsigned long flags);
> > +
> > +#define __DEFINE_PRF_SIZE(s) \
> > +       static inline unsigned long prf_ ## s ## _size(void)            \
> > +       {                                                               \
> > +               unsigned long start =                                   \
> > +                       (unsigned long)__llvm_prf_ ## s ## _start;      \
> > +               unsigned long end =                                     \
> > +                       (unsigned long)__llvm_prf_ ## s ## _end;        \
> > +               return roundup(end - start,                             \
> > +                               sizeof(__llvm_prf_ ## s ## _start[0])); \
> > +       }                                                               \
> > +       static inline unsigned long prf_ ## s ## _count(void)           \
> > +       {                                                               \
> > +               return prf_ ## s ## _size() /                           \
> > +                       sizeof(__llvm_prf_ ## s ## _start[0]);          \
> > +       }
> > +
> > +__DEFINE_PRF_SIZE(data);
> > +__DEFINE_PRF_SIZE(cnts);
> > +__DEFINE_PRF_SIZE(names);
> > +__DEFINE_PRF_SIZE(vnds);
> > +
> > +#undef __DEFINE_PRF_SIZE
> > +
> > +#endif /* _PGO_H */
> > diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
> > index 213677a5ed33e..9b218afb5cb87 100644
> > --- a/scripts/Makefile.lib
> > +++ b/scripts/Makefile.lib
> > @@ -143,6 +143,16 @@ _c_flags += $(if $(patsubst n%,, \
> >                 $(CFLAGS_GCOV))
> >  endif
> >
> > +#
> > +# Enable clang's PGO profiling flags for a file or directory depending on
> > +# variables PGO_PROFILE_obj.o and PGO_PROFILE.
> > +#
> > +ifeq ($(CONFIG_PGO_CLANG),y)
> > +_c_flags += $(if $(patsubst n%,, \
> > +               $(PGO_PROFILE_$(basetarget).o)$(PGO_PROFILE)y), \
> > +               $(CFLAGS_PGO_CLANG))
> > +endif
> > +
> >  #
> >  # Enable address sanitizer flags for kernel except some files or directories
> >  # we don't want to check (depends on variables KASAN_SANITIZE_obj.o, KASAN_SANITIZE)
> > --
> > 2.30.0.284.gd98b1dd5eaa7-goog
> >
> > --
> > You received this message because you are subscribed to the Google Groups "Clang Built Linux" group.
> > To unsubscribe from this group and stop receiving emails from it, send an email to clang-built-linux+unsubscribe@googlegroups.com.
> > To view this discussion on the web visit https://groups.google.com/d/msgid/clang-built-linux/20210116094357.3620352-1-morbo%40google.com.
Bill Wendling Jan. 16, 2021, 8:23 p.m. UTC | #3
On Sat, Jan 16, 2021 at 9:39 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> On Sat, Jan 16, 2021 at 10:44 AM 'Bill Wendling' via Clang Built Linux
> <clang-built-linux@googlegroups.com> wrote:
> >
> > From: Sami Tolvanen <samitolvanen@google.com>
> >
> > Enable the use of clang's Profile-Guided Optimization[1]. To generate a
> > profile, the kernel is instrumented with PGO counters, a representative
> > workload is run, and the raw profile data is collected from
> > /sys/kernel/debug/pgo/profraw.
> >
> > The raw profile data must be processed by clang's "llvm-profdata" tool
> > before it can be used during recompilation:
> >
> >   $ cp /sys/kernel/debug/pgo/profraw vmlinux.profraw
> >   $ llvm-profdata merge --output=vmlinux.profdata vmlinux.profraw
> >
> > Multiple raw profiles may be merged during this step.
> >
> > The data can now be used by the compiler:
> >
> >   $ make LLVM=1 KCFLAGS=-fprofile-use=vmlinux.profdata ...
> >
> > This initial submission is restricted to x86, as that's the platform we
> > know works. This restriction can be lifted once other platforms have
> > been verified to work with PGO.
> >
> > Note that this method of profiling the kernel is clang-native, unlike
> > the clang support in kernel/gcov.
> >
> > [1] https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization
> >
> > Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
> > Co-developed-by: Bill Wendling <morbo@google.com>
> > Signed-off-by: Bill Wendling <morbo@google.com>
> > ---
> > v2: - Added "__llvm_profile_instrument_memop" based on Nathan Chancellor's
> >       testing.
> >     - Corrected documentation, re PGO flags when using LTO, based on Fangrui
> >       Song's comments.
> > v3: - Added change log section based on Sedat Dilek's comments.
> > v4: - Remove non-x86 Makefile changes and use "hweight64" instead of using our
> >       own popcount implementation, based on Nick Desaulniers's comment.
> > v5: - Correct padding calculation, discovered by Nathan Chancellor.
> > ---
> >  Documentation/dev-tools/index.rst     |   1 +
> >  Documentation/dev-tools/pgo.rst       | 127 +++++++++
> >  MAINTAINERS                           |   9 +
> >  Makefile                              |   3 +
> >  arch/Kconfig                          |   1 +
> >  arch/x86/Kconfig                      |   1 +
> >  arch/x86/boot/Makefile                |   1 +
> >  arch/x86/boot/compressed/Makefile     |   1 +
> >  arch/x86/crypto/Makefile              |   2 +
> >  arch/x86/entry/vdso/Makefile          |   1 +
> >  arch/x86/kernel/vmlinux.lds.S         |   2 +
> >  arch/x86/platform/efi/Makefile        |   1 +
> >  arch/x86/purgatory/Makefile           |   1 +
> >  arch/x86/realmode/rm/Makefile         |   1 +
> >  arch/x86/um/vdso/Makefile             |   1 +
> >  drivers/firmware/efi/libstub/Makefile |   1 +
> >  include/asm-generic/vmlinux.lds.h     |  44 +++
> >  kernel/Makefile                       |   1 +
> >  kernel/pgo/Kconfig                    |  35 +++
> >  kernel/pgo/Makefile                   |   5 +
> >  kernel/pgo/fs.c                       | 382 ++++++++++++++++++++++++++
> >  kernel/pgo/instrument.c               | 185 +++++++++++++
> >  kernel/pgo/pgo.h                      | 206 ++++++++++++++
> >  scripts/Makefile.lib                  |  10 +
> >  24 files changed, 1022 insertions(+)
> >  create mode 100644 Documentation/dev-tools/pgo.rst
> >  create mode 100644 kernel/pgo/Kconfig
> >  create mode 100644 kernel/pgo/Makefile
> >  create mode 100644 kernel/pgo/fs.c
> >  create mode 100644 kernel/pgo/instrument.c
> >  create mode 100644 kernel/pgo/pgo.h
> >
> > diff --git a/Documentation/dev-tools/index.rst b/Documentation/dev-tools/index.rst
> > index f7809c7b1ba9e..8d6418e858062 100644
> > --- a/Documentation/dev-tools/index.rst
> > +++ b/Documentation/dev-tools/index.rst
> > @@ -26,6 +26,7 @@ whole; patches welcome!
> >     kgdb
> >     kselftest
> >     kunit/index
> > +   pgo
> >
> >
> >  .. only::  subproject and html
> > diff --git a/Documentation/dev-tools/pgo.rst b/Documentation/dev-tools/pgo.rst
> > new file mode 100644
> > index 0000000000000..b7f11d8405b73
> > --- /dev/null
> > +++ b/Documentation/dev-tools/pgo.rst
> > @@ -0,0 +1,127 @@
> > +.. SPDX-License-Identifier: GPL-2.0
> > +
> > +===============================
> > +Using PGO with the Linux kernel
> > +===============================
> > +
> > +Clang's profiling kernel support (PGO_) enables profiling of the Linux kernel
> > +when building with Clang. The profiling data is exported via the ``pgo``
> > +debugfs directory.
> > +
> > +.. _PGO: https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization
> > +
> > +
> > +Preparation
> > +===========
> > +
> > +Configure the kernel with:
> > +
> > +.. code-block:: make
> > +
> > +   CONFIG_DEBUG_FS=y
> > +   CONFIG_PGO_CLANG=y
> > +
> > +Note that kernels compiled with profiling flags will be significantly larger
> > +and run slower.
> > +
> > +Profiling data will only become accessible once debugfs has been mounted:
> > +
> > +.. code-block:: sh
> > +
> > +   mount -t debugfs none /sys/kernel/debug
> > +
> > +
> > +Customization
> > +=============
> > +
> > +You can enable or disable profiling for individual files and directories by
> > +adding a line similar to the following to the respective kernel Makefile:
> > +
> > +- For a single file (e.g. main.o)
> > +
> > +  .. code-block:: make
> > +
> > +     PGO_PROFILE_main.o := y
> > +
> > +- For all files in one directory
> > +
> > +  .. code-block:: make
> > +
> > +     PGO_PROFILE := y
> > +
> > +To exclude files from being profiled use
> > +
> > +  .. code-block:: make
> > +
> > +     PGO_PROFILE_main.o := n
> > +
> > +and
> > +
> > +  .. code-block:: make
> > +
> > +     PGO_PROFILE := n
> > +
> > +Only files which are linked to the main kernel image or are compiled as kernel
> > +modules are supported by this mechanism.
> > +
> > +
> > +Files
> > +=====
> > +
> > +The PGO kernel support creates the following files in debugfs:
> > +
> > +``/sys/kernel/debug/pgo``
> > +       Parent directory for all PGO-related files.
> > +
> > +``/sys/kernel/debug/pgo/reset``
> > +       Global reset file: resets all coverage data to zero when written to.
> > +
> > +``/sys/kernel/debug/pgo/profraw``
> > +       The raw PGO data that must be processed with ``llvm-profdata``.
> > +
> > +
> > +Workflow
> > +========
> > +
> > +The PGO kernel can be run on the host or test machines. The data, though,
> > +should be analyzed with Clang's tools from the same Clang version that the
> > +kernel was compiled with. Clang is tolerant of version skew, but it's easier
> > +to use the same Clang version.
> > +
> > +The profiling data is useful for optimizing the kernel, analyzing coverage,
> > +etc. Clang offers tools to perform these tasks.
> > +
> > +Here is an example workflow for profiling an instrumented kernel with PGO and
> > +using the result to optimize the kernel:
> > +
> > +1) Install the kernel on the TEST machine.
> > +
> > +2) Reset the data counters right before running the load tests
> > +
> > +   .. code-block:: sh
> > +
> > +      $ echo 1 > /sys/kernel/debug/pgo/reset
> > +
>
> I do not get this...
>
> # mount | grep debugfs
> debugfs on /sys/kernel/debug type debugfs (rw,nosuid,nodev,noexec,relatime)
>
> After the load-test...?
>
> echo 0 > /sys/kernel/debug/pgo/reset
>
Writing anything to /sys/kernel/debug/pgo/reset will cause it to reset
the profiling counters. I picked 1 (one) semi-randomly, but it could
be any number, letter, your favorite short story, etc. You don't want
to reset it before collecting the profiling data from your load tests
though.

> > +3) Run the load tests.
> > +
> > +4) Collect the raw profile data
> > +
> > +   .. code-block:: sh
> > +
> > +      $ cp -a /sys/kernel/debug/pgo/profraw /tmp/vmlinux.profraw
> > +
>
> This is only 4,9M small and seen from the date 5mins before I run the
> echo-1 line.
>
> # ll /sys/kernel/debug/pgo
> insgesamt 0
> drwxr-xr-x  2 root root 0 16. Jan 17:29 .
> drwx------ 41 root root 0 16. Jan 17:29 ..
> -rw-------  1 root root 0 16. Jan 17:29 profraw
> --w-------  1 root root 0 16. Jan 18:19 reset
>
> # cp -a /sys/kernel/debug/pgo/profraw /tmp/vmlinux.profraw
>
> # ll /tmp/vmlinux.profraw
> -rw------- 1 root root 4,9M 16. Jan 17:29 /tmp/vmlinux.profraw
>
> For me there was no prof-data collected from my defconfig kernel-build.
>
The /sys/kernel/debug/pgo/profraw file is read-only. Nothing writes to
it, not even the kernel. All it does is serialize the profiling
counters from a memory location in the kernel into a format that
LLVM's tools can understand.

> > +5) (Optional) Download the raw profile data to the HOST machine.
> > +
> > +6) Process the raw profile data
> > +
> > +   .. code-block:: sh
> > +
> > +      $ llvm-profdata merge --output=vmlinux.profdata vmlinux.profraw
> > +
>
> Is that executed in /path/to/linux/git?
>
The llvm-profdata tool is not in the linux source tree. You need to
grab it from a clang distribution (or build it from clang's git repo).

> > +   Note that multiple raw profile data files can be merged during this step.
> > +
> > +7) Rebuild the kernel using the profile data (PGO disabled)
> > +
> > +   .. code-block:: sh
> > +
> > +      $ make LLVM=1 KCFLAGS=-fprofile-use=vmlinux.profdata ...
>
> How big is vmlinux.profdata (make defconfig)?
>
I don't have numbers for this, but from what you listed here, it's ~5M
in size. The size is proportional to the number of counters
instrumented in the kernel.

> Do I need to do a full defconfig build or can I stop the build after
> let me say 10mins?
>
You should do a full rebuild. Make sure that PGO is disabled during the rebuild.

-bw
Sedat Dilek Jan. 17, 2021, 10:44 a.m. UTC | #4
On Sat, Jan 16, 2021 at 9:23 PM Bill Wendling <morbo@google.com> wrote:
>
> On Sat, Jan 16, 2021 at 9:39 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > On Sat, Jan 16, 2021 at 10:44 AM 'Bill Wendling' via Clang Built Linux
> > <clang-built-linux@googlegroups.com> wrote:
> > >
> > > From: Sami Tolvanen <samitolvanen@google.com>
> > >
> > > Enable the use of clang's Profile-Guided Optimization[1]. To generate a
> > > profile, the kernel is instrumented with PGO counters, a representative
> > > workload is run, and the raw profile data is collected from
> > > /sys/kernel/debug/pgo/profraw.
> > >
> > > The raw profile data must be processed by clang's "llvm-profdata" tool
> > > before it can be used during recompilation:
> > >
> > >   $ cp /sys/kernel/debug/pgo/profraw vmlinux.profraw
> > >   $ llvm-profdata merge --output=vmlinux.profdata vmlinux.profraw
> > >
> > > Multiple raw profiles may be merged during this step.
> > >
> > > The data can now be used by the compiler:
> > >
> > >   $ make LLVM=1 KCFLAGS=-fprofile-use=vmlinux.profdata ...
> > >
> > > This initial submission is restricted to x86, as that's the platform we
> > > know works. This restriction can be lifted once other platforms have
> > > been verified to work with PGO.
> > >
> > > Note that this method of profiling the kernel is clang-native, unlike
> > > the clang support in kernel/gcov.
> > >
> > > [1] https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization
> > >
> > > Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
> > > Co-developed-by: Bill Wendling <morbo@google.com>
> > > Signed-off-by: Bill Wendling <morbo@google.com>
> > > ---
> > > v2: - Added "__llvm_profile_instrument_memop" based on Nathan Chancellor's
> > >       testing.
> > >     - Corrected documentation, re PGO flags when using LTO, based on Fangrui
> > >       Song's comments.
> > > v3: - Added change log section based on Sedat Dilek's comments.
> > > v4: - Remove non-x86 Makefile changes and use "hweight64" instead of using our
> > >       own popcount implementation, based on Nick Desaulniers's comment.
> > > v5: - Correct padding calculation, discovered by Nathan Chancellor.
> > > ---
> > >  Documentation/dev-tools/index.rst     |   1 +
> > >  Documentation/dev-tools/pgo.rst       | 127 +++++++++
> > >  MAINTAINERS                           |   9 +
> > >  Makefile                              |   3 +
> > >  arch/Kconfig                          |   1 +
> > >  arch/x86/Kconfig                      |   1 +
> > >  arch/x86/boot/Makefile                |   1 +
> > >  arch/x86/boot/compressed/Makefile     |   1 +
> > >  arch/x86/crypto/Makefile              |   2 +
> > >  arch/x86/entry/vdso/Makefile          |   1 +
> > >  arch/x86/kernel/vmlinux.lds.S         |   2 +
> > >  arch/x86/platform/efi/Makefile        |   1 +
> > >  arch/x86/purgatory/Makefile           |   1 +
> > >  arch/x86/realmode/rm/Makefile         |   1 +
> > >  arch/x86/um/vdso/Makefile             |   1 +
> > >  drivers/firmware/efi/libstub/Makefile |   1 +
> > >  include/asm-generic/vmlinux.lds.h     |  44 +++
> > >  kernel/Makefile                       |   1 +
> > >  kernel/pgo/Kconfig                    |  35 +++
> > >  kernel/pgo/Makefile                   |   5 +
> > >  kernel/pgo/fs.c                       | 382 ++++++++++++++++++++++++++
> > >  kernel/pgo/instrument.c               | 185 +++++++++++++
> > >  kernel/pgo/pgo.h                      | 206 ++++++++++++++
> > >  scripts/Makefile.lib                  |  10 +
> > >  24 files changed, 1022 insertions(+)
> > >  create mode 100644 Documentation/dev-tools/pgo.rst
> > >  create mode 100644 kernel/pgo/Kconfig
> > >  create mode 100644 kernel/pgo/Makefile
> > >  create mode 100644 kernel/pgo/fs.c
> > >  create mode 100644 kernel/pgo/instrument.c
> > >  create mode 100644 kernel/pgo/pgo.h
> > >
> > > diff --git a/Documentation/dev-tools/index.rst b/Documentation/dev-tools/index.rst
> > > index f7809c7b1ba9e..8d6418e858062 100644
> > > --- a/Documentation/dev-tools/index.rst
> > > +++ b/Documentation/dev-tools/index.rst
> > > @@ -26,6 +26,7 @@ whole; patches welcome!
> > >     kgdb
> > >     kselftest
> > >     kunit/index
> > > +   pgo
> > >
> > >
> > >  .. only::  subproject and html
> > > diff --git a/Documentation/dev-tools/pgo.rst b/Documentation/dev-tools/pgo.rst
> > > new file mode 100644
> > > index 0000000000000..b7f11d8405b73
> > > --- /dev/null
> > > +++ b/Documentation/dev-tools/pgo.rst
> > > @@ -0,0 +1,127 @@
> > > +.. SPDX-License-Identifier: GPL-2.0
> > > +
> > > +===============================
> > > +Using PGO with the Linux kernel
> > > +===============================
> > > +
> > > +Clang's profiling kernel support (PGO_) enables profiling of the Linux kernel
> > > +when building with Clang. The profiling data is exported via the ``pgo``
> > > +debugfs directory.
> > > +
> > > +.. _PGO: https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization
> > > +
> > > +
> > > +Preparation
> > > +===========
> > > +
> > > +Configure the kernel with:
> > > +
> > > +.. code-block:: make
> > > +
> > > +   CONFIG_DEBUG_FS=y
> > > +   CONFIG_PGO_CLANG=y
> > > +
> > > +Note that kernels compiled with profiling flags will be significantly larger
> > > +and run slower.
> > > +
> > > +Profiling data will only become accessible once debugfs has been mounted:
> > > +
> > > +.. code-block:: sh
> > > +
> > > +   mount -t debugfs none /sys/kernel/debug
> > > +
> > > +
> > > +Customization
> > > +=============
> > > +
> > > +You can enable or disable profiling for individual files and directories by
> > > +adding a line similar to the following to the respective kernel Makefile:
> > > +
> > > +- For a single file (e.g. main.o)
> > > +
> > > +  .. code-block:: make
> > > +
> > > +     PGO_PROFILE_main.o := y
> > > +
> > > +- For all files in one directory
> > > +
> > > +  .. code-block:: make
> > > +
> > > +     PGO_PROFILE := y
> > > +
> > > +To exclude files from being profiled use
> > > +
> > > +  .. code-block:: make
> > > +
> > > +     PGO_PROFILE_main.o := n
> > > +
> > > +and
> > > +
> > > +  .. code-block:: make
> > > +
> > > +     PGO_PROFILE := n
> > > +
> > > +Only files which are linked to the main kernel image or are compiled as kernel
> > > +modules are supported by this mechanism.
> > > +
> > > +
> > > +Files
> > > +=====
> > > +
> > > +The PGO kernel support creates the following files in debugfs:
> > > +
> > > +``/sys/kernel/debug/pgo``
> > > +       Parent directory for all PGO-related files.
> > > +
> > > +``/sys/kernel/debug/pgo/reset``
> > > +       Global reset file: resets all coverage data to zero when written to.
> > > +
> > > +``/sys/kernel/debug/pgo/profraw``
> > > +       The raw PGO data that must be processed with ``llvm-profdata``.
> > > +
> > > +
> > > +Workflow
> > > +========
> > > +
> > > +The PGO kernel can be run on the host or test machines. The data, though,
> > > +should be analyzed with Clang's tools from the same Clang version that the
> > > +kernel was compiled with. Clang is tolerant of version skew, but it's easier
> > > +to use the same Clang version.
> > > +
> > > +The profiling data is useful for optimizing the kernel, analyzing coverage,
> > > +etc. Clang offers tools to perform these tasks.
> > > +
> > > +Here is an example workflow for profiling an instrumented kernel with PGO and
> > > +using the result to optimize the kernel:
> > > +
> > > +1) Install the kernel on the TEST machine.
> > > +
> > > +2) Reset the data counters right before running the load tests
> > > +
> > > +   .. code-block:: sh
> > > +
> > > +      $ echo 1 > /sys/kernel/debug/pgo/reset
> > > +
> >
> > I do not get this...
> >
> > # mount | grep debugfs
> > debugfs on /sys/kernel/debug type debugfs (rw,nosuid,nodev,noexec,relatime)
> >
> > After the load-test...?
> >
> > echo 0 > /sys/kernel/debug/pgo/reset
> >
> Writing anything to /sys/kernel/debug/pgo/reset will cause it to reset
> the profiling counters. I picked 1 (one) semi-randomly, but it could
> be any number, letter, your favorite short story, etc. You don't want
> to reset it before collecting the profiling data from your load tests
> though.
>
> > > +3) Run the load tests.
> > > +
> > > +4) Collect the raw profile data
> > > +
> > > +   .. code-block:: sh
> > > +
> > > +      $ cp -a /sys/kernel/debug/pgo/profraw /tmp/vmlinux.profraw
> > > +
> >
> > This is only 4,9M small and seen from the date 5mins before I run the
> > echo-1 line.
> >
> > # ll /sys/kernel/debug/pgo
> > insgesamt 0
> > drwxr-xr-x  2 root root 0 16. Jan 17:29 .
> > drwx------ 41 root root 0 16. Jan 17:29 ..
> > -rw-------  1 root root 0 16. Jan 17:29 profraw
> > --w-------  1 root root 0 16. Jan 18:19 reset
> >
> > # cp -a /sys/kernel/debug/pgo/profraw /tmp/vmlinux.profraw
> >
> > # ll /tmp/vmlinux.profraw
> > -rw------- 1 root root 4,9M 16. Jan 17:29 /tmp/vmlinux.profraw
> >
> > For me there was no prof-data collected from my defconfig kernel-build.
> >
> The /sys/kernel/debug/pgo/profraw file is read-only. Nothing writes to
> it, not even the kernel. All it does is serialize the profiling
> counters from a memory location in the kernel into a format that
> LLVM's tools can understand.
>
> > > +5) (Optional) Download the raw profile data to the HOST machine.
> > > +
> > > +6) Process the raw profile data
> > > +
> > > +   .. code-block:: sh
> > > +
> > > +      $ llvm-profdata merge --output=vmlinux.profdata vmlinux.profraw
> > > +
> >
> > Is that executed in /path/to/linux/git?
> >
> The llvm-profdata tool is not in the linux source tree. You need to
> grab it from a clang distribution (or build it from clang's git repo).
>
> > > +   Note that multiple raw profile data files can be merged during this step.
> > > +
> > > +7) Rebuild the kernel using the profile data (PGO disabled)
> > > +
> > > +   .. code-block:: sh
> > > +
> > > +      $ make LLVM=1 KCFLAGS=-fprofile-use=vmlinux.profdata ...
> >
> > How big is vmlinux.profdata (make defconfig)?
> >
> I don't have numbers for this, but from what you listed here, it's ~5M
> in size. The size is proportional to the number of counters
> instrumented in the kernel.
>
> > Do I need to do a full defconfig build or can I stop the build after
> > let me say 10mins?
> >
> You should do a full rebuild. Make sure that PGO is disabled during the rebuild.
>

Thanks Bill for all the information.

And sorry if I am so pedantic.

I have installed my Debian system with Legacy-BIOS enabled.

When I rebuild with KCFLAGS=-fprofile-use=vmlinux.profdata (LLVM=1 I
have as a default) my system hangs on reboot.

[ diffconfig ]
$ scripts/diffconfig /boot/config-5.11.0-rc3-8-amd64-clang12-pgo
/boot/config-5.11.0-rc3-9-amd64-clang12-pgo
BUILD_SALT "5.11.0-rc3-8-amd64-clang12-pgo" -> "5.11.0-rc3-9-amd64-clang12-pgo"
PGO_CLANG y -> n

[ my make line ]
$ cat ../start-build_5.11.0-rc3-9-amd64-clang12-pgo.txt
dileks     63120   63095  0 06:47 pts/2    00:00:00 /usr/bin/perf_5.10
stat make V=1 -j4 HOSTCC=clang HOSTCXX=clang++ HOSTLD=ld.lld CC=clang
LD=ld.lld LLVM=1 LLVM_IAS=1 PAHOLE=/opt/pahole/bin/pahole
LOCALVERSION=-9-amd64-clang12-pgo KBUILD_VERBOSE=1
KBUILD_BUILD_HOST=iniza KBUILD_BUILD_USER=sedat.dilek@gmail.com
KBUILD_BUILD_TIMESTAMP=2021-01-17 bindeb-pkg
KDEB_PKGVERSION=5.11.0~rc3-9~bullseye+dileks1
KCFLAGS=-fprofile-use=vmlinux.profdata

( Yes, 06:47 a.m. in the morning :-). )

When I boot with the rebuilt Linux kernel I see:

Wrong EFI loader signature
...
Decompressing
Parsing EFI
Performing Relocations done.
Booting the Kernel.

*** SYSTEM HANGS ***
( I waited for approx 1 min )

I tried to turn UEFI support ON and OFF.
No success.

Does Clang-PGO support Legacy-BIOS or is something different wrong?

Thanks.

- Sedat -
Sedat Dilek Jan. 17, 2021, 10:53 a.m. UTC | #5
On Sun, Jan 17, 2021 at 11:44 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
>
> On Sat, Jan 16, 2021 at 9:23 PM Bill Wendling <morbo@google.com> wrote:
> >
> > On Sat, Jan 16, 2021 at 9:39 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > On Sat, Jan 16, 2021 at 10:44 AM 'Bill Wendling' via Clang Built Linux
> > > <clang-built-linux@googlegroups.com> wrote:
> > > >
> > > > From: Sami Tolvanen <samitolvanen@google.com>
> > > >
> > > > Enable the use of clang's Profile-Guided Optimization[1]. To generate a
> > > > profile, the kernel is instrumented with PGO counters, a representative
> > > > workload is run, and the raw profile data is collected from
> > > > /sys/kernel/debug/pgo/profraw.
> > > >
> > > > The raw profile data must be processed by clang's "llvm-profdata" tool
> > > > before it can be used during recompilation:
> > > >
> > > >   $ cp /sys/kernel/debug/pgo/profraw vmlinux.profraw
> > > >   $ llvm-profdata merge --output=vmlinux.profdata vmlinux.profraw
> > > >
> > > > Multiple raw profiles may be merged during this step.
> > > >
> > > > The data can now be used by the compiler:
> > > >
> > > >   $ make LLVM=1 KCFLAGS=-fprofile-use=vmlinux.profdata ...
> > > >
> > > > This initial submission is restricted to x86, as that's the platform we
> > > > know works. This restriction can be lifted once other platforms have
> > > > been verified to work with PGO.
> > > >
> > > > Note that this method of profiling the kernel is clang-native, unlike
> > > > the clang support in kernel/gcov.
> > > >
> > > > [1] https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization
> > > >
> > > > Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
> > > > Co-developed-by: Bill Wendling <morbo@google.com>
> > > > Signed-off-by: Bill Wendling <morbo@google.com>
> > > > ---
> > > > v2: - Added "__llvm_profile_instrument_memop" based on Nathan Chancellor's
> > > >       testing.
> > > >     - Corrected documentation, re PGO flags when using LTO, based on Fangrui
> > > >       Song's comments.
> > > > v3: - Added change log section based on Sedat Dilek's comments.
> > > > > v4: - Remove non-x86 Makefile changes and use "hweight64" instead of using our
> > > >       own popcount implementation, based on Nick Desaulniers's comment.
> > > > v5: - Correct padding calculation, discovered by Nathan Chancellor.
> > > > ---
> > > >  Documentation/dev-tools/index.rst     |   1 +
> > > >  Documentation/dev-tools/pgo.rst       | 127 +++++++++
> > > >  MAINTAINERS                           |   9 +
> > > >  Makefile                              |   3 +
> > > >  arch/Kconfig                          |   1 +
> > > >  arch/x86/Kconfig                      |   1 +
> > > >  arch/x86/boot/Makefile                |   1 +
> > > >  arch/x86/boot/compressed/Makefile     |   1 +
> > > >  arch/x86/crypto/Makefile              |   2 +
> > > >  arch/x86/entry/vdso/Makefile          |   1 +
> > > >  arch/x86/kernel/vmlinux.lds.S         |   2 +
> > > >  arch/x86/platform/efi/Makefile        |   1 +
> > > >  arch/x86/purgatory/Makefile           |   1 +
> > > >  arch/x86/realmode/rm/Makefile         |   1 +
> > > >  arch/x86/um/vdso/Makefile             |   1 +
> > > >  drivers/firmware/efi/libstub/Makefile |   1 +
> > > >  include/asm-generic/vmlinux.lds.h     |  44 +++
> > > >  kernel/Makefile                       |   1 +
> > > >  kernel/pgo/Kconfig                    |  35 +++
> > > >  kernel/pgo/Makefile                   |   5 +
> > > >  kernel/pgo/fs.c                       | 382 ++++++++++++++++++++++++++
> > > >  kernel/pgo/instrument.c               | 185 +++++++++++++
> > > >  kernel/pgo/pgo.h                      | 206 ++++++++++++++
> > > >  scripts/Makefile.lib                  |  10 +
> > > >  24 files changed, 1022 insertions(+)
> > > >  create mode 100644 Documentation/dev-tools/pgo.rst
> > > >  create mode 100644 kernel/pgo/Kconfig
> > > >  create mode 100644 kernel/pgo/Makefile
> > > >  create mode 100644 kernel/pgo/fs.c
> > > >  create mode 100644 kernel/pgo/instrument.c
> > > >  create mode 100644 kernel/pgo/pgo.h
> > > >
> > > > diff --git a/Documentation/dev-tools/index.rst b/Documentation/dev-tools/index.rst
> > > > index f7809c7b1ba9e..8d6418e858062 100644
> > > > --- a/Documentation/dev-tools/index.rst
> > > > +++ b/Documentation/dev-tools/index.rst
> > > > @@ -26,6 +26,7 @@ whole; patches welcome!
> > > >     kgdb
> > > >     kselftest
> > > >     kunit/index
> > > > +   pgo
> > > >
> > > >
> > > >  .. only::  subproject and html
> > > > diff --git a/Documentation/dev-tools/pgo.rst b/Documentation/dev-tools/pgo.rst
> > > > new file mode 100644
> > > > index 0000000000000..b7f11d8405b73
> > > > --- /dev/null
> > > > +++ b/Documentation/dev-tools/pgo.rst
> > > > @@ -0,0 +1,127 @@
> > > > +.. SPDX-License-Identifier: GPL-2.0
> > > > +
> > > > +===============================
> > > > +Using PGO with the Linux kernel
> > > > +===============================
> > > > +
> > > > +Clang's profiling kernel support (PGO_) enables profiling of the Linux kernel
> > > > +when building with Clang. The profiling data is exported via the ``pgo``
> > > > +debugfs directory.
> > > > +
> > > > +.. _PGO: https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization
> > > > +
> > > > +
> > > > +Preparation
> > > > +===========
> > > > +
> > > > +Configure the kernel with:
> > > > +
> > > > +.. code-block:: make
> > > > +
> > > > +   CONFIG_DEBUG_FS=y
> > > > +   CONFIG_PGO_CLANG=y
> > > > +
> > > > +Note that kernels compiled with profiling flags will be significantly larger
> > > > +and run slower.
> > > > +
> > > > +Profiling data will only become accessible once debugfs has been mounted:
> > > > +
> > > > +.. code-block:: sh
> > > > +
> > > > +   mount -t debugfs none /sys/kernel/debug
> > > > +
> > > > +
> > > > +Customization
> > > > +=============
> > > > +
> > > > > +You can enable or disable profiling for individual files and directories by
> > > > +adding a line similar to the following to the respective kernel Makefile:
> > > > +
> > > > +- For a single file (e.g. main.o)
> > > > +
> > > > +  .. code-block:: make
> > > > +
> > > > +     PGO_PROFILE_main.o := y
> > > > +
> > > > +- For all files in one directory
> > > > +
> > > > +  .. code-block:: make
> > > > +
> > > > +     PGO_PROFILE := y
> > > > +
> > > > +To exclude files from being profiled use
> > > > +
> > > > +  .. code-block:: make
> > > > +
> > > > +     PGO_PROFILE_main.o := n
> > > > +
> > > > +and
> > > > +
> > > > +  .. code-block:: make
> > > > +
> > > > +     PGO_PROFILE := n
> > > > +
> > > > +Only files which are linked to the main kernel image or are compiled as kernel
> > > > +modules are supported by this mechanism.
> > > > +
> > > > +
> > > > +Files
> > > > +=====
> > > > +
> > > > +The PGO kernel support creates the following files in debugfs:
> > > > +
> > > > +``/sys/kernel/debug/pgo``
> > > > +       Parent directory for all PGO-related files.
> > > > +
> > > > +``/sys/kernel/debug/pgo/reset``
> > > > +       Global reset file: resets all coverage data to zero when written to.
> > > > +
> > > > > +``/sys/kernel/debug/pgo/profraw``
> > > > > +       The raw PGO data that must be processed with ``llvm-profdata``.
> > > > +
> > > > +
> > > > +Workflow
> > > > +========
> > > > +
> > > > +The PGO kernel can be run on the host or test machines. The data though should
> > > > +be analyzed with Clang's tools from the same Clang version as the kernel was
> > > > +compiled. Clang's tolerant of version skew, but it's easier to use the same
> > > > +Clang version.
> > > > +
> > > > +The profiling data is useful for optimizing the kernel, analyzing coverage,
> > > > +etc. Clang offers tools to perform these tasks.
> > > > +
> > > > +Here is an example workflow for profiling an instrumented kernel with PGO and
> > > > +using the result to optimize the kernel:
> > > > +
> > > > +1) Install the kernel on the TEST machine.
> > > > +
> > > > +2) Reset the data counters right before running the load tests
> > > > +
> > > > +   .. code-block:: sh
> > > > +
> > > > +      $ echo 1 > /sys/kernel/debug/pgo/reset
> > > > +
> > >
> > > I do not get this...
> > >
> > > # mount | grep debugfs
> > > debugfs on /sys/kernel/debug type debugfs (rw,nosuid,nodev,noexec,relatime)
> > >
> > > After the load-test...?
> > >
> > > echo 0 > /sys/kernel/debug/pgo/reset
> > >
> > Writing anything to /sys/kernel/debug/pgo/reset will cause it to reset
> > the profiling counters. I picked 1 (one) semi-randomly, but it could
> > be any number, letter, your favorite short story, etc. You don't want
> > to reset it before collecting the profiling data from your load tests
> > though.
> >
> > > > +3) Run the load tests.
> > > > +
> > > > +4) Collect the raw profile data
> > > > +
> > > > +   .. code-block:: sh
> > > > +
> > > > +      $ cp -a /sys/kernel/debug/pgo/profraw /tmp/vmlinux.profraw
> > > > +
> > >
> > > This is only 4,9M small and seen from the date 5mins before I run the
> > > echo-1 line.
> > >
> > > # ll /sys/kernel/debug/pgo
> > > insgesamt 0
> > > drwxr-xr-x  2 root root 0 16. Jan 17:29 .
> > > drwx------ 41 root root 0 16. Jan 17:29 ..
> > > -rw-------  1 root root 0 16. Jan 17:29 profraw
> > > --w-------  1 root root 0 16. Jan 18:19 reset
> > >
> > > # cp -a /sys/kernel/debug/pgo/profraw /tmp/vmlinux.profraw
> > >
> > > # ll /tmp/vmlinux.profraw
> > > -rw------- 1 root root 4,9M 16. Jan 17:29 /tmp/vmlinux.profraw
> > >
> > > For me there was no prof-data collected from my defconfig kernel-build.
> > >
> > The /sys/kernel/debug/pgo/profraw file is read-only. Nothing writes to
> > it, not even the kernel. All it does is serialize the profiling
> > counters from a memory location in the kernel into a format that
> > LLVM's tools can understand.
> >
> > > > +5) (Optional) Download the raw profile data to the HOST machine.
> > > > +
> > > > +6) Process the raw profile data
> > > > +
> > > > +   .. code-block:: sh
> > > > +
> > > > +      $ llvm-profdata merge --output=vmlinux.profdata vmlinux.profraw
> > > > +
> > >
> > > Is that executed in /path/to/linux/git?
> > >
> > The llvm-profdata tool is not in the linux source tree. You need to
> > grab it from a clang distribution (or built from clang's git repo).
> >
> > > > +   Note that multiple raw profile data files can be merged during this step.
> > > > +
> > > > +7) Rebuild the kernel using the profile data (PGO disabled)
> > > > +
> > > > +   .. code-block:: sh
> > > > +
> > > > +      $ make LLVM=1 KCFLAGS=-fprofile-use=vmlinux.profdata ...
> > >
> > > How big is vmlinux.profdata (make defconfig)?
> > >
> > I don't have numbers for this, but from what you listed here, it's ~5M
> > in size. The size is proportional to the number of counters
> > instrumented in the kernel.
> >
> > > Do I need to do a full defconfig build or can I stop the build after
> > > let me say 10mins?
> > >
> > You should do a full rebuild. Make sure that PGO is disabled during the rebuild.
> >
>
> Thanks Bill for all the information.
>
> And sorry if I am so pedantic.
>
> I have installed my Debian system with Legacy-BIOS enabled.
>
> When I rebuild with KCFLAGS=-fprofile-use=vmlinux.profdata (LLVM=1 I
> have as a default) my system hangs on reboot.
>
> [ diffconfig ]
> $ scripts/diffconfig /boot/config-5.11.0-rc3-8-amd64-clang12-pgo
> /boot/config-5.11.0-rc3-9-amd64-clang12-pgo
> BUILD_SALT "5.11.0-rc3-8-amd64-clang12-pgo" -> "5.11.0-rc3-9-amd64-clang12-pgo"
> PGO_CLANG y -> n
>
> [ my make line ]
> $ cat ../start-build_5.11.0-rc3-9-amd64-clang12-pgo.txt
> dileks     63120   63095  0 06:47 pts/2    00:00:00 /usr/bin/perf_5.10
> stat make V=1 -j4 HOSTCC=clang HOSTCXX=clang++ HOSTLD=ld.lld CC=clang
> LD=ld.lld LLVM=1 LLVM_IAS=1 PAHOLE=/opt/pahole/bin/pahole
> LOCALVERSION=-9-amd64-clang12-pgo KBUILD_VERBOSE=1
> KBUILD_BUILD_HOST=iniza KBUILD_BUILD_USER=sedat.dilek@gmail.com
> KBUILD_BUILD_TIMESTAMP=2021-01-17 bindeb-pkg
> KDEB_PKGVERSION=5.11.0~rc3-9~bullseye+dileks1
> KCFLAGS=-fprofile-use=vmlinux.profdata
>
> ( Yes, 06:47 a.m. in the morning :-). )
>
> When I boot with the rebuild Linux-kernel I see:
>
> Wrong EFI loader signature
> ...
> Decompressing
> Parsing EFI
> Performing Relocations done.
> Booting the Kernel.
>
> *** SYSTEM HANGS ***
> ( I waited for approx 1 min )
>
> I tried to turn UEFI support ON and OFF.
> No success.
>
> Does Clang-PGO support Legacy-BIOS or is something different wrong?
>
> Thanks.
>

My bootloader is GRUB.

In UEFI-BIOS settings there is no secure-boot disable option.
Just simple "Use UEFI BIOS" enabled|disabled.

Installed Debian packages:

ii grub-common 2.04-12
ii grub-pc 2.04-12
ii grub-pc-bin 2.04-12
ii grub2-common 2.04-12

The link below suggests running this in the GRUB shell:

set check_signatures=no

But that applies only when grub-efi is installed.

- Sedat -

Link: https://unix.stackexchange.com/questions/126286/grub-efi-disable-signature-check
Sedat Dilek Jan. 17, 2021, 11:23 a.m. UTC | #6
On Sun, Jan 17, 2021 at 11:53 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
>
> On Sun, Jan 17, 2021 at 11:44 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> >
> > On Sat, Jan 16, 2021 at 9:23 PM Bill Wendling <morbo@google.com> wrote:
> > >
> > > On Sat, Jan 16, 2021 at 9:39 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > > On Sat, Jan 16, 2021 at 10:44 AM 'Bill Wendling' via Clang Built Linux
> > > > <clang-built-linux@googlegroups.com> wrote:
> > > > >
> > > > > From: Sami Tolvanen <samitolvanen@google.com>
> > > > >
> > > > > Enable the use of clang's Profile-Guided Optimization[1]. To generate a
> > > > > profile, the kernel is instrumented with PGO counters, a representative
> > > > > workload is run, and the raw profile data is collected from
> > > > > /sys/kernel/debug/pgo/profraw.
> > > > >
> > > > > The raw profile data must be processed by clang's "llvm-profdata" tool
> > > > > before it can be used during recompilation:
> > > > >
> > > > >   $ cp /sys/kernel/debug/pgo/profraw vmlinux.profraw
> > > > >   $ llvm-profdata merge --output=vmlinux.profdata vmlinux.profraw
> > > > >
> > > > > Multiple raw profiles may be merged during this step.
> > > > >
> > > > > The data can now be used by the compiler:
> > > > >
> > > > >   $ make LLVM=1 KCFLAGS=-fprofile-use=vmlinux.profdata ...
> > > > >
> > > > > This initial submission is restricted to x86, as that's the platform we
> > > > > know works. This restriction can be lifted once other platforms have
> > > > > been verified to work with PGO.
> > > > >
> > > > > Note that this method of profiling the kernel is clang-native, unlike
> > > > > the clang support in kernel/gcov.
> > > > >
> > > > > [1] https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization
> > > > >
> > > > > Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
> > > > > Co-developed-by: Bill Wendling <morbo@google.com>
> > > > > Signed-off-by: Bill Wendling <morbo@google.com>
> > > > > ---
> > > > > v2: - Added "__llvm_profile_instrument_memop" based on Nathan Chancellor's
> > > > >       testing.
> > > > >     - Corrected documentation, re PGO flags when using LTO, based on Fangrui
> > > > >       Song's comments.
> > > > > v3: - Added change log section based on Sedat Dilek's comments.
> > > > > > v4: - Remove non-x86 Makefile changes and use "hweight64" instead of using our
> > > > >       own popcount implementation, based on Nick Desaulniers's comment.
> > > > > v5: - Correct padding calculation, discovered by Nathan Chancellor.
> > > > > ---
> > > > >  Documentation/dev-tools/index.rst     |   1 +
> > > > >  Documentation/dev-tools/pgo.rst       | 127 +++++++++
> > > > >  MAINTAINERS                           |   9 +
> > > > >  Makefile                              |   3 +
> > > > >  arch/Kconfig                          |   1 +
> > > > >  arch/x86/Kconfig                      |   1 +
> > > > >  arch/x86/boot/Makefile                |   1 +
> > > > >  arch/x86/boot/compressed/Makefile     |   1 +
> > > > >  arch/x86/crypto/Makefile              |   2 +
> > > > >  arch/x86/entry/vdso/Makefile          |   1 +
> > > > >  arch/x86/kernel/vmlinux.lds.S         |   2 +
> > > > >  arch/x86/platform/efi/Makefile        |   1 +
> > > > >  arch/x86/purgatory/Makefile           |   1 +
> > > > >  arch/x86/realmode/rm/Makefile         |   1 +
> > > > >  arch/x86/um/vdso/Makefile             |   1 +
> > > > >  drivers/firmware/efi/libstub/Makefile |   1 +
> > > > >  include/asm-generic/vmlinux.lds.h     |  44 +++
> > > > >  kernel/Makefile                       |   1 +
> > > > >  kernel/pgo/Kconfig                    |  35 +++
> > > > >  kernel/pgo/Makefile                   |   5 +
> > > > >  kernel/pgo/fs.c                       | 382 ++++++++++++++++++++++++++
> > > > >  kernel/pgo/instrument.c               | 185 +++++++++++++
> > > > >  kernel/pgo/pgo.h                      | 206 ++++++++++++++
> > > > >  scripts/Makefile.lib                  |  10 +
> > > > >  24 files changed, 1022 insertions(+)
> > > > >  create mode 100644 Documentation/dev-tools/pgo.rst
> > > > >  create mode 100644 kernel/pgo/Kconfig
> > > > >  create mode 100644 kernel/pgo/Makefile
> > > > >  create mode 100644 kernel/pgo/fs.c
> > > > >  create mode 100644 kernel/pgo/instrument.c
> > > > >  create mode 100644 kernel/pgo/pgo.h
> > > > >
> > > > > diff --git a/Documentation/dev-tools/index.rst b/Documentation/dev-tools/index.rst
> > > > > index f7809c7b1ba9e..8d6418e858062 100644
> > > > > --- a/Documentation/dev-tools/index.rst
> > > > > +++ b/Documentation/dev-tools/index.rst
> > > > > @@ -26,6 +26,7 @@ whole; patches welcome!
> > > > >     kgdb
> > > > >     kselftest
> > > > >     kunit/index
> > > > > +   pgo
> > > > >
> > > > >
> > > > >  .. only::  subproject and html
> > > > > diff --git a/Documentation/dev-tools/pgo.rst b/Documentation/dev-tools/pgo.rst
> > > > > new file mode 100644
> > > > > index 0000000000000..b7f11d8405b73
> > > > > --- /dev/null
> > > > > +++ b/Documentation/dev-tools/pgo.rst
> > > > > @@ -0,0 +1,127 @@
> > > > > +.. SPDX-License-Identifier: GPL-2.0
> > > > > +
> > > > > +===============================
> > > > > +Using PGO with the Linux kernel
> > > > > +===============================
> > > > > +
> > > > > +Clang's profiling kernel support (PGO_) enables profiling of the Linux kernel
> > > > > +when building with Clang. The profiling data is exported via the ``pgo``
> > > > > +debugfs directory.
> > > > > +
> > > > > +.. _PGO: https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization
> > > > > +
> > > > > +
> > > > > +Preparation
> > > > > +===========
> > > > > +
> > > > > +Configure the kernel with:
> > > > > +
> > > > > +.. code-block:: make
> > > > > +
> > > > > +   CONFIG_DEBUG_FS=y
> > > > > +   CONFIG_PGO_CLANG=y
> > > > > +
> > > > > +Note that kernels compiled with profiling flags will be significantly larger
> > > > > +and run slower.
> > > > > +
> > > > > +Profiling data will only become accessible once debugfs has been mounted:
> > > > > +
> > > > > +.. code-block:: sh
> > > > > +
> > > > > +   mount -t debugfs none /sys/kernel/debug
> > > > > +
> > > > > +
> > > > > +Customization
> > > > > +=============
> > > > > +
> > > > > > +You can enable or disable profiling for individual files and directories by
> > > > > +adding a line similar to the following to the respective kernel Makefile:
> > > > > +
> > > > > +- For a single file (e.g. main.o)
> > > > > +
> > > > > +  .. code-block:: make
> > > > > +
> > > > > +     PGO_PROFILE_main.o := y
> > > > > +
> > > > > +- For all files in one directory
> > > > > +
> > > > > +  .. code-block:: make
> > > > > +
> > > > > +     PGO_PROFILE := y
> > > > > +
> > > > > +To exclude files from being profiled use
> > > > > +
> > > > > +  .. code-block:: make
> > > > > +
> > > > > +     PGO_PROFILE_main.o := n
> > > > > +
> > > > > +and
> > > > > +
> > > > > +  .. code-block:: make
> > > > > +
> > > > > +     PGO_PROFILE := n
> > > > > +
> > > > > +Only files which are linked to the main kernel image or are compiled as kernel
> > > > > +modules are supported by this mechanism.
> > > > > +
> > > > > +
> > > > > +Files
> > > > > +=====
> > > > > +
> > > > > +The PGO kernel support creates the following files in debugfs:
> > > > > +
> > > > > +``/sys/kernel/debug/pgo``
> > > > > +       Parent directory for all PGO-related files.
> > > > > +
> > > > > +``/sys/kernel/debug/pgo/reset``
> > > > > +       Global reset file: resets all coverage data to zero when written to.
> > > > > +
> > > > > > +``/sys/kernel/debug/pgo/profraw``
> > > > > > +       The raw PGO data that must be processed with ``llvm-profdata``.
> > > > > +
> > > > > +
> > > > > +Workflow
> > > > > +========
> > > > > +
> > > > > +The PGO kernel can be run on the host or test machines. The data though should
> > > > > +be analyzed with Clang's tools from the same Clang version as the kernel was
> > > > > +compiled. Clang's tolerant of version skew, but it's easier to use the same
> > > > > +Clang version.
> > > > > +
> > > > > +The profiling data is useful for optimizing the kernel, analyzing coverage,
> > > > > +etc. Clang offers tools to perform these tasks.
> > > > > +
> > > > > +Here is an example workflow for profiling an instrumented kernel with PGO and
> > > > > +using the result to optimize the kernel:
> > > > > +
> > > > > +1) Install the kernel on the TEST machine.
> > > > > +
> > > > > +2) Reset the data counters right before running the load tests
> > > > > +
> > > > > +   .. code-block:: sh
> > > > > +
> > > > > +      $ echo 1 > /sys/kernel/debug/pgo/reset
> > > > > +
> > > >
> > > > I do not get this...
> > > >
> > > > # mount | grep debugfs
> > > > debugfs on /sys/kernel/debug type debugfs (rw,nosuid,nodev,noexec,relatime)
> > > >
> > > > After the load-test...?
> > > >
> > > > echo 0 > /sys/kernel/debug/pgo/reset
> > > >
> > > Writing anything to /sys/kernel/debug/pgo/reset will cause it to reset
> > > the profiling counters. I picked 1 (one) semi-randomly, but it could
> > > be any number, letter, your favorite short story, etc. You don't want
> > > to reset it before collecting the profiling data from your load tests
> > > though.
> > >
> > > > > +3) Run the load tests.
> > > > > +
> > > > > +4) Collect the raw profile data
> > > > > +
> > > > > +   .. code-block:: sh
> > > > > +
> > > > > +      $ cp -a /sys/kernel/debug/pgo/profraw /tmp/vmlinux.profraw
> > > > > +
> > > >
> > > > This is only 4,9M small and seen from the date 5mins before I run the
> > > > echo-1 line.
> > > >
> > > > # ll /sys/kernel/debug/pgo
> > > > insgesamt 0
> > > > drwxr-xr-x  2 root root 0 16. Jan 17:29 .
> > > > drwx------ 41 root root 0 16. Jan 17:29 ..
> > > > -rw-------  1 root root 0 16. Jan 17:29 profraw
> > > > --w-------  1 root root 0 16. Jan 18:19 reset
> > > >
> > > > # cp -a /sys/kernel/debug/pgo/profraw /tmp/vmlinux.profraw
> > > >
> > > > # ll /tmp/vmlinux.profraw
> > > > -rw------- 1 root root 4,9M 16. Jan 17:29 /tmp/vmlinux.profraw
> > > >
> > > > For me there was no prof-data collected from my defconfig kernel-build.
> > > >
> > > The /sys/kernel/debug/pgo/profraw file is read-only. Nothing writes to
> > > it, not even the kernel. All it does is serialize the profiling
> > > counters from a memory location in the kernel into a format that
> > > LLVM's tools can understand.
> > >
> > > > > +5) (Optional) Download the raw profile data to the HOST machine.
> > > > > +
> > > > > +6) Process the raw profile data
> > > > > +
> > > > > +   .. code-block:: sh
> > > > > +
> > > > > +      $ llvm-profdata merge --output=vmlinux.profdata vmlinux.profraw
> > > > > +
> > > >
> > > > Is that executed in /path/to/linux/git?
> > > >
> > > The llvm-profdata tool is not in the linux source tree. You need to
> > > grab it from a clang distribution (or built from clang's git repo).
> > >
> > > > > +   Note that multiple raw profile data files can be merged during this step.
> > > > > +
> > > > > +7) Rebuild the kernel using the profile data (PGO disabled)
> > > > > +
> > > > > +   .. code-block:: sh
> > > > > +
> > > > > +      $ make LLVM=1 KCFLAGS=-fprofile-use=vmlinux.profdata ...
> > > >
> > > > How big is vmlinux.profdata (make defconfig)?
> > > >
> > > I don't have numbers for this, but from what you listed here, it's ~5M
> > > in size. The size is proportional to the number of counters
> > > instrumented in the kernel.
> > >
> > > > Do I need to do a full defconfig build or can I stop the build after
> > > > let me say 10mins?
> > > >
> > > You should do a full rebuild. Make sure that PGO is disabled during the rebuild.
> > >
> >
> > Thanks Bill for all the information.
> >
> > And sorry if I am so pedantic.
> >
> > I have installed my Debian system with Legacy-BIOS enabled.
> >
> > When I rebuild with KCFLAGS=-fprofile-use=vmlinux.profdata (LLVM=1 I
> > have as a default) my system hangs on reboot.
> >
> > [ diffconfig ]
> > $ scripts/diffconfig /boot/config-5.11.0-rc3-8-amd64-clang12-pgo
> > /boot/config-5.11.0-rc3-9-amd64-clang12-pgo
> > BUILD_SALT "5.11.0-rc3-8-amd64-clang12-pgo" -> "5.11.0-rc3-9-amd64-clang12-pgo"
> > PGO_CLANG y -> n
> >
> > [ my make line ]
> > $ cat ../start-build_5.11.0-rc3-9-amd64-clang12-pgo.txt
> > dileks     63120   63095  0 06:47 pts/2    00:00:00 /usr/bin/perf_5.10
> > stat make V=1 -j4 HOSTCC=clang HOSTCXX=clang++ HOSTLD=ld.lld CC=clang
> > LD=ld.lld LLVM=1 LLVM_IAS=1 PAHOLE=/opt/pahole/bin/pahole
> > LOCALVERSION=-9-amd64-clang12-pgo KBUILD_VERBOSE=1
> > KBUILD_BUILD_HOST=iniza KBUILD_BUILD_USER=sedat.dilek@gmail.com
> > KBUILD_BUILD_TIMESTAMP=2021-01-17 bindeb-pkg
> > KDEB_PKGVERSION=5.11.0~rc3-9~bullseye+dileks1
> > KCFLAGS=-fprofile-use=vmlinux.profdata
> >
> > ( Yes, 06:47 a.m. in the morning :-). )
> >
> > When I boot with the rebuild Linux-kernel I see:
> >
> > Wrong EFI loader signature
> > ...
> > Decompressing
> > Parsing EFI
> > Performing Relocations done.
> > Booting the Kernel.
> >
> > *** SYSTEM HANGS ***
> > ( I waited for approx 1 min )
> >
> > I tried to turn UEFI support ON and OFF.
> > No success.
> >
> > Does Clang-PGO support Legacy-BIOS or is something different wrong?
> >
> > Thanks.
> >
>
> My bootloader is GRUB.
>
> In UEFI-BIOS settings there is no secure-boot disable option.
> Just simple "Use UEFI BIOS" enabled|disabled.
>
> Installed Debian packages:
>
> ii grub-common 2.04-12
> ii grub-pc 2.04-12
> ii grub-pc-bin 2.04-12
> ii grub2-common 2.04-12
>
> I found in the below link to do in grub-shell:
>
> set check_signatures=no
>
> But this is when grub-efi is installed.
>
> - Sedat -
>
> Link: https://unix.stackexchange.com/questions/126286/grub-efi-disable-signature-check

Forget about that "Wrong EFI loader signature" message - I see it with all
my other kernels (which all boot fine).

I tried in QEMU with and without KASLR:

[ run_qemu.sh ]
KPATH=$(pwd)

APPEND="root=/dev/ram0 console=ttyS0 hung_task_panic=1 earlyprintk=ttyS0,115200"
APPEND="$APPEND nokaslr"

qemu-system-x86_64 -enable-kvm -M pc -kernel $KPATH/bzImage \
  -initrd $KPATH/initrd.img -m 512 -net none -serial stdio -append "${APPEND}"
[ /run_qemu.sh ]

$ ./run_qemu.sh
Probing EDD (edd=off to disable)... ok
Wrong EFI loader signature.
early console in extract_kernel
input_data: 0x000000000289940d
input_len: 0x000000000069804a
output: 0x0000000001000000
output_len: 0x0000000001ef2010
kernel_total_size: 0x0000000001c2c000
needed_size: 0x0000000002000000
trampoline_32bit: 0x000000000009d000


KASLR disabled: 'nokaslr' on cmdline.


Decompressing Linux... Parsing ELF... No relocation needed... done.
Booting the kernel.

The QEMU run also stops at this point.

- Sedat
Sedat Dilek Jan. 17, 2021, 11:42 a.m. UTC | #7
On Sun, Jan 17, 2021 at 12:23 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
>
> On Sun, Jan 17, 2021 at 11:53 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> >
> > On Sun, Jan 17, 2021 at 11:44 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > >
> > > On Sat, Jan 16, 2021 at 9:23 PM Bill Wendling <morbo@google.com> wrote:
> > > >
> > > > On Sat, Jan 16, 2021 at 9:39 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > > > On Sat, Jan 16, 2021 at 10:44 AM 'Bill Wendling' via Clang Built Linux
> > > > > <clang-built-linux@googlegroups.com> wrote:
> > > > > >
> > > > > > From: Sami Tolvanen <samitolvanen@google.com>
> > > > > >
> > > > > > Enable the use of clang's Profile-Guided Optimization[1]. To generate a
> > > > > > profile, the kernel is instrumented with PGO counters, a representative
> > > > > > workload is run, and the raw profile data is collected from
> > > > > > /sys/kernel/debug/pgo/profraw.
> > > > > >
> > > > > > The raw profile data must be processed by clang's "llvm-profdata" tool
> > > > > > before it can be used during recompilation:
> > > > > >
> > > > > >   $ cp /sys/kernel/debug/pgo/profraw vmlinux.profraw
> > > > > >   $ llvm-profdata merge --output=vmlinux.profdata vmlinux.profraw
> > > > > >
> > > > > > Multiple raw profiles may be merged during this step.
> > > > > >
> > > > > > The data can now be used by the compiler:
> > > > > >
> > > > > >   $ make LLVM=1 KCFLAGS=-fprofile-use=vmlinux.profdata ...
> > > > > >
> > > > > > This initial submission is restricted to x86, as that's the platform we
> > > > > > know works. This restriction can be lifted once other platforms have
> > > > > > been verified to work with PGO.
> > > > > >
> > > > > > Note that this method of profiling the kernel is clang-native, unlike
> > > > > > the clang support in kernel/gcov.
> > > > > >
> > > > > > [1] https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization
> > > > > >
> > > > > > Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
> > > > > > Co-developed-by: Bill Wendling <morbo@google.com>
> > > > > > Signed-off-by: Bill Wendling <morbo@google.com>
> > > > > > ---
> > > > > > v2: - Added "__llvm_profile_instrument_memop" based on Nathan Chancellor's
> > > > > >       testing.
> > > > > >     - Corrected documentation, re PGO flags when using LTO, based on Fangrui
> > > > > >       Song's comments.
> > > > > > v3: - Added change log section based on Sedat Dilek's comments.
> > > > > > > v4: - Remove non-x86 Makefile changes and use "hweight64" instead of using our
> > > > > >       own popcount implementation, based on Nick Desaulniers's comment.
> > > > > > v5: - Correct padding calculation, discovered by Nathan Chancellor.
> > > > > > ---
> > > > > >  Documentation/dev-tools/index.rst     |   1 +
> > > > > >  Documentation/dev-tools/pgo.rst       | 127 +++++++++
> > > > > >  MAINTAINERS                           |   9 +
> > > > > >  Makefile                              |   3 +
> > > > > >  arch/Kconfig                          |   1 +
> > > > > >  arch/x86/Kconfig                      |   1 +
> > > > > >  arch/x86/boot/Makefile                |   1 +
> > > > > >  arch/x86/boot/compressed/Makefile     |   1 +
> > > > > >  arch/x86/crypto/Makefile              |   2 +
> > > > > >  arch/x86/entry/vdso/Makefile          |   1 +
> > > > > >  arch/x86/kernel/vmlinux.lds.S         |   2 +
> > > > > >  arch/x86/platform/efi/Makefile        |   1 +
> > > > > >  arch/x86/purgatory/Makefile           |   1 +
> > > > > >  arch/x86/realmode/rm/Makefile         |   1 +
> > > > > >  arch/x86/um/vdso/Makefile             |   1 +
> > > > > >  drivers/firmware/efi/libstub/Makefile |   1 +
> > > > > >  include/asm-generic/vmlinux.lds.h     |  44 +++
> > > > > >  kernel/Makefile                       |   1 +
> > > > > >  kernel/pgo/Kconfig                    |  35 +++
> > > > > >  kernel/pgo/Makefile                   |   5 +
> > > > > >  kernel/pgo/fs.c                       | 382 ++++++++++++++++++++++++++
> > > > > >  kernel/pgo/instrument.c               | 185 +++++++++++++
> > > > > >  kernel/pgo/pgo.h                      | 206 ++++++++++++++
> > > > > >  scripts/Makefile.lib                  |  10 +
> > > > > >  24 files changed, 1022 insertions(+)
> > > > > >  create mode 100644 Documentation/dev-tools/pgo.rst
> > > > > >  create mode 100644 kernel/pgo/Kconfig
> > > > > >  create mode 100644 kernel/pgo/Makefile
> > > > > >  create mode 100644 kernel/pgo/fs.c
> > > > > >  create mode 100644 kernel/pgo/instrument.c
> > > > > >  create mode 100644 kernel/pgo/pgo.h
> > > > > >
> > > > > > diff --git a/Documentation/dev-tools/index.rst b/Documentation/dev-tools/index.rst
> > > > > > index f7809c7b1ba9e..8d6418e858062 100644
> > > > > > --- a/Documentation/dev-tools/index.rst
> > > > > > +++ b/Documentation/dev-tools/index.rst
> > > > > > @@ -26,6 +26,7 @@ whole; patches welcome!
> > > > > >     kgdb
> > > > > >     kselftest
> > > > > >     kunit/index
> > > > > > +   pgo
> > > > > >
> > > > > >
> > > > > >  .. only::  subproject and html
> > > > > > diff --git a/Documentation/dev-tools/pgo.rst b/Documentation/dev-tools/pgo.rst
> > > > > > new file mode 100644
> > > > > > index 0000000000000..b7f11d8405b73
> > > > > > --- /dev/null
> > > > > > +++ b/Documentation/dev-tools/pgo.rst
> > > > > > @@ -0,0 +1,127 @@
> > > > > > +.. SPDX-License-Identifier: GPL-2.0
> > > > > > +
> > > > > > +===============================
> > > > > > +Using PGO with the Linux kernel
> > > > > > +===============================
> > > > > > +
> > > > > > +Clang's profiling kernel support (PGO_) enables profiling of the Linux kernel
> > > > > > +when building with Clang. The profiling data is exported via the ``pgo``
> > > > > > +debugfs directory.
> > > > > > +
> > > > > > +.. _PGO: https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization
> > > > > > +
> > > > > > +
> > > > > > +Preparation
> > > > > > +===========
> > > > > > +
> > > > > > +Configure the kernel with:
> > > > > > +
> > > > > > +.. code-block:: make
> > > > > > +
> > > > > > +   CONFIG_DEBUG_FS=y
> > > > > > +   CONFIG_PGO_CLANG=y
> > > > > > +
> > > > > > +Note that kernels compiled with profiling flags will be significantly larger
> > > > > > +and run slower.
> > > > > > +
> > > > > > +Profiling data will only become accessible once debugfs has been mounted:
> > > > > > +
> > > > > > +.. code-block:: sh
> > > > > > +
> > > > > > +   mount -t debugfs none /sys/kernel/debug
> > > > > > +
> > > > > > +
> > > > > > +Customization
> > > > > > +=============
> > > > > > +
> > > > > > > +You can enable or disable profiling for individual files and directories by
> > > > > > +adding a line similar to the following to the respective kernel Makefile:
> > > > > > +
> > > > > > +- For a single file (e.g. main.o)
> > > > > > +
> > > > > > +  .. code-block:: make
> > > > > > +
> > > > > > +     PGO_PROFILE_main.o := y
> > > > > > +
> > > > > > +- For all files in one directory
> > > > > > +
> > > > > > +  .. code-block:: make
> > > > > > +
> > > > > > +     PGO_PROFILE := y
> > > > > > +
> > > > > > +To exclude files from being profiled use
> > > > > > +
> > > > > > +  .. code-block:: make
> > > > > > +
> > > > > > +     PGO_PROFILE_main.o := n
> > > > > > +
> > > > > > +and
> > > > > > +
> > > > > > +  .. code-block:: make
> > > > > > +
> > > > > > +     PGO_PROFILE := n
> > > > > > +
> > > > > > +Only files which are linked to the main kernel image or are compiled as kernel
> > > > > > +modules are supported by this mechanism.
> > > > > > +
> > > > > > +
> > > > > > +Files
> > > > > > +=====
> > > > > > +
> > > > > > +The PGO kernel support creates the following files in debugfs:
> > > > > > +
> > > > > > +``/sys/kernel/debug/pgo``
> > > > > > +       Parent directory for all PGO-related files.
> > > > > > +
> > > > > > +``/sys/kernel/debug/pgo/reset``
> > > > > > +       Global reset file: resets all coverage data to zero when written to.
> > > > > > +
> > > > > > > +``/sys/kernel/debug/pgo/profraw``
> > > > > > > +       The raw PGO data that must be processed with ``llvm-profdata``.
> > > > > > +
> > > > > > +
> > > > > > +Workflow
> > > > > > +========
> > > > > > +
> > > > > > +The PGO kernel can be run on the host or test machines. The data though should
> > > > > > > +be analyzed with Clang's tools from the same Clang version the kernel was
> > > > > > > +compiled with. Clang's tolerant of version skew, but it's easier to use the same
> > > > > > +Clang version.
> > > > > > +
> > > > > > +The profiling data is useful for optimizing the kernel, analyzing coverage,
> > > > > > +etc. Clang offers tools to perform these tasks.
> > > > > > +
> > > > > > +Here is an example workflow for profiling an instrumented kernel with PGO and
> > > > > > +using the result to optimize the kernel:
> > > > > > +
> > > > > > +1) Install the kernel on the TEST machine.
> > > > > > +
> > > > > > +2) Reset the data counters right before running the load tests
> > > > > > +
> > > > > > +   .. code-block:: sh
> > > > > > +
> > > > > > +      $ echo 1 > /sys/kernel/debug/pgo/reset
> > > > > > +
> > > > >
> > > > > I do not get this...
> > > > >
> > > > > # mount | grep debugfs
> > > > > debugfs on /sys/kernel/debug type debugfs (rw,nosuid,nodev,noexec,relatime)
> > > > >
> > > > > After the load-test...?
> > > > >
> > > > > echo 0 > /sys/kernel/debug/pgo/reset
> > > > >
> > > > Writing anything to /sys/kernel/debug/pgo/reset will cause it to reset
> > > > the profiling counters. I picked 1 (one) semi-randomly, but it could
> > > > be any number, letter, your favorite short story, etc. You don't want
> > > > to reset it before collecting the profiling data from your load tests
> > > > though.
> > > >
> > > > > > +3) Run the load tests.
> > > > > > +
> > > > > > +4) Collect the raw profile data
> > > > > > +
> > > > > > +   .. code-block:: sh
> > > > > > +
> > > > > > +      $ cp -a /sys/kernel/debug/pgo/profraw /tmp/vmlinux.profraw
> > > > > > +
> > > > >
> > > > > This is only 4,9M small and seen from the date 5mins before I run the
> > > > > echo-1 line.
> > > > >
> > > > > # ll /sys/kernel/debug/pgo
> > > > > insgesamt 0
> > > > > drwxr-xr-x  2 root root 0 16. Jan 17:29 .
> > > > > drwx------ 41 root root 0 16. Jan 17:29 ..
> > > > > -rw-------  1 root root 0 16. Jan 17:29 profraw
> > > > > --w-------  1 root root 0 16. Jan 18:19 reset
> > > > >
> > > > > # cp -a /sys/kernel/debug/pgo/profraw /tmp/vmlinux.profraw
> > > > >
> > > > > # ll /tmp/vmlinux.profraw
> > > > > -rw------- 1 root root 4,9M 16. Jan 17:29 /tmp/vmlinux.profraw
> > > > >
> > > > > For me there was no prof-data collected from my defconfig kernel-build.
> > > > >
> > > > The /sys/kernel/debug/pgo/profraw file is read-only. Nothing writes to
> > > > it, not even the kernel. All it does is serialize the profiling
> > > > counters from a memory location in the kernel into a format that
> > > > LLVM's tools can understand.
> > > >
> > > > > > +5) (Optional) Download the raw profile data to the HOST machine.
> > > > > > +
> > > > > > +6) Process the raw profile data
> > > > > > +
> > > > > > +   .. code-block:: sh
> > > > > > +
> > > > > > +      $ llvm-profdata merge --output=vmlinux.profdata vmlinux.profraw
> > > > > > +
> > > > >
> > > > > Is that executed in /path/to/linux/git?
> > > > >
> > > > The llvm-profdata tool is not in the linux source tree. You need to
> > > > grab it from a clang distribution (or built from clang's git repo).
> > > >
> > > > > > +   Note that multiple raw profile data files can be merged during this step.
> > > > > > +
> > > > > > +7) Rebuild the kernel using the profile data (PGO disabled)
> > > > > > +
> > > > > > +   .. code-block:: sh
> > > > > > +
> > > > > > +      $ make LLVM=1 KCFLAGS=-fprofile-use=vmlinux.profdata ...
> > > > >
> > > > > How big is vmlinux.profdata (make defconfig)?
> > > > >
> > > > I don't have numbers for this, but from what you listed here, it's ~5M
> > > > in size. The size is proportional to the number of counters
> > > > instrumented in the kernel.
> > > >
> > > > > Do I need to do a full defconfig build or can I stop the build after
> > > > > let me say 10mins?
> > > > >
> > > > You should do a full rebuild. Make sure that PGO is disabled during the rebuild.
> > > >
> > >
> > > Thanks Bill for all the information.
> > >
> > > And sorry if I am so pedantic.
> > >
> > > I have installed my Debian system with Legacy-BIOS enabled.
> > >
> > > When I rebuild with KCFLAGS=-fprofile-use=vmlinux.profdata (LLVM=1 I
> > > have as a default) my system hangs on reboot.
> > >
> > > [ diffconfig ]
> > > $ scripts/diffconfig /boot/config-5.11.0-rc3-8-amd64-clang12-pgo
> > > /boot/config-5.11.0-rc3-9-amd64-clang12-pgo
> > > BUILD_SALT "5.11.0-rc3-8-amd64-clang12-pgo" -> "5.11.0-rc3-9-amd64-clang12-pgo"
> > > PGO_CLANG y -> n
> > >
> > > [ my make line ]
> > > $ cat ../start-build_5.11.0-rc3-9-amd64-clang12-pgo.txt
> > > dileks     63120   63095  0 06:47 pts/2    00:00:00 /usr/bin/perf_5.10
> > > stat make V=1 -j4 HOSTCC=clang HOSTCXX=clang++ HOSTLD=ld.lld CC=clang
> > > LD=ld.lld LLVM=1 LLVM_IAS=1 PAHOLE=/opt/pahole/bin/pahole
> > > LOCALVERSION=-9-amd64-clang12-pgo KBUILD_VERBOSE=1
> > > KBUILD_BUILD_HOST=iniza KBUILD_BUILD_USER=sedat.dilek@gmail.com
> > > KBUILD_BUILD_TIMESTAMP=2021-01-17 bindeb-pkg
> > > KDEB_PKGVERSION=5.11.0~rc3-9~bullseye+dileks1
> > > KCFLAGS=-fprofile-use=vmlinux.profdata
> > >
> > > ( Yes, 06:47 a.m. in the morning :-). )
> > >
> > > > When I boot with the rebuilt Linux kernel I see:
> > >
> > > Wrong EFI loader signature
> > > ...
> > > Decompressing
> > > Parsing EFI
> > > Performing Relocations done.
> > > Booting the Kernel.
> > >
> > > *** SYSTEM HANGS ***
> > > ( I waited for approx 1 min )
> > >
> > > I tried to turn UEFI support ON and OFF.
> > > No success.
> > >
> > > Does Clang-PGO support Legacy-BIOS or is something different wrong?
> > >
> > > Thanks.
> > >
> >
> > My bootloader is GRUB.
> >
> > In UEFI-BIOS settings there is no secure-boot disable option.
> > Just simple "Use UEFI BIOS" enabled|disabled.
> >
> > Installed Debian packages:
> >
> > ii grub-common 2.04-12
> > ii grub-pc 2.04-12
> > ii grub-pc-bin 2.04-12
> > ii grub2-common 2.04-12
> >
> > I found in the below link to do in grub-shell:
> >
> > set check_signatures=no
> >
> > But this is when grub-efi is installed.
> >
> > - Sedat -
> >
> > Link: https://unix.stackexchange.com/questions/126286/grub-efi-disable-signature-check
>
> Forget about that "Wrong EFI loader signature" - I see this with all other
> kernels (all boot fine).
>
> I tried in QEMU with and without KASLR:
>
> [ run_qemu.sh ]
> KPATH=$(pwd)
>
> APPEND="root=/dev/ram0 console=ttyS0 hung_task_panic=1 earlyprintk=ttyS0,115200"
> APPEND="$APPEND nokaslr"
>
> qemu-system-x86_64 -enable-kvm -M pc -kernel $KPATH/bzImage -initrd
> $KPATH/initrd.img -m 512 -net none -serial stdio -append "${APPEND}"
> [ /run_qemu.sh ]
>
> $ ./run_qemu.sh
> Probing EDD (edd=off to disable)... ok
> Wrong EFI loader signature.
> early console in extract_kernel
> input_data: 0x000000000289940d
> input_len: 0x000000000069804a
> output: 0x0000000001000000
> output_len: 0x0000000001ef2010
> kernel_total_size: 0x0000000001c2c000
> needed_size: 0x0000000002000000
> trampoline_32bit: 0x000000000009d000
>
>
> KASLR disabled: 'nokaslr' on cmdline.
>
>
> Decompressing Linux... Parsing ELF... No relocation needed... done.
> Booting the kernel.
>
> QEMU run stops, too.
>

I re-generated my initrd.img with GZIP as compressor (my default is ZSTD).

--- /etc/initramfs-tools/initramfs.conf 2021-01-17 12:35:30.823818501 +0100
+++ /etc/initramfs-tools/initramfs.conf.zstd    2020-09-21
23:55:43.121735427 +0200
@@ -41,7 +41,7 @@ KEYMAP=n
# COMPRESS: [ gzip | bzip2 | lz4 | lzma | lzop | xz | zstd ]
#

-COMPRESS=gzip
+COMPRESS=zstd

#
# DEVICE: ...

root# KVER="5.11.0-rc3-9-amd64-clang12-pgo" ; update-initramfs -c -k $KVER

QEMU boot stops at the same stage.

Now, my head is empty...

Any comments?

- Sedat -
Sedat Dilek Jan. 17, 2021, 11:58 a.m. UTC | #8
On Sun, Jan 17, 2021 at 12:42 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
>
> On Sun, Jan 17, 2021 at 12:23 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> >
> > On Sun, Jan 17, 2021 at 11:53 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > >
> > > On Sun, Jan 17, 2021 at 11:44 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > >
> > > > On Sat, Jan 16, 2021 at 9:23 PM Bill Wendling <morbo@google.com> wrote:
> > > > >
> > > > > On Sat, Jan 16, 2021 at 9:39 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > > > > On Sat, Jan 16, 2021 at 10:44 AM 'Bill Wendling' via Clang Built Linux
> > > > > > <clang-built-linux@googlegroups.com> wrote:
> > > > > > >
> > > > > > > From: Sami Tolvanen <samitolvanen@google.com>
> > > > > > >
> > > > > > > Enable the use of clang's Profile-Guided Optimization[1]. To generate a
> > > > > > > profile, the kernel is instrumented with PGO counters, a representative
> > > > > > > workload is run, and the raw profile data is collected from
> > > > > > > /sys/kernel/debug/pgo/profraw.
> > > > > > >
> > > > > > > The raw profile data must be processed by clang's "llvm-profdata" tool
> > > > > > > before it can be used during recompilation:
> > > > > > >
> > > > > > >   $ cp /sys/kernel/debug/pgo/profraw vmlinux.profraw
> > > > > > >   $ llvm-profdata merge --output=vmlinux.profdata vmlinux.profraw
> > > > > > >
> > > > > > > Multiple raw profiles may be merged during this step.
> > > > > > >
> > > > > > > The data can now be used by the compiler:
> > > > > > >
> > > > > > >   $ make LLVM=1 KCFLAGS=-fprofile-use=vmlinux.profdata ...
> > > > > > >
> > > > > > > This initial submission is restricted to x86, as that's the platform we
> > > > > > > know works. This restriction can be lifted once other platforms have
> > > > > > > been verified to work with PGO.
> > > > > > >
> > > > > > > Note that this method of profiling the kernel is clang-native, unlike
> > > > > > > the clang support in kernel/gcov.
> > > > > > >
> > > > > > > [1] https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization
> > > > > > >
> > > > > > > Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
> > > > > > > Co-developed-by: Bill Wendling <morbo@google.com>
> > > > > > > Signed-off-by: Bill Wendling <morbo@google.com>
> > > > > > > ---
> > > > > > > v2: - Added "__llvm_profile_instrument_memop" based on Nathan Chancellor's
> > > > > > >       testing.
> > > > > > >     - Corrected documentation, re PGO flags when using LTO, based on Fangrui
> > > > > > >       Song's comments.
> > > > > > > v3: - Added change log section based on Sedat Dilek's comments.
> > > > > > > > v4: - Remove non-x86 Makefile changes and use "hweight64" instead of using our
> > > > > > >       own popcount implementation, based on Nick Desaulniers's comment.
> > > > > > > v5: - Correct padding calculation, discovered by Nathan Chancellor.
> > > > > > > ---
> > > > > > >  Documentation/dev-tools/index.rst     |   1 +
> > > > > > >  Documentation/dev-tools/pgo.rst       | 127 +++++++++
> > > > > > >  MAINTAINERS                           |   9 +
> > > > > > >  Makefile                              |   3 +
> > > > > > >  arch/Kconfig                          |   1 +
> > > > > > >  arch/x86/Kconfig                      |   1 +
> > > > > > >  arch/x86/boot/Makefile                |   1 +
> > > > > > >  arch/x86/boot/compressed/Makefile     |   1 +
> > > > > > >  arch/x86/crypto/Makefile              |   2 +
> > > > > > >  arch/x86/entry/vdso/Makefile          |   1 +
> > > > > > >  arch/x86/kernel/vmlinux.lds.S         |   2 +
> > > > > > >  arch/x86/platform/efi/Makefile        |   1 +
> > > > > > >  arch/x86/purgatory/Makefile           |   1 +
> > > > > > >  arch/x86/realmode/rm/Makefile         |   1 +
> > > > > > >  arch/x86/um/vdso/Makefile             |   1 +
> > > > > > >  drivers/firmware/efi/libstub/Makefile |   1 +
> > > > > > >  include/asm-generic/vmlinux.lds.h     |  44 +++
> > > > > > >  kernel/Makefile                       |   1 +
> > > > > > >  kernel/pgo/Kconfig                    |  35 +++
> > > > > > >  kernel/pgo/Makefile                   |   5 +
> > > > > > >  kernel/pgo/fs.c                       | 382 ++++++++++++++++++++++++++
> > > > > > >  kernel/pgo/instrument.c               | 185 +++++++++++++
> > > > > > >  kernel/pgo/pgo.h                      | 206 ++++++++++++++
> > > > > > >  scripts/Makefile.lib                  |  10 +
> > > > > > >  24 files changed, 1022 insertions(+)
> > > > > > >  create mode 100644 Documentation/dev-tools/pgo.rst
> > > > > > >  create mode 100644 kernel/pgo/Kconfig
> > > > > > >  create mode 100644 kernel/pgo/Makefile
> > > > > > >  create mode 100644 kernel/pgo/fs.c
> > > > > > >  create mode 100644 kernel/pgo/instrument.c
> > > > > > >  create mode 100644 kernel/pgo/pgo.h
> > > > > > >
> > > > > > > diff --git a/Documentation/dev-tools/index.rst b/Documentation/dev-tools/index.rst
> > > > > > > index f7809c7b1ba9e..8d6418e858062 100644
> > > > > > > --- a/Documentation/dev-tools/index.rst
> > > > > > > +++ b/Documentation/dev-tools/index.rst
> > > > > > > @@ -26,6 +26,7 @@ whole; patches welcome!
> > > > > > >     kgdb
> > > > > > >     kselftest
> > > > > > >     kunit/index
> > > > > > > +   pgo
> > > > > > >
> > > > > > >
> > > > > > >  .. only::  subproject and html
> > > > > > > diff --git a/Documentation/dev-tools/pgo.rst b/Documentation/dev-tools/pgo.rst
> > > > > > > new file mode 100644
> > > > > > > index 0000000000000..b7f11d8405b73
> > > > > > > --- /dev/null
> > > > > > > +++ b/Documentation/dev-tools/pgo.rst
> > > > > > > @@ -0,0 +1,127 @@
> > > > > > > +.. SPDX-License-Identifier: GPL-2.0
> > > > > > > +
> > > > > > > +===============================
> > > > > > > +Using PGO with the Linux kernel
> > > > > > > +===============================
> > > > > > > +
> > > > > > > +Clang's profiling kernel support (PGO_) enables profiling of the Linux kernel
> > > > > > > +when building with Clang. The profiling data is exported via the ``pgo``
> > > > > > > +debugfs directory.
> > > > > > > +
> > > > > > > +.. _PGO: https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization
> > > > > > > +
> > > > > > > +
> > > > > > > +Preparation
> > > > > > > +===========
> > > > > > > +
> > > > > > > +Configure the kernel with:
> > > > > > > +
> > > > > > > +.. code-block:: make
> > > > > > > +
> > > > > > > +   CONFIG_DEBUG_FS=y
> > > > > > > +   CONFIG_PGO_CLANG=y
> > > > > > > +
> > > > > > > +Note that kernels compiled with profiling flags will be significantly larger
> > > > > > > +and run slower.
> > > > > > > +
> > > > > > > +Profiling data will only become accessible once debugfs has been mounted:
> > > > > > > +
> > > > > > > +.. code-block:: sh
> > > > > > > +
> > > > > > > +   mount -t debugfs none /sys/kernel/debug
> > > > > > > +
> > > > > > > +
> > > > > > > +Customization
> > > > > > > +=============
> > > > > > > +
> > > > > > > > +You can enable or disable profiling for individual files and directories by
> > > > > > > +adding a line similar to the following to the respective kernel Makefile:
> > > > > > > +
> > > > > > > +- For a single file (e.g. main.o)
> > > > > > > +
> > > > > > > +  .. code-block:: make
> > > > > > > +
> > > > > > > +     PGO_PROFILE_main.o := y
> > > > > > > +
> > > > > > > +- For all files in one directory
> > > > > > > +
> > > > > > > +  .. code-block:: make
> > > > > > > +
> > > > > > > +     PGO_PROFILE := y
> > > > > > > +
> > > > > > > +To exclude files from being profiled use
> > > > > > > +
> > > > > > > +  .. code-block:: make
> > > > > > > +
> > > > > > > +     PGO_PROFILE_main.o := n
> > > > > > > +
> > > > > > > +and
> > > > > > > +
> > > > > > > +  .. code-block:: make
> > > > > > > +
> > > > > > > +     PGO_PROFILE := n
> > > > > > > +
> > > > > > > +Only files which are linked to the main kernel image or are compiled as kernel
> > > > > > > +modules are supported by this mechanism.
> > > > > > > +
> > > > > > > +
> > > > > > > +Files
> > > > > > > +=====
> > > > > > > +
> > > > > > > +The PGO kernel support creates the following files in debugfs:
> > > > > > > +
> > > > > > > +``/sys/kernel/debug/pgo``
> > > > > > > +       Parent directory for all PGO-related files.
> > > > > > > +
> > > > > > > +``/sys/kernel/debug/pgo/reset``
> > > > > > > +       Global reset file: resets all coverage data to zero when written to.
> > > > > > > +
> > > > > > > > +``/sys/kernel/debug/pgo/profraw``
> > > > > > > > +       The raw PGO data that must be processed with ``llvm-profdata``.
> > > > > > > +
> > > > > > > +
> > > > > > > +Workflow
> > > > > > > +========
> > > > > > > +
> > > > > > > +The PGO kernel can be run on the host or test machines. The data though should
> > > > > > > > +be analyzed with Clang's tools from the same Clang version the kernel was
> > > > > > > > +compiled with. Clang's tolerant of version skew, but it's easier to use the same
> > > > > > > +Clang version.
> > > > > > > +
> > > > > > > +The profiling data is useful for optimizing the kernel, analyzing coverage,
> > > > > > > +etc. Clang offers tools to perform these tasks.
> > > > > > > +
> > > > > > > +Here is an example workflow for profiling an instrumented kernel with PGO and
> > > > > > > +using the result to optimize the kernel:
> > > > > > > +
> > > > > > > +1) Install the kernel on the TEST machine.
> > > > > > > +
> > > > > > > +2) Reset the data counters right before running the load tests
> > > > > > > +
> > > > > > > +   .. code-block:: sh
> > > > > > > +
> > > > > > > +      $ echo 1 > /sys/kernel/debug/pgo/reset
> > > > > > > +
> > > > > >
> > > > > > I do not get this...
> > > > > >
> > > > > > # mount | grep debugfs
> > > > > > debugfs on /sys/kernel/debug type debugfs (rw,nosuid,nodev,noexec,relatime)
> > > > > >
> > > > > > After the load-test...?
> > > > > >
> > > > > > echo 0 > /sys/kernel/debug/pgo/reset
> > > > > >
> > > > > Writing anything to /sys/kernel/debug/pgo/reset will cause it to reset
> > > > > the profiling counters. I picked 1 (one) semi-randomly, but it could
> > > > > be any number, letter, your favorite short story, etc. You don't want
> > > > > to reset it before collecting the profiling data from your load tests
> > > > > though.
> > > > >
> > > > > > > +3) Run the load tests.
> > > > > > > +
> > > > > > > +4) Collect the raw profile data
> > > > > > > +
> > > > > > > +   .. code-block:: sh
> > > > > > > +
> > > > > > > +      $ cp -a /sys/kernel/debug/pgo/profraw /tmp/vmlinux.profraw
> > > > > > > +
> > > > > >
> > > > > > This is only 4,9M small and seen from the date 5mins before I run the
> > > > > > echo-1 line.
> > > > > >
> > > > > > # ll /sys/kernel/debug/pgo
> > > > > > insgesamt 0
> > > > > > drwxr-xr-x  2 root root 0 16. Jan 17:29 .
> > > > > > drwx------ 41 root root 0 16. Jan 17:29 ..
> > > > > > -rw-------  1 root root 0 16. Jan 17:29 profraw
> > > > > > --w-------  1 root root 0 16. Jan 18:19 reset
> > > > > >
> > > > > > # cp -a /sys/kernel/debug/pgo/profraw /tmp/vmlinux.profraw
> > > > > >
> > > > > > # ll /tmp/vmlinux.profraw
> > > > > > -rw------- 1 root root 4,9M 16. Jan 17:29 /tmp/vmlinux.profraw
> > > > > >
> > > > > > For me there was no prof-data collected from my defconfig kernel-build.
> > > > > >
> > > > > The /sys/kernel/debug/pgo/profraw file is read-only. Nothing writes to
> > > > > it, not even the kernel. All it does is serialize the profiling
> > > > > counters from a memory location in the kernel into a format that
> > > > > LLVM's tools can understand.
> > > > >
> > > > > > > +5) (Optional) Download the raw profile data to the HOST machine.
> > > > > > > +
> > > > > > > +6) Process the raw profile data
> > > > > > > +
> > > > > > > +   .. code-block:: sh
> > > > > > > +
> > > > > > > +      $ llvm-profdata merge --output=vmlinux.profdata vmlinux.profraw
> > > > > > > +
> > > > > >
> > > > > > Is that executed in /path/to/linux/git?
> > > > > >
> > > > > The llvm-profdata tool is not in the linux source tree. You need to
> > > > > grab it from a clang distribution (or built from clang's git repo).
> > > > >
> > > > > > > +   Note that multiple raw profile data files can be merged during this step.
> > > > > > > +
> > > > > > > +7) Rebuild the kernel using the profile data (PGO disabled)
> > > > > > > +
> > > > > > > +   .. code-block:: sh
> > > > > > > +
> > > > > > > +      $ make LLVM=1 KCFLAGS=-fprofile-use=vmlinux.profdata ...
> > > > > >
> > > > > > How big is vmlinux.profdata (make defconfig)?
> > > > > >
> > > > > I don't have numbers for this, but from what you listed here, it's ~5M
> > > > > in size. The size is proportional to the number of counters
> > > > > instrumented in the kernel.
> > > > >
> > > > > > Do I need to do a full defconfig build or can I stop the build after
> > > > > > let me say 10mins?
> > > > > >
> > > > > You should do a full rebuild. Make sure that PGO is disabled during the rebuild.
> > > > >
> > > >
> > > > Thanks Bill for all the information.
> > > >
> > > > And sorry if I am so pedantic.
> > > >
> > > > I have installed my Debian system with Legacy-BIOS enabled.
> > > >
> > > > When I rebuild with KCFLAGS=-fprofile-use=vmlinux.profdata (LLVM=1 I
> > > > have as a default) my system hangs on reboot.
> > > >
> > > > [ diffconfig ]
> > > > $ scripts/diffconfig /boot/config-5.11.0-rc3-8-amd64-clang12-pgo
> > > > /boot/config-5.11.0-rc3-9-amd64-clang12-pgo
> > > > BUILD_SALT "5.11.0-rc3-8-amd64-clang12-pgo" -> "5.11.0-rc3-9-amd64-clang12-pgo"
> > > > PGO_CLANG y -> n
> > > >
> > > > [ my make line ]
> > > > $ cat ../start-build_5.11.0-rc3-9-amd64-clang12-pgo.txt
> > > > dileks     63120   63095  0 06:47 pts/2    00:00:00 /usr/bin/perf_5.10
> > > > stat make V=1 -j4 HOSTCC=clang HOSTCXX=clang++ HOSTLD=ld.lld CC=clang
> > > > LD=ld.lld LLVM=1 LLVM_IAS=1 PAHOLE=/opt/pahole/bin/pahole
> > > > LOCALVERSION=-9-amd64-clang12-pgo KBUILD_VERBOSE=1
> > > > KBUILD_BUILD_HOST=iniza KBUILD_BUILD_USER=sedat.dilek@gmail.com
> > > > KBUILD_BUILD_TIMESTAMP=2021-01-17 bindeb-pkg
> > > > KDEB_PKGVERSION=5.11.0~rc3-9~bullseye+dileks1
> > > > KCFLAGS=-fprofile-use=vmlinux.profdata
> > > >
> > > > ( Yes, 06:47 a.m. in the morning :-). )
> > > >
> > > > > When I boot with the rebuilt Linux kernel I see:
> > > >
> > > > Wrong EFI loader signature
> > > > ...
> > > > Decompressing
> > > > Parsing EFI
> > > > Performing Relocations done.
> > > > Booting the Kernel.
> > > >
> > > > *** SYSTEM HANGS ***
> > > > ( I waited for approx 1 min )
> > > >
> > > > I tried to turn UEFI support ON and OFF.
> > > > No success.
> > > >
> > > > Does Clang-PGO support Legacy-BIOS or is something different wrong?
> > > >
> > > > Thanks.
> > > >
> > >
> > > My bootloader is GRUB.
> > >
> > > In UEFI-BIOS settings there is no secure-boot disable option.
> > > Just simple "Use UEFI BIOS" enabled|disabled.
> > >
> > > Installed Debian packages:
> > >
> > > ii grub-common 2.04-12
> > > ii grub-pc 2.04-12
> > > ii grub-pc-bin 2.04-12
> > > ii grub2-common 2.04-12
> > >
> > > I found in the below link to do in grub-shell:
> > >
> > > set check_signatures=no
> > >
> > > But this is when grub-efi is installed.
> > >
> > > - Sedat -
> > >
> > > Link: https://unix.stackexchange.com/questions/126286/grub-efi-disable-signature-check
> >
> > Forget about that "Wrong EFI loader signature" - I see this with all other
> > kernels (all boot fine).
> >
> > I tried in QEMU with and without KASLR:
> >
> > [ run_qemu.sh ]
> > KPATH=$(pwd)
> >
> > APPEND="root=/dev/ram0 console=ttyS0 hung_task_panic=1 earlyprintk=ttyS0,115200"
> > APPEND="$APPEND nokaslr"
> >
> > qemu-system-x86_64 -enable-kvm -M pc -kernel $KPATH/bzImage -initrd
> > $KPATH/initrd.img -m 512 -net none -serial stdio -append "${APPEND}"
> > [ /run_qemu.sh ]
> >
> > $ ./run_qemu.sh
> > Probing EDD (edd=off to disable)... ok
> > Wrong EFI loader signature.
> > early console in extract_kernel
> > input_data: 0x000000000289940d
> > input_len: 0x000000000069804a
> > output: 0x0000000001000000
> > output_len: 0x0000000001ef2010
> > kernel_total_size: 0x0000000001c2c000
> > needed_size: 0x0000000002000000
> > trampoline_32bit: 0x000000000009d000
> >
> >
> > KASLR disabled: 'nokaslr' on cmdline.
> >
> >
> > Decompressing Linux... Parsing ELF... No relocation needed... done.
> > Booting the kernel.
> >
> > QEMU run stops, too.
> >
>
> I re-generated my initrd.img with GZIP as compressor (my default is ZSTD).
>
> --- /etc/initramfs-tools/initramfs.conf 2021-01-17 12:35:30.823818501 +0100
> +++ /etc/initramfs-tools/initramfs.conf.zstd    2020-09-21
> 23:55:43.121735427 +0200
> @@ -41,7 +41,7 @@ KEYMAP=n
> # COMPRESS: [ gzip | bzip2 | lz4 | lzma | lzop | xz | zstd ]
> #
>
> -COMPRESS=gzip
> +COMPRESS=zstd
>
> #
> # DEVICE: ...
>
> root# KVER="5.11.0-rc3-9-amd64-clang12-pgo" ; update-initramfs -c -k $KVER
>
> QEMU boot stops at the same stage.
>
> Now, my head is empty...
>
> Any comments?
>

( Just as a side note I have Nick's DWARF-v5 support enabled. )

There is one EFI related warning in my build-log:

$ grep warning: build-log_5.11.0-rc3-9-amd64-clang12-pgo.txt
dpkg-architecture: warning: specified GNU system type x86_64-linux-gnu
does not match CC system type x86_64-pc-linux-gnu, try setting a
correct CC environment variable
warning: arch/x86/platform/efi/quirks.c: Function control flow change
detected (hash mismatch) efi_arch_mem_reserve Hash =
391331300655996873 [-Wbackend-plugin]
warning: arch/x86/platform/efi/efi.c: Function control flow change
detected (hash mismatch) efi_attr_is_visible Hash = 567185240781730690
[-Wbackend-plugin]
arch/x86/crypto/aegis128-aesni-glue.c:265:30: warning: unused variable
'simd_alg' [-Wunused-variable]
warning: lib/crypto/sha256.c: Function control flow change detected
(hash mismatch) sha256_update Hash = 744640996947387358
[-Wbackend-plugin]
warning: arch/x86/boot/compressed/string.c: Function control flow
change detected (hash mismatch) memcmp Hash = 742261418966908927
[-Wbackend-plugin]
warning: arch/x86/boot/compressed/string.c: Function control flow
change detected (hash mismatch) bcmp Hash = 742261418966908927
[-Wbackend-plugin]
warning: arch/x86/boot/compressed/string.c: Function control flow
change detected (hash mismatch) strcmp Hash = 536873291001348520
[-Wbackend-plugin]
warning: arch/x86/boot/compressed/string.c: Function control flow
change detected (hash mismatch) strnlen Hash = 146835646621254984
[-Wbackend-plugin]
warning: arch/x86/boot/compressed/string.c: Function control flow
change detected (hash mismatch) simple_strtoull Hash =
252792765950587360 [-Wbackend-plugin]
warning: arch/x86/boot/compressed/string.c: Function control flow
change detected (hash mismatch) strstr Hash = 391331303349076211
[-Wbackend-plugin]
warning: arch/x86/boot/compressed/string.c: Function control flow
change detected (hash mismatch) strchr Hash = 1063705159280644635
[-Wbackend-plugin]
warning: arch/x86/boot/compressed/string.c: Function control flow
change detected (hash mismatch) kstrtoull Hash = 758414239132790022
[-Wbackend-plugin]
drivers/infiniband/hw/hfi1/platform.o: warning: objtool: tune_serdes()
falls through to next function apply_tx_lanes()

Cannot say if this information is helpful.

- Sedat -
Sedat Dilek Jan. 17, 2021, 12:05 p.m. UTC | #9
On Sun, Jan 17, 2021 at 12:58 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
>
> On Sun, Jan 17, 2021 at 12:42 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> >
> > On Sun, Jan 17, 2021 at 12:23 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > >
> > > On Sun, Jan 17, 2021 at 11:53 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > >
> > > > On Sun, Jan 17, 2021 at 11:44 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > > >
> > > > > On Sat, Jan 16, 2021 at 9:23 PM Bill Wendling <morbo@google.com> wrote:
> > > > > >
> > > > > > On Sat, Jan 16, 2021 at 9:39 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > > > > > On Sat, Jan 16, 2021 at 10:44 AM 'Bill Wendling' via Clang Built Linux
> > > > > > > <clang-built-linux@googlegroups.com> wrote:
> > > > > > > >
> > > > > > > > From: Sami Tolvanen <samitolvanen@google.com>
> > > > > > > >
> > > > > > > > Enable the use of clang's Profile-Guided Optimization[1]. To generate a
> > > > > > > > profile, the kernel is instrumented with PGO counters, a representative
> > > > > > > > workload is run, and the raw profile data is collected from
> > > > > > > > /sys/kernel/debug/pgo/profraw.
> > > > > > > >
> > > > > > > > The raw profile data must be processed by clang's "llvm-profdata" tool
> > > > > > > > before it can be used during recompilation:
> > > > > > > >
> > > > > > > >   $ cp /sys/kernel/debug/pgo/profraw vmlinux.profraw
> > > > > > > >   $ llvm-profdata merge --output=vmlinux.profdata vmlinux.profraw
> > > > > > > >
> > > > > > > > Multiple raw profiles may be merged during this step.
> > > > > > > >
> > > > > > > > The data can now be used by the compiler:
> > > > > > > >
> > > > > > > >   $ make LLVM=1 KCFLAGS=-fprofile-use=vmlinux.profdata ...
> > > > > > > >
> > > > > > > > This initial submission is restricted to x86, as that's the platform we
> > > > > > > > know works. This restriction can be lifted once other platforms have
> > > > > > > > been verified to work with PGO.
> > > > > > > >
> > > > > > > > Note that this method of profiling the kernel is clang-native, unlike
> > > > > > > > the clang support in kernel/gcov.
> > > > > > > >
> > > > > > > > [1] https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization
> > > > > > > >
> > > > > > > > Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
> > > > > > > > Co-developed-by: Bill Wendling <morbo@google.com>
> > > > > > > > Signed-off-by: Bill Wendling <morbo@google.com>
> > > > > > > > ---
> > > > > > > > v2: - Added "__llvm_profile_instrument_memop" based on Nathan Chancellor's
> > > > > > > >       testing.
> > > > > > > >     - Corrected documentation, re PGO flags when using LTO, based on Fangrui
> > > > > > > >       Song's comments.
> > > > > > > > v3: - Added change log section based on Sedat Dilek's comments.
> > > > > > > > > v4: - Remove non-x86 Makefile changes and use "hweight64" instead of using our
> > > > > > > >       own popcount implementation, based on Nick Desaulniers's comment.
> > > > > > > > v5: - Correct padding calculation, discovered by Nathan Chancellor.
> > > > > > > > ---
> > > > > > > >  Documentation/dev-tools/index.rst     |   1 +
> > > > > > > >  Documentation/dev-tools/pgo.rst       | 127 +++++++++
> > > > > > > >  MAINTAINERS                           |   9 +
> > > > > > > >  Makefile                              |   3 +
> > > > > > > >  arch/Kconfig                          |   1 +
> > > > > > > >  arch/x86/Kconfig                      |   1 +
> > > > > > > >  arch/x86/boot/Makefile                |   1 +
> > > > > > > >  arch/x86/boot/compressed/Makefile     |   1 +
> > > > > > > >  arch/x86/crypto/Makefile              |   2 +
> > > > > > > >  arch/x86/entry/vdso/Makefile          |   1 +
> > > > > > > >  arch/x86/kernel/vmlinux.lds.S         |   2 +
> > > > > > > >  arch/x86/platform/efi/Makefile        |   1 +
> > > > > > > >  arch/x86/purgatory/Makefile           |   1 +
> > > > > > > >  arch/x86/realmode/rm/Makefile         |   1 +
> > > > > > > >  arch/x86/um/vdso/Makefile             |   1 +
> > > > > > > >  drivers/firmware/efi/libstub/Makefile |   1 +
> > > > > > > >  include/asm-generic/vmlinux.lds.h     |  44 +++
> > > > > > > >  kernel/Makefile                       |   1 +
> > > > > > > >  kernel/pgo/Kconfig                    |  35 +++
> > > > > > > >  kernel/pgo/Makefile                   |   5 +
> > > > > > > >  kernel/pgo/fs.c                       | 382 ++++++++++++++++++++++++++
> > > > > > > >  kernel/pgo/instrument.c               | 185 +++++++++++++
> > > > > > > >  kernel/pgo/pgo.h                      | 206 ++++++++++++++
> > > > > > > >  scripts/Makefile.lib                  |  10 +
> > > > > > > >  24 files changed, 1022 insertions(+)
> > > > > > > >  create mode 100644 Documentation/dev-tools/pgo.rst
> > > > > > > >  create mode 100644 kernel/pgo/Kconfig
> > > > > > > >  create mode 100644 kernel/pgo/Makefile
> > > > > > > >  create mode 100644 kernel/pgo/fs.c
> > > > > > > >  create mode 100644 kernel/pgo/instrument.c
> > > > > > > >  create mode 100644 kernel/pgo/pgo.h
> > > > > > > >
> > > > > > > > diff --git a/Documentation/dev-tools/index.rst b/Documentation/dev-tools/index.rst
> > > > > > > > index f7809c7b1ba9e..8d6418e858062 100644
> > > > > > > > --- a/Documentation/dev-tools/index.rst
> > > > > > > > +++ b/Documentation/dev-tools/index.rst
> > > > > > > > @@ -26,6 +26,7 @@ whole; patches welcome!
> > > > > > > >     kgdb
> > > > > > > >     kselftest
> > > > > > > >     kunit/index
> > > > > > > > +   pgo
> > > > > > > >
> > > > > > > >
> > > > > > > >  .. only::  subproject and html
> > > > > > > > diff --git a/Documentation/dev-tools/pgo.rst b/Documentation/dev-tools/pgo.rst
> > > > > > > > new file mode 100644
> > > > > > > > index 0000000000000..b7f11d8405b73
> > > > > > > > --- /dev/null
> > > > > > > > +++ b/Documentation/dev-tools/pgo.rst
> > > > > > > > @@ -0,0 +1,127 @@
> > > > > > > > +.. SPDX-License-Identifier: GPL-2.0
> > > > > > > > +
> > > > > > > > +===============================
> > > > > > > > +Using PGO with the Linux kernel
> > > > > > > > +===============================
> > > > > > > > +
> > > > > > > > +Clang's profiling kernel support (PGO_) enables profiling of the Linux kernel
> > > > > > > > +when building with Clang. The profiling data is exported via the ``pgo``
> > > > > > > > +debugfs directory.
> > > > > > > > +
> > > > > > > > +.. _PGO: https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization
> > > > > > > > +
> > > > > > > > +
> > > > > > > > +Preparation
> > > > > > > > +===========
> > > > > > > > +
> > > > > > > > +Configure the kernel with:
> > > > > > > > +
> > > > > > > > +.. code-block:: make
> > > > > > > > +
> > > > > > > > +   CONFIG_DEBUG_FS=y
> > > > > > > > +   CONFIG_PGO_CLANG=y
> > > > > > > > +
> > > > > > > > +Note that kernels compiled with profiling flags will be significantly larger
> > > > > > > > +and run slower.
> > > > > > > > +
> > > > > > > > +Profiling data will only become accessible once debugfs has been mounted:
> > > > > > > > +
> > > > > > > > +.. code-block:: sh
> > > > > > > > +
> > > > > > > > +   mount -t debugfs none /sys/kernel/debug
> > > > > > > > +
> > > > > > > > +
> > > > > > > > +Customization
> > > > > > > > +=============
> > > > > > > > +
> > > > > > > > +You can enable or disable profiling for individual files and directories by
> > > > > > > > +adding a line similar to the following to the respective kernel Makefile:
> > > > > > > > +
> > > > > > > > +- For a single file (e.g. main.o)
> > > > > > > > +
> > > > > > > > +  .. code-block:: make
> > > > > > > > +
> > > > > > > > +     PGO_PROFILE_main.o := y
> > > > > > > > +
> > > > > > > > +- For all files in one directory
> > > > > > > > +
> > > > > > > > +  .. code-block:: make
> > > > > > > > +
> > > > > > > > +     PGO_PROFILE := y
> > > > > > > > +
> > > > > > > > +To exclude files from being profiled use
> > > > > > > > +
> > > > > > > > +  .. code-block:: make
> > > > > > > > +
> > > > > > > > +     PGO_PROFILE_main.o := n
> > > > > > > > +
> > > > > > > > +and
> > > > > > > > +
> > > > > > > > +  .. code-block:: make
> > > > > > > > +
> > > > > > > > +     PGO_PROFILE := n
> > > > > > > > +
> > > > > > > > +Only files which are linked to the main kernel image or are compiled as kernel
> > > > > > > > +modules are supported by this mechanism.
> > > > > > > > +
> > > > > > > > +
> > > > > > > > +Files
> > > > > > > > +=====
> > > > > > > > +
> > > > > > > > +The PGO kernel support creates the following files in debugfs:
> > > > > > > > +
> > > > > > > > +``/sys/kernel/debug/pgo``
> > > > > > > > +       Parent directory for all PGO-related files.
> > > > > > > > +
> > > > > > > > +``/sys/kernel/debug/pgo/reset``
> > > > > > > > +       Global reset file: resets all coverage data to zero when written to.
> > > > > > > > +
> > > > > > > > +``/sys/kernel/debug/pgo/profraw``
> > > > > > > > +       The raw PGO data that must be processed with ``llvm-profdata``.
> > > > > > > > +
> > > > > > > > +
> > > > > > > > +Workflow
> > > > > > > > +========
> > > > > > > > +
> > > > > > > > +The PGO kernel can be run on the host or test machines. The data though should
> > > > > > > > +be analyzed with Clang's tools from the same Clang version as the kernel was
> > > > > > > > +compiled with. Clang's tolerant of version skew, but it's easier to use the same
> > > > > > > > +Clang version.
> > > > > > > > +
> > > > > > > > +The profiling data is useful for optimizing the kernel, analyzing coverage,
> > > > > > > > +etc. Clang offers tools to perform these tasks.
> > > > > > > > +
> > > > > > > > +Here is an example workflow for profiling an instrumented kernel with PGO and
> > > > > > > > +using the result to optimize the kernel:
> > > > > > > > +
> > > > > > > > +1) Install the kernel on the TEST machine.
> > > > > > > > +
> > > > > > > > +2) Reset the data counters right before running the load tests
> > > > > > > > +
> > > > > > > > +   .. code-block:: sh
> > > > > > > > +
> > > > > > > > +      $ echo 1 > /sys/kernel/debug/pgo/reset
> > > > > > > > +
> > > > > > >
> > > > > > > I do not get this...
> > > > > > >
> > > > > > > # mount | grep debugfs
> > > > > > > debugfs on /sys/kernel/debug type debugfs (rw,nosuid,nodev,noexec,relatime)
> > > > > > >
> > > > > > > After the load-test...?
> > > > > > >
> > > > > > > echo 0 > /sys/kernel/debug/pgo/reset
> > > > > > >
> > > > > > Writing anything to /sys/kernel/debug/pgo/reset will cause it to reset
> > > > > > the profiling counters. I picked 1 (one) semi-randomly, but it could
> > > > > > be any number, letter, your favorite short story, etc. You don't want
> > > > > > to reset it before collecting the profiling data from your load tests
> > > > > > though.
> > > > > >
> > > > > > > > +3) Run the load tests.
> > > > > > > > +
> > > > > > > > +4) Collect the raw profile data
> > > > > > > > +
> > > > > > > > +   .. code-block:: sh
> > > > > > > > +
> > > > > > > > +      $ cp -a /sys/kernel/debug/pgo/profraw /tmp/vmlinux.profraw
> > > > > > > > +
> > > > > > >
> > > > > > > This is only 4,9M small and seen from the date 5mins before I run the
> > > > > > > echo-1 line.
> > > > > > >
> > > > > > > # ll /sys/kernel/debug/pgo
> > > > > > > insgesamt 0
> > > > > > > drwxr-xr-x  2 root root 0 16. Jan 17:29 .
> > > > > > > drwx------ 41 root root 0 16. Jan 17:29 ..
> > > > > > > -rw-------  1 root root 0 16. Jan 17:29 profraw
> > > > > > > --w-------  1 root root 0 16. Jan 18:19 reset
> > > > > > >
> > > > > > > # cp -a /sys/kernel/debug/pgo/profraw /tmp/vmlinux.profraw
> > > > > > >
> > > > > > > # ll /tmp/vmlinux.profraw
> > > > > > > -rw------- 1 root root 4,9M 16. Jan 17:29 /tmp/vmlinux.profraw
> > > > > > >
> > > > > > > For me there was no prof-data collected from my defconfig kernel-build.
> > > > > > >
> > > > > > The /sys/kernel/debug/pgo/profraw file is read-only. Nothing writes to
> > > > > > it, not even the kernel. All it does is serialize the profiling
> > > > > > counters from a memory location in the kernel into a format that
> > > > > > LLVM's tools can understand.
> > > > > >
> > > > > > > > +5) (Optional) Download the raw profile data to the HOST machine.
> > > > > > > > +
> > > > > > > > +6) Process the raw profile data
> > > > > > > > +
> > > > > > > > +   .. code-block:: sh
> > > > > > > > +
> > > > > > > > +      $ llvm-profdata merge --output=vmlinux.profdata vmlinux.profraw
> > > > > > > > +
> > > > > > >
> > > > > > > Is that executed in /path/to/linux/git?
> > > > > > >
> > > > > > The llvm-profdata tool is not in the linux source tree. You need to
> > > > > > grab it from a clang distribution (or build it from clang's git repo).
> > > > > >
> > > > > > > > +   Note that multiple raw profile data files can be merged during this step.
> > > > > > > > +
> > > > > > > > +7) Rebuild the kernel using the profile data (PGO disabled)
> > > > > > > > +
> > > > > > > > +   .. code-block:: sh
> > > > > > > > +
> > > > > > > > +      $ make LLVM=1 KCFLAGS=-fprofile-use=vmlinux.profdata ...
> > > > > > >
> > > > > > > How big is vmlinux.profdata (make defconfig)?
> > > > > > >
> > > > > > I don't have numbers for this, but from what you listed here, it's ~5M
> > > > > > in size. The size is proportional to the number of counters
> > > > > > instrumented in the kernel.
> > > > > >
> > > > > > > Do I need to do a full defconfig build or can I stop the build after
> > > > > > > let me say 10mins?
> > > > > > >
> > > > > > You should do a full rebuild. Make sure that PGO is disabled during the rebuild.
> > > > > >
> > > > >
> > > > > Thanks Bill for all the information.
> > > > >
> > > > > And sorry if I am so pedantic.
> > > > >
> > > > > I have installed my Debian system with Legacy-BIOS enabled.
> > > > >
> > > > > When I rebuild with KCFLAGS=-fprofile-use=vmlinux.profdata (LLVM=1 I
> > > > > have as a default) my system hangs on reboot.
> > > > >
> > > > > [ diffconfig ]
> > > > > $ scripts/diffconfig /boot/config-5.11.0-rc3-8-amd64-clang12-pgo
> > > > > /boot/config-5.11.0-rc3-9-amd64-clang12-pgo
> > > > > BUILD_SALT "5.11.0-rc3-8-amd64-clang12-pgo" -> "5.11.0-rc3-9-amd64-clang12-pgo"
> > > > > PGO_CLANG y -> n
> > > > >
> > > > > [ my make line ]
> > > > > $ cat ../start-build_5.11.0-rc3-9-amd64-clang12-pgo.txt
> > > > > dileks     63120   63095  0 06:47 pts/2    00:00:00 /usr/bin/perf_5.10
> > > > > stat make V=1 -j4 HOSTCC=clang HOSTCXX=clang++ HOSTLD=ld.lld CC=clang
> > > > > LD=ld.lld LLVM=1 LLVM_IAS=1 PAHOLE=/opt/pahole/bin/pahole
> > > > > LOCALVERSION=-9-amd64-clang12-pgo KBUILD_VERBOSE=1
> > > > > KBUILD_BUILD_HOST=iniza KBUILD_BUILD_USER=sedat.dilek@gmail.com
> > > > > KBUILD_BUILD_TIMESTAMP=2021-01-17 bindeb-pkg
> > > > > KDEB_PKGVERSION=5.11.0~rc3-9~bullseye+dileks1
> > > > > KCFLAGS=-fprofile-use=vmlinux.profdata
> > > > >
> > > > > ( Yes, 06:47 a.m. in the morning :-). )
> > > > >
> > > > > > When I boot with the rebuilt Linux kernel I see:
> > > > >
> > > > > Wrong EFI loader signature
> > > > > ...
> > > > > Decompressing
> > > > > Parsing EFI
> > > > > Performing Relocations done.
> > > > > Booting the Kernel.
> > > > >
> > > > > *** SYSTEM HANGS ***
> > > > > ( I waited for approx 1 min )
> > > > >
> > > > > I tried to turn UEFI support ON and OFF.
> > > > > No success.
> > > > >
> > > > > Does Clang-PGO support Legacy-BIOS or is something different wrong?
> > > > >
> > > > > Thanks.
> > > > >
> > > >
> > > > My bootloader is GRUB.
> > > >
> > > > In UEFI-BIOS settings there is no secure-boot disable option.
> > > > Just simple "Use UEFI BIOS" enabled|disabled.
> > > >
> > > > Installed Debian packages:
> > > >
> > > > ii grub-common 2.04-12
> > > > ii grub-pc 2.04-12
> > > > ii grub-pc-bin 2.04-12
> > > > ii grub2-common 2.04-12
> > > >
> > > > I found in the below link to do in grub-shell:
> > > >
> > > > set check_signatures=no
> > > >
> > > > But this is when grub-efi is installed.
> > > >
> > > > - Sedat -
> > > >
> > > > Link: https://unix.stackexchange.com/questions/126286/grub-efi-disable-signature-check
> > >
> > > Forget about that "Wrong EFI bootloader" - I see this with all other
> > > kernels (all boot fine).
> > >
> > > I tried in QEMU with and without KASLR:
> > >
> > > [ run_qemu.sh ]
> > > KPATH=$(pwd)
> > >
> > > APPEND="root=/dev/ram0 console=ttyS0 hung_task_panic=1 earlyprintk=ttyS0,115200"
> > > APPEND="$APPEND nokaslr"
> > >
> > > qemu-system-x86_64 -enable-kvm -M pc -kernel $KPATH/bzImage -initrd
> > > $KPATH/initrd.img -m 512 -net none -serial stdio -append "${APPEND}"
> > > [ /run_qemu.sh ]
> > >
> > > $ ./run_qemu.sh
> > > Probing EDD (edd=off to disable)... ok
> > > Wrong EFI loader signature.
> > > early console in extract_kernel
> > > input_data: 0x000000000289940d
> > > input_len: 0x000000000069804a
> > > output: 0x0000000001000000
> > > output_len: 0x0000000001ef2010
> > > kernel_total_size: 0x0000000001c2c000
> > > needed_size: 0x0000000002000000
> > > trampoline_32bit: 0x000000000009d000
> > >
> > >
> > > KASLR disabled: 'nokaslr' on cmdline.
> > >
> > >
> > > Decompressing Linux... Parsing ELF... No relocation needed... done.
> > > Booting the kernel.
> > >
> > > QEMU run stops, too.
> > >
> >
> > I re-generated my initrd.img with GZIP as compressor (my default is ZSTD).
> >
> > --- /etc/initramfs-tools/initramfs.conf 2021-01-17 12:35:30.823818501 +0100
> > +++ /etc/initramfs-tools/initramfs.conf.zstd    2020-09-21
> > 23:55:43.121735427 +0200
> > @@ -41,7 +41,7 @@ KEYMAP=n
> > # COMPRESS: [ gzip | bzip2 | lz4 | lzma | lzop | xz | zstd ]
> > #
> >
> > -COMPRESS=gzip
> > +COMPRESS=zstd
> >
> > #
> > # DEVICE: ...
> >
> > root# KVER="5.11.0-rc3-9-amd64-clang12-pgo" ; update-initramfs -c -k $KVER
> >
> > QEMU boot stops at the same stage.
> >
> > Now, my head is empty...
> >
> > Any comments?
> >
>
> ( Just as a side note I have Nick's DWARF-v5 support enabled. )
>
> There is one EFI related warning in my build-log:
>
> $ grep warning: build-log_5.11.0-rc3-9-amd64-clang12-pgo.txt
> dpkg-architecture: warning: specified GNU system type x86_64-linux-gnu
> does not match CC system type x86_64-pc-linux-gnu, try setting a
> correct CC environment variable
> warning: arch/x86/platform/efi/quirks.c: Function control flow change
> detected (hash mismatch) efi_arch_mem_reserve Hash =
> 391331300655996873 [-Wbackend-plugin]
> warning: arch/x86/platform/efi/efi.c: Function control flow change
> detected (hash mismatch) efi_attr_is_visible Hash = 567185240781730690
> [-Wbackend-plugin]
> arch/x86/crypto/aegis128-aesni-glue.c:265:30: warning: unused variable
> 'simd_alg' [-Wunused-variable]
> warning: lib/crypto/sha256.c: Function control flow change detected
> (hash mismatch) sha256_update Hash = 744640996947387358
> [-Wbackend-plugin]
> warning: arch/x86/boot/compressed/string.c: Function control flow
> change detected (hash mismatch) memcmp Hash = 742261418966908927
> [-Wbackend-plugin]
> warning: arch/x86/boot/compressed/string.c: Function control flow
> change detected (hash mismatch) bcmp Hash = 742261418966908927
> [-Wbackend-plugin]
> warning: arch/x86/boot/compressed/string.c: Function control flow
> change detected (hash mismatch) strcmp Hash = 536873291001348520
> [-Wbackend-plugin]
> warning: arch/x86/boot/compressed/string.c: Function control flow
> change detected (hash mismatch) strnlen Hash = 146835646621254984
> [-Wbackend-plugin]
> warning: arch/x86/boot/compressed/string.c: Function control flow
> change detected (hash mismatch) simple_strtoull Hash =
> 252792765950587360 [-Wbackend-plugin]
> warning: arch/x86/boot/compressed/string.c: Function control flow
> change detected (hash mismatch) strstr Hash = 391331303349076211
> [-Wbackend-plugin]
> warning: arch/x86/boot/compressed/string.c: Function control flow
> change detected (hash mismatch) strchr Hash = 1063705159280644635
> [-Wbackend-plugin]
> warning: arch/x86/boot/compressed/string.c: Function control flow
> change detected (hash mismatch) kstrtoull Hash = 758414239132790022
> [-Wbackend-plugin]
> drivers/infiniband/hw/hfi1/platform.o: warning: objtool: tune_serdes()
> falls through to next function apply_tx_lanes()
>
> Cannot say if this information is helpful.
>

My LLVM/Clang v12 is from <apt.llvm.org>:

clang-12 version 1:12~++20210115111113+45ef053bd709-1~exp1~20210115101809.3724

My kernel-config is attached.

- Sedat -
Sedat Dilek Jan. 17, 2021, 5:42 p.m. UTC | #10
On Sun, Jan 17, 2021 at 1:05 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
>
> On Sun, Jan 17, 2021 at 12:58 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> >
> > On Sun, Jan 17, 2021 at 12:42 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > >
> > > On Sun, Jan 17, 2021 at 12:23 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > >
> > > > On Sun, Jan 17, 2021 at 11:53 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > > >
> > > > > On Sun, Jan 17, 2021 at 11:44 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > > > >
> > > > > > On Sat, Jan 16, 2021 at 9:23 PM Bill Wendling <morbo@google.com> wrote:
> > > > > > >
> > > > > > > On Sat, Jan 16, 2021 at 9:39 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > > > > > > On Sat, Jan 16, 2021 at 10:44 AM 'Bill Wendling' via Clang Built Linux
> > > > > > > > <clang-built-linux@googlegroups.com> wrote:
> > > > > > > > >
> > > > > > > > > From: Sami Tolvanen <samitolvanen@google.com>
> > > > > > > > >
> > > > > > > > > Enable the use of clang's Profile-Guided Optimization[1]. To generate a
> > > > > > > > > profile, the kernel is instrumented with PGO counters, a representative
> > > > > > > > > workload is run, and the raw profile data is collected from
> > > > > > > > > /sys/kernel/debug/pgo/profraw.
> > > > > > > > >
> > > > > > > > > The raw profile data must be processed by clang's "llvm-profdata" tool
> > > > > > > > > before it can be used during recompilation:
> > > > > > > > >
> > > > > > > > >   $ cp /sys/kernel/debug/pgo/profraw vmlinux.profraw
> > > > > > > > >   $ llvm-profdata merge --output=vmlinux.profdata vmlinux.profraw
> > > > > > > > >
> > > > > > > > > Multiple raw profiles may be merged during this step.
> > > > > > > > >
> > > > > > > > > The data can now be used by the compiler:
> > > > > > > > >
> > > > > > > > >   $ make LLVM=1 KCFLAGS=-fprofile-use=vmlinux.profdata ...
> > > > > > > > >
> > > > > > > > > This initial submission is restricted to x86, as that's the platform we
> > > > > > > > > know works. This restriction can be lifted once other platforms have
> > > > > > > > > been verified to work with PGO.
> > > > > > > > >
> > > > > > > > > Note that this method of profiling the kernel is clang-native, unlike
> > > > > > > > > the clang support in kernel/gcov.
> > > > > > > > >
> > > > > > > > > [1] https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization
> > > > > > > > >
> > > > > > > > > Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
> > > > > > > > > Co-developed-by: Bill Wendling <morbo@google.com>
> > > > > > > > > Signed-off-by: Bill Wendling <morbo@google.com>
> > > > > > > > > ---
> > > > > > > > > v2: - Added "__llvm_profile_instrument_memop" based on Nathan Chancellor's
> > > > > > > > >       testing.
> > > > > > > > >     - Corrected documentation, re PGO flags when using LTO, based on Fangrui
> > > > > > > > >       Song's comments.
> > > > > > > > > v3: - Added change log section based on Sedat Dilek's comments.
> > > > > > > > > v4: - Remove non-x86 Makefile changes and use "hweight64" instead of using our
> > > > > > > > >       own popcount implementation, based on Nick Desaulniers's comment.
> > > > > > > > > v5: - Correct padding calculation, discovered by Nathan Chancellor.
> > > > > > > > > ---
> > > > > > > > >  Documentation/dev-tools/index.rst     |   1 +
> > > > > > > > >  Documentation/dev-tools/pgo.rst       | 127 +++++++++
> > > > > > > > >  MAINTAINERS                           |   9 +
> > > > > > > > >  Makefile                              |   3 +
> > > > > > > > >  arch/Kconfig                          |   1 +
> > > > > > > > >  arch/x86/Kconfig                      |   1 +
> > > > > > > > >  arch/x86/boot/Makefile                |   1 +
> > > > > > > > >  arch/x86/boot/compressed/Makefile     |   1 +
> > > > > > > > >  arch/x86/crypto/Makefile              |   2 +
> > > > > > > > >  arch/x86/entry/vdso/Makefile          |   1 +
> > > > > > > > >  arch/x86/kernel/vmlinux.lds.S         |   2 +
> > > > > > > > >  arch/x86/platform/efi/Makefile        |   1 +
> > > > > > > > >  arch/x86/purgatory/Makefile           |   1 +
> > > > > > > > >  arch/x86/realmode/rm/Makefile         |   1 +
> > > > > > > > >  arch/x86/um/vdso/Makefile             |   1 +
> > > > > > > > >  drivers/firmware/efi/libstub/Makefile |   1 +
> > > > > > > > >  include/asm-generic/vmlinux.lds.h     |  44 +++
> > > > > > > > >  kernel/Makefile                       |   1 +
> > > > > > > > >  kernel/pgo/Kconfig                    |  35 +++
> > > > > > > > >  kernel/pgo/Makefile                   |   5 +
> > > > > > > > >  kernel/pgo/fs.c                       | 382 ++++++++++++++++++++++++++
> > > > > > > > >  kernel/pgo/instrument.c               | 185 +++++++++++++
> > > > > > > > >  kernel/pgo/pgo.h                      | 206 ++++++++++++++
> > > > > > > > >  scripts/Makefile.lib                  |  10 +
> > > > > > > > >  24 files changed, 1022 insertions(+)
> > > > > > > > >  create mode 100644 Documentation/dev-tools/pgo.rst
> > > > > > > > >  create mode 100644 kernel/pgo/Kconfig
> > > > > > > > >  create mode 100644 kernel/pgo/Makefile
> > > > > > > > >  create mode 100644 kernel/pgo/fs.c
> > > > > > > > >  create mode 100644 kernel/pgo/instrument.c
> > > > > > > > >  create mode 100644 kernel/pgo/pgo.h
> > > > > > > > >
> > > > > > > > > diff --git a/Documentation/dev-tools/index.rst b/Documentation/dev-tools/index.rst
> > > > > > > > > index f7809c7b1ba9e..8d6418e858062 100644
> > > > > > > > > --- a/Documentation/dev-tools/index.rst
> > > > > > > > > +++ b/Documentation/dev-tools/index.rst
> > > > > > > > > @@ -26,6 +26,7 @@ whole; patches welcome!
> > > > > > > > >     kgdb
> > > > > > > > >     kselftest
> > > > > > > > >     kunit/index
> > > > > > > > > +   pgo
> > > > > > > > >
> > > > > > > > >
> > > > > > > > >  .. only::  subproject and html
> > > > > > > > > diff --git a/Documentation/dev-tools/pgo.rst b/Documentation/dev-tools/pgo.rst
> > > > > > > > > new file mode 100644
> > > > > > > > > index 0000000000000..b7f11d8405b73
> > > > > > > > > --- /dev/null
> > > > > > > > > +++ b/Documentation/dev-tools/pgo.rst
> > > > > > > > > @@ -0,0 +1,127 @@
> > > > > > > > > +.. SPDX-License-Identifier: GPL-2.0
> > > > > > > > > +
> > > > > > > > > +===============================
> > > > > > > > > +Using PGO with the Linux kernel
> > > > > > > > > +===============================
> > > > > > > > > +
> > > > > > > > > +Clang's profiling kernel support (PGO_) enables profiling of the Linux kernel
> > > > > > > > > +when building with Clang. The profiling data is exported via the ``pgo``
> > > > > > > > > +debugfs directory.
> > > > > > > > > +
> > > > > > > > > +.. _PGO: https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization
> > > > > > > > > +
> > > > > > > > > +
> > > > > > > > > +Preparation
> > > > > > > > > +===========
> > > > > > > > > +
> > > > > > > > > +Configure the kernel with:
> > > > > > > > > +
> > > > > > > > > +.. code-block:: make
> > > > > > > > > +
> > > > > > > > > +   CONFIG_DEBUG_FS=y
> > > > > > > > > +   CONFIG_PGO_CLANG=y
> > > > > > > > > +
> > > > > > > > > +Note that kernels compiled with profiling flags will be significantly larger
> > > > > > > > > +and run slower.
> > > > > > > > > +
> > > > > > > > > +Profiling data will only become accessible once debugfs has been mounted:
> > > > > > > > > +
> > > > > > > > > +.. code-block:: sh
> > > > > > > > > +
> > > > > > > > > +   mount -t debugfs none /sys/kernel/debug
> > > > > > > > > +
> > > > > > > > > +
> > > > > > > > > +Customization
> > > > > > > > > +=============
> > > > > > > > > +
> > > > > > > > > +You can enable or disable profiling for individual files and directories by
> > > > > > > > > +adding a line similar to the following to the respective kernel Makefile:
> > > > > > > > > +
> > > > > > > > > +- For a single file (e.g. main.o)
> > > > > > > > > +
> > > > > > > > > +  .. code-block:: make
> > > > > > > > > +
> > > > > > > > > +     PGO_PROFILE_main.o := y
> > > > > > > > > +
> > > > > > > > > +- For all files in one directory
> > > > > > > > > +
> > > > > > > > > +  .. code-block:: make
> > > > > > > > > +
> > > > > > > > > +     PGO_PROFILE := y
> > > > > > > > > +
> > > > > > > > > +To exclude files from being profiled use
> > > > > > > > > +
> > > > > > > > > +  .. code-block:: make
> > > > > > > > > +
> > > > > > > > > +     PGO_PROFILE_main.o := n
> > > > > > > > > +
> > > > > > > > > +and
> > > > > > > > > +
> > > > > > > > > +  .. code-block:: make
> > > > > > > > > +
> > > > > > > > > +     PGO_PROFILE := n
> > > > > > > > > +
> > > > > > > > > +Only files which are linked to the main kernel image or are compiled as kernel
> > > > > > > > > +modules are supported by this mechanism.
> > > > > > > > > +
> > > > > > > > > +
> > > > > > > > > +Files
> > > > > > > > > +=====
> > > > > > > > > +
> > > > > > > > > +The PGO kernel support creates the following files in debugfs:
> > > > > > > > > +
> > > > > > > > > +``/sys/kernel/debug/pgo``
> > > > > > > > > +       Parent directory for all PGO-related files.
> > > > > > > > > +
> > > > > > > > > +``/sys/kernel/debug/pgo/reset``
> > > > > > > > > +       Global reset file: resets all coverage data to zero when written to.
> > > > > > > > > +
> > > > > > > > > +``/sys/kernel/debug/pgo/profraw``
> > > > > > > > > +       The raw PGO data that must be processed with ``llvm-profdata``.
> > > > > > > > > +
> > > > > > > > > +
> > > > > > > > > +Workflow
> > > > > > > > > +========
> > > > > > > > > +
> > > > > > > > > +The PGO kernel can be run on the host or test machines. The data though should
> > > > > > > > > +be analyzed with Clang's tools from the same Clang version as the kernel was
> > > > > > > > > +compiled with. Clang's tolerant of version skew, but it's easier to use the same
> > > > > > > > > +Clang version.
> > > > > > > > > +
> > > > > > > > > +The profiling data is useful for optimizing the kernel, analyzing coverage,
> > > > > > > > > +etc. Clang offers tools to perform these tasks.
> > > > > > > > > +
> > > > > > > > > +Here is an example workflow for profiling an instrumented kernel with PGO and
> > > > > > > > > +using the result to optimize the kernel:
> > > > > > > > > +
> > > > > > > > > +1) Install the kernel on the TEST machine.
> > > > > > > > > +
> > > > > > > > > +2) Reset the data counters right before running the load tests
> > > > > > > > > +
> > > > > > > > > +   .. code-block:: sh
> > > > > > > > > +
> > > > > > > > > +      $ echo 1 > /sys/kernel/debug/pgo/reset
> > > > > > > > > +
> > > > > > > >
> > > > > > > > I do not get this...
> > > > > > > >
> > > > > > > > # mount | grep debugfs
> > > > > > > > debugfs on /sys/kernel/debug type debugfs (rw,nosuid,nodev,noexec,relatime)
> > > > > > > >
> > > > > > > > After the load-test...?
> > > > > > > >
> > > > > > > > echo 0 > /sys/kernel/debug/pgo/reset
> > > > > > > >
> > > > > > > Writing anything to /sys/kernel/debug/pgo/reset will cause it to reset
> > > > > > > the profiling counters. I picked 1 (one) semi-randomly, but it could
> > > > > > > be any number, letter, your favorite short story, etc. You don't want
> > > > > > > to reset it before collecting the profiling data from your load tests
> > > > > > > though.
> > > > > > >
> > > > > > > > > +3) Run the load tests.
> > > > > > > > > +
> > > > > > > > > +4) Collect the raw profile data
> > > > > > > > > +
> > > > > > > > > +   .. code-block:: sh
> > > > > > > > > +
> > > > > > > > > +      $ cp -a /sys/kernel/debug/pgo/profraw /tmp/vmlinux.profraw
> > > > > > > > > +
> > > > > > > >
> > > > > > > > This is only 4,9M small and seen from the date 5mins before I run the
> > > > > > > > echo-1 line.
> > > > > > > >
> > > > > > > > # ll /sys/kernel/debug/pgo
> > > > > > > > insgesamt 0
> > > > > > > > drwxr-xr-x  2 root root 0 16. Jan 17:29 .
> > > > > > > > drwx------ 41 root root 0 16. Jan 17:29 ..
> > > > > > > > -rw-------  1 root root 0 16. Jan 17:29 profraw
> > > > > > > > --w-------  1 root root 0 16. Jan 18:19 reset
> > > > > > > >
> > > > > > > > # cp -a /sys/kernel/debug/pgo/profraw /tmp/vmlinux.profraw
> > > > > > > >
> > > > > > > > # ll /tmp/vmlinux.profraw
> > > > > > > > -rw------- 1 root root 4,9M 16. Jan 17:29 /tmp/vmlinux.profraw
> > > > > > > >
> > > > > > > > For me there was no prof-data collected from my defconfig kernel-build.
> > > > > > > >
> > > > > > > The /sys/kernel/debug/pgo/profraw file is read-only. Nothing writes to
> > > > > > > it, not even the kernel. All it does is serialize the profiling
> > > > > > > counters from a memory location in the kernel into a format that
> > > > > > > LLVM's tools can understand.
> > > > > > >
> > > > > > > > > +5) (Optional) Download the raw profile data to the HOST machine.
> > > > > > > > > +
> > > > > > > > > +6) Process the raw profile data
> > > > > > > > > +
> > > > > > > > > +   .. code-block:: sh
> > > > > > > > > +
> > > > > > > > > +      $ llvm-profdata merge --output=vmlinux.profdata vmlinux.profraw
> > > > > > > > > +
> > > > > > > >
> > > > > > > > Is that executed in /path/to/linux/git?
> > > > > > > >
> > > > > > > The llvm-profdata tool is not in the linux source tree. You need to
> > > > > > > grab it from a clang distribution (or build it from clang's git repo).
> > > > > > >
> > > > > > > > > +   Note that multiple raw profile data files can be merged during this step.
> > > > > > > > > +
> > > > > > > > > +7) Rebuild the kernel using the profile data (PGO disabled)
> > > > > > > > > +
> > > > > > > > > +   .. code-block:: sh
> > > > > > > > > +
> > > > > > > > > +      $ make LLVM=1 KCFLAGS=-fprofile-use=vmlinux.profdata ...
> > > > > > > >
> > > > > > > > How big is vmlinux.profdata (make defconfig)?
> > > > > > > >
> > > > > > > I don't have numbers for this, but from what you listed here, it's ~5M
> > > > > > > in size. The size is proportional to the number of counters
> > > > > > > instrumented in the kernel.
> > > > > > >
> > > > > > > > Do I need to do a full defconfig build or can I stop the build after
> > > > > > > > let me say 10mins?
> > > > > > > >
> > > > > > > You should do a full rebuild. Make sure that PGO is disabled during the rebuild.
> > > > > > >
> > > > > >
> > > > > > Thanks Bill for all the information.
> > > > > >
> > > > > > And sorry if I am so pedantic.
> > > > > >
> > > > > > I have installed my Debian system with Legacy-BIOS enabled.
> > > > > >
> > > > > > When I rebuild with KCFLAGS=-fprofile-use=vmlinux.profdata (LLVM=1 I
> > > > > > have as a default) my system hangs on reboot.
> > > > > >
> > > > > > [ diffconfig ]
> > > > > > $ scripts/diffconfig /boot/config-5.11.0-rc3-8-amd64-clang12-pgo
> > > > > > /boot/config-5.11.0-rc3-9-amd64-clang12-pgo
> > > > > > BUILD_SALT "5.11.0-rc3-8-amd64-clang12-pgo" -> "5.11.0-rc3-9-amd64-clang12-pgo"
> > > > > > PGO_CLANG y -> n
> > > > > >
> > > > > > [ my make line ]
> > > > > > $ cat ../start-build_5.11.0-rc3-9-amd64-clang12-pgo.txt
> > > > > > dileks     63120   63095  0 06:47 pts/2    00:00:00 /usr/bin/perf_5.10
> > > > > > stat make V=1 -j4 HOSTCC=clang HOSTCXX=clang++ HOSTLD=ld.lld CC=clang
> > > > > > LD=ld.lld LLVM=1 LLVM_IAS=1 PAHOLE=/opt/pahole/bin/pahole
> > > > > > LOCALVERSION=-9-amd64-clang12-pgo KBUILD_VERBOSE=1
> > > > > > KBUILD_BUILD_HOST=iniza KBUILD_BUILD_USER=sedat.dilek@gmail.com
> > > > > > KBUILD_BUILD_TIMESTAMP=2021-01-17 bindeb-pkg
> > > > > > KDEB_PKGVERSION=5.11.0~rc3-9~bullseye+dileks1
> > > > > > KCFLAGS=-fprofile-use=vmlinux.profdata
> > > > > >
> > > > > > ( Yes, 06:47 a.m. in the morning :-). )
> > > > > >
> > > > > > When I boot with the rebuild Linux-kernel I see:
> > > > > >
> > > > > > Wrong EFI loader signature
> > > > > > ...
> > > > > > Decompressing
> > > > > > Parsing EFI
> > > > > > Performing Relocations done.
> > > > > > Booting the Kernel.
> > > > > >
> > > > > > *** SYSTEM HANGS ***
> > > > > > ( I waited for approx 1 min )
> > > > > >
> > > > > > I tried to turn UEFI support ON and OFF.
> > > > > > No success.
> > > > > >
> > > > > > Does Clang-PGO support Legacy-BIOS or is something different wrong?
> > > > > >
> > > > > > Thanks.
> > > > > >
> > > > >
> > > > > My bootloader is GRUB.
> > > > >
> > > > > In UEFI-BIOS settings there is no secure-boot disable option.
> > > > > Just simple "Use UEFI BIOS" enabled|disabled.
> > > > >
> > > > > Installed Debian packages:
> > > > >
> > > > > ii grub-common 2.04-12
> > > > > ii grub-pc 2.04-12
> > > > > ii grub-pc-bin 2.04-12
> > > > > ii grub2-common 2.04-12
> > > > >
> > > > > I found in the below link to do in grub-shell:
> > > > >
> > > > > set check_signatures=no
> > > > >
> > > > > But this is when grub-efi is installed.
> > > > >
> > > > > - Sedat -
> > > > >
> > > > > Link: https://unix.stackexchange.com/questions/126286/grub-efi-disable-signature-check
> > > >
> > > > Forget about that "Wrong EFI bootloader" - I see this with all other
> > > > kernels (all boot fine).
> > > >
> > > > I tried in QEMU with and without KASLR:
> > > >
> > > > [ run_qemu.sh ]
> > > > KPATH=$(pwd)
> > > >
> > > > APPEND="root=/dev/ram0 console=ttyS0 hung_task_panic=1 earlyprintk=ttyS0,115200"
> > > > APPEND="$APPEND nokaslr"
> > > >
> > > > qemu-system-x86_64 -enable-kvm -M pc -kernel $KPATH/bzImage -initrd
> > > > $KPATH/initrd.img -m 512 -net none -serial stdio -append "${APPEND}"
> > > > [ /run_qemu.sh ]
> > > >
> > > > $ ./run_qemu.sh
> > > > Probing EDD (edd=off to disable)... ok
> > > > Wrong EFI loader signature.
> > > > early console in extract_kernel
> > > > input_data: 0x000000000289940d
> > > > input_len: 0x000000000069804a
> > > > output: 0x0000000001000000
> > > > output_len: 0x0000000001ef2010
> > > > kernel_total_size: 0x0000000001c2c000
> > > > needed_size: 0x0000000002000000
> > > > trampoline_32bit: 0x000000000009d000
> > > >
> > > >
> > > > KASLR disabled: 'nokaslr' on cmdline.
> > > >
> > > >
> > > > Decompressing Linux... Parsing ELF... No relocation needed... done.
> > > > Booting the kernel.
> > > >
> > > > QEMU run stops, too.
> > > >
> > >
> > > I re-generated my initrd.img with GZIP as compressor (my default is ZSTD).
> > >
> > > --- /etc/initramfs-tools/initramfs.conf 2021-01-17 12:35:30.823818501 +0100
> > > +++ /etc/initramfs-tools/initramfs.conf.zstd    2020-09-21
> > > 23:55:43.121735427 +0200
> > > @@ -41,7 +41,7 @@ KEYMAP=n
> > > # COMPRESS: [ gzip | bzip2 | lz4 | lzma | lzop | xz | zstd ]
> > > #
> > >
> > > -COMPRESS=gzip
> > > +COMPRESS=zstd
> > >
> > > #
> > > # DEVICE: ...
> > >
> > > root# KVER="5.11.0-rc3-9-amd64-clang12-pgo" ; update-initramfs -c -k $KVER
> > >
> > > QEMU boot stops at the same stage.
> > >
> > > Now, my head is empty...
> > >
> > > Any comments?
> > >
> >
> > ( Just as a side note I have Nick's DWARF-v5 support enabled. )
> >
> > There is one EFI related warning in my build-log:
> >
> > $ grep warning: build-log_5.11.0-rc3-9-amd64-clang12-pgo.txt
> > dpkg-architecture: warning: specified GNU system type x86_64-linux-gnu
> > does not match CC system type x86_64-pc-linux-gnu, try setting a
> > correct CC environment variable
> > warning: arch/x86/platform/efi/quirks.c: Function control flow change
> > detected (hash mismatch) efi_arch_mem_reserve Hash =
> > 391331300655996873 [-Wbackend-plugin]
> > warning: arch/x86/platform/efi/efi.c: Function control flow change
> > detected (hash mismatch) efi_attr_is_visible Hash = 567185240781730690
> > [-Wbackend-plugin]
> > arch/x86/crypto/aegis128-aesni-glue.c:265:30: warning: unused variable
> > 'simd_alg' [-Wunused-variable]
> > warning: lib/crypto/sha256.c: Function control flow change detected
> > (hash mismatch) sha256_update Hash = 744640996947387358
> > [-Wbackend-plugin]
> > warning: arch/x86/boot/compressed/string.c: Function control flow
> > change detected (hash mismatch) memcmp Hash = 742261418966908927
> > [-Wbackend-plugin]
> > warning: arch/x86/boot/compressed/string.c: Function control flow
> > change detected (hash mismatch) bcmp Hash = 742261418966908927
> > [-Wbackend-plugin]
> > warning: arch/x86/boot/compressed/string.c: Function control flow
> > change detected (hash mismatch) strcmp Hash = 536873291001348520
> > [-Wbackend-plugin]
> > warning: arch/x86/boot/compressed/string.c: Function control flow
> > change detected (hash mismatch) strnlen Hash = 146835646621254984
> > [-Wbackend-plugin]
> > warning: arch/x86/boot/compressed/string.c: Function control flow
> > change detected (hash mismatch) simple_strtoull Hash =
> > 252792765950587360 [-Wbackend-plugin]
> > warning: arch/x86/boot/compressed/string.c: Function control flow
> > change detected (hash mismatch) strstr Hash = 391331303349076211
> > [-Wbackend-plugin]
> > warning: arch/x86/boot/compressed/string.c: Function control flow
> > change detected (hash mismatch) strchr Hash = 1063705159280644635
> > [-Wbackend-plugin]
> > warning: arch/x86/boot/compressed/string.c: Function control flow
> > change detected (hash mismatch) kstrtoull Hash = 758414239132790022
> > [-Wbackend-plugin]
> > drivers/infiniband/hw/hfi1/platform.o: warning: objtool: tune_serdes()
> > falls through to next function apply_tx_lanes()
> >
> > Cannot say if this information is helpful.
> >
>
> My LLVM/Clang v12 is from <apt.llvm.org>:
>
> clang-12 version 1:12~++20210115111113+45ef053bd709-1~exp1~20210115101809.3724
>
> My kernel-config is attached.
>

I dropped "LLVM_IAS=1" from my make line and did the following for my next build:

$ scripts/diffconfig /boot/config-5.11.0-rc3-8-amd64-clang12-pgo .config
BUILD_SALT "5.11.0-rc3-8-amd64-clang12-pgo" -> "5.11.0-rc3-10-amd64-clang12-pgo"
DEBUG_INFO_DWARF2 n -> y
DEBUG_INFO_DWARF5 y -> n
PGO_CLANG y -> n

That means I dropped DWARF5 support.

- Sedat -
Bill Wendling Jan. 17, 2021, 8:34 p.m. UTC | #11
On Sun, Jan 17, 2021 at 9:42 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
>
> On Sun, Jan 17, 2021 at 1:05 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> >
> > On Sun, Jan 17, 2021 at 12:58 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > >
> > > On Sun, Jan 17, 2021 at 12:42 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > >
> > > > On Sun, Jan 17, 2021 at 12:23 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > > >
> > > > > On Sun, Jan 17, 2021 at 11:53 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > > > >
> > > > > > On Sun, Jan 17, 2021 at 11:44 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > > > > >
> > > > > > > On Sat, Jan 16, 2021 at 9:23 PM Bill Wendling <morbo@google.com> wrote:
> > > > > > > >
> > > > > > > > On Sat, Jan 16, 2021 at 9:39 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > > > > > > > On Sat, Jan 16, 2021 at 10:44 AM 'Bill Wendling' via Clang Built Linux
> > > > > > > > > <clang-built-linux@googlegroups.com> wrote:
> > > > > > > > > >
> > > > > > > > > > From: Sami Tolvanen <samitolvanen@google.com>
> > > > > > > > > >
> > > > > > > > > > Enable the use of clang's Profile-Guided Optimization[1]. To generate a
> > > > > > > > > > profile, the kernel is instrumented with PGO counters, a representative
> > > > > > > > > > workload is run, and the raw profile data is collected from
> > > > > > > > > > /sys/kernel/debug/pgo/profraw.
> > > > > > > > > >
> > > > > > > > > > The raw profile data must be processed by clang's "llvm-profdata" tool
> > > > > > > > > > before it can be used during recompilation:
> > > > > > > > > >
> > > > > > > > > >   $ cp /sys/kernel/debug/pgo/profraw vmlinux.profraw
> > > > > > > > > >   $ llvm-profdata merge --output=vmlinux.profdata vmlinux.profraw
> > > > > > > > > >
> > > > > > > > > > Multiple raw profiles may be merged during this step.
> > > > > > > > > >
> > > > > > > > > > The data can now be used by the compiler:
> > > > > > > > > >
> > > > > > > > > >   $ make LLVM=1 KCFLAGS=-fprofile-use=vmlinux.profdata ...
> > > > > > > > > >
> > > > > > > > > > This initial submission is restricted to x86, as that's the platform we
> > > > > > > > > > know works. This restriction can be lifted once other platforms have
> > > > > > > > > > been verified to work with PGO.
> > > > > > > > > >
> > > > > > > > > > Note that this method of profiling the kernel is clang-native, unlike
> > > > > > > > > > the clang support in kernel/gcov.
> > > > > > > > > >
> > > > > > > > > > [1] https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization
> > > > > > > > > >
> > > > > > > > > > Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
> > > > > > > > > > Co-developed-by: Bill Wendling <morbo@google.com>
> > > > > > > > > > Signed-off-by: Bill Wendling <morbo@google.com>
> > > > > > > > > > ---
> > > > > > > > > > v2: - Added "__llvm_profile_instrument_memop" based on Nathan Chancellor's
> > > > > > > > > >       testing.
> > > > > > > > > >     - Corrected documentation, re PGO flags when using LTO, based on Fangrui
> > > > > > > > > >       Song's comments.
> > > > > > > > > > v3: - Added change log section based on Sedat Dilek's comments.
> > > > > > > > > > > v4: - Remove non-x86 Makefile changes and use "hweight64" instead of using our
> > > > > > > > > >       own popcount implementation, based on Nick Desaulniers's comment.
> > > > > > > > > > v5: - Correct padding calculation, discovered by Nathan Chancellor.
> > > > > > > > > > ---
> > > > > > > > > >  Documentation/dev-tools/index.rst     |   1 +
> > > > > > > > > >  Documentation/dev-tools/pgo.rst       | 127 +++++++++
> > > > > > > > > >  MAINTAINERS                           |   9 +
> > > > > > > > > >  Makefile                              |   3 +
> > > > > > > > > >  arch/Kconfig                          |   1 +
> > > > > > > > > >  arch/x86/Kconfig                      |   1 +
> > > > > > > > > >  arch/x86/boot/Makefile                |   1 +
> > > > > > > > > >  arch/x86/boot/compressed/Makefile     |   1 +
> > > > > > > > > >  arch/x86/crypto/Makefile              |   2 +
> > > > > > > > > >  arch/x86/entry/vdso/Makefile          |   1 +
> > > > > > > > > >  arch/x86/kernel/vmlinux.lds.S         |   2 +
> > > > > > > > > >  arch/x86/platform/efi/Makefile        |   1 +
> > > > > > > > > >  arch/x86/purgatory/Makefile           |   1 +
> > > > > > > > > >  arch/x86/realmode/rm/Makefile         |   1 +
> > > > > > > > > >  arch/x86/um/vdso/Makefile             |   1 +
> > > > > > > > > >  drivers/firmware/efi/libstub/Makefile |   1 +
> > > > > > > > > >  include/asm-generic/vmlinux.lds.h     |  44 +++
> > > > > > > > > >  kernel/Makefile                       |   1 +
> > > > > > > > > >  kernel/pgo/Kconfig                    |  35 +++
> > > > > > > > > >  kernel/pgo/Makefile                   |   5 +
> > > > > > > > > >  kernel/pgo/fs.c                       | 382 ++++++++++++++++++++++++++
> > > > > > > > > >  kernel/pgo/instrument.c               | 185 +++++++++++++
> > > > > > > > > >  kernel/pgo/pgo.h                      | 206 ++++++++++++++
> > > > > > > > > >  scripts/Makefile.lib                  |  10 +
> > > > > > > > > >  24 files changed, 1022 insertions(+)
> > > > > > > > > >  create mode 100644 Documentation/dev-tools/pgo.rst
> > > > > > > > > >  create mode 100644 kernel/pgo/Kconfig
> > > > > > > > > >  create mode 100644 kernel/pgo/Makefile
> > > > > > > > > >  create mode 100644 kernel/pgo/fs.c
> > > > > > > > > >  create mode 100644 kernel/pgo/instrument.c
> > > > > > > > > >  create mode 100644 kernel/pgo/pgo.h
> > > > > > > > > >
> > > > > > > > > > diff --git a/Documentation/dev-tools/index.rst b/Documentation/dev-tools/index.rst
> > > > > > > > > > index f7809c7b1ba9e..8d6418e858062 100644
> > > > > > > > > > --- a/Documentation/dev-tools/index.rst
> > > > > > > > > > +++ b/Documentation/dev-tools/index.rst
> > > > > > > > > > @@ -26,6 +26,7 @@ whole; patches welcome!
> > > > > > > > > >     kgdb
> > > > > > > > > >     kselftest
> > > > > > > > > >     kunit/index
> > > > > > > > > > +   pgo
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > >  .. only::  subproject and html
> > > > > > > > > > diff --git a/Documentation/dev-tools/pgo.rst b/Documentation/dev-tools/pgo.rst
> > > > > > > > > > new file mode 100644
> > > > > > > > > > index 0000000000000..b7f11d8405b73
> > > > > > > > > > --- /dev/null
> > > > > > > > > > +++ b/Documentation/dev-tools/pgo.rst
> > > > > > > > > > @@ -0,0 +1,127 @@
> > > > > > > > > > +.. SPDX-License-Identifier: GPL-2.0
> > > > > > > > > > +
> > > > > > > > > > +===============================
> > > > > > > > > > +Using PGO with the Linux kernel
> > > > > > > > > > +===============================
> > > > > > > > > > +
> > > > > > > > > > +Clang's profiling kernel support (PGO_) enables profiling of the Linux kernel
> > > > > > > > > > +when building with Clang. The profiling data is exported via the ``pgo``
> > > > > > > > > > +debugfs directory.
> > > > > > > > > > +
> > > > > > > > > > +.. _PGO: https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization
> > > > > > > > > > +
> > > > > > > > > > +
> > > > > > > > > > +Preparation
> > > > > > > > > > +===========
> > > > > > > > > > +
> > > > > > > > > > +Configure the kernel with:
> > > > > > > > > > +
> > > > > > > > > > +.. code-block:: make
> > > > > > > > > > +
> > > > > > > > > > +   CONFIG_DEBUG_FS=y
> > > > > > > > > > +   CONFIG_PGO_CLANG=y
> > > > > > > > > > +
> > > > > > > > > > +Note that kernels compiled with profiling flags will be significantly larger
> > > > > > > > > > +and run slower.
> > > > > > > > > > +
> > > > > > > > > > +Profiling data will only become accessible once debugfs has been mounted:
> > > > > > > > > > +
> > > > > > > > > > +.. code-block:: sh
> > > > > > > > > > +
> > > > > > > > > > +   mount -t debugfs none /sys/kernel/debug
> > > > > > > > > > +
> > > > > > > > > > +
> > > > > > > > > > +Customization
> > > > > > > > > > +=============
> > > > > > > > > > +
> > > > > > > > > > > +You can enable or disable profiling for individual files and directories by
> > > > > > > > > > +adding a line similar to the following to the respective kernel Makefile:
> > > > > > > > > > +
> > > > > > > > > > +- For a single file (e.g. main.o)
> > > > > > > > > > +
> > > > > > > > > > +  .. code-block:: make
> > > > > > > > > > +
> > > > > > > > > > +     PGO_PROFILE_main.o := y
> > > > > > > > > > +
> > > > > > > > > > +- For all files in one directory
> > > > > > > > > > +
> > > > > > > > > > +  .. code-block:: make
> > > > > > > > > > +
> > > > > > > > > > +     PGO_PROFILE := y
> > > > > > > > > > +
> > > > > > > > > > +To exclude files from being profiled use
> > > > > > > > > > +
> > > > > > > > > > +  .. code-block:: make
> > > > > > > > > > +
> > > > > > > > > > +     PGO_PROFILE_main.o := n
> > > > > > > > > > +
> > > > > > > > > > +and
> > > > > > > > > > +
> > > > > > > > > > +  .. code-block:: make
> > > > > > > > > > +
> > > > > > > > > > +     PGO_PROFILE := n
> > > > > > > > > > +
> > > > > > > > > > +Only files which are linked to the main kernel image or are compiled as kernel
> > > > > > > > > > +modules are supported by this mechanism.
> > > > > > > > > > +
> > > > > > > > > > +
> > > > > > > > > > +Files
> > > > > > > > > > +=====
> > > > > > > > > > +
> > > > > > > > > > +The PGO kernel support creates the following files in debugfs:
> > > > > > > > > > +
> > > > > > > > > > +``/sys/kernel/debug/pgo``
> > > > > > > > > > +       Parent directory for all PGO-related files.
> > > > > > > > > > +
> > > > > > > > > > +``/sys/kernel/debug/pgo/reset``
> > > > > > > > > > +       Global reset file: resets all coverage data to zero when written to.
> > > > > > > > > > +
> > > > > > > > > > > +``/sys/kernel/debug/pgo/profraw``
> > > > > > > > > > > +       The raw PGO data that must be processed with ``llvm-profdata``.
> > > > > > > > > > +
> > > > > > > > > > +
> > > > > > > > > > +Workflow
> > > > > > > > > > +========
> > > > > > > > > > +
> > > > > > > > > > +The PGO kernel can be run on the host or test machines. The data though should
> > > > > > > > > > +be analyzed with Clang's tools from the same Clang version as the kernel was
> > > > > > > > > > +compiled. Clang's tolerant of version skew, but it's easier to use the same
> > > > > > > > > > +Clang version.
> > > > > > > > > > +
> > > > > > > > > > +The profiling data is useful for optimizing the kernel, analyzing coverage,
> > > > > > > > > > +etc. Clang offers tools to perform these tasks.
> > > > > > > > > > +
> > > > > > > > > > +Here is an example workflow for profiling an instrumented kernel with PGO and
> > > > > > > > > > +using the result to optimize the kernel:
> > > > > > > > > > +
> > > > > > > > > > +1) Install the kernel on the TEST machine.
> > > > > > > > > > +
> > > > > > > > > > +2) Reset the data counters right before running the load tests
> > > > > > > > > > +
> > > > > > > > > > +   .. code-block:: sh
> > > > > > > > > > +
> > > > > > > > > > +      $ echo 1 > /sys/kernel/debug/pgo/reset
> > > > > > > > > > +
> > > > > > > > >
> > > > > > > > > I do not get this...
> > > > > > > > >
> > > > > > > > > # mount | grep debugfs
> > > > > > > > > debugfs on /sys/kernel/debug type debugfs (rw,nosuid,nodev,noexec,relatime)
> > > > > > > > >
> > > > > > > > > After the load-test...?
> > > > > > > > >
> > > > > > > > > echo 0 > /sys/kernel/debug/pgo/reset
> > > > > > > > >
> > > > > > > > Writing anything to /sys/kernel/debug/pgo/reset will cause it to reset
> > > > > > > > the profiling counters. I picked 1 (one) semi-randomly, but it could
> > > > > > > > be any number, letter, your favorite short story, etc. You don't want
> > > > > > > > to reset it before collecting the profiling data from your load tests
> > > > > > > > though.
> > > > > > > >
> > > > > > > > > > +3) Run the load tests.
> > > > > > > > > > +
> > > > > > > > > > +4) Collect the raw profile data
> > > > > > > > > > +
> > > > > > > > > > +   .. code-block:: sh
> > > > > > > > > > +
> > > > > > > > > > +      $ cp -a /sys/kernel/debug/pgo/profraw /tmp/vmlinux.profraw
> > > > > > > > > > +
> > > > > > > > >
> > > > > > > > > This is only 4,9M small and seen from the date 5mins before I run the
> > > > > > > > > echo-1 line.
> > > > > > > > >
> > > > > > > > > # ll /sys/kernel/debug/pgo
> > > > > > > > > insgesamt 0
> > > > > > > > > drwxr-xr-x  2 root root 0 16. Jan 17:29 .
> > > > > > > > > drwx------ 41 root root 0 16. Jan 17:29 ..
> > > > > > > > > -rw-------  1 root root 0 16. Jan 17:29 profraw
> > > > > > > > > --w-------  1 root root 0 16. Jan 18:19 reset
> > > > > > > > >
> > > > > > > > > # cp -a /sys/kernel/debug/pgo/profraw /tmp/vmlinux.profraw
> > > > > > > > >
> > > > > > > > > # ll /tmp/vmlinux.profraw
> > > > > > > > > -rw------- 1 root root 4,9M 16. Jan 17:29 /tmp/vmlinux.profraw
> > > > > > > > >
> > > > > > > > > For me there was no prof-data collected from my defconfig kernel-build.
> > > > > > > > >
> > > > > > > > The /sys/kernel/debug/pgo/profraw file is read-only. Nothing writes to
> > > > > > > > it, not even the kernel. All it does is serialize the profiling
> > > > > > > > counters from a memory location in the kernel into a format that
> > > > > > > > LLVM's tools can understand.
> > > > > > > >
> > > > > > > > > > +5) (Optional) Download the raw profile data to the HOST machine.
> > > > > > > > > > +
> > > > > > > > > > +6) Process the raw profile data
> > > > > > > > > > +
> > > > > > > > > > +   .. code-block:: sh
> > > > > > > > > > +
> > > > > > > > > > +      $ llvm-profdata merge --output=vmlinux.profdata vmlinux.profraw
> > > > > > > > > > +
> > > > > > > > >
> > > > > > > > > Is that executed in /path/to/linux/git?
> > > > > > > > >
> > > > > > > > The llvm-profdata tool is not in the linux source tree. You need to
> > > > > > > > grab it from a clang distribution (or built from clang's git repo).
> > > > > > > >
> > > > > > > > > > +   Note that multiple raw profile data files can be merged during this step.
> > > > > > > > > > +
> > > > > > > > > > +7) Rebuild the kernel using the profile data (PGO disabled)
> > > > > > > > > > +
> > > > > > > > > > +   .. code-block:: sh
> > > > > > > > > > +
> > > > > > > > > > +      $ make LLVM=1 KCFLAGS=-fprofile-use=vmlinux.profdata ...
> > > > > > > > >
> > > > > > > > > How big is vmlinux.profdata (make defconfig)?
> > > > > > > > >
> > > > > > > > I don't have numbers for this, but from what you listed here, it's ~5M
> > > > > > > > in size. The size is proportional to the number of counters
> > > > > > > > instrumented in the kernel.
> > > > > > > >
> > > > > > > > > Do I need to do a full defconfig build or can I stop the build after
> > > > > > > > > let me say 10mins?
> > > > > > > > >
> > > > > > > > You should do a full rebuild. Make sure that PGO is disabled during the rebuild.
> > > > > > > >
> > > > > > >
> > > > > > > Thanks Bill for all the information.
> > > > > > >
> > > > > > > And sorry if I am so pedantic.
> > > > > > >
> > > > > > > I have installed my Debian system with Legacy-BIOS enabled.
> > > > > > >
> > > > > > > When I rebuild with KCFLAGS=-fprofile-use=vmlinux.profdata (LLVM=1 I
> > > > > > > have as a default) my system hangs on reboot.
> > > > > > >
> > > > > > > [ diffconfig ]
> > > > > > > $ scripts/diffconfig /boot/config-5.11.0-rc3-8-amd64-clang12-pgo
> > > > > > > /boot/config-5.11.0-rc3-9-amd64-clang12-pgo
> > > > > > > BUILD_SALT "5.11.0-rc3-8-amd64-clang12-pgo" -> "5.11.0-rc3-9-amd64-clang12-pgo"
> > > > > > > PGO_CLANG y -> n
> > > > > > >
> > > > > > > [ my make line ]
> > > > > > > $ cat ../start-build_5.11.0-rc3-9-amd64-clang12-pgo.txt
> > > > > > > dileks     63120   63095  0 06:47 pts/2    00:00:00 /usr/bin/perf_5.10
> > > > > > > stat make V=1 -j4 HOSTCC=clang HOSTCXX=clang++ HOSTLD=ld.lld CC=clang
> > > > > > > LD=ld.lld LLVM=1 LLVM_IAS=1 PAHOLE=/opt/pahole/bin/pahole
> > > > > > > LOCALVERSION=-9-amd64-clang12-pgo KBUILD_VERBOSE=1
> > > > > > > KBUILD_BUILD_HOST=iniza KBUILD_BUILD_USER=sedat.dilek@gmail.com
> > > > > > > KBUILD_BUILD_TIMESTAMP=2021-01-17 bindeb-pkg
> > > > > > > KDEB_PKGVERSION=5.11.0~rc3-9~bullseye+dileks1
> > > > > > > KCFLAGS=-fprofile-use=vmlinux.profdata
> > > > > > >
> > > > > > > ( Yes, 06:47 a.m. in the morning :-). )
> > > > > > >
> > > > > > > When I boot with the rebuild Linux-kernel I see:
> > > > > > >
> > > > > > > Wrong EFI loader signature
> > > > > > > ...
> > > > > > > Decompressing
> > > > > > > Parsing EFI
> > > > > > > Performing Relocations done.
> > > > > > > Booting the Kernel.
> > > > > > >
> > > > > > > *** SYSTEM HANGS ***
> > > > > > > ( I waited for approx 1 min )
> > > > > > >
> > > > > > > I tried to turn UEFI support ON and OFF.
> > > > > > > No success.
> > > > > > >
> > > > > > > Does Clang-PGO support Legacy-BIOS or is something different wrong?
> > > > > > >
> > > > > > > Thanks.
> > > > > > >
> > > > > >
> > > > > > My bootloader is GRUB.
> > > > > >
> > > > > > In UEFI-BIOS settings there is no secure-boot disable option.
> > > > > > Just simple "Use UEFI BIOS" enabled|disabled.
> > > > > >
> > > > > > Installed Debian packages:
> > > > > >
> > > > > > ii grub-common 2.04-12
> > > > > > ii grub-pc 2.04-12
> > > > > > ii grub-pc-bin 2.04-12
> > > > > > ii grub2-common 2.04-12
> > > > > >
> > > > > > I found in the below link to do in grub-shell:
> > > > > >
> > > > > > set check_signatures=no
> > > > > >
> > > > > > But this is when grub-efi is installed.
> > > > > >
> > > > > > - Sedat -
> > > > > >
> > > > > > Link: https://unix.stackexchange.com/questions/126286/grub-efi-disable-signature-check
> > > > >
> > > > > Forget about that "Wrong EFI bootloader" - I see this with all other
> > > > > kernels (all boot fine).
> > > > >
> > > > > I tried in QEMU with and without KASLR:
> > > > >
> > > > > [ run_qemu.sh ]
> > > > > KPATH=$(pwd)
> > > > >
> > > > > APPEND="root=/dev/ram0 console=ttyS0 hung_task_panic=1 earlyprintk=ttyS0,115200"
> > > > > APPEND="$APPEND nokaslr"
> > > > >
> > > > > qemu-system-x86_64 -enable-kvm -M pc -kernel $KPATH/bzImage -initrd
> > > > > $KPATH/initrd.img -m 512 -net none -serial stdio -append "${APPEND}"
> > > > > [ /run_qemu.sh ]
> > > > >
> > > > > $ ./run_qemu.sh
> > > > > Probing EDD (edd=off to disable)... ok
> > > > > Wrong EFI loader signature.
> > > > > early console in extract_kernel
> > > > > input_data: 0x000000000289940d
> > > > > input_len: 0x000000000069804a
> > > > > output: 0x0000000001000000
> > > > > output_len: 0x0000000001ef2010
> > > > > kernel_total_size: 0x0000000001c2c000
> > > > > needed_size: 0x0000000002000000
> > > > > trampoline_32bit: 0x000000000009d000
> > > > >
> > > > >
> > > > > KASLR disabled: 'nokaslr' on cmdline.
> > > > >
> > > > >
> > > > > Decompressing Linux... Parsing ELF... No relocation needed... done.
> > > > > Booting the kernel.
> > > > >
> > > > > QEMU run stops, too.
> > > > >
> > > >
> > > > I re-generated my initrd.img with GZIP as compressor (my default is ZSTD).
> > > >
> > > > --- /etc/initramfs-tools/initramfs.conf 2021-01-17 12:35:30.823818501 +0100
> > > > +++ /etc/initramfs-tools/initramfs.conf.zstd    2020-09-21
> > > > 23:55:43.121735427 +0200
> > > > @@ -41,7 +41,7 @@ KEYMAP=n
> > > > # COMPRESS: [ gzip | bzip2 | lz4 | lzma | lzop | xz | zstd ]
> > > > #
> > > >
> > > > -COMPRESS=gzip
> > > > +COMPRESS=zstd
> > > >
> > > > #
> > > > # DEVICE: ...
> > > >
> > > > root# KVER="5.11.0-rc3-9-amd64-clang12-pgo" ; update-initramfs -c -k $KVER
> > > >
> > > > QEMU boot stops at the same stage.
> > > >
> > > > Now, my head is empty...
> > > >
> > > > Any comments?
> > > >
> > >
> > > ( Just as a side note I have Nick's DWARF-v5 support enabled. )
> > >
> > > There is one EFI related warning in my build-log:
> > >
> > > $ grep warning: build-log_5.11.0-rc3-9-amd64-clang12-pgo.txt
> > > dpkg-architecture: warning: specified GNU system type x86_64-linux-gnu
> > > does not match CC system type x86_64-pc-linux-gnu, try setting a
> > > correct CC environment variable
> > > warning: arch/x86/platform/efi/quirks.c: Function control flow change
> > > detected (hash mismatch) efi_arch_mem_reserve Hash =
> > > 391331300655996873 [-Wbackend-plugin]
> > > warning: arch/x86/platform/efi/efi.c: Function control flow change
> > > detected (hash mismatch) efi_attr_is_visible Hash = 567185240781730690
> > > [-Wbackend-plugin]
> > > arch/x86/crypto/aegis128-aesni-glue.c:265:30: warning: unused variable
> > > 'simd_alg' [-Wunused-variable]
> > > warning: lib/crypto/sha256.c: Function control flow change detected
> > > (hash mismatch) sha256_update Hash = 744640996947387358
> > > [-Wbackend-plugin]
> > > warning: arch/x86/boot/compressed/string.c: Function control flow
> > > change detected (hash mismatch) memcmp Hash = 742261418966908927
> > > [-Wbackend-plugin]
> > > warning: arch/x86/boot/compressed/string.c: Function control flow
> > > change detected (hash mismatch) bcmp Hash = 742261418966908927
> > > [-Wbackend-plugin]
> > > warning: arch/x86/boot/compressed/string.c: Function control flow
> > > change detected (hash mismatch) strcmp Hash = 536873291001348520
> > > [-Wbackend-plugin]
> > > warning: arch/x86/boot/compressed/string.c: Function control flow
> > > change detected (hash mismatch) strnlen Hash = 146835646621254984
> > > [-Wbackend-plugin]
> > > warning: arch/x86/boot/compressed/string.c: Function control flow
> > > change detected (hash mismatch) simple_strtoull Hash =
> > > 252792765950587360 [-Wbackend-plugin]
> > > warning: arch/x86/boot/compressed/string.c: Function control flow
> > > change detected (hash mismatch) strstr Hash = 391331303349076211
> > > [-Wbackend-plugin]
> > > warning: arch/x86/boot/compressed/string.c: Function control flow
> > > change detected (hash mismatch) strchr Hash = 1063705159280644635
> > > [-Wbackend-plugin]
> > > warning: arch/x86/boot/compressed/string.c: Function control flow
> > > change detected (hash mismatch) kstrtoull Hash = 758414239132790022
> > > [-Wbackend-plugin]
> > > drivers/infiniband/hw/hfi1/platform.o: warning: objtool: tune_serdes()
> > > falls through to next function apply_tx_lanes()
> > >
> > > Cannot say if this information is helpful.
> > >
> >
> > My LLVM/Clang v12 is from <apt.llvm.org>:
> >
> > clang-12 version 1:12~++20210115111113+45ef053bd709-1~exp1~20210115101809.3724
> >
> > My kernel-config is attached.
> >
>
> I dropped "LLVM_IAS=1" from my make line and did for my next build:
>
> $ scripts/diffconfig /boot/config-5.11.0-rc3-8-amd64-clang12-pgo .config
> BUILD_SALT "5.11.0-rc3-8-amd64-clang12-pgo" -> "5.11.0-rc3-10-amd64-clang12-pgo"
> DEBUG_INFO_DWARF2 n -> y
> DEBUG_INFO_DWARF5 y -> n
> PGO_CLANG y -> n
>
> Means dropped DWARF5 support.
>
Hi Sedat,

Using PGO just improves optimizations. So unless there's a miscompile,
or some other nefarious thing, it shouldn't affect how the boot loader
runs.

As a sanity check, does the same Linux source and compiler version
generate a bootable kernel when PGO isn't used?

-bw
Sedat Dilek Jan. 17, 2021, 11:33 p.m. UTC | #12
On Sun, Jan 17, 2021 at 9:35 PM Bill Wendling <morbo@google.com> wrote:
>
> On Sun, Jan 17, 2021 at 9:42 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> >
> > On Sun, Jan 17, 2021 at 1:05 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > >
> > > On Sun, Jan 17, 2021 at 12:58 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > >
> > > > On Sun, Jan 17, 2021 at 12:42 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > > >
> > > > > On Sun, Jan 17, 2021 at 12:23 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > > > >
> > > > > > On Sun, Jan 17, 2021 at 11:53 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > > > > >
> > > > > > > On Sun, Jan 17, 2021 at 11:44 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > > > > > >
> > > > > > > > On Sat, Jan 16, 2021 at 9:23 PM Bill Wendling <morbo@google.com> wrote:
> > > > > > > > >
> > > > > > > > > On Sat, Jan 16, 2021 at 9:39 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > > > > > > > > On Sat, Jan 16, 2021 at 10:44 AM 'Bill Wendling' via Clang Built Linux
> > > > > > > > > > <clang-built-linux@googlegroups.com> wrote:
> > > > > > > > > > >
> > > > > > > > > > > From: Sami Tolvanen <samitolvanen@google.com>
> > > > > > > > > > >
> > > > > > > > > > > Enable the use of clang's Profile-Guided Optimization[1]. To generate a
> > > > > > > > > > > profile, the kernel is instrumented with PGO counters, a representative
> > > > > > > > > > > workload is run, and the raw profile data is collected from
> > > > > > > > > > > /sys/kernel/debug/pgo/profraw.
> > > > > > > > > > >
> > > > > > > > > > > The raw profile data must be processed by clang's "llvm-profdata" tool
> > > > > > > > > > > before it can be used during recompilation:
> > > > > > > > > > >
> > > > > > > > > > >   $ cp /sys/kernel/debug/pgo/profraw vmlinux.profraw
> > > > > > > > > > >   $ llvm-profdata merge --output=vmlinux.profdata vmlinux.profraw
> > > > > > > > > > >
> > > > > > > > > > > Multiple raw profiles may be merged during this step.
> > > > > > > > > > >
> > > > > > > > > > > The data can now be used by the compiler:
> > > > > > > > > > >
> > > > > > > > > > >   $ make LLVM=1 KCFLAGS=-fprofile-use=vmlinux.profdata ...
> > > > > > > > > > >
> > > > > > > > > > > This initial submission is restricted to x86, as that's the platform we
> > > > > > > > > > > know works. This restriction can be lifted once other platforms have
> > > > > > > > > > > been verified to work with PGO.
> > > > > > > > > > >
> > > > > > > > > > > Note that this method of profiling the kernel is clang-native, unlike
> > > > > > > > > > > the clang support in kernel/gcov.
> > > > > > > > > > >
> > > > > > > > > > > [1] https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization
> > > > > > > > > > >
> > > > > > > > > > > Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
> > > > > > > > > > > Co-developed-by: Bill Wendling <morbo@google.com>
> > > > > > > > > > > Signed-off-by: Bill Wendling <morbo@google.com>
> > > > > > > > > > > ---
> > > > > > > > > > > v2: - Added "__llvm_profile_instrument_memop" based on Nathan Chancellor's
> > > > > > > > > > >       testing.
> > > > > > > > > > >     - Corrected documentation, re PGO flags when using LTO, based on Fangrui
> > > > > > > > > > >       Song's comments.
> > > > > > > > > > > v3: - Added change log section based on Sedat Dilek's comments.
> > > > > > > > > > > > v4: - Remove non-x86 Makefile changes and use "hweight64" instead of using our
> > > > > > > > > > >       own popcount implementation, based on Nick Desaulniers's comment.
> > > > > > > > > > > v5: - Correct padding calculation, discovered by Nathan Chancellor.
> > > > > > > > > > > ---
> > > > > > > > > > >  Documentation/dev-tools/index.rst     |   1 +
> > > > > > > > > > >  Documentation/dev-tools/pgo.rst       | 127 +++++++++
> > > > > > > > > > >  MAINTAINERS                           |   9 +
> > > > > > > > > > >  Makefile                              |   3 +
> > > > > > > > > > >  arch/Kconfig                          |   1 +
> > > > > > > > > > >  arch/x86/Kconfig                      |   1 +
> > > > > > > > > > >  arch/x86/boot/Makefile                |   1 +
> > > > > > > > > > >  arch/x86/boot/compressed/Makefile     |   1 +
> > > > > > > > > > >  arch/x86/crypto/Makefile              |   2 +
> > > > > > > > > > >  arch/x86/entry/vdso/Makefile          |   1 +
> > > > > > > > > > >  arch/x86/kernel/vmlinux.lds.S         |   2 +
> > > > > > > > > > >  arch/x86/platform/efi/Makefile        |   1 +
> > > > > > > > > > >  arch/x86/purgatory/Makefile           |   1 +
> > > > > > > > > > >  arch/x86/realmode/rm/Makefile         |   1 +
> > > > > > > > > > >  arch/x86/um/vdso/Makefile             |   1 +
> > > > > > > > > > >  drivers/firmware/efi/libstub/Makefile |   1 +
> > > > > > > > > > >  include/asm-generic/vmlinux.lds.h     |  44 +++
> > > > > > > > > > >  kernel/Makefile                       |   1 +
> > > > > > > > > > >  kernel/pgo/Kconfig                    |  35 +++
> > > > > > > > > > >  kernel/pgo/Makefile                   |   5 +
> > > > > > > > > > >  kernel/pgo/fs.c                       | 382 ++++++++++++++++++++++++++
> > > > > > > > > > >  kernel/pgo/instrument.c               | 185 +++++++++++++
> > > > > > > > > > >  kernel/pgo/pgo.h                      | 206 ++++++++++++++
> > > > > > > > > > >  scripts/Makefile.lib                  |  10 +
> > > > > > > > > > >  24 files changed, 1022 insertions(+)
> > > > > > > > > > >  create mode 100644 Documentation/dev-tools/pgo.rst
> > > > > > > > > > >  create mode 100644 kernel/pgo/Kconfig
> > > > > > > > > > >  create mode 100644 kernel/pgo/Makefile
> > > > > > > > > > >  create mode 100644 kernel/pgo/fs.c
> > > > > > > > > > >  create mode 100644 kernel/pgo/instrument.c
> > > > > > > > > > >  create mode 100644 kernel/pgo/pgo.h
> > > > > > > > > > >
> > > > > > > > > > > diff --git a/Documentation/dev-tools/index.rst b/Documentation/dev-tools/index.rst
> > > > > > > > > > > index f7809c7b1ba9e..8d6418e858062 100644
> > > > > > > > > > > --- a/Documentation/dev-tools/index.rst
> > > > > > > > > > > +++ b/Documentation/dev-tools/index.rst
> > > > > > > > > > > @@ -26,6 +26,7 @@ whole; patches welcome!
> > > > > > > > > > >     kgdb
> > > > > > > > > > >     kselftest
> > > > > > > > > > >     kunit/index
> > > > > > > > > > > +   pgo
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > >  .. only::  subproject and html
> > > > > > > > > > > diff --git a/Documentation/dev-tools/pgo.rst b/Documentation/dev-tools/pgo.rst
> > > > > > > > > > > new file mode 100644
> > > > > > > > > > > index 0000000000000..b7f11d8405b73
> > > > > > > > > > > --- /dev/null
> > > > > > > > > > > +++ b/Documentation/dev-tools/pgo.rst
> > > > > > > > > > > @@ -0,0 +1,127 @@
> > > > > > > > > > > +.. SPDX-License-Identifier: GPL-2.0
> > > > > > > > > > > +
> > > > > > > > > > > +===============================
> > > > > > > > > > > +Using PGO with the Linux kernel
> > > > > > > > > > > +===============================
> > > > > > > > > > > +
> > > > > > > > > > > +Clang's profiling kernel support (PGO_) enables profiling of the Linux kernel
> > > > > > > > > > > +when building with Clang. The profiling data is exported via the ``pgo``
> > > > > > > > > > > +debugfs directory.
> > > > > > > > > > > +
> > > > > > > > > > > +.. _PGO: https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization
> > > > > > > > > > > +
> > > > > > > > > > > +
> > > > > > > > > > > +Preparation
> > > > > > > > > > > +===========
> > > > > > > > > > > +
> > > > > > > > > > > +Configure the kernel with:
> > > > > > > > > > > +
> > > > > > > > > > > +.. code-block:: make
> > > > > > > > > > > +
> > > > > > > > > > > +   CONFIG_DEBUG_FS=y
> > > > > > > > > > > +   CONFIG_PGO_CLANG=y
> > > > > > > > > > > +
> > > > > > > > > > > +Note that kernels compiled with profiling flags will be significantly larger
> > > > > > > > > > > +and run slower.
> > > > > > > > > > > +
> > > > > > > > > > > +Profiling data will only become accessible once debugfs has been mounted:
> > > > > > > > > > > +
> > > > > > > > > > > +.. code-block:: sh
> > > > > > > > > > > +
> > > > > > > > > > > +   mount -t debugfs none /sys/kernel/debug
> > > > > > > > > > > +
> > > > > > > > > > > +
> > > > > > > > > > > +Customization
> > > > > > > > > > > +=============
> > > > > > > > > > > +
> > > > > > > > > > > > +You can enable or disable profiling for individual files and directories by
> > > > > > > > > > > +adding a line similar to the following to the respective kernel Makefile:
> > > > > > > > > > > +
> > > > > > > > > > > +- For a single file (e.g. main.o)
> > > > > > > > > > > +
> > > > > > > > > > > +  .. code-block:: make
> > > > > > > > > > > +
> > > > > > > > > > > +     PGO_PROFILE_main.o := y
> > > > > > > > > > > +
> > > > > > > > > > > +- For all files in one directory
> > > > > > > > > > > +
> > > > > > > > > > > +  .. code-block:: make
> > > > > > > > > > > +
> > > > > > > > > > > +     PGO_PROFILE := y
> > > > > > > > > > > +
> > > > > > > > > > > +To exclude files from being profiled use
> > > > > > > > > > > +
> > > > > > > > > > > +  .. code-block:: make
> > > > > > > > > > > +
> > > > > > > > > > > +     PGO_PROFILE_main.o := n
> > > > > > > > > > > +
> > > > > > > > > > > +and
> > > > > > > > > > > +
> > > > > > > > > > > +  .. code-block:: make
> > > > > > > > > > > +
> > > > > > > > > > > +     PGO_PROFILE := n
> > > > > > > > > > > +
> > > > > > > > > > > +Only files which are linked to the main kernel image or are compiled as kernel
> > > > > > > > > > > +modules are supported by this mechanism.
> > > > > > > > > > > +
> > > > > > > > > > > +
> > > > > > > > > > > +Files
> > > > > > > > > > > +=====
> > > > > > > > > > > +
> > > > > > > > > > > +The PGO kernel support creates the following files in debugfs:
> > > > > > > > > > > +
> > > > > > > > > > > +``/sys/kernel/debug/pgo``
> > > > > > > > > > > +       Parent directory for all PGO-related files.
> > > > > > > > > > > +
> > > > > > > > > > > +``/sys/kernel/debug/pgo/reset``
> > > > > > > > > > > +       Global reset file: resets all coverage data to zero when written to.
> > > > > > > > > > > +
> > > > > > > > > > > > +``/sys/kernel/debug/pgo/profraw``
> > > > > > > > > > > > +       The raw PGO data that must be processed with ``llvm-profdata``.
> > > > > > > > > > > +
> > > > > > > > > > > +
> > > > > > > > > > > +Workflow
> > > > > > > > > > > +========
> > > > > > > > > > > +
> > > > > > > > > > > +The PGO kernel can be run on the host or test machines. The data though should
> > > > > > > > > > > +be analyzed with Clang's tools from the same Clang version as the kernel was
> > > > > > > > > > > > +compiled with. Clang's tolerant of version skew, but it's easier to use the same
> > > > > > > > > > > +Clang version.
> > > > > > > > > > > +
> > > > > > > > > > > +The profiling data is useful for optimizing the kernel, analyzing coverage,
> > > > > > > > > > > +etc. Clang offers tools to perform these tasks.
> > > > > > > > > > > +
> > > > > > > > > > > +Here is an example workflow for profiling an instrumented kernel with PGO and
> > > > > > > > > > > +using the result to optimize the kernel:
> > > > > > > > > > > +
> > > > > > > > > > > +1) Install the kernel on the TEST machine.
> > > > > > > > > > > +
> > > > > > > > > > > +2) Reset the data counters right before running the load tests
> > > > > > > > > > > +
> > > > > > > > > > > +   .. code-block:: sh
> > > > > > > > > > > +
> > > > > > > > > > > +      $ echo 1 > /sys/kernel/debug/pgo/reset
> > > > > > > > > > > +
> > > > > > > > > >
> > > > > > > > > > I do not get this...
> > > > > > > > > >
> > > > > > > > > > # mount | grep debugfs
> > > > > > > > > > debugfs on /sys/kernel/debug type debugfs (rw,nosuid,nodev,noexec,relatime)
> > > > > > > > > >
> > > > > > > > > > After the load-test...?
> > > > > > > > > >
> > > > > > > > > > echo 0 > /sys/kernel/debug/pgo/reset
> > > > > > > > > >
> > > > > > > > > Writing anything to /sys/kernel/debug/pgo/reset will cause it to reset
> > > > > > > > > the profiling counters. I picked 1 (one) semi-randomly, but it could
> > > > > > > > > be any number, letter, your favorite short story, etc. You don't want
> > > > > > > > > to reset it before collecting the profiling data from your load tests
> > > > > > > > > though.
> > > > > > > > >
> > > > > > > > > > > +3) Run the load tests.
> > > > > > > > > > > +
> > > > > > > > > > > +4) Collect the raw profile data
> > > > > > > > > > > +
> > > > > > > > > > > +   .. code-block:: sh
> > > > > > > > > > > +
> > > > > > > > > > > +      $ cp -a /sys/kernel/debug/pgo/profraw /tmp/vmlinux.profraw
> > > > > > > > > > > +
> > > > > > > > > >
> > > > > > > > > > This is only 4,9M small and seen from the date 5mins before I run the
> > > > > > > > > > echo-1 line.
> > > > > > > > > >
> > > > > > > > > > # ll /sys/kernel/debug/pgo
> > > > > > > > > > insgesamt 0
> > > > > > > > > > drwxr-xr-x  2 root root 0 16. Jan 17:29 .
> > > > > > > > > > drwx------ 41 root root 0 16. Jan 17:29 ..
> > > > > > > > > > -rw-------  1 root root 0 16. Jan 17:29 profraw
> > > > > > > > > > --w-------  1 root root 0 16. Jan 18:19 reset
> > > > > > > > > >
> > > > > > > > > > # cp -a /sys/kernel/debug/pgo/profraw /tmp/vmlinux.profraw
> > > > > > > > > >
> > > > > > > > > > # ll /tmp/vmlinux.profraw
> > > > > > > > > > -rw------- 1 root root 4,9M 16. Jan 17:29 /tmp/vmlinux.profraw
> > > > > > > > > >
> > > > > > > > > > For me there was no prof-data collected from my defconfig kernel-build.
> > > > > > > > > >
> > > > > > > > > The /sys/kernel/debug/pgo/profraw file is read-only. Nothing writes to
> > > > > > > > > it, not even the kernel. All it does is serialize the profiling
> > > > > > > > > counters from a memory location in the kernel into a format that
> > > > > > > > > LLVM's tools can understand.
> > > > > > > > >
> > > > > > > > > > > +5) (Optional) Download the raw profile data to the HOST machine.
> > > > > > > > > > > +
> > > > > > > > > > > +6) Process the raw profile data
> > > > > > > > > > > +
> > > > > > > > > > > +   .. code-block:: sh
> > > > > > > > > > > +
> > > > > > > > > > > +      $ llvm-profdata merge --output=vmlinux.profdata vmlinux.profraw
> > > > > > > > > > > +
> > > > > > > > > >
> > > > > > > > > > Is that executed in /path/to/linux/git?
> > > > > > > > > >
> > > > > > > > > The llvm-profdata tool is not in the linux source tree. You need to
> > > > > > > > > grab it from a clang distribution (or built from clang's git repo).
> > > > > > > > >
> > > > > > > > > > > +   Note that multiple raw profile data files can be merged during this step.
> > > > > > > > > > > +
> > > > > > > > > > > +7) Rebuild the kernel using the profile data (PGO disabled)
> > > > > > > > > > > +
> > > > > > > > > > > +   .. code-block:: sh
> > > > > > > > > > > +
> > > > > > > > > > > +      $ make LLVM=1 KCFLAGS=-fprofile-use=vmlinux.profdata ...
> > > > > > > > > >
> > > > > > > > > > How big is vmlinux.profdata (make defconfig)?
> > > > > > > > > >
> > > > > > > > > I don't have numbers for this, but from what you listed here, it's ~5M
> > > > > > > > > in size. The size is proportional to the number of counters
> > > > > > > > > instrumented in the kernel.
> > > > > > > > >
> > > > > > > > > > Do I need to do a full defconfig build or can I stop the build after
> > > > > > > > > > let me say 10mins?
> > > > > > > > > >
> > > > > > > > > You should do a full rebuild. Make sure that PGO is disabled during the rebuild.
> > > > > > > > >
> > > > > > > >
> > > > > > > > Thanks Bill for all the information.
> > > > > > > >
> > > > > > > > And sorry if I am so pedantic.
> > > > > > > >
> > > > > > > > I have installed my Debian system with Legacy-BIOS enabled.
> > > > > > > >
> > > > > > > > When I rebuild with KCFLAGS=-fprofile-use=vmlinux.profdata (LLVM=1 I
> > > > > > > > have as a default) my system hangs on reboot.
> > > > > > > >
> > > > > > > > [ diffconfig ]
> > > > > > > > $ scripts/diffconfig /boot/config-5.11.0-rc3-8-amd64-clang12-pgo
> > > > > > > > /boot/config-5.11.0-rc3-9-amd64-clang12-pgo
> > > > > > > > BUILD_SALT "5.11.0-rc3-8-amd64-clang12-pgo" -> "5.11.0-rc3-9-amd64-clang12-pgo"
> > > > > > > > PGO_CLANG y -> n
> > > > > > > >
> > > > > > > > [ my make line ]
> > > > > > > > $ cat ../start-build_5.11.0-rc3-9-amd64-clang12-pgo.txt
> > > > > > > > dileks     63120   63095  0 06:47 pts/2    00:00:00 /usr/bin/perf_5.10
> > > > > > > > stat make V=1 -j4 HOSTCC=clang HOSTCXX=clang++ HOSTLD=ld.lld CC=clang
> > > > > > > > LD=ld.lld LLVM=1 LLVM_IAS=1 PAHOLE=/opt/pahole/bin/pahole
> > > > > > > > LOCALVERSION=-9-amd64-clang12-pgo KBUILD_VERBOSE=1
> > > > > > > > KBUILD_BUILD_HOST=iniza KBUILD_BUILD_USER=sedat.dilek@gmail.com
> > > > > > > > KBUILD_BUILD_TIMESTAMP=2021-01-17 bindeb-pkg
> > > > > > > > KDEB_PKGVERSION=5.11.0~rc3-9~bullseye+dileks1
> > > > > > > > KCFLAGS=-fprofile-use=vmlinux.profdata
> > > > > > > >
> > > > > > > > ( Yes, 06:47 a.m. in the morning :-). )
> > > > > > > >
> > > > > > > > > When I boot with the rebuilt Linux kernel I see:
> > > > > > > >
> > > > > > > > Wrong EFI loader signature
> > > > > > > > ...
> > > > > > > > Decompressing
> > > > > > > > Parsing EFI
> > > > > > > > Performing Relocations done.
> > > > > > > > Booting the Kernel.
> > > > > > > >
> > > > > > > > *** SYSTEM HANGS ***
> > > > > > > > ( I waited for approx 1 min )
> > > > > > > >
> > > > > > > > I tried to turn UEFI support ON and OFF.
> > > > > > > > No success.
> > > > > > > >
> > > > > > > > Does Clang-PGO support Legacy-BIOS or is something different wrong?
> > > > > > > >
> > > > > > > > Thanks.
> > > > > > > >
> > > > > > >
> > > > > > > My bootloader is GRUB.
> > > > > > >
> > > > > > > In UEFI-BIOS settings there is no secure-boot disable option.
> > > > > > > Just simple "Use UEFI BIOS" enabled|disabled.
> > > > > > >
> > > > > > > Installed Debian packages:
> > > > > > >
> > > > > > > ii grub-common 2.04-12
> > > > > > > ii grub-pc 2.04-12
> > > > > > > ii grub-pc-bin 2.04-12
> > > > > > > ii grub2-common 2.04-12
> > > > > > >
> > > > > > > I found in the below link to do in grub-shell:
> > > > > > >
> > > > > > > set check_signatures=no
> > > > > > >
> > > > > > > But this is when grub-efi is installed.
> > > > > > >
> > > > > > > - Sedat -
> > > > > > >
> > > > > > > Link: https://unix.stackexchange.com/questions/126286/grub-efi-disable-signature-check
> > > > > >
> > > > > > Forget about that "Wrong EFI bootloader" - I see this with all other
> > > > > > kernels (all boot fine).
> > > > > >
> > > > > > I tried in QEMU with and without KASLR:
> > > > > >
> > > > > > [ run_qemu.sh ]
> > > > > > KPATH=$(pwd)
> > > > > >
> > > > > > APPEND="root=/dev/ram0 console=ttyS0 hung_task_panic=1 earlyprintk=ttyS0,115200"
> > > > > > APPEND="$APPEND nokaslr"
> > > > > >
> > > > > > qemu-system-x86_64 -enable-kvm -M pc -kernel $KPATH/bzImage -initrd
> > > > > > $KPATH/initrd.img -m 512 -net none -serial stdio -append "${APPEND}"
> > > > > > [ /run_qemu.sh ]
> > > > > >
> > > > > > $ ./run_qemu.sh
> > > > > > Probing EDD (edd=off to disable)... ok
> > > > > > Wrong EFI loader signature.
> > > > > > early console in extract_kernel
> > > > > > input_data: 0x000000000289940d
> > > > > > input_len: 0x000000000069804a
> > > > > > output: 0x0000000001000000
> > > > > > output_len: 0x0000000001ef2010
> > > > > > kernel_total_size: 0x0000000001c2c000
> > > > > > needed_size: 0x0000000002000000
> > > > > > trampoline_32bit: 0x000000000009d000
> > > > > >
> > > > > >
> > > > > > KASLR disabled: 'nokaslr' on cmdline.
> > > > > >
> > > > > >
> > > > > > Decompressing Linux... Parsing ELF... No relocation needed... done.
> > > > > > Booting the kernel.
> > > > > >
> > > > > > QEMU run stops, too.
> > > > > >
> > > > >
> > > > > I re-generated my initrd.img with GZIP as compressor (my default is ZSTD).
> > > > >
> > > > > --- /etc/initramfs-tools/initramfs.conf 2021-01-17 12:35:30.823818501 +0100
> > > > > +++ /etc/initramfs-tools/initramfs.conf.zstd    2020-09-21
> > > > > 23:55:43.121735427 +0200
> > > > > @@ -41,7 +41,7 @@ KEYMAP=n
> > > > > # COMPRESS: [ gzip | bzip2 | lz4 | lzma | lzop | xz | zstd ]
> > > > > #
> > > > >
> > > > > -COMPRESS=gzip
> > > > > +COMPRESS=zstd
> > > > >
> > > > > #
> > > > > # DEVICE: ...
> > > > >
> > > > > root# KVER="5.11.0-rc3-9-amd64-clang12-pgo" ; update-initramfs -c -k $KVER
> > > > >
> > > > > QEMU boot stops at the same stage.
> > > > >
> > > > > Now, my head is empty...
> > > > >
> > > > > Any comments?
> > > > >
> > > >
> > > > ( Just as a side note I have Nick's DWARF-v5 support enabled. )
> > > >
> > > > There is one EFI related warning in my build-log:
> > > >
> > > > $ grep warning: build-log_5.11.0-rc3-9-amd64-clang12-pgo.txt
> > > > dpkg-architecture: warning: specified GNU system type x86_64-linux-gnu
> > > > does not match CC system type x86_64-pc-linux-gnu, try setting a
> > > > correct CC environment variable
> > > > warning: arch/x86/platform/efi/quirks.c: Function control flow change
> > > > detected (hash mismatch) efi_arch_mem_reserve Hash =
> > > > 391331300655996873 [-Wbackend-plugin]
> > > > warning: arch/x86/platform/efi/efi.c: Function control flow change
> > > > detected (hash mismatch) efi_attr_is_visible Hash = 567185240781730690
> > > > [-Wbackend-plugin]
> > > > arch/x86/crypto/aegis128-aesni-glue.c:265:30: warning: unused variable
> > > > 'simd_alg' [-Wunused-variable]
> > > > warning: lib/crypto/sha256.c: Function control flow change detected
> > > > (hash mismatch) sha256_update Hash = 744640996947387358
> > > > [-Wbackend-plugin]
> > > > warning: arch/x86/boot/compressed/string.c: Function control flow
> > > > change detected (hash mismatch) memcmp Hash = 742261418966908927
> > > > [-Wbackend-plugin]
> > > > warning: arch/x86/boot/compressed/string.c: Function control flow
> > > > change detected (hash mismatch) bcmp Hash = 742261418966908927
> > > > [-Wbackend-plugin]
> > > > warning: arch/x86/boot/compressed/string.c: Function control flow
> > > > change detected (hash mismatch) strcmp Hash = 536873291001348520
> > > > [-Wbackend-plugin]
> > > > warning: arch/x86/boot/compressed/string.c: Function control flow
> > > > change detected (hash mismatch) strnlen Hash = 146835646621254984
> > > > [-Wbackend-plugin]
> > > > warning: arch/x86/boot/compressed/string.c: Function control flow
> > > > change detected (hash mismatch) simple_strtoull Hash =
> > > > 252792765950587360 [-Wbackend-plugin]
> > > > warning: arch/x86/boot/compressed/string.c: Function control flow
> > > > change detected (hash mismatch) strstr Hash = 391331303349076211
> > > > [-Wbackend-plugin]
> > > > warning: arch/x86/boot/compressed/string.c: Function control flow
> > > > change detected (hash mismatch) strchr Hash = 1063705159280644635
> > > > [-Wbackend-plugin]
> > > > warning: arch/x86/boot/compressed/string.c: Function control flow
> > > > change detected (hash mismatch) kstrtoull Hash = 758414239132790022
> > > > [-Wbackend-plugin]
> > > > drivers/infiniband/hw/hfi1/platform.o: warning: objtool: tune_serdes()
> > > > falls through to next function apply_tx_lanes()
> > > >
> > > > Cannot say if this information is helpful.
> > > >
> > >
> > > My LLVM/Clang v12 is from <apt.llvm.org>:
> > >
> > > clang-12 version 1:12~++20210115111113+45ef053bd709-1~exp1~20210115101809.3724
> > >
> > > My kernel-config is attached.
> > >
> >
> > I dropped "LLVM_IAS=1" from my make line and did for my next build:
> >
> > $ scripts/diffconfig /boot/config-5.11.0-rc3-8-amd64-clang12-pgo .config
> > BUILD_SALT "5.11.0-rc3-8-amd64-clang12-pgo" -> "5.11.0-rc3-10-amd64-clang12-pgo"
> > DEBUG_INFO_DWARF2 n -> y
> > DEBUG_INFO_DWARF5 y -> n
> > PGO_CLANG y -> n
> >
> > Means dropped DWARF5 support.
> >
> Hi Sedat,
>
> Using PGO just improves optimizations. So unless there's a miscompile,
> or some other nefarious thing, it shouldn't affect how the boot loader
> runs.
>
> As a sanity check, does the same Linux source and compiler version
> generate a bootable kernel when PGO isn't used?
>

Yes, I can boot with the same code base without PGO.

With the attached kernel-config.

I remember there is a fix in the CBL issue tracker for...

( https://github.com/ClangBuiltLinux/linux/issues/1250 )

Loading, please wait...
Starting version 247.2-4
[    2.157223] floppy: module verification failed: signature and/or
required key missing - tainting kernel
[    2.179326] i2c_piix4: Unknown symbol _GLOBAL_OFFSET_TABLE_ (err -2)
[    2.183558] scsi_mod: Unknown symbol _GLOBAL_OFFSET_TABLE_ (err -2)
[    2.187991] floppy: Unknown symbol _GLOBAL_OFFSET_TABLE_ (err -2)
[    2.195047] psmouse: Unknown symbol _GLOBAL_OFFSET_TABLE_ (err -2)
[    2.210404] scsi_mod: Unknown symbol _GLOBAL_OFFSET_TABLE_ (err -2)
[    2.231055] scsi_mod: Unknown symbol _GLOBAL_OFFSET_TABLE_ (err -2)

Full QEMU log...

$ ./run_qemu.sh
Probing EDD (edd=off to disable)... ok
Wrong EFI loader signature.
early console in extract_kernel
input_data: 0x000000000289c40d
input_len: 0x0000000000693f62
output: 0x0000000001000000
output_len: 0x0000000001ef0224
kernel_total_size: 0x0000000001c2c000
needed_size: 0x0000000002000000
trampoline_32bit: 0x000000000009d000
Physical KASLR using RDTSC...
Virtual KASLR using RDTSC...

Decompressing Linux... Parsing ELF... Performing relocations... done.
Booting the kernel.
[    0.000000] Linux version 5.11.0-rc3-10-amd64-clang12-pgo
(sedat.dilek@gmail.com@iniza) (Debian clang version
12.0.0-++20210115111113+45ef053bd709-1~exp1~2021011510
1809.3724, LLD 12.0.0) #10~bullseye+dileks1 SMP 2021-01-17
[    0.000000] Command line: root=/dev/ram0 console=ttyS0
hung_task_panic=1 earlyprintk=ttyS0,115200
[    0.000000] x86/fpu: x87 FPU will use FXSAVE
[    0.000000] BIOS-provided physical RAM map:
[    0.000000] BIOS-e820: [mem 0x0000000000000000-0x000000000009fbff] usable
[    0.000000] BIOS-e820: [mem 0x000000000009fc00-0x000000000009ffff] reserved
[    0.000000] BIOS-e820: [mem 0x00000000000f0000-0x00000000000fffff] reserved
[    0.000000] BIOS-e820: [mem 0x0000000000100000-0x000000001ffdffff] usable
[    0.000000] BIOS-e820: [mem 0x000000001ffe0000-0x000000001fffffff] reserved
[    0.000000] BIOS-e820: [mem 0x00000000feffc000-0x00000000feffffff] reserved
[    0.000000] BIOS-e820: [mem 0x00000000fffc0000-0x00000000ffffffff] reserved
[    0.000000] printk: bootconsole [earlyser0] enabled
[    0.000000] NX (Execute Disable) protection: active
[    0.000000] SMBIOS 2.8 present.
[    0.000000] DMI: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
1.14.0-2 04/01/2014
[    0.000000] Hypervisor detected: KVM
[    0.000000] kvm-clock: Using msrs 4b564d01 and 4b564d00
[    0.000000] kvm-clock: cpu 0, msr c877001, primary cpu clock
[    0.000000] kvm-clock: using sched offset of 510123624 cycles
[    0.003240] clocksource: kvm-clock: mask: 0xffffffffffffffff
max_cycles: 0x1cd42e4dffb, max_idle_ns: 881590591483 ns
[    0.009652] tsc: Detected 1596.372 MHz processor
[    0.013107] last_pfn = 0x1ffe0 max_arch_pfn = 0x400000000
[    0.015537] x86/PAT: PAT not supported by the CPU.
[    0.017605] x86/PAT: Configuration [0-7]: WB  WT  UC- UC  WB  WT  UC- UC
Memory KASLR using RDTSC...
[    0.038444] found SMP MP-table at [mem 0x000f5ce0-0x000f5cef]
[    0.042330] RAMDISK: [mem 0x1dfdb000-0x1ffdffff]
[    0.044738] ACPI: Early table checksum verification disabled
[    0.047289] ACPI: RSDP 0x00000000000F5B20 000014 (v00 BOCHS )
[    0.049887] ACPI: RSDT 0x000000001FFE1550 000034 (v01 BOCHS
BXPCRSDT 00000001 BXPC 00000001)
[    0.054578] ACPI: FACP 0x000000001FFE1404 000074 (v01 BOCHS
BXPCFACP 00000001 BXPC 00000001)
[    0.058412] ACPI: DSDT 0x000000001FFE0040 0013C4 (v01 BOCHS
BXPCDSDT 00000001 BXPC 00000001)
[    0.062056] ACPI: FACS 0x000000001FFE0000 000040
[    0.064325] ACPI: APIC 0x000000001FFE1478 000078 (v01 BOCHS
BXPCAPIC 00000001 BXPC 00000001)
[    0.068546] ACPI: HPET 0x000000001FFE14F0 000038 (v01 BOCHS
BXPCHPET 00000001 BXPC 00000001)
[    0.073026] ACPI: WAET 0x000000001FFE1528 000028 (v01 BOCHS
BXPCWAET 00000001 BXPC 00000001)
[    0.078063] No NUMA configuration found
[    0.080007] Faking a node at [mem 0x0000000000000000-0x000000001ffdffff]
[    0.083430] NODE_DATA(0) allocated [mem 0x1dfb1000-0x1dfdafff]
[    0.086934] Zone ranges:
[    0.087919]   DMA      [mem 0x0000000000001000-0x0000000000ffffff]
[    0.089927]   DMA32    [mem 0x0000000001000000-0x000000001ffdffff]
[    0.092270]   Normal   empty
[    0.093824]   Device   empty
[    0.095069] Movable zone start for each node
[    0.096880] Early memory node ranges
[    0.098410]   node   0: [mem 0x0000000000001000-0x000000000009efff]
[    0.101939]   node   0: [mem 0x0000000000100000-0x000000001ffdffff]
[    0.106130] Zeroed struct page in unavailable ranges: 130 pages
[    0.106139] Initmem setup node 0 [mem 0x0000000000001000-0x000000001ffdffff]
[    0.115094] ACPI: PM-Timer IO Port: 0x608
[    0.117173] ACPI: LAPIC_NMI (acpi_id[0xff] dfl dfl lint[0x1])
[    0.121073] IOAPIC[0]: apic_id 0, version 17, address 0xfec00000, GSI 0-23
[    0.123537] ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 2 dfl dfl)
[    0.126254] ACPI: INT_SRC_OVR (bus 0 bus_irq 5 global_irq 5 high level)
[    0.129062] ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 high level)
[    0.131888] ACPI: INT_SRC_OVR (bus 0 bus_irq 10 global_irq 10 high level)
[    0.135065] ACPI: INT_SRC_OVR (bus 0 bus_irq 11 global_irq 11 high level)
[    0.137286] Using ACPI (MADT) for SMP configuration information
[    0.139743] ACPI: HPET id: 0x8086a201 base: 0xfed00000
[    0.141956] smpboot: Allowing 1 CPUs, 0 hotplug CPUs
[    0.143678] PM: hibernation: Registered nosave memory: [mem
0x00000000-0x00000fff]
[    0.146249] PM: hibernation: Registered nosave memory: [mem
0x0009f000-0x0009ffff]
[    0.148784] PM: hibernation: Registered nosave memory: [mem
0x000a0000-0x000effff]
[    0.152756] PM: hibernation: Registered nosave memory: [mem
0x000f0000-0x000fffff]
[    0.155969] [mem 0x20000000-0xfeffbfff] available for PCI devices
[    0.158542] Booting paravirtualized kernel on KVM
[    0.160520] clocksource: refined-jiffies: mask: 0xffffffff
max_cycles: 0xffffffff, max_idle_ns: 7645519600211568 ns
[    0.171049] setup_percpu: NR_CPUS:8192 nr_cpumask_bits:1
nr_cpu_ids:1 nr_node_ids:1
[    0.175162] percpu: Embedded 54 pages/cpu s183512 r8192 d29480 u2097152
[    0.178044] kvm-guest: stealtime: cpu 0, msr 1d418480
[    0.180197] kvm-guest: PV spinlocks disabled, no host support
[    0.182655] Built 1 zonelists, mobility grouping on.  Total pages: 128872
[    0.188717] Policy zone: DMA32
[    0.190055] Kernel command line: root=/dev/ram0 console=ttyS0
hung_task_panic=1 earlyprintk=ttyS0,115200
[    0.194307] Dentry cache hash table entries: 65536 (order: 7,
524288 bytes, linear)
[    0.197691] Inode-cache hash table entries: 32768 (order: 6, 262144
bytes, linear)
[    0.201953] mem auto-init: stack:off, heap alloc:on, heap free:off
[    0.206787] Memory: 232680K/523768K available (12295K kernel code,
2462K rwdata, 4008K rodata, 2444K init, 1888K bss, 71012K reserved, 0K
cma-reserved)
[    0.212719] random: get_random_u64 called from
kmem_cache_open+0x27/0x500 with crng_init=0
[    0.212892] SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=1, Nodes=1
[    0.220858] Kernel/User page tables isolation: enabled
[    0.223136] ftrace: allocating 36189 entries in 142 pages
[    0.249721] ftrace: allocated 142 pages with 4 groups
[    0.252993] rcu: Hierarchical RCU implementation.
[    0.255411] rcu:     RCU restricting CPUs from NR_CPUS=8192 to nr_cpu_ids=1.
[    0.258890]  Rude variant of Tasks RCU enabled.
[    0.260761]  Tracing variant of Tasks RCU enabled.
[    0.262625] rcu: RCU calculated value of scheduler-enlistment delay
is 25 jiffies.
[    0.265212] rcu: Adjusting geometry for rcu_fanout_leaf=16, nr_cpu_ids=1
[    0.271882] NR_IRQS: 524544, nr_irqs: 256, preallocated irqs: 16
[    0.295378] Console: colour VGA+ 80x25
[    0.297439] printk: console [ttyS0] enabled
[    0.297439] printk: console [ttyS0] enabled
[    0.302560] printk: bootconsole [earlyser0] disabled
[    0.302560] printk: bootconsole [earlyser0] disabled
[    0.307728] ACPI: Core revision 20201113
[    0.310172] clocksource: hpet: mask: 0xffffffff max_cycles:
0xffffffff, max_idle_ns: 19112604467 ns
[    0.315115] APIC: Switch to symmetric I/O mode setup
[    0.318899] x2apic enabled
[    0.321088] Switched APIC routing to physical x2apic.
[    0.326381] ..TIMER: vector=0x30 apic1=0 pin1=2 apic2=-1 pin2=-1
[    0.329537] clocksource: tsc-early: mask: 0xffffffffffffffff
max_cycles: 0x1702c1d9d3d, max_idle_ns: 440795278546 ns
[    0.335417] Calibrating delay loop (skipped) preset value.. 3192.74
BogoMIPS (lpj=6385488)
[    0.339418] pid_max: default: 32768 minimum: 301
[    0.341620] LSM: Security Framework initializing
[    0.343446] Yama: becoming mindful.
[    0.345314] AppArmor: AppArmor initialized
[    0.347421] TOMOYO Linux initialized
[    0.349270] Mount-cache hash table entries: 1024 (order: 1, 8192
bytes, linear)
[    0.351417] Mountpoint-cache hash table entries: 1024 (order: 1,
8192 bytes, linear)
Poking KASLR using RDTSC...
[    0.361119] Last level iTLB entries: 4KB 0, 2MB 0, 4MB 0
[    0.363416] Last level dTLB entries: 4KB 0, 2MB 0, 4MB 0, 1GB 0
[    0.367419] Spectre V1 : Mitigation: usercopy/swapgs barriers and
__user pointer sanitization
[    0.370260] Spectre V2 : Mitigation: Full generic retpoline
[    0.371412] Spectre V2 : Spectre v2 / SpectreRSB mitigation:
Filling RSB on context switch
[    0.374257] Speculative Store Bypass: Vulnerable
[    0.375416] MDS: Vulnerable: Clear CPU buffers attempted, no microcode
[    0.389948] Freeing SMP alternatives memory: 36K
[    0.505617] APIC calibration not consistent with PM-Timer: 101ms
instead of 100ms
[    0.507410] APIC delta adjusted to PM-Timer: 6252138 (6321934)
[    0.507513] smpboot: CPU0: Intel QEMU Virtual CPU version 2.5+
(family: 0x6, model: 0x6, stepping: 0x3)
[    0.512111] Performance Events: PMU not available due to
virtualization, using software events only.
[    0.515510] rcu: Hierarchical SRCU implementation.
[    0.517439] NMI watchdog: Perf NMI watchdog permanently disabled
[    0.519477] smp: Bringing up secondary CPUs ...
[    0.523416] smp: Brought up 1 node, 1 CPU
[    0.525134] smpboot: Max logical packages: 1
[    0.526969] smpboot: Total of 1 processors activated (3192.74 BogoMIPS)
[    0.532118] node 0 deferred pages initialised in 4ms
[    0.534052] devtmpfs: initialized
[    0.535262] x86/mm: Memory block size: 128MB
[    0.535711] clocksource: jiffies: mask: 0xffffffff max_cycles:
0xffffffff, max_idle_ns: 7645041785100000 ns
[    0.539428] futex hash table entries: 256 (order: 2, 16384 bytes, linear)
[    0.541875] pinctrl core: initialized pinctrl subsystem
[    0.543936] NET: Registered protocol family 16
[    0.547553] audit: initializing netlink subsys (disabled)
[    0.551634] thermal_sys: Registered thermal governor 'fair_share'
[    0.551637] thermal_sys: Registered thermal governor 'bang_bang'
[    0.554723] thermal_sys: Registered thermal governor 'step_wise'
[    0.555425] audit: type=2000 audit(1610926004.833:1):
state=initialized audit_enabled=0 res=1
[    0.563420] thermal_sys: Registered thermal governor 'user_space'
[    0.563434] cpuidle: using governor ladder
[    0.569524] cpuidle: using governor menu
[    0.571485] ACPI: bus type PCI registered
[    0.573517] acpiphp: ACPI Hot Plug PCI Controller Driver version: 0.5
[    0.576324] PCI: Using configuration type 1 for base access
[    0.580588] HugeTLB registered 2.00 MiB page size, pre-allocated 0 pages
[    0.772658] ACPI: Added _OSI(Module Device)
[    0.774521] ACPI: Added _OSI(Processor Device)
[    0.775417] ACPI: Added _OSI(3.0 _SCP Extensions)
[    0.778176] ACPI: Added _OSI(Processor Aggregator Device)
[    0.779432] ACPI: Added _OSI(Linux-Dell-Video)
[    0.783458] ACPI: Added _OSI(Linux-Lenovo-NV-HDMI-Audio)
[    0.785480] ACPI: Added _OSI(Linux-HPI-Hybrid-Graphics)
[    0.788133] ACPI: 1 ACPI AML tables successfully acquired and loaded
[    0.792295] ACPI: Interpreter enabled
[    0.794716] ACPI: (supports S0 S3 S4 S5)
[    0.795415] ACPI: Using IOAPIC for interrupt routing
[    0.797540] PCI: Using host bridge windows from ACPI; if necessary,
use "pci=nocrs" and report a bug
[    0.799590] ACPI: Enabled 2 GPEs in block 00 to 0F
[    0.807844] ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff])
[    0.811186] acpi PNP0A03:00: _OSC: OS supports [ASPM ClockPM
Segments MSI HPX-Type3]
[    0.811444] acpi PNP0A03:00: fail to add MMCONFIG information,
can't access extended PCI configuration space under this bridge.
[    0.815895] acpiphp: Slot [3] registered
[    0.819473] acpiphp: Slot [4] registered
[    0.821210] acpiphp: Slot [5] registered
[    0.823453] acpiphp: Slot [6] registered
[    0.825153] acpiphp: Slot [7] registered
[    0.827461] acpiphp: Slot [8] registered
[    0.829166] acpiphp: Slot [9] registered
[    0.831537] acpiphp: Slot [10] registered
[    0.833276] acpiphp: Slot [11] registered
[    0.835447] acpiphp: Slot [12] registered
[    0.837183] acpiphp: Slot [13] registered
[    0.839428] acpiphp: Slot [14] registered
[    0.841167] acpiphp: Slot [15] registered
[    0.843042] acpiphp: Slot [16] registered
[    0.843455] acpiphp: Slot [17] registered
[    0.845205] acpiphp: Slot [18] registered
[    0.847452] acpiphp: Slot [19] registered
[    0.849209] acpiphp: Slot [20] registered
[    0.851448] acpiphp: Slot [21] registered
[    0.853215] acpiphp: Slot [22] registered
[    0.855447] acpiphp: Slot [23] registered
[    0.857179] acpiphp: Slot [24] registered
[    0.859478] acpiphp: Slot [25] registered
[    0.861807] acpiphp: Slot [26] registered
[    0.863150] acpiphp: Slot [27] registered
[    0.863458] acpiphp: Slot [28] registered
[    0.865444] acpiphp: Slot [29] registered
[    0.867451] acpiphp: Slot [30] registered
[    0.868826] acpiphp: Slot [31] registered
[    0.870296] PCI host bridge to bus 0000:00
[    0.871415] pci_bus 0000:00: root bus resource [io  0x0000-0x0cf7 window]
[    0.875414] pci_bus 0000:00: root bus resource [io  0x0d00-0xffff window]
[    0.879418] pci_bus 0000:00: root bus resource [mem
0x000a0000-0x000bffff window]
[    0.883416] pci_bus 0000:00: root bus resource [mem
0x20000000-0xfebfffff window]
[    0.887416] pci_bus 0000:00: root bus resource [mem
0x100000000-0x17fffffff window]
[    0.891277] pci_bus 0000:00: root bus resource [bus 00-ff]
[    0.891510] pci 0000:00:00.0: [8086:1237] type 00 class 0x060000
[    0.896375] pci 0000:00:01.0: [8086:7000] type 00 class 0x060100
[    0.900672] pci 0000:00:01.1: [8086:7010] type 00 class 0x010180
[    0.908157] pci 0000:00:01.1: reg 0x20: [io  0xc000-0xc00f]
[    0.912723] pci 0000:00:01.1: legacy IDE quirk: reg 0x10: [io  0x01f0-0x01f7]
[    0.915417] pci 0000:00:01.1: legacy IDE quirk: reg 0x14: [io  0x03f6]
[    0.919413] pci 0000:00:01.1: legacy IDE quirk: reg 0x18: [io  0x0170-0x0177]
[    0.923413] pci 0000:00:01.1: legacy IDE quirk: reg 0x1c: [io  0x0376]
[    0.926608] pci 0000:00:01.3: [8086:7113] type 00 class 0x068000
[    0.928431] pci 0000:00:01.3: quirk: [io  0x0600-0x063f] claimed by
PIIX4 ACPI
[    0.931433] pci 0000:00:01.3: quirk: [io  0x0700-0x070f] claimed by PIIX4 SMB
[    0.935898] pci 0000:00:02.0: [1234:1111] type 00 class 0x030000
[    0.941665] pci 0000:00:02.0: reg 0x10: [mem 0xfd000000-0xfdffffff pref]
[    0.949458] pci 0000:00:02.0: reg 0x18: [mem 0xfebf0000-0xfebf0fff]
[    0.958562] pci 0000:00:02.0: reg 0x30: [mem 0xfebe0000-0xfebeffff pref]
[    0.961151] ACPI: PCI Interrupt Link [LNKA] (IRQs 5 *10 11)
[    0.963610] ACPI: PCI Interrupt Link [LNKB] (IRQs 5 *10 11)
[    0.966032] ACPI: PCI Interrupt Link [LNKC] (IRQs 5 10 *11)
[    0.967627] ACPI: PCI Interrupt Link [LNKD] (IRQs 5 10 *11)
[    0.971526] ACPI: PCI Interrupt Link [LNKS] (IRQs *9)
[    0.974667] iommu: Default domain type: Translated
[    0.975568] pci 0000:00:02.0: vgaarb: setting as boot VGA device
[    0.978113] pci 0000:00:02.0: vgaarb: VGA device added:
decodes=io+mem,owns=io+mem,locks=none
[    0.979413] pci 0000:00:02.0: vgaarb: bridge control possible
[    0.983413] vgaarb: loaded
[    0.984827] EDAC MC: Ver: 3.0.0
[    0.988222] NetLabel: Initializing
[    0.991415] NetLabel:  domain hash size = 128
[    0.992873] NetLabel:  protocols = UNLABELED CIPSOv4 CALIPSO
[    0.994825] NetLabel:  unlabeled traffic allowed by default
[    0.995430] PCI: Using ACPI for IRQ routing
[    0.999490] hpet: 3 channels of 0 reserved for per-cpu timers
[    1.001394] hpet0: at MMIO 0xfed00000, IRQs 2, 8, 0
[    1.002975] hpet0: 3 comparators, 64-bit 100.000000 MHz counter
[    1.009634] clocksource: Switched to clocksource kvm-clock
[    1.019197] VFS: Disk quotas dquot_6.6.0
[    1.021644] VFS: Dquot-cache hash table entries: 512 (order 0, 4096 bytes)
[    1.029347] AppArmor: AppArmor Filesystem Enabled
[    1.031431] pnp: PnP ACPI init
[    1.033294] pnp: PnP ACPI: found 6 devices
[    1.041838] clocksource: acpi_pm: mask: 0xffffff max_cycles:
0xffffff, max_idle_ns: 2085701024 ns
[    1.045506] NET: Registered protocol family 2
[    1.047325] tcp_listen_portaddr_hash hash table entries: 256
(order: 0, 4096 bytes, linear)
[    1.051250] TCP established hash table entries: 4096 (order: 3,
32768 bytes, linear)
[    1.054797] TCP bind hash table entries: 4096 (order: 4, 65536 bytes, linear)
[    1.057867] TCP: Hash tables configured (established 4096 bind 4096)
[    1.060657] UDP hash table entries: 256 (order: 1, 8192 bytes, linear)
[    1.063438] UDP-Lite hash table entries: 256 (order: 1, 8192 bytes, linear)
[    1.066616] NET: Registered protocol family 1
[    1.068525] NET: Registered protocol family 44
[    1.070988] pci_bus 0000:00: resource 4 [io  0x0000-0x0cf7 window]
[    1.073088] pci_bus 0000:00: resource 5 [io  0x0d00-0xffff window]
[    1.075350] pci_bus 0000:00: resource 6 [mem 0x000a0000-0x000bffff window]
[    1.078851] pci_bus 0000:00: resource 7 [mem 0x20000000-0xfebfffff window]
[    1.082396] pci_bus 0000:00: resource 8 [mem 0x100000000-0x17fffffff window]
[    1.086505] pci 0000:00:01.0: PIIX3: Enabling Passive Release
[    1.089003] pci 0000:00:00.0: Limiting direct PCI/PCI transfers
[    1.091193] pci 0000:00:01.0: Activating ISA DMA hang workarounds
[    1.093281] pci 0000:00:02.0: Video device with shadowed ROM at
[mem 0x000c0000-0x000dffff]
[    1.096308] PCI: CLS 0 bytes, default 64
[    1.098784] Trying to unpack rootfs image as initramfs...
[    1.756924] Freeing initrd memory: 32788K
[    1.759044] clocksource: tsc: mask: 0xffffffffffffffff max_cycles:
0x1702c1d9d3d, max_idle_ns: 440795278546 ns
[    1.765351] Initialise system trusted keyrings
[    1.767287] Key type blacklist registered
[    1.769096] workingset: timestamp_bits=36 max_order=17 bucket_order=0
[    1.773218] zbud: loaded
[    1.774596] integrity: Platform Keyring initialized
[    1.776709] Key type asymmetric registered
[    1.779399] Asymmetric key parser 'x509' registered
[    1.781504] Block layer SCSI generic (bsg) driver version 0.4
loaded (major 251)
[    1.784737] io scheduler mq-deadline registered
[    1.786842] shpchp: Standard Hot Plug PCI Controller Driver version: 0.4
[    1.790028] Serial: 8250/16550 driver, 4 ports, IRQ sharing enabled
[    1.793393] 00:04: ttyS0 at I/O 0x3f8 (irq = 4, base_baud = 115200)
is a 16550A
[    1.798437] Linux agpgart interface v0.103
[    1.799944] AMD-Vi: AMD IOMMUv2 driver by Joerg Roedel <jroedel@suse.de>
[    1.802535] AMD-Vi: AMD IOMMUv2 functionality not available on this system
[    1.806358] i8042: PNP: PS/2 Controller [PNP0303:KBD,PNP0f13:MOU]
at 0x60,0x64 irq 1,12
[    1.810762] serio: i8042 KBD port at 0x60,0x64 irq 1
[    1.813927] serio: i8042 AUX port at 0x60,0x64 irq 12
[    1.816306] mousedev: PS/2 mouse device common for all mice
[    1.819254] input: AT Translated Set 2 keyboard as
/devices/platform/i8042/serio0/input/input0
[    1.823023] rtc_cmos 00:05: RTC can wake from S4
[    1.826320] rtc_cmos 00:05: registered as rtc0
[    1.829030] rtc_cmos 00:05: setting system clock to
2021-01-17T23:26:45 UTC (1610926005)
[    1.832489] rtc_cmos 00:05: alarms up to one day, y3k, 242 bytes
nvram, hpet irqs
[    1.835661] intel_pstate: CPU model not supported
[    1.837656] ledtrig-cpu: registered to indicate activity on CPUs
[    1.840489] NET: Registered protocol family 10
[    1.857135] Segment Routing with IPv6
[    1.858772] mip6: Mobile IPv6
[    1.860093] NET: Registered protocol family 17
[    1.862844] mpls_gso: MPLS GSO support
[    1.864379] IPI shorthand broadcast: enabled
[    1.865844] sched_clock: Marking stable (1819436328,
44726425)->(1868284483, -4121730)
[    1.869029] registered taskstats version 1
[    1.870771] Loading compiled-in X.509 certificates
[    1.873185] zswap: loaded using pool zstd/zbud
[    1.875399] Key type ._fscrypt registered
[    1.877158] Key type .fscrypt registered
[    1.879447] Key type fscrypt-provisioning registered
[    1.881189] AppArmor: AppArmor sha1 policy hashing enabled
[    1.886920] Freeing unused kernel image (initmem) memory: 2444K
[    1.891517] Write protecting the kernel read-only data: 18432k
[    1.896049] Freeing unused kernel image (text/rodata gap) memory: 2040K
[    1.899196] Freeing unused kernel image (rodata/data gap) memory: 88K
[    1.968324] x86/mm: Checked W+X mappings: passed, no W+X pages found.
[    1.971797] x86/mm: Checking user space page tables
[    2.037848] x86/mm: Checked W+X mappings: passed, no W+X pages found.
[    2.040258] Run /init as init process
Loading, please wait...
Starting version 247.2-4
[    2.157223] floppy: module verification failed: signature and/or
required key missing - tainting kernel
[    2.179326] i2c_piix4: Unknown symbol _GLOBAL_OFFSET_TABLE_ (err -2)
[    2.183558] scsi_mod: Unknown symbol _GLOBAL_OFFSET_TABLE_ (err -2)
[    2.187991] floppy: Unknown symbol _GLOBAL_OFFSET_TABLE_ (err -2)
[    2.195047] psmouse: Unknown symbol _GLOBAL_OFFSET_TABLE_ (err -2)
[    2.210404] scsi_mod: Unknown symbol _GLOBAL_OFFSET_TABLE_ (err -2)
[    2.231055] scsi_mod: Unknown symbol _GLOBAL_OFFSET_TABLE_ (err -2)
Begin: Loading essential drivers ... done.
Begin: Running /scripts/init-premount ... done.
Begin: Mounting root file system ... Begin: Running /scripts/local-top ... done.
Begin: Running /scripts/local-premount ... [    2.261574] libcrc32c:
Unknown symbol _GLOBAL_OFFSET_TABLE_ (err -2)
Scanning for Btrfs filesystems
done.
Begin: Waiting for root file system ... Begin: Running
/scripts/local-block ... done.
Begin: Running /scripts/local-block ... done.
Begin: Running /scripts/local-block ... done.
Begin: Running /scripts/local-block ... done.
Begin: Running /scripts/local-block ... done.
Begin: Running /scripts/local-block ... done.
Begin: Running /scripts/local-block ... done.
Begin: Running /scripts/local-block ... done.
Begin: Running /scripts/local-block ... done.
qemu-system-x86_64: terminating on signal 2

- Sedat -
Sedat Dilek Jan. 18, 2021, 12:26 a.m. UTC | #13
On Mon, Jan 18, 2021 at 12:33 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:

[ big snip ]

> > > > > > > > > > > > +Workflow
> > > > > > > > > > > > +========
> > > > > > > > > > > > +
> > > > > > > > > > > > +The PGO kernel can be run on the host or test machines. The data though should
> > > > > > > > > > > > +be analyzed with Clang's tools from the same Clang version as the kernel was
> > > > > > > > > > > > +compiled. Clang's tolerant of version skew, but it's easier to use the same
> > > > > > > > > > > > +Clang version.
> > > > > > > > > > > > +
> > > > > > > > > > > > +The profiling data is useful for optimizing the kernel, analyzing coverage,
> > > > > > > > > > > > +etc. Clang offers tools to perform these tasks.
> > > > > > > > > > > > +
> > > > > > > > > > > > +Here is an example workflow for profiling an instrumented kernel with PGO and
> > > > > > > > > > > > +using the result to optimize the kernel:
> > > > > > > > > > > > +
> > > > > > > > > > > > +1) Install the kernel on the TEST machine.
> > > > > > > > > > > > +
> > > > > > > > > > > > +2) Reset the data counters right before running the load tests
> > > > > > > > > > > > +
> > > > > > > > > > > > +   .. code-block:: sh
> > > > > > > > > > > > +
> > > > > > > > > > > > +      $ echo 1 > /sys/kernel/debug/pgo/reset
> > > > > > > > > > > > +
> > > > > > > > > > >
> > > > > > > > > > > I do not get this...
> > > > > > > > > > >
> > > > > > > > > > > # mount | grep debugfs
> > > > > > > > > > > debugfs on /sys/kernel/debug type debugfs (rw,nosuid,nodev,noexec,relatime)
> > > > > > > > > > >
> > > > > > > > > > > After the load-test...?
> > > > > > > > > > >
> > > > > > > > > > > echo 0 > /sys/kernel/debug/pgo/reset
> > > > > > > > > > >
> > > > > > > > > > Writing anything to /sys/kernel/debug/pgo/reset will cause it to reset
> > > > > > > > > > the profiling counters. I picked 1 (one) semi-randomly, but it could
> > > > > > > > > > be any number, letter, your favorite short story, etc. You don't want
> > > > > > > > > > to reset it before collecting the profiling data from your load tests
> > > > > > > > > > though.
> > > > > > > > > >
> > > > > > > > > > > > +3) Run the load tests.
> > > > > > > > > > > > +
> > > > > > > > > > > > +4) Collect the raw profile data
> > > > > > > > > > > > +
> > > > > > > > > > > > +   .. code-block:: sh
> > > > > > > > > > > > +
> > > > > > > > > > > > +      $ cp -a /sys/kernel/debug/pgo/profraw /tmp/vmlinux.profraw
> > > > > > > > > > > > +
> > > > > > > > > > >
> > > > > > > > > > > This is only 4,9M small and seen from the date 5mins before I run the
> > > > > > > > > > > echo-1 line.
> > > > > > > > > > >
> > > > > > > > > > > # ll /sys/kernel/debug/pgo
> > > > > > > > > > > insgesamt 0
> > > > > > > > > > > drwxr-xr-x  2 root root 0 16. Jan 17:29 .
> > > > > > > > > > > drwx------ 41 root root 0 16. Jan 17:29 ..
> > > > > > > > > > > -rw-------  1 root root 0 16. Jan 17:29 profraw
> > > > > > > > > > > --w-------  1 root root 0 16. Jan 18:19 reset
> > > > > > > > > > >
> > > > > > > > > > > # cp -a /sys/kernel/debug/pgo/profraw /tmp/vmlinux.profraw
> > > > > > > > > > >
> > > > > > > > > > > # ll /tmp/vmlinux.profraw
> > > > > > > > > > > -rw------- 1 root root 4,9M 16. Jan 17:29 /tmp/vmlinux.profraw
> > > > > > > > > > >
> > > > > > > > > > > For me there was no prof-data collected from my defconfig kernel-build.
> > > > > > > > > > >
> > > > > > > > > > The /sys/kernel/debug/pgo/profraw file is read-only. Nothing writes to
> > > > > > > > > > it, not even the kernel. All it does is serialize the profiling
> > > > > > > > > > counters from a memory location in the kernel into a format that
> > > > > > > > > > LLVM's tools can understand.
> > > > > > > > > >
> > > > > > > > > > > > +5) (Optional) Download the raw profile data to the HOST machine.
> > > > > > > > > > > > +
> > > > > > > > > > > > +6) Process the raw profile data
> > > > > > > > > > > > +
> > > > > > > > > > > > +   .. code-block:: sh
> > > > > > > > > > > > +
> > > > > > > > > > > > +      $ llvm-profdata merge --output=vmlinux.profdata vmlinux.profraw
> > > > > > > > > > > > +
> > > > > > > > > > >
> > > > > > > > > > > Is that executed in /path/to/linux/git?
> > > > > > > > > > >
> > > > > > > > > > The llvm-profdata tool is not in the linux source tree. You need to
> > > > > > > > > > grab it from a clang distribution (or built from clang's git repo).
> > > > > > > > > >
> > > > > > > > > > > > +   Note that multiple raw profile data files can be merged during this step.
> > > > > > > > > > > > +
> > > > > > > > > > > > +7) Rebuild the kernel using the profile data (PGO disabled)
> > > > > > > > > > > > +
> > > > > > > > > > > > +   .. code-block:: sh
> > > > > > > > > > > > +
> > > > > > > > > > > > +      $ make LLVM=1 KCFLAGS=-fprofile-use=vmlinux.profdata ...
> > > > > > > > > > >
> > > > > > > > > > > How big is vmlinux.profdata (make defconfig)?
> > > > > > > > > > >
> > > > > > > > > > I don't have numbers for this, but from what you listed here, it's ~5M
> > > > > > > > > > in size. The size is proportional to the number of counters
> > > > > > > > > > instrumented in the kernel.
> > > > > > > > > >
> > > > > > > > > > > Do I need to do a full defconfig build or can I stop the build after
> > > > > > > > > > > let me say 10mins?
> > > > > > > > > > >
> > > > > > > > > > You should do a full rebuild. Make sure that PGO is disabled during the rebuild.
> > > > > > > > > >
> > > > > > > > >
> > > > > > > > > Thanks Bill for all the information.
> > > > > > > > >
> > > > > > > > > And sorry if I am so pedantic.
> > > > > > > > >
> > > > > > > > > I have installed my Debian system with Legacy-BIOS enabled.
> > > > > > > > >
> > > > > > > > > When I rebuild with KCFLAGS=-fprofile-use=vmlinux.profdata (LLVM=1 I
> > > > > > > > > have as a default) my system hangs on reboot.
> > > > > > > > >
> > > > > > > > > [ diffconfig ]
> > > > > > > > > $ scripts/diffconfig /boot/config-5.11.0-rc3-8-amd64-clang12-pgo
> > > > > > > > > /boot/config-5.11.0-rc3-9-amd64-clang12-pgo
> > > > > > > > > BUILD_SALT "5.11.0-rc3-8-amd64-clang12-pgo" -> "5.11.0-rc3-9-amd64-clang12-pgo"
> > > > > > > > > PGO_CLANG y -> n
> > > > > > > > >
> > > > > > > > > [ my make line ]
> > > > > > > > > $ cat ../start-build_5.11.0-rc3-9-amd64-clang12-pgo.txt
> > > > > > > > > dileks     63120   63095  0 06:47 pts/2    00:00:00 /usr/bin/perf_5.10
> > > > > > > > > stat make V=1 -j4 HOSTCC=clang HOSTCXX=clang++ HOSTLD=ld.lld CC=clang
> > > > > > > > > LD=ld.lld LLVM=1 LLVM_IAS=1 PAHOLE=/opt/pahole/bin/pahole
> > > > > > > > > LOCALVERSION=-9-amd64-clang12-pgo KBUILD_VERBOSE=1
> > > > > > > > > KBUILD_BUILD_HOST=iniza KBUILD_BUILD_USER=sedat.dilek@gmail.com
> > > > > > > > > KBUILD_BUILD_TIMESTAMP=2021-01-17 bindeb-pkg
> > > > > > > > > KDEB_PKGVERSION=5.11.0~rc3-9~bullseye+dileks1
> > > > > > > > > KCFLAGS=-fprofile-use=vmlinux.profdata
> > > > > > > > >
> > > > > > > > > ( Yes, 06:47 a.m. in the morning :-). )
> > > > > > > > >
> > > > > > > > > When I boot with the rebuild Linux-kernel I see:
> > > > > > > > >
> > > > > > > > > Wrong EFI loader signature
> > > > > > > > > ...
> > > > > > > > > Decompressing
> > > > > > > > > Parsing EFI
> > > > > > > > > Performing Relocations done.
> > > > > > > > > Booting the Kernel.
> > > > > > > > >
> > > > > > > > > *** SYSTEM HANGS ***
> > > > > > > > > ( I waited for approx 1 min )
> > > > > > > > >
> > > > > > > > > I tried to turn UEFI support ON and OFF.
> > > > > > > > > No success.
> > > > > > > > >
> > > > > > > > > Does Clang-PGO support Legacy-BIOS or is something different wrong?
> > > > > > > > >
> > > > > > > > > Thanks.
> > > > > > > > >
> > > > > > > >
> > > > > > > > My bootloader is GRUB.
> > > > > > > >
> > > > > > > > In UEFI-BIOS settings there is no secure-boot disable option.
> > > > > > > > Just simple "Use UEFI BIOS" enabled|disabled.
> > > > > > > >
> > > > > > > > Installed Debian packages:
> > > > > > > >
> > > > > > > > ii grub-common 2.04-12
> > > > > > > > ii grub-pc 2.04-12
> > > > > > > > ii grub-pc-bin 2.04-12
> > > > > > > > ii grub2-common 2.04-12
> > > > > > > >
> > > > > > > > I found in the below link to do in grub-shell:
> > > > > > > >
> > > > > > > > set check_signatures=no
> > > > > > > >
> > > > > > > > But this is when grub-efi is installed.
> > > > > > > >
> > > > > > > > - Sedat -
> > > > > > > >
> > > > > > > > Link: https://unix.stackexchange.com/questions/126286/grub-efi-disable-signature-check
> > > > > > >
> > > > > > > Forget about that "Wrong EFI bootloader" - I see this with all other
> > > > > > > kernels (all boot fine).
> > > > > > >
> > > > > > > I tried in QEMU with and without KASLR:
> > > > > > >
> > > > > > > [ run_qemu.sh ]
> > > > > > > KPATH=$(pwd)
> > > > > > >
> > > > > > > APPEND="root=/dev/ram0 console=ttyS0 hung_task_panic=1 earlyprintk=ttyS0,115200"
> > > > > > > APPEND="$APPEND nokaslr"
> > > > > > >
> > > > > > > qemu-system-x86_64 -enable-kvm -M pc -kernel $KPATH/bzImage -initrd
> > > > > > > $KPATH/initrd.img -m 512 -net none -serial stdio -append "${APPEND}"
> > > > > > > [ /run_qemu.sh ]
> > > > > > >
> > > > > > > $ ./run_qemu.sh
> > > > > > > Probing EDD (edd=off to disable)... ok
> > > > > > > Wrong EFI loader signature.
> > > > > > > early console in extract_kernel
> > > > > > > input_data: 0x000000000289940d
> > > > > > > input_len: 0x000000000069804a
> > > > > > > output: 0x0000000001000000
> > > > > > > output_len: 0x0000000001ef2010
> > > > > > > kernel_total_size: 0x0000000001c2c000
> > > > > > > needed_size: 0x0000000002000000
> > > > > > > trampoline_32bit: 0x000000000009d000
> > > > > > >
> > > > > > >
> > > > > > > KASLR disabled: 'nokaslr' on cmdline.
> > > > > > >
> > > > > > >
> > > > > > > Decompressing Linux... Parsing ELF... No relocation needed... done.
> > > > > > > Booting the kernel.
> > > > > > >
> > > > > > > QEMU run stops, too.
> > > > > > >
> > > > > >
> > > > > > I re-generated my initrd.img with GZIP as compressor (my default is ZSTD).
> > > > > >
> > > > > > --- /etc/initramfs-tools/initramfs.conf 2021-01-17 12:35:30.823818501 +0100
> > > > > > +++ /etc/initramfs-tools/initramfs.conf.zstd    2020-09-21
> > > > > > 23:55:43.121735427 +0200
> > > > > > @@ -41,7 +41,7 @@ KEYMAP=n
> > > > > > # COMPRESS: [ gzip | bzip2 | lz4 | lzma | lzop | xz | zstd ]
> > > > > > #
> > > > > >
> > > > > > -COMPRESS=gzip
> > > > > > +COMPRESS=zstd
> > > > > >
> > > > > > #
> > > > > > # DEVICE: ...
> > > > > >
> > > > > > root# KVER="5.11.0-rc3-9-amd64-clang12-pgo" ; update-initramfs -c -k $KVER
> > > > > >
> > > > > > QEMU boot stops at the same stage.
> > > > > >
> > > > > > Now, my head is empty...
> > > > > >
> > > > > > Any comments?
> > > > > >
> > > > >
> > > > > ( Just as a side note I have Nick's DWARF-v5 support enabled. )
> > > > >
> > > > > There is one EFI related warning in my build-log:
> > > > >
> > > > > $ grep warning: build-log_5.11.0-rc3-9-amd64-clang12-pgo.txt
> > > > > dpkg-architecture: warning: specified GNU system type x86_64-linux-gnu
> > > > > does not match CC system type x86_64-pc-linux-gnu, try setting a
> > > > > correct CC environment variable
> > > > > warning: arch/x86/platform/efi/quirks.c: Function control flow change
> > > > > detected (hash mismatch) efi_arch_mem_reserve Hash =
> > > > > 391331300655996873 [-Wbackend-plugin]
> > > > > warning: arch/x86/platform/efi/efi.c: Function control flow change
> > > > > detected (hash mismatch) efi_attr_is_visible Hash = 567185240781730690
> > > > > [-Wbackend-plugin]
> > > > > arch/x86/crypto/aegis128-aesni-glue.c:265:30: warning: unused variable
> > > > > 'simd_alg' [-Wunused-variable]
> > > > > warning: lib/crypto/sha256.c: Function control flow change detected
> > > > > (hash mismatch) sha256_update Hash = 744640996947387358
> > > > > [-Wbackend-plugin]
> > > > > warning: arch/x86/boot/compressed/string.c: Function control flow
> > > > > change detected (hash mismatch) memcmp Hash = 742261418966908927
> > > > > [-Wbackend-plugin]
> > > > > warning: arch/x86/boot/compressed/string.c: Function control flow
> > > > > change detected (hash mismatch) bcmp Hash = 742261418966908927
> > > > > [-Wbackend-plugin]
> > > > > warning: arch/x86/boot/compressed/string.c: Function control flow
> > > > > change detected (hash mismatch) strcmp Hash = 536873291001348520
> > > > > [-Wbackend-plugin]
> > > > > warning: arch/x86/boot/compressed/string.c: Function control flow
> > > > > change detected (hash mismatch) strnlen Hash = 146835646621254984
> > > > > [-Wbackend-plugin]
> > > > > warning: arch/x86/boot/compressed/string.c: Function control flow
> > > > > change detected (hash mismatch) simple_strtoull Hash =
> > > > > 252792765950587360 [-Wbackend-plugin]
> > > > > warning: arch/x86/boot/compressed/string.c: Function control flow
> > > > > change detected (hash mismatch) strstr Hash = 391331303349076211
> > > > > [-Wbackend-plugin]
> > > > > warning: arch/x86/boot/compressed/string.c: Function control flow
> > > > > change detected (hash mismatch) strchr Hash = 1063705159280644635
> > > > > [-Wbackend-plugin]
> > > > > warning: arch/x86/boot/compressed/string.c: Function control flow
> > > > > change detected (hash mismatch) kstrtoull Hash = 758414239132790022
> > > > > [-Wbackend-plugin]
> > > > > drivers/infiniband/hw/hfi1/platform.o: warning: objtool: tune_serdes()
> > > > > falls through to next function apply_tx_lanes()
> > > > >
> > > > > Cannot say if this information is helpful.
> > > > >
> > > >
> > > > My LLVM/Clang v12 is from <apt.llvm.org>:
> > > >
> > > > clang-12 version 1:12~++20210115111113+45ef053bd709-1~exp1~20210115101809.3724
> > > >
> > > > My kernel-config is attached.
> > > >
> > >
> > > I dropped "LLVM_IAS=1" from my make line and did for my next build:
> > >
> > > $ scripts/diffconfig /boot/config-5.11.0-rc3-8-amd64-clang12-pgo .config
> > > BUILD_SALT "5.11.0-rc3-8-amd64-clang12-pgo" -> "5.11.0-rc3-10-amd64-clang12-pgo"
> > > DEBUG_INFO_DWARF2 n -> y
> > > DEBUG_INFO_DWARF5 y -> n
> > > PGO_CLANG y -> n
> > >
> > > Means dropped DWARF5 support.
> > >
> > Hi Sedat,
> >
> > Using PGO just improves optimizations. So unless there's miscompile,
> > or some other nefarious thing, it shouldn't affect how the boot loader
> > runs.
> >
> > As a sanity check, does the same Linux source and compiler version
> > generate a bootable kernel when PGO isn't used?
> >
>
> Yes, I can boot with the same code base without PGO.
>
> With the attached kernel-config.
>
> I remember there is a fix in CBL issue tracker for...
>
> ( https://github.com/ClangBuiltLinux/linux/issues/1250 )
>
> Loading, please wait...
> Starting version 247.2-4
> [    2.157223] floppy: module verification failed: signature and/or
> required key missing - tainting kernel
> [    2.179326] i2c_piix4: Unknown symbol _GLOBAL_OFFSET_TABLE_ (err -2)
> [    2.183558] scsi_mod: Unknown symbol _GLOBAL_OFFSET_TABLE_ (err -2)
> [    2.187991] floppy: Unknown symbol _GLOBAL_OFFSET_TABLE_ (err -2)
> [    2.195047] psmouse: Unknown symbol _GLOBAL_OFFSET_TABLE_ (err -2)
> [    2.210404] scsi_mod: Unknown symbol _GLOBAL_OFFSET_TABLE_ (err -2)
> [    2.231055] scsi_mod: Unknown symbol _GLOBAL_OFFSET_TABLE_ (err -2)
>

[ CC Fangrui ]

With the attached...

   [PATCH v3] module: Ignore _GLOBAL_OFFSET_TABLE_ when warning for
undefined symbols

...I was finally able to boot into a rebuilt PGO-optimized Linux-kernel.
For details see ClangBuiltLinux issue #1250 "Unknown symbol
_GLOBAL_OFFSET_TABLE_ loading kernel modules".

@ Bill Nick Sami Nathan

1. Can you say something about the impact of passing "LLVM_IAS=1" to make?
2. Can you please try Nick's DWARF v5 support patchset v5 and
CONFIG_DEBUG_INFO_DWARF5=y (see attachments)?

I would like to know what the impact of Clang's Integrated
Assembler and DWARF v5 is.

I dropped both, which means...

1. Do not pass "LLVM_IAS=1" to make.
2. Use default DWARF v2 (with Nick's patchset: CONFIG_DEBUG_INFO_DWARF2=y).

...for a successful build and boot on bare metal.

Thanks.

- Sedat -

[1] https://github.com/ClangBuiltLinux/linux/issues/1250
[Mon Jan 18 01:02:58 2021] microcode: microcode updated early to revision 0x2f, date = 2019-02-17
[Mon Jan 18 01:02:58 2021] Linux version 5.11.0-rc3-10-amd64-clang12-pgo (sedat.dilek@gmail.com@iniza) (Debian clang version 12.0.0-++20210115111113+45ef053bd709-1~exp1~20210115101809.3724, LLD 12.0.0) #10~bullseye+dileks1 SMP 2021-01-17
[Mon Jan 18 01:02:58 2021] Command line: BOOT_IMAGE=/boot/vmlinuz-5.11.0-rc3-10-amd64-clang12-pgo root=UUID=5f730cbc-abda-410c-9ea9-f0bdeda41926 ro
[Mon Jan 18 01:02:58 2021] Disabled fast string operations
[Mon Jan 18 01:02:58 2021] x86/fpu: Supporting XSAVE feature 0x001: 'x87 floating point registers'
[Mon Jan 18 01:02:58 2021] x86/fpu: Supporting XSAVE feature 0x002: 'SSE registers'
[Mon Jan 18 01:02:58 2021] x86/fpu: Supporting XSAVE feature 0x004: 'AVX registers'
[Mon Jan 18 01:02:58 2021] x86/fpu: xstate_offset[2]:  576, xstate_sizes[2]:  256
[Mon Jan 18 01:02:58 2021] x86/fpu: Enabled xstate features 0x7, context size is 832 bytes, using 'standard' format.
[Mon Jan 18 01:02:58 2021] BIOS-provided physical RAM map:
[Mon Jan 18 01:02:58 2021] BIOS-e820: [mem 0x0000000000000000-0x000000000009d7ff] usable
[Mon Jan 18 01:02:58 2021] BIOS-e820: [mem 0x000000000009d800-0x000000000009ffff] reserved
[Mon Jan 18 01:02:58 2021] BIOS-e820: [mem 0x00000000000e0000-0x00000000000fffff] reserved
[Mon Jan 18 01:02:58 2021] BIOS-e820: [mem 0x0000000000100000-0x000000001fffffff] usable
[Mon Jan 18 01:02:58 2021] BIOS-e820: [mem 0x0000000020000000-0x00000000201fffff] reserved
[Mon Jan 18 01:02:58 2021] BIOS-e820: [mem 0x0000000020200000-0x000000003fffffff] usable
[Mon Jan 18 01:02:58 2021] BIOS-e820: [mem 0x0000000040000000-0x00000000401fffff] reserved
[Mon Jan 18 01:02:58 2021] BIOS-e820: [mem 0x0000000040200000-0x00000000d9c9efff] usable
[Mon Jan 18 01:02:58 2021] BIOS-e820: [mem 0x00000000d9c9f000-0x00000000dae7efff] reserved
[Mon Jan 18 01:02:58 2021] BIOS-e820: [mem 0x00000000dae7f000-0x00000000daf9efff] ACPI NVS
[Mon Jan 18 01:02:58 2021] BIOS-e820: [mem 0x00000000daf9f000-0x00000000daffefff] ACPI data
[Mon Jan 18 01:02:58 2021] BIOS-e820: [mem 0x00000000dafff000-0x00000000daffffff] usable
[Mon Jan 18 01:02:58 2021] BIOS-e820: [mem 0x00000000db000000-0x00000000df9fffff] reserved
[Mon Jan 18 01:02:58 2021] BIOS-e820: [mem 0x00000000f8000000-0x00000000fbffffff] reserved
[Mon Jan 18 01:02:58 2021] BIOS-e820: [mem 0x00000000fec00000-0x00000000fec00fff] reserved
[Mon Jan 18 01:02:58 2021] BIOS-e820: [mem 0x00000000fed08000-0x00000000fed08fff] reserved
[Mon Jan 18 01:02:58 2021] BIOS-e820: [mem 0x00000000fed10000-0x00000000fed19fff] reserved
[Mon Jan 18 01:02:58 2021] BIOS-e820: [mem 0x00000000fed1c000-0x00000000fed1ffff] reserved
[Mon Jan 18 01:02:58 2021] BIOS-e820: [mem 0x00000000fee00000-0x00000000fee00fff] reserved
[Mon Jan 18 01:02:58 2021] BIOS-e820: [mem 0x00000000ffd80000-0x00000000ffffffff] reserved
[Mon Jan 18 01:02:58 2021] BIOS-e820: [mem 0x0000000100000000-0x000000021fdfffff] usable
[Mon Jan 18 01:02:58 2021] NX (Execute Disable) protection: active
[Mon Jan 18 01:02:58 2021] SMBIOS 2.6 present.
[Mon Jan 18 01:02:58 2021] DMI: SAMSUNG ELECTRONICS CO., LTD. 530U3BI/530U4BI/530U4BH/530U3BI/530U4BI/530U4BH, BIOS 13XK 03/28/2013
[Mon Jan 18 01:02:58 2021] tsc: Fast TSC calibration using PIT
[Mon Jan 18 01:02:58 2021] tsc: Detected 1596.330 MHz processor
[Mon Jan 18 01:02:58 2021] e820: update [mem 0x00000000-0x00000fff] usable ==> reserved
[Mon Jan 18 01:02:58 2021] e820: remove [mem 0x000a0000-0x000fffff] usable
[Mon Jan 18 01:02:58 2021] last_pfn = 0x21fe00 max_arch_pfn = 0x400000000
[Mon Jan 18 01:02:58 2021] MTRR default type: uncachable
[Mon Jan 18 01:02:58 2021] MTRR fixed ranges enabled:
[Mon Jan 18 01:02:58 2021]   00000-9FFFF write-back
[Mon Jan 18 01:02:58 2021]   A0000-BFFFF uncachable
[Mon Jan 18 01:02:58 2021]   C0000-FFFFF write-protect
[Mon Jan 18 01:02:58 2021] MTRR variable ranges enabled:
[Mon Jan 18 01:02:58 2021]   0 base 000000000 mask F80000000 write-back
[Mon Jan 18 01:02:58 2021]   1 base 080000000 mask FC0000000 write-back
[Mon Jan 18 01:02:58 2021]   2 base 0C0000000 mask FE0000000 write-back
[Mon Jan 18 01:02:58 2021]   3 base 0DC000000 mask FFC000000 uncachable
[Mon Jan 18 01:02:58 2021]   4 base 0DB000000 mask FFF000000 uncachable
[Mon Jan 18 01:02:58 2021]   5 base 100000000 mask F00000000 write-back
[Mon Jan 18 01:02:58 2021]   6 base 200000000 mask FE0000000 write-back
[Mon Jan 18 01:02:58 2021]   7 base 21FE00000 mask FFFE00000 uncachable
[Mon Jan 18 01:02:58 2021]   8 base 0FFC00000 mask FFFC00000 write-protect
[Mon Jan 18 01:02:58 2021]   9 disabled
[Mon Jan 18 01:02:58 2021] x86/PAT: Configuration [0-7]: WB  WC  UC- UC  WB  WP  UC- WT  
[Mon Jan 18 01:02:58 2021] last_pfn = 0xdb000 max_arch_pfn = 0x400000000
[Mon Jan 18 01:02:58 2021] found SMP MP-table at [mem 0x000f00e0-0x000f00ef]
[Mon Jan 18 01:02:58 2021] reserving inaccessible SNB gfx pages
[Mon Jan 18 01:02:58 2021] RAMDISK: [mem 0x33fe5000-0x35fe9fff]
[Mon Jan 18 01:02:58 2021] ACPI: Early table checksum verification disabled
[Mon Jan 18 01:02:58 2021] ACPI: RSDP 0x00000000000F0100 000024 (v02 SECCSD)
[Mon Jan 18 01:02:58 2021] ACPI: XSDT 0x00000000DAFFE170 000084 (v01 SECCSD LH43STAR 00000002 PTEC 00000002)
[Mon Jan 18 01:02:58 2021] ACPI: FACP 0x00000000DAFEF000 00010C (v05 SECCSD LH43STAR 00000002 PTL  00000002)
[Mon Jan 18 01:02:58 2021] ACPI: DSDT 0x00000000DAFF2000 0083AC (v02 SECCSD SNB-CPT  00000000 INTL 20061109)
[Mon Jan 18 01:02:58 2021] ACPI: FACS 0x00000000DAF47000 000040
[Mon Jan 18 01:02:58 2021] ACPI: SLIC 0x00000000DAFFD000 000176 (v01 SECCSD LH43STAR 00000002 PTEC 00000001)
[Mon Jan 18 01:02:58 2021] ACPI: SSDT 0x00000000DAFFB000 001068 (v01 SECCSD PtidDevc 00001000 INTL 20061109)
[Mon Jan 18 01:02:58 2021] ACPI: ASF! 0x00000000DAFF1000 0000A5 (v32 SECCSD LH43STAR 00000002 PTL  00000002)
[Mon Jan 18 01:02:58 2021] ACPI: HPET 0x00000000DAFEE000 000038 (v01 SECCSD LH43STAR 00000002 PTL  00000002)
[Mon Jan 18 01:02:58 2021] ACPI: APIC 0x00000000DAFED000 000098 (v03 SECCSD LH43STAR 00000002 PTL  00000002)
[Mon Jan 18 01:02:58 2021] ACPI: MCFG 0x00000000DAFEC000 00003C (v01 SECCSD LH43STAR 00000002 PTL  00000002)
[Mon Jan 18 01:02:58 2021] ACPI: SSDT 0x00000000DAFEB000 000804 (v01 PmRef  Cpu0Ist  00003000 INTL 20061109)
[Mon Jan 18 01:02:58 2021] ACPI: SSDT 0x00000000DAFEA000 000996 (v01 PmRef  CpuPm    00003000 INTL 20061109)
[Mon Jan 18 01:02:58 2021] ACPI: UEFI 0x00000000DAFE9000 00003E (v01 SECCSD LH43STAR 00000002 PTL  00000002)
[Mon Jan 18 01:02:58 2021] ACPI: UEFI 0x00000000DAFE8000 000042 (v01 PTL    COMBUF   00000001 PTL  00000001)
[Mon Jan 18 01:02:58 2021] ACPI: UEFI 0x00000000DAFE7000 00026A (v01 SECCSD LH43STAR 00000002 PTL  00000002)
[Mon Jan 18 01:02:58 2021] ACPI: Local APIC address 0xfee00000
[Mon Jan 18 01:02:58 2021] No NUMA configuration found
[Mon Jan 18 01:02:58 2021] Faking a node at [mem 0x0000000000000000-0x000000021fdfffff]
[Mon Jan 18 01:02:58 2021] NODE_DATA(0) allocated [mem 0x21fdd3000-0x21fdfcfff]
[Mon Jan 18 01:02:58 2021] Zone ranges:
[Mon Jan 18 01:02:58 2021]   DMA      [mem 0x0000000000001000-0x0000000000ffffff]
[Mon Jan 18 01:02:58 2021]   DMA32    [mem 0x0000000001000000-0x00000000ffffffff]
[Mon Jan 18 01:02:58 2021]   Normal   [mem 0x0000000100000000-0x000000021fdfffff]
[Mon Jan 18 01:02:58 2021]   Device   empty
[Mon Jan 18 01:02:58 2021] Movable zone start for each node
[Mon Jan 18 01:02:58 2021] Early memory node ranges
[Mon Jan 18 01:02:58 2021]   node   0: [mem 0x0000000000001000-0x000000000009cfff]
[Mon Jan 18 01:02:58 2021]   node   0: [mem 0x0000000000100000-0x000000001fffffff]
[Mon Jan 18 01:02:58 2021]   node   0: [mem 0x0000000020200000-0x000000003fffffff]
[Mon Jan 18 01:02:58 2021]   node   0: [mem 0x0000000040200000-0x00000000d9c9efff]
[Mon Jan 18 01:02:58 2021]   node   0: [mem 0x00000000dafff000-0x00000000daffffff]
[Mon Jan 18 01:02:58 2021]   node   0: [mem 0x0000000100000000-0x000000021fdfffff]
[Mon Jan 18 01:02:58 2021] Zeroed struct page in unavailable ranges: 27076 pages
[Mon Jan 18 01:02:58 2021] Initmem setup node 0 [mem 0x0000000000001000-0x000000021fdfffff]
[Mon Jan 18 01:02:58 2021] On node 0 totalpages: 2070076
[Mon Jan 18 01:02:58 2021]   DMA zone: 64 pages used for memmap
[Mon Jan 18 01:02:58 2021]   DMA zone: 156 pages reserved
[Mon Jan 18 01:02:58 2021]   DMA zone: 3996 pages, LIFO batch:0
[Mon Jan 18 01:02:58 2021]   DMA32 zone: 13859 pages used for memmap
[Mon Jan 18 01:02:58 2021]   DMA32 zone: 886944 pages, LIFO batch:63
[Mon Jan 18 01:02:58 2021]   Normal zone: 18424 pages used for memmap
[Mon Jan 18 01:02:58 2021]   Normal zone: 1179136 pages, LIFO batch:63
[Mon Jan 18 01:02:58 2021] Reserving Intel graphics memory at [mem 0xdba00000-0xdf9fffff]
[Mon Jan 18 01:02:58 2021] ACPI: PM-Timer IO Port: 0x408
[Mon Jan 18 01:02:58 2021] ACPI: Local APIC address 0xfee00000
[Mon Jan 18 01:02:58 2021] ACPI: LAPIC_NMI (acpi_id[0x00] high edge lint[0x1])
[Mon Jan 18 01:02:58 2021] ACPI: LAPIC_NMI (acpi_id[0x01] high edge lint[0x1])
[Mon Jan 18 01:02:58 2021] IOAPIC[0]: apic_id 14, version 32, address 0xfec00000, GSI 0-23
[Mon Jan 18 01:02:58 2021] ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 2 dfl dfl)
[Mon Jan 18 01:02:58 2021] ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 high level)
[Mon Jan 18 01:02:58 2021] ACPI: IRQ0 used by override.
[Mon Jan 18 01:02:58 2021] ACPI: IRQ9 used by override.
[Mon Jan 18 01:02:58 2021] Using ACPI (MADT) for SMP configuration information
[Mon Jan 18 01:02:58 2021] ACPI: HPET id: 0x8086a301 base: 0xfed00000
[Mon Jan 18 01:02:58 2021] TSC deadline timer available
[Mon Jan 18 01:02:58 2021] smpboot: Allowing 8 CPUs, 4 hotplug CPUs
[Mon Jan 18 01:02:58 2021] PM: hibernation: Registered nosave memory: [mem 0x00000000-0x00000fff]
[Mon Jan 18 01:02:58 2021] PM: hibernation: Registered nosave memory: [mem 0x0009d000-0x0009dfff]
[Mon Jan 18 01:02:58 2021] PM: hibernation: Registered nosave memory: [mem 0x0009e000-0x0009ffff]
[Mon Jan 18 01:02:58 2021] PM: hibernation: Registered nosave memory: [mem 0x000a0000-0x000dffff]
[Mon Jan 18 01:02:58 2021] PM: hibernation: Registered nosave memory: [mem 0x000e0000-0x000fffff]
[Mon Jan 18 01:02:58 2021] PM: hibernation: Registered nosave memory: [mem 0x20000000-0x201fffff]
[Mon Jan 18 01:02:58 2021] PM: hibernation: Registered nosave memory: [mem 0x40000000-0x401fffff]
[Mon Jan 18 01:02:58 2021] PM: hibernation: Registered nosave memory: [mem 0xd9c9f000-0xdae7efff]
[Mon Jan 18 01:02:58 2021] PM: hibernation: Registered nosave memory: [mem 0xdae7f000-0xdaf9efff]
[Mon Jan 18 01:02:58 2021] PM: hibernation: Registered nosave memory: [mem 0xdaf9f000-0xdaffefff]
[Mon Jan 18 01:02:58 2021] PM: hibernation: Registered nosave memory: [mem 0xdb000000-0xdf9fffff]
[Mon Jan 18 01:02:58 2021] PM: hibernation: Registered nosave memory: [mem 0xdfa00000-0xf7ffffff]
[Mon Jan 18 01:02:58 2021] PM: hibernation: Registered nosave memory: [mem 0xf8000000-0xfbffffff]
[Mon Jan 18 01:02:58 2021] PM: hibernation: Registered nosave memory: [mem 0xfc000000-0xfebfffff]
[Mon Jan 18 01:02:58 2021] PM: hibernation: Registered nosave memory: [mem 0xfec00000-0xfec00fff]
[Mon Jan 18 01:02:58 2021] PM: hibernation: Registered nosave memory: [mem 0xfec01000-0xfed07fff]
[Mon Jan 18 01:02:58 2021] PM: hibernation: Registered nosave memory: [mem 0xfed08000-0xfed08fff]
[Mon Jan 18 01:02:58 2021] PM: hibernation: Registered nosave memory: [mem 0xfed09000-0xfed0ffff]
[Mon Jan 18 01:02:58 2021] PM: hibernation: Registered nosave memory: [mem 0xfed10000-0xfed19fff]
[Mon Jan 18 01:02:58 2021] PM: hibernation: Registered nosave memory: [mem 0xfed1a000-0xfed1bfff]
[Mon Jan 18 01:02:58 2021] PM: hibernation: Registered nosave memory: [mem 0xfed1c000-0xfed1ffff]
[Mon Jan 18 01:02:58 2021] PM: hibernation: Registered nosave memory: [mem 0xfed20000-0xfedfffff]
[Mon Jan 18 01:02:58 2021] PM: hibernation: Registered nosave memory: [mem 0xfee00000-0xfee00fff]
[Mon Jan 18 01:02:58 2021] PM: hibernation: Registered nosave memory: [mem 0xfee01000-0xffd7ffff]
[Mon Jan 18 01:02:58 2021] PM: hibernation: Registered nosave memory: [mem 0xffd80000-0xffffffff]
[Mon Jan 18 01:02:58 2021] [mem 0xdfa00000-0xf7ffffff] available for PCI devices
[Mon Jan 18 01:02:58 2021] Booting paravirtualized kernel on bare hardware
[Mon Jan 18 01:02:58 2021] clocksource: refined-jiffies: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 7645519600211568 ns
[Mon Jan 18 01:02:58 2021] setup_percpu: NR_CPUS:8192 nr_cpumask_bits:8 nr_cpu_ids:8 nr_node_ids:1
[Mon Jan 18 01:02:58 2021] percpu: Embedded 54 pages/cpu s183512 r8192 d29480 u262144
[Mon Jan 18 01:02:58 2021] pcpu-alloc: s183512 r8192 d29480 u262144 alloc=1*2097152
[Mon Jan 18 01:02:58 2021] pcpu-alloc: [0] 0 1 2 3 4 5 6 7 
[Mon Jan 18 01:02:58 2021] Built 1 zonelists, mobility grouping on.  Total pages: 2037573
[Mon Jan 18 01:02:58 2021] Policy zone: Normal
[Mon Jan 18 01:02:58 2021] Kernel command line: BOOT_IMAGE=/boot/vmlinuz-5.11.0-rc3-10-amd64-clang12-pgo root=UUID=5f730cbc-abda-410c-9ea9-f0bdeda41926 ro
[Mon Jan 18 01:02:58 2021] Dentry cache hash table entries: 1048576 (order: 11, 8388608 bytes, linear)
[Mon Jan 18 01:02:58 2021] Inode-cache hash table entries: 524288 (order: 10, 4194304 bytes, linear)
[Mon Jan 18 01:02:58 2021] mem auto-init: stack:off, heap alloc:on, heap free:off
[Mon Jan 18 01:02:58 2021] Memory: 3595624K/8280304K available (12295K kernel code, 2462K rwdata, 4008K rodata, 2444K init, 1888K bss, 273396K reserved, 0K cma-reserved)
[Mon Jan 18 01:02:58 2021] random: get_random_u64 called from kmem_cache_open+0x27/0x500 with crng_init=0
[Mon Jan 18 01:02:58 2021] SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=8, Nodes=1
[Mon Jan 18 01:02:58 2021] Kernel/User page tables isolation: enabled
[Mon Jan 18 01:02:58 2021] ftrace: allocating 36189 entries in 142 pages
[Mon Jan 18 01:02:58 2021] ftrace: allocated 142 pages with 4 groups
[Mon Jan 18 01:02:58 2021] rcu: Hierarchical RCU implementation.
[Mon Jan 18 01:02:58 2021] rcu: 	RCU restricting CPUs from NR_CPUS=8192 to nr_cpu_ids=8.
[Mon Jan 18 01:02:58 2021] 	Rude variant of Tasks RCU enabled.
[Mon Jan 18 01:02:58 2021] 	Tracing variant of Tasks RCU enabled.
[Mon Jan 18 01:02:58 2021] rcu: RCU calculated value of scheduler-enlistment delay is 25 jiffies.
[Mon Jan 18 01:02:58 2021] rcu: Adjusting geometry for rcu_fanout_leaf=16, nr_cpu_ids=8
[Mon Jan 18 01:02:58 2021] NR_IRQS: 524544, nr_irqs: 488, preallocated irqs: 16
[Mon Jan 18 01:02:58 2021] Console: colour VGA+ 80x25
[Mon Jan 18 01:02:58 2021] printk: console [tty0] enabled
[Mon Jan 18 01:02:58 2021] ACPI: Core revision 20201113
[Mon Jan 18 01:02:58 2021] clocksource: hpet: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 133484882848 ns
[Mon Jan 18 01:02:58 2021] APIC: Switch to symmetric I/O mode setup
[Mon Jan 18 01:02:58 2021] x2apic: IRQ remapping doesn't support X2APIC mode
[Mon Jan 18 01:02:58 2021] ..TIMER: vector=0x30 apic1=0 pin1=2 apic2=-1 pin2=-1
[Mon Jan 18 01:02:58 2021] clocksource: tsc-early: mask: 0xffffffffffffffff max_cycles: 0x17029a094ea, max_idle_ns: 440795233542 ns
[Mon Jan 18 01:02:58 2021] Calibrating delay loop (skipped), value calculated using timer frequency.. 3192.66 BogoMIPS (lpj=6385320)
[Mon Jan 18 01:02:58 2021] pid_max: default: 32768 minimum: 301
[Mon Jan 18 01:02:58 2021] LSM: Security Framework initializing
[Mon Jan 18 01:02:58 2021] Yama: becoming mindful.
[Mon Jan 18 01:02:58 2021] AppArmor: AppArmor initialized
[Mon Jan 18 01:02:58 2021] TOMOYO Linux initialized
[Mon Jan 18 01:02:58 2021] Mount-cache hash table entries: 16384 (order: 5, 131072 bytes, linear)
[Mon Jan 18 01:02:58 2021] Mountpoint-cache hash table entries: 16384 (order: 5, 131072 bytes, linear)
[Mon Jan 18 01:02:58 2021] Disabled fast string operations
[Mon Jan 18 01:02:58 2021] mce: CPU0: Thermal monitoring enabled (TM1)
[Mon Jan 18 01:02:58 2021] process: using mwait in idle threads
[Mon Jan 18 01:02:58 2021] Last level iTLB entries: 4KB 512, 2MB 8, 4MB 8
[Mon Jan 18 01:02:58 2021] Last level dTLB entries: 4KB 512, 2MB 32, 4MB 32, 1GB 0
[Mon Jan 18 01:02:58 2021] Spectre V1 : Mitigation: usercopy/swapgs barriers and __user pointer sanitization
[Mon Jan 18 01:02:58 2021] Spectre V2 : Mitigation: Full generic retpoline
[Mon Jan 18 01:02:58 2021] Spectre V2 : Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch
[Mon Jan 18 01:02:58 2021] Spectre V2 : Enabling Restricted Speculation for firmware calls
[Mon Jan 18 01:02:58 2021] Spectre V2 : mitigation: Enabling conditional Indirect Branch Prediction Barrier
[Mon Jan 18 01:02:58 2021] Spectre V2 : User space: Mitigation: STIBP via seccomp and prctl
[Mon Jan 18 01:02:58 2021] Speculative Store Bypass: Mitigation: Speculative Store Bypass disabled via prctl and seccomp
[Mon Jan 18 01:02:58 2021] MDS: Mitigation: Clear CPU buffers
[Mon Jan 18 01:02:58 2021] Freeing SMP alternatives memory: 36K
[Mon Jan 18 01:02:58 2021] smpboot: Estimated ratio of average max frequency by base frequency (times 1024): 1280
[Mon Jan 18 01:02:58 2021] smpboot: CPU0: Intel(R) Core(TM) i5-2467M CPU @ 1.60GHz (family: 0x6, model: 0x2a, stepping: 0x7)
[Mon Jan 18 01:02:58 2021] Performance Events: PEBS fmt1+, SandyBridge events, 16-deep LBR, full-width counters, Intel PMU driver.
[Mon Jan 18 01:02:58 2021] ... version:                3
[Mon Jan 18 01:02:58 2021] ... bit width:              48
[Mon Jan 18 01:02:58 2021] ... generic registers:      4
[Mon Jan 18 01:02:58 2021] ... value mask:             0000ffffffffffff
[Mon Jan 18 01:02:58 2021] ... max period:             00007fffffffffff
[Mon Jan 18 01:02:58 2021] ... fixed-purpose events:   3
[Mon Jan 18 01:02:58 2021] ... event mask:             000000070000000f
[Mon Jan 18 01:02:58 2021] rcu: Hierarchical SRCU implementation.
[Mon Jan 18 01:02:58 2021] NMI watchdog: Enabled. Permanently consumes one hw-PMU counter.
[Mon Jan 18 01:02:58 2021] smp: Bringing up secondary CPUs ...
[Mon Jan 18 01:02:58 2021] x86: Booting SMP configuration:
[Mon Jan 18 01:02:58 2021] .... node  #0, CPUs:      #1
[Mon Jan 18 01:02:58 2021] Disabled fast string operations
[Mon Jan 18 01:02:58 2021] MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.
[Mon Jan 18 01:02:58 2021]  #2
[Mon Jan 18 01:02:58 2021] Disabled fast string operations
[Mon Jan 18 01:02:58 2021]  #3
[Mon Jan 18 01:02:58 2021] Disabled fast string operations
[Mon Jan 18 01:02:58 2021] smp: Brought up 1 node, 4 CPUs
[Mon Jan 18 01:02:58 2021] smpboot: Max logical packages: 2
[Mon Jan 18 01:02:58 2021] smpboot: Total of 4 processors activated (12770.64 BogoMIPS)
[Mon Jan 18 01:02:58 2021] node 0 deferred pages initialised in 8ms
[Mon Jan 18 01:02:58 2021] devtmpfs: initialized
[Mon Jan 18 01:02:58 2021] x86/mm: Memory block size: 128MB
[Mon Jan 18 01:02:58 2021] PM: Registering ACPI NVS region [mem 0xdae7f000-0xdaf9efff] (1179648 bytes)
[Mon Jan 18 01:02:58 2021] clocksource: jiffies: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 7645041785100000 ns
[Mon Jan 18 01:02:58 2021] futex hash table entries: 2048 (order: 5, 131072 bytes, linear)
[Mon Jan 18 01:02:58 2021] pinctrl core: initialized pinctrl subsystem
[Mon Jan 18 01:02:58 2021] NET: Registered protocol family 16
[Mon Jan 18 01:02:58 2021] audit: initializing netlink subsys (disabled)
[Mon Jan 18 01:02:58 2021] audit: type=2000 audit(1610928178.052:1): state=initialized audit_enabled=0 res=1
[Mon Jan 18 01:02:58 2021] thermal_sys: Registered thermal governor 'fair_share'
[Mon Jan 18 01:02:58 2021] thermal_sys: Registered thermal governor 'bang_bang'
[Mon Jan 18 01:02:58 2021] thermal_sys: Registered thermal governor 'step_wise'
[Mon Jan 18 01:02:58 2021] thermal_sys: Registered thermal governor 'user_space'
[Mon Jan 18 01:02:58 2021] cpuidle: using governor ladder
[Mon Jan 18 01:02:58 2021] cpuidle: using governor menu
[Mon Jan 18 01:02:58 2021] ACPI: bus type PCI registered
[Mon Jan 18 01:02:58 2021] acpiphp: ACPI Hot Plug PCI Controller Driver version: 0.5
[Mon Jan 18 01:02:58 2021] PCI: MMCONFIG for domain 0000 [bus 00-3f] at [mem 0xf8000000-0xfbffffff] (base 0xf8000000)
[Mon Jan 18 01:02:58 2021] PCI: MMCONFIG at [mem 0xf8000000-0xfbffffff] reserved in E820
[Mon Jan 18 01:02:58 2021] PCI: Using configuration type 1 for base access
[Mon Jan 18 01:02:58 2021] core: PMU erratum BJ122, BV98, HSD29 worked around, HT is on
[Mon Jan 18 01:02:58 2021] ENERGY_PERF_BIAS: Set to 'normal', was 'performance'
[Mon Jan 18 01:02:58 2021] mtrr: your CPUs had inconsistent variable MTRR settings
[Mon Jan 18 01:02:58 2021] mtrr: probably your BIOS does not setup all CPUs.
[Mon Jan 18 01:02:58 2021] mtrr: corrected configuration.
[Mon Jan 18 01:02:58 2021] HugeTLB registered 2.00 MiB page size, pre-allocated 0 pages
[Mon Jan 18 01:02:58 2021] ACPI: Added _OSI(Module Device)
[Mon Jan 18 01:02:58 2021] ACPI: Added _OSI(Processor Device)
[Mon Jan 18 01:02:58 2021] ACPI: Added _OSI(3.0 _SCP Extensions)
[Mon Jan 18 01:02:58 2021] ACPI: Added _OSI(Processor Aggregator Device)
[Mon Jan 18 01:02:58 2021] ACPI: Added _OSI(Linux-Dell-Video)
[Mon Jan 18 01:02:58 2021] ACPI: Added _OSI(Linux-Lenovo-NV-HDMI-Audio)
[Mon Jan 18 01:02:58 2021] ACPI: Added _OSI(Linux-HPI-Hybrid-Graphics)
[Mon Jan 18 01:02:58 2021] ACPI: 4 ACPI AML tables successfully acquired and loaded
[Mon Jan 18 01:02:58 2021] ACPI: [Firmware Bug]: BIOS _OSI(Linux) query ignored
[Mon Jan 18 01:02:58 2021] ACPI: Dynamic OEM Table Load:
[Mon Jan 18 01:02:58 2021] ACPI: SSDT 0xFFFF95194082A800 000688 (v01 PmRef  Cpu0Cst  00003001 INTL 20061109)
[Mon Jan 18 01:02:58 2021] ACPI: Dynamic OEM Table Load:
[Mon Jan 18 01:02:58 2021] ACPI: SSDT 0xFFFF951940BB8000 000303 (v01 PmRef  ApIst    00003000 INTL 20061109)
[Mon Jan 18 01:02:58 2021] ACPI: Dynamic OEM Table Load:
[Mon Jan 18 01:02:58 2021] ACPI: SSDT 0xFFFF9519408FEA00 000119 (v01 PmRef  ApCst    00003000 INTL 20061109)
[Mon Jan 18 01:02:58 2021] ACPI: EC: EC started
[Mon Jan 18 01:02:58 2021] ACPI: EC: interrupt blocked
[Mon Jan 18 01:02:58 2021] ACPI: EC: EC_CMD/EC_SC=0x66, EC_DATA=0x62
[Mon Jan 18 01:02:58 2021] ACPI: \_SB_.PCI0.LPCB.H_EC: Boot DSDT EC used to handle transactions
[Mon Jan 18 01:02:58 2021] ACPI: Interpreter enabled
[Mon Jan 18 01:02:58 2021] ACPI: (supports S0 S1 S3 S4 S5)
[Mon Jan 18 01:02:58 2021] ACPI: Using IOAPIC for interrupt routing
[Mon Jan 18 01:02:58 2021] PCI: Using host bridge windows from ACPI; if necessary, use "pci=nocrs" and report a bug
[Mon Jan 18 01:02:58 2021] ACPI: Enabled 8 GPEs in block 00 to 3F
[Mon Jan 18 01:02:58 2021] ACPI: Power Resource [FN00] (off)
[Mon Jan 18 01:02:58 2021] ACPI: Power Resource [FN01] (off)
[Mon Jan 18 01:02:58 2021] ACPI: Power Resource [FN02] (off)
[Mon Jan 18 01:02:58 2021] ACPI: Power Resource [FN03] (off)
[Mon Jan 18 01:02:58 2021] ACPI: Power Resource [FN04] (off)
[Mon Jan 18 01:02:58 2021] ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-3e])
[Mon Jan 18 01:02:58 2021] acpi PNP0A08:00: _OSC: OS supports [ExtendedConfig ASPM ClockPM Segments MSI HPX-Type3]
[Mon Jan 18 01:02:58 2021] acpi PNP0A08:00: _OSC failed (AE_ERROR); disabling ASPM
[Mon Jan 18 01:02:58 2021] PCI host bridge to bus 0000:00
[Mon Jan 18 01:02:58 2021] pci_bus 0000:00: root bus resource [io  0x0000-0x0cf7 window]
[Mon Jan 18 01:02:58 2021] pci_bus 0000:00: root bus resource [io  0x0d00-0xffff window]
[Mon Jan 18 01:02:58 2021] pci_bus 0000:00: root bus resource [mem 0x000a0000-0x000bffff window]
[Mon Jan 18 01:02:58 2021] pci_bus 0000:00: root bus resource [mem 0xdfa00000-0xfeafffff window]
[Mon Jan 18 01:02:58 2021] pci_bus 0000:00: root bus resource [mem 0xfed40000-0xfed44fff window]
[Mon Jan 18 01:02:58 2021] pci_bus 0000:00: root bus resource [bus 00-3e]
[Mon Jan 18 01:02:58 2021] pci 0000:00:00.0: [8086:0104] type 00 class 0x060000
[Mon Jan 18 01:02:58 2021] pci 0000:00:02.0: [8086:0116] type 00 class 0x030000
[Mon Jan 18 01:02:58 2021] pci 0000:00:02.0: reg 0x10: [mem 0xf0000000-0xf03fffff 64bit]
[Mon Jan 18 01:02:58 2021] pci 0000:00:02.0: reg 0x18: [mem 0xe0000000-0xefffffff 64bit pref]
[Mon Jan 18 01:02:58 2021] pci 0000:00:02.0: reg 0x20: [io  0x3000-0x303f]
[Mon Jan 18 01:02:58 2021] pci 0000:00:16.0: [8086:1c3a] type 00 class 0x078000
[Mon Jan 18 01:02:58 2021] pci 0000:00:16.0: reg 0x10: [mem 0xf0705000-0xf070500f 64bit]
[Mon Jan 18 01:02:58 2021] pci 0000:00:16.0: PME# supported from D0 D3hot D3cold
[Mon Jan 18 01:02:58 2021] pci 0000:00:1a.0: [8086:1c2d] type 00 class 0x0c0320
[Mon Jan 18 01:02:58 2021] pci 0000:00:1a.0: reg 0x10: [mem 0xf070a000-0xf070a3ff]
[Mon Jan 18 01:02:58 2021] pci 0000:00:1a.0: PME# supported from D0 D3hot D3cold
[Mon Jan 18 01:02:58 2021] pci 0000:00:1b.0: [8086:1c20] type 00 class 0x040300
[Mon Jan 18 01:02:58 2021] pci 0000:00:1b.0: reg 0x10: [mem 0xf0700000-0xf0703fff 64bit]
[Mon Jan 18 01:02:58 2021] pci 0000:00:1b.0: PME# supported from D0 D3hot D3cold
[Mon Jan 18 01:02:58 2021] pci 0000:00:1c.0: [8086:1c10] type 01 class 0x060400
[Mon Jan 18 01:02:58 2021] pci 0000:00:1c.0: PME# supported from D0 D3hot D3cold
[Mon Jan 18 01:02:58 2021] pci 0000:00:1c.3: [8086:1c16] type 01 class 0x060400
[Mon Jan 18 01:02:58 2021] pci 0000:00:1c.3: PME# supported from D0 D3hot D3cold
[Mon Jan 18 01:02:58 2021] pci 0000:00:1c.4: [8086:1c18] type 01 class 0x060400
[Mon Jan 18 01:02:58 2021] pci 0000:00:1c.4: PME# supported from D0 D3hot D3cold
[Mon Jan 18 01:02:58 2021] pci 0000:00:1d.0: [8086:1c26] type 00 class 0x0c0320
[Mon Jan 18 01:02:58 2021] pci 0000:00:1d.0: reg 0x10: [mem 0xf0709000-0xf07093ff]
[Mon Jan 18 01:02:58 2021] pci 0000:00:1d.0: PME# supported from D0 D3hot D3cold
[Mon Jan 18 01:02:58 2021] pci 0000:00:1f.0: [8086:1c49] type 00 class 0x060100
[Mon Jan 18 01:02:58 2021] pci 0000:00:1f.2: [8086:1c03] type 00 class 0x010601
[Mon Jan 18 01:02:58 2021] pci 0000:00:1f.2: reg 0x10: [io  0x3088-0x308f]
[Mon Jan 18 01:02:58 2021] pci 0000:00:1f.2: reg 0x14: [io  0x3094-0x3097]
[Mon Jan 18 01:02:58 2021] pci 0000:00:1f.2: reg 0x18: [io  0x3080-0x3087]
[Mon Jan 18 01:02:58 2021] pci 0000:00:1f.2: reg 0x1c: [io  0x3090-0x3093]
[Mon Jan 18 01:02:58 2021] pci 0000:00:1f.2: reg 0x20: [io  0x3060-0x307f]
[Mon Jan 18 01:02:58 2021] pci 0000:00:1f.2: reg 0x24: [mem 0xf0708000-0xf07087ff]
[Mon Jan 18 01:02:58 2021] pci 0000:00:1f.2: PME# supported from D3hot
[Mon Jan 18 01:02:58 2021] pci 0000:00:1f.3: [8086:1c22] type 00 class 0x0c0500
[Mon Jan 18 01:02:58 2021] pci 0000:00:1f.3: reg 0x10: [mem 0xf0704000-0xf07040ff 64bit]
[Mon Jan 18 01:02:58 2021] pci 0000:00:1f.3: reg 0x20: [io  0xefa0-0xefbf]
[Mon Jan 18 01:02:58 2021] pci 0000:01:00.0: [8086:0091] type 00 class 0x028000
[Mon Jan 18 01:02:58 2021] pci 0000:01:00.0: reg 0x10: [mem 0xf0600000-0xf0601fff 64bit]
[Mon Jan 18 01:02:58 2021] pci 0000:01:00.0: PME# supported from D0 D3hot D3cold
[Mon Jan 18 01:02:58 2021] pci 0000:00:1c.0: PCI bridge to [bus 01]
[Mon Jan 18 01:02:58 2021] pci 0000:00:1c.0:   bridge window [mem 0xf0600000-0xf06fffff]
[Mon Jan 18 01:02:58 2021] pci 0000:02:00.0: [10ec:8168] type 00 class 0x020000
[Mon Jan 18 01:02:58 2021] pci 0000:02:00.0: reg 0x10: [io  0x2000-0x20ff]
[Mon Jan 18 01:02:58 2021] pci 0000:02:00.0: reg 0x18: [mem 0xf0404000-0xf0404fff 64bit pref]
[Mon Jan 18 01:02:58 2021] pci 0000:02:00.0: reg 0x20: [mem 0xf0400000-0xf0403fff 64bit pref]
[Mon Jan 18 01:02:58 2021] pci 0000:02:00.0: supports D1 D2
[Mon Jan 18 01:02:58 2021] pci 0000:02:00.0: PME# supported from D0 D1 D2 D3hot D3cold
[Mon Jan 18 01:02:58 2021] pci 0000:00:1c.3: PCI bridge to [bus 02]
[Mon Jan 18 01:02:58 2021] pci 0000:00:1c.3:   bridge window [io  0x2000-0x2fff]
[Mon Jan 18 01:02:58 2021] pci 0000:00:1c.3:   bridge window [mem 0xf0400000-0xf04fffff 64bit pref]
[Mon Jan 18 01:02:58 2021] pci 0000:03:00.0: [1b21:1042] type 00 class 0x0c0330
[Mon Jan 18 01:02:58 2021] pci 0000:03:00.0: reg 0x10: [mem 0xf0500000-0xf0507fff 64bit]
[Mon Jan 18 01:02:58 2021] pci 0000:03:00.0: PME# supported from D3cold
[Mon Jan 18 01:02:58 2021] pci 0000:00:1c.4: PCI bridge to [bus 03]
[Mon Jan 18 01:02:58 2021] pci 0000:00:1c.4:   bridge window [mem 0xf0500000-0xf05fffff]
[Mon Jan 18 01:02:58 2021] ACPI: PCI Interrupt Link [LNKA] (IRQs 1 3 4 5 6 10 *11 12 14 15)
[Mon Jan 18 01:02:58 2021] ACPI: PCI Interrupt Link [LNKB] (IRQs 1 3 4 5 6 10 11 12 14 15) *0, disabled.
[Mon Jan 18 01:02:58 2021] ACPI: PCI Interrupt Link [LNKC] (IRQs 1 3 4 5 6 *10 11 12 14 15)
[Mon Jan 18 01:02:58 2021] ACPI: PCI Interrupt Link [LNKD] (IRQs 1 3 4 5 6 *10 11 12 14 15)
[Mon Jan 18 01:02:58 2021] ACPI: PCI Interrupt Link [LNKE] (IRQs 1 3 4 5 6 10 11 12 14 15) *9
[Mon Jan 18 01:02:58 2021] ACPI: PCI Interrupt Link [LNKF] (IRQs 1 3 4 5 6 10 11 12 14 15) *0, disabled.
[Mon Jan 18 01:02:58 2021] ACPI: PCI Interrupt Link [LNKG] (IRQs 1 3 4 5 6 10 *11 12 14 15)
[Mon Jan 18 01:02:58 2021] ACPI: PCI Interrupt Link [LNKH] (IRQs 1 3 4 5 6 10 11 12 14 15) *9
[Mon Jan 18 01:02:58 2021] ACPI: EC: interrupt unblocked
[Mon Jan 18 01:02:58 2021] ACPI: EC: event unblocked
[Mon Jan 18 01:02:58 2021] ACPI: EC: 0 stale EC events cleared
[Mon Jan 18 01:02:58 2021] ACPI: EC: EC_CMD/EC_SC=0x66, EC_DATA=0x62
[Mon Jan 18 01:02:58 2021] ACPI: EC: GPE=0x17
[Mon Jan 18 01:02:58 2021] ACPI: \_SB_.PCI0.LPCB.H_EC: Boot DSDT EC initialization complete
[Mon Jan 18 01:02:58 2021] ACPI: \_SB_.PCI0.LPCB.H_EC: EC: Used to handle transactions and events
[Mon Jan 18 01:02:58 2021] iommu: Default domain type: Translated 
[Mon Jan 18 01:02:58 2021] pci 0000:00:02.0: vgaarb: setting as boot VGA device
[Mon Jan 18 01:02:58 2021] pci 0000:00:02.0: vgaarb: VGA device added: decodes=io+mem,owns=io+mem,locks=none
[Mon Jan 18 01:02:58 2021] pci 0000:00:02.0: vgaarb: bridge control possible
[Mon Jan 18 01:02:58 2021] vgaarb: loaded
[Mon Jan 18 01:02:58 2021] EDAC MC: Ver: 3.0.0
[Mon Jan 18 01:02:58 2021] NetLabel: Initializing
[Mon Jan 18 01:02:58 2021] NetLabel:  domain hash size = 128
[Mon Jan 18 01:02:58 2021] NetLabel:  protocols = UNLABELED CIPSOv4 CALIPSO
[Mon Jan 18 01:02:58 2021] NetLabel:  unlabeled traffic allowed by default
[Mon Jan 18 01:02:58 2021] PCI: Using ACPI for IRQ routing
[Mon Jan 18 01:02:58 2021] PCI: pci_cache_line_size set to 64 bytes
[Mon Jan 18 01:02:58 2021] e820: reserve RAM buffer [mem 0x0009d800-0x0009ffff]
[Mon Jan 18 01:02:58 2021] e820: reserve RAM buffer [mem 0xd9c9f000-0xdbffffff]
[Mon Jan 18 01:02:58 2021] e820: reserve RAM buffer [mem 0xdb000000-0xdbffffff]
[Mon Jan 18 01:02:58 2021] e820: reserve RAM buffer [mem 0x21fe00000-0x21fffffff]
[Mon Jan 18 01:02:58 2021] hpet0: at MMIO 0xfed00000, IRQs 2, 8, 0, 0, 0, 0, 0, 0
[Mon Jan 18 01:02:58 2021] hpet0: 8 comparators, 64-bit 14.318180 MHz counter
[Mon Jan 18 01:02:58 2021] clocksource: Switched to clocksource tsc-early
[Mon Jan 18 01:02:58 2021] VFS: Disk quotas dquot_6.6.0
[Mon Jan 18 01:02:58 2021] VFS: Dquot-cache hash table entries: 512 (order 0, 4096 bytes)
[Mon Jan 18 01:02:58 2021] AppArmor: AppArmor Filesystem Enabled
[Mon Jan 18 01:02:58 2021] pnp: PnP ACPI init
[Mon Jan 18 01:02:58 2021] system 00:00: [io  0x0680-0x069f] has been reserved
[Mon Jan 18 01:02:58 2021] system 00:00: [io  0x1000-0x100f] has been reserved
[Mon Jan 18 01:02:58 2021] system 00:00: [io  0x5000-0x5003] has been reserved
[Mon Jan 18 01:02:58 2021] system 00:00: [io  0xffff] has been reserved
[Mon Jan 18 01:02:58 2021] system 00:00: [io  0x0400-0x0453] has been reserved
[Mon Jan 18 01:02:58 2021] system 00:00: [io  0x0458-0x047f] has been reserved
[Mon Jan 18 01:02:58 2021] system 00:00: [io  0x0500-0x057f] has been reserved
[Mon Jan 18 01:02:58 2021] system 00:00: [io  0x0a00-0x0a0f] has been reserved
[Mon Jan 18 01:02:58 2021] system 00:00: [io  0x164e-0x164f] has been reserved
[Mon Jan 18 01:02:58 2021] system 00:00: [io  0x5000-0x500f] could not be reserved
[Mon Jan 18 01:02:58 2021] system 00:00: Plug and Play ACPI device, IDs PNP0c02 (active)
[Mon Jan 18 01:02:58 2021] pnp 00:01: Plug and Play ACPI device, IDs PNP0b00 (active)
[Mon Jan 18 01:02:58 2021] system 00:02: [io  0x0454-0x0457] has been reserved
[Mon Jan 18 01:02:58 2021] system 00:02: Plug and Play ACPI device, IDs INT3f0d PNP0c02 (active)
[Mon Jan 18 01:02:58 2021] pnp 00:03: Plug and Play ACPI device, IDs PNP0303 (active)
[Mon Jan 18 01:02:58 2021] pnp 00:04: Plug and Play ACPI device, IDs ETD0b00 SYN0002 PNP0f13 (active)
[Mon Jan 18 01:02:58 2021] system 00:05: [mem 0xfed1c000-0xfed1ffff] has been reserved
[Mon Jan 18 01:02:58 2021] system 00:05: [mem 0xfed10000-0xfed17fff] has been reserved
[Mon Jan 18 01:02:58 2021] system 00:05: [mem 0xfed18000-0xfed18fff] has been reserved
[Mon Jan 18 01:02:58 2021] system 00:05: [mem 0xfed19000-0xfed19fff] has been reserved
[Mon Jan 18 01:02:58 2021] system 00:05: [mem 0xf8000000-0xfbffffff] has been reserved
[Mon Jan 18 01:02:58 2021] system 00:05: [mem 0xfed20000-0xfed3ffff] has been reserved
[Mon Jan 18 01:02:58 2021] system 00:05: [mem 0xfed90000-0xfed93fff] has been reserved
[Mon Jan 18 01:02:58 2021] system 00:05: [mem 0xfed45000-0xfed8ffff] has been reserved
[Mon Jan 18 01:02:58 2021] system 00:05: [mem 0xff000000-0xffffffff] could not be reserved
[Mon Jan 18 01:02:58 2021] system 00:05: [mem 0xfee00000-0xfeefffff] could not be reserved
[Mon Jan 18 01:02:58 2021] system 00:05: Plug and Play ACPI device, IDs PNP0c02 (active)
[Mon Jan 18 01:02:58 2021] system 00:06: Plug and Play ACPI device, IDs PNP0c01 (active)
[Mon Jan 18 01:02:58 2021] pnp: PnP ACPI: found 7 devices
[Mon Jan 18 01:02:58 2021] clocksource: acpi_pm: mask: 0xffffff max_cycles: 0xffffff, max_idle_ns: 2085701024 ns
[Mon Jan 18 01:02:58 2021] NET: Registered protocol family 2
[Mon Jan 18 01:02:58 2021] tcp_listen_portaddr_hash hash table entries: 4096 (order: 4, 65536 bytes, linear)
[Mon Jan 18 01:02:58 2021] TCP established hash table entries: 65536 (order: 7, 524288 bytes, linear)
[Mon Jan 18 01:02:58 2021] TCP bind hash table entries: 65536 (order: 8, 1048576 bytes, linear)
[Mon Jan 18 01:02:58 2021] TCP: Hash tables configured (established 65536 bind 65536)
[Mon Jan 18 01:02:58 2021] UDP hash table entries: 4096 (order: 5, 131072 bytes, linear)
[Mon Jan 18 01:02:58 2021] UDP-Lite hash table entries: 4096 (order: 5, 131072 bytes, linear)
[Mon Jan 18 01:02:58 2021] NET: Registered protocol family 1
[Mon Jan 18 01:02:58 2021] NET: Registered protocol family 44
[Mon Jan 18 01:02:58 2021] pci 0000:00:1c.0: PCI bridge to [bus 01]
[Mon Jan 18 01:02:58 2021] pci 0000:00:1c.0:   bridge window [mem 0xf0600000-0xf06fffff]
[Mon Jan 18 01:02:58 2021] pci 0000:00:1c.3: PCI bridge to [bus 02]
[Mon Jan 18 01:02:58 2021] pci 0000:00:1c.3:   bridge window [io  0x2000-0x2fff]
[Mon Jan 18 01:02:58 2021] pci 0000:00:1c.3:   bridge window [mem 0xf0400000-0xf04fffff 64bit pref]
[Mon Jan 18 01:02:58 2021] pci 0000:00:1c.4: PCI bridge to [bus 03]
[Mon Jan 18 01:02:58 2021] pci 0000:00:1c.4:   bridge window [mem 0xf0500000-0xf05fffff]
[Mon Jan 18 01:02:58 2021] pci_bus 0000:00: resource 4 [io  0x0000-0x0cf7 window]
[Mon Jan 18 01:02:58 2021] pci_bus 0000:00: resource 5 [io  0x0d00-0xffff window]
[Mon Jan 18 01:02:58 2021] pci_bus 0000:00: resource 6 [mem 0x000a0000-0x000bffff window]
[Mon Jan 18 01:02:58 2021] pci_bus 0000:00: resource 7 [mem 0xdfa00000-0xfeafffff window]
[Mon Jan 18 01:02:58 2021] pci_bus 0000:00: resource 8 [mem 0xfed40000-0xfed44fff window]
[Mon Jan 18 01:02:58 2021] pci_bus 0000:01: resource 1 [mem 0xf0600000-0xf06fffff]
[Mon Jan 18 01:02:58 2021] pci_bus 0000:02: resource 0 [io  0x2000-0x2fff]
[Mon Jan 18 01:02:58 2021] pci_bus 0000:02: resource 2 [mem 0xf0400000-0xf04fffff 64bit pref]
[Mon Jan 18 01:02:58 2021] pci_bus 0000:03: resource 1 [mem 0xf0500000-0xf05fffff]
[Mon Jan 18 01:02:58 2021] pci 0000:00:02.0: Video device with shadowed ROM at [mem 0x000c0000-0x000dffff]
[Mon Jan 18 01:02:58 2021] PCI: CLS 64 bytes, default 64
[Mon Jan 18 01:02:58 2021] Trying to unpack rootfs image as initramfs...
[Mon Jan 18 01:02:59 2021] Freeing initrd memory: 32788K
[Mon Jan 18 01:02:59 2021] PCI-DMA: Using software bounce buffering for IO (SWIOTLB)
[Mon Jan 18 01:02:59 2021] software IO TLB: mapped [mem 0x00000000d5c9f000-0x00000000d9c9f000] (64MB)
[Mon Jan 18 01:02:59 2021] Initialise system trusted keyrings
[Mon Jan 18 01:02:59 2021] Key type blacklist registered
[Mon Jan 18 01:02:59 2021] workingset: timestamp_bits=36 max_order=21 bucket_order=0
[Mon Jan 18 01:02:59 2021] zbud: loaded
[Mon Jan 18 01:02:59 2021] integrity: Platform Keyring initialized
[Mon Jan 18 01:02:59 2021] Key type asymmetric registered
[Mon Jan 18 01:02:59 2021] Asymmetric key parser 'x509' registered
[Mon Jan 18 01:02:59 2021] Block layer SCSI generic (bsg) driver version 0.4 loaded (major 251)
[Mon Jan 18 01:02:59 2021] io scheduler mq-deadline registered
[Mon Jan 18 01:02:59 2021] shpchp: Standard Hot Plug PCI Controller Driver version: 0.4
[Mon Jan 18 01:02:59 2021] intel_idle: MWAIT substates: 0x21120
[Mon Jan 18 01:02:59 2021] intel_idle: v0.5.1 model 0x2A
[Mon Jan 18 01:02:59 2021] intel_idle: Local APIC timer is reliable in all C-states
[Mon Jan 18 01:02:59 2021] thermal LNXTHERM:00: registered as thermal_zone0
[Mon Jan 18 01:02:59 2021] ACPI: Thermal Zone [TZ00] (68 C)
[Mon Jan 18 01:02:59 2021] thermal LNXTHERM:01: registered as thermal_zone1
[Mon Jan 18 01:02:59 2021] ACPI: Thermal Zone [TZ01] (30 C)
[Mon Jan 18 01:02:59 2021] Serial: 8250/16550 driver, 4 ports, IRQ sharing enabled
[Mon Jan 18 01:02:59 2021] Linux agpgart interface v0.103
[Mon Jan 18 01:02:59 2021] AMD-Vi: AMD IOMMUv2 driver by Joerg Roedel <jroedel@suse.de>
[Mon Jan 18 01:02:59 2021] AMD-Vi: AMD IOMMUv2 functionality not available on this system
[Mon Jan 18 01:02:59 2021] i8042: PNP: PS/2 Controller [PNP0303:PS2K,PNP0f13:EPAD] at 0x60,0x64 irq 1,12
[Mon Jan 18 01:02:59 2021] serio: i8042 KBD port at 0x60,0x64 irq 1
[Mon Jan 18 01:02:59 2021] serio: i8042 AUX port at 0x60,0x64 irq 12
[Mon Jan 18 01:02:59 2021] mousedev: PS/2 mouse device common for all mice
[Mon Jan 18 01:02:59 2021] rtc_cmos 00:01: registered as rtc0
[Mon Jan 18 01:02:59 2021] rtc_cmos 00:01: setting system clock to 2021-01-18T00:02:59 UTC (1610928179)
[Mon Jan 18 01:02:59 2021] rtc_cmos 00:01: alarms up to one month, y3k, 242 bytes nvram, hpet irqs
[Mon Jan 18 01:02:59 2021] intel_pstate: Intel P-state driver initializing
[Mon Jan 18 01:02:59 2021] ledtrig-cpu: registered to indicate activity on CPUs
[Mon Jan 18 01:02:59 2021] NET: Registered protocol family 10
[Mon Jan 18 01:02:59 2021] input: AT Translated Set 2 keyboard as /devices/platform/i8042/serio0/input/input0
[Mon Jan 18 01:02:59 2021] Segment Routing with IPv6
[Mon Jan 18 01:02:59 2021] mip6: Mobile IPv6
[Mon Jan 18 01:02:59 2021] NET: Registered protocol family 17
[Mon Jan 18 01:02:59 2021] mpls_gso: MPLS GSO support
[Mon Jan 18 01:02:59 2021] microcode: sig=0x206a7, pf=0x10, revision=0x2f
[Mon Jan 18 01:02:59 2021] microcode: Microcode Update Driver: v2.2.
[Mon Jan 18 01:02:59 2021] IPI shorthand broadcast: enabled
[Mon Jan 18 01:02:59 2021] sched_clock: Marking stable (1229106958, 12143855)->(1260049626, -18798813)
[Mon Jan 18 01:02:59 2021] registered taskstats version 1
[Mon Jan 18 01:02:59 2021] Loading compiled-in X.509 certificates
[Mon Jan 18 01:02:59 2021] zswap: loaded using pool zstd/zbud
[Mon Jan 18 01:02:59 2021] Key type ._fscrypt registered
[Mon Jan 18 01:02:59 2021] Key type .fscrypt registered
[Mon Jan 18 01:02:59 2021] Key type fscrypt-provisioning registered
[Mon Jan 18 01:02:59 2021] AppArmor: AppArmor sha1 policy hashing enabled
[Mon Jan 18 01:02:59 2021] Freeing unused kernel image (initmem) memory: 2444K
[Mon Jan 18 01:02:59 2021] Write protecting the kernel read-only data: 18432k
[Mon Jan 18 01:02:59 2021] Freeing unused kernel image (text/rodata gap) memory: 2040K
[Mon Jan 18 01:02:59 2021] Freeing unused kernel image (rodata/data gap) memory: 88K
[Mon Jan 18 01:02:59 2021] x86/mm: Checked W+X mappings: passed, no W+X pages found.
[Mon Jan 18 01:02:59 2021] x86/mm: Checking user space page tables
[Mon Jan 18 01:02:59 2021] x86/mm: Checked W+X mappings: passed, no W+X pages found.
[Mon Jan 18 01:02:59 2021] Run /init as init process
[Mon Jan 18 01:02:59 2021]   with arguments:
[Mon Jan 18 01:02:59 2021]     /init
[Mon Jan 18 01:02:59 2021]   with environment:
[Mon Jan 18 01:02:59 2021]     HOME=/
[Mon Jan 18 01:02:59 2021]     TERM=linux
[Mon Jan 18 01:02:59 2021]     BOOT_IMAGE=/boot/vmlinuz-5.11.0-rc3-10-amd64-clang12-pgo
[Mon Jan 18 01:02:59 2021] fjes: module verification failed: signature and/or required key missing - tainting kernel
[Mon Jan 18 01:02:59 2021] battery: ACPI: Battery Slot [BAT1] (battery present)
[Mon Jan 18 01:02:59 2021] ACPI Warning: SystemIO range 0x0000000000000428-0x000000000000042F conflicts with OpRegion 0x0000000000000400-0x000000000000047F (\PMIO) (20201113/utaddress-204)
[Mon Jan 18 01:02:59 2021] ACPI: If an ACPI driver is available for this device, you should use it instead of the native driver
[Mon Jan 18 01:02:59 2021] ACPI Warning: SystemIO range 0x0000000000000540-0x000000000000054F conflicts with OpRegion 0x0000000000000500-0x0000000000000563 (\GPIO) (20201113/utaddress-204)
[Mon Jan 18 01:02:59 2021] ACPI: If an ACPI driver is available for this device, you should use it instead of the native driver
[Mon Jan 18 01:02:59 2021] ACPI Warning: SystemIO range 0x0000000000000530-0x000000000000053F conflicts with OpRegion 0x0000000000000500-0x0000000000000563 (\GPIO) (20201113/utaddress-204)
[Mon Jan 18 01:02:59 2021] ACPI: If an ACPI driver is available for this device, you should use it instead of the native driver
[Mon Jan 18 01:02:59 2021] ACPI Warning: SystemIO range 0x0000000000000500-0x000000000000052F conflicts with OpRegion 0x0000000000000500-0x0000000000000563 (\GPIO) (20201113/utaddress-204)
[Mon Jan 18 01:02:59 2021] ACPI: If an ACPI driver is available for this device, you should use it instead of the native driver
[Mon Jan 18 01:02:59 2021] lpc_ich: Resource conflict(s) found affecting gpio_ich
[Mon Jan 18 01:02:59 2021] i801_smbus 0000:00:1f.3: SMBus using PCI interrupt
[Mon Jan 18 01:02:59 2021] i2c i2c-0: 2/4 memory slots populated (from DMI)
[Mon Jan 18 01:02:59 2021] ACPI: bus type USB registered
[Mon Jan 18 01:02:59 2021] usbcore: registered new interface driver usbfs
[Mon Jan 18 01:02:59 2021] usbcore: registered new interface driver hub
[Mon Jan 18 01:02:59 2021] usbcore: registered new device driver usb
[Mon Jan 18 01:02:59 2021] SCSI subsystem initialized
[Mon Jan 18 01:02:59 2021] i2c i2c-0: Successfully instantiated SPD at 0x52
[Mon Jan 18 01:02:59 2021] ehci_hcd: USB 2.0 'Enhanced' Host Controller (EHCI) Driver
[Mon Jan 18 01:02:59 2021] ehci-pci: EHCI PCI platform driver
[Mon Jan 18 01:02:59 2021] ehci-pci 0000:00:1a.0: EHCI Host Controller
[Mon Jan 18 01:02:59 2021] ehci-pci 0000:00:1a.0: new USB bus registered, assigned bus number 1
[Mon Jan 18 01:02:59 2021] ehci-pci 0000:00:1a.0: debug port 2
[Mon Jan 18 01:02:59 2021] r8169 0000:02:00.0: can't disable ASPM; OS doesn't have ASPM control
[Mon Jan 18 01:02:59 2021] libata version 3.00 loaded.
[Mon Jan 18 01:02:59 2021] ehci-pci 0000:00:1a.0: irq 16, io mem 0xf070a000
[Mon Jan 18 01:02:59 2021] libphy: r8169: probed
[Mon Jan 18 01:02:59 2021] r8169 0000:02:00.0 eth0: RTL8168evl/8111evl, e8:03:9a:36:17:a9, XID 2c9, IRQ 27
[Mon Jan 18 01:02:59 2021] r8169 0000:02:00.0 eth0: jumbo features [frames: 9194 bytes, tx checksumming: ko]
[Mon Jan 18 01:02:59 2021] ehci-pci 0000:00:1a.0: USB 2.0 started, EHCI 1.00
[Mon Jan 18 01:02:59 2021] usb usb1: New USB device found, idVendor=1d6b, idProduct=0002, bcdDevice= 5.11
[Mon Jan 18 01:02:59 2021] usb usb1: New USB device strings: Mfr=3, Product=2, SerialNumber=1
[Mon Jan 18 01:02:59 2021] usb usb1: Product: EHCI Host Controller
[Mon Jan 18 01:02:59 2021] usb usb1: Manufacturer: Linux 5.11.0-rc3-10-amd64-clang12-pgo ehci_hcd
[Mon Jan 18 01:02:59 2021] usb usb1: SerialNumber: 0000:00:1a.0
[Mon Jan 18 01:02:59 2021] hub 1-0:1.0: USB hub found
[Mon Jan 18 01:02:59 2021] hub 1-0:1.0: 2 ports detected
[Mon Jan 18 01:02:59 2021] xhci_hcd 0000:03:00.0: xHCI Host Controller
[Mon Jan 18 01:02:59 2021] xhci_hcd 0000:03:00.0: new USB bus registered, assigned bus number 2
[Mon Jan 18 01:02:59 2021] xhci_hcd 0000:03:00.0: hcc params 0x0200f180 hci version 0x96 quirks 0x0000000000080000
[Mon Jan 18 01:02:59 2021] usb usb2: New USB device found, idVendor=1d6b, idProduct=0002, bcdDevice= 5.11
[Mon Jan 18 01:02:59 2021] usb usb2: New USB device strings: Mfr=3, Product=2, SerialNumber=1
[Mon Jan 18 01:02:59 2021] usb usb2: Product: xHCI Host Controller
[Mon Jan 18 01:02:59 2021] usb usb2: Manufacturer: Linux 5.11.0-rc3-10-amd64-clang12-pgo xhci-hcd
[Mon Jan 18 01:02:59 2021] usb usb2: SerialNumber: 0000:03:00.0
[Mon Jan 18 01:02:59 2021] hub 2-0:1.0: USB hub found
[Mon Jan 18 01:02:59 2021] hub 2-0:1.0: 2 ports detected
[Mon Jan 18 01:02:59 2021] ehci-pci 0000:00:1d.0: EHCI Host Controller
[Mon Jan 18 01:02:59 2021] xhci_hcd 0000:03:00.0: xHCI Host Controller
[Mon Jan 18 01:02:59 2021] ehci-pci 0000:00:1d.0: new USB bus registered, assigned bus number 3
[Mon Jan 18 01:02:59 2021] xhci_hcd 0000:03:00.0: new USB bus registered, assigned bus number 4
[Mon Jan 18 01:02:59 2021] ehci-pci 0000:00:1d.0: debug port 2
[Mon Jan 18 01:02:59 2021] xhci_hcd 0000:03:00.0: Host supports USB 3.0 SuperSpeed
[Mon Jan 18 01:02:59 2021] usb usb4: We don't know the algorithms for LPM for this host, disabling LPM.
[Mon Jan 18 01:02:59 2021] usb usb4: New USB device found, idVendor=1d6b, idProduct=0003, bcdDevice= 5.11
[Mon Jan 18 01:02:59 2021] usb usb4: New USB device strings: Mfr=3, Product=2, SerialNumber=1
[Mon Jan 18 01:02:59 2021] usb usb4: Product: xHCI Host Controller
[Mon Jan 18 01:02:59 2021] usb usb4: Manufacturer: Linux 5.11.0-rc3-10-amd64-clang12-pgo xhci-hcd
[Mon Jan 18 01:02:59 2021] usb usb4: SerialNumber: 0000:03:00.0
[Mon Jan 18 01:02:59 2021] hub 4-0:1.0: USB hub found
[Mon Jan 18 01:02:59 2021] hub 4-0:1.0: 2 ports detected
[Mon Jan 18 01:02:59 2021] ahci 0000:00:1f.2: version 3.0
[Mon Jan 18 01:02:59 2021] ahci 0000:00:1f.2: SSS flag set, parallel bus scan disabled
[Mon Jan 18 01:02:59 2021] ehci-pci 0000:00:1d.0: irq 23, io mem 0xf0709000
[Mon Jan 18 01:02:59 2021] r8169 0000:02:00.0 enp2s0: renamed from eth0
[Mon Jan 18 01:02:59 2021] ahci 0000:00:1f.2: AHCI 0001.0300 32 slots 6 ports 6 Gbps 0x1b impl SATA mode
[Mon Jan 18 01:02:59 2021] ahci 0000:00:1f.2: flags: 64bit ncq sntf ilck stag pm led clo pio slum part ems sxs apst 
[Mon Jan 18 01:02:59 2021] ehci-pci 0000:00:1d.0: USB 2.0 started, EHCI 1.00
[Mon Jan 18 01:02:59 2021] usb usb3: New USB device found, idVendor=1d6b, idProduct=0002, bcdDevice= 5.11
[Mon Jan 18 01:02:59 2021] usb usb3: New USB device strings: Mfr=3, Product=2, SerialNumber=1
[Mon Jan 18 01:02:59 2021] usb usb3: Product: EHCI Host Controller
[Mon Jan 18 01:02:59 2021] usb usb3: Manufacturer: Linux 5.11.0-rc3-10-amd64-clang12-pgo ehci_hcd
[Mon Jan 18 01:02:59 2021] usb usb3: SerialNumber: 0000:00:1d.0
[Mon Jan 18 01:02:59 2021] hub 3-0:1.0: USB hub found
[Mon Jan 18 01:02:59 2021] hub 3-0:1.0: 2 ports detected
[Mon Jan 18 01:02:59 2021] scsi host0: ahci
[Mon Jan 18 01:02:59 2021] scsi host1: ahci
[Mon Jan 18 01:02:59 2021] scsi host2: ahci
[Mon Jan 18 01:02:59 2021] scsi host3: ahci
[Mon Jan 18 01:02:59 2021] scsi host4: ahci
[Mon Jan 18 01:02:59 2021] scsi host5: ahci
[Mon Jan 18 01:02:59 2021] ata1: SATA max UDMA/133 abar m2048@0xf0708000 port 0xf0708100 irq 33
[Mon Jan 18 01:02:59 2021] ata2: SATA max UDMA/133 abar m2048@0xf0708000 port 0xf0708180 irq 33
[Mon Jan 18 01:02:59 2021] ata3: DUMMY
[Mon Jan 18 01:02:59 2021] ata4: SATA max UDMA/133 abar m2048@0xf0708000 port 0xf0708280 irq 33
[Mon Jan 18 01:02:59 2021] ata5: SATA max UDMA/133 abar m2048@0xf0708000 port 0xf0708300 irq 33
[Mon Jan 18 01:02:59 2021] ata6: DUMMY
[Mon Jan 18 01:02:59 2021] usb 1-1: new high-speed USB device number 2 using ehci-pci
[Mon Jan 18 01:03:00 2021] usb 3-1: new high-speed USB device number 2 using ehci-pci
[Mon Jan 18 01:03:00 2021] ata1: SATA link up 3.0 Gbps (SStatus 123 SControl 300)
[Mon Jan 18 01:03:00 2021] ata1.00: ATA-8: Hitachi HTS545050A7E380, GG2OA6C0, max UDMA/133
[Mon Jan 18 01:03:00 2021] ata1.00: 976773168 sectors, multi 16: LBA48 NCQ (depth 32), AA
[Mon Jan 18 01:03:00 2021] ata1.00: configured for UDMA/133
[Mon Jan 18 01:03:00 2021] scsi 0:0:0:0: Direct-Access     ATA      Hitachi HTS54505 A6C0 PQ: 0 ANSI: 5
[Mon Jan 18 01:03:00 2021] usb 4-1: new SuperSpeed Gen 1 USB device number 2 using xhci_hcd
[Mon Jan 18 01:03:00 2021] usb 1-1: New USB device found, idVendor=8087, idProduct=0024, bcdDevice= 0.00
[Mon Jan 18 01:03:00 2021] usb 1-1: New USB device strings: Mfr=0, Product=0, SerialNumber=0
[Mon Jan 18 01:03:00 2021] hub 1-1:1.0: USB hub found
[Mon Jan 18 01:03:00 2021] hub 1-1:1.0: 6 ports detected
[Mon Jan 18 01:03:00 2021] usb 4-1: New USB device found, idVendor=174c, idProduct=55aa, bcdDevice= 1.00
[Mon Jan 18 01:03:00 2021] usb 4-1: New USB device strings: Mfr=2, Product=3, SerialNumber=1
[Mon Jan 18 01:03:00 2021] usb 4-1: Product: MEDION HDDrive-n-GO
[Mon Jan 18 01:03:00 2021] usb 4-1: Manufacturer: MEDION
[Mon Jan 18 01:03:00 2021] usb 4-1: SerialNumber: 3180000000000000092C
[Mon Jan 18 01:03:00 2021] usb-storage 4-1:1.0: USB Mass Storage device detected
[Mon Jan 18 01:03:00 2021] usb-storage 4-1:1.0: Quirks match for vid 174c pid 55aa: 400000
[Mon Jan 18 01:03:00 2021] scsi host6: usb-storage 4-1:1.0
[Mon Jan 18 01:03:00 2021] usbcore: registered new interface driver usb-storage
[Mon Jan 18 01:03:00 2021] usbcore: registered new interface driver uas
[Mon Jan 18 01:03:00 2021] usb 3-1: New USB device found, idVendor=8087, idProduct=0024, bcdDevice= 0.00
[Mon Jan 18 01:03:00 2021] usb 3-1: New USB device strings: Mfr=0, Product=0, SerialNumber=0
[Mon Jan 18 01:03:00 2021] hub 3-1:1.0: USB hub found
[Mon Jan 18 01:03:00 2021] hub 3-1:1.0: 6 ports detected
[Mon Jan 18 01:03:00 2021] tsc: Refined TSC clocksource calibration: 1596.373 MHz
[Mon Jan 18 01:03:00 2021] clocksource: tsc: mask: 0xffffffffffffffff max_cycles: 0x1702c2a0637, max_idle_ns: 440795222505 ns
[Mon Jan 18 01:03:00 2021] clocksource: Switched to clocksource tsc
[Mon Jan 18 01:03:00 2021] usb 1-1.2: new high-speed USB device number 3 using ehci-pci
[Mon Jan 18 01:03:00 2021] ata2: SATA link up 3.0 Gbps (SStatus 123 SControl 300)
[Mon Jan 18 01:03:00 2021] ata2.00: ATA-8: SanDisk iSSD P4 16GB, SSD 9.14, max UDMA/133
[Mon Jan 18 01:03:00 2021] ata2.00: 31277232 sectors, multi 1: LBA48 
[Mon Jan 18 01:03:00 2021] ata2.00: configured for UDMA/133
[Mon Jan 18 01:03:00 2021] scsi 1:0:0:0: Direct-Access     ATA      SanDisk iSSD P4  9.14 PQ: 0 ANSI: 5
[Mon Jan 18 01:03:00 2021] usb 1-1.2: New USB device found, idVendor=12d1, idProduct=1436, bcdDevice= 0.00
[Mon Jan 18 01:03:00 2021] usb 1-1.2: New USB device strings: Mfr=4, Product=3, SerialNumber=0
[Mon Jan 18 01:03:00 2021] usb 1-1.2: Product: HUAWEI Mobile
[Mon Jan 18 01:03:00 2021] usb 1-1.2: Manufacturer: HUAWEI Technology
[Mon Jan 18 01:03:00 2021] usb-storage 1-1.2:1.0: USB Mass Storage device detected
[Mon Jan 18 01:03:00 2021] usb 3-1.4: new low-speed USB device number 3 using ehci-pci
[Mon Jan 18 01:03:00 2021] usb-storage 1-1.2:1.1: USB Mass Storage device detected
[Mon Jan 18 01:03:00 2021] usb-storage 1-1.2:1.2: USB Mass Storage device detected
[Mon Jan 18 01:03:00 2021] random: fast init done
[Mon Jan 18 01:03:00 2021] usb-storage 1-1.2:1.3: USB Mass Storage device detected
[Mon Jan 18 01:03:00 2021] psmouse serio1: elantech: assuming hardware version 3 (with firmware version 0x450f00)
[Mon Jan 18 01:03:00 2021] psmouse serio1: elantech: Synaptics capabilities query result 0x08, 0x17, 0x0c.
[Mon Jan 18 01:03:00 2021] usb-storage 1-1.2:1.4: USB Mass Storage device detected
[Mon Jan 18 01:03:00 2021] psmouse serio1: elantech: Elan sample query result 03, 3f, 86
[Mon Jan 18 01:03:00 2021] usb-storage 1-1.2:1.5: USB Mass Storage device detected
[Mon Jan 18 01:03:00 2021] scsi host7: usb-storage 1-1.2:1.5
[Mon Jan 18 01:03:00 2021] usb-storage 1-1.2:1.6: USB Mass Storage device detected
[Mon Jan 18 01:03:00 2021] scsi host8: usb-storage 1-1.2:1.6
[Mon Jan 18 01:03:00 2021] usb 3-1.4: New USB device found, idVendor=046d, idProduct=c00e, bcdDevice=11.10
[Mon Jan 18 01:03:00 2021] usb 3-1.4: New USB device strings: Mfr=1, Product=2, SerialNumber=0
[Mon Jan 18 01:03:00 2021] usb 3-1.4: Product: USB-PS/2 Optical Mouse
[Mon Jan 18 01:03:00 2021] usb 3-1.4: Manufacturer: Logitech
[Mon Jan 18 01:03:00 2021] hid: raw HID events driver (C) Jiri Kosina
[Mon Jan 18 01:03:00 2021] usbcore: registered new interface driver usbhid
[Mon Jan 18 01:03:00 2021] usbhid: USB HID core driver
[Mon Jan 18 01:03:00 2021] input: Logitech USB-PS/2 Optical Mouse as /devices/pci0000:00/0000:00:1d.0/usb3/3-1/3-1.4/3-1.4:1.0/0003:046D:C00E.0001/input/input4
[Mon Jan 18 01:03:00 2021] hid-generic 0003:046D:C00E.0001: input,hidraw0: USB HID v1.10 Mouse [Logitech USB-PS/2 Optical Mouse] on usb-0000:00:1d.0-1.4/input0
[Mon Jan 18 01:03:00 2021] input: ETPS/2 Elantech Touchpad as /devices/platform/i8042/serio1/input/input3
[Mon Jan 18 01:03:00 2021] ata4: SATA link down (SStatus 0 SControl 300)
[Mon Jan 18 01:03:00 2021] usb 1-1.4: new high-speed USB device number 4 using ehci-pci
[Mon Jan 18 01:03:00 2021] usb 3-1.5: new full-speed USB device number 4 using ehci-pci
[Mon Jan 18 01:03:00 2021] usb 3-1.5: New USB device found, idVendor=8086, idProduct=0189, bcdDevice=69.19
[Mon Jan 18 01:03:00 2021] usb 3-1.5: New USB device strings: Mfr=0, Product=0, SerialNumber=0
[Mon Jan 18 01:03:00 2021] usb 1-1.4: New USB device found, idVendor=2232, idProduct=1018, bcdDevice= 0.01
[Mon Jan 18 01:03:00 2021] usb 1-1.4: New USB device strings: Mfr=1, Product=2, SerialNumber=0
[Mon Jan 18 01:03:00 2021] usb 1-1.4: Product: WebCam SC-13HDL11431N
[Mon Jan 18 01:03:00 2021] usb 1-1.4: Manufacturer: 123
[Mon Jan 18 01:03:01 2021] ata5: SATA link down (SStatus 0 SControl 300)
[Mon Jan 18 01:03:01 2021] sd 0:0:0:0: [sda] 976773168 512-byte logical blocks: (500 GB/466 GiB)
[Mon Jan 18 01:03:01 2021] sd 1:0:0:0: [sdb] 31277232 512-byte logical blocks: (16.0 GB/14.9 GiB)
[Mon Jan 18 01:03:01 2021] sd 0:0:0:0: [sda] 4096-byte physical blocks
[Mon Jan 18 01:03:01 2021] sd 1:0:0:0: [sdb] Write Protect is off
[Mon Jan 18 01:03:01 2021] sd 0:0:0:0: [sda] Write Protect is off
[Mon Jan 18 01:03:01 2021] sd 1:0:0:0: [sdb] Mode Sense: 00 3a 00 00
[Mon Jan 18 01:03:01 2021] sd 0:0:0:0: [sda] Mode Sense: 00 3a 00 00
[Mon Jan 18 01:03:01 2021] sd 1:0:0:0: [sdb] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[Mon Jan 18 01:03:01 2021] sd 0:0:0:0: [sda] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[Mon Jan 18 01:03:01 2021]  sdb: sdb1
[Mon Jan 18 01:03:01 2021] sd 1:0:0:0: [sdb] Attached SCSI disk
[Mon Jan 18 01:03:01 2021]  sda: sda1 sda2 sda3
[Mon Jan 18 01:03:01 2021] sd 0:0:0:0: [sda] Attached SCSI disk
[Mon Jan 18 01:03:01 2021] scsi 6:0:0:0: Direct-Access     ASMT     2105             0    PQ: 0 ANSI: 6
[Mon Jan 18 01:03:01 2021] sd 6:0:0:0: [sdc] 1953525168 512-byte logical blocks: (1.00 TB/932 GiB)
[Mon Jan 18 01:03:01 2021] sd 6:0:0:0: [sdc] 4096-byte physical blocks
[Mon Jan 18 01:03:01 2021] sd 6:0:0:0: [sdc] Write Protect is off
[Mon Jan 18 01:03:01 2021] sd 6:0:0:0: [sdc] Mode Sense: 43 00 00 00
[Mon Jan 18 01:03:01 2021] sd 6:0:0:0: [sdc] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[Mon Jan 18 01:03:01 2021]  sdc: sdc1 sdc2 sdc3 sdc4 < sdc5 >
[Mon Jan 18 01:03:01 2021] sd 6:0:0:0: [sdc] Attached SCSI disk
[Mon Jan 18 01:03:01 2021] scsi 8:0:0:0: Direct-Access     HUAWEI   SD Storage       2.31 PQ: 0 ANSI: 2
[Mon Jan 18 01:03:01 2021] scsi 7:0:0:0: CD-ROM            HUAWEI   Mass Storage     2.31 PQ: 0 ANSI: 2
[Mon Jan 18 01:03:01 2021] sd 8:0:0:0: [sdd] Attached SCSI removable disk
[Mon Jan 18 01:03:01 2021] sr 7:0:0:0: [sr0] scsi-1 drive
[Mon Jan 18 01:03:01 2021] cdrom: Uniform CD-ROM driver Revision: 3.20
[Mon Jan 18 01:03:01 2021] sr 7:0:0:0: Attached scsi CD-ROM sr0
[Mon Jan 18 01:03:02 2021] xor: automatically using best checksumming function   avx       
[Mon Jan 18 01:03:02 2021] raid6: sse2x4   gen()  8783 MB/s
[Mon Jan 18 01:03:02 2021] raid6: sse2x4   xor()  5871 MB/s
[Mon Jan 18 01:03:02 2021] raid6: sse2x2   gen() 10527 MB/s
[Mon Jan 18 01:03:02 2021] raid6: sse2x2   xor()  6220 MB/s
[Mon Jan 18 01:03:02 2021] raid6: sse2x1   gen()  9738 MB/s
[Mon Jan 18 01:03:02 2021] raid6: sse2x1   xor()  5166 MB/s
[Mon Jan 18 01:03:02 2021] raid6: using algorithm sse2x2 gen() 10527 MB/s
[Mon Jan 18 01:03:02 2021] raid6: .... xor() 6220 MB/s, rmw enabled
[Mon Jan 18 01:03:02 2021] raid6: using ssse3x2 recovery algorithm
[Mon Jan 18 01:03:02 2021] Btrfs loaded, crc32c=crc32c-intel, zoned=yes
[Mon Jan 18 01:03:03 2021] random: crng init done
[Mon Jan 18 01:03:03 2021] EXT4-fs (sdc2): mounted filesystem with ordered data mode. Opts: (null). Quota mode: none.
[Mon Jan 18 01:03:04 2021] Not activating Mandatory Access Control as /sbin/tomoyo-init does not exist.
[Mon Jan 18 01:03:06 2021] systemd[1]: Inserted module 'autofs4'
[Mon Jan 18 01:03:06 2021] systemd[1]: systemd 247.2-4 running in system mode. (+PAM +AUDIT +SELINUX +IMA +APPARMOR +SMACK +SYSVINIT +UTMP +LIBCRYPTSETUP +GCRYPT +GNUTLS +ACL +XZ +LZ4 +ZSTD +SECCOMP +BLKID +ELFUTILS +KMOD +IDN2 -IDN +PCRE2 default-hierarchy=unified)
[Mon Jan 18 01:03:06 2021] systemd[1]: Detected architecture x86-64.
[Mon Jan 18 01:03:06 2021] systemd[1]: Set hostname to <iniza>.
[Mon Jan 18 01:03:07 2021] systemd-sysv-generator[235]: SysV service '/etc/init.d/virtualbox' lacks a native systemd unit file. Automatically generating a unit file for compatibility. Please update package to include a native systemd unit file, in order to make it more safe and robust.
[Mon Jan 18 01:03:07 2021] systemd-sysv-generator[235]: SysV service '/etc/init.d/exim4' lacks a native systemd unit file. Automatically generating a unit file for compatibility. Please update package to include a native systemd unit file, in order to make it more safe and robust.
[Mon Jan 18 01:03:07 2021] systemd-sysv-generator[235]: SysV service '/etc/init.d/gdomap' lacks a native systemd unit file. Automatically generating a unit file for compatibility. Please update package to include a native systemd unit file, in order to make it more safe and robust.
[Mon Jan 18 01:03:09 2021] systemd[1]: Queued start job for default target Graphical Interface.
[Mon Jan 18 01:03:09 2021] systemd[1]: Created slice system-getty.slice.
[Mon Jan 18 01:03:09 2021] systemd[1]: Created slice system-modprobe.slice.
[Mon Jan 18 01:03:09 2021] systemd[1]: Created slice system-postgresql.slice.
[Mon Jan 18 01:03:09 2021] systemd[1]: Created slice User and Session Slice.
[Mon Jan 18 01:03:09 2021] systemd[1]: Started Dispatch Password Requests to Console Directory Watch.
[Mon Jan 18 01:03:09 2021] systemd[1]: Started Forward Password Requests to Wall Directory Watch.
[Mon Jan 18 01:03:09 2021] systemd[1]: Set up automount Arbitrary Executable File Formats File System Automount Point.
[Mon Jan 18 01:03:09 2021] systemd[1]: Reached target Local Encrypted Volumes.
[Mon Jan 18 01:03:09 2021] systemd[1]: Reached target User and Group Name Lookups.
[Mon Jan 18 01:03:09 2021] systemd[1]: Reached target Remote File Systems.
[Mon Jan 18 01:03:09 2021] systemd[1]: Reached target Slices.
[Mon Jan 18 01:03:09 2021] systemd[1]: Reached target Swap.
[Mon Jan 18 01:03:09 2021] systemd[1]: Listening on Syslog Socket.
[Mon Jan 18 01:03:09 2021] systemd[1]: Listening on fsck to fsckd communication Socket.
[Mon Jan 18 01:03:09 2021] systemd[1]: Listening on initctl Compatibility Named Pipe.
[Mon Jan 18 01:03:09 2021] systemd[1]: Listening on Journal Audit Socket.
[Mon Jan 18 01:03:09 2021] systemd[1]: Listening on Journal Socket (/dev/log).
[Mon Jan 18 01:03:09 2021] systemd[1]: Listening on Journal Socket.
[Mon Jan 18 01:03:09 2021] systemd[1]: Listening on udev Control Socket.
[Mon Jan 18 01:03:09 2021] systemd[1]: Listening on udev Kernel Socket.
[Mon Jan 18 01:03:09 2021] systemd[1]: Mounting Huge Pages File System...
[Mon Jan 18 01:03:09 2021] systemd[1]: Mounting POSIX Message Queue File System...
[Mon Jan 18 01:03:09 2021] systemd[1]: Mounting Kernel Debug File System...
[Mon Jan 18 01:03:09 2021] systemd[1]: Mounting Kernel Trace File System...
[Mon Jan 18 01:03:09 2021] systemd[1]: Starting Wait for network to be configured by ifupdown...
[Mon Jan 18 01:03:09 2021] systemd[1]: Starting Set the console keyboard layout...
[Mon Jan 18 01:03:09 2021] systemd[1]: Starting Create list of static device nodes for the current kernel...
[Mon Jan 18 01:03:09 2021] systemd[1]: Starting Load Kernel Module configfs...
[Mon Jan 18 01:03:09 2021] systemd[1]: Starting Load Kernel Module drm...
[Mon Jan 18 01:03:09 2021] systemd[1]: Starting Load Kernel Module fuse...
[Mon Jan 18 01:03:09 2021] systemd[1]: Condition check resulted in Set Up Additional Binary Formats being skipped.
[Mon Jan 18 01:03:09 2021] systemd[1]: Condition check resulted in File System Check on Root Device being skipped.
[Mon Jan 18 01:03:09 2021] systemd[1]: Starting Journal Service...
[Mon Jan 18 01:03:09 2021] systemd[1]: Starting Load Kernel Modules...
[Mon Jan 18 01:03:09 2021] systemd[1]: Starting Remount Root and Kernel File Systems...
[Mon Jan 18 01:03:09 2021] systemd[1]: Starting Coldplug All udev Devices...
[Mon Jan 18 01:03:09 2021] systemd[1]: Mounted Huge Pages File System.
[Mon Jan 18 01:03:09 2021] systemd[1]: Mounted POSIX Message Queue File System.
[Mon Jan 18 01:03:09 2021] systemd[1]: Mounted Kernel Debug File System.
[Mon Jan 18 01:03:09 2021] systemd[1]: Mounted Kernel Trace File System.
[Mon Jan 18 01:03:09 2021] systemd[1]: Finished Create list of static device nodes for the current kernel.
[Mon Jan 18 01:03:09 2021] systemd[1]: modprobe@configfs.service: Succeeded.
[Mon Jan 18 01:03:09 2021] systemd[1]: Finished Load Kernel Module configfs.
[Mon Jan 18 01:03:09 2021] systemd[1]: Mounting Kernel Configuration File System...
[Mon Jan 18 01:03:09 2021] systemd[1]: Mounted Kernel Configuration File System.
[Mon Jan 18 01:03:09 2021] systemd[1]: Finished Wait for network to be configured by ifupdown.
[Mon Jan 18 01:03:09 2021] EXT4-fs (sdc2): re-mounted. Opts: errors=remount-ro. Quota mode: none.
[Mon Jan 18 01:03:09 2021] systemd[1]: Finished Remount Root and Kernel File Systems.
[Mon Jan 18 01:03:09 2021] fuse: init (API version 7.33)
[Mon Jan 18 01:03:09 2021] systemd[1]: Condition check resulted in Rebuild Hardware Database being skipped.
[Mon Jan 18 01:03:09 2021] systemd[1]: Condition check resulted in Platform Persistent Storage Archival being skipped.
[Mon Jan 18 01:03:09 2021] systemd[1]: Starting Load/Save Random Seed...
[Mon Jan 18 01:03:09 2021] systemd[1]: Starting Create System Users...
[Mon Jan 18 01:03:09 2021] systemd[1]: modprobe@fuse.service: Succeeded.
[Mon Jan 18 01:03:09 2021] systemd[1]: Finished Load Kernel Module fuse.
[Mon Jan 18 01:03:09 2021] systemd[1]: Mounting FUSE Control File System...
[Mon Jan 18 01:03:09 2021] systemd[1]: Mounted FUSE Control File System.
[Mon Jan 18 01:03:10 2021] systemd[1]: Started Journal Service.
[Mon Jan 18 01:03:10 2021] lp: driver loaded but no devices found
[Mon Jan 18 01:03:10 2021] ppdev: user-space parallel port driver
[Mon Jan 18 01:03:14 2021] audit: type=1400 audit(1610928195.048:2): apparmor="STATUS" operation="profile_load" profile="unconfined" name="libreoffice-xpdfimport" pid=296 comm="apparmor_parser"
[Mon Jan 18 01:03:14 2021] audit: type=1400 audit(1610928195.124:3): apparmor="STATUS" operation="profile_load" profile="unconfined" name="lsb_release" pid=298 comm="apparmor_parser"
[Mon Jan 18 01:03:14 2021] audit: type=1400 audit(1610928195.216:4): apparmor="STATUS" operation="profile_load" profile="unconfined" name="libreoffice-senddoc" pid=301 comm="apparmor_parser"
[Mon Jan 18 01:03:14 2021] audit: type=1400 audit(1610928195.232:5): apparmor="STATUS" operation="profile_load" profile="unconfined" name="/usr/sbin/haveged" pid=302 comm="apparmor_parser"
[Mon Jan 18 01:03:15 2021] audit: type=1400 audit(1610928195.356:6): apparmor="STATUS" operation="profile_load" profile="unconfined" name="libreoffice-oopslash" pid=303 comm="apparmor_parser"
[Mon Jan 18 01:03:15 2021] audit: type=1400 audit(1610928195.416:7): apparmor="STATUS" operation="profile_load" profile="unconfined" name="/usr/sbin/cups-browsed" pid=304 comm="apparmor_parser"
[Mon Jan 18 01:03:15 2021] audit: type=1400 audit(1610928195.420:8): apparmor="STATUS" operation="profile_load" profile="unconfined" name="/usr/lib/cups/backend/cups-pdf" pid=297 comm="apparmor_parser"
[Mon Jan 18 01:03:15 2021] audit: type=1400 audit(1610928195.420:9): apparmor="STATUS" operation="profile_load" profile="unconfined" name="/usr/sbin/cupsd" pid=297 comm="apparmor_parser"
[Mon Jan 18 01:03:15 2021] audit: type=1400 audit(1610928195.420:10): apparmor="STATUS" operation="profile_load" profile="unconfined" name="/usr/sbin/cupsd//third_party" pid=297 comm="apparmor_parser"
[Mon Jan 18 01:03:15 2021] audit: type=1400 audit(1610928195.540:11): apparmor="STATUS" operation="profile_load" profile="unconfined" name="tcpdump" pid=306 comm="apparmor_parser"
[Mon Jan 18 01:03:17 2021] ACPI: AC Adapter [ADP1] (on-line)
[Mon Jan 18 01:03:17 2021] input: Lid Switch as /devices/LNXSYSTM:00/LNXSYBUS:00/PNP0C0D:00/input/input5
[Mon Jan 18 01:03:17 2021] ACPI: Lid Switch [LID0]
[Mon Jan 18 01:03:17 2021] input: Power Button as /devices/LNXSYSTM:00/LNXSYBUS:00/PNP0C0C:00/input/input6
[Mon Jan 18 01:03:17 2021] ACPI: Power Button [PWRB]
[Mon Jan 18 01:03:17 2021] input: Power Button as /devices/LNXSYSTM:00/LNXPWRBN:00/input/input7
[Mon Jan 18 01:03:17 2021] ACPI: Power Button [PWRF]
[Mon Jan 18 01:03:17 2021] input: PC Speaker as /devices/platform/pcspkr/input/input8
[Mon Jan 18 01:03:18 2021] RAPL PMU: API unit is 2^-32 Joules, 3 fixed counters, 163840 ms ovfl timer
[Mon Jan 18 01:03:18 2021] RAPL PMU: hw unit of domain pp0-core 2^-16 Joules
[Mon Jan 18 01:03:18 2021] RAPL PMU: hw unit of domain package 2^-16 Joules
[Mon Jan 18 01:03:18 2021] RAPL PMU: hw unit of domain pp1-gpu 2^-16 Joules
[Mon Jan 18 01:03:19 2021] at24 0-0052: supply vcc not found, using dummy regulator
[Mon Jan 18 01:03:19 2021] at24 0-0052: 256 byte spd EEPROM, read-only
[Mon Jan 18 01:03:20 2021] sd 0:0:0:0: Attached scsi generic sg0 type 0
[Mon Jan 18 01:03:20 2021] sd 1:0:0:0: Attached scsi generic sg1 type 0
[Mon Jan 18 01:03:20 2021] sd 6:0:0:0: Attached scsi generic sg2 type 0
[Mon Jan 18 01:03:20 2021] sd 8:0:0:0: Attached scsi generic sg3 type 0
[Mon Jan 18 01:03:20 2021] sr 7:0:0:0: Attached scsi generic sg4 type 5
[Mon Jan 18 01:03:20 2021] iTCO_vendor_support: vendor-support=0
[Mon Jan 18 01:03:20 2021] AVX version of gcm_enc/dec engaged.
[Mon Jan 18 01:03:20 2021] AES CTR mode by8 optimization enabled
[Mon Jan 18 01:03:21 2021] samsung_laptop: detected SABI interface: SwSmi@
[Mon Jan 18 01:03:21 2021] iTCO_wdt iTCO_wdt.1.auto: Found a Cougar Point TCO device (Version=2, TCOBASE=0x0460)
[Mon Jan 18 01:03:21 2021] iTCO_wdt iTCO_wdt.1.auto: initialized. heartbeat=30 sec (nowayout=0)
[Mon Jan 18 01:03:22 2021] Error: Driver 'pcspkr' is already registered, aborting...
[Mon Jan 18 01:03:22 2021] usb 3-1.5: USB disconnect, device number 4
[Mon Jan 18 01:03:22 2021] cfg80211: Loading compiled-in X.509 certificates for regulatory database
[Mon Jan 18 01:03:22 2021] cfg80211: Loaded X.509 cert 'sforshee: 00b28ddf47aef9cea7'
[Mon Jan 18 01:03:23 2021] cfg80211: loaded regulatory.db is malformed or signature is missing/invalid
[Mon Jan 18 01:03:24 2021] zram: Added device: zram0
[Mon Jan 18 01:03:24 2021] Intel(R) Wireless WiFi driver for Linux
[Mon Jan 18 01:03:24 2021] iwlwifi 0000:01:00.0: can't disable ASPM; OS doesn't have ASPM control
[Mon Jan 18 01:03:25 2021] zram0: detected capacity change from 524288 to 0
[Mon Jan 18 01:03:26 2021] usbcore: registered new interface driver usbserial_generic
[Mon Jan 18 01:03:26 2021] usbserial: USB Serial support registered for generic
[Mon Jan 18 01:03:26 2021] iwlwifi 0000:01:00.0: loaded firmware version 18.168.6.1 6000g2b-6.ucode op_mode iwldvm
[Mon Jan 18 01:03:26 2021] mc: Linux media interface: v0.10
[Mon Jan 18 01:03:26 2021] alg: No test for fips(ansi_cprng) (fips_ansi_cprng)
[Mon Jan 18 01:03:27 2021] usbcore: registered new interface driver option
[Mon Jan 18 01:03:27 2021] usbserial: USB Serial support registered for GSM modem (1-port)
[Mon Jan 18 01:03:27 2021] cdc_ether 1-1.2:1.1 wwan0: register 'cdc_ether' at usb-0000:00:1a.0-1.2, Mobile Broadband Network Device, 02:50:f3:00:00:00
[Mon Jan 18 01:03:27 2021] usbcore: registered new interface driver cdc_ether
[Mon Jan 18 01:03:27 2021] option 1-1.2:1.0: GSM modem (1-port) converter detected
[Mon Jan 18 01:03:27 2021] usb 1-1.2: GSM modem (1-port) converter now attached to ttyUSB0
[Mon Jan 18 01:03:27 2021] option 1-1.2:1.3: GSM modem (1-port) converter detected
[Mon Jan 18 01:03:27 2021] usb 1-1.2: GSM modem (1-port) converter now attached to ttyUSB1
[Mon Jan 18 01:03:27 2021] option 1-1.2:1.4: GSM modem (1-port) converter detected
[Mon Jan 18 01:03:27 2021] usb 1-1.2: GSM modem (1-port) converter now attached to ttyUSB2
[Mon Jan 18 01:03:27 2021] Adding 262140k swap on /dev/zram0.  Priority:100 extents:1 across:262140k SSFS
[Mon Jan 18 01:03:27 2021] videodev: Linux video capture interface: v2.00
[Mon Jan 18 01:03:27 2021] cdc_ether 1-1.2:1.1 wwx0250f3000000: renamed from wwan0
[Mon Jan 18 01:03:28 2021] i915 0000:00:02.0: vgaarb: deactivate vga console
[Mon Jan 18 01:03:28 2021] Console: switching to colour dummy device 80x25
[Mon Jan 18 01:03:28 2021] i915 0000:00:02.0: vgaarb: changed VGA decodes: olddecodes=io+mem,decodes=io+mem:owns=io+mem
[Mon Jan 18 01:03:28 2021] [drm] Initialized i915 1.6.0 20201103 for 0000:00:02.0 on minor 0
[Mon Jan 18 01:03:28 2021] ACPI: Video Device [GFX0] (multi-head: yes  rom: no  post: no)
[Mon Jan 18 01:03:28 2021] input: Video Bus as /devices/LNXSYSTM:00/LNXSYBUS:00/PNP0A08:00/LNXVIDEO:00/input/input9
[Mon Jan 18 01:03:28 2021] fbcon: i915drmfb (fb0) is primary device
[Mon Jan 18 01:03:29 2021] Console: switching to colour frame buffer device 170x48
[Mon Jan 18 01:03:29 2021] snd_hda_intel 0000:00:1b.0: bound 0000:00:02.0 (ops i915_audio_component_bind_ops [i915])
[Mon Jan 18 01:03:29 2021] i915 0000:00:02.0: [drm] fb0: i915drmfb frame buffer device
[Mon Jan 18 01:03:30 2021] Bluetooth: Core ver 2.22
[Mon Jan 18 01:03:30 2021] NET: Registered protocol family 31
[Mon Jan 18 01:03:30 2021] Bluetooth: HCI device and connection manager initialized
[Mon Jan 18 01:03:30 2021] Bluetooth: HCI socket layer initialized
[Mon Jan 18 01:03:30 2021] Bluetooth: L2CAP socket layer initialized
[Mon Jan 18 01:03:30 2021] Bluetooth: SCO socket layer initialized
[Mon Jan 18 01:03:30 2021] intel_rapl_common: Found RAPL domain package
[Mon Jan 18 01:03:30 2021] intel_rapl_common: Found RAPL domain core
[Mon Jan 18 01:03:30 2021] intel_rapl_common: Found RAPL domain uncore
[Mon Jan 18 01:03:30 2021] intel_rapl_common: RAPL package-0 domain package locked by BIOS
[Mon Jan 18 01:03:30 2021] iwlwifi 0000:01:00.0: CONFIG_IWLWIFI_DEBUG disabled
[Mon Jan 18 01:03:30 2021] iwlwifi 0000:01:00.0: CONFIG_IWLWIFI_DEBUGFS disabled
[Mon Jan 18 01:03:30 2021] iwlwifi 0000:01:00.0: CONFIG_IWLWIFI_DEVICE_TRACING disabled
[Mon Jan 18 01:03:30 2021] iwlwifi 0000:01:00.0: Detected Intel(R) Centrino(R) Advanced-N 6230 AGN, REV=0xB0
[Mon Jan 18 01:03:30 2021] iwlwifi 0000:01:00.0: reporting RF_KILL (radio disabled)
[Mon Jan 18 01:03:30 2021] iwlwifi 0000:01:00.0: RF_KILL bit toggled to disable radio.
[Mon Jan 18 01:03:30 2021] uvcvideo: Found UVC 1.00 device WebCam SC-13HDL11431N (2232:1018)
[Mon Jan 18 01:03:30 2021] ieee80211 phy0: Selected rate control algorithm 'iwl-agn-rs'
[Mon Jan 18 01:03:30 2021] input: WebCam SC-13HDL11431N: WebCam S as /devices/pci0000:00/0000:00:1a.0/usb1/1-1/1-1.4/1-1.4:1.0/input/input10
[Mon Jan 18 01:03:30 2021] usbcore: registered new interface driver uvcvideo
[Mon Jan 18 01:03:30 2021] USB Video Class driver (1.1.1)
[Mon Jan 18 01:03:30 2021] iwlwifi 0000:01:00.0 wlp1s0: renamed from wlan0
[Mon Jan 18 01:03:31 2021] snd_hda_codec_realtek hdaudioC0D0: autoconfig for ALC269VC: line_outs=1 (0x14/0x0/0x0/0x0/0x0) type:speaker
[Mon Jan 18 01:03:31 2021] snd_hda_codec_realtek hdaudioC0D0:    speaker_outs=0 (0x0/0x0/0x0/0x0/0x0)
[Mon Jan 18 01:03:31 2021] snd_hda_codec_realtek hdaudioC0D0:    hp_outs=1 (0x15/0x0/0x0/0x0/0x0)
[Mon Jan 18 01:03:31 2021] snd_hda_codec_realtek hdaudioC0D0:    mono: mono_out=0x0
[Mon Jan 18 01:03:31 2021] snd_hda_codec_realtek hdaudioC0D0:    inputs:
[Mon Jan 18 01:03:31 2021] snd_hda_codec_realtek hdaudioC0D0:      Internal Mic=0x19
[Mon Jan 18 01:03:31 2021] snd_hda_codec_realtek hdaudioC0D0:      Mic=0x18
[Mon Jan 18 01:03:31 2021] input: HDA Intel PCH Mic as /devices/pci0000:00/0000:00:1b.0/sound/card0/input11
[Mon Jan 18 01:03:31 2021] input: HDA Intel PCH Headphone as /devices/pci0000:00/0000:00:1b.0/sound/card0/input12
[Mon Jan 18 01:03:31 2021] input: HDA Intel PCH HDMI/DP,pcm=3 as /devices/pci0000:00/0000:00:1b.0/sound/card0/input13
[Mon Jan 18 01:03:32 2021] usbcore: registered new interface driver btusb
[Mon Jan 18 01:03:35 2021] Error: Driver 'pcspkr' is already registered, aborting...
[Mon Jan 18 01:03:42 2021] kauditd_printk_skb: 21 callbacks suppressed
[Mon Jan 18 01:03:42 2021] audit: type=1400 audit(1610928222.892:33): apparmor="DENIED" operation="capable" profile="/usr/sbin/cupsd" pid=841 comm="cupsd" capability=12  capname="net_admin"
[Mon Jan 18 01:03:43 2021] RTL8211E Gigabit Ethernet r8169-200:00: attached PHY driver (mii_bus:phy_addr=r8169-200:00, irq=IGNORE)
[Mon Jan 18 01:03:43 2021] r8169 0000:02:00.0 enp2s0: Link is Down
[Mon Jan 18 01:03:44 2021] audit: type=1400 audit(1610928224.480:34): apparmor="DENIED" operation="capable" profile="/usr/sbin/cups-browsed" pid=1191 comm="cups-browsed" capability=23  capname="sys_nice"
[Mon Jan 18 01:04:12 2021] Bluetooth: BNEP (Ethernet Emulation) ver 1.3
[Mon Jan 18 01:04:12 2021] Bluetooth: BNEP filters: protocol multicast
[Mon Jan 18 01:04:12 2021] Bluetooth: BNEP socket layer initialized
[Mon Jan 18 01:05:43 2021] PPP generic driver version 2.4.2
[Mon Jan 18 01:05:43 2021] PPP BSD Compression module registered
[Mon Jan 18 01:05:43 2021] PPP Deflate Compression module registered
Bill Wendling Jan. 18, 2021, 2:32 a.m. UTC | #14
On Sun, Jan 17, 2021 at 4:27 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
>
> [ big snip ]

[More snippage.]

> [ CC Fangrui ]
>
> With the attached...
>
>    [PATCH v3] module: Ignore _GLOBAL_OFFSET_TABLE_ when warning for
> undefined symbols
>
> ...I was finally able to boot into a rebuild PGO-optimized Linux-kernel.
> For details see ClangBuiltLinux issue #1250 "Unknown symbol
> _GLOBAL_OFFSET_TABLE_ loading kernel modules".
>
Thanks for confirming that this works with the above patch.

> @ Bill Nick Sami Nathan
>
> 1, Can you say something of the impact passing "LLVM_IAS=1" to make?

The integrated assembler and this option are more-or-less orthogonal
to each other. One can still use the GNU assembler with PGO. If you're
having an issue, it may be related to ClangBuiltLinux issue #1250.

> 2. Can you please try Nick's DWARF v5 support patchset v5 and
> CONFIG_DEBUG_INFO_DWARF5=y (see attachments)?
>
I know Nick did several tests with PGO. He may have looked into it
already, but we can check.

> I would like to know what the impact of the Clang's Integrated
> Assembler and DWARF v5 are.
>
> I dropped both means...
>
> 1. Do not pass "LLVM_IAS=1" to make.
> 2. Use default DWARF v2 (with Nick's patchset: CONFIG_DEBUG_INFO_DWARF2=y).
>
> ...for a successful build and boot on bare metal.
>

[Next message]

> On each rebuild I need to pass to make ...?
>
>   LLVM=1 -fprofile-use=vmlinux.profdata
>
Yes.

> Did you try together with passing LLVM_IAS=1 to make?

One of my tests was with the integrated assembler enabled. Are you
finding issues with it?

The problem with using top-of-tree clang is that it's not necessarily
stable. You could try using the clang 11.x release (changing the
"CLANG_VERSION >= 120000" in kernel/pgo/Kconfig to "CLANG_VERSION >=
110000").

-bw
Sedat Dilek Jan. 18, 2021, 12:39 p.m. UTC | #15
On Mon, Jan 18, 2021 at 3:32 AM Bill Wendling <morbo@google.com> wrote:
>
> On Sun, Jan 17, 2021 at 4:27 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> >
> > [ big snip ]
>
> [More snippage.]
>
> > [ CC Fangrui ]
> >
> > With the attached...
> >
> >    [PATCH v3] module: Ignore _GLOBAL_OFFSET_TABLE_ when warning for
> > undefined symbols
> >
> > ...I was finally able to boot into a rebuild PGO-optimized Linux-kernel.
> > For details see ClangBuiltLinux issue #1250 "Unknown symbol
> > _GLOBAL_OFFSET_TABLE_ loading kernel modules".
> >
> Thanks for confirming that this works with the above patch.
>
> > @ Bill Nick Sami Nathan
> >
> > 1, Can you say something of the impact passing "LLVM_IAS=1" to make?
>
> The integrated assembler and this option are more-or-less orthogonal
> to each other. One can still use the GNU assembler with PGO. If you're
> having an issue, it may be related to ClangBuiltLinux issue #1250.
>
> > 2. Can you please try Nick's DWARF v5 support patchset v5 and
> > CONFIG_DEBUG_INFO_DWARF5=y (see attachments)?
> >
> I know Nick did several tests with PGO. He may have looked into it
> already, but we can check.
>

Reproducible.

LLVM_IAS=1 + DWARF5 = Not bootable

I will try:

LLVM_IAS=1 + DWARF4

- Sedat -

> > I would like to know what the impact of the Clang's Integrated
> > Assembler and DWARF v5 are.
> >
> > I dropped both means...
> >
> > 1. Do not pass "LLVM_IAS=1" to make.
> > 2. Use default DWARF v2 (with Nick's patchset: CONFIG_DEBUG_INFO_DWARF2=y).
> >
> > ...for a successful build and boot on bare metal.
> >
>
> [Next message]
>
> > On each rebuild I need to pass to make ...?
> >
> >   LLVM=1 -fprofile-use=vmlinux.profdata
> >
> Yes.
>
> > Did you try together with passing LLVM_IAS=1 to make?
>
> One of my tests was with the integrated assembler enabled. Are you
> finding issues with it?
>
> The problem with using top-of-tree clang is that it's not necessarily
> stable. You could try using the clang 11.x release (changing the
> "CLANG_VERSION >= 120000" in kernel/pgo/Kconfig/ to "CLANG_VERSION >=
> 110000").
>
> -bw
Sedat Dilek Jan. 18, 2021, 5:26 p.m. UTC | #16
On Mon, Jan 18, 2021 at 1:39 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
>
> On Mon, Jan 18, 2021 at 3:32 AM Bill Wendling <morbo@google.com> wrote:
> >
> > On Sun, Jan 17, 2021 at 4:27 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > >
> > > [ big snip ]
> >
> > [More snippage.]
> >
> > > [ CC Fangrui ]
> > >
> > > With the attached...
> > >
> > >    [PATCH v3] module: Ignore _GLOBAL_OFFSET_TABLE_ when warning for
> > > undefined symbols
> > >
> > > ...I was finally able to boot into a rebuild PGO-optimized Linux-kernel.
> > > For details see ClangBuiltLinux issue #1250 "Unknown symbol
> > > _GLOBAL_OFFSET_TABLE_ loading kernel modules".
> > >
> > Thanks for confirming that this works with the above patch.
> >
> > > @ Bill Nick Sami Nathan
> > >
> > > 1, Can you say something of the impact passing "LLVM_IAS=1" to make?
> >
> > The integrated assembler and this option are more-or-less orthogonal
> > to each other. One can still use the GNU assembler with PGO. If you're
> > having an issue, it may be related to ClangBuiltLinux issue #1250.
> >
> > > 2. Can you please try Nick's DWARF v5 support patchset v5 and
> > > CONFIG_DEBUG_INFO_DWARF5=y (see attachments)?
> > >
> > I know Nick did several tests with PGO. He may have looked into it
> > already, but we can check.
> >
>
> Reproducible.
>
> LLVM_IAS=1 + DWARF5 = Not bootable
>
> I will try:
>
> LLVM_IAS=1 + DWARF4
>

I was not able to boot into such a built Linux-kernel.

What worked for me: DWARF2, with LLVM_IAS=1 *not* set.

Of course, this could be an issue with my system's LLVM/Clang.

Debian clang version
12.0.0-++20210115111113+45ef053bd709-1~exp1~20210115101809.3724

Can you give me a LLVM commit-id where you had success with LLVM_IAS=1
and especially CONFIG_DEBUG_INFO_DWARF5=y?
Success means I was able to boot in QEMU and/or bare metal.

Thanks.

Regards,
- Sedat -

>
> > > I would like to know what the impact of the Clang's Integrated
> > > Assembler and DWARF v5 are.
> > >
> > > I dropped both means...
> > >
> > > 1. Do not pass "LLVM_IAS=1" to make.
> > > 2. Use default DWARF v2 (with Nick's patchset: CONFIG_DEBUG_INFO_DWARF2=y).
> > >
> > > ...for a successful build and boot on bare metal.
> > >
> >
> > [Next message]
> >
> > > On each rebuild I need to pass to make ...?
> > >
> > >   LLVM=1 -fprofile-use=vmlinux.profdata
> > >
> > Yes.
> >
> > > Did you try together with passing LLVM_IAS=1 to make?
> >
> > One of my tests was with the integrated assembler enabled. Are you
> > finding issues with it?
> >
> > The problem with using top-of-tree clang is that it's not necessarily
> > stable. You could try using the clang 11.x release (changing the
> > "CLANG_VERSION >= 120000" in kernel/pgo/Kconfig/ to "CLANG_VERSION >=
> > 110000").
> >
> > -bw
Bill Wendling Jan. 18, 2021, 9:56 p.m. UTC | #17
On Mon, Jan 18, 2021 at 9:26 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
>
> On Mon, Jan 18, 2021 at 1:39 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> >
> > On Mon, Jan 18, 2021 at 3:32 AM Bill Wendling <morbo@google.com> wrote:
> > >
> > > On Sun, Jan 17, 2021 at 4:27 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > >
> > > > [ big snip ]
> > >
> > > [More snippage.]
> > >
> > > > [ CC Fangrui ]
> > > >
> > > > With the attached...
> > > >
> > > >    [PATCH v3] module: Ignore _GLOBAL_OFFSET_TABLE_ when warning for
> > > > undefined symbols
> > > >
> > > > ...I was finally able to boot into a rebuild PGO-optimized Linux-kernel.
> > > > For details see ClangBuiltLinux issue #1250 "Unknown symbol
> > > > _GLOBAL_OFFSET_TABLE_ loading kernel modules".
> > > >
> > > Thanks for confirming that this works with the above patch.
> > >
> > > > @ Bill Nick Sami Nathan
> > > >
> > > > 1, Can you say something of the impact passing "LLVM_IAS=1" to make?
> > >
> > > The integrated assembler and this option are more-or-less orthogonal
> > > to each other. One can still use the GNU assembler with PGO. If you're
> > > having an issue, it may be related to ClangBuiltLinux issue #1250.
> > >
> > > > 2. Can you please try Nick's DWARF v5 support patchset v5 and
> > > > CONFIG_DEBUG_INFO_DWARF5=y (see attachments)?
> > > >
> > > I know Nick did several tests with PGO. He may have looked into it
> > > already, but we can check.
> > >
> >
> > Reproducible.
> >
> > LLVM_IAS=1 + DWARF5 = Not bootable
> >
> > I will try:
> >
> > LLVM_IAS=1 + DWARF4
> >
>
> I was not able to boot into such a built Linux-kernel.
>
PGO will have no effect on debugging data. If this is an issue with
DWARF, then it's likely orthogonal to the PGO patch.

> For me worked: DWARF2 and LLVM_IAS=1 *not* set.
>
> Of course, this could be an issue with my system's LLVM/Clang.
>
> Debian clang version
> 12.0.0-++20210115111113+45ef053bd709-1~exp1~20210115101809.3724
>
Please use the official clang 11.0.1 release
(https://releases.llvm.org/download.html), modifying the
kernel/pgo/Kconfig as I suggested above. The reason we specify clang
12 for the minimal version is because of an issue that was recently
fixed.

> Can you give me a LLVM commit-id where you had success with LLVM_IAS=1
> and especially CONFIG_DEBUG_INFO_DWARF5=y?
> Success means I was able to boot in QEMU and/or bare metal.
>
The DWARF5 patch isn't in yet, so I don't want to rely upon it too much.

-bw
Sedat Dilek Jan. 18, 2021, 11:29 p.m. UTC | #18
On Mon, Jan 18, 2021 at 10:56 PM Bill Wendling <morbo@google.com> wrote:
>
> On Mon, Jan 18, 2021 at 9:26 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> >
> > On Mon, Jan 18, 2021 at 1:39 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > >
> > > On Mon, Jan 18, 2021 at 3:32 AM Bill Wendling <morbo@google.com> wrote:
> > > >
> > > > On Sun, Jan 17, 2021 at 4:27 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > > >
> > > > > [ big snip ]
> > > >
> > > > [More snippage.]
> > > >
> > > > > [ CC Fangrui ]
> > > > >
> > > > > With the attached...
> > > > >
> > > > >    [PATCH v3] module: Ignore _GLOBAL_OFFSET_TABLE_ when warning for
> > > > > undefined symbols
> > > > >
> > > > > ...I was finally able to boot into a rebuild PGO-optimized Linux-kernel.
> > > > > For details see ClangBuiltLinux issue #1250 "Unknown symbol
> > > > > _GLOBAL_OFFSET_TABLE_ loading kernel modules".
> > > > >
> > > > Thanks for confirming that this works with the above patch.
> > > >
> > > > > @ Bill Nick Sami Nathan
> > > > >
> > > > > 1, Can you say something of the impact passing "LLVM_IAS=1" to make?
> > > >
> > > > The integrated assembler and this option are more-or-less orthogonal
> > > > to each other. One can still use the GNU assembler with PGO. If you're
> > > > having an issue, it may be related to ClangBuiltLinux issue #1250.
> > > >
> > > > > 2. Can you please try Nick's DWARF v5 support patchset v5 and
> > > > > CONFIG_DEBUG_INFO_DWARF5=y (see attachments)?
> > > > >
> > > > I know Nick did several tests with PGO. He may have looked into it
> > > > already, but we can check.
> > > >
> > >
> > > Reproducible.
> > >
> > > LLVM_IAS=1 + DWARF5 = Not bootable
> > >
> > > I will try:
> > >
> > > LLVM_IAS=1 + DWARF4
> > >
> >
> > I was not able to boot into such a built Linux-kernel.
> >
> PGO will have no effect on debugging data. If this is an issue with
> DWARF, then it's likely orthogonal to the PGO patch.
>
> > For me worked: DWARF2 and LLVM_IAS=1 *not* set.
> >
> > Of course, this could be an issue with my system's LLVM/Clang.
> >
> > Debian clang version
> > 12.0.0-++20210115111113+45ef053bd709-1~exp1~20210115101809.3724
> >
> Please use the official clang 11.0.1 release
> (https://releases.llvm.org/download.html), modifying the
> kernel/pgo/Kconfig as I suggested above. The reason we specify clang
> 12 for the minimal version is because of an issue that was recently
> fixed.
>
> > Can you give me a LLVM commit-id where you had success with LLVM_IAS=1
> > and especially CONFIG_DEBUG_INFO_DWARF5=y?
> > Success means I was able to boot in QEMU and/or bare metal.
> >
> The DWARF5 patch isn't in yet, so I don't want to rely upon it too much.
>

That means building the first PGO-enabled kernel with clang-11 and then
rebuilding it in a second step with the same clang-11.

Just FYI:
I was able to boot into a Linux-kernel rebuilt with *no LLVM_IAS=1* set
(that is, using "GNU AS 2.35.1") and with DWARF5, using LLVM=1 from
LLVM/Clang-12.

- Sedat -
Nick Desaulniers Jan. 20, 2021, 1:02 a.m. UTC | #19
On Sat, Jan 16, 2021 at 1:44 AM Bill Wendling <morbo@google.com> wrote:
>
> From: Sami Tolvanen <samitolvanen@google.com>
>
> Enable the use of clang's Profile-Guided Optimization[1]. To generate a
> profile, the kernel is instrumented with PGO counters, a representative
> workload is run, and the raw profile data is collected from
> /sys/kernel/debug/pgo/profraw.
>
> The raw profile data must be processed by clang's "llvm-profdata" tool
> before it can be used during recompilation:
>
>   $ cp /sys/kernel/debug/pgo/profraw vmlinux.profraw
>   $ llvm-profdata merge --output=vmlinux.profdata vmlinux.profraw
>
> Multiple raw profiles may be merged during this step.
>
> The data can now be used by the compiler:
>
>   $ make LLVM=1 KCFLAGS=-fprofile-use=vmlinux.profdata ...
>
> This initial submission is restricted to x86, as that's the platform we
> know works. This restriction can be lifted once other platforms have
> been verified to work with PGO.
>
> Note that this method of profiling the kernel is clang-native, unlike
> the clang support in kernel/gcov.
>
> [1] https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization
>
> Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
> Co-developed-by: Bill Wendling <morbo@google.com>
> Signed-off-by: Bill Wendling <morbo@google.com>
> ---
> v2: - Added "__llvm_profile_instrument_memop" based on Nathan Chancellor's
>       testing.
>     - Corrected documentation, re PGO flags when using LTO, based on Fangrui
>       Song's comments.
> v3: - Added change log section based on Sedat Dilek's comments.
> v4: - Remove non-x86 Makefile changes and use "hweight64" instead of using our
>       own popcount implementation, based on Nick Desaulniers's comment.
> v5: - Correct padding calculation, discovered by Nathan Chancellor.

Specifically for v5:
Tested-by: Nick Desaulniers <ndesaulniers@google.com>

If anything changes drastically, please drop that and I'll retest it;
otherwise for changes to the commit message or docs, feel free to
carry it forward.

I'll try to provide code review by EOW, assuming we can stop
regressing LLVM so I can focus. (Ha!)
Nick Desaulniers Jan. 21, 2021, 12:51 a.m. UTC | #20
Thanks Bill, mostly questions below.  Patch looks good to me modulo
disabling profiling for one crypto TU, mixing style of pre/post
increment, and some comments around locking.  With those addressed,
I'm hoping akpm@ would consider picking this up.

On Sat, Jan 16, 2021 at 1:44 AM Bill Wendling <morbo@google.com> wrote:
>
> From: Sami Tolvanen <samitolvanen@google.com>
>
> Enable the use of clang's Profile-Guided Optimization[1]. To generate a
> profile, the kernel is instrumented with PGO counters, a representative
> workload is run, and the raw profile data is collected from
> /sys/kernel/debug/pgo/profraw.
>
> The raw profile data must be processed by clang's "llvm-profdata" tool
> before it can be used during recompilation:
>
>   $ cp /sys/kernel/debug/pgo/profraw vmlinux.profraw
>   $ llvm-profdata merge --output=vmlinux.profdata vmlinux.profraw
>
> Multiple raw profiles may be merged during this step.
>
> The data can now be used by the compiler:
>
>   $ make LLVM=1 KCFLAGS=-fprofile-use=vmlinux.profdata ...
>
> This initial submission is restricted to x86, as that's the platform we
> know works. This restriction can be lifted once other platforms have
> been verified to work with PGO.
>
> Note that this method of profiling the kernel is clang-native, unlike
> the clang support in kernel/gcov.
>
> [1] https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization
>
> Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
> Co-developed-by: Bill Wendling <morbo@google.com>
> Signed-off-by: Bill Wendling <morbo@google.com>
> ---
> v2: - Added "__llvm_profile_instrument_memop" based on Nathan Chancellor's
>       testing.
>     - Corrected documentation, re PGO flags when using LTO, based on Fangrui
>       Song's comments.
> v3: - Added change log section based on Sedat Dilek's comments.
> v4: - Remove non-x86 Makefile changes and use "hweight64" instead of using our
>       own popcount implementation, based on Nick Desaulniers's comment.
> v5: - Correct padding calculation, discovered by Nathan Chancellor.
> ---
>  Documentation/dev-tools/index.rst     |   1 +
>  Documentation/dev-tools/pgo.rst       | 127 +++++++++
>  MAINTAINERS                           |   9 +
>  Makefile                              |   3 +
>  arch/Kconfig                          |   1 +
>  arch/x86/Kconfig                      |   1 +
>  arch/x86/boot/Makefile                |   1 +
>  arch/x86/boot/compressed/Makefile     |   1 +
>  arch/x86/crypto/Makefile              |   2 +
>  arch/x86/entry/vdso/Makefile          |   1 +
>  arch/x86/kernel/vmlinux.lds.S         |   2 +
>  arch/x86/platform/efi/Makefile        |   1 +
>  arch/x86/purgatory/Makefile           |   1 +
>  arch/x86/realmode/rm/Makefile         |   1 +
>  arch/x86/um/vdso/Makefile             |   1 +
>  drivers/firmware/efi/libstub/Makefile |   1 +
>  include/asm-generic/vmlinux.lds.h     |  44 +++
>  kernel/Makefile                       |   1 +
>  kernel/pgo/Kconfig                    |  35 +++
>  kernel/pgo/Makefile                   |   5 +
>  kernel/pgo/fs.c                       | 382 ++++++++++++++++++++++++++
>  kernel/pgo/instrument.c               | 185 +++++++++++++
>  kernel/pgo/pgo.h                      | 206 ++++++++++++++
>  scripts/Makefile.lib                  |  10 +
>  24 files changed, 1022 insertions(+)
>  create mode 100644 Documentation/dev-tools/pgo.rst
>  create mode 100644 kernel/pgo/Kconfig
>  create mode 100644 kernel/pgo/Makefile
>  create mode 100644 kernel/pgo/fs.c
>  create mode 100644 kernel/pgo/instrument.c
>  create mode 100644 kernel/pgo/pgo.h
>
> diff --git a/Documentation/dev-tools/index.rst b/Documentation/dev-tools/index.rst
> index f7809c7b1ba9e..8d6418e858062 100644
> --- a/Documentation/dev-tools/index.rst
> +++ b/Documentation/dev-tools/index.rst
> @@ -26,6 +26,7 @@ whole; patches welcome!
>     kgdb
>     kselftest
>     kunit/index
> +   pgo
>
>
>  .. only::  subproject and html
> diff --git a/Documentation/dev-tools/pgo.rst b/Documentation/dev-tools/pgo.rst
> new file mode 100644
> index 0000000000000..b7f11d8405b73
> --- /dev/null
> +++ b/Documentation/dev-tools/pgo.rst
> @@ -0,0 +1,127 @@
> +.. SPDX-License-Identifier: GPL-2.0
> +
> +===============================
> +Using PGO with the Linux kernel
> +===============================
> +
> +Clang's profiling kernel support (PGO_) enables profiling of the Linux kernel
> +when building with Clang. The profiling data is exported via the ``pgo``
> +debugfs directory.
> +
> +.. _PGO: https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization
> +
> +
> +Preparation
> +===========
> +
> +Configure the kernel with:
> +
> +.. code-block:: make
> +
> +   CONFIG_DEBUG_FS=y
> +   CONFIG_PGO_CLANG=y
> +
> +Note that kernels compiled with profiling flags will be significantly larger
> +and run slower.
> +
> +Profiling data will only become accessible once debugfs has been mounted:
> +
> +.. code-block:: sh
> +
> +   mount -t debugfs none /sys/kernel/debug
> +
> +
> +Customization
> +=============
> +
> +You can enable or disable profiling for individual files and directories by
> +adding a line similar to the following to the respective kernel Makefile:
> +
> +- For a single file (e.g. main.o)
> +
> +  .. code-block:: make
> +
> +     PGO_PROFILE_main.o := y
> +
> +- For all files in one directory
> +
> +  .. code-block:: make
> +
> +     PGO_PROFILE := y
> +
> +To exclude files from being profiled use
> +
> +  .. code-block:: make
> +
> +     PGO_PROFILE_main.o := n
> +
> +and
> +
> +  .. code-block:: make
> +
> +     PGO_PROFILE := n
> +
> +Only files which are linked to the main kernel image or are compiled as kernel
> +modules are supported by this mechanism.
> +
> +
> +Files
> +=====
> +
> +The PGO kernel support creates the following files in debugfs:
> +
> +``/sys/kernel/debug/pgo``
> +       Parent directory for all PGO-related files.
> +
> +``/sys/kernel/debug/pgo/reset``
> +       Global reset file: resets all coverage data to zero when written to.
> +
> +``/sys/kernel/debug/pgo/profraw``
> +       The raw PGO data that must be processed with ``llvm-profdata``.
> +
> +
> +Workflow
> +========
> +
> +The PGO kernel can be run on the host or test machines. The data though should
> +be analyzed with Clang's tools from the same Clang version as the kernel was
> +compiled. Clang's tolerant of version skew, but it's easier to use the same
> +Clang version.
> +
> +The profiling data is useful for optimizing the kernel, analyzing coverage,
> +etc. Clang offers tools to perform these tasks.
> +
> +Here is an example workflow for profiling an instrumented kernel with PGO and
> +using the result to optimize the kernel:
> +
> +1) Install the kernel on the TEST machine.
> +
> +2) Reset the data counters right before running the load tests
> +
> +   .. code-block:: sh
> +
> +      $ echo 1 > /sys/kernel/debug/pgo/reset
> +
> +3) Run the load tests.
> +
> +4) Collect the raw profile data
> +
> +   .. code-block:: sh
> +
> +      $ cp -a /sys/kernel/debug/pgo/profraw /tmp/vmlinux.profraw
> +
> +5) (Optional) Download the raw profile data to the HOST machine.
> +
> +6) Process the raw profile data
> +
> +   .. code-block:: sh
> +
> +      $ llvm-profdata merge --output=vmlinux.profdata vmlinux.profraw
> +
> +   Note that multiple raw profile data files can be merged during this step.
> +
> +7) Rebuild the kernel using the profile data (PGO disabled)
> +
> +   .. code-block:: sh
> +
> +      $ make LLVM=1 KCFLAGS=-fprofile-use=vmlinux.profdata ...
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 79b400c97059f..cb1f1f2b2baf4 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -13948,6 +13948,15 @@ S:     Maintained
>  F:     include/linux/personality.h
>  F:     include/uapi/linux/personality.h
>
> +PGO BASED KERNEL PROFILING
> +M:     Sami Tolvanen <samitolvanen@google.com>
> +M:     Bill Wendling <wcw@google.com>
> +R:     Nathan Chancellor <natechancellor@gmail.com>
> +R:     Nick Desaulniers <ndesaulniers@google.com>
> +S:     Supported
> +F:     Documentation/dev-tools/pgo.rst
> +F:     kernel/pgo
> +
>  PHOENIX RC FLIGHT CONTROLLER ADAPTER
>  M:     Marcus Folkesson <marcus.folkesson@gmail.com>
>  L:     linux-input@vger.kernel.org
> diff --git a/Makefile b/Makefile
> index 9e73f82e0d863..9128bfe1ccc97 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -659,6 +659,9 @@ endif # KBUILD_EXTMOD
>  # Defaults to vmlinux, but the arch makefile usually adds further targets
>  all: vmlinux
>
> +CFLAGS_PGO_CLANG := -fprofile-generate
> +export CFLAGS_PGO_CLANG
> +
>  CFLAGS_GCOV    := -fprofile-arcs -ftest-coverage \
>         $(call cc-option,-fno-tree-loop-im) \
>         $(call cc-disable-warning,maybe-uninitialized,)
> diff --git a/arch/Kconfig b/arch/Kconfig
> index 24862d15f3a36..f39d3991f6bfe 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -1112,6 +1112,7 @@ config ARCH_SPLIT_ARG64
>            pairs of 32-bit arguments, select this option.
>
>  source "kernel/gcov/Kconfig"
> +source "kernel/pgo/Kconfig"
>
>  source "scripts/gcc-plugins/Kconfig"
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 21f851179ff08..36305ea61dc09 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -96,6 +96,7 @@ config X86
>         select ARCH_SUPPORTS_DEBUG_PAGEALLOC
>         select ARCH_SUPPORTS_NUMA_BALANCING     if X86_64
>         select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP       if NR_CPUS <= 4096
> +       select ARCH_SUPPORTS_PGO_CLANG          if X86_64
>         select ARCH_USE_BUILTIN_BSWAP
>         select ARCH_USE_QUEUED_RWLOCKS
>         select ARCH_USE_QUEUED_SPINLOCKS
> diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
> index fe605205b4ce2..383853e32f673 100644
> --- a/arch/x86/boot/Makefile
> +++ b/arch/x86/boot/Makefile
> @@ -71,6 +71,7 @@ KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
>  KBUILD_CFLAGS  += $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
>  KBUILD_CFLAGS  += -fno-asynchronous-unwind-tables
>  GCOV_PROFILE := n
> +PGO_PROFILE := n
>  UBSAN_SANITIZE := n
>
>  $(obj)/bzImage: asflags-y  := $(SVGA_MODE)
> diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
> index e0bc3988c3faa..ed12ab65f6065 100644
> --- a/arch/x86/boot/compressed/Makefile
> +++ b/arch/x86/boot/compressed/Makefile
> @@ -54,6 +54,7 @@ CFLAGS_sev-es.o += -I$(objtree)/arch/x86/lib/
>
>  KBUILD_AFLAGS  := $(KBUILD_CFLAGS) -D__ASSEMBLY__
>  GCOV_PROFILE := n
> +PGO_PROFILE := n
>  UBSAN_SANITIZE :=n
>
>  KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE)
> diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
> index a31de0c6ccde2..775fa0b368e98 100644
> --- a/arch/x86/crypto/Makefile
> +++ b/arch/x86/crypto/Makefile
> @@ -4,6 +4,8 @@
>
>  OBJECT_FILES_NON_STANDARD := y
>
> +PGO_PROFILE_curve25519-x86_64.o := n
> +

^ Do you have more info about this?

>  obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
>
>  obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
> diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
> index 02e3e42f380bd..26e2b3af0145c 100644
> --- a/arch/x86/entry/vdso/Makefile
> +++ b/arch/x86/entry/vdso/Makefile
> @@ -179,6 +179,7 @@ quiet_cmd_vdso = VDSO    $@
>  VDSO_LDFLAGS = -shared --hash-style=both --build-id=sha1 \
>         $(call ld-option, --eh-frame-hdr) -Bsymbolic
>  GCOV_PROFILE := n
> +PGO_PROFILE := n
>
>  quiet_cmd_vdso_and_check = VDSO    $@
>        cmd_vdso_and_check = $(cmd_vdso); $(cmd_vdso_check)
> diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
> index efd9e9ea17f25..f6cab2316c46a 100644
> --- a/arch/x86/kernel/vmlinux.lds.S
> +++ b/arch/x86/kernel/vmlinux.lds.S
> @@ -184,6 +184,8 @@ SECTIONS
>
>         BUG_TABLE
>
> +       PGO_CLANG_DATA
> +
>         ORC_UNWIND_TABLE
>
>         . = ALIGN(PAGE_SIZE);
> diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile
> index 84b09c230cbd5..5f22b31446ad4 100644
> --- a/arch/x86/platform/efi/Makefile
> +++ b/arch/x86/platform/efi/Makefile
> @@ -2,6 +2,7 @@
>  OBJECT_FILES_NON_STANDARD_efi_thunk_$(BITS).o := y
>  KASAN_SANITIZE := n
>  GCOV_PROFILE := n
> +PGO_PROFILE := n
>
>  obj-$(CONFIG_EFI)              += quirks.o efi.o efi_$(BITS).o efi_stub_$(BITS).o
>  obj-$(CONFIG_EFI_MIXED)                += efi_thunk_$(BITS).o
> diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
> index 95ea17a9d20cb..36f20e99da0bc 100644
> --- a/arch/x86/purgatory/Makefile
> +++ b/arch/x86/purgatory/Makefile
> @@ -23,6 +23,7 @@ targets += purgatory.ro purgatory.chk
>
>  # Sanitizer, etc. runtimes are unavailable and cannot be linked here.
>  GCOV_PROFILE   := n
> +PGO_PROFILE    := n
>  KASAN_SANITIZE := n
>  UBSAN_SANITIZE := n
>  KCSAN_SANITIZE := n
> diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile
> index 83f1b6a56449f..21797192f958f 100644
> --- a/arch/x86/realmode/rm/Makefile
> +++ b/arch/x86/realmode/rm/Makefile
> @@ -76,4 +76,5 @@ KBUILD_CFLAGS := $(REALMODE_CFLAGS) -D_SETUP -D_WAKEUP \
>  KBUILD_AFLAGS  := $(KBUILD_CFLAGS) -D__ASSEMBLY__
>  KBUILD_CFLAGS  += -fno-asynchronous-unwind-tables
>  GCOV_PROFILE := n
> +PGO_PROFILE := n
>  UBSAN_SANITIZE := n
> diff --git a/arch/x86/um/vdso/Makefile b/arch/x86/um/vdso/Makefile
> index 5943387e3f357..54f5768f58530 100644
> --- a/arch/x86/um/vdso/Makefile
> +++ b/arch/x86/um/vdso/Makefile
> @@ -64,6 +64,7 @@ quiet_cmd_vdso = VDSO    $@
>
>  VDSO_LDFLAGS = -fPIC -shared -Wl,--hash-style=sysv
>  GCOV_PROFILE := n
> +PGO_PROFILE := n
>
>  #
>  # Install the unstripped copy of vdso*.so listed in $(vdso-install-y).
> diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
> index 8a94388e38b33..2d81623b33f29 100644
> --- a/drivers/firmware/efi/libstub/Makefile
> +++ b/drivers/firmware/efi/libstub/Makefile
> @@ -40,6 +40,7 @@ KBUILD_CFLAGS                 := $(cflags-y) -Os -DDISABLE_BRANCH_PROFILING \
>  KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
>
>  GCOV_PROFILE                   := n
> +PGO_PROFILE                    := n
>  # Sanitizer runtimes are unavailable and cannot be linked here.
>  KASAN_SANITIZE                 := n
>  KCSAN_SANITIZE                 := n
> diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
> index b2b3d81b1535a..3a591bb18c5fb 100644
> --- a/include/asm-generic/vmlinux.lds.h
> +++ b/include/asm-generic/vmlinux.lds.h
> @@ -316,6 +316,49 @@
>  #define THERMAL_TABLE(name)
>  #endif
>
> +#ifdef CONFIG_PGO_CLANG
> +#define PGO_CLANG_DATA                                                 \
> +       __llvm_prf_data : AT(ADDR(__llvm_prf_data) - LOAD_OFFSET) {     \
> +               . = ALIGN(8);                                           \
> +               __llvm_prf_start = .;                                   \
> +               __llvm_prf_data_start = .;                              \
> +               KEEP(*(__llvm_prf_data))                                \
> +               . = ALIGN(8);                                           \
> +               __llvm_prf_data_end = .;                                \
> +       }                                                               \
> +       __llvm_prf_cnts : AT(ADDR(__llvm_prf_cnts) - LOAD_OFFSET) {     \
> +               . = ALIGN(8);                                           \
> +               __llvm_prf_cnts_start = .;                              \
> +               KEEP(*(__llvm_prf_cnts))                                \
> +               . = ALIGN(8);                                           \
> +               __llvm_prf_cnts_end = .;                                \
> +       }                                                               \
> +       __llvm_prf_names : AT(ADDR(__llvm_prf_names) - LOAD_OFFSET) {   \
> +               . = ALIGN(8);                                           \
> +               __llvm_prf_names_start = .;                             \
> +               KEEP(*(__llvm_prf_names))                               \
> +               . = ALIGN(8);                                           \
> +               __llvm_prf_names_end = .;                               \
> +               . = ALIGN(8);                                           \
> +       }                                                               \
> +       __llvm_prf_vals : AT(ADDR(__llvm_prf_vals) - LOAD_OFFSET) {     \
> +               __llvm_prf_vals_start = .;                              \
> +               KEEP(*(__llvm_prf_vals))                                \
> +               . = ALIGN(8);                                           \
> +               __llvm_prf_vals_end = .;                                \
> +               . = ALIGN(8);                                           \
> +       }                                                               \
> +       __llvm_prf_vnds : AT(ADDR(__llvm_prf_vnds) - LOAD_OFFSET) {     \
> +               __llvm_prf_vnds_start = .;                              \
> +               KEEP(*(__llvm_prf_vnds))                                \
> +               . = ALIGN(8);                                           \
> +               __llvm_prf_vnds_end = .;                                \
> +               __llvm_prf_end = .;                                     \
> +       }
> +#else
> +#define PGO_CLANG_DATA
> +#endif
> +
>  #define KERNEL_DTB()                                                   \
>         STRUCT_ALIGN();                                                 \
>         __dtb_start = .;                                                \
> @@ -1125,6 +1168,7 @@
>                 CONSTRUCTORS                                            \
>         }                                                               \
>         BUG_TABLE                                                       \
> +       PGO_CLANG_DATA

^ since dropping other arch support from v4, could probably drop this,
too. We should be covered by the modification to
arch/x86/kernel/vmlinux.lds.S, right?

>
>  #define INIT_TEXT_SECTION(inittext_align)                              \
>         . = ALIGN(inittext_align);                                      \
> diff --git a/kernel/Makefile b/kernel/Makefile
> index aa7368c7eabf3..0b34ca228ba46 100644
> --- a/kernel/Makefile
> +++ b/kernel/Makefile
> @@ -111,6 +111,7 @@ obj-$(CONFIG_BPF) += bpf/
>  obj-$(CONFIG_KCSAN) += kcsan/
>  obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o
>  obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call.o
> +obj-$(CONFIG_PGO_CLANG) += pgo/
>
>  obj-$(CONFIG_PERF_EVENTS) += events/
>
> diff --git a/kernel/pgo/Kconfig b/kernel/pgo/Kconfig
> new file mode 100644
> index 0000000000000..76a640b6cf6ed
> --- /dev/null
> +++ b/kernel/pgo/Kconfig
> @@ -0,0 +1,35 @@
> +# SPDX-License-Identifier: GPL-2.0-only
> +menu "Profile Guided Optimization (PGO) (EXPERIMENTAL)"
> +
> +config ARCH_SUPPORTS_PGO_CLANG
> +       bool
> +
> +config PGO_CLANG
> +       bool "Enable clang's PGO-based kernel profiling"
> +       depends on DEBUG_FS
> +       depends on ARCH_SUPPORTS_PGO_CLANG
> +       depends on CC_IS_CLANG && CLANG_VERSION >= 120000
> +       help
> +         This option enables clang's PGO (Profile Guided Optimization) based
> +         code profiling to better optimize the kernel.
> +
> +         If unsure, say N.
> +
> +         Run a representative workload for your application on a kernel
> +         compiled with this option and download the raw profile file from
> +         /sys/kernel/debug/pgo/profraw. This file needs to be processed with
> +         llvm-profdata. It may be merged with other collected raw profiles.
> +
> +         Copy the resulting profile file into vmlinux.profdata, and enable
> +         KCFLAGS=-fprofile-use=vmlinux.profdata to produce an optimized
> +         kernel.
> +
> +         Note that a kernel compiled with profiling flags will be
> +         significantly larger and run slower. Also be sure to exclude files
> +         from profiling which are not linked to the kernel image to prevent
> +         linker errors.
> +
> +         Note that the debugfs filesystem has to be mounted to access
> +         profiling data.
> +
> +endmenu
> diff --git a/kernel/pgo/Makefile b/kernel/pgo/Makefile
> new file mode 100644
> index 0000000000000..41e27cefd9a47
> --- /dev/null
> +++ b/kernel/pgo/Makefile
> @@ -0,0 +1,5 @@
> +# SPDX-License-Identifier: GPL-2.0
> +GCOV_PROFILE   := n
> +PGO_PROFILE    := n
> +
> +obj-y  += fs.o instrument.o
> diff --git a/kernel/pgo/fs.c b/kernel/pgo/fs.c
> new file mode 100644
> index 0000000000000..68b24672be10a
> --- /dev/null
> +++ b/kernel/pgo/fs.c
> @@ -0,0 +1,382 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2019 Google, Inc.
> + *
> + * Author:
> + *     Sami Tolvanen <samitolvanen@google.com>
> + *
> + * This software is licensed under the terms of the GNU General Public
> + * License version 2, as published by the Free Software Foundation, and
> + * may be copied, distributed, and modified under those terms.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + */
> +
> +#define pr_fmt(fmt)    "pgo: " fmt
> +
> +#include <linux/kernel.h>
> +#include <linux/debugfs.h>
> +#include <linux/fs.h>
> +#include <linux/module.h>
> +#include <linux/slab.h>
> +#include <linux/vmalloc.h>
> +#include "pgo.h"
> +
> +static struct dentry *directory;
> +
> +struct prf_private_data {
> +       void *buffer;
> +       unsigned long size;
> +};
> +
> +/*
> + * Raw profile data format:
> + *
> + *     - llvm_prf_header
> + *     - __llvm_prf_data
> + *     - __llvm_prf_cnts
> + *     - __llvm_prf_names
> + *     - zero padding to 8 bytes
> + *     - for each llvm_prf_data in __llvm_prf_data:
> + *             - llvm_prf_value_data
> + *                     - llvm_prf_value_record + site count array
> + *                             - llvm_prf_value_node_data
> + *                             ...
> + *                     ...
> + *             ...
> + */
> +
> +static void prf_fill_header(void **buffer)
> +{
> +       struct llvm_prf_header *header = *(struct llvm_prf_header **)buffer;
> +
> +       header->magic = LLVM_PRF_MAGIC;
> +       header->version = LLVM_PRF_VARIANT_MASK_IR | LLVM_PRF_VERSION;
> +       header->data_size = prf_data_count();
> +       header->padding_bytes_before_counters = 0;
> +       header->counters_size = prf_cnts_count();
> +       header->padding_bytes_after_counters = 0;
> +       header->names_size = prf_names_count();
> +       header->counters_delta = (u64)__llvm_prf_cnts_start;
> +       header->names_delta = (u64)__llvm_prf_names_start;
> +       header->value_kind_last = LLVM_PRF_IPVK_LAST;
> +
> +       *buffer += sizeof(*header);
> +}
> +
> +/*
> + * Copy the source into the buffer, incrementing the pointer into buffer in the
> + * process.
> + */
> +static void prf_copy_to_buffer(void **buffer, void *src, unsigned long size)
> +{
> +       memcpy(*buffer, src, size);
> +       *buffer += size;
> +}
> +
> +static u32 __prf_get_value_size(struct llvm_prf_data *p, u32 *value_kinds)
> +{
> +       struct llvm_prf_value_node **nodes =
> +               (struct llvm_prf_value_node **)p->values;
> +       u32 kinds = 0;
> +       u32 size = 0;
> +       unsigned int kind;
> +       unsigned int n;
> +       unsigned int s = 0;
> +
> +       for (kind = 0; kind < ARRAY_SIZE(p->num_value_sites); kind++) {
> +               unsigned int sites = p->num_value_sites[kind];
> +
> +               if (!sites)
> +                       continue;
> +
> +               /* Record + site count array */
> +               size += prf_get_value_record_size(sites);
> +               kinds++;
> +
> +               if (!nodes)
> +                       continue;
> +
> +               for (n = 0; n < sites; n++) {
> +                       u32 count = 0;
> +                       struct llvm_prf_value_node *site = nodes[s + n];
> +
> +                       while (site && ++count <= U8_MAX)
> +                               site = site->next;
> +
> +                       size += count *
> +                               sizeof(struct llvm_prf_value_node_data);
> +               }
> +
> +               s += sites;
> +       }
> +
> +       if (size)
> +               size += sizeof(struct llvm_prf_value_data);
> +
> +       if (value_kinds)
> +               *value_kinds = kinds;
> +
> +       return size;
> +}
> +
> +static u32 prf_get_value_size(void)
> +{
> +       u32 size = 0;
> +       struct llvm_prf_data *p;
> +
> +       for (p = __llvm_prf_data_start; p < __llvm_prf_data_end; p++)
> +               size += __prf_get_value_size(p, NULL);
> +
> +       return size;
> +}
> +
> +/* Serialize the profiling's value. */
> +static void prf_serialize_value(struct llvm_prf_data *p, void **buffer)
> +{
> +       struct llvm_prf_value_data header;
> +       struct llvm_prf_value_node **nodes =
> +               (struct llvm_prf_value_node **)p->values;
> +       unsigned int kind;
> +       unsigned int n;
> +       unsigned int s = 0;
> +
> +       header.total_size = __prf_get_value_size(p, &header.num_value_kinds);
> +
> +       if (!header.num_value_kinds)
> +               /* Nothing to write. */
> +               return;
> +
> +       prf_copy_to_buffer(buffer, &header, sizeof(header));
> +
> +       for (kind = 0; kind < ARRAY_SIZE(p->num_value_sites); kind++) {
> +               struct llvm_prf_value_record *record;
> +               u8 *counts;
> +               unsigned int sites = p->num_value_sites[kind];
> +
> +               if (!sites)
> +                       continue;
> +
> +               /* Profiling value record. */
> +               record = *(struct llvm_prf_value_record **)buffer;
> +               *buffer += prf_get_value_record_header_size();
> +
> +               record->kind = kind;
> +               record->num_value_sites = sites;
> +
> +               /* Site count array. */
> +               counts = *(u8 **)buffer;
> +               *buffer += prf_get_value_record_site_count_size(sites);
> +
> +               /*
> +                * If we don't have nodes, we can skip updating the site count
> +                * array, because the buffer is zero filled.
> +                */
> +               if (!nodes)
> +                       continue;
> +
> +               for (n = 0; n < sites; n++) {
> +                       u32 count = 0;
> +                       struct llvm_prf_value_node *site = nodes[s + n];
> +
> +                       while (site && ++count <= U8_MAX) {
> +                               prf_copy_to_buffer(buffer, site,
> +                                                  sizeof(struct llvm_prf_value_node_data));
> +                               site = site->next;
> +                       }
> +
> +                       counts[n] = (u8)count;
> +               }
> +
> +               s += sites;
> +       }
> +}
> +
> +static void prf_serialize_values(void **buffer)
> +{
> +       struct llvm_prf_data *p;
> +
> +       for (p = __llvm_prf_data_start; p < __llvm_prf_data_end; p++)
> +               prf_serialize_value(p, buffer);
> +}
> +
> +static inline unsigned long prf_get_padding(unsigned long size)
> +{
> +       return 7 & (8 - size % 8);
> +}

This is ugly but it looks like this corresponds with
__llvm_profile_get_num_padding_bytes() in
llvm-project/compiler-rt/lib/profile/InstrProfiling.c?  If there are
platforms where `sizeof(unsigned long) != 8` and are supported by the
kernel, it might be nicer to spell out `sizeof(unsigned long)` rather
than hardcode 8. Should we also use u64 for the parameter and u8 for
the return type?

> +
> +static unsigned long prf_buffer_size(void)
> +{
> +       return sizeof(struct llvm_prf_header) +
> +                       prf_data_size() +
> +                       prf_cnts_size() +
> +                       prf_names_size() +
> +                       prf_get_padding(prf_names_size()) +
> +                       prf_get_value_size();
> +}
> +
> +/* Serialize the profiling data into a format LLVM's tools can understand. */
> +static int prf_serialize(struct prf_private_data *p)
> +{
> +       int err = 0;
> +       void *buffer;
> +
> +       p->size = prf_buffer_size();
> +       p->buffer = vzalloc(p->size);
> +
> +       if (!p->buffer) {
> +               err = -ENOMEM;
> +               goto out;
> +       }
> +
> +       buffer = p->buffer;
> +
> +       prf_fill_header(&buffer);
> +       prf_copy_to_buffer(&buffer, __llvm_prf_data_start,  prf_data_size());
> +       prf_copy_to_buffer(&buffer, __llvm_prf_cnts_start,  prf_cnts_size());
> +       prf_copy_to_buffer(&buffer, __llvm_prf_names_start, prf_names_size());
> +       buffer += prf_get_padding(prf_names_size());
> +
> +       prf_serialize_values(&buffer);
> +
> +out:
> +       return err;
> +}
> +
> +/* open() implementation for PGO. Creates a copy of the profiling data set. */
> +static int prf_open(struct inode *inode, struct file *file)
> +{
> +       struct prf_private_data *data;
> +       unsigned long flags;
> +       int err;
> +
> +       data = kzalloc(sizeof(*data), GFP_KERNEL);
> +       if (!data) {
> +               err = -ENOMEM;
> +               goto out;
> +       }
> +
> +       flags = prf_lock();
> +
> +       err = prf_serialize(data);
> +       if (err) {
> +               kfree(data);
> +               goto out_unlock;
> +       }
> +
> +       file->private_data = data;
> +
> +out_unlock:
> +       prf_unlock(flags);

This is an improvement over earlier editions, but kfree() is still
within the critical section.  I wonder if it can be moved out? If not,
why, precisely? Otherwise are we sure we have the concurrency correct?
Might be worth pursuing in a follow up patch once the core has landed.

Also, the comments above the definitions of pgo_lock and
allocate_node() seem to indicate the same lock is used for
serialization.  I'm curious to know more about why we can't access
current_node and serialize at the same time? At the least, it seems
that `prf_serialize` should have a similar comment to `allocate_node`
regarding the caller being expected to hold the `pgo_lock` via a call
to `prf_lock()`, yeah?

I can't help but look at the two call sites of prf_lock() and be
suspicious that pgo_lock is technically guarding access to more
variables than described in the comment.  It would be good to explain
exactly what is going on should we need to revisit the concurrency
here in the future (and lower the bus factor).

> +out:
> +       return err;
> +}
> +
> +/* read() implementation for PGO. */
> +static ssize_t prf_read(struct file *file, char __user *buf, size_t count,
> +                       loff_t *ppos)
> +{
> +       struct prf_private_data *data = file->private_data;
> +
> +       BUG_ON(!data);
> +
> +       return simple_read_from_buffer(buf, count, ppos, data->buffer,
> +                                      data->size);
> +}
> +
> +/* release() implementation for PGO. Release resources allocated by open(). */
> +static int prf_release(struct inode *inode, struct file *file)
> +{
> +       struct prf_private_data *data = file->private_data;
> +
> +       if (data) {
> +               vfree(data->buffer);
> +               kfree(data);
> +       }
> +
> +       return 0;
> +}
> +
> +static const struct file_operations prf_fops = {
> +       .owner          = THIS_MODULE,
> +       .open           = prf_open,
> +       .read           = prf_read,
> +       .llseek         = default_llseek,
> +       .release        = prf_release
> +};
> +
> +/* write() implementation for resetting PGO's profile data. */
> +static ssize_t reset_write(struct file *file, const char __user *addr,
> +                          size_t len, loff_t *pos)
> +{
> +       struct llvm_prf_data *data;
> +
> +       memset(__llvm_prf_cnts_start, 0, prf_cnts_size());
> +
> +       for (data = __llvm_prf_data_start; data < __llvm_prf_data_end; ++data) {

^ this patch mixes pre-increment and post-increment in loops.  The
kernel coding style docs (Documentation/process/coding-style.rst)
don't make a call on this, but it might be nice to be internally
consistent throughout the patch.  I assume that's from having mixed
authors.  Not a huge issue, but I'm pedantic.

> +               struct llvm_prf_value_node **vnodes;
> +               u64 current_vsite_count;
> +               u32 i;
> +
> +               if (!data->values)
> +                       continue;
> +
> +               current_vsite_count = 0;
> +               vnodes = (struct llvm_prf_value_node **)data->values;
> +
> +               for (i = LLVM_PRF_IPVK_FIRST; i <= LLVM_PRF_IPVK_LAST; ++i)
> +                       current_vsite_count += data->num_value_sites[i];
> +
> +               for (i = 0; i < current_vsite_count; ++i) {
> +                       struct llvm_prf_value_node *current_vnode = vnodes[i];
> +
> +                       while (current_vnode) {
> +                               current_vnode->count = 0;
> +                               current_vnode = current_vnode->next;
> +                       }
> +               }
> +       }
> +
> +       return len;
> +}
> +
> +static const struct file_operations prf_reset_fops = {
> +       .owner          = THIS_MODULE,
> +       .write          = reset_write,
> +       .llseek         = noop_llseek,
> +};
> +
> +/* Create debugfs entries. */
> +static int __init pgo_init(void)
> +{
> +       directory = debugfs_create_dir("pgo", NULL);
> +       if (!directory)
> +               goto err_remove;
> +
> +       if (!debugfs_create_file("profraw", 0600, directory, NULL,
> +                                &prf_fops))
> +               goto err_remove;
> +
> +       if (!debugfs_create_file("reset", 0200, directory, NULL,
> +                                &prf_reset_fops))
> +               goto err_remove;
> +
> +       return 0;
> +
> +err_remove:
> +       pr_err("initialization failed\n");
> +       return -EIO;
> +}
> +
> +/* Remove debugfs entries. */
> +static void __exit pgo_exit(void)
> +{
> +       debugfs_remove_recursive(directory);
> +}
> +
> +module_init(pgo_init);
> +module_exit(pgo_exit);
> diff --git a/kernel/pgo/instrument.c b/kernel/pgo/instrument.c
> new file mode 100644
> index 0000000000000..6084ff0652e85
> --- /dev/null
> +++ b/kernel/pgo/instrument.c
> @@ -0,0 +1,185 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2019 Google, Inc.
> + *
> + * Author:
> + *     Sami Tolvanen <samitolvanen@google.com>
> + *
> + * This software is licensed under the terms of the GNU General Public
> + * License version 2, as published by the Free Software Foundation, and
> + * may be copied, distributed, and modified under those terms.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + */
> +
> +#define pr_fmt(fmt)    "pgo: " fmt
> +
> +#include <linux/bitops.h>
> +#include <linux/kernel.h>
> +#include <linux/export.h>
> +#include <linux/spinlock.h>
> +#include <linux/types.h>
> +#include "pgo.h"
> +
> +/* Lock guarding value node access and serialization. */
> +static DEFINE_SPINLOCK(pgo_lock);
> +static int current_node;
> +
> +unsigned long prf_lock(void)
> +{
> +       unsigned long flags;
> +
> +       spin_lock_irqsave(&pgo_lock, flags);
> +
> +       return flags;
> +}
> +
> +void prf_unlock(unsigned long flags)
> +{
> +       spin_unlock_irqrestore(&pgo_lock, flags);
> +}
> +
> +/*
> + * Return a newly allocated profiling value node which contains the tracked
> + * value by the value profiler.
> + * Note: caller *must* hold pgo_lock.
> + */
> +static struct llvm_prf_value_node *allocate_node(struct llvm_prf_data *p,
> +                                                u32 index, u64 value)
> +{
> +       if (&__llvm_prf_vnds_start[current_node + 1] >= __llvm_prf_vnds_end)
> +               return NULL; /* Out of nodes */
> +
> +       current_node++;
> +
> +       /* Make sure the node is entirely within the section */
> +       if (&__llvm_prf_vnds_start[current_node] >= __llvm_prf_vnds_end ||
> +           &__llvm_prf_vnds_start[current_node + 1] > __llvm_prf_vnds_end)
> +               return NULL;
> +
> +       return &__llvm_prf_vnds_start[current_node];
> +}
> +
> +/*
> + * Counts the number of times a target value is seen.
> + *
> + * Records the target value for the CounterIndex if not seen before. Otherwise,
> + * increments the counter associated w/ the target value.
> + */
> +void __llvm_profile_instrument_target(u64 target_value, void *data, u32 index);
> +void __llvm_profile_instrument_target(u64 target_value, void *data, u32 index)
> +{
> +       struct llvm_prf_data *p = (struct llvm_prf_data *)data;
> +       struct llvm_prf_value_node **counters;
> +       struct llvm_prf_value_node *curr;
> +       struct llvm_prf_value_node *min = NULL;
> +       struct llvm_prf_value_node *prev = NULL;
> +       u64 min_count = U64_MAX;
> +       u8 values = 0;
> +       unsigned long flags;
> +
> +       if (!p || !p->values)
> +               return;
> +
> +       counters = (struct llvm_prf_value_node **)p->values;
> +       curr = counters[index];
> +
> +       while (curr) {
> +               if (target_value == curr->value) {
> +                       curr->count++;
> +                       return;
> +               }
> +
> +               if (curr->count < min_count) {
> +                       min_count = curr->count;
> +                       min = curr;
> +               }
> +
> +               prev = curr;
> +               curr = curr->next;
> +               values++;
> +       }
> +
> +       if (values >= LLVM_PRF_MAX_NUM_VALS_PER_SITE) {
> +               if (!min->count || !(--min->count)) {
> +                       curr = min;
> +                       curr->value = target_value;
> +                       curr->count++;
> +               }
> +               return;
> +       }
> +
> +       /* Lock when updating the value node structure. */
> +       flags = prf_lock();
> +
> +       curr = allocate_node(p, index, target_value);
> +       if (!curr)
> +               goto out;
> +
> +       curr->value = target_value;
> +       curr->count++;
> +
> +       if (!counters[index])
> +               counters[index] = curr;
> +       else if (prev && !prev->next)
> +               prev->next = curr;
> +
> +out:
> +       prf_unlock(flags);
> +}
> +EXPORT_SYMBOL(__llvm_profile_instrument_target);
> +
> +/* Counts the number of times a range of targets values are seen. */
> +void __llvm_profile_instrument_range(u64 target_value, void *data,
> +                                    u32 index, s64 precise_start,
> +                                    s64 precise_last, s64 large_value);
> +void __llvm_profile_instrument_range(u64 target_value, void *data,
> +                                    u32 index, s64 precise_start,
> +                                    s64 precise_last, s64 large_value)
> +{
> +       if (large_value != S64_MIN && (s64)target_value >= large_value)
> +               target_value = large_value;
> +       else if ((s64)target_value < precise_start ||
> +                (s64)target_value > precise_last)
> +               target_value = precise_last + 1;
> +
> +       __llvm_profile_instrument_target(target_value, data, index);
> +}
> +EXPORT_SYMBOL(__llvm_profile_instrument_range);
> +
> +static u64 inst_prof_get_range_rep_value(u64 value)
> +{
> +       if (value <= 8)
> +               /* The first ranges are individually tracked, us it as is. */

^ typo, "use"

> +               return value;
> +       else if (value >= 513)
> +               /* The last range is mapped to its lowest value. */
> +               return 513;
> +       else if (hweight64(value) == 1)
> +               /* If it's a power of two, use it as is. */
> +               return value;
> +
> +       /* Otherwise, take to the previous power of two + 1. */
> +       return (1 << (64 - __builtin_clzll(value) - 1)) + 1;
> +}
> +
> +/*
> + * The target values are partitioned into multiple ranges. The range spec is
> + * defined in compiler-rt/include/profile/InstrProfData.inc.
> + */
> +void __llvm_profile_instrument_memop(u64 target_value, void *data,
> +                                    u32 counter_index);
> +void __llvm_profile_instrument_memop(u64 target_value, void *data,
> +                                    u32 counter_index)
> +{
> +       u64 rep_value;
> +
> +       /* Map the target value to the representative value of its range. */
> +       rep_value = inst_prof_get_range_rep_value(target_value);
> +       __llvm_profile_instrument_target(rep_value, data, counter_index);
> +}
> +EXPORT_SYMBOL(__llvm_profile_instrument_memop);
> diff --git a/kernel/pgo/pgo.h b/kernel/pgo/pgo.h
> new file mode 100644
> index 0000000000000..df0aa278f28bd
> --- /dev/null
> +++ b/kernel/pgo/pgo.h
> @@ -0,0 +1,206 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (C) 2019 Google, Inc.
> + *
> + * Author:
> + *     Sami Tolvanen <samitolvanen@google.com>
> + *
> + * This software is licensed under the terms of the GNU General Public
> + * License version 2, as published by the Free Software Foundation, and
> + * may be copied, distributed, and modified under those terms.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + */
> +
> +#ifndef _PGO_H
> +#define _PGO_H
> +
> +/*
> + * Note: These internal LLVM definitions must match the compiler version.
> + * See llvm/include/llvm/ProfileData/InstrProfData.inc in LLVM's source code.
> + */
> +
> +#ifdef CONFIG_64BIT
> +       #define LLVM_PRF_MAGIC          \
> +               ((u64)255 << 56 |       \
> +                (u64)'l' << 48 |       \
> +                (u64)'p' << 40 |       \
> +                (u64)'r' << 32 |       \
> +                (u64)'o' << 24 |       \
> +                (u64)'f' << 16 |       \
> +                (u64)'r' << 8  |       \
> +                (u64)129)
> +#else
> +       #define LLVM_PRF_MAGIC          \
> +               ((u64)255 << 56 |       \
> +                (u64)'l' << 48 |       \
> +                (u64)'p' << 40 |       \
> +                (u64)'r' << 32 |       \
> +                (u64)'o' << 24 |       \
> +                (u64)'f' << 16 |       \
> +                (u64)'R' << 8  |       \
> +                (u64)129)
> +#endif
> +
> +#define LLVM_PRF_VERSION               5
> +#define LLVM_PRF_DATA_ALIGN            8
> +#define LLVM_PRF_IPVK_FIRST            0
> +#define LLVM_PRF_IPVK_LAST             1
> +#define LLVM_PRF_MAX_NUM_VALS_PER_SITE 16

llvm/include/llvm/ProfileData/InstrProfData.inc defines
INSTR_PROF_MAX_NUM_VAL_PER_SITE as 255; does that need to match?

> +
> +#define LLVM_PRF_VARIANT_MASK_IR       (0x1ull << 56)
> +#define LLVM_PRF_VARIANT_MASK_CSIR     (0x1ull << 57)
> +
> +/**
> + * struct llvm_prf_header - represents the raw profile header data structure.
> + * @magic: the magic token for the file format.
> + * @version: the version of the file format.
> + * @data_size: the number of entries in the profile data section.
> + * @padding_bytes_before_counters: the number of padding bytes before the
> + *   counters.
> + * @counters_size: the size in bytes of the LLVM profile section containing the
> + *   counters.
> + * @padding_bytes_after_counters: the number of padding bytes after the
> + *   counters.
> + * @names_size: the size in bytes of the LLVM profile section containing the
> + *   counters' names.
> + * @counters_delta: the beginning of the LLMV profile counters section.
> + * @names_delta: the beginning of the LLMV profile names section.
> + * @value_kind_last: the last profile value kind.
> + */
> +struct llvm_prf_header {
> +       u64 magic;
> +       u64 version;
> +       u64 data_size;
> +       u64 padding_bytes_before_counters;
> +       u64 counters_size;
> +       u64 padding_bytes_after_counters;
> +       u64 names_size;
> +       u64 counters_delta;
> +       u64 names_delta;
> +       u64 value_kind_last;
> +};
> +
> +/**
> + * struct llvm_prf_data - represents the per-function control structure.
> + * @name_ref: the reference to the function's name.
> + * @func_hash: the hash value of the function.
> + * @counter_ptr: a pointer to the profile counter.
> + * @function_ptr: a pointer to the function.
> + * @values: the profiling values associated with this function.
> + * @num_counters: the number of counters in the function.
> + * @num_value_sites: the number of value profile sites.
> + */
> +struct llvm_prf_data {
> +       const u64 name_ref;
> +       const u64 func_hash;
> +       const void *counter_ptr;
> +       const void *function_ptr;
> +       void *values;
> +       const u32 num_counters;
> +       const u16 num_value_sites[LLVM_PRF_IPVK_LAST + 1];
> +} __aligned(LLVM_PRF_DATA_ALIGN);
> +
> +/**
> + * structure llvm_prf_value_node_data - represents the data part of the struct
> + *   llvm_prf_value_node data structure.
> + * @value: the value counters.
> + * @count: the counters' count.
> + */
> +struct llvm_prf_value_node_data {
> +       u64 value;
> +       u64 count;
> +};
> +
> +/**
> + * struct llvm_prf_value_node - represents an internal data structure used by
> + *   the value profiler.
> + * @value: the value counters.
> + * @count: the counters' count.
> + * @next: the next value node.
> + */
> +struct llvm_prf_value_node {
> +       u64 value;
> +       u64 count;
> +       struct llvm_prf_value_node *next;
> +};
> +
> +/**
> + * struct llvm_prf_value_data - represents the value profiling data in indexed
> + *   format.
> + * @total_size: the total size in bytes including this field.
> + * @num_value_kinds: the number of value profile kinds that has value profile
> + *   data.
> + */
> +struct llvm_prf_value_data {
> +       u32 total_size;
> +       u32 num_value_kinds;
> +};
> +
> +/**
> + * struct llvm_prf_value_record - represents the on-disk layout of the value
> + *   profile data of a particular kind for one function.
> + * @kind: the kind of the value profile record.
> + * @num_value_sites: the number of value profile sites.
> + * @site_count_array: the first element of the array that stores the number
> + *   of profiled values for each value site.
> + */
> +struct llvm_prf_value_record {
> +       u32 kind;
> +       u32 num_value_sites;
> +       u8 site_count_array[];
> +};
> +
> +#define prf_get_value_record_header_size()             \
> +       offsetof(struct llvm_prf_value_record, site_count_array)
> +#define prf_get_value_record_site_count_size(sites)    \
> +       roundup((sites), 8)
> +#define prf_get_value_record_size(sites)               \
> +       (prf_get_value_record_header_size() +           \
> +        prf_get_value_record_site_count_size((sites)))
> +
> +/* Data sections */
> +extern struct llvm_prf_data __llvm_prf_data_start[];
> +extern struct llvm_prf_data __llvm_prf_data_end[];
> +
> +extern u64 __llvm_prf_cnts_start[];
> +extern u64 __llvm_prf_cnts_end[];
> +
> +extern char __llvm_prf_names_start[];
> +extern char __llvm_prf_names_end[];
> +
> +extern struct llvm_prf_value_node __llvm_prf_vnds_start[];
> +extern struct llvm_prf_value_node __llvm_prf_vnds_end[];
> +
> +/* Locking for vnodes */
> +extern unsigned long prf_lock(void);
> +extern void prf_unlock(unsigned long flags);
> +
> +#define __DEFINE_PRF_SIZE(s) \
> +       static inline unsigned long prf_ ## s ## _size(void)            \
> +       {                                                               \
> +               unsigned long start =                                   \
> +                       (unsigned long)__llvm_prf_ ## s ## _start;      \
> +               unsigned long end =                                     \
> +                       (unsigned long)__llvm_prf_ ## s ## _end;        \
> +               return roundup(end - start,                             \
> +                               sizeof(__llvm_prf_ ## s ## _start[0])); \
> +       }                                                               \
> +       static inline unsigned long prf_ ## s ## _count(void)           \
> +       {                                                               \
> +               return prf_ ## s ## _size() /                           \
> +                       sizeof(__llvm_prf_ ## s ## _start[0]);          \
> +       }
> +
> +__DEFINE_PRF_SIZE(data);
> +__DEFINE_PRF_SIZE(cnts);
> +__DEFINE_PRF_SIZE(names);
> +__DEFINE_PRF_SIZE(vnds);
> +
> +#undef __DEFINE_PRF_SIZE
> +
> +#endif /* _PGO_H */
> diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
> index 213677a5ed33e..9b218afb5cb87 100644
> --- a/scripts/Makefile.lib
> +++ b/scripts/Makefile.lib
> @@ -143,6 +143,16 @@ _c_flags += $(if $(patsubst n%,, \
>                 $(CFLAGS_GCOV))
>  endif
>
> +#
> +# Enable clang's PGO profiling flags for a file or directory depending on
> +# variables PGO_PROFILE_obj.o and PGO_PROFILE.
> +#
> +ifeq ($(CONFIG_PGO_CLANG),y)
> +_c_flags += $(if $(patsubst n%,, \
> +               $(PGO_PROFILE_$(basetarget).o)$(PGO_PROFILE)y), \
> +               $(CFLAGS_PGO_CLANG))
> +endif
> +
>  #
>  # Enable address sanitizer flags for kernel except some files or directories
>  # we don't want to check (depends on variables KASAN_SANITIZE_obj.o, KASAN_SANITIZE)
> --
> 2.30.0.284.gd98b1dd5eaa7-goog
>
Sedat Dilek Jan. 21, 2021, 2:03 a.m. UTC | #21
On Mon, Jan 18, 2021 at 10:56 PM Bill Wendling <morbo@google.com> wrote:
>
> On Mon, Jan 18, 2021 at 9:26 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> >
> > On Mon, Jan 18, 2021 at 1:39 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > >
> > > On Mon, Jan 18, 2021 at 3:32 AM Bill Wendling <morbo@google.com> wrote:
> > > >
> > > > On Sun, Jan 17, 2021 at 4:27 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > > >
> > > > > [ big snip ]
> > > >
> > > > [More snippage.]
> > > >
> > > > > [ CC Fangrui ]
> > > > >
> > > > > With the attached...
> > > > >
> > > > >    [PATCH v3] module: Ignore _GLOBAL_OFFSET_TABLE_ when warning for
> > > > > undefined symbols
> > > > >
> > > > > ...I was finally able to boot into a rebuild PGO-optimized Linux-kernel.
> > > > > For details see ClangBuiltLinux issue #1250 "Unknown symbol
> > > > > _GLOBAL_OFFSET_TABLE_ loading kernel modules".
> > > > >
> > > > Thanks for confirming that this works with the above patch.
> > > >
> > > > > @ Bill Nick Sami Nathan
> > > > >
> > > > > 1, Can you say something of the impact passing "LLVM_IAS=1" to make?
> > > >
> > > > The integrated assembler and this option are more-or-less orthogonal
> > > > to each other. One can still use the GNU assembler with PGO. If you're
> > > > having an issue, it may be related to ClangBuiltLinux issue #1250.
> > > >
> > > > > 2. Can you please try Nick's DWARF v5 support patchset v5 and
> > > > > CONFIG_DEBUG_INFO_DWARF5=y (see attachments)?
> > > > >
> > > > I know Nick did several tests with PGO. He may have looked into it
> > > > already, but we can check.
> > > >
> > >
> > > Reproducible.
> > >
> > > LLVM_IAS=1 + DWARF5 = Not bootable
> > >
> > > I will try:
> > >
> > > LLVM_IAS=1 + DWARF4
> > >
> >
> > I was not able to boot into such a built Linux-kernel.
> >
> PGO will have no effect on debugging data. If this is an issue with
> DWARF, then it's likely orthogonal to the PGO patch.
>
> > For me worked: DWARF2 and LLVM_IAS=1 *not* set.
> >
> > Of course, this could be an issue with my system's LLVM/Clang.
> >
> > Debian clang version
> > 12.0.0-++20210115111113+45ef053bd709-1~exp1~20210115101809.3724
> >
> Please use the official clang 11.0.1 release
> (https://releases.llvm.org/download.html), modifying the
> kernel/pgo/Kconfig as I suggested above. The reason we specify clang
> 12 for the minimal version is because of an issue that was recently
> fixed.
>

I downgraded to clang-11.1.0-rc1.
( See attachment. )

Doing the first build with PGO enabled plus DWARF5 and LLVM_IAS=1 works.

But again, after generating vmlinux.profdata and doing the PGO-rebuild,
the resulting Linux kernel does NOT boot in QEMU or on bare metal.
When built with GNU as instead, it boots.

So this is independent of DWARF v4 vs. DWARF v5 (the combination of
LLVM_IAS=1 and DWARF v2 is not allowed).
There is something wrong (here) with passing LLVM_IAS=1 to make when
doing the PGO-rebuild.

Can someone please verify whether the PGO-rebuild with
LLVM_IAS=1 boots or not?

Thanks.

- Sedat -

> > Can you give me a LLVM commit-id where you had success with LLVM_IAS=1
> > and especially CONFIG_DEBUG_INFO_DWARF5=y?
> > Success means I was able to boot in QEMU and/or bare metal.
> >
> The DWARF5 patch isn't in yet, so I don't want to rely upon it too much.
>
> -bw
Bill Wendling Jan. 21, 2021, 8:24 a.m. UTC | #22
On Wed, Jan 20, 2021 at 4:51 PM Nick Desaulniers
<ndesaulniers@google.com> wrote:
>
> Thanks Bill, mostly questions below.  Patch looks good to me modulo
> disabling profiling for one crypto TU, mixing style of pre/post
> increment, and some comments around locking.  With those addressed,
> I'm hoping akpm@ would consider picking this up.
>
> On Sat, Jan 16, 2021 at 1:44 AM Bill Wendling <morbo@google.com> wrote:
> >
> > From: Sami Tolvanen <samitolvanen@google.com>
> >
> > Enable the use of clang's Profile-Guided Optimization[1]. To generate a
> > profile, the kernel is instrumented with PGO counters, a representative
> > workload is run, and the raw profile data is collected from
> > /sys/kernel/debug/pgo/profraw.
> >
> > The raw profile data must be processed by clang's "llvm-profdata" tool
> > before it can be used during recompilation:
> >
> >   $ cp /sys/kernel/debug/pgo/profraw vmlinux.profraw
> >   $ llvm-profdata merge --output=vmlinux.profdata vmlinux.profraw
> >
> > Multiple raw profiles may be merged during this step.
> >
> > The data can now be used by the compiler:
> >
> >   $ make LLVM=1 KCFLAGS=-fprofile-use=vmlinux.profdata ...
> >
> > This initial submission is restricted to x86, as that's the platform we
> > know works. This restriction can be lifted once other platforms have
> > been verified to work with PGO.
> >
> > Note that this method of profiling the kernel is clang-native, unlike
> > the clang support in kernel/gcov.
> >
> > [1] https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization
> >
> > Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
> > Co-developed-by: Bill Wendling <morbo@google.com>
> > Signed-off-by: Bill Wendling <morbo@google.com>
> > ---
> > v2: - Added "__llvm_profile_instrument_memop" based on Nathan Chancellor's
> >       testing.
> >     - Corrected documentation, re PGO flags when using LTO, based on Fangrui
> >       Song's comments.
> > v3: - Added change log section based on Sedat Dilek's comments.
> > v4: - Remove non-x86 Makfile changes and se "hweight64" instead of using our
> >       own popcount implementation, based on Nick Desaulniers's comment.
> > v5: - Correct padding calculation, discovered by Nathan Chancellor.
> > ---
> >  Documentation/dev-tools/index.rst     |   1 +
> >  Documentation/dev-tools/pgo.rst       | 127 +++++++++
> >  MAINTAINERS                           |   9 +
> >  Makefile                              |   3 +
> >  arch/Kconfig                          |   1 +
> >  arch/x86/Kconfig                      |   1 +
> >  arch/x86/boot/Makefile                |   1 +
> >  arch/x86/boot/compressed/Makefile     |   1 +
> >  arch/x86/crypto/Makefile              |   2 +
> >  arch/x86/entry/vdso/Makefile          |   1 +
> >  arch/x86/kernel/vmlinux.lds.S         |   2 +
> >  arch/x86/platform/efi/Makefile        |   1 +
> >  arch/x86/purgatory/Makefile           |   1 +
> >  arch/x86/realmode/rm/Makefile         |   1 +
> >  arch/x86/um/vdso/Makefile             |   1 +
> >  drivers/firmware/efi/libstub/Makefile |   1 +
> >  include/asm-generic/vmlinux.lds.h     |  44 +++
> >  kernel/Makefile                       |   1 +
> >  kernel/pgo/Kconfig                    |  35 +++
> >  kernel/pgo/Makefile                   |   5 +
> >  kernel/pgo/fs.c                       | 382 ++++++++++++++++++++++++++
> >  kernel/pgo/instrument.c               | 185 +++++++++++++
> >  kernel/pgo/pgo.h                      | 206 ++++++++++++++
> >  scripts/Makefile.lib                  |  10 +
> >  24 files changed, 1022 insertions(+)
> >  create mode 100644 Documentation/dev-tools/pgo.rst
> >  create mode 100644 kernel/pgo/Kconfig
> >  create mode 100644 kernel/pgo/Makefile
> >  create mode 100644 kernel/pgo/fs.c
> >  create mode 100644 kernel/pgo/instrument.c
> >  create mode 100644 kernel/pgo/pgo.h
> >
> > diff --git a/Documentation/dev-tools/index.rst b/Documentation/dev-tools/index.rst
> > index f7809c7b1ba9e..8d6418e858062 100644
> > --- a/Documentation/dev-tools/index.rst
> > +++ b/Documentation/dev-tools/index.rst
> > @@ -26,6 +26,7 @@ whole; patches welcome!
> >     kgdb
> >     kselftest
> >     kunit/index
> > +   pgo
> >
> >
> >  .. only::  subproject and html
> > diff --git a/Documentation/dev-tools/pgo.rst b/Documentation/dev-tools/pgo.rst
> > new file mode 100644
> > index 0000000000000..b7f11d8405b73
> > --- /dev/null
> > +++ b/Documentation/dev-tools/pgo.rst
> > @@ -0,0 +1,127 @@
> > +.. SPDX-License-Identifier: GPL-2.0
> > +
> > +===============================
> > +Using PGO with the Linux kernel
> > +===============================
> > +
> > +Clang's profiling kernel support (PGO_) enables profiling of the Linux kernel
> > +when building with Clang. The profiling data is exported via the ``pgo``
> > +debugfs directory.
> > +
> > +.. _PGO: https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization
> > +
> > +
> > +Preparation
> > +===========
> > +
> > +Configure the kernel with:
> > +
> > +.. code-block:: make
> > +
> > +   CONFIG_DEBUG_FS=y
> > +   CONFIG_PGO_CLANG=y
> > +
> > +Note that kernels compiled with profiling flags will be significantly larger
> > +and run slower.
> > +
> > +Profiling data will only become accessible once debugfs has been mounted:
> > +
> > +.. code-block:: sh
> > +
> > +   mount -t debugfs none /sys/kernel/debug
> > +
> > +
> > +Customization
> > +=============
> > +
> > +You can enable or disable profiling for individual file and directories by
> > +adding a line similar to the following to the respective kernel Makefile:
> > +
> > +- For a single file (e.g. main.o)
> > +
> > +  .. code-block:: make
> > +
> > +     PGO_PROFILE_main.o := y
> > +
> > +- For all files in one directory
> > +
> > +  .. code-block:: make
> > +
> > +     PGO_PROFILE := y
> > +
> > +To exclude files from being profiled use
> > +
> > +  .. code-block:: make
> > +
> > +     PGO_PROFILE_main.o := n
> > +
> > +and
> > +
> > +  .. code-block:: make
> > +
> > +     PGO_PROFILE := n
> > +
> > +Only files which are linked to the main kernel image or are compiled as kernel
> > +modules are supported by this mechanism.
> > +
> > +
> > +Files
> > +=====
> > +
> > +The PGO kernel support creates the following files in debugfs:
> > +
> > +``/sys/kernel/debug/pgo``
> > +       Parent directory for all PGO-related files.
> > +
> > +``/sys/kernel/debug/pgo/reset``
> > +       Global reset file: resets all coverage data to zero when written to.
> > +
> > +``/sys/kernel/debug/profraw``
> > +       The raw PGO data that must be processed with ``llvm_profdata``.
> > +
> > +
> > +Workflow
> > +========
> > +
> > +The PGO kernel can be run on the host or test machines. The data though should
> > +be analyzed with Clang's tools from the same Clang version as the kernel was
> > +compiled. Clang's tolerant of version skew, but it's easier to use the same
> > +Clang version.
> > +
> > +The profiling data is useful for optimizing the kernel, analyzing coverage,
> > +etc. Clang offers tools to perform these tasks.
> > +
> > +Here is an example workflow for profiling an instrumented kernel with PGO and
> > +using the result to optimize the kernel:
> > +
> > +1) Install the kernel on the TEST machine.
> > +
> > +2) Reset the data counters right before running the load tests
> > +
> > +   .. code-block:: sh
> > +
> > +      $ echo 1 > /sys/kernel/debug/pgo/reset
> > +
> > +3) Run the load tests.
> > +
> > +4) Collect the raw profile data
> > +
> > +   .. code-block:: sh
> > +
> > +      $ cp -a /sys/kernel/debug/pgo/profraw /tmp/vmlinux.profraw
> > +
> > +5) (Optional) Download the raw profile data to the HOST machine.
> > +
> > +6) Process the raw profile data
> > +
> > +   .. code-block:: sh
> > +
> > +      $ llvm-profdata merge --output=vmlinux.profdata vmlinux.profraw
> > +
> > +   Note that multiple raw profile data files can be merged during this step.
> > +
> > +7) Rebuild the kernel using the profile data (PGO disabled)
> > +
> > +   .. code-block:: sh
> > +
> > +      $ make LLVM=1 KCFLAGS=-fprofile-use=vmlinux.profdata ...
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index 79b400c97059f..cb1f1f2b2baf4 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -13948,6 +13948,15 @@ S:     Maintained
> >  F:     include/linux/personality.h
> >  F:     include/uapi/linux/personality.h
> >
> > +PGO BASED KERNEL PROFILING
> > +M:     Sami Tolvanen <samitolvanen@google.com>
> > +M:     Bill Wendling <wcw@google.com>
> > +R:     Nathan Chancellor <natechancellor@gmail.com>
> > +R:     Nick Desaulniers <ndesaulniers@google.com>
> > +S:     Supported
> > +F:     Documentation/dev-tools/pgo.rst
> > +F:     kernel/pgo
> > +
> >  PHOENIX RC FLIGHT CONTROLLER ADAPTER
> >  M:     Marcus Folkesson <marcus.folkesson@gmail.com>
> >  L:     linux-input@vger.kernel.org
> > diff --git a/Makefile b/Makefile
> > index 9e73f82e0d863..9128bfe1ccc97 100644
> > --- a/Makefile
> > +++ b/Makefile
> > @@ -659,6 +659,9 @@ endif # KBUILD_EXTMOD
> >  # Defaults to vmlinux, but the arch makefile usually adds further targets
> >  all: vmlinux
> >
> > +CFLAGS_PGO_CLANG := -fprofile-generate
> > +export CFLAGS_PGO_CLANG
> > +
> >  CFLAGS_GCOV    := -fprofile-arcs -ftest-coverage \
> >         $(call cc-option,-fno-tree-loop-im) \
> >         $(call cc-disable-warning,maybe-uninitialized,)
> > diff --git a/arch/Kconfig b/arch/Kconfig
> > index 24862d15f3a36..f39d3991f6bfe 100644
> > --- a/arch/Kconfig
> > +++ b/arch/Kconfig
> > @@ -1112,6 +1112,7 @@ config ARCH_SPLIT_ARG64
> >            pairs of 32-bit arguments, select this option.
> >
> >  source "kernel/gcov/Kconfig"
> > +source "kernel/pgo/Kconfig"
> >
> >  source "scripts/gcc-plugins/Kconfig"
> >
> > diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> > index 21f851179ff08..36305ea61dc09 100644
> > --- a/arch/x86/Kconfig
> > +++ b/arch/x86/Kconfig
> > @@ -96,6 +96,7 @@ config X86
> >         select ARCH_SUPPORTS_DEBUG_PAGEALLOC
> >         select ARCH_SUPPORTS_NUMA_BALANCING     if X86_64
> >         select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP       if NR_CPUS <= 4096
> > +       select ARCH_SUPPORTS_PGO_CLANG          if X86_64
> >         select ARCH_USE_BUILTIN_BSWAP
> >         select ARCH_USE_QUEUED_RWLOCKS
> >         select ARCH_USE_QUEUED_SPINLOCKS
> > diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
> > index fe605205b4ce2..383853e32f673 100644
> > --- a/arch/x86/boot/Makefile
> > +++ b/arch/x86/boot/Makefile
> > @@ -71,6 +71,7 @@ KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
> >  KBUILD_CFLAGS  += $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
> >  KBUILD_CFLAGS  += -fno-asynchronous-unwind-tables
> >  GCOV_PROFILE := n
> > +PGO_PROFILE := n
> >  UBSAN_SANITIZE := n
> >
> >  $(obj)/bzImage: asflags-y  := $(SVGA_MODE)
> > diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
> > index e0bc3988c3faa..ed12ab65f6065 100644
> > --- a/arch/x86/boot/compressed/Makefile
> > +++ b/arch/x86/boot/compressed/Makefile
> > @@ -54,6 +54,7 @@ CFLAGS_sev-es.o += -I$(objtree)/arch/x86/lib/
> >
> >  KBUILD_AFLAGS  := $(KBUILD_CFLAGS) -D__ASSEMBLY__
> >  GCOV_PROFILE := n
> > +PGO_PROFILE := n
> >  UBSAN_SANITIZE :=n
> >
> >  KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE)
> > diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
> > index a31de0c6ccde2..775fa0b368e98 100644
> > --- a/arch/x86/crypto/Makefile
> > +++ b/arch/x86/crypto/Makefile
> > @@ -4,6 +4,8 @@
> >
> >  OBJECT_FILES_NON_STANDARD := y
> >
> > +PGO_PROFILE_curve25519-x86_64.o := n
> > +
>
> ^ Do you have more info about this?
>
This gave an error during compilation, complaining about a lack of
registers in some instances. This file is mostly inline asm or code
that doesn't benefit much from profiling, so I disabled profiling for it.

Note that the register issue happens only with PGO. Normal compilation is fine.

> >  obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
> >
> >  obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
> > diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
> > index 02e3e42f380bd..26e2b3af0145c 100644
> > --- a/arch/x86/entry/vdso/Makefile
> > +++ b/arch/x86/entry/vdso/Makefile
> > @@ -179,6 +179,7 @@ quiet_cmd_vdso = VDSO    $@
> >  VDSO_LDFLAGS = -shared --hash-style=both --build-id=sha1 \
> >         $(call ld-option, --eh-frame-hdr) -Bsymbolic
> >  GCOV_PROFILE := n
> > +PGO_PROFILE := n
> >
> >  quiet_cmd_vdso_and_check = VDSO    $@
> >        cmd_vdso_and_check = $(cmd_vdso); $(cmd_vdso_check)
> > diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
> > index efd9e9ea17f25..f6cab2316c46a 100644
> > --- a/arch/x86/kernel/vmlinux.lds.S
> > +++ b/arch/x86/kernel/vmlinux.lds.S
> > @@ -184,6 +184,8 @@ SECTIONS
> >
> >         BUG_TABLE
> >
> > +       PGO_CLANG_DATA
> > +
> >         ORC_UNWIND_TABLE
> >
> >         . = ALIGN(PAGE_SIZE);
> > diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile
> > index 84b09c230cbd5..5f22b31446ad4 100644
> > --- a/arch/x86/platform/efi/Makefile
> > +++ b/arch/x86/platform/efi/Makefile
> > @@ -2,6 +2,7 @@
> >  OBJECT_FILES_NON_STANDARD_efi_thunk_$(BITS).o := y
> >  KASAN_SANITIZE := n
> >  GCOV_PROFILE := n
> > +PGO_PROFILE := n
> >
> >  obj-$(CONFIG_EFI)              += quirks.o efi.o efi_$(BITS).o efi_stub_$(BITS).o
> >  obj-$(CONFIG_EFI_MIXED)                += efi_thunk_$(BITS).o
> > diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
> > index 95ea17a9d20cb..36f20e99da0bc 100644
> > --- a/arch/x86/purgatory/Makefile
> > +++ b/arch/x86/purgatory/Makefile
> > @@ -23,6 +23,7 @@ targets += purgatory.ro purgatory.chk
> >
> >  # Sanitizer, etc. runtimes are unavailable and cannot be linked here.
> >  GCOV_PROFILE   := n
> > +PGO_PROFILE    := n
> >  KASAN_SANITIZE := n
> >  UBSAN_SANITIZE := n
> >  KCSAN_SANITIZE := n
> > diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile
> > index 83f1b6a56449f..21797192f958f 100644
> > --- a/arch/x86/realmode/rm/Makefile
> > +++ b/arch/x86/realmode/rm/Makefile
> > @@ -76,4 +76,5 @@ KBUILD_CFLAGS := $(REALMODE_CFLAGS) -D_SETUP -D_WAKEUP \
> >  KBUILD_AFLAGS  := $(KBUILD_CFLAGS) -D__ASSEMBLY__
> >  KBUILD_CFLAGS  += -fno-asynchronous-unwind-tables
> >  GCOV_PROFILE := n
> > +PGO_PROFILE := n
> >  UBSAN_SANITIZE := n
> > diff --git a/arch/x86/um/vdso/Makefile b/arch/x86/um/vdso/Makefile
> > index 5943387e3f357..54f5768f58530 100644
> > --- a/arch/x86/um/vdso/Makefile
> > +++ b/arch/x86/um/vdso/Makefile
> > @@ -64,6 +64,7 @@ quiet_cmd_vdso = VDSO    $@
> >
> >  VDSO_LDFLAGS = -fPIC -shared -Wl,--hash-style=sysv
> >  GCOV_PROFILE := n
> > +PGO_PROFILE := n
> >
> >  #
> >  # Install the unstripped copy of vdso*.so listed in $(vdso-install-y).
> > diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
> > index 8a94388e38b33..2d81623b33f29 100644
> > --- a/drivers/firmware/efi/libstub/Makefile
> > +++ b/drivers/firmware/efi/libstub/Makefile
> > @@ -40,6 +40,7 @@ KBUILD_CFLAGS                 := $(cflags-y) -Os -DDISABLE_BRANCH_PROFILING \
> >  KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
> >
> >  GCOV_PROFILE                   := n
> > +PGO_PROFILE                    := n
> >  # Sanitizer runtimes are unavailable and cannot be linked here.
> >  KASAN_SANITIZE                 := n
> >  KCSAN_SANITIZE                 := n
> > diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
> > index b2b3d81b1535a..3a591bb18c5fb 100644
> > --- a/include/asm-generic/vmlinux.lds.h
> > +++ b/include/asm-generic/vmlinux.lds.h
> > @@ -316,6 +316,49 @@
> >  #define THERMAL_TABLE(name)
> >  #endif
> >
> > +#ifdef CONFIG_PGO_CLANG
> > +#define PGO_CLANG_DATA                                                 \
> > +       __llvm_prf_data : AT(ADDR(__llvm_prf_data) - LOAD_OFFSET) {     \
> > +               . = ALIGN(8);                                           \
> > +               __llvm_prf_start = .;                                   \
> > +               __llvm_prf_data_start = .;                              \
> > +               KEEP(*(__llvm_prf_data))                                \
> > +               . = ALIGN(8);                                           \
> > +               __llvm_prf_data_end = .;                                \
> > +       }                                                               \
> > +       __llvm_prf_cnts : AT(ADDR(__llvm_prf_cnts) - LOAD_OFFSET) {     \
> > +               . = ALIGN(8);                                           \
> > +               __llvm_prf_cnts_start = .;                              \
> > +               KEEP(*(__llvm_prf_cnts))                                \
> > +               . = ALIGN(8);                                           \
> > +               __llvm_prf_cnts_end = .;                                \
> > +       }                                                               \
> > +       __llvm_prf_names : AT(ADDR(__llvm_prf_names) - LOAD_OFFSET) {   \
> > +               . = ALIGN(8);                                           \
> > +               __llvm_prf_names_start = .;                             \
> > +               KEEP(*(__llvm_prf_names))                               \
> > +               . = ALIGN(8);                                           \
> > +               __llvm_prf_names_end = .;                               \
> > +               . = ALIGN(8);                                           \
> > +       }                                                               \
> > +       __llvm_prf_vals : AT(ADDR(__llvm_prf_vals) - LOAD_OFFSET) {     \
> > +               __llvm_prf_vals_start = .;                              \
> > +               KEEP(*(__llvm_prf_vals))                                \
> > +               . = ALIGN(8);                                           \
> > +               __llvm_prf_vals_end = .;                                \
> > +               . = ALIGN(8);                                           \
> > +       }                                                               \
> > +       __llvm_prf_vnds : AT(ADDR(__llvm_prf_vnds) - LOAD_OFFSET) {     \
> > +               __llvm_prf_vnds_start = .;                              \
> > +               KEEP(*(__llvm_prf_vnds))                                \
> > +               . = ALIGN(8);                                           \
> > +               __llvm_prf_vnds_end = .;                                \
> > +               __llvm_prf_end = .;                                     \
> > +       }
> > +#else
> > +#define PGO_CLANG_DATA
> > +#endif
> > +
> >  #define KERNEL_DTB()                                                   \
> >         STRUCT_ALIGN();                                                 \
> >         __dtb_start = .;                                                \
> > @@ -1125,6 +1168,7 @@
> >                 CONSTRUCTORS                                            \
> >         }                                                               \
> >         BUG_TABLE                                                       \
> > +       PGO_CLANG_DATA
>
> ^ since dropping other arch support from v4, could probably drop this,
> too. We should be covered by the modification to
> arch/x86/kernel/vmlinux.lds.S, right?
>
Possibly, but I'd like to keep it here anyway. It's the correct place
for this info, and will benefit us when we do enable other platforms.

> >
> >  #define INIT_TEXT_SECTION(inittext_align)                              \
> >         . = ALIGN(inittext_align);                                      \
> > diff --git a/kernel/Makefile b/kernel/Makefile
> > index aa7368c7eabf3..0b34ca228ba46 100644
> > --- a/kernel/Makefile
> > +++ b/kernel/Makefile
> > @@ -111,6 +111,7 @@ obj-$(CONFIG_BPF) += bpf/
> >  obj-$(CONFIG_KCSAN) += kcsan/
> >  obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o
> >  obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call.o
> > +obj-$(CONFIG_PGO_CLANG) += pgo/
> >
> >  obj-$(CONFIG_PERF_EVENTS) += events/
> >
> > diff --git a/kernel/pgo/Kconfig b/kernel/pgo/Kconfig
> > new file mode 100644
> > index 0000000000000..76a640b6cf6ed
> > --- /dev/null
> > +++ b/kernel/pgo/Kconfig
> > @@ -0,0 +1,35 @@
> > +# SPDX-License-Identifier: GPL-2.0-only
> > +menu "Profile Guided Optimization (PGO) (EXPERIMENTAL)"
> > +
> > +config ARCH_SUPPORTS_PGO_CLANG
> > +       bool
> > +
> > +config PGO_CLANG
> > +       bool "Enable clang's PGO-based kernel profiling"
> > +       depends on DEBUG_FS
> > +       depends on ARCH_SUPPORTS_PGO_CLANG
> > +       depends on CC_IS_CLANG && CLANG_VERSION >= 120000
> > +       help
> > +         This option enables clang's PGO (Profile Guided Optimization) based
> > +         code profiling to better optimize the kernel.
> > +
> > +         If unsure, say N.
> > +
> > +         Run a representative workload for your application on a kernel
> > +         compiled with this option and download the raw profile file from
> > +         /sys/kernel/debug/pgo/profraw. This file needs to be processed with
> > +         llvm-profdata. It may be merged with other collected raw profiles.
> > +
> > +         Copy the resulting profile file into vmlinux.profdata, and enable
> > +         KCFLAGS=-fprofile-use=vmlinux.profdata to produce an optimized
> > +         kernel.
> > +
> > +         Note that a kernel compiled with profiling flags will be
> > +         significantly larger and run slower. Also be sure to exclude files
> > +         from profiling which are not linked to the kernel image to prevent
> > +         linker errors.
> > +
> > +         Note that the debugfs filesystem has to be mounted to access
> > +         profiling data.
> > +
> > +endmenu
> > diff --git a/kernel/pgo/Makefile b/kernel/pgo/Makefile
> > new file mode 100644
> > index 0000000000000..41e27cefd9a47
> > --- /dev/null
> > +++ b/kernel/pgo/Makefile
> > @@ -0,0 +1,5 @@
> > +# SPDX-License-Identifier: GPL-2.0
> > +GCOV_PROFILE   := n
> > +PGO_PROFILE    := n
> > +
> > +obj-y  += fs.o instrument.o
> > diff --git a/kernel/pgo/fs.c b/kernel/pgo/fs.c
> > new file mode 100644
> > index 0000000000000..68b24672be10a
> > --- /dev/null
> > +++ b/kernel/pgo/fs.c
> > @@ -0,0 +1,382 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Copyright (C) 2019 Google, Inc.
> > + *
> > + * Author:
> > + *     Sami Tolvanen <samitolvanen@google.com>
> > + *
> > + * This software is licensed under the terms of the GNU General Public
> > + * License version 2, as published by the Free Software Foundation, and
> > + * may be copied, distributed, and modified under those terms.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > + * GNU General Public License for more details.
> > + *
> > + */
> > +
> > +#define pr_fmt(fmt)    "pgo: " fmt
> > +
> > +#include <linux/kernel.h>
> > +#include <linux/debugfs.h>
> > +#include <linux/fs.h>
> > +#include <linux/module.h>
> > +#include <linux/slab.h>
> > +#include <linux/vmalloc.h>
> > +#include "pgo.h"
> > +
> > +static struct dentry *directory;
> > +
> > +struct prf_private_data {
> > +       void *buffer;
> > +       unsigned long size;
> > +};
> > +
> > +/*
> > + * Raw profile data format:
> > + *
> > + *     - llvm_prf_header
> > + *     - __llvm_prf_data
> > + *     - __llvm_prf_cnts
> > + *     - __llvm_prf_names
> > + *     - zero padding to 8 bytes
> > + *     - for each llvm_prf_data in __llvm_prf_data:
> > + *             - llvm_prf_value_data
> > + *                     - llvm_prf_value_record + site count array
> > + *                             - llvm_prf_value_node_data
> > + *                             ...
> > + *                     ...
> > + *             ...
> > + */
> > +
> > +static void prf_fill_header(void **buffer)
> > +{
> > +       struct llvm_prf_header *header = *(struct llvm_prf_header **)buffer;
> > +
> > +       header->magic = LLVM_PRF_MAGIC;
> > +       header->version = LLVM_PRF_VARIANT_MASK_IR | LLVM_PRF_VERSION;
> > +       header->data_size = prf_data_count();
> > +       header->padding_bytes_before_counters = 0;
> > +       header->counters_size = prf_cnts_count();
> > +       header->padding_bytes_after_counters = 0;
> > +       header->names_size = prf_names_count();
> > +       header->counters_delta = (u64)__llvm_prf_cnts_start;
> > +       header->names_delta = (u64)__llvm_prf_names_start;
> > +       header->value_kind_last = LLVM_PRF_IPVK_LAST;
> > +
> > +       *buffer += sizeof(*header);
> > +}
> > +
> > +/*
> > + * Copy the source into the buffer, incrementing the pointer into buffer in the
> > + * process.
> > + */
> > +static void prf_copy_to_buffer(void **buffer, void *src, unsigned long size)
> > +{
> > +       memcpy(*buffer, src, size);
> > +       *buffer += size;
> > +}
> > +
> > +static u32 __prf_get_value_size(struct llvm_prf_data *p, u32 *value_kinds)
> > +{
> > +       struct llvm_prf_value_node **nodes =
> > +               (struct llvm_prf_value_node **)p->values;
> > +       u32 kinds = 0;
> > +       u32 size = 0;
> > +       unsigned int kind;
> > +       unsigned int n;
> > +       unsigned int s = 0;
> > +
> > +       for (kind = 0; kind < ARRAY_SIZE(p->num_value_sites); kind++) {
> > +               unsigned int sites = p->num_value_sites[kind];
> > +
> > +               if (!sites)
> > +                       continue;
> > +
> > +               /* Record + site count array */
> > +               size += prf_get_value_record_size(sites);
> > +               kinds++;
> > +
> > +               if (!nodes)
> > +                       continue;
> > +
> > +               for (n = 0; n < sites; n++) {
> > +                       u32 count = 0;
> > +                       struct llvm_prf_value_node *site = nodes[s + n];
> > +
> > +                       while (site && ++count <= U8_MAX)
> > +                               site = site->next;
> > +
> > +                       size += count *
> > +                               sizeof(struct llvm_prf_value_node_data);
> > +               }
> > +
> > +               s += sites;
> > +       }
> > +
> > +       if (size)
> > +               size += sizeof(struct llvm_prf_value_data);
> > +
> > +       if (value_kinds)
> > +               *value_kinds = kinds;
> > +
> > +       return size;
> > +}
> > +
> > +static u32 prf_get_value_size(void)
> > +{
> > +       u32 size = 0;
> > +       struct llvm_prf_data *p;
> > +
> > +       for (p = __llvm_prf_data_start; p < __llvm_prf_data_end; p++)
> > +               size += __prf_get_value_size(p, NULL);
> > +
> > +       return size;
> > +}
> > +
> > +/* Serialize the value profile data of a single llvm_prf_data entry. */
> > +static void prf_serialize_value(struct llvm_prf_data *p, void **buffer)
> > +{
> > +       struct llvm_prf_value_data header;
> > +       struct llvm_prf_value_node **nodes =
> > +               (struct llvm_prf_value_node **)p->values;
> > +       unsigned int kind;
> > +       unsigned int n;
> > +       unsigned int s = 0;
> > +
> > +       header.total_size = __prf_get_value_size(p, &header.num_value_kinds);
> > +
> > +       if (!header.num_value_kinds)
> > +               /* Nothing to write. */
> > +               return;
> > +
> > +       prf_copy_to_buffer(buffer, &header, sizeof(header));
> > +
> > +       for (kind = 0; kind < ARRAY_SIZE(p->num_value_sites); kind++) {
> > +               struct llvm_prf_value_record *record;
> > +               u8 *counts;
> > +               unsigned int sites = p->num_value_sites[kind];
> > +
> > +               if (!sites)
> > +                       continue;
> > +
> > +               /* Profiling value record. */
> > +               record = *(struct llvm_prf_value_record **)buffer;
> > +               *buffer += prf_get_value_record_header_size();
> > +
> > +               record->kind = kind;
> > +               record->num_value_sites = sites;
> > +
> > +               /* Site count array. */
> > +               counts = *(u8 **)buffer;
> > +               *buffer += prf_get_value_record_site_count_size(sites);
> > +
> > +               /*
> > +                * If we don't have nodes, we can skip updating the site count
> > +                * array, because the buffer is zero filled.
> > +                */
> > +               if (!nodes)
> > +                       continue;
> > +
> > +               for (n = 0; n < sites; n++) {
> > +                       u32 count = 0;
> > +                       struct llvm_prf_value_node *site = nodes[s + n];
> > +
> > +                       while (site && ++count <= U8_MAX) {
> > +                               prf_copy_to_buffer(buffer, site,
> > +                                                  sizeof(struct llvm_prf_value_node_data));
> > +                               site = site->next;
> > +                       }
> > +
> > +                       counts[n] = (u8)count;
> > +               }
> > +
> > +               s += sites;
> > +       }
> > +}
> > +
> > +static void prf_serialize_values(void **buffer)
> > +{
> > +       struct llvm_prf_data *p;
> > +
> > +       for (p = __llvm_prf_data_start; p < __llvm_prf_data_end; p++)
> > +               prf_serialize_value(p, buffer);
> > +}
> > +
> > +static inline unsigned long prf_get_padding(unsigned long size)
> > +{
> > +       return 7 & (8 - size % 8);
> > +}
>
> This is ugly but it looks like this corresponds with
> __llvm_profile_get_num_padding_bytes() in
> llvm-project/compiler-rt/lib/profile/InstrProfiling.c?  If there are
> platforms supported by the kernel where `sizeof(unsigned long) != 8`,
> it might be nicer to spell out `sizeof(unsigned long)` rather
> than hardcode 8. Should we also use u64 for the parameter and u8 for
> the return type?
>
It's probably best to use what llvm uses in that function
(sizeof(uint64_t)). I can replace it.

> > +
> > +static unsigned long prf_buffer_size(void)
> > +{
> > +       return sizeof(struct llvm_prf_header) +
> > +                       prf_data_size() +
> > +                       prf_cnts_size() +
> > +                       prf_names_size() +
> > +                       prf_get_padding(prf_names_size()) +
> > +                       prf_get_value_size();
> > +}
> > +
> > +/* Serialize the profiling data into a format LLVM's tools can understand. */
> > +static int prf_serialize(struct prf_private_data *p)
> > +{
> > +       int err = 0;
> > +       void *buffer;
> > +
> > +       p->size = prf_buffer_size();
> > +       p->buffer = vzalloc(p->size);
> > +
> > +       if (!p->buffer) {
> > +               err = -ENOMEM;
> > +               goto out;
> > +       }
> > +
> > +       buffer = p->buffer;
> > +
> > +       prf_fill_header(&buffer);
> > +       prf_copy_to_buffer(&buffer, __llvm_prf_data_start,  prf_data_size());
> > +       prf_copy_to_buffer(&buffer, __llvm_prf_cnts_start,  prf_cnts_size());
> > +       prf_copy_to_buffer(&buffer, __llvm_prf_names_start, prf_names_size());
> > +       buffer += prf_get_padding(prf_names_size());
> > +
> > +       prf_serialize_values(&buffer);
> > +
> > +out:
> > +       return err;
> > +}
> > +
> > +/* open() implementation for PGO. Creates a copy of the profiling data set. */
> > +static int prf_open(struct inode *inode, struct file *file)
> > +{
> > +       struct prf_private_data *data;
> > +       unsigned long flags;
> > +       int err;
> > +
> > +       data = kzalloc(sizeof(*data), GFP_KERNEL);
> > +       if (!data) {
> > +               err = -ENOMEM;
> > +               goto out;
> > +       }
> > +
> > +       flags = prf_lock();
> > +
> > +       err = prf_serialize(data);
> > +       if (err) {
> > +               kfree(data);
> > +               goto out_unlock;
> > +       }
> > +
> > +       file->private_data = data;
> > +
> > +out_unlock:
> > +       prf_unlock(flags);
>
> This is an improvement over earlier editions, but kfree() is still
> within the critical section.  I wonder if it can be moved out? If not,
> why, precisely? Otherwise are we sure we have the concurrency correct?
> Might be worth pursuing in a follow up patch once the core has landed.
>
The kfree() isn't on the critical path; it is done only when an error
occurs. I could add an "unlikely()" in the if-conditional hoping that
it's moved out-of-line, but the code it would be skipping would be a
couple of asm instructions. While I appreciate that performance in the
kernel is super important, we've already warned that performance with
an instrumented kernel won't be as good. :-)

> Also, it looks like the comments above the definitions of pgo_lock and
> allocate_node() seem to indicate the same lock is used for
> serialization.  I'm curious to know more about why we can't access
> current_node and serialize at the same time? At the least, it seems
> that `prf_serialize` should have a similar comment to `allocate_node`
> regarding the caller being expected to hold the `pgo_lock` via a call
> to `prf_lock()`, yeah?
>
> I can't help but look at the two call sites of prf_lock() and be
> suspicious that pgo_lock is technically guarding access to more
> variables than described in the comment.  It would be good to explain
> exactly what is going on should we need to revisit the concurrency
> here in the future (and lower the bus factor).
>
I'll update the comments.

> > +out:
> > +       return err;
> > +}
> > +
> > +/* read() implementation for PGO. */
> > +static ssize_t prf_read(struct file *file, char __user *buf, size_t count,
> > +                       loff_t *ppos)
> > +{
> > +       struct prf_private_data *data = file->private_data;
> > +
> > +       BUG_ON(!data);
> > +
> > +       return simple_read_from_buffer(buf, count, ppos, data->buffer,
> > +                                      data->size);
> > +}
> > +
> > +/* release() implementation for PGO. Release resources allocated by open(). */
> > +static int prf_release(struct inode *inode, struct file *file)
> > +{
> > +       struct prf_private_data *data = file->private_data;
> > +
> > +       if (data) {
> > +               vfree(data->buffer);
> > +               kfree(data);
> > +       }
> > +
> > +       return 0;
> > +}
> > +
> > +static const struct file_operations prf_fops = {
> > +       .owner          = THIS_MODULE,
> > +       .open           = prf_open,
> > +       .read           = prf_read,
> > +       .llseek         = default_llseek,
> > +       .release        = prf_release
> > +};
> > +
> > +/* write() implementation for resetting PGO's profile data. */
> > +static ssize_t reset_write(struct file *file, const char __user *addr,
> > +                          size_t len, loff_t *pos)
> > +{
> > +       struct llvm_prf_data *data;
> > +
> > +       memset(__llvm_prf_cnts_start, 0, prf_cnts_size());
> > +
> > +       for (data = __llvm_prf_data_start; data < __llvm_prf_data_end; ++data) {
>
> ^ this patch mixes pre-increment and post-increment in loops.  The
> kernel coding style docs (Documentation/process/coding-style.rst)
> don't make a call on this, but it might be nice to be internally
> consistent throughout the patch.  I assume that's from having mixed
> authors.  Not a huge issue, but I'm pedantic.
>
Okay.

> > +               struct llvm_prf_value_node **vnodes;
> > +               u64 current_vsite_count;
> > +               u32 i;
> > +
> > +               if (!data->values)
> > +                       continue;
> > +
> > +               current_vsite_count = 0;
> > +               vnodes = (struct llvm_prf_value_node **)data->values;
> > +
> > +               for (i = LLVM_PRF_IPVK_FIRST; i <= LLVM_PRF_IPVK_LAST; ++i)
> > +                       current_vsite_count += data->num_value_sites[i];
> > +
> > +               for (i = 0; i < current_vsite_count; ++i) {
> > +                       struct llvm_prf_value_node *current_vnode = vnodes[i];
> > +
> > +                       while (current_vnode) {
> > +                               current_vnode->count = 0;
> > +                               current_vnode = current_vnode->next;
> > +                       }
> > +               }
> > +       }
> > +
> > +       return len;
> > +}
> > +
> > +static const struct file_operations prf_reset_fops = {
> > +       .owner          = THIS_MODULE,
> > +       .write          = reset_write,
> > +       .llseek         = noop_llseek,
> > +};
> > +
> > +/* Create debugfs entries. */
> > +static int __init pgo_init(void)
> > +{
> > +       directory = debugfs_create_dir("pgo", NULL);
> > +       if (!directory)
> > +               goto err_remove;
> > +
> > +       if (!debugfs_create_file("profraw", 0600, directory, NULL,
> > +                                &prf_fops))
> > +               goto err_remove;
> > +
> > +       if (!debugfs_create_file("reset", 0200, directory, NULL,
> > +                                &prf_reset_fops))
> > +               goto err_remove;
> > +
> > +       return 0;
> > +
> > +err_remove:
> > +       pr_err("initialization failed\n");
> > +       return -EIO;
> > +}
> > +
> > +/* Remove debugfs entries. */
> > +static void __exit pgo_exit(void)
> > +{
> > +       debugfs_remove_recursive(directory);
> > +}
> > +
> > +module_init(pgo_init);
> > +module_exit(pgo_exit);
> > diff --git a/kernel/pgo/instrument.c b/kernel/pgo/instrument.c
> > new file mode 100644
> > index 0000000000000..6084ff0652e85
> > --- /dev/null
> > +++ b/kernel/pgo/instrument.c
> > @@ -0,0 +1,185 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Copyright (C) 2019 Google, Inc.
> > + *
> > + * Author:
> > + *     Sami Tolvanen <samitolvanen@google.com>
> > + *
> > + * This software is licensed under the terms of the GNU General Public
> > + * License version 2, as published by the Free Software Foundation, and
> > + * may be copied, distributed, and modified under those terms.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > + * GNU General Public License for more details.
> > + *
> > + */
> > +
> > +#define pr_fmt(fmt)    "pgo: " fmt
> > +
> > +#include <linux/bitops.h>
> > +#include <linux/kernel.h>
> > +#include <linux/export.h>
> > +#include <linux/spinlock.h>
> > +#include <linux/types.h>
> > +#include "pgo.h"
> > +
> > +/* Lock guarding value node access and serialization. */
> > +static DEFINE_SPINLOCK(pgo_lock);
> > +static int current_node;
> > +
> > +unsigned long prf_lock(void)
> > +{
> > +       unsigned long flags;
> > +
> > +       spin_lock_irqsave(&pgo_lock, flags);
> > +
> > +       return flags;
> > +}
> > +
> > +void prf_unlock(unsigned long flags)
> > +{
> > +       spin_unlock_irqrestore(&pgo_lock, flags);
> > +}
> > +
> > +/*
> > + * Return a newly allocated profiling value node which contains the value
> > + * tracked by the value profiler.
> > + * Note: caller *must* hold pgo_lock.
> > + */
> > +static struct llvm_prf_value_node *allocate_node(struct llvm_prf_data *p,
> > +                                                u32 index, u64 value)
> > +{
> > +       if (&__llvm_prf_vnds_start[current_node + 1] >= __llvm_prf_vnds_end)
> > +               return NULL; /* Out of nodes */
> > +
> > +       current_node++;
> > +
> > +       /* Make sure the node is entirely within the section */
> > +       if (&__llvm_prf_vnds_start[current_node] >= __llvm_prf_vnds_end ||
> > +           &__llvm_prf_vnds_start[current_node + 1] > __llvm_prf_vnds_end)
> > +               return NULL;
> > +
> > +       return &__llvm_prf_vnds_start[current_node];
> > +}
> > +
> > +/*
> > + * Counts the number of times a target value is seen.
> > + *
> > + * Records the target value for the CounterIndex if not seen before. Otherwise,
> > + * increments the counter associated w/ the target value.
> > + */
> > +void __llvm_profile_instrument_target(u64 target_value, void *data, u32 index);
> > +void __llvm_profile_instrument_target(u64 target_value, void *data, u32 index)
> > +{
> > +       struct llvm_prf_data *p = (struct llvm_prf_data *)data;
> > +       struct llvm_prf_value_node **counters;
> > +       struct llvm_prf_value_node *curr;
> > +       struct llvm_prf_value_node *min = NULL;
> > +       struct llvm_prf_value_node *prev = NULL;
> > +       u64 min_count = U64_MAX;
> > +       u8 values = 0;
> > +       unsigned long flags;
> > +
> > +       if (!p || !p->values)
> > +               return;
> > +
> > +       counters = (struct llvm_prf_value_node **)p->values;
> > +       curr = counters[index];
> > +
> > +       while (curr) {
> > +               if (target_value == curr->value) {
> > +                       curr->count++;
> > +                       return;
> > +               }
> > +
> > +               if (curr->count < min_count) {
> > +                       min_count = curr->count;
> > +                       min = curr;
> > +               }
> > +
> > +               prev = curr;
> > +               curr = curr->next;
> > +               values++;
> > +       }
> > +
> > +       if (values >= LLVM_PRF_MAX_NUM_VALS_PER_SITE) {
> > +               if (!min->count || !(--min->count)) {
> > +                       curr = min;
> > +                       curr->value = target_value;
> > +                       curr->count++;
> > +               }
> > +               return;
> > +       }
> > +
> > +       /* Lock when updating the value node structure. */
> > +       flags = prf_lock();
> > +
> > +       curr = allocate_node(p, index, target_value);
> > +       if (!curr)
> > +               goto out;
> > +
> > +       curr->value = target_value;
> > +       curr->count++;
> > +
> > +       if (!counters[index])
> > +               counters[index] = curr;
> > +       else if (prev && !prev->next)
> > +               prev->next = curr;
> > +
> > +out:
> > +       prf_unlock(flags);
> > +}
> > +EXPORT_SYMBOL(__llvm_profile_instrument_target);
> > +
> > +/* Counts the number of times a range of target values is seen. */
> > +void __llvm_profile_instrument_range(u64 target_value, void *data,
> > +                                    u32 index, s64 precise_start,
> > +                                    s64 precise_last, s64 large_value);
> > +void __llvm_profile_instrument_range(u64 target_value, void *data,
> > +                                    u32 index, s64 precise_start,
> > +                                    s64 precise_last, s64 large_value)
> > +{
> > +       if (large_value != S64_MIN && (s64)target_value >= large_value)
> > +               target_value = large_value;
> > +       else if ((s64)target_value < precise_start ||
> > +                (s64)target_value > precise_last)
> > +               target_value = precise_last + 1;
> > +
> > +       __llvm_profile_instrument_target(target_value, data, index);
> > +}
> > +EXPORT_SYMBOL(__llvm_profile_instrument_range);
> > +
> > +static u64 inst_prof_get_range_rep_value(u64 value)
> > +{
> > +       if (value <= 8)
> > +               /* The first ranges are individually tracked, us it as is. */
>
> ^ typo, "use"
>
> > +               return value;
> > +       else if (value >= 513)
> > +               /* The last range is mapped to its lowest value. */
> > +               return 513;
> > +       else if (hweight64(value) == 1)
> > +               /* If it's a power of two, use it as is. */
> > +               return value;
> > +
> > +       /* Otherwise, round down to the previous power of two and add 1. */
> > +       return (1 << (64 - __builtin_clzll(value) - 1)) + 1;
> > +}
> > +
> > +/*
> > + * The target values are partitioned into multiple ranges. The range spec is
> > + * defined in compiler-rt/include/profile/InstrProfData.inc.
> > + */
> > +void __llvm_profile_instrument_memop(u64 target_value, void *data,
> > +                                    u32 counter_index);
> > +void __llvm_profile_instrument_memop(u64 target_value, void *data,
> > +                                    u32 counter_index)
> > +{
> > +       u64 rep_value;
> > +
> > +       /* Map the target value to the representative value of its range. */
> > +       rep_value = inst_prof_get_range_rep_value(target_value);
> > +       __llvm_profile_instrument_target(rep_value, data, counter_index);
> > +}
> > +EXPORT_SYMBOL(__llvm_profile_instrument_memop);
> > diff --git a/kernel/pgo/pgo.h b/kernel/pgo/pgo.h
> > new file mode 100644
> > index 0000000000000..df0aa278f28bd
> > --- /dev/null
> > +++ b/kernel/pgo/pgo.h
> > @@ -0,0 +1,206 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +/*
> > + * Copyright (C) 2019 Google, Inc.
> > + *
> > + * Author:
> > + *     Sami Tolvanen <samitolvanen@google.com>
> > + *
> > + * This software is licensed under the terms of the GNU General Public
> > + * License version 2, as published by the Free Software Foundation, and
> > + * may be copied, distributed, and modified under those terms.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > + * GNU General Public License for more details.
> > + *
> > + */
> > +
> > +#ifndef _PGO_H
> > +#define _PGO_H
> > +
> > +/*
> > + * Note: These internal LLVM definitions must match the compiler version.
> > + * See llvm/include/llvm/ProfileData/InstrProfData.inc in LLVM's source code.
> > + */
> > +
> > +#ifdef CONFIG_64BIT
> > +       #define LLVM_PRF_MAGIC          \
> > +               ((u64)255 << 56 |       \
> > +                (u64)'l' << 48 |       \
> > +                (u64)'p' << 40 |       \
> > +                (u64)'r' << 32 |       \
> > +                (u64)'o' << 24 |       \
> > +                (u64)'f' << 16 |       \
> > +                (u64)'r' << 8  |       \
> > +                (u64)129)
> > +#else
> > +       #define LLVM_PRF_MAGIC          \
> > +               ((u64)255 << 56 |       \
> > +                (u64)'l' << 48 |       \
> > +                (u64)'p' << 40 |       \
> > +                (u64)'r' << 32 |       \
> > +                (u64)'o' << 24 |       \
> > +                (u64)'f' << 16 |       \
> > +                (u64)'R' << 8  |       \
> > +                (u64)129)
> > +#endif
> > +
> > +#define LLVM_PRF_VERSION               5
> > +#define LLVM_PRF_DATA_ALIGN            8
> > +#define LLVM_PRF_IPVK_FIRST            0
> > +#define LLVM_PRF_IPVK_LAST             1
> > +#define LLVM_PRF_MAX_NUM_VALS_PER_SITE 16
>
> llvm/include/llvm/ProfileData/InstrProfData.inc defines
> INSTR_PROF_MAX_NUM_VAL_PER_SITE as 255; does that need to match?
>
Sure. I also updated the names to better match LLVM's names.

> > +
> > +#define LLVM_PRF_VARIANT_MASK_IR       (0x1ull << 56)
> > +#define LLVM_PRF_VARIANT_MASK_CSIR     (0x1ull << 57)
> > +
> > +/**
> > + * struct llvm_prf_header - represents the raw profile header data structure.
> > + * @magic: the magic token for the file format.
> > + * @version: the version of the file format.
> > + * @data_size: the number of entries in the profile data section.
> > + * @padding_bytes_before_counters: the number of padding bytes before the
> > + *   counters.
> > + * @counters_size: the size in bytes of the LLVM profile section containing the
> > + *   counters.
> > + * @padding_bytes_after_counters: the number of padding bytes after the
> > + *   counters.
> > + * @names_size: the size in bytes of the LLVM profile section containing the
> > + *   counters' names.
> > + * @counters_delta: the beginning of the LLVM profile counters section.
> > + * @names_delta: the beginning of the LLVM profile names section.
> > + * @value_kind_last: the last profile value kind.
> > + */
> > +struct llvm_prf_header {
> > +       u64 magic;
> > +       u64 version;
> > +       u64 data_size;
> > +       u64 padding_bytes_before_counters;
> > +       u64 counters_size;
> > +       u64 padding_bytes_after_counters;
> > +       u64 names_size;
> > +       u64 counters_delta;
> > +       u64 names_delta;
> > +       u64 value_kind_last;
> > +};
> > +
> > +/**
> > + * struct llvm_prf_data - represents the per-function control structure.
> > + * @name_ref: the reference to the function's name.
> > + * @func_hash: the hash value of the function.
> > + * @counter_ptr: a pointer to the profile counter.
> > + * @function_ptr: a pointer to the function.
> > + * @values: the profiling values associated with this function.
> > + * @num_counters: the number of counters in the function.
> > + * @num_value_sites: the number of value profile sites.
> > + */
> > +struct llvm_prf_data {
> > +       const u64 name_ref;
> > +       const u64 func_hash;
> > +       const void *counter_ptr;
> > +       const void *function_ptr;
> > +       void *values;
> > +       const u32 num_counters;
> > +       const u16 num_value_sites[LLVM_PRF_IPVK_LAST + 1];
> > +} __aligned(LLVM_PRF_DATA_ALIGN);
> > +
> > +/**
> > + * struct llvm_prf_value_node_data - represents the data part of the struct
> > + *   llvm_prf_value_node data structure.
> > + * @value: the value counters.
> > + * @count: the counters' count.
> > + */
> > +struct llvm_prf_value_node_data {
> > +       u64 value;
> > +       u64 count;
> > +};
> > +
> > +/**
> > + * struct llvm_prf_value_node - represents an internal data structure used by
> > + *   the value profiler.
> > + * @value: the value counters.
> > + * @count: the counters' count.
> > + * @next: the next value node.
> > + */
> > +struct llvm_prf_value_node {
> > +       u64 value;
> > +       u64 count;
> > +       struct llvm_prf_value_node *next;
> > +};
> > +
> > +/**
> > + * struct llvm_prf_value_data - represents the value profiling data in indexed
> > + *   format.
> > + * @total_size: the total size in bytes including this field.
> > + * @num_value_kinds: the number of value profile kinds that have value profile
> > + *   data.
> > + */
> > +struct llvm_prf_value_data {
> > +       u32 total_size;
> > +       u32 num_value_kinds;
> > +};
> > +
> > +/**
> > + * struct llvm_prf_value_record - represents the on-disk layout of the value
> > + *   profile data of a particular kind for one function.
> > + * @kind: the kind of the value profile record.
> > + * @num_value_sites: the number of value profile sites.
> > + * @site_count_array: the first element of the array that stores the number
> > + *   of profiled values for each value site.
> > + */
> > +struct llvm_prf_value_record {
> > +       u32 kind;
> > +       u32 num_value_sites;
> > +       u8 site_count_array[];
> > +};
> > +
> > +#define prf_get_value_record_header_size()             \
> > +       offsetof(struct llvm_prf_value_record, site_count_array)
> > +#define prf_get_value_record_site_count_size(sites)    \
> > +       roundup((sites), 8)
> > +#define prf_get_value_record_size(sites)               \
> > +       (prf_get_value_record_header_size() +           \
> > +        prf_get_value_record_site_count_size((sites)))
> > +
> > +/* Data sections */
> > +extern struct llvm_prf_data __llvm_prf_data_start[];
> > +extern struct llvm_prf_data __llvm_prf_data_end[];
> > +
> > +extern u64 __llvm_prf_cnts_start[];
> > +extern u64 __llvm_prf_cnts_end[];
> > +
> > +extern char __llvm_prf_names_start[];
> > +extern char __llvm_prf_names_end[];
> > +
> > +extern struct llvm_prf_value_node __llvm_prf_vnds_start[];
> > +extern struct llvm_prf_value_node __llvm_prf_vnds_end[];
> > +
> > +/* Locking for vnodes */
> > +extern unsigned long prf_lock(void);
> > +extern void prf_unlock(unsigned long flags);
> > +
> > +#define __DEFINE_PRF_SIZE(s) \
> > +       static inline unsigned long prf_ ## s ## _size(void)            \
> > +       {                                                               \
> > +               unsigned long start =                                   \
> > +                       (unsigned long)__llvm_prf_ ## s ## _start;      \
> > +               unsigned long end =                                     \
> > +                       (unsigned long)__llvm_prf_ ## s ## _end;        \
> > +               return roundup(end - start,                             \
> > +                               sizeof(__llvm_prf_ ## s ## _start[0])); \
> > +       }                                                               \
> > +       static inline unsigned long prf_ ## s ## _count(void)           \
> > +       {                                                               \
> > +               return prf_ ## s ## _size() /                           \
> > +                       sizeof(__llvm_prf_ ## s ## _start[0]);          \
> > +       }
> > +
> > +__DEFINE_PRF_SIZE(data);
> > +__DEFINE_PRF_SIZE(cnts);
> > +__DEFINE_PRF_SIZE(names);
> > +__DEFINE_PRF_SIZE(vnds);
> > +
> > +#undef __DEFINE_PRF_SIZE
> > +
> > +#endif /* _PGO_H */
> > diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
> > index 213677a5ed33e..9b218afb5cb87 100644
> > --- a/scripts/Makefile.lib
> > +++ b/scripts/Makefile.lib
> > @@ -143,6 +143,16 @@ _c_flags += $(if $(patsubst n%,, \
> >                 $(CFLAGS_GCOV))
> >  endif
> >
> > +#
> > +# Enable clang's PGO profiling flags for a file or directory depending on
> > +# variables PGO_PROFILE_obj.o and PGO_PROFILE.
> > +#
> > +ifeq ($(CONFIG_PGO_CLANG),y)
> > +_c_flags += $(if $(patsubst n%,, \
> > +               $(PGO_PROFILE_$(basetarget).o)$(PGO_PROFILE)y), \
> > +               $(CFLAGS_PGO_CLANG))
> > +endif
> > +
> >  #
> >  # Enable address sanitizer flags for kernel except some files or directories
> >  # we don't want to check (depends on variables KASAN_SANITIZE_obj.o, KASAN_SANITIZE)
> > --
> > 2.30.0.284.gd98b1dd5eaa7-goog
> >

Sending patch v6. PTAL.

-bw
Sedat Dilek Jan. 21, 2021, 10:44 p.m. UTC | #23
On Thu, Jan 21, 2021 at 3:03 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
>
> On Mon, Jan 18, 2021 at 10:56 PM Bill Wendling <morbo@google.com> wrote:
> >
> > On Mon, Jan 18, 2021 at 9:26 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > >
> > > On Mon, Jan 18, 2021 at 1:39 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > >
> > > > On Mon, Jan 18, 2021 at 3:32 AM Bill Wendling <morbo@google.com> wrote:
> > > > >
> > > > > On Sun, Jan 17, 2021 at 4:27 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > > > >
> > > > > > [ big snip ]
> > > > >
> > > > > [More snippage.]
> > > > >
> > > > > > [ CC Fangrui ]
> > > > > >
> > > > > > With the attached...
> > > > > >
> > > > > >    [PATCH v3] module: Ignore _GLOBAL_OFFSET_TABLE_ when warning for
> > > > > > undefined symbols
> > > > > >
> > > > > > ...I was finally able to boot into a rebuild PGO-optimized Linux-kernel.
> > > > > > For details see ClangBuiltLinux issue #1250 "Unknown symbol
> > > > > > _GLOBAL_OFFSET_TABLE_ loading kernel modules".
> > > > > >
> > > > > Thanks for confirming that this works with the above patch.
> > > > >
> > > > > > @ Bill Nick Sami Nathan
> > > > > >
> > > > > > 1, Can you say something of the impact passing "LLVM_IAS=1" to make?
> > > > >
> > > > > The integrated assembler and this option are more-or-less orthogonal
> > > > > to each other. One can still use the GNU assembler with PGO. If you're
> > > > > having an issue, it may be related to ClangBuiltLinux issue #1250.
> > > > >
> > > > > > 2. Can you please try Nick's DWARF v5 support patchset v5 and
> > > > > > CONFIG_DEBUG_INFO_DWARF5=y (see attachments)?
> > > > > >
> > > > > I know Nick did several tests with PGO. He may have looked into it
> > > > > already, but we can check.
> > > > >
> > > >
> > > > Reproducible.
> > > >
> > > > LLVM_IAS=1 + DWARF5 = Not bootable
> > > >
> > > > I will try:
> > > >
> > > > LLVM_IAS=1 + DWARF4
> > > >
> > >
> > > I was not able to boot into such a built Linux-kernel.
> > >
> > PGO will have no effect on debugging data. If this is an issue with
> > DWARF, then it's likely orthogonal to the PGO patch.
> >
> > > For me worked: DWARF2 and LLVM_IAS=1 *not* set.
> > >
> > > Of course, this could be an issue with my system's LLVM/Clang.
> > >
> > > Debian clang version
> > > 12.0.0-++20210115111113+45ef053bd709-1~exp1~20210115101809.3724
> > >
> > Please use the official clang 11.0.1 release
> > (https://releases.llvm.org/download.html), modifying the
> > kernel/pgo/Kconfig as I suggested above. The reason we specify clang
> > 12 for the minimal version is because of an issue that was recently
> > fixed.
> >
>
> I downgraded to clang-11.1.0-rc1.
> ( See attachment. )
>
> Doing the first build with PGO enabled plus DWARF5 and LLVM_IAS=1 works.
>
> But again after generating vmlinux.profdata and doing the PGO-rebuild
> - the resulting Linux-kernel does NOT boot in QEMU or on bare metal.
> With GNU/as I can boot.
>
> So this is independent of DWARF v4 or DWARF v5 (LLVM_IAS=1 and DWARF
> v2 is not allowed).
> There is something wrong (here) with passing LLVM_IAS=1 to make when
> doing the PGO-rebuild.
>
> Can someone please verify and confirm that the PGO-rebuild with
> LLVM_IAS=1 boots or boots not?
>
> Thanks.
>
> - Sedat -
>
> > > Can you give me a LLVM commit-id where you had success with LLVM_IAS=1
> > > and especially CONFIG_DEBUG_INFO_DWARF5=y?
> > > Success means I was able to boot in QEMU and/or bare metal.
> > >
> > The DWARF5 patch isn't in yet, so I don't want to rely upon it too much.
> >

I passed LLVM_IAS=1 with KAFLAGS=-fprofile-use=vmlinux.profdata:

/usr/bin/perf_5.10 stat make V=1 -j4 HOSTCC=clang HOSTCXX=clang++
HOSTLD=ld.lld CC=clang LD=ld.lld PAHOLE=/opt/pahole/bin/pahole
LOCALVERSION=-2-amd64-clang11-pgo KBUILD_VERBOSE=1
KBUILD_BUILD_HOST=iniza KBUILD_BUILD_USER=sedat.dilek@gmail.com
KBUILD_BUILD_TIMESTAMP=2021-01-21 bindeb-pkg
KDEB_PKGVERSION=5.11.0~rc4-2~bullseye+dileks1 LLVM=1
KCFLAGS=-fprofile-use=vmlinux.profdata LLVM_IAS=1
KAFLAGS=-fprofile-use=vmlinux.profdata

The resulting Linux-kernel does not boot.

But I see in the build-log these warnings:

warning: arch/x86/platform/efi/quirks.c: Function control flow change
detected (hash mismatch) efi_arch_mem_reserve Hash = 73770966985
[-Wbackend-plugin]
warning: arch/x86/platform/efi/efi.c: Function control flow change
detected (hash mismatch) efi_attr_is_visible Hash = 57959232386
[-Wbackend-plugin]
warning: arch/x86/boot/compressed/string.c: Function control flow
change detected (hash mismatch) memcmp Hash = 12884901887
[-Wbackend-plugin]
warning: arch/x86/boot/compressed/string.c: Function control flow
change detected (hash mismatch) bcmp Hash = 12884901887
[-Wbackend-plugin]
warning: arch/x86/boot/compressed/string.c: Function control flow
change detected (hash mismatch) strcmp Hash = 44149752232
[-Wbackend-plugin]
warning: arch/x86/boot/compressed/string.c: Function control flow
change detected (hash mismatch) strnlen Hash = 29212902728
[-Wbackend-plugin]
warning: arch/x86/boot/compressed/string.c: Function control flow
change detected (hash mismatch) simple_strtoull Hash =
288230479369728480 [-Wbackend-plugin]
warning: arch/x86/boot/compressed/string.c: Function control flow
change detected (hash mismatch) strstr Hash = 76464046323
[-Wbackend-plugin]
warning: arch/x86/boot/compressed/string.c: Function control flow
change detected (hash mismatch) strchr Hash = 30948479515
[-Wbackend-plugin]
warning: arch/x86/boot/compressed/string.c: Function control flow
change detected (hash mismatch) kstrtoull Hash = 288230543187488006
[-Wbackend-plugin]

What does "Function control flow change detected (hash mismatch)" mean?
Related with my boot problems?

- Sedat -
Nick Desaulniers Jan. 22, 2021, 1:42 a.m. UTC | #24
On Wed, Jan 20, 2021 at 6:03 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
>
> On Mon, Jan 18, 2021 at 10:56 PM Bill Wendling <morbo@google.com> wrote:
> >
> > On Mon, Jan 18, 2021 at 9:26 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > >
> > > On Mon, Jan 18, 2021 at 1:39 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > >
> > > > On Mon, Jan 18, 2021 at 3:32 AM Bill Wendling <morbo@google.com> wrote:
> > > > >
> > > > > On Sun, Jan 17, 2021 at 4:27 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > > > >
> > > > > > [ big snip ]
> > > > >
> > > > > [More snippage.]
> > > > >
> > > > > > [ CC Fangrui ]
> > > > > >
> > > > > > With the attached...
> > > > > >
> > > > > >    [PATCH v3] module: Ignore _GLOBAL_OFFSET_TABLE_ when warning for
> > > > > > undefined symbols
> > > > > >
> > > > > > ...I was finally able to boot into a rebuild PGO-optimized Linux-kernel.
> > > > > > For details see ClangBuiltLinux issue #1250 "Unknown symbol
> > > > > > _GLOBAL_OFFSET_TABLE_ loading kernel modules".
> > > > > >
> > > > > Thanks for confirming that this works with the above patch.
> > > > >
> > > > > > @ Bill Nick Sami Nathan
> > > > > >
> > > > > > 1, Can you say something of the impact passing "LLVM_IAS=1" to make?
> > > > >
> > > > > The integrated assembler and this option are more-or-less orthogonal
> > > > > to each other. One can still use the GNU assembler with PGO. If you're
> > > > > having an issue, it may be related to ClangBuiltLinux issue #1250.
> > > > >
> > > > > > 2. Can you please try Nick's DWARF v5 support patchset v5 and
> > > > > > CONFIG_DEBUG_INFO_DWARF5=y (see attachments)?
> > > > > >
> > > > > I know Nick did several tests with PGO. He may have looked into it
> > > > > already, but we can check.
> > > > >
> > > >
> > > > Reproducible.
> > > >
> > > > LLVM_IAS=1 + DWARF5 = Not bootable
> > > >
> > > > I will try:
> > > >
> > > > LLVM_IAS=1 + DWARF4
> > > >
> > >
> > > I was not able to boot into such a built Linux-kernel.
> > >
> > PGO will have no effect on debugging data. If this is an issue with
> > DWARF, then it's likely orthogonal to the PGO patch.
> >
> > > For me worked: DWARF2 and LLVM_IAS=1 *not* set.
> > >
> > > Of course, this could be an issue with my system's LLVM/Clang.
> > >
> > > Debian clang version
> > > 12.0.0-++20210115111113+45ef053bd709-1~exp1~20210115101809.3724
> > >
> > Please use the official clang 11.0.1 release
> > (https://releases.llvm.org/download.html), modifying the
> > kernel/pgo/Kconfig as I suggested above. The reason we specify clang
> > 12 for the minimal version is because of an issue that was recently
> > fixed.
> >
>
> I downgraded to clang-11.1.0-rc1.
> ( See attachment. )
>
> Doing the first build with PGO enabled plus DWARF5 and LLVM_IAS=1 works.
>
> But again after generating vmlinux.profdata and doing the PGO-rebuild
> - the resulting Linux-kernel does NOT boot in QEMU or on bare metal.
> With GNU/as I can boot.
>
> So this is independent of DWARF v4 or DWARF v5 (LLVM_IAS=1 and DWARF
> v2 is not allowed).
> There is something wrong (here) with passing LLVM_IAS=1 to make when
> doing the PGO-rebuild.
>
> Can someone please verify and confirm that the PGO-rebuild with
> LLVM_IAS=1 boots or boots not?

I was able to build+boot with LLVM_IAS=1 on my personal laptop (no
dwarf 5, just mainline+v5).

>
> Thanks.
>
> - Sedat -
>
> > > Can you give me a LLVM commit-id where you had success with LLVM_IAS=1
> > > and especially CONFIG_DEBUG_INFO_DWARF5=y?
> > > Success means I was able to boot in QEMU and/or bare metal.
> > >
> > The DWARF5 patch isn't in yet, so I don't want to rely upon it too much.

I agree, providing test results with patches that haven't landed yet
can cloud the interpretation of results.  It would be helpful to drop
local patch sets before trying this.

If the resulting image still isn't working for you, can you please
provide your config? Surely we'd be able to reproduce boot failures in
QEMU?  Nothing comes to mind about a change of assemblers causing an
issue; I would assume assembly cannot be instrumented by the compiler
(even though the compiler is the "driver" of the assembler).

The hash warnings are certainly curious.
IndexedInstrProfReader::getInstrProfRecord() is the only place in LLVM
sources that can emit that.
Sedat Dilek Jan. 22, 2021, 1:49 a.m. UTC | #25
On Fri, Jan 22, 2021 at 2:43 AM Nick Desaulniers
<ndesaulniers@google.com> wrote:
>
> On Wed, Jan 20, 2021 at 6:03 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> >
> > On Mon, Jan 18, 2021 at 10:56 PM Bill Wendling <morbo@google.com> wrote:
> > >
> > > On Mon, Jan 18, 2021 at 9:26 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > >
> > > > On Mon, Jan 18, 2021 at 1:39 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > > >
> > > > > On Mon, Jan 18, 2021 at 3:32 AM Bill Wendling <morbo@google.com> wrote:
> > > > > >
> > > > > > On Sun, Jan 17, 2021 at 4:27 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > > > > >
> > > > > > > [ big snip ]
> > > > > >
> > > > > > [More snippage.]
> > > > > >
> > > > > > > [ CC Fangrui ]
> > > > > > >
> > > > > > > With the attached...
> > > > > > >
> > > > > > >    [PATCH v3] module: Ignore _GLOBAL_OFFSET_TABLE_ when warning for
> > > > > > > undefined symbols
> > > > > > >
> > > > > > > ...I was finally able to boot into a rebuild PGO-optimized Linux-kernel.
> > > > > > > For details see ClangBuiltLinux issue #1250 "Unknown symbol
> > > > > > > _GLOBAL_OFFSET_TABLE_ loading kernel modules".
> > > > > > >
> > > > > > Thanks for confirming that this works with the above patch.
> > > > > >
> > > > > > > @ Bill Nick Sami Nathan
> > > > > > >
> > > > > > > 1, Can you say something of the impact passing "LLVM_IAS=1" to make?
> > > > > >
> > > > > > The integrated assembler and this option are more-or-less orthogonal
> > > > > > to each other. One can still use the GNU assembler with PGO. If you're
> > > > > > having an issue, it may be related to ClangBuiltLinux issue #1250.
> > > > > >
> > > > > > > 2. Can you please try Nick's DWARF v5 support patchset v5 and
> > > > > > > CONFIG_DEBUG_INFO_DWARF5=y (see attachments)?
> > > > > > >
> > > > > > I know Nick did several tests with PGO. He may have looked into it
> > > > > > already, but we can check.
> > > > > >
> > > > >
> > > > > Reproducible.
> > > > >
> > > > > LLVM_IAS=1 + DWARF5 = Not bootable
> > > > >
> > > > > I will try:
> > > > >
> > > > > LLVM_IAS=1 + DWARF4
> > > > >
> > > >
> > > > I was not able to boot into such a built Linux-kernel.
> > > >
> > > PGO will have no effect on debugging data. If this is an issue with
> > > DWARF, then it's likely orthogonal to the PGO patch.
> > >
> > > > For me worked: DWARF2 and LLVM_IAS=1 *not* set.
> > > >
> > > > Of course, this could be an issue with my system's LLVM/Clang.
> > > >
> > > > Debian clang version
> > > > 12.0.0-++20210115111113+45ef053bd709-1~exp1~20210115101809.3724
> > > >
> > > Please use the official clang 11.0.1 release
> > > (https://releases.llvm.org/download.html), modifying the
> > > kernel/pgo/Kconfig as I suggested above. The reason we specify clang
> > > 12 for the minimal version is because of an issue that was recently
> > > fixed.
> > >
> >
> > I downgraded to clang-11.1.0-rc1.
> > ( See attachment. )
> >
> > Doing the first build with PGO enabled plus DWARF5 and LLVM_IAS=1 works.
> >
> > But again after generating vmlinux.profdata and doing the PGO-rebuild
> > - the resulting Linux-kernel does NOT boot in QEMU or on bare metal.
> > With GNU/as I can boot.
> >
> > So this is independent of DWARF v4 or DWARF v5 (LLVM_IAS=1 and DWARF
> > v2 is not allowed).
> > There is something wrong (here) with passing LLVM_IAS=1 to make when
> > doing the PGO-rebuild.
> >
> > Can someone please verify and confirm that the PGO-rebuild with
> > LLVM_IAS=1 boots or boots not?
>
> I was able to build+boot with LLVM_IAS=1 on my personal laptop (no
> dwarf 5, just mainline+v5).
>

To clarify:

I can build a PGO-enabled Linux-kernel and boot it.
Afterwards generate a vmlinux.profdata.
In a next step: A rebuild without PGO-Kconfig disabled + LLVM_IAS=1
does not boot.

- Sedat -

> >
> > Thanks.
> >
> > - Sedat -
> >
> > > > Can you give me a LLVM commit-id where you had success with LLVM_IAS=1
> > > > and especially CONFIG_DEBUG_INFO_DWARF5=y?
> > > > Success means I was able to boot in QEMU and/or bare metal.
> > > >
> > > The DWARF5 patch isn't in yet, so I don't want to rely upon it too much.
>
> I agree, providing test results with patches that haven't landed yet
> can cloud the interpretation of results.  It would be helpful to drop
> local patch sets before trying this.
>
> If the resulting image still isn't working for you, can you please
> provide your config? Surely we'd be able to reproduce boot failures in
> QEMU?  Nothing comes to mind about a change of assemblers causing an
> issue; I would assume assembly cannot be instrumented by the compiler
> (even though the compiler is the "driver" of the assembler).
>
> The hash warnings are certainly curious.
> IndexedInstrProfReader::getInstrProfRecord() is the only place in LLVM
> sources that can emit that.
> --
> Thanks,
> ~Nick Desaulniers
Nick Desaulniers Jan. 22, 2021, 1:52 a.m. UTC | #26
On Thu, Jan 21, 2021 at 5:49 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
>
> On Fri, Jan 22, 2021 at 2:43 AM Nick Desaulniers
> <ndesaulniers@google.com> wrote:
> >
> > On Wed, Jan 20, 2021 at 6:03 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > >
> > > On Mon, Jan 18, 2021 at 10:56 PM Bill Wendling <morbo@google.com> wrote:
> > > >
> > > > On Mon, Jan 18, 2021 at 9:26 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > > >
> > > > > On Mon, Jan 18, 2021 at 1:39 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > > > >
> > > > > > On Mon, Jan 18, 2021 at 3:32 AM Bill Wendling <morbo@google.com> wrote:
> > > > > > >
> > > > > > > On Sun, Jan 17, 2021 at 4:27 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > > > > > >
> > > > > > > > [ big snip ]
> > > > > > >
> > > > > > > [More snippage.]
> > > > > > >
> > > > > > > > [ CC Fangrui ]
> > > > > > > >
> > > > > > > > With the attached...
> > > > > > > >
> > > > > > > >    [PATCH v3] module: Ignore _GLOBAL_OFFSET_TABLE_ when warning for
> > > > > > > > undefined symbols
> > > > > > > >
> > > > > > > > ...I was finally able to boot into a rebuild PGO-optimized Linux-kernel.
> > > > > > > > For details see ClangBuiltLinux issue #1250 "Unknown symbol
> > > > > > > > _GLOBAL_OFFSET_TABLE_ loading kernel modules".
> > > > > > > >
> > > > > > > Thanks for confirming that this works with the above patch.
> > > > > > >
> > > > > > > > @ Bill Nick Sami Nathan
> > > > > > > >
> > > > > > > > 1, Can you say something of the impact passing "LLVM_IAS=1" to make?
> > > > > > >
> > > > > > > The integrated assembler and this option are more-or-less orthogonal
> > > > > > > to each other. One can still use the GNU assembler with PGO. If you're
> > > > > > > having an issue, it may be related to ClangBuiltLinux issue #1250.
> > > > > > >
> > > > > > > > 2. Can you please try Nick's DWARF v5 support patchset v5 and
> > > > > > > > CONFIG_DEBUG_INFO_DWARF5=y (see attachments)?
> > > > > > > >
> > > > > > > I know Nick did several tests with PGO. He may have looked into it
> > > > > > > already, but we can check.
> > > > > > >
> > > > > >
> > > > > > Reproducible.
> > > > > >
> > > > > > LLVM_IAS=1 + DWARF5 = Not bootable
> > > > > >
> > > > > > I will try:
> > > > > >
> > > > > > LLVM_IAS=1 + DWARF4
> > > > > >
> > > > >
> > > > > I was not able to boot into such a built Linux-kernel.
> > > > >
> > > > PGO will have no effect on debugging data. If this is an issue with
> > > > DWARF, then it's likely orthogonal to the PGO patch.
> > > >
> > > > > For me worked: DWARF2 and LLVM_IAS=1 *not* set.
> > > > >
> > > > > Of course, this could be an issue with my system's LLVM/Clang.
> > > > >
> > > > > Debian clang version
> > > > > 12.0.0-++20210115111113+45ef053bd709-1~exp1~20210115101809.3724
> > > > >
> > > > Please use the official clang 11.0.1 release
> > > > (https://releases.llvm.org/download.html), modifying the
> > > > kernel/pgo/Kconfig as I suggested above. The reason we specify clang
> > > > 12 for the minimal version is because of an issue that was recently
> > > > fixed.
> > > >
> > >
> > > I downgraded to clang-11.1.0-rc1.
> > > ( See attachment. )
> > >
> > > Doing the first build with PGO enabled plus DWARF5 and LLVM_IAS=1 works.
> > >
> > > But again after generating vmlinux.profdata and doing the PGO-rebuild
> > > - the resulting Linux-kernel does NOT boot in QEMU or on bare metal.
> > > With GNU/as I can boot.
> > >
> > > So this is independent of DWARF v4 or DWARF v5 (LLVM_IAS=1 and DWARF
> > > v2 is not allowed).
> > > There is something wrong (here) with passing LLVM_IAS=1 to make when
> > > doing the PGO-rebuild.
> > >
> > > Can someone please verify and confirm that the PGO-rebuild with
> > > LLVM_IAS=1 boots or boots not?
> >
> > I was able to build+boot with LLVM_IAS=1 on my personal laptop (no
> > dwarf 5, just mainline+v5).
> >
>
> To clarify:
>
> I can build a PGO-enabled Linux-kernel and boot it.
> Afterwards generate a vmlinux.profdata.
> In a next step: A rebuild without PGO-Kconfig disabled + LLVM_IAS=1
> does not boot.

Does the rebuild produce the hash warnings previously reported?

Can you send your .config for this?
Sedat Dilek Jan. 22, 2021, 1:54 a.m. UTC | #27
On Fri, Jan 22, 2021 at 2:52 AM Nick Desaulniers
<ndesaulniers@google.com> wrote:
>
> On Thu, Jan 21, 2021 at 5:49 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> >
> > On Fri, Jan 22, 2021 at 2:43 AM Nick Desaulniers
> > <ndesaulniers@google.com> wrote:
> > >
> > > On Wed, Jan 20, 2021 at 6:03 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > >
> > > > On Mon, Jan 18, 2021 at 10:56 PM Bill Wendling <morbo@google.com> wrote:
> > > > >
> > > > > On Mon, Jan 18, 2021 at 9:26 AM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > > > >
> > > > > > On Mon, Jan 18, 2021 at 1:39 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > > > > >
> > > > > > > On Mon, Jan 18, 2021 at 3:32 AM Bill Wendling <morbo@google.com> wrote:
> > > > > > > >
> > > > > > > > On Sun, Jan 17, 2021 at 4:27 PM Sedat Dilek <sedat.dilek@gmail.com> wrote:
> > > > > > > > >
> > > > > > > > > [ big snip ]
> > > > > > > >
> > > > > > > > [More snippage.]
> > > > > > > >
> > > > > > > > > [ CC Fangrui ]
> > > > > > > > >
> > > > > > > > > With the attached...
> > > > > > > > >
> > > > > > > > >    [PATCH v3] module: Ignore _GLOBAL_OFFSET_TABLE_ when warning for
> > > > > > > > > undefined symbols
> > > > > > > > >
> > > > > > > > > ...I was finally able to boot into a rebuild PGO-optimized Linux-kernel.
> > > > > > > > > For details see ClangBuiltLinux issue #1250 "Unknown symbol
> > > > > > > > > _GLOBAL_OFFSET_TABLE_ loading kernel modules".
> > > > > > > > >
> > > > > > > > Thanks for confirming that this works with the above patch.
> > > > > > > >
> > > > > > > > > @ Bill Nick Sami Nathan
> > > > > > > > >
> > > > > > > > > 1, Can you say something of the impact passing "LLVM_IAS=1" to make?
> > > > > > > >
> > > > > > > > The integrated assembler and this option are more-or-less orthogonal
> > > > > > > > to each other. One can still use the GNU assembler with PGO. If you're
> > > > > > > > having an issue, it may be related to ClangBuiltLinux issue #1250.
> > > > > > > >
> > > > > > > > > 2. Can you please try Nick's DWARF v5 support patchset v5 and
> > > > > > > > > CONFIG_DEBUG_INFO_DWARF5=y (see attachments)?
> > > > > > > > >
> > > > > > > > I know Nick did several tests with PGO. He may have looked into it
> > > > > > > > already, but we can check.
> > > > > > > >
> > > > > > >
> > > > > > > Reproducible.
> > > > > > >
> > > > > > > LLVM_IAS=1 + DWARF5 = Not bootable
> > > > > > >
> > > > > > > I will try:
> > > > > > >
> > > > > > > LLVM_IAS=1 + DWARF4
> > > > > > >
> > > > > >
> > > > > > I was not able to boot into such a built Linux-kernel.
> > > > > >
> > > > > PGO will have no effect on debugging data. If this is an issue with
> > > > > DWARF, then it's likely orthogonal to the PGO patch.
> > > > >
> > > > > > For me worked: DWARF2 and LLVM_IAS=1 *not* set.
> > > > > >
> > > > > > Of course, this could be an issue with my system's LLVM/Clang.
> > > > > >
> > > > > > Debian clang version
> > > > > > 12.0.0-++20210115111113+45ef053bd709-1~exp1~20210115101809.3724
> > > > > >
> > > > > Please use the official clang 11.0.1 release
> > > > > (https://releases.llvm.org/download.html), modifying the
> > > > > kernel/pgo/Kconfig as I suggested above. The reason we specify clang
> > > > > 12 for the minimal version is because of an issue that was recently
> > > > > fixed.
> > > > >
> > > >
> > > > I downgraded to clang-11.1.0-rc1.
> > > > ( See attachment. )
> > > >
> > > > Doing the first build with PGO enabled plus DWARF5 and LLVM_IAS=1 works.
> > > >
> > > > But again after generating vmlinux.profdata and doing the PGO-rebuild
> > > > - the resulting Linux-kernel does NOT boot in QEMU or on bare metal.
> > > > With GNU/as I can boot.
> > > >
> > > > So this is independent of DWARF v4 or DWARF v5 (LLVM_IAS=1 and DWARF
> > > > v2 is not allowed).
> > > > There is something wrong (here) with passing LLVM_IAS=1 to make when
> > > > doing the PGO-rebuild.
> > > >
> > > > Can someone please verify and confirm that the PGO-rebuild with
> > > > LLVM_IAS=1 boots or boots not?
> > >
> > > I was able to build+boot with LLVM_IAS=1 on my personal laptop (no
> > > dwarf 5, just mainline+v5).
> > >
> >
> > To clarify:
> >
> > I can build a PGO-enabled Linux-kernel and boot it.
> > Afterwards generate a vmlinux.profdata.
> > In a next step: A rebuild without PGO-Kconfig disabled + LLVM_IAS=1
> > does not boot.
>
> Does the rebuild produce the hash warnings previously reported?
>
> Can you send your .config for this?

Exactly!

Attached is config-5.11.0-rc4-2-amd64-clang11-pgo.

- Sedat -

Patch
diff mbox series

diff --git a/Documentation/dev-tools/index.rst b/Documentation/dev-tools/index.rst
index f7809c7b1ba9e..8d6418e858062 100644
--- a/Documentation/dev-tools/index.rst
+++ b/Documentation/dev-tools/index.rst
@@ -26,6 +26,7 @@  whole; patches welcome!
    kgdb
    kselftest
    kunit/index
+   pgo
 
 
 .. only::  subproject and html
diff --git a/Documentation/dev-tools/pgo.rst b/Documentation/dev-tools/pgo.rst
new file mode 100644
index 0000000000000..b7f11d8405b73
--- /dev/null
+++ b/Documentation/dev-tools/pgo.rst
@@ -0,0 +1,127 @@ 
+.. SPDX-License-Identifier: GPL-2.0
+
+===============================
+Using PGO with the Linux kernel
+===============================
+
+Clang's profiling kernel support (PGO_) enables profiling of the Linux kernel
+when building with Clang. The profiling data is exported via the ``pgo``
+debugfs directory.
+
+.. _PGO: https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization
+
+
+Preparation
+===========
+
+Configure the kernel with:
+
+.. code-block:: make
+
+   CONFIG_DEBUG_FS=y
+   CONFIG_PGO_CLANG=y
+
+Note that kernels compiled with profiling flags will be significantly larger
+and run slower.
+
+Profiling data will only become accessible once debugfs has been mounted:
+
+.. code-block:: sh
+
+   mount -t debugfs none /sys/kernel/debug
+
+
+Customization
+=============
+
+You can enable or disable profiling for individual files and directories by
+adding a line similar to the following to the respective kernel Makefile:
+
+- For a single file (e.g. main.o)
+
+  .. code-block:: make
+
+     PGO_PROFILE_main.o := y
+
+- For all files in one directory
+
+  .. code-block:: make
+
+     PGO_PROFILE := y
+
+To exclude files from being profiled use
+
+  .. code-block:: make
+
+     PGO_PROFILE_main.o := n
+
+and
+
+  .. code-block:: make
+
+     PGO_PROFILE := n
+
+Only files which are linked to the main kernel image or are compiled as kernel
+modules are supported by this mechanism.
+
+
+Files
+=====
+
+The PGO kernel support creates the following files in debugfs:
+
+``/sys/kernel/debug/pgo``
+	Parent directory for all PGO-related files.
+
+``/sys/kernel/debug/pgo/reset``
+	Global reset file: resets all coverage data to zero when written to.
+
+``/sys/kernel/debug/pgo/profraw``
+	The raw PGO data that must be processed with ``llvm-profdata``.
+
+
+Workflow
+========
+
+The PGO kernel can be run on the host or test machines. The data though should
+be analyzed with Clang's tools from the same Clang version as the kernel was
+compiled with. Clang's tolerant of version skew, but it's easier to use the same
+Clang version.
+
+The profiling data is useful for optimizing the kernel, analyzing coverage,
+etc. Clang offers tools to perform these tasks.
+
+Here is an example workflow for profiling an instrumented kernel with PGO and
+using the result to optimize the kernel:
+
+1) Install the kernel on the TEST machine.
+
+2) Reset the data counters right before running the load tests
+
+   .. code-block:: sh
+
+      $ echo 1 > /sys/kernel/debug/pgo/reset
+
+3) Run the load tests.
+
+4) Collect the raw profile data
+
+   .. code-block:: sh
+
+      $ cp -a /sys/kernel/debug/pgo/profraw /tmp/vmlinux.profraw
+
+5) (Optional) Download the raw profile data to the HOST machine.
+
+6) Process the raw profile data
+
+   .. code-block:: sh
+
+      $ llvm-profdata merge --output=vmlinux.profdata vmlinux.profraw
+
+   Note that multiple raw profile data files can be merged during this step.
+
+7) Rebuild the kernel using the profile data (PGO disabled)
+
+   .. code-block:: sh
+
+      $ make LLVM=1 KCFLAGS=-fprofile-use=vmlinux.profdata ...
diff --git a/MAINTAINERS b/MAINTAINERS
index 79b400c97059f..cb1f1f2b2baf4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13948,6 +13948,15 @@  S:	Maintained
 F:	include/linux/personality.h
 F:	include/uapi/linux/personality.h
 
+PGO BASED KERNEL PROFILING
+M:	Sami Tolvanen <samitolvanen@google.com>
+M:	Bill Wendling <wcw@google.com>
+R:	Nathan Chancellor <natechancellor@gmail.com>
+R:	Nick Desaulniers <ndesaulniers@google.com>
+S:	Supported
+F:	Documentation/dev-tools/pgo.rst
+F:	kernel/pgo
+
 PHOENIX RC FLIGHT CONTROLLER ADAPTER
 M:	Marcus Folkesson <marcus.folkesson@gmail.com>
 L:	linux-input@vger.kernel.org
diff --git a/Makefile b/Makefile
index 9e73f82e0d863..9128bfe1ccc97 100644
--- a/Makefile
+++ b/Makefile
@@ -659,6 +659,9 @@  endif # KBUILD_EXTMOD
 # Defaults to vmlinux, but the arch makefile usually adds further targets
 all: vmlinux
 
+CFLAGS_PGO_CLANG := -fprofile-generate
+export CFLAGS_PGO_CLANG
+
 CFLAGS_GCOV	:= -fprofile-arcs -ftest-coverage \
 	$(call cc-option,-fno-tree-loop-im) \
 	$(call cc-disable-warning,maybe-uninitialized,)
diff --git a/arch/Kconfig b/arch/Kconfig
index 24862d15f3a36..f39d3991f6bfe 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1112,6 +1112,7 @@  config ARCH_SPLIT_ARG64
 	   pairs of 32-bit arguments, select this option.
 
 source "kernel/gcov/Kconfig"
+source "kernel/pgo/Kconfig"
 
 source "scripts/gcc-plugins/Kconfig"
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 21f851179ff08..36305ea61dc09 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -96,6 +96,7 @@  config X86
 	select ARCH_SUPPORTS_DEBUG_PAGEALLOC
 	select ARCH_SUPPORTS_NUMA_BALANCING	if X86_64
 	select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP	if NR_CPUS <= 4096
+	select ARCH_SUPPORTS_PGO_CLANG		if X86_64
 	select ARCH_USE_BUILTIN_BSWAP
 	select ARCH_USE_QUEUED_RWLOCKS
 	select ARCH_USE_QUEUED_SPINLOCKS
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index fe605205b4ce2..383853e32f673 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -71,6 +71,7 @@  KBUILD_AFLAGS	:= $(KBUILD_CFLAGS) -D__ASSEMBLY__
 KBUILD_CFLAGS	+= $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
 KBUILD_CFLAGS	+= -fno-asynchronous-unwind-tables
 GCOV_PROFILE := n
+PGO_PROFILE := n
 UBSAN_SANITIZE := n
 
 $(obj)/bzImage: asflags-y  := $(SVGA_MODE)
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index e0bc3988c3faa..ed12ab65f6065 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -54,6 +54,7 @@  CFLAGS_sev-es.o += -I$(objtree)/arch/x86/lib/
 
 KBUILD_AFLAGS  := $(KBUILD_CFLAGS) -D__ASSEMBLY__
 GCOV_PROFILE := n
+PGO_PROFILE := n
 UBSAN_SANITIZE :=n
 
 KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE)
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index a31de0c6ccde2..775fa0b368e98 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -4,6 +4,8 @@ 
 
 OBJECT_FILES_NON_STANDARD := y
 
+PGO_PROFILE_curve25519-x86_64.o := n
+
 obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
 
 obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 02e3e42f380bd..26e2b3af0145c 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -179,6 +179,7 @@  quiet_cmd_vdso = VDSO    $@
 VDSO_LDFLAGS = -shared --hash-style=both --build-id=sha1 \
 	$(call ld-option, --eh-frame-hdr) -Bsymbolic
 GCOV_PROFILE := n
+PGO_PROFILE := n
 
 quiet_cmd_vdso_and_check = VDSO    $@
       cmd_vdso_and_check = $(cmd_vdso); $(cmd_vdso_check)
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index efd9e9ea17f25..f6cab2316c46a 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -184,6 +184,8 @@  SECTIONS
 
 	BUG_TABLE
 
+	PGO_CLANG_DATA
+
 	ORC_UNWIND_TABLE
 
 	. = ALIGN(PAGE_SIZE);
diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile
index 84b09c230cbd5..5f22b31446ad4 100644
--- a/arch/x86/platform/efi/Makefile
+++ b/arch/x86/platform/efi/Makefile
@@ -2,6 +2,7 @@ 
 OBJECT_FILES_NON_STANDARD_efi_thunk_$(BITS).o := y
 KASAN_SANITIZE := n
 GCOV_PROFILE := n
+PGO_PROFILE := n
 
 obj-$(CONFIG_EFI) 		+= quirks.o efi.o efi_$(BITS).o efi_stub_$(BITS).o
 obj-$(CONFIG_EFI_MIXED)		+= efi_thunk_$(BITS).o
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 95ea17a9d20cb..36f20e99da0bc 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -23,6 +23,7 @@  targets += purgatory.ro purgatory.chk
 
 # Sanitizer, etc. runtimes are unavailable and cannot be linked here.
 GCOV_PROFILE	:= n
+PGO_PROFILE	:= n
 KASAN_SANITIZE	:= n
 UBSAN_SANITIZE	:= n
 KCSAN_SANITIZE	:= n
diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile
index 83f1b6a56449f..21797192f958f 100644
--- a/arch/x86/realmode/rm/Makefile
+++ b/arch/x86/realmode/rm/Makefile
@@ -76,4 +76,5 @@  KBUILD_CFLAGS	:= $(REALMODE_CFLAGS) -D_SETUP -D_WAKEUP \
 KBUILD_AFLAGS	:= $(KBUILD_CFLAGS) -D__ASSEMBLY__
 KBUILD_CFLAGS	+= -fno-asynchronous-unwind-tables
 GCOV_PROFILE := n
+PGO_PROFILE := n
 UBSAN_SANITIZE := n
diff --git a/arch/x86/um/vdso/Makefile b/arch/x86/um/vdso/Makefile
index 5943387e3f357..54f5768f58530 100644
--- a/arch/x86/um/vdso/Makefile
+++ b/arch/x86/um/vdso/Makefile
@@ -64,6 +64,7 @@  quiet_cmd_vdso = VDSO    $@
 
 VDSO_LDFLAGS = -fPIC -shared -Wl,--hash-style=sysv
 GCOV_PROFILE := n
+PGO_PROFILE := n
 
 #
 # Install the unstripped copy of vdso*.so listed in $(vdso-install-y).
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index 8a94388e38b33..2d81623b33f29 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -40,6 +40,7 @@  KBUILD_CFLAGS			:= $(cflags-y) -Os -DDISABLE_BRANCH_PROFILING \
 KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
 
 GCOV_PROFILE			:= n
+PGO_PROFILE			:= n
 # Sanitizer runtimes are unavailable and cannot be linked here.
 KASAN_SANITIZE			:= n
 KCSAN_SANITIZE			:= n
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index b2b3d81b1535a..3a591bb18c5fb 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -316,6 +316,49 @@ 
 #define THERMAL_TABLE(name)
 #endif
 
+#ifdef CONFIG_PGO_CLANG
+#define PGO_CLANG_DATA							\
+	__llvm_prf_data : AT(ADDR(__llvm_prf_data) - LOAD_OFFSET) {	\
+		. = ALIGN(8);						\
+		__llvm_prf_start = .;					\
+		__llvm_prf_data_start = .;				\
+		KEEP(*(__llvm_prf_data))				\
+		. = ALIGN(8);						\
+		__llvm_prf_data_end = .;				\
+	}								\
+	__llvm_prf_cnts : AT(ADDR(__llvm_prf_cnts) - LOAD_OFFSET) {	\
+		. = ALIGN(8);						\
+		__llvm_prf_cnts_start = .;				\
+		KEEP(*(__llvm_prf_cnts))				\
+		. = ALIGN(8);						\
+		__llvm_prf_cnts_end = .;				\
+	}								\
+	__llvm_prf_names : AT(ADDR(__llvm_prf_names) - LOAD_OFFSET) {	\
+		. = ALIGN(8);						\
+		__llvm_prf_names_start = .;				\
+		KEEP(*(__llvm_prf_names))				\
+		. = ALIGN(8);						\
+		__llvm_prf_names_end = .;				\
+		. = ALIGN(8);						\
+	}								\
+	__llvm_prf_vals : AT(ADDR(__llvm_prf_vals) - LOAD_OFFSET) {	\
+		__llvm_prf_vals_start = .;				\
+		KEEP(*(__llvm_prf_vals))				\
+		. = ALIGN(8);						\
+		__llvm_prf_vals_end = .;				\
+		. = ALIGN(8);						\
+	}								\
+	__llvm_prf_vnds : AT(ADDR(__llvm_prf_vnds) - LOAD_OFFSET) {	\
+		__llvm_prf_vnds_start = .;				\
+		KEEP(*(__llvm_prf_vnds))				\
+		. = ALIGN(8);						\
+		__llvm_prf_vnds_end = .;				\
+		__llvm_prf_end = .;					\
+	}
+#else
+#define PGO_CLANG_DATA
+#endif
+
 #define KERNEL_DTB()							\
 	STRUCT_ALIGN();							\
 	__dtb_start = .;						\
@@ -1125,6 +1168,7 @@ 
 		CONSTRUCTORS						\
 	}								\
 	BUG_TABLE							\
+	PGO_CLANG_DATA
 
 #define INIT_TEXT_SECTION(inittext_align)				\
 	. = ALIGN(inittext_align);					\
diff --git a/kernel/Makefile b/kernel/Makefile
index aa7368c7eabf3..0b34ca228ba46 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -111,6 +111,7 @@  obj-$(CONFIG_BPF) += bpf/
 obj-$(CONFIG_KCSAN) += kcsan/
 obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o
 obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call.o
+obj-$(CONFIG_PGO_CLANG) += pgo/
 
 obj-$(CONFIG_PERF_EVENTS) += events/
 
diff --git a/kernel/pgo/Kconfig b/kernel/pgo/Kconfig
new file mode 100644
index 0000000000000..76a640b6cf6ed
--- /dev/null
+++ b/kernel/pgo/Kconfig
@@ -0,0 +1,35 @@ 
+# SPDX-License-Identifier: GPL-2.0-only
+menu "Profile Guided Optimization (PGO) (EXPERIMENTAL)"
+
+config ARCH_SUPPORTS_PGO_CLANG
+	bool
+
+config PGO_CLANG
+	bool "Enable clang's PGO-based kernel profiling"
+	depends on DEBUG_FS
+	depends on ARCH_SUPPORTS_PGO_CLANG
+	depends on CC_IS_CLANG && CLANG_VERSION >= 120000
+	help
+	  This option enables clang's PGO (Profile Guided Optimization) based
+	  code profiling to better optimize the kernel.
+
+	  If unsure, say N.
+
+	  Run a representative workload for your application on a kernel
+	  compiled with this option and download the raw profile file from
+	  /sys/kernel/debug/pgo/profraw. This file needs to be processed with
+	  llvm-profdata. It may be merged with other collected raw profiles.
+
+	  Copy the resulting profile file into vmlinux.profdata, and enable
+	  KCFLAGS=-fprofile-use=vmlinux.profdata to produce an optimized
+	  kernel.
+
+	  Note that a kernel compiled with profiling flags will be
+	  significantly larger and run slower. Also be sure to exclude files
+	  from profiling which are not linked to the kernel image to prevent
+	  linker errors.
+
+	  Note that the debugfs filesystem has to be mounted to access
+	  profiling data.
+
+endmenu
diff --git a/kernel/pgo/Makefile b/kernel/pgo/Makefile
new file mode 100644
index 0000000000000..41e27cefd9a47
--- /dev/null
+++ b/kernel/pgo/Makefile
@@ -0,0 +1,5 @@ 
+# SPDX-License-Identifier: GPL-2.0
+GCOV_PROFILE	:= n
+PGO_PROFILE	:= n
+
+obj-y	+= fs.o instrument.o
diff --git a/kernel/pgo/fs.c b/kernel/pgo/fs.c
new file mode 100644
index 0000000000000..68b24672be10a
--- /dev/null
+++ b/kernel/pgo/fs.c
@@ -0,0 +1,382 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Google, Inc.
+ *
+ * Author:
+ *	Sami Tolvanen <samitolvanen@google.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#define pr_fmt(fmt)	"pgo: " fmt
+
+#include <linux/kernel.h>
+#include <linux/debugfs.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include "pgo.h"
+
+static struct dentry *directory;
+
+struct prf_private_data {
+	void *buffer;
+	unsigned long size;
+};
+
+/*
+ * Raw profile data format:
+ *
+ *	- llvm_prf_header
+ *	- __llvm_prf_data
+ *	- __llvm_prf_cnts
+ *	- __llvm_prf_names
+ *	- zero padding to 8 bytes
+ *	- for each llvm_prf_data in __llvm_prf_data:
+ *		- llvm_prf_value_data
+ *			- llvm_prf_value_record + site count array
+ *				- llvm_prf_value_node_data
+ *				...
+ *			...
+ *		...
+ */
+
+/*
+ * Write the raw profile header at the current buffer position and advance
+ * the position past it.
+ */
+static void prf_fill_header(void **buffer)
+{
+	struct llvm_prf_header *hdr = *buffer;
+
+	/* File identification: magic, IR variant flag and format version. */
+	hdr->magic = LLVM_PRF_MAGIC;
+	hdr->version = LLVM_PRF_VARIANT_MASK_IR | LLVM_PRF_VERSION;
+
+	/* Element counts of the sections that follow the header. */
+	hdr->data_size = prf_data_count();
+	hdr->counters_size = prf_cnts_count();
+	hdr->names_size = prf_names_count();
+
+	/* No padding is emitted around the counters. */
+	hdr->padding_bytes_before_counters = 0;
+	hdr->padding_bytes_after_counters = 0;
+
+	/* Start addresses of the counters and names sections. */
+	hdr->counters_delta = (u64)__llvm_prf_cnts_start;
+	hdr->names_delta = (u64)__llvm_prf_names_start;
+
+	hdr->value_kind_last = LLVM_PRF_IPVK_LAST;
+
+	*buffer = hdr + 1;
+}
+
+/*
+ * Append @size bytes from @src at the current buffer position, then move the
+ * position forward by the same amount.
+ */
+static void prf_copy_to_buffer(void **buffer, void *src, unsigned long size)
+{
+	void *dst = *buffer;
+
+	memcpy(dst, src, size);
+	*buffer = dst + size;
+}
+
+/*
+ * Count the value nodes recorded for one value site.
+ *
+ * The per-site count is stored in a u8 in the serialized profile, so at most
+ * U8_MAX nodes are counted; any further nodes on the list are ignored.
+ */
+static u32 prf_count_site_nodes(struct llvm_prf_value_node *site)
+{
+	u32 count = 0;
+
+	while (site && count < U8_MAX) {
+		count++;
+		site = site->next;
+	}
+
+	return count;
+}
+
+/*
+ * Compute the number of bytes prf_serialize_value() emits for @p.
+ *
+ * For every value kind that has sites this is one value record (header plus
+ * site count array) and one llvm_prf_value_node_data per counted node. The
+ * size of the llvm_prf_value_data header is added when anything is emitted.
+ *
+ * If @value_kinds is non-NULL it receives the number of kinds that have
+ * value sites.
+ */
+static u32 __prf_get_value_size(struct llvm_prf_data *p, u32 *value_kinds)
+{
+	struct llvm_prf_value_node **nodes =
+		(struct llvm_prf_value_node **)p->values;
+	u32 kinds = 0;
+	u32 size = 0;
+	unsigned int kind;
+	unsigned int n;
+	unsigned int s = 0;
+
+	for (kind = 0; kind < ARRAY_SIZE(p->num_value_sites); kind++) {
+		unsigned int sites = p->num_value_sites[kind];
+
+		if (!sites)
+			continue;
+
+		/* Record + site count array */
+		size += prf_get_value_record_size(sites);
+		kinds++;
+
+		if (!nodes)
+			continue;
+
+		for (n = 0; n < sites; n++)
+			size += prf_count_site_nodes(nodes[s + n]) *
+				sizeof(struct llvm_prf_value_node_data);
+
+		/* The sites of this kind are followed by those of the next. */
+		s += sites;
+	}
+
+	if (size)
+		size += sizeof(struct llvm_prf_value_data);
+
+	if (value_kinds)
+		*value_kinds = kinds;
+
+	return size;
+}
+
+/* Sum of the serialized value data sizes of all llvm_prf_data entries. */
+static u32 prf_get_value_size(void)
+{
+	u32 size = 0;
+	struct llvm_prf_data *p;
+
+	for (p = __llvm_prf_data_start; p < __llvm_prf_data_end; p++)
+		size += __prf_get_value_size(p, NULL);
+
+	return size;
+}
+
+/*
+ * Serialize the profiling's value.
+ *
+ * Writes, for @p, an llvm_prf_value_data header followed by one value record
+ * per kind that has sites: record header, site count array, then the node
+ * data of each site. Nothing is written when no kind has value sites.
+ * @buffer is advanced past everything written and must point into zero
+ * filled memory.
+ */
+static void prf_serialize_value(struct llvm_prf_data *p, void **buffer)
+{
+	struct llvm_prf_value_data header;
+	struct llvm_prf_value_node **nodes =
+		(struct llvm_prf_value_node **)p->values;
+	unsigned int kind;
+	unsigned int n;
+	unsigned int s = 0;
+
+	header.total_size = __prf_get_value_size(p, &header.num_value_kinds);
+
+	if (!header.num_value_kinds)
+		/* Nothing to write. */
+		return;
+
+	prf_copy_to_buffer(buffer, &header, sizeof(header));
+
+	for (kind = 0; kind < ARRAY_SIZE(p->num_value_sites); kind++) {
+		struct llvm_prf_value_record *record;
+		u8 *counts;
+		unsigned int sites = p->num_value_sites[kind];
+
+		if (!sites)
+			continue;
+
+		/* Profiling value record. */
+		record = *(struct llvm_prf_value_record **)buffer;
+		*buffer += prf_get_value_record_header_size();
+
+		record->kind = kind;
+		record->num_value_sites = sites;
+
+		/* Site count array. */
+		counts = *(u8 **)buffer;
+		*buffer += prf_get_value_record_site_count_size(sites);
+
+		/*
+		 * If we don't have nodes, we can skip updating the site count
+		 * array, because the buffer is zero filled.
+		 */
+		if (!nodes)
+			continue;
+
+		for (n = 0; n < sites; n++) {
+			u32 count = 0;
+			struct llvm_prf_value_node *site = nodes[s + n];
+
+			/*
+			 * Stop at U8_MAX nodes so that the count still fits
+			 * the u8 slot and matches the number of nodes written
+			 * and accounted for by __prf_get_value_size().
+			 */
+			while (site && count < U8_MAX) {
+				prf_copy_to_buffer(buffer, site,
+						   sizeof(struct llvm_prf_value_node_data));
+				site = site->next;
+				count++;
+			}
+
+			counts[n] = (u8)count;
+		}
+
+		s += sites;
+	}
+}
+
+/* Append the value profiling data of every llvm_prf_data entry to the buffer. */
+static void prf_serialize_values(void **buffer)
+{
+	struct llvm_prf_data *entry = __llvm_prf_data_start;
+
+	while (entry < __llvm_prf_data_end) {
+		prf_serialize_value(entry, buffer);
+		entry++;
+	}
+}
+
+/* Number of bytes needed to round @size up to the next multiple of 8. */
+static inline unsigned long prf_get_padding(unsigned long size)
+{
+	unsigned long remainder = size % 8;
+
+	return remainder ? 8 - remainder : 0;
+}
+
+/*
+ * Total number of bytes of the serialized raw profile: header, data,
+ * counters, names (padded to 8 bytes) and value profiling data.
+ */
+static unsigned long prf_buffer_size(void)
+{
+	unsigned long names = prf_names_size();
+	unsigned long total = sizeof(struct llvm_prf_header);
+
+	total += prf_data_size();
+	total += prf_cnts_size();
+	total += names;
+	total += prf_get_padding(names);
+	total += prf_get_value_size();
+
+	return total;
+}
+
+/*
+ * Serialize the profiling data into a format LLVM's tools can understand.
+ *
+ * The caller must hold the PGO lock and supply a zero filled p->buffer of
+ * @buf_size bytes. p->size is set to the number of bytes the profile needs.
+ * If that exceeds @buf_size nothing is written and -EAGAIN is returned, so
+ * that the caller can retry with a buffer of p->size bytes.
+ */
+static int prf_serialize(struct prf_private_data *p, unsigned long buf_size)
+{
+	void *buffer;
+
+	p->size = prf_buffer_size();
+	if (p->size > buf_size)
+		return -EAGAIN;
+
+	buffer = p->buffer;
+
+	prf_fill_header(&buffer);
+	prf_copy_to_buffer(&buffer, __llvm_prf_data_start,  prf_data_size());
+	prf_copy_to_buffer(&buffer, __llvm_prf_cnts_start,  prf_cnts_size());
+	prf_copy_to_buffer(&buffer, __llvm_prf_names_start, prf_names_size());
+	buffer += prf_get_padding(prf_names_size());
+
+	prf_serialize_values(&buffer);
+
+	return 0;
+}
+
+/*
+ * open() implementation for PGO. Creates a copy of the profiling data set.
+ *
+ * vzalloc() may sleep, so the buffer is allocated without the PGO lock held
+ * (the lock is a spinlock taken with interrupts disabled). Value nodes can
+ * be added between sizing the buffer and serializing into it; in that case
+ * prf_serialize() reports -EAGAIN and the allocation is redone with the
+ * updated size.
+ *
+ * Returns 0 on success or -ENOMEM if an allocation fails.
+ */
+static int prf_open(struct inode *inode, struct file *file)
+{
+	struct prf_private_data *data;
+	unsigned long buf_size;
+	unsigned long flags;
+	int err;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	/* Initial size estimate. */
+	flags = prf_lock();
+	data->size = prf_buffer_size();
+	prf_unlock(flags);
+
+	do {
+		/* Drop the too small buffer of a previous attempt, if any. */
+		vfree(data->buffer);
+
+		buf_size = data->size;
+		data->buffer = vzalloc(buf_size);
+		if (!data->buffer) {
+			err = -ENOMEM;
+			break;
+		}
+
+		flags = prf_lock();
+		err = prf_serialize(data, buf_size);
+		prf_unlock(flags);
+	} while (err == -EAGAIN);
+
+	if (err) {
+		vfree(data->buffer);
+		kfree(data);
+		return err;
+	}
+
+	file->private_data = data;
+
+	return 0;
+}
+
+/*
+ * read() implementation for PGO: hands out bytes from the snapshot that
+ * open() stored in file->private_data.
+ */
+static ssize_t prf_read(struct file *file, char __user *buf, size_t count,
+			loff_t *ppos)
+{
+	struct prf_private_data *snapshot = file->private_data;
+
+	BUG_ON(!snapshot);
+
+	return simple_read_from_buffer(buf, count, ppos, snapshot->buffer,
+				       snapshot->size);
+}
+
+/* release() implementation for PGO: frees the snapshot allocated by open(). */
+static int prf_release(struct inode *inode, struct file *file)
+{
+	struct prf_private_data *snapshot = file->private_data;
+
+	if (!snapshot)
+		return 0;
+
+	vfree(snapshot->buffer);
+	kfree(snapshot);
+
+	return 0;
+}
+
+static const struct file_operations prf_fops = {	/* debugfs "profraw" file */
+	.owner		= THIS_MODULE,
+	.open		= prf_open,
+	.read		= prf_read,
+	.llseek		= default_llseek,
+	.release	= prf_release
+};
+
+/*
+ * write() implementation for resetting PGO's profile data.
+ *
+ * Zeroes the counters section and the count of every value node reachable
+ * from the llvm_prf_data entries. The written bytes themselves are ignored;
+ * the whole write is reported as consumed.
+ */
+static ssize_t reset_write(struct file *file, const char __user *addr,
+			   size_t len, loff_t *pos)
+{
+	struct llvm_prf_data *entry;
+
+	memset(__llvm_prf_cnts_start, 0, prf_cnts_size());
+
+	for (entry = __llvm_prf_data_start; entry < __llvm_prf_data_end; ++entry) {
+		struct llvm_prf_value_node **sites;
+		u64 nr_sites = 0;
+		u32 kind;
+		u32 s;
+
+		if (!entry->values)
+			continue;
+
+		sites = (struct llvm_prf_value_node **)entry->values;
+
+		/* Number of value sites over all value kinds. */
+		for (kind = LLVM_PRF_IPVK_FIRST; kind <= LLVM_PRF_IPVK_LAST; ++kind)
+			nr_sites += entry->num_value_sites[kind];
+
+		for (s = 0; s < nr_sites; ++s) {
+			struct llvm_prf_value_node *node;
+
+			for (node = sites[s]; node; node = node->next)
+				node->count = 0;
+		}
+	}
+
+	return len;
+}
+
+static const struct file_operations prf_reset_fops = {	/* debugfs "reset" file */
+	.owner		= THIS_MODULE,
+	.write		= reset_write,
+	.llseek		= noop_llseek,
+};
+
+/*
+ * Create debugfs entries.
+ *
+ * debugfs_create_dir() and debugfs_create_file() report failure with an
+ * ERR_PTR() value rather than NULL, so the results are checked with
+ * IS_ERR(). If a file cannot be created, whatever was created so far is
+ * removed again.
+ *
+ * Returns 0 on success and -EIO on failure.
+ */
+static int __init pgo_init(void)
+{
+	struct dentry *file;
+
+	directory = debugfs_create_dir("pgo", NULL);
+	if (IS_ERR(directory))
+		goto err;
+
+	file = debugfs_create_file("profraw", 0600, directory, NULL,
+				   &prf_fops);
+	if (IS_ERR(file))
+		goto err_remove;
+
+	file = debugfs_create_file("reset", 0200, directory, NULL,
+				   &prf_reset_fops);
+	if (IS_ERR(file))
+		goto err_remove;
+
+	return 0;
+
+err_remove:
+	debugfs_remove_recursive(directory);
+err:
+	/* Do not leave a stale or error pointer behind. */
+	directory = NULL;
+	pr_err("initialization failed\n");
+	return -EIO;
+}
+
+/*
+ * Remove debugfs entries: tears down the directory created by pgo_init()
+ * together with everything below it.
+ */
+static void __exit pgo_exit(void)
+{
+	debugfs_remove_recursive(directory);
+}
+
+module_init(pgo_init);
+module_exit(pgo_exit);
diff --git a/kernel/pgo/instrument.c b/kernel/pgo/instrument.c
new file mode 100644
index 0000000000000..6084ff0652e85
--- /dev/null
+++ b/kernel/pgo/instrument.c
@@ -0,0 +1,185 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Google, Inc.
+ *
+ * Author:
+ *	Sami Tolvanen <samitolvanen@google.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#define pr_fmt(fmt)	"pgo: " fmt
+
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include "pgo.h"
+
+/*
+ * Lock guarding value node access and serialization.
+ *
+ * prf_lock() takes it with spin_lock_irqsave() and returns the saved
+ * interrupt flags; prf_unlock() releases it and must be passed those flags.
+ */
+static DEFINE_SPINLOCK(pgo_lock);
+static int current_node;	/* index into __llvm_prf_vnds_start, used by allocate_node() */
+
+unsigned long prf_lock(void)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&pgo_lock, flags);
+
+	return flags;
+}
+
+void prf_unlock(unsigned long flags)
+{
+	spin_unlock_irqrestore(&pgo_lock, flags);
+}
+
+/*
+ * Return a newly allocated profiling value node which contains the tracked
+ * value by the value profiler.
+ *
+ * Nodes are handed out one after the other from the __llvm_prf_vnds section,
+ * beginning with its first element; current_node is the index of the next
+ * free node. Returns NULL once the section has no room for another complete
+ * node. @p, @index and @value are currently unused.
+ *
+ * Note: caller *must* hold pgo_lock.
+ */
+static struct llvm_prf_value_node *allocate_node(struct llvm_prf_data *p,
+						 u32 index, u64 value)
+{
+	struct llvm_prf_value_node *node = &__llvm_prf_vnds_start[current_node];
+
+	/* Make sure the node is entirely within the section */
+	if (node + 1 > __llvm_prf_vnds_end)
+		return NULL; /* Out of nodes */
+
+	current_node++;
+
+	return node;
+}
+
+/*
+ * Counts the number of times a target value is seen.
+ *
+ * Records the target value for the CounterIndex if not seen before. Otherwise,
+ * increments the counter associated w/ the target value.
+ *
+ * @target_value: the value observed at the value site.
+ * @data: the struct llvm_prf_data of the function; nothing is recorded if it
+ *   or its values array is NULL.
+ * @index: index of the value site in the values array.
+ *
+ * The site's node list is searched without taking the lock. If the value is
+ * not found and the list already holds LLVM_PRF_MAX_NUM_VALS_PER_SITE nodes,
+ * the node with the smallest count is decremented (if not already zero) and,
+ * once its count is zero, reused for @target_value with a count of one.
+ * Otherwise a node is allocated under the lock and linked in: as the list
+ * head if the site has none, else after the last node seen during the
+ * search, provided that node still has no successor.
+ *
+ * NOTE(review): if neither linking condition holds the allocated node is
+ * left unlinked; the count++ on a new node presumably relies on the node
+ * section being zero initialized -- confirm.
+ */
+void __llvm_profile_instrument_target(u64 target_value, void *data, u32 index);
+void __llvm_profile_instrument_target(u64 target_value, void *data, u32 index)
+{
+	struct llvm_prf_data *p = (struct llvm_prf_data *)data;
+	struct llvm_prf_value_node **counters;
+	struct llvm_prf_value_node *curr;
+	struct llvm_prf_value_node *min = NULL;
+	struct llvm_prf_value_node *prev = NULL;
+	u64 min_count = U64_MAX;
+	u8 values = 0;
+	unsigned long flags;
+
+	if (!p || !p->values)
+		return;
+
+	counters = (struct llvm_prf_value_node **)p->values;
+	curr = counters[index];
+
+	while (curr) {
+		if (target_value == curr->value) {
+			curr->count++;
+			return;
+		}
+
+		if (curr->count < min_count) {
+			min_count = curr->count;
+			min = curr;
+		}
+
+		prev = curr;
+		curr = curr->next;
+		values++;
+	}
+
+	if (values >= LLVM_PRF_MAX_NUM_VALS_PER_SITE) {
+		if (!min->count || !(--min->count)) {
+			curr = min;
+			curr->value = target_value;
+			curr->count++;
+		}
+		return;
+	}
+
+	/* Lock when updating the value node structure. */
+	flags = prf_lock();
+
+	curr = allocate_node(p, index, target_value);
+	if (!curr)
+		goto out;
+
+	curr->value = target_value;
+	curr->count++;
+
+	if (!counters[index])
+		counters[index] = curr;
+	else if (prev && !prev->next)
+		prev->next = curr;
+
+out:
+	prf_unlock(flags);
+}
+EXPORT_SYMBOL(__llvm_profile_instrument_target);
+
+/*
+ * Counts the number of times a range of target values is seen.
+ *
+ * Values at or above @large_value (when one is set, i.e. it is not S64_MIN)
+ * are recorded as @large_value. Other values outside of
+ * [@precise_start, @precise_last] are recorded as @precise_last + 1. Values
+ * inside that interval are recorded unchanged.
+ */
+void __llvm_profile_instrument_range(u64 target_value, void *data,
+				     u32 index, s64 precise_start,
+				     s64 precise_last, s64 large_value);
+void __llvm_profile_instrument_range(u64 target_value, void *data,
+				     u32 index, s64 precise_start,
+				     s64 precise_last, s64 large_value)
+{
+	s64 signed_value = (s64)target_value;
+	bool in_large_range = large_value != S64_MIN &&
+			      signed_value >= large_value;
+	bool in_precise_range = signed_value >= precise_start &&
+				signed_value <= precise_last;
+
+	if (in_large_range)
+		target_value = large_value;
+	else if (!in_precise_range)
+		target_value = precise_last + 1;
+
+	__llvm_profile_instrument_target(target_value, data, index);
+}
+EXPORT_SYMBOL(__llvm_profile_instrument_range);
+
+/* Map @value to the representative value of the range it falls into. */
+static u64 inst_prof_get_range_rep_value(u64 value)
+{
+	/* The first ranges are individually tracked, use the value as is. */
+	if (value <= 8)
+		return value;
+
+	/* The last range is mapped to its lowest value. */
+	if (value >= 513)
+		return 513;
+
+	/* A power of two is a range of its own, use it as is. */
+	if (hweight64(value) == 1)
+		return value;
+
+	/* Otherwise, use the previous power of two + 1. */
+	return (1 << (64 - __builtin_clzll(value) - 1)) + 1;
+}
+
+/*
+ * The target values are partitioned into multiple ranges. The range spec is
+ * defined in compiler-rt/include/profile/InstrProfData.inc.
+ *
+ * The value recorded for the site is the representative value of the range
+ * that @target_value belongs to.
+ */
+void __llvm_profile_instrument_memop(u64 target_value, void *data,
+				     u32 counter_index);
+void __llvm_profile_instrument_memop(u64 target_value, void *data,
+				     u32 counter_index)
+{
+	target_value = inst_prof_get_range_rep_value(target_value);
+	__llvm_profile_instrument_target(target_value, data, counter_index);
+}
+EXPORT_SYMBOL(__llvm_profile_instrument_memop);
diff --git a/kernel/pgo/pgo.h b/kernel/pgo/pgo.h
new file mode 100644
index 0000000000000..df0aa278f28bd
--- /dev/null
+++ b/kernel/pgo/pgo.h
@@ -0,0 +1,206 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Google, Inc.
+ *
+ * Author:
+ *	Sami Tolvanen <samitolvanen@google.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _PGO_H
+#define _PGO_H
+
+/*
+ * Note: These internal LLVM definitions must match the compiler version.
+ * See llvm/include/llvm/ProfileData/InstrProfData.inc in LLVM's source code.
+ */
+
+#ifdef CONFIG_64BIT
+	#define LLVM_PRF_MAGIC		\
+		((u64)255 << 56 |	\
+		 (u64)'l' << 48 |	\
+		 (u64)'p' << 40 |	\
+		 (u64)'r' << 32 |	\
+		 (u64)'o' << 24 |	\
+		 (u64)'f' << 16 |	\
+		 (u64)'r' << 8  |	\
+		 (u64)129)
+#else
+	#define LLVM_PRF_MAGIC		\
+		((u64)255 << 56 |	\
+		 (u64)'l' << 48 |	\
+		 (u64)'p' << 40 |	\
+		 (u64)'r' << 32 |	\
+		 (u64)'o' << 24 |	\
+		 (u64)'f' << 16 |	\
+		 (u64)'R' << 8  |	\
+		 (u64)129)
+#endif
+
+#define LLVM_PRF_VERSION		5
+#define LLVM_PRF_DATA_ALIGN		8
+#define LLVM_PRF_IPVK_FIRST		0
+#define LLVM_PRF_IPVK_LAST		1
+#define LLVM_PRF_MAX_NUM_VALS_PER_SITE	16
+
+#define LLVM_PRF_VARIANT_MASK_IR	(0x1ull << 56)
+#define LLVM_PRF_VARIANT_MASK_CSIR	(0x1ull << 57)
+
+/**
+ * struct llvm_prf_header - represents the raw profile header data structure.
+ * @magic: the magic token for the file format.
+ * @version: the version of the file format.
+ * @data_size: the number of entries in the profile data section.
+ * @padding_bytes_before_counters: the number of padding bytes before the
+ *   counters.
+ * @counters_size: the number of counters in the LLVM profile section
+ *   containing the counters.
+ * @padding_bytes_after_counters: the number of padding bytes after the
+ *   counters.
+ * @names_size: the size in bytes of the LLVM profile section containing the
+ *   counters' names.
+ * @counters_delta: the beginning of the LLVM profile counters section.
+ * @names_delta: the beginning of the LLVM profile names section.
+ * @value_kind_last: the last profile value kind.
+ */
+struct llvm_prf_header {
+	u64 magic;
+	u64 version;
+	u64 data_size;
+	u64 padding_bytes_before_counters;
+	u64 counters_size;
+	u64 padding_bytes_after_counters;
+	u64 names_size;
+	u64 counters_delta;
+	u64 names_delta;
+	u64 value_kind_last;
+};
+
+/**
+ * struct llvm_prf_data - represents the per-function control structure.
+ * @name_ref: the reference to the function's name.
+ * @func_hash: the hash value of the function.
+ * @counter_ptr: a pointer to the profile counter.
+ * @function_ptr: a pointer to the function.
+ * @values: the profiling values associated with this function.
+ * @num_counters: the number of counters in the function.
+ * @num_value_sites: the number of value profile sites.
+ */
+struct llvm_prf_data {
+	const u64 name_ref;
+	const u64 func_hash;
+	const void *counter_ptr;
+	const void *function_ptr;
+	void *values;
+	const u32 num_counters;
+	const u16 num_value_sites[LLVM_PRF_IPVK_LAST + 1];
+} __aligned(LLVM_PRF_DATA_ALIGN);
+
+/**
+ * struct llvm_prf_value_node_data - represents the data part of the struct
+ *   llvm_prf_value_node data structure.
+ * @value: the value counters.
+ * @count: the counters' count.
+ */
+struct llvm_prf_value_node_data {
+	u64 value;
+	u64 count;
+};
+
+/**
+ * struct llvm_prf_value_node - represents an internal data structure used by
+ *   the value profiler.
+ * @value: the value counters.
+ * @count: the counters' count.
+ * @next: the next value node.
+ */
+struct llvm_prf_value_node {
+	u64 value;
+	u64 count;
+	struct llvm_prf_value_node *next;
+};
+
+/**
+ * struct llvm_prf_value_data - represents the value profiling data in indexed
+ *   format.
+ * @total_size: the total size in bytes including this field.
+ * @num_value_kinds: the number of value profile kinds that have value profile
+ *   data.
+ */
+struct llvm_prf_value_data {
+	u32 total_size;
+	u32 num_value_kinds;
+};
+
+/**
+ * struct llvm_prf_value_record - represents the on-disk layout of the value
+ *   profile data of a particular kind for one function.
+ * @kind: the kind of the value profile record.
+ * @num_value_sites: the number of value profile sites.
+ * @site_count_array: the first element of the array that stores the number
+ *   of profiled values for each value site.
+ */
+struct llvm_prf_value_record {
+	u32 kind;
+	u32 num_value_sites;
+	u8 site_count_array[];
+};
+
+#define prf_get_value_record_header_size()		\
+	offsetof(struct llvm_prf_value_record, site_count_array)
+#define prf_get_value_record_site_count_size(sites)	\
+	roundup((sites), 8)
+#define prf_get_value_record_size(sites)		\
+	(prf_get_value_record_header_size() +		\
+	 prf_get_value_record_site_count_size((sites)))
+
+/* Data sections */
+extern struct llvm_prf_data __llvm_prf_data_start[];
+extern struct llvm_prf_data __llvm_prf_data_end[];
+
+extern u64 __llvm_prf_cnts_start[];
+extern u64 __llvm_prf_cnts_end[];
+
+extern char __llvm_prf_names_start[];
+extern char __llvm_prf_names_end[];
+
+extern struct llvm_prf_value_node __llvm_prf_vnds_start[];
+extern struct llvm_prf_value_node __llvm_prf_vnds_end[];
+
+/* Locking for vnodes */
+extern unsigned long prf_lock(void);
+extern void prf_unlock(unsigned long flags);
+
+#define __DEFINE_PRF_SIZE(s) \
+	static inline unsigned long prf_ ## s ## _size(void)		\
+	{								\
+		unsigned long start =					\
+			(unsigned long)__llvm_prf_ ## s ## _start;	\
+		unsigned long end =					\
+			(unsigned long)__llvm_prf_ ## s ## _end;	\
+		return roundup(end - start,				\
+				sizeof(__llvm_prf_ ## s ## _start[0]));	\
+	}								\
+	static inline unsigned long prf_ ## s ## _count(void)		\
+	{								\
+		return prf_ ## s ## _size() /				\
+			sizeof(__llvm_prf_ ## s ## _start[0]);		\
+	}
+
+__DEFINE_PRF_SIZE(data);
+__DEFINE_PRF_SIZE(cnts);
+__DEFINE_PRF_SIZE(names);
+__DEFINE_PRF_SIZE(vnds);
+
+#undef __DEFINE_PRF_SIZE
+
+#endif /* _PGO_H */
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 213677a5ed33e..9b218afb5cb87 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -143,6 +143,16 @@  _c_flags += $(if $(patsubst n%,, \
 		$(CFLAGS_GCOV))
 endif
 
+#
+# Enable clang's PGO profiling flags for a file or directory depending on
+# variables PGO_PROFILE_obj.o and PGO_PROFILE.
+#
+ifeq ($(CONFIG_PGO_CLANG),y)
+_c_flags += $(if $(patsubst n%,, \
+		$(PGO_PROFILE_$(basetarget).o)$(PGO_PROFILE)y), \
+		$(CFLAGS_PGO_CLANG))
+endif
+
 #
 # Enable address sanitizer flags for kernel except some files or directories
 # we don't want to check (depends on variables KASAN_SANITIZE_obj.o, KASAN_SANITIZE)