linux-kbuild.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 01/17] x86, lto: Disable LTO for the x86 VDSO
@ 2014-02-08  8:01 Andi Kleen
  2014-02-08  8:01 ` [PATCH 02/17] x86, lto: Disable fancy hweight optimizations for LTO v2 Andi Kleen
                   ` (15 more replies)
  0 siblings, 16 replies; 26+ messages in thread
From: Andi Kleen @ 2014-02-08  8:01 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-kbuild, x86, Andi Kleen

The VDSO does not play well with LTO, so just disable LTO for it.
Also pass a 32bit linker flag for the 32bit version.

Cc: x86@kernel.org
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 arch/x86/vdso/Makefile | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index fd14be1..598f163 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -2,6 +2,8 @@
 # Building vDSO images for x86.
 #
 
+KBUILD_CFLAGS += ${DISABLE_LTO}
+
 VDSO64-$(CONFIG_X86_64)		:= y
 VDSOX32-$(CONFIG_X86_X32_ABI)	:= y
 VDSO32-$(CONFIG_X86_32)		:= y
@@ -35,7 +37,8 @@ export CPPFLAGS_vdso.lds += -P -C
 
 VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
 			-Wl,--no-undefined \
-		      	-Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
+			-Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 \
+			$(DISABLE_LTO)
 
 $(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so
 
@@ -127,7 +130,7 @@ vdso32.so-$(VDSO32-y)		+= sysenter
 vdso32-images			= $(vdso32.so-y:%=vdso32-%.so)
 
 CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
-VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-soname=linux-gate.so.1
+VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1
 
 # This makes sure the $(obj) subdirectory exists even though vdso32/
 # is not a kbuild sub-make subdirectory.
@@ -181,7 +184,8 @@ quiet_cmd_vdso = VDSO    $@
 		       -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \
 		 sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'
 
-VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \
+		${LTO_CFLAGS}
 GCOV_PROFILE := n
 
 #
-- 
1.8.5.2


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 02/17] x86, lto: Disable fancy hweight optimizations for LTO v2
  2014-02-08  8:01 [PATCH 01/17] x86, lto: Disable LTO for the x86 VDSO Andi Kleen
@ 2014-02-08  8:01 ` Andi Kleen
  2014-02-08 18:52   ` H. Peter Anvin
  2014-02-08  8:01 ` [PATCH 03/17] lto: Make asmlinkage __visible Andi Kleen
                   ` (14 subsequent siblings)
  15 siblings, 1 reply; 26+ messages in thread
From: Andi Kleen @ 2014-02-08  8:01 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-kbuild, x86, Andi Kleen

The fancy x86 hweight implementation compiles the hweight file with
different compiler options (-fcall-saved-*). This does not work with LTO,
so just disable the optimization when LTO is enabled.

v2: Simplify Kconfig checks (Jan Beulich)
Cc: x86@kernel.org
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 arch/x86/Kconfig                    | 1 +
 arch/x86/include/asm/arch_hweight.h | 9 +++++++++
 2 files changed, 10 insertions(+)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 940e50e..f125c5f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -255,6 +255,7 @@ config X86_32_LAZY_GS
 
 config ARCH_HWEIGHT_CFLAGS
 	string
+	default "" if LTO
 	default "-fcall-saved-ecx -fcall-saved-edx" if X86_32
 	default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64
 
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index 9686c3d..ca80549 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -25,9 +25,14 @@ static inline unsigned int __arch_hweight32(unsigned int w)
 {
 	unsigned int res = 0;
 
+#ifdef CONFIG_LTO
+	res  = __sw_hweight32(w);
+#else
+
 	asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT)
 		     : "="REG_OUT (res)
 		     : REG_IN (w));
+#endif
 
 	return res;
 }
@@ -46,6 +51,9 @@ static inline unsigned long __arch_hweight64(__u64 w)
 {
 	unsigned long res = 0;
 
+#ifdef CONFIG_LTO
+	res = __sw_hweight64(w);
+#else
 #ifdef CONFIG_X86_32
 	return  __arch_hweight32((u32)w) +
 		__arch_hweight32((u32)(w >> 32));
@@ -54,6 +62,7 @@ static inline unsigned long __arch_hweight64(__u64 w)
 		     : "="REG_OUT (res)
 		     : REG_IN (w));
 #endif /* CONFIG_X86_32 */
+#endif
 
 	return res;
 }
-- 
1.8.5.2


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 03/17] lto: Make asmlinkage __visible
  2014-02-08  8:01 [PATCH 01/17] x86, lto: Disable LTO for the x86 VDSO Andi Kleen
  2014-02-08  8:01 ` [PATCH 02/17] x86, lto: Disable fancy hweight optimizations for LTO v2 Andi Kleen
@ 2014-02-08  8:01 ` Andi Kleen
  2014-02-08  8:01 ` [PATCH 04/17] lto, workaround: Add workaround for initcall reordering Andi Kleen
                   ` (13 subsequent siblings)
  15 siblings, 0 replies; 26+ messages in thread
From: Andi Kleen @ 2014-02-08  8:01 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-kbuild, x86, Andi Kleen

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 include/linux/linkage.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index a6a42dd..34a513a 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -12,9 +12,9 @@
 #endif
 
 #ifdef __cplusplus
-#define CPP_ASMLINKAGE extern "C"
+#define CPP_ASMLINKAGE extern "C" __visible
 #else
-#define CPP_ASMLINKAGE
+#define CPP_ASMLINKAGE __visible
 #endif
 
 #ifndef asmlinkage
-- 
1.8.5.2


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 04/17] lto, workaround: Add workaround for initcall reordering
  2014-02-08  8:01 [PATCH 01/17] x86, lto: Disable LTO for the x86 VDSO Andi Kleen
  2014-02-08  8:01 ` [PATCH 02/17] x86, lto: Disable fancy hweight optimizations for LTO v2 Andi Kleen
  2014-02-08  8:01 ` [PATCH 03/17] lto: Make asmlinkage __visible Andi Kleen
@ 2014-02-08  8:01 ` Andi Kleen
  2014-02-08  8:01 ` [PATCH 05/17] lto: Handle LTO common symbols in module loader Andi Kleen
                   ` (12 subsequent siblings)
  15 siblings, 0 replies; 26+ messages in thread
From: Andi Kleen @ 2014-02-08  8:01 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-kbuild, x86, Andi Kleen

Work around a LTO gcc problem: when there is no reference to a variable
in a module it will be moved to the end of the program. This causes
reordering of initcalls which the kernel does not like.
Add a dummy reference function to avoid this. The function is
deleted by the linker.

This replaces a previous much slower workaround.

Thanks to Honza Hubicka for suggesting this technique.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 include/linux/init.h | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/include/linux/init.h b/include/linux/init.h
index e168880..a3ba270 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -163,6 +163,23 @@ extern bool initcall_debug;
 
 #ifndef __ASSEMBLY__
 
+#ifdef CONFIG_LTO
+/* Work around a LTO gcc problem: when there is no reference to a variable
+ * in a module it will be moved to the end of the program. This causes
+ * reordering of initcalls which the kernel does not like.
+ * Add a dummy reference function to avoid this. The function is
+ * deleted by the linker.
+ */
+#define LTO_REFERENCE_INITCALL(x) \
+	; /* yes this is needed */			\
+	static __used __exit void *reference_##x(void)	\
+	{						\
+		return &x;				\
+	}
+#else
+#define LTO_REFERENCE_INITCALL(x)
+#endif
+
 /* initcalls are now grouped by functionality into separate 
  * subsections. Ordering inside the subsections is determined
  * by link order. 
@@ -175,7 +192,8 @@ extern bool initcall_debug;
 
 #define __define_initcall(fn, id) \
 	static initcall_t __initcall_##fn##id __used \
-	__attribute__((__section__(".initcall" #id ".init"))) = fn
+	__attribute__((__section__(".initcall" #id ".init"))) = fn; \
+	LTO_REFERENCE_INITCALL(__initcall_##fn##id)
 
 /*
  * Early initcalls run before initializing SMP.
-- 
1.8.5.2


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 05/17] lto: Handle LTO common symbols in module loader
  2014-02-08  8:01 [PATCH 01/17] x86, lto: Disable LTO for the x86 VDSO Andi Kleen
                   ` (2 preceding siblings ...)
  2014-02-08  8:01 ` [PATCH 04/17] lto, workaround: Add workaround for initcall reordering Andi Kleen
@ 2014-02-08  8:01 ` Andi Kleen
  2014-02-12  1:04   ` Rusty Russell
  2014-02-08  8:01 ` [PATCH 06/17] lto: Disable LTO for sys_ni Andi Kleen
                   ` (11 subsequent siblings)
  15 siblings, 1 reply; 26+ messages in thread
From: Andi Kleen @ 2014-02-08  8:01 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-kbuild, x86, Joe Mario, rusty, Andi Kleen

From: Joe Mario <jmario@redhat.com>

Here is the workaround I made for having the kernel not reject modules
built with -flto.  The clean solution would be to get the compiler to not
emit the symbol.  Or if it has to emit the symbol, then emit it as
initialized data but put it into a comdat/linkonce section.

Minor tweaks by AK over Joe's patch.

Cc: rusty@rustcorp.com.au
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 kernel/module.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/kernel/module.c b/kernel/module.c
index d24fcf2..b99e801 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1948,6 +1948,10 @@ static int simplify_symbols(struct module *mod, const struct load_info *info)
 
 		switch (sym[i].st_shndx) {
 		case SHN_COMMON:
+			/* Ignore common symbols */
+			if (!strncmp(name, "__gnu_lto", 9))
+				break;
+
 			/* We compiled with -fno-common.  These are not
 			   supposed to happen.  */
 			pr_debug("Common symbol: %s\n", name);
-- 
1.8.5.2


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 06/17] lto: Disable LTO for sys_ni
  2014-02-08  8:01 [PATCH 01/17] x86, lto: Disable LTO for the x86 VDSO Andi Kleen
                   ` (3 preceding siblings ...)
  2014-02-08  8:01 ` [PATCH 05/17] lto: Handle LTO common symbols in module loader Andi Kleen
@ 2014-02-08  8:01 ` Andi Kleen
  2014-02-08  8:01 ` [PATCH 07/17] Kbuild, lto, workaround: Don't warn for initcall_reference in modpost Andi Kleen
                   ` (10 subsequent siblings)
  15 siblings, 0 replies; 26+ messages in thread
From: Andi Kleen @ 2014-02-08  8:01 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-kbuild, x86, Andi Kleen

The assembler alias code in cond_syscall does not work
when compiled for LTO. Just disable LTO for that file.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 kernel/Makefile | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/Makefile b/kernel/Makefile
index bc010ee..31c26c6 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -18,6 +18,9 @@ CFLAGS_REMOVE_cgroup-debug.o = -pg
 CFLAGS_REMOVE_irq_work.o = -pg
 endif
 
+# cond_syscall is currently not LTO compatible
+CFLAGS_sys_ni.o = $(DISABLE_LTO)
+
 obj-y += sched/
 obj-y += locking/
 obj-y += power/
-- 
1.8.5.2


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 07/17] Kbuild, lto, workaround: Don't warn for initcall_reference in modpost
  2014-02-08  8:01 [PATCH 01/17] x86, lto: Disable LTO for the x86 VDSO Andi Kleen
                   ` (4 preceding siblings ...)
  2014-02-08  8:01 ` [PATCH 06/17] lto: Disable LTO for sys_ni Andi Kleen
@ 2014-02-08  8:01 ` Andi Kleen
  2014-02-08  8:01 ` [PATCH 08/17] Kbuild, lto: Drop .number postfixes " Andi Kleen
                   ` (9 subsequent siblings)
  15 siblings, 0 replies; 26+ messages in thread
From: Andi Kleen @ 2014-02-08  8:01 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-kbuild, x86, Andi Kleen

This reference is discarded, but can cause warnings when it refers to
exit. Ignore for now.

This is a workaround and can be removed once we get rid of
-fno-toplevel-reorder

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 scripts/mod/modpost.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 4061098..1f1b154 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -1455,6 +1455,10 @@ static void check_section_mismatch(const char *modname, struct elf_info *elf,
 		to = find_elf_symbol(elf, r->r_addend, sym);
 		tosym = sym_name(elf, to);
 
+		if (!strncmp(fromsym, "reference___initcall",
+				sizeof("reference___initcall")-1))
+			return;
+
 		/* check whitelist - we may ignore it */
 		if (secref_whitelist(mismatch,
 					fromsec, fromsym, tosec, tosym)) {
-- 
1.8.5.2


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 08/17] Kbuild, lto: Drop .number postfixes in modpost
  2014-02-08  8:01 [PATCH 01/17] x86, lto: Disable LTO for the x86 VDSO Andi Kleen
                   ` (5 preceding siblings ...)
  2014-02-08  8:01 ` [PATCH 07/17] Kbuild, lto, workaround: Don't warn for initcall_reference in modpost Andi Kleen
@ 2014-02-08  8:01 ` Andi Kleen
  2014-02-08  8:01 ` [PATCH 09/17] Kbuild, lto: add ld-version and ld-ifversion macros Andi Kleen
                   ` (8 subsequent siblings)
  15 siblings, 0 replies; 26+ messages in thread
From: Andi Kleen @ 2014-02-08  8:01 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-kbuild, x86, Andi Kleen

LTO turns all global symbols effectively into statics. This
has the side effect that they all get a .NUMBER postfix to make
them unique. Drop this postfix in modpost, because modpost gets
confused by it.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 scripts/mod/modpost.c | 15 ++++++++++++++-
 scripts/mod/modpost.h |  2 +-
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 1f1b154..f91dd45 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -1684,6 +1684,19 @@ static void check_sec_ref(struct module *mod, const char *modname,
 	}
 }
 
+static char *remove_dot(char *s)
+{
+	char *end;
+	int n = strcspn(s, ".");
+
+	if (n > 0 && s[n] != 0) {
+		strtoul(s + n + 1, &end, 10);
+		if  (end > s + n + 1 && (*end == '.' || *end == 0))
+			s[n] = 0;
+	}
+	return s;
+}
+
 static void read_symbols(char *modname)
 {
 	const char *symname;
@@ -1722,7 +1735,7 @@ static void read_symbols(char *modname)
 	}
 
 	for (sym = info.symtab_start; sym < info.symtab_stop; sym++) {
-		symname = info.strtab + sym->st_name;
+		symname = remove_dot(info.strtab + sym->st_name);
 
 		handle_modversions(mod, &info, sym, symname);
 		handle_moddevtable(mod, &info, sym, symname);
diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
index 51207e4..168b43d 100644
--- a/scripts/mod/modpost.h
+++ b/scripts/mod/modpost.h
@@ -127,7 +127,7 @@ struct elf_info {
 	Elf_Section  export_gpl_sec;
 	Elf_Section  export_unused_gpl_sec;
 	Elf_Section  export_gpl_future_sec;
-	const char   *strtab;
+	char         *strtab;
 	char	     *modinfo;
 	unsigned int modinfo_len;
 
-- 
1.8.5.2


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 09/17] Kbuild, lto: add ld-version and ld-ifversion macros
  2014-02-08  8:01 [PATCH 01/17] x86, lto: Disable LTO for the x86 VDSO Andi Kleen
                   ` (6 preceding siblings ...)
  2014-02-08  8:01 ` [PATCH 08/17] Kbuild, lto: Drop .number postfixes " Andi Kleen
@ 2014-02-08  8:01 ` Andi Kleen
  2014-02-08  8:01 ` [PATCH 10/17] Kbuild, lto: Add a gcc-ld script to let run gcc as ld Andi Kleen
                   ` (7 subsequent siblings)
  15 siblings, 0 replies; 26+ messages in thread
From: Andi Kleen @ 2014-02-08  8:01 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-kbuild, x86, Andi Kleen

To check the linker version. Used by the LTO makefile.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 scripts/Kbuild.include | 9 +++++++++
 scripts/ld-version.sh  | 8 ++++++++
 2 files changed, 17 insertions(+)
 create mode 100755 scripts/ld-version.sh

diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index 547e15d..93a0da2 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -155,6 +155,15 @@ ld-option = $(call try-run,\
 # Important: no spaces around options
 ar-option = $(call try-run, $(AR) rc$(1) "$$TMP",$(1),$(2))
 
+# ld-version
+# Usage: $(call ld-version)
+# Note this is mainly for HJ Lu's 3 number binutil versions
+ld-version = $(shell $(LD) --version | $(srctree)/scripts/ld-version.sh)
+
+# ld-ifversion
+# Usage:  $(call ld-ifversion, -ge, 22252, y)
+ld-ifversion = $(shell [ $(call ld-version) $(1) $(2) ] && echo $(3))
+
 ######
 
 ###
diff --git a/scripts/ld-version.sh b/scripts/ld-version.sh
new file mode 100755
index 0000000..198580d
--- /dev/null
+++ b/scripts/ld-version.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/awk -f
+# extract linker version number from stdin and turn into single number
+	{
+	gsub(".*)", "");
+	split($1,a, ".");
+	print a[1]*10000000 + a[2]*100000 + a[3]*10000 + a[4]*100 + a[5];
+	exit
+	}
-- 
1.8.5.2


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 10/17] Kbuild, lto: Add a gcc-ld script to let run gcc as ld
  2014-02-08  8:01 [PATCH 01/17] x86, lto: Disable LTO for the x86 VDSO Andi Kleen
                   ` (7 preceding siblings ...)
  2014-02-08  8:01 ` [PATCH 09/17] Kbuild, lto: add ld-version and ld-ifversion macros Andi Kleen
@ 2014-02-08  8:01 ` Andi Kleen
  2014-02-08  8:01 ` [PATCH 11/17] Kbuild, lto: Disable LTO for asm-offsets.c Andi Kleen
                   ` (6 subsequent siblings)
  15 siblings, 0 replies; 26+ messages in thread
From: Andi Kleen @ 2014-02-08  8:01 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-kbuild, x86, Andi Kleen

For LTO we need to run the link step with gcc, not ld.
Since there are a lot of linker options passed to it, add a gcc-ld wrapper
that wraps them as -Wl,

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 scripts/gcc-ld | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 scripts/gcc-ld

diff --git a/scripts/gcc-ld b/scripts/gcc-ld
new file mode 100644
index 0000000..cadab9a
--- /dev/null
+++ b/scripts/gcc-ld
@@ -0,0 +1,29 @@
+#!/bin/sh
+# run gcc with ld options
+# used as a wrapper to execute link time optimizations
+# yes virginia, this is not pretty
+
+ARGS="-nostdlib"
+
+while [ "$1" != "" ] ; do
+	case "$1" in
+	-save-temps|-m32|-m64) N="$1" ;;
+	-r) N="$1" ;;
+	-[Wg]*) N="$1" ;;
+	-[olv]|-[Ofd]*|-nostdlib) N="$1" ;;
+	--end-group|--start-group)
+		 N="-Wl,$1" ;;
+	-[RTFGhIezcbyYu]*|\
+--script|--defsym|-init|-Map|--oformat|-rpath|\
+-rpath-link|--sort-section|--section-start|-Tbss|-Tdata|-Ttext|\
+--version-script|--dynamic-list|--version-exports-symbol|--wrap|-m)
+		A="$1" ; shift ; N="-Wl,$A,$1" ;;
+	-[m]*) N="$1" ;;
+	-*) N="-Wl,$1" ;;
+	*)  N="$1" ;;
+	esac
+	ARGS="$ARGS $N"
+	shift
+done
+
+exec $CC $ARGS
-- 
1.8.5.2


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 11/17] Kbuild, lto: Disable LTO for asm-offsets.c
  2014-02-08  8:01 [PATCH 01/17] x86, lto: Disable LTO for the x86 VDSO Andi Kleen
                   ` (8 preceding siblings ...)
  2014-02-08  8:01 ` [PATCH 10/17] Kbuild, lto: Add a gcc-ld script to let run gcc as ld Andi Kleen
@ 2014-02-08  8:01 ` Andi Kleen
  2014-02-08  8:01 ` [PATCH 12/17] Kbuild, lto: Set TMPDIR for LTO Andi Kleen
                   ` (5 subsequent siblings)
  15 siblings, 0 replies; 26+ messages in thread
From: Andi Kleen @ 2014-02-08  8:01 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-kbuild, x86, Andi Kleen

The asm-offsets.c technique of fishing data out of the generated assembler
file does not work with LTO. Just disable LTO for the asm-offsets.c build.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 scripts/Makefile.build | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index d5d859c..9f0ee22 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -198,7 +198,7 @@ $(multi-objs-y:.o=.s)   : modname = $(modname-multi)
 $(multi-objs-y:.o=.lst) : modname = $(modname-multi)
 
 quiet_cmd_cc_s_c = CC $(quiet_modtag)  $@
-cmd_cc_s_c       = $(CC) $(c_flags) -fverbose-asm -S -o $@ $<
+cmd_cc_s_c       = $(CC) $(c_flags) $(DISABLE_LTO) -fverbose-asm -S -o $@ $<
 
 $(obj)/%.s: $(src)/%.c FORCE
 	$(call if_changed_dep,cc_s_c)
-- 
1.8.5.2


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 12/17] Kbuild, lto: Set TMPDIR for LTO
  2014-02-08  8:01 [PATCH 01/17] x86, lto: Disable LTO for the x86 VDSO Andi Kleen
                   ` (9 preceding siblings ...)
  2014-02-08  8:01 ` [PATCH 11/17] Kbuild, lto: Disable LTO for asm-offsets.c Andi Kleen
@ 2014-02-08  8:01 ` Andi Kleen
  2014-02-14  4:26   ` H. Peter Anvin
  2014-02-08  8:01 ` [PATCH 13/17] Kbuild, lto: Handle basic LTO in modpost Andi Kleen
                   ` (4 subsequent siblings)
  15 siblings, 1 reply; 26+ messages in thread
From: Andi Kleen @ 2014-02-08  8:01 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-kbuild, x86, Andi Kleen

LTO gcc puts a lot of data into $TMPDIR, essentially another copy
of the object directory to pass the repartitioned object files
to the code generation processes.

TMPDIR defaults to /tmp. With /tmp as tmpfs it's easy to drive systems
out of memory, because the temporary files will compete with the already
high anonymous memory consumption of the wpa LTO pass.

When LTO is set always set TMPDIR to the object directory. This could
be slightly slower, but is far safer and eliminates another parameter
the LTO user would need to set manually.

I made it conditional on LTO for now.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 Makefile | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/Makefile b/Makefile
index 606ef7c..d1189ea 100644
--- a/Makefile
+++ b/Makefile
@@ -407,6 +407,14 @@ export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE
 export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL
 export KBUILD_ARFLAGS
 
+ifdef CONFIG_LTO
+# LTO gcc creates a lot of files in TMPDIR, and with /tmp as tmpfs
+# it's easy to drive the machine OOM. Use the object directory
+# instead
+TMPDIR := ${objtree}
+export TMPDIR
+endif
+
 # When compiling out-of-tree modules, put MODVERDIR in the module
 # tree rather than in the kernel tree. The kernel tree might
 # even be read-only.
-- 
1.8.5.2


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 13/17] Kbuild, lto: Handle basic LTO in modpost
  2014-02-08  8:01 [PATCH 01/17] x86, lto: Disable LTO for the x86 VDSO Andi Kleen
                   ` (10 preceding siblings ...)
  2014-02-08  8:01 ` [PATCH 12/17] Kbuild, lto: Set TMPDIR for LTO Andi Kleen
@ 2014-02-08  8:01 ` Andi Kleen
  2014-02-08  8:01 ` [PATCH 14/17] Kbuild, lto: Add Link Time Optimization support Andi Kleen
                   ` (3 subsequent siblings)
  15 siblings, 0 replies; 26+ messages in thread
From: Andi Kleen @ 2014-02-08  8:01 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-kbuild, x86, Andi Kleen

- Don't warn about LTO marker symbols. modpost runs before
the linker, so the module is not necessarily LTOed yet.
- Don't complain about .gnu.lto* sections

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 scripts/mod/modpost.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index f91dd45..63804a1 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -623,7 +623,10 @@ static void handle_modversions(struct module *mod, struct elf_info *info,
 
 	switch (sym->st_shndx) {
 	case SHN_COMMON:
-		warn("\"%s\" [%s] is COMMON symbol\n", symname, mod->name);
+		if (!strncmp(symname, "__gnu_lto_", sizeof("__gnu_lto_")-1)) {
+			/* Should warn here, but modpost runs before the linker */
+		} else
+			warn("\"%s\" [%s] is COMMON symbol\n", symname, mod->name);
 		break;
 	case SHN_UNDEF:
 		/* undefined symbol */
@@ -849,6 +852,7 @@ static const char *section_white_list[] =
 	".xt.lit",         /* xtensa */
 	".arcextmap*",			/* arc */
 	".gnu.linkonce.arcext*",	/* arc : modules */
+	".gnu.lto*",
 	NULL
 };
 
-- 
1.8.5.2


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 14/17] Kbuild, lto: Add Link Time Optimization support
  2014-02-08  8:01 [PATCH 01/17] x86, lto: Disable LTO for the x86 VDSO Andi Kleen
                   ` (11 preceding siblings ...)
  2014-02-08  8:01 ` [PATCH 13/17] Kbuild, lto: Handle basic LTO in modpost Andi Kleen
@ 2014-02-08  8:01 ` Andi Kleen
  2014-02-14  4:28   ` H. Peter Anvin
  2014-02-08  8:01 ` [PATCH 15/17] Kbuild, lto: Add LTO build Documentation Andi Kleen
                   ` (2 subsequent siblings)
  15 siblings, 1 reply; 26+ messages in thread
From: Andi Kleen @ 2014-02-08  8:01 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-kbuild, x86, Andi Kleen

With LTO gcc will do whole program optimizations for
the whole kernel and each module. This increases compile time,
but can generate faster and smaller code and allows
the compiler to do global checking.

LTO allows gcc to inline functions between different files and
do various other optimization across the whole binary.

It might also trigger bugs due to more aggressive optimization.
It allows gcc to drop unused code. It also allows it to check
types over the whole program.

This adds the basic Kbuild plumbing for LTO:

- In Kbuild add a new scripts/Makefile.lto that checks
the tool chain (note the checks may not be fully bulletproof)
and when the tests pass sets the LTO options
Currently LTO is very finicky about the tool chain.
- Add a new LDFINAL variable that controls the final link
for vmlinux or module. In this case we call gcc-ld instead
of ld, to run the LTO step.
- For slim LTO builds (object files containing no backup
executable) force AR to gcc-ar
- Theoretically LTO should pass through compiler options from
the compiler to the link step, but this doesn't work for all options.
So the Makefile sets most of these options manually.
- Kconfigs:
Since LTO with allyesconfig needs more than 4G of memory (~8G)
and has the potential to make people's systems swap to death,
I used a nested config that ensures that a simple
allyesconfig disables LTO. It has to be explicitly
enabled.
- Some dependencies on other Kconfigs:
MODVERSIONS, GCOV, FUNCTION_TRACER, KALLSYMS_ALL, single chain WCHAN are
incompatible with LTO currently. MODVERSIONS should be fixable,
but the others require setting special compiler options
for specific files, which LTO currently doesn't support.
[MODVERSIONS should in principle work with gcc 4.9, but still disabled]
- I also disable strict copy user checks because they trigger
errors with LTO.
- I had to use a hack to support the single pass kallsyms,
as gcc-nm does not support static symbols currently
- modpost symbol checking is downgraded to a warning,
as in some cases modpost runs before the final link
and it cannot resolve LTO symbols at this point.

For more information see Documentation/lto-build

Thanks to HJ Lu, Joe Mario, Honza Hubicka, Richard Guenther,
Don Zickus, Changlong Xie who helped with this project
(and probably some more who I forgot, sorry)

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 Makefile                 |  9 +++++-
 arch/x86/Kconfig         |  2 +-
 init/Kconfig             | 70 +++++++++++++++++++++++++++++++++++++++-
 kernel/gcov/Kconfig      |  2 +-
 scripts/Makefile.lto     | 84 ++++++++++++++++++++++++++++++++++++++++++++++++
 scripts/Makefile.modpost |  7 ++--
 scripts/gcc-ld           |  1 +
 scripts/kallsyms.c       |  4 ++-
 scripts/link-vmlinux.sh  | 28 +++++++++++++---
 9 files changed, 194 insertions(+), 13 deletions(-)
 create mode 100644 scripts/Makefile.lto

diff --git a/Makefile b/Makefile
index d1189ea..68e1528 100644
--- a/Makefile
+++ b/Makefile
@@ -335,9 +335,14 @@ include $(srctree)/scripts/Kbuild.include
 
 AS		= $(CROSS_COMPILE)as
 LD		= $(CROSS_COMPILE)ld
+LDFINAL	= $(LD)
 CC		= $(CROSS_COMPILE)gcc
 CPP		= $(CC) -E
+ifdef CONFIG_LTO_SLIM
+AR		= $(CROSS_COMPILE)gcc-ar
+else
 AR		= $(CROSS_COMPILE)ar
+endif
 NM		= $(CROSS_COMPILE)nm
 STRIP		= $(CROSS_COMPILE)strip
 OBJCOPY		= $(CROSS_COMPILE)objcopy
@@ -396,7 +401,7 @@ KERNELVERSION = $(VERSION)$(if $(PATCHLEVEL),.$(PATCHLEVEL)$(if $(SUBLEVEL),.$(S
 
 export VERSION PATCHLEVEL SUBLEVEL KERNELRELEASE KERNELVERSION
 export ARCH SRCARCH CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC
-export CPP AR NM STRIP OBJCOPY OBJDUMP
+export CPP AR NM STRIP OBJCOPY OBJDUMP LDFINAL
 export MAKE AWK GENKSYMS INSTALLKERNEL PERL UTS_MACHINE
 export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS
 
@@ -707,6 +712,8 @@ ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC)), y)
 	KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO
 endif
 
+include ${srctree}/scripts/Makefile.lto
+
 # Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments
 KBUILD_CPPFLAGS += $(KCPPFLAGS)
 KBUILD_AFLAGS += $(KAFLAGS)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f125c5f..bba793f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -577,7 +577,7 @@ config X86_32_IRIS
 
 config SCHED_OMIT_FRAME_POINTER
 	def_bool y
-	prompt "Single-depth WCHAN output"
+	prompt "Single-depth WCHAN output" if !LTO && !FRAME_POINTER
 	depends on X86
 	---help---
 	  Calculate simpler /proc/<PID>/wchan values. If this option
diff --git a/init/Kconfig b/init/Kconfig
index 009a797..9561935 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1241,6 +1241,70 @@ config CC_OPTIMIZE_FOR_SIZE
 
 	  If unsure, say N.
 
+config LTO_MENU
+	bool "Enable gcc link time optimizations"
+	# Only tested on X86 for now. For other architectures you likely
+	# have to fix some things first, like adding asmlinkages etc.
+	depends on X86
+	# lto does not support excluding flags for specific files
+	# right now. Can be removed if that is fixed.
+	depends on !FUNCTION_TRACER
+	help
+	  With this option gcc will do whole program optimizations for
+	  the whole kernel and module. This increases compile time, but can
+	  lead to better code. It allows gcc to inline functions between
+	  different files. It might also trigger bugs due to more
+	  aggressive optimization. It allows gcc to drop unused code.
+	  With this option gcc will also do some global checking over
+	  different source files.
+
+	  This requires a gcc 4.7 or later compiler and
+	  Linux binutils 2.21.51.0.3 or later.  It does not currently
+	  work with a FSF release of binutils or with gold.
+
+	  On larger configurations this may need more than 4GB of RAM.
+	  It will likely not work on those with a 32bit compiler. Also
+	  /tmp in tmpfs may lead to faster running out of RAM
+	  (in this case set the TMPDIR environment variable to a different
+	  directory directly on disk)
+
+	  When the toolchain support is not available this will (hopefully)
+	  be automatically disabled.
+
+	  For more information see Documentation/lto-build
+
+config LTO_DISABLE
+         bool "Disable LTO again"
+         depends on LTO_MENU
+         default n
+         help
+           This option is merely here so that allyesconfig or allmodconfig does
+           not enable LTO. If you want to actually use LTO do not enable.
+
+config LTO
+	bool
+	default y
+	depends on LTO_MENU && !LTO_DISABLE
+
+config LTO_DEBUG
+	bool "Enable LTO compile time debugging"
+	depends on LTO
+
+config LTO_CP_CLONE
+	bool "Allow aggressive cloning for constant specialization"
+	depends on LTO
+	help
+	  Allow the compiler to clone functions for specific arguments.
+	  Experimential. Will increase text size.
+
+config LTO_SLIM
+	#bool "Use slim lto"
+	def_bool y
+	depends on LTO
+	help
+	  Do not generate all code twice. The object files will only contain
+	  LTO information. This lowers build time.
+
 config SYSCTL
 	bool
 
@@ -1317,7 +1381,10 @@ config KALLSYMS
 
 config KALLSYMS_ALL
 	bool "Include all symbols in kallsyms"
-	depends on DEBUG_KERNEL && KALLSYMS
+	# the method LTO uses to predict the symbol table
+	# only supports functions for now
+	# This can be removed once http://gcc.gnu.org/PR60016 is fixed
+	depends on DEBUG_KERNEL && KALLSYMS && !LTO
 	help
 	   Normally kallsyms only contains the symbols of functions for nicer
 	   OOPS messages and backtraces (i.e., symbols from the text and inittext
@@ -1712,6 +1779,7 @@ config MODULE_FORCE_UNLOAD
 
 config MODVERSIONS
 	bool "Module versioning support"
+	depends on !LTO
 	help
 	  Usually, you have to use modules compiled with your kernel.
 	  Saying Y here makes it sometimes possible to use modules
diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig
index d04ce8a..32f65b7 100644
--- a/kernel/gcov/Kconfig
+++ b/kernel/gcov/Kconfig
@@ -2,7 +2,7 @@ menu "GCOV-based kernel profiling"
 
 config GCOV_KERNEL
 	bool "Enable gcov-based kernel profiling"
-	depends on DEBUG_FS
+	depends on DEBUG_FS && !LTO
 	select CONSTRUCTORS if !UML
 	default n
 	---help---
diff --git a/scripts/Makefile.lto b/scripts/Makefile.lto
new file mode 100644
index 0000000..df1d8ea
--- /dev/null
+++ b/scripts/Makefile.lto
@@ -0,0 +1,84 @@
+#
+# Support for gcc link time optimization
+#
+
+DISABLE_LTO :=
+LTO_CFLAGS :=
+
+export DISABLE_LTO
+export LTO_CFLAGS
+
+ifdef CONFIG_LTO
+# 4.7 works mostly, but it sometimes loses symbols on large builds
+# This can be worked around by marking those symbols visible,
+# but that is fairly ugly and the problem is gone with 4.8
+# So only allow it with 4.8 for now.
+ifeq ($(call cc-ifversion, -ge, 0408,y),y)
+ifneq ($(call cc-option,${LTO_CFLAGS},n),n)
+# We need HJ Lu's Linux binutils because mainline binutils does not
+# support mixing assembler and LTO code in the same ld -r object.
+# XXX check if the gcc plugin ld is the expected one too
+# XXX some Fedora binutils should also support it. How to check for that?
+ifeq ($(call ld-ifversion,-ge,22710001,y),y)
+        LTO_CFLAGS := -flto -fno-toplevel-reorder
+	LTO_FINAL_CFLAGS := -fuse-linker-plugin
+
+# the -fno-toplevel-reorder is to preserve the order of initcalls
+# everything else should tolerate reordering
+        LTO_FINAL_CFLAGS += -fno-toplevel-reorder
+
+# Set the number of jobs used by the LTO phase. Append (+=) so the
+# flags collected above are kept. This should be -flto=jobserver to
+# coordinate with the parent make, but work around
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50639
+# and use as many jobs as processors are online for now.
+	LTO_FINAL_CFLAGS += -flto=$(shell getconf _NPROCESSORS_ONLN)
+	#LTO_FINAL_CFLAGS += -flto=jobserver
+
+ifdef CONFIG_LTO_SLIM
+	# requires plugin ar passed and very recent HJ binutils
+        LTO_CFLAGS += -fno-fat-lto-objects
+endif
+# Used to disable LTO for specific files (e.g. vdso)
+	DISABLE_LTO := -fno-lto
+
+	LTO_FINAL_CFLAGS += ${LTO_CFLAGS} -fwhole-program
+
+ifdef CONFIG_LTO_DEBUG
+	LTO_FINAL_CFLAGS += -dH -fdump-ipa-cgraph -fdump-ipa-inline-details
+	# -Wl,-plugin-save-temps -save-temps
+	LTO_CFLAGS +=
+endif
+ifdef CONFIG_LTO_CP_CLONE
+	LTO_FINAL_CFLAGS += -fipa-cp-clone
+	LTO_CFLAGS += -fipa-cp-clone
+endif
+
+	# In principle gcc should pass through options in the object files,
+	# but it doesn't always work. So do it here manually.
+	# Note that special options for individual files do not
+	# work currently (except for some special cases that only
+	# affect the compiler frontend).
+	# The main offenders are FTRACE and GCOV -- we exclude
+	# those in the config.
+	LTO_FINAL_CFLAGS += $(filter -g%,${KBUILD_CFLAGS})
+	LTO_FINAL_CFLAGS += $(filter -O%,${KBUILD_CFLAGS})
+	LTO_FINAL_CFLAGS += $(filter -f%,${KBUILD_CFLAGS})
+	LTO_FINAL_CFLAGS += $(filter -m%,${KBUILD_CFLAGS})
+	LTO_FINAL_CFLAGS += $(filter -W%,${KBUILD_CFLAGS})
+
+	KBUILD_CFLAGS += ${LTO_CFLAGS}
+
+	LDFINAL := ${CONFIG_SHELL} ${srctree}/scripts/gcc-ld \
+                  ${LTO_FINAL_CFLAGS}
+
+else
+        $(warning "WARNING: Too old linker version $(call ld-version) for kernel LTO. You need Linux binutils. CONFIG_LTO disabled.")
+endif
+else
+        $(warning "WARNING: Compiler/Linker does not support LTO/WHOPR with linker plugin. CONFIG_LTO disabled.")
+endif
+else
+        $(warning "WARNING: GCC $(call cc-version) too old for LTO/WHOPR. CONFIG_LTO disabled")
+endif
+endif
diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
index 69f0a14..9c40dae 100644
--- a/scripts/Makefile.modpost
+++ b/scripts/Makefile.modpost
@@ -77,7 +77,8 @@ modpost = scripts/mod/modpost                    \
  $(if $(KBUILD_EXTRA_SYMBOLS), $(patsubst %, -e %,$(KBUILD_EXTRA_SYMBOLS))) \
  $(if $(KBUILD_EXTMOD),-o $(modulesymfile))      \
  $(if $(CONFIG_DEBUG_SECTION_MISMATCH),,-S)      \
- $(if $(KBUILD_EXTMOD)$(KBUILD_MODPOST_WARN),-w)
+ $(if $(KBUILD_EXTMOD)$(KBUILD_MODPOST_WARN),-w) \
+ $(if $(CONFIG_LTO),-w)
 
 MODPOST_OPT=$(subst -i,-n,$(filter -i,$(MAKEFLAGS)))
 
@@ -115,8 +116,8 @@ $(modules:.ko=.mod.o): %.mod.o: %.mod.c FORCE
 targets += $(modules:.ko=.mod.o)
 
 # Step 6), final link of the modules
-quiet_cmd_ld_ko_o = LD [M]  $@
-      cmd_ld_ko_o = $(LD) -r $(LDFLAGS)                                 \
+quiet_cmd_ld_ko_o = LDFINAL [M]  $@
+      cmd_ld_ko_o = $(LDFINAL) -r $(LDFLAGS)                            \
                              $(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE) \
                              -o $@ $(filter-out FORCE,$^)
 
diff --git a/scripts/gcc-ld b/scripts/gcc-ld
index cadab9a..a9161da 100644
--- a/scripts/gcc-ld
+++ b/scripts/gcc-ld
@@ -18,6 +18,7 @@ while [ "$1" != "" ] ; do
 -rpath-link|--sort-section|--section-start|-Tbss|-Tdata|-Ttext|\
 --version-script|--dynamic-list|--version-exports-symbol|--wrap|-m)
 		A="$1" ; shift ; N="-Wl,$A,$1" ;;
+	--param) shift ; N="--param $1" ;;
 	-[m]*) N="$1" ;;
 	-*) N="-Wl,$1" ;;
 	*)  N="$1" ;;
diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
index d0e2b56..e479076 100644
--- a/scripts/kallsyms.c
+++ b/scripts/kallsyms.c
@@ -247,11 +247,13 @@ static int symbol_valid(struct sym_entry *s)
 		 * the kallsyms data are added.  If these symbols move then
 		 * they may get dropped in pass 2, which breaks the kallsyms
 		 * rules.
+		 * But don't do this for predicted fake symbols with 0 value.
 		 */
-		if ((s->addr == text_range_text->end &&
+		if (((s->addr == text_range_text->end &&
 				strcmp((char *)s->sym + offset, text_range_text->etext)) ||
 		    (s->addr == text_range_inittext->end &&
 				strcmp((char *)s->sym + offset, text_range_inittext->etext)))
+			&& text_range_text->end != 0)
 			return 0;
 	}
 
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index 0300047..4c5435f 100644
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -53,7 +53,7 @@ vmlinux_link()
 	local lds="${objtree}/${KBUILD_LDS}"
 
 	if [ "${SRCARCH}" != "um" ]; then
-		${LD} ${LDFLAGS} ${LDFLAGS_vmlinux} -o ${2}                  \
+		${LDFINAL} ${LDFLAGS} ${LDFLAGS_vmlinux} -o ${2}                  \
 			-T ${lds} ${KBUILD_VMLINUX_INIT}                     \
 			--start-group ${KBUILD_VMLINUX_MAIN} --end-group ${1}
 	else
@@ -90,10 +90,28 @@ kallsyms()
 	local aflags="${KBUILD_AFLAGS} ${KBUILD_AFLAGS_KERNEL}               \
 		      ${NOSTDINC_FLAGS} ${LINUXINCLUDE} ${KBUILD_CPPFLAGS}"
 
-	${NM} -n ${1} | \
-		awk 'NF == 3 { print}' |
-		scripts/kallsyms ${kallsymopt} | \
+	# workaround for slim LTO gcc-nm not outputting static symbols
+	# http://gcc.gnu.org/PR60016
+	# generate a fake symbol table based on the LTO function sections.
+	# This unfortunately "knows" about the internal LTO file format
+	# and only works for functions
+	# needs perl for now when building for LTO
+	(
+	if $OBJDUMP --section-headers ${1} | grep -q \.gnu\.lto_ ; then
+		${OBJDUMP} --section-headers ${1} |
+		perl -ne '
+@n = split;
+next unless $n[1] =~ /\.gnu\.lto_([_a-zA-Z][^.]+)/;
+next if $n[1] eq $prev;
+$prev = $n[1];
+print "0 T ",$1,"\n"'
+	fi
+	${NM} -n ${1} | awk 'NF == 3 { print }'
+	)  > ${2}_sym
+	# run without pipe to make kallsyms errors stop the script
+	./scripts/kallsyms ${kallsymopt} < ${2}_sym |
 		${CC} ${aflags} -c -o ${2} -x assembler-with-cpp -
+
 }
 
 # Create map file with all symbols from ${1}
@@ -181,7 +199,7 @@ if [ -n "${CONFIG_KALLSYMS}" ] ; then
 	kallsymsso=.tmp_kallsyms1.o
 fi
 
-info LD vmlinux
+info LDFINAL vmlinux
 vmlinux_link "${kallsymsso}" vmlinux
 if [ -n "${CONFIG_KALLSYMS}" ] ; then
 	# Now regenerate the kallsyms table and patch it into the
-- 
1.8.5.2


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 15/17] Kbuild, lto: Add LTO build Documentation
  2014-02-08  8:01 [PATCH 01/17] x86, lto: Disable LTO for the x86 VDSO Andi Kleen
                   ` (12 preceding siblings ...)
  2014-02-08  8:01 ` [PATCH 14/17] Kbuild, lto: Add Link Time Optimization support Andi Kleen
@ 2014-02-08  8:01 ` Andi Kleen
  2014-02-08  8:01 ` [PATCH 16/17] lto: Mark spinlocks noinline when inline spinlocks are disabled Andi Kleen
  2014-02-08  8:01 ` [PATCH 17/17] lto, module: Warn about modules that are not fully LTOed Andi Kleen
  15 siblings, 0 replies; 26+ messages in thread
From: Andi Kleen @ 2014-02-08  8:01 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-kbuild, x86, Andi Kleen

Add build documentation for LTO.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 Documentation/lto-build | 121 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 121 insertions(+)
 create mode 100644 Documentation/lto-build

diff --git a/Documentation/lto-build b/Documentation/lto-build
new file mode 100644
index 0000000..4c8a713
--- /dev/null
+++ b/Documentation/lto-build
@@ -0,0 +1,121 @@
+Link time optimization (LTO) for the Linux kernel
+
+This is an experimental feature which still has various problems.
+
+Link Time Optimization allows the compiler to optimize the complete program
+instead of just each file.  Link Time Optimization was a new feature in gcc 4.6,
+but only really works with gcc 4.7. The kernel LTO build also requires
+the Linux binutils (the normal FSF releases do not work at the moment)
+
+The compiler can inline functions between files and do some other global
+optimizations.  It will also drop unused functions which can make the kernel
+image smaller in some circumstances.  The binary gets somewhat larger.
+In return the resulting kernels (usually) have better performance.
+
+Build time and memory consumption at build time will increase.
+The build time penalty depends on the size of the vmlinux. A reasonably
+sized vmlinux takes about twice as long to build, much larger monolithic
+kernels like allyesconfig ~4x as long. Modular kernels are less affected.
+
+Normal "reasonable" builds work with less than 4GB of RAM, but very large
+configurations like allyesconfig may need more memory. The actual
+memory needed depends on the available memory (gcc sizes its garbage
+collector pools based on that or on the ulimit -m limits)
+
+Issues:
+- Various workarounds in kernel needed for toolchain problems.
+- A few kernel features are currently incompatible with LTO, in particular
+function tracing, because they require special compiler flags for
+specific files, which is not supported in LTO right now.
+- The build is faster with LTO_SLIM enabled, but this still triggers
+problems in some circumstances (currently disabled)
+- Jobserver control for -j does not work correctly for the final
+LTO phase. The makefile hardcodes -j<number of online cpus>.
+
+Configuration:
+- Enable CONFIG_LTO_MENU and then disable CONFIG_LTO_DISABLE.
+This is mainly to not have allyesconfig default to LTO.
+- FUNCTION_TRACER, STACK_TRACER, FUNCTION_GRAPH_TRACER have to be disabled
+because they are currently incompatible with LTO.
+- MODVERSIONS has to be disabled because it is not fixed for LTO
+yet.
+
+Requirements:
+- Enough memory: 4GB for a standard build, more for allyesconfig
+If you are tight on memory and use tmpfs as /tmp define TMPDIR and
+point it to a directory on disk.  The peak memory usage
+happens single threaded (when lto-wpa merges types), so dialing
+back -j options will not help much.
+
+A 32bit compiler is unlikely to work due to the memory requirements.
+You can however build a kernel targeted at 32bit on a 64bit host.
+
+- Get the Linux binutils from
+http://www.kernel.org/pub/linux/devel/binutils/
+Sorry, standard binutils releases don't work.
+The kernel build has to use this linker, so if it is installed
+in a non standard location use LD=... on the make line.
+
+- gcc 4.7 built with LTO enabled (--enable-lto) and with the plugin ld
+(--with-plugin-ld) pointing to the linker from the Linux binutils
+
+If the gcc is not built with this option it may also work to put the correct
+binutils linker first in $PATH when building. I haven't tested
+this however.
+
+Example build procedure for the tool chain and kernel. This does not
+overwrite the standard compiler toolchain on the system. If you already
+have a suitable gcc 4.7+ compiler and linker the toolchain build can
+be skipped (note that a distribution gcc 4.7 is not necessarily
+correctly configured for LTO)
+
+Get the Linux binutils from http://www.kernel.org/pub/linux/devel/binutils/
+The standard binutils do not work at this point!
+
+Unpack binutils
+
+cd binutils-VERSION  (or plain binutils in some versions)
+./configure --prefix=/opt/binutils-VERSION --enable-plugins
+nice -n20 make -j$(getconf _NPROCESSORS_ONLN)
+sudo make install
+sudo ln -sf /opt/binutils-VERSION/bin/ld /usr/local/bin/ld-plugin
+
+Unpack gcc-4.7
+
+mkdir obj-gcc
+# please don't skip this cd. the build will not work correctly in the
+# source dir, you have to use the separate object dir
+cd obj-gcc
+# make sure to install gmp-devel and mpfr-devel
+# and the 32bit glibc package if you have a multilib system
+# if mpc-devel is not there get it from
+# http://www.multiprecision.org/mpc/download/mpc-0.8.2.tar.gz
+# and install in gcc-4.7*/mpc
+../gcc-4.7*/configure --prefix=/opt/gcc-4.7 --enable-lto \
+--with-plugin-ld=/usr/local/bin/ld-plugin  \
+--disable-nls --enable-languages=c,c++ \
+--disable-libstdcxx-pch
+nice -n20 make -j$(getconf _NPROCESSORS_ONLN)
+sudo make install-no-fixedincludes
+sudo ln -sf /opt/gcc-4.7/bin/gcc /usr/local/bin/gcc47
+sudo ln -sf /opt/gcc-4.7/bin/gcc-ar /usr/local/bin/gcc-ar47
+
+# get lto tree in linux-lto
+
+mkdir obj-lto
+cd obj-lto
+# copy a suitable kernel config file into .config
+make -C ../linux-lto O=$(pwd)  oldconfig
+./source/scripts/config --disable function_tracer --disable function_graph_tracer \
+			 --disable stack_tracer --enable lto_menu \
+                         --disable lto_disable --disable lto_debug --disable lto_slim
+export TMPDIR=$(pwd)
+# this lowers memory usage with /tmp=tmpfs
+# note the special ar is only needed if CONFIG_LTO_SLIM is enabled
+# The PATH is that gcc-ar finds a plugin aware ar, if your standard
+# binutils doesn't support that. If the standard ar supports --plugin
+# it is not needed
+PATH=/opt/binutils-VERSION/bin:$PATH nice -n20 make CC=gcc47 LD=ld-plugin AR=gcc-ar47 \
+-j $(getconf _NPROCESSORS_ONLN)
+
+Andi Kleen
-- 
1.8.5.2


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 16/17] lto: Mark spinlocks noinline when inline spinlocks are disabled
  2014-02-08  8:01 [PATCH 01/17] x86, lto: Disable LTO for the x86 VDSO Andi Kleen
                   ` (13 preceding siblings ...)
  2014-02-08  8:01 ` [PATCH 15/17] Kbuild, lto: Add LTO build Documentation Andi Kleen
@ 2014-02-08  8:01 ` Andi Kleen
  2014-02-08  8:01 ` [PATCH 17/17] lto, module: Warn about modules that are not fully LTOed Andi Kleen
  15 siblings, 0 replies; 26+ messages in thread
From: Andi Kleen @ 2014-02-08  8:01 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-kbuild, x86, Andi Kleen, mingo

Otherwise LTO will inline them anyways

Cc: mingo@kernel.org
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 kernel/locking/spinlock.c | 56 +++++++++++++++++++++++------------------------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c
index 4b082b5..975bfe9 100644
--- a/kernel/locking/spinlock.c
+++ b/kernel/locking/spinlock.c
@@ -130,7 +130,7 @@ BUILD_LOCK_OPS(write, rwlock);
 #endif
 
 #ifndef CONFIG_INLINE_SPIN_TRYLOCK
-int __lockfunc _raw_spin_trylock(raw_spinlock_t *lock)
+noinline int __lockfunc _raw_spin_trylock(raw_spinlock_t *lock)
 {
 	return __raw_spin_trylock(lock);
 }
@@ -138,7 +138,7 @@ EXPORT_SYMBOL(_raw_spin_trylock);
 #endif
 
 #ifndef CONFIG_INLINE_SPIN_TRYLOCK_BH
-int __lockfunc _raw_spin_trylock_bh(raw_spinlock_t *lock)
+noinline int __lockfunc _raw_spin_trylock_bh(raw_spinlock_t *lock)
 {
 	return __raw_spin_trylock_bh(lock);
 }
@@ -146,7 +146,7 @@ EXPORT_SYMBOL(_raw_spin_trylock_bh);
 #endif
 
 #ifndef CONFIG_INLINE_SPIN_LOCK
-void __lockfunc _raw_spin_lock(raw_spinlock_t *lock)
+noinline void __lockfunc _raw_spin_lock(raw_spinlock_t *lock)
 {
 	__raw_spin_lock(lock);
 }
@@ -154,7 +154,7 @@ EXPORT_SYMBOL(_raw_spin_lock);
 #endif
 
 #ifndef CONFIG_INLINE_SPIN_LOCK_IRQSAVE
-unsigned long __lockfunc _raw_spin_lock_irqsave(raw_spinlock_t *lock)
+noinline unsigned long __lockfunc _raw_spin_lock_irqsave(raw_spinlock_t *lock)
 {
 	return __raw_spin_lock_irqsave(lock);
 }
@@ -162,7 +162,7 @@ EXPORT_SYMBOL(_raw_spin_lock_irqsave);
 #endif
 
 #ifndef CONFIG_INLINE_SPIN_LOCK_IRQ
-void __lockfunc _raw_spin_lock_irq(raw_spinlock_t *lock)
+noinline void __lockfunc _raw_spin_lock_irq(raw_spinlock_t *lock)
 {
 	__raw_spin_lock_irq(lock);
 }
@@ -170,7 +170,7 @@ EXPORT_SYMBOL(_raw_spin_lock_irq);
 #endif
 
 #ifndef CONFIG_INLINE_SPIN_LOCK_BH
-void __lockfunc _raw_spin_lock_bh(raw_spinlock_t *lock)
+noinline void __lockfunc _raw_spin_lock_bh(raw_spinlock_t *lock)
 {
 	__raw_spin_lock_bh(lock);
 }
@@ -178,7 +178,7 @@ EXPORT_SYMBOL(_raw_spin_lock_bh);
 #endif
 
 #ifdef CONFIG_UNINLINE_SPIN_UNLOCK
-void __lockfunc _raw_spin_unlock(raw_spinlock_t *lock)
+noinline void __lockfunc _raw_spin_unlock(raw_spinlock_t *lock)
 {
 	__raw_spin_unlock(lock);
 }
@@ -186,7 +186,7 @@ EXPORT_SYMBOL(_raw_spin_unlock);
 #endif
 
 #ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE
-void __lockfunc _raw_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long flags)
+noinline void __lockfunc _raw_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long flags)
 {
 	__raw_spin_unlock_irqrestore(lock, flags);
 }
@@ -194,7 +194,7 @@ EXPORT_SYMBOL(_raw_spin_unlock_irqrestore);
 #endif
 
 #ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQ
-void __lockfunc _raw_spin_unlock_irq(raw_spinlock_t *lock)
+noinline void __lockfunc _raw_spin_unlock_irq(raw_spinlock_t *lock)
 {
 	__raw_spin_unlock_irq(lock);
 }
@@ -202,7 +202,7 @@ EXPORT_SYMBOL(_raw_spin_unlock_irq);
 #endif
 
 #ifndef CONFIG_INLINE_SPIN_UNLOCK_BH
-void __lockfunc _raw_spin_unlock_bh(raw_spinlock_t *lock)
+noinline void __lockfunc _raw_spin_unlock_bh(raw_spinlock_t *lock)
 {
 	__raw_spin_unlock_bh(lock);
 }
@@ -210,7 +210,7 @@ EXPORT_SYMBOL(_raw_spin_unlock_bh);
 #endif
 
 #ifndef CONFIG_INLINE_READ_TRYLOCK
-int __lockfunc _raw_read_trylock(rwlock_t *lock)
+noinline int __lockfunc _raw_read_trylock(rwlock_t *lock)
 {
 	return __raw_read_trylock(lock);
 }
@@ -218,7 +218,7 @@ EXPORT_SYMBOL(_raw_read_trylock);
 #endif
 
 #ifndef CONFIG_INLINE_READ_LOCK
-void __lockfunc _raw_read_lock(rwlock_t *lock)
+noinline void __lockfunc _raw_read_lock(rwlock_t *lock)
 {
 	__raw_read_lock(lock);
 }
@@ -226,7 +226,7 @@ EXPORT_SYMBOL(_raw_read_lock);
 #endif
 
 #ifndef CONFIG_INLINE_READ_LOCK_IRQSAVE
-unsigned long __lockfunc _raw_read_lock_irqsave(rwlock_t *lock)
+noinline unsigned long __lockfunc _raw_read_lock_irqsave(rwlock_t *lock)
 {
 	return __raw_read_lock_irqsave(lock);
 }
@@ -234,7 +234,7 @@ EXPORT_SYMBOL(_raw_read_lock_irqsave);
 #endif
 
 #ifndef CONFIG_INLINE_READ_LOCK_IRQ
-void __lockfunc _raw_read_lock_irq(rwlock_t *lock)
+noinline void __lockfunc _raw_read_lock_irq(rwlock_t *lock)
 {
 	__raw_read_lock_irq(lock);
 }
@@ -242,7 +242,7 @@ EXPORT_SYMBOL(_raw_read_lock_irq);
 #endif
 
 #ifndef CONFIG_INLINE_READ_LOCK_BH
-void __lockfunc _raw_read_lock_bh(rwlock_t *lock)
+noinline void __lockfunc _raw_read_lock_bh(rwlock_t *lock)
 {
 	__raw_read_lock_bh(lock);
 }
@@ -250,7 +250,7 @@ EXPORT_SYMBOL(_raw_read_lock_bh);
 #endif
 
 #ifndef CONFIG_INLINE_READ_UNLOCK
-void __lockfunc _raw_read_unlock(rwlock_t *lock)
+noinline void __lockfunc _raw_read_unlock(rwlock_t *lock)
 {
 	__raw_read_unlock(lock);
 }
@@ -258,7 +258,7 @@ EXPORT_SYMBOL(_raw_read_unlock);
 #endif
 
 #ifndef CONFIG_INLINE_READ_UNLOCK_IRQRESTORE
-void __lockfunc _raw_read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
+noinline void __lockfunc _raw_read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 {
 	__raw_read_unlock_irqrestore(lock, flags);
 }
@@ -266,7 +266,7 @@ EXPORT_SYMBOL(_raw_read_unlock_irqrestore);
 #endif
 
 #ifndef CONFIG_INLINE_READ_UNLOCK_IRQ
-void __lockfunc _raw_read_unlock_irq(rwlock_t *lock)
+noinline void __lockfunc _raw_read_unlock_irq(rwlock_t *lock)
 {
 	__raw_read_unlock_irq(lock);
 }
@@ -274,7 +274,7 @@ EXPORT_SYMBOL(_raw_read_unlock_irq);
 #endif
 
 #ifndef CONFIG_INLINE_READ_UNLOCK_BH
-void __lockfunc _raw_read_unlock_bh(rwlock_t *lock)
+noinline void __lockfunc _raw_read_unlock_bh(rwlock_t *lock)
 {
 	__raw_read_unlock_bh(lock);
 }
@@ -282,7 +282,7 @@ EXPORT_SYMBOL(_raw_read_unlock_bh);
 #endif
 
 #ifndef CONFIG_INLINE_WRITE_TRYLOCK
-int __lockfunc _raw_write_trylock(rwlock_t *lock)
+noinline int __lockfunc _raw_write_trylock(rwlock_t *lock)
 {
 	return __raw_write_trylock(lock);
 }
@@ -290,7 +290,7 @@ EXPORT_SYMBOL(_raw_write_trylock);
 #endif
 
 #ifndef CONFIG_INLINE_WRITE_LOCK
-void __lockfunc _raw_write_lock(rwlock_t *lock)
+noinline void __lockfunc _raw_write_lock(rwlock_t *lock)
 {
 	__raw_write_lock(lock);
 }
@@ -298,7 +298,7 @@ EXPORT_SYMBOL(_raw_write_lock);
 #endif
 
 #ifndef CONFIG_INLINE_WRITE_LOCK_IRQSAVE
-unsigned long __lockfunc _raw_write_lock_irqsave(rwlock_t *lock)
+noinline unsigned long __lockfunc _raw_write_lock_irqsave(rwlock_t *lock)
 {
 	return __raw_write_lock_irqsave(lock);
 }
@@ -306,7 +306,7 @@ EXPORT_SYMBOL(_raw_write_lock_irqsave);
 #endif
 
 #ifndef CONFIG_INLINE_WRITE_LOCK_IRQ
-void __lockfunc _raw_write_lock_irq(rwlock_t *lock)
+noinline void __lockfunc _raw_write_lock_irq(rwlock_t *lock)
 {
 	__raw_write_lock_irq(lock);
 }
@@ -314,7 +314,7 @@ EXPORT_SYMBOL(_raw_write_lock_irq);
 #endif
 
 #ifndef CONFIG_INLINE_WRITE_LOCK_BH
-void __lockfunc _raw_write_lock_bh(rwlock_t *lock)
+noinline void __lockfunc _raw_write_lock_bh(rwlock_t *lock)
 {
 	__raw_write_lock_bh(lock);
 }
@@ -322,7 +322,7 @@ EXPORT_SYMBOL(_raw_write_lock_bh);
 #endif
 
 #ifndef CONFIG_INLINE_WRITE_UNLOCK
-void __lockfunc _raw_write_unlock(rwlock_t *lock)
+noinline void __lockfunc _raw_write_unlock(rwlock_t *lock)
 {
 	__raw_write_unlock(lock);
 }
@@ -330,7 +330,7 @@ EXPORT_SYMBOL(_raw_write_unlock);
 #endif
 
 #ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE
-void __lockfunc _raw_write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
+noinline void __lockfunc _raw_write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 {
 	__raw_write_unlock_irqrestore(lock, flags);
 }
@@ -338,7 +338,7 @@ EXPORT_SYMBOL(_raw_write_unlock_irqrestore);
 #endif
 
 #ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQ
-void __lockfunc _raw_write_unlock_irq(rwlock_t *lock)
+noinline void __lockfunc _raw_write_unlock_irq(rwlock_t *lock)
 {
 	__raw_write_unlock_irq(lock);
 }
@@ -346,7 +346,7 @@ EXPORT_SYMBOL(_raw_write_unlock_irq);
 #endif
 
 #ifndef CONFIG_INLINE_WRITE_UNLOCK_BH
-void __lockfunc _raw_write_unlock_bh(rwlock_t *lock)
+noinline void __lockfunc _raw_write_unlock_bh(rwlock_t *lock)
 {
 	__raw_write_unlock_bh(lock);
 }
-- 
1.8.5.2


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 17/17] lto, module: Warn about modules that are not fully LTOed
  2014-02-08  8:01 [PATCH 01/17] x86, lto: Disable LTO for the x86 VDSO Andi Kleen
                   ` (14 preceding siblings ...)
  2014-02-08  8:01 ` [PATCH 16/17] lto: Mark spinlocks noinline when inline spinlocks are disabled Andi Kleen
@ 2014-02-08  8:01 ` Andi Kleen
  2014-02-12  1:13   ` Rusty Russell
  15 siblings, 1 reply; 26+ messages in thread
From: Andi Kleen @ 2014-02-08  8:01 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-kbuild, x86, Andi Kleen, rusty

When __gnu_lto_* is present that means that the module hasn't run with
LTO yet.

Cc: rusty@rustcorp.com.au
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 kernel/module.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/kernel/module.c b/kernel/module.c
index b99e801..2052155 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1949,8 +1949,11 @@ static int simplify_symbols(struct module *mod, const struct load_info *info)
 		switch (sym[i].st_shndx) {
 		case SHN_COMMON:
 			/* Ignore common symbols */
-			if (!strncmp(name, "__gnu_lto", 9))
+			if (!strncmp(name, "__gnu_lto", 9)) {
+				printk("%s: module not link time optimized\n",
+				       mod->name);
 				break;
+			}
 
 			/* We compiled with -fno-common.  These are not
 			   supposed to happen.  */
-- 
1.8.5.2


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH 02/17] x86, lto: Disable fancy hweight optimizations for LTO v2
  2014-02-08  8:01 ` [PATCH 02/17] x86, lto: Disable fancy hweight optimizations for LTO v2 Andi Kleen
@ 2014-02-08 18:52   ` H. Peter Anvin
  2014-02-08 20:21     ` Andi Kleen
  0 siblings, 1 reply; 26+ messages in thread
From: H. Peter Anvin @ 2014-02-08 18:52 UTC (permalink / raw)
  To: Andi Kleen, linux-kernel; +Cc: linux-kbuild, x86

On 02/08/2014 12:01 AM, Andi Kleen wrote:
> The fancy x86 hweight uses different compiler options for the
> hweight file. This does not work with LTO. Just disable the optimization
> with LTO

No, I'm going to NAK this.  This means not using the POPCNT instruction
if LTO is enabled, and that really isn't an acceptable option.

	-hpa



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 02/17] x86, lto: Disable fancy hweight optimizations for LTO v2
  2014-02-08 18:52   ` H. Peter Anvin
@ 2014-02-08 20:21     ` Andi Kleen
  2014-02-08 21:43       ` H. Peter Anvin
  0 siblings, 1 reply; 26+ messages in thread
From: Andi Kleen @ 2014-02-08 20:21 UTC (permalink / raw)
  To: H. Peter Anvin; +Cc: linux-kernel, linux-kbuild, x86

On Sat, Feb 08, 2014 at 10:52:07AM -0800, H. Peter Anvin wrote:
> On 02/08/2014 12:01 AM, Andi Kleen wrote:
> > The fancy x86 hweight uses different compiler options for the
> > hweight file. This does not work with LTO. Just disable the optimization
> > with LTO
> 
> No, I'm going to NAK this.  This means not using the POPCNT instruction
> if LTO is enabled, and that really isn't an acceptable option.

I thought the use was obscure?

Ok, suppose can just disable LTO for the file.
The only drawback is that the functions will not be optimized away when
not used, as they'll need to be __visible.

-Andi
-- 
ak@linux.intel.com -- Speaking for myself only

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 02/17] x86, lto: Disable fancy hweight optimizations for LTO v2
  2014-02-08 20:21     ` Andi Kleen
@ 2014-02-08 21:43       ` H. Peter Anvin
  0 siblings, 0 replies; 26+ messages in thread
From: H. Peter Anvin @ 2014-02-08 21:43 UTC (permalink / raw)
  To: Andi Kleen; +Cc: linux-kernel, linux-kbuild, x86

That's fine.

On February 8, 2014 12:21:24 PM PST, Andi Kleen <ak@linux.intel.com> wrote:
>On Sat, Feb 08, 2014 at 10:52:07AM -0800, H. Peter Anvin wrote:
>> On 02/08/2014 12:01 AM, Andi Kleen wrote:
>> > The fancy x86 hweight uses different compiler options for the
>> > hweight file. This does not work with LTO. Just disable the
>optimization
>> > with LTO
>> 
>> No, I'm going to NAK this.  This means not using the POPCNT
>instruction
>> if LTO is enabled, and that really isn't an acceptable option.
>
>I thought the use was obscure?
>
>Ok, suppose can just disable LTO for the file.
>The only drawback is that the functions will not be optimized away when
>not used, as they'll need to be __visible.
>
>-Andi

-- 
Sent from my mobile phone.  Please pardon brevity and lack of formatting.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 05/17] lto: Handle LTO common symbols in module loader
  2014-02-08  8:01 ` [PATCH 05/17] lto: Handle LTO common symbols in module loader Andi Kleen
@ 2014-02-12  1:04   ` Rusty Russell
  0 siblings, 0 replies; 26+ messages in thread
From: Rusty Russell @ 2014-02-12  1:04 UTC (permalink / raw)
  To: Andi Kleen, linux-kernel; +Cc: linux-kbuild, x86, Joe Mario

Andi Kleen <ak@linux.intel.com> writes:
> From: Joe Mario <jmario@redhat.com>
>
> Here is the workaround I made for having the kernel not reject modules
> built with -flto.  The clean solution would be to get the compiler to not
> emit the symbol.  Or if it has to emit the symbol, then emit it as
> initialized data but put it into a comdat/linkonce section.
>
> Minor tweaks by AK over Joe's patch.

Patch is fine, but what's with the comment?

>  		switch (sym[i].st_shndx) {
>  		case SHN_COMMON:
> +			/* Ignore common symbols */
> +			if (!strncmp(name, "__gnu_lto", 9))
> +				break;
> +

You mean, "/* Ignore symbols from -flto */"?

Other than that, I'm happy for this to go via some other tree:

Acked-by: Rusty Russell <rusty@rustcorp.com.au>

Thanks,
Rusty.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 17/17] lto, module: Warn about modules that are not fully LTOed
  2014-02-08  8:01 ` [PATCH 17/17] lto, module: Warn about modules that are not fully LTOed Andi Kleen
@ 2014-02-12  1:13   ` Rusty Russell
  0 siblings, 0 replies; 26+ messages in thread
From: Rusty Russell @ 2014-02-12  1:13 UTC (permalink / raw)
  To: Andi Kleen, linux-kernel; +Cc: linux-kbuild, x86

Andi Kleen <ak@linux.intel.com> writes:
> When __gnu_lto_* is present that means that the module hasn't run with
> LTO yet.

In practice, this means they didn't build their kernel properly, right?
It shouldn't break anything, but it seems really weird.  And how many
times will the printk fire on a single module?

Seems like a job for pr_warn?

Thanks,
Rusty.

> ---
>  kernel/module.c | 5 ++++-
>  1 file changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/kernel/module.c b/kernel/module.c
> index b99e801..2052155 100644
> --- a/kernel/module.c
> +++ b/kernel/module.c
> @@ -1949,8 +1949,11 @@ static int simplify_symbols(struct module *mod, const struct load_info *info)
>  		switch (sym[i].st_shndx) {
>  		case SHN_COMMON:
>  			/* Ignore common symbols */
> -			if (!strncmp(name, "__gnu_lto", 9))
> +			if (!strncmp(name, "__gnu_lto", 9)) {
> +				printk("%s: module not link time optimized\n",
> +				       mod->name);
>  				break;
> +			}
>  
>  			/* We compiled with -fno-common.  These are not
>  			   supposed to happen.  */
> -- 
> 1.8.5.2

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 12/17] Kbuild, lto: Set TMPDIR for LTO
  2014-02-08  8:01 ` [PATCH 12/17] Kbuild, lto: Set TMPDIR for LTO Andi Kleen
@ 2014-02-14  4:26   ` H. Peter Anvin
  0 siblings, 0 replies; 26+ messages in thread
From: H. Peter Anvin @ 2014-02-14  4:26 UTC (permalink / raw)
  To: Andi Kleen, linux-kernel; +Cc: linux-kbuild, x86

On 02/08/2014 12:01 AM, Andi Kleen wrote:
> LTO gcc puts a lot of data into $TMPDIR, essentially another copy
> of the object directory to pass the repartitioned object files
> to the code generation processes.
> 
> TMPDIR defaults to /tmp With /tmp as tmpfs it's easy to drive systems to
> out of memory, because they will compete with the already high anonymous
> memory consumption of the wpa LTO pass.
> 
> When LTO is set always set TMPDIR to the object directory. This could
> be slightly slower, but is far safer and eliminates another parameter
> the LTO user would need to set manually.
> 
> I made it conditional on LTO for now.

I think this really ought to use ?= so it doesn't override a TMPDIR
explicitly set by the user.

	-hpa



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 14/17] Kbuild, lto: Add Link Time Optimization support
  2014-02-08  8:01 ` [PATCH 14/17] Kbuild, lto: Add Link Time Optimization support Andi Kleen
@ 2014-02-14  4:28   ` H. Peter Anvin
  2014-02-14 14:36     ` Andi Kleen
  0 siblings, 1 reply; 26+ messages in thread
From: H. Peter Anvin @ 2014-02-14  4:28 UTC (permalink / raw)
  To: Andi Kleen, linux-kernel; +Cc: linux-kbuild, x86

I am about to commit the patches before this except 02/17 and 12/17 to
tip:x86/asmlinkage; however, I figure we need a new 02/17 before
committing the actual LTO patches to avoid build breakage.

	-hpa


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 14/17] Kbuild, lto: Add Link Time Optimization support
  2014-02-14  4:28   ` H. Peter Anvin
@ 2014-02-14 14:36     ` Andi Kleen
  2014-02-14 16:25       ` H. Peter Anvin
  0 siblings, 1 reply; 26+ messages in thread
From: Andi Kleen @ 2014-02-14 14:36 UTC (permalink / raw)
  To: H. Peter Anvin; +Cc: linux-kernel, linux-kbuild, x86

On Thu, Feb 13, 2014 at 08:28:10PM -0800, H. Peter Anvin wrote:
> I am about to commit the patches before this except 02/17 and 12/17 to
> tip:x86/asmlinkage; however, I figure we need a new 02/17 before
> committing the actual LTO patches to avoid build breakage.

Thanks.

Yes I'll repost today.

I was assuming those would go through the kbuild tree.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 14/17] Kbuild, lto: Add Link Time Optimization support
  2014-02-14 14:36     ` Andi Kleen
@ 2014-02-14 16:25       ` H. Peter Anvin
  0 siblings, 0 replies; 26+ messages in thread
From: H. Peter Anvin @ 2014-02-14 16:25 UTC (permalink / raw)
  To: Andi Kleen; +Cc: linux-kernel, linux-kbuild, x86

On 02/14/2014 06:36 AM, Andi Kleen wrote:
> On Thu, Feb 13, 2014 at 08:28:10PM -0800, H. Peter Anvin wrote:
>> I am about to commit the patches before this except 02/17 and 12/17 to
>> tip:x86/asmlinkage; however, I figure we need a new 02/17 before
>> committing the actual LTO patches to avoid build breakage.
> 
> Thanks.
> 
> Yes I'll repost today.
> 
> I was assuming those would go through the kbuild tree.
> 

I'll check with Michal to see if he cares.

	-hpa



^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2014-02-14 16:26 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-02-08  8:01 [PATCH 01/17] x86, lto: Disable LTO for the x86 VDSO Andi Kleen
2014-02-08  8:01 ` [PATCH 02/17] x86, lto: Disable fancy hweight optimizations for LTO v2 Andi Kleen
2014-02-08 18:52   ` H. Peter Anvin
2014-02-08 20:21     ` Andi Kleen
2014-02-08 21:43       ` H. Peter Anvin
2014-02-08  8:01 ` [PATCH 03/17] lto: Make asmlinkage __visible Andi Kleen
2014-02-08  8:01 ` [PATCH 04/17] lto, workaround: Add workaround for initcall reordering Andi Kleen
2014-02-08  8:01 ` [PATCH 05/17] lto: Handle LTO common symbols in module loader Andi Kleen
2014-02-12  1:04   ` Rusty Russell
2014-02-08  8:01 ` [PATCH 06/17] lto: Disable LTO for sys_ni Andi Kleen
2014-02-08  8:01 ` [PATCH 07/17] Kbuild, lto, workaround: Don't warn for initcall_reference in modpost Andi Kleen
2014-02-08  8:01 ` [PATCH 08/17] Kbuild, lto: Drop .number postfixes " Andi Kleen
2014-02-08  8:01 ` [PATCH 09/17] Kbuild, lto: add ld-version and ld-ifversion macros Andi Kleen
2014-02-08  8:01 ` [PATCH 10/17] Kbuild, lto: Add a gcc-ld script to let run gcc as ld Andi Kleen
2014-02-08  8:01 ` [PATCH 11/17] Kbuild, lto: Disable LTO for asm-offsets.c Andi Kleen
2014-02-08  8:01 ` [PATCH 12/17] Kbuild, lto: Set TMPDIR for LTO Andi Kleen
2014-02-14  4:26   ` H. Peter Anvin
2014-02-08  8:01 ` [PATCH 13/17] Kbuild, lto: Handle basic LTO in modpost Andi Kleen
2014-02-08  8:01 ` [PATCH 14/17] Kbuild, lto: Add Link Time Optimization support Andi Kleen
2014-02-14  4:28   ` H. Peter Anvin
2014-02-14 14:36     ` Andi Kleen
2014-02-14 16:25       ` H. Peter Anvin
2014-02-08  8:01 ` [PATCH 15/17] Kbuild, lto: Add LTO build Documentation Andi Kleen
2014-02-08  8:01 ` [PATCH 16/17] lto: Mark spinlocks noinline when inline spinlocks are disabled Andi Kleen
2014-02-08  8:01 ` [PATCH 17/17] lto, module: Warn about modules that are not fully LTOed Andi Kleen
2014-02-12  1:13   ` Rusty Russell

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).