All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jann Horn <jannh@google.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Masahiro Yamada <masahiroy@kernel.org>,
	Nick Desaulniers <ndesaulniers@google.com>,
	Miguel Ojeda <ojeda@kernel.org>,
	Zhen Lei <thunder.leizhen@huawei.com>,
	Arnd Bergmann <arnd@arndb.de>,
	linux-kernel@vger.kernel.org, linux-kbuild@vger.kernel.org,
	Jann Horn <jannh@google.com>
Subject: [PATCH 2/2] kallsyms: build faster by using .incbin
Date: Wed, 21 Feb 2024 21:26:54 +0100	[thread overview]
Message-ID: <20240221202655.2423854-2-jannh@google.com> (raw)
In-Reply-To: <20240221202655.2423854-1-jannh@google.com>

Currently, kallsyms builds a big assembly file (~19M with a normal
kernel config), and then the assembler has to turn that big assembly
file back into binary data, which takes around a second per kallsyms
invocation. (Normally there are two kallsyms invocations per build.)

It is much faster to instead directly output binary data, which can
be imported in an assembly file using ".incbin". This is also the
approach taken by arch/x86/boot/compressed/mkpiggy.c.
So this patch switches kallsyms to that approach.

A complication with this is that the endianness of numbers between
host and target might not match (for example, when cross-compiling);
and there seems to be no kconfig symbol that tells us what endianness
the target has.
So pass the path to the intermediate vmlinux ELF file to the kallsyms
tool, and let it parse the ELF header to figure out the target's
endianness.

I have verified that running kallsyms without these changes and
kallsyms with these changes on the same input System.map results
in identical object files.

This change reduces the time for an incremental kernel rebuild
(touch fs/ioctl.c, then re-run make) from 27.7s to 24.1s (medians
over 16 runs each) on my machine - saving around 3.6 seconds.

Signed-off-by: Jann Horn <jannh@google.com>
---
 scripts/kallsyms.c      | 196 ++++++++++++++++++++++++++++++++--------
 scripts/link-vmlinux.sh |   5 +-
 2 files changed, 159 insertions(+), 42 deletions(-)

diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
index f35be95adfbe..ef03d723aded 100644
--- a/scripts/kallsyms.c
+++ b/scripts/kallsyms.c
@@ -27,6 +27,10 @@
 #include <string.h>
 #include <ctype.h>
 #include <limits.h>
+#include <endian.h>
+#include <elf.h>
+#include <fcntl.h>
+#include <unistd.h>
 
 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
 
@@ -75,7 +79,7 @@ static unsigned char best_table_len[256];
 static void usage(void)
 {
 	fprintf(stderr, "Usage: kallsyms [--all-symbols] [--absolute-percpu] "
-			"[--lto-clang] in.map > out.S\n");
+			"[--lto-clang] in in.map out.S out.bin\n");
 	exit(1);
 }
 
@@ -290,20 +294,57 @@ static void read_map(const char *in)
 	fclose(fp);
 }
 
+static bool is_64bit, is_little_endian;
+static char *asm_path, *bin_path;
+static FILE *asm_file, *bin_file;
+static size_t bin_offset, bin_included;
+
 static void output_label(const char *label)
 {
-	printf(".globl %s\n", label);
-	printf("\tALGN\n");
-	printf("%s:\n", label);
+	fprintf(asm_file, ".globl %s\n", label);
+	fprintf(asm_file, "\tALGN\n");
+	fprintf(asm_file, "%s:\n", label);
 }
 
 /* Provide proper symbols relocatability by their '_text' relativeness. */
 static void output_address(unsigned long long addr)
 {
 	if (_text <= addr)
-		printf("\tPTR\t_text + %#llx\n", addr - _text);
+		fprintf(asm_file, "\tPTR\t_text + %#llx\n", addr - _text);
 	else
-		printf("\tPTR\t_text - %#llx\n", _text - addr);
+		fprintf(asm_file, "\tPTR\t_text - %#llx\n", _text - addr);
+}
+
+/*
+ * Include all data that has been written into bin_file since the last call to
+ * this function.
+ */
+static void include_bin_data(void)
+{
+	fprintf(asm_file, ".incbin \"%s\", %zu, %zu\n", bin_path,
+		bin_included, bin_offset - bin_included);
+	bin_included = bin_offset;
+}
+
+static void output_bin_data(const void *data, size_t len)
+{
+	if (fwrite(data, 1, len, bin_file) != len) {
+		fprintf(stderr, "kallsyms: unable to write output\n");
+		exit(EXIT_FAILURE);
+	}
+	bin_offset += len;
+}
+static void output_bin_u32(uint32_t value)
+{
+	uint32_t encoded = is_little_endian ? htole32(value) : htobe32(value);
+
+	output_bin_data(&encoded, sizeof(encoded));
+}
+static void output_bin_u16(uint16_t value)
+{
+	uint16_t encoded = is_little_endian ? htole16(value) : htobe16(value);
+
+	output_bin_data(&encoded, sizeof(encoded));
 }
 
 /* uncompress a compressed symbol. When this function is called, the best table
@@ -384,25 +425,36 @@ static void sort_symbols_by_name(void)
 
 static void write_src(void)
 {
-	unsigned int i, k, off;
+	unsigned int i, off;
 	unsigned int best_idx[256];
 	unsigned int *markers;
 	char buf[KSYM_NAME_LEN];
 
-	printf("#include <asm/bitsperlong.h>\n");
-	printf("#if BITS_PER_LONG == 64\n");
-	printf("#define PTR .quad\n");
-	printf("#define ALGN .balign 8\n");
-	printf("#else\n");
-	printf("#define PTR .long\n");
-	printf("#define ALGN .balign 4\n");
-	printf("#endif\n");
+	asm_file = fopen(asm_path, "w");
+	if (!asm_file) {
+		perror("unable to open asm output");
+		exit(EXIT_FAILURE);
+	}
+	bin_file = fopen(bin_path, "w");
+	if (!bin_file) {
+		perror("unable to open bin output");
+		exit(EXIT_FAILURE);
+	}
+
+	fprintf(asm_file, "#include <asm/bitsperlong.h>\n");
+	fprintf(asm_file, "#if BITS_PER_LONG == 64\n");
+	fprintf(asm_file, "#define PTR .quad\n");
+	fprintf(asm_file, "#define ALGN .balign 8\n");
+	fprintf(asm_file, "#else\n");
+	fprintf(asm_file, "#define PTR .long\n");
+	fprintf(asm_file, "#define ALGN .balign 4\n");
+	fprintf(asm_file, "#endif\n");
 
-	printf("\t.section .rodata, \"a\"\n");
+	fprintf(asm_file, "\t.section .rodata, \"a\"\n");
 
 	output_label("kallsyms_num_syms");
-	printf("\t.long\t%u\n", table_cnt);
-	printf("\n");
+	fprintf(asm_file, "\t.long\t%u\n", table_cnt);
+	fprintf(asm_file, "\n");
 
 	/* table of offset markers, that give the offset in the compressed stream
 	 * every 256 symbols */
@@ -437,20 +489,23 @@ static void write_src(void)
 		/* Encode length with ULEB128. */
 		if (table[i]->len <= 0x7F) {
 			/* Most symbols use a single byte for the length. */
-			printf("\t.byte 0x%02x", table[i]->len);
+			unsigned char len_encoded[1] = { table[i]->len };
+
+			output_bin_data(len_encoded, sizeof(len_encoded));
 			off += table[i]->len + 1;
 		} else {
 			/* "Big" symbols use two bytes. */
-			printf("\t.byte 0x%02x, 0x%02x",
+			unsigned char len_encoded[2] = {
 				(table[i]->len & 0x7F) | 0x80,
-				(table[i]->len >> 7) & 0x7F);
+				(table[i]->len >> 7) & 0x7F
+			};
+
+			output_bin_data(len_encoded, sizeof(len_encoded));
 			off += table[i]->len + 2;
 		}
-		for (k = 0; k < table[i]->len; k++)
-			printf(", 0x%02x", table[i]->sym[k]);
-		printf("\n");
+		output_bin_data(table[i]->sym, table[i]->len);
 	}
-	printf("\n");
+	include_bin_data();
 
 	/*
 	 * Now that we wrote out the compressed symbol names, restore the
@@ -463,8 +518,8 @@ static void write_src(void)
 
 	output_label("kallsyms_markers");
 	for (i = 0; i < ((table_cnt + 255) >> 8); i++)
-		printf("\t.long\t%u\n", markers[i]);
-	printf("\n");
+		output_bin_u32(markers[i]);
+	include_bin_data();
 
 	free(markers);
 
@@ -473,15 +528,15 @@ static void write_src(void)
 	for (i = 0; i < 256; i++) {
 		best_idx[i] = off;
 		expand_symbol(best_table[i], best_table_len[i], buf);
-		printf("\t.asciz\t\"%s\"\n", buf);
+		output_bin_data(buf, strlen(buf)+1);
 		off += strlen(buf) + 1;
 	}
-	printf("\n");
+	include_bin_data();
 
 	output_label("kallsyms_token_index");
 	for (i = 0; i < 256; i++)
-		printf("\t.short\t%d\n", best_idx[i]);
-	printf("\n");
+		output_bin_u16(best_idx[i]);
+	include_bin_data();
 
 	output_label("kallsyms_offsets");
 
@@ -513,13 +568,12 @@ static void write_src(void)
 				table[i]->addr);
 			exit(EXIT_FAILURE);
 		}
-		printf("\t.long\t%#x	/* %s */\n", (int)offset, table[i]->sym);
+		output_bin_u32((uint32_t)offset);
 	}
-	printf("\n");
+	include_bin_data();
 
 	output_label("kallsyms_relative_base");
 	output_address(relative_base);
-	printf("\n");
 
 	if (lto_clang)
 		for (i = 0; i < table_cnt; i++)
@@ -527,12 +581,24 @@ static void write_src(void)
 
 	sort_symbols_by_name();
 	output_label("kallsyms_seqs_of_names");
-	for (i = 0; i < table_cnt; i++)
-		printf("\t.byte 0x%02x, 0x%02x, 0x%02x\n",
+	for (i = 0; i < table_cnt; i++) {
+		unsigned char seq_encoded[3] = {
 			(unsigned char)(table[i]->seq >> 16),
 			(unsigned char)(table[i]->seq >> 8),
-			(unsigned char)(table[i]->seq >> 0));
-	printf("\n");
+			(unsigned char)(table[i]->seq >> 0)
+		};
+		output_bin_data(seq_encoded, sizeof(seq_encoded));
+	}
+	include_bin_data();
+
+	if (fclose(asm_file)) {
+		perror("unable to write to asm output");
+		exit(EXIT_FAILURE);
+	}
+	if (fclose(bin_file)) {
+		perror("unable to write to bin output");
+		exit(EXIT_FAILURE);
+	}
 }
 
 
@@ -795,6 +861,52 @@ static void record_relative_base(void)
 		}
 }
 
+static void get_target_data_types(const char *elf_path)
+{
+	int elf_fd = open(elf_path, O_RDONLY);
+	unsigned char elf_ident[EI_NIDENT];
+
+	if (elf_fd == -1) {
+		perror("open ELF");
+		exit(EXIT_FAILURE);
+	}
+	if (read(elf_fd, elf_ident, sizeof(elf_ident)) != sizeof(elf_ident)) {
+		perror("read ELF header");
+		exit(EXIT_FAILURE);
+	}
+	close(elf_fd);
+
+	if (elf_ident[EI_MAG0] != ELFMAG0 || elf_ident[EI_MAG1] != ELFMAG1 ||
+	    elf_ident[EI_MAG2] != ELFMAG2 || elf_ident[EI_MAG3] != ELFMAG3) {
+		fprintf(stderr, "kallsyms: input ELF has invalid header\n");
+		exit(EXIT_FAILURE);
+	}
+
+	switch (elf_ident[EI_CLASS]) {
+	case ELFCLASS32:
+		is_64bit = false;
+		break;
+	case ELFCLASS64:
+		is_64bit = true;
+		break;
+	default:
+		fprintf(stderr, "kallsyms: input ELF has invalid bitness\n");
+		exit(EXIT_FAILURE);
+	}
+
+	switch (elf_ident[EI_DATA]) {
+	case ELFDATA2LSB:
+		is_little_endian = true;
+		break;
+	case ELFDATA2MSB:
+		is_little_endian = false;
+		break;
+	default:
+		fprintf(stderr, "kallsyms: input ELF has invalid endianness\n");
+		exit(EXIT_FAILURE);
+	}
+}
+
 int main(int argc, char **argv)
 {
 	while (1) {
@@ -813,10 +925,14 @@ int main(int argc, char **argv)
 			usage();
 	}
 
-	if (optind >= argc)
+	if (optind+4 != argc)
 		usage();
+	asm_path = argv[optind+2];
+	bin_path = argv[optind+3];
+
+	get_target_data_types(argv[optind]);
 
-	read_map(argv[optind]);
+	read_map(argv[optind+1]);
 	shrink_table();
 	if (absolute_percpu)
 		make_percpus_absolute();
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index 5127371d3393..1b5ff33a2d4a 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -162,7 +162,7 @@ kallsyms()
 	fi
 
 	info KSYMS ${2}
-	scripts/kallsyms ${kallsymopt} ${1} > ${2}
+	scripts/kallsyms ${kallsymopt} ${1} ${2} ${3} ${4}
 }
 
 # Perform one step in kallsyms generation, including temporary linking of
@@ -173,10 +173,11 @@ kallsyms_step()
 	kallsyms_vmlinux=.tmp_vmlinux.kallsyms${1}
 	kallsymso=${kallsyms_vmlinux}.o
 	kallsyms_S=${kallsyms_vmlinux}.S
+	kallsyms_bin=${kallsyms_vmlinux}.bin
 
 	vmlinux_link ${kallsyms_vmlinux} "${kallsymso_prev}" ${btf_vmlinux_bin_o}
 	mksysmap ${kallsyms_vmlinux} ${kallsyms_vmlinux}.syms ${kallsymso_prev}
-	kallsyms ${kallsyms_vmlinux}.syms ${kallsyms_S}
+	kallsyms ${kallsyms_vmlinux} ${kallsyms_vmlinux}.syms ${kallsyms_S} ${kallsyms_bin}
 
 	info AS ${kallsyms_S}
 	${CC} ${NOSTDINC_FLAGS} ${LINUXINCLUDE} ${KBUILD_CPPFLAGS} \
-- 
2.44.0.rc0.258.g7320e95886-goog


  reply	other threads:[~2024-02-21 20:27 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-21 20:26 [PATCH 1/2] kallsyms: get rid of code for absolute kallsyms Jann Horn
2024-02-21 20:26 ` Jann Horn [this message]
2024-02-21 20:29   ` [PATCH 2/2] kallsyms: build faster by using .incbin Jann Horn
2024-02-22  4:06   ` Masahiro Yamada
2024-02-22  8:20     ` Arnd Bergmann
2024-02-22 11:20     ` Jann Horn
2024-02-22 14:20       ` Jann Horn
2024-02-23  4:26         ` Masahiro Yamada
2024-02-21 21:18 ` [PATCH 1/2] kallsyms: get rid of code for absolute kallsyms Arnd Bergmann
2024-02-23 12:33 ` Masahiro Yamada

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240221202655.2423854-2-jannh@google.com \
    --to=jannh@google.com \
    --cc=akpm@linux-foundation.org \
    --cc=arnd@arndb.de \
    --cc=linux-kbuild@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=masahiroy@kernel.org \
    --cc=ndesaulniers@google.com \
    --cc=ojeda@kernel.org \
    --cc=thunder.leizhen@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.