All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] Build ORC fast lookup table in scripts/sorttable tool
@ 2020-04-29  6:46 Huaixin Chang
  2020-04-29  6:46 ` [PATCH 1/2] scripts/sorttable: Build orc fast lookup table via sorttable tool Huaixin Chang
                   ` (3 more replies)
  0 siblings, 4 replies; 24+ messages in thread
From: Huaixin Chang @ 2020-04-29  6:46 UTC (permalink / raw)
  To: linux-kernel, linux-kbuild
  Cc: bp, hpa, jpoimboe, luto, michal.lkml, mingo, peterz, tglx, x86,
	yamada.masahiro

Move building of the ORC fast lookup table from boot to the sorttable tool.
This saves us 6380us of boot time on an Intel(R) Xeon(R) CPU E5-2682 v4 @
2.50GHz with 64 cores.

Huaixin Chang (2):
  scripts/sorttable: Build orc fast lookup table via sorttable tool
  x86/unwind/orc: Remove unwind_init() from x86 boot

 arch/x86/include/asm/unwind.h |  2 -
 arch/x86/kernel/setup.c       |  2 -
 arch/x86/kernel/unwind_orc.c  | 51 ----------------------
 scripts/sorttable.h           | 99 ++++++++++++++++++++++++++++++++++++++++---
 4 files changed, 92 insertions(+), 62 deletions(-)

-- 
2.14.4.44.g2045bb6


^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 1/2] scripts/sorttable: Build orc fast lookup table via sorttable tool
  2020-04-29  6:46 [PATCH 0/2] Build ORC fast lookup table in scripts/sorttable tool Huaixin Chang
@ 2020-04-29  6:46 ` Huaixin Chang
  2020-04-29  6:46 ` [PATCH 2/2] x86/unwind/orc: Remove unwind_init() from x86 boot Huaixin Chang
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 24+ messages in thread
From: Huaixin Chang @ 2020-04-29  6:46 UTC (permalink / raw)
  To: linux-kernel, linux-kbuild
  Cc: bp, hpa, jpoimboe, luto, michal.lkml, mingo, peterz, tglx, x86,
	yamada.masahiro

Since orc tables are already sorted by sorttable tool, let us move
building of fast lookup table into sorttable tool too. This saves us
6380us from boot time under Intel(R) Xeon(R) CPU E5-2682 v4 @ 2.50GHz
with 64 cores.

Signed-off-by: Huaixin Chang <changhuaixin@linux.alibaba.com>
Signed-off-by: Shile Zhang <shile.zhang@linux.alibaba.com>
---
 scripts/sorttable.h | 99 +++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 92 insertions(+), 7 deletions(-)

diff --git a/scripts/sorttable.h b/scripts/sorttable.h
index a2baa2fefb13..a36c76c17be4 100644
--- a/scripts/sorttable.h
+++ b/scripts/sorttable.h
@@ -93,12 +93,50 @@
 char g_err[ERRSTR_MAXSZ];
 int *g_orc_ip_table;
 struct orc_entry *g_orc_table;
+static unsigned long orc_ip_table_offset;
 
 pthread_t orc_sort_thread;
 
+struct orc_sort_param {
+	size_t		lookup_table_size;
+	unsigned int	*orc_lookup_table;
+	unsigned long	start_ip;
+	size_t		text_size;
+	unsigned int	orc_num_entries;
+};
+
 static inline unsigned long orc_ip(const int *ip)
 {
-	return (unsigned long)ip + *ip;
+	return (unsigned long)ip + *ip + orc_ip_table_offset;
+}
+
+static struct orc_entry *__orc_find(int *ip_table, struct orc_entry *u_table,
+				    unsigned int num_entries, unsigned long ip)
+{
+	int *first = ip_table;
+	int *last = ip_table + num_entries - 1;
+	int *mid = first, *found = first;
+
+	if (!num_entries)
+		return NULL;
+
+	/*
+	 * Do a binary range search to find the rightmost duplicate of a given
+	 * starting address.  Some entries are section terminators which are
+	 * "weak" entries for ensuring there are no gaps.  They should be
+	 * ignored when they conflict with a real entry.
+	 */
+	while (first <= last) {
+		mid = first + ((last - first) / 2);
+
+		if (orc_ip(mid) <= ip) {
+			found = mid;
+			first = mid + 1;
+		} else
+			last = mid - 1;
+	}
+
+	return u_table + (found - ip_table);
 }
 
 static int orc_sort_cmp(const void *_a, const void *_b)
@@ -130,18 +168,24 @@ static void *sort_orctable(void *arg)
 	int *idxs = NULL;
 	int *tmp_orc_ip_table = NULL;
 	struct orc_entry *tmp_orc_table = NULL;
-	unsigned int *orc_ip_size = (unsigned int *)arg;
-	unsigned int num_entries = *orc_ip_size / sizeof(int);
+	struct orc_sort_param *param = (struct orc_sort_param *)arg;
+	unsigned int num_entries = param->orc_num_entries;
+	unsigned int orc_ip_size = num_entries * sizeof(int);
 	unsigned int orc_size = num_entries * sizeof(struct orc_entry);
+	unsigned int lookup_num_blocks = param->lookup_table_size / sizeof(int);
+	unsigned int *orc_lookup = param->orc_lookup_table;
+	unsigned long lookup_start_ip = param->start_ip;
+	unsigned long lookup_stop_ip = param->start_ip + param->text_size;
+	struct orc_entry *orc;
 
-	idxs = (int *)malloc(*orc_ip_size);
+	idxs = (int *)malloc(orc_ip_size);
 	if (!idxs) {
 		snprintf(g_err, ERRSTR_MAXSZ, "malloc idxs: %s",
 			 strerror(errno));
 		pthread_exit(g_err);
 	}
 
-	tmp_orc_ip_table = (int *)malloc(*orc_ip_size);
+	tmp_orc_ip_table = (int *)malloc(orc_ip_size);
 	if (!tmp_orc_ip_table) {
 		snprintf(g_err, ERRSTR_MAXSZ, "malloc tmp_orc_ip_table: %s",
 			 strerror(errno));
@@ -173,6 +217,31 @@ static void *sort_orctable(void *arg)
 		g_orc_table[i] = tmp_orc_table[idxs[i]];
 	}
 
+#define LOOKUP_BLOCK_ORDER	8
+#define LOOKUP_BLOCK_SIZE	(1 << LOOKUP_BLOCK_ORDER)
+
+	for (i = 0; i < lookup_num_blocks-1; i++) {
+		orc = __orc_find(g_orc_ip_table, g_orc_table,
+				 num_entries,
+				 lookup_start_ip + (LOOKUP_BLOCK_SIZE * i));
+		if (!orc) {
+			snprintf(g_err, ERRSTR_MAXSZ,
+					"Corrupt .orc_unwind table\n");
+			pthread_exit(g_err);
+		}
+
+		orc_lookup[i] = orc - g_orc_table;
+	}
+
+	/* Initialize the ending block: */
+	orc = __orc_find(g_orc_ip_table, g_orc_table, num_entries,
+			 lookup_stop_ip);
+	if (!orc) {
+		snprintf(g_err, ERRSTR_MAXSZ, "Corrupt .orc_unwind table\n");
+		pthread_exit(g_err);
+	}
+	orc_lookup[lookup_num_blocks-1] = orc - g_orc_table;
+
 	free(idxs);
 	free(tmp_orc_ip_table);
 	free(tmp_orc_table);
@@ -221,6 +290,8 @@ static int do_sort(Elf_Ehdr *ehdr,
 	unsigned int orc_ip_size = 0;
 	unsigned int orc_size = 0;
 	unsigned int orc_num_entries = 0;
+	unsigned long orc_ip_addr = 0;
+	struct orc_sort_param param;
 #endif
 
 	shstrndx = r2(&ehdr->e_shstrndx);
@@ -259,17 +330,27 @@ static int do_sort(Elf_Ehdr *ehdr,
 			orc_ip_size = s->sh_size;
 			g_orc_ip_table = (int *)((void *)ehdr +
 						   s->sh_offset);
+			orc_ip_addr = s->sh_addr;
 		}
 		if (!strcmp(secstrings + idx, ".orc_unwind")) {
 			orc_size = s->sh_size;
 			g_orc_table = (struct orc_entry *)((void *)ehdr +
 							     s->sh_offset);
 		}
+		if (!strcmp(secstrings + idx, ".orc_lookup")) {
+			param.lookup_table_size = s->sh_size;
+			param.orc_lookup_table = (unsigned int *)
+				((void *)ehdr + s->sh_offset);
+		}
+		if (!strcmp(secstrings + idx, ".text")) {
+			param.text_size = s->sh_size;
+			param.start_ip = s->sh_addr;
+		}
 #endif
 	} /* for loop */
 
 #if defined(SORTTABLE_64) && defined(UNWINDER_ORC_ENABLED)
-	if (!g_orc_ip_table || !g_orc_table) {
+	if (!g_orc_ip_table || !g_orc_table || !param.orc_lookup_table) {
 		fprintf(stderr,
 			"incomplete ORC unwind tables in file: %s\n", fname);
 		goto out;
@@ -285,9 +366,13 @@ static int do_sort(Elf_Ehdr *ehdr,
 		goto out;
 	}
 
+	/* Make orc_ip return virtual address at execution. */
+	orc_ip_table_offset = orc_ip_addr - (unsigned long)g_orc_ip_table;
+
 	/* create thread to sort ORC unwind tables concurrently */
+	param.orc_num_entries = orc_num_entries;
 	if (pthread_create(&orc_sort_thread, NULL,
-			   sort_orctable, &orc_ip_size)) {
+			   sort_orctable, &param)) {
 		fprintf(stderr,
 			"pthread_create orc_sort_thread failed '%s': %s\n",
 			strerror(errno), fname);
-- 
2.14.4.44.g2045bb6


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 2/2] x86/unwind/orc: Remove unwind_init() from x86 boot
  2020-04-29  6:46 [PATCH 0/2] Build ORC fast lookup table in scripts/sorttable tool Huaixin Chang
  2020-04-29  6:46 ` [PATCH 1/2] scripts/sorttable: Build orc fast lookup table via sorttable tool Huaixin Chang
@ 2020-04-29  6:46 ` Huaixin Chang
  2020-04-29  8:49 ` [PATCH 0/2] Build ORC fast lookup table in scripts/sorttable tool Peter Zijlstra
  2020-05-22 18:28 ` Josh Poimboeuf
  3 siblings, 0 replies; 24+ messages in thread
From: Huaixin Chang @ 2020-04-29  6:46 UTC (permalink / raw)
  To: linux-kernel, linux-kbuild
  Cc: bp, hpa, jpoimboe, luto, michal.lkml, mingo, peterz, tglx, x86,
	yamada.masahiro

All preparation has been moved to scripts/sorttable tool. No need to
init unwind on boot now.

Signed-off-by: Huaixin Chang <changhuaixin@linux.alibaba.com>
Signed-off-by: Shile Zhang <shile.zhang@linux.alibaba.com>
---
 arch/x86/include/asm/unwind.h |  2 --
 arch/x86/kernel/setup.c       |  2 --
 arch/x86/kernel/unwind_orc.c  | 51 -------------------------------------------
 3 files changed, 55 deletions(-)

diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index 499578f7e6d7..db5aea8c9d98 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -89,11 +89,9 @@ static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state,
 #endif
 
 #ifdef CONFIG_UNWINDER_ORC
-void unwind_init(void);
 void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size,
 			void *orc, size_t orc_size);
 #else
-static inline void unwind_init(void) {}
 static inline
 void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size,
 			void *orc, size_t orc_size) {}
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 4b3fa6cd3106..4a17026c6550 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1243,8 +1243,6 @@ void __init setup_arch(char **cmdline_p)
 	if (efi_enabled(EFI_BOOT))
 		efi_apply_memmap_quirks();
 #endif
-
-	unwind_init();
 }
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index e9cc182aa97e..3db43e6e75f2 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -20,7 +20,6 @@ int *cur_orc_ip_table = __start_orc_unwind_ip;
 struct orc_entry *cur_orc_table = __start_orc_unwind;
 
 unsigned int lookup_num_blocks;
-bool orc_init;
 
 static inline unsigned long orc_ip(const int *ip)
 {
@@ -142,9 +141,6 @@ static struct orc_entry *orc_find(unsigned long ip)
 {
 	static struct orc_entry *orc;
 
-	if (!orc_init)
-		return NULL;
-
 	if (ip == 0)
 		return &null_orc_entry;
 
@@ -259,53 +255,6 @@ void unwind_module_init(struct module *mod, void *_orc_ip, size_t orc_ip_size,
 }
 #endif
 
-void __init unwind_init(void)
-{
-	size_t orc_ip_size = (void *)__stop_orc_unwind_ip - (void *)__start_orc_unwind_ip;
-	size_t orc_size = (void *)__stop_orc_unwind - (void *)__start_orc_unwind;
-	size_t num_entries = orc_ip_size / sizeof(int);
-	struct orc_entry *orc;
-	int i;
-
-	if (!num_entries || orc_ip_size % sizeof(int) != 0 ||
-	    orc_size % sizeof(struct orc_entry) != 0 ||
-	    num_entries != orc_size / sizeof(struct orc_entry)) {
-		orc_warn("WARNING: Bad or missing .orc_unwind table.  Disabling unwinder.\n");
-		return;
-	}
-
-	/*
-	 * Note, the orc_unwind and orc_unwind_ip tables were already
-	 * sorted at build time via the 'sorttable' tool.
-	 * It's ready for binary search straight away, no need to sort it.
-	 */
-
-	/* Initialize the fast lookup table: */
-	lookup_num_blocks = orc_lookup_end - orc_lookup;
-	for (i = 0; i < lookup_num_blocks-1; i++) {
-		orc = __orc_find(__start_orc_unwind_ip, __start_orc_unwind,
-				 num_entries,
-				 LOOKUP_START_IP + (LOOKUP_BLOCK_SIZE * i));
-		if (!orc) {
-			orc_warn("WARNING: Corrupt .orc_unwind table.  Disabling unwinder.\n");
-			return;
-		}
-
-		orc_lookup[i] = orc - __start_orc_unwind;
-	}
-
-	/* Initialize the ending block: */
-	orc = __orc_find(__start_orc_unwind_ip, __start_orc_unwind, num_entries,
-			 LOOKUP_STOP_IP);
-	if (!orc) {
-		orc_warn("WARNING: Corrupt .orc_unwind table.  Disabling unwinder.\n");
-		return;
-	}
-	orc_lookup[lookup_num_blocks-1] = orc - __start_orc_unwind;
-
-	orc_init = true;
-}
-
 unsigned long unwind_get_return_address(struct unwind_state *state)
 {
 	if (unwind_done(state))
-- 
2.14.4.44.g2045bb6


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH 0/2] Build ORC fast lookup table in scripts/sorttable tool
  2020-04-29  6:46 [PATCH 0/2] Build ORC fast lookup table in scripts/sorttable tool Huaixin Chang
  2020-04-29  6:46 ` [PATCH 1/2] scripts/sorttable: Build orc fast lookup table via sorttable tool Huaixin Chang
  2020-04-29  6:46 ` [PATCH 2/2] x86/unwind/orc: Remove unwind_init() from x86 boot Huaixin Chang
@ 2020-04-29  8:49 ` Peter Zijlstra
  2020-04-30  2:32   ` changhuaixin
  2020-05-22 18:28 ` Josh Poimboeuf
  3 siblings, 1 reply; 24+ messages in thread
From: Peter Zijlstra @ 2020-04-29  8:49 UTC (permalink / raw)
  To: Huaixin Chang
  Cc: linux-kernel, linux-kbuild, bp, hpa, jpoimboe, luto, michal.lkml,
	mingo, tglx, x86, yamada.masahiro

On Wed, Apr 29, 2020 at 02:46:24PM +0800, Huaixin Chang wrote:
> Move building of fast lookup table from boot to sorttable tool. This saves us
> 6380us boot time on Intel(R) Xeon(R) CPU E5-2682 v4 @ 2.50GHz with cores.

And what does it add to the build time?

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 0/2] Build ORC fast lookup table in scripts/sorttable tool
  2020-04-29  8:49 ` [PATCH 0/2] Build ORC fast lookup table in scripts/sorttable tool Peter Zijlstra
@ 2020-04-30  2:32   ` changhuaixin
  2020-04-30  4:06     ` Josh Poimboeuf
  0 siblings, 1 reply; 24+ messages in thread
From: changhuaixin @ 2020-04-30  2:32 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: changhuaixin, linux-kernel, linux-kbuild, bp, hpa, jpoimboe,
	luto, michal.lkml, mingo, tglx, x86, yamada.masahiro



> On Apr 29, 2020, at 4:49 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> 
> On Wed, Apr 29, 2020 at 02:46:24PM +0800, Huaixin Chang wrote:
>> Move building of fast lookup table from boot to sorttable tool. This saves us
>> 6380us boot time on Intel(R) Xeon(R) CPU E5-2682 v4 @ 2.50GHz with cores.
> 
> And what does it add to the build time?

It takes a little more than 7ms to build the fast lookup table in sorttable on the same CPU, and it is on the critical path.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 0/2] Build ORC fast lookup table in scripts/sorttable tool
  2020-04-30  2:32   ` changhuaixin
@ 2020-04-30  4:06     ` Josh Poimboeuf
  2020-04-30  4:10       ` Josh Poimboeuf
  0 siblings, 1 reply; 24+ messages in thread
From: Josh Poimboeuf @ 2020-04-30  4:06 UTC (permalink / raw)
  To: changhuaixin
  Cc: Peter Zijlstra, linux-kernel, linux-kbuild, bp, hpa, luto,
	michal.lkml, mingo, tglx, x86, yamada.masahiro

On Thu, Apr 30, 2020 at 10:32:17AM +0800, changhuaixin wrote:
> 
> 
> > On Apr 29, 2020, at 4:49 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> > 
> > On Wed, Apr 29, 2020 at 02:46:24PM +0800, Huaixin Chang wrote:
> >> Move building of fast lookup table from boot to sorttable tool. This saves us
> >> 6380us boot time on Intel(R) Xeon(R) CPU E5-2682 v4 @ 2.50GHz with cores.
> > 
> > And what does it add to the build time?
> 
> It takes a little more than 7ms to build fast lookup table in
> sorttable on the same CPU. And it is on the critical path.

Thanks, I like it.  It will help make the in-kernel unwinder even
simpler.  And it will enable unwinding from early boot.

Maybe someday we can move all the table sorting code into objtool, once
we have objtool running on vmlinux.o.

I'll try to review the patches soon.

-- 
Josh


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 0/2] Build ORC fast lookup table in scripts/sorttable tool
  2020-04-30  4:06     ` Josh Poimboeuf
@ 2020-04-30  4:10       ` Josh Poimboeuf
  0 siblings, 0 replies; 24+ messages in thread
From: Josh Poimboeuf @ 2020-04-30  4:10 UTC (permalink / raw)
  To: changhuaixin
  Cc: Peter Zijlstra, linux-kernel, linux-kbuild, bp, hpa, luto,
	michal.lkml, mingo, tglx, x86, yamada.masahiro

On Wed, Apr 29, 2020 at 11:06:58PM -0500, Josh Poimboeuf wrote:
> On Thu, Apr 30, 2020 at 10:32:17AM +0800, changhuaixin wrote:
> > 
> > 
> > > On Apr 29, 2020, at 4:49 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> > > 
> > > On Wed, Apr 29, 2020 at 02:46:24PM +0800, Huaixin Chang wrote:
> > >> Move building of fast lookup table from boot to sorttable tool. This saves us
> > >> 6380us boot time on Intel(R) Xeon(R) CPU E5-2682 v4 @ 2.50GHz with cores.
> > > 
> > > And what does it add to the build time?
> > 
> > It takes a little more than 7ms to build fast lookup table in
> > sorttable on the same CPU. And it is on the critical path.
> 
> Thanks, I like it.  It will help make the in-kernel unwinder even
> simpler.  And it will enable unwinding from early boot.
> 
> Maybe someday we can move all the table sorting code into objtool, once
> we have objtool running on vmlinux.o.
> 
> I'll try to review the patches soon.

BTW, another cool feature would be for sorttable to run on modules
during the module linking phase.

-- 
Josh


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 0/2] Build ORC fast lookup table in scripts/sorttable tool
  2020-04-29  6:46 [PATCH 0/2] Build ORC fast lookup table in scripts/sorttable tool Huaixin Chang
                   ` (2 preceding siblings ...)
  2020-04-29  8:49 ` [PATCH 0/2] Build ORC fast lookup table in scripts/sorttable tool Peter Zijlstra
@ 2020-05-22 18:28 ` Josh Poimboeuf
  2020-05-25  3:33   ` changhuaixin
       [not found]   ` <482837A8-E9D9-4229-B7B1-8E14403FB2AC@linux.alibaba.com>
  3 siblings, 2 replies; 24+ messages in thread
From: Josh Poimboeuf @ 2020-05-22 18:28 UTC (permalink / raw)
  To: Huaixin Chang
  Cc: linux-kernel, linux-kbuild, bp, hpa, luto, michal.lkml, mingo,
	peterz, tglx, x86, yamada.masahiro

On Wed, Apr 29, 2020 at 02:46:24PM +0800, Huaixin Chang wrote:
> Move building of fast lookup table from boot to sorttable tool. This saves us
> 6380us boot time on Intel(R) Xeon(R) CPU E5-2682 v4 @ 2.50GHz with cores.
> 
> Huaixin Chang (2):
>   scripts/sorttable: Build orc fast lookup table via sorttable tool
>   x86/unwind/orc: Remove unwind_init() from x86 boot
> 
>  arch/x86/include/asm/unwind.h |  2 -
>  arch/x86/kernel/setup.c       |  2 -
>  arch/x86/kernel/unwind_orc.c  | 51 ----------------------
>  scripts/sorttable.h           | 99 ++++++++++++++++++++++++++++++++++++++++---
>  4 files changed, 92 insertions(+), 62 deletions(-)

I tested this (rebased on tip/master), it seems to break ORC
completely... e.g. /proc/self/stack is empty.

-- 
Josh


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 0/2] Build ORC fast lookup table in scripts/sorttable tool
  2020-05-22 18:28 ` Josh Poimboeuf
@ 2020-05-25  3:33   ` changhuaixin
       [not found]   ` <482837A8-E9D9-4229-B7B1-8E14403FB2AC@linux.alibaba.com>
  1 sibling, 0 replies; 24+ messages in thread
From: changhuaixin @ 2020-05-25  3:33 UTC (permalink / raw)
  To: Josh Poimboeuf
  Cc: changhuaixin, linux-kernel, linux-kbuild, bp, hpa, luto,
	michal.lkml, mingo, peterz, tglx, x86, yamada.masahiro

Thanks for your kind reply. Let me check.

> On May 23, 2020, at 2:28 AM, Josh Poimboeuf <jpoimboe@redhat.com> wrote:
> 
> On Wed, Apr 29, 2020 at 02:46:24PM +0800, Huaixin Chang wrote:
>> Move building of fast lookup table from boot to sorttable tool. This saves us
>> 6380us boot time on Intel(R) Xeon(R) CPU E5-2682 v4 @ 2.50GHz with cores.
>> 
>> Huaixin Chang (2):
>>  scripts/sorttable: Build orc fast lookup table via sorttable tool
>>  x86/unwind/orc: Remove unwind_init() from x86 boot
>> 
>> arch/x86/include/asm/unwind.h |  2 -
>> arch/x86/kernel/setup.c       |  2 -
>> arch/x86/kernel/unwind_orc.c  | 51 ----------------------
>> scripts/sorttable.h           | 99 ++++++++++++++++++++++++++++++++++++++++---
>> 4 files changed, 92 insertions(+), 62 deletions(-)
> 
> I tested this (rebased on tip/master), it seems to break ORC
> completely... e.g. /proc/self/stack is empty.
> 
> -- 
> Josh


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 0/2] Build ORC fast lookup table in scripts/sorttable tool
       [not found]   ` <482837A8-E9D9-4229-B7B1-8E14403FB2AC@linux.alibaba.com>
@ 2020-06-01 17:38     ` Josh Poimboeuf
  2020-06-03 13:47       ` changhuaixin
                         ` (2 more replies)
  0 siblings, 3 replies; 24+ messages in thread
From: Josh Poimboeuf @ 2020-06-01 17:38 UTC (permalink / raw)
  To: changhuaixin
  Cc: linux-kernel, linux-kbuild, bp, hpa, luto, michal.lkml, mingo,
	peterz, tglx, x86, yamada.masahiro

On Sun, May 31, 2020 at 01:26:54PM +0800, changhuaixin wrote:
>    It turned out to be an alignment problem. If the sh_size of the previous
>    section, orc_unwind, is not 4-byte aligned, the sh_offset of the following
>    orc_lookup section is not 4-byte aligned either. However, the VMA of the
>    orc_lookup section is aligned to the nearest 4-byte boundary. Thus, the
>    orc_lookup section refers to two different areas for the scripts/sorttable
>    tool and the kernel.
> 
>    Sections headers look like this when it happens:
> 
>    12 .orc_unwind_ip 00172124  ffffffff82573b28  0000000002573b28  01773b28
>     2**0
>                     CONTENTS, ALLOC, LOAD, RELOC, READONLY, DATA
>    13 .orc_unwind   0022b1b6  ffffffff826e5c4c  00000000026e5c4c  018e5c4c
>     2**0
>                     CONTENTS, ALLOC, LOAD, READONLY, DATA
>    14 .orc_lookup   0003003c  ffffffff82910e04  0000000002910e04  01b10e02
>     2**0
>                     ALLOC
>    15 .vvar         00001000  ffffffff82941000  0000000002941000  01b41000
>     2**4
>                     CONTENTS, ALLOC, LOAD, DATA
> 
>    The sorttable tool uses the area starting at file offset 0x01b10e02 for
>    0x0003003c bytes, while the kernel uses the area starting at VMA
>    0xffffffff82910e04 for 0x0003003c bytes, meaning that each entry in this
>    table as used by the kernel is actually 2 bytes behind the corresponding
>    entry set by the sorttable tool.
> 
>    Any suggestion on fixing this?

The VMA and LMA are both 4-byte aligned.  The file offset alignment
(0x01b10e02) shouldn't matter.

Actually it looks like the problem is that the section doesn't have
CONTENTS, so it's just loaded as a BSS section (all zeros).  The section
needs to be type SHT_PROGBITS instead of SHT_NOBITS.

$ readelf -S vmlinux |grep orc_lookup
  [16] .orc_lookup       NOBITS           ffffffff82b68418  01d68418

I tried to fix it with

diff --git a/scripts/sorttable.h b/scripts/sorttable.h
index a36c76c17be4..76adb1fb88f8 100644
--- a/scripts/sorttable.h
+++ b/scripts/sorttable.h
@@ -341,6 +341,7 @@ static int do_sort(Elf_Ehdr *ehdr,
 			param.lookup_table_size = s->sh_size;
 			param.orc_lookup_table = (unsigned int *)
 				((void *)ehdr + s->sh_offset);
+			w(SHT_PROGBITS, &s->sh_type);
 		}
 		if (!strcmp(secstrings + idx, ".text")) {
 			param.text_size = s->sh_size;


But that makes kallsyms unhappy, so I guess we need to do it from the
linker script where .orc_lookup is created.

Linker script doesn't seem to allow manual specification of the section
type, so this is the best I could come up with:

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index db600ef218d7..49f4f5bc6165 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -826,6 +826,8 @@
 		. += (((SIZEOF(.text) + LOOKUP_BLOCK_SIZE - 1) /	\
 			LOOKUP_BLOCK_SIZE) + 1) * 4;			\
 		orc_lookup_end = .;					\
+		/* HACK: force SHT_PROGBITS so sorttable can edit: */	\
+		BYTE(1);						\
 	}
 #else
 #define ORC_UNWIND_TABLE


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH 0/2] Build ORC fast lookup table in scripts/sorttable tool
  2020-06-01 17:38     ` Josh Poimboeuf
@ 2020-06-03 13:47       ` changhuaixin
  2020-06-03 14:31       ` [PATCH v2 0/3] " Huaixin Chang
  2020-06-03 14:39       ` [PATCH v3 0/3] Build ORC fast lookup table in scripts/sorttable tool Huaixin Chang
  2 siblings, 0 replies; 24+ messages in thread
From: changhuaixin @ 2020-06-03 13:47 UTC (permalink / raw)
  To: Josh Poimboeuf
  Cc: changhuaixin, linux-kernel, linux-kbuild, bp, hpa, luto,
	michal.lkml, mingo, peterz, tglx, x86, yamada.masahiro



> On Jun 2, 2020, at 1:38 AM, Josh Poimboeuf <jpoimboe@redhat.com> wrote:
> 
> On Sun, May 31, 2020 at 01:26:54PM +0800, changhuaixin wrote:
>>   It turned out to be an alignment problem. If the sh_size of the previous
>>   section, orc_unwind, is not 4-byte aligned, the sh_offset of the following
>>   orc_lookup section is not 4-byte aligned either. However, the VMA of the
>>   orc_lookup section is aligned to the nearest 4-byte boundary. Thus, the
>>   orc_lookup section refers to two different areas for the scripts/sorttable
>>   tool and the kernel.
>> 
>>   Sections headers look like this when it happens:
>> 
>>   12 .orc_unwind_ip 00172124  ffffffff82573b28  0000000002573b28  01773b28
>>    2**0
>>                    CONTENTS, ALLOC, LOAD, RELOC, READONLY, DATA
>>   13 .orc_unwind   0022b1b6  ffffffff826e5c4c  00000000026e5c4c  018e5c4c
>>    2**0
>>                    CONTENTS, ALLOC, LOAD, READONLY, DATA
>>   14 .orc_lookup   0003003c  ffffffff82910e04  0000000002910e04  01b10e02
>>    2**0
>>                    ALLOC
>>   15 .vvar         00001000  ffffffff82941000  0000000002941000  01b41000
>>    2**4
>>                    CONTENTS, ALLOC, LOAD, DATA
>> 
>>   The sorttable tool uses the area starting at file offset 0x01b10e02 for
>>   0x0003003c bytes, while the kernel uses the area starting at VMA
>>   0xffffffff82910e04 for 0x0003003c bytes, meaning that each entry in this
>>   table as used by the kernel is actually 2 bytes behind the corresponding
>>   entry set by the sorttable tool.
>> 
>>   Any suggestion on fixing this?
> 
> The VMA and LMA are both 4-byte aligned.  The file offset alignment
> (0x01b10e02) shouldn't matter.
> 
> Actually it looks like the problem is that the section doesn't have
> CONTENTS, so it's just loaded as a BSS section (all zeros).  The section
> needs to be type SHT_PROGBITS instead of SHT_NOBITS.
> 
> $ readelf -S vmlinux |grep orc_lookup
>  [16] .orc_lookup       NOBITS           ffffffff82b68418  01d68418
> 
> I tried to fix it with
> 
> diff --git a/scripts/sorttable.h b/scripts/sorttable.h
> index a36c76c17be4..76adb1fb88f8 100644
> --- a/scripts/sorttable.h
> +++ b/scripts/sorttable.h
> @@ -341,6 +341,7 @@ static int do_sort(Elf_Ehdr *ehdr,
> 			param.lookup_table_size = s->sh_size;
> 			param.orc_lookup_table = (unsigned int *)
> 				((void *)ehdr + s->sh_offset);
> +			w(SHT_PROGBITS, &s->sh_type);
> 		}
> 		if (!strcmp(secstrings + idx, ".text")) {
> 			param.text_size = s->sh_size;
> 
> 
> But that makes kallsyms unhappy, so I guess we need to do it from the
> linker script where .orc_lookup is created.
> 
> Linker script doesn't seem to allow manual specification of the section
> type, so this is the best I could come up with:
> 
> diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
> index db600ef218d7..49f4f5bc6165 100644
> --- a/include/asm-generic/vmlinux.lds.h
> +++ b/include/asm-generic/vmlinux.lds.h
> @@ -826,6 +826,8 @@
> 		. += (((SIZEOF(.text) + LOOKUP_BLOCK_SIZE - 1) /	\
> 			LOOKUP_BLOCK_SIZE) + 1) * 4;			\
> 		orc_lookup_end = .;					\
> +		/* HACK: force SHT_PROGBITS so sorttable can edit: */	\
> +		BYTE(1);						\
> 	}
> #else
> #define ORC_UNWIND_TABLE

Thanks! It works.



^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH v2 0/3] Build ORC fast lookup table in scripts/sorttable tool
  2020-06-01 17:38     ` Josh Poimboeuf
  2020-06-03 13:47       ` changhuaixin
@ 2020-06-03 14:31       ` Huaixin Chang
  2020-06-03 14:31         ` [PATCH 1/3] scripts/sorttable: Change section type of orc_lookup to SHT_PROGBITS Huaixin Chang
                           ` (2 more replies)
  2020-06-03 14:39       ` [PATCH v3 0/3] Build ORC fast lookup table in scripts/sorttable tool Huaixin Chang
  2 siblings, 3 replies; 24+ messages in thread
From: Huaixin Chang @ 2020-06-03 14:31 UTC (permalink / raw)
  To: jpoimboe
  Cc: bp, changhuaixin, hpa, linux-kbuild, linux-kernel, luto,
	michal.lkml, mingo, peterz, tglx, x86, yamada.masahiro

Move building of the ORC fast lookup table from boot to the sorttable tool.
This saves us 6380us of boot time on an Intel(R) Xeon(R) CPU E5-2682 v4 @
2.50GHz with 64 cores.

Changelog v2:
1. Type of section orc_lookup needs to be SHT_PROGBITS.
2. unwind_init() cannot be removed totally as setting lookup_num_blocks is needed.

Huaixin Chang (3):
  scripts/sorttable: Change section type of orc_lookup to SHT_PROGBITS
  scripts/sorttable: Build orc fast lookup table via sorttable tool
  x86/unwind/orc: Simplify unwind_init() for x86 boot

 arch/x86/kernel/unwind_orc.c      | 40 ----------------
 include/asm-generic/vmlinux.lds.h |  2 +
 scripts/sorttable.h               | 99 ++++++++++++++++++++++++++++++++++++---
 3 files changed, 94 insertions(+), 47 deletions(-)

-- 
2.14.4.44.g2045bb6


^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 1/3] scripts/sorttable: Change section type of orc_lookup to SHT_PROGBITS
  2020-06-03 14:31       ` [PATCH v2 0/3] " Huaixin Chang
@ 2020-06-03 14:31         ` Huaixin Chang
  2020-06-03 14:31         ` [PATCH 2/3] scripts/sorttable: Build orc fast lookup table via sorttable tool Huaixin Chang
  2020-06-03 14:31         ` [PATCH 3/3] x86/unwind/orc: Simplify unwind_init() for x86 boot Huaixin Chang
  2 siblings, 0 replies; 24+ messages in thread
From: Huaixin Chang @ 2020-06-03 14:31 UTC (permalink / raw)
  To: jpoimboe
  Cc: bp, changhuaixin, hpa, linux-kbuild, linux-kernel, luto,
	michal.lkml, mingo, peterz, tglx, x86, yamada.masahiro

In order to edit orc_lookup table via sorttable, type of section
orc_lookup needs to be SHT_PROGBITS instead of SHT_NOBITS.

Linker script doesn't seem to allow manual specification of the section
type, so just write a byte into the section instead.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Huaixin Chang <changhuaixin@linux.alibaba.com>
Signed-off-by: Shile Zhang <shile.zhang@linux.alibaba.com>
---
 include/asm-generic/vmlinux.lds.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index db600ef218d7..49f4f5bc6165 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -826,6 +826,8 @@
 		. += (((SIZEOF(.text) + LOOKUP_BLOCK_SIZE - 1) /	\
 			LOOKUP_BLOCK_SIZE) + 1) * 4;			\
 		orc_lookup_end = .;					\
+		/* HACK: force SHT_PROGBITS so sorttable can edit: */	\
+		BYTE(1);						\
 	}
 #else
 #define ORC_UNWIND_TABLE
-- 
2.14.4.44.g2045bb6


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 2/3] scripts/sorttable: Build orc fast lookup table via sorttable tool
  2020-06-03 14:31       ` [PATCH v2 0/3] " Huaixin Chang
  2020-06-03 14:31         ` [PATCH 1/3] scripts/sorttable: Change section type of orc_lookup to SHT_PROGBITS Huaixin Chang
@ 2020-06-03 14:31         ` Huaixin Chang
  2020-06-03 14:31         ` [PATCH 3/3] x86/unwind/orc: Simplify unwind_init() for x86 boot Huaixin Chang
  2 siblings, 0 replies; 24+ messages in thread
From: Huaixin Chang @ 2020-06-03 14:31 UTC (permalink / raw)
  To: jpoimboe
  Cc: bp, changhuaixin, hpa, linux-kbuild, linux-kernel, luto,
	michal.lkml, mingo, peterz, tglx, x86, yamada.masahiro

Since orc tables are already sorted by sorttable tool, let us move
building of fast lookup table into sorttable tool too. This saves us
6380us from boot time under Intel(R) Xeon(R) CPU E5-2682 v4 @ 2.50GHz
with 64 cores.

Signed-off-by: Huaixin Chang <changhuaixin@linux.alibaba.com>
Signed-off-by: Shile Zhang <shile.zhang@linux.alibaba.com>
---
 scripts/sorttable.h | 99 +++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 92 insertions(+), 7 deletions(-)

diff --git a/scripts/sorttable.h b/scripts/sorttable.h
index a2baa2fefb13..a36c76c17be4 100644
--- a/scripts/sorttable.h
+++ b/scripts/sorttable.h
@@ -93,12 +93,50 @@
 char g_err[ERRSTR_MAXSZ];
 int *g_orc_ip_table;
 struct orc_entry *g_orc_table;
+static unsigned long orc_ip_table_offset;
 
 pthread_t orc_sort_thread;
 
+struct orc_sort_param {
+	size_t		lookup_table_size;
+	unsigned int	*orc_lookup_table;
+	unsigned long	start_ip;
+	size_t		text_size;
+	unsigned int	orc_num_entries;
+};
+
 static inline unsigned long orc_ip(const int *ip)
 {
-	return (unsigned long)ip + *ip;
+	return (unsigned long)ip + *ip + orc_ip_table_offset;
+}
+
+static struct orc_entry *__orc_find(int *ip_table, struct orc_entry *u_table,
+				    unsigned int num_entries, unsigned long ip)
+{
+	int *first = ip_table;
+	int *last = ip_table + num_entries - 1;
+	int *mid = first, *found = first;
+
+	if (!num_entries)
+		return NULL;
+
+	/*
+	 * Do a binary range search to find the rightmost duplicate of a given
+	 * starting address.  Some entries are section terminators which are
+	 * "weak" entries for ensuring there are no gaps.  They should be
+	 * ignored when they conflict with a real entry.
+	 */
+	while (first <= last) {
+		mid = first + ((last - first) / 2);
+
+		if (orc_ip(mid) <= ip) {
+			found = mid;
+			first = mid + 1;
+		} else
+			last = mid - 1;
+	}
+
+	return u_table + (found - ip_table);
 }
 
 static int orc_sort_cmp(const void *_a, const void *_b)
@@ -130,18 +168,24 @@ static void *sort_orctable(void *arg)
 	int *idxs = NULL;
 	int *tmp_orc_ip_table = NULL;
 	struct orc_entry *tmp_orc_table = NULL;
-	unsigned int *orc_ip_size = (unsigned int *)arg;
-	unsigned int num_entries = *orc_ip_size / sizeof(int);
+	struct orc_sort_param *param = (struct orc_sort_param *)arg;
+	unsigned int num_entries = param->orc_num_entries;
+	unsigned int orc_ip_size = num_entries * sizeof(int);
 	unsigned int orc_size = num_entries * sizeof(struct orc_entry);
+	unsigned int lookup_num_blocks = param->lookup_table_size / sizeof(int);
+	unsigned int *orc_lookup = param->orc_lookup_table;
+	unsigned long lookup_start_ip = param->start_ip;
+	unsigned long lookup_stop_ip = param->start_ip + param->text_size;
+	struct orc_entry *orc;
 
-	idxs = (int *)malloc(*orc_ip_size);
+	idxs = (int *)malloc(orc_ip_size);
 	if (!idxs) {
 		snprintf(g_err, ERRSTR_MAXSZ, "malloc idxs: %s",
 			 strerror(errno));
 		pthread_exit(g_err);
 	}
 
-	tmp_orc_ip_table = (int *)malloc(*orc_ip_size);
+	tmp_orc_ip_table = (int *)malloc(orc_ip_size);
 	if (!tmp_orc_ip_table) {
 		snprintf(g_err, ERRSTR_MAXSZ, "malloc tmp_orc_ip_table: %s",
 			 strerror(errno));
@@ -173,6 +217,31 @@ static void *sort_orctable(void *arg)
 		g_orc_table[i] = tmp_orc_table[idxs[i]];
 	}
 
+#define LOOKUP_BLOCK_ORDER	8
+#define LOOKUP_BLOCK_SIZE	(1 << LOOKUP_BLOCK_ORDER)
+
+	for (i = 0; i < lookup_num_blocks-1; i++) {
+		orc = __orc_find(g_orc_ip_table, g_orc_table,
+				 num_entries,
+				 lookup_start_ip + (LOOKUP_BLOCK_SIZE * i));
+		if (!orc) {
+			snprintf(g_err, ERRSTR_MAXSZ,
+					"Corrupt .orc_unwind table\n");
+			pthread_exit(g_err);
+		}
+
+		orc_lookup[i] = orc - g_orc_table;
+	}
+
+	/* Initialize the ending block: */
+	orc = __orc_find(g_orc_ip_table, g_orc_table, num_entries,
+			 lookup_stop_ip);
+	if (!orc) {
+		snprintf(g_err, ERRSTR_MAXSZ, "Corrupt .orc_unwind table\n");
+		pthread_exit(g_err);
+	}
+	orc_lookup[lookup_num_blocks-1] = orc - g_orc_table;
+
 	free(idxs);
 	free(tmp_orc_ip_table);
 	free(tmp_orc_table);
@@ -221,6 +290,8 @@ static int do_sort(Elf_Ehdr *ehdr,
 	unsigned int orc_ip_size = 0;
 	unsigned int orc_size = 0;
 	unsigned int orc_num_entries = 0;
+	unsigned long orc_ip_addr = 0;
+	struct orc_sort_param param;
 #endif
 
 	shstrndx = r2(&ehdr->e_shstrndx);
@@ -259,17 +330,27 @@ static int do_sort(Elf_Ehdr *ehdr,
 			orc_ip_size = s->sh_size;
 			g_orc_ip_table = (int *)((void *)ehdr +
 						   s->sh_offset);
+			orc_ip_addr = s->sh_addr;
 		}
 		if (!strcmp(secstrings + idx, ".orc_unwind")) {
 			orc_size = s->sh_size;
 			g_orc_table = (struct orc_entry *)((void *)ehdr +
 							     s->sh_offset);
 		}
+		if (!strcmp(secstrings + idx, ".orc_lookup")) {
+			param.lookup_table_size = s->sh_size;
+			param.orc_lookup_table = (unsigned int *)
+				((void *)ehdr + s->sh_offset);
+		}
+		if (!strcmp(secstrings + idx, ".text")) {
+			param.text_size = s->sh_size;
+			param.start_ip = s->sh_addr;
+		}
 #endif
 	} /* for loop */
 
 #if defined(SORTTABLE_64) && defined(UNWINDER_ORC_ENABLED)
-	if (!g_orc_ip_table || !g_orc_table) {
+	if (!g_orc_ip_table || !g_orc_table || !param.orc_lookup_table) {
 		fprintf(stderr,
 			"incomplete ORC unwind tables in file: %s\n", fname);
 		goto out;
@@ -285,9 +366,13 @@ static int do_sort(Elf_Ehdr *ehdr,
 		goto out;
 	}
 
+	/* Make orc_ip return virtual address at execution. */
+	orc_ip_table_offset = orc_ip_addr - (unsigned long)g_orc_ip_table;
+
 	/* create thread to sort ORC unwind tables concurrently */
+	param.orc_num_entries = orc_num_entries;
 	if (pthread_create(&orc_sort_thread, NULL,
-			   sort_orctable, &orc_ip_size)) {
+			   sort_orctable, &param)) {
 		fprintf(stderr,
 			"pthread_create orc_sort_thread failed '%s': %s\n",
 			strerror(errno), fname);
-- 
2.14.4.44.g2045bb6


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 3/3] x86/unwind/orc: Simplify unwind_init() for x86 boot
  2020-06-03 14:31       ` [PATCH v2 0/3] " Huaixin Chang
  2020-06-03 14:31         ` [PATCH 1/3] scripts/sorttable: Change section type of orc_lookup to SHT_PROGBITS Huaixin Chang
  2020-06-03 14:31         ` [PATCH 2/3] scripts/sorttable: Build orc fast lookup table via sorttable tool Huaixin Chang
@ 2020-06-03 14:31         ` Huaixin Chang
  2 siblings, 0 replies; 24+ messages in thread
From: Huaixin Chang @ 2020-06-03 14:31 UTC (permalink / raw)
  To: jpoimboe
  Cc: bp, changhuaixin, hpa, linux-kbuild, linux-kernel, luto,
	michal.lkml, mingo, peterz, tglx, x86, yamada.masahiro

The orc fast lookup table is built by scripts/sorttable tool. All that
is left is setting lookup_num_blocks.

Signed-off-by: Huaixin Chang <changhuaixin@linux.alibaba.com>
Signed-off-by: Shile Zhang <shile.zhang@linux.alibaba.com>
---
 arch/x86/kernel/unwind_orc.c | 40 ----------------------------------------
 1 file changed, 40 deletions(-)

diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 7f969b2d240f..11b87ecf8919 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -264,48 +264,8 @@ void unwind_module_init(struct module *mod, void *_orc_ip, size_t orc_ip_size,
 
 void __init unwind_init(void)
 {
-	size_t orc_ip_size = (void *)__stop_orc_unwind_ip - (void *)__start_orc_unwind_ip;
-	size_t orc_size = (void *)__stop_orc_unwind - (void *)__start_orc_unwind;
-	size_t num_entries = orc_ip_size / sizeof(int);
-	struct orc_entry *orc;
-	int i;
-
-	if (!num_entries || orc_ip_size % sizeof(int) != 0 ||
-	    orc_size % sizeof(struct orc_entry) != 0 ||
-	    num_entries != orc_size / sizeof(struct orc_entry)) {
-		orc_warn("WARNING: Bad or missing .orc_unwind table.  Disabling unwinder.\n");
-		return;
-	}
-
-	/*
-	 * Note, the orc_unwind and orc_unwind_ip tables were already
-	 * sorted at build time via the 'sorttable' tool.
-	 * It's ready for binary search straight away, no need to sort it.
-	 */
-
 	/* Initialize the fast lookup table: */
 	lookup_num_blocks = orc_lookup_end - orc_lookup;
-	for (i = 0; i < lookup_num_blocks-1; i++) {
-		orc = __orc_find(__start_orc_unwind_ip, __start_orc_unwind,
-				 num_entries,
-				 LOOKUP_START_IP + (LOOKUP_BLOCK_SIZE * i));
-		if (!orc) {
-			orc_warn("WARNING: Corrupt .orc_unwind table.  Disabling unwinder.\n");
-			return;
-		}
-
-		orc_lookup[i] = orc - __start_orc_unwind;
-	}
-
-	/* Initialize the ending block: */
-	orc = __orc_find(__start_orc_unwind_ip, __start_orc_unwind, num_entries,
-			 LOOKUP_STOP_IP);
-	if (!orc) {
-		orc_warn("WARNING: Corrupt .orc_unwind table.  Disabling unwinder.\n");
-		return;
-	}
-	orc_lookup[lookup_num_blocks-1] = orc - __start_orc_unwind;
-
 	orc_init = true;
 }
 
-- 
2.14.4.44.g2045bb6


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH v3 0/3] Build ORC fast lookup table in scripts/sorttable tool
  2020-06-01 17:38     ` Josh Poimboeuf
  2020-06-03 13:47       ` changhuaixin
  2020-06-03 14:31       ` [PATCH v2 0/3] " Huaixin Chang
@ 2020-06-03 14:39       ` Huaixin Chang
  2020-06-03 14:39         ` [PATCH 1/3] scripts/sorttable: Change section type of orc_lookup to SHT_PROGBITS Huaixin Chang
                           ` (3 more replies)
  2 siblings, 4 replies; 24+ messages in thread
From: Huaixin Chang @ 2020-06-03 14:39 UTC (permalink / raw)
  To: jpoimboe
  Cc: bp, changhuaixin, hpa, linux-kbuild, linux-kernel, luto,
	michal.lkml, mingo, peterz, tglx, x86, yamada.masahiro

Move building of fast lookup table from boot to sorttable tool. This saves us
6380us boot time on Intel(R) Xeon(R) CPU E5-2682 v4 @ 2.50GHz with 64 cores. It
adds a little more than 7ms to build time when testing on the same CPU.

Changelog v3:
1. Modify annotation of unwind_init().

Changelog v2:
1. Type of section orc_lookup needs to be SHT_PROGBITS.
2. unwind_init() cannot be removed totally as setting lookup_num_blocks is needed.

Huaixin Chang (3):
  scripts/sorttable: Change section type of orc_lookup to SHT_PROGBITS
  scripts/sorttable: Build orc fast lookup table via sorttable tool
  x86/unwind/orc: Simplify unwind_init() for x86 boot

 arch/x86/kernel/unwind_orc.c      | 41 +---------------
 include/asm-generic/vmlinux.lds.h |  2 +
 scripts/sorttable.h               | 99 ++++++++++++++++++++++++++++++++++++---
 3 files changed, 96 insertions(+), 46 deletions(-)

-- 
2.14.4.44.g2045bb6


^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 1/3] scripts/sorttable: Change section type of orc_lookup to SHT_PROGBITS
  2020-06-03 14:39       ` [PATCH v3 0/3] Build ORC fast lookup table in scripts/sorttable tool Huaixin Chang
@ 2020-06-03 14:39         ` Huaixin Chang
  2020-06-03 14:39         ` [PATCH 2/3] scripts/sorttable: Build orc fast lookup table via sorttable tool Huaixin Chang
                           ` (2 subsequent siblings)
  3 siblings, 0 replies; 24+ messages in thread
From: Huaixin Chang @ 2020-06-03 14:39 UTC (permalink / raw)
  To: jpoimboe
  Cc: bp, changhuaixin, hpa, linux-kbuild, linux-kernel, luto,
	michal.lkml, mingo, peterz, tglx, x86, yamada.masahiro

In order to edit orc_lookup table via sorttable, type of section
orc_lookup needs to be SHT_PROGBITS instead of SHT_NOBITS.

Linker script doesn't seem to allow manual specification of the section
type, so just write a byte into the section instead.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Huaixin Chang <changhuaixin@linux.alibaba.com>
Signed-off-by: Shile Zhang <shile.zhang@linux.alibaba.com>
---
 include/asm-generic/vmlinux.lds.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index db600ef218d7..49f4f5bc6165 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -826,6 +826,8 @@
 		. += (((SIZEOF(.text) + LOOKUP_BLOCK_SIZE - 1) /	\
 			LOOKUP_BLOCK_SIZE) + 1) * 4;			\
 		orc_lookup_end = .;					\
+		/* HACK: force SHT_PROGBITS so sorttable can edit: */	\
+		BYTE(1);						\
 	}
 #else
 #define ORC_UNWIND_TABLE
-- 
2.14.4.44.g2045bb6


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 2/3] scripts/sorttable: Build orc fast lookup table via sorttable tool
  2020-06-03 14:39       ` [PATCH v3 0/3] Build ORC fast lookup table in scripts/sorttable tool Huaixin Chang
  2020-06-03 14:39         ` [PATCH 1/3] scripts/sorttable: Change section type of orc_lookup to SHT_PROGBITS Huaixin Chang
@ 2020-06-03 14:39         ` Huaixin Chang
  2020-06-03 14:39         ` [PATCH 3/3] x86/unwind/orc: Simplify unwind_init() for x86 boot Huaixin Chang
  2020-06-29  2:14         ` [PATCH v3 0/3] Build ORC fast lookup table in scripts/sorttable tool changhuaixin
  3 siblings, 0 replies; 24+ messages in thread
From: Huaixin Chang @ 2020-06-03 14:39 UTC (permalink / raw)
  To: jpoimboe
  Cc: bp, changhuaixin, hpa, linux-kbuild, linux-kernel, luto,
	michal.lkml, mingo, peterz, tglx, x86, yamada.masahiro

Since orc tables are already sorted by sorttable tool, let us move
building of fast lookup table into sorttable tool too. This saves us
6380us from boot time under Intel(R) Xeon(R) CPU E5-2682 v4 @ 2.50GHz
with 64 cores.

Signed-off-by: Huaixin Chang <changhuaixin@linux.alibaba.com>
Signed-off-by: Shile Zhang <shile.zhang@linux.alibaba.com>
---
 scripts/sorttable.h | 99 +++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 92 insertions(+), 7 deletions(-)

diff --git a/scripts/sorttable.h b/scripts/sorttable.h
index a2baa2fefb13..a36c76c17be4 100644
--- a/scripts/sorttable.h
+++ b/scripts/sorttable.h
@@ -93,12 +93,50 @@
 char g_err[ERRSTR_MAXSZ];
 int *g_orc_ip_table;
 struct orc_entry *g_orc_table;
+static unsigned long orc_ip_table_offset;
 
 pthread_t orc_sort_thread;
 
+struct orc_sort_param {
+	size_t		lookup_table_size;
+	unsigned int	*orc_lookup_table;
+	unsigned long	start_ip;
+	size_t		text_size;
+	unsigned int	orc_num_entries;
+};
+
 static inline unsigned long orc_ip(const int *ip)
 {
-	return (unsigned long)ip + *ip;
+	return (unsigned long)ip + *ip + orc_ip_table_offset;
+}
+
+static struct orc_entry *__orc_find(int *ip_table, struct orc_entry *u_table,
+				    unsigned int num_entries, unsigned long ip)
+{
+	int *first = ip_table;
+	int *last = ip_table + num_entries - 1;
+	int *mid = first, *found = first;
+
+	if (!num_entries)
+		return NULL;
+
+	/*
+	 * Do a binary range search to find the rightmost duplicate of a given
+	 * starting address.  Some entries are section terminators which are
+	 * "weak" entries for ensuring there are no gaps.  They should be
+	 * ignored when they conflict with a real entry.
+	 */
+	while (first <= last) {
+		mid = first + ((last - first) / 2);
+
+		if (orc_ip(mid) <= ip) {
+			found = mid;
+			first = mid + 1;
+		} else
+			last = mid - 1;
+	}
+
+	return u_table + (found - ip_table);
 }
 
 static int orc_sort_cmp(const void *_a, const void *_b)
@@ -130,18 +168,24 @@ static void *sort_orctable(void *arg)
 	int *idxs = NULL;
 	int *tmp_orc_ip_table = NULL;
 	struct orc_entry *tmp_orc_table = NULL;
-	unsigned int *orc_ip_size = (unsigned int *)arg;
-	unsigned int num_entries = *orc_ip_size / sizeof(int);
+	struct orc_sort_param *param = (struct orc_sort_param *)arg;
+	unsigned int num_entries = param->orc_num_entries;
+	unsigned int orc_ip_size = num_entries * sizeof(int);
 	unsigned int orc_size = num_entries * sizeof(struct orc_entry);
+	unsigned int lookup_num_blocks = param->lookup_table_size / sizeof(int);
+	unsigned int *orc_lookup = param->orc_lookup_table;
+	unsigned long lookup_start_ip = param->start_ip;
+	unsigned long lookup_stop_ip = param->start_ip + param->text_size;
+	struct orc_entry *orc;
 
-	idxs = (int *)malloc(*orc_ip_size);
+	idxs = (int *)malloc(orc_ip_size);
 	if (!idxs) {
 		snprintf(g_err, ERRSTR_MAXSZ, "malloc idxs: %s",
 			 strerror(errno));
 		pthread_exit(g_err);
 	}
 
-	tmp_orc_ip_table = (int *)malloc(*orc_ip_size);
+	tmp_orc_ip_table = (int *)malloc(orc_ip_size);
 	if (!tmp_orc_ip_table) {
 		snprintf(g_err, ERRSTR_MAXSZ, "malloc tmp_orc_ip_table: %s",
 			 strerror(errno));
@@ -173,6 +217,31 @@ static void *sort_orctable(void *arg)
 		g_orc_table[i] = tmp_orc_table[idxs[i]];
 	}
 
+#define LOOKUP_BLOCK_ORDER	8
+#define LOOKUP_BLOCK_SIZE	(1 << LOOKUP_BLOCK_ORDER)
+
+	for (i = 0; i < lookup_num_blocks-1; i++) {
+		orc = __orc_find(g_orc_ip_table, g_orc_table,
+				 num_entries,
+				 lookup_start_ip + (LOOKUP_BLOCK_SIZE * i));
+		if (!orc) {
+			snprintf(g_err, ERRSTR_MAXSZ,
+					"Corrupt .orc_unwind table\n");
+			pthread_exit(g_err);
+		}
+
+		orc_lookup[i] = orc - g_orc_table;
+	}
+
+	/* Initialize the ending block: */
+	orc = __orc_find(g_orc_ip_table, g_orc_table, num_entries,
+			 lookup_stop_ip);
+	if (!orc) {
+		snprintf(g_err, ERRSTR_MAXSZ, "Corrupt .orc_unwind table\n");
+		pthread_exit(g_err);
+	}
+	orc_lookup[lookup_num_blocks-1] = orc - g_orc_table;
+
 	free(idxs);
 	free(tmp_orc_ip_table);
 	free(tmp_orc_table);
@@ -221,6 +290,8 @@ static int do_sort(Elf_Ehdr *ehdr,
 	unsigned int orc_ip_size = 0;
 	unsigned int orc_size = 0;
 	unsigned int orc_num_entries = 0;
+	unsigned long orc_ip_addr = 0;
+	struct orc_sort_param param;
 #endif
 
 	shstrndx = r2(&ehdr->e_shstrndx);
@@ -259,17 +330,27 @@ static int do_sort(Elf_Ehdr *ehdr,
 			orc_ip_size = s->sh_size;
 			g_orc_ip_table = (int *)((void *)ehdr +
 						   s->sh_offset);
+			orc_ip_addr = s->sh_addr;
 		}
 		if (!strcmp(secstrings + idx, ".orc_unwind")) {
 			orc_size = s->sh_size;
 			g_orc_table = (struct orc_entry *)((void *)ehdr +
 							     s->sh_offset);
 		}
+		if (!strcmp(secstrings + idx, ".orc_lookup")) {
+			param.lookup_table_size = s->sh_size;
+			param.orc_lookup_table = (unsigned int *)
+				((void *)ehdr + s->sh_offset);
+		}
+		if (!strcmp(secstrings + idx, ".text")) {
+			param.text_size = s->sh_size;
+			param.start_ip = s->sh_addr;
+		}
 #endif
 	} /* for loop */
 
 #if defined(SORTTABLE_64) && defined(UNWINDER_ORC_ENABLED)
-	if (!g_orc_ip_table || !g_orc_table) {
+	if (!g_orc_ip_table || !g_orc_table || !param.orc_lookup_table) {
 		fprintf(stderr,
 			"incomplete ORC unwind tables in file: %s\n", fname);
 		goto out;
@@ -285,9 +366,13 @@ static int do_sort(Elf_Ehdr *ehdr,
 		goto out;
 	}
 
+	/* Make orc_ip return virtual address at execution. */
+	orc_ip_table_offset = orc_ip_addr - (unsigned long)g_orc_ip_table;
+
 	/* create thread to sort ORC unwind tables concurrently */
+	param.orc_num_entries = orc_num_entries;
 	if (pthread_create(&orc_sort_thread, NULL,
-			   sort_orctable, &orc_ip_size)) {
+			   sort_orctable, &param)) {
 		fprintf(stderr,
 			"pthread_create orc_sort_thread failed '%s': %s\n",
 			strerror(errno), fname);
-- 
2.14.4.44.g2045bb6


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 3/3] x86/unwind/orc: Simplify unwind_init() for x86 boot
  2020-06-03 14:39       ` [PATCH v3 0/3] Build ORC fast lookup table in scripts/sorttable tool Huaixin Chang
  2020-06-03 14:39         ` [PATCH 1/3] scripts/sorttable: Change section type of orc_lookup to SHT_PROGBITS Huaixin Chang
  2020-06-03 14:39         ` [PATCH 2/3] scripts/sorttable: Build orc fast lookup table via sorttable tool Huaixin Chang
@ 2020-06-03 14:39         ` Huaixin Chang
  2020-06-29  2:14         ` [PATCH v3 0/3] Build ORC fast lookup table in scripts/sorttable tool changhuaixin
  3 siblings, 0 replies; 24+ messages in thread
From: Huaixin Chang @ 2020-06-03 14:39 UTC (permalink / raw)
  To: jpoimboe
  Cc: bp, changhuaixin, hpa, linux-kbuild, linux-kernel, luto,
	michal.lkml, mingo, peterz, tglx, x86, yamada.masahiro

The orc fast lookup table is built by scripts/sorttable tool. All that
is left is setting lookup_num_blocks.

Signed-off-by: Huaixin Chang <changhuaixin@linux.alibaba.com>
Signed-off-by: Shile Zhang <shile.zhang@linux.alibaba.com>
---
 arch/x86/kernel/unwind_orc.c | 41 ++---------------------------------------
 1 file changed, 2 insertions(+), 39 deletions(-)

diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 7f969b2d240f..e4cf124c7a51 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -264,48 +264,11 @@ void unwind_module_init(struct module *mod, void *_orc_ip, size_t orc_ip_size,
 
 void __init unwind_init(void)
 {
-	size_t orc_ip_size = (void *)__stop_orc_unwind_ip - (void *)__start_orc_unwind_ip;
-	size_t orc_size = (void *)__stop_orc_unwind - (void *)__start_orc_unwind;
-	size_t num_entries = orc_ip_size / sizeof(int);
-	struct orc_entry *orc;
-	int i;
-
-	if (!num_entries || orc_ip_size % sizeof(int) != 0 ||
-	    orc_size % sizeof(struct orc_entry) != 0 ||
-	    num_entries != orc_size / sizeof(struct orc_entry)) {
-		orc_warn("WARNING: Bad or missing .orc_unwind table.  Disabling unwinder.\n");
-		return;
-	}
-
 	/*
-	 * Note, the orc_unwind and orc_unwind_ip tables were already
-	 * sorted at build time via the 'sorttable' tool.
-	 * It's ready for binary search straight away, no need to sort it.
+	 * The fast lookup table is built via sorttable tool. Initialize
+	 * lookup_num_blocks only.
 	 */
-
-	/* Initialize the fast lookup table: */
 	lookup_num_blocks = orc_lookup_end - orc_lookup;
-	for (i = 0; i < lookup_num_blocks-1; i++) {
-		orc = __orc_find(__start_orc_unwind_ip, __start_orc_unwind,
-				 num_entries,
-				 LOOKUP_START_IP + (LOOKUP_BLOCK_SIZE * i));
-		if (!orc) {
-			orc_warn("WARNING: Corrupt .orc_unwind table.  Disabling unwinder.\n");
-			return;
-		}
-
-		orc_lookup[i] = orc - __start_orc_unwind;
-	}
-
-	/* Initialize the ending block: */
-	orc = __orc_find(__start_orc_unwind_ip, __start_orc_unwind, num_entries,
-			 LOOKUP_STOP_IP);
-	if (!orc) {
-		orc_warn("WARNING: Corrupt .orc_unwind table.  Disabling unwinder.\n");
-		return;
-	}
-	orc_lookup[lookup_num_blocks-1] = orc - __start_orc_unwind;
-
 	orc_init = true;
 }
 
-- 
2.14.4.44.g2045bb6


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH v3 0/3] Build ORC fast lookup table in scripts/sorttable tool
  2020-06-03 14:39       ` [PATCH v3 0/3] Build ORC fast lookup table in scripts/sorttable tool Huaixin Chang
                           ` (2 preceding siblings ...)
  2020-06-03 14:39         ` [PATCH 3/3] x86/unwind/orc: Simplify unwind_init() for x86 boot Huaixin Chang
@ 2020-06-29  2:14         ` changhuaixin
  3 siblings, 0 replies; 24+ messages in thread
From: changhuaixin @ 2020-06-29  2:14 UTC (permalink / raw)
  To: jpoimboe
  Cc: changhuaixin, bp, hpa, linux-kbuild, linux-kernel, luto,
	michal.lkml, mingo, Peter Zijlstra, tglx, x86, yamada.masahiro

Hi Josh, will you please have a look at this patchset?

There might be another way to make section .orc_lookup SHT_PROGBITS: create its section header when the orc_unwind and orc_unwind_ip tables are written. It might look as follows:

diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
index 3f98dcfbc177..860d4dcec8e6 100644
--- a/tools/objtool/orc_gen.c
+++ b/tools/objtool/orc_gen.c
@@ -183,6 +183,10 @@ int create_orc_sections(struct objtool_file *file)
        u_sec = elf_create_section(file->elf, ".orc_unwind",
                                   sizeof(struct orc_entry), idx);

+       /* make flags of section orc_lookup right */
+       if (!elf_create_section(file->elf, ".orc_lookup", sizeof(int), 0))
+               return -1;
+

What do you think about this way of setting SHT_PROGBITS?

> On Jun 3, 2020, at 10:39 PM, Huaixin Chang <changhuaixin@linux.alibaba.com> wrote:
> 
> Move building of fast lookup table from boot to sorttable tool. This saves us
> 6380us boot time on Intel(R) Xeon(R) CPU E5-2682 v4 @ 2.50GHz with 64 cores. It
> adds a little more than 7ms to build time when testing on the same CPU.
> 
> Changelog v3:
> 1. Modify annotation of unwind_init().
> 
> Changelog v2:
> 1. Type of section orc_lookup needs to be SHT_PROGBITS.
> 2. unwind_init() cannot be removed totally as setting lookup_num_blocks is needed.
> 
> Huaixin Chang (3):
>  scripts/sorttable: Change section type of orc_lookup to SHT_PROGBITS
>  scripts/sorttable: Build orc fast lookup table via sorttable tool
>  x86/unwind/orc: Simplify unwind_init() for x86 boot
> 
> arch/x86/kernel/unwind_orc.c      | 41 +---------------
> include/asm-generic/vmlinux.lds.h |  2 +
> scripts/sorttable.h               | 99 ++++++++++++++++++++++++++++++++++++---
> 3 files changed, 96 insertions(+), 46 deletions(-)
> 
> -- 
> 2.14.4.44.g2045bb6


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 2/3] scripts/sorttable: Build ORC fast lookup table via sorttable tool
  2020-08-07  4:17 ` [PATCH v2 0/3] Build ORC fast lookup table in scripts/sorttable tool Huaixin Chang
@ 2020-08-07  4:18   ` Huaixin Chang
  0 siblings, 0 replies; 24+ messages in thread
From: Huaixin Chang @ 2020-08-07  4:18 UTC (permalink / raw)
  To: changhuaixin
  Cc: bp, hpa, jpoimboe, linux-kbuild, linux-kernel, luto, michal.lkml,
	mingo, peterz, tglx, x86, yamada.masahiro

Since ORC tables are already sorted by sorttable tool, let us move
building of fast lookup table into sorttable tool too. This saves us
6380us from boot time under Intel(R) Xeon(R) CPU E5-2682 v4 @ 2.50GHz
with 64 cores.

Signed-off-by: Huaixin Chang <changhuaixin@linux.alibaba.com>
Signed-off-by: Shile Zhang <shile.zhang@linux.alibaba.com>
---
 arch/x86/include/asm/orc_lookup.h      | 16 ------
 arch/x86/include/asm/orc_types.h       | 16 ++++++
 arch/x86/kernel/vmlinux.lds.S          |  2 +-
 scripts/sorttable.h                    | 96 +++++++++++++++++++++++++++++++---
 tools/arch/x86/include/asm/orc_types.h | 16 ++++++
 5 files changed, 122 insertions(+), 24 deletions(-)

diff --git a/arch/x86/include/asm/orc_lookup.h b/arch/x86/include/asm/orc_lookup.h
index 241631282e43..c75eb1f82bdb 100644
--- a/arch/x86/include/asm/orc_lookup.h
+++ b/arch/x86/include/asm/orc_lookup.h
@@ -5,22 +5,6 @@
 #ifndef _ORC_LOOKUP_H
 #define _ORC_LOOKUP_H
 
-/*
- * This is a lookup table for speeding up access to the .orc_unwind table.
- * Given an input address offset, the corresponding lookup table entry
- * specifies a subset of the .orc_unwind table to search.
- *
- * Each block represents the end of the previous range and the start of the
- * next range.  An extra block is added to give the last range an end.
- *
- * The block size should be a power of 2 to avoid a costly 'div' instruction.
- *
- * A block size of 256 was chosen because it roughly doubles unwinder
- * performance while only adding ~5% to the ORC data footprint.
- */
-#define LOOKUP_BLOCK_ORDER	8
-#define LOOKUP_BLOCK_SIZE	(1 << LOOKUP_BLOCK_ORDER)
-
 #ifndef LINKER_SCRIPT
 
 extern unsigned int orc_lookup[];
diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h
index d25534940bde..b93c6a7b4da4 100644
--- a/arch/x86/include/asm/orc_types.h
+++ b/arch/x86/include/asm/orc_types.h
@@ -9,6 +9,22 @@
 #include <linux/types.h>
 #include <linux/compiler.h>
 
+/*
+ * This is a lookup table for speeding up access to the .orc_unwind table.
+ * Given an input address offset, the corresponding lookup table entry
+ * specifies a subset of the .orc_unwind table to search.
+ *
+ * Each block represents the end of the previous range and the start of the
+ * next range.  An extra block is added to give the last range an end.
+ *
+ * The block size should be a power of 2 to avoid a costly 'div' instruction.
+ *
+ * A block size of 256 was chosen because it roughly doubles unwinder
+ * performance while only adding ~5% to the ORC data footprint.
+ */
+#define LOOKUP_BLOCK_ORDER	8
+#define LOOKUP_BLOCK_SIZE	(1 << LOOKUP_BLOCK_ORDER)
+
 /*
  * The ORC_REG_* registers are base registers which are used to find other
  * registers on the stack.
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 9a03e5b23135..75760e7f6319 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -29,7 +29,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
 #include <asm/page_types.h>
-#include <asm/orc_lookup.h>
+#include <asm/orc_types.h>
 #include <asm/cache.h>
 #include <asm/boot.h>
 
diff --git a/scripts/sorttable.h b/scripts/sorttable.h
index a2baa2fefb13..de9822f8ae8f 100644
--- a/scripts/sorttable.h
+++ b/scripts/sorttable.h
@@ -93,12 +93,50 @@
 char g_err[ERRSTR_MAXSZ];
 int *g_orc_ip_table;
 struct orc_entry *g_orc_table;
+static unsigned long orc_ip_table_offset;
 
 pthread_t orc_sort_thread;
 
+struct orc_sort_param {
+	size_t		lookup_table_size;
+	unsigned int	*orc_lookup_table;
+	unsigned long	start_ip;
+	size_t		text_size;
+	unsigned int	orc_num_entries;
+};
+
 static inline unsigned long orc_ip(const int *ip)
 {
-	return (unsigned long)ip + *ip;
+	return (unsigned long)ip + *ip + orc_ip_table_offset;
+}
+
+static struct orc_entry *__orc_find(int *ip_table, struct orc_entry *u_table,
+				    unsigned int num_entries, unsigned long ip)
+{
+	int *first = ip_table;
+	int *last = ip_table + num_entries - 1;
+	int *mid = first, *found = first;
+
+	if (!num_entries)
+		return NULL;
+
+	/*
+	 * Do a binary range search to find the rightmost duplicate of a given
+	 * starting address.  Some entries are section terminators which are
+	 * "weak" entries for ensuring there are no gaps.  They should be
+	 * ignored when they conflict with a real entry.
+	 */
+	while (first <= last) {
+		mid = first + ((last - first) / 2);
+
+		if (orc_ip(mid) <= ip) {
+			found = mid;
+			first = mid + 1;
+		} else
+			last = mid - 1;
+	}
+
+	return u_table + (found - ip_table);
 }
 
 static int orc_sort_cmp(const void *_a, const void *_b)
@@ -130,18 +168,24 @@ static void *sort_orctable(void *arg)
 	int *idxs = NULL;
 	int *tmp_orc_ip_table = NULL;
 	struct orc_entry *tmp_orc_table = NULL;
-	unsigned int *orc_ip_size = (unsigned int *)arg;
-	unsigned int num_entries = *orc_ip_size / sizeof(int);
+	struct orc_sort_param *param = (struct orc_sort_param *)arg;
+	unsigned int num_entries = param->orc_num_entries;
+	unsigned int orc_ip_size = num_entries * sizeof(int);
 	unsigned int orc_size = num_entries * sizeof(struct orc_entry);
+	unsigned int lookup_num_blocks = param->lookup_table_size / sizeof(int);
+	unsigned int *orc_lookup = param->orc_lookup_table;
+	unsigned long lookup_start_ip = param->start_ip;
+	unsigned long lookup_stop_ip = param->start_ip + param->text_size;
+	struct orc_entry *orc;
 
-	idxs = (int *)malloc(*orc_ip_size);
+	idxs = (int *)malloc(orc_ip_size);
 	if (!idxs) {
 		snprintf(g_err, ERRSTR_MAXSZ, "malloc idxs: %s",
 			 strerror(errno));
 		pthread_exit(g_err);
 	}
 
-	tmp_orc_ip_table = (int *)malloc(*orc_ip_size);
+	tmp_orc_ip_table = (int *)malloc(orc_ip_size);
 	if (!tmp_orc_ip_table) {
 		snprintf(g_err, ERRSTR_MAXSZ, "malloc tmp_orc_ip_table: %s",
 			 strerror(errno));
@@ -173,6 +217,28 @@ static void *sort_orctable(void *arg)
 		g_orc_table[i] = tmp_orc_table[idxs[i]];
 	}
 
+	for (i = 0; i < lookup_num_blocks-1; i++) {
+		orc = __orc_find(g_orc_ip_table, g_orc_table,
+				 num_entries,
+				 lookup_start_ip + (LOOKUP_BLOCK_SIZE * i));
+		if (!orc) {
+			snprintf(g_err, ERRSTR_MAXSZ,
+					"Corrupt .orc_unwind table\n");
+			pthread_exit(g_err);
+		}
+
+		orc_lookup[i] = orc - g_orc_table;
+	}
+
+	/* Initialize the ending block: */
+	orc = __orc_find(g_orc_ip_table, g_orc_table, num_entries,
+			 lookup_stop_ip);
+	if (!orc) {
+		snprintf(g_err, ERRSTR_MAXSZ, "Corrupt .orc_unwind table\n");
+		pthread_exit(g_err);
+	}
+	orc_lookup[lookup_num_blocks-1] = orc - g_orc_table;
+
 	free(idxs);
 	free(tmp_orc_ip_table);
 	free(tmp_orc_table);
@@ -221,6 +287,8 @@ static int do_sort(Elf_Ehdr *ehdr,
 	unsigned int orc_ip_size = 0;
 	unsigned int orc_size = 0;
 	unsigned int orc_num_entries = 0;
+	unsigned long orc_ip_addr = 0;
+	struct orc_sort_param param;
 #endif
 
 	shstrndx = r2(&ehdr->e_shstrndx);
@@ -259,17 +327,27 @@ static int do_sort(Elf_Ehdr *ehdr,
 			orc_ip_size = s->sh_size;
 			g_orc_ip_table = (int *)((void *)ehdr +
 						   s->sh_offset);
+			orc_ip_addr = s->sh_addr;
 		}
 		if (!strcmp(secstrings + idx, ".orc_unwind")) {
 			orc_size = s->sh_size;
 			g_orc_table = (struct orc_entry *)((void *)ehdr +
 							     s->sh_offset);
 		}
+		if (!strcmp(secstrings + idx, ".orc_lookup")) {
+			param.lookup_table_size = s->sh_size;
+			param.orc_lookup_table = (unsigned int *)
+				((void *)ehdr + s->sh_offset);
+		}
+		if (!strcmp(secstrings + idx, ".text")) {
+			param.text_size = s->sh_size;
+			param.start_ip = s->sh_addr;
+		}
 #endif
 	} /* for loop */
 
 #if defined(SORTTABLE_64) && defined(UNWINDER_ORC_ENABLED)
-	if (!g_orc_ip_table || !g_orc_table) {
+	if (!g_orc_ip_table || !g_orc_table || !param.orc_lookup_table) {
 		fprintf(stderr,
 			"incomplete ORC unwind tables in file: %s\n", fname);
 		goto out;
@@ -285,9 +363,13 @@ static int do_sort(Elf_Ehdr *ehdr,
 		goto out;
 	}
 
+	/* Make orc_ip return virtual address at execution. */
+	orc_ip_table_offset = orc_ip_addr - (unsigned long)g_orc_ip_table;
+
 	/* create thread to sort ORC unwind tables concurrently */
+	param.orc_num_entries = orc_num_entries;
 	if (pthread_create(&orc_sort_thread, NULL,
-			   sort_orctable, &orc_ip_size)) {
+			   sort_orctable, &param)) {
 		fprintf(stderr,
 			"pthread_create orc_sort_thread failed '%s': %s\n",
 			strerror(errno), fname);
diff --git a/tools/arch/x86/include/asm/orc_types.h b/tools/arch/x86/include/asm/orc_types.h
index d25534940bde..b93c6a7b4da4 100644
--- a/tools/arch/x86/include/asm/orc_types.h
+++ b/tools/arch/x86/include/asm/orc_types.h
@@ -9,6 +9,22 @@
 #include <linux/types.h>
 #include <linux/compiler.h>
 
+/*
+ * This is a lookup table for speeding up access to the .orc_unwind table.
+ * Given an input address offset, the corresponding lookup table entry
+ * specifies a subset of the .orc_unwind table to search.
+ *
+ * Each block represents the end of the previous range and the start of the
+ * next range.  An extra block is added to give the last range an end.
+ *
+ * The block size should be a power of 2 to avoid a costly 'div' instruction.
+ *
+ * A block size of 256 was chosen because it roughly doubles unwinder
+ * performance while only adding ~5% to the ORC data footprint.
+ */
+#define LOOKUP_BLOCK_ORDER	8
+#define LOOKUP_BLOCK_SIZE	(1 << LOOKUP_BLOCK_ORDER)
+
 /*
  * The ORC_REG_* registers are base registers which are used to find other
  * registers on the stack.
-- 
2.14.4.44.g2045bb6


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/3] scripts/sorttable: Build orc fast lookup table via sorttable tool
  2020-07-24 13:53   ` Ingo Molnar
@ 2020-07-27  2:19     ` changhuaixin
  0 siblings, 0 replies; 24+ messages in thread
From: changhuaixin @ 2020-07-27  2:19 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: changhuaixin, jpoimboe, bp, hpa, linux-kbuild, linux-kernel,
	luto, michal.lkml, mingo, Peter Zijlstra, tglx, x86,
	yamada.masahiro



> On Jul 24, 2020, at 9:53 PM, Ingo Molnar <mingo@kernel.org> wrote:
> 
> 
> * Huaixin Chang <changhuaixin@linux.alibaba.com> wrote:
> 
>> Since orc tables are already sorted by sorttable tool, let us move
>> building of fast lookup table into sorttable tool too. This saves us
>> 6380us from boot time under Intel(R) Xeon(R) CPU E5-2682 v4 @ 2.50GHz
>> with 64 cores.
> 
> Neat!
> 
>> +struct orc_sort_param {
>> +	size_t		lookup_table_size;
>> +	unsigned int	*orc_lookup_table;
>> +	unsigned long	start_ip;
>> +	size_t		text_size;
>> +	unsigned int	orc_num_entries;
>> +};
> 
>> 
>> +#define LOOKUP_BLOCK_ORDER	8
>> +#define LOOKUP_BLOCK_SIZE	(1 << LOOKUP_BLOCK_ORDER)
>> +
>> +	for (i = 0; i < lookup_num_blocks-1; i++) {
>> +		orc = __orc_find(g_orc_ip_table, g_orc_table,
>> +				 num_entries,
>> +				 lookup_start_ip + (LOOKUP_BLOCK_SIZE * i));
>> +		if (!orc) {
>> +			snprintf(g_err, ERRSTR_MAXSZ,
>> +					"Corrupt .orc_unwind table\n");
>> +			pthread_exit(g_err);
>> +		}
>> +
>> +		orc_lookup[i] = orc - g_orc_table;
>> +	}
>> +
>> +	/* Initialize the ending block: */
>> +	orc = __orc_find(g_orc_ip_table, g_orc_table, num_entries,
>> +			 lookup_stop_ip);
>> +	if (!orc) {
>> +		snprintf(g_err, ERRSTR_MAXSZ, "Corrupt .orc_unwind table\n");
>> +		pthread_exit(g_err);
>> +	}
>> +	orc_lookup[lookup_num_blocks-1] = orc - g_orc_table;
> 
> Yeah, so now this definition of LOOKUP_BLOCK_* basically duplicates the 
> arch/x86/include/asm/orc_lookup.h size, with no obvious link between 
> the two. This is asking for trouble.
> 
> <asm/orc_lookup.h> looks simple enough - can we include it in 
> scripts/sorttable.h?
> 
> Or better yet, please move these two defines into <asm/orc_types.h>, 
> which is already included in sorttable.h.
> 
Thanks!
I will move these two defines into <asm/orc_types.h> and capitalize the spelling of 'ORC' in the next version of the patches.

Huaixin

> BTW., please update your patches to spell 'ORC' in a capitalized 
> fashion, like most of the existing code does:
> 
>> 	/* create thread to sort ORC unwind tables concurrently */
> 
> Thanks,
> 
> 	Ingo


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 2/3] scripts/sorttable: Build orc fast lookup table via sorttable tool
  2020-07-23  3:46 ` [PATCH 2/3] scripts/sorttable: Build orc fast lookup table via sorttable tool Huaixin Chang
@ 2020-07-24 13:53   ` Ingo Molnar
  2020-07-27  2:19     ` changhuaixin
  0 siblings, 1 reply; 24+ messages in thread
From: Ingo Molnar @ 2020-07-24 13:53 UTC (permalink / raw)
  To: Huaixin Chang
  Cc: jpoimboe, bp, hpa, linux-kbuild, linux-kernel, luto, michal.lkml,
	mingo, peterz, tglx, x86, yamada.masahiro


* Huaixin Chang <changhuaixin@linux.alibaba.com> wrote:

> Since orc tables are already sorted by sorttable tool, let us move
> building of fast lookup table into sorttable tool too. This saves us
> 6380us from boot time under Intel(R) Xeon(R) CPU E5-2682 v4 @ 2.50GHz
> with 64 cores.

Neat!

> +struct orc_sort_param {
> +	size_t		lookup_table_size;
> +	unsigned int	*orc_lookup_table;
> +	unsigned long	start_ip;
> +	size_t		text_size;
> +	unsigned int	orc_num_entries;
> +};

>  
> +#define LOOKUP_BLOCK_ORDER	8
> +#define LOOKUP_BLOCK_SIZE	(1 << LOOKUP_BLOCK_ORDER)
> +
> +	for (i = 0; i < lookup_num_blocks-1; i++) {
> +		orc = __orc_find(g_orc_ip_table, g_orc_table,
> +				 num_entries,
> +				 lookup_start_ip + (LOOKUP_BLOCK_SIZE * i));
> +		if (!orc) {
> +			snprintf(g_err, ERRSTR_MAXSZ,
> +					"Corrupt .orc_unwind table\n");
> +			pthread_exit(g_err);
> +		}
> +
> +		orc_lookup[i] = orc - g_orc_table;
> +	}
> +
> +	/* Initialize the ending block: */
> +	orc = __orc_find(g_orc_ip_table, g_orc_table, num_entries,
> +			 lookup_stop_ip);
> +	if (!orc) {
> +		snprintf(g_err, ERRSTR_MAXSZ, "Corrupt .orc_unwind table\n");
> +		pthread_exit(g_err);
> +	}
> +	orc_lookup[lookup_num_blocks-1] = orc - g_orc_table;

Yeah, so now this definition of LOOKUP_BLOCK_* basically duplicates the 
arch/x86/include/asm/orc_lookup.h size, with no obvious link between 
the two. This is asking for trouble.

<asm/orc_lookup.h> looks simple enough - can we include it in 
scripts/sorttable.h?

Or better yet, please move these two defines into <asm/orc_types.h>, 
which is already included in sorttable.h.

BTW., please update your patches to spell 'ORC' in a capitalized 
fashion, like most of the existing code does:

>  	/* create thread to sort ORC unwind tables concurrently */

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 2/3] scripts/sorttable: Build orc fast lookup table via sorttable tool
  2020-07-23  3:46 [PATCH RESEND " Huaixin Chang
@ 2020-07-23  3:46 ` Huaixin Chang
  2020-07-24 13:53   ` Ingo Molnar
  2020-08-07  4:17 ` [PATCH v2 0/3] Build ORC fast lookup table in scripts/sorttable tool Huaixin Chang
  1 sibling, 1 reply; 24+ messages in thread
From: Huaixin Chang @ 2020-07-23  3:46 UTC (permalink / raw)
  To: jpoimboe
  Cc: bp, changhuaixin, hpa, linux-kbuild, linux-kernel, luto,
	michal.lkml, mingo, peterz, tglx, x86, yamada.masahiro

Since orc tables are already sorted by sorttable tool, let us move
building of fast lookup table into sorttable tool too. This saves us
6380us from boot time under Intel(R) Xeon(R) CPU E5-2682 v4 @ 2.50GHz
with 64 cores.

Signed-off-by: Huaixin Chang <changhuaixin@linux.alibaba.com>
Signed-off-by: Shile Zhang <shile.zhang@linux.alibaba.com>
---
 scripts/sorttable.h | 99 +++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 92 insertions(+), 7 deletions(-)

diff --git a/scripts/sorttable.h b/scripts/sorttable.h
index a2baa2fefb13..a36c76c17be4 100644
--- a/scripts/sorttable.h
+++ b/scripts/sorttable.h
@@ -93,12 +93,50 @@
 char g_err[ERRSTR_MAXSZ];
 int *g_orc_ip_table;
 struct orc_entry *g_orc_table;
+static unsigned long orc_ip_table_offset;
 
 pthread_t orc_sort_thread;
 
+struct orc_sort_param {
+	size_t		lookup_table_size;
+	unsigned int	*orc_lookup_table;
+	unsigned long	start_ip;
+	size_t		text_size;
+	unsigned int	orc_num_entries;
+};
+
 static inline unsigned long orc_ip(const int *ip)
 {
-	return (unsigned long)ip + *ip;
+	return (unsigned long)ip + *ip + orc_ip_table_offset;
+}
+
+static struct orc_entry *__orc_find(int *ip_table, struct orc_entry *u_table,
+				    unsigned int num_entries, unsigned long ip)
+{
+	int *first = ip_table;
+	int *last = ip_table + num_entries - 1;
+	int *mid = first, *found = first;
+
+	if (!num_entries)
+		return NULL;
+
+	/*
+	 * Do a binary range search to find the rightmost duplicate of a given
+	 * starting address.  Some entries are section terminators which are
+	 * "weak" entries for ensuring there are no gaps.  They should be
+	 * ignored when they conflict with a real entry.
+	 */
+	while (first <= last) {
+		mid = first + ((last - first) / 2);
+
+		if (orc_ip(mid) <= ip) {
+			found = mid;
+			first = mid + 1;
+		} else
+			last = mid - 1;
+	}
+
+	return u_table + (found - ip_table);
 }
 
 static int orc_sort_cmp(const void *_a, const void *_b)
@@ -130,18 +168,24 @@ static void *sort_orctable(void *arg)
 	int *idxs = NULL;
 	int *tmp_orc_ip_table = NULL;
 	struct orc_entry *tmp_orc_table = NULL;
-	unsigned int *orc_ip_size = (unsigned int *)arg;
-	unsigned int num_entries = *orc_ip_size / sizeof(int);
+	struct orc_sort_param *param = (struct orc_sort_param *)arg;
+	unsigned int num_entries = param->orc_num_entries;
+	unsigned int orc_ip_size = num_entries * sizeof(int);
 	unsigned int orc_size = num_entries * sizeof(struct orc_entry);
+	unsigned int lookup_num_blocks = param->lookup_table_size / sizeof(int);
+	unsigned int *orc_lookup = param->orc_lookup_table;
+	unsigned long lookup_start_ip = param->start_ip;
+	unsigned long lookup_stop_ip = param->start_ip + param->text_size;
+	struct orc_entry *orc;
 
-	idxs = (int *)malloc(*orc_ip_size);
+	idxs = (int *)malloc(orc_ip_size);
 	if (!idxs) {
 		snprintf(g_err, ERRSTR_MAXSZ, "malloc idxs: %s",
 			 strerror(errno));
 		pthread_exit(g_err);
 	}
 
-	tmp_orc_ip_table = (int *)malloc(*orc_ip_size);
+	tmp_orc_ip_table = (int *)malloc(orc_ip_size);
 	if (!tmp_orc_ip_table) {
 		snprintf(g_err, ERRSTR_MAXSZ, "malloc tmp_orc_ip_table: %s",
 			 strerror(errno));
@@ -173,6 +217,31 @@ static void *sort_orctable(void *arg)
 		g_orc_table[i] = tmp_orc_table[idxs[i]];
 	}
 
+#define LOOKUP_BLOCK_ORDER	8
+#define LOOKUP_BLOCK_SIZE	(1 << LOOKUP_BLOCK_ORDER)
+
+	for (i = 0; i < lookup_num_blocks-1; i++) {
+		orc = __orc_find(g_orc_ip_table, g_orc_table,
+				 num_entries,
+				 lookup_start_ip + (LOOKUP_BLOCK_SIZE * i));
+		if (!orc) {
+			snprintf(g_err, ERRSTR_MAXSZ,
+					"Corrupt .orc_unwind table\n");
+			pthread_exit(g_err);
+		}
+
+		orc_lookup[i] = orc - g_orc_table;
+	}
+
+	/* Initialize the ending block: */
+	orc = __orc_find(g_orc_ip_table, g_orc_table, num_entries,
+			 lookup_stop_ip);
+	if (!orc) {
+		snprintf(g_err, ERRSTR_MAXSZ, "Corrupt .orc_unwind table\n");
+		pthread_exit(g_err);
+	}
+	orc_lookup[lookup_num_blocks-1] = orc - g_orc_table;
+
 	free(idxs);
 	free(tmp_orc_ip_table);
 	free(tmp_orc_table);
@@ -221,6 +290,8 @@ static int do_sort(Elf_Ehdr *ehdr,
 	unsigned int orc_ip_size = 0;
 	unsigned int orc_size = 0;
 	unsigned int orc_num_entries = 0;
+	unsigned long orc_ip_addr = 0;
+	struct orc_sort_param param;
 #endif
 
 	shstrndx = r2(&ehdr->e_shstrndx);
@@ -259,17 +330,27 @@ static int do_sort(Elf_Ehdr *ehdr,
 			orc_ip_size = s->sh_size;
 			g_orc_ip_table = (int *)((void *)ehdr +
 						   s->sh_offset);
+			orc_ip_addr = s->sh_addr;
 		}
 		if (!strcmp(secstrings + idx, ".orc_unwind")) {
 			orc_size = s->sh_size;
 			g_orc_table = (struct orc_entry *)((void *)ehdr +
 							     s->sh_offset);
 		}
+		if (!strcmp(secstrings + idx, ".orc_lookup")) {
+			param.lookup_table_size = s->sh_size;
+			param.orc_lookup_table = (unsigned int *)
+				((void *)ehdr + s->sh_offset);
+		}
+		if (!strcmp(secstrings + idx, ".text")) {
+			param.text_size = s->sh_size;
+			param.start_ip = s->sh_addr;
+		}
 #endif
 	} /* for loop */
 
 #if defined(SORTTABLE_64) && defined(UNWINDER_ORC_ENABLED)
-	if (!g_orc_ip_table || !g_orc_table) {
+	if (!g_orc_ip_table || !g_orc_table || !param.orc_lookup_table) {
 		fprintf(stderr,
 			"incomplete ORC unwind tables in file: %s\n", fname);
 		goto out;
@@ -285,9 +366,13 @@ static int do_sort(Elf_Ehdr *ehdr,
 		goto out;
 	}
 
+	/* Make orc_ip return virtual address at execution. */
+	orc_ip_table_offset = orc_ip_addr - (unsigned long)g_orc_ip_table;
+
 	/* create thread to sort ORC unwind tables concurrently */
+	param.orc_num_entries = orc_num_entries;
 	if (pthread_create(&orc_sort_thread, NULL,
-			   sort_orctable, &orc_ip_size)) {
+			   sort_orctable, &param)) {
 		fprintf(stderr,
 			"pthread_create orc_sort_thread failed '%s': %s\n",
 			strerror(errno), fname);
-- 
2.14.4.44.g2045bb6


^ permalink raw reply related	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2020-08-07  4:19 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-29  6:46 [PATCH 0/2] Build ORC fast lookup table in scripts/sorttable tool Huaixin Chang
2020-04-29  6:46 ` [PATCH 1/2] scripts/sorttable: Build orc fast lookup table via sorttable tool Huaixin Chang
2020-04-29  6:46 ` [PATCH 2/2] x86/unwind/orc: Remove unwind_init() from x86 boot Huaixin Chang
2020-04-29  8:49 ` [PATCH 0/2] Build ORC fast lookup table in scripts/sorttable tool Peter Zijlstra
2020-04-30  2:32   ` changhuaixin
2020-04-30  4:06     ` Josh Poimboeuf
2020-04-30  4:10       ` Josh Poimboeuf
2020-05-22 18:28 ` Josh Poimboeuf
2020-05-25  3:33   ` changhuaixin
     [not found]   ` <482837A8-E9D9-4229-B7B1-8E14403FB2AC@linux.alibaba.com>
2020-06-01 17:38     ` Josh Poimboeuf
2020-06-03 13:47       ` changhuaixin
2020-06-03 14:31       ` [PATCH v2 0/3] " Huaixin Chang
2020-06-03 14:31         ` [PATCH 1/3] scripts/sorttable: Change section type of orc_lookup to SHT_PROGBITS Huaixin Chang
2020-06-03 14:31         ` [PATCH 2/3] scripts/sorttable: Build orc fast lookup table via sorttable tool Huaixin Chang
2020-06-03 14:31         ` [PATCH 3/3] x86/unwind/orc: Simplify unwind_init() for x86 boot Huaixin Chang
2020-06-03 14:39       ` [PATCH v3 0/3] Build ORC fast lookup table in scripts/sorttable tool Huaixin Chang
2020-06-03 14:39         ` [PATCH 1/3] scripts/sorttable: Change section type of orc_lookup to SHT_PROGBITS Huaixin Chang
2020-06-03 14:39         ` [PATCH 2/3] scripts/sorttable: Build orc fast lookup table via sorttable tool Huaixin Chang
2020-06-03 14:39         ` [PATCH 3/3] x86/unwind/orc: Simplify unwind_init() for x86 boot Huaixin Chang
2020-06-29  2:14         ` [PATCH v3 0/3] Build ORC fast lookup table in scripts/sorttable tool changhuaixin
2020-07-23  3:46 [PATCH RESEND " Huaixin Chang
2020-07-23  3:46 ` [PATCH 2/3] scripts/sorttable: Build orc fast lookup table via sorttable tool Huaixin Chang
2020-07-24 13:53   ` Ingo Molnar
2020-07-27  2:19     ` changhuaixin
2020-08-07  4:17 ` [PATCH v2 0/3] Build ORC fast lookup table in scripts/sorttable tool Huaixin Chang
2020-08-07  4:18   ` [PATCH 2/3] scripts/sorttable: Build ORC fast lookup table via sorttable tool Huaixin Chang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.