linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] x86: simplify interrupt dispatch table
@ 2015-04-03 19:49 Denys Vlasenko
  2015-04-04  7:06 ` Ingo Molnar
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Denys Vlasenko @ 2015-04-03 19:49 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Denys Vlasenko, Linus Torvalds, Steven Rostedt, Borislav Petkov,
	H. Peter Anvin, Andy Lutomirski, Oleg Nesterov,
	Frederic Weisbecker, Alexei Starovoitov, Will Drewry, Kees Cook,
	x86, linux-kernel

Interrupt entry points are handled with the following code:
Each 32-byte code block contains seven entry points

		...
		[push][jump 22] // 4 bytes
		[push][jump 18] // 4 bytes
		[push][jump 14] // 4 bytes
		[push][jump 10] // 4 bytes
		[push][jump  6] // 4 bytes
		[push][jump  2] // 4 bytes
		[push][jump common_interrupt][padding] // 8 bytes

		[push][jump]
		[push][jump]
		[push][jump]
		[push][jump]
		[push][jump]
		[push][jump]
		[push][jump common_interrupt][padding]

		[padding_2]
	common_interrupt:

And there is a table which holds pointers to every entry point,
IOW: to every push.

In cold cache, two jumps are still costlier than one, even though we get
the benefit of them residing in the same cacheline.

This change replaces short jumps with near ones to common_interrupt, and pads
every push+jump pair to 8 bytes. This way, each interrupt takes only one jump.

This change replaces ".p2align CONFIG_X86_L1_CACHE_SHIFT" before dispatch table
with ".align 8" - we do not need anything stronger than that.

The table of entry addresses (the interrupt[] array) is no longer
necessary, the address of entries can be easily calculated as
(irq_entries_start + i*8).

   text	   data	    bss	    dec	    hex	filename
  12546	      0	      0	  12546	   3102	entry_64.o.before
  11626	      0	      0	  11626	   2d6a	entry_64.o

The size decrease is because 1656 bytes of .init.rodata are gone.
That's initdata, though. The resident size does go up a bit.

Run-tested (32 and 64 bits).

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Linus Torvalds <torvalds@linux-foundation.org>
CC: Steven Rostedt <rostedt@goodmis.org>
CC: Ingo Molnar <mingo@kernel.org>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Oleg Nesterov <oleg@redhat.com>
CC: Frederic Weisbecker <fweisbec@gmail.com>
CC: Alexei Starovoitov <ast@plumgrid.com>
CC: Will Drewry <wad@chromium.org>
CC: Kees Cook <keescook@chromium.org>
CC: x86@kernel.org
CC: linux-kernel@vger.kernel.org
---
 arch/x86/include/asm/hw_irq.h |  5 ++---
 arch/x86/kernel/entry_32.S    | 41 ++++++++++-------------------------------
 arch/x86/kernel/entry_64.S    | 41 ++++++++++-------------------------------
 arch/x86/kernel/irqinit.c     |  3 ++-
 arch/x86/lguest/boot.c        |  3 ++-
 5 files changed, 26 insertions(+), 67 deletions(-)

diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 9662290..e9571dd 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -181,10 +181,9 @@ extern __visible void smp_call_function_single_interrupt(struct pt_regs *);
 extern __visible void smp_invalidate_interrupt(struct pt_regs *);
 #endif
 
-extern void (*__initconst interrupt[FIRST_SYSTEM_VECTOR
-				    - FIRST_EXTERNAL_VECTOR])(void);
+extern char irq_entries_start[];
 #ifdef CONFIG_TRACING
-#define trace_interrupt interrupt
+#define trace_irq_entries_start irq_entries_start
 #endif
 
 #define VECTOR_UNDEFINED	(-1)
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 4c8cc34..25ba39a 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -720,43 +720,22 @@ END(sysenter_badsys)
 .endm
 
 /*
- * Build the entry stubs and pointer table with some assembler magic.
- * We pack 7 stubs into a single 32-byte chunk, which will fit in a
- * single cache line on all modern x86 implementations.
+ * Build the entry stubs with some assembler magic.
+ * We pack 1 stub into every 8-byte block.
  */
-.section .init.rodata,"a"
-ENTRY(interrupt)
-.section .entry.text, "ax"
-	.p2align 5
-	.p2align CONFIG_X86_L1_CACHE_SHIFT
+	.align 8
 ENTRY(irq_entries_start)
 	RING0_INT_FRAME
-vector=FIRST_EXTERNAL_VECTOR
-.rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7
-	.balign 32
-  .rept	7
-    .if vector < FIRST_SYSTEM_VECTOR
-      .if vector <> FIRST_EXTERNAL_VECTOR
+    vector=FIRST_EXTERNAL_VECTOR
+    .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
+	pushl_cfi $(~vector+0x80)	/* Note: always in signed byte range */
+    vector=vector+1
+	jmp	common_interrupt
 	CFI_ADJUST_CFA_OFFSET -4
-      .endif
-1:	pushl_cfi $(~vector+0x80)	/* Note: always in signed byte range */
-      .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
-	jmp 2f
-      .endif
-      .previous
-	.long 1b
-      .section .entry.text, "ax"
-vector=vector+1
-    .endif
-  .endr
-2:	jmp common_interrupt
-.endr
+	.align	8
+    .endr
 END(irq_entries_start)
 
-.previous
-END(interrupt)
-.previous
-
 /*
  * the CPU automatically disables interrupts when executing an IRQ vector,
  * so IRQ-flags tracing has to follow that:
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 238c232..b485d1d 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -607,44 +607,23 @@ ENTRY(ret_from_fork)
 END(ret_from_fork)
 
 /*
- * Build the entry stubs and pointer table with some assembler magic.
- * We pack 7 stubs into a single 32-byte chunk, which will fit in a
- * single cache line on all modern x86 implementations.
+ * Build the entry stubs with some assembler magic.
+ * We pack 1 stub into every 8-byte block.
  */
-	.section .init.rodata,"a"
-ENTRY(interrupt)
-	.section .entry.text
-	.p2align 5
-	.p2align CONFIG_X86_L1_CACHE_SHIFT
+	.align 8
 ENTRY(irq_entries_start)
 	INTR_FRAME
-vector=FIRST_EXTERNAL_VECTOR
-.rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7
-	.balign 32
-  .rept	7
-    .if vector < FIRST_SYSTEM_VECTOR
-      .if vector <> FIRST_EXTERNAL_VECTOR
+    vector=FIRST_EXTERNAL_VECTOR
+    .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
+	pushq_cfi $(~vector+0x80)	/* Note: always in signed byte range */
+    vector=vector+1
+	jmp	common_interrupt
 	CFI_ADJUST_CFA_OFFSET -8
-      .endif
-1:	pushq_cfi $(~vector+0x80)	/* Note: always in signed byte range */
-      .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
-	jmp 2f
-      .endif
-      .previous
-	.quad 1b
-      .section .entry.text
-vector=vector+1
-    .endif
-  .endr
-2:	jmp common_interrupt
-.endr
+	.align	8
+    .endr
 	CFI_ENDPROC
 END(irq_entries_start)
 
-.previous
-END(interrupt)
-.previous
-
 /*
  * Interrupt entry/exit.
  *
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 70e181e..cd10a64 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -178,7 +178,8 @@ void __init native_init_IRQ(void)
 #endif
 	for_each_clear_bit_from(i, used_vectors, first_system_vector) {
 		/* IA32_SYSCALL_VECTOR could be used in trap_init already. */
-		set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]);
+		set_intr_gate(i, irq_entries_start +
+				8 * (i - FIRST_EXTERNAL_VECTOR));
 	}
 #ifdef CONFIG_X86_LOCAL_APIC
 	for_each_clear_bit_from(i, used_vectors, NR_VECTORS)
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 8561585..717908b 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -868,7 +868,8 @@ static void __init lguest_init_IRQ(void)
 		/* Some systems map "vectors" to interrupts weirdly.  Not us! */
 		__this_cpu_write(vector_irq[i], i - FIRST_EXTERNAL_VECTOR);
 		if (i != SYSCALL_VECTOR)
-			set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]);
+			set_intr_gate(i, irq_entries_start +
+					8 * (i - FIRST_EXTERNAL_VECTOR));
 	}
 
 	/*
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] x86: simplify interrupt dispatch table
  2015-04-03 19:49 [PATCH] x86: simplify interrupt dispatch table Denys Vlasenko
@ 2015-04-04  7:06 ` Ingo Molnar
  2015-04-07 15:26 ` Borislav Petkov
  2015-04-08  7:43 ` [tip:x86/asm] x86/asm/entry/irq: Simplify interrupt dispatch table (IDT) layout tip-bot for Denys Vlasenko
  2 siblings, 0 replies; 4+ messages in thread
From: Ingo Molnar @ 2015-04-04  7:06 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: Linus Torvalds, Steven Rostedt, Borislav Petkov, H. Peter Anvin,
	Andy Lutomirski, Oleg Nesterov, Frederic Weisbecker,
	Alexei Starovoitov, Will Drewry, Kees Cook, x86, linux-kernel


* Denys Vlasenko <dvlasenk@redhat.com> wrote:

> Interrupt entry points are handled with the following code:
> Each 32-byte code block contains seven entry points
> 
> 		...
> 		[push][jump 22] // 4 bytes
> 		[push][jump 18] // 4 bytes
> 		[push][jump 14] // 4 bytes
> 		[push][jump 10] // 4 bytes
> 		[push][jump  6] // 4 bytes
> 		[push][jump  2] // 4 bytes
> 		[push][jump common_interrupt][padding] // 8 bytes
> 
> 		[push][jump]
> 		[push][jump]
> 		[push][jump]
> 		[push][jump]
> 		[push][jump]
> 		[push][jump]
> 		[push][jump common_interrupt][padding]
> 
> 		[padding_2]
> 	common_interrupt:
> 
> And there is a table which holds pointers to every entry point,
> IOW: to every push.
> 
> In cold cache, two jumps are still costlier than one, even though we get
> the benefit of them residing in the same cacheline.
> 
> This change replaces short jumps with near ones to common_interrupt, and pads
> every push+jump pair to 8 bytes. This way, each interrupt takes only one jump.
> 
> This change replaces ".p2align CONFIG_X86_L1_CACHE_SHIFT" before dispatch table
> with ".align 8" - we do not need anything stronger than that.
> 
> The table of entry addresses (the interrupt[] array) is no longer
> necessary, the address of entries can be easily calculated as
> (irq_entries_start + i*8).
> 
>    text	   data	    bss	    dec	    hex	filename
>   12546	      0	      0	  12546	   3102	entry_64.o.before
>   11626	      0	      0	  11626	   2d6a	entry_64.o
> 
> The size decrease is because 1656 bytes of .init.rodata are gone.
> That's initdata, though. The resident size does go up a bit.

So I like this a lot, as it's straight, simple and obvious, both to 
hardware and to humans as well. (This is btw. quite close to the irq 
entry code layout we used to have historically.)

We could do three other changes that would probably help a lot more in 
practice than the addition or elimination of a single instruction:

1)

  We could try to not spread vectors, as modern APICs seem to handle 
  clustered vectors a lot better and we don't actually use irq 
  priority levels like other OSs so we are free to choose our vectors.

  This compresses the I$ footprint a bit more if lots of related
  irq sources are firing towards the same CPUs that share one or more
  caches (HT threads, cores, node local siblings).

  Even on single-node systems this would still compress the IDT and 
  the entry code cache footprint a bit.

2)

  We could allocate the IDT per CPU (or per node), lowering the D$ 
  cache miss costs on NUMA systems. (This, if we allowed the IDTs to 
  diverge, would also allow more irq sources sent to separate 
  vectors.)

  The simplest model of this, where each IDT is just a copy of each
  other, is relatively easy to implement, as the IDT is page aligned 
  and ro mapped already.

3)

  We could allocate the entry code itself too per cpu (or per node), 
  lowering the I$ cache miss costs on NUMA systems. This would be a 
  bit trickier to implement, as that part of the image has to be 
  relinked during bootup, but is doable.

I'd do 3) only once we are done with the current audit/cleanup/rewrite 
of the entry code.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] x86: simplify interrupt dispatch table
  2015-04-03 19:49 [PATCH] x86: simplify interrupt dispatch table Denys Vlasenko
  2015-04-04  7:06 ` Ingo Molnar
@ 2015-04-07 15:26 ` Borislav Petkov
  2015-04-08  7:43 ` [tip:x86/asm] x86/asm/entry/irq: Simplify interrupt dispatch table (IDT) layout tip-bot for Denys Vlasenko
  2 siblings, 0 replies; 4+ messages in thread
From: Borislav Petkov @ 2015-04-07 15:26 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: Ingo Molnar, Linus Torvalds, Steven Rostedt, H. Peter Anvin,
	Andy Lutomirski, Oleg Nesterov, Frederic Weisbecker,
	Alexei Starovoitov, Will Drewry, Kees Cook, x86, linux-kernel

On Fri, Apr 03, 2015 at 09:49:13PM +0200, Denys Vlasenko wrote:
> Interrupt entry points are handled with the following code:
> Each 32-byte code block contains seven entry points
> 
> 		...
> 		[push][jump 22] // 4 bytes
> 		[push][jump 18] // 4 bytes
> 		[push][jump 14] // 4 bytes
> 		[push][jump 10] // 4 bytes
> 		[push][jump  6] // 4 bytes
> 		[push][jump  2] // 4 bytes
> 		[push][jump common_interrupt][padding] // 8 bytes
> 
> 		[push][jump]
> 		[push][jump]
> 		[push][jump]
> 		[push][jump]
> 		[push][jump]
> 		[push][jump]
> 		[push][jump common_interrupt][padding]
> 
> 		[padding_2]
> 	common_interrupt:
> 
> And there is a table which holds pointers to every entry point,
> IOW: to every push.
> 
> In cold cache, two jumps are still costlier than one, even though we get
> the benefit of them residing in the same cacheline.
> 
> This change replaces short jumps with near ones to common_interrupt, and pads
> every push+jump pair to 8 bytes. This way, each interrupt takes only one jump.
> 
> This change replaces ".p2align CONFIG_X86_L1_CACHE_SHIFT" before dispatch table
> with ".align 8" - we do not need anything stronger than that.
> 
> The table of entry addresses (the interrupt[] array) is no longer
> necessary, the address of entries can be easily calculated as
> (irq_entries_start + i*8).
> 
>    text	   data	    bss	    dec	    hex	filename
>   12546	      0	      0	  12546	   3102	entry_64.o.before
>   11626	      0	      0	  11626	   2d6a	entry_64.o
> 
> The size decrease is because 1656 bytes of .init.rodata are gone.
> That's initdata, though. The resident size does go up a bit.
> 
> Run-tested (32 and 64 bits).
> 
> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
> CC: Linus Torvalds <torvalds@linux-foundation.org>
> CC: Steven Rostedt <rostedt@goodmis.org>
> CC: Ingo Molnar <mingo@kernel.org>
> CC: Borislav Petkov <bp@alien8.de>
> CC: "H. Peter Anvin" <hpa@zytor.com>
> CC: Andy Lutomirski <luto@amacapital.net>
> CC: Oleg Nesterov <oleg@redhat.com>
> CC: Frederic Weisbecker <fweisbec@gmail.com>
> CC: Alexei Starovoitov <ast@plumgrid.com>
> CC: Will Drewry <wad@chromium.org>
> CC: Kees Cook <keescook@chromium.org>
> CC: x86@kernel.org
> CC: linux-kernel@vger.kernel.org
> ---
>  arch/x86/include/asm/hw_irq.h |  5 ++---
>  arch/x86/kernel/entry_32.S    | 41 ++++++++++-------------------------------
>  arch/x86/kernel/entry_64.S    | 41 ++++++++++-------------------------------
>  arch/x86/kernel/irqinit.c     |  3 ++-
>  arch/x86/lguest/boot.c        |  3 ++-
>  5 files changed, 26 insertions(+), 67 deletions(-)

Acked-and-tested-by: Borislav Petkov <bp@suse.de>

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.
--

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [tip:x86/asm] x86/asm/entry/irq: Simplify interrupt dispatch table (IDT) layout
  2015-04-03 19:49 [PATCH] x86: simplify interrupt dispatch table Denys Vlasenko
  2015-04-04  7:06 ` Ingo Molnar
  2015-04-07 15:26 ` Borislav Petkov
@ 2015-04-08  7:43 ` tip-bot for Denys Vlasenko
  2 siblings, 0 replies; 4+ messages in thread
From: tip-bot for Denys Vlasenko @ 2015-04-08  7:43 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: bp, bp, luto, linux-kernel, wad, hpa, tglx, mingo, keescook,
	dvlasenk, torvalds, ast, rostedt, fweisbec, oleg

Commit-ID:  3304c9c37bef30ebd2ef71d986e6568372ce80f8
Gitweb:     http://git.kernel.org/tip/3304c9c37bef30ebd2ef71d986e6568372ce80f8
Author:     Denys Vlasenko <dvlasenk@redhat.com>
AuthorDate: Fri, 3 Apr 2015 21:49:13 +0200
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Wed, 8 Apr 2015 09:02:13 +0200

x86/asm/entry/irq: Simplify interrupt dispatch table (IDT) layout

Interrupt entry points are handled with the following code,
each 32-byte code block contains seven entry points:

		...
		[push][jump 22] // 4 bytes
		[push][jump 18] // 4 bytes
		[push][jump 14] // 4 bytes
		[push][jump 10] // 4 bytes
		[push][jump  6] // 4 bytes
		[push][jump  2] // 4 bytes
		[push][jump common_interrupt][padding] // 8 bytes

		[push][jump]
		[push][jump]
		[push][jump]
		[push][jump]
		[push][jump]
		[push][jump]
		[push][jump common_interrupt][padding]

		[padding_2]
	common_interrupt:

And there is a table which holds pointers to every entry point,
IOW: to every push.

In cold cache, two jumps are still costlier than one, even
though we get the benefit of them residing in the same
cacheline.

This change replaces short jumps with near ones to
'common_interrupt', and pads every push+jump pair to 8 bytes. This
way, each interrupt takes only one jump.

This change replaces ".p2align CONFIG_X86_L1_CACHE_SHIFT" before
dispatch table with ".align 8" - we do not need anything
stronger than that.

The table of entry addresses (the interrupt[] array) is no
longer necessary, the address of entries can be easily
calculated as (irq_entries_start + i*8).

   text	   data	    bss	    dec	    hex	filename
  12546	      0	      0	  12546	   3102	entry_64.o.before
  11626	      0	      0	  11626	   2d6a	entry_64.o

The size decrease is because 1656 bytes of .init.rodata are
gone. That's initdata, though. The resident size does go up a
bit.

Run-tested (32 and 64 bits).

Acked-and-Tested-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Will Drewry <wad@chromium.org>
Link: http://lkml.kernel.org/r/1428090553-7283-1-git-send-email-dvlasenk@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/hw_irq.h |  5 ++---
 arch/x86/kernel/entry_32.S    | 41 ++++++++++-------------------------------
 arch/x86/kernel/entry_64.S    | 41 ++++++++++-------------------------------
 arch/x86/kernel/irqinit.c     |  3 ++-
 arch/x86/lguest/boot.c        |  3 ++-
 5 files changed, 26 insertions(+), 67 deletions(-)

diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 9662290..e9571dd 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -181,10 +181,9 @@ extern __visible void smp_call_function_single_interrupt(struct pt_regs *);
 extern __visible void smp_invalidate_interrupt(struct pt_regs *);
 #endif
 
-extern void (*__initconst interrupt[FIRST_SYSTEM_VECTOR
-				    - FIRST_EXTERNAL_VECTOR])(void);
+extern char irq_entries_start[];
 #ifdef CONFIG_TRACING
-#define trace_interrupt interrupt
+#define trace_irq_entries_start irq_entries_start
 #endif
 
 #define VECTOR_UNDEFINED	(-1)
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index effa279..02bec0f 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -723,43 +723,22 @@ END(sysenter_badsys)
 .endm
 
 /*
- * Build the entry stubs and pointer table with some assembler magic.
- * We pack 7 stubs into a single 32-byte chunk, which will fit in a
- * single cache line on all modern x86 implementations.
+ * Build the entry stubs with some assembler magic.
+ * We pack 1 stub into every 8-byte block.
  */
-.section .init.rodata,"a"
-ENTRY(interrupt)
-.section .entry.text, "ax"
-	.p2align 5
-	.p2align CONFIG_X86_L1_CACHE_SHIFT
+	.align 8
 ENTRY(irq_entries_start)
 	RING0_INT_FRAME
-vector=FIRST_EXTERNAL_VECTOR
-.rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7
-	.balign 32
-  .rept	7
-    .if vector < FIRST_SYSTEM_VECTOR
-      .if vector <> FIRST_EXTERNAL_VECTOR
+    vector=FIRST_EXTERNAL_VECTOR
+    .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
+	pushl_cfi $(~vector+0x80)	/* Note: always in signed byte range */
+    vector=vector+1
+	jmp	common_interrupt
 	CFI_ADJUST_CFA_OFFSET -4
-      .endif
-1:	pushl_cfi $(~vector+0x80)	/* Note: always in signed byte range */
-      .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
-	jmp 2f
-      .endif
-      .previous
-	.long 1b
-      .section .entry.text, "ax"
-vector=vector+1
-    .endif
-  .endr
-2:	jmp common_interrupt
-.endr
+	.align	8
+    .endr
 END(irq_entries_start)
 
-.previous
-END(interrupt)
-.previous
-
 /*
  * the CPU automatically disables interrupts when executing an IRQ vector,
  * so IRQ-flags tracing has to follow that:
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index e4c8103..4ca03c5 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -608,44 +608,23 @@ ENTRY(ret_from_fork)
 END(ret_from_fork)
 
 /*
- * Build the entry stubs and pointer table with some assembler magic.
- * We pack 7 stubs into a single 32-byte chunk, which will fit in a
- * single cache line on all modern x86 implementations.
+ * Build the entry stubs with some assembler magic.
+ * We pack 1 stub into every 8-byte block.
  */
-	.section .init.rodata,"a"
-ENTRY(interrupt)
-	.section .entry.text
-	.p2align 5
-	.p2align CONFIG_X86_L1_CACHE_SHIFT
+	.align 8
 ENTRY(irq_entries_start)
 	INTR_FRAME
-vector=FIRST_EXTERNAL_VECTOR
-.rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7
-	.balign 32
-  .rept	7
-    .if vector < FIRST_SYSTEM_VECTOR
-      .if vector <> FIRST_EXTERNAL_VECTOR
+    vector=FIRST_EXTERNAL_VECTOR
+    .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
+	pushq_cfi $(~vector+0x80)	/* Note: always in signed byte range */
+    vector=vector+1
+	jmp	common_interrupt
 	CFI_ADJUST_CFA_OFFSET -8
-      .endif
-1:	pushq_cfi $(~vector+0x80)	/* Note: always in signed byte range */
-      .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
-	jmp 2f
-      .endif
-      .previous
-	.quad 1b
-      .section .entry.text
-vector=vector+1
-    .endif
-  .endr
-2:	jmp common_interrupt
-.endr
+	.align	8
+    .endr
 	CFI_ENDPROC
 END(irq_entries_start)
 
-.previous
-END(interrupt)
-.previous
-
 /*
  * Interrupt entry/exit.
  *
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 70e181e..cd10a64 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -178,7 +178,8 @@ void __init native_init_IRQ(void)
 #endif
 	for_each_clear_bit_from(i, used_vectors, first_system_vector) {
 		/* IA32_SYSCALL_VECTOR could be used in trap_init already. */
-		set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]);
+		set_intr_gate(i, irq_entries_start +
+				8 * (i - FIRST_EXTERNAL_VECTOR));
 	}
 #ifdef CONFIG_X86_LOCAL_APIC
 	for_each_clear_bit_from(i, used_vectors, NR_VECTORS)
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 8561585..717908b 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -868,7 +868,8 @@ static void __init lguest_init_IRQ(void)
 		/* Some systems map "vectors" to interrupts weirdly.  Not us! */
 		__this_cpu_write(vector_irq[i], i - FIRST_EXTERNAL_VECTOR);
 		if (i != SYSCALL_VECTOR)
-			set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]);
+			set_intr_gate(i, irq_entries_start +
+					8 * (i - FIRST_EXTERNAL_VECTOR));
 	}
 
 	/*

^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2015-04-08  7:44 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-04-03 19:49 [PATCH] x86: simplify interrupt dispatch table Denys Vlasenko
2015-04-04  7:06 ` Ingo Molnar
2015-04-07 15:26 ` Borislav Petkov
2015-04-08  7:43 ` [tip:x86/asm] x86/asm/entry/irq: Simplify interrupt dispatch table (IDT) layout tip-bot for Denys Vlasenko

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).