* [PATCH v6 01/27] x86/crypto: Adapt assembly for PIE support
2019-01-31 19:24 [PATCH v6 00/27] x86: PIE support and option to extend KASLR randomization Thomas Garnier
@ 2019-01-31 19:24 ` Thomas Garnier
2019-02-07 11:48 ` Borislav Petkov
2019-01-31 19:24 ` [PATCH v6 02/27] x86: Use symbol name in jump table " Thomas Garnier
` (27 subsequent siblings)
28 siblings, 1 reply; 56+ messages in thread
From: Thomas Garnier @ 2019-01-31 19:24 UTC (permalink / raw)
To: kernel-hardening
Cc: kristen, Thomas Garnier, Herbert Xu, David S. Miller,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, H. Peter Anvin,
x86, linux-crypto, linux-kernel
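Change the assembly code to use only RIP-relative references to symbols so
that the kernel can be built as a PIE. Where a symbol was used as the
displacement of an indexed table lookup, the table base is first loaded into
a scratch register with a RIP-relative leaq, because RIP-relative addressing
cannot be combined with an index register.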
Position Independent Executable (PIE) support will allow extending the
KASLR randomization range below 0xffffffff80000000.
Signed-off-by: Thomas Garnier <thgarnie@chromium.org>
---
arch/x86/crypto/aegis128-aesni-asm.S | 6 +-
arch/x86/crypto/aegis128l-aesni-asm.S | 8 +-
arch/x86/crypto/aegis256-aesni-asm.S | 6 +-
arch/x86/crypto/aes-x86_64-asm_64.S | 45 +++++----
arch/x86/crypto/aesni-intel_asm.S | 8 +-
arch/x86/crypto/camellia-aesni-avx-asm_64.S | 42 ++++-----
arch/x86/crypto/camellia-aesni-avx2-asm_64.S | 44 ++++-----
arch/x86/crypto/camellia-x86_64-asm_64.S | 8 +-
arch/x86/crypto/cast5-avx-x86_64-asm_64.S | 50 +++++-----
arch/x86/crypto/cast6-avx-x86_64-asm_64.S | 44 +++++----
arch/x86/crypto/des3_ede-asm_64.S | 96 +++++++++++++-------
arch/x86/crypto/ghash-clmulni-intel_asm.S | 4 +-
arch/x86/crypto/glue_helper-asm-avx.S | 4 +-
arch/x86/crypto/glue_helper-asm-avx2.S | 6 +-
arch/x86/crypto/morus1280-avx2-asm.S | 4 +-
arch/x86/crypto/morus1280-sse2-asm.S | 8 +-
arch/x86/crypto/morus640-sse2-asm.S | 6 +-
arch/x86/crypto/sha256-avx2-asm.S | 23 +++--
18 files changed, 236 insertions(+), 176 deletions(-)
diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S
index 5f7e43d4f64a..a48f790954f8 100644
--- a/arch/x86/crypto/aegis128-aesni-asm.S
+++ b/arch/x86/crypto/aegis128-aesni-asm.S
@@ -203,8 +203,8 @@ ENTRY(crypto_aegis128_aesni_init)
movdqa KEY, STATE4
/* load the constants: */
- movdqa .Laegis128_const_0, STATE2
- movdqa .Laegis128_const_1, STATE1
+ movdqa .Laegis128_const_0(%rip), STATE2
+ movdqa .Laegis128_const_1(%rip), STATE1
pxor STATE2, STATE3
pxor STATE1, STATE4
@@ -684,7 +684,7 @@ ENTRY(crypto_aegis128_aesni_dec_tail)
punpcklbw T0, T0
punpcklbw T0, T0
punpcklbw T0, T0
- movdqa .Laegis128_counter, T1
+ movdqa .Laegis128_counter(%rip), T1
pcmpgtb T1, T0
pand T0, MSG
diff --git a/arch/x86/crypto/aegis128l-aesni-asm.S b/arch/x86/crypto/aegis128l-aesni-asm.S
index 491dd61c845c..a097b8956af8 100644
--- a/arch/x86/crypto/aegis128l-aesni-asm.S
+++ b/arch/x86/crypto/aegis128l-aesni-asm.S
@@ -331,8 +331,8 @@ ENTRY(crypto_aegis128l_aesni_init)
pxor MSG0, STATE4
/* load the constants: */
- movdqa .Laegis128l_const_0, STATE2
- movdqa .Laegis128l_const_1, STATE1
+ movdqa .Laegis128l_const_0(%rip), STATE2
+ movdqa .Laegis128l_const_1(%rip), STATE1
movdqa STATE1, STATE3
pxor STATE2, STATE5
pxor STATE1, STATE6
@@ -765,8 +765,8 @@ ENTRY(crypto_aegis128l_aesni_dec_tail)
punpcklbw T0, T0
punpcklbw T0, T0
movdqa T0, T1
- movdqa .Laegis128l_counter0, T2
- movdqa .Laegis128l_counter1, T3
+ movdqa .Laegis128l_counter0(%rip), T2
+ movdqa .Laegis128l_counter1(%rip), T3
pcmpgtb T2, T0
pcmpgtb T3, T1
pand T0, MSG0
diff --git a/arch/x86/crypto/aegis256-aesni-asm.S b/arch/x86/crypto/aegis256-aesni-asm.S
index 8870c7c5d9a4..195648b93fd1 100644
--- a/arch/x86/crypto/aegis256-aesni-asm.S
+++ b/arch/x86/crypto/aegis256-aesni-asm.S
@@ -273,8 +273,8 @@ ENTRY(crypto_aegis256_aesni_init)
movdqa T3, STATE1
/* load the constants: */
- movdqa .Laegis256_const_0, STATE3
- movdqa .Laegis256_const_1, STATE2
+ movdqa .Laegis256_const_0(%rip), STATE3
+ movdqa .Laegis256_const_1(%rip), STATE2
pxor STATE3, STATE4
pxor STATE2, STATE5
@@ -647,7 +647,7 @@ ENTRY(crypto_aegis256_aesni_dec_tail)
punpcklbw T0, T0
punpcklbw T0, T0
punpcklbw T0, T0
- movdqa .Laegis256_counter, T1
+ movdqa .Laegis256_counter(%rip), T1
pcmpgtb T1, T0
pand T0, MSG
diff --git a/arch/x86/crypto/aes-x86_64-asm_64.S b/arch/x86/crypto/aes-x86_64-asm_64.S
index 8739cf7795de..42eaacb589b3 100644
--- a/arch/x86/crypto/aes-x86_64-asm_64.S
+++ b/arch/x86/crypto/aes-x86_64-asm_64.S
@@ -48,8 +48,12 @@
#define R10 %r10
#define R11 %r11
+/* Hold global for PIE support */
+#define RBASE %r12
+
#define prologue(FUNC,KEY,B128,B192,r1,r2,r5,r6,r7,r8,r9,r10,r11) \
ENTRY(FUNC); \
+ pushq RBASE; \
movq r1,r2; \
leaq KEY+48(r8),r9; \
movq r10,r11; \
@@ -74,54 +78,63 @@
movl r6 ## E,4(r9); \
movl r7 ## E,8(r9); \
movl r8 ## E,12(r9); \
+ popq RBASE; \
ret; \
ENDPROC(FUNC);
+#define round_mov(tab_off, reg_i, reg_o) \
+ leaq tab_off(%rip), RBASE; \
+ movl (RBASE,reg_i,4), reg_o;
+
+#define round_xor(tab_off, reg_i, reg_o) \
+ leaq tab_off(%rip), RBASE; \
+ xorl (RBASE,reg_i,4), reg_o;
+
#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
movzbl r2 ## H,r5 ## E; \
movzbl r2 ## L,r6 ## E; \
- movl TAB+1024(,r5,4),r5 ## E;\
+ round_mov(TAB+1024, r5, r5 ## E)\
movw r4 ## X,r2 ## X; \
- movl TAB(,r6,4),r6 ## E; \
+ round_mov(TAB, r6, r6 ## E) \
roll $16,r2 ## E; \
shrl $16,r4 ## E; \
movzbl r4 ## L,r7 ## E; \
movzbl r4 ## H,r4 ## E; \
xorl OFFSET(r8),ra ## E; \
xorl OFFSET+4(r8),rb ## E; \
- xorl TAB+3072(,r4,4),r5 ## E;\
- xorl TAB+2048(,r7,4),r6 ## E;\
+ round_xor(TAB+3072, r4, r5 ## E)\
+ round_xor(TAB+2048, r7, r6 ## E)\
movzbl r1 ## L,r7 ## E; \
movzbl r1 ## H,r4 ## E; \
- movl TAB+1024(,r4,4),r4 ## E;\
+ round_mov(TAB+1024, r4, r4 ## E)\
movw r3 ## X,r1 ## X; \
roll $16,r1 ## E; \
shrl $16,r3 ## E; \
- xorl TAB(,r7,4),r5 ## E; \
+ round_xor(TAB, r7, r5 ## E) \
movzbl r3 ## L,r7 ## E; \
movzbl r3 ## H,r3 ## E; \
- xorl TAB+3072(,r3,4),r4 ## E;\
- xorl TAB+2048(,r7,4),r5 ## E;\
+ round_xor(TAB+3072, r3, r4 ## E)\
+ round_xor(TAB+2048, r7, r5 ## E)\
movzbl r1 ## L,r7 ## E; \
movzbl r1 ## H,r3 ## E; \
shrl $16,r1 ## E; \
- xorl TAB+3072(,r3,4),r6 ## E;\
- movl TAB+2048(,r7,4),r3 ## E;\
+ round_xor(TAB+3072, r3, r6 ## E)\
+ round_mov(TAB+2048, r7, r3 ## E)\
movzbl r1 ## L,r7 ## E; \
movzbl r1 ## H,r1 ## E; \
- xorl TAB+1024(,r1,4),r6 ## E;\
- xorl TAB(,r7,4),r3 ## E; \
+ round_xor(TAB+1024, r1, r6 ## E)\
+ round_xor(TAB, r7, r3 ## E) \
movzbl r2 ## H,r1 ## E; \
movzbl r2 ## L,r7 ## E; \
shrl $16,r2 ## E; \
- xorl TAB+3072(,r1,4),r3 ## E;\
- xorl TAB+2048(,r7,4),r4 ## E;\
+ round_xor(TAB+3072, r1, r3 ## E)\
+ round_xor(TAB+2048, r7, r4 ## E)\
movzbl r2 ## H,r1 ## E; \
movzbl r2 ## L,r2 ## E; \
xorl OFFSET+8(r8),rc ## E; \
xorl OFFSET+12(r8),rd ## E; \
- xorl TAB+1024(,r1,4),r3 ## E;\
- xorl TAB(,r2,4),r4 ## E;
+ round_xor(TAB+1024, r1, r3 ## E)\
+ round_xor(TAB, r2, r4 ## E)
#define move_regs(r1,r2,r3,r4) \
movl r3 ## E,r1 ## E; \
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index cb2deb61c5d9..8be8cef8263a 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -2610,7 +2610,7 @@ ENDPROC(aesni_cbc_dec)
*/
.align 4
_aesni_inc_init:
- movaps .Lbswap_mask, BSWAP_MASK
+ movaps .Lbswap_mask(%rip), BSWAP_MASK
movaps IV, CTR
PSHUFB_XMM BSWAP_MASK CTR
mov $1, TCTR_LOW
@@ -2738,12 +2738,12 @@ ENTRY(aesni_xts_crypt8)
cmpb $0, %cl
movl $0, %ecx
movl $240, %r10d
- leaq _aesni_enc4, %r11
- leaq _aesni_dec4, %rax
+ leaq _aesni_enc4(%rip), %r11
+ leaq _aesni_dec4(%rip), %rax
cmovel %r10d, %ecx
cmoveq %rax, %r11
- movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
+ movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK
movups (IVP), IV
mov 480(KEYP), KLEN
diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
index a14af6eb09cb..f94ec9a5552b 100644
--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
@@ -53,10 +53,10 @@
/* \
* S-function with AES subbytes \
*/ \
- vmovdqa .Linv_shift_row, t4; \
- vbroadcastss .L0f0f0f0f, t7; \
- vmovdqa .Lpre_tf_lo_s1, t0; \
- vmovdqa .Lpre_tf_hi_s1, t1; \
+ vmovdqa .Linv_shift_row(%rip), t4; \
+ vbroadcastss .L0f0f0f0f(%rip), t7; \
+ vmovdqa .Lpre_tf_lo_s1(%rip), t0; \
+ vmovdqa .Lpre_tf_hi_s1(%rip), t1; \
\
/* AES inverse shift rows */ \
vpshufb t4, x0, x0; \
@@ -69,8 +69,8 @@
vpshufb t4, x6, x6; \
\
/* prefilter sboxes 1, 2 and 3 */ \
- vmovdqa .Lpre_tf_lo_s4, t2; \
- vmovdqa .Lpre_tf_hi_s4, t3; \
+ vmovdqa .Lpre_tf_lo_s4(%rip), t2; \
+ vmovdqa .Lpre_tf_hi_s4(%rip), t3; \
filter_8bit(x0, t0, t1, t7, t6); \
filter_8bit(x7, t0, t1, t7, t6); \
filter_8bit(x1, t0, t1, t7, t6); \
@@ -84,8 +84,8 @@
filter_8bit(x6, t2, t3, t7, t6); \
\
/* AES subbytes + AES shift rows */ \
- vmovdqa .Lpost_tf_lo_s1, t0; \
- vmovdqa .Lpost_tf_hi_s1, t1; \
+ vmovdqa .Lpost_tf_lo_s1(%rip), t0; \
+ vmovdqa .Lpost_tf_hi_s1(%rip), t1; \
vaesenclast t4, x0, x0; \
vaesenclast t4, x7, x7; \
vaesenclast t4, x1, x1; \
@@ -96,16 +96,16 @@
vaesenclast t4, x6, x6; \
\
/* postfilter sboxes 1 and 4 */ \
- vmovdqa .Lpost_tf_lo_s3, t2; \
- vmovdqa .Lpost_tf_hi_s3, t3; \
+ vmovdqa .Lpost_tf_lo_s3(%rip), t2; \
+ vmovdqa .Lpost_tf_hi_s3(%rip), t3; \
filter_8bit(x0, t0, t1, t7, t6); \
filter_8bit(x7, t0, t1, t7, t6); \
filter_8bit(x3, t0, t1, t7, t6); \
filter_8bit(x6, t0, t1, t7, t6); \
\
/* postfilter sbox 3 */ \
- vmovdqa .Lpost_tf_lo_s2, t4; \
- vmovdqa .Lpost_tf_hi_s2, t5; \
+ vmovdqa .Lpost_tf_lo_s2(%rip), t4; \
+ vmovdqa .Lpost_tf_hi_s2(%rip), t5; \
filter_8bit(x2, t2, t3, t7, t6); \
filter_8bit(x5, t2, t3, t7, t6); \
\
@@ -444,7 +444,7 @@ ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
transpose_4x4(c0, c1, c2, c3, a0, a1); \
transpose_4x4(d0, d1, d2, d3, a0, a1); \
\
- vmovdqu .Lshufb_16x16b, a0; \
+ vmovdqu .Lshufb_16x16b(%rip), a0; \
vmovdqu st1, a1; \
vpshufb a0, a2, a2; \
vpshufb a0, a3, a3; \
@@ -483,7 +483,7 @@ ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
#define inpack16_pre(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
y6, y7, rio, key) \
vmovq key, x0; \
- vpshufb .Lpack_bswap, x0, x0; \
+ vpshufb .Lpack_bswap(%rip), x0, x0; \
\
vpxor 0 * 16(rio), x0, y7; \
vpxor 1 * 16(rio), x0, y6; \
@@ -534,7 +534,7 @@ ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
vmovdqu x0, stack_tmp0; \
\
vmovq key, x0; \
- vpshufb .Lpack_bswap, x0, x0; \
+ vpshufb .Lpack_bswap(%rip), x0, x0; \
\
vpxor x0, y7, y7; \
vpxor x0, y6, y6; \
@@ -1017,7 +1017,7 @@ ENTRY(camellia_ctr_16way)
subq $(16 * 16), %rsp;
movq %rsp, %rax;
- vmovdqa .Lbswap128_mask, %xmm14;
+ vmovdqa .Lbswap128_mask(%rip), %xmm14;
/* load IV and byteswap */
vmovdqu (%rcx), %xmm0;
@@ -1066,7 +1066,7 @@ ENTRY(camellia_ctr_16way)
/* inpack16_pre: */
vmovq (key_table)(CTX), %xmm15;
- vpshufb .Lpack_bswap, %xmm15, %xmm15;
+ vpshufb .Lpack_bswap(%rip), %xmm15, %xmm15;
vpxor %xmm0, %xmm15, %xmm0;
vpxor %xmm1, %xmm15, %xmm1;
vpxor %xmm2, %xmm15, %xmm2;
@@ -1134,7 +1134,7 @@ camellia_xts_crypt_16way:
subq $(16 * 16), %rsp;
movq %rsp, %rax;
- vmovdqa .Lxts_gf128mul_and_shl1_mask, %xmm14;
+ vmovdqa .Lxts_gf128mul_and_shl1_mask(%rip), %xmm14;
/* load IV */
vmovdqu (%rcx), %xmm0;
@@ -1210,7 +1210,7 @@ camellia_xts_crypt_16way:
/* inpack16_pre: */
vmovq (key_table)(CTX, %r8, 8), %xmm15;
- vpshufb .Lpack_bswap, %xmm15, %xmm15;
+ vpshufb .Lpack_bswap(%rip), %xmm15, %xmm15;
vpxor 0 * 16(%rax), %xmm15, %xmm0;
vpxor %xmm1, %xmm15, %xmm1;
vpxor %xmm2, %xmm15, %xmm2;
@@ -1265,7 +1265,7 @@ ENTRY(camellia_xts_enc_16way)
*/
xorl %r8d, %r8d; /* input whitening key, 0 for enc */
- leaq __camellia_enc_blk16, %r9;
+ leaq __camellia_enc_blk16(%rip), %r9;
jmp camellia_xts_crypt_16way;
ENDPROC(camellia_xts_enc_16way)
@@ -1283,7 +1283,7 @@ ENTRY(camellia_xts_dec_16way)
movl $24, %eax;
cmovel %eax, %r8d; /* input whitening key, last for dec */
- leaq __camellia_dec_blk16, %r9;
+ leaq __camellia_dec_blk16(%rip), %r9;
jmp camellia_xts_crypt_16way;
ENDPROC(camellia_xts_dec_16way)
diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
index b66bbfa62f50..11bbaa1cd4a7 100644
--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
@@ -70,12 +70,12 @@
/* \
* S-function with AES subbytes \
*/ \
- vbroadcasti128 .Linv_shift_row, t4; \
- vpbroadcastd .L0f0f0f0f, t7; \
- vbroadcasti128 .Lpre_tf_lo_s1, t5; \
- vbroadcasti128 .Lpre_tf_hi_s1, t6; \
- vbroadcasti128 .Lpre_tf_lo_s4, t2; \
- vbroadcasti128 .Lpre_tf_hi_s4, t3; \
+ vbroadcasti128 .Linv_shift_row(%rip), t4; \
+ vpbroadcastd .L0f0f0f0f(%rip), t7; \
+ vbroadcasti128 .Lpre_tf_lo_s1(%rip), t5; \
+ vbroadcasti128 .Lpre_tf_hi_s1(%rip), t6; \
+ vbroadcasti128 .Lpre_tf_lo_s4(%rip), t2; \
+ vbroadcasti128 .Lpre_tf_hi_s4(%rip), t3; \
\
/* AES inverse shift rows */ \
vpshufb t4, x0, x0; \
@@ -121,8 +121,8 @@
vinserti128 $1, t2##_x, x6, x6; \
vextracti128 $1, x1, t3##_x; \
vextracti128 $1, x4, t2##_x; \
- vbroadcasti128 .Lpost_tf_lo_s1, t0; \
- vbroadcasti128 .Lpost_tf_hi_s1, t1; \
+ vbroadcasti128 .Lpost_tf_lo_s1(%rip), t0; \
+ vbroadcasti128 .Lpost_tf_hi_s1(%rip), t1; \
vaesenclast t4##_x, x2##_x, x2##_x; \
vaesenclast t4##_x, t6##_x, t6##_x; \
vinserti128 $1, t6##_x, x2, x2; \
@@ -137,16 +137,16 @@
vinserti128 $1, t2##_x, x4, x4; \
\
/* postfilter sboxes 1 and 4 */ \
- vbroadcasti128 .Lpost_tf_lo_s3, t2; \
- vbroadcasti128 .Lpost_tf_hi_s3, t3; \
+ vbroadcasti128 .Lpost_tf_lo_s3(%rip), t2; \
+ vbroadcasti128 .Lpost_tf_hi_s3(%rip), t3; \
filter_8bit(x0, t0, t1, t7, t6); \
filter_8bit(x7, t0, t1, t7, t6); \
filter_8bit(x3, t0, t1, t7, t6); \
filter_8bit(x6, t0, t1, t7, t6); \
\
/* postfilter sbox 3 */ \
- vbroadcasti128 .Lpost_tf_lo_s2, t4; \
- vbroadcasti128 .Lpost_tf_hi_s2, t5; \
+ vbroadcasti128 .Lpost_tf_lo_s2(%rip), t4; \
+ vbroadcasti128 .Lpost_tf_hi_s2(%rip), t5; \
filter_8bit(x2, t2, t3, t7, t6); \
filter_8bit(x5, t2, t3, t7, t6); \
\
@@ -483,7 +483,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
transpose_4x4(c0, c1, c2, c3, a0, a1); \
transpose_4x4(d0, d1, d2, d3, a0, a1); \
\
- vbroadcasti128 .Lshufb_16x16b, a0; \
+ vbroadcasti128 .Lshufb_16x16b(%rip), a0; \
vmovdqu st1, a1; \
vpshufb a0, a2, a2; \
vpshufb a0, a3, a3; \
@@ -522,7 +522,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
#define inpack32_pre(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
y6, y7, rio, key) \
vpbroadcastq key, x0; \
- vpshufb .Lpack_bswap, x0, x0; \
+ vpshufb .Lpack_bswap(%rip), x0, x0; \
\
vpxor 0 * 32(rio), x0, y7; \
vpxor 1 * 32(rio), x0, y6; \
@@ -573,7 +573,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
vmovdqu x0, stack_tmp0; \
\
vpbroadcastq key, x0; \
- vpshufb .Lpack_bswap, x0, x0; \
+ vpshufb .Lpack_bswap(%rip), x0, x0; \
\
vpxor x0, y7, y7; \
vpxor x0, y6, y6; \
@@ -1113,7 +1113,7 @@ ENTRY(camellia_ctr_32way)
vmovdqu (%rcx), %xmm0;
vmovdqa %xmm0, %xmm1;
inc_le128(%xmm0, %xmm15, %xmm14);
- vbroadcasti128 .Lbswap128_mask, %ymm14;
+ vbroadcasti128 .Lbswap128_mask(%rip), %ymm14;
vinserti128 $1, %xmm0, %ymm1, %ymm0;
vpshufb %ymm14, %ymm0, %ymm13;
vmovdqu %ymm13, 15 * 32(%rax);
@@ -1159,7 +1159,7 @@ ENTRY(camellia_ctr_32way)
/* inpack32_pre: */
vpbroadcastq (key_table)(CTX), %ymm15;
- vpshufb .Lpack_bswap, %ymm15, %ymm15;
+ vpshufb .Lpack_bswap(%rip), %ymm15, %ymm15;
vpxor %ymm0, %ymm15, %ymm0;
vpxor %ymm1, %ymm15, %ymm1;
vpxor %ymm2, %ymm15, %ymm2;
@@ -1243,13 +1243,13 @@ camellia_xts_crypt_32way:
subq $(16 * 32), %rsp;
movq %rsp, %rax;
- vbroadcasti128 .Lxts_gf128mul_and_shl1_mask_0, %ymm12;
+ vbroadcasti128 .Lxts_gf128mul_and_shl1_mask_0(%rip), %ymm12;
/* load IV and construct second IV */
vmovdqu (%rcx), %xmm0;
vmovdqa %xmm0, %xmm15;
gf128mul_x_ble(%xmm0, %xmm12, %xmm13);
- vbroadcasti128 .Lxts_gf128mul_and_shl1_mask_1, %ymm13;
+ vbroadcasti128 .Lxts_gf128mul_and_shl1_mask_1(%rip), %ymm13;
vinserti128 $1, %xmm0, %ymm15, %ymm0;
vpxor 0 * 32(%rdx), %ymm0, %ymm15;
vmovdqu %ymm15, 15 * 32(%rax);
@@ -1326,7 +1326,7 @@ camellia_xts_crypt_32way:
/* inpack32_pre: */
vpbroadcastq (key_table)(CTX, %r8, 8), %ymm15;
- vpshufb .Lpack_bswap, %ymm15, %ymm15;
+ vpshufb .Lpack_bswap(%rip), %ymm15, %ymm15;
vpxor 0 * 32(%rax), %ymm15, %ymm0;
vpxor %ymm1, %ymm15, %ymm1;
vpxor %ymm2, %ymm15, %ymm2;
@@ -1384,7 +1384,7 @@ ENTRY(camellia_xts_enc_32way)
xorl %r8d, %r8d; /* input whitening key, 0 for enc */
- leaq __camellia_enc_blk32, %r9;
+ leaq __camellia_enc_blk32(%rip), %r9;
jmp camellia_xts_crypt_32way;
ENDPROC(camellia_xts_enc_32way)
@@ -1402,7 +1402,7 @@ ENTRY(camellia_xts_dec_32way)
movl $24, %eax;
cmovel %eax, %r8d; /* input whitening key, last for dec */
- leaq __camellia_dec_blk32, %r9;
+ leaq __camellia_dec_blk32(%rip), %r9;
jmp camellia_xts_crypt_32way;
ENDPROC(camellia_xts_dec_32way)
diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S
index 95ba6956a7f6..ef1137406959 100644
--- a/arch/x86/crypto/camellia-x86_64-asm_64.S
+++ b/arch/x86/crypto/camellia-x86_64-asm_64.S
@@ -92,11 +92,13 @@
#define RXORbl %r9b
#define xor2ror16(T0, T1, tmp1, tmp2, ab, dst) \
+ leaq T0(%rip), tmp1; \
movzbl ab ## bl, tmp2 ## d; \
+ xorq (tmp1, tmp2, 8), dst; \
+ leaq T1(%rip), tmp2; \
movzbl ab ## bh, tmp1 ## d; \
- rorq $16, ab; \
- xorq T0(, tmp2, 8), dst; \
- xorq T1(, tmp1, 8), dst;
+ xorq (tmp2, tmp1, 8), dst; \
+ rorq $16, ab;
/**********************************************************************
1-way camellia
diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
index 86107c961bb4..64eb5c87d04a 100644
--- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
@@ -98,16 +98,20 @@
#define lookup_32bit(src, dst, op1, op2, op3, interleave_op, il_reg) \
- movzbl src ## bh, RID1d; \
- movzbl src ## bl, RID2d; \
- shrq $16, src; \
- movl s1(, RID1, 4), dst ## d; \
- op1 s2(, RID2, 4), dst ## d; \
- movzbl src ## bh, RID1d; \
- movzbl src ## bl, RID2d; \
- interleave_op(il_reg); \
- op2 s3(, RID1, 4), dst ## d; \
- op3 s4(, RID2, 4), dst ## d;
+ movzbl src ## bh, RID1d; \
+ leaq s1(%rip), RID2; \
+ movl (RID2, RID1, 4), dst ## d; \
+ movzbl src ## bl, RID2d; \
+ leaq s2(%rip), RID1; \
+ op1 (RID1, RID2, 4), dst ## d; \
+ shrq $16, src; \
+ movzbl src ## bh, RID1d; \
+ leaq s3(%rip), RID2; \
+ op2 (RID2, RID1, 4), dst ## d; \
+ movzbl src ## bl, RID2d; \
+ leaq s4(%rip), RID1; \
+ op3 (RID1, RID2, 4), dst ## d; \
+ interleave_op(il_reg);
#define dummy(d) /* do nothing */
@@ -166,15 +170,15 @@
subround(l ## 3, r ## 3, l ## 4, r ## 4, f);
#define enc_preload_rkr() \
- vbroadcastss .L16_mask, RKR; \
+ vbroadcastss .L16_mask(%rip), RKR; \
/* add 16-bit rotation to key rotations (mod 32) */ \
vpxor kr(CTX), RKR, RKR;
#define dec_preload_rkr() \
- vbroadcastss .L16_mask, RKR; \
+ vbroadcastss .L16_mask(%rip), RKR; \
/* add 16-bit rotation to key rotations (mod 32) */ \
vpxor kr(CTX), RKR, RKR; \
- vpshufb .Lbswap128_mask, RKR, RKR;
+ vpshufb .Lbswap128_mask(%rip), RKR, RKR;
#define transpose_2x4(x0, x1, t0, t1) \
vpunpckldq x1, x0, t0; \
@@ -251,9 +255,9 @@ __cast5_enc_blk16:
movq %rdi, CTX;
- vmovdqa .Lbswap_mask, RKM;
- vmovd .Lfirst_mask, R1ST;
- vmovd .L32_mask, R32;
+ vmovdqa .Lbswap_mask(%rip), RKM;
+ vmovd .Lfirst_mask(%rip), R1ST;
+ vmovd .L32_mask(%rip), R32;
enc_preload_rkr();
inpack_blocks(RL1, RR1, RTMP, RX, RKM);
@@ -287,7 +291,7 @@ __cast5_enc_blk16:
popq %rbx;
popq %r15;
- vmovdqa .Lbswap_mask, RKM;
+ vmovdqa .Lbswap_mask(%rip), RKM;
outunpack_blocks(RR1, RL1, RTMP, RX, RKM);
outunpack_blocks(RR2, RL2, RTMP, RX, RKM);
@@ -325,9 +329,9 @@ __cast5_dec_blk16:
movq %rdi, CTX;
- vmovdqa .Lbswap_mask, RKM;
- vmovd .Lfirst_mask, R1ST;
- vmovd .L32_mask, R32;
+ vmovdqa .Lbswap_mask(%rip), RKM;
+ vmovd .Lfirst_mask(%rip), R1ST;
+ vmovd .L32_mask(%rip), R32;
dec_preload_rkr();
inpack_blocks(RL1, RR1, RTMP, RX, RKM);
@@ -358,7 +362,7 @@ __cast5_dec_blk16:
round(RL, RR, 1, 2);
round(RR, RL, 0, 1);
- vmovdqa .Lbswap_mask, RKM;
+ vmovdqa .Lbswap_mask(%rip), RKM;
popq %rbx;
popq %r15;
@@ -521,8 +525,8 @@ ENTRY(cast5_ctr_16way)
vpcmpeqd RKR, RKR, RKR;
vpaddq RKR, RKR, RKR; /* low: -2, high: -2 */
- vmovdqa .Lbswap_iv_mask, R1ST;
- vmovdqa .Lbswap128_mask, RKM;
+ vmovdqa .Lbswap_iv_mask(%rip), R1ST;
+ vmovdqa .Lbswap128_mask(%rip), RKM;
/* load IV and byteswap */
vmovq (%rcx), RX;
diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
index 7f30b6f0d72c..da1b7e4a23e4 100644
--- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
@@ -98,16 +98,20 @@
#define lookup_32bit(src, dst, op1, op2, op3, interleave_op, il_reg) \
- movzbl src ## bh, RID1d; \
- movzbl src ## bl, RID2d; \
- shrq $16, src; \
- movl s1(, RID1, 4), dst ## d; \
- op1 s2(, RID2, 4), dst ## d; \
- movzbl src ## bh, RID1d; \
- movzbl src ## bl, RID2d; \
- interleave_op(il_reg); \
- op2 s3(, RID1, 4), dst ## d; \
- op3 s4(, RID2, 4), dst ## d;
+ movzbl src ## bh, RID1d; \
+ leaq s1(%rip), RID2; \
+ movl (RID2, RID1, 4), dst ## d; \
+ movzbl src ## bl, RID2d; \
+ leaq s2(%rip), RID1; \
+ op1 (RID1, RID2, 4), dst ## d; \
+ shrq $16, src; \
+ movzbl src ## bh, RID1d; \
+ leaq s3(%rip), RID2; \
+ op2 (RID2, RID1, 4), dst ## d; \
+ movzbl src ## bl, RID2d; \
+ leaq s4(%rip), RID1; \
+ op3 (RID1, RID2, 4), dst ## d; \
+ interleave_op(il_reg);
#define dummy(d) /* do nothing */
@@ -190,10 +194,10 @@
qop(RD, RC, 1);
#define shuffle(mask) \
- vpshufb mask, RKR, RKR;
+ vpshufb mask(%rip), RKR, RKR;
#define preload_rkr(n, do_mask, mask) \
- vbroadcastss .L16_mask, RKR; \
+ vbroadcastss .L16_mask(%rip), RKR; \
/* add 16-bit rotation to key rotations (mod 32) */ \
vpxor (kr+n*16)(CTX), RKR, RKR; \
do_mask(mask);
@@ -275,9 +279,9 @@ __cast6_enc_blk8:
movq %rdi, CTX;
- vmovdqa .Lbswap_mask, RKM;
- vmovd .Lfirst_mask, R1ST;
- vmovd .L32_mask, R32;
+ vmovdqa .Lbswap_mask(%rip), RKM;
+ vmovd .Lfirst_mask(%rip), R1ST;
+ vmovd .L32_mask(%rip), R32;
inpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
inpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
@@ -301,7 +305,7 @@ __cast6_enc_blk8:
popq %rbx;
popq %r15;
- vmovdqa .Lbswap_mask, RKM;
+ vmovdqa .Lbswap_mask(%rip), RKM;
outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
@@ -323,9 +327,9 @@ __cast6_dec_blk8:
movq %rdi, CTX;
- vmovdqa .Lbswap_mask, RKM;
- vmovd .Lfirst_mask, R1ST;
- vmovd .L32_mask, R32;
+ vmovdqa .Lbswap_mask(%rip), RKM;
+ vmovd .Lfirst_mask(%rip), R1ST;
+ vmovd .L32_mask(%rip), R32;
inpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
inpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
@@ -349,7 +353,7 @@ __cast6_dec_blk8:
popq %rbx;
popq %r15;
- vmovdqa .Lbswap_mask, RKM;
+ vmovdqa .Lbswap_mask(%rip), RKM;
outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
diff --git a/arch/x86/crypto/des3_ede-asm_64.S b/arch/x86/crypto/des3_ede-asm_64.S
index 8e49ce117494..4bbd3ec78df5 100644
--- a/arch/x86/crypto/des3_ede-asm_64.S
+++ b/arch/x86/crypto/des3_ede-asm_64.S
@@ -138,21 +138,29 @@
movzbl RW0bl, RT2d; \
movzbl RW0bh, RT3d; \
shrq $16, RW0; \
- movq s8(, RT0, 8), RT0; \
- xorq s6(, RT1, 8), to; \
+ leaq s8(%rip), RW1; \
+ movq (RW1, RT0, 8), RT0; \
+ leaq s6(%rip), RW1; \
+ xorq (RW1, RT1, 8), to; \
movzbl RW0bl, RL1d; \
movzbl RW0bh, RT1d; \
shrl $16, RW0d; \
- xorq s4(, RT2, 8), RT0; \
- xorq s2(, RT3, 8), to; \
+ leaq s4(%rip), RW1; \
+ xorq (RW1, RT2, 8), RT0; \
+ leaq s2(%rip), RW1; \
+ xorq (RW1, RT3, 8), to; \
movzbl RW0bl, RT2d; \
movzbl RW0bh, RT3d; \
- xorq s7(, RL1, 8), RT0; \
- xorq s5(, RT1, 8), to; \
- xorq s3(, RT2, 8), RT0; \
+ leaq s7(%rip), RW1; \
+ xorq (RW1, RL1, 8), RT0; \
+ leaq s5(%rip), RW1; \
+ xorq (RW1, RT1, 8), to; \
+ leaq s3(%rip), RW1; \
+ xorq (RW1, RT2, 8), RT0; \
load_next_key(n, RW0); \
xorq RT0, to; \
- xorq s1(, RT3, 8), to; \
+ leaq s1(%rip), RW1; \
+ xorq (RW1, RT3, 8), to; \
#define load_next_key(n, RWx) \
movq (((n) + 1) * 8)(CTX), RWx;
@@ -364,65 +372,89 @@ ENDPROC(des3_ede_x86_64_crypt_blk)
movzbl RW0bl, RT3d; \
movzbl RW0bh, RT1d; \
shrq $16, RW0; \
- xorq s8(, RT3, 8), to##0; \
- xorq s6(, RT1, 8), to##0; \
+ leaq s8(%rip), RT2; \
+ xorq (RT2, RT3, 8), to##0; \
+ leaq s6(%rip), RT2; \
+ xorq (RT2, RT1, 8), to##0; \
movzbl RW0bl, RT3d; \
movzbl RW0bh, RT1d; \
shrq $16, RW0; \
- xorq s4(, RT3, 8), to##0; \
- xorq s2(, RT1, 8), to##0; \
+ leaq s4(%rip), RT2; \
+ xorq (RT2, RT3, 8), to##0; \
+ leaq s2(%rip), RT2; \
+ xorq (RT2, RT1, 8), to##0; \
movzbl RW0bl, RT3d; \
movzbl RW0bh, RT1d; \
shrl $16, RW0d; \
- xorq s7(, RT3, 8), to##0; \
- xorq s5(, RT1, 8), to##0; \
+ leaq s7(%rip), RT2; \
+ xorq (RT2, RT3, 8), to##0; \
+ leaq s5(%rip), RT2; \
+ xorq (RT2, RT1, 8), to##0; \
movzbl RW0bl, RT3d; \
movzbl RW0bh, RT1d; \
load_next_key(n, RW0); \
- xorq s3(, RT3, 8), to##0; \
- xorq s1(, RT1, 8), to##0; \
+ leaq s3(%rip), RT2; \
+ xorq (RT2, RT3, 8), to##0; \
+ leaq s1(%rip), RT2; \
+ xorq (RT2, RT1, 8), to##0; \
xorq from##1, RW1; \
movzbl RW1bl, RT3d; \
movzbl RW1bh, RT1d; \
shrq $16, RW1; \
- xorq s8(, RT3, 8), to##1; \
- xorq s6(, RT1, 8), to##1; \
+ leaq s8(%rip), RT2; \
+ xorq (RT2, RT3, 8), to##1; \
+ leaq s6(%rip), RT2; \
+ xorq (RT2, RT1, 8), to##1; \
movzbl RW1bl, RT3d; \
movzbl RW1bh, RT1d; \
shrq $16, RW1; \
- xorq s4(, RT3, 8), to##1; \
- xorq s2(, RT1, 8), to##1; \
+ leaq s4(%rip), RT2; \
+ xorq (RT2, RT3, 8), to##1; \
+ leaq s2(%rip), RT2; \
+ xorq (RT2, RT1, 8), to##1; \
movzbl RW1bl, RT3d; \
movzbl RW1bh, RT1d; \
shrl $16, RW1d; \
- xorq s7(, RT3, 8), to##1; \
- xorq s5(, RT1, 8), to##1; \
+ leaq s7(%rip), RT2; \
+ xorq (RT2, RT3, 8), to##1; \
+ leaq s5(%rip), RT2; \
+ xorq (RT2, RT1, 8), to##1; \
movzbl RW1bl, RT3d; \
movzbl RW1bh, RT1d; \
do_movq(RW0, RW1); \
- xorq s3(, RT3, 8), to##1; \
- xorq s1(, RT1, 8), to##1; \
+ leaq s3(%rip), RT2; \
+ xorq (RT2, RT3, 8), to##1; \
+ leaq s1(%rip), RT2; \
+ xorq (RT2, RT1, 8), to##1; \
xorq from##2, RW2; \
movzbl RW2bl, RT3d; \
movzbl RW2bh, RT1d; \
shrq $16, RW2; \
- xorq s8(, RT3, 8), to##2; \
- xorq s6(, RT1, 8), to##2; \
+ leaq s8(%rip), RT2; \
+ xorq (RT2, RT3, 8), to##2; \
+ leaq s6(%rip), RT2; \
+ xorq (RT2, RT1, 8), to##2; \
movzbl RW2bl, RT3d; \
movzbl RW2bh, RT1d; \
shrq $16, RW2; \
- xorq s4(, RT3, 8), to##2; \
- xorq s2(, RT1, 8), to##2; \
+ leaq s4(%rip), RT2; \
+ xorq (RT2, RT3, 8), to##2; \
+ leaq s2(%rip), RT2; \
+ xorq (RT2, RT1, 8), to##2; \
movzbl RW2bl, RT3d; \
movzbl RW2bh, RT1d; \
shrl $16, RW2d; \
- xorq s7(, RT3, 8), to##2; \
- xorq s5(, RT1, 8), to##2; \
+ leaq s7(%rip), RT2; \
+ xorq (RT2, RT3, 8), to##2; \
+ leaq s5(%rip), RT2; \
+ xorq (RT2, RT1, 8), to##2; \
movzbl RW2bl, RT3d; \
movzbl RW2bh, RT1d; \
do_movq(RW0, RW2); \
- xorq s3(, RT3, 8), to##2; \
- xorq s1(, RT1, 8), to##2;
+ leaq s3(%rip), RT2; \
+ xorq (RT2, RT3, 8), to##2; \
+ leaq s1(%rip), RT2; \
+ xorq (RT2, RT1, 8), to##2;
#define __movq(src, dst) \
movq src, dst;
diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S
index f94375a8dcd1..d56a281221fb 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_asm.S
+++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S
@@ -97,7 +97,7 @@ ENTRY(clmul_ghash_mul)
FRAME_BEGIN
movups (%rdi), DATA
movups (%rsi), SHASH
- movaps .Lbswap_mask, BSWAP
+ movaps .Lbswap_mask(%rip), BSWAP
PSHUFB_XMM BSWAP DATA
call __clmul_gf128mul_ble
PSHUFB_XMM BSWAP DATA
@@ -114,7 +114,7 @@ ENTRY(clmul_ghash_update)
FRAME_BEGIN
cmp $16, %rdx
jb .Lupdate_just_ret # check length
- movaps .Lbswap_mask, BSWAP
+ movaps .Lbswap_mask(%rip), BSWAP
movups (%rdi), DATA
movups (%rcx), SHASH
PSHUFB_XMM BSWAP DATA
diff --git a/arch/x86/crypto/glue_helper-asm-avx.S b/arch/x86/crypto/glue_helper-asm-avx.S
index 02ee2308fb38..8a49ab1699ef 100644
--- a/arch/x86/crypto/glue_helper-asm-avx.S
+++ b/arch/x86/crypto/glue_helper-asm-avx.S
@@ -54,7 +54,7 @@
#define load_ctr_8way(iv, bswap, x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2) \
vpcmpeqd t0, t0, t0; \
vpsrldq $8, t0, t0; /* low: -1, high: 0 */ \
- vmovdqa bswap, t1; \
+ vmovdqa bswap(%rip), t1; \
\
/* load IV and byteswap */ \
vmovdqu (iv), x7; \
@@ -99,7 +99,7 @@
#define load_xts_8way(iv, src, dst, x0, x1, x2, x3, x4, x5, x6, x7, tiv, t0, \
t1, xts_gf128mul_and_shl1_mask) \
- vmovdqa xts_gf128mul_and_shl1_mask, t0; \
+ vmovdqa xts_gf128mul_and_shl1_mask(%rip), t0; \
\
/* load IV */ \
vmovdqu (iv), tiv; \
diff --git a/arch/x86/crypto/glue_helper-asm-avx2.S b/arch/x86/crypto/glue_helper-asm-avx2.S
index a53ac11dd385..e04c80467bd2 100644
--- a/arch/x86/crypto/glue_helper-asm-avx2.S
+++ b/arch/x86/crypto/glue_helper-asm-avx2.S
@@ -67,7 +67,7 @@
vmovdqu (iv), t2x; \
vmovdqa t2x, t3x; \
inc_le128(t2x, t0x, t1x); \
- vbroadcasti128 bswap, t1; \
+ vbroadcasti128 bswap(%rip), t1; \
vinserti128 $1, t2x, t3, t2; /* ab: le0 ; cd: le1 */ \
vpshufb t1, t2, x0; \
\
@@ -124,13 +124,13 @@
tivx, t0, t0x, t1, t1x, t2, t2x, t3, \
xts_gf128mul_and_shl1_mask_0, \
xts_gf128mul_and_shl1_mask_1) \
- vbroadcasti128 xts_gf128mul_and_shl1_mask_0, t1; \
+ vbroadcasti128 xts_gf128mul_and_shl1_mask_0(%rip), t1; \
\
/* load IV and construct second IV */ \
vmovdqu (iv), tivx; \
vmovdqa tivx, t0x; \
gf128mul_x_ble(tivx, t1x, t2x); \
- vbroadcasti128 xts_gf128mul_and_shl1_mask_1, t2; \
+ vbroadcasti128 xts_gf128mul_and_shl1_mask_1(%rip), t2; \
vinserti128 $1, tivx, t0, tiv; \
vpxor (0*32)(src), tiv, x0; \
vmovdqu tiv, (0*32)(dst); \
diff --git a/arch/x86/crypto/morus1280-avx2-asm.S b/arch/x86/crypto/morus1280-avx2-asm.S
index de182c460f82..8d38df563b10 100644
--- a/arch/x86/crypto/morus1280-avx2-asm.S
+++ b/arch/x86/crypto/morus1280-avx2-asm.S
@@ -258,7 +258,7 @@ ENTRY(crypto_morus1280_avx2_init)
/* load all zeros: */
vpxor STATE3, STATE3, STATE3
/* load the constant: */
- vmovdqa .Lmorus1280_const, STATE4
+ vmovdqa .Lmorus1280_const(%rip), STATE4
/* update 16 times with zero: */
call __morus1280_update_zero
@@ -555,7 +555,7 @@ ENTRY(crypto_morus1280_avx2_dec_tail)
/* mask with byte count: */
movq %rcx, T0_LOW
vpbroadcastb T0_LOW, T0
- vmovdqa .Lmorus1280_counter, T1
+ vmovdqa .Lmorus1280_counter(%rip), T1
vpcmpgtb T1, T0, T0
vpand T0, MSG, MSG
diff --git a/arch/x86/crypto/morus1280-sse2-asm.S b/arch/x86/crypto/morus1280-sse2-asm.S
index da5d2905db60..ba77b6c0980e 100644
--- a/arch/x86/crypto/morus1280-sse2-asm.S
+++ b/arch/x86/crypto/morus1280-sse2-asm.S
@@ -387,8 +387,8 @@ ENTRY(crypto_morus1280_sse2_init)
pxor STATE3_LO, STATE3_LO
pxor STATE3_HI, STATE3_HI
/* load the constant: */
- movdqa .Lmorus640_const_0, STATE4_LO
- movdqa .Lmorus640_const_1, STATE4_HI
+ movdqa .Lmorus640_const_0(%rip), STATE4_LO
+ movdqa .Lmorus640_const_1(%rip), STATE4_HI
/* update 16 times with zero: */
call __morus1280_update_zero
@@ -802,8 +802,8 @@ ENTRY(crypto_morus1280_sse2_dec_tail)
punpcklbw T0_LO, T0_LO
punpcklbw T0_LO, T0_LO
movdqa T0_LO, T0_HI
- movdqa .Lmorus640_counter_0, T1_LO
- movdqa .Lmorus640_counter_1, T1_HI
+ movdqa .Lmorus640_counter_0(%rip), T1_LO
+ movdqa .Lmorus640_counter_1(%rip), T1_HI
pcmpgtb T1_LO, T0_LO
pcmpgtb T1_HI, T0_HI
pand T0_LO, MSG_LO
diff --git a/arch/x86/crypto/morus640-sse2-asm.S b/arch/x86/crypto/morus640-sse2-asm.S
index 414db480250e..09155d58a28f 100644
--- a/arch/x86/crypto/morus640-sse2-asm.S
+++ b/arch/x86/crypto/morus640-sse2-asm.S
@@ -238,8 +238,8 @@ ENTRY(crypto_morus640_sse2_init)
/* load all ones: */
pcmpeqd STATE2, STATE2
/* load the constants: */
- movdqa .Lmorus640_const_0, STATE3
- movdqa .Lmorus640_const_1, STATE4
+ movdqa .Lmorus640_const_0(%rip), STATE3
+ movdqa .Lmorus640_const_1(%rip), STATE4
/* update 16 times with zero: */
call __morus640_update_zero
@@ -545,7 +545,7 @@ ENTRY(crypto_morus640_sse2_dec_tail)
punpcklbw T0, T0
punpcklbw T0, T0
punpcklbw T0, T0
- movdqa .Lmorus640_counter, T1
+ movdqa .Lmorus640_counter(%rip), T1
pcmpgtb T1, T0
pand T0, MSG
diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S
index 1420db15dcdd..2ced4b2f6c76 100644
--- a/arch/x86/crypto/sha256-avx2-asm.S
+++ b/arch/x86/crypto/sha256-avx2-asm.S
@@ -588,37 +588,42 @@ last_block_enter:
mov INP, _INP(%rsp)
## schedule 48 input dwords, by doing 3 rounds of 12 each
- xor SRND, SRND
+ leaq K256(%rip), SRND
+ ## loop1 upper bound
+ leaq K256+3*4*32(%rip), INP
.align 16
loop1:
- vpaddd K256+0*32(SRND), X0, XFER
+ vpaddd 0*32(SRND), X0, XFER
vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
FOUR_ROUNDS_AND_SCHED _XFER + 0*32
- vpaddd K256+1*32(SRND), X0, XFER
+ vpaddd 1*32(SRND), X0, XFER
vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
FOUR_ROUNDS_AND_SCHED _XFER + 1*32
- vpaddd K256+2*32(SRND), X0, XFER
+ vpaddd 2*32(SRND), X0, XFER
vmovdqa XFER, 2*32+_XFER(%rsp, SRND)
FOUR_ROUNDS_AND_SCHED _XFER + 2*32
- vpaddd K256+3*32(SRND), X0, XFER
+ vpaddd 3*32(SRND), X0, XFER
vmovdqa XFER, 3*32+_XFER(%rsp, SRND)
FOUR_ROUNDS_AND_SCHED _XFER + 3*32
add $4*32, SRND
- cmp $3*4*32, SRND
+ cmp INP, SRND
jb loop1
+ ## loop2 upper bound
+ leaq K256+4*4*32(%rip), INP
+
loop2:
## Do last 16 rounds with no scheduling
- vpaddd K256+0*32(SRND), X0, XFER
+ vpaddd 0*32(SRND), X0, XFER
vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
DO_4ROUNDS _XFER + 0*32
- vpaddd K256+1*32(SRND), X1, XFER
+ vpaddd 1*32(SRND), X1, XFER
vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
DO_4ROUNDS _XFER + 1*32
add $2*32, SRND
@@ -626,7 +631,7 @@ loop2:
vmovdqa X2, X0
vmovdqa X3, X1
- cmp $4*4*32, SRND
+ cmp INP, SRND
jb loop2
mov _CTX(%rsp), CTX
--
2.20.1.495.gaa96b0ce6b-goog
^ permalink raw reply related [flat|nested] 56+ messages in thread
* Re: [PATCH v6 01/27] x86/crypto: Adapt assembly for PIE support
2019-01-31 19:24 ` [PATCH v6 01/27] x86/crypto: Adapt assembly for PIE support Thomas Garnier
@ 2019-02-07 11:48 ` Borislav Petkov
2019-02-07 17:01 ` Thomas Garnier
0 siblings, 1 reply; 56+ messages in thread
From: Borislav Petkov @ 2019-02-07 11:48 UTC (permalink / raw)
To: Thomas Garnier
Cc: kernel-hardening, kristen, Herbert Xu, David S. Miller,
Thomas Gleixner, Ingo Molnar, H. Peter Anvin, x86, linux-crypto,
linux-kernel
On Thu, Jan 31, 2019 at 11:24:08AM -0800, Thomas Garnier wrote:
> Change the assembly code to use only relative references of symbols for the
> kernel to be PIE compatible.
>
> Position Independent Executable (PIE) support will allow to extend the
> KASLR randomization range below 0xffffffff80000000.
This sentence is auto-sprinkled in a bunch of commit messages. Sounds to
me it should be rather somewhere in the 0/n message as a justification
for the feature but not in every other commit message...?
--
Regards/Gruss,
Boris.
Good mailing practices for 400: avoid top-posting and trim the reply.
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [PATCH v6 01/27] x86/crypto: Adapt assembly for PIE support
2019-02-07 11:48 ` Borislav Petkov
@ 2019-02-07 17:01 ` Thomas Garnier
0 siblings, 0 replies; 56+ messages in thread
From: Thomas Garnier @ 2019-02-07 17:01 UTC (permalink / raw)
To: Borislav Petkov
Cc: Kernel Hardening, Kristen Carlson Accardi, Herbert Xu,
David S. Miller, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
the arch/x86 maintainers, Linux Crypto Mailing List, LKML
On Thu, Feb 7, 2019 at 3:49 AM Borislav Petkov <bp@alien8.de> wrote:
>
> On Thu, Jan 31, 2019 at 11:24:08AM -0800, Thomas Garnier wrote:
> > Change the assembly code to use only relative references of symbols for the
> > kernel to be PIE compatible.
> >
> > Position Independent Executable (PIE) support will allow to extend the
> > KASLR randomization range below 0xffffffff80000000.
>
> This sentence is auto-sprinkled in a bunch of commit messages. Sounds to
> me it should be rather somewhere in the 0/n message as a justification
> for the feature but not in every other commit message...?
I go into more details in the compose message but I wanted a small
sentence explaining why PIE changes are being made.
>
> --
> Regards/Gruss,
> Boris.
>
> Good mailing practices for 400: avoid top-posting and trim the reply.
^ permalink raw reply [flat|nested] 56+ messages in thread
* [PATCH v6 02/27] x86: Use symbol name in jump table for PIE support
2019-01-31 19:24 [PATCH v6 00/27] x86: PIE support and option to extend KASLR randomization Thomas Garnier
2019-01-31 19:24 ` [PATCH v6 01/27] x86/crypto: Adapt assembly for PIE support Thomas Garnier
@ 2019-01-31 19:24 ` Thomas Garnier
2019-02-07 12:17 ` Borislav Petkov
2019-01-31 19:24 ` [PATCH v6 03/27] x86: Add macro to get symbol address " Thomas Garnier
` (26 subsequent siblings)
28 siblings, 1 reply; 56+ messages in thread
From: Thomas Garnier @ 2019-01-31 19:24 UTC (permalink / raw)
To: kernel-hardening
Cc: kristen, Thomas Garnier, Thomas Gleixner, Ingo Molnar,
Borislav Petkov, H. Peter Anvin, x86, Peter Zijlstra (Intel),
Ard Biesheuvel, Thomas Garnier, Nadav Amit, Masahiro Yamada,
linux-kernel
Replace the %c operand modifier with %P. %c is incompatible with PIE
because it implies an immediate value, whereas %P references a symbol.
Change the _ASM_PTR reference to .long for expected relocation size and
add a long padding to ensure entry alignment.
Position Independent Executable (PIE) support will allow extending the
KASLR randomization range below 0xffffffff80000000.
Signed-off-by: Thomas Garnier <thgarnie@chromium.org>
---
arch/x86/include/asm/jump_label.h | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 65191ce8e1cf..e47fad8ee632 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -25,9 +25,9 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran
".pushsection __jump_table, \"aw\" \n\t"
_ASM_ALIGN "\n\t"
".long 1b - ., %l[l_yes] - . \n\t"
- _ASM_PTR "%c0 + %c1 - .\n\t"
+ _ASM_PTR "%P0 - .\n\t"
".popsection \n\t"
- : : "i" (key), "i" (branch) : : l_yes);
+ : : "X" (&((char *)key)[branch]) : : l_yes);
return false;
l_yes:
@@ -42,9 +42,9 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, bool
".pushsection __jump_table, \"aw\" \n\t"
_ASM_ALIGN "\n\t"
".long 1b - ., %l[l_yes] - . \n\t"
- _ASM_PTR "%c0 + %c1 - .\n\t"
+ _ASM_PTR "%P0 - .\n\t"
".popsection \n\t"
- : : "i" (key), "i" (branch) : : l_yes);
+ : : "X" (&((char *)key)[branch]) : : l_yes);
return false;
l_yes:
--
2.20.1.495.gaa96b0ce6b-goog
^ permalink raw reply related [flat|nested] 56+ messages in thread
* Re: [PATCH v6 02/27] x86: Use symbol name in jump table for PIE support
2019-01-31 19:24 ` [PATCH v6 02/27] x86: Use symbol name in jump table " Thomas Garnier
@ 2019-02-07 12:17 ` Borislav Petkov
2019-02-07 17:04 ` Thomas Garnier
0 siblings, 1 reply; 56+ messages in thread
From: Borislav Petkov @ 2019-02-07 12:17 UTC (permalink / raw)
To: Thomas Garnier
Cc: kernel-hardening, kristen, Thomas Gleixner, Ingo Molnar,
H. Peter Anvin, x86, Peter Zijlstra (Intel),
Ard Biesheuvel, Thomas Garnier, Nadav Amit, Masahiro Yamada,
linux-kernel
On Thu, Jan 31, 2019 at 11:24:09AM -0800, Thomas Garnier wrote:
> Replace the %c constraint with %P. The %c is incompatible with PIE
> because it implies an immediate value whereas %P reference a symbol.
How so?
AFAIK, %c requires a constant operand and if %P is used to print a
constant, it simply drops syntax-specific prefixes and does a bare
constant.
I guess that here
https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#x86Operandmodifiers
is not entirely correct as it should not say "If used for a constant"
for %P but say "symbol or constant".
But before/after asm doesn't show any difference. So what gives?
before:
# 39 "./arch/x86/include/asm/jump_label.h" 1
1:
.byte 0xe9
.long .L241 - 2f #
2:
.pushsection __jump_table, "aw"
.balign 8
.long 1b - ., .L241 - . #
.quad __use_tsc + 1 - . #,
.popsection
after:
# 39 "./arch/x86/include/asm/jump_label.h" 1
1:
.byte 0xe9
.long .L241 - 2f #
2:
.pushsection __jump_table, "aw"
.balign 8
.long 1b - ., .L241 - . #
.quad __use_tsc+1 - . #
.popsection
--
Regards/Gruss,
Boris.
Good mailing practices for 400: avoid top-posting and trim the reply.
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [PATCH v6 02/27] x86: Use symbol name in jump table for PIE support
2019-02-07 12:17 ` Borislav Petkov
@ 2019-02-07 17:04 ` Thomas Garnier
2019-02-07 17:11 ` Borislav Petkov
0 siblings, 1 reply; 56+ messages in thread
From: Thomas Garnier @ 2019-02-07 17:04 UTC (permalink / raw)
To: Borislav Petkov
Cc: Kernel Hardening, Kristen Carlson Accardi, Thomas Gleixner,
Ingo Molnar, H. Peter Anvin, the arch/x86 maintainers,
Peter Zijlstra (Intel),
Ard Biesheuvel, Nadav Amit, Masahiro Yamada, LKML
On Thu, Feb 7, 2019 at 4:17 AM Borislav Petkov <bp@alien8.de> wrote:
>
> On Thu, Jan 31, 2019 at 11:24:09AM -0800, Thomas Garnier wrote:
> > Replace the %c constraint with %P. The %c is incompatible with PIE
> > because it implies an immediate value whereas %P reference a symbol.
>
> How so?
>
> AFAIK, %c requires a constant operand and if %P is used to print a
> constant, it simply drops syntax-specific prefixes and does a bare
> constant.
>
> I guess that here
>
> https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#x86Operandmodifiers
>
> is not entirely correct as it should not say "If used for a constant"
> for %P but say "symbol or constant".
>
> But before/after asm doesn't show any difference. So what gives?
I assume that's an optimisation done by gcc later. The P modifier in
the documentation does state that it is used to generate PIC code.
>
> before:
> # 39 "./arch/x86/include/asm/jump_label.h" 1
> 1:
> .byte 0xe9
> .long .L241 - 2f #
> 2:
> .pushsection __jump_table, "aw"
> .balign 8
> .long 1b - ., .L241 - . #
> .quad __use_tsc + 1 - . #,
> .popsection
>
> after:
> # 39 "./arch/x86/include/asm/jump_label.h" 1
> 1:
> .byte 0xe9
> .long .L241 - 2f #
> 2:
> .pushsection __jump_table, "aw"
> .balign 8
> .long 1b - ., .L241 - . #
> .quad __use_tsc+1 - . #
> .popsection
>
> --
> Regards/Gruss,
> Boris.
>
> Good mailing practices for 400: avoid top-posting and trim the reply.
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [PATCH v6 02/27] x86: Use symbol name in jump table for PIE support
2019-02-07 17:04 ` Thomas Garnier
@ 2019-02-07 17:11 ` Borislav Petkov
2019-02-07 23:55 ` Thomas Garnier
0 siblings, 1 reply; 56+ messages in thread
From: Borislav Petkov @ 2019-02-07 17:11 UTC (permalink / raw)
To: Thomas Garnier
Cc: Kernel Hardening, Kristen Carlson Accardi, Thomas Gleixner,
Ingo Molnar, H. Peter Anvin, the arch/x86 maintainers,
Peter Zijlstra (Intel),
Ard Biesheuvel, Nadav Amit, Masahiro Yamada, LKML
On Thu, Feb 07, 2019 at 09:04:45AM -0800, Thomas Garnier wrote:
> I assume that's an optimisation done by gcc later.
So why is that change even needed? Where does it break?
> The P modifier in the documentation does state that it is used to
> generate PIC code.
The documentation says:
"If used for a function, print the PLT suffix and generate PIC code. For
example, emit foo@PLT instead of ’foo’ for the function foo()."
when you use %P for a function. Which is not how it is used here.
--
Regards/Gruss,
Boris.
Good mailing practices for 400: avoid top-posting and trim the reply.
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [PATCH v6 02/27] x86: Use symbol name in jump table for PIE support
2019-02-07 17:11 ` Borislav Petkov
@ 2019-02-07 23:55 ` Thomas Garnier
0 siblings, 0 replies; 56+ messages in thread
From: Thomas Garnier @ 2019-02-07 23:55 UTC (permalink / raw)
To: Borislav Petkov
Cc: Kernel Hardening, Kristen Carlson Accardi, Thomas Gleixner,
Ingo Molnar, H. Peter Anvin, the arch/x86 maintainers,
Peter Zijlstra (Intel),
Ard Biesheuvel, Nadav Amit, Masahiro Yamada, LKML
On Thu, Feb 7, 2019 at 9:11 AM Borislav Petkov <bp@alien8.de> wrote:
>
> On Thu, Feb 07, 2019 at 09:04:45AM -0800, Thomas Garnier wrote:
> > I assume that's an optimisation done by gcc later.
>
> So why is that change even needed? Where does it break?
>
> > The P modifier in the documentation does state that it is used to
> > generate PIC code.
>
> The documentation says:
>
> "If used for a function, print the PLT suffix and generate PIC code. For
> example, emit foo@PLT instead of ’foo’ for the function foo()."
>
> when you use %P for a function. Which is not how it is used here.
I did more checks about that. I think Ard's patch to make jump label
relative actually fixed the issue I had with them.
Thanks for spotting this, I will do additional checks and look at
removing this change.
>
> --
> Regards/Gruss,
> Boris.
>
> Good mailing practices for 400: avoid top-posting and trim the reply.
^ permalink raw reply [flat|nested] 56+ messages in thread
* [PATCH v6 03/27] x86: Add macro to get symbol address for PIE support
2019-01-31 19:24 [PATCH v6 00/27] x86: PIE support and option to extend KASLR randomization Thomas Garnier
2019-01-31 19:24 ` [PATCH v6 01/27] x86/crypto: Adapt assembly for PIE support Thomas Garnier
2019-01-31 19:24 ` [PATCH v6 02/27] x86: Use symbol name in jump table " Thomas Garnier
@ 2019-01-31 19:24 ` Thomas Garnier
2019-01-31 19:24 ` [PATCH v6 04/27] x86: relocate_kernel - Adapt assembly " Thomas Garnier
` (25 subsequent siblings)
28 siblings, 0 replies; 56+ messages in thread
From: Thomas Garnier @ 2019-01-31 19:24 UTC (permalink / raw)
To: kernel-hardening
Cc: kristen, Thomas Garnier, Thomas Gleixner, Ingo Molnar,
Borislav Petkov, H. Peter Anvin, x86, Juergen Gross,
Thomas Garnier, Peter Zijlstra (Intel),
Nadav Amit, Arnaldo Carvalho de Melo, linux-kernel
Add a new _ASM_MOVABS macro to fetch a symbol address. It will be used
to replace "_ASM_MOV $<symbol>, %dst" code constructs, which are not
compatible with PIE.
Signed-off-by: Thomas Garnier <thgarnie@chromium.org>
---
arch/x86/include/asm/asm.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 6467757bb39f..5ff63ca80bca 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -30,6 +30,7 @@
#define _ASM_ALIGN __ASM_SEL(.balign 4, .balign 8)
#define _ASM_MOV __ASM_SIZE(mov)
+#define _ASM_MOVABS __ASM_SEL(movl, movabsq)
#define _ASM_INC __ASM_SIZE(inc)
#define _ASM_DEC __ASM_SIZE(dec)
#define _ASM_ADD __ASM_SIZE(add)
--
2.20.1.495.gaa96b0ce6b-goog
^ permalink raw reply related [flat|nested] 56+ messages in thread
* [PATCH v6 04/27] x86: relocate_kernel - Adapt assembly for PIE support
2019-01-31 19:24 [PATCH v6 00/27] x86: PIE support and option to extend KASLR randomization Thomas Garnier
` (2 preceding siblings ...)
2019-01-31 19:24 ` [PATCH v6 03/27] x86: Add macro to get symbol address " Thomas Garnier
@ 2019-01-31 19:24 ` Thomas Garnier
2019-01-31 19:24 ` [PATCH v6 05/27] x86/entry/64: " Thomas Garnier
` (24 subsequent siblings)
28 siblings, 0 replies; 56+ messages in thread
From: Thomas Garnier @ 2019-01-31 19:24 UTC (permalink / raw)
To: kernel-hardening
Cc: kristen, Thomas Garnier, Thomas Gleixner, Ingo Molnar,
Borislav Petkov, H. Peter Anvin, x86, Thomas Garnier,
linux-kernel
Change the assembly code to use only absolute references of symbols for the
kernel to be PIE compatible.
Position Independent Executable (PIE) support will allow extending the
KASLR randomization range below 0xffffffff80000000.
Signed-off-by: Thomas Garnier <thgarnie@chromium.org>
---
arch/x86/kernel/relocate_kernel_64.S | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index 11eda21eb697..3320368b6ec9 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -208,7 +208,7 @@ identity_mapped:
movq %rax, %cr3
lea PAGE_SIZE(%r8), %rsp
call swap_pages
- movq $virtual_mapped, %rax
+ movabsq $virtual_mapped, %rax
pushq %rax
ret
--
2.20.1.495.gaa96b0ce6b-goog
^ permalink raw reply related [flat|nested] 56+ messages in thread
* [PATCH v6 05/27] x86/entry/64: Adapt assembly for PIE support
2019-01-31 19:24 [PATCH v6 00/27] x86: PIE support and option to extend KASLR randomization Thomas Garnier
` (3 preceding siblings ...)
2019-01-31 19:24 ` [PATCH v6 04/27] x86: relocate_kernel - Adapt assembly " Thomas Garnier
@ 2019-01-31 19:24 ` Thomas Garnier
2019-01-31 19:24 ` [PATCH v6 06/27] x86: pm-trace - " Thomas Garnier
` (23 subsequent siblings)
28 siblings, 0 replies; 56+ messages in thread
From: Thomas Garnier @ 2019-01-31 19:24 UTC (permalink / raw)
To: kernel-hardening
Cc: kristen, Thomas Garnier, Andy Lutomirski, Thomas Gleixner,
Ingo Molnar, Borislav Petkov, H. Peter Anvin, x86, linux-kernel
Change the assembly code to use only relative references of symbols for the
kernel to be PIE compatible.
Position Independent Executable (PIE) support will allow extending the
KASLR randomization range below 0xffffffff80000000.
Signed-off-by: Thomas Garnier <thgarnie@chromium.org>
---
arch/x86/entry/entry_64.S | 16 +++++++++++-----
1 file changed, 11 insertions(+), 5 deletions(-)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 1f0efdb7b629..16a93eb4c11f 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1269,7 +1269,8 @@ ENTRY(error_entry)
movl %ecx, %eax /* zero extend */
cmpq %rax, RIP+8(%rsp)
je .Lbstep_iret
- cmpq $.Lgs_change, RIP+8(%rsp)
+ leaq .Lgs_change(%rip), %rcx
+ cmpq %rcx, RIP+8(%rsp)
jne .Lerror_entry_done
/*
@@ -1466,10 +1467,10 @@ ENTRY(nmi)
* resume the outer NMI.
*/
- movq $repeat_nmi, %rdx
+ leaq repeat_nmi(%rip), %rdx
cmpq 8(%rsp), %rdx
ja 1f
- movq $end_repeat_nmi, %rdx
+ leaq end_repeat_nmi(%rip), %rdx
cmpq 8(%rsp), %rdx
ja nested_nmi_out
1:
@@ -1523,7 +1524,8 @@ nested_nmi:
pushq %rdx
pushfq
pushq $__KERNEL_CS
- pushq $repeat_nmi
+ leaq repeat_nmi(%rip), %rdx
+ pushq %rdx
/* Put stack back */
addq $(6*8), %rsp
@@ -1562,7 +1564,11 @@ first_nmi:
addq $8, (%rsp) /* Fix up RSP */
pushfq /* RFLAGS */
pushq $__KERNEL_CS /* CS */
- pushq $1f /* RIP */
+ pushq $0 /* Future return address */
+ pushq %rax /* Save RAX */
+ leaq 1f(%rip), %rax /* RIP */
+ movq %rax, 8(%rsp) /* Put 1f on return address */
+ popq %rax /* Restore RAX */
iretq /* continues at repeat_nmi below */
UNWIND_HINT_IRET_REGS
1:
--
2.20.1.495.gaa96b0ce6b-goog
^ permalink raw reply related [flat|nested] 56+ messages in thread
* [PATCH v6 06/27] x86: pm-trace - Adapt assembly for PIE support
2019-01-31 19:24 [PATCH v6 00/27] x86: PIE support and option to extend KASLR randomization Thomas Garnier
` (4 preceding siblings ...)
2019-01-31 19:24 ` [PATCH v6 05/27] x86/entry/64: " Thomas Garnier
@ 2019-01-31 19:24 ` Thomas Garnier
2019-01-31 19:24 ` [PATCH v6 07/27] x86/CPU: " Thomas Garnier
` (22 subsequent siblings)
28 siblings, 0 replies; 56+ messages in thread
From: Thomas Garnier @ 2019-01-31 19:24 UTC (permalink / raw)
To: kernel-hardening
Cc: kristen, Thomas Garnier, Thomas Gleixner, Ingo Molnar,
Borislav Petkov, H. Peter Anvin, x86, Thomas Garnier,
linux-kernel
Change assembly to use the new _ASM_MOVABS macro instead of _ASM_MOV for
the assembly to be PIE compatible.
Position Independent Executable (PIE) support will allow extending the
KASLR randomization range below 0xffffffff80000000.
Signed-off-by: Thomas Garnier <thgarnie@chromium.org>
---
arch/x86/include/asm/pm-trace.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/pm-trace.h b/arch/x86/include/asm/pm-trace.h
index bfa32aa428e5..972070806ce9 100644
--- a/arch/x86/include/asm/pm-trace.h
+++ b/arch/x86/include/asm/pm-trace.h
@@ -8,7 +8,7 @@
do { \
if (pm_trace_enabled) { \
const void *tracedata; \
- asm volatile(_ASM_MOV " $1f,%0\n" \
+ asm volatile(_ASM_MOVABS " $1f,%0\n" \
".section .tracedata,\"a\"\n" \
"1:\t.word %c1\n\t" \
_ASM_PTR " %c2\n" \
--
2.20.1.495.gaa96b0ce6b-goog
^ permalink raw reply related [flat|nested] 56+ messages in thread
* [PATCH v6 07/27] x86/CPU: Adapt assembly for PIE support
2019-01-31 19:24 [PATCH v6 00/27] x86: PIE support and option to extend KASLR randomization Thomas Garnier
` (5 preceding siblings ...)
2019-01-31 19:24 ` [PATCH v6 06/27] x86: pm-trace - " Thomas Garnier
@ 2019-01-31 19:24 ` Thomas Garnier
2019-01-31 19:24 ` [PATCH v6 08/27] x86/acpi: " Thomas Garnier
` (21 subsequent siblings)
28 siblings, 0 replies; 56+ messages in thread
From: Thomas Garnier @ 2019-01-31 19:24 UTC (permalink / raw)
To: kernel-hardening
Cc: kristen, Thomas Garnier, Thomas Gleixner, Ingo Molnar,
Borislav Petkov, H. Peter Anvin, x86, Andrew Morton,
Thomas Garnier, linux-kernel
Change the assembly code to use only relative references of symbols for the
kernel to be PIE compatible. Use the new _ASM_MOVABS macro instead of
the 'mov $symbol, %dst' construct.
Position Independent Executable (PIE) support will allow extending the
KASLR randomization range below 0xffffffff80000000.
Signed-off-by: Thomas Garnier <thgarnie@chromium.org>
---
arch/x86/include/asm/processor.h | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 33051436c864..ce9851bf6778 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -723,11 +723,13 @@ static inline void sync_core(void)
"pushfq\n\t"
"mov %%cs, %0\n\t"
"pushq %q0\n\t"
- "pushq $1f\n\t"
+ "movabsq $1f, %q0\n\t"
+ "pushq %q0\n\t"
"iretq\n\t"
UNWIND_HINT_RESTORE
"1:"
- : "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory");
+ : "=&r" (tmp), ASM_CALL_CONSTRAINT
+ : : "cc", "memory");
#endif
}
--
2.20.1.495.gaa96b0ce6b-goog
^ permalink raw reply related [flat|nested] 56+ messages in thread
* [PATCH v6 08/27] x86/acpi: Adapt assembly for PIE support
2019-01-31 19:24 [PATCH v6 00/27] x86: PIE support and option to extend KASLR randomization Thomas Garnier
` (6 preceding siblings ...)
2019-01-31 19:24 ` [PATCH v6 07/27] x86/CPU: " Thomas Garnier
@ 2019-01-31 19:24 ` Thomas Garnier
2019-01-31 19:24 ` [PATCH v6 09/27] x86/boot/64: " Thomas Garnier
` (20 subsequent siblings)
28 siblings, 0 replies; 56+ messages in thread
From: Thomas Garnier @ 2019-01-31 19:24 UTC (permalink / raw)
To: kernel-hardening
Cc: kristen, Thomas Garnier, Pavel Machek, Rafael J . Wysocki,
Rafael J. Wysocki, Len Brown, Thomas Gleixner, Ingo Molnar,
Borislav Petkov, H. Peter Anvin, x86, linux-pm, linux-kernel
Change the assembly code to use only relative references of symbols for the
kernel to be PIE compatible.
Position Independent Executable (PIE) support will allow extending the
KASLR randomization range below 0xffffffff80000000.
Signed-off-by: Thomas Garnier <thgarnie@chromium.org>
Acked-by: Pavel Machek <pavel@ucw.cz>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
arch/x86/kernel/acpi/wakeup_64.S | 31 ++++++++++++++++---------------
1 file changed, 16 insertions(+), 15 deletions(-)
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index 50b8ed0317a3..472659c0f811 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -14,7 +14,7 @@
* Hooray, we are in Long 64-bit mode (but still running in low memory)
*/
ENTRY(wakeup_long64)
- movq saved_magic, %rax
+ movq saved_magic(%rip), %rax
movq $0x123456789abcdef0, %rdx
cmpq %rdx, %rax
jne bogus_64_magic
@@ -25,14 +25,14 @@ ENTRY(wakeup_long64)
movw %ax, %es
movw %ax, %fs
movw %ax, %gs
- movq saved_rsp, %rsp
+ movq saved_rsp(%rip), %rsp
- movq saved_rbx, %rbx
- movq saved_rdi, %rdi
- movq saved_rsi, %rsi
- movq saved_rbp, %rbp
+ movq saved_rbx(%rip), %rbx
+ movq saved_rdi(%rip), %rdi
+ movq saved_rsi(%rip), %rsi
+ movq saved_rbp(%rip), %rbp
- movq saved_rip, %rax
+ movq saved_rip(%rip), %rax
jmp *%rax
ENDPROC(wakeup_long64)
@@ -45,7 +45,7 @@ ENTRY(do_suspend_lowlevel)
xorl %eax, %eax
call save_processor_state
- movq $saved_context, %rax
+ leaq saved_context(%rip), %rax
movq %rsp, pt_regs_sp(%rax)
movq %rbp, pt_regs_bp(%rax)
movq %rsi, pt_regs_si(%rax)
@@ -64,13 +64,14 @@ ENTRY(do_suspend_lowlevel)
pushfq
popq pt_regs_flags(%rax)
- movq $.Lresume_point, saved_rip(%rip)
+ leaq .Lresume_point(%rip), %rax
+ movq %rax, saved_rip(%rip)
- movq %rsp, saved_rsp
- movq %rbp, saved_rbp
- movq %rbx, saved_rbx
- movq %rdi, saved_rdi
- movq %rsi, saved_rsi
+ movq %rsp, saved_rsp(%rip)
+ movq %rbp, saved_rbp(%rip)
+ movq %rbx, saved_rbx(%rip)
+ movq %rdi, saved_rdi(%rip)
+ movq %rsi, saved_rsi(%rip)
addq $8, %rsp
movl $3, %edi
@@ -82,7 +83,7 @@ ENTRY(do_suspend_lowlevel)
.align 4
.Lresume_point:
/* We don't restore %rax, it must be 0 anyway */
- movq $saved_context, %rax
+ leaq saved_context(%rip), %rax
movq saved_context_cr4(%rax), %rbx
movq %rbx, %cr4
movq saved_context_cr3(%rax), %rbx
--
2.20.1.495.gaa96b0ce6b-goog
^ permalink raw reply related [flat|nested] 56+ messages in thread
* [PATCH v6 09/27] x86/boot/64: Adapt assembly for PIE support
2019-01-31 19:24 [PATCH v6 00/27] x86: PIE support and option to extend KASLR randomization Thomas Garnier
` (7 preceding siblings ...)
2019-01-31 19:24 ` [PATCH v6 08/27] x86/acpi: " Thomas Garnier
@ 2019-01-31 19:24 ` Thomas Garnier
2019-01-31 19:24 ` [PATCH v6 10/27] x86/power/64: " Thomas Garnier
` (19 subsequent siblings)
28 siblings, 0 replies; 56+ messages in thread
From: Thomas Garnier @ 2019-01-31 19:24 UTC (permalink / raw)
To: kernel-hardening
Cc: kristen, Thomas Garnier, Thomas Gleixner, Ingo Molnar,
Borislav Petkov, H. Peter Anvin, x86, Juergen Gross,
Kirill A. Shutemov, Thomas Garnier, linux-kernel
Change the assembly code to use only relative references of symbols for the
kernel to be PIE compatible.
Early at boot, the kernel is mapped at a temporary address while preparing
the page table. To know the changes needed for the page table with KASLR,
the boot code calculates the difference between the expected address of the
kernel and the one chosen by KASLR. It does not work with PIE because all
symbols in code are relative. Instead of getting the future relocated
virtual address, you will get the current temporary mapping.
Instructions were changed to have absolute 64-bit references.
Position Independent Executable (PIE) support will allow extending the
KASLR randomization range below 0xffffffff80000000.
Signed-off-by: Thomas Garnier <thgarnie@chromium.org>
---
arch/x86/kernel/head_64.S | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index d1dbe8e4eb82..b9b6c6aa0313 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -90,8 +90,10 @@ startup_64:
popq %rsi
/* Form the CR3 value being sure to include the CR3 modifier */
- addq $(early_top_pgt - __START_KERNEL_map), %rax
+ movabs $(early_top_pgt - __START_KERNEL_map), %rcx
+ addq %rcx, %rax
jmp 1f
+
ENTRY(secondary_startup_64)
UNWIND_HINT_EMPTY
/*
@@ -120,7 +122,8 @@ ENTRY(secondary_startup_64)
popq %rsi
/* Form the CR3 value being sure to include the CR3 modifier */
- addq $(init_top_pgt - __START_KERNEL_map), %rax
+ movabs $(init_top_pgt - __START_KERNEL_map), %rcx
+ addq %rcx, %rax
1:
/* Enable PAE mode, PGE and LA57 */
@@ -138,7 +141,7 @@ ENTRY(secondary_startup_64)
movq %rax, %cr3
/* Ensure I am executing from virtual addresses */
- movq $1f, %rax
+ movabs $1f, %rax
ANNOTATE_RETPOLINE_SAFE
jmp *%rax
1:
@@ -235,11 +238,12 @@ ENTRY(secondary_startup_64)
* REX.W + FF /5 JMP m16:64 Jump far, absolute indirect,
* address given in m16:64.
*/
- pushq $.Lafter_lret # put return address on stack for unwinder
+ movabs $.Lafter_lret, %rax
+ pushq %rax # put return address on stack for unwinder
xorl %ebp, %ebp # clear frame pointer
- movq initial_code(%rip), %rax
+ leaq initial_code(%rip), %rax
pushq $__KERNEL_CS # set correct cs
- pushq %rax # target address in negative space
+ pushq (%rax) # target address in negative space
lretq
.Lafter_lret:
END(secondary_startup_64)
--
2.20.1.495.gaa96b0ce6b-goog
^ permalink raw reply related [flat|nested] 56+ messages in thread
* [PATCH v6 10/27] x86/power/64: Adapt assembly for PIE support
2019-01-31 19:24 [PATCH v6 00/27] x86: PIE support and option to extend KASLR randomization Thomas Garnier
` (8 preceding siblings ...)
2019-01-31 19:24 ` [PATCH v6 09/27] x86/boot/64: " Thomas Garnier
@ 2019-01-31 19:24 ` Thomas Garnier
2019-01-31 19:24 ` [PATCH v6 11/27] x86/paravirt: " Thomas Garnier
` (18 subsequent siblings)
28 siblings, 0 replies; 56+ messages in thread
From: Thomas Garnier @ 2019-01-31 19:24 UTC (permalink / raw)
To: kernel-hardening
Cc: kristen, Thomas Garnier, Pavel Machek, Rafael J . Wysocki,
Rafael J. Wysocki, Thomas Gleixner, Ingo Molnar, Borislav Petkov,
H. Peter Anvin, x86, linux-pm, linux-kernel
Change the assembly code to use only relative references of symbols for the
kernel to be PIE compatible.
Position Independent Executable (PIE) support will allow extending the
KASLR randomization range below 0xffffffff80000000.
Signed-off-by: Thomas Garnier <thgarnie@chromium.org>
Acked-by: Pavel Machek <pavel@ucw.cz>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
arch/x86/power/hibernate_asm_64.S | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S
index 3008baa2fa95..9ed980efef72 100644
--- a/arch/x86/power/hibernate_asm_64.S
+++ b/arch/x86/power/hibernate_asm_64.S
@@ -24,7 +24,7 @@
#include <asm/frame.h>
ENTRY(swsusp_arch_suspend)
- movq $saved_context, %rax
+ leaq saved_context(%rip), %rax
movq %rsp, pt_regs_sp(%rax)
movq %rbp, pt_regs_bp(%rax)
movq %rsi, pt_regs_si(%rax)
@@ -115,7 +115,7 @@ ENTRY(restore_registers)
movq %rax, %cr4; # turn PGE back on
/* We don't restore %rax, it must be 0 anyway */
- movq $saved_context, %rax
+ leaq saved_context(%rip), %rax
movq pt_regs_sp(%rax), %rsp
movq pt_regs_bp(%rax), %rbp
movq pt_regs_si(%rax), %rsi
--
2.20.1.495.gaa96b0ce6b-goog
^ permalink raw reply related [flat|nested] 56+ messages in thread
* [PATCH v6 11/27] x86/paravirt: Adapt assembly for PIE support
2019-01-31 19:24 [PATCH v6 00/27] x86: PIE support and option to extend KASLR randomization Thomas Garnier
` (9 preceding siblings ...)
2019-01-31 19:24 ` [PATCH v6 10/27] x86/power/64: " Thomas Garnier
@ 2019-01-31 19:24 ` Thomas Garnier
2019-01-31 19:24 ` [PATCH v6 12/27] x86/alternatives: " Thomas Garnier
` (17 subsequent siblings)
28 siblings, 0 replies; 56+ messages in thread
From: Thomas Garnier @ 2019-01-31 19:24 UTC (permalink / raw)
To: kernel-hardening
Cc: kristen, Thomas Garnier, Juergen Gross, Alok Kataria,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, H. Peter Anvin,
x86, virtualization, linux-kernel
If PIE is enabled, switch the paravirt assembly constraints to be
compatible. The %c/i constraints generate smaller code, so they are kept
by default.
Position Independent Executable (PIE) support will allow extending the
KASLR randomization range below 0xffffffff80000000.
Signed-off-by: Thomas Garnier <thgarnie@chromium.org>
---
arch/x86/include/asm/paravirt_types.h | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 488c59686a73..8cfcc1b463de 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -342,9 +342,17 @@ extern struct paravirt_patch_template pv_ops;
#define PARAVIRT_PATCH(x) \
(offsetof(struct paravirt_patch_template, x) / sizeof(void *))
+#ifdef CONFIG_X86_PIE
+#define paravirt_opptr_call "a"
+#define paravirt_opptr_type "p"
+#else
+#define paravirt_opptr_call "c"
+#define paravirt_opptr_type "i"
+#endif
+
#define paravirt_type(op) \
[paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \
- [paravirt_opptr] "i" (&(pv_ops.op))
+ [paravirt_opptr] paravirt_opptr_type (&(pv_ops.op))
#define paravirt_clobber(clobber) \
[paravirt_clobber] "i" (clobber)
@@ -392,7 +400,7 @@ int paravirt_disable_iospace(void);
*/
#define PARAVIRT_CALL \
ANNOTATE_RETPOLINE_SAFE \
- "call *%c[paravirt_opptr];"
+ "call *%" paravirt_opptr_call "[paravirt_opptr];"
/*
* These macros are intended to wrap calls through one of the paravirt
--
2.20.1.495.gaa96b0ce6b-goog
^ permalink raw reply related [flat|nested] 56+ messages in thread
* [PATCH v6 12/27] x86/alternatives: Adapt assembly for PIE support
2019-01-31 19:24 [PATCH v6 00/27] x86: PIE support and option to extend KASLR randomization Thomas Garnier
` (10 preceding siblings ...)
2019-01-31 19:24 ` [PATCH v6 11/27] x86/paravirt: " Thomas Garnier
@ 2019-01-31 19:24 ` Thomas Garnier
2019-01-31 19:24 ` [PATCH v6 13/27] x86/boot/64: Build head64.c as mcmodel large when PIE is enabled Thomas Garnier
` (16 subsequent siblings)
28 siblings, 0 replies; 56+ messages in thread
From: Thomas Garnier @ 2019-01-31 19:24 UTC (permalink / raw)
To: kernel-hardening
Cc: kristen, Thomas Garnier, Thomas Gleixner, Ingo Molnar,
Borislav Petkov, H. Peter Anvin, x86, Peter Zijlstra (Intel),
Thomas Garnier, Alexey Dobriyan, Nadav Amit, linux-kernel
Change the assembly options to work with pointers instead of integers.
Position Independent Executable (PIE) support will allow to extend the
KASLR randomization range below 0xffffffff80000000.
Signed-off-by: Thomas Garnier <thgarnie@chromium.org>
---
arch/x86/include/asm/alternative.h | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 4c74073a19cc..f5d6fe1d294b 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -233,7 +233,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
/* Like alternative_io, but for replacing a direct call with another one. */
#define alternative_call(oldfunc, newfunc, feature, output, input...) \
asm volatile (ALTERNATIVE("call %P[old]", "call %P[new]", feature) \
- : output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input)
+ : output : [old] "X" (oldfunc), [new] "X" (newfunc), ## input)
/*
* Like alternative_call, but there are two features and respective functions.
@@ -246,8 +246,8 @@ static inline int alternatives_text_reserved(void *start, void *end)
asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\
"call %P[new2]", feature2) \
: output, ASM_CALL_CONSTRAINT \
- : [old] "i" (oldfunc), [new1] "i" (newfunc1), \
- [new2] "i" (newfunc2), ## input)
+ : [old] "X" (oldfunc), [new1] "X" (newfunc1), \
+ [new2] "X" (newfunc2), ## input)
/*
* use this macro(s) if you need more than one output parameter
--
2.20.1.495.gaa96b0ce6b-goog
^ permalink raw reply related [flat|nested] 56+ messages in thread
* [PATCH v6 13/27] x86/boot/64: Build head64.c as mcmodel large when PIE is enabled
2019-01-31 19:24 [PATCH v6 00/27] x86: PIE support and option to extend KASLR randomization Thomas Garnier
` (11 preceding siblings ...)
2019-01-31 19:24 ` [PATCH v6 12/27] x86/alternatives: " Thomas Garnier
@ 2019-01-31 19:24 ` Thomas Garnier
2019-02-01 11:15 ` Kirill A. Shutemov
2019-01-31 19:24 ` [PATCH v6 14/27] x86/percpu: Adapt percpu for PIE support Thomas Garnier
` (15 subsequent siblings)
28 siblings, 1 reply; 56+ messages in thread
From: Thomas Garnier @ 2019-01-31 19:24 UTC (permalink / raw)
To: kernel-hardening
Cc: kristen, Thomas Garnier, Thomas Gleixner, Ingo Molnar,
Borislav Petkov, H. Peter Anvin, x86, Mimi Zohar, Juergen Gross,
Nayna Jain, Masahiro Yamada, Thomas Garnier, Jan Kiszka,
Nick Desaulniers, Kirill A. Shutemov, linux-kernel
The __startup_64 function assumes all symbols have relocated addresses
instead of the current boot virtual address. PIE-generated code favors
relative addresses, making all virtual and physical address math incorrect.
If PIE is enabled, build head64.c as mcmodel large instead to ensure absolute
references on all memory access. Add a global __force_order variable required
when using a large model with read_cr* functions.
To build head64.c as mcmodel=large, disable the retpoline gcc flags.
This code is used at early boot and removed later, it doesn't need
retpoline mitigation.
Position Independent Executable (PIE) support will allow to extend the
KASLR randomization range below 0xffffffff80000000.
Signed-off-by: Thomas Garnier <thgarnie@chromium.org>
---
arch/x86/kernel/Makefile | 6 ++++++
arch/x86/kernel/head64.c | 3 +++
2 files changed, 9 insertions(+)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 00b7e27bc2b7..1f98f52eab9f 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -22,6 +22,12 @@ CFLAGS_REMOVE_early_printk.o = -pg
CFLAGS_REMOVE_head64.o = -pg
endif
+ifdef CONFIG_X86_PIE
+# Remove PIE and retpoline flags that are incompatible with mcmodel=large
+CFLAGS_REMOVE_head64.o += -fPIE -mindirect-branch=thunk-extern -mindirect-branch-register
+CFLAGS_head64.o = -mcmodel=large
+endif
+
KASAN_SANITIZE_head$(BITS).o := n
KASAN_SANITIZE_dumpstack.o := n
KASAN_SANITIZE_dumpstack_$(BITS).o := n
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 16b1cbd3a61e..22e81275495b 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -63,6 +63,9 @@ EXPORT_SYMBOL(vmemmap_base);
#define __head __section(.head.text)
+/* Required for read_cr3 when building as PIE */
+unsigned long __force_order;
+
static void __head *fixup_pointer(void *ptr, unsigned long physaddr)
{
return ptr - (void *)_text + (void *)physaddr;
--
2.20.1.495.gaa96b0ce6b-goog
^ permalink raw reply related [flat|nested] 56+ messages in thread
* Re: [PATCH v6 13/27] x86/boot/64: Build head64.c as mcmodel large when PIE is enabled
2019-01-31 19:24 ` [PATCH v6 13/27] x86/boot/64: Build head64.c as mcmodel large when PIE is enabled Thomas Garnier
@ 2019-02-01 11:15 ` Kirill A. Shutemov
2019-02-01 17:11 ` Thomas Garnier
0 siblings, 1 reply; 56+ messages in thread
From: Kirill A. Shutemov @ 2019-02-01 11:15 UTC (permalink / raw)
To: Thomas Garnier
Cc: kernel-hardening, kristen, Thomas Gleixner, Ingo Molnar,
Borislav Petkov, H. Peter Anvin, x86, Mimi Zohar, Juergen Gross,
Nayna Jain, Masahiro Yamada, Thomas Garnier, Jan Kiszka,
Nick Desaulniers, Kirill A. Shutemov, linux-kernel
On Thu, Jan 31, 2019 at 11:24:20AM -0800, Thomas Garnier wrote:
> The __startup_64 function assumes all symbols have relocated addresses
> instead of the current boot virtual address. PIE generated code favor
> relative addresses making all virtual and physical address math incorrect.
> If PIE is enabled, build head64.c as mcmodel large instead to ensure absolute
> references on all memory access. Add a global __force_order variable required
> when using a large model with read_cr* functions.
>
> To build head64.c as mcmodel=large, disable the retpoline gcc flags.
> This code is used at early boot and removed later, it doesn't need
> retpoline mitigation.
>
> Position Independent Executable (PIE) support will allow to extend the
> KASLR randomization range below 0xffffffff80000000.
>
> Signed-off-by: Thomas Garnier <thgarnie@chromium.org>
> ---
> arch/x86/kernel/Makefile | 6 ++++++
> arch/x86/kernel/head64.c | 3 +++
> 2 files changed, 9 insertions(+)
>
> diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
> index 00b7e27bc2b7..1f98f52eab9f 100644
> --- a/arch/x86/kernel/Makefile
> +++ b/arch/x86/kernel/Makefile
> @@ -22,6 +22,12 @@ CFLAGS_REMOVE_early_printk.o = -pg
> CFLAGS_REMOVE_head64.o = -pg
> endif
>
> +ifdef CONFIG_X86_PIE
> +# Remove PIE and retpoline flags that are incompatible with mcmodel=large
> +CFLAGS_REMOVE_head64.o += -fPIE -mindirect-branch=thunk-extern -mindirect-branch-register
> +CFLAGS_head64.o = -mcmodel=large
> +endif
> +
> KASAN_SANITIZE_head$(BITS).o := n
> KASAN_SANITIZE_dumpstack.o := n
> KASAN_SANITIZE_dumpstack_$(BITS).o := n
> diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
> index 16b1cbd3a61e..22e81275495b 100644
> --- a/arch/x86/kernel/head64.c
> +++ b/arch/x86/kernel/head64.c
> @@ -63,6 +63,9 @@ EXPORT_SYMBOL(vmemmap_base);
>
> #define __head __section(.head.text)
>
> +/* Required for read_cr3 when building as PIE */
> +unsigned long __force_order;
> +
I believe it is only needed for GCC < 5. Newer GCC can eliminate the
reference. See my comment in arch/x86/boot/compressed/pgtable_64.c.
Maybe we should expand the comment here too?
--
Kirill A. Shutemov
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [PATCH v6 13/27] x86/boot/64: Build head64.c as mcmodel large when PIE is enabled
2019-02-01 11:15 ` Kirill A. Shutemov
@ 2019-02-01 17:11 ` Thomas Garnier
0 siblings, 0 replies; 56+ messages in thread
From: Thomas Garnier @ 2019-02-01 17:11 UTC (permalink / raw)
To: Kirill A. Shutemov
Cc: Kernel Hardening, kristen, Thomas Gleixner, Ingo Molnar,
Borislav Petkov, H. Peter Anvin, the arch/x86 maintainers,
Mimi Zohar, Juergen Gross, Nayna Jain, Masahiro Yamada,
Jan Kiszka, Nick Desaulniers, Kirill A. Shutemov, LKML
On Fri, Feb 1, 2019 at 3:15 AM Kirill A. Shutemov <kirill@shutemov.name> wrote:
>
> On Thu, Jan 31, 2019 at 11:24:20AM -0800, Thomas Garnier wrote:
> > The __startup_64 function assumes all symbols have relocated addresses
> > instead of the current boot virtual address. PIE generated code favor
> > relative addresses making all virtual and physical address math incorrect.
> > If PIE is enabled, build head64.c as mcmodel large instead to ensure absolute
> > references on all memory access. Add a global __force_order variable required
> > when using a large model with read_cr* functions.
> >
> > To build head64.c as mcmodel=large, disable the retpoline gcc flags.
> > This code is used at early boot and removed later, it doesn't need
> > retpoline mitigation.
> >
> > Position Independent Executable (PIE) support will allow to extend the
> > KASLR randomization range below 0xffffffff80000000.
> >
> > Signed-off-by: Thomas Garnier <thgarnie@chromium.org>
> > ---
> > arch/x86/kernel/Makefile | 6 ++++++
> > arch/x86/kernel/head64.c | 3 +++
> > 2 files changed, 9 insertions(+)
> >
> > diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
> > index 00b7e27bc2b7..1f98f52eab9f 100644
> > --- a/arch/x86/kernel/Makefile
> > +++ b/arch/x86/kernel/Makefile
> > @@ -22,6 +22,12 @@ CFLAGS_REMOVE_early_printk.o = -pg
> > CFLAGS_REMOVE_head64.o = -pg
> > endif
> >
> > +ifdef CONFIG_X86_PIE
> > +# Remove PIE and retpoline flags that are incompatible with mcmodel=large
> > +CFLAGS_REMOVE_head64.o += -fPIE -mindirect-branch=thunk-extern -mindirect-branch-register
> > +CFLAGS_head64.o = -mcmodel=large
> > +endif
> > +
> > KASAN_SANITIZE_head$(BITS).o := n
> > KASAN_SANITIZE_dumpstack.o := n
> > KASAN_SANITIZE_dumpstack_$(BITS).o := n
> > diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
> > index 16b1cbd3a61e..22e81275495b 100644
> > --- a/arch/x86/kernel/head64.c
> > +++ b/arch/x86/kernel/head64.c
> > @@ -63,6 +63,9 @@ EXPORT_SYMBOL(vmemmap_base);
> >
> > #define __head __section(.head.text)
> >
> > +/* Required for read_cr3 when building as PIE */
> > +unsigned long __force_order;
> > +
>
> I believe it only needed for GCC < 5. Newer GCC can eliminate the
> reference. See my comment in arch/x86/boot/compressed/pgtable_64.c.
>
> Maybe we should expand the comment here too?
Makes sense, I will add a similar comment in the next iteration. Thanks
for pointing that out.
>
> --
> Kirill A. Shutemov
^ permalink raw reply [flat|nested] 56+ messages in thread
* [PATCH v6 14/27] x86/percpu: Adapt percpu for PIE support
2019-01-31 19:24 [PATCH v6 00/27] x86: PIE support and option to extend KASLR randomization Thomas Garnier
` (12 preceding siblings ...)
2019-01-31 19:24 ` [PATCH v6 13/27] x86/boot/64: Build head64.c as mcmodel large when PIE is enabled Thomas Garnier
@ 2019-01-31 19:24 ` Thomas Garnier
2019-01-31 20:57 ` Christopher Lameter
2019-01-31 19:24 ` [PATCH v6 15/27] compiler: Option to default to hidden symbols Thomas Garnier
` (14 subsequent siblings)
28 siblings, 1 reply; 56+ messages in thread
From: Thomas Garnier @ 2019-01-31 19:24 UTC (permalink / raw)
To: kernel-hardening
Cc: kristen, Thomas Garnier, Andy Lutomirski, Thomas Gleixner,
Ingo Molnar, Borislav Petkov, H. Peter Anvin, x86, Dennis Zhou,
Tejun Heo, Christoph Lameter, Boris Ostrovsky, Juergen Gross,
Stefano Stabellini, Andrew Morton, Andi Kleen, Thomas Garnier,
Kirill A. Shutemov, Michal Hocko, Mike Rapoport,
Stephen Rothwell, Cao jin, Brijesh Singh, Masahiro Yamada,
Joerg Roedel, Peter Zijlstra, Kees Cook, Mathieu Desnoyers,
linux-kernel, xen-devel
Percpu uses a clever design where the .percpu ELF section has a virtual
address of zero and the custom Linux relocation code avoids relocating
specific symbols. It makes the code simple and easily adaptable with or
without SMP support.
This design is incompatible with PIE. While creating a PIE binary, the
compiler tries to make everything relative. The compiler will attempt to
generate instructions with the distance between zero and any 64-bit
virtual address. It will fail as the relocation range cannot fit within
the possible instructions accessing a segment register.
This patch solves this problem by removing the zero mapping. The .percpu
symbols are now close to the base of the kernel and the compiler
generates appropriate relocations. To accommodate this change, the GS base
is adapted to be the difference between zero and the .percpu section
address. These changes are done only when PIE is enabled. The original
implementation is kept as-is by default.
The assembly and PER_CPU macros are changed to use relative references
when PIE is enabled.
The KALLSYMS_ABSOLUTE_PERCPU configuration is disabled with PIE given
percpu symbols are not absolute in this case.
Position Independent Executable (PIE) support will allow to extend the
KASLR randomization range below 0xffffffff80000000.
Signed-off-by: Thomas Garnier <thgarnie@chromium.org>
---
arch/x86/entry/calling.h | 2 +-
arch/x86/entry/entry_64.S | 4 ++--
arch/x86/include/asm/percpu.h | 25 +++++++++++++++++++------
arch/x86/include/asm/processor.h | 4 +++-
arch/x86/kernel/head_64.S | 4 ++++
arch/x86/kernel/setup_percpu.c | 5 ++++-
arch/x86/kernel/vmlinux.lds.S | 13 +++++++++++--
arch/x86/lib/cmpxchg16b_emu.S | 8 ++++----
arch/x86/xen/xen-asm.S | 12 ++++++------
init/Kconfig | 2 +-
10 files changed, 55 insertions(+), 24 deletions(-)
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index efb0d1b1f15f..d5a6d3a0c24b 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -218,7 +218,7 @@ For 32-bit we have the following conventions - kernel is built with
.endm
#define THIS_CPU_user_pcid_flush_mask \
- PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask
+ PER_CPU_VAR(cpu_tlbstate + TLB_STATE_user_pcid_flush_mask)
.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 16a93eb4c11f..fc15fe058d3c 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -298,7 +298,7 @@ ENTRY(__switch_to_asm)
#ifdef CONFIG_STACKPROTECTOR
movq TASK_stack_canary(%rsi), %rbx
- movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
+ movq %rbx, PER_CPU_VAR(irq_stack_union + stack_canary_offset)
#endif
#ifdef CONFIG_RETPOLINE
@@ -841,7 +841,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
/*
* Exception entry points.
*/
-#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
+#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw + (TSS_ist + ((x) - 1) * 8))
/**
* idtentry - Generate an IDT entry stub
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 1a19d11cfbbd..608c15751f29 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -5,9 +5,11 @@
#ifdef CONFIG_X86_64
#define __percpu_seg gs
#define __percpu_mov_op movq
+#define __percpu_rel (%rip)
#else
#define __percpu_seg fs
#define __percpu_mov_op movl
+#define __percpu_rel
#endif
#ifdef __ASSEMBLY__
@@ -28,10 +30,14 @@
#define PER_CPU(var, reg) \
__percpu_mov_op %__percpu_seg:this_cpu_off, reg; \
lea var(reg), reg
-#define PER_CPU_VAR(var) %__percpu_seg:var
+/* Compatible with Position Independent Code */
+#define PER_CPU_VAR(var) %__percpu_seg:(var)##__percpu_rel
+/* Rare absolute reference */
+#define PER_CPU_VAR_ABS(var) %__percpu_seg:var
#else /* ! SMP */
#define PER_CPU(var, reg) __percpu_mov_op $var, reg
-#define PER_CPU_VAR(var) var
+#define PER_CPU_VAR(var) (var)##__percpu_rel
+#define PER_CPU_VAR_ABS(var) var
#endif /* SMP */
#ifdef CONFIG_X86_64_SMP
@@ -209,27 +215,34 @@ do { \
pfo_ret__; \
})
+/* Position Independent code uses relative addresses only */
+#ifdef CONFIG_X86_PIE
+#define __percpu_stable_arg __percpu_arg(a1)
+#else
+#define __percpu_stable_arg __percpu_arg(P1)
+#endif
+
#define percpu_stable_op(op, var) \
({ \
typeof(var) pfo_ret__; \
switch (sizeof(var)) { \
case 1: \
- asm(op "b "__percpu_arg(P1)",%0" \
+ asm(op "b "__percpu_stable_arg ",%0" \
: "=q" (pfo_ret__) \
: "p" (&(var))); \
break; \
case 2: \
- asm(op "w "__percpu_arg(P1)",%0" \
+ asm(op "w "__percpu_stable_arg ",%0" \
: "=r" (pfo_ret__) \
: "p" (&(var))); \
break; \
case 4: \
- asm(op "l "__percpu_arg(P1)",%0" \
+ asm(op "l "__percpu_stable_arg ",%0" \
: "=r" (pfo_ret__) \
: "p" (&(var))); \
break; \
case 8: \
- asm(op "q "__percpu_arg(P1)",%0" \
+ asm(op "q "__percpu_stable_arg ",%0" \
: "=r" (pfo_ret__) \
: "p" (&(var))); \
break; \
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index ce9851bf6778..18f1e8269ad7 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -24,6 +24,7 @@ struct vm86;
#include <asm/special_insns.h>
#include <asm/fpu/types.h>
#include <asm/unwind_hints.h>
+#include <asm/sections.h>
#include <linux/personality.h>
#include <linux/cache.h>
@@ -402,7 +403,8 @@ DECLARE_INIT_PER_CPU(irq_stack_union);
static inline unsigned long cpu_kernelmode_gs_base(int cpu)
{
- return (unsigned long)per_cpu(irq_stack_union.gs_base, cpu);
+ return (unsigned long)per_cpu(irq_stack_union.gs_base, cpu) -
+ (unsigned long)__per_cpu_start;
}
DECLARE_PER_CPU(char *, irq_stack_ptr);
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index b9b6c6aa0313..0f1739d7bff7 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -269,7 +269,11 @@ ENDPROC(start_cpu0)
GLOBAL(initial_code)
.quad x86_64_start_kernel
GLOBAL(initial_gs)
+#ifdef CONFIG_X86_PIE
+ .quad 0
+#else
.quad INIT_PER_CPU_VAR(irq_stack_union)
+#endif
GLOBAL(initial_stack)
/*
* The SIZEOF_PTREGS gap is a convention which helps the in-kernel
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index e8796fcd7e5a..cc66f3434da9 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -26,7 +26,7 @@
DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number);
EXPORT_PER_CPU_SYMBOL(cpu_number);
-#ifdef CONFIG_X86_64
+#if defined(CONFIG_X86_64) && !defined(CONFIG_X86_PIE)
#define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load)
#else
#define BOOT_PERCPU_OFFSET 0
@@ -40,6 +40,9 @@ unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init = {
};
EXPORT_SYMBOL(__per_cpu_offset);
+/* Used to calculate gs_base for each CPU */
+EXPORT_SYMBOL(__per_cpu_start);
+
/*
* On x86_64 symbols referenced from code should be reachable using
* 32bit relocations. Reserve space for static percpu variables in
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index bad8c51fee6e..7b461fa82107 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -222,9 +222,14 @@ SECTIONS
/*
* percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the
* output PHDR, so the next output section - .init.text - should
- * start another segment - init.
+ * start another segment - init. For Position Independent Code, the
+ * per-cpu section cannot be zero-based because everything is relative.
*/
+#ifdef CONFIG_X86_PIE
+ PERCPU_SECTION(INTERNODE_CACHE_BYTES)
+#else
PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu)
+#endif
ASSERT(SIZEOF(.data..percpu) < CONFIG_PHYSICAL_START,
"per-CPU data too large - increase CONFIG_PHYSICAL_START")
#endif
@@ -401,7 +406,11 @@ SECTIONS
* Per-cpu symbols which need to be offset from __per_cpu_load
* for the boot processor.
*/
+#ifdef CONFIG_X86_PIE
+#define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x)
+#else
#define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) + __per_cpu_load
+#endif
INIT_PER_CPU(gdt_page);
INIT_PER_CPU(irq_stack_union);
@@ -411,7 +420,7 @@ INIT_PER_CPU(irq_stack_union);
. = ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
"kernel image bigger than KERNEL_IMAGE_SIZE");
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) && !defined(CONFIG_X86_PIE)
. = ASSERT((irq_stack_union == 0),
"irq_stack_union is not at start of per-cpu area");
#endif
diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S
index 9b330242e740..254950604ae4 100644
--- a/arch/x86/lib/cmpxchg16b_emu.S
+++ b/arch/x86/lib/cmpxchg16b_emu.S
@@ -33,13 +33,13 @@ ENTRY(this_cpu_cmpxchg16b_emu)
pushfq
cli
- cmpq PER_CPU_VAR((%rsi)), %rax
+ cmpq PER_CPU_VAR_ABS((%rsi)), %rax
jne .Lnot_same
- cmpq PER_CPU_VAR(8(%rsi)), %rdx
+ cmpq PER_CPU_VAR_ABS(8(%rsi)), %rdx
jne .Lnot_same
- movq %rbx, PER_CPU_VAR((%rsi))
- movq %rcx, PER_CPU_VAR(8(%rsi))
+ movq %rbx, PER_CPU_VAR_ABS((%rsi))
+ movq %rcx, PER_CPU_VAR_ABS(8(%rsi))
popfq
mov $1, %al
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 8019edd0125c..a5d73d3218be 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -21,7 +21,7 @@
ENTRY(xen_irq_enable_direct)
FRAME_BEGIN
/* Unmask events */
- movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
+ movb $0, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask)
/*
* Preempt here doesn't matter because that will deal with any
@@ -30,7 +30,7 @@ ENTRY(xen_irq_enable_direct)
*/
/* Test for pending */
- testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
+ testb $0xff, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_pending)
jz 1f
call check_events
@@ -45,7 +45,7 @@ ENTRY(xen_irq_enable_direct)
* non-zero.
*/
ENTRY(xen_irq_disable_direct)
- movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
+ movb $1, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask)
ret
ENDPROC(xen_irq_disable_direct)
@@ -59,7 +59,7 @@ ENDPROC(xen_irq_disable_direct)
* x86 use opposite senses (mask vs enable).
*/
ENTRY(xen_save_fl_direct)
- testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
+ testb $0xff, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask)
setz %ah
addb %ah, %ah
ret
@@ -80,7 +80,7 @@ ENTRY(xen_restore_fl_direct)
#else
testb $X86_EFLAGS_IF>>8, %ah
#endif
- setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
+ setz PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask)
/*
* Preempt here doesn't matter because that will deal with any
* pending interrupts. The pending check may end up being run
@@ -88,7 +88,7 @@ ENTRY(xen_restore_fl_direct)
*/
/* check for unmasked and pending */
- cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
+ cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_pending)
jnz 1f
call check_events
1:
diff --git a/init/Kconfig b/init/Kconfig
index 1486b913daeb..bb383615823a 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1453,7 +1453,7 @@ config KALLSYMS_ALL
config KALLSYMS_ABSOLUTE_PERCPU
bool
depends on KALLSYMS
- default X86_64 && SMP
+ default X86_64 && SMP && !X86_PIE
config KALLSYMS_BASE_RELATIVE
bool
--
2.20.1.495.gaa96b0ce6b-goog
^ permalink raw reply related [flat|nested] 56+ messages in thread
* Re: [PATCH v6 14/27] x86/percpu: Adapt percpu for PIE support
2019-01-31 19:24 ` [PATCH v6 14/27] x86/percpu: Adapt percpu for PIE support Thomas Garnier
@ 2019-01-31 20:57 ` Christopher Lameter
2019-01-31 22:49 ` Thomas Garnier
0 siblings, 1 reply; 56+ messages in thread
From: Christopher Lameter @ 2019-01-31 20:57 UTC (permalink / raw)
To: Thomas Garnier
Cc: kernel-hardening, kristen, Andy Lutomirski, Thomas Gleixner,
Ingo Molnar, Borislav Petkov, H. Peter Anvin, x86, Dennis Zhou,
Tejun Heo, Boris Ostrovsky, Juergen Gross, Stefano Stabellini,
Andrew Morton, Andi Kleen, Thomas Garnier, Kirill A. Shutemov,
Michal Hocko, Mike Rapoport, Stephen Rothwell, Cao jin,
Brijesh Singh, Masahiro Yamada, Joerg Roedel, Peter Zijlstra,
Kees Cook, Mathieu Desnoyers, linux-kernel, xen-devel
On Thu, 31 Jan 2019, Thomas Garnier wrote:
> Perpcu uses a clever design where the .percu ELF section has a virtual
> address of zero and the custom linux relocation code avoid relocating
> specific symbols. It makes the code simple and easily adaptable with or
> without SMP support.
We usually talk about this as offsets rather than addresses. The intent
here is to give every processor its own address that is unique for this
processor. Operations are always relative to a segment register and the
whole area can be relocated at will by simply changing the segment
register.
> This design is incompatible with PIE. While creating a PIE binary, the
> copmiler tries to make everything relative. The compiler will attempt to
This is very compatible with PIE because it is already relative.
> generate instructions with the distance between zero and any 64-bit
> virtual address. It will fail as the relocation range cannot fit within
> the possible instructions accessing a segment register.
Leave the offsets alone and just change the segment register if you need
to relocate the area of a specific processor?
> The assembly and PER_CPU macros are changed to use relative references
> when PIE is enabled.
They already use relative reference. What is the point here?
> --- a/arch/x86/include/asm/percpu.h
> +++ b/arch/x86/include/asm/percpu.h
> @@ -5,9 +5,11 @@
> #ifdef CONFIG_X86_64
> #define __percpu_seg gs
> #define __percpu_mov_op movq
> +#define __percpu_rel (%rip)
The percpu section cannot be IP relative since we need to have separate
address spaces per cpu.
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [PATCH v6 14/27] x86/percpu: Adapt percpu for PIE support
2019-01-31 20:57 ` Christopher Lameter
@ 2019-01-31 22:49 ` Thomas Garnier
2019-02-01 2:31 ` Christopher Lameter
0 siblings, 1 reply; 56+ messages in thread
From: Thomas Garnier @ 2019-01-31 22:49 UTC (permalink / raw)
To: Christopher Lameter
Cc: Kernel Hardening, kristen, Andy Lutomirski, Thomas Gleixner,
Ingo Molnar, Borislav Petkov, H. Peter Anvin,
the arch/x86 maintainers, Dennis Zhou, Tejun Heo,
Boris Ostrovsky, Juergen Gross, Stefano Stabellini,
Andrew Morton, Andi Kleen, Kirill A. Shutemov, Michal Hocko,
Mike Rapoport, Stephen Rothwell, Cao jin, Brijesh Singh,
Masahiro Yamada, Joerg Roedel, Peter Zijlstra, Kees Cook,
Mathieu Desnoyers, LKML, xen-devel
On Thu, Jan 31, 2019 at 12:57 PM Christopher Lameter <cl@linux.com> wrote:
>
> On Thu, 31 Jan 2019, Thomas Garnier wrote:
>
> > Perpcu uses a clever design where the .percu ELF section has a virtual
> > address of zero and the custom linux relocation code avoid relocating
> > specific symbols. It makes the code simple and easily adaptable with or
> > without SMP support.
>
> We usually talk about this as offsets rather than addressess. The intend
> here is to give every processor its own address that is unique for this
> processor. Operations are always relative to a segment register and the
> whole area can be relocated at will by simply changing the segment
> register.
>
> > This design is incompatible with PIE. While creating a PIE binary, the
> > copmiler tries to make everything relative. The compiler will attempt to
>
> This is very compatible with PIE because it is already relative.
The per-cpu symbols are in a section that is zero based to create
offsets. The compiler doesn't see them as offsets but as relative
symbols and tries to relocate them. Given the distance between zero and
the mapped kernel is much larger than the instruction offset range, it
fails to do it.
>
> > generate instructions with the distance between zero and any 64-bit
> > virtual address. It will fail as the relocation range cannot fit within
> > the possible instructions accessing a segment register.
>
> Leave the offsets alone and just change the segment register if you need
> to relocate the area of a specific processor?
>
> > The assembly and PER_CPU macros are changed to use relative references
> > when PIE is enabled.
>
> They already use relative reference. What is the point here?
>
> > --- a/arch/x86/include/asm/percpu.h
> > +++ b/arch/x86/include/asm/percpu.h
> > @@ -5,9 +5,11 @@
> > #ifdef CONFIG_X86_64
> > #define __percpu_seg gs
> > #define __percpu_mov_op movq
> > +#define __percpu_rel (%rip)
>
> The percpu section cannot be IP relative since we need to have separate
> address spaces per cpu.
>
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [PATCH v6 14/27] x86/percpu: Adapt percpu for PIE support
2019-01-31 22:49 ` Thomas Garnier
@ 2019-02-01 2:31 ` Christopher Lameter
2019-02-01 17:13 ` Thomas Garnier
0 siblings, 1 reply; 56+ messages in thread
From: Christopher Lameter @ 2019-02-01 2:31 UTC (permalink / raw)
To: Thomas Garnier
Cc: Kernel Hardening, kristen, Andy Lutomirski, Thomas Gleixner,
Ingo Molnar, Borislav Petkov, H. Peter Anvin,
the arch/x86 maintainers, Dennis Zhou, Tejun Heo,
Boris Ostrovsky, Juergen Gross, Stefano Stabellini,
Andrew Morton, Andi Kleen, Kirill A. Shutemov, Michal Hocko,
Mike Rapoport, Stephen Rothwell, Cao jin, Brijesh Singh,
Masahiro Yamada, Joerg Roedel, Peter Zijlstra, Kees Cook,
Mathieu Desnoyers, LKML, xen-devel
On Thu, 31 Jan 2019, Thomas Garnier wrote:
> The per-cpu symbols are in a section that is zero based to create
> offsets. The compiler doesn't see them as offsets but as relative
> symbol and try to relocate them. Given the distance between zero and
> the mapped kernel is much larger than the instruction offset range, it
> fails to do it.
We switch that off in the linker. If that does not work with your
modifications then you need to figure out how to update the link
configuration.
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [PATCH v6 14/27] x86/percpu: Adapt percpu for PIE support
2019-02-01 2:31 ` Christopher Lameter
@ 2019-02-01 17:13 ` Thomas Garnier
2019-04-08 15:58 ` Thomas Garnier
0 siblings, 1 reply; 56+ messages in thread
From: Thomas Garnier @ 2019-02-01 17:13 UTC (permalink / raw)
To: Christopher Lameter
Cc: Kernel Hardening, kristen, Andy Lutomirski, Thomas Gleixner,
Ingo Molnar, Borislav Petkov, H. Peter Anvin,
the arch/x86 maintainers, Dennis Zhou, Tejun Heo,
Boris Ostrovsky, Juergen Gross, Stefano Stabellini,
Andrew Morton, Andi Kleen, Kirill A. Shutemov, Michal Hocko,
Mike Rapoport, Stephen Rothwell, Cao jin, Brijesh Singh,
Masahiro Yamada, Joerg Roedel, Peter Zijlstra, Kees Cook,
Mathieu Desnoyers, LKML, xen-devel
On Thu, Jan 31, 2019 at 6:31 PM Christopher Lameter <cl@linux.com> wrote:
>
> On Thu, 31 Jan 2019, Thomas Garnier wrote:
>
> > The per-cpu symbols are in a section that is zero based to create
> > offsets. The compiler doesn't see them as offsets but as relative
> > symbol and try to relocate them. Given the distance between zero and
> > the mapped kernel is much larger than the instruction offset range, it
> > fails to do it.
>
> We switch that off in the linker. If that does not work with your
> modifications then you need to figure out how to update the link
> configuration.
>
It didn't work originally but I will revisit to see if I missed something.
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [PATCH v6 14/27] x86/percpu: Adapt percpu for PIE support
2019-02-01 17:13 ` Thomas Garnier
@ 2019-04-08 15:58 ` Thomas Garnier
2019-04-08 17:56 ` Christopher Lameter
0 siblings, 1 reply; 56+ messages in thread
From: Thomas Garnier @ 2019-04-08 15:58 UTC (permalink / raw)
To: Christopher Lameter
Cc: Kernel Hardening, Kristen Carlson Accardi, Andy Lutomirski,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, H. Peter Anvin,
the arch/x86 maintainers, Dennis Zhou, Tejun Heo,
Boris Ostrovsky, Juergen Gross, Stefano Stabellini,
Andrew Morton, Andi Kleen, Kirill A. Shutemov, Michal Hocko,
Mike Rapoport, Stephen Rothwell, Cao jin, Brijesh Singh,
Masahiro Yamada, Joerg Roedel, Peter Zijlstra, Kees Cook,
Mathieu Desnoyers, LKML, xen-devel
On Fri, Feb 1, 2019 at 9:13 AM Thomas Garnier <thgarnie@chromium.org> wrote:
>
> On Thu, Jan 31, 2019 at 6:31 PM Christopher Lameter <cl@linux.com> wrote:
> >
> > On Thu, 31 Jan 2019, Thomas Garnier wrote:
> >
> > > The per-cpu symbols are in a section that is zero based to create
> > > offsets. The compiler doesn't see them as offsets but as relative
> > > symbol and try to relocate them. Given the distance between zero and
> > > the mapped kernel is much larger than the instruction offset range, it
> > > fails to do it.
> >
> > We switch that off in the linker. If that does not work with your
> > modifications then you need to figure out how to update the link
> > configuration.
> >
>
> It didn't work originally but I will revisit to see if I missed something.
I revisited this and couldn't find a way to prevent relocations to the
percpu section. Without PIE, you can reference absolute addresses, which
was convenient for percpu.
Christopher: Did you have something specific in mind?
I checked the following:
- Changing the FLAGS() on the PHDRS.
- Using -z noreloc-overflow, which doesn't actually seem to apply to
PC32 relocations.
- Looking at all linker options and script format for anything related to that.
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [PATCH v6 14/27] x86/percpu: Adapt percpu for PIE support
2019-04-08 15:58 ` Thomas Garnier
@ 2019-04-08 17:56 ` Christopher Lameter
2019-04-08 18:08 ` Thomas Garnier
0 siblings, 1 reply; 56+ messages in thread
From: Christopher Lameter @ 2019-04-08 17:56 UTC (permalink / raw)
To: Thomas Garnier
Cc: Kernel Hardening, Kristen Carlson Accardi, Andy Lutomirski,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, H. Peter Anvin,
the arch/x86 maintainers, Dennis Zhou, Tejun Heo,
Boris Ostrovsky, Juergen Gross, Stefano Stabellini,
Andrew Morton, Andi Kleen, Kirill A. Shutemov, Michal Hocko,
Mike Rapoport, Stephen Rothwell, Cao jin, Brijesh Singh,
Masahiro Yamada, Joerg Roedel, Peter Zijlstra, Kees Cook,
Mathieu Desnoyers, LKML, xen-devel
On Mon, 8 Apr 2019, Thomas Garnier wrote:
> > It didn't work originally but I will revisit to see if I missed something.
>
> I revisited and couldn't find a way to prevent relocations to the
> percpu section. Without PIE, you can reference absolute address which
> was convenient for percpu.
Can you switch PIE off for the percpu section? If not maybe the linker
needs to have an additional option?
Cannot imagine that this is not possible. You need to be able to
reference registers that are in fixed memory locations.
> Christopher: Did you have something specific in mind?
I thought that we just leave it as is.
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [PATCH v6 14/27] x86/percpu: Adapt percpu for PIE support
2019-04-08 17:56 ` Christopher Lameter
@ 2019-04-08 18:08 ` Thomas Garnier
0 siblings, 0 replies; 56+ messages in thread
From: Thomas Garnier @ 2019-04-08 18:08 UTC (permalink / raw)
To: Christopher Lameter
Cc: Kernel Hardening, Kristen Carlson Accardi, Andy Lutomirski,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, H. Peter Anvin,
the arch/x86 maintainers, Dennis Zhou, Tejun Heo,
Boris Ostrovsky, Juergen Gross, Stefano Stabellini,
Andrew Morton, Andi Kleen, Kirill A. Shutemov, Michal Hocko,
Mike Rapoport, Stephen Rothwell, Cao jin, Brijesh Singh,
Masahiro Yamada, Joerg Roedel, Peter Zijlstra, Kees Cook,
Mathieu Desnoyers, LKML, xen-devel
On Mon, Apr 8, 2019 at 10:56 AM Christopher Lameter <cl@linux.com> wrote:
>
> On Mon, 8 Apr 2019, Thomas Garnier wrote:
>
> > > It didn't work originally but I will revisit to see if I missed something.
> >
> > I revisited and couldn't find a way to prevent relocations to the
> > percpu section. Without PIE, you can reference absolute address which
> > was convenient for percpu.
>
> Can you switch PIE off for the percpu section? If not maybe the linker
> needs to have an additional option?
I don't think so, or at least I didn't find any option to do that. Changing
the linker might be a bit too much if we have a software solution which
doesn't impact performance.
>
> Cannot imagine that this is not possible. You neeed to be able to
> reference registers that are in fixed memory locations.
>
>
> > Christopher: Did you have something specific in mind?
>
> I thought that we just leave it as is.
I would like to as well. I will try a couple of things at the assembly
level instead of the linker and come back to this thread.
^ permalink raw reply [flat|nested] 56+ messages in thread
* [PATCH v6 15/27] compiler: Option to default to hidden symbols
2019-01-31 19:24 [PATCH v6 00/27] x86: PIE support and option to extend KASLR randomization Thomas Garnier
` (13 preceding siblings ...)
2019-01-31 19:24 ` [PATCH v6 14/27] x86/percpu: Adapt percpu for PIE support Thomas Garnier
@ 2019-01-31 19:24 ` Thomas Garnier
2019-02-01 7:12 ` Dan Carpenter
2019-02-01 8:22 ` Adrian Hunter
2019-01-31 19:24 ` [PATCH v6 16/27] compiler: Option to add PROVIDE_HIDDEN replacement for weak symbols Thomas Garnier
` (13 subsequent siblings)
28 siblings, 2 replies; 56+ messages in thread
From: Thomas Garnier @ 2019-01-31 19:24 UTC (permalink / raw)
To: kernel-hardening
Cc: kristen, Thomas Garnier, Thomas Gleixner, Ingo Molnar,
Borislav Petkov, H. Peter Anvin, x86, Luis Chamberlain,
Greg Kroah-Hartman, Rafael J. Wysocki, Arnd Bergmann,
Luc Van Oostenryck, Steven Rostedt, Jason Baron, Thomas Garnier,
Jordan Borgner, Masami Hiramatsu, Masahiro Yamada,
Peter Zijlstra (Intel),
Andrew Morton, Kees Cook, Mathieu Desnoyers,
Arnaldo Carvalho de Melo, Andi Kleen, Jan Beulich, Song Liu,
Adrian Hunter, Alexander Shishkin, linux-kernel, linux-arch,
linux-sparse
Provide an option that makes symbol visibility default to hidden, except
for key symbols. This option is disabled by default and will be used by
x86_64 PIE support to remove errors between compilation units.
The default visibility is also enabled for external symbols that are
compared, as they may be equal (start/end of sections). In this case,
older versions of GCC will remove the comparison if the symbols are
hidden. This issue exists at least in gcc 4.9 and earlier.
Signed-off-by: Thomas Garnier <thgarnie@chromium.org>
---
arch/x86/boot/boot.h | 2 +-
arch/x86/include/asm/setup.h | 2 +-
arch/x86/kernel/cpu/microcode/core.c | 4 ++--
drivers/base/firmware_loader/main.c | 4 ++--
include/asm-generic/sections.h | 6 ++++++
include/linux/compiler.h | 7 +++++++
init/Kconfig | 7 +++++++
kernel/kallsyms.c | 16 ++++++++--------
kernel/trace/trace.h | 4 ++--
lib/dynamic_debug.c | 4 ++--
10 files changed, 38 insertions(+), 18 deletions(-)
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index 32a09eb5c101..c4afcfecc817 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -193,7 +193,7 @@ static inline bool memcmp_gs(const void *s1, addr_t s2, size_t len)
}
/* Heap -- available for dynamic lists. */
-extern char _end[];
+extern char _end[] __default_visibility;
extern char *HEAP;
extern char *heap_end;
#define RESET_HEAP() ((void *)( HEAP = _end ))
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index ed8ec011a9fd..74f0a8d87986 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -71,7 +71,7 @@ static inline void x86_ce4100_early_setup(void) { }
* This is set up by the setup-routine at boot-time
*/
extern struct boot_params boot_params;
-extern char _text[];
+extern char _text[] __default_visibility;
static inline bool kaslr_enabled(void)
{
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 97f9ada9ceda..04ca89e65f79 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -149,8 +149,8 @@ static bool __init check_loader_disabled_bsp(void)
return *res;
}
-extern struct builtin_fw __start_builtin_fw[];
-extern struct builtin_fw __end_builtin_fw[];
+extern struct builtin_fw __start_builtin_fw[] __default_visibility;
+extern struct builtin_fw __end_builtin_fw[] __default_visibility;
bool get_builtin_firmware(struct cpio_data *cd, const char *name)
{
diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c
index 8e9213b36e31..f04096161a52 100644
--- a/drivers/base/firmware_loader/main.c
+++ b/drivers/base/firmware_loader/main.c
@@ -94,8 +94,8 @@ static struct firmware_cache fw_cache;
#ifdef CONFIG_FW_LOADER
-extern struct builtin_fw __start_builtin_fw[];
-extern struct builtin_fw __end_builtin_fw[];
+extern struct builtin_fw __start_builtin_fw[] __default_visibility;
+extern struct builtin_fw __end_builtin_fw[] __default_visibility;
static void fw_copy_to_prealloc_buf(struct firmware *fw,
void *buf, size_t size)
diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index d79abca81a52..94f072f3a48d 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -32,6 +32,9 @@
* __softirqentry_text_start, __softirqentry_text_end
* __start_opd, __end_opd
*/
+#ifdef CONFIG_DEFAULT_HIDDEN_SYMS
+#pragma GCC visibility push(default)
+#endif
extern char _text[], _stext[], _etext[];
extern char _data[], _sdata[], _edata[];
extern char __bss_start[], __bss_stop[];
@@ -49,6 +52,9 @@ extern char __start_once[], __end_once[];
/* Start and end of .ctors section - used for constructor calls. */
extern char __ctors_start[], __ctors_end[];
+#ifdef CONFIG_DEFAULT_HIDDEN_SYMS
+#pragma GCC visibility pop
+#endif
/* Start and end of .opd section - used for function descriptors. */
extern char __start_opd[], __end_opd[];
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 87692fdae97a..5c0723604a52 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -287,6 +287,13 @@ unsigned long read_word_at_a_time(const void *addr)
__u.__val; \
})
+#ifdef CONFIG_DEFAULT_HIDDEN_SYMS
+#pragma GCC visibility push(hidden)
+#define __default_visibility __attribute__((visibility ("default")))
+#else
+#define __default_visibility
+#endif
+
#endif /* __KERNEL__ */
/*
diff --git a/init/Kconfig b/init/Kconfig
index bb383615823a..116e0de4817f 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1848,6 +1848,13 @@ config CMDLINE_OVERRIDE
endif
+#
+# Default to hidden visibility for all symbols.
+# Useful for Position Independent Code to reduce global references.
+#
+config DEFAULT_HIDDEN_SYMS
+ bool
+
endmenu # General setup
source "arch/Kconfig"
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 14934afa9e68..547affc40b68 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -29,24 +29,24 @@
* These will be re-linked against their real values
* during the second link stage.
*/
-extern const unsigned long kallsyms_addresses[] __weak;
-extern const int kallsyms_offsets[] __weak;
-extern const u8 kallsyms_names[] __weak;
+extern const unsigned long kallsyms_addresses[] __weak __default_visibility;
+extern const int kallsyms_offsets[] __weak __default_visibility;
+extern const u8 kallsyms_names[] __weak __default_visibility;
/*
* Tell the compiler that the count isn't in the small data section if the arch
* has one (eg: FRV).
*/
extern const unsigned int kallsyms_num_syms
-__attribute__((weak, section(".rodata")));
+__attribute__((weak, section(".rodata"))) __default_visibility;
extern const unsigned long kallsyms_relative_base
-__attribute__((weak, section(".rodata")));
+__attribute__((weak, section(".rodata"))) __default_visibility;
-extern const u8 kallsyms_token_table[] __weak;
-extern const u16 kallsyms_token_index[] __weak;
+extern const u8 kallsyms_token_table[] __weak __default_visibility;
+extern const u16 kallsyms_token_index[] __weak __default_visibility;
-extern const unsigned int kallsyms_markers[] __weak;
+extern const unsigned int kallsyms_markers[] __weak __default_visibility;
/*
* Expand a compressed symbol data into the resulting uncompressed string,
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 08900828d282..5246977a2db7 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1809,8 +1809,8 @@ extern int trace_event_enable_disable(struct trace_event_file *file,
int enable, int soft_disable);
extern int tracing_alloc_snapshot(void);
-extern const char *__start___trace_bprintk_fmt[];
-extern const char *__stop___trace_bprintk_fmt[];
+extern const char *__start___trace_bprintk_fmt[] __default_visibility;
+extern const char *__stop___trace_bprintk_fmt[] __default_visibility;
extern const char *__start___tracepoint_str[];
extern const char *__stop___tracepoint_str[];
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index dbf2b457e47e..05482e65ae7d 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -37,8 +37,8 @@
#include <linux/device.h>
#include <linux/netdevice.h>
-extern struct _ddebug __start___verbose[];
-extern struct _ddebug __stop___verbose[];
+extern struct _ddebug __start___verbose[] __default_visibility;
+extern struct _ddebug __stop___verbose[] __default_visibility;
struct ddebug_table {
struct list_head link;
--
2.20.1.495.gaa96b0ce6b-goog
^ permalink raw reply related [flat|nested] 56+ messages in thread
* Re: [PATCH v6 15/27] compiler: Option to default to hidden symbols
2019-01-31 19:24 ` [PATCH v6 15/27] compiler: Option to default to hidden symbols Thomas Garnier
@ 2019-02-01 7:12 ` Dan Carpenter
2019-02-01 17:00 ` Thomas Garnier
2019-02-01 8:22 ` Adrian Hunter
1 sibling, 1 reply; 56+ messages in thread
From: Dan Carpenter @ 2019-02-01 7:12 UTC (permalink / raw)
To: Thomas Garnier
Cc: kernel-hardening, kristen, Thomas Gleixner, Ingo Molnar,
Borislav Petkov, H. Peter Anvin, x86, Luis Chamberlain,
Greg Kroah-Hartman, Rafael J. Wysocki, Arnd Bergmann,
Luc Van Oostenryck, Steven Rostedt, Jason Baron, Thomas Garnier,
Jordan Borgner, Masami Hiramatsu, Masahiro Yamada,
Peter Zijlstra (Intel),
Andrew Morton, Kees Cook, Mathieu Desnoyers,
Arnaldo Carvalho de Melo, Andi Kleen, Jan Beulich, Song Liu,
Adrian Hunter, Alexander Shishkin, linux-kernel, linux-arch,
linux-sparse
On Thu, Jan 31, 2019 at 11:24:22AM -0800, Thomas Garnier wrote:
> Provide an option to default visibility to hidden except for key
> symbols.
It took me a while to figure out the verb in this sentence... :P
It's weird that we're annotating things to be default. What visibility
are they if we leave off the annotation?
regards,
dan carpenter
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [PATCH v6 15/27] compiler: Option to default to hidden symbols
2019-02-01 7:12 ` Dan Carpenter
@ 2019-02-01 17:00 ` Thomas Garnier
0 siblings, 0 replies; 56+ messages in thread
From: Thomas Garnier @ 2019-02-01 17:00 UTC (permalink / raw)
To: Dan Carpenter
Cc: Thomas Garnier, Kernel Hardening, kristen, Thomas Gleixner,
Ingo Molnar, Borislav Petkov, H. Peter Anvin,
the arch/x86 maintainers, Luis Chamberlain, Greg Kroah-Hartman,
Rafael J. Wysocki, Arnd Bergmann, Luc Van Oostenryck,
Steven Rostedt, Jason Baron, Jordan Borgner, Masami Hiramatsu,
Masahiro Yamada, Peter Zijlstra (Intel),
Andrew Morton, Kees Cook, Mathieu Desnoyers,
Arnaldo Carvalho de Melo, Andi Kleen, Jan Beulich, Song Liu,
Adrian Hunter, Alexander Shishkin, LKML, linux-arch,
Sparse Mailing-list
On Thu, Jan 31, 2019 at 11:13 PM Dan Carpenter <dan.carpenter@oracle.com> wrote:
>
> On Thu, Jan 31, 2019 at 11:24:22AM -0800, Thomas Garnier wrote:
> > Provide an option to default visibility to hidden except for key
> > symbols.
>
> It took me a while to figure out the verb in this sentence... :P
I agree, I should rewrite this description.
>
> It's weird that we're annotating things to be default. What visibility
> are they we leave off the annotation?
The word "default" is confusing, but that's what gcc calls it.
Symbols not marked default are hidden.
>
> regards,
> dan carpenter
>
--
Thomas
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [PATCH v6 15/27] compiler: Option to default to hidden symbols
2019-01-31 19:24 ` [PATCH v6 15/27] compiler: Option to default to hidden symbols Thomas Garnier
2019-02-01 7:12 ` Dan Carpenter
@ 2019-02-01 8:22 ` Adrian Hunter
2019-02-01 17:35 ` Thomas Garnier
1 sibling, 1 reply; 56+ messages in thread
From: Adrian Hunter @ 2019-02-01 8:22 UTC (permalink / raw)
To: Thomas Garnier, kernel-hardening
Cc: kristen, Thomas Gleixner, Ingo Molnar, Borislav Petkov,
H. Peter Anvin, x86, Luis Chamberlain, Greg Kroah-Hartman,
Rafael J. Wysocki, Arnd Bergmann, Luc Van Oostenryck,
Steven Rostedt, Jason Baron, Thomas Garnier, Jordan Borgner,
Masami Hiramatsu, Masahiro Yamada, Peter Zijlstra (Intel),
Andrew Morton, Kees Cook, Mathieu Desnoyers,
Arnaldo Carvalho de Melo, Andi Kleen, Jan Beulich, Song Liu,
Alexander Shishkin, linux-kernel, linux-arch, linux-sparse
On 31/01/19 9:24 PM, Thomas Garnier wrote:
> Provide an option to default visibility to hidden except for key
> symbols. This option is disabled by default and will be used by x86_64
> PIE support to remove errors between compilation units.
>
> The default visibility is also enabled for external symbols that are
> compared as they maybe equals (start/end of sections). In this case,
> older versions of GCC will remove the comparison if the symbols are
> hidden. This issue exists at least on gcc 4.9 and before.
What does this mean, if anything, for what/how symbols appear in /proc/kallsyms?
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [PATCH v6 15/27] compiler: Option to default to hidden symbols
2019-02-01 8:22 ` Adrian Hunter
@ 2019-02-01 17:35 ` Thomas Garnier
0 siblings, 0 replies; 56+ messages in thread
From: Thomas Garnier @ 2019-02-01 17:35 UTC (permalink / raw)
To: Adrian Hunter
Cc: Thomas Garnier, Kernel Hardening, kristen, Thomas Gleixner,
Ingo Molnar, Borislav Petkov, H. Peter Anvin,
the arch/x86 maintainers, Luis Chamberlain, Greg Kroah-Hartman,
Rafael J. Wysocki, Arnd Bergmann, Luc Van Oostenryck,
Steven Rostedt, Jason Baron, Jordan Borgner, Masami Hiramatsu,
Masahiro Yamada, Peter Zijlstra (Intel),
Andrew Morton, Kees Cook, Mathieu Desnoyers,
Arnaldo Carvalho de Melo, Andi Kleen, Jan Beulich, Song Liu,
Alexander Shishkin, LKML, linux-arch, Sparse Mailing-list
On Fri, Feb 1, 2019 at 12:24 AM Adrian Hunter <adrian.hunter@intel.com> wrote:
>
> On 31/01/19 9:24 PM, Thomas Garnier wrote:
> > Provide an option to default visibility to hidden except for key
> > symbols. This option is disabled by default and will be used by x86_64
> > PIE support to remove errors between compilation units.
> >
> > The default visibility is also enabled for external symbols that are
> > compared as they maybe equals (start/end of sections). In this case,
> > older versions of GCC will remove the comparison if the symbols are
> > hidden. This issue exists at least on gcc 4.9 and before.
>
> What does this mean, if anything, for what/how symbols appear in /proc/kallsyms?
It doesn't change that; they will still appear in kallsyms. Some symbols
are no longer absolute or have moved to other sections, but they are still
present.
--
Thomas
^ permalink raw reply [flat|nested] 56+ messages in thread
* [PATCH v6 16/27] compiler: Option to add PROVIDE_HIDDEN replacement for weak symbols
2019-01-31 19:24 [PATCH v6 00/27] x86: PIE support and option to extend KASLR randomization Thomas Garnier
` (14 preceding siblings ...)
2019-01-31 19:24 ` [PATCH v6 15/27] compiler: Option to default to hidden symbols Thomas Garnier
@ 2019-01-31 19:24 ` Thomas Garnier
2019-01-31 19:24 ` [PATCH v6 17/27] x86/relocs: Handle PIE relocations Thomas Garnier
` (12 subsequent siblings)
28 siblings, 0 replies; 56+ messages in thread
From: Thomas Garnier @ 2019-01-31 19:24 UTC (permalink / raw)
To: kernel-hardening
Cc: kristen, Thomas Garnier, Masahiro Yamada, Peter Zijlstra (Intel),
Ingo Molnar, Andrew Morton, Kees Cook, Mathieu Desnoyers,
Thomas Garnier, Nicholas Piggin, Sam Ravnborg, Palmer Dabbelt,
Michael Forney, Cao jin, linux-kernel
Provide an option to have a PROVIDE_HIDDEN (linker script) entry for
each weak symbol. This option solves an error in x86_64 where the linker
optimizes PIE generated code to be non-PIE because --emit-relocs was used
instead of -pie (to reduce dynamic relocations).
Signed-off-by: Thomas Garnier <thgarnie@chromium.org>
---
init/Kconfig | 7 +++++++
scripts/link-vmlinux.sh | 14 ++++++++++++++
2 files changed, 21 insertions(+)
diff --git a/init/Kconfig b/init/Kconfig
index 116e0de4817f..2261c9891631 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -2104,6 +2104,13 @@ config ASN1
inform it as to what tags are to be expected in a stream and what
functions to call on what tags.
+config WEAK_PROVIDE_HIDDEN
+ bool
+ help
+ Generate linker script PROVIDE_HIDDEN entries for all weak symbols. It
+ allows to prevent non-PIE code being replaced by the linker if the
+ emit-relocs option is used instead of PIE (useful for x86_64 PIE).
+
source "kernel/Kconfig.locks"
config ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index bc7f1fc1f55b..abf44a804c79 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -119,6 +119,17 @@ kallsyms()
${CC} ${aflags} -c -o ${2} ${afile}
}
+gen_weak_provide_hidden()
+{
+ if [ -n "${CONFIG_WEAK_PROVIDE_HIDDEN}" ]; then
+ local pattern="s/^\s\+ w \(\w\+\)$/PROVIDE_HIDDEN(\1 = .);/gp"
+ echo -e "SECTIONS {\n. = _end;" > .tmp_vmlinux_hiddenld
+ ${NM} ${1} | sed -n "${pattern}" >> .tmp_vmlinux_hiddenld
+ echo "}" >> .tmp_vmlinux_hiddenld
+ LDFLAGS_vmlinux="${LDFLAGS_vmlinux} -T .tmp_vmlinux_hiddenld"
+ fi
+}
+
# Create map file with all symbols from ${1}
# See mksymap for additional details
mksysmap()
@@ -200,6 +211,9 @@ modpost_link vmlinux.o
# modpost vmlinux.o to check for section mismatches
${MAKE} -f "${srctree}/scripts/Makefile.modpost" vmlinux.o
+# Generate weak linker script
+gen_weak_provide_hidden vmlinux.o
+
kallsymso=""
kallsyms_vmlinux=""
if [ -n "${CONFIG_KALLSYMS}" ]; then
--
2.20.1.495.gaa96b0ce6b-goog
^ permalink raw reply related [flat|nested] 56+ messages in thread
* [PATCH v6 17/27] x86/relocs: Handle PIE relocations
2019-01-31 19:24 [PATCH v6 00/27] x86: PIE support and option to extend KASLR randomization Thomas Garnier
` (15 preceding siblings ...)
2019-01-31 19:24 ` [PATCH v6 16/27] compiler: Option to add PROVIDE_HIDDEN replacement for weak symbols Thomas Garnier
@ 2019-01-31 19:24 ` Thomas Garnier
2019-01-31 19:24 ` [PATCH v6 18/27] xen: Adapt assembly for PIE support Thomas Garnier
` (11 subsequent siblings)
28 siblings, 0 replies; 56+ messages in thread
From: Thomas Garnier @ 2019-01-31 19:24 UTC (permalink / raw)
To: kernel-hardening
Cc: kristen, Thomas Garnier, Thomas Gleixner, Ingo Molnar,
Borislav Petkov, H. Peter Anvin, x86, Thomas Garnier, H.J. Lu,
Peter Zijlstra (Intel),
Joerg Roedel, Jordan Borgner, Ard Biesheuvel, linux-kernel
Change the relocation tool to correctly handle relocations generated by
the -fPIE option:
- Add a relocation for each entry of the .got section, given that the linker
does not generate R_X86_64_GLOB_DAT on a simple link.
- Ignore R_X86_64_GOTPCREL.
Signed-off-by: Thomas Garnier <thgarnie@chromium.org>
---
arch/x86/tools/relocs.c | 96 ++++++++++++++++++++++++++++++++++++++++-
1 file changed, 95 insertions(+), 1 deletion(-)
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index b629f6992d9f..2a3c703218cc 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -32,6 +32,7 @@ struct section {
Elf_Sym *symtab;
Elf_Rel *reltab;
char *strtab;
+ Elf_Addr *got;
};
static struct section *secs;
@@ -295,6 +296,36 @@ static Elf_Sym *sym_lookup(const char *symname)
return 0;
}
+static Elf_Sym *sym_lookup_addr(Elf_Addr addr, const char **name)
+{
+ int i;
+
+ for (i = 0; i < ehdr.e_shnum; i++) {
+ struct section *sec = &secs[i];
+ long nsyms;
+ Elf_Sym *symtab;
+ Elf_Sym *sym;
+
+ if (sec->shdr.sh_type != SHT_SYMTAB)
+ continue;
+
+ nsyms = sec->shdr.sh_size/sizeof(Elf_Sym);
+ symtab = sec->symtab;
+
+ for (sym = symtab; --nsyms >= 0; sym++) {
+ if (sym->st_value == addr) {
+ if (name) {
+ *name = sym_name(sec->link->strtab,
+ sym);
+ }
+ return sym;
+ }
+ }
+ }
+ return 0;
+}
+
+
#if BYTE_ORDER == LITTLE_ENDIAN
#define le16_to_cpu(val) (val)
#define le32_to_cpu(val) (val)
@@ -515,6 +546,35 @@ static void read_relocs(FILE *fp)
}
}
+static void read_got(FILE *fp)
+{
+ int i;
+
+ for (i = 0; i < ehdr.e_shnum; i++) {
+ struct section *sec = &secs[i];
+
+ sec->got = NULL;
+ if (sec->shdr.sh_type != SHT_PROGBITS ||
+ strcmp(sec_name(i), ".got")) {
+ continue;
+ }
+ sec->got = malloc(sec->shdr.sh_size);
+ if (!sec->got) {
+ die("malloc of %d bytes for got failed\n",
+ sec->shdr.sh_size);
+ }
+ if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) {
+ die("Seek to %d failed: %s\n",
+ sec->shdr.sh_offset, strerror(errno));
+ }
+ if (fread(sec->got, 1, sec->shdr.sh_size, fp)
+ != sec->shdr.sh_size) {
+ die("Cannot read got: %s\n",
+ strerror(errno));
+ }
+ }
+}
+
static void print_absolute_symbols(void)
{
@@ -645,6 +705,32 @@ static void add_reloc(struct relocs *r, uint32_t offset)
r->offset[r->count++] = offset;
}
+/*
+ * The linker does not generate relocations for the GOT for the kernel.
+ * If a GOT is found, simulate the relocations that should have been included.
+ */
+static void walk_got_table(int (*process)(struct section *sec, Elf_Rel *rel,
+ Elf_Sym *sym, const char *symname),
+ struct section *sec)
+{
+ int i;
+ Elf_Addr entry;
+ Elf_Sym *sym;
+ const char *symname;
+ Elf_Rel rel;
+
+ for (i = 0; i < sec->shdr.sh_size/sizeof(Elf_Addr); i++) {
+ entry = sec->got[i];
+ sym = sym_lookup_addr(entry, &symname);
+ if (!sym)
+ die("Could not found got symbol for entry %d\n", i);
+ rel.r_offset = sec->shdr.sh_addr + i * sizeof(Elf_Addr);
+ rel.r_info = ELF_BITS == 64 ? R_X86_64_GLOB_DAT
+ : R_386_GLOB_DAT;
+ process(sec, &rel, sym, symname);
+ }
+}
+
static void walk_relocs(int (*process)(struct section *sec, Elf_Rel *rel,
Elf_Sym *sym, const char *symname))
{
@@ -658,6 +744,8 @@ static void walk_relocs(int (*process)(struct section *sec, Elf_Rel *rel,
struct section *sec = &secs[i];
if (sec->shdr.sh_type != SHT_REL_TYPE) {
+ if (sec->got)
+ walk_got_table(process, sec);
continue;
}
sec_symtab = sec->link;
@@ -767,6 +855,7 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
offset += per_cpu_load_addr;
switch (r_type) {
+ case R_X86_64_GOTPCREL:
case R_X86_64_NONE:
/* NONE can be ignored. */
break;
@@ -820,7 +909,7 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
* the relocations are processed.
* Make sure that the offset will fit.
*/
- if ((int32_t)offset != (int64_t)offset)
+ if (r_type != R_X86_64_64 && (int32_t)offset != (int64_t)offset)
die("Relocation offset doesn't fit in 32 bits\n");
if (r_type == R_X86_64_64)
@@ -829,6 +918,10 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
add_reloc(&relocs32, offset);
break;
+ case R_X86_64_GLOB_DAT:
+ add_reloc(&relocs64, offset);
+ break;
+
default:
die("Unsupported relocation type: %s (%d)\n",
rel_type(r_type), r_type);
@@ -1098,6 +1191,7 @@ void process(FILE *fp, int use_real_mode, int as_text,
read_strtabs(fp);
read_symtabs(fp);
read_relocs(fp);
+ read_got(fp);
if (ELF_BITS == 64)
percpu_init();
if (show_absolute_syms) {
--
2.20.1.495.gaa96b0ce6b-goog
^ permalink raw reply related [flat|nested] 56+ messages in thread
* [PATCH v6 18/27] xen: Adapt assembly for PIE support
2019-01-31 19:24 [PATCH v6 00/27] x86: PIE support and option to extend KASLR randomization Thomas Garnier
` (16 preceding siblings ...)
2019-01-31 19:24 ` [PATCH v6 17/27] x86/relocs: Handle PIE relocations Thomas Garnier
@ 2019-01-31 19:24 ` Thomas Garnier
2019-01-31 19:24 ` [PATCH v6 19/27] kvm: " Thomas Garnier
` (10 subsequent siblings)
28 siblings, 0 replies; 56+ messages in thread
From: Thomas Garnier @ 2019-01-31 19:24 UTC (permalink / raw)
To: kernel-hardening
Cc: kristen, Thomas Garnier, Juergen Gross, Boris Ostrovsky,
Stefano Stabellini, Thomas Gleixner, Ingo Molnar,
Borislav Petkov, H. Peter Anvin, x86, Thomas Garnier,
Ard Biesheuvel, Joerg Roedel, H.J. Lu, Jordan Borgner, xen-devel,
linux-kernel
Change the assembly code to use the new _ASM_MOVABS macro, which gets a
symbol reference while being PIE compatible. Adapt the relocation tool
to ignore 32-bit Xen code.
Position Independent Executable (PIE) support will allow to extend the
KASLR randomization range below 0xffffffff80000000.
Signed-off-by: Thomas Garnier <thgarnie@chromium.org>
Reviewed-by: Juergen Gross <jgross@suse.com>
---
arch/x86/platform/pvh/head.S | 14 ++++++++++----
arch/x86/tools/relocs.c | 16 +++++++++++++++-
arch/x86/xen/xen-head.S | 11 ++++++-----
3 files changed, 31 insertions(+), 10 deletions(-)
diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S
index 1f8825bbaffb..e52d8b31e01d 100644
--- a/arch/x86/platform/pvh/head.S
+++ b/arch/x86/platform/pvh/head.S
@@ -103,8 +103,8 @@ ENTRY(pvh_start_xen)
call xen_prepare_pvh
/* startup_64 expects boot_params in %rsi. */
- mov $_pa(pvh_bootparams), %rsi
- mov $_pa(startup_64), %rax
+ movabs $_pa(pvh_bootparams), %rsi
+ movabs $_pa(startup_64), %rax
jmp *%rax
#else /* CONFIG_X86_64 */
@@ -150,10 +150,16 @@ END(pvh_start_xen)
.section ".init.data","aw"
.balign 8
+ /*
+ * Use an ASM_PTR (quad on x64) for _pa(gdt_start) because PIE requires
+ * a pointer size storage value before applying the relocation. On
+ * 32-bit _ASM_PTR will be a long which is aligned the space needed for
+ * relocation.
+ */
gdt:
.word gdt_end - gdt_start
- .long _pa(gdt_start)
- .word 0
+ _ASM_PTR _pa(gdt_start)
+ .balign 8
gdt_start:
.quad 0x0000000000000000 /* NULL descriptor */
#ifdef CONFIG_X86_64
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 2a3c703218cc..1b5ee38446b6 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -837,6 +837,16 @@ static int is_percpu_sym(ElfW(Sym) *sym, const char *symname)
strncmp(symname, "init_per_cpu_", 13);
}
+/*
+ * Check if the 32-bit relocation is within the xenpvh 32-bit code.
+ * If so, ignores it.
+ */
+static int is_in_xenpvh_assembly(Elf_Addr offset)
+{
+ Elf_Sym *sym = sym_lookup("pvh_start_xen");
+ return sym && (offset >= sym->st_value) &&
+ (offset < (sym->st_value + sym->st_size));
+}
static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
const char *symname)
@@ -909,8 +919,12 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
* the relocations are processed.
* Make sure that the offset will fit.
*/
- if (r_type != R_X86_64_64 && (int32_t)offset != (int64_t)offset)
+ if (r_type != R_X86_64_64 &&
+ (int32_t)offset != (int64_t)offset) {
+ if (is_in_xenpvh_assembly(offset))
+ break;
die("Relocation offset doesn't fit in 32 bits\n");
+ }
if (r_type == R_X86_64_64)
add_reloc(&relocs64, offset);
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 5077ead5e59c..4418ff0a1d96 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -28,14 +28,15 @@ ENTRY(startup_xen)
/* Clear .bss */
xor %eax,%eax
- mov $__bss_start, %_ASM_DI
- mov $__bss_stop, %_ASM_CX
+ _ASM_MOVABS $__bss_start, %_ASM_DI
+ _ASM_MOVABS $__bss_stop, %_ASM_CX
sub %_ASM_DI, %_ASM_CX
shr $__ASM_SEL(2, 3), %_ASM_CX
rep __ASM_SIZE(stos)
- mov %_ASM_SI, xen_start_info
- mov $init_thread_union+THREAD_SIZE, %_ASM_SP
+ _ASM_MOVABS $xen_start_info, %_ASM_AX
+ _ASM_MOV %_ASM_SI, (%_ASM_AX)
+ _ASM_MOVABS $init_thread_union+THREAD_SIZE, %_ASM_SP
#ifdef CONFIG_X86_64
/* Set up %gs.
@@ -46,7 +47,7 @@ ENTRY(startup_xen)
* init data section till per cpu areas are set up.
*/
movl $MSR_GS_BASE,%ecx
- movq $INIT_PER_CPU_VAR(irq_stack_union),%rax
+ movabsq $INIT_PER_CPU_VAR(irq_stack_union),%rax
cdq
wrmsr
#endif
--
2.20.1.495.gaa96b0ce6b-goog
^ permalink raw reply related [flat|nested] 56+ messages in thread
* [PATCH v6 19/27] kvm: Adapt assembly for PIE support
2019-01-31 19:24 [PATCH v6 00/27] x86: PIE support and option to extend KASLR randomization Thomas Garnier
` (17 preceding siblings ...)
2019-01-31 19:24 ` [PATCH v6 18/27] xen: Adapt assembly for PIE support Thomas Garnier
@ 2019-01-31 19:24 ` Thomas Garnier
2019-02-06 19:56 ` Sean Christopherson
2019-01-31 19:24 ` [PATCH v6 20/27] x86: Support global stack cookie Thomas Garnier
` (9 subsequent siblings)
28 siblings, 1 reply; 56+ messages in thread
From: Thomas Garnier @ 2019-01-31 19:24 UTC (permalink / raw)
To: kernel-hardening
Cc: kristen, Thomas Garnier, Paolo Bonzini,
Radim Krčmář,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, H. Peter Anvin,
x86, Joerg Roedel, kvm, linux-kernel
Change the assembly code to use only relative references to symbols so that
the kernel is PIE compatible. The new _ASM_MOVABS macro is used to
get the address of a symbol on both 32-bit and 64-bit builds with PIE support.
Position Independent Executable (PIE) support will allow extending the
KASLR randomization range below 0xffffffff80000000.
Signed-off-by: Thomas Garnier <thgarnie@chromium.org>
---
arch/x86/include/asm/kvm_host.h | 8 ++++++--
arch/x86/kernel/kvm.c | 6 ++++--
arch/x86/kvm/svm.c | 4 ++--
arch/x86/kvm/vmx/vmx.c | 2 +-
4 files changed, 13 insertions(+), 7 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4660ce90de7f..fdb3307d5fe1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1498,9 +1498,13 @@ asmlinkage void kvm_spurious_fault(void);
".pushsection .fixup, \"ax\" \n" \
"667: \n\t" \
cleanup_insn "\n\t" \
- "cmpb $0, kvm_rebooting \n\t" \
+ "cmpb $0, kvm_rebooting" __ASM_SEL(, (%%rip)) " \n\t" \
"jne 668b \n\t" \
- __ASM_SIZE(push) " $666b \n\t" \
+ __ASM_SIZE(push) "$0 \n\t" \
+ __ASM_SIZE(push) "%%" _ASM_AX " \n\t" \
+ _ASM_MOVABS " $666b, %%" _ASM_AX "\n\t" \
+ _ASM_MOV " %%" _ASM_AX ", " __ASM_SEL(4, 8) "(%%" _ASM_SP ") \n\t" \
+ __ASM_SIZE(pop) "%%" _ASM_AX " \n\t" \
"jmp kvm_spurious_fault \n\t" \
".popsection \n\t" \
_ASM_EXTABLE(666b, 667b)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 5c93a65ee1e5..f6eb02004e43 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -826,8 +826,10 @@ asm(
".global __raw_callee_save___kvm_vcpu_is_preempted;"
".type __raw_callee_save___kvm_vcpu_is_preempted, @function;"
"__raw_callee_save___kvm_vcpu_is_preempted:"
-"movq __per_cpu_offset(,%rdi,8), %rax;"
-"cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);"
+"leaq __per_cpu_offset(%rip), %rax;"
+"movq (%rax,%rdi,8), %rax;"
+"addq " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rip), %rax;"
+"cmpb $0, (%rax);"
"setne %al;"
"ret;"
".popsection");
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index f13a3a24d360..26abb82b1b67 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -706,12 +706,12 @@ static u32 svm_msrpm_offset(u32 msr)
static inline void clgi(void)
{
- asm volatile (__ex("clgi"));
+ asm volatile (__ex("clgi") : :);
}
static inline void stgi(void)
{
- asm volatile (__ex("stgi"));
+ asm volatile (__ex("stgi") : :);
}
static inline void invlpga(unsigned long addr, u32 asid)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 4341175339f3..3275761a7375 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2161,7 +2161,7 @@ static void vmclear_local_loaded_vmcss(void)
*/
static void kvm_cpu_vmxoff(void)
{
- asm volatile (__ex("vmxoff"));
+ asm volatile (__ex("vmxoff") :::);
intel_pt_handle_vmx(0);
cr4_clear_bits(X86_CR4_VMXE);
--
2.20.1.495.gaa96b0ce6b-goog
^ permalink raw reply related [flat|nested] 56+ messages in thread
* Re: [PATCH v6 19/27] kvm: Adapt assembly for PIE support
2019-01-31 19:24 ` [PATCH v6 19/27] kvm: " Thomas Garnier
@ 2019-02-06 19:56 ` Sean Christopherson
2019-02-06 21:23 ` Thomas Garnier
0 siblings, 1 reply; 56+ messages in thread
From: Sean Christopherson @ 2019-02-06 19:56 UTC (permalink / raw)
To: Thomas Garnier
Cc: kernel-hardening, kristen, Paolo Bonzini,
Radim Krčmář,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, H. Peter Anvin,
x86, Joerg Roedel, kvm, linux-kernel
On Thu, Jan 31, 2019 at 11:24:26AM -0800, Thomas Garnier wrote:
> Change the assembly code to use only relative references of symbols for the
> kernel to be PIE compatible. The new __ASM_MOVABS macro is used to
> get the address of a symbol on both 32 and 64-bit with PIE support.
>
> Position Independent Executable (PIE) support will allow to extend the
> KASLR randomization range below 0xffffffff80000000.
>
> Signed-off-by: Thomas Garnier <thgarnie@chromium.org>
> ---
> arch/x86/include/asm/kvm_host.h | 8 ++++++--
> arch/x86/kernel/kvm.c | 6 ++++--
> arch/x86/kvm/svm.c | 4 ++--
> arch/x86/kvm/vmx/vmx.c | 2 +-
> 4 files changed, 13 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 4660ce90de7f..fdb3307d5fe1 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1498,9 +1498,13 @@ asmlinkage void kvm_spurious_fault(void);
> ".pushsection .fixup, \"ax\" \n" \
> "667: \n\t" \
> cleanup_insn "\n\t" \
> - "cmpb $0, kvm_rebooting \n\t" \
> + "cmpb $0, kvm_rebooting" __ASM_SEL(, (%%rip)) " \n\t" \
> "jne 668b \n\t" \
> - __ASM_SIZE(push) " $666b \n\t" \
> + __ASM_SIZE(push) "$0 \n\t" \
> + __ASM_SIZE(push) "%%" _ASM_AX " \n\t" \
> + _ASM_MOVABS " $666b, %%" _ASM_AX "\n\t" \
> + _ASM_MOV " %%" _ASM_AX ", " __ASM_SEL(4, 8) "(%%" _ASM_SP ") \n\t" \
> + __ASM_SIZE(pop) "%%" _ASM_AX " \n\t" \
This blob isn't very intuitive to begin with, and the extra stack
shenanigans are a bit much when PIE is disabled. What about breaking
out the behavior to separate helper macros to keep the simpler code
for non-PIE and to make the code somewhat self-documenting? E.g.:
#ifndef CONFIG_X86_PIE
#define KVM_PUSH_FAULTING_INSN_RIP __ASM_SIZE(push) " $666b \n\t"
#else
#define KVM_PUSH_FAULTING_INSN_RIP \
__ASM_SIZE(push) "$0 \n\t" \
__ASM_SIZE(push) "%%" _ASM_AX " \n\t" \
_ASM_MOVABS " $666b, %%" _ASM_AX "\n\t" \
_ASM_MOV " %%" _ASM_AX ", " __ASM_SEL(4, 8) "(%%" _ASM_SP ") \n\t" \
__ASM_SIZE(pop) "%%" _ASM_AX " \n\t"
#endif
#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \
"666: " insn "\n\t" \
"668: \n\t" \
".pushsection .fixup, \"ax\" \n" \
"667: \n\t" \
cleanup_insn "\n\t" \
"cmpb $0, kvm_rebooting" __ASM_SEL(, (%%rip)) " \n\t" \
"jne 668b \n\t" \
KVM_PUSH_FAULTING_INSN_RIP \
"jmp kvm_spurious_fault \n\t" \
".popsection \n\t" \
_ASM_EXTABLE(666b, 667b)
> "jmp kvm_spurious_fault \n\t" \
> ".popsection \n\t" \
> _ASM_EXTABLE(666b, 667b)
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index 5c93a65ee1e5..f6eb02004e43 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
This change to arch/x86/kernel/kvm.c should be done in a separate patch
as it affects the kernel itself when running as a guest under KVM,
whereas arch/x86/kvm/**/* and arch/x86/include/asm/kvm_host.h affect
KVM as a host, i.e. the KVM module. Case in point, the below bug causes
a kernel panic when running as a KVM guest but has no impact on the KVM
module.
> @@ -826,8 +826,10 @@ asm(
> ".global __raw_callee_save___kvm_vcpu_is_preempted;"
> ".type __raw_callee_save___kvm_vcpu_is_preempted, @function;"
> "__raw_callee_save___kvm_vcpu_is_preempted:"
> -"movq __per_cpu_offset(,%rdi,8), %rax;"
> -"cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);"
> +"leaq __per_cpu_offset(%rip), %rax;"
> +"movq (%rax,%rdi,8), %rax;"
> +"addq " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rip), %rax;"
This is wrong, it's directly accessing the per-cpu offset of 'steal_time'
as a virtual address, e.g. without PIE enabled:
0xffffffff8104820b <+11>: add 0x7efccffe(%rip),%rax # 0x15210 <steal_time+16>
This results in kernel panics due to unhandled page faults:
[ 0.001453] BUG: unable to handle kernel paging request at 0000000000015210
[ 0.001453] #PF error: [normal kernel read fault]
I think you want something like the following, except that the whole
point of handcoded assembly is to avoid modifying registers other than
RAX, i.e. modifying RDI is a no-no.
"leaq " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rip), %rdi;"
"cmpb $0, (%rax,%rdi,1);"
And similar to the comment on ____kvm_handle_fault_on_reboot(), what
about wrapping the PIE-specific version in an ifdef?
> +"cmpb $0, (%rax);"
> "setne %al;"
> "ret;"
> ".popsection");
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index f13a3a24d360..26abb82b1b67 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -706,12 +706,12 @@ static u32 svm_msrpm_offset(u32 msr)
>
> static inline void clgi(void)
> {
> - asm volatile (__ex("clgi"));
> + asm volatile (__ex("clgi") : :);
> }
>
> static inline void stgi(void)
> {
> - asm volatile (__ex("stgi"));
> + asm volatile (__ex("stgi") : :);
> }
>
> static inline void invlpga(unsigned long addr, u32 asid)
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 4341175339f3..3275761a7375 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -2161,7 +2161,7 @@ static void vmclear_local_loaded_vmcss(void)
> */
> static void kvm_cpu_vmxoff(void)
> {
> - asm volatile (__ex("vmxoff"));
> + asm volatile (__ex("vmxoff") :::);
>
> intel_pt_handle_vmx(0);
> cr4_clear_bits(X86_CR4_VMXE);
> --
> 2.20.1.495.gaa96b0ce6b-goog
>
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [PATCH v6 19/27] kvm: Adapt assembly for PIE support
2019-02-06 19:56 ` Sean Christopherson
@ 2019-02-06 21:23 ` Thomas Garnier
0 siblings, 0 replies; 56+ messages in thread
From: Thomas Garnier @ 2019-02-06 21:23 UTC (permalink / raw)
To: Sean Christopherson
Cc: Kernel Hardening, Kristen Carlson Accardi, Paolo Bonzini,
Radim Krčmář,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, H. Peter Anvin,
the arch/x86 maintainers, Joerg Roedel, kvm list, LKML
On Wed, Feb 6, 2019 at 11:56 AM Sean Christopherson
<sean.j.christopherson@intel.com> wrote:
>
> On Thu, Jan 31, 2019 at 11:24:26AM -0800, Thomas Garnier wrote:
> > Change the assembly code to use only relative references of symbols for the
> > kernel to be PIE compatible. The new __ASM_MOVABS macro is used to
> > get the address of a symbol on both 32 and 64-bit with PIE support.
> >
> > Position Independent Executable (PIE) support will allow to extend the
> > KASLR randomization range below 0xffffffff80000000.
> >
> > Signed-off-by: Thomas Garnier <thgarnie@chromium.org>
> > ---
> > arch/x86/include/asm/kvm_host.h | 8 ++++++--
> > arch/x86/kernel/kvm.c | 6 ++++--
> > arch/x86/kvm/svm.c | 4 ++--
> > arch/x86/kvm/vmx/vmx.c | 2 +-
> > 4 files changed, 13 insertions(+), 7 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index 4660ce90de7f..fdb3307d5fe1 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -1498,9 +1498,13 @@ asmlinkage void kvm_spurious_fault(void);
> > ".pushsection .fixup, \"ax\" \n" \
> > "667: \n\t" \
> > cleanup_insn "\n\t" \
> > - "cmpb $0, kvm_rebooting \n\t" \
> > + "cmpb $0, kvm_rebooting" __ASM_SEL(, (%%rip)) " \n\t" \
> > "jne 668b \n\t" \
> > - __ASM_SIZE(push) " $666b \n\t" \
> > + __ASM_SIZE(push) "$0 \n\t" \
> > + __ASM_SIZE(push) "%%" _ASM_AX " \n\t" \
> > + _ASM_MOVABS " $666b, %%" _ASM_AX "\n\t" \
> > + _ASM_MOV " %%" _ASM_AX ", " __ASM_SEL(4, 8) "(%%" _ASM_SP ") \n\t" \
> > + __ASM_SIZE(pop) "%%" _ASM_AX " \n\t" \
>
> This blob isn't very intuitive to begin with, and the extra stack
> shenanigans are a bit much when PIE is disabled. What about breaking
> out the behavior to separate helper macros to keep the simpler code
> for non-PIE and to make the code somewhat self-documenting? E.g.:
>
> #ifndef CONFIG_X86_PIE
> #define KVM_PUSH_FAULTING_INSN_RIP __ASM_SIZE(push) " $666b \n\t"
> #else
> #define KVM_PUSH_FAULTING_INSN_RIP \
> __ASM_SIZE(push) "$0 \n\t" \
> __ASM_SIZE(push) "%%" _ASM_AX " \n\t" \
> _ASM_MOVABS " $666b, %%" _ASM_AX "\n\t" \
> _ASM_MOV " %%" _ASM_AX ", " __ASM_SEL(4, 8) "(%%" _ASM_SP ") \n\t" \
> __ASM_SIZE(pop) "%%" _ASM_AX " \n\t"
> #endif
>
> #define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \
> "666: " insn "\n\t" \
> "668: \n\t" \
> ".pushsection .fixup, \"ax\" \n" \
> "667: \n\t" \
> cleanup_insn "\n\t" \
> "cmpb $0, kvm_rebooting" __ASM_SEL(, (%%rip)) " \n\t" \
> "jne 668b \n\t" \
> KVM_PUSH_FAULTING_INSN_RIP \
> "jmp kvm_spurious_fault \n\t" \
> ".popsection \n\t" \
> _ASM_EXTABLE(666b, 667b)
>
> > "jmp kvm_spurious_fault \n\t" \
> > ".popsection \n\t" \
> > _ASM_EXTABLE(666b, 667b)
> > diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> > index 5c93a65ee1e5..f6eb02004e43 100644
> > --- a/arch/x86/kernel/kvm.c
> > +++ b/arch/x86/kernel/kvm.c
>
> This change to arch/x86/kernel/kvm.c should be done in a separate patch
> as it affects the kernel itself when running as a guest under KVM,
> whereas arch/x86/kvm/**/* and arch/x86/include/asm/kvm_host.h affect
> KVM as a host, i.e. the KVM module. Case in point, the below bug causes
> a kernel panic when running as a KVM guest but has no impact on the KVM
> module.
Got it, will split in next iteration.
>
> > @@ -826,8 +826,10 @@ asm(
> > ".global __raw_callee_save___kvm_vcpu_is_preempted;"
> > ".type __raw_callee_save___kvm_vcpu_is_preempted, @function;"
> > "__raw_callee_save___kvm_vcpu_is_preempted:"
> > -"movq __per_cpu_offset(,%rdi,8), %rax;"
> > -"cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);"
> > +"leaq __per_cpu_offset(%rip), %rax;"
> > +"movq (%rax,%rdi,8), %rax;"
> > +"addq " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rip), %rax;"
>
> This is wrong, it's directly accessing the per-cpu offset of 'steal_time'
> as a virtual address, e.g. without PIE enabled:
>
> 0xffffffff8104820b <+11>: add 0x7efccffe(%rip),%rax # 0x15210 <steal_time+16>
>
> This results in kernel panics due to unhandled page faults:
>
> [ 0.001453] BUG: unable to handle kernel paging request at 0000000000015210
> [ 0.001453] #PF error: [normal kernel read fault]
Yes, I think something went wrong in rebasing. Thanks for pointing it out.
>
> I think you want something like the following, except that the whole
> point of handcoded assembly is to avoid modifying registers other than
> RAX, i.e. modifying RDI is a no-no.
>
> "leaq " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rip), %rdi;"
> "cmpb $0, (%rax,%rdi,1);"
>
>
> And similar to the comment on ____kvm_handle_fault_on_reboot(), what
> about wrapping the PIE-specific version in an ifdef?
I will look into this and try your approach.
>
> > +"cmpb $0, (%rax);"
> > "setne %al;"
> > "ret;"
> > ".popsection");
> > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> > index f13a3a24d360..26abb82b1b67 100644
> > --- a/arch/x86/kvm/svm.c
> > +++ b/arch/x86/kvm/svm.c
> > @@ -706,12 +706,12 @@ static u32 svm_msrpm_offset(u32 msr)
> >
> > static inline void clgi(void)
> > {
> > - asm volatile (__ex("clgi"));
> > + asm volatile (__ex("clgi") : :);
> > }
> >
> > static inline void stgi(void)
> > {
> > - asm volatile (__ex("stgi"));
> > + asm volatile (__ex("stgi") : :);
> > }
> >
> > static inline void invlpga(unsigned long addr, u32 asid)
> > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> > index 4341175339f3..3275761a7375 100644
> > --- a/arch/x86/kvm/vmx/vmx.c
> > +++ b/arch/x86/kvm/vmx/vmx.c
> > @@ -2161,7 +2161,7 @@ static void vmclear_local_loaded_vmcss(void)
> > */
> > static void kvm_cpu_vmxoff(void)
> > {
> > - asm volatile (__ex("vmxoff"));
> > + asm volatile (__ex("vmxoff") :::);
> >
> > intel_pt_handle_vmx(0);
> > cr4_clear_bits(X86_CR4_VMXE);
> > --
> > 2.20.1.495.gaa96b0ce6b-goog
> >
Thanks for the feedback.
^ permalink raw reply [flat|nested] 56+ messages in thread
* [PATCH v6 20/27] x86: Support global stack cookie
2019-01-31 19:24 [PATCH v6 00/27] x86: PIE support and option to extend KASLR randomization Thomas Garnier
` (18 preceding siblings ...)
2019-01-31 19:24 ` [PATCH v6 19/27] kvm: " Thomas Garnier
@ 2019-01-31 19:24 ` Thomas Garnier
2019-02-01 19:27 ` Andy Lutomirski
2019-01-31 19:24 ` [PATCH v6 21/27] x86/ftrace: Adapt function tracing for PIE support Thomas Garnier
` (8 subsequent siblings)
28 siblings, 1 reply; 56+ messages in thread
From: Thomas Garnier @ 2019-01-31 19:24 UTC (permalink / raw)
To: kernel-hardening
Cc: kristen, Thomas Garnier, Thomas Gleixner, Ingo Molnar,
Borislav Petkov, H. Peter Anvin, x86, Andy Lutomirski,
Thomas Garnier, Masahiro Yamada, Juergen Gross, Joerg Roedel,
Jia Zhang, Konrad Rzeszutek Wilk, Tim Chen, linux-kernel
Add an off-by-default configuration option to use a global stack cookie
instead of the default TLS. This configuration option will only be used
with PIE binaries.
For the kernel stack cookie, the compiler uses -mcmodel=kernel to switch
from the fs segment to the gs segment. A PIE binary does not use
-mcmodel=kernel because it can be relocated anywhere; therefore, the
compiler will default to the fs segment register. This is fixed in the
latest version of gcc.
If the segment selector is available, it will be automatically added. If
the automatic configuration was selected, a warning is written and the
global variable stack cookie is used. If a specific stack mode was
selected (regular or strong) and the compiler does not support selecting
the segment register, an error is emitted.
Signed-off-by: Thomas Garnier <thgarnie@chromium.org>
---
arch/x86/Kconfig | 12 ++++++++++++
arch/x86/Makefile | 9 +++++++++
arch/x86/entry/entry_32.S | 3 ++-
arch/x86/entry/entry_64.S | 3 ++-
arch/x86/include/asm/processor.h | 3 ++-
arch/x86/include/asm/stackprotector.h | 19 ++++++++++++++-----
arch/x86/kernel/asm-offsets.c | 3 ++-
arch/x86/kernel/asm-offsets_32.c | 3 ++-
arch/x86/kernel/asm-offsets_64.c | 3 ++-
arch/x86/kernel/cpu/common.c | 3 ++-
arch/x86/kernel/head_32.S | 3 ++-
arch/x86/kernel/process.c | 5 +++++
12 files changed, 56 insertions(+), 13 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0519da6f8ee4..263d81c570b2 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2221,6 +2221,18 @@ config RANDOMIZE_MEMORY_PHYSICAL_PADDING
If unsure, leave at the default value.
+config X86_GLOBAL_STACKPROTECTOR
+ bool "Stack cookie using a global variable"
+ depends on CC_STACKPROTECTOR_AUTO
+ default n
+ help
+ This option turns on the "stack-protector" GCC feature using a global
+ variable instead of a segment register. It is useful when the
+ compiler does not support custom segment registers when building a
+ position independent (PIE) binary.
+
+ If unsure, say N.
+
config HOTPLUG_CPU
bool "Support for hot-pluggable CPUs"
depends on SMP
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 76bc4dc03d5e..65d6d9a1dd22 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -132,6 +132,15 @@ else
KBUILD_CFLAGS += -mcmodel=kernel
endif
+ifdef CONFIG_X86_GLOBAL_STACKPROTECTOR
+ ifeq ($(call cc-option, -mstack-protector-guard=global),)
+ $(error Cannot use CONFIG_X86_GLOBAL_STACKPROTECTOR: \
+ -mstack-protector-guard=global not supported \
+ by compiler)
+ endif
+ KBUILD_CFLAGS += -mstack-protector-guard=global
+endif
+
ifdef CONFIG_X86_X32
x32_ld_ok := $(call try-run,\
/bin/echo -e '1: .quad 1b' | \
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index d309f30cf7af..1a4abb98664b 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -655,7 +655,8 @@ ENTRY(__switch_to_asm)
movl %esp, TASK_threadsp(%eax)
movl TASK_threadsp(%edx), %esp
-#ifdef CONFIG_STACKPROTECTOR
+#if defined(CONFIG_STACKPROTECTOR) && \
+ !defined(CONFIG_X86_GLOBAL_STACKPROTECTOR)
movl TASK_stack_canary(%edx), %ebx
movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
#endif
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index fc15fe058d3c..1ae9c85241dc 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -296,7 +296,8 @@ ENTRY(__switch_to_asm)
movq %rsp, TASK_threadsp(%rdi)
movq TASK_threadsp(%rsi), %rsp
-#ifdef CONFIG_STACKPROTECTOR
+#if defined(CONFIG_STACKPROTECTOR) && \
+ !defined(CONFIG_X86_GLOBAL_STACKPROTECTOR)
movq TASK_stack_canary(%rsi), %rbx
movq %rbx, PER_CPU_VAR(irq_stack_union + stack_canary_offset)
#endif
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 18f1e8269ad7..d322b1789d94 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -416,7 +416,8 @@ extern asmlinkage void ignore_sysret(void);
void save_fsgs_for_kvm(void);
#endif
#else /* X86_64 */
-#ifdef CONFIG_STACKPROTECTOR
+#if defined(CONFIG_STACKPROTECTOR) && \
+ !defined(CONFIG_X86_GLOBAL_STACKPROTECTOR)
/*
* Make sure stack canary segment base is cached-aligned:
* "For Intel Atom processors, avoid non zero segment base address
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 8ec97a62c245..4e120cf36782 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -52,6 +52,10 @@
#define GDT_STACK_CANARY_INIT \
[GDT_ENTRY_STACK_CANARY] = GDT_ENTRY_INIT(0x4090, 0, 0x18),
+#ifdef CONFIG_X86_GLOBAL_STACKPROTECTOR
+extern unsigned long __stack_chk_guard;
+#endif
+
/*
* Initialize the stackprotector canary value.
*
@@ -63,7 +67,7 @@ static __always_inline void boot_init_stack_canary(void)
u64 canary;
u64 tsc;
-#ifdef CONFIG_X86_64
+#if defined(CONFIG_X86_64) && !defined(CONFIG_X86_GLOBAL_STACKPROTECTOR)
BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40);
#endif
/*
@@ -77,17 +81,22 @@ static __always_inline void boot_init_stack_canary(void)
canary += tsc + (tsc << 32UL);
canary &= CANARY_MASK;
+#ifdef CONFIG_X86_GLOBAL_STACKPROTECTOR
+ if (__stack_chk_guard == 0)
+ __stack_chk_guard = canary ?: 1;
+#else /* !CONFIG_X86_GLOBAL_STACKPROTECTOR */
current->stack_canary = canary;
#ifdef CONFIG_X86_64
this_cpu_write(irq_stack_union.stack_canary, canary);
-#else
+#else /* CONFIG_X86_32 */
this_cpu_write(stack_canary.canary, canary);
#endif
+#endif
}
static inline void setup_stack_canary_segment(int cpu)
{
-#ifdef CONFIG_X86_32
+#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_GLOBAL_STACKPROTECTOR)
unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu);
struct desc_struct *gdt_table = get_cpu_gdt_rw(cpu);
struct desc_struct desc;
@@ -100,7 +109,7 @@ static inline void setup_stack_canary_segment(int cpu)
static inline void load_stack_canary_segment(void)
{
-#ifdef CONFIG_X86_32
+#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_GLOBAL_STACKPROTECTOR)
asm("mov %0, %%gs" : : "r" (__KERNEL_STACK_CANARY) : "memory");
#endif
}
@@ -116,7 +125,7 @@ static inline void setup_stack_canary_segment(int cpu)
static inline void load_stack_canary_segment(void)
{
-#ifdef CONFIG_X86_32
+#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_GLOBAL_STACKPROTECTOR)
asm volatile ("mov %0, %%gs" : : "r" (0));
#endif
}
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 168543d077d7..bd6d7beffc1c 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -33,7 +33,8 @@ static void __used common(void)
{
BLANK();
OFFSET(TASK_threadsp, task_struct, thread.sp);
-#ifdef CONFIG_STACKPROTECTOR
+#if defined(CONFIG_STACKPROTECTOR) && \
+ !defined(CONFIG_X86_GLOBAL_STACKPROTECTOR)
OFFSET(TASK_stack_canary, task_struct, stack_canary);
#endif
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 82826f2275cc..775b0a7692f3 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -56,7 +56,8 @@ void foo(void)
offsetof(struct cpu_entry_area, tss.x86_tss.sp1) -
offsetofend(struct cpu_entry_area, entry_stack_page.stack));
-#ifdef CONFIG_STACKPROTECTOR
+#if defined(CONFIG_STACKPROTECTOR) && \
+ !defined(CONFIG_X86_GLOBAL_STACKPROTECTOR)
BLANK();
OFFSET(stack_canary_offset, stack_canary, canary);
#endif
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index ddced33184b5..5bb1247afd6b 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -70,7 +70,8 @@ int main(void)
OFFSET(TSS_ist, tss_struct, x86_tss.ist);
BLANK();
-#ifdef CONFIG_STACKPROTECTOR
+#if defined(CONFIG_STACKPROTECTOR) && \
+ !defined(CONFIG_X86_GLOBAL_STACKPROTECTOR)
DEFINE(stack_canary_offset, offsetof(union irq_stack_union, stack_canary));
BLANK();
#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index cb28e98a0659..70aaf94fcf26 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1613,7 +1613,8 @@ DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) =
(unsigned long)&init_thread_union + THREAD_SIZE;
EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack);
-#ifdef CONFIG_STACKPROTECTOR
+#if defined(CONFIG_STACKPROTECTOR) && \
+ !defined(CONFIG_X86_GLOBAL_STACKPROTECTOR)
DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
#endif
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 30f9cb2c0b55..6afa948d211a 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -375,7 +375,8 @@ ENDPROC(startup_32_smp)
*/
__INIT
setup_once:
-#ifdef CONFIG_STACKPROTECTOR
+#if defined(CONFIG_STACKPROTECTOR) && \
+ !defined(CONFIG_X86_GLOBAL_STACKPROTECTOR)
/*
* Configure the stack canary. The linker can't handle this by
* relocation. Manually set base address in stack canary
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 90ae0ca51083..582113ffb011 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -90,6 +90,11 @@ EXPORT_PER_CPU_SYMBOL(cpu_tss_rw);
DEFINE_PER_CPU(bool, __tss_limit_invalid);
EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid);
+#ifdef CONFIG_X86_GLOBAL_STACKPROTECTOR
+unsigned long __stack_chk_guard __read_mostly;
+EXPORT_SYMBOL(__stack_chk_guard);
+#endif
+
/*
* this gets called so that we can store lazy state into memory and copy the
* current task into the new thread.
--
2.20.1.495.gaa96b0ce6b-goog
^ permalink raw reply related [flat|nested] 56+ messages in thread
* Re: [PATCH v6 20/27] x86: Support global stack cookie
2019-01-31 19:24 ` [PATCH v6 20/27] x86: Support global stack cookie Thomas Garnier
@ 2019-02-01 19:27 ` Andy Lutomirski
2019-02-01 20:21 ` Thomas Garnier
0 siblings, 1 reply; 56+ messages in thread
From: Andy Lutomirski @ 2019-02-01 19:27 UTC (permalink / raw)
To: Thomas Garnier
Cc: Kernel Hardening, Kristen Carlson Accardi, Thomas Gleixner,
Ingo Molnar, Borislav Petkov, H. Peter Anvin, X86 ML,
Andy Lutomirski, Thomas Garnier, Masahiro Yamada, Juergen Gross,
Joerg Roedel, Jia Zhang, Konrad Rzeszutek Wilk, Tim Chen, LKML
On Thu, Jan 31, 2019 at 11:29 AM Thomas Garnier <thgarnie@chromium.org> wrote:
>
> Add an off-by-default configuration option to use a global stack cookie
> instead of the default TLS. This configuration option will only be used
> with PIE binaries.
>
> For kernel stack cookie, the compiler uses the mcmodel=kernel to switch
> between the fs segment to gs segment. A PIE binary does not use
> mcmodel=kernel because it can be relocated anywhere, therefore the
> compiler will default to the fs segment register. This is fixed on the
> latest version of gcc.
I hate all these gcc-sucks-so-we-hack-it-and-change-nasty-semantics
options. How about just preventing use of both stack protector and
PIE unless the version of gcc in use is new enough?
Also, does -mstack-protector-guard-reg not solve this? See
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81708. Or is there
another bug? Or are you worried about gcc versions that don't have
that feature yet?
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [PATCH v6 20/27] x86: Support global stack cookie
2019-02-01 19:27 ` Andy Lutomirski
@ 2019-02-01 20:21 ` Thomas Garnier
2019-02-01 22:36 ` Andy Lutomirski
0 siblings, 1 reply; 56+ messages in thread
From: Thomas Garnier @ 2019-02-01 20:21 UTC (permalink / raw)
To: Andy Lutomirski
Cc: Kernel Hardening, Kristen Carlson Accardi, Thomas Gleixner,
Ingo Molnar, Borislav Petkov, H. Peter Anvin, X86 ML,
Masahiro Yamada, Juergen Gross, Joerg Roedel, Jia Zhang,
Konrad Rzeszutek Wilk, Tim Chen, LKML
On Fri, Feb 1, 2019 at 11:27 AM Andy Lutomirski <luto@kernel.org> wrote:
>
> On Thu, Jan 31, 2019 at 11:29 AM Thomas Garnier <thgarnie@chromium.org> wrote:
> >
> > Add an off-by-default configuration option to use a global stack cookie
> > instead of the default TLS. This configuration option will only be used
> > with PIE binaries.
> >
> > For kernel stack cookie, the compiler uses the mcmodel=kernel to switch
> > between the fs segment to gs segment. A PIE binary does not use
> > mcmodel=kernel because it can be relocated anywhere, therefore the
> > compiler will default to the fs segment register. This is fixed on the
> > latest version of gcc.
>
> I hate all these gcc-sucks-so-we-hack-it-and-change-nasty-semantics
> options. How about just preventing use of both stack protector and
> PIE unless the version of gcc in use is new enough.
So fail the build in this scenario?
>
> Also, does -mstack-protector-guard-reg not solve this? See
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81708. Or is there
> another bug? Or are you worried about gcc versions that don't have
> that feature yet?
I am worried about gcc versions that don't have this feature, yes.
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [PATCH v6 20/27] x86: Support global stack cookie
2019-02-01 20:21 ` Thomas Garnier
@ 2019-02-01 22:36 ` Andy Lutomirski
2019-02-01 23:56 ` Thomas Garnier
0 siblings, 1 reply; 56+ messages in thread
From: Andy Lutomirski @ 2019-02-01 22:36 UTC (permalink / raw)
To: Thomas Garnier
Cc: Andy Lutomirski, Kernel Hardening, Kristen Carlson Accardi,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, H. Peter Anvin,
X86 ML, Masahiro Yamada, Juergen Gross, Joerg Roedel, Jia Zhang,
Konrad Rzeszutek Wilk, Tim Chen, LKML
> On Feb 1, 2019, at 12:21 PM, Thomas Garnier <thgarnie@chromium.org> wrote:
>
>> On Fri, Feb 1, 2019 at 11:27 AM Andy Lutomirski <luto@kernel.org> wrote:
>>
>>> On Thu, Jan 31, 2019 at 11:29 AM Thomas Garnier <thgarnie@chromium.org> wrote:
>>>
>>> Add an off-by-default configuration option to use a global stack cookie
>>> instead of the default TLS. This configuration option will only be used
>>> with PIE binaries.
>>>
>>> For kernel stack cookie, the compiler uses the mcmodel=kernel to switch
>>> between the fs segment to gs segment. A PIE binary does not use
>>> mcmodel=kernel because it can be relocated anywhere, therefore the
>>> compiler will default to the fs segment register. This is fixed on the
>>> latest version of gcc.
>>
>> I hate all these gcc-sucks-so-we-hack-it-and-change-nasty-semantics
>> options. How about just preventing use of both stack protector and
>> PIE unless the version of gcc in use is new enough.
>
> So fail the build in this scenario?
Fail the build or use some Kconfig magic to prevent this from being configured in the first place.
>
>>
>> Also, does -mstack-protector-guard-reg not solve this? See
>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81708. Or is there
>> another bug? Or are you worried about gcc versions that don't have
>> that feature yet?
>
> I am worried about gcc versions that don't have this feature, yes.
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [PATCH v6 20/27] x86: Support global stack cookie
2019-02-01 22:36 ` Andy Lutomirski
@ 2019-02-01 23:56 ` Thomas Garnier
0 siblings, 0 replies; 56+ messages in thread
From: Thomas Garnier @ 2019-02-01 23:56 UTC (permalink / raw)
To: Andy Lutomirski
Cc: Andy Lutomirski, Kernel Hardening, Kristen Carlson Accardi,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, H. Peter Anvin,
X86 ML, Masahiro Yamada, Juergen Gross, Joerg Roedel, Jia Zhang,
Konrad Rzeszutek Wilk, Tim Chen, LKML
On Fri, Feb 1, 2019 at 2:36 PM Andy Lutomirski <luto@amacapital.net> wrote:
>
>
> > On Feb 1, 2019, at 12:21 PM, Thomas Garnier <thgarnie@chromium.org> wrote:
> >
> >> On Fri, Feb 1, 2019 at 11:27 AM Andy Lutomirski <luto@kernel.org> wrote:
> >>
> >>> On Thu, Jan 31, 2019 at 11:29 AM Thomas Garnier <thgarnie@chromium.org> wrote:
> >>>
> >>> Add an off-by-default configuration option to use a global stack cookie
> >>> instead of the default TLS. This configuration option will only be used
> >>> with PIE binaries.
> >>>
> >>> For kernel stack cookie, the compiler uses the mcmodel=kernel to switch
> >>> between the fs segment to gs segment. A PIE binary does not use
> >>> mcmodel=kernel because it can be relocated anywhere, therefore the
> >>> compiler will default to the fs segment register. This is fixed on the
> >>> latest version of gcc.
> >>
> >> I hate all these gcc-sucks-so-we-hack-it-and-change-nasty-semantics
> >> options. How about just preventing use of both stack protector and
> >> PIE unless the version of gcc in use is new enough.
> >
> > So fail the build in this scenario?
>
> Fail the build or use some Kconfig magic to prevent this from being configured in the first place.
Ok, I can do that in next iteration.
>
> >
> >>
> >> Also, does -mstack-protector-guard-reg not solve this? See
> >> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81708. Or is there
> >> another bug? Or are you worried about gcc versions that don't have
> >> that feature yet?
> >
> > I am worried about gcc versions that don't have this feature, yes.
^ permalink raw reply [flat|nested] 56+ messages in thread
* [PATCH v6 21/27] x86/ftrace: Adapt function tracing for PIE support
2019-01-31 19:24 [PATCH v6 00/27] x86: PIE support and option to extend KASLR randomization Thomas Garnier
` (19 preceding siblings ...)
2019-01-31 19:24 ` [PATCH v6 20/27] x86: Support global stack cookie Thomas Garnier
@ 2019-01-31 19:24 ` Thomas Garnier
2019-01-31 19:24 ` [PATCH v6 22/27] x86/modules: Add option to start module section after kernel Thomas Garnier
` (7 subsequent siblings)
28 siblings, 0 replies; 56+ messages in thread
From: Thomas Garnier @ 2019-01-31 19:24 UTC (permalink / raw)
To: kernel-hardening
Cc: kristen, Thomas Garnier, Steven Rostedt, Ingo Molnar,
Thomas Gleixner, Borislav Petkov, H. Peter Anvin, x86,
Joe Lawrence, Thomas Garnier, James Hogan, Peter Zijlstra (Intel),
nixiaoming, linux-kernel
When using PIE with function tracing, the compiler generates a
call through the GOT (call *__fentry__@GOTPCREL). This instruction
takes 6 bytes instead of the 5 bytes of a relative call.
If PIE is enabled, replace the 6th byte of the GOT call with a 1-byte nop
so ftrace can handle the preceding 5 bytes as before.
Position Independent Executable (PIE) support will allow extending the
KASLR randomization range below 0xffffffff80000000.
Signed-off-by: Thomas Garnier <thgarnie@chromium.org>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
arch/x86/kernel/ftrace.c | 51 ++++++++++++++++++++++++--
scripts/recordmcount.c | 78 ++++++++++++++++++++++++++--------------
2 files changed, 101 insertions(+), 28 deletions(-)
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 8257a59704ae..82feb8c7a47e 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -102,7 +102,7 @@ static const unsigned char *ftrace_nop_replace(void)
static int
ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
- unsigned const char *new_code)
+ unsigned const char *new_code)
{
unsigned char replaced[MCOUNT_INSN_SIZE];
@@ -135,6 +135,53 @@ ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
return 0;
}
+/* Bytes before call GOT offset */
+static const unsigned char got_call_preinsn[] = { 0xff, 0x15 };
+
+static int
+ftrace_modify_initial_code(unsigned long ip, unsigned const char *old_code,
+ unsigned const char *new_code)
+{
+ unsigned char replaced[MCOUNT_INSN_SIZE + 1];
+
+ /*
+ * If PIE is not enabled default to the original approach to code
+ * modification.
+ */
+ if (!IS_ENABLED(CONFIG_X86_PIE))
+ return ftrace_modify_code_direct(ip, old_code, new_code);
+
+ ftrace_expected = old_code;
+
+ /* Ensure the instructions point to a call to the GOT */
+ if (probe_kernel_read(replaced, (void *)ip, sizeof(replaced))) {
+ WARN_ONCE(1, "invalid function");
+ return -EFAULT;
+ }
+
+ if (memcmp(replaced, got_call_preinsn, sizeof(got_call_preinsn))) {
+ WARN_ONCE(1, "invalid function call");
+ return -EINVAL;
+ }
+
+ /*
+ * Build a nop slide with a 5-byte nop and 1-byte nop to keep the ftrace
+ * hooking algorithm working with the expected 5 bytes instruction.
+ */
+ memset(replaced, ideal_nops[1][0], sizeof(replaced));
+ memcpy(replaced, new_code, MCOUNT_INSN_SIZE);
+
+ ip = text_ip_addr(ip);
+
+ if (probe_kernel_write((void *)ip, replaced, sizeof(replaced)))
+ return -EPERM;
+
+ sync_core();
+
+ return 0;
+
+}
+
int ftrace_make_nop(struct module *mod,
struct dyn_ftrace *rec, unsigned long addr)
{
@@ -153,7 +200,7 @@ int ftrace_make_nop(struct module *mod,
* just modify the code directly.
*/
if (addr == MCOUNT_ADDR)
- return ftrace_modify_code_direct(rec->ip, old, new);
+ return ftrace_modify_initial_code(rec->ip, old, new);
ftrace_expected = NULL;
diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c
index a50a2aa963ad..4b8bd746ed2e 100644
--- a/scripts/recordmcount.c
+++ b/scripts/recordmcount.c
@@ -171,33 +171,9 @@ umalloc(size_t size)
return addr;
}
-static unsigned char ideal_nop5_x86_64[5] = { 0x0f, 0x1f, 0x44, 0x00, 0x00 };
-static unsigned char ideal_nop5_x86_32[5] = { 0x3e, 0x8d, 0x74, 0x26, 0x00 };
-static unsigned char *ideal_nop;
-
static char rel_type_nop;
-
static int (*make_nop)(void *map, size_t const offset);
-
-static int make_nop_x86(void *map, size_t const offset)
-{
- uint32_t *ptr;
- unsigned char *op;
-
- /* Confirm we have 0xe8 0x0 0x0 0x0 0x0 */
- ptr = map + offset;
- if (*ptr != 0)
- return -1;
-
- op = map + offset - 1;
- if (*op != 0xe8)
- return -1;
-
- /* convert to nop */
- ulseek(fd_map, offset - 1, SEEK_SET);
- uwrite(fd_map, ideal_nop, 5);
- return 0;
-}
+static unsigned char *ideal_nop;
static unsigned char ideal_nop4_arm_le[4] = { 0x00, 0x00, 0xa0, 0xe1 }; /* mov r0, r0 */
static unsigned char ideal_nop4_arm_be[4] = { 0xe1, 0xa0, 0x00, 0x00 }; /* mov r0, r0 */
@@ -447,6 +423,49 @@ static void MIPS64_r_info(Elf64_Rel *const rp, unsigned sym, unsigned type)
}).r_info;
}
+static unsigned char ideal_nop5_x86_64[5] = { 0x0f, 0x1f, 0x44, 0x00, 0x00 };
+static unsigned char ideal_nop6_x86_64[6] = { 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00 };
+static unsigned char ideal_nop5_x86_32[5] = { 0x3e, 0x8d, 0x74, 0x26, 0x00 };
+static size_t ideal_nop_x86_size;
+
+static unsigned char stub_default_x86[2] = { 0xe8, 0x00 }; /* call relative */
+static unsigned char stub_got_x86[3] = { 0xff, 0x15, 0x00 }; /* call .got */
+static unsigned char *stub_x86;
+static size_t stub_x86_size;
+
+static int make_nop_x86(void *map, size_t const offset)
+{
+ uint32_t *ptr;
+ size_t stub_offset = offset - stub_x86_size;
+
+ /* confirm we have the expected stub */
+ ptr = map + stub_offset;
+ if (memcmp(ptr, stub_x86, stub_x86_size))
+ return -1;
+
+ /* convert to nop */
+ ulseek(fd_map, stub_offset, SEEK_SET);
+ uwrite(fd_map, ideal_nop, ideal_nop_x86_size);
+ return 0;
+}
+
+/* Swap the stub and nop for a got call if the binary is built with PIE */
+static int is_fake_mcount_x86_x64(Elf64_Rel const *rp)
+{
+ if (ELF64_R_TYPE(rp->r_info) == R_X86_64_GOTPCREL) {
+ ideal_nop = ideal_nop6_x86_64;
+ ideal_nop_x86_size = sizeof(ideal_nop6_x86_64);
+ stub_x86 = stub_got_x86;
+ stub_x86_size = sizeof(stub_got_x86);
+ mcount_adjust_64 = 1 - stub_x86_size;
+ }
+
+ /* Once the relocation was checked, rollback to default */
+ is_fake_mcount64 = fn_is_fake_mcount64;
+ return is_fake_mcount64(rp);
+}
+
+
static void
do_file(char const *const fname)
{
@@ -509,6 +528,9 @@ do_file(char const *const fname)
rel_type_nop = R_386_NONE;
make_nop = make_nop_x86;
ideal_nop = ideal_nop5_x86_32;
+ ideal_nop_x86_size = sizeof(ideal_nop5_x86_32);
+ stub_x86 = stub_default_x86;
+ stub_x86_size = sizeof(stub_default_x86);
mcount_adjust_32 = -1;
break;
case EM_ARM: reltype = R_ARM_ABS32;
@@ -533,9 +555,13 @@ do_file(char const *const fname)
case EM_X86_64:
make_nop = make_nop_x86;
ideal_nop = ideal_nop5_x86_64;
+ ideal_nop_x86_size = sizeof(ideal_nop5_x86_64);
+ stub_x86 = stub_default_x86;
+ stub_x86_size = sizeof(stub_default_x86);
reltype = R_X86_64_64;
rel_type_nop = R_X86_64_NONE;
- mcount_adjust_64 = -1;
+ is_fake_mcount64 = is_fake_mcount_x86_x64;
+ mcount_adjust_64 = 1 - stub_x86_size;
break;
} /* end switch */
--
2.20.1.495.gaa96b0ce6b-goog
^ permalink raw reply related [flat|nested] 56+ messages in thread
* [PATCH v6 22/27] x86/modules: Add option to start module section after kernel
2019-01-31 19:24 [PATCH v6 00/27] x86: PIE support and option to extend KASLR randomization Thomas Garnier
` (20 preceding siblings ...)
2019-01-31 19:24 ` [PATCH v6 21/27] x86/ftrace: Adapt function tracing for PIE support Thomas Garnier
@ 2019-01-31 19:24 ` Thomas Garnier
2019-01-31 19:24 ` [PATCH v6 23/27] x86/modules: Adapt module loading for PIE support Thomas Garnier
` (6 subsequent siblings)
28 siblings, 0 replies; 56+ messages in thread
From: Thomas Garnier @ 2019-01-31 19:24 UTC (permalink / raw)
To: kernel-hardening
Cc: kristen, Thomas Garnier, Thomas Gleixner, Ingo Molnar,
Borislav Petkov, H. Peter Anvin, x86, Jonathan Corbet,
Dave Hansen, Andy Lutomirski, Peter Zijlstra, Kirill A. Shutemov,
Baoquan He, Thomas Garnier, Alexander Popov, Joerg Roedel,
Juergen Gross, linux-kernel, linux-doc
Add an option so the module section is just after the mapped kernel. It
will ensure position independent modules are always at the right
distance from the kernel and do not require mcmodel=large. It also
optimizes the available size for modules by getting rid of the empty
space in the kernel randomization range.
Signed-off-by: Thomas Garnier <thgarnie@chromium.org>
---
Documentation/x86/x86_64/mm.txt | 3 +++
arch/x86/Kconfig | 4 ++++
arch/x86/include/asm/pgtable_64_types.h | 6 ++++++
arch/x86/kernel/head64.c | 5 ++++-
arch/x86/mm/dump_pagetables.c | 3 ++-
5 files changed, 19 insertions(+), 2 deletions(-)
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index 804f9426ed17..35b845d695d5 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -151,3 +151,6 @@ correct as KASAN disables KASLR.
For both 4- and 5-level layouts, the STACKLEAK_POISON value in the last 2MB
hole: ffffffffffff4111
+
+If CONFIG_DYNAMIC_MODULE_BASE is enabled, the module section follows the end of
+the mapped kernel.
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 263d81c570b2..c3ad1b0ae1a1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2221,6 +2221,10 @@ config RANDOMIZE_MEMORY_PHYSICAL_PADDING
If unsure, leave at the default value.
+# Module section starts just after the end of the mapped kernel
+config DYNAMIC_MODULE_BASE
+ bool
+
config X86_GLOBAL_STACKPROTECTOR
bool "Stack cookie using a global variable"
depends on CC_STACKPROTECTOR_AUTO
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 88bca456da99..d1bb676ec376 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -7,6 +7,7 @@
#ifndef __ASSEMBLY__
#include <linux/types.h>
#include <asm/kaslr.h>
+#include <asm/sections.h>
/*
* These are used to make use of C type-checking..
@@ -141,7 +142,12 @@ extern unsigned int ptrs_per_p4d;
#define VMALLOC_END (VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1)
+#ifdef CONFIG_DYNAMIC_MODULE_BASE
+#define MODULES_VADDR ALIGN(((unsigned long)_end + PAGE_SIZE), PMD_SIZE)
+#else
#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
+#endif
+
/* The module sections ends with the start of the fixmap */
#define MODULES_END _AC(0xffffffffff000000, UL)
#define MODULES_LEN (MODULES_END - MODULES_VADDR)
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 22e81275495b..ca2f6ff431af 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -411,12 +411,15 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
* Build-time sanity checks on the kernel image and module
* area mappings. (these are purely build-time and produce no code)
*/
+#ifndef CONFIG_DYNAMIC_MODULE_BASE
BUILD_BUG_ON(MODULES_VADDR < __START_KERNEL_map);
BUILD_BUG_ON(MODULES_VADDR - __START_KERNEL_map < KERNEL_IMAGE_SIZE);
- BUILD_BUG_ON(MODULES_LEN + KERNEL_IMAGE_SIZE > 2*PUD_SIZE);
+ BUILD_BUG_ON(!IS_ENABLED(CONFIG_RANDOMIZE_BASE_LARGE) &&
+ MODULES_LEN + KERNEL_IMAGE_SIZE > 2*PUD_SIZE);
BUILD_BUG_ON((__START_KERNEL_map & ~PMD_MASK) != 0);
BUILD_BUG_ON((MODULES_VADDR & ~PMD_MASK) != 0);
BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
+#endif
MAYBE_BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
(__START_KERNEL & PGDIR_MASK)));
BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index e3cdc85ce5b6..3172bd968215 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -104,7 +104,7 @@ static struct addr_marker address_markers[] = {
[EFI_END_NR] = { EFI_VA_END, "EFI Runtime Services" },
#endif
[HIGH_KERNEL_NR] = { __START_KERNEL_map, "High Kernel Mapping" },
- [MODULES_VADDR_NR] = { MODULES_VADDR, "Modules" },
+ [MODULES_VADDR_NR] = { 0/*MODULES_VADDR*/, "Modules" },
[MODULES_END_NR] = { MODULES_END, "End Modules" },
[FIXADDR_START_NR] = { FIXADDR_START, "Fixmap Area" },
[END_OF_SPACE_NR] = { -1, NULL }
@@ -623,6 +623,7 @@ static int __init pt_dump_init(void)
address_markers[KASAN_SHADOW_START_NR].start_address = KASAN_SHADOW_START;
address_markers[KASAN_SHADOW_END_NR].start_address = KASAN_SHADOW_END;
#endif
+ address_markers[MODULES_VADDR_NR].start_address = MODULES_VADDR;
#endif
#ifdef CONFIG_X86_32
address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
--
2.20.1.495.gaa96b0ce6b-goog
^ permalink raw reply related [flat|nested] 56+ messages in thread
* [PATCH v6 23/27] x86/modules: Adapt module loading for PIE support
2019-01-31 19:24 [PATCH v6 00/27] x86: PIE support and option to extend KASLR randomization Thomas Garnier
` (21 preceding siblings ...)
2019-01-31 19:24 ` [PATCH v6 22/27] x86/modules: Add option to start module section after kernel Thomas Garnier
@ 2019-01-31 19:24 ` Thomas Garnier
2019-01-31 19:24 ` [PATCH v6 24/27] x86/mm: Make the x86 GOT read-only Thomas Garnier
` (5 subsequent siblings)
28 siblings, 0 replies; 56+ messages in thread
From: Thomas Garnier @ 2019-01-31 19:24 UTC (permalink / raw)
To: kernel-hardening
Cc: kristen, Thomas Garnier, Thomas Gleixner, Ingo Molnar,
Borislav Petkov, H. Peter Anvin, x86, Thomas Garnier,
Joerg Roedel, Andy Lutomirski, Francis Deslauriers,
Ard Biesheuvel, H.J. Lu, Peter Zijlstra (Intel),
linux-kernel
Adapt module loading to support PIE relocations. Generate a dynamic GOT
entry if a symbol requires one but no entry exists in the kernel GOT.
Position Independent Executable (PIE) support will allow extending the
KASLR randomization range below 0xffffffff80000000.
Signed-off-by: Thomas Garnier <thgarnie@chromium.org>
---
arch/x86/Makefile | 4 +
arch/x86/include/asm/module.h | 11 ++
arch/x86/include/asm/sections.h | 4 +
arch/x86/kernel/module.c | 181 +++++++++++++++++++++++++++++++-
arch/x86/kernel/module.lds | 3 +
5 files changed, 198 insertions(+), 5 deletions(-)
create mode 100644 arch/x86/kernel/module.lds
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 65d6d9a1dd22..5e9c1b02cf87 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -129,8 +129,12 @@ else
KBUILD_CFLAGS += $(cflags-y)
KBUILD_CFLAGS += -mno-red-zone
+ifdef CONFIG_X86_PIE
+ KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/x86/kernel/module.lds
+else
KBUILD_CFLAGS += -mcmodel=kernel
endif
+endif
ifdef CONFIG_X86_GLOBAL_STACKPROTECTOR
ifeq ($(call cc-option, -mstack-protector-guard=global),)
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
index 7948a17febb4..68ff05e14288 100644
--- a/arch/x86/include/asm/module.h
+++ b/arch/x86/include/asm/module.h
@@ -5,12 +5,23 @@
#include <asm-generic/module.h>
#include <asm/orc_types.h>
+#ifdef CONFIG_X86_PIE
+struct mod_got_sec {
+ struct elf64_shdr *got;
+ int got_num_entries;
+ int got_max_entries;
+};
+#endif
+
struct mod_arch_specific {
#ifdef CONFIG_UNWINDER_ORC
unsigned int num_orcs;
int *orc_unwind_ip;
struct orc_entry *orc_unwind;
#endif
+#ifdef CONFIG_X86_PIE
+ struct mod_got_sec core;
+#endif
};
#ifdef CONFIG_X86_64
diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h
index 8ea1cfdbeabc..d4468309d743 100644
--- a/arch/x86/include/asm/sections.h
+++ b/arch/x86/include/asm/sections.h
@@ -13,4 +13,8 @@ extern char __end_rodata_aligned[];
extern char __end_rodata_hpage_align[];
#endif
+#if defined(CONFIG_X86_PIE)
+extern char __start_got[], __end_got[];
+#endif
+
#endif /* _ASM_X86_SECTIONS_H */
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index b052e883dd8c..f0d3ed92049e 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -30,6 +30,7 @@
#include <linux/gfp.h>
#include <linux/jump_label.h>
#include <linux/random.h>
+#include <linux/sort.h>
#include <asm/text-patching.h>
#include <asm/page.h>
@@ -77,6 +78,173 @@ static unsigned long int get_module_load_offset(void)
}
#endif
+#ifdef CONFIG_X86_PIE
+static u64 find_got_kernel_entry(Elf64_Sym *sym, const Elf64_Rela *rela)
+{
+ u64 *pos;
+
+ for (pos = (u64 *)__start_got; pos < (u64 *)__end_got; pos++) {
+ if (*pos == sym->st_value)
+ return (u64)pos + rela->r_addend;
+ }
+
+ return 0;
+}
+
+static u64 module_emit_got_entry(struct module *mod, void *loc,
+ const Elf64_Rela *rela, Elf64_Sym *sym)
+{
+ struct mod_got_sec *gotsec = &mod->arch.core;
+ u64 *got = (u64 *)gotsec->got->sh_addr;
+ int i = gotsec->got_num_entries;
+ u64 ret;
+
+ /* Check if we can use the kernel GOT */
+ ret = find_got_kernel_entry(sym, rela);
+ if (ret)
+ return ret;
+
+ got[i] = sym->st_value;
+
+ /*
+ * Check if the entry we just created is a duplicate. Given that the
+ * relocations are sorted, this will be the last entry we allocated.
+ * (if one exists).
+ */
+ if (i > 0 && got[i] == got[i - 2]) {
+ ret = (u64)&got[i - 1];
+ } else {
+ gotsec->got_num_entries++;
+ BUG_ON(gotsec->got_num_entries > gotsec->got_max_entries);
+ ret = (u64)&got[i];
+ }
+
+ return ret + rela->r_addend;
+}
+
+#define cmp_3way(a, b) ((a) < (b) ? -1 : (a) > (b))
+
+static int cmp_rela(const void *a, const void *b)
+{
+ const Elf64_Rela *x = a, *y = b;
+ int i;
+
+ /* sort by type, symbol index and addend */
+ i = cmp_3way(ELF64_R_TYPE(x->r_info), ELF64_R_TYPE(y->r_info));
+ if (i == 0)
+ i = cmp_3way(ELF64_R_SYM(x->r_info), ELF64_R_SYM(y->r_info));
+ if (i == 0)
+ i = cmp_3way(x->r_addend, y->r_addend);
+ return i;
+}
+
+static bool duplicate_rel(const Elf64_Rela *rela, int num)
+{
+ /*
+ * Entries are sorted by type, symbol index and addend. That means
+ * that, if a duplicate entry exists, it must be in the preceding
+ * slot.
+ */
+ return num > 0 && cmp_rela(rela + num, rela + num - 1) == 0;
+}
+
+static unsigned int count_gots(Elf64_Sym *syms, Elf64_Rela *rela, int num)
+{
+ unsigned int ret = 0;
+ Elf64_Sym *s;
+ int i;
+
+ for (i = 0; i < num; i++) {
+ switch (ELF64_R_TYPE(rela[i].r_info)) {
+ case R_X86_64_GOTPCREL:
+ s = syms + ELF64_R_SYM(rela[i].r_info);
+
+ /*
+ * Use the kernel GOT when possible, else reserve a
+ * custom one for this module.
+ */
+ if (!duplicate_rel(rela, i) &&
+ !find_got_kernel_entry(s, rela + i))
+ ret++;
+ break;
+ }
+ }
+ return ret;
+}
+
+/*
+ * Generate GOT entries for GOTPCREL relocations that do not exist in the
+ * kernel GOT. Based on arm64 module-plts implementation.
+ */
+int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
+ char *secstrings, struct module *mod)
+{
+ unsigned long gots = 0;
+ Elf_Shdr *symtab = NULL;
+ Elf64_Sym *syms = NULL;
+ char *strings, *name;
+ int i;
+
+ /*
+ * Find the empty .got section so we can expand it to store the GOT
+ * entries. Record the symtab address as well.
+ */
+ for (i = 0; i < ehdr->e_shnum; i++) {
+ if (!strcmp(secstrings + sechdrs[i].sh_name, ".got")) {
+ mod->arch.core.got = sechdrs + i;
+ } else if (sechdrs[i].sh_type == SHT_SYMTAB) {
+ symtab = sechdrs + i;
+ syms = (Elf64_Sym *)symtab->sh_addr;
+ }
+ }
+
+ if (!mod->arch.core.got) {
+ pr_err("%s: module GOT section missing\n", mod->name);
+ return -ENOEXEC;
+ }
+ if (!syms) {
+ pr_err("%s: module symtab section missing\n", mod->name);
+ return -ENOEXEC;
+ }
+
+ for (i = 0; i < ehdr->e_shnum; i++) {
+ Elf64_Rela *rels = (void *)ehdr + sechdrs[i].sh_offset;
+ int numrels = sechdrs[i].sh_size / sizeof(Elf64_Rela);
+
+ if (sechdrs[i].sh_type != SHT_RELA)
+ continue;
+
+ /* sort by type, symbol index and addend */
+ sort(rels, numrels, sizeof(Elf64_Rela), cmp_rela, NULL);
+
+ gots += count_gots(syms, rels, numrels);
+ }
+
+ mod->arch.core.got->sh_type = SHT_NOBITS;
+ mod->arch.core.got->sh_flags = SHF_ALLOC;
+ mod->arch.core.got->sh_addralign = L1_CACHE_BYTES;
+ mod->arch.core.got->sh_size = (gots + 1) * sizeof(u64);
+ mod->arch.core.got_num_entries = 0;
+ mod->arch.core.got_max_entries = gots;
+
+ /*
+ * If a _GLOBAL_OFFSET_TABLE_ symbol exists, make it absolute for
+ * modules to correctly reference it. Similar to s390 implementation.
+ */
+ strings = (void *) ehdr + sechdrs[symtab->sh_link].sh_offset;
+ for (i = 0; i < symtab->sh_size/sizeof(Elf_Sym); i++) {
+ if (syms[i].st_shndx != SHN_UNDEF)
+ continue;
+ name = strings + syms[i].st_name;
+ if (!strcmp(name, "_GLOBAL_OFFSET_TABLE_")) {
+ syms[i].st_shndx = SHN_ABS;
+ break;
+ }
+ }
+ return 0;
+}
+#endif
+
void *module_alloc(unsigned long size)
{
void *p;
@@ -190,16 +358,20 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
if ((s64)val != *(s32 *)loc)
goto overflow;
break;
+#ifdef CONFIG_X86_PIE
+ case R_X86_64_GOTPCREL:
+ val = module_emit_got_entry(me, loc, rel + i, sym);
+ /* fallthrough */
+#endif
case R_X86_64_PC32:
case R_X86_64_PLT32:
if (*(u32 *)loc != 0)
goto invalid_relocation;
val -= (u64)loc;
*(u32 *)loc = val;
-#if 0
- if ((s64)val != *(s32 *)loc)
+ if (IS_ENABLED(CONFIG_X86_PIE) &&
+ (s64)val != *(s32 *)loc)
goto overflow;
-#endif
break;
case R_X86_64_PC64:
if (*(u64 *)loc != 0)
@@ -223,8 +395,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
overflow:
pr_err("overflow in relocation type %d val %Lx\n",
(int)ELF64_R_TYPE(rel[i].r_info), val);
- pr_err("`%s' likely not compiled with -mcmodel=kernel\n",
- me->name);
+ pr_err("`%s' likely too far from the kernel\n", me->name);
return -ENOEXEC;
}
#endif
diff --git a/arch/x86/kernel/module.lds b/arch/x86/kernel/module.lds
new file mode 100644
index 000000000000..fd6e95a4b454
--- /dev/null
+++ b/arch/x86/kernel/module.lds
@@ -0,0 +1,3 @@
+SECTIONS {
+ .got (NOLOAD) : { BYTE(0) }
+}
--
2.20.1.495.gaa96b0ce6b-goog
^ permalink raw reply related [flat|nested] 56+ messages in thread
* [PATCH v6 24/27] x86/mm: Make the x86 GOT read-only
2019-01-31 19:24 [PATCH v6 00/27] x86: PIE support and option to extend KASLR randomization Thomas Garnier
` (22 preceding siblings ...)
2019-01-31 19:24 ` [PATCH v6 23/27] x86/modules: Adapt module loading for PIE support Thomas Garnier
@ 2019-01-31 19:24 ` Thomas Garnier
2019-01-31 19:24 ` [PATCH v6 25/27] x86/pie: Add option to build the kernel as PIE Thomas Garnier
` (4 subsequent siblings)
28 siblings, 0 replies; 56+ messages in thread
From: Thomas Garnier @ 2019-01-31 19:24 UTC (permalink / raw)
To: kernel-hardening
Cc: kristen, Thomas Garnier, Arnd Bergmann, linux-arch, linux-kernel
The GOT is changed during early boot when relocations are applied. Make
it read-only directly. This table exists only in PIE binaries.
Position Independent Executable (PIE) support will allow extending the
KASLR randomization range below 0xffffffff80000000.
Signed-off-by: Thomas Garnier <thgarnie@chromium.org>
---
include/asm-generic/vmlinux.lds.h | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 3d7a6a9c2370..0a038594c878 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -323,6 +323,17 @@
__end_ro_after_init = .;
#endif
+#ifdef CONFIG_X86_PIE
+#define RO_GOT_X86 \
+ .got : AT(ADDR(.got) - LOAD_OFFSET) { \
+ __start_got = .; \
+ *(.got); \
+ __end_got = .; \
+ }
+#else
+#define RO_GOT_X86
+#endif
+
/*
* Read only Data
*/
@@ -379,6 +390,7 @@
__end_builtin_fw = .; \
} \
\
+ RO_GOT_X86 \
TRACEDATA \
\
/* Kernel symbol table: Normal symbols */ \
--
2.20.1.495.gaa96b0ce6b-goog
^ permalink raw reply related [flat|nested] 56+ messages in thread
* [PATCH v6 25/27] x86/pie: Add option to build the kernel as PIE
2019-01-31 19:24 [PATCH v6 00/27] x86: PIE support and option to extend KASLR randomization Thomas Garnier
` (23 preceding siblings ...)
2019-01-31 19:24 ` [PATCH v6 24/27] x86/mm: Make the x86 GOT read-only Thomas Garnier
@ 2019-01-31 19:24 ` Thomas Garnier
2019-01-31 19:24 ` [PATCH v6 26/27] x86/relocs: Add option to generate 64-bit relocations Thomas Garnier
` (3 subsequent siblings)
28 siblings, 0 replies; 56+ messages in thread
From: Thomas Garnier @ 2019-01-31 19:24 UTC (permalink / raw)
To: kernel-hardening
Cc: kristen, Thomas Garnier, Thomas Gleixner, Ingo Molnar,
Borislav Petkov, H. Peter Anvin, x86, linux-kernel
Add the CONFIG_X86_PIE option which builds the kernel as a Position
Independent Executable (PIE). The kernel is currently built with the
mcmodel=kernel option which forces it to stay in the top 2G of the
virtual address space. With PIE, the kernel will be able to move below
the current limit.
The --emit-relocs linker option was kept instead of using -pie to limit
the impact on mapped sections. Any incompatible relocation will be
caught by the arch/x86/tools/relocs binary at compile time.
If segment based stack cookies are enabled, try to use the compiler
option to select the segment register. If not available, automatically
enable the global stack cookie in auto mode. Otherwise, recommend a
compiler update or the global stack cookie option.
Performance/Size impact:
Size of vmlinux (Default configuration):
File size:
- PIE disabled: +0.18%
- PIE enabled: -1.977% (less relocations)
.text section:
- PIE disabled: same
- PIE enabled: same
Size of vmlinux (Ubuntu configuration):
File size:
- PIE disabled: +0.21%
- PIE enabled: +10%
.text section:
- PIE disabled: same
- PIE enabled: +0.001%
The size increase is mainly due to not having access to the 32-bit signed
relocation that can be used with mcmodel=kernel. A small part is due to reduced
optimization for PIE code. This bug [1] was opened with gcc to provide a better
code generation for kernel PIE.
Hackbench (50% and 1600% on thread/process for pipe/sockets):
- PIE disabled: no significant change (avg -/+ 0.5% on latest test).
- PIE enabled: between -1% to +1% in average (default and Ubuntu config).
Kernbench (average of 10 Half and Optimal runs):
Elapsed Time:
- PIE disabled: no significant change (avg -0.5%)
- PIE enabled: average -0.5% to +0.5%
System Time:
- PIE disabled: no significant change (avg -0.1%)
- PIE enabled: average -0.4% to +0.4%.
[1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82303
Signed-off-by: Thomas Garnier <thgarnie@chromium.org>
---
arch/x86/Kconfig | 8 ++++++++
arch/x86/Makefile | 45 ++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 52 insertions(+), 1 deletion(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c3ad1b0ae1a1..e4316b8ed130 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2237,6 +2237,14 @@ config X86_GLOBAL_STACKPROTECTOR
If unsure, say N
+config X86_PIE
+ bool
+ depends on X86_64
+ select DEFAULT_HIDDEN_SYMS
+ select WEAK_PROVIDE_HIDDEN
+ select DYNAMIC_MODULE_BASE
+ select MODULE_REL_CRCS if MODVERSIONS
+
config HOTPLUG_CPU
bool "Support for hot-pluggable CPUs"
depends on SMP
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 5e9c1b02cf87..f72f78510059 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -60,6 +60,8 @@ endif
KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow
KBUILD_CFLAGS += $(call cc-option,-mno-avx,)
+stackglobal := $(call cc-option-yn, -mstack-protector-guard=global)
+
ifeq ($(CONFIG_X86_32),y)
BITS := 32
UTS_MACHINE := i386
@@ -130,14 +132,55 @@ else
KBUILD_CFLAGS += -mno-red-zone
ifdef CONFIG_X86_PIE
+ KBUILD_CFLAGS += -fPIE
KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/x86/kernel/module.lds
+
+ # Relax relocation in both CFLAGS and LDFLAGS to support older compilers
+ KBUILD_CFLAGS += $(call cc-option,-Wa$(comma)-mrelax-relocations=no)
+ LDFLAGS_vmlinux += $(call ld-option,--no-relax)
+ KBUILD_LDFLAGS_MODULE += $(call ld-option,--no-relax)
+
+ # Stack validation is not yet support due to self-referenced switches
+ifdef CONFIG_STACK_VALIDATION
+ $(warning CONFIG_STACK_VALIDATION is not yet supported for x86_64 pie \
+ build.)
+ SKIP_STACK_VALIDATION := 1
+ export SKIP_STACK_VALIDATION
+endif
+
+ifndef CONFIG_CC_STACKPROTECTOR_NONE
+ifndef CONFIG_X86_GLOBAL_STACKPROTECTOR
+ stackseg-flag := -mstack-protector-guard-reg=%gs
+ ifeq ($(call cc-option-yn,$(stackseg-flag)),n)
+ # Try to enable global stack cookie if possible
+ ifeq ($(stackglobal), y)
+ $(warning Cannot use CONFIG_CC_STACKPROTECTOR_* while \
+ building a position independent kernel. \
+ Default to global stack protector \
+ (CONFIG_X86_GLOBAL_STACKPROTECTOR).)
+ CONFIG_X86_GLOBAL_STACKPROTECTOR := y
+ KBUILD_CFLAGS += -DCONFIG_X86_GLOBAL_STACKPROTECTOR
+ KBUILD_AFLAGS += -DCONFIG_X86_GLOBAL_STACKPROTECTOR
+ else
+ $(error echo Cannot use \
+ CONFIG_CC_STACKPROTECTOR_(REGULAR|STRONG|AUTO) \
+ while building a position independent binary. \
+ Update your compiler or use \
+ CONFIG_X86_GLOBAL_STACKPROTECTOR)
+ endif
+ else
+ KBUILD_CFLAGS += $(stackseg-flag)
+ endif
+endif
+endif
+
else
KBUILD_CFLAGS += -mcmodel=kernel
endif
endif
ifdef CONFIG_X86_GLOBAL_STACKPROTECTOR
- ifeq ($(call cc-option, -mstack-protector-guard=global),)
+ ifeq ($(stackglobal), n)
$(error Cannot use CONFIG_X86_GLOBAL_STACKPROTECTOR: \
-mstack-protector-guard=global not supported \
by compiler)
--
2.20.1.495.gaa96b0ce6b-goog
^ permalink raw reply related [flat|nested] 56+ messages in thread
* [PATCH v6 26/27] x86/relocs: Add option to generate 64-bit relocations
2019-01-31 19:24 [PATCH v6 00/27] x86: PIE support and option to extend KASLR randomization Thomas Garnier
` (24 preceding siblings ...)
2019-01-31 19:24 ` [PATCH v6 25/27] x86/pie: Add option to build the kernel as PIE Thomas Garnier
@ 2019-01-31 19:24 ` Thomas Garnier
2019-01-31 19:24 ` [PATCH v6 27/27] x86/kaslr: Add option to extend KASLR range from 1GB to 3GB Thomas Garnier
` (2 subsequent siblings)
28 siblings, 0 replies; 56+ messages in thread
From: Thomas Garnier @ 2019-01-31 19:24 UTC (permalink / raw)
To: kernel-hardening
Cc: kristen, Thomas Garnier, Thomas Gleixner, Ingo Molnar,
Borislav Petkov, H. Peter Anvin, x86, Thomas Garnier,
Jordan Borgner, Ard Biesheuvel, H.J. Lu, Joerg Roedel,
linux-kernel
The x86 relocation tool generates a list of 32-bit signed integers. There
was no need to use 64-bit integers because all addresses were in the top
2G of the virtual address space.
This change adds a large-reloc option to generate 64-bit unsigned integers.
It can be used when the kernel plans to go below the top 2G and 32-bit
integers are not enough.
Signed-off-by: Thomas Garnier <thgarnie@chromium.org>
---
arch/x86/tools/relocs.c | 61 +++++++++++++++++++++++++++-------
arch/x86/tools/relocs.h | 4 +--
arch/x86/tools/relocs_common.c | 15 ++++++---
3 files changed, 61 insertions(+), 19 deletions(-)
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 1b5ee38446b6..b4169eed37ab 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -13,8 +13,14 @@
static Elf_Ehdr ehdr;
+#if ELF_BITS == 64
+typedef uint64_t rel_off_t;
+#else
+typedef uint32_t rel_off_t;
+#endif
+
struct relocs {
- uint32_t *offset;
+ rel_off_t *offset;
unsigned long count;
unsigned long size;
};
@@ -690,7 +696,7 @@ static void print_absolute_relocs(void)
printf("\n");
}
-static void add_reloc(struct relocs *r, uint32_t offset)
+static void add_reloc(struct relocs *r, rel_off_t offset)
{
if (r->count == r->size) {
unsigned long newsize = r->size + 50000;
@@ -1075,26 +1081,49 @@ static void sort_relocs(struct relocs *r)
qsort(r->offset, r->count, sizeof(r->offset[0]), cmp_relocs);
}
-static int write32(uint32_t v, FILE *f)
+static int write32(rel_off_t rel, FILE *f)
{
- unsigned char buf[4];
+ unsigned char buf[sizeof(uint32_t)];
+ uint32_t v = (uint32_t)rel;
put_unaligned_le32(v, buf);
- return fwrite(buf, 1, 4, f) == 4 ? 0 : -1;
+ return fwrite(buf, 1, sizeof(buf), f) == sizeof(buf) ? 0 : -1;
}
-static int write32_as_text(uint32_t v, FILE *f)
+static int write32_as_text(rel_off_t rel, FILE *f)
{
+ uint32_t v = (uint32_t)rel;
return fprintf(f, "\t.long 0x%08"PRIx32"\n", v) > 0 ? 0 : -1;
}
-static void emit_relocs(int as_text, int use_real_mode)
+static int write64(rel_off_t rel, FILE *f)
+{
+ unsigned char buf[sizeof(uint64_t)];
+ uint64_t v = (uint64_t)rel;
+
+ put_unaligned_le64(v, buf);
+ return fwrite(buf, 1, sizeof(buf), f) == sizeof(buf) ? 0 : -1;
+}
+
+static int write64_as_text(rel_off_t rel, FILE *f)
+{
+ uint64_t v = (uint64_t)rel;
+
+ return fprintf(f, "\t.quad 0x%016"PRIx64"\n", v) > 0 ? 0 : -1;
+}
+
+static void emit_relocs(int as_text, int use_real_mode, int use_large_reloc)
{
int i;
- int (*write_reloc)(uint32_t, FILE *) = write32;
+ int (*write_reloc)(rel_off_t rel, FILE *f);
int (*do_reloc)(struct section *sec, Elf_Rel *rel, Elf_Sym *sym,
const char *symname);
+ if (use_large_reloc)
+ write_reloc = write64;
+ else
+ write_reloc = write32;
+
#if ELF_BITS == 64
if (!use_real_mode)
do_reloc = do_reloc64;
@@ -1105,6 +1134,9 @@ static void emit_relocs(int as_text, int use_real_mode)
do_reloc = do_reloc32;
else
do_reloc = do_reloc_real;
+
+ /* Large relocations only for 64-bit */
+ use_large_reloc = 0;
#endif
/* Collect up the relocations */
@@ -1128,8 +1160,13 @@ static void emit_relocs(int as_text, int use_real_mode)
* gas will like.
*/
printf(".section \".data.reloc\",\"a\"\n");
- printf(".balign 4\n");
- write_reloc = write32_as_text;
+ if (use_large_reloc) {
+ printf(".balign 8\n");
+ write_reloc = write64_as_text;
+ } else {
+ printf(".balign 4\n");
+ write_reloc = write32_as_text;
+ }
}
if (use_real_mode) {
@@ -1197,7 +1234,7 @@ static void print_reloc_info(void)
void process(FILE *fp, int use_real_mode, int as_text,
int show_absolute_syms, int show_absolute_relocs,
- int show_reloc_info)
+ int show_reloc_info, int use_large_reloc)
{
regex_init(use_real_mode);
read_ehdr(fp);
@@ -1220,5 +1257,5 @@ void process(FILE *fp, int use_real_mode, int as_text,
print_reloc_info();
return;
}
- emit_relocs(as_text, use_real_mode);
+ emit_relocs(as_text, use_real_mode, use_large_reloc);
}
diff --git a/arch/x86/tools/relocs.h b/arch/x86/tools/relocs.h
index 43c83c0fd22c..3d401da59df7 100644
--- a/arch/x86/tools/relocs.h
+++ b/arch/x86/tools/relocs.h
@@ -31,8 +31,8 @@ enum symtype {
void process_32(FILE *fp, int use_real_mode, int as_text,
int show_absolute_syms, int show_absolute_relocs,
- int show_reloc_info);
+ int show_reloc_info, int use_large_reloc);
void process_64(FILE *fp, int use_real_mode, int as_text,
int show_absolute_syms, int show_absolute_relocs,
- int show_reloc_info);
+ int show_reloc_info, int use_large_reloc);
#endif /* RELOCS_H */
diff --git a/arch/x86/tools/relocs_common.c b/arch/x86/tools/relocs_common.c
index 6634352a20bc..11f49adf1c06 100644
--- a/arch/x86/tools/relocs_common.c
+++ b/arch/x86/tools/relocs_common.c
@@ -12,14 +12,14 @@ void die(char *fmt, ...)
static void usage(void)
{
- die("relocs [--abs-syms|--abs-relocs|--reloc-info|--text|--realmode]" \
- " vmlinux\n");
+ die("relocs [--abs-syms|--abs-relocs|--reloc-info|--text|--realmode|" \
+ "--large-reloc] vmlinux\n");
}
int main(int argc, char **argv)
{
int show_absolute_syms, show_absolute_relocs, show_reloc_info;
- int as_text, use_real_mode;
+ int as_text, use_real_mode, use_large_reloc;
const char *fname;
FILE *fp;
int i;
@@ -30,6 +30,7 @@ int main(int argc, char **argv)
show_reloc_info = 0;
as_text = 0;
use_real_mode = 0;
+ use_large_reloc = 0;
fname = NULL;
for (i = 1; i < argc; i++) {
char *arg = argv[i];
@@ -54,6 +55,10 @@ int main(int argc, char **argv)
use_real_mode = 1;
continue;
}
+ if (strcmp(arg, "--large-reloc") == 0) {
+ use_large_reloc = 1;
+ continue;
+ }
}
else if (!fname) {
fname = arg;
@@ -75,11 +80,11 @@ int main(int argc, char **argv)
if (e_ident[EI_CLASS] == ELFCLASS64)
process_64(fp, use_real_mode, as_text,
show_absolute_syms, show_absolute_relocs,
- show_reloc_info);
+ show_reloc_info, use_large_reloc);
else
process_32(fp, use_real_mode, as_text,
show_absolute_syms, show_absolute_relocs,
- show_reloc_info);
+ show_reloc_info, use_large_reloc);
fclose(fp);
return 0;
}
--
2.20.1.495.gaa96b0ce6b-goog
^ permalink raw reply related [flat|nested] 56+ messages in thread
* [PATCH v6 27/27] x86/kaslr: Add option to extend KASLR range from 1GB to 3GB
2019-01-31 19:24 [PATCH v6 00/27] x86: PIE support and option to extend KASLR randomization Thomas Garnier
` (25 preceding siblings ...)
2019-01-31 19:24 ` [PATCH v6 26/27] x86/relocs: Add option to generate 64-bit relocations Thomas Garnier
@ 2019-01-31 19:24 ` Thomas Garnier
2019-01-31 19:59 ` [PATCH v6 00/27] x86: PIE support and option to extend KASLR randomization Kees Cook
2019-01-31 21:40 ` Konrad Rzeszutek Wilk
28 siblings, 0 replies; 56+ messages in thread
From: Thomas Garnier @ 2019-01-31 19:24 UTC (permalink / raw)
To: kernel-hardening
Cc: kristen, Thomas Garnier, Masahiro Yamada, Michal Marek,
Thomas Gleixner, Ingo Molnar, Borislav Petkov, H. Peter Anvin,
x86, Kirill A. Shutemov, Palmer Dabbelt, Nathan Chancellor,
Thomas Garnier, Kees Cook, Cao jin, H.J. Lu, Baoquan He,
Juergen Gross, linux-kbuild, linux-kernel
Add a new CONFIG_RANDOMIZE_BASE_LARGE option to benefit from PIE
support. It increases the KASLR range from 1GB to 3GB. The new range
starts at 0xffffffff00000000, just above the EFI memory region. This
option is off by default.
The boot code is adapted to create the appropriate page table spanning
three PUD pages.
The relocation table uses 64-bit integers generated with the updated
relocation tool with the large-reloc option.
Signed-off-by: Thomas Garnier <thgarnie@chromium.org>
---
Makefile | 3 +++
arch/x86/Kconfig | 21 +++++++++++++++++++++
arch/x86/boot/compressed/Makefile | 5 +++++
arch/x86/boot/compressed/misc.c | 10 +++++++++-
arch/x86/include/asm/page_64_types.h | 10 ++++++++++
arch/x86/kernel/head64.c | 15 ++++++++++++---
arch/x86/kernel/head_64.S | 11 ++++++++++-
7 files changed, 70 insertions(+), 5 deletions(-)
diff --git a/Makefile b/Makefile
index 6e4f0dba45bb..41e0aa0c06b0 100644
--- a/Makefile
+++ b/Makefile
@@ -1106,6 +1106,8 @@ genheader:
PHONY += prepare-objtool
prepare-objtool: $(objtool_target)
ifeq ($(SKIP_STACK_VALIDATION),1)
+# CONFIG_STACK_VALIDATION is not yet supported by CONFIG_X86_PIE and a warning is displayed earlier.
+ifndef CONFIG_X86_PIE
ifdef CONFIG_UNWINDER_ORC
@echo "error: Cannot generate ORC metadata for CONFIG_UNWINDER_ORC=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel" >&2
@false
@@ -1113,6 +1115,7 @@ else
@echo "warning: Cannot use CONFIG_STACK_VALIDATION=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel" >&2
endif
endif
+endif
# Generate some files
# ---------------------------------------------------------------------------
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e4316b8ed130..e61e4fafa1a0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2245,6 +2245,27 @@ config X86_PIE
select DYNAMIC_MODULE_BASE
select MODULE_REL_CRCS if MODVERSIONS
+config RANDOMIZE_BASE_LARGE
+ bool "Increase the randomization range of the kernel image"
+ depends on X86_64 && RANDOMIZE_BASE
+ select X86_PIE
+ select X86_MODULE_PLTS if MODULES
+ default n
+ help
+ Build the kernel as a Position Independent Executable (PIE) and
+ increase the available randomization range from 1GB to 3GB.
+
+ This option impacts performance on kernel CPU intensive workloads up
+ to 10% due to PIE generated code. Impact on user-mode processes and
+ typical usage would be significantly less (0.50% when you build the
+ kernel).
+
+ The kernel and modules will generate slightly more assembly (1 to 2%
+ increase on the .text sections). The vmlinux binary will be
+ significantly smaller due to fewer relocations.
+
+ If unsure say N
+
config HOTPLUG_CPU
bool "Support for hot-pluggable CPUs"
depends on SMP
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index f0515ac895a4..02d1ba4877a0 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -121,7 +121,12 @@ $(obj)/vmlinux.bin: vmlinux FORCE
targets += $(patsubst $(obj)/%,%,$(vmlinux-objs-y)) vmlinux.bin.all vmlinux.relocs
+# Large randomization requires a bigger relocation table
+ifeq ($(CONFIG_RANDOMIZE_BASE_LARGE),y)
+CMD_RELOCS = arch/x86/tools/relocs --large-reloc
+else
CMD_RELOCS = arch/x86/tools/relocs
+endif
quiet_cmd_relocs = RELOCS $@
cmd_relocs = $(CMD_RELOCS) $< > $@;$(CMD_RELOCS) --abs-relocs $<
$(obj)/vmlinux.relocs: vmlinux FORCE
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 8dd1d5ccae58..28d17bd5bad8 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -171,10 +171,18 @@ void __puthex(unsigned long value)
}
#if CONFIG_X86_NEED_RELOCS
+
+/* Large randomization goes below -2G and uses a large relocation table */
+#ifdef CONFIG_RANDOMIZE_BASE_LARGE
+typedef long rel_t;
+#else
+typedef int rel_t;
+#endif
+
static void handle_relocations(void *output, unsigned long output_len,
unsigned long virt_addr)
{
- int *reloc;
+ rel_t *reloc;
unsigned long delta, map, ptr;
unsigned long min_addr = (unsigned long)output;
unsigned long max_addr = min_addr + (VO___bss_start - VO__text);
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 8f657286d599..acd4f3b400ca 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -48,7 +48,11 @@
#define __PAGE_OFFSET __PAGE_OFFSET_BASE_L4
#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */
+#ifdef CONFIG_RANDOMIZE_BASE_LARGE
+#define __START_KERNEL_map _AC(0xffffffff00000000, UL)
+#else
#define __START_KERNEL_map _AC(0xffffffff80000000, UL)
+#endif /* CONFIG_RANDOMIZE_BASE_LARGE */
/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
@@ -67,11 +71,17 @@
* On KASLR use 1 GiB by default, leaving 1 GiB for modules once the
* page tables are fully set up.
*
+ * On PIE, we relocate the binary 2G lower so add this extra space.
+ *
* If KASLR is disabled we can shrink it to 0.5 GiB and increase the size
* of the modules area to 1.5 GiB.
*/
#ifdef CONFIG_RANDOMIZE_BASE
+#ifdef CONFIG_RANDOMIZE_BASE_LARGE
+#define KERNEL_IMAGE_SIZE (_AC(3, UL) * 1024 * 1024 * 1024)
+#else
#define KERNEL_IMAGE_SIZE (1024 * 1024 * 1024)
+#endif
#else
#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024)
#endif
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index ca2f6ff431af..0da0dc47f08f 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -62,6 +62,7 @@ EXPORT_SYMBOL(vmemmap_base);
#endif
#define __head __section(.head.text)
+#define pud_count(x) (((x + (PUD_SIZE - 1)) & ~(PUD_SIZE - 1)) >> PUD_SHIFT)
/* Required for read_cr3 when building as PIE */
unsigned long __force_order;
@@ -119,6 +120,8 @@ unsigned long __head __startup_64(unsigned long physaddr,
unsigned long vaddr, vaddr_end;
unsigned long load_delta, *p;
unsigned long pgtable_flags;
+ unsigned long level3_kernel_start, level3_kernel_count;
+ unsigned long level3_fixmap_start;
pgdval_t *pgd;
p4dval_t *p4d;
pudval_t *pud;
@@ -150,6 +153,11 @@ unsigned long __head __startup_64(unsigned long physaddr,
/* Include the SME encryption mask in the fixup value */
load_delta += sme_get_me_mask();
+ /* Look at the randomization spread to adapt page table used */
+ level3_kernel_start = pud_index(__START_KERNEL_map);
+ level3_kernel_count = pud_count(KERNEL_IMAGE_SIZE);
+ level3_fixmap_start = level3_kernel_start + level3_kernel_count;
+
/* Fixup the physical addresses in the page table */
pgd = fixup_pointer(&early_top_pgt, physaddr);
@@ -166,8 +174,9 @@ unsigned long __head __startup_64(unsigned long physaddr,
}
pud = fixup_pointer(&level3_kernel_pgt, physaddr);
- pud[510] += load_delta;
- pud[511] += load_delta;
+ for (i = 0; i < level3_kernel_count; i++)
+ pud[level3_kernel_start + i] += load_delta;
+ pud[level3_fixmap_start] += load_delta;
pmd = fixup_pointer(level2_fixmap_pgt, physaddr);
for (i = FIXMAP_PMD_TOP; i > FIXMAP_PMD_TOP - FIXMAP_PMD_NUM; i--)
@@ -226,7 +235,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
*/
pmd = fixup_pointer(level2_kernel_pgt, physaddr);
- for (i = 0; i < PTRS_PER_PMD; i++) {
+ for (i = 0; i < PTRS_PER_PMD * level3_kernel_count; i++) {
if (pmd[i] & _PAGE_PRESENT)
pmd[i] += load_delta;
}
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 0f1739d7bff7..82d637615d2c 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -42,12 +42,16 @@
#define l4_index(x) (((x) >> 39) & 511)
#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
+#define pud_count(x) (((x + (PUD_SIZE - 1)) & ~(PUD_SIZE - 1)) >> PUD_SHIFT)
L4_PAGE_OFFSET = l4_index(__PAGE_OFFSET_BASE_L4)
L4_START_KERNEL = l4_index(__START_KERNEL_map)
L3_START_KERNEL = pud_index(__START_KERNEL_map)
+/* Adapt page table L3 space based on range of randomization */
+L3_KERNEL_ENTRY_COUNT = pud_count(KERNEL_IMAGE_SIZE)
+
.text
__HEAD
.code64
@@ -432,7 +436,12 @@ NEXT_PAGE(level4_kernel_pgt)
NEXT_PAGE(level3_kernel_pgt)
.fill L3_START_KERNEL,8,0
/* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
- .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
+ i = 0
+ .rept L3_KERNEL_ENTRY_COUNT
+ .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC \
+ + PAGE_SIZE*i
+ i = i + 1
+ .endr
.quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
NEXT_PAGE(level2_kernel_pgt)
--
2.20.1.495.gaa96b0ce6b-goog
^ permalink raw reply related [flat|nested] 56+ messages in thread
* Re: [PATCH v6 00/27] x86: PIE support and option to extend KASLR randomization
2019-01-31 19:24 [PATCH v6 00/27] x86: PIE support and option to extend KASLR randomization Thomas Garnier
` (26 preceding siblings ...)
2019-01-31 19:24 ` [PATCH v6 27/27] x86/kaslr: Add option to extend KASLR range from 1GB to 3GB Thomas Garnier
@ 2019-01-31 19:59 ` Kees Cook
2019-01-31 21:40 ` Konrad Rzeszutek Wilk
28 siblings, 0 replies; 56+ messages in thread
From: Kees Cook @ 2019-01-31 19:59 UTC (permalink / raw)
To: Thomas Garnier
Cc: Kernel Hardening, Kristen Carlson Accardi, Thomas Gleixner,
Ingo Molnar, Borislav Petkov, H. Peter Anvin, X86 ML,
Jonathan Corbet, Masahiro Yamada, Michal Marek, Herbert Xu,
David S. Miller, Andy Lutomirski, Paolo Bonzini,
Radim Krčmář,
Juergen Gross, Alok Kataria, Dennis Zhou, Tejun Heo,
Christoph Lameter, Rafael J. Wysocki, Len Brown, Pavel Machek,
Steven Rostedt, Joerg Roedel, Dave Hansen, Peter Zijlstra,
Boris Ostrovsky, Stefano Stabellini, Luis Chamberlain,
Greg Kroah-Hartman, Arnd Bergmann, Luc Van Oostenryck,
Jason Baron, Kirill A. Shutemov, Andrey Ryabinin, Thomas Garnier,
Baoquan He, Alexander Popov, Jordan Borgner, Nathan Chancellor,
Cao jin, H.J. Lu, Alexey Dobriyan, Nadav Amit, Yonghong Song,
Nick Desaulniers, Arnaldo Carvalho de Melo, Jann Horn,
Ard Biesheuvel, Andrew Morton, Andi Kleen, Francis Deslauriers,
Masami Hiramatsu, Mimi Zohar, Nayna Jain, Michael Ellerman,
Jan Kiszka, Jia Zhang, Konrad Rzeszutek Wilk, Brijesh Singh,
Jan Beulich, Tim Chen, Mike Rapoport, Michal Hocko,
Stephen Rothwell, Rafael Ávila de Espíndola,
Mathieu Desnoyers, Nicholas Piggin, Adrian Hunter, Song Liu,
Alexander Shishkin, Michael Forney, Palmer Dabbelt, James Hogan,
Joe Lawrence, nixiaoming, LKML, open list:DOCUMENTATION,
linux-kbuild, linux-crypto, KVM, virtualization, Linux PM list,
xen-devel, linux-arch, Sparse Mailing-list
On Fri, Feb 1, 2019 at 8:28 AM Thomas Garnier <thgarnie@chromium.org> wrote:
> These patches make the changes necessary to build the kernel as Position
> Independent Executable (PIE) on x86_64. A PIE kernel can be relocated below
> the top 2G of the virtual address space. It allows to optionally extend the
> KASLR randomization range from 1G to 3G. The chosen range is the one currently
> available, future changes will allow the kernel module to have a wider
> randomization range.
This also lays the groundwork for doing compilation-unit-granularity
KASLR, as Kristen has been working on. With PIE working, the
relocations are more sane and boot-time reordering becomes possible
(or at least, it becomes the same logically as doing the work on
modules, etc).
--
Kees Cook
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [PATCH v6 00/27] x86: PIE support and option to extend KASLR randomization
2019-01-31 19:24 [PATCH v6 00/27] x86: PIE support and option to extend KASLR randomization Thomas Garnier
` (27 preceding siblings ...)
2019-01-31 19:59 ` [PATCH v6 00/27] x86: PIE support and option to extend KASLR randomization Kees Cook
@ 2019-01-31 21:40 ` Konrad Rzeszutek Wilk
2019-01-31 22:42 ` Thomas Garnier
28 siblings, 1 reply; 56+ messages in thread
From: Konrad Rzeszutek Wilk @ 2019-01-31 21:40 UTC (permalink / raw)
To: Thomas Garnier
Cc: kernel-hardening, kristen, Thomas Gleixner, Ingo Molnar,
Borislav Petkov, H. Peter Anvin, x86, Jonathan Corbet,
Masahiro Yamada, Michal Marek, Herbert Xu, David S. Miller,
Andy Lutomirski, Paolo Bonzini, Radim Krčmář,
Juergen Gross, Alok Kataria, Dennis Zhou, Tejun Heo,
Christoph Lameter, Rafael J. Wysocki, Len Brown, Pavel Machek,
Steven Rostedt, Joerg Roedel, Dave Hansen, Peter Zijlstra,
Boris Ostrovsky, Stefano Stabellini, Luis Chamberlain,
Greg Kroah-Hartman, Arnd Bergmann, Luc Van Oostenryck,
Jason Baron, Kirill A. Shutemov, Kees Cook, Andrey Ryabinin,
Thomas Garnier, Baoquan He, Alexander Popov, Jordan Borgner,
Nathan Chancellor, Cao jin, H.J. Lu, Alexey Dobriyan, Nadav Amit,
Yonghong Song, Nick Desaulniers, Arnaldo Carvalho de Melo,
Jann Horn, Ard Biesheuvel, Andrew Morton, Andi Kleen,
Francis Deslauriers, Masami Hiramatsu, Mimi Zohar, Nayna Jain,
Michael Ellerman, Jan Kiszka, Jia Zhang, Brijesh Singh,
Jan Beulich, Tim Chen, Mike Rapoport, Michal Hocko,
Stephen Rothwell, Rafael Ávila de Espíndola,
Mathieu Desnoyers, Nicholas Piggin, Adrian Hunter, Song Liu,
Alexander Shishkin, Michael Forney, Palmer Dabbelt, James Hogan,
Joe Lawrence, nixiaoming, linux-kernel, linux-doc, linux-kbuild,
linux-crypto, kvm, virtualization, linux-pm, xen-devel,
linux-arch, linux-sparse
On Thu, Jan 31, 2019 at 11:24:07AM -0800, Thomas Garnier wrote:
> There has been no major concern in the latest iterations. I am interested on
> what would be the best way to slowly integrate this patchset upstream.
One question that I was somehow expected in this cover letter - what
about all those lovely speculative bugs? As in say some one hasn't
updated their machine with the Spectre v3a microcode - wouldn't they
be able to get the kernel virtual address space?
In effect rendering all this hard-work not needed?
^ permalink raw reply [flat|nested] 56+ messages in thread
* Re: [PATCH v6 00/27] x86: PIE support and option to extend KASLR randomization
2019-01-31 21:40 ` Konrad Rzeszutek Wilk
@ 2019-01-31 22:42 ` Thomas Garnier
0 siblings, 0 replies; 56+ messages in thread
From: Thomas Garnier @ 2019-01-31 22:42 UTC (permalink / raw)
To: Konrad Rzeszutek Wilk
Cc: Kernel Hardening, kristen, Thomas Gleixner, Ingo Molnar,
Borislav Petkov, H. Peter Anvin, the arch/x86 maintainers,
Jonathan Corbet, Masahiro Yamada, Michal Marek, Herbert Xu,
David S. Miller, Andy Lutomirski, Paolo Bonzini,
Radim Krčmář,
Juergen Gross, Alok Kataria, Dennis Zhou, Tejun Heo,
Christoph Lameter, Rafael J. Wysocki, Len Brown, Pavel Machek,
Steven Rostedt, Joerg Roedel, Dave Hansen, Peter Zijlstra,
Boris Ostrovsky, Stefano Stabellini, Luis Chamberlain,
Greg Kroah-Hartman, Arnd Bergmann, Luc Van Oostenryck,
Jason Baron, Kirill A. Shutemov, Kees Cook, Andrey Ryabinin,
Baoquan He, Alexander Popov, Jordan Borgner, Nathan Chancellor,
Cao jin, H.J. Lu, Alexey Dobriyan, Nadav Amit, Yonghong Song,
Nick Desaulniers, Arnaldo Carvalho de Melo, Jann Horn,
Ard Biesheuvel, Andrew Morton, Andi Kleen, Francis Deslauriers,
Masami Hiramatsu, Mimi Zohar, Nayna Jain, Michael Ellerman,
Jan Kiszka, Jia Zhang, Brijesh Singh, Jan Beulich, Tim Chen,
Mike Rapoport, Michal Hocko, Stephen Rothwell,
Rafael Ávila de Espíndola, Mathieu Desnoyers,
Nicholas Piggin, Adrian Hunter, Song Liu, Alexander Shishkin,
Michael Forney, Palmer Dabbelt, James Hogan, Joe Lawrence,
nixiaoming, LKML, Linux Doc Mailing List, linux-kbuild,
Linux Crypto Mailing List, kvm list, virtualization,
Linux PM list, xen-devel, linux-arch, Sparse Mailing-list
On Thu, Jan 31, 2019 at 1:41 PM Konrad Rzeszutek Wilk
<konrad.wilk@oracle.com> wrote:
>
> On Thu, Jan 31, 2019 at 11:24:07AM -0800, Thomas Garnier wrote:
> > There has been no major concern in the latest iterations. I am interested on
> > what would be the best way to slowly integrate this patchset upstream.
>
> One question that I was somehow expected in this cover letter - what
> about all those lovely speculative bugs? As in say some one hasn't
> updated their machine with the Spectre v3a microcode - wouldn't they
> be able to get the kernel virtual address space?
Yes they would be.
>
> In effect rendering all this hard-work not needed?
Only if we think Spectre bugs will never be fixed.
^ permalink raw reply [flat|nested] 56+ messages in thread