linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
To: linux-kernel@vger.kernel.org
Cc: x86@kernel.org, "H . Peter Anvin" <hpa@zytor.com>,
	Ingo Molnar <mingo@kernel.org>,
	"Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>,
	Rasmus Villemoes <linux@rasmusvillemoes.dk>
Subject: [POC 07/12] x86-64: rai: implement _rai_load
Date: Thu, 18 Oct 2018 00:33:27 +0200	[thread overview]
Message-ID: <20181017223332.11964-7-linux@rasmusvillemoes.dk> (raw)
In-Reply-To: <20181017223332.11964-1-linux@rasmusvillemoes.dk>

This implements the simplest of the rai_* operations, loading a
value. For a load of an 8-byte value, I believe we do need to keep room
for a movabs, since there's no guarantee the final value can be loaded
as an imm32 or using a %rip-relative leaq.

It wouldn't hurt to add some sanity checking in rai_patch_one, e.g. at
least check that the immediate we are replacing is the dummy 0x12345678
we used in the .rai_templ section.

That the patching works can be seen in a quick virtme session.  gdb on
vmlinux and /proc/kcore shows

(gdb) x/16i rai_proc_show
   0xffffffff8108c120 <rai_proc_show>:  mov    $0xffffffff81fd9ad4,%rsi
   0xffffffff8108c127 <rai_proc_show+7>:        jmpq   0xffffffff819652e9
   0xffffffff8108c12c <rai_proc_show+12>:       nop
   0xffffffff8108c12d <rai_proc_show+13>:       nop
   0xffffffff8108c12e <rai_proc_show+14>:       nop
   0xffffffff8108c12f <rai_proc_show+15>:       nop
   0xffffffff8108c130 <rai_proc_show+16>:       nop
   0xffffffff8108c131 <rai_proc_show+17>:       jmpq   0xffffffff819652f5
   0xffffffff8108c136 <rai_proc_show+22>:       jmpq   0xffffffff81965300
   0xffffffff8108c13b <rai_proc_show+27>:       callq  0xffffffff81238bb0 <seq_printf>
   0xffffffff8108c140 <rai_proc_show+32>:       mov    $0xffffffffffffffff,%rax
   0xffffffff8108c147 <rai_proc_show+39>:       mov    %rax,0x17b228a(%rip)        # 0xffffffff8283e3d8 <three>
   0xffffffff8108c14e <rai_proc_show+46>:       mov    %eax,0x17b228c(%rip)        # 0xffffffff8283e3e0 <two>
   0xffffffff8108c154 <rai_proc_show+52>:       mov    %eax,0x17b228a(%rip)        # 0xffffffff8283e3e4 <one>
   0xffffffff8108c15a <rai_proc_show+58>:       xor    %eax,%eax
   0xffffffff8108c15c <rai_proc_show+60>:       retq
(gdb) x/16i 0xffffffff96e8c120
   0xffffffff96e8c120:  mov    $0xffffffff97dd9ad4,%rsi
   0xffffffff96e8c127:  movabs $0x3,%r8
   0xffffffff96e8c131:  mov    $0x2,%ecx
   0xffffffff96e8c136:  mov    $0x1,%edx
   0xffffffff96e8c13b:  callq  0xffffffff97038bb0
   0xffffffff96e8c140:  mov    $0xffffffffffffffff,%rax
   0xffffffff96e8c147:  mov    %rax,0x17b228a(%rip)        # 0xffffffff9863e3d8
   0xffffffff96e8c14e:  mov    %eax,0x17b228c(%rip)        # 0xffffffff9863e3e0
   0xffffffff96e8c154:  mov    %eax,0x17b228a(%rip)        # 0xffffffff9863e3e4
   0xffffffff96e8c15a:  xor    %eax,%eax
   0xffffffff96e8c15c:  retq
   0xffffffff96e8c15d:  nopl   (%rax)
   0xffffffff96e8c160:  push   %rbx
   0xffffffff96e8c161:  mov    $0xffffffff9804c240,%rdi
   0xffffffff96e8c168:  mov    $0xffffffff97e9fccc,%rbx
   0xffffffff96e8c16f:  callq  0xffffffff9776b230

where we also see that gcc chooses the destination registers rather
intelligently. As expected, repeated "cat /proc/rai" continues to print
"one: 1, two: 2, three: 3".

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
---
 arch/x86/include/asm/rai.S | 42 +++++++++++++++++++++++++++++++++++++-
 arch/x86/include/asm/rai.h | 30 ++++++++++++++++++++++++++-
 arch/x86/kernel/rai.c      | 18 ++++++++++++++++
 3 files changed, 88 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/rai.S b/arch/x86/include/asm/rai.S
index 253d27453416..f42cdd8db876 100644
--- a/arch/x86/include/asm/rai.S
+++ b/arch/x86/include/asm/rai.S
@@ -8,11 +8,51 @@
 	.long \templ_end - \templ
 	.long \thunk - .
 .endm
-	
+
 .macro rai_entry_pad start end
 	.ifgt STRUCT_RAI_ENTRY_SIZE-(\end-\start)
 	.skip STRUCT_RAI_ENTRY_SIZE-(\end-\start), 0x00
 	.endif
 .endm
 
+.macro rai_load dst, var, type
+	.pushsection .rai_templ, "aw"
+10:
+	.ifeq \type - RAI_LOAD_8
+	movabs $0x1234567812345678, \dst
+	.else
+	mov $0x12345678, \dst
+	.endif
+11:
+	.popsection
+
+	/* Even if the mov \var, \dst is short enough to fit in the
+	 * space we reserve in .text, we still need the thunk for when
+	 * we do the immediate patching. */
+	.pushsection .text.rai_thunk, "ax"
+20:
+	mov \var(%rip), \dst
+	jmp 32f
+21:
+	.popsection
+
+	/* The part that goes into .text */
+30:
+	/* silence objtool by actually using the thunk for now */
+	jmp 20b
+	/* mov \var(%rip), \dst */
+31:
+	.skip -(((11b - 10b)-(31b - 30b)) > 0)*((11b - 10b)-(31b - 30b)), 0x90
+32:
+
+	.pushsection .rai_data, "a"
+40:
+	rai_entry \type 30b 32b 10b 11b 20b
+	.quad \var   /* .load.addr */
+41:
+	rai_entry_pad 40b 41b
+	.popsection
+.endm /* rai_load */
+
+
 #endif
diff --git a/arch/x86/include/asm/rai.h b/arch/x86/include/asm/rai.h
index 269d696255b0..b57494c98d0f 100644
--- a/arch/x86/include/asm/rai.h
+++ b/arch/x86/include/asm/rai.h
@@ -1,7 +1,10 @@
 #ifndef _ASM_X86_RAI_H
 #define _ASM_X86_RAI_H
 
-#define STRUCT_RAI_ENTRY_SIZE 24
+#define RAI_LOAD_4 0
+#define RAI_LOAD_8 1
+
+#define STRUCT_RAI_ENTRY_SIZE 32
 
 /* Put the asm macros in a separate file for easier editing. */
 #include <asm/rai.S>
@@ -16,10 +19,35 @@ struct rai_entry {
 	s32 templ_len;    /* length of template */
 	s32 thunk_offset; /* member-relative offset to ool thunk */
 	/* type-specific data follows */
+	union {
+		struct {
+			void *addr;
+		} load;
+	};
 };
 _Static_assert(sizeof(struct rai_entry) == STRUCT_RAI_ENTRY_SIZE,
 	       "please update STRUCT_RAI_ENTRY_SIZE");
 
+#define _rai_load(var) ({						\
+		typeof(var) ret__;					\
+		switch(sizeof(var)) {					\
+		case 4:							\
+			asm("rai_load %0, %c1, %c2"			\
+			    : "=r" (ret__)				\
+			    : "i" (&(var)), "i" (RAI_LOAD_4));		\
+			break;						\
+		case 8:							\
+			asm("rai_load %0, %c1, %c2"			\
+			    : "=r" (ret__)				\
+			    : "i" (&(var)), "i" (RAI_LOAD_8));		\
+			break;						\
+		default:						\
+			ret__ = _rai_load_fallback(var);		\
+			break;						\
+		}							\
+		ret__;							\
+	})
+
 #endif /* !__ASSEMBLY */
 
 #endif /* _ASM_X86_RAI_H */
diff --git a/arch/x86/kernel/rai.c b/arch/x86/kernel/rai.c
index 819d03a025e3..e55e85f11a2e 100644
--- a/arch/x86/kernel/rai.c
+++ b/arch/x86/kernel/rai.c
@@ -14,6 +14,24 @@ rai_patch_one(const struct rai_entry *r)
 	u8 *thunk = (u8*)&r->thunk_offset + r->thunk_offset;
 
 	switch (r->type) {
+	case RAI_LOAD_4: {
+		const u32 *imm = r->load.addr;
+		/*
+		 * The immediate is the last 4 bytes of the template,
+		 * regardless of the operand encoding.
+		 */
+		memcpy(templ + r->templ_len - sizeof(*imm), imm, sizeof(*imm));
+		break;
+	}
+	case RAI_LOAD_8: {
+		const u64 *imm = r->load.addr;
+		/*
+		 * The immediate is the last 8 bytes of the template,
+		 * regardless of the operand encoding.
+		 */
+		memcpy(templ + r->templ_len - sizeof(*imm), imm, sizeof(*imm));
+		break;
+	}
 	default:
 		WARN_ONCE(1, "unhandled RAI type %d\n", r->type);
 		return;
-- 
2.19.1.6.gbde171bbf5


  parent reply	other threads:[~2018-10-17 22:33 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-10-17 22:33 [POC 01/12] Accessing __ro_after_init variables as immediates Rasmus Villemoes
2018-10-17 22:33 ` [POC 02/12] init/main.c: call update_rai_access() Rasmus Villemoes
2018-10-17 22:33 ` [POC 03/12] arch/Kconfig: add ARCH_HAS_RAI symbol Rasmus Villemoes
2018-10-17 22:33 ` [POC 04/12] vmlinux.lds.h: handle various rai sections Rasmus Villemoes
2018-10-17 22:33 ` [POC 05/12] x86-64: initial ro-after-init patching support Rasmus Villemoes
2018-10-17 22:33 ` [POC 06/12] ugly ugly hack Rasmus Villemoes
2018-10-17 22:33 ` Rasmus Villemoes [this message]
2018-10-17 22:33 ` [POC 08/12] fs/dcache.c: access dentry_cache via rai_load Rasmus Villemoes
2018-10-17 22:33 ` [POC 09/12] fs/inode.c: access inode_cachep " Rasmus Villemoes
2018-10-17 22:33 ` [POC 10/12] hack: /proc/rai: add rai_bucket_shift use Rasmus Villemoes
2018-10-17 22:33 ` [POC 11/12] x86-64: implement _rai_bucket_shift Rasmus Villemoes
2018-10-17 22:33 ` [POC 12/12] fs/dcache.c: use rai_bucket_shift for dentry hashtable Rasmus Villemoes

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181017223332.11964-7-linux@rasmusvillemoes.dk \
    --to=linux@rasmusvillemoes.dk \
    --cc=hpa@zytor.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).