All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] KVM: x86 emulator: access GPRs on demand
@ 2012-07-19 12:14 Avi Kivity
  2012-08-07 21:23 ` Marcelo Tosatti
  0 siblings, 1 reply; 4+ messages in thread
From: Avi Kivity @ 2012-07-19 12:14 UTC (permalink / raw)
  To: Marcelo Tosatti, Gleb Natapov; +Cc: kvm

Instead of populating the the entire register file, read in registers
as they are accessed, and write back only the modified ones.  This
saves a VMREAD and VMWRITE on Intel (for rsp, since it is not usually
used during emulation), and a two 128-byte copies for the registers.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_emulate.h |  17 ++-
 arch/x86/kvm/emulate.c             | 268 +++++++++++++++++++++----------------
 arch/x86/kvm/x86.c                 |  50 ++++---
 3 files changed, 192 insertions(+), 143 deletions(-)

diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index c764f43..2f1da16 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -86,6 +86,19 @@ struct x86_instruction_info {
 
 struct x86_emulate_ops {
 	/*
+	 * read_gpr: read a general purpose register (rax - r15)
+	 *
+	 * @reg: gpr number.
+	 */
+	ulong (*read_gpr)(struct x86_emulate_ctxt *ctxt, unsigned reg);
+	/*
+	 * write_gpr: write a general purpose register (rax - r15)
+	 *
+	 * @reg: gpr number.
+	 * @val: value to write.
+	 */
+	void (*write_gpr)(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val);
+	/*
 	 * read_std: Read bytes of standard (non-emulated/special) memory.
 	 *           Used for descriptor reading.
 	 *  @addr:  [IN ] Linear address from which to read.
@@ -281,8 +294,10 @@ struct x86_emulate_ctxt {
 	bool rip_relative;
 	unsigned long _eip;
 	struct operand memop;
+	u32 regs_valid;  /* bitmaps of registers in _regs[] that can be read */
+	u32 regs_dirty;  /* bitmaps of registers in _regs[] that have been written */
 	/* Fields above regs are cleared together. */
-	unsigned long regs[NR_VCPU_REGS];
+	unsigned long _regs[NR_VCPU_REGS];
 	struct operand *memopp;
 	struct fetch_cache fetch;
 	struct read_cache io_read;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 97d9a99..468d26e 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -202,6 +202,28 @@ struct gprefix {
 #define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
 #define EFLG_RESERVED_ONE_MASK 2
 
+static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr)
+{
+	if (!(ctxt->regs_valid & (1 << nr))) {
+		ctxt->regs_valid |= 1 << nr;
+		ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr);
+	}
+	return ctxt->_regs[nr];
+}
+
+static ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr)
+{
+	ctxt->regs_valid |= 1 << nr;
+	ctxt->regs_dirty |= 1 << nr;
+	return &ctxt->_regs[nr];
+}
+
+static ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr)
+{
+	reg_read(ctxt, nr);
+	return reg_write(ctxt, nr);
+}
+
 /*
  * Instruction emulation:
  * Most instructions are emulated directly via a fragment of inline assembly
@@ -374,8 +396,8 @@ struct gprefix {
 #define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex)			\
 	do {								\
 		unsigned long _tmp;					\
-		ulong *rax = &(ctxt)->regs[VCPU_REGS_RAX];		\
-		ulong *rdx = &(ctxt)->regs[VCPU_REGS_RDX];		\
+		ulong *rax = reg_rmw((ctxt), VCPU_REGS_RAX);		\
+		ulong *rdx = reg_rmw((ctxt), VCPU_REGS_RDX);		\
 									\
 		__asm__ __volatile__ (					\
 			_PRE_EFLAGS("0", "5", "1")			\
@@ -773,14 +795,15 @@ static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
  * pointer into the block that addresses the relevant register.
  * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
  */
-static void *decode_register(u8 modrm_reg, unsigned long *regs,
+static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
 			     int highbyte_regs)
 {
 	void *p;
 
-	p = &regs[modrm_reg];
 	if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
-		p = (unsigned char *)&regs[modrm_reg & 3] + 1;
+		p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
+	else
+		p = reg_rmw(ctxt, modrm_reg);
 	return p;
 }
 
@@ -969,10 +992,10 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
 
 	op->type = OP_REG;
 	if (ctxt->d & ByteOp) {
-		op->addr.reg = decode_register(reg, ctxt->regs, highbyte_regs);
+		op->addr.reg = decode_register(ctxt, reg, highbyte_regs);
 		op->bytes = 1;
 	} else {
-		op->addr.reg = decode_register(reg, ctxt->regs, 0);
+		op->addr.reg = decode_register(ctxt, reg, 0);
 		op->bytes = ctxt->op_bytes;
 	}
 	fetch_register_operand(op);
@@ -1007,8 +1030,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 	if (ctxt->modrm_mod == 3) {
 		op->type = OP_REG;
 		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
-		op->addr.reg = decode_register(ctxt->modrm_rm,
-					       ctxt->regs, ctxt->d & ByteOp);
+		op->addr.reg = decode_register(ctxt, ctxt->modrm_rm, ctxt->d & ByteOp);
 		if (ctxt->d & Sse) {
 			op->type = OP_XMM;
 			op->bytes = 16;
@@ -1029,10 +1051,10 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 	op->type = OP_MEM;
 
 	if (ctxt->ad_bytes == 2) {
-		unsigned bx = ctxt->regs[VCPU_REGS_RBX];
-		unsigned bp = ctxt->regs[VCPU_REGS_RBP];
-		unsigned si = ctxt->regs[VCPU_REGS_RSI];
-		unsigned di = ctxt->regs[VCPU_REGS_RDI];
+		unsigned bx = reg_read(ctxt, VCPU_REGS_RBX);
+		unsigned bp = reg_read(ctxt, VCPU_REGS_RBP);
+		unsigned si = reg_read(ctxt, VCPU_REGS_RSI);
+		unsigned di = reg_read(ctxt, VCPU_REGS_RDI);
 
 		/* 16-bit ModR/M decode. */
 		switch (ctxt->modrm_mod) {
@@ -1089,17 +1111,17 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 			if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0)
 				modrm_ea += insn_fetch(s32, ctxt);
 			else {
-				modrm_ea += ctxt->regs[base_reg];
+				modrm_ea += reg_read(ctxt, base_reg);
 				adjust_modrm_seg(ctxt, base_reg);
 			}
 			if (index_reg != 4)
-				modrm_ea += ctxt->regs[index_reg] << scale;
+				modrm_ea += reg_read(ctxt, index_reg) << scale;
 		} else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
 			if (ctxt->mode == X86EMUL_MODE_PROT64)
 				ctxt->rip_relative = 1;
 		} else {
 			base_reg = ctxt->modrm_rm;
-			modrm_ea += ctxt->regs[base_reg];
+			modrm_ea += reg_read(ctxt, base_reg);
 			adjust_modrm_seg(ctxt, base_reg);
 		}
 		switch (ctxt->modrm_mod) {
@@ -1240,10 +1262,10 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
 	if (rc->pos == rc->end) { /* refill pio read ahead */
 		unsigned int in_page, n;
 		unsigned int count = ctxt->rep_prefix ?
-			address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) : 1;
+			address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1;
 		in_page = (ctxt->eflags & EFLG_DF) ?
-			offset_in_page(ctxt->regs[VCPU_REGS_RDI]) :
-			PAGE_SIZE - offset_in_page(ctxt->regs[VCPU_REGS_RDI]);
+			offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
+			PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
 		n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size,
 			count);
 		if (n == 0)
@@ -1522,8 +1544,8 @@ static int push(struct x86_emulate_ctxt *ctxt, void *data, int bytes)
 {
 	struct segmented_address addr;
 
-	register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP], -bytes);
-	addr.ea = register_address(ctxt, ctxt->regs[VCPU_REGS_RSP]);
+	register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RSP), -bytes);
+	addr.ea = register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSP));
 	addr.seg = VCPU_SREG_SS;
 
 	return segmented_write(ctxt, addr, data, bytes);
@@ -1542,13 +1564,13 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt,
 	int rc;
 	struct segmented_address addr;
 
-	addr.ea = register_address(ctxt, ctxt->regs[VCPU_REGS_RSP]);
+	addr.ea = register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSP));
 	addr.seg = VCPU_SREG_SS;
 	rc = segmented_read(ctxt, addr, dest, len);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP], len);
+	register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RSP), len);
 	return rc;
 }
 
@@ -1610,26 +1632,28 @@ static int em_enter(struct x86_emulate_ctxt *ctxt)
 	int rc;
 	unsigned frame_size = ctxt->src.val;
 	unsigned nesting_level = ctxt->src2.val & 31;
+	ulong rbp;
 
 	if (nesting_level)
 		return X86EMUL_UNHANDLEABLE;
 
-	rc = push(ctxt, &ctxt->regs[VCPU_REGS_RBP], stack_size(ctxt));
+	rbp = reg_read(ctxt, VCPU_REGS_RBP);
+	rc = push(ctxt, &rbp, stack_size(ctxt));
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
-	assign_masked(&ctxt->regs[VCPU_REGS_RBP], ctxt->regs[VCPU_REGS_RSP],
+	assign_masked(reg_rmw(ctxt, VCPU_REGS_RBP), reg_read(ctxt, VCPU_REGS_RSP),
 		      stack_mask(ctxt));
-	assign_masked(&ctxt->regs[VCPU_REGS_RSP],
-		      ctxt->regs[VCPU_REGS_RSP] - frame_size,
+	assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP),
+		      reg_read(ctxt, VCPU_REGS_RSP) - frame_size,
 		      stack_mask(ctxt));
 	return X86EMUL_CONTINUE;
 }
 
 static int em_leave(struct x86_emulate_ctxt *ctxt)
 {
-	assign_masked(&ctxt->regs[VCPU_REGS_RSP], ctxt->regs[VCPU_REGS_RBP],
+	assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP), reg_read(ctxt, VCPU_REGS_RBP),
 		      stack_mask(ctxt));
-	return emulate_pop(ctxt, &ctxt->regs[VCPU_REGS_RBP], ctxt->op_bytes);
+	return emulate_pop(ctxt, reg_rmw(ctxt, VCPU_REGS_RBP), ctxt->op_bytes);
 }
 
 static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
@@ -1657,13 +1681,13 @@ static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
 
 static int em_pusha(struct x86_emulate_ctxt *ctxt)
 {
-	unsigned long old_esp = ctxt->regs[VCPU_REGS_RSP];
+	unsigned long old_esp = reg_read(ctxt, VCPU_REGS_RSP);
 	int rc = X86EMUL_CONTINUE;
 	int reg = VCPU_REGS_RAX;
 
 	while (reg <= VCPU_REGS_RDI) {
 		(reg == VCPU_REGS_RSP) ?
-		(ctxt->src.val = old_esp) : (ctxt->src.val = ctxt->regs[reg]);
+		(ctxt->src.val = old_esp) : (ctxt->src.val = reg_read(ctxt, reg));
 
 		rc = em_push(ctxt);
 		if (rc != X86EMUL_CONTINUE)
@@ -1688,12 +1712,12 @@ static int em_popa(struct x86_emulate_ctxt *ctxt)
 
 	while (reg >= VCPU_REGS_RAX) {
 		if (reg == VCPU_REGS_RSP) {
-			register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP],
+			register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RSP),
 							ctxt->op_bytes);
 			--reg;
 		}
 
-		rc = emulate_pop(ctxt, &ctxt->regs[reg], ctxt->op_bytes);
+		rc = emulate_pop(ctxt, reg_rmw(ctxt, reg), ctxt->op_bytes);
 		if (rc != X86EMUL_CONTINUE)
 			break;
 		--reg;
@@ -1961,14 +1985,14 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
 {
 	u64 old = ctxt->dst.orig_val64;
 
-	if (((u32) (old >> 0) != (u32) ctxt->regs[VCPU_REGS_RAX]) ||
-	    ((u32) (old >> 32) != (u32) ctxt->regs[VCPU_REGS_RDX])) {
-		ctxt->regs[VCPU_REGS_RAX] = (u32) (old >> 0);
-		ctxt->regs[VCPU_REGS_RDX] = (u32) (old >> 32);
+	if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
+	    ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
+		*reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
+		*reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
 		ctxt->eflags &= ~EFLG_ZF;
 	} else {
-		ctxt->dst.val64 = ((u64)ctxt->regs[VCPU_REGS_RCX] << 32) |
-			(u32) ctxt->regs[VCPU_REGS_RBX];
+		ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
+			(u32) reg_read(ctxt, VCPU_REGS_RBX);
 
 		ctxt->eflags |= EFLG_ZF;
 	}
@@ -2004,7 +2028,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
 {
 	/* Save real source value, then compare EAX against destination. */
 	ctxt->src.orig_val = ctxt->src.val;
-	ctxt->src.val = ctxt->regs[VCPU_REGS_RAX];
+	ctxt->src.val = reg_read(ctxt, VCPU_REGS_RAX);
 	emulate_2op_SrcV(ctxt, "cmp");
 
 	if (ctxt->eflags & EFLG_ZF) {
@@ -2013,7 +2037,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
 	} else {
 		/* Failure: write the value we saw to EAX. */
 		ctxt->dst.type = OP_REG;
-		ctxt->dst.addr.reg = (unsigned long *)&ctxt->regs[VCPU_REGS_RAX];
+		ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
 	}
 	return X86EMUL_CONTINUE;
 }
@@ -2153,10 +2177,10 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
 	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
 	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
 
-	ctxt->regs[VCPU_REGS_RCX] = ctxt->_eip;
+	*reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip;
 	if (efer & EFER_LMA) {
 #ifdef CONFIG_X86_64
-		ctxt->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF;
+		*reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags & ~EFLG_RF;
 
 		ops->get_msr(ctxt,
 			     ctxt->mode == X86EMUL_MODE_PROT64 ?
@@ -2235,7 +2259,7 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
 	ctxt->_eip = msr_data;
 
 	ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
-	ctxt->regs[VCPU_REGS_RSP] = msr_data;
+	*reg_write(ctxt, VCPU_REGS_RSP) = msr_data;
 
 	return X86EMUL_CONTINUE;
 }
@@ -2285,8 +2309,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
 	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
 
-	ctxt->_eip = ctxt->regs[VCPU_REGS_RDX];
-	ctxt->regs[VCPU_REGS_RSP] = ctxt->regs[VCPU_REGS_RCX];
+	ctxt->_eip = reg_read(ctxt, VCPU_REGS_RDX);
+	*reg_write(ctxt, VCPU_REGS_RSP) = reg_read(ctxt, VCPU_REGS_RCX);
 
 	return X86EMUL_CONTINUE;
 }
@@ -2355,14 +2379,14 @@ static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
 {
 	tss->ip = ctxt->_eip;
 	tss->flag = ctxt->eflags;
-	tss->ax = ctxt->regs[VCPU_REGS_RAX];
-	tss->cx = ctxt->regs[VCPU_REGS_RCX];
-	tss->dx = ctxt->regs[VCPU_REGS_RDX];
-	tss->bx = ctxt->regs[VCPU_REGS_RBX];
-	tss->sp = ctxt->regs[VCPU_REGS_RSP];
-	tss->bp = ctxt->regs[VCPU_REGS_RBP];
-	tss->si = ctxt->regs[VCPU_REGS_RSI];
-	tss->di = ctxt->regs[VCPU_REGS_RDI];
+	tss->ax = reg_read(ctxt, VCPU_REGS_RAX);
+	tss->cx = reg_read(ctxt, VCPU_REGS_RCX);
+	tss->dx = reg_read(ctxt, VCPU_REGS_RDX);
+	tss->bx = reg_read(ctxt, VCPU_REGS_RBX);
+	tss->sp = reg_read(ctxt, VCPU_REGS_RSP);
+	tss->bp = reg_read(ctxt, VCPU_REGS_RBP);
+	tss->si = reg_read(ctxt, VCPU_REGS_RSI);
+	tss->di = reg_read(ctxt, VCPU_REGS_RDI);
 
 	tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
 	tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
@@ -2378,14 +2402,14 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
 
 	ctxt->_eip = tss->ip;
 	ctxt->eflags = tss->flag | 2;
-	ctxt->regs[VCPU_REGS_RAX] = tss->ax;
-	ctxt->regs[VCPU_REGS_RCX] = tss->cx;
-	ctxt->regs[VCPU_REGS_RDX] = tss->dx;
-	ctxt->regs[VCPU_REGS_RBX] = tss->bx;
-	ctxt->regs[VCPU_REGS_RSP] = tss->sp;
-	ctxt->regs[VCPU_REGS_RBP] = tss->bp;
-	ctxt->regs[VCPU_REGS_RSI] = tss->si;
-	ctxt->regs[VCPU_REGS_RDI] = tss->di;
+	*reg_write(ctxt, VCPU_REGS_RAX) = tss->ax;
+	*reg_write(ctxt, VCPU_REGS_RCX) = tss->cx;
+	*reg_write(ctxt, VCPU_REGS_RDX) = tss->dx;
+	*reg_write(ctxt, VCPU_REGS_RBX) = tss->bx;
+	*reg_write(ctxt, VCPU_REGS_RSP) = tss->sp;
+	*reg_write(ctxt, VCPU_REGS_RBP) = tss->bp;
+	*reg_write(ctxt, VCPU_REGS_RSI) = tss->si;
+	*reg_write(ctxt, VCPU_REGS_RDI) = tss->di;
 
 	/*
 	 * SDM says that segment selectors are loaded before segment
@@ -2470,14 +2494,14 @@ static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
 	tss->cr3 = ctxt->ops->get_cr(ctxt, 3);
 	tss->eip = ctxt->_eip;
 	tss->eflags = ctxt->eflags;
-	tss->eax = ctxt->regs[VCPU_REGS_RAX];
-	tss->ecx = ctxt->regs[VCPU_REGS_RCX];
-	tss->edx = ctxt->regs[VCPU_REGS_RDX];
-	tss->ebx = ctxt->regs[VCPU_REGS_RBX];
-	tss->esp = ctxt->regs[VCPU_REGS_RSP];
-	tss->ebp = ctxt->regs[VCPU_REGS_RBP];
-	tss->esi = ctxt->regs[VCPU_REGS_RSI];
-	tss->edi = ctxt->regs[VCPU_REGS_RDI];
+	tss->eax = reg_read(ctxt, VCPU_REGS_RAX);
+	tss->ecx = reg_read(ctxt, VCPU_REGS_RCX);
+	tss->edx = reg_read(ctxt, VCPU_REGS_RDX);
+	tss->ebx = reg_read(ctxt, VCPU_REGS_RBX);
+	tss->esp = reg_read(ctxt, VCPU_REGS_RSP);
+	tss->ebp = reg_read(ctxt, VCPU_REGS_RBP);
+	tss->esi = reg_read(ctxt, VCPU_REGS_RSI);
+	tss->edi = reg_read(ctxt, VCPU_REGS_RDI);
 
 	tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
 	tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
@@ -2499,14 +2523,14 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
 	ctxt->eflags = tss->eflags | 2;
 
 	/* General purpose registers */
-	ctxt->regs[VCPU_REGS_RAX] = tss->eax;
-	ctxt->regs[VCPU_REGS_RCX] = tss->ecx;
-	ctxt->regs[VCPU_REGS_RDX] = tss->edx;
-	ctxt->regs[VCPU_REGS_RBX] = tss->ebx;
-	ctxt->regs[VCPU_REGS_RSP] = tss->esp;
-	ctxt->regs[VCPU_REGS_RBP] = tss->ebp;
-	ctxt->regs[VCPU_REGS_RSI] = tss->esi;
-	ctxt->regs[VCPU_REGS_RDI] = tss->edi;
+	*reg_write(ctxt, VCPU_REGS_RAX) = tss->eax;
+	*reg_write(ctxt, VCPU_REGS_RCX) = tss->ecx;
+	*reg_write(ctxt, VCPU_REGS_RDX) = tss->edx;
+	*reg_write(ctxt, VCPU_REGS_RBX) = tss->ebx;
+	*reg_write(ctxt, VCPU_REGS_RSP) = tss->esp;
+	*reg_write(ctxt, VCPU_REGS_RBP) = tss->ebp;
+	*reg_write(ctxt, VCPU_REGS_RSI) = tss->esi;
+	*reg_write(ctxt, VCPU_REGS_RDI) = tss->edi;
 
 	/*
 	 * SDM says that segment selectors are loaded before segment
@@ -2738,8 +2762,8 @@ static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg,
 {
 	int df = (ctxt->eflags & EFLG_DF) ? -1 : 1;
 
-	register_address_increment(ctxt, &ctxt->regs[reg], df * op->bytes);
-	op->addr.mem.ea = register_address(ctxt, ctxt->regs[reg]);
+	register_address_increment(ctxt, reg_rmw(ctxt, reg), df * op->bytes);
+	op->addr.mem.ea = register_address(ctxt, reg_read(ctxt, reg));
 	op->addr.mem.seg = seg;
 }
 
@@ -2825,7 +2849,7 @@ static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
 	rc = emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
-	register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP], ctxt->src.val);
+	register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RSP), ctxt->src.val);
 	return X86EMUL_CONTINUE;
 }
 
@@ -2915,7 +2939,7 @@ static int em_cwd(struct x86_emulate_ctxt *ctxt)
 {
 	ctxt->dst.type = OP_REG;
 	ctxt->dst.bytes = ctxt->src.bytes;
-	ctxt->dst.addr.reg = &ctxt->regs[VCPU_REGS_RDX];
+	ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
 	ctxt->dst.val = ~((ctxt->src.val >> (ctxt->src.bytes * 8 - 1)) - 1);
 
 	return X86EMUL_CONTINUE;
@@ -2926,8 +2950,8 @@ static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
 	u64 tsc = 0;
 
 	ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc);
-	ctxt->regs[VCPU_REGS_RAX] = (u32)tsc;
-	ctxt->regs[VCPU_REGS_RDX] = tsc >> 32;
+	*reg_write(ctxt, VCPU_REGS_RAX) = (u32)tsc;
+	*reg_write(ctxt, VCPU_REGS_RDX) = tsc >> 32;
 	return X86EMUL_CONTINUE;
 }
 
@@ -2935,10 +2959,10 @@ static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
 {
 	u64 pmc;
 
-	if (ctxt->ops->read_pmc(ctxt, ctxt->regs[VCPU_REGS_RCX], &pmc))
+	if (ctxt->ops->read_pmc(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &pmc))
 		return emulate_gp(ctxt, 0);
-	ctxt->regs[VCPU_REGS_RAX] = (u32)pmc;
-	ctxt->regs[VCPU_REGS_RDX] = pmc >> 32;
+	*reg_write(ctxt, VCPU_REGS_RAX) = (u32)pmc;
+	*reg_write(ctxt, VCPU_REGS_RDX) = pmc >> 32;
 	return X86EMUL_CONTINUE;
 }
 
@@ -2980,9 +3004,9 @@ static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
 {
 	u64 msr_data;
 
-	msr_data = (u32)ctxt->regs[VCPU_REGS_RAX]
-		| ((u64)ctxt->regs[VCPU_REGS_RDX] << 32);
-	if (ctxt->ops->set_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], msr_data))
+	msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX)
+		| ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32);
+	if (ctxt->ops->set_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), msr_data))
 		return emulate_gp(ctxt, 0);
 
 	return X86EMUL_CONTINUE;
@@ -2992,11 +3016,11 @@ static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
 {
 	u64 msr_data;
 
-	if (ctxt->ops->get_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], &msr_data))
+	if (ctxt->ops->get_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &msr_data))
 		return emulate_gp(ctxt, 0);
 
-	ctxt->regs[VCPU_REGS_RAX] = (u32)msr_data;
-	ctxt->regs[VCPU_REGS_RDX] = msr_data >> 32;
+	*reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data;
+	*reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32;
 	return X86EMUL_CONTINUE;
 }
 
@@ -3176,8 +3200,8 @@ static int em_lmsw(struct x86_emulate_ctxt *ctxt)
 
 static int em_loop(struct x86_emulate_ctxt *ctxt)
 {
-	register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RCX], -1);
-	if ((address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) != 0) &&
+	register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1);
+	if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX) != 0)) &&
 	    (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
 		jmp_rel(ctxt, ctxt->src.val);
 
@@ -3186,7 +3210,7 @@ static int em_loop(struct x86_emulate_ctxt *ctxt)
 
 static int em_jcxz(struct x86_emulate_ctxt *ctxt)
 {
-	if (address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) == 0)
+	if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX) == 0))
 		jmp_rel(ctxt, ctxt->src.val);
 
 	return X86EMUL_CONTINUE;
@@ -3274,20 +3298,20 @@ static int em_cpuid(struct x86_emulate_ctxt *ctxt)
 {
 	u32 eax, ebx, ecx, edx;
 
-	eax = ctxt->regs[VCPU_REGS_RAX];
-	ecx = ctxt->regs[VCPU_REGS_RCX];
+	eax = reg_read(ctxt, VCPU_REGS_RAX);
+	ecx = reg_read(ctxt, VCPU_REGS_RCX);
 	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
-	ctxt->regs[VCPU_REGS_RAX] = eax;
-	ctxt->regs[VCPU_REGS_RBX] = ebx;
-	ctxt->regs[VCPU_REGS_RCX] = ecx;
-	ctxt->regs[VCPU_REGS_RDX] = edx;
+	*reg_write(ctxt, VCPU_REGS_RAX) = eax;
+	*reg_write(ctxt, VCPU_REGS_RBX) = ebx;
+	*reg_write(ctxt, VCPU_REGS_RCX) = ecx;
+	*reg_write(ctxt, VCPU_REGS_RDX) = edx;
 	return X86EMUL_CONTINUE;
 }
 
 static int em_lahf(struct x86_emulate_ctxt *ctxt)
 {
-	ctxt->regs[VCPU_REGS_RAX] &= ~0xff00UL;
-	ctxt->regs[VCPU_REGS_RAX] |= (ctxt->eflags & 0xff) << 8;
+	*reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL;
+	*reg_rmw(ctxt, VCPU_REGS_RAX) |= (ctxt->eflags & 0xff) << 8;
 	return X86EMUL_CONTINUE;
 }
 
@@ -3444,7 +3468,7 @@ static int check_svme(struct x86_emulate_ctxt *ctxt)
 
 static int check_svme_pa(struct x86_emulate_ctxt *ctxt)
 {
-	u64 rax = ctxt->regs[VCPU_REGS_RAX];
+	u64 rax = reg_read(ctxt, VCPU_REGS_RAX);
 
 	/* Valid physical address? */
 	if (rax & 0xffff000000000000ULL)
@@ -3466,7 +3490,7 @@ static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
 static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
 {
 	u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
-	u64 rcx = ctxt->regs[VCPU_REGS_RCX];
+	u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
 
 	if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
 	    (rcx > 3))
@@ -3924,7 +3948,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
 	case OpAcc:
 		op->type = OP_REG;
 		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
-		op->addr.reg = &ctxt->regs[VCPU_REGS_RAX];
+		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
 		fetch_register_operand(op);
 		op->orig_val = op->val;
 		break;
@@ -3932,19 +3956,19 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
 		op->type = OP_MEM;
 		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
 		op->addr.mem.ea =
-			register_address(ctxt, ctxt->regs[VCPU_REGS_RDI]);
+			register_address(ctxt, reg_read(ctxt, VCPU_REGS_RDI));
 		op->addr.mem.seg = VCPU_SREG_ES;
 		op->val = 0;
 		break;
 	case OpDX:
 		op->type = OP_REG;
 		op->bytes = 2;
-		op->addr.reg = &ctxt->regs[VCPU_REGS_RDX];
+		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
 		fetch_register_operand(op);
 		break;
 	case OpCL:
 		op->bytes = 1;
-		op->val = ctxt->regs[VCPU_REGS_RCX] & 0xff;
+		op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff;
 		break;
 	case OpImmByte:
 		rc = decode_imm(ctxt, op, 1, true);
@@ -3975,7 +3999,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
 		op->type = OP_MEM;
 		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
 		op->addr.mem.ea =
-			register_address(ctxt, ctxt->regs[VCPU_REGS_RSI]);
+			register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSI));
 		op->addr.mem.seg = seg_override(ctxt);
 		op->val = 0;
 		break;
@@ -4281,6 +4305,14 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
 		read_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
 }
 
+static void writeback_registers(struct x86_emulate_ctxt *ctxt)
+{
+	unsigned reg;
+
+	for_each_set_bit(reg, (ulong *)&ctxt->regs_dirty, 16)
+		ctxt->ops->write_gpr(ctxt, reg, ctxt->_regs[reg]);
+}
+
 int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 {
 	struct x86_emulate_ops *ops = ctxt->ops;
@@ -4365,7 +4397,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 
 	if (ctxt->rep_prefix && (ctxt->d & String)) {
 		/* All REP prefixes have the same first termination condition */
-		if (address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) == 0) {
+		if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
 			ctxt->eip = ctxt->_eip;
 			goto done;
 		}
@@ -4438,7 +4470,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 		ctxt->dst.val = ctxt->src.addr.mem.ea;
 		break;
 	case 0x90 ... 0x97: /* nop / xchg reg, rax */
-		if (ctxt->dst.addr.reg == &ctxt->regs[VCPU_REGS_RAX])
+		if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX))
 			break;
 		rc = em_xchg(ctxt);
 		break;
@@ -4466,7 +4498,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 		rc = em_grp2(ctxt);
 		break;
 	case 0xd2 ... 0xd3:	/* Grp2 */
-		ctxt->src.val = ctxt->regs[VCPU_REGS_RCX];
+		ctxt->src.val = reg_read(ctxt, VCPU_REGS_RCX);
 		rc = em_grp2(ctxt);
 		break;
 	case 0xe9: /* jmp rel */
@@ -4521,14 +4553,14 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 
 	if (ctxt->rep_prefix && (ctxt->d & String)) {
 		struct read_cache *r = &ctxt->io_read;
-		register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RCX], -1);
+		register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1);
 
 		if (!string_insn_completed(ctxt)) {
 			/*
 			 * Re-enter guest when pio read ahead buffer is empty
 			 * or, if it is not used, after each 1024 iteration.
 			 */
-			if ((r->end != 0 || ctxt->regs[VCPU_REGS_RCX] & 0x3ff) &&
+			if ((r->end != 0 || reg_read(ctxt, VCPU_REGS_RCX) & 0x3ff) &&
 			    (r->end == 0 || r->end != r->pos)) {
 				/*
 				 * Reset read cache. Usually happens before
@@ -4536,6 +4568,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 				 * we have to do it here.
 				 */
 				ctxt->mem_read.end = 0;
+				writeback_registers(ctxt);
 				return EMULATION_RESTART;
 			}
 			goto done; /* skip rip writeback */
@@ -4550,6 +4583,9 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 	if (rc == X86EMUL_INTERCEPTED)
 		return EMULATION_INTERCEPTED;
 
+	if (rc == X86EMUL_CONTINUE)
+		writeback_registers(ctxt);
+
 	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
 
 twobyte_insn:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 59b5950..b8a8982 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4325,7 +4325,19 @@ static void emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
 	kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx);
 }
 
+static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
+{
+	return kvm_register_read(emul_to_vcpu(ctxt), reg);
+}
+
+static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val)
+{
+	kvm_register_write(emul_to_vcpu(ctxt), reg, val);
+}
+
 static struct x86_emulate_ops emulate_ops = {
+	.read_gpr            = emulator_read_gpr,
+	.write_gpr           = emulator_write_gpr,
 	.read_std            = kvm_read_guest_virt_system,
 	.write_std           = kvm_write_guest_virt_system,
 	.fetch               = kvm_fetch_guest_virt,
@@ -4360,14 +4372,6 @@ static void emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
 	.get_cpuid           = emulator_get_cpuid,
 };
 
-static void cache_all_regs(struct kvm_vcpu *vcpu)
-{
-	kvm_register_read(vcpu, VCPU_REGS_RAX);
-	kvm_register_read(vcpu, VCPU_REGS_RSP);
-	kvm_register_read(vcpu, VCPU_REGS_RIP);
-	vcpu->arch.regs_dirty = ~0;
-}
-
 static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
 {
 	u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask);
@@ -4394,12 +4398,10 @@ static void inject_emulated_exception(struct kvm_vcpu *vcpu)
 		kvm_queue_exception(vcpu, ctxt->exception.vector);
 }
 
-static void init_decode_cache(struct x86_emulate_ctxt *ctxt,
-			      const unsigned long *regs)
+static void init_decode_cache(struct x86_emulate_ctxt *ctxt)
 {
 	memset(&ctxt->twobyte, 0,
-	       (void *)&ctxt->regs - (void *)&ctxt->twobyte);
-	memcpy(ctxt->regs, regs, sizeof(ctxt->regs));
+	       (void *)&ctxt->_regs - (void *)&ctxt->twobyte);
 
 	ctxt->fetch.start = 0;
 	ctxt->fetch.end = 0;
@@ -4414,14 +4416,6 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
 	struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
 	int cs_db, cs_l;
 
-	/*
-	 * TODO: fix emulate.c to use guest_read/write_register
-	 * instead of direct ->regs accesses, can save hundred cycles
-	 * on Intel for instructions that don't read/change RSP, for
-	 * for example.
-	 */
-	cache_all_regs(vcpu);
-
 	kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
 
 	ctxt->eflags = kvm_get_rflags(vcpu);
@@ -4433,7 +4427,7 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
 							  X86EMUL_MODE_PROT16;
 	ctxt->guest_mode = is_guest_mode(vcpu);
 
-	init_decode_cache(ctxt, vcpu->arch.regs);
+	init_decode_cache(ctxt);
 	vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
 }
 
@@ -4453,7 +4447,6 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
 		return EMULATE_FAIL;
 
 	ctxt->eip = ctxt->_eip;
-	memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
 	kvm_rip_write(vcpu, ctxt->eip);
 	kvm_set_rflags(vcpu, ctxt->eflags);
 
@@ -4601,7 +4594,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 	   changes registers values  during IO operation */
 	if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
 		vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
-		memcpy(ctxt->regs, vcpu->arch.regs, sizeof ctxt->regs);
+		ctxt->regs_valid = 0;
 	}
 
 restart:
@@ -4639,7 +4632,6 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 		toggle_interruptibility(vcpu, ctxt->interruptibility);
 		kvm_set_rflags(vcpu, ctxt->eflags);
 		kvm_make_request(KVM_REQ_EVENT, vcpu);
-		memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
 		vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
 		kvm_rip_write(vcpu, ctxt->eip);
 	} else
@@ -5593,7 +5585,11 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 		 * backdoor interface) need this to work
 		 */
 		struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
-		memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
+		unsigned reg;
+
+		for_each_set_bit(reg, (ulong *)&ctxt->regs_dirty, 16)
+			kvm_register_write(vcpu, reg, ctxt->_regs[reg]);
+
 		vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
 	}
 	regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
@@ -5724,6 +5720,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
 {
 	struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
 	int ret;
+	unsigned reg;
 
 	init_emulate_ctxt(vcpu);
 
@@ -5733,7 +5730,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
 	if (ret)
 		return EMULATE_FAIL;
 
-	memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
+	for_each_set_bit(reg, (ulong *)&ctxt->regs_dirty, 16)
+		kvm_register_write(vcpu, reg, ctxt->_regs[reg]);
 	kvm_rip_write(vcpu, ctxt->eip);
 	kvm_set_rflags(vcpu, ctxt->eflags);
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
-- 
1.7.11.2


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] KVM: x86 emulator: access GPRs on demand
  2012-07-19 12:14 [PATCH] KVM: x86 emulator: access GPRs on demand Avi Kivity
@ 2012-08-07 21:23 ` Marcelo Tosatti
  2012-08-07 21:24   ` Marcelo Tosatti
  2012-08-08 12:38   ` Avi Kivity
  0 siblings, 2 replies; 4+ messages in thread
From: Marcelo Tosatti @ 2012-08-07 21:23 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Gleb Natapov, kvm

On Thu, Jul 19, 2012 at 03:14:38PM +0300, Avi Kivity wrote:
> Instead of populating the the entire register file, read in registers
> as they are accessed, and write back only the modified ones.  This
> saves a VMREAD and VMWRITE on Intel (for rsp, since it is not usually
> used during emulation), and a two 128-byte copies for the registers.
> 
> Signed-off-by: Avi Kivity <avi@redhat.com>
> ---
>  arch/x86/include/asm/kvm_emulate.h |  17 ++-
>  arch/x86/kvm/emulate.c             | 268 +++++++++++++++++++++----------------
>  arch/x86/kvm/x86.c                 |  50 ++++---
>  3 files changed, 192 insertions(+), 143 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
> index c764f43..2f1da16 100644
> --- a/arch/x86/include/asm/kvm_emulate.h
> +++ b/arch/x86/include/asm/kvm_emulate.h
> @@ -86,6 +86,19 @@ struct x86_instruction_info {
>  
>  struct x86_emulate_ops {
>  	/*
> +	 * read_gpr: read a general purpose register (rax - r15)
> +	 *
> +	 * @reg: gpr number.
> +	 */
> +	ulong (*read_gpr)(struct x86_emulate_ctxt *ctxt, unsigned reg);
> +	/*
> +	 * write_gpr: write a general purpose register (rax - r15)
> +	 *
> +	 * @reg: gpr number.
> +	 * @val: value to write.
> +	 */
> +	void (*write_gpr)(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val);
> +	/*
>  	 * read_std: Read bytes of standard (non-emulated/special) memory.
>  	 *           Used for descriptor reading.
>  	 *  @addr:  [IN ] Linear address from which to read.
> @@ -281,8 +294,10 @@ struct x86_emulate_ctxt {
>  	bool rip_relative;
>  	unsigned long _eip;
>  	struct operand memop;
> +	u32 regs_valid;  /* bitmaps of registers in _regs[] that can be read */
> +	u32 regs_dirty;  /* bitmaps of registers in _regs[] that have been written */

emul_regs_dirty (to avoid with the other regs_dirty).

>  	/* Fields above regs are cleared together. */
> -	unsigned long regs[NR_VCPU_REGS];
> +	unsigned long _regs[NR_VCPU_REGS];
>  	struct operand *memopp;
>  	struct fetch_cache fetch;
>  	struct read_cache io_read;
> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> index 97d9a99..468d26e 100644
> --- a/arch/x86/kvm/emulate.c
> +++ b/arch/x86/kvm/emulate.c
> @@ -202,6 +202,28 @@ struct gprefix {
>  #define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
>  #define EFLG_RESERVED_ONE_MASK 2
>  
> +static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr)
> +{
> +	if (!(ctxt->regs_valid & (1 << nr))) {
> +		ctxt->regs_valid |= 1 << nr;
> +		ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr);
> +	}
> +	return ctxt->_regs[nr];
> +}
> +
> +static ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr)
> +{
> +	ctxt->regs_valid |= 1 << nr;
> +	ctxt->regs_dirty |= 1 << nr;
> +	return &ctxt->_regs[nr];
> +}
> +
> +static ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr)
> +{
> +	reg_read(ctxt, nr);
> +	return reg_write(ctxt, nr);
> +}
> +
>  /*
>   * Instruction emulation:
>   * Most instructions are emulated directly via a fragment of inline assembly
> @@ -374,8 +396,8 @@ struct gprefix {
>  #define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex)			\
>  	do {								\
>  		unsigned long _tmp;					\
> -		ulong *rax = &(ctxt)->regs[VCPU_REGS_RAX];		\
> -		ulong *rdx = &(ctxt)->regs[VCPU_REGS_RDX];		\
> +		ulong *rax = reg_rmw((ctxt), VCPU_REGS_RAX);		\
> +		ulong *rdx = reg_rmw((ctxt), VCPU_REGS_RDX);		\
>  									\
>  		__asm__ __volatile__ (					\
>  			_PRE_EFLAGS("0", "5", "1")			\
> @@ -773,14 +795,15 @@ static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
>   * pointer into the block that addresses the relevant register.
>   * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
>   */
> -static void *decode_register(u8 modrm_reg, unsigned long *regs,
> +static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
>  			     int highbyte_regs)
>  {
>  	void *p;
>  
> -	p = &regs[modrm_reg];
>  	if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
> -		p = (unsigned char *)&regs[modrm_reg & 3] + 1;
> +		p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
> +	else
> +		p = reg_rmw(ctxt, modrm_reg);
>  	return p;
>  }
>  
> @@ -969,10 +992,10 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
>  
>  	op->type = OP_REG;
>  	if (ctxt->d & ByteOp) {
> -		op->addr.reg = decode_register(reg, ctxt->regs, highbyte_regs);
> +		op->addr.reg = decode_register(ctxt, reg, highbyte_regs);
>  		op->bytes = 1;
>  	} else {
> -		op->addr.reg = decode_register(reg, ctxt->regs, 0);
> +		op->addr.reg = decode_register(ctxt, reg, 0);
>  		op->bytes = ctxt->op_bytes;
>  	}
>  	fetch_register_operand(op);
> @@ -1007,8 +1030,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
>  	if (ctxt->modrm_mod == 3) {
>  		op->type = OP_REG;
>  		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
> -		op->addr.reg = decode_register(ctxt->modrm_rm,
> -					       ctxt->regs, ctxt->d & ByteOp);
> +		op->addr.reg = decode_register(ctxt, ctxt->modrm_rm, ctxt->d & ByteOp);
>  		if (ctxt->d & Sse) {
>  			op->type = OP_XMM;
>  			op->bytes = 16;
> @@ -1029,10 +1051,10 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
>  	op->type = OP_MEM;
>  
>  	if (ctxt->ad_bytes == 2) {
> -		unsigned bx = ctxt->regs[VCPU_REGS_RBX];
> -		unsigned bp = ctxt->regs[VCPU_REGS_RBP];
> -		unsigned si = ctxt->regs[VCPU_REGS_RSI];
> -		unsigned di = ctxt->regs[VCPU_REGS_RDI];
> +		unsigned bx = reg_read(ctxt, VCPU_REGS_RBX);
> +		unsigned bp = reg_read(ctxt, VCPU_REGS_RBP);
> +		unsigned si = reg_read(ctxt, VCPU_REGS_RSI);
> +		unsigned di = reg_read(ctxt, VCPU_REGS_RDI);
>  
>  		/* 16-bit ModR/M decode. */
>  		switch (ctxt->modrm_mod) {
> @@ -1089,17 +1111,17 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
>  			if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0)
>  				modrm_ea += insn_fetch(s32, ctxt);
>  			else {
> -				modrm_ea += ctxt->regs[base_reg];
> +				modrm_ea += reg_read(ctxt, base_reg);
>  				adjust_modrm_seg(ctxt, base_reg);
>  			}
>  			if (index_reg != 4)
> -				modrm_ea += ctxt->regs[index_reg] << scale;
> +				modrm_ea += reg_read(ctxt, index_reg) << scale;
>  		} else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
>  			if (ctxt->mode == X86EMUL_MODE_PROT64)
>  				ctxt->rip_relative = 1;
>  		} else {
>  			base_reg = ctxt->modrm_rm;
> -			modrm_ea += ctxt->regs[base_reg];
> +			modrm_ea += reg_read(ctxt, base_reg);
>  			adjust_modrm_seg(ctxt, base_reg);
>  		}
>  		switch (ctxt->modrm_mod) {
> @@ -1240,10 +1262,10 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
>  	if (rc->pos == rc->end) { /* refill pio read ahead */
>  		unsigned int in_page, n;
>  		unsigned int count = ctxt->rep_prefix ?
> -			address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) : 1;
> +			address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1;
>  		in_page = (ctxt->eflags & EFLG_DF) ?
> -			offset_in_page(ctxt->regs[VCPU_REGS_RDI]) :
> -			PAGE_SIZE - offset_in_page(ctxt->regs[VCPU_REGS_RDI]);
> +			offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
> +			PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
>  		n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size,
>  			count);
>  		if (n == 0)
> @@ -1522,8 +1544,8 @@ static int push(struct x86_emulate_ctxt *ctxt, void *data, int bytes)
>  {
>  	struct segmented_address addr;
>  
> -	register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP], -bytes);
> -	addr.ea = register_address(ctxt, ctxt->regs[VCPU_REGS_RSP]);
> +	register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RSP), -bytes);
> +	addr.ea = register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSP));
>  	addr.seg = VCPU_SREG_SS;
>  
>  	return segmented_write(ctxt, addr, data, bytes);
> @@ -1542,13 +1564,13 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt,
>  	int rc;
>  	struct segmented_address addr;
>  
> -	addr.ea = register_address(ctxt, ctxt->regs[VCPU_REGS_RSP]);
> +	addr.ea = register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSP));
>  	addr.seg = VCPU_SREG_SS;
>  	rc = segmented_read(ctxt, addr, dest, len);
>  	if (rc != X86EMUL_CONTINUE)
>  		return rc;
>  
> -	register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP], len);
> +	register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RSP), len);
>  	return rc;
>  }
>  
> @@ -1610,26 +1632,28 @@ static int em_enter(struct x86_emulate_ctxt *ctxt)
>  	int rc;
>  	unsigned frame_size = ctxt->src.val;
>  	unsigned nesting_level = ctxt->src2.val & 31;
> +	ulong rbp;
>  
>  	if (nesting_level)
>  		return X86EMUL_UNHANDLEABLE;
>  
> -	rc = push(ctxt, &ctxt->regs[VCPU_REGS_RBP], stack_size(ctxt));
> +	rbp = reg_read(ctxt, VCPU_REGS_RBP);
> +	rc = push(ctxt, &rbp, stack_size(ctxt));
>  	if (rc != X86EMUL_CONTINUE)
>  		return rc;
> -	assign_masked(&ctxt->regs[VCPU_REGS_RBP], ctxt->regs[VCPU_REGS_RSP],
> +	assign_masked(reg_rmw(ctxt, VCPU_REGS_RBP), reg_read(ctxt, VCPU_REGS_RSP),
>  		      stack_mask(ctxt));
> -	assign_masked(&ctxt->regs[VCPU_REGS_RSP],
> -		      ctxt->regs[VCPU_REGS_RSP] - frame_size,
> +	assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP),
> +		      reg_read(ctxt, VCPU_REGS_RSP) - frame_size,
>  		      stack_mask(ctxt));
>  	return X86EMUL_CONTINUE;
>  }
>  
>  static int em_leave(struct x86_emulate_ctxt *ctxt)
>  {
> -	assign_masked(&ctxt->regs[VCPU_REGS_RSP], ctxt->regs[VCPU_REGS_RBP],
> +	assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP), reg_read(ctxt, VCPU_REGS_RBP),
>  		      stack_mask(ctxt));
> -	return emulate_pop(ctxt, &ctxt->regs[VCPU_REGS_RBP], ctxt->op_bytes);
> +	return emulate_pop(ctxt, reg_rmw(ctxt, VCPU_REGS_RBP), ctxt->op_bytes);
>  }
>  
>  static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
> @@ -1657,13 +1681,13 @@ static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
>  
>  static int em_pusha(struct x86_emulate_ctxt *ctxt)
>  {
> -	unsigned long old_esp = ctxt->regs[VCPU_REGS_RSP];
> +	unsigned long old_esp = reg_read(ctxt, VCPU_REGS_RSP);
>  	int rc = X86EMUL_CONTINUE;
>  	int reg = VCPU_REGS_RAX;
>  
>  	while (reg <= VCPU_REGS_RDI) {
>  		(reg == VCPU_REGS_RSP) ?
> -		(ctxt->src.val = old_esp) : (ctxt->src.val = ctxt->regs[reg]);
> +		(ctxt->src.val = old_esp) : (ctxt->src.val = reg_read(ctxt, reg));
>  
>  		rc = em_push(ctxt);
>  		if (rc != X86EMUL_CONTINUE)
> @@ -1688,12 +1712,12 @@ static int em_popa(struct x86_emulate_ctxt *ctxt)
>  
>  	while (reg >= VCPU_REGS_RAX) {
>  		if (reg == VCPU_REGS_RSP) {
> -			register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP],
> +			register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RSP),
>  							ctxt->op_bytes);
>  			--reg;
>  		}
>  
> -		rc = emulate_pop(ctxt, &ctxt->regs[reg], ctxt->op_bytes);
> +		rc = emulate_pop(ctxt, reg_rmw(ctxt, reg), ctxt->op_bytes);
>  		if (rc != X86EMUL_CONTINUE)
>  			break;
>  		--reg;
> @@ -1961,14 +1985,14 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
>  {
>  	u64 old = ctxt->dst.orig_val64;
>  
> -	if (((u32) (old >> 0) != (u32) ctxt->regs[VCPU_REGS_RAX]) ||
> -	    ((u32) (old >> 32) != (u32) ctxt->regs[VCPU_REGS_RDX])) {
> -		ctxt->regs[VCPU_REGS_RAX] = (u32) (old >> 0);
> -		ctxt->regs[VCPU_REGS_RDX] = (u32) (old >> 32);
> +	if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
> +	    ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
> +		*reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
> +		*reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
>  		ctxt->eflags &= ~EFLG_ZF;
>  	} else {
> -		ctxt->dst.val64 = ((u64)ctxt->regs[VCPU_REGS_RCX] << 32) |
> -			(u32) ctxt->regs[VCPU_REGS_RBX];
> +		ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
> +			(u32) reg_read(ctxt, VCPU_REGS_RBX);
>  
>  		ctxt->eflags |= EFLG_ZF;
>  	}
> @@ -2004,7 +2028,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
>  {
>  	/* Save real source value, then compare EAX against destination. */
>  	ctxt->src.orig_val = ctxt->src.val;
> -	ctxt->src.val = ctxt->regs[VCPU_REGS_RAX];
> +	ctxt->src.val = reg_read(ctxt, VCPU_REGS_RAX);
>  	emulate_2op_SrcV(ctxt, "cmp");
>  
>  	if (ctxt->eflags & EFLG_ZF) {
> @@ -2013,7 +2037,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
>  	} else {
>  		/* Failure: write the value we saw to EAX. */
>  		ctxt->dst.type = OP_REG;
> -		ctxt->dst.addr.reg = (unsigned long *)&ctxt->regs[VCPU_REGS_RAX];
> +		ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
>  	}
>  	return X86EMUL_CONTINUE;
>  }
> @@ -2153,10 +2177,10 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
>  	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
>  	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
>  
> -	ctxt->regs[VCPU_REGS_RCX] = ctxt->_eip;
> +	*reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip;
>  	if (efer & EFER_LMA) {
>  #ifdef CONFIG_X86_64
> -		ctxt->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF;
> +		*reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags & ~EFLG_RF;
>  
>  		ops->get_msr(ctxt,
>  			     ctxt->mode == X86EMUL_MODE_PROT64 ?
> @@ -2235,7 +2259,7 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
>  	ctxt->_eip = msr_data;
>  
>  	ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
> -	ctxt->regs[VCPU_REGS_RSP] = msr_data;
> +	*reg_write(ctxt, VCPU_REGS_RSP) = msr_data;
>  
>  	return X86EMUL_CONTINUE;
>  }
> @@ -2285,8 +2309,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
>  	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
>  	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
>  
> -	ctxt->_eip = ctxt->regs[VCPU_REGS_RDX];
> -	ctxt->regs[VCPU_REGS_RSP] = ctxt->regs[VCPU_REGS_RCX];
> +	ctxt->_eip = reg_read(ctxt, VCPU_REGS_RDX);
> +	*reg_write(ctxt, VCPU_REGS_RSP) = reg_read(ctxt, VCPU_REGS_RCX);
>  
>  	return X86EMUL_CONTINUE;
>  }
> @@ -2355,14 +2379,14 @@ static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
>  {
>  	tss->ip = ctxt->_eip;
>  	tss->flag = ctxt->eflags;
> -	tss->ax = ctxt->regs[VCPU_REGS_RAX];
> -	tss->cx = ctxt->regs[VCPU_REGS_RCX];
> -	tss->dx = ctxt->regs[VCPU_REGS_RDX];
> -	tss->bx = ctxt->regs[VCPU_REGS_RBX];
> -	tss->sp = ctxt->regs[VCPU_REGS_RSP];
> -	tss->bp = ctxt->regs[VCPU_REGS_RBP];
> -	tss->si = ctxt->regs[VCPU_REGS_RSI];
> -	tss->di = ctxt->regs[VCPU_REGS_RDI];
> +	tss->ax = reg_read(ctxt, VCPU_REGS_RAX);
> +	tss->cx = reg_read(ctxt, VCPU_REGS_RCX);
> +	tss->dx = reg_read(ctxt, VCPU_REGS_RDX);
> +	tss->bx = reg_read(ctxt, VCPU_REGS_RBX);
> +	tss->sp = reg_read(ctxt, VCPU_REGS_RSP);
> +	tss->bp = reg_read(ctxt, VCPU_REGS_RBP);
> +	tss->si = reg_read(ctxt, VCPU_REGS_RSI);
> +	tss->di = reg_read(ctxt, VCPU_REGS_RDI);
>  
>  	tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
>  	tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
> @@ -2378,14 +2402,14 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
>  
>  	ctxt->_eip = tss->ip;
>  	ctxt->eflags = tss->flag | 2;
> -	ctxt->regs[VCPU_REGS_RAX] = tss->ax;
> -	ctxt->regs[VCPU_REGS_RCX] = tss->cx;
> -	ctxt->regs[VCPU_REGS_RDX] = tss->dx;
> -	ctxt->regs[VCPU_REGS_RBX] = tss->bx;
> -	ctxt->regs[VCPU_REGS_RSP] = tss->sp;
> -	ctxt->regs[VCPU_REGS_RBP] = tss->bp;
> -	ctxt->regs[VCPU_REGS_RSI] = tss->si;
> -	ctxt->regs[VCPU_REGS_RDI] = tss->di;
> +	*reg_write(ctxt, VCPU_REGS_RAX) = tss->ax;
> +	*reg_write(ctxt, VCPU_REGS_RCX) = tss->cx;
> +	*reg_write(ctxt, VCPU_REGS_RDX) = tss->dx;
> +	*reg_write(ctxt, VCPU_REGS_RBX) = tss->bx;
> +	*reg_write(ctxt, VCPU_REGS_RSP) = tss->sp;
> +	*reg_write(ctxt, VCPU_REGS_RBP) = tss->bp;
> +	*reg_write(ctxt, VCPU_REGS_RSI) = tss->si;
> +	*reg_write(ctxt, VCPU_REGS_RDI) = tss->di;
>  
>  	/*
>  	 * SDM says that segment selectors are loaded before segment
> @@ -2470,14 +2494,14 @@ static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
>  	tss->cr3 = ctxt->ops->get_cr(ctxt, 3);
>  	tss->eip = ctxt->_eip;
>  	tss->eflags = ctxt->eflags;
> -	tss->eax = ctxt->regs[VCPU_REGS_RAX];
> -	tss->ecx = ctxt->regs[VCPU_REGS_RCX];
> -	tss->edx = ctxt->regs[VCPU_REGS_RDX];
> -	tss->ebx = ctxt->regs[VCPU_REGS_RBX];
> -	tss->esp = ctxt->regs[VCPU_REGS_RSP];
> -	tss->ebp = ctxt->regs[VCPU_REGS_RBP];
> -	tss->esi = ctxt->regs[VCPU_REGS_RSI];
> -	tss->edi = ctxt->regs[VCPU_REGS_RDI];
> +	tss->eax = reg_read(ctxt, VCPU_REGS_RAX);
> +	tss->ecx = reg_read(ctxt, VCPU_REGS_RCX);
> +	tss->edx = reg_read(ctxt, VCPU_REGS_RDX);
> +	tss->ebx = reg_read(ctxt, VCPU_REGS_RBX);
> +	tss->esp = reg_read(ctxt, VCPU_REGS_RSP);
> +	tss->ebp = reg_read(ctxt, VCPU_REGS_RBP);
> +	tss->esi = reg_read(ctxt, VCPU_REGS_RSI);
> +	tss->edi = reg_read(ctxt, VCPU_REGS_RDI);
>  
>  	tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
>  	tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
> @@ -2499,14 +2523,14 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
>  	ctxt->eflags = tss->eflags | 2;
>  
>  	/* General purpose registers */
> -	ctxt->regs[VCPU_REGS_RAX] = tss->eax;
> -	ctxt->regs[VCPU_REGS_RCX] = tss->ecx;
> -	ctxt->regs[VCPU_REGS_RDX] = tss->edx;
> -	ctxt->regs[VCPU_REGS_RBX] = tss->ebx;
> -	ctxt->regs[VCPU_REGS_RSP] = tss->esp;
> -	ctxt->regs[VCPU_REGS_RBP] = tss->ebp;
> -	ctxt->regs[VCPU_REGS_RSI] = tss->esi;
> -	ctxt->regs[VCPU_REGS_RDI] = tss->edi;
> +	*reg_write(ctxt, VCPU_REGS_RAX) = tss->eax;
> +	*reg_write(ctxt, VCPU_REGS_RCX) = tss->ecx;
> +	*reg_write(ctxt, VCPU_REGS_RDX) = tss->edx;
> +	*reg_write(ctxt, VCPU_REGS_RBX) = tss->ebx;
> +	*reg_write(ctxt, VCPU_REGS_RSP) = tss->esp;
> +	*reg_write(ctxt, VCPU_REGS_RBP) = tss->ebp;
> +	*reg_write(ctxt, VCPU_REGS_RSI) = tss->esi;
> +	*reg_write(ctxt, VCPU_REGS_RDI) = tss->edi;
>  
>  	/*
>  	 * SDM says that segment selectors are loaded before segment
> @@ -2738,8 +2762,8 @@ static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg,
>  {
>  	int df = (ctxt->eflags & EFLG_DF) ? -1 : 1;
>  
> -	register_address_increment(ctxt, &ctxt->regs[reg], df * op->bytes);
> -	op->addr.mem.ea = register_address(ctxt, ctxt->regs[reg]);
> +	register_address_increment(ctxt, reg_rmw(ctxt, reg), df * op->bytes);
> +	op->addr.mem.ea = register_address(ctxt, reg_read(ctxt, reg));
>  	op->addr.mem.seg = seg;
>  }
>  
> @@ -2825,7 +2849,7 @@ static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
>  	rc = emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
>  	if (rc != X86EMUL_CONTINUE)
>  		return rc;
> -	register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP], ctxt->src.val);
> +	register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RSP), ctxt->src.val);
>  	return X86EMUL_CONTINUE;
>  }
>  
> @@ -2915,7 +2939,7 @@ static int em_cwd(struct x86_emulate_ctxt *ctxt)
>  {
>  	ctxt->dst.type = OP_REG;
>  	ctxt->dst.bytes = ctxt->src.bytes;
> -	ctxt->dst.addr.reg = &ctxt->regs[VCPU_REGS_RDX];
> +	ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
>  	ctxt->dst.val = ~((ctxt->src.val >> (ctxt->src.bytes * 8 - 1)) - 1);
>  
>  	return X86EMUL_CONTINUE;
> @@ -2926,8 +2950,8 @@ static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
>  	u64 tsc = 0;
>  
>  	ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc);
> -	ctxt->regs[VCPU_REGS_RAX] = (u32)tsc;
> -	ctxt->regs[VCPU_REGS_RDX] = tsc >> 32;
> +	*reg_write(ctxt, VCPU_REGS_RAX) = (u32)tsc;
> +	*reg_write(ctxt, VCPU_REGS_RDX) = tsc >> 32;
>  	return X86EMUL_CONTINUE;
>  }
>  
> @@ -2935,10 +2959,10 @@ static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
>  {
>  	u64 pmc;
>  
> -	if (ctxt->ops->read_pmc(ctxt, ctxt->regs[VCPU_REGS_RCX], &pmc))
> +	if (ctxt->ops->read_pmc(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &pmc))
>  		return emulate_gp(ctxt, 0);
> -	ctxt->regs[VCPU_REGS_RAX] = (u32)pmc;
> -	ctxt->regs[VCPU_REGS_RDX] = pmc >> 32;
> +	*reg_write(ctxt, VCPU_REGS_RAX) = (u32)pmc;
> +	*reg_write(ctxt, VCPU_REGS_RDX) = pmc >> 32;
>  	return X86EMUL_CONTINUE;
>  }
>  
> @@ -2980,9 +3004,9 @@ static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
>  {
>  	u64 msr_data;
>  
> -	msr_data = (u32)ctxt->regs[VCPU_REGS_RAX]
> -		| ((u64)ctxt->regs[VCPU_REGS_RDX] << 32);
> -	if (ctxt->ops->set_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], msr_data))
> +	msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX)
> +		| ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32);
> +	if (ctxt->ops->set_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), msr_data))
>  		return emulate_gp(ctxt, 0);
>  
>  	return X86EMUL_CONTINUE;
> @@ -2992,11 +3016,11 @@ static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
>  {
>  	u64 msr_data;
>  
> -	if (ctxt->ops->get_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], &msr_data))
> +	if (ctxt->ops->get_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &msr_data))
>  		return emulate_gp(ctxt, 0);
>  
> -	ctxt->regs[VCPU_REGS_RAX] = (u32)msr_data;
> -	ctxt->regs[VCPU_REGS_RDX] = msr_data >> 32;
> +	*reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data;
> +	*reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32;
>  	return X86EMUL_CONTINUE;
>  }
>  
> @@ -3176,8 +3200,8 @@ static int em_lmsw(struct x86_emulate_ctxt *ctxt)
>  
>  static int em_loop(struct x86_emulate_ctxt *ctxt)
>  {
> -	register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RCX], -1);
> -	if ((address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) != 0) &&
> +	register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1);
> +	if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX) != 0)) &&
>  	    (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
>  		jmp_rel(ctxt, ctxt->src.val);
>  
> @@ -3186,7 +3210,7 @@ static int em_loop(struct x86_emulate_ctxt *ctxt)
>  
>  static int em_jcxz(struct x86_emulate_ctxt *ctxt)
>  {
> -	if (address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) == 0)
> +	if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX) == 0))
>  		jmp_rel(ctxt, ctxt->src.val);
>  
>  	return X86EMUL_CONTINUE;
> @@ -3274,20 +3298,20 @@ static int em_cpuid(struct x86_emulate_ctxt *ctxt)
>  {
>  	u32 eax, ebx, ecx, edx;
>  
> -	eax = ctxt->regs[VCPU_REGS_RAX];
> -	ecx = ctxt->regs[VCPU_REGS_RCX];
> +	eax = reg_read(ctxt, VCPU_REGS_RAX);
> +	ecx = reg_read(ctxt, VCPU_REGS_RCX);
>  	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
> -	ctxt->regs[VCPU_REGS_RAX] = eax;
> -	ctxt->regs[VCPU_REGS_RBX] = ebx;
> -	ctxt->regs[VCPU_REGS_RCX] = ecx;
> -	ctxt->regs[VCPU_REGS_RDX] = edx;
> +	*reg_write(ctxt, VCPU_REGS_RAX) = eax;
> +	*reg_write(ctxt, VCPU_REGS_RBX) = ebx;
> +	*reg_write(ctxt, VCPU_REGS_RCX) = ecx;
> +	*reg_write(ctxt, VCPU_REGS_RDX) = edx;
>  	return X86EMUL_CONTINUE;
>  }
>  
>  static int em_lahf(struct x86_emulate_ctxt *ctxt)
>  {
> -	ctxt->regs[VCPU_REGS_RAX] &= ~0xff00UL;
> -	ctxt->regs[VCPU_REGS_RAX] |= (ctxt->eflags & 0xff) << 8;
> +	*reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL;
> +	*reg_rmw(ctxt, VCPU_REGS_RAX) |= (ctxt->eflags & 0xff) << 8;
>  	return X86EMUL_CONTINUE;
>  }
>  
> @@ -3444,7 +3468,7 @@ static int check_svme(struct x86_emulate_ctxt *ctxt)
>  
>  static int check_svme_pa(struct x86_emulate_ctxt *ctxt)
>  {
> -	u64 rax = ctxt->regs[VCPU_REGS_RAX];
> +	u64 rax = reg_read(ctxt, VCPU_REGS_RAX);
>  
>  	/* Valid physical address? */
>  	if (rax & 0xffff000000000000ULL)
> @@ -3466,7 +3490,7 @@ static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
>  static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
>  {
>  	u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
> -	u64 rcx = ctxt->regs[VCPU_REGS_RCX];
> +	u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
>  
>  	if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
>  	    (rcx > 3))
> @@ -3924,7 +3948,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
>  	case OpAcc:
>  		op->type = OP_REG;
>  		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
> -		op->addr.reg = &ctxt->regs[VCPU_REGS_RAX];
> +		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
>  		fetch_register_operand(op);
>  		op->orig_val = op->val;
>  		break;
> @@ -3932,19 +3956,19 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
>  		op->type = OP_MEM;
>  		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
>  		op->addr.mem.ea =
> -			register_address(ctxt, ctxt->regs[VCPU_REGS_RDI]);
> +			register_address(ctxt, reg_read(ctxt, VCPU_REGS_RDI));
>  		op->addr.mem.seg = VCPU_SREG_ES;
>  		op->val = 0;
>  		break;
>  	case OpDX:
>  		op->type = OP_REG;
>  		op->bytes = 2;
> -		op->addr.reg = &ctxt->regs[VCPU_REGS_RDX];
> +		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
>  		fetch_register_operand(op);
>  		break;
>  	case OpCL:
>  		op->bytes = 1;
> -		op->val = ctxt->regs[VCPU_REGS_RCX] & 0xff;
> +		op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff;
>  		break;
>  	case OpImmByte:
>  		rc = decode_imm(ctxt, op, 1, true);
> @@ -3975,7 +3999,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
>  		op->type = OP_MEM;
>  		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
>  		op->addr.mem.ea =
> -			register_address(ctxt, ctxt->regs[VCPU_REGS_RSI]);
> +			register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSI));
>  		op->addr.mem.seg = seg_override(ctxt);
>  		op->val = 0;
>  		break;
> @@ -4281,6 +4305,14 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
>  		read_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
>  }
>  
> +static void writeback_registers(struct x86_emulate_ctxt *ctxt)
> +{
> +	unsigned reg;
> +
> +	for_each_set_bit(reg, (ulong *)&ctxt->regs_dirty, 16)
> +		ctxt->ops->write_gpr(ctxt, reg, ctxt->_regs[reg]);
> +}
> +
>  int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
>  {
>  	struct x86_emulate_ops *ops = ctxt->ops;
> @@ -4365,7 +4397,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
>  
>  	if (ctxt->rep_prefix && (ctxt->d & String)) {
>  		/* All REP prefixes have the same first termination condition */
> -		if (address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) == 0) {
> +		if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
>  			ctxt->eip = ctxt->_eip;
>  			goto done;
>  		}
> @@ -4438,7 +4470,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
>  		ctxt->dst.val = ctxt->src.addr.mem.ea;
>  		break;
>  	case 0x90 ... 0x97: /* nop / xchg reg, rax */
> -		if (ctxt->dst.addr.reg == &ctxt->regs[VCPU_REGS_RAX])
> +		if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX))
>  			break;
>  		rc = em_xchg(ctxt);
>  		break;
> @@ -4466,7 +4498,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
>  		rc = em_grp2(ctxt);
>  		break;
>  	case 0xd2 ... 0xd3:	/* Grp2 */
> -		ctxt->src.val = ctxt->regs[VCPU_REGS_RCX];
> +		ctxt->src.val = reg_read(ctxt, VCPU_REGS_RCX);
>  		rc = em_grp2(ctxt);
>  		break;
>  	case 0xe9: /* jmp rel */
> @@ -4521,14 +4553,14 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
>  
>  	if (ctxt->rep_prefix && (ctxt->d & String)) {
>  		struct read_cache *r = &ctxt->io_read;
> -		register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RCX], -1);
> +		register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1);
>  
>  		if (!string_insn_completed(ctxt)) {
>  			/*
>  			 * Re-enter guest when pio read ahead buffer is empty
>  			 * or, if it is not used, after each 1024 iteration.
>  			 */
> -			if ((r->end != 0 || ctxt->regs[VCPU_REGS_RCX] & 0x3ff) &&
> +			if ((r->end != 0 || reg_read(ctxt, VCPU_REGS_RCX) & 0x3ff) &&
>  			    (r->end == 0 || r->end != r->pos)) {
>  				/*
>  				 * Reset read cache. Usually happens before
> @@ -4536,6 +4568,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
>  				 * we have to do it here.
>  				 */
>  				ctxt->mem_read.end = 0;
> +				writeback_registers(ctxt);
>  				return EMULATION_RESTART;
>  			}
>  			goto done; /* skip rip writeback */
> @@ -4550,6 +4583,9 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
>  	if (rc == X86EMUL_INTERCEPTED)
>  		return EMULATION_INTERCEPTED;
>  
> +	if (rc == X86EMUL_CONTINUE)
> +		writeback_registers(ctxt);
> +

Why can't this be in the writeback function in this file (named
internal_writeback_registers or something)? Less cases to consider (eg
why its safe to skip writeback on X86EMUL_INTERCEPTED again).

>  	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
>  
>  twobyte_insn:
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 59b5950..b8a8982 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -4325,7 +4325,19 @@ static void emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
>  	kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx);
>  }
>  
> +static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
> +{
> +	return kvm_register_read(emul_to_vcpu(ctxt), reg);
> +}
> +
> +static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val)
> +{
> +	kvm_register_write(emul_to_vcpu(ctxt), reg, val);
> +}
> +
>  static struct x86_emulate_ops emulate_ops = {
> +	.read_gpr            = emulator_read_gpr,
> +	.write_gpr           = emulator_write_gpr,
>  	.read_std            = kvm_read_guest_virt_system,
>  	.write_std           = kvm_write_guest_virt_system,
>  	.fetch               = kvm_fetch_guest_virt,
> @@ -4360,14 +4372,6 @@ static void emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
>  	.get_cpuid           = emulator_get_cpuid,
>  };
>  
> -static void cache_all_regs(struct kvm_vcpu *vcpu)
> -{
> -	kvm_register_read(vcpu, VCPU_REGS_RAX);
> -	kvm_register_read(vcpu, VCPU_REGS_RSP);
> -	kvm_register_read(vcpu, VCPU_REGS_RIP);
> -	vcpu->arch.regs_dirty = ~0;
> -}
> -
>  static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
>  {
>  	u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask);
> @@ -4394,12 +4398,10 @@ static void inject_emulated_exception(struct kvm_vcpu *vcpu)
>  		kvm_queue_exception(vcpu, ctxt->exception.vector);
>  }
>  
> -static void init_decode_cache(struct x86_emulate_ctxt *ctxt,
> -			      const unsigned long *regs)
> +static void init_decode_cache(struct x86_emulate_ctxt *ctxt)
>  {
>  	memset(&ctxt->twobyte, 0,
> -	       (void *)&ctxt->regs - (void *)&ctxt->twobyte);
> -	memcpy(ctxt->regs, regs, sizeof(ctxt->regs));
> +	       (void *)&ctxt->_regs - (void *)&ctxt->twobyte);
>  
>  	ctxt->fetch.start = 0;
>  	ctxt->fetch.end = 0;
> @@ -4414,14 +4416,6 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
>  	struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
>  	int cs_db, cs_l;
>  
> -	/*
> -	 * TODO: fix emulate.c to use guest_read/write_register
> -	 * instead of direct ->regs accesses, can save hundred cycles
> -	 * on Intel for instructions that don't read/change RSP, for
> -	 * for example.
> -	 */
> -	cache_all_regs(vcpu);
> -
>  	kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
>  
>  	ctxt->eflags = kvm_get_rflags(vcpu);
> @@ -4433,7 +4427,7 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
>  							  X86EMUL_MODE_PROT16;
>  	ctxt->guest_mode = is_guest_mode(vcpu);
>  
> -	init_decode_cache(ctxt, vcpu->arch.regs);
> +	init_decode_cache(ctxt);
>  	vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
>  }
>  
> @@ -4453,7 +4447,6 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
>  		return EMULATE_FAIL;
>  
>  	ctxt->eip = ctxt->_eip;
> -	memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
>  	kvm_rip_write(vcpu, ctxt->eip);
>  	kvm_set_rflags(vcpu, ctxt->eflags);


Need to writeback here?

> @@ -4601,7 +4594,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
>  	   changes registers values  during IO operation */
>  	if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
>  		vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
> -		memcpy(ctxt->regs, vcpu->arch.regs, sizeof ctxt->regs);
> +		ctxt->regs_valid = 0;
>  	}

I think you can improve this hack now (perhaps invalidate the emulator
cache on SET_REGS?).

>  restart:
> @@ -4639,7 +4632,6 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
>  		toggle_interruptibility(vcpu, ctxt->interruptibility);
>  		kvm_set_rflags(vcpu, ctxt->eflags);
>  		kvm_make_request(KVM_REQ_EVENT, vcpu);
> -		memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
>  		vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
>  		kvm_rip_write(vcpu, ctxt->eip);
>  	} else
> @@ -5593,7 +5585,11 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
>  		 * backdoor interface) need this to work
>  		 */
>  		struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
> -		memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
> +		unsigned reg;
> +
> +		for_each_set_bit(reg, (ulong *)&ctxt->regs_dirty, 16)
> +			kvm_register_write(vcpu, reg, ctxt->_regs[reg]);
> +

Same as comment below.

>  		vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
>  	}
>  	regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
> @@ -5724,6 +5720,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
>  {
>  	struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
>  	int ret;
> +	unsigned reg;
>  
>  	init_emulate_ctxt(vcpu);
>  
> @@ -5733,7 +5730,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
>  	if (ret)
>  		return EMULATE_FAIL;
>  
> -	memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
> +	for_each_set_bit(reg, (ulong *)&ctxt->regs_dirty, 16)
> +		kvm_register_write(vcpu, reg, ctxt->_regs[reg]);

Should update regs_avail/regs_dirty? Better to do any of that in
emulator.c via interfaces.

>  	kvm_rip_write(vcpu, ctxt->eip);
>  	kvm_set_rflags(vcpu, ctxt->eflags);
>  	kvm_make_request(KVM_REQ_EVENT, vcpu);
> -- 
> 1.7.11.2

Did not double check that emulator.c convertion is complete with your
patch (which should be done).



^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] KVM: x86 emulator: access GPRs on demand
  2012-08-07 21:23 ` Marcelo Tosatti
@ 2012-08-07 21:24   ` Marcelo Tosatti
  2012-08-08 12:38   ` Avi Kivity
  1 sibling, 0 replies; 4+ messages in thread
From: Marcelo Tosatti @ 2012-08-07 21:24 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Gleb Natapov, kvm

On Tue, Aug 07, 2012 at 06:23:01PM -0300, Marcelo Tosatti wrote:
> On Thu, Jul 19, 2012 at 03:14:38PM +0300, Avi Kivity wrote:
> > Instead of populating the the entire register file, read in registers
> > as they are accessed, and write back only the modified ones.  This
> > saves a VMREAD and VMWRITE on Intel (for rsp, since it is not usually
> > used during emulation), and a two 128-byte copies for the registers.
> > 
> > Signed-off-by: Avi Kivity <avi@redhat.com>
> > ---
> >  arch/x86/include/asm/kvm_emulate.h |  17 ++-
> >  arch/x86/kvm/emulate.c             | 268 +++++++++++++++++++++----------------
> >  arch/x86/kvm/x86.c                 |  50 ++++---
> >  3 files changed, 192 insertions(+), 143 deletions(-)
> > 
> > diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
> > index c764f43..2f1da16 100644
> > --- a/arch/x86/include/asm/kvm_emulate.h
> > +++ b/arch/x86/include/asm/kvm_emulate.h
> > @@ -86,6 +86,19 @@ struct x86_instruction_info {
> >  
> >  struct x86_emulate_ops {
> >  	/*
> > +	 * read_gpr: read a general purpose register (rax - r15)
> > +	 *
> > +	 * @reg: gpr number.
> > +	 */
> > +	ulong (*read_gpr)(struct x86_emulate_ctxt *ctxt, unsigned reg);
> > +	/*
> > +	 * write_gpr: write a general purpose register (rax - r15)
> > +	 *
> > +	 * @reg: gpr number.
> > +	 * @val: value to write.
> > +	 */
> > +	void (*write_gpr)(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val);
> > +	/*
> >  	 * read_std: Read bytes of standard (non-emulated/special) memory.
> >  	 *           Used for descriptor reading.
> >  	 *  @addr:  [IN ] Linear address from which to read.
> > @@ -281,8 +294,10 @@ struct x86_emulate_ctxt {
> >  	bool rip_relative;
> >  	unsigned long _eip;
> >  	struct operand memop;
> > +	u32 regs_valid;  /* bitmaps of registers in _regs[] that can be read */
> > +	u32 regs_dirty;  /* bitmaps of registers in _regs[] that have been written */
> 
> emul_regs_dirty (to avoid with the other regs_dirty).

avoid confusion.



^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] KVM: x86 emulator: access GPRs on demand
  2012-08-07 21:23 ` Marcelo Tosatti
  2012-08-07 21:24   ` Marcelo Tosatti
@ 2012-08-08 12:38   ` Avi Kivity
  1 sibling, 0 replies; 4+ messages in thread
From: Avi Kivity @ 2012-08-08 12:38 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Gleb Natapov, kvm

On 08/08/2012 12:23 AM, Marcelo Tosatti wrote:
>> @@ -281,8 +294,10 @@ struct x86_emulate_ctxt {
>>  	bool rip_relative;
>>  	unsigned long _eip;
>>  	struct operand memop;
>> +	u32 regs_valid;  /* bitmaps of registers in _regs[] that can be read */
>> +	u32 regs_dirty;  /* bitmaps of registers in _regs[] that have been written */
> 
> emul_regs_dirty (to avoid with the other regs_dirty).

Well, it's in x86_emulate_ctxt, and there are other fields that can be
confused here.  I think it's okay as they're never used together.

>>  
>> @@ -4453,7 +4447,6 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
>>  		return EMULATE_FAIL;
>>  
>>  	ctxt->eip = ctxt->_eip;
>> -	memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
>>  	kvm_rip_write(vcpu, ctxt->eip);
>>  	kvm_set_rflags(vcpu, ctxt->eflags);
> 
> 
> Need to writeback here?

In emulate_in_real().  Good catch.

> 
>> @@ -4601,7 +4594,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
>>  	   changes registers values  during IO operation */
>>  	if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
>>  		vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
>> -		memcpy(ctxt->regs, vcpu->arch.regs, sizeof ctxt->regs);
>> +		ctxt->regs_valid = 0;
>>  	}
> 
> I think you can improve this hack now (perhaps invalidate the emulator
> cache on SET_REGS?).

This is dangerous, if someone does a GET_REGS/SET_REGS pair within an
mmio transaction.

The documentation implies it should not be done, but it doesn't say so
explicitly.  It's likely qemu allows it if the mmio transaction drops
the lock, for example.

I agree that doing it in SET_REGS is better conceptually.

> 
>>  		vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
>>  	}
>>  	regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
>> @@ -5724,6 +5720,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
>>  {
>>  	struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
>>  	int ret;
>> +	unsigned reg;
>>  
>>  	init_emulate_ctxt(vcpu);
>>  
>> @@ -5733,7 +5730,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
>>  	if (ret)
>>  		return EMULATE_FAIL;
>>  
>> -	memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
>> +	for_each_set_bit(reg, (ulong *)&ctxt->regs_dirty, 16)
>> +		kvm_register_write(vcpu, reg, ctxt->_regs[reg]);
> 
> Should update regs_avail/regs_dirty? Better to do any of that in
> emulator.c via interfaces.

emulator_task_switch() should do it, yes.

> 
>>  	kvm_rip_write(vcpu, ctxt->eip);
>>  	kvm_set_rflags(vcpu, ctxt->eflags);
>>  	kvm_make_request(KVM_REQ_EVENT, vcpu);
>> -- 
>> 1.7.11.2
> 
> Did not double check that emulator.c convertion is complete with your
> patch (which should be done).

I did s/regs/_regs/ for that.  Or did you mean something else?

Regarding emulator interfaces, perhaps we should make all entry points
regular:

  emulator_init(ctxt, ops, vcpu); // just once

  emulator_reset(ctxt)
  while ((ret = emulator_entry_point(ctxt)) == EMULATOR_NEED_DATA)
       get that data from somewhere

emulator_reset() simply resets the state machine, can be as simple as
clearing a bool flag. emulator_entry_point (can be x86_emulate_insn(),
x86_emulate_int(), x86_emulate_task_switch(), etc.) runs until one of
the callbacks tells it to stop to get more data.

(of course the while loop is actually in userspace)


-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2012-08-08 12:38 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-07-19 12:14 [PATCH] KVM: x86 emulator: access GPRs on demand Avi Kivity
2012-08-07 21:23 ` Marcelo Tosatti
2012-08-07 21:24   ` Marcelo Tosatti
2012-08-08 12:38   ` Avi Kivity

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.