All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] improve speed of "rep ins" emulation
@ 2012-05-23 14:08 Gleb Natapov
  2012-05-23 14:08 ` [PATCH 1/2] Provide userspace IO exit completion callback Gleb Natapov
  2012-05-23 14:08 ` [PATCH 2/2] Provide fast path for "rep ins" emulation if possible Gleb Natapov
  0 siblings, 2 replies; 18+ messages in thread
From: Gleb Natapov @ 2012-05-23 14:08 UTC (permalink / raw)
  To: kvm; +Cc: avi, mtosatti

With this patches loading 100M initrd takes ~10s instead of ~40s without.

Gleb Natapov (2):
  Provide userspace IO exit completion callback.
  Provide fast path for "rep ins" emulation if possible.

 arch/x86/include/asm/kvm_host.h |    6 ++
 arch/x86/kvm/svm.c              |   20 +++-
 arch/x86/kvm/vmx.c              |   25 ++++--
 arch/x86/kvm/x86.c              |  187 +++++++++++++++++++++++++++++++--------
 4 files changed, 189 insertions(+), 49 deletions(-)

-- 
1.7.7.3


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 1/2] Provide userspace IO exit completion callback.
  2012-05-23 14:08 [PATCH 0/2] improve speed of "rep ins" emulation Gleb Natapov
@ 2012-05-23 14:08 ` Gleb Natapov
  2012-05-23 14:08 ` [PATCH 2/2] Provide fast path for "rep ins" emulation if possible Gleb Natapov
  1 sibling, 0 replies; 18+ messages in thread
From: Gleb Natapov @ 2012-05-23 14:08 UTC (permalink / raw)
  To: kvm; +Cc: avi, mtosatti

Current code assumes that IO exit was due to instruction emulation
and handles execution back to emulator directly. This patch adds new
userspace IO exit completion callback that can be set by any other code
that caused IO exit to userspace.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |    1 +
 arch/x86/kvm/x86.c              |   92 +++++++++++++++++++++++----------------
 2 files changed, 56 insertions(+), 37 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 64c8989..1bcd280 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -405,6 +405,7 @@ struct kvm_vcpu_arch {
 	struct x86_emulate_ctxt emulate_ctxt;
 	bool emulate_regs_need_sync_to_vcpu;
 	bool emulate_regs_need_sync_from_vcpu;
+	int (*complete_userspace_io)(struct kvm_vcpu *vcpu);
 
 	gpa_t time;
 	struct pvclock_vcpu_time_info hv_clock;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b78f89d..8361649 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4547,6 +4547,9 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
 	return true;
 }
 
+static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
+static int complete_emulated_pio(struct kvm_vcpu *vcpu);
+
 int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 			    unsigned long cr2,
 			    int emulation_type,
@@ -4617,13 +4620,16 @@ restart:
 	} else if (vcpu->arch.pio.count) {
 		if (!vcpu->arch.pio.in)
 			vcpu->arch.pio.count = 0;
-		else
+		else {
 			writeback = false;
+			vcpu->arch.complete_userspace_io = complete_emulated_pio;
+		}
 		r = EMULATE_DO_MMIO;
 	} else if (vcpu->mmio_needed) {
 		if (!vcpu->mmio_is_write)
 			writeback = false;
 		r = EMULATE_DO_MMIO;
+		vcpu->arch.complete_userspace_io = complete_emulated_mmio;
 	} else if (r == EMULATION_RESTART)
 		goto restart;
 	else
@@ -5474,6 +5480,24 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 	return r;
 }
 
+static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
+{
+	int r;
+	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+	r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
+	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+	if (r != EMULATE_DONE)
+		return 0;
+	return 1;
+}
+
+static int complete_emulated_pio(struct kvm_vcpu *vcpu)
+{
+	BUG_ON(!vcpu->arch.pio.count);
+
+	return complete_emulated_io(vcpu);
+}
+
 /*
  * Implements the following, as a state machine:
  *
@@ -5490,47 +5514,37 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
  *      copy data
  *      exit
  */
-static int complete_mmio(struct kvm_vcpu *vcpu)
+static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *run = vcpu->run;
 	struct kvm_mmio_fragment *frag;
-	int r;
 
-	if (!(vcpu->arch.pio.count || vcpu->mmio_needed))
-		return 1;
+	BUG_ON(!vcpu->mmio_needed);
 
-	if (vcpu->mmio_needed) {
-		/* Complete previous fragment */
-		frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++];
-		if (!vcpu->mmio_is_write)
-			memcpy(frag->data, run->mmio.data, frag->len);
-		if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
-			vcpu->mmio_needed = 0;
-			if (vcpu->mmio_is_write)
-				return 1;
-			vcpu->mmio_read_completed = 1;
-			goto done;
-		}
-		/* Initiate next fragment */
-		++frag;
-		run->exit_reason = KVM_EXIT_MMIO;
-		run->mmio.phys_addr = frag->gpa;
+	/* Complete previous fragment */
+	frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++];
+	if (!vcpu->mmio_is_write)
+		memcpy(frag->data, run->mmio.data, frag->len);
+	if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
+		vcpu->mmio_needed = 0;
 		if (vcpu->mmio_is_write)
-			memcpy(run->mmio.data, frag->data, frag->len);
-		run->mmio.len = frag->len;
-		run->mmio.is_write = vcpu->mmio_is_write;
-		return 0;
-
-	}
-done:
-	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-	r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
-	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
-	if (r != EMULATE_DONE)
-		return 0;
-	return 1;
+			return 1;
+		vcpu->mmio_read_completed = 1;
+		return complete_emulated_io(vcpu);
+	}
+	/* Initiate next fragment */
+	++frag;
+	run->exit_reason = KVM_EXIT_MMIO;
+	run->mmio.phys_addr = frag->gpa;
+	if (vcpu->mmio_is_write)
+		memcpy(run->mmio.data, frag->data, frag->len);
+	run->mmio.len = frag->len;
+	run->mmio.is_write = vcpu->mmio_is_write;
+	vcpu->arch.complete_userspace_io = complete_emulated_mmio;
+	return 0;
 }
 
+
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	int r;
@@ -5557,9 +5571,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		}
 	}
 
-	r = complete_mmio(vcpu);
-	if (r <= 0)
-		goto out;
+	if (unlikely(vcpu->arch.complete_userspace_io)) {
+		int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
+		vcpu->arch.complete_userspace_io = NULL;
+		r = cui(vcpu);
+		if (r <= 0)
+			goto out;
+	}
 
 	r = __vcpu_run(vcpu);
 
-- 
1.7.7.3


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 2/2] Provide fast path for "rep ins" emulation if possible.
  2012-05-23 14:08 [PATCH 0/2] improve speed of "rep ins" emulation Gleb Natapov
  2012-05-23 14:08 ` [PATCH 1/2] Provide userspace IO exit completion callback Gleb Natapov
@ 2012-05-23 14:08 ` Gleb Natapov
  2012-05-23 14:40   ` Avi Kivity
  2012-06-05 16:07   ` Avi Kivity
  1 sibling, 2 replies; 18+ messages in thread
From: Gleb Natapov @ 2012-05-23 14:08 UTC (permalink / raw)
  To: kvm; +Cc: avi, mtosatti

"rep ins" emulation is going through emulator now. This is slow because
emulator knows how to write back only one datum at a time. This patch
provides fast path for the instruction in certain conditions. The
conditions are: DF flag is not set, destination memory is RAM and single
datum does not cross page boundary. If fast path code fails it falls
back to emulation.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |    5 ++
 arch/x86/kvm/svm.c              |   20 ++++++--
 arch/x86/kvm/vmx.c              |   25 +++++++---
 arch/x86/kvm/x86.c              |   95 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 133 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1bcd280..5129639 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -403,6 +403,9 @@ struct kvm_vcpu_arch {
 	/* emulate context */
 
 	struct x86_emulate_ctxt emulate_ctxt;
+	struct x86_fast_string_pio_ctxt {
+		unsigned long linear_addr;
+	} fast_string_pio_ctxt;
 	bool emulate_regs_need_sync_to_vcpu;
 	bool emulate_regs_need_sync_from_vcpu;
 	int (*complete_userspace_io)(struct kvm_vcpu *vcpu);
@@ -763,6 +766,8 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
 struct x86_emulate_ctxt;
 
 int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port);
+int kvm_fast_string_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port,
+		u8 addr_size);
 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
 int kvm_emulate_halt(struct kvm_vcpu *vcpu);
 int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index f75af40..20e3fb0 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1887,21 +1887,31 @@ static int io_interception(struct vcpu_svm *svm)
 {
 	struct kvm_vcpu *vcpu = &svm->vcpu;
 	u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
-	int size, in, string;
+	int size, in, string, rep;
 	unsigned port;
 
 	++svm->vcpu.stat.io_exits;
 	string = (io_info & SVM_IOIO_STR_MASK) != 0;
+	rep = (io_info & SVM_IOIO_REP_MASK) != 0;
 	in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
-	if (string || in)
-		return emulate_instruction(vcpu, 0) == EMULATE_DONE;
 
 	port = io_info >> 16;
 	size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
 	svm->next_rip = svm->vmcb->control.exit_info_2;
-	skip_emulated_instruction(&svm->vcpu);
 
-	return kvm_fast_pio_out(vcpu, size, port);
+	if (!string && !in) {
+		skip_emulated_instruction(&svm->vcpu);
+		return kvm_fast_pio_out(vcpu, size, port);
+	} else if (string && in && rep) {
+		int addr_size = (io_info & SVM_IOIO_ASIZE_MASK) >>
+			SVM_IOIO_ASIZE_SHIFT;
+		int r = kvm_fast_string_pio_in(vcpu, size, port,
+				ffs(addr_size) - 1);
+		if (r != EMULATE_FAIL)
+			return r == EMULATE_DONE;
+	}
+
+	return emulate_instruction(vcpu, 0) == EMULATE_DONE;
 }
 
 static int nmi_interception(struct vcpu_svm *svm)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 32eb588..a2935f3 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -632,6 +632,7 @@ static unsigned long *vmx_msr_bitmap_longmode;
 
 static bool cpu_has_load_ia32_efer;
 static bool cpu_has_load_perf_global_ctrl;
+static bool cpu_has_ins_outs_inst_info;
 
 static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
 static DEFINE_SPINLOCK(vmx_vpid_lock);
@@ -2510,6 +2511,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 	if (((vmx_msr_high >> 18) & 15) != 6)
 		return -EIO;
 
+	cpu_has_ins_outs_inst_info = vmx_msr_high & (1u << 22);
+
 	vmcs_conf->size = vmx_msr_high & 0x1fff;
 	vmcs_conf->order = get_order(vmcs_config.size);
 	vmcs_conf->revision_id = vmx_msr_low;
@@ -4333,23 +4336,31 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu)
 static int handle_io(struct kvm_vcpu *vcpu)
 {
 	unsigned long exit_qualification;
-	int size, in, string;
+	int size, in, string, rep;
 	unsigned port;
 
 	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
-	string = (exit_qualification & 16) != 0;
 	in = (exit_qualification & 8) != 0;
+	string = (exit_qualification & 16) != 0;
+	rep = (exit_qualification & 32) != 0;
 
 	++vcpu->stat.io_exits;
 
-	if (string || in)
-		return emulate_instruction(vcpu, 0) == EMULATE_DONE;
-
 	port = exit_qualification >> 16;
 	size = (exit_qualification & 7) + 1;
-	skip_emulated_instruction(vcpu);
 
-	return kvm_fast_pio_out(vcpu, size, port);
+	if (!string && !in) {
+		skip_emulated_instruction(vcpu);
+		return kvm_fast_pio_out(vcpu, size, port);
+	} else if (string && in && rep && cpu_has_ins_outs_inst_info) {
+		u32 inst_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+		int r = kvm_fast_string_pio_in(vcpu, size, port,
+				(inst_info >> 7) & 7);
+		if (r != EMULATE_FAIL)
+			return r == EMULATE_DONE;
+	}
+
+	return emulate_instruction(vcpu, 0) == EMULATE_DONE;
 }
 
 static void
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8361649..3bc7ad3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4660,6 +4660,101 @@ int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
 }
 EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
 
+static int __kvm_fast_string_pio_in(struct kvm_vcpu *vcpu, int size,
+		                unsigned short port, unsigned long addr,
+				int count)
+{
+	struct page *page;
+	gpa_t gpa;
+	char *kaddr;
+	int ret;
+
+	gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
+
+	if (gpa == UNMAPPED_GVA ||
+			(gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
+		return EMULATE_FAIL;
+
+	page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+	if (is_error_page(page)) {
+		kvm_release_page_clean(page);
+		return EMULATE_FAIL;
+	}
+
+	kaddr = kmap_atomic(page);
+	kaddr += offset_in_page(gpa);
+
+	ret = emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, size, port,
+			kaddr, count);
+
+	kunmap_atomic(kaddr);
+	if (ret) {
+		kvm_register_write(vcpu, VCPU_REGS_RCX,
+				kvm_register_read(vcpu, VCPU_REGS_RCX) - count);
+		kvm_register_write(vcpu, VCPU_REGS_RDI,
+				kvm_register_read(vcpu, VCPU_REGS_RDI) + count*size);
+		kvm_release_page_dirty(page);
+		return EMULATE_DONE;
+	}
+	kvm_release_page_clean(page);
+	return EMULATE_DO_MMIO;
+}
+
+static int complete_fast_string_pio(struct kvm_vcpu *vcpu)
+{
+	unsigned long linear_addr = vcpu->arch.fast_string_pio_ctxt.linear_addr;
+	int r;
+
+	BUG_ON(!vcpu->arch.pio.count);
+
+	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+	r = __kvm_fast_string_pio_in(vcpu, vcpu->arch.pio.size,
+			vcpu->arch.pio.port, linear_addr, vcpu->arch.pio.count);
+	BUG_ON(r == EMULATE_DO_MMIO);
+	if (r == EMULATE_FAIL) /* mem slot gone while we were not looking */
+		vcpu->arch.pio.count = 0; /* drop the pio data */
+	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+	return 1;
+}
+
+int kvm_fast_string_pio_in(struct kvm_vcpu *vcpu, int size,
+		unsigned short port, u8 addr_size)
+{
+	unsigned long masks[] = {0xffff, 0xffffffff, ~0};
+	unsigned long rdi = kvm_register_read(vcpu, VCPU_REGS_RDI);
+	unsigned long linear_addr = rdi + get_segment_base(vcpu, VCPU_SREG_ES);
+	unsigned long rcx = kvm_register_read(vcpu, VCPU_REGS_RCX), count;
+	int r;
+
+	if (rcx == 0) {
+		kvm_x86_ops->skip_emulated_instruction(vcpu);
+		return EMULATE_DONE;
+	}
+	if (kvm_get_rflags(vcpu) & X86_EFLAGS_DF)
+		return EMULATE_FAIL;
+	if (addr_size > 2)
+		return EMULATE_FAIL;
+
+	linear_addr &= masks[addr_size];
+
+	count = (PAGE_SIZE - offset_in_page(linear_addr))/size;
+
+	if (count == 0) /* 'in' crosses page boundry */
+		return EMULATE_FAIL;
+
+	count = min(count, rcx);
+
+	r = __kvm_fast_string_pio_in(vcpu, size, port, linear_addr, count);
+
+	if (r != EMULATE_DO_MMIO)
+		return r;
+
+	vcpu->arch.fast_string_pio_ctxt.linear_addr = linear_addr;
+	vcpu->arch.complete_userspace_io = complete_fast_string_pio;
+	return EMULATE_DO_MMIO;
+}
+EXPORT_SYMBOL_GPL(kvm_fast_string_pio_in);
+
 static void tsc_bad(void *info)
 {
 	__this_cpu_write(cpu_tsc_khz, 0);
-- 
1.7.7.3


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/2] Provide fast path for "rep ins" emulation if possible.
  2012-05-23 14:08 ` [PATCH 2/2] Provide fast path for "rep ins" emulation if possible Gleb Natapov
@ 2012-05-23 14:40   ` Avi Kivity
  2012-05-23 14:49     ` Avi Kivity
  2012-06-05 16:07   ` Avi Kivity
  1 sibling, 1 reply; 18+ messages in thread
From: Avi Kivity @ 2012-05-23 14:40 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, mtosatti

On 05/23/2012 05:08 PM, Gleb Natapov wrote:
> "rep ins" emulation is going through emulator now. This is slow because
> emulator knows how to write back only one datum at a time. This patch
> provides fast path for the instruction in certain conditions. The
> conditions are: DF flag is not set, destination memory is RAM and single
> datum does not cross page boundary. If fast path code fails it falls
> back to emulation.
> 
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index f75af40..20e3fb0 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -1887,21 +1887,31 @@ static int io_interception(struct vcpu_svm *svm)
>  {
>  	struct kvm_vcpu *vcpu = &svm->vcpu;
>  	u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
> -	int size, in, string;
> +	int size, in, string, rep;
>  	unsigned port;
>  
>  	++svm->vcpu.stat.io_exits;
>  	string = (io_info & SVM_IOIO_STR_MASK) != 0;
> +	rep = (io_info & SVM_IOIO_REP_MASK) != 0;
>  	in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
> -	if (string || in)
> -		return emulate_instruction(vcpu, 0) == EMULATE_DONE;
>  

If decode assists are not available, we still need to emulate, see 15.33.5.

>  	port = io_info >> 16;
>  	size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
>  	svm->next_rip = svm->vmcb->control.exit_info_2;
> -	skip_emulated_instruction(&svm->vcpu);
>  
> -	return kvm_fast_pio_out(vcpu, size, port);
> +	if (!string && !in) {
> +		skip_emulated_instruction(&svm->vcpu);
> +		return kvm_fast_pio_out(vcpu, size, port);
> +	} else if (string && in && rep) {
> +		int addr_size = (io_info & SVM_IOIO_ASIZE_MASK) >>
> +			SVM_IOIO_ASIZE_SHIFT;
> +		int r = kvm_fast_string_pio_in(vcpu, size, port,
> +				ffs(addr_size) - 1);
> +		if (r != EMULATE_FAIL)
> +			return r == EMULATE_DONE;
> +	}
> +
> +	return emulate_instruction(vcpu, 0) == EMULATE_DONE;
>  }
>  
-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/2] Provide fast path for "rep ins" emulation if possible.
  2012-05-23 14:40   ` Avi Kivity
@ 2012-05-23 14:49     ` Avi Kivity
  2012-05-24 10:34       ` Roedel, Joerg
  0 siblings, 1 reply; 18+ messages in thread
From: Avi Kivity @ 2012-05-23 14:49 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, mtosatti, Roedel, Joerg

On 05/23/2012 05:40 PM, Avi Kivity wrote:
> On 05/23/2012 05:08 PM, Gleb Natapov wrote:
>> "rep ins" emulation is going through emulator now. This is slow because
>> emulator knows how to write back only one datum at a time. This patch
>> provides fast path for the instruction in certain conditions. The
>> conditions are: DF flag is not set, destination memory is RAM and single
>> datum does not cross page boundary. If fast path code fails it falls
>> back to emulation.
>> 
>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>> index f75af40..20e3fb0 100644
>> --- a/arch/x86/kvm/svm.c
>> +++ b/arch/x86/kvm/svm.c
>> @@ -1887,21 +1887,31 @@ static int io_interception(struct vcpu_svm *svm)
>>  {
>>  	struct kvm_vcpu *vcpu = &svm->vcpu;
>>  	u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
>> -	int size, in, string;
>> +	int size, in, string, rep;
>>  	unsigned port;
>>  
>>  	++svm->vcpu.stat.io_exits;
>>  	string = (io_info & SVM_IOIO_STR_MASK) != 0;
>> +	rep = (io_info & SVM_IOIO_REP_MASK) != 0;
>>  	in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
>> -	if (string || in)
>> -		return emulate_instruction(vcpu, 0) == EMULATE_DONE;
>>  
> 
> If decode assists are not available, we still need to emulate, see 15.33.5.
> 

Joerg, the 2010 version of the manual says that the effective segment
(10:12) is only available with decode assists.  The 2012 version says
it's unconditional. What's correct?

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/2] Provide fast path for "rep ins" emulation if possible.
  2012-05-23 14:49     ` Avi Kivity
@ 2012-05-24 10:34       ` Roedel, Joerg
  2012-05-24 10:36         ` Avi Kivity
  0 siblings, 1 reply; 18+ messages in thread
From: Roedel, Joerg @ 2012-05-24 10:34 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Gleb Natapov, kvm, mtosatti

On Wed, May 23, 2012 at 05:49:31PM +0300, Avi Kivity wrote:
> On 05/23/2012 05:40 PM, Avi Kivity wrote:
> > On 05/23/2012 05:08 PM, Gleb Natapov wrote:
> > If decode assists are not available, we still need to emulate, see 15.33.5.
> > 
> 
> Joerg, the 2010 version of the manual says that the effective segment
> (10:12) is only available with decode assists.  The 2012 version says
> it's unconditional. What's correct?

It is still conditional and only available with decode-assists. I will
talk to the APM authors to clarify this in the documentation.


	Joerg

-- 
AMD Operating System Research Center

Advanced Micro Devices GmbH Einsteinring 24 85609 Dornach
General Managers: Alberto Bozzo
Registration: Dornach, Landkr. Muenchen; Registerger. Muenchen, HRB Nr. 43632


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/2] Provide fast path for "rep ins" emulation if possible.
  2012-05-24 10:34       ` Roedel, Joerg
@ 2012-05-24 10:36         ` Avi Kivity
  2012-05-24 10:54           ` Roedel, Joerg
  0 siblings, 1 reply; 18+ messages in thread
From: Avi Kivity @ 2012-05-24 10:36 UTC (permalink / raw)
  To: Roedel, Joerg; +Cc: Gleb Natapov, kvm, mtosatti

On 05/24/2012 01:34 PM, Roedel, Joerg wrote:
> On Wed, May 23, 2012 at 05:49:31PM +0300, Avi Kivity wrote:
>> On 05/23/2012 05:40 PM, Avi Kivity wrote:
>> > On 05/23/2012 05:08 PM, Gleb Natapov wrote:
>> > If decode assists are not available, we still need to emulate, see 15.33.5.
>> > 
>> 
>> Joerg, the 2010 version of the manual says that the effective segment
>> (10:12) is only available with decode assists.  The 2012 version says
>> it's unconditional. What's correct?
> 
> It is still conditional and only available with decode-assists. I will
> talk to the APM authors to clarify this in the documentation.

Thanks.  As it happens it doesn't matter for INS emulation, only OUTS,
unless other bits in that word also depend on decode assists.


-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/2] Provide fast path for "rep ins" emulation if possible.
  2012-05-24 10:36         ` Avi Kivity
@ 2012-05-24 10:54           ` Roedel, Joerg
  2012-06-05 17:27             ` Alexander Graf
  0 siblings, 1 reply; 18+ messages in thread
From: Roedel, Joerg @ 2012-05-24 10:54 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Gleb Natapov, kvm, mtosatti

On Thu, May 24, 2012 at 01:36:51PM +0300, Avi Kivity wrote:
> On 05/24/2012 01:34 PM, Roedel, Joerg wrote:
> > On Wed, May 23, 2012 at 05:49:31PM +0300, Avi Kivity wrote:
> >> On 05/23/2012 05:40 PM, Avi Kivity wrote:
> >> > On 05/23/2012 05:08 PM, Gleb Natapov wrote:
> >> > If decode assists are not available, we still need to emulate, see 15.33.5.
> >> > 
> >> 
> >> Joerg, the 2010 version of the manual says that the effective segment
> >> (10:12) is only available with decode assists.  The 2012 version says
> >> it's unconditional. What's correct?
> > 
> > It is still conditional and only available with decode-assists. I will
> > talk to the APM authors to clarify this in the documentation.
> 
> Thanks.  As it happens it doesn't matter for INS emulation, only OUTS,
> unless other bits in that word also depend on decode assists.

Doesn't look like it. All other bits EXITINFO1 bits for the IOIO
intercept are documented from the very ancient beginnings of SVM. So the
segment number is the only addition.


	Joerg

-- 
AMD Operating System Research Center

Advanced Micro Devices GmbH Einsteinring 24 85609 Dornach
General Managers: Alberto Bozzo
Registration: Dornach, Landkr. Muenchen; Registerger. Muenchen, HRB Nr. 43632


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/2] Provide fast path for "rep ins" emulation if possible.
  2012-05-23 14:08 ` [PATCH 2/2] Provide fast path for "rep ins" emulation if possible Gleb Natapov
  2012-05-23 14:40   ` Avi Kivity
@ 2012-06-05 16:07   ` Avi Kivity
  2012-06-05 17:19     ` Gleb Natapov
  1 sibling, 1 reply; 18+ messages in thread
From: Avi Kivity @ 2012-06-05 16:07 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, mtosatti

On 05/23/2012 05:08 PM, Gleb Natapov wrote:
> "rep ins" emulation is going through emulator now. This is slow because
> emulator knows how to write back only one datum at a time. This patch
> provides fast path for the instruction in certain conditions. The
> conditions are: DF flag is not set, destination memory is RAM and single
> datum does not cross page boundary. If fast path code fails it falls
> back to emulation.
> 
>  
> +static int __kvm_fast_string_pio_in(struct kvm_vcpu *vcpu, int size,
> +		                unsigned short port, unsigned long addr,
> +				int count)
> +{
> +	struct page *page;
> +	gpa_t gpa;
> +	char *kaddr;
> +	int ret;
> +
> +	gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
> +
> +	if (gpa == UNMAPPED_GVA ||
> +			(gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
> +		return EMULATE_FAIL;
> +
> +	page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
> +	if (is_error_page(page)) {
> +		kvm_release_page_clean(page);
> +		return EMULATE_FAIL;
> +	}
> +
> +	kaddr = kmap_atomic(page);
> +	kaddr += offset_in_page(gpa);
> +
> +	ret = emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, size, port,
> +			kaddr, count);
> +
> +	kunmap_atomic(kaddr);
> +	if (ret) {
> +		kvm_register_write(vcpu, VCPU_REGS_RCX,
> +				kvm_register_read(vcpu, VCPU_REGS_RCX) - count);

Sometimes we only modify the low 16 bits of %rcx.

> +		kvm_register_write(vcpu, VCPU_REGS_RDI,
> +				kvm_register_read(vcpu, VCPU_REGS_RDI) + count*size);

Possibly, ditto.

> +		kvm_release_page_dirty(page);
> +		return EMULATE_DONE;
> +	}
> +	kvm_release_page_clean(page);
> +	return EMULATE_DO_MMIO;
> +}
> +
> +
> +int kvm_fast_string_pio_in(struct kvm_vcpu *vcpu, int size,
> +		unsigned short port, u8 addr_size)

This is actually the logarithm of addr_size, the variable name should
reflect it.

> +{
> +	unsigned long masks[] = {0xffff, 0xffffffff, ~0};
> +	unsigned long rdi = kvm_register_read(vcpu, VCPU_REGS_RDI);
> +	unsigned long linear_addr = rdi + get_segment_base(vcpu, VCPU_SREG_ES);
> +	unsigned long rcx = kvm_register_read(vcpu, VCPU_REGS_RCX), count;
> +	int r;
> +


Missing es limit check - rep in can succeed for part of the page, fail
when the %rdi hits the limit.

> +	if (rcx == 0) {
> +		kvm_x86_ops->skip_emulated_instruction(vcpu);
> +		return EMULATE_DONE;
> +	}
> +	if (kvm_get_rflags(vcpu) & X86_EFLAGS_DF)
> +		return EMULATE_FAIL;
> +	if (addr_size > 2)
> +		return EMULATE_FAIL;
> +
> +	linear_addr &= masks[addr_size];

Also need to mask %rcx.

> +
> +	count = (PAGE_SIZE - offset_in_page(linear_addr))/size;
> +
> +	if (count == 0) /* 'in' crosses page boundry */
> +		return EMULATE_FAIL;
> +
> +	count = min(count, rcx);
> +
> +	r = __kvm_fast_string_pio_in(vcpu, size, port, linear_addr, count);
> +
> +	if (r != EMULATE_DO_MMIO)
> +		return r;
> +
> +	vcpu->arch.fast_string_pio_ctxt.linear_addr = linear_addr;
> +	vcpu->arch.complete_userspace_io = complete_fast_string_pio;
> +	return EMULATE_DO_MMIO;
> +}
> +EXPORT_SYMBOL_GPL(kvm_fast_string_pio_in);

Perhaps a better way to do it is to move the code into the emulator,
which already handles all the checks and masks, and just avoid
x86_decode_insn()/x86_emulate_insn().

> +
>  static void tsc_bad(void *info)
>  {
>  	__this_cpu_write(cpu_tsc_khz, 0);


-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/2] Provide fast path for "rep ins" emulation if possible.
  2012-06-05 16:07   ` Avi Kivity
@ 2012-06-05 17:19     ` Gleb Natapov
  2012-06-06  8:04       ` Avi Kivity
  0 siblings, 1 reply; 18+ messages in thread
From: Gleb Natapov @ 2012-06-05 17:19 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, mtosatti

On Tue, Jun 05, 2012 at 07:07:39PM +0300, Avi Kivity wrote:
> > +
> > +	count = (PAGE_SIZE - offset_in_page(linear_addr))/size;
> > +
> > +	if (count == 0) /* 'in' crosses page boundry */
> > +		return EMULATE_FAIL;
> > +
> > +	count = min(count, rcx);
> > +
> > +	r = __kvm_fast_string_pio_in(vcpu, size, port, linear_addr, count);
> > +
> > +	if (r != EMULATE_DO_MMIO)
> > +		return r;
> > +
> > +	vcpu->arch.fast_string_pio_ctxt.linear_addr = linear_addr;
> > +	vcpu->arch.complete_userspace_io = complete_fast_string_pio;
> > +	return EMULATE_DO_MMIO;
> > +}
> > +EXPORT_SYMBOL_GPL(kvm_fast_string_pio_in);
> 
> Perhaps a better way to do it is to move the code into the emulator,
> which already handles all the checks and masks, and just avoid
> x86_decode_insn()/x86_emulate_insn().
> 
I do not see how it would be better. Emulator works on different data
structures that should be prepared and the only functions that we can
reuse (as far as I see) are register_address_increment() and ad_mask()
anyway.

--
			Gleb.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/2] Provide fast path for "rep ins" emulation if possible.
  2012-05-24 10:54           ` Roedel, Joerg
@ 2012-06-05 17:27             ` Alexander Graf
  2012-06-05 17:36               ` Gleb Natapov
  0 siblings, 1 reply; 18+ messages in thread
From: Alexander Graf @ 2012-06-05 17:27 UTC (permalink / raw)
  To: Roedel, Joerg; +Cc: Avi Kivity, Gleb Natapov, kvm, mtosatti


On 24.05.2012, at 12:54, Roedel, Joerg wrote:

> On Thu, May 24, 2012 at 01:36:51PM +0300, Avi Kivity wrote:
>> On 05/24/2012 01:34 PM, Roedel, Joerg wrote:
>>> On Wed, May 23, 2012 at 05:49:31PM +0300, Avi Kivity wrote:
>>>> On 05/23/2012 05:40 PM, Avi Kivity wrote:
>>>>> On 05/23/2012 05:08 PM, Gleb Natapov wrote:
>>>>> If decode assists are not available, we still need to emulate, see 15.33.5.
>>>>> 
>>>> 
>>>> Joerg, the 2010 version of the manual says that the effective segment
>>>> (10:12) is only available with decode assists.  The 2012 version says
>>>> it's unconditional. What's correct?
>>> 
>>> It is still conditional and only available with decode-assists. I will
>>> talk to the APM authors to clarify this in the documentation.
>> 
>> Thanks.  As it happens it doesn't matter for INS emulation, only OUTS,
>> unless other bits in that word also depend on decode assists.
> 
> Doesn't look like it. All other bits EXITINFO1 bits for the IOIO
> intercept are documented from the very ancient beginnings of SVM. So the
> segment number is the only addition.

I do remember some bits missing in early implementations though. Don't ask me what, I just remember that when looking at intercept logs back in the day, some bits were not set even though they were documented as such - and those were pretty crucial bits, which is why we go through the emulator usually.


Alex


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/2] Provide fast path for "rep ins" emulation if possible.
  2012-06-05 17:27             ` Alexander Graf
@ 2012-06-05 17:36               ` Gleb Natapov
  2012-06-05 17:41                 ` Alexander Graf
  0 siblings, 1 reply; 18+ messages in thread
From: Gleb Natapov @ 2012-06-05 17:36 UTC (permalink / raw)
  To: Alexander Graf; +Cc: Roedel, Joerg, Avi Kivity, kvm, mtosatti

On Tue, Jun 05, 2012 at 07:27:40PM +0200, Alexander Graf wrote:
> 
> On 24.05.2012, at 12:54, Roedel, Joerg wrote:
> 
> > On Thu, May 24, 2012 at 01:36:51PM +0300, Avi Kivity wrote:
> >> On 05/24/2012 01:34 PM, Roedel, Joerg wrote:
> >>> On Wed, May 23, 2012 at 05:49:31PM +0300, Avi Kivity wrote:
> >>>> On 05/23/2012 05:40 PM, Avi Kivity wrote:
> >>>>> On 05/23/2012 05:08 PM, Gleb Natapov wrote:
> >>>>> If decode assists are not available, we still need to emulate, see 15.33.5.
> >>>>> 
> >>>> 
> >>>> Joerg, the 2010 version of the manual says that the effective segment
> >>>> (10:12) is only available with decode assists.  The 2012 version says
> >>>> it's unconditional. What's correct?
> >>> 
> >>> It is still conditional and only available with decode-assists. I will
> >>> talk to the APM authors to clarify this in the documentation.
> >> 
> >> Thanks.  As it happens it doesn't matter for INS emulation, only OUTS,
> >> unless other bits in that word also depend on decode assists.
> > 
> > Doesn't look like it. All other bits EXITINFO1 bits for the IOIO
> > intercept are documented from the very ancient beginnings of SVM. So the
> > segment number is the only addition.
> 
> I do remember some bits missing in early implementations though. Don't ask me what, I just remember that when looking at intercept logs back in the day, some bits were not set even though they were documented as such - and those were pretty crucial bits, which is why we go through the emulator usually.
> 
I know exactly why code goes through the emulation because I made it do so :)
And the reason was because the previous code didn't handle anything
except string pio to memory with address increment. i.e the case this
patch turns to fast path.

--
			Gleb.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/2] Provide fast path for "rep ins" emulation if possible.
  2012-06-05 17:36               ` Gleb Natapov
@ 2012-06-05 17:41                 ` Alexander Graf
  2012-06-05 17:50                   ` Gleb Natapov
  2012-06-05 17:57                   ` Gleb Natapov
  0 siblings, 2 replies; 18+ messages in thread
From: Alexander Graf @ 2012-06-05 17:41 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Roedel, Joerg, Avi Kivity, kvm, mtosatti


On 05.06.2012, at 19:36, Gleb Natapov wrote:

> On Tue, Jun 05, 2012 at 07:27:40PM +0200, Alexander Graf wrote:
>> 
>> On 24.05.2012, at 12:54, Roedel, Joerg wrote:
>> 
>>> On Thu, May 24, 2012 at 01:36:51PM +0300, Avi Kivity wrote:
>>>> On 05/24/2012 01:34 PM, Roedel, Joerg wrote:
>>>>> On Wed, May 23, 2012 at 05:49:31PM +0300, Avi Kivity wrote:
>>>>>> On 05/23/2012 05:40 PM, Avi Kivity wrote:
>>>>>>> On 05/23/2012 05:08 PM, Gleb Natapov wrote:
>>>>>>> If decode assists are not available, we still need to emulate, see 15.33.5.
>>>>>>> 
>>>>>> 
>>>>>> Joerg, the 2010 version of the manual says that the effective segment
>>>>>> (10:12) is only available with decode assists.  The 2012 version says
>>>>>> it's unconditional. What's correct?
>>>>> 
>>>>> It is still conditional and only available with decode-assists. I will
>>>>> talk to the APM authors to clarify this in the documentation.
>>>> 
>>>> Thanks.  As it happens it doesn't matter for INS emulation, only OUTS,
>>>> unless other bits in that word also depend on decode assists.
>>> 
>>> Doesn't look like it. All other bits EXITINFO1 bits for the IOIO
>>> intercept are documented from the very ancient beginnings of SVM. So the
>>> segment number is the only addition.
>> 
>> I do remember some bits missing in early implementations though. Don't ask me what, I just remember that when looking at intercept logs back in the day, some bits were not set even though they were documented as such - and those were pretty crucial bits, which is why we go through the emulator usually.
>> 
> I know exactly why code goes through the emulation because I made it do so :)
> And the reason was because the previous code didn't handle anything
> except string pio to memory with address increment. i.e the case this
> patch turns to fast path.

No, what I'm saying is that the hardware omitted some fields even though they were provided in the spec. IIRC it was the size field, but I could be wrong. Please take an old (phenom / barcelona is old enough) machine and create some traces to make 100% sure that all fields you're trying to read out are actually provided.


Alex


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/2] Provide fast path for "rep ins" emulation if possible.
  2012-06-05 17:41                 ` Alexander Graf
@ 2012-06-05 17:50                   ` Gleb Natapov
  2012-06-05 18:17                     ` Gleb Natapov
  2012-06-05 17:57                   ` Gleb Natapov
  1 sibling, 1 reply; 18+ messages in thread
From: Gleb Natapov @ 2012-06-05 17:50 UTC (permalink / raw)
  To: Alexander Graf; +Cc: Roedel, Joerg, Avi Kivity, kvm, mtosatti

On Tue, Jun 05, 2012 at 07:41:17PM +0200, Alexander Graf wrote:
> 
> On 05.06.2012, at 19:36, Gleb Natapov wrote:
> 
> > On Tue, Jun 05, 2012 at 07:27:40PM +0200, Alexander Graf wrote:
> >> 
> >> On 24.05.2012, at 12:54, Roedel, Joerg wrote:
> >> 
> >>> On Thu, May 24, 2012 at 01:36:51PM +0300, Avi Kivity wrote:
> >>>> On 05/24/2012 01:34 PM, Roedel, Joerg wrote:
> >>>>> On Wed, May 23, 2012 at 05:49:31PM +0300, Avi Kivity wrote:
> >>>>>> On 05/23/2012 05:40 PM, Avi Kivity wrote:
> >>>>>>> On 05/23/2012 05:08 PM, Gleb Natapov wrote:
> >>>>>>> If decode assists are not available, we still need to emulate, see 15.33.5.
> >>>>>>> 
> >>>>>> 
> >>>>>> Joerg, the 2010 version of the manual says that the effective segment
> >>>>>> (10:12) is only available with decode assists.  The 2012 version says
> >>>>>> it's unconditional. What's correct?
> >>>>> 
> >>>>> It is still conditional and only available with decode-assists. I will
> >>>>> talk to the APM authors to clarify this in the documentation.
> >>>> 
> >>>> Thanks.  As it happens it doesn't matter for INS emulation, only OUTS,
> >>>> unless other bits in that word also depend on decode assists.
> >>> 
> >>> Doesn't look like it. All other bits EXITINFO1 bits for the IOIO
> >>> intercept are documented from the very ancient beginnings of SVM. So the
> >>> segment number is the only addition.
> >> 
> >> I do remember some bits missing in early implementations though. Don't ask me what, I just remember that when looking at intercept logs back in the day, some bits were not set even though they were documented as such - and those were pretty crucial bits, which is why we go through the emulator usually.
> >> 
> > I know exactly why code goes through the emulation because I made it do so :)
> > And the reason was because the previous code didn't handle anything
> > except string pio to memory with address increment. i.e the case this
> > patch turns to fast path.
> 
> No, what I'm saying is that the hardware omitted some fields even though they were provided in the spec. IIRC it was the size field, but I could be wrong. Please take an old (phenom / barcelona is old enough) machine and create some traces to make 100% sure that all fields you're trying to read out are actually provided.
> 
May be you are right about missing fields, but I am definitely know that
this is not the reason string PIO goes through the emulator. Are you
sure missing fields where on AMD? Intel did miss some crucial info in
early versions.

--
			Gleb.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/2] Provide fast path for "rep ins" emulation if possible.
  2012-06-05 17:41                 ` Alexander Graf
  2012-06-05 17:50                   ` Gleb Natapov
@ 2012-06-05 17:57                   ` Gleb Natapov
  2012-06-06  9:28                     ` Roedel, Joerg
  1 sibling, 1 reply; 18+ messages in thread
From: Gleb Natapov @ 2012-06-05 17:57 UTC (permalink / raw)
  To: Alexander Graf; +Cc: Roedel, Joerg, Avi Kivity, kvm, mtosatti

On Tue, Jun 05, 2012 at 07:41:17PM +0200, Alexander Graf wrote:
> IIRC it was the size field, but I could be wrong. Please take an old (phenom / barcelona is old enough) machine and create some traces to make 100% sure that all fields you're trying to read out are actually provided.
> 
Joerg do you know about an errata of such kind?

--
			Gleb.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/2] Provide fast path for "rep ins" emulation if possible.
  2012-06-05 17:50                   ` Gleb Natapov
@ 2012-06-05 18:17                     ` Gleb Natapov
  0 siblings, 0 replies; 18+ messages in thread
From: Gleb Natapov @ 2012-06-05 18:17 UTC (permalink / raw)
  To: Alexander Graf; +Cc: Roedel, Joerg, Avi Kivity, kvm, mtosatti

On Tue, Jun 05, 2012 at 08:50:16PM +0300, Gleb Natapov wrote:
> > No, what I'm saying is that the hardware omitted some fields even though they were provided in the spec. IIRC it was the size field, but I could be wrong. Please take an old (phenom / barcelona is old enough) machine and create some traces to make 100% sure that all fields you're trying to read out are actually provided.
> > 
> May be you are right about missing fields, but I am definitely know that
> this is not the reason string PIO goes through the emulator. Are you
> sure missing fields where on AMD? Intel did miss some crucial info in
> early versions.
> 
I double checked and while emulation of string PIO was outside of
emulator the decode went through emulator. The size field should be OK
though since it is used for regular PIO emulation, so may be the problem
was with address size or with effective data segment. If later then this
is well known and does not influence this patch.

--
			Gleb.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/2] Provide fast path for "rep ins" emulation if possible.
  2012-06-05 17:19     ` Gleb Natapov
@ 2012-06-06  8:04       ` Avi Kivity
  0 siblings, 0 replies; 18+ messages in thread
From: Avi Kivity @ 2012-06-06  8:04 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, mtosatti

On 06/05/2012 08:19 PM, Gleb Natapov wrote:
> On Tue, Jun 05, 2012 at 07:07:39PM +0300, Avi Kivity wrote:
>> > +
>> > +	count = (PAGE_SIZE - offset_in_page(linear_addr))/size;
>> > +
>> > +	if (count == 0) /* 'in' crosses page boundry */
>> > +		return EMULATE_FAIL;
>> > +
>> > +	count = min(count, rcx);
>> > +
>> > +	r = __kvm_fast_string_pio_in(vcpu, size, port, linear_addr, count);
>> > +
>> > +	if (r != EMULATE_DO_MMIO)
>> > +		return r;
>> > +
>> > +	vcpu->arch.fast_string_pio_ctxt.linear_addr = linear_addr;
>> > +	vcpu->arch.complete_userspace_io = complete_fast_string_pio;
>> > +	return EMULATE_DO_MMIO;
>> > +}
>> > +EXPORT_SYMBOL_GPL(kvm_fast_string_pio_in);
>> 
>> Perhaps a better way to do it is to move the code into the emulator,
>> which already handles all the checks and masks, and just avoid
>> x86_decode_insn()/x86_emulate_insn().
>> 
> I do not see how it would be better. Emulator works on different data
> structures that should be prepared and the only functions that we can
> reuse (as far as I see) are register_address_increment() and ad_mask()
> anyway.

Also linearize().  Remember the hardware segment checks are only applied
to the first iteration.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/2] Provide fast path for "rep ins" emulation if possible.
  2012-06-05 17:57                   ` Gleb Natapov
@ 2012-06-06  9:28                     ` Roedel, Joerg
  0 siblings, 0 replies; 18+ messages in thread
From: Roedel, Joerg @ 2012-06-06  9:28 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Alexander Graf, Avi Kivity, kvm, mtosatti

On Tue, Jun 05, 2012 at 08:57:17PM +0300, Gleb Natapov wrote:
> On Tue, Jun 05, 2012 at 07:41:17PM +0200, Alexander Graf wrote:
> > IIRC it was the size field, but I could be wrong. Please take an old (phenom / barcelona is old enough) machine and create some traces to make 100% sure that all fields you're trying to read out are actually provided.
> > 
> Joerg do you know about an errata of such kind?

No, I am not aware of anything like that. The only thing that come to
mind is the missing effective segment information without decode
assists.


	Joerg

-- 
AMD Operating System Research Center

Advanced Micro Devices GmbH Einsteinring 24 85609 Dornach
General Managers: Alberto Bozzo
Registration: Dornach, Landkr. Muenchen; Registerger. Muenchen, HRB Nr. 43632


^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2012-06-06  9:28 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-05-23 14:08 [PATCH 0/2] improve speed of "rep ins" emulation Gleb Natapov
2012-05-23 14:08 ` [PATCH 1/2] Provide userspace IO exit completion callback Gleb Natapov
2012-05-23 14:08 ` [PATCH 2/2] Provide fast path for "rep ins" emulation if possible Gleb Natapov
2012-05-23 14:40   ` Avi Kivity
2012-05-23 14:49     ` Avi Kivity
2012-05-24 10:34       ` Roedel, Joerg
2012-05-24 10:36         ` Avi Kivity
2012-05-24 10:54           ` Roedel, Joerg
2012-06-05 17:27             ` Alexander Graf
2012-06-05 17:36               ` Gleb Natapov
2012-06-05 17:41                 ` Alexander Graf
2012-06-05 17:50                   ` Gleb Natapov
2012-06-05 18:17                     ` Gleb Natapov
2012-06-05 17:57                   ` Gleb Natapov
2012-06-06  9:28                     ` Roedel, Joerg
2012-06-05 16:07   ` Avi Kivity
2012-06-05 17:19     ` Gleb Natapov
2012-06-06  8:04       ` Avi Kivity

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.