linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Re: [PATCH 00/15] KVM: x86: fully implement vMTRR
  2015-05-30 10:59 [PATCH 00/15] KVM: x86: fully implement vMTRR Xiao Guangrong
@ 2015-05-30 10:58 ` Xiao Guangrong
  2015-05-30 10:59 ` [PATCH 01/15] KVM: x86: move MTRR related code to a separate file Xiao Guangrong
                   ` (14 subsequent siblings)
  15 siblings, 0 replies; 44+ messages in thread
From: Xiao Guangrong @ 2015-05-30 10:58 UTC (permalink / raw)
  To: pbonzini; +Cc: gleb, mtosatti, kvm, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 1157 bytes --]



On 05/30/2015 06:59 PM, Xiao Guangrong wrote:
> Currently guest MTRR is completely prohibited if cache snoop is supported on
> IOMMU (!noncoherent_dma) and host does the emulation based on the knowledge
> from host side, however, host side is not the good point to know
> what the purpose of guest is. A good example is that pass-throughed VGA
> frame buffer is not always UC as host expected
>
> Besides that, there is a bugs in current code if noncoherent_dma is detected:
> KVM still uses hugepage even if the 4K pages in that ranges span between
> on different memory types and uses the cache type of first page to do memory
> mapping
>
> This patchset enables full MTRR virtualization and currently only works on
> Intel EPT architecture
>
> There is the code will be attached in this thread is used to check the effect
> of this patchset:
> [12834.843439] __INIT__.
> [12835.909109] Running in preload MTRR_TYPE_UNCACHABLE 4753900 ns.
> [12835.915232] Running in MTRR_TYPE_UNCACHABLE 5257522 ns.
> [12838.083948] Running in preload MTRR_TYPE_WRBACK 13266 ns.
> [12838.084998] Running in MTRR_TYPE_WRBACK 13099 ns.
>

Has been attached in this mail.

[-- Attachment #2: main.c --]
[-- Type: text/x-csrc, Size: 3622 bytes --]

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/delay.h>
#include <linux/fb.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/ioctl.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <asm/io.h>
#include <linux/uaccess.h>

#ifdef CONFIG_MTRR
#include <asm/mtrr.h>
#endif

//#define DEBUG

#ifdef DEBUG
#define dprintk(fmt, args...)		\
	printk(fmt, ##args)
#define debug_show_mtrr()	show_mtrr()
#else
//#define pr_debug(format, ...) do {} while (0)
#define	dprintk(fmt, ...)	do {} while (0)
#define	debug_show_mtrr()	do {} while (0)
#endif

MODULE_LICENSE("GPL");

#define NR_PAGES_ORDER	1

static u64 __do_write(struct page *page, int page_nr)
{
	void *addr = page_address(page);
	u64 total = 0;
	int i;
	
	for (i = 0; i < PAGE_SIZE * page_nr; i++) {
		((char *)addr)[i] = (char)0x99;
	}

	return total;
}

static u64 __do_read(struct page *page, int page_nr)
{
	void *addr = page_address(page);
	u64 total = 0;
	int i;
	
	for (i = 0; i < PAGE_SIZE * page_nr; i++) {
		total += ((char *)addr)[i];
		if (((char *)addr)[i] != (char)0x99) {
			printk("read failed %x expected 0x99.\n", ((char *)addr)[i]);
			break;
		}
	}

	return total;
}

static void do_rw(struct page *page, int page_nr, const char *cachetype)
{
	ktime_t start, end;
	u64 total;
	
	start = ktime_get();
	__do_write(page, page_nr);
	total = __do_read(page, page_nr);
	end = ktime_get();
	printk("Running in %s %lld ns.\n", cachetype,
	       ktime_to_ns(ktime_sub(end, start)));	
}

static int add_region_to_mtrr(struct page *page, int page_nr, int type,
			      const char *cachetype)
{
	unsigned long pfn;
	int ret;
	
	pfn = page_to_pfn(page);
	
	ret = mtrr_add(pfn << PAGE_SHIFT, PAGE_SIZE * page_nr, type, 1);
	if (ret < 0)
		printk("mtrr_add %s failed, return %d.\n", cachetype, ret);

	dprintk("%s add region %lx nrpage %d.\n", cachetype, pfn << PAGE_SHIFT, page_nr);	
	return ret;
}

static int del_region_from_mtrr(int entry, struct page *page, int page_nr, const char *cachetype)
{
	unsigned long pfn;
	int ret;
	
	pfn = page_to_pfn(page);
	
	ret = mtrr_del(entry, pfn << PAGE_SHIFT, PAGE_SIZE * page_nr);
	if (ret < 0)
		printk("mtrr_del %s failed, return %d.\n", cachetype, ret);
	dprintk("%s del region %lx nrpage %d.\n", cachetype, pfn << PAGE_SHIFT, page_nr);	
	return ret;
}

static int memory_rw(void)
{
	struct page *page;
	int page_order, page_nr, ret;
	
	page_order = NR_PAGES_ORDER;
	page_nr = 1 << page_order;
	
	page = alloc_pages(GFP_KERNEL, page_order);
	if (!page)
		return -ENOMEM;
	
	ret = add_region_to_mtrr(page, page_nr, MTRR_TYPE_UNCACHABLE,
				 "MTRR_TYPE_UNCACHABLE");
	if (ret < 0)
		goto exit;

	do_rw(page, page_nr, "preload MTRR_TYPE_UNCACHABLE");
	do_rw(page, page_nr, "MTRR_TYPE_UNCACHABLE");
	
	ret = del_region_from_mtrr(ret, page, page_nr, "MTRR_TYPE_UNCACHABLE");
	if (ret < 0)
		goto exit;
	
	ret = add_region_to_mtrr(page, page_nr, MTRR_TYPE_WRBACK,
				 "MTRR_TYPE_WRBACK");
	if (ret < 0)
		goto exit;
	
	do_rw(page, page_nr, "preload MTRR_TYPE_WRBACK");
	do_rw(page, page_nr, "MTRR_TYPE_WRBACK");

	ret = del_region_from_mtrr(ret, page, page_nr, "MTRR_TYPE_WRBACK");
	if (ret < 0)
		goto exit;
		
	ret = 0;
exit:
	__free_pages(page, page_order);
	return ret;
		
}

static int __init my_init(void)
{
	printk("__INIT__.\n");

	dprintk("######### INIT MTRR...\n");

	return memory_rw();
}

static void __exit my_exit(void)
{
	dprintk("###### EXIT MTRR...\n");
	return;
}

module_init(my_init)
module_exit(my_exit)


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [PATCH 00/15] KVM: x86: fully implement vMTRR
@ 2015-05-30 10:59 Xiao Guangrong
  2015-05-30 10:58 ` Xiao Guangrong
                   ` (15 more replies)
  0 siblings, 16 replies; 44+ messages in thread
From: Xiao Guangrong @ 2015-05-30 10:59 UTC (permalink / raw)
  To: pbonzini; +Cc: gleb, mtosatti, kvm, linux-kernel, Xiao Guangrong

Currently guest MTRR is completely prohibited if cache snoop is supported on
IOMMU (!noncoherent_dma) and host does the emulation based on the knowledge
from host side, however, host side is not the good point to know
what the purpose of guest is. A good example is that pass-throughed VGA
frame buffer is not always UC as host expected

Besides that, there is a bugs in current code if noncoherent_dma is detected:
KVM still uses hugepage even if the 4K pages in that ranges span between
on different memory types and uses the cache type of first page to do memory
mapping

This patchset enables full MTRR virtualization and currently only works on
Intel EPT architecture

There is the code will be attached in this thread is used to check the effect
of this patchset:
[12834.843439] __INIT__.
[12835.909109] Running in preload MTRR_TYPE_UNCACHABLE 4753900 ns.
[12835.915232] Running in MTRR_TYPE_UNCACHABLE 5257522 ns.
[12838.083948] Running in preload MTRR_TYPE_WRBACK 13266 ns.
[12838.084998] Running in MTRR_TYPE_WRBACK 13099 ns.

There is the kernel compiling test result in guest which has 16 vcpus and 10G
memory:
before the patchset: 4m48.162s
after the patchset: 4m49.014s
no performance regression is detected

Xiao Guangrong (15):
  KVM: x86: move MTRR related code to a separate file
  KVM: MTRR: handle MSR_MTRRcap in kvm_mtrr_get_msr
  KVM: MTRR: remove mtrr_state.have_fixed
  KVM: MTRR: exactly define the size of variable MTRRs
  KVM: MTRR: clean up mtrr default type
  KVM: MTRR: do not split 64 bits MSR content
  KVM: MTRR: improve kvm_mtrr_get_guest_memory_type
  KVM: MTRR: introduce fixed_mtrr_segment table
  KVM: MTRR: introduce var_mtrr_range
  KVM: MTRR: sort variable MTRRs
  KVM: MTRR: introduce fixed_mtrr_addr_* functions
  KVM: MTRR: introduce mtrr_for_each_mem_type
  KVM: MTRR: simplify kvm_mtrr_get_guest_memory_type
  KVM: MTRR: do not map huage page for non-consistent range
  KVM: VMX: fully implement guest MTRR virtualization

 arch/x86/include/asm/kvm_host.h |  29 +-
 arch/x86/kvm/Makefile           |   2 +-
 arch/x86/kvm/mmu.c              | 126 ++------
 arch/x86/kvm/mtrr.c             | 671 ++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/svm.c              |   2 +-
 arch/x86/kvm/vmx.c              |  28 +-
 arch/x86/kvm/x86.c              | 219 +------------
 arch/x86/kvm/x86.h              |   5 +
 8 files changed, 731 insertions(+), 351 deletions(-)
 create mode 100644 arch/x86/kvm/mtrr.c

-- 
2.1.0


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [PATCH 01/15] KVM: x86: move MTRR related code to a separate file
  2015-05-30 10:59 [PATCH 00/15] KVM: x86: fully implement vMTRR Xiao Guangrong
  2015-05-30 10:58 ` Xiao Guangrong
@ 2015-05-30 10:59 ` Xiao Guangrong
  2015-05-30 10:59 ` [PATCH 02/15] KVM: MTRR: handle MSR_MTRRcap in kvm_mtrr_get_msr Xiao Guangrong
                   ` (13 subsequent siblings)
  15 siblings, 0 replies; 44+ messages in thread
From: Xiao Guangrong @ 2015-05-30 10:59 UTC (permalink / raw)
  To: pbonzini; +Cc: gleb, mtosatti, kvm, linux-kernel, Xiao Guangrong

MTRR code locates in x86.c and mmu.c so that move them to a separate file to
make the organization more clearer and it will be the place where we fully
implement vMTRR

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
 arch/x86/include/asm/kvm_host.h |   2 +-
 arch/x86/kvm/Makefile           |   2 +-
 arch/x86/kvm/mmu.c              | 103 ------------
 arch/x86/kvm/mtrr.c             | 336 ++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/vmx.c              |   2 +-
 arch/x86/kvm/x86.c              | 214 +------------------------
 arch/x86/kvm/x86.h              |   2 +
 7 files changed, 343 insertions(+), 318 deletions(-)
 create mode 100644 arch/x86/kvm/mtrr.c

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7276107..2353392 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -893,7 +893,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
 
 int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 			  const void *val, int bytes);
-u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
+u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
 
 struct kvm_irq_mask_notifier {
 	void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked);
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 16e8f96..470dc6c9 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -12,7 +12,7 @@ kvm-y			+= $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
 kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(KVM)/async_pf.o
 
 kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
-			   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o
+			   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o
 kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)	+= assigned-dev.o iommu.o
 kvm-intel-y		+= vmx.o
 kvm-amd-y		+= svm.o
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 6ea2481..7462c57 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2428,109 +2428,6 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page);
 
-/*
- * The function is based on mtrr_type_lookup() in
- * arch/x86/kernel/cpu/mtrr/generic.c
- */
-static int get_mtrr_type(struct mtrr_state_type *mtrr_state,
-			 u64 start, u64 end)
-{
-	u64 base, mask;
-	u8 prev_match, curr_match;
-	int i, num_var_ranges = KVM_NR_VAR_MTRR;
-
-	/* MTRR is completely disabled, use UC for all of physical memory. */
-	if (!(mtrr_state->enabled & 0x2))
-		return MTRR_TYPE_UNCACHABLE;
-
-	/* Make end inclusive end, instead of exclusive */
-	end--;
-
-	/* Look in fixed ranges. Just return the type as per start */
-	if (mtrr_state->have_fixed && (mtrr_state->enabled & 0x1) &&
-	      (start < 0x100000)) {
-		int idx;
-
-		if (start < 0x80000) {
-			idx = 0;
-			idx += (start >> 16);
-			return mtrr_state->fixed_ranges[idx];
-		} else if (start < 0xC0000) {
-			idx = 1 * 8;
-			idx += ((start - 0x80000) >> 14);
-			return mtrr_state->fixed_ranges[idx];
-		} else if (start < 0x1000000) {
-			idx = 3 * 8;
-			idx += ((start - 0xC0000) >> 12);
-			return mtrr_state->fixed_ranges[idx];
-		}
-	}
-
-	/*
-	 * Look in variable ranges
-	 * Look of multiple ranges matching this address and pick type
-	 * as per MTRR precedence
-	 */
-	prev_match = 0xFF;
-	for (i = 0; i < num_var_ranges; ++i) {
-		unsigned short start_state, end_state;
-
-		if (!(mtrr_state->var_ranges[i].mask_lo & (1 << 11)))
-			continue;
-
-		base = (((u64)mtrr_state->var_ranges[i].base_hi) << 32) +
-		       (mtrr_state->var_ranges[i].base_lo & PAGE_MASK);
-		mask = (((u64)mtrr_state->var_ranges[i].mask_hi) << 32) +
-		       (mtrr_state->var_ranges[i].mask_lo & PAGE_MASK);
-
-		start_state = ((start & mask) == (base & mask));
-		end_state = ((end & mask) == (base & mask));
-		if (start_state != end_state)
-			return 0xFE;
-
-		if ((start & mask) != (base & mask))
-			continue;
-
-		curr_match = mtrr_state->var_ranges[i].base_lo & 0xff;
-		if (prev_match == 0xFF) {
-			prev_match = curr_match;
-			continue;
-		}
-
-		if (prev_match == MTRR_TYPE_UNCACHABLE ||
-		    curr_match == MTRR_TYPE_UNCACHABLE)
-			return MTRR_TYPE_UNCACHABLE;
-
-		if ((prev_match == MTRR_TYPE_WRBACK &&
-		     curr_match == MTRR_TYPE_WRTHROUGH) ||
-		    (prev_match == MTRR_TYPE_WRTHROUGH &&
-		     curr_match == MTRR_TYPE_WRBACK)) {
-			prev_match = MTRR_TYPE_WRTHROUGH;
-			curr_match = MTRR_TYPE_WRTHROUGH;
-		}
-
-		if (prev_match != curr_match)
-			return MTRR_TYPE_UNCACHABLE;
-	}
-
-	if (prev_match != 0xFF)
-		return prev_match;
-
-	return mtrr_state->def_type;
-}
-
-u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
-{
-	u8 mtrr;
-
-	mtrr = get_mtrr_type(&vcpu->arch.mtrr_state, gfn << PAGE_SHIFT,
-			     (gfn << PAGE_SHIFT) + PAGE_SIZE);
-	if (mtrr == 0xfe || mtrr == 0xff)
-		mtrr = MTRR_TYPE_WRBACK;
-	return mtrr;
-}
-EXPORT_SYMBOL_GPL(kvm_get_guest_memory_type);
-
 static void __kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 {
 	trace_kvm_mmu_unsync_page(sp);
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
new file mode 100644
index 0000000..599485d
--- /dev/null
+++ b/arch/x86/kvm/mtrr.c
@@ -0,0 +1,336 @@
+
+/*
+ * vMTRR implementation
+ *
+ * Copyright (C) 2006 Qumranet, Inc.
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ * Copyright(C) 2015 Intel Corporation.
+ *
+ * Authors:
+ *   Yaniv Kamay  <yaniv@qumranet.com>
+ *   Avi Kivity   <avi@qumranet.com>
+ *   Marcelo Tosatti <mtosatti@redhat.com>
+ *   Paolo Bonzini <pbonzini@redhat.com>
+ *   Xiao Guangrong <guangrong.xiao@linux.intel.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/mtrr.h>
+
+#include "cpuid.h"
+#include "mmu.h"
+
+static bool msr_mtrr_valid(unsigned msr)
+{
+	switch (msr) {
+	case 0x200 ... 0x200 + 2 * KVM_NR_VAR_MTRR - 1:
+	case MSR_MTRRfix64K_00000:
+	case MSR_MTRRfix16K_80000:
+	case MSR_MTRRfix16K_A0000:
+	case MSR_MTRRfix4K_C0000:
+	case MSR_MTRRfix4K_C8000:
+	case MSR_MTRRfix4K_D0000:
+	case MSR_MTRRfix4K_D8000:
+	case MSR_MTRRfix4K_E0000:
+	case MSR_MTRRfix4K_E8000:
+	case MSR_MTRRfix4K_F0000:
+	case MSR_MTRRfix4K_F8000:
+	case MSR_MTRRdefType:
+	case MSR_IA32_CR_PAT:
+		return true;
+	case 0x2f8:
+		return true;
+	}
+	return false;
+}
+
+static bool valid_pat_type(unsigned t)
+{
+	return t < 8 && (1 << t) & 0xf3; /* 0, 1, 4, 5, 6, 7 */
+}
+
+static bool valid_mtrr_type(unsigned t)
+{
+	return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */
+}
+
+bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+	int i;
+	u64 mask;
+
+	if (!msr_mtrr_valid(msr))
+		return false;
+
+	if (msr == MSR_IA32_CR_PAT) {
+		for (i = 0; i < 8; i++)
+			if (!valid_pat_type((data >> (i * 8)) & 0xff))
+				return false;
+		return true;
+	} else if (msr == MSR_MTRRdefType) {
+		if (data & ~0xcff)
+			return false;
+		return valid_mtrr_type(data & 0xff);
+	} else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) {
+		for (i = 0; i < 8 ; i++)
+			if (!valid_mtrr_type((data >> (i * 8)) & 0xff))
+				return false;
+		return true;
+	}
+
+	/* variable MTRRs */
+	WARN_ON(!(msr >= 0x200 && msr < 0x200 + 2 * KVM_NR_VAR_MTRR));
+
+	mask = (~0ULL) << cpuid_maxphyaddr(vcpu);
+	if ((msr & 1) == 0) {
+		/* MTRR base */
+		if (!valid_mtrr_type(data & 0xff))
+			return false;
+		mask |= 0xf00;
+	} else
+		/* MTRR mask */
+		mask |= 0x7ff;
+	if (data & mask) {
+		kvm_inject_gp(vcpu, 0);
+		return false;
+	}
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(kvm_mtrr_valid);
+
+static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
+{
+	struct mtrr_state_type *mtrr_state = &vcpu->arch.mtrr_state;
+	unsigned char mtrr_enabled = mtrr_state->enabled;
+	gfn_t start, end, mask;
+	int index;
+	bool is_fixed = true;
+
+	if (msr == MSR_IA32_CR_PAT || !tdp_enabled ||
+	      !kvm_arch_has_noncoherent_dma(vcpu->kvm))
+		return;
+
+	if (!(mtrr_enabled & 0x2) && msr != MSR_MTRRdefType)
+		return;
+
+	switch (msr) {
+	case MSR_MTRRfix64K_00000:
+		start = 0x0;
+		end = 0x80000;
+		break;
+	case MSR_MTRRfix16K_80000:
+		start = 0x80000;
+		end = 0xa0000;
+		break;
+	case MSR_MTRRfix16K_A0000:
+		start = 0xa0000;
+		end = 0xc0000;
+		break;
+	case MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000:
+		index = msr - MSR_MTRRfix4K_C0000;
+		start = 0xc0000 + index * (32 << 10);
+		end = start + (32 << 10);
+		break;
+	case MSR_MTRRdefType:
+		is_fixed = false;
+		start = 0x0;
+		end = ~0ULL;
+		break;
+	default:
+		/* variable range MTRRs. */
+		is_fixed = false;
+		index = (msr - 0x200) / 2;
+		start = (((u64)mtrr_state->var_ranges[index].base_hi) << 32) +
+		       (mtrr_state->var_ranges[index].base_lo & PAGE_MASK);
+		mask = (((u64)mtrr_state->var_ranges[index].mask_hi) << 32) +
+		       (mtrr_state->var_ranges[index].mask_lo & PAGE_MASK);
+		mask |= ~0ULL << cpuid_maxphyaddr(vcpu);
+
+		end = ((start & mask) | ~mask) + 1;
+	}
+
+	if (is_fixed && !(mtrr_enabled & 0x1))
+		return;
+
+	kvm_zap_gfn_range(vcpu->kvm, gpa_to_gfn(start), gpa_to_gfn(end));
+}
+
+int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+	u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
+
+	if (!kvm_mtrr_valid(vcpu, msr, data))
+		return 1;
+
+	if (msr == MSR_MTRRdefType) {
+		vcpu->arch.mtrr_state.def_type = data;
+		vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10;
+	} else if (msr == MSR_MTRRfix64K_00000)
+		p[0] = data;
+	else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
+		p[1 + msr - MSR_MTRRfix16K_80000] = data;
+	else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
+		p[3 + msr - MSR_MTRRfix4K_C0000] = data;
+	else if (msr == MSR_IA32_CR_PAT)
+		vcpu->arch.pat = data;
+	else {	/* Variable MTRRs */
+		int idx, is_mtrr_mask;
+		u64 *pt;
+
+		idx = (msr - 0x200) / 2;
+		is_mtrr_mask = msr - 0x200 - 2 * idx;
+		if (!is_mtrr_mask)
+			pt =
+			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
+		else
+			pt =
+			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
+		*pt = data;
+	}
+
+	update_mtrr(vcpu, msr);
+	return 0;
+}
+
+int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+{
+	u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
+
+	if (!msr_mtrr_valid(msr))
+		return 1;
+
+	if (msr == MSR_MTRRdefType)
+		*pdata = vcpu->arch.mtrr_state.def_type +
+			 (vcpu->arch.mtrr_state.enabled << 10);
+	else if (msr == MSR_MTRRfix64K_00000)
+		*pdata = p[0];
+	else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
+		*pdata = p[1 + msr - MSR_MTRRfix16K_80000];
+	else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
+		*pdata = p[3 + msr - MSR_MTRRfix4K_C0000];
+	else if (msr == MSR_IA32_CR_PAT)
+		*pdata = vcpu->arch.pat;
+	else {	/* Variable MTRRs */
+		int idx, is_mtrr_mask;
+		u64 *pt;
+
+		idx = (msr - 0x200) / 2;
+		is_mtrr_mask = msr - 0x200 - 2 * idx;
+		if (!is_mtrr_mask)
+			pt =
+			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
+		else
+			pt =
+			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
+		*pdata = *pt;
+	}
+
+	return 0;
+}
+
+/*
+ * The function is based on mtrr_type_lookup() in
+ * arch/x86/kernel/cpu/mtrr/generic.c
+ */
+static int get_mtrr_type(struct mtrr_state_type *mtrr_state,
+			 u64 start, u64 end)
+{
+	u64 base, mask;
+	u8 prev_match, curr_match;
+	int i, num_var_ranges = KVM_NR_VAR_MTRR;
+
+	/* MTRR is completely disabled, use UC for all of physical memory. */
+	if (!(mtrr_state->enabled & 0x2))
+		return MTRR_TYPE_UNCACHABLE;
+
+	/* Make end inclusive end, instead of exclusive */
+	end--;
+
+	/* Look in fixed ranges. Just return the type as per start */
+	if (mtrr_state->have_fixed && (mtrr_state->enabled & 0x1) &&
+	      (start < 0x100000)) {
+		int idx;
+
+		if (start < 0x80000) {
+			idx = 0;
+			idx += (start >> 16);
+			return mtrr_state->fixed_ranges[idx];
+		} else if (start < 0xC0000) {
+			idx = 1 * 8;
+			idx += ((start - 0x80000) >> 14);
+			return mtrr_state->fixed_ranges[idx];
+		} else if (start < 0x1000000) {
+			idx = 3 * 8;
+			idx += ((start - 0xC0000) >> 12);
+			return mtrr_state->fixed_ranges[idx];
+		}
+	}
+
+	/*
+	 * Look in variable ranges
+	 * Look of multiple ranges matching this address and pick type
+	 * as per MTRR precedence
+	 */
+	prev_match = 0xFF;
+	for (i = 0; i < num_var_ranges; ++i) {
+		unsigned short start_state, end_state;
+
+		if (!(mtrr_state->var_ranges[i].mask_lo & (1 << 11)))
+			continue;
+
+		base = (((u64)mtrr_state->var_ranges[i].base_hi) << 32) +
+		       (mtrr_state->var_ranges[i].base_lo & PAGE_MASK);
+		mask = (((u64)mtrr_state->var_ranges[i].mask_hi) << 32) +
+		       (mtrr_state->var_ranges[i].mask_lo & PAGE_MASK);
+
+		start_state = ((start & mask) == (base & mask));
+		end_state = ((end & mask) == (base & mask));
+		if (start_state != end_state)
+			return 0xFE;
+
+		if ((start & mask) != (base & mask))
+			continue;
+
+		curr_match = mtrr_state->var_ranges[i].base_lo & 0xff;
+		if (prev_match == 0xFF) {
+			prev_match = curr_match;
+			continue;
+		}
+
+		if (prev_match == MTRR_TYPE_UNCACHABLE ||
+		    curr_match == MTRR_TYPE_UNCACHABLE)
+			return MTRR_TYPE_UNCACHABLE;
+
+		if ((prev_match == MTRR_TYPE_WRBACK &&
+		     curr_match == MTRR_TYPE_WRTHROUGH) ||
+		    (prev_match == MTRR_TYPE_WRTHROUGH &&
+		     curr_match == MTRR_TYPE_WRBACK)) {
+			prev_match = MTRR_TYPE_WRTHROUGH;
+			curr_match = MTRR_TYPE_WRTHROUGH;
+		}
+
+		if (prev_match != curr_match)
+			return MTRR_TYPE_UNCACHABLE;
+	}
+
+	if (prev_match != 0xFF)
+		return prev_match;
+
+	return mtrr_state->def_type;
+}
+
+u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+	u8 mtrr;
+
+	mtrr = get_mtrr_type(&vcpu->arch.mtrr_state, gfn << PAGE_SHIFT,
+			     (gfn << PAGE_SHIFT) + PAGE_SIZE);
+	if (mtrr == 0xfe || mtrr == 0xff)
+		mtrr = MTRR_TYPE_WRBACK;
+	return mtrr;
+}
+EXPORT_SYMBOL_GPL(kvm_mtrr_get_guest_memory_type);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9cf5030..fe7a589 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8644,7 +8644,7 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 	if (is_mmio)
 		ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT;
 	else if (kvm_arch_has_noncoherent_dma(vcpu->kvm))
-		ret = kvm_get_guest_memory_type(vcpu, gfn) <<
+		ret = kvm_mtrr_get_guest_memory_type(vcpu, gfn) <<
 		      VMX_EPT_MT_EPTE_SHIFT;
 	else
 		ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ba7b0cc..b6f7500 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -57,7 +57,6 @@
 #include <asm/debugreg.h>
 #include <asm/msr.h>
 #include <asm/desc.h>
-#include <asm/mtrr.h>
 #include <asm/mce.h>
 #include <asm/i387.h>
 #include <asm/fpu-internal.h> /* Ugh! */
@@ -1772,179 +1771,6 @@ static void kvmclock_sync_fn(struct work_struct *work)
 					KVMCLOCK_SYNC_PERIOD);
 }
 
-static bool msr_mtrr_valid(unsigned msr)
-{
-	switch (msr) {
-	case 0x200 ... 0x200 + 2 * KVM_NR_VAR_MTRR - 1:
-	case MSR_MTRRfix64K_00000:
-	case MSR_MTRRfix16K_80000:
-	case MSR_MTRRfix16K_A0000:
-	case MSR_MTRRfix4K_C0000:
-	case MSR_MTRRfix4K_C8000:
-	case MSR_MTRRfix4K_D0000:
-	case MSR_MTRRfix4K_D8000:
-	case MSR_MTRRfix4K_E0000:
-	case MSR_MTRRfix4K_E8000:
-	case MSR_MTRRfix4K_F0000:
-	case MSR_MTRRfix4K_F8000:
-	case MSR_MTRRdefType:
-	case MSR_IA32_CR_PAT:
-		return true;
-	case 0x2f8:
-		return true;
-	}
-	return false;
-}
-
-static bool valid_pat_type(unsigned t)
-{
-	return t < 8 && (1 << t) & 0xf3; /* 0, 1, 4, 5, 6, 7 */
-}
-
-static bool valid_mtrr_type(unsigned t)
-{
-	return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */
-}
-
-bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
-{
-	int i;
-	u64 mask;
-
-	if (!msr_mtrr_valid(msr))
-		return false;
-
-	if (msr == MSR_IA32_CR_PAT) {
-		for (i = 0; i < 8; i++)
-			if (!valid_pat_type((data >> (i * 8)) & 0xff))
-				return false;
-		return true;
-	} else if (msr == MSR_MTRRdefType) {
-		if (data & ~0xcff)
-			return false;
-		return valid_mtrr_type(data & 0xff);
-	} else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) {
-		for (i = 0; i < 8 ; i++)
-			if (!valid_mtrr_type((data >> (i * 8)) & 0xff))
-				return false;
-		return true;
-	}
-
-	/* variable MTRRs */
-	WARN_ON(!(msr >= 0x200 && msr < 0x200 + 2 * KVM_NR_VAR_MTRR));
-
-	mask = (~0ULL) << cpuid_maxphyaddr(vcpu);
-	if ((msr & 1) == 0) {
-		/* MTRR base */
-		if (!valid_mtrr_type(data & 0xff))
-			return false;
-		mask |= 0xf00;
-	} else
-		/* MTRR mask */
-		mask |= 0x7ff;
-	if (data & mask) {
-		kvm_inject_gp(vcpu, 0);
-		return false;
-	}
-
-	return true;
-}
-EXPORT_SYMBOL_GPL(kvm_mtrr_valid);
-
-static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
-{
-	struct mtrr_state_type *mtrr_state = &vcpu->arch.mtrr_state;
-	unsigned char mtrr_enabled = mtrr_state->enabled;
-	gfn_t start, end, mask;
-	int index;
-	bool is_fixed = true;
-
-	if (msr == MSR_IA32_CR_PAT || !tdp_enabled ||
-	      !kvm_arch_has_noncoherent_dma(vcpu->kvm))
-		return;
-
-	if (!(mtrr_enabled & 0x2) && msr != MSR_MTRRdefType)
-		return;
-
-	switch (msr) {
-	case MSR_MTRRfix64K_00000:
-		start = 0x0;
-		end = 0x80000;
-		break;
-	case MSR_MTRRfix16K_80000:
-		start = 0x80000;
-		end = 0xa0000;
-		break;
-	case MSR_MTRRfix16K_A0000:
-		start = 0xa0000;
-		end = 0xc0000;
-		break;
-	case MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000:
-		index = msr - MSR_MTRRfix4K_C0000;
-		start = 0xc0000 + index * (32 << 10);
-		end = start + (32 << 10);
-		break;
-	case MSR_MTRRdefType:
-		is_fixed = false;
-		start = 0x0;
-		end = ~0ULL;
-		break;
-	default:
-		/* variable range MTRRs. */
-		is_fixed = false;
-		index = (msr - 0x200) / 2;
-		start = (((u64)mtrr_state->var_ranges[index].base_hi) << 32) +
-		       (mtrr_state->var_ranges[index].base_lo & PAGE_MASK);
-		mask = (((u64)mtrr_state->var_ranges[index].mask_hi) << 32) +
-		       (mtrr_state->var_ranges[index].mask_lo & PAGE_MASK);
-		mask |= ~0ULL << cpuid_maxphyaddr(vcpu);
-
-		end = ((start & mask) | ~mask) + 1;
-	}
-
-	if (is_fixed && !(mtrr_enabled & 0x1))
-		return;
-
-	kvm_zap_gfn_range(vcpu->kvm, gpa_to_gfn(start), gpa_to_gfn(end));
-}
-
-static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
-{
-	u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
-
-	if (!kvm_mtrr_valid(vcpu, msr, data))
-		return 1;
-
-	if (msr == MSR_MTRRdefType) {
-		vcpu->arch.mtrr_state.def_type = data;
-		vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10;
-	} else if (msr == MSR_MTRRfix64K_00000)
-		p[0] = data;
-	else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
-		p[1 + msr - MSR_MTRRfix16K_80000] = data;
-	else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
-		p[3 + msr - MSR_MTRRfix4K_C0000] = data;
-	else if (msr == MSR_IA32_CR_PAT)
-		vcpu->arch.pat = data;
-	else {	/* Variable MTRRs */
-		int idx, is_mtrr_mask;
-		u64 *pt;
-
-		idx = (msr - 0x200) / 2;
-		is_mtrr_mask = msr - 0x200 - 2 * idx;
-		if (!is_mtrr_mask)
-			pt =
-			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
-		else
-			pt =
-			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
-		*pt = data;
-	}
-
-	update_mtrr(vcpu, msr);
-	return 0;
-}
-
 static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
 	u64 mcg_cap = vcpu->arch.mcg_cap;
@@ -2236,7 +2062,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			    __func__, data);
 		break;
 	case 0x200 ... 0x2ff:
-		return set_msr_mtrr(vcpu, msr, data);
+		return kvm_mtrr_set_msr(vcpu, msr, data);
 	case MSR_IA32_APICBASE:
 		return kvm_set_apic_base(vcpu, msr_info);
 	case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
@@ -2441,42 +2267,6 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 }
 EXPORT_SYMBOL_GPL(kvm_get_msr);
 
-static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
-{
-	u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
-
-	if (!msr_mtrr_valid(msr))
-		return 1;
-
-	if (msr == MSR_MTRRdefType)
-		*pdata = vcpu->arch.mtrr_state.def_type +
-			 (vcpu->arch.mtrr_state.enabled << 10);
-	else if (msr == MSR_MTRRfix64K_00000)
-		*pdata = p[0];
-	else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
-		*pdata = p[1 + msr - MSR_MTRRfix16K_80000];
-	else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
-		*pdata = p[3 + msr - MSR_MTRRfix4K_C0000];
-	else if (msr == MSR_IA32_CR_PAT)
-		*pdata = vcpu->arch.pat;
-	else {	/* Variable MTRRs */
-		int idx, is_mtrr_mask;
-		u64 *pt;
-
-		idx = (msr - 0x200) / 2;
-		is_mtrr_mask = msr - 0x200 - 2 * idx;
-		if (!is_mtrr_mask)
-			pt =
-			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
-		else
-			pt =
-			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
-		*pdata = *pt;
-	}
-
-	return 0;
-}
-
 static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 {
 	u64 data;
@@ -2618,7 +2408,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 		data = 0x500 | KVM_NR_VAR_MTRR;
 		break;
 	case 0x200 ... 0x2ff:
-		return get_msr_mtrr(vcpu, msr, pdata);
+		return kvm_mtrr_get_msr(vcpu, msr, pdata);
 	case 0xcd: /* fsb frequency */
 		data = 3;
 		break;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 01a1d01..956558d 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -163,6 +163,8 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
 	struct x86_exception *exception);
 
 bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data);
+int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data);
+int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
 
 #define KVM_SUPPORTED_XCR0     (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \
 				| XSTATE_BNDREGS | XSTATE_BNDCSR \
-- 
2.1.0


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH 02/15] KVM: MTRR: handle MSR_MTRRcap in kvm_mtrr_get_msr
  2015-05-30 10:59 [PATCH 00/15] KVM: x86: fully implement vMTRR Xiao Guangrong
  2015-05-30 10:58 ` Xiao Guangrong
  2015-05-30 10:59 ` [PATCH 01/15] KVM: x86: move MTRR related code to a separate file Xiao Guangrong
@ 2015-05-30 10:59 ` Xiao Guangrong
  2015-05-30 10:59 ` [PATCH 03/15] KVM: MTRR: remove mtrr_state.have_fixed Xiao Guangrong
                   ` (12 subsequent siblings)
  15 siblings, 0 replies; 44+ messages in thread
From: Xiao Guangrong @ 2015-05-30 10:59 UTC (permalink / raw)
  To: pbonzini; +Cc: gleb, mtosatti, kvm, linux-kernel, Xiao Guangrong

MSR_MTRRcap is a MTRR msr so move the handler to the common place, also
add some comments to make the hard code more readable

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
 arch/x86/kvm/mtrr.c | 12 ++++++++++++
 arch/x86/kvm/x86.c  |  2 --
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index 599485d..32664ec 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -200,6 +200,18 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 {
 	u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
 
+	/* MSR_MTRRcap is a readonly MSR. */
+	if (msr == MSR_MTRRcap) {
+		/*
+		 * SMRR = 0
+		 * WC = 1
+		 * FIX = 1
+		 * VCNT = KVM_NR_VAR_MTRR
+		 */
+		*pdata = 0x500 | KVM_NR_VAR_MTRR;
+		return 0;
+	}
+
 	if (!msr_mtrr_valid(msr))
 		return 1;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b6f7500..92db294 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2405,8 +2405,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 		data = 0x100000000ULL;
 		break;
 	case MSR_MTRRcap:
-		data = 0x500 | KVM_NR_VAR_MTRR;
-		break;
 	case 0x200 ... 0x2ff:
 		return kvm_mtrr_get_msr(vcpu, msr, pdata);
 	case 0xcd: /* fsb frequency */
-- 
2.1.0


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH 03/15] KVM: MTRR: remove mtrr_state.have_fixed
  2015-05-30 10:59 [PATCH 00/15] KVM: x86: fully implement vMTRR Xiao Guangrong
                   ` (2 preceding siblings ...)
  2015-05-30 10:59 ` [PATCH 02/15] KVM: MTRR: handle MSR_MTRRcap in kvm_mtrr_get_msr Xiao Guangrong
@ 2015-05-30 10:59 ` Xiao Guangrong
  2015-05-30 10:59 ` [PATCH 04/15] KVM: MTRR: exactly define the size of variable MTRRs Xiao Guangrong
                   ` (11 subsequent siblings)
  15 siblings, 0 replies; 44+ messages in thread
From: Xiao Guangrong @ 2015-05-30 10:59 UTC (permalink / raw)
  To: pbonzini; +Cc: gleb, mtosatti, kvm, linux-kernel, Xiao Guangrong

vMTRR does not depend on any host MTRR feature and fixed MTRRs have always
been implemented, so drop this field

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
 arch/x86/include/asm/kvm_host.h | 9 ++++++++-
 arch/x86/kvm/mtrr.c             | 7 +++----
 arch/x86/kvm/x86.c              | 1 -
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2353392..65de1b3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -344,6 +344,13 @@ enum {
 	KVM_DEBUGREG_RELOAD = 4,
 };
 
+struct kvm_mtrr {
+	struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES];
+	mtrr_type fixed_ranges[KVM_NR_FIXED_MTRR_REGION];
+	unsigned char enabled;
+	mtrr_type def_type;
+};
+
 struct kvm_vcpu_arch {
 	/*
 	 * rip and regs accesses must go through
@@ -472,7 +479,7 @@ struct kvm_vcpu_arch {
 	unsigned nmi_pending; /* NMI queued after currently running handler */
 	bool nmi_injected;    /* Trying to inject an NMI this entry */
 
-	struct mtrr_state_type mtrr_state;
+	struct kvm_mtrr mtrr_state;
 	u64 pat;
 
 	unsigned switch_db_regs;
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index 32664ec..562341b 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -104,7 +104,7 @@ EXPORT_SYMBOL_GPL(kvm_mtrr_valid);
 
 static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
 {
-	struct mtrr_state_type *mtrr_state = &vcpu->arch.mtrr_state;
+	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
 	unsigned char mtrr_enabled = mtrr_state->enabled;
 	gfn_t start, end, mask;
 	int index;
@@ -248,7 +248,7 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
  * The function is based on mtrr_type_lookup() in
  * arch/x86/kernel/cpu/mtrr/generic.c
  */
-static int get_mtrr_type(struct mtrr_state_type *mtrr_state,
+static int get_mtrr_type(struct kvm_mtrr *mtrr_state,
 			 u64 start, u64 end)
 {
 	u64 base, mask;
@@ -263,8 +263,7 @@ static int get_mtrr_type(struct mtrr_state_type *mtrr_state,
 	end--;
 
 	/* Look in fixed ranges. Just return the type as per start */
-	if (mtrr_state->have_fixed && (mtrr_state->enabled & 0x1) &&
-	      (start < 0x100000)) {
+	if ((mtrr_state->enabled & 0x1) && (start < 0x100000)) {
 		int idx;
 
 		if (start < 0x80000) {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 92db294..64c2891 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6985,7 +6985,6 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
 	int r;
 
-	vcpu->arch.mtrr_state.have_fixed = 1;
 	r = vcpu_load(vcpu);
 	if (r)
 		return r;
-- 
2.1.0


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH 04/15] KVM: MTRR: exactly define the size of variable MTRRs
  2015-05-30 10:59 [PATCH 00/15] KVM: x86: fully implement vMTRR Xiao Guangrong
                   ` (3 preceding siblings ...)
  2015-05-30 10:59 ` [PATCH 03/15] KVM: MTRR: remove mtrr_state.have_fixed Xiao Guangrong
@ 2015-05-30 10:59 ` Xiao Guangrong
  2015-05-30 10:59 ` [PATCH 05/15] KVM: MTRR: clean up mtrr default type Xiao Guangrong
                   ` (10 subsequent siblings)
  15 siblings, 0 replies; 44+ messages in thread
From: Xiao Guangrong @ 2015-05-30 10:59 UTC (permalink / raw)
  To: pbonzini; +Cc: gleb, mtosatti, kvm, linux-kernel, Xiao Guangrong

Only KVM_NR_VAR_MTRR variable MTRRs are available in KVM guest

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
 arch/x86/include/asm/kvm_host.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 65de1b3..2c3c52d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -345,7 +345,7 @@ enum {
 };
 
 struct kvm_mtrr {
-	struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES];
+	struct mtrr_var_range var_ranges[KVM_NR_VAR_MTRR];
 	mtrr_type fixed_ranges[KVM_NR_FIXED_MTRR_REGION];
 	unsigned char enabled;
 	mtrr_type def_type;
-- 
2.1.0


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH 05/15] KVM: MTRR: clean up mtrr default type
  2015-05-30 10:59 [PATCH 00/15] KVM: x86: fully implement vMTRR Xiao Guangrong
                   ` (4 preceding siblings ...)
  2015-05-30 10:59 ` [PATCH 04/15] KVM: MTRR: exactly define the size of variable MTRRs Xiao Guangrong
@ 2015-05-30 10:59 ` Xiao Guangrong
  2015-06-01  9:11   ` Paolo Bonzini
  2015-06-09  0:35   ` David Matlack
  2015-05-30 10:59 ` [PATCH 06/15] KVM: MTRR: do not split 64 bits MSR content Xiao Guangrong
                   ` (9 subsequent siblings)
  15 siblings, 2 replies; 44+ messages in thread
From: Xiao Guangrong @ 2015-05-30 10:59 UTC (permalink / raw)
  To: pbonzini; +Cc: gleb, mtosatti, kvm, linux-kernel, Xiao Guangrong

Use union definition to avoid the decode/code workload and drop all the
hard code

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
 arch/x86/include/asm/kvm_host.h | 12 ++++++++++--
 arch/x86/kvm/mtrr.c             | 19 ++++++++-----------
 2 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2c3c52d..95ce2ff 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -347,8 +347,16 @@ enum {
 struct kvm_mtrr {
 	struct mtrr_var_range var_ranges[KVM_NR_VAR_MTRR];
 	mtrr_type fixed_ranges[KVM_NR_FIXED_MTRR_REGION];
-	unsigned char enabled;
-	mtrr_type def_type;
+
+	union {
+		u64 deftype;
+		struct {
+			unsigned def_type:8;
+			unsigned reserved:2;
+			unsigned fixed_mtrr_enabled:1;
+			unsigned mtrr_enabled:1;
+		};
+	};
 };
 
 struct kvm_vcpu_arch {
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index 562341b..6de49dd 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -105,7 +105,6 @@ EXPORT_SYMBOL_GPL(kvm_mtrr_valid);
 static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
 {
 	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
-	unsigned char mtrr_enabled = mtrr_state->enabled;
 	gfn_t start, end, mask;
 	int index;
 	bool is_fixed = true;
@@ -114,7 +113,7 @@ static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
 	      !kvm_arch_has_noncoherent_dma(vcpu->kvm))
 		return;
 
-	if (!(mtrr_enabled & 0x2) && msr != MSR_MTRRdefType)
+	if (!mtrr_state->mtrr_enabled && msr != MSR_MTRRdefType)
 		return;
 
 	switch (msr) {
@@ -153,7 +152,7 @@ static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
 		end = ((start & mask) | ~mask) + 1;
 	}
 
-	if (is_fixed && !(mtrr_enabled & 0x1))
+	if (is_fixed && !mtrr_state->fixed_mtrr_enabled)
 		return;
 
 	kvm_zap_gfn_range(vcpu->kvm, gpa_to_gfn(start), gpa_to_gfn(end));
@@ -166,10 +165,9 @@ int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 	if (!kvm_mtrr_valid(vcpu, msr, data))
 		return 1;
 
-	if (msr == MSR_MTRRdefType) {
-		vcpu->arch.mtrr_state.def_type = data;
-		vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10;
-	} else if (msr == MSR_MTRRfix64K_00000)
+	if (msr == MSR_MTRRdefType)
+		vcpu->arch.mtrr_state.deftype = data;
+	else if (msr == MSR_MTRRfix64K_00000)
 		p[0] = data;
 	else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
 		p[1 + msr - MSR_MTRRfix16K_80000] = data;
@@ -216,8 +214,7 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 		return 1;
 
 	if (msr == MSR_MTRRdefType)
-		*pdata = vcpu->arch.mtrr_state.def_type +
-			 (vcpu->arch.mtrr_state.enabled << 10);
+		*pdata = vcpu->arch.mtrr_state.deftype;
 	else if (msr == MSR_MTRRfix64K_00000)
 		*pdata = p[0];
 	else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
@@ -256,14 +253,14 @@ static int get_mtrr_type(struct kvm_mtrr *mtrr_state,
 	int i, num_var_ranges = KVM_NR_VAR_MTRR;
 
 	/* MTRR is completely disabled, use UC for all of physical memory. */
-	if (!(mtrr_state->enabled & 0x2))
+	if (!mtrr_state->mtrr_enabled)
 		return MTRR_TYPE_UNCACHABLE;
 
 	/* Make end inclusive end, instead of exclusive */
 	end--;
 
 	/* Look in fixed ranges. Just return the type as per start */
-	if ((mtrr_state->enabled & 0x1) && (start < 0x100000)) {
+	if (mtrr_state->fixed_mtrr_enabled && (start < 0x100000)) {
 		int idx;
 
 		if (start < 0x80000) {
-- 
2.1.0


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH 06/15] KVM: MTRR: do not split 64 bits MSR content
  2015-05-30 10:59 [PATCH 00/15] KVM: x86: fully implement vMTRR Xiao Guangrong
                   ` (5 preceding siblings ...)
  2015-05-30 10:59 ` [PATCH 05/15] KVM: MTRR: clean up mtrr default type Xiao Guangrong
@ 2015-05-30 10:59 ` Xiao Guangrong
  2015-05-30 10:59 ` [PATCH 07/15] KVM: MTRR: improve kvm_mtrr_get_guest_memory_type Xiao Guangrong
                   ` (8 subsequent siblings)
  15 siblings, 0 replies; 44+ messages in thread
From: Xiao Guangrong @ 2015-05-30 10:59 UTC (permalink / raw)
  To: pbonzini; +Cc: gleb, mtosatti, kvm, linux-kernel, Xiao Guangrong

Variable MTRR MSRs are 64 bits which are directly accessed with full length,
no reason to split them to two 32 bits

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
 arch/x86/include/asm/kvm_host.h |  7 ++++++-
 arch/x86/kvm/mtrr.c             | 32 ++++++++++----------------------
 2 files changed, 16 insertions(+), 23 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 95ce2ff..f3fc152 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -344,8 +344,13 @@ enum {
 	KVM_DEBUGREG_RELOAD = 4,
 };
 
+struct kvm_mtrr_range {
+	u64 base;
+	u64 mask;
+};
+
 struct kvm_mtrr {
-	struct mtrr_var_range var_ranges[KVM_NR_VAR_MTRR];
+	struct kvm_mtrr_range var_ranges[KVM_NR_VAR_MTRR];
 	mtrr_type fixed_ranges[KVM_NR_FIXED_MTRR_REGION];
 
 	union {
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index 6de49dd..bc9c6da 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -143,10 +143,8 @@ static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
 		/* variable range MTRRs. */
 		is_fixed = false;
 		index = (msr - 0x200) / 2;
-		start = (((u64)mtrr_state->var_ranges[index].base_hi) << 32) +
-		       (mtrr_state->var_ranges[index].base_lo & PAGE_MASK);
-		mask = (((u64)mtrr_state->var_ranges[index].mask_hi) << 32) +
-		       (mtrr_state->var_ranges[index].mask_lo & PAGE_MASK);
+		start = mtrr_state->var_ranges[index].base & PAGE_MASK;
+		mask = mtrr_state->var_ranges[index].mask & PAGE_MASK;
 		mask |= ~0ULL << cpuid_maxphyaddr(vcpu);
 
 		end = ((start & mask) | ~mask) + 1;
@@ -177,17 +175,13 @@ int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		vcpu->arch.pat = data;
 	else {	/* Variable MTRRs */
 		int idx, is_mtrr_mask;
-		u64 *pt;
 
 		idx = (msr - 0x200) / 2;
 		is_mtrr_mask = msr - 0x200 - 2 * idx;
 		if (!is_mtrr_mask)
-			pt =
-			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
+			vcpu->arch.mtrr_state.var_ranges[idx].base = data;
 		else
-			pt =
-			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
-		*pt = data;
+			vcpu->arch.mtrr_state.var_ranges[idx].mask = data;
 	}
 
 	update_mtrr(vcpu, msr);
@@ -225,17 +219,13 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 		*pdata = vcpu->arch.pat;
 	else {	/* Variable MTRRs */
 		int idx, is_mtrr_mask;
-		u64 *pt;
 
 		idx = (msr - 0x200) / 2;
 		is_mtrr_mask = msr - 0x200 - 2 * idx;
 		if (!is_mtrr_mask)
-			pt =
-			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
+			*pdata = vcpu->arch.mtrr_state.var_ranges[idx].base;
 		else
-			pt =
-			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
-		*pdata = *pt;
+			*pdata = vcpu->arch.mtrr_state.var_ranges[idx].mask;
 	}
 
 	return 0;
@@ -287,13 +277,11 @@ static int get_mtrr_type(struct kvm_mtrr *mtrr_state,
 	for (i = 0; i < num_var_ranges; ++i) {
 		unsigned short start_state, end_state;
 
-		if (!(mtrr_state->var_ranges[i].mask_lo & (1 << 11)))
+		if (!(mtrr_state->var_ranges[i].mask & (1 << 11)))
 			continue;
 
-		base = (((u64)mtrr_state->var_ranges[i].base_hi) << 32) +
-		       (mtrr_state->var_ranges[i].base_lo & PAGE_MASK);
-		mask = (((u64)mtrr_state->var_ranges[i].mask_hi) << 32) +
-		       (mtrr_state->var_ranges[i].mask_lo & PAGE_MASK);
+		base = mtrr_state->var_ranges[i].base & PAGE_MASK;
+		mask = mtrr_state->var_ranges[i].mask & PAGE_MASK;
 
 		start_state = ((start & mask) == (base & mask));
 		end_state = ((end & mask) == (base & mask));
@@ -303,7 +291,7 @@ static int get_mtrr_type(struct kvm_mtrr *mtrr_state,
 		if ((start & mask) != (base & mask))
 			continue;
 
-		curr_match = mtrr_state->var_ranges[i].base_lo & 0xff;
+		curr_match = mtrr_state->var_ranges[i].base & 0xff;
 		if (prev_match == 0xFF) {
 			prev_match = curr_match;
 			continue;
-- 
2.1.0


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH 07/15] KVM: MTRR: improve kvm_mtrr_get_guest_memory_type
  2015-05-30 10:59 [PATCH 00/15] KVM: x86: fully implement vMTRR Xiao Guangrong
                   ` (6 preceding siblings ...)
  2015-05-30 10:59 ` [PATCH 06/15] KVM: MTRR: do not split 64 bits MSR content Xiao Guangrong
@ 2015-05-30 10:59 ` Xiao Guangrong
  2015-06-01  9:16   ` Paolo Bonzini
  2015-05-30 10:59 ` [PATCH 08/15] KVM: MTRR: introduce fixed_mtrr_segment table Xiao Guangrong
                   ` (7 subsequent siblings)
  15 siblings, 1 reply; 44+ messages in thread
From: Xiao Guangrong @ 2015-05-30 10:59 UTC (permalink / raw)
  To: pbonzini; +Cc: gleb, mtosatti, kvm, linux-kernel, Xiao Guangrong

 - kvm_mtrr_get_guest_memory_type() only checks one page in MTRRs so that
   it's unnecessary to check to see if the range is partially covered in
   MTRR

 - optimize the check of overlap memory type and add some comments to explain
   the precedence

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
 arch/x86/kvm/mtrr.c | 89 ++++++++++++++++++++++++++---------------------------
 1 file changed, 44 insertions(+), 45 deletions(-)

diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index bc9c6da..d3c06d2 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -231,24 +231,16 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	return 0;
 }
 
-/*
- * The function is based on mtrr_type_lookup() in
- * arch/x86/kernel/cpu/mtrr/generic.c
- */
-static int get_mtrr_type(struct kvm_mtrr *mtrr_state,
-			 u64 start, u64 end)
+u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
 {
-	u64 base, mask;
-	u8 prev_match, curr_match;
-	int i, num_var_ranges = KVM_NR_VAR_MTRR;
+	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
+	u64 base, mask, start = gfn_to_gpa(gfn);
+	int i, num_var_ranges = KVM_NR_VAR_MTRR, type_mask, type = -1;
 
 	/* MTRR is completely disabled, use UC for all of physical memory. */
 	if (!mtrr_state->mtrr_enabled)
 		return MTRR_TYPE_UNCACHABLE;
 
-	/* Make end inclusive end, instead of exclusive */
-	end--;
-
 	/* Look in fixed ranges. Just return the type as per start */
 	if (mtrr_state->fixed_mtrr_enabled && (start < 0x100000)) {
 		int idx;
@@ -273,9 +265,9 @@ static int get_mtrr_type(struct kvm_mtrr *mtrr_state,
 	 * Look of multiple ranges matching this address and pick type
 	 * as per MTRR precedence
 	 */
-	prev_match = 0xFF;
+	type_mask = (1 << MTRR_TYPE_WRBACK) | (1 << MTRR_TYPE_WRTHROUGH);
 	for (i = 0; i < num_var_ranges; ++i) {
-		unsigned short start_state, end_state;
+		int curr_type;
 
 		if (!(mtrr_state->var_ranges[i].mask & (1 << 11)))
 			continue;
@@ -283,50 +275,57 @@ static int get_mtrr_type(struct kvm_mtrr *mtrr_state,
 		base = mtrr_state->var_ranges[i].base & PAGE_MASK;
 		mask = mtrr_state->var_ranges[i].mask & PAGE_MASK;
 
-		start_state = ((start & mask) == (base & mask));
-		end_state = ((end & mask) == (base & mask));
-		if (start_state != end_state)
-			return 0xFE;
-
 		if ((start & mask) != (base & mask))
 			continue;
 
-		curr_match = mtrr_state->var_ranges[i].base & 0xff;
-		if (prev_match == 0xFF) {
-			prev_match = curr_match;
+		/*
+		 * Please refer to Intel SDM Volume 3: 11.11.4.1 MTRR
+		 * Precedences.
+		 */
+
+		curr_type = mtrr_state->var_ranges[i].base & 0xff;
+		if (type == -1) {
+			type = curr_type;
 			continue;
 		}
 
-		if (prev_match == MTRR_TYPE_UNCACHABLE ||
-		    curr_match == MTRR_TYPE_UNCACHABLE)
+		/*
+		 * If two or more variable memory ranges match and the
+		 * memory types are identical, then that memory type is
+		 * used.
+		 */
+		if (type == curr_type)
+			continue;
+
+		/*
+		 * If two or more variable memory ranges match and one of
+		 * the memory types is UC, the UC memory type used.
+		 */
+		if (curr_type == MTRR_TYPE_UNCACHABLE)
 			return MTRR_TYPE_UNCACHABLE;
 
-		if ((prev_match == MTRR_TYPE_WRBACK &&
-		     curr_match == MTRR_TYPE_WRTHROUGH) ||
-		    (prev_match == MTRR_TYPE_WRTHROUGH &&
-		     curr_match == MTRR_TYPE_WRBACK)) {
-			prev_match = MTRR_TYPE_WRTHROUGH;
-			curr_match = MTRR_TYPE_WRTHROUGH;
+		/*
+		 * If two or more variable memory ranges match and the
+		 * memory types are WT and WB, the WT memory type is used.
+		 */
+		if (((1 << type) & type_mask) &&
+		      ((1 << curr_type) & type_mask)) {
+			type = MTRR_TYPE_WRTHROUGH;
+			continue;
 		}
 
-		if (prev_match != curr_match)
-			return MTRR_TYPE_UNCACHABLE;
+		/*
+		 * For overlaps not defined by the above rules, processor
+		 * behavior is undefined.
+		 */
+
+		 /* We use WB for this undefined behavior. :( */
+		 return MTRR_TYPE_WRBACK;
 	}
 
-	if (prev_match != 0xFF)
-		return prev_match;
+	if (type != -1)
+		return type;
 
 	return mtrr_state->def_type;
 }
-
-u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
-{
-	u8 mtrr;
-
-	mtrr = get_mtrr_type(&vcpu->arch.mtrr_state, gfn << PAGE_SHIFT,
-			     (gfn << PAGE_SHIFT) + PAGE_SIZE);
-	if (mtrr == 0xfe || mtrr == 0xff)
-		mtrr = MTRR_TYPE_WRBACK;
-	return mtrr;
-}
 EXPORT_SYMBOL_GPL(kvm_mtrr_get_guest_memory_type);
-- 
2.1.0


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH 08/15] KVM: MTRR: introduce fixed_mtrr_segment table
  2015-05-30 10:59 [PATCH 00/15] KVM: x86: fully implement vMTRR Xiao Guangrong
                   ` (7 preceding siblings ...)
  2015-05-30 10:59 ` [PATCH 07/15] KVM: MTRR: improve kvm_mtrr_get_guest_memory_type Xiao Guangrong
@ 2015-05-30 10:59 ` Xiao Guangrong
  2015-06-01  9:25   ` Paolo Bonzini
  2015-05-30 10:59 ` [PATCH 09/15] KVM: MTRR: introduce var_mtrr_range Xiao Guangrong
                   ` (6 subsequent siblings)
  15 siblings, 1 reply; 44+ messages in thread
From: Xiao Guangrong @ 2015-05-30 10:59 UTC (permalink / raw)
  To: pbonzini; +Cc: gleb, mtosatti, kvm, linux-kernel, Xiao Guangrong

This table summarizes the information of fixed MTRRs and introduce some APIs
to abstract its operation which helps us to clean up the code and will be
used in later patches

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
 arch/x86/kvm/mtrr.c | 191 ++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 142 insertions(+), 49 deletions(-)

diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index d3c06d2..888441e 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -102,12 +102,126 @@ bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 }
 EXPORT_SYMBOL_GPL(kvm_mtrr_valid);
 
+struct fixed_mtrr_segment {
+	u64 start;
+	u64 end;
+
+	/*
+	 * unit corresponds to the MSR entry in this segment, the size
+	 * of unit is covered in one MSR.
+	 */
+	u64 unit_size;
+
+	/* a range is covered in one memory cache type. */
+	u64 range_size;
+
+	/* the start position in kvm_mtrr.fixed_ranges[]. */
+	int range_start;
+};
+
+static struct fixed_mtrr_segment fixed_seg_table[] = {
+	/* MSR_MTRRfix64K_00000, 1 unit. 64K fixed mtrr. */
+	{
+		.start = 0x0,
+		.end = 0x80000,
+		.unit_size = 0x80000,
+		.range_size = 1ULL << 16,
+		.range_start = 0,
+	},
+
+	/*
+	 * MSR_MTRRfix16K_80000 ... MSR_MTRRfix16K_A0000, 2 units,
+	 * 16K fixed mtrr.
+	 */
+	{
+		.start = 0x80000,
+		.end = 0xc0000,
+		.unit_size = (0xc0000 - 0x80000) / 2,
+		.range_size = 1ULL << 14,
+		.range_start = 8,
+	},
+
+	/*
+	 * MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000, 8 units,
+	 * 4K fixed mtrr.
+	 */
+	{
+		.start = 0xc0000,
+		.end = 0x100000,
+		.unit_size = (0x100000 - 0xc0000) / 8,
+		.range_size = 1ULL << 12,
+		.range_start = 24,
+	}
+};
+
+static bool fixed_msr_to_seg_unit(u32 msr, int *seg, int *unit)
+{
+	switch (msr) {
+	case MSR_MTRRfix64K_00000:
+		*seg = 0;
+		*unit = 0;
+		break;
+	case MSR_MTRRfix16K_80000 ... MSR_MTRRfix16K_A0000:
+		*seg = 1;
+		*unit = msr - MSR_MTRRfix16K_80000;
+		break;
+	case MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000:
+		*seg = 2;
+		*unit = msr - MSR_MTRRfix4K_C0000;
+		break;
+	default:
+		return false;
+	}
+
+	return true;
+}
+
+static void fixed_mtrr_seg_unit_range(int seg, int unit, u64 *start, u64 *end)
+{
+	struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg];
+
+	*start = mtrr_seg->start + unit * mtrr_seg->unit_size;
+	*end = *start + mtrr_seg->unit_size;
+	WARN_ON(*end > mtrr_seg->end);
+}
+
+static int fixed_mtrr_seg_unit_range_index(int seg, int unit)
+{
+	struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg];
+
+	WARN_ON(mtrr_seg->start + unit * mtrr_seg->unit_size > mtrr_seg->end);
+
+	/* each unit has 8 ranges. */
+	return mtrr_seg->range_start + 8 * unit;
+}
+
+static bool fixed_msr_to_range(u32 msr, u64 *start, u64 *end)
+{
+	int seg, unit;
+
+	if (!fixed_msr_to_seg_unit(msr, &seg, &unit))
+		return false;
+
+	fixed_mtrr_seg_unit_range(seg, unit, start, end);
+	return true;
+}
+
+static int fixed_msr_to_range_index(u32 msr)
+{
+	int seg, unit;
+
+	if (!fixed_msr_to_seg_unit(msr, &seg, &unit))
+		return -1;
+
+	fixed_mtrr_seg_unit_range_index(seg, unit);
+	return 0;
+}
+
 static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
 {
 	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
 	gfn_t start, end, mask;
 	int index;
-	bool is_fixed = true;
 
 	if (msr == MSR_IA32_CR_PAT || !tdp_enabled ||
 	      !kvm_arch_has_noncoherent_dma(vcpu->kvm))
@@ -116,32 +230,19 @@ static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
 	if (!mtrr_state->mtrr_enabled && msr != MSR_MTRRdefType)
 		return;
 
+	if (fixed_msr_to_range(msr, &start, &end)) {
+		if (!mtrr_state->fixed_mtrr_enabled)
+			return;
+		goto do_zap;
+	}
+
 	switch (msr) {
-	case MSR_MTRRfix64K_00000:
-		start = 0x0;
-		end = 0x80000;
-		break;
-	case MSR_MTRRfix16K_80000:
-		start = 0x80000;
-		end = 0xa0000;
-		break;
-	case MSR_MTRRfix16K_A0000:
-		start = 0xa0000;
-		end = 0xc0000;
-		break;
-	case MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000:
-		index = msr - MSR_MTRRfix4K_C0000;
-		start = 0xc0000 + index * (32 << 10);
-		end = start + (32 << 10);
-		break;
 	case MSR_MTRRdefType:
-		is_fixed = false;
 		start = 0x0;
 		end = ~0ULL;
 		break;
 	default:
 		/* variable range MTRRs. */
-		is_fixed = false;
 		index = (msr - 0x200) / 2;
 		start = mtrr_state->var_ranges[index].base & PAGE_MASK;
 		mask = mtrr_state->var_ranges[index].mask & PAGE_MASK;
@@ -150,38 +251,33 @@ static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
 		end = ((start & mask) | ~mask) + 1;
 	}
 
-	if (is_fixed && !mtrr_state->fixed_mtrr_enabled)
-		return;
-
+do_zap:
 	kvm_zap_gfn_range(vcpu->kvm, gpa_to_gfn(start), gpa_to_gfn(end));
 }
 
 int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
-	u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
+	int index;
 
 	if (!kvm_mtrr_valid(vcpu, msr, data))
 		return 1;
 
-	if (msr == MSR_MTRRdefType)
+	index = fixed_msr_to_range_index(msr);
+	if (index >= 0)
+		*(u64 *)&vcpu->arch.mtrr_state.fixed_ranges[index] = data;
+	else if (msr == MSR_MTRRdefType)
 		vcpu->arch.mtrr_state.deftype = data;
-	else if (msr == MSR_MTRRfix64K_00000)
-		p[0] = data;
-	else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
-		p[1 + msr - MSR_MTRRfix16K_80000] = data;
-	else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
-		p[3 + msr - MSR_MTRRfix4K_C0000] = data;
 	else if (msr == MSR_IA32_CR_PAT)
 		vcpu->arch.pat = data;
 	else {	/* Variable MTRRs */
-		int idx, is_mtrr_mask;
+		int is_mtrr_mask;
 
-		idx = (msr - 0x200) / 2;
-		is_mtrr_mask = msr - 0x200 - 2 * idx;
+		index = (msr - 0x200) / 2;
+		is_mtrr_mask = msr - 0x200 - 2 * index;
 		if (!is_mtrr_mask)
-			vcpu->arch.mtrr_state.var_ranges[idx].base = data;
+			vcpu->arch.mtrr_state.var_ranges[index].base = data;
 		else
-			vcpu->arch.mtrr_state.var_ranges[idx].mask = data;
+			vcpu->arch.mtrr_state.var_ranges[index].mask = data;
 	}
 
 	update_mtrr(vcpu, msr);
@@ -190,7 +286,7 @@ int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 
 int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 {
-	u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
+	int index;
 
 	/* MSR_MTRRcap is a readonly MSR. */
 	if (msr == MSR_MTRRcap) {
@@ -207,25 +303,22 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	if (!msr_mtrr_valid(msr))
 		return 1;
 
-	if (msr == MSR_MTRRdefType)
+	index = fixed_msr_to_range_index(msr);
+	if (index >= 0)
+		*pdata = *(u64 *)&vcpu->arch.mtrr_state.fixed_ranges[index];
+	else if (msr == MSR_MTRRdefType)
 		*pdata = vcpu->arch.mtrr_state.deftype;
-	else if (msr == MSR_MTRRfix64K_00000)
-		*pdata = p[0];
-	else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
-		*pdata = p[1 + msr - MSR_MTRRfix16K_80000];
-	else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
-		*pdata = p[3 + msr - MSR_MTRRfix4K_C0000];
 	else if (msr == MSR_IA32_CR_PAT)
 		*pdata = vcpu->arch.pat;
 	else {	/* Variable MTRRs */
-		int idx, is_mtrr_mask;
+		int is_mtrr_mask;
 
-		idx = (msr - 0x200) / 2;
-		is_mtrr_mask = msr - 0x200 - 2 * idx;
+		index = (msr - 0x200) / 2;
+		is_mtrr_mask = msr - 0x200 - 2 * index;
 		if (!is_mtrr_mask)
-			*pdata = vcpu->arch.mtrr_state.var_ranges[idx].base;
+			*pdata = vcpu->arch.mtrr_state.var_ranges[index].base;
 		else
-			*pdata = vcpu->arch.mtrr_state.var_ranges[idx].mask;
+			*pdata = vcpu->arch.mtrr_state.var_ranges[index].mask;
 	}
 
 	return 0;
-- 
2.1.0


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH 09/15] KVM: MTRR: introduce var_mtrr_range
  2015-05-30 10:59 [PATCH 00/15] KVM: x86: fully implement vMTRR Xiao Guangrong
                   ` (8 preceding siblings ...)
  2015-05-30 10:59 ` [PATCH 08/15] KVM: MTRR: introduce fixed_mtrr_segment table Xiao Guangrong
@ 2015-05-30 10:59 ` Xiao Guangrong
  2015-06-09  0:36   ` David Matlack
  2015-05-30 10:59 ` [PATCH 10/15] KVM: MTRR: sort variable MTRRs Xiao Guangrong
                   ` (5 subsequent siblings)
  15 siblings, 1 reply; 44+ messages in thread
From: Xiao Guangrong @ 2015-05-30 10:59 UTC (permalink / raw)
  To: pbonzini; +Cc: gleb, mtosatti, kvm, linux-kernel, Xiao Guangrong

It gets the range for the specified variable MTRR

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
 arch/x86/kvm/mtrr.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index 888441e..aeb9767 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -217,10 +217,21 @@ static int fixed_msr_to_range_index(u32 msr)
 	return 0;
 }
 
+static void var_mtrr_range(struct kvm_mtrr_range *range, u64 *start, u64 *end)
+{
+	u64 mask;
+
+	*start = range->base & PAGE_MASK;
+
+	mask = range->mask & PAGE_MASK;
+	mask |= ~0ULL << boot_cpu_data.x86_phys_bits;
+	*end = ((*start & mask) | ~mask) + 1;
+}
+
 static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
 {
 	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
-	gfn_t start, end, mask;
+	gfn_t start, end;
 	int index;
 
 	if (msr == MSR_IA32_CR_PAT || !tdp_enabled ||
@@ -244,11 +255,7 @@ static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
 	default:
 		/* variable range MTRRs. */
 		index = (msr - 0x200) / 2;
-		start = mtrr_state->var_ranges[index].base & PAGE_MASK;
-		mask = mtrr_state->var_ranges[index].mask & PAGE_MASK;
-		mask |= ~0ULL << cpuid_maxphyaddr(vcpu);
-
-		end = ((start & mask) | ~mask) + 1;
+		var_mtrr_range(&mtrr_state->var_ranges[index], &start, &end);
 	}
 
 do_zap:
-- 
2.1.0


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH 10/15] KVM: MTRR: sort variable MTRRs
  2015-05-30 10:59 [PATCH 00/15] KVM: x86: fully implement vMTRR Xiao Guangrong
                   ` (9 preceding siblings ...)
  2015-05-30 10:59 ` [PATCH 09/15] KVM: MTRR: introduce var_mtrr_range Xiao Guangrong
@ 2015-05-30 10:59 ` Xiao Guangrong
  2015-06-01  9:27   ` Paolo Bonzini
  2015-05-30 10:59 ` [PATCH 11/15] KVM: MTRR: introduce fixed_mtrr_addr_* functions Xiao Guangrong
                   ` (4 subsequent siblings)
  15 siblings, 1 reply; 44+ messages in thread
From: Xiao Guangrong @ 2015-05-30 10:59 UTC (permalink / raw)
  To: pbonzini; +Cc: gleb, mtosatti, kvm, linux-kernel, Xiao Guangrong

Sort all valid variable MTRRs based on its base address, it will help us to
check a range to see if it's fully contained in variable MTRRs

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
 arch/x86/include/asm/kvm_host.h |  3 +++
 arch/x86/kvm/mtrr.c             | 39 +++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c              |  2 +-
 arch/x86/kvm/x86.h              |  1 +
 4 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f3fc152..5be8f2e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -347,6 +347,7 @@ enum {
 struct kvm_mtrr_range {
 	u64 base;
 	u64 mask;
+	struct list_head node;
 };
 
 struct kvm_mtrr {
@@ -362,6 +363,8 @@ struct kvm_mtrr {
 			unsigned mtrr_enabled:1;
 		};
 	};
+
+	struct list_head head;
 };
 
 struct kvm_vcpu_arch {
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index aeb9767..8e3b81a 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -262,6 +262,38 @@ do_zap:
 	kvm_zap_gfn_range(vcpu->kvm, gpa_to_gfn(start), gpa_to_gfn(end));
 }
 
+static bool var_mtrr_range_is_valid(struct kvm_mtrr_range *range)
+{
+	u64 start, end;
+
+	if (!(range->mask & (1 << 11)))
+		return false;
+
+	var_mtrr_range(range, &start, &end);
+	return end > start;
+}
+
+static void set_var_mtrr_start(struct kvm_mtrr *mtrr_state, int index)
+{
+	/* remove the entry if it's in the list. */
+	if (var_mtrr_range_is_valid(&mtrr_state->var_ranges[index]))
+		list_del(&mtrr_state->var_ranges[index].node);
+}
+
+static void set_var_mtrr_end(struct kvm_mtrr *mtrr_state, int index)
+{
+	struct kvm_mtrr_range *tmp, *cur = &mtrr_state->var_ranges[index];
+
+	/* add it to the list if it's valid. */
+	if (var_mtrr_range_is_valid(&mtrr_state->var_ranges[index])) {
+		list_for_each_entry(tmp, &mtrr_state->head, node)
+			if (cur->base < tmp->base)
+				list_add_tail(&cur->node, &tmp->node);
+
+		list_add_tail(&cur->node, &mtrr_state->head);
+	}
+}
+
 int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
 	int index;
@@ -281,10 +313,12 @@ int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 
 		index = (msr - 0x200) / 2;
 		is_mtrr_mask = msr - 0x200 - 2 * index;
+		set_var_mtrr_start(&vcpu->arch.mtrr_state, index);
 		if (!is_mtrr_mask)
 			vcpu->arch.mtrr_state.var_ranges[index].base = data;
 		else
 			vcpu->arch.mtrr_state.var_ranges[index].mask = data;
+		set_var_mtrr_end(&vcpu->arch.mtrr_state, index);
 	}
 
 	update_mtrr(vcpu, msr);
@@ -331,6 +365,11 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	return 0;
 }
 
+void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu)
+{
+	INIT_LIST_HEAD(&vcpu->arch.mtrr_state.head);
+}
+
 u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
 {
 	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 64c2891..8084ac3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6991,7 +6991,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	kvm_vcpu_reset(vcpu, false);
 	kvm_mmu_setup(vcpu);
 	vcpu_put(vcpu);
-
+	kvm_vcpu_mtrr_init(vcpu);
 	return r;
 }
 
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 956558d..a3dae49 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -162,6 +162,7 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
 	gva_t addr, void *val, unsigned int bytes,
 	struct x86_exception *exception);
 
+void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu);
 bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data);
 int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data);
 int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
-- 
2.1.0


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH 11/15] KVM: MTRR: introduce fixed_mtrr_addr_* functions
  2015-05-30 10:59 [PATCH 00/15] KVM: x86: fully implement vMTRR Xiao Guangrong
                   ` (10 preceding siblings ...)
  2015-05-30 10:59 ` [PATCH 10/15] KVM: MTRR: sort variable MTRRs Xiao Guangrong
@ 2015-05-30 10:59 ` Xiao Guangrong
  2015-05-30 10:59 ` [PATCH 12/15] KVM: MTRR: introduce mtrr_for_each_mem_type Xiao Guangrong
                   ` (3 subsequent siblings)
  15 siblings, 0 replies; 44+ messages in thread
From: Xiao Guangrong @ 2015-05-30 10:59 UTC (permalink / raw)
  To: pbonzini; +Cc: gleb, mtosatti, kvm, linux-kernel, Xiao Guangrong

Two functions are introduced:
- fixed_mtrr_addr_to_seg() translates the address to the fixed
  MTRR segment

- fixed_mtrr_addr_seg_to_range_index() translates the address to
  the index of kvm_mtrr.fixed_ranges[]

They will be used in the later patch

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
 arch/x86/kvm/mtrr.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index 8e3b81a..e59d138 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -217,6 +217,31 @@ static int fixed_msr_to_range_index(u32 msr)
 	return 0;
 }
 
+static int fixed_mtrr_addr_to_seg(u64 addr)
+{
+	struct fixed_mtrr_segment *mtrr_seg;
+	int seg, seg_num = ARRAY_SIZE(fixed_seg_table);
+
+	for (seg = 0; seg < seg_num; seg++) {
+		mtrr_seg = &fixed_seg_table[seg];
+		if (mtrr_seg->start >= addr && addr < mtrr_seg->end)
+			return seg;
+	}
+
+	return -1;
+}
+
+static int fixed_mtrr_addr_seg_to_range_index(u64 addr, int seg)
+{
+	struct fixed_mtrr_segment *mtrr_seg;
+	int index;
+
+	mtrr_seg = &fixed_seg_table[seg];
+	index = mtrr_seg->range_start;
+	index += (addr - mtrr_seg->start) / mtrr_seg->range_size;
+	return index;
+}
+
 static void var_mtrr_range(struct kvm_mtrr_range *range, u64 *start, u64 *end)
 {
 	u64 mask;
-- 
2.1.0


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH 12/15] KVM: MTRR: introduce mtrr_for_each_mem_type
  2015-05-30 10:59 [PATCH 00/15] KVM: x86: fully implement vMTRR Xiao Guangrong
                   ` (11 preceding siblings ...)
  2015-05-30 10:59 ` [PATCH 11/15] KVM: MTRR: introduce fixed_mtrr_addr_* functions Xiao Guangrong
@ 2015-05-30 10:59 ` Xiao Guangrong
  2015-06-01  9:33   ` Paolo Bonzini
  2015-06-09  0:36   ` David Matlack
  2015-05-30 10:59 ` [PATCH 13/15] KVM: MTRR: simplify kvm_mtrr_get_guest_memory_type Xiao Guangrong
                   ` (2 subsequent siblings)
  15 siblings, 2 replies; 44+ messages in thread
From: Xiao Guangrong @ 2015-05-30 10:59 UTC (permalink / raw)
  To: pbonzini; +Cc: gleb, mtosatti, kvm, linux-kernel, Xiao Guangrong

It walks all MTRRs and gets all the memory cache type setting for the
specified range also it checks if the range is fully covered by MTRRs

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
 arch/x86/kvm/mtrr.c | 183 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 183 insertions(+)

diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index e59d138..35f86303 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -395,6 +395,189 @@ void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu)
 	INIT_LIST_HEAD(&vcpu->arch.mtrr_state.head);
 }
 
+struct mtrr_looker {
+	/* input fields. */
+	struct kvm_mtrr *mtrr_state;
+	u64 start;
+	u64 end;
+
+	/* output fields. */
+	int mem_type;
+	/* [start, end) is fully covered in MTRRs? */
+	bool partial_map;
+
+	/* private fields. */
+	union {
+		/* used for fixed MTRRs. */
+		struct {
+			int index;
+			int seg;
+		};
+
+		/* used for var MTRRs. */
+		struct {
+			struct kvm_mtrr_range *range;
+			/* max address has been covered in var MTRRs. */
+			u64 start_max;
+		};
+	};
+
+	bool fixed;
+};
+
+static void mtrr_lookup_init(struct mtrr_looker *looker,
+			     struct kvm_mtrr *mtrr_state, u64 start, u64 end)
+{
+	looker->mtrr_state = mtrr_state;
+	looker->start = start;
+	looker->end = end;
+}
+
+static u64 fixed_mtrr_range_end_addr(int seg, int index)
+{
+	struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg];
+
+	 return mtrr_seg->start + mtrr_seg->range_size * index;
+}
+
+static bool mtrr_lookup_fixed_start(struct mtrr_looker *looker)
+{
+	int seg, index;
+
+	if (!looker->mtrr_state->fixed_mtrr_enabled)
+		return false;
+
+	seg = fixed_mtrr_addr_to_seg(looker->start);
+	if (seg < 0)
+		return false;
+
+	looker->fixed = true;
+	index = fixed_mtrr_addr_seg_to_range_index(looker->start, seg);
+	looker->index = index;
+	looker->seg = seg;
+	looker->mem_type = looker->mtrr_state->fixed_ranges[index];
+	looker->start = fixed_mtrr_range_end_addr(seg, index);
+	return true;
+}
+
+static bool match_var_range(struct mtrr_looker *looker,
+			    struct kvm_mtrr_range *range)
+{
+	u64 start, end;
+
+	var_mtrr_range(range, &start, &end);
+	if (!(start >= looker->end || end <= looker->start)) {
+		looker->range = range;
+		looker->mem_type = range->base & 0xff;
+
+		/*
+		 * the function is called when we do kvm_mtrr.head walking
+		 * that means range has the minimum base address interleaves
+		 * with [looker->start_max, looker->end).
+		 */
+		looker->partial_map |= looker->start_max < start;
+
+		/* update the max address has been covered. */
+		looker->start_max = max(looker->start_max, end);
+		return true;
+	}
+
+	return false;
+}
+
+static void mtrr_lookup_var_start(struct mtrr_looker *looker)
+{
+	struct kvm_mtrr *mtrr_state = looker->mtrr_state;
+	struct kvm_mtrr_range *range;
+
+	looker->fixed = false;
+	looker->partial_map = false;
+	looker->start_max = looker->start;
+	looker->mem_type = -1;
+
+	list_for_each_entry(range, &mtrr_state->head, node)
+		if (match_var_range(looker, range))
+			return;
+
+	looker->partial_map = true;
+}
+
+static void mtrr_lookup_fixed_next(struct mtrr_looker *looker)
+{
+	struct fixed_mtrr_segment *eseg = &fixed_seg_table[looker->seg];
+	struct kvm_mtrr *mtrr_state = looker->mtrr_state;
+	u64 end;
+
+	if (looker->start >= looker->end) {
+		looker->mem_type = -1;
+		looker->partial_map = false;
+		return;
+	}
+
+	WARN_ON(!looker->fixed);
+
+	looker->index++;
+	end = fixed_mtrr_range_end_addr(looker->seg, looker->index);
+
+	/* switch to next segment. */
+	if (end >= eseg->end) {
+		looker->seg++;
+		looker->index = 0;
+
+		/* have looked up for all fixed MTRRs. */
+		if (looker->seg >= ARRAY_SIZE(fixed_seg_table))
+			return mtrr_lookup_var_start(looker);
+
+		end = fixed_mtrr_range_end_addr(looker->seg, looker->index);
+	}
+
+	looker->mem_type = mtrr_state->fixed_ranges[looker->index];
+	looker->start = end;
+}
+
+static void mtrr_lookup_var_next(struct mtrr_looker *looker)
+{
+	struct kvm_mtrr *mtrr_state = looker->mtrr_state;
+
+	WARN_ON(looker->fixed);
+
+	looker->mem_type = -1;
+
+	list_for_each_entry_continue(looker->range, &mtrr_state->head, node)
+		if (match_var_range(looker, looker->range))
+			return;
+
+	looker->partial_map |= looker->start_max < looker->end;
+}
+
+static void mtrr_lookup_start(struct mtrr_looker *looker)
+{
+	looker->mem_type = -1;
+
+	if (!looker->mtrr_state->mtrr_enabled) {
+		looker->partial_map = true;
+		return;
+	}
+
+	if (!mtrr_lookup_fixed_start(looker))
+		mtrr_lookup_var_start(looker);
+}
+
+static void mtrr_lookup_next(struct mtrr_looker *looker)
+{
+	WARN_ON(looker->mem_type == -1);
+
+	if (looker->fixed)
+		mtrr_lookup_fixed_next(looker);
+	else
+		mtrr_lookup_var_next(looker);
+}
+
+#define mtrr_for_each_mem_type(_looker_, _mtrr_, _gpa_start_, _gpa_end_) \
+	for (mtrr_lookup_init(_looker_, _mtrr_, _gpa_start_, _gpa_end_), \
+	     mtrr_lookup_start(_looker_); (_looker_)->mem_type != -1;	 \
+	     mtrr_lookup_next(_looker_))
+
 u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
 {
 	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
-- 
2.1.0


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH 13/15] KVM: MTRR: simplify kvm_mtrr_get_guest_memory_type
  2015-05-30 10:59 [PATCH 00/15] KVM: x86: fully implement vMTRR Xiao Guangrong
                   ` (12 preceding siblings ...)
  2015-05-30 10:59 ` [PATCH 12/15] KVM: MTRR: introduce mtrr_for_each_mem_type Xiao Guangrong
@ 2015-05-30 10:59 ` Xiao Guangrong
  2015-05-30 10:59 ` [PATCH 14/15] KVM: MTRR: do not map huage page for non-consistent range Xiao Guangrong
  2015-05-30 10:59 ` [PATCH 15/15] KVM: VMX: fully implement guest MTRR virtualization Xiao Guangrong
  15 siblings, 0 replies; 44+ messages in thread
From: Xiao Guangrong @ 2015-05-30 10:59 UTC (permalink / raw)
  To: pbonzini; +Cc: gleb, mtosatti, kvm, linux-kernel, Xiao Guangrong

mtrr_for_each_mem_type() is ready now, use it to simplify
kvm_mtrr_get_guest_memory_type()

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
 arch/x86/kvm/mtrr.c | 61 +++++++++++++----------------------------------------
 1 file changed, 15 insertions(+), 46 deletions(-)

diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index 35f86303..bc90834 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -581,56 +581,19 @@ static void mtrr_lookup_next(struct mtrr_looker *looker)
 u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
 {
 	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
-	u64 base, mask, start = gfn_to_gpa(gfn);
-	int i, num_var_ranges = KVM_NR_VAR_MTRR, type_mask, type = -1;
-
-	/* MTRR is completely disabled, use UC for all of physical memory. */
-	if (!mtrr_state->mtrr_enabled)
-		return MTRR_TYPE_UNCACHABLE;
-
-	/* Look in fixed ranges. Just return the type as per start */
-	if (mtrr_state->fixed_mtrr_enabled && (start < 0x100000)) {
-		int idx;
-
-		if (start < 0x80000) {
-			idx = 0;
-			idx += (start >> 16);
-			return mtrr_state->fixed_ranges[idx];
-		} else if (start < 0xC0000) {
-			idx = 1 * 8;
-			idx += ((start - 0x80000) >> 14);
-			return mtrr_state->fixed_ranges[idx];
-		} else if (start < 0x1000000) {
-			idx = 3 * 8;
-			idx += ((start - 0xC0000) >> 12);
-			return mtrr_state->fixed_ranges[idx];
-		}
-	}
+	struct mtrr_looker looker;
+	u64 start = gfn_to_gpa(gfn), end = start + PAGE_SIZE;
+	int type_mask, type = -1;
 
-	/*
-	 * Look in variable ranges
-	 * Look of multiple ranges matching this address and pick type
-	 * as per MTRR precedence
-	 */
 	type_mask = (1 << MTRR_TYPE_WRBACK) | (1 << MTRR_TYPE_WRTHROUGH);
-	for (i = 0; i < num_var_ranges; ++i) {
-		int curr_type;
-
-		if (!(mtrr_state->var_ranges[i].mask & (1 << 11)))
-			continue;
-
-		base = mtrr_state->var_ranges[i].base & PAGE_MASK;
-		mask = mtrr_state->var_ranges[i].mask & PAGE_MASK;
-
-		if ((start & mask) != (base & mask))
-			continue;
+	mtrr_for_each_mem_type(&looker, mtrr_state, start, end) {
+		int curr_type = looker.mem_type;
 
 		/*
 		 * Please refer to Intel SDM Volume 3: 11.11.4.1 MTRR
 		 * Precedences.
 		 */
 
-		curr_type = mtrr_state->var_ranges[i].base & 0xff;
 		if (type == -1) {
 			type = curr_type;
 			continue;
@@ -670,9 +633,15 @@ u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
 		 return MTRR_TYPE_WRBACK;
 	}
 
-	if (type != -1)
-		return type;
-
-	return mtrr_state->def_type;
+	/* It is not covered by MTRRs. */
+	if (looker.partial_map) {
+		/*
+		 * We just check one page, partially covered by MTRRs is
+		 * impossible.
+		 */
+		WARN_ON(type != -1);
+		type = mtrr_state->def_type;
+	}
+	return type;
 }
 EXPORT_SYMBOL_GPL(kvm_mtrr_get_guest_memory_type);
-- 
2.1.0


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH 14/15] KVM: MTRR: do not map huage page for non-consistent range
  2015-05-30 10:59 [PATCH 00/15] KVM: x86: fully implement vMTRR Xiao Guangrong
                   ` (13 preceding siblings ...)
  2015-05-30 10:59 ` [PATCH 13/15] KVM: MTRR: simplify kvm_mtrr_get_guest_memory_type Xiao Guangrong
@ 2015-05-30 10:59 ` Xiao Guangrong
  2015-06-01  9:36   ` Paolo Bonzini
  2015-05-30 10:59 ` [PATCH 15/15] KVM: VMX: fully implement guest MTRR virtualization Xiao Guangrong
  15 siblings, 1 reply; 44+ messages in thread
From: Xiao Guangrong @ 2015-05-30 10:59 UTC (permalink / raw)
  To: pbonzini; +Cc: gleb, mtosatti, kvm, linux-kernel, Xiao Guangrong

Based on Intel's SDM, mapping huge page which do not have consistent
memory cache for each 4k page will cause undefined behavior

In order to avoiding this kind of undefined behavior, we force to use
4k pages under this case

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
 arch/x86/kvm/mmu.c  | 20 +++++++++++++++++++-
 arch/x86/kvm/mtrr.c | 25 +++++++++++++++++++++++++
 arch/x86/kvm/x86.h  |  2 ++
 3 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 7462c57..c8c2a90 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3437,6 +3437,16 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
 	return false;
 }
 
+static bool
+check_hugepage_cache_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int level)
+{
+	int page_num = KVM_PAGES_PER_HPAGE(level);
+
+	gfn &= ~(page_num - 1);
+
+	return kvm_mtrr_check_gfn_range_consistency(vcpu, gfn, page_num);
+}
+
 static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
 			  bool prefault)
 {
@@ -3462,9 +3472,17 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
 	if (r)
 		return r;
 
-	force_pt_level = mapping_level_dirty_bitmap(vcpu, gfn);
+	if (mapping_level_dirty_bitmap(vcpu, gfn) ||
+	    !check_hugepage_cache_consistency(vcpu, gfn, PT_DIRECTORY_LEVEL))
+		force_pt_level = 1;
+	else
+		force_pt_level = 0;
+
 	if (likely(!force_pt_level)) {
 		level = mapping_level(vcpu, gfn);
+		if (level > PT_DIRECTORY_LEVEL &&
+		    !check_hugepage_cache_consistency(vcpu, gfn, level))
+			level = PT_DIRECTORY_LEVEL;
 		gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
 	} else
 		level = PT_PAGE_TABLE_LEVEL;
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index bc90834..703a66b 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -645,3 +645,28 @@ u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
 	return type;
 }
 EXPORT_SYMBOL_GPL(kvm_mtrr_get_guest_memory_type);
+
+bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
+					  int page_num)
+{
+	struct mtrr_looker looker;
+	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
+	u64 start = gfn_to_gpa(gfn), end = gfn_to_gpa(gfn + page_num);
+	int type = -1;
+
+	mtrr_for_each_mem_type(&looker, mtrr_state, start, end) {
+		if (type == -1) {
+			type = looker.mem_type;
+			continue;
+		}
+
+		if (type != looker.mem_type)
+			return false;
+	}
+
+	if ((type != -1) && looker.partial_map &&
+	      (mtrr_state->def_type != type))
+		return false;
+
+	return true;
+}
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index a3dae49..7c30ec8 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -166,6 +166,8 @@ void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu);
 bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data);
 int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data);
 int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
+bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
+					  int page_num);
 
 #define KVM_SUPPORTED_XCR0     (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \
 				| XSTATE_BNDREGS | XSTATE_BNDCSR \
-- 
2.1.0


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH 15/15] KVM: VMX: fully implement guest MTRR virtualization
  2015-05-30 10:59 [PATCH 00/15] KVM: x86: fully implement vMTRR Xiao Guangrong
                   ` (14 preceding siblings ...)
  2015-05-30 10:59 ` [PATCH 14/15] KVM: MTRR: do not map huage page for non-consistent range Xiao Guangrong
@ 2015-05-30 10:59 ` Xiao Guangrong
  15 siblings, 0 replies; 44+ messages in thread
From: Xiao Guangrong @ 2015-05-30 10:59 UTC (permalink / raw)
  To: pbonzini; +Cc: gleb, mtosatti, kvm, linux-kernel, Xiao Guangrong

Currently guest MTRR is completely prohibited if cache snoop is supported on
IOMMU (!noncoherent_dma) and host does the emulation based on the knowledge
from host side, however, host side is not the good point to know
what the purpose of guest is. A good example is that pass-throughed VGA
frame buffer is not always UC as host expected

This patchset enables full MTRR virtualization and currently only works on
Intel EPT architecture

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/mmu.c              |  3 +--
 arch/x86/kvm/mtrr.c             |  3 +--
 arch/x86/kvm/svm.c              |  2 +-
 arch/x86/kvm/vmx.c              | 28 ++++------------------------
 5 files changed, 8 insertions(+), 30 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 5be8f2e..b34de27 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -806,7 +806,7 @@ struct kvm_x86_ops {
 	void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
 	int (*get_tdp_level)(void);
-	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
+	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn);
 	int (*get_lpage_level)(void);
 	bool (*rdtscp_supported)(void);
 	bool (*invpcid_supported)(void);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index c8c2a90..828fcd6 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2496,8 +2496,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 	if (level > PT_PAGE_TABLE_LEVEL)
 		spte |= PT_PAGE_SIZE_MASK;
 	if (tdp_enabled)
-		spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
-			kvm_is_reserved_pfn(pfn));
+		spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn);
 
 	if (host_writable)
 		spte |= SPTE_HOST_WRITEABLE;
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index 703a66b..bf84218 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -259,8 +259,7 @@ static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
 	gfn_t start, end;
 	int index;
 
-	if (msr == MSR_IA32_CR_PAT || !tdp_enabled ||
-	      !kvm_arch_has_noncoherent_dma(vcpu->kvm))
+	if (msr == MSR_IA32_CR_PAT || !tdp_enabled)
 		return;
 
 	if (!mtrr_state->mtrr_enabled && msr != MSR_MTRRdefType)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b9f9e10..23dd78a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -4075,7 +4075,7 @@ static bool svm_cpu_has_accelerated_tpr(void)
 	return false;
 }
 
-static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
+static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn)
 {
 	return 0;
 }
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index fe7a589..78b77be 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8626,31 +8626,11 @@ static int get_ept_level(void)
 	return VMX_EPT_DEFAULT_GAW + 1;
 }
 
-static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
-{
-	u64 ret;
-
-	/* For VT-d and EPT combination
-	 * 1. MMIO: always map as UC
-	 * 2. EPT with VT-d:
-	 *   a. VT-d without snooping control feature: can't guarantee the
-	 *	result, try to trust guest.
-	 *   b. VT-d with snooping control feature: snooping control feature of
-	 *	VT-d engine can guarantee the cache correctness. Just set it
-	 *	to WB to keep consistent with host. So the same as item 3.
-	 * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep
-	 *    consistent with host MTRR
-	 */
-	if (is_mmio)
-		ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT;
-	else if (kvm_arch_has_noncoherent_dma(vcpu->kvm))
-		ret = kvm_mtrr_get_guest_memory_type(vcpu, gfn) <<
-		      VMX_EPT_MT_EPTE_SHIFT;
-	else
-		ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT)
-			| VMX_EPT_IPAT_BIT;
+static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+	u8 type = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
 
-	return ret;
+	return type << VMX_EPT_MT_EPTE_SHIFT;
 }
 
 static int vmx_get_lpage_level(void)
-- 
2.1.0


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* Re: [PATCH 05/15] KVM: MTRR: clean up mtrr default type
  2015-05-30 10:59 ` [PATCH 05/15] KVM: MTRR: clean up mtrr default type Xiao Guangrong
@ 2015-06-01  9:11   ` Paolo Bonzini
  2015-06-03  1:55     ` Xiao Guangrong
  2015-06-09  0:35   ` David Matlack
  1 sibling, 1 reply; 44+ messages in thread
From: Paolo Bonzini @ 2015-06-01  9:11 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: gleb, mtosatti, kvm, linux-kernel



On 30/05/2015 12:59, Xiao Guangrong wrote:
> Use union definition to avoid the decode/code workload and drop all the
> hard code
> 
> Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
> ---
>  arch/x86/include/asm/kvm_host.h | 12 ++++++++++--
>  arch/x86/kvm/mtrr.c             | 19 ++++++++-----------
>  2 files changed, 18 insertions(+), 13 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 2c3c52d..95ce2ff 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -347,8 +347,16 @@ enum {
>  struct kvm_mtrr {
>  	struct mtrr_var_range var_ranges[KVM_NR_VAR_MTRR];
>  	mtrr_type fixed_ranges[KVM_NR_FIXED_MTRR_REGION];
> -	unsigned char enabled;
> -	mtrr_type def_type;
> +
> +	union {
> +		u64 deftype;
> +		struct {
> +			unsigned def_type:8;
> +			unsigned reserved:2;
> +			unsigned fixed_mtrr_enabled:1;
> +			unsigned mtrr_enabled:1;
> +		};
> +	};
>  };
>  
>  struct kvm_vcpu_arch {
> diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
> index 562341b..6de49dd 100644
> --- a/arch/x86/kvm/mtrr.c
> +++ b/arch/x86/kvm/mtrr.c
> @@ -105,7 +105,6 @@ EXPORT_SYMBOL_GPL(kvm_mtrr_valid);
>  static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
>  {
>  	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
> -	unsigned char mtrr_enabled = mtrr_state->enabled;
>  	gfn_t start, end, mask;
>  	int index;
>  	bool is_fixed = true;
> @@ -114,7 +113,7 @@ static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
>  	      !kvm_arch_has_noncoherent_dma(vcpu->kvm))
>  		return;
>  
> -	if (!(mtrr_enabled & 0x2) && msr != MSR_MTRRdefType)
> +	if (!mtrr_state->mtrr_enabled && msr != MSR_MTRRdefType)

I know Linus doesn't like bitfields too much.  Can you change these to
inline functions, and only leave an "u64 deftype" in the struct?

Paolo

>  		return;
>  
>  	switch (msr) {
> @@ -153,7 +152,7 @@ static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
>  		end = ((start & mask) | ~mask) + 1;
>  	}
>  
> -	if (is_fixed && !(mtrr_enabled & 0x1))
> +	if (is_fixed && !mtrr_state->fixed_mtrr_enabled)
>  		return;
>  
>  	kvm_zap_gfn_range(vcpu->kvm, gpa_to_gfn(start), gpa_to_gfn(end));
> @@ -166,10 +165,9 @@ int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>  	if (!kvm_mtrr_valid(vcpu, msr, data))
>  		return 1;
>  
> -	if (msr == MSR_MTRRdefType) {
> -		vcpu->arch.mtrr_state.def_type = data;
> -		vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10;
> -	} else if (msr == MSR_MTRRfix64K_00000)
> +	if (msr == MSR_MTRRdefType)
> +		vcpu->arch.mtrr_state.deftype = data;
> +	else if (msr == MSR_MTRRfix64K_00000)
>  		p[0] = data;
>  	else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
>  		p[1 + msr - MSR_MTRRfix16K_80000] = data;
> @@ -216,8 +214,7 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
>  		return 1;
>  
>  	if (msr == MSR_MTRRdefType)
> -		*pdata = vcpu->arch.mtrr_state.def_type +
> -			 (vcpu->arch.mtrr_state.enabled << 10);
> +		*pdata = vcpu->arch.mtrr_state.deftype;
>  	else if (msr == MSR_MTRRfix64K_00000)
>  		*pdata = p[0];
>  	else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
> @@ -256,14 +253,14 @@ static int get_mtrr_type(struct kvm_mtrr *mtrr_state,
>  	int i, num_var_ranges = KVM_NR_VAR_MTRR;
>  
>  	/* MTRR is completely disabled, use UC for all of physical memory. */
> -	if (!(mtrr_state->enabled & 0x2))
> +	if (!mtrr_state->mtrr_enabled)
>  		return MTRR_TYPE_UNCACHABLE;
>  
>  	/* Make end inclusive end, instead of exclusive */
>  	end--;
>  
>  	/* Look in fixed ranges. Just return the type as per start */
> -	if ((mtrr_state->enabled & 0x1) && (start < 0x100000)) {
> +	if (mtrr_state->fixed_mtrr_enabled && (start < 0x100000)) {
>  		int idx;
>  
>  		if (start < 0x80000) {
> 

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 07/15] KVM: MTRR: improve kvm_mtrr_get_guest_memory_type
  2015-05-30 10:59 ` [PATCH 07/15] KVM: MTRR: improve kvm_mtrr_get_guest_memory_type Xiao Guangrong
@ 2015-06-01  9:16   ` Paolo Bonzini
  2015-06-03  2:12     ` Xiao Guangrong
  0 siblings, 1 reply; 44+ messages in thread
From: Paolo Bonzini @ 2015-06-01  9:16 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: gleb, mtosatti, kvm, linux-kernel



On 30/05/2015 12:59, Xiao Guangrong wrote:
>  - kvm_mtrr_get_guest_memory_type() only checks one page in MTRRs so that
>    it's unnecessary to check to see if the range is partially covered in
>    MTRR
> 
>  - optimize the check of overlap memory type and add some comments to explain
>    the precedence
> 
> Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
> ---
>  arch/x86/kvm/mtrr.c | 89 ++++++++++++++++++++++++++---------------------------
>  1 file changed, 44 insertions(+), 45 deletions(-)
> 
> diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
> index bc9c6da..d3c06d2 100644
> --- a/arch/x86/kvm/mtrr.c
> +++ b/arch/x86/kvm/mtrr.c
> @@ -231,24 +231,16 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
>  	return 0;
>  }
>  
> -/*
> - * The function is based on mtrr_type_lookup() in
> - * arch/x86/kernel/cpu/mtrr/generic.c
> - */
> -static int get_mtrr_type(struct kvm_mtrr *mtrr_state,
> -			 u64 start, u64 end)
> +u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
>  {
> -	u64 base, mask;
> -	u8 prev_match, curr_match;
> -	int i, num_var_ranges = KVM_NR_VAR_MTRR;
> +	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
> +	u64 base, mask, start = gfn_to_gpa(gfn);
> +	int i, num_var_ranges = KVM_NR_VAR_MTRR, type_mask, type = -1;

Do not mix initialized and uninitialized variables on the same line
(preexisting, I know, but let's fix it instead of making it worse :)).
Please put each initialized variable on a separate line.

Also please initialize type_mask here (more on this below).

>  
>  	/* MTRR is completely disabled, use UC for all of physical memory. */
>  	if (!mtrr_state->mtrr_enabled)
>  		return MTRR_TYPE_UNCACHABLE;
>  
> -	/* Make end inclusive end, instead of exclusive */
> -	end--;
> -
>  	/* Look in fixed ranges. Just return the type as per start */
>  	if (mtrr_state->fixed_mtrr_enabled && (start < 0x100000)) {
>  		int idx;
> @@ -273,9 +265,9 @@ static int get_mtrr_type(struct kvm_mtrr *mtrr_state,
>  	 * Look of multiple ranges matching this address and pick type
>  	 * as per MTRR precedence
>  	 */
> -	prev_match = 0xFF;
> +	type_mask = (1 << MTRR_TYPE_WRBACK) | (1 << MTRR_TYPE_WRTHROUGH);
>  	for (i = 0; i < num_var_ranges; ++i) {
> -		unsigned short start_state, end_state;
> +		int curr_type;
>  
>  		if (!(mtrr_state->var_ranges[i].mask & (1 << 11)))
>  			continue;
> @@ -283,50 +275,57 @@ static int get_mtrr_type(struct kvm_mtrr *mtrr_state,
>  		base = mtrr_state->var_ranges[i].base & PAGE_MASK;
>  		mask = mtrr_state->var_ranges[i].mask & PAGE_MASK;
>  
> -		start_state = ((start & mask) == (base & mask));
> -		end_state = ((end & mask) == (base & mask));
> -		if (start_state != end_state)
> -			return 0xFE;
> -
>  		if ((start & mask) != (base & mask))
>  			continue;
>  
> -		curr_match = mtrr_state->var_ranges[i].base & 0xff;
> -		if (prev_match == 0xFF) {
> -			prev_match = curr_match;
> +		/*
> +		 * Please refer to Intel SDM Volume 3: 11.11.4.1 MTRR
> +		 * Precedences.
> +		 */
> +
> +		curr_type = mtrr_state->var_ranges[i].base & 0xff;
> +		if (type == -1) {
> +			type = curr_type;
>  			continue;
>  		}
>  
> -		if (prev_match == MTRR_TYPE_UNCACHABLE ||
> -		    curr_match == MTRR_TYPE_UNCACHABLE)
> +		/*
> +		 * If two or more variable memory ranges match and the
> +		 * memory types are identical, then that memory type is
> +		 * used.
> +		 */
> +		if (type == curr_type)
> +			continue;
> +
> +		/*
> +		 * If two or more variable memory ranges match and one of
> +		 * the memory types is UC, the UC memory type used.
> +		 */
> +		if (curr_type == MTRR_TYPE_UNCACHABLE)
>  			return MTRR_TYPE_UNCACHABLE;
>  
> -		if ((prev_match == MTRR_TYPE_WRBACK &&
> -		     curr_match == MTRR_TYPE_WRTHROUGH) ||
> -		    (prev_match == MTRR_TYPE_WRTHROUGH &&
> -		     curr_match == MTRR_TYPE_WRBACK)) {
> -			prev_match = MTRR_TYPE_WRTHROUGH;
> -			curr_match = MTRR_TYPE_WRTHROUGH;
> +		/*
> +		 * If two or more variable memory ranges match and the
> +		 * memory types are WT and WB, the WT memory type is used.
> +		 */
> +		if (((1 << type) & type_mask) &&
> +		      ((1 << curr_type) & type_mask)) {

Please inline definition of type_mask in the "if", or rename it to
"wt_wb_mask" and make it const.  Or another suggestion below...

> +			type = MTRR_TYPE_WRTHROUGH;
> +			continue;
>  		}
>  
> -		if (prev_match != curr_match)
> -			return MTRR_TYPE_UNCACHABLE;
> +		/*
> +		 * For overlaps not defined by the above rules, processor
> +		 * behavior is undefined.
> +		 */

Perhaps just use type = MIN(type, curr_type), which also happens to get
WT vs. WB right?  You can also add a

	BUILD_BUG_ON(MTRR_TYPE_WRTHROUGH > MTRR_TYPE_WRBACK);

to ensure that the WT vs. WB precedence is correct.

Paolo

> +
> +		 /* We use WB for this undefined behavior. :( */
> +		 return MTRR_TYPE_WRBACK;
>  	}
>  
> -	if (prev_match != 0xFF)
> -		return prev_match;
> +	if (type != -1)
> +		return type;
>  
>  	return mtrr_state->def_type;
>  }
> -
> -u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
> -{
> -	u8 mtrr;
> -
> -	mtrr = get_mtrr_type(&vcpu->arch.mtrr_state, gfn << PAGE_SHIFT,
> -			     (gfn << PAGE_SHIFT) + PAGE_SIZE);
> -	if (mtrr == 0xfe || mtrr == 0xff)
> -		mtrr = MTRR_TYPE_WRBACK;
> -	return mtrr;
> -}
>  EXPORT_SYMBOL_GPL(kvm_mtrr_get_guest_memory_type);
> 

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 08/15] KVM: MTRR: introduce fixed_mtrr_segment table
  2015-05-30 10:59 ` [PATCH 08/15] KVM: MTRR: introduce fixed_mtrr_segment table Xiao Guangrong
@ 2015-06-01  9:25   ` Paolo Bonzini
  2015-06-03  2:29     ` Xiao Guangrong
  0 siblings, 1 reply; 44+ messages in thread
From: Paolo Bonzini @ 2015-06-01  9:25 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: gleb, mtosatti, kvm, linux-kernel



On 30/05/2015 12:59, Xiao Guangrong wrote:
> This table summarizes the information of fixed MTRRs and introduce some APIs
> to abstract its operation which helps us to clean up the code and will be
> used in later patches
> 
> Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
> ---
>  arch/x86/kvm/mtrr.c | 191 ++++++++++++++++++++++++++++++++++++++--------------
>  1 file changed, 142 insertions(+), 49 deletions(-)
> 
> diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
> index d3c06d2..888441e 100644
> --- a/arch/x86/kvm/mtrr.c
> +++ b/arch/x86/kvm/mtrr.c
> @@ -102,12 +102,126 @@ bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>  }
>  EXPORT_SYMBOL_GPL(kvm_mtrr_valid);
>  
> +struct fixed_mtrr_segment {
> +	u64 start;
> +	u64 end;
> +
> +	/*
> +	 * unit corresponds to the MSR entry in this segment, the size
> +	 * of unit is covered in one MSR.
> +	 */
> +	u64 unit_size;
> +
> +	/* a range is covered in one memory cache type. */
> +	u64 range_size;
> +
> +	/* the start position in kvm_mtrr.fixed_ranges[]. */
> +	int range_start;
> +};
> +
> +static struct fixed_mtrr_segment fixed_seg_table[] = {
> +	/* MSR_MTRRfix64K_00000, 1 unit. 64K fixed mtrr. */
> +	{
> +		.start = 0x0,
> +		.end = 0x80000,
> +		.unit_size = 0x80000,

Unit size is always range size * 8.

> +		.range_size = 1ULL << 16,

Perhaps 64 * 1024 (and so on below) is clearer, because it matches the
name of the MSR?

> +		.range_start = 0,
> +	},
> +
> +	/*
> +	 * MSR_MTRRfix16K_80000 ... MSR_MTRRfix16K_A0000, 2 units,
> +	 * 16K fixed mtrr.
> +	 */
> +	{
> +		.start = 0x80000,
> +		.end = 0xc0000,
> +		.unit_size = (0xc0000 - 0x80000) / 2,
> +		.range_size = 1ULL << 14,
> +		.range_start = 8,
> +	},
> +
> +	/*
> +	 * MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000, 8 units,
> +	 * 4K fixed mtrr.
> +	 */
> +	{
> +		.start = 0xc0000,
> +		.end = 0x100000,
> +		.unit_size = (0x100000 - 0xc0000) / 8,
> +		.range_size = 1ULL << 12,
> +		.range_start = 24,
> +	}
> +};
> +
> +static int fixed_msr_to_range_index(u32 msr)
> +{
> +	int seg, unit;
> +
> +	if (!fixed_msr_to_seg_unit(msr, &seg, &unit))
> +		return -1;
> +
> +	fixed_mtrr_seg_unit_range_index(seg, unit);

This looks wrong.

> +	return 0;
> +}
> +
>  static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
>  {
>  	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
>  	gfn_t start, end, mask;
>  	int index;
> -	bool is_fixed = true;
>  
>  	if (msr == MSR_IA32_CR_PAT || !tdp_enabled ||
>  	      !kvm_arch_has_noncoherent_dma(vcpu->kvm))
> @@ -116,32 +230,19 @@ static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
>  	if (!mtrr_state->mtrr_enabled && msr != MSR_MTRRdefType)
>  		return;
>  
> +	if (fixed_msr_to_range(msr, &start, &end)) {
> +		if (!mtrr_state->fixed_mtrr_enabled)
> +			return;
> +		goto do_zap;
> +	}
> +
>  	switch (msr) {

Please move defType handling in an "if" above "if
(fixed_msr_to_range(msr, &start, &end))".  Then you don't need the goto.

Paolo

> -	case MSR_MTRRfix64K_00000:
> -		start = 0x0;
> -		end = 0x80000;
> -		break;
> -	case MSR_MTRRfix16K_80000:
> -		start = 0x80000;
> -		end = 0xa0000;
> -		break;
> -	case MSR_MTRRfix16K_A0000:
> -		start = 0xa0000;
> -		end = 0xc0000;
> -		break;
> -	case MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000:
> -		index = msr - MSR_MTRRfix4K_C0000;
> -		start = 0xc0000 + index * (32 << 10);
> -		end = start + (32 << 10);
> -		break;
>  	case MSR_MTRRdefType:
> -		is_fixed = false;
>  		start = 0x0;
>  		end = ~0ULL;
>  		break;
>  	default:
>  		/* variable range MTRRs. */
> -		is_fixed = false;
>  		index = (msr - 0x200) / 2;
>  		start = mtrr_state->var_ranges[index].base & PAGE_MASK;
>  		mask = mtrr_state->var_ranges[index].mask & PAGE_MASK;
> @@ -150,38 +251,33 @@ static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
>  		end = ((start & mask) | ~mask) + 1;
>  	}
>  
> -	if (is_fixed && !mtrr_state->fixed_mtrr_enabled)
> -		return;
> -
> +do_zap:
>  	kvm_zap_gfn_range(vcpu->kvm, gpa_to_gfn(start), gpa_to_gfn(end));
>  }
>  
>  int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>  {
> -	u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
> +	int index;
>  
>  	if (!kvm_mtrr_valid(vcpu, msr, data))
>  		return 1;
>  
> -	if (msr == MSR_MTRRdefType)
> +	index = fixed_msr_to_range_index(msr);
> +	if (index >= 0)
> +		*(u64 *)&vcpu->arch.mtrr_state.fixed_ranges[index] = data;
> +	else if (msr == MSR_MTRRdefType)
>  		vcpu->arch.mtrr_state.deftype = data;
> -	else if (msr == MSR_MTRRfix64K_00000)
> -		p[0] = data;
> -	else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
> -		p[1 + msr - MSR_MTRRfix16K_80000] = data;
> -	else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
> -		p[3 + msr - MSR_MTRRfix4K_C0000] = data;
>  	else if (msr == MSR_IA32_CR_PAT)
>  		vcpu->arch.pat = data;
>  	else {	/* Variable MTRRs */
> -		int idx, is_mtrr_mask;
> +		int is_mtrr_mask;
>  
> -		idx = (msr - 0x200) / 2;
> -		is_mtrr_mask = msr - 0x200 - 2 * idx;
> +		index = (msr - 0x200) / 2;
> +		is_mtrr_mask = msr - 0x200 - 2 * index;
>  		if (!is_mtrr_mask)
> -			vcpu->arch.mtrr_state.var_ranges[idx].base = data;
> +			vcpu->arch.mtrr_state.var_ranges[index].base = data;
>  		else
> -			vcpu->arch.mtrr_state.var_ranges[idx].mask = data;
> +			vcpu->arch.mtrr_state.var_ranges[index].mask = data;
>  	}
>  
>  	update_mtrr(vcpu, msr);
> @@ -190,7 +286,7 @@ int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>  
>  int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
>  {
> -	u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
> +	int index;
>  
>  	/* MSR_MTRRcap is a readonly MSR. */
>  	if (msr == MSR_MTRRcap) {
> @@ -207,25 +303,22 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
>  	if (!msr_mtrr_valid(msr))
>  		return 1;
>  
> -	if (msr == MSR_MTRRdefType)
> +	index = fixed_msr_to_range_index(msr);
> +	if (index >= 0)
> +		*pdata = *(u64 *)&vcpu->arch.mtrr_state.fixed_ranges[index];
> +	else if (msr == MSR_MTRRdefType)
>  		*pdata = vcpu->arch.mtrr_state.deftype;
> -	else if (msr == MSR_MTRRfix64K_00000)
> -		*pdata = p[0];
> -	else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
> -		*pdata = p[1 + msr - MSR_MTRRfix16K_80000];
> -	else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
> -		*pdata = p[3 + msr - MSR_MTRRfix4K_C0000];
>  	else if (msr == MSR_IA32_CR_PAT)
>  		*pdata = vcpu->arch.pat;
>  	else {	/* Variable MTRRs */
> -		int idx, is_mtrr_mask;
> +		int is_mtrr_mask;
>  
> -		idx = (msr - 0x200) / 2;
> -		is_mtrr_mask = msr - 0x200 - 2 * idx;
> +		index = (msr - 0x200) / 2;
> +		is_mtrr_mask = msr - 0x200 - 2 * index;
>  		if (!is_mtrr_mask)
> -			*pdata = vcpu->arch.mtrr_state.var_ranges[idx].base;
> +			*pdata = vcpu->arch.mtrr_state.var_ranges[index].base;
>  		else
> -			*pdata = vcpu->arch.mtrr_state.var_ranges[idx].mask;
> +			*pdata = vcpu->arch.mtrr_state.var_ranges[index].mask;
>  	}
>  
>  	return 0;
> 

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 10/15] KVM: MTRR: sort variable MTRRs
  2015-05-30 10:59 ` [PATCH 10/15] KVM: MTRR: sort variable MTRRs Xiao Guangrong
@ 2015-06-01  9:27   ` Paolo Bonzini
  2015-06-03  2:31     ` Xiao Guangrong
  0 siblings, 1 reply; 44+ messages in thread
From: Paolo Bonzini @ 2015-06-01  9:27 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: gleb, mtosatti, kvm, linux-kernel



On 30/05/2015 12:59, Xiao Guangrong wrote:
> Sort all valid variable MTRRs based on its base address, it will help us to
> check a range to see if it's fully contained in variable MTRRs
> 
> Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  3 +++
>  arch/x86/kvm/mtrr.c             | 39 +++++++++++++++++++++++++++++++++++++++
>  arch/x86/kvm/x86.c              |  2 +-
>  arch/x86/kvm/x86.h              |  1 +
>  4 files changed, 44 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index f3fc152..5be8f2e 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -347,6 +347,7 @@ enum {
>  struct kvm_mtrr_range {
>  	u64 base;
>  	u64 mask;
> +	struct list_head node;
>  };
>  
>  struct kvm_mtrr {
> @@ -362,6 +363,8 @@ struct kvm_mtrr {
>  			unsigned mtrr_enabled:1;
>  		};
>  	};
> +
> +	struct list_head head;
>  };
>  
>  struct kvm_vcpu_arch {
> diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
> index aeb9767..8e3b81a 100644
> --- a/arch/x86/kvm/mtrr.c
> +++ b/arch/x86/kvm/mtrr.c
> @@ -262,6 +262,38 @@ do_zap:
>  	kvm_zap_gfn_range(vcpu->kvm, gpa_to_gfn(start), gpa_to_gfn(end));
>  }
>  
> +static bool var_mtrr_range_is_valid(struct kvm_mtrr_range *range)
> +{
> +	u64 start, end;
> +
> +	if (!(range->mask & (1 << 11)))
> +		return false;
> +
> +	var_mtrr_range(range, &start, &end);
> +	return end > start;
> +}
> +
> +static void set_var_mtrr_start(struct kvm_mtrr *mtrr_state, int index)
> +{
> +	/* remove the entry if it's in the list. */
> +	if (var_mtrr_range_is_valid(&mtrr_state->var_ranges[index]))
> +		list_del(&mtrr_state->var_ranges[index].node);
> +}
> +
> +static void set_var_mtrr_end(struct kvm_mtrr *mtrr_state, int index)
> +{
> +	struct kvm_mtrr_range *tmp, *cur = &mtrr_state->var_ranges[index];
> +
> +	/* add it to the list if it's valid. */
> +	if (var_mtrr_range_is_valid(&mtrr_state->var_ranges[index])) {
> +		list_for_each_entry(tmp, &mtrr_state->head, node)
> +			if (cur->base < tmp->base)
> +				list_add_tail(&cur->node, &tmp->node);
> +
> +		list_add_tail(&cur->node, &mtrr_state->head);
> +	}
> +}
> +
>  int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>  {
>  	int index;
> @@ -281,10 +313,12 @@ int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>  
>  		index = (msr - 0x200) / 2;
>  		is_mtrr_mask = msr - 0x200 - 2 * index;
> +		set_var_mtrr_start(&vcpu->arch.mtrr_state, index);
>  		if (!is_mtrr_mask)
>  			vcpu->arch.mtrr_state.var_ranges[index].base = data;
>  		else
>  			vcpu->arch.mtrr_state.var_ranges[index].mask = data;
> +		set_var_mtrr_end(&vcpu->arch.mtrr_state, index);

Just move the whole code to a new kvm_set_var_mtrr function.

>  	}
>  
>  	update_mtrr(vcpu, msr);
> @@ -331,6 +365,11 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
>  	return 0;
>  }
>  
> +void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu)
> +{
> +	INIT_LIST_HEAD(&vcpu->arch.mtrr_state.head);
> +}
> +
>  u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
>  {
>  	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 64c2891..8084ac3 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -6991,7 +6991,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
>  	kvm_vcpu_reset(vcpu, false);
>  	kvm_mmu_setup(vcpu);
>  	vcpu_put(vcpu);
> -
> +	kvm_vcpu_mtrr_init(vcpu);

Please move this call much earlier.

Paolo

>  	return r;
>  }
>  
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 956558d..a3dae49 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -162,6 +162,7 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
>  	gva_t addr, void *val, unsigned int bytes,
>  	struct x86_exception *exception);
>  
> +void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu);
>  bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data);
>  int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data);
>  int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
> 

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 12/15] KVM: MTRR: introduce mtrr_for_each_mem_type
  2015-05-30 10:59 ` [PATCH 12/15] KVM: MTRR: introduce mtrr_for_each_mem_type Xiao Guangrong
@ 2015-06-01  9:33   ` Paolo Bonzini
  2015-06-01 14:26     ` Paolo Bonzini
  2015-06-03  2:40     ` Xiao Guangrong
  2015-06-09  0:36   ` David Matlack
  1 sibling, 2 replies; 44+ messages in thread
From: Paolo Bonzini @ 2015-06-01  9:33 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: gleb, mtosatti, kvm, linux-kernel



On 30/05/2015 12:59, Xiao Guangrong wrote:
> +struct mtrr_looker {
> +	/* input fields. */
> +	struct kvm_mtrr *mtrr_state;
> +	u64 start;
> +	u64 end;

s/looker/iter/ or s/looker/lookup/

> +static void mtrr_lookup_start(struct mtrr_looker *looker)
> +{
> +	looker->mem_type = -1;
> +
> +	if (!looker->mtrr_state->mtrr_enabled) {
> +		looker->partial_map = true;
> +		return;
> +	}
> +
> +	if (!mtrr_lookup_fixed_start(looker))
> +		mtrr_lookup_var_start(looker);
> +}
> +

Separating mtrr_lookup_start and mtrr_lookup_init is weird.

There are common parts of mtrr_lookup_*_start and mtrr_lookup_*_next.  
For example this:

> +	looker->mem_type = looker->mtrr_state->fixed_ranges[index];
> +	looker->start = fixed_mtrr_range_end_addr(seg, index);
> +	return true;

in mtrr_lookup_fixed_start is the same as this:

> 
> +	end = fixed_mtrr_range_end_addr(looker->seg, looker->index);
> +
> +	/* switch to next segment. */
> +	if (end >= eseg->end) {
> +		looker->seg++;
> +		looker->index = 0;
> +
> +		/* have looked up for all fixed MTRRs. */
> +		if (looker->seg >= ARRAY_SIZE(fixed_seg_table))
> +			return mtrr_lookup_var_start(looker);
> +
> +		end = fixed_mtrr_range_end_addr(looker->seg, looker->index);
> +	}
> +
> +	looker->mem_type = mtrr_state->fixed_ranges[looker->index];
> +	looker->start = end;

in mtrr_lookup_fixed_next.  Can you try to make them more common?

Basically you should have

+#define mtrr_for_each_mem_type(_looker_, _mtrr_, _gpa_start_, _gpa_end_) \
+	for (mtrr_lookup_init(_looker_, _mtrr_, _gpa_start_, _gpa_end_); \
+	     !mtrr_lookup_end(_looker_); mtrr_lookup_next(_looker_))

Paolo

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 14/15] KVM: MTRR: do not map huage page for non-consistent range
  2015-05-30 10:59 ` [PATCH 14/15] KVM: MTRR: do not map huage page for non-consistent range Xiao Guangrong
@ 2015-06-01  9:36   ` Paolo Bonzini
  2015-06-01  9:38     ` Paolo Bonzini
  2015-06-03  2:56     ` Xiao Guangrong
  0 siblings, 2 replies; 44+ messages in thread
From: Paolo Bonzini @ 2015-06-01  9:36 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: gleb, mtosatti, kvm, linux-kernel



On 30/05/2015 12:59, Xiao Guangrong wrote:
> Currently guest MTRR is completely prohibited if cache snoop is supported on
> IOMMU (!noncoherent_dma) and host does the emulation based on the knowledge
> from host side, however, host side is not the good point to know
> what the purpose of guest is. A good example is that pass-throughed VGA
> frame buffer is not always UC as host expected

Can you explain how?  The original idea was that such a framebuffer
would be kvm_is_reserved_pfn and thus be unconditionally UC.

> +bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
> +					  int page_num)
> +{
> +	struct mtrr_looker looker;
> +	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
> +	u64 start = gfn_to_gpa(gfn), end = gfn_to_gpa(gfn + page_num);
> +	int type = -1;
> +
> +	mtrr_for_each_mem_type(&looker, mtrr_state, start, end) {
> +		if (type == -1) {
> +			type = looker.mem_type;
> +			continue;
> +		}
> +
> +		if (type != looker.mem_type)
> +			return false;
> +	}
> +
> +	if ((type != -1) && looker.partial_map &&
> +	      (mtrr_state->def_type != type))
> +		return false;
> +

No Pascal-like parentheses.

Does this have a performance impact on shadow?  Perhaps we could cache
in struct kvm_arch_memory_slot whether the memslot is covered by MTRRs?

Paolo

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 14/15] KVM: MTRR: do not map huage page for non-consistent range
  2015-06-01  9:36   ` Paolo Bonzini
@ 2015-06-01  9:38     ` Paolo Bonzini
  2015-06-03  2:56     ` Xiao Guangrong
  1 sibling, 0 replies; 44+ messages in thread
From: Paolo Bonzini @ 2015-06-01  9:38 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: gleb, mtosatti, kvm, linux-kernel



On 01/06/2015 11:36, Paolo Bonzini wrote:
> Does this have a performance impact on shadow?  Perhaps we could cache
> in struct kvm_arch_memory_slot whether the memslot is covered by MTRRs?

Nevermind, patch 15 answers my question.

Paolo

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 12/15] KVM: MTRR: introduce mtrr_for_each_mem_type
  2015-06-01  9:33   ` Paolo Bonzini
@ 2015-06-01 14:26     ` Paolo Bonzini
  2015-06-03  2:57       ` Xiao Guangrong
  2015-06-03  2:40     ` Xiao Guangrong
  1 sibling, 1 reply; 44+ messages in thread
From: Paolo Bonzini @ 2015-06-01 14:26 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: gleb, mtosatti, kvm, linux-kernel



On 01/06/2015 11:33, Paolo Bonzini wrote:
>> +	looker->mem_type = looker->mtrr_state->fixed_ranges[index];
>> > +	looker->start = fixed_mtrr_range_end_addr(seg, index);
>> > +	return true;
> in mtrr_lookup_fixed_start is the same as this:
> 
>> > 
>> > +	end = fixed_mtrr_range_end_addr(looker->seg, looker->index);
>> > +
>> > +	/* switch to next segment. */
>> > +	if (end >= eseg->end) {
>> > +		looker->seg++;
>> > +		looker->index = 0;
>> > +
>> > +		/* have looked up for all fixed MTRRs. */
>> > +		if (looker->seg >= ARRAY_SIZE(fixed_seg_table))
>> > +			return mtrr_lookup_var_start(looker);
>> > +
>> > +		end = fixed_mtrr_range_end_addr(looker->seg, looker->index);
>> > +	}
>> > +
>> > +	looker->mem_type = mtrr_state->fixed_ranges[looker->index];
>> > +	looker->start = end;
> in mtrr_lookup_fixed_next.  Can you try to make them more common?
> 
> Basically you should have
> 
> +#define mtrr_for_each_mem_type(_looker_, _mtrr_, _gpa_start_, _gpa_end_) \
> +	for (mtrr_lookup_init(_looker_, _mtrr_, _gpa_start_, _gpa_end_); \
> +	     !mtrr_lookup_end(_looker_); mtrr_lookup_next(_looker_))

... where the above code I quoted would be part of mtrr_lookup_end.

Paolo

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 05/15] KVM: MTRR: clean up mtrr default type
  2015-06-01  9:11   ` Paolo Bonzini
@ 2015-06-03  1:55     ` Xiao Guangrong
  0 siblings, 0 replies; 44+ messages in thread
From: Xiao Guangrong @ 2015-06-03  1:55 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: gleb, mtosatti, kvm, linux-kernel


Thanks for your review, Paolo!


On 06/01/2015 05:11 PM, Paolo Bonzini wrote:

>>   struct kvm_vcpu_arch {
>> diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
>> index 562341b..6de49dd 100644
>> --- a/arch/x86/kvm/mtrr.c
>> +++ b/arch/x86/kvm/mtrr.c
>> @@ -105,7 +105,6 @@ EXPORT_SYMBOL_GPL(kvm_mtrr_valid);
>>   static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
>>   {
>>   	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
>> -	unsigned char mtrr_enabled = mtrr_state->enabled;
>>   	gfn_t start, end, mask;
>>   	int index;
>>   	bool is_fixed = true;
>> @@ -114,7 +113,7 @@ static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
>>   	      !kvm_arch_has_noncoherent_dma(vcpu->kvm))
>>   		return;
>>
>> -	if (!(mtrr_enabled & 0x2) && msr != MSR_MTRRdefType)
>> +	if (!mtrr_state->mtrr_enabled && msr != MSR_MTRRdefType)
>
> I know Linus doesn't like bitfields too much.  Can you change these to
> inline functions, and only leave an "u64 deftype" in the struct?
>

Sure. I will introduce mtrr_is_enabled() and fixed_mtrr_is_enabled() instead
of these bitfields.


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 07/15] KVM: MTRR: improve kvm_mtrr_get_guest_memory_type
  2015-06-01  9:16   ` Paolo Bonzini
@ 2015-06-03  2:12     ` Xiao Guangrong
  2015-06-03  7:57       ` Paolo Bonzini
  0 siblings, 1 reply; 44+ messages in thread
From: Xiao Guangrong @ 2015-06-03  2:12 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: gleb, mtosatti, kvm, linux-kernel



On 06/01/2015 05:16 PM, Paolo Bonzini wrote:
>
>
> On 30/05/2015 12:59, Xiao Guangrong wrote:
>>   - kvm_mtrr_get_guest_memory_type() only checks one page in MTRRs so that
>>     it's unnecessary to check to see if the range is partially covered in
>>     MTRR
>>
>>   - optimize the check of overlap memory type and add some comments to explain
>>     the precedence
>>
>> Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
>> ---
>>   arch/x86/kvm/mtrr.c | 89 ++++++++++++++++++++++++++---------------------------
>>   1 file changed, 44 insertions(+), 45 deletions(-)
>>
>> diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
>> index bc9c6da..d3c06d2 100644
>> --- a/arch/x86/kvm/mtrr.c
>> +++ b/arch/x86/kvm/mtrr.c
>> @@ -231,24 +231,16 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
>>   	return 0;
>>   }
>>
>> -/*
>> - * The function is based on mtrr_type_lookup() in
>> - * arch/x86/kernel/cpu/mtrr/generic.c
>> - */
>> -static int get_mtrr_type(struct kvm_mtrr *mtrr_state,
>> -			 u64 start, u64 end)
>> +u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
>>   {
>> -	u64 base, mask;
>> -	u8 prev_match, curr_match;
>> -	int i, num_var_ranges = KVM_NR_VAR_MTRR;
>> +	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
>> +	u64 base, mask, start = gfn_to_gpa(gfn);
>> +	int i, num_var_ranges = KVM_NR_VAR_MTRR, type_mask, type = -1;
>
> Do not mix initialized and uninitialized variables on the same line
> (preexisting, I know, but let's fix it instead of making it worse :)).
> Please put each initialized variable on a separate line.

Okay. Will follow this style in the future development.

>
> Also please initialize type_mask here (more on this below).
>
>>
>>   	/* MTRR is completely disabled, use UC for all of physical memory. */
>>   	if (!mtrr_state->mtrr_enabled)
>>   		return MTRR_TYPE_UNCACHABLE;
>>
>> -	/* Make end inclusive end, instead of exclusive */
>> -	end--;
>> -
>>   	/* Look in fixed ranges. Just return the type as per start */
>>   	if (mtrr_state->fixed_mtrr_enabled && (start < 0x100000)) {
>>   		int idx;
>> @@ -273,9 +265,9 @@ static int get_mtrr_type(struct kvm_mtrr *mtrr_state,
>>   	 * Look of multiple ranges matching this address and pick type
>>   	 * as per MTRR precedence
>>   	 */
>> -	prev_match = 0xFF;
>> +	type_mask = (1 << MTRR_TYPE_WRBACK) | (1 << MTRR_TYPE_WRTHROUGH);
>>   	for (i = 0; i < num_var_ranges; ++i) {
>> -		unsigned short start_state, end_state;
>> +		int curr_type;
>>
>>   		if (!(mtrr_state->var_ranges[i].mask & (1 << 11)))
>>   			continue;
>> @@ -283,50 +275,57 @@ static int get_mtrr_type(struct kvm_mtrr *mtrr_state,
>>   		base = mtrr_state->var_ranges[i].base & PAGE_MASK;
>>   		mask = mtrr_state->var_ranges[i].mask & PAGE_MASK;
>>
>> -		start_state = ((start & mask) == (base & mask));
>> -		end_state = ((end & mask) == (base & mask));
>> -		if (start_state != end_state)
>> -			return 0xFE;
>> -
>>   		if ((start & mask) != (base & mask))
>>   			continue;
>>
>> -		curr_match = mtrr_state->var_ranges[i].base & 0xff;
>> -		if (prev_match == 0xFF) {
>> -			prev_match = curr_match;
>> +		/*
>> +		 * Please refer to Intel SDM Volume 3: 11.11.4.1 MTRR
>> +		 * Precedences.
>> +		 */
>> +
>> +		curr_type = mtrr_state->var_ranges[i].base & 0xff;
>> +		if (type == -1) {
>> +			type = curr_type;
>>   			continue;
>>   		}
>>
>> -		if (prev_match == MTRR_TYPE_UNCACHABLE ||
>> -		    curr_match == MTRR_TYPE_UNCACHABLE)
>> +		/*
>> +		 * If two or more variable memory ranges match and the
>> +		 * memory types are identical, then that memory type is
>> +		 * used.
>> +		 */
>> +		if (type == curr_type)
>> +			continue;
>> +
>> +		/*
>> +		 * If two or more variable memory ranges match and one of
>> +		 * the memory types is UC, the UC memory type used.
>> +		 */
>> +		if (curr_type == MTRR_TYPE_UNCACHABLE)
>>   			return MTRR_TYPE_UNCACHABLE;
>>
>> -		if ((prev_match == MTRR_TYPE_WRBACK &&
>> -		     curr_match == MTRR_TYPE_WRTHROUGH) ||
>> -		    (prev_match == MTRR_TYPE_WRTHROUGH &&
>> -		     curr_match == MTRR_TYPE_WRBACK)) {
>> -			prev_match = MTRR_TYPE_WRTHROUGH;
>> -			curr_match = MTRR_TYPE_WRTHROUGH;
>> +		/*
>> +		 * If two or more variable memory ranges match and the
>> +		 * memory types are WT and WB, the WT memory type is used.
>> +		 */
>> +		if (((1 << type) & type_mask) &&
>> +		      ((1 << curr_type) & type_mask)) {
>
> Please inline definition of type_mask in the "if", or rename it to
> "wt_wb_mask" and make it const.  Or another suggestion below...

Okay, it's a better name indeed.

>
>> +			type = MTRR_TYPE_WRTHROUGH;
>> +			continue;
>>   		}
>>
>> -		if (prev_match != curr_match)
>> -			return MTRR_TYPE_UNCACHABLE;
>> +		/*
>> +		 * For overlaps not defined by the above rules, processor
>> +		 * behavior is undefined.
>> +		 */
>
> Perhaps just use type = MIN(type, curr_type), which also happens to get
> WT vs. WB right?  You can also add a
>
> 	BUILD_BUG_ON(MTRR_TYPE_WRTHROUGH > MTRR_TYPE_WRBACK);
>
> to ensure that the WT vs. WB precedence is correct.

Only WT and WB are allowed to be overlapped here otherwise is
"undefined behavior". For example if the types are MTRR_TYPE_WRPROT
and MTRR_TYPE_WRTHROUGH, min() will get MTRR_TYPE_WRTHROUGH rather than
"undefined behavior".

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 08/15] KVM: MTRR: introduce fixed_mtrr_segment table
  2015-06-01  9:25   ` Paolo Bonzini
@ 2015-06-03  2:29     ` Xiao Guangrong
  0 siblings, 0 replies; 44+ messages in thread
From: Xiao Guangrong @ 2015-06-03  2:29 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: gleb, mtosatti, kvm, linux-kernel



On 06/01/2015 05:25 PM, Paolo Bonzini wrote:
>
>
> On 30/05/2015 12:59, Xiao Guangrong wrote:
>> This table summarizes the information of fixed MTRRs and introduce some APIs
>> to abstract its operation which helps us to clean up the code and will be
>> used in later patches
>>
>> Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
>> ---
>>   arch/x86/kvm/mtrr.c | 191 ++++++++++++++++++++++++++++++++++++++--------------
>>   1 file changed, 142 insertions(+), 49 deletions(-)
>>
>> diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
>> index d3c06d2..888441e 100644
>> --- a/arch/x86/kvm/mtrr.c
>> +++ b/arch/x86/kvm/mtrr.c
>> @@ -102,12 +102,126 @@ bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>>   }
>>   EXPORT_SYMBOL_GPL(kvm_mtrr_valid);
>>
>> +struct fixed_mtrr_segment {
>> +	u64 start;
>> +	u64 end;
>> +
>> +	/*
>> +	 * unit corresponds to the MSR entry in this segment, the size
>> +	 * of unit is covered in one MSR.
>> +	 */
>> +	u64 unit_size;
>> +
>> +	/* a range is covered in one memory cache type. */
>> +	u64 range_size;
>> +
>> +	/* the start position in kvm_mtrr.fixed_ranges[]. */
>> +	int range_start;
>> +};
>> +
>> +static struct fixed_mtrr_segment fixed_seg_table[] = {
>> +	/* MSR_MTRRfix64K_00000, 1 unit. 64K fixed mtrr. */
>> +	{
>> +		.start = 0x0,
>> +		.end = 0x80000,
>> +		.unit_size = 0x80000,
>
> Unit size is always range size * 8.

Good catch, will drop this.

>
>> +		.range_size = 1ULL << 16,
>
> Perhaps 64 * 1024 (and so on below) is clearer, because it matches the
> name of the MSR?

Yes, it's better.

>
>> +		.range_start = 0,
>> +	},
>> +
>> +	/*
>> +	 * MSR_MTRRfix16K_80000 ... MSR_MTRRfix16K_A0000, 2 units,
>> +	 * 16K fixed mtrr.
>> +	 */
>> +	{
>> +		.start = 0x80000,
>> +		.end = 0xc0000,
>> +		.unit_size = (0xc0000 - 0x80000) / 2,
>> +		.range_size = 1ULL << 14,
>> +		.range_start = 8,
>> +	},
>> +
>> +	/*
>> +	 * MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000, 8 units,
>> +	 * 4K fixed mtrr.
>> +	 */
>> +	{
>> +		.start = 0xc0000,
>> +		.end = 0x100000,
>> +		.unit_size = (0x100000 - 0xc0000) / 8,
>> +		.range_size = 1ULL << 12,
>> +		.range_start = 24,
>> +	}
>> +};
>> +
>> +static int fixed_msr_to_range_index(u32 msr)
>> +{
>> +	int seg, unit;
>> +
>> +	if (!fixed_msr_to_seg_unit(msr, &seg, &unit))
>> +		return -1;
>> +
>> +	fixed_mtrr_seg_unit_range_index(seg, unit);
>
> This looks wrong.

Oops, should be "return fixed_mtrr_seg_unit_range_index(seg, unit);" :(

>
>> +	return 0;
>> +}
>> +
>>   static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
>>   {
>>   	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
>>   	gfn_t start, end, mask;
>>   	int index;
>> -	bool is_fixed = true;
>>
>>   	if (msr == MSR_IA32_CR_PAT || !tdp_enabled ||
>>   	      !kvm_arch_has_noncoherent_dma(vcpu->kvm))
>> @@ -116,32 +230,19 @@ static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
>>   	if (!mtrr_state->mtrr_enabled && msr != MSR_MTRRdefType)
>>   		return;
>>
>> +	if (fixed_msr_to_range(msr, &start, &end)) {
>> +		if (!mtrr_state->fixed_mtrr_enabled)
>> +			return;
>> +		goto do_zap;
>> +	}
>> +
>>   	switch (msr) {
>
> Please move defType handling in an "if" above "if
> (fixed_msr_to_range(msr, &start, &end))".  Then you don't need the goto.
>

Yes, indeed. Will do it in the next version.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 10/15] KVM: MTRR: sort variable MTRRs
  2015-06-01  9:27   ` Paolo Bonzini
@ 2015-06-03  2:31     ` Xiao Guangrong
  0 siblings, 0 replies; 44+ messages in thread
From: Xiao Guangrong @ 2015-06-03  2:31 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: gleb, mtosatti, kvm, linux-kernel



On 06/01/2015 05:27 PM, Paolo Bonzini wrote:
>
>
> On 30/05/2015 12:59, Xiao Guangrong wrote:
>> Sort all valid variable MTRRs based on its base address, it will help us to
>> check a range to see if it's fully contained in variable MTRRs
>>
>> Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
>> ---
>>   arch/x86/include/asm/kvm_host.h |  3 +++
>>   arch/x86/kvm/mtrr.c             | 39 +++++++++++++++++++++++++++++++++++++++
>>   arch/x86/kvm/x86.c              |  2 +-
>>   arch/x86/kvm/x86.h              |  1 +
>>   4 files changed, 44 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>> index f3fc152..5be8f2e 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -347,6 +347,7 @@ enum {
>>   struct kvm_mtrr_range {
>>   	u64 base;
>>   	u64 mask;
>> +	struct list_head node;
>>   };
>>
>>   struct kvm_mtrr {
>> @@ -362,6 +363,8 @@ struct kvm_mtrr {
>>   			unsigned mtrr_enabled:1;
>>   		};
>>   	};
>> +
>> +	struct list_head head;
>>   };
>>
>>   struct kvm_vcpu_arch {
>> diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
>> index aeb9767..8e3b81a 100644
>> --- a/arch/x86/kvm/mtrr.c
>> +++ b/arch/x86/kvm/mtrr.c
>> @@ -262,6 +262,38 @@ do_zap:
>>   	kvm_zap_gfn_range(vcpu->kvm, gpa_to_gfn(start), gpa_to_gfn(end));
>>   }
>>
>> +static bool var_mtrr_range_is_valid(struct kvm_mtrr_range *range)
>> +{
>> +	u64 start, end;
>> +
>> +	if (!(range->mask & (1 << 11)))
>> +		return false;
>> +
>> +	var_mtrr_range(range, &start, &end);
>> +	return end > start;
>> +}
>> +
>> +static void set_var_mtrr_start(struct kvm_mtrr *mtrr_state, int index)
>> +{
>> +	/* remove the entry if it's in the list. */
>> +	if (var_mtrr_range_is_valid(&mtrr_state->var_ranges[index]))
>> +		list_del(&mtrr_state->var_ranges[index].node);
>> +}
>> +
>> +static void set_var_mtrr_end(struct kvm_mtrr *mtrr_state, int index)
>> +{
>> +	struct kvm_mtrr_range *tmp, *cur = &mtrr_state->var_ranges[index];
>> +
>> +	/* add it to the list if it's valid. */
>> +	if (var_mtrr_range_is_valid(&mtrr_state->var_ranges[index])) {
>> +		list_for_each_entry(tmp, &mtrr_state->head, node)
>> +			if (cur->base < tmp->base)
>> +				list_add_tail(&cur->node, &tmp->node);
>> +
>> +		list_add_tail(&cur->node, &mtrr_state->head);
>> +	}
>> +}
>> +
>>   int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>>   {
>>   	int index;
>> @@ -281,10 +313,12 @@ int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>>
>>   		index = (msr - 0x200) / 2;
>>   		is_mtrr_mask = msr - 0x200 - 2 * index;
>> +		set_var_mtrr_start(&vcpu->arch.mtrr_state, index);
>>   		if (!is_mtrr_mask)
>>   			vcpu->arch.mtrr_state.var_ranges[index].base = data;
>>   		else
>>   			vcpu->arch.mtrr_state.var_ranges[index].mask = data;
>> +		set_var_mtrr_end(&vcpu->arch.mtrr_state, index);
>
> Just move the whole code to a new kvm_set_var_mtrr function.

Okay.

>
>>   	}
>>
>>   	update_mtrr(vcpu, msr);
>> @@ -331,6 +365,11 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
>>   	return 0;
>>   }
>>
>> +void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu)
>> +{
>> +	INIT_LIST_HEAD(&vcpu->arch.mtrr_state.head);
>> +}
>> +
>>   u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
>>   {
>>   	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 64c2891..8084ac3 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -6991,7 +6991,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
>>   	kvm_vcpu_reset(vcpu, false);
>>   	kvm_mmu_setup(vcpu);
>>   	vcpu_put(vcpu);
>> -
>> +	kvm_vcpu_mtrr_init(vcpu);
>
> Please move this call much earlier.

Okay, will do these in v2.


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 12/15] KVM: MTRR: introduce mtrr_for_each_mem_type
  2015-06-01  9:33   ` Paolo Bonzini
  2015-06-01 14:26     ` Paolo Bonzini
@ 2015-06-03  2:40     ` Xiao Guangrong
  1 sibling, 0 replies; 44+ messages in thread
From: Xiao Guangrong @ 2015-06-03  2:40 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: gleb, mtosatti, kvm, linux-kernel



On 06/01/2015 05:33 PM, Paolo Bonzini wrote:
>
>
> On 30/05/2015 12:59, Xiao Guangrong wrote:
>> +struct mtrr_looker {
>> +	/* input fields. */
>> +	struct kvm_mtrr *mtrr_state;
>> +	u64 start;
>> +	u64 end;
>
> s/looker/iter/ or s/looker/lookup/

Good to me.

>
>> +static void mtrr_lookup_start(struct mtrr_looker *looker)
>> +{
>> +	looker->mem_type = -1;
>> +
>> +	if (!looker->mtrr_state->mtrr_enabled) {
>> +		looker->partial_map = true;
>> +		return;
>> +	}
>> +
>> +	if (!mtrr_lookup_fixed_start(looker))
>> +		mtrr_lookup_var_start(looker);
>> +}
>> +
>
> Separating mtrr_lookup_start and mtrr_lookup_init is weird.

Agreed, will fold mtrr_lookup_start() into mtrr_lookup_init().

>
> There are common parts of mtrr_lookup_*_start and mtrr_lookup_*_next.
> For example this:
>
>> +	looker->mem_type = looker->mtrr_state->fixed_ranges[index];
>> +	looker->start = fixed_mtrr_range_end_addr(seg, index);
>> +	return true;
>
> in mtrr_lookup_fixed_start is the same as this:
>
>>
>> +	end = fixed_mtrr_range_end_addr(looker->seg, looker->index);
>> +
>> +	/* switch to next segment. */
>> +	if (end >= eseg->end) {
>> +		looker->seg++;
>> +		looker->index = 0;
>> +
>> +		/* have looked up for all fixed MTRRs. */
>> +		if (looker->seg >= ARRAY_SIZE(fixed_seg_table))
>> +			return mtrr_lookup_var_start(looker);
>> +
>> +		end = fixed_mtrr_range_end_addr(looker->seg, looker->index);
>> +	}
>> +
>> +	looker->mem_type = mtrr_state->fixed_ranges[looker->index];
>> +	looker->start = end;
>
> in mtrr_lookup_fixed_next.  Can you try to make them more common?

Yes, i will check them carefully and introduce common functions to do
it.

>
> Basically you should have
>
> +#define mtrr_for_each_mem_type(_looker_, _mtrr_, _gpa_start_, _gpa_end_) \
> +	for (mtrr_lookup_init(_looker_, _mtrr_, _gpa_start_, _gpa_end_); \
> +	     !mtrr_lookup_end(_looker_); mtrr_lookup_next(_looker_))

Good idea, will do it in v2.


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 14/15] KVM: MTRR: do not map huage page for non-consistent range
  2015-06-01  9:36   ` Paolo Bonzini
  2015-06-01  9:38     ` Paolo Bonzini
@ 2015-06-03  2:56     ` Xiao Guangrong
  2015-06-03  7:55       ` Paolo Bonzini
  1 sibling, 1 reply; 44+ messages in thread
From: Xiao Guangrong @ 2015-06-03  2:56 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: gleb, mtosatti, kvm, linux-kernel



On 06/01/2015 05:36 PM, Paolo Bonzini wrote:
>
>
> On 30/05/2015 12:59, Xiao Guangrong wrote:
>> Currently guest MTRR is completely prohibited if cache snoop is supported on
>> IOMMU (!noncoherent_dma) and host does the emulation based on the knowledge
>> from host side, however, host side is not the good point to know
>> what the purpose of guest is. A good example is that pass-throughed VGA
>> frame buffer is not always UC as host expected
>
> Can you explain how?  The original idea was that such a framebuffer
> would be kvm_is_reserved_pfn and thus be unconditionally UC.

Yes, frame-buffer is always UC in current code, however, UC for frame-buffer
causes bad performance. It's quoted from Documentation/x86/mtrr.txt:

| This is most useful when you have a video (VGA) card on a PCI or AGP bus.
| Enabling write-combining allows bus write transfers to be combined into a
| larger transfer before bursting over the PCI/AGP bus. This can increase
| performance of image write operations 2.5 times or more.

So that guest will configure the range to MTRR, this patchset follows
guest MTRR and cooperates with guest PAT (ept.VMX_EPT_IPAT_BIT = 0) to emulate
guest cache type as guest expects.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 12/15] KVM: MTRR: introduce mtrr_for_each_mem_type
  2015-06-01 14:26     ` Paolo Bonzini
@ 2015-06-03  2:57       ` Xiao Guangrong
  0 siblings, 0 replies; 44+ messages in thread
From: Xiao Guangrong @ 2015-06-03  2:57 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: gleb, mtosatti, kvm, linux-kernel



On 06/01/2015 10:26 PM, Paolo Bonzini wrote:
>
>
> On 01/06/2015 11:33, Paolo Bonzini wrote:
>>> +	looker->mem_type = looker->mtrr_state->fixed_ranges[index];
>>>> +	looker->start = fixed_mtrr_range_end_addr(seg, index);
>>>> +	return true;
>> in mtrr_lookup_fixed_start is the same as this:
>>
>>>>
>>>> +	end = fixed_mtrr_range_end_addr(looker->seg, looker->index);
>>>> +
>>>> +	/* switch to next segment. */
>>>> +	if (end >= eseg->end) {
>>>> +		looker->seg++;
>>>> +		looker->index = 0;
>>>> +
>>>> +		/* have looked up for all fixed MTRRs. */
>>>> +		if (looker->seg >= ARRAY_SIZE(fixed_seg_table))
>>>> +			return mtrr_lookup_var_start(looker);
>>>> +
>>>> +		end = fixed_mtrr_range_end_addr(looker->seg, looker->index);
>>>> +	}
>>>> +
>>>> +	looker->mem_type = mtrr_state->fixed_ranges[looker->index];
>>>> +	looker->start = end;
>> in mtrr_lookup_fixed_next.  Can you try to make them more common?
>>
>> Basically you should have
>>
>> +#define mtrr_for_each_mem_type(_looker_, _mtrr_, _gpa_start_, _gpa_end_) \
>> +	for (mtrr_lookup_init(_looker_, _mtrr_, _gpa_start_, _gpa_end_); \
>> +	     !mtrr_lookup_end(_looker_); mtrr_lookup_next(_looker_))
>
> ... where the above code I quoted would be part of mtrr_lookup_end.

Okay, will do. :)

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 14/15] KVM: MTRR: do not map huage page for non-consistent range
  2015-06-03  2:56     ` Xiao Guangrong
@ 2015-06-03  7:55       ` Paolo Bonzini
  2015-06-04  8:23         ` Xiao Guangrong
  0 siblings, 1 reply; 44+ messages in thread
From: Paolo Bonzini @ 2015-06-03  7:55 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: gleb, mtosatti, kvm, linux-kernel



On 03/06/2015 04:56, Xiao Guangrong wrote:
> 
> 
> On 06/01/2015 05:36 PM, Paolo Bonzini wrote:
>>
>>
>> On 30/05/2015 12:59, Xiao Guangrong wrote:
>>> Currently guest MTRR is completely prohibited if cache snoop is
>>> supported on
>>> IOMMU (!noncoherent_dma) and host does the emulation based on the
>>> knowledge
>>> from host side, however, host side is not the good point to know
>>> what the purpose of guest is. A good example is that pass-throughed VGA
>>> frame buffer is not always UC as host expected
>>
>> Can you explain how?  The original idea was that such a framebuffer
>> would be kvm_is_reserved_pfn and thus be unconditionally UC.
> 
> Yes, frame-buffer is always UC in current code, however, UC for
> frame-buffer causes bad performance.

Understood now, thanks.

> So that guest will configure the range to MTRR, this patchset follows
> guest MTRR and cooperates with guest PAT (ept.VMX_EPT_IPAT_BIT = 0) to
> emulate guest cache type as guest expects.

Unlike e.g. CR0.CD=1, UC memory does not snoop the cache to preserve
coherency.  AMD, has special logic to do this, for example:

- if guest PAT says "UC" and host MTRR says "WB", the processor will not
cache the memory but will snoop the cache as if CR0.CD=1

- if guest PAT says "WC" and host (nested page table) PAT says "WB" and
host MTRR says "WB", the processor will still do write combining but
also snoop the cache as if CR0.CD=1

I am worried that the lack of this feature could cause problems if
guests map QEMU's VGA framebuffer as uncached.  We have this problem on
ARM, so it's not 100% theoretical.

So, why do you need to always use IPAT=0?  Can patch 15 keep the current
logic for RAM, like this:

	if (is_mmio || kvm_arch_has_noncoherent_dma(vcpu->kvm))
		ret = kvm_mtrr_get_guest_memory_type(vcpu, gfn) <<
		      VMX_EPT_MT_EPTE_SHIFT;
	else
		ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT)
			| VMX_EPT_IPAT_BIT;

?

Paolo

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 07/15] KVM: MTRR: improve kvm_mtrr_get_guest_memory_type
  2015-06-03  2:12     ` Xiao Guangrong
@ 2015-06-03  7:57       ` Paolo Bonzini
  0 siblings, 0 replies; 44+ messages in thread
From: Paolo Bonzini @ 2015-06-03  7:57 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: gleb, mtosatti, kvm, linux-kernel



On 03/06/2015 04:12, Xiao Guangrong wrote:
>>
>> Perhaps just use type = MIN(type, curr_type), which also happens to get
>> WT vs. WB right?  You can also add a
>>
>>     BUILD_BUG_ON(MTRR_TYPE_WRTHROUGH > MTRR_TYPE_WRBACK);
>>
>> to ensure that the WT vs. WB precedence is correct.
> 
> Only WT and WB are allowed to be overlapped here otherwise is
> "undefined behavior". For example if the types are MTRR_TYPE_WRPROT
> and MTRR_TYPE_WRTHROUGH, min() will get MTRR_TYPE_WRTHROUGH rather than
> "undefined behavior".

Choosing MTRR_TYPE_WRTHROUGH is one example of achieve undefined
behavior.  Picking one type arbitrarily is just a different kind of
undefined behavior.

Paolo

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 14/15] KVM: MTRR: do not map huage page for non-consistent range
  2015-06-03  7:55       ` Paolo Bonzini
@ 2015-06-04  8:23         ` Xiao Guangrong
  2015-06-04  8:26           ` Xiao Guangrong
  2015-06-04  8:36           ` Paolo Bonzini
  0 siblings, 2 replies; 44+ messages in thread
From: Xiao Guangrong @ 2015-06-04  8:23 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: gleb, mtosatti, kvm, linux-kernel



On 06/03/2015 03:55 PM, Paolo Bonzini wrote:
>
>
> On 03/06/2015 04:56, Xiao Guangrong wrote:
>>
>>
>> On 06/01/2015 05:36 PM, Paolo Bonzini wrote:
>>>
>>>
>>> On 30/05/2015 12:59, Xiao Guangrong wrote:
>>>> Currently guest MTRR is completely prohibited if cache snoop is
>>>> supported on
>>>> IOMMU (!noncoherent_dma) and host does the emulation based on the
>>>> knowledge
>>>> from host side, however, host side is not the good point to know
>>>> what the purpose of guest is. A good example is that pass-throughed VGA
>>>> frame buffer is not always UC as host expected
>>>
>>> Can you explain how?  The original idea was that such a framebuffer
>>> would be kvm_is_reserved_pfn and thus be unconditionally UC.
>>
>> Yes, frame-buffer is always UC in current code, however, UC for
>> frame-buffer causes bad performance.
>
> Understood now, thanks.
>
>> So that guest will configure the range to MTRR, this patchset follows
>> guest MTRR and cooperates with guest PAT (ept.VMX_EPT_IPAT_BIT = 0) to
>> emulate guest cache type as guest expects.
>
> Unlike e.g. CR0.CD=1, UC memory does not snoop the cache to preserve
> coherency.  AMD, has special logic to do this, for example:
>
> - if guest PAT says "UC" and host MTRR says "WB", the processor will not
> cache the memory but will snoop the cache as if CR0.CD=1
>
> - if guest PAT says "WC" and host (nested page table) PAT says "WB" and
> host MTRR says "WB", the processor will still do write combining but
> also snoop the cache as if CR0.CD=1
>
> I am worried that the lack of this feature could cause problems if
> guests map QEMU's VGA framebuffer as uncached.  We have this problem on
> ARM, so it's not 100% theoretical.

CR0.CD is always 0 in both host and guest, i guess it's why we cleared
CR0.CD and CR0.NW in vmx_set_cr0().

>
> So, why do you need to always use IPAT=0?  Can patch 15 keep the current
> logic for RAM, like this:
>
> 	if (is_mmio || kvm_arch_has_noncoherent_dma(vcpu->kvm))
> 		ret = kvm_mtrr_get_guest_memory_type(vcpu, gfn) <<
> 		      VMX_EPT_MT_EPTE_SHIFT;
> 	else
> 		ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT)
> 			| VMX_EPT_IPAT_BIT;

Yeah, it's okay, actually we considered this way, however
- it's light enough, it did not hurt guest performance based on our
   benchmark.
- the logic has always used for noncherent_dma case, extend it to
   normal case should have low risk and also help us to check the logic.
- completely follow MTRRS spec would be better than host hides it.


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 14/15] KVM: MTRR: do not map huage page for non-consistent range
  2015-06-04  8:23         ` Xiao Guangrong
@ 2015-06-04  8:26           ` Xiao Guangrong
  2015-06-04  8:34             ` Paolo Bonzini
  2015-06-04  8:36           ` Paolo Bonzini
  1 sibling, 1 reply; 44+ messages in thread
From: Xiao Guangrong @ 2015-06-04  8:26 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: gleb, mtosatti, kvm, linux-kernel



On 06/04/2015 04:23 PM, Xiao Guangrong wrote:
>
>
> On 06/03/2015 03:55 PM, Paolo Bonzini wrote:
>>
>>
>> On 03/06/2015 04:56, Xiao Guangrong wrote:
>>>
>>>
>>> On 06/01/2015 05:36 PM, Paolo Bonzini wrote:
>>>>
>>>>
>>>> On 30/05/2015 12:59, Xiao Guangrong wrote:
>>>>> Currently guest MTRR is completely prohibited if cache snoop is
>>>>> supported on
>>>>> IOMMU (!noncoherent_dma) and host does the emulation based on the
>>>>> knowledge
>>>>> from host side, however, host side is not the good point to know
>>>>> what the purpose of guest is. A good example is that pass-throughed VGA
>>>>> frame buffer is not always UC as host expected
>>>>
>>>> Can you explain how?  The original idea was that such a framebuffer
>>>> would be kvm_is_reserved_pfn and thus be unconditionally UC.
>>>
>>> Yes, frame-buffer is always UC in current code, however, UC for
>>> frame-buffer causes bad performance.
>>
>> Understood now, thanks.
>>
>>> So that guest will configure the range to MTRR, this patchset follows
>>> guest MTRR and cooperates with guest PAT (ept.VMX_EPT_IPAT_BIT = 0) to
>>> emulate guest cache type as guest expects.
>>
>> Unlike e.g. CR0.CD=1, UC memory does not snoop the cache to preserve
>> coherency.  AMD, has special logic to do this, for example:
>>
>> - if guest PAT says "UC" and host MTRR says "WB", the processor will not
>> cache the memory but will snoop the cache as if CR0.CD=1
>>
>> - if guest PAT says "WC" and host (nested page table) PAT says "WB" and
>> host MTRR says "WB", the processor will still do write combining but
>> also snoop the cache as if CR0.CD=1
>>
>> I am worried that the lack of this feature could cause problems if
>> guests map QEMU's VGA framebuffer as uncached.  We have this problem on
>> ARM, so it's not 100% theoretical.
>
> CR0.CD is always 0 in both host and guest, i guess it's why we cleared
> CR0.CD and CR0.NW in vmx_set_cr0().

It reminds me that we should check guest CR0.CD before check guest MTRR
and disable guest PAT if guest CR0.CD = 1.


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 14/15] KVM: MTRR: do not map huage page for non-consistent range
  2015-06-04  8:26           ` Xiao Guangrong
@ 2015-06-04  8:34             ` Paolo Bonzini
  0 siblings, 0 replies; 44+ messages in thread
From: Paolo Bonzini @ 2015-06-04  8:34 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: gleb, mtosatti, kvm, linux-kernel



On 04/06/2015 10:26, Xiao Guangrong wrote:
>>
>> CR0.CD is always 0 in both host and guest, i guess it's why we cleared
>> CR0.CD and CR0.NW in vmx_set_cr0().
> 
> It reminds me that we should check guest CR0.CD before check guest MTRR
> and disable guest PAT if guest CR0.CD = 1.

I think it can be done separately, on top.

Paolo

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 14/15] KVM: MTRR: do not map huage page for non-consistent range
  2015-06-04  8:23         ` Xiao Guangrong
  2015-06-04  8:26           ` Xiao Guangrong
@ 2015-06-04  8:36           ` Paolo Bonzini
  2015-06-05  6:33             ` Xiao Guangrong
  1 sibling, 1 reply; 44+ messages in thread
From: Paolo Bonzini @ 2015-06-04  8:36 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: gleb, mtosatti, kvm, linux-kernel



On 04/06/2015 10:23, Xiao Guangrong wrote:
>>
>> So, why do you need to always use IPAT=0?  Can patch 15 keep the current
>> logic for RAM, like this:
>>
>>     if (is_mmio || kvm_arch_has_noncoherent_dma(vcpu->kvm))
>>         ret = kvm_mtrr_get_guest_memory_type(vcpu, gfn) <<
>>               VMX_EPT_MT_EPTE_SHIFT;
>>     else
>>         ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT)
>>             | VMX_EPT_IPAT_BIT;
> 
> Yeah, it's okay, actually we considered this way, however
> - it's light enough, it did not hurt guest performance based on our
>   benchmark.
> - the logic has always used for noncherent_dma case, extend it to
>   normal case should have low risk and also help us to check the logic.

But noncoherent_dma is not the common case, so it's not necessarily true
that the risk is low.

> - completely follow MTRRS spec would be better than host hides it.

We are a virtualization platform, we know well when MTRRs are necessary.

Tis a risk from blindly obeying the guest MTRRs: userspace can see stale
data if the guest's accesses bypass the cache.  AMD bypasses this by
enabling snooping even in cases that ordinarily wouldn't snoop; for
Intel the solution is that RAM-backed areas should always use IPAT.

Paolo

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 14/15] KVM: MTRR: do not map huage page for non-consistent range
  2015-06-04  8:36           ` Paolo Bonzini
@ 2015-06-05  6:33             ` Xiao Guangrong
  0 siblings, 0 replies; 44+ messages in thread
From: Xiao Guangrong @ 2015-06-05  6:33 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: gleb, mtosatti, kvm, linux-kernel, Zhang, Yang Z


[ CCed Zhang Yang ]

On 06/04/2015 04:36 PM, Paolo Bonzini wrote:
>
>
> On 04/06/2015 10:23, Xiao Guangrong wrote:
>>>
>>> So, why do you need to always use IPAT=0?  Can patch 15 keep the current
>>> logic for RAM, like this:
>>>
>>>      if (is_mmio || kvm_arch_has_noncoherent_dma(vcpu->kvm))
>>>          ret = kvm_mtrr_get_guest_memory_type(vcpu, gfn) <<
>>>                VMX_EPT_MT_EPTE_SHIFT;
>>>      else
>>>          ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT)
>>>              | VMX_EPT_IPAT_BIT;
>>
>> Yeah, it's okay, actually we considered this way, however
>> - it's light enough, it did not hurt guest performance based on our
>>    benchmark.
>> - the logic has always used for noncherent_dma case, extend it to
>>    normal case should have low risk and also help us to check the logic.
>
> But noncoherent_dma is not the common case, so it's not necessarily true
> that the risk is low.

I thought noncoherent_dma exists on 1st generation(s) IOMMU, it should
be fully tested at that time.

>
>> - completely follow MTRRS spec would be better than host hides it.
>
> We are a virtualization platform, we know well when MTRRs are necessary.
>
> Tis a risk from blindly obeying the guest MTRRs: userspace can see stale
> data if the guest's accesses bypass the cache.  AMD bypasses this by
> enabling snooping even in cases that ordinarily wouldn't snoop; for
> Intel the solution is that RAM-backed areas should always use IPAT.

Not sure if UC and other cacheable type combinations on guest and host
will cause problem. The SMD mentioned that snoop is not required only when
"The UC attribute comes from the MTRRs and the processors are not required
  to snoop their caches since the data could never have been cached."
(Vol 3. 11.5.2.2)
VMX do not touch hardware MTRR MSRs and i guess snoop works under this case.

I also noticed if SS (self-snooping) is supported we need not to invalidate
cache when programming memory type (Vol 3. 11.11.8), so that means CPU works
well on the page which has different cache types i guess.

After think it carefully, we (Zhang Yang) doubt if always set WB for DMA
memory is really a good idea because we can not assume WB DMA works well for
all devices. One example is that audio DMA (not a MMIO region) is required WC
to improve its performance.

However, we think the SDM is not clear enough so let's do full vMTRR on MMIO
and noncoherent_dma first. :)

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 05/15] KVM: MTRR: clean up mtrr default type
  2015-05-30 10:59 ` [PATCH 05/15] KVM: MTRR: clean up mtrr default type Xiao Guangrong
  2015-06-01  9:11   ` Paolo Bonzini
@ 2015-06-09  0:35   ` David Matlack
  1 sibling, 0 replies; 44+ messages in thread
From: David Matlack @ 2015-06-09  0:35 UTC (permalink / raw)
  To: Xiao Guangrong
  Cc: Paolo Bonzini, Gleb Natapov, Marcelo Tosatti, kvm list, linux-kernel

On Sat, May 30, 2015 at 3:59 AM, Xiao Guangrong
<guangrong.xiao@linux.intel.com> wrote:
> Use union definition to avoid the decode/code workload and drop all the
> hard code

Thank you for doing this cleanup. The new code is much clearer!

>
> Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
> ---
>  arch/x86/include/asm/kvm_host.h | 12 ++++++++++--
>  arch/x86/kvm/mtrr.c             | 19 ++++++++-----------
>  2 files changed, 18 insertions(+), 13 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 2c3c52d..95ce2ff 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -347,8 +347,16 @@ enum {
>  struct kvm_mtrr {
>         struct mtrr_var_range var_ranges[KVM_NR_VAR_MTRR];
>         mtrr_type fixed_ranges[KVM_NR_FIXED_MTRR_REGION];
> -       unsigned char enabled;
> -       mtrr_type def_type;
> +
> +       union {
> +               u64 deftype;
> +               struct {
> +                       unsigned def_type:8;
> +                       unsigned reserved:2;
> +                       unsigned fixed_mtrr_enabled:1;
> +                       unsigned mtrr_enabled:1;
> +               };
> +       };
>  };
>
>  struct kvm_vcpu_arch {
> diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
> index 562341b..6de49dd 100644
> --- a/arch/x86/kvm/mtrr.c
> +++ b/arch/x86/kvm/mtrr.c
> @@ -105,7 +105,6 @@ EXPORT_SYMBOL_GPL(kvm_mtrr_valid);
>  static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
>  {
>         struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
> -       unsigned char mtrr_enabled = mtrr_state->enabled;
>         gfn_t start, end, mask;
>         int index;
>         bool is_fixed = true;
> @@ -114,7 +113,7 @@ static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
>               !kvm_arch_has_noncoherent_dma(vcpu->kvm))
>                 return;
>
> -       if (!(mtrr_enabled & 0x2) && msr != MSR_MTRRdefType)
> +       if (!mtrr_state->mtrr_enabled && msr != MSR_MTRRdefType)
>                 return;
>
>         switch (msr) {
> @@ -153,7 +152,7 @@ static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
>                 end = ((start & mask) | ~mask) + 1;
>         }
>
> -       if (is_fixed && !(mtrr_enabled & 0x1))
> +       if (is_fixed && !mtrr_state->fixed_mtrr_enabled)
>                 return;
>
>         kvm_zap_gfn_range(vcpu->kvm, gpa_to_gfn(start), gpa_to_gfn(end));
> @@ -166,10 +165,9 @@ int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>         if (!kvm_mtrr_valid(vcpu, msr, data))
>                 return 1;
>
> -       if (msr == MSR_MTRRdefType) {
> -               vcpu->arch.mtrr_state.def_type = data;
> -               vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10;
> -       } else if (msr == MSR_MTRRfix64K_00000)
> +       if (msr == MSR_MTRRdefType)
> +               vcpu->arch.mtrr_state.deftype = data;
> +       else if (msr == MSR_MTRRfix64K_00000)
>                 p[0] = data;
>         else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
>                 p[1 + msr - MSR_MTRRfix16K_80000] = data;
> @@ -216,8 +214,7 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
>                 return 1;
>
>         if (msr == MSR_MTRRdefType)
> -               *pdata = vcpu->arch.mtrr_state.def_type +
> -                        (vcpu->arch.mtrr_state.enabled << 10);
> +               *pdata = vcpu->arch.mtrr_state.deftype;
>         else if (msr == MSR_MTRRfix64K_00000)
>                 *pdata = p[0];
>         else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
> @@ -256,14 +253,14 @@ static int get_mtrr_type(struct kvm_mtrr *mtrr_state,
>         int i, num_var_ranges = KVM_NR_VAR_MTRR;
>
>         /* MTRR is completely disabled, use UC for all of physical memory. */
> -       if (!(mtrr_state->enabled & 0x2))
> +       if (!mtrr_state->mtrr_enabled)
>                 return MTRR_TYPE_UNCACHABLE;
>
>         /* Make end inclusive end, instead of exclusive */
>         end--;
>
>         /* Look in fixed ranges. Just return the type as per start */
> -       if ((mtrr_state->enabled & 0x1) && (start < 0x100000)) {
> +       if (mtrr_state->fixed_mtrr_enabled && (start < 0x100000)) {
>                 int idx;
>
>                 if (start < 0x80000) {
> --
> 2.1.0
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 09/15] KVM: MTRR: introduce var_mtrr_range
  2015-05-30 10:59 ` [PATCH 09/15] KVM: MTRR: introduce var_mtrr_range Xiao Guangrong
@ 2015-06-09  0:36   ` David Matlack
  2015-06-09  2:38     ` Xiao Guangrong
  0 siblings, 1 reply; 44+ messages in thread
From: David Matlack @ 2015-06-09  0:36 UTC (permalink / raw)
  To: Xiao Guangrong
  Cc: Paolo Bonzini, Gleb Natapov, Marcelo Tosatti, kvm list, linux-kernel

On Sat, May 30, 2015 at 3:59 AM, Xiao Guangrong
<guangrong.xiao@linux.intel.com> wrote:
> It gets the range for the specified variable MTRR
>
> Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
> ---
>  arch/x86/kvm/mtrr.c | 19 +++++++++++++------
>  1 file changed, 13 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
> index 888441e..aeb9767 100644
> --- a/arch/x86/kvm/mtrr.c
> +++ b/arch/x86/kvm/mtrr.c
> @@ -217,10 +217,21 @@ static int fixed_msr_to_range_index(u32 msr)
>         return 0;
>  }
>
> +static void var_mtrr_range(struct kvm_mtrr_range *range, u64 *start, u64 *end)
> +{
> +       u64 mask;
> +
> +       *start = range->base & PAGE_MASK;
> +
> +       mask = range->mask & PAGE_MASK;
> +       mask |= ~0ULL << boot_cpu_data.x86_phys_bits;
> +       *end = ((*start & mask) | ~mask) + 1;
> +}
> +
>  static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
>  {
>         struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
> -       gfn_t start, end, mask;
> +       gfn_t start, end;
>         int index;
>
>         if (msr == MSR_IA32_CR_PAT || !tdp_enabled ||
> @@ -244,11 +255,7 @@ static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
>         default:
>                 /* variable range MTRRs. */
>                 index = (msr - 0x200) / 2;
> -               start = mtrr_state->var_ranges[index].base & PAGE_MASK;
> -               mask = mtrr_state->var_ranges[index].mask & PAGE_MASK;
> -               mask |= ~0ULL << cpuid_maxphyaddr(vcpu);

Why did you drop this in favor of boot_cpu_data.x86_phys_bits?

> -
> -               end = ((start & mask) | ~mask) + 1;
> +               var_mtrr_range(&mtrr_state->var_ranges[index], &start, &end);
>         }
>
>  do_zap:
> --
> 2.1.0
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 12/15] KVM: MTRR: introduce mtrr_for_each_mem_type
  2015-05-30 10:59 ` [PATCH 12/15] KVM: MTRR: introduce mtrr_for_each_mem_type Xiao Guangrong
  2015-06-01  9:33   ` Paolo Bonzini
@ 2015-06-09  0:36   ` David Matlack
  2015-06-09  2:45     ` Xiao Guangrong
  1 sibling, 1 reply; 44+ messages in thread
From: David Matlack @ 2015-06-09  0:36 UTC (permalink / raw)
  To: Xiao Guangrong
  Cc: Paolo Bonzini, Gleb Natapov, Marcelo Tosatti, kvm list, linux-kernel

On Sat, May 30, 2015 at 3:59 AM, Xiao Guangrong
<guangrong.xiao@linux.intel.com> wrote:
> It walks all MTRRs and gets all the memory cache type setting for the
> specified range also it checks if the range is fully covered by MTRRs
>
> Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
> ---
>  arch/x86/kvm/mtrr.c | 183 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 183 insertions(+)
>
> diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
> index e59d138..35f86303 100644
> --- a/arch/x86/kvm/mtrr.c
> +++ b/arch/x86/kvm/mtrr.c
> @@ -395,6 +395,189 @@ void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu)
>         INIT_LIST_HEAD(&vcpu->arch.mtrr_state.head);
>  }
>
> +struct mtrr_looker {
> +       /* input fields. */
> +       struct kvm_mtrr *mtrr_state;
> +       u64 start;
> +       u64 end;
> +
> +       /* output fields. */
> +       int mem_type;
> +       /* [start, end) is fully covered in MTRRs? */

s/fully/not fully/ ?

> +       bool partial_map;
> +
> +       /* private fields. */
> +       union {
> +               /* used for fixed MTRRs. */
> +               struct {
> +                       int index;
> +                       int seg;
> +               };
> +
> +               /* used for var MTRRs. */
> +               struct {
> +                       struct kvm_mtrr_range *range;
> +                       /* max address has been covered in var MTRRs. */
> +                       u64 start_max;
> +               };
> +       };
> +
> +       bool fixed;
> +};
> +
> +static void mtrr_lookup_init(struct mtrr_looker *looker,
> +                            struct kvm_mtrr *mtrr_state, u64 start, u64 end)
> +{
> +       looker->mtrr_state = mtrr_state;
> +       looker->start = start;
> +       looker->end = end;
> +}
> +
> +static u64 fixed_mtrr_range_end_addr(int seg, int index)
> +{
> +       struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg];
> +
> +        return mtrr_seg->start + mtrr_seg->range_size * index;

Should be (index + 1)?

> +}
> +
> +static bool mtrr_lookup_fixed_start(struct mtrr_looker *looker)
> +{
> +       int seg, index;
> +
> +       if (!looker->mtrr_state->fixed_mtrr_enabled)
> +               return false;
> +
> +       seg = fixed_mtrr_addr_to_seg(looker->start);
> +       if (seg < 0)
> +               return false;
> +
> +       looker->fixed = true;
> +       index = fixed_mtrr_addr_seg_to_range_index(looker->start, seg);
> +       looker->index = index;
> +       looker->seg = seg;
> +       looker->mem_type = looker->mtrr_state->fixed_ranges[index];
> +       looker->start = fixed_mtrr_range_end_addr(seg, index);
> +       return true;
> +}
> +
> +static bool match_var_range(struct mtrr_looker *looker,
> +                           struct kvm_mtrr_range *range)
> +{
> +       u64 start, end;
> +
> +       var_mtrr_range(range, &start, &end);
> +       if (!(start >= looker->end || end <= looker->start)) {
> +               looker->range = range;
> +               looker->mem_type = range->base & 0xff;
> +
> +               /*
> +                * the function is called when we do kvm_mtrr.head walking
> +                * that means range has the minimum base address interleaves
> +                * with [looker->start_max, looker->end).
> +                */

I'm having trouble understanding this comment. I think this is what you
are trying to say:

  this function is called when we do kvm_mtrr.head walking. range has the
  minimum base address which interleaves [looker->start_max, looker->end).

Let me know if I parsed it wrong.

> +               looker->partial_map |= looker->start_max < start;
> +
> +               /* update the max address has been covered. */
> +               looker->start_max = max(looker->start_max, end);
> +               return true;
> +       }
> +
> +       return false;
> +}
> +
> +static void mtrr_lookup_var_start(struct mtrr_looker *looker)
> +{
> +       struct kvm_mtrr *mtrr_state = looker->mtrr_state;
> +       struct kvm_mtrr_range *range;
> +
> +       looker->fixed = false;
> +       looker->partial_map = false;
> +       looker->start_max = looker->start;
> +       looker->mem_type = -1;
> +
> +       list_for_each_entry(range, &mtrr_state->head, node)
> +               if (match_var_range(looker, range))
> +                       return;
> +
> +       looker->partial_map = true;
> +}
> +
> +static void mtrr_lookup_fixed_next(struct mtrr_looker *looker)
> +{
> +       struct fixed_mtrr_segment *eseg = &fixed_seg_table[looker->seg];
> +       struct kvm_mtrr *mtrr_state = looker->mtrr_state;
> +       u64 end;
> +
> +       if (looker->start >= looker->end) {
> +               looker->mem_type = -1;
> +               looker->partial_map = false;
> +               return;
> +       }
> +
> +       WARN_ON(!looker->fixed);
> +
> +       looker->index++;
> +       end = fixed_mtrr_range_end_addr(looker->seg, looker->index);
> +
> +       /* switch to next segment. */
> +       if (end >= eseg->end) {
> +               looker->seg++;
> +               looker->index = 0;
> +
> +               /* have looked up for all fixed MTRRs. */
> +               if (looker->seg >= ARRAY_SIZE(fixed_seg_table))
> +                       return mtrr_lookup_var_start(looker);
> +
> +               end = fixed_mtrr_range_end_addr(looker->seg, looker->index);
> +       }
> +
> +       looker->mem_type = mtrr_state->fixed_ranges[looker->index];
> +       looker->start = end;
> +}
> +
> +static void mtrr_lookup_var_next(struct mtrr_looker *looker)
> +{
> +       struct kvm_mtrr *mtrr_state = looker->mtrr_state;
> +
> +       WARN_ON(looker->fixed);
> +
> +       looker->mem_type = -1;
> +
> +       list_for_each_entry_continue(looker->range, &mtrr_state->head, node)
> +               if (match_var_range(looker, looker->range))
> +                       return;
> +
> +       looker->partial_map |= looker->start_max < looker->end;
> +}
> +
> +static void mtrr_lookup_start(struct mtrr_looker *looker)
> +{
> +       looker->mem_type = -1;
> +
> +       if (!looker->mtrr_state->mtrr_enabled) {
> +               looker->partial_map = true;
> +               return;
> +       }
> +
> +       if (!mtrr_lookup_fixed_start(looker))
> +               mtrr_lookup_var_start(looker);
> +}
> +
> +static void mtrr_lookup_next(struct mtrr_looker *looker)
> +{
> +       WARN_ON(looker->mem_type == -1);
> +
> +       if (looker->fixed)
> +               mtrr_lookup_fixed_next(looker);
> +       else
> +               mtrr_lookup_var_next(looker);
> +}
> +
> +#define mtrr_for_each_mem_type(_looker_, _mtrr_, _gpa_start_, _gpa_end_) \
> +       for (mtrr_lookup_init(_looker_, _mtrr_, _gpa_start_, _gpa_end_), \
> +            mtrr_lookup_start(_looker_); (_looker_)->mem_type != -1;    \
> +            mtrr_lookup_next(_looker_))
> +
>  u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
>  {
>         struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
> --
> 2.1.0
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 09/15] KVM: MTRR: introduce var_mtrr_range
  2015-06-09  0:36   ` David Matlack
@ 2015-06-09  2:38     ` Xiao Guangrong
  0 siblings, 0 replies; 44+ messages in thread
From: Xiao Guangrong @ 2015-06-09  2:38 UTC (permalink / raw)
  To: David Matlack
  Cc: Paolo Bonzini, Gleb Natapov, Marcelo Tosatti, kvm list, linux-kernel


Thanks for your review, David!


On 06/09/2015 08:36 AM, David Matlack wrote:

>>   static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
>>   {
>>          struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
>> -       gfn_t start, end, mask;
>> +       gfn_t start, end;
>>          int index;
>>
>>          if (msr == MSR_IA32_CR_PAT || !tdp_enabled ||
>> @@ -244,11 +255,7 @@ static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
>>          default:
>>                  /* variable range MTRRs. */
>>                  index = (msr - 0x200) / 2;
>> -               start = mtrr_state->var_ranges[index].base & PAGE_MASK;
>> -               mask = mtrr_state->var_ranges[index].mask & PAGE_MASK;
>> -               mask |= ~0ULL << cpuid_maxphyaddr(vcpu);
>
> Why did you drop this in favor of boot_cpu_data.x86_phys_bits?
>

It can make the code more generic, @vcpu is not needed anymore.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 12/15] KVM: MTRR: introduce mtrr_for_each_mem_type
  2015-06-09  0:36   ` David Matlack
@ 2015-06-09  2:45     ` Xiao Guangrong
  0 siblings, 0 replies; 44+ messages in thread
From: Xiao Guangrong @ 2015-06-09  2:45 UTC (permalink / raw)
  To: David Matlack
  Cc: Paolo Bonzini, Gleb Natapov, Marcelo Tosatti, kvm list, linux-kernel



On 06/09/2015 08:36 AM, David Matlack wrote:
> On Sat, May 30, 2015 at 3:59 AM, Xiao Guangrong
> <guangrong.xiao@linux.intel.com> wrote:
>> It walks all MTRRs and gets all the memory cache type setting for the
>> specified range also it checks if the range is fully covered by MTRRs
>>
>> Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
>> ---
>>   arch/x86/kvm/mtrr.c | 183 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 183 insertions(+)
>>
>> diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
>> index e59d138..35f86303 100644
>> --- a/arch/x86/kvm/mtrr.c
>> +++ b/arch/x86/kvm/mtrr.c
>> @@ -395,6 +395,189 @@ void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu)
>>          INIT_LIST_HEAD(&vcpu->arch.mtrr_state.head);
>>   }
>>
>> +struct mtrr_looker {
>> +       /* input fields. */
>> +       struct kvm_mtrr *mtrr_state;
>> +       u64 start;
>> +       u64 end;
>> +
>> +       /* output fields. */
>> +       int mem_type;
>> +       /* [start, end) is fully covered in MTRRs? */
>
> s/fully/not fully/ ?

Yup, thanks for pointing it out.

>
>> +       bool partial_map;
>> +
>> +       /* private fields. */
>> +       union {
>> +               /* used for fixed MTRRs. */
>> +               struct {
>> +                       int index;
>> +                       int seg;
>> +               };
>> +
>> +               /* used for var MTRRs. */
>> +               struct {
>> +                       struct kvm_mtrr_range *range;
>> +                       /* max address has been covered in var MTRRs. */
>> +                       u64 start_max;
>> +               };
>> +       };
>> +
>> +       bool fixed;
>> +};
>> +
>> +static void mtrr_lookup_init(struct mtrr_looker *looker,
>> +                            struct kvm_mtrr *mtrr_state, u64 start, u64 end)
>> +{
>> +       looker->mtrr_state = mtrr_state;
>> +       looker->start = start;
>> +       looker->end = end;
>> +}
>> +
>> +static u64 fixed_mtrr_range_end_addr(int seg, int index)
>> +{
>> +       struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg];
>> +
>> +        return mtrr_seg->start + mtrr_seg->range_size * index;
>
> Should be (index + 1)?

Good eyes, will fix.

>
>> +}
>> +
>> +static bool mtrr_lookup_fixed_start(struct mtrr_looker *looker)
>> +{
>> +       int seg, index;
>> +
>> +       if (!looker->mtrr_state->fixed_mtrr_enabled)
>> +               return false;
>> +
>> +       seg = fixed_mtrr_addr_to_seg(looker->start);
>> +       if (seg < 0)
>> +               return false;
>> +
>> +       looker->fixed = true;
>> +       index = fixed_mtrr_addr_seg_to_range_index(looker->start, seg);
>> +       looker->index = index;
>> +       looker->seg = seg;
>> +       looker->mem_type = looker->mtrr_state->fixed_ranges[index];
>> +       looker->start = fixed_mtrr_range_end_addr(seg, index);
>> +       return true;
>> +}
>> +
>> +static bool match_var_range(struct mtrr_looker *looker,
>> +                           struct kvm_mtrr_range *range)
>> +{
>> +       u64 start, end;
>> +
>> +       var_mtrr_range(range, &start, &end);
>> +       if (!(start >= looker->end || end <= looker->start)) {
>> +               looker->range = range;
>> +               looker->mem_type = range->base & 0xff;
>> +
>> +               /*
>> +                * the function is called when we do kvm_mtrr.head walking
>> +                * that means range has the minimum base address interleaves
>> +                * with [looker->start_max, looker->end).
>> +                */
>
> I'm having trouble understanding this comment. I think this is what you
> are trying to say:
>
>    this function is called when we do kvm_mtrr.head walking. range has the
>    minimum base address which interleaves [looker->start_max, looker->end).
>
> Let me know if I parsed it wrong.

Yes, it is, will improve the comment.


^ permalink raw reply	[flat|nested] 44+ messages in thread

end of thread, other threads:[~2015-06-09  2:50 UTC | newest]

Thread overview: 44+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-05-30 10:59 [PATCH 00/15] KVM: x86: fully implement vMTRR Xiao Guangrong
2015-05-30 10:58 ` Xiao Guangrong
2015-05-30 10:59 ` [PATCH 01/15] KVM: x86: move MTRR related code to a separate file Xiao Guangrong
2015-05-30 10:59 ` [PATCH 02/15] KVM: MTRR: handle MSR_MTRRcap in kvm_mtrr_get_msr Xiao Guangrong
2015-05-30 10:59 ` [PATCH 03/15] KVM: MTRR: remove mtrr_state.have_fixed Xiao Guangrong
2015-05-30 10:59 ` [PATCH 04/15] KVM: MTRR: exactly define the size of variable MTRRs Xiao Guangrong
2015-05-30 10:59 ` [PATCH 05/15] KVM: MTRR: clean up mtrr default type Xiao Guangrong
2015-06-01  9:11   ` Paolo Bonzini
2015-06-03  1:55     ` Xiao Guangrong
2015-06-09  0:35   ` David Matlack
2015-05-30 10:59 ` [PATCH 06/15] KVM: MTRR: do not split 64 bits MSR content Xiao Guangrong
2015-05-30 10:59 ` [PATCH 07/15] KVM: MTRR: improve kvm_mtrr_get_guest_memory_type Xiao Guangrong
2015-06-01  9:16   ` Paolo Bonzini
2015-06-03  2:12     ` Xiao Guangrong
2015-06-03  7:57       ` Paolo Bonzini
2015-05-30 10:59 ` [PATCH 08/15] KVM: MTRR: introduce fixed_mtrr_segment table Xiao Guangrong
2015-06-01  9:25   ` Paolo Bonzini
2015-06-03  2:29     ` Xiao Guangrong
2015-05-30 10:59 ` [PATCH 09/15] KVM: MTRR: introduce var_mtrr_range Xiao Guangrong
2015-06-09  0:36   ` David Matlack
2015-06-09  2:38     ` Xiao Guangrong
2015-05-30 10:59 ` [PATCH 10/15] KVM: MTRR: sort variable MTRRs Xiao Guangrong
2015-06-01  9:27   ` Paolo Bonzini
2015-06-03  2:31     ` Xiao Guangrong
2015-05-30 10:59 ` [PATCH 11/15] KVM: MTRR: introduce fixed_mtrr_addr_* functions Xiao Guangrong
2015-05-30 10:59 ` [PATCH 12/15] KVM: MTRR: introduce mtrr_for_each_mem_type Xiao Guangrong
2015-06-01  9:33   ` Paolo Bonzini
2015-06-01 14:26     ` Paolo Bonzini
2015-06-03  2:57       ` Xiao Guangrong
2015-06-03  2:40     ` Xiao Guangrong
2015-06-09  0:36   ` David Matlack
2015-06-09  2:45     ` Xiao Guangrong
2015-05-30 10:59 ` [PATCH 13/15] KVM: MTRR: simplify kvm_mtrr_get_guest_memory_type Xiao Guangrong
2015-05-30 10:59 ` [PATCH 14/15] KVM: MTRR: do not map huage page for non-consistent range Xiao Guangrong
2015-06-01  9:36   ` Paolo Bonzini
2015-06-01  9:38     ` Paolo Bonzini
2015-06-03  2:56     ` Xiao Guangrong
2015-06-03  7:55       ` Paolo Bonzini
2015-06-04  8:23         ` Xiao Guangrong
2015-06-04  8:26           ` Xiao Guangrong
2015-06-04  8:34             ` Paolo Bonzini
2015-06-04  8:36           ` Paolo Bonzini
2015-06-05  6:33             ` Xiao Guangrong
2015-05-30 10:59 ` [PATCH 15/15] KVM: VMX: fully implement guest MTRR virtualization Xiao Guangrong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).