[RFC/RFT PATCH v2 0/4] KVM: x86: full virtualization of guest MTRR

All of lore.kernel.org
 help / color / mirror / Atom feed

* [RFC/RFT PATCH v2 0/4] KVM: x86: full virtualization of guest MTRR
@ 2015-07-07 13:45 Paolo Bonzini
  2015-07-07 13:45 ` [PATCH 1/4] KVM: count number of assigned devices Paolo Bonzini
                   ` (4 more replies)
  0 siblings, 5 replies; 20+ messages in thread
From: Paolo Bonzini @ 2015-07-07 13:45 UTC (permalink / raw)
  To: linux-kernel, kvm
  Cc: guangrong.xiao, jroedel, alex.williamson, ogerlitz, amirv

This part of the MTRR patches was dropped by Xiao.  Bring SVM to feature
parity with VMX, and then do guest MTRR virtualization for both VMX and SVM.

The IPAT bit of VMX extended page tables is emulated by mangling the guest
PAT value.

I do not have any AMD machines that support an IOMMU, so I would like
some help testing these patches.  Thanks,

Paolo

v1->v2: AMD IOMMUs do have snooping control [Joerg]
	New patch 1

Jan Kiszka (1):
  KVM: SVM: Sync g_pat with guest-written PAT value

Paolo Bonzini (3):
  KVM: count number of assigned devices
  KVM: SVM: use NPT page attributes
  KVM: x86: apply guest MTRR virtualization on host reserved pages

 arch/x86/include/asm/kvm_host.h |  3 ++-
 arch/x86/kvm/iommu.c            |  2 ++
 arch/x86/kvm/svm.c              | 54 +++++++++++++++++++++++++++++++++++++----
 arch/x86/kvm/vmx.c              | 11 +++------
 arch/x86/kvm/x86.c              | 18 ++++++++++++++
 include/linux/kvm_host.h        | 18 +++++++++++++-
 virt/kvm/vfio.c                 |  5 ++++
 7 files changed, 96 insertions(+), 15 deletions(-)

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 1/4] KVM: count number of assigned devices
  2015-07-07 13:45 [RFC/RFT PATCH v2 0/4] KVM: x86: full virtualization of guest MTRR Paolo Bonzini
@ 2015-07-07 13:45 ` Paolo Bonzini
  2015-07-07 15:22   ` Alex Williamson
  2015-07-07 13:45 ` [PATCH 2/4] KVM: SVM: use NPT page attributes Paolo Bonzini
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 20+ messages in thread
From: Paolo Bonzini @ 2015-07-07 13:45 UTC (permalink / raw)
  To: linux-kernel, kvm
  Cc: guangrong.xiao, jroedel, alex.williamson, ogerlitz, amirv

If there are no assigned devices, the guest PAT is not providing
any useful information and can be overridden to writeback; VMX
always does this because it has the "IPAT" bit in its extended
page table entries, but SVM does not have anything similar.
Hook into VFIO and legacy device assignment so that they
provide this information to KVM.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  3 ++-
 arch/x86/kvm/iommu.c            |  2 ++
 arch/x86/kvm/x86.c              | 18 ++++++++++++++++++
 include/linux/kvm_host.h        | 18 +++++++++++++++++-
 virt/kvm/vfio.c                 |  5 +++++
 5 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2a7f5d782c33..7fe26ba9082e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -602,8 +602,9 @@ struct kvm_arch {
 	struct list_head assigned_dev_head;
 	struct iommu_domain *iommu_domain;
 	bool iommu_noncoherent;
-#define __KVM_HAVE_ARCH_NONCOHERENT_DMA
+#define __KVM_HAVE_ARCH_VFIO_HOOKS
 	atomic_t noncoherent_dma_count;
+	atomic_t assigned_device_count;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
diff --git a/arch/x86/kvm/iommu.c b/arch/x86/kvm/iommu.c
index 7dbced309ddb..5c520ebf6343 100644
--- a/arch/x86/kvm/iommu.c
+++ b/arch/x86/kvm/iommu.c
@@ -200,6 +200,7 @@ int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev)
 			goto out_unmap;
 	}
 
+	kvm_arch_start_assignment(kvm);
 	pci_set_dev_assigned(pdev);
 
 	dev_info(&pdev->dev, "kvm assign device\n");
@@ -224,6 +225,7 @@ int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev)
 	iommu_detach_device(domain, &pdev->dev);
 
 	pci_clear_dev_assigned(pdev);
+	kvm_arch_end_assignment(kvm);
 
 	dev_info(&pdev->dev, "kvm deassign device\n");
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bbaf44e8f0d3..6e455875d2e4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8218,6 +8218,24 @@ bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
 			kvm_x86_ops->interrupt_allowed(vcpu);
 }
 
+void kvm_arch_start_assignment(struct kvm *kvm)
+{
+	atomic_inc(&kvm->arch.assigned_device_count);
+}
+EXPORT_SYMBOL_GPL(kvm_arch_start_assignment);
+
+void kvm_arch_end_assignment(struct kvm *kvm)
+{
+	atomic_dec(&kvm->arch.assigned_device_count);
+}
+EXPORT_SYMBOL_GPL(kvm_arch_end_assignment);
+
+bool kvm_arch_has_assigned_device(struct kvm *kvm)
+{
+	return atomic_read(&kvm->arch.assigned_device_count);
+}
+EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);
+
 void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
 {
 	atomic_inc(&kvm->arch.noncoherent_dma_count);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9564fd78c547..56146fdb1ff0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -716,10 +716,13 @@ static inline void kvm_arch_free_vm(struct kvm *kvm)
 }
 #endif
 
-#ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA
+#ifdef __KVM_HAVE_ARCH_VFIO_HOOKS
 void kvm_arch_register_noncoherent_dma(struct kvm *kvm);
 void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm);
 bool kvm_arch_has_noncoherent_dma(struct kvm *kvm);
+void kvm_arch_start_assignment(struct kvm *kvm);
+void kvm_arch_end_assignment(struct kvm *kvm);
+bool kvm_arch_has_assigned_device(struct kvm *kvm);
 #else
 static inline void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
 {
@@ -733,6 +736,19 @@ static inline bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
 {
 	return false;
 }
+
+static inline void kvm_arch_start_assignment(struct kvm *kvm)
+{
+}
+
+static inline void kvm_arch_end_assignment(struct kvm *kvm)
+{
+}
+
+static inline bool kvm_arch_has_assigned_device(struct kvm *kvm)
+{
+	return false;
+}
 #endif
 
 static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index 620e37f741b8..1dd087da6f31 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -155,6 +155,8 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
 		list_add_tail(&kvg->node, &kv->group_list);
 		kvg->vfio_group = vfio_group;
 
+		kvm_arch_start_assignment(dev->kvm);
+
 		mutex_unlock(&kv->lock);
 
 		kvm_vfio_update_coherency(dev);
@@ -190,6 +192,8 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
 			break;
 		}
 
+		kvm_arch_end_assignment(dev->kvm);
+
 		mutex_unlock(&kv->lock);
 
 		kvm_vfio_group_put_external_user(vfio_group);
@@ -239,6 +243,7 @@ static void kvm_vfio_destroy(struct kvm_device *dev)
 		kvm_vfio_group_put_external_user(kvg->vfio_group);
 		list_del(&kvg->node);
 		kfree(kvg);
+		kvm_arch_end_assignment(dev->kvm);
 	}
 
 	kvm_vfio_update_coherency(dev);
-- 
1.8.3.1



^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 2/4] KVM: SVM: use NPT page attributes
  2015-07-07 13:45 [RFC/RFT PATCH v2 0/4] KVM: x86: full virtualization of guest MTRR Paolo Bonzini
  2015-07-07 13:45 ` [PATCH 1/4] KVM: count number of assigned devices Paolo Bonzini
@ 2015-07-07 13:45 ` Paolo Bonzini
  2015-07-08  5:59   ` Xiao Guangrong
  2015-07-17  0:35   ` Andy Lutomirski
  2015-07-07 13:45 ` [PATCH 3/4] KVM: SVM: Sync g_pat with guest-written PAT value Paolo Bonzini
                   ` (2 subsequent siblings)
  4 siblings, 2 replies; 20+ messages in thread
From: Paolo Bonzini @ 2015-07-07 13:45 UTC (permalink / raw)
  To: linux-kernel, kvm
  Cc: guangrong.xiao, jroedel, alex.williamson, ogerlitz, amirv

Right now, NPT page attributes are not used, and the final page
attribute depends solely on gPAT (which however is not synced
correctly), the guest MTRRs and the guest page attributes.

However, we can do better by mimicking what is done for VMX.
In the absence of PCI passthrough, the guest PAT can be ignored
and the page attributes can be just WB.  If passthrough is being
used, instead, keep respecting the guest PAT, and emulate the guest
MTRRs through the PAT field of the nested page tables.

The only snag is that WP memory cannot be emulated correctly,
because Linux's default PAT setting only includes the other types.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm.c | 47 ++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 42 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 602b974a60a6..0f125c1860ec 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1085,6 +1085,47 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
 	return target_tsc - tsc;
 }
 
+static void svm_set_guest_pat(struct vcpu_svm *svm, u64 *g_pat)
+{
+	struct kvm_vcpu *vcpu = &svm->vcpu;
+
+	/* Unlike Intel, AMD takes the guest's CR0.CD into account.
+	 *
+	 * AMD does not have IPAT.  To emulate it for the case of guests
+	 * with no assigned devices, just set everything to WB.  If guests
+	 * have assigned devices, however, we cannot force WB for RAM
+	 * pages only, so use the guest PAT as passed.
+	 */
+	if (!kvm_arch_has_assigned_device(vcpu->kvm))
+		*g_pat = 0x0606060606060606;
+	else
+		*g_pat = vcpu->arch.pat;
+}
+
+static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
+{
+	u8 cache;
+
+	/*
+	 * 1. MMIO: always map as UC
+	 * 2. No passthrough: always map as WB, and force guest PAT to WB as well
+	 * 3. Passthrough: can't guarantee the result, try to trust guest.
+	 */
+	if (is_mmio)
+		return _PAGE_NOCACHE;
+
+	if (!kvm_arch_has_assigned_device(vcpu->kvm))
+		return 0;
+
+	cache = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
+
+	/* Linux's host PAT value does not support WP.  */
+	if (cache == _PAGE_CACHE_MODE_WP)
+		cache = _PAGE_CACHE_MODE_UC_MINUS;
+
+	return cachemode2protval(cache);
+}
+
 static void init_vmcb(struct vcpu_svm *svm, bool init_event)
 {
 	struct vmcb_control_area *control = &svm->vmcb->control;
@@ -1180,6 +1221,7 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event)
 		clr_cr_intercept(svm, INTERCEPT_CR3_READ);
 		clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
 		save->g_pat = svm->vcpu.arch.pat;
+		svm_set_guest_pat(svm, &save->g_pat);
 		save->cr3 = 0;
 		save->cr4 = 0;
 	}
@@ -4088,11 +4130,6 @@ static bool svm_has_high_real_mode_segbase(void)
 	return true;
 }
 
-static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
-{
-	return 0;
-}
-
 static void svm_cpuid_update(struct kvm_vcpu *vcpu)
 {
 }
-- 
1.8.3.1



^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 3/4] KVM: SVM: Sync g_pat with guest-written PAT value
  2015-07-07 13:45 [RFC/RFT PATCH v2 0/4] KVM: x86: full virtualization of guest MTRR Paolo Bonzini
  2015-07-07 13:45 ` [PATCH 1/4] KVM: count number of assigned devices Paolo Bonzini
  2015-07-07 13:45 ` [PATCH 2/4] KVM: SVM: use NPT page attributes Paolo Bonzini
@ 2015-07-07 13:45 ` Paolo Bonzini
  2015-07-07 13:45 ` [PATCH 4/4] KVM: x86: apply guest MTRR virtualization on host reserved pages Paolo Bonzini
  2015-07-07 14:06 ` [RFC/RFT PATCH v2 0/4] KVM: x86: full virtualization of guest MTRR Joerg Roedel
  4 siblings, 0 replies; 20+ messages in thread
From: Paolo Bonzini @ 2015-07-07 13:45 UTC (permalink / raw)
  To: linux-kernel, kvm
  Cc: guangrong.xiao, jroedel, alex.williamson, ogerlitz, amirv, Jan Kiszka

From: Jan Kiszka <jan.kiszka@siemens.com>

When hardware supports the g_pat VMCB field, we can use it for emulating
the PAT configuration that the guest configures by writing to the
corresponding MSR.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 0f125c1860ec..728ac43fae16 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3296,6 +3296,16 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 	case MSR_VM_IGNNE:
 		vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
 		break;
+	case MSR_IA32_CR_PAT:
+		if (npt_enabled) {
+			if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
+				return 1;
+			vcpu->arch.pat = data;
+			svm_set_guest_pat(svm, &svm->vmcb->save.g_pat);
+			mark_dirty(svm->vmcb, VMCB_NPT);
+			break;
+		}
+		/* fall through */
 	default:
 		return kvm_set_msr_common(vcpu, msr);
 	}
-- 
1.8.3.1



^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 4/4] KVM: x86: apply guest MTRR virtualization on host reserved pages
  2015-07-07 13:45 [RFC/RFT PATCH v2 0/4] KVM: x86: full virtualization of guest MTRR Paolo Bonzini
                   ` (2 preceding siblings ...)
  2015-07-07 13:45 ` [PATCH 3/4] KVM: SVM: Sync g_pat with guest-written PAT value Paolo Bonzini
@ 2015-07-07 13:45 ` Paolo Bonzini
  2015-07-07 14:06 ` [RFC/RFT PATCH v2 0/4] KVM: x86: full virtualization of guest MTRR Joerg Roedel
  4 siblings, 0 replies; 20+ messages in thread
From: Paolo Bonzini @ 2015-07-07 13:45 UTC (permalink / raw)
  To: linux-kernel, kvm
  Cc: guangrong.xiao, jroedel, alex.williamson, ogerlitz, amirv

Currently guest MTRR is avoided if kvm_is_reserved_pfn returns true.
However, the guest could prefer a different page type than UC for
such pages. A good example is a passed-through VGA frame buffer, which
is not always UC as the host expects.

This patch enables full use of virtual guest MTRRs.

Suggested-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm.c |  7 ++-----
 arch/x86/kvm/vmx.c | 11 +++--------
 2 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 728ac43fae16..43f3a2a3479a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1107,14 +1107,11 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 	u8 cache;
 
 	/*
-	 * 1. MMIO: always map as UC
+	 * 1. MMIO: trust guest MTRR, so same as item 3.
 	 * 2. No passthrough: always map as WB, and force guest PAT to WB as well
 	 * 3. Passthrough: can't guarantee the result, try to trust guest.
 	 */
-	if (is_mmio)
-		return _PAGE_NOCACHE;
-
-	if (!kvm_arch_has_assigned_device(vcpu->kvm))
+	if (!is_mmio && !kvm_arch_has_assigned_device(vcpu->kvm))
 		return 0;
 
 	cache = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e856dd566f4c..5b4e9384717a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8632,22 +8632,17 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 	u64 ipat = 0;
 
 	/* For VT-d and EPT combination
-	 * 1. MMIO: always map as UC
+	 * 1. MMIO: guest may want to apply WC, trust it.
 	 * 2. EPT with VT-d:
 	 *   a. VT-d without snooping control feature: can't guarantee the
-	 *	result, try to trust guest.
+	 *	result, try to trust guest.  So the same as item 1.
 	 *   b. VT-d with snooping control feature: snooping control feature of
 	 *	VT-d engine can guarantee the cache correctness. Just set it
 	 *	to WB to keep consistent with host. So the same as item 3.
 	 * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep
 	 *    consistent with host MTRR
 	 */
-	if (is_mmio) {
-		cache = MTRR_TYPE_UNCACHABLE;
-		goto exit;
-	}
-
-	if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
+	if (!is_mmio && !kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
 		ipat = VMX_EPT_IPAT_BIT;
 		cache = MTRR_TYPE_WRBACK;
 		goto exit;
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [RFC/RFT PATCH v2 0/4] KVM: x86: full virtualization of guest MTRR
  2015-07-07 13:45 [RFC/RFT PATCH v2 0/4] KVM: x86: full virtualization of guest MTRR Paolo Bonzini
                   ` (3 preceding siblings ...)
  2015-07-07 13:45 ` [PATCH 4/4] KVM: x86: apply guest MTRR virtualization on host reserved pages Paolo Bonzini
@ 2015-07-07 14:06 ` Joerg Roedel
  2015-07-07 14:09   ` Paolo Bonzini
  4 siblings, 1 reply; 20+ messages in thread
From: Joerg Roedel @ 2015-07-07 14:06 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: linux-kernel, kvm, guangrong.xiao, alex.williamson, ogerlitz, amirv

Hi Paolo,

On Tue, Jul 07, 2015 at 03:45:35PM +0200, Paolo Bonzini wrote:
> I do not have any AMD machines that support an IOMMU, so I would like
> some help testing these patches.  Thanks,

What kind of testing do you want? Booting a guest with a device attached
is probably not sufficient, right?


	Joerg


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC/RFT PATCH v2 0/4] KVM: x86: full virtualization of guest MTRR
  2015-07-07 14:06 ` [RFC/RFT PATCH v2 0/4] KVM: x86: full virtualization of guest MTRR Joerg Roedel
@ 2015-07-07 14:09   ` Paolo Bonzini
  2015-07-07 14:14     ` Joerg Roedel
  0 siblings, 1 reply; 20+ messages in thread
From: Paolo Bonzini @ 2015-07-07 14:09 UTC (permalink / raw)
  To: Joerg Roedel
  Cc: linux-kernel, kvm, guangrong.xiao, alex.williamson, ogerlitz, amirv



On 07/07/2015 16:06, Joerg Roedel wrote:
> > I do not have any AMD machines that support an IOMMU, so I would like
> > some help testing these patches.  Thanks,
>
> What kind of testing do you want? Booting a guest with a device attached
> is probably not sufficient, right?

The guest should not take ages to boot, which is typical of messed-up
MTRR/PAT.  And if you add some actual usage of the attached device, it
actually is sufficient.  Even a simple ping test, or typing through a
USB keyboard on a passed-through controller, is enough.

Paolo

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC/RFT PATCH v2 0/4] KVM: x86: full virtualization of guest MTRR
  2015-07-07 14:09   ` Paolo Bonzini
@ 2015-07-07 14:14     ` Joerg Roedel
  0 siblings, 0 replies; 20+ messages in thread
From: Joerg Roedel @ 2015-07-07 14:14 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: linux-kernel, kvm, guangrong.xiao, alex.williamson, ogerlitz, amirv

On Tue, Jul 07, 2015 at 04:09:07PM +0200, Paolo Bonzini wrote:
> The guest should not take ages to boot, which is typical of messed-up
> MTRR/PAT.  And if you add some actual usage of the attached device, it
> actually is sufficient.  Even a simple ping test, or typing through a
> USB keyboard on a passed-through controller, is enough.

Okay, I'll do some testing on this tomorrow and let you know.


	Joerg


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/4] KVM: count number of assigned devices
  2015-07-07 13:45 ` [PATCH 1/4] KVM: count number of assigned devices Paolo Bonzini
@ 2015-07-07 15:22   ` Alex Williamson
  2015-07-07 15:36     ` Paolo Bonzini
  0 siblings, 1 reply; 20+ messages in thread
From: Alex Williamson @ 2015-07-07 15:22 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: linux-kernel, kvm, guangrong.xiao, jroedel, ogerlitz, amirv

On Tue, 2015-07-07 at 15:45 +0200, Paolo Bonzini wrote:
> If there are no assigned devices, the guest PAT are not providing
> any useful information and can be overridden to writeback; VMX
> always does this because it has the "IPAT" bit in its extended
> page table entries, but SVM does not have anything similar.
> Hook into VFIO and legacy device assignment so that they
> provide this information to KVM.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  3 ++-
>  arch/x86/kvm/iommu.c            |  2 ++
>  arch/x86/kvm/x86.c              | 18 ++++++++++++++++++
>  include/linux/kvm_host.h        | 18 +++++++++++++++++-
>  virt/kvm/vfio.c                 |  5 +++++
>  5 files changed, 44 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 2a7f5d782c33..7fe26ba9082e 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -602,8 +602,9 @@ struct kvm_arch {
>  	struct list_head assigned_dev_head;
>  	struct iommu_domain *iommu_domain;
>  	bool iommu_noncoherent;
> -#define __KVM_HAVE_ARCH_NONCOHERENT_DMA
> +#define __KVM_HAVE_ARCH_VFIO_HOOKS

Do we really want to tie these two things together under something
that's not strictly a "vfio" option?  Legacy assignment also makes use
of these, as shown in this patch, but even if we consider that temporary
until legacy assignment is removed, I can imagine platforms that might
care about one but not the other.  I don't really see the harm in using
a separate #define, perhaps __KVM_HAVE_ARCH_ASSIGNED_DEVICE.  Thanks,

Alex

>  	atomic_t noncoherent_dma_count;
> +	atomic_t assigned_device_count;
>  	struct kvm_pic *vpic;
>  	struct kvm_ioapic *vioapic;
>  	struct kvm_pit *vpit;
> diff --git a/arch/x86/kvm/iommu.c b/arch/x86/kvm/iommu.c
> index 7dbced309ddb..5c520ebf6343 100644
> --- a/arch/x86/kvm/iommu.c
> +++ b/arch/x86/kvm/iommu.c
> @@ -200,6 +200,7 @@ int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev)
>  			goto out_unmap;
>  	}
>  
> +	kvm_arch_start_assignment(kvm);
>  	pci_set_dev_assigned(pdev);
>  
>  	dev_info(&pdev->dev, "kvm assign device\n");
> @@ -224,6 +225,7 @@ int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev)
>  	iommu_detach_device(domain, &pdev->dev);
>  
>  	pci_clear_dev_assigned(pdev);
> +	kvm_arch_end_assignment(kvm);
>  
>  	dev_info(&pdev->dev, "kvm deassign device\n");
>  
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index bbaf44e8f0d3..6e455875d2e4 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -8218,6 +8218,24 @@ bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
>  			kvm_x86_ops->interrupt_allowed(vcpu);
>  }
>  
> +void kvm_arch_start_assignment(struct kvm *kvm)
> +{
> +	atomic_inc(&kvm->arch.assigned_device_count);
> +}
> +EXPORT_SYMBOL_GPL(kvm_arch_start_assignment);
> +
> +void kvm_arch_end_assignment(struct kvm *kvm)
> +{
> +	atomic_dec(&kvm->arch.assigned_device_count);
> +}
> +EXPORT_SYMBOL_GPL(kvm_arch_end_assignment);
> +
> +bool kvm_arch_has_assigned_device(struct kvm *kvm)
> +{
> +	return atomic_read(&kvm->arch.assigned_device_count);
> +}
> +EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);
> +
>  void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
>  {
>  	atomic_inc(&kvm->arch.noncoherent_dma_count);
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 9564fd78c547..56146fdb1ff0 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -716,10 +716,13 @@ static inline void kvm_arch_free_vm(struct kvm *kvm)
>  }
>  #endif
>  
> -#ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA
> +#ifdef __KVM_HAVE_ARCH_VFIO_HOOKS
>  void kvm_arch_register_noncoherent_dma(struct kvm *kvm);
>  void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm);
>  bool kvm_arch_has_noncoherent_dma(struct kvm *kvm);
> +void kvm_arch_start_assignment(struct kvm *kvm);
> +void kvm_arch_end_assignment(struct kvm *kvm);
> +bool kvm_arch_has_assigned_device(struct kvm *kvm);
>  #else
>  static inline void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
>  {
> @@ -733,6 +736,19 @@ static inline bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
>  {
>  	return false;
>  }
> +
> +static inline void kvm_arch_start_assignment(struct kvm *kvm)
> +{
> +}
> +
> +static inline void kvm_arch_end_assignment(struct kvm *kvm)
> +{
> +}
> +
> +static inline bool kvm_arch_has_assigned_device(struct kvm *kvm)
> +{
> +	return false;
> +}
>  #endif
>  
>  static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
> diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
> index 620e37f741b8..1dd087da6f31 100644
> --- a/virt/kvm/vfio.c
> +++ b/virt/kvm/vfio.c
> @@ -155,6 +155,8 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
>  		list_add_tail(&kvg->node, &kv->group_list);
>  		kvg->vfio_group = vfio_group;
>  
> +		kvm_arch_start_assignment(dev->kvm);
> +
>  		mutex_unlock(&kv->lock);
>  
>  		kvm_vfio_update_coherency(dev);
> @@ -190,6 +192,8 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
>  			break;
>  		}
>  
> +		kvm_arch_end_assignment(dev->kvm);
> +
>  		mutex_unlock(&kv->lock);
>  
>  		kvm_vfio_group_put_external_user(vfio_group);
> @@ -239,6 +243,7 @@ static void kvm_vfio_destroy(struct kvm_device *dev)
>  		kvm_vfio_group_put_external_user(kvg->vfio_group);
>  		list_del(&kvg->node);
>  		kfree(kvg);
> +		kvm_arch_end_assignment(dev->kvm);
>  	}
>  
>  	kvm_vfio_update_coherency(dev);




^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/4] KVM: count number of assigned devices
  2015-07-07 15:22   ` Alex Williamson
@ 2015-07-07 15:36     ` Paolo Bonzini
  0 siblings, 0 replies; 20+ messages in thread
From: Paolo Bonzini @ 2015-07-07 15:36 UTC (permalink / raw)
  To: Alex Williamson
  Cc: linux-kernel, kvm, guangrong.xiao, jroedel, ogerlitz, amirv



On 07/07/2015 17:22, Alex Williamson wrote:
>> > -#define __KVM_HAVE_ARCH_NONCOHERENT_DMA
>> > +#define __KVM_HAVE_ARCH_VFIO_HOOKS
> Do we really want to tie these two things together under something
> that's not strictly a "vfio" option?  Legacy assignment also makes use
> of these, as shown in this patch, but even if we consider that temporary
> until legacy assignment is removed, I can imagine platforms that might
> care about one but not the other.  I don't really see the harm in using
> a separate #define, perhaps __KVM_HAVE_ARCH_ASSIGNED_DEVICE.  Thanks,

Sure, that's okay.

Paolo

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/4] KVM: SVM: use NPT page attributes
  2015-07-07 13:45 ` [PATCH 2/4] KVM: SVM: use NPT page attributes Paolo Bonzini
@ 2015-07-08  5:59   ` Xiao Guangrong
  2015-07-08 11:19     ` Paolo Bonzini
  2015-07-17  0:35   ` Andy Lutomirski
  1 sibling, 1 reply; 20+ messages in thread
From: Xiao Guangrong @ 2015-07-08  5:59 UTC (permalink / raw)
  To: Paolo Bonzini, linux-kernel, kvm
  Cc: jroedel, alex.williamson, ogerlitz, amirv



On 07/07/2015 09:45 PM, Paolo Bonzini wrote:
> Right now, NPT page attributes are not used, and the final page
> attribute depends solely on gPAT (which however is not synced
> correctly), the guest MTRRs and the guest page attributes.
>
> However, we can do better by mimicking what is done for VMX.
> In the absence of PCI passthrough, the guest PAT can be ignored
> and the page attributes can be just WB.  If passthrough is being
> used, instead, keep respecting the guest PAT, and emulate the guest
> MTRRs through the PAT field of the nested page tables.
>
> The only snag is that WP memory cannot be emulated correctly,
> because Linux's default PAT setting only includes the other types.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>   arch/x86/kvm/svm.c | 47 ++++++++++++++++++++++++++++++++++++++++++-----
>   1 file changed, 42 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 602b974a60a6..0f125c1860ec 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -1085,6 +1085,47 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
>   	return target_tsc - tsc;
>   }
>
> +static void svm_set_guest_pat(struct vcpu_svm *svm, u64 *g_pat)
> +{
> +	struct kvm_vcpu *vcpu = &svm->vcpu;
> +
> +	/* Unlike Intel, AMD takes the guest's CR0.CD into account.

I noticed this code in svm_set_cr0():

	if (!(vcpu->kvm->arch.disabled_quirks & KVM_QUIRK_CD_NW_CLEARED))
		cr0 &= ~(X86_CR0_CD | X86_CR0_NW);

gCR0.CD is hidden from the CPU if KVM_QUIRK_CD_NW_CLEARED is not set in
disabled_quirks, which looks like the normal case after grepping the QEMU code.

> +	 *
> +	 * AMD does not have IPAT.  To emulate it for the case of guests
> +	 * with no assigned devices, just set everything to WB.  If guests
> +	 * have assigned devices, however, we cannot force WB for RAM
> +	 * pages only, so use the guest IPAT as passed.
> +	 */
> +	if (!kvm_arch_has_assigned_device(vcpu->kvm))
> +		*g_pat = 0x0606060606060606;
> +	else
> +		*g_pat = vcpu->arch.pat;
> +}
> +
> +static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
> +{
> +	u8 cache;
> +
> +	/*
> +	 * 1. MMIO: always map as UC
> +	 * 2. No passthrough: always map as WB, and force guest PAT to WB as well
> +	 * 3. Passthrough: can't guarantee the result, try to trust guest.
> +	 */
> +	if (is_mmio)
> +		return _PAGE_NOCACHE;
> +
> +	if (!kvm_arch_has_assigned_device(vcpu->kvm))
> +		return 0;
> +
> +	cache = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
> +

The @cache value returned from kvm_mtrr_get_guest_memory_type is MTRR_TYPE_*,
which is different from _PAGE_CACHE_MODE_*. The latter is purely for software
use, e.g.:
_PAGE_CACHE_MODE_WB = 0, whereas #define MTRR_TYPE_WRBACK     6



^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/4] KVM: SVM: use NPT page attributes
  2015-07-08  5:59   ` Xiao Guangrong
@ 2015-07-08 11:19     ` Paolo Bonzini
  2015-07-09  2:30       ` Xiao Guangrong
  0 siblings, 1 reply; 20+ messages in thread
From: Paolo Bonzini @ 2015-07-08 11:19 UTC (permalink / raw)
  To: Xiao Guangrong, linux-kernel, kvm
  Cc: jroedel, alex.williamson, ogerlitz, amirv



On 08/07/2015 07:59, Xiao Guangrong wrote:
> 
> 
> On 07/07/2015 09:45 PM, Paolo Bonzini wrote:
>> Right now, NPT page attributes are not used, and the final page
>> attribute depends solely on gPAT (which however is not synced
>> correctly), the guest MTRRs and the guest page attributes.
>>
>> However, we can do better by mimicking what is done for VMX.
>> In the absence of PCI passthrough, the guest PAT can be ignored
>> and the page attributes can be just WB.  If passthrough is being
>> used, instead, keep respecting the guest PAT, and emulate the guest
>> MTRRs through the PAT field of the nested page tables.
>>
>> The only snag is that WP memory cannot be emulated correctly,
>> because Linux's default PAT setting only includes the other types.
>>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> ---
>>   arch/x86/kvm/svm.c | 47 ++++++++++++++++++++++++++++++++++++++++++-----
>>   1 file changed, 42 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>> index 602b974a60a6..0f125c1860ec 100644
>> --- a/arch/x86/kvm/svm.c
>> +++ b/arch/x86/kvm/svm.c
>> @@ -1085,6 +1085,47 @@ static u64 svm_compute_tsc_offset(struct
>> kvm_vcpu *vcpu, u64 target_tsc)
>>       return target_tsc - tsc;
>>   }
>>
>> +static void svm_set_guest_pat(struct vcpu_svm *svm, u64 *g_pat)
>> +{
>> +    struct kvm_vcpu *vcpu = &svm->vcpu;
>> +
>> +    /* Unlike Intel, AMD takes the guest's CR0.CD into account.
> 
> I noticed this code in svm_set_cr0():
> 
>     if (!(vcpu->kvm->arch.disabled_quirks & KVM_QUIRK_CD_NW_CLEARED))
>         cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
> 
> gCR0.CD is hidden to CPU if KVM_QUIRK_CD_NW_CLEARED is not set and looks
> like
> it is the normal case after grepping Qemu code.
> 
>> +     *
>> +     * AMD does not have IPAT.  To emulate it for the case of guests
>> +     * with no assigned devices, just set everything to WB.  If guests
>> +     * have assigned devices, however, we cannot force WB for RAM
>> +     * pages only, so use the guest IPAT as passed.
>> +     */
>> +    if (!kvm_arch_has_assigned_device(vcpu->kvm))
>> +        *g_pat = 0x0606060606060606;
>> +    else
>> +        *g_pat = vcpu->arch.pat;
>> +}
>> +
>> +static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool
>> is_mmio)
>> +{
>> +    u8 cache;
>> +
>> +    /*
>> +     * 1. MMIO: always map as UC
>> +     * 2. No passthrough: always map as WB, and force guest PAT to WB
>> as well
>> +     * 3. Passthrough: can't guarantee the result, try to trust guest.
>> +     */
>> +    if (is_mmio)
>> +        return _PAGE_NOCACHE;
>> +
>> +    if (!kvm_arch_has_assigned_device(vcpu->kvm))
>> +        return 0;
>> +
>> +    cache = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
>> +
> 
> @cache return from kvm_mtrr_get_guest_memory_type is MTRR_TYPE_*
> which is different with _PAGE_CACHE_MODE_*. The latter is pure SW
> usage, e.g:
> _PAGE_CACHE_MODE_WB = 0 and  #define MTRR_TYPE_WRBACK     6

Oops, you're right.  In fact my first version was correct, then I
changed it to use cachemode2protval and screwed up.

Paolo

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/4] KVM: SVM: use NPT page attributes
  2015-07-08 11:19     ` Paolo Bonzini
@ 2015-07-09  2:30       ` Xiao Guangrong
  2015-07-09 15:18         ` Paolo Bonzini
  0 siblings, 1 reply; 20+ messages in thread
From: Xiao Guangrong @ 2015-07-09  2:30 UTC (permalink / raw)
  To: Paolo Bonzini, linux-kernel, kvm
  Cc: jroedel, alex.williamson, ogerlitz, amirv



On 07/08/2015 07:19 PM, Paolo Bonzini wrote:
>
>
> On 08/07/2015 07:59, Xiao Guangrong wrote:
>>
>>
>> On 07/07/2015 09:45 PM, Paolo Bonzini wrote:
>>> Right now, NPT page attributes are not used, and the final page
>>> attribute depends solely on gPAT (which however is not synced
>>> correctly), the guest MTRRs and the guest page attributes.
>>>
>>> However, we can do better by mimicking what is done for VMX.
>>> In the absence of PCI passthrough, the guest PAT can be ignored
>>> and the page attributes can be just WB.  If passthrough is being
>>> used, instead, keep respecting the guest PAT, and emulate the guest
>>> MTRRs through the PAT field of the nested page tables.
>>>
>>> The only snag is that WP memory cannot be emulated correctly,
>>> because Linux's default PAT setting only includes the other types.
>>>
>>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>>> ---
>>>    arch/x86/kvm/svm.c | 47 ++++++++++++++++++++++++++++++++++++++++++-----
>>>    1 file changed, 42 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>>> index 602b974a60a6..0f125c1860ec 100644
>>> --- a/arch/x86/kvm/svm.c
>>> +++ b/arch/x86/kvm/svm.c
>>> @@ -1085,6 +1085,47 @@ static u64 svm_compute_tsc_offset(struct
>>> kvm_vcpu *vcpu, u64 target_tsc)
>>>        return target_tsc - tsc;
>>>    }
>>>
>>> +static void svm_set_guest_pat(struct vcpu_svm *svm, u64 *g_pat)
>>> +{
>>> +    struct kvm_vcpu *vcpu = &svm->vcpu;
>>> +
>>> +    /* Unlike Intel, AMD takes the guest's CR0.CD into account.
>>
>> I noticed this code in svm_set_cr0():
>>
>>      if (!(vcpu->kvm->arch.disabled_quirks & KVM_QUIRK_CD_NW_CLEARED))
>>          cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
>>
>> gCR0.CD is hidden to CPU if KVM_QUIRK_CD_NW_CLEARED is not set and looks
>> like
>> it is the normal case after grepping Qemu code.
>>

How about this one? I still do not understand how SVM properly emulates CR0.CD. :(

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/4] KVM: SVM: use NPT page attributes
  2015-07-09  2:30       ` Xiao Guangrong
@ 2015-07-09 15:18         ` Paolo Bonzini
  2015-07-10  1:19           ` Xiao Guangrong
  0 siblings, 1 reply; 20+ messages in thread
From: Paolo Bonzini @ 2015-07-09 15:18 UTC (permalink / raw)
  To: Xiao Guangrong, linux-kernel, kvm
  Cc: jroedel, alex.williamson, ogerlitz, amirv



On 09/07/2015 04:30, Xiao Guangrong wrote:
>>>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>>>> index 602b974a60a6..0f125c1860ec 100644
>>>> --- a/arch/x86/kvm/svm.c
>>>> +++ b/arch/x86/kvm/svm.c
>>>> @@ -1085,6 +1085,47 @@ static u64 svm_compute_tsc_offset(struct
>>>> kvm_vcpu *vcpu, u64 target_tsc)
>>>>        return target_tsc - tsc;
>>>>    }
>>>>
>>>> +static void svm_set_guest_pat(struct vcpu_svm *svm, u64 *g_pat)
>>>> +{
>>>> +    struct kvm_vcpu *vcpu = &svm->vcpu;
>>>> +
>>>> +    /* Unlike Intel, AMD takes the guest's CR0.CD into account.
>>>
>>> I noticed this code in svm_set_cr0():
>>>
>>>      if (!(vcpu->kvm->arch.disabled_quirks & KVM_QUIRK_CD_NW_CLEARED))
>>>          cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
>>>
>>> gCR0.CD is hidden to CPU if KVM_QUIRK_CD_NW_CLEARED is not set and looks
>>> like it is the normal case after grepping Qemu code.

Hi Xiao,

yes, this is correct.  QEMU still does not have support for disabling
"quirks", so gCR0.CD is currently hidden on SVM.  I would like to
include this series in 4.2, while for 4.3 I will disable the quirk above
altogether (it is superseded by the way PAT is forced to all-WB).

In any case, this is just a KVM limitation.  gCR0.CD is taken into
account by the processor when computing the effective memory type.  It
uses all of these:

- the guest page tables PAT/PCD/PWT

- the guest CR3.PCD/CR3.PWT

- the nested page tables PAT/PCD/PWT

- the nested CR3 (nCR3) PCD/PWT

- the host MTRRs

- gCR0.CD

- hCR0.CD

Paolo

> How about this one? I still do not know how SVM properly emulates
> CR0.CD? :(
> -- 
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/4] KVM: SVM: use NPT page attributes
  2015-07-09 15:18         ` Paolo Bonzini
@ 2015-07-10  1:19           ` Xiao Guangrong
  2015-07-10 10:47             ` Paolo Bonzini
  0 siblings, 1 reply; 20+ messages in thread
From: Xiao Guangrong @ 2015-07-10  1:19 UTC (permalink / raw)
  To: Paolo Bonzini, linux-kernel, kvm
  Cc: jroedel, alex.williamson, ogerlitz, amirv



On 07/09/2015 11:18 PM, Paolo Bonzini wrote:
>
>
> On 09/07/2015 04:30, Xiao Guangrong wrote:
>>>>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>>>>> index 602b974a60a6..0f125c1860ec 100644
>>>>> --- a/arch/x86/kvm/svm.c
>>>>> +++ b/arch/x86/kvm/svm.c
>>>>> @@ -1085,6 +1085,47 @@ static u64 svm_compute_tsc_offset(struct
>>>>> kvm_vcpu *vcpu, u64 target_tsc)
>>>>>         return target_tsc - tsc;
>>>>>     }
>>>>>
>>>>> +static void svm_set_guest_pat(struct vcpu_svm *svm, u64 *g_pat)
>>>>> +{
>>>>> +    struct kvm_vcpu *vcpu = &svm->vcpu;
>>>>> +
>>>>> +    /* Unlike Intel, AMD takes the guest's CR0.CD into account.
>>>>
>>>> I noticed this code in svm_set_cr0():
>>>>
>>>>       if (!(vcpu->kvm->arch.disabled_quirks & KVM_QUIRK_CD_NW_CLEARED))
>>>>           cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
>>>>
>>>> gCR0.CD is hidden to CPU if KVM_QUIRK_CD_NW_CLEARED is not set and looks
>>>> like it is the normal case after grepping Qemu code.
>
> Hi Xiao,
>
> yes, this is correct.  QEMU still does not have support for disabling
> "quirks", so gCR0.CD is currently hidden on SVM.  I would like to
> include this series in 4.2, while for 4.3 I will disable the quirk above
> altogether (it is superseded by the way PAT is forced to all-WB).
>

That plan sounds good to me.

Will you drop disabled_quirks completely, or just enable it in QEMU? :)

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/4] KVM: SVM: use NPT page attributes
  2015-07-10  1:19           ` Xiao Guangrong
@ 2015-07-10 10:47             ` Paolo Bonzini
  2015-07-10 16:02               ` Xiao Guangrong
  0 siblings, 1 reply; 20+ messages in thread
From: Paolo Bonzini @ 2015-07-10 10:47 UTC (permalink / raw)
  To: Xiao Guangrong, linux-kernel, kvm
  Cc: jroedel, alex.williamson, ogerlitz, amirv



On 10/07/2015 03:19, Xiao Guangrong wrote:
>> yes, this is correct.  QEMU still does not have support for disabling
>> "quirks", so gCR0.CD is currently hidden on SVM.  I would like to
>> include this series in 4.2, while for 4.3 I will disable the quirk above
>> altogether (it is superseded by the way PAT is forced to all-WB).
> 
> That plan sounds good to me.
> 
> You will drop disabled_quirks completely or just enable it in Qemu? :)

I will drop this quirk completely.  Other quirks (well, there's just
one) will remain.

Paolo

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/4] KVM: SVM: use NPT page attributes
  2015-07-10 10:47             ` Paolo Bonzini
@ 2015-07-10 16:02               ` Xiao Guangrong
  0 siblings, 0 replies; 20+ messages in thread
From: Xiao Guangrong @ 2015-07-10 16:02 UTC (permalink / raw)
  To: Paolo Bonzini, linux-kernel, kvm
  Cc: jroedel, alex.williamson, ogerlitz, amirv



On 07/10/2015 06:47 PM, Paolo Bonzini wrote:
>
>
> On 10/07/2015 03:19, Xiao Guangrong wrote:
>>> yes, this is correct.  QEMU still does not have support for disabling
>>> "quirks", so gCR0.CD is currently hidden on SVM.  I would like to
>>> include this series in 4.2, while for 4.3 I will disable the quirk above
>>> altogether (it is superseded by the way PAT is forced to all-WB).
>>
>> That plan sounds good to me.
>>
>> You will drop disabled_quirks completely or just enable it in Qemu? :)
>
> I will drop this quirk completely.  Other quirks (well, there's just
> one) will remain.

Makes sense.

The whole patchset looks good to me!

Reviewed-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/4] KVM: SVM: use NPT page attributes
  2015-07-07 13:45 ` [PATCH 2/4] KVM: SVM: use NPT page attributes Paolo Bonzini
  2015-07-08  5:59   ` Xiao Guangrong
@ 2015-07-17  0:35   ` Andy Lutomirski
  2015-07-17  2:31     ` Paolo Bonzini
  1 sibling, 1 reply; 20+ messages in thread
From: Andy Lutomirski @ 2015-07-17  0:35 UTC (permalink / raw)
  To: Paolo Bonzini, linux-kernel, kvm
  Cc: guangrong.xiao, jroedel, alex.williamson, ogerlitz, amirv

On 07/07/2015 06:45 AM, Paolo Bonzini wrote:
> Right now, NPT page attributes are not used, and the final page
> attribute depends solely on gPAT (which however is not synced
> correctly), the guest MTRRs and the guest page attributes.
>
> However, we can do better by mimicking what is done for VMX.
> In the absence of PCI passthrough, the guest PAT can be ignored
> and the page attributes can be just WB.  If passthrough is being
> used, instead, keep respecting the guest PAT, and emulate the guest
> MTRRs through the PAT field of the nested page tables.
>
> The only snag is that WP memory cannot be emulated correctly,
> because Linux's default PAT setting only includes the other types.
>

As of quite recently, Linux understands that the PAT has eight slots, 
and we can probably give you WP.  Do you want it?

--Andy

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/4] KVM: SVM: use NPT page attributes
  2015-07-17  0:35   ` Andy Lutomirski
@ 2015-07-17  2:31     ` Paolo Bonzini
  0 siblings, 0 replies; 20+ messages in thread
From: Paolo Bonzini @ 2015-07-17  2:31 UTC (permalink / raw)
  To: Andy Lutomirski, linux-kernel, kvm
  Cc: guangrong.xiao, jroedel, alex.williamson, ogerlitz, amirv



On 17/07/2015 02:35, Andy Lutomirski wrote:
> 
>> Right now, NPT page attributes are not used, and the final page
>> attribute depends solely on gPAT (which however is not synced
>> correctly), the guest MTRRs and the guest page attributes.
>>
>> However, we can do better by mimicking what is done for VMX.
>> In the absence of PCI passthrough, the guest PAT can be ignored
>> and the page attributes can be just WB.  If passthrough is being
>> used, instead, keep respecting the guest PAT, and emulate the guest
>> MTRRs through the PAT field of the nested page tables.
>>
>> The only snag is that WP memory cannot be emulated correctly,
>> because Linux's default PAT setting only includes the other types.
>>
> 
> As of quite recently, Linux understands that the PAT has eight slots,
> and we can probably give you WP.  Do you want it?

It doesn't really matter, it's mostly used in the firmware.

Paolo

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 2/4] KVM: SVM: use NPT page attributes
  2015-07-08 15:18 [RFC/RFT PATCH v3 " Paolo Bonzini
@ 2015-07-08 15:18 ` Paolo Bonzini
  0 siblings, 0 replies; 20+ messages in thread
From: Paolo Bonzini @ 2015-07-08 15:18 UTC (permalink / raw)
  To: linux-kernel, kvm
  Cc: guangrong.xiao, jroedel, alex.williamson, ogerlitz, amirv

Right now, NPT page attributes are not used, and the final page
attribute depends solely on gPAT (which however is not synced
correctly), the guest MTRRs and the guest page attributes.

However, we can do better by mimicking what is done for VMX.
In the absence of PCI passthrough, the guest PAT can be ignored
and the page attributes can be just WB.  If passthrough is being
used, instead, keep respecting the guest PAT, and emulate the guest
MTRRs through the PAT field of the nested page tables.

The only snag is that a guest MTRR type can only be emulated correctly
if the host's (Linux's) PAT setting includes that memory type.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm.c | 101 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 96 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 602b974a60a6..414ec25b673e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -865,6 +865,64 @@ static void svm_disable_lbrv(struct vcpu_svm *svm)
 	set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
 }
 
+#define MTRR_TYPE_UC_MINUS	7
+#define MTRR2PROTVAL_INVALID 0xff
+
+static u8 mtrr2protval[8];
+
+static u8 fallback_mtrr_type(int mtrr)
+{
+	/*
+	 * WT and WP aren't always available in the host PAT.  Treat
+	 * them as UC and UC- respectively.  Everything else should be
+	 * there.
+	 */
+	switch (mtrr)
+	{
+	case MTRR_TYPE_WRTHROUGH:
+		return MTRR_TYPE_UNCACHABLE;
+	case MTRR_TYPE_WRPROT:
+		return MTRR_TYPE_UC_MINUS;
+	default:
+		BUG();
+	}
+}
+
+static void build_mtrr2protval(void)
+{
+	int i;
+	u64 pat;
+
+	for (i = 0; i < 8; i++)
+		mtrr2protval[i] = MTRR2PROTVAL_INVALID;
+
+	/* Ignore the invalid MTRR types.  */
+	mtrr2protval[2] = 0;
+	mtrr2protval[3] = 0;
+
+	/*
+	 * Use host PAT value to figure out the mapping from guest MTRR
+	 * values to nested page table PAT/PCD/PWT values.  We do not
+	 * want to change the host PAT value every time we enter the
+	 * guest.
+	 */
+	rdmsrl(MSR_IA32_CR_PAT, pat);
+	for (i = 0; i < 8; i++) {
+		u8 mtrr = pat >> (8 * i);
+
+		if (mtrr2protval[mtrr] == MTRR2PROTVAL_INVALID)
+			mtrr2protval[mtrr] = __cm_idx2pte(i);
+	}
+
+	for (i = 0; i < 8; i++) {
+		if (mtrr2protval[i] == MTRR2PROTVAL_INVALID) {
+			u8 fallback = fallback_mtrr_type(i);
+			mtrr2protval[i] = mtrr2protval[fallback];
+			BUG_ON(mtrr2protval[i] == MTRR2PROTVAL_INVALID);
+		}
+	}
+}
+
 static __init int svm_hardware_setup(void)
 {
 	int cpu;
@@ -931,6 +989,7 @@ static __init int svm_hardware_setup(void)
 	} else
 		kvm_disable_tdp();
 
+	build_mtrr2protval();
 	return 0;
 
 err:
@@ -1085,6 +1144,42 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
 	return target_tsc - tsc;
 }
 
+static void svm_set_guest_pat(struct vcpu_svm *svm, u64 *g_pat)
+{
+	struct kvm_vcpu *vcpu = &svm->vcpu;
+
+	/* Unlike Intel, AMD takes the guest's CR0.CD into account.
+	 *
+	 * AMD does not have IPAT.  To emulate it for the case of guests
+	 * with no assigned devices, just set everything to WB.  If guests
+	 * have assigned devices, however, we cannot force WB for RAM
+	 * pages only, so use the guest PAT directly.
+	 */
+	if (!kvm_arch_has_assigned_device(vcpu->kvm))
+		*g_pat = 0x0606060606060606;
+	else
+		*g_pat = vcpu->arch.pat;
+}
+
+static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
+{
+	u8 mtrr;
+
+	/*
+	 * 1. MMIO: always map as UC
+	 * 2. No passthrough: always map as WB, and force guest PAT to WB as well
+	 * 3. Passthrough: can't guarantee the result, try to trust guest.
+	 */
+	if (is_mmio)
+		return _PAGE_NOCACHE;
+
+	if (!kvm_arch_has_assigned_device(vcpu->kvm))
+		return 0;
+
+	mtrr = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
+	return mtrr2protval[mtrr];
+}
+
 static void init_vmcb(struct vcpu_svm *svm, bool init_event)
 {
 	struct vmcb_control_area *control = &svm->vmcb->control;
@@ -1180,6 +1275,7 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event)
 		clr_cr_intercept(svm, INTERCEPT_CR3_READ);
 		clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
 		save->g_pat = svm->vcpu.arch.pat;
+		svm_set_guest_pat(svm, &save->g_pat);
 		save->cr3 = 0;
 		save->cr4 = 0;
 	}
@@ -4088,11 +4184,6 @@ static bool svm_has_high_real_mode_segbase(void)
 	return true;
 }
 
-static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
-{
-	return 0;
-}
-
 static void svm_cpuid_update(struct kvm_vcpu *vcpu)
 {
 }
-- 
1.8.3.1



^ permalink raw reply related	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2015-07-17  2:31 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-07-07 13:45 [RFC/RFT PATCH v2 0/4] KVM: x86: full virtualization of guest MTRR Paolo Bonzini
2015-07-07 13:45 ` [PATCH 1/4] KVM: count number of assigned devices Paolo Bonzini
2015-07-07 15:22   ` Alex Williamson
2015-07-07 15:36     ` Paolo Bonzini
2015-07-07 13:45 ` [PATCH 2/4] KVM: SVM: use NPT page attributes Paolo Bonzini
2015-07-08  5:59   ` Xiao Guangrong
2015-07-08 11:19     ` Paolo Bonzini
2015-07-09  2:30       ` Xiao Guangrong
2015-07-09 15:18         ` Paolo Bonzini
2015-07-10  1:19           ` Xiao Guangrong
2015-07-10 10:47             ` Paolo Bonzini
2015-07-10 16:02               ` Xiao Guangrong
2015-07-17  0:35   ` Andy Lutomirski
2015-07-17  2:31     ` Paolo Bonzini
2015-07-07 13:45 ` [PATCH 3/4] KVM: SVM: Sync g_pat with guest-written PAT value Paolo Bonzini
2015-07-07 13:45 ` [PATCH 4/4] KVM: x86: apply guest MTRR virtualization on host reserved pages Paolo Bonzini
2015-07-07 14:06 ` [RFC/RFT PATCH v2 0/4] KVM: x86: full virtualization of guest MTRR Joerg Roedel
2015-07-07 14:09   ` Paolo Bonzini
2015-07-07 14:14     ` Joerg Roedel
2015-07-08 15:18 [RFC/RFT PATCH v3 " Paolo Bonzini
2015-07-08 15:18 ` [PATCH 2/4] KVM: SVM: use NPT page attributes Paolo Bonzini

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.