LKML Archive on lore.kernel.org
 help / color / Atom feed
From: Shanker Donthineni <sdonthineni@nvidia.com>
To: Marc Zyngier <maz@kernel.org>,
	Alex Williamson <alex.williamson@redhat.com>
Cc: Will Deacon <will@kernel.org>,
	Catalin Marinas <catalin.marinas@arm.com>,
	Christoffer Dall <christoffer.dall@arm.com>,
	<linux-arm-kernel@lists.infradead.org>,
	<kvmarm@lists.cs.columbia.edu>, <linux-kernel@vger.kernel.org>,
	<kvm@vger.kernel.org>, Vikram Sethi <vsethi@nvidia.com>,
	Shanker Donthineni <sdonthineni@nvidia.com>,
	"Jason Sequeira" <jsequeira@nvidia.com>
Subject: [RFC 2/2] KVM: arm64: Add write-combine support for stage-2 entries
Date: Thu, 29 Apr 2021 11:29:06 -0500
Message-ID: <20210429162906.32742-3-sdonthineni@nvidia.com> (raw)
In-Reply-To: <20210429162906.32742-1-sdonthineni@nvidia.com>

In the current implementation, the device memory is always mapped as
DEVICE_nGnRE in stage-2. In the host kernel, device drivers have
flexibility whether to choose a memory-type device or write-combine
(Non-cacheable) depends on the use case. PCI specification has a
prefetchable BAR concept where multiple writes can be combined and
no side effects on reads. It provides huge performance improvement
and also allows unaligned access.

NVIDIA GPU PCIe devices have 3 BAR regions. Two regions are mapped to
video/compute memory and marked as prefetchable. The GPU driver takes
advantage of the write-combine feature for higher performance. The
same driver has no issues in the host kernel but crashes inside the
virtual machine because of unaligned accesses.

This patch finds the PTE attributes for device memory in VMA. It
updates the stage-2 attribute to NORMAL_NC for WC regions and
the default type DEVICE_nGnRE for non-WC regions.

Change-Id: Ibaea69c7a301df3c86609e871f6d066728391080
Signed-off-by: Shanker Donthineni <sdonthineni@nvidia.com>
---
 arch/arm64/include/asm/kvm_mmu.h     |  3 ++-
 arch/arm64/include/asm/kvm_pgtable.h |  2 ++
 arch/arm64/include/asm/memory.h      |  4 +++-
 arch/arm64/kvm/hyp/pgtable.c         |  9 +++++++--
 arch/arm64/kvm/mmu.c                 | 21 ++++++++++++++++++---
 arch/arm64/kvm/vgic/vgic-v2.c        |  2 +-
 6 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 90873851f677..dec498a6ba2f 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -160,7 +160,8 @@ void stage2_unmap_vm(struct kvm *kvm);
 int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu);
 void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu);
 int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
-			  phys_addr_t pa, unsigned long size, bool writable);
+			  phys_addr_t pa, unsigned long size, bool writable,
+			  bool writecombine);
 
 int kvm_handle_guest_abort(struct kvm_vcpu *vcpu);
 
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 8886d43cfb11..26f28220f6f3 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -35,6 +35,7 @@ struct kvm_pgtable {
  * @KVM_PGTABLE_PROT_W:		Write permission.
  * @KVM_PGTABLE_PROT_R:		Read permission.
  * @KVM_PGTABLE_PROT_DEVICE:	Device attributes.
+ * @KVM_PGTABLE_PROT_WC:	Normal non-cacheable (WC).
  */
 enum kvm_pgtable_prot {
 	KVM_PGTABLE_PROT_X			= BIT(0),
@@ -42,6 +43,7 @@ enum kvm_pgtable_prot {
 	KVM_PGTABLE_PROT_R			= BIT(2),
 
 	KVM_PGTABLE_PROT_DEVICE			= BIT(3),
+	KVM_PGTABLE_PROT_WC			= BIT(4),
 };
 
 #define PAGE_HYP		(KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W)
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 0aabc3be9a75..04a812b59437 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -144,13 +144,15 @@
  * Memory types for Stage-2 translation
  */
 #define MT_S2_NORMAL		0xf
+#define MT_S2_WRITE_COMBINE	5
 #define MT_S2_DEVICE_nGnRE	0x1
 
 /*
  * Memory types for Stage-2 translation when ID_AA64MMFR2_EL1.FWB is 0001
- * Stage-2 enforces Normal-WB and Device-nGnRE
+ * Stage-2 enforces Normal-WB, Normal-NC and Device-nGnRE
  */
 #define MT_S2_FWB_NORMAL	6
+#define MT_S2_FWB_WRITE_COMBINE	5
 #define MT_S2_FWB_DEVICE_nGnRE	1
 
 #ifdef CONFIG_ARM64_4K_PAGES
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 926fc07074f5..bdfed559eae2 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -444,9 +444,14 @@ static int stage2_map_set_prot_attr(enum kvm_pgtable_prot prot,
 				    struct stage2_map_data *data)
 {
 	bool device = prot & KVM_PGTABLE_PROT_DEVICE;
-	kvm_pte_t attr = device ? PAGE_S2_MEMATTR(DEVICE_nGnRE) :
-			    PAGE_S2_MEMATTR(NORMAL);
 	u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS;
+	kvm_pte_t attr = PAGE_S2_MEMATTR(NORMAL);
+
+	if (device) {
+		attr = (prot & KVM_PGTABLE_PROT_WC) ?
+			PAGE_S2_MEMATTR(WRITE_COMBINE) :
+			PAGE_S2_MEMATTR(DEVICE_nGnRE);
+	}
 
 	if (!(prot & KVM_PGTABLE_PROT_X))
 		attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 8711894db8c2..5b8ec1ab12e2 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -487,6 +487,16 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
 	}
 }
 
+/**
+ * is_vma_write_combine - check if VMA is mapped with writecombine or not
+ * Return true if VMA mapped with MT_NORMAL_NC otherwise fasle
+ */
+static bool inline is_vma_write_combine(struct vm_area_struct *vma)
+{
+	pteval_t pteval = pgprot_val(vma->vm_page_prot);
+	return ((pteval & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL_NC));
+}
+
 /**
  * kvm_phys_addr_ioremap - map a device range to guest IPA
  *
@@ -495,9 +505,11 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
  * @pa:		The physical address of the device
  * @size:	The size of the mapping
  * @writable:   Whether or not to create a writable mapping
+ * @writecombine: Whether or not to create a writecombine mapping
  */
 int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
-			  phys_addr_t pa, unsigned long size, bool writable)
+			  phys_addr_t pa, unsigned long size, bool writable,
+			  bool writecombine)
 {
 	phys_addr_t addr;
 	int ret = 0;
@@ -505,6 +517,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 	struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
 	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_DEVICE |
 				     KVM_PGTABLE_PROT_R |
+				     (writecombine ? KVM_PGTABLE_PROT_WC : 0) |
 				     (writable ? KVM_PGTABLE_PROT_W : 0);
 
 	size += offset_in_page(guest_ipa);
@@ -891,7 +904,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	}
 
 	if (device)
-		prot |= KVM_PGTABLE_PROT_DEVICE;
+		prot |= KVM_PGTABLE_PROT_DEVICE |
+			(is_vma_write_combine(vma) ? KVM_PGTABLE_PROT_WC : 0);
 	else if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC))
 		prot |= KVM_PGTABLE_PROT_X;
 
@@ -1357,7 +1371,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 
 			ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
 						    vm_end - vm_start,
-						    writable);
+						    writable,
+						    is_vma_write_combine(vma));
 			if (ret)
 				break;
 		}
diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c
index 11934c2af2f4..6f921efea6c0 100644
--- a/arch/arm64/kvm/vgic/vgic-v2.c
+++ b/arch/arm64/kvm/vgic/vgic-v2.c
@@ -336,7 +336,7 @@ int vgic_v2_map_resources(struct kvm *kvm)
 	if (!static_branch_unlikely(&vgic_v2_cpuif_trap)) {
 		ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base,
 					    kvm_vgic_global_state.vcpu_base,
-					    KVM_VGIC_V2_CPU_SIZE, true);
+					    KVM_VGIC_V2_CPU_SIZE, true, false);
 		if (ret) {
 			kvm_err("Unable to remap VGIC CPU to VCPU\n");
 			return ret;
-- 
2.17.1


  parent reply index

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-29 16:29 [RFC 0/2] [RFC] Honor PCI prefetchable attributes for a virtual machine on ARM64 Shanker Donthineni
2021-04-29 16:29 ` [RFC 1/2] vfio/pci: keep the prefetchable attribute of a BAR region in VMA Shanker Donthineni
2021-04-29 18:28   ` Alex Williamson
2021-04-29 19:14     ` Shanker R Donthineni
2021-04-29 19:46       ` Alex Williamson
2021-04-29 22:08         ` Vikram Sethi
2021-04-30 11:25         ` Shanker R Donthineni
     [not found]           ` <87czucngdc.wl-maz@kernel.org>
2021-04-30 13:07             ` Shanker R Donthineni
2021-04-30 14:58             ` Shanker R Donthineni
     [not found]               ` <878s4zokll.wl-maz@kernel.org>
2021-04-30 16:57                 ` Vikram Sethi
2021-05-01  9:30                   ` Marc Zyngier
2021-05-01 11:36                     ` Shanker R Donthineni
     [not found]                       ` <87czu8uowe.wl-maz@kernel.org>
2021-05-03 12:08                         ` Shanker R Donthineni
2021-05-02 17:56                     ` Vikram Sethi
2021-05-03 10:17                       ` Marc Zyngier
2021-05-03 13:35                         ` Mark Kettenis
2021-05-03 13:59                           ` Vikram Sethi
2021-05-03 14:44                             ` Alex Williamson
2021-05-03 22:03                               ` Vikram Sethi
2021-05-04  8:30                                 ` Will Deacon
2021-05-05 18:02                                   ` Catalin Marinas
2021-05-06  7:22                                     ` Christoph Hellwig
2021-05-08 16:33                                     ` Shanker R Donthineni
2021-06-02  9:37                                       ` Marc Zyngier
2021-05-04 18:03                                 ` Alex Williamson
2021-06-02  9:11                                   ` Marc Zyngier
2021-04-30  9:54   ` Lorenzo Pieralisi
2021-04-30 12:38     ` Jason Gunthorpe
2021-04-29 16:29 ` Shanker Donthineni [this message]
2021-05-03  7:01 ` [RFC 0/2] [RFC] Honor PCI prefetchable attributes for a virtual machine on ARM64 Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210429162906.32742-3-sdonthineni@nvidia.com \
    --to=sdonthineni@nvidia.com \
    --cc=alex.williamson@redhat.com \
    --cc=catalin.marinas@arm.com \
    --cc=christoffer.dall@arm.com \
    --cc=jsequeira@nvidia.com \
    --cc=kvm@vger.kernel.org \
    --cc=kvmarm@lists.cs.columbia.edu \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=maz@kernel.org \
    --cc=vsethi@nvidia.com \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git
	git clone --mirror https://lore.kernel.org/lkml/7 lkml/git/7.git
	git clone --mirror https://lore.kernel.org/lkml/8 lkml/git/8.git
	git clone --mirror https://lore.kernel.org/lkml/9 lkml/git/9.git
	git clone --mirror https://lore.kernel.org/lkml/10 lkml/git/10.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org
	public-inbox-index lkml

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git