qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: "Michael S. Tsirkin" <mst@redhat.com>
To: qemu-devel@nongnu.org
Cc: Peter Maydell <peter.maydell@linaro.org>,
	Jason Wang <jasowang@redhat.com>,
	Richard Henderson <rth@twiddle.net>,
	Eduardo Habkost <ehabkost@redhat.com>,
	Paolo Bonzini <pbonzini@redhat.com>
Subject: [Qemu-devel] [PULL 43/49] intel_iommu: large page support
Date: Thu, 4 Feb 2016 23:53:53 +0200	[thread overview]
Message-ID: <1454612376-7072-44-git-send-email-mst@redhat.com> (raw)
In-Reply-To: <1454612376-7072-1-git-send-email-mst@redhat.com>

From: Jason Wang <jasowang@redhat.com>

Currently intel_iommu only supports 4K pages, which may not be sufficient
to cover the guest working set. This patch enables 2M and 1G mappings
for intel_iommu. This is also useful for a future device IOTLB
implementation, which will get a better hit rate.

The major work is adding a page mask field to the IOTLB entry so that it
supports large pages, and using the slpte level as part of the key for
IOTLB lookup. MAMV is increased to 18 to support direct invalidation of
1G mappings.

Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/i386/intel_iommu_internal.h |  6 ++--
 include/hw/i386/intel_iommu.h  |  1 +
 hw/i386/intel_iommu.c          | 76 ++++++++++++++++++++++++++++++------------
 3 files changed, 59 insertions(+), 24 deletions(-)

diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index ba288ab..e5f514c 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -113,6 +113,7 @@
 
 /* The shift of source_id in the key of IOTLB hash table */
 #define VTD_IOTLB_SID_SHIFT         36
+#define VTD_IOTLB_LVL_SHIFT         44
 #define VTD_IOTLB_MAX_SIZE          1024    /* Max size of the hash table */
 
 /* IOTLB_REG */
@@ -185,9 +186,10 @@
 #define VTD_CAP_ND                  (((VTD_DOMAIN_ID_SHIFT - 4) / 2) & 7ULL)
 #define VTD_MGAW                    39  /* Maximum Guest Address Width */
 #define VTD_CAP_MGAW                (((VTD_MGAW - 1) & 0x3fULL) << 16)
-#define VTD_MAMV                    9ULL
+#define VTD_MAMV                    18ULL
 #define VTD_CAP_MAMV                (VTD_MAMV << 48)
 #define VTD_CAP_PSI                 (1ULL << 39)
+#define VTD_CAP_SLLPS               ((1ULL << 34) | (1ULL << 35))
 
 /* Supported Adjusted Guest Address Widths */
 #define VTD_CAP_SAGAW_SHIFT         8
@@ -320,7 +322,7 @@ typedef struct VTDInvDesc VTDInvDesc;
 /* Information about page-selective IOTLB invalidate */
 struct VTDIOTLBPageInvInfo {
     uint16_t domain_id;
-    uint64_t gfn;
+    uint64_t addr;
     uint8_t mask;
 };
 typedef struct VTDIOTLBPageInvInfo VTDIOTLBPageInvInfo;
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 5dbadb7..b024ffa 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -83,6 +83,7 @@ struct VTDIOTLBEntry {
     uint64_t gfn;
     uint16_t domain_id;
     uint64_t slpte;
+    uint64_t mask;
     bool read_flags;
     bool write_flags;
 };
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index c25b1fd..347718f 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -153,14 +153,27 @@ static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value,
     return entry->domain_id == domain_id;
 }
 
+/* The shift of an addr for a certain level of paging structure */
+static inline uint32_t vtd_slpt_level_shift(uint32_t level)
+{
+    return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_SL_LEVEL_BITS;
+}
+
+static inline uint64_t vtd_slpt_level_page_mask(uint32_t level)
+{
+    return ~((1ULL << vtd_slpt_level_shift(level)) - 1);
+}
+
 static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value,
                                         gpointer user_data)
 {
     VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
     VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data;
-    uint64_t gfn = info->gfn & info->mask;
+    uint64_t gfn = (info->addr >> VTD_PAGE_SHIFT_4K) & info->mask;
+    uint64_t gfn_tlb = (info->addr & entry->mask) >> VTD_PAGE_SHIFT_4K;
     return (entry->domain_id == info->domain_id) &&
-            ((entry->gfn & info->mask) == gfn);
+            (((entry->gfn & info->mask) == gfn) ||
+             (entry->gfn == gfn_tlb));
 }
 
 /* Reset all the gen of VTDAddressSpace to zero and set the gen of
@@ -194,24 +207,46 @@ static void vtd_reset_iotlb(IntelIOMMUState *s)
     g_hash_table_remove_all(s->iotlb);
 }
 
+static uint64_t vtd_get_iotlb_key(uint64_t gfn, uint8_t source_id,
+                                  uint32_t level)
+{
+    return gfn | ((uint64_t)(source_id) << VTD_IOTLB_SID_SHIFT) |
+           ((uint64_t)(level) << VTD_IOTLB_LVL_SHIFT);
+}
+
+static uint64_t vtd_get_iotlb_gfn(hwaddr addr, uint32_t level)
+{
+    return (addr & vtd_slpt_level_page_mask(level)) >> VTD_PAGE_SHIFT_4K;
+}
+
 static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id,
                                        hwaddr addr)
 {
+    VTDIOTLBEntry *entry;
     uint64_t key;
+    int level;
+
+    for (level = VTD_SL_PT_LEVEL; level < VTD_SL_PML4_LEVEL; level++) {
+        key = vtd_get_iotlb_key(vtd_get_iotlb_gfn(addr, level),
+                                source_id, level);
+        entry = g_hash_table_lookup(s->iotlb, &key);
+        if (entry) {
+            goto out;
+        }
+    }
 
-    key = (addr >> VTD_PAGE_SHIFT_4K) |
-           ((uint64_t)(source_id) << VTD_IOTLB_SID_SHIFT);
-    return g_hash_table_lookup(s->iotlb, &key);
-
+out:
+    return entry;
 }
 
 static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
                              uint16_t domain_id, hwaddr addr, uint64_t slpte,
-                             bool read_flags, bool write_flags)
+                             bool read_flags, bool write_flags,
+                             uint32_t level)
 {
     VTDIOTLBEntry *entry = g_malloc(sizeof(*entry));
     uint64_t *key = g_malloc(sizeof(*key));
-    uint64_t gfn = addr >> VTD_PAGE_SHIFT_4K;
+    uint64_t gfn = vtd_get_iotlb_gfn(addr, level);
 
     VTD_DPRINTF(CACHE, "update iotlb sid 0x%"PRIx16 " gpa 0x%"PRIx64
                 " slpte 0x%"PRIx64 " did 0x%"PRIx16, source_id, addr, slpte,
@@ -226,7 +261,8 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
     entry->slpte = slpte;
     entry->read_flags = read_flags;
     entry->write_flags = write_flags;
-    *key = gfn | ((uint64_t)(source_id) << VTD_IOTLB_SID_SHIFT);
+    entry->mask = vtd_slpt_level_page_mask(level);
+    *key = vtd_get_iotlb_key(gfn, source_id, level);
     g_hash_table_replace(s->iotlb, key, entry);
 }
 
@@ -501,12 +537,6 @@ static inline dma_addr_t vtd_get_slpt_base_from_context(VTDContextEntry *ce)
     return ce->lo & VTD_CONTEXT_ENTRY_SLPTPTR;
 }
 
-/* The shift of an addr for a certain level of paging structure */
-static inline uint32_t vtd_slpt_level_shift(uint32_t level)
-{
-    return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_SL_LEVEL_BITS;
-}
-
 static inline uint64_t vtd_get_slpte_addr(uint64_t slpte)
 {
     return slpte & VTD_SL_PT_BASE_ADDR_MASK;
@@ -762,7 +792,7 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
     VTDContextEntry ce;
     uint8_t bus_num = pci_bus_num(bus);
     VTDContextCacheEntry *cc_entry = &vtd_as->context_cache_entry;
-    uint64_t slpte;
+    uint64_t slpte, page_mask;
     uint32_t level;
     uint16_t source_id = vtd_make_source_id(bus_num, devfn);
     int ret_fr;
@@ -802,6 +832,7 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
         slpte = iotlb_entry->slpte;
         reads = iotlb_entry->read_flags;
         writes = iotlb_entry->write_flags;
+        page_mask = iotlb_entry->mask;
         goto out;
     }
     /* Try to fetch context-entry from cache first */
@@ -848,12 +879,13 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
         return;
     }
 
+    page_mask = vtd_slpt_level_page_mask(level);
     vtd_update_iotlb(s, source_id, VTD_CONTEXT_ENTRY_DID(ce.hi), addr, slpte,
-                     reads, writes);
+                     reads, writes, level);
 out:
-    entry->iova = addr & VTD_PAGE_MASK_4K;
-    entry->translated_addr = vtd_get_slpte_addr(slpte) & VTD_PAGE_MASK_4K;
-    entry->addr_mask = ~VTD_PAGE_MASK_4K;
+    entry->iova = addr & page_mask;
+    entry->translated_addr = vtd_get_slpte_addr(slpte) & page_mask;
+    entry->addr_mask = ~page_mask;
     entry->perm = (writes ? 2 : 0) + (reads ? 1 : 0);
 }
 
@@ -991,7 +1023,7 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
 
     assert(am <= VTD_MAMV);
     info.domain_id = domain_id;
-    info.gfn = addr >> VTD_PAGE_SHIFT_4K;
+    info.addr = addr;
     info.mask = ~((1 << am) - 1);
     g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
 }
@@ -1917,7 +1949,7 @@ static void vtd_init(IntelIOMMUState *s)
     s->iq_last_desc_type = VTD_INV_DESC_NONE;
     s->next_frcd_reg = 0;
     s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | VTD_CAP_MGAW |
-             VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI;
+             VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS;
     s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
 
     vtd_reset_context_cache(s);
-- 
MST

  parent reply	other threads:[~2016-02-04 21:54 UTC|newest]

Thread overview: 75+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-02-04 21:50 [Qemu-devel] [PULL 00/49] pc and misc cleanups and fixes, virtio optimizations Michael S. Tsirkin
2016-02-04 21:50 ` [Qemu-devel] [PULL 14/49] virtio: combine write of an entry into used ring Michael S. Tsirkin
2016-02-04 21:50 ` [Qemu-devel] [PULL 01/49] Fix virtio migration Michael S. Tsirkin
2016-02-04 21:50 ` [Qemu-devel] [PULL 02/49] pc: acpi: merge SSDT into DSDT Michael S. Tsirkin
2016-02-04 21:51 ` [Qemu-devel] [PULL 03/49] tests: pc: acpi: drop not needed 'expected SSDT' blobs Michael S. Tsirkin
2016-02-04 21:51 ` [Qemu-devel] [PULL 04/49] tests: pc: acpi: add expected DSDT.bridge blobs and update DSDT blobs Michael S. Tsirkin
2016-02-04 21:51 ` [Qemu-devel] [PULL 05/49] virtio: move VirtQueueElement at the beginning of the structs Michael S. Tsirkin
2016-02-04 21:51 ` [Qemu-devel] [PULL 06/49] virtio: move allocation to virtqueue_pop/vring_pop Michael S. Tsirkin
2016-02-05 12:52   ` Peter Maydell
2016-02-06 18:10     ` Michael S. Tsirkin
2016-02-04 21:51 ` [Qemu-devel] [PULL 07/49] virtio: introduce qemu_get/put_virtqueue_element Michael S. Tsirkin
2016-02-04 21:51 ` [Qemu-devel] [PULL 08/49] virtio: introduce virtqueue_alloc_element Michael S. Tsirkin
2016-02-04 21:51 ` [Qemu-devel] [PULL 09/49] virtio: slim down allocation of VirtQueueElements Michael S. Tsirkin
2016-02-04 21:51 ` [Qemu-devel] [PULL 10/49] vring: " Michael S. Tsirkin
2016-02-04 21:51 ` [Qemu-devel] [PULL 11/49] virtio: combine the read of a descriptor Michael S. Tsirkin
2016-02-04 21:51 ` [Qemu-devel] [PULL 12/49] virtio: cache used_idx in a VirtQueue field Michael S. Tsirkin
2016-02-04 21:51 ` [Qemu-devel] [PULL 13/49] virtio: read avail_idx from VQ only when necessary Michael S. Tsirkin
2016-02-04 21:51 ` [Qemu-devel] [PULL 15/49] hw/pxb: add pxb devices to the bridge category Michael S. Tsirkin
2016-02-04 21:52 ` [Qemu-devel] [PULL 16/49] vhost-user-test: use correct ROM to speed up and avoid spurious failures Michael S. Tsirkin
2016-02-04 21:52 ` [Qemu-devel] [PULL 17/49] hw/pci: ensure that only PCI/PCIe bridges can be attached to pxb/pxb-pcie devices Michael S. Tsirkin
2016-02-04 21:52 ` [Qemu-devel] [PULL 18/49] ipmi: replace goto by a return statement Michael S. Tsirkin
2016-02-04 21:52 ` [Qemu-devel] [PULL 19/49] ipmi: replace *_MAXCMD defines Michael S. Tsirkin
2016-02-04 21:52 ` [Qemu-devel] [PULL 20/49] ipmi: cleanup error_report messages Michael S. Tsirkin
2016-02-04 21:52 ` [Qemu-devel] [PULL 21/49] ipmi: fix SDR length value Michael S. Tsirkin
2016-02-04 21:52 ` [Qemu-devel] [PULL 22/49] ipmi: introduce a struct ipmi_sdr_compact Michael S. Tsirkin
2016-02-04 21:52 ` [Qemu-devel] [PULL 23/49] ipmi: add get and set SENSOR_TYPE commands Michael S. Tsirkin
2016-02-04 21:52 ` [Qemu-devel] [PULL 24/49] ipmi: add GET_SYS_RESTART_CAUSE chassis command Michael S. Tsirkin
2016-02-04 21:52 ` [Qemu-devel] [PULL 25/49] ipmi: add ACPI power and GUID commands Michael S. Tsirkin
2016-02-04 21:52 ` [Qemu-devel] [PULL 26/49] pc: Move PcGuestInfo declaration to top of file Michael S. Tsirkin
2016-02-04 21:52 ` [Qemu-devel] [PULL 27/49] pc: Eliminate struct PcGuestInfoState Michael S. Tsirkin
2016-02-04 21:52 ` [Qemu-devel] [PULL 28/49] pc: Simplify pc_memory_init() signature Michael S. Tsirkin
2016-02-04 21:52 ` [Qemu-devel] [PULL 29/49] pc: Simplify xen_load_linux() signature Michael S. Tsirkin
2016-02-04 21:52 ` [Qemu-devel] [PULL 30/49] acpi: Remove guest_info parameters from functions Michael S. Tsirkin
2016-02-04 21:53 ` [Qemu-devel] [PULL 31/49] acpi: Don't save PcGuestInfo on AcpiBuildState Michael S. Tsirkin
2016-02-04 21:53 ` [Qemu-devel] [PULL 32/49] pc: Remove compat fields from PcGuestInfo Michael S. Tsirkin
2016-02-04 21:53 ` [Qemu-devel] [PULL 33/49] pc: Remove RAM size " Michael S. Tsirkin
2016-02-04 21:53 ` [Qemu-devel] [PULL 34/49] pc: Remove PcGuestInfo.isapc_ram_fw field Michael S. Tsirkin
2016-02-04 21:53 ` [Qemu-devel] [PULL 35/49] pc: Move PcGuestInfo.fw_cfg to PCMachineState Michael S. Tsirkin
2016-02-04 21:53 ` [Qemu-devel] [PULL 36/49] pc: Move APIC and NUMA data from PcGuestInfo " Michael S. Tsirkin
2016-02-04 21:53 ` [Qemu-devel] [PULL 37/49] pc: Eliminate PcGuestInfo struct Michael S. Tsirkin
2016-02-04 21:53 ` [Qemu-devel] [PULL 38/49] acpi: take oem_id in build_header(), optionally Michael S. Tsirkin
2016-02-04 22:25   ` Laszlo Ersek
2016-02-04 21:53 ` [Qemu-devel] [PULL 39/49] acpi: expose oem_id and oem_table_id in build_rsdt() Michael S. Tsirkin
2016-02-04 21:53 ` [Qemu-devel] [PULL 40/49] acpi: add function to extract oem_id and oem_table_id from the user's SLIC Michael S. Tsirkin
2016-02-04 21:53 ` [Qemu-devel] [PULL 41/49] pc: set the OEM fields in the RSDT and the FADT from the SLIC Michael S. Tsirkin
2016-02-04 21:53 ` [Qemu-devel] [PULL 42/49] dimm: Correct type of MemoryHotplugState->base Michael S. Tsirkin
2016-02-04 21:53 ` Michael S. Tsirkin [this message]
2016-02-04 21:53 ` [Qemu-devel] [PULL 44/49] fix MSI injection on Xen Michael S. Tsirkin
2016-02-04 21:54 ` [Qemu-devel] [PULL 45/49] net: set endianness on all backend devices Michael S. Tsirkin
2016-02-05  8:54   ` Greg Kurz
2016-02-04 21:54 ` [Qemu-devel] [PULL 46/49] i386/acpi: make floppy controller object dynamic Michael S. Tsirkin
2016-02-04 21:54 ` [Qemu-devel] [PULL 47/49] expose floppy drive geometry and CMOS type Michael S. Tsirkin
2016-02-04 21:54 ` [Qemu-devel] [PULL 48/49] i386: populate floppy drive information in DSDT Michael S. Tsirkin
2016-02-05 18:25   ` Igor Mammedov
2016-02-08 13:14     ` Roman Kagan
2016-02-08 20:20       ` John Snow
2016-02-09 15:52         ` Roman Kagan
2016-02-09 16:22           ` John Snow
2016-02-09 18:36             ` Laszlo Ersek
2016-02-09 18:48               ` Michael S. Tsirkin
2016-02-10 16:14                 ` John Snow
2016-02-10 16:48                   ` Michael S. Tsirkin
2016-02-10 17:24                   ` Roman Kagan
2016-02-10 17:10               ` Roman Kagan
2016-02-10 17:16                 ` John Snow
2016-02-10 17:33                   ` Roman Kagan
2016-02-10 21:54                     ` John Snow
2016-02-13 17:26               ` Kevin O'Connor
2016-02-14  6:45                 ` Laszlo Ersek
2016-02-14 15:02                 ` Michael S. Tsirkin
2016-02-17 14:31                   ` Roman Kagan
2016-02-10 16:57             ` Roman Kagan
2016-02-04 21:54 ` [Qemu-devel] [PULL 49/49] acpi: update expected DSDT Michael S. Tsirkin
2016-02-05 15:03 ` [Qemu-devel] [PULL 00/49] pc and misc cleanups and fixes, virtio optimizations Peter Maydell
2016-02-05 18:19   ` Igor Mammedov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1454612376-7072-44-git-send-email-mst@redhat.com \
    --to=mst@redhat.com \
    --cc=ehabkost@redhat.com \
    --cc=jasowang@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=rth@twiddle.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).