qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: David Woodhouse <dwmw2@infradead.org>
To: qemu-devel <qemu-devel@nongnu.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>, x86 <x86@kernel.org>,
	kvm <kvm@vger.kernel.org>
Subject: [PATCH] target/i386: Support up to 32768 CPUs without IRQ remapping
Date: Mon, 05 Oct 2020 15:18:19 +0100	[thread overview]
Message-ID: <78097f9218300e63e751e077a0a5ca029b56ba46.camel@infradead.org> (raw)

[-- Attachment #1: Type: text/plain, Size: 9059 bytes --]

The IOAPIC has an 'Extended Destination ID' field in its RTE, which maps
to bits 11-4 of the MSI address. Since those address bits fall within a
given 4KiB page they were historically non-trivial to use on real hardware.

The Intel IOMMU uses the lowest bit to indicate a remappable format MSI,
and then the remaining 7 bits are part of the index.

Where the remappable format bit isn't set, we can actually use the other
seven to allow external (IOAPIC and MSI) interrupts to reach up to 32768
CPUs instead of just the 255 permitted on bare metal.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 hw/i386/kvm/apic.c                          |  7 ++
 hw/i386/pc.c                                | 16 ++---
 include/standard-headers/asm-x86/kvm_para.h |  1 +
 target/i386/cpu.c                           |  5 +-
 target/i386/kvm.c                           | 74 +++++++++++++++------
 target/i386/kvm_i386.h                      |  2 +
 6 files changed, 75 insertions(+), 30 deletions(-)

diff --git a/hw/i386/kvm/apic.c b/hw/i386/kvm/apic.c
index 4eb2d77b87..aeb3366ae8 100644
--- a/hw/i386/kvm/apic.c
+++ b/hw/i386/kvm/apic.c
@@ -183,6 +183,13 @@ static void kvm_send_msi(MSIMessage *msg)
 {
     int ret;
 
+    /*
+     * The message has already passed through interrupt remapping if enabled,
+     * but the legacy extended destination ID in low bits still needs to be
+     * handled.
+     */
+    msg->address = kvm_swizzle_msi_ext_dest_id(msg->address);
+
     ret = kvm_irqchip_send_msi(kvm_state, *msg);
     if (ret < 0) {
         fprintf(stderr, "KVM: injection failed, MSI lost (%s)\n",
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index e87be5d29a..a06c091227 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -99,6 +99,7 @@
 
 GlobalProperty pc_compat_5_1[] = {
     { "ICH9-LPC", "x-smi-cpu-hotplug", "off" },
+    { TYPE_X86_CPU, "kvm-msi-ext-dest-id", "off" },
 };
 const size_t pc_compat_5_1_len = G_N_ELEMENTS(pc_compat_5_1);
 
@@ -807,17 +808,12 @@ void pc_machine_done(Notifier *notifier, void *data)
         fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus);
     }
 
-    if (x86ms->apic_id_limit > 255 && !xen_enabled()) {
-        IntelIOMMUState *iommu = INTEL_IOMMU_DEVICE(x86_iommu_get_default());
 
-        if (!iommu || !x86_iommu_ir_supported(X86_IOMMU_DEVICE(iommu)) ||
-            iommu->intr_eim != ON_OFF_AUTO_ON) {
-            error_report("current -smp configuration requires "
-                         "Extended Interrupt Mode enabled. "
-                         "You can add an IOMMU using: "
-                         "-device intel-iommu,intremap=on,eim=on");
-            exit(EXIT_FAILURE);
-        }
+    if (x86ms->apic_id_limit > 255 && !xen_enabled() &&
+        !kvm_irqchip_in_kernel()) {
+        error_report("current -smp configuration requires kernel "
+                     "irqchip support.");
+        exit(EXIT_FAILURE);
     }
 }
 
diff --git a/include/standard-headers/asm-x86/kvm_para.h b/include/standard-headers/asm-x86/kvm_para.h
index 07877d3295..215d01b4ec 100644
--- a/include/standard-headers/asm-x86/kvm_para.h
+++ b/include/standard-headers/asm-x86/kvm_para.h
@@ -32,6 +32,7 @@
 #define KVM_FEATURE_POLL_CONTROL	12
 #define KVM_FEATURE_PV_SCHED_YIELD	13
 #define KVM_FEATURE_ASYNC_PF_INT	14
+#define KVM_FEATURE_MSI_EXT_DEST_ID	15
 
 #define KVM_HINTS_REALTIME      0
 
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index f37eb7b675..a93f50a6a7 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -799,7 +799,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
             "kvmclock", "kvm-nopiodelay", "kvm-mmu", "kvmclock",
             "kvm-asyncpf", "kvm-steal-time", "kvm-pv-eoi", "kvm-pv-unhalt",
             NULL, "kvm-pv-tlb-flush", NULL, "kvm-pv-ipi",
-            "kvm-poll-control", "kvm-pv-sched-yield", "kvm-asyncpf-int", NULL,
+            "kvm-poll-control", "kvm-pv-sched-yield", "kvm-asyncpf-int", "kvm-msi-ext-dest-id",
             NULL, NULL, NULL, NULL,
             NULL, NULL, NULL, NULL,
             "kvmclock-stable-bit", NULL, NULL, NULL,
@@ -4109,6 +4109,7 @@ static PropValue kvm_default_props[] = {
     { "kvm-pv-eoi", "on" },
     { "kvmclock-stable-bit", "on" },
     { "x2apic", "on" },
+    { "kvm-msi-ext-dest-id", "off" },
     { "acpi", "off" },
     { "monitor", "off" },
     { "svm", "off" },
@@ -5132,6 +5133,8 @@ static void x86_cpu_load_model(X86CPU *cpu, X86CPUModel *model)
     if (kvm_enabled()) {
         if (!kvm_irqchip_in_kernel()) {
             x86_cpu_change_kvm_default("x2apic", "off");
+        } else if (kvm_irqchip_is_split() && kvm_enable_x2apic()) {
+            x86_cpu_change_kvm_default("kvm-msi-ext-dest-id", "on");
         }
 
         x86_cpu_apply_props(cpu, kvm_default_props);
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index f6dae4cfb6..90952cae7c 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -420,6 +420,9 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
         if (!kvm_irqchip_in_kernel()) {
             ret &= ~(1U << KVM_FEATURE_PV_UNHALT);
         }
+        if (kvm_irqchip_is_split()) {
+            ret |= 1U << KVM_FEATURE_MSI_EXT_DEST_ID;
+        }
     } else if (function == KVM_CPUID_FEATURES && reg == R_EDX) {
         ret |= 1U << KVM_HINTS_REALTIME;
     }
@@ -4583,38 +4586,71 @@ int kvm_arch_irqchip_create(KVMState *s)
     }
 }
 
+uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address)
+{
+        CPUX86State *env = &X86_CPU(first_cpu)->env;
+        uint64_t ext_id;
+
+        if (!first_cpu ||
+            !(env->features[FEAT_KVM] & (1 << KVM_FEATURE_MSI_EXT_DEST_ID))) {
+            return address;
+        }
+
+        /*
+         * If the remappable format bit is set, or the upper bits are
+         * already set in address_hi, or the low extended bits aren't
+         * there anyway, do nothing.
+         */
+        ext_id = address & (0xff << MSI_ADDR_DEST_IDX_SHIFT);
+        if (!ext_id || (ext_id & (1 << MSI_ADDR_DEST_IDX_SHIFT)) ||
+            (address >> 32))
+            return address;
+
+        address &= ~ext_id;
+        address |= ext_id << 35;
+        return address;
+}
+
 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
                              uint64_t address, uint32_t data, PCIDevice *dev)
 {
     X86IOMMUState *iommu = x86_iommu_get_default();
 
     if (iommu) {
-        int ret;
-        MSIMessage src, dst;
         X86IOMMUClass *class = X86_IOMMU_DEVICE_GET_CLASS(iommu);
 
-        if (!class->int_remap) {
-            return 0;
-        }
+        if (class->int_remap) {
+            int ret;
+            MSIMessage src, dst;
 
-        src.address = route->u.msi.address_hi;
-        src.address <<= VTD_MSI_ADDR_HI_SHIFT;
-        src.address |= route->u.msi.address_lo;
-        src.data = route->u.msi.data;
+            src.address = route->u.msi.address_hi;
+            src.address <<= VTD_MSI_ADDR_HI_SHIFT;
+            src.address |= route->u.msi.address_lo;
+            src.data = route->u.msi.data;
 
-        ret = class->int_remap(iommu, &src, &dst, dev ? \
-                               pci_requester_id(dev) : \
-                               X86_IOMMU_SID_INVALID);
-        if (ret) {
-            trace_kvm_x86_fixup_msi_error(route->gsi);
-            return 1;
-        }
+            ret = class->int_remap(iommu, &src, &dst, dev ?     \
+                                   pci_requester_id(dev) :      \
+                                   X86_IOMMU_SID_INVALID);
+            if (ret) {
+                trace_kvm_x86_fixup_msi_error(route->gsi);
+                return 1;
+            }
+
+            /*
+             * Handled untranslated compatibilty format interrupt with
+             * extended destination ID in the low bits 11-5. */
+            dst.address = kvm_swizzle_msi_ext_dest_id(dst.address);
 
-        route->u.msi.address_hi = dst.address >> VTD_MSI_ADDR_HI_SHIFT;
-        route->u.msi.address_lo = dst.address & VTD_MSI_ADDR_LO_MASK;
-        route->u.msi.data = dst.data;
+            route->u.msi.address_hi = dst.address >> VTD_MSI_ADDR_HI_SHIFT;
+            route->u.msi.address_lo = dst.address & VTD_MSI_ADDR_LO_MASK;
+            route->u.msi.data = dst.data;
+            return 0;
+        }
     }
 
+    address = kvm_swizzle_msi_ext_dest_id(address);
+    route->u.msi.address_hi = address >> VTD_MSI_ADDR_HI_SHIFT;
+    route->u.msi.address_lo = address & VTD_MSI_ADDR_LO_MASK;
     return 0;
 }
 
diff --git a/target/i386/kvm_i386.h b/target/i386/kvm_i386.h
index 0fce4e51d2..ede94760ae 100644
--- a/target/i386/kvm_i386.h
+++ b/target/i386/kvm_i386.h
@@ -49,4 +49,6 @@ bool kvm_has_waitpkg(void);
 
 bool kvm_hv_vpindex_settable(void);
 
+uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address);
+
 #endif


[-- Attachment #2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 5174 bytes --]

             reply	other threads:[~2020-10-05 14:22 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-10-05 14:18 David Woodhouse [this message]
2020-10-07 12:24 ` [PATCH] target/i386: Support up to 32768 CPUs without IRQ remapping David Woodhouse
2020-10-08  6:56 ` Paolo Bonzini
2020-10-08  7:29   ` David Woodhouse
2020-10-08  7:53     ` Paolo Bonzini
2020-10-19 12:21       ` David Woodhouse
2020-10-19 13:55         ` Paolo Bonzini
2020-10-19 14:55           ` [PATCH] x86/kvm: Reserve KVM_FEATURE_MSI_EXT_DEST_ID David Woodhouse

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=78097f9218300e63e751e077a0a5ca029b56ba46.camel@infradead.org \
    --to=dwmw2@infradead.org \
    --cc=kvm@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).