All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/4] Support x2APIC mode with TCG accelerator
@ 2023-02-21 16:04 Bui Quang Minh
  2023-02-21 16:04 ` [PATCH 1/4] apic: add support for x2APIC mode Bui Quang Minh
                   ` (4 more replies)
  0 siblings, 5 replies; 22+ messages in thread
From: Bui Quang Minh @ 2023-02-21 16:04 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Richard Henderson, Eduardo Habkost,
	Michael S . Tsirkin, Marcel Apfelbaum, Bui Quang Minh

This series implements x2APIC mode in userspace local APIC and the
RDMSR/WRMSR helper to access x2APIC registers in x2APIC mode. With this
series, we can now boot up Linux kernel in x2APIC with TCG accelerator.

Bui Quang Minh (4):
  apic: add support for x2APIC mode
  i386/tcg: implement x2APIC registers MSR access
  intel_iommu: allow Extended Interrupt Mode when using userspace local
    APIC
  test/avocado: test Linux boot up in x2APIC with userspace local APIC

 hw/i386/intel_iommu.c                |  11 --
 hw/intc/apic.c                       | 211 +++++++++++++++++++++------
 hw/intc/apic_common.c                |   2 +-
 include/hw/i386/apic.h               |   5 +-
 include/hw/i386/apic_internal.h      |   2 +-
 target/i386/cpu-sysemu.c             |   5 +
 target/i386/cpu.c                    |   5 +-
 target/i386/cpu.h                    |   4 +
 target/i386/tcg/sysemu/misc_helper.c |  27 ++++
 tests/avocado/tcg_x2apic.py          |  91 ++++++++++++
 10 files changed, 302 insertions(+), 61 deletions(-)
 create mode 100644 tests/avocado/tcg_x2apic.py

-- 
2.25.1



^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH 1/4] apic: add support for x2APIC mode
  2023-02-21 16:04 [PATCH 0/4] Support x2APIC mode with TCG accelerator Bui Quang Minh
@ 2023-02-21 16:04 ` Bui Quang Minh
  2023-02-24 14:29   ` Igor Mammedov
  2023-03-06 16:01   ` David Woodhouse
  2023-02-21 16:04 ` [PATCH 2/4] i386/tcg: implement x2APIC registers MSR access Bui Quang Minh
                   ` (3 subsequent siblings)
  4 siblings, 2 replies; 22+ messages in thread
From: Bui Quang Minh @ 2023-02-21 16:04 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Richard Henderson, Eduardo Habkost,
	Michael S . Tsirkin, Marcel Apfelbaum, Bui Quang Minh

This commit refactors APIC registers read/write function to support both
MMIO read/write in xAPIC mode and MSR read/write in x2APIC mode. Also,
support larger APIC ID, self IPI, new IPI destination determination in
x2APIC mode.

Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
---
 hw/intc/apic.c                  | 211 +++++++++++++++++++++++++-------
 hw/intc/apic_common.c           |   2 +-
 include/hw/i386/apic.h          |   5 +-
 include/hw/i386/apic_internal.h |   2 +-
 4 files changed, 172 insertions(+), 48 deletions(-)

diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index 2d3e55f4e2..205d5923ec 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -30,6 +30,7 @@
 #include "hw/i386/apic-msidef.h"
 #include "qapi/error.h"
 #include "qom/object.h"
+#include "tcg/helper-tcg.h"
 
 #define MAX_APICS 255
 #define MAX_APIC_WORDS 8
@@ -48,7 +49,7 @@ DECLARE_INSTANCE_CHECKER(APICCommonState, APIC,
 static void apic_set_irq(APICCommonState *s, int vector_num, int trigger_mode);
 static void apic_update_irq(APICCommonState *s);
 static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask,
-                                      uint8_t dest, uint8_t dest_mode);
+                                      uint32_t dest, uint8_t dest_mode);
 
 /* Find first bit starting from msb */
 static int apic_fls_bit(uint32_t value)
@@ -275,7 +276,7 @@ static void apic_bus_deliver(const uint32_t *deliver_bitmask,
                  apic_set_irq(apic_iter, vector_num, trigger_mode) );
 }
 
-void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode,
+void apic_deliver_irq(uint32_t dest, uint8_t dest_mode, uint8_t delivery_mode,
                       uint8_t vector_num, uint8_t trigger_mode)
 {
     uint32_t deliver_bitmask[MAX_APIC_WORDS];
@@ -287,8 +288,38 @@ void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode,
     apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode);
 }
 
+bool is_x2apic_mode(DeviceState *dev)
+{
+    APICCommonState *s = APIC(dev);
+
+    return s->apicbase & MSR_IA32_APICBASE_EXTD;
+}
+
 static void apic_set_base(APICCommonState *s, uint64_t val)
 {
+    /*
+     * Transition into invalid state
+     * (s->apicbase & MSR_IA32_APICBASE_ENABLE == 0) &&
+     * (s->apicbase & MSR_IA32_APICBASE_EXTD) == 1
+     */
+    if (!(val & MSR_IA32_APICBASE_ENABLE) &&
+        (val & MSR_IA32_APICBASE_EXTD))
+        raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+
+    /* Invalid transition from disabled mode to x2APIC */
+    if (!(s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
+        !(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
+        (val & MSR_IA32_APICBASE_ENABLE) &&
+        (val & MSR_IA32_APICBASE_EXTD))
+        raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+
+    /* Invalid transition from x2APIC to xAPIC */
+    if ((s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
+        (s->apicbase & MSR_IA32_APICBASE_EXTD) &&
+        (val & MSR_IA32_APICBASE_ENABLE) &&
+        !(val & MSR_IA32_APICBASE_EXTD))
+        raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+
     s->apicbase = (val & 0xfffff000) |
         (s->apicbase & (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE));
     /* if disabled, cannot be enabled again */
@@ -297,6 +328,21 @@ static void apic_set_base(APICCommonState *s, uint64_t val)
         cpu_clear_apic_feature(&s->cpu->env);
         s->spurious_vec &= ~APIC_SV_ENABLE;
     }
+
+    /* Transition from xAPIC to x2APIC */
+    if (cpu_has_x2apic_feature(&s->cpu->env) &&
+        !(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
+        (val & MSR_IA32_APICBASE_EXTD)) {
+        s->apicbase |= MSR_IA32_APICBASE_EXTD;
+
+        s->log_dest = ((s->initial_apic_id & 0xffff0) << 16) |
+                      (1 << (s->initial_apic_id & 0xf));
+
+        /* disable MMIO interface */
+        qemu_mutex_lock_iothread();
+        memory_region_set_enabled(&s->io_memory, false);
+        qemu_mutex_unlock_iothread();
+    }
 }
 
 static void apic_set_tpr(APICCommonState *s, uint8_t val)
@@ -454,7 +500,7 @@ static int apic_find_dest(uint8_t dest)
 }
 
 static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask,
-                                      uint8_t dest, uint8_t dest_mode)
+                                      uint32_t dest, uint8_t dest_mode)
 {
     APICCommonState *apic_iter;
     int i;
@@ -474,14 +520,22 @@ static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask,
         for(i = 0; i < MAX_APICS; i++) {
             apic_iter = local_apics[i];
             if (apic_iter) {
-                if (apic_iter->dest_mode == 0xf) {
-                    if (dest & apic_iter->log_dest)
-                        apic_set_bit(deliver_bitmask, i);
-                } else if (apic_iter->dest_mode == 0x0) {
-                    if ((dest & 0xf0) == (apic_iter->log_dest & 0xf0) &&
-                        (dest & apic_iter->log_dest & 0x0f)) {
+                /* x2APIC mode */
+                if (apic_iter->apicbase & MSR_IA32_APICBASE_EXTD) {
+                    if ((dest & 0xffff0000) == (apic_iter->log_dest & 0xffff0000) &&
+                        (dest & apic_iter->log_dest & 0xffff)) {
                         apic_set_bit(deliver_bitmask, i);
                     }
+                } else {
+                    if (apic_iter->dest_mode == 0xf) {
+                        if (dest & apic_iter->log_dest)
+                            apic_set_bit(deliver_bitmask, i);
+                    } else if (apic_iter->dest_mode == 0x0) {
+                        if ((dest & 0xf0) == (apic_iter->log_dest & 0xf0) &&
+                            (dest & apic_iter->log_dest & 0x0f)) {
+                            apic_set_bit(deliver_bitmask, i);
+                        }
+                    }
                 }
             } else {
                 break;
@@ -508,13 +562,12 @@ void apic_sipi(DeviceState *dev)
     s->wait_for_sipi = 0;
 }
 
-static void apic_deliver(DeviceState *dev, uint8_t dest, uint8_t dest_mode,
+static void apic_deliver(DeviceState *dev, uint32_t dest, uint8_t dest_mode,
                          uint8_t delivery_mode, uint8_t vector_num,
-                         uint8_t trigger_mode)
+                         uint8_t trigger_mode, uint8_t dest_shorthand)
 {
     APICCommonState *s = APIC(dev);
     uint32_t deliver_bitmask[MAX_APIC_WORDS];
-    int dest_shorthand = (s->icr[0] >> 18) & 3;
     APICCommonState *apic_iter;
 
     switch (dest_shorthand) {
@@ -635,16 +688,11 @@ static void apic_timer(void *opaque)
     apic_timer_update(s, s->next_time);
 }
 
-static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size)
+uint64_t apic_register_read(int index)
 {
     DeviceState *dev;
     APICCommonState *s;
-    uint32_t val;
-    int index;
-
-    if (size < 4) {
-        return 0;
-    }
+    uint64_t val;
 
     dev = cpu_get_current_apic();
     if (!dev) {
@@ -652,10 +700,12 @@ static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size)
     }
     s = APIC(dev);
 
-    index = (addr >> 4) & 0xff;
     switch(index) {
     case 0x02: /* id */
-        val = s->id << 24;
+        if (is_x2apic_mode(dev))
+            val = s->initial_apic_id;
+        else
+            val = s->id << 24;
         break;
     case 0x03: /* version */
         val = s->version | ((APIC_LVT_NB - 1) << 16);
@@ -678,9 +728,16 @@ static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size)
         val = 0;
         break;
     case 0x0d:
-        val = s->log_dest << 24;
+        if (is_x2apic_mode(dev))
+            val = s->log_dest;
+        else
+            val = s->log_dest << 24;
         break;
     case 0x0e:
+        if (is_x2apic_mode(dev)) {
+            raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+        }
+
         val = (s->dest_mode << 28) | 0xfffffff;
         break;
     case 0x0f:
@@ -719,6 +776,22 @@ static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size)
         val = 0;
         break;
     }
+
+    return val;
+}
+
+static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size)
+{
+    uint32_t val;
+    int index;
+
+    if (size < 4) {
+        return 0;
+    }
+
+    index = (addr >> 4) & 0xff;
+    val = (uint32_t) apic_register_read(index);
+
     trace_apic_mem_readl(addr, val);
     return val;
 }
@@ -736,27 +809,10 @@ static void apic_send_msi(MSIMessage *msi)
     apic_deliver_irq(dest, dest_mode, delivery, vector, trigger_mode);
 }
 
-static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
-                           unsigned size)
+void apic_register_write(int index, uint64_t val)
 {
     DeviceState *dev;
     APICCommonState *s;
-    int index = (addr >> 4) & 0xff;
-
-    if (size < 4) {
-        return;
-    }
-
-    if (addr > 0xfff || !index) {
-        /* MSI and MMIO APIC are at the same memory location,
-         * but actually not on the global bus: MSI is on PCI bus
-         * APIC is connected directly to the CPU.
-         * Mapping them on the global bus happens to work because
-         * MSI registers are reserved in APIC MMIO and vice versa. */
-        MSIMessage msi = { .address = addr, .data = val };
-        apic_send_msi(&msi);
-        return;
-    }
 
     dev = cpu_get_current_apic();
     if (!dev) {
@@ -764,10 +820,12 @@ static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
     }
     s = APIC(dev);
 
-    trace_apic_mem_writel(addr, val);
-
     switch(index) {
     case 0x02:
+        if (is_x2apic_mode(dev)) {
+            raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+        }
+
         s->id = (val >> 24);
         break;
     case 0x03:
@@ -787,9 +845,17 @@ static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
         apic_eoi(s);
         break;
     case 0x0d:
+        if (is_x2apic_mode(dev)) {
+            raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+        }
+
         s->log_dest = val >> 24;
         break;
     case 0x0e:
+        if (is_x2apic_mode(dev)) {
+            raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+        }
+
         s->dest_mode = val >> 28;
         break;
     case 0x0f:
@@ -801,13 +867,27 @@ static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
     case 0x20 ... 0x27:
     case 0x28:
         break;
-    case 0x30:
+    case 0x30: {
+        uint32_t dest;
+
         s->icr[0] = val;
-        apic_deliver(dev, (s->icr[1] >> 24) & 0xff, (s->icr[0] >> 11) & 1,
+        if (is_x2apic_mode(dev)) {
+            s->icr[1] = val >> 32;
+            dest = s->icr[1];
+        } else {
+            dest = (s->icr[1] >> 24) & 0xff;
+        }
+
+        apic_deliver(dev, dest, (s->icr[0] >> 11) & 1,
                      (s->icr[0] >> 8) & 7, (s->icr[0] & 0xff),
-                     (s->icr[0] >> 15) & 1);
+                     (s->icr[0] >> 15) & 1, (s->icr[0] >> 18) & 3);
         break;
+    }
     case 0x31:
+        if (is_x2apic_mode(dev)) {
+            raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+        }
+
         s->icr[1] = val;
         break;
     case 0x32 ... 0x37:
@@ -836,12 +916,53 @@ static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
             s->count_shift = (v + 1) & 7;
         }
         break;
+    case 0x3f: {
+        int vector = val & 0xff;
+
+        if (is_x2apic_mode(dev)) {
+            raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
+        }
+
+        /*
+         * Self IPI is identical to IPI with
+         * - Destination shorthand: 1 (Self)
+         * - Trigger mode: 0 (Edge)
+         * - Delivery mode: 0 (Fixed)
+         */
+        apic_deliver(dev, 0, 0, APIC_DM_FIXED, vector, 0, 1);
+
+        break;
+    }
     default:
         s->esr |= APIC_ESR_ILLEGAL_ADDRESS;
         break;
     }
 }
 
+static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
+                           unsigned size)
+{
+    int index = (addr >> 4) & 0xff;
+
+    if (size < 4) {
+        return;
+    }
+
+    if (addr > 0xfff || !index) {
+        /* MSI and MMIO APIC are at the same memory location,
+         * but actually not on the global bus: MSI is on PCI bus
+         * APIC is connected directly to the CPU.
+         * Mapping them on the global bus happens to work because
+         * MSI registers are reserved in APIC MMIO and vice versa. */
+        MSIMessage msi = { .address = addr, .data = val };
+        apic_send_msi(&msi);
+        return;
+    }
+
+    trace_apic_mem_writel(addr, val);
+    apic_register_write(index, val);
+}
+
 static void apic_pre_save(APICCommonState *s)
 {
     apic_sync_vapic(s, SYNC_FROM_VAPIC);
diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
index 4a34f03047..9ea1397530 100644
--- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c
@@ -366,7 +366,7 @@ static const VMStateDescription vmstate_apic_common = {
         VMSTATE_UINT8(arb_id, APICCommonState),
         VMSTATE_UINT8(tpr, APICCommonState),
         VMSTATE_UINT32(spurious_vec, APICCommonState),
-        VMSTATE_UINT8(log_dest, APICCommonState),
+        VMSTATE_UINT32(log_dest, APICCommonState),
         VMSTATE_UINT8(dest_mode, APICCommonState),
         VMSTATE_UINT32_ARRAY(isr, APICCommonState, 8),
         VMSTATE_UINT32_ARRAY(tmr, APICCommonState, 8),
diff --git a/include/hw/i386/apic.h b/include/hw/i386/apic.h
index bdc15a7a73..871ca94c5c 100644
--- a/include/hw/i386/apic.h
+++ b/include/hw/i386/apic.h
@@ -3,7 +3,7 @@
 
 
 /* apic.c */
-void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode,
+void apic_deliver_irq(uint32_t dest, uint8_t dest_mode, uint8_t delivery_mode,
                       uint8_t vector_num, uint8_t trigger_mode);
 int apic_accept_pic_intr(DeviceState *s);
 void apic_deliver_pic_intr(DeviceState *s, int level);
@@ -18,6 +18,9 @@ void apic_sipi(DeviceState *s);
 void apic_poll_irq(DeviceState *d);
 void apic_designate_bsp(DeviceState *d, bool bsp);
 int apic_get_highest_priority_irr(DeviceState *dev);
+uint64_t apic_register_read(int index);
+void apic_register_write(int index, uint64_t val);
+bool is_x2apic_mode(DeviceState *d);
 
 /* pc.c */
 DeviceState *cpu_get_current_apic(void);
diff --git a/include/hw/i386/apic_internal.h b/include/hw/i386/apic_internal.h
index 5f2ba24bfc..5f60cd5e78 100644
--- a/include/hw/i386/apic_internal.h
+++ b/include/hw/i386/apic_internal.h
@@ -164,7 +164,7 @@ struct APICCommonState {
     uint8_t arb_id;
     uint8_t tpr;
     uint32_t spurious_vec;
-    uint8_t log_dest;
+    uint32_t log_dest;
     uint8_t dest_mode;
     uint32_t isr[8];  /* in service register */
     uint32_t tmr[8];  /* trigger mode register */
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 2/4] i386/tcg: implement x2APIC registers MSR access
  2023-02-21 16:04 [PATCH 0/4] Support x2APIC mode with TCG accelerator Bui Quang Minh
  2023-02-21 16:04 ` [PATCH 1/4] apic: add support for x2APIC mode Bui Quang Minh
@ 2023-02-21 16:04 ` Bui Quang Minh
  2023-02-21 16:04 ` [PATCH 3/4] intel_iommu: allow Extended Interrupt Mode when using userspace local APIC Bui Quang Minh
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 22+ messages in thread
From: Bui Quang Minh @ 2023-02-21 16:04 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Richard Henderson, Eduardo Habkost,
	Michael S . Tsirkin, Marcel Apfelbaum, Bui Quang Minh

i386 TCG now supports MSR access to x2APIC registers. The MRS address
ranges from 0x800 to 0x8ff.

Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
---
 target/i386/cpu-sysemu.c             |  5 +++++
 target/i386/cpu.c                    |  5 +++--
 target/i386/cpu.h                    |  4 ++++
 target/i386/tcg/sysemu/misc_helper.c | 27 +++++++++++++++++++++++++++
 4 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/target/i386/cpu-sysemu.c b/target/i386/cpu-sysemu.c
index 28115edf44..73f14161e9 100644
--- a/target/i386/cpu-sysemu.c
+++ b/target/i386/cpu-sysemu.c
@@ -235,6 +235,11 @@ void cpu_clear_apic_feature(CPUX86State *env)
     env->features[FEAT_1_EDX] &= ~CPUID_APIC;
 }
 
+bool cpu_has_x2apic_feature(CPUX86State *env)
+{
+    return env->features[FEAT_1_ECX] & CPUID_EXT_X2APIC;
+}
+
 bool cpu_is_bsp(X86CPU *cpu)
 {
     return cpu_get_apic_base(cpu->apic_state) & MSR_IA32_APICBASE_BSP;
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 4d2b8d0444..ce9ec460f4 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -626,12 +626,13 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
           CPUID_EXT_XSAVE | /* CPUID_EXT_OSXSAVE is dynamic */   \
           CPUID_EXT_MOVBE | CPUID_EXT_AES | CPUID_EXT_HYPERVISOR | \
           CPUID_EXT_RDRAND | CPUID_EXT_AVX | CPUID_EXT_F16C | \
-          CPUID_EXT_FMA)
+          CPUID_EXT_FMA | CPUID_EXT_X2APIC)
           /* missing:
           CPUID_EXT_DTES64, CPUID_EXT_DSCPL, CPUID_EXT_VMX, CPUID_EXT_SMX,
           CPUID_EXT_EST, CPUID_EXT_TM2, CPUID_EXT_CID,
           CPUID_EXT_XTPR, CPUID_EXT_PDCM, CPUID_EXT_PCID, CPUID_EXT_DCA,
-          CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER */
+          CPUID_EXT_TSC_DEADLINE_TIMER
+          */
 
 #ifdef TARGET_X86_64
 #define TCG_EXT2_X86_64_FEATURES (CPUID_EXT2_SYSCALL | CPUID_EXT2_LM)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index d4bc19577a..e163d9a136 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -542,6 +542,9 @@ typedef enum X86Seg {
 #define MSR_IA32_VMX_TRUE_ENTRY_CTLS     0x00000490
 #define MSR_IA32_VMX_VMFUNC             0x00000491
 
+#define MSR_APIC_START                  0x00000800
+#define MSR_APIC_END                    0x000008ff
+
 #define XSTATE_FP_BIT                   0
 #define XSTATE_SSE_BIT                  1
 #define XSTATE_YMM_BIT                  2
@@ -2128,6 +2131,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
 void cpu_clear_apic_feature(CPUX86State *env);
 void host_cpuid(uint32_t function, uint32_t count,
                 uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx);
+bool cpu_has_x2apic_feature(CPUX86State *env);
 
 /* helper.c */
 void x86_cpu_set_a20(X86CPU *cpu, int a20_state);
diff --git a/target/i386/tcg/sysemu/misc_helper.c b/target/i386/tcg/sysemu/misc_helper.c
index e1528b7f80..1fce2076a3 100644
--- a/target/i386/tcg/sysemu/misc_helper.c
+++ b/target/i386/tcg/sysemu/misc_helper.c
@@ -25,6 +25,7 @@
 #include "exec/address-spaces.h"
 #include "exec/exec-all.h"
 #include "tcg/helper-tcg.h"
+#include "hw/i386/apic.h"
 
 void helper_outb(CPUX86State *env, uint32_t port, uint32_t data)
 {
@@ -289,6 +290,19 @@ void helper_wrmsr(CPUX86State *env)
         env->msr_bndcfgs = val;
         cpu_sync_bndcs_hflags(env);
         break;
+    case MSR_APIC_START ... MSR_APIC_END: {
+        int index = (uint32_t)env->regs[R_ECX] - MSR_APIC_START;
+
+        if (!is_x2apic_mode(env_archcpu(env)->apic_state)) {
+            goto error;
+        }
+
+        qemu_mutex_lock_iothread();
+        apic_register_write(index, val);
+        qemu_mutex_unlock_iothread();
+
+        break;
+    }
     default:
         if ((uint32_t)env->regs[R_ECX] >= MSR_MC0_CTL
             && (uint32_t)env->regs[R_ECX] < MSR_MC0_CTL +
@@ -455,6 +469,19 @@ void helper_rdmsr(CPUX86State *env)
         val = (cs->nr_threads * cs->nr_cores) | (cs->nr_cores << 16);
         break;
     }
+    case MSR_APIC_START ... MSR_APIC_END: {
+        int index = (uint32_t)env->regs[R_ECX] - MSR_APIC_START;
+
+        if (!is_x2apic_mode(env_archcpu(env)->apic_state)) {
+            raise_exception_ra(env, EXCP0D_GPF, GETPC());
+        }
+
+        qemu_mutex_lock_iothread();
+        val = apic_register_read(index);
+        qemu_mutex_unlock_iothread();
+
+        break;
+    }
     default:
         if ((uint32_t)env->regs[R_ECX] >= MSR_MC0_CTL
             && (uint32_t)env->regs[R_ECX] < MSR_MC0_CTL +
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 3/4] intel_iommu: allow Extended Interrupt Mode when using userspace local APIC
  2023-02-21 16:04 [PATCH 0/4] Support x2APIC mode with TCG accelerator Bui Quang Minh
  2023-02-21 16:04 ` [PATCH 1/4] apic: add support for x2APIC mode Bui Quang Minh
  2023-02-21 16:04 ` [PATCH 2/4] i386/tcg: implement x2APIC registers MSR access Bui Quang Minh
@ 2023-02-21 16:04 ` Bui Quang Minh
  2023-02-21 16:05 ` [PATCH 4/4] test/avocado: test Linux boot up in x2APIC with " Bui Quang Minh
  2023-03-06 17:55 ` [PATCH 0/4] Support x2APIC mode with TCG accelerator David Woodhouse
  4 siblings, 0 replies; 22+ messages in thread
From: Bui Quang Minh @ 2023-02-21 16:04 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Richard Henderson, Eduardo Habkost,
	Michael S . Tsirkin, Marcel Apfelbaum, Bui Quang Minh

As userspace local APIC now supports x2APIC, intel interrupt remapping
hardware can be set to EIM mode when userspace local APIC is used.

Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
---
 hw/i386/intel_iommu.c | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 98a5c304a7..228a944bce 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -4026,17 +4026,6 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
                       && x86_iommu_ir_supported(x86_iommu) ?
                                               ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
     }
-    if (s->intr_eim == ON_OFF_AUTO_ON && !s->buggy_eim) {
-        if (!kvm_irqchip_is_split()) {
-            error_setg(errp, "eim=on requires accel=kvm,kernel-irqchip=split");
-            return false;
-        }
-        if (!kvm_enable_x2apic()) {
-            error_setg(errp, "eim=on requires support on the KVM side"
-                             "(X2APIC_API, first shipped in v4.7)");
-            return false;
-        }
-    }
 
     /* Currently only address widths supported are 39 and 48 bits */
     if ((s->aw_bits != VTD_HOST_AW_39BIT) &&
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 4/4] test/avocado: test Linux boot up in x2APIC with userspace local APIC
  2023-02-21 16:04 [PATCH 0/4] Support x2APIC mode with TCG accelerator Bui Quang Minh
                   ` (2 preceding siblings ...)
  2023-02-21 16:04 ` [PATCH 3/4] intel_iommu: allow Extended Interrupt Mode when using userspace local APIC Bui Quang Minh
@ 2023-02-21 16:05 ` Bui Quang Minh
  2023-03-06 19:51   ` David Woodhouse
                     ` (2 more replies)
  2023-03-06 17:55 ` [PATCH 0/4] Support x2APIC mode with TCG accelerator David Woodhouse
  4 siblings, 3 replies; 22+ messages in thread
From: Bui Quang Minh @ 2023-02-21 16:05 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Richard Henderson, Eduardo Habkost,
	Michael S . Tsirkin, Marcel Apfelbaum, Bui Quang Minh

Simple test to check Linux boot up in x2APIC with userspace local APIC and
TCG accelerator.

Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
---
 tests/avocado/tcg_x2apic.py | 91 +++++++++++++++++++++++++++++++++++++
 1 file changed, 91 insertions(+)
 create mode 100644 tests/avocado/tcg_x2apic.py

diff --git a/tests/avocado/tcg_x2apic.py b/tests/avocado/tcg_x2apic.py
new file mode 100644
index 0000000000..ff4f27017c
--- /dev/null
+++ b/tests/avocado/tcg_x2apic.py
@@ -0,0 +1,91 @@
+# x2APIC with TCG accelerator tests
+# Based on intel_iommu.py, INTEL_IOMMU Functional tests
+#
+# Copyright (c) Bui Quang Minh <minhquangbui99@gmail.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# later. See the COPYING file in the top-level directory.
+import os
+
+from avocado import skipIf
+from avocado_qemu import LinuxTest
+
+@skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab')
+class TCGx2APIC(LinuxTest):
+    """
+    :avocado: tags=arch:x86_64
+    :avocado: tags=distro:fedora
+    :avocado: tags=distro_version:31
+    :avocado: tags=machine:q35
+    :avocado: tags=accel:tcg
+    :avocado: tags=x2apic
+    """
+
+    IOMMU_ADDON = ',iommu_platform=on,disable-modern=off,disable-legacy=on'
+    kernel_path = None
+    initrd_path = None
+    kernel_params = None
+
+    def set_up_boot(self):
+        path = self.download_boot()
+        self.vm.add_args('-device', 'virtio-blk-pci,bus=pcie.0,scsi=off,' +
+                         'drive=drv0,id=virtio-disk0,bootindex=1,'
+                         'werror=stop,rerror=stop' + self.IOMMU_ADDON)
+        self.vm.add_args('-device', 'virtio-gpu-pci' + self.IOMMU_ADDON)
+        self.vm.add_args('-drive',
+                         'file=%s,if=none,cache=writethrough,id=drv0' % path)
+
+    def setUp(self):
+        super(TCGx2APIC, self).setUp(None, 'virtio-net-pci' + self.IOMMU_ADDON)
+
+    def add_common_args(self):
+        self.vm.add_args('-device', 'virtio-rng-pci,rng=rng0')
+        self.vm.add_args('-object',
+                         'rng-random,id=rng0,filename=/dev/urandom')
+
+    def common_vm_setup(self, custom_kernel=None):
+        self.require_accelerator('tcg')
+        self.add_common_args()
+        self.vm.add_args('-accel', 'tcg')
+        self.vm.add_args('-device', 'intel-iommu,intremap=on,eim=on')
+        self.vm.add_args('-cpu', 'qemu64,+x2apic')
+
+        if custom_kernel is None:
+            return
+
+        kernel_url = self.distro.pxeboot_url + 'vmlinuz'
+        initrd_url = self.distro.pxeboot_url + 'initrd.img'
+        self.kernel_path = self.fetch_asset(kernel_url)
+        self.initrd_path = self.fetch_asset(initrd_url)
+
+    def run_and_check(self):
+        if self.kernel_path:
+            self.vm.add_args('-kernel', self.kernel_path,
+                             '-append', self.kernel_params,
+                             '-initrd', self.initrd_path)
+        self.launch_and_wait()
+        self.ssh_command('cat /proc/cmdline')
+        self.ssh_command('dmesg | grep "x2apic enabled"')
+
+    def test_physical_x2apic(self):
+        """
+        :avocado: tags=physical_x2apic
+        """
+
+        self.common_vm_setup(True)
+
+        self.kernel_params = (self.distro.default_kernel_params +
+                              ' quiet intel_iommu=on x2apic_phys')
+        self.run_and_check()
+        self.ssh_command('dmesg | grep "Switched APIC routing to physical x2apic"')
+
+    def test_cluster_x2apic(self):
+        """
+        :avocado: tags=cluster_x2apic
+        """
+
+        self.common_vm_setup(True)
+        self.kernel_params = (self.distro.default_kernel_params +
+                              ' quiet intel_iommu=on')
+        self.run_and_check()
+        self.ssh_command('dmesg | grep "Switched APIC routing to cluster x2apic"')
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/4] apic: add support for x2APIC mode
  2023-02-21 16:04 ` [PATCH 1/4] apic: add support for x2APIC mode Bui Quang Minh
@ 2023-02-24 14:29   ` Igor Mammedov
  2023-02-25 10:15     ` Bui Quang Minh
  2023-03-06 16:01   ` David Woodhouse
  1 sibling, 1 reply; 22+ messages in thread
From: Igor Mammedov @ 2023-02-24 14:29 UTC (permalink / raw)
  To: Bui Quang Minh
  Cc: qemu-devel, Paolo Bonzini, Richard Henderson, Eduardo Habkost,
	Michael S . Tsirkin, Marcel Apfelbaum

On Tue, 21 Feb 2023 23:04:57 +0700
Bui Quang Minh <minhquangbui99@gmail.com> wrote:

> This commit refactors APIC registers read/write function to support both
> MMIO read/write in xAPIC mode and MSR read/write in x2APIC mode. Also,
> support larger APIC ID, self IPI, new IPI destination determination in
> x2APIC mode.
> 
> Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
> ---
>  hw/intc/apic.c                  | 211 +++++++++++++++++++++++++-------
>  hw/intc/apic_common.c           |   2 +-
>  include/hw/i386/apic.h          |   5 +-
>  include/hw/i386/apic_internal.h |   2 +-
>  4 files changed, 172 insertions(+), 48 deletions(-)
> 
> diff --git a/hw/intc/apic.c b/hw/intc/apic.c
> index 2d3e55f4e2..205d5923ec 100644
> --- a/hw/intc/apic.c
> +++ b/hw/intc/apic.c
> @@ -30,6 +30,7 @@
>  #include "hw/i386/apic-msidef.h"
>  #include "qapi/error.h"
>  #include "qom/object.h"
> +#include "tcg/helper-tcg.h"
>  
>  #define MAX_APICS 255

I'm curious how does it work without increasing ^^^?

>  #define MAX_APIC_WORDS 8
> @@ -48,7 +49,7 @@ DECLARE_INSTANCE_CHECKER(APICCommonState, APIC,
>  static void apic_set_irq(APICCommonState *s, int vector_num, int trigger_mode);
>  static void apic_update_irq(APICCommonState *s);
>  static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask,
> -                                      uint8_t dest, uint8_t dest_mode);
> +                                      uint32_t dest, uint8_t dest_mode);
>  
>  /* Find first bit starting from msb */
>  static int apic_fls_bit(uint32_t value)
> @@ -275,7 +276,7 @@ static void apic_bus_deliver(const uint32_t *deliver_bitmask,
>                   apic_set_irq(apic_iter, vector_num, trigger_mode) );
>  }
>  
> -void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode,
> +void apic_deliver_irq(uint32_t dest, uint8_t dest_mode, uint8_t delivery_mode,
>                        uint8_t vector_num, uint8_t trigger_mode)
>  {
>      uint32_t deliver_bitmask[MAX_APIC_WORDS];
> @@ -287,8 +288,38 @@ void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode,
>      apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode);
>  }
>  
> +bool is_x2apic_mode(DeviceState *dev)
> +{
> +    APICCommonState *s = APIC(dev);
> +
> +    return s->apicbase & MSR_IA32_APICBASE_EXTD;
> +}
> +
>  static void apic_set_base(APICCommonState *s, uint64_t val)
>  {
> +    /*
> +     * Transition into invalid state
> +     * (s->apicbase & MSR_IA32_APICBASE_ENABLE == 0) &&
> +     * (s->apicbase & MSR_IA32_APICBASE_EXTD) == 1
> +     */
> +    if (!(val & MSR_IA32_APICBASE_ENABLE) &&
> +        (val & MSR_IA32_APICBASE_EXTD))
> +        raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
> +
> +    /* Invalid transition from disabled mode to x2APIC */
> +    if (!(s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
> +        !(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
> +        (val & MSR_IA32_APICBASE_ENABLE) &&
> +        (val & MSR_IA32_APICBASE_EXTD))
> +        raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
> +
> +    /* Invalid transition from x2APIC to xAPIC */
> +    if ((s->apicbase & MSR_IA32_APICBASE_ENABLE) &&
> +        (s->apicbase & MSR_IA32_APICBASE_EXTD) &&
> +        (val & MSR_IA32_APICBASE_ENABLE) &&
> +        !(val & MSR_IA32_APICBASE_EXTD))
> +        raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
> +
>      s->apicbase = (val & 0xfffff000) |
>          (s->apicbase & (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE));
>      /* if disabled, cannot be enabled again */
> @@ -297,6 +328,21 @@ static void apic_set_base(APICCommonState *s, uint64_t val)
>          cpu_clear_apic_feature(&s->cpu->env);
>          s->spurious_vec &= ~APIC_SV_ENABLE;
>      }
> +
> +    /* Transition from xAPIC to x2APIC */
> +    if (cpu_has_x2apic_feature(&s->cpu->env) &&
> +        !(s->apicbase & MSR_IA32_APICBASE_EXTD) &&
> +        (val & MSR_IA32_APICBASE_EXTD)) {
> +        s->apicbase |= MSR_IA32_APICBASE_EXTD;
> +
> +        s->log_dest = ((s->initial_apic_id & 0xffff0) << 16) |
> +                      (1 << (s->initial_apic_id & 0xf));
> +
> +        /* disable MMIO interface */
> +        qemu_mutex_lock_iothread();
> +        memory_region_set_enabled(&s->io_memory, false);
> +        qemu_mutex_unlock_iothread();
> +    }
>  }
>  
>  static void apic_set_tpr(APICCommonState *s, uint8_t val)
> @@ -454,7 +500,7 @@ static int apic_find_dest(uint8_t dest)
>  }
>  
>  static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask,
> -                                      uint8_t dest, uint8_t dest_mode)
> +                                      uint32_t dest, uint8_t dest_mode)
>  {
>      APICCommonState *apic_iter;
>      int i;
> @@ -474,14 +520,22 @@ static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask,
>          for(i = 0; i < MAX_APICS; i++) {
>              apic_iter = local_apics[i];
>              if (apic_iter) {
> -                if (apic_iter->dest_mode == 0xf) {
> -                    if (dest & apic_iter->log_dest)
> -                        apic_set_bit(deliver_bitmask, i);
> -                } else if (apic_iter->dest_mode == 0x0) {
> -                    if ((dest & 0xf0) == (apic_iter->log_dest & 0xf0) &&
> -                        (dest & apic_iter->log_dest & 0x0f)) {
> +                /* x2APIC mode */
> +                if (apic_iter->apicbase & MSR_IA32_APICBASE_EXTD) {
> +                    if ((dest & 0xffff0000) == (apic_iter->log_dest & 0xffff0000) &&
> +                        (dest & apic_iter->log_dest & 0xffff)) {
>                          apic_set_bit(deliver_bitmask, i);
>                      }
> +                } else {
> +                    if (apic_iter->dest_mode == 0xf) {
> +                        if (dest & apic_iter->log_dest)
> +                            apic_set_bit(deliver_bitmask, i);
> +                    } else if (apic_iter->dest_mode == 0x0) {
> +                        if ((dest & 0xf0) == (apic_iter->log_dest & 0xf0) &&
> +                            (dest & apic_iter->log_dest & 0x0f)) {
> +                            apic_set_bit(deliver_bitmask, i);
> +                        }
> +                    }
>                  }
>              } else {
>                  break;
> @@ -508,13 +562,12 @@ void apic_sipi(DeviceState *dev)
>      s->wait_for_sipi = 0;
>  }
>  
> -static void apic_deliver(DeviceState *dev, uint8_t dest, uint8_t dest_mode,
> +static void apic_deliver(DeviceState *dev, uint32_t dest, uint8_t dest_mode,
>                           uint8_t delivery_mode, uint8_t vector_num,
> -                         uint8_t trigger_mode)
> +                         uint8_t trigger_mode, uint8_t dest_shorthand)
>  {
>      APICCommonState *s = APIC(dev);
>      uint32_t deliver_bitmask[MAX_APIC_WORDS];
> -    int dest_shorthand = (s->icr[0] >> 18) & 3;
>      APICCommonState *apic_iter;
>  
>      switch (dest_shorthand) {
> @@ -635,16 +688,11 @@ static void apic_timer(void *opaque)
>      apic_timer_update(s, s->next_time);
>  }
>  
> -static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size)
> +uint64_t apic_register_read(int index)
>  {
>      DeviceState *dev;
>      APICCommonState *s;
> -    uint32_t val;
> -    int index;
> -
> -    if (size < 4) {
> -        return 0;
> -    }
> +    uint64_t val;
>  
>      dev = cpu_get_current_apic();
>      if (!dev) {
> @@ -652,10 +700,12 @@ static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size)
>      }
>      s = APIC(dev);
>  
> -    index = (addr >> 4) & 0xff;
>      switch(index) {
>      case 0x02: /* id */
> -        val = s->id << 24;
> +        if (is_x2apic_mode(dev))
> +            val = s->initial_apic_id;
> +        else
> +            val = s->id << 24;
>          break;
>      case 0x03: /* version */
>          val = s->version | ((APIC_LVT_NB - 1) << 16);
> @@ -678,9 +728,16 @@ static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size)
>          val = 0;
>          break;
>      case 0x0d:
> -        val = s->log_dest << 24;
> +        if (is_x2apic_mode(dev))
> +            val = s->log_dest;
> +        else
> +            val = s->log_dest << 24;
>          break;
>      case 0x0e:
> +        if (is_x2apic_mode(dev)) {
> +            raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
> +        }
> +
>          val = (s->dest_mode << 28) | 0xfffffff;
>          break;
>      case 0x0f:
> @@ -719,6 +776,22 @@ static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size)
>          val = 0;
>          break;
>      }
> +
> +    return val;
> +}
> +
> +static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size)
> +{
> +    uint32_t val;
> +    int index;
> +
> +    if (size < 4) {
> +        return 0;
> +    }
> +
> +    index = (addr >> 4) & 0xff;
> +    val = (uint32_t) apic_register_read(index);
> +
>      trace_apic_mem_readl(addr, val);
>      return val;
>  }
> @@ -736,27 +809,10 @@ static void apic_send_msi(MSIMessage *msi)
>      apic_deliver_irq(dest, dest_mode, delivery, vector, trigger_mode);
>  }
>  
> -static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
> -                           unsigned size)
> +void apic_register_write(int index, uint64_t val)
>  {
>      DeviceState *dev;
>      APICCommonState *s;
> -    int index = (addr >> 4) & 0xff;
> -
> -    if (size < 4) {
> -        return;
> -    }
> -
> -    if (addr > 0xfff || !index) {
> -        /* MSI and MMIO APIC are at the same memory location,
> -         * but actually not on the global bus: MSI is on PCI bus
> -         * APIC is connected directly to the CPU.
> -         * Mapping them on the global bus happens to work because
> -         * MSI registers are reserved in APIC MMIO and vice versa. */
> -        MSIMessage msi = { .address = addr, .data = val };
> -        apic_send_msi(&msi);
> -        return;
> -    }
>  
>      dev = cpu_get_current_apic();
>      if (!dev) {
> @@ -764,10 +820,12 @@ static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
>      }
>      s = APIC(dev);
>  
> -    trace_apic_mem_writel(addr, val);
> -
>      switch(index) {
>      case 0x02:
> +        if (is_x2apic_mode(dev)) {
> +            raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
> +        }
> +
>          s->id = (val >> 24);
>          break;
>      case 0x03:
> @@ -787,9 +845,17 @@ static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
>          apic_eoi(s);
>          break;
>      case 0x0d:
> +        if (is_x2apic_mode(dev)) {
> +            raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
> +        }
> +
>          s->log_dest = val >> 24;
>          break;
>      case 0x0e:
> +        if (is_x2apic_mode(dev)) {
> +            raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
> +        }
> +
>          s->dest_mode = val >> 28;
>          break;
>      case 0x0f:
> @@ -801,13 +867,27 @@ static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
>      case 0x20 ... 0x27:
>      case 0x28:
>          break;
> -    case 0x30:
> +    case 0x30: {
> +        uint32_t dest;
> +
>          s->icr[0] = val;
> -        apic_deliver(dev, (s->icr[1] >> 24) & 0xff, (s->icr[0] >> 11) & 1,
> +        if (is_x2apic_mode(dev)) {
> +            s->icr[1] = val >> 32;
> +            dest = s->icr[1];
> +        } else {
> +            dest = (s->icr[1] >> 24) & 0xff;
> +        }
> +
> +        apic_deliver(dev, dest, (s->icr[0] >> 11) & 1,
>                       (s->icr[0] >> 8) & 7, (s->icr[0] & 0xff),
> -                     (s->icr[0] >> 15) & 1);
> +                     (s->icr[0] >> 15) & 1, (s->icr[0] >> 18) & 3);
>          break;
> +    }
>      case 0x31:
> +        if (is_x2apic_mode(dev)) {
> +            raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
> +        }
> +
>          s->icr[1] = val;
>          break;
>      case 0x32 ... 0x37:
> @@ -836,12 +916,53 @@ static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
>              s->count_shift = (v + 1) & 7;
>          }
>          break;
> +    case 0x3f: {
> +        int vector = val & 0xff;
> +
> +        if (is_x2apic_mode(dev)) {
> +            raise_exception_ra(&s->cpu->env, EXCP0D_GPF, GETPC());
> +        }
> +
> +        /*
> +         * Self IPI is identical to IPI with
> +         * - Destination shorthand: 1 (Self)
> +         * - Trigger mode: 0 (Edge)
> +         * - Delivery mode: 0 (Fixed)
> +         */
> +        apic_deliver(dev, 0, 0, APIC_DM_FIXED, vector, 0, 1);
> +
> +        break;
> +    }
>      default:
>          s->esr |= APIC_ESR_ILLEGAL_ADDRESS;
>          break;
>      }
>  }
>  
> +static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
> +                           unsigned size)
> +{
> +    int index = (addr >> 4) & 0xff;
> +
> +    if (size < 4) {
> +        return;
> +    }
> +
> +    if (addr > 0xfff || !index) {
> +        /* MSI and MMIO APIC are at the same memory location,
> +         * but actually not on the global bus: MSI is on PCI bus
> +         * APIC is connected directly to the CPU.
> +         * Mapping them on the global bus happens to work because
> +         * MSI registers are reserved in APIC MMIO and vice versa. */
> +        MSIMessage msi = { .address = addr, .data = val };
> +        apic_send_msi(&msi);
> +        return;
> +    }
> +
> +    trace_apic_mem_writel(addr, val);
> +    apic_register_write(index, val);
> +}
> +
>  static void apic_pre_save(APICCommonState *s)
>  {
>      apic_sync_vapic(s, SYNC_FROM_VAPIC);
> diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
> index 4a34f03047..9ea1397530 100644
> --- a/hw/intc/apic_common.c
> +++ b/hw/intc/apic_common.c
> @@ -366,7 +366,7 @@ static const VMStateDescription vmstate_apic_common = {
>          VMSTATE_UINT8(arb_id, APICCommonState),
>          VMSTATE_UINT8(tpr, APICCommonState),
>          VMSTATE_UINT32(spurious_vec, APICCommonState),
> -        VMSTATE_UINT8(log_dest, APICCommonState),
> +        VMSTATE_UINT32(log_dest, APICCommonState),
>          VMSTATE_UINT8(dest_mode, APICCommonState),
>          VMSTATE_UINT32_ARRAY(isr, APICCommonState, 8),
>          VMSTATE_UINT32_ARRAY(tmr, APICCommonState, 8),
> diff --git a/include/hw/i386/apic.h b/include/hw/i386/apic.h
> index bdc15a7a73..871ca94c5c 100644
> --- a/include/hw/i386/apic.h
> +++ b/include/hw/i386/apic.h
> @@ -3,7 +3,7 @@
>  
>  
>  /* apic.c */
> -void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode,
> +void apic_deliver_irq(uint32_t dest, uint8_t dest_mode, uint8_t delivery_mode,
>                        uint8_t vector_num, uint8_t trigger_mode);
>  int apic_accept_pic_intr(DeviceState *s);
>  void apic_deliver_pic_intr(DeviceState *s, int level);
> @@ -18,6 +18,9 @@ void apic_sipi(DeviceState *s);
>  void apic_poll_irq(DeviceState *d);
>  void apic_designate_bsp(DeviceState *d, bool bsp);
>  int apic_get_highest_priority_irr(DeviceState *dev);
> +uint64_t apic_register_read(int index);
> +void apic_register_write(int index, uint64_t val);
> +bool is_x2apic_mode(DeviceState *d);
>  
>  /* pc.c */
>  DeviceState *cpu_get_current_apic(void);
> diff --git a/include/hw/i386/apic_internal.h b/include/hw/i386/apic_internal.h
> index 5f2ba24bfc..5f60cd5e78 100644
> --- a/include/hw/i386/apic_internal.h
> +++ b/include/hw/i386/apic_internal.h
> @@ -164,7 +164,7 @@ struct APICCommonState {
>      uint8_t arb_id;
>      uint8_t tpr;
>      uint32_t spurious_vec;
> -    uint8_t log_dest;
> +    uint32_t log_dest;
>      uint8_t dest_mode;
>      uint32_t isr[8];  /* in service register */
>      uint32_t tmr[8];  /* trigger mode register */



^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/4] apic: add support for x2APIC mode
  2023-02-24 14:29   ` Igor Mammedov
@ 2023-02-25 10:15     ` Bui Quang Minh
  2023-02-27 16:07       ` Igor Mammedov
  0 siblings, 1 reply; 22+ messages in thread
From: Bui Quang Minh @ 2023-02-25 10:15 UTC (permalink / raw)
  To: Igor Mammedov
  Cc: qemu-devel, Paolo Bonzini, Richard Henderson, Eduardo Habkost,
	Michael S . Tsirkin, Marcel Apfelbaum

On 2/24/23 21:29, Igor Mammedov wrote:
> On Tue, 21 Feb 2023 23:04:57 +0700
> Bui Quang Minh <minhquangbui99@gmail.com> wrote:
> 
>> This commit refactors APIC registers read/write function to support both
>> MMIO read/write in xAPIC mode and MSR read/write in x2APIC mode. Also,
>> support larger APIC ID, self IPI, new IPI destination determination in
>> x2APIC mode.
>>
>> Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
>> ---
>>   hw/intc/apic.c                  | 211 +++++++++++++++++++++++++-------
>>   hw/intc/apic_common.c           |   2 +-
>>   include/hw/i386/apic.h          |   5 +-
>>   include/hw/i386/apic_internal.h |   2 +-
>>   4 files changed, 172 insertions(+), 48 deletions(-)
>>
>> diff --git a/hw/intc/apic.c b/hw/intc/apic.c
>> index 2d3e55f4e2..205d5923ec 100644
>> --- a/hw/intc/apic.c
>> +++ b/hw/intc/apic.c
>> @@ -30,6 +30,7 @@
>>   #include "hw/i386/apic-msidef.h"
>>   #include "qapi/error.h"
>>   #include "qom/object.h"
>> +#include "tcg/helper-tcg.h"
>>   
>>   #define MAX_APICS 255
> 
> I'm curious how does it work without increasing ^^^?

Hmm, my commit message is not entirely correct. In this series, some 
operations (send IPI, IPI destination determination) have been updated 
to support x2APIC mode. However, the emulated APIC still doesn't support 
APIC ID larger than 255 because currently, we use a fixed length (255 + 
1) array to manage local APICs. So to support larger APIC ID, I think we 
need to find any way to manage those, as the possible allocated APIC ID 
range is large and maybe the allocated APIC ID is sparse which makes 
fixed length array so wasteful.

Thanks,
Quang Minh.


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/4] apic: add support for x2APIC mode
  2023-02-25 10:15     ` Bui Quang Minh
@ 2023-02-27 16:07       ` Igor Mammedov
  2023-02-28 14:34         ` Bui Quang Minh
  0 siblings, 1 reply; 22+ messages in thread
From: Igor Mammedov @ 2023-02-27 16:07 UTC (permalink / raw)
  To: Bui Quang Minh
  Cc: qemu-devel, Paolo Bonzini, Richard Henderson, Eduardo Habkost,
	Michael S . Tsirkin, Marcel Apfelbaum

On Sat, 25 Feb 2023 17:15:17 +0700
Bui Quang Minh <minhquangbui99@gmail.com> wrote:

> On 2/24/23 21:29, Igor Mammedov wrote:
> > On Tue, 21 Feb 2023 23:04:57 +0700
> > Bui Quang Minh <minhquangbui99@gmail.com> wrote:
> >   
> >> This commit refactors APIC registers read/write function to support both
> >> MMIO read/write in xAPIC mode and MSR read/write in x2APIC mode. Also,
> >> support larger APIC ID, self IPI, new IPI destination determination in
> >> x2APIC mode.
> >>
> >> Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
> >> ---
> >>   hw/intc/apic.c                  | 211 +++++++++++++++++++++++++-------
> >>   hw/intc/apic_common.c           |   2 +-
> >>   include/hw/i386/apic.h          |   5 +-
> >>   include/hw/i386/apic_internal.h |   2 +-
> >>   4 files changed, 172 insertions(+), 48 deletions(-)
> >>
> >> diff --git a/hw/intc/apic.c b/hw/intc/apic.c
> >> index 2d3e55f4e2..205d5923ec 100644
> >> --- a/hw/intc/apic.c
> >> +++ b/hw/intc/apic.c
> >> @@ -30,6 +30,7 @@
> >>   #include "hw/i386/apic-msidef.h"
> >>   #include "qapi/error.h"
> >>   #include "qom/object.h"
> >> +#include "tcg/helper-tcg.h"
> >>   
> >>   #define MAX_APICS 255  
> > 
> > I'm curious how does it work without increasing ^^^?  
> 
> Hmm, my commit message is not entirely correct. In this series, some 
> operations (send IPI, IPI destination determination) have been updated 
> to support x2APIC mode. However, the emulated APIC still doesn't support 
> APIC ID larger than 255 because currently, we use a fixed length (255 + 
> 1) array to manage local APICs. So to support larger APIC ID, I think we 
> need to find any way to manage those, as the possible allocated APIC ID 
> range is large and maybe the allocated APIC ID is sparse which makes 
> fixed length array so wasteful.
how much sparse it is? 

benefits of simple static array is simplicity in management and O(1) access time.
QEMU does know in advance max apic id so we can size array by dynamically
allocating it when 1st apic is created. Or if IDs are too sparse
switch to another structure to keep mapping.


> 
> Thanks,
> Quang Minh.
> 



^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/4] apic: add support for x2APIC mode
  2023-02-27 16:07       ` Igor Mammedov
@ 2023-02-28 14:34         ` Bui Quang Minh
  2023-02-28 16:39           ` Igor Mammedov
  0 siblings, 1 reply; 22+ messages in thread
From: Bui Quang Minh @ 2023-02-28 14:34 UTC (permalink / raw)
  To: Igor Mammedov
  Cc: qemu-devel, Paolo Bonzini, Richard Henderson, Eduardo Habkost,
	Michael S . Tsirkin, Marcel Apfelbaum

On 2/27/23 23:07, Igor Mammedov wrote:
> On Sat, 25 Feb 2023 17:15:17 +0700
> Bui Quang Minh <minhquangbui99@gmail.com> wrote:
> 
>> On 2/24/23 21:29, Igor Mammedov wrote:
>>> On Tue, 21 Feb 2023 23:04:57 +0700
>>> Bui Quang Minh <minhquangbui99@gmail.com> wrote:
>>>    
>>>> This commit refactors APIC registers read/write function to support both
>>>> MMIO read/write in xAPIC mode and MSR read/write in x2APIC mode. Also,
>>>> support larger APIC ID, self IPI, new IPI destination determination in
>>>> x2APIC mode.
>>>>
>>>> Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
>>>> ---
>>>>    hw/intc/apic.c                  | 211 +++++++++++++++++++++++++-------
>>>>    hw/intc/apic_common.c           |   2 +-
>>>>    include/hw/i386/apic.h          |   5 +-
>>>>    include/hw/i386/apic_internal.h |   2 +-
>>>>    4 files changed, 172 insertions(+), 48 deletions(-)
>>>>
>>>> diff --git a/hw/intc/apic.c b/hw/intc/apic.c
>>>> index 2d3e55f4e2..205d5923ec 100644
>>>> --- a/hw/intc/apic.c
>>>> +++ b/hw/intc/apic.c
>>>> @@ -30,6 +30,7 @@
>>>>    #include "hw/i386/apic-msidef.h"
>>>>    #include "qapi/error.h"
>>>>    #include "qom/object.h"
>>>> +#include "tcg/helper-tcg.h"
>>>>    
>>>>    #define MAX_APICS 255
>>>
>>> I'm curious how does it work without increasing ^^^?
>>
>> Hmm, my commit message is not entirely correct. In this series, some
>> operations (send IPI, IPI destination determination) have been updated
>> to support x2APIC mode. However, the emulated APIC still doesn't support
>> APIC ID larger than 255 because currently, we use a fixed length (255 +
>> 1) array to manage local APICs. So to support larger APIC ID, I think we
>> need to find any way to manage those, as the possible allocated APIC ID
>> range is large and maybe the allocated APIC ID is sparse which makes
>> fixed length array so wasteful.
> how much sparse it is?

As far as I know, QEMU allows to set CPU's APIC ID, so user can pass a 
very sparse APIC ID array.

> benefits of simple static array is simplicity in management and O(1) access time.
> QEMU does know in advance max apic id so we can size array by dynamically
> allocating it when 1st apic is created. Or if IDs are too sparse
> switch to another structure to keep mapping.

I totally agree with this.

I admit that my main focus on this series is to make x2APIC mode 
function correctly with TCG accelerator, so I skip the part of extending 
the support for higher APIC ID.

Thanks,
Quang Minh.


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/4] apic: add support for x2APIC mode
  2023-02-28 14:34         ` Bui Quang Minh
@ 2023-02-28 16:39           ` Igor Mammedov
  2023-03-04 14:10             ` Bui Quang Minh
  0 siblings, 1 reply; 22+ messages in thread
From: Igor Mammedov @ 2023-02-28 16:39 UTC (permalink / raw)
  To: Bui Quang Minh
  Cc: qemu-devel, Paolo Bonzini, Richard Henderson, Eduardo Habkost,
	Michael S . Tsirkin, Marcel Apfelbaum

On Tue, 28 Feb 2023 21:34:33 +0700
Bui Quang Minh <minhquangbui99@gmail.com> wrote:

> On 2/27/23 23:07, Igor Mammedov wrote:
> > On Sat, 25 Feb 2023 17:15:17 +0700
> > Bui Quang Minh <minhquangbui99@gmail.com> wrote:
> >   
> >> On 2/24/23 21:29, Igor Mammedov wrote:  
> >>> On Tue, 21 Feb 2023 23:04:57 +0700
> >>> Bui Quang Minh <minhquangbui99@gmail.com> wrote:
> >>>      
> >>>> This commit refactors APIC registers read/write function to support both
> >>>> MMIO read/write in xAPIC mode and MSR read/write in x2APIC mode. Also,
> >>>> support larger APIC ID, self IPI, new IPI destination determination in
> >>>> x2APIC mode.
> >>>>
> >>>> Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
> >>>> ---
> >>>>    hw/intc/apic.c                  | 211 +++++++++++++++++++++++++-------
> >>>>    hw/intc/apic_common.c           |   2 +-
> >>>>    include/hw/i386/apic.h          |   5 +-
> >>>>    include/hw/i386/apic_internal.h |   2 +-
> >>>>    4 files changed, 172 insertions(+), 48 deletions(-)
> >>>>
> >>>> diff --git a/hw/intc/apic.c b/hw/intc/apic.c
> >>>> index 2d3e55f4e2..205d5923ec 100644
> >>>> --- a/hw/intc/apic.c
> >>>> +++ b/hw/intc/apic.c
> >>>> @@ -30,6 +30,7 @@
> >>>>    #include "hw/i386/apic-msidef.h"
> >>>>    #include "qapi/error.h"
> >>>>    #include "qom/object.h"
> >>>> +#include "tcg/helper-tcg.h"
> >>>>    
> >>>>    #define MAX_APICS 255  
> >>>
> >>> I'm curious how does it work without increasing ^^^?  
> >>
> >> Hmm, my commit message is not entirely correct. In this series, some
> >> operations (send IPI, IPI destination determination) have been updated
> >> to support x2APIC mode. However, the emulated APIC still doesn't support
> >> APIC ID larger than 255 because currently, we use a fixed length (255 +
> >> 1) array to manage local APICs. So to support larger APIC ID, I think we
> >> need to find any way to manage those, as the possible allocated APIC ID
> >> range is large and maybe the allocated APIC ID is sparse which makes
> >> fixed length array so wasteful.  
> > how much sparse it is?  
> 
> As far as I know, QEMU allows to set CPU's APIC ID, so user can pass a 
> very sparse APIC ID array.

I don't think that it does permit this (if it does it's a bug that should be fixed).

As far as I'm aware QEMU derives apic_id from '-smp' and possibly cpu type
(there was some differences between Intel and AMD in how apic id was encoded
notably AMD having threads or cores that lead to sparse apic id), though I don't
remember current state of affairs in x86 cpu topo code.

> > benefits of simple static array is simplicity in management and O(1) access time.
> > QEMU does know in advance max apic id so we can size array by dynamically
> > allocating it when 1st apic is created. Or if IDs are too sparse
> > switch to another structure to keep mapping.  
> 
> I totally agree with this.
> 
> I admit that my main focus on this series is to make x2APIC mode 
> function correctly with TCG accelerator, so I skip the part of extending 
> the support for higher APIC ID.
the tricky part in such half approach is making sure that the code is
'correct' and won't lead to exploits.
It would be easier to review if it was completed solution instead of partial.  


> Thanks,
> Quang Minh.
> 



^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/4] apic: add support for x2APIC mode
  2023-02-28 16:39           ` Igor Mammedov
@ 2023-03-04 14:10             ` Bui Quang Minh
  2023-03-06 10:43               ` Igor Mammedov
  0 siblings, 1 reply; 22+ messages in thread
From: Bui Quang Minh @ 2023-03-04 14:10 UTC (permalink / raw)
  To: Igor Mammedov
  Cc: qemu-devel, Paolo Bonzini, Richard Henderson, Eduardo Habkost,
	Michael S . Tsirkin, Marcel Apfelbaum

On 2/28/23 23:39, Igor Mammedov wrote:
> On Tue, 28 Feb 2023 21:34:33 +0700
> Bui Quang Minh <minhquangbui99@gmail.com> wrote:
> 
>> On 2/27/23 23:07, Igor Mammedov wrote:
>>> On Sat, 25 Feb 2023 17:15:17 +0700
>>> Bui Quang Minh <minhquangbui99@gmail.com> wrote:
>>>    
>>>> On 2/24/23 21:29, Igor Mammedov wrote:
>>>>> On Tue, 21 Feb 2023 23:04:57 +0700
>>>>> Bui Quang Minh <minhquangbui99@gmail.com> wrote:
>>>>>       
>>>>>> This commit refactors APIC registers read/write function to support both
>>>>>> MMIO read/write in xAPIC mode and MSR read/write in x2APIC mode. Also,
>>>>>> support larger APIC ID, self IPI, new IPI destination determination in
>>>>>> x2APIC mode.
>>>>>>
>>>>>> Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
>>>>>> ---
>>>>>>     hw/intc/apic.c                  | 211 +++++++++++++++++++++++++-------
>>>>>>     hw/intc/apic_common.c           |   2 +-
>>>>>>     include/hw/i386/apic.h          |   5 +-
>>>>>>     include/hw/i386/apic_internal.h |   2 +-
>>>>>>     4 files changed, 172 insertions(+), 48 deletions(-)
>>>>>>
>>>>>> diff --git a/hw/intc/apic.c b/hw/intc/apic.c
>>>>>> index 2d3e55f4e2..205d5923ec 100644
>>>>>> --- a/hw/intc/apic.c
>>>>>> +++ b/hw/intc/apic.c
>>>>>> @@ -30,6 +30,7 @@
>>>>>>     #include "hw/i386/apic-msidef.h"
>>>>>>     #include "qapi/error.h"
>>>>>>     #include "qom/object.h"
>>>>>> +#include "tcg/helper-tcg.h"
>>>>>>     
>>>>>>     #define MAX_APICS 255
>>>>>
>>>>> I'm curious how does it work without increasing ^^^?
>>>>
>>>> Hmm, my commit message is not entirely correct. In this series, some
>>>> operations (send IPI, IPI destination determination) have been updated
>>>> to support x2APIC mode. However, the emulated APIC still doesn't support
>>>> APIC ID larger than 255 because currently, we use a fixed length (255 +
>>>> 1) array to manage local APICs. So to support larger APIC ID, I think we
>>>> need to find any way to manage those, as the possible allocated APIC ID
>>>> range is large and maybe the allocated APIC ID is sparse which makes
>>>> fixed length array so wasteful.
>>> how much sparse it is?
>>
>> As far as I know, QEMU allows to set CPU's APIC ID, so user can pass a
>> very sparse APIC ID array.
> 
> I don't think that it does permit this (if it does it's a bug that should be fixed).
> 
> As far as I'm aware QEMU derives apic_id from '-smp' and possibly cpu type
> (there was some differences between Intel and AMD in how apic id was encoded
> notably AMD having threads or cores that lead to sparse apic id), though I don't
> remember current state of affairs in x86 cpu topo code.
> 
>>> benefits of simple static array is simplicity in management and O(1) access time.
>>> QEMU does know in advance max apic id so we can size array by dynamically
>>> allocating it when 1st apic is created. Or if IDs are too sparse
>>> switch to another structure to keep mapping.
>>
>> I totally agree with this.
>>
>> I admit that my main focus on this series is to make x2APIC mode
>> function correctly with TCG accelerator, so I skip the part of extending
>> the support for higher APIC ID.
> the tricky part in such half approach is making sure that the code is
> 'correct' and won't lead to exploits.
> It would be easier to review if it was completed solution instead of partial.

I looked around and found the way to dynamically allocate local_apics array

	void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version)
	{
		if (!kvm_irqchip_in_kernel()) {
         		apic_set_max_apic_id(x86ms->apic_id_limit);
     		}

	}

We already calculated apic_id_limit before creating CPU and local APIC 
so we can use that number to dynamically allocated local_apics.

However, there are still problems while trying to extending support to 
APIC ID larger than 255 because there are many places assume APIC ID is 
8-bit long. One of that is interrupt remapping which returns 32-bit 
destination ID but uses MSI (which has 8-bit destination) to send to 
APIC. I will look more into this.

Thanks,
Quang Minh.


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/4] apic: add support for x2APIC mode
  2023-03-04 14:10             ` Bui Quang Minh
@ 2023-03-06 10:43               ` Igor Mammedov
  2023-03-06 15:51                 ` David Woodhouse
  0 siblings, 1 reply; 22+ messages in thread
From: Igor Mammedov @ 2023-03-06 10:43 UTC (permalink / raw)
  To: Bui Quang Minh
  Cc: qemu-devel, Paolo Bonzini, Richard Henderson, Eduardo Habkost,
	Michael S . Tsirkin, Marcel Apfelbaum

On Sat, 4 Mar 2023 21:10:54 +0700
Bui Quang Minh <minhquangbui99@gmail.com> wrote:

> On 2/28/23 23:39, Igor Mammedov wrote:
> > On Tue, 28 Feb 2023 21:34:33 +0700
> > Bui Quang Minh <minhquangbui99@gmail.com> wrote:
> >   
> >> On 2/27/23 23:07, Igor Mammedov wrote:  
> >>> On Sat, 25 Feb 2023 17:15:17 +0700
> >>> Bui Quang Minh <minhquangbui99@gmail.com> wrote:
> >>>      
> >>>> On 2/24/23 21:29, Igor Mammedov wrote:  
> >>>>> On Tue, 21 Feb 2023 23:04:57 +0700
> >>>>> Bui Quang Minh <minhquangbui99@gmail.com> wrote:
> >>>>>         
> >>>>>> This commit refactors APIC registers read/write function to support both
> >>>>>> MMIO read/write in xAPIC mode and MSR read/write in x2APIC mode. Also,
> >>>>>> support larger APIC ID, self IPI, new IPI destination determination in
> >>>>>> x2APIC mode.
> >>>>>>
> >>>>>> Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
> >>>>>> ---
> >>>>>>     hw/intc/apic.c                  | 211 +++++++++++++++++++++++++-------
> >>>>>>     hw/intc/apic_common.c           |   2 +-
> >>>>>>     include/hw/i386/apic.h          |   5 +-
> >>>>>>     include/hw/i386/apic_internal.h |   2 +-
> >>>>>>     4 files changed, 172 insertions(+), 48 deletions(-)
> >>>>>>
> >>>>>> diff --git a/hw/intc/apic.c b/hw/intc/apic.c
> >>>>>> index 2d3e55f4e2..205d5923ec 100644
> >>>>>> --- a/hw/intc/apic.c
> >>>>>> +++ b/hw/intc/apic.c
> >>>>>> @@ -30,6 +30,7 @@
> >>>>>>     #include "hw/i386/apic-msidef.h"
> >>>>>>     #include "qapi/error.h"
> >>>>>>     #include "qom/object.h"
> >>>>>> +#include "tcg/helper-tcg.h"
> >>>>>>     
> >>>>>>     #define MAX_APICS 255  
> >>>>>
> >>>>> I'm curious how does it work without increasing ^^^?  
> >>>>
> >>>> Hmm, my commit message is not entirely correct. In this series, some
> >>>> operations (send IPI, IPI destination determination) have been updated
> >>>> to support x2APIC mode. However, the emulated APIC still doesn't support
> >>>> APIC ID larger than 255 because currently, we use a fixed length (255 +
> >>>> 1) array to manage local APICs. So to support larger APIC ID, I think we
> >>>> need to find any way to manage those, as the possible allocated APIC ID
> >>>> range is large and maybe the allocated APIC ID is sparse which makes
> >>>> fixed length array so wasteful.  
> >>> how much sparse it is?  
> >>
> >> As far as I know, QEMU allows to set CPU's APIC ID, so user can pass a
> >> very sparse APIC ID array.  
> > 
> > I don't think that it does permit this (if it does it's a bug that should be fixed).
> > 
> > As far as I'm aware QEMU derives apic_id from '-smp' and possibly cpu type
> > (there was some differences between Intel and AMD in how apic id was encoded
> > notably AMD having threads or cores that lead to sparse apic id), though I don't
> > remember current state of affairs in x86 cpu topo code.
> >   
> >>> benefits of simple static array is simplicity in management and O(1) access time.
> >>> QEMU does know in advance max apic id so we can size array by dynamically
> >>> allocating it when 1st apic is created. Or if IDs are too sparse
> >>> switch to another structure to keep mapping.  
> >>
> >> I totally agree with this.
> >>
> >> I admit that my main focus on this series is to make x2APIC mode
> >> function correctly with TCG accelerator, so I skip the part of extending
> >> the support for higher APIC ID.  
> > the tricky part in such half approach is making sure that the code is
> > 'correct' and won't lead to exploits.
> > It would be easier to review if it was completed solution instead of partial.  
> 
> I looked around and found the way to dynamically allocate local_apics array
> 
> 	void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version)
> 	{
> 		if (!kvm_irqchip_in_kernel()) {
>          		apic_set_max_apic_id(x86ms->apic_id_limit);
>      		}
> 
> 	}
> 
> We already calculated apic_id_limit before creating CPU and local APIC 
> so we can use that number to dynamically allocated local_apics.
> 
> However, there are still problems while trying to extending support to 
> APIC ID larger than 255 because there are many places assume APIC ID is 
> 8-bit long.
that's what I was concerned about (i.e. just enabling x2apic without fixing
with all code that just assumes 8bit apicid).

> One of that is interrupt remapping which returns 32-bit 
> destination ID but uses MSI (which has 8-bit destination) to send to 
> APIC. I will look more into this.

Thanks!

> 
> Thanks,
> Quang Minh.
> 



^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/4] apic: add support for x2APIC mode
  2023-03-06 10:43               ` Igor Mammedov
@ 2023-03-06 15:51                 ` David Woodhouse
  2023-03-06 16:39                   ` Bui Quang Minh
  2023-03-07 11:25                   ` Igor Mammedov
  0 siblings, 2 replies; 22+ messages in thread
From: David Woodhouse @ 2023-03-06 15:51 UTC (permalink / raw)
  To: Igor Mammedov, Bui Quang Minh
  Cc: qemu-devel, Paolo Bonzini, Richard Henderson, Eduardo Habkost,
	Michael S . Tsirkin, Marcel Apfelbaum

[-- Attachment #1: Type: text/plain, Size: 2142 bytes --]

On Mon, 2023-03-06 at 11:43 +0100, Igor Mammedov wrote:
> > However, there are still problems while trying to extending support to 
> > APIC ID larger than 255 because there are many places assume APIC ID is 
> > 8-bit long.
>
> that's what I was concerned about (i.e. just enabling x2apic without fixing
> with all code that just assumes 8bit apicid).

Even before you extend the physical APIC IDs past 254 or 255, there's
still the issue that 255 isn't a broadcast in X2APIC mode. And that
*logical* addressing will use more than 8 bits even when the physical
IDs are lower.

> > One of that is interrupt remapping which returns 32-bit 
> > destination ID but uses MSI (which has 8-bit destination) to send to 
> > APIC. I will look more into this.

The translated 'output' from the interrupt remapping doesn't "use MSI",
in the sense of a write transaction which happens to go to addresses in
the 0x00000000FEExxxxx range. The *input* to interrupt remapping comes
from that intercept.

The output is already "known" to be an MSI message, with a full 64-bit
address and 32-bit data, and the KVM API puts the high 24 bits of the
target APIC ID into the high word of the address.

If you look at the "generic" x86_iommu_irq_to_msi_message() in
hw/intc/x86-iommu.c you'll see it's also using the same trick:

    msg.__addr_hi = irq->dest & 0xffffff00;


That hack isn't guest-visible. There *is* a trick which is guest-
visible, implemented by both Hyper-V and KVM, which is to recognise
that actually there was an 'Extended Destination ID' field in bits 4-11
of the actual 32-bit MSI. Intel eventually used the low bit as the
selector for 'Remappable Format' MSI messages, but we've used the other
seven to extend the APIC ID to 15 bits even in a guest-visible way,
allowing up to 32768 CPUs without having to expose a virtual IOMMU.

But that should get translated to the KVM format with the high bits in
the top 32 bits of the address, fairly much transparently. All you need
to do is ensure that the TCG X2APIC support takes that KVM format, and
that it all enabled in the right circumstances.

[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 5965 bytes --]

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/4] apic: add support for x2APIC mode
  2023-02-21 16:04 ` [PATCH 1/4] apic: add support for x2APIC mode Bui Quang Minh
  2023-02-24 14:29   ` Igor Mammedov
@ 2023-03-06 16:01   ` David Woodhouse
  1 sibling, 0 replies; 22+ messages in thread
From: David Woodhouse @ 2023-03-06 16:01 UTC (permalink / raw)
  To: Bui Quang Minh, qemu-devel
  Cc: Paolo Bonzini, Richard Henderson, Eduardo Habkost,
	Michael S . Tsirkin, Marcel Apfelbaum

[-- Attachment #1: Type: text/plain, Size: 1685 bytes --]

On Tue, 2023-02-21 at 23:04 +0700, Bui Quang Minh wrote:
> @@ -454,7 +500,7 @@ static int apic_find_dest(uint8_t dest)
>  }
>  
>  static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask,
> -                                      uint8_t dest, uint8_t dest_mode)
> +                                      uint32_t dest, uint8_t dest_mode)
>  {
>      APICCommonState *apic_iter;
>      int i;


I think somewhere here between these two hunks, you've forgotten to
stop interpreting 0xFF as broadcast when you're in X2APIC mode.

> @@ -474,14 +520,22 @@ static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask,
>          for(i = 0; i < MAX_APICS; i++) {
>              apic_iter = local_apics[i];
>              if (apic_iter) {
> -                if (apic_iter->dest_mode == 0xf) {
> -                    if (dest & apic_iter->log_dest)
> -                        apic_set_bit(deliver_bitmask, i);
> -                } else if (apic_iter->dest_mode == 0x0) {
> -                    if ((dest & 0xf0) == (apic_iter->log_dest & 0xf0) &&
> -                        (dest & apic_iter->log_dest & 0x0f)) {
> +                /* x2APIC mode */
> +                if (apic_iter->apicbase & MSR_IA32_APICBASE_EXTD) {
> +                    if ((dest & 0xffff0000) == (apic_iter->log_dest & 0xffff0000) &&
> +                        (dest & apic_iter->log_dest & 0xffff)) {



[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 5965 bytes --]

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/4] apic: add support for x2APIC mode
  2023-03-06 15:51                 ` David Woodhouse
@ 2023-03-06 16:39                   ` Bui Quang Minh
  2023-03-06 16:50                     ` David Woodhouse
  2023-03-07 11:25                   ` Igor Mammedov
  1 sibling, 1 reply; 22+ messages in thread
From: Bui Quang Minh @ 2023-03-06 16:39 UTC (permalink / raw)
  To: David Woodhouse, Igor Mammedov
  Cc: qemu-devel, Paolo Bonzini, Richard Henderson, Eduardo Habkost,
	Michael S . Tsirkin, Marcel Apfelbaum

On 3/6/23 22:51, David Woodhouse wrote:
> On Mon, 2023-03-06 at 11:43 +0100, Igor Mammedov wrote:
>>> However, there are still problems while trying to extending support to
>>> APIC ID larger than 255 because there are many places assume APIC ID is
>>> 8-bit long.
>>
>> that's what I was concerned about (i.e. just enabling x2apic without fixing
>> with all code that just assumes 8bit apicid).
> 
> Even before you extend the physical APIC IDs past 254 or 255, there's
> still the issue that 255 isn't a broadcast in X2APIC mode. And that
> *logical* addressing will use more than 8 bits even when the physical
> IDs are lower.
> 
>>> One of that is interrupt remapping which returns 32-bit
>>> destination ID but uses MSI (which has 8-bit destination) to send to
>>> APIC. I will look more into this.
> 
> The translated 'output' from the interrupt remapping doesn't "use MSI",
> in the sense of a write transaction which happens to go to addresses in
> the 0x00000000FEExxxxx range. The *input* to interrupt remapping comes
> from that intercept.
> 
> The output is already "known" to be an MSI message, with a full 64-bit
> address and 32-bit data, and the KVM API puts the high 24 bits of the
> target APIC ID into the high word of the address.
> 
> If you look at the "generic" x86_iommu_irq_to_msi_message() in
> hw/intc/x86-iommu.c you'll see it's also using the same trick:
> 
>      msg.__addr_hi = irq->dest & 0xffffff00;

Yeah, I see that trick too, with this hunk and temporarily disable 
broadcast destination id 0xff in physical mode, I am able to boot Linux 
with 255 CPUs (I can't see how to use few CPUs but configure with high 
APIC ID)

@@ -814,7 +816,12 @@ static void apic_send_msi(MSIMessage *msi)
  {
      uint64_t addr = msi->address;
      uint32_t data = msi->data;
-    uint8_t dest = (addr & MSI_ADDR_DEST_ID_MASK) >> 
MSI_ADDR_DEST_ID_SHIFT;
+    uint32_t dest = (addr & MSI_ADDR_DEST_ID_MASK) >> 
MSI_ADDR_DEST_ID_SHIFT;
+    /*
+     * The higher 3 bytes of destination id is stored in higher word of
+     * msi address. See x86_iommu_irq_to_msi_message()
+     */
+    dest = dest | (addr >> 32);

I am reading the manual now, looks like broadcast destination id in 
x2APIC is 0xffff_ffff in both physical and logic mode.

By the way, thank you very much for your review.
Quang Minh


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/4] apic: add support for x2APIC mode
  2023-03-06 16:39                   ` Bui Quang Minh
@ 2023-03-06 16:50                     ` David Woodhouse
  2023-03-07 11:34                       ` Igor Mammedov
  0 siblings, 1 reply; 22+ messages in thread
From: David Woodhouse @ 2023-03-06 16:50 UTC (permalink / raw)
  To: Bui Quang Minh, Igor Mammedov
  Cc: qemu-devel, Paolo Bonzini, Richard Henderson, Eduardo Habkost,
	Michael S . Tsirkin, Marcel Apfelbaum

[-- Attachment #1: Type: text/plain, Size: 3016 bytes --]

On Mon, 2023-03-06 at 23:39 +0700, Bui Quang Minh wrote:
> On 3/6/23 22:51, David Woodhouse wrote:
> > On Mon, 2023-03-06 at 11:43 +0100, Igor Mammedov wrote:
> > > > However, there are still problems while trying to extending support to
> > > > APIC ID larger than 255 because there are many places assume APIC ID is
> > > > 8-bit long.
> > > 
> > > that's what I was concerned about (i.e. just enabling x2apic without fixing
> > > with all code that just assumes 8bit apicid).
> > 
> > Even before you extend the physical APIC IDs past 254 or 255, there's
> > still the issue that 255 isn't a broadcast in X2APIC mode. And that
> > *logical* addressing will use more than 8 bits even when the physical
> > IDs are lower.
> > 
> > > > One of that is interrupt remapping which returns 32-bit
> > > > destination ID but uses MSI (which has 8-bit destination) to send to
> > > > APIC. I will look more into this.
> > 
> > The translated 'output' from the interrupt remapping doesn't "use MSI",
> > in the sense of a write transaction which happens to go to addresses in
> > the 0x00000000FEExxxxx range. The *input* to interrupt remapping comes
> > from that intercept.
> > 
> > The output is already "known" to be an MSI message, with a full 64-bit
> > address and 32-bit data, and the KVM API puts the high 24 bits of the
> > target APIC ID into the high word of the address.
> > 
> > If you look at the "generic" x86_iommu_irq_to_msi_message() in
> > hw/intc/x86-iommu.c you'll see it's also using the same trick:
> > 
> >      msg.__addr_hi = irq->dest & 0xffffff00;
> 
> Yeah, I see that trick too, with this hunk and temporarily disable 
> broadcast destination id 0xff in physical mode, I am able to boot Linux 
> with 255 CPUs (I can't see how to use few CPUs but configure with high 
> APIC ID)

I never worked out how to explicitly assign high APIC IDs but you can
at least spread them out by explicitly setting the topology to
something weird like sockets=17,cores=3,threads=3 so that some APIC IDs
get skipped.

Of course, that doesn't let you exercise the interesting corner case of
physical APIC ID 0xff though. I wonder if there's a way of doing it
such that only CPU#0 and CPU#255 are *online* at boot, even if the rest
theoretically exist? 

> @@ -814,7 +816,12 @@ static void apic_send_msi(MSIMessage *msi)
>   {
>       uint64_t addr = msi->address;
>       uint32_t data = msi->data;
> -    uint8_t dest = (addr & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
> +    uint32_t dest = (addr & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
> +    /*
> +     * The higher 3 bytes of destination id is stored in higher word of
> +     * msi address. See x86_iommu_irq_to_msi_message()
> +     */
> +    dest = dest | (addr >> 32);
> 
> I am reading the manual now, looks like broadcast destination id in 
> x2APIC is 0xffff_ffff in both physical and logic mode.

Yep, that looks about right.

[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 5965 bytes --]

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 0/4] Support x2APIC mode with TCG accelerator
  2023-02-21 16:04 [PATCH 0/4] Support x2APIC mode with TCG accelerator Bui Quang Minh
                   ` (3 preceding siblings ...)
  2023-02-21 16:05 ` [PATCH 4/4] test/avocado: test Linux boot up in x2APIC with " Bui Quang Minh
@ 2023-03-06 17:55 ` David Woodhouse
  4 siblings, 0 replies; 22+ messages in thread
From: David Woodhouse @ 2023-03-06 17:55 UTC (permalink / raw)
  To: Bui Quang Minh, qemu-devel
  Cc: Paolo Bonzini, Richard Henderson, Eduardo Habkost,
	Michael S . Tsirkin, Marcel Apfelbaum

[-- Attachment #1: Type: text/plain, Size: 690 bytes --]

On Tue, 2023-02-21 at 23:04 +0700, Bui Quang Minh wrote:
> This series implements x2APIC mode in userspace local APIC and the
> RDMSR/WRMSR helper to access x2APIC registers in x2APIC mode. With this
> series, we can now boot up Linux kernel in x2APIC with TCG accelerator.
> 
> Bui Quang Minh (4):
>   apic: add support for x2APIC mode
>   i386/tcg: implement x2APIC registers MSR access
>   intel_iommu: allow Extended Interrupt Mode when using userspace local
>     APIC
>   test/avocado: test Linux boot up in x2APIC with userspace local APIC

Please can you ensure CPUID 0x0B is correctly populated with the APIC
ID (and other information) when X2APIC is enabled?

[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 5965 bytes --]

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 4/4] test/avocado: test Linux boot up in x2APIC with userspace local APIC
  2023-02-21 16:05 ` [PATCH 4/4] test/avocado: test Linux boot up in x2APIC with " Bui Quang Minh
@ 2023-03-06 19:51   ` David Woodhouse
  2023-03-07  7:22   ` Alex Bennée
  2023-03-07 11:39   ` Igor Mammedov
  2 siblings, 0 replies; 22+ messages in thread
From: David Woodhouse @ 2023-03-06 19:51 UTC (permalink / raw)
  To: Bui Quang Minh, qemu-devel
  Cc: Paolo Bonzini, Richard Henderson, Eduardo Habkost,
	Michael S . Tsirkin, Marcel Apfelbaum

[-- Attachment #1: Type: text/plain, Size: 1366 bytes --]

On Tue, 2023-02-21 at 23:05 +0700, Bui Quang Minh wrote:
> 
> +    def test_physical_x2apic(self):
> +        """
> +        :avocado: tags=physical_x2apic
> +        """
> +
> +        self.common_vm_setup(True)
> +
> +        self.kernel_params = (self.distro.default_kernel_params +
> +                              ' quiet intel_iommu=on x2apic_phys')
> +        self.run_and_check()
> +        self.ssh_command('dmesg | grep "Switched APIC routing to physical x2apic"')
> +
> +    def test_cluster_x2apic(self):
> +        """
> +        :avocado: tags=cluster_x2apic
> +        """
> +
> +        self.common_vm_setup(True)
> +        self.kernel_params = (self.distro.default_kernel_params +
> +                              ' quiet intel_iommu=on')
> +        self.run_and_check()
> +        self.ssh_command('dmesg | grep "Switched APIC routing to cluster x2apic"')

Shouldn't Linux also enable X2APIC in physical mode if it doesn't have
an IOMMU? It'll just refuse to online any CPUs with APIC ID > 255.

For bonus points, make the Linux guest realise that we can do those
extra 7 bits of Extended Destinatation ID, which it normally detects
from the KVM_FEATURE_MSI_EXT_DEST_ID bit under KVM.

[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 5965 bytes --]

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 4/4] test/avocado: test Linux boot up in x2APIC with userspace local APIC
  2023-02-21 16:05 ` [PATCH 4/4] test/avocado: test Linux boot up in x2APIC with " Bui Quang Minh
  2023-03-06 19:51   ` David Woodhouse
@ 2023-03-07  7:22   ` Alex Bennée
  2023-03-07 11:39   ` Igor Mammedov
  2 siblings, 0 replies; 22+ messages in thread
From: Alex Bennée @ 2023-03-07  7:22 UTC (permalink / raw)
  To: Bui Quang Minh
  Cc: Paolo Bonzini, Richard Henderson, Eduardo Habkost,
	Michael S . Tsirkin, Marcel Apfelbaum, qemu-devel


Bui Quang Minh <minhquangbui99@gmail.com> writes:

> Simple test to check Linux boot up in x2APIC with userspace local APIC and
> TCG accelerator.

These aren't really simple tests because they are booting up a whole
distro and on my system at least they timeout:

  ➜  ./tests/venv/bin/avocado run tests/avocado/tcg_x2apic.py 
  JOB ID     : 9347a29b02111cc865ab5485ed7e06ad932420a3
  JOB LOG    : /home/alex/avocado/job-results/job-2023-03-06T22.17-9347a29/job.log
   (1/2) tests/avocado/tcg_x2apic.py:TCGx2APIC.test_physical_x2apic: INTERRUPTED: Test interrupted by SIGTERM\nRunner error occurred: Timeout reached\nOriginal status: ERROR\n
  {'name': '1-tests/avocado/tcg_x2apic.py:TCGx2APIC.test_physical_x2apic', 'logdir': '/home/alex/avocado/job-results/job-2023-03-06T22.17-9347a29/test-results/1-tests_... (120
  .67 s)
   (2/2) tests/avocado/tcg_x2apic.py:TCGx2APIC.test_cluster_x2apic: INTERRUPTED: Test interrupted by SIGTERM\nRunner error occurred: Timeout reached\nOriginal status: ERROR\n{
  'name': '2-tests/avocado/tcg_x2apic.py:TCGx2APIC.test_cluster_x2apic', 'logdir': '/home/alex/avocado/job-results/job-2023-03-06T22.17-9347a29/test-results/2-tests_a... (120.
  66 s)
  RESULTS    : PASS 0 | ERROR 0 | FAIL 0 | SKIP 0 | WARN 0 | INTERRUPT 2 | CANCEL 0
  JOB TIME   : 241.63 s

Most of what this exercises is already covered by the boot_linux.py
tests which I would dearly like to deprecate the TCG versions for CI
because they burn so much CI time.

Ideally what I would like are specific directed tests that exercise the
full functionality of the various emulated APICs supported by QEMU
without the weight of a full distro boot.

Do the kvm-unit-test APIC tests count? Forcing TCG seems to fail a bunch
though:

✗  env QEMU=$HOME/lsrc/qemu.git/builds/all/qemu-system-x86_64 ACCEL=tcg ./run_tests.sh -g apic
FAIL apic-split 
PASS ioapic-split (19 tests)
FAIL x2apic 
FAIL xapic

It might be the kvm-unit-tests need a slight tweak to not enable
tsc-deadline for TCG only tests?

Failing that I would accept a kernel+initrd test the focused on
exercising the various APIC paths without the weight of a full distro.

<snip>

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/4] apic: add support for x2APIC mode
  2023-03-06 15:51                 ` David Woodhouse
  2023-03-06 16:39                   ` Bui Quang Minh
@ 2023-03-07 11:25                   ` Igor Mammedov
  1 sibling, 0 replies; 22+ messages in thread
From: Igor Mammedov @ 2023-03-07 11:25 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Bui Quang Minh, qemu-devel, Paolo Bonzini, Richard Henderson,
	Eduardo Habkost, Michael S . Tsirkin, Marcel Apfelbaum

On Mon, 06 Mar 2023 15:51:45 +0000
David Woodhouse <dwmw2@infradead.org> wrote:

> On Mon, 2023-03-06 at 11:43 +0100, Igor Mammedov wrote:
> > > However, there are still problems while trying to extending support to 
> > > APIC ID larger than 255 because there are many places assume APIC ID is 
> > > 8-bit long.  
> >
> > that's what I was concerned about (i.e. just enabling x2apic without fixing
> > with all code that just assumes 8bit apicid).  
> 
> Even before you extend the physical APIC IDs past 254 or 255, there's
> still the issue that 255 isn't a broadcast in X2APIC mode. And that
> *logical* addressing will use more than 8 bits even when the physical
> IDs are lower.
> 
> > > One of that is interrupt remapping which returns 32-bit 
> > > destination ID but uses MSI (which has 8-bit destination) to send to 
> > > APIC. I will look more into this.  
> 
> The translated 'output' from the interrupt remapping doesn't "use MSI",
> in the sense of a write transaction which happens to go to addresses in
> the 0x00000000FEExxxxx range. The *input* to interrupt remapping comes
> from that intercept.
> 
> The output is already "known" to be an MSI message, with a full 64-bit
> address and 32-bit data, and the KVM API puts the high 24 bits of the
> target APIC ID into the high word of the address.
> 
> If you look at the "generic" x86_iommu_irq_to_msi_message() in
> hw/intc/x86-iommu.c you'll see it's also using the same trick:
> 
>     msg.__addr_hi = irq->dest & 0xffffff00;
> 
> 
> That hack isn't guest-visible. There *is* a trick which is guest-
> visible, implemented by both Hyper-V and KVM, which is to recognise
> that actually there was an 'Extended Destination ID' field in bits 4-11
> of the actual 32-bit MSI. Intel eventually used the low bit as the
> selector for 'Remappable Format' MSI messages, but we've used the other
> seven to extend the APIC ID to 15 bits even in a guest-visible way,
> allowing up to 32768 CPUs without having to expose a virtual IOMMU.
> 
> But that should get translated to the KVM format with the high bits in
> the top 32 bits of the address, fairly much transparently. All you need
> to do is ensure that the TCG X2APIC support takes that KVM format, and
> that it all enabled in the right circumstances.
I wouldn't bother with 'Extended Destination ID' KVM trick for emulated
x2apic and just limit impl. to what real hardware does.
Though potentially supporting it in TCG mode could be used for CI tests
to make sure we do not regress it (if someone bothered to write test-cases
for it).



^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/4] apic: add support for x2APIC mode
  2023-03-06 16:50                     ` David Woodhouse
@ 2023-03-07 11:34                       ` Igor Mammedov
  0 siblings, 0 replies; 22+ messages in thread
From: Igor Mammedov @ 2023-03-07 11:34 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Bui Quang Minh, qemu-devel, Paolo Bonzini, Richard Henderson,
	Eduardo Habkost, Michael S . Tsirkin, Marcel Apfelbaum

On Mon, 06 Mar 2023 16:50:29 +0000
David Woodhouse <dwmw2@infradead.org> wrote:

> On Mon, 2023-03-06 at 23:39 +0700, Bui Quang Minh wrote:
> > On 3/6/23 22:51, David Woodhouse wrote:  
> > > On Mon, 2023-03-06 at 11:43 +0100, Igor Mammedov wrote:  
> > > > > However, there are still problems while trying to extending support to
> > > > > APIC ID larger than 255 because there are many places assume APIC ID is
> > > > > 8-bit long.  
> > > > 
> > > > that's what I was concerned about (i.e. just enabling x2apic without fixing
> > > > with all code that just assumes 8bit apicid).  
> > > 
> > > Even before you extend the physical APIC IDs past 254 or 255, there's
> > > still the issue that 255 isn't a broadcast in X2APIC mode. And that
> > > *logical* addressing will use more than 8 bits even when the physical
> > > IDs are lower.
> > >   
> > > > > One of that is interrupt remapping which returns 32-bit
> > > > > destination ID but uses MSI (which has 8-bit destination) to send to
> > > > > APIC. I will look more into this.  
> > > 
> > > The translated 'output' from the interrupt remapping doesn't "use MSI",
> > > in the sense of a write transaction which happens to go to addresses in
> > > the 0x00000000FEExxxxx range. The *input* to interrupt remapping comes
> > > from that intercept.
> > > 
> > > The output is already "known" to be an MSI message, with a full 64-bit
> > > address and 32-bit data, and the KVM API puts the high 24 bits of the
> > > target APIC ID into the high word of the address.
> > > 
> > > If you look at the "generic" x86_iommu_irq_to_msi_message() in
> > > hw/intc/x86-iommu.c you'll see it's also using the same trick:
> > > 
> > >      msg.__addr_hi = irq->dest & 0xffffff00;  
> > 
> > Yeah, I see that trick too, with this hunk and temporarily disable 
> > broadcast destination id 0xff in physical mode, I am able to boot Linux 
> > with 255 CPUs (I can't see how to use few CPUs but configure with high 
> > APIC ID)  
> 
> I never worked out how to explicitly assign high APIC IDs but you can
> at least spread them out by explicitly setting the topology to
> something weird like sockets=17,cores=3,threads=3 so that some APIC IDs
> get skipped.
> 
> Of course, that doesn't let you exercise the interesting corner case of
> physical APIC ID 0xff though. I wonder if there's a way of doing it
> such that only CPU#0 and CPU#255 are *online* at boot, even if the rest
> theoretically exist? 

you can have arbitrary (withing -smp limits) vcpu at startup time by
using -device foo-cpu-type,topo-ids-here (modulo auto-created ones on
behalf -smp X value)

Possible vcpus for given -M/-smp/-cpu combination one can get using
hotpluggable-cpus HMP command or its QMP counterpart.
 
> > @@ -814,7 +816,12 @@ static void apic_send_msi(MSIMessage *msi)
> >   {
> >       uint64_t addr = msi->address;
> >       uint32_t data = msi->data;
> > -    uint8_t dest = (addr & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
> > +    uint32_t dest = (addr & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
> > +    /*
> > +     * The higher 3 bytes of destination id is stored in higher word of
> > +     * msi address. See x86_iommu_irq_to_msi_message()
> > +     */
> > +    dest = dest | (addr >> 32);
> > 
> > I am reading the manual now, looks like broadcast destination id in 
> > x2APIC is 0xffff_ffff in both physical and logic mode.  
> 
> Yep, that looks about right.



^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 4/4] test/avocado: test Linux boot up in x2APIC with userspace local APIC
  2023-02-21 16:05 ` [PATCH 4/4] test/avocado: test Linux boot up in x2APIC with " Bui Quang Minh
  2023-03-06 19:51   ` David Woodhouse
  2023-03-07  7:22   ` Alex Bennée
@ 2023-03-07 11:39   ` Igor Mammedov
  2 siblings, 0 replies; 22+ messages in thread
From: Igor Mammedov @ 2023-03-07 11:39 UTC (permalink / raw)
  To: Bui Quang Minh
  Cc: qemu-devel, Paolo Bonzini, Richard Henderson, Eduardo Habkost,
	Michael S . Tsirkin, Marcel Apfelbaum

On Tue, 21 Feb 2023 23:05:00 +0700
Bui Quang Minh <minhquangbui99@gmail.com> wrote:

> Simple test to check Linux boot up in x2APIC with userspace local APIC and
> TCG accelerator.

just an idea, while booting linux would give some coverage,
we probably would get much better coverage by using apic test case
from kvm unit test:
  https://gitlab.com/kvm-unit-tests/kvm-unit-tests/-/blob/master/x86/apic.c

> 
> Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
> ---
>  tests/avocado/tcg_x2apic.py | 91 +++++++++++++++++++++++++++++++++++++
>  1 file changed, 91 insertions(+)
>  create mode 100644 tests/avocado/tcg_x2apic.py
> 
> diff --git a/tests/avocado/tcg_x2apic.py b/tests/avocado/tcg_x2apic.py
> new file mode 100644
> index 0000000000..ff4f27017c
> --- /dev/null
> +++ b/tests/avocado/tcg_x2apic.py
> @@ -0,0 +1,91 @@
> +# x2APIC with TCG accelerator tests
> +# Based on intel_iommu.py, INTEL_IOMMU Functional tests
> +#
> +# Copyright (c) Bui Quang Minh <minhquangbui99@gmail.com>
> +#
> +# This work is licensed under the terms of the GNU GPL, version 2 or
> +# later. See the COPYING file in the top-level directory.
> +import os
> +
> +from avocado import skipIf
> +from avocado_qemu import LinuxTest
> +
> +@skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab')
> +class TCGx2APIC(LinuxTest):
> +    """
> +    :avocado: tags=arch:x86_64
> +    :avocado: tags=distro:fedora
> +    :avocado: tags=distro_version:31
> +    :avocado: tags=machine:q35
> +    :avocado: tags=accel:tcg
> +    :avocado: tags=x2apic
> +    """
> +
> +    IOMMU_ADDON = ',iommu_platform=on,disable-modern=off,disable-legacy=on'
> +    kernel_path = None
> +    initrd_path = None
> +    kernel_params = None
> +
> +    def set_up_boot(self):
> +        path = self.download_boot()
> +        self.vm.add_args('-device', 'virtio-blk-pci,bus=pcie.0,scsi=off,' +
> +                         'drive=drv0,id=virtio-disk0,bootindex=1,'
> +                         'werror=stop,rerror=stop' + self.IOMMU_ADDON)
> +        self.vm.add_args('-device', 'virtio-gpu-pci' + self.IOMMU_ADDON)
> +        self.vm.add_args('-drive',
> +                         'file=%s,if=none,cache=writethrough,id=drv0' % path)
> +
> +    def setUp(self):
> +        super(TCGx2APIC, self).setUp(None, 'virtio-net-pci' + self.IOMMU_ADDON)
> +
> +    def add_common_args(self):
> +        self.vm.add_args('-device', 'virtio-rng-pci,rng=rng0')
> +        self.vm.add_args('-object',
> +                         'rng-random,id=rng0,filename=/dev/urandom')
> +
> +    def common_vm_setup(self, custom_kernel=None):
> +        self.require_accelerator('tcg')
> +        self.add_common_args()
> +        self.vm.add_args('-accel', 'tcg')
> +        self.vm.add_args('-device', 'intel-iommu,intremap=on,eim=on')
> +        self.vm.add_args('-cpu', 'qemu64,+x2apic')
> +
> +        if custom_kernel is None:
> +            return
> +
> +        kernel_url = self.distro.pxeboot_url + 'vmlinuz'
> +        initrd_url = self.distro.pxeboot_url + 'initrd.img'
> +        self.kernel_path = self.fetch_asset(kernel_url)
> +        self.initrd_path = self.fetch_asset(initrd_url)
> +
> +    def run_and_check(self):
> +        if self.kernel_path:
> +            self.vm.add_args('-kernel', self.kernel_path,
> +                             '-append', self.kernel_params,
> +                             '-initrd', self.initrd_path)
> +        self.launch_and_wait()
> +        self.ssh_command('cat /proc/cmdline')
> +        self.ssh_command('dmesg | grep "x2apic enabled"')
> +
> +    def test_physical_x2apic(self):
> +        """
> +        :avocado: tags=physical_x2apic
> +        """
> +
> +        self.common_vm_setup(True)
> +
> +        self.kernel_params = (self.distro.default_kernel_params +
> +                              ' quiet intel_iommu=on x2apic_phys')
> +        self.run_and_check()
> +        self.ssh_command('dmesg | grep "Switched APIC routing to physical x2apic"')
> +
> +    def test_cluster_x2apic(self):
> +        """
> +        :avocado: tags=cluster_x2apic
> +        """
> +
> +        self.common_vm_setup(True)
> +        self.kernel_params = (self.distro.default_kernel_params +
> +                              ' quiet intel_iommu=on')
> +        self.run_and_check()
> +        self.ssh_command('dmesg | grep "Switched APIC routing to cluster x2apic"')



^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2023-03-07 11:40 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-02-21 16:04 [PATCH 0/4] Support x2APIC mode with TCG accelerator Bui Quang Minh
2023-02-21 16:04 ` [PATCH 1/4] apic: add support for x2APIC mode Bui Quang Minh
2023-02-24 14:29   ` Igor Mammedov
2023-02-25 10:15     ` Bui Quang Minh
2023-02-27 16:07       ` Igor Mammedov
2023-02-28 14:34         ` Bui Quang Minh
2023-02-28 16:39           ` Igor Mammedov
2023-03-04 14:10             ` Bui Quang Minh
2023-03-06 10:43               ` Igor Mammedov
2023-03-06 15:51                 ` David Woodhouse
2023-03-06 16:39                   ` Bui Quang Minh
2023-03-06 16:50                     ` David Woodhouse
2023-03-07 11:34                       ` Igor Mammedov
2023-03-07 11:25                   ` Igor Mammedov
2023-03-06 16:01   ` David Woodhouse
2023-02-21 16:04 ` [PATCH 2/4] i386/tcg: implement x2APIC registers MSR access Bui Quang Minh
2023-02-21 16:04 ` [PATCH 3/4] intel_iommu: allow Extended Interrupt Mode when using userspace local APIC Bui Quang Minh
2023-02-21 16:05 ` [PATCH 4/4] test/avocado: test Linux boot up in x2APIC with " Bui Quang Minh
2023-03-06 19:51   ` David Woodhouse
2023-03-07  7:22   ` Alex Bennée
2023-03-07 11:39   ` Igor Mammedov
2023-03-06 17:55 ` [PATCH 0/4] Support x2APIC mode with TCG accelerator David Woodhouse

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.