All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/4] target/i386: Fix sanity check on max APIC ID / X2APIC enablement
@ 2022-03-14 14:25 ` David Woodhouse
  0 siblings, 0 replies; 41+ messages in thread
From: David Woodhouse @ 2022-03-14 14:25 UTC (permalink / raw)
  To: qemu-devel
  Cc: Michael S. Tsirkin, Peter Xu, Jason Wang, Marcel Apfelbaum,
	Paolo Bonzini, Richard Henderson, Eduardo Habkost,
	Marcelo Tosatti, kvm, Claudio Fontana, Igor Mammedov,
	Daniel P . Berrangé

The check on x86ms->apic_id_limit in pc_machine_done() had two problems.

Firstly, we need KVM to support the X2APIC API in order to allow IRQ
delivery to APICs >= 255. So we need to call/check kvm_enable_x2apic(),
which was done elsewhere in *some* cases but not all.

Secondly, microvm needs the same check. So move it from pc_machine_done()
to x86_cpus_init() where it will work for both.

The check in kvm_cpu_instance_init() is now redundant and can be dropped.

Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Acked-by: Claudio Fontana <cfontana@suse.de>
---
 hw/i386/pc.c              |  8 --------
 hw/i386/x86.c             | 16 ++++++++++++++++
 target/i386/kvm/kvm-cpu.c |  2 +-
 3 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index fd55fc725c..d3ab28fec5 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -740,14 +740,6 @@ void pc_machine_done(Notifier *notifier, void *data)
         /* update FW_CFG_NB_CPUS to account for -device added CPUs */
         fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus);
     }
-
-
-    if (x86ms->apic_id_limit > 255 && !xen_enabled() &&
-        !kvm_irqchip_in_kernel()) {
-        error_report("current -smp configuration requires kernel "
-                     "irqchip support.");
-        exit(EXIT_FAILURE);
-    }
 }
 
 void pc_guest_info_init(PCMachineState *pcms)
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index 4cf107baea..8da55d58ea 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -39,6 +39,7 @@
 #include "sysemu/replay.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/cpu-timers.h"
+#include "sysemu/xen.h"
 #include "trace.h"
 
 #include "hw/i386/x86.h"
@@ -123,6 +124,21 @@ void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version)
      */
     x86ms->apic_id_limit = x86_cpu_apic_id_from_index(x86ms,
                                                       ms->smp.max_cpus - 1) + 1;
+
+    /*
+     * Can we support APIC ID 255 or higher?
+     *
+     * Under Xen: yes.
+     * With userspace emulated lapic: no
+     * With KVM's in-kernel lapic: only if X2APIC API is enabled.
+     */
+    if (x86ms->apic_id_limit > 255 && !xen_enabled() &&
+        (!kvm_irqchip_in_kernel() || !kvm_enable_x2apic())) {
+        error_report("current -smp configuration requires kernel "
+                     "irqchip and X2APIC API support.");
+        exit(EXIT_FAILURE);
+    }
+
     possible_cpus = mc->possible_cpu_arch_ids(ms);
     for (i = 0; i < ms->smp.cpus; i++) {
         x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal);
diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c
index d95028018e..c60cb2dafb 100644
--- a/target/i386/kvm/kvm-cpu.c
+++ b/target/i386/kvm/kvm-cpu.c
@@ -165,7 +165,7 @@ static void kvm_cpu_instance_init(CPUState *cs)
         /* only applies to builtin_x86_defs cpus */
         if (!kvm_irqchip_in_kernel()) {
             x86_cpu_change_kvm_default("x2apic", "off");
-        } else if (kvm_irqchip_is_split() && kvm_enable_x2apic()) {
+        } else if (kvm_irqchip_is_split()) {
             x86_cpu_change_kvm_default("kvm-msi-ext-dest-id", "on");
         }
 
-- 
2.33.1


^ permalink raw reply related	[flat|nested] 41+ messages in thread

* [PATCH 1/4] target/i386: Fix sanity check on max APIC ID / X2APIC enablement
@ 2022-03-14 14:25 ` David Woodhouse
  0 siblings, 0 replies; 41+ messages in thread
From: David Woodhouse @ 2022-03-14 14:25 UTC (permalink / raw)
  To: qemu-devel
  Cc: Eduardo Habkost, Daniel P . Berrangé,
	kvm, Michael S. Tsirkin, Jason Wang, Marcelo Tosatti,
	Richard Henderson, Peter Xu, Claudio Fontana, Igor Mammedov,
	Paolo Bonzini

The check on x86ms->apic_id_limit in pc_machine_done() had two problems.

Firstly, we need KVM to support the X2APIC API in order to allow IRQ
delivery to APICs >= 255. So we need to call/check kvm_enable_x2apic(),
which was done elsewhere in *some* cases but not all.

Secondly, microvm needs the same check. So move it from pc_machine_done()
to x86_cpus_init() where it will work for both.

The check in kvm_cpu_instance_init() is now redundant and can be dropped.

Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Acked-by: Claudio Fontana <cfontana@suse.de>
---
 hw/i386/pc.c              |  8 --------
 hw/i386/x86.c             | 16 ++++++++++++++++
 target/i386/kvm/kvm-cpu.c |  2 +-
 3 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index fd55fc725c..d3ab28fec5 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -740,14 +740,6 @@ void pc_machine_done(Notifier *notifier, void *data)
         /* update FW_CFG_NB_CPUS to account for -device added CPUs */
         fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus);
     }
-
-
-    if (x86ms->apic_id_limit > 255 && !xen_enabled() &&
-        !kvm_irqchip_in_kernel()) {
-        error_report("current -smp configuration requires kernel "
-                     "irqchip support.");
-        exit(EXIT_FAILURE);
-    }
 }
 
 void pc_guest_info_init(PCMachineState *pcms)
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index 4cf107baea..8da55d58ea 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -39,6 +39,7 @@
 #include "sysemu/replay.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/cpu-timers.h"
+#include "sysemu/xen.h"
 #include "trace.h"
 
 #include "hw/i386/x86.h"
@@ -123,6 +124,21 @@ void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version)
      */
     x86ms->apic_id_limit = x86_cpu_apic_id_from_index(x86ms,
                                                       ms->smp.max_cpus - 1) + 1;
+
+    /*
+     * Can we support APIC ID 255 or higher?
+     *
+     * Under Xen: yes.
+     * With userspace emulated lapic: no
+     * With KVM's in-kernel lapic: only if X2APIC API is enabled.
+     */
+    if (x86ms->apic_id_limit > 255 && !xen_enabled() &&
+        (!kvm_irqchip_in_kernel() || !kvm_enable_x2apic())) {
+        error_report("current -smp configuration requires kernel "
+                     "irqchip and X2APIC API support.");
+        exit(EXIT_FAILURE);
+    }
+
     possible_cpus = mc->possible_cpu_arch_ids(ms);
     for (i = 0; i < ms->smp.cpus; i++) {
         x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal);
diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c
index d95028018e..c60cb2dafb 100644
--- a/target/i386/kvm/kvm-cpu.c
+++ b/target/i386/kvm/kvm-cpu.c
@@ -165,7 +165,7 @@ static void kvm_cpu_instance_init(CPUState *cs)
         /* only applies to builtin_x86_defs cpus */
         if (!kvm_irqchip_in_kernel()) {
             x86_cpu_change_kvm_default("x2apic", "off");
-        } else if (kvm_irqchip_is_split() && kvm_enable_x2apic()) {
+        } else if (kvm_irqchip_is_split()) {
             x86_cpu_change_kvm_default("kvm-msi-ext-dest-id", "on");
         }
 
-- 
2.33.1



^ permalink raw reply related	[flat|nested] 41+ messages in thread

* [PATCH 2/4] intel_iommu: Support IR-only mode without DMA translation
  2022-03-14 14:25 ` David Woodhouse
@ 2022-03-14 14:25   ` David Woodhouse
  -1 siblings, 0 replies; 41+ messages in thread
From: David Woodhouse @ 2022-03-14 14:25 UTC (permalink / raw)
  To: qemu-devel
  Cc: Michael S. Tsirkin, Peter Xu, Jason Wang, Marcel Apfelbaum,
	Paolo Bonzini, Richard Henderson, Eduardo Habkost,
	Marcelo Tosatti, kvm, Claudio Fontana, Igor Mammedov,
	Daniel P . Berrangé

From: David Woodhouse <dwmw@amazon.co.uk>

By setting none of the SAGAW bits we can indicate to a guest that DMA
translation isn't supported. Tested by booting Windows 10, as well as
Linux guests with the fix at https://git.kernel.org/torvalds/c/c40aaaac10

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Reviewed-by: Peter Xu <peterx@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
 hw/i386/intel_iommu.c         | 14 ++++++++++----
 include/hw/i386/intel_iommu.h |  1 +
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 32471a44cb..948c653e74 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -2214,7 +2214,7 @@ static void vtd_handle_gcmd_write(IntelIOMMUState *s)
     uint32_t changed = status ^ val;
 
     trace_vtd_reg_write_gcmd(status, val);
-    if (changed & VTD_GCMD_TE) {
+    if ((changed & VTD_GCMD_TE) && s->dma_translation) {
         /* Translation enable/disable */
         vtd_handle_gcmd_te(s, val & VTD_GCMD_TE);
     }
@@ -3122,6 +3122,7 @@ static Property vtd_properties[] = {
     DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode, FALSE),
     DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control, false),
     DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true),
+    DEFINE_PROP_BOOL("dma-translation", IntelIOMMUState, dma_translation, true),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -3627,12 +3628,17 @@ static void vtd_init(IntelIOMMUState *s)
     s->next_frcd_reg = 0;
     s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND |
              VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS |
-             VTD_CAP_SAGAW_39bit | VTD_CAP_MGAW(s->aw_bits);
+             VTD_CAP_MGAW(s->aw_bits);
     if (s->dma_drain) {
         s->cap |= VTD_CAP_DRAIN;
     }
-    if (s->aw_bits == VTD_HOST_AW_48BIT) {
-        s->cap |= VTD_CAP_SAGAW_48bit;
+    if (s->dma_translation) {
+            if (s->aw_bits >= VTD_HOST_AW_39BIT) {
+                    s->cap |= VTD_CAP_SAGAW_39bit;
+            }
+            if (s->aw_bits >= VTD_HOST_AW_48BIT) {
+                    s->cap |= VTD_CAP_SAGAW_48bit;
+            }
     }
     s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
 
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 3b5ac869db..d898be85ce 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -267,6 +267,7 @@ struct IntelIOMMUState {
     bool buggy_eim;                 /* Force buggy EIM unless eim=off */
     uint8_t aw_bits;                /* Host/IOVA address width (in bits) */
     bool dma_drain;                 /* Whether DMA r/w draining enabled */
+    bool dma_translation;           /* Whether DMA translation supported */
 
     /*
      * Protects IOMMU states in general.  Currently it protects the
-- 
2.33.1


^ permalink raw reply related	[flat|nested] 41+ messages in thread

* [PATCH 2/4] intel_iommu: Support IR-only mode without DMA translation
@ 2022-03-14 14:25   ` David Woodhouse
  0 siblings, 0 replies; 41+ messages in thread
From: David Woodhouse @ 2022-03-14 14:25 UTC (permalink / raw)
  To: qemu-devel
  Cc: Eduardo Habkost, Daniel P . Berrangé,
	kvm, Michael S. Tsirkin, Jason Wang, Marcelo Tosatti,
	Richard Henderson, Peter Xu, Claudio Fontana, Igor Mammedov,
	Paolo Bonzini

From: David Woodhouse <dwmw@amazon.co.uk>

By setting none of the SAGAW bits we can indicate to a guest that DMA
translation isn't supported. Tested by booting Windows 10, as well as
Linux guests with the fix at https://git.kernel.org/torvalds/c/c40aaaac10

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Reviewed-by: Peter Xu <peterx@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
 hw/i386/intel_iommu.c         | 14 ++++++++++----
 include/hw/i386/intel_iommu.h |  1 +
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 32471a44cb..948c653e74 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -2214,7 +2214,7 @@ static void vtd_handle_gcmd_write(IntelIOMMUState *s)
     uint32_t changed = status ^ val;
 
     trace_vtd_reg_write_gcmd(status, val);
-    if (changed & VTD_GCMD_TE) {
+    if ((changed & VTD_GCMD_TE) && s->dma_translation) {
         /* Translation enable/disable */
         vtd_handle_gcmd_te(s, val & VTD_GCMD_TE);
     }
@@ -3122,6 +3122,7 @@ static Property vtd_properties[] = {
     DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode, FALSE),
     DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control, false),
     DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true),
+    DEFINE_PROP_BOOL("dma-translation", IntelIOMMUState, dma_translation, true),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -3627,12 +3628,17 @@ static void vtd_init(IntelIOMMUState *s)
     s->next_frcd_reg = 0;
     s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND |
              VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS |
-             VTD_CAP_SAGAW_39bit | VTD_CAP_MGAW(s->aw_bits);
+             VTD_CAP_MGAW(s->aw_bits);
     if (s->dma_drain) {
         s->cap |= VTD_CAP_DRAIN;
     }
-    if (s->aw_bits == VTD_HOST_AW_48BIT) {
-        s->cap |= VTD_CAP_SAGAW_48bit;
+    if (s->dma_translation) {
+            if (s->aw_bits >= VTD_HOST_AW_39BIT) {
+                    s->cap |= VTD_CAP_SAGAW_39bit;
+            }
+            if (s->aw_bits >= VTD_HOST_AW_48BIT) {
+                    s->cap |= VTD_CAP_SAGAW_48bit;
+            }
     }
     s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
 
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 3b5ac869db..d898be85ce 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -267,6 +267,7 @@ struct IntelIOMMUState {
     bool buggy_eim;                 /* Force buggy EIM unless eim=off */
     uint8_t aw_bits;                /* Host/IOVA address width (in bits) */
     bool dma_drain;                 /* Whether DMA r/w draining enabled */
+    bool dma_translation;           /* Whether DMA translation supported */
 
     /*
      * Protects IOMMU states in general.  Currently it protects the
-- 
2.33.1



^ permalink raw reply related	[flat|nested] 41+ messages in thread

* [PATCH 3/4] intel_iommu: Only allow interrupt remapping to be enabled if it's supported
  2022-03-14 14:25 ` David Woodhouse
@ 2022-03-14 14:25   ` David Woodhouse
  -1 siblings, 0 replies; 41+ messages in thread
From: David Woodhouse @ 2022-03-14 14:25 UTC (permalink / raw)
  To: qemu-devel
  Cc: Michael S. Tsirkin, Peter Xu, Jason Wang, Marcel Apfelbaum,
	Paolo Bonzini, Richard Henderson, Eduardo Habkost,
	Marcelo Tosatti, kvm, Claudio Fontana, Igor Mammedov,
	Daniel P . Berrangé

From: David Woodhouse <dwmw@amazon.co.uk>

We should probably check if we were meant to be exposing IR, before
letting the guest turn the IRE bit on.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Reviewed-by: Peter Xu <peterx@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
 hw/i386/intel_iommu.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 948c653e74..e32cb933bd 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -2209,6 +2209,7 @@ static void vtd_handle_gcmd_ire(IntelIOMMUState *s, bool en)
 /* Handle write to Global Command Register */
 static void vtd_handle_gcmd_write(IntelIOMMUState *s)
 {
+    X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
     uint32_t status = vtd_get_long_raw(s, DMAR_GSTS_REG);
     uint32_t val = vtd_get_long_raw(s, DMAR_GCMD_REG);
     uint32_t changed = status ^ val;
@@ -2230,7 +2231,8 @@ static void vtd_handle_gcmd_write(IntelIOMMUState *s)
         /* Set/update the interrupt remapping root-table pointer */
         vtd_handle_gcmd_sirtp(s);
     }
-    if (changed & VTD_GCMD_IRE) {
+    if ((changed & VTD_GCMD_IRE) &&
+        x86_iommu_ir_supported(x86_iommu)) {
         /* Interrupt remap enable/disable */
         vtd_handle_gcmd_ire(s, val & VTD_GCMD_IRE);
     }
-- 
2.33.1


^ permalink raw reply related	[flat|nested] 41+ messages in thread

* [PATCH 3/4] intel_iommu: Only allow interrupt remapping to be enabled if it's supported
@ 2022-03-14 14:25   ` David Woodhouse
  0 siblings, 0 replies; 41+ messages in thread
From: David Woodhouse @ 2022-03-14 14:25 UTC (permalink / raw)
  To: qemu-devel
  Cc: Eduardo Habkost, Daniel P . Berrangé,
	kvm, Michael S. Tsirkin, Jason Wang, Marcelo Tosatti,
	Richard Henderson, Peter Xu, Claudio Fontana, Igor Mammedov,
	Paolo Bonzini

From: David Woodhouse <dwmw@amazon.co.uk>

We should probably check if we were meant to be exposing IR, before
letting the guest turn the IRE bit on.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Reviewed-by: Peter Xu <peterx@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
 hw/i386/intel_iommu.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 948c653e74..e32cb933bd 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -2209,6 +2209,7 @@ static void vtd_handle_gcmd_ire(IntelIOMMUState *s, bool en)
 /* Handle write to Global Command Register */
 static void vtd_handle_gcmd_write(IntelIOMMUState *s)
 {
+    X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
     uint32_t status = vtd_get_long_raw(s, DMAR_GSTS_REG);
     uint32_t val = vtd_get_long_raw(s, DMAR_GCMD_REG);
     uint32_t changed = status ^ val;
@@ -2230,7 +2231,8 @@ static void vtd_handle_gcmd_write(IntelIOMMUState *s)
         /* Set/update the interrupt remapping root-table pointer */
         vtd_handle_gcmd_sirtp(s);
     }
-    if (changed & VTD_GCMD_IRE) {
+    if ((changed & VTD_GCMD_IRE) &&
+        x86_iommu_ir_supported(x86_iommu)) {
         /* Interrupt remap enable/disable */
         vtd_handle_gcmd_ire(s, val & VTD_GCMD_IRE);
     }
-- 
2.33.1



^ permalink raw reply related	[flat|nested] 41+ messages in thread

* [PATCH 4/4] intel_iommu: Fix irqchip / X2APIC configuration checks
  2022-03-14 14:25 ` David Woodhouse
@ 2022-03-14 14:25   ` David Woodhouse
  -1 siblings, 0 replies; 41+ messages in thread
From: David Woodhouse @ 2022-03-14 14:25 UTC (permalink / raw)
  To: qemu-devel
  Cc: Michael S. Tsirkin, Peter Xu, Jason Wang, Marcel Apfelbaum,
	Paolo Bonzini, Richard Henderson, Eduardo Habkost,
	Marcelo Tosatti, kvm, Claudio Fontana, Igor Mammedov,
	Daniel P . Berrangé

We don't need to check kvm_enable_x2apic(). It's perfectly OK to support
interrupt remapping even if we can't address CPUs above 254. Kind of
pointless, but still functional.

The check on kvm_enable_x2apic() needs to happen *anyway* in order to
allow CPUs above 254 even without an IOMMU, so allow that to happen
elsewhere.

However, we do require the *split* irqchip in order to rewrite I/OAPIC
destinations. So fix that check while we're here.

Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Acked-by: Claudio Fontana <cfontana@suse.de>
---
 hw/i386/intel_iommu.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index e32cb933bd..1cc64b0efc 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -3786,15 +3786,10 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
                                               ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
     }
     if (s->intr_eim == ON_OFF_AUTO_ON && !s->buggy_eim) {
-        if (!kvm_irqchip_in_kernel()) {
+        if (!kvm_irqchip_is_split()) {
             error_setg(errp, "eim=on requires accel=kvm,kernel-irqchip=split");
             return false;
         }
-        if (!kvm_enable_x2apic()) {
-            error_setg(errp, "eim=on requires support on the KVM side"
-                             "(X2APIC_API, first shipped in v4.7)");
-            return false;
-        }
     }
 
     /* Currently only address widths supported are 39 and 48 bits */
-- 
2.33.1


^ permalink raw reply related	[flat|nested] 41+ messages in thread

* [PATCH 4/4] intel_iommu: Fix irqchip / X2APIC configuration checks
@ 2022-03-14 14:25   ` David Woodhouse
  0 siblings, 0 replies; 41+ messages in thread
From: David Woodhouse @ 2022-03-14 14:25 UTC (permalink / raw)
  To: qemu-devel
  Cc: Eduardo Habkost, Daniel P . Berrangé,
	kvm, Michael S. Tsirkin, Jason Wang, Marcelo Tosatti,
	Richard Henderson, Peter Xu, Claudio Fontana, Igor Mammedov,
	Paolo Bonzini

We don't need to check kvm_enable_x2apic(). It's perfectly OK to support
interrupt remapping even if we can't address CPUs above 254. Kind of
pointless, but still functional.

The check on kvm_enable_x2apic() needs to happen *anyway* in order to
allow CPUs above 254 even without an IOMMU, so allow that to happen
elsewhere.

However, we do require the *split* irqchip in order to rewrite I/OAPIC
destinations. So fix that check while we're here.

Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Acked-by: Claudio Fontana <cfontana@suse.de>
---
 hw/i386/intel_iommu.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index e32cb933bd..1cc64b0efc 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -3786,15 +3786,10 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
                                               ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
     }
     if (s->intr_eim == ON_OFF_AUTO_ON && !s->buggy_eim) {
-        if (!kvm_irqchip_in_kernel()) {
+        if (!kvm_irqchip_is_split()) {
             error_setg(errp, "eim=on requires accel=kvm,kernel-irqchip=split");
             return false;
         }
-        if (!kvm_enable_x2apic()) {
-            error_setg(errp, "eim=on requires support on the KVM side"
-                             "(X2APIC_API, first shipped in v4.7)");
-            return false;
-        }
     }
 
     /* Currently only address widths supported are 39 and 48 bits */
-- 
2.33.1



^ permalink raw reply related	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/4] intel_iommu: Support IR-only mode without DMA translation
  2022-03-14 14:25   ` David Woodhouse
@ 2022-03-14 15:24     ` Michael S. Tsirkin
  -1 siblings, 0 replies; 41+ messages in thread
From: Michael S. Tsirkin @ 2022-03-14 15:24 UTC (permalink / raw)
  To: David Woodhouse
  Cc: qemu-devel, Peter Xu, Jason Wang, Marcel Apfelbaum,
	Paolo Bonzini, Richard Henderson, Eduardo Habkost,
	Marcelo Tosatti, kvm, Claudio Fontana, Igor Mammedov,
	Daniel P . Berrangé

On Mon, Mar 14, 2022 at 02:25:42PM +0000, David Woodhouse wrote:
> From: David Woodhouse <dwmw@amazon.co.uk>
> 
> By setting none of the SAGAW bits we can indicate to a guest that DMA
> translation isn't supported. Tested by booting Windows 10, as well as
> Linux guests with the fix at https://git.kernel.org/torvalds/c/c40aaaac10
> 
> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
> Reviewed-by: Peter Xu <peterx@redhat.com>
> Acked-by: Jason Wang <jasowang@redhat.com>

this is borderline like a feature, but ...

> ---
>  hw/i386/intel_iommu.c         | 14 ++++++++++----
>  include/hw/i386/intel_iommu.h |  1 +
>  2 files changed, 11 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 32471a44cb..948c653e74 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -2214,7 +2214,7 @@ static void vtd_handle_gcmd_write(IntelIOMMUState *s)
>      uint32_t changed = status ^ val;
>  
>      trace_vtd_reg_write_gcmd(status, val);
> -    if (changed & VTD_GCMD_TE) {
> +    if ((changed & VTD_GCMD_TE) && s->dma_translation) {
>          /* Translation enable/disable */
>          vtd_handle_gcmd_te(s, val & VTD_GCMD_TE);
>      }
> @@ -3122,6 +3122,7 @@ static Property vtd_properties[] = {
>      DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode, FALSE),
>      DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control, false),
>      DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true),
> +    DEFINE_PROP_BOOL("dma-translation", IntelIOMMUState, dma_translation, true),
>      DEFINE_PROP_END_OF_LIST(),
>  };
>  
> @@ -3627,12 +3628,17 @@ static void vtd_init(IntelIOMMUState *s)
>      s->next_frcd_reg = 0;
>      s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND |
>               VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS |
> -             VTD_CAP_SAGAW_39bit | VTD_CAP_MGAW(s->aw_bits);
> +             VTD_CAP_MGAW(s->aw_bits);
>      if (s->dma_drain) {
>          s->cap |= VTD_CAP_DRAIN;
>      }
> -    if (s->aw_bits == VTD_HOST_AW_48BIT) {
> -        s->cap |= VTD_CAP_SAGAW_48bit;
> +    if (s->dma_translation) {
> +            if (s->aw_bits >= VTD_HOST_AW_39BIT) {
> +                    s->cap |= VTD_CAP_SAGAW_39bit;
> +            }
> +            if (s->aw_bits >= VTD_HOST_AW_48BIT) {
> +                    s->cap |= VTD_CAP_SAGAW_48bit;
> +            }
>      }
>      s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
>


... this looks like you are actually fixing aw_bits < VTD_HOST_AW_39BIT,
right? So maybe this patch is ok like this since it also fixes a
bug. Pls add this to commit log though.

  
> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
> index 3b5ac869db..d898be85ce 100644
> --- a/include/hw/i386/intel_iommu.h
> +++ b/include/hw/i386/intel_iommu.h
> @@ -267,6 +267,7 @@ struct IntelIOMMUState {
>      bool buggy_eim;                 /* Force buggy EIM unless eim=off */
>      uint8_t aw_bits;                /* Host/IOVA address width (in bits) */
>      bool dma_drain;                 /* Whether DMA r/w draining enabled */
> +    bool dma_translation;           /* Whether DMA translation supported */
>  
>      /*
>       * Protects IOMMU states in general.  Currently it protects the
> -- 
> 2.33.1


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/4] intel_iommu: Support IR-only mode without DMA translation
@ 2022-03-14 15:24     ` Michael S. Tsirkin
  0 siblings, 0 replies; 41+ messages in thread
From: Michael S. Tsirkin @ 2022-03-14 15:24 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Eduardo Habkost, Daniel P . Berrangé,
	kvm, Jason Wang, Marcelo Tosatti, Richard Henderson, qemu-devel,
	Peter Xu, Claudio Fontana, Igor Mammedov, Paolo Bonzini

On Mon, Mar 14, 2022 at 02:25:42PM +0000, David Woodhouse wrote:
> From: David Woodhouse <dwmw@amazon.co.uk>
> 
> By setting none of the SAGAW bits we can indicate to a guest that DMA
> translation isn't supported. Tested by booting Windows 10, as well as
> Linux guests with the fix at https://git.kernel.org/torvalds/c/c40aaaac10
> 
> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
> Reviewed-by: Peter Xu <peterx@redhat.com>
> Acked-by: Jason Wang <jasowang@redhat.com>

this is borderline like a feature, but ...

> ---
>  hw/i386/intel_iommu.c         | 14 ++++++++++----
>  include/hw/i386/intel_iommu.h |  1 +
>  2 files changed, 11 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 32471a44cb..948c653e74 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -2214,7 +2214,7 @@ static void vtd_handle_gcmd_write(IntelIOMMUState *s)
>      uint32_t changed = status ^ val;
>  
>      trace_vtd_reg_write_gcmd(status, val);
> -    if (changed & VTD_GCMD_TE) {
> +    if ((changed & VTD_GCMD_TE) && s->dma_translation) {
>          /* Translation enable/disable */
>          vtd_handle_gcmd_te(s, val & VTD_GCMD_TE);
>      }
> @@ -3122,6 +3122,7 @@ static Property vtd_properties[] = {
>      DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode, FALSE),
>      DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control, false),
>      DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true),
> +    DEFINE_PROP_BOOL("dma-translation", IntelIOMMUState, dma_translation, true),
>      DEFINE_PROP_END_OF_LIST(),
>  };
>  
> @@ -3627,12 +3628,17 @@ static void vtd_init(IntelIOMMUState *s)
>      s->next_frcd_reg = 0;
>      s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND |
>               VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS |
> -             VTD_CAP_SAGAW_39bit | VTD_CAP_MGAW(s->aw_bits);
> +             VTD_CAP_MGAW(s->aw_bits);
>      if (s->dma_drain) {
>          s->cap |= VTD_CAP_DRAIN;
>      }
> -    if (s->aw_bits == VTD_HOST_AW_48BIT) {
> -        s->cap |= VTD_CAP_SAGAW_48bit;
> +    if (s->dma_translation) {
> +            if (s->aw_bits >= VTD_HOST_AW_39BIT) {
> +                    s->cap |= VTD_CAP_SAGAW_39bit;
> +            }
> +            if (s->aw_bits >= VTD_HOST_AW_48BIT) {
> +                    s->cap |= VTD_CAP_SAGAW_48bit;
> +            }
>      }
>      s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
>


... this looks like you are actually fixing aw_bits < VTD_HOST_AW_39BIT,
right? So maybe this patch is ok like this since it also fixes a
bug. Pls add this to commit log though.

  
> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
> index 3b5ac869db..d898be85ce 100644
> --- a/include/hw/i386/intel_iommu.h
> +++ b/include/hw/i386/intel_iommu.h
> @@ -267,6 +267,7 @@ struct IntelIOMMUState {
>      bool buggy_eim;                 /* Force buggy EIM unless eim=off */
>      uint8_t aw_bits;                /* Host/IOVA address width (in bits) */
>      bool dma_drain;                 /* Whether DMA r/w draining enabled */
> +    bool dma_translation;           /* Whether DMA translation supported */
>  
>      /*
>       * Protects IOMMU states in general.  Currently it protects the
> -- 
> 2.33.1



^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/4] intel_iommu: Support IR-only mode without DMA translation
  2022-03-14 15:24     ` Michael S. Tsirkin
@ 2022-03-14 15:45       ` David Woodhouse
  -1 siblings, 0 replies; 41+ messages in thread
From: David Woodhouse @ 2022-03-14 15:45 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: qemu-devel, Peter Xu, Jason Wang, Marcel Apfelbaum,
	Paolo Bonzini, Richard Henderson, Eduardo Habkost,
	Marcelo Tosatti, kvm, Claudio Fontana, Igor Mammedov,
	Daniel P . Berrangé

[-- Attachment #1: Type: text/plain, Size: 2134 bytes --]

On Mon, 2022-03-14 at 11:24 -0400, Michael S. Tsirkin wrote:
> On Mon, Mar 14, 2022 at 02:25:42PM +0000, David Woodhouse wrote:
> > From: David Woodhouse <dwmw@amazon.co.uk>
> > 
> > By setting none of the SAGAW bits we can indicate to a guest that DMA
> > translation isn't supported. Tested by booting Windows 10, as well as
> > Linux guests with the fix at https://git.kernel.org/torvalds/c/c40aaaac10
> > 
> > 
> > Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
> > Reviewed-by: Peter Xu <peterx@redhat.com>
> > Acked-by: Jason Wang <jasowang@redhat.com>
> 
> this is borderline like a feature, but ...

It's the opposite of a feature — it's turning the feature *off* ;)

> > 
> > @@ -3627,12 +3628,17 @@ static void vtd_init(IntelIOMMUState *s)
> >      s->next_frcd_reg = 0;
> >      s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND |
> >               VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS |
> > -             VTD_CAP_SAGAW_39bit | VTD_CAP_MGAW(s->aw_bits);
> > +             VTD_CAP_MGAW(s->aw_bits);
> >      if (s->dma_drain) {
> >          s->cap |= VTD_CAP_DRAIN;
> >      }
> > -    if (s->aw_bits == VTD_HOST_AW_48BIT) {
> > -        s->cap |= VTD_CAP_SAGAW_48bit;
> > +    if (s->dma_translation) {
> > +            if (s->aw_bits >= VTD_HOST_AW_39BIT) {
> > +                    s->cap |= VTD_CAP_SAGAW_39bit;
> > +            }
> > +            if (s->aw_bits >= VTD_HOST_AW_48BIT) {
> > +                    s->cap |= VTD_CAP_SAGAW_48bit;
> > +            }
> >      }
> >      s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
> > 
> 
> 
> ... this looks like you are actually fixing aw_bits < VTD_HOST_AW_39BIT,
> right? So maybe this patch is ok like this since it also fixes a
> bug. Pls add this to commit log though.

Nah, aw_bits cannot be < VTD_HOST_AW_39BIT. We explicitly check in
vtd_decide_config(), and only 39 or 48 bits are supported,
corresponding to 3-level or 4-level page tables.

The only time we'd want to *not* advertise 39-bit support is if we
aren't advertising DMA translation at all. Which is the (anti-)feature
introduced by this patch.


[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 5965 bytes --]

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/4] intel_iommu: Support IR-only mode without DMA translation
@ 2022-03-14 15:45       ` David Woodhouse
  0 siblings, 0 replies; 41+ messages in thread
From: David Woodhouse @ 2022-03-14 15:45 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Eduardo Habkost, Daniel P . Berrangé,
	kvm, Jason Wang, Marcelo Tosatti, Richard Henderson, qemu-devel,
	Peter Xu, Claudio Fontana, Igor Mammedov, Paolo Bonzini

[-- Attachment #1: Type: text/plain, Size: 2134 bytes --]

On Mon, 2022-03-14 at 11:24 -0400, Michael S. Tsirkin wrote:
> On Mon, Mar 14, 2022 at 02:25:42PM +0000, David Woodhouse wrote:
> > From: David Woodhouse <dwmw@amazon.co.uk>
> > 
> > By setting none of the SAGAW bits we can indicate to a guest that DMA
> > translation isn't supported. Tested by booting Windows 10, as well as
> > Linux guests with the fix at https://git.kernel.org/torvalds/c/c40aaaac10
> > 
> > 
> > Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
> > Reviewed-by: Peter Xu <peterx@redhat.com>
> > Acked-by: Jason Wang <jasowang@redhat.com>
> 
> this is borderline like a feature, but ...

It's the opposite of a feature — it's turning the feature *off* ;)

> > 
> > @@ -3627,12 +3628,17 @@ static void vtd_init(IntelIOMMUState *s)
> >      s->next_frcd_reg = 0;
> >      s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND |
> >               VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS |
> > -             VTD_CAP_SAGAW_39bit | VTD_CAP_MGAW(s->aw_bits);
> > +             VTD_CAP_MGAW(s->aw_bits);
> >      if (s->dma_drain) {
> >          s->cap |= VTD_CAP_DRAIN;
> >      }
> > -    if (s->aw_bits == VTD_HOST_AW_48BIT) {
> > -        s->cap |= VTD_CAP_SAGAW_48bit;
> > +    if (s->dma_translation) {
> > +            if (s->aw_bits >= VTD_HOST_AW_39BIT) {
> > +                    s->cap |= VTD_CAP_SAGAW_39bit;
> > +            }
> > +            if (s->aw_bits >= VTD_HOST_AW_48BIT) {
> > +                    s->cap |= VTD_CAP_SAGAW_48bit;
> > +            }
> >      }
> >      s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
> > 
> 
> 
> ... this looks like you are actually fixing aw_bits < VTD_HOST_AW_39BIT,
> right? So maybe this patch is ok like this since it also fixes a
> bug. Pls add this to commit log though.

Nah, aw_bits cannot be < VTD_HOST_AW_39BIT. We explicitly check in
vtd_decide_config(), and only 39 or 48 bits are supported,
corresponding to 3-level or 4-level page tables.

The only time we'd want to *not* advertise 39-bit support is if we
aren't advertising DMA translation at all. Which is the (anti-)feature
introduced by this patch.


[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 5965 bytes --]

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/4] intel_iommu: Support IR-only mode without DMA translation
  2022-03-14 14:25   ` David Woodhouse
@ 2022-03-14 16:01     ` David Woodhouse
  -1 siblings, 0 replies; 41+ messages in thread
From: David Woodhouse @ 2022-03-14 16:01 UTC (permalink / raw)
  To: qemu-devel
  Cc: Eduardo Habkost, Daniel P . Berrangé,
	kvm, Michael S. Tsirkin, Jason Wang, Marcelo Tosatti,
	Richard Henderson, Peter Xu, Claudio Fontana, Igor Mammedov,
	Paolo Bonzini

[-- Attachment #1: Type: text/plain, Size: 523 bytes --]

On Mon, 2022-03-14 at 14:25 +0000, David Woodhouse wrote:
> From: David Woodhouse <dwmw@amazon.co.uk>
> 
> By setting none of the SAGAW bits we can indicate to a guest that DMA
> translation isn't supported. Tested by booting Windows 10, as well as
> Linux guests with the fix at https://git.kernel.org/torvalds/c/c40aaaac10

I suppose we could update that commit message a little. Since I first
posted this and the IRE bugfix in October 2020, the Linux commit it
references was released in the v5.10 kernel.



[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 5965 bytes --]

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/4] intel_iommu: Support IR-only mode without DMA translation
@ 2022-03-14 16:01     ` David Woodhouse
  0 siblings, 0 replies; 41+ messages in thread
From: David Woodhouse @ 2022-03-14 16:01 UTC (permalink / raw)
  To: qemu-devel
  Cc: Eduardo Habkost, Daniel P . Berrangé,
	kvm, Michael S. Tsirkin, Jason Wang, Marcelo Tosatti,
	Richard Henderson, Peter Xu, Claudio Fontana, Paolo Bonzini,
	Igor Mammedov

[-- Attachment #1: Type: text/plain, Size: 523 bytes --]

On Mon, 2022-03-14 at 14:25 +0000, David Woodhouse wrote:
> From: David Woodhouse <dwmw@amazon.co.uk>
> 
> By setting none of the SAGAW bits we can indicate to a guest that DMA
> translation isn't supported. Tested by booting Windows 10, as well as
> Linux guests with the fix at https://git.kernel.org/torvalds/c/c40aaaac10

I suppose we could update that commit message a little. Since I first
posted this and the IRE bugfix in October 2020, the Linux commit it
references was released in the v5.10 kernel.



[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 5965 bytes --]

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/4] intel_iommu: Support IR-only mode without DMA translation
  2022-03-14 15:45       ` David Woodhouse
@ 2022-03-14 22:27         ` Michael S. Tsirkin
  -1 siblings, 0 replies; 41+ messages in thread
From: Michael S. Tsirkin @ 2022-03-14 22:27 UTC (permalink / raw)
  To: David Woodhouse
  Cc: qemu-devel, Peter Xu, Jason Wang, Marcel Apfelbaum,
	Paolo Bonzini, Richard Henderson, Eduardo Habkost,
	Marcelo Tosatti, kvm, Claudio Fontana, Igor Mammedov,
	Daniel P . Berrangé

On Mon, Mar 14, 2022 at 03:45:47PM +0000, David Woodhouse wrote:
> On Mon, 2022-03-14 at 11:24 -0400, Michael S. Tsirkin wrote:
> > On Mon, Mar 14, 2022 at 02:25:42PM +0000, David Woodhouse wrote:
> > > From: David Woodhouse <dwmw@amazon.co.uk>
> > > 
> > > By setting none of the SAGAW bits we can indicate to a guest that DMA
> > > translation isn't supported. Tested by booting Windows 10, as well as
> > > Linux guests with the fix at https://git.kernel.org/torvalds/c/c40aaaac10
> > > 
> > > 
> > > Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
> > > Reviewed-by: Peter Xu <peterx@redhat.com>
> > > Acked-by: Jason Wang <jasowang@redhat.com>
> > 
> > this is borderline like a feature, but ...
> 
> It's the opposite of a feature — it's turning the feature *off* ;)

Right. Still - do you believe it's appropriate in soft freeze
and if yes why?

> > > 
> > > @@ -3627,12 +3628,17 @@ static void vtd_init(IntelIOMMUState *s)
> > >      s->next_frcd_reg = 0;
> > >      s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND |
> > >               VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS |
> > > -             VTD_CAP_SAGAW_39bit | VTD_CAP_MGAW(s->aw_bits);
> > > +             VTD_CAP_MGAW(s->aw_bits);
> > >      if (s->dma_drain) {
> > >          s->cap |= VTD_CAP_DRAIN;
> > >      }
> > > -    if (s->aw_bits == VTD_HOST_AW_48BIT) {
> > > -        s->cap |= VTD_CAP_SAGAW_48bit;
> > > +    if (s->dma_translation) {
> > > +            if (s->aw_bits >= VTD_HOST_AW_39BIT) {
> > > +                    s->cap |= VTD_CAP_SAGAW_39bit;
> > > +            }
> > > +            if (s->aw_bits >= VTD_HOST_AW_48BIT) {
> > > +                    s->cap |= VTD_CAP_SAGAW_48bit;
> > > +            }
> > >      }
> > >      s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
> > > 
> > 
> > 
> > ... this looks like you are actually fixing aw_bits < VTD_HOST_AW_39BIT,
> > right? So maybe this patch is ok like this since it also fixes a
> > bug. Pls add this to commit log though.
> 
> Nah, aw_bits cannot be < VTD_HOST_AW_39BIT. We explicitly check in
> vtd_decide_config(), and only 39 or 48 bits are supported,
> corresponding to 3-level or 4-level page tables.

Oh right. So not a bugfix then.

> The only time we'd want to *not* advertise 39-bit support is if we
> aren't advertising DMA translation at all. Which is the (anti-)feature
> introduced by this patch.
> 



^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/4] intel_iommu: Support IR-only mode without DMA translation
@ 2022-03-14 22:27         ` Michael S. Tsirkin
  0 siblings, 0 replies; 41+ messages in thread
From: Michael S. Tsirkin @ 2022-03-14 22:27 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Eduardo Habkost, Daniel P . Berrangé,
	kvm, Jason Wang, Marcelo Tosatti, Richard Henderson, qemu-devel,
	Peter Xu, Claudio Fontana, Igor Mammedov, Paolo Bonzini

On Mon, Mar 14, 2022 at 03:45:47PM +0000, David Woodhouse wrote:
> On Mon, 2022-03-14 at 11:24 -0400, Michael S. Tsirkin wrote:
> > On Mon, Mar 14, 2022 at 02:25:42PM +0000, David Woodhouse wrote:
> > > From: David Woodhouse <dwmw@amazon.co.uk>
> > > 
> > > By setting none of the SAGAW bits we can indicate to a guest that DMA
> > > translation isn't supported. Tested by booting Windows 10, as well as
> > > Linux guests with the fix at https://git.kernel.org/torvalds/c/c40aaaac10
> > > 
> > > 
> > > Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
> > > Reviewed-by: Peter Xu <peterx@redhat.com>
> > > Acked-by: Jason Wang <jasowang@redhat.com>
> > 
> > this is borderline like a feature, but ...
> 
> It's the opposite of a feature — it's turning the feature *off* ;)

Right. Still - do you believe it's appropriate in soft freeze
and if yes why?

> > > 
> > > @@ -3627,12 +3628,17 @@ static void vtd_init(IntelIOMMUState *s)
> > >      s->next_frcd_reg = 0;
> > >      s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND |
> > >               VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS |
> > > -             VTD_CAP_SAGAW_39bit | VTD_CAP_MGAW(s->aw_bits);
> > > +             VTD_CAP_MGAW(s->aw_bits);
> > >      if (s->dma_drain) {
> > >          s->cap |= VTD_CAP_DRAIN;
> > >      }
> > > -    if (s->aw_bits == VTD_HOST_AW_48BIT) {
> > > -        s->cap |= VTD_CAP_SAGAW_48bit;
> > > +    if (s->dma_translation) {
> > > +            if (s->aw_bits >= VTD_HOST_AW_39BIT) {
> > > +                    s->cap |= VTD_CAP_SAGAW_39bit;
> > > +            }
> > > +            if (s->aw_bits >= VTD_HOST_AW_48BIT) {
> > > +                    s->cap |= VTD_CAP_SAGAW_48bit;
> > > +            }
> > >      }
> > >      s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
> > > 
> > 
> > 
> > ... this looks like you are actually fixing aw_bits < VTD_HOST_AW_39BIT,
> > right? So maybe this patch is ok like this since it also fixes a
> > bug. Pls add this to commit log though.
> 
> Nah, aw_bits cannot be < VTD_HOST_AW_39BIT. We explicitly check in
> vtd_decide_config(), and only 39 or 48 bits are supported,
> corresponding to 3-level or 4-level page tables.

Oh right. So not a bugfix then.

> The only time we'd want to *not* advertise 39-bit support is if we
> aren't advertising DMA translation at all. Which is the (anti-)feature
> introduced by this patch.
> 




^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/4] target/i386: Fix sanity check on max APIC ID / X2APIC enablement
  2022-03-14 14:25 ` David Woodhouse
@ 2022-03-16  9:04   ` Igor Mammedov
  -1 siblings, 0 replies; 41+ messages in thread
From: Igor Mammedov @ 2022-03-16  9:04 UTC (permalink / raw)
  To: David Woodhouse
  Cc: qemu-devel, Michael S. Tsirkin, Peter Xu, Jason Wang,
	Marcel Apfelbaum, Paolo Bonzini, Richard Henderson,
	Eduardo Habkost, Marcelo Tosatti, kvm, Claudio Fontana,
	Daniel P . Berrangé

On Mon, 14 Mar 2022 14:25:41 +0000
David Woodhouse <dwmw2@infradead.org> wrote:

> The check on x86ms->apic_id_limit in pc_machine_done() had two problems.
> 
> Firstly, we need KVM to support the X2APIC API in order to allow IRQ
> delivery to APICs >= 255. So we need to call/check kvm_enable_x2apic(),
> which was done elsewhere in *some* cases but not all.
> 
> Secondly, microvm needs the same check. So move it from pc_machine_done()
> to x86_cpus_init() where it will work for both.
> 
> The check in kvm_cpu_instance_init() is now redundant and can be dropped.

Well, I retested with the latest upstream kernel (both guest and host),
and adding kvm_enable_x2apic() is not sufficient as guest according
to your patches in kernel caps max APICID at 255 unless kvm-msi-ext-dest-id
is enabled. And attempt in enabling kvm-msi-ext-dest-id with kernel-irqchip
fails.
So number of usable CPUs in guest stays at legacy level, leaving the rest
of CPUs in limbo.


> Signed-off-by: David Woodhouse <dwmw2@infradead.org>
> Acked-by: Claudio Fontana <cfontana@suse.de>
> ---
>  hw/i386/pc.c              |  8 --------
>  hw/i386/x86.c             | 16 ++++++++++++++++
>  target/i386/kvm/kvm-cpu.c |  2 +-
>  3 files changed, 17 insertions(+), 9 deletions(-)
> 
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index fd55fc725c..d3ab28fec5 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -740,14 +740,6 @@ void pc_machine_done(Notifier *notifier, void *data)
>          /* update FW_CFG_NB_CPUS to account for -device added CPUs */
>          fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus);
>      }
> -
> -
> -    if (x86ms->apic_id_limit > 255 && !xen_enabled() &&
> -        !kvm_irqchip_in_kernel()) {
> -        error_report("current -smp configuration requires kernel "
> -                     "irqchip support.");
> -        exit(EXIT_FAILURE);
> -    }
>  }
>  
>  void pc_guest_info_init(PCMachineState *pcms)
> diff --git a/hw/i386/x86.c b/hw/i386/x86.c
> index 4cf107baea..8da55d58ea 100644
> --- a/hw/i386/x86.c
> +++ b/hw/i386/x86.c
> @@ -39,6 +39,7 @@
>  #include "sysemu/replay.h"
>  #include "sysemu/sysemu.h"
>  #include "sysemu/cpu-timers.h"
> +#include "sysemu/xen.h"
>  #include "trace.h"
>  
>  #include "hw/i386/x86.h"
> @@ -123,6 +124,21 @@ void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version)
>       */
>      x86ms->apic_id_limit = x86_cpu_apic_id_from_index(x86ms,
>                                                        ms->smp.max_cpus - 1) + 1;
> +
> +    /*
> +     * Can we support APIC ID 255 or higher?
> +     *
> +     * Under Xen: yes.
> +     * With userspace emulated lapic: no
> +     * With KVM's in-kernel lapic: only if X2APIC API is enabled.
> +     */
> +    if (x86ms->apic_id_limit > 255 && !xen_enabled() &&
> +        (!kvm_irqchip_in_kernel() || !kvm_enable_x2apic())) {
> +        error_report("current -smp configuration requires kernel "
> +                     "irqchip and X2APIC API support.");
> +        exit(EXIT_FAILURE);
> +    }
> +
>      possible_cpus = mc->possible_cpu_arch_ids(ms);
>      for (i = 0; i < ms->smp.cpus; i++) {
>          x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal);
> diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c
> index d95028018e..c60cb2dafb 100644
> --- a/target/i386/kvm/kvm-cpu.c
> +++ b/target/i386/kvm/kvm-cpu.c
> @@ -165,7 +165,7 @@ static void kvm_cpu_instance_init(CPUState *cs)
>          /* only applies to builtin_x86_defs cpus */
>          if (!kvm_irqchip_in_kernel()) {
>              x86_cpu_change_kvm_default("x2apic", "off");
> -        } else if (kvm_irqchip_is_split() && kvm_enable_x2apic()) {
> +        } else if (kvm_irqchip_is_split()) {
>              x86_cpu_change_kvm_default("kvm-msi-ext-dest-id", "on");
>          }
>  


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/4] target/i386: Fix sanity check on max APIC ID / X2APIC enablement
@ 2022-03-16  9:04   ` Igor Mammedov
  0 siblings, 0 replies; 41+ messages in thread
From: Igor Mammedov @ 2022-03-16  9:04 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Eduardo Habkost, Daniel P . Berrangé,
	kvm, Michael S. Tsirkin, Jason Wang, Marcelo Tosatti,
	Richard Henderson, qemu-devel, Peter Xu, Claudio Fontana,
	Paolo Bonzini

On Mon, 14 Mar 2022 14:25:41 +0000
David Woodhouse <dwmw2@infradead.org> wrote:

> The check on x86ms->apic_id_limit in pc_machine_done() had two problems.
> 
> Firstly, we need KVM to support the X2APIC API in order to allow IRQ
> delivery to APICs >= 255. So we need to call/check kvm_enable_x2apic(),
> which was done elsewhere in *some* cases but not all.
> 
> Secondly, microvm needs the same check. So move it from pc_machine_done()
> to x86_cpus_init() where it will work for both.
> 
> The check in kvm_cpu_instance_init() is now redundant and can be dropped.

Well, I retested with the latest upstream kernel (both guest and host),
and adding kvm_enable_x2apic() is not sufficient as guest according
to your patches in kernel caps max APICID at 255 unless kvm-msi-ext-dest-id
is enabled. And attempt in enabling kvm-msi-ext-dest-id with kernel-irqchip
fails.
So number of usable CPUs in guest stays at legacy level, leaving the rest
of CPUs in limbo.


> Signed-off-by: David Woodhouse <dwmw2@infradead.org>
> Acked-by: Claudio Fontana <cfontana@suse.de>
> ---
>  hw/i386/pc.c              |  8 --------
>  hw/i386/x86.c             | 16 ++++++++++++++++
>  target/i386/kvm/kvm-cpu.c |  2 +-
>  3 files changed, 17 insertions(+), 9 deletions(-)
> 
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index fd55fc725c..d3ab28fec5 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -740,14 +740,6 @@ void pc_machine_done(Notifier *notifier, void *data)
>          /* update FW_CFG_NB_CPUS to account for -device added CPUs */
>          fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus);
>      }
> -
> -
> -    if (x86ms->apic_id_limit > 255 && !xen_enabled() &&
> -        !kvm_irqchip_in_kernel()) {
> -        error_report("current -smp configuration requires kernel "
> -                     "irqchip support.");
> -        exit(EXIT_FAILURE);
> -    }
>  }
>  
>  void pc_guest_info_init(PCMachineState *pcms)
> diff --git a/hw/i386/x86.c b/hw/i386/x86.c
> index 4cf107baea..8da55d58ea 100644
> --- a/hw/i386/x86.c
> +++ b/hw/i386/x86.c
> @@ -39,6 +39,7 @@
>  #include "sysemu/replay.h"
>  #include "sysemu/sysemu.h"
>  #include "sysemu/cpu-timers.h"
> +#include "sysemu/xen.h"
>  #include "trace.h"
>  
>  #include "hw/i386/x86.h"
> @@ -123,6 +124,21 @@ void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version)
>       */
>      x86ms->apic_id_limit = x86_cpu_apic_id_from_index(x86ms,
>                                                        ms->smp.max_cpus - 1) + 1;
> +
> +    /*
> +     * Can we support APIC ID 255 or higher?
> +     *
> +     * Under Xen: yes.
> +     * With userspace emulated lapic: no
> +     * With KVM's in-kernel lapic: only if X2APIC API is enabled.
> +     */
> +    if (x86ms->apic_id_limit > 255 && !xen_enabled() &&
> +        (!kvm_irqchip_in_kernel() || !kvm_enable_x2apic())) {
> +        error_report("current -smp configuration requires kernel "
> +                     "irqchip and X2APIC API support.");
> +        exit(EXIT_FAILURE);
> +    }
> +
>      possible_cpus = mc->possible_cpu_arch_ids(ms);
>      for (i = 0; i < ms->smp.cpus; i++) {
>          x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal);
> diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c
> index d95028018e..c60cb2dafb 100644
> --- a/target/i386/kvm/kvm-cpu.c
> +++ b/target/i386/kvm/kvm-cpu.c
> @@ -165,7 +165,7 @@ static void kvm_cpu_instance_init(CPUState *cs)
>          /* only applies to builtin_x86_defs cpus */
>          if (!kvm_irqchip_in_kernel()) {
>              x86_cpu_change_kvm_default("x2apic", "off");
> -        } else if (kvm_irqchip_is_split() && kvm_enable_x2apic()) {
> +        } else if (kvm_irqchip_is_split()) {
>              x86_cpu_change_kvm_default("kvm-msi-ext-dest-id", "on");
>          }
>  



^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/4] intel_iommu: Support IR-only mode without DMA translation
  2022-03-14 22:27         ` Michael S. Tsirkin
@ 2022-03-16  9:34           ` David Woodhouse
  -1 siblings, 0 replies; 41+ messages in thread
From: David Woodhouse @ 2022-03-16  9:34 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: qemu-devel, Peter Xu, Jason Wang, Marcel Apfelbaum,
	Paolo Bonzini, Richard Henderson, Eduardo Habkost,
	Marcelo Tosatti, kvm, Claudio Fontana, Igor Mammedov,
	Daniel P . Berrangé

[-- Attachment #1: Type: text/plain, Size: 876 bytes --]

On Mon, 2022-03-14 at 18:27 -0400, Michael S. Tsirkin wrote:
> On Mon, Mar 14, 2022 at 03:45:47PM +0000, David Woodhouse wrote:
> > It's the opposite of a feature — it's turning the feature *off* ;)
> 
> Right. Still - do you believe it's appropriate in soft freeze
> and if yes why?
> 

Not sure I care very much. I've reposted it a couple of times, with the
bug fixes that go along with it, since it was first posted in October
2020. I confess I don't keep track very much of the freeze status; I'm
only posting the series again this time because Igor posted a related
patch.

But all it's doing is giving us a way to *disable* functionality; it's
not adding new functionality. I suppose someone who cares might make an
argument that that's not so egregious a 'feature' to slip in the middle
of a set of bug fixes that have been outstanding for so long.

[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 5965 bytes --]

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/4] intel_iommu: Support IR-only mode without DMA translation
@ 2022-03-16  9:34           ` David Woodhouse
  0 siblings, 0 replies; 41+ messages in thread
From: David Woodhouse @ 2022-03-16  9:34 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Eduardo Habkost, Daniel P . Berrangé,
	kvm, Jason Wang, Marcelo Tosatti, Richard Henderson, qemu-devel,
	Peter Xu, Claudio Fontana, Igor Mammedov, Paolo Bonzini

[-- Attachment #1: Type: text/plain, Size: 876 bytes --]

On Mon, 2022-03-14 at 18:27 -0400, Michael S. Tsirkin wrote:
> On Mon, Mar 14, 2022 at 03:45:47PM +0000, David Woodhouse wrote:
> > It's the opposite of a feature — it's turning the feature *off* ;)
> 
> Right. Still - do you believe it's appropriate in soft freeze
> and if yes why?
> 

Not sure I care very much. I've reposted it a couple of times, with the
bug fixes that go along with it, since it was first posted in October
2020. I confess I don't keep track very much of the freeze status; I'm
only posting the series again this time because Igor posted a related
patch.

But all it's doing is giving us a way to *disable* functionality; it's
not adding new functionality. I suppose someone who cares might make an
argument that that's not so egregious a 'feature' to slip in the middle
of a set of bug fixes that have been outstanding for so long.

[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 5965 bytes --]

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/4] target/i386: Fix sanity check on max APIC ID / X2APIC enablement
  2022-03-16  9:04   ` Igor Mammedov
@ 2022-03-16  9:37     ` David Woodhouse
  -1 siblings, 0 replies; 41+ messages in thread
From: David Woodhouse @ 2022-03-16  9:37 UTC (permalink / raw)
  To: Igor Mammedov
  Cc: qemu-devel, Michael S. Tsirkin, Peter Xu, Jason Wang,
	Marcel Apfelbaum, Paolo Bonzini, Richard Henderson,
	Eduardo Habkost, Marcelo Tosatti, kvm, Claudio Fontana,
	Daniel P . Berrangé

[-- Attachment #1: Type: text/plain, Size: 977 bytes --]

On Wed, 2022-03-16 at 10:04 +0100, Igor Mammedov wrote:
> Well, I retested with the latest upstream kernel (both guest and host),
> and adding kvm_enable_x2apic() is not sufficient as guest according
> to your patches in kernel caps max APICID at 255 unless kvm-msi-ext-dest-id
> is enabled. And attempt in enabling kvm-msi-ext-dest-id with kernel-irqchip
> fails.

Correctly so. We need the split irqchip to support kvm-msi-ext-dest-id
which is why there's an explicity check for it.

> So number of usable CPUs in guest stays at legacy level, leaving the rest
> of CPUs in limbo.

Yep, that's the guest operating system's choice. Not a qemu problem.

Even if you have the split IRQ chip, if you boot a guest without kvm-
msi-ext-dest-id support, it'll refuse to use higher CPUs.

Or if you boot a guest without X2APIC support, it'll refuse to use
higher CPUs. 

That doesn't mean a user should be *forbidden* from launching qemu in
that configuration.

[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 5965 bytes --]

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/4] target/i386: Fix sanity check on max APIC ID / X2APIC enablement
@ 2022-03-16  9:37     ` David Woodhouse
  0 siblings, 0 replies; 41+ messages in thread
From: David Woodhouse @ 2022-03-16  9:37 UTC (permalink / raw)
  To: Igor Mammedov
  Cc: Eduardo Habkost, Daniel P . Berrangé,
	kvm, Michael S. Tsirkin, Jason Wang, Marcelo Tosatti,
	Richard Henderson, qemu-devel, Peter Xu, Claudio Fontana,
	Paolo Bonzini

[-- Attachment #1: Type: text/plain, Size: 977 bytes --]

On Wed, 2022-03-16 at 10:04 +0100, Igor Mammedov wrote:
> Well, I retested with the latest upstream kernel (both guest and host),
> and adding kvm_enable_x2apic() is not sufficient as guest according
> to your patches in kernel caps max APICID at 255 unless kvm-msi-ext-dest-id
> is enabled. And attempt in enabling kvm-msi-ext-dest-id with kernel-irqchip
> fails.

Correctly so. We need the split irqchip to support kvm-msi-ext-dest-id
which is why there's an explicity check for it.

> So number of usable CPUs in guest stays at legacy level, leaving the rest
> of CPUs in limbo.

Yep, that's the guest operating system's choice. Not a qemu problem.

Even if you have the split IRQ chip, if you boot a guest without kvm-
msi-ext-dest-id support, it'll refuse to use higher CPUs.

Or if you boot a guest without X2APIC support, it'll refuse to use
higher CPUs. 

That doesn't mean a user should be *forbidden* from launching qemu in
that configuration.

[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 5965 bytes --]

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/4] target/i386: Fix sanity check on max APIC ID / X2APIC enablement
  2022-03-16  9:37     ` David Woodhouse
@ 2022-03-16  9:56       ` Michael S. Tsirkin
  -1 siblings, 0 replies; 41+ messages in thread
From: Michael S. Tsirkin @ 2022-03-16  9:56 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Igor Mammedov, qemu-devel, Peter Xu, Jason Wang,
	Marcel Apfelbaum, Paolo Bonzini, Richard Henderson,
	Eduardo Habkost, Marcelo Tosatti, kvm, Claudio Fontana,
	Daniel P . Berrangé

On Wed, Mar 16, 2022 at 09:37:07AM +0000, David Woodhouse wrote:
> On Wed, 2022-03-16 at 10:04 +0100, Igor Mammedov wrote:
> > Well, I retested with the latest upstream kernel (both guest and host),
> > and adding kvm_enable_x2apic() is not sufficient as guest according
> > to your patches in kernel caps max APICID at 255 unless kvm-msi-ext-dest-id
> > is enabled. And attempt in enabling kvm-msi-ext-dest-id with kernel-irqchip
> > fails.
> 
> Correctly so. We need the split irqchip to support kvm-msi-ext-dest-id
> which is why there's an explicity check for it.
> 
> > So number of usable CPUs in guest stays at legacy level, leaving the rest
> > of CPUs in limbo.
> 
> Yep, that's the guest operating system's choice. Not a qemu problem.
> 
> Even if you have the split IRQ chip, if you boot a guest without kvm-
> msi-ext-dest-id support, it'll refuse to use higher CPUs.
> 
> Or if you boot a guest without X2APIC support, it'll refuse to use
> higher CPUs. 
> 
> That doesn't mean a user should be *forbidden* from launching qemu in
> that configuration.

Well the issue with all these configs which kind of work but not
the way they were specified is that down the road someone
creates a VM with this config and then expects us to maintain it
indefinitely.

So yes, if we are not sure we can support something properly it is
better to validate and exit than create a VM guests don't know how
to treat.

-- 
MST


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/4] target/i386: Fix sanity check on max APIC ID / X2APIC enablement
@ 2022-03-16  9:56       ` Michael S. Tsirkin
  0 siblings, 0 replies; 41+ messages in thread
From: Michael S. Tsirkin @ 2022-03-16  9:56 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Eduardo Habkost, Daniel P . Berrangé,
	kvm, Jason Wang, Marcelo Tosatti, Richard Henderson, qemu-devel,
	Peter Xu, Claudio Fontana, Paolo Bonzini, Igor Mammedov

On Wed, Mar 16, 2022 at 09:37:07AM +0000, David Woodhouse wrote:
> On Wed, 2022-03-16 at 10:04 +0100, Igor Mammedov wrote:
> > Well, I retested with the latest upstream kernel (both guest and host),
> > and adding kvm_enable_x2apic() is not sufficient as guest according
> > to your patches in kernel caps max APICID at 255 unless kvm-msi-ext-dest-id
> > is enabled. And attempt in enabling kvm-msi-ext-dest-id with kernel-irqchip
> > fails.
> 
> Correctly so. We need the split irqchip to support kvm-msi-ext-dest-id
> which is why there's an explicity check for it.
> 
> > So number of usable CPUs in guest stays at legacy level, leaving the rest
> > of CPUs in limbo.
> 
> Yep, that's the guest operating system's choice. Not a qemu problem.
> 
> Even if you have the split IRQ chip, if you boot a guest without kvm-
> msi-ext-dest-id support, it'll refuse to use higher CPUs.
> 
> Or if you boot a guest without X2APIC support, it'll refuse to use
> higher CPUs. 
> 
> That doesn't mean a user should be *forbidden* from launching qemu in
> that configuration.

Well the issue with all these configs which kind of work but not
the way they were specified is that down the road someone
creates a VM with this config and then expects us to maintain it
indefinitely.

So yes, if we are not sure we can support something properly it is
better to validate and exit than create a VM guests don't know how
to treat.

-- 
MST



^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/4] target/i386: Fix sanity check on max APIC ID / X2APIC enablement
  2022-03-16  9:56       ` Michael S. Tsirkin
@ 2022-03-16 10:37         ` David Woodhouse
  -1 siblings, 0 replies; 41+ messages in thread
From: David Woodhouse @ 2022-03-16 10:37 UTC (permalink / raw)
  To: Michael S. Tsirkin, Daniel P. Berrangé
  Cc: Igor Mammedov, qemu-devel, Peter Xu, Jason Wang,
	Marcel Apfelbaum, Paolo Bonzini, Richard Henderson,
	Eduardo Habkost, Marcelo Tosatti, kvm, Claudio Fontana,
	Daniel P . Berrangé

[-- Attachment #1: Type: text/plain, Size: 1691 bytes --]

On Wed, 2022-03-16 at 05:56 -0400, Michael S. Tsirkin wrote:
> On Wed, Mar 16, 2022 at 09:37:07AM +0000, David Woodhouse wrote:
> > Yep, that's the guest operating system's choice. Not a qemu problem.
> > 
> > Even if you have the split IRQ chip, if you boot a guest without kvm-
> > msi-ext-dest-id support, it'll refuse to use higher CPUs.
> > 
> > Or if you boot a guest without X2APIC support, it'll refuse to use
> > higher CPUs. 
> > 
> > That doesn't mean a user should be *forbidden* from launching qemu in
> > that configuration.
> 
> Well the issue with all these configs which kind of work but not
> the way they were specified is that down the road someone
> creates a VM with this config and then expects us to maintain it
> indefinitely.
> 
> So yes, if we are not sure we can support something properly it is
> better to validate and exit than create a VM guests don't know how
> to treat.

Not entirely sure how to reconcile that with what Daniel said in
https://lore.kernel.org/qemu-devel/Yi9BTkZIM3iZsvdK@redhat.com/ which
was:

> We've generally said QEMU should not reject / block startup of valid
> hardware configurations, based on existance of bugs in certain guest
> OS, if the config would be valid for other guest.

That said, I cannot point at a *specific* example of a guest which can
use the higher CPUs even when it can't direct external interrupts at
them. I worked on making Linux capable of it, as I said, but didn't
pursue that in the end.

I *suspect* Windows might be able to do it, based on the way the
hyperv-iommu works (by cheating and returning -EINVAL when external
interrupts are directed at higher CPUs).



[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 5965 bytes --]

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/4] target/i386: Fix sanity check on max APIC ID / X2APIC enablement
@ 2022-03-16 10:37         ` David Woodhouse
  0 siblings, 0 replies; 41+ messages in thread
From: David Woodhouse @ 2022-03-16 10:37 UTC (permalink / raw)
  To: Michael S. Tsirkin, Daniel P. Berrangé
  Cc: Eduardo Habkost, Daniel P . Berrangé,
	kvm, Jason Wang, Marcelo Tosatti, Richard Henderson, qemu-devel,
	Peter Xu, Claudio Fontana, Paolo Bonzini, Igor Mammedov

[-- Attachment #1: Type: text/plain, Size: 1691 bytes --]

On Wed, 2022-03-16 at 05:56 -0400, Michael S. Tsirkin wrote:
> On Wed, Mar 16, 2022 at 09:37:07AM +0000, David Woodhouse wrote:
> > Yep, that's the guest operating system's choice. Not a qemu problem.
> > 
> > Even if you have the split IRQ chip, if you boot a guest without kvm-
> > msi-ext-dest-id support, it'll refuse to use higher CPUs.
> > 
> > Or if you boot a guest without X2APIC support, it'll refuse to use
> > higher CPUs. 
> > 
> > That doesn't mean a user should be *forbidden* from launching qemu in
> > that configuration.
> 
> Well the issue with all these configs which kind of work but not
> the way they were specified is that down the road someone
> creates a VM with this config and then expects us to maintain it
> indefinitely.
> 
> So yes, if we are not sure we can support something properly it is
> better to validate and exit than create a VM guests don't know how
> to treat.

Not entirely sure how to reconcile that with what Daniel said in
https://lore.kernel.org/qemu-devel/Yi9BTkZIM3iZsvdK@redhat.com/ which
was:

> We've generally said QEMU should not reject / block startup of valid
> hardware configurations, based on existance of bugs in certain guest
> OS, if the config would be valid for other guest.

That said, I cannot point at a *specific* example of a guest which can
use the higher CPUs even when it can't direct external interrupts at
them. I worked on making Linux capable of it, as I said, but didn't
pursue that in the end.

I *suspect* Windows might be able to do it, based on the way the
hyperv-iommu works (by cheating and returning -EINVAL when external
interrupts are directed at higher CPUs).



[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 5965 bytes --]

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/4] target/i386: Fix sanity check on max APIC ID / X2APIC enablement
  2022-03-16 10:37         ` David Woodhouse
@ 2022-03-16 10:47           ` Michael S. Tsirkin
  -1 siblings, 0 replies; 41+ messages in thread
From: Michael S. Tsirkin @ 2022-03-16 10:47 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Daniel P. Berrangé,
	Igor Mammedov, qemu-devel, Peter Xu, Jason Wang,
	Marcel Apfelbaum, Paolo Bonzini, Richard Henderson,
	Eduardo Habkost, Marcelo Tosatti, kvm, Claudio Fontana

On Wed, Mar 16, 2022 at 10:37:49AM +0000, David Woodhouse wrote:
> On Wed, 2022-03-16 at 05:56 -0400, Michael S. Tsirkin wrote:
> > On Wed, Mar 16, 2022 at 09:37:07AM +0000, David Woodhouse wrote:
> > > Yep, that's the guest operating system's choice. Not a qemu problem.
> > > 
> > > Even if you have the split IRQ chip, if you boot a guest without kvm-
> > > msi-ext-dest-id support, it'll refuse to use higher CPUs.
> > > 
> > > Or if you boot a guest without X2APIC support, it'll refuse to use
> > > higher CPUs. 
> > > 
> > > That doesn't mean a user should be *forbidden* from launching qemu in
> > > that configuration.
> > 
> > Well the issue with all these configs which kind of work but not
> > the way they were specified is that down the road someone
> > creates a VM with this config and then expects us to maintain it
> > indefinitely.
> > 
> > So yes, if we are not sure we can support something properly it is
> > better to validate and exit than create a VM guests don't know how
> > to treat.
> 
> Not entirely sure how to reconcile that with what Daniel said in
> https://lore.kernel.org/qemu-devel/Yi9BTkZIM3iZsvdK@redhat.com/ which
> was:
> 
> > We've generally said QEMU should not reject / block startup of valid
> > hardware configurations, based on existance of bugs in certain guest
> > OS, if the config would be valid for other guest.

For sure, but is this a valid hardware configuration? That's
really the question.

> That said, I cannot point at a *specific* example of a guest which can
> use the higher CPUs even when it can't direct external interrupts at
> them. I worked on making Linux capable of it, as I said, but didn't
> pursue that in the end.
> 
> I *suspect* Windows might be able to do it, based on the way the
> hyperv-iommu works (by cheating and returning -EINVAL when external
> interrupts are directed at higher CPUs).
> 
> 

-- 
MST


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/4] target/i386: Fix sanity check on max APIC ID / X2APIC enablement
@ 2022-03-16 10:47           ` Michael S. Tsirkin
  0 siblings, 0 replies; 41+ messages in thread
From: Michael S. Tsirkin @ 2022-03-16 10:47 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Eduardo Habkost, Daniel P. Berrangé,
	kvm, Jason Wang, Marcelo Tosatti, Richard Henderson, qemu-devel,
	Peter Xu, Claudio Fontana, Paolo Bonzini, Igor Mammedov

On Wed, Mar 16, 2022 at 10:37:49AM +0000, David Woodhouse wrote:
> On Wed, 2022-03-16 at 05:56 -0400, Michael S. Tsirkin wrote:
> > On Wed, Mar 16, 2022 at 09:37:07AM +0000, David Woodhouse wrote:
> > > Yep, that's the guest operating system's choice. Not a qemu problem.
> > > 
> > > Even if you have the split IRQ chip, if you boot a guest without kvm-
> > > msi-ext-dest-id support, it'll refuse to use higher CPUs.
> > > 
> > > Or if you boot a guest without X2APIC support, it'll refuse to use
> > > higher CPUs. 
> > > 
> > > That doesn't mean a user should be *forbidden* from launching qemu in
> > > that configuration.
> > 
> > Well the issue with all these configs which kind of work but not
> > the way they were specified is that down the road someone
> > creates a VM with this config and then expects us to maintain it
> > indefinitely.
> > 
> > So yes, if we are not sure we can support something properly it is
> > better to validate and exit than create a VM guests don't know how
> > to treat.
> 
> Not entirely sure how to reconcile that with what Daniel said in
> https://lore.kernel.org/qemu-devel/Yi9BTkZIM3iZsvdK@redhat.com/ which
> was:
> 
> > We've generally said QEMU should not reject / block startup of valid
> > hardware configurations, based on existance of bugs in certain guest
> > OS, if the config would be valid for other guest.

For sure, but is this a valid hardware configuration? That's
really the question.

> That said, I cannot point at a *specific* example of a guest which can
> use the higher CPUs even when it can't direct external interrupts at
> them. I worked on making Linux capable of it, as I said, but didn't
> pursue that in the end.
> 
> I *suspect* Windows might be able to do it, based on the way the
> hyperv-iommu works (by cheating and returning -EINVAL when external
> interrupts are directed at higher CPUs).
> 
> 

-- 
MST



^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/4] target/i386: Fix sanity check on max APIC ID / X2APIC enablement
  2022-03-16 10:47           ` Michael S. Tsirkin
@ 2022-03-16 11:28             ` Igor Mammedov
  -1 siblings, 0 replies; 41+ messages in thread
From: Igor Mammedov @ 2022-03-16 11:28 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: David Woodhouse, Daniel P. Berrangé,
	qemu-devel, Peter Xu, Jason Wang, Marcel Apfelbaum,
	Paolo Bonzini, Richard Henderson, Eduardo Habkost,
	Marcelo Tosatti, kvm, Claudio Fontana, vkuznets

On Wed, 16 Mar 2022 06:47:48 -0400
"Michael S. Tsirkin" <mst@redhat.com> wrote:

> On Wed, Mar 16, 2022 at 10:37:49AM +0000, David Woodhouse wrote:
> > On Wed, 2022-03-16 at 05:56 -0400, Michael S. Tsirkin wrote:  
> > > On Wed, Mar 16, 2022 at 09:37:07AM +0000, David Woodhouse wrote:  
> > > > Yep, that's the guest operating system's choice. Not a qemu problem.
> > > > 
> > > > Even if you have the split IRQ chip, if you boot a guest without kvm-
> > > > msi-ext-dest-id support, it'll refuse to use higher CPUs.
> > > > 
> > > > Or if you boot a guest without X2APIC support, it'll refuse to use
> > > > higher CPUs. 
> > > > 
> > > > That doesn't mean a user should be *forbidden* from launching qemu in
> > > > that configuration.  
> > > 
> > > Well the issue with all these configs which kind of work but not
> > > the way they were specified is that down the road someone
> > > creates a VM with this config and then expects us to maintain it
> > > indefinitely.
> > > 
> > > So yes, if we are not sure we can support something properly it is
> > > better to validate and exit than create a VM guests don't know how
> > > to treat.  
> > 
> > Not entirely sure how to reconcile that with what Daniel said in
> > https://lore.kernel.org/qemu-devel/Yi9BTkZIM3iZsvdK@redhat.com/ which
> > was:

Generally Daniel is right, as long as it's something that what real hardware
supports. (usually it's job if upper layers which know what guest OS is used,
and can tweak config based on that knowledge).

But it's virt only extension and none (tested with
 Windows (hangs on boot),
 Linux (brings up only first 255 cpus)
) of mainline OSes ended up up working as expected (i.e. user asked for this
many CPUs but can't really use them as expected).
Which would just lead to users reporting (obscure) bugs.

> > > We've generally said QEMU should not reject / block startup of valid
> > > hardware configurations, based on existance of bugs in certain guest
> > > OS, if the config would be valid for other guest.  
> 
> For sure, but is this a valid hardware configuration? That's
> really the question.

to me it looks like not complete PV feature so far.
if it's a configuration that is interesting for some users (some special
build OS/appliance that can use CPUs which are able to handle only IPIs)
or for development purposes than in should be an opt-in feature
instead of default one.
 
> > That said, I cannot point at a *specific* example of a guest which can
> > use the higher CPUs even when it can't direct external interrupts at
> > them. I worked on making Linux capable of it, as I said, but didn't
> > pursue that in the end.
> > 
> > I *suspect* Windows might be able to do it, based on the way the
> > hyperv-iommu works (by cheating and returning -EINVAL when external
> > interrupts are directed at higher CPUs).
Testing shows, Windows (2019 and 2004 build) doesn't work (at least with
just kernel-irqchip=on in current state). (CCing Vitaly, he might know
if Windows might work and under what conditions)

Linux(recentish) was able to bring up all CPUs with APICID above 255
with 'split' irqchip and without iommu present (at least it boots to
command prompt).

What worked for both OSes (full boot), was split irqchip + iommu
(even without irq remapping, but I haven't tested with older guests
so irq remapping might be required anyways).


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/4] target/i386: Fix sanity check on max APIC ID / X2APIC enablement
@ 2022-03-16 11:28             ` Igor Mammedov
  0 siblings, 0 replies; 41+ messages in thread
From: Igor Mammedov @ 2022-03-16 11:28 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Eduardo Habkost, Daniel P. Berrangé,
	kvm, Jason Wang, Marcelo Tosatti, Richard Henderson, qemu-devel,
	Peter Xu, Claudio Fontana, Paolo Bonzini, vkuznets,
	David Woodhouse

On Wed, 16 Mar 2022 06:47:48 -0400
"Michael S. Tsirkin" <mst@redhat.com> wrote:

> On Wed, Mar 16, 2022 at 10:37:49AM +0000, David Woodhouse wrote:
> > On Wed, 2022-03-16 at 05:56 -0400, Michael S. Tsirkin wrote:  
> > > On Wed, Mar 16, 2022 at 09:37:07AM +0000, David Woodhouse wrote:  
> > > > Yep, that's the guest operating system's choice. Not a qemu problem.
> > > > 
> > > > Even if you have the split IRQ chip, if you boot a guest without kvm-
> > > > msi-ext-dest-id support, it'll refuse to use higher CPUs.
> > > > 
> > > > Or if you boot a guest without X2APIC support, it'll refuse to use
> > > > higher CPUs. 
> > > > 
> > > > That doesn't mean a user should be *forbidden* from launching qemu in
> > > > that configuration.  
> > > 
> > > Well the issue with all these configs which kind of work but not
> > > the way they were specified is that down the road someone
> > > creates a VM with this config and then expects us to maintain it
> > > indefinitely.
> > > 
> > > So yes, if we are not sure we can support something properly it is
> > > better to validate and exit than create a VM guests don't know how
> > > to treat.  
> > 
> > Not entirely sure how to reconcile that with what Daniel said in
> > https://lore.kernel.org/qemu-devel/Yi9BTkZIM3iZsvdK@redhat.com/ which
> > was:

Generally Daniel is right, as long as it's something that what real hardware
supports. (usually it's job if upper layers which know what guest OS is used,
and can tweak config based on that knowledge).

But it's virt only extension and none (tested with
 Windows (hangs on boot),
 Linux (brings up only first 255 cpus)
) of mainline OSes ended up up working as expected (i.e. user asked for this
many CPUs but can't really use them as expected).
Which would just lead to users reporting (obscure) bugs.

> > > We've generally said QEMU should not reject / block startup of valid
> > > hardware configurations, based on existance of bugs in certain guest
> > > OS, if the config would be valid for other guest.  
> 
> For sure, but is this a valid hardware configuration? That's
> really the question.

to me it looks like not complete PV feature so far.
if it's a configuration that is interesting for some users (some special
build OS/appliance that can use CPUs which are able to handle only IPIs)
or for development purposes than in should be an opt-in feature
instead of default one.
 
> > That said, I cannot point at a *specific* example of a guest which can
> > use the higher CPUs even when it can't direct external interrupts at
> > them. I worked on making Linux capable of it, as I said, but didn't
> > pursue that in the end.
> > 
> > I *suspect* Windows might be able to do it, based on the way the
> > hyperv-iommu works (by cheating and returning -EINVAL when external
> > interrupts are directed at higher CPUs).
Testing shows, Windows (2019 and 2004 build) doesn't work (at least with
just kernel-irqchip=on in current state). (CCing Vitaly, he might know
if Windows might work and under what conditions)

Linux(recentish) was able to bring up all CPUs with APICID above 255
with 'split' irqchip and without iommu present (at least it boots to
command prompt).

What worked for both OSes (full boot), was split irqchip + iommu
(even without irq remapping, but I haven't tested with older guests
so irq remapping might be required anyways).



^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/4] target/i386: Fix sanity check on max APIC ID / X2APIC enablement
  2022-03-16 11:28             ` Igor Mammedov
@ 2022-03-16 14:31               ` David Woodhouse
  -1 siblings, 0 replies; 41+ messages in thread
From: David Woodhouse @ 2022-03-16 14:31 UTC (permalink / raw)
  To: Igor Mammedov, Michael S. Tsirkin
  Cc: Daniel P. Berrangé,
	qemu-devel, Peter Xu, Jason Wang, Marcel Apfelbaum,
	Paolo Bonzini, Richard Henderson, Eduardo Habkost,
	Marcelo Tosatti, kvm, Claudio Fontana, vkuznets

[-- Attachment #1: Type: text/plain, Size: 2234 bytes --]

On Wed, 2022-03-16 at 12:28 +0100, Igor Mammedov wrote:
> Generally Daniel is right, as long as it's something that what real hardware
> supports. (usually it's job if upper layers which know what guest OS is used,
> and can tweak config based on that knowledge).
> 
> But it's virt only extension and none (tested with
>  Windows (hangs on boot),
>  Linux (brings up only first 255 cpus)
> ) of mainline OSes ended up up working as expected (i.e. user asked for this
> many CPUs but can't really use them as expected).

As I said, that kind of failure mode will happen even with the split
irq chip and EXT_DEST_ID, with Windows and older (pre-5.10) Linux
kernels.

For older guests it would also happen on real hardware, and in VMs
where you expose an IOMMU with interrupt remapping. Some guests don't
support interrupt remapping, or don't support X2APIC at all.

> Which would just lead to users reporting (obscure) bugs.

It's not virt only. This could happen with real hardware.

> Testing shows, Windows (2019 and 2004 build) doesn't work (at least with
> just kernel-irqchip=on in current state). (CCing Vitaly, he might know
> if Windows might work and under what conditions)
> 
> Linux(recentish) was able to bring up all CPUs with APICID above 255
> with 'split' irqchip and without iommu present (at least it boots to
> command prompt).

That'll be using the EXT_DEST_ID support.

> What worked for both OSes (full boot), was split irqchip + iommu
> (even without irq remapping, but I haven't tested with older guests
> so irq remapping might be required anyways).

Hm, that's surprising for Windows unless it's learned to use the
EXT_DEST_ID support. Or maybe it *can* cope with only targeting
external interrupts at CPUs < 255 but has a gratuitous check that
prevents it bringing them up unless there's an IOMMU... *even* if that
IOMMU doesn't have irq remapping anyway?

Anyway, as fas as I'm concerned it doesn't matter very much whether we
insist on the split irq chip or not. Feel free to repost your patch
rebased on top of my fixes, which are also in my tree at
https://git.infradead.org/users/dwmw2/qemu.git

The check you're modifying has moved to x86_cpus_init().

[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 5965 bytes --]

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/4] target/i386: Fix sanity check on max APIC ID / X2APIC enablement
@ 2022-03-16 14:31               ` David Woodhouse
  0 siblings, 0 replies; 41+ messages in thread
From: David Woodhouse @ 2022-03-16 14:31 UTC (permalink / raw)
  To: Igor Mammedov, Michael S. Tsirkin
  Cc: Eduardo Habkost, Daniel P. Berrangé,
	kvm, Jason Wang, Marcelo Tosatti, Richard Henderson, qemu-devel,
	Peter Xu, Claudio Fontana, Paolo Bonzini, vkuznets

[-- Attachment #1: Type: text/plain, Size: 2234 bytes --]

On Wed, 2022-03-16 at 12:28 +0100, Igor Mammedov wrote:
> Generally Daniel is right, as long as it's something that what real hardware
> supports. (usually it's job if upper layers which know what guest OS is used,
> and can tweak config based on that knowledge).
> 
> But it's virt only extension and none (tested with
>  Windows (hangs on boot),
>  Linux (brings up only first 255 cpus)
> ) of mainline OSes ended up up working as expected (i.e. user asked for this
> many CPUs but can't really use them as expected).

As I said, that kind of failure mode will happen even with the split
irq chip and EXT_DEST_ID, with Windows and older (pre-5.10) Linux
kernels.

For older guests it would also happen on real hardware, and in VMs
where you expose an IOMMU with interrupt remapping. Some guests don't
support interrupt remapping, or don't support X2APIC at all.

> Which would just lead to users reporting (obscure) bugs.

It's not virt only. This could happen with real hardware.

> Testing shows, Windows (2019 and 2004 build) doesn't work (at least with
> just kernel-irqchip=on in current state). (CCing Vitaly, he might know
> if Windows might work and under what conditions)
> 
> Linux(recentish) was able to bring up all CPUs with APICID above 255
> with 'split' irqchip and without iommu present (at least it boots to
> command prompt).

That'll be using the EXT_DEST_ID support.

> What worked for both OSes (full boot), was split irqchip + iommu
> (even without irq remapping, but I haven't tested with older guests
> so irq remapping might be required anyways).

Hm, that's surprising for Windows unless it's learned to use the
EXT_DEST_ID support. Or maybe it *can* cope with only targeting
external interrupts at CPUs < 255 but has a gratuitous check that
prevents it bringing them up unless there's an IOMMU... *even* if that
IOMMU doesn't have irq remapping anyway?

Anyway, as fas as I'm concerned it doesn't matter very much whether we
insist on the split irq chip or not. Feel free to repost your patch
rebased on top of my fixes, which are also in my tree at
https://git.infradead.org/users/dwmw2/qemu.git

The check you're modifying has moved to x86_cpus_init().

[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 5965 bytes --]

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/4] target/i386: Fix sanity check on max APIC ID / X2APIC enablement
       [not found]               ` <20220317094209.2888b431@redhat.com>
@ 2022-03-17  9:05                   ` Igor Mammedov
  0 siblings, 0 replies; 41+ messages in thread
From: Igor Mammedov @ 2022-03-17  9:05 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Michael S. Tsirkin, berrange, qemu-devel, Peter Xu, Jason Wang,
	Marcel Apfelbaum, Paolo Bonzini, Richard Henderson,
	Marcelo Tosatti, kvm, Claudio Fontana, vkuznets

re-sending reply as something went wrong with headers (I suspect Daniel's name formatting)
and email got bounced back.

On Wed, 16 Mar 2022 14:31:33 +0000
David Woodhouse <dwmw2@infradead.org> wrote:

> On Wed, 2022-03-16 at 12:28 +0100, Igor Mammedov wrote:  
> > Generally Daniel is right, as long as it's something that what real hardware
> > supports. (usually it's job if upper layers which know what guest OS is used,
> > and can tweak config based on that knowledge).
> > 
> > But it's virt only extension and none (tested with
> >  Windows (hangs on boot),
> >  Linux (brings up only first 255 cpus)
> > ) of mainline OSes ended up up working as expected (i.e. user asked for this
> > many CPUs but can't really use them as expected).    
> 
> As I said, that kind of failure mode will happen even with the split
> irq chip and EXT_DEST_ID, with Windows and older (pre-5.10) Linux
> kernels.
> 
> For older guests it would also happen on real hardware, and in VMs
> where you expose an IOMMU with interrupt remapping. Some guests don't
> support interrupt remapping, or don't support X2APIC at all.
>   
> > Which would just lead to users reporting (obscure) bugs.    
> 
> It's not virt only. This could happen with real hardware.  

I was talking about EXT_DEST_ID kvm extension.
With current upstream guest kernel, user gets only "bad cpu" messages
and then go figure what's wrong with configuration or
simply hangs in case of Windows.

> > Testing shows, Windows (2019 and 2004 build) doesn't work (at least with
> > just kernel-irqchip=on in current state). (CCing Vitaly, he might know
> > if Windows might work and under what conditions)
> > 
> > Linux(recentish) was able to bring up all CPUs with APICID above 255
> > with 'split' irqchip and without iommu present (at least it boots to
> > command prompt).    
> 
> That'll be using the EXT_DEST_ID support.
>   
> > What worked for both OSes (full boot), was split irqchip + iommu
> > (even without irq remapping, but I haven't tested with older guests
> > so irq remapping might be required anyways).    
> 
> Hm, that's surprising for Windows unless it's learned to use the
> EXT_DEST_ID support. Or maybe it *can* cope with only targeting
> external interrupts at CPUs < 255 but has a gratuitous check that
> prevents it bringing them up unless there's an IOMMU... *even* if that
> IOMMU doesn't have irq remapping anyway?  

or maybe we are enabling irq remapping by default now.
I'll try to check, if guest is actually brings all CPUs up.

> Anyway, as fas as I'm concerned it doesn't matter very much whether we
> insist on the split irq chip or not. Feel free to repost your patch
> rebased on top of my fixes, which are also in my tree at
> https://git.infradead.org/users/dwmw2/qemu.git
> 
> The check you're modifying has moved to x86_cpus_init().  

if we are to keep iommu dependency then moving to x86_cpus_init()
isn't an option, it should be done at pc_machine_done() time.

in practice partial revert of your c1bb5418e to restore
iommu check including irq remapping.
In which case, do we still need kvm_enable_x2apic() check
you are adding here?


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/4] target/i386: Fix sanity check on max APIC ID / X2APIC enablement
@ 2022-03-17  9:05                   ` Igor Mammedov
  0 siblings, 0 replies; 41+ messages in thread
From: Igor Mammedov @ 2022-03-17  9:05 UTC (permalink / raw)
  To: David Woodhouse
  Cc: berrange, kvm, Michael S. Tsirkin, Jason Wang, Marcelo Tosatti,
	Richard Henderson, qemu-devel, Peter Xu, Claudio Fontana,
	Paolo Bonzini, vkuznets

re-sending reply as something went wrong with headers (I suspect Daniel's name formatting)
and email got bounced back.

On Wed, 16 Mar 2022 14:31:33 +0000
David Woodhouse <dwmw2@infradead.org> wrote:

> On Wed, 2022-03-16 at 12:28 +0100, Igor Mammedov wrote:  
> > Generally Daniel is right, as long as it's something that what real hardware
> > supports. (usually it's job if upper layers which know what guest OS is used,
> > and can tweak config based on that knowledge).
> > 
> > But it's virt only extension and none (tested with
> >  Windows (hangs on boot),
> >  Linux (brings up only first 255 cpus)
> > ) of mainline OSes ended up up working as expected (i.e. user asked for this
> > many CPUs but can't really use them as expected).    
> 
> As I said, that kind of failure mode will happen even with the split
> irq chip and EXT_DEST_ID, with Windows and older (pre-5.10) Linux
> kernels.
> 
> For older guests it would also happen on real hardware, and in VMs
> where you expose an IOMMU with interrupt remapping. Some guests don't
> support interrupt remapping, or don't support X2APIC at all.
>   
> > Which would just lead to users reporting (obscure) bugs.    
> 
> It's not virt only. This could happen with real hardware.  

I was talking about EXT_DEST_ID kvm extension.
With current upstream guest kernel, user gets only "bad cpu" messages
and then go figure what's wrong with configuration or
simply hangs in case of Windows.

> > Testing shows, Windows (2019 and 2004 build) doesn't work (at least with
> > just kernel-irqchip=on in current state). (CCing Vitaly, he might know
> > if Windows might work and under what conditions)
> > 
> > Linux(recentish) was able to bring up all CPUs with APICID above 255
> > with 'split' irqchip and without iommu present (at least it boots to
> > command prompt).    
> 
> That'll be using the EXT_DEST_ID support.
>   
> > What worked for both OSes (full boot), was split irqchip + iommu
> > (even without irq remapping, but I haven't tested with older guests
> > so irq remapping might be required anyways).    
> 
> Hm, that's surprising for Windows unless it's learned to use the
> EXT_DEST_ID support. Or maybe it *can* cope with only targeting
> external interrupts at CPUs < 255 but has a gratuitous check that
> prevents it bringing them up unless there's an IOMMU... *even* if that
> IOMMU doesn't have irq remapping anyway?  

or maybe we are enabling irq remapping by default now.
I'll try to check, if guest is actually brings all CPUs up.

> Anyway, as fas as I'm concerned it doesn't matter very much whether we
> insist on the split irq chip or not. Feel free to repost your patch
> rebased on top of my fixes, which are also in my tree at
> https://git.infradead.org/users/dwmw2/qemu.git
> 
> The check you're modifying has moved to x86_cpus_init().  

if we are to keep iommu dependency then moving to x86_cpus_init()
isn't an option, it should be done at pc_machine_done() time.

in practice partial revert of your c1bb5418e to restore
iommu check including irq remapping.
In which case, do we still need kvm_enable_x2apic() check
you are adding here?



^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/4] target/i386: Fix sanity check on max APIC ID / X2APIC enablement
  2022-03-17  9:05                   ` Igor Mammedov
@ 2022-03-17 11:13                     ` David Woodhouse
  -1 siblings, 0 replies; 41+ messages in thread
From: David Woodhouse @ 2022-03-17 11:13 UTC (permalink / raw)
  To: Igor Mammedov
  Cc: Michael S. Tsirkin, berrange, qemu-devel, Peter Xu, Jason Wang,
	Marcel Apfelbaum, Paolo Bonzini, Richard Henderson,
	Marcelo Tosatti, kvm, Claudio Fontana, vkuznets

[-- Attachment #1: Type: text/plain, Size: 4069 bytes --]

On Thu, 2022-03-17 at 10:05 +0100, Igor Mammedov wrote:
> re-sending reply as something went wrong with headers (I suspect Daniel's name formatting)
> and email got bounced back.
> 
> On Wed, 16 Mar 2022 14:31:33 +0000
> David Woodhouse <dwmw2@infradead.org> wrote:
> 
> > On Wed, 2022-03-16 at 12:28 +0100, Igor Mammedov wrote:  
> > > Generally Daniel is right, as long as it's something that what real hardware
> > > supports. (usually it's job if upper layers which know what guest OS is used,
> > > and can tweak config based on that knowledge).
> > > 
> > > But it's virt only extension and none (tested with
> > >  Windows (hangs on boot),
> > >  Linux (brings up only first 255 cpus)
> > > ) of mainline OSes ended up up working as expected (i.e. user asked for this
> > > many CPUs but can't really use them as expected).    
> > 
> > As I said, that kind of failure mode will happen even with the split
> > irq chip and EXT_DEST_ID, with Windows and older (pre-5.10) Linux
> > kernels.
> > 
> > For older guests it would also happen on real hardware, and in VMs
> > where you expose an IOMMU with interrupt remapping. Some guests don't
> > support interrupt remapping, or don't support X2APIC at all.
> >   
> > > Which would just lead to users reporting (obscure) bugs.    
> > 
> > It's not virt only. This could happen with real hardware.  
> 
> I was talking about EXT_DEST_ID kvm extension.

Then I'm confused, because that isn't the conversation we were having
before. And in that case what you say above about Linux only bringing
up the first 255 CPUs directly contradicts what you say below, my own
experience, and the whole *point* of the EXT_DEST_ID extension :)

Let's start again. You observed that Linux guests fail to bring up >254
vcPUs if qemu doesn't enable the EXT_DEST_ID support, and your fix was
to require EXT_DEST_ID (as a side-effect of requiring split irqchip).

This reminded me of the fixes I'd been posting since 2020 which still
haven't been merged, so I dusted those off and resent them.

I didn't incorporate your change, and objected to your patch because I
think it's pointless babysitting. Yes, in the general case if you want
your guest to use more than 254 vCPUs you need to take a moment to
think about precisely what your guest operating system requires in
order to support that.

At the very least it needs X2APIC support, and then you need *one* of:

 • EXT_DEST_ID,
 • Interrupt remapping, or
 • just using those vCPUs without external interrupts.

Both of the first two require the split irqchip, so your patch just
doesn't let users rely on that last option. I conceded (cited below)
because I don't know of any existing guest OS which does use that last
option. I'd attempted to make Linux do so, but eventually abandoned it:
https://git.infradead.org/users/dwmw2/linux.git/shortlog/refs/heads/irqaffinity

But now you seem to be making a different argument?

> > Anyway, as far as I'm concerned it doesn't matter very much whether we
> > insist on the split irq chip or not. Feel free to repost your patch
> > rebased on top of my fixes, which are also in my tree at
> > https://git.infradead.org/users/dwmw2/qemu.git
> > 
> > 
> > The check you're modifying has moved to x86_cpus_init().  
> 
> if we are to keep iommu dependency then moving to x86_cpus_init()
> isn't an option, it should be done at pc_machine_done() time.

Thus far, I didn't think anyone had been talking about a dependency on
IOMMU. That doesn't make any sense at all. EXT_DEST_ID is perfectly
sufficient for Linux kernels from 5.10 onwards and they don't need the
IOMMU.

So no. Post your patch to s/kvm_irqchip_in_kernel/kvm_irqchip_is_split/
in x86_cpus_init() by all means; I don't care much about that and I've
even incorporated that change into my tree at
https://git.infradead.org/users/dwmw2/qemu.git so that if I do have to
repost these fixes yet again, it'll be included.

But let's not re-add the IOMMU dependency. That would be wrong.


[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 5965 bytes --]

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/4] target/i386: Fix sanity check on max APIC ID / X2APIC enablement
@ 2022-03-17 11:13                     ` David Woodhouse
  0 siblings, 0 replies; 41+ messages in thread
From: David Woodhouse @ 2022-03-17 11:13 UTC (permalink / raw)
  To: Igor Mammedov
  Cc: berrange, kvm, Michael S. Tsirkin, Jason Wang, Marcelo Tosatti,
	Richard Henderson, qemu-devel, Peter Xu, Claudio Fontana,
	Paolo Bonzini, vkuznets

[-- Attachment #1: Type: text/plain, Size: 4069 bytes --]

On Thu, 2022-03-17 at 10:05 +0100, Igor Mammedov wrote:
> re-sending reply as something went wrong with headers (I suspect Daniel's name formatting)
> and email got bounced back.
> 
> On Wed, 16 Mar 2022 14:31:33 +0000
> David Woodhouse <dwmw2@infradead.org> wrote:
> 
> > On Wed, 2022-03-16 at 12:28 +0100, Igor Mammedov wrote:  
> > > Generally Daniel is right, as long as it's something that what real hardware
> > > supports. (usually it's job if upper layers which know what guest OS is used,
> > > and can tweak config based on that knowledge).
> > > 
> > > But it's virt only extension and none (tested with
> > >  Windows (hangs on boot),
> > >  Linux (brings up only first 255 cpus)
> > > ) of mainline OSes ended up up working as expected (i.e. user asked for this
> > > many CPUs but can't really use them as expected).    
> > 
> > As I said, that kind of failure mode will happen even with the split
> > irq chip and EXT_DEST_ID, with Windows and older (pre-5.10) Linux
> > kernels.
> > 
> > For older guests it would also happen on real hardware, and in VMs
> > where you expose an IOMMU with interrupt remapping. Some guests don't
> > support interrupt remapping, or don't support X2APIC at all.
> >   
> > > Which would just lead to users reporting (obscure) bugs.    
> > 
> > It's not virt only. This could happen with real hardware.  
> 
> I was talking about EXT_DEST_ID kvm extension.

Then I'm confused, because that isn't the conversation we were having
before. And in that case what you say above about Linux only bringing
up the first 255 CPUs directly contradicts what you say below, my own
experience, and the whole *point* of the EXT_DEST_ID extension :)

Let's start again. You observed that Linux guests fail to bring up >254
vcPUs if qemu doesn't enable the EXT_DEST_ID support, and your fix was
to require EXT_DEST_ID (as a side-effect of requiring split irqchip).

This reminded me of the fixes I'd been posting since 2020 which still
haven't been merged, so I dusted those off and resent them.

I didn't incorporate your change, and objected to your patch because I
think it's pointless babysitting. Yes, in the general case if you want
your guest to use more than 254 vCPUs you need to take a moment to
think about precisely what your guest operating system requires in
order to support that.

At the very least it needs X2APIC support, and then you need *one* of:

 • EXT_DEST_ID,
 • Interrupt remapping, or
 • just using those vCPUs without external interrupts.

Both of the first two require the split irqchip, so your patch just
doesn't let users rely on that last option. I conceded (cited below)
because I don't know of any existing guest OS which does use that last
option. I'd attempted to make Linux do so, but eventually abandoned it:
https://git.infradead.org/users/dwmw2/linux.git/shortlog/refs/heads/irqaffinity

But now you seem to be making a different argument?

> > Anyway, as far as I'm concerned it doesn't matter very much whether we
> > insist on the split irq chip or not. Feel free to repost your patch
> > rebased on top of my fixes, which are also in my tree at
> > https://git.infradead.org/users/dwmw2/qemu.git
> > 
> > 
> > The check you're modifying has moved to x86_cpus_init().  
> 
> if we are to keep iommu dependency then moving to x86_cpus_init()
> isn't an option, it should be done at pc_machine_done() time.

Thus far, I didn't think anyone had been talking about a dependency on
IOMMU. That doesn't make any sense at all. EXT_DEST_ID is perfectly
sufficient for Linux kernels from 5.10 onwards and they don't need the
IOMMU.

So no. Post your patch to s/kvm_irqchip_in_kernel/kvm_irqchip_is_split/
in x86_cpus_init() by all means; I don't care much about that and I've
even incorporated that change into my tree at
https://git.infradead.org/users/dwmw2/qemu.git so that if I do have to
repost these fixes yet again, it'll be included.

But let's not re-add the IOMMU dependency. That would be wrong.


[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 5965 bytes --]

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/4] target/i386: Fix sanity check on max APIC ID / X2APIC enablement
  2022-03-17 11:13                     ` David Woodhouse
@ 2022-03-18 14:17                       ` Igor Mammedov
  -1 siblings, 0 replies; 41+ messages in thread
From: Igor Mammedov @ 2022-03-18 14:17 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Michael S. Tsirkin, berrange, qemu-devel, Peter Xu, Jason Wang,
	Marcel Apfelbaum, Paolo Bonzini, Richard Henderson,
	Marcelo Tosatti, kvm, Claudio Fontana, vkuznets

On Thu, 17 Mar 2022 11:13:44 +0000
David Woodhouse <dwmw2@infradead.org> wrote:

> On Thu, 2022-03-17 at 10:05 +0100, Igor Mammedov wrote:
> > re-sending reply as something went wrong with headers (I suspect Daniel's name formatting)
> > and email got bounced back.
> > 
> > On Wed, 16 Mar 2022 14:31:33 +0000
> > David Woodhouse <dwmw2@infradead.org> wrote:
> >   
> > > On Wed, 2022-03-16 at 12:28 +0100, Igor Mammedov wrote:    
> > > > Generally Daniel is right, as long as it's something that what real hardware
> > > > supports. (usually it's job if upper layers which know what guest OS is used,
> > > > and can tweak config based on that knowledge).
> > > > 
> > > > But it's virt only extension and none (tested with
> > > >  Windows (hangs on boot),
> > > >  Linux (brings up only first 255 cpus)
> > > > ) of mainline OSes ended up up working as expected (i.e. user asked for this
> > > > many CPUs but can't really use them as expected).      
> > > 
> > > As I said, that kind of failure mode will happen even with the split
> > > irq chip and EXT_DEST_ID, with Windows and older (pre-5.10) Linux
> > > kernels.
> > > 
> > > For older guests it would also happen on real hardware, and in VMs
> > > where you expose an IOMMU with interrupt remapping. Some guests don't
> > > support interrupt remapping, or don't support X2APIC at all.
> > >     
> > > > Which would just lead to users reporting (obscure) bugs.      
> > > 
> > > It's not virt only. This could happen with real hardware.    
> > 
> > I was talking about EXT_DEST_ID kvm extension.  
> 
> Then I'm confused, because that isn't the conversation we were having
> before. And in that case what you say above about Linux only bringing
> up the first 255 CPUs directly contradicts what you say below, my own
> experience, and the whole *point* of the EXT_DEST_ID extension :)

Now I'm lost in translation too :)

> Let's start again. You observed that Linux guests fail to bring up >254
> vcPUs if qemu doesn't enable the EXT_DEST_ID support, and your fix was
> to require EXT_DEST_ID (as a side-effect of requiring split irqchip).
> 
> This reminded me of the fixes I'd been posting since 2020 which still
> haven't been merged, so I dusted those off and resent them.
> 
> I didn't incorporate your change, and objected to your patch because I
> think it's pointless babysitting.

1)

> Yes, in the general case if you want
> your guest to use more than 254 vCPUs you need to take a moment to
> think about precisely what your guest operating system requires in
> order to support that.
> 
> At the very least it needs X2APIC support, and then you need *one* of:
> 
>  • EXT_DEST_ID,
>  • Interrupt remapping, or
>  • just using those vCPUs without external interrupts.
> 
> Both of the first two require the split irqchip, so your patch just
> doesn't let users rely on that last option. I conceded (cited below)
> because I don't know of any existing guest OS which does use that last
> option. I'd attempted to make Linux do so, but eventually abandoned it:
> https://git.infradead.org/users/dwmw2/linux.git/shortlog/refs/heads/irqaffinity

Given that is was abandoned, it's unlikely that the last option was ever
used (and before EXT_DEST_ID, qemu was requiring irq remapping to start
guest with so many vcpus). If there will be a user for it in the future
we can relax restriction given that it will be properly documented/
user gets sane error/warning messages, so they could figure out what to do.

> But now you seem to be making a different argument?
> 
> > > Anyway, as far as I'm concerned it doesn't matter very much whether we
> > > insist on the split irq chip or not. Feel free to repost your patch
> > > rebased on top of my fixes, which are also in my tree at
> > > https://git.infradead.org/users/dwmw2/qemu.git
> > > 
> > > 
> > > The check you're modifying has moved to x86_cpus_init().    
> > 
> > if we are to keep iommu dependency then moving to x86_cpus_init()
> > isn't an option, it should be done at pc_machine_done() time.  
> 
> Thus far, I didn't think anyone had been talking about a dependency on
> IOMMU. That doesn't make any sense at all. EXT_DEST_ID is perfectly
> sufficient for Linux kernels from 5.10 onwards and they don't need the
> IOMMU.

IOMMU was required before EXT_DEST_ID due to irq-remapping dependency,
and that conservative config worked fine for both Linux and Windows
guests. That's why I've raised question if we should revert restriction
to the way it was back then.

With Linux pre-5.10 guests, dmesg output at least complains about
IRQ remapping, so user has a small chance to be able to figure out
that IOMMU should be configured to get all CPUs working.
For post-5.10, all one gets is "bad cpu" without any clue as to why,
if EXT_DEST_ID is not advertised by hypervisor.
It would be better if guest kernel printed some error/warning in that case.

If we start with IOMMU, (Win/Linux) guests boot fine (modulo ancient ones)
(irq-remapping is 'on' by default since qemu-4.0).

> So no. Post your patch to s/kvm_irqchip_in_kernel/kvm_irqchip_is_split/
> in x86_cpus_init() by all means; I don't care much about that and I've
> even incorporated that change into my tree at
> https://git.infradead.org/users/dwmw2/qemu.git so that if I do have to
> repost these fixes yet again, it'll be included.

Looks fine to me, thanks.

> But let's not re-add the IOMMU dependency. That would be wrong.

We should document possible options, somewhere in QEMU.
So not only few would know about what options to use and when.
Something along lines above [1].


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/4] target/i386: Fix sanity check on max APIC ID / X2APIC enablement
@ 2022-03-18 14:17                       ` Igor Mammedov
  0 siblings, 0 replies; 41+ messages in thread
From: Igor Mammedov @ 2022-03-18 14:17 UTC (permalink / raw)
  To: David Woodhouse
  Cc: berrange, kvm, Michael S. Tsirkin, Jason Wang, Marcelo Tosatti,
	Richard Henderson, qemu-devel, Peter Xu, Claudio Fontana,
	Paolo Bonzini, vkuznets

On Thu, 17 Mar 2022 11:13:44 +0000
David Woodhouse <dwmw2@infradead.org> wrote:

> On Thu, 2022-03-17 at 10:05 +0100, Igor Mammedov wrote:
> > re-sending reply as something went wrong with headers (I suspect Daniel's name formatting)
> > and email got bounced back.
> > 
> > On Wed, 16 Mar 2022 14:31:33 +0000
> > David Woodhouse <dwmw2@infradead.org> wrote:
> >   
> > > On Wed, 2022-03-16 at 12:28 +0100, Igor Mammedov wrote:    
> > > > Generally Daniel is right, as long as it's something that what real hardware
> > > > supports. (usually it's job if upper layers which know what guest OS is used,
> > > > and can tweak config based on that knowledge).
> > > > 
> > > > But it's virt only extension and none (tested with
> > > >  Windows (hangs on boot),
> > > >  Linux (brings up only first 255 cpus)
> > > > ) of mainline OSes ended up up working as expected (i.e. user asked for this
> > > > many CPUs but can't really use them as expected).      
> > > 
> > > As I said, that kind of failure mode will happen even with the split
> > > irq chip and EXT_DEST_ID, with Windows and older (pre-5.10) Linux
> > > kernels.
> > > 
> > > For older guests it would also happen on real hardware, and in VMs
> > > where you expose an IOMMU with interrupt remapping. Some guests don't
> > > support interrupt remapping, or don't support X2APIC at all.
> > >     
> > > > Which would just lead to users reporting (obscure) bugs.      
> > > 
> > > It's not virt only. This could happen with real hardware.    
> > 
> > I was talking about EXT_DEST_ID kvm extension.  
> 
> Then I'm confused, because that isn't the conversation we were having
> before. And in that case what you say above about Linux only bringing
> up the first 255 CPUs directly contradicts what you say below, my own
> experience, and the whole *point* of the EXT_DEST_ID extension :)

Now I'm lost in translation too :)

> Let's start again. You observed that Linux guests fail to bring up >254
> vcPUs if qemu doesn't enable the EXT_DEST_ID support, and your fix was
> to require EXT_DEST_ID (as a side-effect of requiring split irqchip).
> 
> This reminded me of the fixes I'd been posting since 2020 which still
> haven't been merged, so I dusted those off and resent them.
> 
> I didn't incorporate your change, and objected to your patch because I
> think it's pointless babysitting.

1)

> Yes, in the general case if you want
> your guest to use more than 254 vCPUs you need to take a moment to
> think about precisely what your guest operating system requires in
> order to support that.
> 
> At the very least it needs X2APIC support, and then you need *one* of:
> 
>  • EXT_DEST_ID,
>  • Interrupt remapping, or
>  • just using those vCPUs without external interrupts.
> 
> Both of the first two require the split irqchip, so your patch just
> doesn't let users rely on that last option. I conceded (cited below)
> because I don't know of any existing guest OS which does use that last
> option. I'd attempted to make Linux do so, but eventually abandoned it:
> https://git.infradead.org/users/dwmw2/linux.git/shortlog/refs/heads/irqaffinity

Given that is was abandoned, it's unlikely that the last option was ever
used (and before EXT_DEST_ID, qemu was requiring irq remapping to start
guest with so many vcpus). If there will be a user for it in the future
we can relax restriction given that it will be properly documented/
user gets sane error/warning messages, so they could figure out what to do.

> But now you seem to be making a different argument?
> 
> > > Anyway, as far as I'm concerned it doesn't matter very much whether we
> > > insist on the split irq chip or not. Feel free to repost your patch
> > > rebased on top of my fixes, which are also in my tree at
> > > https://git.infradead.org/users/dwmw2/qemu.git
> > > 
> > > 
> > > The check you're modifying has moved to x86_cpus_init().    
> > 
> > if we are to keep iommu dependency then moving to x86_cpus_init()
> > isn't an option, it should be done at pc_machine_done() time.  
> 
> Thus far, I didn't think anyone had been talking about a dependency on
> IOMMU. That doesn't make any sense at all. EXT_DEST_ID is perfectly
> sufficient for Linux kernels from 5.10 onwards and they don't need the
> IOMMU.

IOMMU was required before EXT_DEST_ID due to irq-remapping dependency,
and that conservative config worked fine for both Linux and Windows
guests. That's why I've raised question if we should revert restriction
to the way it was back then.

With Linux pre-5.10 guests, dmesg output at least complains about
IRQ remapping, so user has a small chance to be able to figure out
that IOMMU should be configured to get all CPUs working.
For post-5.10, all one gets is "bad cpu" without any clue as to why,
if EXT_DEST_ID is not advertised by hypervisor.
It would be better if guest kernel printed some error/warning in that case.

If we start with IOMMU, (Win/Linux) guests boot fine (modulo ancient ones)
(irq-remapping is 'on' by default since qemu-4.0).

> So no. Post your patch to s/kvm_irqchip_in_kernel/kvm_irqchip_is_split/
> in x86_cpus_init() by all means; I don't care much about that and I've
> even incorporated that change into my tree at
> https://git.infradead.org/users/dwmw2/qemu.git so that if I do have to
> repost these fixes yet again, it'll be included.

Looks fine to me, thanks.

> But let's not re-add the IOMMU dependency. That would be wrong.

We should document possible options, somewhere in QEMU.
So not only few would know about what options to use and when.
Something along lines above [1].



^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/4] target/i386: Fix sanity check on max APIC ID / X2APIC enablement
  2022-03-18 14:17                       ` Igor Mammedov
@ 2022-03-18 14:56                         ` David Woodhouse
  -1 siblings, 0 replies; 41+ messages in thread
From: David Woodhouse @ 2022-03-18 14:56 UTC (permalink / raw)
  To: Igor Mammedov
  Cc: Michael S. Tsirkin, berrange, qemu-devel, Peter Xu, Jason Wang,
	Marcel Apfelbaum, Paolo Bonzini, Richard Henderson,
	Marcelo Tosatti, kvm, Claudio Fontana, vkuznets

[-- Attachment #1: Type: text/plain, Size: 1173 bytes --]

On Fri, 2022-03-18 at 15:17 +0100, Igor Mammedov wrote:
> On Thu, 17 Mar 2022 11:13:44 +0000 David Woodhouse <dwmw2@infradead.org> wrote:
> > Thus far, I didn't think anyone had been talking about a dependency on
> > IOMMU. That doesn't make any sense at all. EXT_DEST_ID is perfectly
> > sufficient for Linux kernels from 5.10 onwards and they don't need the
> > IOMMU.
> 
> IOMMU was required before EXT_DEST_ID due to irq-remapping dependency,
> and that conservative config worked fine for both Linux and Windows
> guests. That's why I've raised question if we should revert restriction
> to the way it was back then.
> 
> With Linux pre-5.10 guests, dmesg output at least complains about
> IRQ remapping, so user has a small chance to be able to figure out
> that IOMMU should be configured to get all CPUs working.
> For post-5.10, all one gets is "bad cpu" without any clue as to why,
> if EXT_DEST_ID is not advertised by hypervisor.
>
> It would be better if guest kernel printed some error/warning in that case.

That case doesn't exist any more since your change. You *can't* launch
qemu with that many vCPUs and not support EXT_DEST_ID.


[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 5965 bytes --]

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/4] target/i386: Fix sanity check on max APIC ID / X2APIC enablement
@ 2022-03-18 14:56                         ` David Woodhouse
  0 siblings, 0 replies; 41+ messages in thread
From: David Woodhouse @ 2022-03-18 14:56 UTC (permalink / raw)
  To: Igor Mammedov
  Cc: berrange, kvm, Michael S. Tsirkin, Jason Wang, Marcelo Tosatti,
	Richard Henderson, qemu-devel, Peter Xu, Claudio Fontana,
	Paolo Bonzini, vkuznets

[-- Attachment #1: Type: text/plain, Size: 1173 bytes --]

On Fri, 2022-03-18 at 15:17 +0100, Igor Mammedov wrote:
> On Thu, 17 Mar 2022 11:13:44 +0000 David Woodhouse <dwmw2@infradead.org> wrote:
> > Thus far, I didn't think anyone had been talking about a dependency on
> > IOMMU. That doesn't make any sense at all. EXT_DEST_ID is perfectly
> > sufficient for Linux kernels from 5.10 onwards and they don't need the
> > IOMMU.
> 
> IOMMU was required before EXT_DEST_ID due to irq-remapping dependency,
> and that conservative config worked fine for both Linux and Windows
> guests. That's why I've raised question if we should revert restriction
> to the way it was back then.
> 
> With Linux pre-5.10 guests, dmesg output at least complains about
> IRQ remapping, so user has a small chance to be able to figure out
> that IOMMU should be configured to get all CPUs working.
> For post-5.10, all one gets is "bad cpu" without any clue as to why,
> if EXT_DEST_ID is not advertised by hypervisor.
>
> It would be better if guest kernel printed some error/warning in that case.

That case doesn't exist any more since your change. You *can't* launch
qemu with that many vCPUs and not support EXT_DEST_ID.


[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 5965 bytes --]

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/4] target/i386: Fix sanity check on max APIC ID / X2APIC enablement
  2022-03-14 14:25 ` David Woodhouse
                   ` (4 preceding siblings ...)
  (?)
@ 2022-05-13 13:37 ` Michael S. Tsirkin
  -1 siblings, 0 replies; 41+ messages in thread
From: Michael S. Tsirkin @ 2022-05-13 13:37 UTC (permalink / raw)
  To: David Woodhouse
  Cc: qemu-devel, Peter Xu, Jason Wang, Marcel Apfelbaum,
	Paolo Bonzini, Richard Henderson, Eduardo Habkost,
	Marcelo Tosatti, kvm, Claudio Fontana, Igor Mammedov,
	Daniel P . Berrangé

On Mon, Mar 14, 2022 at 02:25:41PM +0000, David Woodhouse wrote:
> The check on x86ms->apic_id_limit in pc_machine_done() had two problems.
> 
> Firstly, we need KVM to support the X2APIC API in order to allow IRQ
> delivery to APICs >= 255. So we need to call/check kvm_enable_x2apic(),
> which was done elsewhere in *some* cases but not all.
> 
> Secondly, microvm needs the same check. So move it from pc_machine_done()
> to x86_cpus_init() where it will work for both.
> 
> The check in kvm_cpu_instance_init() is now redundant and can be dropped.
> 
> Signed-off-by: David Woodhouse <dwmw2@infradead.org>
> Acked-by: Claudio Fontana <cfontana@suse.de>


Fow now I applied this as-is, in particular because if the added split
irqchip test make check started failing for me on Fedora.
Igor please go ahead and make a change on top limiting things
to the split irqchip.

> ---
>  hw/i386/pc.c              |  8 --------
>  hw/i386/x86.c             | 16 ++++++++++++++++
>  target/i386/kvm/kvm-cpu.c |  2 +-
>  3 files changed, 17 insertions(+), 9 deletions(-)
> 
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index fd55fc725c..d3ab28fec5 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -740,14 +740,6 @@ void pc_machine_done(Notifier *notifier, void *data)
>          /* update FW_CFG_NB_CPUS to account for -device added CPUs */
>          fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus);
>      }
> -
> -
> -    if (x86ms->apic_id_limit > 255 && !xen_enabled() &&
> -        !kvm_irqchip_in_kernel()) {
> -        error_report("current -smp configuration requires kernel "
> -                     "irqchip support.");
> -        exit(EXIT_FAILURE);
> -    }
>  }
>  
>  void pc_guest_info_init(PCMachineState *pcms)
> diff --git a/hw/i386/x86.c b/hw/i386/x86.c
> index 4cf107baea..8da55d58ea 100644
> --- a/hw/i386/x86.c
> +++ b/hw/i386/x86.c
> @@ -39,6 +39,7 @@
>  #include "sysemu/replay.h"
>  #include "sysemu/sysemu.h"
>  #include "sysemu/cpu-timers.h"
> +#include "sysemu/xen.h"
>  #include "trace.h"
>  
>  #include "hw/i386/x86.h"
> @@ -123,6 +124,21 @@ void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version)
>       */
>      x86ms->apic_id_limit = x86_cpu_apic_id_from_index(x86ms,
>                                                        ms->smp.max_cpus - 1) + 1;
> +
> +    /*
> +     * Can we support APIC ID 255 or higher?
> +     *
> +     * Under Xen: yes.
> +     * With userspace emulated lapic: no
> +     * With KVM's in-kernel lapic: only if X2APIC API is enabled.
> +     */
> +    if (x86ms->apic_id_limit > 255 && !xen_enabled() &&
> +        (!kvm_irqchip_in_kernel() || !kvm_enable_x2apic())) {
> +        error_report("current -smp configuration requires kernel "
> +                     "irqchip and X2APIC API support.");
> +        exit(EXIT_FAILURE);
> +    }
> +
>      possible_cpus = mc->possible_cpu_arch_ids(ms);
>      for (i = 0; i < ms->smp.cpus; i++) {
>          x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal);
> diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c
> index d95028018e..c60cb2dafb 100644
> --- a/target/i386/kvm/kvm-cpu.c
> +++ b/target/i386/kvm/kvm-cpu.c
> @@ -165,7 +165,7 @@ static void kvm_cpu_instance_init(CPUState *cs)
>          /* only applies to builtin_x86_defs cpus */
>          if (!kvm_irqchip_in_kernel()) {
>              x86_cpu_change_kvm_default("x2apic", "off");
> -        } else if (kvm_irqchip_is_split() && kvm_enable_x2apic()) {
> +        } else if (kvm_irqchip_is_split()) {
>              x86_cpu_change_kvm_default("kvm-msi-ext-dest-id", "on");
>          }
>  
> -- 
> 2.33.1


^ permalink raw reply	[flat|nested] 41+ messages in thread

end of thread, other threads:[~2022-05-13 13:49 UTC | newest]

Thread overview: 41+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-14 14:25 [PATCH 1/4] target/i386: Fix sanity check on max APIC ID / X2APIC enablement David Woodhouse
2022-03-14 14:25 ` David Woodhouse
2022-03-14 14:25 ` [PATCH 2/4] intel_iommu: Support IR-only mode without DMA translation David Woodhouse
2022-03-14 14:25   ` David Woodhouse
2022-03-14 15:24   ` Michael S. Tsirkin
2022-03-14 15:24     ` Michael S. Tsirkin
2022-03-14 15:45     ` David Woodhouse
2022-03-14 15:45       ` David Woodhouse
2022-03-14 22:27       ` Michael S. Tsirkin
2022-03-14 22:27         ` Michael S. Tsirkin
2022-03-16  9:34         ` David Woodhouse
2022-03-16  9:34           ` David Woodhouse
2022-03-14 16:01   ` David Woodhouse
2022-03-14 16:01     ` David Woodhouse
2022-03-14 14:25 ` [PATCH 3/4] intel_iommu: Only allow interrupt remapping to be enabled if it's supported David Woodhouse
2022-03-14 14:25   ` David Woodhouse
2022-03-14 14:25 ` [PATCH 4/4] intel_iommu: Fix irqchip / X2APIC configuration checks David Woodhouse
2022-03-14 14:25   ` David Woodhouse
2022-03-16  9:04 ` [PATCH 1/4] target/i386: Fix sanity check on max APIC ID / X2APIC enablement Igor Mammedov
2022-03-16  9:04   ` Igor Mammedov
2022-03-16  9:37   ` David Woodhouse
2022-03-16  9:37     ` David Woodhouse
2022-03-16  9:56     ` Michael S. Tsirkin
2022-03-16  9:56       ` Michael S. Tsirkin
2022-03-16 10:37       ` David Woodhouse
2022-03-16 10:37         ` David Woodhouse
2022-03-16 10:47         ` Michael S. Tsirkin
2022-03-16 10:47           ` Michael S. Tsirkin
2022-03-16 11:28           ` Igor Mammedov
2022-03-16 11:28             ` Igor Mammedov
2022-03-16 14:31             ` David Woodhouse
2022-03-16 14:31               ` David Woodhouse
     [not found]               ` <20220317094209.2888b431@redhat.com>
2022-03-17  9:05                 ` Igor Mammedov
2022-03-17  9:05                   ` Igor Mammedov
2022-03-17 11:13                   ` David Woodhouse
2022-03-17 11:13                     ` David Woodhouse
2022-03-18 14:17                     ` Igor Mammedov
2022-03-18 14:17                       ` Igor Mammedov
2022-03-18 14:56                       ` David Woodhouse
2022-03-18 14:56                         ` David Woodhouse
2022-05-13 13:37 ` Michael S. Tsirkin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.