All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Woodhouse <dwmw2@infradead.org>
To: x86@kernel.org
Cc: iommu <iommu@lists.linux-foundation.org>,
	kvm <kvm@vger.kernel.org>,
	linux-hyperv@vger.kernel.org, Paolo Bonzini <pbonzini@redhat.com>
Subject: [PATCH 04/13] x86/apic: Support 15 bits of APIC ID in IOAPIC/MSI where available
Date: Mon,  5 Oct 2020 16:28:47 +0100	[thread overview]
Message-ID: <20201005152856.974112-4-dwmw2@infradead.org> (raw)
In-Reply-To: <20201005152856.974112-1-dwmw2@infradead.org>

From: David Woodhouse <dwmw@amazon.co.uk>

Some hypervisors allow the guest to use the Extended Destination ID
field in the IOAPIC RTE and MSI address. This extends the APIC ID from
8 bits to 15 bits, so that up to 32768 CPUs can be addressed.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 arch/x86/include/asm/mpspec.h   |  1 +
 arch/x86/include/asm/x86_init.h |  2 ++
 arch/x86/kernel/apic/apic.c     | 15 ++++++++++++++-
 arch/x86/kernel/apic/msi.c      |  9 ++++++++-
 arch/x86/kernel/x86_init.c      |  1 +
 5 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index e90ac7e9ae2c..25ee8ca0a1f2 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -42,6 +42,7 @@ extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
 extern unsigned int boot_cpu_physical_apicid;
 extern u8 boot_cpu_apic_version;
 extern unsigned long mp_lapic_addr;
+extern int msi_ext_dest_id;
 
 #ifdef CONFIG_X86_LOCAL_APIC
 extern int smp_found_config;
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 397196fae24d..5af3fe9e38f3 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -114,6 +114,7 @@ struct x86_init_pci {
  * @init_platform:		platform setup
  * @guest_late_init:		guest late init
  * @x2apic_available:		X2APIC detection
+ * @msi_ext_dest_id:		MSI and IOAPIC support 15-bit APIC IDs
  * @init_mem_mapping:		setup early mappings during init_mem_mapping()
  * @init_after_bootmem:		guest init after boot allocator is finished
  */
@@ -121,6 +122,7 @@ struct x86_hyper_init {
 	void (*init_platform)(void);
 	void (*guest_late_init)(void);
 	bool (*x2apic_available)(void);
+	bool (*msi_ext_dest_id)(void);
 	void (*init_mem_mapping)(void);
 	void (*init_after_bootmem)(void);
 };
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index a75767052a92..459c78558f36 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1837,6 +1837,8 @@ static __init void x2apic_enable(void)
 
 static __init void try_to_enable_x2apic(int remap_mode)
 {
+	u32 apic_limit = 0;
+
 	if (x2apic_state == X2APIC_DISABLED)
 		return;
 
@@ -1858,7 +1860,15 @@ static __init void try_to_enable_x2apic(int remap_mode)
 				return;
 			}
 
-			x2apic_set_max_apicid(255);
+			/*
+			 * If the hypervisor supports extended destination ID
+			 * in IOAPIC and MSI, we can support that many CPUs.
+			 */
+			if (x86_init.hyper.msi_ext_dest_id()) {
+				msi_ext_dest_id = 1;
+				apic_limit = 32767;
+			} else
+				apic_limit = 255;
 		}
 
 		/*
@@ -1867,6 +1877,9 @@ static __init void try_to_enable_x2apic(int remap_mode)
 		 */
 		x2apic_phys = 1;
 	}
+	if (apic_limit)
+		x2apic_set_max_apicid(apic_limit);
+
 	x2apic_enable();
 }
 
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index 356f8acf4927..4d891967bea4 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -23,6 +23,8 @@
 
 struct irq_domain *x86_pci_msi_default_domain __ro_after_init;
 
+int msi_ext_dest_id __ro_after_init;
+
 static void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg, int dmar)
 {
 	msg->address_hi = MSI_ADDR_BASE_HI;
@@ -45,10 +47,15 @@ static void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg, int
 	 * Only the IOMMU itself can use the trick of putting destination
 	 * APIC ID into the high bits of the address. Anything else would
 	 * just be writing to memory if it tried that, and needs IR to
-	 * address APICs above 255.
+	 * address APICs which can't be addressed in the normal 32-bit
+	 * address range at 0xFFExxxxx. That is typically just 8 bits, but
+	 * some hypervisors allow the extended destination ID field in bits
+	 * 11-5 to be used, giving support for 15 bits of APIC IDs in total.
 	 */
 	if (dmar)
 		msg->address_hi |= MSI_ADDR_EXT_DEST_ID(cfg->dest_apicid);
+	else if (msi_ext_dest_id && cfg->dest_apicid < 0x8000)
+		msg->address_lo |= MSI_ADDR_EXT_DEST_ID(cfg->dest_apicid) >> 3;
 	else
 		WARN_ON_ONCE(MSI_ADDR_EXT_DEST_ID(cfg->dest_apicid));
 }
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index a3038d8deb6a..8b395821cb8d 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -110,6 +110,7 @@ struct x86_init_ops x86_init __initdata = {
 		.init_platform		= x86_init_noop,
 		.guest_late_init	= x86_init_noop,
 		.x2apic_available	= bool_x86_init_noop,
+		.msi_ext_dest_id	= bool_x86_init_noop,
 		.init_mem_mapping	= x86_init_noop,
 		.init_after_bootmem	= x86_init_noop,
 	},
-- 
2.26.2


WARNING: multiple messages have this Message-ID (diff)
From: David Woodhouse <dwmw2@infradead.org>
To: x86@kernel.org
Cc: Paolo Bonzini <pbonzini@redhat.com>,
	iommu <iommu@lists.linux-foundation.org>,
	linux-hyperv@vger.kernel.org, kvm <kvm@vger.kernel.org>
Subject: [PATCH 04/13] x86/apic: Support 15 bits of APIC ID in IOAPIC/MSI where available
Date: Mon,  5 Oct 2020 16:28:47 +0100	[thread overview]
Message-ID: <20201005152856.974112-4-dwmw2@infradead.org> (raw)
In-Reply-To: <20201005152856.974112-1-dwmw2@infradead.org>

From: David Woodhouse <dwmw@amazon.co.uk>

Some hypervisors allow the guest to use the Extended Destination ID
field in the IOAPIC RTE and MSI address. This extends the APIC ID from
8 bits to 15 bits, so that up to 32768 CPUs can be addressed.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 arch/x86/include/asm/mpspec.h   |  1 +
 arch/x86/include/asm/x86_init.h |  2 ++
 arch/x86/kernel/apic/apic.c     | 15 ++++++++++++++-
 arch/x86/kernel/apic/msi.c      |  9 ++++++++-
 arch/x86/kernel/x86_init.c      |  1 +
 5 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index e90ac7e9ae2c..25ee8ca0a1f2 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -42,6 +42,7 @@ extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
 extern unsigned int boot_cpu_physical_apicid;
 extern u8 boot_cpu_apic_version;
 extern unsigned long mp_lapic_addr;
+extern int msi_ext_dest_id;
 
 #ifdef CONFIG_X86_LOCAL_APIC
 extern int smp_found_config;
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 397196fae24d..5af3fe9e38f3 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -114,6 +114,7 @@ struct x86_init_pci {
  * @init_platform:		platform setup
  * @guest_late_init:		guest late init
  * @x2apic_available:		X2APIC detection
+ * @msi_ext_dest_id:		MSI and IOAPIC support 15-bit APIC IDs
  * @init_mem_mapping:		setup early mappings during init_mem_mapping()
  * @init_after_bootmem:		guest init after boot allocator is finished
  */
@@ -121,6 +122,7 @@ struct x86_hyper_init {
 	void (*init_platform)(void);
 	void (*guest_late_init)(void);
 	bool (*x2apic_available)(void);
+	bool (*msi_ext_dest_id)(void);
 	void (*init_mem_mapping)(void);
 	void (*init_after_bootmem)(void);
 };
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index a75767052a92..459c78558f36 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1837,6 +1837,8 @@ static __init void x2apic_enable(void)
 
 static __init void try_to_enable_x2apic(int remap_mode)
 {
+	u32 apic_limit = 0;
+
 	if (x2apic_state == X2APIC_DISABLED)
 		return;
 
@@ -1858,7 +1860,15 @@ static __init void try_to_enable_x2apic(int remap_mode)
 				return;
 			}
 
-			x2apic_set_max_apicid(255);
+			/*
+			 * If the hypervisor supports extended destination ID
+			 * in IOAPIC and MSI, we can support that many CPUs.
+			 */
+			if (x86_init.hyper.msi_ext_dest_id()) {
+				msi_ext_dest_id = 1;
+				apic_limit = 32767;
+			} else
+				apic_limit = 255;
 		}
 
 		/*
@@ -1867,6 +1877,9 @@ static __init void try_to_enable_x2apic(int remap_mode)
 		 */
 		x2apic_phys = 1;
 	}
+	if (apic_limit)
+		x2apic_set_max_apicid(apic_limit);
+
 	x2apic_enable();
 }
 
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index 356f8acf4927..4d891967bea4 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -23,6 +23,8 @@
 
 struct irq_domain *x86_pci_msi_default_domain __ro_after_init;
 
+int msi_ext_dest_id __ro_after_init;
+
 static void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg, int dmar)
 {
 	msg->address_hi = MSI_ADDR_BASE_HI;
@@ -45,10 +47,15 @@ static void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg, int
 	 * Only the IOMMU itself can use the trick of putting destination
 	 * APIC ID into the high bits of the address. Anything else would
 	 * just be writing to memory if it tried that, and needs IR to
-	 * address APICs above 255.
+	 * address APICs which can't be addressed in the normal 32-bit
+	 * address range at 0xFFExxxxx. That is typically just 8 bits, but
+	 * some hypervisors allow the extended destination ID field in bits
+	 * 11-5 to be used, giving support for 15 bits of APIC IDs in total.
 	 */
 	if (dmar)
 		msg->address_hi |= MSI_ADDR_EXT_DEST_ID(cfg->dest_apicid);
+	else if (msi_ext_dest_id && cfg->dest_apicid < 0x8000)
+		msg->address_lo |= MSI_ADDR_EXT_DEST_ID(cfg->dest_apicid) >> 3;
 	else
 		WARN_ON_ONCE(MSI_ADDR_EXT_DEST_ID(cfg->dest_apicid));
 }
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index a3038d8deb6a..8b395821cb8d 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -110,6 +110,7 @@ struct x86_init_ops x86_init __initdata = {
 		.init_platform		= x86_init_noop,
 		.guest_late_init	= x86_init_noop,
 		.x2apic_available	= bool_x86_init_noop,
+		.msi_ext_dest_id	= bool_x86_init_noop,
 		.init_mem_mapping	= x86_init_noop,
 		.init_after_bootmem	= x86_init_noop,
 	},
-- 
2.26.2

_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

  parent reply	other threads:[~2020-10-05 15:40 UTC|newest]

Thread overview: 102+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-10-05 15:28 [PATCH 0/13] Fix per-domain IRQ affinity, allow >255 CPUs on x86 without IRQ remapping David Woodhouse
2020-10-05 15:28 ` David Woodhouse
2020-10-05 15:28 ` [PATCH 01/13] x86/apic: Use x2apic in guest kernels even with unusable CPUs David Woodhouse
2020-10-05 15:28   ` David Woodhouse
2020-10-05 15:28   ` [PATCH 02/13] x86/msi: Only use high bits of MSI address for DMAR unit David Woodhouse
2020-10-05 15:28     ` David Woodhouse
2020-10-06 20:45     ` Thomas Gleixner
2020-10-06 20:45       ` Thomas Gleixner
2020-10-05 15:28   ` [PATCH 03/13] x86/ioapic: Handle Extended Destination ID field in RTE David Woodhouse
2020-10-05 15:28     ` David Woodhouse
2020-10-05 15:28   ` David Woodhouse [this message]
2020-10-05 15:28     ` [PATCH 04/13] x86/apic: Support 15 bits of APIC ID in IOAPIC/MSI where available David Woodhouse
2020-10-05 15:28   ` [PATCH 05/13] genirq: Prepare for default affinity to be passed to __irq_alloc_descs() David Woodhouse
2020-10-05 15:28     ` David Woodhouse
2020-10-06 21:01     ` Thomas Gleixner
2020-10-06 21:01       ` Thomas Gleixner
2020-10-06 21:07       ` David Woodhouse
2020-10-06 21:07         ` David Woodhouse
2020-10-05 15:28   ` [PATCH 06/13] genirq: Add default_affinity argument " David Woodhouse
2020-10-05 15:28     ` David Woodhouse
2020-10-06 21:06     ` Thomas Gleixner
2020-10-06 21:06       ` Thomas Gleixner
2020-10-05 15:28   ` [PATCH 07/13] irqdomain: Add max_affinity argument to irq_domain_alloc_descs() David Woodhouse
2020-10-05 15:28     ` David Woodhouse
2020-10-06 21:26     ` Thomas Gleixner
2020-10-06 21:26       ` Thomas Gleixner
2020-10-07  7:19       ` David Woodhouse
2020-10-07  7:19         ` David Woodhouse
2020-10-07 13:37         ` Thomas Gleixner
2020-10-07 13:37           ` Thomas Gleixner
2020-10-07 14:10           ` David Woodhouse
2020-10-07 14:10             ` David Woodhouse
2020-10-07 15:57             ` Thomas Gleixner
2020-10-07 15:57               ` Thomas Gleixner
2020-10-07 16:11               ` David Woodhouse
2020-10-07 16:11                 ` David Woodhouse
2020-10-07 20:53                 ` Thomas Gleixner
2020-10-07 20:53                   ` Thomas Gleixner
2020-10-08  7:21               ` David Woodhouse
2020-10-08  7:21                 ` David Woodhouse
2020-10-08  9:34                 ` Thomas Gleixner
2020-10-08  9:34                   ` Thomas Gleixner
2020-10-08 11:10                   ` David Woodhouse
2020-10-08 11:10                     ` David Woodhouse
2020-10-08 12:40                     ` Thomas Gleixner
2020-10-08 12:40                       ` Thomas Gleixner
2020-10-09  7:54                       ` David Woodhouse
2020-10-09  7:54                         ` David Woodhouse
2020-10-05 15:28   ` [PATCH 08/13] genirq: Add irq_domain_set_affinity() David Woodhouse
2020-10-05 15:28     ` David Woodhouse
2020-10-06 21:32     ` Thomas Gleixner
2020-10-06 21:32       ` Thomas Gleixner
2020-10-07  7:22       ` David Woodhouse
2020-10-07  7:22         ` David Woodhouse
2020-10-05 15:28   ` [PATCH 09/13] x86/irq: Add x86_non_ir_cpumask David Woodhouse
2020-10-05 15:28     ` David Woodhouse
2020-10-06 21:42     ` Thomas Gleixner
2020-10-06 21:42       ` Thomas Gleixner
2020-10-07  7:25       ` David Woodhouse
2020-10-07  7:25         ` David Woodhouse
2020-10-05 15:28   ` [PATCH 10/13] x86/irq: Limit IOAPIC and MSI domains' affinity without IR David Woodhouse
2020-10-05 15:28     ` David Woodhouse
2020-10-06 21:54     ` Thomas Gleixner
2020-10-06 21:54       ` Thomas Gleixner
2020-10-07  7:48       ` David Woodhouse
2020-10-07  7:48         ` David Woodhouse
2020-10-07 12:59         ` Thomas Gleixner
2020-10-07 12:59           ` Thomas Gleixner
2020-10-07 13:08           ` David Woodhouse
2020-10-07 13:08             ` David Woodhouse
2020-10-07 14:05             ` Thomas Gleixner
2020-10-07 14:05               ` Thomas Gleixner
2020-10-07 14:23               ` David Woodhouse
2020-10-07 14:23                 ` David Woodhouse
2020-10-07 16:02                 ` Thomas Gleixner
2020-10-07 16:02                   ` Thomas Gleixner
2020-10-07 16:15                   ` David Woodhouse
2020-10-07 16:15                     ` David Woodhouse
2020-10-07 15:05               ` David Woodhouse
2020-10-07 15:05                 ` David Woodhouse
2020-10-07 15:25                 ` Thomas Gleixner
2020-10-07 15:25                   ` Thomas Gleixner
2020-10-07 15:46                   ` David Woodhouse
2020-10-07 15:46                     ` David Woodhouse
2020-10-07 17:23                     ` Thomas Gleixner
2020-10-07 17:23                       ` Thomas Gleixner
2020-10-07 17:34                       ` David Woodhouse
2020-10-07 17:34                         ` David Woodhouse
2020-10-05 15:28   ` [PATCH 11/13] x86/smp: Allow more than 255 CPUs even without interrupt remapping David Woodhouse
2020-10-05 15:28     ` David Woodhouse
2020-10-05 15:28   ` [PATCH 12/13] iommu/irq_remapping: Kill most of hyperv-iommu.c now it's redundant David Woodhouse
2020-10-05 15:28     ` David Woodhouse
2020-10-05 15:28   ` [PATCH 13/13] x86/kvm: Add KVM_FEATURE_MSI_EXT_DEST_ID David Woodhouse
2020-10-05 15:28     ` David Woodhouse
2020-10-07  8:14     ` Paolo Bonzini
2020-10-07  8:14       ` Paolo Bonzini
2020-10-07  8:59       ` David Woodhouse
2020-10-07  8:59         ` David Woodhouse
2020-10-07 11:15         ` Paolo Bonzini
2020-10-07 11:15           ` Paolo Bonzini
2020-10-07 12:04           ` David Woodhouse
2020-10-07 12:04             ` David Woodhouse

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201005152856.974112-4-dwmw2@infradead.org \
    --to=dwmw2@infradead.org \
    --cc=iommu@lists.linux-foundation.org \
    --cc=kvm@vger.kernel.org \
    --cc=linux-hyperv@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.