From: "Jan Beulich" <JBeulich@suse.com>
To: "xen-devel" <xen-devel@lists.xenproject.org>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>,
Brian Woods <brian.woods@amd.com>,
Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Subject: [Xen-devel] [PATCH 4/9] AMD/IOMMU: introduce 128-bit IRTE non-guest-APIC IRTE format
Date: Thu, 13 Jun 2019 07:23:59 -0600 [thread overview]
Message-ID: <5D024E6F0200007800237E25@prv1-mh.provo.novell.com> (raw)
In-Reply-To: <5D024C500200007800237DD8@prv1-mh.provo.novell.com>
This is in preparation for actually enabling x2APIC mode, which requires
this wider IRTE format to be used.
A specific remark regarding the first hunk changing
amd_iommu_ioapic_update_ire(): This bypass was introduced for XSA-36,
i.e. by 94d4a1119d ("AMD,IOMMU: Clean up old entries in remapping
tables when creating new one"). Other code introduced by that change has
meanwhile disappeared or further changed, and I wonder if - rather than
adding an x2apic_enabled check to the conditional - the bypass couldn't
be deleted altogether. For now the goal is to affect the non-x2APIC
paths as little as possible.
Take the liberty of using the new "fresh" flag to suppress an unneeded
flush in update_intremap_entry_from_ioapic().
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
Note that AMD's doc says Lowest Priority ("Arbitrated" by their naming)
mode is unavailable in x2APIC mode, but they've confirmed this to be a
mistake on their part.
--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -35,12 +35,34 @@ struct irte_basic {
unsigned int :8;
};
+struct irte_full {
+ unsigned int remap_en:1;
+ unsigned int sup_io_pf:1;
+ unsigned int int_type:3;
+ unsigned int rq_eoi:1;
+ unsigned int dm:1;
+ unsigned int guest_mode:1; /* MBZ */
+ unsigned int dest_lo:24;
+ unsigned int :32;
+ unsigned int vector:8;
+ unsigned int :24;
+ unsigned int :24;
+ unsigned int dest_hi:8;
+};
+
+static enum {
+ irte_basic,
+ irte_full,
+ irte_unset,
+} irte_mode __read_mostly = irte_unset;
+
union irte_ptr {
void *raw;
struct irte_basic *basic;
+ struct irte_full *full;
};
-#define INTREMAP_TABLE_ORDER 1
+#define INTREMAP_TABLE_ORDER (irte_mode == irte_basic ? 1 : 3)
#define INTREMAP_LENGTH 0xB
#define INTREMAP_ENTRIES (1 << INTREMAP_LENGTH)
@@ -127,7 +149,19 @@ static union irte_ptr get_intremap_entry
ASSERT(table.raw && (offset < INTREMAP_ENTRIES));
- table.basic += offset;
+ switch ( irte_mode )
+ {
+ case irte_basic:
+ table.basic += offset;
+ break;
+
+ case irte_full:
+ table.full += offset;
+ break;
+
+ default:
+ ASSERT_UNREACHABLE();
+ }
return table;
}
@@ -136,7 +170,21 @@ static void free_intremap_entry(unsigned
{
union irte_ptr entry = get_intremap_entry(seg, bdf, offset);
- *entry.basic = (struct irte_basic){};
+ switch ( irte_mode )
+ {
+ case irte_basic:
+ *entry.basic = (struct irte_basic){};
+ break;
+
+ case irte_full:
+ entry.full->remap_en = 0;
+ wmb();
+ *entry.full = (struct irte_full){};
+ break;
+
+ default:
+ ASSERT_UNREACHABLE();
+ }
__clear_bit(offset, get_ivrs_mappings(seg)[bdf].intremap_inuse);
}
@@ -154,8 +202,38 @@ static void update_intremap_entry(union
.dest = dest,
.vector = vector,
};
+ struct irte_full full = {
+ .remap_en = 1,
+ .sup_io_pf = 0,
+ .int_type = int_type,
+ .rq_eoi = 0,
+ .dm = dest_mode,
+ .dest_lo = dest,
+ .dest_hi = dest >> 24,
+ .vector = vector,
+ };
+
+ switch ( irte_mode )
+ {
+ __uint128_t ret;
+ union {
+ __uint128_t raw;
+ struct irte_full full;
+ } old;
+
+ case irte_basic:
+ *entry.basic = basic;
+ break;
+
+ case irte_full:
+ old.full = *entry.full;
+ ret = cmpxchg16b(entry.full, &old, &full);
+ ASSERT(ret == old.raw);
+ break;
- *entry.basic = basic;
+ default:
+ ASSERT_UNREACHABLE();
+ }
}
static inline int get_rte_index(const struct IO_APIC_route_entry *rte)
@@ -169,6 +247,11 @@ static inline void set_rte_index(struct
rte->delivery_mode = offset >> 8;
}
+static inline unsigned int get_full_dest(const struct irte_full *entry)
+{
+ return entry->dest_lo | (entry->dest_hi << 24);
+}
+
static int update_intremap_entry_from_ioapic(
int bdf,
struct amd_iommu *iommu,
@@ -178,10 +261,11 @@ static int update_intremap_entry_from_io
{
unsigned long flags;
union irte_ptr entry;
- u8 delivery_mode, dest, vector, dest_mode;
+ unsigned int delivery_mode, dest, vector, dest_mode;
int req_id;
spinlock_t *lock;
unsigned int offset;
+ bool fresh = false;
req_id = get_intremap_requestor_id(iommu->seg, bdf);
lock = get_intremap_lock(iommu->seg, req_id);
@@ -189,7 +273,7 @@ static int update_intremap_entry_from_io
delivery_mode = rte->delivery_mode;
vector = rte->vector;
dest_mode = rte->dest_mode;
- dest = rte->dest.logical.logical_dest;
+ dest = x2apic_enabled ? rte->dest.dest32 : rte->dest.logical.logical_dest;
spin_lock_irqsave(lock, flags);
@@ -204,25 +288,40 @@ static int update_intremap_entry_from_io
return -ENOSPC;
}
*index = offset;
- lo_update = 1;
+ fresh = true;
}
entry = get_intremap_entry(iommu->seg, req_id, offset);
- if ( !lo_update )
+ if ( fresh )
+ /* nothing */;
+ else if ( !lo_update )
{
/*
* Low half of incoming RTE is already in remapped format,
* so need to recover vector and delivery mode from IRTE.
*/
ASSERT(get_rte_index(rte) == offset);
- vector = entry.basic->vector;
+ if ( irte_mode == irte_basic )
+ vector = entry.basic->vector;
+ else
+ vector = entry.full->vector;
+ /* The IntType fields match for both formats. */
delivery_mode = entry.basic->int_type;
}
+ else if ( x2apic_enabled )
+ {
+ /*
+ * High half of incoming RTE was read from the I/O APIC and hence may
+ * not hold the full destination, so need to recover full destination
+ * from IRTE.
+ */
+ dest = get_full_dest(entry.full);
+ }
update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
spin_unlock_irqrestore(lock, flags);
- if ( iommu->enabled )
+ if ( iommu->enabled && !fresh )
{
spin_lock_irqsave(&iommu->lock, flags);
amd_iommu_flush_intremap(iommu, req_id);
@@ -246,6 +345,19 @@ int __init amd_iommu_setup_ioapic_remapp
spinlock_t *lock;
unsigned int offset;
+ for_each_amd_iommu ( iommu )
+ {
+ if ( irte_mode != irte_unset )
+ {
+ if ( iommu->ctrl.ga_en == (irte_mode == irte_basic) )
+ return -ENXIO;
+ }
+ else if ( iommu->ctrl.ga_en )
+ irte_mode = irte_full;
+ else
+ irte_mode = irte_basic;
+ }
+
/* Read ioapic entries and update interrupt remapping table accordingly */
for ( apic = 0; apic < nr_ioapics; apic++ )
{
@@ -280,6 +392,18 @@ int __init amd_iommu_setup_ioapic_remapp
dest_mode = rte.dest_mode;
dest = rte.dest.logical.logical_dest;
+ if ( iommu->ctrl.xt_en )
+ {
+ /*
+ * In x2APIC mode we have no way of discovering the high 24
+ * bits of the destination of an already enabled interrupt.
+ * We come here earlier than for xAPIC mode, so no interrupts
+ * should have been set up before.
+ */
+ AMD_IOMMU_DEBUG("Unmasked IO-APIC#%u entry %u in x2APIC mode\n",
+ IO_APIC_ID(apic), pin);
+ }
+
spin_lock_irqsave(lock, flags);
offset = alloc_intremap_entry(seg, req_id, 1);
BUG_ON(offset >= INTREMAP_ENTRIES);
@@ -314,7 +438,8 @@ void amd_iommu_ioapic_update_ire(
struct IO_APIC_route_entry new_rte = { 0 };
unsigned int rte_lo = (reg & 1) ? reg - 1 : reg;
unsigned int pin = (reg - 0x10) / 2;
- int saved_mask, seg, bdf, rc;
+ int seg, bdf, rc;
+ bool saved_mask, fresh = false;
struct amd_iommu *iommu;
unsigned int idx;
@@ -356,12 +481,22 @@ void amd_iommu_ioapic_update_ire(
*(((u32 *)&new_rte) + 1) = value;
}
- if ( new_rte.mask &&
- ioapic_sbdf[idx].pin_2_idx[pin] >= INTREMAP_ENTRIES )
+ if ( ioapic_sbdf[idx].pin_2_idx[pin] >= INTREMAP_ENTRIES )
{
ASSERT(saved_mask);
- __io_apic_write(apic, reg, value);
- return;
+
+ /*
+ * There's nowhere except the IRTE to store a full 32-bit destination,
+ * so we may not bypass entry allocation and updating of the low RTE
+ * half in the (usual) case of the high RTE half getting written first.
+ */
+ if ( new_rte.mask && !x2apic_enabled )
+ {
+ __io_apic_write(apic, reg, value);
+ return;
+ }
+
+ fresh = true;
}
/* mask the interrupt while we change the intremap table */
@@ -390,8 +525,12 @@ void amd_iommu_ioapic_update_ire(
if ( reg == rte_lo )
return;
- /* unmask the interrupt after we have updated the intremap table */
- if ( !saved_mask )
+ /*
+ * Unmask the interrupt after we have updated the intremap table. Also
+ * write the low half if a fresh entry was allocated for a high half
+ * update in x2APIC mode.
+ */
+ if ( !saved_mask || (x2apic_enabled && fresh) )
{
old_rte.mask = saved_mask;
__io_apic_write(apic, rte_lo, *((u32 *)&old_rte));
@@ -405,25 +544,35 @@ unsigned int amd_iommu_read_ioapic_from_
unsigned int offset;
unsigned int val = __io_apic_read(apic, reg);
unsigned int pin = (reg - 0x10) / 2;
+ uint16_t seg, req_id;
+ union irte_ptr entry;
idx = ioapic_id_to_index(IO_APIC_ID(apic));
if ( idx == MAX_IO_APICS )
return -EINVAL;
offset = ioapic_sbdf[idx].pin_2_idx[pin];
+ if ( offset >= INTREMAP_ENTRIES )
+ return val;
- if ( !(reg & 1) && offset < INTREMAP_ENTRIES )
+ seg = ioapic_sbdf[idx].seg;
+ req_id = get_intremap_requestor_id(seg, ioapic_sbdf[idx].bdf);
+ entry = get_intremap_entry(seg, req_id, offset);
+
+ if ( !(reg & 1) )
{
- u16 bdf = ioapic_sbdf[idx].bdf;
- u16 seg = ioapic_sbdf[idx].seg;
- u16 req_id = get_intremap_requestor_id(seg, bdf);
- union irte_ptr entry = get_intremap_entry(seg, req_id, offset);
ASSERT(offset == (val & (INTREMAP_ENTRIES - 1)));
val &= ~(INTREMAP_ENTRIES - 1);
+ /* The IntType fields match for both formats. */
val |= MASK_INSR(entry.basic->int_type, IO_APIC_REDIR_DELIV_MODE_MASK);
- val |= MASK_INSR(entry.basic->vector, IO_APIC_REDIR_VECTOR_MASK);
+ if ( irte_mode == irte_basic )
+ val |= MASK_INSR(entry.basic->vector, IO_APIC_REDIR_VECTOR_MASK);
+ else
+ val |= MASK_INSR(entry.full->vector, IO_APIC_REDIR_VECTOR_MASK);
}
+ else if ( x2apic_enabled )
+ val = get_full_dest(entry.full);
return val;
}
@@ -435,9 +584,9 @@ static int update_intremap_entry_from_ms
unsigned long flags;
union irte_ptr entry;
u16 req_id, alias_id;
- u8 delivery_mode, dest, vector, dest_mode;
+ uint8_t delivery_mode, vector, dest_mode;
spinlock_t *lock;
- unsigned int offset, i;
+ unsigned int dest, offset, i;
req_id = get_dma_requestor_id(iommu->seg, bdf);
alias_id = get_intremap_requestor_id(iommu->seg, bdf);
@@ -458,7 +607,12 @@ static int update_intremap_entry_from_ms
dest_mode = (msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT) & 0x1;
delivery_mode = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1;
vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) & MSI_DATA_VECTOR_MASK;
- dest = (msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff;
+
+ if ( x2apic_enabled )
+ dest = msg->dest32;
+ else
+ dest = MASK_EXTR(msg->address_lo, MSI_ADDR_DEST_ID_MASK);
+
offset = *remap_index;
if ( offset >= INTREMAP_ENTRIES )
{
@@ -603,8 +757,18 @@ void amd_iommu_read_msi_from_ire(
}
msg->data &= ~(INTREMAP_ENTRIES - 1);
+ /* The IntType fields match for both formats. */
msg->data |= MASK_INSR(entry.basic->int_type, MSI_DATA_DELIVERY_MODE_MASK);
- msg->data |= MASK_INSR(entry.basic->vector, MSI_DATA_VECTOR_MASK);
+ if ( irte_mode == irte_basic )
+ {
+ msg->data |= MASK_INSR(entry.basic->vector, MSI_DATA_VECTOR_MASK);
+ msg->dest32 = entry.basic->dest;
+ }
+ else
+ {
+ msg->data |= MASK_INSR(entry.full->vector, MSI_DATA_VECTOR_MASK);
+ msg->dest32 = get_full_dest(entry.full);
+ }
}
int __init amd_iommu_free_intremap_table(
@@ -667,18 +831,33 @@ int __init amd_setup_hpet_msi(struct msi
return rc;
}
-static void dump_intremap_table(const u32 *table)
+static void dump_intremap_table(const void *table)
{
- u32 count;
+ unsigned int count;
+ union {
+ const void *raw;
+ const uint32_t *basic;
+ const uint64_t (*full)[2];
+ } tbl = { .raw = table };
- if ( !table )
+ if ( !table || irte_mode == irte_unset )
return;
for ( count = 0; count < INTREMAP_ENTRIES; count++ )
{
- if ( !table[count] )
- continue;
- printk(" IRTE[%03x] %08x\n", count, table[count]);
+ if ( irte_mode == irte_basic )
+ {
+ if ( !tbl.basic[count] )
+ continue;
+ printk(" IRTE[%03x] %08x\n", count, tbl.basic[count]);
+ }
+ else
+ {
+ if ( !tbl.full[count][0] && !tbl.full[count][1] )
+ continue;
+ printk(" IRTE[%03x] %016lx_%016lx\n",
+ count, tbl.full[count][1], tbl.full[count][0]);
+ }
}
}
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
next prev parent reply other threads:[~2019-06-13 13:24 UTC|newest]
Thread overview: 56+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-06-13 13:14 [Xen-devel] [PATCH 0/9] x86: AMD x2APIC support Jan Beulich
2019-06-13 13:22 ` [Xen-devel] [PATCH 1/9] AMD/IOMMU: use bit field for extended feature register Jan Beulich
2019-06-17 19:07 ` Woods, Brian
2019-06-18 9:37 ` Jan Beulich
2019-06-17 20:23 ` Andrew Cooper
2019-06-18 9:33 ` Jan Beulich
2019-06-13 13:22 ` [Xen-devel] [PATCH 2/9] AMD/IOMMU: use bit field for control register Jan Beulich
2019-06-18 9:54 ` Andrew Cooper
2019-06-18 10:45 ` Jan Beulich
2019-06-13 13:23 ` [Xen-devel] [PATCH 3/9] AMD/IOMMU: use bit field for IRTE Jan Beulich
2019-06-18 10:37 ` Andrew Cooper
2019-06-18 11:53 ` Jan Beulich
2019-06-18 12:16 ` Andrew Cooper
2019-06-18 12:55 ` Jan Beulich
2019-06-18 11:31 ` Andrew Cooper
2019-06-18 11:47 ` Jan Beulich
2019-06-13 13:23 ` Jan Beulich [this message]
2019-06-18 11:57 ` [Xen-devel] [PATCH 4/9] AMD/IOMMU: introduce 128-bit IRTE non-guest-APIC IRTE format Andrew Cooper
2019-06-18 15:31 ` Jan Beulich
2019-06-13 13:24 ` [Xen-devel] [PATCH 5/9] AMD/IOMMU: split amd_iommu_init_one() Jan Beulich
2019-06-18 12:17 ` Andrew Cooper
2019-06-13 13:25 ` [Xen-devel] [PATCH 6/9] AMD/IOMMU: allow enabling with IRQ not yet set up Jan Beulich
2019-06-18 12:22 ` Andrew Cooper
2019-06-13 13:26 ` [Xen-devel] [PATCH 7/9] AMD/IOMMU: adjust setup of internal interrupt for x2APIC mode Jan Beulich
2019-06-18 12:35 ` Andrew Cooper
2019-06-13 13:27 ` [Xen-devel] [PATCH 8/9] AMD/IOMMU: enable x2APIC mode when available Jan Beulich
2019-06-18 13:40 ` Andrew Cooper
2019-06-18 14:02 ` Jan Beulich
2019-06-13 13:28 ` [Xen-devel] [PATCH RFC 9/9] AMD/IOMMU: correct IRTE updating Jan Beulich
2019-06-18 13:28 ` Andrew Cooper
2019-06-18 14:58 ` Jan Beulich
2019-06-27 15:15 ` [Xen-devel] [PATCH v2 00/10] x86: AMD x2APIC support Jan Beulich
2019-06-27 15:19 ` [Xen-devel] [PATCH v2 01/10] AMD/IOMMU: restrict feature logging Jan Beulich
2019-07-01 15:37 ` Andrew Cooper
2019-07-01 15:59 ` Woods, Brian
2019-06-27 15:19 ` [Xen-devel] [PATCH v2 02/10] AMD/IOMMU: use bit field for extended feature register Jan Beulich
2019-07-02 12:09 ` Andrew Cooper
2019-07-02 13:48 ` Jan Beulich
2019-07-16 16:02 ` Jan Beulich
2019-06-27 15:20 ` [Xen-devel] [PATCH v2 03/10] AMD/IOMMU: use bit field for control register Jan Beulich
2019-07-02 12:20 ` Andrew Cooper
2019-06-27 15:20 ` [Xen-devel] [PATCH v2 04/10] AMD/IOMMU: use bit field for IRTE Jan Beulich
2019-07-02 12:33 ` Andrew Cooper
2019-07-02 13:56 ` Jan Beulich
2019-06-27 15:21 ` [Xen-devel] [PATCH v2 05/10] AMD/IOMMU: introduce 128-bit IRTE non-guest-APIC IRTE format Jan Beulich
2019-07-02 14:41 ` Andrew Cooper
2019-07-03 8:46 ` Jan Beulich
2019-07-16 6:39 ` Jan Beulich
2019-06-27 15:21 ` [Xen-devel] [PATCH v2 06/10] AMD/IOMMU: split amd_iommu_init_one() Jan Beulich
2019-06-27 15:22 ` [Xen-devel] [PATCH v2 07/10] AMD/IOMMU: allow enabling with IRQ not yet set up Jan Beulich
2019-06-27 15:22 ` [Xen-devel] [PATCH v2 08/10] AMD/IOMMU: adjust setup of internal interrupt for x2APIC mode Jan Beulich
2019-06-27 15:23 ` [Xen-devel] [PATCH v2 09/10] AMD/IOMMU: enable x2APIC mode when available Jan Beulich
2019-07-02 14:50 ` Andrew Cooper
2019-06-27 15:23 ` [Xen-devel] [PATCH RFC v2 10/10] AMD/IOMMU: correct IRTE updating Jan Beulich
2019-07-02 15:08 ` Andrew Cooper
2019-07-03 8:55 ` Jan Beulich
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=5D024E6F0200007800237E25@prv1-mh.provo.novell.com \
--to=jbeulich@suse.com \
--cc=andrew.cooper3@citrix.com \
--cc=brian.woods@amd.com \
--cc=suravee.suthikulpanit@amd.com \
--cc=xen-devel@lists.xenproject.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).