xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: "Jan Beulich" <JBeulich@suse.com>
To: "xen-devel" <xen-devel@lists.xenproject.org>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>,
	Brian Woods <brian.woods@amd.com>,
	Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Subject: [Xen-devel] [PATCH 4/9] AMD/IOMMU: introduce 128-bit IRTE non-guest-APIC IRTE format
Date: Thu, 13 Jun 2019 07:23:59 -0600	[thread overview]
Message-ID: <5D024E6F0200007800237E25@prv1-mh.provo.novell.com> (raw)
In-Reply-To: <5D024C500200007800237DD8@prv1-mh.provo.novell.com>

This is in preparation of actually enabling x2APIC mode, which requires
this wider IRTE format to be used.

A specific remark regarding the first hunk changing
amd_iommu_ioapic_update_ire(): This bypass was introduced for XSA-36,
i.e. by 94d4a1119d ("AMD,IOMMU: Clean up old entries in remapping
tables when creating new one"). Other code introduced by that change has
meanwhile disappeared or further changed, and I wonder if - rather than
adding an x2apic_enabled check to the conditional - the bypass couldn't
be deleted altogether. For now the goal is to affect the non-x2APIC
paths as little as possible.

Take the liberty and use the new "fresh" flag to suppress an unneeded
flush in update_intremap_entry_from_ioapic().

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
Note that AMD's doc says Lowest Priority ("Arbitrated" by their naming)
mode is unavailable in x2APIC mode, but they've confirmed this to be a
mistake on their part.

--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -35,12 +35,34 @@ struct irte_basic {
     unsigned int :8;
 };
 
+struct irte_full {
+    unsigned int remap_en:1;
+    unsigned int sup_io_pf:1;
+    unsigned int int_type:3;
+    unsigned int rq_eoi:1;
+    unsigned int dm:1;
+    unsigned int guest_mode:1; /* MBZ */
+    unsigned int dest_lo:24;
+    unsigned int :32;
+    unsigned int vector:8;
+    unsigned int :24;
+    unsigned int :24;
+    unsigned int dest_hi:8;
+};
+
+static enum {
+    irte_basic,
+    irte_full,
+    irte_unset,
+} irte_mode __read_mostly = irte_unset;
+
 union irte_ptr {
     void *raw;
     struct irte_basic *basic;
+    struct irte_full *full;
 };
 
-#define INTREMAP_TABLE_ORDER    1
+#define INTREMAP_TABLE_ORDER (irte_mode == irte_basic ? 1 : 3)
 #define INTREMAP_LENGTH 0xB
 #define INTREMAP_ENTRIES (1 << INTREMAP_LENGTH)
 
@@ -127,7 +149,19 @@ static union irte_ptr get_intremap_entry
 
     ASSERT(table.raw && (offset < INTREMAP_ENTRIES));
 
-    table.basic += offset;
+    switch ( irte_mode )
+    {
+    case irte_basic:
+        table.basic += offset;
+        break;
+
+    case irte_full:
+        table.full += offset;
+        break;
+
+    default:
+        ASSERT_UNREACHABLE();
+    }
 
     return table;
 }
@@ -136,7 +170,21 @@ static void free_intremap_entry(unsigned
 {
     union irte_ptr entry = get_intremap_entry(seg, bdf, offset);
 
-    *entry.basic = (struct irte_basic){};
+    switch ( irte_mode )
+    {
+    case irte_basic:
+        *entry.basic = (struct irte_basic){};
+        break;
+
+    case irte_full:
+        entry.full->remap_en = 0;
+        wmb();
+        *entry.full = (struct irte_full){};
+        break;
+
+    default:
+        ASSERT_UNREACHABLE();
+    }
 
     __clear_bit(offset, get_ivrs_mappings(seg)[bdf].intremap_inuse);
 }
@@ -154,8 +202,38 @@ static void update_intremap_entry(union
         .dest = dest,
         .vector = vector,
     };
+    struct irte_full full = {
+        .remap_en = 1,
+        .sup_io_pf = 0,
+        .int_type = int_type,
+        .rq_eoi = 0,
+        .dm = dest_mode,
+        .dest_lo = dest,
+        .dest_hi = dest >> 24,
+        .vector = vector,
+    };
+
+    switch ( irte_mode )
+    {
+        __uint128_t ret;
+        union {
+            __uint128_t raw;
+            struct irte_full full;
+        } old;
+
+    case irte_basic:
+        *entry.basic = basic;
+        break;
+
+    case irte_full:
+        old.full = *entry.full;
+        ret = cmpxchg16b(entry.full, &old, &full);
+        ASSERT(ret == old.raw);
+        break;
 
-    *entry.basic = basic;
+    default:
+        ASSERT_UNREACHABLE();
+    }
 }
 
 static inline int get_rte_index(const struct IO_APIC_route_entry *rte)
@@ -169,6 +247,11 @@ static inline void set_rte_index(struct
     rte->delivery_mode = offset >> 8;
 }
 
+static inline unsigned int get_full_dest(const struct irte_full *entry)
+{
+    return entry->dest_lo | (entry->dest_hi << 24);
+}
+
 static int update_intremap_entry_from_ioapic(
     int bdf,
     struct amd_iommu *iommu,
@@ -178,10 +261,11 @@ static int update_intremap_entry_from_io
 {
     unsigned long flags;
     union irte_ptr entry;
-    u8 delivery_mode, dest, vector, dest_mode;
+    unsigned int delivery_mode, dest, vector, dest_mode;
     int req_id;
     spinlock_t *lock;
     unsigned int offset;
+    bool fresh = false;
 
     req_id = get_intremap_requestor_id(iommu->seg, bdf);
     lock = get_intremap_lock(iommu->seg, req_id);
@@ -189,7 +273,7 @@ static int update_intremap_entry_from_io
     delivery_mode = rte->delivery_mode;
     vector = rte->vector;
     dest_mode = rte->dest_mode;
-    dest = rte->dest.logical.logical_dest;
+    dest = x2apic_enabled ? rte->dest.dest32 : rte->dest.logical.logical_dest;
 
     spin_lock_irqsave(lock, flags);
 
@@ -204,25 +288,40 @@ static int update_intremap_entry_from_io
             return -ENOSPC;
         }
         *index = offset;
-        lo_update = 1;
+        fresh = true;
     }
 
     entry = get_intremap_entry(iommu->seg, req_id, offset);
-    if ( !lo_update )
+    if ( fresh )
+        /* nothing */;
+    else if ( !lo_update )
     {
         /*
          * Low half of incoming RTE is already in remapped format,
          * so need to recover vector and delivery mode from IRTE.
          */
         ASSERT(get_rte_index(rte) == offset);
-        vector = entry.basic->vector;
+        if ( irte_mode == irte_basic )
+            vector = entry.basic->vector;
+        else
+            vector = entry.full->vector;
+        /* The IntType fields match for both formats. */
         delivery_mode = entry.basic->int_type;
     }
+    else if ( x2apic_enabled )
+    {
+        /*
+         * High half of incoming RTE was read from the I/O APIC and hence may
+         * not hold the full destination, so need to recover full destination
+         * from IRTE.
+         */
+        dest = get_full_dest(entry.full);
+    }
     update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
 
     spin_unlock_irqrestore(lock, flags);
 
-    if ( iommu->enabled )
+    if ( iommu->enabled && !fresh )
     {
         spin_lock_irqsave(&iommu->lock, flags);
         amd_iommu_flush_intremap(iommu, req_id);
@@ -246,6 +345,19 @@ int __init amd_iommu_setup_ioapic_remapp
     spinlock_t *lock;
     unsigned int offset;
 
+    for_each_amd_iommu ( iommu )
+    {
+        if ( irte_mode != irte_unset )
+        {
+            if ( iommu->ctrl.ga_en == (irte_mode == irte_basic) )
+                return -ENXIO;
+        }
+        else if ( iommu->ctrl.ga_en )
+            irte_mode = irte_full;
+        else
+            irte_mode = irte_basic;
+    }
+
     /* Read ioapic entries and update interrupt remapping table accordingly */
     for ( apic = 0; apic < nr_ioapics; apic++ )
     {
@@ -280,6 +392,18 @@ int __init amd_iommu_setup_ioapic_remapp
             dest_mode = rte.dest_mode;
             dest = rte.dest.logical.logical_dest;
 
+            if ( iommu->ctrl.xt_en )
+            {
+                /*
+                 * In x2APIC mode we have no way of discovering the high 24
+                 * bits of the destination of an already enabled interrupt.
+                 * We come here earlier than for xAPIC mode, so no interrupts
+                 * should have been set up before.
+                 */
+                AMD_IOMMU_DEBUG("Unmasked IO-APIC#%u entry %u in x2APIC mode\n",
+                                IO_APIC_ID(apic), pin);
+            }
+
             spin_lock_irqsave(lock, flags);
             offset = alloc_intremap_entry(seg, req_id, 1);
             BUG_ON(offset >= INTREMAP_ENTRIES);
@@ -314,7 +438,8 @@ void amd_iommu_ioapic_update_ire(
     struct IO_APIC_route_entry new_rte = { 0 };
     unsigned int rte_lo = (reg & 1) ? reg - 1 : reg;
     unsigned int pin = (reg - 0x10) / 2;
-    int saved_mask, seg, bdf, rc;
+    int seg, bdf, rc;
+    bool saved_mask, fresh = false;
     struct amd_iommu *iommu;
     unsigned int idx;
 
@@ -356,12 +481,22 @@ void amd_iommu_ioapic_update_ire(
         *(((u32 *)&new_rte) + 1) = value;
     }
 
-    if ( new_rte.mask &&
-         ioapic_sbdf[idx].pin_2_idx[pin] >= INTREMAP_ENTRIES )
+    if ( ioapic_sbdf[idx].pin_2_idx[pin] >= INTREMAP_ENTRIES )
     {
         ASSERT(saved_mask);
-        __io_apic_write(apic, reg, value);
-        return;
+
+        /*
+         * There's nowhere except the IRTE to store a full 32-bit destination,
+         * so we may not bypass entry allocation and updating of the low RTE
+         * half in the (usual) case of the high RTE half getting written first.
+         */
+        if ( new_rte.mask && !x2apic_enabled )
+        {
+            __io_apic_write(apic, reg, value);
+            return;
+        }
+
+        fresh = true;
     }
 
     /* mask the interrupt while we change the intremap table */
@@ -390,8 +525,12 @@ void amd_iommu_ioapic_update_ire(
     if ( reg == rte_lo )
         return;
 
-    /* unmask the interrupt after we have updated the intremap table */
-    if ( !saved_mask )
+    /*
+     * Unmask the interrupt after we have updated the intremap table. Also
+     * write the low half if a fresh entry was allocated for a high half
+     * update in x2APIC mode.
+     */
+    if ( !saved_mask || (x2apic_enabled && fresh) )
     {
         old_rte.mask = saved_mask;
         __io_apic_write(apic, rte_lo, *((u32 *)&old_rte));
@@ -405,25 +544,35 @@ unsigned int amd_iommu_read_ioapic_from_
     unsigned int offset;
     unsigned int val = __io_apic_read(apic, reg);
     unsigned int pin = (reg - 0x10) / 2;
+    uint16_t seg, req_id;
+    union irte_ptr entry;
 
     idx = ioapic_id_to_index(IO_APIC_ID(apic));
     if ( idx == MAX_IO_APICS )
         return -EINVAL;
 
     offset = ioapic_sbdf[idx].pin_2_idx[pin];
+    if ( offset >= INTREMAP_ENTRIES )
+        return val;
 
-    if ( !(reg & 1) && offset < INTREMAP_ENTRIES )
+    seg = ioapic_sbdf[idx].seg;
+    req_id = get_intremap_requestor_id(seg, ioapic_sbdf[idx].bdf);
+    entry = get_intremap_entry(seg, req_id, offset);
+
+    if ( !(reg & 1) )
     {
-        u16 bdf = ioapic_sbdf[idx].bdf;
-        u16 seg = ioapic_sbdf[idx].seg;
-        u16 req_id = get_intremap_requestor_id(seg, bdf);
-        union irte_ptr entry = get_intremap_entry(seg, req_id, offset);
 
         ASSERT(offset == (val & (INTREMAP_ENTRIES - 1)));
         val &= ~(INTREMAP_ENTRIES - 1);
+        /* The IntType fields match for both formats. */
         val |= MASK_INSR(entry.basic->int_type, IO_APIC_REDIR_DELIV_MODE_MASK);
-        val |= MASK_INSR(entry.basic->vector, IO_APIC_REDIR_VECTOR_MASK);
+        if ( irte_mode == irte_basic )
+            val |= MASK_INSR(entry.basic->vector, IO_APIC_REDIR_VECTOR_MASK);
+        else
+            val |= MASK_INSR(entry.full->vector, IO_APIC_REDIR_VECTOR_MASK);
     }
+    else if ( x2apic_enabled )
+        val = get_full_dest(entry.full);
 
     return val;
 }
@@ -435,9 +584,9 @@ static int update_intremap_entry_from_ms
     unsigned long flags;
     union irte_ptr entry;
     u16 req_id, alias_id;
-    u8 delivery_mode, dest, vector, dest_mode;
+    uint8_t delivery_mode, vector, dest_mode;
     spinlock_t *lock;
-    unsigned int offset, i;
+    unsigned int dest, offset, i;
 
     req_id = get_dma_requestor_id(iommu->seg, bdf);
     alias_id = get_intremap_requestor_id(iommu->seg, bdf);
@@ -458,7 +607,12 @@ static int update_intremap_entry_from_ms
     dest_mode = (msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT) & 0x1;
     delivery_mode = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1;
     vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) & MSI_DATA_VECTOR_MASK;
-    dest = (msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff;
+
+    if ( x2apic_enabled )
+        dest = msg->dest32;
+    else
+        dest = MASK_EXTR(msg->address_lo, MSI_ADDR_DEST_ID_MASK);
+
     offset = *remap_index;
     if ( offset >= INTREMAP_ENTRIES )
     {
@@ -603,8 +757,18 @@ void amd_iommu_read_msi_from_ire(
     }
 
     msg->data &= ~(INTREMAP_ENTRIES - 1);
+    /* The IntType fields match for both formats. */
     msg->data |= MASK_INSR(entry.basic->int_type, MSI_DATA_DELIVERY_MODE_MASK);
-    msg->data |= MASK_INSR(entry.basic->vector, MSI_DATA_VECTOR_MASK);
+    if ( irte_mode == irte_basic )
+    {
+        msg->data |= MASK_INSR(entry.basic->vector, MSI_DATA_VECTOR_MASK);
+        msg->dest32 = entry.basic->dest;
+    }
+    else
+    {
+        msg->data |= MASK_INSR(entry.full->vector, MSI_DATA_VECTOR_MASK);
+        msg->dest32 = get_full_dest(entry.full);
+    }
 }
 
 int __init amd_iommu_free_intremap_table(
@@ -667,18 +831,33 @@ int __init amd_setup_hpet_msi(struct msi
     return rc;
 }
 
-static void dump_intremap_table(const u32 *table)
+static void dump_intremap_table(const void *table)
 {
-    u32 count;
+    unsigned int count;
+    union {
+        const void *raw;
+        const uint32_t *basic;
+        const uint64_t (*full)[2];
+    } tbl = { .raw = table };
 
-    if ( !table )
+    if ( !table || irte_mode == irte_unset )
         return;
 
     for ( count = 0; count < INTREMAP_ENTRIES; count++ )
     {
-        if ( !table[count] )
-            continue;
-        printk("    IRTE[%03x] %08x\n", count, table[count]);
+        if ( irte_mode == irte_basic )
+        {
+            if ( !tbl.basic[count] )
+                continue;
+            printk("    IRTE[%03x] %08x\n", count, tbl.basic[count]);
+        }
+        else
+        {
+            if ( !tbl.full[count][0] && !tbl.full[count][1] )
+                continue;
+            printk("    IRTE[%03x] %016lx_%016lx\n",
+                   count, tbl.full[count][1], tbl.full[count][0]);
+        }
     }
 }
 




_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

  parent reply	other threads:[~2019-06-13 13:24 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-06-13 13:14 [Xen-devel] [PATCH 0/9] x86: AMD x2APIC support Jan Beulich
2019-06-13 13:22 ` [Xen-devel] [PATCH 1/9] AMD/IOMMU: use bit field for extended feature register Jan Beulich
2019-06-17 19:07   ` Woods, Brian
2019-06-18  9:37     ` Jan Beulich
2019-06-17 20:23   ` Andrew Cooper
2019-06-18  9:33     ` Jan Beulich
2019-06-13 13:22 ` [Xen-devel] [PATCH 2/9] AMD/IOMMU: use bit field for control register Jan Beulich
2019-06-18  9:54   ` Andrew Cooper
2019-06-18 10:45     ` Jan Beulich
2019-06-13 13:23 ` [Xen-devel] [PATCH 3/9] AMD/IOMMU: use bit field for IRTE Jan Beulich
2019-06-18 10:37   ` Andrew Cooper
2019-06-18 11:53     ` Jan Beulich
2019-06-18 12:16       ` Andrew Cooper
2019-06-18 12:55         ` Jan Beulich
2019-06-18 11:31   ` Andrew Cooper
2019-06-18 11:47     ` Jan Beulich
2019-06-13 13:23 ` Jan Beulich [this message]
2019-06-18 11:57   ` [Xen-devel] [PATCH 4/9] AMD/IOMMU: introduce 128-bit IRTE non-guest-APIC IRTE format Andrew Cooper
2019-06-18 15:31     ` Jan Beulich
2019-06-13 13:24 ` [Xen-devel] [PATCH 5/9] AMD/IOMMU: split amd_iommu_init_one() Jan Beulich
2019-06-18 12:17   ` Andrew Cooper
2019-06-13 13:25 ` [Xen-devel] [PATCH 6/9] AMD/IOMMU: allow enabling with IRQ not yet set up Jan Beulich
2019-06-18 12:22   ` Andrew Cooper
2019-06-13 13:26 ` [Xen-devel] [PATCH 7/9] AMD/IOMMU: adjust setup of internal interrupt for x2APIC mode Jan Beulich
2019-06-18 12:35   ` Andrew Cooper
2019-06-13 13:27 ` [Xen-devel] [PATCH 8/9] AMD/IOMMU: enable x2APIC mode when available Jan Beulich
2019-06-18 13:40   ` Andrew Cooper
2019-06-18 14:02     ` Jan Beulich
2019-06-13 13:28 ` [Xen-devel] [PATCH RFC 9/9] AMD/IOMMU: correct IRTE updating Jan Beulich
2019-06-18 13:28   ` Andrew Cooper
2019-06-18 14:58     ` Jan Beulich
2019-06-27 15:15 ` [Xen-devel] [PATCH v2 00/10] x86: AMD x2APIC support Jan Beulich
2019-06-27 15:19   ` [Xen-devel] [PATCH v2 01/10] AMD/IOMMU: restrict feature logging Jan Beulich
2019-07-01 15:37     ` Andrew Cooper
2019-07-01 15:59     ` Woods, Brian
2019-06-27 15:19   ` [Xen-devel] [PATCH v2 02/10] AMD/IOMMU: use bit field for extended feature register Jan Beulich
2019-07-02 12:09     ` Andrew Cooper
2019-07-02 13:48       ` Jan Beulich
2019-07-16 16:02       ` Jan Beulich
2019-06-27 15:20   ` [Xen-devel] [PATCH v2 03/10] AMD/IOMMU: use bit field for control register Jan Beulich
2019-07-02 12:20     ` Andrew Cooper
2019-06-27 15:20   ` [Xen-devel] [PATCH v2 04/10] AMD/IOMMU: use bit field for IRTE Jan Beulich
2019-07-02 12:33     ` Andrew Cooper
2019-07-02 13:56       ` Jan Beulich
2019-06-27 15:21   ` [Xen-devel] [PATCH v2 05/10] AMD/IOMMU: introduce 128-bit IRTE non-guest-APIC IRTE format Jan Beulich
2019-07-02 14:41     ` Andrew Cooper
2019-07-03  8:46       ` Jan Beulich
2019-07-16  6:39       ` Jan Beulich
2019-06-27 15:21   ` [Xen-devel] [PATCH v2 06/10] AMD/IOMMU: split amd_iommu_init_one() Jan Beulich
2019-06-27 15:22   ` [Xen-devel] [PATCH v2 07/10] AMD/IOMMU: allow enabling with IRQ not yet set up Jan Beulich
2019-06-27 15:22   ` [Xen-devel] [PATCH v2 08/10] AMD/IOMMU: adjust setup of internal interrupt for x2APIC mode Jan Beulich
2019-06-27 15:23   ` [Xen-devel] [PATCH v2 09/10] AMD/IOMMU: enable x2APIC mode when available Jan Beulich
2019-07-02 14:50     ` Andrew Cooper
2019-06-27 15:23   ` [Xen-devel] [PATCH RFC v2 10/10] AMD/IOMMU: correct IRTE updating Jan Beulich
2019-07-02 15:08     ` Andrew Cooper
2019-07-03  8:55       ` Jan Beulich

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5D024E6F0200007800237E25@prv1-mh.provo.novell.com \
    --to=jbeulich@suse.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=brian.woods@amd.com \
    --cc=suravee.suthikulpanit@amd.com \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).