All of lore.kernel.org
 help / color / mirror / Atom feed
* [patch 00/13] x2apic and interrupt-remapping related fixes
@ 2009-03-17  0:04 Suresh Siddha
  2009-03-17  0:04 ` [patch 01/13] intr-remapping: fix "hard-safe -> hard-unsafe lock order detected" with irq_2_ir_lock Suresh Siddha
                   ` (12 more replies)
  0 siblings, 13 replies; 15+ messages in thread
From: Suresh Siddha @ 2009-03-17  0:04 UTC (permalink / raw)
  To: mingo, tglx, hpa, dwmw2, yinghai, steiner; +Cc: linux-kernel

This patchset enables support for kexec, VT-d fault handling, and
simplified IO-APIC level-triggered irq migration, along with other fixes
and cleanups, for platforms enabling x2apic and interrupt-remapping.

thanks,
suresh
-- 


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [patch 01/13] intr-remapping: fix "hard-safe -> hard-unsafe lock order detected" with irq_2_ir_lock
  2009-03-17  0:04 [patch 00/13] x2apic and interrupt-remapping related fixes Suresh Siddha
@ 2009-03-17  0:04 ` Suresh Siddha
  2009-03-17  0:04 ` [patch 02/13] dmar: move page fault handling code to dmar.c Suresh Siddha
                   ` (11 subsequent siblings)
  12 siblings, 0 replies; 15+ messages in thread
From: Suresh Siddha @ 2009-03-17  0:04 UTC (permalink / raw)
  To: mingo, tglx, hpa, dwmw2, yinghai, steiner; +Cc: linux-kernel, Suresh Siddha

[-- Attachment #1: fix_lockdep_warning.patch --]
[-- Type: text/plain, Size: 7221 bytes --]

On an x2apic-enabled system, the following warning is seen:
   [ INFO: hard-safe -> hard-unsafe lock order detected ]
   2.6.27-03151-g4480f15b #1
   ------------------------------------------------------
   swapper/1 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire:
    (irq_2_ir_lock){--..}, at: [<ffffffff8038ebc0>] get_irte+0x2f/0x95
   
   and this task is already holding:
    (&irq_desc_lock_class){+...}, at: [<ffffffff802649ed>] setup_irq+0x67/0x281
   which would create a new lock dependency:
    (&irq_desc_lock_class){+...} -> (irq_2_ir_lock){--..}
   
   but this new dependency connects a hard-irq-safe lock:
    (&irq_desc_lock_class){+...}
   ... which became hard-irq-safe at:
     [<ffffffffffffffff>] 0xffffffffffffffff
   
   to a hard-irq-unsafe lock:
    (irq_2_ir_lock){--..}
   ... which became hard-irq-unsafe at:
   ...  [<ffffffff802547b5>] __lock_acquire+0x571/0x706
     [<ffffffff8025499f>] lock_acquire+0x55/0x71
     [<ffffffff8062f2c4>] _spin_lock+0x2c/0x38
     [<ffffffff8038ee50>] alloc_irte+0x8a/0x14b
     [<ffffffff8021f733>] setup_IO_APIC_irq+0x119/0x30e
     [<ffffffff8090860e>] setup_IO_APIC+0x146/0x6e5
     [<ffffffff809058fc>] native_smp_prepare_cpus+0x24e/0x2e9
     [<ffffffff808f982c>] kernel_init+0x5a/0x176
     [<ffffffff8020c289>] child_rip+0xa/0x11
     [<ffffffffffffffff>] 0xffffffffffffffff

Fix this theoretical lock order issue by taking irq_2_ir_lock with
spin_lock_irqsave() instead of spin_lock().

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
---

Index: tip/drivers/pci/intr_remapping.c
===================================================================
--- tip.orig/drivers/pci/intr_remapping.c
+++ tip/drivers/pci/intr_remapping.c
@@ -117,21 +117,22 @@ int get_irte(int irq, struct irte *entry
 {
 	int index;
 	struct irq_2_iommu *irq_iommu;
+	unsigned long flags;
 
 	if (!entry)
 		return -1;
 
-	spin_lock(&irq_2_ir_lock);
+	spin_lock_irqsave(&irq_2_ir_lock, flags);
 	irq_iommu = valid_irq_2_iommu(irq);
 	if (!irq_iommu) {
-		spin_unlock(&irq_2_ir_lock);
+		spin_unlock_irqrestore(&irq_2_ir_lock, flags);
 		return -1;
 	}
 
 	index = irq_iommu->irte_index + irq_iommu->sub_handle;
 	*entry = *(irq_iommu->iommu->ir_table->base + index);
 
-	spin_unlock(&irq_2_ir_lock);
+	spin_unlock_irqrestore(&irq_2_ir_lock, flags);
 	return 0;
 }
 
@@ -141,6 +142,7 @@ int alloc_irte(struct intel_iommu *iommu
 	struct irq_2_iommu *irq_iommu;
 	u16 index, start_index;
 	unsigned int mask = 0;
+	unsigned long flags;
 	int i;
 
 	if (!count)
@@ -170,7 +172,7 @@ int alloc_irte(struct intel_iommu *iommu
 		return -1;
 	}
 
-	spin_lock(&irq_2_ir_lock);
+	spin_lock_irqsave(&irq_2_ir_lock, flags);
 	do {
 		for (i = index; i < index + count; i++)
 			if  (table->base[i].present)
@@ -182,7 +184,7 @@ int alloc_irte(struct intel_iommu *iommu
 		index = (index + count) % INTR_REMAP_TABLE_ENTRIES;
 
 		if (index == start_index) {
-			spin_unlock(&irq_2_ir_lock);
+			spin_unlock_irqrestore(&irq_2_ir_lock, flags);
 			printk(KERN_ERR "can't allocate an IRTE\n");
 			return -1;
 		}
@@ -193,7 +195,7 @@ int alloc_irte(struct intel_iommu *iommu
 
 	irq_iommu = irq_2_iommu_alloc(irq);
 	if (!irq_iommu) {
-		spin_unlock(&irq_2_ir_lock);
+		spin_unlock_irqrestore(&irq_2_ir_lock, flags);
 		printk(KERN_ERR "can't allocate irq_2_iommu\n");
 		return -1;
 	}
@@ -203,7 +205,7 @@ int alloc_irte(struct intel_iommu *iommu
 	irq_iommu->sub_handle = 0;
 	irq_iommu->irte_mask = mask;
 
-	spin_unlock(&irq_2_ir_lock);
+	spin_unlock_irqrestore(&irq_2_ir_lock, flags);
 
 	return index;
 }
@@ -223,30 +225,32 @@ int map_irq_to_irte_handle(int irq, u16 
 {
 	int index;
 	struct irq_2_iommu *irq_iommu;
+	unsigned long flags;
 
-	spin_lock(&irq_2_ir_lock);
+	spin_lock_irqsave(&irq_2_ir_lock, flags);
 	irq_iommu = valid_irq_2_iommu(irq);
 	if (!irq_iommu) {
-		spin_unlock(&irq_2_ir_lock);
+		spin_unlock_irqrestore(&irq_2_ir_lock, flags);
 		return -1;
 	}
 
 	*sub_handle = irq_iommu->sub_handle;
 	index = irq_iommu->irte_index;
-	spin_unlock(&irq_2_ir_lock);
+	spin_unlock_irqrestore(&irq_2_ir_lock, flags);
 	return index;
 }
 
 int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
 {
 	struct irq_2_iommu *irq_iommu;
+	unsigned long flags;
 
-	spin_lock(&irq_2_ir_lock);
+	spin_lock_irqsave(&irq_2_ir_lock, flags);
 
 	irq_iommu = irq_2_iommu_alloc(irq);
 
 	if (!irq_iommu) {
-		spin_unlock(&irq_2_ir_lock);
+		spin_unlock_irqrestore(&irq_2_ir_lock, flags);
 		printk(KERN_ERR "can't allocate irq_2_iommu\n");
 		return -1;
 	}
@@ -256,7 +260,7 @@ int set_irte_irq(int irq, struct intel_i
 	irq_iommu->sub_handle = subhandle;
 	irq_iommu->irte_mask = 0;
 
-	spin_unlock(&irq_2_ir_lock);
+	spin_unlock_irqrestore(&irq_2_ir_lock, flags);
 
 	return 0;
 }
@@ -264,11 +268,12 @@ int set_irte_irq(int irq, struct intel_i
 int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index)
 {
 	struct irq_2_iommu *irq_iommu;
+	unsigned long flags;
 
-	spin_lock(&irq_2_ir_lock);
+	spin_lock_irqsave(&irq_2_ir_lock, flags);
 	irq_iommu = valid_irq_2_iommu(irq);
 	if (!irq_iommu) {
-		spin_unlock(&irq_2_ir_lock);
+		spin_unlock_irqrestore(&irq_2_ir_lock, flags);
 		return -1;
 	}
 
@@ -277,7 +282,7 @@ int clear_irte_irq(int irq, struct intel
 	irq_iommu->sub_handle = 0;
 	irq_2_iommu(irq)->irte_mask = 0;
 
-	spin_unlock(&irq_2_ir_lock);
+	spin_unlock_irqrestore(&irq_2_ir_lock, flags);
 
 	return 0;
 }
@@ -289,11 +294,12 @@ int modify_irte(int irq, struct irte *ir
 	struct irte *irte;
 	struct intel_iommu *iommu;
 	struct irq_2_iommu *irq_iommu;
+	unsigned long flags;
 
-	spin_lock(&irq_2_ir_lock);
+	spin_lock_irqsave(&irq_2_ir_lock, flags);
 	irq_iommu = valid_irq_2_iommu(irq);
 	if (!irq_iommu) {
-		spin_unlock(&irq_2_ir_lock);
+		spin_unlock_irqrestore(&irq_2_ir_lock, flags);
 		return -1;
 	}
 
@@ -306,7 +312,7 @@ int modify_irte(int irq, struct irte *ir
 	__iommu_flush_cache(iommu, irte, sizeof(*irte));
 
 	rc = qi_flush_iec(iommu, index, 0);
-	spin_unlock(&irq_2_ir_lock);
+	spin_unlock_irqrestore(&irq_2_ir_lock, flags);
 
 	return rc;
 }
@@ -317,11 +323,12 @@ int flush_irte(int irq)
 	int index;
 	struct intel_iommu *iommu;
 	struct irq_2_iommu *irq_iommu;
+	unsigned long flags;
 
-	spin_lock(&irq_2_ir_lock);
+	spin_lock_irqsave(&irq_2_ir_lock, flags);
 	irq_iommu = valid_irq_2_iommu(irq);
 	if (!irq_iommu) {
-		spin_unlock(&irq_2_ir_lock);
+		spin_unlock_irqrestore(&irq_2_ir_lock, flags);
 		return -1;
 	}
 
@@ -330,7 +337,7 @@ int flush_irte(int irq)
 	index = irq_iommu->irte_index + irq_iommu->sub_handle;
 
 	rc = qi_flush_iec(iommu, index, irq_iommu->irte_mask);
-	spin_unlock(&irq_2_ir_lock);
+	spin_unlock_irqrestore(&irq_2_ir_lock, flags);
 
 	return rc;
 }
@@ -363,11 +370,12 @@ int free_irte(int irq)
 	struct irte *irte;
 	struct intel_iommu *iommu;
 	struct irq_2_iommu *irq_iommu;
+	unsigned long flags;
 
-	spin_lock(&irq_2_ir_lock);
+	spin_lock_irqsave(&irq_2_ir_lock, flags);
 	irq_iommu = valid_irq_2_iommu(irq);
 	if (!irq_iommu) {
-		spin_unlock(&irq_2_ir_lock);
+		spin_unlock_irqrestore(&irq_2_ir_lock, flags);
 		return -1;
 	}
 
@@ -387,7 +395,7 @@ int free_irte(int irq)
 	irq_iommu->sub_handle = 0;
 	irq_iommu->irte_mask = 0;
 
-	spin_unlock(&irq_2_ir_lock);
+	spin_unlock_irqrestore(&irq_2_ir_lock, flags);
 
 	return rc;
 }

-- 


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [patch 02/13] dmar: move page fault handling code to dmar.c
  2009-03-17  0:04 [patch 00/13] x2apic and interrupt-remapping related fixes Suresh Siddha
  2009-03-17  0:04 ` [patch 01/13] intr-remapping: fix "hard-safe -> hard-unsafe lock order detected" with irq_2_ir_lock Suresh Siddha
@ 2009-03-17  0:04 ` Suresh Siddha
  2009-03-17  0:04 ` [patch 03/13] enable fault handling for intr-remapping Suresh Siddha
                   ` (10 subsequent siblings)
  12 siblings, 0 replies; 15+ messages in thread
From: Suresh Siddha @ 2009-03-17  0:04 UTC (permalink / raw)
  To: mingo, tglx, hpa, dwmw2, yinghai, steiner; +Cc: linux-kernel, Suresh Siddha

[-- Attachment #1: move_page_fault_to_dmar.patch --]
[-- Type: text/plain, Size: 11352 bytes --]

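Move the page fault handling code from intel-iommu.c to dmar.c, so that it
can be shared by both the DMA-remapping and the interrupt-remapping code.
As part of the move, iommu_page_fault() and iommu_page_fault_do_one() are
renamed to dmar_fault() and dmar_fault_do_one().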

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
---

Index: tip/drivers/pci/intel-iommu.c
===================================================================
--- tip.orig/drivers/pci/intel-iommu.c
+++ tip/drivers/pci/intel-iommu.c
@@ -1004,194 +1004,6 @@ static int iommu_disable_translation(str
 	return 0;
 }
 
-/* iommu interrupt handling. Most stuff are MSI-like. */
-
-static const char *fault_reason_strings[] =
-{
-	"Software",
-	"Present bit in root entry is clear",
-	"Present bit in context entry is clear",
-	"Invalid context entry",
-	"Access beyond MGAW",
-	"PTE Write access is not set",
-	"PTE Read access is not set",
-	"Next page table ptr is invalid",
-	"Root table address invalid",
-	"Context table ptr is invalid",
-	"non-zero reserved fields in RTP",
-	"non-zero reserved fields in CTP",
-	"non-zero reserved fields in PTE",
-};
-#define MAX_FAULT_REASON_IDX 	(ARRAY_SIZE(fault_reason_strings) - 1)
-
-const char *dmar_get_fault_reason(u8 fault_reason)
-{
-	if (fault_reason > MAX_FAULT_REASON_IDX)
-		return "Unknown";
-	else
-		return fault_reason_strings[fault_reason];
-}
-
-void dmar_msi_unmask(unsigned int irq)
-{
-	struct intel_iommu *iommu = get_irq_data(irq);
-	unsigned long flag;
-
-	/* unmask it */
-	spin_lock_irqsave(&iommu->register_lock, flag);
-	writel(0, iommu->reg + DMAR_FECTL_REG);
-	/* Read a reg to force flush the post write */
-	readl(iommu->reg + DMAR_FECTL_REG);
-	spin_unlock_irqrestore(&iommu->register_lock, flag);
-}
-
-void dmar_msi_mask(unsigned int irq)
-{
-	unsigned long flag;
-	struct intel_iommu *iommu = get_irq_data(irq);
-
-	/* mask it */
-	spin_lock_irqsave(&iommu->register_lock, flag);
-	writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
-	/* Read a reg to force flush the post write */
-	readl(iommu->reg + DMAR_FECTL_REG);
-	spin_unlock_irqrestore(&iommu->register_lock, flag);
-}
-
-void dmar_msi_write(int irq, struct msi_msg *msg)
-{
-	struct intel_iommu *iommu = get_irq_data(irq);
-	unsigned long flag;
-
-	spin_lock_irqsave(&iommu->register_lock, flag);
-	writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
-	writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
-	writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
-	spin_unlock_irqrestore(&iommu->register_lock, flag);
-}
-
-void dmar_msi_read(int irq, struct msi_msg *msg)
-{
-	struct intel_iommu *iommu = get_irq_data(irq);
-	unsigned long flag;
-
-	spin_lock_irqsave(&iommu->register_lock, flag);
-	msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
-	msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
-	msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
-	spin_unlock_irqrestore(&iommu->register_lock, flag);
-}
-
-static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
-		u8 fault_reason, u16 source_id, unsigned long long addr)
-{
-	const char *reason;
-
-	reason = dmar_get_fault_reason(fault_reason);
-
-	printk(KERN_ERR
-		"DMAR:[%s] Request device [%02x:%02x.%d] "
-		"fault addr %llx \n"
-		"DMAR:[fault reason %02d] %s\n",
-		(type ? "DMA Read" : "DMA Write"),
-		(source_id >> 8), PCI_SLOT(source_id & 0xFF),
-		PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
-	return 0;
-}
-
-#define PRIMARY_FAULT_REG_LEN (16)
-static irqreturn_t iommu_page_fault(int irq, void *dev_id)
-{
-	struct intel_iommu *iommu = dev_id;
-	int reg, fault_index;
-	u32 fault_status;
-	unsigned long flag;
-
-	spin_lock_irqsave(&iommu->register_lock, flag);
-	fault_status = readl(iommu->reg + DMAR_FSTS_REG);
-
-	/* TBD: ignore advanced fault log currently */
-	if (!(fault_status & DMA_FSTS_PPF))
-		goto clear_overflow;
-
-	fault_index = dma_fsts_fault_record_index(fault_status);
-	reg = cap_fault_reg_offset(iommu->cap);
-	while (1) {
-		u8 fault_reason;
-		u16 source_id;
-		u64 guest_addr;
-		int type;
-		u32 data;
-
-		/* highest 32 bits */
-		data = readl(iommu->reg + reg +
-				fault_index * PRIMARY_FAULT_REG_LEN + 12);
-		if (!(data & DMA_FRCD_F))
-			break;
-
-		fault_reason = dma_frcd_fault_reason(data);
-		type = dma_frcd_type(data);
-
-		data = readl(iommu->reg + reg +
-				fault_index * PRIMARY_FAULT_REG_LEN + 8);
-		source_id = dma_frcd_source_id(data);
-
-		guest_addr = dmar_readq(iommu->reg + reg +
-				fault_index * PRIMARY_FAULT_REG_LEN);
-		guest_addr = dma_frcd_page_addr(guest_addr);
-		/* clear the fault */
-		writel(DMA_FRCD_F, iommu->reg + reg +
-			fault_index * PRIMARY_FAULT_REG_LEN + 12);
-
-		spin_unlock_irqrestore(&iommu->register_lock, flag);
-
-		iommu_page_fault_do_one(iommu, type, fault_reason,
-				source_id, guest_addr);
-
-		fault_index++;
-		if (fault_index > cap_num_fault_regs(iommu->cap))
-			fault_index = 0;
-		spin_lock_irqsave(&iommu->register_lock, flag);
-	}
-clear_overflow:
-	/* clear primary fault overflow */
-	fault_status = readl(iommu->reg + DMAR_FSTS_REG);
-	if (fault_status & DMA_FSTS_PFO)
-		writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
-
-	spin_unlock_irqrestore(&iommu->register_lock, flag);
-	return IRQ_HANDLED;
-}
-
-int dmar_set_interrupt(struct intel_iommu *iommu)
-{
-	int irq, ret;
-
-	irq = create_irq();
-	if (!irq) {
-		printk(KERN_ERR "IOMMU: no free vectors\n");
-		return -EINVAL;
-	}
-
-	set_irq_data(irq, iommu);
-	iommu->irq = irq;
-
-	ret = arch_setup_dmar_msi(irq);
-	if (ret) {
-		set_irq_data(irq, NULL);
-		iommu->irq = 0;
-		destroy_irq(irq);
-		return 0;
-	}
-
-	/* Force fault register is cleared */
-	iommu_page_fault(irq, iommu);
-
-	ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
-	if (ret)
-		printk(KERN_ERR "IOMMU: can't request irq\n");
-	return ret;
-}
 
 static int iommu_init_domains(struct intel_iommu *iommu)
 {
Index: tip/drivers/pci/dmar.c
===================================================================
--- tip.orig/drivers/pci/dmar.c
+++ tip/drivers/pci/dmar.c
@@ -31,6 +31,8 @@
 #include <linux/iova.h>
 #include <linux/intel-iommu.h>
 #include <linux/timer.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
 
 #undef PREFIX
 #define PREFIX "DMAR:"
@@ -812,3 +814,192 @@ int dmar_enable_qi(struct intel_iommu *i
 
 	return 0;
 }
+
+/* iommu interrupt handling. Most stuff are MSI-like. */
+
+static const char *fault_reason_strings[] =
+{
+	"Software",
+	"Present bit in root entry is clear",
+	"Present bit in context entry is clear",
+	"Invalid context entry",
+	"Access beyond MGAW",
+	"PTE Write access is not set",
+	"PTE Read access is not set",
+	"Next page table ptr is invalid",
+	"Root table address invalid",
+	"Context table ptr is invalid",
+	"non-zero reserved fields in RTP",
+	"non-zero reserved fields in CTP",
+	"non-zero reserved fields in PTE",
+};
+#define MAX_FAULT_REASON_IDX 	(ARRAY_SIZE(fault_reason_strings) - 1)
+
+const char *dmar_get_fault_reason(u8 fault_reason)
+{
+	if (fault_reason > MAX_FAULT_REASON_IDX)
+		return "Unknown";
+	else
+		return fault_reason_strings[fault_reason];
+}
+
+void dmar_msi_unmask(unsigned int irq)
+{
+	struct intel_iommu *iommu = get_irq_data(irq);
+	unsigned long flag;
+
+	/* unmask it */
+	spin_lock_irqsave(&iommu->register_lock, flag);
+	writel(0, iommu->reg + DMAR_FECTL_REG);
+	/* Read a reg to force flush the post write */
+	readl(iommu->reg + DMAR_FECTL_REG);
+	spin_unlock_irqrestore(&iommu->register_lock, flag);
+}
+
+void dmar_msi_mask(unsigned int irq)
+{
+	unsigned long flag;
+	struct intel_iommu *iommu = get_irq_data(irq);
+
+	/* mask it */
+	spin_lock_irqsave(&iommu->register_lock, flag);
+	writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
+	/* Read a reg to force flush the post write */
+	readl(iommu->reg + DMAR_FECTL_REG);
+	spin_unlock_irqrestore(&iommu->register_lock, flag);
+}
+
+void dmar_msi_write(int irq, struct msi_msg *msg)
+{
+	struct intel_iommu *iommu = get_irq_data(irq);
+	unsigned long flag;
+
+	spin_lock_irqsave(&iommu->register_lock, flag);
+	writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
+	writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
+	writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
+	spin_unlock_irqrestore(&iommu->register_lock, flag);
+}
+
+void dmar_msi_read(int irq, struct msi_msg *msg)
+{
+	struct intel_iommu *iommu = get_irq_data(irq);
+	unsigned long flag;
+
+	spin_lock_irqsave(&iommu->register_lock, flag);
+	msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
+	msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
+	msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
+	spin_unlock_irqrestore(&iommu->register_lock, flag);
+}
+
+static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
+		u8 fault_reason, u16 source_id, unsigned long long addr)
+{
+	const char *reason;
+
+	reason = dmar_get_fault_reason(fault_reason);
+
+	printk(KERN_ERR
+		"DMAR:[%s] Request device [%02x:%02x.%d] "
+		"fault addr %llx \n"
+		"DMAR:[fault reason %02d] %s\n",
+		(type ? "DMA Read" : "DMA Write"),
+		(source_id >> 8), PCI_SLOT(source_id & 0xFF),
+		PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
+	return 0;
+}
+
+#define PRIMARY_FAULT_REG_LEN (16)
+static irqreturn_t dmar_fault(int irq, void *dev_id)
+{
+	struct intel_iommu *iommu = dev_id;
+	int reg, fault_index;
+	u32 fault_status;
+	unsigned long flag;
+
+	spin_lock_irqsave(&iommu->register_lock, flag);
+	fault_status = readl(iommu->reg + DMAR_FSTS_REG);
+
+	/* TBD: ignore advanced fault log currently */
+	if (!(fault_status & DMA_FSTS_PPF))
+		goto clear_overflow;
+
+	fault_index = dma_fsts_fault_record_index(fault_status);
+	reg = cap_fault_reg_offset(iommu->cap);
+	while (1) {
+		u8 fault_reason;
+		u16 source_id;
+		u64 guest_addr;
+		int type;
+		u32 data;
+
+		/* highest 32 bits */
+		data = readl(iommu->reg + reg +
+				fault_index * PRIMARY_FAULT_REG_LEN + 12);
+		if (!(data & DMA_FRCD_F))
+			break;
+
+		fault_reason = dma_frcd_fault_reason(data);
+		type = dma_frcd_type(data);
+
+		data = readl(iommu->reg + reg +
+				fault_index * PRIMARY_FAULT_REG_LEN + 8);
+		source_id = dma_frcd_source_id(data);
+
+		guest_addr = dmar_readq(iommu->reg + reg +
+				fault_index * PRIMARY_FAULT_REG_LEN);
+		guest_addr = dma_frcd_page_addr(guest_addr);
+		/* clear the fault */
+		writel(DMA_FRCD_F, iommu->reg + reg +
+			fault_index * PRIMARY_FAULT_REG_LEN + 12);
+
+		spin_unlock_irqrestore(&iommu->register_lock, flag);
+
+		dmar_fault_do_one(iommu, type, fault_reason,
+				source_id, guest_addr);
+
+		fault_index++;
+		if (fault_index > cap_num_fault_regs(iommu->cap))
+			fault_index = 0;
+		spin_lock_irqsave(&iommu->register_lock, flag);
+	}
+clear_overflow:
+	/* clear primary fault overflow */
+	fault_status = readl(iommu->reg + DMAR_FSTS_REG);
+	if (fault_status & DMA_FSTS_PFO)
+		writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
+
+	spin_unlock_irqrestore(&iommu->register_lock, flag);
+	return IRQ_HANDLED;
+}
+
+int dmar_set_interrupt(struct intel_iommu *iommu)
+{
+	int irq, ret;
+
+	irq = create_irq();
+	if (!irq) {
+		printk(KERN_ERR "IOMMU: no free vectors\n");
+		return -EINVAL;
+	}
+
+	set_irq_data(irq, iommu);
+	iommu->irq = irq;
+
+	ret = arch_setup_dmar_msi(irq);
+	if (ret) {
+		set_irq_data(irq, NULL);
+		iommu->irq = 0;
+		destroy_irq(irq);
+		return 0;
+	}
+
+	/* Force fault register is cleared */
+	dmar_fault(irq, iommu);
+
+	ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu);
+	if (ret)
+		printk(KERN_ERR "IOMMU: can't request irq\n");
+	return ret;
+}

-- 


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [patch 03/13] enable fault handling for intr-remapping
  2009-03-17  0:04 [patch 00/13] x2apic and interrupt-remapping related fixes Suresh Siddha
  2009-03-17  0:04 ` [patch 01/13] intr-remapping: fix "hard-safe -> hard-unsafe lock order detected" with irq_2_ir_lock Suresh Siddha
  2009-03-17  0:04 ` [patch 02/13] dmar: move page fault handling code to dmar.c Suresh Siddha
@ 2009-03-17  0:04 ` Suresh Siddha
  2009-03-17  0:04 ` [patch 04/13] dmar: routines for disabling queued invalidation and intr remapping Suresh Siddha
                   ` (9 subsequent siblings)
  12 siblings, 0 replies; 15+ messages in thread
From: Suresh Siddha @ 2009-03-17  0:04 UTC (permalink / raw)
  To: mingo, tglx, hpa, dwmw2, yinghai, steiner; +Cc: linux-kernel, Suresh Siddha

[-- Attachment #1: enable_dmar_fault_for_interrupt_remapping.patch --]
[-- Type: text/plain, Size: 10208 bytes --]

Enable the fault handling flow for intr-remapping as well. The fault handling
code is now shared by both dma-remapping and intr-remapping.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
---

Index: tip/drivers/pci/dmar.c
===================================================================
--- tip.orig/drivers/pci/dmar.c
+++ tip/drivers/pci/dmar.c
@@ -511,6 +511,7 @@ int alloc_iommu(struct dmar_drhd_unit *d
 		return -ENOMEM;
 
 	iommu->seq_id = iommu_allocated++;
+	sprintf (iommu->name, "dmar%d", iommu->seq_id);
 
 	iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
 	if (!iommu->reg) {
@@ -817,7 +818,13 @@ int dmar_enable_qi(struct intel_iommu *i
 
 /* iommu interrupt handling. Most stuff are MSI-like. */
 
-static const char *fault_reason_strings[] =
+enum faulttype {
+	DMA_REMAP,
+	INTR_REMAP,
+	UNKNOWN,
+};
+
+static const char *dma_remap_fault_reasons[] =
 {
 	"Software",
 	"Present bit in root entry is clear",
@@ -833,14 +840,33 @@ static const char *fault_reason_strings[
 	"non-zero reserved fields in CTP",
 	"non-zero reserved fields in PTE",
 };
+
+static const char *intr_remap_fault_reasons[] =
+{
+	"Detected reserved fields in the decoded interrupt-remapped request",
+	"Interrupt index exceeded the interrupt-remapping table size",
+	"Present field in the IRTE entry is clear",
+	"Error accessing interrupt-remapping table pointed by IRTA_REG",
+	"Detected reserved fields in the IRTE entry",
+	"Blocked a compatibility format interrupt request",
+	"Blocked an interrupt request due to source-id verification failure",
+};
+
 #define MAX_FAULT_REASON_IDX 	(ARRAY_SIZE(fault_reason_strings) - 1)
 
-const char *dmar_get_fault_reason(u8 fault_reason)
+const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
 {
-	if (fault_reason > MAX_FAULT_REASON_IDX)
+	if (fault_reason >= 0x20 && (fault_reason <= 0x20 +
+				     ARRAY_SIZE(intr_remap_fault_reasons))) {
+		*fault_type = INTR_REMAP;
+		return intr_remap_fault_reasons[fault_reason - 0x20];
+	} else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
+		*fault_type = DMA_REMAP;
+		return dma_remap_fault_reasons[fault_reason];
+	} else {
+		*fault_type = UNKNOWN;
 		return "Unknown";
-	else
-		return fault_reason_strings[fault_reason];
+	}
 }
 
 void dmar_msi_unmask(unsigned int irq)
@@ -897,16 +923,25 @@ static int dmar_fault_do_one(struct inte
 		u8 fault_reason, u16 source_id, unsigned long long addr)
 {
 	const char *reason;
+	int fault_type;
 
-	reason = dmar_get_fault_reason(fault_reason);
+	reason = dmar_get_fault_reason(fault_reason, &fault_type);
 
-	printk(KERN_ERR
-		"DMAR:[%s] Request device [%02x:%02x.%d] "
-		"fault addr %llx \n"
-		"DMAR:[fault reason %02d] %s\n",
-		(type ? "DMA Read" : "DMA Write"),
-		(source_id >> 8), PCI_SLOT(source_id & 0xFF),
-		PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
+	if (fault_type == INTR_REMAP)
+		printk(KERN_ERR "INTR-REMAP: Request device [[%02x:%02x.%d] "
+		       "fault index %llx\n"
+			"INTR-REMAP:[fault reason %02d] %s\n",
+			(source_id >> 8), PCI_SLOT(source_id & 0xFF),
+			PCI_FUNC(source_id & 0xFF), addr >> 48,
+			fault_reason, reason);
+	else
+		printk(KERN_ERR
+		       "DMAR:[%s] Request device [%02x:%02x.%d] "
+		       "fault addr %llx \n"
+		       "DMAR:[fault reason %02d] %s\n",
+		       (type ? "DMA Read" : "DMA Write"),
+		       (source_id >> 8), PCI_SLOT(source_id & 0xFF),
+		       PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
 	return 0;
 }
 
@@ -920,10 +955,13 @@ static irqreturn_t dmar_fault(int irq, v
 
 	spin_lock_irqsave(&iommu->register_lock, flag);
 	fault_status = readl(iommu->reg + DMAR_FSTS_REG);
+	if (fault_status)
+		printk(KERN_ERR "DRHD: handling fault status reg %x\n",
+		       fault_status);
 
 	/* TBD: ignore advanced fault log currently */
 	if (!(fault_status & DMA_FSTS_PPF))
-		goto clear_overflow;
+		goto clear_rest;
 
 	fault_index = dma_fsts_fault_record_index(fault_status);
 	reg = cap_fault_reg_offset(iommu->cap);
@@ -964,11 +1002,10 @@ static irqreturn_t dmar_fault(int irq, v
 			fault_index = 0;
 		spin_lock_irqsave(&iommu->register_lock, flag);
 	}
-clear_overflow:
-	/* clear primary fault overflow */
+clear_rest:
+	/* clear all the other faults */
 	fault_status = readl(iommu->reg + DMAR_FSTS_REG);
-	if (fault_status & DMA_FSTS_PFO)
-		writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
+	writel(fault_status, iommu->reg + DMAR_FSTS_REG);
 
 	spin_unlock_irqrestore(&iommu->register_lock, flag);
 	return IRQ_HANDLED;
@@ -978,6 +1015,12 @@ int dmar_set_interrupt(struct intel_iomm
 {
 	int irq, ret;
 
+	/*
+	 * Check if the fault interrupt is already initialized.
+	 */
+	if (iommu->irq)
+		return 0;
+
 	irq = create_irq();
 	if (!irq) {
 		printk(KERN_ERR "IOMMU: no free vectors\n");
@@ -1003,3 +1046,26 @@ int dmar_set_interrupt(struct intel_iomm
 		printk(KERN_ERR "IOMMU: can't request irq\n");
 	return ret;
 }
+
+int __init enable_drhd_fault_handling(void)
+{
+	struct dmar_drhd_unit *drhd;
+
+	/*
+	 * Enable fault control interrupt.
+	 */
+	for_each_drhd_unit(drhd) {
+		int ret;
+		struct intel_iommu *iommu = drhd->iommu;
+		ret = dmar_set_interrupt(iommu);
+
+		if (ret) {
+			printk(KERN_ERR "DRHD %Lx: failed to enable fault, "
+			       " interrupt, ret %d\n",
+			       (unsigned long long)drhd->reg_base_addr, ret);
+			return -1;
+		}
+	}
+
+	return 0;
+}
Index: tip/include/linux/dmar.h
===================================================================
--- tip.orig/include/linux/dmar.h
+++ tip/include/linux/dmar.h
@@ -49,6 +49,7 @@ extern int dmar_dev_scope_init(void);
 
 /* Intel IOMMU detection */
 extern void detect_intel_iommu(void);
+extern int enable_drhd_fault_handling(void);
 
 
 extern int parse_ioapics_under_ir(void);
@@ -116,9 +117,6 @@ extern struct intel_iommu *map_ioapic_to
 #define intr_remapping_enabled		(0)
 #endif
 
-#ifdef CONFIG_DMAR
-extern const char *dmar_get_fault_reason(u8 fault_reason);
-
 /* Can't use the common MSI interrupt functions
  * since DMAR is not a pci device
  */
@@ -129,6 +127,7 @@ extern void dmar_msi_write(int irq, stru
 extern int dmar_set_interrupt(struct intel_iommu *iommu);
 extern int arch_setup_dmar_msi(unsigned int irq);
 
+#ifdef CONFIG_DMAR
 extern int iommu_detected, no_iommu;
 extern struct list_head dmar_rmrr_units;
 struct dmar_rmrr_unit {
Index: tip/drivers/pci/intr_remapping.c
===================================================================
--- tip.orig/drivers/pci/intr_remapping.c
+++ tip/drivers/pci/intr_remapping.c
@@ -308,7 +308,7 @@ int modify_irte(int irq, struct irte *ir
 	index = irq_iommu->irte_index + irq_iommu->sub_handle;
 	irte = &iommu->ir_table->base[index];
 
-	set_64bit((unsigned long *)irte, irte_modified->low | (1 << 1));
+	set_64bit((unsigned long *)irte, irte_modified->low);
 	__iommu_flush_cache(iommu, irte, sizeof(*irte));
 
 	rc = qi_flush_iec(iommu, index, 0);
Index: tip/arch/x86/kernel/apic/io_apic.c
===================================================================
--- tip.orig/arch/x86/kernel/apic/io_apic.c
+++ tip/arch/x86/kernel/apic/io_apic.c
@@ -3294,7 +3294,12 @@ static int msi_compose_msg(struct pci_de
 	} else
 #endif
 	{
-		msg->address_hi = MSI_ADDR_BASE_HI;
+		if (x2apic_enabled())
+			msg->address_hi = MSI_ADDR_BASE_HI |
+					  MSI_ADDR_EXT_DEST_ID(dest);
+		else
+			msg->address_hi = MSI_ADDR_BASE_HI;
+
 		msg->address_lo =
 			MSI_ADDR_BASE_LO |
 			((apic->irq_dest_mode == 0) ?
@@ -3528,7 +3533,7 @@ void arch_teardown_msi_irq(unsigned int 
 	destroy_irq(irq);
 }
 
-#ifdef CONFIG_DMAR
+#if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP)
 #ifdef CONFIG_SMP
 static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
Index: tip/arch/x86/include/asm/msidef.h
===================================================================
--- tip.orig/arch/x86/include/asm/msidef.h
+++ tip/arch/x86/include/asm/msidef.h
@@ -47,6 +47,7 @@
 #define	 MSI_ADDR_DEST_ID_MASK		0x00ffff0
 #define  MSI_ADDR_DEST_ID(dest)		(((dest) << MSI_ADDR_DEST_ID_SHIFT) & \
 					 MSI_ADDR_DEST_ID_MASK)
+#define MSI_ADDR_EXT_DEST_ID(dest)	((dest) & 0xffffff00)
 
 #define MSI_ADDR_IR_EXT_INT		(1 << 4)
 #define MSI_ADDR_IR_SHV			(1 << 3)
Index: tip/arch/x86/kernel/apic/probe_64.c
===================================================================
--- tip.orig/arch/x86/kernel/apic/probe_64.c
+++ tip/arch/x86/kernel/apic/probe_64.c
@@ -68,6 +68,15 @@ void __init default_setup_apic_routing(v
 			apic = &apic_physflat;
 		printk(KERN_INFO "Setting APIC routing to %s\n", apic->name);
 	}
+
+#ifdef CONFIG_X86_X2APIC
+	/*
+	 * Now that apic routing model is selected, configure the
+	 * fault handling for intr remapping.
+	 */
+	if (intr_remapping_enabled)
+		enable_drhd_fault_handling();
+#endif
 }
 
 /* Same for both flat and physical. */
Index: tip/include/linux/intel-iommu.h
===================================================================
--- tip.orig/include/linux/intel-iommu.h
+++ tip/include/linux/intel-iommu.h
@@ -292,6 +292,8 @@ struct intel_iommu {
 	spinlock_t	register_lock; /* protect register handling */
 	int		seq_id;	/* sequence id of the iommu */
 	int		agaw; /* agaw of this iommu */
+	unsigned int 	irq;
+	unsigned char 	name[13];    /* Device Name */
 
 #ifdef CONFIG_DMAR
 	unsigned long 	*domain_ids; /* bitmap of domains */
@@ -299,8 +301,6 @@ struct intel_iommu {
 	spinlock_t	lock; /* protect context, domain ids */
 	struct root_entry *root_entry; /* virtual address */
 
-	unsigned int irq;
-	unsigned char name[7];    /* Device Name */
 	struct iommu_flush flush;
 #endif
 	struct q_inval  *qi;            /* Queued invalidation info */
Index: tip/drivers/pci/intel-iommu.c
===================================================================
--- tip.orig/drivers/pci/intel-iommu.c
+++ tip/drivers/pci/intel-iommu.c
@@ -1799,7 +1799,7 @@ static int __init init_dmars(void)
 	struct dmar_rmrr_unit *rmrr;
 	struct pci_dev *pdev;
 	struct intel_iommu *iommu;
-	int i, ret, unit = 0;
+	int i, ret;
 
 	/*
 	 * for each drhd
@@ -1921,7 +1921,6 @@ static int __init init_dmars(void)
 		if (drhd->ignored)
 			continue;
 		iommu = drhd->iommu;
-		sprintf (iommu->name, "dmar%d", unit++);
 
 		iommu_flush_write_buffer(iommu);
 

-- 


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [patch 04/13] dmar: routines for disabling queued invalidation and intr remapping
  2009-03-17  0:04 [patch 00/13] x2apic and interrupt-remapping related fixes Suresh Siddha
                   ` (2 preceding siblings ...)
  2009-03-17  0:04 ` [patch 03/13] enable fault handling for intr-remapping Suresh Siddha
@ 2009-03-17  0:04 ` Suresh Siddha
  2009-03-17  0:04 ` [patch 05/13] dmar: start with sane state while enabling dma and interrupt-remapping Suresh Siddha
                   ` (8 subsequent siblings)
  12 siblings, 0 replies; 15+ messages in thread
From: Suresh Siddha @ 2009-03-17  0:04 UTC (permalink / raw)
  To: mingo, tglx, hpa, dwmw2, yinghai, steiner; +Cc: linux-kernel, Suresh Siddha

[-- Attachment #1: dmar_qi_routines.patch --]
[-- Type: text/plain, Size: 2907 bytes --]

Routines for disabling queued invalidation and interrupt remapping.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
---

Index: tip/drivers/pci/dmar.c
===================================================================
--- tip.orig/drivers/pci/dmar.c
+++ tip/drivers/pci/dmar.c
@@ -754,6 +754,42 @@ int qi_flush_iotlb(struct intel_iommu *i
 }
 
 /*
+ * Disable Queued Invalidation interface.
+ */
+void dmar_disable_qi(struct intel_iommu *iommu)
+{
+	unsigned long flags;
+	u32 sts;
+	cycles_t start_time = get_cycles();
+
+	if (!ecap_qis(iommu->ecap))
+		return;
+
+	spin_lock_irqsave(&iommu->register_lock, flags);
+
+	sts =  dmar_readq(iommu->reg + DMAR_GSTS_REG);
+	if (!(sts & DMA_GSTS_QIES))
+		goto end;
+
+	/*
+	 * Give a chance to HW to complete the pending invalidation requests.
+	 */
+	while ((readl(iommu->reg + DMAR_IQT_REG) !=
+		readl(iommu->reg + DMAR_IQH_REG)) &&
+		(DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
+		cpu_relax();
+
+	iommu->gcmd &= ~DMA_GCMD_QIE;
+
+	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
+
+	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
+		      !(sts & DMA_GSTS_QIES), sts);
+end:
+	spin_unlock_irqrestore(&iommu->register_lock, flags);
+}
+
+/*
  * Enable Queued Invalidation interface. This is a must to support
  * interrupt-remapping. Also used by DMA-remapping, which replaces
  * register based IOTLB invalidation.
Index: tip/include/linux/intel-iommu.h
===================================================================
--- tip.orig/include/linux/intel-iommu.h
+++ tip/include/linux/intel-iommu.h
@@ -321,6 +321,7 @@ extern struct dmar_drhd_unit * dmar_find
 extern int alloc_iommu(struct dmar_drhd_unit *drhd);
 extern void free_iommu(struct intel_iommu *iommu);
 extern int dmar_enable_qi(struct intel_iommu *iommu);
+extern void dmar_disable_qi(struct intel_iommu *iommu);
 extern void qi_global_iec(struct intel_iommu *iommu);
 
 extern int qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid,
Index: tip/drivers/pci/intr_remapping.c
===================================================================
--- tip.orig/drivers/pci/intr_remapping.c
+++ tip/drivers/pci/intr_remapping.c
@@ -467,6 +467,33 @@ static int setup_intr_remapping(struct i
 	return 0;
 }
 
+/*
+ * Disable Interrupt Remapping.
+ */
+static void disable_intr_remapping(struct intel_iommu *iommu)
+{
+	unsigned long flags;
+	u32 sts;
+
+	if (!ecap_ir_support(iommu->ecap))
+		return;
+
+	spin_lock_irqsave(&iommu->register_lock, flags);
+
+	sts = dmar_readq(iommu->reg + DMAR_GSTS_REG);
+	if (!(sts & DMA_GSTS_IRES))
+		goto end;
+
+	iommu->gcmd &= ~DMA_GCMD_IRE;
+	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
+
+	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
+		      readl, !(sts & DMA_GSTS_IRES), sts);
+
+end:
+	spin_unlock_irqrestore(&iommu->register_lock, flags);
+}
+
 int __init enable_intr_remapping(int eim)
 {
 	struct dmar_drhd_unit *drhd;

-- 


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [patch 05/13] dmar: start with sane state while enabling dma and interrupt-remapping
  2009-03-17  0:04 [patch 00/13] x2apic and interrupt-remapping related fixes Suresh Siddha
                   ` (3 preceding siblings ...)
  2009-03-17  0:04 ` [patch 04/13] dmar: routines for disabling queued invalidation and intr remapping Suresh Siddha
@ 2009-03-17  0:04 ` Suresh Siddha
  2009-03-17  0:04 ` [patch 06/13] intr-remapping: fix free_irte() to clear all the IRTE entries Suresh Siddha
                   ` (7 subsequent siblings)
  12 siblings, 0 replies; 15+ messages in thread
From: Suresh Siddha @ 2009-03-17  0:04 UTC (permalink / raw)
  To: mingo, tglx, hpa, dwmw2, yinghai, steiner; +Cc: linux-kernel, Suresh Siddha

[-- Attachment #1: dmar_error_status_clear.patch --]
[-- Type: text/plain, Size: 3396 bytes --]

Start from a sane state while enabling DMA-remapping and interrupt-remapping,
by clearing any previously recorded faults and disabling queued invalidation
and interrupt-remapping if they were already enabled.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
---

Index: tip/drivers/pci/intel-iommu.c
===================================================================
--- tip.orig/drivers/pci/intel-iommu.c
+++ tip/drivers/pci/intel-iommu.c
@@ -1855,11 +1855,40 @@ static int __init init_dmars(void)
 		}
 	}
 
+	/*
+	 * Start from the sane iommu hardware state.
+	 */
 	for_each_drhd_unit(drhd) {
 		if (drhd->ignored)
 			continue;
 
 		iommu = drhd->iommu;
+
+		/*
+		 * If the queued invalidation is already initialized by us
+		 * (for example, while enabling interrupt-remapping) then
+		 * we got the things already rolling from a sane state.
+		 */
+		if (iommu->qi)
+			continue;
+
+		/*
+		 * Clear any previous faults.
+		 */
+		dmar_fault(-1, iommu);
+		/*
+		 * Disable queued invalidation if supported and already enabled
+		 * before OS handover.
+		 */
+		dmar_disable_qi(iommu);
+	}
+
+	for_each_drhd_unit(drhd) {
+		if (drhd->ignored)
+			continue;
+
+		iommu = drhd->iommu;
+
 		if (dmar_enable_qi(iommu)) {
 			/*
 			 * Queued Invalidate not enabled, use Register Based
Index: tip/drivers/pci/dmar.c
===================================================================
--- tip.orig/drivers/pci/dmar.c
+++ tip/drivers/pci/dmar.c
@@ -982,7 +982,7 @@ static int dmar_fault_do_one(struct inte
 }
 
 #define PRIMARY_FAULT_REG_LEN (16)
-static irqreturn_t dmar_fault(int irq, void *dev_id)
+irqreturn_t dmar_fault(int irq, void *dev_id)
 {
 	struct intel_iommu *iommu = dev_id;
 	int reg, fault_index;
@@ -1074,9 +1074,6 @@ int dmar_set_interrupt(struct intel_iomm
 		return 0;
 	}
 
-	/* Force fault register is cleared */
-	dmar_fault(irq, iommu);
-
 	ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu);
 	if (ret)
 		printk(KERN_ERR "IOMMU: can't request irq\n");
Index: tip/include/linux/dmar.h
===================================================================
--- tip.orig/include/linux/dmar.h
+++ tip/include/linux/dmar.h
@@ -24,6 +24,7 @@
 #include <linux/acpi.h>
 #include <linux/types.h>
 #include <linux/msi.h>
+#include <linux/irqreturn.h>
 
 #if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP)
 struct intel_iommu;
@@ -125,6 +126,7 @@ extern void dmar_msi_mask(unsigned int i
 extern void dmar_msi_read(int irq, struct msi_msg *msg);
 extern void dmar_msi_write(int irq, struct msi_msg *msg);
 extern int dmar_set_interrupt(struct intel_iommu *iommu);
+extern irqreturn_t dmar_fault(int irq, void *dev_id);
 extern int arch_setup_dmar_msi(unsigned int irq);
 
 #ifdef CONFIG_DMAR
Index: tip/drivers/pci/intr_remapping.c
===================================================================
--- tip.orig/drivers/pci/intr_remapping.c
+++ tip/drivers/pci/intr_remapping.c
@@ -499,6 +499,23 @@ int __init enable_intr_remapping(int eim
 	struct dmar_drhd_unit *drhd;
 	int setup = 0;
 
+	for_each_drhd_unit(drhd) {
+		struct intel_iommu *iommu = drhd->iommu;
+
+		/*
+		 * Clear previous faults.
+		 */
+		dmar_fault(-1, iommu);
+
+		/*
+		 * Disable intr remapping and queued invalidation, if already
+		 * enabled prior to OS handover.
+		 */
+		disable_intr_remapping(iommu);
+
+		dmar_disable_qi(iommu);
+	}
+
 	/*
 	 * check for the Interrupt-remapping support
 	 */

-- 


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [patch 06/13] intr-remapping: fix free_irte() to clear all the IRTE entries
  2009-03-17  0:04 [patch 00/13] x2apic and interrupt-remapping related fixes Suresh Siddha
                   ` (4 preceding siblings ...)
  2009-03-17  0:04 ` [patch 05/13] dmar: start with sane state while enabling dma and interrupt-remapping Suresh Siddha
@ 2009-03-17  0:04 ` Suresh Siddha
  2009-03-17  0:04 ` [patch 07/13] x2apic: use virtual wire A mode in disable_IO_APIC() with interrupt-remapping Suresh Siddha
                   ` (6 subsequent siblings)
  12 siblings, 0 replies; 15+ messages in thread
From: Suresh Siddha @ 2009-03-17  0:04 UTC (permalink / raw)
  To: mingo, tglx, hpa, dwmw2, yinghai, steiner; +Cc: linux-kernel, Suresh Siddha

[-- Attachment #1: fix_free_irte.patch --]
[-- Type: text/plain, Size: 641 bytes --]

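Fix the typo in free_irte() which cleared the first interrupt remapping table
entry on every loop iteration (irte instead of irte + i), so that not all the
entries corresponding to an irq were being cleared.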

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
---

Index: tip/drivers/pci/intr_remapping.c
===================================================================
--- tip.orig/drivers/pci/intr_remapping.c
+++ tip/drivers/pci/intr_remapping.c
@@ -386,7 +386,7 @@ int free_irte(int irq)
 
 	if (!irq_iommu->sub_handle) {
 		for (i = 0; i < (1 << irq_iommu->irte_mask); i++)
-			set_64bit((unsigned long *)irte, 0);
+			set_64bit((unsigned long *)(irte + i), 0);
 		rc = qi_flush_iec(iommu, index, irq_iommu->irte_mask);
 	}
 

-- 


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [patch 07/13] x2apic: use virtual wire A mode in disable_IO_APIC() with interrupt-remapping
  2009-03-17  0:04 [patch 00/13] x2apic and interrupt-remapping related fixes Suresh Siddha
                   ` (5 preceding siblings ...)
  2009-03-17  0:04 ` [patch 06/13] intr-remapping: fix free_irte() to clear all the IRTE entries Suresh Siddha
@ 2009-03-17  0:04 ` Suresh Siddha
  2009-03-17  0:05 ` [patch 08/13] x86: fix clear_local_APIC() in the presence of x2apic Suresh Siddha
                   ` (5 subsequent siblings)
  12 siblings, 0 replies; 15+ messages in thread
From: Suresh Siddha @ 2009-03-17  0:04 UTC (permalink / raw)
  To: mingo, tglx, hpa, dwmw2, yinghai, steiner
  Cc: linux-kernel, Suresh Siddha, Eric W. Biederman

[-- Attachment #1: use_virtual_wire_a_kexec.patch --]
[-- Type: text/plain, Size: 1730 bytes --]

disable_IO_APIC() gets called during crashdump as well, which configures the
IO-APIC/LAPIC so that legacy interrupts can be delivered for the kexec'd kernel.

In the presence of interrupt-remapping, we need to change the
interrupt-remapping configuration as well as modify the IO-APIC for virtual
wire B mode.

To keep things simple during the crash, use virtual wire A mode
(for which we don't need to touch io-apic and interrupt-remapping tables).

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
---

Index: tip/arch/x86/kernel/apic/io_apic.c
===================================================================
--- tip.orig/arch/x86/kernel/apic/io_apic.c
+++ tip/arch/x86/kernel/apic/io_apic.c
@@ -2040,8 +2040,13 @@ void disable_IO_APIC(void)
 	 * If the i8259 is routed through an IOAPIC
 	 * Put that IOAPIC in virtual wire mode
 	 * so legacy interrupts can be delivered.
+	 *
+	 * With interrupt-remapping, for now we will use virtual wire A mode,
+	 * as virtual wire B is little complex (need to configure both
+	 * IOAPIC RTE aswell as interrupt-remapping table entry).
+	 * As this gets called during crash dump, keep this simple for now.
 	 */
-	if (ioapic_i8259.pin != -1) {
+	if (ioapic_i8259.pin != -1 && !intr_remapping_enabled) {
 		struct IO_APIC_route_entry entry;
 
 		memset(&entry, 0, sizeof(entry));
@@ -2061,7 +2066,10 @@ void disable_IO_APIC(void)
 		ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
 	}
 
-	disconnect_bsp_APIC(ioapic_i8259.pin != -1);
+	/*
+	 * Use virtual wire A mode when interrupt remapping is enabled.
+	 */
+	disconnect_bsp_APIC(!intr_remapping_enabled && ioapic_i8259.pin != -1);
 }
 
 #ifdef CONFIG_X86_32

-- 


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [patch 08/13] x86: fix clear_local_APIC() in the presence of x2apic
  2009-03-17  0:04 [patch 00/13] x2apic and interrupt-remapping related fixes Suresh Siddha
                   ` (6 preceding siblings ...)
  2009-03-17  0:04 ` [patch 07/13] x2apic: use virtual wire A mode in disable_IO_APIC() with interrupt-remapping Suresh Siddha
@ 2009-03-17  0:05 ` Suresh Siddha
  2009-03-17  0:05 ` [patch 09/13] x86: cleanup the IO-APIC level migration with interrupt-remapping Suresh Siddha
                   ` (4 subsequent siblings)
  12 siblings, 0 replies; 15+ messages in thread
From: Suresh Siddha @ 2009-03-17  0:05 UTC (permalink / raw)
  To: mingo, tglx, hpa, dwmw2, yinghai, steiner; +Cc: linux-kernel, Suresh Siddha

[-- Attachment #1: fix_clear_local_apic_with_x2apic.patch --]
[-- Type: text/plain, Size: 1819 bytes --]

We were not clearing the local APIC in clear_local_APIC() in the
presence of x2apic. Fix it.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
---

Index: tip/arch/x86/kernel/apic/apic.c
===================================================================
--- tip.orig/arch/x86/kernel/apic/apic.c
+++ tip/arch/x86/kernel/apic/apic.c
@@ -812,7 +812,7 @@ void clear_local_APIC(void)
 	u32 v;
 
 	/* APIC hasn't been mapped yet */
-	if (!apic_phys)
+	if (!x2apic && !apic_phys)
 		return;
 
 	maxlvt = lapic_get_maxlvt();
@@ -1527,12 +1527,10 @@ void __init early_init_lapic_mapping(voi
  */
 void __init init_apic_mappings(void)
 {
-#ifdef CONFIG_X86_X2APIC
 	if (x2apic) {
 		boot_cpu_physical_apicid = read_apic_id();
 		return;
 	}
-#endif
 
 	/*
 	 * If no local APIC can be found then set up a fake all
@@ -1976,12 +1974,9 @@ static int lapic_resume(struct sys_devic
 
 	local_irq_save(flags);
 
-#ifdef CONFIG_X86_X2APIC
 	if (x2apic)
 		enable_x2apic();
-	else
-#endif
-	{
+	else {
 		/*
 		 * Make sure the APICBASE points to the right address
 		 *
Index: tip/arch/x86/include/asm/apic.h
===================================================================
--- tip.orig/arch/x86/include/asm/apic.h
+++ tip/arch/x86/include/asm/apic.h
@@ -184,6 +184,9 @@ static inline int x2apic_enabled(void)
 {
 	return 0;
 }
+
+#define	x2apic	0
+
 #endif
 
 extern int get_physical_broadcast(void);
Index: tip/arch/x86/include/asm/irq_remapping.h
===================================================================
--- tip.orig/arch/x86/include/asm/irq_remapping.h
+++ tip/arch/x86/include/asm/irq_remapping.h
@@ -1,8 +1,6 @@
 #ifndef _ASM_X86_IRQ_REMAPPING_H
 #define _ASM_X86_IRQ_REMAPPING_H
 
-extern int x2apic;
-
 #define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8)
 
 #endif	/* _ASM_X86_IRQ_REMAPPING_H */

-- 


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [patch 09/13] x86: cleanup the IO-APIC level migration with interrupt-remapping
  2009-03-17  0:04 [patch 00/13] x2apic and interrupt-remapping related fixes Suresh Siddha
                   ` (7 preceding siblings ...)
  2009-03-17  0:05 ` [patch 08/13] x86: fix clear_local_APIC() in the presence of x2apic Suresh Siddha
@ 2009-03-17  0:05 ` Suresh Siddha
  2009-03-17  0:05 ` [patch 10/13] cleanup ifdef CONFIG_INTR_REMAP in io_apic code Suresh Siddha
                   ` (3 subsequent siblings)
  12 siblings, 0 replies; 15+ messages in thread
From: Suresh Siddha @ 2009-03-17  0:05 UTC (permalink / raw)
  To: mingo, tglx, hpa, dwmw2, yinghai, steiner
  Cc: linux-kernel, Suresh Siddha, Eric W. Biederman, rajesh.sankaran

[-- Attachment #1: ioapic_eoi.patch --]
[-- Type: text/plain, Size: 9943 bytes --]

In the current code, for level triggered migration, we need to modify the
io-apic RTE with the updated vector information, along with modifying the
interrupt remapping table entry (IRTE) with vector and destination. This is to
ensure that the remote IRR bit in the IOAPIC RTE gets cleared when the cpu does
EOI.

With this patch, for level triggered, we eliminate the io-apic RTE modification
(with the updated vector information), by using a virtual vector (io-apic pin
number).  The real vector that is used for interrupting the cpu will come from
the interrupt-remapping table entry. Trigger mode in the IRTE will always be
edge, and the actual level or edge trigger will be set up in the IO-APIC RTE.
So a level triggered interrupt will appear as edge to the cpu's local apic
but still as level to the IO-APIC.

With this change, level irq migration can be done by simply modifying
the interrupt-remapping table entry without changing the io-apic RTE.
And as the interrupt appears as edge at the cpu, in addition to doing the
local apic EOI, we need to do an IO-APIC directed EOI to clear the remote
IRR bit in the IO-APIC RTE.

This simplifies the irq migration in the presence of interrupt-remapping.

Idea-by: Rajesh Sankaran <rajesh.sankaran@intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
---

Index: tip/arch/x86/kernel/apic/io_apic.c
===================================================================
--- tip.orig/arch/x86/kernel/apic/io_apic.c
+++ tip/arch/x86/kernel/apic/io_apic.c
@@ -389,6 +389,8 @@ struct io_apic {
 	unsigned int index;
 	unsigned int unused[3];
 	unsigned int data;
+	unsigned int unused2[11];
+	unsigned int eoi;
 };
 
 static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
@@ -397,6 +399,12 @@ static __attribute_const__ struct io_api
 		+ (mp_ioapics[idx].apicaddr & ~PAGE_MASK);
 }
 
+static inline void io_apic_eoi(unsigned int apic, unsigned int vector)
+{
+	struct io_apic __iomem *io_apic = io_apic_base(apic);
+	writel(vector, &io_apic->eoi);
+}
+
 static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
 {
 	struct io_apic __iomem *io_apic = io_apic_base(apic);
@@ -1478,7 +1486,7 @@ static void ioapic_register_intr(int irq
 int setup_ioapic_entry(int apic_id, int irq,
 		       struct IO_APIC_route_entry *entry,
 		       unsigned int destination, int trigger,
-		       int polarity, int vector)
+		       int polarity, int vector, int pin)
 {
 	/*
 	 * add it to the IO-APIC irq-routing table:
@@ -1504,7 +1512,14 @@ int setup_ioapic_entry(int apic_id, int 
 
 		irte.present = 1;
 		irte.dst_mode = apic->irq_dest_mode;
-		irte.trigger_mode = trigger;
+		/*
+		 * Trigger mode in the IRTE will always be edge, and the
+		 * actual level or edge trigger will be setup in the IO-APIC
+		 * RTE. This will help simplify level triggered irq migration.
+		 * For more details, see the comments above explainig IO-APIC
+		 * irq migration in the presence of interrupt-remapping.
+		 */
+		irte.trigger_mode = 0;
 		irte.dlvry_mode = apic->irq_delivery_mode;
 		irte.vector = vector;
 		irte.dest_id = IRTE_DEST(destination);
@@ -1515,18 +1530,23 @@ int setup_ioapic_entry(int apic_id, int 
 		ir_entry->zero = 0;
 		ir_entry->format = 1;
 		ir_entry->index = (index & 0x7fff);
+		/*
+		 * IO-APIC RTE will be configured with virtual vector.
+		 * irq handler will do the explicit EOI to the io-apic.
+		 */
+		ir_entry->vector = pin;
 	} else
 #endif
 	{
 		entry->delivery_mode = apic->irq_delivery_mode;
 		entry->dest_mode = apic->irq_dest_mode;
 		entry->dest = destination;
+		entry->vector = vector;
 	}
 
 	entry->mask = 0;				/* enable IRQ */
 	entry->trigger = trigger;
 	entry->polarity = polarity;
-	entry->vector = vector;
 
 	/* Mask level triggered irqs.
 	 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
@@ -1561,7 +1581,7 @@ static void setup_IO_APIC_irq(int apic_i
 
 
 	if (setup_ioapic_entry(mp_ioapics[apic_id].apicid, irq, &entry,
-			       dest, trigger, polarity, cfg->vector)) {
+			       dest, trigger, polarity, cfg->vector, pin)) {
 		printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
 		       mp_ioapics[apic_id].apicid, pin);
 		__clear_irq_vector(irq, cfg);
@@ -2311,37 +2331,24 @@ static int ioapic_retrigger_irq(unsigned
 #ifdef CONFIG_SMP
 
 #ifdef CONFIG_INTR_REMAP
-static void ir_irq_migration(struct work_struct *work);
-
-static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
 
 /*
  * Migrate the IO-APIC irq in the presence of intr-remapping.
  *
- * For edge triggered, irq migration is a simple atomic update(of vector
- * and cpu destination) of IRTE and flush the hardware cache.
- *
- * For level triggered, we need to modify the io-apic RTE aswell with the update
- * vector information, along with modifying IRTE with vector and destination.
- * So irq migration for level triggered is little  bit more complex compared to
- * edge triggered migration. But the good news is, we use the same algorithm
- * for level triggered migration as we have today, only difference being,
- * we now initiate the irq migration from process context instead of the
- * interrupt context.
+ * For both level and edge triggered, irq migration is a simple atomic
+ * update(of vector and cpu destination) of IRTE and flush the hardware cache.
  *
- * In future, when we do a directed EOI (combined with cpu EOI broadcast
- * suppression) to the IO-APIC, level triggered irq migration will also be
- * as simple as edge triggered migration and we can do the irq migration
- * with a simple atomic update to IO-APIC RTE.
+ * For level triggered, we eliminate the io-apic RTE modification (with the
+ * updated vector information), by using a virtual vector (io-apic pin number).
+ * Real vector that is used for interrupting cpu will be coming from
+ * the interrupt-remapping table entry.
  */
 static void
 migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
 	struct irte irte;
-	int modify_ioapic_rte;
 	unsigned int dest;
-	unsigned long flags;
 	unsigned int irq;
 
 	if (!cpumask_intersects(mask, cpu_online_mask))
@@ -2359,13 +2366,6 @@ migrate_ioapic_irq_desc(struct irq_desc 
 
 	dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
 
-	modify_ioapic_rte = desc->status & IRQ_LEVEL;
-	if (modify_ioapic_rte) {
-		spin_lock_irqsave(&ioapic_lock, flags);
-		__target_IO_APIC_irq(irq, dest, cfg);
-		spin_unlock_irqrestore(&ioapic_lock, flags);
-	}
-
 	irte.vector = cfg->vector;
 	irte.dest_id = IRTE_DEST(dest);
 
@@ -2380,73 +2380,12 @@ migrate_ioapic_irq_desc(struct irq_desc 
 	cpumask_copy(desc->affinity, mask);
 }
 
-static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
-{
-	int ret = -1;
-	struct irq_cfg *cfg = desc->chip_data;
-
-	mask_IO_APIC_irq_desc(desc);
-
-	if (io_apic_level_ack_pending(cfg)) {
-		/*
-		 * Interrupt in progress. Migrating irq now will change the
-		 * vector information in the IO-APIC RTE and that will confuse
-		 * the EOI broadcast performed by cpu.
-		 * So, delay the irq migration to the next instance.
-		 */
-		schedule_delayed_work(&ir_migration_work, 1);
-		goto unmask;
-	}
-
-	/* everthing is clear. we have right of way */
-	migrate_ioapic_irq_desc(desc, desc->pending_mask);
-
-	ret = 0;
-	desc->status &= ~IRQ_MOVE_PENDING;
-	cpumask_clear(desc->pending_mask);
-
-unmask:
-	unmask_IO_APIC_irq_desc(desc);
-
-	return ret;
-}
-
-static void ir_irq_migration(struct work_struct *work)
-{
-	unsigned int irq;
-	struct irq_desc *desc;
-
-	for_each_irq_desc(irq, desc) {
-		if (desc->status & IRQ_MOVE_PENDING) {
-			unsigned long flags;
-
-			spin_lock_irqsave(&desc->lock, flags);
-			if (!desc->chip->set_affinity ||
-			    !(desc->status & IRQ_MOVE_PENDING)) {
-				desc->status &= ~IRQ_MOVE_PENDING;
-				spin_unlock_irqrestore(&desc->lock, flags);
-				continue;
-			}
-
-			desc->chip->set_affinity(irq, desc->pending_mask);
-			spin_unlock_irqrestore(&desc->lock, flags);
-		}
-	}
-}
-
 /*
  * Migrates the IRQ destination in the process context.
  */
 static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
 					    const struct cpumask *mask)
 {
-	if (desc->status & IRQ_LEVEL) {
-		desc->status |= IRQ_MOVE_PENDING;
-		cpumask_copy(desc->pending_mask, mask);
-		migrate_irq_remapped_level_desc(desc);
-		return;
-	}
-
 	migrate_ioapic_irq_desc(desc, mask);
 }
 static void set_ir_ioapic_affinity_irq(unsigned int irq,
@@ -2537,9 +2476,44 @@ static inline void irq_complete_move(str
 #endif
 
 #ifdef CONFIG_INTR_REMAP
+static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+{
+	int apic, pin;
+	struct irq_pin_list *entry;
+
+	entry = cfg->irq_2_pin;
+	for (;;) {
+
+		if (!entry)
+			break;
+
+		apic = entry->apic;
+		pin = entry->pin;
+		io_apic_eoi(apic, pin);
+		entry = entry->next;
+	}
+}
+
+static void
+eoi_ioapic_irq(struct irq_desc *desc)
+{
+	struct irq_cfg *cfg;
+	unsigned long flags;
+	unsigned int irq;
+
+	irq = desc->irq;
+	cfg = desc->chip_data;
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	__eoi_ioapic_irq(irq, cfg);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
 static void ack_x2apic_level(unsigned int irq)
 {
+	struct irq_desc *desc = irq_to_desc(irq);
 	ack_x2APIC_irq();
+	eoi_ioapic_irq(desc);
 }
 
 static void ack_x2apic_edge(unsigned int irq)
Index: tip/arch/x86/include/asm/io_apic.h
===================================================================
--- tip.orig/arch/x86/include/asm/io_apic.h
+++ tip/arch/x86/include/asm/io_apic.h
@@ -172,7 +172,7 @@ extern void probe_nr_irqs_gsi(void);
 extern int setup_ioapic_entry(int apic, int irq,
 			      struct IO_APIC_route_entry *entry,
 			      unsigned int destination, int trigger,
-			      int polarity, int vector);
+			      int polarity, int vector, int pin);
 extern void ioapic_write_entry(int apic, int pin,
 			       struct IO_APIC_route_entry e);
 #else  /* !CONFIG_X86_IO_APIC */

-- 


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [patch 10/13] cleanup ifdef CONFIG_INTR_REMAP in io_apic code
  2009-03-17  0:04 [patch 00/13] x2apic and interrupt-remapping related fixes Suresh Siddha
                   ` (8 preceding siblings ...)
  2009-03-17  0:05 ` [patch 09/13] x86: cleanup the IO-APIC level migration with interrupt-remapping Suresh Siddha
@ 2009-03-17  0:05 ` Suresh Siddha
  2009-03-17  0:05 ` [patch 11/13] ioapic: Fix non atomic allocation with interrupts disabled Suresh Siddha
                   ` (2 subsequent siblings)
  12 siblings, 0 replies; 15+ messages in thread
From: Suresh Siddha @ 2009-03-17  0:05 UTC (permalink / raw)
  To: mingo, tglx, hpa, dwmw2, yinghai, steiner; +Cc: linux-kernel, Suresh Siddha

[-- Attachment #1: fix_ifdef_config_intr_remap.patch --]
[-- Type: text/plain, Size: 8791 bytes --]

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
---
Index: tip/arch/x86/kernel/apic/io_apic.c
===================================================================
--- tip.orig/arch/x86/kernel/apic/io_apic.c
+++ tip/arch/x86/kernel/apic/io_apic.c
@@ -554,16 +554,12 @@ static void __target_IO_APIC_irq(unsigne
 
 		apic = entry->apic;
 		pin = entry->pin;
-#ifdef CONFIG_INTR_REMAP
 		/*
 		 * With interrupt-remapping, destination information comes
 		 * from interrupt-remapping table entry.
 		 */
 		if (!irq_remapped(irq))
 			io_apic_write(apic, 0x11 + pin*2, dest);
-#else
-		io_apic_write(apic, 0x11 + pin*2, dest);
-#endif
 		reg = io_apic_read(apic, 0x10 + pin*2);
 		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
 		reg |= vector;
@@ -1419,9 +1415,8 @@ void __setup_vector_irq(int cpu)
 }
 
 static struct irq_chip ioapic_chip;
-#ifdef CONFIG_INTR_REMAP
 static struct irq_chip ir_ioapic_chip;
-#endif
+static struct irq_chip msi_ir_chip;
 
 #define IOAPIC_AUTO     -1
 #define IOAPIC_EDGE     0
@@ -1460,7 +1455,6 @@ static void ioapic_register_intr(int irq
 	else
 		desc->status &= ~IRQ_LEVEL;
 
-#ifdef CONFIG_INTR_REMAP
 	if (irq_remapped(irq)) {
 		desc->status |= IRQ_MOVE_PCNTXT;
 		if (trigger)
@@ -1472,7 +1466,7 @@ static void ioapic_register_intr(int irq
 						      handle_edge_irq, "edge");
 		return;
 	}
-#endif
+
 	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
 	    trigger == IOAPIC_LEVEL)
 		set_irq_chip_and_handler_name(irq, &ioapic_chip,
@@ -1493,7 +1487,6 @@ int setup_ioapic_entry(int apic_id, int 
 	 */
 	memset(entry,0,sizeof(*entry));
 
-#ifdef CONFIG_INTR_REMAP
 	if (intr_remapping_enabled) {
 		struct intel_iommu *iommu = map_ioapic_to_ir(apic_id);
 		struct irte irte;
@@ -1535,9 +1528,7 @@ int setup_ioapic_entry(int apic_id, int 
 		 * irq handler will do the explicit EOI to the io-apic.
 		 */
 		ir_entry->vector = pin;
-	} else
-#endif
-	{
+	} else {
 		entry->delivery_mode = apic->irq_delivery_mode;
 		entry->dest_mode = apic->irq_dest_mode;
 		entry->dest = destination;
@@ -1662,10 +1653,8 @@ static void __init setup_timer_IRQ0_pin(
 {
 	struct IO_APIC_route_entry entry;
 
-#ifdef CONFIG_INTR_REMAP
 	if (intr_remapping_enabled)
 		return;
-#endif
 
 	memset(&entry, 0, sizeof(entry));
 
@@ -2395,6 +2384,11 @@ static void set_ir_ioapic_affinity_irq(u
 
 	set_ir_ioapic_affinity_irq_desc(desc, mask);
 }
+#else
+static inline void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
+						   const struct cpumask *mask)
+{
+}
 #endif
 
 asmlinkage void smp_irq_move_cleanup_interrupt(void)
@@ -2883,10 +2877,8 @@ static inline void __init check_timer(vo
 	 * 8259A.
 	 */
 	if (pin1 == -1) {
-#ifdef CONFIG_INTR_REMAP
 		if (intr_remapping_enabled)
 			panic("BIOS bug: timer not connected to IO-APIC");
-#endif
 		pin1 = pin2;
 		apic1 = apic2;
 		no_pin1 = 1;
@@ -2922,10 +2914,8 @@ static inline void __init check_timer(vo
 				clear_IO_APIC_pin(0, pin1);
 			goto out;
 		}
-#ifdef CONFIG_INTR_REMAP
 		if (intr_remapping_enabled)
 			panic("timer doesn't work through Interrupt-remapped IO-APIC");
-#endif
 		local_irq_disable();
 		clear_IO_APIC_pin(apic1, pin1);
 		if (!no_pin1)
@@ -3219,9 +3209,7 @@ void destroy_irq(unsigned int irq)
 	if (desc)
 		desc->chip_data = cfg;
 
-#ifdef CONFIG_INTR_REMAP
 	free_irte(irq);
-#endif
 	spin_lock_irqsave(&vector_lock, flags);
 	__clear_irq_vector(irq, cfg);
 	spin_unlock_irqrestore(&vector_lock, flags);
@@ -3247,7 +3235,6 @@ static int msi_compose_msg(struct pci_de
 
 	dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
 
-#ifdef CONFIG_INTR_REMAP
 	if (irq_remapped(irq)) {
 		struct irte irte;
 		int ir_index;
@@ -3273,9 +3260,7 @@ static int msi_compose_msg(struct pci_de
 				  MSI_ADDR_IR_SHV |
 				  MSI_ADDR_IR_INDEX1(ir_index) |
 				  MSI_ADDR_IR_INDEX2(ir_index);
-	} else
-#endif
-	{
+	} else {
 		if (x2apic_enabled())
 			msg->address_hi = MSI_ADDR_BASE_HI |
 					  MSI_ADDR_EXT_DEST_ID(dest);
@@ -3392,6 +3377,7 @@ static struct irq_chip msi_ir_chip = {
 #endif
 	.retrigger	= ioapic_retrigger_irq,
 };
+#endif
 
 /*
  * Map the PCI dev to the corresponding remapping hardware unit
@@ -3419,7 +3405,6 @@ static int msi_alloc_irte(struct pci_dev
 	}
 	return index;
 }
-#endif
 
 static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
 {
@@ -3433,7 +3418,6 @@ static int setup_msi_irq(struct pci_dev 
 	set_irq_msi(irq, msidesc);
 	write_msi_msg(irq, &msg);
 
-#ifdef CONFIG_INTR_REMAP
 	if (irq_remapped(irq)) {
 		struct irq_desc *desc = irq_to_desc(irq);
 		/*
@@ -3442,7 +3426,6 @@ static int setup_msi_irq(struct pci_dev 
 		desc->status |= IRQ_MOVE_PCNTXT;
 		set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
 	} else
-#endif
 		set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
 
 	dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq);
@@ -3456,11 +3439,8 @@ int arch_setup_msi_irqs(struct pci_dev *
 	int ret, sub_handle;
 	struct msi_desc *msidesc;
 	unsigned int irq_want;
-
-#ifdef CONFIG_INTR_REMAP
 	struct intel_iommu *iommu = 0;
 	int index = 0;
-#endif
 
 	irq_want = nr_irqs_gsi;
 	sub_handle = 0;
@@ -3469,7 +3449,6 @@ int arch_setup_msi_irqs(struct pci_dev *
 		if (irq == 0)
 			return -1;
 		irq_want = irq + 1;
-#ifdef CONFIG_INTR_REMAP
 		if (!intr_remapping_enabled)
 			goto no_ir;
 
@@ -3497,7 +3476,6 @@ int arch_setup_msi_irqs(struct pci_dev *
 			set_irte_irq(irq, iommu, index, sub_handle);
 		}
 no_ir:
-#endif
 		ret = setup_msi_irq(dev, msidesc, irq);
 		if (ret < 0)
 			goto error;
@@ -4032,11 +4010,9 @@ void __init setup_ioapic_dest(void)
 			else
 				mask = apic->target_cpus();
 
-#ifdef CONFIG_INTR_REMAP
 			if (intr_remapping_enabled)
 				set_ir_ioapic_affinity_irq_desc(desc, mask);
 			else
-#endif
 				set_ioapic_affinity_irq_desc(desc, mask);
 		}
 
Index: tip/include/linux/dmar.h
===================================================================
--- tip.orig/include/linux/dmar.h
+++ tip/include/linux/dmar.h
@@ -26,9 +26,8 @@
 #include <linux/msi.h>
 #include <linux/irqreturn.h>
 
-#if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP)
 struct intel_iommu;
-
+#if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP)
 struct dmar_drhd_unit {
 	struct list_head list;		/* list of drhd units	*/
 	struct  acpi_dmar_header *hdr;	/* ACPI header		*/
@@ -52,7 +51,6 @@ extern int dmar_dev_scope_init(void);
 extern void detect_intel_iommu(void);
 extern int enable_drhd_fault_handling(void);
 
-
 extern int parse_ioapics_under_ir(void);
 extern int alloc_iommu(struct dmar_drhd_unit *);
 #else
@@ -65,12 +63,12 @@ static inline int dmar_table_init(void)
 {
 	return -ENODEV;
 }
+static inline int enable_drhd_fault_handling(void)
+{
+	return -1;
+}
 #endif /* !CONFIG_DMAR && !CONFIG_INTR_REMAP */
 
-#ifdef CONFIG_INTR_REMAP
-extern int intr_remapping_enabled;
-extern int enable_intr_remapping(int);
-
 struct irte {
 	union {
 		struct {
@@ -99,6 +97,10 @@ struct irte {
 		__u64 high;
 	};
 };
+#ifdef CONFIG_INTR_REMAP
+extern int intr_remapping_enabled;
+extern int enable_intr_remapping(int);
+
 extern int get_irte(int irq, struct irte *entry);
 extern int modify_irte(int irq, struct irte *irte_modified);
 extern int alloc_irte(struct intel_iommu *iommu, int irq, u16 count);
@@ -113,6 +115,35 @@ extern int irq_remapped(int irq);
 extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev);
 extern struct intel_iommu *map_ioapic_to_ir(int apic);
 #else
+static inline int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
+{
+	return -1;
+}
+static inline int modify_irte(int irq, struct irte *irte_modified)
+{
+	return -1;
+}
+static inline int free_irte(int irq)
+{
+	return -1;
+}
+static inline int map_irq_to_irte_handle(int irq, u16 *sub_handle)
+{
+	return -1;
+}
+static inline int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index,
+			       u16 sub_handle)
+{
+	return -1;
+}
+static inline struct intel_iommu *map_dev_to_ir(struct pci_dev *dev)
+{
+	return NULL;
+}
+static inline struct intel_iommu *map_ioapic_to_ir(int apic)
+{
+	return NULL;
+}
 #define irq_remapped(irq)		(0)
 #define enable_intr_remapping(mode)	(-1)
 #define intr_remapping_enabled		(0)
Index: tip/arch/x86/kernel/apic/probe_64.c
===================================================================
--- tip.orig/arch/x86/kernel/apic/probe_64.c
+++ tip/arch/x86/kernel/apic/probe_64.c
@@ -69,14 +69,12 @@ void __init default_setup_apic_routing(v
 		printk(KERN_INFO "Setting APIC routing to %s\n", apic->name);
 	}
 
-#ifdef CONFIG_X86_X2APIC
 	/*
 	 * Now that apic routing model is selected, configure the
 	 * fault handling for intr remapping.
 	 */
 	if (intr_remapping_enabled)
 		enable_drhd_fault_handling();
-#endif
 }
 
 /* Same for both flat and physical. */

-- 


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [patch 11/13] ioapic: Fix non atomic allocation with interrupts disabled
  2009-03-17  0:04 [patch 00/13] x2apic and interrupt-remapping related fixes Suresh Siddha
                   ` (9 preceding siblings ...)
  2009-03-17  0:05 ` [patch 10/13] cleanup ifdef CONFIG_INTR_REMAP in io_apic code Suresh Siddha
@ 2009-03-17  0:05 ` Suresh Siddha
  2009-04-09 15:08   ` Jaswinder Singh Rajput
  2009-03-17  0:05 ` [patch 12/13] x86: fix broken irq migration logic while cleaning up multiple vectors Suresh Siddha
  2009-03-17  0:05 ` [patch 13/13] dmar: use atomic allocations for QI and Intr-remapping init Suresh Siddha
  12 siblings, 1 reply; 15+ messages in thread
From: Suresh Siddha @ 2009-03-17  0:05 UTC (permalink / raw)
  To: mingo, tglx, hpa, dwmw2, yinghai, steiner; +Cc: linux-kernel, Suresh Siddha

[-- Attachment #1: fix_kmalloc_with_irqs_disabled.patch --]
[-- Type: text/plain, Size: 3233 bytes --]

save_mask_IO_APIC_setup() was using non-atomic memory allocation while being
called with interrupts disabled. Fix this by splitting it into two different
functions. The allocation part, save_IO_APIC_setup(), now happens before
disabling interrupts.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
---

Index: tip/arch/x86/kernel/apic/apic.c
===================================================================
--- tip.orig/arch/x86/kernel/apic/apic.c
+++ tip/arch/x86/kernel/apic/apic.c
@@ -1338,15 +1338,16 @@ void __init enable_IR_x2apic(void)
 		return;
 	}
 
-	local_irq_save(flags);
-	mask_8259A();
-
-	ret = save_mask_IO_APIC_setup();
+	ret = save_IO_APIC_setup();
 	if (ret) {
 		pr_info("Saving IO-APIC state failed: %d\n", ret);
 		goto end;
 	}
 
+	local_irq_save(flags);
+	mask_IO_APIC_setup();
+	mask_8259A();
+
 	ret = enable_intr_remapping(1);
 
 	if (ret && x2apic_preenabled) {
@@ -1371,10 +1372,10 @@ end_restore:
 	else
 		reinit_intr_remapped_IO_APIC(x2apic_preenabled);
 
-end:
 	unmask_8259A();
 	local_irq_restore(flags);
 
+end:
 	if (!ret) {
 		if (!x2apic_preenabled)
 			pr_info("Enabled x2apic and interrupt-remapping\n");
Index: tip/arch/x86/kernel/apic/io_apic.c
===================================================================
--- tip.orig/arch/x86/kernel/apic/io_apic.c
+++ tip/arch/x86/kernel/apic/io_apic.c
@@ -853,9 +853,9 @@ __setup("pirq=", ioapic_pirq_setup);
 static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
 
 /*
- * Saves and masks all the unmasked IO-APIC RTE's
+ * Saves all the IO-APIC RTE's
  */
-int save_mask_IO_APIC_setup(void)
+int save_IO_APIC_setup(void)
 {
 	union IO_APIC_reg_01 reg_01;
 	unsigned long flags;
@@ -880,16 +880,9 @@ int save_mask_IO_APIC_setup(void)
 	}
 
 	for (apic = 0; apic < nr_ioapics; apic++)
-		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-			struct IO_APIC_route_entry entry;
-
-			entry = early_ioapic_entries[apic][pin] =
+		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
+			early_ioapic_entries[apic][pin] =
 				ioapic_read_entry(apic, pin);
-			if (!entry.mask) {
-				entry.mask = 1;
-				ioapic_write_entry(apic, pin, entry);
-			}
-		}
 
 	return 0;
 
@@ -902,6 +895,25 @@ nomem:
 	return -ENOMEM;
 }
 
+void mask_IO_APIC_setup(void)
+{
+	int apic, pin;
+
+	for (apic = 0; apic < nr_ioapics; apic++) {
+		if (!early_ioapic_entries[apic])
+			break;
+		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+			struct IO_APIC_route_entry entry;
+
+			entry = early_ioapic_entries[apic][pin];
+			if (!entry.mask) {
+				entry.mask = 1;
+				ioapic_write_entry(apic, pin, entry);
+			}
+		}
+	}
+}
+
 void restore_IO_APIC_setup(void)
 {
 	int apic, pin;
Index: tip/arch/x86/include/asm/io_apic.h
===================================================================
--- tip.orig/arch/x86/include/asm/io_apic.h
+++ tip/arch/x86/include/asm/io_apic.h
@@ -162,7 +162,8 @@ extern int (*ioapic_renumber_irq)(int io
 extern void ioapic_init_mappings(void);
 
 #ifdef CONFIG_X86_64
-extern int save_mask_IO_APIC_setup(void);
+extern int save_IO_APIC_setup(void);
+extern void mask_IO_APIC_setup(void);
 extern void restore_IO_APIC_setup(void);
 extern void reinit_intr_remapped_IO_APIC(int);
 #endif

-- 


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [patch 12/13] x86: fix broken irq migration logic while cleaning up multiple vectors
  2009-03-17  0:04 [patch 00/13] x2apic and interrupt-remapping related fixes Suresh Siddha
                   ` (10 preceding siblings ...)
  2009-03-17  0:05 ` [patch 11/13] ioapic: Fix non atomic allocation with interrupts disabled Suresh Siddha
@ 2009-03-17  0:05 ` Suresh Siddha
  2009-03-17  0:05 ` [patch 13/13] dmar: use atomic allocations for QI and Intr-remapping init Suresh Siddha
  12 siblings, 0 replies; 15+ messages in thread
From: Suresh Siddha @ 2009-03-17  0:05 UTC (permalink / raw)
  To: mingo, tglx, hpa, dwmw2, yinghai, steiner
  Cc: linux-kernel, Suresh Siddha, Eric W. Biederman

[-- Attachment #1: fix_spurious_interrupt.patch --]
[-- Type: text/plain, Size: 2633 bytes --]

During irq migration, we send a low priority interrupt to the previous
irq destination. This happens in non interrupt-remapping case after interrupt
starts arriving at new destination and in interrupt-remapping case after
modifying and flushing the interrupt-remapping table entry caches.

This low priority irq cleanup handler can clean up multiple vectors, as
multiple irq's can be migrated at almost the same time. While
there will be multiple invocations of the irq cleanup handler (one cleanup
IPI for each irq migration), the first invocation of the cleanup handler
can potentially clean up more than one vector (as the first invocation can
see the requests for more than one vector cleanup). When we clean up multiple
vectors during the first invocation of smp_irq_move_cleanup_interrupt(),
other vectors that are to be cleaned up can still be pending in the local
cpu's IRR (as smp_irq_move_cleanup_interrupt() runs with interrupts disabled).

When we are ready to unhook a vector corresponding to an irq, check if that
vector is registered in the local cpu's IRR. If so, skip that cleanup and
do a self IPI with the cleanup vector, so that we give a chance to
service the pending vector interrupt and then clean up that vector
allocation once we execute the lowest priority handler.

This fixes spurious interrupts seen when migrating multiple vectors
at the same time.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
---

Index: tip/arch/x86/kernel/apic/io_apic.c
===================================================================
--- tip.orig/arch/x86/kernel/apic/io_apic.c
+++ tip/arch/x86/kernel/apic/io_apic.c
@@ -2414,6 +2414,7 @@ asmlinkage void smp_irq_move_cleanup_int
 	me = smp_processor_id();
 	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
 		unsigned int irq;
+		unsigned int irr;
 		struct irq_desc *desc;
 		struct irq_cfg *cfg;
 		irq = __get_cpu_var(vector_irq)[vector];
@@ -2433,6 +2434,18 @@ asmlinkage void smp_irq_move_cleanup_int
 		if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
 			goto unlock;
 
+		irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
+		/*
+		 * Check if the vector that needs to be cleanedup is
+		 * registered at the cpu's IRR. If so, then this is not
+		 * the best time to clean it up. Lets clean it up in the
+		 * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR
+		 * to myself.
+		 */
+		if (irr  & (1 << (vector % 32))) {
+			apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
+			goto unlock;
+		}
 		__get_cpu_var(vector_irq)[vector] = -1;
 		cfg->move_cleanup_count--;
 unlock:

-- 


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [patch 13/13] dmar: use atomic allocations for QI and Intr-remapping init
  2009-03-17  0:04 [patch 00/13] x2apic and interrupt-remapping related fixes Suresh Siddha
                   ` (11 preceding siblings ...)
  2009-03-17  0:05 ` [patch 12/13] x86: fix broken irq migration logic while cleaning up multiple vectors Suresh Siddha
@ 2009-03-17  0:05 ` Suresh Siddha
  12 siblings, 0 replies; 15+ messages in thread
From: Suresh Siddha @ 2009-03-17  0:05 UTC (permalink / raw)
  To: mingo, tglx, hpa, dwmw2, yinghai, steiner; +Cc: linux-kernel, Suresh Siddha

[-- Attachment #1: fix_dmar_non_atomic_allocations.patch --]
[-- Type: text/plain, Size: 1715 bytes --]

Queued invalidation and interrupt-remapping will get initialized with
interrupts disabled (while enabling interrupt-remapping). So use
GFP_ATOMIC instead of GFP_KERNEL for memory allocations.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
---

Index: tip/drivers/pci/dmar.c
===================================================================
--- tip.orig/drivers/pci/dmar.c
+++ tip/drivers/pci/dmar.c
@@ -809,20 +809,20 @@ int dmar_enable_qi(struct intel_iommu *i
 	if (iommu->qi)
 		return 0;
 
-	iommu->qi = kmalloc(sizeof(*qi), GFP_KERNEL);
+	iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
 	if (!iommu->qi)
 		return -ENOMEM;
 
 	qi = iommu->qi;
 
-	qi->desc = (void *)(get_zeroed_page(GFP_KERNEL));
+	qi->desc = (void *)(get_zeroed_page(GFP_ATOMIC));
 	if (!qi->desc) {
 		kfree(qi);
 		iommu->qi = 0;
 		return -ENOMEM;
 	}
 
-	qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_KERNEL);
+	qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
 	if (!qi->desc_status) {
 		free_page((unsigned long) qi->desc);
 		kfree(qi);
Index: tip/drivers/pci/intr_remapping.c
===================================================================
--- tip.orig/drivers/pci/intr_remapping.c
+++ tip/drivers/pci/intr_remapping.c
@@ -447,12 +447,12 @@ static int setup_intr_remapping(struct i
 	struct page *pages;
 
 	ir_table = iommu->ir_table = kzalloc(sizeof(struct ir_table),
-					     GFP_KERNEL);
+					     GFP_ATOMIC);
 
 	if (!iommu->ir_table)
 		return -ENOMEM;
 
-	pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, INTR_REMAP_PAGE_ORDER);
+	pages = alloc_pages(GFP_ATOMIC | __GFP_ZERO, INTR_REMAP_PAGE_ORDER);
 
 	if (!pages) {
 		printk(KERN_ERR "failed to allocate pages of order %d\n",

-- 


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [patch 11/13] ioapic: Fix non atomic allocation with interrupts disabled
  2009-03-17  0:05 ` [patch 11/13] ioapic: Fix non atomic allocation with interrupts disabled Suresh Siddha
@ 2009-04-09 15:08   ` Jaswinder Singh Rajput
  0 siblings, 0 replies; 15+ messages in thread
From: Jaswinder Singh Rajput @ 2009-04-09 15:08 UTC (permalink / raw)
  To: Suresh Siddha; +Cc: mingo, tglx, hpa, dwmw2, yinghai, steiner, linux-kernel

Hello Suresh,

I am getting a sparse warning from this patch.
If possible, please run sparse before sending the patches.


On Mon, 2009-03-16 at 17:05 -0700, Suresh Siddha wrote:
> plain text document attachment (fix_kmalloc_with_irqs_disabled.patch)
> save_mask_IO_APIC_setup() was using non atomic memory allocation while getting
> called with interrupts disabled. Fix this by splitting this into two different
> function. Allocation part save_IO_APIC_setup() now happens before
> disabling interrupts.
> 
> Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
> ---
> 
> Index: tip/arch/x86/kernel/apic/io_apic.c
> ===================================================================
> --- tip.orig/arch/x86/kernel/apic/io_apic.c
> +++ tip/arch/x86/kernel/apic/io_apic.c
> @@ -902,6 +895,25 @@ nomem:
>  	return -ENOMEM;
>  }
>  
> +void mask_IO_APIC_setup(void)
> +{
> +	int apic, pin;
> +

arch/x86/kernel/apic/io_apic.c:806:6: warning: symbol 'apic' shadows an earlier one
  arch/x86/include/asm/apic.h:352:20: originally declared here

Don't worry you are not alone ;-)

  CHECK   arch/x86/kernel/apic/io_apic.c
arch/x86/kernel/apic/io_apic.c:544:6: warning: symbol 'apic' shadows an earlier one
 arch/x86/include/asm/apic.h:352:20: originally declared here
arch/x86/kernel/apic/io_apic.c:806:6: warning: symbol 'apic' shadows an earlier one
 arch/x86/include/asm/apic.h:352:20: originally declared here
arch/x86/kernel/apic/io_apic.c:994:7: warning: symbol 'apic' shadows an earlier one
 arch/x86/include/asm/apic.h:352:20: originally declared here
arch/x86/kernel/apic/io_apic.c:1012:6: warning: symbol 'apic' shadows an earlier one
 arch/x86/include/asm/apic.h:352:20: originally declared here
arch/x86/kernel/apic/io_apic.c:1440:6: warning: symbol 'apic' shadows an earlier one
 arch/x86/include/asm/apic.h:352:20: originally declared here
arch/x86/kernel/apic/io_apic.c:1700:6: warning: symbol 'apic' shadows an earlier one
 arch/x86/include/asm/apic.h:352:20: originally declared here
arch/x86/kernel/apic/io_apic.c:1994:6: warning: symbol 'apic' shadows an earlier one
 arch/x86/include/asm/apic.h:352:20: originally declared here
arch/x86/kernel/apic/io_apic.c:2781:6: warning: symbol 'apic' shadows an earlier one
 arch/x86/include/asm/apic.h:352:20: originally declared here

Ingo:
don't you think struct apic *apic is a very common name? Can we find some better name?

--
JSR



^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2009-04-09 15:08 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-03-17  0:04 [patch 00/13] x2apic and interrupt-remapping related fixes Suresh Siddha
2009-03-17  0:04 ` [patch 01/13] intr-remapping: fix "hard-safe -> hard-unsafe lock order detected" with irq_2_ir_lock Suresh Siddha
2009-03-17  0:04 ` [patch 02/13] dmar: move page fault handling code to dmar.c Suresh Siddha
2009-03-17  0:04 ` [patch 03/13] enable fault handling for intr-remapping Suresh Siddha
2009-03-17  0:04 ` [patch 04/13] dmar: routines for disabling queued invalidation and intr remapping Suresh Siddha
2009-03-17  0:04 ` [patch 05/13] dmar: start with sane state while enabling dma and interrupt-remapping Suresh Siddha
2009-03-17  0:04 ` [patch 06/13] intr-remapping: fix free_irte() to clear all the IRTE entries Suresh Siddha
2009-03-17  0:04 ` [patch 07/13] x2apic: use virtual wire A mode in disable_IO_APIC() with interrupt-remapping Suresh Siddha
2009-03-17  0:05 ` [patch 08/13] x86: fix clear_local_APIC() in the presence of x2apic Suresh Siddha
2009-03-17  0:05 ` [patch 09/13] x86: cleanup the IO-APIC level migration with interrupt-remapping Suresh Siddha
2009-03-17  0:05 ` [patch 10/13] cleanup ifdef CONFIG_INTR_REMAP in io_apic code Suresh Siddha
2009-03-17  0:05 ` [patch 11/13] ioapic: Fix non atomic allocation with interrupts disabled Suresh Siddha
2009-04-09 15:08   ` Jaswinder Singh Rajput
2009-03-17  0:05 ` [patch 12/13] x86: fix broken irq migration logic while cleaning up multiple vectors Suresh Siddha
2009-03-17  0:05 ` [patch 13/13] dmar: use atomic allocations for QI and Intr-remapping init Suresh Siddha

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.