linux-pci.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Takao Indoh <indou.takao@jp.fujitsu.com>
To: linux-pci@vger.kernel.org, x86@kernel.org, linux-kernel@vger.kernel.org
Cc: tokunaga.keiich@jp.fujitsu.com, kexec@lists.infradead.org,
	hbabu@us.ibm.com, andi@firstfloor.org, ddutile@redhat.com,
	vgoyal@redhat.com, ishii.hironobu@jp.fujitsu.com, hpa@zytor.com,
	bhelgaas@google.com, tglx@linutronix.de, yinghai@kernel.org,
	mingo@redhat.com, Takao Indoh <indou.takao@jp.fujitsu.com>,
	khalid@gonehiking.org
Subject: [PATCH v7 4/5] x86, pci: Reset PCIe devices at boot time
Date: Tue, 27 Nov 2012 09:43:13 +0900 (JST)	[thread overview]
Message-ID: <20121127004223.3604.78831.sendpatchset@tindoh.g01.fujitsu.local> (raw)
In-Reply-To: <20121127004144.3604.61708.sendpatchset@tindoh.g01.fujitsu.local>

This patch resets PCIe devices at boot time when "reset_devices" is
specified.

Kdump with intel_iommu=on fails because ongoing DMA from the first kernel
causes a DMAR fault when the DMAR page table is initialized while the kdump
kernel is booting up. To solve this problem, this patch resets PCIe
devices during boot to stop their DMA.

Signed-off-by: Takao Indoh <indou.takao@jp.fujitsu.com>
---
 arch/x86/include/asm/pci-direct.h |    1 +
 arch/x86/kernel/setup.c           |    3 +
 arch/x86/pci/early.c              |  241 +++++++++++++++++++++++++++++++++++++
 3 files changed, 245 insertions(+), 0 deletions(-)

diff --git a/arch/x86/include/asm/pci-direct.h b/arch/x86/include/asm/pci-direct.h
index b6360d3..5620070 100644
--- a/arch/x86/include/asm/pci-direct.h
+++ b/arch/x86/include/asm/pci-direct.h
@@ -18,6 +18,7 @@ extern int early_pci_allowed(void);
 extern unsigned int pci_early_dump_regs;
 extern void early_dump_pci_device(u8 bus, u8 slot, u8 func);
 extern void early_dump_pci_devices(void);
+extern void early_reset_pcie_devices(void);
 
 struct pci_dev *get_early_pci_dev(int num, int slot, int func);
 #endif /* _ASM_X86_PCI_DIRECT_H */
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ca45696..2e7928e 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1001,6 +1001,9 @@ void __init setup_arch(char **cmdline_p)
 	generic_apic_probe();
 
 	early_quirks();
+#ifdef CONFIG_PCI
+	early_reset_pcie_devices();
+#endif
 
 	/*
 	 * Read APIC and some other early information from ACPI tables.
diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c
index 024def7..ff737f3 100644
--- a/arch/x86/pci/early.c
+++ b/arch/x86/pci/early.c
@@ -1,5 +1,6 @@
 #include <linux/kernel.h>
 #include <linux/pci.h>
+#include <linux/bootmem.h>
 #include <asm/pci-direct.h>
 #include <asm/io.h>
 #include <asm/pci_x86.h>
@@ -183,3 +184,243 @@ __init struct pci_dev *get_early_pci_dev(int num, int slot, int func)
 
 	return pdev;
 }
+
+struct pcie_dev {	/* per-function state kept across the bus reset */
+	int cap;	/* position of PCI Express capability; 0 = entry unused */
+	int flags;	/* PCI_EXP_FLAGS */
+
+	/* saved configuration registers */
+	u32 pci_cfg[16];	/* config dwords at offsets 0x00-0x3c */
+	u16 pcie_cfg[7];	/* seven PCIe capability control words */
+};
+
+struct pcie_port {	/* a port whose secondary bus gets reset */
+	struct list_head dev;	/* link in device_list */
+	u8 bus;			/* bus/slot/func locate the port itself */
+	u8 slot;
+	u8 func;
+	u8 secondary;		/* PCI_SECONDARY_BUS value read from the port */
+	struct pcie_dev child[PCI_MAX_FUNCTIONS];	/* slot 0 functions */
+};
+
+static __initdata LIST_HEAD(device_list);	/* ports queued for reset */
+
+static void __init early_udelay(int loops)	/* imprecise ~loops us wait */
+{
+	while (loops--) {
+		/* Each call takes approximately 1 us */
+		native_io_delay();
+	}
+}
+
+static void __init do_reset(u8 bus, u8 slot, u8 func)
+{
+	struct pci_dev *dev;	/* the bridge whose secondary bus is reset */
+	u16 ctrl;		/* PCI_BRIDGE_CONTROL value */
+
+	dev = get_early_pci_dev(bus, slot, func);
+
+	printk(KERN_INFO "pci 0000:%02x:%02x.%d reset\n", bus, slot, func);
+
+	/* Assert Secondary Bus Reset */
+	pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl);
+	ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
+	pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
+
+	/*
+	 * PCIe spec requires software to ensure a minimum reset duration
+	 * (Trst == 1ms). Wait about 5ms instead, as a safety margin, because
+	 * early_udelay is not precise.
+	 */
+	early_udelay(5000);
+
+	/* De-assert Secondary Bus Reset */
+	ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
+	pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
+}
+
+static void __init save_state(u8 bus, u8 slot, u8 func, struct pcie_dev *pdev)
+{
+	struct pci_dev *dev;	/* function whose registers are saved to pdev */
+	int i;
+
+	dev = get_early_pci_dev(bus, slot, func);
+	dev->is_pcie = 1;	/* presumably for pcie_capability_*() below */
+	dev->pcie_cap = pdev->cap;
+	dev->pcie_flags_reg = pdev->flags;
+
+	printk(KERN_INFO "pci 0000:%02x:%02x.%d save state\n", bus, slot, func);
+
+	for (i = 0; i < 16; i++)	/* config dwords 0x00-0x3c */
+		pci_read_config_dword(dev, i * 4, pdev->pci_cfg + i);
+	i = 0;	/* now indexes pcie_cfg[]; same order as restore_state() */
+	pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &pdev->pcie_cfg[i++]);
+	pcie_capability_read_word(dev, PCI_EXP_LNKCTL, &pdev->pcie_cfg[i++]);
+	pcie_capability_read_word(dev, PCI_EXP_SLTCTL, &pdev->pcie_cfg[i++]);
+	pcie_capability_read_word(dev, PCI_EXP_RTCTL, &pdev->pcie_cfg[i++]);
+	pcie_capability_read_word(dev, PCI_EXP_DEVCTL2, &pdev->pcie_cfg[i++]);
+	pcie_capability_read_word(dev, PCI_EXP_LNKCTL2, &pdev->pcie_cfg[i++]);
+	pcie_capability_read_word(dev, PCI_EXP_SLTCTL2, &pdev->pcie_cfg[i++]);
+}
+
+static void __init restore_state(u8 bus, u8 slot, u8 func,
+				 struct pcie_dev *pdev)
+{
+	struct pci_dev *dev;	/* function whose registers are rewritten */
+	int i = 0;		/* indexes pcie_cfg[]; same order as save_state() */
+
+	dev = get_early_pci_dev(bus, slot, func);
+	dev->is_pcie = 1;	/* presumably for pcie_capability_*() below */
+	dev->pcie_cap = pdev->cap;
+	dev->pcie_flags_reg = pdev->flags;
+
+	printk(KERN_INFO "pci 0000:%02x:%02x.%d restore state\n",
+	       bus, slot, func);
+
+	pcie_capability_write_word(dev, PCI_EXP_DEVCTL, pdev->pcie_cfg[i++]);
+	pcie_capability_write_word(dev, PCI_EXP_LNKCTL, pdev->pcie_cfg[i++]);
+	pcie_capability_write_word(dev, PCI_EXP_SLTCTL, pdev->pcie_cfg[i++]);
+	pcie_capability_write_word(dev, PCI_EXP_RTCTL, pdev->pcie_cfg[i++]);
+	pcie_capability_write_word(dev, PCI_EXP_DEVCTL2, pdev->pcie_cfg[i++]);
+	pcie_capability_write_word(dev, PCI_EXP_LNKCTL2, pdev->pcie_cfg[i++]);
+	pcie_capability_write_word(dev, PCI_EXP_SLTCTL2, pdev->pcie_cfg[i++]);
+
+	for (i = 15; i >= 0; i--)	/* config dwords 0x3c down to 0x00 */
+		pci_write_config_dword(dev, i * 4, pdev->pci_cfg[i]);
+}
+
+static void __init find_pcie_device(unsigned bus, unsigned slot, unsigned func)
+{
+	struct pci_dev *dev;	/* the port first, then each function behind it */
+	int f, pcie_type, count;	/* count: resettable functions found */
+	u8 secondary, type;
+	u16 vendor;
+	u32 class;
+	struct pcie_port *port;
+	int pcie_cap[PCI_MAX_FUNCTIONS];	/* per function; 0 = skip */
+	int pcie_flags[PCI_MAX_FUNCTIONS];
+
+	dev = get_early_pci_dev(bus, slot, func);
+	set_pcie_port_type(dev);
+	if (!pci_is_pcie(dev))
+		return;
+
+	pcie_type = pci_pcie_type(dev);
+	if ((pcie_type != PCI_EXP_TYPE_ROOT_PORT) &&
+	    (pcie_type != PCI_EXP_TYPE_DOWNSTREAM))
+		return;	/* only root ports and downstream ports qualify */
+
+	pci_read_config_byte(dev, PCI_HEADER_TYPE, &type);
+	if ((type & 0x7f) != PCI_HEADER_TYPE_BRIDGE)
+		return;	/* the port must have a bridge header */
+	pci_read_config_byte(dev, PCI_SECONDARY_BUS, &secondary);
+
+	memset(pcie_cap, 0, sizeof(pcie_cap));
+	memset(pcie_flags, 0, sizeof(pcie_flags));
+	for (count = 0, f = 0; f < PCI_MAX_FUNCTIONS; f++) {	/* slot 0 only */
+		dev = get_early_pci_dev(secondary, 0, f);
+		pci_read_config_word(dev, PCI_VENDOR_ID, &vendor);
+		if (vendor == 0xffff)
+			continue;	/* no function present */
+
+		set_pcie_port_type(dev);
+		if (!pci_is_pcie(dev))
+			continue;	/* not a PCIe function */
+
+		pcie_type = pci_pcie_type(dev);
+		if ((pcie_type == PCI_EXP_TYPE_UPSTREAM) ||
+		    (pcie_type == PCI_EXP_TYPE_PCI_BRIDGE))
+			/* Don't reset switch, bridge: skip this whole port */
+			return;
+
+		pci_read_config_dword(dev, PCI_CLASS_REVISION, &class);
+		if ((class >> 24) == PCI_BASE_CLASS_DISPLAY)
+			/* Don't reset VGA device: skip this whole port */
+			return;
+
+		count++;
+		pcie_cap[f] = dev->pcie_cap;
+		pcie_flags[f] = dev->pcie_flags_reg;
+	}
+
+	if (!count)
+		return;	/* nothing behind this port to reset */
+
+	port = (struct pcie_port *)alloc_bootmem(sizeof(struct pcie_port));
+	if (port == NULL) {	/* NOTE(review): can alloc_bootmem return NULL? */
+		printk(KERN_ERR "pci 0000:%02x:%02x.%d alloc_bootmem failed\n",
+		       bus, slot, func);
+		return;
+	}
+	memset(port, 0, sizeof(*port));
+	port->bus = bus;
+	port->slot = slot;
+	port->func = func;
+	port->secondary = secondary;
+	for (f = 0; f < PCI_MAX_FUNCTIONS; f++)
+		if (pcie_cap[f]) {	/* only functions recorded above */
+			port->child[f].cap = pcie_cap[f];
+			port->child[f].flags = pcie_flags[f];
+			save_state(secondary, 0, f, &port->child[f]);
+		}
+	list_add_tail(&port->dev, &device_list);	/* queue for reset */
+}
+
+void __init early_reset_pcie_devices(void)
+{
+	unsigned bus, slot, func;
+	struct pcie_port *port, *tmp;
+	struct pci_dev *dev;
+
+	if (!early_pci_allowed() || !reset_devices)
+		return;	/* needs early PCI access and reset_devices set */
+
+	/*
+	 * Find PCIe ports (root ports and downstream ports) and save the
+	 * config registers of the devices behind them.
+	 */
+	for (bus = 0; bus < 256; bus++) {
+		for (slot = 0; slot < 32; slot++) {
+			for (func = 0; func < PCI_MAX_FUNCTIONS; func++) {
+				u16 vendor;
+				u8 type;
+
+				dev = get_early_pci_dev(bus, slot, func);
+				pci_read_config_word(dev, PCI_VENDOR_ID,
+						     &vendor);
+				if (vendor == 0xffff)
+					continue;	/* no function present */
+
+				pci_read_config_byte(dev, PCI_HEADER_TYPE,
+						     &type);
+				find_pcie_device(bus, slot, func);
+
+				if ((func == 0) && !(type & 0x80))
+					break;	/* bit 7 clear: skip other funcs */
+			}
+		}
+	}
+
+	if (list_empty(&device_list))
+		return;	/* no port was queued by find_pcie_device() */
+
+	/* Do bus reset */
+	list_for_each_entry(port, &device_list, dev)
+		do_reset(port->bus, port->slot, port->func);
+
+	/*
+	 * According to PCIe spec, software must wait a minimum of 100 ms
+	 * before sending a configuration request. Wait about 500ms here as a
+	 * safety margin.
+	 */
+	early_udelay(500000);
+
+	/* Restore config registers and free memory */
+	list_for_each_entry_safe(port, tmp, &device_list, dev) {
+		for (func = 0; func < PCI_MAX_FUNCTIONS; func++)
+			if (port->child[func].cap)	/* saved entries only */
+				restore_state(port->secondary, 0, func,
+					      &port->child[func]);
+		free_bootmem(__pa(port), sizeof(*port));
+	}
+}
-- 
1.7.1



  parent reply	other threads:[~2012-11-27  0:43 UTC|newest]

Thread overview: 49+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-11-27  0:42 [PATCH v7 0/5] Reset PCIe devices to address DMA problem on kdump with iommu Takao Indoh
2012-11-27  0:42 ` [PATCH v7 1/5] x86, pci: add dummy pci device for early stage Takao Indoh
2012-11-27  0:42 ` [PATCH v7 2/5] PCI: Define the maximum number of PCI function Takao Indoh
2012-11-27  0:42 ` [PATCH v7 3/5] Make reset_devices available at early stage Takao Indoh
2012-11-27  0:43 ` Takao Indoh [this message]
2012-11-27  0:43 ` [PATCH v7 5/5] x86, pci: Enable PCI INTx when MSI is disabled Takao Indoh
2012-11-30 15:49 ` [PATCH v7 0/5] Reset PCIe devices to address DMA problem on kdump with iommu MUNEDA Takahiro
2012-12-21 16:19   ` Yinghai Lu
2013-01-07 19:09     ` Thomas Renninger
2013-01-07 20:16       ` Yinghai Lu
2013-01-08  0:42         ` Thomas Renninger
2013-01-08  3:04           ` Yinghai Lu
2013-01-08 16:47             ` [PATCH] Only reset e820 once, even with multiple memmap=exactmap params Thomas Renninger
2013-01-08 17:19               ` Yinghai Lu
2013-01-10  3:21                 ` Thomas Renninger
2013-01-10 14:26                   ` Vivek Goyal
2013-01-10 16:53                     ` Yinghai Lu
2013-01-10 17:01                       ` Vivek Goyal
2013-01-10 17:11                         ` Yinghai Lu
2013-01-10 23:34                   ` Yinghai Lu
2013-01-11 12:33                     ` [PATCH] x86 e820: only void usable memory areas in memmap=exactmap case Thomas Renninger
2013-01-11 16:16                       ` Yinghai Lu
2013-01-11 18:24                         ` Thomas Renninger
2013-01-11 19:59                           ` Yinghai Lu
2013-01-11 20:06                             ` H. Peter Anvin
2013-01-11 21:09                               ` Yinghai Lu
2013-01-11 22:16                                 ` H. Peter Anvin
2013-01-12 11:31                                   ` Thomas Renninger
2013-01-12 17:07                                     ` Yinghai Lu
2013-01-14  2:08                                       ` Thomas Renninger
2013-01-14  2:43                                         ` Yinghai Lu
2013-01-14 15:05                                           ` Thomas Renninger
2013-01-14 19:04                                             ` Yinghai Lu
2013-01-15  0:54                                               ` Thomas Renninger
2013-01-15  4:45                                                 ` Yinghai Lu
2013-01-22 15:21                                                   ` Thomas Renninger
2013-01-08 16:50         ` [PATCH v7 0/5] Reset PCIe devices to address DMA problem on kdump with iommu Thomas Renninger
2013-01-08 17:27           ` Yinghai Lu
2013-01-09  2:32             ` Thomas Renninger
2013-01-09  4:39               ` Takao Indoh
2013-01-21  1:11       ` Takao Indoh
2013-01-23  0:47         ` Thomas Renninger
2013-01-24  0:23           ` Takao Indoh
2013-01-29  1:14             ` Thomas Renninger
2013-01-30  5:01               ` Takao Indoh
2013-03-04  0:56           ` Takao Indoh
2013-03-04 22:00             ` Don Dutile
2013-03-05  0:56               ` Takao Indoh
     [not found] ` <CAK4g67ZEUfCqqpa1-4wkN4+OXZYQqLTiJC+6OpwVWVLfO2_7xQ@mail.gmail.com>
2012-12-21 10:37   ` Takao Indoh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20121127004223.3604.78831.sendpatchset@tindoh.g01.fujitsu.local \
    --to=indou.takao@jp.fujitsu.com \
    --cc=andi@firstfloor.org \
    --cc=bhelgaas@google.com \
    --cc=ddutile@redhat.com \
    --cc=hbabu@us.ibm.com \
    --cc=hpa@zytor.com \
    --cc=ishii.hironobu@jp.fujitsu.com \
    --cc=kexec@lists.infradead.org \
    --cc=khalid@gonehiking.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pci@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=tglx@linutronix.de \
    --cc=tokunaga.keiich@jp.fujitsu.com \
    --cc=vgoyal@redhat.com \
    --cc=x86@kernel.org \
    --cc=yinghai@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).