All of lore.kernel.org
 help / color / mirror / Atom feed
From: Huang Ying <ying.huang@intel.com>
To: Len Brown <lenb@kernel.org>
Cc: linux-kernel@vger.kernel.org, Andi Kleen <andi@firstfloor.org>,
	Tony Luck <tony.luck@intel.com>,
	ying.huang@intel.com, linux-acpi@vger.kernel.org,
	Jesse Barnes <jbarnes@virtuousgeek.org>,
	Zhang Yanmin <yanmin.zhang@intel.com>
Subject: [PATCH 7/9] PCIe, AER, add aer_recover_queue
Date: Tue, 17 May 2011 16:08:37 +0800	[thread overview]
Message-ID: <1305619719-7480-8-git-send-email-ying.huang@intel.com> (raw)
In-Reply-To: <1305619719-7480-1-git-send-email-ying.huang@intel.com>

In addition to native PCIe AER, now APEI (ACPI Platform Error
Interface) GHES (Generic Hardware Error Source) can be used to report
PCIe AER errors too.  To add support to APEI GHES PCIe AER recovery,
aer_recover_queue is added to export the recovery function in native
PCIe AER driver.

Recoverable PCIe AER errors are reported via NMI in APEI GHES.  Then
APEI GHES uses irq_work to delay the error processing into an IRQ
handler.  But PCIe AER recovery can be very time-consuming, so
aer_recover_queue, which can be used in IRQ handler, delays the real
recovery action into the process context, that is, work queue.

Signed-off-by: Huang Ying <ying.huang@intel.com>
CC: Jesse Barnes <jbarnes@virtuousgeek.org>
CC: Zhang Yanmin <yanmin.zhang@intel.com>
---
 drivers/pci/pcie/aer/aerdrv_core.c     |   76 +++++++++++++++++++++++++++++----
 drivers/pci/pcie/aer/aerdrv_errprint.c |    3 -
 include/linux/aer.h                    |    3 +
 3 files changed, 74 insertions(+), 8 deletions(-)

--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -24,6 +24,7 @@
 #include <linux/suspend.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
+#include <linux/kfifo.h>
 #include "aerdrv.h"
 
 static int forceload;
@@ -445,8 +446,7 @@ static struct pcie_port_service_driver *
 	return drv;
 }
 
-static pci_ers_result_t reset_link(struct pcie_device *aerdev,
-		struct pci_dev *dev)
+static pci_ers_result_t reset_link(struct pci_dev *dev)
 {
 	struct pci_dev *udev;
 	pci_ers_result_t status;
@@ -486,7 +486,6 @@ static pci_ers_result_t reset_link(struc
 
 /**
  * do_recovery - handle nonfatal/fatal error recovery process
- * @aerdev: pointer to a pcie_device data structure of root port
  * @dev: pointer to a pci_dev data structure of agent detecting an error
  * @severity: error severity type
  *
@@ -494,8 +493,7 @@ static pci_ers_result_t reset_link(struc
  * error detected message to all downstream drivers within a hierarchy in
  * question and return the returned code.
  */
-static void do_recovery(struct pcie_device *aerdev, struct pci_dev *dev,
-		int severity)
+static void do_recovery(struct pci_dev *dev, int severity)
 {
 	pci_ers_result_t status, result = PCI_ERS_RESULT_RECOVERED;
 	enum pci_channel_state state;
@@ -511,7 +509,7 @@ static void do_recovery(struct pcie_devi
 			report_error_detected);
 
 	if (severity == AER_FATAL) {
-		result = reset_link(aerdev, dev);
+		result = reset_link(dev);
 		if (result != PCI_ERS_RESULT_RECOVERED)
 			goto failed;
 	}
@@ -576,9 +574,73 @@ static void handle_error_source(struct p
 			pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS,
 					info->status);
 	} else
-		do_recovery(aerdev, dev, info->severity);
+		do_recovery(dev, info->severity);
 }
 
+#ifdef CONFIG_ACPI_APEI_PCIEAER
+static void aer_recover_work_func(struct work_struct *work);
+
+#define AER_RECOVER_RING_ORDER		4
+#define AER_RECOVER_RING_SIZE		(1 << AER_RECOVER_RING_ORDER)
+
+struct aer_recover_entry
+{
+	u8	bus;
+	u8	devfn;
+	u16	domain;
+	int	severity;
+};
+
+static DEFINE_KFIFO(aer_recover_ring, struct aer_recover_entry,
+		    AER_RECOVER_RING_SIZE);
+/*
+ * Mutual exclusion for writers of aer_recover_ring, reader side don't
+ * need lock, because there is only one reader and lock is not needed
+ * between reader and writer.
+ */
+static DEFINE_SPINLOCK(aer_recover_ring_lock);
+static DECLARE_WORK(aer_recover_work, aer_recover_work_func);
+
+void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
+		       int severity)
+{
+	unsigned long flags;
+	struct aer_recover_entry entry = {
+		.bus		= bus,
+		.devfn		= devfn,
+		.domain		= domain,
+		.severity	= severity,
+	};
+
+	spin_lock_irqsave(&aer_recover_ring_lock, flags);
+	if (kfifo_put(&aer_recover_ring, &entry))
+		schedule_work(&aer_recover_work);
+	else
+		pr_err("AER recover: Buffer overflow when recovering AER for %04x:%02x:%02x:%x\n",
+		       domain, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+	spin_unlock_irqrestore(&aer_recover_ring_lock, flags);
+}
+EXPORT_SYMBOL_GPL(aer_recover_queue);
+
+static void aer_recover_work_func(struct work_struct *work)
+{
+	struct aer_recover_entry entry;
+	struct pci_dev *pdev;
+
+	while (kfifo_get(&aer_recover_ring, &entry)) {
+		pdev = pci_get_domain_bus_and_slot(entry.domain, entry.bus,
+						   entry.devfn);
+		if (!pdev) {
+			pr_err("AER recover: Can not find pci_dev for %04x:%02x:%02x:%x\n",
+			       entry.domain, entry.bus,
+			       PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn));
+			continue;
+		}
+		do_recovery(pdev, entry.severity);
+	}
+}
+#endif
+
 /**
  * get_device_error_info - read error status from dev and store it to info
  * @dev: pointer to the device expected to have a error record
--- a/drivers/pci/pcie/aer/aerdrv_errprint.c
+++ b/drivers/pci/pcie/aer/aerdrv_errprint.c
@@ -204,7 +204,7 @@ void aer_print_port_info(struct pci_dev
 }
 
 #ifdef CONFIG_ACPI_APEI_PCIEAER
-static int cper_severity_to_aer(int cper_severity)
+int cper_severity_to_aer(int cper_severity)
 {
 	switch (cper_severity) {
 	case CPER_SEV_RECOVERABLE:
@@ -215,6 +215,7 @@ static int cper_severity_to_aer(int cper
 		return AER_CORRECTABLE;
 	}
 }
+EXPORT_SYMBOL_GPL(cper_severity_to_aer);
 
 void cper_print_aer(const char *prefix, int cper_severity,
 		    struct aer_capability_regs *aer)
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -51,5 +51,8 @@ static inline int pci_cleanup_aer_uncorr
 
 extern void cper_print_aer(const char *prefix, int cper_severity,
 			   struct aer_capability_regs *aer);
+extern int cper_severity_to_aer(int cper_severity);
+extern void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
+			      int severity);
 #endif //_AER_H_
 

  parent reply	other threads:[~2011-05-17  8:08 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-05-17  8:08 [PATCH 0/9] ACPI, APEI patches for 2.6.40 Huang Ying
2011-05-17  8:08 ` [PATCH 1/9] Add Kconfig option ARCH_HAVE_NMI_SAFE_CMPXCHG Huang Ying
2011-05-17  8:08 ` [PATCH 2/9] lib, Add lock-less NULL terminated single list Huang Ying
2011-05-17  8:08 ` [PATCH 3/9] lib, Make gen_pool memory allocator lockless Huang Ying
2011-05-17  8:08 ` [PATCH 4/9] ACPI, APEI, GHES, printk support for recoverable error via NMI Huang Ying
2011-05-17  8:08 ` [PATCH 5/9] HWPoison: add memory_failure_queue() Huang Ying
2011-05-17  8:46   ` Ingo Molnar
2011-05-17  8:52     ` Huang Ying
2011-05-17  9:26       ` Ingo Molnar
2011-05-18  1:10         ` Huang Ying
2011-05-20 11:56           ` Ingo Molnar
2011-05-22  8:14             ` huang ying
2011-05-22 10:00               ` Ingo Molnar
2011-05-22 12:32                 ` huang ying
2011-05-22 12:32                   ` huang ying
2011-05-22 13:25                   ` Ingo Molnar
2011-05-22 13:25                     ` Ingo Molnar
2011-05-23  2:38                     ` Huang Ying
2011-05-23 11:01                       ` Ingo Molnar
2011-05-23 16:45                         ` Luck, Tony
2011-05-25 14:08                           ` Ingo Molnar
2011-05-24  2:10                         ` Huang Ying
2011-05-24  2:48                           ` Ingo Molnar
2011-05-24  3:07                             ` Huang Ying
2011-05-24  4:24                               ` Ingo Molnar
2011-05-25  7:41                 ` Hidetoshi Seto
2011-05-25 14:11                   ` Ingo Molnar
2011-05-26  1:33                     ` Hidetoshi Seto
2011-05-17  8:08 ` [PATCH 6/9] ACPI, APEI, GHES: Add hardware memory error recovery support Huang Ying
2011-05-17  8:08 ` Huang Ying [this message]
2011-06-01 18:49   ` [PATCH 7/9] PCIe, AER, add aer_recover_queue Jesse Barnes
2011-06-02  5:09     ` Huang Ying
2011-06-02 15:05   ` Jesse Barnes
2011-05-17  8:08 ` [PATCH 8/9] ACPI, APEI, GHES: Add PCIe AER recovery support Huang Ying
2011-05-17  8:08 ` [PATCH 9/9] ACPI, APEI, ERST, Prevent erst_dbg from loading if ERST is disabled Huang Ying
2011-05-29  6:55 ` [PATCH 0/9] ACPI, APEI patches for 2.6.40 Len Brown
2011-05-29 11:31   ` huang ying
2011-05-29 11:31     ` huang ying
2011-05-30  6:48   ` Chen Gong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1305619719-7480-8-git-send-email-ying.huang@intel.com \
    --to=ying.huang@intel.com \
    --cc=andi@firstfloor.org \
    --cc=jbarnes@virtuousgeek.org \
    --cc=lenb@kernel.org \
    --cc=linux-acpi@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=tony.luck@intel.com \
    --cc=yanmin.zhang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.