===== drivers/pci/hotplug/rpaphp.h 1.11 vs edited =====
--- 1.11/drivers/pci/hotplug/rpaphp.h	2004-10-06 11:43:44 -05:00
+++ edited/drivers/pci/hotplug/rpaphp.h	2004-11-17 16:00:37 -06:00
@@ -126,6 +126,8 @@ extern int register_pci_slot(struct slot
 extern int rpaphp_unconfig_pci_adapter(struct slot *slot);
 extern int rpaphp_get_pci_adapter_status(struct slot *slot, int is_init, u8 * value);
 extern struct hotplug_slot *rpaphp_find_hotplug_slot(struct pci_dev *dev);
+extern void init_eeh_handler (void);
+extern void exit_eeh_handler (void);
 
 /* rpaphp_core.c */
 extern int rpaphp_add_slot(struct device_node *dn);
===== drivers/pci/hotplug/rpaphp_core.c 1.18 vs edited =====
--- 1.18/drivers/pci/hotplug/rpaphp_core.c	2004-10-06 11:43:44 -05:00
+++ edited/drivers/pci/hotplug/rpaphp_core.c	2004-11-17 16:00:37 -06:00
@@ -443,12 +443,18 @@ static int __init rpaphp_init(void)
 {
 	info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
 
+	/* Get set to handle EEH events. */
+	init_eeh_handler();
+
 	/* read all the PRA info from the system */
 	return init_rpa();
 }
 
 static void __exit rpaphp_exit(void)
 {
+	/* Let EEH know we are going away. */
+	exit_eeh_handler();
+
 	cleanup_slots();
 }
 
===== drivers/pci/hotplug/rpaphp_pci.c 1.16 vs edited =====
--- 1.16/drivers/pci/hotplug/rpaphp_pci.c	2004-10-19 11:54:38 -05:00
+++ edited/drivers/pci/hotplug/rpaphp_pci.c	2004-11-17 17:23:39 -06:00
@@ -22,8 +22,12 @@
  * Send feedback to
  *
  */
+#include
+#include
 #include
+#include
 #include
+#include
 #include
 #include "../pci.h" /* for pci_add_new_bus */
 
@@ -63,6 +67,7 @@ int rpaphp_claim_resource(struct pci_dev
 		    root ? "Address space collision on" :
 		    "No parent found for",
 		    resource, dtype, pci_name(dev), res->start, res->end);
+		dump_stack();
 	}
 	return err;
 }
@@ -185,6 +190,26 @@ rpaphp_fixup_new_pci_devices(struct pci_
 
 static int rpaphp_pci_config_bridge(struct pci_dev *dev);
 
+/**
+ * rpaphp_eeh_add_bus_device - add every device under a bus to EEH
+ * @bus: bus whose device list is walked
+ *
+ * Calls eeh_add_device_late() for each device on @bus and recurses into
+ * the subordinate bus of every bridge that has one.
+ */
+static void rpaphp_eeh_add_bus_device(struct pci_bus *bus)
+{
+	struct pci_dev *dev;
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		eeh_add_device_late(dev);
+		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
+			struct pci_bus *subbus = dev->subordinate;
+			if (subbus)
+				rpaphp_eeh_add_bus_device(subbus);
+		}
+	}
+}
+
 /*****************************************************************************
  rpaphp_pci_config_slot() will configure all devices under the
  given slot->dn and return the the first pci_dev.
@@ -212,6 +237,8 @@ rpaphp_pci_config_slot(struct device_nod
 		}
 		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
 			rpaphp_pci_config_bridge(dev);
+
+		rpaphp_eeh_add_bus_device(bus);
 	}
 	return dev;
 }
@@ -220,7 +247,6 @@ static int rpaphp_pci_config_bridge(stru
 {
 	u8 sec_busno;
 	struct pci_bus *child_bus;
-	struct pci_dev *child_dev;
 
 	dbg("Enter %s: BRIDGE dev=%s\n", __FUNCTION__, pci_name(dev));
 
@@ -237,11 +263,7 @@ static int rpaphp_pci_config_bridge(stru
 	/* do pci_scan_child_bus */
 	pci_scan_child_bus(child_bus);
 
-	list_for_each_entry(child_dev, &child_bus->devices, bus_list) {
-		eeh_add_device_late(child_dev);
-	}
-
-	/* fixup new pci devices without touching bus struct */
+	/* Fixup new pci devices without touching bus struct */
 	rpaphp_fixup_new_pci_devices(child_bus, 0);
 
 	/* Make the discovered devices available */
@@ -279,7 +301,7 @@ static void print_slot_pci_funcs(struct
 	return;
 }
 #else
-static void print_slot_pci_funcs(struct slot *slot)
+static inline void print_slot_pci_funcs(struct slot *slot)
 {
 	return;
 }
@@ -361,7 +383,6 @@ static void rpaphp_eeh_remove_bus_device
 			if (pdev)
 				rpaphp_eeh_remove_bus_device(pdev);
 		}
-
 	}
 	return;
 }
@@ -563,10 +584,16 @@ exit:
 	return retval;
 }
 
-struct hotplug_slot *rpaphp_find_hotplug_slot(struct pci_dev *dev)
+/**
+ * rpaphp_find_slot - find and return the slot holding the device
+ * @dev: pci device for which we want the slot structure.
+ *
+ * Returns NULL when no slot's bus device list contains @dev.
+ */
+static struct slot *rpaphp_find_slot(struct pci_dev *dev)
 {
-	struct list_head	*tmp, *n;
-	struct slot		*slot;
+	struct list_head *tmp, *n;
+	struct slot *slot;
 
 	list_for_each_safe(tmp, n, &rpaphp_slot_head) {
 		struct pci_bus *bus;
@@ -585,14 +612,112 @@ struct hotplug_slot *rpaphp_find_hotplug
 		if (!bus) {
 			continue; /* should never happen? */
 		}
+
 		for (ln = bus->devices.next; ln != &bus->devices; ln = ln->next) {
-				struct pci_dev *pdev = pci_dev_b(ln);
-				if (pdev == dev)
-					return slot->hotplug_slot;
+			struct pci_dev *pdev = pci_dev_b(ln);
+			if (pdev == dev)
+				return slot;
 		}
 	}
 
 	return NULL;
 }
 
-EXPORT_SYMBOL_GPL(rpaphp_find_hotplug_slot);
+/* ------------------------------------------------------- */
+/**
+ * handle_eeh_events -- reset a PCI device after hard lockup.
+ *
+ * pSeries systems will isolate a PCI slot if the PCI-Host
+ * bridge detects address or data parity errors, DMAs
+ * occurring to wild addresses (which usually happen due to
+ * bugs in device drivers or in PCI adapter firmware).
+ * Slot isolations also occur if #SERR, #PERR or other misc
+ * PCI-related errors are detected.
+ *
+ * Recovery process consists of unplugging the device driver
+ * (which generated hotplug events to userspace), then issuing
+ * a PCI #RST to the device, then reconfiguring the PCI config
+ * space for all bridges & devices under this slot, and then
+ * finally restarting the device drivers (which cause a second
+ * set of hotplug events to go out to userspace).
+ *
+ * Returns 1 if no slot is found for the failed device, 0 otherwise.
+ */
+int handle_eeh_events (struct notifier_block *self,
+                       unsigned long reason, void *ev)
+{
+	struct eeh_event *event = ev;
+	struct slot *frozen_slot;
+	struct eeh_cfg_tree * saved_bars;
+
+	frozen_slot = rpaphp_find_slot(event->dev);
+	if (!frozen_slot) {
+		printk (KERN_ERR
+			"EEH: Cannot find PCI slot for EEH error! dev=%p dn=%p\n",
+			event->dev, event->dn);
+		return 1;
+	}
+
+	/* Keep a copy of the config space registers */
+	saved_bars = eeh_save_bars(frozen_slot->dn);
+	of_node_get(event->dn);
+	pci_dev_get(event->dev);
+
+	rpaphp_unconfig_pci_adapter (frozen_slot);
+
+	event->dn->eeh_freeze_count ++;
+	if (event->dn->eeh_freeze_count > EEH_MAX_ALLOWED_FREEZES) {
+		/*
+		 * About 90% of all real-life EEH failures in the field
+		 * are due to poorly seated PCI cards. Only 10% or so are
+		 * due to actual, failed cards
+		 */
+		printk (KERN_ERR
+		   "EEH: device %s:%s has failed %d times \n"
+		   "and has been permanently disabled. Please try reseating\n"
+		   "this device or replacing it.\n",
+			pci_name (event->dev),
+			pci_pretty_name (event->dev),
+			EEH_MAX_ALLOWED_FREEZES);
+		/* NOTE(review): saved_bars is not handed back to
+		 * eeh_restore_bars() on this path; confirm it does not leak. */
+		goto rdone;
+	}
+
+	/* Reset the pci controller. (Asserts RST#; resets config space).
+	 * Reconfigure bridges and devices */
+	rtas_set_slot_reset (event->dn);
+	rtas_configure_bridge(event->dn);
+	eeh_restore_bars(saved_bars);
+
+	/* Give the system 5 seconds to finish running the user-space
+	 * hotplug scripts, e.g. ifdown for ethernet. Yes, this is a hack,
+	 * but if we don't do this, weird things happen.
+	 */
+	ssleep (5);
+
+	rpaphp_enable_pci_slot (frozen_slot);
+
+	/* The new device node is different than the old one;
+	 * copy over the freeze count, so that we don't lose track of it.
+	 */
+	frozen_slot->dn->eeh_freeze_count = event->dn->eeh_freeze_count;
+rdone:
+	of_node_put(event->dn);
+	pci_dev_put(event->dev);
+	return 0;
+}
+
+static struct notifier_block eeh_block;
+
+void __init init_eeh_handler (void)
+{
+	eeh_block.notifier_call = handle_eeh_events;
+	eeh_register_notifier (&eeh_block);
+}
+
+void __exit exit_eeh_handler (void)
+{
+	eeh_unregister_notifier (&eeh_block);
+}
+