All of lore.kernel.org
 help / color / mirror / Atom feed
From: Venu Busireddy <venu.busireddy@oracle.com>
To: venu.busireddy@oracle.com,
	Ian Jackson <ian.jackson@eu.citrix.com>,
	Wei Liu <wei.liu2@citrix.com>
Cc: xen-devel@lists.xen.org
Subject: Containing unrecoverable AER errors...
Date: Wed,  7 Jun 2017 14:24:32 -0500	[thread overview]
Message-ID: <20170607192432.20500-1-venu.busireddy@oracle.com> (raw)


Hi,

I am working on creating a patch to aid in containing the unrecoverable
AER errors generated by PCI devices assigned to guests in passthrough
mode.

The overall approach is as follows:

1. Change the BIOS settings such that the AER error handling is delegated
   to the host.

2. Change the xen_pciback driver to store the name (SBDF) of the erring
   device in xenstore.

3. At the time of creating the guest, setup a watcher for such writes to
   the xenstore.

4. When the watcher is kicked off due to errors, *shutdown* the guest and
   mark the erring device unassignable until administrative intervention.

I got all of this working, but I was advised that shutting down the
guest is not the correct approach, because the guest may or may not
respond to the shutdown. The suggestion was to destroy the guest.

I ran into a problem with that. libxl_domain_destroy() is not
callable from within libxl. I tried to create a new wrapper to call
libxl__domain_destroy(), but the callback function never gets called!
Not surprisingly, because the description in libxl/libxl_internal.h
about asynchronous operations does prohibit this!

What is the best way to kill/destroy a guest from within libxl? Could you
please advise? I am including the patches below for reference (please
ignore the few debug statements). The problem part is the function
aer_backend_watch_callback() in tools/libxl/libxl_pci.c.


diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c
index 6331a95..5a4bae5 100644
--- a/drivers/xen/xen-pciback/pci_stub.c
+++ b/drivers/xen/xen-pciback/pci_stub.c
@@ -656,11 +656,13 @@ static const struct pci_device_id pcistub_ids[] = {
 };
 
 #define PCI_NODENAME_MAX 40
+#define PCI_DEVICENAME_MAX 14
 static void kill_domain_by_device(struct pcistub_device *psdev)
 {
 	struct xenbus_transaction xbt;
 	int err;
 	char nodename[PCI_NODENAME_MAX];
+	char devicename[PCI_DEVICENAME_MAX];
 
 	BUG_ON(!psdev);
 	snprintf(nodename, PCI_NODENAME_MAX, "/local/domain/0/backend/pci/%d/0",
@@ -675,6 +677,18 @@ static void kill_domain_by_device(struct pcistub_device *psdev)
 	}
 	/*PV AER handlers will set this flag*/
 	xenbus_printf(xbt, nodename, "aerState" , "aerfail");
+
+	/*
+	 * Xend versions <= 4.4 depend on "aerState" and expect its value
+	 * to be set to "aerfail". Therefore, add a new node "aerFailedSBDF"
+	 * to set the device name.
+	 */
+	snprintf(devicename, PCI_DEVICENAME_MAX, "%04x:%02x:%02x.%x",
+		 pci_domain_nr(psdev->dev->bus),
+		 psdev->dev->bus->number,
+		 PCI_SLOT(psdev->dev->devfn), PCI_FUNC(psdev->dev->devfn));
+	xenbus_printf(xbt, nodename, "aerFailedSBDF" , devicename);
+
 	err = xenbus_transaction_end(xbt, 0);
 	if (err) {
 		if (err == -EAGAIN)
diff --git a/xen/common/domctl.c b/xen/common/domctl.c
index 951a5dc..5e0f123 100644
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -393,9 +393,15 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
     {
     case XEN_DOMCTL_createdomain:
     case XEN_DOMCTL_test_assign_device:
+    case XEN_DOMCTL_test_hidden_device:
     case XEN_DOMCTL_gdbsx_guestmemio:
         d = NULL;
         break;
+    case XEN_DOMCTL_hide_device:
+    case XEN_DOMCTL_unhide_device:
+        rcu_lock_domain(dom_xen);
+        d = dom_xen;
+        break;
     default:
         d = rcu_lock_domain_by_id(op->domain);
         if ( !d && op->cmd != XEN_DOMCTL_getdomaininfo )
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index c8e2d2d..eb8fd99 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -31,6 +31,7 @@
 #include <xen/softirq.h>
 #include <xen/tasklet.h>
 #include <xsm/xsm.h>
+#include <xen/mm.h>
 #include <asm/msi.h>
 #include "ats.h"
 
@@ -1333,19 +1334,31 @@ int iommu_remove_device(struct pci_dev *pdev)
     return hd->platform_ops->remove_device(pdev->devfn, pci_to_dev(pdev));
 }
 
+static bool device_assigned_to_domain(struct domain *d, u16 seg, u8 bus, u8 devfn)
+{
+    bool rc = false;
+
+    pcidevs_lock();
+
+    if ( pci_get_pdev_by_domain(d, seg, bus, devfn) )
+        rc = true;
+
+    pcidevs_unlock();
+    return rc;
+}
+
 /*
  * If the device isn't owned by the hardware domain, it means it already
  * has been assigned to other domain, or it doesn't exist.
  */
 static int device_assigned(u16 seg, u8 bus, u8 devfn)
 {
-    struct pci_dev *pdev;
-
-    pcidevs_lock();
-    pdev = pci_get_pdev_by_domain(hardware_domain, seg, bus, devfn);
-    pcidevs_unlock();
+    return device_assigned_to_domain(hardware_domain, seg, bus, devfn) ? 0 : -EBUSY;
+}
 
-    return pdev ? 0 : -EBUSY;
+static int device_hidden(u16 seg, u8 bus, u8 devfn)
+{
+    return device_assigned_to_domain(dom_xen, seg, bus, devfn) ? -EBUSY : 0;
 }
 
 static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag)
@@ -1354,6 +1367,22 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag)
     struct pci_dev *pdev;
     int rc = 0;
 
+    if ( device_hidden(seg, bus, devfn) )
+        return -EINVAL;
+
+    if ( d == dom_xen )
+    {
+        pdev = pci_get_pdev(seg, bus, devfn);
+        if ( pdev )
+        {
+            pdev->domain = dom_xen;
+            list_add(&pdev->domain_list, &dom_xen->arch.pdev_list);
+            return rc;
+        }
+        else
+            return -ENODEV;
+    }
+
     if ( !iommu_enabled || !hd->platform_ops )
         return 0;
 
@@ -1417,10 +1446,23 @@ int deassign_device(struct domain *d, u16 seg, u8 bus, u8 devfn)
     struct pci_dev *pdev = NULL;
     int ret = 0;
 
+    ASSERT(pcidevs_locked());
+
+    if ( d == dom_xen )
+    {
+        pdev = pci_get_pdev(seg, bus, devfn);
+        if ( pdev )
+        {
+            pdev->domain = hardware_domain;
+            list_add(&pdev->domain_list, &dom_xen->arch.pdev_list);
+            return ret;
+        }
+        else return -ENODEV;
+    }
+
     if ( !iommu_enabled || !hd->platform_ops )
         return -EINVAL;
 
-    ASSERT(pcidevs_locked());
     pdev = pci_get_pdev_by_domain(d, seg, bus, devfn);
     if ( !pdev )
         return -ENODEV;
@@ -1600,6 +1642,15 @@ int iommu_do_pci_domctl(
                    seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
             ret = -EINVAL;
         }
+
+        if ( device_hidden(seg, bus, devfn) )
+        {
+            printk(XENLOG_G_INFO
+                   "%04x:%02x:%02x.%u device is hidden\n",
+                   seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+            ret = -EINVAL;
+        }
+
         break;
 
     case XEN_DOMCTL_assign_device:
@@ -1636,8 +1687,15 @@ int iommu_do_pci_domctl(
             break;
         }
 
-        ret = device_assigned(seg, bus, devfn) ?:
-              assign_device(d, seg, bus, devfn, flag);
+        if ( device_hidden(seg, bus, devfn) )
+        {
+            ret = -EINVAL;
+            break;
+        }
+
+        if ( !device_assigned(seg, bus, devfn) )
+            ret = assign_device(d, seg, bus, devfn, flag);
+
         if ( ret == -ERESTART )
             ret = hypercall_create_continuation(__HYPERVISOR_domctl,
                                                 "h", u_domctl);
@@ -1671,6 +1729,12 @@ int iommu_do_pci_domctl(
         bus = PCI_BUS(machine_sbdf);
         devfn = PCI_DEVFN2(machine_sbdf);
 
+        if ( device_hidden(seg, bus, devfn) )
+        {
+            ret = -EINVAL;
+            break;
+        }
+
         pcidevs_lock();
         ret = deassign_device(d, seg, bus, devfn);
         pcidevs_unlock();
@@ -1679,7 +1743,86 @@ int iommu_do_pci_domctl(
                    "deassign %04x:%02x:%02x.%u from dom%d failed (%d)\n",
                    seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                    d->domain_id, ret);
+        break;
+
+    case XEN_DOMCTL_hide_device:
+        machine_sbdf = domctl->u.assign_device.u.pci.machine_sbdf;
+        ret = xsm_hide_device(XSM_HOOK, d, machine_sbdf);
+        if ( ret )
+            break;
+
+        if ( unlikely(d->is_dying) )
+        {
+            ret = -EAGAIN;
+            break;
+        }
+
+        seg = machine_sbdf >> 16;
+        bus = PCI_BUS(machine_sbdf);
+        devfn = PCI_DEVFN2(machine_sbdf);
+        flag = domctl->u.assign_device.flag;
+
+        if ( device_hidden(seg, bus, devfn) )
+        {
+            ret = -EINVAL;
+            break;
+        }
+
+        pcidevs_lock();
+        ret = assign_device(dom_xen, seg, bus, devfn, flag);
+        pcidevs_unlock();
+        if ( ret == -ERESTART )
+            ret = hypercall_create_continuation(__HYPERVISOR_domctl,
+                                                "h", u_domctl);
+        else if ( ret )
+            printk(XENLOG_G_ERR "XEN_DOMCTL_hide_device: "
+                   "hide %04x:%02x:%02x.%u failed (%d)\n",
+                   seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), ret);
+        break;
+
+    case XEN_DOMCTL_unhide_device:
+        machine_sbdf = domctl->u.assign_device.u.pci.machine_sbdf;
+        ret = xsm_unhide_device(XSM_HOOK, d, machine_sbdf);
+        if ( ret )
+            break;
+
+        if ( unlikely(d->is_dying) )
+        {
+            ret = -EINVAL;
+            break;
+        }
+
+        seg = machine_sbdf >> 16;
+        bus = PCI_BUS(machine_sbdf);
+        devfn = PCI_DEVFN2(machine_sbdf);
+
+        if ( !device_hidden(seg, bus, devfn) )
+        {
+            ret = -EINVAL;
+            break;
+        }
+
+        pcidevs_lock();
+        ret = deassign_device(dom_xen, seg, bus, devfn);
+        pcidevs_unlock();
+
+        if ( ret == -ERESTART )
+            ret = hypercall_create_continuation(__HYPERVISOR_domctl,
+                                                "h", u_domctl);
+        else if ( ret )
+            printk(XENLOG_G_ERR "XEN_DOMCTL_unhide_device: "
+                   "assign %04x:%02x:%02x.%u to dom%d failed (%d)\n",
+                   seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+                   d->domain_id, ret);
+        break;
+
+    case XEN_DOMCTL_test_hidden_device:
+        machine_sbdf = domctl->u.assign_device.u.pci.machine_sbdf;
+        seg = machine_sbdf >> 16;
+        bus = PCI_BUS(machine_sbdf);
+        devfn = PCI_DEVFN2(machine_sbdf);
 
+        ret = device_hidden(seg, bus, devfn);
         break;
 
     default:
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index e6cf211..1b043ea 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -1222,6 +1222,9 @@ struct xen_domctl {
 #define XEN_DOMCTL_gdbsx_pausevcpu             1001
 #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
 #define XEN_DOMCTL_gdbsx_domstatus             1003
+#define XEN_DOMCTL_hide_device                 2001
+#define XEN_DOMCTL_unhide_device               2002
+#define XEN_DOMCTL_test_hidden_device          2003
     uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */
     domid_t  domain;
     union {
diff --git a/xen/include/xsm/dummy.h b/xen/include/xsm/dummy.h
index 62fcea6..0b820e1 100644
--- a/xen/include/xsm/dummy.h
+++ b/xen/include/xsm/dummy.h
@@ -355,6 +355,24 @@ static XSM_INLINE int xsm_deassign_device(XSM_DEFAULT_ARG struct domain *d, uint
     return xsm_default_action(action, current->domain, d);
 }
 
+static XSM_INLINE int xsm_hide_device(XSM_DEFAULT_ARG struct domain *d, uint32_t machine_bdf)
+{
+    XSM_ASSERT_ACTION(XSM_HOOK);
+    return xsm_default_action(action, current->domain, d);
+}
+
+static XSM_INLINE int xsm_unhide_device(XSM_DEFAULT_ARG struct domain *d, uint32_t machine_bdf)
+{
+    XSM_ASSERT_ACTION(XSM_HOOK);
+    return xsm_default_action(action, current->domain, d);
+}
+
+static XSM_INLINE int xsm_test_hidden_device(XSM_DEFAULT_ARG uint32_t machine_bdf)
+{
+    XSM_ASSERT_ACTION(XSM_HOOK);
+    return xsm_default_action(action, current->domain, NULL);
+}
+
 #endif /* HAS_PASSTHROUGH && HAS_PCI */
 
 #if defined(CONFIG_HAS_PASSTHROUGH) && defined(CONFIG_HAS_DEVICE_TREE)
diff --git a/xen/include/xsm/xsm.h b/xen/include/xsm/xsm.h
index 60c0fd6..03dbeff 100644
--- a/xen/include/xsm/xsm.h
+++ b/xen/include/xsm/xsm.h
@@ -479,6 +479,22 @@ static inline int xsm_deassign_device(xsm_default_t def, struct domain *d, uint3
 {
     return xsm_ops->deassign_device(d, machine_bdf);
 }
+
+static inline int xsm_hide_device(xsm_default_t def, struct domain *d, uint32_t machine_bdf)
+{
+    return xsm_ops->hide_device(d, machine_bdf);
+}
+
+static inline int xsm_unhide_device(xsm_default_t def, struct domain *d, uint32_t machine_bdf)
+{
+    return xsm_ops->unhide_device(d, machine_bdf);
+}
+
+static inline int xsm_test_hidden_device(xsm_default_t def, uint32_t machine_bdf)
+{
+    return xsm_ops->test_hidden_device(machine_bdf);
+}
+
 #endif /* HAS_PASSTHROUGH && HAS_PCI) */
 
 #if defined(CONFIG_HAS_PASSTHROUGH) && defined(CONFIG_HAS_DEVICE_TREE)
diff --git a/xen/xsm/dummy.c b/xen/xsm/dummy.c
index 3cb5492..78111bb 100644
--- a/xen/xsm/dummy.c
+++ b/xen/xsm/dummy.c
@@ -94,6 +94,9 @@ void __init xsm_fixup_ops (struct xsm_operations *ops)
     set_to_dummy_if_null(ops, test_assign_device);
     set_to_dummy_if_null(ops, assign_device);
     set_to_dummy_if_null(ops, deassign_device);
+    set_to_dummy_if_null(ops, hide_device);
+    set_to_dummy_if_null(ops, unhide_device);
+    set_to_dummy_if_null(ops, test_hidden_device);
 #endif
 
 #if defined(CONFIG_HAS_PASSTHROUGH) && defined(CONFIG_HAS_DEVICE_TREE)
diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
index fd84ac0..3695768 100644
--- a/xen/xsm/flask/hooks.c
+++ b/xen/xsm/flask/hooks.c
@@ -1311,6 +1311,22 @@ static int flask_deassign_device(struct domain *d, uint32_t machine_bdf)
 
     return avc_current_has_perm(rsid, SECCLASS_RESOURCE, RESOURCE__REMOVE_DEVICE, NULL);
 }
+
+static int flask_unhide_device(struct domain *d, uint32_t machine_bdf)
+{
+    return flask_deassign_device(d, machine_bdf);
+}
+
+static int flask_hide_device(struct domain *d, uint32_t machine_bdf)
+{
+    return flask_assign_device(d, machine_bdf);
+}
+
+static int flask_test_hidden_device(struct domain *d, uint32_t machine_bdf)
+{
+    return flask_test_assign_device(d, machine_bdf);
+}
+
 #endif /* HAS_PASSTHROUGH && HAS_PCI */
 
 #if defined(CONFIG_HAS_PASSTHROUGH) && defined(CONFIG_HAS_DEVICE_TREE)
@@ -1783,6 +1799,9 @@ static struct xsm_operations flask_ops = {
     .test_assign_device = flask_test_assign_device,
     .assign_device = flask_assign_device,
     .deassign_device = flask_deassign_device,
+    .hide_device = flask_hide_device,
+    .unhide_device = flask_unhide_device,
+    .test_hidden_device = flask_test_hidden_device,
 #endif
 
 #if defined(CONFIG_HAS_PASSTHROUGH) && defined(CONFIG_HAS_DEVICE_TREE)
diff --git a/xen/xsm/flask/policy/access_vectors b/xen/xsm/flask/policy/access_vectors
index 1f7eb35..873df59 100644
--- a/xen/xsm/flask/policy/access_vectors
+++ b/xen/xsm/flask/policy/access_vectors
@@ -437,13 +437,13 @@ class resource
 # XEN_DOMCTL_iomem_permission, XEN_DOMCTL_memory_mapping
     add_iomem
     remove_iomem
-# XEN_DOMCTL_get_device_group, XEN_DOMCTL_test_assign_device:
+# XEN_DOMCTL_get_device_group, XEN_DOMCTL_test_assign_device, XEN_DOMCTL_test_hidden_device:
 #  source = domain making the hypercall
 #  target = device being queried
     stat_device
-# XEN_DOMCTL_assign_device
+# XEN_DOMCTL_assign_device, XEN_DOMCTL_hide_device
     add_device
-# XEN_DOMCTL_deassign_device
+# XEN_DOMCTL_deassign_device, XEN_DOMCTL_unhide_device
     remove_device
 # checked for PCI hot and cold-plug hypercalls, with target as the PCI device
 # checked for CPU and memory hotplug with xen_t as the target
diff --git a/docs/man/xl.pod.1.in b/docs/man/xl.pod.1.in
index 78bf884..86f7089 100644
--- a/docs/man/xl.pod.1.in
+++ b/docs/man/xl.pod.1.in
@@ -1462,6 +1462,13 @@ These are devices in the system which are configured to be
 available for passthrough and are bound to a suitable PCI
 backend driver in domain 0 rather than a real driver.
 
+=item B<pci-assignable-list-hidden>
+
+List all the assignable PCI devices that are hidden.
+When a PCI device assigned to a guest in passthrough mode causes
+unrecoverable AER errors, the hypervisor shuts down the guest and hides
+the device from being assignable to the guests.
+
 =item B<pci-assignable-add> I<BDF>
 
 Make the device at PCI Bus/Device/Function BDF assignable to guests.
@@ -1484,6 +1491,23 @@ it will also attempt to re-bind the device to its original driver, making it
 usable by Domain 0 again.  If the device is not bound to pciback, it will
 return success.
 
+=item B<pci-assignable-hide> I<BDF>
+
+Hide the device at PCI Bus/Device/Function BDF from being assignable
+to guests, similar to the way the hypervisor would hide the device that
+caused unrecoverable AER errors.
+When a PCI device assigned to a guest in passthrough mode causes
+unrecoverable AER errors, the hypervisor shuts down the guest and hides
+the device from being assignable to the guests.
+
+=item B<pci-assignable-unhide> I<BDF>
+
+Unhide the device at PCI Bus/Device/Function BDF that was previously
+hidden by the hypervisor due to unrecoverable AER errors.
+When a PCI device assigned to a guest in passthrough mode causes
+unrecoverable AER errors, the hypervisor shuts down the guest and hides
+the device from being assignable to the guests.
+
 =item B<pci-attach> I<domain-id> I<BDF>
 
 Hot-plug a new pass-through pci device to the specified domain.
diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index 1629f41..9730285 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -1670,6 +1670,10 @@ int xc_assign_device(xc_interface *xch,
                      uint32_t machine_sbdf,
                      uint32_t flag);
 
+int xc_hide_device(xc_interface *xch, uint32_t machine_bdf);
+int xc_unhide_device(xc_interface *xch, uint32_t machine_bdf);
+int xc_test_hidden_device(xc_interface *xch, uint32_t machine_bdf);
+
 int xc_get_device_group(xc_interface *xch,
                      uint32_t domid,
                      uint32_t machine_sbdf,
diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
index 00909ad4..714d632 100644
--- a/tools/libxc/xc_domain.c
+++ b/tools/libxc/xc_domain.c
@@ -1501,6 +1501,44 @@ int xc_assign_device(
     return do_domctl(xch, &domctl);
 }
 
+int xc_hide_device(
+    xc_interface *xch,
+    uint32_t machine_sbdf)
+{
+    DECLARE_DOMCTL;
+
+    domctl.cmd = XEN_DOMCTL_hide_device;
+    domctl.domain = DOMID_XEN;
+    domctl.u.assign_device.u.pci.machine_sbdf = machine_sbdf;
+
+    return do_domctl(xch, &domctl);
+}
+
+int xc_unhide_device(
+    xc_interface *xch,
+    uint32_t machine_sbdf)
+{
+    DECLARE_DOMCTL;
+
+    domctl.cmd = XEN_DOMCTL_unhide_device;
+    domctl.domain = DOMID_XEN;
+    domctl.u.assign_device.u.pci.machine_sbdf = machine_sbdf;
+
+    return do_domctl(xch, &domctl);
+}
+
+int xc_test_hidden_device(
+    xc_interface *xch,
+    uint32_t machine_sbdf)
+{
+    DECLARE_DOMCTL;
+
+    domctl.cmd = XEN_DOMCTL_test_hidden_device;
+    domctl.u.assign_device.u.pci.machine_sbdf = machine_sbdf;
+
+    return do_domctl(xch, &domctl);
+}
+
 int xc_get_device_group(
     xc_interface *xch,
     uint32_t domid,
diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
index cf8687a..5a5bd14 100644
--- a/tools/libxl/libxl.h
+++ b/tools/libxl/libxl.h
@@ -1944,6 +1944,9 @@ int libxl_device_events_handler(libxl_ctx *ctx,
 int libxl_device_pci_assignable_add(libxl_ctx *ctx, libxl_device_pci *pcidev, int rebind);
 int libxl_device_pci_assignable_remove(libxl_ctx *ctx, libxl_device_pci *pcidev, int rebind);
 libxl_device_pci *libxl_device_pci_assignable_list(libxl_ctx *ctx, int *num);
+int libxl_device_pci_assignable_hide(libxl_ctx *ctx, libxl_device_pci *pcidev);
+int libxl_device_pci_assignable_unhide(libxl_ctx *ctx, libxl_device_pci *pcidev);
+int libxl_device_pci_assignable_is_hidden(libxl_ctx *ctx, libxl_device_pci *pcidev);
 
 /* CPUID handling */
 int libxl_cpuid_parse_config(libxl_cpuid_policy_list *cpuid, const char* str);
diff --git a/tools/libxl/libxl_event.h b/tools/libxl/libxl_event.h
index 1ea789e..4c78798 100644
--- a/tools/libxl/libxl_event.h
+++ b/tools/libxl/libxl_event.h
@@ -178,6 +178,8 @@ void libxl_event_register_callbacks(libxl_ctx *ctx,
 typedef struct libxl__evgen_domain_death libxl_evgen_domain_death;
 int libxl_evenable_domain_death(libxl_ctx *ctx, uint32_t domid,
                          libxl_ev_user, libxl_evgen_domain_death **evgen_out);
+int libxl_reg_aer_events_handler(libxl_ctx *, uint32_t) LIBXL_EXTERNAL_CALLERS_ONLY;
+void libxl_unreg_aer_events_handler(libxl_ctx *, uint32_t);
 void libxl_evdisable_domain_death(libxl_ctx *ctx, libxl_evgen_domain_death*);
   /* Arranges for the generation of DOMAIN_SHUTDOWN and DOMAIN_DEATH
    * events.  A domain which is destroyed before it shuts down
diff --git a/tools/libxl/libxl_pci.c b/tools/libxl/libxl_pci.c
index b14df16..ad7fb27 100644
--- a/tools/libxl/libxl_pci.c
+++ b/tools/libxl/libxl_pci.c
@@ -874,6 +874,42 @@ int libxl_device_pci_assignable_add(libxl_ctx *ctx, libxl_device_pci *pcidev,
     return rc;
 }
 
+int libxl_device_pci_assignable_hide(libxl_ctx *ctx, libxl_device_pci *pcidev)
+{
+    GC_INIT(ctx);
+    int rc;
+
+    rc = xc_hide_device(ctx->xch, pcidev_encode_bdf(pcidev));
+    if (rc < 0)
+        LOGD(ERROR, 0, "xc_hide_device failed");
+
+    GC_FREE;
+    return rc;
+}
+
+int libxl_device_pci_assignable_unhide(libxl_ctx *ctx, libxl_device_pci *pcidev)
+{
+    GC_INIT(ctx);
+    int rc;
+
+    rc = xc_unhide_device(ctx->xch, pcidev_encode_bdf(pcidev));
+    if (rc < 0)
+        LOGD(ERROR, 0, "xc_unhide_device failed");
+
+    GC_FREE;
+    return rc;
+}
+
+int libxl_device_pci_assignable_is_hidden(libxl_ctx *ctx, libxl_device_pci *pcidev)
+{
+    GC_INIT(ctx);
+    int rc;
+
+    rc = xc_test_hidden_device(ctx->xch, pcidev_encode_bdf(pcidev));
+
+    GC_FREE;
+    return rc;
+}
 
 int libxl_device_pci_assignable_remove(libxl_ctx *ctx, libxl_device_pci *pcidev,
                                        int rebind)
@@ -1292,6 +1328,138 @@ out:
     return rc;
 }
 
+static void domain_destroy_callback(libxl__egc *egc,
+                                    libxl__domain_destroy_state *dds,
+                                    int rc)
+{
+    STATE_AO_GC(dds->ao);
+
+    LOGD(ERROR, dds->domid, "domain_destroy_callback(): Entered");
+    if (rc)
+        LOGD(ERROR, dds->domid, "Destruction of domain failed, rc = %d", rc);
+
+    libxl__ao_complete(egc, ao, rc);
+    LOGD(ERROR, dds->domid, "domain_destroy_callback(): Exiting");
+}
+
+
+static int domain_destroy(libxl_ctx *ctx, uint32_t domid)
+{
+    AO_CREATE(ctx, domid, (const libxl_asyncop_how *)0);
+    libxl__domain_destroy_state *dds;
+
+    LOGD(ERROR, domid, "domain_destroy(): Entered");
+    GCNEW(dds);
+    dds->ao = ao;
+    dds->domid = domid;
+    dds->callback = domain_destroy_callback;
+    libxl__domain_destroy(egc, dds);
+
+    LOGD(ERROR, domid, "domain_destroy(): ao->complete = %d", ao->complete);
+    while (!ao->complete) {
+        LOGD(ERROR, domid, "domain_destroy(): ao->complete = %d", ao->complete);
+        sleep(5);
+    }
+
+    LOGD(ERROR, domid, "domain_destroy(): Exiting, rc = 0x%08x", ao->rc);
+    return ao->rc;
+}
+
+typedef struct {
+    uint32_t domid;
+    libxl__ev_xswatch watch;
+} libxl_aer_watch;
+static libxl_aer_watch aer_watch;
+
+static void aer_backend_watch_callback(libxl__egc *egc,
+                                       libxl__ev_xswatch *watch,
+                                       const char *watch_path,
+                                       const char *event_path)
+{
+    EGC_GC;
+    libxl_aer_watch *l_aer_watch = CONTAINER_OF(watch, *l_aer_watch, watch);
+    libxl_ctx *ctx = libxl__gc_owner(gc);
+    uint32_t domid = l_aer_watch->domid;
+    uint32_t seg, bus, dev, fn;
+    int rc;
+    char *p, *path, *dst_path;
+    const char *aerFailedSBDF;
+    struct xs_permissions rwperm[1];
+
+    LOGD(ERROR, domid, " Entered, ctx = %p", ctx);
+    if (strlen(watch_path) > 0) LOGD(ERROR, domid, " watch_path = %s", watch_path);
+    if (strlen(event_path) > 0) LOGD(ERROR, domid, " event_path = %s", event_path);
+    LOGD(ERROR, domid, " domid = %d", domid);
+
+    /* Extract the backend directory. */
+    path = libxl__strdup(gc, event_path);
+    p = strrchr(path, '/');
+    if (p == NULL)
+        goto skip;
+    if (strcmp(p, "/aerFailedSBDF") != 0)
+        goto skip;
+    /* Truncate the string so it points to the backend directory. */
+    *p = '\0';
+
+    /* Fetch the value of the failed PCI device. */
+    rc = libxl__xs_read_checked(gc, XBT_NULL,
+            GCSPRINTF("%s/aerFailedSBDF", path), &aerFailedSBDF);
+    if (rc || !aerFailedSBDF)
+        goto skip;
+
+    libxl_unreg_aer_events_handler(ctx, domid);
+
+    rc = domain_destroy(ctx, domid);
+
+    sscanf(aerFailedSBDF, "%x:%x:%x.%x", &seg, &bus, &dev, &fn);
+    rc = xc_hide_device(ctx->xch, seg << 16 | bus << 8 | dev << 3 | fn);
+    if (rc)
+        LOGD(ERROR, domid, " xc_hide_device() failed, rc = %d", rc);
+
+    rwperm[0].id = 0;
+    rwperm[0].perms = XS_PERM_READ | XS_PERM_WRITE;
+    dst_path = GCSPRINTF("/local/domain/0/backend/pci/0/0/%s", "aerFailedPCIs");
+    rc = libxl__xs_mknod(gc, XBT_NULL, dst_path, rwperm, 1);
+    if (rc) {
+        LOGD(ERROR, domid, " libxl__xs_mknod() failed, rc = %d", rc);
+        goto skip;
+    }
+
+    rc = libxl__xs_write_checked(gc, XBT_NULL, dst_path, aerFailedSBDF);
+    if (rc)
+        LOGD(ERROR, domid, " libxl__xs_write_checked() failed, rc = %d", rc);
+
+skip:
+    return;
+}
+
+/* Handler of events for device driver domains */
+int libxl_reg_aer_events_handler(libxl_ctx *ctx, uint32_t domid)
+{
+    int rc;
+    char *be_path;
+    GC_INIT(ctx);
+
+    /*
+     * We use absolute paths because we want xswatch to also return
+     * absolute paths that can be parsed by libxl__parse_backend_path.
+     */
+    aer_watch.domid = domid;
+    be_path = GCSPRINTF("/local/domain/0/backend/pci/%u/0/aerFailedSBDF", domid);
+    rc = libxl__ev_xswatch_register(gc, &aer_watch.watch,
+                                    aer_backend_watch_callback, be_path);
+    return rc;
+}
+
+/* Handler of events for device driver domains */
+void libxl_unreg_aer_events_handler(libxl_ctx *ctx, uint32_t domid)
+{
+    GC_INIT(ctx);
+
+    libxl__ev_xswatch_deregister(gc, &aer_watch.watch);
+    return;
+}
+
 static void libxl__add_pcidevs(libxl__egc *egc, libxl__ao *ao, uint32_t domid,
                                libxl_domain_config *d_config,
                                libxl__multidev *multidev)
diff --git a/tools/python/xen/lowlevel/xc/xc.c b/tools/python/xen/lowlevel/xc/xc.c
index 5d112af..ad822df 100644
--- a/tools/python/xen/lowlevel/xc/xc.c
+++ b/tools/python/xen/lowlevel/xc/xc.c
@@ -609,6 +609,76 @@ static PyObject *pyxc_deassign_device(XcObject *self,
     return Py_BuildValue("i", sbdf);
 }
 
+static PyObject *pyxc_hide_device(XcObject *self,
+                                  PyObject *args,
+                                  PyObject *kwds)
+{
+    uint32_t sbdf = 0;
+    char *pci_str;
+    int seg, bus, dev, func;
+    static char *kwd_list[] = { "pci", NULL };
+
+    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "is", kwd_list, &pci_str) )
+    {
+        sbdf = -1;
+        goto end_hide;
+    }
+
+    while ( next_bdf(&pci_str, &seg, &bus, &dev, &func) )
+    {
+        sbdf = seg << 16;
+        sbdf |= (bus & 0xff) << 8;
+        sbdf |= (dev & 0x1f) << 3;
+        sbdf |= (func & 0x7);
+
+        if ( xc_hide_device(self->xc_handle, sbdf) != 0 )
+        {
+            if ( errno == ENOSYS )
+                sbdf = -1;
+            break;
+        }
+        sbdf = 0;
+    }
+
+end_hide:
+    return Py_BuildValue("i", sbdf);
+}
+
+static PyObject *pyxc_unhide_device(XcObject *self,
+                                    PyObject *args,
+                                    PyObject *kwds)
+{
+    uint32_t sbdf = 0;
+    char *pci_str;
+    int seg, bus, dev, func;
+    static char *kwd_list[] = { "pci", NULL };
+
+    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "is", kwd_list, &pci_str) )
+    {
+        sbdf = -1;
+        goto end_unhide;
+    }
+
+    while ( next_bdf(&pci_str, &seg, &bus, &dev, &func) )
+    {
+        sbdf = seg << 16;
+        sbdf |= (bus & 0xff) << 8;
+        sbdf |= (dev & 0x1f) << 3;
+        sbdf |= (func & 0x7);
+
+        if ( xc_unhide_device(self->xc_handle, sbdf) != 0 )
+        {
+            if ( errno == ENOSYS )
+                sbdf = -1;
+            break;
+        }
+        sbdf = 0;
+    }
+
+end_unhide:
+    return Py_BuildValue("i", sbdf);
+}
+
 static PyObject *pyxc_get_device_group(XcObject *self,
                                          PyObject *args)
 {
@@ -2233,7 +2303,21 @@ static PyMethodDef pyxc_methods[] = {
        " dom     [int]:      Domain to deassign device from.\n"
        " pci_str [str]:      PCI devices.\n"
        "Returns: [int] 0 on success, or device bdf that can't be deassigned.\n" },
+
+     { "hide_device",
+       (PyCFunction)pyxc_hide_device,
+       METH_VARARGS | METH_KEYWORDS, "\n"
+       "Hide device after AER fatal error trigger.\n"
+       " pci_str [str]:      PCI devices.\n"
+       "Returns: [int] device bdf on success or -1 if it cant be hidden.\n" },
   
+     { "unhide_device",
+       (PyCFunction)pyxc_unhide_device,
+       METH_VARARGS | METH_KEYWORDS, "\n"
+       "Unhide hidden device after AER fatal error trigger.\n"
+       " pci_str [str]:      PCI devices.\n"
+       "Returns: [int] device bdf on success or -1 if it cant be unhidden.\n" },
+
     { "sched_id_get",
       (PyCFunction)pyxc_sched_id_get,
       METH_NOARGS, "\n"
diff --git a/tools/xl/xl.h b/tools/xl/xl.h
index aa95b77..915fe86 100644
--- a/tools/xl/xl.h
+++ b/tools/xl/xl.h
@@ -121,9 +121,12 @@ int main_vncviewer(int argc, char **argv);
 int main_pcilist(int argc, char **argv);
 int main_pcidetach(int argc, char **argv);
 int main_pciattach(int argc, char **argv);
+int main_pciassignable_hide(int argc, char **argv);
+int main_pciassignable_unhide(int argc, char **argv);
 int main_pciassignable_add(int argc, char **argv);
 int main_pciassignable_remove(int argc, char **argv);
 int main_pciassignable_list(int argc, char **argv);
+int main_pciassignable_list_hidden(int argc, char **argv);
 #ifndef LIBXL_HAVE_NO_SUSPEND_RESUME
 int main_restore(int argc, char **argv);
 int main_migrate_receive(int argc, char **argv);
diff --git a/tools/xl/xl_cmdtable.c b/tools/xl/xl_cmdtable.c
index 30eb93c..e23bd15 100644
--- a/tools/xl/xl_cmdtable.c
+++ b/tools/xl/xl_cmdtable.c
@@ -119,6 +119,23 @@ struct cmd_spec cmd_table[] = {
       "List all the assignable pci devices",
       "",
     },
+    { "pci-assignable-list-hidden",
+      &main_pciassignable_list_hidden, 0, 0,
+      "List all the pci devices hidden due to AER errors",
+      "",
+    },
+    { "pci-assignable-hide",
+      &main_pciassignable_hide, 0, 1,
+      "Hide a PCI device",
+      "<BDF>",
+      "-h                      Print this help.\n"
+    },
+    { "pci-assignable-unhide",
+      &main_pciassignable_unhide, 0, 1,
+      "Unhide a PCI device",
+      "<BDF>",
+      "-h                      Print this help.\n"
+    },
     { "pause",
       &main_pause, 0, 1,
       "Pause execution of a domain",
diff --git a/tools/xl/xl_pci.c b/tools/xl/xl_pci.c
index 58345bd..f48c469 100644
--- a/tools/xl/xl_pci.c
+++ b/tools/xl/xl_pci.c
@@ -163,8 +163,9 @@ static void pciassignable_list(void)
     if ( pcidevs == NULL )
         return;
     for (i = 0; i < num; i++) {
-        printf("%04x:%02x:%02x.%01x\n",
-               pcidevs[i].domain, pcidevs[i].bus, pcidevs[i].dev, pcidevs[i].func);
+        if (!libxl_device_pci_assignable_is_hidden(ctx, &pcidevs[i]))
+            printf("%04x:%02x:%02x.%01x\n",
+                   pcidevs[i].domain, pcidevs[i].bus, pcidevs[i].dev, pcidevs[i].func);
         libxl_device_pci_dispose(&pcidevs[i]);
     }
     free(pcidevs);
@@ -182,6 +183,126 @@ int main_pciassignable_list(int argc, char **argv)
     return 0;
 }
 
+static void pciassignable_list_hidden(void)
+{
+    libxl_device_pci *pcidevs;
+    int num, i;
+
+    pcidevs = libxl_device_pci_assignable_list(ctx, &num);
+
+    if ( pcidevs == NULL )
+        return;
+    for (i = 0; i < num; i++) {
+        if (libxl_device_pci_assignable_is_hidden(ctx, &pcidevs[i]))
+            printf("%04x:%02x:%02x.%01x\n",
+                   pcidevs[i].domain, pcidevs[i].bus, pcidevs[i].dev, pcidevs[i].func);
+        libxl_device_pci_dispose(&pcidevs[i]);
+    }
+    free(pcidevs);
+}
+
+int main_pciassignable_list_hidden(int argc, char **argv)
+{
+    int opt;
+
+    SWITCH_FOREACH_OPT(opt, "", NULL, "pci-assignable-list-hidden", 0) {
+        /* No options */
+    }
+
+    pciassignable_list_hidden();
+    return 0;
+}
+
+static int pciassignable_hide(const char *bdf)
+{
+    libxl_device_pci pcidev;
+    XLU_Config *config;
+    int r = EXIT_SUCCESS;
+
+    libxl_device_pci_init(&pcidev);
+
+    config = xlu_cfg_init(stderr, "command line");
+    if (!config) {
+        perror("xlu_cfg_init");
+        exit(-1);
+    }
+
+    if (xlu_pci_parse_bdf(config, &pcidev, bdf)) {
+        fprintf(stderr, "pci-assignable-hide: malformed BDF specification \"%s\"\n", bdf);
+        exit(2);
+    }
+
+    if (libxl_device_pci_assignable_hide(ctx, &pcidev))
+        r = EXIT_FAILURE;
+
+    libxl_device_pci_dispose(&pcidev);
+    xlu_cfg_destroy(config);
+
+    return r;
+}
+
+int main_pciassignable_hide(int argc, char **argv)
+{
+    int opt;
+    const char *bdf = NULL;
+
+    SWITCH_FOREACH_OPT(opt, "", NULL, "main_pciassignable_hide", 1) {
+        /* No options */
+    }
+
+    bdf = argv[optind];
+
+    if (pciassignable_hide(bdf))
+        return EXIT_FAILURE;
+
+    return EXIT_SUCCESS;
+}
+
+static int pciassignable_unhide(const char *bdf)
+{
+    libxl_device_pci pcidev;
+    XLU_Config *config;
+    int r = EXIT_SUCCESS;
+
+    libxl_device_pci_init(&pcidev);
+
+    config = xlu_cfg_init(stderr, "command line");
+    if (!config) {
+        perror("xlu_cfg_init");
+        exit(-1);
+    }
+
+    if (xlu_pci_parse_bdf(config, &pcidev, bdf)) {
+        fprintf(stderr, "pci-assignable-unhide: malformed BDF specification \"%s\"\n", bdf);
+        exit(2);
+    }
+
+    if (libxl_device_pci_assignable_unhide(ctx, &pcidev))
+        r = EXIT_FAILURE;
+
+    libxl_device_pci_dispose(&pcidev);
+    xlu_cfg_destroy(config);
+
+    return r;
+}
+
+int main_pciassignable_unhide(int argc, char **argv)
+{
+    int opt;
+    const char *bdf = NULL;
+
+    SWITCH_FOREACH_OPT(opt, "", NULL, "main_pciassignable_unhide", 1) {
+        /* No options */
+    }
+
+    bdf = argv[optind];
+
+    if (pciassignable_unhide(bdf))
+        return EXIT_FAILURE;
+
+    return EXIT_SUCCESS;
+}
+
 static int pciassignable_add(const char *bdf, int rebind)
 {
     libxl_device_pci pcidev;
diff --git a/tools/xl/xl_vmcontrol.c b/tools/xl/xl_vmcontrol.c
index 89c2b25..10a48a9 100644
--- a/tools/xl/xl_vmcontrol.c
+++ b/tools/xl/xl_vmcontrol.c
@@ -966,6 +966,15 @@ start:
     LOG("Waiting for domain %s (domid %u) to die [pid %ld]",
         d_config.c_info.name, domid, (long)getpid());
 
+    ret = libxl_reg_aer_events_handler(ctx, domid);
+    if (ret) {
+        /*
+         * This error may not be severe enough to fail the creation of the VM.
+         * Log the error, and continue with the creation.
+         */
+        LOG("libxl_reg_aer_events_handler() failed, ret = 0x%08x", ret);
+    }
+
     ret = libxl_evenable_domain_death(ctx, domid, 0, &deathw);
     if (ret) goto out;
 
@@ -993,6 +1002,7 @@ start:
             LOG("Domain %u has shut down, reason code %d 0x%x", domid,
                 event->u.domain_shutdown.shutdown_reason,
                 event->u.domain_shutdown.shutdown_reason);
+            libxl_unreg_aer_events_handler(ctx, domid);
             switch (handle_domain_death(&domid, event, &d_config)) {
             case DOMAIN_RESTART_SOFT_RESET:
                 domid_soft_reset = domid;
@@ -1059,6 +1069,7 @@ start:
 
         case LIBXL_EVENT_TYPE_DOMAIN_DEATH:
             LOG("Domain %u has been destroyed.", domid);
+            libxl_unreg_aer_events_handler(ctx, domid);
             libxl_event_free(ctx, event);
             ret = 0;
             goto out;

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

             reply	other threads:[~2017-06-07 19:24 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-06-07 19:24 Venu Busireddy [this message]
2017-06-20 11:56 ` Containing unrecoverable AER errors Wei Liu
2017-06-29 15:46   ` Venu Busireddy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170607192432.20500-1-venu.busireddy@oracle.com \
    --to=venu.busireddy@oracle.com \
    --cc=ian.jackson@eu.citrix.com \
    --cc=wei.liu2@citrix.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.