All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH 0/2] vfio-ccw: Implement request notifier
@ 2020-11-17  3:26 Eric Farman
  2020-11-17  3:26 ` [RFC PATCH 1/2] Update linux headers Eric Farman
  2020-11-17  3:26 ` [RFC PATCH 2/2] vfio-ccw: Connect the device request notifier Eric Farman
  0 siblings, 2 replies; 7+ messages in thread
From: Eric Farman @ 2020-11-17  3:26 UTC (permalink / raw)
  To: Cornelia Huck, Thomas Huth, Alex Williamson
  Cc: Eric Farman, qemu-s390x, qemu-devel

This is the corresponding QEMU code for the kernel series posted here:

https://lore.kernel.org/kvm/20201117032139.50988-1-farman@linux.ibm.com/

Long story short, when a device disappears because of a subchannel
event, userspace can receive a notification that the device should
be released as it is no longer usable. This implements that for the
vfio-ccw interface.

Eric Farman (2):
  Update linux headers
  vfio-ccw: Connect the device request notifier

 hw/vfio/ccw.c              | 40 ++++++++++++++++++++++++++++++++++----
 linux-headers/linux/vfio.h |  1 +
 2 files changed, 37 insertions(+), 4 deletions(-)

-- 
2.17.1



^ permalink raw reply	[flat|nested] 7+ messages in thread

* [RFC PATCH 1/2] Update linux headers
  2020-11-17  3:26 [RFC PATCH 0/2] vfio-ccw: Implement request notifier Eric Farman
@ 2020-11-17  3:26 ` Eric Farman
  2020-11-17  3:26 ` [RFC PATCH 2/2] vfio-ccw: Connect the device request notifier Eric Farman
  1 sibling, 0 replies; 7+ messages in thread
From: Eric Farman @ 2020-11-17  3:26 UTC (permalink / raw)
  To: Cornelia Huck, Thomas Huth, Alex Williamson
  Cc: Eric Farman, qemu-s390x, qemu-devel

This is a placeholder for a proper run of scripts/update-linux-headers.sh

Signed-off-by: Eric Farman <farman@linux.ibm.com>
---
 linux-headers/linux/vfio.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
index b92dcc4daf..609099e455 100644
--- a/linux-headers/linux/vfio.h
+++ b/linux-headers/linux/vfio.h
@@ -820,6 +820,7 @@ enum {
 enum {
 	VFIO_CCW_IO_IRQ_INDEX,
 	VFIO_CCW_CRW_IRQ_INDEX,
+	VFIO_CCW_REQ_IRQ_INDEX,
 	VFIO_CCW_NUM_IRQS
 };
 
-- 
2.17.1



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [RFC PATCH 2/2] vfio-ccw: Connect the device request notifier
  2020-11-17  3:26 [RFC PATCH 0/2] vfio-ccw: Implement request notifier Eric Farman
  2020-11-17  3:26 ` [RFC PATCH 1/2] Update linux headers Eric Farman
@ 2020-11-17  3:26 ` Eric Farman
  2020-11-19 11:58   ` Cornelia Huck
  2020-11-20  2:51   ` Halil Pasic
  1 sibling, 2 replies; 7+ messages in thread
From: Eric Farman @ 2020-11-17  3:26 UTC (permalink / raw)
  To: Cornelia Huck, Thomas Huth, Alex Williamson
  Cc: Eric Farman, qemu-s390x, qemu-devel

Now that the vfio-ccw code has a notifier interface to request that
a device be unplugged, let's wire that together.

Signed-off-by: Eric Farman <farman@linux.ibm.com>
---
 hw/vfio/ccw.c | 40 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 36 insertions(+), 4 deletions(-)

diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
index d2755d7fc5..bc78a0ad76 100644
--- a/hw/vfio/ccw.c
+++ b/hw/vfio/ccw.c
@@ -49,6 +49,7 @@ struct VFIOCCWDevice {
     struct ccw_crw_region *crw_region;
     EventNotifier io_notifier;
     EventNotifier crw_notifier;
+    EventNotifier req_notifier;
     bool force_orb_pfch;
     bool warned_orb_pfch;
 };
@@ -287,6 +288,21 @@ static void vfio_ccw_crw_read(VFIOCCWDevice *vcdev)
     } while (1);
 }
 
+static void vfio_ccw_req_notifier_handler(void *opaque)
+{
+    VFIOCCWDevice *vcdev = opaque;
+    Error *err = NULL;
+
+    if (!event_notifier_test_and_clear(&vcdev->req_notifier)) {
+        return;
+    }
+
+    qdev_unplug(DEVICE(vcdev), &err);
+    if (err) {
+        warn_reportf_err(err, VFIO_MSG_PREFIX, vcdev->vdev.name);
+    }
+}
+
 static void vfio_ccw_crw_notifier_handler(void *opaque)
 {
     VFIOCCWDevice *vcdev = opaque;
@@ -386,6 +402,10 @@ static void vfio_ccw_register_irq_notifier(VFIOCCWDevice *vcdev,
         notifier = &vcdev->crw_notifier;
         fd_read = vfio_ccw_crw_notifier_handler;
         break;
+    case VFIO_CCW_REQ_IRQ_INDEX:
+        notifier = &vcdev->req_notifier;
+        fd_read = vfio_ccw_req_notifier_handler;
+        break;
     default:
         error_setg(errp, "vfio: Unsupported device irq(%d)", irq);
         return;
@@ -440,6 +460,9 @@ static void vfio_ccw_unregister_irq_notifier(VFIOCCWDevice *vcdev,
     case VFIO_CCW_CRW_IRQ_INDEX:
         notifier = &vcdev->crw_notifier;
         break;
+    case VFIO_CCW_REQ_IRQ_INDEX:
+        notifier = &vcdev->req_notifier;
+        break;
     default:
         error_report("vfio: Unsupported device irq(%d)", irq);
         return;
@@ -661,20 +684,28 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp)
 
     vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX, &err);
     if (err) {
-        goto out_notifier_err;
+        goto out_io_notifier_err;
     }
 
     if (vcdev->crw_region) {
         vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_CRW_IRQ_INDEX, &err);
         if (err) {
-            vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX);
-            goto out_notifier_err;
+            goto out_crw_notifier_err;
         }
     }
 
+    vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_REQ_IRQ_INDEX, &err);
+    if (err) {
+        goto out_req_notifier_err;
+    }
+
     return;
 
-out_notifier_err:
+out_req_notifier_err:
+    vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_CRW_IRQ_INDEX);
+out_crw_notifier_err:
+    vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX);
+out_io_notifier_err:
     vfio_ccw_put_region(vcdev);
 out_region_err:
     vfio_ccw_put_device(vcdev);
@@ -696,6 +727,7 @@ static void vfio_ccw_unrealize(DeviceState *dev)
     S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev);
     VFIOGroup *group = vcdev->vdev.group;
 
+    vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_REQ_IRQ_INDEX);
     vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_CRW_IRQ_INDEX);
     vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX);
     vfio_ccw_put_region(vcdev);
-- 
2.17.1



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [RFC PATCH 2/2] vfio-ccw: Connect the device request notifier
  2020-11-17  3:26 ` [RFC PATCH 2/2] vfio-ccw: Connect the device request notifier Eric Farman
@ 2020-11-19 11:58   ` Cornelia Huck
  2020-11-20  2:51   ` Halil Pasic
  1 sibling, 0 replies; 7+ messages in thread
From: Cornelia Huck @ 2020-11-19 11:58 UTC (permalink / raw)
  To: Eric Farman; +Cc: qemu-s390x, Thomas Huth, Alex Williamson, qemu-devel

On Tue, 17 Nov 2020 04:26:05 +0100
Eric Farman <farman@linux.ibm.com> wrote:

> Now that the vfio-ccw code has a notifier interface to request that
> a device be unplugged, let's wire that together.
> 
> Signed-off-by: Eric Farman <farman@linux.ibm.com>
> ---
>  hw/vfio/ccw.c | 40 ++++++++++++++++++++++++++++++++++++----
>  1 file changed, 36 insertions(+), 4 deletions(-)

This looks good to me.

Reviewed-by: Cornelia Huck <cohuck@redhat.com>



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC PATCH 2/2] vfio-ccw: Connect the device request notifier
  2020-11-17  3:26 ` [RFC PATCH 2/2] vfio-ccw: Connect the device request notifier Eric Farman
  2020-11-19 11:58   ` Cornelia Huck
@ 2020-11-20  2:51   ` Halil Pasic
  2020-11-20 11:38     ` Availability of physical devices and migration (was: Re: [RFC PATCH 2/2] vfio-ccw: Connect the device request notifier) Cornelia Huck
  1 sibling, 1 reply; 7+ messages in thread
From: Halil Pasic @ 2020-11-20  2:51 UTC (permalink / raw)
  To: Eric Farman
  Cc: Thomas Huth, Boris Fiuczynski, Cornelia Huck, Alex Williamson,
	qemu-devel, qemu-s390x, Marc Hartmayer

On Tue, 17 Nov 2020 04:26:05 +0100
Eric Farman <farman@linux.ibm.com> wrote:

> Now that the vfio-ccw code has a notifier interface to request that
> a device be unplugged, let's wire that together.

I'm aware of the fact that performing an unplug is what vfio-pci does,
but I was not aware of this before, and I became curious with regards
to how is this going to mesh with migration (in the future).

The sentence 'For this to work, QEMU has to be launched with the same
arguments the two times.' form docs/devel/migration.rst should not
be taken literally, I know, but the VM configuration changing not because
it was changed via a management interface, but because of events that
may not be under the control of whoever is managing the VM does
make thinks harder to reason about.

I suppose, we now basically mandate that whoever manages the VM, either
a) maintains his own model of the VM and listens to the events, to
update it if such a device removal happens, or
b) inspects the VM at some appropriate point in time, to figure out how
the target VM is supposed to be started.

I think libvirt does a).

I also suppose, such a device removal may not happen during device
migration. That is, form the QEMU perspective I  believe taken care
by the BQL.

But I'm in the dark regarding the management software/libivrt view. For
example what happens if we get a req_notification on the target while
pre-copy memory migration? At that point the destination is already
receiving pages, thus it is already constructed.

My questions are not necessarily addressed to you Eric. Maybe the
folks working on vfio migration can help us out. I'm also adding
our libvirt guys.

Regards,
Halil


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Availability of physical devices and migration (was: Re: [RFC PATCH 2/2] vfio-ccw: Connect the device request notifier)
  2020-11-20  2:51   ` Halil Pasic
@ 2020-11-20 11:38     ` Cornelia Huck
  2020-11-20 12:29       ` Halil Pasic
  0 siblings, 1 reply; 7+ messages in thread
From: Cornelia Huck @ 2020-11-20 11:38 UTC (permalink / raw)
  To: Halil Pasic
  Cc: Eric Farman, Boris Fiuczynski, libvir-list, Alex Williamson,
	qemu-devel, qemu-s390x, Marc Hartmayer, Thomas Huth

On Fri, 20 Nov 2020 03:51:07 +0100
Halil Pasic <pasic@linux.ibm.com> wrote:

> On Tue, 17 Nov 2020 04:26:05 +0100
> Eric Farman <farman@linux.ibm.com> wrote:
> 
> > Now that the vfio-ccw code has a notifier interface to request that
> > a device be unplugged, let's wire that together.  
> 
> I'm aware of the fact that performing an unplug is what vfio-pci does,
> but I was not aware of this before, and I became curious with regards
> to how is this going to mesh with migration (in the future).
> 
> The sentence 'For this to work, QEMU has to be launched with the same
> arguments the two times.' form docs/devel/migration.rst should not
> be taken literally, I know, but the VM configuration changing not because
> it was changed via a management interface, but because of events that
> may not be under the control of whoever is managing the VM does
> make thinks harder to reason about.
> 
> I suppose, we now basically mandate that whoever manages the VM, either
> a) maintains his own model of the VM and listens to the events, to
> update it if such a device removal happens, or
> b) inspects the VM at some appropriate point in time, to figure out how
> the target VM is supposed to be started.
> 
> I think libvirt does a).

This seems like something that would be of general interest to libvirt
folks, adding the list on cc:.

For virtual devices, QEMU and any management software are in full
control, and can simply make sure that both source and target have the
device available.

For physical devices, we still can make sure that source and target
match when we do the setup, but device failures can happen at
inconvenient times. It may suddenly be no longer possible to access
state etc. Can we propagate changes like "device foobar, don't bother
migrating" even when we already started migration? Should the handling
be different if the target system uses a different (compatible) device?
Should we fail the migration?

> 
> I also suppose, such a device removal may not happen during device
> migration. That is, form the QEMU perspective I  believe taken care
> by the BQL.

Even if the device is not actually removed, it might still be
inaccessible.

> 
> But I'm in the dark regarding the management software/libivrt view. For
> example what happens if we get a req_notification on the target while
> pre-copy memory migration? At that point the destination is already
> receiving pages, thus it is already constructed.
> 
> My questions are not necessarily addressed to you Eric. Maybe the
> folks working on vfio migration can help us out. I'm also adding
> our libvirt guys.
> 
> Regards,
> Halil
> 



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: Availability of physical devices and migration (was: Re: [RFC PATCH 2/2] vfio-ccw: Connect the device request notifier)
  2020-11-20 11:38     ` Availability of physical devices and migration (was: Re: [RFC PATCH 2/2] vfio-ccw: Connect the device request notifier) Cornelia Huck
@ 2020-11-20 12:29       ` Halil Pasic
  0 siblings, 0 replies; 7+ messages in thread
From: Halil Pasic @ 2020-11-20 12:29 UTC (permalink / raw)
  To: Cornelia Huck
  Cc: Eric Farman, Boris Fiuczynski, libvir-list, Alex Williamson,
	qemu-devel, qemu-s390x, Marc Hartmayer, Thomas Huth

On Fri, 20 Nov 2020 12:38:37 +0100
Cornelia Huck <cohuck@redhat.com> wrote:

> On Fri, 20 Nov 2020 03:51:07 +0100
> Halil Pasic <pasic@linux.ibm.com> wrote:
> 
> > On Tue, 17 Nov 2020 04:26:05 +0100
> > Eric Farman <farman@linux.ibm.com> wrote:
> > 
> > > Now that the vfio-ccw code has a notifier interface to request that
> > > a device be unplugged, let's wire that together.  
> > 
> > I'm aware of the fact that performing an unplug is what vfio-pci does,
> > but I was not aware of this before, and I became curious with regards
> > to how is this going to mesh with migration (in the future).
> > 
> > The sentence 'For this to work, QEMU has to be launched with the same
> > arguments the two times.' form docs/devel/migration.rst should not
> > be taken literally, I know, but the VM configuration changing not because
> > it was changed via a management interface, but because of events that
> > may not be under the control of whoever is managing the VM does
> > make thinks harder to reason about.
> > 
> > I suppose, we now basically mandate that whoever manages the VM, either
> > a) maintains his own model of the VM and listens to the events, to
> > update it if such a device removal happens, or
> > b) inspects the VM at some appropriate point in time, to figure out how
> > the target VM is supposed to be started.
> > 
> > I think libvirt does a).
> 
> This seems like something that would be of general interest to libvirt
> folks, adding the list on cc:.
> 

I agree. Just didn't now if this stuff was already discussed and
worked out.

> For virtual devices, QEMU and any management software are in full
> control, and can simply make sure that both source and target have the
> device available.
> 
> For physical devices, we still can make sure that source and target
> match when we do the setup, but device failures can happen at
> inconvenient times. It may suddenly be no longer possible to access
> state etc. 

Regarding virtual vs physical, I gave this some thought yesterday after
I've sent my previous email. Now I'm not sure virtual devicess and
pass-through devices are all that different. I mean if for example
you have a virtio-blk device that use a SCSI disk device special file
as backing and that SCSI disk (a physical device) fails, the virtio-blk
device is pretty much has going go dysfunctional. And it ain't different,
if it's backed by a file on a filesystem, and the filesystem fails.

I'm not sure I understand the reason why do we hot unplug the qemu
device, in case of pass-through.

I mean if I plug a physical device, into my physical machine, yes it may
fail at any time and even go completely silent (analogous to guest view),
but the physical machine is AFAIK not likely to catapult out the faulty
component (analogous to the QOM entity). At least for the machines I
owned, I always had to grab a screwdriver and unplug the device myself.
What I'm trying to say, this auto-unplug is a little non-intuitive
for me, but it ain't too bad.

> Can we propagate changes like "device foobar, don't bother
> migrating" even when we already started migration? Should the handling
> be different if the target system uses a different (compatible) device?
> Should we fail the migration?
> 

Thanks for asking these questions. I guess, I wanted to ask them myself,
but I'm not very eloquent. I have no answers.

> > 
> > I also suppose, such a device removal may not happen during device
> > migration. That is, form the QEMU perspective I  believe taken care
> > by the BQL.
> 
> Even if the device is not actually removed, it might still be
> inaccessible.
> 
> > 
> > But I'm in the dark regarding the management software/libivrt view. For
> > example what happens if we get a req_notification on the target while
> > pre-copy memory migration? At that point the destination is already
> > receiving pages, thus it is already constructed.
> > 
> > My questions are not necessarily addressed to you Eric. Maybe the
> > folks working on vfio migration can help us out. I'm also adding
> > our libvirt guys.
> > 
> > Regards,
> > Halil
> > 
> 



^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2020-11-20 12:30 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-11-17  3:26 [RFC PATCH 0/2] vfio-ccw: Implement request notifier Eric Farman
2020-11-17  3:26 ` [RFC PATCH 1/2] Update linux headers Eric Farman
2020-11-17  3:26 ` [RFC PATCH 2/2] vfio-ccw: Connect the device request notifier Eric Farman
2020-11-19 11:58   ` Cornelia Huck
2020-11-20  2:51   ` Halil Pasic
2020-11-20 11:38     ` Availability of physical devices and migration (was: Re: [RFC PATCH 2/2] vfio-ccw: Connect the device request notifier) Cornelia Huck
2020-11-20 12:29       ` Halil Pasic

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.