kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/3] vfio-ccw: support hsch/csch (kernel part)
@ 2018-11-22 16:54 Cornelia Huck
  2018-11-22 16:54 ` [PATCH 1/3] vfio-ccw: add capabilities chain Cornelia Huck
                   ` (5 more replies)
  0 siblings, 6 replies; 54+ messages in thread
From: Cornelia Huck @ 2018-11-22 16:54 UTC (permalink / raw)
  To: Halil Pasic, Eric Farman, Farhan Ali, Pierre Morel
  Cc: linux-s390, kvm, Cornelia Huck, Alex Williamson, qemu-devel, qemu-s390x

[This is the Linux kernel part, git tree is available at
https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/vfio-ccw.git vfio-ccw-caps

The companion QEMU patches are available at
https://github.com/cohuck/qemu vfio-ccw-caps]

Currently, vfio-ccw only relays START SUBCHANNEL requests to the real
device. This tends to work well for the most common 'good path' scenarios;
however, as we emulate {HALT,CLEAR} SUBCHANNEL in QEMU, things like
clearing pending requests at the device are currently not supported.
This may be a problem for e.g. error recovery.

This patch series introduces capabilities (similar to what vfio-pci uses)
and exposes a new async region for handling hsch/csch.

Very lightly tested (I can interact with a dasd as before; I have not
found a reliable way to trigger hsch/csch in the Linux dasd guest driver.)

Cornelia Huck (3):
  vfio-ccw: add capabilities chain
  s390/cio: export hsch to modules
  vfio-ccw: add handling for async channel instructions

 drivers/s390/cio/Makefile           |   3 +-
 drivers/s390/cio/ioasm.c            |   1 +
 drivers/s390/cio/vfio_ccw_async.c   |  88 +++++++++++++
 drivers/s390/cio/vfio_ccw_drv.c     |  48 +++++--
 drivers/s390/cio/vfio_ccw_fsm.c     | 158 +++++++++++++++++++++-
 drivers/s390/cio/vfio_ccw_ops.c     | 195 ++++++++++++++++++++++++----
 drivers/s390/cio/vfio_ccw_private.h |  44 +++++++
 include/uapi/linux/vfio.h           |   5 +
 include/uapi/linux/vfio_ccw.h       |  12 ++
 9 files changed, 509 insertions(+), 45 deletions(-)
 create mode 100644 drivers/s390/cio/vfio_ccw_async.c

-- 
2.17.2

^ permalink raw reply	[flat|nested] 54+ messages in thread

* [PATCH 1/3] vfio-ccw: add capabilities chain
  2018-11-22 16:54 [PATCH 0/3] vfio-ccw: support hsch/csch (kernel part) Cornelia Huck
@ 2018-11-22 16:54 ` Cornelia Huck
  2018-11-23 12:28   ` Pierre Morel
                     ` (2 more replies)
  2018-11-22 16:54 ` [PATCH 2/3] s390/cio: export hsch to modules Cornelia Huck
                   ` (4 subsequent siblings)
  5 siblings, 3 replies; 54+ messages in thread
From: Cornelia Huck @ 2018-11-22 16:54 UTC (permalink / raw)
  To: Halil Pasic, Eric Farman, Farhan Ali, Pierre Morel
  Cc: linux-s390, kvm, Cornelia Huck, Alex Williamson, qemu-devel, qemu-s390x

Allow extending the regions used by vfio-ccw. The first user will be
handling of halt and clear subchannel.

Signed-off-by: Cornelia Huck <cohuck@redhat.com>
---
 drivers/s390/cio/vfio_ccw_ops.c     | 182 ++++++++++++++++++++++++----
 drivers/s390/cio/vfio_ccw_private.h |  38 ++++++
 include/uapi/linux/vfio.h           |   1 +
 3 files changed, 195 insertions(+), 26 deletions(-)

diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
index f673e106c041..a5d731ed2a39 100644
--- a/drivers/s390/cio/vfio_ccw_ops.c
+++ b/drivers/s390/cio/vfio_ccw_ops.c
@@ -3,9 +3,11 @@
  * Physical device callbacks for vfio_ccw
  *
  * Copyright IBM Corp. 2017
+ * Copyright Red Hat, Inc. 2018
  *
  * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
  *            Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
+ *            Cornelia Huck <cohuck@redhat.com>
  */
 
 #include <linux/vfio.h>
@@ -157,47 +159,76 @@ static void vfio_ccw_mdev_release(struct mdev_device *mdev)
 {
 	struct vfio_ccw_private *private =
 		dev_get_drvdata(mdev_parent_dev(mdev));
+	int i;
 
 	vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
 				 &private->nb);
+
+	for (i = 0; i < private->num_regions; i++)
+		private->region[i].ops->release(private, &private->region[i]);
+
+	private->num_regions = 0;
+	kfree(private->region);
+	private->region = NULL;
 }
 
-static ssize_t vfio_ccw_mdev_read(struct mdev_device *mdev,
-				  char __user *buf,
-				  size_t count,
-				  loff_t *ppos)
+static ssize_t vfio_ccw_mdev_read_io_region(struct vfio_ccw_private *private,
+					    char __user *buf, size_t count,
+					    loff_t *ppos)
 {
-	struct vfio_ccw_private *private;
+	loff_t pos = *ppos & VFIO_CCW_OFFSET_MASK;
 	struct ccw_io_region *region;
 
-	if (*ppos + count > sizeof(*region))
+	if (pos + count > sizeof(*region))
 		return -EINVAL;
 
-	private = dev_get_drvdata(mdev_parent_dev(mdev));
 	region = private->io_region;
-	if (copy_to_user(buf, (void *)region + *ppos, count))
+	if (copy_to_user(buf, (void *)region + pos, count))
 		return -EFAULT;
 
 	return count;
 }
 
-static ssize_t vfio_ccw_mdev_write(struct mdev_device *mdev,
-				   const char __user *buf,
-				   size_t count,
-				   loff_t *ppos)
+static ssize_t vfio_ccw_mdev_read(struct mdev_device *mdev,
+				  char __user *buf,
+				  size_t count,
+				  loff_t *ppos)
 {
+	unsigned int index = VFIO_CCW_OFFSET_TO_INDEX(*ppos);
 	struct vfio_ccw_private *private;
+
+	private = dev_get_drvdata(mdev_parent_dev(mdev));
+
+	if (index >= VFIO_CCW_NUM_REGIONS + private->num_regions)
+		return -EINVAL;
+
+	switch (index) {
+	case VFIO_CCW_CONFIG_REGION_INDEX:
+		return vfio_ccw_mdev_read_io_region(private, buf, count, ppos);
+	default:
+		index -= VFIO_CCW_NUM_REGIONS;
+		return private->region[index].ops->read(private, buf, count,
+							ppos);
+	}
+
+	return -EINVAL;
+}
+
+static ssize_t vfio_ccw_mdev_write_io_region(struct vfio_ccw_private *private,
+					     const char __user *buf,
+					     size_t count, loff_t *ppos)
+{
+	loff_t pos = *ppos & VFIO_CCW_OFFSET_MASK;
 	struct ccw_io_region *region;
 
-	if (*ppos + count > sizeof(*region))
+	if (pos + count > sizeof(*region))
 		return -EINVAL;
 
-	private = dev_get_drvdata(mdev_parent_dev(mdev));
 	if (private->state != VFIO_CCW_STATE_IDLE)
 		return -EACCES;
 
 	region = private->io_region;
-	if (copy_from_user((void *)region + *ppos, buf, count))
+	if (copy_from_user((void *)region + pos, buf, count))
 		return -EFAULT;
 
 	vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_IO_REQ);
@@ -207,21 +238,55 @@ static ssize_t vfio_ccw_mdev_write(struct mdev_device *mdev,
 	}
 
 	return count;
+
 }
 
-static int vfio_ccw_mdev_get_device_info(struct vfio_device_info *info)
+static ssize_t vfio_ccw_mdev_write(struct mdev_device *mdev,
+				   const char __user *buf,
+				   size_t count,
+				   loff_t *ppos)
 {
+	unsigned int index = VFIO_CCW_OFFSET_TO_INDEX(*ppos);
+	struct vfio_ccw_private *private;
+
+	private = dev_get_drvdata(mdev_parent_dev(mdev));
+
+	if (index >= VFIO_CCW_NUM_REGIONS + private->num_regions)
+		return -EINVAL;
+
+	switch (index) {
+	case VFIO_CCW_CONFIG_REGION_INDEX:
+		return vfio_ccw_mdev_write_io_region(private, buf, count, ppos);
+	default:
+		index -= VFIO_CCW_NUM_REGIONS;
+		return private->region[index].ops->write(private, buf, count,
+							 ppos);
+	}
+
+	return -EINVAL;
+}
+
+static int vfio_ccw_mdev_get_device_info(struct vfio_device_info *info,
+					 struct mdev_device *mdev)
+{
+	struct vfio_ccw_private *private;
+
+	private = dev_get_drvdata(mdev_parent_dev(mdev));
 	info->flags = VFIO_DEVICE_FLAGS_CCW | VFIO_DEVICE_FLAGS_RESET;
-	info->num_regions = VFIO_CCW_NUM_REGIONS;
+	info->num_regions = VFIO_CCW_NUM_REGIONS + private->num_regions;
 	info->num_irqs = VFIO_CCW_NUM_IRQS;
 
 	return 0;
 }
 
 static int vfio_ccw_mdev_get_region_info(struct vfio_region_info *info,
-					 u16 *cap_type_id,
-					 void **cap_type)
+					 struct mdev_device *mdev,
+					 unsigned long arg)
 {
+	struct vfio_ccw_private *private;
+	int i;
+
+	private = dev_get_drvdata(mdev_parent_dev(mdev));
 	switch (info->index) {
 	case VFIO_CCW_CONFIG_REGION_INDEX:
 		info->offset = 0;
@@ -229,9 +294,51 @@ static int vfio_ccw_mdev_get_region_info(struct vfio_region_info *info,
 		info->flags = VFIO_REGION_INFO_FLAG_READ
 			      | VFIO_REGION_INFO_FLAG_WRITE;
 		return 0;
-	default:
-		return -EINVAL;
+	default: /* all other regions are handled via capability chain */
+	{
+		struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
+		struct vfio_region_info_cap_type cap_type = {
+			.header.id = VFIO_REGION_INFO_CAP_TYPE,
+			.header.version = 1 };
+		int ret;
+
+		if (info->index >=
+		    VFIO_CCW_NUM_REGIONS + private->num_regions)
+			return -EINVAL;
+
+		i = info->index - VFIO_CCW_NUM_REGIONS;
+
+		info->offset = VFIO_CCW_INDEX_TO_OFFSET(info->index);
+		info->size = private->region[i].size;
+		info->flags = private->region[i].flags;
+
+		cap_type.type = private->region[i].type;
+		cap_type.subtype = private->region[i].subtype;
+
+		ret = vfio_info_add_capability(&caps, &cap_type.header,
+					       sizeof(cap_type));
+		if (ret)
+			return ret;
+
+		info->flags |= VFIO_REGION_INFO_FLAG_CAPS;
+		if (info->argsz < sizeof(*info) + caps.size) {
+			info->argsz = sizeof(*info) + caps.size;
+			info->cap_offset = 0;
+		} else {
+			vfio_info_cap_shift(&caps, sizeof(*info));
+			if (copy_to_user((void __user *)arg + sizeof(*info),
+					 caps.buf, caps.size)) {
+				kfree(caps.buf);
+				return -EFAULT;
+			}
+			info->cap_offset = sizeof(*info);
+		}
+
+		kfree(caps.buf);
+
+	}
 	}
+	return 0;
 }
 
 static int vfio_ccw_mdev_get_irq_info(struct vfio_irq_info *info)
@@ -308,6 +415,32 @@ static int vfio_ccw_mdev_set_irqs(struct mdev_device *mdev,
 	}
 }
 
+int vfio_ccw_register_dev_region(struct vfio_ccw_private *private,
+				 unsigned int subtype,
+				 const struct vfio_ccw_regops *ops,
+				 size_t size, u32 flags, void *data)
+{
+	struct vfio_ccw_region *region;
+
+	region = krealloc(private->region,
+			  (private->num_regions + 1) * sizeof(*region),
+			  GFP_KERNEL);
+	if (!region)
+		return -ENOMEM;
+
+	private->region = region;
+	private->region[private->num_regions].type = VFIO_REGION_TYPE_CCW;
+	private->region[private->num_regions].subtype = subtype;
+	private->region[private->num_regions].ops = ops;
+	private->region[private->num_regions].size = size;
+	private->region[private->num_regions].flags = flags;
+	private->region[private->num_regions].data = data;
+
+	private->num_regions++;
+
+	return 0;
+}
+
 static ssize_t vfio_ccw_mdev_ioctl(struct mdev_device *mdev,
 				   unsigned int cmd,
 				   unsigned long arg)
@@ -328,7 +461,7 @@ static ssize_t vfio_ccw_mdev_ioctl(struct mdev_device *mdev,
 		if (info.argsz < minsz)
 			return -EINVAL;
 
-		ret = vfio_ccw_mdev_get_device_info(&info);
+		ret = vfio_ccw_mdev_get_device_info(&info, mdev);
 		if (ret)
 			return ret;
 
@@ -337,8 +470,6 @@ static ssize_t vfio_ccw_mdev_ioctl(struct mdev_device *mdev,
 	case VFIO_DEVICE_GET_REGION_INFO:
 	{
 		struct vfio_region_info info;
-		u16 cap_type_id = 0;
-		void *cap_type = NULL;
 
 		minsz = offsetofend(struct vfio_region_info, offset);
 
@@ -348,8 +479,7 @@ static ssize_t vfio_ccw_mdev_ioctl(struct mdev_device *mdev,
 		if (info.argsz < minsz)
 			return -EINVAL;
 
-		ret = vfio_ccw_mdev_get_region_info(&info, &cap_type_id,
-						    &cap_type);
+		ret = vfio_ccw_mdev_get_region_info(&info, mdev, arg);
 		if (ret)
 			return ret;
 
diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
index 078e46f9623d..a6f9f84526e2 100644
--- a/drivers/s390/cio/vfio_ccw_private.h
+++ b/drivers/s390/cio/vfio_ccw_private.h
@@ -3,9 +3,11 @@
  * Private stuff for vfio_ccw driver
  *
  * Copyright IBM Corp. 2017
+ * Copyright Red Hat, Inc. 2018
  *
  * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
  *            Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
+ *            Cornelia Huck <cohuck@redhat.com>
  */
 
 #ifndef _VFIO_CCW_PRIVATE_H_
@@ -19,6 +21,38 @@
 #include "css.h"
 #include "vfio_ccw_cp.h"
 
+#define VFIO_CCW_OFFSET_SHIFT   40
+#define VFIO_CCW_OFFSET_TO_INDEX(off)	(off >> VFIO_CCW_OFFSET_SHIFT)
+#define VFIO_CCW_INDEX_TO_OFFSET(index)	((u64)(index) << VFIO_CCW_OFFSET_SHIFT)
+#define VFIO_CCW_OFFSET_MASK	(((u64)(1) << VFIO_CCW_OFFSET_SHIFT) - 1)
+
+/* capability chain handling similar to vfio-pci */
+struct vfio_ccw_private;
+struct vfio_ccw_region;
+
+struct vfio_ccw_regops {
+	size_t	(*read)(struct vfio_ccw_private *private, char __user *buf,
+			size_t count, loff_t *ppos);
+	size_t	(*write)(struct vfio_ccw_private *private,
+			 const char __user *buf, size_t count, loff_t *ppos);
+	void	(*release)(struct vfio_ccw_private *private,
+			   struct vfio_ccw_region *region);
+};
+
+struct vfio_ccw_region {
+	u32				type;
+	u32				subtype;
+	const struct vfio_ccw_regops	*ops;
+	void				*data;
+	size_t				size;
+	u32				flags;
+};
+
+int vfio_ccw_register_dev_region(struct vfio_ccw_private *private,
+				 unsigned int subtype,
+				 const struct vfio_ccw_regops *ops,
+				 size_t size, u32 flags, void *data);
+
 /**
  * struct vfio_ccw_private
  * @sch: pointer to the subchannel
@@ -28,6 +62,8 @@
  * @mdev: pointer to the mediated device
  * @nb: notifier for vfio events
  * @io_region: MMIO region to input/output I/O arguments/results
+ * @region: additional regions for other subchannel operations
+ * @num_regions: number of additional regions
  * @cp: channel program for the current I/O operation
  * @irb: irb info received from interrupt
  * @scsw: scsw info
@@ -42,6 +78,8 @@ struct vfio_ccw_private {
 	struct mdev_device	*mdev;
 	struct notifier_block	nb;
 	struct ccw_io_region	*io_region;
+	struct vfio_ccw_region *region;
+	int num_regions;
 
 	struct channel_program	cp;
 	struct irb		irb;
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 813102810f53..565669f95534 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -297,6 +297,7 @@ struct vfio_region_info_cap_type {
 
 #define VFIO_REGION_TYPE_PCI_VENDOR_TYPE	(1 << 31)
 #define VFIO_REGION_TYPE_PCI_VENDOR_MASK	(0xffff)
+#define VFIO_REGION_TYPE_CCW			(1 << 30)
 
 /* 8086 Vendor sub-types */
 #define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION	(1)
-- 
2.17.2

^ permalink raw reply related	[flat|nested] 54+ messages in thread

* [PATCH 2/3] s390/cio: export hsch to modules
  2018-11-22 16:54 [PATCH 0/3] vfio-ccw: support hsch/csch (kernel part) Cornelia Huck
  2018-11-22 16:54 ` [PATCH 1/3] vfio-ccw: add capabilities chain Cornelia Huck
@ 2018-11-22 16:54 ` Cornelia Huck
  2018-11-23 12:30   ` Pierre Morel
  2018-11-22 16:54 ` [PATCH 3/3] vfio-ccw: add handling for async channel instructions Cornelia Huck
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 54+ messages in thread
From: Cornelia Huck @ 2018-11-22 16:54 UTC (permalink / raw)
  To: Halil Pasic, Eric Farman, Farhan Ali, Pierre Morel
  Cc: linux-s390, kvm, Cornelia Huck, Alex Williamson, qemu-devel, qemu-s390x

The vfio-ccw code will need this, and it matches treatment of ssch
and csch.

Signed-off-by: Cornelia Huck <cohuck@redhat.com>
---
 drivers/s390/cio/ioasm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/s390/cio/ioasm.c b/drivers/s390/cio/ioasm.c
index 14d328338ce2..08eb10283b18 100644
--- a/drivers/s390/cio/ioasm.c
+++ b/drivers/s390/cio/ioasm.c
@@ -233,6 +233,7 @@ int hsch(struct subchannel_id schid)
 
 	return ccode;
 }
+EXPORT_SYMBOL(hsch);
 
 static inline int __xsch(struct subchannel_id schid)
 {
-- 
2.17.2

^ permalink raw reply related	[flat|nested] 54+ messages in thread

* [PATCH 3/3] vfio-ccw: add handling for async channel instructions
  2018-11-22 16:54 [PATCH 0/3] vfio-ccw: support hsch/csch (kernel part) Cornelia Huck
  2018-11-22 16:54 ` [PATCH 1/3] vfio-ccw: add capabilities chain Cornelia Huck
  2018-11-22 16:54 ` [PATCH 2/3] s390/cio: export hsch to modules Cornelia Huck
@ 2018-11-22 16:54 ` Cornelia Huck
  2018-11-23 13:08   ` Pierre Morel
                     ` (4 more replies)
  2018-11-24 21:07 ` [qemu-s390x] [PATCH 0/3] vfio-ccw: support hsch/csch (kernel part) Halil Pasic
                   ` (2 subsequent siblings)
  5 siblings, 5 replies; 54+ messages in thread
From: Cornelia Huck @ 2018-11-22 16:54 UTC (permalink / raw)
  To: Halil Pasic, Eric Farman, Farhan Ali, Pierre Morel
  Cc: linux-s390, kvm, Cornelia Huck, Alex Williamson, qemu-devel, qemu-s390x

Add a region to the vfio-ccw device that can be used to submit
asynchronous I/O instructions. ssch continues to be handled by the
existing I/O region; the new region handles hsch and csch.

Interrupt status continues to be reported through the same channels
as for ssch.

Signed-off-by: Cornelia Huck <cohuck@redhat.com>
---
 drivers/s390/cio/Makefile           |   3 +-
 drivers/s390/cio/vfio_ccw_async.c   |  88 ++++++++++++++++
 drivers/s390/cio/vfio_ccw_drv.c     |  48 ++++++---
 drivers/s390/cio/vfio_ccw_fsm.c     | 158 +++++++++++++++++++++++++++-
 drivers/s390/cio/vfio_ccw_ops.c     |  13 ++-
 drivers/s390/cio/vfio_ccw_private.h |   6 ++
 include/uapi/linux/vfio.h           |   4 +
 include/uapi/linux/vfio_ccw.h       |  12 +++
 8 files changed, 313 insertions(+), 19 deletions(-)
 create mode 100644 drivers/s390/cio/vfio_ccw_async.c

diff --git a/drivers/s390/cio/Makefile b/drivers/s390/cio/Makefile
index f230516abb96..f6a8db04177c 100644
--- a/drivers/s390/cio/Makefile
+++ b/drivers/s390/cio/Makefile
@@ -20,5 +20,6 @@ obj-$(CONFIG_CCWGROUP) += ccwgroup.o
 qdio-objs := qdio_main.o qdio_thinint.o qdio_debug.o qdio_setup.o
 obj-$(CONFIG_QDIO) += qdio.o
 
-vfio_ccw-objs += vfio_ccw_drv.o vfio_ccw_cp.o vfio_ccw_ops.o vfio_ccw_fsm.o
+vfio_ccw-objs += vfio_ccw_drv.o vfio_ccw_cp.o vfio_ccw_ops.o vfio_ccw_fsm.o \
+	vfio_ccw_async.o
 obj-$(CONFIG_VFIO_CCW) += vfio_ccw.o
diff --git a/drivers/s390/cio/vfio_ccw_async.c b/drivers/s390/cio/vfio_ccw_async.c
new file mode 100644
index 000000000000..8c7f51d17d70
--- /dev/null
+++ b/drivers/s390/cio/vfio_ccw_async.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Async I/O region for vfio_ccw
+ *
+ * Copyright Red Hat, Inc. 2018
+ *
+ * Author(s): Cornelia Huck <cohuck@redhat.com>
+ */
+
+#include <linux/vfio.h>
+#include <linux/mdev.h>
+
+#include "vfio_ccw_private.h"
+
+static size_t vfio_ccw_async_region_read(struct vfio_ccw_private *private,
+					 char __user *buf, size_t count,
+					 loff_t *ppos)
+{
+	unsigned int i = VFIO_CCW_OFFSET_TO_INDEX(*ppos) - VFIO_CCW_NUM_REGIONS;
+	loff_t pos = *ppos & VFIO_CCW_OFFSET_MASK;
+	struct ccw_cmd_region *region;
+
+	if (pos + count > sizeof(*region))
+		return -EINVAL;
+
+	region = private->region[i].data;
+	if (copy_to_user(buf, (void *)region + pos, count))
+		return -EFAULT;
+
+	return count;
+
+}
+
+static size_t vfio_ccw_async_region_write(struct vfio_ccw_private *private,
+					  const char __user *buf, size_t count,
+					  loff_t *ppos)
+{
+	unsigned int i = VFIO_CCW_OFFSET_TO_INDEX(*ppos) - VFIO_CCW_NUM_REGIONS;
+	loff_t pos = *ppos & VFIO_CCW_OFFSET_MASK;
+	struct ccw_cmd_region *region;
+
+	if (pos + count > sizeof(*region))
+		return -EINVAL;
+
+	if (private->state == VFIO_CCW_STATE_NOT_OPER ||
+	    private->state == VFIO_CCW_STATE_STANDBY)
+		return -EACCES;
+
+	region = private->region[i].data;
+	if (copy_from_user((void *)region + pos, buf, count))
+		return -EFAULT;
+
+	switch (region->command) {
+	case VFIO_CCW_ASYNC_CMD_HSCH:
+		vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_HALT_REQ);
+		break;
+	case VFIO_CCW_ASYNC_CMD_CSCH:
+		vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_CLEAR_REQ);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return region->ret_code ? region->ret_code : count;
+}
+
+static void vfio_ccw_async_region_release(struct vfio_ccw_private *private,
+					  struct vfio_ccw_region *region)
+{
+
+}
+
+const struct vfio_ccw_regops vfio_ccw_async_region_ops = {
+	.read = vfio_ccw_async_region_read,
+	.write = vfio_ccw_async_region_write,
+	.release = vfio_ccw_async_region_release,
+};
+
+int vfio_ccw_register_async_dev_regions(struct vfio_ccw_private *private)
+{
+	return vfio_ccw_register_dev_region(private,
+					    VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD,
+					    &vfio_ccw_async_region_ops,
+					    sizeof(struct ccw_cmd_region),
+					    VFIO_REGION_INFO_FLAG_READ |
+					    VFIO_REGION_INFO_FLAG_WRITE,
+					    private->cmd_region);
+}
diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
index a10cec0e86eb..890c588a3a61 100644
--- a/drivers/s390/cio/vfio_ccw_drv.c
+++ b/drivers/s390/cio/vfio_ccw_drv.c
@@ -3,9 +3,11 @@
  * VFIO based Physical Subchannel device driver
  *
  * Copyright IBM Corp. 2017
+ * Copyright Red Hat, Inc. 2018
  *
  * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
  *            Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
+ *            Cornelia Huck <cohuck@redhat.com>
  */
 
 #include <linux/module.h>
@@ -23,6 +25,7 @@
 
 struct workqueue_struct *vfio_ccw_work_q;
 static struct kmem_cache *vfio_ccw_io_region;
+static struct kmem_cache *vfio_ccw_cmd_region;
 
 /*
  * Helpers
@@ -76,7 +79,8 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
 	private = container_of(work, struct vfio_ccw_private, io_work);
 	irb = &private->irb;
 
-	if (scsw_is_solicited(&irb->scsw)) {
+	if (scsw_is_solicited(&irb->scsw) &&
+	    (scsw_fctl(&irb->scsw) & SCSW_FCTL_START_FUNC)) {
 		cp_update_scsw(&private->cp, &irb->scsw);
 		cp_free(&private->cp);
 	}
@@ -104,7 +108,7 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
 {
 	struct pmcw *pmcw = &sch->schib.pmcw;
 	struct vfio_ccw_private *private;
-	int ret;
+	int ret = -ENOMEM;
 
 	if (pmcw->qf) {
 		dev_warn(&sch->dev, "vfio: ccw: does not support QDIO: %s\n",
@@ -118,10 +122,13 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
 
 	private->io_region = kmem_cache_zalloc(vfio_ccw_io_region,
 					       GFP_KERNEL | GFP_DMA);
-	if (!private->io_region) {
-		kfree(private);
-		return -ENOMEM;
-	}
+	if (!private->io_region)
+		goto out_free;
+
+	private->cmd_region = kmem_cache_zalloc(vfio_ccw_cmd_region,
+						GFP_KERNEL | GFP_DMA);
+	if (!private->cmd_region)
+		goto out_free;
 
 	private->sch = sch;
 	dev_set_drvdata(&sch->dev, private);
@@ -148,7 +155,10 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
 	cio_disable_subchannel(sch);
 out_free:
 	dev_set_drvdata(&sch->dev, NULL);
-	kmem_cache_free(vfio_ccw_io_region, private->io_region);
+	if (private->cmd_region)
+		kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region);
+	if (private->io_region)
+		kmem_cache_free(vfio_ccw_io_region, private->io_region);
 	kfree(private);
 	return ret;
 }
@@ -237,7 +247,7 @@ static struct css_driver vfio_ccw_sch_driver = {
 
 static int __init vfio_ccw_sch_init(void)
 {
-	int ret;
+	int ret = -ENOMEM;
 
 	vfio_ccw_work_q = create_singlethread_workqueue("vfio-ccw");
 	if (!vfio_ccw_work_q)
@@ -247,20 +257,30 @@ static int __init vfio_ccw_sch_init(void)
 					sizeof(struct ccw_io_region), 0,
 					SLAB_ACCOUNT, 0,
 					sizeof(struct ccw_io_region), NULL);
-	if (!vfio_ccw_io_region) {
-		destroy_workqueue(vfio_ccw_work_q);
-		return -ENOMEM;
-	}
+	if (!vfio_ccw_io_region)
+		goto out_err;
+
+	vfio_ccw_cmd_region = kmem_cache_create_usercopy("vfio_ccw_cmd_region",
+					sizeof(struct ccw_cmd_region), 0,
+					SLAB_ACCOUNT, 0,
+					sizeof(struct ccw_cmd_region), NULL);
+	if (!vfio_ccw_cmd_region)
+		goto out_err;
 
 	isc_register(VFIO_CCW_ISC);
 	ret = css_driver_register(&vfio_ccw_sch_driver);
 	if (ret) {
 		isc_unregister(VFIO_CCW_ISC);
-		kmem_cache_destroy(vfio_ccw_io_region);
-		destroy_workqueue(vfio_ccw_work_q);
+		goto out_err;
 	}
 
 	return ret;
+
+out_err:
+	kmem_cache_destroy(vfio_ccw_cmd_region);
+	kmem_cache_destroy(vfio_ccw_io_region);
+	destroy_workqueue(vfio_ccw_work_q);
+	return ret;
 }
 
 static void __exit vfio_ccw_sch_exit(void)
diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c
index f94aa01f9c36..0caf77e8f377 100644
--- a/drivers/s390/cio/vfio_ccw_fsm.c
+++ b/drivers/s390/cio/vfio_ccw_fsm.c
@@ -3,8 +3,10 @@
  * Finite state machine for vfio-ccw device handling
  *
  * Copyright IBM Corp. 2017
+ * Copyright Red Hat, Inc. 2018
  *
  * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
+ *            Cornelia Huck <cohuck@redhat.com>
  */
 
 #include <linux/vfio.h>
@@ -68,6 +70,81 @@ static int fsm_io_helper(struct vfio_ccw_private *private)
 	return ret;
 }
 
+static int fsm_do_halt(struct vfio_ccw_private *private)
+{
+	struct subchannel *sch;
+	unsigned long flags;
+	int ccode;
+	int ret;
+
+	sch = private->sch;
+
+	spin_lock_irqsave(sch->lock, flags);
+	private->state = VFIO_CCW_STATE_BUSY;
+
+	/* Issue "Halt Subchannel" */
+	ccode = hsch(sch->schid);
+
+	switch (ccode) {
+	case 0:
+		/*
+		 * Initialize device status information
+		 */
+		sch->schib.scsw.cmd.actl |= SCSW_ACTL_HALT_PEND;
+		ret = 0;
+		break;
+	case 1:		/* Status pending */
+	case 2:		/* Busy */
+		ret = -EBUSY;
+		break;
+	case 3:		/* Device not operational */
+	{
+		ret = -ENODEV;
+		break;
+	}
+	default:
+		ret = ccode;
+	}
+	spin_unlock_irqrestore(sch->lock, flags);
+	return ret;
+}
+
+static int fsm_do_clear(struct vfio_ccw_private *private)
+{
+	struct subchannel *sch;
+	unsigned long flags;
+	int ccode;
+	int ret;
+
+	sch = private->sch;
+
+	spin_lock_irqsave(sch->lock, flags);
+	private->state = VFIO_CCW_STATE_BUSY;
+
+	/* Issue "Clear Subchannel" */
+	ccode = csch(sch->schid);
+
+	switch (ccode) {
+	case 0:
+		/*
+		 * Initialize device status information
+		 */
+		sch->schib.scsw.cmd.actl = SCSW_ACTL_CLEAR_PEND;
+		/* TODO: check what else we might need to clear */
+		ret = 0;
+		break;
+	case 3:		/* Device not operational */
+	{
+		ret = -ENODEV;
+		break;
+	}
+	default:
+		ret = ccode;
+	}
+	spin_unlock_irqrestore(sch->lock, flags);
+	return ret;
+}
+
 static void fsm_notoper(struct vfio_ccw_private *private,
 			enum vfio_ccw_event event)
 {
@@ -102,6 +179,20 @@ static void fsm_io_busy(struct vfio_ccw_private *private,
 	private->io_region->ret_code = -EBUSY;
 }
 
+static void fsm_async_error(struct vfio_ccw_private *private,
+			    enum vfio_ccw_event event)
+{
+	pr_err("vfio-ccw: FSM: halt/clear request from state:%d\n",
+	       private->state);
+	private->cmd_region->ret_code = -EIO;
+}
+
+static void fsm_async_busy(struct vfio_ccw_private *private,
+			   enum vfio_ccw_event event)
+{
+	private->cmd_region->ret_code = -EBUSY;
+}
+
 static void fsm_disabled_irq(struct vfio_ccw_private *private,
 			     enum vfio_ccw_event event)
 {
@@ -166,11 +257,11 @@ static void fsm_io_request(struct vfio_ccw_private *private,
 		}
 		return;
 	} else if (scsw->cmd.fctl & SCSW_FCTL_HALT_FUNC) {
-		/* XXX: Handle halt. */
+		/* halt is handled via the async cmd region */
 		io_region->ret_code = -EOPNOTSUPP;
 		goto err_out;
 	} else if (scsw->cmd.fctl & SCSW_FCTL_CLEAR_FUNC) {
-		/* XXX: Handle clear. */
+		/* clear is handled via the async cmd region */
 		io_region->ret_code = -EOPNOTSUPP;
 		goto err_out;
 	}
@@ -181,6 +272,59 @@ static void fsm_io_request(struct vfio_ccw_private *private,
 			       io_region->ret_code, errstr);
 }
 
+/*
+ * Deal with a halt request from userspace.
+ */
+static void fsm_halt_request(struct vfio_ccw_private *private,
+			     enum vfio_ccw_event event)
+{
+	struct ccw_cmd_region *cmd_region = private->cmd_region;
+	int state = private->state;
+
+	private->state = VFIO_CCW_STATE_BOXED;
+
+	if (cmd_region->command != VFIO_CCW_ASYNC_CMD_HSCH) {
+		/* should not happen? */
+		cmd_region->ret_code = -EINVAL;
+		goto err_out;
+	}
+
+	cmd_region->ret_code = fsm_do_halt(private);
+	if (cmd_region->ret_code)
+		goto err_out;
+
+	return;
+
+err_out:
+	private->state = state;
+}
+
+/*
+ * Deal with a clear request from userspace.
+ */
+static void fsm_clear_request(struct vfio_ccw_private *private,
+			      enum vfio_ccw_event event)
+{
+	struct ccw_cmd_region *cmd_region = private->cmd_region;
+	int state = private->state;
+
+	private->state = VFIO_CCW_STATE_BOXED;
+
+	if (cmd_region->command != VFIO_CCW_ASYNC_CMD_CSCH) {
+		/* should not happen? */
+		cmd_region->ret_code = -EINVAL;
+		goto err_out;
+	}
+
+	cmd_region->ret_code = fsm_do_clear(private);
+	if (cmd_region->ret_code)
+		goto err_out;
+
+	return;
+
+err_out:
+	private->state = state;
+}
 /*
  * Got an interrupt for a normal io (state busy).
  */
@@ -204,26 +348,36 @@ fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = {
 	[VFIO_CCW_STATE_NOT_OPER] = {
 		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_nop,
 		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_error,
+		[VFIO_CCW_EVENT_HALT_REQ]	= fsm_async_error,
+		[VFIO_CCW_EVENT_CLEAR_REQ]	= fsm_async_error,
 		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_disabled_irq,
 	},
 	[VFIO_CCW_STATE_STANDBY] = {
 		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
 		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_error,
+		[VFIO_CCW_EVENT_HALT_REQ]	= fsm_async_error,
+		[VFIO_CCW_EVENT_CLEAR_REQ]	= fsm_async_error,
 		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
 	},
 	[VFIO_CCW_STATE_IDLE] = {
 		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
 		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_request,
+		[VFIO_CCW_EVENT_HALT_REQ]	= fsm_halt_request,
+		[VFIO_CCW_EVENT_CLEAR_REQ]	= fsm_clear_request,
 		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
 	},
 	[VFIO_CCW_STATE_BOXED] = {
 		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
 		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_busy,
+		[VFIO_CCW_EVENT_HALT_REQ]	= fsm_async_busy,
+		[VFIO_CCW_EVENT_CLEAR_REQ]	= fsm_async_busy,
 		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
 	},
 	[VFIO_CCW_STATE_BUSY] = {
 		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
 		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_busy,
+		[VFIO_CCW_EVENT_HALT_REQ]	= fsm_halt_request,
+		[VFIO_CCW_EVENT_CLEAR_REQ]	= fsm_clear_request,
 		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
 	},
 };
diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
index a5d731ed2a39..0e1f7f7bf927 100644
--- a/drivers/s390/cio/vfio_ccw_ops.c
+++ b/drivers/s390/cio/vfio_ccw_ops.c
@@ -148,11 +148,20 @@ static int vfio_ccw_mdev_open(struct mdev_device *mdev)
 	struct vfio_ccw_private *private =
 		dev_get_drvdata(mdev_parent_dev(mdev));
 	unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
+	int ret;
 
 	private->nb.notifier_call = vfio_ccw_mdev_notifier;
 
-	return vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
-				      &events, &private->nb);
+	ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
+				     &events, &private->nb);
+	if (ret)
+		return ret;
+
+	ret = vfio_ccw_register_async_dev_regions(private);
+	if (ret)
+		vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
+					 &private->nb);
+	return ret;
 }
 
 static void vfio_ccw_mdev_release(struct mdev_device *mdev)
diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
index a6f9f84526e2..1a41a14831ae 100644
--- a/drivers/s390/cio/vfio_ccw_private.h
+++ b/drivers/s390/cio/vfio_ccw_private.h
@@ -53,6 +53,8 @@ int vfio_ccw_register_dev_region(struct vfio_ccw_private *private,
 				 const struct vfio_ccw_regops *ops,
 				 size_t size, u32 flags, void *data);
 
+int vfio_ccw_register_async_dev_regions(struct vfio_ccw_private *private);
+
 /**
  * struct vfio_ccw_private
  * @sch: pointer to the subchannel
@@ -62,6 +64,7 @@ int vfio_ccw_register_dev_region(struct vfio_ccw_private *private,
  * @mdev: pointer to the mediated device
  * @nb: notifier for vfio events
  * @io_region: MMIO region to input/output I/O arguments/results
+ * @cmd_region: MMIO region for asynchronous I/O commands other than START
  * @region: additional regions for other subchannel operations
  * @num_regions: number of additional regions
  * @cp: channel program for the current I/O operation
@@ -79,6 +82,7 @@ struct vfio_ccw_private {
 	struct notifier_block	nb;
 	struct ccw_io_region	*io_region;
 	struct vfio_ccw_region *region;
+	struct ccw_cmd_region	*cmd_region;
 	int num_regions;
 
 	struct channel_program	cp;
@@ -114,6 +118,8 @@ enum vfio_ccw_event {
 	VFIO_CCW_EVENT_NOT_OPER,
 	VFIO_CCW_EVENT_IO_REQ,
 	VFIO_CCW_EVENT_INTERRUPT,
+	VFIO_CCW_EVENT_HALT_REQ,
+	VFIO_CCW_EVENT_CLEAR_REQ,
 	/* last element! */
 	NR_VFIO_CCW_EVENTS
 };
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 565669f95534..c01472ec77ea 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -304,6 +304,7 @@ struct vfio_region_info_cap_type {
 #define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG	(2)
 #define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG	(3)
 
+
 #define VFIO_REGION_TYPE_GFX                    (1)
 #define VFIO_REGION_SUBTYPE_GFX_EDID            (1)
 
@@ -354,6 +355,9 @@ struct vfio_region_gfx_edid {
 #define VFIO_DEVICE_GFX_LINK_STATE_DOWN  2
 };
 
+/* ccw sub-types */
+#define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD	(1)
+
 /*
  * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped
  * which allows direct access to non-MSIX registers which happened to be within
diff --git a/include/uapi/linux/vfio_ccw.h b/include/uapi/linux/vfio_ccw.h
index 2ec5f367ff78..cbecbf0cd54f 100644
--- a/include/uapi/linux/vfio_ccw.h
+++ b/include/uapi/linux/vfio_ccw.h
@@ -12,6 +12,7 @@
 
 #include <linux/types.h>
 
+/* used for START SUBCHANNEL, always present */
 struct ccw_io_region {
 #define ORB_AREA_SIZE 12
 	__u8	orb_area[ORB_AREA_SIZE];
@@ -22,4 +23,15 @@ struct ccw_io_region {
 	__u32	ret_code;
 } __packed;
 
+/*
+ * used for processing commands that trigger asynchronous actions
+ * Note: this is controlled by a capability
+ */
+#define VFIO_CCW_ASYNC_CMD_HSCH (1 << 0)
+#define VFIO_CCW_ASYNC_CMD_CSCH (1 << 1)
+struct ccw_cmd_region {
+	__u32 command;
+	__u32 ret_code;
+} __packed;
+
 #endif
-- 
2.17.2

^ permalink raw reply related	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/3] vfio-ccw: add capabilities chain
  2018-11-22 16:54 ` [PATCH 1/3] vfio-ccw: add capabilities chain Cornelia Huck
@ 2018-11-23 12:28   ` Pierre Morel
  2018-11-23 12:45     ` Cornelia Huck
  2018-11-27 19:04   ` Farhan Ali
  2018-12-17 21:53   ` Eric Farman
  2 siblings, 1 reply; 54+ messages in thread
From: Pierre Morel @ 2018-11-23 12:28 UTC (permalink / raw)
  To: Cornelia Huck, Halil Pasic, Eric Farman, Farhan Ali
  Cc: linux-s390, qemu-s390x, Alex Williamson, qemu-devel, kvm

On 22/11/2018 17:54, Cornelia Huck wrote:
> Allow to extend the regions used by vfio-ccw. The first user will be
> handling of halt and clear subchannel.
> 
> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> ---
>   drivers/s390/cio/vfio_ccw_ops.c     | 182 ++++++++++++++++++++++++----
>   drivers/s390/cio/vfio_ccw_private.h |  38 ++++++
>   include/uapi/linux/vfio.h           |   1 +
>   3 files changed, 195 insertions(+), 26 deletions(-)
> 

Halt and clear have no parameters (the sub-channel ID is obviously the 
one of the mediated device).

Isn't adding a new sub-region for the purpose of handling halt and clear 
superfluous?

What is the reason not to use simple ioctls ?

Regards,
Pierre

-- 
Pierre Morel
Linux/KVM/QEMU in Böblingen - Germany

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 2/3] s390/cio: export hsch to modules
  2018-11-22 16:54 ` [PATCH 2/3] s390/cio: export hsch to modules Cornelia Huck
@ 2018-11-23 12:30   ` Pierre Morel
  0 siblings, 0 replies; 54+ messages in thread
From: Pierre Morel @ 2018-11-23 12:30 UTC (permalink / raw)
  To: Cornelia Huck, Halil Pasic, Eric Farman, Farhan Ali
  Cc: linux-s390, qemu-s390x, Alex Williamson, qemu-devel, kvm

On 22/11/2018 17:54, Cornelia Huck wrote:
> The vfio-ccw code will need this, and it matches treatment of ssch
> and csch.
> 
> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> ---
>   drivers/s390/cio/ioasm.c | 1 +
>   1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/s390/cio/ioasm.c b/drivers/s390/cio/ioasm.c
> index 14d328338ce2..08eb10283b18 100644
> --- a/drivers/s390/cio/ioasm.c
> +++ b/drivers/s390/cio/ioasm.c
> @@ -233,6 +233,7 @@ int hsch(struct subchannel_id schid)
>   
>   	return ccode;
>   }
> +EXPORT_SYMBOL(hsch);
>   
>   static inline int __xsch(struct subchannel_id schid)
>   {
> 

LGTM

Reviewed-by: Pierre Morel <pmorel@linux.ibm.com>



-- 
Pierre Morel
Linux/KVM/QEMU in Böblingen - Germany

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/3] vfio-ccw: add capabilities chain
  2018-11-23 12:28   ` Pierre Morel
@ 2018-11-23 12:45     ` Cornelia Huck
  2018-11-23 13:26       ` Pierre Morel
  0 siblings, 1 reply; 54+ messages in thread
From: Cornelia Huck @ 2018-11-23 12:45 UTC (permalink / raw)
  To: Pierre Morel
  Cc: linux-s390, Eric Farman, Alex Williamson, kvm, Farhan Ali,
	qemu-devel, Halil Pasic, qemu-s390x

On Fri, 23 Nov 2018 13:28:25 +0100
Pierre Morel <pmorel@linux.ibm.com> wrote:

> On 22/11/2018 17:54, Cornelia Huck wrote:
> > Allow to extend the regions used by vfio-ccw. The first user will be
> > handling of halt and clear subchannel.
> > 
> > Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> > ---
> >   drivers/s390/cio/vfio_ccw_ops.c     | 182 ++++++++++++++++++++++++----
> >   drivers/s390/cio/vfio_ccw_private.h |  38 ++++++
> >   include/uapi/linux/vfio.h           |   1 +
> >   3 files changed, 195 insertions(+), 26 deletions(-)
> >   
> 
> Halt and clear have no parameters (the sub-channel ID is obviously the 
> one of the mediated device).
> 
> Isn't adding a new sub-region for the purpose of handling halt and clear 
> superfluous?
> 
> What is the reason not to use simple ioctls ?

Should it turn out that we missed something and need an enhanced
interface, we can simply stop providing this subregion and add a new
subregion, without breaking existing userspace. We can't do that with
ioctls.

And moreover, this is only the first user of this infrastructure.
There's also that path handling series that Dong Jia had posted early
this year -- that would be an obvious user as well.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 3/3] vfio-ccw: add handling for asnyc channel instructions
  2018-11-22 16:54 ` [PATCH 3/3] vfio-ccw: add handling for asnyc channel instructions Cornelia Huck
@ 2018-11-23 13:08   ` Pierre Morel
  2018-11-26  9:47     ` Cornelia Huck
  2018-11-27 19:09   ` Farhan Ali
                     ` (3 subsequent siblings)
  4 siblings, 1 reply; 54+ messages in thread
From: Pierre Morel @ 2018-11-23 13:08 UTC (permalink / raw)
  To: Cornelia Huck, Halil Pasic, Eric Farman, Farhan Ali
  Cc: linux-s390, qemu-s390x, Alex Williamson, qemu-devel, kvm

On 22/11/2018 17:54, Cornelia Huck wrote:
> Add a region to the vfio-ccw device that can be used to submit
> asynchronous I/O instructions. ssch continues to be handled by the
> existing I/O region; the new region handles hsch and csch.
> 
> Interrupt status continues to be reported through the same channels
> as for ssch.
> 
> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> ---
>   drivers/s390/cio/Makefile           |   3 +-
>   drivers/s390/cio/vfio_ccw_async.c   |  88 ++++++++++++++++
>   drivers/s390/cio/vfio_ccw_drv.c     |  48 ++++++---
>   drivers/s390/cio/vfio_ccw_fsm.c     | 158 +++++++++++++++++++++++++++-
>   drivers/s390/cio/vfio_ccw_ops.c     |  13 ++-
>   drivers/s390/cio/vfio_ccw_private.h |   6 ++
>   include/uapi/linux/vfio.h           |   4 +
>   include/uapi/linux/vfio_ccw.h       |  12 +++
>   8 files changed, 313 insertions(+), 19 deletions(-)
>   create mode 100644 drivers/s390/cio/vfio_ccw_async.c
> 
> diff --git a/drivers/s390/cio/Makefile b/drivers/s390/cio/Makefile
> index f230516abb96..f6a8db04177c 100644
> --- a/drivers/s390/cio/Makefile
> +++ b/drivers/s390/cio/Makefile
> @@ -20,5 +20,6 @@ obj-$(CONFIG_CCWGROUP) += ccwgroup.o
>   qdio-objs := qdio_main.o qdio_thinint.o qdio_debug.o qdio_setup.o
>   obj-$(CONFIG_QDIO) += qdio.o
>   
> -vfio_ccw-objs += vfio_ccw_drv.o vfio_ccw_cp.o vfio_ccw_ops.o vfio_ccw_fsm.o
> +vfio_ccw-objs += vfio_ccw_drv.o vfio_ccw_cp.o vfio_ccw_ops.o vfio_ccw_fsm.o \
> +	vfio_ccw_async.o
>   obj-$(CONFIG_VFIO_CCW) += vfio_ccw.o
> diff --git a/drivers/s390/cio/vfio_ccw_async.c b/drivers/s390/cio/vfio_ccw_async.c
> new file mode 100644
> index 000000000000..8c7f51d17d70
> --- /dev/null
> +++ b/drivers/s390/cio/vfio_ccw_async.c
> @@ -0,0 +1,88 @@
> +// SPDX-License-Identifier: GPL-2.0

...snip...


>   static void __exit vfio_ccw_sch_exit(void)
> diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c
> index f94aa01f9c36..0caf77e8f377 100644
> --- a/drivers/s390/cio/vfio_ccw_fsm.c
> +++ b/drivers/s390/cio/vfio_ccw_fsm.c
> @@ -3,8 +3,10 @@
>    * Finite state machine for vfio-ccw device handling
>    *
>    * Copyright IBM Corp. 2017
> + * Copyright Red Hat, Inc. 2018
>    *
>    * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
> + *            Cornelia Huck <cohuck@redhat.com>
>    */
>   
>   #include <linux/vfio.h>
> @@ -68,6 +70,81 @@ static int fsm_io_helper(struct vfio_ccw_private *private)
>   	return ret;
>   }
>   
> +static int fsm_do_halt(struct vfio_ccw_private *private)
> +{
> +	struct subchannel *sch;
> +	unsigned long flags;
> +	int ccode;
> +	int ret;
> +
> +	sch = private->sch;
> +
> +	spin_lock_irqsave(sch->lock, flags);
> +	private->state = VFIO_CCW_STATE_BUSY;
> +
> +	/* Issue "Halt Subchannel" */
> +	ccode = hsch(sch->schid);
> +
> +	switch (ccode) {
> +	case 0:
> +		/*
> +		 * Initialize device status information
> +		 */
> +		sch->schib.scsw.cmd.actl |= SCSW_ACTL_HALT_PEND;
> +		ret = 0;
> +		break;
> +	case 1:		/* Status pending */
> +	case 2:		/* Busy */
> +		ret = -EBUSY;
> +		break;
> +	case 3:		/* Device not operational */
> +	{
> +		ret = -ENODEV;
> +		break;
> +	}
> +	default:
> +		ret = ccode;
> +	}

Shouldn't you set the state back here?

> +	spin_unlock_irqrestore(sch->lock, flags);
> +	return ret;
> +}
> +
> +static int fsm_do_clear(struct vfio_ccw_private *private)
> +{
> +	struct subchannel *sch;
> +	unsigned long flags;
> +	int ccode;
> +	int ret;
> +
> +	sch = private->sch;
> +
> +	spin_lock_irqsave(sch->lock, flags);
> +	private->state = VFIO_CCW_STATE_BUSY;
> +
> +	/* Issue "Clear Subchannel" */
> +	ccode = csch(sch->schid);
> +
> +	switch (ccode) {
> +	case 0:
> +		/*
> +		 * Initialize device status information
> +		 */
> +		sch->schib.scsw.cmd.actl = SCSW_ACTL_CLEAR_PEND;
> +		/* TODO: check what else we might need to clear */
> +		ret = 0;
> +		break;
> +	case 3:		/* Device not operational */
> +	{
> +		ret = -ENODEV;
> +		break;
> +	}
> +	default:
> +		ret = ccode;
> +	}
> +	spin_unlock_irqrestore(sch->lock, flags);
> +	return ret;
> +}
> +
>   static void fsm_notoper(struct vfio_ccw_private *private,
>   			enum vfio_ccw_event event)
>   {
> @@ -102,6 +179,20 @@ static void fsm_io_busy(struct vfio_ccw_private *private,
>   	private->io_region->ret_code = -EBUSY;
>   }
>   
> +static void fsm_async_error(struct vfio_ccw_private *private,
> +			    enum vfio_ccw_event event)
> +{
> +	pr_err("vfio-ccw: FSM: halt/clear request from state:%d\n",
> +	       private->state);
> +	private->cmd_region->ret_code = -EIO;
> +}
> +
> +static void fsm_async_busy(struct vfio_ccw_private *private,
> +			   enum vfio_ccw_event event)
> +{
> +	private->cmd_region->ret_code = -EBUSY;
> +}
> +
>   static void fsm_disabled_irq(struct vfio_ccw_private *private,
>   			     enum vfio_ccw_event event)
>   {
> @@ -166,11 +257,11 @@ static void fsm_io_request(struct vfio_ccw_private *private,
>   		}
>   		return;
>   	} else if (scsw->cmd.fctl & SCSW_FCTL_HALT_FUNC) {
> -		/* XXX: Handle halt. */
> +		/* halt is handled via the async cmd region */
>   		io_region->ret_code = -EOPNOTSUPP;
>   		goto err_out;
>   	} else if (scsw->cmd.fctl & SCSW_FCTL_CLEAR_FUNC) {
> -		/* XXX: Handle clear. */
> +		/* clear is handled via the async cmd region */
>   		io_region->ret_code = -EOPNOTSUPP;
>   		goto err_out;

What about filtering inside the vfio_ccw_mdev_write_io_region() before 
the call to the FSM?


>   	}
> @@ -181,6 +272,59 @@ static void fsm_io_request(struct vfio_ccw_private *private,
>   			       io_region->ret_code, errstr);
>   }
>   
> +/*
> + * Deal with a halt request from userspace.
> + */
> +static void fsm_halt_request(struct vfio_ccw_private *private,
> +			     enum vfio_ccw_event event)
> +{
> +	struct ccw_cmd_region *cmd_region = private->cmd_region;
> +	int state = private->state;
> +
> +	private->state = VFIO_CCW_STATE_BOXED;
> +
> +	if (cmd_region->command != VFIO_CCW_ASYNC_CMD_HSCH) {
> +		/* should not happen? */

I think we should make sure it does not happen before we get here.
Like serializing HALT and CLEAR before the FSM.

> +		cmd_region->ret_code = -EINVAL;
> +		goto err_out;
> +	}
> +
> +	cmd_region->ret_code = fsm_do_halt(private);

fsm_do_halt() set the state to BUSY.
Do we need a state change here and in fsm_do_halt ?

Why not only the BUSY state?

> +	if (cmd_region->ret_code)
> +		goto err_out;
> +
> +	return;
> +
> +err_out:
> +	private->state = state;
> +}
> +

...snip...

Regards,
Pierre

-- 
Pierre Morel
Linux/KVM/QEMU in Böblingen - Germany

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/3] vfio-ccw: add capabilities chain
  2018-11-23 12:45     ` Cornelia Huck
@ 2018-11-23 13:26       ` Pierre Morel
  0 siblings, 0 replies; 54+ messages in thread
From: Pierre Morel @ 2018-11-23 13:26 UTC (permalink / raw)
  To: Cornelia Huck
  Cc: linux-s390, Eric Farman, Alex Williamson, kvm, Farhan Ali,
	qemu-devel, Halil Pasic, qemu-s390x

On 23/11/2018 13:45, Cornelia Huck wrote:
> On Fri, 23 Nov 2018 13:28:25 +0100
> Pierre Morel <pmorel@linux.ibm.com> wrote:
> 
>> On 22/11/2018 17:54, Cornelia Huck wrote:
>>> Allow to extend the regions used by vfio-ccw. The first user will be
>>> handling of halt and clear subchannel.
>>>
>>> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
>>> ---
>>>    drivers/s390/cio/vfio_ccw_ops.c     | 182 ++++++++++++++++++++++++----
>>>    drivers/s390/cio/vfio_ccw_private.h |  38 ++++++
>>>    include/uapi/linux/vfio.h           |   1 +
>>>    3 files changed, 195 insertions(+), 26 deletions(-)
>>>    
>>
>> Halt and clear have no parameters (the sub-channel ID is obviously the
>> one of the mediated device).
>>
>> Isn't adding a new sub-region for the purpose of handling halt and clear
>> superfluous?
>>
>> What is the reason not to use simple ioctls ?
> 
> Should it turn out that we missed something and need an enhanced
> interface, we can simply stop providing this subregion and add a new
> subregion, without breaking existing userspace. We can't do that with
> ioctls.

OK, that is a good reason. It took me a while, but I now see the benefit
of capabilities for regions too.

> 
> And moreover, this is only the first user of this infrastructure.
> There's also that path handling series that Dong Jia had posted early
> this year -- that would be an obvious user as well.
> 

right.

Thanks.

Regards,
Pierre

-- 
Pierre Morel
Linux/KVM/QEMU in Böblingen - Germany

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [qemu-s390x] [PATCH 0/3] vfio-ccw: support hsch/csch (kernel part)
  2018-11-22 16:54 [PATCH 0/3] vfio-ccw: support hsch/csch (kernel part) Cornelia Huck
                   ` (2 preceding siblings ...)
  2018-11-22 16:54 ` [PATCH 3/3] vfio-ccw: add handling for asnyc channel instructions Cornelia Huck
@ 2018-11-24 21:07 ` Halil Pasic
  2018-11-26  9:26   ` Cornelia Huck
  2018-11-26 18:57 ` Farhan Ali
  2018-12-04 12:38 ` Halil Pasic
  5 siblings, 1 reply; 54+ messages in thread
From: Halil Pasic @ 2018-11-24 21:07 UTC (permalink / raw)
  To: Cornelia Huck
  Cc: linux-s390, Eric Farman, Pierre Morel, kvm, qemu-s390x,
	Farhan Ali, qemu-devel, Alex Williamson

On Thu, 22 Nov 2018 17:54:29 +0100
Cornelia Huck <cohuck@redhat.com> wrote:

> [This is the Linux kernel part, git tree is available at
> https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/vfio-ccw.git
> vfio-ccw-caps
> 
> The companion QEMU patches are available at
> https://github.com/cohuck/qemu vfio-ccw-caps]
> 
> Currently, vfio-ccw only relays START SUBCHANNEL requests to the real
> device. This tends to work well for the most common 'good path'
> scenarios; however, as we emulate {HALT,CLEAR} SUBCHANNEL in QEMU,
> things like clearing pending requests at the device is currently not
> supported. This may be a problem for e.g. error recovery.
> 
> This patch series introduces capabilities (similar to what vfio-pci
> uses) and exposes a new async region for handling hsch/csch.

I'm on vacation and could not do more than skim over it real quick. FWIW
it looks very promising. I intend to give it an in depth review once I'm
back (i.e. second half of next week).

Regards,
Halil

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [qemu-s390x] [PATCH 0/3] vfio-ccw: support hsch/csch (kernel part)
  2018-11-24 21:07 ` [qemu-s390x] [PATCH 0/3] vfio-ccw: support hsch/csch (kernel part) Halil Pasic
@ 2018-11-26  9:26   ` Cornelia Huck
  0 siblings, 0 replies; 54+ messages in thread
From: Cornelia Huck @ 2018-11-26  9:26 UTC (permalink / raw)
  To: Halil Pasic
  Cc: linux-s390, Eric Farman, Pierre Morel, kvm, qemu-s390x,
	Farhan Ali, qemu-devel, Alex Williamson

On Sat, 24 Nov 2018 22:07:57 +0100
Halil Pasic <pasic@linux.ibm.com> wrote:

> On Thu, 22 Nov 2018 17:54:29 +0100
> Cornelia Huck <cohuck@redhat.com> wrote:
> 
> > [This is the Linux kernel part, git tree is available at
> > https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/vfio-ccw.git
> > vfio-ccw-caps
> > 
> > The companion QEMU patches are available at
> > https://github.com/cohuck/qemu vfio-ccw-caps]
> > 
> > Currently, vfio-ccw only relays START SUBCHANNEL requests to the real
> > device. This tends to work well for the most common 'good path'
> > scenarios; however, as we emulate {HALT,CLEAR} SUBCHANNEL in QEMU,
> > things like clearing pending requests at the device is currently not
> > supported. This may be a problem for e.g. error recovery.
> > 
> > This patch series introduces capabilities (similar to what vfio-pci
> > uses) and exposes a new async region for handling hsch/csch.  
> 
> I'm on vacation and could not do more than skim over it real quick. FWIW
> it looks very promising. I intend to give it an in depth review once I'm
> back (i.e. second half of next week).

Thanks!

Please do not feel pressured, it did take me long enough to get this
out :)

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 3/3] vfio-ccw: add handling for asnyc channel instructions
  2018-11-23 13:08   ` Pierre Morel
@ 2018-11-26  9:47     ` Cornelia Huck
  0 siblings, 0 replies; 54+ messages in thread
From: Cornelia Huck @ 2018-11-26  9:47 UTC (permalink / raw)
  To: Pierre Morel
  Cc: linux-s390, Eric Farman, Alex Williamson, kvm, Farhan Ali,
	qemu-devel, Halil Pasic, qemu-s390x

On Fri, 23 Nov 2018 14:08:03 +0100
Pierre Morel <pmorel@linux.ibm.com> wrote:

> On 22/11/2018 17:54, Cornelia Huck wrote:
> > Add a region to the vfio-ccw device that can be used to submit
> > asynchronous I/O instructions. ssch continues to be handled by the
> > existing I/O region; the new region handles hsch and csch.
> > 
> > Interrupt status continues to be reported through the same channels
> > as for ssch.
> > 
> > Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> > ---
> >   drivers/s390/cio/Makefile           |   3 +-
> >   drivers/s390/cio/vfio_ccw_async.c   |  88 ++++++++++++++++
> >   drivers/s390/cio/vfio_ccw_drv.c     |  48 ++++++---
> >   drivers/s390/cio/vfio_ccw_fsm.c     | 158 +++++++++++++++++++++++++++-
> >   drivers/s390/cio/vfio_ccw_ops.c     |  13 ++-
> >   drivers/s390/cio/vfio_ccw_private.h |   6 ++
> >   include/uapi/linux/vfio.h           |   4 +
> >   include/uapi/linux/vfio_ccw.h       |  12 +++
> >   8 files changed, 313 insertions(+), 19 deletions(-)
> >   create mode 100644 drivers/s390/cio/vfio_ccw_async.c

(...)

> > +static int fsm_do_halt(struct vfio_ccw_private *private)
> > +{
> > +	struct subchannel *sch;
> > +	unsigned long flags;
> > +	int ccode;
> > +	int ret;
> > +
> > +	sch = private->sch;
> > +
> > +	spin_lock_irqsave(sch->lock, flags);
> > +	private->state = VFIO_CCW_STATE_BUSY;
> > +
> > +	/* Issue "Halt Subchannel" */
> > +	ccode = hsch(sch->schid);
> > +
> > +	switch (ccode) {
> > +	case 0:
> > +		/*
> > +		 * Initialize device status information
> > +		 */
> > +		sch->schib.scsw.cmd.actl |= SCSW_ACTL_HALT_PEND;
> > +		ret = 0;
> > +		break;
> > +	case 1:		/* Status pending */
> > +	case 2:		/* Busy */
> > +		ret = -EBUSY;
> > +		break;
> > +	case 3:		/* Device not operational */
> > +	{
> > +		ret = -ENODEV;
> > +		break;
> > +	}
> > +	default:
> > +		ret = ccode;
> > +	}  
> 
> Shouldn't you set the state back here?

This is handled as for ssch, i.e. the state is restored by the caller.

> 
> > +	spin_unlock_irqrestore(sch->lock, flags);
> > +	return ret;
> > +}
> > +
> > +static int fsm_do_clear(struct vfio_ccw_private *private)
> > +{
> > +	struct subchannel *sch;
> > +	unsigned long flags;
> > +	int ccode;
> > +	int ret;
> > +
> > +	sch = private->sch;
> > +
> > +	spin_lock_irqsave(sch->lock, flags);
> > +	private->state = VFIO_CCW_STATE_BUSY;
> > +
> > +	/* Issue "Clear Subchannel" */
> > +	ccode = csch(sch->schid);
> > +
> > +	switch (ccode) {
> > +	case 0:
> > +		/*
> > +		 * Initialize device status information
> > +		 */
> > +		sch->schib.scsw.cmd.actl = SCSW_ACTL_CLEAR_PEND;
> > +		/* TODO: check what else we might need to clear */
> > +		ret = 0;
> > +		break;
> > +	case 3:		/* Device not operational */
> > +	{
> > +		ret = -ENODEV;
> > +		break;
> > +	}
> > +	default:
> > +		ret = ccode;
> > +	}
> > +	spin_unlock_irqrestore(sch->lock, flags);
> > +	return ret;

Same here, btw.

> > +}
> > +
> >   static void fsm_notoper(struct vfio_ccw_private *private,
> >   			enum vfio_ccw_event event)
> >   {
> > @@ -102,6 +179,20 @@ static void fsm_io_busy(struct vfio_ccw_private *private,
> >   	private->io_region->ret_code = -EBUSY;
> >   }
> >   
> > +static void fsm_async_error(struct vfio_ccw_private *private,
> > +			    enum vfio_ccw_event event)
> > +{
> > +	pr_err("vfio-ccw: FSM: halt/clear request from state:%d\n",
> > +	       private->state);
> > +	private->cmd_region->ret_code = -EIO;
> > +}
> > +
> > +static void fsm_async_busy(struct vfio_ccw_private *private,
> > +			   enum vfio_ccw_event event)
> > +{
> > +	private->cmd_region->ret_code = -EBUSY;
> > +}
> > +
> >   static void fsm_disabled_irq(struct vfio_ccw_private *private,
> >   			     enum vfio_ccw_event event)
> >   {
> > @@ -166,11 +257,11 @@ static void fsm_io_request(struct vfio_ccw_private *private,
> >   		}
> >   		return;
> >   	} else if (scsw->cmd.fctl & SCSW_FCTL_HALT_FUNC) {
> > -		/* XXX: Handle halt. */
> > +		/* halt is handled via the async cmd region */
> >   		io_region->ret_code = -EOPNOTSUPP;
> >   		goto err_out;
> >   	} else if (scsw->cmd.fctl & SCSW_FCTL_CLEAR_FUNC) {
> > -		/* XXX: Handle clear. */
> > +		/* clear is handled via the async cmd region */
> >   		io_region->ret_code = -EOPNOTSUPP;
> >   		goto err_out;  
> 
> What about filtering inside the vfio_ccw_mdev_write_io_region() before 
> the call to the FSM?

We can do that as well, maybe as a patch on top. What I like about
doing it here is that all poking into the I/O region is done in one
place. On the other hand, doing it beforehand saves us some churn.

> 
> 
> >   	}
> > @@ -181,6 +272,59 @@ static void fsm_io_request(struct vfio_ccw_private *private,
> >   			       io_region->ret_code, errstr);
> >   }
> >   
> > +/*
> > + * Deal with a halt request from userspace.
> > + */
> > +static void fsm_halt_request(struct vfio_ccw_private *private,
> > +			     enum vfio_ccw_event event)
> > +{
> > +	struct ccw_cmd_region *cmd_region = private->cmd_region;
> > +	int state = private->state;
> > +
> > +	private->state = VFIO_CCW_STATE_BOXED;
> > +
> > +	if (cmd_region->command != VFIO_CCW_ASYNC_CMD_HSCH) {
> > +		/* should not happen? */  
> 
> I think we should make sure it does not happen before we get here.
> Like serializing HALT and CLEAR before the FSM.

Given that there's only one generator of that event, that really should
not happen :) It would mean that we have messed up our code later on.
Maybe complain loudly here?

> 
> > +		cmd_region->ret_code = -EINVAL;
> > +		goto err_out;
> > +	}
> > +
> > +	cmd_region->ret_code = fsm_do_halt(private);  
> 
> fsm_do_halt() set the state to BUSY.
> Do we need a state change here and in fsm_do_halt ?
> 
> Why not only the BUSY state?

I basically took the ssch implementation and adapted it for halt/clear
handling. We can certainly think about doing state transitions in
different places, but I'd like to do that for all channel instructions
at the same time.

[Also note that this is still based on a version that still contains
the BOXED state.]

> 
> > +	if (cmd_region->ret_code)
> > +		goto err_out;
> > +
> > +	return;
> > +
> > +err_out:
> > +	private->state = state;
> > +}
> > +  
> 
> ...snip...
> 
> Regards,
> Pierre
> 

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 0/3] vfio-ccw: support hsch/csch (kernel part)
  2018-11-22 16:54 [PATCH 0/3] vfio-ccw: support hsch/csch (kernel part) Cornelia Huck
                   ` (3 preceding siblings ...)
  2018-11-24 21:07 ` [qemu-s390x] [PATCH 0/3] vfio-ccw: support hsch/csch (kernel part) Halil Pasic
@ 2018-11-26 18:57 ` Farhan Ali
  2018-11-26 19:00   ` Cornelia Huck
  2018-12-04 12:38 ` Halil Pasic
  5 siblings, 1 reply; 54+ messages in thread
From: Farhan Ali @ 2018-11-26 18:57 UTC (permalink / raw)
  To: Cornelia Huck, Halil Pasic, Eric Farman, Pierre Morel
  Cc: linux-s390, qemu-s390x, Alex Williamson, qemu-devel, kvm



On 11/22/2018 11:54 AM, Cornelia Huck wrote:
> [This is the Linux kernel part, git tree is available at
> https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/vfio-ccw.git vfio-ccw-caps
> 
> The companion QEMU patches are available at
> https://github.com/cohuck/qemu vfio-ccw-caps]
> 
> Currently, vfio-ccw only relays START SUBCHANNEL requests to the real
> device. This tends to work well for the most common 'good path' scenarios;
> however, as we emulate {HALT,CLEAR} SUBCHANNEL in QEMU, things like
> clearing pending requests at the device is currently not supported.
> This may be a problem for e.g. error recovery.
> 
> This patch series introduces capabilities (similar to what vfio-pci uses)
> and exposes a new async region for handling hsch/csch.
> 
> Very lightly tested (I can interact with a dasd as before; I have not
> found a reliable way to trigger hsch/csch in the Linux dasd guest driver.)
> 

I was able to trigger the guest DASD driver to issue a csch instruction, 
and from my brief testing it seems to be working just like it would on 
the LPAR. (I basically tested with 2 threads trying to issue DASD device 
reserve and release ioctls, on the same DASD device, in a busy loop).

I am going to spend some time doing a deeper review.

Thanks
Farhan


> Cornelia Huck (3):
>    vfio-ccw: add capabilities chain
>    s390/cio: export hsch to modules
>    vfio-ccw: add handling for asnyc channel instructions
> 
>   drivers/s390/cio/Makefile           |   3 +-
>   drivers/s390/cio/ioasm.c            |   1 +
>   drivers/s390/cio/vfio_ccw_async.c   |  88 +++++++++++++
>   drivers/s390/cio/vfio_ccw_drv.c     |  48 +++++--
>   drivers/s390/cio/vfio_ccw_fsm.c     | 158 +++++++++++++++++++++-
>   drivers/s390/cio/vfio_ccw_ops.c     | 195 ++++++++++++++++++++++++----
>   drivers/s390/cio/vfio_ccw_private.h |  44 +++++++
>   include/uapi/linux/vfio.h           |   5 +
>   include/uapi/linux/vfio_ccw.h       |  12 ++
>   9 files changed, 509 insertions(+), 45 deletions(-)
>   create mode 100644 drivers/s390/cio/vfio_ccw_async.c
> 

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 0/3] vfio-ccw: support hsch/csch (kernel part)
  2018-11-26 18:57 ` Farhan Ali
@ 2018-11-26 19:00   ` Cornelia Huck
  0 siblings, 0 replies; 54+ messages in thread
From: Cornelia Huck @ 2018-11-26 19:00 UTC (permalink / raw)
  To: Farhan Ali
  Cc: linux-s390, Eric Farman, Alex Williamson, Pierre Morel, kvm,
	qemu-devel, Halil Pasic, qemu-s390x

On Mon, 26 Nov 2018 13:57:06 -0500
Farhan Ali <alifm@linux.ibm.com> wrote:

> On 11/22/2018 11:54 AM, Cornelia Huck wrote:
> > [This is the Linux kernel part, git tree is available at
> > https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/vfio-ccw.git vfio-ccw-caps
> > 
> > The companion QEMU patches are available at
> > https://github.com/cohuck/qemu vfio-ccw-caps]
> > 
> > Currently, vfio-ccw only relays START SUBCHANNEL requests to the real
> > device. This tends to work well for the most common 'good path' scenarios;
> > however, as we emulate {HALT,CLEAR} SUBCHANNEL in QEMU, things like
> > clearing pending requests at the device is currently not supported.
> > This may be a problem for e.g. error recovery.
> > 
> > This patch series introduces capabilities (similar to what vfio-pci uses)
> > and exposes a new async region for handling hsch/csch.
> > 
> > Very lightly tested (I can interact with a dasd as before; I have not
> > found a reliable way to trigger hsch/csch in the Linux dasd guest driver.)
> >   
> 
> I was able to trigger the guest DASD driver to issue a csch instruction, 
> and from my brief testing it seems to be working just like it would on 
> the LPAR. (I basically tested with 2 threads trying to issue DASD device 
> reserve and release ioctls, on the same DASD device, in a busy loop).
> 
> I am going to spend some time doing a deeper review.

Cool, thanks a lot!

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/3] vfio-ccw: add capabilities chain
  2018-11-22 16:54 ` [PATCH 1/3] vfio-ccw: add capabilities chain Cornelia Huck
  2018-11-23 12:28   ` Pierre Morel
@ 2018-11-27 19:04   ` Farhan Ali
  2018-11-28  9:05     ` Cornelia Huck
  2018-12-17 21:53   ` Eric Farman
  2 siblings, 1 reply; 54+ messages in thread
From: Farhan Ali @ 2018-11-27 19:04 UTC (permalink / raw)
  To: Cornelia Huck, Halil Pasic, Eric Farman, Pierre Morel
  Cc: linux-s390, qemu-s390x, Alex Williamson, qemu-devel, kvm



On 11/22/2018 11:54 AM, Cornelia Huck wrote:
> diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
> index 078e46f9623d..a6f9f84526e2 100644
> --- a/drivers/s390/cio/vfio_ccw_private.h
> +++ b/drivers/s390/cio/vfio_ccw_private.h
> @@ -3,9 +3,11 @@
>    * Private stuff for vfio_ccw driver
>    *
>    * Copyright IBM Corp. 2017
> + * Copyright Red Hat, Inc. 2018
>    *
>    * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
>    *            Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
> + *            Cornelia Huck <cohuck@redhat.com>
>    */
>   
>   #ifndef _VFIO_CCW_PRIVATE_H_
> @@ -19,6 +21,38 @@
>   #include "css.h"
>   #include "vfio_ccw_cp.h"
>   
> +#define VFIO_CCW_OFFSET_SHIFT   40
> +#define VFIO_CCW_OFFSET_TO_INDEX(off)	(off >> VFIO_CCW_OFFSET_SHIFT)
> +#define VFIO_CCW_INDEX_TO_OFFSET(index)	((u64)(index) << VFIO_CCW_OFFSET_SHIFT)
> +#define VFIO_CCW_OFFSET_MASK	(((u64)(1) << VFIO_CCW_OFFSET_SHIFT) - 1)
> +

Why is the offset shift 40? I know vfio-pci is also using the same 
offset shift, but I am curious about the reasoning behind why we are 
using this? :)

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 3/3] vfio-ccw: add handling for asnyc channel instructions
  2018-11-22 16:54 ` [PATCH 3/3] vfio-ccw: add handling for asnyc channel instructions Cornelia Huck
  2018-11-23 13:08   ` Pierre Morel
@ 2018-11-27 19:09   ` Farhan Ali
  2018-11-28  9:02     ` Cornelia Huck
  2018-11-27 19:57   ` Farhan Ali
                     ` (2 subsequent siblings)
  4 siblings, 1 reply; 54+ messages in thread
From: Farhan Ali @ 2018-11-27 19:09 UTC (permalink / raw)
  To: Cornelia Huck, Halil Pasic, Eric Farman, Pierre Morel
  Cc: linux-s390, qemu-s390x, Alex Williamson, qemu-devel, kvm



On 11/22/2018 11:54 AM, Cornelia Huck wrote:
> Add a region to the vfio-ccw device that can be used to submit
> asynchronous I/O instructions. ssch continues to be handled by the
> existing I/O region; the new region handles hsch and csch.
> 
> Interrupt status continues to be reported through the same channels
> as for ssch.
> 
> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> ---
>   drivers/s390/cio/Makefile           |   3 +-
>   drivers/s390/cio/vfio_ccw_async.c   |  88 ++++++++++++++++
>   drivers/s390/cio/vfio_ccw_drv.c     |  48 ++++++---
>   drivers/s390/cio/vfio_ccw_fsm.c     | 158 +++++++++++++++++++++++++++-
>   drivers/s390/cio/vfio_ccw_ops.c     |  13 ++-
>   drivers/s390/cio/vfio_ccw_private.h |   6 ++
>   include/uapi/linux/vfio.h           |   4 +
>   include/uapi/linux/vfio_ccw.h       |  12 +++
>   8 files changed, 313 insertions(+), 19 deletions(-)
>   create mode 100644 drivers/s390/cio/vfio_ccw_async.c
> 
> diff --git a/drivers/s390/cio/Makefile b/drivers/s390/cio/Makefile
> index f230516abb96..f6a8db04177c 100644
> --- a/drivers/s390/cio/Makefile
> +++ b/drivers/s390/cio/Makefile
> @@ -20,5 +20,6 @@ obj-$(CONFIG_CCWGROUP) += ccwgroup.o
>   qdio-objs := qdio_main.o qdio_thinint.o qdio_debug.o qdio_setup.o
>   obj-$(CONFIG_QDIO) += qdio.o
>   
> -vfio_ccw-objs += vfio_ccw_drv.o vfio_ccw_cp.o vfio_ccw_ops.o vfio_ccw_fsm.o
> +vfio_ccw-objs += vfio_ccw_drv.o vfio_ccw_cp.o vfio_ccw_ops.o vfio_ccw_fsm.o \
> +	vfio_ccw_async.o
>   obj-$(CONFIG_VFIO_CCW) += vfio_ccw.o
> diff --git a/drivers/s390/cio/vfio_ccw_async.c b/drivers/s390/cio/vfio_ccw_async.c
> new file mode 100644
> index 000000000000..8c7f51d17d70
> --- /dev/null
> +++ b/drivers/s390/cio/vfio_ccw_async.c
> @@ -0,0 +1,88 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Async I/O region for vfio_ccw
> + *
> + * Copyright Red Hat, Inc. 2018
> + *
> + * Author(s): Cornelia Huck <cohuck@redhat.com>
> + */
> +
> +#include <linux/vfio.h>
> +#include <linux/mdev.h>
> +
> +#include "vfio_ccw_private.h"
> +
> +static size_t vfio_ccw_async_region_read(struct vfio_ccw_private *private,
> +					 char __user *buf, size_t count,
> +					 loff_t *ppos)
> +{
> +	unsigned int i = VFIO_CCW_OFFSET_TO_INDEX(*ppos) - VFIO_CCW_NUM_REGIONS;
> +	loff_t pos = *ppos & VFIO_CCW_OFFSET_MASK;
> +	struct ccw_cmd_region *region;
> +
> +	if (pos + count > sizeof(*region))
> +		return -EINVAL;
> +
> +	region = private->region[i].data;
> +	if (copy_to_user(buf, (void *)region + pos, count))
> +		return -EFAULT;
> +
> +	return count;
> +
> +}
> +
> +static size_t vfio_ccw_async_region_write(struct vfio_ccw_private *private,
> +					  const char __user *buf, size_t count,
> +					  loff_t *ppos)
> +{
> +	unsigned int i = VFIO_CCW_OFFSET_TO_INDEX(*ppos) - VFIO_CCW_NUM_REGIONS;
> +	loff_t pos = *ppos & VFIO_CCW_OFFSET_MASK;
> +	struct ccw_cmd_region *region;
> +
> +	if (pos + count > sizeof(*region))
> +		return -EINVAL;
> +
> +	if (private->state == VFIO_CCW_STATE_NOT_OPER ||
> +	    private->state == VFIO_CCW_STATE_STANDBY)
> +		return -EACCES;
> +
> +	region = private->region[i].data;
> +	if (copy_from_user((void *)region + pos, buf, count))
> +		return -EFAULT;
> +
> +	switch (region->command) {
> +	case VFIO_CCW_ASYNC_CMD_HSCH:
> +		vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_HALT_REQ);
> +		break;
> +	case VFIO_CCW_ASYNC_CMD_CSCH:
> +		vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_CLEAR_REQ);
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	return region->ret_code ? region->ret_code : count;
> +}
> +
> +static void vfio_ccw_async_region_release(struct vfio_ccw_private *private,
> +					  struct vfio_ccw_region *region)
> +{
> +
> +}
> +
> +const struct vfio_ccw_regops vfio_ccw_async_region_ops = {
> +	.read = vfio_ccw_async_region_read,
> +	.write = vfio_ccw_async_region_write,
> +	.release = vfio_ccw_async_region_release,
> +};
> +
> +int vfio_ccw_register_async_dev_regions(struct vfio_ccw_private *private)
> +{
> +	return vfio_ccw_register_dev_region(private,
> +					    VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD,
> +					    &vfio_ccw_async_region_ops,
> +					    sizeof(struct ccw_cmd_region),
> +					    VFIO_REGION_INFO_FLAG_READ |
> +					    VFIO_REGION_INFO_FLAG_WRITE,
> +					    private->cmd_region);
> +}
> diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
> index a10cec0e86eb..890c588a3a61 100644
> --- a/drivers/s390/cio/vfio_ccw_drv.c
> +++ b/drivers/s390/cio/vfio_ccw_drv.c
> @@ -3,9 +3,11 @@
>    * VFIO based Physical Subchannel device driver
>    *
>    * Copyright IBM Corp. 2017
> + * Copyright Red Hat, Inc. 2018
>    *
>    * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
>    *            Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
> + *            Cornelia Huck <cohuck@redhat.com>
>    */
>   
>   #include <linux/module.h>
> @@ -23,6 +25,7 @@
>   
>   struct workqueue_struct *vfio_ccw_work_q;
>   static struct kmem_cache *vfio_ccw_io_region;
> +static struct kmem_cache *vfio_ccw_cmd_region;
>   
>   /*
>    * Helpers
> @@ -76,7 +79,8 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
>   	private = container_of(work, struct vfio_ccw_private, io_work);
>   	irb = &private->irb;
>   
> -	if (scsw_is_solicited(&irb->scsw)) {
> +	if (scsw_is_solicited(&irb->scsw) &&
> +	    (scsw_fctl(&irb->scsw) & SCSW_FCTL_START_FUNC)) {
>   		cp_update_scsw(&private->cp, &irb->scsw);
>   		cp_free(&private->cp);
>   	}

I am a little confused about this. Why do we need to update the scsw.cpa
if the start function bit is set in the function control field? Is it an
optimization?

The Linux CIO code does accumulate the scsw.cpa and does not depend on
the start function bit in the function control field, or did I miss
something?

Maybe we could update the condition in the if statement to match what the
Linux CIO layer does here?
https://elixir.bootlin.com/linux/latest/source/drivers/s390/cio/device_status.c#L265

> @@ -104,7 +108,7 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
>   {
>   	struct pmcw *pmcw = &sch->schib.pmcw;
>   	struct vfio_ccw_private *private;
> -	int ret;
> +	int ret = -ENOMEM;
>   
>   	if (pmcw->qf) {
>   		dev_warn(&sch->dev, "vfio: ccw: does not support QDIO: %s\n",
> @@ -118,10 +122,13 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
>   
>   	private->io_region = kmem_cache_zalloc(vfio_ccw_io_region,
>   					       GFP_KERNEL | GFP_DMA);
> -	if (!private->io_region) {
> -		kfree(private);
> -		return -ENOMEM;
> -	}
> +	if (!private->io_region)
> +		goto out_free;
> +
> +	private->cmd_region = kmem_cache_zalloc(vfio_ccw_cmd_region,
> +						GFP_KERNEL | GFP_DMA);
> +	if (!private->cmd_region)
> +		goto out_free;
>   
>   	private->sch = sch;
>   	dev_set_drvdata(&sch->dev, private);
> @@ -148,7 +155,10 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
>   	cio_disable_subchannel(sch);
>   out_free:
>   	dev_set_drvdata(&sch->dev, NULL);
> -	kmem_cache_free(vfio_ccw_io_region, private->io_region);
> +	if (private->cmd_region)
> +		kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region);
> +	if (private->io_region)
> +		kmem_cache_free(vfio_ccw_io_region, private->io_region);
>   	kfree(private);
>   	return ret;
>   }
> @@ -237,7 +247,7 @@ static struct css_driver vfio_ccw_sch_driver = {
>   
>   static int __init vfio_ccw_sch_init(void)
>   {
> -	int ret;
> +	int ret = -ENOMEM;
>   
>   	vfio_ccw_work_q = create_singlethread_workqueue("vfio-ccw");
>   	if (!vfio_ccw_work_q)
> @@ -247,20 +257,30 @@ static int __init vfio_ccw_sch_init(void)
>   					sizeof(struct ccw_io_region), 0,
>   					SLAB_ACCOUNT, 0,
>   					sizeof(struct ccw_io_region), NULL);
> -	if (!vfio_ccw_io_region) {
> -		destroy_workqueue(vfio_ccw_work_q);
> -		return -ENOMEM;
> -	}
> +	if (!vfio_ccw_io_region)
> +		goto out_err;
> +
> +	vfio_ccw_cmd_region = kmem_cache_create_usercopy("vfio_ccw_cmd_region",
> +					sizeof(struct ccw_cmd_region), 0,
> +					SLAB_ACCOUNT, 0,
> +					sizeof(struct ccw_cmd_region), NULL);
> +	if (!vfio_ccw_cmd_region)
> +		goto out_err;
>   
>   	isc_register(VFIO_CCW_ISC);
>   	ret = css_driver_register(&vfio_ccw_sch_driver);
>   	if (ret) {
>   		isc_unregister(VFIO_CCW_ISC);
> -		kmem_cache_destroy(vfio_ccw_io_region);
> -		destroy_workqueue(vfio_ccw_work_q);
> +		goto out_err;
>   	}
>   
>   	return ret;
> +
> +out_err:
> +	kmem_cache_destroy(vfio_ccw_cmd_region);
> +	kmem_cache_destroy(vfio_ccw_io_region);
> +	destroy_workqueue(vfio_ccw_work_q);
> +	return ret;
>   }
>   
>   static void __exit vfio_ccw_sch_exit(void)
> diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c
> index f94aa01f9c36..0caf77e8f377 100644
> --- a/drivers/s390/cio/vfio_ccw_fsm.c
> +++ b/drivers/s390/cio/vfio_ccw_fsm.c
> @@ -3,8 +3,10 @@
>    * Finite state machine for vfio-ccw device handling
>    *
>    * Copyright IBM Corp. 2017
> + * Copyright Red Hat, Inc. 2018
>    *
>    * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
> + *            Cornelia Huck <cohuck@redhat.com>
>    */
>   
>   #include <linux/vfio.h>
> @@ -68,6 +70,81 @@ static int fsm_io_helper(struct vfio_ccw_private *private)
>   	return ret;
>   }
>   
> +static int fsm_do_halt(struct vfio_ccw_private *private)
> +{
> +	struct subchannel *sch;
> +	unsigned long flags;
> +	int ccode;
> +	int ret;
> +
> +	sch = private->sch;
> +
> +	spin_lock_irqsave(sch->lock, flags);
> +	private->state = VFIO_CCW_STATE_BUSY;
> +
> +	/* Issue "Halt Subchannel" */
> +	ccode = hsch(sch->schid);
> +
> +	switch (ccode) {
> +	case 0:
> +		/*
> +		 * Initialize device status information
> +		 */
> +		sch->schib.scsw.cmd.actl |= SCSW_ACTL_HALT_PEND;
> +		ret = 0;
> +		break;
> +	case 1:		/* Status pending */
> +	case 2:		/* Busy */
> +		ret = -EBUSY;
> +		break;
> +	case 3:		/* Device not operational */
> +	{
> +		ret = -ENODEV;
> +		break;
> +	}
> +	default:
> +		ret = ccode;
> +	}
> +	spin_unlock_irqrestore(sch->lock, flags);
> +	return ret;
> +}
> +
> +static int fsm_do_clear(struct vfio_ccw_private *private)
> +{
> +	struct subchannel *sch;
> +	unsigned long flags;
> +	int ccode;
> +	int ret;
> +
> +	sch = private->sch;
> +
> +	spin_lock_irqsave(sch->lock, flags);
> +	private->state = VFIO_CCW_STATE_BUSY;
> +
> +	/* Issue "Clear Subchannel" */
> +	ccode = csch(sch->schid);
> +
> +	switch (ccode) {
> +	case 0:
> +		/*
> +		 * Initialize device status information
> +		 */
> +		sch->schib.scsw.cmd.actl = SCSW_ACTL_CLEAR_PEND;
> +		/* TODO: check what else we might need to clear */
> +		ret = 0;
> +		break;
> +	case 3:		/* Device not operational */
> +	{
> +		ret = -ENODEV;
> +		break;
> +	}
> +	default:
> +		ret = ccode;
> +	}
> +	spin_unlock_irqrestore(sch->lock, flags);
> +	return ret;
> +}
> +
>   static void fsm_notoper(struct vfio_ccw_private *private,
>   			enum vfio_ccw_event event)
>   {
> @@ -102,6 +179,20 @@ static void fsm_io_busy(struct vfio_ccw_private *private,
>   	private->io_region->ret_code = -EBUSY;
>   }
>   
> +static void fsm_async_error(struct vfio_ccw_private *private,
> +			    enum vfio_ccw_event event)
> +{
> +	pr_err("vfio-ccw: FSM: halt/clear request from state:%d\n",
> +	       private->state);
> +	private->cmd_region->ret_code = -EIO;
> +}
> +
> +static void fsm_async_busy(struct vfio_ccw_private *private,
> +			   enum vfio_ccw_event event)
> +{
> +	private->cmd_region->ret_code = -EBUSY;
> +}
> +
>   static void fsm_disabled_irq(struct vfio_ccw_private *private,
>   			     enum vfio_ccw_event event)
>   {
> @@ -166,11 +257,11 @@ static void fsm_io_request(struct vfio_ccw_private *private,
>   		}
>   		return;
>   	} else if (scsw->cmd.fctl & SCSW_FCTL_HALT_FUNC) {
> -		/* XXX: Handle halt. */
> +		/* halt is handled via the async cmd region */
>   		io_region->ret_code = -EOPNOTSUPP;
>   		goto err_out;
>   	} else if (scsw->cmd.fctl & SCSW_FCTL_CLEAR_FUNC) {
> -		/* XXX: Handle clear. */
> +		/* clear is handled via the async cmd region */
>   		io_region->ret_code = -EOPNOTSUPP;
>   		goto err_out;
>   	}
> @@ -181,6 +272,59 @@ static void fsm_io_request(struct vfio_ccw_private *private,
>   			       io_region->ret_code, errstr);
>   }
>   
> +/*
> + * Deal with a halt request from userspace.
> + */
> +static void fsm_halt_request(struct vfio_ccw_private *private,
> +			     enum vfio_ccw_event event)
> +{
> +	struct ccw_cmd_region *cmd_region = private->cmd_region;
> +	int state = private->state;
> +
> +	private->state = VFIO_CCW_STATE_BOXED;
> +
> +	if (cmd_region->command != VFIO_CCW_ASYNC_CMD_HSCH) {
> +		/* should not happen? */
> +		cmd_region->ret_code = -EINVAL;
> +		goto err_out;
> +	}
> +
> +	cmd_region->ret_code = fsm_do_halt(private);
> +	if (cmd_region->ret_code)
> +		goto err_out;
> +
> +	return;
> +
> +err_out:
> +	private->state = state;
> +}
> +
> +/*
> + * Deal with a clear request from userspace.
> + */
> +static void fsm_clear_request(struct vfio_ccw_private *private,
> +			      enum vfio_ccw_event event)
> +{
> +	struct ccw_cmd_region *cmd_region = private->cmd_region;
> +	int state = private->state;
> +
> +	private->state = VFIO_CCW_STATE_BOXED;
> +
> +	if (cmd_region->command != VFIO_CCW_ASYNC_CMD_CSCH) {
> +		/* should not happen? */
> +		cmd_region->ret_code = -EINVAL;
> +		goto err_out;
> +	}
> +
> +	cmd_region->ret_code = fsm_do_clear(private);
> +	if (cmd_region->ret_code)
> +		goto err_out;
> +
> +	return;
> +
> +err_out:
> +	private->state = state;
> +}
>   /*
>    * Got an interrupt for a normal io (state busy).
>    */
> @@ -204,26 +348,36 @@ fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = {
>   	[VFIO_CCW_STATE_NOT_OPER] = {
>   		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_nop,
>   		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_error,
> +		[VFIO_CCW_EVENT_HALT_REQ]	= fsm_async_error,
> +		[VFIO_CCW_EVENT_CLEAR_REQ]	= fsm_async_error,
>   		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_disabled_irq,
>   	},
>   	[VFIO_CCW_STATE_STANDBY] = {
>   		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
>   		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_error,
> +		[VFIO_CCW_EVENT_HALT_REQ]	= fsm_async_error,
> +		[VFIO_CCW_EVENT_CLEAR_REQ]	= fsm_async_error,
>   		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
>   	},
>   	[VFIO_CCW_STATE_IDLE] = {
>   		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
>   		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_request,
> +		[VFIO_CCW_EVENT_HALT_REQ]	= fsm_halt_request,
> +		[VFIO_CCW_EVENT_CLEAR_REQ]	= fsm_clear_request,
>   		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
>   	},
>   	[VFIO_CCW_STATE_BOXED] = {
>   		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
>   		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_busy,
> +		[VFIO_CCW_EVENT_HALT_REQ]	= fsm_async_busy,
> +		[VFIO_CCW_EVENT_CLEAR_REQ]	= fsm_async_busy,
>   		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
>   	},
>   	[VFIO_CCW_STATE_BUSY] = {
>   		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
>   		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_busy,
> +		[VFIO_CCW_EVENT_HALT_REQ]	= fsm_halt_request,
> +		[VFIO_CCW_EVENT_CLEAR_REQ]	= fsm_clear_request,
>   		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
>   	},
>   };
> diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
> index a5d731ed2a39..0e1f7f7bf927 100644
> --- a/drivers/s390/cio/vfio_ccw_ops.c
> +++ b/drivers/s390/cio/vfio_ccw_ops.c
> @@ -148,11 +148,20 @@ static int vfio_ccw_mdev_open(struct mdev_device *mdev)
>   	struct vfio_ccw_private *private =
>   		dev_get_drvdata(mdev_parent_dev(mdev));
>   	unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
> +	int ret;
>   
>   	private->nb.notifier_call = vfio_ccw_mdev_notifier;
>   
> -	return vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
> -				      &events, &private->nb);
> +	ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
> +				     &events, &private->nb);
> +	if (ret)
> +		return ret;
> +
> +	ret = vfio_ccw_register_async_dev_regions(private);
> +	if (ret)
> +		vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
> +					 &private->nb);
> +	return ret;
>   }
>   
>   static void vfio_ccw_mdev_release(struct mdev_device *mdev)
> diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
> index a6f9f84526e2..1a41a14831ae 100644
> --- a/drivers/s390/cio/vfio_ccw_private.h
> +++ b/drivers/s390/cio/vfio_ccw_private.h
> @@ -53,6 +53,8 @@ int vfio_ccw_register_dev_region(struct vfio_ccw_private *private,
>   				 const struct vfio_ccw_regops *ops,
>   				 size_t size, u32 flags, void *data);
>   
> +int vfio_ccw_register_async_dev_regions(struct vfio_ccw_private *private);
> +
>   /**
>    * struct vfio_ccw_private
>    * @sch: pointer to the subchannel
> @@ -62,6 +64,7 @@ int vfio_ccw_register_dev_region(struct vfio_ccw_private *private,
>    * @mdev: pointer to the mediated device
>    * @nb: notifier for vfio events
>    * @io_region: MMIO region to input/output I/O arguments/results
> + * @cmd_region: MMIO region for asynchronous I/O commands other than START
>    * @region: additional regions for other subchannel operations
>    * @num_regions: number of additional regions
>    * @cp: channel program for the current I/O operation
> @@ -79,6 +82,7 @@ struct vfio_ccw_private {
>   	struct notifier_block	nb;
>   	struct ccw_io_region	*io_region;
>   	struct vfio_ccw_region *region;
> +	struct ccw_cmd_region	*cmd_region;
>   	int num_regions;
>   
>   	struct channel_program	cp;
> @@ -114,6 +118,8 @@ enum vfio_ccw_event {
>   	VFIO_CCW_EVENT_NOT_OPER,
>   	VFIO_CCW_EVENT_IO_REQ,
>   	VFIO_CCW_EVENT_INTERRUPT,
> +	VFIO_CCW_EVENT_HALT_REQ,
> +	VFIO_CCW_EVENT_CLEAR_REQ,
>   	/* last element! */
>   	NR_VFIO_CCW_EVENTS
>   };
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index 565669f95534..c01472ec77ea 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -304,6 +304,7 @@ struct vfio_region_info_cap_type {
>   #define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG	(2)
>   #define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG	(3)
>   
> +
>   #define VFIO_REGION_TYPE_GFX                    (1)
>   #define VFIO_REGION_SUBTYPE_GFX_EDID            (1)
>   
> @@ -354,6 +355,9 @@ struct vfio_region_gfx_edid {
>   #define VFIO_DEVICE_GFX_LINK_STATE_DOWN  2
>   };
>   
> +/* ccw sub-types */
> +#define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD	(1)
> +
>   /*
>    * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped
>    * which allows direct access to non-MSIX registers which happened to be within
> diff --git a/include/uapi/linux/vfio_ccw.h b/include/uapi/linux/vfio_ccw.h
> index 2ec5f367ff78..cbecbf0cd54f 100644
> --- a/include/uapi/linux/vfio_ccw.h
> +++ b/include/uapi/linux/vfio_ccw.h
> @@ -12,6 +12,7 @@
>   
>   #include <linux/types.h>
>   
> +/* used for START SUBCHANNEL, always present */
>   struct ccw_io_region {
>   #define ORB_AREA_SIZE 12
>   	__u8	orb_area[ORB_AREA_SIZE];
> @@ -22,4 +23,15 @@ struct ccw_io_region {
>   	__u32	ret_code;
>   } __packed;
>   
> +/*
> + * used for processing commands that trigger asynchronous actions
> + * Note: this is controlled by a capability
> + */
> +#define VFIO_CCW_ASYNC_CMD_HSCH (1 << 0)
> +#define VFIO_CCW_ASYNC_CMD_CSCH (1 << 1)
> +struct ccw_cmd_region {
> +	__u32 command;
> +	__u32 ret_code;
> +} __packed;
> +
>   #endif
> 

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 3/3] vfio-ccw: add handling for asnyc channel instructions
  2018-11-22 16:54 ` [PATCH 3/3] vfio-ccw: add handling for asnyc channel instructions Cornelia Huck
  2018-11-23 13:08   ` Pierre Morel
  2018-11-27 19:09   ` Farhan Ali
@ 2018-11-27 19:57   ` Farhan Ali
  2018-11-28  8:41     ` Cornelia Huck
  2018-11-28 16:36   ` [qemu-s390x] " Halil Pasic
  2018-12-17 21:54   ` Eric Farman
  4 siblings, 1 reply; 54+ messages in thread
From: Farhan Ali @ 2018-11-27 19:57 UTC (permalink / raw)
  To: Cornelia Huck, Halil Pasic, Eric Farman, Pierre Morel
  Cc: linux-s390, qemu-s390x, Alex Williamson, qemu-devel, kvm



On 11/22/2018 11:54 AM, Cornelia Huck wrote:
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index 565669f95534..c01472ec77ea 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -304,6 +304,7 @@ struct vfio_region_info_cap_type {
>   #define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG	(2)
>   #define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG	(3)
>   
> +

Whitespace error?

>   #define VFIO_REGION_TYPE_GFX                    (1)
>   #define VFIO_REGION_SUBTYPE_GFX_EDID            (1)
>   
> @@ -354,6 +355,9 @@ struct vfio_region_gfx_edid {
>   #define VFIO_DEVICE_GFX_LINK_STATE_DOWN  2
>   };
>   
> +/* ccw sub-types */
> +#define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD	(1)
> +

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 3/3] vfio-ccw: add handling for asnyc channel instructions
  2018-11-27 19:57   ` Farhan Ali
@ 2018-11-28  8:41     ` Cornelia Huck
  0 siblings, 0 replies; 54+ messages in thread
From: Cornelia Huck @ 2018-11-28  8:41 UTC (permalink / raw)
  To: Farhan Ali
  Cc: linux-s390, Eric Farman, Alex Williamson, Pierre Morel, kvm,
	qemu-devel, Halil Pasic, qemu-s390x

On Tue, 27 Nov 2018 14:57:31 -0500
Farhan Ali <alifm@linux.ibm.com> wrote:

> On 11/22/2018 11:54 AM, Cornelia Huck wrote:
> > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> > index 565669f95534..c01472ec77ea 100644
> > --- a/include/uapi/linux/vfio.h
> > +++ b/include/uapi/linux/vfio.h
> > @@ -304,6 +304,7 @@ struct vfio_region_info_cap_type {
> >   #define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG	(2)
> >   #define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG	(3)
> >   
> > +  
> 
> Whitespace error?

Probably crept in while I was looking for a home for the #define below.
Removed.

> 
> >   #define VFIO_REGION_TYPE_GFX                    (1)
> >   #define VFIO_REGION_SUBTYPE_GFX_EDID            (1)
> >   
> > @@ -354,6 +355,9 @@ struct vfio_region_gfx_edid {
> >   #define VFIO_DEVICE_GFX_LINK_STATE_DOWN  2
> >   };
> >   
> > +/* ccw sub-types */
> > +#define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD	(1)
> > +  
> 

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 3/3] vfio-ccw: add handling for asnyc channel instructions
  2018-11-27 19:09   ` Farhan Ali
@ 2018-11-28  9:02     ` Cornelia Huck
  2018-11-28 14:31       ` Farhan Ali
  0 siblings, 1 reply; 54+ messages in thread
From: Cornelia Huck @ 2018-11-28  9:02 UTC (permalink / raw)
  To: Farhan Ali
  Cc: linux-s390, Eric Farman, Alex Williamson, Pierre Morel, kvm,
	qemu-devel, Halil Pasic, qemu-s390x

On Tue, 27 Nov 2018 14:09:49 -0500
Farhan Ali <alifm@linux.ibm.com> wrote:

> On 11/22/2018 11:54 AM, Cornelia Huck wrote:
> > Add a region to the vfio-ccw device that can be used to submit
> > asynchronous I/O instructions. ssch continues to be handled by the
> > existing I/O region; the new region handles hsch and csch.
> > 
> > Interrupt status continues to be reported through the same channels
> > as for ssch.
> > 
> > Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> > ---
> >   drivers/s390/cio/Makefile           |   3 +-
> >   drivers/s390/cio/vfio_ccw_async.c   |  88 ++++++++++++++++
> >   drivers/s390/cio/vfio_ccw_drv.c     |  48 ++++++---
> >   drivers/s390/cio/vfio_ccw_fsm.c     | 158 +++++++++++++++++++++++++++-
> >   drivers/s390/cio/vfio_ccw_ops.c     |  13 ++-
> >   drivers/s390/cio/vfio_ccw_private.h |   6 ++
> >   include/uapi/linux/vfio.h           |   4 +
> >   include/uapi/linux/vfio_ccw.h       |  12 +++
> >   8 files changed, 313 insertions(+), 19 deletions(-)
> >   create mode 100644 drivers/s390/cio/vfio_ccw_async.c
> > 

> > @@ -76,7 +79,8 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
> >   	private = container_of(work, struct vfio_ccw_private, io_work);
> >   	irb = &private->irb;
> >   
> > -	if (scsw_is_solicited(&irb->scsw)) {
> > +	if (scsw_is_solicited(&irb->scsw) &&
> > +	    (scsw_fctl(&irb->scsw) & SCSW_FCTL_START_FUNC)) {
> >   		cp_update_scsw(&private->cp, &irb->scsw);
> >   		cp_free(&private->cp);
> >   	}  
> 
> I am a little confused about this. Why do we need to update the scsw.cpa 
> if we have the start function function control bit set? Is it an 
> optimization?

No, it's not an optimization. This is the work function that is
scheduled if we get an interrupt for the device. Previously, we only
got an interrupt either when the device presented us with an unsolicited
status or as a response to the channel program we submitted. Now, we can
get an interrupt for halt/clear subchannel as well, and in that case, we
don't necessarily have a cp.

[Thinking some more about it, we need to verify if the start function
actually remains set if we try to terminate a running channel program
with halt/clear. A clear might scrub too much. If that's the case, we
also need to free the cp if the start function is not set.]

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/3] vfio-ccw: add capabilities chain
  2018-11-27 19:04   ` Farhan Ali
@ 2018-11-28  9:05     ` Cornelia Huck
  0 siblings, 0 replies; 54+ messages in thread
From: Cornelia Huck @ 2018-11-28  9:05 UTC (permalink / raw)
  To: Farhan Ali
  Cc: linux-s390, Eric Farman, Alex Williamson, Pierre Morel, kvm,
	qemu-devel, Halil Pasic, qemu-s390x

On Tue, 27 Nov 2018 14:04:49 -0500
Farhan Ali <alifm@linux.ibm.com> wrote:

> On 11/22/2018 11:54 AM, Cornelia Huck wrote:
> > diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
> > index 078e46f9623d..a6f9f84526e2 100644
> > --- a/drivers/s390/cio/vfio_ccw_private.h
> > +++ b/drivers/s390/cio/vfio_ccw_private.h
> > @@ -3,9 +3,11 @@
> >    * Private stuff for vfio_ccw driver
> >    *
> >    * Copyright IBM Corp. 2017
> > + * Copyright Red Hat, Inc. 2018
> >    *
> >    * Author(s): Dong Jia Shi<bjsdjshi@linux.vnet.ibm.com>
> >    *            Xiao Feng Ren<renxiaof@linux.vnet.ibm.com>
> > + *            Cornelia Huck<cohuck@redhat.com>
> >    */
> >   
> >   #ifndef_VFIO_CCW_PRIVATE_H_
> > @@ -19,6 +21,38 @@
> >   #include "css.h"
> >   #include "vfio_ccw_cp.h"
> >   
> > +#define VFIO_CCW_OFFSET_SHIFT   40
> > +#define VFIO_CCW_OFFSET_TO_INDEX(off)	(off >> VFIO_CCW_OFFSET_SHIFT)
> > +#define VFIO_CCW_INDEX_TO_OFFSET(index)	((u64)(index) << VFIO_CCW_OFFSET_SHIFT)
> > +#define VFIO_CCW_OFFSET_MASK	(((u64)(1) << VFIO_CCW_OFFSET_SHIFT) - 1)
> > +  
> 
> Why is the offset shift 40? I know vfio-pci is also using the same 
> offset shift, but I am curious about the reasoning behind why we are 
> using this? :)
> 

My entire reasoning was "hey, vfio-pci is using this, so it should not
be bad" 8)

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 3/3] vfio-ccw: add handling for asnyc channel instructions
  2018-11-28  9:02     ` Cornelia Huck
@ 2018-11-28 14:31       ` Farhan Ali
  2018-11-28 14:52         ` Cornelia Huck
  0 siblings, 1 reply; 54+ messages in thread
From: Farhan Ali @ 2018-11-28 14:31 UTC (permalink / raw)
  To: Cornelia Huck
  Cc: linux-s390, Eric Farman, Alex Williamson, Pierre Morel, kvm,
	qemu-devel, Halil Pasic, qemu-s390x



On 11/28/2018 04:02 AM, Cornelia Huck wrote:
> On Tue, 27 Nov 2018 14:09:49 -0500
> Farhan Ali <alifm@linux.ibm.com> wrote:
> 
>> On 11/22/2018 11:54 AM, Cornelia Huck wrote:
>>> Add a region to the vfio-ccw device that can be used to submit
>>> asynchronous I/O instructions. ssch continues to be handled by the
>>> existing I/O region; the new region handles hsch and csch.
>>>
>>> Interrupt status continues to be reported through the same channels
>>> as for ssch.
>>>
>>> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
>>> ---
>>>    drivers/s390/cio/Makefile           |   3 +-
>>>    drivers/s390/cio/vfio_ccw_async.c   |  88 ++++++++++++++++
>>>    drivers/s390/cio/vfio_ccw_drv.c     |  48 ++++++---
>>>    drivers/s390/cio/vfio_ccw_fsm.c     | 158 +++++++++++++++++++++++++++-
>>>    drivers/s390/cio/vfio_ccw_ops.c     |  13 ++-
>>>    drivers/s390/cio/vfio_ccw_private.h |   6 ++
>>>    include/uapi/linux/vfio.h           |   4 +
>>>    include/uapi/linux/vfio_ccw.h       |  12 +++
>>>    8 files changed, 313 insertions(+), 19 deletions(-)
>>>    create mode 100644 drivers/s390/cio/vfio_ccw_async.c
>>>
> 
>>> @@ -76,7 +79,8 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
>>>    	private = container_of(work, struct vfio_ccw_private, io_work);
>>>    	irb = &private->irb;
>>>    
>>> -	if (scsw_is_solicited(&irb->scsw)) {
>>> +	if (scsw_is_solicited(&irb->scsw) &&
>>> +	    (scsw_fctl(&irb->scsw) & SCSW_FCTL_START_FUNC)) {
>>>    		cp_update_scsw(&private->cp, &irb->scsw);
>>>    		cp_free(&private->cp);
>>>    	}
>>
>> I am a little confused about this. Why do we need to update the scsw.cpa
>> if we have the start function function control bit set? Is it an
>> optimization?
> 
> No, it's not an optimization. This is the work function that is
> scheduled if we get an interrupt for the device. Previously, we only
> got an interrupt if either the device presented us an unsolicited
> status or if we got an interrupt as a response to the channel program
> we submitted. Now, we can get an interrupt for halt/clear subchannel as
> well, and in that case, we don't necessarily have a cp.
> 
> [Thinking some more about it, we need to verify if the start function
> actually remains set if we try to terminate a running channel program
> with halt/clear. A clear might scrub too much. If that's the case, we
> also need to free the cp if the start function is not set.]
> 
> 

According to PoPs (Chapter 16: I/O interruptions, under function control):

The start-function indication is also cleared at the subchannel during
the execution of CLEAR SUBCHANNEL.

So maybe we do need to free the cp.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 3/3] vfio-ccw: add handling for asnyc channel instructions
  2018-11-28 14:31       ` Farhan Ali
@ 2018-11-28 14:52         ` Cornelia Huck
  2018-11-28 15:00           ` Farhan Ali
  0 siblings, 1 reply; 54+ messages in thread
From: Cornelia Huck @ 2018-11-28 14:52 UTC (permalink / raw)
  To: Farhan Ali
  Cc: linux-s390, Eric Farman, Alex Williamson, Pierre Morel, kvm,
	qemu-devel, Halil Pasic, qemu-s390x

On Wed, 28 Nov 2018 09:31:51 -0500
Farhan Ali <alifm@linux.ibm.com> wrote:

> On 11/28/2018 04:02 AM, Cornelia Huck wrote:
> > On Tue, 27 Nov 2018 14:09:49 -0500
> > Farhan Ali <alifm@linux.ibm.com> wrote:
> >   
> >> On 11/22/2018 11:54 AM, Cornelia Huck wrote:  
> >>> Add a region to the vfio-ccw device that can be used to submit
> >>> asynchronous I/O instructions. ssch continues to be handled by the
> >>> existing I/O region; the new region handles hsch and csch.
> >>>
> >>> Interrupt status continues to be reported through the same channels
> >>> as for ssch.
> >>>
> >>> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> >>> ---
> >>>    drivers/s390/cio/Makefile           |   3 +-
> >>>    drivers/s390/cio/vfio_ccw_async.c   |  88 ++++++++++++++++
> >>>    drivers/s390/cio/vfio_ccw_drv.c     |  48 ++++++---
> >>>    drivers/s390/cio/vfio_ccw_fsm.c     | 158 +++++++++++++++++++++++++++-
> >>>    drivers/s390/cio/vfio_ccw_ops.c     |  13 ++-
> >>>    drivers/s390/cio/vfio_ccw_private.h |   6 ++
> >>>    include/uapi/linux/vfio.h           |   4 +
> >>>    include/uapi/linux/vfio_ccw.h       |  12 +++
> >>>    8 files changed, 313 insertions(+), 19 deletions(-)
> >>>    create mode 100644 drivers/s390/cio/vfio_ccw_async.c
> >>>  
> >   
> >>> @@ -76,7 +79,8 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
> >>>    	private = container_of(work, struct vfio_ccw_private, io_work);
> >>>    	irb = &private->irb;
> >>>    
> >>> -	if (scsw_is_solicited(&irb->scsw)) {
> >>> +	if (scsw_is_solicited(&irb->scsw) &&
> >>> +	    (scsw_fctl(&irb->scsw) & SCSW_FCTL_START_FUNC)) {
> >>>    		cp_update_scsw(&private->cp, &irb->scsw);
> >>>    		cp_free(&private->cp);
> >>>    	}  
> >>
> >> I am a little confused about this. Why do we need to update the scsw.cpa
> >> if we have the start function function control bit set? Is it an
> >> optimization?  
> > 
> > No, it's not an optimization. This is the work function that is
> > scheduled if we get an interrupt for the device. Previously, we only
> > got an interrupt if either the device presented us an unsolicited
> > status or if we got an interrupt as a response to the channel program
> > we submitted. Now, we can get an interrupt for halt/clear subchannel as
> > well, and in that case, we don't necessarily have a cp.
> > 
> > [Thinking some more about it, we need to verify if the start function
> > actually remains set if we try to terminate a running channel program
> > with halt/clear. A clear might scrub too much. If that's the case, we
> > also need to free the cp if the start function is not set.]
> > 
> >   
> 
> According to PoPs (Chapter 16: I/O interruptions, under function control):
> 
> The start-function indication is also cleared at
> the subchannel during the execution of CLEAR SUB-
> CHANNEL.
> 
> So maybe we do need to free the cp.

Hm... so we need to make sure that cp_update_scsw() and cp_free() only
do something when there's actually a valid cp around, and then call them
unconditionally. Maybe add a ->valid flag to struct channel_program?

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 3/3] vfio-ccw: add handling for asnyc channel instructions
  2018-11-28 14:52         ` Cornelia Huck
@ 2018-11-28 15:00           ` Farhan Ali
  2018-11-28 15:35             ` Cornelia Huck
  0 siblings, 1 reply; 54+ messages in thread
From: Farhan Ali @ 2018-11-28 15:00 UTC (permalink / raw)
  To: Cornelia Huck
  Cc: linux-s390, Eric Farman, Alex Williamson, Pierre Morel, kvm,
	qemu-devel, Halil Pasic, qemu-s390x



On 11/28/2018 09:52 AM, Cornelia Huck wrote:
> On Wed, 28 Nov 2018 09:31:51 -0500
> Farhan Ali <alifm@linux.ibm.com> wrote:
> 
>> On 11/28/2018 04:02 AM, Cornelia Huck wrote:
>>> On Tue, 27 Nov 2018 14:09:49 -0500
>>> Farhan Ali <alifm@linux.ibm.com> wrote:
>>>    
>>>> On 11/22/2018 11:54 AM, Cornelia Huck wrote:
>>>>> Add a region to the vfio-ccw device that can be used to submit
>>>>> asynchronous I/O instructions. ssch continues to be handled by the
>>>>> existing I/O region; the new region handles hsch and csch.
>>>>>
>>>>> Interrupt status continues to be reported through the same channels
>>>>> as for ssch.
>>>>>
>>>>> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
>>>>> ---
>>>>>     drivers/s390/cio/Makefile           |   3 +-
>>>>>     drivers/s390/cio/vfio_ccw_async.c   |  88 ++++++++++++++++
>>>>>     drivers/s390/cio/vfio_ccw_drv.c     |  48 ++++++---
>>>>>     drivers/s390/cio/vfio_ccw_fsm.c     | 158 +++++++++++++++++++++++++++-
>>>>>     drivers/s390/cio/vfio_ccw_ops.c     |  13 ++-
>>>>>     drivers/s390/cio/vfio_ccw_private.h |   6 ++
>>>>>     include/uapi/linux/vfio.h           |   4 +
>>>>>     include/uapi/linux/vfio_ccw.h       |  12 +++
>>>>>     8 files changed, 313 insertions(+), 19 deletions(-)
>>>>>     create mode 100644 drivers/s390/cio/vfio_ccw_async.c
>>>>>   
>>>    
>>>>> @@ -76,7 +79,8 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
>>>>>     	private = container_of(work, struct vfio_ccw_private, io_work);
>>>>>     	irb = &private->irb;
>>>>>     
>>>>> -	if (scsw_is_solicited(&irb->scsw)) {
>>>>> +	if (scsw_is_solicited(&irb->scsw) &&
>>>>> +	    (scsw_fctl(&irb->scsw) & SCSW_FCTL_START_FUNC)) {
>>>>>     		cp_update_scsw(&private->cp, &irb->scsw);
>>>>>     		cp_free(&private->cp);
>>>>>     	}
>>>>
>>>> I am a little confused about this. Why do we need to update the scsw.cpa
>>>> if we have the start function function control bit set? Is it an
>>>> optimization?
>>>
>>> No, it's not an optimization. This is the work function that is
>>> scheduled if we get an interrupt for the device. Previously, we only
>>> got an interrupt if either the device presented us an unsolicited
>>> status or if we got an interrupt as a response to the channel program
>>> we submitted. Now, we can get an interrupt for halt/clear subchannel as
>>> well, and in that case, we don't necessarily have a cp.
>>>
>>> [Thinking some more about it, we need to verify if the start function
>>> actually remains set if we try to terminate a running channel program
>>> with halt/clear. A clear might scrub too much. If that's the case, we
>>> also need to free the cp if the start function is not set.]
>>>
>>>    
>>
>> According to PoPs (Chapter 16: I/O interruptions, under function control):
>>
>> The start-function indication is also cleared at
>> the subchannel during the execution of CLEAR SUB-
>> CHANNEL.
>>
>> So maybe we do need to free the cp.
> 
> Hm... so we need to make sure that cp_update_scsw() and cp_free() only
> do something when there's actually a valid cp around and call them
> unconditionally. 

Yes, I agree.

> Maybe add a ->valid flag to struct channel_program?

We could do that. So we would set the flag once we have copied the
channel program to kernel memory, since that's when we should care about
freeing it?

> 
> 

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 3/3] vfio-ccw: add handling for asnyc channel instructions
  2018-11-28 15:00           ` Farhan Ali
@ 2018-11-28 15:35             ` Cornelia Huck
  2018-11-28 15:55               ` Farhan Ali
  0 siblings, 1 reply; 54+ messages in thread
From: Cornelia Huck @ 2018-11-28 15:35 UTC (permalink / raw)
  To: Farhan Ali
  Cc: linux-s390, Eric Farman, Alex Williamson, Pierre Morel, kvm,
	qemu-devel, Halil Pasic, qemu-s390x

On Wed, 28 Nov 2018 10:00:59 -0500
Farhan Ali <alifm@linux.ibm.com> wrote:

> On 11/28/2018 09:52 AM, Cornelia Huck wrote:
> > On Wed, 28 Nov 2018 09:31:51 -0500
> > Farhan Ali <alifm@linux.ibm.com> wrote:
> >   
> >> On 11/28/2018 04:02 AM, Cornelia Huck wrote:  
> >>> On Tue, 27 Nov 2018 14:09:49 -0500
> >>> Farhan Ali <alifm@linux.ibm.com> wrote:
> >>>      
> >>>> On 11/22/2018 11:54 AM, Cornelia Huck wrote:  
> >>>>> Add a region to the vfio-ccw device that can be used to submit
> >>>>> asynchronous I/O instructions. ssch continues to be handled by the
> >>>>> existing I/O region; the new region handles hsch and csch.
> >>>>>
> >>>>> Interrupt status continues to be reported through the same channels
> >>>>> as for ssch.
> >>>>>
> >>>>> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> >>>>> ---
> >>>>>     drivers/s390/cio/Makefile           |   3 +-
> >>>>>     drivers/s390/cio/vfio_ccw_async.c   |  88 ++++++++++++++++
> >>>>>     drivers/s390/cio/vfio_ccw_drv.c     |  48 ++++++---
> >>>>>     drivers/s390/cio/vfio_ccw_fsm.c     | 158 +++++++++++++++++++++++++++-
> >>>>>     drivers/s390/cio/vfio_ccw_ops.c     |  13 ++-
> >>>>>     drivers/s390/cio/vfio_ccw_private.h |   6 ++
> >>>>>     include/uapi/linux/vfio.h           |   4 +
> >>>>>     include/uapi/linux/vfio_ccw.h       |  12 +++
> >>>>>     8 files changed, 313 insertions(+), 19 deletions(-)
> >>>>>     create mode 100644 drivers/s390/cio/vfio_ccw_async.c
> >>>>>     
> >>>      
> >>>>> @@ -76,7 +79,8 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
> >>>>>     	private = container_of(work, struct vfio_ccw_private, io_work);
> >>>>>     	irb = &private->irb;
> >>>>>     
> >>>>> -	if (scsw_is_solicited(&irb->scsw)) {
> >>>>> +	if (scsw_is_solicited(&irb->scsw) &&
> >>>>> +	    (scsw_fctl(&irb->scsw) & SCSW_FCTL_START_FUNC)) {
> >>>>>     		cp_update_scsw(&private->cp, &irb->scsw);
> >>>>>     		cp_free(&private->cp);
> >>>>>     	}  
> >>>>
> >>>> I am a little confused about this. Why do we need to update the scsw.cpa
> >>>> if we have the start function function control bit set? Is it an
> >>>> optimization?  
> >>>
> >>> No, it's not an optimization. This is the work function that is
> >>> scheduled if we get an interrupt for the device. Previously, we only
> >>> got an interrupt if either the device presented us an unsolicited
> >>> status or if we got an interrupt as a response to the channel program
> >>> we submitted. Now, we can get an interrupt for halt/clear subchannel as
> >>> well, and in that case, we don't necessarily have a cp.
> >>>
> >>> [Thinking some more about it, we need to verify if the start function
> >>> actually remains set if we try to terminate a running channel program
> >>> with halt/clear. A clear might scrub too much. If that's the case, we
> >>> also need to free the cp if the start function is not set.]
> >>>
> >>>      
> >>
> >> According to PoPs (Chapter 16: I/O interruptions, under function control):
> >>
> >> The start-function indication is also cleared at
> >> the subchannel during the execution of CLEAR SUB-
> >> CHANNEL.
> >>
> >> So maybe we do need to free the cp.  
> > 
> > Hm... so we need to make sure that cp_update_scsw() and cp_free() only
> > do something when there's actually a valid cp around and call them
> > unconditionally.   
> 
> Yes, I agree.
> 
> > Maybe add a ->valid flag to struct channel_program?
> 
> We could do that. So we would set the flag once we have copied the 
> channel program to kernel memory? since that's when we should care about 
> freeing it.

I hacked up the following (still untested):

From e771c8dc5abbfbd19688b452096bab9d032e0df5 Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cohuck@redhat.com>
Date: Wed, 28 Nov 2018 16:30:51 +0100
Subject: [PATCH] vfio-ccw: make it safe to access channel programs

When we get a solicited interrupt, the start function may have
been cleared by a csch, but we still have a channel program
structure allocated. Make it safe to call the cp accessors in
any case, so we can call them unconditionally.

Signed-off-by: Cornelia Huck <cohuck@redhat.com>
---
 drivers/s390/cio/vfio_ccw_cp.c  | 9 ++++++++-
 drivers/s390/cio/vfio_ccw_cp.h  | 2 ++
 drivers/s390/cio/vfio_ccw_drv.c | 3 +--
 3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c
index 70a006ba4d05..35f87514276b 100644
--- a/drivers/s390/cio/vfio_ccw_cp.c
+++ b/drivers/s390/cio/vfio_ccw_cp.c
@@ -335,6 +335,7 @@ static void cp_unpin_free(struct channel_program *cp)
 	struct ccwchain *chain, *temp;
 	int i;
 
+	cp->initialized = false;
 	list_for_each_entry_safe(chain, temp, &cp->ccwchain_list, next) {
 		for (i = 0; i < chain->ch_len; i++) {
 			pfn_array_table_unpin_free(chain->ch_pat + i,
@@ -701,6 +702,8 @@ int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb)
 	 */
 	cp->orb.cmd.c64 = 1;
 
+	cp->initialized = true;
+
 	return ret;
 }
 
@@ -715,7 +718,8 @@ int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb)
  */
 void cp_free(struct channel_program *cp)
 {
-	cp_unpin_free(cp);
+	if (cp->initialized)
+		cp_unpin_free(cp);
 }
 
 /**
@@ -831,6 +835,9 @@ void cp_update_scsw(struct channel_program *cp, union scsw *scsw)
 	u32 cpa = scsw->cmd.cpa;
 	u32 ccw_head, ccw_tail;
 
+	if (!cp->initialized)
+		return;
+
 	/*
 	 * LATER:
 	 * For now, only update the cmd.cpa part. We may need to deal with
diff --git a/drivers/s390/cio/vfio_ccw_cp.h b/drivers/s390/cio/vfio_ccw_cp.h
index a4b74fb1aa57..3c20cd208da5 100644
--- a/drivers/s390/cio/vfio_ccw_cp.h
+++ b/drivers/s390/cio/vfio_ccw_cp.h
@@ -21,6 +21,7 @@
  * @ccwchain_list: list head of ccwchains
  * @orb: orb for the currently processed ssch request
  * @mdev: the mediated device to perform page pinning/unpinning
+ * @initialized: whether this instance is actually initialized
  *
  * @ccwchain_list is the head of a ccwchain list, that contents the
  * translated result of the guest channel program that pointed out by
@@ -30,6 +31,7 @@ struct channel_program {
 	struct list_head ccwchain_list;
 	union orb orb;
 	struct device *mdev;
+	bool initialized;
 };
 
 extern int cp_init(struct channel_program *cp, struct device *mdev,
diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
index 890c588a3a61..83d6f43792b6 100644
--- a/drivers/s390/cio/vfio_ccw_drv.c
+++ b/drivers/s390/cio/vfio_ccw_drv.c
@@ -79,8 +79,7 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
 	private = container_of(work, struct vfio_ccw_private, io_work);
 	irb = &private->irb;
 
-	if (scsw_is_solicited(&irb->scsw) &&
-	    (scsw_fctl(&irb->scsw) & SCSW_FCTL_START_FUNC)) {
+	if (scsw_is_solicited(&irb->scsw)) {
 		cp_update_scsw(&private->cp, &irb->scsw);
 		cp_free(&private->cp);
 	}
-- 
2.17.2

^ permalink raw reply related	[flat|nested] 54+ messages in thread

* Re: [PATCH 3/3] vfio-ccw: add handling for asnyc channel instructions
  2018-11-28 15:35             ` Cornelia Huck
@ 2018-11-28 15:55               ` Farhan Ali
  2019-01-18 13:53                 ` Cornelia Huck
  0 siblings, 1 reply; 54+ messages in thread
From: Farhan Ali @ 2018-11-28 15:55 UTC (permalink / raw)
  To: Cornelia Huck
  Cc: linux-s390, Eric Farman, Alex Williamson, Pierre Morel, kvm,
	qemu-devel, Halil Pasic, qemu-s390x



On 11/28/2018 10:35 AM, Cornelia Huck wrote:
> On Wed, 28 Nov 2018 10:00:59 -0500
> Farhan Ali <alifm@linux.ibm.com> wrote:
> 
>> On 11/28/2018 09:52 AM, Cornelia Huck wrote:
>>> On Wed, 28 Nov 2018 09:31:51 -0500
>>> Farhan Ali <alifm@linux.ibm.com> wrote:
>>>    
>>>> On 11/28/2018 04:02 AM, Cornelia Huck wrote:
>>>>> On Tue, 27 Nov 2018 14:09:49 -0500
>>>>> Farhan Ali <alifm@linux.ibm.com> wrote:
>>>>>       
>>>>>> On 11/22/2018 11:54 AM, Cornelia Huck wrote:
>>>>>>> Add a region to the vfio-ccw device that can be used to submit
>>>>>>> asynchronous I/O instructions. ssch continues to be handled by the
>>>>>>> existing I/O region; the new region handles hsch and csch.
>>>>>>>
>>>>>>> Interrupt status continues to be reported through the same channels
>>>>>>> as for ssch.
>>>>>>>
>>>>>>> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
>>>>>>> ---
>>>>>>>      drivers/s390/cio/Makefile           |   3 +-
>>>>>>>      drivers/s390/cio/vfio_ccw_async.c   |  88 ++++++++++++++++
>>>>>>>      drivers/s390/cio/vfio_ccw_drv.c     |  48 ++++++---
>>>>>>>      drivers/s390/cio/vfio_ccw_fsm.c     | 158 +++++++++++++++++++++++++++-
>>>>>>>      drivers/s390/cio/vfio_ccw_ops.c     |  13 ++-
>>>>>>>      drivers/s390/cio/vfio_ccw_private.h |   6 ++
>>>>>>>      include/uapi/linux/vfio.h           |   4 +
>>>>>>>      include/uapi/linux/vfio_ccw.h       |  12 +++
>>>>>>>      8 files changed, 313 insertions(+), 19 deletions(-)
>>>>>>>      create mode 100644 drivers/s390/cio/vfio_ccw_async.c
>>>>>>>      
>>>>>       
>>>>>>> @@ -76,7 +79,8 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
>>>>>>>      	private = container_of(work, struct vfio_ccw_private, io_work);
>>>>>>>      	irb = &private->irb;
>>>>>>>      
>>>>>>> -	if (scsw_is_solicited(&irb->scsw)) {
>>>>>>> +	if (scsw_is_solicited(&irb->scsw) &&
>>>>>>> +	    (scsw_fctl(&irb->scsw) & SCSW_FCTL_START_FUNC)) {
>>>>>>>      		cp_update_scsw(&private->cp, &irb->scsw);
>>>>>>>      		cp_free(&private->cp);
>>>>>>>      	}
>>>>>>
>>>>>> I am a little confused about this. Why do we need to update the scsw.cpa
>>>>>> if we have the start function function control bit set? Is it an
>>>>>> optimization?
>>>>>
>>>>> No, it's not an optimization. This is the work function that is
>>>>> scheduled if we get an interrupt for the device. Previously, we only
>>>>> got an interrupt if either the device presented us an unsolicited
>>>>> status or if we got an interrupt as a response to the channel program
>>>>> we submitted. Now, we can get an interrupt for halt/clear subchannel as
>>>>> well, and in that case, we don't necessarily have a cp.
>>>>>
>>>>> [Thinking some more about it, we need to verify if the start function
>>>>> actually remains set if we try to terminate a running channel program
>>>>> with halt/clear. A clear might scrub too much. If that's the case, we
>>>>> also need to free the cp if the start function is not set.]
>>>>>
>>>>>       
>>>>
>>>> According to PoPs (Chapter 16: I/O interruptions, under function control):
>>>>
>>>> The start-function indication is also cleared at
>>>> the subchannel during the execution of CLEAR SUB-
>>>> CHANNEL.
>>>>
>>>> So maybe we do need to free the cp.
>>>
>>> Hm... so we need to make sure that cp_update_scsw() and cp_free() only
>>> do something when there's actually a valid cp around and call them
>>> unconditionally.
>>
>> Yes, I agree.
>>
>>> Maybe add a ->valid flag to struct channel_program?
>>
>> We could do that. So we would set the flag once we have copied the
>> channel program to kernel memory? since that's when we should care about
>> freeing it.
> 
> I hacked up the following (still untested):
> 
>  From e771c8dc5abbfbd19688b452096bab9d032e0df5 Mon Sep 17 00:00:00 2001
> From: Cornelia Huck <cohuck@redhat.com>
> Date: Wed, 28 Nov 2018 16:30:51 +0100
> Subject: [PATCH] vfio-ccw: make it safe to access channel programs
> 
> When we get a solicited interrupt, the start function may have
> been cleared by a csch, but we still have a channel program
> structure allocated. Make it safe to call the cp accessors in
> any case, so we can call them unconditionally.
> 
> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> ---
>   drivers/s390/cio/vfio_ccw_cp.c  | 9 ++++++++-
>   drivers/s390/cio/vfio_ccw_cp.h  | 2 ++
>   drivers/s390/cio/vfio_ccw_drv.c | 3 +--
>   3 files changed, 11 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c
> index 70a006ba4d05..35f87514276b 100644
> --- a/drivers/s390/cio/vfio_ccw_cp.c
> +++ b/drivers/s390/cio/vfio_ccw_cp.c
> @@ -335,6 +335,7 @@ static void cp_unpin_free(struct channel_program *cp)
>   	struct ccwchain *chain, *temp;
>   	int i;
>   
> +	cp->initialized = false;
>   	list_for_each_entry_safe(chain, temp, &cp->ccwchain_list, next) {
>   		for (i = 0; i < chain->ch_len; i++) {
>   			pfn_array_table_unpin_free(chain->ch_pat + i,
> @@ -701,6 +702,8 @@ int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb)
>   	 */
>   	cp->orb.cmd.c64 = 1;
>   
> +	cp->initialized = true;
> +
>   	return ret;
>   }
>   
> @@ -715,7 +718,8 @@ int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb)
>    */
>   void cp_free(struct channel_program *cp)
>   {
> -	cp_unpin_free(cp);
> +	if (cp->initialized)
> +		cp_unpin_free(cp);
>   }
>   
>   /**
> @@ -831,6 +835,9 @@ void cp_update_scsw(struct channel_program *cp, union scsw *scsw)
>   	u32 cpa = scsw->cmd.cpa;
>   	u32 ccw_head, ccw_tail;
>   
> +	if (!cp->initialized)
> +		return;
> +
>   	/*
>   	 * LATER:
>   	 * For now, only update the cmd.cpa part. We may need to deal with
> diff --git a/drivers/s390/cio/vfio_ccw_cp.h b/drivers/s390/cio/vfio_ccw_cp.h
> index a4b74fb1aa57..3c20cd208da5 100644
> --- a/drivers/s390/cio/vfio_ccw_cp.h
> +++ b/drivers/s390/cio/vfio_ccw_cp.h
> @@ -21,6 +21,7 @@
>    * @ccwchain_list: list head of ccwchains
>    * @orb: orb for the currently processed ssch request
>    * @mdev: the mediated device to perform page pinning/unpinning
> + * @initialized: whether this instance is actually initialized
>    *
>    * @ccwchain_list is the head of a ccwchain list, that contents the
>    * translated result of the guest channel program that pointed out by
> @@ -30,6 +31,7 @@ struct channel_program {
>   	struct list_head ccwchain_list;
>   	union orb orb;
>   	struct device *mdev;
> +	bool initialized;
>   };
>   
>   extern int cp_init(struct channel_program *cp, struct device *mdev,
> diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
> index 890c588a3a61..83d6f43792b6 100644
> --- a/drivers/s390/cio/vfio_ccw_drv.c
> +++ b/drivers/s390/cio/vfio_ccw_drv.c
> @@ -79,8 +79,7 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
>   	private = container_of(work, struct vfio_ccw_private, io_work);
>   	irb = &private->irb;
>   
> -	if (scsw_is_solicited(&irb->scsw) &&
> -	    (scsw_fctl(&irb->scsw) & SCSW_FCTL_START_FUNC)) {
> +	if (scsw_is_solicited(&irb->scsw)) {
>   		cp_update_scsw(&private->cp, &irb->scsw);
>   		cp_free(&private->cp);
>   	}
> 

The changes look good to me.

Thanks
Farhan

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [qemu-s390x] [PATCH 3/3] vfio-ccw: add handling for asnyc channel instructions
  2018-11-22 16:54 ` [PATCH 3/3] vfio-ccw: add handling for asnyc channel instructions Cornelia Huck
                     ` (2 preceding siblings ...)
  2018-11-27 19:57   ` Farhan Ali
@ 2018-11-28 16:36   ` Halil Pasic
  2018-11-29 16:52     ` Cornelia Huck
  2018-12-17 21:54   ` Eric Farman
  4 siblings, 1 reply; 54+ messages in thread
From: Halil Pasic @ 2018-11-28 16:36 UTC (permalink / raw)
  To: Cornelia Huck
  Cc: linux-s390, Eric Farman, Pierre Morel, kvm, qemu-s390x,
	Farhan Ali, qemu-devel, Alex Williamson

On Thu, 22 Nov 2018 17:54:32 +0100
Cornelia Huck <cohuck@redhat.com> wrote:

> Add a region to the vfio-ccw device that can be used to submit
> asynchronous I/O instructions. ssch continues to be handled by the
> existing I/O region; the new region handles hsch and csch.
> 
> Interrupt status continues to be reported through the same channels
> as for ssch.
> 
> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> ---
>  drivers/s390/cio/Makefile           |   3 +-
>  drivers/s390/cio/vfio_ccw_async.c   |  88 ++++++++++++++++
>  drivers/s390/cio/vfio_ccw_drv.c     |  48 ++++++---
>  drivers/s390/cio/vfio_ccw_fsm.c     | 158 +++++++++++++++++++++++++++-
>  drivers/s390/cio/vfio_ccw_ops.c     |  13 ++-
>  drivers/s390/cio/vfio_ccw_private.h |   6 ++
>  include/uapi/linux/vfio.h           |   4 +
>  include/uapi/linux/vfio_ccw.h       |  12 +++
>  8 files changed, 313 insertions(+), 19 deletions(-)
>  create mode 100644 drivers/s390/cio/vfio_ccw_async.c
> 
> diff --git a/drivers/s390/cio/Makefile b/drivers/s390/cio/Makefile
> index f230516abb96..f6a8db04177c 100644
> --- a/drivers/s390/cio/Makefile
> +++ b/drivers/s390/cio/Makefile
> @@ -20,5 +20,6 @@ obj-$(CONFIG_CCWGROUP) += ccwgroup.o
>  qdio-objs := qdio_main.o qdio_thinint.o qdio_debug.o qdio_setup.o
>  obj-$(CONFIG_QDIO) += qdio.o
>  
> -vfio_ccw-objs += vfio_ccw_drv.o vfio_ccw_cp.o vfio_ccw_ops.o vfio_ccw_fsm.o
> +vfio_ccw-objs += vfio_ccw_drv.o vfio_ccw_cp.o vfio_ccw_ops.o vfio_ccw_fsm.o \
> +	vfio_ccw_async.o
>  obj-$(CONFIG_VFIO_CCW) += vfio_ccw.o
> diff --git a/drivers/s390/cio/vfio_ccw_async.c b/drivers/s390/cio/vfio_ccw_async.c
> new file mode 100644
> index 000000000000..8c7f51d17d70
> --- /dev/null
> +++ b/drivers/s390/cio/vfio_ccw_async.c
> @@ -0,0 +1,88 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Async I/O region for vfio_ccw
> + *
> + * Copyright Red Hat, Inc. 2018
> + *
> + * Author(s): Cornelia Huck <cohuck@redhat.com>
> + */
> +
> +#include <linux/vfio.h>
> +#include <linux/mdev.h>
> +
> +#include "vfio_ccw_private.h"
> +
> +static size_t vfio_ccw_async_region_read(struct vfio_ccw_private *private,
> +					 char __user *buf, size_t count,
> +					 loff_t *ppos)
> +{
> +	unsigned int i = VFIO_CCW_OFFSET_TO_INDEX(*ppos) - VFIO_CCW_NUM_REGIONS;
> +	loff_t pos = *ppos & VFIO_CCW_OFFSET_MASK;
> +	struct ccw_cmd_region *region;
> +
> +	if (pos + count > sizeof(*region))
> +		return -EINVAL;
> +
> +	region = private->region[i].data;
> +	if (copy_to_user(buf, (void *)region + pos, count))
> +		return -EFAULT;
> +
> +	return count;
> +
> +}
> +
> +static size_t vfio_ccw_async_region_write(struct vfio_ccw_private *private,
> +					  const char __user *buf, size_t count,
> +					  loff_t *ppos)
> +{
> +	unsigned int i = VFIO_CCW_OFFSET_TO_INDEX(*ppos) - VFIO_CCW_NUM_REGIONS;
> +	loff_t pos = *ppos & VFIO_CCW_OFFSET_MASK;
> +	struct ccw_cmd_region *region;
> +
> +	if (pos + count > sizeof(*region))
> +		return -EINVAL;
> +
> +	if (private->state == VFIO_CCW_STATE_NOT_OPER ||
> +	    private->state == VFIO_CCW_STATE_STANDBY)
> +		return -EACCES;
> +
> +	region = private->region[i].data;
> +	if (copy_from_user((void *)region + pos, buf, count))
> +		return -EFAULT;

I guess vfio_ccw_async_region_write() is supposed to be reentrant in the
sense that there may be more than one 'instance' of the function
executing at the same time, or am I wrong?

If it is reentrant, I wonder what protects private->region[i].data from
corruption or simply being changed 'while at it'?

Regards,
Halil

> +
> +	switch (region->command) {
> +	case VFIO_CCW_ASYNC_CMD_HSCH:
> +		vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_HALT_REQ);
> +		break;
> +	case VFIO_CCW_ASYNC_CMD_CSCH:
> +		vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_CLEAR_REQ);
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	return region->ret_code ? region->ret_code : count;
> +}
> +

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [qemu-s390x] [PATCH 3/3] vfio-ccw: add handling for asnyc channel instructions
  2018-11-28 16:36   ` [qemu-s390x] " Halil Pasic
@ 2018-11-29 16:52     ` Cornelia Huck
  2018-11-29 17:24       ` Halil Pasic
  0 siblings, 1 reply; 54+ messages in thread
From: Cornelia Huck @ 2018-11-29 16:52 UTC (permalink / raw)
  To: Halil Pasic
  Cc: linux-s390, Eric Farman, Pierre Morel, kvm, qemu-s390x,
	Farhan Ali, qemu-devel, Alex Williamson

On Wed, 28 Nov 2018 17:36:04 +0100
Halil Pasic <pasic@linux.ibm.com> wrote:

> On Thu, 22 Nov 2018 17:54:32 +0100
> Cornelia Huck <cohuck@redhat.com> wrote:
> 
> > Add a region to the vfio-ccw device that can be used to submit
> > asynchronous I/O instructions. ssch continues to be handled by the
> > existing I/O region; the new region handles hsch and csch.
> > 
> > Interrupt status continues to be reported through the same channels
> > as for ssch.
> > 
> > Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> > ---
> >  drivers/s390/cio/Makefile           |   3 +-
> >  drivers/s390/cio/vfio_ccw_async.c   |  88 ++++++++++++++++
> >  drivers/s390/cio/vfio_ccw_drv.c     |  48 ++++++---
> >  drivers/s390/cio/vfio_ccw_fsm.c     | 158 +++++++++++++++++++++++++++-
> >  drivers/s390/cio/vfio_ccw_ops.c     |  13 ++-
> >  drivers/s390/cio/vfio_ccw_private.h |   6 ++
> >  include/uapi/linux/vfio.h           |   4 +
> >  include/uapi/linux/vfio_ccw.h       |  12 +++
> >  8 files changed, 313 insertions(+), 19 deletions(-)
> >  create mode 100644 drivers/s390/cio/vfio_ccw_async.c

> > +static size_t vfio_ccw_async_region_read(struct vfio_ccw_private *private,
> > +					 char __user *buf, size_t count,
> > +					 loff_t *ppos)
> > +{
> > +	unsigned int i = VFIO_CCW_OFFSET_TO_INDEX(*ppos) - VFIO_CCW_NUM_REGIONS;
> > +	loff_t pos = *ppos & VFIO_CCW_OFFSET_MASK;
> > +	struct ccw_cmd_region *region;
> > +
> > +	if (pos + count > sizeof(*region))
> > +		return -EINVAL;
> > +
> > +	region = private->region[i].data;
> > +	if (copy_to_user(buf, (void *)region + pos, count))
> > +		return -EFAULT;
> > +
> > +	return count;
> > +
> > +}
> > +
> > +static size_t vfio_ccw_async_region_write(struct vfio_ccw_private *private,
> > +					  const char __user *buf, size_t count,
> > +					  loff_t *ppos)
> > +{
> > +	unsigned int i = VFIO_CCW_OFFSET_TO_INDEX(*ppos) - VFIO_CCW_NUM_REGIONS;
> > +	loff_t pos = *ppos & VFIO_CCW_OFFSET_MASK;
> > +	struct ccw_cmd_region *region;
> > +
> > +	if (pos + count > sizeof(*region))
> > +		return -EINVAL;
> > +
> > +	if (private->state == VFIO_CCW_STATE_NOT_OPER ||
> > +	    private->state == VFIO_CCW_STATE_STANDBY)
> > +		return -EACCES;
> > +
> > +	region = private->region[i].data;
> > +	if (copy_from_user((void *)region + pos, buf, count))
> > +		return -EFAULT;  
> 
> I guess vfio_ccw_async_region_write() is supposed to be reentrant in a
> sense that there may be more that one 'instances' of the function
> executing at the same time, or am I wrong?
> 
> If it is reenarant, I wonder what protects private->region[i].data from
> corruption or simply being changed 'while at it'?

Interesting question. AFAICS this same issue applies to the existing
I/O region as well.

There's nothing in common code enforcing any exclusivity. If I
understand the code correctly, the common vfio-pci code reads/writes in
1/2/4 byte chunks for most accesses. There's igd code that does not do
that, though.

> 
> Regards,
> Halil
> 
> > +
> > +	switch (region->command) {
> > +	case VFIO_CCW_ASYNC_CMD_HSCH:
> > +		vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_HALT_REQ);
> > +		break;
> > +	case VFIO_CCW_ASYNC_CMD_CSCH:
> > +		vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_CLEAR_REQ);
> > +		break;
> > +	default:
> > +		return -EINVAL;
> > +	}
> > +
> > +	return region->ret_code ? region->ret_code : count;
> > +}
> > +  
> 

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [qemu-s390x] [PATCH 3/3] vfio-ccw: add handling for asnyc channel instructions
  2018-11-29 16:52     ` Cornelia Huck
@ 2018-11-29 17:24       ` Halil Pasic
  0 siblings, 0 replies; 54+ messages in thread
From: Halil Pasic @ 2018-11-29 17:24 UTC (permalink / raw)
  To: Cornelia Huck
  Cc: linux-s390, Eric Farman, Pierre Morel, kvm, qemu-s390x,
	Farhan Ali, qemu-devel, Alex Williamson

On Thu, 29 Nov 2018 17:52:34 +0100
Cornelia Huck <cohuck@redhat.com> wrote:

> On Wed, 28 Nov 2018 17:36:04 +0100
> Halil Pasic <pasic@linux.ibm.com> wrote:
> 
> > On Thu, 22 Nov 2018 17:54:32 +0100
> > Cornelia Huck <cohuck@redhat.com> wrote:
> > 
> > > Add a region to the vfio-ccw device that can be used to submit
> > > asynchronous I/O instructions. ssch continues to be handled by the
> > > existing I/O region; the new region handles hsch and csch.
> > > 
> > > Interrupt status continues to be reported through the same channels
> > > as for ssch.
> > > 
> > > Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> > > ---
> > >  drivers/s390/cio/Makefile           |   3 +-
> > >  drivers/s390/cio/vfio_ccw_async.c   |  88 ++++++++++++++++
> > >  drivers/s390/cio/vfio_ccw_drv.c     |  48 ++++++---
> > >  drivers/s390/cio/vfio_ccw_fsm.c     | 158 +++++++++++++++++++++++++++-
> > >  drivers/s390/cio/vfio_ccw_ops.c     |  13 ++-
> > >  drivers/s390/cio/vfio_ccw_private.h |   6 ++
> > >  include/uapi/linux/vfio.h           |   4 +
> > >  include/uapi/linux/vfio_ccw.h       |  12 +++
> > >  8 files changed, 313 insertions(+), 19 deletions(-)
> > >  create mode 100644 drivers/s390/cio/vfio_ccw_async.c
> 
> > > +static size_t vfio_ccw_async_region_read(struct vfio_ccw_private *private,
> > > +					 char __user *buf, size_t count,
> > > +					 loff_t *ppos)
> > > +{
> > > +	unsigned int i = VFIO_CCW_OFFSET_TO_INDEX(*ppos) - VFIO_CCW_NUM_REGIONS;
> > > +	loff_t pos = *ppos & VFIO_CCW_OFFSET_MASK;
> > > +	struct ccw_cmd_region *region;
> > > +
> > > +	if (pos + count > sizeof(*region))
> > > +		return -EINVAL;
> > > +
> > > +	region = private->region[i].data;
> > > +	if (copy_to_user(buf, (void *)region + pos, count))
> > > +		return -EFAULT;
> > > +
> > > +	return count;
> > > +
> > > +}
> > > +
> > > +static size_t vfio_ccw_async_region_write(struct vfio_ccw_private *private,
> > > +					  const char __user *buf, size_t count,
> > > +					  loff_t *ppos)
> > > +{
> > > +	unsigned int i = VFIO_CCW_OFFSET_TO_INDEX(*ppos) - VFIO_CCW_NUM_REGIONS;
> > > +	loff_t pos = *ppos & VFIO_CCW_OFFSET_MASK;
> > > +	struct ccw_cmd_region *region;
> > > +
> > > +	if (pos + count > sizeof(*region))
> > > +		return -EINVAL;
> > > +
> > > +	if (private->state == VFIO_CCW_STATE_NOT_OPER ||
> > > +	    private->state == VFIO_CCW_STATE_STANDBY)
> > > +		return -EACCES;
> > > +
> > > +	region = private->region[i].data;
> > > +	if (copy_from_user((void *)region + pos, buf, count))
> > > +		return -EFAULT;  
> > 
> > I guess vfio_ccw_async_region_write() is supposed to be reentrant in a
> > sense that there may be more that one 'instances' of the function
> > executing at the same time, or am I wrong?
> > 
> > If it is reenarant, I wonder what protects private->region[i].data from
> > corruption or simply being changed 'while at it'?
> 
> Interesting question. AFAICS this same issue applies to the existing
> I/O region as well.
>

I'm aware of this. IMHO the answer to this question has quite some
implications, but I wanted to start with something simple and tangible.

One difference between the async and the existing I/O region is that we,
kind of, do implement mutual exclusion of I/O requests using
private->state and the state machine. It is racy, but AFAIU the idea that
at most one I/O request is processed at any time is recognizable in the
state machine.

Frankly I never understood how synchronization worked for vfio-ccw.

BTW considering current QEMU, I guess we kind of do have one event at
a time situation (not quite sure about stuff that is not triggered by
a channel instruction interpreted by QEMU). But the documentation does
not say anything, and I don't think relying on QEMU implementation
details is a good idea.

Pierre had a patch called '[PATCH v3 6/6] vfio: ccw: serialize the write
system calls' which makes all the write calls mutually exclusive, but
I'm not sure if that is what we want. In the end, it is a design
decision: making it one at a time simplifies the implementation but makes
us different.

One way or the other, IMHO, it is a decision that needs to be made soon.


> There's nothing in common code enforcing any exclusivity. 

Nod.

> If I
> understand the code correctly, the common vfio-pci code reads/writes in
> 1/2/4 byte chunks for most accesses. There's igd code that does not do
> that, though.
> 

I didn't examine the vfio-pci stuff yet because my understanding of PCI
is very limited.

Regards,
Halil

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 0/3] vfio-ccw: support hsch/csch (kernel part)
  2018-11-22 16:54 [PATCH 0/3] vfio-ccw: support hsch/csch (kernel part) Cornelia Huck
                   ` (4 preceding siblings ...)
  2018-11-26 18:57 ` Farhan Ali
@ 2018-12-04 12:38 ` Halil Pasic
  2018-12-04 13:11   ` Cornelia Huck
  5 siblings, 1 reply; 54+ messages in thread
From: Halil Pasic @ 2018-12-04 12:38 UTC (permalink / raw)
  To: Cornelia Huck
  Cc: linux-s390, Eric Farman, Alex Williamson, Pierre Morel, kvm,
	Farhan Ali, qemu-devel, qemu-s390x

On Thu, 22 Nov 2018 17:54:29 +0100
Cornelia Huck <cohuck@redhat.com> wrote:

> [This is the Linux kernel part, git tree is available at
> https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/vfio-ccw.git vfio-ccw-caps
> 
> The companion QEMU patches are available at
> https://github.com/cohuck/qemu vfio-ccw-caps]
> 
> Currently, vfio-ccw only relays START SUBCHANNEL requests to the real
> device. This tends to work well for the most common 'good path' scenarios;
> however, as we emulate {HALT,CLEAR} SUBCHANNEL in QEMU, things like
> clearing pending requests at the device is currently not supported.
> This may be a problem for e.g. error recovery.

I'm wondering: what about MODIFY SUBCHANNEL? Do we plan to add MSCH
as well or is it supposed to remain 'userspace emulated'? AFAIR MSCH
may have an effect on error recovery as well.

BTW I would like to have the concurrency discussion sorted out before
I proceed with my review, because reviewing the stuff without a fair idea
of what exactly are we trying to achieve would yield poor results.

Regards,
Halil

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 0/3] vfio-ccw: support hsch/csch (kernel part)
  2018-12-04 12:38 ` Halil Pasic
@ 2018-12-04 13:11   ` Cornelia Huck
  2018-12-04 15:02     ` Halil Pasic
  0 siblings, 1 reply; 54+ messages in thread
From: Cornelia Huck @ 2018-12-04 13:11 UTC (permalink / raw)
  To: Halil Pasic
  Cc: linux-s390, Eric Farman, Alex Williamson, Pierre Morel, kvm,
	Farhan Ali, qemu-devel, qemu-s390x

On Tue, 4 Dec 2018 13:38:10 +0100
Halil Pasic <pasic@linux.ibm.com> wrote:

> On Thu, 22 Nov 2018 17:54:29 +0100
> Cornelia Huck <cohuck@redhat.com> wrote:
> 
> > [This is the Linux kernel part, git tree is available at
> > https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/vfio-ccw.git vfio-ccw-caps
> > 
> > The companion QEMU patches are available at
> > https://github.com/cohuck/qemu vfio-ccw-caps]
> > 
> > Currently, vfio-ccw only relays START SUBCHANNEL requests to the real
> > device. This tends to work well for the most common 'good path' scenarios;
> > however, as we emulate {HALT,CLEAR} SUBCHANNEL in QEMU, things like
> > clearing pending requests at the device is currently not supported.
> > This may be a problem for e.g. error recovery.  
> 
> I'm wondering: what about MODIFY SUBCHANNEL? Do we plan to add MSCH
> as well or is it supposed to remain 'userspace emulated'? AFAIR MSCH
> may have an effect on error recovery as well.

I think that would require a deeper change, as we have the requirement
to enable the subchannel before handing it to userspace. IOW, the guest
does not cause the subchannel to be enabled/disabled, but the host does.

Parameters (like for channel measurements) are a different game. It is
something we should look into, but it will need a different region.

> BTW I would like to have the concurrency discussion sorted out before
> I proceed with my review, because reviewing the stuff without a fair idea
> of what exactly are we trying to achieve would yield poor results.

I'm not sure what is unclear about what we're trying to achieve (enable
the guest to issue halt/clear on real hardware)?

But yes, we need to sort out that concurrency thing; I'm currently
unsure if the core should do some things as well or if it's more of a
vendor-driver thing.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 0/3] vfio-ccw: support hsch/csch (kernel part)
  2018-12-04 13:11   ` Cornelia Huck
@ 2018-12-04 15:02     ` Halil Pasic
  2018-12-05 12:54       ` Cornelia Huck
  0 siblings, 1 reply; 54+ messages in thread
From: Halil Pasic @ 2018-12-04 15:02 UTC (permalink / raw)
  To: Cornelia Huck
  Cc: linux-s390, Eric Farman, kvm, Pierre Morel, qemu-s390x,
	Farhan Ali, qemu-devel, Alex Williamson

On Tue, 4 Dec 2018 14:11:30 +0100
Cornelia Huck <cohuck@redhat.com> wrote:

> On Tue, 4 Dec 2018 13:38:10 +0100
> Halil Pasic <pasic@linux.ibm.com> wrote:
> 
> > On Thu, 22 Nov 2018 17:54:29 +0100
> > Cornelia Huck <cohuck@redhat.com> wrote:
> > 
> > > [This is the Linux kernel part, git tree is available at
> > > https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/vfio-ccw.git vfio-ccw-caps
> > > 
> > > The companion QEMU patches are available at
> > > https://github.com/cohuck/qemu vfio-ccw-caps]
> > > 
> > > Currently, vfio-ccw only relays START SUBCHANNEL requests to the real
> > > device. This tends to work well for the most common 'good path' scenarios;
> > > however, as we emulate {HALT,CLEAR} SUBCHANNEL in QEMU, things like
> > > clearing pending requests at the device is currently not supported.
> > > This may be a problem for e.g. error recovery.  
> > 
> > I'm wondering: what about MODIFY SUBCHANNEL? Do we plan to add MSCH
> > as well or is it supposed to remain 'userspace emulated'? AFAIR MSCH
> > may have an effect on error recovery as well.
> 
> I think that would require a deeper change, as we have the requirement
> to enable the subchannel before handing it to userspace. IOW, the guest
> does not cause the subchannel to be enabled/disabled, but the host does.
> 

My point is, when the subchannel is disabled, 'firmware' is responsible
for suppressing interrupts and error conditions, and also for
doing the appropriate recovery procedure, so to say under the hood.

I think Jason has discovered some problems related to this while doing
his DASD IPL with vfio-ccw work, but I don't quite remember any more.

IMHO it may be possible to emulate enable/disable, but it seems way more
error prone and complicated, than letting the guest enable/disable the
host subchannel.

I have no idea what was the reason for going with the initial design.
I would appreciate any hints or explanations, but I'm well aware that it
was a long time ago.


> Parameters (like for channel measurements) are a different game. It is
> something we should look into, but it will need a different region.

Yes, emulation-only channel measurements seem even less likely than proper
enable/disable. And 'that would need a different region' helps me
understand the scope of async_cmd_region. Maybe we should reconsider
the comment '+ * @cmd_region: MMIO region for asynchronous I/O commands
other than START'.

> 
> > BTW I would like to have the concurrency discussion sorted out before
> > I proceed with my review, because reviewing the stuff without a fair idea
> > of what exactly are we trying to achieve would yield poor results.
> 
> I'm not sure what is unclear about what we're trying to achieve (enable
> the guest to issue halt/clear on real hardware)?

Yeah, that is perfectly clear, but it ain't the complete story. E.g.
whether subsequent commands block until the preceding command finishes
is part of the interface. And what a good implementation is depends on the
answer. What I mean is, I first need to understand how things are supposed
to work (together) so I can double check that against the
implementation. Otherwise all I can do is nitpicking.

To get more tangible: we are in the middle of processing an SSCH request
(e.g. doing the translation) when a HSCH comes in. What should happen?
Should we start processing HSCH after the instruction part of SSCH is
done -- which currently includes translation? Or should we -EBUSY? Or do
we abort START related activities and do the HALT stuff?


> 
> But yes, we need to sort out that concurrency thing; I'm currently
> unsure if the core should do some things as well or if it's more of a
> vendor-driver thing.
> 

By core you mean vfio-mdev core? If yes, I think it is a vendor-driver
thing: limiting concurrency for all vfio-mdev  does not make sense IMHO.

Regards,
Halil

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 0/3] vfio-ccw: support hsch/csch (kernel part)
  2018-12-04 15:02     ` Halil Pasic
@ 2018-12-05 12:54       ` Cornelia Huck
  2018-12-05 18:34         ` Farhan Ali
  2018-12-06 18:47         ` Halil Pasic
  0 siblings, 2 replies; 54+ messages in thread
From: Cornelia Huck @ 2018-12-05 12:54 UTC (permalink / raw)
  To: Halil Pasic
  Cc: Jason J . Herne, linux-s390, Eric Farman, kvm, Pierre Morel,
	qemu-s390x, Farhan Ali, qemu-devel, Alex Williamson

On Tue, 4 Dec 2018 16:02:36 +0100
Halil Pasic <pasic@linux.ibm.com> wrote:

> On Tue, 4 Dec 2018 14:11:30 +0100
> Cornelia Huck <cohuck@redhat.com> wrote:
> 
> > On Tue, 4 Dec 2018 13:38:10 +0100
> > Halil Pasic <pasic@linux.ibm.com> wrote:
> >   
> > > On Thu, 22 Nov 2018 17:54:29 +0100
> > > Cornelia Huck <cohuck@redhat.com> wrote:
> > >   
> > > > [This is the Linux kernel part, git tree is available at
> > > > https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/vfio-ccw.git vfio-ccw-caps
> > > > 
> > > > The companion QEMU patches are available at
> > > > https://github.com/cohuck/qemu vfio-ccw-caps]
> > > > 
> > > > Currently, vfio-ccw only relays START SUBCHANNEL requests to the real
> > > > device. This tends to work well for the most common 'good path' scenarios;
> > > > however, as we emulate {HALT,CLEAR} SUBCHANNEL in QEMU, things like
> > > > clearing pending requests at the device is currently not supported.
> > > > This may be a problem for e.g. error recovery.    
> > > 
> > > I'm wondering: what about MODIFY SUBCHANNEL? Do we plan to add MSCH
> > > as well or is it supposed to remain 'userspace emulated'? AFAIR MSCH
> > > may have an effect on error recovery as well.  
> > 
> > I think that would require a deeper change, as we have the requirement
> > to enable the subchannel before handing it to userspace. IOW, the guest
> > does not cause the subchannel to be enabled/disabled, but the host does.
> >   
> 
> My point is, when the subchannel is disabled, 'firmware' is responsible
> for suppressing interrupts and error conditions, and also for
> doing the appropriate recovery procedure, so to say under the hood.

I don't think there's actually much of a 'recovery' possible at a
subchannel level (other than 'have you tried turning it off and on
again?'); the interesting stuff is all at the device-specific level.

> 
> I think Jason has discovered some problems related to this while doing
> his DASD IPL with vfio-ccw work, but I don't quite remember any more.

cc:ing Jason, in case he remembers :)

> IMHO it may be possible to emulate enable/disable, but it seems way more
> error prone and complicated, than letting the guest enable/disable the
> host subchannel.
> 
> I have no idea what was the reason for going with the initial design.
> I would appreciate any hints or explanations, but I'm well aware that it
> was a long time ago.

I don't really remember either, and any non-public mails from that time
are inaccessible to me :(

It *might* be an artifact of the original design (which operated at the
ccw_device rather than the subchannel level), though.

> > Parameters (like for channel measurements) are a different game. It is
> > something we should look into, but it will need a different region.  
> 
> Yes emulation only channel measurements seem even less likely than proper
> enable/disable. And 'that would need a different' region helps me
> understanding the scope of async_cmd_region. Maybe we should reconsider
> the comment '+ * @cmd_region: MMIO region for asynchronous I/O commands
> other than START'.

What do you think is wrong with that comment?

> > > BTW I would like to have the concurrency discussion sorted out before
> > > I proceed with my review, because reviewing the stuff without a fair idea
> > > of what exactly are we trying to achieve would yield poor results.  
> > 
> > I'm not sure what is unclear about what we're trying to achieve (enable
> > the guest to issue halt/clear on real hardware)?  
> 
> Yeah, that is perfectly clear, but it ain't the complete story. E.g.
> are subsequent commands blocking until the preceding command finishes
> is part of the interface. And what is good implementation depends on the
> answer. What I mean, I first need to understand how things are supposed
> to work (together) so I can double check that against the
> implementation. Otherwise all I can do is nitpicking.
> 
> To get more tangible: we are in the middle of processing an SSCH request
> (e.g. doing the translation) when a HSCH comes in. What should happen?
> Should we start processing HSCH after he instruction part of SSCH is
> done -- which currently includes translation? Or should we -EBUSY? Or do
> we abort START related activities and do the HALT stuff?

I think most of the sorting-out-the-operations stuff should be done by
the hardware itself, and we should not really try to enforce anything
special in our vfio code.

For your example, it might be best if a hsch is always accepted and
sent on towards the hardware. Probably best to reflect back -EAGAIN if
we're currently processing another instruction from another vcpu, so
that the user space caller can retry. Same for ssch, if another ssch is
already being processed. We *could* reflect cc 2 if the fctl bit is
already set, but that won't do for csch, so it is probably best to have
the hardware figure that out in any case.

If I read the code correctly, we currently reflect -EBUSY and not
-EAGAIN if we get a ssch request while already processing another one.
QEMU hands that back to the guest as a cc 2, which is not 100% correct.
In practice, we don't see this with Linux guests due to locking.

> > But yes, we need to sort out that concurrency thing; I'm currently
> > unsure if the core should do some things as well or if it's more of
> > a vendor-driver thing.
> >   
> 
> By core you mean vfio-mdev core? If yes, I think it is a vendor-driver
> thing: limiting concurrency for all vfio-mdev  does not make sense
> IMHO.

Also generic vfio. But I'm still unclear which guarantees we have. I
suspect none; I'm wondering whether other vfio devices might have
issues as well.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 0/3] vfio-ccw: support hsch/csch (kernel part)
  2018-12-05 12:54       ` Cornelia Huck
@ 2018-12-05 18:34         ` Farhan Ali
  2018-12-06 14:39           ` Cornelia Huck
  2018-12-06 18:47         ` Halil Pasic
  1 sibling, 1 reply; 54+ messages in thread
From: Farhan Ali @ 2018-12-05 18:34 UTC (permalink / raw)
  To: Cornelia Huck, Halil Pasic
  Cc: Jason J . Herne, linux-s390, Eric Farman, kvm, Pierre Morel,
	qemu-s390x, qemu-devel, Alex Williamson



On 12/05/2018 07:54 AM, Cornelia Huck wrote:
>> Yeah, that is perfectly clear, but it ain't the complete story. E.g.
>> are subsequent commands blocking until the preceding command finishes
>> is part of the interface. And what is good implementation depends on the
>> answer. What I mean, I first need to understand how things are supposed
>> to work (together) so I can double check that against the
>> implementation. Otherwise all I can do is nitpicking.
>>
>> To get more tangible: we are in the middle of processing an SSCH request
>> (e.g. doing the translation) when a HSCH comes in. What should happen?
>> Should we start processing HSCH after he instruction part of SSCH is
>> done -- which currently includes translation? Or should we -EBUSY? Or do
>> we abort START related activities and do the HALT stuff?
> I think most of the sorting-out-the-operations stuff should be done by
> the hardware itself, and we should not really try to enforce anything
> special in our vfio code.
> 
> For your example, it might be best if a hsch is always accepted and
> send on towards the hardware. Probably best to reflect back -EAGAIN if
> we're currently processing another instruction from another vcpu, so
> that the user space caller can retry. Same for ssch, if another ssch is
> already being processed. We *could* reflect cc 2 if the fctl bit is
> already set, but that won't do for csch, so it is probably best to have
> the hardware figure that out in any case.
> 
> If I read the code correctly, we currently reflect -EBUSY and not
> -EAGAIN if we get a ssch request while already processing another one.
> QEMU hands that back to the guest as a cc 2, which is not 100% correct.
> In practice, we don't see this with Linux guests due to locking.
> 

If we have a ssch and a csch immediately afterwards from userspace, will 
we end up issuing csch first and then ssch to the hardware?

If I understand correctly, the ccw translation as part of the ssch can 
be a slow operation so it might be possible we issue the csch first?
In that case we won't actually clear the original start function as 
intended.


Thanks
Farhan

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 0/3] vfio-ccw: support hsch/csch (kernel part)
  2018-12-05 18:34         ` Farhan Ali
@ 2018-12-06 14:39           ` Cornelia Huck
  2018-12-06 15:26             ` Farhan Ali
  0 siblings, 1 reply; 54+ messages in thread
From: Cornelia Huck @ 2018-12-06 14:39 UTC (permalink / raw)
  To: Farhan Ali
  Cc: Jason J . Herne, linux-s390, Eric Farman, kvm, Pierre Morel,
	qemu-s390x, qemu-devel, Halil Pasic, Alex Williamson

On Wed, 5 Dec 2018 13:34:11 -0500
Farhan Ali <alifm@linux.ibm.com> wrote:

> On 12/05/2018 07:54 AM, Cornelia Huck wrote:
> >> Yeah, that is perfectly clear, but it ain't the complete story. E.g.
> >> are subsequent commands blocking until the preceding command finishes
> >> is part of the interface. And what is good implementation depends on the
> >> answer. What I mean, I first need to understand how things are supposed
> >> to work (together) so I can double check that against the
> >> implementation. Otherwise all I can do is nitpicking.
> >>
> >> To get more tangible: we are in the middle of processing an SSCH request
> >> (e.g. doing the translation) when a HSCH comes in. What should happen?
> >> Should we start processing HSCH after he instruction part of SSCH is
> >> done -- which currently includes translation? Or should we -EBUSY? Or do
> >> we abort START related activities and do the HALT stuff?  
> > I think most of the sorting-out-the-operations stuff should be done by
> > the hardware itself, and we should not really try to enforce anything
> > special in our vfio code.
> > 
> > For your example, it might be best if a hsch is always accepted and
> > send on towards the hardware. Probably best to reflect back -EAGAIN if
> > we're currently processing another instruction from another vcpu, so
> > that the user space caller can retry. Same for ssch, if another ssch is
> > already being processed. We *could* reflect cc 2 if the fctl bit is
> > already set, but that won't do for csch, so it is probably best to have
> > the hardware figure that out in any case.
> > 
> > If I read the code correctly, we currently reflect -EBUSY and not
> > -EAGAIN if we get a ssch request while already processing another one.
> > QEMU hands that back to the guest as a cc 2, which is not 100% correct.
> > In practice, we don't see this with Linux guests due to locking.
> >   
> 
> If we have a ssch and a csch immediately afterwards from userspace, will 
> we end up issuing csch first and then ssch to the hardware?
> 
> If I understand correctly, the ccw translation as part of the ssch can 
> be a slow operation so it might be possible we issue the csch first?
> In that case we won't actually clear the original start function as 
> intended.

When we start processing the ssch request (translation and so on), we
set the state to BUSY. This means that any csch request will get a
-EBUSY, no overtaking possible. (I think maybe I'll need to check what
this series looks like if I rebase it on top of Pierre's rework, as he
did some changes in the state machine.)

My idea above was to return -EAGAIN instead of -EBUSY, so that user
space can retry the operation.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 0/3] vfio-ccw: support hsch/csch (kernel part)
  2018-12-06 14:39           ` Cornelia Huck
@ 2018-12-06 15:26             ` Farhan Ali
  2018-12-06 16:21               ` Cornelia Huck
  0 siblings, 1 reply; 54+ messages in thread
From: Farhan Ali @ 2018-12-06 15:26 UTC (permalink / raw)
  To: Cornelia Huck
  Cc: Jason J . Herne, linux-s390, Eric Farman, kvm, Pierre Morel,
	qemu-s390x, qemu-devel, Halil Pasic, Alex Williamson



On 12/06/2018 09:39 AM, Cornelia Huck wrote:
> On Wed, 5 Dec 2018 13:34:11 -0500
> Farhan Ali <alifm@linux.ibm.com> wrote:
> 
>> On 12/05/2018 07:54 AM, Cornelia Huck wrote:
>>>> Yeah, that is perfectly clear, but it ain't the complete story. E.g.
>>>> are subsequent commands blocking until the preceding command finishes
>>>> is part of the interface. And what is good implementation depends on the
>>>> answer. What I mean, I first need to understand how things are supposed
>>>> to work (together) so I can double check that against the
>>>> implementation. Otherwise all I can do is nitpicking.
>>>>
>>>> To get more tangible: we are in the middle of processing an SSCH request
>>>> (e.g. doing the translation) when a HSCH comes in. What should happen?
>>>> Should we start processing HSCH after he instruction part of SSCH is
>>>> done -- which currently includes translation? Or should we -EBUSY? Or do
>>>> we abort START related activities and do the HALT stuff?
>>> I think most of the sorting-out-the-operations stuff should be done by
>>> the hardware itself, and we should not really try to enforce anything
>>> special in our vfio code.
>>>
>>> For your example, it might be best if a hsch is always accepted and
>>> send on towards the hardware. Probably best to reflect back -EAGAIN if
>>> we're currently processing another instruction from another vcpu, so
>>> that the user space caller can retry. Same for ssch, if another ssch is
>>> already being processed. We *could* reflect cc 2 if the fctl bit is
>>> already set, but that won't do for csch, so it is probably best to have
>>> the hardware figure that out in any case.
>>>
>>> If I read the code correctly, we currently reflect -EBUSY and not
>>> -EAGAIN if we get a ssch request while already processing another one.
>>> QEMU hands that back to the guest as a cc 2, which is not 100% correct.
>>> In practice, we don't see this with Linux guests due to locking.
>>>    
>>
>> If we have a ssch and a csch immediately afterwards from userspace, will
>> we end up issuing csch first and then ssch to the hardware?
>>
>> If I understand correctly, the ccw translation as part of the ssch can
>> be a slow operation so it might be possible we issue the csch first?
>> In that case we won't actually clear the original start function as
>> intended.
> 
> When we start processing the ssch request (translation and so on), we
> set the state to BUSY. This means that any csch request will get a
> -EBUSY, no overtaking possible. (I think maybe I'll need to check what
> this series looks like if I rebase it on top of Pierre's rework, as he
> did some changes in the state machine.)

I think you meant the state is set to BOXED? Otherwise, patch 3 says that
if the state is BUSY and a CLEAR event request comes in, we issue the clear
instruction, no?


> 
> My idea above was to return -EAGAIN instead of -EBUSY, so that user
> space can retry the operation.
> 
> 

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 0/3] vfio-ccw: support hsch/csch (kernel part)
  2018-12-06 15:26             ` Farhan Ali
@ 2018-12-06 16:21               ` Cornelia Huck
  2018-12-06 17:50                 ` Farhan Ali
  0 siblings, 1 reply; 54+ messages in thread
From: Cornelia Huck @ 2018-12-06 16:21 UTC (permalink / raw)
  To: Farhan Ali
  Cc: Jason J . Herne, linux-s390, Eric Farman, kvm, Pierre Morel,
	qemu-s390x, qemu-devel, Halil Pasic, Alex Williamson

On Thu, 6 Dec 2018 10:26:12 -0500
Farhan Ali <alifm@linux.ibm.com> wrote:

> On 12/06/2018 09:39 AM, Cornelia Huck wrote:
> > On Wed, 5 Dec 2018 13:34:11 -0500
> > Farhan Ali <alifm@linux.ibm.com> wrote:
> >   
> >> On 12/05/2018 07:54 AM, Cornelia Huck wrote:  
> >>>> Yeah, that is perfectly clear, but it ain't the complete story. E.g.
> >>>> are subsequent commands blocking until the preceding command finishes
> >>>> is part of the interface. And what is good implementation depends on the
> >>>> answer. What I mean, I first need to understand how things are supposed
> >>>> to work (together) so I can double check that against the
> >>>> implementation. Otherwise all I can do is nitpicking.
> >>>>
> >>>> To get more tangible: we are in the middle of processing an SSCH request
> >>>> (e.g. doing the translation) when a HSCH comes in. What should happen?
> >>>> Should we start processing HSCH after he instruction part of SSCH is
> >>>> done -- which currently includes translation? Or should we -EBUSY? Or do
> >>>> we abort START related activities and do the HALT stuff?  
> >>> I think most of the sorting-out-the-operations stuff should be done by
> >>> the hardware itself, and we should not really try to enforce anything
> >>> special in our vfio code.
> >>>
> >>> For your example, it might be best if a hsch is always accepted and
> >>> send on towards the hardware. Probably best to reflect back -EAGAIN if
> >>> we're currently processing another instruction from another vcpu, so
> >>> that the user space caller can retry. Same for ssch, if another ssch is
> >>> already being processed. We *could* reflect cc 2 if the fctl bit is
> >>> already set, but that won't do for csch, so it is probably best to have
> >>> the hardware figure that out in any case.
> >>>
> >>> If I read the code correctly, we currently reflect -EBUSY and not
> >>> -EAGAIN if we get a ssch request while already processing another one.
> >>> QEMU hands that back to the guest as a cc 2, which is not 100% correct.
> >>> In practice, we don't see this with Linux guests due to locking.
> >>>      
> >>
> >> If we have a ssch and a csch immediately afterwards from userspace, will
> >> we end up issuing csch first and then ssch to the hardware?
> >>
> >> If I understand correctly, the ccw translation as part of the ssch can
> >> be a slow operation so it might be possible we issue the csch first?
> >> In that case we won't actually clear the original start function as
> >> intended.  
> > 
> > When we start processing the ssch request (translation and so on), we
> > set the state to BUSY. This means that any csch request will get a
> > -EBUSY, no overtaking possible. (I think maybe I'll need to check what
> > this series looks like if I rebase it on top of Pierre's rework, as he
> > did some changes in the state machine.)  
> 
> I think you meant the state is set to BOXED? otherwise the patch 3 says 
> if state is BUSY and CLEAR event request comes in, we issue the clear 
> instruction, no?

That's what I meant with "need to rebase" :) The BOXED state is gone; I
just had not rebased on top of it. There are more changes in the state
machine; if we are on the same page as to what should happen, I can
start massaging the patches.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 0/3] vfio-ccw: support hsch/csch (kernel part)
  2018-12-06 16:21               ` Cornelia Huck
@ 2018-12-06 17:50                 ` Farhan Ali
  2018-12-07  9:34                   ` Cornelia Huck
  0 siblings, 1 reply; 54+ messages in thread
From: Farhan Ali @ 2018-12-06 17:50 UTC (permalink / raw)
  To: Cornelia Huck
  Cc: Jason J . Herne, linux-s390, Eric Farman, kvm, Pierre Morel,
	qemu-s390x, qemu-devel, Halil Pasic, Alex Williamson



On 12/06/2018 11:21 AM, Cornelia Huck wrote:
> On Thu, 6 Dec 2018 10:26:12 -0500
> Farhan Ali <alifm@linux.ibm.com> wrote:
> 
>> On 12/06/2018 09:39 AM, Cornelia Huck wrote:
>>> On Wed, 5 Dec 2018 13:34:11 -0500
>>> Farhan Ali <alifm@linux.ibm.com> wrote:
>>>    
>>>> On 12/05/2018 07:54 AM, Cornelia Huck wrote:
>>>>>> Yeah, that is perfectly clear, but it ain't the complete story. E.g.
>>>>>> are subsequent commands blocking until the preceding command finishes
>>>>>> is part of the interface. And what is good implementation depends on the
>>>>>> answer. What I mean, I first need to understand how things are supposed
>>>>>> to work (together) so I can double check that against the
>>>>>> implementation. Otherwise all I can do is nitpicking.
>>>>>>
>>>>>> To get more tangible: we are in the middle of processing an SSCH request
>>>>>> (e.g. doing the translation) when a HSCH comes in. What should happen?
>>>>>> Should we start processing HSCH after he instruction part of SSCH is
>>>>>> done -- which currently includes translation? Or should we -EBUSY? Or do
>>>>>> we abort START related activities and do the HALT stuff?
>>>>> I think most of the sorting-out-the-operations stuff should be done by
>>>>> the hardware itself, and we should not really try to enforce anything
>>>>> special in our vfio code.
>>>>>
>>>>> For your example, it might be best if a hsch is always accepted and
>>>>> send on towards the hardware. Probably best to reflect back -EAGAIN if
>>>>> we're currently processing another instruction from another vcpu, so
>>>>> that the user space caller can retry. Same for ssch, if another ssch is
>>>>> already being processed. We *could* reflect cc 2 if the fctl bit is
>>>>> already set, but that won't do for csch, so it is probably best to have
>>>>> the hardware figure that out in any case.
>>>>>
>>>>> If I read the code correctly, we currently reflect -EBUSY and not
>>>>> -EAGAIN if we get a ssch request while already processing another one.
>>>>> QEMU hands that back to the guest as a cc 2, which is not 100% correct.
>>>>> In practice, we don't see this with Linux guests due to locking.
>>>>>       
>>>>
>>>> If we have a ssch and a csch immediately afterwards from userspace, will
>>>> we end up issuing csch first and then ssch to the hardware?
>>>>
>>>> If I understand correctly, the ccw translation as part of the ssch can
>>>> be a slow operation so it might be possible we issue the csch first?
>>>> In that case we won't actually clear the original start function as
>>>> intended.
>>>
>>> When we start processing the ssch request (translation and so on), we
>>> set the state to BUSY. This means that any csch request will get a
>>> -EBUSY, no overtaking possible. (I think maybe I'll need to check what
>>> this series looks like if I rebase it on top of Pierre's rework, as he
>>> did some changes in the state machine.)
>>
>> I think you meant the state is set to BOXED? otherwise the patch 3 says
>> if state is BUSY and CLEAR event request comes in, we issue the clear
>> instruction, no?
> 
> That's what I meant with "need to rebase" :) The BOXED state is gone; I
> just had not rebased on top of it. There's more changes in the state
> machine; if we are on the same page as to what should happen, I can
> start massaging the patches.
> 
> 

Sorry maybe I missed it, but are you referring to Pierre's latest 
cleanup patches? I don't see him removing the BOXED state.

I think returning -EAGAIN and asking the userspace to retry the 
operation sounds reasonable to me.

But how do we handle the issue of protecting the cmd_region from 
simultaneous hsch and csch calls? Do we agree on Pierre's method of 
making write calls mutually exclusive?

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 0/3] vfio-ccw: support hsch/csch (kernel part)
  2018-12-05 12:54       ` Cornelia Huck
  2018-12-05 18:34         ` Farhan Ali
@ 2018-12-06 18:47         ` Halil Pasic
  2018-12-07 10:05           ` Cornelia Huck
  1 sibling, 1 reply; 54+ messages in thread
From: Halil Pasic @ 2018-12-06 18:47 UTC (permalink / raw)
  To: Cornelia Huck
  Cc: Jason J . Herne, linux-s390, Eric Farman, Alex Williamson, kvm,
	Pierre Morel, Farhan Ali, qemu-devel, qemu-s390x

On Wed, 5 Dec 2018 13:54:02 +0100
Cornelia Huck <cohuck@redhat.com> wrote:

> On Tue, 4 Dec 2018 16:02:36 +0100
> Halil Pasic <pasic@linux.ibm.com> wrote:
> 
> > On Tue, 4 Dec 2018 14:11:30 +0100
> > Cornelia Huck <cohuck@redhat.com> wrote:
> > 
> > > On Tue, 4 Dec 2018 13:38:10 +0100
> > > Halil Pasic <pasic@linux.ibm.com> wrote:
> > >   
> > > > On Thu, 22 Nov 2018 17:54:29 +0100
> > > > Cornelia Huck <cohuck@redhat.com> wrote:
> > > >   
> > > > > [This is the Linux kernel part, git tree is available at
> > > > > https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/vfio-ccw.git
> > > > > vfio-ccw-caps
> > > > > 
> > > > > The companion QEMU patches are available at
> > > > > https://github.com/cohuck/qemu vfio-ccw-caps]
> > > > > 
> > > > > Currently, vfio-ccw only relays START SUBCHANNEL requests to
> > > > > the real device. This tends to work well for the most common
> > > > > 'good path' scenarios; however, as we emulate {HALT,CLEAR}
> > > > > SUBCHANNEL in QEMU, things like clearing pending requests at
> > > > > the device is currently not supported. This may be a problem
> > > > > for e.g. error recovery.    
> > > > 
> > > > I'm wondering: what about MODIFY SUBCHANNEL? Do we plan to add
> > > > MSCH as well or is it supposed to remain 'userspace emulated'?
> > > > AFAIR MSCH may have an effect on error recovery as well.  
> > > 
> > > I think that would require a deeper change, as we have the
> > > requirement to enable the subchannel before handing it to
> > > userspace. IOW, the guest does not cause the subchannel to be
> > > enabled/disabled, but the host does. 
> > 
> > My point is, when the subchannel is disabled, 'firmware' is
> > responsible for suppressing interrupts and error conditions, and
> > also for doing the appropriate recovery procedure, so to say under
> > the hood.
> 
> I don't think there's actually much of a 'recovery' possible at a
> subchannel level (other than 'have you tried turning it off and on
> again?'); the interesting stuff is all at the device-specific level.
> 

To clarify my concern let me quote from the PoP
(SA22-7832-10 page 14-9):

"""
If a device presents unsolicited status while the
associated subchannel is disabled, that status is
discarded by the channel subsystem without
generating an I/O-interruption condition. How-
ever, if the status presented contains unit check,
the channel subsystem issues the clear signal for
the associated subchannel and does not gener-
ate an I/O-interruption condition. This should be
taken into account when the program uses MOD-
IFY SUBCHANNEL to enable a subchannel. For
example, the medium on the associated device
that was present when the subchannel became
disabled may have been replaced, and, there-
fore, the program should verify the integrity of
that medium.
"""

> > 
> > I think Jason has discovered some problems related to this while
> > doing his DASD IPL with vfio-ccw work, but I don't quite remember
> > any more.
> 
> cc:ing Jason, in case he remembers :)
> 
> > IMHO it may be possible to emulate enable/disable, but it seems way
> > more error prone and complicated, than letting the guest
> > enable/disable the host subchannel.
> > 
> > I have no idea what was the reason for going with the initial design.
> > I would appreciate any hints or explanations, but I'm well aware
> > that it was a long time ago.
> 
> I don't really remember either, and any non-public mails from that time
> are inaccessible to me :(
> 
> It *might* be an artifact of the original design (which operated at the
> ccw_device rather than the subchannel level), though.
> 

Interesting.

> > > Parameters (like for channel measurements) are a different game.
> > > It is something we should look into, but it will need a different
> > > region.  
> > 
> > Yes emulation only channel measurements seem even less likely than
> > proper enable/disable. And 'that would need a different' region
> > helps me understanding the scope of async_cmd_region. Maybe we
> > should reconsider the comment '+ * @cmd_region: MMIO region for
> > asynchronous I/O commands other than START'.
> 
> What do you think is wrong with that comment?
> 

Well msch is also an async I/O command other than START. If msch does not
belong here but needs its own region, then this description seems too
generic.

> > > > BTW I would like to have the concurrency discussion sorted out
> > > > before I proceed with my review, because reviewing the stuff
> > > > without a fair idea of what exactly are we trying to achieve
> > > > would yield poor results.  
> > > 
> > > I'm not sure what is unclear about what we're trying to achieve
> > > (enable the guest to issue halt/clear on real hardware)?  
> > 
> > Yeah, that is perfectly clear, but it ain't the complete story. E.g.
> > are subsequent commands blocking until the preceding command finishes
> > is part of the interface. And what is good implementation depends on
> > the answer. What I mean, I first need to understand how things are
> > supposed to work (together) so I can double check that against the
> > implementation. Otherwise all I can do is nitpicking.
> > 
> > To get more tangible: we are in the middle of processing an SSCH
> > request (e.g. doing the translation) when a HSCH comes in. What
> > should happen? Should we start processing HSCH after the instruction
> > part of SSCH is done -- which currently includes translation? Or
> > should we -EBUSY? Or do we abort START related activities and do the
> > HALT stuff?
> 
> I think most of the sorting-out-the-operations stuff should be done by
> the hardware itself, and we should not really try to enforce anything
> special in our vfio code.
> 

Sounds very reasonable to me. Does this mean you are against Pierre's
'[PATCH v3 6/6] vfio: ccw: serialize the write system calls' as it does
not let HW sort out stuff, but enforces sequencing?


> For your example, it might be best if a hsch is always accepted and
> sent on towards the hardware.

Nod.

> Probably best to reflect back -EAGAIN if
> we're currently processing another instruction from another vcpu, so
> that the user space caller can retry.

Hm, not sure how this works together with your previous sentence.

> Same for ssch, if another ssch is
> already being processed. We *could* reflect cc 2 if the fctl bit is
> already set, but that won't do for csch, so it is probably best to have
> the hardware figure that out in any case.
> 

We just need to be careful about avoiding races if we let hw sort out
things. If an ssch is issued with the start function pending the correct
response is cc 2. 

> If I read the code correctly, we currently reflect -EBUSY and not
> -EAGAIN if we get a ssch request while already processing another one.
> QEMU hands that back to the guest as a cc 2, which is not 100% correct.
> In practice, we don't see this with Linux guests due to locking.
> 

Nod, does not happen because of BQL. We currently do the user-space
counterpart of vfio_ccw_mdev_write() in BQL context, right (i.e. we hold
BQL until translation is done and our host ssch() comes back)?

I think -EBUSY is the correct response for ssch while start pending set.
I think we set start pending in QEMU before we issue 'start command/io
request' to the kernel. I don't think -EAGAIN is a good idea. AFAIU we
would expect user-space to loop on -EAGAIN e.g. at least until the
processing of a 'start command' is done and the (first) ssch by the host
is issued. And then what?  Translate the second channel program issue
the second ssch in the host and probably get a non-zero cc? Or return
-EBUSY? Or keep returning -EAGAIN?

> > > But yes, we need to sort out that concurrency thing; I'm currently
> > > unsure if the core should do some things as well or if it's more of
> > > a vendor-driver thing.
> > >   
> > 
> > By core you mean vfio-mdev core? If yes, I think it is a
> > vendor-driver thing: limiting concurrency for all vfio-mdev  does
> > not make sense IMHO.
> 
> Also generic vfio. But I'm still unclear which guarantees we have. I
> suspect none; I'm wondering whether other vfio devices might have
> issues as well.
> 

My intuition says this is something left to the devices.

Regards,
Halil

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 0/3] vfio-ccw: support hsch/csch (kernel part)
  2018-12-06 17:50                 ` Farhan Ali
@ 2018-12-07  9:34                   ` Cornelia Huck
  0 siblings, 0 replies; 54+ messages in thread
From: Cornelia Huck @ 2018-12-07  9:34 UTC (permalink / raw)
  To: Farhan Ali
  Cc: Jason J . Herne, linux-s390, Eric Farman, kvm, Pierre Morel,
	qemu-s390x, qemu-devel, Halil Pasic, Alex Williamson

On Thu, 6 Dec 2018 12:50:50 -0500
Farhan Ali <alifm@linux.ibm.com> wrote:

> On 12/06/2018 11:21 AM, Cornelia Huck wrote:
> > On Thu, 6 Dec 2018 10:26:12 -0500
> > Farhan Ali <alifm@linux.ibm.com> wrote:
> >   
> >> On 12/06/2018 09:39 AM, Cornelia Huck wrote:  
> >>> On Wed, 5 Dec 2018 13:34:11 -0500
> >>> Farhan Ali <alifm@linux.ibm.com> wrote:
> >>>      
> >>>> On 12/05/2018 07:54 AM, Cornelia Huck wrote:  
> >>>>>> Yeah, that is perfectly clear, but it ain't the complete story. E.g.
> >>>>>> are subsequent commands blocking until the preceding command finishes
> >>>>>> is part of the interface. And what is good implementation depends on the
> >>>>>> answer. What I mean, I first need to understand how things are supposed
> >>>>>> to work (together) so I can double check that against the
> >>>>>> implementation. Otherwise all I can do is nitpicking.
> >>>>>>
> >>>>>> To get more tangible: we are in the middle of processing an SSCH request
> >>>>>> (e.g. doing the translation) when a HSCH comes in. What should happen?
> >>>>>> Should we start processing HSCH after the instruction part of SSCH is
> >>>>>> done -- which currently includes translation? Or should we -EBUSY? Or do
> >>>>>> we abort START related activities and do the HALT stuff?  
> >>>>> I think most of the sorting-out-the-operations stuff should be done by
> >>>>> the hardware itself, and we should not really try to enforce anything
> >>>>> special in our vfio code.
> >>>>>
> >>>>> For your example, it might be best if a hsch is always accepted and
> >>>>> sent on towards the hardware. Probably best to reflect back -EAGAIN if
> >>>>> we're currently processing another instruction from another vcpu, so
> >>>>> that the user space caller can retry. Same for ssch, if another ssch is
> >>>>> already being processed. We *could* reflect cc 2 if the fctl bit is
> >>>>> already set, but that won't do for csch, so it is probably best to have
> >>>>> the hardware figure that out in any case.
> >>>>>
> >>>>> If I read the code correctly, we currently reflect -EBUSY and not
> >>>>> -EAGAIN if we get a ssch request while already processing another one.
> >>>>> QEMU hands that back to the guest as a cc 2, which is not 100% correct.
> >>>>> In practice, we don't see this with Linux guests due to locking.
> >>>>>         
> >>>>
> >>>> If we have a ssch and a csch immediately afterwards from userspace, will
> >>>> we end up issuing csch first and then ssch to the hardware?
> >>>>
> >>>> If I understand correctly, the ccw translation as part of the ssch can
> >>>> be a slow operation so it might be possible we issue the csch first?
> >>>> In that case we won't actually clear the original start function as
> >>>> intended.  
> >>>
> >>> When we start processing the ssch request (translation and so on), we
> >>> set the state to BUSY. This means that any csch request will get a
> >>> -EBUSY, no overtaking possible. (I think maybe I'll need to check what
> >>> this series looks like if I rebase it on top of Pierre's rework, as he
> >>> did some changes in the state machine.)  
> >>
> >> I think you meant the state is set to BOXED? otherwise the patch 3 says
> >> if state is BUSY and CLEAR event request comes in, we issue the clear
> >> instruction, no?  
> > 
> > That's what I meant with "need to rebase" :) The BOXED state is gone; I
> > just had not rebased on top of it. There's more changes in the state
> > machine; if we are on the same page as to what should happen, I can
> > start massaging the patches.
> > 
> >   
> 
> Sorry maybe I missed it, but are you referring to Pierre's latest 
> cleanup patches? I don't see him removing the BOXED state.

The "remove BOXED state" patch is currently on my vfio-ccw-staging
branch. (That reminds me, will need to move it to my vfio-ccw branch
and possibly send a pull request. I had hoped to collect more patches
for the next release...)

> 
> I think returning -EAGAIN and asking the userspace to retry the 
> operation sounds reasonable to me.
> 
> But how do we handle the issue of protecting the cmd_region from 
> simultaneous hsch and csch calls? Do we agree on Pierre's method of 
> making write calls mutually exclusive?

That's in his patch series, right? I did not yet have time to look at
it...

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 0/3] vfio-ccw: support hsch/csch (kernel part)
  2018-12-06 18:47         ` Halil Pasic
@ 2018-12-07 10:05           ` Cornelia Huck
  2018-12-07 15:49             ` Halil Pasic
  2018-12-07 16:54             ` Halil Pasic
  0 siblings, 2 replies; 54+ messages in thread
From: Cornelia Huck @ 2018-12-07 10:05 UTC (permalink / raw)
  To: Halil Pasic
  Cc: Jason J . Herne, linux-s390, Eric Farman, Alex Williamson, kvm,
	Pierre Morel, Farhan Ali, qemu-devel, qemu-s390x

On Thu, 6 Dec 2018 19:47:14 +0100
Halil Pasic <pasic@linux.ibm.com> wrote:

> On Wed, 5 Dec 2018 13:54:02 +0100
> Cornelia Huck <cohuck@redhat.com> wrote:
> 
> > On Tue, 4 Dec 2018 16:02:36 +0100
> > Halil Pasic <pasic@linux.ibm.com> wrote:
> >   
> > > On Tue, 4 Dec 2018 14:11:30 +0100
> > > Cornelia Huck <cohuck@redhat.com> wrote:
> > >   
> > > > On Tue, 4 Dec 2018 13:38:10 +0100
> > > > Halil Pasic <pasic@linux.ibm.com> wrote:
> > > >     
> > > > > On Thu, 22 Nov 2018 17:54:29 +0100
> > > > > Cornelia Huck <cohuck@redhat.com> wrote:
> > > > >     
> > > > > > [This is the Linux kernel part, git tree is available at
> > > > > > https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/vfio-ccw.git
> > > > > > vfio-ccw-caps
> > > > > > 
> > > > > > The companion QEMU patches are available at
> > > > > > https://github.com/cohuck/qemu vfio-ccw-caps]
> > > > > > 
> > > > > > Currently, vfio-ccw only relays START SUBCHANNEL requests to
> > > > > > the real device. This tends to work well for the most common
> > > > > > 'good path' scenarios; however, as we emulate {HALT,CLEAR}
> > > > > > SUBCHANNEL in QEMU, things like clearing pending requests at
> > > > > > the device is currently not supported. This may be a problem
> > > > > > for e.g. error recovery.      
> > > > > 
> > > > > I'm wondering: what about MODIFY SUBCHANNEL? Do we plan to add
> > > > > MSCH as well or is it supposed to remain 'userspace emulated'?
> > > > > AFAIR MSCH may have an effect on error recovery as well.    
> > > > 
> > > > I think that would require a deeper change, as we have the
> > > > requirement to enable the subchannel before handing it to
> > > > userspace. IOW, the guest does not cause the subchannel to be
> > > > enabled/disabled, but the host does.   
> > > 
> > > My point is, when the subchannel is disabled, 'firmware' is
> > > responsible for suppressing interrupts and error conditions, and
> > > also for doing the appropriate recovery procedure, so to say under
> > > the hood.  
> > 
> > I don't think there's actually much of a 'recovery' possible at a
> > subchannel level (other than 'have you tried turning it off and on
> > again?'); the interesting stuff is all at the device-specific level.
> >   
> 
> To clarify my concern let me quote from the PoP
> (SA22-7832-10 page 14-9):
> 
> """
> If a device presents unsolicited status while the
> associated subchannel is disabled, that status is
> discarded by the channel subsystem without
> generating an I/O-interruption condition. How-
> ever, if the status presented contains unit check,
> the channel subsystem issues the clear signal for
> the associated subchannel and does not gener-
> ate an I/O-interruption condition. This should be
> taken into account when the program uses MOD-
> IFY SUBCHANNEL to enable a subchannel. For
> example, the medium on the associated device
> that was present when the subchannel became
> disabled may have been replaced, and, there-
> fore, the program should verify the integrity of
> that medium.
> """

Hm, so is your concern that we might have a status (unit check) if we
have an enabled subchannel that might not be present if the subchannel
had been disabled all the time? Is that a problem in practice?

> > > 
> > > I think Jason has discovered some problems related to this while
> > > doing his DASD IPL with vfio-ccw work, but I don't quite remember
> > > any more.  
> > 
> > cc:ing Jason, in case he remembers :)

Like in that case. Couldn't a unit check status also arrive just when
the subchannel has been enabled, and the code therefore has to deal
with it anyway?

> >   
> > > IMHO it may be possible to emulate enable/disable, but it seems way
> > > more error prone and complicated, than letting the guest
> > > enable/disable the host subchannel.
> > > 
> > > I have no idea what was the reason for going with the initial design.
> > > I would appreciate any hints or explanations, but I'm well aware
> > > that it was a long time ago.  
> > 
> > I don't really remember either, and any non-public mails from that time
> > are inaccessible to me :(
> > 
> > It *might* be an artifact of the original design (which operated at the
> > ccw_device rather than the subchannel level), though.
> >   
> 
> Interesting.
> 
> > > > Parameters (like for channel measurements) are a different game.
> > > > It is something we should look into, but it will need a different
> > > > region.    
> > > 
> > > Yes emulation only channel measurements seem even less likely than
> > > proper enable/disable. And 'that would need a different' region
> > > helps me understanding the scope of async_cmd_region. Maybe we
> > > should reconsider the comment '+ * @cmd_region: MMIO region for
> > > asynchronous I/O commands other than START'.  
> > 
> > What do you think is wrong with that comment?
> >   
> 
> Well msch is also an async I/O command other than START. If msch does not
> belong here but needs its own region, then this description seems too
> generic.

Why do you consider msch to be async? ssch, hsch, csch all have the
potential to cause the execution of an asynchronous (start/halt/clear)
function, while msch just (possibly) modifies the subchannel and is
done.

> 
> > > > > BTW I would like to have the concurrency discussion sorted out
> > > > > before I proceed with my review, because reviewing the stuff
> > > > > without a fair idea of what exactly are we trying to achieve
> > > > > would yield poor results.    
> > > > 
> > > > I'm not sure what is unclear about what we're trying to achieve
> > > > (enable the guest to issue halt/clear on real hardware)?    
> > > 
> > > Yeah, that is perfectly clear, but it ain't the complete story. E.g.
> > > are subsequent commands blocking until the preceding command finishes
> > > is part of the interface. And what is good implementation depends on
> > > the answer. What I mean, I first need to understand how things are
> > > supposed to work (together) so I can double check that against the
> > > implementation. Otherwise all I can do is nitpicking.
> > > 
> > > To get more tangible: we are in the middle of processing an SSCH
> > > request (e.g. doing the translation) when a HSCH comes in. What
> > > should happen? Should we start processing HSCH after the instruction
> > > part of SSCH is done -- which currently includes translation? Or
> > > should we -EBUSY? Or do we abort START related activities and do the
> > > HALT stuff?  
> > 
> > I think most of the sorting-out-the-operations stuff should be done by
> > the hardware itself, and we should not really try to enforce anything
> > special in our vfio code.
> >   
> 
> Sounds very reasonable to me. Does this mean you are against Pierre's
> '[PATCH v3 6/6] vfio: ccw: serialize the write system calls' as it does
> not let HW sort out stuff, but enforces sequencing?

I have not yet had time to look at that, sorry.

> 
> 
> > For your example, it might be best if a hsch is always accepted and
> > sent on towards the hardware.  
> 
> Nod.
> 
> > Probably best to reflect back -EAGAIN if
> > we're currently processing another instruction from another vcpu, so
> > that the user space caller can retry.  
> 
> Hm, not sure how this works together with your previous sentence.

The software layering. We have the kernel layer
(drivers/s390/cio/vfio_ccw_*) that interacts with the hardware more or
less directly, and the QEMU layer, which does some writes on regions.
In the end, the goal is to act on behalf of the guest issuing a
ssch/hsch/csch, which is from the guest's view a single instruction. We
should not have the individual "instructions" compete with each other
so that they run essentially in parallel (kernel layer), but we should
also not try to impose an artificial ordering as to when instructions
executed by different vcpus are executed (QEMU layer). Therefore, don't
try to run an instruction in the kernel when another one is in progress
for the same subchannel (exclusivity in the kernel), but retry in QEMU
if needed (no ordering between vcpus imposed).

In short, don't create strange concurrency issues in the "instruction"
handling, but make it possible to execute instructions in a
non-predictable order if the guest does not care about enforcing
ordering on its side.

> 
> > Same for ssch, if another ssch is
> > already being processed. We *could* reflect cc 2 if the fctl bit is
> > already set, but that won't do for csch, so it is probably best to have
> > the hardware figure that out in any case.
> >   
> 
> We just need to be careful about avoiding races if we let hw sort out
> things. If an ssch is issued with the start function pending the correct
> response is cc 2. 

But sending it on to the hardware will give us that cc 2, no?

> 
> > If I read the code correctly, we currently reflect -EBUSY and not
> > -EAGAIN if we get a ssch request while already processing another one.
> > QEMU hands that back to the guest as a cc 2, which is not 100% correct.
> > In practice, we don't see this with Linux guests due to locking.
> >   
> 
> Nod, does not happen because of BQL. We currently do the user-space
> counterpart of vfio_ccw_mdev_write() in BQL context or (i.e. we hold
> BQL until translation is done and our host ssch() comes back)?

The Linux kernel uses the subchannel lock to enforce exclusivity for
subchannel instructions, so we won't see Linux guests issue
instructions on different vcpus in parallel, that's what I meant.

> 
> I think -EBUSY is the correct response for ssch while start pending set.
> I think we set start pending in QEMU before we issue 'start command/io
> request' to the kernel. I don't think -EAGAIN is a good idea. AFAIU we
> would expect user-space to loop on -EAGAIN e.g. at least until the
> processing of a 'start command' is done and the (first) ssch by the host
> is issued. And then what?  Translate the second channel program issue
> the second ssch in the host and probably get a non-zero cc? Or return
> -EBUSY? Or keep returning -EAGAIN?

My idea was:
- return -EAGAIN if we're already processing a channel instruction
- continue returning -EBUSY etc. if the instruction gets the respective
  return code from the hardware

So, the second ssch would first get a -EAGAIN and then a -EBUSY if the
first ssch is done, but the subchannel is still doing the start
function. Just as you would expect when you do a ssch while your last
request has not finished yet.

> 
> > > > But yes, we need to sort out that concurrency thing; I'm currently
> > > > unsure if the core should do some things as well or if it's more of
> > > > a vendor-driver thing.
> > > >     
> > > 
> > > By core you mean vfio-mdev core? If yes, I think it is a
> > > vendor-driver thing: limiting concurrency for all vfio-mdev  does
> > > not make sense IMHO.  
> > 
> > Also generic vfio. But I'm still unclear which guarantees we have. I
> > suspect none; I'm wondering whether other vfio devices might have
> > issues as well.
> >   
> 
> My intuition says this is something left to the devices.

Probably, yes.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 0/3] vfio-ccw: support hsch/csch (kernel part)
  2018-12-07 10:05           ` Cornelia Huck
@ 2018-12-07 15:49             ` Halil Pasic
  2018-12-07 16:54             ` Halil Pasic
  1 sibling, 0 replies; 54+ messages in thread
From: Halil Pasic @ 2018-12-07 15:49 UTC (permalink / raw)
  To: Cornelia Huck
  Cc: Jason J . Herne, linux-s390, Eric Farman, Alex Williamson, kvm,
	Pierre Morel, Farhan Ali, qemu-devel, qemu-s390x

On Fri, 7 Dec 2018 11:05:29 +0100
Cornelia Huck <cohuck@redhat.com> wrote:

[..]
> > To clarify my concern let me quote from the PoP
> > (SA22-7832-10 page 14-9):
> > 
> > """
> > If a device presents unsolicited status while the
> > associated subchannel is disabled, that status is
> > discarded by the channel subsystem without
> > generating an I/O-interruption condition. How-
> > ever, if the status presented contains unit check,
> > the channel subsystem issues the clear signal for
> > the associated subchannel and does not gener-
> > ate an I/O-interruption condition. This should be
> > taken into account when the program uses MOD-
> > IFY SUBCHANNEL to enable a subchannel. For
> > example, the medium on the associated device
> > that was present when the subchannel became
> > disabled may have been replaced, and, there-
> > fore, the program should verify the integrity of
> > that medium.
> > """
> 
> Hm, so is your concern that we might have a status (unit check) if we
> have an enabled subchannel that might not be present if the subchannel
> had been disabled all the time? Is that a problem in practice?
> 

No idea if it is a problem in practice.

> > > > 
> > > > I think Jason has discovered some problems related to this while
> > > > doing his DASD IPL with vfio-ccw work, but I don't quite remember
> > > > any more.  
> > > 
> > > cc:ing Jason, in case he remembers :)
> 
> Like in that case. Couldn't a unit check status also arrive just when
> the subchannel has been enabled, and the code therefore has to deal
> with it anyway?
> 

I assumed that programming note is there for a reason. Of course, if
it cannot be proven, it ain't cheating. I don't remember exactly how this
interacts with the rest of the architecture. In fact I asked my question
because my feeling was that tying the virtual and the backing subchannel
together is simpler than proving that we are fine without doing it.


> > >   
> > > > IMHO it may be possible to emulate enable/disable, but it seems way
> > > > more error prone and complicated, than letting the guest
> > > > enable/disable the host subchannel.
> > > > 
> > > > I have no idea what was the reason for going with the initial design.
> > > > I would appreciate any hints or explanations, but I'm well aware
> > > > that it was a long time ago.  
> > > 
> > > I don't really remember either, and any non-public mails from that time
> > > are inaccessible to me :(
> > > 
> > > It *might* be an artifact of the original design (which operated at the
> > > ccw_device rather than the subchannel level), though.
> > >   
> > 
> > Interesting.
> > 
> > > > > Parameters (like for channel measurements) are a different game.
> > > > > It is something we should look into, but it will need a different
> > > > > region.    
> > > > 
> > > > Yes emulation only channel measurements seem even less likely than
> > > > proper enable/disable. And 'that would need a different' region
> > > > helps me understanding the scope of async_cmd_region. Maybe we
> > > > should reconsider the comment '+ * @cmd_region: MMIO region for
> > > > asynchronous I/O commands other than START'.  
> > > 
> > > What do you think is wrong with that comment?
> > >   
> > 
> > Well msch is also an async I/O command other than START. If msch does not
> > > belong here but needs its own region, then this description seems too
> > generic.
> 
> Why do you consider msch to be async? ssch, hsch, csch all have the
> potential to cause the execution of an asynchronous (start/halt/clear)
> function, while msch just (possibly) modifies the subchannel and is
> done.
>

Right, my bad. Got confused by my 'Z channel I/O is async' superstition. I
did not quite understand what async means in this context.

Regards,
Halil

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 0/3] vfio-ccw: support hsch/csch (kernel part)
  2018-12-07 10:05           ` Cornelia Huck
  2018-12-07 15:49             ` Halil Pasic
@ 2018-12-07 16:54             ` Halil Pasic
  2018-12-19 11:54               ` Cornelia Huck
  1 sibling, 1 reply; 54+ messages in thread
From: Halil Pasic @ 2018-12-07 16:54 UTC (permalink / raw)
  To: Cornelia Huck
  Cc: Jason J . Herne, linux-s390, Eric Farman, Alex Williamson, kvm,
	Pierre Morel, Farhan Ali, qemu-devel, qemu-s390x

On Fri, 7 Dec 2018 11:05:29 +0100
Cornelia Huck <cohuck@redhat.com> wrote:

> > > I think most of the sorting-out-the-operations stuff should be done by
> > > the hardware itself, and we should not really try to enforce anything
> > > special in our vfio code.
> > >     
> > 
> > Sounds very reasonable to me. Does this mean you are against Pierre's
> > '[PATCH v3 6/6] vfio: ccw: serialize the write system calls' as it does
> > not let HW sort out stuff, but enforces sequencing?  
> 
> I have not yet had time to look at that, sorry.
> 
> > 
> >   
> > > For your example, it might be best if a hsch is always accepted and
> > > send on towards the hardware.    
> > 
> > Nod.
> >   
> > > Probably best to reflect back -EAGAIN if
> > > we're currently processing another instruction from another vcpu, so
> > > that the user space caller can retry.    
> > 
> > Hm, not sure how this works together with your previous sentence.  
> 
> The software layering. We have the kernel layer
> (drivers/s390/cio/vfio_ccw_*) that interacts with the hardware more or
> less directly, and the QEMU layer, which does some writes on regions.
> In the end, the goal is to act on behalf of the guest issuing a
> ssch/hsch/csch, which is from the guest's view a single instruction. We
> should not have the individual "instructions" compete with each other
> so that they run essentially in parallel (kernel layer), but we should
> also not try to impose an artificial ordering as to when instructions
> executed by different vcpus are executed (QEMU layer). Therefore, don't
> try to run an instruction in the kernel when another one is in progress
> for the same subchannel (exclusivity in the kernel), but retry in QEMU
> if needed (no ordering between vcpus imposed).
> 
> In short, don't create strange concurrency issues in the "instruction"
> handling, but make it possible to execute instructions in a
> non-predictable order if the guest does not care about enforcing
> ordering on its side.
> 

I'm neither sold on this, nor am I violently opposing it. Will try to
meditate on it some more if any spare cycles arise. Currently I don't
see the benefit of the non-predictable order over plain FCFS. For
example, let's assume we have a ssch "instruction" that takes 1 second to
complete. Since normally the ssch instruction does not have to process the
channel program, and is thus kind of a constant time operation (now we
do the translation and the pinning as a part of the "instruction"), our
strange guest gets jumpy and does a csch after T+0.2s. And at T+0.8s, in
desperation, it follows the whole thing up with a hsch. If I understand your
proposal correctly, both userspace handlers would spin on -EAGAIN until
T+1. When ssch is done the csch and the hsch would race for who can
be the next. I don't quite get the value of that.

> >   
> > > Same for ssch, if another ssch is
> > > already being processed. We *could* reflect cc 2 if the fctl bit is
> > > already set, but that won't do for csch, so it is probably best to have
> > > the hardware figure that out in any case.
> > >     
> > 
> > We just need to be careful about avoiding races if we let hw sort out
> > things. If an ssch is issued with the start function pending the correct
> > response is cc 2.   
> 
> But sending it on to the hardware will give us that cc 2, no?
> 
> >   
> > > If I read the code correctly, we currently reflect -EBUSY and not
> > > -EAGAIN if we get a ssch request while already processing another one.
> > > QEMU hands that back to the guest as a cc 2, which is not 100% correct.
> > > In practice, we don't see this with Linux guests due to locking.
> > >     
> > 
> > Nod, does not happen because of BQL. We currently do the user-space
> > counterpart of vfio_ccw_mdev_write() in BQL context or (i.e. we hold
> > BQL until translation is done and our host ssch() comes back)?  
> 
> The Linux kernel uses the subchannel lock to enforce exclusivity for
> subchannel instructions, so we won't see Linux guests issue
> instructions on different vcpus in parallel, that's what I meant.
>

That is cool. Yet I think the situation with the BQL is relevant. Because
while BQL is held, not only IO instructions on a single vfio-ccw device
are mutually exclusive. AFAIU no other QEMU instruction
handler can engage. And a store subchannel for device A having to wait
until the translation for the start subchannel on device B is done is
not the most scary thing I can imagine.
 
> > 
> > I think -EBUSY is the correct response for ssch while start pending set.
> > I think we set start pending in QEMU before we issue 'start command/io
> > request' to the kernel. I don't think -EAGAIN is a good idea. AFAIU we
> > would expect user-space to loop on -EAGAIN e.g. at least until the
> > processing of a 'start command' is done and the (first) ssch by the host
> > is issued. And then what?  Translate the second channel program issue
> > the second ssch in the host and probably get a non-zero cc? Or return
> > -EBUSY? Or keep returning -EAGAIN?  
> 
> My idea was:
> - return -EAGAIN if we're already processing a channel instruction
> - continue returning -EBUSY etc. if the instruction gets the respective
>   return code from the hardware
> 
> So, the second ssch would first get a -EAGAIN and then a -EBUSY if the
> first ssch is done, but the subchannel is still doing the start
> function. Just as you would expect when you do a ssch while your last
> request has not finished yet.
> 

But before you can issue the second ssch you have to do the translation
for it. And we must assume the IO corresponding to the first ssch is not
done yet -- so we still need the translated channel program of the first
ssch. That is if we insist on doing the -EBUSY based on a return code
from the hardware. I'm not sure we end up with a big simplification from
making the "instructions" mutex on vfio-ccw device level in kernel as
proposed above. But I'm not against it. If you have the time to write
the patches I will find time to review them.

Regards,
Halil

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/3] vfio-ccw: add capabilities chain
  2018-11-22 16:54 ` [PATCH 1/3] vfio-ccw: add capabilities chain Cornelia Huck
  2018-11-23 12:28   ` Pierre Morel
  2018-11-27 19:04   ` Farhan Ali
@ 2018-12-17 21:53   ` Eric Farman
  2018-12-18 17:24     ` Cornelia Huck
  2 siblings, 1 reply; 54+ messages in thread
From: Eric Farman @ 2018-12-17 21:53 UTC (permalink / raw)
  To: Cornelia Huck, Halil Pasic, Farhan Ali, Pierre Morel
  Cc: linux-s390, qemu-s390x, Alex Williamson, qemu-devel, kvm



On 11/22/2018 11:54 AM, Cornelia Huck wrote:

...snip...

> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index 813102810f53..565669f95534 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -297,6 +297,7 @@ struct vfio_region_info_cap_type {
>   
>   #define VFIO_REGION_TYPE_PCI_VENDOR_TYPE	(1 << 31)
>   #define VFIO_REGION_TYPE_PCI_VENDOR_MASK	(0xffff)
> +#define VFIO_REGION_TYPE_CCW			(1 << 30)

Oof.  So the existing VFIO_REGION_TYPE_PCI_VENDOR_TYPE gets OR'd with 
another value (e.g., 8086).  But in 4.20, 
VFIO_REGION_TYPE_GFX was added as simply "1" ... Which direction are 
these definitions being added from?  I guess asked another way, is 
_TYPE_CCW going to be OR'd with anything else that necessitates its 
presence as an identifier with some Other Thing, or should this follow 
the TYPE_GFX enumeration?  Perhaps the type field needs to be tidied up 
to help this sit more cleanly now?  (Sorry!)

  - Eric

>   
>   /* 8086 Vendor sub-types */
>   #define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION	(1)
> 

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 3/3] vfio-ccw: add handling for asnyc channel instructions
  2018-11-22 16:54 ` [PATCH 3/3] vfio-ccw: add handling for asnyc channel instructions Cornelia Huck
                     ` (3 preceding siblings ...)
  2018-11-28 16:36   ` [qemu-s390x] " Halil Pasic
@ 2018-12-17 21:54   ` Eric Farman
  2018-12-18 16:45     ` Cornelia Huck
  4 siblings, 1 reply; 54+ messages in thread
From: Eric Farman @ 2018-12-17 21:54 UTC (permalink / raw)
  To: Cornelia Huck, Halil Pasic, Farhan Ali, Pierre Morel
  Cc: linux-s390, qemu-s390x, Alex Williamson, qemu-devel, kvm



On 11/22/2018 11:54 AM, Cornelia Huck wrote:
> Add a region to the vfio-ccw device that can be used to submit
> asynchronous I/O instructions. ssch continues to be handled by the
> existing I/O region; the new region handles hsch and csch.
> 
> Interrupt status continues to be reported through the same channels
> as for ssch.
> 
> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> ---
>   drivers/s390/cio/Makefile           |   3 +-
>   drivers/s390/cio/vfio_ccw_async.c   |  88 ++++++++++++++++
>   drivers/s390/cio/vfio_ccw_drv.c     |  48 ++++++---
>   drivers/s390/cio/vfio_ccw_fsm.c     | 158 +++++++++++++++++++++++++++-
>   drivers/s390/cio/vfio_ccw_ops.c     |  13 ++-
>   drivers/s390/cio/vfio_ccw_private.h |   6 ++
>   include/uapi/linux/vfio.h           |   4 +
>   include/uapi/linux/vfio_ccw.h       |  12 +++
>   8 files changed, 313 insertions(+), 19 deletions(-)
>   create mode 100644 drivers/s390/cio/vfio_ccw_async.c
> 
> diff --git a/drivers/s390/cio/Makefile b/drivers/s390/cio/Makefile
> index f230516abb96..f6a8db04177c 100644
> --- a/drivers/s390/cio/Makefile
> +++ b/drivers/s390/cio/Makefile
> @@ -20,5 +20,6 @@ obj-$(CONFIG_CCWGROUP) += ccwgroup.o
>   qdio-objs := qdio_main.o qdio_thinint.o qdio_debug.o qdio_setup.o
>   obj-$(CONFIG_QDIO) += qdio.o
>   
> -vfio_ccw-objs += vfio_ccw_drv.o vfio_ccw_cp.o vfio_ccw_ops.o vfio_ccw_fsm.o
> +vfio_ccw-objs += vfio_ccw_drv.o vfio_ccw_cp.o vfio_ccw_ops.o vfio_ccw_fsm.o \
> +	vfio_ccw_async.o
>   obj-$(CONFIG_VFIO_CCW) += vfio_ccw.o
> diff --git a/drivers/s390/cio/vfio_ccw_async.c b/drivers/s390/cio/vfio_ccw_async.c
> new file mode 100644
> index 000000000000..8c7f51d17d70
> --- /dev/null
> +++ b/drivers/s390/cio/vfio_ccw_async.c
> @@ -0,0 +1,88 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Async I/O region for vfio_ccw
> + *
> + * Copyright Red Hat, Inc. 2018
> + *
> + * Author(s): Cornelia Huck <cohuck@redhat.com>
> + */
> +
> +#include <linux/vfio.h>
> +#include <linux/mdev.h>
> +
> +#include "vfio_ccw_private.h"
> +
> +static size_t vfio_ccw_async_region_read(struct vfio_ccw_private *private,

I think this should return ssize_t ?  (same for _write, below)

> +					 char __user *buf, size_t count,
> +					 loff_t *ppos)
> +{
> +	unsigned int i = VFIO_CCW_OFFSET_TO_INDEX(*ppos) - VFIO_CCW_NUM_REGIONS;
> +	loff_t pos = *ppos & VFIO_CCW_OFFSET_MASK;
> +	struct ccw_cmd_region *region;
> +
> +	if (pos + count > sizeof(*region))
> +		return -EINVAL;
> +
> +	region = private->region[i].data;
> +	if (copy_to_user(buf, (void *)region + pos, count))
> +		return -EFAULT;
> +
> +	return count;
> +
> +}
> +
> +static size_t vfio_ccw_async_region_write(struct vfio_ccw_private *private,
> +					  const char __user *buf, size_t count,
> +					  loff_t *ppos)
> +{
> +	unsigned int i = VFIO_CCW_OFFSET_TO_INDEX(*ppos) - VFIO_CCW_NUM_REGIONS;
> +	loff_t pos = *ppos & VFIO_CCW_OFFSET_MASK;
> +	struct ccw_cmd_region *region;
> +
> +	if (pos + count > sizeof(*region))
> +		return -EINVAL;
> +
> +	if (private->state == VFIO_CCW_STATE_NOT_OPER ||
> +	    private->state == VFIO_CCW_STATE_STANDBY)
> +		return -EACCES;
> +
> +	region = private->region[i].data;
> +	if (copy_from_user((void *)region + pos, buf, count))
> +		return -EFAULT;
> +
> +	switch (region->command) {
> +	case VFIO_CCW_ASYNC_CMD_HSCH:
> +		vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_HALT_REQ);
> +		break;
> +	case VFIO_CCW_ASYNC_CMD_CSCH:
> +		vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_CLEAR_REQ);
> +		break;

I find myself wondering why we add separate VFIO_CCW_EVENT_x_REQ entries 
for HALT and CLEAR, rather than a single VFIO_CCW_EVENT_ASYNC_REQ and a 
switch on cmd_region->command within it to go to fsm_do_halt, 
fsm_do_clear, or whatever.

> +	default:
> +		return -EINVAL;
> +	}
> +
> +	return region->ret_code ? region->ret_code : count;
> +}
> +
> +static void vfio_ccw_async_region_release(struct vfio_ccw_private *private,
> +					  struct vfio_ccw_region *region)
> +{
> +
> +}
> +
> +const struct vfio_ccw_regops vfio_ccw_async_region_ops = {
> +	.read = vfio_ccw_async_region_read,
> +	.write = vfio_ccw_async_region_write,
> +	.release = vfio_ccw_async_region_release,
> +};
> +
> +int vfio_ccw_register_async_dev_regions(struct vfio_ccw_private *private)
> +{
> +	return vfio_ccw_register_dev_region(private,
> +					    VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD,
> +					    &vfio_ccw_async_region_ops,
> +					    sizeof(struct ccw_cmd_region),
> +					    VFIO_REGION_INFO_FLAG_READ |
> +					    VFIO_REGION_INFO_FLAG_WRITE,
> +					    private->cmd_region);
> +}
> diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
> index a10cec0e86eb..890c588a3a61 100644
> --- a/drivers/s390/cio/vfio_ccw_drv.c
> +++ b/drivers/s390/cio/vfio_ccw_drv.c
> @@ -3,9 +3,11 @@
>    * VFIO based Physical Subchannel device driver
>    *
>    * Copyright IBM Corp. 2017
> + * Copyright Red Hat, Inc. 2018
>    *
>    * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
>    *            Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
> + *            Cornelia Huck <cohuck@redhat.com>
>    */
>   
>   #include <linux/module.h>
> @@ -23,6 +25,7 @@
>   
>   struct workqueue_struct *vfio_ccw_work_q;
>   static struct kmem_cache *vfio_ccw_io_region;
> +static struct kmem_cache *vfio_ccw_cmd_region;
>   
>   /*
>    * Helpers
> @@ -76,7 +79,8 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
>   	private = container_of(work, struct vfio_ccw_private, io_work);
>   	irb = &private->irb;
>   
> -	if (scsw_is_solicited(&irb->scsw)) {
> +	if (scsw_is_solicited(&irb->scsw) &&
> +	    (scsw_fctl(&irb->scsw) & SCSW_FCTL_START_FUNC)) {
>   		cp_update_scsw(&private->cp, &irb->scsw);
>   		cp_free(&private->cp);
>   	}
> @@ -104,7 +108,7 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
>   {
>   	struct pmcw *pmcw = &sch->schib.pmcw;
>   	struct vfio_ccw_private *private;
> -	int ret;
> +	int ret = -ENOMEM;
>   
>   	if (pmcw->qf) {
>   		dev_warn(&sch->dev, "vfio: ccw: does not support QDIO: %s\n",
> @@ -118,10 +122,13 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
>   
>   	private->io_region = kmem_cache_zalloc(vfio_ccw_io_region,
>   					       GFP_KERNEL | GFP_DMA);
> -	if (!private->io_region) {
> -		kfree(private);
> -		return -ENOMEM;
> -	}
> +	if (!private->io_region)
> +		goto out_free;
> +
> +	private->cmd_region = kmem_cache_zalloc(vfio_ccw_cmd_region,
> +						GFP_KERNEL | GFP_DMA);
> +	if (!private->cmd_region)
> +		goto out_free;
>   
>   	private->sch = sch;
>   	dev_set_drvdata(&sch->dev, private);
> @@ -148,7 +155,10 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
>   	cio_disable_subchannel(sch);
>   out_free:
>   	dev_set_drvdata(&sch->dev, NULL);
> -	kmem_cache_free(vfio_ccw_io_region, private->io_region);
> +	if (private->cmd_region)
> +		kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region);
> +	if (private->io_region)
> +		kmem_cache_free(vfio_ccw_io_region, private->io_region);
>   	kfree(private);
>   	return ret;
>   }
> @@ -237,7 +247,7 @@ static struct css_driver vfio_ccw_sch_driver = {
>   
>   static int __init vfio_ccw_sch_init(void)
>   {
> -	int ret;
> +	int ret = -ENOMEM;
>   
>   	vfio_ccw_work_q = create_singlethread_workqueue("vfio-ccw");
>   	if (!vfio_ccw_work_q)
> @@ -247,20 +257,30 @@ static int __init vfio_ccw_sch_init(void)
>   					sizeof(struct ccw_io_region), 0,
>   					SLAB_ACCOUNT, 0,
>   					sizeof(struct ccw_io_region), NULL);
> -	if (!vfio_ccw_io_region) {
> -		destroy_workqueue(vfio_ccw_work_q);
> -		return -ENOMEM;
> -	}
> +	if (!vfio_ccw_io_region)
> +		goto out_err;
> +
> +	vfio_ccw_cmd_region = kmem_cache_create_usercopy("vfio_ccw_cmd_region",
> +					sizeof(struct ccw_cmd_region), 0,
> +					SLAB_ACCOUNT, 0,
> +					sizeof(struct ccw_cmd_region), NULL);
> +	if (!vfio_ccw_cmd_region)
> +		goto out_err;
>   
>   	isc_register(VFIO_CCW_ISC);
>   	ret = css_driver_register(&vfio_ccw_sch_driver);
>   	if (ret) {
>   		isc_unregister(VFIO_CCW_ISC);
> -		kmem_cache_destroy(vfio_ccw_io_region);
> -		destroy_workqueue(vfio_ccw_work_q);
> +		goto out_err;
>   	}
>   
>   	return ret;
> +
> +out_err:
> +	kmem_cache_destroy(vfio_ccw_cmd_region);
> +	kmem_cache_destroy(vfio_ccw_io_region);
> +	destroy_workqueue(vfio_ccw_work_q);
> +	return ret;
>   }
>   
>   static void __exit vfio_ccw_sch_exit(void)
> diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c
> index f94aa01f9c36..0caf77e8f377 100644
> --- a/drivers/s390/cio/vfio_ccw_fsm.c
> +++ b/drivers/s390/cio/vfio_ccw_fsm.c
> @@ -3,8 +3,10 @@
>    * Finite state machine for vfio-ccw device handling
>    *
>    * Copyright IBM Corp. 2017
> + * Copyright Red Hat, Inc. 2018
>    *
>    * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
> + *            Cornelia Huck <cohuck@redhat.com>
>    */
>   
>   #include <linux/vfio.h>
> @@ -68,6 +70,81 @@ static int fsm_io_helper(struct vfio_ccw_private *private)
>   	return ret;
>   }
>   
> +static int fsm_do_halt(struct vfio_ccw_private *private)
> +{
> +	struct subchannel *sch;
> +	unsigned long flags;
> +	int ccode;
> +	int ret;
> +
> +	sch = private->sch;
> +
> +	spin_lock_irqsave(sch->lock, flags);
> +	private->state = VFIO_CCW_STATE_BUSY;
> +
> +	/* Issue "Halt Subchannel" */
> +	ccode = hsch(sch->schid);
> +
> +	switch (ccode) {
> +	case 0:
> +		/*
> +		 * Initialize device status information
> +		 */
> +		sch->schib.scsw.cmd.actl |= SCSW_ACTL_HALT_PEND;
> +		ret = 0;
> +		break;
> +	case 1:		/* Status pending */
> +	case 2:		/* Busy */
> +		ret = -EBUSY;
> +		break;
> +	case 3:		/* Device not operational */
> +	{
> +		ret = -ENODEV;
> +		break;
> +	}
> +	default:
> +		ret = ccode;
> +	}
> +	spin_unlock_irqrestore(sch->lock, flags);
> +	return ret;
> +}
> +
> +static int fsm_do_clear(struct vfio_ccw_private *private)
> +{
> +	struct subchannel *sch;
> +	unsigned long flags;
> +	int ccode;
> +	int ret;
> +
> +	sch = private->sch;
> +
> +	spin_lock_irqsave(sch->lock, flags);
> +	private->state = VFIO_CCW_STATE_BUSY;
> +
> +	/* Issue "Clear Subchannel" */
> +	ccode = csch(sch->schid);
> +
> +	switch (ccode) {
> +	case 0:
> +		/*
> +		 * Initialize device status information
> +		 */
> +		sch->schib.scsw.cmd.actl = SCSW_ACTL_CLEAR_PEND;
> +		/* TODO: check what else we might need to clear */
> +		ret = 0;
> +		break;
> +	case 3:		/* Device not operational */
> +	{
> +		ret = -ENODEV;
> +		break;
> +	}
> +	default:
> +		ret = ccode;
> +	}
> +	spin_unlock_irqrestore(sch->lock, flags);
> +	return ret;
> +}
> +
>   static void fsm_notoper(struct vfio_ccw_private *private,
>   			enum vfio_ccw_event event)
>   {
> @@ -102,6 +179,20 @@ static void fsm_io_busy(struct vfio_ccw_private *private,
>   	private->io_region->ret_code = -EBUSY;
>   }
>   
> +static void fsm_async_error(struct vfio_ccw_private *private,
> +			    enum vfio_ccw_event event)
> +{
> +	pr_err("vfio-ccw: FSM: halt/clear request from state:%d\n",
> +	       private->state);

Worth stating whether it's a Halt or Clear here, rather than leaving it 
ambiguous?

> +	private->cmd_region->ret_code = -EIO;
> +}
> +
> +static void fsm_async_busy(struct vfio_ccw_private *private,
> +			   enum vfio_ccw_event event)
> +{
> +	private->cmd_region->ret_code = -EBUSY;
> +}
> +
>   static void fsm_disabled_irq(struct vfio_ccw_private *private,
>   			     enum vfio_ccw_event event)
>   {
> @@ -166,11 +257,11 @@ static void fsm_io_request(struct vfio_ccw_private *private,
>   		}
>   		return;
>   	} else if (scsw->cmd.fctl & SCSW_FCTL_HALT_FUNC) {
> -		/* XXX: Handle halt. */
> +		/* halt is handled via the async cmd region */
>   		io_region->ret_code = -EOPNOTSUPP;
>   		goto err_out;
>   	} else if (scsw->cmd.fctl & SCSW_FCTL_CLEAR_FUNC) {
> -		/* XXX: Handle clear. */
> +		/* clear is handled via the async cmd region */
>   		io_region->ret_code = -EOPNOTSUPP;
>   		goto err_out;
>   	}
> @@ -181,6 +272,59 @@ static void fsm_io_request(struct vfio_ccw_private *private,
>   			       io_region->ret_code, errstr);
>   }
>   
> +/*
> + * Deal with a halt request from userspace.
> + */
> +static void fsm_halt_request(struct vfio_ccw_private *private,
> +			     enum vfio_ccw_event event)
> +{
> +	struct ccw_cmd_region *cmd_region = private->cmd_region;
> +	int state = private->state;
> +
> +	private->state = VFIO_CCW_STATE_BOXED;
> +
> +	if (cmd_region->command != VFIO_CCW_ASYNC_CMD_HSCH) {
> +		/* should not happen? */
> +		cmd_region->ret_code = -EINVAL;
> +		goto err_out;
> +	}
> +
> +	cmd_region->ret_code = fsm_do_halt(private);
> +	if (cmd_region->ret_code)
> +		goto err_out;
> +
> +	return;
> +
> +err_out:
> +	private->state = state;
> +}
> +
> +/*
> + * Deal with a clear request from userspace.
> + */
> +static void fsm_clear_request(struct vfio_ccw_private *private,
> +			      enum vfio_ccw_event event)
> +{
> +	struct ccw_cmd_region *cmd_region = private->cmd_region;
> +	int state = private->state;
> +
> +	private->state = VFIO_CCW_STATE_BOXED;
> +
> +	if (cmd_region->command != VFIO_CCW_ASYNC_CMD_CSCH) {
> +		/* should not happen? */
> +		cmd_region->ret_code = -EINVAL;
> +		goto err_out;
> +	}
> +
> +	cmd_region->ret_code = fsm_do_clear(private);
> +	if (cmd_region->ret_code)
> +		goto err_out;
> +
> +	return;
> +
> +err_out:
> +	private->state = state;
> +}
>   /*
>    * Got an interrupt for a normal io (state busy).
>    */
> @@ -204,26 +348,36 @@ fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = {
>   	[VFIO_CCW_STATE_NOT_OPER] = {
>   		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_nop,
>   		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_error,
> +		[VFIO_CCW_EVENT_HALT_REQ]	= fsm_async_error,
> +		[VFIO_CCW_EVENT_CLEAR_REQ]	= fsm_async_error,
>   		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_disabled_irq,
>   	},
>   	[VFIO_CCW_STATE_STANDBY] = {
>   		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
>   		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_error,
> +		[VFIO_CCW_EVENT_HALT_REQ]	= fsm_async_error,
> +		[VFIO_CCW_EVENT_CLEAR_REQ]	= fsm_async_error,
>   		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
>   	},
>   	[VFIO_CCW_STATE_IDLE] = {
>   		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
>   		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_request,
> +		[VFIO_CCW_EVENT_HALT_REQ]	= fsm_halt_request,
> +		[VFIO_CCW_EVENT_CLEAR_REQ]	= fsm_clear_request,
>   		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
>   	},
>   	[VFIO_CCW_STATE_BOXED] = {
>   		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
>   		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_busy,
> +		[VFIO_CCW_EVENT_HALT_REQ]	= fsm_async_busy,
> +		[VFIO_CCW_EVENT_CLEAR_REQ]	= fsm_async_busy,
>   		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
>   	},
>   	[VFIO_CCW_STATE_BUSY] = {
>   		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
>   		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_busy,
> +		[VFIO_CCW_EVENT_HALT_REQ]	= fsm_halt_request,
> +		[VFIO_CCW_EVENT_CLEAR_REQ]	= fsm_clear_request,
>   		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
>   	},
>   };
> diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
> index a5d731ed2a39..0e1f7f7bf927 100644
> --- a/drivers/s390/cio/vfio_ccw_ops.c
> +++ b/drivers/s390/cio/vfio_ccw_ops.c
> @@ -148,11 +148,20 @@ static int vfio_ccw_mdev_open(struct mdev_device *mdev)
>   	struct vfio_ccw_private *private =
>   		dev_get_drvdata(mdev_parent_dev(mdev));
>   	unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
> +	int ret;
>   
>   	private->nb.notifier_call = vfio_ccw_mdev_notifier;
>   
> -	return vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
> -				      &events, &private->nb);
> +	ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
> +				     &events, &private->nb);
> +	if (ret)
> +		return ret;
> +
> +	ret = vfio_ccw_register_async_dev_regions(private);
> +	if (ret)
> +		vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
> +					 &private->nb);
> +	return ret;
>   }
>   
>   static void vfio_ccw_mdev_release(struct mdev_device *mdev)
> diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
> index a6f9f84526e2..1a41a14831ae 100644
> --- a/drivers/s390/cio/vfio_ccw_private.h
> +++ b/drivers/s390/cio/vfio_ccw_private.h
> @@ -53,6 +53,8 @@ int vfio_ccw_register_dev_region(struct vfio_ccw_private *private,
>   				 const struct vfio_ccw_regops *ops,
>   				 size_t size, u32 flags, void *data);
>   
> +int vfio_ccw_register_async_dev_regions(struct vfio_ccw_private *private);
> +
>   /**
>    * struct vfio_ccw_private
>    * @sch: pointer to the subchannel
> @@ -62,6 +64,7 @@ int vfio_ccw_register_dev_region(struct vfio_ccw_private *private,
>    * @mdev: pointer to the mediated device
>    * @nb: notifier for vfio events
>    * @io_region: MMIO region to input/output I/O arguments/results
> + * @cmd_region: MMIO region for asynchronous I/O commands other than START
>    * @region: additional regions for other subchannel operations
>    * @num_regions: number of additional regions
>    * @cp: channel program for the current I/O operation
> @@ -79,6 +82,7 @@ struct vfio_ccw_private {
>   	struct notifier_block	nb;
>   	struct ccw_io_region	*io_region;
>   	struct vfio_ccw_region *region;
> +	struct ccw_cmd_region	*cmd_region;
>   	int num_regions;
>   
>   	struct channel_program	cp;
> @@ -114,6 +118,8 @@ enum vfio_ccw_event {
>   	VFIO_CCW_EVENT_NOT_OPER,
>   	VFIO_CCW_EVENT_IO_REQ,
>   	VFIO_CCW_EVENT_INTERRUPT,
> +	VFIO_CCW_EVENT_HALT_REQ,
> +	VFIO_CCW_EVENT_CLEAR_REQ,
>   	/* last element! */
>   	NR_VFIO_CCW_EVENTS
>   };
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index 565669f95534..c01472ec77ea 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -304,6 +304,7 @@ struct vfio_region_info_cap_type {
>   #define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG	(2)
>   #define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG	(3)
>   
> +
>   #define VFIO_REGION_TYPE_GFX                    (1)
>   #define VFIO_REGION_SUBTYPE_GFX_EDID            (1)
>   
> @@ -354,6 +355,9 @@ struct vfio_region_gfx_edid {
>   #define VFIO_DEVICE_GFX_LINK_STATE_DOWN  2
>   };
>   
> +/* ccw sub-types */
> +#define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD	(1)
> +
>   /*
>    * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped
>    * which allows direct access to non-MSIX registers which happened to be within
> diff --git a/include/uapi/linux/vfio_ccw.h b/include/uapi/linux/vfio_ccw.h
> index 2ec5f367ff78..cbecbf0cd54f 100644
> --- a/include/uapi/linux/vfio_ccw.h
> +++ b/include/uapi/linux/vfio_ccw.h
> @@ -12,6 +12,7 @@
>   
>   #include <linux/types.h>
>   
> +/* used for START SUBCHANNEL, always present */
>   struct ccw_io_region {
>   #define ORB_AREA_SIZE 12
>   	__u8	orb_area[ORB_AREA_SIZE];
> @@ -22,4 +23,15 @@ struct ccw_io_region {
>   	__u32	ret_code;
>   } __packed;
>   
> +/*
> + * used for processing commands that trigger asynchronous actions
> + * Note: this is controlled by a capability
> + */
> +#define VFIO_CCW_ASYNC_CMD_HSCH (1 << 0)
> +#define VFIO_CCW_ASYNC_CMD_CSCH (1 << 1)
> +struct ccw_cmd_region {
> +	__u32 command;
> +	__u32 ret_code;
> +} __packed;
> +
>   #endif
> 

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 3/3] vfio-ccw: add handling for asnyc channel instructions
  2018-12-17 21:54   ` Eric Farman
@ 2018-12-18 16:45     ` Cornelia Huck
  0 siblings, 0 replies; 54+ messages in thread
From: Cornelia Huck @ 2018-12-18 16:45 UTC (permalink / raw)
  To: Eric Farman
  Cc: linux-s390, Alex Williamson, Pierre Morel, kvm, Farhan Ali,
	qemu-devel, Halil Pasic, qemu-s390x

On Mon, 17 Dec 2018 16:54:31 -0500
Eric Farman <farman@linux.ibm.com> wrote:

> On 11/22/2018 11:54 AM, Cornelia Huck wrote:
> > Add a region to the vfio-ccw device that can be used to submit
> > asynchronous I/O instructions. ssch continues to be handled by the
> > existing I/O region; the new region handles hsch and csch.
> > 
> > Interrupt status continues to be reported through the same channels
> > as for ssch.
> > 
> > Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> > ---
> >   drivers/s390/cio/Makefile           |   3 +-
> >   drivers/s390/cio/vfio_ccw_async.c   |  88 ++++++++++++++++
> >   drivers/s390/cio/vfio_ccw_drv.c     |  48 ++++++---
> >   drivers/s390/cio/vfio_ccw_fsm.c     | 158 +++++++++++++++++++++++++++-
> >   drivers/s390/cio/vfio_ccw_ops.c     |  13 ++-
> >   drivers/s390/cio/vfio_ccw_private.h |   6 ++
> >   include/uapi/linux/vfio.h           |   4 +
> >   include/uapi/linux/vfio_ccw.h       |  12 +++
> >   8 files changed, 313 insertions(+), 19 deletions(-)
> >   create mode 100644 drivers/s390/cio/vfio_ccw_async.c

> > diff --git a/drivers/s390/cio/vfio_ccw_async.c b/drivers/s390/cio/vfio_ccw_async.c
> > new file mode 100644
> > index 000000000000..8c7f51d17d70
> > --- /dev/null
> > +++ b/drivers/s390/cio/vfio_ccw_async.c
> > @@ -0,0 +1,88 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Async I/O region for vfio_ccw
> > + *
> > + * Copyright Red Hat, Inc. 2018
> > + *
> > + * Author(s): Cornelia Huck <cohuck@redhat.com>
> > + */
> > +
> > +#include <linux/vfio.h>
> > +#include <linux/mdev.h>
> > +
> > +#include "vfio_ccw_private.h"
> > +
> > +static size_t vfio_ccw_async_region_read(struct vfio_ccw_private *private,  
> 
> I think this should return ssize_t ?  (same for _write, below)

Yes, ssize_t makes more sense. Changed.

(vfio_pci_regops also has size_t; should probably be changed as well.)

> 
> > +					 char __user *buf, size_t count,
> > +					 loff_t *ppos)
> > +{
> > +	unsigned int i = VFIO_CCW_OFFSET_TO_INDEX(*ppos) - VFIO_CCW_NUM_REGIONS;
> > +	loff_t pos = *ppos & VFIO_CCW_OFFSET_MASK;
> > +	struct ccw_cmd_region *region;
> > +
> > +	if (pos + count > sizeof(*region))
> > +		return -EINVAL;
> > +
> > +	region = private->region[i].data;
> > +	if (copy_to_user(buf, (void *)region + pos, count))
> > +		return -EFAULT;
> > +
> > +	return count;
> > +
> > +}
> > +
> > +static size_t vfio_ccw_async_region_write(struct vfio_ccw_private *private,
> > +					  const char __user *buf, size_t count,
> > +					  loff_t *ppos)
> > +{
> > +	unsigned int i = VFIO_CCW_OFFSET_TO_INDEX(*ppos) - VFIO_CCW_NUM_REGIONS;
> > +	loff_t pos = *ppos & VFIO_CCW_OFFSET_MASK;
> > +	struct ccw_cmd_region *region;
> > +
> > +	if (pos + count > sizeof(*region))
> > +		return -EINVAL;
> > +
> > +	if (private->state == VFIO_CCW_STATE_NOT_OPER ||
> > +	    private->state == VFIO_CCW_STATE_STANDBY)
> > +		return -EACCES;
> > +
> > +	region = private->region[i].data;
> > +	if (copy_from_user((void *)region + pos, buf, count))
> > +		return -EFAULT;
> > +
> > +	switch (region->command) {
> > +	case VFIO_CCW_ASYNC_CMD_HSCH:
> > +		vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_HALT_REQ);
> > +		break;
> > +	case VFIO_CCW_ASYNC_CMD_CSCH:
> > +		vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_CLEAR_REQ);
> > +		break;  
> 
> I find myself wondering why we add separate VFIO_CCW_EVENT_x_REQ entries 
> for HALT and CLEAR, rather than a single VFIO_CCW_EVENT_ASYNC_REQ and a 
> switch on cmd_region->command within it to go to fsm_do_halt, 
> fsm_do_clear, or whatever.

In the end, it probably does not matter much where we do the switch.
When I started writing this, I thought I would want to allow clear in
more states than halt; but that does not make much sense (best to let
the hardware sort it out; see also the other discussions around
concurrency.)

> 
> > +	default:
> > +		return -EINVAL;
> > +	}
> > +
> > +	return region->ret_code ? region->ret_code : count;
> > +}

(...)

> > diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c
> > index f94aa01f9c36..0caf77e8f377 100644
> > --- a/drivers/s390/cio/vfio_ccw_fsm.c
> > +++ b/drivers/s390/cio/vfio_ccw_fsm.c

> > @@ -102,6 +179,20 @@ static void fsm_io_busy(struct vfio_ccw_private *private,
> >   	private->io_region->ret_code = -EBUSY;
> >   }
> >   
> > +static void fsm_async_error(struct vfio_ccw_private *private,
> > +			    enum vfio_ccw_event event)
> > +{
> > +	pr_err("vfio-ccw: FSM: halt/clear request from state:%d\n",
> > +	       private->state);  
> 
> Worth stating whether it's a Halt or Clear here, rather than leaving it 
> ambiguous?

Not sure. Also not sure if we want to fold the events, as you suggested
above :)

This also reminds me that I need to rebase this: some details in the
handling will need to be different without the BOXED state.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/3] vfio-ccw: add capabilities chain
  2018-12-17 21:53   ` Eric Farman
@ 2018-12-18 17:24     ` Cornelia Huck
  2018-12-18 17:56       ` Eric Farman
  2018-12-19 16:28       ` Alex Williamson
  0 siblings, 2 replies; 54+ messages in thread
From: Cornelia Huck @ 2018-12-18 17:24 UTC (permalink / raw)
  To: Eric Farman
  Cc: linux-s390, Alex Williamson, Pierre Morel, kvm, Farhan Ali,
	qemu-devel, Halil Pasic, qemu-s390x

On Mon, 17 Dec 2018 16:53:34 -0500
Eric Farman <farman@linux.ibm.com> wrote:

> On 11/22/2018 11:54 AM, Cornelia Huck wrote:
> 
> ...snip...
> 
> > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> > index 813102810f53..565669f95534 100644
> > --- a/include/uapi/linux/vfio.h
> > +++ b/include/uapi/linux/vfio.h
> > @@ -297,6 +297,7 @@ struct vfio_region_info_cap_type {
> >   
> >   #define VFIO_REGION_TYPE_PCI_VENDOR_TYPE	(1 << 31)
> >   #define VFIO_REGION_TYPE_PCI_VENDOR_MASK	(0xffff)
> > +#define VFIO_REGION_TYPE_CCW			(1 << 30)  
> 
> Oof.  So the existing VFIO_REGION_TYPE_PCI_VENDOR_TYPE gets OR'd with 
> another value (e.g., 8086).  But in 4.20, there was a 
> VFIO_REGION_TYPE_GFX is added as simply "1" ... Which direction are 
> these definitions being added from?  I guess asked another way, is 
> _TYPE_CCW going to be OR'd with anything else that necessitates its 
> presence as an identifier with some Other Thing, or should this follow 
> the TYPE_GFX enumeration?  Perhaps the type field needs to be tidied up 
> to help this sit more cleanly now?  (Sorry!)

The semantics of that type stuff are really a bit unclear to me :(

I don't think we'll ever do any fancy mask handling for ccw. It is
probably enough to have any kind of uniqueness within the different
types, so maybe counting up would indeed be enough...

> 
>   - Eric
> 
> >   
> >   /* 8086 Vendor sub-types */
> >   #define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION	(1)
> >   
> 

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/3] vfio-ccw: add capabilities chain
  2018-12-18 17:24     ` Cornelia Huck
@ 2018-12-18 17:56       ` Eric Farman
  2018-12-19 16:28       ` Alex Williamson
  1 sibling, 0 replies; 54+ messages in thread
From: Eric Farman @ 2018-12-18 17:56 UTC (permalink / raw)
  To: Cornelia Huck
  Cc: linux-s390, Alex Williamson, Pierre Morel, kvm, Farhan Ali,
	qemu-devel, Halil Pasic, qemu-s390x



On 12/18/2018 12:24 PM, Cornelia Huck wrote:
> On Mon, 17 Dec 2018 16:53:34 -0500
> Eric Farman <farman@linux.ibm.com> wrote:
> 
>> On 11/22/2018 11:54 AM, Cornelia Huck wrote:
>>
>> ...snip...
>>
>>> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
>>> index 813102810f53..565669f95534 100644
>>> --- a/include/uapi/linux/vfio.h
>>> +++ b/include/uapi/linux/vfio.h
>>> @@ -297,6 +297,7 @@ struct vfio_region_info_cap_type {
>>>    
>>>    #define VFIO_REGION_TYPE_PCI_VENDOR_TYPE	(1 << 31)
>>>    #define VFIO_REGION_TYPE_PCI_VENDOR_MASK	(0xffff)
>>> +#define VFIO_REGION_TYPE_CCW			(1 << 30)
>>
>> Oof.  So the existing VFIO_REGION_TYPE_PCI_VENDOR_TYPE gets OR'd with
>> another value (e.g., 8086).  But in 4.20, there was a
>> VFIO_REGION_TYPE_GFX is added as simply "1" ... Which direction are
>> these definitions being added from?  I guess asked another way, is
>> _TYPE_CCW going to be OR'd with anything else that necessitates its
>> presence as an identifier with some Other Thing, or should this follow
>> the TYPE_GFX enumeration?  Perhaps the type field needs to be tidied up
>> to help this sit more cleanly now?  (Sorry!)
> 
> The semantics of that type stuff are really a bit unclear to me :(

+1

I was confused when I first looked at this.  When I applied it to 4.20, 
I got another level of confusion.  ;)

> 
> I don't think we'll ever do any fancy mask handling for ccw. It is
> probably enough to have any kind of uniqueness within the different
> types, so maybe counting up would be indeed enough...

Considering the subtype space, I think it would be fine too.  But wanted 
to ask in case I've been out of the loop on something.

> 
>>
>>    - Eric
>>
>>>    
>>>    /* 8086 Vendor sub-types */
>>>    #define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION	(1)
>>>    
>>
> 

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 0/3] vfio-ccw: support hsch/csch (kernel part)
  2018-12-07 16:54             ` Halil Pasic
@ 2018-12-19 11:54               ` Cornelia Huck
  2018-12-19 14:17                 ` Halil Pasic
  0 siblings, 1 reply; 54+ messages in thread
From: Cornelia Huck @ 2018-12-19 11:54 UTC (permalink / raw)
  To: Halil Pasic
  Cc: Jason J . Herne, linux-s390, Eric Farman, Alex Williamson, kvm,
	Pierre Morel, Farhan Ali, qemu-devel, qemu-s390x

On Fri, 7 Dec 2018 17:54:23 +0100
Halil Pasic <pasic@linux.ibm.com> wrote:

> On Fri, 7 Dec 2018 11:05:29 +0100
> Cornelia Huck <cohuck@redhat.com> wrote:
> 
> > > > I think most of the sorting-out-the-operations stuff should be done by
> > > > the hardware itself, and we should not really try to enforce anything
> > > > special in our vfio code.
> > > >       
> > > 
> > > Sounds very reasonable to me. Does this mean you are against Pierre's
> > > '[PATCH v3 6/6] vfio: ccw: serialize the write system calls' as it does
> > > not let HW sort out stuff, but enforces sequencing?    
> > 
> > I have not yet had time to look at that, sorry.
> >   
> > > 
> > >     
> > > > For your example, it might be best if a hsch is always accepted and
> > > > send on towards the hardware.      
> > > 
> > > Nod.
> > >     
> > > > Probably best to reflect back -EAGAIN if
> > > > we're currently processing another instruction from another vcpu, so
> > > > that the user space caller can retry.      
> > > 
> > > Hm, not sure how this works together with your previous sentence.    
> > 
> > The software layering. We have the kernel layer
> > (drivers/s390/cio/vfio_ccw_*) that interacts with the hardware more or
> > less directly, and the QEMU layer, which does some writes on regions.
> > In the end, the goal is to act on behalf of the guest issuing a
> > ssch/hsch/csch, which is from the guest's view a single instruction. We
> > should not have the individual "instructions" compete with each other
> > so that they run essentially in parallel (kernel layer), but we should
> > also not try to impose an artificial ordering as to when instructions
> > executed by different vcpus are executed (QEMU layer). Therefore, don't
> > try to run an instruction in the kernel when another one is in progress
> > for the same subchannel (exclusivity in the kernel), but retry in QEMU
> > if needed (no ordering between vcpus imposed).
> > 
> > In short, don't create strange concurrency issues in the "instruction"
> > handling, but make it possible to execute instructions in a
> > non-predictable order if the guest does not care about enforcing
> > ordering on its side.
> >   
> 
> I'm neither sold on this, nor am I violently opposing it. Will try to
> meditate on it some more if any spare cycles arise. Currently I don't
> see the benefit of the non-predictable order over plain FCFS. For
> example, let's assume we have a ssch "instruction" that 1 second to
> complete. Since normally ssch instruction  does not have to process the
> channel program, and is thus kind of a constant time operation (now we
> do the translation and the pinning as a part of the "instruction), our
> strange guest gets jumpy and does a csch after T+0.2s. And on T+0.8 in
> desperation follows the whole up with a hsch. If I understand your
> proposal correctly, both userspace handlers would spin on -EAGAIN until
> T+1. When ssch is done the csch and the hsch would race for who can
> be the next. I don't quite get the value of that.

What would happen on real hardware for such a guest? I would expect
that the csch and the hsch would be executed in a random order as well.

My point is that it is up to the guest to impose an order on the
execution of instructions, if wanted. We should not try to guess
anything; I think that would make the implementation needlessly complex.

> 
> > >     
> > > > Same for ssch, if another ssch is
> > > > already being processed. We *could* reflect cc 2 if the fctl
> > > > bit is already set, but that won't do for csch, so it is
> > > > probably best to have the hardware figure that out in any case.
> > > >       
> > > 
> > > We just need to be careful about avoiding races if we let hw sort
> > > out things. If an ssch is issued with the start function pending
> > > the correct response is cc 2.     
> > 
> > But sending it on to the hardware will give us that cc 2, no?
> >   
> > >     
> > > > If I read the code correctly, we currently reflect -EBUSY and
> > > > not -EAGAIN if we get a ssch request while already processing
> > > > another one. QEMU hands that back to the guest as a cc 2, which
> > > > is not 100% correct. In practice, we don't see this with Linux
> > > > guests due to locking. 
> > > 
> > > Nod, does not happen because of BQL. We currently do the
> > > user-space counterpart of vfio_ccw_mdev_write() in BQL context or
> > > (i.e. we hold BQL until translation is done and our host ssch()
> > > comes back)?    
> > 
> > The Linux kernel uses the subchannel lock to enforce exclusivity for
> > subchannel instructions, so we won't see Linux guests issue
> > instructions on different vcpus in parallel, that's what I meant.
> >  
> 
> That is cool. Yet I think the situation with the BQL is relevant.
> Because while BQL is held, not only IO instructions on a single
> vfio-ccw device are mutually exclusive. AFAIU no other instruction
> QEMU instruction handler can engage. And a store subchannel for
> device A having to wait until the translation for the start
> subchannel on device B is done is not the most scary thing I can
> imagine. 

Yes. But we still need to be able to cope with a userspace that does
not give us those guarantees.

> > > 
> > > I think -EBUSY is the correct response for ssch while start
> > > pending set. I think we set start pending in QEMU before we issue
> > > 'start command/io request' to the kernel. I don't think -EAGAIN
> > > is a good idea. AFAIU we would expect user-space to loop on
> > > -EAGAIN e.g. at least until the processing of a 'start command'
> > > is done and the (fist) ssch by the host is issued. And then
> > > what?  Translate the second channel program issue the second ssch
> > > in the host and probably get a non-zero cc? Or return -EBUSY? Or
> > > keep returning -EAGAIN?    
> > 
> > My idea was:
> > - return -EAGAIN if we're already processing a channel instruction
> > - continue returning -EBUSY etc. if the instruction gets the
> > respective return code from the hardware
> > 
> > So, the second ssch would first get a -EAGAIN and then a -EBUSY if
> > the first ssch is done, but the subchannel is still doing the start
> > function. Just as you would expect when you do a ssch while your
> > last request has not finished yet.
> >   
> 
> But before you can issue the second ssch you have to do the
> translation for it. And we must assume the IO corresponding to the
> first ssch is not done yet -- so we still need the translated channel
> program of the first ssch. 

Yes, we need to be able to juggle different translated channel programs
if we don't consider this part of the "instruction execution". But if
we return -EAGAIN if the code is currently doing that translation, we
should be fine, no?

> That is if we insist on doing the -EBUSY
> based on a return code from the hardware. I'm not sure we end up with
> a big simplification from making the "instructions" mutex on vfio-ccw
> device level in kernel as proposed above. 

I'm not sure we're not talking past each other here... the "translate
and issue instruction" part should be mutually exclusive; I just don't
want to return -EBUSY, but -EAGAIN, so that userspace knows it should
try again.

> But I'm not against it. If
> you have the time to write the patches I will find time to review
> them.

Probably only in the new year...

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 0/3] vfio-ccw: support hsch/csch (kernel part)
  2018-12-19 11:54               ` Cornelia Huck
@ 2018-12-19 14:17                 ` Halil Pasic
  2018-12-21 11:23                   ` Cornelia Huck
  0 siblings, 1 reply; 54+ messages in thread
From: Halil Pasic @ 2018-12-19 14:17 UTC (permalink / raw)
  To: Cornelia Huck
  Cc: Jason J . Herne, linux-s390, Eric Farman, Alex Williamson, kvm,
	Pierre Morel, Farhan Ali, qemu-devel, qemu-s390x

On Wed, 19 Dec 2018 12:54:42 +0100
Cornelia Huck <cohuck@redhat.com> wrote:

> On Fri, 7 Dec 2018 17:54:23 +0100
> Halil Pasic <pasic@linux.ibm.com> wrote:
> 
> > On Fri, 7 Dec 2018 11:05:29 +0100
> > Cornelia Huck <cohuck@redhat.com> wrote:
> > 
> > > > > I think most of the sorting-out-the-operations stuff should be done by
> > > > > the hardware itself, and we should not really try to enforce anything
> > > > > special in our vfio code.
> > > > >       
> > > > 
> > > > Sounds very reasonable to me. Does this mean you are against Pierre's
> > > > '[PATCH v3 6/6] vfio: ccw: serialize the write system calls' as it does
> > > > not let HW sort out stuff, but enforces sequencing?    
> > > 
> > > I have not yet had time to look at that, sorry.
> > >   
> > > > 
> > > >     
> > > > > For your example, it might be best if a hsch is always accepted and
> > > > > send on towards the hardware.      
> > > > 
> > > > Nod.
> > > >     
> > > > > Probably best to reflect back -EAGAIN if
> > > > > we're currently processing another instruction from another vcpu, so
> > > > > that the user space caller can retry.      
> > > > 
> > > > Hm, not sure how this works together with your previous sentence.    
> > > 
> > > The software layering. We have the kernel layer
> > > (drivers/s390/cio/vfio_ccw_*) that interacts with the hardware more or
> > > less directly, and the QEMU layer, which does some writes on regions.
> > > In the end, the goal is to act on behalf of the guest issuing a
> > > ssch/hsch/csch, which is from the guest's view a single instruction. We
> > > should not have the individual "instructions" compete with each other
> > > so that they run essentially in parallel (kernel layer), but we should
> > > also not try to impose an artificial ordering as to when instructions
> > > executed by different vcpus are executed (QEMU layer). Therefore, don't
> > > try to run an instruction in the kernel when another one is in progress
> > > for the same subchannel (exclusivity in the kernel), but retry in QEMU
> > > if needed (no ordering between vcpus imposed).
> > > 
> > > In short, don't create strange concurrency issues in the "instruction"
> > > handling, but make it possible to execute instructions in a
> > > non-predictable order if the guest does not care about enforcing
> > > ordering on its side.
> > >   
> > 
> > I'm neither sold on this, nor am I violently opposing it. Will try to
> > meditate on it some more if any spare cycles arise. Currently I don't
> > see the benefit of the non-predictable order over plain FCFS. For
> > example, let's assume we have a ssch "instruction" that 1 second to
> > complete. Since normally ssch instruction  does not have to process the
> > channel program, and is thus kind of a constant time operation (now we
> > do the translation and the pinning as a part of the "instruction), our
> > strange guest gets jumpy and does a csch after T+0.2s. And on T+0.8 in
> > desperation follows the whole up with a hsch. If I understand your
> > proposal correctly, both userspace handlers would spin on -EAGAIN until
> > T+1. When ssch is done the csch and the hsch would race for who can
> > be the next. I don't quite get the value of that.
> 
> What would happen on real hardware for such a guest? I would expect
> that the csch and the hsch would be executed in a random order as well.
> 

Yes, they would be executed in random order, but would not wait until the
ssch is done (and especially not wait until the channel program gets
translated). AFAIR both cancel the start function immediately -- if any
is pending.

Furthermore the point where the race is decided is changing the function
control bits -- the update needs to be an interlocked one obviously.

What I want to say is that there is no merit in waiting -- one second in the
example. At some point it needs to be decided who is considered first,
and artificially procrastinating this decision does not do us any good,
because we may end up with otherwise unlikely behavior.

> My point is that it is up to the guest to impose an order on the
> execution of instructions, if wanted. We should not try to guess
> anything; I think that would make the implementation needlessly complex.
> 

I'm not for guessing stuff, but rather for sticking to the architecture.

> > 
> > > >     
> > > > > Same for ssch, if another ssch is
> > > > > already being processed. We *could* reflect cc 2 if the fctl
> > > > > bit is already set, but that won't do for csch, so it is
> > > > > probably best to have the hardware figure that out in any case.
> > > > >       
> > > > 
> > > > We just need to be careful about avoiding races if we let hw sort
> > > > out things. If an ssch is issued with the start function pending
> > > > the correct response is cc 2.     
> > > 
> > > But sending it on to the hardware will give us that cc 2, no?
> > >   
> > > >     
> > > > > If I read the code correctly, we currently reflect -EBUSY and
> > > > > not -EAGAIN if we get a ssch request while already processing
> > > > > another one. QEMU hands that back to the guest as a cc 2, which
> > > > > is not 100% correct. In practice, we don't see this with Linux
> > > > > guests due to locking. 
> > > > 
> > > > Nod, does not happen because of BQL. We currently do the
> > > > user-space counterpart of vfio_ccw_mdev_write() in BQL context or
> > > > (i.e. we hold BQL until translation is done and our host ssch()
> > > > comes back)?    
> > > 
> > > The Linux kernel uses the subchannel lock to enforce exclusivity for
> > > subchannel instructions, so we won't see Linux guests issue
> > > instructions on different vcpus in parallel, that's what I meant.
> > >  
> > 
> > That is cool. Yet I think the situation with the BQL is relevant.
> > Because while BQL is held, not only IO instructions on a single
> > vfio-ccw device are mutually exclusive. AFAIU no other instruction
> > QEMU instruction handler can engage. And a store subchannel for
> > device A having to wait until the translation for the start
> > subchannel on device B is done is not the most scary thing I can
> > imagine. 
> 
> Yes. But we still need to be able to cope with a userspace that does
> not give us those guarantees.
> 

I agree. The point I was trying to make is not 'We are good, because
qemu takes care of it!'; on the contrary, I wanted to voice my concern
that a guest that has a couple of vfio-ccw devices in use could
experience performance problems because vfio-ccw holds the BQL for a
long time.

> > > > 
> > > > I think -EBUSY is the correct response for ssch while start
> > > > pending set. I think we set start pending in QEMU before we issue
> > > > 'start command/io request' to the kernel. I don't think -EAGAIN
> > > > is a good idea. AFAIU we would expect user-space to loop on
> > > > -EAGAIN e.g. at least until the processing of a 'start command'
> > > > is done and the (fist) ssch by the host is issued. And then
> > > > what?  Translate the second channel program issue the second ssch
> > > > in the host and probably get a non-zero cc? Or return -EBUSY? Or
> > > > keep returning -EAGAIN?    
> > > 
> > > My idea was:
> > > - return -EAGAIN if we're already processing a channel instruction
> > > - continue returning -EBUSY etc. if the instruction gets the
> > > respective return code from the hardware
> > > 
> > > So, the second ssch would first get a -EAGAIN and then a -EBUSY if
> > > the first ssch is done, but the subchannel is still doing the start
> > > function. Just as you would expect when you do a ssch while your
> > > last request has not finished yet.
> > >   
> > 
> > But before you can issue the second ssch you have to do the
> > translation for it. And we must assume the IO corresponding to the
> > first ssch is not done yet -- so we still need the translated channel
> > program of the first ssch. 
> 
> Yes, we need to be able to juggle different translated channel programs
> if we don't consider this part of the "instruction execution". But if
> we return -EAGAIN if the code is currently doing that translation, we
> should be fine, no?
> 

As long as you return -EAGAIN we are fine. But AFAIU you proposed to
do that until the I/O is submitted to the HW subchannel via ssch(). But
that is not the case I'm talking about here. We have already translated
the channel program for the first request, submitted it via ssch() and
are awaiting an interrupt that tells us the I/O is done. While waiting
for this interrupt we get a new ssch request. I understood, you don't
want to give -EAGAIN for this one, but make the ssch decide. The problem
is you still need the old translated channel program for the interrupt
handling, and at the same time you need the new channel program
translated as well, before doing the ssch for it in the host.

> > That is if we insist on doing the -EBUSY
> > based on a return code from the hardware. I'm not sure we end up with
> > a big simplification from making the "instructions" mutex on vfio-ccw
> > device level in kernel as proposed above. 
> 
> I'm not sure we're not talking past each other here... 

I'm afraid we are.

> the "translate
> and issue instruction" part should be mutually exclusive; I just don't
> want to return -EBUSY, but -EAGAIN, so that userspace knows it should
> try again.
> 

I got it. But I wanted to point out that we need the old channel program
*beyond* the "translate and issue instruction".

> > But I'm not against it. If
> > you have the time to write the patches I will find time to review
> > them.
> 
> Probably only on the new year...

I think this stuff is better discussed with code at hand. I'm happy to
continue this discussion if you think it is useful to you. Otherwise I
suggest you do it the way you think is best, and I will try to find and
point out the problems, if any.


Regards,
Halil

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/3] vfio-ccw: add capabilities chain
  2018-12-18 17:24     ` Cornelia Huck
  2018-12-18 17:56       ` Eric Farman
@ 2018-12-19 16:28       ` Alex Williamson
  2018-12-21 11:12         ` Cornelia Huck
  1 sibling, 1 reply; 54+ messages in thread
From: Alex Williamson @ 2018-12-19 16:28 UTC (permalink / raw)
  To: Cornelia Huck
  Cc: linux-s390, Eric Farman, Pierre Morel, kvm, Farhan Ali,
	qemu-devel, Halil Pasic, qemu-s390x

On Tue, 18 Dec 2018 18:24:00 +0100
Cornelia Huck <cohuck@redhat.com> wrote:

> On Mon, 17 Dec 2018 16:53:34 -0500
> Eric Farman <farman@linux.ibm.com> wrote:
> 
> > On 11/22/2018 11:54 AM, Cornelia Huck wrote:
> > 
> > ...snip...
> >   
> > > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> > > index 813102810f53..565669f95534 100644
> > > --- a/include/uapi/linux/vfio.h
> > > +++ b/include/uapi/linux/vfio.h
> > > @@ -297,6 +297,7 @@ struct vfio_region_info_cap_type {
> > >   
> > >   #define VFIO_REGION_TYPE_PCI_VENDOR_TYPE	(1 << 31)
> > >   #define VFIO_REGION_TYPE_PCI_VENDOR_MASK	(0xffff)
> > > +#define VFIO_REGION_TYPE_CCW			(1 << 30)    
> > 
> > Oof.  So the existing VFIO_REGION_TYPE_PCI_VENDOR_TYPE gets OR'd with 
> > another value (e.g., 8086).  But in 4.20, there was a 
> > VFIO_REGION_TYPE_GFX is added as simply "1" ... Which direction are 
> > these definitions being added from?  I guess asked another way, is 
> > _TYPE_CCW going to be OR'd with anything else that necessitates its 
> > presence as an identifier with some Other Thing, or should this follow 
> > the TYPE_GFX enumeration?  Perhaps the type field needs to be tidied up 
> > to help this sit more cleanly now?  (Sorry!)  
> 
> The semantics of that type stuff are really a bit unclear to me :(
> 
> I don't think we'll ever do any fancy mask handling for ccw. It is
> probably enough to have any kind of uniqueness within the different
> types, so maybe counting up would be indeed enough...

Just to confirm, this is the intended usage, simply reserve a new type
following the GFX region example.  We can define VFIO_REGION_TYPE_CCW
as 2 and then there's a whole address space of sub-types to fill in
within that.  I might have over-engineered PCI a bit with the address
space split, but it seemed like a good idea at the time to pre-define a
type address space for each vendor, such that they only need to define
the sub-types and we can avoid namespace collisions.  Unfortunately
this implicit definition for each PCI vendor also contributes to the
confusion here.  Sorry.  Thanks,

Alex

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/3] vfio-ccw: add capabilities chain
  2018-12-19 16:28       ` Alex Williamson
@ 2018-12-21 11:12         ` Cornelia Huck
  0 siblings, 0 replies; 54+ messages in thread
From: Cornelia Huck @ 2018-12-21 11:12 UTC (permalink / raw)
  To: Alex Williamson
  Cc: linux-s390, Eric Farman, Pierre Morel, kvm, Farhan Ali,
	qemu-devel, Halil Pasic, qemu-s390x

On Wed, 19 Dec 2018 09:28:00 -0700
Alex Williamson <alex.williamson@redhat.com> wrote:

> On Tue, 18 Dec 2018 18:24:00 +0100
> Cornelia Huck <cohuck@redhat.com> wrote:
> 
> > On Mon, 17 Dec 2018 16:53:34 -0500
> > Eric Farman <farman@linux.ibm.com> wrote:
> >   
> > > On 11/22/2018 11:54 AM, Cornelia Huck wrote:
> > > 
> > > ...snip...
> > >     
> > > > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> > > > index 813102810f53..565669f95534 100644
> > > > --- a/include/uapi/linux/vfio.h
> > > > +++ b/include/uapi/linux/vfio.h
> > > > @@ -297,6 +297,7 @@ struct vfio_region_info_cap_type {
> > > >   
> > > >   #define VFIO_REGION_TYPE_PCI_VENDOR_TYPE	(1 << 31)
> > > >   #define VFIO_REGION_TYPE_PCI_VENDOR_MASK	(0xffff)
> > > > +#define VFIO_REGION_TYPE_CCW			(1 << 30)      
> > > 
> > > Oof.  So the existing VFIO_REGION_TYPE_PCI_VENDOR_TYPE gets OR'd with 
> > > another value (e.g., 8086).  But in 4.20, there was a 
> > > VFIO_REGION_TYPE_GFX is added as simply "1" ... Which direction are 
> > > these definitions being added from?  I guess asked another way, is 
> > > _TYPE_CCW going to be OR'd with anything else that necessitates its 
> > > presence as an identifier with some Other Thing, or should this follow 
> > > the TYPE_GFX enumeration?  Perhaps the type field needs to be tidied up 
> > > to help this sit more cleanly now?  (Sorry!)    
> > 
> > The semantics of that type stuff are really a bit unclear to me :(
> > 
> > I don't think we'll ever do any fancy mask handling for ccw. It is
> > probably enough to have any kind of uniqueness within the different
> > types, so maybe counting up would be indeed enough...  
> 
> Just to confirm, this is the intended usage, simply reserve a new type
> following the GFX region example.  We can define VFIO_REGION_TYPE_CCW
> as 2 and then there's a whole address space of sub-types to fill in
> within that.  I might have over-engineered PCI a bit with the address
> space split, but it seemed like a good idea at the time to pre-define a
> type address space for each vendor, such that they only need to define
> the sub-types and we can avoid namespace collisions.  Unfortunately
> this implicit definition for each PCI vendor also contributes to the
> confusion here.  Sorry.  Thanks,
> 
> Alex

Thanks for the explanation. I'm simply switching VFIO_REGION_TYPE_CCW
to 2 in the next version.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 0/3] vfio-ccw: support hsch/csch (kernel part)
  2018-12-19 14:17                 ` Halil Pasic
@ 2018-12-21 11:23                   ` Cornelia Huck
  2018-12-21 12:42                     ` Halil Pasic
  0 siblings, 1 reply; 54+ messages in thread
From: Cornelia Huck @ 2018-12-21 11:23 UTC (permalink / raw)
  To: Halil Pasic
  Cc: Jason J . Herne, linux-s390, Eric Farman, Alex Williamson, kvm,
	Pierre Morel, Farhan Ali, qemu-devel, qemu-s390x

On Wed, 19 Dec 2018 15:17:19 +0100
Halil Pasic <pasic@linux.ibm.com> wrote:

> On Wed, 19 Dec 2018 12:54:42 +0100
> Cornelia Huck <cohuck@redhat.com> wrote:
> 
> > On Fri, 7 Dec 2018 17:54:23 +0100
> > Halil Pasic <pasic@linux.ibm.com> wrote:
> >   
> > > On Fri, 7 Dec 2018 11:05:29 +0100
> > > Cornelia Huck <cohuck@redhat.com> wrote:
> > >   
> > > > > > I think most of the sorting-out-the-operations stuff should be done by
> > > > > > the hardware itself, and we should not really try to enforce anything
> > > > > > special in our vfio code.
> > > > > >         
> > > > > 
> > > > > Sounds very reasonable to me. Does this mean you are against Pierre's
> > > > > '[PATCH v3 6/6] vfio: ccw: serialize the write system calls' as it does
> > > > > not let HW sort out stuff, but enforces sequencing?      
> > > > 
> > > > I have not yet had time to look at that, sorry.
> > > >     
> > > > > 
> > > > >       
> > > > > > For your example, it might be best if a hsch is always accepted and
> > > > > > > sent on towards the hardware.        
> > > > > 
> > > > > Nod.
> > > > >       
> > > > > > Probably best to reflect back -EAGAIN if
> > > > > > we're currently processing another instruction from another vcpu, so
> > > > > > that the user space caller can retry.        
> > > > > 
> > > > > Hm, not sure how this works together with your previous sentence.      
> > > > 
> > > > The software layering. We have the kernel layer
> > > > (drivers/s390/cio/vfio_ccw_*) that interacts with the hardware more or
> > > > less directly, and the QEMU layer, which does some writes on regions.
> > > > In the end, the goal is to act on behalf of the guest issuing a
> > > > ssch/hsch/csch, which is from the guest's view a single instruction. We
> > > > should not have the individual "instructions" compete with each other
> > > > so that they run essentially in parallel (kernel layer), but we should
> > > > also not try to impose an artificial ordering as to when instructions
> > > > executed by different vcpus are executed (QEMU layer). Therefore, don't
> > > > try to run an instruction in the kernel when another one is in progress
> > > > for the same subchannel (exclusivity in the kernel), but retry in QEMU
> > > > if needed (no ordering between vcpus imposed).
> > > > 
> > > > In short, don't create strange concurrency issues in the "instruction"
> > > > handling, but make it possible to execute instructions in a
> > > > non-predictable order if the guest does not care about enforcing
> > > > ordering on its side.
> > > >     
> > > 
> > > I'm neither sold on this, nor am I violently opposing it. Will try to
> > > meditate on it some more if any spare cycles arise. Currently I don't
> > > see the benefit of the non-predictable order over plain FCFS. For
> > > > example, let's assume we have a ssch "instruction" that takes 1 second to
> > > > complete. Since normally ssch instruction  does not have to process the
> > > > channel program, and is thus kind of a constant time operation (now we
> > > > do the translation and the pinning as a part of the "instruction"), our
> > > strange guest gets jumpy and does a csch after T+0.2s. And on T+0.8 in
> > > desperation follows the whole up with a hsch. If I understand your
> > > proposal correctly, both userspace handlers would spin on -EAGAIN until
> > > T+1. When ssch is done the csch and the hsch would race for who can
> > > be the next. I don't quite get the value of that.  
> > 
> > What would happen on real hardware for such a guest? I would expect
> > that the csch and the hsch would be executed in a random order as well.
> >   
> 
> Yes, they would be executed in random order, but would not wait until the
> ssch is done (and especially not wait until the channel program gets
> translated). AFAIR both cancel the start function immediately -- if any
> pending.
> 
> Furthermore the point where the race is decided is changing the function
> control bits -- the update needs to be an interlocked one obviously.
> 
> What I want to say, there is no merit in waiting -- one second in the
> example. At some point it needs to be decided who is considered first,
> and artificially procrastinating this decision does not do us any good,
> because we may end up with otherwise unlikely behavior.

You've really lost me here :( I fear you're criticizing something I
don't want to implement; I'll write some code, that should make things
much easier to discuss.

> 
> > My point is that it is up to the guest to impose an order on the
> > execution of instructions, if wanted. We should not try to guess
> > anything; I think that would make the implementation needlessly complex.
> >   
> 
> I'm not for guessing stuff, but rather for sticking to the architecture.
> 
> > >   
> > > > >       
> > > > > > Same for ssch, if another ssch is
> > > > > > already being processed. We *could* reflect cc 2 if the fctl
> > > > > > bit is already set, but that won't do for csch, so it is
> > > > > > probably best to have the hardware figure that out in any case.
> > > > > >         
> > > > > 
> > > > > We just need to be careful about avoiding races if we let hw sort
> > > > > out things. If an ssch is issued with the start function pending
> > > > > the correct response is cc 2.       
> > > > 
> > > > But sending it on to the hardware will give us that cc 2, no?
> > > >     
> > > > >       
> > > > > > If I read the code correctly, we currently reflect -EBUSY and
> > > > > > not -EAGAIN if we get a ssch request while already processing
> > > > > > another one. QEMU hands that back to the guest as a cc 2, which
> > > > > > is not 100% correct. In practice, we don't see this with Linux
> > > > > > guests due to locking.   
> > > > > 
> > > > > Nod, does not happen because of BQL. We currently do the
> > > > > user-space counterpart of vfio_ccw_mdev_write() in BQL context or
> > > > > (i.e. we hold BQL until translation is done and our host ssch()
> > > > > comes back)?      
> > > > 
> > > > The Linux kernel uses the subchannel lock to enforce exclusivity for
> > > > subchannel instructions, so we won't see Linux guests issue
> > > > instructions on different vcpus in parallel, that's what I meant.
> > > >    
> > > 
> > > That is cool. Yet I think the situation with the BQL is relevant.
> > > Because while BQL is held, not only IO instructions on a single
> > > vfio-ccw device are mutually exclusive. AFAIU no other
> > > QEMU instruction handler can engage. And a store subchannel for
> > > device A having to wait until the translation for the start
> > > subchannel on device B is done is not the most scary thing I can
> > > imagine.   
> > 
> > Yes. But we still need to be able to cope with a userspace that does
> > not give us those guarantees.
> >   
> 
> I agree. The point I was trying to make is not that 'We are good, because
> qemu takes care of it!' on the contrary, I wanted to give voice to my
> concern that a guest that has a couple of vfio-ccw devices in use could
> experience performance problems because vfio-ccw holds BQL for long.

TBH, I have no idea how this will scale to many vfio-ccw devices.

> 
> > > > > 
> > > > > I think -EBUSY is the correct response for ssch while start
> > > > > pending set. I think we set start pending in QEMU before we issue
> > > > > 'start command/io request' to the kernel. I don't think -EAGAIN
> > > > > is a good idea. AFAIU we would expect user-space to loop on
> > > > > -EAGAIN e.g. at least until the processing of a 'start command'
> > > > > is done and the (first) ssch by the host is issued. And then
> > > > > what?  Translate the second channel program issue the second ssch
> > > > > in the host and probably get a non-zero cc? Or return -EBUSY? Or
> > > > > keep returning -EAGAIN?      
> > > > 
> > > > My idea was:
> > > > - return -EAGAIN if we're already processing a channel instruction
> > > > - continue returning -EBUSY etc. if the instruction gets the
> > > > respective return code from the hardware
> > > > 
> > > > So, the second ssch would first get a -EAGAIN and then a -EBUSY if
> > > > the first ssch is done, but the subchannel is still doing the start
> > > > function. Just as you would expect when you do a ssch while your
> > > > last request has not finished yet.
> > > >     
> > > 
> > > But before you can issue the second ssch you have to do the
> > > translation for it. And we must assume the IO corresponding to the
> > > first ssch is not done yet -- so we still need the translated channel
> > > program of the first ssch.   
> > 
> > Yes, we need to be able to juggle different translated channel programs
> > if we don't consider this part of the "instruction execution". But if
> > we return -EAGAIN if the code is currently doing that translation, we
> > should be fine, no?
> >   
> 
> As long as you return -EAGAIN we are fine. But AFAIU you proposed to
> do that until the I/O is submitted to the HW subchannel via ssch(). But
> that is not the case I'm talking about here. We have already translated
> the channel program for the first request, submitted it via ssch() and
> are awaiting an interrupt that tells us the I/O is done. While waiting
> for this interrupt we get a new ssch request. I understood, you don't
> want to give -EAGAIN for this one, but make the ssch decide. The problem
> is you still need the old translated channel program for the interrupt
> handling, and at the same time you need the new channel program
> translated as well, before doing the ssch for it in the host.

Why? You're not doing anything with that second ssch at all, it returns
before translation is started.

> 
> > > That is if we insist on doing the -EBUSY
> > > based on a return code from the hardware. I'm not sure we end up with
> > > a big simplification from making the "instructions" mutex on vfio-ccw
> > > device level in kernel as proposed above.   
> > 
> > I'm not sure we're not talking past each other here...   
> 
> I'm afraid we do.
> 
> > the "translate
> > and issue instruction" part should be mutually exclusive; I just don't
> > want to return -EBUSY, but -EAGAIN, so that userspace knows it should
> > try again.
> >   
> 
> I got it. But I wanted to point out, that we need the old channel program
> *beyond* the "translate and issue instruction".

Of course. But I don't want to start a new channel program.

> 
> > > But I'm not against it. If
> > > you have the time to write the patches I will find time to review
> > > them.  
> > 
> > Probably only on the new year...  
> 
> I think the stuff is better discussed with code at hand. I'm happy to
> continue this discussion if you think it is useful to you. Otherwise I
> suggest do it the way you think is the best, and I will try  to find and
> to point out the problems, if any.

I'll try to post something in January. Have a nice holiday break :)

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 0/3] vfio-ccw: support hsch/csch (kernel part)
  2018-12-21 11:23                   ` Cornelia Huck
@ 2018-12-21 12:42                     ` Halil Pasic
  0 siblings, 0 replies; 54+ messages in thread
From: Halil Pasic @ 2018-12-21 12:42 UTC (permalink / raw)
  To: Cornelia Huck
  Cc: Jason J . Herne, linux-s390, Eric Farman, Alex Williamson, kvm,
	Pierre Morel, Farhan Ali, qemu-devel, qemu-s390x

On Fri, 21 Dec 2018 12:23:32 +0100
Cornelia Huck <cohuck@redhat.com> wrote:
[..]
> 
> You've really lost me here :( I fear you're criticizing something I
> don't want to implement; I'll write some code, that should make things
> much easier to discuss.
> 

Nod.


> TBH, I have no idea how this will scale to many vfio-ccw devices.
> 
> > 
[..]
> > As long as you return -EAGAIN we are fine. But AFAIU you proposed to
> > do that until the I/O is submitted to the HW subchannel via ssch(). But
> > that is not the case I'm talking about here. We have already translated
> > the channel program for the first request, submitted it via ssch() and
> > are awaiting an interrupt that tells us the I/O is done. While waiting
> > for this interrupt we get a new ssch request. I understood, you don't
> > want to give -EAGAIN for this one, but make the ssch decide. The problem
> > is you still need the old translated channel program for the interrupt
> > handling, and at the same time you need the new channel program
> > translated as well, before doing the ssch for it in the host.
> 
> Why? You're not doing anything with that second ssch at all, it returns
> before translation is started.
> 

OK apparently I misunderstood something -- it was a long and twisty
discussion. Looking forward to the code ;).

[..]

> 
> I'll try to post something in January. Have a nice holiday break :)
> 

Don't feel pressured. :) Have nice holidays as well!

Regards,
Halil

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 3/3] vfio-ccw: add handling for asnyc channel instructions
  2018-11-28 15:55               ` Farhan Ali
@ 2019-01-18 13:53                 ` Cornelia Huck
  0 siblings, 0 replies; 54+ messages in thread
From: Cornelia Huck @ 2019-01-18 13:53 UTC (permalink / raw)
  To: Farhan Ali
  Cc: linux-s390, Eric Farman, Alex Williamson, Pierre Morel, kvm,
	qemu-devel, Halil Pasic, qemu-s390x

On Wed, 28 Nov 2018 10:55:11 -0500
Farhan Ali <alifm@linux.ibm.com> wrote:

> On 11/28/2018 10:35 AM, Cornelia Huck wrote:

> > I hacked up the following (still untested):
> > 
> >  From e771c8dc5abbfbd19688b452096bab9d032e0df5 Mon Sep 17 00:00:00 2001
> > From: Cornelia Huck <cohuck@redhat.com>
> > Date: Wed, 28 Nov 2018 16:30:51 +0100
> > Subject: [PATCH] vfio-ccw: make it safe to access channel programs
> > 
> > When we get a solicited interrupt, the start function may have
> > been cleared by a csch, but we still have a channel program
> > structure allocated. Make it safe to call the cp accessors in
> > any case, so we can call them unconditionally.
> > 
> > Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> > ---
> >   drivers/s390/cio/vfio_ccw_cp.c  | 9 ++++++++-
> >   drivers/s390/cio/vfio_ccw_cp.h  | 2 ++
> >   drivers/s390/cio/vfio_ccw_drv.c | 3 +--
> >   3 files changed, 11 insertions(+), 3 deletions(-)

Hm, this one seems to have fallen through the cracks; but it still does
look useful, especially if we want to allow concurrent handling of
channel operations.

> > 
> > diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c
> > index 70a006ba4d05..35f87514276b 100644
> > --- a/drivers/s390/cio/vfio_ccw_cp.c
> > +++ b/drivers/s390/cio/vfio_ccw_cp.c
> > @@ -335,6 +335,7 @@ static void cp_unpin_free(struct channel_program *cp)
> >   	struct ccwchain *chain, *temp;
> >   	int i;
> >   
> > +	cp->initialized = false;
> >   	list_for_each_entry_safe(chain, temp, &cp->ccwchain_list, next) {
> >   		for (i = 0; i < chain->ch_len; i++) {
> >   			pfn_array_table_unpin_free(chain->ch_pat + i,
> > @@ -701,6 +702,8 @@ int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb)
> >   	 */
> >   	cp->orb.cmd.c64 = 1;
> >   
> > +	cp->initialized = true;
> > +
> >   	return ret;
> >   }
> >   
> > @@ -715,7 +718,8 @@ int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb)
> >    */
> >   void cp_free(struct channel_program *cp)
> >   {
> > -	cp_unpin_free(cp);
> > +	if (cp->initialized)
> > +		cp_unpin_free(cp);
> >   }
> >   
> >   /**
> > @@ -831,6 +835,9 @@ void cp_update_scsw(struct channel_program *cp, union scsw *scsw)
> >   	u32 cpa = scsw->cmd.cpa;
> >   	u32 ccw_head, ccw_tail;
> >   
> > +	if (!cp->initialized)
> > +		return;
> > +
> >   	/*
> >   	 * LATER:
> >   	 * For now, only update the cmd.cpa part. We may need to deal with
> > diff --git a/drivers/s390/cio/vfio_ccw_cp.h b/drivers/s390/cio/vfio_ccw_cp.h
> > index a4b74fb1aa57..3c20cd208da5 100644
> > --- a/drivers/s390/cio/vfio_ccw_cp.h
> > +++ b/drivers/s390/cio/vfio_ccw_cp.h
> > @@ -21,6 +21,7 @@
> >    * @ccwchain_list: list head of ccwchains
> >    * @orb: orb for the currently processed ssch request
> >    * @mdev: the mediated device to perform page pinning/unpinning
> > + * @initialized: whether this instance is actually initialized
> >    *
> >    * @ccwchain_list is the head of a ccwchain list, that contents the
> >    * translated result of the guest channel program that pointed out by
> > @@ -30,6 +31,7 @@ struct channel_program {
> >   	struct list_head ccwchain_list;
> >   	union orb orb;
> >   	struct device *mdev;
> > +	bool initialized;
> >   };
> >   
> >   extern int cp_init(struct channel_program *cp, struct device *mdev,
> > diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
> > index 890c588a3a61..83d6f43792b6 100644
> > --- a/drivers/s390/cio/vfio_ccw_drv.c
> > +++ b/drivers/s390/cio/vfio_ccw_drv.c
> > @@ -79,8 +79,7 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
> >   	private = container_of(work, struct vfio_ccw_private, io_work);
> >   	irb = &private->irb;
> >   
> > -	if (scsw_is_solicited(&irb->scsw) &&
> > -	    (scsw_fctl(&irb->scsw) & SCSW_FCTL_START_FUNC)) {
> > +	if (scsw_is_solicited(&irb->scsw)) {
> >   		cp_update_scsw(&private->cp, &irb->scsw);
> >   		cp_free(&private->cp);
> >   	}
> >   
> 
> The changes look good to me.

Thanks!

> 
> Thanks
> Farhan
> 

^ permalink raw reply	[flat|nested] 54+ messages in thread

end of thread, other threads:[~2019-01-18 13:53 UTC | newest]

Thread overview: 54+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-11-22 16:54 [PATCH 0/3] vfio-ccw: support hsch/csch (kernel part) Cornelia Huck
2018-11-22 16:54 ` [PATCH 1/3] vfio-ccw: add capabilities chain Cornelia Huck
2018-11-23 12:28   ` Pierre Morel
2018-11-23 12:45     ` Cornelia Huck
2018-11-23 13:26       ` Pierre Morel
2018-11-27 19:04   ` Farhan Ali
2018-11-28  9:05     ` Cornelia Huck
2018-12-17 21:53   ` Eric Farman
2018-12-18 17:24     ` Cornelia Huck
2018-12-18 17:56       ` Eric Farman
2018-12-19 16:28       ` Alex Williamson
2018-12-21 11:12         ` Cornelia Huck
2018-11-22 16:54 ` [PATCH 2/3] s390/cio: export hsch to modules Cornelia Huck
2018-11-23 12:30   ` Pierre Morel
2018-11-22 16:54 ` [PATCH 3/3] vfio-ccw: add handling for asnyc channel instructions Cornelia Huck
2018-11-23 13:08   ` Pierre Morel
2018-11-26  9:47     ` Cornelia Huck
2018-11-27 19:09   ` Farhan Ali
2018-11-28  9:02     ` Cornelia Huck
2018-11-28 14:31       ` Farhan Ali
2018-11-28 14:52         ` Cornelia Huck
2018-11-28 15:00           ` Farhan Ali
2018-11-28 15:35             ` Cornelia Huck
2018-11-28 15:55               ` Farhan Ali
2019-01-18 13:53                 ` Cornelia Huck
2018-11-27 19:57   ` Farhan Ali
2018-11-28  8:41     ` Cornelia Huck
2018-11-28 16:36   ` [qemu-s390x] " Halil Pasic
2018-11-29 16:52     ` Cornelia Huck
2018-11-29 17:24       ` Halil Pasic
2018-12-17 21:54   ` Eric Farman
2018-12-18 16:45     ` Cornelia Huck
2018-11-24 21:07 ` [qemu-s390x] [PATCH 0/3] vfio-ccw: support hsch/csch (kernel part) Halil Pasic
2018-11-26  9:26   ` Cornelia Huck
2018-11-26 18:57 ` Farhan Ali
2018-11-26 19:00   ` Cornelia Huck
2018-12-04 12:38 ` Halil Pasic
2018-12-04 13:11   ` Cornelia Huck
2018-12-04 15:02     ` Halil Pasic
2018-12-05 12:54       ` Cornelia Huck
2018-12-05 18:34         ` Farhan Ali
2018-12-06 14:39           ` Cornelia Huck
2018-12-06 15:26             ` Farhan Ali
2018-12-06 16:21               ` Cornelia Huck
2018-12-06 17:50                 ` Farhan Ali
2018-12-07  9:34                   ` Cornelia Huck
2018-12-06 18:47         ` Halil Pasic
2018-12-07 10:05           ` Cornelia Huck
2018-12-07 15:49             ` Halil Pasic
2018-12-07 16:54             ` Halil Pasic
2018-12-19 11:54               ` Cornelia Huck
2018-12-19 14:17                 ` Halil Pasic
2018-12-21 11:23                   ` Cornelia Huck
2018-12-21 12:42                     ` Halil Pasic

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).