All of lore.kernel.org
 help / color / mirror / Atom feed
From: Matthew Rosato <mjrosato@linux.ibm.com>
To: alex.williamson@redhat.com, cohuck@redhat.com, schnelle@linux.ibm.com
Cc: pmorel@linux.ibm.com, borntraeger@de.ibm.com, hca@linux.ibm.com,
	gor@linux.ibm.com, gerald.schaefer@linux.ibm.com,
	linux-s390@vger.kernel.org, kvm@vger.kernel.org,
	linux-kernel@vger.kernel.org
Subject: [PATCH 4/4] vfio-pci/zdev: Introduce the zPCI I/O vfio region
Date: Tue, 19 Jan 2021 15:02:30 -0500	[thread overview]
Message-ID: <1611086550-32765-5-git-send-email-mjrosato@linux.ibm.com> (raw)
In-Reply-To: <1611086550-32765-1-git-send-email-mjrosato@linux.ibm.com>

Some s390 PCI devices (e.g. ISM) perform I/O operations that have very
specific requirements in terms of alignment as well as the patterns in
which the data is read/written. Allowing these to proceed through the
typical vfio_pci_bar_rw path will cause them to be broken up in such a
way that these requirements can't be guaranteed. In addition, ISM devices
do not support the MIO codepaths that might be triggered on vfio I/O coming
from userspace; we must be able to ensure that these devices use the
non-MIO instructions.  To facilitate this, provide a new vfio region by
which non-MIO instructions can be passed directly to the host kernel s390
PCI layer, to be reliably issued as non-MIO instructions.

This patch introduces the new vfio VFIO_REGION_SUBTYPE_IBM_ZPCI_IO region
and implements the ability to pass PCISTB and PCILG instructions over it,
as these are what is required for ISM devices.

Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
---
 drivers/vfio/pci/vfio_pci.c         |   8 ++
 drivers/vfio/pci/vfio_pci_private.h |   6 ++
 drivers/vfio/pci/vfio_pci_zdev.c    | 158 ++++++++++++++++++++++++++++++++++++
 include/uapi/linux/vfio.h           |   4 +
 include/uapi/linux/vfio_zdev.h      |  33 ++++++++
 5 files changed, 209 insertions(+)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 706de3e..e1c156e 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -407,6 +407,14 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
 		}
 	}
 
+	if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV)) {
+		ret = vfio_pci_zdev_io_init(vdev);
+		if (ret && ret != -ENODEV) {
+			pci_warn(pdev, "Failed to setup zPCI I/O region\n");
+			return ret;
+		}
+	}
+
 	vfio_pci_probe_mmaps(vdev);
 
 	return 0;
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 5c90e56..bc49980 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -217,12 +217,18 @@ static inline int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev)
 #ifdef CONFIG_VFIO_PCI_ZDEV
 extern int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev,
 				       struct vfio_info_cap *caps);
+extern int vfio_pci_zdev_io_init(struct vfio_pci_device *vdev);
 #else
 static inline int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev,
 					      struct vfio_info_cap *caps)
 {
 	return -ENODEV;
 }
+
+static inline int vfio_pci_zdev_io_init(struct vfio_pci_device *vdev)
+{
+	return -ENODEV;	/* stub used when CONFIG_VFIO_PCI_ZDEV is not set */
+}
 #endif
 
 #endif /* VFIO_PCI_PRIVATE_H */
diff --git a/drivers/vfio/pci/vfio_pci_zdev.c b/drivers/vfio/pci/vfio_pci_zdev.c
index 57e19ff..a962043 100644
--- a/drivers/vfio/pci/vfio_pci_zdev.c
+++ b/drivers/vfio/pci/vfio_pci_zdev.c
@@ -18,6 +18,7 @@
 #include <linux/vfio_zdev.h>
 #include <asm/pci_clp.h>
 #include <asm/pci_io.h>
+#include <asm/pci_insn.h>
 
 #include "vfio_pci_private.h"
 
@@ -143,3 +144,160 @@ int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev,
 
 	return ret;
 }
+
+/*
+ * Access handler for the zPCI I/O region.  Reads may only cover the hdr
+ * area.  Writes may only target the req area; the written request is then
+ * validated and issued as PCILG (FLAG_READ) or PCISTB (FLAG_BLOCKW) on the
+ * user page holding req.gaddr.  Returns the byte count consumed or, carried
+ * in the size_t, -ENODEV (no zPCI device), -EINVAL (offset past region),
+ * -EFAULT (wrong area or failed user copy) or -EIO (bad or failed request).
+ */
+static size_t vfio_pci_zdev_io_rw(struct vfio_pci_device *vdev,
+				  char __user *buf, size_t count,
+				  loff_t *ppos, bool iswrite)
+{
+	unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
+	struct vfio_region_zpci_io *region = vdev->region[i].data;
+	struct zpci_dev *zdev = to_zpci(vdev->pdev);
+	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
+	void *base = region;
+	struct page *gpage;
+	bool is_read;
+	u64 *data;
+	int ret;
+	u64 req;
+
+	if ((!vdev->pdev->bus) || (!zdev))
+		return -ENODEV;
+
+	if (pos >= vdev->region[i].size)
+		return -EINVAL;
+
+	count = min(count, (size_t)(vdev->region[i].size - pos));
+
+	if (!iswrite) {
+		/* Only allow reads to the _hdr area */
+		if (pos + count > offsetof(struct vfio_region_zpci_io, req))
+			return -EFAULT;
+		if (copy_to_user(buf, base + pos, count))
+			return -EFAULT;
+		return count;
+	}
+
+	/* Only allow writes to the _req area */
+	if (pos < offsetof(struct vfio_region_zpci_io, req))
+		return -EFAULT;
+	if (copy_from_user(base + pos, buf, count))
+		return -EFAULT;
+
+	is_read = !!(region->req.flags & VFIO_ZPCI_IO_FLAG_READ);
+
+	/* Reject an undefined or missing operation before pinning any page */
+	if (!is_read && !(region->req.flags & VFIO_ZPCI_IO_FLAG_BLOCKW))
+		return -EIO;
+
+	/* Read operations are limited to 8B */
+	if (is_read && region->req.len > 8)
+		return -EIO;
+
+	/* Block write operations are limited to hardware-reported max */
+	if ((region->req.flags & VFIO_ZPCI_IO_FLAG_BLOCKW) &&
+	    region->req.len > zdev->maxstbl)
+		return -EIO;
+
+	/*
+	 * While some devices may allow relaxed alignment for the PCISTB
+	 * instruction, the VFIO region requires the input buffer to be on a
+	 * DWORD boundary for all operations for simplicity.
+	 */
+	if (!IS_ALIGNED(region->req.gaddr, sizeof(uint64_t)))
+		return -EIO;
+
+	/*
+	 * For now, the largest allowed block I/O is advertised as PAGE_SIZE,
+	 * and cannot exceed a page boundary - so a single page is enough.  The
+	 * guest should have validated this but let's double-check that the
+	 * request will not cross a page boundary.
+	 */
+	if (((region->req.gaddr & ~PAGE_MASK) + region->req.len - 1) &
+	    PAGE_MASK)
+		return -EIO;
+
+	mutex_lock(&zdev->lock);
+
+	/* PCILG stores into the page, so reads must pin it writable */
+	ret = get_user_pages_fast(region->req.gaddr & PAGE_MASK, 1,
+				  is_read ? FOLL_WRITE : 0, &gpage);
+	if (ret <= 0) {
+		count = -EIO;
+		goto out;
+	}
+	data = page_address(gpage) + (region->req.gaddr & ~PAGE_MASK);
+
+	req = ZPCI_CREATE_REQ(zdev->fh, region->req.pcias, region->req.len);
+
+	/* Perform the requested I/O operation: PCILG or PCISTB */
+	if (is_read)
+		ret = __zpci_load(data, req, region->req.offset);
+	else
+		ret = __zpci_store_block(data, req, region->req.offset);
+	if (ret < 0)
+		count = -EIO;
+
+	if (is_read)
+		set_page_dirty_lock(gpage);	/* PCILG wrote to the page */
+	put_page(gpage);
+
+out:
+	mutex_unlock(&zdev->lock);
+	return count;
+}
+
+static void vfio_pci_zdev_io_release(struct vfio_pci_device *vdev,
+				     struct vfio_pci_region *region)
+{
+	kfree(region->data);	/* buffer from vfio_pci_zdev_io_init() */
+}
+
+static const struct vfio_pci_regops vfio_pci_zdev_io_regops = {
+	.rw		= vfio_pci_zdev_io_rw,		/* hdr reads, req writes */
+	.release	= vfio_pci_zdev_io_release,	/* frees region data */
+};
+
+/*
+ * Allocate, initialize and register the zPCI I/O region for @vdev.  Returns
+ * 0, -ENODEV (no bus/zPCI device), -ENOMEM or the registration error.
+ */
+int vfio_pci_zdev_io_init(struct vfio_pci_device *vdev)
+{
+	struct vfio_region_zpci_io *region;
+	struct zpci_dev *zdev;
+	int ret;
+
+	if (!vdev->pdev->bus)
+		return -ENODEV;
+
+	zdev = to_zpci(vdev->pdev);
+	if (!zdev)
+		return -ENODEV;
+
+	/* Zeroed so the flags/reserved fields and the req area start clean */
+	region = kzalloc(sizeof(*region), GFP_KERNEL);
+	if (!region)
+		return -ENOMEM;
+
+	/* Fill in the header before the region is registered */
+	region->hdr.max = zdev->maxstbl;
+
+	ret = vfio_pci_register_dev_region(vdev,
+		PCI_VENDOR_ID_IBM | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
+		VFIO_REGION_SUBTYPE_IBM_ZPCI_IO, &vfio_pci_zdev_io_regops,
+		sizeof(*region),
+		VFIO_REGION_INFO_FLAG_READ | VFIO_REGION_INFO_FLAG_WRITE,
+		region);
+	if (ret)
+		kfree(region);
+
+	return ret;
+}
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index d181277..5547f9b 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -338,6 +338,10 @@ struct vfio_region_info_cap_type {
  * to do TLB invalidation on a GPU.
  */
 #define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD	(1)
+/*
+ * IBM zPCI I/O region
+ */
+#define VFIO_REGION_SUBTYPE_IBM_ZPCI_IO		(2)
 
 /* sub-types for VFIO_REGION_TYPE_GFX */
 #define VFIO_REGION_SUBTYPE_GFX_EDID            (1)
diff --git a/include/uapi/linux/vfio_zdev.h b/include/uapi/linux/vfio_zdev.h
index b0b6596..830acca4 100644
--- a/include/uapi/linux/vfio_zdev.h
+++ b/include/uapi/linux/vfio_zdev.h
@@ -76,4 +76,37 @@ struct vfio_device_info_cap_zpci_pfip {
 	__u8 pfip[];
 };
 
+/**
+ * VFIO_REGION_SUBTYPE_IBM_ZPCI_IO - VFIO zPCI PCI Direct I/O Region
+ *
+ * This region is used to transfer I/O operations from the guest directly
+ * to the host zPCI I/O layer.  The same instruction requested by the guest
+ * (e.g. PCISTB) will always be used, even if the MIO variant is available.
+ *
+ * The _hdr area is user-readable and is used to provide setup information.
+ * The _req area is user-writable and is used to provide the I/O operation.
+ */
+struct vfio_zpci_io_hdr {
+	__u64 flags;		/* Set to 0 by the kernel at region setup */
+	__u16 max;		/* Max block operation size allowed */
+	__u16 reserved;		/* Set to 0 by the kernel at region setup */
+	__u32 reserved2;	/* Set to 0 by the kernel at region setup */
+};
+
+struct vfio_zpci_io_req {
+	__u64 flags;		/* Operation selector, VFIO_ZPCI_IO_FLAG_* */
+#define VFIO_ZPCI_IO_FLAG_READ (1 << 0) /* Read Operation Specified */
+#define VFIO_ZPCI_IO_FLAG_BLOCKW (1 << 1) /* Block Write Operation Specified */
+	__u64 gaddr;		/* Address of guest data, 8-byte aligned */
+	__u64 offset;		/* Offset into target PCI Address Space */
+	__u32 reserved;
+	__u16 len;		/* Length of guest operation */
+	__u8 pcias;		/* Target PCI Address Space */
+	__u8 reserved2;
+};
+
+struct vfio_region_zpci_io {
+	struct vfio_zpci_io_hdr hdr;	/* User-readable setup information */
+	struct vfio_zpci_io_req req;	/* User-writable I/O operation */
+};
 #endif
-- 
1.8.3.1


  parent reply	other threads:[~2021-01-19 20:29 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-01-19 20:02 [PATCH 0/4] vfio-pci/zdev: Fixing s390 vfio-pci ISM support Matthew Rosato
2021-01-19 20:02 ` [PATCH 1/4] s390/pci: track alignment/length strictness for zpci_dev Matthew Rosato
2021-01-19 20:02 ` [PATCH 2/4] vfio-pci/zdev: Pass the relaxed alignment flag Matthew Rosato
2021-01-19 20:02 ` [PATCH 3/4] s390/pci: Get hardware-reported max store block length Matthew Rosato
2021-01-19 20:02 ` Matthew Rosato [this message]
2021-01-20 13:21   ` [PATCH 4/4] vfio-pci/zdev: Introduce the zPCI I/O vfio region Niklas Schnelle
2021-01-20 17:10     ` Matthew Rosato
2021-01-20 17:28       ` Niklas Schnelle
2021-01-20 17:40         ` Matthew Rosato
2021-01-21 20:50     ` Alex Williamson
2021-01-21 10:01   ` Niklas Schnelle
2021-01-21 15:57     ` Matthew Rosato
2021-01-22 23:48   ` Alex Williamson
2021-01-25 14:40     ` Matthew Rosato
2021-01-25 15:42       ` Cornelia Huck
2021-01-25 15:52         ` Matthew Rosato
2021-01-26 23:18       ` Alex Williamson
2021-01-27 14:23         ` Matthew Rosato
2021-01-27 15:53           ` Alex Williamson
2021-01-27 17:45             ` Cornelia Huck
2021-01-27 18:27               ` Matthew Rosato
2021-01-19 20:50 ` [PATCH 0/4] vfio-pci/zdev: Fixing s390 vfio-pci ISM support Matthew Rosato
2021-01-20  9:02 ` Pierre Morel
2021-01-20 14:02   ` Matthew Rosato

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1611086550-32765-5-git-send-email-mjrosato@linux.ibm.com \
    --to=mjrosato@linux.ibm.com \
    --cc=alex.williamson@redhat.com \
    --cc=borntraeger@de.ibm.com \
    --cc=cohuck@redhat.com \
    --cc=gerald.schaefer@linux.ibm.com \
    --cc=gor@linux.ibm.com \
    --cc=hca@linux.ibm.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-s390@vger.kernel.org \
    --cc=pmorel@linux.ibm.com \
    --cc=schnelle@linux.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.