All of lore.kernel.org
 help / color / mirror / Atom feed
From: Eric Auger <eric.auger@redhat.com>
To: eric.auger.pro@gmail.com, eric.auger@redhat.com,
	iommu@lists.linux-foundation.org, linux-kernel@vger.kernel.org,
	kvm@vger.kernel.org, kvmarm@lists.cs.columbia.edu,
	will@kernel.org, joro@8bytes.org, maz@kernel.org,
	robin.murphy@arm.com, alex.williamson@redhat.com
Cc: jean-philippe@linaro.org, zhangfei.gao@linaro.org,
	zhangfei.gao@gmail.com, vivek.gautam@arm.com,
	shameerali.kolothum.thodi@huawei.com,
	jacob.jun.pan@linux.intel.com, yi.l.liu@intel.com,
	tn@semihalf.com, nicoleotsuka@gmail.com, yuzenghui@huawei.com
Subject: [PATCH v11 04/13] vfio/pci: Add VFIO_REGION_TYPE_NESTED region type
Date: Mon, 16 Nov 2020 12:00:21 +0100	[thread overview]
Message-ID: <20201116110030.32335-5-eric.auger@redhat.com> (raw)
In-Reply-To: <20201116110030.32335-1-eric.auger@redhat.com>

Add a new specific DMA_FAULT region aiming to exposed nested mode
translation faults. This region only is exposed if the device
is attached to a nested domain.

The region has a ring buffer that contains the actual fault
records plus a header allowing to handle it (tail/head indices,
max capacity, entry size). At the moment the region is dimensionned
for 512 fault records.

Signed-off-by: Eric Auger <eric.auger@redhat.com>

---
v11 -> v12:
- set fault_pages to NULL after free
- check new_tail >= header->nb_entries (Zenghui)

v10 -> v11:
- rename vfio_pci_init_dma_fault_region into
  vfio_pci_dma_fault_init
- free fault_pages in vfio_pci_dma_fault_release
- only register the region if the device is attached
  to a nested domain

v8 -> v9:
- Use a single region instead of a prod/cons region

v4 -> v5
- check cons is not null in vfio_pci_check_cons_fault

v3 -> v4:
- use 2 separate regions, respectively in read and write modes
- add the version capability
---
 drivers/vfio/pci/vfio_pci.c         | 76 +++++++++++++++++++++++++++++
 drivers/vfio/pci/vfio_pci_private.h |  6 +++
 drivers/vfio/pci/vfio_pci_rdwr.c    | 44 +++++++++++++++++
 include/uapi/linux/vfio.h           | 34 +++++++++++++
 4 files changed, 160 insertions(+)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index e6190173482c..7546a81e7fb6 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -311,6 +311,78 @@ int vfio_pci_set_power_state(struct vfio_pci_device *vdev, pci_power_t state)
 	return ret;
 }
 
+static void vfio_pci_dma_fault_release(struct vfio_pci_device *vdev,
+				       struct vfio_pci_region *region)
+{
+	kfree(vdev->fault_pages);
+}
+
+static int vfio_pci_dma_fault_add_capability(struct vfio_pci_device *vdev,
+					     struct vfio_pci_region *region,
+					     struct vfio_info_cap *caps)
+{
+	struct vfio_region_info_cap_fault cap = {
+		.header.id = VFIO_REGION_INFO_CAP_DMA_FAULT,
+		.header.version = 1,
+		.version = 1,
+	};
+	return vfio_info_add_capability(caps, &cap.header, sizeof(cap));
+}
+
+static const struct vfio_pci_regops vfio_pci_dma_fault_regops = {
+	.rw		= vfio_pci_dma_fault_rw,
+	.release	= vfio_pci_dma_fault_release,
+	.add_capability = vfio_pci_dma_fault_add_capability,
+};
+
+#define DMA_FAULT_RING_LENGTH 512
+
+static int vfio_pci_dma_fault_init(struct vfio_pci_device *vdev)
+{
+	struct vfio_region_dma_fault *header;
+	struct iommu_domain *domain;
+	size_t size;
+	bool nested;
+	int ret;
+
+	domain = iommu_get_domain_for_dev(&vdev->pdev->dev);
+	ret = iommu_domain_get_attr(domain, DOMAIN_ATTR_NESTING, &nested);
+	if (ret || !nested)
+		return ret;
+
+	mutex_init(&vdev->fault_queue_lock);
+
+	/*
+	 * We provision 1 page for the header and space for
+	 * DMA_FAULT_RING_LENGTH fault records in the ring buffer.
+	 */
+	size = ALIGN(sizeof(struct iommu_fault) *
+		     DMA_FAULT_RING_LENGTH, PAGE_SIZE) + PAGE_SIZE;
+
+	vdev->fault_pages = kzalloc(size, GFP_KERNEL);
+	if (!vdev->fault_pages)
+		return -ENOMEM;
+
+	ret = vfio_pci_register_dev_region(vdev,
+		VFIO_REGION_TYPE_NESTED,
+		VFIO_REGION_SUBTYPE_NESTED_DMA_FAULT,
+		&vfio_pci_dma_fault_regops, size,
+		VFIO_REGION_INFO_FLAG_READ | VFIO_REGION_INFO_FLAG_WRITE,
+		vdev->fault_pages);
+	if (ret)
+		goto out;
+
+	header = (struct vfio_region_dma_fault *)vdev->fault_pages;
+	header->entry_size = sizeof(struct iommu_fault);
+	header->nb_entries = DMA_FAULT_RING_LENGTH;
+	header->offset = sizeof(struct vfio_region_dma_fault);
+	return 0;
+out:
+	kfree(vdev->fault_pages);
+	vdev->fault_pages = NULL;
+	return ret;
+}
+
 static int vfio_pci_enable(struct vfio_pci_device *vdev)
 {
 	struct pci_dev *pdev = vdev->pdev;
@@ -409,6 +481,10 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
 		}
 	}
 
+	ret = vfio_pci_dma_fault_init(vdev);
+	if (ret)
+		goto disable_exit;
+
 	vfio_pci_probe_mmaps(vdev);
 
 	return 0;
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 5c90e560c5c7..1d9b0f648133 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -134,6 +134,8 @@ struct vfio_pci_device {
 	int			ioeventfds_nr;
 	struct eventfd_ctx	*err_trigger;
 	struct eventfd_ctx	*req_trigger;
+	u8			*fault_pages;
+	struct mutex		fault_queue_lock;
 	struct list_head	dummy_resources_list;
 	struct mutex		ioeventfds_lock;
 	struct list_head	ioeventfds_list;
@@ -170,6 +172,10 @@ extern ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf,
 extern long vfio_pci_ioeventfd(struct vfio_pci_device *vdev, loff_t offset,
 			       uint64_t data, int count, int fd);
 
+extern size_t vfio_pci_dma_fault_rw(struct vfio_pci_device *vdev,
+				    char __user *buf, size_t count,
+				    loff_t *ppos, bool iswrite);
+
 extern int vfio_pci_init_perm_bits(void);
 extern void vfio_pci_uninit_perm_bits(void);
 
diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
index a0b5fc8e46f4..164120607469 100644
--- a/drivers/vfio/pci/vfio_pci_rdwr.c
+++ b/drivers/vfio/pci/vfio_pci_rdwr.c
@@ -356,6 +356,50 @@ ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf,
 	return done;
 }
 
+size_t vfio_pci_dma_fault_rw(struct vfio_pci_device *vdev, char __user *buf,
+			     size_t count, loff_t *ppos, bool iswrite)
+{
+	unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
+	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
+	void *base = vdev->region[i].data;
+	int ret = -EFAULT;
+
+	if (pos >= vdev->region[i].size)
+		return -EINVAL;
+
+	count = min(count, (size_t)(vdev->region[i].size - pos));
+
+	mutex_lock(&vdev->fault_queue_lock);
+
+	if (iswrite) {
+		struct vfio_region_dma_fault *header =
+			(struct vfio_region_dma_fault *)base;
+		u32 new_tail;
+
+		if (pos != 0 || count != 4) {
+			ret = -EINVAL;
+			goto unlock;
+		}
+
+		if (copy_from_user((void *)&new_tail, buf, count))
+			goto unlock;
+
+		if (new_tail >= header->nb_entries) {
+			ret = -EINVAL;
+			goto unlock;
+		}
+		header->tail = new_tail;
+	} else {
+		if (copy_to_user(buf, base + pos, count))
+			goto unlock;
+	}
+	*ppos += count;
+	ret = count;
+unlock:
+	mutex_unlock(&vdev->fault_queue_lock);
+	return ret;
+}
+
 static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd *ioeventfd,
 					bool test_mem)
 {
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index b352e76cfb71..629dfb38d9e7 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -343,6 +343,9 @@ struct vfio_region_info_cap_type {
 /* sub-types for VFIO_REGION_TYPE_GFX */
 #define VFIO_REGION_SUBTYPE_GFX_EDID            (1)
 
+#define VFIO_REGION_TYPE_NESTED			(2)
+#define VFIO_REGION_SUBTYPE_NESTED_DMA_FAULT	(1)
+
 /**
  * struct vfio_region_gfx_edid - EDID region layout.
  *
@@ -988,6 +991,37 @@ struct vfio_device_feature {
  */
 #define VFIO_DEVICE_FEATURE_PCI_VF_TOKEN	(0)
 
+/*
+ * Capability exposed by the DMA fault region
+ * @version: ABI version
+ */
+#define VFIO_REGION_INFO_CAP_DMA_FAULT	6
+
+struct vfio_region_info_cap_fault {
+	struct vfio_info_cap_header header;
+	__u32 version;
+};
+
+/*
+ * DMA Fault Region Layout
+ * @tail: index relative to the start of the ring buffer at which the
+ *        consumer finds the next item in the buffer
+ * @entry_size: fault ring buffer entry size in bytes
+ * @nb_entries: max capacity of the fault ring buffer
+ * @offset: ring buffer offset relative to the start of the region
+ * @head: index relative to the start of the ring buffer at which the
+ *        producer (kernel) inserts items into the buffers
+ */
+struct vfio_region_dma_fault {
+	/* Write-Only */
+	__u32   tail;
+	/* Read-Only */
+	__u32   entry_size;
+	__u32	nb_entries;
+	__u32	offset;
+	__u32   head;
+};
+
 /* -------- API for Type1 VFIO IOMMU -------- */
 
 /**
-- 
2.21.3


WARNING: multiple messages have this Message-ID (diff)
From: Eric Auger <eric.auger@redhat.com>
To: eric.auger.pro@gmail.com, eric.auger@redhat.com,
	iommu@lists.linux-foundation.org, linux-kernel@vger.kernel.org,
	kvm@vger.kernel.org, kvmarm@lists.cs.columbia.edu,
	will@kernel.org, joro@8bytes.org, maz@kernel.org,
	robin.murphy@arm.com, alex.williamson@redhat.com
Cc: jean-philippe@linaro.org, vivek.gautam@arm.com, zhangfei.gao@linaro.org
Subject: [PATCH v11 04/13] vfio/pci: Add VFIO_REGION_TYPE_NESTED region type
Date: Mon, 16 Nov 2020 12:00:21 +0100	[thread overview]
Message-ID: <20201116110030.32335-5-eric.auger@redhat.com> (raw)
In-Reply-To: <20201116110030.32335-1-eric.auger@redhat.com>

Add a new specific DMA_FAULT region aiming to exposed nested mode
translation faults. This region only is exposed if the device
is attached to a nested domain.

The region has a ring buffer that contains the actual fault
records plus a header allowing to handle it (tail/head indices,
max capacity, entry size). At the moment the region is dimensionned
for 512 fault records.

Signed-off-by: Eric Auger <eric.auger@redhat.com>

---
v11 -> v12:
- set fault_pages to NULL after free
- check new_tail >= header->nb_entries (Zenghui)

v10 -> v11:
- rename vfio_pci_init_dma_fault_region into
  vfio_pci_dma_fault_init
- free fault_pages in vfio_pci_dma_fault_release
- only register the region if the device is attached
  to a nested domain

v8 -> v9:
- Use a single region instead of a prod/cons region

v4 -> v5
- check cons is not null in vfio_pci_check_cons_fault

v3 -> v4:
- use 2 separate regions, respectively in read and write modes
- add the version capability
---
 drivers/vfio/pci/vfio_pci.c         | 76 +++++++++++++++++++++++++++++
 drivers/vfio/pci/vfio_pci_private.h |  6 +++
 drivers/vfio/pci/vfio_pci_rdwr.c    | 44 +++++++++++++++++
 include/uapi/linux/vfio.h           | 34 +++++++++++++
 4 files changed, 160 insertions(+)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index e6190173482c..7546a81e7fb6 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -311,6 +311,78 @@ int vfio_pci_set_power_state(struct vfio_pci_device *vdev, pci_power_t state)
 	return ret;
 }
 
+static void vfio_pci_dma_fault_release(struct vfio_pci_device *vdev,
+				       struct vfio_pci_region *region)
+{
+	kfree(vdev->fault_pages);
+}
+
+static int vfio_pci_dma_fault_add_capability(struct vfio_pci_device *vdev,
+					     struct vfio_pci_region *region,
+					     struct vfio_info_cap *caps)
+{
+	struct vfio_region_info_cap_fault cap = {
+		.header.id = VFIO_REGION_INFO_CAP_DMA_FAULT,
+		.header.version = 1,
+		.version = 1,
+	};
+	return vfio_info_add_capability(caps, &cap.header, sizeof(cap));
+}
+
+static const struct vfio_pci_regops vfio_pci_dma_fault_regops = {
+	.rw		= vfio_pci_dma_fault_rw,
+	.release	= vfio_pci_dma_fault_release,
+	.add_capability = vfio_pci_dma_fault_add_capability,
+};
+
+#define DMA_FAULT_RING_LENGTH 512
+
+static int vfio_pci_dma_fault_init(struct vfio_pci_device *vdev)
+{
+	struct vfio_region_dma_fault *header;
+	struct iommu_domain *domain;
+	size_t size;
+	bool nested;
+	int ret;
+
+	domain = iommu_get_domain_for_dev(&vdev->pdev->dev);
+	ret = iommu_domain_get_attr(domain, DOMAIN_ATTR_NESTING, &nested);
+	if (ret || !nested)
+		return ret;
+
+	mutex_init(&vdev->fault_queue_lock);
+
+	/*
+	 * We provision 1 page for the header and space for
+	 * DMA_FAULT_RING_LENGTH fault records in the ring buffer.
+	 */
+	size = ALIGN(sizeof(struct iommu_fault) *
+		     DMA_FAULT_RING_LENGTH, PAGE_SIZE) + PAGE_SIZE;
+
+	vdev->fault_pages = kzalloc(size, GFP_KERNEL);
+	if (!vdev->fault_pages)
+		return -ENOMEM;
+
+	ret = vfio_pci_register_dev_region(vdev,
+		VFIO_REGION_TYPE_NESTED,
+		VFIO_REGION_SUBTYPE_NESTED_DMA_FAULT,
+		&vfio_pci_dma_fault_regops, size,
+		VFIO_REGION_INFO_FLAG_READ | VFIO_REGION_INFO_FLAG_WRITE,
+		vdev->fault_pages);
+	if (ret)
+		goto out;
+
+	header = (struct vfio_region_dma_fault *)vdev->fault_pages;
+	header->entry_size = sizeof(struct iommu_fault);
+	header->nb_entries = DMA_FAULT_RING_LENGTH;
+	header->offset = sizeof(struct vfio_region_dma_fault);
+	return 0;
+out:
+	kfree(vdev->fault_pages);
+	vdev->fault_pages = NULL;
+	return ret;
+}
+
 static int vfio_pci_enable(struct vfio_pci_device *vdev)
 {
 	struct pci_dev *pdev = vdev->pdev;
@@ -409,6 +481,10 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
 		}
 	}
 
+	ret = vfio_pci_dma_fault_init(vdev);
+	if (ret)
+		goto disable_exit;
+
 	vfio_pci_probe_mmaps(vdev);
 
 	return 0;
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 5c90e560c5c7..1d9b0f648133 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -134,6 +134,8 @@ struct vfio_pci_device {
 	int			ioeventfds_nr;
 	struct eventfd_ctx	*err_trigger;
 	struct eventfd_ctx	*req_trigger;
+	u8			*fault_pages;
+	struct mutex		fault_queue_lock;
 	struct list_head	dummy_resources_list;
 	struct mutex		ioeventfds_lock;
 	struct list_head	ioeventfds_list;
@@ -170,6 +172,10 @@ extern ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf,
 extern long vfio_pci_ioeventfd(struct vfio_pci_device *vdev, loff_t offset,
 			       uint64_t data, int count, int fd);
 
+extern size_t vfio_pci_dma_fault_rw(struct vfio_pci_device *vdev,
+				    char __user *buf, size_t count,
+				    loff_t *ppos, bool iswrite);
+
 extern int vfio_pci_init_perm_bits(void);
 extern void vfio_pci_uninit_perm_bits(void);
 
diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
index a0b5fc8e46f4..164120607469 100644
--- a/drivers/vfio/pci/vfio_pci_rdwr.c
+++ b/drivers/vfio/pci/vfio_pci_rdwr.c
@@ -356,6 +356,50 @@ ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf,
 	return done;
 }
 
+size_t vfio_pci_dma_fault_rw(struct vfio_pci_device *vdev, char __user *buf,
+			     size_t count, loff_t *ppos, bool iswrite)
+{
+	unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
+	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
+	void *base = vdev->region[i].data;
+	int ret = -EFAULT;
+
+	if (pos >= vdev->region[i].size)
+		return -EINVAL;
+
+	count = min(count, (size_t)(vdev->region[i].size - pos));
+
+	mutex_lock(&vdev->fault_queue_lock);
+
+	if (iswrite) {
+		struct vfio_region_dma_fault *header =
+			(struct vfio_region_dma_fault *)base;
+		u32 new_tail;
+
+		if (pos != 0 || count != 4) {
+			ret = -EINVAL;
+			goto unlock;
+		}
+
+		if (copy_from_user((void *)&new_tail, buf, count))
+			goto unlock;
+
+		if (new_tail >= header->nb_entries) {
+			ret = -EINVAL;
+			goto unlock;
+		}
+		header->tail = new_tail;
+	} else {
+		if (copy_to_user(buf, base + pos, count))
+			goto unlock;
+	}
+	*ppos += count;
+	ret = count;
+unlock:
+	mutex_unlock(&vdev->fault_queue_lock);
+	return ret;
+}
+
 static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd *ioeventfd,
 					bool test_mem)
 {
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index b352e76cfb71..629dfb38d9e7 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -343,6 +343,9 @@ struct vfio_region_info_cap_type {
 /* sub-types for VFIO_REGION_TYPE_GFX */
 #define VFIO_REGION_SUBTYPE_GFX_EDID            (1)
 
+#define VFIO_REGION_TYPE_NESTED			(2)
+#define VFIO_REGION_SUBTYPE_NESTED_DMA_FAULT	(1)
+
 /**
  * struct vfio_region_gfx_edid - EDID region layout.
  *
@@ -988,6 +991,37 @@ struct vfio_device_feature {
  */
 #define VFIO_DEVICE_FEATURE_PCI_VF_TOKEN	(0)
 
+/*
+ * Capability exposed by the DMA fault region
+ * @version: ABI version
+ */
+#define VFIO_REGION_INFO_CAP_DMA_FAULT	6
+
+struct vfio_region_info_cap_fault {
+	struct vfio_info_cap_header header;
+	__u32 version;
+};
+
+/*
+ * DMA Fault Region Layout
+ * @tail: index relative to the start of the ring buffer at which the
+ *        consumer finds the next item in the buffer
+ * @entry_size: fault ring buffer entry size in bytes
+ * @nb_entries: max capacity of the fault ring buffer
+ * @offset: ring buffer offset relative to the start of the region
+ * @head: index relative to the start of the ring buffer at which the
+ *        producer (kernel) inserts items into the buffers
+ */
+struct vfio_region_dma_fault {
+	/* Write-Only */
+	__u32   tail;
+	/* Read-Only */
+	__u32   entry_size;
+	__u32	nb_entries;
+	__u32	offset;
+	__u32   head;
+};
+
 /* -------- API for Type1 VFIO IOMMU -------- */
 
 /**
-- 
2.21.3

_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

WARNING: multiple messages have this Message-ID (diff)
From: Eric Auger <eric.auger@redhat.com>
To: eric.auger.pro@gmail.com, eric.auger@redhat.com,
	iommu@lists.linux-foundation.org, linux-kernel@vger.kernel.org,
	kvm@vger.kernel.org, kvmarm@lists.cs.columbia.edu,
	will@kernel.org, joro@8bytes.org, maz@kernel.org,
	robin.murphy@arm.com, alex.williamson@redhat.com
Cc: jean-philippe@linaro.org, jacob.jun.pan@linux.intel.com,
	nicoleotsuka@gmail.com, vivek.gautam@arm.com, yi.l.liu@intel.com,
	zhangfei.gao@linaro.org
Subject: [PATCH v11 04/13] vfio/pci: Add VFIO_REGION_TYPE_NESTED region type
Date: Mon, 16 Nov 2020 12:00:21 +0100	[thread overview]
Message-ID: <20201116110030.32335-5-eric.auger@redhat.com> (raw)
In-Reply-To: <20201116110030.32335-1-eric.auger@redhat.com>

Add a new specific DMA_FAULT region aiming to exposed nested mode
translation faults. This region only is exposed if the device
is attached to a nested domain.

The region has a ring buffer that contains the actual fault
records plus a header allowing to handle it (tail/head indices,
max capacity, entry size). At the moment the region is dimensionned
for 512 fault records.

Signed-off-by: Eric Auger <eric.auger@redhat.com>

---
v11 -> v12:
- set fault_pages to NULL after free
- check new_tail >= header->nb_entries (Zenghui)

v10 -> v11:
- rename vfio_pci_init_dma_fault_region into
  vfio_pci_dma_fault_init
- free fault_pages in vfio_pci_dma_fault_release
- only register the region if the device is attached
  to a nested domain

v8 -> v9:
- Use a single region instead of a prod/cons region

v4 -> v5
- check cons is not null in vfio_pci_check_cons_fault

v3 -> v4:
- use 2 separate regions, respectively in read and write modes
- add the version capability
---
 drivers/vfio/pci/vfio_pci.c         | 76 +++++++++++++++++++++++++++++
 drivers/vfio/pci/vfio_pci_private.h |  6 +++
 drivers/vfio/pci/vfio_pci_rdwr.c    | 44 +++++++++++++++++
 include/uapi/linux/vfio.h           | 34 +++++++++++++
 4 files changed, 160 insertions(+)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index e6190173482c..7546a81e7fb6 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -311,6 +311,78 @@ int vfio_pci_set_power_state(struct vfio_pci_device *vdev, pci_power_t state)
 	return ret;
 }
 
+static void vfio_pci_dma_fault_release(struct vfio_pci_device *vdev,
+				       struct vfio_pci_region *region)
+{
+	kfree(vdev->fault_pages);
+}
+
+static int vfio_pci_dma_fault_add_capability(struct vfio_pci_device *vdev,
+					     struct vfio_pci_region *region,
+					     struct vfio_info_cap *caps)
+{
+	struct vfio_region_info_cap_fault cap = {
+		.header.id = VFIO_REGION_INFO_CAP_DMA_FAULT,
+		.header.version = 1,
+		.version = 1,
+	};
+	return vfio_info_add_capability(caps, &cap.header, sizeof(cap));
+}
+
+static const struct vfio_pci_regops vfio_pci_dma_fault_regops = {
+	.rw		= vfio_pci_dma_fault_rw,
+	.release	= vfio_pci_dma_fault_release,
+	.add_capability = vfio_pci_dma_fault_add_capability,
+};
+
+#define DMA_FAULT_RING_LENGTH 512
+
+static int vfio_pci_dma_fault_init(struct vfio_pci_device *vdev)
+{
+	struct vfio_region_dma_fault *header;
+	struct iommu_domain *domain;
+	size_t size;
+	bool nested;
+	int ret;
+
+	domain = iommu_get_domain_for_dev(&vdev->pdev->dev);
+	ret = iommu_domain_get_attr(domain, DOMAIN_ATTR_NESTING, &nested);
+	if (ret || !nested)
+		return ret;
+
+	mutex_init(&vdev->fault_queue_lock);
+
+	/*
+	 * We provision 1 page for the header and space for
+	 * DMA_FAULT_RING_LENGTH fault records in the ring buffer.
+	 */
+	size = ALIGN(sizeof(struct iommu_fault) *
+		     DMA_FAULT_RING_LENGTH, PAGE_SIZE) + PAGE_SIZE;
+
+	vdev->fault_pages = kzalloc(size, GFP_KERNEL);
+	if (!vdev->fault_pages)
+		return -ENOMEM;
+
+	ret = vfio_pci_register_dev_region(vdev,
+		VFIO_REGION_TYPE_NESTED,
+		VFIO_REGION_SUBTYPE_NESTED_DMA_FAULT,
+		&vfio_pci_dma_fault_regops, size,
+		VFIO_REGION_INFO_FLAG_READ | VFIO_REGION_INFO_FLAG_WRITE,
+		vdev->fault_pages);
+	if (ret)
+		goto out;
+
+	header = (struct vfio_region_dma_fault *)vdev->fault_pages;
+	header->entry_size = sizeof(struct iommu_fault);
+	header->nb_entries = DMA_FAULT_RING_LENGTH;
+	header->offset = sizeof(struct vfio_region_dma_fault);
+	return 0;
+out:
+	kfree(vdev->fault_pages);
+	vdev->fault_pages = NULL;
+	return ret;
+}
+
 static int vfio_pci_enable(struct vfio_pci_device *vdev)
 {
 	struct pci_dev *pdev = vdev->pdev;
@@ -409,6 +481,10 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
 		}
 	}
 
+	ret = vfio_pci_dma_fault_init(vdev);
+	if (ret)
+		goto disable_exit;
+
 	vfio_pci_probe_mmaps(vdev);
 
 	return 0;
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 5c90e560c5c7..1d9b0f648133 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -134,6 +134,8 @@ struct vfio_pci_device {
 	int			ioeventfds_nr;
 	struct eventfd_ctx	*err_trigger;
 	struct eventfd_ctx	*req_trigger;
+	u8			*fault_pages;
+	struct mutex		fault_queue_lock;
 	struct list_head	dummy_resources_list;
 	struct mutex		ioeventfds_lock;
 	struct list_head	ioeventfds_list;
@@ -170,6 +172,10 @@ extern ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf,
 extern long vfio_pci_ioeventfd(struct vfio_pci_device *vdev, loff_t offset,
 			       uint64_t data, int count, int fd);
 
+extern size_t vfio_pci_dma_fault_rw(struct vfio_pci_device *vdev,
+				    char __user *buf, size_t count,
+				    loff_t *ppos, bool iswrite);
+
 extern int vfio_pci_init_perm_bits(void);
 extern void vfio_pci_uninit_perm_bits(void);
 
diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
index a0b5fc8e46f4..164120607469 100644
--- a/drivers/vfio/pci/vfio_pci_rdwr.c
+++ b/drivers/vfio/pci/vfio_pci_rdwr.c
@@ -356,6 +356,50 @@ ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf,
 	return done;
 }
 
+size_t vfio_pci_dma_fault_rw(struct vfio_pci_device *vdev, char __user *buf,
+			     size_t count, loff_t *ppos, bool iswrite)
+{
+	unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
+	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
+	void *base = vdev->region[i].data;
+	int ret = -EFAULT;
+
+	if (pos >= vdev->region[i].size)
+		return -EINVAL;
+
+	count = min(count, (size_t)(vdev->region[i].size - pos));
+
+	mutex_lock(&vdev->fault_queue_lock);
+
+	if (iswrite) {
+		struct vfio_region_dma_fault *header =
+			(struct vfio_region_dma_fault *)base;
+		u32 new_tail;
+
+		if (pos != 0 || count != 4) {
+			ret = -EINVAL;
+			goto unlock;
+		}
+
+		if (copy_from_user((void *)&new_tail, buf, count))
+			goto unlock;
+
+		if (new_tail >= header->nb_entries) {
+			ret = -EINVAL;
+			goto unlock;
+		}
+		header->tail = new_tail;
+	} else {
+		if (copy_to_user(buf, base + pos, count))
+			goto unlock;
+	}
+	*ppos += count;
+	ret = count;
+unlock:
+	mutex_unlock(&vdev->fault_queue_lock);
+	return ret;
+}
+
 static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd *ioeventfd,
 					bool test_mem)
 {
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index b352e76cfb71..629dfb38d9e7 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -343,6 +343,9 @@ struct vfio_region_info_cap_type {
 /* sub-types for VFIO_REGION_TYPE_GFX */
 #define VFIO_REGION_SUBTYPE_GFX_EDID            (1)
 
+#define VFIO_REGION_TYPE_NESTED			(2)
+#define VFIO_REGION_SUBTYPE_NESTED_DMA_FAULT	(1)
+
 /**
  * struct vfio_region_gfx_edid - EDID region layout.
  *
@@ -988,6 +991,37 @@ struct vfio_device_feature {
  */
 #define VFIO_DEVICE_FEATURE_PCI_VF_TOKEN	(0)
 
+/*
+ * Capability exposed by the DMA fault region
+ * @version: ABI version
+ */
+#define VFIO_REGION_INFO_CAP_DMA_FAULT	6
+
+struct vfio_region_info_cap_fault {
+	struct vfio_info_cap_header header;
+	__u32 version;
+};
+
+/*
+ * DMA Fault Region Layout
+ * @tail: index relative to the start of the ring buffer at which the
+ *        consumer finds the next item in the buffer
+ * @entry_size: fault ring buffer entry size in bytes
+ * @nb_entries: max capacity of the fault ring buffer
+ * @offset: ring buffer offset relative to the start of the region
+ * @head: index relative to the start of the ring buffer at which the
+ *        producer (kernel) inserts items into the buffers
+ */
+struct vfio_region_dma_fault {
+	/* Write-Only */
+	__u32   tail;
+	/* Read-Only */
+	__u32   entry_size;
+	__u32	nb_entries;
+	__u32	offset;
+	__u32   head;
+};
+
 /* -------- API for Type1 VFIO IOMMU -------- */
 
 /**
-- 
2.21.3

_______________________________________________
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm

  parent reply	other threads:[~2020-11-16 12:34 UTC|newest]

Thread overview: 99+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-11-16 11:00 [PATCH v11 00/13] SMMUv3 Nested Stage Setup (VFIO part) Eric Auger
2020-11-16 11:00 ` Eric Auger
2020-11-16 11:00 ` Eric Auger
2020-11-16 11:00 ` [PATCH v11 01/13] vfio: VFIO_IOMMU_SET_PASID_TABLE Eric Auger
2020-11-16 11:00   ` Eric Auger
2020-11-16 11:00   ` Eric Auger
2020-11-17  2:11   ` kernel test robot
2020-11-17  2:11     ` kernel test robot
2020-11-17  2:11     ` kernel test robot
2020-11-17  2:11     ` kernel test robot
2020-11-24 21:31   ` Alex Williamson
2020-11-24 21:31     ` Alex Williamson
2020-11-24 21:31     ` Alex Williamson
2021-02-02 12:34   ` Keqian Zhu
2021-02-02 12:34     ` Keqian Zhu
2021-02-02 12:34     ` Keqian Zhu
2021-02-22 10:53     ` Auger Eric
2021-02-22 10:53       ` Auger Eric
2021-02-22 10:53       ` Auger Eric
2021-02-22 12:20       ` Keqian Zhu
2021-02-22 12:20         ` Keqian Zhu
2021-02-22 12:20         ` Keqian Zhu
2021-02-22 16:12         ` Auger Eric
2021-02-22 16:12           ` Auger Eric
2021-02-22 16:12           ` Auger Eric
2020-11-16 11:00 ` [PATCH v11 02/13] vfio: VFIO_IOMMU_CACHE_INVALIDATE Eric Auger
2020-11-16 11:00   ` Eric Auger
2020-11-16 11:00   ` Eric Auger
2020-11-16 11:00 ` [PATCH v11 03/13] vfio: VFIO_IOMMU_SET_MSI_BINDING Eric Auger
2020-11-16 11:00   ` Eric Auger
2020-11-16 11:00   ` Eric Auger
2021-02-02 12:08   ` Keqian Zhu
2021-02-02 12:08     ` Keqian Zhu
2021-02-02 12:08     ` Keqian Zhu
2020-11-16 11:00 ` Eric Auger [this message]
2020-11-16 11:00   ` [PATCH v11 04/13] vfio/pci: Add VFIO_REGION_TYPE_NESTED region type Eric Auger
2020-11-16 11:00   ` Eric Auger
2020-12-17 11:49   ` Kunkun Jiang
2020-12-17 11:49     ` Kunkun Jiang
2020-12-17 11:49     ` Kunkun Jiang
2021-02-23 12:45     ` Shenming Lu
2021-02-23 12:45       ` Shenming Lu
2021-02-23 12:45       ` Shenming Lu
2021-02-23 12:57       ` Auger Eric
2021-02-23 12:57         ` Auger Eric
2021-02-23 12:57         ` Auger Eric
2020-11-16 11:00 ` [PATCH v11 05/13] vfio/pci: Register an iommu fault handler Eric Auger
2020-11-16 11:00   ` Eric Auger
2020-11-16 11:00   ` Eric Auger
2020-11-16 13:51   ` kernel test robot
2020-11-16 13:51     ` kernel test robot
2020-11-16 13:51     ` kernel test robot
2020-11-16 13:51     ` kernel test robot
2020-11-16 19:30   ` kernel test robot
2020-11-16 19:30     ` kernel test robot
2020-11-16 19:30     ` kernel test robot
2020-11-16 19:30     ` kernel test robot
2020-11-16 11:00 ` [PATCH v11 06/13] vfio/pci: Allow to mmap the fault queue Eric Auger
2020-11-16 11:00   ` Eric Auger
2020-11-16 11:00   ` Eric Auger
2020-11-16 11:00 ` [PATCH v11 07/13] vfio: Use capability chains to handle device specific irq Eric Auger
2020-11-16 11:00   ` Eric Auger
2020-11-16 11:00   ` Eric Auger
2020-11-16 11:00 ` [PATCH v11 08/13] vfio/pci: Add framework for custom interrupt indices Eric Auger
2020-11-16 11:00   ` Eric Auger
2020-11-16 11:00   ` Eric Auger
2020-11-23 12:51   ` Shameerali Kolothum Thodi
2020-11-23 12:51     ` Shameerali Kolothum Thodi
2020-11-23 12:51     ` Shameerali Kolothum Thodi
2020-11-24  8:35     ` Auger Eric
2020-11-24  8:35       ` Auger Eric
2020-11-24  8:35       ` Auger Eric
2020-11-16 11:00 ` [PATCH v11 09/13] vfio: Add new IRQ for DMA fault reporting Eric Auger
2020-11-16 11:00   ` Eric Auger
2020-11-16 11:00   ` Eric Auger
2020-11-16 11:00 ` [PATCH v11 10/13] vfio/pci: Register and allow DMA FAULT IRQ signaling Eric Auger
2020-11-16 11:00   ` Eric Auger
2020-11-16 11:00   ` Eric Auger
2020-11-16 11:00 ` [PATCH v11 11/13] vfio: Document nested stage control Eric Auger
2020-11-16 11:00   ` Eric Auger
2020-11-16 11:00   ` Eric Auger
2020-11-16 11:00 ` [PATCH v11 12/13] vfio/pci: Register a DMA fault response region Eric Auger
2020-11-16 11:00   ` Eric Auger
2020-11-16 11:00   ` Eric Auger
2021-01-08 15:19   ` Shameerali Kolothum Thodi
2021-01-08 15:19     ` Shameerali Kolothum Thodi
2021-01-08 15:19     ` Shameerali Kolothum Thodi
2021-02-18 10:36   ` Shameerali Kolothum Thodi
2021-02-18 10:36     ` Shameerali Kolothum Thodi
2021-02-18 10:36     ` Shameerali Kolothum Thodi
2021-02-18 10:48     ` Auger Eric
2021-02-18 10:48       ` Auger Eric
2021-02-18 10:48       ` Auger Eric
2020-11-16 11:00 ` [PATCH v11 13/13] vfio/pci: Inject page response upon response region fill Eric Auger
2020-11-16 11:00   ` Eric Auger
2020-11-16 11:00   ` Eric Auger
2021-03-15 18:04 ` [PATCH v11 00/13] SMMUv3 Nested Stage Setup (VFIO part) Krishna Reddy
2021-03-15 18:04   ` Krishna Reddy
2021-03-15 18:04   ` Krishna Reddy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201116110030.32335-5-eric.auger@redhat.com \
    --to=eric.auger@redhat.com \
    --cc=alex.williamson@redhat.com \
    --cc=eric.auger.pro@gmail.com \
    --cc=iommu@lists.linux-foundation.org \
    --cc=jacob.jun.pan@linux.intel.com \
    --cc=jean-philippe@linaro.org \
    --cc=joro@8bytes.org \
    --cc=kvm@vger.kernel.org \
    --cc=kvmarm@lists.cs.columbia.edu \
    --cc=linux-kernel@vger.kernel.org \
    --cc=maz@kernel.org \
    --cc=nicoleotsuka@gmail.com \
    --cc=robin.murphy@arm.com \
    --cc=shameerali.kolothum.thodi@huawei.com \
    --cc=tn@semihalf.com \
    --cc=vivek.gautam@arm.com \
    --cc=will@kernel.org \
    --cc=yi.l.liu@intel.com \
    --cc=yuzenghui@huawei.com \
    --cc=zhangfei.gao@gmail.com \
    --cc=zhangfei.gao@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.