Linux-PCI Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH V2 0/4] Change vring space from nomal memory to dma coherent memory
@ 2020-09-29  8:44 Sherry Sun
  2020-09-29  8:44 ` [PATCH V2 1/4] misc: vop: change the way of allocating vring Sherry Sun
                   ` (3 more replies)
  0 siblings, 4 replies; 20+ messages in thread
From: Sherry Sun @ 2020-09-29  8:44 UTC (permalink / raw)
  To: hch, sudeep.dutt, ashutosh.dixit, arnd, gregkh, kishon,
	lorenzo.pieralisi
  Cc: linux-kernel, linux-pci, linux-imx

Changes in V2:
1. Remove dev_is_dma_coherent check as Christoph suggested in the patchset. 
2. Remove used_address_updated flags in Patch2.
3. Remove WARN_ON in Patch3 and return error code instead directly return -1.
4. Remove Patch4 as it is a separate patch now.
5. Add more detailed explanations in the commit of patches.

The original vop driver only supports dma coherent device, as it allocates and
maps vring by _get_free_pages and dma_map_single, but not use 
dma_sync_single_for_cpu/device to sync the updates of device_page/vring between
EP and RC, which will cause memory synchronization problem for device don't
support hardware dma coherent.

And allocate vrings use dma_alloc_coherent is a common way in kernel, as the
memory interacted between two systems should use consistent memory to avoid
caching effects. So here add noncoherent platform support for vop driver.
Also add some related dma changes to make sure noncoherent platform works
well.

Sherry Sun (4):
  misc: vop: change the way of allocating vring
  misc: vop: do not allocate and reassign the used ring
  misc: vop: simply return the saved dma address instead of virt_to_phys
  misc: vop: mapping kernel memory to user space as noncached

 drivers/misc/mic/bus/vop_bus.h     |  2 +
 drivers/misc/mic/host/mic_boot.c   |  8 +++
 drivers/misc/mic/vop/vop_debugfs.c |  2 -
 drivers/misc/mic/vop/vop_main.c    | 48 +++--------------
 drivers/misc/mic/vop/vop_vringh.c  | 83 +++++++-----------------------
 include/uapi/linux/mic_common.h    |  3 --
 6 files changed, 35 insertions(+), 111 deletions(-)

-- 
2.17.1


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH V2 1/4] misc: vop: change the way of allocating vring
  2020-09-29  8:44 [PATCH V2 0/4] Change vring space from nomal memory to dma coherent memory Sherry Sun
@ 2020-09-29  8:44 ` Sherry Sun
  2020-09-29 10:23   ` Christoph Hellwig
  2020-09-29  8:44 ` [PATCH V2 2/4] misc: vop: do not allocate and reassign the used ring Sherry Sun
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 20+ messages in thread
From: Sherry Sun @ 2020-09-29  8:44 UTC (permalink / raw)
  To: hch, sudeep.dutt, ashutosh.dixit, arnd, gregkh, kishon,
	lorenzo.pieralisi
  Cc: linux-kernel, linux-pci, linux-imx

Allocate vrings use dma_alloc_coherent is a common way in kernel. As the
memory interacted between two systems should use consistent memory to
avoid caching effects.

The orginal way use __get_free_pages and dma_map_single to allocate and
map vring, but not use dma_sync_single_for_cpu/device api to sync the
changes of vring between EP and RC, which will cause memory
synchronization problem for  those devices which don't support hardware
dma coherent.

Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Signed-off-by: Sherry Sun <sherry.sun@nxp.com>
---
 drivers/misc/mic/vop/vop_vringh.c | 39 +++++++------------------------
 1 file changed, 9 insertions(+), 30 deletions(-)

diff --git a/drivers/misc/mic/vop/vop_vringh.c b/drivers/misc/mic/vop/vop_vringh.c
index 7014ffe88632..08e63c81c5b2 100644
--- a/drivers/misc/mic/vop/vop_vringh.c
+++ b/drivers/misc/mic/vop/vop_vringh.c
@@ -298,9 +298,7 @@ static int vop_virtio_add_device(struct vop_vdev *vdev,
 		mutex_init(&vvr->vr_mutex);
 		vr_size = PAGE_ALIGN(round_up(vring_size(num, MIC_VIRTIO_RING_ALIGN), 4) +
 			sizeof(struct _mic_vring_info));
-		vr->va = (void *)
-			__get_free_pages(GFP_KERNEL | __GFP_ZERO,
-					 get_order(vr_size));
+		vr->va = dma_alloc_coherent(vop_dev(vdev), vr_size, &vr_addr, GFP_KERNEL);
 		if (!vr->va) {
 			ret = -ENOMEM;
 			dev_err(vop_dev(vdev), "%s %d err %d\n",
@@ -310,15 +308,6 @@ static int vop_virtio_add_device(struct vop_vdev *vdev,
 		vr->len = vr_size;
 		vr->info = vr->va + round_up(vring_size(num, MIC_VIRTIO_RING_ALIGN), 4);
 		vr->info->magic = cpu_to_le32(MIC_MAGIC + vdev->virtio_id + i);
-		vr_addr = dma_map_single(&vpdev->dev, vr->va, vr_size,
-					 DMA_BIDIRECTIONAL);
-		if (dma_mapping_error(&vpdev->dev, vr_addr)) {
-			free_pages((unsigned long)vr->va, get_order(vr_size));
-			ret = -ENOMEM;
-			dev_err(vop_dev(vdev), "%s %d err %d\n",
-				__func__, __LINE__, ret);
-			goto err;
-		}
 		vqconfig[i].address = cpu_to_le64(vr_addr);
 
 		vring_init(&vr->vr, num, vr->va, MIC_VIRTIO_RING_ALIGN);
@@ -339,11 +328,8 @@ static int vop_virtio_add_device(struct vop_vdev *vdev,
 		dev_dbg(&vpdev->dev,
 			"%s %d index %d va %p info %p vr_size 0x%x\n",
 			__func__, __LINE__, i, vr->va, vr->info, vr_size);
-		vvr->buf = (void *)__get_free_pages(GFP_KERNEL,
-					get_order(VOP_INT_DMA_BUF_SIZE));
-		vvr->buf_da = dma_map_single(&vpdev->dev,
-					  vvr->buf, VOP_INT_DMA_BUF_SIZE,
-					  DMA_BIDIRECTIONAL);
+		vvr->buf = dma_alloc_coherent(vop_dev(vdev), VOP_INT_DMA_BUF_SIZE,
+					      &vvr->buf_da, GFP_KERNEL);
 	}
 
 	snprintf(irqname, sizeof(irqname), "vop%dvirtio%d", vpdev->index,
@@ -382,10 +368,8 @@ static int vop_virtio_add_device(struct vop_vdev *vdev,
 	for (j = 0; j < i; j++) {
 		struct vop_vringh *vvr = &vdev->vvr[j];
 
-		dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[j].address),
-				 vvr->vring.len, DMA_BIDIRECTIONAL);
-		free_pages((unsigned long)vvr->vring.va,
-			   get_order(vvr->vring.len));
+		dma_free_coherent(vop_dev(vdev), vvr->vring.len, vvr->vring.va,
+				  le64_to_cpu(vqconfig[j].address));
 	}
 	return ret;
 }
@@ -433,17 +417,12 @@ static void vop_virtio_del_device(struct vop_vdev *vdev)
 	for (i = 0; i < vdev->dd->num_vq; i++) {
 		struct vop_vringh *vvr = &vdev->vvr[i];
 
-		dma_unmap_single(&vpdev->dev,
-				 vvr->buf_da, VOP_INT_DMA_BUF_SIZE,
-				 DMA_BIDIRECTIONAL);
-		free_pages((unsigned long)vvr->buf,
-			   get_order(VOP_INT_DMA_BUF_SIZE));
+		dma_free_coherent(vop_dev(vdev), VOP_INT_DMA_BUF_SIZE,
+				  vvr->buf, vvr->buf_da);
 		vringh_kiov_cleanup(&vvr->riov);
 		vringh_kiov_cleanup(&vvr->wiov);
-		dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[i].address),
-				 vvr->vring.len, DMA_BIDIRECTIONAL);
-		free_pages((unsigned long)vvr->vring.va,
-			   get_order(vvr->vring.len));
+		dma_free_coherent(vop_dev(vdev), vvr->vring.len, vvr->vring.va,
+				  le64_to_cpu(vqconfig[i].address));
 	}
 	/*
 	 * Order the type update with previous stores. This write barrier
-- 
2.17.1


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH V2 2/4] misc: vop: do not allocate and reassign the used ring
  2020-09-29  8:44 [PATCH V2 0/4] Change vring space from nomal memory to dma coherent memory Sherry Sun
  2020-09-29  8:44 ` [PATCH V2 1/4] misc: vop: change the way of allocating vring Sherry Sun
@ 2020-09-29  8:44 ` Sherry Sun
  2020-09-29 10:24   ` Christoph Hellwig
  2020-09-29  8:44 ` [PATCH V2 3/4] misc: vop: simply return the saved dma address instead of virt_to_phys Sherry Sun
  2020-09-29  8:44 ` [PATCH V2 4/4] misc: vop: mapping kernel memory to user space as noncached Sherry Sun
  3 siblings, 1 reply; 20+ messages in thread
From: Sherry Sun @ 2020-09-29  8:44 UTC (permalink / raw)
  To: hch, sudeep.dutt, ashutosh.dixit, arnd, gregkh, kishon,
	lorenzo.pieralisi
  Cc: linux-kernel, linux-pci, linux-imx

We don't need to allocate and reassign the used ring here and remove the
used_address_updated flag.Since RC has allocated the entire vring,
including the used ring. Simply add the corresponding offset can get the
used ring address.

If following the orginal way to reassign the used ring, will encounter a
problem. When host finished with descriptor, it will update the used
ring with putused_kern api, if reassign used ring at EP side, used
ring will be io device memory for RC, use memcpy in putused_kern will
cause kernel panic.

Signed-off-by: Sherry Sun <sherry.sun@nxp.com>
Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
---
 drivers/misc/mic/vop/vop_debugfs.c |  2 --
 drivers/misc/mic/vop/vop_main.c    | 48 ++++--------------------------
 drivers/misc/mic/vop/vop_vringh.c  | 31 -------------------
 include/uapi/linux/mic_common.h    |  3 --
 4 files changed, 6 insertions(+), 78 deletions(-)

diff --git a/drivers/misc/mic/vop/vop_debugfs.c b/drivers/misc/mic/vop/vop_debugfs.c
index 9d4f175f4dd1..05eca4a98585 100644
--- a/drivers/misc/mic/vop/vop_debugfs.c
+++ b/drivers/misc/mic/vop/vop_debugfs.c
@@ -79,8 +79,6 @@ static int vop_dp_show(struct seq_file *s, void *pos)
 		seq_printf(s, "Vdev reset %d\n", dc->vdev_reset);
 		seq_printf(s, "Guest Ack %d ", dc->guest_ack);
 		seq_printf(s, "Host ack %d\n", dc->host_ack);
-		seq_printf(s, "Used address updated %d ",
-			   dc->used_address_updated);
 		seq_printf(s, "Vdev 0x%llx\n", dc->vdev);
 		seq_printf(s, "c2h doorbell %d ", dc->c2h_vdev_db);
 		seq_printf(s, "h2c doorbell %d\n", dc->h2c_vdev_db);
diff --git a/drivers/misc/mic/vop/vop_main.c b/drivers/misc/mic/vop/vop_main.c
index 714b94f42d38..1ccc94dfd6ac 100644
--- a/drivers/misc/mic/vop/vop_main.c
+++ b/drivers/misc/mic/vop/vop_main.c
@@ -250,10 +250,6 @@ static void vop_del_vq(struct virtqueue *vq, int n)
 	struct _vop_vdev *vdev = to_vopvdev(vq->vdev);
 	struct vop_device *vpdev = vdev->vpdev;
 
-	dma_unmap_single(&vpdev->dev, vdev->used[n],
-			 vdev->used_size[n], DMA_BIDIRECTIONAL);
-	free_pages((unsigned long)vdev->used_virt[n],
-		   get_order(vdev->used_size[n]));
 	vring_del_virtqueue(vq);
 	vpdev->hw_ops->unmap(vpdev, vdev->vr[n]);
 	vdev->vr[n] = NULL;
@@ -340,8 +336,8 @@ static struct virtqueue *vop_find_vq(struct virtio_device *dev,
 	vdev->used_size[index] = PAGE_ALIGN(sizeof(__u16) * 3 +
 					     sizeof(struct vring_used_elem) *
 					     le16_to_cpu(config.num));
-	used = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
-					get_order(vdev->used_size[index]));
+	used = va + PAGE_ALIGN(sizeof(struct vring_desc) * le16_to_cpu(config.num) +
+			       sizeof(__u16) * (3 + le16_to_cpu(config.num)));
 	vdev->used_virt[index] = used;
 	if (!used) {
 		err = -ENOMEM;
@@ -355,27 +351,15 @@ static struct virtqueue *vop_find_vq(struct virtio_device *dev,
 			       name, used);
 	if (!vq) {
 		err = -ENOMEM;
-		goto free_used;
+		goto unmap;
 	}
 
-	vdev->used[index] = dma_map_single(&vpdev->dev, used,
-					    vdev->used_size[index],
-					    DMA_BIDIRECTIONAL);
-	if (dma_mapping_error(&vpdev->dev, vdev->used[index])) {
-		err = -ENOMEM;
-		dev_err(_vop_dev(vdev), "%s %d err %d\n",
-			__func__, __LINE__, err);
-		goto del_vq;
-	}
+	vdev->used[index] = config.address + PAGE_ALIGN(sizeof(struct vring_desc) * le16_to_cpu(config.num) +
+			    sizeof(__u16) * (3 + le16_to_cpu(config.num)));
 	writeq(vdev->used[index], &vqconfig->used_address);
 
 	vq->priv = vdev;
 	return vq;
-del_vq:
-	vring_del_virtqueue(vq);
-free_used:
-	free_pages((unsigned long)used,
-		   get_order(vdev->used_size[index]));
 unmap:
 	vpdev->hw_ops->unmap(vpdev, vdev->vr[index]);
 	return ERR_PTR(err);
@@ -388,9 +372,7 @@ static int vop_find_vqs(struct virtio_device *dev, unsigned nvqs,
 			struct irq_affinity *desc)
 {
 	struct _vop_vdev *vdev = to_vopvdev(dev);
-	struct vop_device *vpdev = vdev->vpdev;
-	struct mic_device_ctrl __iomem *dc = vdev->dc;
-	int i, err, retry, queue_idx = 0;
+	int i, err, queue_idx = 0;
 
 	/* We must have this many virtqueues. */
 	if (nvqs > ioread8(&vdev->desc->num_vq))
@@ -412,24 +394,6 @@ static int vop_find_vqs(struct virtio_device *dev, unsigned nvqs,
 		}
 	}
 
-	iowrite8(1, &dc->used_address_updated);
-	/*
-	 * Send an interrupt to the host to inform it that used
-	 * rings have been re-assigned.
-	 */
-	vpdev->hw_ops->send_intr(vpdev, vdev->c2h_vdev_db);
-	for (retry = 100; --retry;) {
-		if (!ioread8(&dc->used_address_updated))
-			break;
-		msleep(100);
-	}
-
-	dev_dbg(_vop_dev(vdev), "%s: retry: %d\n", __func__, retry);
-	if (!retry) {
-		err = -ENODEV;
-		goto error;
-	}
-
 	return 0;
 error:
 	vop_del_vqs(dev);
diff --git a/drivers/misc/mic/vop/vop_vringh.c b/drivers/misc/mic/vop/vop_vringh.c
index 08e63c81c5b2..422a28c1bb7a 100644
--- a/drivers/misc/mic/vop/vop_vringh.c
+++ b/drivers/misc/mic/vop/vop_vringh.c
@@ -53,33 +53,6 @@ static void _vop_notify(struct vringh *vrh)
 		vpdev->hw_ops->send_intr(vpdev, db);
 }
 
-static void vop_virtio_init_post(struct vop_vdev *vdev)
-{
-	struct mic_vqconfig *vqconfig = mic_vq_config(vdev->dd);
-	struct vop_device *vpdev = vdev->vpdev;
-	int i, used_size;
-
-	for (i = 0; i < vdev->dd->num_vq; i++) {
-		used_size = PAGE_ALIGN(sizeof(u16) * 3 +
-				sizeof(struct vring_used_elem) *
-				le16_to_cpu(vqconfig->num));
-		if (!le64_to_cpu(vqconfig[i].used_address)) {
-			dev_warn(vop_dev(vdev), "used_address zero??\n");
-			continue;
-		}
-		vdev->vvr[i].vrh.vring.used =
-			(void __force *)vpdev->hw_ops->remap(
-			vpdev,
-			le64_to_cpu(vqconfig[i].used_address),
-			used_size);
-	}
-
-	vdev->dc->used_address_updated = 0;
-
-	dev_info(vop_dev(vdev), "%s: device type %d LINKUP\n",
-		 __func__, vdev->virtio_id);
-}
-
 static inline void vop_virtio_device_reset(struct vop_vdev *vdev)
 {
 	int i;
@@ -130,9 +103,6 @@ static void vop_bh_handler(struct work_struct *work)
 	struct vop_vdev *vdev = container_of(work, struct vop_vdev,
 			virtio_bh_work);
 
-	if (vdev->dc->used_address_updated)
-		vop_virtio_init_post(vdev);
-
 	if (vdev->dc->vdev_reset)
 		vop_virtio_device_reset(vdev);
 
@@ -250,7 +220,6 @@ static void vop_init_device_ctrl(struct vop_vdev *vdev,
 	dc->guest_ack = 0;
 	dc->vdev_reset = 0;
 	dc->host_ack = 0;
-	dc->used_address_updated = 0;
 	dc->c2h_vdev_db = -1;
 	dc->h2c_vdev_db = -1;
 	vdev->dc = dc;
diff --git a/include/uapi/linux/mic_common.h b/include/uapi/linux/mic_common.h
index 504e523f702c..e680fe27af69 100644
--- a/include/uapi/linux/mic_common.h
+++ b/include/uapi/linux/mic_common.h
@@ -56,8 +56,6 @@ struct mic_device_desc {
  * @vdev_reset: Set to 1 by guest to indicate virtio device has been reset.
  * @guest_ack: Set to 1 by guest to ack a command.
  * @host_ack: Set to 1 by host to ack a command.
- * @used_address_updated: Set to 1 by guest when the used address should be
- * updated.
  * @c2h_vdev_db: The doorbell number to be used by guest. Set by host.
  * @h2c_vdev_db: The doorbell number to be used by host. Set by guest.
  */
@@ -67,7 +65,6 @@ struct mic_device_ctrl {
 	__u8 vdev_reset;
 	__u8 guest_ack;
 	__u8 host_ack;
-	__u8 used_address_updated;
 	__s8 c2h_vdev_db;
 	__s8 h2c_vdev_db;
 } __attribute__ ((aligned(8)));
-- 
2.17.1


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH V2 3/4] misc: vop: simply return the saved dma address instead of virt_to_phys
  2020-09-29  8:44 [PATCH V2 0/4] Change vring space from nomal memory to dma coherent memory Sherry Sun
  2020-09-29  8:44 ` [PATCH V2 1/4] misc: vop: change the way of allocating vring Sherry Sun
  2020-09-29  8:44 ` [PATCH V2 2/4] misc: vop: do not allocate and reassign the used ring Sherry Sun
@ 2020-09-29  8:44 ` Sherry Sun
  2020-09-29 10:26   ` Christoph Hellwig
  2020-09-29  8:44 ` [PATCH V2 4/4] misc: vop: mapping kernel memory to user space as noncached Sherry Sun
  3 siblings, 1 reply; 20+ messages in thread
From: Sherry Sun @ 2020-09-29  8:44 UTC (permalink / raw)
  To: hch, sudeep.dutt, ashutosh.dixit, arnd, gregkh, kishon,
	lorenzo.pieralisi
  Cc: linux-kernel, linux-pci, linux-imx

The device page and vring should use consistent memory which are
allocated by dma_alloc_coherent api, when user space wants to get its
physical address, virt_to_phys cannot be used, should simply return the
saved device page dma address by get_dp_dma callback and the vring dma
address saved in mic_vqconfig.

Signed-off-by: Sherry Sun <sherry.sun@nxp.com>
Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
---
 drivers/misc/mic/bus/vop_bus.h    |  2 ++
 drivers/misc/mic/host/mic_boot.c  |  8 ++++++++
 drivers/misc/mic/vop/vop_vringh.c | 11 +++++++++--
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/mic/bus/vop_bus.h b/drivers/misc/mic/bus/vop_bus.h
index 4fa02808c1e2..d891eacae358 100644
--- a/drivers/misc/mic/bus/vop_bus.h
+++ b/drivers/misc/mic/bus/vop_bus.h
@@ -75,6 +75,7 @@ struct vop_driver {
  *                 node to add/remove/configure virtio devices.
  * @get_dp: Get access to the virtio device page used by the self
  *          node to add/remove/configure virtio devices.
+ * @get_dp_dma: Get dma address of the virtio device page.
  * @send_intr: Send an interrupt to the peer node on a specified doorbell.
  * @remap: Map a buffer with the specified DMA address and length.
  * @unmap: Unmap a buffer previously mapped.
@@ -92,6 +93,7 @@ struct vop_hw_ops {
 	void (*ack_interrupt)(struct vop_device *vpdev, int num);
 	void __iomem * (*get_remote_dp)(struct vop_device *vpdev);
 	void * (*get_dp)(struct vop_device *vpdev);
+	dma_addr_t (*get_dp_dma)(struct vop_device *vpdev);
 	void (*send_intr)(struct vop_device *vpdev, int db);
 	void __iomem * (*remap)(struct vop_device *vpdev,
 				  dma_addr_t pa, size_t len);
diff --git a/drivers/misc/mic/host/mic_boot.c b/drivers/misc/mic/host/mic_boot.c
index fb5b3989753d..ced03662cd8f 100644
--- a/drivers/misc/mic/host/mic_boot.c
+++ b/drivers/misc/mic/host/mic_boot.c
@@ -88,6 +88,13 @@ static void *__mic_get_dp(struct vop_device *vpdev)
 	return mdev->dp;
 }
 
+static dma_addr_t __mic_get_dp_dma(struct vop_device *vpdev)
+{
+	struct mic_device *mdev = vpdev_to_mdev(&vpdev->dev);
+
+	return mdev->dp_dma_addr;
+}
+
 static void __iomem *__mic_get_remote_dp(struct vop_device *vpdev)
 {
 	return NULL;
@@ -119,6 +126,7 @@ static struct vop_hw_ops vop_hw_ops = {
 	.ack_interrupt = __mic_ack_interrupt,
 	.next_db = __mic_next_db,
 	.get_dp = __mic_get_dp,
+	.get_dp_dma = __mic_get_dp_dma,
 	.get_remote_dp = __mic_get_remote_dp,
 	.send_intr = __mic_send_intr,
 	.remap = __mic_ioremap,
diff --git a/drivers/misc/mic/vop/vop_vringh.c b/drivers/misc/mic/vop/vop_vringh.c
index 422a28c1bb7a..4d5feb39aeb7 100644
--- a/drivers/misc/mic/vop/vop_vringh.c
+++ b/drivers/misc/mic/vop/vop_vringh.c
@@ -995,6 +995,7 @@ vop_query_offset(struct vop_vdev *vdev, unsigned long offset,
 		 unsigned long *size, unsigned long *pa)
 {
 	struct vop_device *vpdev = vdev->vpdev;
+	struct mic_vqconfig *vqconfig = mic_vq_config(vdev->dd);
 	unsigned long start = MIC_DP_SIZE;
 	int i;
 
@@ -1007,7 +1008,13 @@ vop_query_offset(struct vop_vdev *vdev, unsigned long offset,
 	 * ....
 	 */
 	if (!offset) {
-		*pa = virt_to_phys(vpdev->hw_ops->get_dp(vpdev));
+		if (vpdev->hw_ops->get_dp_dma)
+			*pa = vpdev->hw_ops->get_dp_dma(vpdev);
+		else {
+			dev_err(vop_dev(vdev), "can't get device page physical address\n");
+			return -EINVAL;
+		}
+
 		*size = MIC_DP_SIZE;
 		return 0;
 	}
@@ -1016,7 +1023,7 @@ vop_query_offset(struct vop_vdev *vdev, unsigned long offset,
 		struct vop_vringh *vvr = &vdev->vvr[i];
 
 		if (offset == start) {
-			*pa = virt_to_phys(vvr->vring.va);
+			*pa = vqconfig[i].address;
 			*size = vvr->vring.len;
 			return 0;
 		}
-- 
2.17.1


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH V2 4/4] misc: vop: mapping kernel memory to user space as noncached
  2020-09-29  8:44 [PATCH V2 0/4] Change vring space from nomal memory to dma coherent memory Sherry Sun
                   ` (2 preceding siblings ...)
  2020-09-29  8:44 ` [PATCH V2 3/4] misc: vop: simply return the saved dma address instead of virt_to_phys Sherry Sun
@ 2020-09-29  8:44 ` Sherry Sun
  2020-09-29 10:28   ` Christoph Hellwig
  3 siblings, 1 reply; 20+ messages in thread
From: Sherry Sun @ 2020-09-29  8:44 UTC (permalink / raw)
  To: hch, sudeep.dutt, ashutosh.dixit, arnd, gregkh, kishon,
	lorenzo.pieralisi
  Cc: linux-kernel, linux-pci, linux-imx

Mapping kernel space memory to user space as noncached, since user space
need check the updates of avail_idx and device page flags timely.

Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Signed-off-by: Sherry Sun <sherry.sun@nxp.com>
---
 drivers/misc/mic/vop/vop_vringh.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/mic/vop/vop_vringh.c b/drivers/misc/mic/vop/vop_vringh.c
index 4d5feb39aeb7..6e193bd64ef1 100644
--- a/drivers/misc/mic/vop/vop_vringh.c
+++ b/drivers/misc/mic/vop/vop_vringh.c
@@ -1057,7 +1057,7 @@ static int vop_mmap(struct file *f, struct vm_area_struct *vma)
 		}
 		err = remap_pfn_range(vma, vma->vm_start + offset,
 				      pa >> PAGE_SHIFT, size,
-				      vma->vm_page_prot);
+				      pgprot_noncached(vma->vm_page_prot));
 		if (err)
 			goto ret;
 		size_rem -= size;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH V2 1/4] misc: vop: change the way of allocating vring
  2020-09-29  8:44 ` [PATCH V2 1/4] misc: vop: change the way of allocating vring Sherry Sun
@ 2020-09-29 10:23   ` Christoph Hellwig
  2020-09-29 13:02     ` Sherry Sun
  0 siblings, 1 reply; 20+ messages in thread
From: Christoph Hellwig @ 2020-09-29 10:23 UTC (permalink / raw)
  To: Sherry Sun
  Cc: hch, sudeep.dutt, ashutosh.dixit, arnd, gregkh, kishon,
	lorenzo.pieralisi, linux-kernel, linux-pci, linux-imx

> +		vr->va = dma_alloc_coherent(vop_dev(vdev), vr_size, &vr_addr, GFP_KERNEL);

Please stick to 80 character lines unless you have a really good
reason not to. 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH V2 2/4] misc: vop: do not allocate and reassign the used ring
  2020-09-29  8:44 ` [PATCH V2 2/4] misc: vop: do not allocate and reassign the used ring Sherry Sun
@ 2020-09-29 10:24   ` Christoph Hellwig
  2020-09-29 13:02     ` Sherry Sun
  0 siblings, 1 reply; 20+ messages in thread
From: Christoph Hellwig @ 2020-09-29 10:24 UTC (permalink / raw)
  To: Sherry Sun
  Cc: hch, sudeep.dutt, ashutosh.dixit, arnd, gregkh, kishon,
	lorenzo.pieralisi, linux-kernel, linux-pci, linux-imx

> diff --git a/include/uapi/linux/mic_common.h b/include/uapi/linux/mic_common.h
> index 504e523f702c..e680fe27af69 100644
> --- a/include/uapi/linux/mic_common.h
> +++ b/include/uapi/linux/mic_common.h
> @@ -56,8 +56,6 @@ struct mic_device_desc {
>   * @vdev_reset: Set to 1 by guest to indicate virtio device has been reset.
>   * @guest_ack: Set to 1 by guest to ack a command.
>   * @host_ack: Set to 1 by host to ack a command.
> - * @used_address_updated: Set to 1 by guest when the used address should be
> - * updated.
>   * @c2h_vdev_db: The doorbell number to be used by guest. Set by host.
>   * @h2c_vdev_db: The doorbell number to be used by host. Set by guest.
>   */
> @@ -67,7 +65,6 @@ struct mic_device_ctrl {
>  	__u8 vdev_reset;
>  	__u8 guest_ack;
>  	__u8 host_ack;
> -	__u8 used_address_updated;
>  	__s8 c2h_vdev_db;
>  	__s8 h2c_vdev_db;
>  } __attribute__ ((aligned(8)));

This is an ABI change.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH V2 3/4] misc: vop: simply return the saved dma address instead of virt_to_phys
  2020-09-29  8:44 ` [PATCH V2 3/4] misc: vop: simply return the saved dma address instead of virt_to_phys Sherry Sun
@ 2020-09-29 10:26   ` Christoph Hellwig
  2020-09-29 13:10     ` Sherry Sun
  0 siblings, 1 reply; 20+ messages in thread
From: Christoph Hellwig @ 2020-09-29 10:26 UTC (permalink / raw)
  To: Sherry Sun
  Cc: hch, sudeep.dutt, ashutosh.dixit, arnd, gregkh, kishon,
	lorenzo.pieralisi, linux-kernel, linux-pci, linux-imx

On Tue, Sep 29, 2020 at 04:44:24PM +0800, Sherry Sun wrote:
> The device page and vring should use consistent memory which are
> allocated by dma_alloc_coherent api, when user space wants to get its
> physical address, virt_to_phys cannot be used, should simply return the
> saved device page dma address by get_dp_dma callback and the vring dma
> address saved in mic_vqconfig.

More importantly you can't just all virt_to_phys on a return value
from dma_alloc_coherent, so this needs to be folded into patch 1.

>  	if (!offset) {
> -		*pa = virt_to_phys(vpdev->hw_ops->get_dp(vpdev));
> +		if (vpdev->hw_ops->get_dp_dma)
> +			*pa = vpdev->hw_ops->get_dp_dma(vpdev);
> +		else {
> +			dev_err(vop_dev(vdev), "can't get device page physical address\n");
> +			return -EINVAL;
> +		}

I don't think we need the NULL check here.  Wouldn't it also make sense
to return the virtual and DMA address from ->get_dp instead of adding
another method?


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH V2 4/4] misc: vop: mapping kernel memory to user space as noncached
  2020-09-29  8:44 ` [PATCH V2 4/4] misc: vop: mapping kernel memory to user space as noncached Sherry Sun
@ 2020-09-29 10:28   ` Christoph Hellwig
  2020-09-29 13:12     ` Sherry Sun
  2020-09-29 15:39     ` David Laight
  0 siblings, 2 replies; 20+ messages in thread
From: Christoph Hellwig @ 2020-09-29 10:28 UTC (permalink / raw)
  To: Sherry Sun
  Cc: hch, sudeep.dutt, ashutosh.dixit, arnd, gregkh, kishon,
	lorenzo.pieralisi, linux-kernel, linux-pci, linux-imx

On Tue, Sep 29, 2020 at 04:44:25PM +0800, Sherry Sun wrote:
> Mapping kernel space memory to user space as noncached, since user space
> need check the updates of avail_idx and device page flags timely.
> 
> Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
> Signed-off-by: Sherry Sun <sherry.sun@nxp.com>
> ---
>  drivers/misc/mic/vop/vop_vringh.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/misc/mic/vop/vop_vringh.c b/drivers/misc/mic/vop/vop_vringh.c
> index 4d5feb39aeb7..6e193bd64ef1 100644
> --- a/drivers/misc/mic/vop/vop_vringh.c
> +++ b/drivers/misc/mic/vop/vop_vringh.c
> @@ -1057,7 +1057,7 @@ static int vop_mmap(struct file *f, struct vm_area_struct *vma)
>  		}
>  		err = remap_pfn_range(vma, vma->vm_start + offset,
>  				      pa >> PAGE_SHIFT, size,
> -				      vma->vm_page_prot);
> +				      pgprot_noncached(vma->vm_page_prot));

You can't call remap_pfn_range on memory returned from
dma_alloc_coherent (which btw is not marked uncached on many platforms).

You need to use the dma_mmap_coherent helper instead.  And this also
needs to go into the first patch.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [PATCH V2 1/4] misc: vop: change the way of allocating vring
  2020-09-29 10:23   ` Christoph Hellwig
@ 2020-09-29 13:02     ` Sherry Sun
  0 siblings, 0 replies; 20+ messages in thread
From: Sherry Sun @ 2020-09-29 13:02 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: sudeep.dutt, ashutosh.dixit, arnd, gregkh, kishon,
	lorenzo.pieralisi, linux-kernel, linux-pci, dl-linux-imx

Hi Christoph,

> 
> > +		vr->va = dma_alloc_coherent(vop_dev(vdev), vr_size,
> &vr_addr,
> > +GFP_KERNEL);
> 
> Please stick to 80 character lines unless you have a really good reason not to.

Okay, will change it in V3.

Regards
Sherry

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [PATCH V2 2/4] misc: vop: do not allocate and reassign the used ring
  2020-09-29 10:24   ` Christoph Hellwig
@ 2020-09-29 13:02     ` Sherry Sun
  2020-09-29 14:28       ` David Laight
  0 siblings, 1 reply; 20+ messages in thread
From: Sherry Sun @ 2020-09-29 13:02 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: sudeep.dutt, ashutosh.dixit, arnd, gregkh, kishon,
	lorenzo.pieralisi, linux-kernel, linux-pci, dl-linux-imx

Hi Christoph,

> > diff --git a/include/uapi/linux/mic_common.h
> > b/include/uapi/linux/mic_common.h index 504e523f702c..e680fe27af69
> > 100644
> > --- a/include/uapi/linux/mic_common.h
> > +++ b/include/uapi/linux/mic_common.h
> > @@ -56,8 +56,6 @@ struct mic_device_desc {
> >   * @vdev_reset: Set to 1 by guest to indicate virtio device has been reset.
> >   * @guest_ack: Set to 1 by guest to ack a command.
> >   * @host_ack: Set to 1 by host to ack a command.
> > - * @used_address_updated: Set to 1 by guest when the used address
> > should be
> > - * updated.
> >   * @c2h_vdev_db: The doorbell number to be used by guest. Set by host.
> >   * @h2c_vdev_db: The doorbell number to be used by host. Set by guest.
> >   */
> > @@ -67,7 +65,6 @@ struct mic_device_ctrl {
> >  	__u8 vdev_reset;
> >  	__u8 guest_ack;
> >  	__u8 host_ack;
> > -	__u8 used_address_updated;
> >  	__s8 c2h_vdev_db;
> >  	__s8 h2c_vdev_db;
> >  } __attribute__ ((aligned(8)));
> 
> This is an ABI change.

Yes, it is. But I noticed that this structure is only used by the vop driver.
And until now I haven't seen any user tools use it, including the user tool
mpssd which is corresponding to the vop driver.

Regards
Sherry

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [PATCH V2 3/4] misc: vop: simply return the saved dma address instead of virt_to_phys
  2020-09-29 10:26   ` Christoph Hellwig
@ 2020-09-29 13:10     ` Sherry Sun
  2020-09-29 18:11       ` Christoph Hellwig
  0 siblings, 1 reply; 20+ messages in thread
From: Sherry Sun @ 2020-09-29 13:10 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: sudeep.dutt, ashutosh.dixit, arnd, gregkh, kishon,
	lorenzo.pieralisi, linux-kernel, linux-pci, dl-linux-imx

Hi Christoph,

> On Tue, Sep 29, 2020 at 04:44:24PM +0800, Sherry Sun wrote:
> > The device page and vring should use consistent memory which are
> > allocated by dma_alloc_coherent api, when user space wants to get its
> > physical address, virt_to_phys cannot be used, should simply return
> > the saved device page dma address by get_dp_dma callback and the vring
> > dma address saved in mic_vqconfig.
> 
> More importantly you can't just all virt_to_phys on a return value from
> dma_alloc_coherent, so this needs to be folded into patch 1.

Okay, will move this change into patch 1.
> 
> >  	if (!offset) {
> > -		*pa = virt_to_phys(vpdev->hw_ops->get_dp(vpdev));
> > +		if (vpdev->hw_ops->get_dp_dma)
> > +			*pa = vpdev->hw_ops->get_dp_dma(vpdev);
> > +		else {
> > +			dev_err(vop_dev(vdev), "can't get device page
> physical address\n");
> > +			return -EINVAL;
> > +		}
> 
> I don't think we need the NULL check here.  Wouldn't it also make sense to
> return the virtual and DMA address from ->get_dp instead of adding another
> method?

Do you mean that we should only change the original ->get_dp callback to return virtual
and DMA address at the same time, instead of adding the ->get_dp_dma callback?

Regards
Sherry


^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [PATCH V2 4/4] misc: vop: mapping kernel memory to user space as noncached
  2020-09-29 10:28   ` Christoph Hellwig
@ 2020-09-29 13:12     ` Sherry Sun
  2020-09-29 15:39     ` David Laight
  1 sibling, 0 replies; 20+ messages in thread
From: Sherry Sun @ 2020-09-29 13:12 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: sudeep.dutt, ashutosh.dixit, arnd, gregkh, kishon,
	lorenzo.pieralisi, linux-kernel, linux-pci, dl-linux-imx

Hi Christoph,

> On Tue, Sep 29, 2020 at 04:44:25PM +0800, Sherry Sun wrote:
> > Mapping kernel space memory to user space as noncached, since user
> > space need check the updates of avail_idx and device page flags timely.
> >
> > Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
> > Signed-off-by: Sherry Sun <sherry.sun@nxp.com>
> > ---
> >  drivers/misc/mic/vop/vop_vringh.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/drivers/misc/mic/vop/vop_vringh.c
> > b/drivers/misc/mic/vop/vop_vringh.c
> > index 4d5feb39aeb7..6e193bd64ef1 100644
> > --- a/drivers/misc/mic/vop/vop_vringh.c
> > +++ b/drivers/misc/mic/vop/vop_vringh.c
> > @@ -1057,7 +1057,7 @@ static int vop_mmap(struct file *f, struct
> vm_area_struct *vma)
> >  		}
> >  		err = remap_pfn_range(vma, vma->vm_start + offset,
> >  				      pa >> PAGE_SHIFT, size,
> > -				      vma->vm_page_prot);
> > +				      pgprot_noncached(vma->vm_page_prot));
> 
> You can't call remap_pfn_range on memory returned from
> dma_alloc_coherent (which btw is not marked uncached on many platforms).
> 
> You need to use the dma_mmap_coherent helper instead.  And this also
> needs to go into the first patch.

Okay, will try to use dma_mmap_coherent  api in V3. Thanks.

Regards
Sherry


^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [PATCH V2 2/4] misc: vop: do not allocate and reassign the used ring
  2020-09-29 13:02     ` Sherry Sun
@ 2020-09-29 14:28       ` David Laight
  0 siblings, 0 replies; 20+ messages in thread
From: David Laight @ 2020-09-29 14:28 UTC (permalink / raw)
  To: 'Sherry Sun', Christoph Hellwig
  Cc: sudeep.dutt, ashutosh.dixit, arnd, gregkh, kishon,
	lorenzo.pieralisi, linux-kernel, linux-pci, dl-linux-imx

From: Sherry Sun
> Sent: 29 September 2020 14:02
> 
> Hi Christoph,
> 
> > > diff --git a/include/uapi/linux/mic_common.h
> > > b/include/uapi/linux/mic_common.h index 504e523f702c..e680fe27af69
> > > 100644
> > > --- a/include/uapi/linux/mic_common.h
> > > +++ b/include/uapi/linux/mic_common.h
> > > @@ -56,8 +56,6 @@ struct mic_device_desc {
> > >   * @vdev_reset: Set to 1 by guest to indicate virtio device has been reset.
> > >   * @guest_ack: Set to 1 by guest to ack a command.
> > >   * @host_ack: Set to 1 by host to ack a command.
> > > - * @used_address_updated: Set to 1 by guest when the used address
> > > should be
> > > - * updated.
> > >   * @c2h_vdev_db: The doorbell number to be used by guest. Set by host.
> > >   * @h2c_vdev_db: The doorbell number to be used by host. Set by guest.
> > >   */
> > > @@ -67,7 +65,6 @@ struct mic_device_ctrl {
> > >  	__u8 vdev_reset;
> > >  	__u8 guest_ack;
> > >  	__u8 host_ack;
> > > -	__u8 used_address_updated;
> > >  	__s8 c2h_vdev_db;
> > >  	__s8 h2c_vdev_db;
> > >  } __attribute__ ((aligned(8)));
> >
> > This is an ABI change.
> 
> Yes, it is. But I noticed that this structure is only used by the vop driver.
> And until now I haven't seen any user tools use it, including the user tool
> mpssd which is corresponding to the vop driver.

Just rename it as 'must_be_zero'.

I've just looked at the header.
WTF is all the __attribute__ ((aligned(8))); about?
Someone is really trying to make it slow on anything other than x86.
It would have been much better to ensure the structure members
are all 'naturally aligned'.

I'm not sure of the usage of the mic_device_ctrl structure.
But you really don't want to share a structure that has
adjacent bytes written by two different entities.
Even with coherent memory you probably should have
separated the data by cache line.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)


^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [PATCH V2 4/4] misc: vop: mapping kernel memory to user space as noncached
  2020-09-29 10:28   ` Christoph Hellwig
  2020-09-29 13:12     ` Sherry Sun
@ 2020-09-29 15:39     ` David Laight
  2020-10-13  2:02       ` Sherry Sun
  1 sibling, 1 reply; 20+ messages in thread
From: David Laight @ 2020-09-29 15:39 UTC (permalink / raw)
  To: 'Christoph Hellwig', Sherry Sun
  Cc: sudeep.dutt, ashutosh.dixit, arnd, gregkh, kishon,
	lorenzo.pieralisi, linux-kernel, linux-pci, linux-imx

From: Christoph Hellwig
> Sent: 29 September 2020 11:29
...
> You can't call remap_pfn_range on memory returned from
> dma_alloc_coherent (which btw is not marked uncached on many platforms).
> 
> You need to use the dma_mmap_coherent helper instead.

Hmmmm. I've a driver that does that.
Fortunately it only has to work on x86 where it doesn't matter.
However I can't easily convert it.
The 'problem' is that the mmap() request can cover multiple
dma buffers and need not start at the beginning of one.

Basically we have a PCIe card that has an inbuilt iommu
to convert internal 32bit addresses to 64bit PCIe ones.
This has 512 16kB pages.
So we do a number of dma_alloc_coherent() for 16k blocks.
The user process then does an mmap() for part of the buffer.
This request is 4k aligned so we do multiple remap_pfn_range()
calls to map the disjoint physical (and kernel virtual)
buffers into contiguous user memory.

So both ends see contiguous addresses even though the physical
addresses are non-contiguous.

I guess I could modify the vm_start address and then restore
it at the end.

I found this big discussion:
https://lore.kernel.org/patchwork/patch/351245/
about these functions.

The bit about VIPT caches is problematic.
I don't think you can change the kernel address during mmap.
What you need to do is defer allocating the user address until
you know the kernel address.
Otherwise you get into problems is you try to mmap the
same memory into two processes.
This is a general problem even for mmap() of files.
ISTR SYSV on some sparc systems having to use uncached maps.

If you might want to mmap two kernel buffers (dma or not)
into adjacent user addresses then you need some way of
allocating the second buffer to follow the first one
in the VIVT cache.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH V2 3/4] misc: vop: simply return the saved dma address instead of virt_to_phys
  2020-09-29 13:10     ` Sherry Sun
@ 2020-09-29 18:11       ` Christoph Hellwig
  2020-09-30  7:30         ` Sherry Sun
  0 siblings, 1 reply; 20+ messages in thread
From: Christoph Hellwig @ 2020-09-29 18:11 UTC (permalink / raw)
  To: Sherry Sun
  Cc: Christoph Hellwig, sudeep.dutt, ashutosh.dixit, arnd, gregkh,
	kishon, lorenzo.pieralisi, linux-kernel, linux-pci, dl-linux-imx

On Tue, Sep 29, 2020 at 01:10:12PM +0000, Sherry Sun wrote:
> > >  	if (!offset) {
> > > -		*pa = virt_to_phys(vpdev->hw_ops->get_dp(vpdev));
> > > +		if (vpdev->hw_ops->get_dp_dma)
> > > +			*pa = vpdev->hw_ops->get_dp_dma(vpdev);
> > > +		else {
> > > +			dev_err(vop_dev(vdev), "can't get device page
> > physical address\n");
> > > +			return -EINVAL;
> > > +		}
> > 
> > I don't think we need the NULL check here.  Wouldn't it also make sense to
> > return the virtual and DMA address from ->get_dp instead of adding another
> > method?
> 
> Do you mean that we should only change the original ->get_dp callback to return virtual
> and DMA address at the same time, instead of adding the ->get_dp_dma callback?

That was my intention when writing it, yes.  But it seems like most
other callers don't care.  Maybe move the invocation of
dma_mmap_coherent into a new ->mmap helper, that way it seems like
the calling code doesn't need to know about the dma_addr_t at all.

That being said the layering in the code keeps puzzling me.  As far as
I can tell only a single instance of struct vop_driver even exists, so
we might be able to kill all the indirections entirely.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [PATCH V2 3/4] misc: vop: simply return the saved dma address instead of virt_to_phys
  2020-09-29 18:11       ` Christoph Hellwig
@ 2020-09-30  7:30         ` Sherry Sun
  2020-10-05 13:42           ` Christoph Hellwig
  0 siblings, 1 reply; 20+ messages in thread
From: Sherry Sun @ 2020-09-30  7:30 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: sudeep.dutt, ashutosh.dixit, arnd, gregkh, kishon,
	lorenzo.pieralisi, linux-kernel, linux-pci, dl-linux-imx

Hi Christoph,

> On Tue, Sep 29, 2020 at 01:10:12PM +0000, Sherry Sun wrote:
> > > >  	if (!offset) {
> > > > -		*pa = virt_to_phys(vpdev->hw_ops->get_dp(vpdev));
> > > > +		if (vpdev->hw_ops->get_dp_dma)
> > > > +			*pa = vpdev->hw_ops->get_dp_dma(vpdev);
> > > > +		else {
> > > > +			dev_err(vop_dev(vdev), "can't get device page
> > > physical address\n");
> > > > +			return -EINVAL;
> > > > +		}
> > >
> > > I don't think we need the NULL check here.  Wouldn't it also make
> > > sense to return the virtual and DMA address from ->get_dp instead of
> > > adding another method?
> >
> > Do you mean that we should only change the original ->get_dp callback
> > to return virtual and DMA address at the same time, instead of adding the -
> >get_dp_dma callback?
> 
> That was my intention when writing it, yes.  But it seems like most other
> callers don't care.  Maybe move the invocation of dma_mmap_coherent into
> a new ->mmap helper, that way it seems like the calling code doesn't need to
> know about the dma_addr_t at all.
> 
> That being said the layering in the code keeps puzzling me.  As far as I can tell
> only a single instance of struct vop_driver even exists, so we might be able to
> kill all the indirections entirely.

There may be some misunderstandings here.
For ->get_dp_dma callback, it is used to get the device page dma address,
which is allocated by MIC layer instead of vop layer. 
For Intel mic, it still use kzalloc and dma_map_single apis, although we
recommended and we did use dma_alloc_coherent to get consistent device
page memory on our i.MX platform, but we didn't change the original implementation
method of Intel mic till now, as our main goal is to change the vop code to make it
more generic.

Which is means that the device page may use different allocate methods for
different platform, and now it is transparent to the vop layer.
So I think here use ->get_dp_dma callback to get device page dma address
is the most simple and convenient way.

We change to use dma_alloc_coherent in patch 1 to allocate vrings memory, as it is
the main job that the vop layer is responsible for.
So I still suggest to use ->get_dp or ->get_dp_dma callback for device page here, what do you think?

Regards
Sherry

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH V2 3/4] misc: vop: simply return the saved dma address instead of virt_to_phys
  2020-09-30  7:30         ` Sherry Sun
@ 2020-10-05 13:42           ` Christoph Hellwig
  0 siblings, 0 replies; 20+ messages in thread
From: Christoph Hellwig @ 2020-10-05 13:42 UTC (permalink / raw)
  To: Sherry Sun
  Cc: Christoph Hellwig, sudeep.dutt, ashutosh.dixit, arnd, gregkh,
	kishon, lorenzo.pieralisi, linux-kernel, linux-pci, dl-linux-imx

On Wed, Sep 30, 2020 at 07:30:21AM +0000, Sherry Sun wrote:
> There may be some misunderstandings here.
> For ->get_dp_dma callback, it is used to get the device page dma address,
> which is allocated by MIC layer instead of vop layer. 
> For Intel mic, it still use kzalloc and dma_map_single apis, although we
> recommended and we did use dma_alloc_coherent to get consistent device
> page memory on our i.MX platform, but we didn't change the original implementation
> method of Intel mic till now, as our main goal is to change the vop code to make it
> more generic.

Given how the memory is used everyone should use dma_alloc_coherent.
Note that for x86 the ony difference is that dma_alloc_coherent also
dips into the CMA pools where available, which eases allocator pressure,
and that it properly deals with the AMD SEV memory encryption, which
does't matter for Intel platforms.

> Which is means that the device page may use different allocate methods for
> different platform, and now it is transparent to the vop layer.
> So I think here use ->get_dp_dma callback to get device page dma address
> is the most simple and convenient way.
> 
> We change to use dma_alloc_coherent in patch 1 to allocate vrings memory, as it is
> the main job that the vop layer is responsible for.
> So I still suggest to use ->get_dp or ->get_dp_dma callback for device page here, what do you think?

As mentioned you need to move the code to mmap the buffers to the
same layer as the one doing the allocation.  If that is taken care of
we're fine, and I think a ->mmap callback might be the best way to
archive that, but this is not code I know intimately.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [PATCH V2 4/4] misc: vop: mapping kernel memory to user space as noncached
  2020-09-29 15:39     ` David Laight
@ 2020-10-13  2:02       ` Sherry Sun
  2020-10-20  1:33         ` Sherry Sun
  0 siblings, 1 reply; 20+ messages in thread
From: Sherry Sun @ 2020-10-13  2:02 UTC (permalink / raw)
  To: David Laight, 'Christoph Hellwig'
  Cc: sudeep.dutt, ashutosh.dixit, arnd, gregkh, kishon,
	lorenzo.pieralisi, linux-kernel, linux-pci, dl-linux-imx,
	Andy Duan

Hi David, thanks for your information.
Hi Christoph, please see my comments below.

> Subject: RE: [PATCH V2 4/4] misc: vop: mapping kernel memory to user space
> as noncached
> 
> From: Christoph Hellwig
> > Sent: 29 September 2020 11:29
> ...
> > You can't call remap_pfn_range on memory returned from
> > dma_alloc_coherent (which btw is not marked uncached on many
> platforms).
> >
> > You need to use the dma_mmap_coherent helper instead.
> 

I tried to use dma_mmap_coherent helper here, but I met the same problem as David said.
Since the user space calls mmap() to map all the device page and vring size at one time.

     va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq, PROT_READ, MAP_SHARED, fd, 0);

But the physical addresses of device page and multiple vrings are not consecutive, so we called
multiple remap_pfn_range before. When changing to use dma_mmap_coherent, it will return error
because vma_pages(the size user space want to map) are bigger than the actual size we do multiple
map(one non-continuous memory size at a time).

David believes that we could modify the vm_start address before call the multiple dma_mmap_coherent to
avoid the vma_pages check error and map multiple discontinuous memory.
Do you have any suggestions?

Best regards
Sherry

> Hmmmm. I've a driver that does that.
> Fortunately it only has to work on x86 where it doesn't matter.
> However I can't easily convert it.
> The 'problem' is that the mmap() request can cover multiple dma buffers and
> need not start at the beginning of one.
> 
> Basically we have a PCIe card that has an inbuilt iommu to convert internal
> 32bit addresses to 64bit PCIe ones.
> This has 512 16kB pages.
> So we do a number of dma_alloc_coherent() for 16k blocks.
> The user process then does an mmap() for part of the buffer.
> This request is 4k aligned so we do multiple remap_pfn_range() calls to map
> the disjoint physical (and kernel virtual) buffers into contiguous user memory.
> 
> So both ends see contiguous addresses even though the physical addresses
> are non-contiguous.
> 
> I guess I could modify the vm_start address and then restore it at the end.
> 
> I found this big discussion:
> https://eur01.safelinks.protection.outlook.com/?url=https%3A%2F%2Flore.k
> ernel.org%2Fpatchwork%2Fpatch%2F351245%2F&amp;data=02%7C01%7Csh
> erry.sun%40nxp.com%7C876724689688440581a708d8648dceb3%7C686ea1d
> 3bc2b4c6fa92cd99c5c301635%7C0%7C0%7C637369907516376294&amp;sdat
> a=amSClQfVGhI0%2F3bZfo8HF7UmCktkPluArWW22YlQzMQ%3D&amp;reser
> ved=0
> about these functions.
> 
> The bit about VIPT caches is problematic.
> I don't think you can change the kernel address during mmap.
> What you need to do is defer allocating the user address until you know the
> kernel address.
> Otherwise you get into problems is you try to mmap the same memory into
> two processes.
> This is a general problem even for mmap() of files.
> ISTR SYSV on some sparc systems having to use uncached maps.
> 
> If you might want to mmap two kernel buffers (dma or not) into adjacent
> user addresses then you need some way of allocating the second buffer to
> follow the first one in the VIVT cache.
> 
> 	David
> 
> -
> Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes,
> MK1 1PT, UK Registration No: 1397386 (Wales)


^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [PATCH V2 4/4] misc: vop: mapping kernel memory to user space as noncached
  2020-10-13  2:02       ` Sherry Sun
@ 2020-10-20  1:33         ` Sherry Sun
  0 siblings, 0 replies; 20+ messages in thread
From: Sherry Sun @ 2020-10-20  1:33 UTC (permalink / raw)
  To: 'Christoph Hellwig'
  Cc: sudeep.dutt, ashutosh.dixit, arnd, gregkh, kishon,
	lorenzo.pieralisi, linux-kernel, linux-pci, dl-linux-imx,
	Andy Duan, David Laight

Hi Christoph,

Gentle ping....

Can you give some suggestions on the limitations of dma_mmap_coherent api?

Best regards
Sherry

> Subject: RE: [PATCH V2 4/4] misc: vop: mapping kernel memory to user space
> as noncached
> 
> Hi David, thanks for your information.
> Hi Christoph, please see my comments below.
> 
> > Subject: RE: [PATCH V2 4/4] misc: vop: mapping kernel memory to user
> > space as noncached
> >
> > From: Christoph Hellwig
> > > Sent: 29 September 2020 11:29
> > ...
> > > You can't call remap_pfn_range on memory returned from
> > > dma_alloc_coherent (which btw is not marked uncached on many
> > platforms).
> > >
> > > You need to use the dma_mmap_coherent helper instead.
> >
> 
> I tried to use dma_mmap_coherent helper here, but I met the same problem
> as David said.
> Since the user space calls mmap() to map all the device page and vring size at
> one time.
> 
>      va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq,
> PROT_READ, MAP_SHARED, fd, 0);
> 
> But the physical addresses of device page and multiple vrings are not
> consecutive, so we called multiple remap_pfn_range before. When changing
> to use dma_mmap_coherent, it will return error because vma_pages(the size
> user space want to map) are bigger than the actual size we do multiple
> map(one non-continuous memory size at a time).
> 
> David believes that we could modify the vm_start address before call the
> multiple dma_mmap_coherent to avoid the vma_pages check error and map
> multiple discontinuous memory.
> Do you have any suggestions?
> 
> Best regards
> Sherry
> 
> > Hmmmm. I've a driver that does that.
> > Fortunately it only has to work on x86 where it doesn't matter.
> > However I can't easily convert it.
> > The 'problem' is that the mmap() request can cover multiple dma
> > buffers and need not start at the beginning of one.
> >
> > Basically we have a PCIe card that has an inbuilt iommu to convert
> > internal 32bit addresses to 64bit PCIe ones.
> > This has 512 16kB pages.
> > So we do a number of dma_alloc_coherent() for 16k blocks.
> > The user process then does an mmap() for part of the buffer.
> > This request is 4k aligned so we do multiple remap_pfn_range() calls
> > to map the disjoint physical (and kernel virtual) buffers into contiguous
> user memory.
> >
> > So both ends see contiguous addresses even though the physical
> > addresses are non-contiguous.
> >
> > I guess I could modify the vm_start address and then restore it at the end.
> >
> > I found this big discussion:
> > https://eur01.safelinks.protection.outlook.com/?url=https%3A%2F%2Flore
> > .k
> ernel.org%2Fpatchwork%2Fpatch%2F351245%2F&amp;data=02%7C01%7Csh
> >
> erry.sun%40nxp.com%7C876724689688440581a708d8648dceb3%7C686ea1d
> >
> 3bc2b4c6fa92cd99c5c301635%7C0%7C0%7C637369907516376294&amp;sdat
> >
> a=amSClQfVGhI0%2F3bZfo8HF7UmCktkPluArWW22YlQzMQ%3D&amp;reser
> > ved=0
> > about these functions.
> >
> > The bit about VIPT caches is problematic.
> > I don't think you can change the kernel address during mmap.
> > What you need to do is defer allocating the user address until you
> > know the kernel address.
> > Otherwise you get into problems is you try to mmap the same memory
> > into two processes.
> > This is a general problem even for mmap() of files.
> > ISTR SYSV on some sparc systems having to use uncached maps.
> >
> > If you might want to mmap two kernel buffers (dma or not) into
> > adjacent user addresses then you need some way of allocating the
> > second buffer to follow the first one in the VIVT cache.
> >
> > 	David
> >
> > -
> > Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes,
> > MK1 1PT, UK Registration No: 1397386 (Wales)


^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, back to index

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-29  8:44 [PATCH V2 0/4] Change vring space from nomal memory to dma coherent memory Sherry Sun
2020-09-29  8:44 ` [PATCH V2 1/4] misc: vop: change the way of allocating vring Sherry Sun
2020-09-29 10:23   ` Christoph Hellwig
2020-09-29 13:02     ` Sherry Sun
2020-09-29  8:44 ` [PATCH V2 2/4] misc: vop: do not allocate and reassign the used ring Sherry Sun
2020-09-29 10:24   ` Christoph Hellwig
2020-09-29 13:02     ` Sherry Sun
2020-09-29 14:28       ` David Laight
2020-09-29  8:44 ` [PATCH V2 3/4] misc: vop: simply return the saved dma address instead of virt_to_phys Sherry Sun
2020-09-29 10:26   ` Christoph Hellwig
2020-09-29 13:10     ` Sherry Sun
2020-09-29 18:11       ` Christoph Hellwig
2020-09-30  7:30         ` Sherry Sun
2020-10-05 13:42           ` Christoph Hellwig
2020-09-29  8:44 ` [PATCH V2 4/4] misc: vop: mapping kernel memory to user space as noncached Sherry Sun
2020-09-29 10:28   ` Christoph Hellwig
2020-09-29 13:12     ` Sherry Sun
2020-09-29 15:39     ` David Laight
2020-10-13  2:02       ` Sherry Sun
2020-10-20  1:33         ` Sherry Sun

Linux-PCI Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-pci/0 linux-pci/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-pci linux-pci/ https://lore.kernel.org/linux-pci \
		linux-pci@vger.kernel.org
	public-inbox-index linux-pci

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-pci


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git