linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v5 0/2] media: usb: pwc: Don't use coherent DMA buffers for ISO transfer
@ 2018-08-21 17:06 Matwey V. Kornilov
  2018-08-21 17:06 ` [PATCH v5 1/2] media: usb: pwc: Introduce TRACE_EVENTs for pwc_isoc_handler() Matwey V. Kornilov
  2018-08-21 17:06 ` [PATCH v5 2/2] media: usb: pwc: Don't use coherent DMA buffers for ISO transfer Matwey V. Kornilov
  0 siblings, 2 replies; 27+ messages in thread
From: Matwey V. Kornilov @ 2018-08-21 17:06 UTC (permalink / raw)
  To: linux-media, linux-kernel
  Cc: matwey.kornilov, Matwey V. Kornilov, tfiga, laurent.pinchart,
	stern, ezequiel, hdegoede, hverkuil, mchehab, rostedt, mingo,
	isely, bhumirks, colin.king, kieran.bingham, keiichiw

DMA cocherency slows the transfer down on systems without hardware coherent
DMA. In order to demontrate this we introduce performance measurement
facilities in patch 1 and fix the performance issue in patch 2 in order to
obtain 4 times speedup.

Changes since v4:
 * fix fields order in trace events 
 * minor style fixes

Changes since v3:
 * fix scripts/checkpatch.pl errors
 * use __string to store name in trace events

Changes since v2:
 * use dma_sync_single_for_cpu() to achive better performance
 * remeasured performance

Changes since v1:
 * trace_pwc_handler_exit() call moved to proper place
 * detailed description added for commit 1
 * additional output added to trace to track separate frames

Matwey V. Kornilov (2):
  media: usb: pwc: Introduce TRACE_EVENTs for pwc_isoc_handler()
  media: usb: pwc: Don't use coherent DMA buffers for ISO transfer

 drivers/media/usb/pwc/pwc-if.c | 64 ++++++++++++++++++++++++++++++++---------
 include/trace/events/pwc.h     | 65 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 116 insertions(+), 13 deletions(-)
 create mode 100644 include/trace/events/pwc.h

-- 
2.16.4


^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH v5 1/2] media: usb: pwc: Introduce TRACE_EVENTs for pwc_isoc_handler()
  2018-08-21 17:06 [PATCH v5 0/2] media: usb: pwc: Don't use coherent DMA buffers for ISO transfer Matwey V. Kornilov
@ 2018-08-21 17:06 ` Matwey V. Kornilov
  2018-08-21 19:49   ` Steven Rostedt
  2018-08-21 17:06 ` [PATCH v5 2/2] media: usb: pwc: Don't use coherent DMA buffers for ISO transfer Matwey V. Kornilov
  1 sibling, 1 reply; 27+ messages in thread
From: Matwey V. Kornilov @ 2018-08-21 17:06 UTC (permalink / raw)
  To: linux-media, linux-kernel
  Cc: matwey.kornilov, Matwey V. Kornilov, tfiga, laurent.pinchart,
	stern, ezequiel, hdegoede, hverkuil, mchehab, rostedt, mingo,
	isely, bhumirks, colin.king, kieran.bingham, keiichiw

There were reports that PWC-based webcams don't work at some
embedded ARM platforms. [1] Isochronous transfer handler seems to
work too long leading to the issues in MUSB USB host subsystem.
Also note, that urb->giveback() handlers are still called with
disabled interrupts. In order to be able to measure performance of
PWC driver, traces are introduced in URB handler section.

[1] https://www.spinics.net/lists/linux-usb/msg165735.html

Signed-off-by: Matwey V. Kornilov <matwey@sai.msu.ru>
---
 drivers/media/usb/pwc/pwc-if.c |  7 +++++
 include/trace/events/pwc.h     | 65 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 72 insertions(+)
 create mode 100644 include/trace/events/pwc.h

diff --git a/drivers/media/usb/pwc/pwc-if.c b/drivers/media/usb/pwc/pwc-if.c
index 54b036d39c5b..72d2897a4b9f 100644
--- a/drivers/media/usb/pwc/pwc-if.c
+++ b/drivers/media/usb/pwc/pwc-if.c
@@ -76,6 +76,9 @@
 #include "pwc-dec23.h"
 #include "pwc-dec1.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/pwc.h>
+
 /* Function prototypes and driver templates */
 
 /* hotplug device table support */
@@ -260,6 +263,8 @@ static void pwc_isoc_handler(struct urb *urb)
 	int i, fst, flen;
 	unsigned char *iso_buf = NULL;
 
+	trace_pwc_handler_enter(urb, pdev);
+
 	if (urb->status == -ENOENT || urb->status == -ECONNRESET ||
 	    urb->status == -ESHUTDOWN) {
 		PWC_DEBUG_OPEN("URB (%p) unlinked %ssynchronously.\n",
@@ -348,6 +353,8 @@ static void pwc_isoc_handler(struct urb *urb)
 	}
 
 handler_end:
+	trace_pwc_handler_exit(urb, pdev);
+
 	i = usb_submit_urb(urb, GFP_ATOMIC);
 	if (i != 0)
 		PWC_ERROR("Error (%d) re-submitting urb in pwc_isoc_handler.\n", i);
diff --git a/include/trace/events/pwc.h b/include/trace/events/pwc.h
new file mode 100644
index 000000000000..a2da764a3b41
--- /dev/null
+++ b/include/trace/events/pwc.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(_TRACE_PWC_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_PWC_H
+
+#include <linux/usb.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM pwc
+
+TRACE_EVENT(pwc_handler_enter,
+	TP_PROTO(struct urb *urb, struct pwc_device *pdev),
+	TP_ARGS(urb, pdev),
+	TP_STRUCT__entry(
+		__field(struct urb*, urb)
+		__field(struct pwc_frame_buf*, fbuf)
+		__field(int, urb__status)
+		__field(u32, urb__actual_length)
+		__field(int, fbuf__filled)
+		__string(name, pdev->v4l2_dev.name)
+	),
+	TP_fast_assign(
+		__entry->urb = urb;
+		__entry->fbuf = pdev->fill_buf;
+		__entry->urb__status = urb->status;
+		__entry->urb__actual_length = urb->actual_length;
+		__entry->fbuf__filled = (pdev->fill_buf
+					 ? pdev->fill_buf->filled : 0);
+		__assign_str(name, pdev->v4l2_dev.name);
+	),
+	TP_printk("dev=%s (fbuf=%p filled=%d) urb=%p (status=%d actual_length=%u)",
+		__get_str(name),
+		__entry->fbuf,
+		__entry->fbuf__filled,
+		__entry->urb,
+		__entry->urb__status,
+		__entry->urb__actual_length)
+);
+
+TRACE_EVENT(pwc_handler_exit,
+	TP_PROTO(struct urb *urb, struct pwc_device *pdev),
+	TP_ARGS(urb, pdev),
+	TP_STRUCT__entry(
+		__field(struct urb*, urb)
+		__field(struct pwc_frame_buf*, fbuf)
+		__field(int, fbuf__filled)
+		__string(name, pdev->v4l2_dev.name)
+	),
+	TP_fast_assign(
+		__entry->urb = urb;
+		__entry->fbuf = pdev->fill_buf;
+		__entry->fbuf__filled = pdev->fill_buf->filled;
+		__assign_str(name, pdev->v4l2_dev.name);
+	),
+	TP_printk(" dev=%s (fbuf=%p filled=%d) urb=%p",
+		__get_str(name),
+		__entry->fbuf,
+		__entry->fbuf__filled,
+		__entry->urb)
+);
+
+#endif /* _TRACE_PWC_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
-- 
2.16.4


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH v5 2/2] media: usb: pwc: Don't use coherent DMA buffers for ISO transfer
  2018-08-21 17:06 [PATCH v5 0/2] media: usb: pwc: Don't use coherent DMA buffers for ISO transfer Matwey V. Kornilov
  2018-08-21 17:06 ` [PATCH v5 1/2] media: usb: pwc: Introduce TRACE_EVENTs for pwc_isoc_handler() Matwey V. Kornilov
@ 2018-08-21 17:06 ` Matwey V. Kornilov
  2018-08-28  7:17   ` Matwey V. Kornilov
  2018-10-30 22:00   ` Laurent Pinchart
  1 sibling, 2 replies; 27+ messages in thread
From: Matwey V. Kornilov @ 2018-08-21 17:06 UTC (permalink / raw)
  To: linux-media, linux-kernel
  Cc: matwey.kornilov, Matwey V. Kornilov, tfiga, laurent.pinchart,
	stern, ezequiel, hdegoede, hverkuil, mchehab, rostedt, mingo,
	isely, bhumirks, colin.king, kieran.bingham, keiichiw

DMA cocherency slows the transfer down on systems without hardware
coherent DMA.
Instead we use noncocherent DMA memory and explicit sync at data receive
handler.

Based on previous commit the following performance benchmarks have been
carried out. Average memcpy() data transfer rate (rate) and handler
completion time (time) have been measured when running video stream at
640x480 resolution at 10fps.

x86_64 based system (Intel Core i5-3470). This platform has hardware
coherent DMA support and proposed change doesn't make big difference here.

 * kmalloc:            rate = (2.0 +- 0.4) GBps
                       time = (5.0 +- 3.0) usec
 * usb_alloc_coherent: rate = (3.4 +- 1.2) GBps
                       time = (3.5 +- 3.0) usec

We see that the measurements agree within error ranges in this case.
So theoretically predicted performance downgrade cannot be reliably
measured here.

armv7l based system (TI AM335x BeagleBone Black @ 300MHz). This platform
has no hardware coherent DMA support. DMA coherence is implemented via
disabled page caching that slows down memcpy() due to memory controller
behaviour.

 * kmalloc:            rate =  (114 +- 5) MBps
                       time =   (84 +- 4) usec
 * usb_alloc_coherent: rate = (28.1 +- 0.1) MBps
                       time =  (341 +- 2) usec

Note, that quantative difference leads (this commit leads to 4 times
acceleration) to qualitative behavior change in this case. As it was
stated before, the video stream cannot be successfully received at AM335x
platforms with MUSB based USB host controller due to performance issues
[1].

[1] https://www.spinics.net/lists/linux-usb/msg165735.html

Signed-off-by: Matwey V. Kornilov <matwey@sai.msu.ru>
---
 drivers/media/usb/pwc/pwc-if.c | 57 ++++++++++++++++++++++++++++++++----------
 1 file changed, 44 insertions(+), 13 deletions(-)

diff --git a/drivers/media/usb/pwc/pwc-if.c b/drivers/media/usb/pwc/pwc-if.c
index 72d2897a4b9f..1360722ab423 100644
--- a/drivers/media/usb/pwc/pwc-if.c
+++ b/drivers/media/usb/pwc/pwc-if.c
@@ -159,6 +159,32 @@ static const struct video_device pwc_template = {
 /***************************************************************************/
 /* Private functions */
 
+static void *pwc_alloc_urb_buffer(struct device *dev,
+				  size_t size, dma_addr_t *dma_handle)
+{
+	void *buffer = kmalloc(size, GFP_KERNEL);
+
+	if (!buffer)
+		return NULL;
+
+	*dma_handle = dma_map_single(dev, buffer, size, DMA_FROM_DEVICE);
+	if (dma_mapping_error(dev, *dma_handle)) {
+		kfree(buffer);
+		return NULL;
+	}
+
+	return buffer;
+}
+
+static void pwc_free_urb_buffer(struct device *dev,
+				size_t size,
+				void *buffer,
+				dma_addr_t dma_handle)
+{
+	dma_unmap_single(dev, dma_handle, size, DMA_FROM_DEVICE);
+	kfree(buffer);
+}
+
 static struct pwc_frame_buf *pwc_get_next_fill_buf(struct pwc_device *pdev)
 {
 	unsigned long flags = 0;
@@ -306,6 +332,11 @@ static void pwc_isoc_handler(struct urb *urb)
 	/* Reset ISOC error counter. We did get here, after all. */
 	pdev->visoc_errors = 0;
 
+	dma_sync_single_for_cpu(&urb->dev->dev,
+				urb->transfer_dma,
+				urb->transfer_buffer_length,
+				DMA_FROM_DEVICE);
+
 	/* vsync: 0 = don't copy data
 		  1 = sync-hunt
 		  2 = synched
@@ -428,16 +459,15 @@ static int pwc_isoc_init(struct pwc_device *pdev)
 		urb->dev = udev;
 		urb->pipe = usb_rcvisocpipe(udev, pdev->vendpoint);
 		urb->transfer_flags = URB_ISO_ASAP | URB_NO_TRANSFER_DMA_MAP;
-		urb->transfer_buffer = usb_alloc_coherent(udev,
-							  ISO_BUFFER_SIZE,
-							  GFP_KERNEL,
-							  &urb->transfer_dma);
+		urb->transfer_buffer_length = ISO_BUFFER_SIZE;
+		urb->transfer_buffer = pwc_alloc_urb_buffer(&udev->dev,
+							    urb->transfer_buffer_length,
+							    &urb->transfer_dma);
 		if (urb->transfer_buffer == NULL) {
 			PWC_ERROR("Failed to allocate urb buffer %d\n", i);
 			pwc_isoc_cleanup(pdev);
 			return -ENOMEM;
 		}
-		urb->transfer_buffer_length = ISO_BUFFER_SIZE;
 		urb->complete = pwc_isoc_handler;
 		urb->context = pdev;
 		urb->start_frame = 0;
@@ -488,15 +518,16 @@ static void pwc_iso_free(struct pwc_device *pdev)
 
 	/* Freeing ISOC buffers one by one */
 	for (i = 0; i < MAX_ISO_BUFS; i++) {
-		if (pdev->urbs[i]) {
+		struct urb *urb = pdev->urbs[i];
+
+		if (urb) {
 			PWC_DEBUG_MEMORY("Freeing URB\n");
-			if (pdev->urbs[i]->transfer_buffer) {
-				usb_free_coherent(pdev->udev,
-					pdev->urbs[i]->transfer_buffer_length,
-					pdev->urbs[i]->transfer_buffer,
-					pdev->urbs[i]->transfer_dma);
-			}
-			usb_free_urb(pdev->urbs[i]);
+			if (urb->transfer_buffer)
+				pwc_free_urb_buffer(&urb->dev->dev,
+						    urb->transfer_buffer_length,
+						    urb->transfer_buffer,
+						    urb->transfer_dma);
+			usb_free_urb(urb);
 			pdev->urbs[i] = NULL;
 		}
 	}
-- 
2.16.4


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH v5 1/2] media: usb: pwc: Introduce TRACE_EVENTs for pwc_isoc_handler()
  2018-08-21 17:06 ` [PATCH v5 1/2] media: usb: pwc: Introduce TRACE_EVENTs for pwc_isoc_handler() Matwey V. Kornilov
@ 2018-08-21 19:49   ` Steven Rostedt
  0 siblings, 0 replies; 27+ messages in thread
From: Steven Rostedt @ 2018-08-21 19:49 UTC (permalink / raw)
  To: Matwey V. Kornilov
  Cc: linux-media, linux-kernel, matwey.kornilov, tfiga,
	laurent.pinchart, stern, ezequiel, hdegoede, hverkuil, mchehab,
	mingo, isely, bhumirks, colin.king, kieran.bingham, keiichiw

On Tue, 21 Aug 2018 20:06:28 +0300
"Matwey V. Kornilov" <matwey@sai.msu.ru> wrote:

> There were reports that PWC-based webcams don't work at some
> embedded ARM platforms. [1] Isochronous transfer handler seems to
> work too long leading to the issues in MUSB USB host subsystem.
> Also note, that urb->giveback() handlers are still called with
> disabled interrupts. In order to be able to measure performance of
> PWC driver, traces are introduced in URB handler section.
> 
> [1] https://www.spinics.net/lists/linux-usb/msg165735.html
> 
> Signed-off-by: Matwey V. Kornilov <matwey@sai.msu.ru>
> ---
>

From a tracing perspective,

Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>

-- Steve

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v5 2/2] media: usb: pwc: Don't use coherent DMA buffers for ISO transfer
  2018-08-21 17:06 ` [PATCH v5 2/2] media: usb: pwc: Don't use coherent DMA buffers for ISO transfer Matwey V. Kornilov
@ 2018-08-28  7:17   ` Matwey V. Kornilov
  2018-09-11 18:58     ` Matwey V. Kornilov
  2018-10-30 22:00   ` Laurent Pinchart
  1 sibling, 1 reply; 27+ messages in thread
From: Matwey V. Kornilov @ 2018-08-28  7:17 UTC (permalink / raw)
  To: Linux Media Mailing List, open list
  Cc: Tomasz Figa, Laurent Pinchart, Alan Stern, Ezequiel Garcia,
	Hans de Goede, Hans Verkuil, Mauro Carvalho Chehab,
	Steven Rostedt, mingo, Mike Isely, Bhumika Goyal, Colin King,
	Kieran Bingham, keiichiw

вт, 21 авг. 2018 г. в 20:06, Matwey V. Kornilov <matwey@sai.msu.ru>:
>
> DMA cocherency slows the transfer down on systems without hardware
> coherent DMA.
> Instead we use noncocherent DMA memory and explicit sync at data receive
> handler.
>
> Based on previous commit the following performance benchmarks have been
> carried out. Average memcpy() data transfer rate (rate) and handler
> completion time (time) have been measured when running video stream at
> 640x480 resolution at 10fps.
>
> x86_64 based system (Intel Core i5-3470). This platform has hardware
> coherent DMA support and proposed change doesn't make big difference here.
>
>  * kmalloc:            rate = (2.0 +- 0.4) GBps
>                        time = (5.0 +- 3.0) usec
>  * usb_alloc_coherent: rate = (3.4 +- 1.2) GBps
>                        time = (3.5 +- 3.0) usec
>
> We see that the measurements agree within error ranges in this case.
> So theoretically predicted performance downgrade cannot be reliably
> measured here.
>
> armv7l based system (TI AM335x BeagleBone Black @ 300MHz). This platform
> has no hardware coherent DMA support. DMA coherence is implemented via
> disabled page caching that slows down memcpy() due to memory controller
> behaviour.
>
>  * kmalloc:            rate =  (114 +- 5) MBps
>                        time =   (84 +- 4) usec
>  * usb_alloc_coherent: rate = (28.1 +- 0.1) MBps
>                        time =  (341 +- 2) usec
>
> Note, that quantative difference leads (this commit leads to 4 times
> acceleration) to qualitative behavior change in this case. As it was
> stated before, the video stream cannot be successfully received at AM335x
> platforms with MUSB based USB host controller due to performance issues
> [1].
>
> [1] https://www.spinics.net/lists/linux-usb/msg165735.html
>
> Signed-off-by: Matwey V. Kornilov <matwey@sai.msu.ru>

Ping

> ---
>  drivers/media/usb/pwc/pwc-if.c | 57 ++++++++++++++++++++++++++++++++----------
>  1 file changed, 44 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/media/usb/pwc/pwc-if.c b/drivers/media/usb/pwc/pwc-if.c
> index 72d2897a4b9f..1360722ab423 100644
> --- a/drivers/media/usb/pwc/pwc-if.c
> +++ b/drivers/media/usb/pwc/pwc-if.c
> @@ -159,6 +159,32 @@ static const struct video_device pwc_template = {
>  /***************************************************************************/
>  /* Private functions */
>
> +static void *pwc_alloc_urb_buffer(struct device *dev,
> +                                 size_t size, dma_addr_t *dma_handle)
> +{
> +       void *buffer = kmalloc(size, GFP_KERNEL);
> +
> +       if (!buffer)
> +               return NULL;
> +
> +       *dma_handle = dma_map_single(dev, buffer, size, DMA_FROM_DEVICE);
> +       if (dma_mapping_error(dev, *dma_handle)) {
> +               kfree(buffer);
> +               return NULL;
> +       }
> +
> +       return buffer;
> +}
> +
> +static void pwc_free_urb_buffer(struct device *dev,
> +                               size_t size,
> +                               void *buffer,
> +                               dma_addr_t dma_handle)
> +{
> +       dma_unmap_single(dev, dma_handle, size, DMA_FROM_DEVICE);
> +       kfree(buffer);
> +}
> +
>  static struct pwc_frame_buf *pwc_get_next_fill_buf(struct pwc_device *pdev)
>  {
>         unsigned long flags = 0;
> @@ -306,6 +332,11 @@ static void pwc_isoc_handler(struct urb *urb)
>         /* Reset ISOC error counter. We did get here, after all. */
>         pdev->visoc_errors = 0;
>
> +       dma_sync_single_for_cpu(&urb->dev->dev,
> +                               urb->transfer_dma,
> +                               urb->transfer_buffer_length,
> +                               DMA_FROM_DEVICE);
> +
>         /* vsync: 0 = don't copy data
>                   1 = sync-hunt
>                   2 = synched
> @@ -428,16 +459,15 @@ static int pwc_isoc_init(struct pwc_device *pdev)
>                 urb->dev = udev;
>                 urb->pipe = usb_rcvisocpipe(udev, pdev->vendpoint);
>                 urb->transfer_flags = URB_ISO_ASAP | URB_NO_TRANSFER_DMA_MAP;
> -               urb->transfer_buffer = usb_alloc_coherent(udev,
> -                                                         ISO_BUFFER_SIZE,
> -                                                         GFP_KERNEL,
> -                                                         &urb->transfer_dma);
> +               urb->transfer_buffer_length = ISO_BUFFER_SIZE;
> +               urb->transfer_buffer = pwc_alloc_urb_buffer(&udev->dev,
> +                                                           urb->transfer_buffer_length,
> +                                                           &urb->transfer_dma);
>                 if (urb->transfer_buffer == NULL) {
>                         PWC_ERROR("Failed to allocate urb buffer %d\n", i);
>                         pwc_isoc_cleanup(pdev);
>                         return -ENOMEM;
>                 }
> -               urb->transfer_buffer_length = ISO_BUFFER_SIZE;
>                 urb->complete = pwc_isoc_handler;
>                 urb->context = pdev;
>                 urb->start_frame = 0;
> @@ -488,15 +518,16 @@ static void pwc_iso_free(struct pwc_device *pdev)
>
>         /* Freeing ISOC buffers one by one */
>         for (i = 0; i < MAX_ISO_BUFS; i++) {
> -               if (pdev->urbs[i]) {
> +               struct urb *urb = pdev->urbs[i];
> +
> +               if (urb) {
>                         PWC_DEBUG_MEMORY("Freeing URB\n");
> -                       if (pdev->urbs[i]->transfer_buffer) {
> -                               usb_free_coherent(pdev->udev,
> -                                       pdev->urbs[i]->transfer_buffer_length,
> -                                       pdev->urbs[i]->transfer_buffer,
> -                                       pdev->urbs[i]->transfer_dma);
> -                       }
> -                       usb_free_urb(pdev->urbs[i]);
> +                       if (urb->transfer_buffer)
> +                               pwc_free_urb_buffer(&urb->dev->dev,
> +                                                   urb->transfer_buffer_length,
> +                                                   urb->transfer_buffer,
> +                                                   urb->transfer_dma);
> +                       usb_free_urb(urb);
>                         pdev->urbs[i] = NULL;
>                 }
>         }
> --
> 2.16.4
>


--
With best regards,
Matwey V. Kornilov

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v5 2/2] media: usb: pwc: Don't use coherent DMA buffers for ISO transfer
  2018-08-28  7:17   ` Matwey V. Kornilov
@ 2018-09-11 18:58     ` Matwey V. Kornilov
  2018-09-19 16:12       ` Ezequiel Garcia
  2018-10-10 21:13       ` Matwey V. Kornilov
  0 siblings, 2 replies; 27+ messages in thread
From: Matwey V. Kornilov @ 2018-09-11 18:58 UTC (permalink / raw)
  To: Linux Media Mailing List, open list
  Cc: Tomasz Figa, Laurent Pinchart, Alan Stern, Ezequiel Garcia,
	Hans de Goede, Hans Verkuil, Mauro Carvalho Chehab,
	Steven Rostedt, mingo, Mike Isely, Bhumika Goyal, Colin King,
	Kieran Bingham, keiichiw

вт, 28 авг. 2018 г. в 10:17, Matwey V. Kornilov <matwey.kornilov@gmail.com>:
>
> вт, 21 авг. 2018 г. в 20:06, Matwey V. Kornilov <matwey@sai.msu.ru>:
> >
> > DMA cocherency slows the transfer down on systems without hardware
> > coherent DMA.
> > Instead we use noncocherent DMA memory and explicit sync at data receive
> > handler.
> >
> > Based on previous commit the following performance benchmarks have been
> > carried out. Average memcpy() data transfer rate (rate) and handler
> > completion time (time) have been measured when running video stream at
> > 640x480 resolution at 10fps.
> >
> > x86_64 based system (Intel Core i5-3470). This platform has hardware
> > coherent DMA support and proposed change doesn't make big difference here.
> >
> >  * kmalloc:            rate = (2.0 +- 0.4) GBps
> >                        time = (5.0 +- 3.0) usec
> >  * usb_alloc_coherent: rate = (3.4 +- 1.2) GBps
> >                        time = (3.5 +- 3.0) usec
> >
> > We see that the measurements agree within error ranges in this case.
> > So theoretically predicted performance downgrade cannot be reliably
> > measured here.
> >
> > armv7l based system (TI AM335x BeagleBone Black @ 300MHz). This platform
> > has no hardware coherent DMA support. DMA coherence is implemented via
> > disabled page caching that slows down memcpy() due to memory controller
> > behaviour.
> >
> >  * kmalloc:            rate =  (114 +- 5) MBps
> >                        time =   (84 +- 4) usec
> >  * usb_alloc_coherent: rate = (28.1 +- 0.1) MBps
> >                        time =  (341 +- 2) usec
> >
> > Note, that quantative difference leads (this commit leads to 4 times
> > acceleration) to qualitative behavior change in this case. As it was
> > stated before, the video stream cannot be successfully received at AM335x
> > platforms with MUSB based USB host controller due to performance issues
> > [1].
> >
> > [1] https://www.spinics.net/lists/linux-usb/msg165735.html
> >
> > Signed-off-by: Matwey V. Kornilov <matwey@sai.msu.ru>
>
> Ping
>

Ping

> > ---
> >  drivers/media/usb/pwc/pwc-if.c | 57 ++++++++++++++++++++++++++++++++----------
> >  1 file changed, 44 insertions(+), 13 deletions(-)
> >
> > diff --git a/drivers/media/usb/pwc/pwc-if.c b/drivers/media/usb/pwc/pwc-if.c
> > index 72d2897a4b9f..1360722ab423 100644
> > --- a/drivers/media/usb/pwc/pwc-if.c
> > +++ b/drivers/media/usb/pwc/pwc-if.c
> > @@ -159,6 +159,32 @@ static const struct video_device pwc_template = {
> >  /***************************************************************************/
> >  /* Private functions */
> >
> > +static void *pwc_alloc_urb_buffer(struct device *dev,
> > +                                 size_t size, dma_addr_t *dma_handle)
> > +{
> > +       void *buffer = kmalloc(size, GFP_KERNEL);
> > +
> > +       if (!buffer)
> > +               return NULL;
> > +
> > +       *dma_handle = dma_map_single(dev, buffer, size, DMA_FROM_DEVICE);
> > +       if (dma_mapping_error(dev, *dma_handle)) {
> > +               kfree(buffer);
> > +               return NULL;
> > +       }
> > +
> > +       return buffer;
> > +}
> > +
> > +static void pwc_free_urb_buffer(struct device *dev,
> > +                               size_t size,
> > +                               void *buffer,
> > +                               dma_addr_t dma_handle)
> > +{
> > +       dma_unmap_single(dev, dma_handle, size, DMA_FROM_DEVICE);
> > +       kfree(buffer);
> > +}
> > +
> >  static struct pwc_frame_buf *pwc_get_next_fill_buf(struct pwc_device *pdev)
> >  {
> >         unsigned long flags = 0;
> > @@ -306,6 +332,11 @@ static void pwc_isoc_handler(struct urb *urb)
> >         /* Reset ISOC error counter. We did get here, after all. */
> >         pdev->visoc_errors = 0;
> >
> > +       dma_sync_single_for_cpu(&urb->dev->dev,
> > +                               urb->transfer_dma,
> > +                               urb->transfer_buffer_length,
> > +                               DMA_FROM_DEVICE);
> > +
> >         /* vsync: 0 = don't copy data
> >                   1 = sync-hunt
> >                   2 = synched
> > @@ -428,16 +459,15 @@ static int pwc_isoc_init(struct pwc_device *pdev)
> >                 urb->dev = udev;
> >                 urb->pipe = usb_rcvisocpipe(udev, pdev->vendpoint);
> >                 urb->transfer_flags = URB_ISO_ASAP | URB_NO_TRANSFER_DMA_MAP;
> > -               urb->transfer_buffer = usb_alloc_coherent(udev,
> > -                                                         ISO_BUFFER_SIZE,
> > -                                                         GFP_KERNEL,
> > -                                                         &urb->transfer_dma);
> > +               urb->transfer_buffer_length = ISO_BUFFER_SIZE;
> > +               urb->transfer_buffer = pwc_alloc_urb_buffer(&udev->dev,
> > +                                                           urb->transfer_buffer_length,
> > +                                                           &urb->transfer_dma);
> >                 if (urb->transfer_buffer == NULL) {
> >                         PWC_ERROR("Failed to allocate urb buffer %d\n", i);
> >                         pwc_isoc_cleanup(pdev);
> >                         return -ENOMEM;
> >                 }
> > -               urb->transfer_buffer_length = ISO_BUFFER_SIZE;
> >                 urb->complete = pwc_isoc_handler;
> >                 urb->context = pdev;
> >                 urb->start_frame = 0;
> > @@ -488,15 +518,16 @@ static void pwc_iso_free(struct pwc_device *pdev)
> >
> >         /* Freeing ISOC buffers one by one */
> >         for (i = 0; i < MAX_ISO_BUFS; i++) {
> > -               if (pdev->urbs[i]) {
> > +               struct urb *urb = pdev->urbs[i];
> > +
> > +               if (urb) {
> >                         PWC_DEBUG_MEMORY("Freeing URB\n");
> > -                       if (pdev->urbs[i]->transfer_buffer) {
> > -                               usb_free_coherent(pdev->udev,
> > -                                       pdev->urbs[i]->transfer_buffer_length,
> > -                                       pdev->urbs[i]->transfer_buffer,
> > -                                       pdev->urbs[i]->transfer_dma);
> > -                       }
> > -                       usb_free_urb(pdev->urbs[i]);
> > +                       if (urb->transfer_buffer)
> > +                               pwc_free_urb_buffer(&urb->dev->dev,
> > +                                                   urb->transfer_buffer_length,
> > +                                                   urb->transfer_buffer,
> > +                                                   urb->transfer_dma);
> > +                       usb_free_urb(urb);
> >                         pdev->urbs[i] = NULL;
> >                 }
> >         }
> > --
> > 2.16.4
> >
>
>
> --
> With best regards,
> Matwey V. Kornilov



-- 
With best regards,
Matwey V. Kornilov

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v5 2/2] media: usb: pwc: Don't use coherent DMA buffers for ISO transfer
  2018-09-11 18:58     ` Matwey V. Kornilov
@ 2018-09-19 16:12       ` Ezequiel Garcia
  2018-10-10 21:13       ` Matwey V. Kornilov
  1 sibling, 0 replies; 27+ messages in thread
From: Ezequiel Garcia @ 2018-09-19 16:12 UTC (permalink / raw)
  To: Matwey V. Kornilov, Linux Media Mailing List, open list
  Cc: Tomasz Figa, Laurent Pinchart, Alan Stern, Hans de Goede,
	Hans Verkuil, Mauro Carvalho Chehab, Steven Rostedt, mingo,
	Mike Isely, Bhumika Goyal, Colin King, Kieran Bingham, keiichiw

On Tue, 2018-09-11 at 21:58 +0300, Matwey V. Kornilov wrote:
> вт, 28 авг. 2018 г. в 10:17, Matwey V. Kornilov <matwey.kornilov@gmail.com>:
> > 
> > вт, 21 авг. 2018 г. в 20:06, Matwey V. Kornilov <matwey@sai.msu.ru>:
> > > 
> > > DMA cocherency slows the transfer down on systems without hardware
> > > coherent DMA.
> > > Instead we use noncocherent DMA memory and explicit sync at data receive
> > > handler.
> > > 
> > > Based on previous commit the following performance benchmarks have been
> > > carried out. Average memcpy() data transfer rate (rate) and handler
> > > completion time (time) have been measured when running video stream at
> > > 640x480 resolution at 10fps.
> > > 
> > > x86_64 based system (Intel Core i5-3470). This platform has hardware
> > > coherent DMA support and proposed change doesn't make big difference here.
> > > 
> > >  * kmalloc:            rate = (2.0 +- 0.4) GBps
> > >                        time = (5.0 +- 3.0) usec
> > >  * usb_alloc_coherent: rate = (3.4 +- 1.2) GBps
> > >                        time = (3.5 +- 3.0) usec
> > > 
> > > We see that the measurements agree within error ranges in this case.
> > > So theoretically predicted performance downgrade cannot be reliably
> > > measured here.
> > > 
> > > armv7l based system (TI AM335x BeagleBone Black @ 300MHz). This platform
> > > has no hardware coherent DMA support. DMA coherence is implemented via
> > > disabled page caching that slows down memcpy() due to memory controller
> > > behaviour.
> > > 
> > >  * kmalloc:            rate =  (114 +- 5) MBps
> > >                        time =   (84 +- 4) usec
> > >  * usb_alloc_coherent: rate = (28.1 +- 0.1) MBps
> > >                        time =  (341 +- 2) usec
> > > 
> > > Note, that quantative difference leads (this commit leads to 4 times
> > > acceleration) to qualitative behavior change in this case. As it was
> > > stated before, the video stream cannot be successfully received at AM335x
> > > platforms with MUSB based USB host controller due to performance issues
> > > [1].
> > > 
> > > [1] https://www.spinics.net/lists/linux-usb/msg165735.html
> > > 
> > > Signed-off-by: Matwey V. Kornilov <matwey@sai.msu.ru>
> > 
> > Ping
> > 

We are already using slab buffers in em28xx, so I think this change
makes sense, until we have proper a non-coherent allocation API.

Acked-by: Ezequiel Garcia <ezequiel@collabora.com>

Thanks a lot Matwey for your work.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v5 2/2] media: usb: pwc: Don't use coherent DMA buffers for ISO transfer
  2018-09-11 18:58     ` Matwey V. Kornilov
  2018-09-19 16:12       ` Ezequiel Garcia
@ 2018-10-10 21:13       ` Matwey V. Kornilov
  1 sibling, 0 replies; 27+ messages in thread
From: Matwey V. Kornilov @ 2018-10-10 21:13 UTC (permalink / raw)
  To: Linux Media Mailing List, open list
  Cc: Tomasz Figa, Laurent Pinchart, Alan Stern, Ezequiel Garcia,
	Hans de Goede, Hans Verkuil, Mauro Carvalho Chehab,
	Steven Rostedt, mingo, Mike Isely, Bhumika Goyal, Colin King,
	Kieran Bingham, keiichiw

вт, 11 сент. 2018 г. в 21:58, Matwey V. Kornilov <matwey.kornilov@gmail.com>:
>
> вт, 28 авг. 2018 г. в 10:17, Matwey V. Kornilov <matwey.kornilov@gmail.com>:
> >
> > вт, 21 авг. 2018 г. в 20:06, Matwey V. Kornilov <matwey@sai.msu.ru>:
> > >
> > > DMA cocherency slows the transfer down on systems without hardware
> > > coherent DMA.
> > > Instead we use noncocherent DMA memory and explicit sync at data receive
> > > handler.
> > >
> > > Based on previous commit the following performance benchmarks have been
> > > carried out. Average memcpy() data transfer rate (rate) and handler
> > > completion time (time) have been measured when running video stream at
> > > 640x480 resolution at 10fps.
> > >
> > > x86_64 based system (Intel Core i5-3470). This platform has hardware
> > > coherent DMA support and proposed change doesn't make big difference here.
> > >
> > >  * kmalloc:            rate = (2.0 +- 0.4) GBps
> > >                        time = (5.0 +- 3.0) usec
> > >  * usb_alloc_coherent: rate = (3.4 +- 1.2) GBps
> > >                        time = (3.5 +- 3.0) usec
> > >
> > > We see that the measurements agree within error ranges in this case.
> > > So theoretically predicted performance downgrade cannot be reliably
> > > measured here.
> > >
> > > armv7l based system (TI AM335x BeagleBone Black @ 300MHz). This platform
> > > has no hardware coherent DMA support. DMA coherence is implemented via
> > > disabled page caching that slows down memcpy() due to memory controller
> > > behaviour.
> > >
> > >  * kmalloc:            rate =  (114 +- 5) MBps
> > >                        time =   (84 +- 4) usec
> > >  * usb_alloc_coherent: rate = (28.1 +- 0.1) MBps
> > >                        time =  (341 +- 2) usec
> > >
> > > Note, that quantative difference leads (this commit leads to 4 times
> > > acceleration) to qualitative behavior change in this case. As it was
> > > stated before, the video stream cannot be successfully received at AM335x
> > > platforms with MUSB based USB host controller due to performance issues
> > > [1].
> > >
> > > [1] https://www.spinics.net/lists/linux-usb/msg165735.html
> > >
> > > Signed-off-by: Matwey V. Kornilov <matwey@sai.msu.ru>
> >
> > Ping
> >
>
> Ping
>

Ping

> > > ---
> > >  drivers/media/usb/pwc/pwc-if.c | 57 ++++++++++++++++++++++++++++++++----------
> > >  1 file changed, 44 insertions(+), 13 deletions(-)
> > >
> > > diff --git a/drivers/media/usb/pwc/pwc-if.c b/drivers/media/usb/pwc/pwc-if.c
> > > index 72d2897a4b9f..1360722ab423 100644
> > > --- a/drivers/media/usb/pwc/pwc-if.c
> > > +++ b/drivers/media/usb/pwc/pwc-if.c
> > > @@ -159,6 +159,32 @@ static const struct video_device pwc_template = {
> > >  /***************************************************************************/
> > >  /* Private functions */
> > >
> > > +static void *pwc_alloc_urb_buffer(struct device *dev,
> > > +                                 size_t size, dma_addr_t *dma_handle)
> > > +{
> > > +       void *buffer = kmalloc(size, GFP_KERNEL);
> > > +
> > > +       if (!buffer)
> > > +               return NULL;
> > > +
> > > +       *dma_handle = dma_map_single(dev, buffer, size, DMA_FROM_DEVICE);
> > > +       if (dma_mapping_error(dev, *dma_handle)) {
> > > +               kfree(buffer);
> > > +               return NULL;
> > > +       }
> > > +
> > > +       return buffer;
> > > +}
> > > +
> > > +static void pwc_free_urb_buffer(struct device *dev,
> > > +                               size_t size,
> > > +                               void *buffer,
> > > +                               dma_addr_t dma_handle)
> > > +{
> > > +       dma_unmap_single(dev, dma_handle, size, DMA_FROM_DEVICE);
> > > +       kfree(buffer);
> > > +}
> > > +
> > >  static struct pwc_frame_buf *pwc_get_next_fill_buf(struct pwc_device *pdev)
> > >  {
> > >         unsigned long flags = 0;
> > > @@ -306,6 +332,11 @@ static void pwc_isoc_handler(struct urb *urb)
> > >         /* Reset ISOC error counter. We did get here, after all. */
> > >         pdev->visoc_errors = 0;
> > >
> > > +       dma_sync_single_for_cpu(&urb->dev->dev,
> > > +                               urb->transfer_dma,
> > > +                               urb->transfer_buffer_length,
> > > +                               DMA_FROM_DEVICE);
> > > +
> > >         /* vsync: 0 = don't copy data
> > >                   1 = sync-hunt
> > >                   2 = synched
> > > @@ -428,16 +459,15 @@ static int pwc_isoc_init(struct pwc_device *pdev)
> > >                 urb->dev = udev;
> > >                 urb->pipe = usb_rcvisocpipe(udev, pdev->vendpoint);
> > >                 urb->transfer_flags = URB_ISO_ASAP | URB_NO_TRANSFER_DMA_MAP;
> > > -               urb->transfer_buffer = usb_alloc_coherent(udev,
> > > -                                                         ISO_BUFFER_SIZE,
> > > -                                                         GFP_KERNEL,
> > > -                                                         &urb->transfer_dma);
> > > +               urb->transfer_buffer_length = ISO_BUFFER_SIZE;
> > > +               urb->transfer_buffer = pwc_alloc_urb_buffer(&udev->dev,
> > > +                                                           urb->transfer_buffer_length,
> > > +                                                           &urb->transfer_dma);
> > >                 if (urb->transfer_buffer == NULL) {
> > >                         PWC_ERROR("Failed to allocate urb buffer %d\n", i);
> > >                         pwc_isoc_cleanup(pdev);
> > >                         return -ENOMEM;
> > >                 }
> > > -               urb->transfer_buffer_length = ISO_BUFFER_SIZE;
> > >                 urb->complete = pwc_isoc_handler;
> > >                 urb->context = pdev;
> > >                 urb->start_frame = 0;
> > > @@ -488,15 +518,16 @@ static void pwc_iso_free(struct pwc_device *pdev)
> > >
> > >         /* Freeing ISOC buffers one by one */
> > >         for (i = 0; i < MAX_ISO_BUFS; i++) {
> > > -               if (pdev->urbs[i]) {
> > > +               struct urb *urb = pdev->urbs[i];
> > > +
> > > +               if (urb) {
> > >                         PWC_DEBUG_MEMORY("Freeing URB\n");
> > > -                       if (pdev->urbs[i]->transfer_buffer) {
> > > -                               usb_free_coherent(pdev->udev,
> > > -                                       pdev->urbs[i]->transfer_buffer_length,
> > > -                                       pdev->urbs[i]->transfer_buffer,
> > > -                                       pdev->urbs[i]->transfer_dma);
> > > -                       }
> > > -                       usb_free_urb(pdev->urbs[i]);
> > > +                       if (urb->transfer_buffer)
> > > +                               pwc_free_urb_buffer(&urb->dev->dev,
> > > +                                                   urb->transfer_buffer_length,
> > > +                                                   urb->transfer_buffer,
> > > +                                                   urb->transfer_dma);
> > > +                       usb_free_urb(urb);
> > >                         pdev->urbs[i] = NULL;
> > >                 }
> > >         }
> > > --
> > > 2.16.4
> > >
> >
> >
> > --
> > With best regards,
> > Matwey V. Kornilov
>
>
>
> --
> With best regards,
> Matwey V. Kornilov



-- 
With best regards,
Matwey V. Kornilov

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v5 2/2] media: usb: pwc: Don't use coherent DMA buffers for ISO transfer
  2018-08-21 17:06 ` [PATCH v5 2/2] media: usb: pwc: Don't use coherent DMA buffers for ISO transfer Matwey V. Kornilov
  2018-08-28  7:17   ` Matwey V. Kornilov
@ 2018-10-30 22:00   ` Laurent Pinchart
  2018-10-31  5:38     ` Christoph Hellwig
  2018-12-07 15:25     ` Christoph Hellwig
  1 sibling, 2 replies; 27+ messages in thread
From: Laurent Pinchart @ 2018-10-30 22:00 UTC (permalink / raw)
  To: Matwey V. Kornilov
  Cc: linux-media, linux-kernel, matwey.kornilov, tfiga, stern,
	ezequiel, hdegoede, hverkuil, mchehab, rostedt, mingo, isely,
	bhumirks, colin.king, kieran.bingham, keiichiw,
	Christoph Hellwig

Hi Matwey,

(CC'ing Christoph)

Thank you for the patch, and my apologies for the late answer. For the record, 
I don't think this should have been blocked by lack of time on my side: even 
if I'm one of the core developers in the subsystem, and possibly have the most 
experience with USB webcams recently due to uvcvideo, patches shouldn't be 
blocked indefinitely by the absence of reply from anyone, me included.

(On a side note, thanks to Ezequiel for pointing my attention to this patch in 
our face to face meeting during ELCE, side channels are very useful in this 
kind of situation, including IRC.)

On Tuesday, 21 August 2018 20:06:29 EET Matwey V. Kornilov wrote:
> DMA cocherency slows the transfer down on systems without hardware
> coherent DMA.
> Instead we use noncocherent DMA memory and explicit sync at data receive
> handler.
> 
> Based on previous commit the following performance benchmarks have been
> carried out. Average memcpy() data transfer rate (rate) and handler
> completion time (time) have been measured when running video stream at
> 640x480 resolution at 10fps.
> 
> x86_64 based system (Intel Core i5-3470). This platform has hardware
> coherent DMA support and proposed change doesn't make big difference here.
> 
>  * kmalloc:            rate = (2.0 +- 0.4) GBps
>                        time = (5.0 +- 3.0) usec
>  * usb_alloc_coherent: rate = (3.4 +- 1.2) GBps
>                        time = (3.5 +- 3.0) usec
> 
> We see that the measurements agree within error ranges in this case.
> So theoretically predicted performance downgrade cannot be reliably
> measured here.
> 
> armv7l based system (TI AM335x BeagleBone Black @ 300MHz). This platform
> has no hardware coherent DMA support. DMA coherence is implemented via
> disabled page caching that slows down memcpy() due to memory controller
> behaviour.
> 
>  * kmalloc:            rate =  (114 +- 5) MBps
>                        time =   (84 +- 4) usec
>  * usb_alloc_coherent: rate = (28.1 +- 0.1) MBps
>                        time =  (341 +- 2) usec
> 
> Note, that quantative difference leads (this commit leads to 4 times
> acceleration) to qualitative behavior change in this case. As it was
> stated before, the video stream cannot be successfully received at AM335x
> platforms with MUSB based USB host controller due to performance issues
> [1].
> 
> [1] https://www.spinics.net/lists/linux-usb/msg165735.html
> 
> Signed-off-by: Matwey V. Kornilov <matwey@sai.msu.ru>
> ---
>  drivers/media/usb/pwc/pwc-if.c | 57 ++++++++++++++++++++++++++++++--------
>  1 file changed, 44 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/media/usb/pwc/pwc-if.c b/drivers/media/usb/pwc/pwc-if.c
> index 72d2897a4b9f..1360722ab423 100644
> --- a/drivers/media/usb/pwc/pwc-if.c
> +++ b/drivers/media/usb/pwc/pwc-if.c
> @@ -159,6 +159,32 @@ static const struct video_device pwc_template = {
>  /**************************************************************************
> */ /* Private functions */
> 
> +static void *pwc_alloc_urb_buffer(struct device *dev,
> +				  size_t size, dma_addr_t *dma_handle)
> +{
> +	void *buffer = kmalloc(size, GFP_KERNEL);
> +
> +	if (!buffer)
> +		return NULL;
> +
> +	*dma_handle = dma_map_single(dev, buffer, size, DMA_FROM_DEVICE);
> +	if (dma_mapping_error(dev, *dma_handle)) {
> +		kfree(buffer);
> +		return NULL;
> +	}
> +
> +	return buffer;

As discussed before, we're clearly missing a proper non-coherent memory 
allocation API. As much as I would like to see a volunteer for this, I don't 
think it's a reason to block the performance improvement we get from this 
patch.

This being said, I'm a bit concerned about the allocation of 16kB blocks from 
kmalloc(), and believe that the priority of the non-coherent memory allocation 
API implementation should be increased. Christoph, you mentioned in a recent 
discussion on this topic that you are working on removing the existing non-
coherent DMA allocation API, what is your opinion on how we should gllobally 
solve the problem that this patch addresses ?

> +}
> +
> +static void pwc_free_urb_buffer(struct device *dev,
> +				size_t size,
> +				void *buffer,
> +				dma_addr_t dma_handle)
> +{
> +	dma_unmap_single(dev, dma_handle, size, DMA_FROM_DEVICE);
> +	kfree(buffer);
> +}
> +
>  static struct pwc_frame_buf *pwc_get_next_fill_buf(struct pwc_device *pdev)
> {
>  	unsigned long flags = 0;
> @@ -306,6 +332,11 @@ static void pwc_isoc_handler(struct urb *urb)
>  	/* Reset ISOC error counter. We did get here, after all. */
>  	pdev->visoc_errors = 0;
> 
> +	dma_sync_single_for_cpu(&urb->dev->dev,
> +				urb->transfer_dma,
> +				urb->transfer_buffer_length,
> +				DMA_FROM_DEVICE);
> +

As explained before as well, I think we need dma_sync_single_for_device() 
calls, and I know they would degrade performances until we fix the problem on 
the DMA mapping API side. This is not a reason to block the patch either. I 
would appreciate, however, if a comment could be added to the place where 
dma_sync_single_for_device() should be called, to explain the problem.

Is there anyone who would like to volunteer to drive the effort on the DMA 
mapping API side ? I'm unfortunately too short on time to address this myself 
:-(

>  	/* vsync: 0 = don't copy data
>  		  1 = sync-hunt
>  		  2 = synched
> @@ -428,16 +459,15 @@ static int pwc_isoc_init(struct pwc_device *pdev)
>  		urb->dev = udev;
>  		urb->pipe = usb_rcvisocpipe(udev, pdev->vendpoint);
>  		urb->transfer_flags = URB_ISO_ASAP | URB_NO_TRANSFER_DMA_MAP;
> -		urb->transfer_buffer = usb_alloc_coherent(udev,
> -							  ISO_BUFFER_SIZE,
> -							  GFP_KERNEL,
> -							  &urb->transfer_dma);
> +		urb->transfer_buffer_length = ISO_BUFFER_SIZE;
> +		urb->transfer_buffer = pwc_alloc_urb_buffer(&udev->dev,
> +							    urb->transfer_buffer_length,
> +							    &urb->transfer_dma);
>  		if (urb->transfer_buffer == NULL) {
>  			PWC_ERROR("Failed to allocate urb buffer %d\n", i);
>  			pwc_isoc_cleanup(pdev);
>  			return -ENOMEM;
>  		}
> -		urb->transfer_buffer_length = ISO_BUFFER_SIZE;
>  		urb->complete = pwc_isoc_handler;
>  		urb->context = pdev;
>  		urb->start_frame = 0;
> @@ -488,15 +518,16 @@ static void pwc_iso_free(struct pwc_device *pdev)
> 
>  	/* Freeing ISOC buffers one by one */
>  	for (i = 0; i < MAX_ISO_BUFS; i++) {
> -		if (pdev->urbs[i]) {
> +		struct urb *urb = pdev->urbs[i];
> +
> +		if (urb) {
>  			PWC_DEBUG_MEMORY("Freeing URB\n");
> -			if (pdev->urbs[i]->transfer_buffer) {
> -				usb_free_coherent(pdev->udev,
> -					pdev->urbs[i]->transfer_buffer_length,
> -					pdev->urbs[i]->transfer_buffer,
> -					pdev->urbs[i]->transfer_dma);
> -			}
> -			usb_free_urb(pdev->urbs[i]);
> +			if (urb->transfer_buffer)
> +				pwc_free_urb_buffer(&urb->dev->dev,
> +						    urb->transfer_buffer_length,
> +						    urb->transfer_buffer,
> +						    urb->transfer_dma);
> +			usb_free_urb(urb);
>  			pdev->urbs[i] = NULL;
>  		}
>  	}

The rest of the patch looks fine, so

Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>

(if possible with a comment about dma_sync_single_for_device() added)

-- 
Regards,

Laurent Pinchart




^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v5 2/2] media: usb: pwc: Don't use coherent DMA buffers for ISO transfer
  2018-10-30 22:00   ` Laurent Pinchart
@ 2018-10-31  5:38     ` Christoph Hellwig
  2018-12-07 15:25     ` Christoph Hellwig
  1 sibling, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2018-10-31  5:38 UTC (permalink / raw)
  To: Laurent Pinchart
  Cc: Matwey V. Kornilov, linux-media, linux-kernel, matwey.kornilov,
	tfiga, stern, ezequiel, hdegoede, hverkuil, mchehab, rostedt,
	mingo, isely, bhumirks, colin.king, kieran.bingham, keiichiw,
	Christoph Hellwig

On Wed, Oct 31, 2018 at 12:00:12AM +0200, Laurent Pinchart wrote:
> As discussed before, we're clearly missing a proper non-coherent memory 
> allocation API. As much as I would like to see a volunteer for this, I don't 
> think it's a reason to block the performance improvement we get from this 
> patch.
> 
> This being said, I'm a bit concerned about the allocation of 16kB blocks from 
> kmalloc(), and believe that the priority of the non-coherent memory allocation 
> API implementation should be increased. Christoph, you mentioned in a recent 
> discussion on this topic that you are working on removing the existing non-
> coherent DMA allocation API, what is your opinion on how we should gllobally 
> solve the problem that this patch addresses ?

I hope to address this on the dma-mapping side for this merge window.
My current idea is to add (back) add dma_alloc_noncoherent-like API
(name to be determindes).  This would be very similar to to the
DMA_ATTR_NON_CONSISTENT to dma_alloc_attrs with the following
differences:

 - it must actually be implemented by every dma_map_ops instance, no
   falling back to dma_alloc_coherent like semantics.  For all actually
   coherent ops this is trivial as there is no difference in semantics
   and we can fall back to the 'coherent' semantics, for non-coherent
   direct mappings it also is mostly trivial as we generally can use
   dma_direct_alloc.  The only instances that will need real work are
   IOMMUs that support non-coherent access, but there is only about
   a handfull of those.
 - instead of using the only vaguely defined dma_cache_sync for
   ownership transfers we'll use dma_sync_single_* which are well
   defined and available everywhere

I'll try to prioritise this to get done early in the merge window,
but I'll need someone else do the USB side.

> > +	dma_sync_single_for_cpu(&urb->dev->dev,
> > +				urb->transfer_dma,
> > +				urb->transfer_buffer_length,
> > +				DMA_FROM_DEVICE);
> > +
> 
> As explained before as well, I think we need dma_sync_single_for_device() 
> calls, and I know they would degrade performances until we fix the problem on 
> the DMA mapping API side. This is not a reason to block the patch either. I 
> would appreciate, however, if a comment could be added to the place where 
> dma_sync_single_for_device() should be called, to explain the problem.

Yes, as a rule of thumb every dma_sync_single_for_cpu call needs to pair
with a previous dma_sync_single_for_device call.  (Exceptions like
selective use of DMA_ATTR_SKIP_CPU_SYNC proove the rule)

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v5 2/2] media: usb: pwc: Don't use coherent DMA buffers for ISO transfer
  2018-10-30 22:00   ` Laurent Pinchart
  2018-10-31  5:38     ` Christoph Hellwig
@ 2018-12-07 15:25     ` Christoph Hellwig
  2018-12-12  8:57       ` Tomasz Figa
  1 sibling, 1 reply; 27+ messages in thread
From: Christoph Hellwig @ 2018-12-07 15:25 UTC (permalink / raw)
  To: Laurent Pinchart
  Cc: Matwey V. Kornilov, linux-media, linux-kernel, matwey.kornilov,
	tfiga, stern, ezequiel, hdegoede, hverkuil, mchehab, rostedt,
	mingo, isely, bhumirks, colin.king, kieran.bingham, keiichiw,
	Christoph Hellwig

Folks, can you take a look at this tree and see if this is useful
for USB:

http://git.infradead.org/users/hch/misc.git/shortlog/refs/heads/dma-noncoherent-allocator

The idea is that you use dma_alloc_attrs with the DMA_ATTR_NON_CONSISTENT
now that I've made sure it is avaiable everywhere [1], and we can use
dma_sync_single_* on it.

The only special case USB will need are the HCD_LOCAL_MEM devices, for
which we must use dma_alloc_coherent (or dma_alloc_attrs without
DMA_ATTR_NON_CONSISTENT) and must skip the dma_sync_single_* calls,
so we'll probably need USB subsystem wrappers for those calls.

[1] except powerpc in this tree - I have another series to make powerpc
use the generic dma noncoherent code, which would cover it.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v5 2/2] media: usb: pwc: Don't use coherent DMA buffers for ISO transfer
  2018-12-07 15:25     ` Christoph Hellwig
@ 2018-12-12  8:57       ` Tomasz Figa
  2018-12-12  9:09         ` Christoph Hellwig
  0 siblings, 1 reply; 27+ messages in thread
From: Tomasz Figa @ 2018-12-12  8:57 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Laurent Pinchart, Matwey V. Kornilov, Linux Media Mailing List,
	Linux Kernel Mailing List, Matwey V. Kornilov, Alan Stern,
	Ezequiel Garcia, hdegoede, Hans Verkuil, Mauro Carvalho Chehab,
	rostedt, mingo, Mike Isely, Bhumika Goyal, Colin King,
	Kieran Bingham, keiichiw

Hi Christoph,

On Sat, Dec 8, 2018 at 12:25 AM Christoph Hellwig <hch@infradead.org> wrote:
>
> Folks, can you take a look at this tree and see if this is useful
> for USB:
>
> http://git.infradead.org/users/hch/misc.git/shortlog/refs/heads/dma-noncoherent-allocator
>
> The idea is that you use dma_alloc_attrs with the DMA_ATTR_NON_CONSISTENT
> now that I've made sure it is avaiable everywhere [1], and we can use
> dma_sync_single_* on it.

How about dma_sync_sg_*()? I'd expect some drivers to export/import
such memory via sg, since that's the typical way of describing memory
in DMA-buf.

>
> The only special case USB will need are the HCD_LOCAL_MEM devices, for
> which we must use dma_alloc_coherent (or dma_alloc_attrs without
> DMA_ATTR_NON_CONSISTENT) and must skip the dma_sync_single_* calls,
> so we'll probably need USB subsystem wrappers for those calls.
>
> [1] except powerpc in this tree - I have another series to make powerpc
> use the generic dma noncoherent code, which would cover it.

Sounds good to me. Thanks for working on this. I'd be happy to be on
CC and help with review when you post the patches later.

Best regards,
Tomasz

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v5 2/2] media: usb: pwc: Don't use coherent DMA buffers for ISO transfer
  2018-12-12  8:57       ` Tomasz Figa
@ 2018-12-12  9:09         ` Christoph Hellwig
  2018-12-12  9:34           ` Tomasz Figa
  0 siblings, 1 reply; 27+ messages in thread
From: Christoph Hellwig @ 2018-12-12  9:09 UTC (permalink / raw)
  To: Tomasz Figa
  Cc: Christoph Hellwig, Laurent Pinchart, Matwey V. Kornilov,
	Linux Media Mailing List, Linux Kernel Mailing List,
	Matwey V. Kornilov, Alan Stern, Ezequiel Garcia, hdegoede,
	Hans Verkuil, Mauro Carvalho Chehab, rostedt, mingo, Mike Isely,
	Bhumika Goyal, Colin King, Kieran Bingham, keiichiw

On Wed, Dec 12, 2018 at 05:57:02PM +0900, Tomasz Figa wrote:
> How about dma_sync_sg_*()? I'd expect some drivers to export/import
> such memory via sg, since that's the typical way of describing memory
> in DMA-buf.

The way it is implemented dma_sync_sg_* would just work, however there
really should be no need to have sglists for buffers created by this
API.

> Sounds good to me. Thanks for working on this. I'd be happy to be on
> CC and help with review when you post the patches later.

The patches were already posted here:

https://lists.linuxfoundation.org/pipermail/iommu/2018-December/031982.html

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v5 2/2] media: usb: pwc: Don't use coherent DMA buffers for ISO transfer
  2018-12-12  9:09         ` Christoph Hellwig
@ 2018-12-12  9:34           ` Tomasz Figa
  2018-12-12 13:54             ` Christoph Hellwig
  0 siblings, 1 reply; 27+ messages in thread
From: Tomasz Figa @ 2018-12-12  9:34 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Laurent Pinchart, Matwey V. Kornilov, Linux Media Mailing List,
	Linux Kernel Mailing List, Matwey V. Kornilov, Alan Stern,
	Ezequiel Garcia, hdegoede, Hans Verkuil, Mauro Carvalho Chehab,
	rostedt, mingo, Mike Isely, Bhumika Goyal, Colin King,
	Kieran Bingham, keiichiw

On Wed, Dec 12, 2018 at 6:09 PM Christoph Hellwig <hch@infradead.org> wrote:
>
> On Wed, Dec 12, 2018 at 05:57:02PM +0900, Tomasz Figa wrote:
> > How about dma_sync_sg_*()? I'd expect some drivers to export/import
> > such memory via sg, since that's the typical way of describing memory
> > in DMA-buf.
>
> The way it is implemented dma_sync_sg_* would just work, however there
> really should be no need to have sglists for buffers created by this
> API.

The typical DMA-buf import/export flow is as follows:
1) Driver X allocates buffer A using this API for device x and gets a
DMA address inside x's DMA (or IOVA) address space.
2) Driver X creates a dma_buf D(A), backed by buffer A and gives the
user space process a file descriptor FD(A) referring to it.
3) Driver Y gets FD(A) from the user space and needs to map it into
the DMA/IOVA address space of device y. It doe it by calling
dma_buf_map_attachment() which returns an sg_table describing the
mapping.

And then I realized that actually there is no need for the importer
(driver Y) to call dma_sync_*() on its own, since the exporter (driver
X) is expected to map and sync in its .map_dma_buf() dma_buf_ops
callback. But there is still a need to have an sg_table created for
the buffer, because it's what dma_buf_map_attachment() returns.

>
> > Sounds good to me. Thanks for working on this. I'd be happy to be on
> > CC and help with review when you post the patches later.
>
> The patches were already posted here:
>
> https://lists.linuxfoundation.org/pipermail/iommu/2018-December/031982.html

Okay, thanks. I can see it in my inbox.

Best regards,
Tomasz

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v5 2/2] media: usb: pwc: Don't use coherent DMA buffers for ISO transfer
  2018-12-12  9:34           ` Tomasz Figa
@ 2018-12-12 13:54             ` Christoph Hellwig
  2018-12-13  3:13               ` Tomasz Figa
  0 siblings, 1 reply; 27+ messages in thread
From: Christoph Hellwig @ 2018-12-12 13:54 UTC (permalink / raw)
  To: Tomasz Figa
  Cc: Christoph Hellwig, Laurent Pinchart, Matwey V. Kornilov,
	Linux Media Mailing List, Linux Kernel Mailing List,
	Matwey V. Kornilov, Alan Stern, Ezequiel Garcia, hdegoede,
	Hans Verkuil, Mauro Carvalho Chehab, rostedt, mingo, Mike Isely,
	Bhumika Goyal, Colin King, Kieran Bingham, keiichiw

On Wed, Dec 12, 2018 at 06:34:25PM +0900, Tomasz Figa wrote:
> The typical DMA-buf import/export flow is as follows:
> 1) Driver X allocates buffer A using this API for device x and gets a
> DMA address inside x's DMA (or IOVA) address space.
> 2) Driver X creates a dma_buf D(A), backed by buffer A and gives the
> user space process a file descriptor FD(A) referring to it.
> 3) Driver Y gets FD(A) from the user space and needs to map it into
> the DMA/IOVA address space of device y. It doe it by calling
> dma_buf_map_attachment() which returns an sg_table describing the
> mapping.

And just as I said last time I think we need to fix the dmabuf code
to not rely on struct scatterlist.  struct scatterlist is an interface
that is fundamentally page based, while the dma coherent allocator
only gives your a kernel virtual and dma address (and the option to
map the buffer into userspace).  So we need to fix to get the interface
right as we already have DMAable memory withour a struct page and we
are bound to get more of those.  Nevermind all the caching implications
even if we have a struct page.

It would also be great to use that opportunity to get rid of all the
code duplication of almost the same dmabug provides backed by the
DMA API.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v5 2/2] media: usb: pwc: Don't use coherent DMA buffers for ISO transfer
  2018-12-12 13:54             ` Christoph Hellwig
@ 2018-12-13  3:13               ` Tomasz Figa
  2018-12-13 14:03                 ` Christoph Hellwig
  0 siblings, 1 reply; 27+ messages in thread
From: Tomasz Figa @ 2018-12-13  3:13 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Laurent Pinchart, Matwey V. Kornilov, Linux Media Mailing List,
	Linux Kernel Mailing List, Matwey V. Kornilov, Alan Stern,
	Ezequiel Garcia, hdegoede, Hans Verkuil, Mauro Carvalho Chehab,
	rostedt, mingo, Mike Isely, Bhumika Goyal, Colin King,
	Kieran Bingham, keiichiw

On Wed, Dec 12, 2018 at 10:54 PM Christoph Hellwig <hch@infradead.org> wrote:
>
> On Wed, Dec 12, 2018 at 06:34:25PM +0900, Tomasz Figa wrote:
> > The typical DMA-buf import/export flow is as follows:
> > 1) Driver X allocates buffer A using this API for device x and gets a
> > DMA address inside x's DMA (or IOVA) address space.
> > 2) Driver X creates a dma_buf D(A), backed by buffer A and gives the
> > user space process a file descriptor FD(A) referring to it.
> > 3) Driver Y gets FD(A) from the user space and needs to map it into
> > the DMA/IOVA address space of device y. It doe it by calling
> > dma_buf_map_attachment() which returns an sg_table describing the
> > mapping.
>
> And just as I said last time I think we need to fix the dmabuf code
> to not rely on struct scatterlist.  struct scatterlist is an interface
> that is fundamentally page based, while the dma coherent allocator
> only gives your a kernel virtual and dma address (and the option to
> map the buffer into userspace).  So we need to fix to get the interface
> right as we already have DMAable memory withour a struct page and we
> are bound to get more of those.  Nevermind all the caching implications
> even if we have a struct page.

Putting aside the problem of memory without struct page, one thing to
note here that what is a contiguous DMA range for device X, may not be
mappable contiguously for device Y and it would still need something
like a scatter list to fully describe the buffer.

Do we already have a structure that would work for this purposes? I'd
assume that we need something like the existing scatterlist but with
page links replaced with something that doesn't require the memory to
have struct page, possibly just PFN?

>
> It would also be great to use that opportunity to get rid of all the
> code duplication of almost the same dmabug provides backed by the
> DMA API.

Could you sched some more light on this? I'm curious what is the code
duplication you're referring to.

Best regards,
Tomasz

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v5 2/2] media: usb: pwc: Don't use coherent DMA buffers for ISO transfer
  2018-12-13  3:13               ` Tomasz Figa
@ 2018-12-13 14:03                 ` Christoph Hellwig
  2018-12-14  3:12                   ` Tomasz Figa
  0 siblings, 1 reply; 27+ messages in thread
From: Christoph Hellwig @ 2018-12-13 14:03 UTC (permalink / raw)
  To: Tomasz Figa
  Cc: Christoph Hellwig, Laurent Pinchart, Matwey V. Kornilov,
	Linux Media Mailing List, Linux Kernel Mailing List,
	Matwey V. Kornilov, Alan Stern, Ezequiel Garcia, hdegoede,
	Hans Verkuil, Mauro Carvalho Chehab, rostedt, mingo, Mike Isely,
	Bhumika Goyal, Colin King, Kieran Bingham, keiichiw

On Thu, Dec 13, 2018 at 12:13:29PM +0900, Tomasz Figa wrote:
> Putting aside the problem of memory without struct page, one thing to
> note here that what is a contiguous DMA range for device X, may not be
> mappable contiguously for device Y and it would still need something
> like a scatter list to fully describe the buffer.

I think we need to define contiguous here.

If the buffer always is physically contiguous, as it is in the currently
posted series, we can always map it with a single dma_map_single call
(if the hardware can handle that in a single segment is a different
question, but out of scope here).

If on the other hand we have multiple discontiguous physical address
range that are mapped using the iommu and vmap this interface doesn't
work anyway.

But in that case you should just do multiple allocations and then use
dma_map_sg coalescing on the hardware side, and vmap [1] if really
needed.  I guess for this we want to gurantee that dma_alloc_attrs
with the DMA_ATTR_NON_CONSISTENT allows virt_to_page to be used on
the return value, which the currently posted implementation does,
although I'm a it reluctant about the API guarantee.


> Do we already have a structure that would work for this purposes? I'd
> assume that we need something like the existing scatterlist but with
> page links replaced with something that doesn't require the memory to
> have struct page, possibly just PFN?

The problem is that just the PFN / physical address isn't enough for
most use cases as you also need a kernel virtual address.  But moving
struct scatterlist to store a pfn instead of a struct page would be
pretty nice for various reasons anyway.

> 
> >
> > It would also be great to use that opportunity to get rid of all the
> > code duplication of almost the same dmabug provides backed by the
> > DMA API.
> 
> Could you sched some more light on this? I'm curious what is the code
> duplication you're referring to.

It seems like a lot of the dmabuf ops are just slight various of
dma_alloc + dma_get_sttable + dma_map_sg / dma_unmap_sg.  There must be
a void to not duplicate that over and over.

[1] and use invalidate_kernel_vmap_range and flush_kernel_vmap_range
    to manually take care of cache flushing.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v5 2/2] media: usb: pwc: Don't use coherent DMA buffers for ISO transfer
  2018-12-13 14:03                 ` Christoph Hellwig
@ 2018-12-14  3:12                   ` Tomasz Figa
  2018-12-14 12:36                     ` Christoph Hellwig
  0 siblings, 1 reply; 27+ messages in thread
From: Tomasz Figa @ 2018-12-14  3:12 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Laurent Pinchart, Matwey V. Kornilov, Linux Media Mailing List,
	Linux Kernel Mailing List, Matwey V. Kornilov, Alan Stern,
	Ezequiel Garcia, hdegoede, Hans Verkuil, Mauro Carvalho Chehab,
	rostedt, mingo, Mike Isely, Bhumika Goyal, Colin King,
	Kieran Bingham, keiichiw

On Thu, Dec 13, 2018 at 11:03 PM Christoph Hellwig <hch@infradead.org> wrote:
>
> On Thu, Dec 13, 2018 at 12:13:29PM +0900, Tomasz Figa wrote:
> > Putting aside the problem of memory without struct page, one thing to
> > note here that what is a contiguous DMA range for device X, may not be
> > mappable contiguously for device Y and it would still need something
> > like a scatter list to fully describe the buffer.
>
> I think we need to define contiguous here.
>
> If the buffer always is physically contiguous, as it is in the currently
> posted series, we can always map it with a single dma_map_single call
> (if the hardware can handle that in a single segment is a different
> question, but out of scope here).

Are you sure the buffer is always physically contiguous? At least the
ARM IOMMU dma_ops [1] and the DMA-IOMMU dma_ops [2] will simply
allocate pages without any continuity guarantees and remap the pages
into a contiguous kernel VA (unless DMA_ATTR_NO_KERNEL_MAPPING is
given, which makes them return an opaque cookie instead of the kernel
VA).

[1] http://git.infradead.org/users/hch/misc.git/blob/2dbb028e4a3017e1b71a6ae3828a3548545eba24:/arch/arm/mm/dma-mapping.c#l1291
[2] http://git.infradead.org/users/hch/misc.git/blob/2dbb028e4a3017e1b71a6ae3828a3548545eba24:/drivers/iommu/dma-iommu.c#l450

>
> If on the other hand we have multiple discontiguous physical address
> range that are mapped using the iommu and vmap this interface doesn't
> work anyway.
>
> But in that case you should just do multiple allocations and then use
> dma_map_sg coalescing on the hardware side, and vmap [1] if really
> needed.  I guess for this we want to gurantee that dma_alloc_attrs
> with the DMA_ATTR_NON_CONSISTENT allows virt_to_page to be used on
> the return value, which the currently posted implementation does,
> although I'm a it reluctant about the API guarantee.
>
>
> > Do we already have a structure that would work for this purposes? I'd
> > assume that we need something like the existing scatterlist but with
> > page links replaced with something that doesn't require the memory to
> > have struct page, possibly just PFN?
>
> The problem is that just the PFN / physical address isn't enough for
> most use cases as you also need a kernel virtual address.  But moving
> struct scatterlist to store a pfn instead of a struct page would be
> pretty nice for various reasons anyway.
>
> >
> > >
> > > It would also be great to use that opportunity to get rid of all the
> > > code duplication of almost the same dmabug provides backed by the
> > > DMA API.
> >
> > Could you sched some more light on this? I'm curious what is the code
> > duplication you're referring to.
>
> It seems like a lot of the dmabuf ops are just slight various of
> dma_alloc + dma_get_sttable + dma_map_sg / dma_unmap_sg.  There must be
> a void to not duplicate that over and over.

Device/kernel/userspace maps/unmaps shouldn't really be
exporter-specific indeed, as long as one provides a uniform way of
describing a buffer and then have dma_map_*() work on that. Possibly a
part that manages the CPU cache maintenance either. There is still
some space for some special device caches (or other synchronization),
though.

>
> [1] and use invalidate_kernel_vmap_range and flush_kernel_vmap_range
>     to manually take care of cache flushing.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v5 2/2] media: usb: pwc: Don't use coherent DMA buffers for ISO transfer
  2018-12-14  3:12                   ` Tomasz Figa
@ 2018-12-14 12:36                     ` Christoph Hellwig
  2018-12-18  7:22                       ` Tomasz Figa
  0 siblings, 1 reply; 27+ messages in thread
From: Christoph Hellwig @ 2018-12-14 12:36 UTC (permalink / raw)
  To: Tomasz Figa
  Cc: Christoph Hellwig, Laurent Pinchart, Matwey V. Kornilov,
	Linux Media Mailing List, Linux Kernel Mailing List,
	Matwey V. Kornilov, Alan Stern, Ezequiel Garcia, hdegoede,
	Hans Verkuil, Mauro Carvalho Chehab, rostedt, mingo, Mike Isely,
	Bhumika Goyal, Colin King, Kieran Bingham, keiichiw

On Fri, Dec 14, 2018 at 12:12:38PM +0900, Tomasz Figa wrote:
> > If the buffer always is physically contiguous, as it is in the currently
> > posted series, we can always map it with a single dma_map_single call
> > (if the hardware can handle that in a single segment is a different
> > question, but out of scope here).
> 
> Are you sure the buffer is always physically contiguous? At least the
> ARM IOMMU dma_ops [1] and the DMA-IOMMU dma_ops [2] will simply
> allocate pages without any continuity guarantees and remap the pages
> into a contiguous kernel VA (unless DMA_ATTR_NO_KERNEL_MAPPING is
> given, which makes them return an opaque cookie instead of the kernel
> VA).
> 
> [1] http://git.infradead.org/users/hch/misc.git/blob/2dbb028e4a3017e1b71a6ae3828a3548545eba24:/arch/arm/mm/dma-mapping.c#l1291
> [2] http://git.infradead.org/users/hch/misc.git/blob/2dbb028e4a3017e1b71a6ae3828a3548545eba24:/drivers/iommu/dma-iommu.c#l450

We never end up in this allocator for the new DMA_ATTR_NON_CONSISTENT
case, and that is intentional.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v5 2/2] media: usb: pwc: Don't use coherent DMA buffers for ISO transfer
  2018-12-14 12:36                     ` Christoph Hellwig
@ 2018-12-18  7:22                       ` Tomasz Figa
  2018-12-18  7:38                         ` Christoph Hellwig
  0 siblings, 1 reply; 27+ messages in thread
From: Tomasz Figa @ 2018-12-18  7:22 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Laurent Pinchart, Matwey V. Kornilov, Linux Media Mailing List,
	Linux Kernel Mailing List, Matwey V. Kornilov, Alan Stern,
	Ezequiel Garcia, hdegoede, Hans Verkuil, Mauro Carvalho Chehab,
	rostedt, mingo, Mike Isely, Bhumika Goyal, Colin King,
	Kieran Bingham, keiichiw

On Fri, Dec 14, 2018 at 9:36 PM Christoph Hellwig <hch@infradead.org> wrote:
>
> On Fri, Dec 14, 2018 at 12:12:38PM +0900, Tomasz Figa wrote:
> > > If the buffer always is physically contiguous, as it is in the currently
> > > posted series, we can always map it with a single dma_map_single call
> > > (if the hardware can handle that in a single segment is a different
> > > question, but out of scope here).
> >
> > Are you sure the buffer is always physically contiguous? At least the
> > ARM IOMMU dma_ops [1] and the DMA-IOMMU dma_ops [2] will simply
> > allocate pages without any continuity guarantees and remap the pages
> > into a contiguous kernel VA (unless DMA_ATTR_NO_KERNEL_MAPPING is
> > given, which makes them return an opaque cookie instead of the kernel
> > VA).
> >
> > [1] http://git.infradead.org/users/hch/misc.git/blob/2dbb028e4a3017e1b71a6ae3828a3548545eba24:/arch/arm/mm/dma-mapping.c#l1291
> > [2] http://git.infradead.org/users/hch/misc.git/blob/2dbb028e4a3017e1b71a6ae3828a3548545eba24:/drivers/iommu/dma-iommu.c#l450
>
> We never end up in this allocator for the new DMA_ATTR_NON_CONSISTENT
> case, and that is intentional.

It kind of limits the usability of this API, since it enforces
contiguous allocations even for big sizes even for devices behind
IOMMU (contrary to the case when DMA_ATTR_NON_CONSISTENT is not set),
but given that it's just a temporary solution for devices like these
USB cameras, I guess that's fine.

Note that in V4L2 we use the DMA API extensively, so that we don't
need to embed any device-specific or integration-specific knowledge in
the framework. Right now we're using dma_alloc_attrs() with
driver-provided attrs [1], but current driver never request
non-consistent memory. We're however thinking about making it possible
to allocate non-consistent memory. What would you suggest for this?

[1] https://elixir.bootlin.com/linux/v4.20-rc7/source/drivers/media/common/videobuf2/videobuf2-dma-contig.c#L139

Best regards,
Tomasz

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v5 2/2] media: usb: pwc: Don't use coherent DMA buffers for ISO transfer
  2018-12-18  7:22                       ` Tomasz Figa
@ 2018-12-18  7:38                         ` Christoph Hellwig
  2018-12-18  9:48                           ` Tomasz Figa
  0 siblings, 1 reply; 27+ messages in thread
From: Christoph Hellwig @ 2018-12-18  7:38 UTC (permalink / raw)
  To: Tomasz Figa
  Cc: Christoph Hellwig, Laurent Pinchart, Matwey V. Kornilov,
	Linux Media Mailing List, Linux Kernel Mailing List,
	Matwey V. Kornilov, Alan Stern, Ezequiel Garcia, hdegoede,
	Hans Verkuil, Mauro Carvalho Chehab, rostedt, mingo, Mike Isely,
	Bhumika Goyal, Colin King, Kieran Bingham, keiichiw

On Tue, Dec 18, 2018 at 04:22:43PM +0900, Tomasz Figa wrote:
> It kind of limits the usability of this API, since it enforces
> contiguous allocations even for big sizes even for devices behind
> IOMMU (contrary to the case when DMA_ATTR_NON_CONSISTENT is not set),
> but given that it's just a temporary solution for devices like these
> USB cameras, I guess that's fine.

The problem is that you can't have flexibility and simplicity at the
same time.  Once you use kernel virtual address remapping you need to
be prepared to have multiple segments.

So as I said you can call dma_alloc_attrs with DMA_ATTR_NON_CONSISTENT
in a loop with a suitably small chunk size, then stuff the results into
a scatterlist and map that again for the device share with if you don't
want a single contigous region.  You just have to either deal with
non-contigous access from the kernel or use vmap and the right vmap
cache flushing helpers.

> Note that in V4L2 we use the DMA API extensively, so that we don't
> need to embed any device-specific or integration-specific knowledge in
> the framework. Right now we're using dma_alloc_attrs() with
> driver-provided attrs [1], but current driver never request
> non-consistent memory. We're however thinking about making it possible
> to allocate non-consistent memory. What would you suggest for this?
> 
> [1] https://elixir.bootlin.com/linux/v4.20-rc7/source/drivers/media/common/videobuf2/videobuf2-dma-contig.c#L139

I would advice against new non-consistent users until this series
goes through, mostly because dma_cache_sync is such an amazing bad
API.  Otherwise things will just work at the allocation side, you'll
just need to be careful to transfer ownership between the cpu and
the device(s) carefully using the dma_sync_* APIs.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v5 2/2] media: usb: pwc: Don't use coherent DMA buffers for ISO transfer
  2018-12-18  7:38                         ` Christoph Hellwig
@ 2018-12-18  9:48                           ` Tomasz Figa
  2018-12-19  7:51                             ` Christoph Hellwig
  0 siblings, 1 reply; 27+ messages in thread
From: Tomasz Figa @ 2018-12-18  9:48 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Laurent Pinchart, Matwey V. Kornilov, Linux Media Mailing List,
	Linux Kernel Mailing List, Matwey V. Kornilov, Alan Stern,
	Ezequiel Garcia, hdegoede, Hans Verkuil, Mauro Carvalho Chehab,
	rostedt, mingo, Mike Isely, Bhumika Goyal, Colin King,
	Kieran Bingham, keiichiw

On Tue, Dec 18, 2018 at 4:38 PM Christoph Hellwig <hch@infradead.org> wrote:
>
> On Tue, Dec 18, 2018 at 04:22:43PM +0900, Tomasz Figa wrote:
> > It kind of limits the usability of this API, since it enforces
> > contiguous allocations even for big sizes even for devices behind
> > IOMMU (contrary to the case when DMA_ATTR_NON_CONSISTENT is not set),
> > but given that it's just a temporary solution for devices like these
> > USB cameras, I guess that's fine.
>
> The problem is that you can't have flexibility and simplicity at the
> same time.  Once you use kernel virtual address remapping you need to
> be prepared to have multiple segments.
>
> So as I said you can call dma_alloc_attrs with DMA_ATTR_NON_CONSISTENT
> in a loop with a suitably small chunk size, then stuff the results into
> a scatterlist and map that again for the device share with if you don't
> want a single contigous region.  You just have to either deal with
> non-contigous access from the kernel or use vmap and the right vmap
> cache flushing helpers.

The point is that you didn't have to do this small chunk loop without
DMA_ATTR_NON_CONSISTENT, so it's at least inconsistent now and not
sure why it could be better than just a loop of alloc_page().

>
> > Note that in V4L2 we use the DMA API extensively, so that we don't
> > need to embed any device-specific or integration-specific knowledge in
> > the framework. Right now we're using dma_alloc_attrs() with
> > driver-provided attrs [1], but current driver never request
> > non-consistent memory. We're however thinking about making it possible
> > to allocate non-consistent memory. What would you suggest for this?
> >
> > [1] https://elixir.bootlin.com/linux/v4.20-rc7/source/drivers/media/common/videobuf2/videobuf2-dma-contig.c#L139
>
> I would advice against new non-consistent users until this series
> goes through, mostly because dma_cache_sync is such an amazing bad
> API.  Otherwise things will just work at the allocation side, you'll
> just need to be careful to transfer ownership between the cpu and
> the device(s) carefully using the dma_sync_* APIs.

Just to clarify, the actual code isn't very likely to surface any time
soon. so I assume it would be after this series lands.

We will however need an API that can transparently handle both cases
of contiguous (without IOMMU) and page-by-page allocations (with
IOMMU) behind the scenes, like the current dma_alloc_attrs() without
DMA_ATTR_NON_CONSISTENT.

Best regards,
Tomasz

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v5 2/2] media: usb: pwc: Don't use coherent DMA buffers for ISO transfer
  2018-12-18  9:48                           ` Tomasz Figa
@ 2018-12-19  7:51                             ` Christoph Hellwig
  2018-12-19  8:18                               ` Tomasz Figa
  0 siblings, 1 reply; 27+ messages in thread
From: Christoph Hellwig @ 2018-12-19  7:51 UTC (permalink / raw)
  To: Tomasz Figa
  Cc: Christoph Hellwig, Laurent Pinchart, Matwey V. Kornilov,
	Linux Media Mailing List, Linux Kernel Mailing List,
	Matwey V. Kornilov, Alan Stern, Ezequiel Garcia, hdegoede,
	Hans Verkuil, Mauro Carvalho Chehab, rostedt, mingo, Mike Isely,
	Bhumika Goyal, Colin King, Kieran Bingham, keiichiw

On Tue, Dec 18, 2018 at 06:48:03PM +0900, Tomasz Figa wrote:
> > So as I said you can call dma_alloc_attrs with DMA_ATTR_NON_CONSISTENT
> > in a loop with a suitably small chunk size, then stuff the results into
> > a scatterlist and map that again for the device share with if you don't
> > want a single contigous region.  You just have to either deal with
> > non-contigous access from the kernel or use vmap and the right vmap
> > cache flushing helpers.
> 
> The point is that you didn't have to do this small chunk loop without
> DMA_ATTR_NON_CONSISTENT, so it's at least inconsistent now and not
> sure why it could be better than just a loop of alloc_page().

You have to do it if you want to map the addresses for a second device.

> > I would advice against new non-consistent users until this series
> > goes through, mostly because dma_cache_sync is such an amazing bad
> > API.  Otherwise things will just work at the allocation side, you'll
> > just need to be careful to transfer ownership between the cpu and
> > the device(s) carefully using the dma_sync_* APIs.
> 
> Just to clarify, the actual code isn't very likely to surface any time
> soon. so I assume it would be after this series lands.
> 
> We will however need an API that can transparently handle both cases
> of contiguous (without IOMMU) and page-by-page allocations (with
> IOMMU) behind the scenes, like the current dma_alloc_attrs() without
> DMA_ATTR_NON_CONSISTENT.

Is the use case to then share the memory between multiples devices
or just for a single device?  The latter case is generally easy, the
former is rather more painful.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v5 2/2] media: usb: pwc: Don't use coherent DMA buffers for ISO transfer
  2018-12-19  7:51                             ` Christoph Hellwig
@ 2018-12-19  8:18                               ` Tomasz Figa
  2018-12-19 14:51                                 ` Christoph Hellwig
  0 siblings, 1 reply; 27+ messages in thread
From: Tomasz Figa @ 2018-12-19  8:18 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Laurent Pinchart, Matwey V. Kornilov, Linux Media Mailing List,
	Linux Kernel Mailing List, Matwey V. Kornilov, Alan Stern,
	Ezequiel Garcia, hdegoede, Hans Verkuil, Mauro Carvalho Chehab,
	rostedt, mingo, Mike Isely, Bhumika Goyal, Colin King,
	Kieran Bingham, keiichiw

On Wed, Dec 19, 2018 at 4:51 PM Christoph Hellwig <hch@infradead.org> wrote:
>
> On Tue, Dec 18, 2018 at 06:48:03PM +0900, Tomasz Figa wrote:
> > > So as I said you can call dma_alloc_attrs with DMA_ATTR_NON_CONSISTENT
> > > in a loop with a suitably small chunk size, then stuff the results into
> > > a scatterlist and map that again for the device share with if you don't
> > > want a single contigous region.  You just have to either deal with
> > > non-contigous access from the kernel or use vmap and the right vmap
> > > cache flushing helpers.
> >
> > The point is that you didn't have to do this small chunk loop without
> > DMA_ATTR_NON_CONSISTENT, so it's at least inconsistent now and not
> > sure why it could be better than just a loop of alloc_page().
>
> You have to do it if you want to map the addresses for a second device.
>

The existing code that deals with dma_alloc_attrs() without
DMA_ATTR_NON_CONSISTENT would just call dma_get_sgtable_attrs() like
here:

https://elixir.bootlin.com/linux/v4.20-rc7/source/drivers/media/common/videobuf2/videobuf2-dma-contig.c#L366

and then dma_map_sg() for the other device like here;

https://elixir.bootlin.com/linux/v4.20-rc7/source/drivers/media/common/videobuf2/videobuf2-dma-contig.c#L283

> > > I would advice against new non-consistent users until this series
> > > goes through, mostly because dma_cache_sync is such an amazing bad
> > > API.  Otherwise things will just work at the allocation side, you'll
> > > just need to be careful to transfer ownership between the cpu and
> > > the device(s) carefully using the dma_sync_* APIs.
> >
> > Just to clarify, the actual code isn't very likely to surface any time
> > soon. so I assume it would be after this series lands.
> >
> > We will however need an API that can transparently handle both cases
> > of contiguous (without IOMMU) and page-by-page allocations (with
> > IOMMU) behind the scenes, like the current dma_alloc_attrs() without
> > DMA_ATTR_NON_CONSISTENT.
>
> Is the use case to then share the memory between multiples devices
> or just for a single device?  The latter case is generally easy, the
> former is rather more painful.

The former, but the convention has been to assume that the userspace
will choose the right (the most constrained typically) device to
allocate from or otherwise handle the import failure (e.g. by falling
back to copying into a buffer allocated from the importer).

Best regards,
Tomasz

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v5 2/2] media: usb: pwc: Don't use coherent DMA buffers for ISO transfer
  2018-12-19  8:18                               ` Tomasz Figa
@ 2018-12-19 14:51                                 ` Christoph Hellwig
  2018-12-20  3:23                                   ` Tomasz Figa
  0 siblings, 1 reply; 27+ messages in thread
From: Christoph Hellwig @ 2018-12-19 14:51 UTC (permalink / raw)
  To: Tomasz Figa
  Cc: Christoph Hellwig, Laurent Pinchart, Matwey V. Kornilov,
	Linux Media Mailing List, Linux Kernel Mailing List,
	Matwey V. Kornilov, Alan Stern, Ezequiel Garcia, hdegoede,
	Hans Verkuil, Mauro Carvalho Chehab, rostedt, mingo, Mike Isely,
	Bhumika Goyal, Colin King, Kieran Bingham, keiichiw

On Wed, Dec 19, 2018 at 05:18:35PM +0900, Tomasz Figa wrote:
> The existing code that deals with dma_alloc_attrs() without
> DMA_ATTR_NON_CONSISTENT would just call dma_get_sgtable_attrs() like
> here:

I know.  And dma_get_sgtable_attrs is fundamentally flawed and we
need to kill this interface as it just can't worked with virtually
tagged cases.  It is a prime example for an interface that looks
nice and simple but is plain wrong.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v5 2/2] media: usb: pwc: Don't use coherent DMA buffers for ISO transfer
  2018-12-19 14:51                                 ` Christoph Hellwig
@ 2018-12-20  3:23                                   ` Tomasz Figa
  2018-12-21  8:13                                     ` Christoph Hellwig
  0 siblings, 1 reply; 27+ messages in thread
From: Tomasz Figa @ 2018-12-20  3:23 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Laurent Pinchart, Matwey V. Kornilov, Linux Media Mailing List,
	Linux Kernel Mailing List, Matwey V. Kornilov, Alan Stern,
	Ezequiel Garcia, hdegoede, Hans Verkuil, Mauro Carvalho Chehab,
	rostedt, mingo, Mike Isely, Bhumika Goyal, Colin King,
	Kieran Bingham, keiichiw

On Wed, Dec 19, 2018 at 11:51 PM Christoph Hellwig <hch@infradead.org> wrote:
>
> On Wed, Dec 19, 2018 at 05:18:35PM +0900, Tomasz Figa wrote:
> > The existing code that deals with dma_alloc_attrs() without
> > DMA_ATTR_NON_CONSISTENT would just call dma_get_sgtable_attrs() like
> > here:
>
> I know.  And dma_get_sgtable_attrs is fundamentally flawed and we
> need to kill this interface as it just can't worked with virtually
> tagged cases.  It is a prime example for an interface that looks
> nice and simple but is plain wrong.

Got it, thanks.

I haven't been following the problems with virtually tagged cases,
would you mind sharing some background, so that we can consider it
when adding non-consistent allocations to VB2?

Best regards,
Tomasz

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH v5 2/2] media: usb: pwc: Don't use coherent DMA buffers for ISO transfer
  2018-12-20  3:23                                   ` Tomasz Figa
@ 2018-12-21  8:13                                     ` Christoph Hellwig
  0 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2018-12-21  8:13 UTC (permalink / raw)
  To: Tomasz Figa
  Cc: Christoph Hellwig, Laurent Pinchart, Matwey V. Kornilov,
	Linux Media Mailing List, Linux Kernel Mailing List,
	Matwey V. Kornilov, Alan Stern, Ezequiel Garcia, hdegoede,
	Hans Verkuil, Mauro Carvalho Chehab, rostedt, mingo, Mike Isely,
	Bhumika Goyal, Colin King, Kieran Bingham, keiichiw

On Thu, Dec 20, 2018 at 12:23:46PM +0900, Tomasz Figa wrote:
> I haven't been following the problems with virtually tagged cases,
> would you mind sharing some background, so that we can consider it
> when adding non-consistent allocations to VB2?

The problem exists at least partially with the current consistent
allocator, and we need to fix it.  My non-coherent series does not have
it, but we would add it if we allowed virtual remapping.

The problem with get_sgtable is that it creates aliasing of kernel
virtual addresses used to access memory and thus the cache.  We have
the mapping return from dma_alloc_*, which in case of a remap contains
a vmap/ioremap style address that is different from the kernel direct
mapping address you get from using page_address/kmap on the pages
backing that mapping.  (assuming you even have pages - in a few corner
cases we don't and the whole interface concept breaks down).

This creates various problems as the the scatterlist returned from
get_stable now gives a second way to access this memory through direct
mapping addresses in the pages contained in it, but as soon as we do
that we:

 a) don't use the nocache mapping used by the coherent allocator if that
    is on a per-mapping basis (which it is for many architectures), so
    you do get data in the cache even when that might not be assumed
 b) if the data returned from dma_alloc_coherent was not actually a
    remap but a special pool of non-cached address the cache flushing
    instructions might be invalid and caused problems
 c) any cache flushing now operates on just those direct mappings, which
    in case of the non-coherent allocator and access through the
    remapped address does the wrong thing for virtually tagged caches

^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2018-12-21  8:14 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-08-21 17:06 [PATCH v5 0/2] media: usb: pwc: Don't use coherent DMA buffers for ISO transfer Matwey V. Kornilov
2018-08-21 17:06 ` [PATCH v5 1/2] media: usb: pwc: Introduce TRACE_EVENTs for pwc_isoc_handler() Matwey V. Kornilov
2018-08-21 19:49   ` Steven Rostedt
2018-08-21 17:06 ` [PATCH v5 2/2] media: usb: pwc: Don't use coherent DMA buffers for ISO transfer Matwey V. Kornilov
2018-08-28  7:17   ` Matwey V. Kornilov
2018-09-11 18:58     ` Matwey V. Kornilov
2018-09-19 16:12       ` Ezequiel Garcia
2018-10-10 21:13       ` Matwey V. Kornilov
2018-10-30 22:00   ` Laurent Pinchart
2018-10-31  5:38     ` Christoph Hellwig
2018-12-07 15:25     ` Christoph Hellwig
2018-12-12  8:57       ` Tomasz Figa
2018-12-12  9:09         ` Christoph Hellwig
2018-12-12  9:34           ` Tomasz Figa
2018-12-12 13:54             ` Christoph Hellwig
2018-12-13  3:13               ` Tomasz Figa
2018-12-13 14:03                 ` Christoph Hellwig
2018-12-14  3:12                   ` Tomasz Figa
2018-12-14 12:36                     ` Christoph Hellwig
2018-12-18  7:22                       ` Tomasz Figa
2018-12-18  7:38                         ` Christoph Hellwig
2018-12-18  9:48                           ` Tomasz Figa
2018-12-19  7:51                             ` Christoph Hellwig
2018-12-19  8:18                               ` Tomasz Figa
2018-12-19 14:51                                 ` Christoph Hellwig
2018-12-20  3:23                                   ` Tomasz Figa
2018-12-21  8:13                                     ` Christoph Hellwig

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).