All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] dma-mapping: benchmark: pretend DMA is transmitting
@ 2021-02-05  2:00 ` Barry Song
  0 siblings, 0 replies; 12+ messages in thread
From: Barry Song @ 2021-02-05  2:00 UTC (permalink / raw)
  To: m.szyprowski, hch, robin.murphy, iommu; +Cc: linux-kernel, linuxarm, Barry Song

In a real dma mapping user case, after dma_map is done, data will be
transmit. Thus, in multi-threaded user scenario, IOMMU contention
should not be that severe. For example, if users enable multiple
threads to send network packets through 1G/10G/100Gbps NIC, usually
the steps will be: map -> transmission -> unmap.  Transmission delay
reduces the contention of IOMMU.

Here a delay is added to simulate the transmission between map and unmap
so that the tested result could be more accurate for TX and simple RX.
A typical TX transmission for NIC would be like: map -> TX -> unmap
since the socket buffers come from OS. Simple RX model eg. disk driver,
is also map -> RX -> unmap, but real RX model in a NIC could be more
complicated considering packets can come spontaneously and many drivers
are using pre-mapped buffers pool. This is in the TBD list.

Signed-off-by: Barry Song <song.bao.hua@hisilicon.com>
---
 -v2: cleanup according to Robin's feedback. thanks, Robin.

 kernel/dma/map_benchmark.c                    | 10 ++++++++++
 .../testing/selftests/dma/dma_map_benchmark.c | 19 +++++++++++++++++--
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c
index 1b1b8ff875cb..06636406a245 100644
--- a/kernel/dma/map_benchmark.c
+++ b/kernel/dma/map_benchmark.c
@@ -21,6 +21,7 @@
 #define DMA_MAP_BENCHMARK	_IOWR('d', 1, struct map_benchmark)
 #define DMA_MAP_MAX_THREADS	1024
 #define DMA_MAP_MAX_SECONDS	300
+#define DMA_MAP_MAX_TRANS_DELAY	(10 * NSEC_PER_MSEC) /* 10ms */
 
 #define DMA_MAP_BIDIRECTIONAL	0
 #define DMA_MAP_TO_DEVICE	1
@@ -36,6 +37,7 @@ struct map_benchmark {
 	__s32 node; /* which numa node this benchmark will run on */
 	__u32 dma_bits; /* DMA addressing capability */
 	__u32 dma_dir; /* DMA data direction */
+	__u32 dma_trans_ns; /* time for DMA transmission in ns */
 	__u64 expansion[10];	/* For future use */
 };
 
@@ -87,6 +89,9 @@ static int map_benchmark_thread(void *data)
 		map_etime = ktime_get();
 		map_delta = ktime_sub(map_etime, map_stime);
 
+		/* Pretend DMA is transmitting */
+		ndelay(map->bparam.dma_trans_ns);
+
 		unmap_stime = ktime_get();
 		dma_unmap_single(map->dev, dma_addr, PAGE_SIZE, map->dir);
 		unmap_etime = ktime_get();
@@ -218,6 +223,11 @@ static long map_benchmark_ioctl(struct file *file, unsigned int cmd,
 			return -EINVAL;
 		}
 
+		if (map->bparam.dma_trans_ns > DMA_MAP_MAX_TRANS_DELAY) {
+			pr_err("invalid transmission delay\n");
+			return -EINVAL;
+		}
+
 		if (map->bparam.node != NUMA_NO_NODE &&
 		    !node_possible(map->bparam.node)) {
 			pr_err("invalid numa node\n");
diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c
index 7065163a8388..a370290d9503 100644
--- a/tools/testing/selftests/dma/dma_map_benchmark.c
+++ b/tools/testing/selftests/dma/dma_map_benchmark.c
@@ -11,9 +11,12 @@
 #include <sys/mman.h>
 #include <linux/types.h>
 
+#define NSEC_PER_MSEC	1000000L
+
 #define DMA_MAP_BENCHMARK	_IOWR('d', 1, struct map_benchmark)
 #define DMA_MAP_MAX_THREADS	1024
 #define DMA_MAP_MAX_SECONDS     300
+#define DMA_MAP_MAX_TRANS_DELAY	(10 * NSEC_PER_MSEC) /* 10ms */
 
 #define DMA_MAP_BIDIRECTIONAL	0
 #define DMA_MAP_TO_DEVICE	1
@@ -35,6 +38,7 @@ struct map_benchmark {
 	__s32 node; /* which numa node this benchmark will run on */
 	__u32 dma_bits; /* DMA addressing capability */
 	__u32 dma_dir; /* DMA data direction */
+	__u32 dma_trans_ns; /* delay for DMA transmission in ns */
 	__u64 expansion[10];	/* For future use */
 };
 
@@ -45,12 +49,12 @@ int main(int argc, char **argv)
 	/* default single thread, run 20 seconds on NUMA_NO_NODE */
 	int threads = 1, seconds = 20, node = -1;
 	/* default dma mask 32bit, bidirectional DMA */
-	int bits = 32, dir = DMA_MAP_BIDIRECTIONAL;
+	int bits = 32, xdelay = 0, dir = DMA_MAP_BIDIRECTIONAL;
 
 	int cmd = DMA_MAP_BENCHMARK;
 	char *p;
 
-	while ((opt = getopt(argc, argv, "t:s:n:b:d:")) != -1) {
+	while ((opt = getopt(argc, argv, "t:s:n:b:d:x:")) != -1) {
 		switch (opt) {
 		case 't':
 			threads = atoi(optarg);
@@ -67,6 +71,9 @@ int main(int argc, char **argv)
 		case 'd':
 			dir = atoi(optarg);
 			break;
+		case 'x':
+			xdelay = atoi(optarg);
+			break;
 		default:
 			return -1;
 		}
@@ -84,6 +91,12 @@ int main(int argc, char **argv)
 		exit(1);
 	}
 
+	if (xdelay < 0 || xdelay > DMA_MAP_MAX_TRANS_DELAY) {
+		fprintf(stderr, "invalid transmit delay, must be in 0-%ld\n",
+			DMA_MAP_MAX_TRANS_DELAY);
+		exit(1);
+	}
+
 	/* suppose the mininum DMA zone is 1MB in the world */
 	if (bits < 20 || bits > 64) {
 		fprintf(stderr, "invalid dma mask bit, must be in 20-64\n");
@@ -107,6 +120,8 @@ int main(int argc, char **argv)
 	map.node = node;
 	map.dma_bits = bits;
 	map.dma_dir = dir;
+	map.dma_trans_ns = xdelay;
+
 	if (ioctl(fd, cmd, &map)) {
 		perror("ioctl");
 		exit(1);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v2] dma-mapping: benchmark: pretend DMA is transmitting
@ 2021-02-05  2:00 ` Barry Song
  0 siblings, 0 replies; 12+ messages in thread
From: Barry Song @ 2021-02-05  2:00 UTC (permalink / raw)
  To: m.szyprowski, hch, robin.murphy, iommu; +Cc: linux-kernel, linuxarm

In a real dma mapping user case, after dma_map is done, data will be
transmit. Thus, in multi-threaded user scenario, IOMMU contention
should not be that severe. For example, if users enable multiple
threads to send network packets through 1G/10G/100Gbps NIC, usually
the steps will be: map -> transmission -> unmap.  Transmission delay
reduces the contention of IOMMU.

Here a delay is added to simulate the transmission between map and unmap
so that the tested result could be more accurate for TX and simple RX.
A typical TX transmission for NIC would be like: map -> TX -> unmap
since the socket buffers come from OS. Simple RX model eg. disk driver,
is also map -> RX -> unmap, but real RX model in a NIC could be more
complicated considering packets can come spontaneously and many drivers
are using pre-mapped buffers pool. This is in the TBD list.

Signed-off-by: Barry Song <song.bao.hua@hisilicon.com>
---
 -v2: cleanup according to Robin's feedback. thanks, Robin.

 kernel/dma/map_benchmark.c                    | 10 ++++++++++
 .../testing/selftests/dma/dma_map_benchmark.c | 19 +++++++++++++++++--
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c
index 1b1b8ff875cb..06636406a245 100644
--- a/kernel/dma/map_benchmark.c
+++ b/kernel/dma/map_benchmark.c
@@ -21,6 +21,7 @@
 #define DMA_MAP_BENCHMARK	_IOWR('d', 1, struct map_benchmark)
 #define DMA_MAP_MAX_THREADS	1024
 #define DMA_MAP_MAX_SECONDS	300
+#define DMA_MAP_MAX_TRANS_DELAY	(10 * NSEC_PER_MSEC) /* 10ms */
 
 #define DMA_MAP_BIDIRECTIONAL	0
 #define DMA_MAP_TO_DEVICE	1
@@ -36,6 +37,7 @@ struct map_benchmark {
 	__s32 node; /* which numa node this benchmark will run on */
 	__u32 dma_bits; /* DMA addressing capability */
 	__u32 dma_dir; /* DMA data direction */
+	__u32 dma_trans_ns; /* time for DMA transmission in ns */
 	__u64 expansion[10];	/* For future use */
 };
 
@@ -87,6 +89,9 @@ static int map_benchmark_thread(void *data)
 		map_etime = ktime_get();
 		map_delta = ktime_sub(map_etime, map_stime);
 
+		/* Pretend DMA is transmitting */
+		ndelay(map->bparam.dma_trans_ns);
+
 		unmap_stime = ktime_get();
 		dma_unmap_single(map->dev, dma_addr, PAGE_SIZE, map->dir);
 		unmap_etime = ktime_get();
@@ -218,6 +223,11 @@ static long map_benchmark_ioctl(struct file *file, unsigned int cmd,
 			return -EINVAL;
 		}
 
+		if (map->bparam.dma_trans_ns > DMA_MAP_MAX_TRANS_DELAY) {
+			pr_err("invalid transmission delay\n");
+			return -EINVAL;
+		}
+
 		if (map->bparam.node != NUMA_NO_NODE &&
 		    !node_possible(map->bparam.node)) {
 			pr_err("invalid numa node\n");
diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c
index 7065163a8388..a370290d9503 100644
--- a/tools/testing/selftests/dma/dma_map_benchmark.c
+++ b/tools/testing/selftests/dma/dma_map_benchmark.c
@@ -11,9 +11,12 @@
 #include <sys/mman.h>
 #include <linux/types.h>
 
+#define NSEC_PER_MSEC	1000000L
+
 #define DMA_MAP_BENCHMARK	_IOWR('d', 1, struct map_benchmark)
 #define DMA_MAP_MAX_THREADS	1024
 #define DMA_MAP_MAX_SECONDS     300
+#define DMA_MAP_MAX_TRANS_DELAY	(10 * NSEC_PER_MSEC) /* 10ms */
 
 #define DMA_MAP_BIDIRECTIONAL	0
 #define DMA_MAP_TO_DEVICE	1
@@ -35,6 +38,7 @@ struct map_benchmark {
 	__s32 node; /* which numa node this benchmark will run on */
 	__u32 dma_bits; /* DMA addressing capability */
 	__u32 dma_dir; /* DMA data direction */
+	__u32 dma_trans_ns; /* delay for DMA transmission in ns */
 	__u64 expansion[10];	/* For future use */
 };
 
@@ -45,12 +49,12 @@ int main(int argc, char **argv)
 	/* default single thread, run 20 seconds on NUMA_NO_NODE */
 	int threads = 1, seconds = 20, node = -1;
 	/* default dma mask 32bit, bidirectional DMA */
-	int bits = 32, dir = DMA_MAP_BIDIRECTIONAL;
+	int bits = 32, xdelay = 0, dir = DMA_MAP_BIDIRECTIONAL;
 
 	int cmd = DMA_MAP_BENCHMARK;
 	char *p;
 
-	while ((opt = getopt(argc, argv, "t:s:n:b:d:")) != -1) {
+	while ((opt = getopt(argc, argv, "t:s:n:b:d:x:")) != -1) {
 		switch (opt) {
 		case 't':
 			threads = atoi(optarg);
@@ -67,6 +71,9 @@ int main(int argc, char **argv)
 		case 'd':
 			dir = atoi(optarg);
 			break;
+		case 'x':
+			xdelay = atoi(optarg);
+			break;
 		default:
 			return -1;
 		}
@@ -84,6 +91,12 @@ int main(int argc, char **argv)
 		exit(1);
 	}
 
+	if (xdelay < 0 || xdelay > DMA_MAP_MAX_TRANS_DELAY) {
+		fprintf(stderr, "invalid transmit delay, must be in 0-%ld\n",
+			DMA_MAP_MAX_TRANS_DELAY);
+		exit(1);
+	}
+
 	/* suppose the mininum DMA zone is 1MB in the world */
 	if (bits < 20 || bits > 64) {
 		fprintf(stderr, "invalid dma mask bit, must be in 20-64\n");
@@ -107,6 +120,8 @@ int main(int argc, char **argv)
 	map.node = node;
 	map.dma_bits = bits;
 	map.dma_dir = dir;
+	map.dma_trans_ns = xdelay;
+
 	if (ioctl(fd, cmd, &map)) {
 		perror("ioctl");
 		exit(1);
-- 
2.25.1

_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] dma-mapping: benchmark: pretend DMA is transmitting
  2021-02-05  2:00 ` Barry Song
@ 2021-02-05  9:21   ` Christoph Hellwig
  -1 siblings, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2021-02-05  9:21 UTC (permalink / raw)
  To: Barry Song; +Cc: m.szyprowski, hch, robin.murphy, iommu, linux-kernel, linuxarm

On Fri, Feb 05, 2021 at 03:00:35PM +1300, Barry Song wrote:
> +	__u32 dma_trans_ns; /* time for DMA transmission in ns */
>  	__u64 expansion[10];	/* For future use */

We need to keep the struct size, so the expansion field needs to
shrink by the equivalent amount of data that is added in dma_trans_ns.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] dma-mapping: benchmark: pretend DMA is transmitting
@ 2021-02-05  9:21   ` Christoph Hellwig
  0 siblings, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2021-02-05  9:21 UTC (permalink / raw)
  To: Barry Song; +Cc: linux-kernel, linuxarm, iommu, robin.murphy, hch

On Fri, Feb 05, 2021 at 03:00:35PM +1300, Barry Song wrote:
> +	__u32 dma_trans_ns; /* time for DMA transmission in ns */
>  	__u64 expansion[10];	/* For future use */

We need to keep the struct size, so the expansion field needs to
shrink by the equivalent amount of data that is added in dma_trans_ns.
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [PATCH v2] dma-mapping: benchmark: pretend DMA is transmitting
  2021-02-05  9:21   ` Christoph Hellwig
@ 2021-02-05 10:32     ` Song Bao Hua (Barry Song)
  -1 siblings, 0 replies; 12+ messages in thread
From: Song Bao Hua (Barry Song) @ 2021-02-05 10:32 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: m.szyprowski, robin.murphy, iommu, linux-kernel, linuxarm



> -----Original Message-----
> From: Christoph Hellwig [mailto:hch@lst.de]
> Sent: Friday, February 5, 2021 10:21 PM
> To: Song Bao Hua (Barry Song) <song.bao.hua@hisilicon.com>
> Cc: m.szyprowski@samsung.com; hch@lst.de; robin.murphy@arm.com;
> iommu@lists.linux-foundation.org; linux-kernel@vger.kernel.org;
> linuxarm@openeuler.org
> Subject: Re: [PATCH v2] dma-mapping: benchmark: pretend DMA is transmitting
> 
> On Fri, Feb 05, 2021 at 03:00:35PM +1300, Barry Song wrote:
> > +	__u32 dma_trans_ns; /* time for DMA transmission in ns */
> >  	__u64 expansion[10];	/* For future use */
> 
> We need to keep the struct size, so the expansion field needs to
> shrink by the equivalent amount of data that is added in dma_trans_ns.

Unfortunately I didn't put a rsv u32 field after dma_dir
in the original patch.
There were five 32bits data before expansion[]:

struct map_benchmark {
	__u64 avg_map_100ns; /* average map latency in 100ns */
	__u64 map_stddev; /* standard deviation of map latency */
	__u64 avg_unmap_100ns; /* as above */
	__u64 unmap_stddev;
	__u32 threads; /* how many threads will do map/unmap in parallel */
	__u32 seconds; /* how long the test will last */
	__s32 node; /* which numa node this benchmark will run on */
	__u32 dma_bits; /* DMA addressing capability */
	__u32 dma_dir; /* DMA data direction */
	__u64 expansion[10];	/* For future use */
};

My bad. That was really silly. I should have done the below from
the first beginning:
struct map_benchmark {
	__u64 avg_map_100ns; /* average map latency in 100ns */
	__u64 map_stddev; /* standard deviation of map latency */
	__u64 avg_unmap_100ns; /* as above */
	__u64 unmap_stddev;
	__u32 threads; /* how many threads will do map/unmap in parallel */
	__u32 seconds; /* how long the test will last */
	__s32 node; /* which numa node this benchmark will run on */
	__u32 dma_bits; /* DMA addressing capability */
	__u32 dma_dir; /* DMA data direction */
	__u32 rsv;
	__u64 expansion[10];	/* For future use */
};

So on 64bit system, this patch doesn't change the length of struct
as the new added u32 just fill the gap between dma_dir and expansion.

For 32bit system, this patch increases 4 bytes in the length.

I can keep the struct size unchanged by changing the struct to

struct map_benchmark {
	__u64 avg_map_100ns; /* average map latency in 100ns */
	__u64 map_stddev; /* standard deviation of map latency */
	__u64 avg_unmap_100ns; /* as above */
	__u64 unmap_stddev;
	__u32 threads; /* how many threads will do map/unmap in parallel */
	__u32 seconds; /* how long the test will last */
	__s32 node; /* which numa node this benchmark will run on */
	__u32 dma_bits; /* DMA addressing capability */
	__u32 dma_dir; /* DMA data direction */
	__u32 dma_trans_ns; /* time for DMA transmission in ns */

	__u32 exp; /* For future use */
	__u64 expansion[9];	/* For future use */
};

But the code is really ugly now.

Thanks
Barry

^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [PATCH v2] dma-mapping: benchmark: pretend DMA is transmitting
@ 2021-02-05 10:32     ` Song Bao Hua (Barry Song)
  0 siblings, 0 replies; 12+ messages in thread
From: Song Bao Hua (Barry Song) @ 2021-02-05 10:32 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: iommu, linuxarm, robin.murphy, linux-kernel



> -----Original Message-----
> From: Christoph Hellwig [mailto:hch@lst.de]
> Sent: Friday, February 5, 2021 10:21 PM
> To: Song Bao Hua (Barry Song) <song.bao.hua@hisilicon.com>
> Cc: m.szyprowski@samsung.com; hch@lst.de; robin.murphy@arm.com;
> iommu@lists.linux-foundation.org; linux-kernel@vger.kernel.org;
> linuxarm@openeuler.org
> Subject: Re: [PATCH v2] dma-mapping: benchmark: pretend DMA is transmitting
> 
> On Fri, Feb 05, 2021 at 03:00:35PM +1300, Barry Song wrote:
> > +	__u32 dma_trans_ns; /* time for DMA transmission in ns */
> >  	__u64 expansion[10];	/* For future use */
> 
> We need to keep the struct size, so the expansion field needs to
> shrink by the equivalent amount of data that is added in dma_trans_ns.

Unfortunately I didn't put a rsv u32 field after dma_dir
in the original patch.
There were five 32bits data before expansion[]:

struct map_benchmark {
	__u64 avg_map_100ns; /* average map latency in 100ns */
	__u64 map_stddev; /* standard deviation of map latency */
	__u64 avg_unmap_100ns; /* as above */
	__u64 unmap_stddev;
	__u32 threads; /* how many threads will do map/unmap in parallel */
	__u32 seconds; /* how long the test will last */
	__s32 node; /* which numa node this benchmark will run on */
	__u32 dma_bits; /* DMA addressing capability */
	__u32 dma_dir; /* DMA data direction */
	__u64 expansion[10];	/* For future use */
};

My bad. That was really silly. I should have done the below from
the first beginning:
struct map_benchmark {
	__u64 avg_map_100ns; /* average map latency in 100ns */
	__u64 map_stddev; /* standard deviation of map latency */
	__u64 avg_unmap_100ns; /* as above */
	__u64 unmap_stddev;
	__u32 threads; /* how many threads will do map/unmap in parallel */
	__u32 seconds; /* how long the test will last */
	__s32 node; /* which numa node this benchmark will run on */
	__u32 dma_bits; /* DMA addressing capability */
	__u32 dma_dir; /* DMA data direction */
	__u32 rsv;
	__u64 expansion[10];	/* For future use */
};

So on 64bit system, this patch doesn't change the length of struct
as the new added u32 just fill the gap between dma_dir and expansion.

For 32bit system, this patch increases 4 bytes in the length.

I can keep the struct size unchanged by changing the struct to

struct map_benchmark {
	__u64 avg_map_100ns; /* average map latency in 100ns */
	__u64 map_stddev; /* standard deviation of map latency */
	__u64 avg_unmap_100ns; /* as above */
	__u64 unmap_stddev;
	__u32 threads; /* how many threads will do map/unmap in parallel */
	__u32 seconds; /* how long the test will last */
	__s32 node; /* which numa node this benchmark will run on */
	__u32 dma_bits; /* DMA addressing capability */
	__u32 dma_dir; /* DMA data direction */
	__u32 dma_trans_ns; /* time for DMA transmission in ns */

	__u32 exp; /* For future use */
	__u64 expansion[9];	/* For future use */
};

But the code is really ugly now.

Thanks
Barry
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] dma-mapping: benchmark: pretend DMA is transmitting
  2021-02-05 10:32     ` Song Bao Hua (Barry Song)
@ 2021-02-05 10:36       ` Christoph Hellwig
  -1 siblings, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2021-02-05 10:36 UTC (permalink / raw)
  To: Song Bao Hua (Barry Song)
  Cc: Christoph Hellwig, m.szyprowski, robin.murphy, iommu,
	linux-kernel, linuxarm

On Fri, Feb 05, 2021 at 10:32:26AM +0000, Song Bao Hua (Barry Song) wrote:
> I can keep the struct size unchanged by changing the struct to
> 
> struct map_benchmark {
> 	__u64 avg_map_100ns; /* average map latency in 100ns */
> 	__u64 map_stddev; /* standard deviation of map latency */
> 	__u64 avg_unmap_100ns; /* as above */
> 	__u64 unmap_stddev;
> 	__u32 threads; /* how many threads will do map/unmap in parallel */
> 	__u32 seconds; /* how long the test will last */
> 	__s32 node; /* which numa node this benchmark will run on */
> 	__u32 dma_bits; /* DMA addressing capability */
> 	__u32 dma_dir; /* DMA data direction */
> 	__u32 dma_trans_ns; /* time for DMA transmission in ns */
> 
> 	__u32 exp; /* For future use */
> 	__u64 expansion[9];	/* For future use */
> };
> 
> But the code is really ugly now.

Thats why we usually use __u8 fields for reserved field.  You might
consider just switching to that instead while you're at it. I guess
we'll just have to get the addition into 5.11 then to make sure we
don't release a kernel with the alignment fix.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] dma-mapping: benchmark: pretend DMA is transmitting
@ 2021-02-05 10:36       ` Christoph Hellwig
  0 siblings, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2021-02-05 10:36 UTC (permalink / raw)
  To: Song Bao Hua (Barry Song)
  Cc: linux-kernel, linuxarm, iommu, robin.murphy, Christoph Hellwig

On Fri, Feb 05, 2021 at 10:32:26AM +0000, Song Bao Hua (Barry Song) wrote:
> I can keep the struct size unchanged by changing the struct to
> 
> struct map_benchmark {
> 	__u64 avg_map_100ns; /* average map latency in 100ns */
> 	__u64 map_stddev; /* standard deviation of map latency */
> 	__u64 avg_unmap_100ns; /* as above */
> 	__u64 unmap_stddev;
> 	__u32 threads; /* how many threads will do map/unmap in parallel */
> 	__u32 seconds; /* how long the test will last */
> 	__s32 node; /* which numa node this benchmark will run on */
> 	__u32 dma_bits; /* DMA addressing capability */
> 	__u32 dma_dir; /* DMA data direction */
> 	__u32 dma_trans_ns; /* time for DMA transmission in ns */
> 
> 	__u32 exp; /* For future use */
> 	__u64 expansion[9];	/* For future use */
> };
> 
> But the code is really ugly now.

Thats why we usually use __u8 fields for reserved field.  You might
consider just switching to that instead while you're at it. I guess
we'll just have to get the addition into 5.11 then to make sure we
don't release a kernel with the alignment fix.
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [PATCH v2] dma-mapping: benchmark: pretend DMA is transmitting
  2021-02-05 10:36       ` Christoph Hellwig
@ 2021-02-05 10:52         ` Song Bao Hua (Barry Song)
  -1 siblings, 0 replies; 12+ messages in thread
From: Song Bao Hua (Barry Song) @ 2021-02-05 10:52 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: m.szyprowski, robin.murphy, iommu, linux-kernel, linuxarm



> -----Original Message-----
> From: Christoph Hellwig [mailto:hch@lst.de]
> Sent: Friday, February 5, 2021 11:36 PM
> To: Song Bao Hua (Barry Song) <song.bao.hua@hisilicon.com>
> Cc: Christoph Hellwig <hch@lst.de>; m.szyprowski@samsung.com;
> robin.murphy@arm.com; iommu@lists.linux-foundation.org;
> linux-kernel@vger.kernel.org; linuxarm@openeuler.org
> Subject: Re: [PATCH v2] dma-mapping: benchmark: pretend DMA is transmitting
> 
> On Fri, Feb 05, 2021 at 10:32:26AM +0000, Song Bao Hua (Barry Song) wrote:
> > I can keep the struct size unchanged by changing the struct to
> >
> > struct map_benchmark {
> > 	__u64 avg_map_100ns; /* average map latency in 100ns */
> > 	__u64 map_stddev; /* standard deviation of map latency */
> > 	__u64 avg_unmap_100ns; /* as above */
> > 	__u64 unmap_stddev;
> > 	__u32 threads; /* how many threads will do map/unmap in parallel */
> > 	__u32 seconds; /* how long the test will last */
> > 	__s32 node; /* which numa node this benchmark will run on */
> > 	__u32 dma_bits; /* DMA addressing capability */
> > 	__u32 dma_dir; /* DMA data direction */
> > 	__u32 dma_trans_ns; /* time for DMA transmission in ns */
> >
> > 	__u32 exp; /* For future use */
> > 	__u64 expansion[9];	/* For future use */
> > };
> >
> > But the code is really ugly now.
> 
> Thats why we usually use __u8 fields for reserved field.  You might
> consider just switching to that instead while you're at it. I guess
> we'll just have to get the addition into 5.11 then to make sure we
> don't release a kernel with the alignment fix.

I assume there is no need to keep the same size with 5.11-rc, so
could change the struct to:

struct map_benchmark {
	__u64 avg_map_100ns; /* average map latency in 100ns */
	__u64 map_stddev; /* standard deviation of map latency */
	__u64 avg_unmap_100ns; /* as above */
	__u64 unmap_stddev;
	__u32 threads; /* how many threads will do map/unmap in parallel */
	__u32 seconds; /* how long the test will last */
	__s32 node; /* which numa node this benchmark will run on */
	__u32 dma_bits; /* DMA addressing capability */
	__u32 dma_dir; /* DMA data direction */
	__u8 expansion[84]; /* For future use */
};

This won't increase size on 64bit system, but it increases 4bytes
on 32bits system comparing to 5.11-rc. How do you think about it?

Thanks
Barry


^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [PATCH v2] dma-mapping: benchmark: pretend DMA is transmitting
@ 2021-02-05 10:52         ` Song Bao Hua (Barry Song)
  0 siblings, 0 replies; 12+ messages in thread
From: Song Bao Hua (Barry Song) @ 2021-02-05 10:52 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: iommu, linuxarm, robin.murphy, linux-kernel



> -----Original Message-----
> From: Christoph Hellwig [mailto:hch@lst.de]
> Sent: Friday, February 5, 2021 11:36 PM
> To: Song Bao Hua (Barry Song) <song.bao.hua@hisilicon.com>
> Cc: Christoph Hellwig <hch@lst.de>; m.szyprowski@samsung.com;
> robin.murphy@arm.com; iommu@lists.linux-foundation.org;
> linux-kernel@vger.kernel.org; linuxarm@openeuler.org
> Subject: Re: [PATCH v2] dma-mapping: benchmark: pretend DMA is transmitting
> 
> On Fri, Feb 05, 2021 at 10:32:26AM +0000, Song Bao Hua (Barry Song) wrote:
> > I can keep the struct size unchanged by changing the struct to
> >
> > struct map_benchmark {
> > 	__u64 avg_map_100ns; /* average map latency in 100ns */
> > 	__u64 map_stddev; /* standard deviation of map latency */
> > 	__u64 avg_unmap_100ns; /* as above */
> > 	__u64 unmap_stddev;
> > 	__u32 threads; /* how many threads will do map/unmap in parallel */
> > 	__u32 seconds; /* how long the test will last */
> > 	__s32 node; /* which numa node this benchmark will run on */
> > 	__u32 dma_bits; /* DMA addressing capability */
> > 	__u32 dma_dir; /* DMA data direction */
> > 	__u32 dma_trans_ns; /* time for DMA transmission in ns */
> >
> > 	__u32 exp; /* For future use */
> > 	__u64 expansion[9];	/* For future use */
> > };
> >
> > But the code is really ugly now.
> 
> Thats why we usually use __u8 fields for reserved field.  You might
> consider just switching to that instead while you're at it. I guess
> we'll just have to get the addition into 5.11 then to make sure we
> don't release a kernel with the alignment fix.

I assume there is no need to keep the same size with 5.11-rc, so
could change the struct to:

struct map_benchmark {
	__u64 avg_map_100ns; /* average map latency in 100ns */
	__u64 map_stddev; /* standard deviation of map latency */
	__u64 avg_unmap_100ns; /* as above */
	__u64 unmap_stddev;
	__u32 threads; /* how many threads will do map/unmap in parallel */
	__u32 seconds; /* how long the test will last */
	__s32 node; /* which numa node this benchmark will run on */
	__u32 dma_bits; /* DMA addressing capability */
	__u32 dma_dir; /* DMA data direction */
	__u8 expansion[84]; /* For future use */
};

This won't increase size on 64bit system, but it increases 4bytes
on 32bits system comparing to 5.11-rc. How do you think about it?

Thanks
Barry

_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] dma-mapping: benchmark: pretend DMA is transmitting
  2021-02-05 10:52         ` Song Bao Hua (Barry Song)
@ 2021-02-05 10:56           ` Christoph Hellwig
  -1 siblings, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2021-02-05 10:56 UTC (permalink / raw)
  To: Song Bao Hua (Barry Song)
  Cc: Christoph Hellwig, m.szyprowski, robin.murphy, iommu,
	linux-kernel, linuxarm

On Fri, Feb 05, 2021 at 10:52:37AM +0000, Song Bao Hua (Barry Song) wrote:
> I assume there is no need to keep the same size with 5.11-rc, so
> could change the struct to:
> 
> struct map_benchmark {
> 	__u64 avg_map_100ns; /* average map latency in 100ns */
> 	__u64 map_stddev; /* standard deviation of map latency */
> 	__u64 avg_unmap_100ns; /* as above */
> 	__u64 unmap_stddev;
> 	__u32 threads; /* how many threads will do map/unmap in parallel */
> 	__u32 seconds; /* how long the test will last */
> 	__s32 node; /* which numa node this benchmark will run on */
> 	__u32 dma_bits; /* DMA addressing capability */
> 	__u32 dma_dir; /* DMA data direction */
> 	__u8 expansion[84]; /* For future use */
> };
> 
> This won't increase size on 64bit system, but it increases 4bytes
> on 32bits system comparing to 5.11-rc. How do you think about it?

Yes, that sounds good.  Please send me a two patch series with the
first one changing the alignment, and the second adding the delay.
I'll send the first one off to Linus ASAP then.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] dma-mapping: benchmark: pretend DMA is transmitting
@ 2021-02-05 10:56           ` Christoph Hellwig
  0 siblings, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2021-02-05 10:56 UTC (permalink / raw)
  To: Song Bao Hua (Barry Song)
  Cc: linux-kernel, linuxarm, iommu, robin.murphy, Christoph Hellwig

On Fri, Feb 05, 2021 at 10:52:37AM +0000, Song Bao Hua (Barry Song) wrote:
> I assume there is no need to keep the same size with 5.11-rc, so
> could change the struct to:
> 
> struct map_benchmark {
> 	__u64 avg_map_100ns; /* average map latency in 100ns */
> 	__u64 map_stddev; /* standard deviation of map latency */
> 	__u64 avg_unmap_100ns; /* as above */
> 	__u64 unmap_stddev;
> 	__u32 threads; /* how many threads will do map/unmap in parallel */
> 	__u32 seconds; /* how long the test will last */
> 	__s32 node; /* which numa node this benchmark will run on */
> 	__u32 dma_bits; /* DMA addressing capability */
> 	__u32 dma_dir; /* DMA data direction */
> 	__u8 expansion[84]; /* For future use */
> };
> 
> This won't increase size on 64bit system, but it increases 4bytes
> on 32bits system comparing to 5.11-rc. How do you think about it?

Yes, that sounds good.  Please send me a two patch series with the
first one changing the alignment, and the second adding the delay.
I'll send the first one off to Linus ASAP then.
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2021-02-06  0:38 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-02-05  2:00 [PATCH v2] dma-mapping: benchmark: pretend DMA is transmitting Barry Song
2021-02-05  2:00 ` Barry Song
2021-02-05  9:21 ` Christoph Hellwig
2021-02-05  9:21   ` Christoph Hellwig
2021-02-05 10:32   ` Song Bao Hua (Barry Song)
2021-02-05 10:32     ` Song Bao Hua (Barry Song)
2021-02-05 10:36     ` Christoph Hellwig
2021-02-05 10:36       ` Christoph Hellwig
2021-02-05 10:52       ` Song Bao Hua (Barry Song)
2021-02-05 10:52         ` Song Bao Hua (Barry Song)
2021-02-05 10:56         ` Christoph Hellwig
2021-02-05 10:56           ` Christoph Hellwig

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.