* [PATCH v1 1/2] dma-mapping-common: add dma_map_page_attrs API
@ 2015-10-25 16:07 Shamir Rabinovitch
  2015-10-25 16:07 ` [PATCH v1 2/2] dma-mapping-common: add DMA attribute - DMA_ATTR_IOMMU_BYPASS Shamir Rabinovitch
  0 siblings, 1 reply; 42+ messages in thread
From: Shamir Rabinovitch @ 2015-10-25 16:07 UTC (permalink / raw)
  To: arnd, corbet, linux-doc, linux-arch

The SPARC64 arch needs the ability to pass DMA attributes when mapping
pages. DMA performance will be low if specific attributes such as
DMA_ATTR_WEAK_ORDERING are not used.

Signed-off-by: Shamir Rabinovitch <shamir.rabinovitch@oracle.com>
---
 include/asm-generic/dma-mapping-common.h |   32 +++++++++++++++++++++++------
 1 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/include/asm-generic/dma-mapping-common.h b/include/asm-generic/dma-mapping-common.h
index b1bc954..b816ed7 100644
--- a/include/asm-generic/dma-mapping-common.h
+++ b/include/asm-generic/dma-mapping-common.h
@@ -74,32 +74,50 @@ static inline void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg
 		ops->unmap_sg(dev, sg, nents, dir, attrs);
 }
 
-static inline dma_addr_t dma_map_page(struct device *dev, struct page *page,
-				      size_t offset, size_t size,
-				      enum dma_data_direction dir)
+static inline dma_addr_t dma_map_page_attrs(struct device *dev,
+					    struct page *page,
+					    size_t offset, size_t size,
+					    enum dma_data_direction dir,
+					    struct dma_attrs *attrs)
 {
 	struct dma_map_ops *ops = get_dma_ops(dev);
 	dma_addr_t addr;
 
 	kmemcheck_mark_initialized(page_address(page) + offset, size);
 	BUG_ON(!valid_dma_direction(dir));
-	addr = ops->map_page(dev, page, offset, size, dir, NULL);
+	addr = ops->map_page(dev, page, offset, size, dir, attrs);
 	debug_dma_map_page(dev, page, offset, size, dir, addr, false);
 
 	return addr;
 }
 
-static inline void dma_unmap_page(struct device *dev, dma_addr_t addr,
-				  size_t size, enum dma_data_direction dir)
+static inline void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr,
+					size_t size,
+					enum dma_data_direction dir,
+					struct dma_attrs *attrs)
 {
 	struct dma_map_ops *ops = get_dma_ops(dev);
 
 	BUG_ON(!valid_dma_direction(dir));
 	if (ops->unmap_page)
-		ops->unmap_page(dev, addr, size, dir, NULL);
+		ops->unmap_page(dev, addr, size, dir, attrs);
 	debug_dma_unmap_page(dev, addr, size, dir, false);
 }
 
+static inline dma_addr_t dma_map_page(struct device *dev,
+				      struct page *page,
+				      size_t offset, size_t size,
+				      enum dma_data_direction dir)
+{
+	return dma_map_page_attrs(dev, page, offset, size, dir, NULL);
+}
+
+static inline void dma_unmap_page(struct device *dev, dma_addr_t addr,
+				  size_t size, enum dma_data_direction dir)
+{
+	return dma_unmap_page_attrs(dev, addr, size, dir, NULL);
+}
+
 static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
 					   size_t size,
 					   enum dma_data_direction dir)
-- 
1.7.1
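
For illustration, a minimal sketch of how a driver might call the new
API with the dma-attrs helpers of this era (the device, page, size and
direction below are assumptions for the example, not taken from the
patch):

	DEFINE_DMA_ATTRS(attrs);
	dma_addr_t addr;

	/* ask the arch code for relaxed ordering on this mapping */
	dma_set_attr(DMA_ATTR_WEAK_ORDERING, &attrs);

	addr = dma_map_page_attrs(dev, page, 0, PAGE_SIZE,
				  DMA_TO_DEVICE, &attrs);
	if (dma_mapping_error(dev, addr))
		return -ENOMEM;

	/* ... device performs DMA ... */

	dma_unmap_page_attrs(dev, addr, PAGE_SIZE, DMA_TO_DEVICE, &attrs);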

* Re: [PATCH v1 2/2] dma-mapping-common: add DMA attribute - DMA_ATTR_IOMMU_BYPASS
@ 2015-11-16  6:56 Benjamin Serebrin
  0 siblings, 0 replies; 42+ messages in thread
From: Benjamin Serebrin @ 2015-11-16  6:56 UTC (permalink / raw)
  To: Shamir Rabinovitch
  Cc: David Miller, Joerg Roedel, dwmw2, benh, arnd, corbet, linux-doc,
	linux-arch, luto, borntraeger, cornelia.huck, sebott,
	Paolo Bonzini, hch, kvm, schwidefsky, linux-s390

We looked into Intel IOMMU performance a while ago and learned a few
useful things.  We generally ran a parallel 200-thread TCP_RR test, as
this churns through mappings quickly and uses all available cores.

First, the main bottleneck was software performance[1].  This study
preceded the recent patch to break the locks into pools ("Break up
monolithic iommu table/lock into finer graularity pools and lock").
There were several points of lock contention:
- the RB tree is per device (and in the network test, there's one
device).  Every dma_map and unmap holds the lock.
- the RB tree lock is held during invalidations as well.  There's a
250-entry queue for invalidations that doesn't do any batching
intelligence (for example, promoting to larger-range invalidations,
flushing the entire device, etc.).  RB tree locks may be held while
waiting for invalidation drains.  Invalidations have even worse
behavior with ATS enabled for a given device.
- the RB tree has one entry per dma_map call (that entry is deleted by
the corresponding dma_unmap).  If we had merged all adjacent entries
when we could, we would not have lost any information that's actually
used by the code today.  (There could be a check that a dma_unmap
actually covers the entire region that was mapped, but there isn't.)
At boot (without network traffic), two common NICs' drivers show tens
of thousands of static dma_maps that never go away; this means the RB
tree is ~14-16 levels deep.  An rbtree walk (holding that big lock) is
a 14-16 level pointer chase through mostly cache-cold entries.  I
wrote a modification to the RB tree handling that merges nodes that
represent abutting IOVA ranges (and unmerges them on dma_unmap; a
simplified sketch follows this list), and the same drivers created
around 7 unique entries.  Steady state grew to a few hundred and maybe
a thousand, but the fragmentation didn't get worse than that.  This
optimization got about a third of the performance back.
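
A simplified sketch of the merge-on-insert idea (an illustration, not
the actual modification): it uses a plain sorted list rather than the
kernel rbtree, and assumes new mappings only abut, never overlap,
existing ones.

	#include <stdlib.h>

	/* One node per run of contiguous IOVA page frames (bounds inclusive). */
	struct iova_range {
		unsigned long pfn_lo, pfn_hi;
		struct iova_range *next;	/* list kept sorted by pfn_lo */
	};

	/* Insert [lo, hi]; absorb it into a neighbour when the ranges abut. */
	static void iova_insert_merged(struct iova_range **head,
				       unsigned long lo, unsigned long hi)
	{
		struct iova_range **pp = head, *n;

		/* skip nodes that end strictly before the new range */
		while (*pp && (*pp)->pfn_hi + 1 < lo)
			pp = &(*pp)->next;

		if (*pp && (*pp)->pfn_lo <= hi + 1) {	/* abuts a neighbour */
			if (lo < (*pp)->pfn_lo)
				(*pp)->pfn_lo = lo;
			if (hi > (*pp)->pfn_hi)
				(*pp)->pfn_hi = hi;
			/* the grown node may now abut its successor; fold it in */
			n = (*pp)->next;
			if (n && (*pp)->pfn_hi + 1 >= n->pfn_lo) {
				(*pp)->pfn_hi = n->pfn_hi;
				(*pp)->next = n->next;
				free(n);
			}
			return;
		}

		/* no neighbour to merge with: link in a fresh node */
		n = malloc(sizeof(*n));
		if (!n)
			return;		/* allocation failure: drop tracking */
		n->pfn_lo = lo;
		n->pfn_hi = hi;
		n->next = *pp;
		*pp = n;
	}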

Omer's paper (https://www.usenix.org/system/files/conference/atc15/atc15-paper-peleg.pdf)
has some promising approaches.  The magazine-based allocator it
describes avoids the RB tree issue.
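
A rough sketch of the per-CPU magazine idea, under stated assumptions:
one such structure would exist per CPU, and global_alloc_iova() /
global_free_iova() are hypothetical stand-ins for the existing locked
allocator, as are the helper names themselves.

	/* A small per-CPU cache ("magazine") of recently freed IOVA
	 * frames, serviced without taking the global allocator lock.
	 */
	#define MAG_SIZE 128

	struct iova_magazine {
		unsigned long pfns[MAG_SIZE];
		int count;
	};

	static unsigned long mag_alloc_iova(struct iova_magazine *mag)
	{
		if (mag->count > 0)
			return mag->pfns[--mag->count];	/* lock-free fast path */
		return global_alloc_iova();		/* hypothetical slow path */
	}

	static void mag_free_iova(struct iova_magazine *mag, unsigned long pfn)
	{
		if (mag->count < MAG_SIZE)
			mag->pfns[mag->count++] = pfn;	/* cache for reuse */
		else
			global_free_iova(pfn);		/* hypothetical slow path */
	}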

I'm interested in seeing if a dynamic 1:1 mapping with a mostly-lock-free
page-table cleanup algorithm could do well.

There are correctness fixes and optimizations left in the invalidation
path: I want strict-ish semantics (a page doesn't go back into the
freelist until the last IOTLB/IOMMU TLB entry is invalidated) with
good performance, and that seems to imply that an additional page
reference should be taken at dma_map time and put back when the
IOMMU flush routine completes.  (This is worthy of much
discussion.)
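
A sketch of that pinning idea; the two helpers are hypothetical names
for illustration, while get_page(), put_page() and dma_map_page() are
the standard kernel primitives.

	/* Pin the page at map time so it cannot return to the freelist
	 * while stale IOTLB entries might still point at it.
	 */
	static dma_addr_t strict_map_page(struct device *dev, struct page *page,
					  size_t offset, size_t size,
					  enum dma_data_direction dir)
	{
		get_page(page);	/* extra reference held across the mapping */
		return dma_map_page(dev, page, offset, size, dir);
	}

	/* Called from the invalidation-completion path, once the IOTLB/
	 * IOMMU TLB entries covering this mapping are known to be gone.
	 */
	static void strict_flush_done(struct page *page)
	{
		put_page(page);	/* now the page may really be freed */
	}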

Additionally, we can find ways to optimize the flush routine by
realizing that if we have frequent maps and unmaps, it may be because
the device creates and destroys buffers a lot; these kinds of workloads
use an IOVA for one event and then never come back.  Maybe TLBs don't
do much good and we could just flush the entire IOMMU TLB [and ATS
cache] for that BDF.
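
One toy way to express that heuristic (an assumption for illustration,
not something proposed concretely in the thread; the threshold and both
flush helpers are hypothetical names):

	/* If a device churns through mappings, amortize invalidation cost
	 * by flushing its whole IOTLB once per batch instead of per page.
	 */
	#define FLUSH_BATCH_THRESHOLD 64

	struct dev_flush_state {
		unsigned int pending;	/* unmaps since the last full flush */
	};

	static void queue_invalidation(struct dev_flush_state *st,
				       unsigned long iova, size_t size)
	{
		if (++st->pending >= FLUSH_BATCH_THRESHOLD) {
			flush_device_iotlb();		/* hypothetical whole-BDF flush */
			st->pending = 0;
		} else {
			flush_iotlb_range(iova, size);	/* hypothetical ranged flush */
		}
	}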

We'll try to get free time to do some of these things soon.

Ben


1: We verified that the IOMMU costs are almost entirely software
overheads by forcing software 1:1 mode, where we create page tables
for all physical addresses.  We tested using leaf nodes of size 4KB,
of 2MB, and of 1GB.  In all cases, there is zero runtime maintenance
of the page tables, and no IOMMU invalidations.  We did piles of DMA
maximizing x16 PCIe bandwidth on multiple lanes, to random DRAM
addresses.  At 4KB page size, we could see some bandwidth slowdown,
but at 2MB and 1GB, there was < 1% performance loss as compared with
IOMMU off.

Thread overview: 42+ messages
2015-10-25 16:07 [PATCH v1 1/2] dma-mapping-common: add dma_map_page_attrs API Shamir Rabinovitch
2015-10-25 16:07 ` [PATCH v1 2/2] dma-mapping-common: add DMA attribute - DMA_ATTR_IOMMU_BYPASS Shamir Rabinovitch
2015-10-28  6:30   ` David Woodhouse
2015-10-28 11:10     ` Shamir Rabinovitch
2015-10-28 13:31       ` David Woodhouse
2015-10-28 14:07         ` David Miller
2015-10-28 13:57           ` David Woodhouse
2015-10-29  0:23             ` David Miller
2015-10-29  0:32         ` Benjamin Herrenschmidt
2015-10-29  0:42           ` David Woodhouse
2015-10-29  1:10             ` Benjamin Herrenschmidt
2015-10-29 18:31               ` Andy Lutomirski
2015-10-29 22:35                 ` David Woodhouse
2015-11-01  7:45                   ` Shamir Rabinovitch
2015-11-01 21:10                     ` Benjamin Herrenschmidt
2015-11-02  7:23                       ` Shamir Rabinovitch
2015-11-02 10:00                         ` Benjamin Herrenschmidt
2015-11-02 12:07                           ` Shamir Rabinovitch
2015-11-02 20:13                             ` Benjamin Herrenschmidt
2015-11-02 21:45                               ` Arnd Bergmann
2015-11-02 23:08                                 ` Benjamin Herrenschmidt
2015-11-03 13:11                                   ` Christoph Hellwig
2015-11-03 19:35                                     ` Benjamin Herrenschmidt
2015-11-02 21:49                               ` Shamir Rabinovitch
2015-11-02 22:48                       ` David Woodhouse
2015-11-02 23:10                         ` Benjamin Herrenschmidt
2015-11-05 21:08                   ` David Miller
2015-10-30  1:51                 ` Benjamin Herrenschmidt
2015-10-30 10:32               ` Arnd Bergmann
2015-10-30 23:17                 ` Benjamin Herrenschmidt
2015-10-30 23:24                   ` Arnd Bergmann
2015-11-02 14:51                 ` Joerg Roedel
2015-10-29  7:32             ` Shamir Rabinovitch
2015-11-02 14:44               ` Joerg Roedel
2015-11-02 17:32                 ` Shamir Rabinovitch
2015-11-05 13:42                   ` Joerg Roedel
2015-11-05 21:11                     ` David Miller
2015-11-07 15:06                       ` Shamir Rabinovitch
     [not found]                         ` <CAN+hb0UvztgwNuAh93XdJEe7vgiZgNMc9mHNziHpEopg8Oi4Mg@mail.gmail.com>
2015-11-16  8:42                           ` David Woodhouse
     [not found]                             ` <CAN+hb0UWpfcS5DvgMxNjY-5JOztw2mO1r2FJAW17fn974mhxPA@mail.gmail.com>
2015-11-16 18:42                               ` Benjamin Serebrin
2015-10-25 16:37 [PATCH v1 1/2] dma-mapping-common: add dma_map_page_attrs API Shamir Rabinovitch
2015-10-25 16:37 ` [PATCH v1 2/2] dma-mapping-common: add DMA attribute - DMA_ATTR_IOMMU_BYPASS Shamir Rabinovitch
2015-11-16  6:56 Benjamin Serebrin
