All of lore.kernel.org
 help / color / mirror / Atom feed
From: Robin Murphy <robin.murphy-5wv7dgnIgG8@public.gmane.org>
To: joro-zLv9SwRftAIdnm+yROfE0A@public.gmane.org,
	will.deacon-5wv7dgnIgG8@public.gmane.org
Cc: laurent.pinchart+renesas-ryLnwIuWjnjg/C1BVhZhaw@public.gmane.org,
	dianders-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r@public.gmane.org,
	treding-DDmLM1+adcrQT0dZR+AlfA@public.gmane.org,
	brian.starkey-5wv7dgnIgG8@public.gmane.org
Subject: [PATCH 4/5] iommu/dma: Finish optimising higher-order allocations
Date: Thu,  7 Apr 2016 18:42:07 +0100	[thread overview]
Message-ID: <89763f6b1ac684c3d8712e38760bec55b7885e3b.1460048991.git.robin.murphy@arm.com> (raw)
In-Reply-To: <cover.1460048991.git.robin.murphy-5wv7dgnIgG8@public.gmane.org>

Now that we know exactly which page sizes our caller wants to use in the
given domain, we can restrict higher-order allocation attempts to just
those sizes, if any, and avoid wasting any time or effort on other sizes
which offer no benefit. In the same vein, this also lets us accommodate
a minimum order greater than 0 for special cases.

Signed-off-by: Robin Murphy <robin.murphy-5wv7dgnIgG8@public.gmane.org>
---
 arch/arm64/mm/dma-mapping.c |  4 ++--
 drivers/iommu/dma-iommu.c   | 37 ++++++++++++++++++++++++++++---------
 include/linux/dma-iommu.h   |  4 ++--
 3 files changed, 32 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 5d36907..41d19a0 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -562,8 +562,8 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
 		struct page **pages;
 		pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent);
 
-		pages = iommu_dma_alloc(dev, iosize, gfp, ioprot, handle,
-					flush_page);
+		pages = iommu_dma_alloc(dev, iosize, gfp, attrs, ioprot,
+					handle, flush_page);
 		if (!pages)
 			return NULL;
 
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 6edc852..6dc8dfc 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -190,11 +190,16 @@ static void __iommu_dma_free_pages(struct page **pages, int count)
 	kvfree(pages);
 }
 
-static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp)
+static struct page **__iommu_dma_alloc_pages(unsigned int count,
+		unsigned long pgsize_orders, gfp_t gfp)
 {
 	struct page **pages;
 	unsigned int i = 0, array_size = count * sizeof(*pages);
-	unsigned int order = MAX_ORDER;
+	unsigned int min_order = __ffs(pgsize_orders);
+
+	pgsize_orders &= (2U << MAX_ORDER) - 1;
+	if (!pgsize_orders)
+		return NULL;
 
 	if (array_size <= PAGE_SIZE)
 		pages = kzalloc(array_size, GFP_KERNEL);
@@ -208,6 +213,7 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp)
 
 	while (count) {
 		struct page *page = NULL;
+		unsigned int order;
 		int j;
 
 		/*
@@ -215,8 +221,9 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp)
 		 * than a necessity, hence using __GFP_NORETRY until
 		 * falling back to single-page allocations.
 		 */
-		for (order = min_t(unsigned int, order, __fls(count));
-		     order > 0; order--) {
+		for (pgsize_orders &= (2U << __fls(count)) - 1;
+		     (order = __fls(pgsize_orders)) > min_order;
+		     pgsize_orders &= (1U << order) - 1) {
 			page = alloc_pages(gfp | __GFP_NORETRY, order);
 			if (!page)
 				continue;
@@ -230,7 +237,7 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp)
 			}
 		}
 		if (!page)
-			page = alloc_page(gfp);
+			page = alloc_pages(gfp, order);
 		if (!page) {
 			__iommu_dma_free_pages(pages, i);
 			return NULL;
@@ -267,6 +274,7 @@ void iommu_dma_free(struct device *dev, struct page **pages, size_t size,
  *	 attached to an iommu_dma_domain
  * @size: Size of buffer in bytes
  * @gfp: Allocation flags
+ * @attrs: DMA attributes for this allocation
  * @prot: IOMMU mapping flags
  * @handle: Out argument for allocated DMA handle
  * @flush_page: Arch callback which must ensure PAGE_SIZE bytes from the
@@ -278,8 +286,8 @@ void iommu_dma_free(struct device *dev, struct page **pages, size_t size,
  * Return: Array of struct page pointers describing the buffer,
  *	   or NULL on failure.
  */
-struct page **iommu_dma_alloc(struct device *dev, size_t size,
-		gfp_t gfp, int prot, dma_addr_t *handle,
+struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp,
+		struct dma_attrs *attrs, int prot, dma_addr_t *handle,
 		void (*flush_page)(struct device *, const void *, phys_addr_t))
 {
 	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
@@ -288,11 +296,22 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size,
 	struct page **pages;
 	struct sg_table sgt;
 	dma_addr_t dma_addr;
-	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	unsigned int count, min_pgsize, pgsizes = domain->pgsize_bitmap;
 
 	*handle = DMA_ERROR_CODE;
 
-	pages = __iommu_dma_alloc_pages(count, gfp);
+	if (pgsizes & (PAGE_SIZE - 1)) {
+		pgsizes &= PAGE_MASK;
+		pgsizes |= PAGE_SIZE;
+	}
+
+	min_pgsize = pgsizes ^ (pgsizes & (pgsizes - 1));
+	if (dma_get_attr(DMA_ATTR_ALLOC_SINGLE_PAGES, attrs))
+		pgsizes = min_pgsize;
+
+	size = ALIGN(size, min_pgsize);
+	count = size >> PAGE_SHIFT;
+	pages = __iommu_dma_alloc_pages(count, pgsizes >> PAGE_SHIFT, gfp);
 	if (!pages)
 		return NULL;
 
diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
index fc48103..8443bbb 100644
--- a/include/linux/dma-iommu.h
+++ b/include/linux/dma-iommu.h
@@ -38,8 +38,8 @@ int dma_direction_to_prot(enum dma_data_direction dir, bool coherent);
  * These implement the bulk of the relevant DMA mapping callbacks, but require
  * the arch code to take care of attributes and cache maintenance
  */
-struct page **iommu_dma_alloc(struct device *dev, size_t size,
-		gfp_t gfp, int prot, dma_addr_t *handle,
+struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp,
+		struct dma_attrs *attrs, int prot, dma_addr_t *handle,
 		void (*flush_page)(struct device *, const void *, phys_addr_t));
 void iommu_dma_free(struct device *dev, struct page **pages, size_t size,
 		dma_addr_t *handle);
-- 
2.7.3.dirty

WARNING: multiple messages have this Message-ID (diff)
From: robin.murphy@arm.com (Robin Murphy)
To: linux-arm-kernel@lists.infradead.org
Subject: [PATCH 4/5] iommu/dma: Finish optimising higher-order allocations
Date: Thu,  7 Apr 2016 18:42:07 +0100	[thread overview]
Message-ID: <89763f6b1ac684c3d8712e38760bec55b7885e3b.1460048991.git.robin.murphy@arm.com> (raw)
In-Reply-To: <cover.1460048991.git.robin.murphy@arm.com>

Now that we know exactly which page sizes our caller wants to use in the
given domain, we can restrict higher-order allocation attempts to just
those sizes, if any, and avoid wasting any time or effort on other sizes
which offer no benefit. In the same vein, this also lets us accommodate
a minimum order greater than 0 for special cases.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---
 arch/arm64/mm/dma-mapping.c |  4 ++--
 drivers/iommu/dma-iommu.c   | 37 ++++++++++++++++++++++++++++---------
 include/linux/dma-iommu.h   |  4 ++--
 3 files changed, 32 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 5d36907..41d19a0 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -562,8 +562,8 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
 		struct page **pages;
 		pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent);
 
-		pages = iommu_dma_alloc(dev, iosize, gfp, ioprot, handle,
-					flush_page);
+		pages = iommu_dma_alloc(dev, iosize, gfp, attrs, ioprot,
+					handle, flush_page);
 		if (!pages)
 			return NULL;
 
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 6edc852..6dc8dfc 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -190,11 +190,16 @@ static void __iommu_dma_free_pages(struct page **pages, int count)
 	kvfree(pages);
 }
 
-static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp)
+static struct page **__iommu_dma_alloc_pages(unsigned int count,
+		unsigned long pgsize_orders, gfp_t gfp)
 {
 	struct page **pages;
 	unsigned int i = 0, array_size = count * sizeof(*pages);
-	unsigned int order = MAX_ORDER;
+	unsigned int min_order = __ffs(pgsize_orders);
+
+	pgsize_orders &= (2U << MAX_ORDER) - 1;
+	if (!pgsize_orders)
+		return NULL;
 
 	if (array_size <= PAGE_SIZE)
 		pages = kzalloc(array_size, GFP_KERNEL);
@@ -208,6 +213,7 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp)
 
 	while (count) {
 		struct page *page = NULL;
+		unsigned int order;
 		int j;
 
 		/*
@@ -215,8 +221,9 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp)
 		 * than a necessity, hence using __GFP_NORETRY until
 		 * falling back to single-page allocations.
 		 */
-		for (order = min_t(unsigned int, order, __fls(count));
-		     order > 0; order--) {
+		for (pgsize_orders &= (2U << __fls(count)) - 1;
+		     (order = __fls(pgsize_orders)) > min_order;
+		     pgsize_orders &= (1U << order) - 1) {
 			page = alloc_pages(gfp | __GFP_NORETRY, order);
 			if (!page)
 				continue;
@@ -230,7 +237,7 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp)
 			}
 		}
 		if (!page)
-			page = alloc_page(gfp);
+			page = alloc_pages(gfp, order);
 		if (!page) {
 			__iommu_dma_free_pages(pages, i);
 			return NULL;
@@ -267,6 +274,7 @@ void iommu_dma_free(struct device *dev, struct page **pages, size_t size,
  *	 attached to an iommu_dma_domain
  * @size: Size of buffer in bytes
  * @gfp: Allocation flags
+ * @attrs: DMA attributes for this allocation
  * @prot: IOMMU mapping flags
  * @handle: Out argument for allocated DMA handle
  * @flush_page: Arch callback which must ensure PAGE_SIZE bytes from the
@@ -278,8 +286,8 @@ void iommu_dma_free(struct device *dev, struct page **pages, size_t size,
  * Return: Array of struct page pointers describing the buffer,
  *	   or NULL on failure.
  */
-struct page **iommu_dma_alloc(struct device *dev, size_t size,
-		gfp_t gfp, int prot, dma_addr_t *handle,
+struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp,
+		struct dma_attrs *attrs, int prot, dma_addr_t *handle,
 		void (*flush_page)(struct device *, const void *, phys_addr_t))
 {
 	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
@@ -288,11 +296,22 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size,
 	struct page **pages;
 	struct sg_table sgt;
 	dma_addr_t dma_addr;
-	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	unsigned int count, min_pgsize, pgsizes = domain->pgsize_bitmap;
 
 	*handle = DMA_ERROR_CODE;
 
-	pages = __iommu_dma_alloc_pages(count, gfp);
+	if (pgsizes & (PAGE_SIZE - 1)) {
+		pgsizes &= PAGE_MASK;
+		pgsizes |= PAGE_SIZE;
+	}
+
+	min_pgsize = pgsizes ^ (pgsizes & (pgsizes - 1));
+	if (dma_get_attr(DMA_ATTR_ALLOC_SINGLE_PAGES, attrs))
+		pgsizes = min_pgsize;
+
+	size = ALIGN(size, min_pgsize);
+	count = size >> PAGE_SHIFT;
+	pages = __iommu_dma_alloc_pages(count, pgsizes >> PAGE_SHIFT, gfp);
 	if (!pages)
 		return NULL;
 
diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
index fc48103..8443bbb 100644
--- a/include/linux/dma-iommu.h
+++ b/include/linux/dma-iommu.h
@@ -38,8 +38,8 @@ int dma_direction_to_prot(enum dma_data_direction dir, bool coherent);
  * These implement the bulk of the relevant DMA mapping callbacks, but require
  * the arch code to take care of attributes and cache maintenance
  */
-struct page **iommu_dma_alloc(struct device *dev, size_t size,
-		gfp_t gfp, int prot, dma_addr_t *handle,
+struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp,
+		struct dma_attrs *attrs, int prot, dma_addr_t *handle,
 		void (*flush_page)(struct device *, const void *, phys_addr_t));
 void iommu_dma_free(struct device *dev, struct page **pages, size_t size,
 		dma_addr_t *handle);
-- 
2.7.3.dirty

  parent reply	other threads:[~2016-04-07 17:42 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-04-07 17:42 [PATCH 0/5] Introduce per-domain page sizes Robin Murphy
2016-04-07 17:42 ` Robin Murphy
     [not found] ` <cover.1460048991.git.robin.murphy-5wv7dgnIgG8@public.gmane.org>
2016-04-07 17:42   ` [PATCH 1/5] iommu: remove unused priv field from struct iommu_ops Robin Murphy
2016-04-07 17:42     ` Robin Murphy
2016-04-07 17:42   ` [PATCH 2/5] iommu: of: enforce const-ness of " Robin Murphy
2016-04-07 17:42     ` Robin Murphy
2016-04-07 17:42   ` [PATCH 3/5] iommu: Allow selecting page sizes per domain Robin Murphy
2016-04-07 17:42     ` Robin Murphy
2016-04-07 17:42   ` Robin Murphy [this message]
2016-04-07 17:42     ` [PATCH 4/5] iommu/dma: Finish optimising higher-order allocations Robin Murphy
     [not found]     ` <89763f6b1ac684c3d8712e38760bec55b7885e3b.1460048991.git.robin.murphy-5wv7dgnIgG8@public.gmane.org>
2016-04-08  5:32       ` Yong Wu
2016-04-08  5:32         ` Yong Wu
2016-04-08 16:33         ` Robin Murphy
2016-04-08 16:33           ` Robin Murphy
2016-04-13 16:29       ` [PATCH v2] " Robin Murphy
2016-04-13 16:29         ` Robin Murphy
     [not found]         ` <3e4572cb0a175061c1c4b436e3806ba9d7b9f199.1460563676.git.robin.murphy-5wv7dgnIgG8@public.gmane.org>
2016-04-21  5:47           ` Yong Wu
2016-04-21  5:47             ` Yong Wu
2016-04-07 17:42   ` [PATCH 5/5] iommu/arm-smmu: Use per-domain page sizes Robin Murphy
2016-04-07 17:42     ` Robin Murphy
2016-04-21 16:38   ` [PATCH 0/5] Introduce " Will Deacon
2016-04-21 16:38     ` Will Deacon
2016-05-09 11:21   ` Joerg Roedel
2016-05-09 11:21     ` Joerg Roedel
     [not found]     ` <20160509112138.GB13275-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>
2016-05-09 11:45       ` Robin Murphy
2016-05-09 11:45         ` Robin Murphy
     [not found]         ` <57307863.1070706-5wv7dgnIgG8@public.gmane.org>
2016-05-09 14:51           ` Joerg Roedel
2016-05-09 14:51             ` Joerg Roedel
     [not found]             ` <20160509145157.GD13971-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>
2016-05-09 15:18               ` Robin Murphy
2016-05-09 15:18                 ` Robin Murphy
2016-05-09 15:50                 ` Joerg Roedel
2016-05-09 15:50                   ` Joerg Roedel
     [not found] ` <ea520b8c72b5a72a1731bd35f6e3e50872fe6764.1460048991.git.robin.murphy-5wv7dgnIgG8@public.gmane.org>
2016-05-09 16:20   ` [PATCH v2] iommu/arm-smmu: Use " Robin Murphy
2016-05-09 16:20     ` Robin Murphy
     [not found]     ` <112fc0e5f9bbe08007778b8438b35025d8e876a4.1462810410.git.robin.murphy-5wv7dgnIgG8@public.gmane.org>
2016-05-10  9:45       ` Joerg Roedel
2016-05-10  9:45         ` Joerg Roedel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=89763f6b1ac684c3d8712e38760bec55b7885e3b.1460048991.git.robin.murphy@arm.com \
    --to=robin.murphy-5wv7dgnigg8@public.gmane.org \
    --cc=brian.starkey-5wv7dgnIgG8@public.gmane.org \
    --cc=dianders-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org \
    --cc=iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org \
    --cc=joro-zLv9SwRftAIdnm+yROfE0A@public.gmane.org \
    --cc=laurent.pinchart+renesas-ryLnwIuWjnjg/C1BVhZhaw@public.gmane.org \
    --cc=linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r@public.gmane.org \
    --cc=treding-DDmLM1+adcrQT0dZR+AlfA@public.gmane.org \
    --cc=will.deacon-5wv7dgnIgG8@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.