* [PATCH 1/3] drm/ttm: add support for different pool sizes
From: Christian König @ 2017-07-13 13:56 UTC
  To: dri-devel

From: Christian König <christian.koenig@amd.com>

Correctly handle different page sizes in the memory accounting.
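
The accounting helpers now take the size of the backing allocation as
an explicit parameter instead of assuming PAGE_SIZE, so a pool that
hands out larger chunks charges exactly what it allocated. A minimal
usage sketch (pool->size is PAGE_SIZE for the existing pools; a
huge-page pool would pass something larger):

        ret = ttm_mem_global_alloc_page(mem_glob, page, pool->size);
        if (unlikely(ret != 0))
                return -ENOMEM;
        /* ... the matching release must report the same size ... */
        ttm_mem_global_free_page(mem_glob, page, pool->size);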

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_memory.c         | 12 +++++-------
 drivers/gpu/drm/ttm/ttm_page_alloc.c     |  4 ++--
 drivers/gpu/drm/ttm/ttm_page_alloc_dma.c |  6 +++---
 include/drm/ttm/ttm_memory.h             |  5 ++---
 4 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c
index 29855be..e963749 100644
--- a/drivers/gpu/drm/ttm/ttm_memory.c
+++ b/drivers/gpu/drm/ttm/ttm_memory.c
@@ -546,8 +546,7 @@ int ttm_mem_global_alloc(struct ttm_mem_global *glob, uint64_t memory,
 EXPORT_SYMBOL(ttm_mem_global_alloc);
 
 int ttm_mem_global_alloc_page(struct ttm_mem_global *glob,
-			      struct page *page,
-			      bool no_wait, bool interruptible)
+			      struct page *page, uint64_t size)
 {
 
 	struct ttm_mem_zone *zone = NULL;
@@ -564,11 +563,11 @@ int ttm_mem_global_alloc_page(struct ttm_mem_global *glob,
 	if (glob->zone_dma32 && page_to_pfn(page) > 0x00100000UL)
 		zone = glob->zone_kernel;
 #endif
-	return ttm_mem_global_alloc_zone(glob, zone, PAGE_SIZE, no_wait,
-					 interruptible);
+	return ttm_mem_global_alloc_zone(glob, zone, size, false, false);
 }
 
-void ttm_mem_global_free_page(struct ttm_mem_global *glob, struct page *page)
+void ttm_mem_global_free_page(struct ttm_mem_global *glob, struct page *page,
+			      uint64_t size)
 {
 	struct ttm_mem_zone *zone = NULL;
 
@@ -579,10 +578,9 @@ void ttm_mem_global_free_page(struct ttm_mem_global *glob, struct page *page)
 	if (glob->zone_dma32 && page_to_pfn(page) > 0x00100000UL)
 		zone = glob->zone_kernel;
 #endif
-	ttm_mem_global_free_zone(glob, zone, PAGE_SIZE);
+	ttm_mem_global_free_zone(glob, zone, size);
 }
 
-
 size_t ttm_round_pot(size_t size)
 {
 	if ((size & (size - 1)) == 0)
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index a37de5d..6add044 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -880,7 +880,7 @@ int ttm_pool_populate(struct ttm_tt *ttm)
 		}
 
 		ret = ttm_mem_global_alloc_page(mem_glob, ttm->pages[i],
-						false, false);
+						PAGE_SIZE);
 		if (unlikely(ret != 0)) {
 			ttm_pool_unpopulate(ttm);
 			return -ENOMEM;
@@ -907,7 +907,7 @@ void ttm_pool_unpopulate(struct ttm_tt *ttm)
 	for (i = 0; i < ttm->num_pages; ++i) {
 		if (ttm->pages[i]) {
 			ttm_mem_global_free_page(ttm->glob->mem_glob,
-						 ttm->pages[i]);
+						 ttm->pages[i], PAGE_SIZE);
 			ttm_put_pages(&ttm->pages[i], 1,
 				      ttm->page_flags,
 				      ttm->caching_state);
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
index cec4b4b..6c38046 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
@@ -910,7 +910,7 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev)
 		}
 
 		ret = ttm_mem_global_alloc_page(mem_glob, ttm->pages[i],
-						false, false);
+						pool->size);
 		if (unlikely(ret != 0)) {
 			ttm_dma_unpopulate(ttm_dma, dev);
 			return -ENOMEM;
@@ -975,13 +975,13 @@ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev)
 	if (is_cached) {
 		list_for_each_entry_safe(d_page, next, &ttm_dma->pages_list, page_list) {
 			ttm_mem_global_free_page(ttm->glob->mem_glob,
-						 d_page->p);
+						 d_page->p, pool->size);
 			ttm_dma_page_put(pool, d_page);
 		}
 	} else {
 		for (i = 0; i < count; i++) {
 			ttm_mem_global_free_page(ttm->glob->mem_glob,
-						 ttm->pages[i]);
+						 ttm->pages[i], pool->size);
 		}
 	}
 
diff --git a/include/drm/ttm/ttm_memory.h b/include/drm/ttm/ttm_memory.h
index c452089..2c1e359 100644
--- a/include/drm/ttm/ttm_memory.h
+++ b/include/drm/ttm/ttm_memory.h
@@ -150,10 +150,9 @@ extern int ttm_mem_global_alloc(struct ttm_mem_global *glob, uint64_t memory,
 extern void ttm_mem_global_free(struct ttm_mem_global *glob,
 				uint64_t amount);
 extern int ttm_mem_global_alloc_page(struct ttm_mem_global *glob,
-				     struct page *page,
-				     bool no_wait, bool interruptible);
+				     struct page *page, uint64_t size);
 extern void ttm_mem_global_free_page(struct ttm_mem_global *glob,
-				     struct page *page);
+				     struct page *page, uint64_t size);
 extern size_t ttm_round_pot(size_t size);
 extern uint64_t ttm_get_kernel_zone_memory_size(struct ttm_mem_global *glob);
 #endif
-- 
2.7.4
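
A note on the zone check in the hunks above: 0x00100000UL is the page
frame number of the 4 GiB boundary with 4 KiB pages (0x00100000 pages
* 4 KiB = 4 GiB), so a page above it cannot sit in the DMA32 zone and
is charged against the kernel zone instead:

        /* PFN of the 4 GiB boundary: (4ULL << 30) >> PAGE_SHIFT == 0x00100000 */
        if (glob->zone_dma32 && page_to_pfn(page) > 0x00100000UL)
                zone = glob->zone_kernel;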


* [PATCH 2/3] drm/ttm: cleanup ttm_page_alloc_dma.c
From: Christian König @ 2017-07-13 13:56 UTC
  To: dri-devel

From: Christian König <christian.koenig@amd.com>

Remove unused defines and variables. Also stop computing the
gfp_flags when they aren't used.

No intended functional change.
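
The gfp flags are only consumed when ttm_dma_find_pool() misses and a
new pool has to be initialized, so their computation moves into that
branch and the common fast path no longer touches them. Condensed, the
resulting flow in ttm_dma_populate() is:

        pool = ttm_dma_find_pool(dev, type);
        if (!pool) {
                gfp_t gfp_flags;

                if (ttm->page_flags & TTM_PAGE_FLAG_DMA32)
                        gfp_flags = GFP_USER | GFP_DMA32;
                else
                        gfp_flags = GFP_HIGHUSER;
                if (ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)
                        gfp_flags |= __GFP_ZERO;

                pool = ttm_dma_pool_init(dev, gfp_flags, type);
        }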

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_page_alloc_dma.c | 42 ++++++++++++--------------------
 1 file changed, 16 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
index 6c38046..2081e20 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
@@ -57,22 +57,15 @@
 #define NUM_PAGES_TO_ALLOC		(PAGE_SIZE/sizeof(struct page *))
 #define SMALL_ALLOCATION		4
 #define FREE_ALL_PAGES			(~0U)
-/* times are in msecs */
-#define IS_UNDEFINED			(0)
-#define IS_WC				(1<<1)
-#define IS_UC				(1<<2)
-#define IS_CACHED			(1<<3)
-#define IS_DMA32			(1<<4)
 
 enum pool_type {
-	POOL_IS_UNDEFINED,
-	POOL_IS_WC = IS_WC,
-	POOL_IS_UC = IS_UC,
-	POOL_IS_CACHED = IS_CACHED,
-	POOL_IS_WC_DMA32 = IS_WC | IS_DMA32,
-	POOL_IS_UC_DMA32 = IS_UC | IS_DMA32,
-	POOL_IS_CACHED_DMA32 = IS_CACHED | IS_DMA32,
+	IS_UNDEFINED	= 0,
+	IS_WC		= 1 << 1,
+	IS_UC		= 1 << 2,
+	IS_CACHED	= 1 << 3,
+	IS_DMA32	= 1 << 4
 };
+
 /*
  * The pool structure. There are usually six pools:
  *  - generic (not restricted to DMA32):
@@ -83,11 +76,9 @@ enum pool_type {
  * The other ones can be shrunk by the shrinker API if neccessary.
  * @pools: The 'struct device->dma_pools' link.
  * @type: Type of the pool
- * @lock: Protects the inuse_list and free_list from concurrnet access. Must be
+ * @lock: Protects the free_list from concurrnet access. Must be
  * used with irqsave/irqrestore variants because pool allocator maybe called
  * from delayed work.
- * @inuse_list: Pool of pages that are in use. The order is very important and
- *   it is in the order that the TTM pages that are put back are in.
  * @free_list: Pool of pages that are free to be used. No order requirements.
  * @dev: The device that is associated with these pools.
  * @size: Size used during DMA allocation.
@@ -104,7 +95,6 @@ struct dma_pool {
 	struct list_head pools; /* The 'struct device->dma_pools link */
 	enum pool_type type;
 	spinlock_t lock;
-	struct list_head inuse_list;
 	struct list_head free_list;
 	struct device *dev;
 	unsigned size;
@@ -606,7 +596,6 @@ static struct dma_pool *ttm_dma_pool_init(struct device *dev, gfp_t flags,
 	sec_pool->pool =  pool;
 
 	INIT_LIST_HEAD(&pool->free_list);
-	INIT_LIST_HEAD(&pool->inuse_list);
 	INIT_LIST_HEAD(&pool->pools);
 	spin_lock_init(&pool->lock);
 	pool->dev = dev;
@@ -879,22 +868,23 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev)
 	struct dma_pool *pool;
 	enum pool_type type;
 	unsigned i;
-	gfp_t gfp_flags;
 	int ret;
 
 	if (ttm->state != tt_unpopulated)
 		return 0;
 
 	type = ttm_to_type(ttm->page_flags, ttm->caching_state);
-	if (ttm->page_flags & TTM_PAGE_FLAG_DMA32)
-		gfp_flags = GFP_USER | GFP_DMA32;
-	else
-		gfp_flags = GFP_HIGHUSER;
-	if (ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)
-		gfp_flags |= __GFP_ZERO;
-
 	pool = ttm_dma_find_pool(dev, type);
 	if (!pool) {
+		gfp_t gfp_flags;
+
+		if (ttm->page_flags & TTM_PAGE_FLAG_DMA32)
+			gfp_flags = GFP_USER | GFP_DMA32;
+		else
+			gfp_flags = GFP_HIGHUSER;
+		if (ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)
+			gfp_flags |= __GFP_ZERO;
+
 		pool = ttm_dma_pool_init(dev, gfp_flags, type);
 		if (IS_ERR_OR_NULL(pool)) {
 			return -ENOMEM;
-- 
2.7.4


* [PATCH 3/3] drm/ttm: add transparent huge page support for DMA allocations
From: Christian König @ 2017-07-13 13:56 UTC
  To: dri-devel

From: Christian König <christian.koenig@amd.com>

Try to allocate huge pages when it makes sense.
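
The populate path first satisfies as much of the request as possible in
HPAGE_PMD_NR-page chunks from a new huge pool, then falls back to the
normal pool for the remainder. Because a huge DMA allocation is
physically contiguous, only the first struct page and DMA address come
from the pool; the rest are derived. A condensed sketch of that loop
(mirroring the diff below, with the accounting call omitted):

        while (num_pages >= HPAGE_PMD_NR) {
                unsigned j;

                if (ttm_dma_pool_get_pages(pool, ttm_dma, i) != 0)
                        break;  /* fall back to 4 KiB allocations */

                /* pages of a huge allocation are contiguous */
                for (j = i + 1; j < (i + HPAGE_PMD_NR); ++j) {
                        ttm->pages[j] = ttm->pages[j - 1] + 1;
                        ttm_dma->dma_address[j] =
                                ttm_dma->dma_address[j - 1] + PAGE_SIZE;
                }

                i += HPAGE_PMD_NR;
                num_pages -= HPAGE_PMD_NR;
        }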

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_page_alloc_dma.c | 217 ++++++++++++++++++++++++-------
 1 file changed, 169 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
index 2081e20..e51d3fd 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
@@ -57,21 +57,25 @@
 #define NUM_PAGES_TO_ALLOC		(PAGE_SIZE/sizeof(struct page *))
 #define SMALL_ALLOCATION		4
 #define FREE_ALL_PAGES			(~0U)
+#define VADDR_FLAG_HUGE_POOL		1UL
 
 enum pool_type {
 	IS_UNDEFINED	= 0,
 	IS_WC		= 1 << 1,
 	IS_UC		= 1 << 2,
 	IS_CACHED	= 1 << 3,
-	IS_DMA32	= 1 << 4
+	IS_DMA32	= 1 << 4,
+	IS_HUGE		= 1 << 5
 };
 
 /*
- * The pool structure. There are usually six pools:
+ * The pool structure. There are up to seven pools:
  *  - generic (not restricted to DMA32):
  *      - write combined, uncached, cached.
  *  - dma32 (up to 2^32 - so up 4GB):
  *      - write combined, uncached, cached.
+ *  - huge (not restricted to DMA32):
+ *      - cached.
  * for each 'struct device'. The 'cached' is for pages that are actively used.
  * The other ones can be shrunk by the shrinker API if neccessary.
  * @pools: The 'struct device->dma_pools' link.
@@ -111,13 +115,14 @@ struct dma_pool {
  * The accounting page keeping track of the allocated page along with
  * the DMA address.
  * @page_list: The link to the 'page_list' in 'struct dma_pool'.
- * @vaddr: The virtual address of the page
+ * @vaddr: The virtual address of the page and a flag if the page belongs to a
+ * huge pool
  * @dma: The bus address of the page. If the page is not allocated
  *   via the DMA API, it will be -1.
  */
 struct dma_page {
 	struct list_head page_list;
-	void *vaddr;
+	unsigned long vaddr;
 	struct page *p;
 	dma_addr_t dma;
 };
@@ -316,7 +321,8 @@ static int ttm_set_pages_caching(struct dma_pool *pool,
 static void __ttm_dma_free_page(struct dma_pool *pool, struct dma_page *d_page)
 {
 	dma_addr_t dma = d_page->dma;
-	dma_free_coherent(pool->dev, pool->size, d_page->vaddr, dma);
+	d_page->vaddr &= ~VADDR_FLAG_HUGE_POOL;
+	dma_free_coherent(pool->dev, pool->size, (void *)d_page->vaddr, dma);
 
 	kfree(d_page);
 	d_page = NULL;
@@ -324,19 +330,22 @@ static void __ttm_dma_free_page(struct dma_pool *pool, struct dma_page *d_page)
 static struct dma_page *__ttm_dma_alloc_page(struct dma_pool *pool)
 {
 	struct dma_page *d_page;
+	void *vaddr;
 
 	d_page = kmalloc(sizeof(struct dma_page), GFP_KERNEL);
 	if (!d_page)
 		return NULL;
 
-	d_page->vaddr = dma_alloc_coherent(pool->dev, pool->size,
-					   &d_page->dma,
-					   pool->gfp_flags);
-	if (d_page->vaddr) {
-		if (is_vmalloc_addr(d_page->vaddr))
-			d_page->p = vmalloc_to_page(d_page->vaddr);
+	vaddr = dma_alloc_coherent(pool->dev, pool->size, &d_page->dma,
+				   pool->gfp_flags);
+	if (vaddr) {
+		if (is_vmalloc_addr(vaddr))
+			d_page->p = vmalloc_to_page(vaddr);
 		else
-			d_page->p = virt_to_page(d_page->vaddr);
+			d_page->p = virt_to_page(vaddr);
+		d_page->vaddr = (unsigned long)vaddr;
+		if (pool->type & IS_HUGE)
+			d_page->vaddr |= VADDR_FLAG_HUGE_POOL;
 	} else {
 		kfree(d_page);
 		d_page = NULL;
@@ -368,11 +377,40 @@ static void ttm_pool_update_free_locked(struct dma_pool *pool,
 }
 
 /* set memory back to wb and free the pages. */
+static void ttm_dma_page_put(struct dma_pool *pool, struct dma_page *d_page)
+{
+	struct page *page = d_page->p;
+	unsigned i, num_pages;
+	int ret;
+
+	/* Don't set WB on WB page pool. */
+	if (!(pool->type & IS_CACHED)) {
+		num_pages = pool->size / PAGE_SIZE;
+		for (i = 0; i < num_pages; ++i, ++page) {
+			ret = set_pages_array_wb(&page, 1);
+			if (ret) {
+				pr_err("%s: Failed to set %d pages to wb!\n",
+				       pool->dev_name, 1);
+			}
+		}
+	}
+
+	list_del(&d_page->page_list);
+	__ttm_dma_free_page(pool, d_page);
+}
+
 static void ttm_dma_pages_put(struct dma_pool *pool, struct list_head *d_pages,
 			      struct page *pages[], unsigned npages)
 {
 	struct dma_page *d_page, *tmp;
 
+	if (pool->type & IS_HUGE) {
+		list_for_each_entry_safe(d_page, tmp, d_pages, page_list)
+			ttm_dma_page_put(pool, d_page);
+
+		return;
+	}
+
 	/* Don't set WB on WB page pool. */
 	if (npages && !(pool->type & IS_CACHED) &&
 	    set_pages_array_wb(pages, npages))
@@ -385,17 +423,6 @@ static void ttm_dma_pages_put(struct dma_pool *pool, struct list_head *d_pages,
 	}
 }
 
-static void ttm_dma_page_put(struct dma_pool *pool, struct dma_page *d_page)
-{
-	/* Don't set WB on WB page pool. */
-	if (!(pool->type & IS_CACHED) && set_pages_array_wb(&d_page->p, 1))
-		pr_err("%s: Failed to set %d pages to wb!\n",
-		       pool->dev_name, 1);
-
-	list_del(&d_page->page_list);
-	__ttm_dma_free_page(pool, d_page);
-}
-
 /*
  * Free pages from pool.
  *
@@ -564,8 +591,8 @@ static int ttm_dma_pool_match(struct device *dev, void *res, void *match_data)
 static struct dma_pool *ttm_dma_pool_init(struct device *dev, gfp_t flags,
 					  enum pool_type type)
 {
-	char *n[] = {"wc", "uc", "cached", " dma32", "unknown",};
-	enum pool_type t[] = {IS_WC, IS_UC, IS_CACHED, IS_DMA32, IS_UNDEFINED};
+	const char *n[] = {"wc", "uc", "cached", " dma32", "huge"};
+	enum pool_type t[] = {IS_WC, IS_UC, IS_CACHED, IS_DMA32, IS_HUGE};
 	struct device_pools *sec_pool = NULL;
 	struct dma_pool *pool = NULL, **ptr;
 	unsigned i;
@@ -602,11 +629,18 @@ static struct dma_pool *ttm_dma_pool_init(struct device *dev, gfp_t flags,
 	pool->npages_free = pool->npages_in_use = 0;
 	pool->nfrees = 0;
 	pool->gfp_flags = flags;
-	pool->size = PAGE_SIZE;
+	if (type & IS_HUGE)
+#if CONFIG_TRANSPARENT_HUGEPAGE
+		pool->size = HPAGE_PMD_SIZE;
+#else
+		BUG();
+#endif
+	else
+		pool->size = PAGE_SIZE;
 	pool->type = type;
 	pool->nrefills = 0;
 	p = pool->name;
-	for (i = 0; i < 5; i++) {
+	for (i = 0; i < ARRAY_SIZE(t); i++) {
 		if (type & t[i]) {
 			p += snprintf(p, sizeof(pool->name) - (p - pool->name),
 				      "%s", n[i]);
@@ -710,7 +744,7 @@ static int ttm_dma_pool_alloc_new_pages(struct dma_pool *pool,
 	struct dma_page *dma_p;
 	struct page *p;
 	int r = 0;
-	unsigned i, cpages;
+	unsigned i, j, npages, cpages;
 	unsigned max_cpages = min(count,
 			(unsigned)(PAGE_SIZE/sizeof(struct page *)));
 
@@ -748,28 +782,32 @@ static int ttm_dma_pool_alloc_new_pages(struct dma_pool *pool,
 			goto out;
 		}
 		p = dma_p->p;
+		list_add(&dma_p->page_list, d_pages);
+
 #ifdef CONFIG_HIGHMEM
 		/* gfp flags of highmem page should never be dma32 so we
 		 * we should be fine in such case
 		 */
-		if (!PageHighMem(p))
+		if (PageHighMem(p))
+			continue;
 #endif
-		{
-			caching_array[cpages++] = p;
+
+		npages = pool->size / PAGE_SIZE;
+		for (j = 0; j < npages; ++j) {
+			caching_array[cpages++] = p + j;
 			if (cpages == max_cpages) {
 				/* Note: Cannot hold the spinlock */
 				r = ttm_set_pages_caching(pool, caching_array,
-						 cpages);
+							  cpages);
 				if (r) {
 					ttm_dma_handle_caching_state_failure(
-						pool, d_pages, caching_array,
-						cpages);
+					     pool, d_pages, caching_array,
+					     cpages);
 					goto out;
 				}
 				cpages = 0;
 			}
 		}
-		list_add(&dma_p->page_list, d_pages);
 	}
 
 	if (cpages) {
@@ -857,6 +895,26 @@ static int ttm_dma_pool_get_pages(struct dma_pool *pool,
 	return r;
 }
 
+static gfp_t ttm_dma_pool_gfp_flags(struct ttm_dma_tt *ttm_dma, bool huge)
+{
+	struct ttm_tt *ttm = &ttm_dma->ttm;
+	gfp_t gfp_flags;
+
+	if (ttm->page_flags & TTM_PAGE_FLAG_DMA32)
+		gfp_flags = GFP_USER | GFP_DMA32;
+	else
+		gfp_flags = GFP_HIGHUSER;
+	if (ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)
+		gfp_flags |= __GFP_ZERO;
+
+	if (huge) {
+		gfp_flags |= GFP_TRANSHUGE;
+		gfp_flags &= ~__GFP_MOVABLE;
+	}
+
+	return gfp_flags;
+}
+
 /*
  * On success pages list will hold count number of correctly
  * cached pages. On failure will hold the negative return value (-ENOMEM, etc).
@@ -865,6 +923,7 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev)
 {
 	struct ttm_tt *ttm = &ttm_dma->ttm;
 	struct ttm_mem_global *mem_glob = ttm->glob->mem_glob;
+	unsigned long num_pages = ttm->num_pages;
 	struct dma_pool *pool;
 	enum pool_type type;
 	unsigned i;
@@ -873,26 +932,61 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev)
 	if (ttm->state != tt_unpopulated)
 		return 0;
 
+	INIT_LIST_HEAD(&ttm_dma->pages_list);
+	i = 0;
+
 	type = ttm_to_type(ttm->page_flags, ttm->caching_state);
-	pool = ttm_dma_find_pool(dev, type);
+
+#if CONFIG_TRANSPARENT_HUGEPAGE
+	if (ttm->page_flags & TTM_PAGE_FLAG_DMA32)
+		goto skip_huge;
+
+	pool = ttm_dma_find_pool(dev, type | IS_HUGE);
 	if (!pool) {
-		gfp_t gfp_flags;
+		gfp_t gfp_flags = ttm_dma_pool_gfp_flags(ttm_dma, true);
 
-		if (ttm->page_flags & TTM_PAGE_FLAG_DMA32)
-			gfp_flags = GFP_USER | GFP_DMA32;
-		else
-			gfp_flags = GFP_HIGHUSER;
-		if (ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)
-			gfp_flags |= __GFP_ZERO;
+		pool = ttm_dma_pool_init(dev, gfp_flags, type | IS_HUGE);
+		if (IS_ERR_OR_NULL(pool))
+			goto skip_huge;
+	}
 
-		pool = ttm_dma_pool_init(dev, gfp_flags, type);
-		if (IS_ERR_OR_NULL(pool)) {
+	while (num_pages >= HPAGE_PMD_NR) {
+		unsigned j;
+
+		ret = ttm_dma_pool_get_pages(pool, ttm_dma, i);
+		if (ret != 0)
+			break;
+
+		ret = ttm_mem_global_alloc_page(mem_glob, ttm->pages[i],
+						pool->size);
+		if (unlikely(ret != 0)) {
+			ttm_dma_unpopulate(ttm_dma, dev);
 			return -ENOMEM;
 		}
+
+		for (j = i + 1; j < (i + HPAGE_PMD_NR); ++j) {
+			ttm->pages[j] = ttm->pages[j - 1] + 1;
+			ttm_dma->dma_address[j] = ttm_dma->dma_address[j - 1] +
+				PAGE_SIZE;
+		}
+
+		i += HPAGE_PMD_NR;
+		num_pages -= HPAGE_PMD_NR;
 	}
 
-	INIT_LIST_HEAD(&ttm_dma->pages_list);
-	for (i = 0; i < ttm->num_pages; ++i) {
+skip_huge:
+#endif
+
+	pool = ttm_dma_find_pool(dev, type);
+	if (!pool) {
+		gfp_t gfp_flags = ttm_dma_pool_gfp_flags(ttm_dma, false);
+
+		pool = ttm_dma_pool_init(dev, gfp_flags, type);
+		if (IS_ERR_OR_NULL(pool))
+			return -ENOMEM;
+	}
+
+	while (num_pages) {
 		ret = ttm_dma_pool_get_pages(pool, ttm_dma, i);
 		if (ret != 0) {
 			ttm_dma_unpopulate(ttm_dma, dev);
@@ -905,6 +999,9 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev)
 			ttm_dma_unpopulate(ttm_dma, dev);
 			return -ENOMEM;
 		}
+
+		++i;
+		--num_pages;
 	}
 
 	if (unlikely(ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)) {
@@ -928,10 +1025,33 @@ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev)
 	struct dma_page *d_page, *next;
 	enum pool_type type;
 	bool is_cached = false;
-	unsigned count = 0, i, npages = 0;
+	unsigned count, i, npages = 0;
 	unsigned long irq_flags;
 
 	type = ttm_to_type(ttm->page_flags, ttm->caching_state);
+
+#if CONFIG_TRANSPARENT_HUGEPAGE
+	pool = ttm_dma_find_pool(dev, type | IS_HUGE);
+	if (pool) {
+		count = 0;
+		list_for_each_entry_safe(d_page, next, &ttm_dma->pages_list,
+					 page_list) {
+			if (!(d_page->vaddr & VADDR_FLAG_HUGE_POOL))
+				continue;
+
+			count++;
+			ttm_mem_global_free_page(ttm->glob->mem_glob,
+						 d_page->p, pool->size);
+			ttm_dma_page_put(pool, d_page);
+		}
+
+		spin_lock_irqsave(&pool->lock, irq_flags);
+		pool->npages_in_use -= count;
+		pool->nfrees += count;
+		spin_unlock_irqrestore(&pool->lock, irq_flags);
+	}
+#endif
+
 	pool = ttm_dma_find_pool(dev, type);
 	if (!pool)
 		return;
@@ -940,6 +1060,7 @@ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev)
 		     ttm_to_type(ttm->page_flags, tt_cached)) == pool);
 
 	/* make sure pages array match list and count number of pages */
+	count = 0;
 	list_for_each_entry(d_page, &ttm_dma->pages_list, page_list) {
 		ttm->pages[count] = d_page->p;
 		count++;
-- 
2.7.4
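
A note on the vaddr trick above: dma_alloc_coherent() returns memory
that is at least page aligned, so the low bits of the kernel virtual
address are always zero and bit 0 is free to mark pages that came from
a huge pool. The tag is applied at allocation time and must be stripped
again before the pointer goes back to the DMA API:

        /* tag: remember that this dma_page belongs to the huge pool */
        d_page->vaddr = (unsigned long)vaddr;
        if (pool->type & IS_HUGE)
                d_page->vaddr |= VADDR_FLAG_HUGE_POOL;

        /* untag: recover the real pointer before freeing */
        d_page->vaddr &= ~VADDR_FLAG_HUGE_POOL;
        dma_free_coherent(pool->dev, pool->size,
                          (void *)d_page->vaddr, d_page->dma);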


* Re: [PATCH 3/3] drm/ttm: add transparent huge page support for DMA allocations
From: kbuild test robot @ 2017-07-14  0:32 UTC
  To: Christian König; +Cc: kbuild-all, dri-devel

Hi Christian,

[auto build test WARNING on drm/drm-next]
[also build test WARNING on v4.12 next-20170713]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Christian-K-nig/drm-ttm-add-support-for-different-pool-sizes/20170714-075329
base:   git://people.freedesktop.org/~airlied/linux.git drm-next
config: i386-randconfig-x013-201728 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All warnings (new ones prefixed by >>):

   drivers/gpu//drm/ttm/ttm_page_alloc_dma.c: In function 'ttm_dma_pool_init':
>> drivers/gpu//drm/ttm/ttm_page_alloc_dma.c:636:5: warning: "CONFIG_TRANSPARENT_HUGEPAGE" is not defined [-Wundef]
    #if CONFIG_TRANSPARENT_HUGEPAGE
        ^~~~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/gpu//drm/ttm/ttm_page_alloc_dma.c: In function 'ttm_dma_populate':
   drivers/gpu//drm/ttm/ttm_page_alloc_dma.c:943:5: warning: "CONFIG_TRANSPARENT_HUGEPAGE" is not defined [-Wundef]
    #if CONFIG_TRANSPARENT_HUGEPAGE
        ^~~~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/gpu//drm/ttm/ttm_page_alloc_dma.c: In function 'ttm_dma_unpopulate':
   drivers/gpu//drm/ttm/ttm_page_alloc_dma.c:1036:5: warning: "CONFIG_TRANSPARENT_HUGEPAGE" is not defined [-Wundef]
    #if CONFIG_TRANSPARENT_HUGEPAGE
        ^~~~~~~~~~~~~~~~~~~~~~~~~~~
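
The warning comes from preprocessor semantics: #if evaluates its
operand as an expression, so an undefined CONFIG_TRANSPARENT_HUGEPAGE
is treated as 0 and trips -Wundef on configs without THP (on THP=y
configs Kconfig defines the macro to 1, which is why the build still
works there). Kconfig bool options should be tested for definedness:

        #if CONFIG_TRANSPARENT_HUGEPAGE         /* -Wundef when the option is unset */
                pool->size = HPAGE_PMD_SIZE;
        #endif

        #ifdef CONFIG_TRANSPARENT_HUGEPAGE      /* tests definedness only; no warning */
                pool->size = HPAGE_PMD_SIZE;
        #endif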

vim +/CONFIG_TRANSPARENT_HUGEPAGE +636 drivers/gpu//drm/ttm/ttm_page_alloc_dma.c

   593	
   594	static struct dma_pool *ttm_dma_pool_init(struct device *dev, gfp_t flags,
   595						  enum pool_type type)
   596	{
   597		const char *n[] = {"wc", "uc", "cached", " dma32", "huge"};
   598		enum pool_type t[] = {IS_WC, IS_UC, IS_CACHED, IS_DMA32, IS_HUGE};
   599		struct device_pools *sec_pool = NULL;
   600		struct dma_pool *pool = NULL, **ptr;
   601		unsigned i;
   602		int ret = -ENODEV;
   603		char *p;
   604	
   605		if (!dev)
   606			return NULL;
   607	
   608		ptr = devres_alloc(ttm_dma_pool_release, sizeof(*ptr), GFP_KERNEL);
   609		if (!ptr)
   610			return NULL;
   611	
   612		ret = -ENOMEM;
   613	
   614		pool = kmalloc_node(sizeof(struct dma_pool), GFP_KERNEL,
   615				    dev_to_node(dev));
   616		if (!pool)
   617			goto err_mem;
   618	
   619		sec_pool = kmalloc_node(sizeof(struct device_pools), GFP_KERNEL,
   620					dev_to_node(dev));
   621		if (!sec_pool)
   622			goto err_mem;
   623	
   624		INIT_LIST_HEAD(&sec_pool->pools);
   625		sec_pool->dev = dev;
   626		sec_pool->pool =  pool;
   627	
   628		INIT_LIST_HEAD(&pool->free_list);
   629		INIT_LIST_HEAD(&pool->pools);
   630		spin_lock_init(&pool->lock);
   631		pool->dev = dev;
   632		pool->npages_free = pool->npages_in_use = 0;
   633		pool->nfrees = 0;
   634		pool->gfp_flags = flags;
   635		if (type & IS_HUGE)
 > 636	#if CONFIG_TRANSPARENT_HUGEPAGE
   637			pool->size = HPAGE_PMD_SIZE;
   638	#else
   639			BUG();
   640	#endif
   641		else
   642			pool->size = PAGE_SIZE;
   643		pool->type = type;
   644		pool->nrefills = 0;
   645		p = pool->name;
   646		for (i = 0; i < ARRAY_SIZE(t); i++) {
   647			if (type & t[i]) {
   648				p += snprintf(p, sizeof(pool->name) - (p - pool->name),
   649					      "%s", n[i]);
   650			}
   651		}
   652		*p = 0;
   653		/* We copy the name for pr_ calls b/c when dma_pool_destroy is called
   654		 * - the kobj->name has already been deallocated.*/
   655		snprintf(pool->dev_name, sizeof(pool->dev_name), "%s %s",
   656			 dev_driver_string(dev), dev_name(dev));
   657		mutex_lock(&_manager->lock);
   658		/* You can get the dma_pool from either the global: */
   659		list_add(&sec_pool->pools, &_manager->pools);
   660		_manager->npools++;
   661		/* or from 'struct device': */
   662		list_add(&pool->pools, &dev->dma_pools);
   663		mutex_unlock(&_manager->lock);
   664	
   665		*ptr = pool;
   666		devres_add(dev, ptr);
   667	
   668		return pool;
   669	err_mem:
   670		devres_free(ptr);
   671		kfree(sec_pool);
   672		kfree(pool);
   673		return ERR_PTR(ret);
   674	}
   675	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation


* Re: [PATCH 1/3] drm/ttm: add support for different pool sizes
From: Alex Deucher @ 2017-07-14 15:41 UTC
  To: Christian König; +Cc: Mailing list - DRI developers

On Thu, Jul 13, 2017 at 9:56 AM, Christian König
<deathsimple@vodafone.de> wrote:
> From: Christian König <christian.koenig@amd.com>
>
> Correctly handle different page sizes in the memory accounting.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>

I'd split this patch in two: one to remove the no_wait and
interruptible flags, and one to add the size parameter.

Alex


* Re: [PATCH 2/3] drm/ttm: cleanup ttm_page_alloc_dma.c
From: Alex Deucher @ 2017-07-14 15:42 UTC
  To: Christian König; +Cc: Mailing list - DRI developers

On Thu, Jul 13, 2017 at 9:56 AM, Christian König
<deathsimple@vodafone.de> wrote:
> From: Christian König <christian.koenig@amd.com>
>
> Remove unused defines and variables. Also stop computing the
> gfp_flags when they aren't used.
>
> No intended functional change.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>


* Re: [PATCH 3/3] drm/ttm: add transparent huge page support for DMA allocations
From: Alex Deucher @ 2017-07-14 16:24 UTC
  To: Christian König; +Cc: Mailing list - DRI developers

On Thu, Jul 13, 2017 at 9:56 AM, Christian König
<deathsimple@vodafone.de> wrote:
> From: Christian König <christian.koenig@amd.com>
>
> Try to allocate huge pages when it makes sense.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/ttm/ttm_page_alloc_dma.c | 217 ++++++++++++++++++++++++-------
>  1 file changed, 169 insertions(+), 48 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
> index 2081e20..e51d3fd 100644
> --- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
> +++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
> @@ -57,21 +57,25 @@
>  #define NUM_PAGES_TO_ALLOC             (PAGE_SIZE/sizeof(struct page *))
>  #define SMALL_ALLOCATION               4
>  #define FREE_ALL_PAGES                 (~0U)
> +#define VADDR_FLAG_HUGE_POOL           1UL
>
>  enum pool_type {
>         IS_UNDEFINED    = 0,
>         IS_WC           = 1 << 1,
>         IS_UC           = 1 << 2,
>         IS_CACHED       = 1 << 3,
> -       IS_DMA32        = 1 << 4
> +       IS_DMA32        = 1 << 4,
> +       IS_HUGE         = 1 << 5
>  };
>
>  /*
> - * The pool structure. There are usually six pools:
> + * The pool structure. There are up to seven pools:
>   *  - generic (not restricted to DMA32):
>   *      - write combined, uncached, cached.
>   *  - dma32 (up to 2^32 - so up 4GB):
>   *      - write combined, uncached, cached.
> + *  - huge (not restricted to DMA32):
> + *      - cached.

Any need for uncached or write combined huge pages?

>   * for each 'struct device'. The 'cached' is for pages that are actively used.
>   * The other ones can be shrunk by the shrinker API if neccessary.
>   * @pools: The 'struct device->dma_pools' link.
> @@ -111,13 +115,14 @@ struct dma_pool {
>   * The accounting page keeping track of the allocated page along with
>   * the DMA address.
>   * @page_list: The link to the 'page_list' in 'struct dma_pool'.
> - * @vaddr: The virtual address of the page
> + * @vaddr: The virtual address of the page and a flag if the page belongs to a
> + * huge pool
>   * @dma: The bus address of the page. If the page is not allocated
>   *   via the DMA API, it will be -1.
>   */
>  struct dma_page {
>         struct list_head page_list;
> -       void *vaddr;
> +       unsigned long vaddr;
>         struct page *p;
>         dma_addr_t dma;
>  };
> @@ -316,7 +321,8 @@ static int ttm_set_pages_caching(struct dma_pool *pool,
>  static void __ttm_dma_free_page(struct dma_pool *pool, struct dma_page *d_page)
>  {
>         dma_addr_t dma = d_page->dma;
> -       dma_free_coherent(pool->dev, pool->size, d_page->vaddr, dma);
> +       d_page->vaddr &= ~VADDR_FLAG_HUGE_POOL;
> +       dma_free_coherent(pool->dev, pool->size, (void *)d_page->vaddr, dma);
>
>         kfree(d_page);
>         d_page = NULL;
> @@ -324,19 +330,22 @@ static void __ttm_dma_free_page(struct dma_pool *pool, struct dma_page *d_page)
>  static struct dma_page *__ttm_dma_alloc_page(struct dma_pool *pool)
>  {
>         struct dma_page *d_page;
> +       void *vaddr;
>
>         d_page = kmalloc(sizeof(struct dma_page), GFP_KERNEL);
>         if (!d_page)
>                 return NULL;
>
> -       d_page->vaddr = dma_alloc_coherent(pool->dev, pool->size,
> -                                          &d_page->dma,
> -                                          pool->gfp_flags);
> -       if (d_page->vaddr) {
> -               if (is_vmalloc_addr(d_page->vaddr))
> -                       d_page->p = vmalloc_to_page(d_page->vaddr);
> +       vaddr = dma_alloc_coherent(pool->dev, pool->size, &d_page->dma,
> +                                  pool->gfp_flags);
> +       if (vaddr) {
> +               if (is_vmalloc_addr(vaddr))
> +                       d_page->p = vmalloc_to_page(vaddr);
>                 else
> -                       d_page->p = virt_to_page(d_page->vaddr);
> +                       d_page->p = virt_to_page(vaddr);
> +               d_page->vaddr = (unsigned long)vaddr;
> +               if (pool->type & IS_HUGE)
> +                       d_page->vaddr |= VADDR_FLAG_HUGE_POOL;
>         } else {
>                 kfree(d_page);
>                 d_page = NULL;
> @@ -368,11 +377,40 @@ static void ttm_pool_update_free_locked(struct dma_pool *pool,
>  }
>
>  /* set memory back to wb and free the pages. */
> +static void ttm_dma_page_put(struct dma_pool *pool, struct dma_page *d_page)
> +{
> +       struct page *page = d_page->p;
> +       unsigned i, num_pages;
> +       int ret;
> +
> +       /* Don't set WB on WB page pool. */
> +       if (!(pool->type & IS_CACHED)) {
> +               num_pages = pool->size / PAGE_SIZE;
> +               for (i = 0; i < num_pages; ++i, ++page) {
> +                       ret = set_pages_array_wb(&page, 1);
> +                       if (ret) {
> +                               pr_err("%s: Failed to set %d pages to wb!\n",
> +                                      pool->dev_name, 1);
> +                       }
> +               }
> +       }
> +
> +       list_del(&d_page->page_list);
> +       __ttm_dma_free_page(pool, d_page);
> +}
> +
>  static void ttm_dma_pages_put(struct dma_pool *pool, struct list_head *d_pages,
>                               struct page *pages[], unsigned npages)
>  {
>         struct dma_page *d_page, *tmp;
>
> +       if (pool->type & IS_HUGE) {
> +               list_for_each_entry_safe(d_page, tmp, d_pages, page_list)
> +                       ttm_dma_page_put(pool, d_page);
> +
> +               return;
> +       }
> +
>         /* Don't set WB on WB page pool. */
>         if (npages && !(pool->type & IS_CACHED) &&
>             set_pages_array_wb(pages, npages))
> @@ -385,17 +423,6 @@ static void ttm_dma_pages_put(struct dma_pool *pool, struct list_head *d_pages,
>         }
>  }
>
> -static void ttm_dma_page_put(struct dma_pool *pool, struct dma_page *d_page)
> -{
> -       /* Don't set WB on WB page pool. */
> -       if (!(pool->type & IS_CACHED) && set_pages_array_wb(&d_page->p, 1))
> -               pr_err("%s: Failed to set %d pages to wb!\n",
> -                      pool->dev_name, 1);
> -
> -       list_del(&d_page->page_list);
> -       __ttm_dma_free_page(pool, d_page);
> -}
> -
>  /*
>   * Free pages from pool.
>   *
> @@ -564,8 +591,8 @@ static int ttm_dma_pool_match(struct device *dev, void *res, void *match_data)
>  static struct dma_pool *ttm_dma_pool_init(struct device *dev, gfp_t flags,
>                                           enum pool_type type)
>  {
> -       char *n[] = {"wc", "uc", "cached", " dma32", "unknown",};
> -       enum pool_type t[] = {IS_WC, IS_UC, IS_CACHED, IS_DMA32, IS_UNDEFINED};
> +       const char *n[] = {"wc", "uc", "cached", " dma32", "huge"};
> +       enum pool_type t[] = {IS_WC, IS_UC, IS_CACHED, IS_DMA32, IS_HUGE};
>         struct device_pools *sec_pool = NULL;
>         struct dma_pool *pool = NULL, **ptr;
>         unsigned i;
> @@ -602,11 +629,18 @@ static struct dma_pool *ttm_dma_pool_init(struct device *dev, gfp_t flags,
>         pool->npages_free = pool->npages_in_use = 0;
>         pool->nfrees = 0;
>         pool->gfp_flags = flags;
> -       pool->size = PAGE_SIZE;
> +       if (type & IS_HUGE)
> +#if CONFIG_TRANSPARENT_HUGEPAGE

I think this should be #ifdef for consistency with the HUGEPAGE
support in the rest of the kernel.


> +               pool->size = HPAGE_PMD_SIZE;
> +#else
> +               BUG();
> +#endif
> +       else
> +               pool->size = PAGE_SIZE;
>         pool->type = type;
>         pool->nrefills = 0;
>         p = pool->name;
> -       for (i = 0; i < 5; i++) {
> +       for (i = 0; i < ARRAY_SIZE(t); i++) {
>                 if (type & t[i]) {
>                         p += snprintf(p, sizeof(pool->name) - (p - pool->name),
>                                       "%s", n[i]);
> @@ -710,7 +744,7 @@ static int ttm_dma_pool_alloc_new_pages(struct dma_pool *pool,
>         struct dma_page *dma_p;
>         struct page *p;
>         int r = 0;
> -       unsigned i, cpages;
> +       unsigned i, j, npages, cpages;
>         unsigned max_cpages = min(count,
>                         (unsigned)(PAGE_SIZE/sizeof(struct page *)));
>
> @@ -748,28 +782,32 @@ static int ttm_dma_pool_alloc_new_pages(struct dma_pool *pool,
>                         goto out;
>                 }
>                 p = dma_p->p;
> +               list_add(&dma_p->page_list, d_pages);
> +
>  #ifdef CONFIG_HIGHMEM
>                 /* gfp flags of highmem page should never be dma32 so we
>                  * we should be fine in such case
>                  */
> -               if (!PageHighMem(p))
> +               if (PageHighMem(p))
> +                       continue;
>  #endif
> -               {
> -                       caching_array[cpages++] = p;
> +
> +               npages = pool->size / PAGE_SIZE;
> +               for (j = 0; j < npages; ++j) {
> +                       caching_array[cpages++] = p + j;
>                         if (cpages == max_cpages) {
>                                 /* Note: Cannot hold the spinlock */
>                                 r = ttm_set_pages_caching(pool, caching_array,
> -                                                cpages);
> +                                                         cpages);
>                                 if (r) {
>                                         ttm_dma_handle_caching_state_failure(
> -                                               pool, d_pages, caching_array,
> -                                               cpages);
> +                                            pool, d_pages, caching_array,
> +                                            cpages);
>                                         goto out;
>                                 }
>                                 cpages = 0;
>                         }
>                 }
> -               list_add(&dma_p->page_list, d_pages);
>         }
>
>         if (cpages) {
> @@ -857,6 +895,26 @@ static int ttm_dma_pool_get_pages(struct dma_pool *pool,
>         return r;
>  }
>
> +static gfp_t ttm_dma_pool_gfp_flags(struct ttm_dma_tt *ttm_dma, bool huge)
> +{
> +       struct ttm_tt *ttm = &ttm_dma->ttm;
> +       gfp_t gfp_flags;
> +
> +       if (ttm->page_flags & TTM_PAGE_FLAG_DMA32)
> +               gfp_flags = GFP_USER | GFP_DMA32;
> +       else
> +               gfp_flags = GFP_HIGHUSER;
> +       if (ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)
> +               gfp_flags |= __GFP_ZERO;
> +
> +       if (huge) {
> +               gfp_flags |= GFP_TRANSHUGE;
> +               gfp_flags &= ~__GFP_MOVABLE;
> +       }
> +
> +       return gfp_flags;
> +}
> +
>  /*
>   * On success pages list will hold count number of correctly
>   * cached pages. On failure will hold the negative return value (-ENOMEM, etc).
> @@ -865,6 +923,7 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev)
>  {
>         struct ttm_tt *ttm = &ttm_dma->ttm;
>         struct ttm_mem_global *mem_glob = ttm->glob->mem_glob;
> +       unsigned long num_pages = ttm->num_pages;
>         struct dma_pool *pool;
>         enum pool_type type;
>         unsigned i;
> @@ -873,26 +932,61 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev)
>         if (ttm->state != tt_unpopulated)
>                 return 0;
>
> +       INIT_LIST_HEAD(&ttm_dma->pages_list);
> +       i = 0;
> +
>         type = ttm_to_type(ttm->page_flags, ttm->caching_state);
> -       pool = ttm_dma_find_pool(dev, type);
> +
> +#if CONFIG_TRANSPARENT_HUGEPAGE

Same here.

> +       if (ttm->page_flags & TTM_PAGE_FLAG_DMA32)
> +               goto skip_huge;
> +
> +       pool = ttm_dma_find_pool(dev, type | IS_HUGE);
>         if (!pool) {
> -               gfp_t gfp_flags;
> +               gfp_t gfp_flags = ttm_dma_pool_gfp_flags(ttm_dma, true);
>
> -               if (ttm->page_flags & TTM_PAGE_FLAG_DMA32)
> -                       gfp_flags = GFP_USER | GFP_DMA32;
> -               else
> -                       gfp_flags = GFP_HIGHUSER;
> -               if (ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)
> -                       gfp_flags |= __GFP_ZERO;
> +               pool = ttm_dma_pool_init(dev, gfp_flags, type | IS_HUGE);
> +               if (IS_ERR_OR_NULL(pool))
> +                       goto skip_huge;
> +       }
>
> -               pool = ttm_dma_pool_init(dev, gfp_flags, type);
> -               if (IS_ERR_OR_NULL(pool)) {
> +       while (num_pages >= HPAGE_PMD_NR) {
> +               unsigned j;
> +
> +               ret = ttm_dma_pool_get_pages(pool, ttm_dma, i);
> +               if (ret != 0)
> +                       break;
> +
> +               ret = ttm_mem_global_alloc_page(mem_glob, ttm->pages[i],
> +                                               pool->size);
> +               if (unlikely(ret != 0)) {
> +                       ttm_dma_unpopulate(ttm_dma, dev);
>                         return -ENOMEM;
>                 }
> +
> +               for (j = i + 1; j < (i + HPAGE_PMD_NR); ++j) {
> +                       ttm->pages[j] = ttm->pages[j - 1] + 1;
> +                       ttm_dma->dma_address[j] = ttm_dma->dma_address[j - 1] +
> +                               PAGE_SIZE;
> +               }
> +
> +               i += HPAGE_PMD_NR;
> +               num_pages -= HPAGE_PMD_NR;
>         }
>
> -       INIT_LIST_HEAD(&ttm_dma->pages_list);
> -       for (i = 0; i < ttm->num_pages; ++i) {
> +skip_huge:
> +#endif
> +
> +       pool = ttm_dma_find_pool(dev, type);
> +       if (!pool) {
> +               gfp_t gfp_flags = ttm_dma_pool_gfp_flags(ttm_dma, false);
> +
> +               pool = ttm_dma_pool_init(dev, gfp_flags, type);
> +               if (IS_ERR_OR_NULL(pool))
> +                       return -ENOMEM;
> +       }
> +
> +       while (num_pages) {
>                 ret = ttm_dma_pool_get_pages(pool, ttm_dma, i);
>                 if (ret != 0) {
>                         ttm_dma_unpopulate(ttm_dma, dev);
> @@ -905,6 +999,9 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev)
>                         ttm_dma_unpopulate(ttm_dma, dev);
>                         return -ENOMEM;
>                 }
> +
> +               ++i;
> +               --num_pages;
>         }
>
>         if (unlikely(ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)) {
> @@ -928,10 +1025,33 @@ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev)
>         struct dma_page *d_page, *next;
>         enum pool_type type;
>         bool is_cached = false;
> -       unsigned count = 0, i, npages = 0;
> +       unsigned count, i, npages = 0;
>         unsigned long irq_flags;
>
>         type = ttm_to_type(ttm->page_flags, ttm->caching_state);
> +
> +#if CONFIG_TRANSPARENT_HUGEPAGE

Same here.


> +       pool = ttm_dma_find_pool(dev, type | IS_HUGE);
> +       if (pool) {
> +               count = 0;
> +               list_for_each_entry_safe(d_page, next, &ttm_dma->pages_list,
> +                                        page_list) {
> +                       if (!(d_page->vaddr & VADDR_FLAG_HUGE_POOL))
> +                               continue;
> +
> +                       count++;
> +                       ttm_mem_global_free_page(ttm->glob->mem_glob,
> +                                                d_page->p, pool->size);
> +                       ttm_dma_page_put(pool, d_page);
> +               }
> +
> +               spin_lock_irqsave(&pool->lock, irq_flags);
> +               pool->npages_in_use -= count;
> +               pool->nfrees += count;
> +               spin_unlock_irqrestore(&pool->lock, irq_flags);
> +       }
> +#endif
> +
>         pool = ttm_dma_find_pool(dev, type);
>         if (!pool)
>                 return;
> @@ -940,6 +1060,7 @@ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev)
>                      ttm_to_type(ttm->page_flags, tt_cached)) == pool);
>
>         /* make sure pages array match list and count number of pages */
> +       count = 0;
>         list_for_each_entry(d_page, &ttm_dma->pages_list, page_list) {
>                 ttm->pages[count] = d_page->p;
>                 count++;
> --
> 2.7.4
>
