* [RFC] drm/ttm: add pool wc/uc page allocator
@ 2010-02-28 21:30 Pauli Nieminen
  2010-03-01 22:32 ` Pauli Nieminen
  0 siblings, 1 reply; 13+ messages in thread
From: Pauli Nieminen @ 2010-02-28 21:30 UTC (permalink / raw)
  To: dri-devel; +Cc: Dave Airlie, Jerome Glisse

On AGP systems we routinely allocate and free uncached (uc) or
write-combined (wc) memory. Changing a page from cached (wb) to uc or wc
is very expensive and involves a lot of flushing. To improve performance
this allocator uses a pool of uc/wc pages.

Pools are linked lists of pages, ordered so that the head is the latest
addition to the pool and the tail is the oldest page. Old pages are
periodically freed from the pools to keep memory use to a minimum.

Pools are protected by spinlocks to allow multiple threads to allocate
pages simultaneously. Expensive operations must not be done while holding
the spinlock, to maximise performance for multiple threads.
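
For reference, a rough sketch of how a caller is expected to use the new
interface (ttm_get_pages()/ttm_put_pages() are introduced below; the flags
and the count value here are only illustrative, and error handling is
trimmed):

	struct list_head pages;
	int ret;

	INIT_LIST_HEAD(&pages);
	/* Ask the wc pool for `count` pages; the allocator falls back to
	 * plain alloc_page() if the pool cannot be filled. */
	ret = ttm_get_pages(&pages, 0, tt_wc, count);
	if (ret)
		return ret;

	/* ... map and use the pages ... */

	/* Hand the pages back to the wc pool so they can be reused without
	 * another wb -> wc transition. */
	ttm_put_pages(&pages, 0, tt_wc);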

Based on Jerome Glisse's and Dave Airlie's pool allocator.

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Pauli Nieminen <suokkos@gmail.com>
---
 drivers/gpu/drm/ttm/Makefile         |    2 +-
 drivers/gpu/drm/ttm/ttm_memory.c     |    5 +-
 drivers/gpu/drm/ttm/ttm_page_alloc.c |  514 ++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/ttm/ttm_page_alloc.h |   38 +++
 drivers/gpu/drm/ttm/ttm_tt.c         |   47 ++--
 5 files changed, 580 insertions(+), 26 deletions(-)
 create mode 100644 drivers/gpu/drm/ttm/ttm_page_alloc.c
 create mode 100644 drivers/gpu/drm/ttm/ttm_page_alloc.h

diff --git a/drivers/gpu/drm/ttm/Makefile b/drivers/gpu/drm/ttm/Makefile
index 1e138f5..4256e20 100644
--- a/drivers/gpu/drm/ttm/Makefile
+++ b/drivers/gpu/drm/ttm/Makefile
@@ -4,6 +4,6 @@
 ccflags-y := -Iinclude/drm
 ttm-y := ttm_agp_backend.o ttm_memory.o ttm_tt.o ttm_bo.o \
 	ttm_bo_util.o ttm_bo_vm.o ttm_module.o ttm_global.o \
-	ttm_object.o ttm_lock.o ttm_execbuf_util.o
+	ttm_object.o ttm_lock.o ttm_execbuf_util.o ttm_page_alloc.o
 
 obj-$(CONFIG_DRM_TTM) += ttm.o
diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c
index f5245c0..d587fbe 100644
--- a/drivers/gpu/drm/ttm/ttm_memory.c
+++ b/drivers/gpu/drm/ttm/ttm_memory.c
@@ -32,6 +32,7 @@
 #include <linux/wait.h>
 #include <linux/mm.h>
 #include <linux/module.h>
+#include "ttm_page_alloc.h"
 
 #define TTM_MEMORY_ALLOC_RETRIES 4
 
@@ -394,6 +395,7 @@ int ttm_mem_global_init(struct ttm_mem_global *glob)
 		       "Zone %7s: Available graphics memory: %llu kiB.\n",
 		       zone->name, (unsigned long long) zone->max_mem >> 10);
 	}
+	ttm_page_alloc_init();
 	return 0;
 out_no_zone:
 	ttm_mem_global_release(glob);
@@ -413,9 +415,10 @@ void ttm_mem_global_release(struct ttm_mem_global *glob)
 		zone = glob->zones[i];
 		kobject_del(&zone->kobj);
 		kobject_put(&zone->kobj);
-	}
+			}
 	kobject_del(&glob->kobj);
 	kobject_put(&glob->kobj);
+	ttm_page_alloc_fini();
 }
 EXPORT_SYMBOL(ttm_mem_global_release);
 
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
new file mode 100644
index 0000000..de6576c
--- /dev/null
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -0,0 +1,514 @@
+/*
+ * Copyright (c) Red Hat Inc.
+
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie <airlied@redhat.com>
+ *          Jerome Glisse <jglisse@redhat.com>
+ *          Pauli Nieminen <suokkos@gmail.com>
+ */
+
+/* simple list based uncached page pool
+ * - Pool collects recently freed pages for reuse
+ * - Use page->lru to keep a free list
+ * - doesn't track currently in use pages
+ */
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/highmem.h>
+#include <linux/mm_types.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+
+#include <asm/agp.h>
+#include "ttm/ttm_bo_driver.h"
+#include "ttm_page_alloc.h"
+
+/* Number of 4k pages to add at once */
+#define NUM_PAGES_TO_ADD 		64
+/* times are in msecs */
+#define TIME_TO_KEEP_PAGE_IN_POOL	15000
+#define PAGE_FREE_INTERVAL		1000
+
+/**
+ * The list is a FILO (last in, first out) stack.
+ * Pages are pushed to the front of the list, so the tail holds the
+ * least recently used pages. Pages at the tail that have not been
+ * used recently are freed.
+ *
+ * All expensive operations must be done outside of the pool lock to
+ * prevent contention on the lock.
+ */
+struct page_pool {
+	struct list_head	list;
+	int			gfp_flags;
+	unsigned		npages;
+	spinlock_t		lock;
+};
+
+# define MAX_NUM_POOLS 4
+
+struct pool_manager {
+	bool			page_alloc_inited;
+	unsigned long		time_to_wait;
+
+	struct timer_list	timer;
+
+	union {
+		struct page_pool	pools[MAX_NUM_POOLS];
+		struct {
+			struct page_pool	wc_pool;
+			struct page_pool	uc_pool;
+			struct page_pool	wc_pool_dma32;
+			struct page_pool	uc_pool_dma32;
+		} ;
+	};
+};
+
+
+static struct pool_manager _manager;
+
+#ifdef CONFIG_X86
+/* TODO: add this to x86 like _uc, this version here is inefficient */
+static int set_pages_array_wc(struct page **pages, int addrinarray)
+{
+	int i;
+
+	for (i = 0; i < addrinarray; i++)
+		set_memory_wc((unsigned long)page_address(pages[i]), 1);
+	return 0;
+}
+#else
+static int set_pages_array_wb(struct page **pages, int addrinarray)
+{
+#ifdef TTM_HAS_AGP
+	int i;
+
+	for (i = 0; i < addrinarray; i++)
+		unmap_page_from_agp(pages[i]);
+#endif
+	return 0;
+}
+
+static int set_pages_array_wc(struct page **pages, int addrinarray)
+{
+#ifdef TTM_HAS_AGP
+	int i;
+
+	for (i = 0; i < addrinarray; i++)
+		map_page_into_agp(pages[i]);
+#endif
+	return 0;
+}
+
+static int set_pages_array_uc(struct page **pages, int addrinarray)
+{
+#ifdef TTM_HAS_AGP
+	int i;
+
+	for (i = 0; i < addrinarray; i++)
+		map_page_into_agp(pages[i]);
+#endif
+	return 0;
+}
+#endif
+
+
+static void ttm_page_pool_init_locked(struct page_pool *pool)
+{
+	spin_lock_init(&pool->lock);
+	INIT_LIST_HEAD(&pool->list);
+}
+
+static struct page_pool *ttm_get_pool(int flags,
+		enum ttm_caching_state cstate)
+{
+	int pool_index;
+
+	if (cstate == tt_cached)
+		return NULL;
+
+	if (cstate == tt_wc)
+		pool_index = 0x0;
+	else
+		pool_index = 0x1;
+
+	if (flags & TTM_PAGE_FLAG_DMA32)
+		pool_index |= 0x2;
+
+	return &_manager.pools[pool_index];
+}
+
+static void ttm_pages_put(struct page *pages[], unsigned npages)
+{
+	unsigned i;
+	set_pages_array_wb(pages, npages);
+	for (i = 0; i < npages; ++i)
+		__free_page(pages[i]);
+}
+
+/**
+ * Free pages from pool.
+ * If limit_with_time is set to true, only pages that have been in the
+ * pool for at least pool_manager.time_to_wait are freed.
+ *
+ * Pages are freed in NUM_PAGES_TO_ADD chunks to prevent the freeing from
+ * blocking other operations on the pool.
+ *
+ * This is called from the timer handler.
+ **/
+static void ttm_page_pool_free(struct page_pool *pool,
+		const bool limit_with_time)
+{
+	struct page *page;
+	struct page *pages_to_free[NUM_PAGES_TO_ADD];
+	unsigned freed_pages;
+	unsigned long now = jiffies + _manager.time_to_wait;
+	unsigned long irq_flags;
+
+restart:
+	freed_pages = 0;
+	spin_lock_irqsave(&pool->lock, irq_flags);
+
+	{
+		list_for_each_entry_reverse(page, &pool->list, lru) {
+
+			if (limit_with_time
+				&& time_after(now, page->private))
+				break;
+
+			pages_to_free[freed_pages++] = page;
+			if (freed_pages >= NUM_PAGES_TO_ADD) {
+				/* Limit the maximum time that we hold the lock
+				 * not to block more important threads */
+				__list_del(page->lru.prev, &pool->list);
+				pool->npages -= freed_pages;
+				spin_unlock_irqrestore(&pool->lock, irq_flags);
+
+				ttm_pages_put(pages_to_free, freed_pages);
+				goto restart;
+			}
+		}
+
+		pool->npages -= freed_pages;
+		if (freed_pages)
+			__list_del(&page->lru, &pool->list);
+
+		spin_unlock_irqrestore(&pool->lock, irq_flags);
+
+		if (freed_pages)
+			ttm_pages_put(pages_to_free, freed_pages);
+	}
+}
+
+/* periodically free unused pages from the pool */
+static void ttm_page_pool_timer_handler(unsigned long man_addr)
+{
+	int i;
+	struct pool_manager *m = (struct pool_manager *)man_addr;
+
+	for (i = 0; i < MAX_NUM_POOLS; ++i)
+		ttm_page_pool_free(&_manager.pools[i], true);
+
+	mod_timer(&m->timer, jiffies + msecs_to_jiffies(PAGE_FREE_INTERVAL));
+}
+
+static void ttm_page_pool_timer_create(struct pool_manager *m)
+{
+	/* Create a timer that is fired every second to clean
+	 * the free list */
+	init_timer(&m->timer);
+	m->timer.expires = jiffies + msecs_to_jiffies(PAGE_FREE_INTERVAL);
+	m->timer.function = ttm_page_pool_timer_handler;
+	m->timer.data = (unsigned long)m;
+}
+
+static void ttm_page_pool_timer_free(struct pool_manager *m)
+{
+	del_timer_sync(&m->timer);
+}
+
+static void ttm_set_pages_caching(struct page **pages,
+		enum ttm_caching_state cstate, unsigned cpages)
+{
+	/* Set page caching */
+	switch (cstate) {
+	case tt_uncached:
+		set_pages_array_uc(pages, cpages);
+		break;
+	case tt_wc:
+		set_pages_array_wc(pages, cpages);
+		break;
+	default:
+		break;
+	}
+}
+/**
+ * Fill the pool with new pages.
+ */
+static int ttm_pages_pool_fill_locked(struct page_pool *pool, int flags,
+		enum ttm_caching_state cstate, unsigned count,
+		unsigned long *irq_flags)
+{
+	unsigned pages_to_alloc = count - pool->npages;
+	struct page **pages;
+	struct page *page;
+	struct list_head h;
+	unsigned i, cpages;
+	unsigned max_cpages = min(pages_to_alloc,
+			(unsigned)(PAGE_SIZE/sizeof(*page)));
+
+	/* reserve already allocated pages from the pool */
+	INIT_LIST_HEAD(&h);
+	list_splice_init(&pool->list, &h);
+
+	/* It is now safe to allow other users to modify the pool, so unlock
+	 * it. Allocating pages is an expensive operation. */
+	spin_unlock_irqrestore(&pool->lock, *irq_flags);
+
+	pages = kmalloc(pages_to_alloc*sizeof(*page), GFP_KERNEL);
+
+	if (!pages) {
+		printk(KERN_ERR "[ttm] unable to allocate table for new pages.");
+		spin_lock_irqsave(&pool->lock, *irq_flags);
+		return -ENOMEM;
+	}
+
+	for (i = 0, cpages = 0; i < pages_to_alloc; ++i) {
+		page = alloc_page(pool->gfp_flags);
+		if (!page) {
+			printk(KERN_ERR "[ttm] unable to get page %d\n", i);
+
+			ttm_put_pages(&h, flags, cstate);
+			spin_lock_irqsave(&pool->lock, *irq_flags);
+			return -ENOMEM;
+		}
+
+#ifdef CONFIG_HIGHMEM
+		/* gfp flags of highmem pages should never include dma32, so
+		 * we should be fine in such a case
+		 */
+		if (!PageHighMem(page))
+#endif
+		{
+			pages[cpages++] = page;
+			if (cpages == max_cpages) {
+
+				ttm_set_pages_caching(pages, cstate, cpages);
+				cpages = 0;
+			}
+		}
+		list_add(&page->lru, &h);
+	}
+
+	if (cpages)
+		ttm_set_pages_caching(pages, cstate, cpages);
+
+	spin_lock_irqsave(&pool->lock, *irq_flags);
+
+	list_splice(&h, &pool->list);
+
+	pool->npages += pages_to_alloc;
+
+	return 0;
+}
+
+/*
+ * On success pages list will hold count number of correctly
+ * cached pages.
+ */
+int ttm_get_pages(struct list_head *pages, int flags,
+		enum ttm_caching_state cstate, unsigned count)
+{
+	struct page_pool *pool = ttm_get_pool(flags, cstate);
+	struct page *page = NULL;
+	unsigned long irq_flags;
+	int gfp_flags = 0;
+	int retries = 1;
+	int r;
+
+	/* No pool for cached pages */
+	if (cstate == tt_cached) {
+		if (flags & TTM_PAGE_FLAG_ZERO_ALLOC)
+			gfp_flags |= __GFP_ZERO;
+
+		if (flags & TTM_PAGE_FLAG_DMA32)
+			gfp_flags |= GFP_DMA32;
+		else
+			gfp_flags |= __GFP_HIGHMEM;
+
+		/* fallback in case of problems with pool allocation */
+alloc_pages:
+		for (r = 0; r < count; ++r) {
+			page = alloc_page(gfp_flags);
+			if (!page) {
+
+				printk(KERN_ERR "[ttm] unable to allocate page.");
+				return -ENOMEM;
+			}
+
+			list_add(&page->lru, pages);
+		}
+		return 0;
+	}
+
+	gfp_flags = pool->gfp_flags;
+
+	spin_lock_irqsave(&pool->lock, irq_flags);
+	do {
+		/* First we look if pool has the page we want */
+		if (pool->npages >= count) {
+
+
+			/* Find the cut location */
+			if (count <= pool->npages/2) {
+				unsigned c = 0;
+				list_for_each_entry(page, &pool->list, lru) {
+					if (++c >= count)
+						break;
+				}
+			} else {
+				/* We have to point to the last page of the range being cut */
+				unsigned c = 0;
+				unsigned rcount = pool->npages - count + 1;
+
+				list_for_each_entry_reverse(page, &pool->list, lru) {
+					if (++c >= rcount)
+						break;
+				}
+			}
+
+			list_cut_position(pages, &pool->list, &page->lru);
+
+			pool->npages -= count;
+			spin_unlock_irqrestore(&pool->lock, irq_flags);
+
+			if (flags & TTM_PAGE_FLAG_ZERO_ALLOC) {
+				list_for_each_entry(page, pages, lru) {
+					clear_page(page_address(page));
+				}
+			}
+
+			return 0;
+		}
+
+		if (retries == 0)
+			break;
+
+		--retries;
+
+		r = ttm_pages_pool_fill_locked(pool, flags, cstate,
+				count, &irq_flags);
+		if (r) {
+			spin_unlock_irqrestore(&pool->lock, irq_flags);
+			return r;
+		}
+
+	} while (1);
+
+	spin_unlock_irqrestore(&pool->lock, irq_flags);
+
+	/* The pool allocator ran out of pages without an error. We try to
+	 * allocate new pages instead. */
+	if (flags & TTM_PAGE_FLAG_ZERO_ALLOC)
+		gfp_flags |= __GFP_ZERO;
+
+	goto alloc_pages;
+}
+
+/* Put all pages in pages list to correct pool to wait for reuse */
+void ttm_put_pages(struct list_head *pages, int flags,
+		enum ttm_caching_state cstate)
+{
+	struct page_pool *pool = ttm_get_pool(flags, cstate);
+	struct page *p, *tmp;
+	unsigned page_count = 0;
+	unsigned long irq_flags;
+	unsigned long now = jiffies;
+
+	if (pool == NULL) {
+		list_for_each_entry_safe(p, tmp, pages, lru) {
+			__free_page(p);
+		}
+		/* We took the ownership -> clear the pages list*/
+		INIT_LIST_HEAD(pages);
+		return;
+	}
+
+	list_for_each_entry_safe(p, tmp, pages, lru) {
+
+#ifdef CONFIG_HIGHMEM
+		/* we don't have pool for highmem -> free them */
+		if (PageHighMem(p)) {
+			list_del(&p->lru);
+			__free_page(p);
+		} else
+#endif
+		{
+			p->private = now;
+			++page_count;
+		}
+
+	}
+
+	spin_lock_irqsave(&pool->lock, irq_flags);
+	list_splice(pages, &pool->list);
+	pool->npages += page_count;
+	spin_unlock_irqrestore(&pool->lock, irq_flags);
+
+	/* We took the ownership -> clear the pages list*/
+	INIT_LIST_HEAD(pages);
+}
+
+
+int ttm_page_alloc_init(void)
+{
+	if (_manager.page_alloc_inited)
+		return 0;
+
+	ttm_page_pool_init_locked(&_manager.wc_pool);
+	_manager.wc_pool.gfp_flags = GFP_HIGHUSER;
+	ttm_page_pool_init_locked(&_manager.uc_pool);
+	_manager.uc_pool.gfp_flags = GFP_HIGHUSER;
+	ttm_page_pool_init_locked(&_manager.wc_pool_dma32);
+	_manager.wc_pool_dma32.gfp_flags = GFP_USER | GFP_DMA32;
+	ttm_page_pool_init_locked(&_manager.uc_pool_dma32);
+	_manager.uc_pool_dma32.gfp_flags = GFP_USER | GFP_DMA32;
+
+	_manager.time_to_wait = msecs_to_jiffies(TIME_TO_KEEP_PAGE_IN_POOL);
+	ttm_page_pool_timer_create(&_manager);
+	_manager.page_alloc_inited = 1;
+	return 0;
+}
+
+void ttm_page_alloc_fini(void)
+{
+	int i;
+
+	if (!_manager.page_alloc_inited)
+		return;
+
+	ttm_page_pool_timer_free(&_manager);
+
+	for (i = 0; i < MAX_NUM_POOLS; ++i)
+		ttm_page_pool_free(&_manager.pools[i], false);
+
+	_manager.page_alloc_inited = 0;
+}
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.h b/drivers/gpu/drm/ttm/ttm_page_alloc.h
new file mode 100644
index 0000000..eb0ddf4
--- /dev/null
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) Red Hat Inc.
+
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie <airlied@redhat.com>
+ *          Jerome Glisse <jglisse@redhat.com>
+ */
+#ifndef TTM_PAGE_ALLOC
+#define TTM_PAGE_ALLOC
+
+#include "ttm/ttm_bo_driver.h"
+
+void ttm_put_pages(struct list_head *pages, int flags,
+		enum ttm_caching_state cstate);
+int ttm_get_pages(struct list_head *pages, int flags,
+		enum ttm_caching_state cstate, unsigned count);
+int ttm_page_alloc_init(void);
+void ttm_page_alloc_fini(void);
+
+#endif
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 3d47a2c..d4e20f6 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -38,6 +38,7 @@
 #include "ttm/ttm_module.h"
 #include "ttm/ttm_bo_driver.h"
 #include "ttm/ttm_placement.h"
+#include "ttm_page_alloc.h"
 
 static int ttm_tt_swapin(struct ttm_tt *ttm);
 
@@ -72,21 +73,6 @@ static void ttm_tt_free_page_directory(struct ttm_tt *ttm)
 	ttm->pages = NULL;
 }
 
-static struct page *ttm_tt_alloc_page(unsigned page_flags)
-{
-	gfp_t gfp_flags = GFP_USER;
-
-	if (page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)
-		gfp_flags |= __GFP_ZERO;
-
-	if (page_flags & TTM_PAGE_FLAG_DMA32)
-		gfp_flags |= __GFP_DMA32;
-	else
-		gfp_flags |= __GFP_HIGHMEM;
-
-	return alloc_page(gfp_flags);
-}
-
 static void ttm_tt_free_user_pages(struct ttm_tt *ttm)
 {
 	int write;
@@ -127,15 +113,21 @@ static void ttm_tt_free_user_pages(struct ttm_tt *ttm)
 static struct page *__ttm_tt_get_page(struct ttm_tt *ttm, int index)
 {
 	struct page *p;
+	struct list_head h;
 	struct ttm_mem_global *mem_glob = ttm->glob->mem_glob;
 	int ret;
 
 	while (NULL == (p = ttm->pages[index])) {
-		p = ttm_tt_alloc_page(ttm->page_flags);
 
-		if (!p)
+		INIT_LIST_HEAD(&h);
+
+		ret = ttm_get_pages(&h, ttm->page_flags, ttm->caching_state, 1);
+
+		if (ret != 0)
 			return NULL;
 
+		p = list_first_entry(&h, struct page, lru);
+
 		ret = ttm_mem_global_alloc_page(mem_glob, p, false, false);
 		if (unlikely(ret != 0))
 			goto out_err;
@@ -244,10 +236,10 @@ static int ttm_tt_set_caching(struct ttm_tt *ttm,
 	if (ttm->caching_state == c_state)
 		return 0;
 
-	if (c_state != tt_cached) {
-		ret = ttm_tt_populate(ttm);
-		if (unlikely(ret != 0))
-			return ret;
+	if (ttm->state == tt_unpopulated) {
+		/* Change caching but don't populate */
+		ttm->caching_state = c_state;
+		return 0;
 	}
 
 	if (ttm->caching_state == tt_cached)
@@ -298,25 +290,32 @@ EXPORT_SYMBOL(ttm_tt_set_placement_caching);
 static void ttm_tt_free_alloced_pages(struct ttm_tt *ttm)
 {
 	int i;
+	unsigned count = 0;
+	struct list_head h;
 	struct page *cur_page;
 	struct ttm_backend *be = ttm->be;
 
+	INIT_LIST_HEAD(&h);
+
 	if (be)
 		be->func->clear(be);
-	(void)ttm_tt_set_caching(ttm, tt_cached);
 	for (i = 0; i < ttm->num_pages; ++i) {
+
 		cur_page = ttm->pages[i];
 		ttm->pages[i] = NULL;
 		if (cur_page) {
 			if (page_count(cur_page) != 1)
 				printk(KERN_ERR TTM_PFX
 				       "Erroneous page count. "
-				       "Leaking pages.\n");
+				       "Leaking pages (%d/%d).\n",
+				       page_count(cur_page), count);
 			ttm_mem_global_free_page(ttm->glob->mem_glob,
 						 cur_page);
-			__free_page(cur_page);
+			list_add(&cur_page->lru, &h);
+			count++;
 		}
 	}
+	ttm_put_pages(&h, ttm->page_flags, ttm->caching_state);
 	ttm->state = tt_unpopulated;
 	ttm->first_himem_page = ttm->num_pages;
 	ttm->last_lomem_page = -1;
-- 
1.6.3.3




* Re: [RFC] drm/ttm: add pool wc/uc page allocator
  2010-02-28 21:30 [RFC] drm/ttm: add pool wc/uc page allocator Pauli Nieminen
@ 2010-03-01 22:32 ` Pauli Nieminen
  2010-03-02 10:56   ` Michel Dänzer
  2010-03-03 13:33   ` Jerome Glisse
  0 siblings, 2 replies; 13+ messages in thread
From: Pauli Nieminen @ 2010-03-01 22:32 UTC (permalink / raw)
  To: dri-devel; +Cc: Jerome Glisse

[-- Attachment #1: Type: text/plain, Size: 30552 bytes --]

On Sun, Feb 28, 2010 at 11:30 PM, Pauli Nieminen <suokkos@gmail.com> wrote:
> [snip -- full patch quoted above]

While there have been no comments yet, I did notice serious errors in
ttm_pages_pool_fill_locked. But I would still like comments on the
overall design.

Is it correct to use page->private in the pool allocator?

bo allocation is still too expensive an operation for dma buffers in
classic Mesa. It takes quite a lot of CPU time in bind memory (on AGP
systems) and in the ttm_mem_global functions. Would it be possible to
move some parts of those expensive operations into the pool fill and
pool free code?

Also, using a single call to allocate all pages for a buffer would
improve the situation when the pool is not large enough. I can change
ttm_tt_populate to allocate all pages with a single call if that sounds ok.
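
Something along these lines, as a rough sketch of the idea only (mem_glob
is ttm->glob->mem_glob as in __ttm_tt_get_page(); the error path and the
swap-in handling are omitted):

	struct list_head h;
	struct page *p;
	unsigned i = 0;
	int ret;

	INIT_LIST_HEAD(&h);
	/* One pool request for the whole ttm instead of one per page. */
	ret = ttm_get_pages(&h, ttm->page_flags, ttm->caching_state,
			    ttm->num_pages);
	if (ret != 0)
		return ret;

	list_for_each_entry(p, &h, lru) {
		/* Accounting would still happen per page for now. */
		ret = ttm_mem_global_alloc_page(mem_glob, p, false, false);
		if (unlikely(ret != 0))
			return ret; /* real code would ttm_put_pages() the rest */
		ttm->pages[i++] = p;
	}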

I will still look at how to integrate the pool allocator with a shrinker.
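
A purely hypothetical sketch of such a hook, assuming the current
int (*shrink)(int nr_to_scan, gfp_t gfp_mask) shrinker callback (the names
here are made up and nothing below is part of the patch):

static int ttm_pool_shrink(int nr_to_scan, gfp_t gfp_mask)
{
	unsigned count = 0;
	int i;

	if (nr_to_scan) {
		/* Under memory pressure empty the pools, like
		 * ttm_page_alloc_fini() does, instead of waiting for
		 * the timer. */
		for (i = 0; i < MAX_NUM_POOLS; ++i)
			ttm_page_pool_free(&_manager.pools[i], false);
	}

	/* Report roughly how many pages are still cached in the pools. */
	for (i = 0; i < MAX_NUM_POOLS; ++i)
		count += _manager.pools[i].npages;
	return count;
}

static struct shrinker ttm_pool_shrinker = {
	.shrink = ttm_pool_shrink,
	.seeks = DEFAULT_SEEKS,
};

/* register_shrinker(&ttm_pool_shrinker) would go into ttm_page_alloc_init()
 * and unregister_shrinker(&ttm_pool_shrinker) into ttm_page_alloc_fini(). */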


Attached is the fixed full patch, and here is the fixed version of
ttm_pages_pool_fill_locked:

+static int ttm_pages_pool_fill_locked(struct page_pool *pool, int flags,
+               enum ttm_caching_state cstate, unsigned count,
+               unsigned long *irq_flags)
+{
+       unsigned pages_to_alloc = count - pool->npages;
+       struct page **pages;
+       struct page *page;
+       struct list_head h;
+       unsigned i, cpages;
+       unsigned max_cpages = min(pages_to_alloc,
+                       (unsigned)(PAGE_SIZE/sizeof(*page)));
+
+       /* reserve already allocated pages from the pool */
+       INIT_LIST_HEAD(&h);
+       list_splice_init(&pool->list, &h);
+       i = pool->npages;
+       pages_to_alloc = count;
+       pool->npages = 0;
+
+       /* It is now safe to allow other users to modify the pool, so unlock
+        * it. Allocating pages is an expensive operation. */
+       spin_unlock_irqrestore(&pool->lock, *irq_flags);
+
+       pages = kmalloc(max_cpages*sizeof(*page), GFP_KERNEL);
+
+       if (!pages) {
+               printk(KERN_ERR "[ttm] unable to allocate table for new pages.");
+               spin_lock_irqsave(&pool->lock, *irq_flags);
+               return -ENOMEM;
+       }
+
+       /* i is set to number of pages already in pool */
+       for (cpages = 0; i < pages_to_alloc; ++i) {
+               page = alloc_page(pool->gfp_flags);
+               if (!page) {
+                       printk(KERN_ERR "[ttm] unable to get page %u\n", i);
+
+                       ttm_put_pages(&h, flags, cstate);
+                       spin_lock_irqsave(&pool->lock, *irq_flags);
+                       return -ENOMEM;
+               }
+
+#ifdef CONFIG_HIGHMEM
+               /* gfp flags of highmem pages should never include dma32, so
+                * we should be fine in such a case
+                */
+               if (!PageHighMem(page))
+#endif
+               {
+                       pages[cpages++] = page;
+                       if (cpages == max_cpages) {
+
+                               ttm_set_pages_caching(pages, cstate, cpages);
+                               cpages = 0;
+                       }
+               }
+               list_add(&page->lru, &h);
+       }
+
+       if (cpages)
+               ttm_set_pages_caching(pages, cstate, cpages);
+
+       spin_lock_irqsave(&pool->lock, *irq_flags);
+
+       list_splice(&h, &pool->list);
+
+       pool->npages += pages_to_alloc;
+
+       return 0;
+}

[-- Attachment #2: 0001-drm-ttm-add-pool-wc-uc-page-allocator.patch --]
[-- Type: text/x-diff, Size: 20918 bytes --]

From 8340b84a4752830feff6b77f3c23707dbd69f757 Mon Sep 17 00:00:00 2001
From: Jerome Glisse <jglisse@redhat.com>
Date: Wed, 23 Sep 2009 18:48:56 +1000
Subject: [PATCH] drm/ttm: add pool wc/uc page allocator

On AGP systems we routinely allocate and free uncached (uc) or
write-combined (wc) memory. Changing a page from cached (wb) to uc or wc
is very expensive and involves a lot of flushing. To improve performance
this allocator uses a pool of uc/wc pages.

Pools are linked lists of pages, ordered so that the head is the latest
addition to the pool and the tail is the oldest page. Old pages are
periodically freed from the pools to keep memory use to a minimum.

Pools are protected by spinlocks to allow multiple threads to allocate
pages simultaneously. Expensive operations must not be done while holding
the spinlock, to maximise performance for multiple threads.

V2: Fix ttm_pages_pool_fill_locked.
 - max_cpages should be used for kmalloc instead of pages_to_alloc.
 - Set pool->npages to zero after taking all pages from the pool.

Based on Jerome Glisse's and Dave Airlie's pool allocator.

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Pauli Nieminen <suokkos@gmail.com>
---
 drivers/gpu/drm/ttm/Makefile         |    2 +-
 drivers/gpu/drm/ttm/ttm_memory.c     |    5 +-
 drivers/gpu/drm/ttm/ttm_page_alloc.c |  518 ++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/ttm/ttm_page_alloc.h |   38 +++
 drivers/gpu/drm/ttm/ttm_tt.c         |   47 ++--
 5 files changed, 584 insertions(+), 26 deletions(-)
 create mode 100644 drivers/gpu/drm/ttm/ttm_page_alloc.c
 create mode 100644 drivers/gpu/drm/ttm/ttm_page_alloc.h

diff --git a/drivers/gpu/drm/ttm/Makefile b/drivers/gpu/drm/ttm/Makefile
index 1e138f5..4256e20 100644
--- a/drivers/gpu/drm/ttm/Makefile
+++ b/drivers/gpu/drm/ttm/Makefile
@@ -4,6 +4,6 @@
 ccflags-y := -Iinclude/drm
 ttm-y := ttm_agp_backend.o ttm_memory.o ttm_tt.o ttm_bo.o \
 	ttm_bo_util.o ttm_bo_vm.o ttm_module.o ttm_global.o \
-	ttm_object.o ttm_lock.o ttm_execbuf_util.o
+	ttm_object.o ttm_lock.o ttm_execbuf_util.o ttm_page_alloc.o
 
 obj-$(CONFIG_DRM_TTM) += ttm.o
diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c
index f5245c0..d587fbe 100644
--- a/drivers/gpu/drm/ttm/ttm_memory.c
+++ b/drivers/gpu/drm/ttm/ttm_memory.c
@@ -32,6 +32,7 @@
 #include <linux/wait.h>
 #include <linux/mm.h>
 #include <linux/module.h>
+#include "ttm_page_alloc.h"
 
 #define TTM_MEMORY_ALLOC_RETRIES 4
 
@@ -394,6 +395,7 @@ int ttm_mem_global_init(struct ttm_mem_global *glob)
 		       "Zone %7s: Available graphics memory: %llu kiB.\n",
 		       zone->name, (unsigned long long) zone->max_mem >> 10);
 	}
+	ttm_page_alloc_init();
 	return 0;
 out_no_zone:
 	ttm_mem_global_release(glob);
@@ -413,9 +415,10 @@ void ttm_mem_global_release(struct ttm_mem_global *glob)
 		zone = glob->zones[i];
 		kobject_del(&zone->kobj);
 		kobject_put(&zone->kobj);
-	}
+			}
 	kobject_del(&glob->kobj);
 	kobject_put(&glob->kobj);
+	ttm_page_alloc_fini();
 }
 EXPORT_SYMBOL(ttm_mem_global_release);
 
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
new file mode 100644
index 0000000..e269a10
--- /dev/null
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -0,0 +1,518 @@
+/*
+ * Copyright (c) Red Hat Inc.
+
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie <airlied@redhat.com>
+ *          Jerome Glisse <jglisse@redhat.com>
+ *          Pauli Nieminen <suokkos@gmail.com>
+ */
+
+/* simple list based uncached page pool
+ * - Pool collects recently freed pages for reuse
+ * - Use page->lru to keep a free list
+ * - doesn't track currently in use pages
+ */
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/highmem.h>
+#include <linux/mm_types.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+
+#include <asm/agp.h>
+#include "ttm/ttm_bo_driver.h"
+#include "ttm_page_alloc.h"
+
+/* Number of 4k pages to add at once */
+#define NUM_PAGES_TO_ADD 		64
+/* times are in msecs */
+#define TIME_TO_KEEP_PAGE_IN_POOL	15000
+#define PAGE_FREE_INTERVAL		1000
+
+/**
+ * The list is a FILO (last in, first out) stack.
+ * Pages are pushed to the front of the list, so the tail holds the
+ * least recently used pages. Pages at the tail that have not been
+ * used recently are freed.
+ *
+ * All expensive operations must be done outside of the pool lock to
+ * prevent contention on the lock.
+ */
+struct page_pool {
+	struct list_head	list;
+	int			gfp_flags;
+	unsigned		npages;
+	spinlock_t		lock;
+};
+
+# define MAX_NUM_POOLS 4
+
+struct pool_manager {
+	bool			page_alloc_inited;
+	unsigned long		time_to_wait;
+
+	struct timer_list	timer;
+
+	union {
+		struct page_pool	pools[MAX_NUM_POOLS];
+		struct {
+			struct page_pool	wc_pool;
+			struct page_pool	uc_pool;
+			struct page_pool	wc_pool_dma32;
+			struct page_pool	uc_pool_dma32;
+		} ;
+	};
+};
+
+
+static struct pool_manager _manager;
+
+#ifdef CONFIG_X86
+/* TODO: add this to x86 like _uc, this version here is inefficient */
+static int set_pages_array_wc(struct page **pages, int addrinarray)
+{
+	int i;
+
+	for (i = 0; i < addrinarray; i++)
+		set_memory_wc((unsigned long)page_address(pages[i]), 1);
+	return 0;
+}
+#else
+static int set_pages_array_wb(struct page **pages, int addrinarray)
+{
+#ifdef TTM_HAS_AGP
+	int i;
+
+	for (i = 0; i < addrinarray; i++)
+		unmap_page_from_agp(pages[i]);
+#endif
+	return 0;
+}
+
+static int set_pages_array_wc(struct page **pages, int addrinarray)
+{
+#ifdef TTM_HAS_AGP
+	int i;
+
+	for (i = 0; i < addrinarray; i++)
+		map_page_into_agp(pages[i]);
+#endif
+	return 0;
+}
+
+static int set_pages_array_uc(struct page **pages, int addrinarray)
+{
+#ifdef TTM_HAS_AGP
+	int i;
+
+	for (i = 0; i < addrinarray; i++)
+		map_page_into_agp(pages[i]);
+#endif
+	return 0;
+}
+#endif
+
+
+static void ttm_page_pool_init_locked(struct page_pool *pool)
+{
+	spin_lock_init(&pool->lock);
+	INIT_LIST_HEAD(&pool->list);
+}
+
+static struct page_pool *ttm_get_pool(int flags,
+		enum ttm_caching_state cstate)
+{
+	int pool_index;
+
+	if (cstate == tt_cached)
+		return NULL;
+
+	if (cstate == tt_wc)
+		pool_index = 0x0;
+	else
+		pool_index = 0x1;
+
+	if (flags & TTM_PAGE_FLAG_DMA32)
+		pool_index |= 0x2;
+
+	return &_manager.pools[pool_index];
+}
+
+static void ttm_pages_put(struct page *pages[], unsigned npages)
+{
+	unsigned i;
+	set_pages_array_wb(pages, npages);
+	for (i = 0; i < npages; ++i)
+		__free_page(pages[i]);
+}
+
+/**
+ * Free pages from pool.
+ * If limit_with_time is set to true, only pages that have been in the
+ * pool for at least pool_manager.time_to_wait are freed.
+ *
+ * Pages are freed in NUM_PAGES_TO_ADD chunks to prevent freeing from
+ * blocking other operations on the pool.
+ *
+ * This is called from the timer handler.
+ **/
+static void ttm_page_pool_free(struct page_pool *pool,
+		const bool limit_with_time)
+{
+	struct page *page;
+	struct page *pages_to_free[NUM_PAGES_TO_ADD];
+	unsigned freed_pages;
+	unsigned long now = jiffies + _manager.time_to_wait;
+	unsigned long irq_flags;
+
+restart:
+	freed_pages = 0;
+	spin_lock_irqsave(&pool->lock, irq_flags);
+
+	{
+		list_for_each_entry_reverse(page, &pool->list, lru) {
+
+			if (limit_with_time
+				&& time_after(now, page->private))
+				break;
+
+			pages_to_free[freed_pages++] = page;
+			if (freed_pages >= NUM_PAGES_TO_ADD) {
+				/* Limit the maximum time that we hold the lock
+				 * not to block more important threads */
+				__list_del(page->lru.prev, &pool->list);
+				pool->npages -= freed_pages;
+				spin_unlock_irqrestore(&pool->lock, irq_flags);
+
+				ttm_pages_put(pages_to_free, freed_pages);
+				goto restart;
+			}
+		}
+
+		pool->npages -= freed_pages;
+		if (freed_pages)
+			__list_del(&page->lru, &pool->list);
+
+		spin_unlock_irqrestore(&pool->lock, irq_flags);
+
+		if (freed_pages)
+			ttm_pages_put(pages_to_free, freed_pages);
+	}
+}
+
+/* periodically free unused pages from pool */
+static void ttm_page_pool_timer_handler(unsigned long man_addr)
+{
+	int i;
+	struct pool_manager *m = (struct pool_manager *)man_addr;
+
+	for (i = 0; i < MAX_NUM_POOLS; ++i)
+		ttm_page_pool_free(&_manager.pools[i], true);
+
+	mod_timer(&m->timer, jiffies + msecs_to_jiffies(PAGE_FREE_INTERVAL));
+}
+
+static void ttm_page_pool_timer_create(struct pool_manager *m)
+{
+	/* Create a timer that is fired every second to clean
+	 * the free list */
+	init_timer(&m->timer);
+	m->timer.expires = jiffies + msecs_to_jiffies(PAGE_FREE_INTERVAL);
+	m->timer.function = ttm_page_pool_timer_handler;
+	m->timer.data = (unsigned long)m;
+}
+
+static void ttm_page_pool_timer_free(struct pool_manager *m)
+{
+	del_timer_sync(&m->timer);
+}
+
+static void ttm_set_pages_caching(struct page **pages,
+		enum ttm_caching_state cstate, unsigned cpages)
+{
+	/* Set page caching */
+	switch (cstate) {
+	case tt_uncached:
+		set_pages_array_uc(pages, cpages);
+		break;
+	case tt_wc:
+		set_pages_array_wc(pages, cpages);
+		break;
+	default:
+		break;
+	}
+}
+/**
+ * Allocate new pages to fill the pool up to the requested count.
+ */
+static int ttm_pages_pool_fill_locked(struct page_pool *pool, int flags,
+		enum ttm_caching_state cstate, unsigned count,
+		unsigned long *irq_flags)
+{
+	unsigned pages_to_alloc = count - pool->npages;
+	struct page **pages;
+	struct page *page;
+	struct list_head h;
+	unsigned i, cpages;
+	unsigned max_cpages = min(pages_to_alloc,
+			(unsigned)(PAGE_SIZE/sizeof(*page)));
+
+	/* reserve already allocated pages from pool */
+	INIT_LIST_HEAD(&h);
+	list_splice_init(&pool->list, &h);
+	i = pool->npages;
+	pages_to_alloc = count;
+	pool->npages = 0;
+
+	/* We are now safe to allow other users to modify the pool so unlock
+	 * it. Allocating pages is an expensive operation. */
+	spin_unlock_irqrestore(&pool->lock, *irq_flags);
+
+	pages = kmalloc(max_cpages*sizeof(*page), GFP_KERNEL);
+
+	if (!pages) {
+		printk(KERN_ERR "[ttm] unable to allocate table for new pages.");
+		spin_lock_irqsave(&pool->lock, *irq_flags);
+		return -ENOMEM;
+	}
+
+	/* i is set to number of pages already in pool */
+	for (cpages = 0; i < pages_to_alloc; ++i) {
+		page = alloc_page(pool->gfp_flags);
+		if (!page) {
+			printk(KERN_ERR "[ttm] unable to get page %u\n", i);
+
+			ttm_put_pages(&h, flags, cstate);
+			spin_lock_irqsave(&pool->lock, *irq_flags);
+			return -ENOMEM;
+		}
+
+#ifdef CONFIG_HIGHMEM
+		/* gfp flags of highmem page should never be dma32 so we
+		 * should be fine in such a case
+		 */
+		if (!PageHighMem(page))
+#endif
+		{
+			pages[cpages++] = page;
+			if (cpages == max_cpages) {
+
+				ttm_set_pages_caching(pages, cstate, cpages);
+				cpages = 0;
+			}
+		}
+		list_add(&page->lru, &h);
+	}
+
+	if (cpages)
+		ttm_set_pages_caching(pages, cstate, cpages);
+
+	spin_lock_irqsave(&pool->lock, *irq_flags);
+
+	list_splice(&h, &pool->list);
+
+	pool->npages += pages_to_alloc;
+
+	return 0;
+}
+
+/*
+ * On success pages list will hold count number of correctly
+ * cached pages.
+ */
+int ttm_get_pages(struct list_head *pages, int flags,
+		enum ttm_caching_state cstate, unsigned count)
+{
+	struct page_pool *pool = ttm_get_pool(flags, cstate);
+	struct page *page = NULL;
+	unsigned long irq_flags;
+	int gfp_flags = 0;
+	int retries = 1;
+	int r;
+
+	/* No pool for cached pages */
+	if (cstate == tt_cached) {
+		if (flags & TTM_PAGE_FLAG_ZERO_ALLOC)
+			gfp_flags |= __GFP_ZERO;
+
+		if (flags & TTM_PAGE_FLAG_DMA32)
+			gfp_flags |= GFP_DMA32;
+		else
+			gfp_flags |= __GFP_HIGHMEM;
+
+		/* fallback in case of problems with pool allocation */
+alloc_pages:
+		for (r = 0; r < count; ++r) {
+			page = alloc_page(gfp_flags);
+			if (!page) {
+
+				printk(KERN_ERR "[ttm] unable to allocate page.");
+				return -ENOMEM;
+			}
+
+			list_add(&page->lru, pages);
+		}
+		return 0;
+	}
+
+	gfp_flags = pool->gfp_flags;
+
+	spin_lock_irqsave(&pool->lock, irq_flags);
+	do {
+		/* First we look if pool has the page we want */
+		if (pool->npages >= count) {
+
+
+			/* Find the cut location */
+			if (count <= pool->npages/2) {
+				unsigned c = 0;
+				list_for_each_entry(page, &pool->list, lru) {
+					if (++c >= count)
+						break;
+				}
+			} else {
+				/* We have to point to the last page of the cut */
+				unsigned c = 0;
+				unsigned rcount = pool->npages - count + 1;
+
+				list_for_each_entry_reverse(page, &pool->list, lru) {
+					if (++c >= rcount)
+						break;
+				}
+			}
+
+			list_cut_position(pages, &pool->list, &page->lru);
+
+			pool->npages -= count;
+			spin_unlock_irqrestore(&pool->lock, irq_flags);
+
+			if (flags & TTM_PAGE_FLAG_ZERO_ALLOC) {
+				list_for_each_entry(page, pages, lru) {
+					clear_page(page_address(page));
+				}
+			}
+
+			return 0;
+		}
+
+		if (retries == 0)
+			break;
+
+		--retries;
+
+		r = ttm_pages_pool_fill_locked(pool, flags, cstate,
+				count, &irq_flags);
+		if (r) {
+			spin_unlock_irqrestore(&pool->lock, irq_flags);
+			return r;
+		}
+
+	} while (1);
+
+	spin_unlock_irqrestore(&pool->lock, irq_flags);
+
+	/*  Pool allocator failed without errors. We try to allocate new pages
+	 *  instead. */
+	if (flags & TTM_PAGE_FLAG_ZERO_ALLOC)
+		gfp_flags |= __GFP_ZERO;
+
+	goto alloc_pages;
+}
+
+/* Put all pages in pages list to correct pool to wait for reuse */
+void ttm_put_pages(struct list_head *pages, int flags,
+		enum ttm_caching_state cstate)
+{
+	struct page_pool *pool = ttm_get_pool(flags, cstate);
+	struct page *p, *tmp;
+	unsigned page_count = 0;
+	unsigned long irq_flags;
+	unsigned long now = jiffies;
+
+	if (pool == NULL) {
+		list_for_each_entry_safe(p, tmp, pages, lru) {
+			__free_page(p);
+		}
+		/* We took the ownership -> clear the pages list*/
+		INIT_LIST_HEAD(pages);
+		return;
+	}
+
+	list_for_each_entry_safe(p, tmp, pages, lru) {
+
+#ifdef CONFIG_HIGHMEM
+		/* we don't have pool for highmem -> free them */
+		if (PageHighMem(p)) {
+			list_del(&p->lru);
+			__free_page(p);
+		} else
+#endif
+		{
+			p->private = now;
+			++page_count;
+		}
+
+	}
+
+	spin_lock_irqsave(&pool->lock, irq_flags);
+	list_splice(pages, &pool->list);
+	pool->npages += page_count;
+	spin_unlock_irqrestore(&pool->lock, irq_flags);
+
+	/* We took the ownership -> clear the pages list*/
+	INIT_LIST_HEAD(pages);
+}
+
+
+int ttm_page_alloc_init(void)
+{
+	if (_manager.page_alloc_inited)
+		return 0;
+
+	ttm_page_pool_init_locked(&_manager.wc_pool);
+	_manager.wc_pool.gfp_flags = GFP_HIGHUSER;
+	ttm_page_pool_init_locked(&_manager.uc_pool);
+	_manager.uc_pool.gfp_flags = GFP_HIGHUSER;
+	ttm_page_pool_init_locked(&_manager.wc_pool_dma32);
+	_manager.wc_pool_dma32.gfp_flags = GFP_USER | GFP_DMA32;
+	ttm_page_pool_init_locked(&_manager.uc_pool_dma32);
+	_manager.uc_pool_dma32.gfp_flags = GFP_USER | GFP_DMA32;
+
+	_manager.time_to_wait = msecs_to_jiffies(TIME_TO_KEEP_PAGE_IN_POOL);
+	ttm_page_pool_timer_create(&_manager);
+	_manager.page_alloc_inited = 1;
+	return 0;
+}
+
+void ttm_page_alloc_fini(void)
+{
+	int i;
+
+	if (!_manager.page_alloc_inited)
+		return;
+
+	ttm_page_pool_timer_free(&_manager);
+
+	for (i = 0; i < MAX_NUM_POOLS; ++i)
+		ttm_page_pool_free(&_manager.pools[i], false);
+
+	_manager.page_alloc_inited = 0;
+}
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.h b/drivers/gpu/drm/ttm/ttm_page_alloc.h
new file mode 100644
index 0000000..eb0ddf4
--- /dev/null
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) Red Hat Inc.
+
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie <airlied@redhat.com>
+ *          Jerome Glisse <jglisse@redhat.com>
+ */
+#ifndef TTM_PAGE_ALLOC
+#define TTM_PAGE_ALLOC
+
+#include "ttm/ttm_bo_driver.h"
+
+void ttm_put_pages(struct list_head *pages, int flags,
+		enum ttm_caching_state cstate);
+int ttm_get_pages(struct list_head *pages, int flags,
+		enum ttm_caching_state cstate, unsigned count);
+int ttm_page_alloc_init(void);
+void ttm_page_alloc_fini(void);
+
+#endif
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 3d47a2c..d4e20f6 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -38,6 +38,7 @@
 #include "ttm/ttm_module.h"
 #include "ttm/ttm_bo_driver.h"
 #include "ttm/ttm_placement.h"
+#include "ttm_page_alloc.h"
 
 static int ttm_tt_swapin(struct ttm_tt *ttm);
 
@@ -72,21 +73,6 @@ static void ttm_tt_free_page_directory(struct ttm_tt *ttm)
 	ttm->pages = NULL;
 }
 
-static struct page *ttm_tt_alloc_page(unsigned page_flags)
-{
-	gfp_t gfp_flags = GFP_USER;
-
-	if (page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)
-		gfp_flags |= __GFP_ZERO;
-
-	if (page_flags & TTM_PAGE_FLAG_DMA32)
-		gfp_flags |= __GFP_DMA32;
-	else
-		gfp_flags |= __GFP_HIGHMEM;
-
-	return alloc_page(gfp_flags);
-}
-
 static void ttm_tt_free_user_pages(struct ttm_tt *ttm)
 {
 	int write;
@@ -127,15 +113,21 @@ static void ttm_tt_free_user_pages(struct ttm_tt *ttm)
 static struct page *__ttm_tt_get_page(struct ttm_tt *ttm, int index)
 {
 	struct page *p;
+	struct list_head h;
 	struct ttm_mem_global *mem_glob = ttm->glob->mem_glob;
 	int ret;
 
 	while (NULL == (p = ttm->pages[index])) {
-		p = ttm_tt_alloc_page(ttm->page_flags);
 
-		if (!p)
+		INIT_LIST_HEAD(&h);
+
+		ret = ttm_get_pages(&h, ttm->page_flags, ttm->caching_state, 1);
+
+		if (ret != 0)
 			return NULL;
 
+		p = list_first_entry(&h, struct page, lru);
+
 		ret = ttm_mem_global_alloc_page(mem_glob, p, false, false);
 		if (unlikely(ret != 0))
 			goto out_err;
@@ -244,10 +236,10 @@ static int ttm_tt_set_caching(struct ttm_tt *ttm,
 	if (ttm->caching_state == c_state)
 		return 0;
 
-	if (c_state != tt_cached) {
-		ret = ttm_tt_populate(ttm);
-		if (unlikely(ret != 0))
-			return ret;
+	if (ttm->state == tt_unpopulated) {
+		/* Change caching but don't populate */
+		ttm->caching_state = c_state;
+		return 0;
 	}
 
 	if (ttm->caching_state == tt_cached)
@@ -298,25 +290,32 @@ EXPORT_SYMBOL(ttm_tt_set_placement_caching);
 static void ttm_tt_free_alloced_pages(struct ttm_tt *ttm)
 {
 	int i;
+	unsigned count = 0;
+	struct list_head h;
 	struct page *cur_page;
 	struct ttm_backend *be = ttm->be;
 
+	INIT_LIST_HEAD(&h);
+
 	if (be)
 		be->func->clear(be);
-	(void)ttm_tt_set_caching(ttm, tt_cached);
 	for (i = 0; i < ttm->num_pages; ++i) {
+
 		cur_page = ttm->pages[i];
 		ttm->pages[i] = NULL;
 		if (cur_page) {
 			if (page_count(cur_page) != 1)
 				printk(KERN_ERR TTM_PFX
 				       "Erroneous page count. "
-				       "Leaking pages.\n");
+				       "Leaking pages (%d/%d).\n",
+				       page_count(cur_page), count);
 			ttm_mem_global_free_page(ttm->glob->mem_glob,
 						 cur_page);
-			__free_page(cur_page);
+			list_add(&cur_page->lru, &h);
+			count++;
 		}
 	}
+	ttm_put_pages(&h, ttm->page_flags, ttm->caching_state);
 	ttm->state = tt_unpopulated;
 	ttm->first_himem_page = ttm->num_pages;
 	ttm->last_lomem_page = -1;
-- 
1.6.3.3



--
_______________________________________________
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [RFC] drm/ttm: add pool wc/uc page allocator
  2010-03-01 22:32 ` Pauli Nieminen
@ 2010-03-02 10:56   ` Michel Dänzer
       [not found]     ` <4B8CFB1B.1020806@shipmail.org>
  2010-03-03 13:33   ` Jerome Glisse
  1 sibling, 1 reply; 13+ messages in thread
From: Michel Dänzer @ 2010-03-02 10:56 UTC (permalink / raw)
  To: Pauli Nieminen; +Cc: Jerome Glisse, dri-devel

On Tue, 2010-03-02 at 00:32 +0200, Pauli Nieminen wrote: 
> 
> bo allocation is still too expensive an operation for dma buffers in
> classic mesa. It takes quite a lot of cpu time in bind memory (agp
> system) and in the ttm_mem_global functions. Would it be possible to
> move some of those expensive operations into the pool fill and pool
> free code?

Maybe we need userspace BO sub-allocation and/or caching. At least for
the 'DMA' buffers it should be simple for userspace to keep a round
robin list of buffers.
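
For the 'DMA' case a minimal sketch of what that could look like is
below; bo_alloc/bo_free/bo_busy/bo_size are just placeholders here, not
an existing libdrm API:

#define BO_CACHE_SLOTS 16

struct bo_cache {
	struct bo *slots[BO_CACHE_SLOTS];	/* most recently freed BOs */
	unsigned   next;
};

/* Reuse a cached BO that is idle and big enough, otherwise fall back
 * to a fresh kernel allocation. */
static struct bo *bo_cache_get(struct bo_cache *c, unsigned size)
{
	struct bo *bo = c->slots[c->next];

	if (bo && !bo_busy(bo) && bo_size(bo) >= size) {
		c->slots[c->next] = NULL;
		c->next = (c->next + 1) % BO_CACHE_SLOTS;
		return bo;
	}
	return bo_alloc(size);
}

/* Instead of freeing, park the BO for later reuse; evict the oldest
 * entry if the slot is still occupied. */
static void bo_cache_put(struct bo_cache *c, struct bo *bo)
{
	if (c->slots[c->next])
		bo_free(c->slots[c->next]);
	c->slots[c->next] = bo;
}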


-- 
Earthling Michel Dänzer           |                http://www.vmware.com
Libre software enthusiast         |          Debian, X and DRI developer

--
_______________________________________________
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC] drm/ttm: add pool wc/uc page allocator
       [not found]     ` <4B8CFB1B.1020806@shipmail.org>
@ 2010-03-03 12:13       ` Pauli Nieminen
  2010-03-03 12:47         ` Luca Barbieri
  0 siblings, 1 reply; 13+ messages in thread
From: Pauli Nieminen @ 2010-03-03 12:13 UTC (permalink / raw)
  To: Thomas Hellström; +Cc: Michel Dänzer, dri-devel, Jerome Glisse

2010/3/2 Thomas Hellström <thomas@shipmail.org>:
> Michel Dänzer wrote:
>>
>> On Tue, 2010-03-02 at 00:32 +0200, Pauli Nieminen wrote:
>>>
>>> bo allocation is still too expensive an operation for dma buffers in
>>> classic mesa. It takes quite a lot of cpu time in bind memory (agp
>>> system) and in the ttm_mem_global functions. Would it be possible to
>>> move some of those expensive operations into the pool fill and pool
>>> free code?
>>>
>>
>> Maybe we need userspace BO sub-allocation and/or caching. At least for
>> the 'DMA' buffers it should be simple for userspace to keep a round
>> robin list of buffers.
>>
>>
>>
>
> Yeah, that's indeed one of the solutions.
> Drawback is that there is no way for process 2 to empty process 1's bo cache
> in situations of memory shortage, although I guess there are ways to send
> "empty cache" events to user-space if needed.
>
> The other solution is kernel bo sub-allocation and caching, but then one has
> to live with the overhead of page-faults and vma setups / teardowns with
> associated tlb flushes.
>
> /Thomas

There is already a round-robin list of (suballocated) buffers and it
works well enough. The only problem with the current implementation is
that freeing of buffers is done very lazily to avoid any need for
reallocation. But the extra GTT memory use looks like a small price to
pay for avoiding the cost of allocating bos.

When I tested reducing the number of buffers held by mesa, I was hoping
to lose very little performance while shrinking the number of empty
buffers mesa keeps around. That turned out to be too much to hope for.

But at least this looks like enough for other components that
reallocate buffers often. The ddx does temporary bo allocations, and
the kernel pool allocator reduces the allocation cost enough to match
what it was in UMS. That means 20-40 times better allocation
performance for uc/wc pages.

Pauli


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC] drm/ttm: add pool wc/uc page allocator
  2010-03-03 12:13       ` Pauli Nieminen
@ 2010-03-03 12:47         ` Luca Barbieri
  2010-03-03 13:23           ` Thomas Hellstrom
  0 siblings, 1 reply; 13+ messages in thread
From: Luca Barbieri @ 2010-03-03 12:47 UTC (permalink / raw)
  To: Pauli Nieminen; +Cc: Michel Dänzer, dri-devel, Jerome Glisse

While this is almost surely a good idea, note that userspace caching
and suballocation substantially improve Mesa performance even on PCIe
systems.
This is mostly due to the unavoidable overhead of kernel calls and
pagetable modifications, the avoidable linear search the kernel
currently does to find an empty space in the virtual address space,
and the additional pagefaults.

Userspace caching and suballocation mean that you just have to compute
a pointer, which you cannot beat with any kernel-space solution. This
is also the way glibc allocates normal memory with malloc(), for the
same reason.
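
To make the "just compute a pointer" point concrete, a suballocator
over one mapped BO is essentially the following sketch (illustrative
only, not code from Mesa):

#include <stddef.h>
#include <stdint.h>

/* One big BO mapped once; allocations are carved out by bumping an
 * offset, so the hot path is a couple of additions and no syscall. */
struct suballoc {
	uint8_t *cpu_map;	/* CPU mapping of the parent BO */
	uint32_t gpu_base;	/* GPU offset of the parent BO */
	size_t   size;
	size_t   used;
};

static void *sub_alloc(struct suballoc *s, size_t n, uint32_t *gpu_addr)
{
	void *ptr;

	n = (n + 63) & ~(size_t)63;	/* keep allocations 64-byte aligned */
	if (s->used + n > s->size)
		return NULL;		/* caller falls back to a new BO */

	ptr = s->cpu_map + s->used;
	*gpu_addr = s->gpu_base + (uint32_t)s->used;
	s->used += n;
	return ptr;
}

Freeing the whole thing is then just resetting 'used' once the GPU is
done with the parent BO.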


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC] drm/ttm: add pool wc/uc page allocator
  2010-03-03 12:47         ` Luca Barbieri
@ 2010-03-03 13:23           ` Thomas Hellstrom
  2010-03-03 14:03             ` Luca Barbieri
  0 siblings, 1 reply; 13+ messages in thread
From: Thomas Hellstrom @ 2010-03-03 13:23 UTC (permalink / raw)
  To: Luca Barbieri; +Cc: Jerome Glisse, Michel Dänzer, dri-devel

Luca Barbieri wrote:
> While this is almost surely a good idea, note that userspace caching
> and suballocation substantially improves Mesa performance even on PCIe
> systems.
> This is mostly due to the unavoidable overhead of kernel calls and
> pagetable modifications, 


> as well as the avoidable linear search the
> kernel currently does to find an empty space in virtual address space,
>   

^^^ Luca, I've never seen this show up high on a profile (yet). Do you
see that with Nouveau? I used to have an rb-tree implementation of
drm_mm_xxx lying around, but I didn't use it because I didn't have a
case where it showed up.



> as well as the additional pagefaults.
>
> Userspace caching and suballocation mean that you just have to compute
> a pointer, which you cannot beat with any kernel-space solution. This
> is also the way glibc allocates normal memory with malloc(), for the
> same reason.
>   



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC] drm/ttm: add pool wc/uc page allocator
  2010-03-01 22:32 ` Pauli Nieminen
  2010-03-02 10:56   ` Michel Dänzer
@ 2010-03-03 13:33   ` Jerome Glisse
  2010-03-03 14:23     ` Pauli Nieminen
  1 sibling, 1 reply; 13+ messages in thread
From: Jerome Glisse @ 2010-03-03 13:33 UTC (permalink / raw)
  To: Pauli Nieminen; +Cc: Jerome Glisse, dri-devel

On Tue, Mar 02, 2010 at 12:32:54AM +0200, Pauli Nieminen wrote:
> On Sun, Feb 28, 2010 at 11:30 PM, Pauli Nieminen <suokkos@gmail.com> wrote:
> > On AGP system we might allocate/free routinely uncached or wc memory,
> > changing page from cached (wb) to uc or wc is very expensive and involves
> > a lot of flushing. To improve performance this allocator use a pool
> > of uc,wc pages.
> >
> > Pools are linked lists of pages. Ordered so that first is the latest adition
> > to the pool and last is the oldest page. Old pages are periodicaly freed from
> > pools to keep memory use at minimum.
> >
> > Pools are protected with spinlocks to allow multiple threads to allocate pages
> > simultanously. Expensive operations must not hold the spinlock to maximise
> > performance for multiple threads.
> >
> > Based on Jerome Glisse's and Dave Airlie's pool allocator.
> >
> > Signed-off-by: Jerome Glisse <jglisse@redhat.com>
> > Signed-off-by: Dave Airlie <airlied@redhat.com>
> > Signed-off-by: Pauli Nieminen <suokkos@gmail.com>

I think it's overdesigned; by trying to be too clever we often end up
with code that is more complex and less efficient in the end. While the
idea of freeing pages only after some amount of time is the way to go,
I think a simpler approach will do a better job. For instance:

pool {
  npages // npages in pool
  page *pages[enoughttohold16Morsomethingconfigurable]
  unsigned long next_free_time;
  lock
}
filllocked(pool,npages)
{
	if (npages > MAXPAGESPERPOOL)
		npages = MAXPAGESPERPOOL;
	for i, npages : allocpage
}
alloc(apages, npages)
{
	do {
	lock(pool)
	pool->next_free_time = jiffies + timeout;
	if (npages > pool->npages) {
		npages -= pool->npages;
		copypageptr(apages, pool->pages, pool->npages)
		pool->npages = 0;
		fillpool(pool, npages)
	} else {
		pool->npages -= npages;
		copypageptr(apages, pool->pages, npages)
		npages = 0;
	}
	unlock(pool)
	} while (npages)
}
poolshrinkcallbacktimer()
{
	for i in npools {
		if (trylock(pool[i].lock) {
			if (timeafter(jiffies, pool[i].nextfreetime)) {
				free bunch of pages for instance 1M at a time
			}
			unlock(pool[i].lock)
		}
	}
}
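
Spelled out as (untested) C, the allocation half of that design is not
much more than the sketch below; refill and the timer-driven free are
left out, and the numbers are made up:

#define MAX_PAGES_PER_POOL 4096		/* ~16M worth of 4k pages */

struct simple_pool {
	spinlock_t	lock;
	unsigned	npages;
	unsigned long	next_free_time;
	struct page	*pages[MAX_PAGES_PER_POOL];
};

static int simple_pool_alloc(struct simple_pool *pool, struct page **apages,
			     unsigned count, gfp_t gfp_flags)
{
	unsigned i, take;

	spin_lock(&pool->lock);
	/* pushing the free deadline forward on every allocation */
	pool->next_free_time = jiffies +
		msecs_to_jiffies(TIME_TO_KEEP_PAGE_IN_POOL);
	take = min(count, pool->npages);
	for (i = 0; i < take; ++i)
		apages[i] = pool->pages[--pool->npages];
	spin_unlock(&pool->lock);

	/* whatever the pool could not provide is allocated directly;
	 * caching attributes would still have to be set for these pages */
	for (; i < count; ++i) {
		apages[i] = alloc_page(gfp_flags);
		if (!apages[i])
			return -ENOMEM;
	}
	return 0;
}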

General remark: I don't think you handle jiffies wraparound properly
(see the sketch below for the usual wrap-safe idiom). Also the whole
lock dropping in the middle of list traversal looks very fragile to me;
right now we don't have much concurrency in radeon, so I don't think
the effect of using the allocator concurrently has been tested well.
Anyway, see below for more comments on the code.
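
For reference, the usual wrap-safe way to write the age check is to
keep the timestamp from when the page entered the pool and compare with
the time_before()/time_after() macros, roughly:

	/* page->private holds jiffies from the moment the page was
	 * freed into the pool (set in ttm_put_pages) */
	unsigned long expires = page->private + _manager.time_to_wait;

	/* stop once we hit pages that are still too young;
	 * time_before() stays correct across a jiffies wraparound */
	if (limit_with_time && time_before(jiffies, expires))
		break;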

Btw thanks for looking into this, this isn't a sexy area but it
definitely needs love.

Cheers,
Jerome


> > ---
> >  drivers/gpu/drm/ttm/Makefile         |    2 +-
> >  drivers/gpu/drm/ttm/ttm_memory.c     |    5 +-
> >  drivers/gpu/drm/ttm/ttm_page_alloc.c |  514 ++++++++++++++++++++++++++++++++++
> >  drivers/gpu/drm/ttm/ttm_page_alloc.h |   38 +++
> >  drivers/gpu/drm/ttm/ttm_tt.c         |   47 ++--
> >  5 files changed, 580 insertions(+), 26 deletions(-)
> >  create mode 100644 drivers/gpu/drm/ttm/ttm_page_alloc.c
> >  create mode 100644 drivers/gpu/drm/ttm/ttm_page_alloc.h
> >
> > diff --git a/drivers/gpu/drm/ttm/Makefile b/drivers/gpu/drm/ttm/Makefile
> > index 1e138f5..4256e20 100644
> > --- a/drivers/gpu/drm/ttm/Makefile
> > +++ b/drivers/gpu/drm/ttm/Makefile
> > @@ -4,6 +4,6 @@
> >  ccflags-y := -Iinclude/drm
> >  ttm-y := ttm_agp_backend.o ttm_memory.o ttm_tt.o ttm_bo.o \
> >        ttm_bo_util.o ttm_bo_vm.o ttm_module.o ttm_global.o \
> > -       ttm_object.o ttm_lock.o ttm_execbuf_util.o
> > +       ttm_object.o ttm_lock.o ttm_execbuf_util.o ttm_page_alloc.o
> >
> >  obj-$(CONFIG_DRM_TTM) += ttm.o
> > diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c
> > index f5245c0..d587fbe 100644
> > --- a/drivers/gpu/drm/ttm/ttm_memory.c
> > +++ b/drivers/gpu/drm/ttm/ttm_memory.c
> > @@ -32,6 +32,7 @@
> >  #include <linux/wait.h>
> >  #include <linux/mm.h>
> >  #include <linux/module.h>
> > +#include "ttm_page_alloc.h"
> >
> >  #define TTM_MEMORY_ALLOC_RETRIES 4
> >
> > @@ -394,6 +395,7 @@ int ttm_mem_global_init(struct ttm_mem_global *glob)
> >                       "Zone %7s: Available graphics memory: %llu kiB.\n",
> >                       zone->name, (unsigned long long) zone->max_mem >> 10);
> >        }
> > +       ttm_page_alloc_init();
> >        return 0;
> >  out_no_zone:
> >        ttm_mem_global_release(glob);
> > @@ -413,9 +415,10 @@ void ttm_mem_global_release(struct ttm_mem_global *glob)
> >                zone = glob->zones[i];
> >                kobject_del(&zone->kobj);
> >                kobject_put(&zone->kobj);
> > -       }
> > +                       }
> >        kobject_del(&glob->kobj);
> >        kobject_put(&glob->kobj);
> > +       ttm_page_alloc_fini();
> >  }
> >  EXPORT_SYMBOL(ttm_mem_global_release);
> >
> > diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
> > new file mode 100644
> > index 0000000..de6576c
> > --- /dev/null
> > +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
> > @@ -0,0 +1,514 @@
> > +/*
> > + * Copyright (c) Red Hat Inc.
> > +
> > + * Permission is hereby granted, free of charge, to any person obtaining a
> > + * copy of this software and associated documentation files (the "Software"),
> > + * to deal in the Software without restriction, including without limitation
> > + * the rights to use, copy, modify, merge, publish, distribute, sub license,
> > + * and/or sell copies of the Software, and to permit persons to whom the
> > + * Software is furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice (including the
> > + * next paragraph) shall be included in all copies or substantial portions
> > + * of the Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> > + * DEALINGS IN THE SOFTWARE.
> > + *
> > + * Authors: Dave Airlie <airlied@redhat.com>
> > + *          Jerome Glisse <jglisse@redhat.com>
> > + *          Pauli Nienmien <suokkos@gmail.com>
> > + */
> > +
> > +/* simple list based uncached page pool
> > + * - Pool collects resently freed pages for reuse
> > + * - Use page->lru to keep a free list
> > + * - doesn't track currently in use pages
> > + */
> > +#include <linux/list.h>
> > +#include <linux/spinlock.h>
> > +#include <linux/highmem.h>
> > +#include <linux/mm_types.h>
> > +#include <linux/jiffies.h>
> > +#include <linux/timer.h>
> > +
> > +#include <asm/agp.h>
> > +#include "ttm/ttm_bo_driver.h"
> > +#include "ttm_page_alloc.h"
> > +
> > +/* Number of 4k pages to add at once */
> > +#define NUM_PAGES_TO_ADD               64
> > +/* times are in msecs */
> > +#define TIME_TO_KEEP_PAGE_IN_POOL      15000
> > +#define PAGE_FREE_INTERVAL             1000
> > +
> > +/**
> > + * list is a FILO stack.
> > + * All pages pushed into it must go to begin while end of list must hold the
> > + * least recently used pages. When pages in end of the list are not recently
> > + * use they are freed.
> > + *
> > + * All expensive operations must be done outside of pool lock to prevent
> > + * contention on lock.
> > + */
> > +struct page_pool {
> > +       struct list_head        list;
> > +       int                     gfp_flags;
> > +       unsigned                npages;
> > +       spinlock_t              lock;
> > +};
> > +
> > +# define MAX_NUM_POOLS 4
> > +
> > +struct pool_manager {
> > +       bool                    page_alloc_inited;
> > +       unsigned long           time_to_wait;
> > +
> > +       struct timer_list       timer;
> > +
> > +       union {
> > +               struct page_pool        pools[MAX_NUM_POOLS];
> > +               struct {
> > +                       struct page_pool        wc_pool;
> > +                       struct page_pool        uc_pool;
> > +                       struct page_pool        wc_pool_dma32;
> > +                       struct page_pool        uc_pool_dma32;
> > +               } ;
> > +       };
> > +};
> > +
> > +
> > +static struct pool_manager _manager;
> > +
> > +#ifdef CONFIG_X86
> > +/* TODO: add this to x86 like _uc, this version here is inefficient */
> > +static int set_pages_array_wc(struct page **pages, int addrinarray)
> > +{
> > +       int i;
> > +
> > +       for (i = 0; i < addrinarray; i++)
> > +               set_memory_wc((unsigned long)page_address(pages[i]), 1);
> > +       return 0;
> > +}
> > +#else
> > +static int set_pages_array_wb(struct page **pages, int addrinarray)
> > +{
> > +#ifdef TTM_HAS_AGP
> > +       int i;
> > +
> > +       for (i = 0; i < addrinarray; i++)
> > +               unmap_page_from_agp(pages[i]);
> > +#endif
> > +       return 0;
> > +}
> > +
> > +static int set_pages_array_wc(struct page **pages, int addrinarray)
> > +{
> > +#ifdef TTM_HAS_AGP
> > +       int i;
> > +
> > +       for (i = 0; i < addrinarray; i++)
> > +               map_page_into_agp(pages[i]);
> > +#endif
> > +       return 0;
> > +}
> > +
> > +static int set_pages_array_uc(struct page **pages, int addrinarray)
> > +{
> > +#ifdef TTM_HAS_AGP
> > +       int i;
> > +
> > +       for (i = 0; i < addrinarray; i++)
> > +               map_page_into_agp(pages[i]);
> > +#endif
> > +       return 0;
> > +}
> > +#endif
> > +
> > +
> > +static void ttm_page_pool_init_locked(struct page_pool *pool)
> > +{
> > +       spin_lock_init(&pool->lock);
> > +       INIT_LIST_HEAD(&pool->list);
> > +}
> > +
> > +static struct page_pool *ttm_get_pool(int flags,
> > +               enum ttm_caching_state cstate)
> > +{
> > +       int pool_index;
> > +
> > +       if (cstate == tt_cached)
> > +               return NULL;
> > +
> > +       if (cstate == tt_wc)
> > +               pool_index = 0x0;
> > +       else
> > +               pool_index = 0x1;
> > +
> > +       if (flags & TTM_PAGE_FLAG_DMA32)
> > +               pool_index |= 0x2;
> > +
> > +       return &_manager.pools[pool_index];
> > +}
> > +
> > +static void ttm_pages_put(struct page *pages[], unsigned npages)
> > +{
> > +       unsigned i;
> > +       set_pages_array_wb(pages, npages);
> > +       for (i = 0; i < npages; ++i)
> > +               __free_page(pages[i]);
> > +}
> > +
> > +/**
> > + * Free pages from pool.
> > + * If limit_with_time is set to true pages that wil lbe have to have been
> > + * in pool for time specified by pool_manager.time_to_wait.
> > + *
> > + * Pages are freed in NUM_PAES_TO_ADD chunks to prevent freeing from
> > + * blocking others operations on pool.
> > + *
> > + * This is called from the timer handler.
> > + **/
> > +static void ttm_page_pool_free(struct page_pool *pool,
> > +               const bool limit_with_time)
> > +{
> > +       struct page *page;
> > +       struct page *pages_to_free[NUM_PAGES_TO_ADD];
> > +       unsigned freed_pages;
> > +       unsigned long now = jiffies + _manager.time_to_wait;
> > +       unsigned long irq_flags;
> > +
> > +restart:
> > +       freed_pages = 0;
> > +       spin_lock_irqsave(&pool->lock, irq_flags);
> > +
> > +       {
> > +               list_for_each_entry_reverse(page, &pool->list, lru) {
> > +
> > +                       if (limit_with_time
> > +                               && time_after(now, page->private))
> > +                               break;
> > +
> > +                       pages_to_free[freed_pages++] = page;
> > +                       if (freed_pages >= NUM_PAGES_TO_ADD) {
> > +                               /* Limit the maximum time that we hold the lock
> > +                                * not to block more important threads */
> > +                               __list_del(page->lru.prev, &pool->list);
> > +                               pool->npages -= freed_pages;
> > +                               spin_unlock_irqrestore(&pool->lock, irq_flags);
> > +
> > +                               ttm_pages_put(pages_to_free, freed_pages);
> > +                               goto restart;
> > +                       }
> > +               }
> > +
> > +               pool->npages -= freed_pages;
> > +               if (freed_pages)
> > +                       __list_del(&page->lru, &pool->list);
> > +
> > +               spin_unlock_irqrestore(&pool->lock, irq_flags);
> > +
> > +               if (freed_pages)
> > +                       ttm_pages_put(pages_to_free, freed_pages);
> > +       }
> > +}
> > +

One useless brace level

> > +/* periodicaly free unused pages from pool */
> > +static void ttm_page_pool_timer_handler(unsigned long man_addr)
> > +{
> > +       int i;
> > +       struct pool_manager *m = (struct pool_manager *)man_addr;
> > +
> > +       for (i = 0; i < MAX_NUM_POOLS; ++i)
> > +               ttm_page_pool_free(&_manager.pools[i], true);
> > +
> > +       mod_timer(&m->timer, jiffies + msecs_to_jiffies(PAGE_FREE_INTERVAL));
> > +}
> > +
> > +static void ttm_page_pool_timer_create(struct pool_manager *m)
> > +{
> > +       /* Create a timer that is fired every second to clean
> > +        * the free list */
> > +       init_timer(&m->timer);
> > +       m->timer.expires = jiffies + msecs_to_jiffies(PAGE_FREE_INTERVAL);
> > +       m->timer.function = ttm_page_pool_timer_handler;
> > +       m->timer.data = (unsigned long)m;
> > +}
> > +
> > +static void ttm_page_pool_timer_free(struct pool_manager *m)
> > +{
> > +       del_timer_sync(&m->timer);
> > +}
> > +
> > +static void ttm_set_pages_caching(struct page **pages,
> > +               enum ttm_caching_state cstate, unsigned cpages)
> > +{
> > +       /* Set page caching */
> > +       switch (cstate) {
> > +       case tt_uncached:
> > +               set_pages_array_uc(pages, cpages);
> > +               break;
> > +       case tt_wc:
> > +               set_pages_array_wc(pages, cpages);
> > +               break;
> > +       default:
> > +               break;
> > +       }
> > +}
> > +/**
> > + * Fill new pages to array.
> > + */
> > +static int ttm_pages_pool_fill_locked(struct page_pool *pool, int flags,
> > +               enum ttm_caching_state cstate, unsigned count,
> > +               unsigned long *irq_flags)
> > +{
> > +       unsigned pages_to_alloc = count - pool->npages;
> > +       struct page **pages;
> > +       struct page *page;
> > +       struct list_head h;
> > +       unsigned i, cpages;
> > +       unsigned max_cpages = min(pages_to_alloc,
> > +                       (unsigned)(PAGE_SIZE/sizeof(*page)));
> > +
> > +       /* reserve allready allocated pages from pool */
> > +       INIT_LIST_HEAD(&h);
> > +       list_splice_init(&pool->list, &h);
> > +
> > +       /* We are now safe to allow others users to modify the pool so unlock
> > +        * it. Allocating pages is expensive operation. */
> > +       spin_unlock_irqrestore(&pool->lock, *irq_flags);
> > +
> > +       pages = kmalloc(pages_to_alloc*sizeof(*page), GFP_KERNEL);
> > +

pages_to_alloc might be negative if pool->npages > count; I think that
can happen because of the lock dropping in the page alloc function
below. Also pages_to_alloc*sizeof(*page) might be too big for kmalloc
and we might need to use vmalloc (this is why I think a design with a
statically allocated table is better).
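
If the dynamically sized table is kept at all, one way around the
kmalloc size limit is a vmalloc fallback along these lines (sketch
only; a statically allocated table avoids the problem entirely):

	struct page **pages;

	pages = kmalloc(pages_to_alloc * sizeof(*pages), GFP_KERNEL);
	if (!pages)
		pages = vmalloc(pages_to_alloc * sizeof(*pages));
	if (!pages)
		return -ENOMEM;

	/* ... fill and splice as before ... */

	/* free with the matching call */
	if (is_vmalloc_addr(pages))
		vfree(pages);
	else
		kfree(pages);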

> > +       if (!pages) {
> > +               printk(KERN_ERR "[ttm] unable to allocate table for new pages.");
> > +               spin_lock_irqsave(&pool->lock, *irq_flags);
> > +               return -ENOMEM;
> > +       }
> > +
> > +       for (i = 0, cpages = 0; i < pages_to_alloc; ++i) {
> > +               page = alloc_page(pool->gfp_flags);
> > +               if (!page) {
> > +                       printk(KERN_ERR "[ttm] unable to get page %d\n", i);
> > +
> > +                       ttm_put_pages(&h, flags, cstate);
> > +                       spin_lock_irqsave(&pool->lock, *irq_flags);
> > +                       return -ENOMEM;
> > +               }
> > +
> > +#ifdef CONFIG_HIGHMEM
> > +               /* gfp flags of highmem page should never be dma32 so we
> > +                * we should be fine in such case
> > +                */
> > +               if (!PageHighMem(page))
> > +#endif
> > +               {
> > +                       pages[cpages++] = page;
> > +                       if (cpages == max_cpages) {
> > +
> > +                               ttm_set_pages_caching(pages, cstate, cpages);
> > +                               cpages = 0;
> > +                       }
> > +               }
> > +               list_add(&page->lru, &h);
> > +       }
> > +
> > +       if (cpages)
> > +               ttm_set_pages_caching(pages, cstate, cpages);
> > +
> > +       spin_lock_irqsave(&pool->lock, *irq_flags);
> > +
> > +       list_splice(&h, &pool->list);
> > +
> > +       pool->npages += pages_to_alloc;
> > +
> > +       return 0;
> > +}
> > +
> > +/*
> > + * On success pages list will hold count number of correctly
> > + * cached pages.
> > + */
> > +int ttm_get_pages(struct list_head *pages, int flags,
> > +               enum ttm_caching_state cstate, unsigned count)
> > +{
> > +       struct page_pool *pool = ttm_get_pool(flags, cstate);
> > +       struct page *page = NULL;
> > +       unsigned long irq_flags;
> > +       int gfp_flags = 0;
> > +       int retries = 1;
> > +       int r;
> > +
> > +       /* No pool for cached pages */
> > +       if (cstate == tt_cached) {
> > +               if (flags & TTM_PAGE_FLAG_ZERO_ALLOC)
> > +                       gfp_flags |= __GFP_ZERO;
> > +
> > +               if (flags & TTM_PAGE_FLAG_DMA32)
> > +                       gfp_flags |= GFP_DMA32;
> > +               else
> > +                       gfp_flags |= __GFP_HIGHMEM;
> > +
> > +               /* fallback in case of problems with pool allocation */
> > +alloc_pages:
> > +               for (r = 0; r < count; ++r) {
> > +                       page = alloc_page(gfp_flags);
> > +                       if (!page) {
> > +
> > +                               printk(KERN_ERR "[ttm] unable to allocate page.");
> > +                               return -ENOMEM;
> > +                       }
> > +
> > +                       list_add(&page->lru, pages);

this doesn't allocate the pages with the proper caching attributes

> > +               }
> > +               return 0;
> > +       }
> > +
> > +       gfp_flags = pool->gfp_flags;
> > +
> > +       spin_lock_irqsave(&pool->lock, irq_flags);
> > +       do {
> > +               /* First we look if pool has the page we want */
> > +               if (pool->npages >= count) {
> > +
> > +
> > +                       /* Find the cut location */
> > +                       if (count <= pool->npages/2) {
> > +                               unsigned c = 0;
> > +                               list_for_each_entry(page, &pool->list, lru) {
> > +                                       if (++c >= count)
> > +                                               break;
> > +                               }
> > +                       } else {
> > +                               /* We have to point to the last pages cut */
> > +                               unsigned c = 0;
> > +                               unsigned rcount = pool->npages - count + 1;
> > +
> > +                               list_for_each_entry_reverse(page, &pool->list, lru) {
> > +                                       if (++c >= rcount)
> > +                                               break;
> > +                               }
> > +                       }
> > +
> > +                       list_cut_position(pages, &pool->list, &page->lru);
> > +
> > +                       pool->npages -= count;
> > +                       spin_unlock_irqrestore(&pool->lock, irq_flags);
> > +
> > +                       if (flags & TTM_PAGE_FLAG_ZERO_ALLOC) {
> > +                               list_for_each_entry(page, pages, lru) {
> > +                                       clear_page(page_address(page));
> > +                               }
> > +                       }
> > +
> > +                       return 0;
> > +               }
> > +
> > +               if (retries == 0)
> > +                       break;
> > +
> > +               --retries;
> > +
> > +               r = ttm_pages_pool_fill_locked(pool, flags, cstate,
> > +                               count, &irq_flags);
> > +               if (r) {
> > +                       spin_unlock_irqrestore(&pool->lock, irq_flags);
> > +                       return r;
> > +               }
> > +
> > +       } while (1);
> > +
> > +       spin_unlock_irqrestore(&pool->lock, irq_flags);
> > +
> > +       /*  Pool allocator failed without errors. we try to allocate new pages
> > +        *  instead. */
> > +       if (flags & TTM_PAGE_FLAG_ZERO_ALLOC)
> > +               gfp_flags |= __GFP_ZERO;
> > +
> > +       goto alloc_pages;
> > +}

This function is quite complex, especially the failing pool-fill path,
which looks suspicious. The idea of splitting the work is to avoid
having to do it in the other function, so if pool filling fails the
alloc should fail too and shouldn't try to fill the pool on its own;
this kind of code duplication is a recipe for failure I think.

> > +
> > +/* Put all pages in pages list to correct pool to wait for reuse */
> > +void ttm_put_pages(struct list_head *pages, int flags,
> > +               enum ttm_caching_state cstate)
> > +{
> > +       struct page_pool *pool = ttm_get_pool(flags, cstate);
> > +       struct page *p, *tmp;
> > +       unsigned page_count = 0;
> > +       unsigned long irq_flags;
> > +       unsigned long now = jiffies;
> > +
> > +       if (pool == NULL) {
> > +               list_for_each_entry_safe(p, tmp, pages, lru) {
> > +                       __free_page(p);
> > +               }
> > +               /* We took the ownership -> clear the pages list*/
> > +               INIT_LIST_HEAD(pages);
> > +               return;
> > +       }
> > +
> > +       list_for_each_entry_safe(p, tmp, pages, lru) {
> > +
> > +#ifdef CONFIG_HIGHMEM
> > +               /* we don't have pool for highmem -> free them */
> > +               if (PageHighMem(p)) {
> > +                       list_del(&p->lru);
> > +                       __free_page(p);
> > +               } else
> > +#endif
> > +               {
> > +                       p->private = now;
> > +                       ++page_count;
> > +               }
> > +
> > +       }
> > +
> > +       spin_lock_irqsave(&pool->lock, irq_flags);
> > +       list_splice(pages, &pool->list);
> > +       pool->npages += page_count;
> > +       spin_unlock_irqrestore(&pool->lock, irq_flags);
> > +
> > +       /* We took the ownership -> clear the pages list*/
> > +       INIT_LIST_HEAD(pages);
> > +}
> > +
> > +
> > +int ttm_page_alloc_init(void)
> > +{
> > +       if (_manager.page_alloc_inited)
> > +               return 0;
> > +
> > +       ttm_page_pool_init_locked(&_manager.wc_pool);
> > +       _manager.wc_pool.gfp_flags = GFP_HIGHUSER;
> > +       ttm_page_pool_init_locked(&_manager.uc_pool);
> > +       _manager.uc_pool.gfp_flags = GFP_HIGHUSER;
> > +       ttm_page_pool_init_locked(&_manager.wc_pool_dma32);
> > +       _manager.wc_pool_dma32.gfp_flags = GFP_USER | GFP_DMA32;
> > +       ttm_page_pool_init_locked(&_manager.uc_pool_dma32);
> > +       _manager.uc_pool_dma32.gfp_flags = GFP_USER | GFP_DMA32;
> > +
> > +       _manager.time_to_wait = msecs_to_jiffies(TIME_TO_KEEP_PAGE_IN_POOL);
> > +       ttm_page_pool_timer_create(&_manager);
> > +       _manager.page_alloc_inited = 1;
> > +       return 0;
> > +}
> > +
> > +void ttm_page_alloc_fini(void)
> > +{
> > +       int i;
> > +
> > +       if (!_manager.page_alloc_inited)
> > +               return;
> > +
> > +       ttm_page_pool_timer_free(&_manager);
> > +
> > +       for (i = 0; i < MAX_NUM_POOLS; ++i)
> > +               ttm_page_pool_free(&_manager.pools[i], false);
> > +
> > +       _manager.page_alloc_inited = 0;
> > +}
> > diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.h b/drivers/gpu/drm/ttm/ttm_page_alloc.h
> > new file mode 100644
> > index 0000000..eb0ddf4
> > --- /dev/null
> > +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.h
> > @@ -0,0 +1,38 @@
> > +/*
> > + * Copyright (c) Red Hat Inc.
> > +
> > + * Permission is hereby granted, free of charge, to any person obtaining a
> > + * copy of this software and associated documentation files (the "Software"),
> > + * to deal in the Software without restriction, including without limitation
> > + * the rights to use, copy, modify, merge, publish, distribute, sub license,
> > + * and/or sell copies of the Software, and to permit persons to whom the
> > + * Software is furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice (including the
> > + * next paragraph) shall be included in all copies or substantial portions
> > + * of the Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> > + * DEALINGS IN THE SOFTWARE.
> > + *
> > + * Authors: Dave Airlie <airlied@redhat.com>
> > + *          Jerome Glisse <jglisse@redhat.com>
> > + */
> > +#ifndef TTM_PAGE_ALLOC
> > +#define TTM_PAGE_ALLOC
> > +
> > +#include "ttm/ttm_bo_driver.h"
> > +
> > +void ttm_put_pages(struct list_head *pages, int flags,
> > +               enum ttm_caching_state cstate);
> > +int ttm_get_pages(struct list_head *pages, int flags,
> > +               enum ttm_caching_state cstate, unsigned count);
> > +int ttm_page_alloc_init(void);
> > +void ttm_page_alloc_fini(void);
> > +
> > +#endif
> > diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
> > index 3d47a2c..d4e20f6 100644
> > --- a/drivers/gpu/drm/ttm/ttm_tt.c
> > +++ b/drivers/gpu/drm/ttm/ttm_tt.c
> > @@ -38,6 +38,7 @@
> >  #include "ttm/ttm_module.h"
> >  #include "ttm/ttm_bo_driver.h"
> >  #include "ttm/ttm_placement.h"
> > +#include "ttm_page_alloc.h"
> >
> >  static int ttm_tt_swapin(struct ttm_tt *ttm);
> >
> > @@ -72,21 +73,6 @@ static void ttm_tt_free_page_directory(struct ttm_tt *ttm)
> >        ttm->pages = NULL;
> >  }
> >
> > -static struct page *ttm_tt_alloc_page(unsigned page_flags)
> > -{
> > -       gfp_t gfp_flags = GFP_USER;
> > -
> > -       if (page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)
> > -               gfp_flags |= __GFP_ZERO;
> > -
> > -       if (page_flags & TTM_PAGE_FLAG_DMA32)
> > -               gfp_flags |= __GFP_DMA32;
> > -       else
> > -               gfp_flags |= __GFP_HIGHMEM;
> > -
> > -       return alloc_page(gfp_flags);
> > -}
> > -
> >  static void ttm_tt_free_user_pages(struct ttm_tt *ttm)
> >  {
> >        int write;
> > @@ -127,15 +113,21 @@ static void ttm_tt_free_user_pages(struct ttm_tt *ttm)
> >  static struct page *__ttm_tt_get_page(struct ttm_tt *ttm, int index)
> >  {
> >        struct page *p;
> > +       struct list_head h;
> >        struct ttm_mem_global *mem_glob = ttm->glob->mem_glob;
> >        int ret;
> >
> >        while (NULL == (p = ttm->pages[index])) {
> > -               p = ttm_tt_alloc_page(ttm->page_flags);
> >
> > -               if (!p)
> > +               INIT_LIST_HEAD(&h);
> > +
> > +               ret = ttm_get_pages(&h, ttm->page_flags, ttm->caching_state, 1);
> > +
> > +               if (ret != 0)
> >                        return NULL;
> >
> > +               p = list_first_entry(&h, struct page, lru);
> > +
> >                ret = ttm_mem_global_alloc_page(mem_glob, p, false, false);
> >                if (unlikely(ret != 0))
> >                        goto out_err;
> > @@ -244,10 +236,10 @@ static int ttm_tt_set_caching(struct ttm_tt *ttm,
> >        if (ttm->caching_state == c_state)
> >                return 0;
> >
> > -       if (c_state != tt_cached) {
> > -               ret = ttm_tt_populate(ttm);
> > -               if (unlikely(ret != 0))
> > -                       return ret;
> > +       if (ttm->state == tt_unpopulated) {
> > +               /* Change caching but don't populate */
> > +               ttm->caching_state = c_state;
> > +               return 0;
> >        }
> >
> >        if (ttm->caching_state == tt_cached)
> > @@ -298,25 +290,32 @@ EXPORT_SYMBOL(ttm_tt_set_placement_caching);
> >  static void ttm_tt_free_alloced_pages(struct ttm_tt *ttm)
> >  {
> >        int i;
> > +       unsigned count = 0;
> > +       struct list_head h;
> >        struct page *cur_page;
> >        struct ttm_backend *be = ttm->be;
> >
> > +       INIT_LIST_HEAD(&h);
> > +
> >        if (be)
> >                be->func->clear(be);
> > -       (void)ttm_tt_set_caching(ttm, tt_cached);
> >        for (i = 0; i < ttm->num_pages; ++i) {
> > +
> >                cur_page = ttm->pages[i];
> >                ttm->pages[i] = NULL;
> >                if (cur_page) {
> >                        if (page_count(cur_page) != 1)
> >                                printk(KERN_ERR TTM_PFX
> >                                       "Erroneous page count. "
> > -                                      "Leaking pages.\n");
> > +                                      "Leaking pages (%d/%d).\n",
> > +                                      page_count(cur_page), count);
> >                        ttm_mem_global_free_page(ttm->glob->mem_glob,
> >                                                 cur_page);
> > -                       __free_page(cur_page);
> > +                       list_add(&cur_page->lru, &h);
> > +                       count++;
> >                }
> >        }
> > +       ttm_put_pages(&h, ttm->page_flags, ttm->caching_state);
> >        ttm->state = tt_unpopulated;
> >        ttm->first_himem_page = ttm->num_pages;
> >        ttm->last_lomem_page = -1;
> > --
> > 1.6.3.3
> >
> >
> 
> While there are no comments yet, I did notice huge errors in
> ttm_pages_pool_fill_locked. But I would still like comments on the
> overall design.
> 
> Is it correct to use page->private in the pool allocator?
> 
> bo allocation is still too expensive an operation for dma buffers in
> classic mesa. It takes quite a lot of cpu time in bind memory (agp
> system) and in the ttm_mem_global functions. Would it be possible to
> move some of those expensive operations into the pool fill and pool
> free code?
> 
> Also, using a single call to allocate all pages for a buffer would
> improve the situation when the pool is not large enough. I can change
> ttm_tt_populate to allocate all pages with a single call if that
> sounds ok.
> 
> I will still look at how to integrate the pool allocator with the
> shrinker.
> 
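
A shrinker hook for this could look roughly like the sketch below. It
is untested, it frees more than nr_to_scan asks for, and the shrink
callback signature differs between kernel versions, so take it only as
an illustration of the shape:

static int ttm_pool_shrink(int nr_to_scan, gfp_t gfp_mask)
{
	unsigned i, total = 0;

	if (nr_to_scan) {
		/* a real version should only free about nr_to_scan pages */
		for (i = 0; i < MAX_NUM_POOLS; ++i)
			ttm_page_pool_free(&_manager.pools[i], false);
	}

	/* return an estimate of how many pages remain freeable */
	for (i = 0; i < MAX_NUM_POOLS; ++i)
		total += _manager.pools[i].npages;
	return total;
}

static struct shrinker ttm_pool_shrinker = {
	.shrink = ttm_pool_shrink,
	.seeks  = DEFAULT_SEEKS,
};

register_shrinker(&ttm_pool_shrinker) would then go into
ttm_page_alloc_init() and unregister_shrinker() into
ttm_page_alloc_fini().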
> 
> Attached fixed full path and here is fixed version of
> ttm_pages_pool_fill_locked :
> 
> +static int ttm_pages_pool_fill_locked(struct page_pool *pool, int flags,
> +               enum ttm_caching_state cstate, unsigned count,
> +               unsigned long *irq_flags)
> +{
> +       unsigned pages_to_alloc = count - pool->npages;
> +       struct page **pages;
> +       struct page *page;
> +       struct list_head h;
> +       unsigned i, cpages;
> +       unsigned max_cpages = min(pages_to_alloc,
> +                       (unsigned)(PAGE_SIZE/sizeof(*page)));
> +
> +       /* reserve allready allocated pages from pool */
> +       INIT_LIST_HEAD(&h);
> +       list_splice_init(&pool->list, &h);
> +       i = pool->npages;
> +       pages_to_alloc = count;
> +       pool->npages = 0;
> +
> +       /* We are now safe to allow others users to modify the pool so unlock
> +        * it. Allocating pages is expensive operation. */
> +       spin_unlock_irqrestore(&pool->lock, *irq_flags);
> +
> +       pages = kmalloc(max_cpages*sizeof(*page), GFP_KERNEL);
> +
> +       if (!pages) {
> +               printk(KERN_ERR "[ttm] unable to allocate table for new pages.");
> +               spin_lock_irqsave(&pool->lock, *irq_flags);
> +               return -ENOMEM;
> +       }
> +
> +       /* i is set to number of pages already in pool */
> +       for (cpages = 0; i < pages_to_alloc; ++i) {
> +               page = alloc_page(pool->gfp_flags);
> +               if (!page) {
> +                       printk(KERN_ERR "[ttm] unable to get page %u\n", i);
> +
> +                       ttm_put_pages(&h, flags, cstate);
> +                       spin_lock_irqsave(&pool->lock, *irq_flags);
> +                       return -ENOMEM;
> +               }
> +
> +#ifdef CONFIG_HIGHMEM
> +               /* gfp flags of highmem page should never be dma32 so we
> +                * we should be fine in such case
> +                */
> +               if (!PageHighMem(page))
> +#endif
> +               {
> +                       pages[cpages++] = page;
> +                       if (cpages == max_cpages) {
> +
> +                               ttm_set_pages_caching(pages, cstate, cpages);
> +                               cpages = 0;
> +                       }
> +               }
> +               list_add(&page->lru, &h);
> +       }
> +
> +       if (cpages)
> +               ttm_set_pages_caching(pages, cstate, cpages);
> +
> +       spin_lock_irqsave(&pool->lock, *irq_flags);
> +
> +       list_splice(&h, &pool->list);
> +
> +       pool->npages += pages_to_alloc;
> +
> +       return 0;
> +}

> From 8340b84a4752830feff6b77f3c23707dbd69f757 Mon Sep 17 00:00:00 2001
> From: Jerome Glisse <jglisse@redhat.com>
> Date: Wed, 23 Sep 2009 18:48:56 +1000
> Subject: [PATCH] drm/ttm: add pool wc/uc page allocator
> 
> On AGP system we might allocate/free routinely uncached or wc memory,
> changing page from cached (wb) to uc or wc is very expensive and involves
> a lot of flushing. To improve performance this allocator use a pool
> of uc,wc pages.
> 
> Pools are linked lists of pages. Ordered so that first is the latest adition
> to the pool and last is the oldest page. Old pages are periodicaly freed from
> pools to keep memory use at minimum.
> 
> Pools are protected with spinlocks to allow multiple threads to allocate pages
> simultanously. Expensive operations must not hold the spinlock to maximise
> performance for multiple threads.
> 
> V2: Fix ttm_page_pool_fill_locked.
>  - max_cpages should be used for kmalloc instead of pages_to_alloc.
>  - Set pool->npages to zero after taking all pages from the pool.
> 
> Based on Jerome Glisse's and Dave Airlie's pool allocator.
> 
> Signed-off-by: Jerome Glisse <jglisse@redhat.com>
> Signed-off-by: Dave Airlie <airlied@redhat.com>
> Signed-off-by: Pauli Nieminen <suokkos@gmail.com>
> ---
>  drivers/gpu/drm/ttm/Makefile         |    2 +-
>  drivers/gpu/drm/ttm/ttm_memory.c     |    5 +-
>  drivers/gpu/drm/ttm/ttm_page_alloc.c |  518 ++++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/ttm/ttm_page_alloc.h |   38 +++
>  drivers/gpu/drm/ttm/ttm_tt.c         |   47 ++--
>  5 files changed, 584 insertions(+), 26 deletions(-)
>  create mode 100644 drivers/gpu/drm/ttm/ttm_page_alloc.c
>  create mode 100644 drivers/gpu/drm/ttm/ttm_page_alloc.h
> 
> diff --git a/drivers/gpu/drm/ttm/Makefile b/drivers/gpu/drm/ttm/Makefile
> index 1e138f5..4256e20 100644
> --- a/drivers/gpu/drm/ttm/Makefile
> +++ b/drivers/gpu/drm/ttm/Makefile
> @@ -4,6 +4,6 @@
>  ccflags-y := -Iinclude/drm
>  ttm-y := ttm_agp_backend.o ttm_memory.o ttm_tt.o ttm_bo.o \
>  	ttm_bo_util.o ttm_bo_vm.o ttm_module.o ttm_global.o \
> -	ttm_object.o ttm_lock.o ttm_execbuf_util.o
> +	ttm_object.o ttm_lock.o ttm_execbuf_util.o ttm_page_alloc.o
>  
>  obj-$(CONFIG_DRM_TTM) += ttm.o
> diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c
> index f5245c0..d587fbe 100644
> --- a/drivers/gpu/drm/ttm/ttm_memory.c
> +++ b/drivers/gpu/drm/ttm/ttm_memory.c
> @@ -32,6 +32,7 @@
>  #include <linux/wait.h>
>  #include <linux/mm.h>
>  #include <linux/module.h>
> +#include "ttm_page_alloc.h"
>  
>  #define TTM_MEMORY_ALLOC_RETRIES 4
>  
> @@ -394,6 +395,7 @@ int ttm_mem_global_init(struct ttm_mem_global *glob)
>  		       "Zone %7s: Available graphics memory: %llu kiB.\n",
>  		       zone->name, (unsigned long long) zone->max_mem >> 10);
>  	}
> +	ttm_page_alloc_init();
>  	return 0;
>  out_no_zone:
>  	ttm_mem_global_release(glob);
> @@ -413,9 +415,10 @@ void ttm_mem_global_release(struct ttm_mem_global *glob)
>  		zone = glob->zones[i];
>  		kobject_del(&zone->kobj);
>  		kobject_put(&zone->kobj);
> -	}
> +			}
>  	kobject_del(&glob->kobj);
>  	kobject_put(&glob->kobj);
> +	ttm_page_alloc_fini();
>  }
>  EXPORT_SYMBOL(ttm_mem_global_release);
>  
> diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
> new file mode 100644
> index 0000000..e269a10
> --- /dev/null
> +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
> @@ -0,0 +1,518 @@
> +/*
> + * Copyright (c) Red Hat Inc.
> +
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sub license,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the
> + * next paragraph) shall be included in all copies or substantial portions
> + * of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + *
> + * Authors: Dave Airlie <airlied@redhat.com>
> + *          Jerome Glisse <jglisse@redhat.com>
> + *          Pauli Nieminen <suokkos@gmail.com>
> + */
> +
> +/* simple list based uncached page pool
> + * - Pool collects recently freed pages for reuse
> + * - Use page->lru to keep a free list
> + * - doesn't track currently in use pages
> + */
> +#include <linux/list.h>
> +#include <linux/spinlock.h>
> +#include <linux/highmem.h>
> +#include <linux/mm_types.h>
> +#include <linux/jiffies.h>
> +#include <linux/timer.h>
> +
> +#include <asm/agp.h>
> +#include "ttm/ttm_bo_driver.h"
> +#include "ttm_page_alloc.h"
> +
> +/* Number of 4k pages to add at once */
> +#define NUM_PAGES_TO_ADD 		64
> +/* times are in msecs */
> +#define TIME_TO_KEEP_PAGE_IN_POOL	15000
> +#define PAGE_FREE_INTERVAL		1000
> +
> +/**
> + * The list is a FILO stack.
> + * All pages pushed into it go to the beginning, while the end of the list
> + * holds the least recently used pages. When pages at the end of the list
> + * have not been used recently, they are freed.
> + *
> + * All expensive operations must be done outside of pool lock to prevent
> + * contention on lock.
> + */
> +struct page_pool {
> +	struct list_head	list;
> +	int			gfp_flags;
> +	unsigned		npages;
> +	spinlock_t		lock;
> +};
> +
> +# define MAX_NUM_POOLS 4
> +
> +struct pool_manager {
> +	bool			page_alloc_inited;
> +	unsigned long		time_to_wait;
> +
> +	struct timer_list	timer;
> +
> +	union {
> +		struct page_pool	pools[MAX_NUM_POOLS];
> +		struct {
> +			struct page_pool	wc_pool;
> +			struct page_pool	uc_pool;
> +			struct page_pool	wc_pool_dma32;
> +			struct page_pool	uc_pool_dma32;
> +		} ;
> +	};
> +};
> +
> +
> +static struct pool_manager _manager;
> +
> +#ifdef CONFIG_X86
> +/* TODO: add this to x86 like _uc, this version here is inefficient */
> +static int set_pages_array_wc(struct page **pages, int addrinarray)
> +{
> +	int i;
> +
> +	for (i = 0; i < addrinarray; i++)
> +		set_memory_wc((unsigned long)page_address(pages[i]), 1);
> +	return 0;
> +}
> +#else
> +static int set_pages_array_wb(struct page **pages, int addrinarray)
> +{
> +#ifdef TTM_HAS_AGP
> +	int i;
> +
> +	for (i = 0; i < addrinarray; i++)
> +		unmap_page_from_agp(pages[i]);
> +#endif
> +	return 0;
> +}
> +
> +static int set_pages_array_wc(struct page **pages, int addrinarray)
> +{
> +#ifdef TTM_HAS_AGP
> +	int i;
> +
> +	for (i = 0; i < addrinarray; i++)
> +		map_page_into_agp(pages[i]);
> +#endif
> +	return 0;
> +}
> +
> +static int set_pages_array_uc(struct page **pages, int addrinarray)
> +{
> +#ifdef TTM_HAS_AGP
> +	int i;
> +
> +	for (i = 0; i < addrinarray; i++)
> +		map_page_into_agp(pages[i]);
> +#endif
> +	return 0;
> +}
> +#endif
> +
> +
> +static void ttm_page_pool_init_locked(struct page_pool *pool)
> +{
> +	spin_lock_init(&pool->lock);
> +	INIT_LIST_HEAD(&pool->list);
> +}
> +
> +static struct page_pool *ttm_get_pool(int flags,
> +		enum ttm_caching_state cstate)
> +{
> +	int pool_index;
> +
> +	if (cstate == tt_cached)
> +		return NULL;
> +
> +	if (cstate == tt_wc)
> +		pool_index = 0x0;
> +	else
> +		pool_index = 0x1;
> +
> +	if (flags & TTM_PAGE_FLAG_DMA32)
> +		pool_index |= 0x2;
> +
> +	return &_manager.pools[pool_index];
> +}
> +
> +static void ttm_pages_put(struct page *pages[], unsigned npages)
> +{
> +	unsigned i;
> +	set_pages_array_wb(pages, npages);
> +	for (i = 0; i < npages; ++i)
> +		__free_page(pages[i]);
> +}
> +
> +/**
> + * Free pages from pool.
> + * If limit_with_time is set to true, only pages that have been in the
> + * pool for the time specified by pool_manager.time_to_wait are freed.
> + *
> + * Pages are freed in NUM_PAGES_TO_ADD chunks to prevent freeing from
> + * blocking other operations on the pool.
> + *
> + * This is called from the timer handler.
> + **/
> +static void ttm_page_pool_free(struct page_pool *pool,
> +		const bool limit_with_time)
> +{
> +	struct page *page;
> +	struct page *pages_to_free[NUM_PAGES_TO_ADD];
> +	unsigned freed_pages;
> +	unsigned long now = jiffies + _manager.time_to_wait;
> +	unsigned long irq_flags;
> +
> +restart:
> +	freed_pages = 0;
> +	spin_lock_irqsave(&pool->lock, irq_flags);
> +
> +	{
> +		list_for_each_entry_reverse(page, &pool->list, lru) {
> +
> +			if (limit_with_time
> +				&& time_after(now, page->private))
> +				break;
> +
> +			pages_to_free[freed_pages++] = page;
> +			if (freed_pages >= NUM_PAGES_TO_ADD) {
> +				/* Limit the maximum time that we hold the lock
> +				 * not to block more important threads */
> +				__list_del(page->lru.prev, &pool->list);
> +				pool->npages -= freed_pages;
> +				spin_unlock_irqrestore(&pool->lock, irq_flags);
> +
> +				ttm_pages_put(pages_to_free, freed_pages);
> +				goto restart;
> +			}
> +		}
> +
> +		pool->npages -= freed_pages;
> +		if (freed_pages)
> +			__list_del(&page->lru, &pool->list);
> +
> +		spin_unlock_irqrestore(&pool->lock, irq_flags);
> +
> +		if (freed_pages)
> +			ttm_pages_put(pages_to_free, freed_pages);
> +	}
> +}
> +
> +/* periodically free unused pages from the pool */
> +static void ttm_page_pool_timer_handler(unsigned long man_addr)
> +{
> +	int i;
> +	struct pool_manager *m = (struct pool_manager *)man_addr;
> +
> +	for (i = 0; i < MAX_NUM_POOLS; ++i)
> +		ttm_page_pool_free(&_manager.pools[i], true);
> +
> +	mod_timer(&m->timer, jiffies + msecs_to_jiffies(PAGE_FREE_INTERVAL));
> +}
> +
> +static void ttm_page_pool_timer_create(struct pool_manager *m)
> +{
> +	/* Create a timer that is fired every second to clean
> +	 * the free list */
> +	init_timer(&m->timer);
> +	m->timer.expires = jiffies + msecs_to_jiffies(PAGE_FREE_INTERVAL);
> +	m->timer.function = ttm_page_pool_timer_handler;
> +	m->timer.data = (unsigned long)m;
> +}
> +
> +static void ttm_page_pool_timer_free(struct pool_manager *m)
> +{
> +	del_timer_sync(&m->timer);
> +}
> +
> +static void ttm_set_pages_caching(struct page **pages,
> +		enum ttm_caching_state cstate, unsigned cpages)
> +{
> +	/* Set page caching */
> +	switch (cstate) {
> +	case tt_uncached:
> +		set_pages_array_uc(pages, cpages);
> +		break;
> +	case tt_wc:
> +		set_pages_array_wc(pages, cpages);
> +		break;
> +	default:
> +		break;
> +	}
> +}
> +/**
> + * Fill new pages to array.
> + */
> +static int ttm_pages_pool_fill_locked(struct page_pool *pool, int flags,
> +		enum ttm_caching_state cstate, unsigned count,
> +		unsigned long *irq_flags)
> +{
> +	unsigned pages_to_alloc = count - pool->npages;
> +	struct page **pages;
> +	struct page *page;
> +	struct list_head h;
> +	unsigned i, cpages;
> +	unsigned max_cpages = min(pages_to_alloc,
> +			(unsigned)(PAGE_SIZE/sizeof(*page)));
> +
> +	/* reserve already allocated pages from pool */
> +	INIT_LIST_HEAD(&h);
> +	list_splice_init(&pool->list, &h);
> +	i = pool->npages;
> +	pages_to_alloc = count;
> +	pool->npages = 0;
> +
> +	/* We are now safe to allow other users to modify the pool, so unlock
> +	 * it. Allocating pages is an expensive operation. */
> +	spin_unlock_irqrestore(&pool->lock, *irq_flags);
> +
> +	pages = kmalloc(max_cpages*sizeof(*page), GFP_KERNEL);
> +
> +	if (!pages) {
> +		printk(KERN_ERR "[ttm] unable to allocate table for new pages.");
> +		spin_lock_irqsave(&pool->lock, *irq_flags);
> +		return -ENOMEM;
> +	}
> +
> +	/* i is set to number of pages already in pool */
> +	for (cpages = 0; i < pages_to_alloc; ++i) {
> +		page = alloc_page(pool->gfp_flags);
> +		if (!page) {
> +			printk(KERN_ERR "[ttm] unable to get page %u\n", i);
> +
> +			ttm_put_pages(&h, flags, cstate);
> +			spin_lock_irqsave(&pool->lock, *irq_flags);
> +			return -ENOMEM;
> +		}
> +
> +#ifdef CONFIG_HIGHMEM
> +		/* gfp flags of a highmem page should never be dma32, so we
> +		 * should be fine in such a case
> +		 */
> +		if (!PageHighMem(page))
> +#endif
> +		{
> +			pages[cpages++] = page;
> +			if (cpages == max_cpages) {
> +
> +				ttm_set_pages_caching(pages, cstate, cpages);
> +				cpages = 0;
> +			}
> +		}
> +		list_add(&page->lru, &h);
> +	}
> +
> +	if (cpages)
> +		ttm_set_pages_caching(pages, cstate, cpages);
> +
> +	spin_lock_irqsave(&pool->lock, *irq_flags);
> +
> +	list_splice(&h, &pool->list);
> +
> +	pool->npages += pages_to_alloc;
> +
> +	return 0;
> +}
> +
> +/*
> + * On success pages list will hold count number of correctly
> + * cached pages.
> + */
> +int ttm_get_pages(struct list_head *pages, int flags,
> +		enum ttm_caching_state cstate, unsigned count)
> +{
> +	struct page_pool *pool = ttm_get_pool(flags, cstate);
> +	struct page *page = NULL;
> +	unsigned long irq_flags;
> +	int gfp_flags = 0;
> +	int retries = 1;
> +	int r;
> +
> +	/* No pool for cached pages */
> +	if (cstate == tt_cached) {
> +		if (flags & TTM_PAGE_FLAG_ZERO_ALLOC)
> +			gfp_flags |= __GFP_ZERO;
> +
> +		if (flags & TTM_PAGE_FLAG_DMA32)
> +			gfp_flags |= GFP_DMA32;
> +		else
> +			gfp_flags |= __GFP_HIGHMEM;
> +
> +		/* fallback in case of problems with pool allocation */
> +alloc_pages:
> +		for (r = 0; r < count; ++r) {
> +			page = alloc_page(gfp_flags);
> +			if (!page) {
> +
> +				printk(KERN_ERR "[ttm] unable to allocate page.");
> +				return -ENOMEM;
> +			}
> +
> +			list_add(&page->lru, pages);
> +		}
> +		return 0;
> +	}
> +
> +	gfp_flags = pool->gfp_flags;
> +
> +	spin_lock_irqsave(&pool->lock, irq_flags);
> +	do {
> +		/* First we look if pool has the page we want */
> +		if (pool->npages >= count) {
> +
> +
> +			/* Find the cut location */
> +			if (count <= pool->npages/2) {
> +				unsigned c = 0;
> +				list_for_each_entry(page, &pool->list, lru) {
> +					if (++c >= count)
> +						break;
> +				}
> +			} else {
> +				/* We have to point to the last pages cut */
> +				unsigned c = 0;
> +				unsigned rcount = pool->npages - count + 1;
> +
> +				list_for_each_entry_reverse(page, &pool->list, lru) {
> +					if (++c >= rcount)
> +						break;
> +				}
> +			}
> +
> +			list_cut_position(pages, &pool->list, &page->lru);
> +
> +			pool->npages -= count;
> +			spin_unlock_irqrestore(&pool->lock, irq_flags);
> +
> +			if (flags & TTM_PAGE_FLAG_ZERO_ALLOC) {
> +				list_for_each_entry(page, pages, lru) {
> +					clear_page(page_address(page));
> +				}
> +			}
> +
> +			return 0;
> +		}
> +
> +		if (retries == 0)
> +			break;
> +
> +		--retries;
> +
> +		r = ttm_pages_pool_fill_locked(pool, flags, cstate,
> +				count, &irq_flags);
> +		if (r) {
> +			spin_unlock_irqrestore(&pool->lock, irq_flags);
> +			return r;
> +		}
> +
> +	} while (1);
> +
> +	spin_unlock_irqrestore(&pool->lock, irq_flags);
> +
> +	/* The pool allocator failed without an error, so we try to allocate
> +	 * new pages directly instead. */
> +	if (flags & TTM_PAGE_FLAG_ZERO_ALLOC)
> +		gfp_flags |= __GFP_ZERO;
> +
> +	goto alloc_pages;
> +}
> +
> +/* Put all pages in pages list to correct pool to wait for reuse */
> +void ttm_put_pages(struct list_head *pages, int flags,
> +		enum ttm_caching_state cstate)
> +{
> +	struct page_pool *pool = ttm_get_pool(flags, cstate);
> +	struct page *p, *tmp;
> +	unsigned page_count = 0;
> +	unsigned long irq_flags;
> +	unsigned long now = jiffies;
> +
> +	if (pool == NULL) {
> +		list_for_each_entry_safe(p, tmp, pages, lru) {
> +			__free_page(p);
> +		}
> +		/* We took the ownership -> clear the pages list*/
> +		INIT_LIST_HEAD(pages);
> +		return;
> +	}
> +
> +	list_for_each_entry_safe(p, tmp, pages, lru) {
> +
> +#ifdef CONFIG_HIGHMEM
> +		/* we don't have pool for highmem -> free them */
> +		if (PageHighMem(p)) {
> +			list_del(&p->lru);
> +			__free_page(p);
> +		} else
> +#endif
> +		{
> +			p->private = now;
> +			++page_count;
> +		}
> +
> +	}
> +
> +	spin_lock_irqsave(&pool->lock, irq_flags);
> +	list_splice(pages, &pool->list);
> +	pool->npages += page_count;
> +	spin_unlock_irqrestore(&pool->lock, irq_flags);
> +
> +	/* We took the ownership -> clear the pages list*/
> +	INIT_LIST_HEAD(pages);
> +}
> +
> +
> +int ttm_page_alloc_init(void)
> +{
> +	if (_manager.page_alloc_inited)
> +		return 0;
> +
> +	ttm_page_pool_init_locked(&_manager.wc_pool);
> +	_manager.wc_pool.gfp_flags = GFP_HIGHUSER;
> +	ttm_page_pool_init_locked(&_manager.uc_pool);
> +	_manager.uc_pool.gfp_flags = GFP_HIGHUSER;
> +	ttm_page_pool_init_locked(&_manager.wc_pool_dma32);
> +	_manager.wc_pool_dma32.gfp_flags = GFP_USER | GFP_DMA32;
> +	ttm_page_pool_init_locked(&_manager.uc_pool_dma32);
> +	_manager.uc_pool_dma32.gfp_flags = GFP_USER | GFP_DMA32;
> +
> +	_manager.time_to_wait = msecs_to_jiffies(TIME_TO_KEEP_PAGE_IN_POOL);
> +	ttm_page_pool_timer_create(&_manager);
> +	_manager.page_alloc_inited = 1;
> +	return 0;
> +}
> +
> +void ttm_page_alloc_fini(void)
> +{
> +	int i;
> +
> +	if (!_manager.page_alloc_inited)
> +		return;
> +
> +	ttm_page_pool_timer_free(&_manager);
> +
> +	for (i = 0; i < MAX_NUM_POOLS; ++i)
> +		ttm_page_pool_free(&_manager.pools[i], false);
> +
> +	_manager.page_alloc_inited = 0;
> +}
> diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.h b/drivers/gpu/drm/ttm/ttm_page_alloc.h
> new file mode 100644
> index 0000000..eb0ddf4
> --- /dev/null
> +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.h
> @@ -0,0 +1,38 @@
> +/*
> + * Copyright (c) Red Hat Inc.
> +
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sub license,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the
> + * next paragraph) shall be included in all copies or substantial portions
> + * of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + *
> + * Authors: Dave Airlie <airlied@redhat.com>
> + *          Jerome Glisse <jglisse@redhat.com>
> + */
> +#ifndef TTM_PAGE_ALLOC
> +#define TTM_PAGE_ALLOC
> +
> +#include "ttm/ttm_bo_driver.h"
> +
> +void ttm_put_pages(struct list_head *pages, int flags,
> +		enum ttm_caching_state cstate);
> +int ttm_get_pages(struct list_head *pages, int flags,
> +		enum ttm_caching_state cstate, unsigned count);
> +int ttm_page_alloc_init(void);
> +void ttm_page_alloc_fini(void);
> +
> +#endif
> diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
> index 3d47a2c..d4e20f6 100644
> --- a/drivers/gpu/drm/ttm/ttm_tt.c
> +++ b/drivers/gpu/drm/ttm/ttm_tt.c
> @@ -38,6 +38,7 @@
>  #include "ttm/ttm_module.h"
>  #include "ttm/ttm_bo_driver.h"
>  #include "ttm/ttm_placement.h"
> +#include "ttm_page_alloc.h"
>  
>  static int ttm_tt_swapin(struct ttm_tt *ttm);
>  
> @@ -72,21 +73,6 @@ static void ttm_tt_free_page_directory(struct ttm_tt *ttm)
>  	ttm->pages = NULL;
>  }
>  
> -static struct page *ttm_tt_alloc_page(unsigned page_flags)
> -{
> -	gfp_t gfp_flags = GFP_USER;
> -
> -	if (page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)
> -		gfp_flags |= __GFP_ZERO;
> -
> -	if (page_flags & TTM_PAGE_FLAG_DMA32)
> -		gfp_flags |= __GFP_DMA32;
> -	else
> -		gfp_flags |= __GFP_HIGHMEM;
> -
> -	return alloc_page(gfp_flags);
> -}
> -
>  static void ttm_tt_free_user_pages(struct ttm_tt *ttm)
>  {
>  	int write;
> @@ -127,15 +113,21 @@ static void ttm_tt_free_user_pages(struct ttm_tt *ttm)
>  static struct page *__ttm_tt_get_page(struct ttm_tt *ttm, int index)
>  {
>  	struct page *p;
> +	struct list_head h;
>  	struct ttm_mem_global *mem_glob = ttm->glob->mem_glob;
>  	int ret;
>  
>  	while (NULL == (p = ttm->pages[index])) {
> -		p = ttm_tt_alloc_page(ttm->page_flags);
>  
> -		if (!p)
> +		INIT_LIST_HEAD(&h);
> +
> +		ret = ttm_get_pages(&h, ttm->page_flags, ttm->caching_state, 1);
> +
> +		if (ret != 0)
>  			return NULL;
>  
> +		p = list_first_entry(&h, struct page, lru);
> +
>  		ret = ttm_mem_global_alloc_page(mem_glob, p, false, false);
>  		if (unlikely(ret != 0))
>  			goto out_err;
> @@ -244,10 +236,10 @@ static int ttm_tt_set_caching(struct ttm_tt *ttm,
>  	if (ttm->caching_state == c_state)
>  		return 0;
>  
> -	if (c_state != tt_cached) {
> -		ret = ttm_tt_populate(ttm);
> -		if (unlikely(ret != 0))
> -			return ret;
> +	if (ttm->state == tt_unpopulated) {
> +		/* Change caching but don't populate */
> +		ttm->caching_state = c_state;
> +		return 0;
>  	}
>  
>  	if (ttm->caching_state == tt_cached)
> @@ -298,25 +290,32 @@ EXPORT_SYMBOL(ttm_tt_set_placement_caching);
>  static void ttm_tt_free_alloced_pages(struct ttm_tt *ttm)
>  {
>  	int i;
> +	unsigned count = 0;
> +	struct list_head h;
>  	struct page *cur_page;
>  	struct ttm_backend *be = ttm->be;
>  
> +	INIT_LIST_HEAD(&h);
> +
>  	if (be)
>  		be->func->clear(be);
> -	(void)ttm_tt_set_caching(ttm, tt_cached);
>  	for (i = 0; i < ttm->num_pages; ++i) {
> +
>  		cur_page = ttm->pages[i];
>  		ttm->pages[i] = NULL;
>  		if (cur_page) {
>  			if (page_count(cur_page) != 1)
>  				printk(KERN_ERR TTM_PFX
>  				       "Erroneous page count. "
> -				       "Leaking pages.\n");
> +				       "Leaking pages (%d/%d).\n",
> +				       page_count(cur_page), count);
>  			ttm_mem_global_free_page(ttm->glob->mem_glob,
>  						 cur_page);
> -			__free_page(cur_page);
> +			list_add(&cur_page->lru, &h);
> +			count++;
>  		}
>  	}
> +	ttm_put_pages(&h, ttm->page_flags, ttm->caching_state);
>  	ttm->state = tt_unpopulated;
>  	ttm->first_himem_page = ttm->num_pages;
>  	ttm->last_lomem_page = -1;
> -- 
> 1.6.3.3
> 




^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC] drm/ttm: add pool wc/uc page allocator
  2010-03-03 13:23           ` Thomas Hellstrom
@ 2010-03-03 14:03             ` Luca Barbieri
  2010-03-03 14:19               ` Thomas Hellstrom
  0 siblings, 1 reply; 13+ messages in thread
From: Luca Barbieri @ 2010-03-03 14:03 UTC (permalink / raw)
  To: Thomas Hellstrom; +Cc: Jerome Glisse, Michel Dänzer, dri-devel

> ^^^ Luca, I've never seen this show up high on a profile (yet). Do you see
> that with Nouveau? I used to have an rb-tree implementation of drm_mm_xxx
> lying around, but I didn't use it because I didn't have a case where it
> showed up?

Yes, before I did userspace allocation, in doom3 profiles
get_unmapped_area and the functions called by it (all coming from
sys_mmap) consumed around 10% of the time.

Note that this is not DRM-specific code, but rather the generic Linux
code for finding free virtual address space, which walks over all the
vmas in the process and, in the default x86 variant, does an rb-tree
lookup for each vma!

It could be fixed either in the kernel, with better data structures
and algorithms, or in userspace by using MAP_FIXED.
However, this will still leave significant kernel overhead (e.g.
drm_mm_search_free_in_range alone takes 1%).
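
For reference, a minimal sketch of the userspace side (my own
illustration, not taken from any driver): reserve a VA region once and
hand out addresses yourself, so every BO mmap uses MAP_FIXED and skips
the kernel's free-area search. The bump allocator and region size are
assumptions; a real implementation would also need to handle unmapping.

#include <sys/mman.h>
#include <stdint.h>
#include <stddef.h>

static uint8_t *bo_va_base;	/* start of the reserved region */
static size_t   bo_va_used;

static int bo_va_init(size_t region_size)
{
	/* Reserve address space up front; PROT_NONE keeps it unusable
	 * until a BO is actually mapped over it. */
	bo_va_base = mmap(NULL, region_size, PROT_NONE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	return bo_va_base == MAP_FAILED ? -1 : 0;
}

static void *bo_map(int drm_fd, off_t mmap_offset, size_t size)
{
	void *addr = bo_va_base + bo_va_used;

	bo_va_used += (size + 4095) & ~(size_t)4095;
	/* MAP_FIXED replaces the PROT_NONE reservation in place and
	 * avoids the get_unmapped_area() walk entirely. */
	return mmap(addr, size, PROT_READ | PROT_WRITE,
		    MAP_SHARED | MAP_FIXED, drm_fd, mmap_offset);
}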


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC] drm/ttm: add pool wc/uc page allocator
  2010-03-03 14:03             ` Luca Barbieri
@ 2010-03-03 14:19               ` Thomas Hellstrom
  0 siblings, 0 replies; 13+ messages in thread
From: Thomas Hellstrom @ 2010-03-03 14:19 UTC (permalink / raw)
  To: Luca Barbieri; +Cc: Jerome Glisse, Michel Dänzer, dri-devel

Luca Barbieri wrote:
>> ^^^ Luca, I've never seen this show up high on a profile (yet). Do you see
>> that with Nouveau? I used to have an rb-tree implementation of drm_mm_xxx
>> lying around, but I didn't use it because I didn't have a case where it
>> showed up?
>>     
>
> Yes, before I did userspace allocation, in doom3 profiles,
> get_unmapped_area and function called by it (all coming from sys_mmap)
> consumed around 10% of the time.
>
> Note that this is not DRM-specific code, but rather the generic Linux
> code for finding free virtual address space, which walks over all the
> vmas in the process and, in the default x86 variant, does an rb-tree
> lookup for each vma!
>   

Ah, OK. I thought this was the drm linear search for device address 
space for each bo.

> It could be fixed by either in the kernel with better data structures
> and algorithms or in userspace by using MAP_FIXED.
> However, this will still leave significant kernel overhead (e.g. just
> drm_mm_search_free_in_range takes 1%).
>   

/Thomas



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC] drm/ttm: add pool wc/uc page allocator
  2010-03-03 13:33   ` Jerome Glisse
@ 2010-03-03 14:23     ` Pauli Nieminen
  2010-03-03 14:50       ` Jerome Glisse
  0 siblings, 1 reply; 13+ messages in thread
From: Pauli Nieminen @ 2010-03-03 14:23 UTC (permalink / raw)
  To: Jerome Glisse; +Cc: Jerome Glisse, dri-devel

On Wed, Mar 3, 2010 at 3:33 PM, Jerome Glisse <glisse@freedesktop.org> wrote:
> On Tue, Mar 02, 2010 at 12:32:54AM +0200, Pauli Nieminen wrote:
>> On Sun, Feb 28, 2010 at 11:30 PM, Pauli Nieminen <suokkos@gmail.com> wrote:
>> > On AGP system we might allocate/free routinely uncached or wc memory,
>> > changing page from cached (wb) to uc or wc is very expensive and involves
>> > a lot of flushing. To improve performance this allocator use a pool
>> > of uc,wc pages.
>> >
>> > Pools are linked lists of pages. Ordered so that first is the latest adition
>> > to the pool and last is the oldest page. Old pages are periodicaly freed from
>> > pools to keep memory use at minimum.
>> >
>> > Pools are protected with spinlocks to allow multiple threads to allocate pages
>> > simultanously. Expensive operations must not hold the spinlock to maximise
>> > performance for multiple threads.
>> >
>> > Based on Jerome Glisse's and Dave Airlie's pool allocator.
>> >
>> > Signed-off-by: Jerome Glisse <jglisse@redhat.com>
>> > Signed-off-by: Dave Airlie <airlied@redhat.com>
>> > Signed-off-by: Pauli Nieminen <suokkos@gmail.com>
>
I think it's overdesigned; by trying to be too clever we often end up
with code that is more complex and less efficient in the end. While the
idea of freeing pages only after some amount of time is the way to go, I
think a simpler approach will do a better job. For instance:
>
> pool {
>  npages // npages in pool
>  page *pages[enoughttohold16Morsomethingconfigurable]
>  unsigned long next_free_time;
>  lock
> }
> filllocked(pool,npages)
> {
>        if (npages > MAXPAGESPERPOOL)
>                npages = MAXPAGESPERPOOL;
>        for i, npages : allocpage
> }
> alloc(apages, npages)
> {
>        do {
>        lock(pool)
>        pool->next_free_time = jiffies + timeout;
>        if (npages > pool->npages) {
>                npages -= pool->npages;
>                copypageptr(apages, pool->pages, pool->npages)
>                pool->npages = 0;
>                fillpool(pool, npages)
>        } else {
>                pool->npages -= npages;
>                copypageptr(apages, pool->pages, npages)
>                npages = 0;
>        }
>        unlock(pool)
>        } while (npages)
> }
> poolshrinkcallbacktimer()
> {
>        for i in npools {
>                if (trylock(pool[i].lock) {
>                        if (timeafter(jiffies, pool[i].nextfreetime)) {
>                                free bunch of pages for instance 1M at a time
>                        }
>                        unlock(pool[i].lock)
>                }
>        }
> }

I would need to use a split buffer for arrays that big. My aim was to
have a FILO queue of pages so that knowing which pages can be freed now
is simple, because the oldest page is always at the head of the list. I
know that FILO is easy to implement on top of a deque, but the
requirement to split the underlying array sounded more complex than
using a linked list. But as I noted on IRC, requesting N pages from the
linked list is doomed to O(N) performance, which is bad. So a better
implementation would be switching to an array, roughly as sketched below.
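
Something like this is what I mean by the array variant (illustrative
sketch only; struct page_stack, the 1024 cap and stack_take() are made-up
names, not code from the patch). Taking npages is then a pointer memcpy
instead of a list walk:

struct page_stack {
	spinlock_t	lock;
	unsigned	count;
	struct page	*pages[1024];	/* fixed capacity, arbitrary number */
};

static unsigned stack_take(struct page_stack *s, struct page **out,
			   unsigned npages)
{
	unsigned long flags;
	unsigned taken;

	spin_lock_irqsave(&s->lock, flags);
	taken = min(npages, s->count);
	s->count -= taken;
	/* hand out the most recently freed pages first (LIFO) */
	memcpy(out, &s->pages[s->count], taken * sizeof(*out));
	spin_unlock_irqrestore(&s->lock, flags);

	return taken;	/* caller allocates whatever is still missing */
}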

>
> A general remark: I don't think you handle the jiffies wrap-around properly.

The only place where jiffies is read is ttm_page_pool_free. There the
difference is calculated with a time_after() call, so I think it should
handle the wrapping.
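
For the record, this is the pattern I am relying on (illustrative
snippet, not a copy of the patch): time_after() compares via a signed
difference, so it stays correct across a jiffies wrap as long as the two
values are less than half the jiffies range apart.

	/* page->private holds the jiffies value recorded in ttm_put_pages() */
	unsigned long expire = page->private +
			       msecs_to_jiffies(TIME_TO_KEEP_PAGE_IN_POOL);

	if (time_after(jiffies, expire)) {
		/* unused for long enough -> candidate for freeing */
	}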

> Also the whole lock dropping in the middle of a list traversal looks very
> fragile to me; right now we don't have much concurrency in radeon, so
> I don't think you have really tested the effect of using the allocator
> concurrently. Anyway see below for more comments on the code.
>
> Btw thanks for looking into this; this isn't a sexy area but it
> definitely needs love.
>
> Cheers,
> Jerome
>
>
>> > ---
>> >  drivers/gpu/drm/ttm/Makefile         |    2 +-
>> >  drivers/gpu/drm/ttm/ttm_memory.c     |    5 +-
>> >  drivers/gpu/drm/ttm/ttm_page_alloc.c |  514 ++++++++++++++++++++++++++++++++++
>> >  drivers/gpu/drm/ttm/ttm_page_alloc.h |   38 +++
>> >  drivers/gpu/drm/ttm/ttm_tt.c         |   47 ++--
>> >  5 files changed, 580 insertions(+), 26 deletions(-)
>> >  create mode 100644 drivers/gpu/drm/ttm/ttm_page_alloc.c
>> >  create mode 100644 drivers/gpu/drm/ttm/ttm_page_alloc.h
>> >
>> > diff --git a/drivers/gpu/drm/ttm/Makefile b/drivers/gpu/drm/ttm/Makefile
>> > index 1e138f5..4256e20 100644
>> > --- a/drivers/gpu/drm/ttm/Makefile
>> > +++ b/drivers/gpu/drm/ttm/Makefile
>> > @@ -4,6 +4,6 @@
>> >  ccflags-y := -Iinclude/drm
>> >  ttm-y := ttm_agp_backend.o ttm_memory.o ttm_tt.o ttm_bo.o \
>> >        ttm_bo_util.o ttm_bo_vm.o ttm_module.o ttm_global.o \
>> > -       ttm_object.o ttm_lock.o ttm_execbuf_util.o
>> > +       ttm_object.o ttm_lock.o ttm_execbuf_util.o ttm_page_alloc.o
>> >
>> >  obj-$(CONFIG_DRM_TTM) += ttm.o
>> > diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c
>> > index f5245c0..d587fbe 100644
>> > --- a/drivers/gpu/drm/ttm/ttm_memory.c
>> > +++ b/drivers/gpu/drm/ttm/ttm_memory.c
>> > @@ -32,6 +32,7 @@
>> >  #include <linux/wait.h>
>> >  #include <linux/mm.h>
>> >  #include <linux/module.h>
>> > +#include "ttm_page_alloc.h"
>> >
>> >  #define TTM_MEMORY_ALLOC_RETRIES 4
>> >
>> > @@ -394,6 +395,7 @@ int ttm_mem_global_init(struct ttm_mem_global *glob)
>> >                       "Zone %7s: Available graphics memory: %llu kiB.\n",
>> >                       zone->name, (unsigned long long) zone->max_mem >> 10);
>> >        }
>> > +       ttm_page_alloc_init();
>> >        return 0;
>> >  out_no_zone:
>> >        ttm_mem_global_release(glob);
>> > @@ -413,9 +415,10 @@ void ttm_mem_global_release(struct ttm_mem_global *glob)
>> >                zone = glob->zones[i];
>> >                kobject_del(&zone->kobj);
>> >                kobject_put(&zone->kobj);
>> > -       }
>> > +                       }
>> >        kobject_del(&glob->kobj);
>> >        kobject_put(&glob->kobj);
>> > +       ttm_page_alloc_fini();
>> >  }
>> >  EXPORT_SYMBOL(ttm_mem_global_release);
>> >
>> > diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
>> > new file mode 100644
>> > index 0000000..de6576c
>> > --- /dev/null
>> > +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
>> > @@ -0,0 +1,514 @@
>> > +/*
>> > + * Copyright (c) Red Hat Inc.
>> > +
>> > + * Permission is hereby granted, free of charge, to any person obtaining a
>> > + * copy of this software and associated documentation files (the "Software"),
>> > + * to deal in the Software without restriction, including without limitation
>> > + * the rights to use, copy, modify, merge, publish, distribute, sub license,
>> > + * and/or sell copies of the Software, and to permit persons to whom the
>> > + * Software is furnished to do so, subject to the following conditions:
>> > + *
>> > + * The above copyright notice and this permission notice (including the
>> > + * next paragraph) shall be included in all copies or substantial portions
>> > + * of the Software.
>> > + *
>> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> > + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
>> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
>> > + * DEALINGS IN THE SOFTWARE.
>> > + *
>> > + * Authors: Dave Airlie <airlied@redhat.com>
>> > + *          Jerome Glisse <jglisse@redhat.com>
>> > + *          Pauli Nieminen <suokkos@gmail.com>
>> > + */
>> > +
>> > +/* simple list based uncached page pool
>> > + * - Pool collects resently freed pages for reuse
>> > + * - Use page->lru to keep a free list
>> > + * - doesn't track currently in use pages
>> > + */
>> > +#include <linux/list.h>
>> > +#include <linux/spinlock.h>
>> > +#include <linux/highmem.h>
>> > +#include <linux/mm_types.h>
>> > +#include <linux/jiffies.h>
>> > +#include <linux/timer.h>
>> > +
>> > +#include <asm/agp.h>
>> > +#include "ttm/ttm_bo_driver.h"
>> > +#include "ttm_page_alloc.h"
>> > +
>> > +/* Number of 4k pages to add at once */
>> > +#define NUM_PAGES_TO_ADD               64
>> > +/* times are in msecs */
>> > +#define TIME_TO_KEEP_PAGE_IN_POOL      15000
>> > +#define PAGE_FREE_INTERVAL             1000
>> > +
>> > +/**
>> > + * list is a FILO stack.
>> > + * All pages pushed into it must go to begin while end of list must hold the
>> > + * least recently used pages. When pages in end of the list are not recently
>> > + * use they are freed.
>> > + *
>> > + * All expensive operations must be done outside of pool lock to prevent
>> > + * contention on lock.
>> > + */
>> > +struct page_pool {
>> > +       struct list_head        list;
>> > +       int                     gfp_flags;
>> > +       unsigned                npages;
>> > +       spinlock_t              lock;
>> > +};
>> > +
>> > +# define MAX_NUM_POOLS 4
>> > +
>> > +struct pool_manager {
>> > +       bool                    page_alloc_inited;
>> > +       unsigned long           time_to_wait;
>> > +
>> > +       struct timer_list       timer;
>> > +
>> > +       union {
>> > +               struct page_pool        pools[MAX_NUM_POOLS];
>> > +               struct {
>> > +                       struct page_pool        wc_pool;
>> > +                       struct page_pool        uc_pool;
>> > +                       struct page_pool        wc_pool_dma32;
>> > +                       struct page_pool        uc_pool_dma32;
>> > +               } ;
>> > +       };
>> > +};
>> > +
>> > +
>> > +static struct pool_manager _manager;
>> > +
>> > +#ifdef CONFIG_X86
>> > +/* TODO: add this to x86 like _uc, this version here is inefficient */
>> > +static int set_pages_array_wc(struct page **pages, int addrinarray)
>> > +{
>> > +       int i;
>> > +
>> > +       for (i = 0; i < addrinarray; i++)
>> > +               set_memory_wc((unsigned long)page_address(pages[i]), 1);
>> > +       return 0;
>> > +}
>> > +#else
>> > +static int set_pages_array_wb(struct page **pages, int addrinarray)
>> > +{
>> > +#ifdef TTM_HAS_AGP
>> > +       int i;
>> > +
>> > +       for (i = 0; i < addrinarray; i++)
>> > +               unmap_page_from_agp(pages[i]);
>> > +#endif
>> > +       return 0;
>> > +}
>> > +
>> > +static int set_pages_array_wc(struct page **pages, int addrinarray)
>> > +{
>> > +#ifdef TTM_HAS_AGP
>> > +       int i;
>> > +
>> > +       for (i = 0; i < addrinarray; i++)
>> > +               map_page_into_agp(pages[i]);
>> > +#endif
>> > +       return 0;
>> > +}
>> > +
>> > +static int set_pages_array_uc(struct page **pages, int addrinarray)
>> > +{
>> > +#ifdef TTM_HAS_AGP
>> > +       int i;
>> > +
>> > +       for (i = 0; i < addrinarray; i++)
>> > +               map_page_into_agp(pages[i]);
>> > +#endif
>> > +       return 0;
>> > +}
>> > +#endif
>> > +
>> > +
>> > +static void ttm_page_pool_init_locked(struct page_pool *pool)
>> > +{
>> > +       spin_lock_init(&pool->lock);
>> > +       INIT_LIST_HEAD(&pool->list);
>> > +}
>> > +
>> > +static struct page_pool *ttm_get_pool(int flags,
>> > +               enum ttm_caching_state cstate)
>> > +{
>> > +       int pool_index;
>> > +
>> > +       if (cstate == tt_cached)
>> > +               return NULL;
>> > +
>> > +       if (cstate == tt_wc)
>> > +               pool_index = 0x0;
>> > +       else
>> > +               pool_index = 0x1;
>> > +
>> > +       if (flags & TTM_PAGE_FLAG_DMA32)
>> > +               pool_index |= 0x2;
>> > +
>> > +       return &_manager.pools[pool_index];
>> > +}
>> > +
>> > +static void ttm_pages_put(struct page *pages[], unsigned npages)
>> > +{
>> > +       unsigned i;
>> > +       set_pages_array_wb(pages, npages);
>> > +       for (i = 0; i < npages; ++i)
>> > +               __free_page(pages[i]);
>> > +}
>> > +
>> > +/**
>> > + * Free pages from pool.
>> > + * If limit_with_time is set to true, only pages that have been in the
>> > + * pool for the time specified by pool_manager.time_to_wait are freed.
>> > + *
>> > + * Pages are freed in NUM_PAGES_TO_ADD chunks to prevent freeing from
>> > + * blocking other operations on the pool.
>> > + *
>> > + * This is called from the timer handler.
>> > + **/
>> > +static void ttm_page_pool_free(struct page_pool *pool,
>> > +               const bool limit_with_time)
>> > +{
>> > +       struct page *page;
>> > +       struct page *pages_to_free[NUM_PAGES_TO_ADD];
>> > +       unsigned freed_pages;
>> > +       unsigned long now = jiffies + _manager.time_to_wait;
>> > +       unsigned long irq_flags;
>> > +
>> > +restart:
>> > +       freed_pages = 0;
>> > +       spin_lock_irqsave(&pool->lock, irq_flags);
>> > +
>> > +       {
>> > +               list_for_each_entry_reverse(page, &pool->list, lru) {
>> > +
>> > +                       if (limit_with_time
>> > +                               && time_after(now, page->private))
>> > +                               break;
>> > +
>> > +                       pages_to_free[freed_pages++] = page;
>> > +                       if (freed_pages >= NUM_PAGES_TO_ADD) {
>> > +                               /* Limit the maximum time that we hold the lock
>> > +                                * not to block more important threads */
>> > +                               __list_del(page->lru.prev, &pool->list);
>> > +                               pool->npages -= freed_pages;
>> > +                               spin_unlock_irqrestore(&pool->lock, irq_flags);
>> > +
>> > +                               ttm_pages_put(pages_to_free, freed_pages);
>> > +                               goto restart;
>> > +                       }
>> > +               }
>> > +
>> > +               pool->npages -= freed_pages;
>> > +               if (freed_pages)
>> > +                       __list_del(&page->lru, &pool->list);
>> > +
>> > +               spin_unlock_irqrestore(&pool->lock, irq_flags);
>> > +
>> > +               if (freed_pages)
>> > +                       ttm_pages_put(pages_to_free, freed_pages);
>> > +       }
>> > +}
>> > +
>
> One useless brace level
>
>> > +/* periodicaly free unused pages from pool */
>> > +static void ttm_page_pool_timer_handler(unsigned long man_addr)
>> > +{
>> > +       int i;
>> > +       struct pool_manager *m = (struct pool_manager *)man_addr;
>> > +
>> > +       for (i = 0; i < MAX_NUM_POOLS; ++i)
>> > +               ttm_page_pool_free(&_manager.pools[i], true);
>> > +
>> > +       mod_timer(&m->timer, jiffies + msecs_to_jiffies(PAGE_FREE_INTERVAL));
>> > +}
>> > +
>> > +static void ttm_page_pool_timer_create(struct pool_manager *m)
>> > +{
>> > +       /* Create a timer that is fired every second to clean
>> > +        * the free list */
>> > +       init_timer(&m->timer);
>> > +       m->timer.expires = jiffies + msecs_to_jiffies(PAGE_FREE_INTERVAL);
>> > +       m->timer.function = ttm_page_pool_timer_handler;
>> > +       m->timer.data = (unsigned long)m;
>> > +}
>> > +
>> > +static void ttm_page_pool_timer_free(struct pool_manager *m)
>> > +{
>> > +       del_timer_sync(&m->timer);
>> > +}
>> > +
>> > +static void ttm_set_pages_caching(struct page **pages,
>> > +               enum ttm_caching_state cstate, unsigned cpages)
>> > +{
>> > +       /* Set page caching */
>> > +       switch (cstate) {
>> > +       case tt_uncached:
>> > +               set_pages_array_uc(pages, cpages);
>> > +               break;
>> > +       case tt_wc:
>> > +               set_pages_array_wc(pages, cpages);
>> > +               break;
>> > +       default:
>> > +               break;
>> > +       }
>> > +}
>> > +/**
>> > + * Fill new pages to array.
>> > + */
>> > +static int ttm_pages_pool_fill_locked(struct page_pool *pool, int flags,
>> > +               enum ttm_caching_state cstate, unsigned count,
>> > +               unsigned long *irq_flags)
>> > +{
>> > +       unsigned pages_to_alloc = count - pool->npages;
>> > +       struct page **pages;
>> > +       struct page *page;
>> > +       struct list_head h;
>> > +       unsigned i, cpages;
>> > +       unsigned max_cpages = min(pages_to_alloc,
>> > +                       (unsigned)(PAGE_SIZE/sizeof(*page)));
>> > +
>> > +       /* reserve already allocated pages from pool */
>> > +       INIT_LIST_HEAD(&h);
>> > +       list_splice_init(&pool->list, &h);
>> > +
>> > +       /* We are now safe to allow other users to modify the pool, so unlock
>> > +        * it. Allocating pages is an expensive operation. */
>> > +       spin_unlock_irqrestore(&pool->lock, *irq_flags);
>> > +
>> > +       pages = kmalloc(pages_to_alloc*sizeof(*page), GFP_KERNEL);
>> > +
>
> pages_to_alloc might be negative if pool->npages > count; I think it can
> happen because of the lock dropping in the page alloc function below.
> Also pages_to_alloc*sizeof(*page) might be too big for kmalloc and we
> might need to use vmalloc (this is why I think a design with a statically
> allocated table is better).

pages_to_alloc can't be negative because the lock is held until later in
the pool allocator.

The lock is only dropped:
- when the allocation succeeds
- when returning a failure
- after entering ttm_pages_pool_fill_locked; before dropping the lock
the code reserves the pages from the pool into a private list and sets
the pool to the empty state.

ttm_pages_pool_fill_locked had two errors in the first version. One of
them was the wrong count parameter to kmalloc; the second version fixes
it so that we never allocate more than PAGE_SIZE of memory for the
table. The second error was that pool->npages wasn't set to zero when
taking the pages from the pool.

Basically fill_pool operates only on private variables while it is
outside of the pool lock, which makes the unlocked part reentrant.

>
>> > +       if (!pages) {
>> > +               printk(KERN_ERR "[ttm] unable to allocate table for new pages.");
>> > +               spin_lock_irqsave(&pool->lock, *irq_flags);
>> > +               return -ENOMEM;
>> > +       }
>> > +
>> > +       for (i = 0, cpages = 0; i < pages_to_alloc; ++i) {
>> > +               page = alloc_page(pool->gfp_flags);
>> > +               if (!page) {
>> > +                       printk(KERN_ERR "[ttm] unable to get page %d\n", i);
>> > +
>> > +                       ttm_put_pages(&h, flags, cstate);
>> > +                       spin_lock_irqsave(&pool->lock, *irq_flags);
>> > +                       return -ENOMEM;
>> > +               }
>> > +
>> > +#ifdef CONFIG_HIGHMEM
>> > +               /* gfp flags of a highmem page should never be dma32, so we
>> > +                * should be fine in such a case
>> > +                */
>> > +               if (!PageHighMem(page))
>> > +#endif
>> > +               {
>> > +                       pages[cpages++] = page;
>> > +                       if (cpages == max_cpages) {
>> > +
>> > +                               ttm_set_pages_caching(pages, cstate, cpages);
>> > +                               cpages = 0;
>> > +                       }
>> > +               }
>> > +               list_add(&page->lru, &h);
>> > +       }
>> > +
>> > +       if (cpages)
>> > +               ttm_set_pages_caching(pages, cstate, cpages);
>> > +
>> > +       spin_lock_irqsave(&pool->lock, *irq_flags);
>> > +
>> > +       list_splice(&h, &pool->list);
>> > +
>> > +       pool->npages += pages_to_alloc;
>> > +
>> > +       return 0;
>> > +}
>> > +
>> > +/*
>> > + * On success pages list will hold count number of correctly
>> > + * cached pages.
>> > + */
>> > +int ttm_get_pages(struct list_head *pages, int flags,
>> > +               enum ttm_caching_state cstate, unsigned count)
>> > +{
>> > +       struct page_pool *pool = ttm_get_pool(flags, cstate);
>> > +       struct page *page = NULL;
>> > +       unsigned long irq_flags;
>> > +       int gfp_flags = 0;
>> > +       int retries = 1;
>> > +       int r;
>> > +
>> > +       /* No pool for cached pages */
>> > +       if (cstate == tt_cached) {
>> > +               if (flags & TTM_PAGE_FLAG_ZERO_ALLOC)
>> > +                       gfp_flags |= __GFP_ZERO;
>> > +
>> > +               if (flags & TTM_PAGE_FLAG_DMA32)
>> > +                       gfp_flags |= GFP_DMA32;
>> > +               else
>> > +                       gfp_flags |= __GFP_HIGHMEM;
>> > +
>> > +               /* fallback in case of problems with pool allocation */
>> > +alloc_pages:
>> > +               for (r = 0; r < count; ++r) {
>> > +                       page = alloc_page(gfp_flags);
>> > +                       if (!page) {
>> > +
>> > +                               printk(KERN_ERR "[ttm] unable to allocate page.");
>> > +                               return -ENOMEM;
>> > +                       }
>> > +
>> > +                       list_add(&page->lru, pages);
>
> this doesn't alloc page with proper caching
>
>> > +               }
>> > +               return 0;
>> > +       }
>> > +
>> > +       gfp_flags = pool->gfp_flags;
>> > +
>> > +       spin_lock_irqsave(&pool->lock, irq_flags);
>> > +       do {
>> > +               /* First we look if pool has the page we want */
>> > +               if (pool->npages >= count) {
>> > +
>> > +
>> > +                       /* Find the cut location */
>> > +                       if (count <= pool->npages/2) {
>> > +                               unsigned c = 0;
>> > +                               list_for_each_entry(page, &pool->list, lru) {
>> > +                                       if (++c >= count)
>> > +                                               break;
>> > +                               }
>> > +                       } else {
>> > +                               /* We have to point to the last pages cut */
>> > +                               unsigned c = 0;
>> > +                               unsigned rcount = pool->npages - count + 1;
>> > +
>> > +                               list_for_each_entry_reverse(page, &pool->list, lru) {
>> > +                                       if (++c >= rcount)
>> > +                                               break;
>> > +                               }
>> > +                       }
>> > +
>> > +                       list_cut_position(pages, &pool->list, &page->lru);
>> > +
>> > +                       pool->npages -= count;
>> > +                       spin_unlock_irqrestore(&pool->lock, irq_flags);
>> > +
>> > +                       if (flags & TTM_PAGE_FLAG_ZERO_ALLOC) {
>> > +                               list_for_each_entry(page, pages, lru) {
>> > +                                       clear_page(page_address(page));
>> > +                               }
>> > +                       }
>> > +
>> > +                       return 0;
>> > +               }
>> > +
>> > +               if (retries == 0)
>> > +                       break;
>> > +
>> > +               --retries;
>> > +
>> > +               r = ttm_pages_pool_fill_locked(pool, flags, cstate,
>> > +                               count, &irq_flags);
>> > +               if (r) {
>> > +                       spin_unlock_irqrestore(&pool->lock, irq_flags);
>> > +                       return r;
>> > +               }
>> > +
>> > +       } while (1);
>> > +
>> > +       spin_unlock_irqrestore(&pool->lock, irq_flags);
>> > +
>> > +       /*  Pool allocator failed without errors. we try to allocate new pages
>> > +        *  instead. */
>> > +       if (flags & TTM_PAGE_FLAG_ZERO_ALLOC)
>> > +               gfp_flags |= __GFP_ZERO;
>> > +
>> > +       goto alloc_pages;
>> > +}
>
> This function is quite complex, especially the failing pool-fill path,
> which looks suspicious. The idea of splitting the work is not to have to
> redo the work in the other function, so if pool filling fails the alloc
> should fail and shouldn't try to fill the pool on its own; this kind of
> code duplication is a recipe for failure I think.

Yes. The failing path should just return -ENOMEM. I will fix that.
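
Roughly what I have in mind (sketch only, not the final patch): replace
the goto fallback at the end of ttm_get_pages() with a plain failure.

	} while (1);

	spin_unlock_irqrestore(&pool->lock, irq_flags);

	/* The pool could not be refilled to the requested size; fail instead
	 * of duplicating the uncached alloc_page() fallback path. */
	printk(KERN_ERR "[ttm] failed to fill pool for %u pages.\n", count);
	return -ENOMEM;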

>
>> > +
>> > +/* Put all pages in pages list to correct pool to wait for reuse */
>> > +void ttm_put_pages(struct list_head *pages, int flags,
>> > +               enum ttm_caching_state cstate)
>> > +{
>> > +       struct page_pool *pool = ttm_get_pool(flags, cstate);
>> > +       struct page *p, *tmp;
>> > +       unsigned page_count = 0;
>> > +       unsigned long irq_flags;
>> > +       unsigned long now = jiffies;
>> > +
>> > +       if (pool == NULL) {
>> > +               list_for_each_entry_safe(p, tmp, pages, lru) {
>> > +                       __free_page(p);
>> > +               }
>> > +               /* We took the ownership -> clear the pages list*/
>> > +               INIT_LIST_HEAD(pages);
>> > +               return;
>> > +       }
>> > +
>> > +       list_for_each_entry_safe(p, tmp, pages, lru) {
>> > +
>> > +#ifdef CONFIG_HIGHMEM
>> > +               /* we don't have pool for highmem -> free them */
>> > +               if (PageHighMem(p)) {
>> > +                       list_del(&p->lru);
>> > +                       __free_page(p);
>> > +               } else
>> > +#endif
>> > +               {
>> > +                       p->private = now;
>> > +                       ++page_count;
>> > +               }
>> > +
>> > +       }
>> > +
>> > +       spin_lock_irqsave(&pool->lock, irq_flags);
>> > +       list_splice(pages, &pool->list);
>> > +       pool->npages += page_count;
>> > +       spin_unlock_irqrestore(&pool->lock, irq_flags);
>> > +
>> > +       /* We took the ownership -> clear the pages list*/
>> > +       INIT_LIST_HEAD(pages);
>> > +}
>> > +
>> > +
>> > +int ttm_page_alloc_init(void)
>> > +{
>> > +       if (_manager.page_alloc_inited)
>> > +               return 0;
>> > +
>> > +       ttm_page_pool_init_locked(&_manager.wc_pool);
>> > +       _manager.wc_pool.gfp_flags = GFP_HIGHUSER;
>> > +       ttm_page_pool_init_locked(&_manager.uc_pool);
>> > +       _manager.uc_pool.gfp_flags = GFP_HIGHUSER;
>> > +       ttm_page_pool_init_locked(&_manager.wc_pool_dma32);
>> > +       _manager.wc_pool_dma32.gfp_flags = GFP_USER | GFP_DMA32;
>> > +       ttm_page_pool_init_locked(&_manager.uc_pool_dma32);
>> > +       _manager.uc_pool_dma32.gfp_flags = GFP_USER | GFP_DMA32;
>> > +
>> > +       _manager.time_to_wait = msecs_to_jiffies(TIME_TO_KEEP_PAGE_IN_POOL);
>> > +       ttm_page_pool_timer_create(&_manager);
>> > +       _manager.page_alloc_inited = 1;
>> > +       return 0;
>> > +}
>> > +
>> > +void ttm_page_alloc_fini(void)
>> > +{
>> > +       int i;
>> > +
>> > +       if (!_manager.page_alloc_inited)
>> > +               return;
>> > +
>> > +       ttm_page_pool_timer_free(&_manager);
>> > +
>> > +       for (i = 0; i < MAX_NUM_POOLS; ++i)
>> > +               ttm_page_pool_free(&_manager.pools[i], false);
>> > +
>> > +       _manager.page_alloc_inited = 0;
>> > +}
>> > diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.h b/drivers/gpu/drm/ttm/ttm_page_alloc.h
>> > new file mode 100644
>> > index 0000000..eb0ddf4
>> > --- /dev/null
>> > +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.h
>> > @@ -0,0 +1,38 @@
>> > +/*
>> > + * Copyright (c) Red Hat Inc.
>> > +
>> > + * Permission is hereby granted, free of charge, to any person obtaining a
>> > + * copy of this software and associated documentation files (the "Software"),
>> > + * to deal in the Software without restriction, including without limitation
>> > + * the rights to use, copy, modify, merge, publish, distribute, sub license,
>> > + * and/or sell copies of the Software, and to permit persons to whom the
>> > + * Software is furnished to do so, subject to the following conditions:
>> > + *
>> > + * The above copyright notice and this permission notice (including the
>> > + * next paragraph) shall be included in all copies or substantial portions
>> > + * of the Software.
>> > + *
>> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> > + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
>> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
>> > + * DEALINGS IN THE SOFTWARE.
>> > + *
>> > + * Authors: Dave Airlie <airlied@redhat.com>
>> > + *          Jerome Glisse <jglisse@redhat.com>
>> > + */
>> > +#ifndef TTM_PAGE_ALLOC
>> > +#define TTM_PAGE_ALLOC
>> > +
>> > +#include "ttm/ttm_bo_driver.h"
>> > +
>> > +void ttm_put_pages(struct list_head *pages, int flags,
>> > +               enum ttm_caching_state cstate);
>> > +int ttm_get_pages(struct list_head *pages, int flags,
>> > +               enum ttm_caching_state cstate, unsigned count);
>> > +int ttm_page_alloc_init(void);
>> > +void ttm_page_alloc_fini(void);
>> > +
>> > +#endif
>> > diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
>> > index 3d47a2c..d4e20f6 100644
>> > --- a/drivers/gpu/drm/ttm/ttm_tt.c
>> > +++ b/drivers/gpu/drm/ttm/ttm_tt.c
>> > @@ -38,6 +38,7 @@
>> >  #include "ttm/ttm_module.h"
>> >  #include "ttm/ttm_bo_driver.h"
>> >  #include "ttm/ttm_placement.h"
>> > +#include "ttm_page_alloc.h"
>> >
>> >  static int ttm_tt_swapin(struct ttm_tt *ttm);
>> >
>> > @@ -72,21 +73,6 @@ static void ttm_tt_free_page_directory(struct ttm_tt *ttm)
>> >        ttm->pages = NULL;
>> >  }
>> >
>> > -static struct page *ttm_tt_alloc_page(unsigned page_flags)
>> > -{
>> > -       gfp_t gfp_flags = GFP_USER;
>> > -
>> > -       if (page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)
>> > -               gfp_flags |= __GFP_ZERO;
>> > -
>> > -       if (page_flags & TTM_PAGE_FLAG_DMA32)
>> > -               gfp_flags |= __GFP_DMA32;
>> > -       else
>> > -               gfp_flags |= __GFP_HIGHMEM;
>> > -
>> > -       return alloc_page(gfp_flags);
>> > -}
>> > -
>> >  static void ttm_tt_free_user_pages(struct ttm_tt *ttm)
>> >  {
>> >        int write;
>> > @@ -127,15 +113,21 @@ static void ttm_tt_free_user_pages(struct ttm_tt *ttm)
>> >  static struct page *__ttm_tt_get_page(struct ttm_tt *ttm, int index)
>> >  {
>> >        struct page *p;
>> > +       struct list_head h;
>> >        struct ttm_mem_global *mem_glob = ttm->glob->mem_glob;
>> >        int ret;
>> >
>> >        while (NULL == (p = ttm->pages[index])) {
>> > -               p = ttm_tt_alloc_page(ttm->page_flags);
>> >
>> > -               if (!p)
>> > +               INIT_LIST_HEAD(&h);
>> > +
>> > +               ret = ttm_get_pages(&h, ttm->page_flags, ttm->caching_state, 1);
>> > +
>> > +               if (ret != 0)
>> >                        return NULL;
>> >
>> > +               p = list_first_entry(&h, struct page, lru);
>> > +
>> >                ret = ttm_mem_global_alloc_page(mem_glob, p, false, false);
>> >                if (unlikely(ret != 0))
>> >                        goto out_err;
>> > @@ -244,10 +236,10 @@ static int ttm_tt_set_caching(struct ttm_tt *ttm,
>> >        if (ttm->caching_state == c_state)
>> >                return 0;
>> >
>> > -       if (c_state != tt_cached) {
>> > -               ret = ttm_tt_populate(ttm);
>> > -               if (unlikely(ret != 0))
>> > -                       return ret;
>> > +       if (ttm->state == tt_unpopulated) {
>> > +               /* Change caching but don't populate */
>> > +               ttm->caching_state = c_state;
>> > +               return 0;
>> >        }
>> >
>> >        if (ttm->caching_state == tt_cached)
>> > @@ -298,25 +290,32 @@ EXPORT_SYMBOL(ttm_tt_set_placement_caching);
>> >  static void ttm_tt_free_alloced_pages(struct ttm_tt *ttm)
>> >  {
>> >        int i;
>> > +       unsigned count = 0;
>> > +       struct list_head h;
>> >        struct page *cur_page;
>> >        struct ttm_backend *be = ttm->be;
>> >
>> > +       INIT_LIST_HEAD(&h);
>> > +
>> >        if (be)
>> >                be->func->clear(be);
>> > -       (void)ttm_tt_set_caching(ttm, tt_cached);
>> >        for (i = 0; i < ttm->num_pages; ++i) {
>> > +
>> >                cur_page = ttm->pages[i];
>> >                ttm->pages[i] = NULL;
>> >                if (cur_page) {
>> >                        if (page_count(cur_page) != 1)
>> >                                printk(KERN_ERR TTM_PFX
>> >                                       "Erroneous page count. "
>> > -                                      "Leaking pages.\n");
>> > +                                      "Leaking pages (%d/%d).\n",
>> > +                                      page_count(cur_page), count);
>> >                        ttm_mem_global_free_page(ttm->glob->mem_glob,
>> >                                                 cur_page);
>> > -                       __free_page(cur_page);
>> > +                       list_add(&cur_page->lru, &h);
>> > +                       count++;
>> >                }
>> >        }
>> > +       ttm_put_pages(&h, ttm->page_flags, ttm->caching_state);
>> >        ttm->state = tt_unpopulated;
>> >        ttm->first_himem_page = ttm->num_pages;
>> >        ttm->last_lomem_page = -1;
>> > --
>> > 1.6.3.3
>> >
>> >
>>
>> While there are no comments yet, I did notice huge errors in
>> ttm_pages_pool_fill_locked. But I would still like comments on the
>> overall design.
>>
>> Is it correct to use page->private in pool allocator?
>>
>> bo allocation is still a too expensive operation for dma buffers in
>> classic mesa. It takes quite a lot of cpu time in bind memory (agp
>> system) and in the ttm_mem_global functions. Would it be possible to move
>> some parts of those expensive operations into the pool fill and pool free
>> code?
>>
>> Also, using a single call to allocate all pages for a buffer would improve
>> the situation when the pool is not large enough. I can change ttm_tt_populate
>> to allocate all pages with a single call if that sounds ok (a rough sketch
>> of the idea is below).
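
[A minimal, untested sketch of that single-call idea. ttm_get_pages() is the
function added by this patch and the ttm_tt fields are the existing driver
ones; the function name and the omission of per-page ttm_mem_global
accounting are illustrative only.]

static int ttm_tt_populate_all(struct ttm_tt *ttm)
{
	struct list_head pages;
	struct page *p, *tmp;
	unsigned i = 0;
	int ret;

	if (ttm->state != tt_unpopulated)
		return 0;

	INIT_LIST_HEAD(&pages);
	/* one call for the whole buffer instead of one call per page */
	ret = ttm_get_pages(&pages, ttm->page_flags, ttm->caching_state,
			    ttm->num_pages);
	if (ret != 0)
		return ret;

	/* move the pages into the page directory; per-page
	 * ttm_mem_global accounting is omitted here for brevity */
	list_for_each_entry_safe(p, tmp, &pages, lru) {
		list_del(&p->lru);
		ttm->pages[i++] = p;
	}

	ttm->state = tt_unbound;
	return 0;
}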
>>
>> I will still look at how to integrate the pool allocator with the shrinker.
>>
>>
>> Attached is the fixed full patch, and here is the fixed version of
>> ttm_pages_pool_fill_locked:
>>
>> +static int ttm_pages_pool_fill_locked(struct page_pool *pool, int flags,
>> +               enum ttm_caching_state cstate, unsigned count,
>> +               unsigned long *irq_flags)
>> +{
>> +       unsigned pages_to_alloc = count - pool->npages;
>> +       struct page **pages;
>> +       struct page *page;
>> +       struct list_head h;
>> +       unsigned i, cpages;
>> +       unsigned max_cpages = min(pages_to_alloc,
>> +                       (unsigned)(PAGE_SIZE/sizeof(*page)));
>> +
>> +       /* reserve already allocated pages from pool */
>> +       INIT_LIST_HEAD(&h);
>> +       list_splice_init(&pool->list, &h);
>> +       i = pool->npages;
>> +       pages_to_alloc = count;
>> +       pool->npages = 0;
>> +
>> +       /* We are now safe to allow other users to modify the pool, so unlock
>> +        * it. Allocating pages is an expensive operation. */
>> +       spin_unlock_irqrestore(&pool->lock, *irq_flags);
>> +
>> +       pages = kmalloc(max_cpages*sizeof(*page), GFP_KERNEL);
>> +
>> +       if (!pages) {
>> +               printk(KERN_ERR "[ttm] unable to allocate table for new pages.");
>> +               spin_lock_irqsave(&pool->lock, *irq_flags);
>> +               return -ENOMEM;
>> +       }
>> +
>> +       /* i is set to number of pages already in pool */
>> +       for (cpages = 0; i < pages_to_alloc; ++i) {
>> +               page = alloc_page(pool->gfp_flags);
>> +               if (!page) {
>> +                       printk(KERN_ERR "[ttm] unable to get page %u\n", i);
>> +
>> +                       ttm_put_pages(&h, flags, cstate);
>> +                       spin_lock_irqsave(&pool->lock, *irq_flags);
>> +                       return -ENOMEM;
>> +               }
>> +
>> +#ifdef CONFIG_HIGHMEM
>> +               /* gfp flags of highmem page should never be dma32 so we
>> +                * should be fine in such case
>> +                */
>> +               if (!PageHighMem(page))
>> +#endif
>> +               {
>> +                       pages[cpages++] = page;
>> +                       if (cpages == max_cpages) {
>> +
>> +                               ttm_set_pages_caching(pages, cstate, cpages);
>> +                               cpages = 0;
>> +                       }
>> +               }
>> +               list_add(&page->lru, &h);
>> +       }
>> +
>> +       if (cpages)
>> +               ttm_set_pages_caching(pages, cstate, cpages);
>> +
>> +       spin_lock_irqsave(&pool->lock, *irq_flags);
>> +
>> +       list_splice(&h, &pool->list);
>> +
>> +       pool->npages += pages_to_alloc;
>> +
>> +       return 0;
>> +}
>
>> From 8340b84a4752830feff6b77f3c23707dbd69f757 Mon Sep 17 00:00:00 2001
>> From: Jerome Glisse <jglisse@redhat.com>
>> Date: Wed, 23 Sep 2009 18:48:56 +1000
>> Subject: [PATCH] drm/ttm: add pool wc/uc page allocator
>>
>> On AGP system we might allocate/free routinely uncached or wc memory,
>> changing page from cached (wb) to uc or wc is very expensive and involves
>> a lot of flushing. To improve performance this allocator use a pool
>> of uc,wc pages.
>>
>> Pools are linked lists of pages. Ordered so that first is the latest adition
>> to the pool and last is the oldest page. Old pages are periodicaly freed from
>> pools to keep memory use at minimum.
>>
>> Pools are protected with spinlocks to allow multiple threads to allocate pages
>> simultanously. Expensive operations must not hold the spinlock to maximise
>> performance for multiple threads.
>>
>> V2: Fix ttm_pages_pool_fill_locked.
>>  - max_cpages should be used for kmalloc instead of pages_to_alloc.
>>  - Set pool->npages to zero after taking all pages from the pool.
>>
>> Based on Jerome Glisse's and Dave Airlie's pool allocator.
>>
>> Signed-off-by: Jerome Glisse <jglisse@redhat.com>
>> Signed-off-by: Dave Airlie <airlied@redhat.com>
>> Signed-off-by: Pauli Nieminen <suokkos@gmail.com>
>> ---
>>  drivers/gpu/drm/ttm/Makefile         |    2 +-
>>  drivers/gpu/drm/ttm/ttm_memory.c     |    5 +-
>>  drivers/gpu/drm/ttm/ttm_page_alloc.c |  518 ++++++++++++++++++++++++++++++++++
>>  drivers/gpu/drm/ttm/ttm_page_alloc.h |   38 +++
>>  drivers/gpu/drm/ttm/ttm_tt.c         |   47 ++--
>>  5 files changed, 584 insertions(+), 26 deletions(-)
>>  create mode 100644 drivers/gpu/drm/ttm/ttm_page_alloc.c
>>  create mode 100644 drivers/gpu/drm/ttm/ttm_page_alloc.h
>>
>> diff --git a/drivers/gpu/drm/ttm/Makefile b/drivers/gpu/drm/ttm/Makefile
>> index 1e138f5..4256e20 100644
>> --- a/drivers/gpu/drm/ttm/Makefile
>> +++ b/drivers/gpu/drm/ttm/Makefile
>> @@ -4,6 +4,6 @@
>>  ccflags-y := -Iinclude/drm
>>  ttm-y := ttm_agp_backend.o ttm_memory.o ttm_tt.o ttm_bo.o \
>>       ttm_bo_util.o ttm_bo_vm.o ttm_module.o ttm_global.o \
>> -     ttm_object.o ttm_lock.o ttm_execbuf_util.o
>> +     ttm_object.o ttm_lock.o ttm_execbuf_util.o ttm_page_alloc.o
>>
>>  obj-$(CONFIG_DRM_TTM) += ttm.o
>> diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c
>> index f5245c0..d587fbe 100644
>> --- a/drivers/gpu/drm/ttm/ttm_memory.c
>> +++ b/drivers/gpu/drm/ttm/ttm_memory.c
>> @@ -32,6 +32,7 @@
>>  #include <linux/wait.h>
>>  #include <linux/mm.h>
>>  #include <linux/module.h>
>> +#include "ttm_page_alloc.h"
>>
>>  #define TTM_MEMORY_ALLOC_RETRIES 4
>>
>> @@ -394,6 +395,7 @@ int ttm_mem_global_init(struct ttm_mem_global *glob)
>>                      "Zone %7s: Available graphics memory: %llu kiB.\n",
>>                      zone->name, (unsigned long long) zone->max_mem >> 10);
>>       }
>> +     ttm_page_alloc_init();
>>       return 0;
>>  out_no_zone:
>>       ttm_mem_global_release(glob);
>> @@ -413,9 +415,10 @@ void ttm_mem_global_release(struct ttm_mem_global *glob)
>>               zone = glob->zones[i];
>>               kobject_del(&zone->kobj);
>>               kobject_put(&zone->kobj);
>> -     }
>> +                     }
>>       kobject_del(&glob->kobj);
>>       kobject_put(&glob->kobj);
>> +     ttm_page_alloc_fini();
>>  }
>>  EXPORT_SYMBOL(ttm_mem_global_release);
>>
>> diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
>> new file mode 100644
>> index 0000000..e269a10
>> --- /dev/null
>> +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
>> @@ -0,0 +1,518 @@
>> +/*
>> + * Copyright (c) Red Hat Inc.
>> +
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sub license,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the
>> + * next paragraph) shall be included in all copies or substantial portions
>> + * of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
>> + * DEALINGS IN THE SOFTWARE.
>> + *
>> + * Authors: Dave Airlie <airlied@redhat.com>
>> + *          Jerome Glisse <jglisse@redhat.com>
>> + *          Pauli Nieminen <suokkos@gmail.com>
>> + */
>> +
>> +/* simple list based uncached page pool
>> + * - Pool collects recently freed pages for reuse
>> + * - Use page->lru to keep a free list
>> + * - doesn't track currently in use pages
>> + */
>> +#include <linux/list.h>
>> +#include <linux/spinlock.h>
>> +#include <linux/highmem.h>
>> +#include <linux/mm_types.h>
>> +#include <linux/jiffies.h>
>> +#include <linux/timer.h>
>> +
>> +#include <asm/agp.h>
>> +#include "ttm/ttm_bo_driver.h"
>> +#include "ttm_page_alloc.h"
>> +
>> +/* Number of 4k pages to add at once */
>> +#define NUM_PAGES_TO_ADD             64
>> +/* times are in msecs */
>> +#define TIME_TO_KEEP_PAGE_IN_POOL    15000
>> +#define PAGE_FREE_INTERVAL           1000
>> +
>> +/**
>> + * The list is a FILO stack.
>> + * All pages pushed into it must go to the beginning while the end of the list
>> + * must hold the least recently used pages. When pages at the end of the list
>> + * have not been used recently they are freed.
>> + *
>> + * All expensive operations must be done outside of pool lock to prevent
>> + * contention on lock.
>> + */
>> +struct page_pool {
>> +     struct list_head        list;
>> +     int                     gfp_flags;
>> +     unsigned                npages;
>> +     spinlock_t              lock;
>> +};
>> +
>> +# define MAX_NUM_POOLS 4
>> +
>> +struct pool_manager {
>> +     bool                    page_alloc_inited;
>> +     unsigned long           time_to_wait;
>> +
>> +     struct timer_list       timer;
>> +
>> +     union {
>> +             struct page_pool        pools[MAX_NUM_POOLS];
>> +             struct {
>> +                     struct page_pool        wc_pool;
>> +                     struct page_pool        uc_pool;
>> +                     struct page_pool        wc_pool_dma32;
>> +                     struct page_pool        uc_pool_dma32;
>> +             } ;
>> +     };
>> +};
>> +
>> +
>> +static struct pool_manager _manager;
>> +
>> +#ifdef CONFIG_X86
>> +/* TODO: add this to x86 like _uc, this version here is inefficient */
>> +static int set_pages_array_wc(struct page **pages, int addrinarray)
>> +{
>> +     int i;
>> +
>> +     for (i = 0; i < addrinarray; i++)
>> +             set_memory_wc((unsigned long)page_address(pages[i]), 1);
>> +     return 0;
>> +}
>> +#else
>> +static int set_pages_array_wb(struct page **pages, int addrinarray)
>> +{
>> +#ifdef TTM_HAS_AGP
>> +     int i;
>> +
>> +     for (i = 0; i < addrinarray; i++)
>> +             unmap_page_from_agp(pages[i]);
>> +#endif
>> +     return 0;
>> +}
>> +
>> +static int set_pages_array_wc(struct page **pages, int addrinarray)
>> +{
>> +#ifdef TTM_HAS_AGP
>> +     int i;
>> +
>> +     for (i = 0; i < addrinarray; i++)
>> +             map_page_into_agp(pages[i]);
>> +#endif
>> +     return 0;
>> +}
>> +
>> +static int set_pages_array_uc(struct page **pages, int addrinarray)
>> +{
>> +#ifdef TTM_HAS_AGP
>> +     int i;
>> +
>> +     for (i = 0; i < addrinarray; i++)
>> +             map_page_into_agp(pages[i]);
>> +#endif
>> +     return 0;
>> +}
>> +#endif
>> +
>> +
>> +static void ttm_page_pool_init_locked(struct page_pool *pool)
>> +{
>> +     spin_lock_init(&pool->lock);
>> +     INIT_LIST_HEAD(&pool->list);
>> +}
>> +
>> +static struct page_pool *ttm_get_pool(int flags,
>> +             enum ttm_caching_state cstate)
>> +{
>> +     int pool_index;
>> +
>> +     if (cstate == tt_cached)
>> +             return NULL;
>> +
>> +     if (cstate == tt_wc)
>> +             pool_index = 0x0;
>> +     else
>> +             pool_index = 0x1;
>> +
>> +     if (flags & TTM_PAGE_FLAG_DMA32)
>> +             pool_index |= 0x2;
>> +
>> +     return &_manager.pools[pool_index];
>> +}
>> +
>> +static void ttm_pages_put(struct page *pages[], unsigned npages)
>> +{
>> +     unsigned i;
>> +     set_pages_array_wb(pages, npages);
>> +     for (i = 0; i < npages; ++i)
>> +             __free_page(pages[i]);
>> +}
>> +
>> +/**
>> + * Free pages from pool.
>> + * If limit_with_time is set to true, only pages that have been in the pool
>> + * for at least the time specified by pool_manager.time_to_wait are freed.
>> + *
>> + * Pages are freed in NUM_PAGES_TO_ADD chunks to prevent freeing from
>> + * blocking other operations on the pool.
>> + *
>> + * This is called from the timer handler.
>> + **/
>> +static void ttm_page_pool_free(struct page_pool *pool,
>> +             const bool limit_with_time)
>> +{
>> +     struct page *page;
>> +     struct page *pages_to_free[NUM_PAGES_TO_ADD];
>> +     unsigned freed_pages;
>> +     unsigned long now = jiffies + _manager.time_to_wait;
>> +     unsigned long irq_flags;
>> +
>> +restart:
>> +     freed_pages = 0;
>> +     spin_lock_irqsave(&pool->lock, irq_flags);
>> +
>> +     {
>> +             list_for_each_entry_reverse(page, &pool->list, lru) {
>> +
>> +                     if (limit_with_time
>> +                             && time_after(now, page->private))
>> +                             break;
>> +
>> +                     pages_to_free[freed_pages++] = page;
>> +                     if (freed_pages >= NUM_PAGES_TO_ADD) {
>> +                             /* Limit the maximum time that we hold the lock
>> +                              * not to block more important threads */
>> +                             __list_del(page->lru.prev, &pool->list);
>> +                             pool->npages -= freed_pages;
>> +                             spin_unlock_irqrestore(&pool->lock, irq_flags);
>> +
>> +                             ttm_pages_put(pages_to_free, freed_pages);
>> +                             goto restart;
>> +                     }
>> +             }
>> +
>> +             pool->npages -= freed_pages;
>> +             if (freed_pages)
>> +                     __list_del(&page->lru, &pool->list);
>> +
>> +             spin_unlock_irqrestore(&pool->lock, irq_flags);
>> +
>> +             if (freed_pages)
>> +                     ttm_pages_put(pages_to_free, freed_pages);
>> +     }
>> +}
>> +
>> +/* periodically free unused pages from pool */
>> +static void ttm_page_pool_timer_handler(unsigned long man_addr)
>> +{
>> +     int i;
>> +     struct pool_manager *m = (struct pool_manager *)man_addr;
>> +
>> +     for (i = 0; i < MAX_NUM_POOLS; ++i)
>> +             ttm_page_pool_free(&_manager.pools[i], true);
>> +
>> +     mod_timer(&m->timer, jiffies + msecs_to_jiffies(PAGE_FREE_INTERVAL));
>> +}
>> +
>> +static void ttm_page_pool_timer_create(struct pool_manager *m)
>> +{
>> +     /* Create a timer that is fired every second to clean
>> +      * the free list */
>> +     init_timer(&m->timer);
>> +     m->timer.expires = jiffies + msecs_to_jiffies(PAGE_FREE_INTERVAL);
>> +     m->timer.function = ttm_page_pool_timer_handler;
>> +     m->timer.data = (unsigned long)m;
>> +}
>> +
>> +static void ttm_page_pool_timer_free(struct pool_manager *m)
>> +{
>> +     del_timer_sync(&m->timer);
>> +}
>> +
>> +static void ttm_set_pages_caching(struct page **pages,
>> +             enum ttm_caching_state cstate, unsigned cpages)
>> +{
>> +     /* Set page caching */
>> +     switch (cstate) {
>> +     case tt_uncached:
>> +             set_pages_array_uc(pages, cpages);
>> +             break;
>> +     case tt_wc:
>> +             set_pages_array_wc(pages, cpages);
>> +             break;
>> +     default:
>> +             break;
>> +     }
>> +}
>> +/**
>> + * Fill new pages to array.
>> + */
>> +static int ttm_pages_pool_fill_locked(struct page_pool *pool, int flags,
>> +             enum ttm_caching_state cstate, unsigned count,
>> +             unsigned long *irq_flags)
>> +{
>> +     unsigned pages_to_alloc = count - pool->npages;
>> +     struct page **pages;
>> +     struct page *page;
>> +     struct list_head h;
>> +     unsigned i, cpages;
>> +     unsigned max_cpages = min(pages_to_alloc,
>> +                     (unsigned)(PAGE_SIZE/sizeof(*page)));
>> +
>> +     /* reserve already allocated pages from pool */
>> +     INIT_LIST_HEAD(&h);
>> +     list_splice_init(&pool->list, &h);
>> +     i = pool->npages;
>> +     pages_to_alloc = count;
>> +     pool->npages = 0;
>> +
>> +     /* We are now safe to allow other users to modify the pool, so unlock
>> +      * it. Allocating pages is an expensive operation. */
>> +     spin_unlock_irqrestore(&pool->lock, *irq_flags);
>> +
>> +     pages = kmalloc(max_cpages*sizeof(*page), GFP_KERNEL);
>> +
>> +     if (!pages) {
>> +             printk(KERN_ERR "[ttm] unable to allocate table for new pages.");
>> +             spin_lock_irqsave(&pool->lock, *irq_flags);
>> +             return -ENOMEM;
>> +     }
>> +
>> +     /* i is set to number of pages already in pool */
>> +     for (cpages = 0; i < pages_to_alloc; ++i) {
>> +             page = alloc_page(pool->gfp_flags);
>> +             if (!page) {
>> +                     printk(KERN_ERR "[ttm] unable to get page %u\n", i);
>> +
>> +                     ttm_put_pages(&h, flags, cstate);
>> +                     spin_lock_irqsave(&pool->lock, *irq_flags);
>> +                     return -ENOMEM;
>> +             }
>> +
>> +#ifdef CONFIG_HIGHMEM
>> +             /* gfp flags of highmem page should never be dma32 so we
>> +              * should be fine in such case
>> +              */
>> +             if (!PageHighMem(page))
>> +#endif
>> +             {
>> +                     pages[cpages++] = page;
>> +                     if (cpages == max_cpages) {
>> +
>> +                             ttm_set_pages_caching(pages, cstate, cpages);
>> +                             cpages = 0;
>> +                     }
>> +             }
>> +             list_add(&page->lru, &h);
>> +     }
>> +
>> +     if (cpages)
>> +             ttm_set_pages_caching(pages, cstate, cpages);
>> +
>> +     spin_lock_irqsave(&pool->lock, *irq_flags);
>> +
>> +     list_splice(&h, &pool->list);
>> +
>> +     pool->npages += pages_to_alloc;
>> +
>> +     return 0;
>> +}
>> +
>> +/*
>> + * On success pages list will hold count number of correctly
>> + * cached pages.
>> + */
>> +int ttm_get_pages(struct list_head *pages, int flags,
>> +             enum ttm_caching_state cstate, unsigned count)
>> +{
>> +     struct page_pool *pool = ttm_get_pool(flags, cstate);
>> +     struct page *page = NULL;
>> +     unsigned long irq_flags;
>> +     int gfp_flags = 0;
>> +     int retries = 1;
>> +     int r;
>> +
>> +     /* No pool for cached pages */
>> +     if (cstate == tt_cached) {
>> +             if (flags & TTM_PAGE_FLAG_ZERO_ALLOC)
>> +                     gfp_flags |= __GFP_ZERO;
>> +
>> +             if (flags & TTM_PAGE_FLAG_DMA32)
>> +                     gfp_flags |= GFP_DMA32;
>> +             else
>> +                     gfp_flags |= __GFP_HIGHMEM;
>> +
>> +             /* fallback in case of problems with pool allocation */
>> +alloc_pages:
>> +             for (r = 0; r < count; ++r) {
>> +                     page = alloc_page(gfp_flags);
>> +                     if (!page) {
>> +
>> +                             printk(KERN_ERR "[ttm] unable to allocate page.");
>> +                             return -ENOMEM;
>> +                     }
>> +
>> +                     list_add(&page->lru, pages);
>> +             }
>> +             return 0;
>> +     }
>> +
>> +     gfp_flags = pool->gfp_flags;
>> +
>> +     spin_lock_irqsave(&pool->lock, irq_flags);
>> +     do {
>> +             /* First we look if pool has the page we want */
>> +             if (pool->npages >= count) {
>> +
>> +
>> +                     /* Find the cut location */
>> +                     if (count <= pool->npages/2) {
>> +                             unsigned c = 0;
>> +                             list_for_each_entry(page, &pool->list, lru) {
>> +                                     if (++c >= count)
>> +                                             break;
>> +                             }
>> +                     } else {
>> +                             /* We have to point to the last pages cut */
>> +                             unsigned c = 0;
>> +                             unsigned rcount = pool->npages - count + 1;
>> +
>> +                             list_for_each_entry_reverse(page, &pool->list, lru) {
>> +                                     if (++c >= rcount)
>> +                                             break;
>> +                             }
>> +                     }
>> +
>> +                     list_cut_position(pages, &pool->list, &page->lru);
>> +
>> +                     pool->npages -= count;
>> +                     spin_unlock_irqrestore(&pool->lock, irq_flags);
>> +
>> +                     if (flags & TTM_PAGE_FLAG_ZERO_ALLOC) {
>> +                             list_for_each_entry(page, pages, lru) {
>> +                                     clear_page(page_address(page));
>> +                             }
>> +                     }
>> +
>> +                     return 0;
>> +             }
>> +
>> +             if (retries == 0)
>> +                     break;
>> +
>> +             --retries;
>> +
>> +             r = ttm_pages_pool_fill_locked(pool, flags, cstate,
>> +                             count, &irq_flags);
>> +             if (r) {
>> +                     spin_unlock_irqrestore(&pool->lock, irq_flags);
>> +                     return r;
>> +             }
>> +
>> +     } while (1);
>> +
>> +     spin_unlock_irqrestore(&pool->lock, irq_flags);
>> +
>> +     /*  Pool allocator failed without errors, so we try to allocate new
>> +      *  pages instead. */
>> +     if (flags & TTM_PAGE_FLAG_ZERO_ALLOC)
>> +             gfp_flags |= __GFP_ZERO;
>> +
>> +     goto alloc_pages;
>> +}
>> +
>> +/* Put all pages in pages list to correct pool to wait for reuse */
>> +void ttm_put_pages(struct list_head *pages, int flags,
>> +             enum ttm_caching_state cstate)
>> +{
>> +     struct page_pool *pool = ttm_get_pool(flags, cstate);
>> +     struct page *p, *tmp;
>> +     unsigned page_count = 0;
>> +     unsigned long irq_flags;
>> +     unsigned long now = jiffies;
>> +
>> +     if (pool == NULL) {
>> +             list_for_each_entry_safe(p, tmp, pages, lru) {
>> +                     __free_page(p);
>> +             }
>> +             /* We took the ownership -> clear the pages list*/
>> +             INIT_LIST_HEAD(pages);
>> +             return;
>> +     }
>> +
>> +     list_for_each_entry_safe(p, tmp, pages, lru) {
>> +
>> +#ifdef CONFIG_HIGHMEM
>> +             /* we don't have pool for highmem -> free them */
>> +             if (PageHighMem(p)) {
>> +                     list_del(&p->lru);
>> +                     __free_page(p);
>> +             } else
>> +#endif
>> +             {
>> +                     p->private = now;
>> +                     ++page_count;
>> +             }
>> +
>> +     }
>> +
>> +     spin_lock_irqsave(&pool->lock, irq_flags);
>> +     list_splice(pages, &pool->list);
>> +     pool->npages += page_count;
>> +     spin_unlock_irqrestore(&pool->lock, irq_flags);
>> +
>> +     /* We took the ownership -> clear the pages list*/
>> +     INIT_LIST_HEAD(pages);
>> +}
>> +
>> +
>> +int ttm_page_alloc_init(void)
>> +{
>> +     if (_manager.page_alloc_inited)
>> +             return 0;
>> +
>> +     ttm_page_pool_init_locked(&_manager.wc_pool);
>> +     _manager.wc_pool.gfp_flags = GFP_HIGHUSER;
>> +     ttm_page_pool_init_locked(&_manager.uc_pool);
>> +     _manager.uc_pool.gfp_flags = GFP_HIGHUSER;
>> +     ttm_page_pool_init_locked(&_manager.wc_pool_dma32);
>> +     _manager.wc_pool_dma32.gfp_flags = GFP_USER | GFP_DMA32;
>> +     ttm_page_pool_init_locked(&_manager.uc_pool_dma32);
>> +     _manager.uc_pool_dma32.gfp_flags = GFP_USER | GFP_DMA32;
>> +
>> +     _manager.time_to_wait = msecs_to_jiffies(TIME_TO_KEEP_PAGE_IN_POOL);
>> +     ttm_page_pool_timer_create(&_manager);
>> +     _manager.page_alloc_inited = 1;
>> +     return 0;
>> +}
>> +
>> +void ttm_page_alloc_fini(void)
>> +{
>> +     int i;
>> +
>> +     if (!_manager.page_alloc_inited)
>> +             return;
>> +
>> +     ttm_page_pool_timer_free(&_manager);
>> +
>> +     for (i = 0; i < MAX_NUM_POOLS; ++i)
>> +             ttm_page_pool_free(&_manager.pools[i], false);
>> +
>> +     _manager.page_alloc_inited = 0;
>> +}
>> diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.h b/drivers/gpu/drm/ttm/ttm_page_alloc.h
>> new file mode 100644
>> index 0000000..eb0ddf4
>> --- /dev/null
>> +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.h
>> @@ -0,0 +1,38 @@
>> +/*
>> + * Copyright (c) Red Hat Inc.
>> +
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sub license,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the
>> + * next paragraph) shall be included in all copies or substantial portions
>> + * of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
>> + * DEALINGS IN THE SOFTWARE.
>> + *
>> + * Authors: Dave Airlie <airlied@redhat.com>
>> + *          Jerome Glisse <jglisse@redhat.com>
>> + */
>> +#ifndef TTM_PAGE_ALLOC
>> +#define TTM_PAGE_ALLOC
>> +
>> +#include "ttm/ttm_bo_driver.h"
>> +
>> +void ttm_put_pages(struct list_head *pages, int flags,
>> +             enum ttm_caching_state cstate);
>> +int ttm_get_pages(struct list_head *pages, int flags,
>> +             enum ttm_caching_state cstate, unsigned count);
>> +int ttm_page_alloc_init(void);
>> +void ttm_page_alloc_fini(void);
>> +
>> +#endif
>> diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
>> index 3d47a2c..d4e20f6 100644
>> --- a/drivers/gpu/drm/ttm/ttm_tt.c
>> +++ b/drivers/gpu/drm/ttm/ttm_tt.c
>> @@ -38,6 +38,7 @@
>>  #include "ttm/ttm_module.h"
>>  #include "ttm/ttm_bo_driver.h"
>>  #include "ttm/ttm_placement.h"
>> +#include "ttm_page_alloc.h"
>>
>>  static int ttm_tt_swapin(struct ttm_tt *ttm);
>>
>> @@ -72,21 +73,6 @@ static void ttm_tt_free_page_directory(struct ttm_tt *ttm)
>>       ttm->pages = NULL;
>>  }
>>
>> -static struct page *ttm_tt_alloc_page(unsigned page_flags)
>> -{
>> -     gfp_t gfp_flags = GFP_USER;
>> -
>> -     if (page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)
>> -             gfp_flags |= __GFP_ZERO;
>> -
>> -     if (page_flags & TTM_PAGE_FLAG_DMA32)
>> -             gfp_flags |= __GFP_DMA32;
>> -     else
>> -             gfp_flags |= __GFP_HIGHMEM;
>> -
>> -     return alloc_page(gfp_flags);
>> -}
>> -
>>  static void ttm_tt_free_user_pages(struct ttm_tt *ttm)
>>  {
>>       int write;
>> @@ -127,15 +113,21 @@ static void ttm_tt_free_user_pages(struct ttm_tt *ttm)
>>  static struct page *__ttm_tt_get_page(struct ttm_tt *ttm, int index)
>>  {
>>       struct page *p;
>> +     struct list_head h;
>>       struct ttm_mem_global *mem_glob = ttm->glob->mem_glob;
>>       int ret;
>>
>>       while (NULL == (p = ttm->pages[index])) {
>> -             p = ttm_tt_alloc_page(ttm->page_flags);
>>
>> -             if (!p)
>> +             INIT_LIST_HEAD(&h);
>> +
>> +             ret = ttm_get_pages(&h, ttm->page_flags, ttm->caching_state, 1);
>> +
>> +             if (ret != 0)
>>                       return NULL;
>>
>> +             p = list_first_entry(&h, struct page, lru);
>> +
>>               ret = ttm_mem_global_alloc_page(mem_glob, p, false, false);
>>               if (unlikely(ret != 0))
>>                       goto out_err;
>> @@ -244,10 +236,10 @@ static int ttm_tt_set_caching(struct ttm_tt *ttm,
>>       if (ttm->caching_state == c_state)
>>               return 0;
>>
>> -     if (c_state != tt_cached) {
>> -             ret = ttm_tt_populate(ttm);
>> -             if (unlikely(ret != 0))
>> -                     return ret;
>> +     if (ttm->state == tt_unpopulated) {
>> +             /* Change caching but don't populate */
>> +             ttm->caching_state = c_state;
>> +             return 0;
>>       }
>>
>>       if (ttm->caching_state == tt_cached)
>> @@ -298,25 +290,32 @@ EXPORT_SYMBOL(ttm_tt_set_placement_caching);
>>  static void ttm_tt_free_alloced_pages(struct ttm_tt *ttm)
>>  {
>>       int i;
>> +     unsigned count = 0;
>> +     struct list_head h;
>>       struct page *cur_page;
>>       struct ttm_backend *be = ttm->be;
>>
>> +     INIT_LIST_HEAD(&h);
>> +
>>       if (be)
>>               be->func->clear(be);
>> -     (void)ttm_tt_set_caching(ttm, tt_cached);
>>       for (i = 0; i < ttm->num_pages; ++i) {
>> +
>>               cur_page = ttm->pages[i];
>>               ttm->pages[i] = NULL;
>>               if (cur_page) {
>>                       if (page_count(cur_page) != 1)
>>                               printk(KERN_ERR TTM_PFX
>>                                      "Erroneous page count. "
>> -                                    "Leaking pages.\n");
>> +                                    "Leaking pages (%d/%d).\n",
>> +                                    page_count(cur_page), count);
>>                       ttm_mem_global_free_page(ttm->glob->mem_glob,
>>                                                cur_page);
>> -                     __free_page(cur_page);
>> +                     list_add(&cur_page->lru, &h);
>> +                     count++;
>>               }
>>       }
>> +     ttm_put_pages(&h, ttm->page_flags, ttm->caching_state);
>>       ttm->state = tt_unpopulated;
>>       ttm->first_himem_page = ttm->num_pages;
>>       ttm->last_lomem_page = -1;
>> --
>> 1.6.3.3
>>
>
>


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC] drm/ttm: add pool wc/uc page allocator
  2010-03-03 14:23     ` Pauli Nieminen
@ 2010-03-03 14:50       ` Jerome Glisse
  2010-03-03 15:46         ` Pauli Nieminen
  0 siblings, 1 reply; 13+ messages in thread
From: Jerome Glisse @ 2010-03-03 14:50 UTC (permalink / raw)
  To: Pauli Nieminen; +Cc: Jerome Glisse, dri-devel

On Wed, Mar 03, 2010 at 04:23:08PM +0200, Pauli Nieminen wrote:
> On Wed, Mar 3, 2010 at 3:33 PM, Jerome Glisse <glisse@freedesktop.org> wrote:
> > On Tue, Mar 02, 2010 at 12:32:54AM +0200, Pauli Nieminen wrote:
> >> On Sun, Feb 28, 2010 at 11:30 PM, Pauli Nieminen <suokkos@gmail.com> wrote:
> >> > On AGP system we might allocate/free routinely uncached or wc memory,
> >> > changing page from cached (wb) to uc or wc is very expensive and involves
> >> > a lot of flushing. To improve performance this allocator use a pool
> >> > of uc,wc pages.
> >> >
> >> > Pools are linked lists of pages. Ordered so that first is the latest adition
> >> > to the pool and last is the oldest page. Old pages are periodicaly freed from
> >> > pools to keep memory use at minimum.
> >> >
> >> > Pools are protected with spinlocks to allow multiple threads to allocate pages
> >> > simultanously. Expensive operations must not hold the spinlock to maximise
> >> > performance for multiple threads.
> >> >
> >> > Based on Jerome Glisse's and Dave Airlie's pool allocator.
> >> >
> >> > Signed-off-by: Jerome Glisse <jglisse@redhat.com>
> >> > Signed-off-by: Dave Airlie <airlied@redhat.com>
> >> > Signed-off-by: Pauli Nieminen <suokkos@gmail.com>
> >
> > I think it's overdesigned; by trying to be too clever we often end up
> > with code that is more complex and less efficient in the end. While the idea
> > of freeing pages only after some amount of time is the way to go, I think
> > a simpler approach will do a better job. For instance:
> >
> > pool {
> >  npages // npages in pool
> >  page *pages[enoughttohold16Morsomethingconfigurable]
> >  unsigned long next_free_time;
> >  lock
> > }
> > filllocked(pool,npages)
> > {
> >        if (npages > MAXPAGESPERPOOL)
> >                npages = MAXPAGESPERPOOL;
> >        for i, npages : allocpage
> > }
> > alloc(apages, npages)
> > {
> >        do {
> >        lock(pool)
> >        pool->next_free_time = jiffies + timeout;
> >        if (npages > pool->npages) {
> >                npages -= pool->npages;
> >                copypageptr(apages, pool->pages, pool->npages)
> >                pool->npages = 0;
> >                fillpool(pool, npages)
> >        } else {
> >                pool->npages -= npages;
> >                copypageptr(apages, pool->pages, npages)
> >                npages = 0;
> >        }
> >        unlock(pool)
> >        } while (npages)
> > }
> > poolshrinkcallbacktimer()
> > {
> >        for i in npools {
> >                if (trylock(pool[i].lock) {
> >                        if (timeafter(jiffies, pool[i].nextfreetime)) {
> >                                free bunch of pages for instance 1M at a time
> >                        }
> >                        unlock(pool[i].lock)
> >                }
> >        }
> > }
> 
> I would need to use a split buffer for arrays that big. My aim was to
> have a FILO queue of pages so that knowing which pages can be freed now is
> simple, because the oldest page is always at the head of the list. I
> know that a FILO is easy to implement on top of a deque, but the requirement
> to split the underlying array sounded more complex than using a linked
> list. But as I noted on IRC, requesting N pages from the linked
> list is doomed to O(N) performance, which is bad. So a better
> implementation would be switching to an array.

The design I outlined above drops the concept of recording a time per
page, so there is no need for a FILO: you just fill the pool and you empty
it 1M or less at a time every N msec. This is a lot simpler, with no complex
splitting or list walking. One simple optimization is to make the
amount of pages we free depend on how full the pool is, so
if there is 16M of memory in the pool free 1M at a time, but if there is
64K then free one page at a time. A rough sketch of that scheme is below.
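
[A rough, untested C sketch of that fixed-table scheme. The names, table size
and chunk size are illustrative, and restoring the caching state before
__free_page() is omitted; this is not a finished implementation.]

#define POOL_MAX_PAGES   4096	/* ~16M of 4k pages, configurable */
#define POOL_FREE_CHUNK  256	/* ~1M freed per timer tick */

struct simple_pool {
	spinlock_t	lock;
	unsigned	npages;			/* pages currently cached */
	unsigned long	next_free_time;		/* in jiffies */
	struct page	*pages[POOL_MAX_PAGES];	/* static table, no list walking */
};

/* Take up to npages out of the pool; the caller allocates the rest itself. */
static unsigned simple_pool_get(struct simple_pool *pool,
				struct page **out, unsigned npages)
{
	unsigned long flags;
	unsigned got;

	spin_lock_irqsave(&pool->lock, flags);
	pool->next_free_time = jiffies + msecs_to_jiffies(15000);
	got = min(npages, pool->npages);
	pool->npages -= got;
	memcpy(out, &pool->pages[pool->npages], got * sizeof(*out));
	spin_unlock_irqrestore(&pool->lock, flags);
	return got;
}

/* Timer callback: free one chunk, but only if the pool has been idle. */
static void simple_pool_shrink(struct simple_pool *pool)
{
	struct page *victims[POOL_FREE_CHUNK];
	unsigned long flags;
	unsigned i, n = 0;

	if (!spin_trylock_irqsave(&pool->lock, flags))
		return;
	if (time_after(jiffies, pool->next_free_time)) {
		n = min((unsigned)POOL_FREE_CHUNK, pool->npages);
		pool->npages -= n;
		memcpy(victims, &pool->pages[pool->npages], n * sizeof(*victims));
	}
	spin_unlock_irqrestore(&pool->lock, flags);

	for (i = 0; i < n; ++i)
		__free_page(victims[i]);
}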

> >
> > General remark: I don't think you handle jiffies wrap-around properly.
> 
> The only place where jiffies are read is ttm_page_pool_free. There the
> difference is calculated with a time_after call, so I think it should
> handle the wrapping.
> 

No, that doesn't work. For instance (assuming jiffies to be 16 bits), a
page gets its private field set to 0xFFF0; now in ttm_page_pool_free the
jiffies wrap around and are at 0x0, the test will evaluate to false and the
page will be freed prematurely. I think it would have been better to do
page->private = jiffies + timeout in ttm_put_pages and
time_after(jiffies, page->private) in ttm_page_pool_free; that would
have avoided the wrap-around.
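
[A sketch of that suggestion against the structure of this patch; the exact
break condition in the free loop is inferred from the existing code, not
taken from the mail:]

	/* ttm_put_pages(): stamp the moment the page may be reclaimed */
	p->private = jiffies + _manager.time_to_wait;

	/* ttm_page_pool_free(): walking from the oldest page, stop at the
	 * first page whose deadline has not passed yet */
	if (limit_with_time && !time_after(jiffies, page->private))
		break;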

Cheers,
Jerome

> > Also the whole lock dropping in the middle of the list traversal looks very
> > fragile to me; right now we don't have much concurrency in radeon, so
> > I don't think you have really tested the effect of concurrently using the
> > allocator. Anyway, see below for more comments on the code.
> >
> > Btw thanks for looking into this, this isn't a sexy area but it
> > definitely needs love.
> >
> > Cheers,
> > Jerome
> >
> >
> >> > ---
> >> >  drivers/gpu/drm/ttm/Makefile         |    2 +-
> >> >  drivers/gpu/drm/ttm/ttm_memory.c     |    5 +-
> >> >  drivers/gpu/drm/ttm/ttm_page_alloc.c |  514 ++++++++++++++++++++++++++++++++++
> >> >  drivers/gpu/drm/ttm/ttm_page_alloc.h |   38 +++
> >> >  drivers/gpu/drm/ttm/ttm_tt.c         |   47 ++--
> >> >  5 files changed, 580 insertions(+), 26 deletions(-)
> >> >  create mode 100644 drivers/gpu/drm/ttm/ttm_page_alloc.c
> >> >  create mode 100644 drivers/gpu/drm/ttm/ttm_page_alloc.h
> >> >
> >> > diff --git a/drivers/gpu/drm/ttm/Makefile b/drivers/gpu/drm/ttm/Makefile
> >> > index 1e138f5..4256e20 100644
> >> > --- a/drivers/gpu/drm/ttm/Makefile
> >> > +++ b/drivers/gpu/drm/ttm/Makefile
> >> > @@ -4,6 +4,6 @@
> >> >  ccflags-y := -Iinclude/drm
> >> >  ttm-y := ttm_agp_backend.o ttm_memory.o ttm_tt.o ttm_bo.o \
> >> >        ttm_bo_util.o ttm_bo_vm.o ttm_module.o ttm_global.o \
> >> > -       ttm_object.o ttm_lock.o ttm_execbuf_util.o
> >> > +       ttm_object.o ttm_lock.o ttm_execbuf_util.o ttm_page_alloc.o
> >> >
> >> >  obj-$(CONFIG_DRM_TTM) += ttm.o
> >> > diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c
> >> > index f5245c0..d587fbe 100644
> >> > --- a/drivers/gpu/drm/ttm/ttm_memory.c
> >> > +++ b/drivers/gpu/drm/ttm/ttm_memory.c
> >> > @@ -32,6 +32,7 @@
> >> >  #include <linux/wait.h>
> >> >  #include <linux/mm.h>
> >> >  #include <linux/module.h>
> >> > +#include "ttm_page_alloc.h"
> >> >
> >> >  #define TTM_MEMORY_ALLOC_RETRIES 4
> >> >
> >> > @@ -394,6 +395,7 @@ int ttm_mem_global_init(struct ttm_mem_global *glob)
> >> >                       "Zone %7s: Available graphics memory: %llu kiB.\n",
> >> >                       zone->name, (unsigned long long) zone->max_mem >> 10);
> >> >        }
> >> > +       ttm_page_alloc_init();
> >> >        return 0;
> >> >  out_no_zone:
> >> >        ttm_mem_global_release(glob);
> >> > @@ -413,9 +415,10 @@ void ttm_mem_global_release(struct ttm_mem_global *glob)
> >> >                zone = glob->zones[i];
> >> >                kobject_del(&zone->kobj);
> >> >                kobject_put(&zone->kobj);
> >> > -       }
> >> > +                       }
> >> >        kobject_del(&glob->kobj);
> >> >        kobject_put(&glob->kobj);
> >> > +       ttm_page_alloc_fini();
> >> >  }
> >> >  EXPORT_SYMBOL(ttm_mem_global_release);
> >> >
> >> > diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
> >> > new file mode 100644
> >> > index 0000000..de6576c
> >> > --- /dev/null
> >> > +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
> >> > @@ -0,0 +1,514 @@
> >> > +/*
> >> > + * Copyright (c) Red Hat Inc.
> >> > +
> >> > + * Permission is hereby granted, free of charge, to any person obtaining a
> >> > + * copy of this software and associated documentation files (the "Software"),
> >> > + * to deal in the Software without restriction, including without limitation
> >> > + * the rights to use, copy, modify, merge, publish, distribute, sub license,
> >> > + * and/or sell copies of the Software, and to permit persons to whom the
> >> > + * Software is furnished to do so, subject to the following conditions:
> >> > + *
> >> > + * The above copyright notice and this permission notice (including the
> >> > + * next paragraph) shall be included in all copies or substantial portions
> >> > + * of the Software.
> >> > + *
> >> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> >> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> >> > + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
> >> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> >> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> >> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> >> > + * DEALINGS IN THE SOFTWARE.
> >> > + *
> >> > + * Authors: Dave Airlie <airlied@redhat.com>
> >> > + *          Jerome Glisse <jglisse@redhat.com>
> >> > + *          Pauli Nieminen <suokkos@gmail.com>
> >> > + */
> >> > +
> >> > +/* simple list based uncached page pool
> >> > + * - Pool collects resently freed pages for reuse
> >> > + * - Use page->lru to keep a free list
> >> > + * - doesn't track currently in use pages
> >> > + */
> >> > +#include <linux/list.h>
> >> > +#include <linux/spinlock.h>
> >> > +#include <linux/highmem.h>
> >> > +#include <linux/mm_types.h>
> >> > +#include <linux/jiffies.h>
> >> > +#include <linux/timer.h>
> >> > +
> >> > +#include <asm/agp.h>
> >> > +#include "ttm/ttm_bo_driver.h"
> >> > +#include "ttm_page_alloc.h"
> >> > +
> >> > +/* Number of 4k pages to add at once */
> >> > +#define NUM_PAGES_TO_ADD               64
> >> > +/* times are in msecs */
> >> > +#define TIME_TO_KEEP_PAGE_IN_POOL      15000
> >> > +#define PAGE_FREE_INTERVAL             1000
> >> > +
> >> > +/**
> >> > + * list is a FILO stack.
> >> > + * All pages pushed into it must go to begin while end of list must hold the
> >> > + * least recently used pages. When pages in end of the list are not recently
> >> > + * use they are freed.
> >> > + *
> >> > + * All expensive operations must be done outside of pool lock to prevent
> >> > + * contention on lock.
> >> > + */
> >> > +struct page_pool {
> >> > +       struct list_head        list;
> >> > +       int                     gfp_flags;
> >> > +       unsigned                npages;
> >> > +       spinlock_t              lock;
> >> > +};
> >> > +
> >> > +# define MAX_NUM_POOLS 4
> >> > +
> >> > +struct pool_manager {
> >> > +       bool                    page_alloc_inited;
> >> > +       unsigned long           time_to_wait;
> >> > +
> >> > +       struct timer_list       timer;
> >> > +
> >> > +       union {
> >> > +               struct page_pool        pools[MAX_NUM_POOLS];
> >> > +               struct {
> >> > +                       struct page_pool        wc_pool;
> >> > +                       struct page_pool        uc_pool;
> >> > +                       struct page_pool        wc_pool_dma32;
> >> > +                       struct page_pool        uc_pool_dma32;
> >> > +               } ;
> >> > +       };
> >> > +};
> >> > +
> >> > +
> >> > +static struct pool_manager _manager;
> >> > +
> >> > +#ifdef CONFIG_X86
> >> > +/* TODO: add this to x86 like _uc, this version here is inefficient */
> >> > +static int set_pages_array_wc(struct page **pages, int addrinarray)
> >> > +{
> >> > +       int i;
> >> > +
> >> > +       for (i = 0; i < addrinarray; i++)
> >> > +               set_memory_wc((unsigned long)page_address(pages[i]), 1);
> >> > +       return 0;
> >> > +}
> >> > +#else
> >> > +static int set_pages_array_wb(struct page **pages, int addrinarray)
> >> > +{
> >> > +#ifdef TTM_HAS_AGP
> >> > +       int i;
> >> > +
> >> > +       for (i = 0; i < addrinarray; i++)
> >> > +               unmap_page_from_agp(pages[i]);
> >> > +#endif
> >> > +       return 0;
> >> > +}
> >> > +
> >> > +static int set_pages_array_wc(struct page **pages, int addrinarray)
> >> > +{
> >> > +#ifdef TTM_HAS_AGP
> >> > +       int i;
> >> > +
> >> > +       for (i = 0; i < addrinarray; i++)
> >> > +               map_page_into_agp(pages[i]);
> >> > +#endif
> >> > +       return 0;
> >> > +}
> >> > +
> >> > +static int set_pages_array_uc(struct page **pages, int addrinarray)
> >> > +{
> >> > +#ifdef TTM_HAS_AGP
> >> > +       int i;
> >> > +
> >> > +       for (i = 0; i < addrinarray; i++)
> >> > +               map_page_into_agp(pages[i]);
> >> > +#endif
> >> > +       return 0;
> >> > +}
> >> > +#endif
> >> > +
> >> > +
> >> > +static void ttm_page_pool_init_locked(struct page_pool *pool)
> >> > +{
> >> > +       spin_lock_init(&pool->lock);
> >> > +       INIT_LIST_HEAD(&pool->list);
> >> > +}
> >> > +
> >> > +static struct page_pool *ttm_get_pool(int flags,
> >> > +               enum ttm_caching_state cstate)
> >> > +{
> >> > +       int pool_index;
> >> > +
> >> > +       if (cstate == tt_cached)
> >> > +               return NULL;
> >> > +
> >> > +       if (cstate == tt_wc)
> >> > +               pool_index = 0x0;
> >> > +       else
> >> > +               pool_index = 0x1;
> >> > +
> >> > +       if (flags & TTM_PAGE_FLAG_DMA32)
> >> > +               pool_index |= 0x2;
> >> > +
> >> > +       return &_manager.pools[pool_index];
> >> > +}
> >> > +
> >> > +static void ttm_pages_put(struct page *pages[], unsigned npages)
> >> > +{
> >> > +       unsigned i;
> >> > +       set_pages_array_wb(pages, npages);
> >> > +       for (i = 0; i < npages; ++i)
> >> > +               __free_page(pages[i]);
> >> > +}
> >> > +
> >> > +/**
> >> > + * Free pages from pool.
> >> > + * If limit_with_time is set to true pages that wil lbe have to have been
> >> > + * in pool for time specified by pool_manager.time_to_wait.
> >> > + *
> >> > + * Pages are freed in NUM_PAES_TO_ADD chunks to prevent freeing from
> >> > + * blocking others operations on pool.
> >> > + *
> >> > + * This is called from the timer handler.
> >> > + **/
> >> > +static void ttm_page_pool_free(struct page_pool *pool,
> >> > +               const bool limit_with_time)
> >> > +{
> >> > +       struct page *page;
> >> > +       struct page *pages_to_free[NUM_PAGES_TO_ADD];
> >> > +       unsigned freed_pages;
> >> > +       unsigned long now = jiffies + _manager.time_to_wait;
> >> > +       unsigned long irq_flags;
> >> > +
> >> > +restart:
> >> > +       freed_pages = 0;
> >> > +       spin_lock_irqsave(&pool->lock, irq_flags);
> >> > +
> >> > +       {
> >> > +               list_for_each_entry_reverse(page, &pool->list, lru) {
> >> > +
> >> > +                       if (limit_with_time
> >> > +                               && time_after(now, page->private))
> >> > +                               break;
> >> > +
> >> > +                       pages_to_free[freed_pages++] = page;
> >> > +                       if (freed_pages >= NUM_PAGES_TO_ADD) {
> >> > +                               /* Limit the maximum time that we hold the lock
> >> > +                                * not to block more important threads */
> >> > +                               __list_del(page->lru.prev, &pool->list);
> >> > +                               pool->npages -= freed_pages;
> >> > +                               spin_unlock_irqrestore(&pool->lock, irq_flags);
> >> > +
> >> > +                               ttm_pages_put(pages_to_free, freed_pages);
> >> > +                               goto restart;
> >> > +                       }
> >> > +               }
> >> > +
> >> > +               pool->npages -= freed_pages;
> >> > +               if (freed_pages)
> >> > +                       __list_del(&page->lru, &pool->list);
> >> > +
> >> > +               spin_unlock_irqrestore(&pool->lock, irq_flags);
> >> > +
> >> > +               if (freed_pages)
> >> > +                       ttm_pages_put(pages_to_free, freed_pages);
> >> > +       }
> >> > +}
> >> > +
> >
> > One useless brace level
> >
> >> > +/* periodicaly free unused pages from pool */
> >> > +static void ttm_page_pool_timer_handler(unsigned long man_addr)
> >> > +{
> >> > +       int i;
> >> > +       struct pool_manager *m = (struct pool_manager *)man_addr;
> >> > +
> >> > +       for (i = 0; i < MAX_NUM_POOLS; ++i)
> >> > +               ttm_page_pool_free(&_manager.pools[i], true);
> >> > +
> >> > +       mod_timer(&m->timer, jiffies + msecs_to_jiffies(PAGE_FREE_INTERVAL));
> >> > +}
> >> > +
> >> > +static void ttm_page_pool_timer_create(struct pool_manager *m)
> >> > +{
> >> > +       /* Create a timer that is fired every second to clean
> >> > +        * the free list */
> >> > +       init_timer(&m->timer);
> >> > +       m->timer.expires = jiffies + msecs_to_jiffies(PAGE_FREE_INTERVAL);
> >> > +       m->timer.function = ttm_page_pool_timer_handler;
> >> > +       m->timer.data = (unsigned long)m;
> >> > +}
> >> > +
> >> > +static void ttm_page_pool_timer_free(struct pool_manager *m)
> >> > +{
> >> > +       del_timer_sync(&m->timer);
> >> > +}
> >> > +
> >> > +static void ttm_set_pages_caching(struct page **pages,
> >> > +               enum ttm_caching_state cstate, unsigned cpages)
> >> > +{
> >> > +       /* Set page caching */
> >> > +       switch (cstate) {
> >> > +       case tt_uncached:
> >> > +               set_pages_array_uc(pages, cpages);
> >> > +               break;
> >> > +       case tt_wc:
> >> > +               set_pages_array_wc(pages, cpages);
> >> > +               break;
> >> > +       default:
> >> > +               break;
> >> > +       }
> >> > +}
> >> > +/**
> >> > + * Fill new pages to array.
> >> > + */
> >> > +static int ttm_pages_pool_fill_locked(struct page_pool *pool, int flags,
> >> > +               enum ttm_caching_state cstate, unsigned count,
> >> > +               unsigned long *irq_flags)
> >> > +{
> >> > +       unsigned pages_to_alloc = count - pool->npages;
> >> > +       struct page **pages;
> >> > +       struct page *page;
> >> > +       struct list_head h;
> >> > +       unsigned i, cpages;
> >> > +       unsigned max_cpages = min(pages_to_alloc,
> >> > +                       (unsigned)(PAGE_SIZE/sizeof(*page)));
> >> > +
> >> > +       /* reserve allready allocated pages from pool */
> >> > +       INIT_LIST_HEAD(&h);
> >> > +       list_splice_init(&pool->list, &h);
> >> > +
> >> > +       /* We are now safe to allow others users to modify the pool so unlock
> >> > +        * it. Allocating pages is expensive operation. */
> >> > +       spin_unlock_irqrestore(&pool->lock, *irq_flags);
> >> > +
> >> > +       pages = kmalloc(pages_to_alloc*sizeof(*page), GFP_KERNEL);
> >> > +
> >
> > pages_to_alloc might be negative if pool->npages > count; I think it can
> > happen because there is lock dropping in the page alloc function below.
> > Also, pages_to_alloc*sizeof(*page) might be too big for kmalloc and we
> > might need to use vmalloc (this is why I think a design with a statically
> > allocated table is better).
> 
> pages_to_alloc can't be negative because the lock is held until later in
> the pool allocator.
>
> Locks are only dropped:
> - When allocation succeeds.
> - When returning failure.
> - After entering ttm_pages_pool_fill_locked. Before dropping the lock
> the code there reserves the pages from the pool on a private list and sets
> the pool to an empty state.
>
> ttm_pages_pool_fill_locked had 2 errors in the first version. One of
> them was the wrong count parameter to kmalloc. The 2nd version fixes it so we
> never allocate more than PAGE_SIZE of memory for the table. The second error
> was that pool->npages wasn't set to zero when taking the pages from the pool.
>
> Basically fill_pool operates only on private variables while it is
> outside of the pool locks, which makes the unlocked part reentrant (the
> reserve-then-unlock pattern is sketched below).
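
[The reserve-then-unlock pattern described above, as it appears in
ttm_pages_pool_fill_locked in this patch:]

	/* take everything the pool holds onto a private list, mark the
	 * pool empty, and only then drop the lock for the expensive part */
	INIT_LIST_HEAD(&h);
	list_splice_init(&pool->list, &h);
	i = pool->npages;
	pool->npages = 0;
	spin_unlock_irqrestore(&pool->lock, *irq_flags);

	/* ... allocate pages and set their caching on the private list ... */

	spin_lock_irqsave(&pool->lock, *irq_flags);
	list_splice(&h, &pool->list);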
> 
> >
> >> > +       if (!pages) {
> >> > +               printk(KERN_ERR "[ttm] unable to allocate table for new pages.");
> >> > +               spin_lock_irqsave(&pool->lock, *irq_flags);
> >> > +               return -ENOMEM;
> >> > +       }
> >> > +
> >> > +       for (i = 0, cpages = 0; i < pages_to_alloc; ++i) {
> >> > +               page = alloc_page(pool->gfp_flags);
> >> > +               if (!page) {
> >> > +                       printk(KERN_ERR "[ttm] unable to get page %d\n", i);
> >> > +
> >> > +                       ttm_put_pages(&h, flags, cstate);
> >> > +                       spin_lock_irqsave(&pool->lock, *irq_flags);
> >> > +                       return -ENOMEM;
> >> > +               }
> >> > +
> >> > +#ifdef CONFIG_HIGHMEM
> >> > +               /* gfp flags of highmem page should never be dma32 so we
> >> > +                * we should be fine in such case
> >> > +                */
> >> > +               if (!PageHighMem(page))
> >> > +#endif
> >> > +               {
> >> > +                       pages[cpages++] = page;
> >> > +                       if (cpages == max_cpages) {
> >> > +
> >> > +                               ttm_set_pages_caching(pages, cstate, cpages);
> >> > +                               cpages = 0;
> >> > +                       }
> >> > +               }
> >> > +               list_add(&page->lru, &h);
> >> > +       }
> >> > +
> >> > +       if (cpages)
> >> > +               ttm_set_pages_caching(pages, cstate, cpages);
> >> > +
> >> > +       spin_lock_irqsave(&pool->lock, *irq_flags);
> >> > +
> >> > +       list_splice(&h, &pool->list);
> >> > +
> >> > +       pool->npages += pages_to_alloc;
> >> > +
> >> > +       return 0;
> >> > +}
> >> > +
> >> > +/*
> >> > + * On success pages list will hold count number of correctly
> >> > + * cached pages.
> >> > + */
> >> > +int ttm_get_pages(struct list_head *pages, int flags,
> >> > +               enum ttm_caching_state cstate, unsigned count)
> >> > +{
> >> > +       struct page_pool *pool = ttm_get_pool(flags, cstate);
> >> > +       struct page *page = NULL;
> >> > +       unsigned long irq_flags;
> >> > +       int gfp_flags = 0;
> >> > +       int retries = 1;
> >> > +       int r;
> >> > +
> >> > +       /* No pool for cached pages */
> >> > +       if (cstate == tt_cached) {
> >> > +               if (flags & TTM_PAGE_FLAG_ZERO_ALLOC)
> >> > +                       gfp_flags |= __GFP_ZERO;
> >> > +
> >> > +               if (flags & TTM_PAGE_FLAG_DMA32)
> >> > +                       gfp_flags |= GFP_DMA32;
> >> > +               else
> >> > +                       gfp_flags |= __GFP_HIGHMEM;
> >> > +
> >> > +               /* fallback in case of problems with pool allocation */
> >> > +alloc_pages:
> >> > +               for (r = 0; r < count; ++r) {
> >> > +                       page = alloc_page(gfp_flags);
> >> > +                       if (!page) {
> >> > +
> >> > +                               printk(KERN_ERR "[ttm] unable to allocate page.");
> >> > +                               return -ENOMEM;
> >> > +                       }
> >> > +
> >> > +                       list_add(&page->lru, pages);
> >
> > this doesn't alloc page with proper caching
> >
> >> > +               }
> >> > +               return 0;
> >> > +       }
> >> > +
> >> > +       gfp_flags = pool->gfp_flags;
> >> > +
> >> > +       spin_lock_irqsave(&pool->lock, irq_flags);
> >> > +       do {
> >> > +               /* First we look if pool has the page we want */
> >> > +               if (pool->npages >= count) {
> >> > +
> >> > +
> >> > +                       /* Find the cut location */
> >> > +                       if (count <= pool->npages/2) {
> >> > +                               unsigned c = 0;
> >> > +                               list_for_each_entry(page, &pool->list, lru) {
> >> > +                                       if (++c >= count)
> >> > +                                               break;
> >> > +                               }
> >> > +                       } else {
> >> > +                               /* We have to point to the last pages cut */
> >> > +                               unsigned c = 0;
> >> > +                               unsigned rcount = pool->npages - count + 1;
> >> > +
> >> > +                               list_for_each_entry_reverse(page, &pool->list, lru) {
> >> > +                                       if (++c >= rcount)
> >> > +                                               break;
> >> > +                               }
> >> > +                       }
> >> > +
> >> > +                       list_cut_position(pages, &pool->list, &page->lru);
> >> > +
> >> > +                       pool->npages -= count;
> >> > +                       spin_unlock_irqrestore(&pool->lock, irq_flags);
> >> > +
> >> > +                       if (flags & TTM_PAGE_FLAG_ZERO_ALLOC) {
> >> > +                               list_for_each_entry(page, pages, lru) {
> >> > +                                       clear_page(page_address(page));
> >> > +                               }
> >> > +                       }
> >> > +
> >> > +                       return 0;
> >> > +               }
> >> > +
> >> > +               if (retries == 0)
> >> > +                       break;
> >> > +
> >> > +               --retries;
> >> > +
> >> > +               r = ttm_pages_pool_fill_locked(pool, flags, cstate,
> >> > +                               count, &irq_flags);
> >> > +               if (r) {
> >> > +                       spin_unlock_irqrestore(&pool->lock, irq_flags);
> >> > +                       return r;
> >> > +               }
> >> > +
> >> > +       } while (1);
> >> > +
> >> > +       spin_unlock_irqrestore(&pool->lock, irq_flags);
> >> > +
> >> > +       /*  Pool allocator failed without errors. we try to allocate new pages
> >> > +        *  instead. */
> >> > +       if (flags & TTM_PAGE_FLAG_ZERO_ALLOC)
> >> > +               gfp_flags |= __GFP_ZERO;
> >> > +
> >> > +       goto alloc_pages;
> >> > +}
> >
> > This function is quite complex, especialy the failing filling pool path
> > that looks supicious, idea of splitting work is not to have to do the
> > work in other function so if pool filling fails the alloc should fail
> > and shouldn't try to fill the pool on its own this kind of code
> > duplication is a recipie for failure i think.
> 
> yes. Failing path should just return -ENOMEM. I will fix that.
> 
> >
> >> > +
> >> > +/* Put all pages in pages list to correct pool to wait for reuse */
> >> > +void ttm_put_pages(struct list_head *pages, int flags,
> >> > +               enum ttm_caching_state cstate)
> >> > +{
> >> > +       struct page_pool *pool = ttm_get_pool(flags, cstate);
> >> > +       struct page *p, *tmp;
> >> > +       unsigned page_count = 0;
> >> > +       unsigned long irq_flags;
> >> > +       unsigned long now = jiffies;
> >> > +
> >> > +       if (pool == NULL) {
> >> > +               list_for_each_entry_safe(p, tmp, pages, lru) {
> >> > +                       __free_page(p);
> >> > +               }
> >> > +               /* We took the ownership -> clear the pages list*/
> >> > +               INIT_LIST_HEAD(pages);
> >> > +               return;
> >> > +       }
> >> > +
> >> > +       list_for_each_entry_safe(p, tmp, pages, lru) {
> >> > +
> >> > +#ifdef CONFIG_HIGHMEM
> >> > +               /* we don't have pool for highmem -> free them */
> >> > +               if (PageHighMem(p)) {
> >> > +                       list_del(&p->lru);
> >> > +                       __free_page(p);
> >> > +               } else
> >> > +#endif
> >> > +               {
> >> > +                       p->private = now;
> >> > +                       ++page_count;
> >> > +               }
> >> > +
> >> > +       }
> >> > +
> >> > +       spin_lock_irqsave(&pool->lock, irq_flags);
> >> > +       list_splice(pages, &pool->list);
> >> > +       pool->npages += page_count;
> >> > +       spin_unlock_irqrestore(&pool->lock, irq_flags);
> >> > +
> >> > +       /* We took the ownership -> clear the pages list*/
> >> > +       INIT_LIST_HEAD(pages);
> >> > +}
> >> > +
> >> > +
> >> > +int ttm_page_alloc_init(void)
> >> > +{
> >> > +       if (_manager.page_alloc_inited)
> >> > +               return 0;
> >> > +
> >> > +       ttm_page_pool_init_locked(&_manager.wc_pool);
> >> > +       _manager.wc_pool.gfp_flags = GFP_HIGHUSER;
> >> > +       ttm_page_pool_init_locked(&_manager.uc_pool);
> >> > +       _manager.uc_pool.gfp_flags = GFP_HIGHUSER;
> >> > +       ttm_page_pool_init_locked(&_manager.wc_pool_dma32);
> >> > +       _manager.wc_pool_dma32.gfp_flags = GFP_USER | GFP_DMA32;
> >> > +       ttm_page_pool_init_locked(&_manager.uc_pool_dma32);
> >> > +       _manager.uc_pool_dma32.gfp_flags = GFP_USER | GFP_DMA32;
> >> > +
> >> > +       _manager.time_to_wait = msecs_to_jiffies(TIME_TO_KEEP_PAGE_IN_POOL);
> >> > +       ttm_page_pool_timer_create(&_manager);
> >> > +       _manager.page_alloc_inited = 1;
> >> > +       return 0;
> >> > +}
> >> > +
> >> > +void ttm_page_alloc_fini(void)
> >> > +{
> >> > +       int i;
> >> > +
> >> > +       if (!_manager.page_alloc_inited)
> >> > +               return;
> >> > +
> >> > +       ttm_page_pool_timer_free(&_manager);
> >> > +
> >> > +       for (i = 0; i < MAX_NUM_POOLS; ++i)
> >> > +               ttm_page_pool_free(&_manager.pools[i], false);
> >> > +
> >> > +       _manager.page_alloc_inited = 0;
> >> > +}
> >> > diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.h b/drivers/gpu/drm/ttm/ttm_page_alloc.h
> >> > new file mode 100644
> >> > index 0000000..eb0ddf4
> >> > --- /dev/null
> >> > +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.h
> >> > @@ -0,0 +1,38 @@
> >> > +/*
> >> > + * Copyright (c) Red Hat Inc.
> >> > +
> >> > + * Permission is hereby granted, free of charge, to any person obtaining a
> >> > + * copy of this software and associated documentation files (the "Software"),
> >> > + * to deal in the Software without restriction, including without limitation
> >> > + * the rights to use, copy, modify, merge, publish, distribute, sub license,
> >> > + * and/or sell copies of the Software, and to permit persons to whom the
> >> > + * Software is furnished to do so, subject to the following conditions:
> >> > + *
> >> > + * The above copyright notice and this permission notice (including the
> >> > + * next paragraph) shall be included in all copies or substantial portions
> >> > + * of the Software.
> >> > + *
> >> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> >> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> >> > + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
> >> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> >> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> >> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> >> > + * DEALINGS IN THE SOFTWARE.
> >> > + *
> >> > + * Authors: Dave Airlie <airlied@redhat.com>
> >> > + *          Jerome Glisse <jglisse@redhat.com>
> >> > + */
> >> > +#ifndef TTM_PAGE_ALLOC
> >> > +#define TTM_PAGE_ALLOC
> >> > +
> >> > +#include "ttm/ttm_bo_driver.h"
> >> > +
> >> > +void ttm_put_pages(struct list_head *pages, int flags,
> >> > +               enum ttm_caching_state cstate);
> >> > +int ttm_get_pages(struct list_head *pages, int flags,
> >> > +               enum ttm_caching_state cstate, unsigned count);
> >> > +int ttm_page_alloc_init(void);
> >> > +void ttm_page_alloc_fini(void);
> >> > +
> >> > +#endif
> >> > diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
> >> > index 3d47a2c..d4e20f6 100644
> >> > --- a/drivers/gpu/drm/ttm/ttm_tt.c
> >> > +++ b/drivers/gpu/drm/ttm/ttm_tt.c
> >> > @@ -38,6 +38,7 @@
> >> >  #include "ttm/ttm_module.h"
> >> >  #include "ttm/ttm_bo_driver.h"
> >> >  #include "ttm/ttm_placement.h"
> >> > +#include "ttm_page_alloc.h"
> >> >
> >> >  static int ttm_tt_swapin(struct ttm_tt *ttm);
> >> >
> >> > @@ -72,21 +73,6 @@ static void ttm_tt_free_page_directory(struct ttm_tt *ttm)
> >> >        ttm->pages = NULL;
> >> >  }
> >> >
> >> > -static struct page *ttm_tt_alloc_page(unsigned page_flags)
> >> > -{
> >> > -       gfp_t gfp_flags = GFP_USER;
> >> > -
> >> > -       if (page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)
> >> > -               gfp_flags |= __GFP_ZERO;
> >> > -
> >> > -       if (page_flags & TTM_PAGE_FLAG_DMA32)
> >> > -               gfp_flags |= __GFP_DMA32;
> >> > -       else
> >> > -               gfp_flags |= __GFP_HIGHMEM;
> >> > -
> >> > -       return alloc_page(gfp_flags);
> >> > -}
> >> > -
> >> >  static void ttm_tt_free_user_pages(struct ttm_tt *ttm)
> >> >  {
> >> >        int write;
> >> > @@ -127,15 +113,21 @@ static void ttm_tt_free_user_pages(struct ttm_tt *ttm)
> >> >  static struct page *__ttm_tt_get_page(struct ttm_tt *ttm, int index)
> >> >  {
> >> >        struct page *p;
> >> > +       struct list_head h;
> >> >        struct ttm_mem_global *mem_glob = ttm->glob->mem_glob;
> >> >        int ret;
> >> >
> >> >        while (NULL == (p = ttm->pages[index])) {
> >> > -               p = ttm_tt_alloc_page(ttm->page_flags);
> >> >
> >> > -               if (!p)
> >> > +               INIT_LIST_HEAD(&h);
> >> > +
> >> > +               ret = ttm_get_pages(&h, ttm->page_flags, ttm->caching_state, 1);
> >> > +
> >> > +               if (ret != 0)
> >> >                        return NULL;
> >> >
> >> > +               p = list_first_entry(&h, struct page, lru);
> >> > +
> >> >                ret = ttm_mem_global_alloc_page(mem_glob, p, false, false);
> >> >                if (unlikely(ret != 0))
> >> >                        goto out_err;
> >> > @@ -244,10 +236,10 @@ static int ttm_tt_set_caching(struct ttm_tt *ttm,
> >> >        if (ttm->caching_state == c_state)
> >> >                return 0;
> >> >
> >> > -       if (c_state != tt_cached) {
> >> > -               ret = ttm_tt_populate(ttm);
> >> > -               if (unlikely(ret != 0))
> >> > -                       return ret;
> >> > +       if (ttm->state == tt_unpopulated) {
> >> > +               /* Change caching but don't populate */
> >> > +               ttm->caching_state = c_state;
> >> > +               return 0;
> >> >        }
> >> >
> >> >        if (ttm->caching_state == tt_cached)
> >> > @@ -298,25 +290,32 @@ EXPORT_SYMBOL(ttm_tt_set_placement_caching);
> >> >  static void ttm_tt_free_alloced_pages(struct ttm_tt *ttm)
> >> >  {
> >> >        int i;
> >> > +       unsigned count = 0;
> >> > +       struct list_head h;
> >> >        struct page *cur_page;
> >> >        struct ttm_backend *be = ttm->be;
> >> >
> >> > +       INIT_LIST_HEAD(&h);
> >> > +
> >> >        if (be)
> >> >                be->func->clear(be);
> >> > -       (void)ttm_tt_set_caching(ttm, tt_cached);
> >> >        for (i = 0; i < ttm->num_pages; ++i) {
> >> > +
> >> >                cur_page = ttm->pages[i];
> >> >                ttm->pages[i] = NULL;
> >> >                if (cur_page) {
> >> >                        if (page_count(cur_page) != 1)
> >> >                                printk(KERN_ERR TTM_PFX
> >> >                                       "Erroneous page count. "
> >> > -                                      "Leaking pages.\n");
> >> > +                                      "Leaking pages (%d/%d).\n",
> >> > +                                      page_count(cur_page), count);
> >> >                        ttm_mem_global_free_page(ttm->glob->mem_glob,
> >> >                                                 cur_page);
> >> > -                       __free_page(cur_page);
> >> > +                       list_add(&cur_page->lru, &h);
> >> > +                       count++;
> >> >                }
> >> >        }
> >> > +       ttm_put_pages(&h, ttm->page_flags, ttm->caching_state);
> >> >        ttm->state = tt_unpopulated;
> >> >        ttm->first_himem_page = ttm->num_pages;
> >> >        ttm->last_lomem_page = -1;
> >> > --
> >> > 1.6.3.3
> >> >
> >> >
> >>
> >> While no comments yet I did notice huge errors in
> >> ttm_pages_pool_fill_locked. But I still would like to comments on
> >> overall design.
> >>
> >> Is it correct to use page->private in pool allocator?
> >>
> >> bo allocation is still too expensive operation for dma buffers in
> >> classic mesa. It takes quite a lot cpu time in bind memory (agp
> >> system) and ttm_mem_global functions. Would it be possible to move
> >> some parts those expensive operations into pool fill and pool free
> >> code?
> >>
> >> Also using single call to allocate all pages for buffer would improve
> >> situation when pool is not large enough. I can change ttm_tt_populate
> >> to allocate all pages with single call if it sounds ok.
> >>
> >> I will still look at how to integrate pool allocator to shinker.
> >>
> >>
> >> Attached fixed full path and here is fixed version of
> >> ttm_pages_pool_fill_locked :
> >>
> >> +static int ttm_pages_pool_fill_locked(struct page_pool *pool, int flags,
> >> +               enum ttm_caching_state cstate, unsigned count,
> >> +               unsigned long *irq_flags)
> >> +{
> >> +       unsigned pages_to_alloc = count - pool->npages;
> >> +       struct page **pages;
> >> +       struct page *page;
> >> +       struct list_head h;
> >> +       unsigned i, cpages;
> >> +       unsigned max_cpages = min(pages_to_alloc,
> >> +                       (unsigned)(PAGE_SIZE/sizeof(*page)));
> >> +
> >> +       /* reserve allready allocated pages from pool */
> >> +       INIT_LIST_HEAD(&h);
> >> +       list_splice_init(&pool->list, &h);
> >> +       i = pool->npages;
> >> +       pages_to_alloc = count;
> >> +       pool->npages = 0;
> >> +
> >> +       /* We are now safe to allow others users to modify the pool so unlock
> >> +        * it. Allocating pages is expensive operation. */
> >> +       spin_unlock_irqrestore(&pool->lock, *irq_flags);
> >> +
> >> +       pages = kmalloc(max_cpages*sizeof(*page), GFP_KERNEL);
> >> +
> >> +       if (!pages) {
> >> +               printk(KERN_ERR "[ttm] unable to allocate table for
> >> new pages.");
> >> +               spin_lock_irqsave(&pool->lock, *irq_flags);
> >> +               return -ENOMEM;
> >> +       }
> >> +
> >> +       /* i is set to number of pages already in pool */
> >> +       for (cpages = 0; i < pages_to_alloc; ++i) {
> >> +               page = alloc_page(pool->gfp_flags);
> >> +               if (!page) {
> >> +                       printk(KERN_ERR "[ttm] unable to get page %u\n", i);
> >> +
> >> +                       ttm_put_pages(&h, flags, cstate);
> >> +                       spin_lock_irqsave(&pool->lock, *irq_flags);
> >> +                       return -ENOMEM;
> >> +               }
> >> +
> >> +#ifdef CONFIG_HIGHMEM
> >> +               /* gfp flags of highmem page should never be dma32 so we
> >> +                * we should be fine in such case
> >> +                */
> >> +               if (!PageHighMem(page))
> >> +#endif
> >> +               {
> >> +                       pages[cpages++] = page;
> >> +                       if (cpages == max_cpages) {
> >> +
> >> +                               ttm_set_pages_caching(pages, cstate, cpages);
> >> +                               cpages = 0;
> >> +                       }
> >> +               }
> >> +               list_add(&page->lru, &h);
> >> +       }
> >> +
> >> +       if (cpages)
> >> +               ttm_set_pages_caching(pages, cstate, cpages);
> >> +
> >> +       spin_lock_irqsave(&pool->lock, *irq_flags);
> >> +
> >> +       list_splice(&h, &pool->list);
> >> +
> >> +       pool->npages += pages_to_alloc;
> >> +
> >> +       return 0;
> >> +}
> >
> >> From 8340b84a4752830feff6b77f3c23707dbd69f757 Mon Sep 17 00:00:00 2001
> >> From: Jerome Glisse <jglisse@redhat.com>
> >> Date: Wed, 23 Sep 2009 18:48:56 +1000
> >> Subject: [PATCH] drm/ttm: add pool wc/uc page allocator
> >>
> >> On AGP system we might allocate/free routinely uncached or wc memory,
> >> changing page from cached (wb) to uc or wc is very expensive and involves
> >> a lot of flushing. To improve performance this allocator use a pool
> >> of uc,wc pages.
> >>
> >> Pools are linked lists of pages. Ordered so that first is the latest adition
> >> to the pool and last is the oldest page. Old pages are periodicaly freed from
> >> pools to keep memory use at minimum.
> >>
> >> Pools are protected with spinlocks to allow multiple threads to allocate pages
> >> simultanously. Expensive operations must not hold the spinlock to maximise
> >> performance for multiple threads.
> >>
> >> V2: Fix ttm_page_pool_fill_locked.
> >>  - max_cpages should be used for kmalloc instead of pages_to_alloc.
> >>  - Set pool->npages to zero after taking all pages from the pool.
> >>
> >> Based on Jerome Glisse's and Dave Airlie's pool allocator.
> >>
> >> Signed-off-by: Jerome Glisse <jglisse@redhat.com>
> >> Signed-off-by: Dave Airlie <airlied@redhat.com>
> >> Signed-off-by: Pauli Nieminen <suokkos@gmail.com>
> >> ---
> >>  drivers/gpu/drm/ttm/Makefile         |    2 +-
> >>  drivers/gpu/drm/ttm/ttm_memory.c     |    5 +-
> >>  drivers/gpu/drm/ttm/ttm_page_alloc.c |  518 ++++++++++++++++++++++++++++++++++
> >>  drivers/gpu/drm/ttm/ttm_page_alloc.h |   38 +++
> >>  drivers/gpu/drm/ttm/ttm_tt.c         |   47 ++--
> >>  5 files changed, 584 insertions(+), 26 deletions(-)
> >>  create mode 100644 drivers/gpu/drm/ttm/ttm_page_alloc.c
> >>  create mode 100644 drivers/gpu/drm/ttm/ttm_page_alloc.h
> >>
> >> diff --git a/drivers/gpu/drm/ttm/Makefile b/drivers/gpu/drm/ttm/Makefile
> >> index 1e138f5..4256e20 100644
> >> --- a/drivers/gpu/drm/ttm/Makefile
> >> +++ b/drivers/gpu/drm/ttm/Makefile
> >> @@ -4,6 +4,6 @@
> >>  ccflags-y := -Iinclude/drm
> >>  ttm-y := ttm_agp_backend.o ttm_memory.o ttm_tt.o ttm_bo.o \
> >>       ttm_bo_util.o ttm_bo_vm.o ttm_module.o ttm_global.o \
> >> -     ttm_object.o ttm_lock.o ttm_execbuf_util.o
> >> +     ttm_object.o ttm_lock.o ttm_execbuf_util.o ttm_page_alloc.o
> >>
> >>  obj-$(CONFIG_DRM_TTM) += ttm.o
> >> diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c
> >> index f5245c0..d587fbe 100644
> >> --- a/drivers/gpu/drm/ttm/ttm_memory.c
> >> +++ b/drivers/gpu/drm/ttm/ttm_memory.c
> >> @@ -32,6 +32,7 @@
> >>  #include <linux/wait.h>
> >>  #include <linux/mm.h>
> >>  #include <linux/module.h>
> >> +#include "ttm_page_alloc.h"
> >>
> >>  #define TTM_MEMORY_ALLOC_RETRIES 4
> >>
> >> @@ -394,6 +395,7 @@ int ttm_mem_global_init(struct ttm_mem_global *glob)
> >>                      "Zone %7s: Available graphics memory: %llu kiB.\n",
> >>                      zone->name, (unsigned long long) zone->max_mem >> 10);
> >>       }
> >> +     ttm_page_alloc_init();
> >>       return 0;
> >>  out_no_zone:
> >>       ttm_mem_global_release(glob);
> >> @@ -413,9 +415,10 @@ void ttm_mem_global_release(struct ttm_mem_global *glob)
> >>               zone = glob->zones[i];
> >>               kobject_del(&zone->kobj);
> >>               kobject_put(&zone->kobj);
> >> -     }
> >> +                     }
> >>       kobject_del(&glob->kobj);
> >>       kobject_put(&glob->kobj);
> >> +     ttm_page_alloc_fini();
> >>  }
> >>  EXPORT_SYMBOL(ttm_mem_global_release);
> >>
> >> diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
> >> new file mode 100644
> >> index 0000000..e269a10
> >> --- /dev/null
> >> +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
> >> @@ -0,0 +1,518 @@
> >> +/*
> >> + * Copyright (c) Red Hat Inc.
> >> +
> >> + * Permission is hereby granted, free of charge, to any person obtaining a
> >> + * copy of this software and associated documentation files (the "Software"),
> >> + * to deal in the Software without restriction, including without limitation
> >> + * the rights to use, copy, modify, merge, publish, distribute, sub license,
> >> + * and/or sell copies of the Software, and to permit persons to whom the
> >> + * Software is furnished to do so, subject to the following conditions:
> >> + *
> >> + * The above copyright notice and this permission notice (including the
> >> + * next paragraph) shall be included in all copies or substantial portions
> >> + * of the Software.
> >> + *
> >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> >> + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
> >> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> >> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> >> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> >> + * DEALINGS IN THE SOFTWARE.
> >> + *
> >> + * Authors: Dave Airlie <airlied@redhat.com>
> >> + *          Jerome Glisse <jglisse@redhat.com>
> >> + *          Pauli Nieminen <suokkos@gmail.com>
> >> + */
> >> +
> >> +/* simple list based uncached page pool
> >> + * - Pool collects resently freed pages for reuse
> >> + * - Use page->lru to keep a free list
> >> + * - doesn't track currently in use pages
> >> + */
> >> +#include <linux/list.h>
> >> +#include <linux/spinlock.h>
> >> +#include <linux/highmem.h>
> >> +#include <linux/mm_types.h>
> >> +#include <linux/jiffies.h>
> >> +#include <linux/timer.h>
> >> +
> >> +#include <asm/agp.h>
> >> +#include "ttm/ttm_bo_driver.h"
> >> +#include "ttm_page_alloc.h"
> >> +
> >> +/* Number of 4k pages to add at once */
> >> +#define NUM_PAGES_TO_ADD             64
> >> +/* times are in msecs */
> >> +#define TIME_TO_KEEP_PAGE_IN_POOL    15000
> >> +#define PAGE_FREE_INTERVAL           1000
> >> +
> >> +/**
> >> + * list is a FILO stack.
> >> + * All pages pushed into it must go to begin while end of list must hold the
> >> + * least recently used pages. When pages in end of the list are not recently
> >> + * use they are freed.
> >> + *
> >> + * All expensive operations must be done outside of pool lock to prevent
> >> + * contention on lock.
> >> + */
> >> +struct page_pool {
> >> +     struct list_head        list;
> >> +     int                     gfp_flags;
> >> +     unsigned                npages;
> >> +     spinlock_t              lock;
> >> +};
> >> +
> >> +# define MAX_NUM_POOLS 4
> >> +
> >> +struct pool_manager {
> >> +     bool                    page_alloc_inited;
> >> +     unsigned long           time_to_wait;
> >> +
> >> +     struct timer_list       timer;
> >> +
> >> +     union {
> >> +             struct page_pool        pools[MAX_NUM_POOLS];
> >> +             struct {
> >> +                     struct page_pool        wc_pool;
> >> +                     struct page_pool        uc_pool;
> >> +                     struct page_pool        wc_pool_dma32;
> >> +                     struct page_pool        uc_pool_dma32;
> >> +             } ;
> >> +     };
> >> +};
> >> +
> >> +
> >> +static struct pool_manager _manager;
> >> +
> >> +#ifdef CONFIG_X86
> >> +/* TODO: add this to x86 like _uc, this version here is inefficient */
> >> +static int set_pages_array_wc(struct page **pages, int addrinarray)
> >> +{
> >> +     int i;
> >> +
> >> +     for (i = 0; i < addrinarray; i++)
> >> +             set_memory_wc((unsigned long)page_address(pages[i]), 1);
> >> +     return 0;
> >> +}
> >> +#else
> >> +static int set_pages_array_wb(struct page **pages, int addrinarray)
> >> +{
> >> +#ifdef TTM_HAS_AGP
> >> +     int i;
> >> +
> >> +     for (i = 0; i < addrinarray; i++)
> >> +             unmap_page_from_agp(pages[i]);
> >> +#endif
> >> +     return 0;
> >> +}
> >> +
> >> +static int set_pages_array_wc(struct page **pages, int addrinarray)
> >> +{
> >> +#ifdef TTM_HAS_AGP
> >> +     int i;
> >> +
> >> +     for (i = 0; i < addrinarray; i++)
> >> +             map_page_into_agp(pages[i]);
> >> +#endif
> >> +     return 0;
> >> +}
> >> +
> >> +static int set_pages_array_uc(struct page **pages, int addrinarray)
> >> +{
> >> +#ifdef TTM_HAS_AGP
> >> +     int i;
> >> +
> >> +     for (i = 0; i < addrinarray; i++)
> >> +             map_page_into_agp(pages[i]);
> >> +#endif
> >> +     return 0;
> >> +}
> >> +#endif
> >> +
> >> +
> >> +static void ttm_page_pool_init_locked(struct page_pool *pool)
> >> +{
> >> +     spin_lock_init(&pool->lock);
> >> +     INIT_LIST_HEAD(&pool->list);
> >> +}
> >> +
> >> +static struct page_pool *ttm_get_pool(int flags,
> >> +             enum ttm_caching_state cstate)
> >> +{
> >> +     int pool_index;
> >> +
> >> +     if (cstate == tt_cached)
> >> +             return NULL;
> >> +
> >> +     if (cstate == tt_wc)
> >> +             pool_index = 0x0;
> >> +     else
> >> +             pool_index = 0x1;
> >> +
> >> +     if (flags & TTM_PAGE_FLAG_DMA32)
> >> +             pool_index |= 0x2;
> >> +
> >> +     return &_manager.pools[pool_index];
> >> +}
> >> +
> >> +static void ttm_pages_put(struct page *pages[], unsigned npages)
> >> +{
> >> +     unsigned i;
> >> +     set_pages_array_wb(pages, npages);
> >> +     for (i = 0; i < npages; ++i)
> >> +             __free_page(pages[i]);
> >> +}
> >> +
> >> +/**
> >> + * Free pages from pool.
> >> + * If limit_with_time is set to true pages that wil lbe have to have been
> >> + * in pool for time specified by pool_manager.time_to_wait.
> >> + *
> >> + * Pages are freed in NUM_PAES_TO_ADD chunks to prevent freeing from
> >> + * blocking others operations on pool.
> >> + *
> >> + * This is called from the timer handler.
> >> + **/
> >> +static void ttm_page_pool_free(struct page_pool *pool,
> >> +             const bool limit_with_time)
> >> +{
> >> +     struct page *page;
> >> +     struct page *pages_to_free[NUM_PAGES_TO_ADD];
> >> +     unsigned freed_pages;
> >> +     unsigned long now = jiffies + _manager.time_to_wait;
> >> +     unsigned long irq_flags;
> >> +
> >> +restart:
> >> +     freed_pages = 0;
> >> +     spin_lock_irqsave(&pool->lock, irq_flags);
> >> +
> >> +     {
> >> +             list_for_each_entry_reverse(page, &pool->list, lru) {
> >> +
> >> +                     if (limit_with_time
> >> +                             && time_after(now, page->private))
> >> +                             break;
> >> +
> >> +                     pages_to_free[freed_pages++] = page;
> >> +                     if (freed_pages >= NUM_PAGES_TO_ADD) {
> >> +                             /* Limit the maximum time that we hold the lock
> >> +                              * not to block more important threads */
> >> +                             __list_del(page->lru.prev, &pool->list);
> >> +                             pool->npages -= freed_pages;
> >> +                             spin_unlock_irqrestore(&pool->lock, irq_flags);
> >> +
> >> +                             ttm_pages_put(pages_to_free, freed_pages);
> >> +                             goto restart;
> >> +                     }
> >> +             }
> >> +
> >> +             pool->npages -= freed_pages;
> >> +             if (freed_pages)
> >> +                     __list_del(&page->lru, &pool->list);
> >> +
> >> +             spin_unlock_irqrestore(&pool->lock, irq_flags);
> >> +
> >> +             if (freed_pages)
> >> +                     ttm_pages_put(pages_to_free, freed_pages);
> >> +     }
> >> +}
> >> +
> >> +/* periodicaly free unused pages from pool */
> >> +static void ttm_page_pool_timer_handler(unsigned long man_addr)
> >> +{
> >> +     int i;
> >> +     struct pool_manager *m = (struct pool_manager *)man_addr;
> >> +
> >> +     for (i = 0; i < MAX_NUM_POOLS; ++i)
> >> +             ttm_page_pool_free(&_manager.pools[i], true);
> >> +
> >> +     mod_timer(&m->timer, jiffies + msecs_to_jiffies(PAGE_FREE_INTERVAL));
> >> +}
> >> +
> >> +static void ttm_page_pool_timer_create(struct pool_manager *m)
> >> +{
> >> +     /* Create a timer that is fired every second to clean
> >> +      * the free list */
> >> +     init_timer(&m->timer);
> >> +     m->timer.expires = jiffies + msecs_to_jiffies(PAGE_FREE_INTERVAL);
> >> +     m->timer.function = ttm_page_pool_timer_handler;
> >> +     m->timer.data = (unsigned long)m;
> >> +}
> >> +
> >> +static void ttm_page_pool_timer_free(struct pool_manager *m)
> >> +{
> >> +     del_timer_sync(&m->timer);
> >> +}
> >> +
> >> +static void ttm_set_pages_caching(struct page **pages,
> >> +             enum ttm_caching_state cstate, unsigned cpages)
> >> +{
> >> +     /* Set page caching */
> >> +     switch (cstate) {
> >> +     case tt_uncached:
> >> +             set_pages_array_uc(pages, cpages);
> >> +             break;
> >> +     case tt_wc:
> >> +             set_pages_array_wc(pages, cpages);
> >> +             break;
> >> +     default:
> >> +             break;
> >> +     }
> >> +}
> >> +/**
> >> + * Fill new pages to array.
> >> + */
> >> +static int ttm_pages_pool_fill_locked(struct page_pool *pool, int flags,
> >> +             enum ttm_caching_state cstate, unsigned count,
> >> +             unsigned long *irq_flags)
> >> +{
> >> +     unsigned pages_to_alloc = count - pool->npages;
> >> +     struct page **pages;
> >> +     struct page *page;
> >> +     struct list_head h;
> >> +     unsigned i, cpages;
> >> +     unsigned max_cpages = min(pages_to_alloc,
> >> +                     (unsigned)(PAGE_SIZE/sizeof(*page)));
> >> +
> >> +     /* reserve allready allocated pages from pool */
> >> +     INIT_LIST_HEAD(&h);
> >> +     list_splice_init(&pool->list, &h);
> >> +     i = pool->npages;
> >> +     pages_to_alloc = count;
> >> +     pool->npages = 0;
> >> +
> >> +     /* We are now safe to allow others users to modify the pool so unlock
> >> +      * it. Allocating pages is expensive operation. */
> >> +     spin_unlock_irqrestore(&pool->lock, *irq_flags);
> >> +
> >> +     pages = kmalloc(max_cpages*sizeof(*page), GFP_KERNEL);
> >> +
> >> +     if (!pages) {
> >> +             printk(KERN_ERR "[ttm] unable to allocate table for new pages.");
> >> +             spin_lock_irqsave(&pool->lock, *irq_flags);
> >> +             return -ENOMEM;
> >> +     }
> >> +
> >> +     /* i is set to number of pages already in pool */
> >> +     for (cpages = 0; i < pages_to_alloc; ++i) {
> >> +             page = alloc_page(pool->gfp_flags);
> >> +             if (!page) {
> >> +                     printk(KERN_ERR "[ttm] unable to get page %u\n", i);
> >> +
> >> +                     ttm_put_pages(&h, flags, cstate);
> >> +                     spin_lock_irqsave(&pool->lock, *irq_flags);
> >> +                     return -ENOMEM;
> >> +             }
> >> +
> >> +#ifdef CONFIG_HIGHMEM
> >> +             /* gfp flags of highmem page should never be dma32 so we
> >> +              * we should be fine in such case
> >> +              */
> >> +             if (!PageHighMem(page))
> >> +#endif
> >> +             {
> >> +                     pages[cpages++] = page;
> >> +                     if (cpages == max_cpages) {
> >> +
> >> +                             ttm_set_pages_caching(pages, cstate, cpages);
> >> +                             cpages = 0;
> >> +                     }
> >> +             }
> >> +             list_add(&page->lru, &h);
> >> +     }
> >> +
> >> +     if (cpages)
> >> +             ttm_set_pages_caching(pages, cstate, cpages);
> >> +
> >> +     spin_lock_irqsave(&pool->lock, *irq_flags);
> >> +
> >> +     list_splice(&h, &pool->list);
> >> +
> >> +     pool->npages += pages_to_alloc;
> >> +
> >> +     return 0;
> >> +}
> >> +
> >> +/*
> >> + * On success pages list will hold count number of correctly
> >> + * cached pages.
> >> + */
> >> +int ttm_get_pages(struct list_head *pages, int flags,
> >> +             enum ttm_caching_state cstate, unsigned count)
> >> +{
> >> +     struct page_pool *pool = ttm_get_pool(flags, cstate);
> >> +     struct page *page = NULL;
> >> +     unsigned long irq_flags;
> >> +     int gfp_flags = 0;
> >> +     int retries = 1;
> >> +     int r;
> >> +
> >> +     /* No pool for cached pages */
> >> +     if (cstate == tt_cached) {
> >> +             if (flags & TTM_PAGE_FLAG_ZERO_ALLOC)
> >> +                     gfp_flags |= __GFP_ZERO;
> >> +
> >> +             if (flags & TTM_PAGE_FLAG_DMA32)
> >> +                     gfp_flags |= GFP_DMA32;
> >> +             else
> >> +                     gfp_flags |= __GFP_HIGHMEM;
> >> +
> >> +             /* fallback in case of problems with pool allocation */
> >> +alloc_pages:
> >> +             for (r = 0; r < count; ++r) {
> >> +                     page = alloc_page(gfp_flags);
> >> +                     if (!page) {
> >> +
> >> +                             printk(KERN_ERR "[ttm] unable to allocate page.");
> >> +                             return -ENOMEM;
> >> +                     }
> >> +
> >> +                     list_add(&page->lru, pages);
> >> +             }
> >> +             return 0;
> >> +     }
> >> +
> >> +     gfp_flags = pool->gfp_flags;
> >> +
> >> +     spin_lock_irqsave(&pool->lock, irq_flags);
> >> +     do {
> >> +             /* First we look if pool has the page we want */
> >> +             if (pool->npages >= count) {
> >> +
> >> +
> >> +                     /* Find the cut location */
> >> +                     if (count <= pool->npages/2) {
> >> +                             unsigned c = 0;
> >> +                             list_for_each_entry(page, &pool->list, lru) {
> >> +                                     if (++c >= count)
> >> +                                             break;
> >> +                             }
> >> +                     } else {
> >> +                             /* We have to point to the last pages cut */
> >> +                             unsigned c = 0;
> >> +                             unsigned rcount = pool->npages - count + 1;
> >> +
> >> +                             list_for_each_entry_reverse(page, &pool->list, lru) {
> >> +                                     if (++c >= rcount)
> >> +                                             break;
> >> +                             }
> >> +                     }
> >> +
> >> +                     list_cut_position(pages, &pool->list, &page->lru);
> >> +
> >> +                     pool->npages -= count;
> >> +                     spin_unlock_irqrestore(&pool->lock, irq_flags);
> >> +
> >> +                     if (flags & TTM_PAGE_FLAG_ZERO_ALLOC) {
> >> +                             list_for_each_entry(page, pages, lru) {
> >> +                                     clear_page(page_address(page));
> >> +                             }
> >> +                     }
> >> +
> >> +                     return 0;
> >> +             }
> >> +
> >> +             if (retries == 0)
> >> +                     break;
> >> +
> >> +             --retries;
> >> +
> >> +             r = ttm_pages_pool_fill_locked(pool, flags, cstate,
> >> +                             count, &irq_flags);
> >> +             if (r) {
> >> +                     spin_unlock_irqrestore(&pool->lock, irq_flags);
> >> +                     return r;
> >> +             }
> >> +
> >> +     } while (1);
> >> +
> >> +     spin_unlock_irqrestore(&pool->lock, irq_flags);
> >> +
> >> +     /*  Pool allocator failed without errors. we try to allocate new pages
> >> +      *  instead. */
> >> +     if (flags & TTM_PAGE_FLAG_ZERO_ALLOC)
> >> +             gfp_flags |= __GFP_ZERO;
> >> +
> >> +     goto alloc_pages;
> >> +}
> >> +
> >> +/* Put all pages in pages list to correct pool to wait for reuse */
> >> +void ttm_put_pages(struct list_head *pages, int flags,
> >> +             enum ttm_caching_state cstate)
> >> +{
> >> +     struct page_pool *pool = ttm_get_pool(flags, cstate);
> >> +     struct page *p, *tmp;
> >> +     unsigned page_count = 0;
> >> +     unsigned long irq_flags;
> >> +     unsigned long now = jiffies;
> >> +
> >> +     if (pool == NULL) {
> >> +             list_for_each_entry_safe(p, tmp, pages, lru) {
> >> +                     __free_page(p);
> >> +             }
> >> +             /* We took the ownership -> clear the pages list*/
> >> +             INIT_LIST_HEAD(pages);
> >> +             return;
> >> +     }
> >> +
> >> +     list_for_each_entry_safe(p, tmp, pages, lru) {
> >> +
> >> +#ifdef CONFIG_HIGHMEM
> >> +             /* we don't have pool for highmem -> free them */
> >> +             if (PageHighMem(p)) {
> >> +                     list_del(&p->lru);
> >> +                     __free_page(p);
> >> +             } else
> >> +#endif
> >> +             {
> >> +                     p->private = now;
> >> +                     ++page_count;
> >> +             }
> >> +
> >> +     }
> >> +
> >> +     spin_lock_irqsave(&pool->lock, irq_flags);
> >> +     list_splice(pages, &pool->list);
> >> +     pool->npages += page_count;
> >> +     spin_unlock_irqrestore(&pool->lock, irq_flags);
> >> +
> >> +     /* We took the ownership -> clear the pages list*/
> >> +     INIT_LIST_HEAD(pages);
> >> +}
> >> +
> >> +
> >> +int ttm_page_alloc_init(void)
> >> +{
> >> +     if (_manager.page_alloc_inited)
> >> +             return 0;
> >> +
> >> +     ttm_page_pool_init_locked(&_manager.wc_pool);
> >> +     _manager.wc_pool.gfp_flags = GFP_HIGHUSER;
> >> +     ttm_page_pool_init_locked(&_manager.uc_pool);
> >> +     _manager.uc_pool.gfp_flags = GFP_HIGHUSER;
> >> +     ttm_page_pool_init_locked(&_manager.wc_pool_dma32);
> >> +     _manager.wc_pool_dma32.gfp_flags = GFP_USER | GFP_DMA32;
> >> +     ttm_page_pool_init_locked(&_manager.uc_pool_dma32);
> >> +     _manager.uc_pool_dma32.gfp_flags = GFP_USER | GFP_DMA32;
> >> +
> >> +     _manager.time_to_wait = msecs_to_jiffies(TIME_TO_KEEP_PAGE_IN_POOL);
> >> +     ttm_page_pool_timer_create(&_manager);
> >> +     _manager.page_alloc_inited = 1;
> >> +     return 0;
> >> +}
> >> +
> >> +void ttm_page_alloc_fini(void)
> >> +{
> >> +     int i;
> >> +
> >> +     if (!_manager.page_alloc_inited)
> >> +             return;
> >> +
> >> +     ttm_page_pool_timer_free(&_manager);
> >> +
> >> +     for (i = 0; i < MAX_NUM_POOLS; ++i)
> >> +             ttm_page_pool_free(&_manager.pools[i], false);
> >> +
> >> +     _manager.page_alloc_inited = 0;
> >> +}
> >> diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.h b/drivers/gpu/drm/ttm/ttm_page_alloc.h
> >> new file mode 100644
> >> index 0000000..eb0ddf4
> >> --- /dev/null
> >> +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.h
> >> @@ -0,0 +1,38 @@
> >> +/*
> >> + * Copyright (c) Red Hat Inc.
> >> +
> >> + * Permission is hereby granted, free of charge, to any person obtaining a
> >> + * copy of this software and associated documentation files (the "Software"),
> >> + * to deal in the Software without restriction, including without limitation
> >> + * the rights to use, copy, modify, merge, publish, distribute, sub license,
> >> + * and/or sell copies of the Software, and to permit persons to whom the
> >> + * Software is furnished to do so, subject to the following conditions:
> >> + *
> >> + * The above copyright notice and this permission notice (including the
> >> + * next paragraph) shall be included in all copies or substantial portions
> >> + * of the Software.
> >> + *
> >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> >> + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
> >> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> >> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> >> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> >> + * DEALINGS IN THE SOFTWARE.
> >> + *
> >> + * Authors: Dave Airlie <airlied@redhat.com>
> >> + *          Jerome Glisse <jglisse@redhat.com>
> >> + */
> >> +#ifndef TTM_PAGE_ALLOC
> >> +#define TTM_PAGE_ALLOC
> >> +
> >> +#include "ttm/ttm_bo_driver.h"
> >> +
> >> +void ttm_put_pages(struct list_head *pages, int flags,
> >> +             enum ttm_caching_state cstate);
> >> +int ttm_get_pages(struct list_head *pages, int flags,
> >> +             enum ttm_caching_state cstate, unsigned count);
> >> +int ttm_page_alloc_init(void);
> >> +void ttm_page_alloc_fini(void);
> >> +
> >> +#endif
> >> diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
> >> index 3d47a2c..d4e20f6 100644
> >> --- a/drivers/gpu/drm/ttm/ttm_tt.c
> >> +++ b/drivers/gpu/drm/ttm/ttm_tt.c
> >> @@ -38,6 +38,7 @@
> >>  #include "ttm/ttm_module.h"
> >>  #include "ttm/ttm_bo_driver.h"
> >>  #include "ttm/ttm_placement.h"
> >> +#include "ttm_page_alloc.h"
> >>
> >>  static int ttm_tt_swapin(struct ttm_tt *ttm);
> >>
> >> @@ -72,21 +73,6 @@ static void ttm_tt_free_page_directory(struct ttm_tt *ttm)
> >>       ttm->pages = NULL;
> >>  }
> >>
> >> -static struct page *ttm_tt_alloc_page(unsigned page_flags)
> >> -{
> >> -     gfp_t gfp_flags = GFP_USER;
> >> -
> >> -     if (page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)
> >> -             gfp_flags |= __GFP_ZERO;
> >> -
> >> -     if (page_flags & TTM_PAGE_FLAG_DMA32)
> >> -             gfp_flags |= __GFP_DMA32;
> >> -     else
> >> -             gfp_flags |= __GFP_HIGHMEM;
> >> -
> >> -     return alloc_page(gfp_flags);
> >> -}
> >> -
> >>  static void ttm_tt_free_user_pages(struct ttm_tt *ttm)
> >>  {
> >>       int write;
> >> @@ -127,15 +113,21 @@ static void ttm_tt_free_user_pages(struct ttm_tt *ttm)
> >>  static struct page *__ttm_tt_get_page(struct ttm_tt *ttm, int index)
> >>  {
> >>       struct page *p;
> >> +     struct list_head h;
> >>       struct ttm_mem_global *mem_glob = ttm->glob->mem_glob;
> >>       int ret;
> >>
> >>       while (NULL == (p = ttm->pages[index])) {
> >> -             p = ttm_tt_alloc_page(ttm->page_flags);
> >>
> >> -             if (!p)
> >> +             INIT_LIST_HEAD(&h);
> >> +
> >> +             ret = ttm_get_pages(&h, ttm->page_flags, ttm->caching_state, 1);
> >> +
> >> +             if (ret != 0)
> >>                       return NULL;
> >>
> >> +             p = list_first_entry(&h, struct page, lru);
> >> +
> >>               ret = ttm_mem_global_alloc_page(mem_glob, p, false, false);
> >>               if (unlikely(ret != 0))
> >>                       goto out_err;
> >> @@ -244,10 +236,10 @@ static int ttm_tt_set_caching(struct ttm_tt *ttm,
> >>       if (ttm->caching_state == c_state)
> >>               return 0;
> >>
> >> -     if (c_state != tt_cached) {
> >> -             ret = ttm_tt_populate(ttm);
> >> -             if (unlikely(ret != 0))
> >> -                     return ret;
> >> +     if (ttm->state == tt_unpopulated) {
> >> +             /* Change caching but don't populate */
> >> +             ttm->caching_state = c_state;
> >> +             return 0;
> >>       }
> >>
> >>       if (ttm->caching_state == tt_cached)
> >> @@ -298,25 +290,32 @@ EXPORT_SYMBOL(ttm_tt_set_placement_caching);
> >>  static void ttm_tt_free_alloced_pages(struct ttm_tt *ttm)
> >>  {
> >>       int i;
> >> +     unsigned count = 0;
> >> +     struct list_head h;
> >>       struct page *cur_page;
> >>       struct ttm_backend *be = ttm->be;
> >>
> >> +     INIT_LIST_HEAD(&h);
> >> +
> >>       if (be)
> >>               be->func->clear(be);
> >> -     (void)ttm_tt_set_caching(ttm, tt_cached);
> >>       for (i = 0; i < ttm->num_pages; ++i) {
> >> +
> >>               cur_page = ttm->pages[i];
> >>               ttm->pages[i] = NULL;
> >>               if (cur_page) {
> >>                       if (page_count(cur_page) != 1)
> >>                               printk(KERN_ERR TTM_PFX
> >>                                      "Erroneous page count. "
> >> -                                    "Leaking pages.\n");
> >> +                                    "Leaking pages (%d/%d).\n",
> >> +                                    page_count(cur_page), count);
> >>                       ttm_mem_global_free_page(ttm->glob->mem_glob,
> >>                                                cur_page);
> >> -                     __free_page(cur_page);
> >> +                     list_add(&cur_page->lru, &h);
> >> +                     count++;
> >>               }
> >>       }
> >> +     ttm_put_pages(&h, ttm->page_flags, ttm->caching_state);
> >>       ttm->state = tt_unpopulated;
> >>       ttm->first_himem_page = ttm->num_pages;
> >>       ttm->last_lomem_page = -1;
> >> --
> >> 1.6.3.3
> >>
> >
> >
> >
> 


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC] drm/ttm: add pool wc/uc page allocator
  2010-03-03 14:50       ` Jerome Glisse
@ 2010-03-03 15:46         ` Pauli Nieminen
  2010-03-03 16:02           ` Jerome Glisse
  0 siblings, 1 reply; 13+ messages in thread
From: Pauli Nieminen @ 2010-03-03 15:46 UTC (permalink / raw)
  To: Jerome Glisse; +Cc: Jerome Glisse, dri-devel

On Wed, Mar 3, 2010 at 4:50 PM, Jerome Glisse <glisse@freedesktop.org> wrote:
> On Wed, Mar 03, 2010 at 04:23:08PM +0200, Pauli Nieminen wrote:
>> On Wed, Mar 3, 2010 at 3:33 PM, Jerome Glisse <glisse@freedesktop.org> wrote:
>> > On Tue, Mar 02, 2010 at 12:32:54AM +0200, Pauli Nieminen wrote:
>> >> On Sun, Feb 28, 2010 at 11:30 PM, Pauli Nieminen <suokkos@gmail.com> wrote:
>> >> > On AGP system we might allocate/free routinely uncached or wc memory,
>> >> > changing page from cached (wb) to uc or wc is very expensive and involves
>> >> > a lot of flushing. To improve performance this allocator use a pool
>> >> > of uc,wc pages.
>> >> >
>> >> > Pools are linked lists of pages. Ordered so that first is the latest adition
>> >> > to the pool and last is the oldest page. Old pages are periodicaly freed from
>> >> > pools to keep memory use at minimum.
>> >> >
>> >> > Pools are protected with spinlocks to allow multiple threads to allocate pages
>> >> > simultanously. Expensive operations must not hold the spinlock to maximise
>> >> > performance for multiple threads.
>> >> >
>> >> > Based on Jerome Glisse's and Dave Airlie's pool allocator.
>> >> >
>> >> > Signed-off-by: Jerome Glisse <jglisse@redhat.com>
>> >> > Signed-off-by: Dave Airlie <airlied@redhat.com>
>> >> > Signed-off-by: Pauli Nieminen <suokkos@gmail.com>
>> >
>> > I think it's overdesigned, by trying to be to clever we often endup
>> > with code more complex and less efficient in the end. While the idea
>> > to free page only after some amount of time is the way to go, i think
>> > a simpler approach will perform a better job. For instance:
>> >
>> > pool {
>> >  npages // npages in pool
>> >  page *pages[enoughttohold16Morsomethingconfigurable]
>> >  unsigned long next_free_time;
>> >  lock
>> > }
>> > filllocked(pool,npages)
>> > {
>> >        if (npages > MAXPAGESPERPOOL)
>> >                npages = MAXPAGESPERPOOL;
>> >        for i, npages : allocpage
>> > }
>> > alloc(apages, npages)
>> > {
>> >        do {
>> >        lock(pool)
>> >        pool->next_free_time = jiffies + timeout;
>> >        if (npages > pool->npages) {
>> >                npages -= pool->npages;
>> >                copypageptr(apages, pool->pages, pool->npages)
>> >                pool->npages = 0;
>> >                fillpool(pool, npages)
>> >        } else {
>> >                pool->npages -= npages;
>> >                copypageptr(apages, pool->pages, npages)
>> >                npages = 0;
>> >        }
>> >        unlock(pool)
>> >        } while (npages)
>> > }
>> > poolshrinkcallbacktimer()
>> > {
>> >        for i in npools {
>> >                if (trylock(pool[i].lock) {
>> >                        if (timeafter(jiffies, pool[i].nextfreetime)) {
>> >                                free bunch of pages for instance 1M at a time
>> >                        }
>> >                        unlock(pool[i].lock)
>> >                }
>> >        }
>> > }
>>
>> I would need to use split buffer for that big arrays. My aim was to
>> have FILO queue pages so knowing which pages can be freed now is
>> simple because the oldest pages is always in the head of the list. I
>> know that FILO is easy to implement on top of deque but requirement to
>> split underlying array sounded like more complex than using linked
>> list. But as I noted in IRC. Requesting for N pages from the linked
>> list is doomed to have O(N) performance which is bad. So better
>> implementation would be switching to an array.
>
> The design i outlined above drop the concept of recording time per
> page so no need for a FILO, you just fill the pool and you empty
> it 1M or less at a time every Nmsec this is a lot simpler no complex
> spliting or list walking. One simple optimization is to make the
> amount of page we free dependent of how much the pool is fill so
> if there is 16M of memory in the pool free 1M at a time, but if there
> 64K then free one page at a time.
>

I think we should take into account the amount of dynamic memory that
the current user activity requires. What if there is ever a case where
the dynamic working set is 20M? A list-based implementation is easier
to scale to whatever user space is doing.
20M isn't even much if you think about how many dynamic allocations a
multiseat system (driving multiple GPUs at the same time) would do.

I also thought about the array implementation, and it has the same
O(N) cost anyway.

But improvements that I think would help here:
- Add a shrinker callback instead of using our own timer (rough sketch
  after this list).
- Simplify the implementation a lot.
- Maybe having a pool for cached pages too would make it a lot simpler,
  since all allocations would then use the same code path.
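
Roughly what I have in mind for the shrinker hook, as a minimal sketch
only: ttm_pool_mm_shrink and ttm_pool_shrinker are made-up names, and
the exact struct shrinker callback signature depends on the kernel
version, so treat this as pseudo-C rather than a final implementation.

static int ttm_pool_mm_shrink(int shrink_pages, gfp_t gfp_mask)
{
	unsigned npages = 0;
	int i;

	if (shrink_pages) {
		/* trim every pool, reusing the existing helper */
		for (i = 0; i < MAX_NUM_POOLS; ++i)
			ttm_page_pool_free(&_manager.pools[i], true);
	}

	/* report an estimate of how many pages are still cached;
	 * the unlocked read is good enough for an estimate */
	for (i = 0; i < MAX_NUM_POOLS; ++i)
		npages += _manager.pools[i].npages;

	return npages;
}

static struct shrinker ttm_pool_shrinker = {
	.shrink = ttm_pool_mm_shrink,
	.seeks  = DEFAULT_SEEKS,
};

register_shrinker(&ttm_pool_shrinker) would then go into
ttm_page_alloc_init() and unregister_shrinker() into
ttm_page_alloc_fini(), replacing the timer setup.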

I originally chose the list implementation for easier scalability, and
I think that is a good reason not to switch to an array-based
implementation. Another point against the array is that it takes some
extra memory.

>> >
>> > General remark i don't think you handle jiffies wrap around properly.
>>
>> Only place where jiffies are read is ttm_page_pool_free. There
>> difference is calculate with  timeafter call so I think it should
>> handle the wrapping.
>>
>
> No doesn't work, for instance (assuming jiffies to be 16 bits)
> page get it's private set to 0xFFF0, now ttm_page_pool_free jiffies
> wrap around and are at 0x0 the test will evaluate to false and page
> will be free prematuraly, i think better would have been to do:
> page->private = jiffies + timeout in ttm_put_pages and do
> time_after(jiffie, page->private) in ttm_page_pool_free that would
> have avoided the wrap around.
>

Yes, now I noticed the bug. I had the + timeout in the wrong function.
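
To make sure we mean the same fix, this is a sketch of the two lines
that would change (field names as in the current patch):

	/* ttm_put_pages(): stamp the page with the time it may be freed */
	p->private = jiffies + _manager.time_to_wait;

	/* ttm_page_pool_free(): walking backwards from the oldest page,
	 * stop at the first page that has not expired yet */
	if (limit_with_time && !time_after(jiffies, page->private))
		break;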

A simpler solution for determining the number of pages to free based on demand:
- Set an nlowpages variable in the pool to the number of pages in the pool.
- After returning pages from the pool, always check whether npages is
  less than nlowpages and update nlowpages if needed.
shrinkcallback()
{
	foreach pool in pools {
		lock(pool)
		if (time_elapsed() > min_shrinker_interval) {
			free_pages(nlowpages)
			nlowpages = npages
			last_shrinker_call = jiffies
		}
		unlock(pool)
	}
}

Guaranteeing a minimum interval is important, or we might free too many
pages, which would have a very bad impact on performance. Maybe
something like 500 ms to 1 second would be a good minimum interval.
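
In C that guard could look roughly like this (sketch only: nlowpages and
last_shrink would be new fields in struct page_pool, and the 500 ms
constant is just the example value from above):

#define MIN_SHRINK_INTERVAL_MS	500

static void ttm_pool_shrink_one(struct page_pool *pool)
{
	unsigned long irq_flags;
	unsigned to_free = 0;

	spin_lock_irqsave(&pool->lock, irq_flags);
	if (time_after(jiffies, pool->last_shrink +
		       msecs_to_jiffies(MIN_SHRINK_INTERVAL_MS))) {
		/* nlowpages is the smallest size the pool hit since the
		 * last pass, i.e. pages nobody needed in that window */
		to_free = pool->nlowpages;
		pool->nlowpages = pool->npages - to_free;
		pool->last_shrink = jiffies;
	}
	spin_unlock_irqrestore(&pool->lock, irq_flags);

	/* the actual freeing of 'to_free' pages would then happen outside
	 * the lock, in chunks, the same way ttm_page_pool_free() does */
}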

> Cheers,
> Jerome
>
>> > Also the whole lock dropping in middle of list traversal looks very
>> > fragile to me, right now we don't have much concurency in radeon so
>> > i don't think you well tested the effect of concurently using the
>> > allocator. Anyway see bellow for more comments on the code.
>> >
>> > Btw thanks for looking into this, this isn't a sexy area but it
>> > definitly needs love.
>> >
>> > Cheers,
>> > Jerome
>> >
>> >
>> >
>> > page_to_alloc might be negative if pool->npages > count, i think it can
>> > happen because there is lock dropping in the page alloc function below.
>> > also pages_to_alloc*sizeof(*page) might be to big for kmalloc and we
>> > might need to use vmalloc (this is why i think a design with a static
>> > allocated on table is better)
>>
>> page_to_alloc can't be negative because lock is held untl later into
>> pool allocator.
>>
>> Locks are only dropped:
>> - When allocation succeed
>> - When returning failure.
>> - After entering  ttm_pages_pool_fill_locked. Before droping lock
>> there code reserves pages from pool to private list and sets pool to
>> empty state.
>>
>>  ttm_pages_pool_fill_locked had 2 errors in the first version. One of
>> them was wrong count paramter to kmalloc. 2nd version fixes that we
>> never alloc more than PAGE_SIZES memory. Second error was that
>> pool->npages wasn't set to zero when taking the pages from pool.
>>
>> Basicaly fill_pool operates only in private variables when it is
>> outside of pool locks which makes it reentrant for unlocked part.
>>
>> >
>> >
>> > This function is quite complex, especialy the failing filling pool path
>> > that looks supicious, idea of splitting work is not to have to do the
>> > work in other function so if pool filling fails the alloc should fail
>> > and shouldn't try to fill the pool on its own this kind of code
>> > duplication is a recipie for failure i think.
>>
>> yes. Failing path should just return -ENOMEM. I will fix that.
>>
>> >
>> >>
>> >> While there are no comments yet, I did notice huge errors in
>> >> ttm_pages_pool_fill_locked. But I would still like comments on the
>> >> overall design.
>> >>
>> >> Is it correct to use page->private in the pool allocator?
>> >>
>> >> bo allocation is still too expensive an operation for dma buffers in
>> >> classic mesa. It takes quite a lot of cpu time in binding memory (agp
>> >> systems) and in the ttm_mem_global functions. Would it be possible to move
>> >> some parts of those expensive operations into the pool fill and pool free
>> >> code?
>> >>
>> >> Also, using a single call to allocate all pages for a buffer would improve
>> >> the situation when the pool is not large enough. I can change ttm_tt_populate
>> >> to allocate all pages with a single call if that sounds ok.
>> >>
>> >> I will still look at how to integrate the pool allocator with the shrinker.
>> >>
>> >>
>> >> Attached is the fixed full patch, and here is the fixed version of
>> >> ttm_pages_pool_fill_locked:
>> >>
>> >


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC] drm/ttm: add pool wc/uc page allocator
  2010-03-03 15:46         ` Pauli Nieminen
@ 2010-03-03 16:02           ` Jerome Glisse
  0 siblings, 0 replies; 13+ messages in thread
From: Jerome Glisse @ 2010-03-03 16:02 UTC (permalink / raw)
  To: Pauli Nieminen; +Cc: Jerome Glisse, dri-devel

On Wed, Mar 03, 2010 at 05:46:43PM +0200, Pauli Nieminen wrote:
> On Wed, Mar 3, 2010 at 4:50 PM, Jerome Glisse <glisse@freedesktop.org> wrote:
> > On Wed, Mar 03, 2010 at 04:23:08PM +0200, Pauli Nieminen wrote:
> >> On Wed, Mar 3, 2010 at 3:33 PM, Jerome Glisse <glisse@freedesktop.org> wrote:
> >> > On Tue, Mar 02, 2010 at 12:32:54AM +0200, Pauli Nieminen wrote:
> >> >> On Sun, Feb 28, 2010 at 11:30 PM, Pauli Nieminen <suokkos@gmail.com> wrote:
> >> >> > On AGP system we might allocate/free routinely uncached or wc memory,
> >> >> > changing page from cached (wb) to uc or wc is very expensive and involves
> >> >> > a lot of flushing. To improve performance this allocator use a pool
> >> >> > of uc,wc pages.
> >> >> >
> >> >> > Pools are linked lists of pages. Ordered so that first is the latest adition
> >> >> > to the pool and last is the oldest page. Old pages are periodicaly freed from
> >> >> > pools to keep memory use at minimum.
> >> >> >
> >> >> > Pools are protected with spinlocks to allow multiple threads to allocate pages
> >> >> > simultanously. Expensive operations must not hold the spinlock to maximise
> >> >> > performance for multiple threads.
> >> >> >
> >> >> > Based on Jerome Glisse's and Dave Airlie's pool allocator.
> >> >> >
> >> >> > Signed-off-by: Jerome Glisse <jglisse@redhat.com>
> >> >> > Signed-off-by: Dave Airlie <airlied@redhat.com>
> >> >> > Signed-off-by: Pauli Nieminen <suokkos@gmail.com>
> >> >
> >> > I think it's overdesigned; by trying to be too clever we often end up
> >> > with code that is more complex and less efficient in the end. While the idea
> >> > of freeing pages only after some amount of time is the way to go, I think
> >> > a simpler approach will do a better job. For instance:
> >> >
> >> > pool {
> >> >  npages // npages in pool
> >> >  page *pages[enoughttohold16Morsomethingconfigurable]
> >> >  unsigned long next_free_time;
> >> >  lock
> >> > }
> >> > filllocked(pool,npages)
> >> > {
> >> >        if (npages > MAXPAGESPERPOOL)
> >> >                npages = MAXPAGESPERPOOL;
> >> >        for i, npages : allocpage
> >> > }
> >> > alloc(apages, npages)
> >> > {
> >> >        do {
> >> >        lock(pool)
> >> >        pool->next_free_time = jiffies + timeout;
> >> >        if (npages > pool->npages) {
> >> >                npages -= pool->npages;
> >> >                copypageptr(apages, pool->pages, pool->npages)
> >> >                pool->npages = 0;
> >> >                fillpool(pool, npages)
> >> >        } else {
> >> >                pool->npages -= npages;
> >> >                copypageptr(apages, pool->pages, npages)
> >> >                npages = 0;
> >> >        }
> >> >        unlock(pool)
> >> >        } while (npages)
> >> > }
> >> > poolshrinkcallbacktimer()
> >> > {
> >> >        for i in npools {
> >> >                if (trylock(pool[i].lock) {
> >> >                        if (timeafter(jiffies, pool[i].nextfreetime)) {
> >> >                                free bunch of pages for instance 1M at a time
> >> >                        }
> >> >                        unlock(pool[i].lock)
> >> >                }
> >> >        }
> >> > }
> >>
> >> I would need to use a split buffer for arrays that big. My aim was to
> >> have a FILO queue of pages so that knowing which pages can be freed now is
> >> simple, because the oldest page is always at the head of the list. I
> >> know that a FILO is easy to implement on top of a deque, but the requirement
> >> to split the underlying array sounded more complex than using a linked
> >> list. But as I noted on IRC, requesting N pages from the linked
> >> list is doomed to O(N) performance, which is bad. So a better
> >> implementation would be switching to an array.
> >
> > The design I outlined above drops the concept of recording time per
> > page, so there is no need for a FILO; you just fill the pool and you empty
> > it 1M or less at a time every N msec. This is a lot simpler: no complex
> > splitting or list walking. One simple optimization is to make the
> > amount of pages we free dependent on how full the pool is, so
> > if there is 16M of memory in the pool free 1M at a time, but if there
> > is 64K then free one page at a time.
> >
> 
> I think that we should take into account the amount of dynamic memory that
> the current user activity requires. What if there were ever a case where the
> dynamic memory size is 20M? A list-based implementation is easier to
> scale to what user space is doing.
> 20M isn't even much if you think how many dynamic allocations would be
> done by a multiseat system (driving multiple GPUs at the same time).
> 

You mean an app constantly allocating 20M and freeing 20M? Even with
multiple GPUs this sounds like broken userspace. Note that I don't think
there are dual-AGP motherboards out there, but I tend to forget about such
evil things. Also, I picked 32M randomly; I think the best choice is making
the pool as big as the AGP aperture. Anyway, the bottom line is that we should
not try to cover every possible use case but rather worry about the most
common, sensible ones.

> I also thought about the array implementation, and there is that
> O(N) cost there anyway.
> 
> But improvements that I think would help here:
> - Add the shrinker callback and don't use our own timer.
> - Simplify the implementation a lot.
> - Maybe having a pool for cached pages too would make it a lot simpler,
> since all allocations would use the same code path.
> 
> I originally chose the list implementation for easier scalability, and
> I think that is a good reason not to switch to an array-based
> implementation. Another point against an array is that it takes some
> extra memory space.

I don't think we want scalability. I think having a static table storing
the pages means we won't steal more memory from the system than the
pool can handle (we are talking about a pool here, not an ocean ;)).
Anyway, if you can provide a simpler implementation with a list, why not,
but the current one really looks too complex and worries me a lot about
corner cases.
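
To make it concrete, what I have in mind is something as dumb as this
(the size and the field names are picked out of thin air, only to show
the shape of it):

	/* bounded, statically sized pool; 16M worth of pages as an example */
	#define TTM_POOL_MAX_PAGES	((16 * 1024 * 1024) / PAGE_SIZE)

	struct ttm_page_pool {
		spinlock_t	lock;
		unsigned	npages;		/* filled entries in pages[] */
		unsigned long	next_free_time;	/* jiffies, refreshed on every alloc */
		struct page	*pages[TTM_POOL_MAX_PAGES];
	};

The table itself is only some 32K per pool with these numbers (4096 page
pointers on a 64-bit kernel), so the bookkeeping costs next to nothing,
and the pool can never grow past what the table holds.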

Cheers,
Jerome

> >> >
> >> > General remark: I don't think you handle the jiffies wrap-around properly.
> >>
> >> The only place where jiffies are read is ttm_page_pool_free. The
> >> difference is calculated with a time_after call, so I think it should
> >> handle the wrapping.
> >>
> >
> > No, it doesn't work. For instance (assuming jiffies to be 16 bits), a
> > page gets its private set to 0xFFF0; now in ttm_page_pool_free the jiffies
> > wrap around and are at 0x0, the test will evaluate to false and the page
> > will be freed prematurely. I think it would have been better to do
> > page->private = jiffies + timeout in ttm_put_pages and
> > time_after(jiffies, page->private) in ttm_page_pool_free; that would
> > have avoided the wrap-around.
> >
> 
> Yes, now I noticed the bug. I had the + timeout in the wrong function.
> 
> A simpler solution for determining the number of pages to free based on demand:
> - Set an nlowpages variable in the pool to the number of pages in the pool.
> - Every time pages are handed out from the pool, check if npages is less than
> nlowpages and update nlowpages if needed.
> shrinkcallback()
> {
>  foreach pool in pools {
>   lock(pool)
>   if (time_elapsed() > min_shrinker_interval) {
>    free_pages(nlowpages)
>    nlowpages = npages
>    last_shrinker_call = jiffies;
>   }
>  unlock(pool)
>  }
> }
> 
> Guaranteeing a minimum interval is important, or we might free too many
> pages, which would have a very bad impact on performance. Maybe something
> like 500 ms to 1 second would be a good minimum interval.
> 
> > Cheers,
> > Jerome
> >
> >> > Also the whole lock dropping in the middle of list traversal looks very
> >> > fragile to me; right now we don't have much concurrency in radeon, so
> >> > I don't think you have really tested the effect of using the
> >> > allocator concurrently. Anyway, see below for more comments on the code.
> >> >
> >> > Btw thanks for looking into this, this isn't a sexy area but it
> >> > definitely needs love.
> >> >
> >> > Cheers,
> >> > Jerome
> >> >
> >> >
> >> >
> >> > page_to_alloc might be negative if pool->npages > count; I think it can
> >> > happen because there is lock dropping in the page alloc function below.
> >> > Also, pages_to_alloc*sizeof(*page) might be too big for kmalloc and we
> >> > might need to use vmalloc (this is why I think a design with a statically
> >> > allocated table is better).
> >>
> >> page_to_alloc can't be negative because the lock is held until later in
> >> the pool allocator.
> >>
> >> Locks are only dropped:
> >> - When the allocation succeeds.
> >> - When returning failure.
> >> - After entering ttm_pages_pool_fill_locked. Before dropping the lock
> >> the code reserves the pages from the pool to a private list and sets the
> >> pool to an empty state.
> >>
> >> ttm_pages_pool_fill_locked had 2 errors in the first version. One of
> >> them was a wrong count parameter to kmalloc; the 2nd version fixes that so we
> >> never allocate more than PAGE_SIZE of memory. The second error was that
> >> pool->npages wasn't set to zero when taking the pages from the pool.
> >>
> >> Basically fill_pool operates only on private variables when it is
> >> outside of the pool locks, which makes the unlocked part reentrant.
> >>
> >> >
> >> >
> >> > This function is quite complex, especially the failing pool-fill path,
> >> > which looks suspicious. The idea of splitting the work is not to have to
> >> > do the work in the other function, so if pool filling fails the alloc should
> >> > fail and shouldn't try to fill the pool on its own; this kind of code
> >> > duplication is a recipe for failure, I think.
> >>
> >> Yes. The failing path should just return -ENOMEM. I will fix that.
> >>
> >> >
> >> >>
> >> >> While there are no comments yet, I did notice huge errors in
> >> >> ttm_pages_pool_fill_locked. But I would still like comments on the
> >> >> overall design.
> >> >>
> >> >> Is it correct to use page->private in the pool allocator?
> >> >>
> >> >> bo allocation is still too expensive an operation for dma buffers in
> >> >> classic mesa. It takes quite a lot of cpu time in binding memory (agp
> >> >> systems) and in the ttm_mem_global functions. Would it be possible to move
> >> >> some parts of those expensive operations into the pool fill and pool free
> >> >> code?
> >> >>
> >> >> Also, using a single call to allocate all pages for a buffer would improve
> >> >> the situation when the pool is not large enough. I can change ttm_tt_populate
> >> >> to allocate all pages with a single call if that sounds ok.
> >> >>
> >> >> I will still look at how to integrate the pool allocator with the shrinker.
> >> >>
> >> >>
> >> >> Attached is the fixed full patch, and here is the fixed version of
> >> >> ttm_pages_pool_fill_locked:
> >> >>
> >> >
> 


^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2010-03-03 16:02 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-02-28 21:30 [RFC] drm/ttm: add pool wc/uc page allocator Pauli Nieminen
2010-03-01 22:32 ` Pauli Nieminen
2010-03-02 10:56   ` Michel Dänzer
     [not found]     ` <4B8CFB1B.1020806@shipmail.org>
2010-03-03 12:13       ` Pauli Nieminen
2010-03-03 12:47         ` Luca Barbieri
2010-03-03 13:23           ` Thomas Hellstrom
2010-03-03 14:03             ` Luca Barbieri
2010-03-03 14:19               ` Thomas Hellstrom
2010-03-03 13:33   ` Jerome Glisse
2010-03-03 14:23     ` Pauli Nieminen
2010-03-03 14:50       ` Jerome Glisse
2010-03-03 15:46         ` Pauli Nieminen
2010-03-03 16:02           ` Jerome Glisse
