* [PATCH v2 1/3] dma-buf: heaps: Add deferred-free-helper library code
@ 2021-01-23  3:46 John Stultz
  2021-01-23  3:46 ` [PATCH v2 2/3] dma-buf: system_heap: Add pagepool support to system heap John Stultz
  2021-01-23  3:46 ` [PATCH v2 3/3] dma-buf: system_heap: Add deferred freeing to the " John Stultz
  0 siblings, 2 replies; 6+ messages in thread
From: John Stultz @ 2021-01-23  3:46 UTC (permalink / raw)
  To: lkml
  Cc: John Stultz, Daniel Vetter, Sumit Semwal, Liam Mark,
	Chris Goldsworthy, Laura Abbott, Brian Starkey, Hridya Valsaraju,
	Suren Baghdasaryan, Sandeep Patil, Daniel Mentz, Ørjan Eide,
	Robin Murphy, Ezequiel Garcia, Simon Ser, James Jones,
	linux-media, dri-devel

This patch provides infrastructure for deferring buffer frees.

This is a feature ION provided which, when used with some form
of page pool, gives a nice performance boost in an allocation
microbenchmark. It helps because the page zeroing can be done
outside the normal allocation/free path and pushed off to a
kthread.

As not all heaps will find this useful, it's implemented as an
optional helper library that heaps can utilize.
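
For illustration, a heap using this helper embeds a
deferred_freelist_item in its buffer structure and queues it from
its dma-buf release path. A minimal sketch follows; only
deferred_free(), struct deferred_freelist_item and enum df_reason
come from this patch, the my_heap_* names and free_buffer_pages()
are hypothetical:

struct my_heap_buffer {
	struct sg_table sg_table;
	size_t len;
	struct deferred_freelist_item deferred_free;
};

static void my_heap_buf_free(struct deferred_freelist_item *item,
			     enum df_reason reason)
{
	struct my_heap_buffer *buffer =
		container_of(item, struct my_heap_buffer, deferred_free);

	/*
	 * DF_NORMAL: free at leisure (e.g. zero the pages and return
	 * them to a pool). DF_UNDER_PRESSURE: release memory
	 * immediately and avoid allocating in this path.
	 */
	free_buffer_pages(buffer, reason);
	kfree(buffer);
}

static void my_heap_dma_buf_release(struct dma_buf *dmabuf)
{
	struct my_heap_buffer *buffer = dmabuf->priv;

	/* Queue for the deferred-free kthread instead of freeing inline */
	deferred_free(&buffer->deferred_free, my_heap_buf_free, buffer->len);
}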

Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Liam Mark <lmark@codeaurora.org>
Cc: Chris Goldsworthy <cgoldswo@codeaurora.org>
Cc: Laura Abbott <labbott@kernel.org>
Cc: Brian Starkey <Brian.Starkey@arm.com>
Cc: Hridya Valsaraju <hridya@google.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Sandeep Patil <sspatil@google.com>
Cc: Daniel Mentz <danielmentz@google.com>
Cc: Ørjan Eide <orjan.eide@arm.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Ezequiel Garcia <ezequiel@collabora.com>
Cc: Simon Ser <contact@emersion.fr>
Cc: James Jones <jajones@nvidia.com>
Cc: linux-media@vger.kernel.org
Cc: dri-devel@lists.freedesktop.org
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
v2:
* Fix sleep-in-atomic issue from using a mutex by switching to
  a spinlock, as reported by the kernel test robot <oliver.sang@intel.com>
* Clean up the API to use a reason enum for clarity and add some
  documentation comments, as suggested by Suren Baghdasaryan.
---
 drivers/dma-buf/heaps/Kconfig                |   3 +
 drivers/dma-buf/heaps/Makefile               |   1 +
 drivers/dma-buf/heaps/deferred-free-helper.c | 136 +++++++++++++++++++
 drivers/dma-buf/heaps/deferred-free-helper.h |  55 ++++++++
 4 files changed, 195 insertions(+)
 create mode 100644 drivers/dma-buf/heaps/deferred-free-helper.c
 create mode 100644 drivers/dma-buf/heaps/deferred-free-helper.h

diff --git a/drivers/dma-buf/heaps/Kconfig b/drivers/dma-buf/heaps/Kconfig
index a5eef06c4226..ecf65204f714 100644
--- a/drivers/dma-buf/heaps/Kconfig
+++ b/drivers/dma-buf/heaps/Kconfig
@@ -1,3 +1,6 @@
+config DMABUF_HEAPS_DEFERRED_FREE
+	bool
+
 config DMABUF_HEAPS_SYSTEM
 	bool "DMA-BUF System Heap"
 	depends on DMABUF_HEAPS
diff --git a/drivers/dma-buf/heaps/Makefile b/drivers/dma-buf/heaps/Makefile
index 974467791032..4e7839875615 100644
--- a/drivers/dma-buf/heaps/Makefile
+++ b/drivers/dma-buf/heaps/Makefile
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_DMABUF_HEAPS_DEFERRED_FREE) += deferred-free-helper.o
 obj-$(CONFIG_DMABUF_HEAPS_SYSTEM)	+= system_heap.o
 obj-$(CONFIG_DMABUF_HEAPS_CMA)		+= cma_heap.o
diff --git a/drivers/dma-buf/heaps/deferred-free-helper.c b/drivers/dma-buf/heaps/deferred-free-helper.c
new file mode 100644
index 000000000000..cf04148167a2
--- /dev/null
+++ b/drivers/dma-buf/heaps/deferred-free-helper.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Deferred dmabuf freeing helper
+ *
+ * Copyright (C) 2020 Linaro, Ltd.
+ *
+ * Based on the ION page pool code
+ * Copyright (C) 2011 Google, Inc.
+ */
+
+#include <linux/freezer.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+#include <linux/sched/signal.h>
+
+#include "deferred-free-helper.h"
+
+static LIST_HEAD(free_list);
+static size_t list_size;
+wait_queue_head_t freelist_waitqueue;
+struct task_struct *freelist_task;
+static DEFINE_SPINLOCK(free_list_lock);
+
+void deferred_free(struct deferred_freelist_item *item,
+		   void (*free)(struct deferred_freelist_item*,
+				enum df_reason),
+		   size_t size)
+{
+	unsigned long flags;
+
+	INIT_LIST_HEAD(&item->list);
+	item->size = size;
+	item->free = free;
+
+	spin_lock_irqsave(&free_list_lock, flags);
+	list_add(&item->list, &free_list);
+	list_size += size;
+	spin_unlock_irqrestore(&free_list_lock, flags);
+	wake_up(&freelist_waitqueue);
+}
+
+static size_t free_one_item(enum df_reason reason)
+{
+	unsigned long flags;
+	size_t size = 0;
+	struct deferred_freelist_item *item;
+
+	spin_lock_irqsave(&free_list_lock, flags);
+	if (list_empty(&free_list)) {
+		spin_unlock_irqrestore(&free_list_lock, flags);
+		return 0;
+	}
+	item = list_first_entry(&free_list, struct deferred_freelist_item, list);
+	list_del(&item->list);
+	size = item->size;
+	list_size -= size;
+	spin_unlock_irqrestore(&free_list_lock, flags);
+
+	item->free(item, reason);
+	return size;
+}
+
+static unsigned long get_freelist_size(void)
+{
+	unsigned long size;
+	unsigned long flags;
+
+	spin_lock_irqsave(&free_list_lock, flags);
+	size = list_size;
+	spin_unlock_irqrestore(&free_list_lock, flags);
+	return size;
+}
+
+static unsigned long freelist_shrink_count(struct shrinker *shrinker,
+					   struct shrink_control *sc)
+{
+	return get_freelist_size();
+}
+
+static unsigned long freelist_shrink_scan(struct shrinker *shrinker,
+					  struct shrink_control *sc)
+{
+	int total_freed = 0;
+
+	if (sc->nr_to_scan == 0)
+		return 0;
+
+	while (total_freed < sc->nr_to_scan) {
+		int freed = free_one_item(DF_UNDER_PRESSURE);
+
+		if (!freed)
+			break;
+
+		total_freed += freed;
+	}
+
+	return total_freed;
+}
+
+static struct shrinker freelist_shrinker = {
+	.count_objects = freelist_shrink_count,
+	.scan_objects = freelist_shrink_scan,
+	.seeks = DEFAULT_SEEKS,
+	.batch = 0,
+};
+
+static int deferred_free_thread(void *data)
+{
+	while (true) {
+		wait_event_freezable(freelist_waitqueue,
+				     get_freelist_size() > 0);
+
+		free_one_item(DF_NORMAL);
+	}
+
+	return 0;
+}
+
+static int deferred_freelist_init(void)
+{
+	list_size = 0;
+
+	init_waitqueue_head(&freelist_waitqueue);
+	freelist_task = kthread_run(deferred_free_thread, NULL,
+				    "%s", "dmabuf-deferred-free-worker");
+	if (IS_ERR(freelist_task)) {
+		pr_err("%s: creating thread for deferred free failed\n",
+		       __func__);
+		return -1;
+	}
+	sched_set_normal(freelist_task, 19);
+
+	return register_shrinker(&freelist_shrinker);
+}
+device_initcall(deferred_freelist_init);
diff --git a/drivers/dma-buf/heaps/deferred-free-helper.h b/drivers/dma-buf/heaps/deferred-free-helper.h
new file mode 100644
index 000000000000..2c43dd5a3eda
--- /dev/null
+++ b/drivers/dma-buf/heaps/deferred-free-helper.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef DEFERRED_FREE_HELPER_H
+#define DEFERRED_FREE_HELPER_H
+
+/**
+ * df_reason - enum for reason why item was freed
+ *
+ * This provides the reason the free function was called on the
+ * item. It is useful when deferred_free is used in combination
+ * with a pagepool, so that under memory pressure the pages can
+ * be freed immediately rather than returned to the pool.
+ *
+ * DF_NORMAL:         Normal deferred free
+ *
+ * DF_UNDER_PRESSURE: Free was called because the system
+ *                    is under memory pressure. Usually
+ *                    from a shrinker. Avoid allocating
+ *                    memory in the free call, as it may
+ *                    fail.
+ */
+enum df_reason {
+	DF_NORMAL,
+	DF_UNDER_PRESSURE,
+};
+
+/**
+ * deferred_freelist_item - item structure for deferred freelist
+ *
+ * Embed this in the structure whose freeing you want to
+ * defer.
+ *
+ * @size: size of the item to be freed
+ * @free: function pointer to be called when freeing the item
+ * @list: list entry for the deferred list
+ */
+struct deferred_freelist_item {
+	size_t size;
+	void (*free)(struct deferred_freelist_item *i,
+		     enum df_reason reason);
+	struct list_head list;
+};
+
+/**
+ * deferred_free - call to add item to the deferred free list
+ *
+ * @item: Pointer to deferred_freelist_item field of a structure
+ * @free: Function pointer to the free call
+ * @size: Size of the item to be freed
+ */
+void deferred_free(struct deferred_freelist_item *item,
+		   void (*free)(struct deferred_freelist_item *i,
+				enum df_reason reason),
+		   size_t size);
+#endif
-- 
2.17.1



* [PATCH v2 2/3] dma-buf: system_heap: Add pagepool support to system heap
  2021-01-23  3:46 [PATCH v2 1/3] dma-buf: heaps: Add deferred-free-helper library code John Stultz
@ 2021-01-23  3:46 ` John Stultz
  2021-01-27 20:21   ` Daniel Mentz
  2021-01-23  3:46 ` [PATCH v2 3/3] dma-buf: system_heap: Add deferred freeing to the " John Stultz
  1 sibling, 1 reply; 6+ messages in thread
From: John Stultz @ 2021-01-23  3:46 UTC (permalink / raw)
  To: lkml
  Cc: John Stultz, Daniel Vetter, Sumit Semwal, Liam Mark,
	Chris Goldsworthy, Laura Abbott, Brian Starkey, Hridya Valsaraju,
	Suren Baghdasaryan, Sandeep Patil, Daniel Mentz, Ørjan Eide,
	Robin Murphy, Ezequiel Garcia, Simon Ser, James Jones,
	linux-media, dri-devel

Reuse/abuse the page pool code from the networking stack to speed
up allocation performance.

This is similar to the ION pagepool usage, but tries to
utilize generic code instead of a custom implementation.
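
The page_pool pattern this builds on, boiled down from the diff
below (a sketch only, using the same calls as the patch):

	struct page_pool_params pp;
	struct page_pool *pool;
	struct page *page;

	memset(&pp, 0, sizeof(pp));
	pp.order = order;	/* one pool per order: 8, 4, 0 */
	pool = page_pool_create(&pp);

	/* Allocation: reuse a pooled page of that order, falling back
	 * to the page allocator when the pool is empty. */
	page = page_pool_alloc_pages(pool, GFP_KERNEL);

	/* Free: return the page to the pool instead of to the buddy
	 * allocator, so the next allocation can reuse it. */
	page_pool_put_full_page(pool, page, false);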

Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Liam Mark <lmark@codeaurora.org>
Cc: Chris Goldsworthy <cgoldswo@codeaurora.org>
Cc: Laura Abbott <labbott@kernel.org>
Cc: Brian Starkey <Brian.Starkey@arm.com>
Cc: Hridya Valsaraju <hridya@google.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Sandeep Patil <sspatil@google.com>
Cc: Daniel Mentz <danielmentz@google.com>
Cc: Ørjan Eide <orjan.eide@arm.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Ezequiel Garcia <ezequiel@collabora.com>
Cc: Simon Ser <contact@emersion.fr>
Cc: James Jones <jajones@nvidia.com>
Cc: linux-media@vger.kernel.org
Cc: dri-devel@lists.freedesktop.org
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
v2:
* Fix build issue caused by selecting PAGE_POOL w/o NET,
  as reported by the kernel test robot <lkp@intel.com>
---
 drivers/dma-buf/heaps/Kconfig       |  2 +
 drivers/dma-buf/heaps/system_heap.c | 68 +++++++++++++++++++++++++++--
 2 files changed, 66 insertions(+), 4 deletions(-)

diff --git a/drivers/dma-buf/heaps/Kconfig b/drivers/dma-buf/heaps/Kconfig
index ecf65204f714..748e840e6edd 100644
--- a/drivers/dma-buf/heaps/Kconfig
+++ b/drivers/dma-buf/heaps/Kconfig
@@ -4,6 +4,8 @@ config DMABUF_HEAPS_DEFERRED_FREE
 config DMABUF_HEAPS_SYSTEM
 	bool "DMA-BUF System Heap"
 	depends on DMABUF_HEAPS
+	select NET
+	select PAGE_POOL
 	help
 	  Choose this option to enable the system dmabuf heap. The system heap
 	  is backed by pages from the buddy allocator. If in doubt, say Y.
diff --git a/drivers/dma-buf/heaps/system_heap.c b/drivers/dma-buf/heaps/system_heap.c
index 17e0e9a68baf..885e30894b77 100644
--- a/drivers/dma-buf/heaps/system_heap.c
+++ b/drivers/dma-buf/heaps/system_heap.c
@@ -20,6 +20,7 @@
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
+#include <net/page_pool.h>
 
 static struct dma_heap *sys_heap;
 
@@ -53,6 +54,7 @@ static gfp_t order_flags[] = {HIGH_ORDER_GFP, LOW_ORDER_GFP, LOW_ORDER_GFP};
  */
 static const unsigned int orders[] = {8, 4, 0};
 #define NUM_ORDERS ARRAY_SIZE(orders)
+struct page_pool *pools[NUM_ORDERS];
 
 static struct sg_table *dup_sg_table(struct sg_table *table)
 {
@@ -281,18 +283,59 @@ static void system_heap_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map)
 	dma_buf_map_clear(map);
 }
 
+static int system_heap_clear_pages(struct page **pages, int num, pgprot_t pgprot)
+{
+	void *addr = vmap(pages, num, VM_MAP, pgprot);
+
+	if (!addr)
+		return -ENOMEM;
+	memset(addr, 0, PAGE_SIZE * num);
+	vunmap(addr);
+	return 0;
+}
+
+static int system_heap_zero_buffer(struct system_heap_buffer *buffer)
+{
+	struct sg_table *sgt = &buffer->sg_table;
+	struct sg_page_iter piter;
+	struct page *pages[32];
+	int p = 0;
+	int ret = 0;
+
+	for_each_sgtable_page(sgt, &piter, 0) {
+		pages[p++] = sg_page_iter_page(&piter);
+		if (p == ARRAY_SIZE(pages)) {
+			ret = system_heap_clear_pages(pages, p, PAGE_KERNEL);
+			if (ret)
+				return ret;
+			p = 0;
+		}
+	}
+	if (p)
+		ret = system_heap_clear_pages(pages, p, PAGE_KERNEL);
+
+	return ret;
+}
+
 static void system_heap_dma_buf_release(struct dma_buf *dmabuf)
 {
 	struct system_heap_buffer *buffer = dmabuf->priv;
 	struct sg_table *table;
 	struct scatterlist *sg;
-	int i;
+	int i, j;
+
+	/* Zero the buffer pages before adding back to the pool */
+	system_heap_zero_buffer(buffer);
 
 	table = &buffer->sg_table;
 	for_each_sg(table->sgl, sg, table->nents, i) {
 		struct page *page = sg_page(sg);
 
-		__free_pages(page, compound_order(page));
+		for (j = 0; j < NUM_ORDERS; j++) {
+			if (compound_order(page) == orders[j])
+				break;
+		}
+		page_pool_put_full_page(pools[j], page, false);
 	}
 	sg_free_table(table);
 	kfree(buffer);
@@ -322,8 +365,7 @@ static struct page *alloc_largest_available(unsigned long size,
 			continue;
 		if (max_order < orders[i])
 			continue;
-
-		page = alloc_pages(order_flags[i], orders[i]);
+		page = page_pool_alloc_pages(pools[i], order_flags[i]);
 		if (!page)
 			continue;
 		return page;
@@ -428,6 +470,24 @@ static const struct dma_heap_ops system_heap_ops = {
 static int system_heap_create(void)
 {
 	struct dma_heap_export_info exp_info;
+	int i;
+
+	for (i = 0; i < NUM_ORDERS; i++) {
+		struct page_pool_params pp;
+
+		memset(&pp, 0, sizeof(pp));
+		pp.order = orders[i];
+		pools[i] = page_pool_create(&pp);
+
+		if (IS_ERR(pools[i])) {
+			int j;
+
+			pr_err("%s: page pool creation failed!\n", __func__);
+			for (j = 0; j < i; j++)
+				page_pool_destroy(pools[j]);
+			return PTR_ERR(pools[i]);
+		}
+	}
 
 	exp_info.name = "system";
 	exp_info.ops = &system_heap_ops;
-- 
2.17.1



* [PATCH v2 3/3] dma-buf: system_heap: Add deferred freeing to the system heap
  2021-01-23  3:46 [PATCH v2 1/3] dma-buf: heaps: Add deferred-free-helper library code John Stultz
  2021-01-23  3:46 ` [PATCH v2 2/3] dma-buf: system_heap: Add pagepool support to system heap John Stultz
@ 2021-01-23  3:46 ` John Stultz
  1 sibling, 0 replies; 6+ messages in thread
From: John Stultz @ 2021-01-23  3:46 UTC (permalink / raw)
  To: lkml
  Cc: John Stultz, Daniel Vetter, Sumit Semwal, Liam Mark,
	Chris Goldsworthy, Laura Abbott, Brian Starkey, Hridya Valsaraju,
	Suren Baghdasaryan, Sandeep Patil, Daniel Mentz, Ørjan Eide,
	Robin Murphy, Ezequiel Garcia, Simon Ser, James Jones,
	linux-media, dri-devel

Utilize the deferred free helper library in the system heap.

This provides a nice performance bump and puts the
system heap performance on par with ION.

Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Liam Mark <lmark@codeaurora.org>
Cc: Chris Goldsworthy <cgoldswo@codeaurora.org>
Cc: Laura Abbott <labbott@kernel.org>
Cc: Brian Starkey <Brian.Starkey@arm.com>
Cc: Hridya Valsaraju <hridya@google.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Sandeep Patil <sspatil@google.com>
Cc: Daniel Mentz <danielmentz@google.com>
Cc: Ørjan Eide <orjan.eide@arm.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Ezequiel Garcia <ezequiel@collabora.com>
Cc: Simon Ser <contact@emersion.fr>
Cc: James Jones <jajones@nvidia.com>
Cc: linux-media@vger.kernel.org
Cc: dri-devel@lists.freedesktop.org
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
v2:
* Rework the deferred-free API to use a reason enum, as suggested by
  Suren Baghdasaryan
---
 drivers/dma-buf/heaps/Kconfig       |  1 +
 drivers/dma-buf/heaps/system_heap.c | 32 ++++++++++++++++++++++-------
 2 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/drivers/dma-buf/heaps/Kconfig b/drivers/dma-buf/heaps/Kconfig
index 748e840e6edd..3f4d7b949301 100644
--- a/drivers/dma-buf/heaps/Kconfig
+++ b/drivers/dma-buf/heaps/Kconfig
@@ -6,6 +6,7 @@ config DMABUF_HEAPS_SYSTEM
 	depends on DMABUF_HEAPS
 	select NET
 	select PAGE_POOL
+	select DMABUF_HEAPS_DEFERRED_FREE
 	help
 	  Choose this option to enable the system dmabuf heap. The system heap
 	  is backed by pages from the buddy allocator. If in doubt, say Y.
diff --git a/drivers/dma-buf/heaps/system_heap.c b/drivers/dma-buf/heaps/system_heap.c
index 885e30894b77..747fa2250e84 100644
--- a/drivers/dma-buf/heaps/system_heap.c
+++ b/drivers/dma-buf/heaps/system_heap.c
@@ -22,6 +22,8 @@
 #include <linux/vmalloc.h>
 #include <net/page_pool.h>
 
+#include "deferred-free-helper.h"
+
 static struct dma_heap *sys_heap;
 
 struct system_heap_buffer {
@@ -32,6 +34,7 @@ struct system_heap_buffer {
 	struct sg_table sg_table;
 	int vmap_cnt;
 	void *vaddr;
+	struct deferred_freelist_item deferred_free;
 };
 
 struct dma_heap_attachment {
@@ -317,30 +320,45 @@ static int system_heap_zero_buffer(struct system_heap_buffer *buffer)
 	return ret;
 }
 
-static void system_heap_dma_buf_release(struct dma_buf *dmabuf)
+static void system_heap_buf_free(struct deferred_freelist_item *item,
+				 enum df_reason reason)
 {
-	struct system_heap_buffer *buffer = dmabuf->priv;
+	struct system_heap_buffer *buffer;
 	struct sg_table *table;
 	struct scatterlist *sg;
 	int i, j;
 
+	buffer = container_of(item, struct system_heap_buffer, deferred_free);
 	/* Zero the buffer pages before adding back to the pool */
-	system_heap_zero_buffer(buffer);
+	if (reason == DF_NORMAL)
+		if (system_heap_zero_buffer(buffer))
+			reason = DF_UNDER_PRESSURE; // On failure, just free
 
 	table = &buffer->sg_table;
 	for_each_sg(table->sgl, sg, table->nents, i) {
 		struct page *page = sg_page(sg);
 
-		for (j = 0; j < NUM_ORDERS; j++) {
-			if (compound_order(page) == orders[j])
-				break;
+		if (reason == DF_UNDER_PRESSURE) {
+			__free_pages(page, compound_order(page));
+		} else {
+			for (j = 0; j < NUM_ORDERS; j++) {
+				if (compound_order(page) == orders[j])
+					break;
+			}
+			page_pool_put_full_page(pools[j], page, false);
 		}
-		page_pool_put_full_page(pools[j], page, false);
 	}
 	sg_free_table(table);
 	kfree(buffer);
 }
 
+static void system_heap_dma_buf_release(struct dma_buf *dmabuf)
+{
+	struct system_heap_buffer *buffer = dmabuf->priv;
+
+	deferred_free(&buffer->deferred_free, system_heap_buf_free, buffer->len);
+}
+
 static const struct dma_buf_ops system_heap_buf_ops = {
 	.attach = system_heap_attach,
 	.detach = system_heap_detach,
-- 
2.17.1



* Re: [PATCH v2 2/3] dma-buf: system_heap: Add pagepool support to system heap
  2021-01-23  3:46 ` [PATCH v2 2/3] dma-buf: system_heap: Add pagepool support to system heap John Stultz
@ 2021-01-27 20:21   ` Daniel Mentz
  2021-01-28  5:10     ` John Stultz
  0 siblings, 1 reply; 6+ messages in thread
From: Daniel Mentz @ 2021-01-27 20:21 UTC (permalink / raw)
  To: John Stultz
  Cc: lkml, Daniel Vetter, Sumit Semwal, Liam Mark, Chris Goldsworthy,
	Laura Abbott, Brian Starkey, Hridya Valsaraju,
	Suren Baghdasaryan, Sandeep Patil, Ørjan Eide, Robin Murphy,
	Ezequiel Garcia, Simon Ser, James Jones, linux-media, dri-devel

On Fri, Jan 22, 2021 at 7:47 PM John Stultz <john.stultz@linaro.org> wrote:
> +static int system_heap_clear_pages(struct page **pages, int num, pgprot_t pgprot)
> +{
> +       void *addr = vmap(pages, num, VM_MAP, pgprot);
> +
> +       if (!addr)
> +               return -ENOMEM;
> +       memset(addr, 0, PAGE_SIZE * num);
> +       vunmap(addr);
> +       return 0;
> +}

I thought that vmap/vunmap are expensive, and I am wondering if
there's a faster way that avoids vmap.

How about lifting this code from lib/iov_iter.c
static void memzero_page(struct page *page, size_t offset, size_t len)
{
        char *addr = kmap_atomic(page);
        memset(addr + offset, 0, len);
        kunmap_atomic(addr);
}

Or what about lifting that code from the old ion_cma_heap.c

if (PageHighMem(pages)) {
        unsigned long nr_clear_pages = nr_pages;
        struct page *page = pages;

        while (nr_clear_pages > 0) {
                void *vaddr = kmap_atomic(page);

                memset(vaddr, 0, PAGE_SIZE);
                kunmap_atomic(vaddr);
                page++;
                nr_clear_pages--;
        }
} else {
        memset(page_address(pages), 0, size);
}


* Re: [PATCH v2 2/3] dma-buf: system_heap: Add pagepool support to system heap
  2021-01-27 20:21   ` Daniel Mentz
@ 2021-01-28  5:10     ` John Stultz
  2021-01-28  7:04       ` Daniel Mentz
  0 siblings, 1 reply; 6+ messages in thread
From: John Stultz @ 2021-01-28  5:10 UTC (permalink / raw)
  To: Daniel Mentz
  Cc: lkml, Daniel Vetter, Sumit Semwal, Liam Mark, Chris Goldsworthy,
	Laura Abbott, Brian Starkey, Hridya Valsaraju,
	Suren Baghdasaryan, Sandeep Patil, Ørjan Eide, Robin Murphy,
	Ezequiel Garcia, Simon Ser, James Jones, linux-media, dri-devel

On Wed, Jan 27, 2021 at 12:21 PM Daniel Mentz <danielmentz@google.com> wrote:
>
> On Fri, Jan 22, 2021 at 7:47 PM John Stultz <john.stultz@linaro.org> wrote:
> > +static int system_heap_clear_pages(struct page **pages, int num, pgprot_t pgprot)
> > +{
> > +       void *addr = vmap(pages, num, VM_MAP, pgprot);
> > +
> > +       if (!addr)
> > +               return -ENOMEM;
> > +       memset(addr, 0, PAGE_SIZE * num);
> > +       vunmap(addr);
> > +       return 0;
> > +}
>
> I thought that vmap/vunmap are expensive, and I am wondering if
> there's a faster way that avoids vmap.
>
> How about lifting this code from lib/iov_iter.c
> static void memzero_page(struct page *page, size_t offset, size_t len)
> {
>         char *addr = kmap_atomic(page);
>         memset(addr + offset, 0, len);
>         kunmap_atomic(addr);
> }
>
> Or what about lifting that code from the old ion_cma_heap.c
>
> if (PageHighMem(pages)) {
>         unsigned long nr_clear_pages = nr_pages;
>         struct page *page = pages;
>
>         while (nr_clear_pages > 0) {
>                 void *vaddr = kmap_atomic(page);
>
>                 memset(vaddr, 0, PAGE_SIZE);
>                 kunmap_atomic(vaddr);
>                 page++;
>                 nr_clear_pages--;
>         }
> } else {
>         memset(page_address(pages), 0, size);
> }

Though, this last memset only works since CMA is contiguous, so it
probably needs to always do the kmap_atomic for each page, right?

I'm still a little worried if this is right, as the current
implementation with the vmap comes from the old ion_heap_sglist_zero
logic, which similarly tries to batch the vmaps 32 pages at a time,
but I'll give it a try.

thanks
-john


* Re: [PATCH v2 2/3] dma-buf: system_heap: Add pagepool support to system heap
  2021-01-28  5:10     ` John Stultz
@ 2021-01-28  7:04       ` Daniel Mentz
  0 siblings, 0 replies; 6+ messages in thread
From: Daniel Mentz @ 2021-01-28  7:04 UTC (permalink / raw)
  To: John Stultz
  Cc: lkml, Daniel Vetter, Sumit Semwal, Liam Mark, Chris Goldsworthy,
	Laura Abbott, Brian Starkey, Hridya Valsaraju,
	Suren Baghdasaryan, Sandeep Patil, Ørjan Eide, Robin Murphy,
	Ezequiel Garcia, Simon Ser, James Jones, linux-media, dri-devel

On Wed, Jan 27, 2021 at 9:10 PM John Stultz <john.stultz@linaro.org> wrote:
>
> On Wed, Jan 27, 2021 at 12:21 PM Daniel Mentz <danielmentz@google.com> wrote:
> >
> > On Fri, Jan 22, 2021 at 7:47 PM John Stultz <john.stultz@linaro.org> wrote:
> > > +static int system_heap_clear_pages(struct page **pages, int num, pgprot_t pgprot)
> > > +{
> > > +       void *addr = vmap(pages, num, VM_MAP, pgprot);
> > > +
> > > +       if (!addr)
> > > +               return -ENOMEM;
> > > +       memset(addr, 0, PAGE_SIZE * num);
> > > +       vunmap(addr);
> > > +       return 0;
> > > +}
> >
> > I thought that vmap/vunmap are expensive, and I am wondering if
> > there's a faster way that avoids vmap.
> >
> > How about lifting this code from lib/iov_iter.c
> > static void memzero_page(struct page *page, size_t offset, size_t len)
> > {
> >         char *addr = kmap_atomic(page);
> >         memset(addr + offset, 0, len);
> >         kunmap_atomic(addr);
> > }
> >
> > Or what about lifting that code from the old ion_cma_heap.c
> >
> > if (PageHighMem(pages)) {
> >         unsigned long nr_clear_pages = nr_pages;
> >         struct page *page = pages;
> >
> >         while (nr_clear_pages > 0) {
> >                 void *vaddr = kmap_atomic(page);
> >
> >                 memset(vaddr, 0, PAGE_SIZE);
> >                 kunmap_atomic(vaddr);
> >                 page++;
> >                 nr_clear_pages--;
> >         }
> > } else {
> >         memset(page_address(pages), 0, size);
> > }
>
> Though, this last memset only works since CMA is contiguous, so it
> probably needs to always do the kmap_atomic for each page, right?

Yeah, but with the system heap page pool, some of these pages might be
64KB or 1MB large. kmap_atomic(page) just maps to page_address(page)
in most cases. I think iterating over all pages individually in this
manner might still be faster than using vmap.

>
> I'm still a little worried if this is right, as the current
> implementation with the vmap comes from the old ion_heap_sglist_zero
> logic, which similarly tries to batch the vmaps 32 pages at a time,
> but I'll give it a try.
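
A per-page variant of the zeroing along the lines discussed above,
adapted to the heap's sg_table, might look roughly like this
(hypothetical helper, untested):

static void system_heap_zero_buffer_kmap(struct system_heap_buffer *buffer)
{
	struct sg_page_iter piter;

	/* Map and clear one PAGE_SIZE page at a time rather than
	 * batching 32 pages through vmap()/vunmap(). */
	for_each_sgtable_page(&buffer->sg_table, &piter, 0) {
		void *vaddr = kmap_atomic(sg_page_iter_page(&piter));

		memset(vaddr, 0, PAGE_SIZE);
		kunmap_atomic(vaddr);
	}
}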


