All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Cc: mika.kuoppala@intel.com
Subject: [PATCH 02/19] drm/i915: Micro-optimise gen6_ppgtt_insert_entries()
Date: Thu,  2 Feb 2017 15:02:31 +0000	[thread overview]
Message-ID: <20170202150248.27860-2-chris@chris-wilson.co.uk> (raw)
In-Reply-To: <20170202150248.27860-1-chris@chris-wilson.co.uk>

Inline the address computation to avoid the vfunc call for every page.
We still have to pay the high overhead of sg_page_iter_next(), but now
at least GCC can optimise the innermost loop, giving a significant
boost to some thrashing Unreal Engine workloads.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 68 ++++++++++++++++++-------------------
 1 file changed, 33 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index f8cef51cf24c..0d540c244e85 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1885,6 +1885,11 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 	}
 }
 
+struct sgt_dma {
+	struct scatterlist *sg;
+	dma_addr_t dma, max;
+};
+
 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 				      struct sg_table *pages,
 				      uint64_t start,
@@ -1894,27 +1899,34 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 	unsigned first_entry = start >> PAGE_SHIFT;
 	unsigned act_pt = first_entry / GEN6_PTES;
 	unsigned act_pte = first_entry % GEN6_PTES;
-	gen6_pte_t *pt_vaddr = NULL;
-	struct sgt_iter sgt_iter;
-	dma_addr_t addr;
+	const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
+	struct sgt_dma iter;
+	gen6_pte_t *vaddr;
+
+	vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
+	iter.sg = pages->sgl;
+	iter.dma = sg_dma_address(iter.sg);
+	iter.max = iter.dma + iter.sg->length;
+	do {
+		vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);
 
-	for_each_sgt_dma(addr, sgt_iter, pages) {
-		if (pt_vaddr == NULL)
-			pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
+		iter.dma += PAGE_SIZE;
+		if (iter.dma == iter.max) {
+			iter.sg = __sg_next(iter.sg);
+			if (!iter.sg)
+				break;
 
-		pt_vaddr[act_pte] =
-			vm->pte_encode(addr, cache_level, flags);
+			iter.dma = sg_dma_address(iter.sg);
+			iter.max = iter.dma + iter.sg->length;
+		}
 
 		if (++act_pte == GEN6_PTES) {
-			kunmap_px(ppgtt, pt_vaddr);
-			pt_vaddr = NULL;
-			act_pt++;
+			kunmap_px(ppgtt, vaddr);
+			vaddr = kmap_px(ppgtt->pd.page_table[++act_pt]);
 			act_pte = 0;
 		}
-	}
-
-	if (pt_vaddr)
-		kunmap_px(ppgtt, pt_vaddr);
+	} while (1);
+	kunmap_px(ppgtt, vaddr);
 }
 
 static int gen6_alloc_va_range(struct i915_address_space *vm,
@@ -2496,27 +2508,13 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 				     enum i915_cache_level level, u32 flags)
 {
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-	struct sgt_iter sgt_iter;
-	gen6_pte_t __iomem *gtt_entries;
-	gen6_pte_t gtt_entry;
+	gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm;
+	unsigned int i = start >> PAGE_SHIFT;
+	struct sgt_iter iter;
 	dma_addr_t addr;
-	int i = 0;
-
-	gtt_entries = (gen6_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
-
-	for_each_sgt_dma(addr, sgt_iter, st) {
-		gtt_entry = vm->pte_encode(addr, level, flags);
-		iowrite32(gtt_entry, &gtt_entries[i++]);
-	}
-
-	/* XXX: This serves as a posting read to make sure that the PTE has
-	 * actually been updated. There is some concern that even though
-	 * registers and PTEs are within the same BAR that they are potentially
-	 * of NUMA access patterns. Therefore, even with the way we assume
-	 * hardware should work, we must keep this posting read for paranoia.
-	 */
-	if (i != 0)
-		WARN_ON(readl(&gtt_entries[i-1]) != gtt_entry);
+	for_each_sgt_dma(addr, iter, st)
+		iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]);
+	wmb();
 
 	/* This next bit makes the above posting read even more important. We
 	 * want to flush the TLBs only after we're certain all the PTE updates
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2017-02-02 15:02 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-02-02 15:02 [PATCH 01/19] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Chris Wilson
2017-02-02 15:02 ` Chris Wilson [this message]
2017-02-09 11:34   ` [PATCH 02/19] drm/i915: Micro-optimise gen6_ppgtt_insert_entries() Matthew Auld
2017-02-02 15:02 ` [PATCH 03/19] drm/i915: Micro-optimise gen8_ppgtt_insert_entries() Chris Wilson
2017-02-02 15:32   ` Chris Wilson
2017-02-02 15:57     ` Tvrtko Ursulin
2017-02-02 16:10       ` Chris Wilson
2017-02-02 16:39         ` Tvrtko Ursulin
2017-02-02 17:05           ` Chris Wilson
2017-02-02 17:17             ` Tvrtko Ursulin
2017-02-03  8:34               ` Chris Wilson
2017-02-02 15:02 ` [PATCH 04/19] drm/i915: Don't special case teardown of aliasing_ppgtt Chris Wilson
2017-02-06 14:21   ` Matthew Auld
2017-02-02 15:02 ` [PATCH 05/19] drm/i915: Split ggtt/alasing_gtt unbind_vma Chris Wilson
2017-02-06 15:07   ` Matthew Auld
2017-02-02 15:02 ` [PATCH 06/19] drm/i915: Convert clflushed pagetables over to WC maps Chris Wilson
2017-02-09 15:08   ` Mika Kuoppala
2017-02-09 15:11     ` Mika Kuoppala
2017-02-02 15:02 ` [PATCH 07/19] drm/i915: Remove kmap/kunmap wrappers Chris Wilson
2017-02-10 11:25   ` Matthew Auld
2017-02-02 15:02 ` [PATCH 08/19] drm/i915: Remove user-triggerable WARN for large objects Chris Wilson
2017-02-02 15:07   ` Matthew Auld
2017-02-02 15:02 ` [PATCH 09/19] drm/i915: Move allocate_va_range to GTT Chris Wilson
2017-02-07 10:01   ` Matthew Auld
2017-02-02 15:02 ` [PATCH 10/19] drm/i915: Remove redundant clear of appgtt Chris Wilson
2017-02-07 10:06   ` Matthew Auld
2017-02-02 15:02 ` [PATCH 11/19] drm/i915: Tidy gen6_write_pde() Chris Wilson
2017-02-07 10:18   ` Matthew Auld
2017-02-02 15:02 ` [PATCH 12/19] drm/i915: Remove bitmap tracking for used-ptes Chris Wilson
2017-02-06 20:32   ` Michał Winiarski
2017-02-02 15:02 ` [PATCH 13/19] drm/i915: Remove bitmap tracking for used-pdes Chris Wilson
2017-02-08 16:30   ` Matthew Auld
2017-02-02 15:02 ` [PATCH 14/19] drm/i915: Remove bitmap tracking for used-pdpes Chris Wilson
2017-02-08 17:42   ` Matthew Auld
2017-02-02 15:02 ` [PATCH 15/19] drm/i915: Remove bitmap tracking for used-pml4 Chris Wilson
2017-02-08 17:47   ` Matthew Auld
2017-02-02 15:02 ` [PATCH 16/19] drm/i915: Remove superfluous posting reads after clear GGTT Chris Wilson
2017-02-08 17:48   ` Matthew Auld
2017-02-02 15:02 ` [PATCH 17/19] drm/i915: Always mark the PDP as dirty when altered Chris Wilson
2017-02-08 17:53   ` Matthew Auld
2017-02-02 15:02 ` [PATCH 18/19] drm/i915: Remove defunct GTT tracepoints Chris Wilson
2017-02-08 18:01   ` Matthew Auld
2017-02-02 15:02 ` [PATCH 19/19] drm/i915: Remove unused ppgtt->enable() Chris Wilson
2017-02-03 13:04   ` Joonas Lahtinen
2017-02-02 17:54 ` ✓ Fi.CI.BAT: success for series starting with [01/19] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() Patchwork
2017-02-07 15:30 ` [PATCH 01/19] " Mika Kuoppala

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170202150248.27860-2-chris@chris-wilson.co.uk \
    --to=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=mika.kuoppala@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.