From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH 07/11] drm/i915/gtt: Use NULL to encode scratch shadow entries
Date: Wed, 10 Jul 2019 19:21:15 +0300	[thread overview]
Message-ID: <87y315q2ro.fsf@gaia.fi.intel.com> (raw)
In-Reply-To: <20190707210024.26192-8-chris@chris-wilson.co.uk>

Chris Wilson <chris@chris-wilson.co.uk> writes:

> We can simplify our gtt walking code by comparing against NULL for
> scratch entries as opposed to looking up the distinct per-level scratch
> pointer.
>
> The only caveat is to remember to protect external parties and map the
> NULL to the scratch top pd.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 124 +++++++++-------------------
>  drivers/gpu/drm/i915/i915_gem_gtt.h |   2 +-
>  2 files changed, 41 insertions(+), 85 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index b7882f06214a..a99b89502a90 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -596,18 +596,17 @@ static void cleanup_page_dma(struct i915_address_space *vm,
>  
>  #define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)
>  
> -#define fill_px(px, v) fill_page_dma(px_base(px), (v))
> -#define fill32_px(px, v) fill_page_dma_32(px_base(px), (v))
> -
> -static void fill_page_dma(struct i915_page_dma *p, const u64 val)
> +static void
> +fill_page_dma(struct i915_page_dma *p, const u64 val, unsigned int count)
>  {
> -	kunmap_atomic(memset64(kmap_atomic(p->page), val, I915_PDES));
> +	kunmap_atomic(memset64(kmap_atomic(p->page), val, count));
>  }
>  
> -static void fill_page_dma_32(struct i915_page_dma *p, const u32 v)
> -{
> -	fill_page_dma(p, (u64)v << 32 | v);
> -}
> +#define fill_px(px, v) fill_page_dma(px_base(px), (v), I915_PDES)
> +#define fill32_px(px, v) do { \
> +	u64 vv = lower_32_bits(v); \
> +	fill_px(px, vv << 32 | vv); \
> +} while (0)
>  
>  static int
>  setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
> @@ -711,7 +710,6 @@ static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
>  	}
>  
>  	atomic_set(&pt->used, 0);
> -
>  	return pt;
>  }
>  
> @@ -719,13 +717,11 @@ static struct i915_page_directory *__alloc_pd(void)
>  {
>  	struct i915_page_directory *pd;
>  
> -	pd = kmalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL);
> +	pd = kzalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL);
>  	if (unlikely(!pd))
>  		return NULL;
>  
> -	atomic_set(px_used(pd), 0);
>  	spin_lock_init(&pd->lock);
> -
>  	return pd;
>  }
>  
> @@ -753,63 +749,56 @@ static void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd)
>  
>  #define free_px(vm, px) free_pd(vm, px_base(px))
>  
> -static void init_pd(struct i915_page_directory *pd,
> -		    struct i915_page_scratch *scratch)
> -{
> -	fill_px(pd, scratch->encode);
> -	memset_p(pd->entry, scratch, 512);
> -}
> -
>  static inline void
>  write_dma_entry(struct i915_page_dma * const pdma,
> -		const unsigned short pde,
> +		const unsigned short idx,
>  		const u64 encoded_entry)
>  {
>  	u64 * const vaddr = kmap_atomic(pdma->page);
>  
> -	vaddr[pde] = encoded_entry;
> +	vaddr[idx] = encoded_entry;
>  	kunmap_atomic(vaddr);
>  }
>  
>  static inline void
>  __set_pd_entry(struct i915_page_directory * const pd,
> -	       const unsigned short pde,
> +	       const unsigned short idx,

My excuse was that, as it is a pd, 'pde' fits.
But considering that this now operates at any
level, and 'pde' in bspec is reserved for the
last level, 'idx' is better.

>  	       struct i915_page_dma * const to,
>  	       u64 (*encode)(const dma_addr_t, const enum i915_cache_level))
>  {
>  	GEM_BUG_ON(atomic_read(px_used(pd)) > 512);
>  
>  	atomic_inc(px_used(pd));
> -	pd->entry[pde] = to;
> -	write_dma_entry(px_base(pd), pde, encode(to->daddr, I915_CACHE_LLC));
> +	pd->entry[idx] = to;
> +	write_dma_entry(px_base(pd), idx, encode(to->daddr, I915_CACHE_LLC));
>  }
>  
> -#define set_pd_entry(pd, pde, to) \
> -	__set_pd_entry((pd), (pde), px_base(to), gen8_pde_encode)
> +#define set_pd_entry(pd, idx, to) \
> +	__set_pd_entry((pd), (idx), px_base(to), gen8_pde_encode)
>  
>  static inline void
>  clear_pd_entry(struct i915_page_directory * const pd,
> -	       const unsigned short pde,
> -	       struct i915_page_scratch * const scratch)
> +	       const unsigned short idx,
> +	       const struct i915_page_scratch * const scratch)
>  {
>  	GEM_BUG_ON(atomic_read(px_used(pd)) == 0);
>  
> -	write_dma_entry(px_base(pd), pde, scratch->encode);
> -	pd->entry[pde] = scratch;
> +	write_dma_entry(px_base(pd), idx, scratch->encode);
> +	pd->entry[idx] = NULL;
>  	atomic_dec(px_used(pd));
>  }
>  
>  static bool
>  release_pd_entry(struct i915_page_directory * const pd,
> -		 const unsigned short pde,
> +		 const unsigned short idx,
>  		 struct i915_page_table * const pt,
> -		 struct i915_page_scratch * const scratch)
> +		 const struct i915_page_scratch * const scratch)
>  {
>  	bool free = false;
>  
>  	spin_lock(&pd->lock);
>  	if (atomic_dec_and_test(&pt->used)) {
> -		clear_pd_entry(pd, pde, scratch);
> +		clear_pd_entry(pd, idx, scratch);
>  		free = true;
>  	}
>  	spin_unlock(&pd->lock);
> @@ -910,7 +899,7 @@ static void gen8_free_page_tables(struct i915_address_space *vm,
>  	int i;
>  
>  	for (i = 0; i < I915_PDES; i++) {
> -		if (pd->entry[i] != &vm->scratch[1])
> +		if (pd->entry[i])
>  			free_pd(vm, pd->entry[i]);
>  	}
>  }
> @@ -922,7 +911,7 @@ static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
>  	int i;
>  
>  	for (i = 0; i < pdpes; i++) {
> -		if (pdp->entry[i] == &vm->scratch[2])
> +		if (!pdp->entry[i])
>  			continue;
>  
>  		gen8_free_page_tables(vm, pdp->entry[i]);
> @@ -940,7 +929,7 @@ static void gen8_ppgtt_cleanup_4lvl(struct i915_ppgtt *ppgtt)
>  	for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) {
>  		struct i915_page_directory *pdp = i915_pdp_entry(pml4, i);
>  
> -		if (px_base(pdp) == px_base(&ppgtt->vm.scratch[3]))
> +		if (!pdp)
>  			continue;
>  
>  		gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, pdp);
> @@ -994,8 +983,6 @@ static void gen8_ppgtt_clear_pd(struct i915_address_space *vm,
>  	u32 pde;
>  
>  	gen8_for_each_pde(pt, pd, start, length, pde) {
> -		GEM_BUG_ON(px_base(pt) == px_base(&vm->scratch[1]));
> -

A NULL oops is at least equally descriptive.
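
To spell it out (my sketch, not code from the patch):
scratch entries are now stored as NULL, so a walker
that lands on one faults on its very first step, e.g.

	struct i915_page_table *pt = pd->entry[pde];	/* NULL for scratch */

	atomic_inc(&pt->used);	/* immediate NULL-pointer oops */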

>  		atomic_inc(&pt->used);
>  		gen8_ppgtt_clear_pt(vm, pt, start, length);
>  		if (release_pd_entry(pd, pde, pt, &vm->scratch[1]))
> @@ -1014,8 +1001,6 @@ static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
>  	unsigned int pdpe;
>  
>  	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
> -		GEM_BUG_ON(px_base(pd) == px_base(&vm->scratch[2]));
> -
>  		atomic_inc(px_used(pd));
>  		gen8_ppgtt_clear_pd(vm, pd, start, length);
>  		if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch[2]))
> @@ -1044,8 +1029,6 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
>  	GEM_BUG_ON(!i915_vm_is_4lvl(vm));
>  
>  	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
> -		GEM_BUG_ON(px_base(pdp) == px_base(&vm->scratch[3]));
> -
>  		atomic_inc(px_used(pdp));
>  		gen8_ppgtt_clear_pdp(vm, pdp, start, length);
>  		if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch[3]))
> @@ -1066,7 +1049,7 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
>  	gen8_for_each_pde(pt, pd, start, length, pde) {
>  		const int count = gen8_pte_count(start, length);
>  
> -		if (px_base(pt) == px_base(&vm->scratch[1])) {
> +		if (!pt) {
>  			spin_unlock(&pd->lock);
>  
>  			pt = fetch_and_zero(&alloc);
> @@ -1081,7 +1064,7 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
>  				fill_px(pt, vm->scratch[0].encode);
>  
>  			spin_lock(&pd->lock);
> -			if (pd->entry[pde] == &vm->scratch[1]) {
> +			if (!pd->entry[pde]) {
>  				set_pd_entry(pd, pde, pt);
>  			} else {
>  				alloc = pt;
> @@ -1113,7 +1096,7 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
>  
>  	spin_lock(&pdp->lock);
>  	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
> -		if (px_base(pd) == px_base(&vm->scratch[2])) {
> +		if (!pd) {
>  			spin_unlock(&pdp->lock);
>  
>  			pd = fetch_and_zero(&alloc);
> @@ -1124,10 +1107,10 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
>  				goto unwind;
>  			}
>  
> -			init_pd(pd, &vm->scratch[1]);
> +			fill_px(pd, vm->scratch[1].encode);
>  
>  			spin_lock(&pdp->lock);
> -			if (pdp->entry[pdpe] == &vm->scratch[2]) {
> +			if (!pdp->entry[pdpe]) {
>  				set_pd_entry(pdp, pdpe, pd);
>  			} else {
>  				alloc = pd;
> @@ -1177,7 +1160,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
>  
>  	spin_lock(&pml4->lock);
>  	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
> -		if (px_base(pdp) == px_base(&vm->scratch[3])) {
> +		if (!pdp) {
>  			spin_unlock(&pml4->lock);
>  
>  			pdp = fetch_and_zero(&alloc);
> @@ -1188,10 +1171,10 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
>  				goto unwind;
>  			}
>  
> -			init_pd(pdp, &vm->scratch[2]);
> +			fill_px(pdp, vm->scratch[2].encode);
>  
>  			spin_lock(&pml4->lock);
> -			if (pml4->entry[pml4e] == &vm->scratch[3]) {
> +			if (!pml4->entry[pml4e]) {
>  				set_pd_entry(pml4, pml4e, pdp);
>  			} else {
>  				alloc = pdp;
> @@ -1527,7 +1510,7 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
>  		if (IS_ERR(pd))
>  			goto unwind;
>  
> -		init_pd(pd, &vm->scratch[1]);
> +		fill_px(pd, vm->scratch[1].encode);
>  		set_pd_entry(pdp, pdpe, pd);
>  	}
>  
> @@ -1558,46 +1541,19 @@ static void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt)
>  	ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
>  }
>  
> -static void init_pd_n(struct i915_address_space *vm,
> -		      struct i915_page_directory *pd,
> -		      struct i915_page_scratch *scratch,
> -		      const unsigned int entries)
> -{
> -	u64 * const vaddr = kmap_atomic_px(pd);
> -
> -	memset64(vaddr, scratch->encode, entries);
> -	kunmap_atomic(vaddr);
> -
> -	memset_p(pd->entry, scratch, entries);
> -}
> -
>  static struct i915_page_directory *
>  gen8_alloc_top_pd(struct i915_address_space *vm)
>  {
> +	const unsigned int count = vm->total >> __gen8_pte_shift(vm->top);

Nice.
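
A quick sanity check of that expression (my
arithmetic, assuming __gen8_pte_shift(lvl) ==
12 + 9 * lvl as patch 05 sets it up):

	#include <assert.h>
	#include <stdint.h>

	/* userspace reconstruction, not the i915 header */
	#define gen8_pte_shift(lvl) (12 + 9 * (lvl))

	int main(void)
	{
		/* 4lvl: top == 3, 48b vm -> a full 512-entry pml4 */
		assert(((UINT64_C(1) << 48) >> gen8_pte_shift(3)) == 512);

		/* 3lvl: top == 2, 32b vm -> GEN8_3LVL_PDPES == 4 */
		assert(((UINT64_C(1) << 32) >> gen8_pte_shift(2)) == 4);

		return 0;
	}

One expression covers both tops, which is what lets
you drop the special casing below.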

>  	struct i915_page_directory *pd;
>  
> -	if (i915_vm_is_4lvl(vm)) {
> -		pd = alloc_pd(vm);
> -		if (!IS_ERR(pd))
> -			init_pd(pd, &vm->scratch[3]);
> +	GEM_BUG_ON(count > ARRAY_SIZE(pd->entry));

I don't have to think. I like that.
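
And the check is safe even though pd is still
unassigned at that point: ARRAY_SIZE() boils down
to sizeof, which never evaluates its operand. A
minimal illustration (mine):

	#include <stddef.h>

	struct pd { void *entry[512]; };

	size_t entries(void)
	{
		struct pd *pd;	/* deliberately never assigned */

		/* compile-time only, pd is not dereferenced */
		return sizeof(pd->entry) / sizeof(pd->entry[0]);	/* 512 */
	}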

>  
> +	pd = alloc_pd(vm);
> +	if (IS_ERR(pd))
>  		return pd;
> -	}
> -
> -	/* 3lvl */
> -	pd = __alloc_pd();
> -	if (!pd)
> -		return ERR_PTR(-ENOMEM);
> -
> -	pd->entry[GEN8_3LVL_PDPES] = NULL;

Ok, you don't like the sentry. Perhaps you could
write a few soothing words about how noisily we
crash if we run long on this runway. If the tower
sees it and sends the firetrucks, it is fine.

> -
> -	if (unlikely(setup_page_dma(vm, px_base(pd)))) {
> -		kfree(pd);
> -		return ERR_PTR(-ENOMEM);
> -	}
> -
> -	init_pd_n(vm, pd, &vm->scratch[2], GEN8_3LVL_PDPES);
>  
> +	fill_page_dma(px_base(pd), vm->scratch[vm->top].encode, count);
>  	return pd;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 669b204d4c13..2341944b9b17 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -610,7 +610,7 @@ i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n)
>  {
>  	struct i915_page_dma *pt = ppgtt->pd->entry[n];
>  
> -	return px_dma(pt);
> +	return px_dma(pt ?: px_base(&ppgtt->vm.scratch[ppgtt->vm.top]));

No other external users.

Earlier in the series I yearned a little for an
is_entry_available|free|empty helper. But comparing
against NULL beats any of those.
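
Spelling out the ?: for anyone skimming (an
equivalent form, not new code):

	struct i915_page_dma *base =
		pt ?: px_base(&ppgtt->vm.scratch[ppgtt->vm.top]);

	return px_dma(base);	/* scratch dma addr for empty slots */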

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

>  }
>  
>  static inline struct i915_ggtt *
> -- 
> 2.20.1