All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] nv50/ir: Add support for MAD short+IMM notation
@ 2015-01-11  0:22 Roy Spliet
       [not found] ` <1420935780-17104-1-git-send-email-rspliet-pPYjZ15w/Xhulxpn9UvDqw@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Roy Spliet @ 2015-01-11  0:22 UTC (permalink / raw)
  To: Nouveau Mailinglist; +Cc: Roy Spliet

MAD IMM has a very specific SDST == SSRC2 requirement, so don't emit it
unless the destination and the third source share the same register.

Signed-off-by: Roy Spliet <rspliet@eclipso.eu>
---
 .../drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp      | 18 ++++++++++++------
 .../drivers/nouveau/codegen/nv50_ir_target_nv50.cpp    |  2 +-
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
index 2077388..b1e7409 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
@@ -939,9 +939,20 @@ CodeEmitterNV50::emitFMAD(const Instruction *i)
 
    code[0] = 0xe0000000;
 
+   if (i->src(1).getFile() == FILE_IMMEDIATE) {
+      code[1] = 0;
+      emitForm_IMM(i);
+      code[0] |= neg_mul << 15;
+      code[0] |= neg_add << 22;
+      if (i->saturate)
+         code[0] |= 1 << 8;
+   } else
    if (i->encSize == 4) {
       emitForm_MUL(i);
-      assert(!neg_mul && !neg_add);
+      code[0] |= neg_mul << 15;
+      code[0] |= neg_add << 22;
+      if (i->saturate)
+         code[0] |= 1 << 8;
    } else {
       code[1]  = neg_mul << 26;
       code[1] |= neg_add << 27;
@@ -1931,11 +1942,6 @@ CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const
 
    // check constraints on short MAD
    if (info.srcNr >= 2 && i->srcExists(2)) {
-      if (i->saturate || i->src(2).mod)
-         return 8;
-      if ((i->src(0).mod ^ i->src(1).mod) ||
-          (i->src(0).mod | i->src(1).mod).abs())
-         return 8;
       if (!i->defExists(0) ||
           i->def(0).rep()->reg.data.id != i->src(2).rep()->reg.data.id)
          return 8;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
index 48f996b..f4dedd7 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
@@ -118,7 +118,7 @@ void TargetNV50::initOpInfo()
    static const uint32_t shortForm[(OP_LAST + 31) / 32] =
    {
       // MOV,ADD,SUB,MUL,SAD,L/PINTERP,RCP,TEX,TXF
-      0x00010e40, 0x00000040, 0x00000498, 0x00000000
+      0x00014e40, 0x00000040, 0x00000498, 0x00000000
    };
    static const operation noDestList[] =
    {
-- 
2.1.0



_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/3] nv50/ir: For MAD, prefer SDST == SSRC2
       [not found] ` <1420935780-17104-1-git-send-email-rspliet-pPYjZ15w/Xhulxpn9UvDqw@public.gmane.org>
@ 2015-01-11  0:22   ` Roy Spliet
       [not found]     ` <1420935780-17104-2-git-send-email-rspliet-pPYjZ15w/Xhulxpn9UvDqw@public.gmane.org>
  2015-01-11  0:23   ` [PATCH 3/3] nv50/ir: Fold IMM into MAD Roy Spliet
  2015-01-11  0:34   ` [PATCH 1/3] nv50/ir: Add support for MAD short+IMM notation Ilia Mirkin
  2 siblings, 1 reply; 9+ messages in thread
From: Roy Spliet @ 2015-01-11  0:22 UTC (permalink / raw)
  To: Nouveau Mailinglist; +Cc: Roy Spliet

If liveness analysis indicates it's good, this should improve the chances
of being able to emit the short MAD form.

Signed-off-by: Roy Spliet <rspliet@eclipso.eu>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index 898653c..1273449 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -627,6 +627,7 @@ RegAlloc::BuildIntervalsPass::visit(BasicBlock *bb)
 #define JOIN_MASK_UNION      (1 << 1)
 #define JOIN_MASK_MOV        (1 << 2)
 #define JOIN_MASK_TEX        (1 << 3)
+#define JOIN_MASK_MAD        (1 << 4)
 
 class GCRA
 {
@@ -851,7 +852,7 @@ GCRA::coalesce(ArrayList& insns)
    case 0x80:
    case 0x90:
    case 0xa0:
-      ret = doCoalesce(insns, JOIN_MASK_UNION | JOIN_MASK_TEX);
+      ret = doCoalesce(insns, JOIN_MASK_UNION | JOIN_MASK_TEX | JOIN_MASK_MAD);
       break;
    case 0xc0:
    case 0xd0:
@@ -995,6 +996,13 @@ GCRA::doCoalesce(ArrayList& insns, unsigned int mask)
                copyCompound(insn->getSrc(0), insn->getDef(0));
          }
          break;
+      case OP_MAD:
+         if (!(mask & JOIN_MASK_MAD))
+            break;
+         if (insn->srcExists(2) && insn->src(2).getFile() == FILE_GPR &&
+             insn->def(0).getFile() == FILE_GPR)
+            coalesceValues(insn->getDef(0), insn->getSrc(2), false);
+         break;
       case OP_TEX:
       case OP_TXB:
       case OP_TXL:
-- 
2.1.0



_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 3/3] nv50/ir: Fold IMM into MAD
       [not found] ` <1420935780-17104-1-git-send-email-rspliet-pPYjZ15w/Xhulxpn9UvDqw@public.gmane.org>
  2015-01-11  0:22   ` [PATCH 2/3] nv50/ir: For MAD, prefer SDST == SSRC2 Roy Spliet
@ 2015-01-11  0:23   ` Roy Spliet
       [not found]     ` <1420935780-17104-3-git-send-email-rspliet-pPYjZ15w/Xhulxpn9UvDqw@public.gmane.org>
  2015-01-11  0:34   ` [PATCH 1/3] nv50/ir: Add support for MAD short+IMM notation Ilia Mirkin
  2 siblings, 1 reply; 9+ messages in thread
From: Roy Spliet @ 2015-01-11  0:23 UTC (permalink / raw)
  To: Nouveau Mailinglist; +Cc: Roy Spliet

Add a specific optimisation pass for NV50 to check whether SRC0 or SRC1 is
a MOV dst, IMM. If so: fold the IMM in and try to drop the MOV. Must be
done post-RA because it is required that SDST == SSRC2.

Signed-off-by: Roy Spliet <rspliet@eclipso.eu>
---
 .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 52 ++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 21d20ca..1fc3ae6 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -2259,6 +2259,56 @@ FlatteningPass::tryPredicateConditional(BasicBlock *bb)
 
 // =============================================================================
 
+// Fold Immediate into MAD; must be done after register allocation due to
+// constraint SDST == SSRC2
+// TODO:
+// Does NVC0+ have other situations where this pass makes sense?
+class NV50PostRaConstantFolding : public Pass
+{
+private:
+   virtual bool visit(BasicBlock *);
+};
+
+bool
+NV50PostRaConstantFolding::visit(BasicBlock *bb)
+{
+   Value *vtmp;
+   Instruction *def;
+
+   for (Instruction *i = bb->getFirst(); i; i = i->next) {
+      switch (i->op) {
+      case OP_MAD:
+         if(i->def(0).getFile() == FILE_GPR &&
+               i->src(0).getFile() == FILE_GPR &&
+               i->src(1).getFile() == FILE_GPR &&
+               i->src(2).getFile() == FILE_GPR &&
+               i->getDef(0)->reg.data.id == i->getSrc(2)->reg.data.id) {
+            for (int s = 1; s >= 0; s--) {
+               def = i->getSrc(1)->getInsn();
+               if (def->op == OP_MOV && def->src(0).getFile() == FILE_IMMEDIATE) {
+                  vtmp = i->getSrc(1);
+                  i->setSrc(1, def->getSrc(0));
+                  if (vtmp->refCount() == 0)
+                     delete_Instruction(bb->getProgram(), def);
+                  break;
+               }
+
+               vtmp = i->getSrc(0);
+               i->setSrc(0, i->getSrc(1));
+               i->setSrc(1, vtmp);
+            }
+         }
+         break;
+      default:
+         break;
+      }
+   }
+
+   return true;
+}
+
+// =============================================================================
+
 // Common subexpression elimination. Stupid O^2 implementation.
 class LocalCSE : public Pass
 {
@@ -2629,6 +2679,8 @@ bool
 Program::optimizePostRA(int level)
 {
    RUN_PASS(2, FlatteningPass, run);
+   if (getTarget()->getChipset() < 0xc0)
+      RUN_PASS(2, NV50PostRaConstantFolding, run);
    return true;
 }
 
-- 
2.1.0



_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/3] nv50/ir: Add support for MAD short+IMM notation
       [not found] ` <1420935780-17104-1-git-send-email-rspliet-pPYjZ15w/Xhulxpn9UvDqw@public.gmane.org>
  2015-01-11  0:22   ` [PATCH 2/3] nv50/ir: For MAD, prefer SDST == SSRC2 Roy Spliet
  2015-01-11  0:23   ` [PATCH 3/3] nv50/ir: Fold IMM into MAD Roy Spliet
@ 2015-01-11  0:34   ` Ilia Mirkin
       [not found]     ` <CAKb7UvgcpnZvgkG_TEKOncby5ctVGX+igxVmcMLZrH+2682OqQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  2 siblings, 1 reply; 9+ messages in thread
From: Ilia Mirkin @ 2015-01-11  0:34 UTC (permalink / raw)
  To: Roy Spliet; +Cc: Nouveau Mailinglist

And you're allowing saturate/neg emission on the short form. Is this
already in envytools? Also, what's the shortForm thing? This change is
probably fine, but the changelog needs work.

On Sat, Jan 10, 2015 at 7:22 PM, Roy Spliet <rspliet@eclipso.eu> wrote:
> MAD IMM has a very specific SDST == SSRC2 requirement, so don't emit
>
> Signed-off-by: Roy Spliet <rspliet@eclipso.eu>
> ---
>  .../drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp      | 18 ++++++++++++------
>  .../drivers/nouveau/codegen/nv50_ir_target_nv50.cpp    |  2 +-
>  2 files changed, 13 insertions(+), 7 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
> index 2077388..b1e7409 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
> @@ -939,9 +939,20 @@ CodeEmitterNV50::emitFMAD(const Instruction *i)
>
>     code[0] = 0xe0000000;
>
> +   if (i->src(1).getFile() == FILE_IMMEDIATE) {
> +      code[1] = 0;
> +      emitForm_IMM(i);
> +      code[0] |= neg_mul << 15;
> +      code[0] |= neg_add << 22;
> +      if (i->saturate)
> +         code[0] |= 1 << 8;
> +   } else
>     if (i->encSize == 4) {
>        emitForm_MUL(i);
> -      assert(!neg_mul && !neg_add);
> +      code[0] |= neg_mul << 15;
> +      code[0] |= neg_add << 22;
> +      if (i->saturate)
> +         code[0] |= 1 << 8;
>     } else {
>        code[1]  = neg_mul << 26;
>        code[1] |= neg_add << 27;
> @@ -1931,11 +1942,6 @@ CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const
>
>     // check constraints on short MAD
>     if (info.srcNr >= 2 && i->srcExists(2)) {
> -      if (i->saturate || i->src(2).mod)
> -         return 8;
> -      if ((i->src(0).mod ^ i->src(1).mod) ||
> -          (i->src(0).mod | i->src(1).mod).abs())
> -         return 8;
>        if (!i->defExists(0) ||
>            i->def(0).rep()->reg.data.id != i->src(2).rep()->reg.data.id)
>           return 8;
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
> index 48f996b..f4dedd7 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
> @@ -118,7 +118,7 @@ void TargetNV50::initOpInfo()
>     static const uint32_t shortForm[(OP_LAST + 31) / 32] =
>     {
>        // MOV,ADD,SUB,MUL,SAD,L/PINTERP,RCP,TEX,TXF
> -      0x00010e40, 0x00000040, 0x00000498, 0x00000000
> +      0x00014e40, 0x00000040, 0x00000498, 0x00000000
>     };
>     static const operation noDestList[] =
>     {
> --
> 2.1.0
>
>
>
> _______________________________________________
> Nouveau mailing list
> Nouveau@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/nouveau
_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 3/3] nv50/ir: Fold IMM into MAD
       [not found]     ` <1420935780-17104-3-git-send-email-rspliet-pPYjZ15w/Xhulxpn9UvDqw@public.gmane.org>
@ 2015-01-11  0:39       ` Ilia Mirkin
  0 siblings, 0 replies; 9+ messages in thread
From: Ilia Mirkin @ 2015-01-11  0:39 UTC (permalink / raw)
  To: Roy Spliet; +Cc: Nouveau Mailinglist

On Sat, Jan 10, 2015 at 7:23 PM, Roy Spliet <rspliet@eclipso.eu> wrote:
> Add a specific optimisation pass for NV50 to check whether SRC0 or SRC1 is
> a MOV dst, IMM. If so: fold the IMM in and try to drop the MOV. Must be
> done post-RA because it is required that SDST == SSRC2.

"because it requires that"

>
> Signed-off-by: Roy Spliet <rspliet@eclipso.eu>
> ---
>  .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 52 ++++++++++++++++++++++
>  1 file changed, 52 insertions(+)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> index 21d20ca..1fc3ae6 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> @@ -2259,6 +2259,56 @@ FlatteningPass::tryPredicateConditional(BasicBlock *bb)
>
>  // =============================================================================
>
> +// Fold Immediate into MAD; must be done after register allocation due to
> +// constraint SDST == SSRC2
> +// TODO:
> +// Does NVC0+ have other situations where this pass makes sense?
> +class NV50PostRaConstantFolding : public Pass
> +{
> +private:
> +   virtual bool visit(BasicBlock *);
> +};
> +
> +bool
> +NV50PostRaConstantFolding::visit(BasicBlock *bb)
> +{
> +   Value *vtmp;
> +   Instruction *def;
> +
> +   for (Instruction *i = bb->getFirst(); i; i = i->next) {
> +      switch (i->op) {
> +      case OP_MAD:
> +         if(i->def(0).getFile() == FILE_GPR &&
> +               i->src(0).getFile() == FILE_GPR &&
> +               i->src(1).getFile() == FILE_GPR &&
> +               i->src(2).getFile() == FILE_GPR &&
> +               i->getDef(0)->reg.data.id == i->getSrc(2)->reg.data.id) {


This would be much easier to read as

if (... != GPR || != GPR || ...) break; (or continue...)

> +            for (int s = 1; s >= 0; s--) {

You don't end up using 's' in the loop. Did you mean to have some
clever logic that flips the order of src0 and src1 in case the "wrong"
one came from an immediate?

> +               def = i->getSrc(1)->getInsn();
> +               if (def->op == OP_MOV && def->src(0).getFile() == FILE_IMMEDIATE) {
> +                  vtmp = i->getSrc(1);
> +                  i->setSrc(1, def->getSrc(0));
> +                  if (vtmp->refCount() == 0)
> +                     delete_Instruction(bb->getProgram(), def);

This shouldn't be necessary, it's all allocated in an arena and will
get cleaned up later.

> +                  break;
> +               }
> +
> +               vtmp = i->getSrc(0);
> +               i->setSrc(0, i->getSrc(1));
> +               i->setSrc(1, vtmp);
> +            }
> +         }
> +         break;
> +      default:
> +         break;
> +      }
> +   }
> +
> +   return true;
> +}
> +
> +// =============================================================================
> +
>  // Common subexpression elimination. Stupid O^2 implementation.
>  class LocalCSE : public Pass
>  {
> @@ -2629,6 +2679,8 @@ bool
>  Program::optimizePostRA(int level)
>  {
>     RUN_PASS(2, FlatteningPass, run);
> +   if (getTarget()->getChipset() < 0xc0)
> +      RUN_PASS(2, NV50PostRaConstantFolding, run);
>     return true;
>  }
>
> --
> 2.1.0
>
>
>
> _______________________________________________
> Nouveau mailing list
> Nouveau@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/nouveau
_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/3] nv50/ir: For MAD, prefer SDST == SSRC2
       [not found]     ` <1420935780-17104-2-git-send-email-rspliet-pPYjZ15w/Xhulxpn9UvDqw@public.gmane.org>
@ 2015-01-11  0:43       ` Ilia Mirkin
  0 siblings, 0 replies; 9+ messages in thread
From: Ilia Mirkin @ 2015-01-11  0:43 UTC (permalink / raw)
  To: Roy Spliet; +Cc: Nouveau Mailinglist

On Sat, Jan 10, 2015 at 7:22 PM, Roy Spliet <rspliet@eclipso.eu> wrote:
> If liveness analysis indicates it's good, this should improve the chances
> of being able to emit the short MAD form.
>
> Signed-off-by: Roy Spliet <rspliet@eclipso.eu>
> ---
>  src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 10 +++++++++-
>  1 file changed, 9 insertions(+), 1 deletion(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
> index 898653c..1273449 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
> @@ -627,6 +627,7 @@ RegAlloc::BuildIntervalsPass::visit(BasicBlock *bb)
>  #define JOIN_MASK_UNION      (1 << 1)
>  #define JOIN_MASK_MOV        (1 << 2)
>  #define JOIN_MASK_TEX        (1 << 3)
> +#define JOIN_MASK_MAD        (1 << 4)
>
>  class GCRA
>  {
> @@ -851,7 +852,7 @@ GCRA::coalesce(ArrayList& insns)
>     case 0x80:
>     case 0x90:
>     case 0xa0:
> -      ret = doCoalesce(insns, JOIN_MASK_UNION | JOIN_MASK_TEX);
> +      ret = doCoalesce(insns, JOIN_MASK_UNION | JOIN_MASK_TEX | JOIN_MASK_MAD);
>        break;
>     case 0xc0:
>     case 0xd0:
> @@ -995,6 +996,13 @@ GCRA::doCoalesce(ArrayList& insns, unsigned int mask)
>                 copyCompound(insn->getSrc(0), insn->getDef(0));
>           }
>           break;
> +      case OP_MAD:
> +         if (!(mask & JOIN_MASK_MAD))
> +            break;
> +         if (insn->srcExists(2) && insn->src(2).getFile() == FILE_GPR &&
> +             insn->def(0).getFile() == FILE_GPR)

Use the same check here as you do elsewhere... check that
insn->defExists(0). The def might not exist in the case where only flags
are returned, I guess? Not sure if mad actually allows that.

> +            coalesceValues(insn->getDef(0), insn->getSrc(2), false);
> +         break;
>        case OP_TEX:
>        case OP_TXB:
>        case OP_TXL:
> --
> 2.1.0
>
>
>
> _______________________________________________
> Nouveau mailing list
> Nouveau@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/nouveau
_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/3] nv50/ir: Add support for MAD short+IMM notation
       [not found]     ` <CAKb7UvgcpnZvgkG_TEKOncby5ctVGX+igxVmcMLZrH+2682OqQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2015-01-11  0:45       ` Roy Spliet
       [not found]         ` <54B1C7C7.7000805-FA6nBp6kBxZzu6KWmfFNGwC/G2K4zDHf@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Roy Spliet @ 2015-01-11  0:45 UTC (permalink / raw)
  To: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Op 11-01-15 om 01:34 schreef Ilia Mirkin:
> And you're allowing saturate/neg emission on the short form.
Yes
> Is this already in envytools?
Tesla floating point instructions are poorly documented in the RST 
documents; fmad is no exception. I'll make sure to check envydis.
> Also, what's the shortForm thing?
Documented in envytools; see:
http://envytools.readthedocs.org/en/latest/hw/graph/tesla/cuda/isa.html#instruction-format
In short, opcodes are either 4 bytes (short) or 8 bytes (long).
> This change is
> probably fine, but the changelog needs work.
If you insist I could elaborate a little further. However, documenting 
what a short opcode is seems a bit superfluous.
>
> On Sat, Jan 10, 2015 at 7:22 PM, Roy Spliet <rspliet@eclipso.eu> wrote:
>> MAD IMM has a very specific SDST == SSRC2 requirement, so don't emit
>>
>> Signed-off-by: Roy Spliet <rspliet@eclipso.eu>
>> ---
>>   .../drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp      | 18 ++++++++++++------
>>   .../drivers/nouveau/codegen/nv50_ir_target_nv50.cpp    |  2 +-
>>   2 files changed, 13 insertions(+), 7 deletions(-)
>>
>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
>> index 2077388..b1e7409 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
>> @@ -939,9 +939,20 @@ CodeEmitterNV50::emitFMAD(const Instruction *i)
>>
>>      code[0] = 0xe0000000;
>>
>> +   if (i->src(1).getFile() == FILE_IMMEDIATE) {
>> +      code[1] = 0;
>> +      emitForm_IMM(i);
>> +      code[0] |= neg_mul << 15;
>> +      code[0] |= neg_add << 22;
>> +      if (i->saturate)
>> +         code[0] |= 1 << 8;
>> +   } else
>>      if (i->encSize == 4) {
>>         emitForm_MUL(i);
>> -      assert(!neg_mul && !neg_add);
>> +      code[0] |= neg_mul << 15;
>> +      code[0] |= neg_add << 22;
>> +      if (i->saturate)
>> +         code[0] |= 1 << 8;
>>      } else {
>>         code[1]  = neg_mul << 26;
>>         code[1] |= neg_add << 27;
>> @@ -1931,11 +1942,6 @@ CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const
>>
>>      // check constraints on short MAD
>>      if (info.srcNr >= 2 && i->srcExists(2)) {
>> -      if (i->saturate || i->src(2).mod)
>> -         return 8;
>> -      if ((i->src(0).mod ^ i->src(1).mod) ||
>> -          (i->src(0).mod | i->src(1).mod).abs())
>> -         return 8;
>>         if (!i->defExists(0) ||
>>             i->def(0).rep()->reg.data.id != i->src(2).rep()->reg.data.id)
>>            return 8;
>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
>> index 48f996b..f4dedd7 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
>> @@ -118,7 +118,7 @@ void TargetNV50::initOpInfo()
>>      static const uint32_t shortForm[(OP_LAST + 31) / 32] =
>>      {
>>         // MOV,ADD,SUB,MUL,SAD,L/PINTERP,RCP,TEX,TXF
>> -      0x00010e40, 0x00000040, 0x00000498, 0x00000000
>> +      0x00014e40, 0x00000040, 0x00000498, 0x00000000
>>      };
>>      static const operation noDestList[] =
>>      {
>> --
>> 2.1.0
>>
>>
>>
>> _______________________________________________
>> Nouveau mailing list
>> Nouveau@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/nouveau
> _______________________________________________
> Nouveau mailing list
> Nouveau@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/nouveau

_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/3] nv50/ir: Add support for MAD short+IMM notation
       [not found]         ` <54B1C7C7.7000805-FA6nBp6kBxZzu6KWmfFNGwC/G2K4zDHf@public.gmane.org>
@ 2015-01-11  0:50           ` Ilia Mirkin
  0 siblings, 0 replies; 9+ messages in thread
From: Ilia Mirkin @ 2015-01-11  0:50 UTC (permalink / raw)
  To: Roy Spliet; +Cc: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On Sat, Jan 10, 2015 at 7:45 PM, Roy Spliet <seven@nimrod-online.com> wrote:
> Op 11-01-15 om 01:34 schreef Ilia Mirkin:
>>
>> And you're allowing saturate/neg emission on the short form.
>
> Yes
>>
>> Is this already in envytools?
>
> Tesla floating point instructions are poorly documented in the RST
> documents; fmad is no exception. I'll make sure to check envydis.

Sorry, I meant envydis

>>
>> Also, what's the shortForm thing?
>
> Documented in envytools; see
> http://envytools.readthedocs.org/en/latest/hw/graph/tesla/cuda/isa.html#instruction-format
> . In short, opcodes are either 4 bytes (short) or 8 bytes (long).

Yes, I'm aware of that bit :)

>>
>> This change is
>> probably fine, but the changelog needs work.
>
> If you insist I could elaborate a little further. However, documenting what
> a short opcode is seems a bit superfluous.

I meant what was the reason for the change to the shortForm array in
target_nv50? I don't remember offhand what it is, and you were doing a
bunch of things in here and I wasn't sure which of them it was related
to.

>
>>
>> On Sat, Jan 10, 2015 at 7:22 PM, Roy Spliet <rspliet@eclipso.eu> wrote:
>>>
>>> MAD IMM has a very specific SDST == SSRC2 requirement, so don't emit
>>>
>>> Signed-off-by: Roy Spliet <rspliet@eclipso.eu>
>>> ---
>>>   .../drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp      | 18
>>> ++++++++++++------
>>>   .../drivers/nouveau/codegen/nv50_ir_target_nv50.cpp    |  2 +-
>>>   2 files changed, 13 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
>>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
>>> index 2077388..b1e7409 100644
>>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
>>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
>>> @@ -939,9 +939,20 @@ CodeEmitterNV50::emitFMAD(const Instruction *i)
>>>
>>>      code[0] = 0xe0000000;
>>>
>>> +   if (i->src(1).getFile() == FILE_IMMEDIATE) {
>>> +      code[1] = 0;
>>> +      emitForm_IMM(i);
>>> +      code[0] |= neg_mul << 15;
>>> +      code[0] |= neg_add << 22;
>>> +      if (i->saturate)
>>> +         code[0] |= 1 << 8;
>>> +   } else
>>>      if (i->encSize == 4) {
>>>         emitForm_MUL(i);
>>> -      assert(!neg_mul && !neg_add);
>>> +      code[0] |= neg_mul << 15;
>>> +      code[0] |= neg_add << 22;
>>> +      if (i->saturate)
>>> +         code[0] |= 1 << 8;
>>>      } else {
>>>         code[1]  = neg_mul << 26;
>>>         code[1] |= neg_add << 27;
>>> @@ -1931,11 +1942,6 @@ CodeEmitterNV50::getMinEncodingSize(const
>>> Instruction *i) const
>>>
>>>      // check constraints on short MAD
>>>      if (info.srcNr >= 2 && i->srcExists(2)) {
>>> -      if (i->saturate || i->src(2).mod)
>>> -         return 8;
>>> -      if ((i->src(0).mod ^ i->src(1).mod) ||
>>> -          (i->src(0).mod | i->src(1).mod).abs())
>>> -         return 8;
>>>         if (!i->defExists(0) ||
>>>             i->def(0).rep()->reg.data.id != i->src(2).rep()->reg.data.id)
>>>            return 8;
>>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
>>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
>>> index 48f996b..f4dedd7 100644
>>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
>>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
>>> @@ -118,7 +118,7 @@ void TargetNV50::initOpInfo()
>>>      static const uint32_t shortForm[(OP_LAST + 31) / 32] =
>>>      {
>>>         // MOV,ADD,SUB,MUL,SAD,L/PINTERP,RCP,TEX,TXF
>>> -      0x00010e40, 0x00000040, 0x00000498, 0x00000000
>>> +      0x00014e40, 0x00000040, 0x00000498, 0x00000000
>>>      };
>>>      static const operation noDestList[] =
>>>      {
>>> --
>>> 2.1.0
>>>
>>>
>>>
>>> _______________________________________________
>>> Nouveau mailing list
>>> Nouveau@lists.freedesktop.org
>>> http://lists.freedesktop.org/mailman/listinfo/nouveau
>>
>> _______________________________________________
>> Nouveau mailing list
>> Nouveau@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/nouveau
>
>
> _______________________________________________
> Nouveau mailing list
> Nouveau@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/nouveau
_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 3/3] nv50/ir: Fold IMM into MAD
       [not found] ` <1423259216-7202-1-git-send-email-rspliet-pPYjZ15w/Xhulxpn9UvDqw@public.gmane.org>
@ 2015-02-06 21:46   ` Roy Spliet
  0 siblings, 0 replies; 9+ messages in thread
From: Roy Spliet @ 2015-02-06 21:46 UTC (permalink / raw)
  To: Nouveau Mailinglist; +Cc: Roy Spliet

Add a specific optimisation pass for NV50 to check whether SRC0 or SRC1 is
a MOV dst, IMM. If so: fold the IMM in and try to drop the MOV. Must be
done post-RA because it requires that SDST == SSRC2.

V2: improve readability and add comments to clarify decisions
V3: Remove redundant code... compiler already attempts to put the IMM in
SSRC1

Signed-off-by: Roy Spliet <rspliet@eclipso.eu>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 53 ++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 21d20ca..62d2ef7 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -2259,6 +2259,56 @@ FlatteningPass::tryPredicateConditional(BasicBlock *bb)
 
 // =============================================================================
 
+// Fold Immediate into MAD; must be done after register allocation due to
+// constraint SDST == SSRC2
+// TODO:
+// Does NVC0+ have other situations where this pass makes sense?
+class NV50PostRaConstantFolding : public Pass
+{
+private:
+   virtual bool visit(BasicBlock *);
+};
+
+bool
+NV50PostRaConstantFolding::visit(BasicBlock *bb)
+{
+   Value *vtmp;
+   Instruction *def;
+
+   for (Instruction *i = bb->getFirst(); i; i = i->next) {
+      switch (i->op) {
+      case OP_MAD:
+         if (i->def(0).getFile() != FILE_GPR ||
+             i->src(0).getFile() != FILE_GPR ||
+             i->src(1).getFile() != FILE_GPR ||
+             i->src(2).getFile() != FILE_GPR ||
+             i->getDef(0)->reg.data.id != i->getSrc(2)->reg.data.id)
+            break;
+
+         def = i->getSrc(1)->getInsn();
+         if (def->op == OP_MOV && def->src(0).getFile() == FILE_IMMEDIATE) {
+            vtmp = i->getSrc(1);
+            i->setSrc(1, def->getSrc(0));
+
+            /* There's no post-RA dead code elimination, so do it here
+             * XXX: if we add more code-removing post-RA passes, we might
+             *      want to create a post-RA dead-code elim pass */
+            if (vtmp->refCount() == 0)
+               delete_Instruction(bb->getProgram(), def);
+
+            break;
+         }
+         break;
+      default:
+         break;
+      }
+   }
+
+   return true;
+}
+
+// =============================================================================
+
 // Common subexpression elimination. Stupid O^2 implementation.
 class LocalCSE : public Pass
 {
@@ -2629,6 +2679,9 @@ bool
 Program::optimizePostRA(int level)
 {
    RUN_PASS(2, FlatteningPass, run);
+   if (getTarget()->getChipset() < 0xc0)
+      RUN_PASS(2, NV50PostRaConstantFolding, run);
+
    return true;
 }
 
-- 
2.1.0



_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply related	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2015-02-06 21:46 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-01-11  0:22 [PATCH 1/3] nv50/ir: Add support for MAD short+IMM notation Roy Spliet
     [not found] ` <1420935780-17104-1-git-send-email-rspliet-pPYjZ15w/Xhulxpn9UvDqw@public.gmane.org>
2015-01-11  0:22   ` [PATCH 2/3] nv50/ir: For MAD, prefer SDST == SSRC2 Roy Spliet
     [not found]     ` <1420935780-17104-2-git-send-email-rspliet-pPYjZ15w/Xhulxpn9UvDqw@public.gmane.org>
2015-01-11  0:43       ` Ilia Mirkin
2015-01-11  0:23   ` [PATCH 3/3] nv50/ir: Fold IMM into MAD Roy Spliet
     [not found]     ` <1420935780-17104-3-git-send-email-rspliet-pPYjZ15w/Xhulxpn9UvDqw@public.gmane.org>
2015-01-11  0:39       ` Ilia Mirkin
2015-01-11  0:34   ` [PATCH 1/3] nv50/ir: Add support for MAD short+IMM notation Ilia Mirkin
     [not found]     ` <CAKb7UvgcpnZvgkG_TEKOncby5ctVGX+igxVmcMLZrH+2682OqQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-01-11  0:45       ` Roy Spliet
     [not found]         ` <54B1C7C7.7000805-FA6nBp6kBxZzu6KWmfFNGwC/G2K4zDHf@public.gmane.org>
2015-01-11  0:50           ` Ilia Mirkin
2015-02-06 21:46 [PATCH 1/3] nv50/ir: Add support for MAD 4-byte opcode Roy Spliet
     [not found] ` <1423259216-7202-1-git-send-email-rspliet-pPYjZ15w/Xhulxpn9UvDqw@public.gmane.org>
2015-02-06 21:46   ` [PATCH 3/3] nv50/ir: Fold IMM into MAD Roy Spliet

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.