* [PATCH v4] nv110/exa: update sched codes
@ 2017-06-27 15:16 Aaryaman Vasishta
[not found] ` <20170627151603.2090-1-jem456.vasishta-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
0 siblings, 1 reply; 6+ messages in thread
From: Aaryaman Vasishta @ 2017-06-27 15:16 UTC (permalink / raw)
To: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
samuel.pitoiset-Re5JQEeQqe8AvxtiuMwx3w
v4: Updated the wait dependency bars based on tex component masks.
This patch adds proper delays to maxwell exa shaders. Tested with
rendercheck -f a8r8g8b8.
I am still wondering whether the rd bars are required. We could
still wait on the write bars instead, e.g. see
"sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0xf)" in exacmnv110.fp
Trello:
https://trello.com/c/6LPB2EIS/174-update-maxwell-shaders-with-proper-delays
Signed-off-by: Aaryaman Vasishta <jem456.vasishta@gmail.com>
---
src/shader/exac8nv110.fp | 10 +++++-----
src/shader/exac8nv110.fpc | 18 +++++++++---------
src/shader/exacanv110.fp | 10 +++++-----
src/shader/exacanv110.fpc | 18 +++++++++---------
src/shader/exacmnv110.fp | 10 +++++-----
src/shader/exacmnv110.fpc | 18 +++++++++---------
src/shader/exas8nv110.fp | 6 +++---
src/shader/exas8nv110.fpc | 12 ++++++------
src/shader/exasanv110.fp | 10 +++++-----
src/shader/exasanv110.fpc | 18 +++++++++---------
src/shader/exascnv110.fp | 6 +++---
src/shader/exascnv110.fpc | 10 +++++-----
src/shader/videonv110.fp | 14 +++++++-------
src/shader/videonv110.fpc | 26 +++++++++++++-------------
14 files changed, 93 insertions(+), 93 deletions(-)
diff --git a/src/shader/exac8nv110.fp b/src/shader/exac8nv110.fp
index ce78036..101b67f 100644
--- a/src/shader/exac8nv110.fp
+++ b/src/shader/exac8nv110.fp
@@ -25,23 +25,23 @@ NV110FP_Composite_A8[] = {
};
#else
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
ipa pass $r0 a[0x7c] 0x0 0x0 0x1
mufu rcp $r0 $r0
ipa $r3 a[0x94] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1) (st 0xf wr 0x0 rd 0x1 wt 0x3) (st 0xf wr 0x1 wt 0x2)
ipa $r2 a[0x90] $r0 0x0 0x1
tex nodep $r1 $r2 0x0 0x1 t2d 0x8
ipa $r3 a[0x84] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x2) (st 0xf wr 0x1 wt 0x6) (st 0xf)
ipa $r2 a[0x80] $r0 0x0 0x1
tex nodep $r0 $r2 0x0 0x0 t2d 0x8
depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x6 wt 0x3) (st 0x1) (st 0x1)
fmul ftz $r3 $r0 $r1
mov $r2 $r3 0xf
mov $r1 $r3 0xf
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1) (st 0xf) (st 0x0)
mov $r0 $r3 0xf
exit
#endif
diff --git a/src/shader/exac8nv110.fpc b/src/shader/exac8nv110.fpc
index 4aa1368..1f7d649 100644
--- a/src/shader/exac8nv110.fpc
+++ b/src/shader/exac8nv110.fpc
@@ -1,36 +1,36 @@
-0xfc0007e0,
-0x001f8000,
+0xe1a0070f,
+0x003c3c01,
0xcff7ff00,
0xe003ff87,
0x00470000,
0x50800000,
0x4007ff03,
0xe043ff89,
-0xfc0007e0,
-0x001f8000,
+0x21e0072f,
+0x005cbc03,
0x0007ff02,
0xe043ff89,
0x2ff70201,
0xc03a0014,
0x4007ff03,
0xe043ff88,
-0xfc0007e0,
-0x001f8000,
+0xe5e0074f,
+0x001fbc06,
0x0007ff02,
0xe043ff88,
0x2ff70200,
0xc03a0004,
0x34070000,
0xf0f00000,
-0xfc0007e0,
-0x001f8000,
+0xfc201fe6,
+0x001f8400,
0x00170003,
0x5c681000,
0x00370002,
0x5c980780,
0x00370001,
0x5c980780,
-0xfc0007e0,
+0xfde007e1,
0x001f8000,
0x00370000,
0x5c980780,
diff --git a/src/shader/exacanv110.fp b/src/shader/exacanv110.fp
index a70d5c5..fe55fcd 100644
--- a/src/shader/exacanv110.fp
+++ b/src/shader/exacanv110.fp
@@ -25,23 +25,23 @@ NV110FP_CAComposite[] = {
};
#else
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
ipa pass $r0 a[0x7c] 0x0 0x0 0x1
mufu rcp $r0 $r0
ipa $r3 a[0x94] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1)
ipa $r2 a[0x90] $r0 0x0 0x1
tex nodep $r4 $r2 0x0 0x1 t2d 0xf
ipa $r1 a[0x84] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0xf)
ipa $r0 a[0x80] $r0 0x0 0x1
tex nodep $r0 $r0 0x0 0x0 t2d 0xf
depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1 wt 0x3) (st 0x1) (st 0x1)
fmul ftz $r3 $r3 $r7
fmul ftz $r2 $r2 $r6
fmul ftz $r1 $r1 $r5
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1) (st 0xf) (st 0x0)
fmul ftz $r0 $r0 $r4
exit
#endif
diff --git a/src/shader/exacanv110.fpc b/src/shader/exacanv110.fpc
index 7c0ca5e..7c8ebbd 100644
--- a/src/shader/exacanv110.fpc
+++ b/src/shader/exacanv110.fpc
@@ -1,36 +1,36 @@
-0xfc0007e0,
-0x001f8000,
+0xe1a0070f,
+0x003c3c01,
0xcff7ff00,
0xe003ff87,
0x00470000,
0x50800000,
0x4007ff03,
0xe043ff89,
-0xfc0007e0,
-0x001f8000,
+0xe1e0072f,
+0x001cbc03,
0x0007ff02,
0xe043ff89,
0xaff70204,
0xc03a0017,
0x4007ff01,
0xe043ff88,
-0xfc0007e0,
-0x001f8000,
+0xe5e0172f,
+0x001fbc02,
0x0007ff00,
0xe043ff88,
0xaff70000,
0xc03a0007,
0x34070000,
0xf0f00000,
-0xfc0007e0,
-0x001f8000,
+0xfc201fe1,
+0x001f8400,
0x00770303,
0x5c681000,
0x00670202,
0x5c681000,
0x00570101,
0x5c681000,
-0xfc0007e0,
+0xfde007e1,
0x001f8000,
0x00470000,
0x5c681000,
diff --git a/src/shader/exacmnv110.fp b/src/shader/exacmnv110.fp
index fe5c294..7113ab3 100644
--- a/src/shader/exacmnv110.fp
+++ b/src/shader/exacmnv110.fp
@@ -25,23 +25,23 @@ NV110FP_Composite[] = {
};
#else
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
ipa pass $r0 a[0x7c] 0x0 0x0 0x1
mufu rcp $r0 $r0
ipa $r3 a[0x94] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1)
ipa $r2 a[0x90] $r0 0x0 0x1
tex nodep $r4 $r2 0x0 0x1 t2d 0x8
ipa $r1 a[0x84] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0xf)
ipa $r0 a[0x80] $r0 0x0 0x1
tex nodep $r0 $r0 0x0 0x0 t2d 0xf
depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1 wt 0x3) (st 0x1) (st 0x1)
fmul ftz $r3 $r3 $r4
fmul ftz $r2 $r2 $r4
fmul ftz $r1 $r1 $r4
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1) (st 0xf) (st 0x0)
fmul ftz $r0 $r0 $r4
exit
#endif
diff --git a/src/shader/exacmnv110.fpc b/src/shader/exacmnv110.fpc
index 9d62c1a..60352a8 100644
--- a/src/shader/exacmnv110.fpc
+++ b/src/shader/exacmnv110.fpc
@@ -1,36 +1,36 @@
-0xfc0007e0,
-0x001f8000,
+0xe1a0070f,
+0x003c3c01,
0xcff7ff00,
0xe003ff87,
0x00470000,
0x50800000,
0x4007ff03,
0xe043ff89,
-0xfc0007e0,
-0x001f8000,
+0xe1e0072f,
+0x001cbc03,
0x0007ff02,
0xe043ff89,
0x2ff70204,
0xc03a0014,
0x4007ff01,
0xe043ff88,
-0xfc0007e0,
-0x001f8000,
+0xe5e0172f,
+0x001fbc02,
0x0007ff00,
0xe043ff88,
0xaff70000,
0xc03a0007,
0x34070000,
0xf0f00000,
-0xfc0007e0,
-0x001f8000,
+0xfc201fe1,
+0x001f8400,
0x00470303,
0x5c681000,
0x00470202,
0x5c681000,
0x00470101,
0x5c681000,
-0xfc0007e0,
+0xfde007e1,
0x001f8000,
0x00470000,
0x5c681000,
diff --git a/src/shader/exas8nv110.fp b/src/shader/exas8nv110.fp
index 4fe2e19..a555beb 100644
--- a/src/shader/exas8nv110.fp
+++ b/src/shader/exas8nv110.fp
@@ -25,15 +25,15 @@ NV110FP_Source_A8[] = {
};
#else
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
ipa pass $r0 a[0x7c] 0x0 0x0 0x1
mufu rcp $r0 $r0
ipa $r1 a[0x84] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf)
ipa $r0 a[0x80] $r0 0x0 0x1
tex nodep $r0 $r0 0x0 0x0 t2d 0x8
depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1 wt 0x1) (st 0x1) (st 0x1)
mov $r3 $r0 0xf
mov $r2 $r0 0xf
mov $r1 $r0 0xf
diff --git a/src/shader/exas8nv110.fpc b/src/shader/exas8nv110.fpc
index 1181c41..e58d168 100644
--- a/src/shader/exas8nv110.fpc
+++ b/src/shader/exas8nv110.fpc
@@ -1,21 +1,21 @@
-0xfc0007e0,
-0x001f8000,
+0xe1a0070f,
+0x003c3c01,
0xcff7ff00,
0xe003ff87,
0x00470000,
0x50800000,
0x4007ff01,
0xe043ff88,
-0xfc0007e0,
-0x001f8000,
+0xe1e0072f,
+0x001fbc03,
0x0007ff00,
0xe043ff88,
0x2ff70000,
0xc03a0004,
0x34070000,
0xf0f00000,
-0xfc0007e0,
-0x001f8000,
+0xfc200fe1,
+0x001f8400,
0x00070003,
0x5c980780,
0x00070002,
diff --git a/src/shader/exasanv110.fp b/src/shader/exasanv110.fp
index 61374a6..ee818cd 100644
--- a/src/shader/exasanv110.fp
+++ b/src/shader/exasanv110.fp
@@ -25,23 +25,23 @@ NV110FP_CACompositeSrcAlpha[] = {
};
#else
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
ipa pass $r0 a[0x7c] 0x0 0x0 0x1
mufu rcp $r0 $r0
ipa $r3 a[0x84] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1)
ipa $r2 a[0x80] $r0 0x0 0x1
tex nodep $r4 $r2 0x0 0x0 t2d 0x8
ipa $r1 a[0x94] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0xf)
ipa $r0 a[0x90] $r0 0x0 0x1
tex nodep $r0 $r0 0x0 0x1 t2d 0xf
depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1 wt 0x3) (st 0x1) (st 0x1)
fmul ftz $r3 $r3 $r4
fmul ftz $r2 $r2 $r4
fmul ftz $r1 $r1 $r4
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1) (st 0xf) (st 0x0)
fmul ftz $r0 $r0 $r4
exit
#endif
diff --git a/src/shader/exasanv110.fpc b/src/shader/exasanv110.fpc
index 5516a03..604bf9a 100644
--- a/src/shader/exasanv110.fpc
+++ b/src/shader/exasanv110.fpc
@@ -1,36 +1,36 @@
-0xfc0007e0,
-0x001f8000,
+0xe1a0070f,
+0x003c3c01,
0xcff7ff00,
0xe003ff87,
0x00470000,
0x50800000,
0x4007ff03,
0xe043ff88,
-0xfc0007e0,
-0x001f8000,
+0xe1e0072f,
+0x001cbc03,
0x0007ff02,
0xe043ff88,
0x2ff70204,
0xc03a0004,
0x4007ff01,
0xe043ff89,
-0xfc0007e0,
-0x001f8000,
+0xe5e0172f,
+0x001fbc02,
0x0007ff00,
0xe043ff89,
0xaff70000,
0xc03a0017,
0x34070000,
0xf0f00000,
-0xfc0007e0,
-0x001f8000,
+0xfc201fe1,
+0x001f8400,
0x00470303,
0x5c681000,
0x00470202,
0x5c681000,
0x00470101,
0x5c681000,
-0xfc0007e0,
+0xfde007e1,
0x001f8000,
0x00470000,
0x5c681000,
diff --git a/src/shader/exascnv110.fp b/src/shader/exascnv110.fp
index 90bbb55..86e14e8 100644
--- a/src/shader/exascnv110.fp
+++ b/src/shader/exascnv110.fp
@@ -25,14 +25,14 @@ NV110FP_Source[] = {
};
#else
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
ipa pass $r0 a[0x7c] 0x0 0x0 0x1
mufu rcp $r0 $r0
ipa $r1 a[0x84] $r0 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1) (st 0xf wt 0x3) (st 0xf)
ipa $r0 a[0x80] $r0 0x0 0x1
tex nodep $r0 $r0 0x0 0x0 t2d 0xf
depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf) (st 0x0) (st 0x0)
exit
#endif
diff --git a/src/shader/exascnv110.fpc b/src/shader/exascnv110.fpc
index 2dba15d..1fef5d2 100644
--- a/src/shader/exascnv110.fpc
+++ b/src/shader/exascnv110.fpc
@@ -1,20 +1,20 @@
-0xfc0007e0,
-0x001f8000,
+0xe1a0070f,
+0x003c3c01,
0xcff7ff00,
0xe003ff87,
0x00470000,
0x50800000,
0x4007ff01,
0xe043ff88,
-0xfc0007e0,
-0x001f8000,
+0xfde0072f,
+0x001fbc03,
0x0007ff00,
0xe043ff88,
0xaff70000,
0xc03a0007,
0x34070000,
0xf0f00000,
-0xfc0007e0,
+0xfc0007ef,
0x001f8000,
0x0007000f,
0xe3000000,
diff --git a/src/shader/videonv110.fp b/src/shader/videonv110.fp
index 2728311..773aad5 100644
--- a/src/shader/videonv110.fp
+++ b/src/shader/videonv110.fp
@@ -25,30 +25,30 @@ NV110FP_NV12[] = {
};
#else
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
ipa pass $r2 a[0x7c] 0x0 0x0 0x1
mufu rcp $r2 $r2
ipa $r0 a[0x80] $r2 0x0 0x1
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x0 wt 0x1)
ipa $r1 a[0x84] $r2 0x0 0x1
tex nodep $r4 $r0 0x0 0x0 t2d 0x8
tex nodep $r0 $r0 0x0 0x1 t2d 0xc
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf) (st 0x6) (st 0x1)
depbar le 0x5 0x1 0x1
fmul ftz $r5 $r4 c0[0x0]
fadd ftz $r3 $r5 c0[0x4]
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x6) (st 0x6) (st 0xf)
fadd ftz $r4 $r5 c0[0x8]
fadd ftz $r5 $r5 c0[0xc]
depbar le 0x5 0x0 0x0
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x6 wt 0x1) (st 0x1) (st 0x1)
ffma ftz $r3 $r0 c0[0x10] $r3
ffma ftz $r4 $r0 c0[0x14] $r4
ffma ftz $r5 $r0 c0[0x18] $r5
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0x1) (st 0x1) (st 0x6)
ffma ftz $r0 $r1 c0[0x1c] $r3
ffma ftz $r2 $r1 c0[0x24] $r5
ffma ftz $r1 $r1 c0[0x20] $r4
-sched (st 0x0) (st 0x0) (st 0x0)
+sched (st 0xf) (st 0x0) (st 0x0)
exit
#endif
diff --git a/src/shader/videonv110.fpc b/src/shader/videonv110.fpc
index 31d745a..8e7bedf 100644
--- a/src/shader/videonv110.fpc
+++ b/src/shader/videonv110.fpc
@@ -1,52 +1,52 @@
-0xfc0007e0,
-0x001f8000,
+0xe1a0070f,
+0x003c3c01,
0xcff7ff02,
0xe003ff87,
0x00470202,
0x50800000,
0x0027ff00,
0xe043ff88,
-0xfc0007e0,
-0x001f8000,
+0xe1e0072f,
+0x003c3c03,
0x4027ff01,
0xe043ff88,
0x2ff70004,
0xc03a0004,
0x2ff70000,
0xc03a0016,
-0xfc0007e0,
-0x001f8000,
+0xfcc007ef,
+0x001f8400,
0x34170001,
0xf0f00000,
0x00070405,
0x4c681000,
0x00170503,
0x4c581000,
-0xfc0007e0,
-0x001f8000,
+0xfcc007e6,
+0x001fbc00,
0x00270504,
0x4c581000,
0x00370505,
0x4c581000,
0x34070000,
0xf0f00000,
-0xfc0007e0,
-0x001f8000,
+0xfc200fe6,
+0x001f8400,
0x00470003,
0x49a00180,
0x00570004,
0x49a00200,
0x00670005,
0x49a00280,
-0xfc0007e0,
-0x001f8000,
+0xfc2007e1,
+0x001f9800,
0x00770100,
0x49a00180,
0x00970102,
0x49a00280,
0x00870101,
0x49a00200,
-0xfc0007e0,
+0xfc0007ef,
0x001f8000,
0x0007000f,
0xe3000000,
--
2.11.0
_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/nouveau
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH v4] nv110/exa: update sched codes
[not found] ` <20170627151603.2090-1-jem456.vasishta-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-06-28 3:53 ` Ilia Mirkin
[not found] ` <CAKb7UvgcwqEE2C6hFoSHp8n21UO6Oa2T7WaavNtw9iTTx0m_yw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-06-29 21:26 ` Samuel Pitoiset
1 sibling, 1 reply; 6+ messages in thread
From: Ilia Mirkin @ 2017-06-28 3:53 UTC (permalink / raw)
To: Aaryaman Vasishta; +Cc: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
BTW, you can drop those explicit "depbar" ops. I think they're only
needed when you're doing something weird with barriers. The blob doesn't
use them (anymore).
On Tue, Jun 27, 2017 at 11:16 AM, Aaryaman Vasishta
<jem456.vasishta@gmail.com> wrote:
> v4: Updated the wait dependency bars based on tex component masks.
>
> This patch adds proper delays to maxwell exa shaders. Tested with
> rendercheck -f a8r8g8b8.
>
> I am still wondering whether the rd's are required. We could
> still wait on the write bars instead. eg. see
> "sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0xf)" in exacmnv110.fp
>
> Trello:
> https://trello.com/c/6LPB2EIS/174-update-maxwell-shaders-with-proper-delays
>
> Signed-off-by: Aaryaman Vasishta <jem456.vasishta@gmail.com>
> ---
> src/shader/exac8nv110.fp | 10 +++++-----
> src/shader/exac8nv110.fpc | 18 +++++++++---------
> src/shader/exacanv110.fp | 10 +++++-----
> src/shader/exacanv110.fpc | 18 +++++++++---------
> src/shader/exacmnv110.fp | 10 +++++-----
> src/shader/exacmnv110.fpc | 18 +++++++++---------
> src/shader/exas8nv110.fp | 6 +++---
> src/shader/exas8nv110.fpc | 12 ++++++------
> src/shader/exasanv110.fp | 10 +++++-----
> src/shader/exasanv110.fpc | 18 +++++++++---------
> src/shader/exascnv110.fp | 6 +++---
> src/shader/exascnv110.fpc | 10 +++++-----
> src/shader/videonv110.fp | 14 +++++++-------
> src/shader/videonv110.fpc | 26 +++++++++++++-------------
> 14 files changed, 93 insertions(+), 93 deletions(-)
>
> diff --git a/src/shader/exac8nv110.fp b/src/shader/exac8nv110.fp
> index ce78036..101b67f 100644
> --- a/src/shader/exac8nv110.fp
> +++ b/src/shader/exac8nv110.fp
> @@ -25,23 +25,23 @@ NV110FP_Composite_A8[] = {
> };
> #else
>
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
> ipa pass $r0 a[0x7c] 0x0 0x0 0x1
> mufu rcp $r0 $r0
> ipa $r3 a[0x94] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1) (st 0xf wr 0x0 rd 0x1 wt 0x3) (st 0xf wr 0x1 wt 0x2)
> ipa $r2 a[0x90] $r0 0x0 0x1
> tex nodep $r1 $r2 0x0 0x1 t2d 0x8
> ipa $r3 a[0x84] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x2) (st 0xf wr 0x1 wt 0x6) (st 0xf)
> ipa $r2 a[0x80] $r0 0x0 0x1
> tex nodep $r0 $r2 0x0 0x0 t2d 0x8
> depbar le 0x5 0x0 0x0
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x6 wt 0x3) (st 0x1) (st 0x1)
> fmul ftz $r3 $r0 $r1
> mov $r2 $r3 0xf
> mov $r1 $r3 0xf
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1) (st 0xf) (st 0x0)
> mov $r0 $r3 0xf
> exit
> #endif
> diff --git a/src/shader/exac8nv110.fpc b/src/shader/exac8nv110.fpc
> index 4aa1368..1f7d649 100644
> --- a/src/shader/exac8nv110.fpc
> +++ b/src/shader/exac8nv110.fpc
> @@ -1,36 +1,36 @@
> -0xfc0007e0,
> -0x001f8000,
> +0xe1a0070f,
> +0x003c3c01,
> 0xcff7ff00,
> 0xe003ff87,
> 0x00470000,
> 0x50800000,
> 0x4007ff03,
> 0xe043ff89,
> -0xfc0007e0,
> -0x001f8000,
> +0x21e0072f,
> +0x005cbc03,
> 0x0007ff02,
> 0xe043ff89,
> 0x2ff70201,
> 0xc03a0014,
> 0x4007ff03,
> 0xe043ff88,
> -0xfc0007e0,
> -0x001f8000,
> +0xe5e0074f,
> +0x001fbc06,
> 0x0007ff02,
> 0xe043ff88,
> 0x2ff70200,
> 0xc03a0004,
> 0x34070000,
> 0xf0f00000,
> -0xfc0007e0,
> -0x001f8000,
> +0xfc201fe6,
> +0x001f8400,
> 0x00170003,
> 0x5c681000,
> 0x00370002,
> 0x5c980780,
> 0x00370001,
> 0x5c980780,
> -0xfc0007e0,
> +0xfde007e1,
> 0x001f8000,
> 0x00370000,
> 0x5c980780,
> diff --git a/src/shader/exacanv110.fp b/src/shader/exacanv110.fp
> index a70d5c5..fe55fcd 100644
> --- a/src/shader/exacanv110.fp
> +++ b/src/shader/exacanv110.fp
> @@ -25,23 +25,23 @@ NV110FP_CAComposite[] = {
> };
> #else
>
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
> ipa pass $r0 a[0x7c] 0x0 0x0 0x1
> mufu rcp $r0 $r0
> ipa $r3 a[0x94] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1)
> ipa $r2 a[0x90] $r0 0x0 0x1
> tex nodep $r4 $r2 0x0 0x1 t2d 0xf
> ipa $r1 a[0x84] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0xf)
> ipa $r0 a[0x80] $r0 0x0 0x1
> tex nodep $r0 $r0 0x0 0x0 t2d 0xf
> depbar le 0x5 0x0 0x0
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1 wt 0x3) (st 0x1) (st 0x1)
> fmul ftz $r3 $r3 $r7
> fmul ftz $r2 $r2 $r6
> fmul ftz $r1 $r1 $r5
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1) (st 0xf) (st 0x0)
> fmul ftz $r0 $r0 $r4
> exit
> #endif
> diff --git a/src/shader/exacanv110.fpc b/src/shader/exacanv110.fpc
> index 7c0ca5e..7c8ebbd 100644
> --- a/src/shader/exacanv110.fpc
> +++ b/src/shader/exacanv110.fpc
> @@ -1,36 +1,36 @@
> -0xfc0007e0,
> -0x001f8000,
> +0xe1a0070f,
> +0x003c3c01,
> 0xcff7ff00,
> 0xe003ff87,
> 0x00470000,
> 0x50800000,
> 0x4007ff03,
> 0xe043ff89,
> -0xfc0007e0,
> -0x001f8000,
> +0xe1e0072f,
> +0x001cbc03,
> 0x0007ff02,
> 0xe043ff89,
> 0xaff70204,
> 0xc03a0017,
> 0x4007ff01,
> 0xe043ff88,
> -0xfc0007e0,
> -0x001f8000,
> +0xe5e0172f,
> +0x001fbc02,
> 0x0007ff00,
> 0xe043ff88,
> 0xaff70000,
> 0xc03a0007,
> 0x34070000,
> 0xf0f00000,
> -0xfc0007e0,
> -0x001f8000,
> +0xfc201fe1,
> +0x001f8400,
> 0x00770303,
> 0x5c681000,
> 0x00670202,
> 0x5c681000,
> 0x00570101,
> 0x5c681000,
> -0xfc0007e0,
> +0xfde007e1,
> 0x001f8000,
> 0x00470000,
> 0x5c681000,
> diff --git a/src/shader/exacmnv110.fp b/src/shader/exacmnv110.fp
> index fe5c294..7113ab3 100644
> --- a/src/shader/exacmnv110.fp
> +++ b/src/shader/exacmnv110.fp
> @@ -25,23 +25,23 @@ NV110FP_Composite[] = {
> };
> #else
>
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
> ipa pass $r0 a[0x7c] 0x0 0x0 0x1
> mufu rcp $r0 $r0
> ipa $r3 a[0x94] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1)
> ipa $r2 a[0x90] $r0 0x0 0x1
> tex nodep $r4 $r2 0x0 0x1 t2d 0x8
> ipa $r1 a[0x84] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0xf)
> ipa $r0 a[0x80] $r0 0x0 0x1
> tex nodep $r0 $r0 0x0 0x0 t2d 0xf
> depbar le 0x5 0x0 0x0
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1 wt 0x3) (st 0x1) (st 0x1)
> fmul ftz $r3 $r3 $r4
> fmul ftz $r2 $r2 $r4
> fmul ftz $r1 $r1 $r4
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1) (st 0xf) (st 0x0)
> fmul ftz $r0 $r0 $r4
> exit
> #endif
> diff --git a/src/shader/exacmnv110.fpc b/src/shader/exacmnv110.fpc
> index 9d62c1a..60352a8 100644
> --- a/src/shader/exacmnv110.fpc
> +++ b/src/shader/exacmnv110.fpc
> @@ -1,36 +1,36 @@
> -0xfc0007e0,
> -0x001f8000,
> +0xe1a0070f,
> +0x003c3c01,
> 0xcff7ff00,
> 0xe003ff87,
> 0x00470000,
> 0x50800000,
> 0x4007ff03,
> 0xe043ff89,
> -0xfc0007e0,
> -0x001f8000,
> +0xe1e0072f,
> +0x001cbc03,
> 0x0007ff02,
> 0xe043ff89,
> 0x2ff70204,
> 0xc03a0014,
> 0x4007ff01,
> 0xe043ff88,
> -0xfc0007e0,
> -0x001f8000,
> +0xe5e0172f,
> +0x001fbc02,
> 0x0007ff00,
> 0xe043ff88,
> 0xaff70000,
> 0xc03a0007,
> 0x34070000,
> 0xf0f00000,
> -0xfc0007e0,
> -0x001f8000,
> +0xfc201fe1,
> +0x001f8400,
> 0x00470303,
> 0x5c681000,
> 0x00470202,
> 0x5c681000,
> 0x00470101,
> 0x5c681000,
> -0xfc0007e0,
> +0xfde007e1,
> 0x001f8000,
> 0x00470000,
> 0x5c681000,
> diff --git a/src/shader/exas8nv110.fp b/src/shader/exas8nv110.fp
> index 4fe2e19..a555beb 100644
> --- a/src/shader/exas8nv110.fp
> +++ b/src/shader/exas8nv110.fp
> @@ -25,15 +25,15 @@ NV110FP_Source_A8[] = {
> };
> #else
>
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
> ipa pass $r0 a[0x7c] 0x0 0x0 0x1
> mufu rcp $r0 $r0
> ipa $r1 a[0x84] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf)
> ipa $r0 a[0x80] $r0 0x0 0x1
> tex nodep $r0 $r0 0x0 0x0 t2d 0x8
> depbar le 0x5 0x0 0x0
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1 wt 0x1) (st 0x1) (st 0x1)
> mov $r3 $r0 0xf
> mov $r2 $r0 0xf
> mov $r1 $r0 0xf
> diff --git a/src/shader/exas8nv110.fpc b/src/shader/exas8nv110.fpc
> index 1181c41..e58d168 100644
> --- a/src/shader/exas8nv110.fpc
> +++ b/src/shader/exas8nv110.fpc
> @@ -1,21 +1,21 @@
> -0xfc0007e0,
> -0x001f8000,
> +0xe1a0070f,
> +0x003c3c01,
> 0xcff7ff00,
> 0xe003ff87,
> 0x00470000,
> 0x50800000,
> 0x4007ff01,
> 0xe043ff88,
> -0xfc0007e0,
> -0x001f8000,
> +0xe1e0072f,
> +0x001fbc03,
> 0x0007ff00,
> 0xe043ff88,
> 0x2ff70000,
> 0xc03a0004,
> 0x34070000,
> 0xf0f00000,
> -0xfc0007e0,
> -0x001f8000,
> +0xfc200fe1,
> +0x001f8400,
> 0x00070003,
> 0x5c980780,
> 0x00070002,
> diff --git a/src/shader/exasanv110.fp b/src/shader/exasanv110.fp
> index 61374a6..ee818cd 100644
> --- a/src/shader/exasanv110.fp
> +++ b/src/shader/exasanv110.fp
> @@ -25,23 +25,23 @@ NV110FP_CACompositeSrcAlpha[] = {
> };
> #else
>
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
> ipa pass $r0 a[0x7c] 0x0 0x0 0x1
> mufu rcp $r0 $r0
> ipa $r3 a[0x84] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1)
> ipa $r2 a[0x80] $r0 0x0 0x1
> tex nodep $r4 $r2 0x0 0x0 t2d 0x8
> ipa $r1 a[0x94] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0xf)
> ipa $r0 a[0x90] $r0 0x0 0x1
> tex nodep $r0 $r0 0x0 0x1 t2d 0xf
> depbar le 0x5 0x0 0x0
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1 wt 0x3) (st 0x1) (st 0x1)
> fmul ftz $r3 $r3 $r4
> fmul ftz $r2 $r2 $r4
> fmul ftz $r1 $r1 $r4
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1) (st 0xf) (st 0x0)
> fmul ftz $r0 $r0 $r4
> exit
> #endif
> diff --git a/src/shader/exasanv110.fpc b/src/shader/exasanv110.fpc
> index 5516a03..604bf9a 100644
> --- a/src/shader/exasanv110.fpc
> +++ b/src/shader/exasanv110.fpc
> @@ -1,36 +1,36 @@
> -0xfc0007e0,
> -0x001f8000,
> +0xe1a0070f,
> +0x003c3c01,
> 0xcff7ff00,
> 0xe003ff87,
> 0x00470000,
> 0x50800000,
> 0x4007ff03,
> 0xe043ff88,
> -0xfc0007e0,
> -0x001f8000,
> +0xe1e0072f,
> +0x001cbc03,
> 0x0007ff02,
> 0xe043ff88,
> 0x2ff70204,
> 0xc03a0004,
> 0x4007ff01,
> 0xe043ff89,
> -0xfc0007e0,
> -0x001f8000,
> +0xe5e0172f,
> +0x001fbc02,
> 0x0007ff00,
> 0xe043ff89,
> 0xaff70000,
> 0xc03a0017,
> 0x34070000,
> 0xf0f00000,
> -0xfc0007e0,
> -0x001f8000,
> +0xfc201fe1,
> +0x001f8400,
> 0x00470303,
> 0x5c681000,
> 0x00470202,
> 0x5c681000,
> 0x00470101,
> 0x5c681000,
> -0xfc0007e0,
> +0xfde007e1,
> 0x001f8000,
> 0x00470000,
> 0x5c681000,
> diff --git a/src/shader/exascnv110.fp b/src/shader/exascnv110.fp
> index 90bbb55..86e14e8 100644
> --- a/src/shader/exascnv110.fp
> +++ b/src/shader/exascnv110.fp
> @@ -25,14 +25,14 @@ NV110FP_Source[] = {
> };
> #else
>
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
> ipa pass $r0 a[0x7c] 0x0 0x0 0x1
> mufu rcp $r0 $r0
> ipa $r1 a[0x84] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1) (st 0xf wt 0x3) (st 0xf)
> ipa $r0 a[0x80] $r0 0x0 0x1
> tex nodep $r0 $r0 0x0 0x0 t2d 0xf
> depbar le 0x5 0x0 0x0
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf) (st 0x0) (st 0x0)
> exit
> #endif
> diff --git a/src/shader/exascnv110.fpc b/src/shader/exascnv110.fpc
> index 2dba15d..1fef5d2 100644
> --- a/src/shader/exascnv110.fpc
> +++ b/src/shader/exascnv110.fpc
> @@ -1,20 +1,20 @@
> -0xfc0007e0,
> -0x001f8000,
> +0xe1a0070f,
> +0x003c3c01,
> 0xcff7ff00,
> 0xe003ff87,
> 0x00470000,
> 0x50800000,
> 0x4007ff01,
> 0xe043ff88,
> -0xfc0007e0,
> -0x001f8000,
> +0xfde0072f,
> +0x001fbc03,
> 0x0007ff00,
> 0xe043ff88,
> 0xaff70000,
> 0xc03a0007,
> 0x34070000,
> 0xf0f00000,
> -0xfc0007e0,
> +0xfc0007ef,
> 0x001f8000,
> 0x0007000f,
> 0xe3000000,
> diff --git a/src/shader/videonv110.fp b/src/shader/videonv110.fp
> index 2728311..773aad5 100644
> --- a/src/shader/videonv110.fp
> +++ b/src/shader/videonv110.fp
> @@ -25,30 +25,30 @@ NV110FP_NV12[] = {
> };
> #else
>
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
> ipa pass $r2 a[0x7c] 0x0 0x0 0x1
> mufu rcp $r2 $r2
> ipa $r0 a[0x80] $r2 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x0 wt 0x1)
> ipa $r1 a[0x84] $r2 0x0 0x1
> tex nodep $r4 $r0 0x0 0x0 t2d 0x8
> tex nodep $r0 $r0 0x0 0x1 t2d 0xc
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf) (st 0x6) (st 0x1)
> depbar le 0x5 0x1 0x1
> fmul ftz $r5 $r4 c0[0x0]
> fadd ftz $r3 $r5 c0[0x4]
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x6) (st 0x6) (st 0xf)
> fadd ftz $r4 $r5 c0[0x8]
> fadd ftz $r5 $r5 c0[0xc]
> depbar le 0x5 0x0 0x0
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x6 wt 0x1) (st 0x1) (st 0x1)
> ffma ftz $r3 $r0 c0[0x10] $r3
> ffma ftz $r4 $r0 c0[0x14] $r4
> ffma ftz $r5 $r0 c0[0x18] $r5
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1) (st 0x1) (st 0x6)
> ffma ftz $r0 $r1 c0[0x1c] $r3
> ffma ftz $r2 $r1 c0[0x24] $r5
> ffma ftz $r1 $r1 c0[0x20] $r4
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf) (st 0x0) (st 0x0)
> exit
> #endif
> diff --git a/src/shader/videonv110.fpc b/src/shader/videonv110.fpc
> index 31d745a..8e7bedf 100644
> --- a/src/shader/videonv110.fpc
> +++ b/src/shader/videonv110.fpc
> @@ -1,52 +1,52 @@
> -0xfc0007e0,
> -0x001f8000,
> +0xe1a0070f,
> +0x003c3c01,
> 0xcff7ff02,
> 0xe003ff87,
> 0x00470202,
> 0x50800000,
> 0x0027ff00,
> 0xe043ff88,
> -0xfc0007e0,
> -0x001f8000,
> +0xe1e0072f,
> +0x003c3c03,
> 0x4027ff01,
> 0xe043ff88,
> 0x2ff70004,
> 0xc03a0004,
> 0x2ff70000,
> 0xc03a0016,
> -0xfc0007e0,
> -0x001f8000,
> +0xfcc007ef,
> +0x001f8400,
> 0x34170001,
> 0xf0f00000,
> 0x00070405,
> 0x4c681000,
> 0x00170503,
> 0x4c581000,
> -0xfc0007e0,
> -0x001f8000,
> +0xfcc007e6,
> +0x001fbc00,
> 0x00270504,
> 0x4c581000,
> 0x00370505,
> 0x4c581000,
> 0x34070000,
> 0xf0f00000,
> -0xfc0007e0,
> -0x001f8000,
> +0xfc200fe6,
> +0x001f8400,
> 0x00470003,
> 0x49a00180,
> 0x00570004,
> 0x49a00200,
> 0x00670005,
> 0x49a00280,
> -0xfc0007e0,
> -0x001f8000,
> +0xfc2007e1,
> +0x001f9800,
> 0x00770100,
> 0x49a00180,
> 0x00970102,
> 0x49a00280,
> 0x00870101,
> 0x49a00200,
> -0xfc0007e0,
> +0xfc0007ef,
> 0x001f8000,
> 0x0007000f,
> 0xe3000000,
> --
> 2.11.0
>
> _______________________________________________
> Nouveau mailing list
> Nouveau@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/nouveau
_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/nouveau
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v4] nv110/exa: update sched codes
[not found] ` <CAKb7UvgcwqEE2C6hFoSHp8n21UO6Oa2T7WaavNtw9iTTx0m_yw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2017-06-28 9:05 ` Aaryaman Vasishta
[not found] ` <CABVHfRs238NmtjpM12gzaQFjUP3MGewD0iMTkA0GcYtgjzzKxA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
0 siblings, 1 reply; 6+ messages in thread
From: Aaryaman Vasishta @ 2017-06-28 9:05 UTC (permalink / raw)
To: Ilia Mirkin; +Cc: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
[-- Attachment #1.1: Type: text/plain, Size: 16272 bytes --]
Hi,
On Wed, Jun 28, 2017 at 12:53 PM, Ilia Mirkin <imirkin-FrUbXkNCsVf2fBVCVOL8/A@public.gmane.org> wrote:
> BTW, you can drop those explicit "depbar" ops. I think they're only
> needed when you're doing something weird with barriers. Blob doesn't
> use them (anymore)
>
Gotcha. Should I remove them in the same patch or a different one? It seems
like the depbar removal is different than what the commit message describes
here, so maybe it could do with a separate commit. I could be wrong,
though, as it's my first time contributing to nouveau.
Cheers,
Aaryaman
> On Tue, Jun 27, 2017 at 11:16 AM, Aaryaman Vasishta
> <jem456.vasishta-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote:
> > v4: Updated the wait dependency bars based on tex component masks.
> >
> > This patch adds proper delays to maxwell exa shaders. Tested with
> > rendercheck -f a8r8g8b8.
> >
> > I am still wondering whether the rd's are required. We could
> > still wait on the write bars instead. eg. see
> > "sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0xf)" in exacmnv110.fp
> >
> > Trello:
> > https://trello.com/c/6LPB2EIS/174-update-maxwell-shaders-with-proper-delays
> >
> > Signed-off-by: Aaryaman Vasishta <jem456.vasishta-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
> > ---
> > src/shader/exac8nv110.fp | 10 +++++-----
> > src/shader/exac8nv110.fpc | 18 +++++++++---------
> > src/shader/exacanv110.fp | 10 +++++-----
> > src/shader/exacanv110.fpc | 18 +++++++++---------
> > src/shader/exacmnv110.fp | 10 +++++-----
> > src/shader/exacmnv110.fpc | 18 +++++++++---------
> > src/shader/exas8nv110.fp | 6 +++---
> > src/shader/exas8nv110.fpc | 12 ++++++------
> > src/shader/exasanv110.fp | 10 +++++-----
> > src/shader/exasanv110.fpc | 18 +++++++++---------
> > src/shader/exascnv110.fp | 6 +++---
> > src/shader/exascnv110.fpc | 10 +++++-----
> > src/shader/videonv110.fp | 14 +++++++-------
> > src/shader/videonv110.fpc | 26 +++++++++++++-------------
> > 14 files changed, 93 insertions(+), 93 deletions(-)
> >
> > diff --git a/src/shader/exac8nv110.fp b/src/shader/exac8nv110.fp
> > index ce78036..101b67f 100644
> > --- a/src/shader/exac8nv110.fp
> > +++ b/src/shader/exac8nv110.fp
> > @@ -25,23 +25,23 @@ NV110FP_Composite_A8[] = {
> > };
> > #else
> >
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
> > ipa pass $r0 a[0x7c] 0x0 0x0 0x1
> > mufu rcp $r0 $r0
> > ipa $r3 a[0x94] $r0 0x0 0x1
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf wr 0x1) (st 0xf wr 0x0 rd 0x1 wt 0x3) (st 0xf wr 0x1 wt 0x2)
> > ipa $r2 a[0x90] $r0 0x0 0x1
> > tex nodep $r1 $r2 0x0 0x1 t2d 0x8
> > ipa $r3 a[0x84] $r0 0x0 0x1
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf wr 0x2) (st 0xf wr 0x1 wt 0x6) (st 0xf)
> > ipa $r2 a[0x80] $r0 0x0 0x1
> > tex nodep $r0 $r2 0x0 0x0 t2d 0x8
> > depbar le 0x5 0x0 0x0
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0x6 wt 0x3) (st 0x1) (st 0x1)
> > fmul ftz $r3 $r0 $r1
> > mov $r2 $r3 0xf
> > mov $r1 $r3 0xf
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0x1) (st 0xf) (st 0x0)
> > mov $r0 $r3 0xf
> > exit
> > #endif
> > diff --git a/src/shader/exac8nv110.fpc b/src/shader/exac8nv110.fpc
> > index 4aa1368..1f7d649 100644
> > --- a/src/shader/exac8nv110.fpc
> > +++ b/src/shader/exac8nv110.fpc
> > @@ -1,36 +1,36 @@
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xe1a0070f,
> > +0x003c3c01,
> > 0xcff7ff00,
> > 0xe003ff87,
> > 0x00470000,
> > 0x50800000,
> > 0x4007ff03,
> > 0xe043ff89,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0x21e0072f,
> > +0x005cbc03,
> > 0x0007ff02,
> > 0xe043ff89,
> > 0x2ff70201,
> > 0xc03a0014,
> > 0x4007ff03,
> > 0xe043ff88,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xe5e0074f,
> > +0x001fbc06,
> > 0x0007ff02,
> > 0xe043ff88,
> > 0x2ff70200,
> > 0xc03a0004,
> > 0x34070000,
> > 0xf0f00000,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xfc201fe6,
> > +0x001f8400,
> > 0x00170003,
> > 0x5c681000,
> > 0x00370002,
> > 0x5c980780,
> > 0x00370001,
> > 0x5c980780,
> > -0xfc0007e0,
> > +0xfde007e1,
> > 0x001f8000,
> > 0x00370000,
> > 0x5c980780,
> > diff --git a/src/shader/exacanv110.fp b/src/shader/exacanv110.fp
> > index a70d5c5..fe55fcd 100644
> > --- a/src/shader/exacanv110.fp
> > +++ b/src/shader/exacanv110.fp
> > @@ -25,23 +25,23 @@ NV110FP_CAComposite[] = {
> > };
> > #else
> >
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
> > ipa pass $r0 a[0x7c] 0x0 0x0 0x1
> > mufu rcp $r0 $r0
> > ipa $r3 a[0x94] $r0 0x0 0x1
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1)
> > ipa $r2 a[0x90] $r0 0x0 0x1
> > tex nodep $r4 $r2 0x0 0x1 t2d 0xf
> > ipa $r1 a[0x84] $r0 0x0 0x1
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0xf)
> > ipa $r0 a[0x80] $r0 0x0 0x1
> > tex nodep $r0 $r0 0x0 0x0 t2d 0xf
> > depbar le 0x5 0x0 0x0
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0x1 wt 0x3) (st 0x1) (st 0x1)
> > fmul ftz $r3 $r3 $r7
> > fmul ftz $r2 $r2 $r6
> > fmul ftz $r1 $r1 $r5
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0x1) (st 0xf) (st 0x0)
> > fmul ftz $r0 $r0 $r4
> > exit
> > #endif
> > diff --git a/src/shader/exacanv110.fpc b/src/shader/exacanv110.fpc
> > index 7c0ca5e..7c8ebbd 100644
> > --- a/src/shader/exacanv110.fpc
> > +++ b/src/shader/exacanv110.fpc
> > @@ -1,36 +1,36 @@
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xe1a0070f,
> > +0x003c3c01,
> > 0xcff7ff00,
> > 0xe003ff87,
> > 0x00470000,
> > 0x50800000,
> > 0x4007ff03,
> > 0xe043ff89,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xe1e0072f,
> > +0x001cbc03,
> > 0x0007ff02,
> > 0xe043ff89,
> > 0xaff70204,
> > 0xc03a0017,
> > 0x4007ff01,
> > 0xe043ff88,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xe5e0172f,
> > +0x001fbc02,
> > 0x0007ff00,
> > 0xe043ff88,
> > 0xaff70000,
> > 0xc03a0007,
> > 0x34070000,
> > 0xf0f00000,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xfc201fe1,
> > +0x001f8400,
> > 0x00770303,
> > 0x5c681000,
> > 0x00670202,
> > 0x5c681000,
> > 0x00570101,
> > 0x5c681000,
> > -0xfc0007e0,
> > +0xfde007e1,
> > 0x001f8000,
> > 0x00470000,
> > 0x5c681000,
> > diff --git a/src/shader/exacmnv110.fp b/src/shader/exacmnv110.fp
> > index fe5c294..7113ab3 100644
> > --- a/src/shader/exacmnv110.fp
> > +++ b/src/shader/exacmnv110.fp
> > @@ -25,23 +25,23 @@ NV110FP_Composite[] = {
> > };
> > #else
> >
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
> > ipa pass $r0 a[0x7c] 0x0 0x0 0x1
> > mufu rcp $r0 $r0
> > ipa $r3 a[0x94] $r0 0x0 0x1
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1)
> > ipa $r2 a[0x90] $r0 0x0 0x1
> > tex nodep $r4 $r2 0x0 0x1 t2d 0x8
> > ipa $r1 a[0x84] $r0 0x0 0x1
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0xf)
> > ipa $r0 a[0x80] $r0 0x0 0x1
> > tex nodep $r0 $r0 0x0 0x0 t2d 0xf
> > depbar le 0x5 0x0 0x0
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0x1 wt 0x3) (st 0x1) (st 0x1)
> > fmul ftz $r3 $r3 $r4
> > fmul ftz $r2 $r2 $r4
> > fmul ftz $r1 $r1 $r4
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0x1) (st 0xf) (st 0x0)
> > fmul ftz $r0 $r0 $r4
> > exit
> > #endif
> > diff --git a/src/shader/exacmnv110.fpc b/src/shader/exacmnv110.fpc
> > index 9d62c1a..60352a8 100644
> > --- a/src/shader/exacmnv110.fpc
> > +++ b/src/shader/exacmnv110.fpc
> > @@ -1,36 +1,36 @@
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xe1a0070f,
> > +0x003c3c01,
> > 0xcff7ff00,
> > 0xe003ff87,
> > 0x00470000,
> > 0x50800000,
> > 0x4007ff03,
> > 0xe043ff89,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xe1e0072f,
> > +0x001cbc03,
> > 0x0007ff02,
> > 0xe043ff89,
> > 0x2ff70204,
> > 0xc03a0014,
> > 0x4007ff01,
> > 0xe043ff88,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xe5e0172f,
> > +0x001fbc02,
> > 0x0007ff00,
> > 0xe043ff88,
> > 0xaff70000,
> > 0xc03a0007,
> > 0x34070000,
> > 0xf0f00000,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xfc201fe1,
> > +0x001f8400,
> > 0x00470303,
> > 0x5c681000,
> > 0x00470202,
> > 0x5c681000,
> > 0x00470101,
> > 0x5c681000,
> > -0xfc0007e0,
> > +0xfde007e1,
> > 0x001f8000,
> > 0x00470000,
> > 0x5c681000,
> > diff --git a/src/shader/exas8nv110.fp b/src/shader/exas8nv110.fp
> > index 4fe2e19..a555beb 100644
> > --- a/src/shader/exas8nv110.fp
> > +++ b/src/shader/exas8nv110.fp
> > @@ -25,15 +25,15 @@ NV110FP_Source_A8[] = {
> > };
> > #else
> >
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
> > ipa pass $r0 a[0x7c] 0x0 0x0 0x1
> > mufu rcp $r0 $r0
> > ipa $r1 a[0x84] $r0 0x0 0x1
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf)
> > ipa $r0 a[0x80] $r0 0x0 0x1
> > tex nodep $r0 $r0 0x0 0x0 t2d 0x8
> > depbar le 0x5 0x0 0x0
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0x1 wt 0x1) (st 0x1) (st 0x1)
> > mov $r3 $r0 0xf
> > mov $r2 $r0 0xf
> > mov $r1 $r0 0xf
> > diff --git a/src/shader/exas8nv110.fpc b/src/shader/exas8nv110.fpc
> > index 1181c41..e58d168 100644
> > --- a/src/shader/exas8nv110.fpc
> > +++ b/src/shader/exas8nv110.fpc
> > @@ -1,21 +1,21 @@
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xe1a0070f,
> > +0x003c3c01,
> > 0xcff7ff00,
> > 0xe003ff87,
> > 0x00470000,
> > 0x50800000,
> > 0x4007ff01,
> > 0xe043ff88,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xe1e0072f,
> > +0x001fbc03,
> > 0x0007ff00,
> > 0xe043ff88,
> > 0x2ff70000,
> > 0xc03a0004,
> > 0x34070000,
> > 0xf0f00000,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xfc200fe1,
> > +0x001f8400,
> > 0x00070003,
> > 0x5c980780,
> > 0x00070002,
> > diff --git a/src/shader/exasanv110.fp b/src/shader/exasanv110.fp
> > index 61374a6..ee818cd 100644
> > --- a/src/shader/exasanv110.fp
> > +++ b/src/shader/exasanv110.fp
> > @@ -25,23 +25,23 @@ NV110FP_CACompositeSrcAlpha[] = {
> > };
> > #else
> >
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
> > ipa pass $r0 a[0x7c] 0x0 0x0 0x1
> > mufu rcp $r0 $r0
> > ipa $r3 a[0x84] $r0 0x0 0x1
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1)
> > ipa $r2 a[0x80] $r0 0x0 0x1
> > tex nodep $r4 $r2 0x0 0x0 t2d 0x8
> > ipa $r1 a[0x94] $r0 0x0 0x1
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0xf)
> > ipa $r0 a[0x90] $r0 0x0 0x1
> > tex nodep $r0 $r0 0x0 0x1 t2d 0xf
> > depbar le 0x5 0x0 0x0
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0x1 wt 0x3) (st 0x1) (st 0x1)
> > fmul ftz $r3 $r3 $r4
> > fmul ftz $r2 $r2 $r4
> > fmul ftz $r1 $r1 $r4
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0x1) (st 0xf) (st 0x0)
> > fmul ftz $r0 $r0 $r4
> > exit
> > #endif
> > diff --git a/src/shader/exasanv110.fpc b/src/shader/exasanv110.fpc
> > index 5516a03..604bf9a 100644
> > --- a/src/shader/exasanv110.fpc
> > +++ b/src/shader/exasanv110.fpc
> > @@ -1,36 +1,36 @@
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xe1a0070f,
> > +0x003c3c01,
> > 0xcff7ff00,
> > 0xe003ff87,
> > 0x00470000,
> > 0x50800000,
> > 0x4007ff03,
> > 0xe043ff88,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xe1e0072f,
> > +0x001cbc03,
> > 0x0007ff02,
> > 0xe043ff88,
> > 0x2ff70204,
> > 0xc03a0004,
> > 0x4007ff01,
> > 0xe043ff89,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xe5e0172f,
> > +0x001fbc02,
> > 0x0007ff00,
> > 0xe043ff89,
> > 0xaff70000,
> > 0xc03a0017,
> > 0x34070000,
> > 0xf0f00000,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xfc201fe1,
> > +0x001f8400,
> > 0x00470303,
> > 0x5c681000,
> > 0x00470202,
> > 0x5c681000,
> > 0x00470101,
> > 0x5c681000,
> > -0xfc0007e0,
> > +0xfde007e1,
> > 0x001f8000,
> > 0x00470000,
> > 0x5c681000,
> > diff --git a/src/shader/exascnv110.fp b/src/shader/exascnv110.fp
> > index 90bbb55..86e14e8 100644
> > --- a/src/shader/exascnv110.fp
> > +++ b/src/shader/exascnv110.fp
> > @@ -25,14 +25,14 @@ NV110FP_Source[] = {
> > };
> > #else
> >
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
> > ipa pass $r0 a[0x7c] 0x0 0x0 0x1
> > mufu rcp $r0 $r0
> > ipa $r1 a[0x84] $r0 0x0 0x1
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf wr 0x1) (st 0xf wt 0x3) (st 0xf)
> > ipa $r0 a[0x80] $r0 0x0 0x1
> > tex nodep $r0 $r0 0x0 0x0 t2d 0xf
> > depbar le 0x5 0x0 0x0
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf) (st 0x0) (st 0x0)
> > exit
> > #endif
> > diff --git a/src/shader/exascnv110.fpc b/src/shader/exascnv110.fpc
> > index 2dba15d..1fef5d2 100644
> > --- a/src/shader/exascnv110.fpc
> > +++ b/src/shader/exascnv110.fpc
> > @@ -1,20 +1,20 @@
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xe1a0070f,
> > +0x003c3c01,
> > 0xcff7ff00,
> > 0xe003ff87,
> > 0x00470000,
> > 0x50800000,
> > 0x4007ff01,
> > 0xe043ff88,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xfde0072f,
> > +0x001fbc03,
> > 0x0007ff00,
> > 0xe043ff88,
> > 0xaff70000,
> > 0xc03a0007,
> > 0x34070000,
> > 0xf0f00000,
> > -0xfc0007e0,
> > +0xfc0007ef,
> > 0x001f8000,
> > 0x0007000f,
> > 0xe3000000,
> > diff --git a/src/shader/videonv110.fp b/src/shader/videonv110.fp
> > index 2728311..773aad5 100644
> > --- a/src/shader/videonv110.fp
> > +++ b/src/shader/videonv110.fp
> > @@ -25,30 +25,30 @@ NV110FP_NV12[] = {
> > };
> > #else
> >
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
> > ipa pass $r2 a[0x7c] 0x0 0x0 0x1
> > mufu rcp $r2 $r2
> > ipa $r0 a[0x80] $r2 0x0 0x1
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x0 wt 0x1)
> > ipa $r1 a[0x84] $r2 0x0 0x1
> > tex nodep $r4 $r0 0x0 0x0 t2d 0x8
> > tex nodep $r0 $r0 0x0 0x1 t2d 0xc
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf) (st 0x6) (st 0x1)
> > depbar le 0x5 0x1 0x1
> > fmul ftz $r5 $r4 c0[0x0]
> > fadd ftz $r3 $r5 c0[0x4]
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0x6) (st 0x6) (st 0xf)
> > fadd ftz $r4 $r5 c0[0x8]
> > fadd ftz $r5 $r5 c0[0xc]
> > depbar le 0x5 0x0 0x0
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0x6 wt 0x1) (st 0x1) (st 0x1)
> > ffma ftz $r3 $r0 c0[0x10] $r3
> > ffma ftz $r4 $r0 c0[0x14] $r4
> > ffma ftz $r5 $r0 c0[0x18] $r5
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0x1) (st 0x1) (st 0x6)
> > ffma ftz $r0 $r1 c0[0x1c] $r3
> > ffma ftz $r2 $r1 c0[0x24] $r5
> > ffma ftz $r1 $r1 c0[0x20] $r4
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf) (st 0x0) (st 0x0)
> > exit
> > #endif
> > diff --git a/src/shader/videonv110.fpc b/src/shader/videonv110.fpc
> > index 31d745a..8e7bedf 100644
> > --- a/src/shader/videonv110.fpc
> > +++ b/src/shader/videonv110.fpc
> > @@ -1,52 +1,52 @@
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xe1a0070f,
> > +0x003c3c01,
> > 0xcff7ff02,
> > 0xe003ff87,
> > 0x00470202,
> > 0x50800000,
> > 0x0027ff00,
> > 0xe043ff88,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xe1e0072f,
> > +0x003c3c03,
> > 0x4027ff01,
> > 0xe043ff88,
> > 0x2ff70004,
> > 0xc03a0004,
> > 0x2ff70000,
> > 0xc03a0016,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xfcc007ef,
> > +0x001f8400,
> > 0x34170001,
> > 0xf0f00000,
> > 0x00070405,
> > 0x4c681000,
> > 0x00170503,
> > 0x4c581000,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xfcc007e6,
> > +0x001fbc00,
> > 0x00270504,
> > 0x4c581000,
> > 0x00370505,
> > 0x4c581000,
> > 0x34070000,
> > 0xf0f00000,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xfc200fe6,
> > +0x001f8400,
> > 0x00470003,
> > 0x49a00180,
> > 0x00570004,
> > 0x49a00200,
> > 0x00670005,
> > 0x49a00280,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xfc2007e1,
> > +0x001f9800,
> > 0x00770100,
> > 0x49a00180,
> > 0x00970102,
> > 0x49a00280,
> > 0x00870101,
> > 0x49a00200,
> > -0xfc0007e0,
> > +0xfc0007ef,
> > 0x001f8000,
> > 0x0007000f,
> > 0xe3000000,
> > --
> > 2.11.0
> >
> > _______________________________________________
> > Nouveau mailing list
> > Nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
> > https://lists.freedesktop.org/mailman/listinfo/nouveau
>
[-- Attachment #1.2: Type: text/html, Size: 21233 bytes --]
[-- Attachment #2: Type: text/plain, Size: 154 bytes --]
_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/nouveau
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v4] nv110/exa: update sched codes
[not found] ` <CABVHfRs238NmtjpM12gzaQFjUP3MGewD0iMTkA0GcYtgjzzKxA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2017-06-29 21:24 ` Samuel Pitoiset
0 siblings, 0 replies; 6+ messages in thread
From: Samuel Pitoiset @ 2017-06-29 21:24 UTC (permalink / raw)
To: Aaryaman Vasishta, Ilia Mirkin; +Cc: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
On 06/28/2017 11:05 AM, Aaryaman Vasishta wrote:
>
> Hi,
>
> On Wed, Jun 28, 2017 at 12:53 PM, Ilia Mirkin <imirkin@alum.mit.edu
> <mailto:imirkin@alum.mit.edu>> wrote:
>
> BTW, you can drop those explicit "depbar" ops. I think they're only
> needed when you're doing something weird with barriers. Blob doesn't
> use them (anymore)
>
> Gotcha. Should I remove them in the same patch or a different one? It
> seems like the depbar removal is different than what the commit message
> describes here, so maybe it could do with a separate commit. I could be
> wrong, though, as it's my first time contributing to nouveau.
With a separate patch.
>
> Cheers,
> Aaryaman
>
> On Tue, Jun 27, 2017 at 11:16 AM, Aaryaman Vasishta
> <jem456.vasishta@gmail.com <mailto:jem456.vasishta@gmail.com>> wrote:
> > v4: Updated the wait dependency bars based on tex component masks.
> >
> > This patch adds proper delays to maxwell exa shaders. Tested with
> > rendercheck -f a8r8g8b8.
> >
> > I am still wondering whether the rd's are required. We could
> > still wait on the write bars instead. eg. see
> > "sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0xf)" in exacmnv110.fp
> >
> > Trello:
> >
> https://trello.com/c/6LPB2EIS/174-update-maxwell-shaders-with-proper-delays
> <https://trello.com/c/6LPB2EIS/174-update-maxwell-shaders-with-proper-delays>
> >
> > Signed-off-by: Aaryaman Vasishta <jem456.vasishta@gmail.com
> <mailto:jem456.vasishta@gmail.com>>
> > ---
> > src/shader/exac8nv110.fp | 10 +++++-----
> > src/shader/exac8nv110.fpc | 18 +++++++++---------
> > src/shader/exacanv110.fp | 10 +++++-----
> > src/shader/exacanv110.fpc | 18 +++++++++---------
> > src/shader/exacmnv110.fp | 10 +++++-----
> > src/shader/exacmnv110.fpc | 18 +++++++++---------
> > src/shader/exas8nv110.fp | 6 +++---
> > src/shader/exas8nv110.fpc | 12 ++++++------
> > src/shader/exasanv110.fp | 10 +++++-----
> > src/shader/exasanv110.fpc | 18 +++++++++---------
> > src/shader/exascnv110.fp | 6 +++---
> > src/shader/exascnv110.fpc | 10 +++++-----
> > src/shader/videonv110.fp | 14 +++++++-------
> > src/shader/videonv110.fpc | 26 +++++++++++++-------------
> > 14 files changed, 93 insertions(+), 93 deletions(-)
> >
> > diff --git a/src/shader/exac8nv110.fp b/src/shader/exac8nv110.fp
> > index ce78036..101b67f 100644
> > --- a/src/shader/exac8nv110.fp
> > +++ b/src/shader/exac8nv110.fp
> > @@ -25,23 +25,23 @@ NV110FP_Composite_A8[] = {
> > };
> > #else
> >
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
> > ipa pass $r0 a[0x7c] 0x0 0x0 0x1
> > mufu rcp $r0 $r0
> > ipa $r3 a[0x94] $r0 0x0 0x1
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf wr 0x1) (st 0xf wr 0x0 rd 0x1 wt 0x3) (st 0xf wr 0x1 wt 0x2)
> > ipa $r2 a[0x90] $r0 0x0 0x1
> > tex nodep $r1 $r2 0x0 0x1 t2d 0x8
> > ipa $r3 a[0x84] $r0 0x0 0x1
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf wr 0x2) (st 0xf wr 0x1 wt 0x6) (st 0xf)
> > ipa $r2 a[0x80] $r0 0x0 0x1
> > tex nodep $r0 $r2 0x0 0x0 t2d 0x8
> > depbar le 0x5 0x0 0x0
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0x6 wt 0x3) (st 0x1) (st 0x1)
> > fmul ftz $r3 $r0 $r1
> > mov $r2 $r3 0xf
> > mov $r1 $r3 0xf
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0x1) (st 0xf) (st 0x0)
> > mov $r0 $r3 0xf
> > exit
> > #endif
> > diff --git a/src/shader/exac8nv110.fpc b/src/shader/exac8nv110.fpc
> > index 4aa1368..1f7d649 100644
> > --- a/src/shader/exac8nv110.fpc
> > +++ b/src/shader/exac8nv110.fpc
> > @@ -1,36 +1,36 @@
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xe1a0070f,
> > +0x003c3c01,
> > 0xcff7ff00,
> > 0xe003ff87,
> > 0x00470000,
> > 0x50800000,
> > 0x4007ff03,
> > 0xe043ff89,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0x21e0072f,
> > +0x005cbc03,
> > 0x0007ff02,
> > 0xe043ff89,
> > 0x2ff70201,
> > 0xc03a0014,
> > 0x4007ff03,
> > 0xe043ff88,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xe5e0074f,
> > +0x001fbc06,
> > 0x0007ff02,
> > 0xe043ff88,
> > 0x2ff70200,
> > 0xc03a0004,
> > 0x34070000,
> > 0xf0f00000,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xfc201fe6,
> > +0x001f8400,
> > 0x00170003,
> > 0x5c681000,
> > 0x00370002,
> > 0x5c980780,
> > 0x00370001,
> > 0x5c980780,
> > -0xfc0007e0,
> > +0xfde007e1,
> > 0x001f8000,
> > 0x00370000,
> > 0x5c980780,
> > diff --git a/src/shader/exacanv110.fp b/src/shader/exacanv110.fp
> > index a70d5c5..fe55fcd 100644
> > --- a/src/shader/exacanv110.fp
> > +++ b/src/shader/exacanv110.fp
> > @@ -25,23 +25,23 @@ NV110FP_CAComposite[] = {
> > };
> > #else
> >
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
> > ipa pass $r0 a[0x7c] 0x0 0x0 0x1
> > mufu rcp $r0 $r0
> > ipa $r3 a[0x94] $r0 0x0 0x1
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1)
> > ipa $r2 a[0x90] $r0 0x0 0x1
> > tex nodep $r4 $r2 0x0 0x1 t2d 0xf
> > ipa $r1 a[0x84] $r0 0x0 0x1
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0xf)
> > ipa $r0 a[0x80] $r0 0x0 0x1
> > tex nodep $r0 $r0 0x0 0x0 t2d 0xf
> > depbar le 0x5 0x0 0x0
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0x1 wt 0x3) (st 0x1) (st 0x1)
> > fmul ftz $r3 $r3 $r7
> > fmul ftz $r2 $r2 $r6
> > fmul ftz $r1 $r1 $r5
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0x1) (st 0xf) (st 0x0)
> > fmul ftz $r0 $r0 $r4
> > exit
> > #endif
> > diff --git a/src/shader/exacanv110.fpc b/src/shader/exacanv110.fpc
> > index 7c0ca5e..7c8ebbd 100644
> > --- a/src/shader/exacanv110.fpc
> > +++ b/src/shader/exacanv110.fpc
> > @@ -1,36 +1,36 @@
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xe1a0070f,
> > +0x003c3c01,
> > 0xcff7ff00,
> > 0xe003ff87,
> > 0x00470000,
> > 0x50800000,
> > 0x4007ff03,
> > 0xe043ff89,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xe1e0072f,
> > +0x001cbc03,
> > 0x0007ff02,
> > 0xe043ff89,
> > 0xaff70204,
> > 0xc03a0017,
> > 0x4007ff01,
> > 0xe043ff88,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xe5e0172f,
> > +0x001fbc02,
> > 0x0007ff00,
> > 0xe043ff88,
> > 0xaff70000,
> > 0xc03a0007,
> > 0x34070000,
> > 0xf0f00000,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xfc201fe1,
> > +0x001f8400,
> > 0x00770303,
> > 0x5c681000,
> > 0x00670202,
> > 0x5c681000,
> > 0x00570101,
> > 0x5c681000,
> > -0xfc0007e0,
> > +0xfde007e1,
> > 0x001f8000,
> > 0x00470000,
> > 0x5c681000,
> > diff --git a/src/shader/exacmnv110.fp b/src/shader/exacmnv110.fp
> > index fe5c294..7113ab3 100644
> > --- a/src/shader/exacmnv110.fp
> > +++ b/src/shader/exacmnv110.fp
> > @@ -25,23 +25,23 @@ NV110FP_Composite[] = {
> > };
> > #else
> >
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
> > ipa pass $r0 a[0x7c] 0x0 0x0 0x1
> > mufu rcp $r0 $r0
> > ipa $r3 a[0x94] $r0 0x0 0x1
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1)
> > ipa $r2 a[0x90] $r0 0x0 0x1
> > tex nodep $r4 $r2 0x0 0x1 t2d 0x8
> > ipa $r1 a[0x84] $r0 0x0 0x1
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0xf)
> > ipa $r0 a[0x80] $r0 0x0 0x1
> > tex nodep $r0 $r0 0x0 0x0 t2d 0xf
> > depbar le 0x5 0x0 0x0
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0x1 wt 0x3) (st 0x1) (st 0x1)
> > fmul ftz $r3 $r3 $r4
> > fmul ftz $r2 $r2 $r4
> > fmul ftz $r1 $r1 $r4
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0x1) (st 0xf) (st 0x0)
> > fmul ftz $r0 $r0 $r4
> > exit
> > #endif
> > diff --git a/src/shader/exacmnv110.fpc b/src/shader/exacmnv110.fpc
> > index 9d62c1a..60352a8 100644
> > --- a/src/shader/exacmnv110.fpc
> > +++ b/src/shader/exacmnv110.fpc
> > @@ -1,36 +1,36 @@
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xe1a0070f,
> > +0x003c3c01,
> > 0xcff7ff00,
> > 0xe003ff87,
> > 0x00470000,
> > 0x50800000,
> > 0x4007ff03,
> > 0xe043ff89,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xe1e0072f,
> > +0x001cbc03,
> > 0x0007ff02,
> > 0xe043ff89,
> > 0x2ff70204,
> > 0xc03a0014,
> > 0x4007ff01,
> > 0xe043ff88,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xe5e0172f,
> > +0x001fbc02,
> > 0x0007ff00,
> > 0xe043ff88,
> > 0xaff70000,
> > 0xc03a0007,
> > 0x34070000,
> > 0xf0f00000,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xfc201fe1,
> > +0x001f8400,
> > 0x00470303,
> > 0x5c681000,
> > 0x00470202,
> > 0x5c681000,
> > 0x00470101,
> > 0x5c681000,
> > -0xfc0007e0,
> > +0xfde007e1,
> > 0x001f8000,
> > 0x00470000,
> > 0x5c681000,
> > diff --git a/src/shader/exas8nv110.fp b/src/shader/exas8nv110.fp
> > index 4fe2e19..a555beb 100644
> > --- a/src/shader/exas8nv110.fp
> > +++ b/src/shader/exas8nv110.fp
> > @@ -25,15 +25,15 @@ NV110FP_Source_A8[] = {
> > };
> > #else
> >
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
> > ipa pass $r0 a[0x7c] 0x0 0x0 0x1
> > mufu rcp $r0 $r0
> > ipa $r1 a[0x84] $r0 0x0 0x1
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf)
> > ipa $r0 a[0x80] $r0 0x0 0x1
> > tex nodep $r0 $r0 0x0 0x0 t2d 0x8
> > depbar le 0x5 0x0 0x0
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0x1 wt 0x1) (st 0x1) (st 0x1)
> > mov $r3 $r0 0xf
> > mov $r2 $r0 0xf
> > mov $r1 $r0 0xf
> > diff --git a/src/shader/exas8nv110.fpc b/src/shader/exas8nv110.fpc
> > index 1181c41..e58d168 100644
> > --- a/src/shader/exas8nv110.fpc
> > +++ b/src/shader/exas8nv110.fpc
> > @@ -1,21 +1,21 @@
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xe1a0070f,
> > +0x003c3c01,
> > 0xcff7ff00,
> > 0xe003ff87,
> > 0x00470000,
> > 0x50800000,
> > 0x4007ff01,
> > 0xe043ff88,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xe1e0072f,
> > +0x001fbc03,
> > 0x0007ff00,
> > 0xe043ff88,
> > 0x2ff70000,
> > 0xc03a0004,
> > 0x34070000,
> > 0xf0f00000,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xfc200fe1,
> > +0x001f8400,
> > 0x00070003,
> > 0x5c980780,
> > 0x00070002,
> > diff --git a/src/shader/exasanv110.fp b/src/shader/exasanv110.fp
> > index 61374a6..ee818cd 100644
> > --- a/src/shader/exasanv110.fp
> > +++ b/src/shader/exasanv110.fp
> > @@ -25,23 +25,23 @@ NV110FP_CACompositeSrcAlpha[] = {
> > };
> > #else
> >
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
> > ipa pass $r0 a[0x7c] 0x0 0x0 0x1
> > mufu rcp $r0 $r0
> > ipa $r3 a[0x84] $r0 0x0 0x1
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1)
> > ipa $r2 a[0x80] $r0 0x0 0x1
> > tex nodep $r4 $r2 0x0 0x0 t2d 0x8
> > ipa $r1 a[0x94] $r0 0x0 0x1
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0xf)
> > ipa $r0 a[0x90] $r0 0x0 0x1
> > tex nodep $r0 $r0 0x0 0x1 t2d 0xf
> > depbar le 0x5 0x0 0x0
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0x1 wt 0x3) (st 0x1) (st 0x1)
> > fmul ftz $r3 $r3 $r4
> > fmul ftz $r2 $r2 $r4
> > fmul ftz $r1 $r1 $r4
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0x1) (st 0xf) (st 0x0)
> > fmul ftz $r0 $r0 $r4
> > exit
> > #endif
> > diff --git a/src/shader/exasanv110.fpc b/src/shader/exasanv110.fpc
> > index 5516a03..604bf9a 100644
> > --- a/src/shader/exasanv110.fpc
> > +++ b/src/shader/exasanv110.fpc
> > @@ -1,36 +1,36 @@
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xe1a0070f,
> > +0x003c3c01,
> > 0xcff7ff00,
> > 0xe003ff87,
> > 0x00470000,
> > 0x50800000,
> > 0x4007ff03,
> > 0xe043ff88,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xe1e0072f,
> > +0x001cbc03,
> > 0x0007ff02,
> > 0xe043ff88,
> > 0x2ff70204,
> > 0xc03a0004,
> > 0x4007ff01,
> > 0xe043ff89,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xe5e0172f,
> > +0x001fbc02,
> > 0x0007ff00,
> > 0xe043ff89,
> > 0xaff70000,
> > 0xc03a0017,
> > 0x34070000,
> > 0xf0f00000,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xfc201fe1,
> > +0x001f8400,
> > 0x00470303,
> > 0x5c681000,
> > 0x00470202,
> > 0x5c681000,
> > 0x00470101,
> > 0x5c681000,
> > -0xfc0007e0,
> > +0xfde007e1,
> > 0x001f8000,
> > 0x00470000,
> > 0x5c681000,
> > diff --git a/src/shader/exascnv110.fp b/src/shader/exascnv110.fp
> > index 90bbb55..86e14e8 100644
> > --- a/src/shader/exascnv110.fp
> > +++ b/src/shader/exascnv110.fp
> > @@ -25,14 +25,14 @@ NV110FP_Source[] = {
> > };
> > #else
> >
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
> > ipa pass $r0 a[0x7c] 0x0 0x0 0x1
> > mufu rcp $r0 $r0
> > ipa $r1 a[0x84] $r0 0x0 0x1
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf wr 0x1) (st 0xf wt 0x3) (st 0xf)
> > ipa $r0 a[0x80] $r0 0x0 0x1
> > tex nodep $r0 $r0 0x0 0x0 t2d 0xf
> > depbar le 0x5 0x0 0x0
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf) (st 0x0) (st 0x0)
> > exit
> > #endif
> > diff --git a/src/shader/exascnv110.fpc b/src/shader/exascnv110.fpc
> > index 2dba15d..1fef5d2 100644
> > --- a/src/shader/exascnv110.fpc
> > +++ b/src/shader/exascnv110.fpc
> > @@ -1,20 +1,20 @@
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xe1a0070f,
> > +0x003c3c01,
> > 0xcff7ff00,
> > 0xe003ff87,
> > 0x00470000,
> > 0x50800000,
> > 0x4007ff01,
> > 0xe043ff88,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xfde0072f,
> > +0x001fbc03,
> > 0x0007ff00,
> > 0xe043ff88,
> > 0xaff70000,
> > 0xc03a0007,
> > 0x34070000,
> > 0xf0f00000,
> > -0xfc0007e0,
> > +0xfc0007ef,
> > 0x001f8000,
> > 0x0007000f,
> > 0xe3000000,
> > diff --git a/src/shader/videonv110.fp b/src/shader/videonv110.fp
> > index 2728311..773aad5 100644
> > --- a/src/shader/videonv110.fp
> > +++ b/src/shader/videonv110.fp
> > @@ -25,30 +25,30 @@ NV110FP_NV12[] = {
> > };
> > #else
> >
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
> > ipa pass $r2 a[0x7c] 0x0 0x0 0x1
> > mufu rcp $r2 $r2
> > ipa $r0 a[0x80] $r2 0x0 0x1
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x0 wt 0x1)
> > ipa $r1 a[0x84] $r2 0x0 0x1
> > tex nodep $r4 $r0 0x0 0x0 t2d 0x8
> > tex nodep $r0 $r0 0x0 0x1 t2d 0xc
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf) (st 0x6) (st 0x1)
> > depbar le 0x5 0x1 0x1
> > fmul ftz $r5 $r4 c0[0x0]
> > fadd ftz $r3 $r5 c0[0x4]
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0x6) (st 0x6) (st 0xf)
> > fadd ftz $r4 $r5 c0[0x8]
> > fadd ftz $r5 $r5 c0[0xc]
> > depbar le 0x5 0x0 0x0
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0x6 wt 0x1) (st 0x1) (st 0x1)
> > ffma ftz $r3 $r0 c0[0x10] $r3
> > ffma ftz $r4 $r0 c0[0x14] $r4
> > ffma ftz $r5 $r0 c0[0x18] $r5
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0x1) (st 0x1) (st 0x6)
> > ffma ftz $r0 $r1 c0[0x1c] $r3
> > ffma ftz $r2 $r1 c0[0x24] $r5
> > ffma ftz $r1 $r1 c0[0x20] $r4
> > -sched (st 0x0) (st 0x0) (st 0x0)
> > +sched (st 0xf) (st 0x0) (st 0x0)
> > exit
> > #endif
> > diff --git a/src/shader/videonv110.fpc b/src/shader/videonv110.fpc
> > index 31d745a..8e7bedf 100644
> > --- a/src/shader/videonv110.fpc
> > +++ b/src/shader/videonv110.fpc
> > @@ -1,52 +1,52 @@
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xe1a0070f,
> > +0x003c3c01,
> > 0xcff7ff02,
> > 0xe003ff87,
> > 0x00470202,
> > 0x50800000,
> > 0x0027ff00,
> > 0xe043ff88,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xe1e0072f,
> > +0x003c3c03,
> > 0x4027ff01,
> > 0xe043ff88,
> > 0x2ff70004,
> > 0xc03a0004,
> > 0x2ff70000,
> > 0xc03a0016,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xfcc007ef,
> > +0x001f8400,
> > 0x34170001,
> > 0xf0f00000,
> > 0x00070405,
> > 0x4c681000,
> > 0x00170503,
> > 0x4c581000,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xfcc007e6,
> > +0x001fbc00,
> > 0x00270504,
> > 0x4c581000,
> > 0x00370505,
> > 0x4c581000,
> > 0x34070000,
> > 0xf0f00000,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xfc200fe6,
> > +0x001f8400,
> > 0x00470003,
> > 0x49a00180,
> > 0x00570004,
> > 0x49a00200,
> > 0x00670005,
> > 0x49a00280,
> > -0xfc0007e0,
> > -0x001f8000,
> > +0xfc2007e1,
> > +0x001f9800,
> > 0x00770100,
> > 0x49a00180,
> > 0x00970102,
> > 0x49a00280,
> > 0x00870101,
> > 0x49a00200,
> > -0xfc0007e0,
> > +0xfc0007ef,
> > 0x001f8000,
> > 0x0007000f,
> > 0xe3000000,
> > --
> > 2.11.0
> >
> > _______________________________________________
> > Nouveau mailing list
> > Nouveau@lists.freedesktop.org <mailto:Nouveau@lists.freedesktop.org>
> > https://lists.freedesktop.org/mailman/listinfo/nouveau
> <https://lists.freedesktop.org/mailman/listinfo/nouveau>
>
>
_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/nouveau
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v4] nv110/exa: update sched codes
[not found] ` <20170627151603.2090-1-jem456.vasishta-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-06-28 3:53 ` Ilia Mirkin
@ 2017-06-29 21:26 ` Samuel Pitoiset
[not found] ` <7794eca8-72cb-30d0-fbeb-79571d387f20-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
1 sibling, 1 reply; 6+ messages in thread
From: Samuel Pitoiset @ 2017-06-29 21:26 UTC (permalink / raw)
To: Aaryaman Vasishta, nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Do you still have some glitches or does it work correctly now?
Did you also remove the spurious wait dep bars between v3 and v4?
On 06/27/2017 05:16 PM, Aaryaman Vasishta wrote:
> v4: Updated the wait dependency bars based on tex component masks.
>
> This patch adds proper delays to maxwell exa shaders. Tested with
> rendercheck -f a8r8g8b8.
>
> I am still wondering whether the rd's are required. We could
> still wait on the write bars instead. eg. see
> "sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0xf)" in exacmnv110.fp
>
> Trello:
> https://trello.com/c/6LPB2EIS/174-update-maxwell-shaders-with-proper-delays
>
> Signed-off-by: Aaryaman Vasishta <jem456.vasishta@gmail.com>
> ---
> src/shader/exac8nv110.fp | 10 +++++-----
> src/shader/exac8nv110.fpc | 18 +++++++++---------
> src/shader/exacanv110.fp | 10 +++++-----
> src/shader/exacanv110.fpc | 18 +++++++++---------
> src/shader/exacmnv110.fp | 10 +++++-----
> src/shader/exacmnv110.fpc | 18 +++++++++---------
> src/shader/exas8nv110.fp | 6 +++---
> src/shader/exas8nv110.fpc | 12 ++++++------
> src/shader/exasanv110.fp | 10 +++++-----
> src/shader/exasanv110.fpc | 18 +++++++++---------
> src/shader/exascnv110.fp | 6 +++---
> src/shader/exascnv110.fpc | 10 +++++-----
> src/shader/videonv110.fp | 14 +++++++-------
> src/shader/videonv110.fpc | 26 +++++++++++++-------------
> 14 files changed, 93 insertions(+), 93 deletions(-)
>
> diff --git a/src/shader/exac8nv110.fp b/src/shader/exac8nv110.fp
> index ce78036..101b67f 100644
> --- a/src/shader/exac8nv110.fp
> +++ b/src/shader/exac8nv110.fp
> @@ -25,23 +25,23 @@ NV110FP_Composite_A8[] = {
> };
> #else
>
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
> ipa pass $r0 a[0x7c] 0x0 0x0 0x1
> mufu rcp $r0 $r0
> ipa $r3 a[0x94] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1) (st 0xf wr 0x0 rd 0x1 wt 0x3) (st 0xf wr 0x1 wt 0x2)
> ipa $r2 a[0x90] $r0 0x0 0x1
> tex nodep $r1 $r2 0x0 0x1 t2d 0x8
> ipa $r3 a[0x84] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x2) (st 0xf wr 0x1 wt 0x6) (st 0xf)
> ipa $r2 a[0x80] $r0 0x0 0x1
> tex nodep $r0 $r2 0x0 0x0 t2d 0x8
> depbar le 0x5 0x0 0x0
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x6 wt 0x3) (st 0x1) (st 0x1)
> fmul ftz $r3 $r0 $r1
> mov $r2 $r3 0xf
> mov $r1 $r3 0xf
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1) (st 0xf) (st 0x0)
> mov $r0 $r3 0xf
> exit
> #endif
> diff --git a/src/shader/exac8nv110.fpc b/src/shader/exac8nv110.fpc
> index 4aa1368..1f7d649 100644
> --- a/src/shader/exac8nv110.fpc
> +++ b/src/shader/exac8nv110.fpc
> @@ -1,36 +1,36 @@
> -0xfc0007e0,
> -0x001f8000,
> +0xe1a0070f,
> +0x003c3c01,
> 0xcff7ff00,
> 0xe003ff87,
> 0x00470000,
> 0x50800000,
> 0x4007ff03,
> 0xe043ff89,
> -0xfc0007e0,
> -0x001f8000,
> +0x21e0072f,
> +0x005cbc03,
> 0x0007ff02,
> 0xe043ff89,
> 0x2ff70201,
> 0xc03a0014,
> 0x4007ff03,
> 0xe043ff88,
> -0xfc0007e0,
> -0x001f8000,
> +0xe5e0074f,
> +0x001fbc06,
> 0x0007ff02,
> 0xe043ff88,
> 0x2ff70200,
> 0xc03a0004,
> 0x34070000,
> 0xf0f00000,
> -0xfc0007e0,
> -0x001f8000,
> +0xfc201fe6,
> +0x001f8400,
> 0x00170003,
> 0x5c681000,
> 0x00370002,
> 0x5c980780,
> 0x00370001,
> 0x5c980780,
> -0xfc0007e0,
> +0xfde007e1,
> 0x001f8000,
> 0x00370000,
> 0x5c980780,
> diff --git a/src/shader/exacanv110.fp b/src/shader/exacanv110.fp
> index a70d5c5..fe55fcd 100644
> --- a/src/shader/exacanv110.fp
> +++ b/src/shader/exacanv110.fp
> @@ -25,23 +25,23 @@ NV110FP_CAComposite[] = {
> };
> #else
>
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
> ipa pass $r0 a[0x7c] 0x0 0x0 0x1
> mufu rcp $r0 $r0
> ipa $r3 a[0x94] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1)
> ipa $r2 a[0x90] $r0 0x0 0x1
> tex nodep $r4 $r2 0x0 0x1 t2d 0xf
> ipa $r1 a[0x84] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0xf)
> ipa $r0 a[0x80] $r0 0x0 0x1
> tex nodep $r0 $r0 0x0 0x0 t2d 0xf
> depbar le 0x5 0x0 0x0
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1 wt 0x3) (st 0x1) (st 0x1)
> fmul ftz $r3 $r3 $r7
> fmul ftz $r2 $r2 $r6
> fmul ftz $r1 $r1 $r5
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1) (st 0xf) (st 0x0)
> fmul ftz $r0 $r0 $r4
> exit
> #endif
> diff --git a/src/shader/exacanv110.fpc b/src/shader/exacanv110.fpc
> index 7c0ca5e..7c8ebbd 100644
> --- a/src/shader/exacanv110.fpc
> +++ b/src/shader/exacanv110.fpc
> @@ -1,36 +1,36 @@
> -0xfc0007e0,
> -0x001f8000,
> +0xe1a0070f,
> +0x003c3c01,
> 0xcff7ff00,
> 0xe003ff87,
> 0x00470000,
> 0x50800000,
> 0x4007ff03,
> 0xe043ff89,
> -0xfc0007e0,
> -0x001f8000,
> +0xe1e0072f,
> +0x001cbc03,
> 0x0007ff02,
> 0xe043ff89,
> 0xaff70204,
> 0xc03a0017,
> 0x4007ff01,
> 0xe043ff88,
> -0xfc0007e0,
> -0x001f8000,
> +0xe5e0172f,
> +0x001fbc02,
> 0x0007ff00,
> 0xe043ff88,
> 0xaff70000,
> 0xc03a0007,
> 0x34070000,
> 0xf0f00000,
> -0xfc0007e0,
> -0x001f8000,
> +0xfc201fe1,
> +0x001f8400,
> 0x00770303,
> 0x5c681000,
> 0x00670202,
> 0x5c681000,
> 0x00570101,
> 0x5c681000,
> -0xfc0007e0,
> +0xfde007e1,
> 0x001f8000,
> 0x00470000,
> 0x5c681000,
> diff --git a/src/shader/exacmnv110.fp b/src/shader/exacmnv110.fp
> index fe5c294..7113ab3 100644
> --- a/src/shader/exacmnv110.fp
> +++ b/src/shader/exacmnv110.fp
> @@ -25,23 +25,23 @@ NV110FP_Composite[] = {
> };
> #else
>
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
> ipa pass $r0 a[0x7c] 0x0 0x0 0x1
> mufu rcp $r0 $r0
> ipa $r3 a[0x94] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1)
> ipa $r2 a[0x90] $r0 0x0 0x1
> tex nodep $r4 $r2 0x0 0x1 t2d 0x8
> ipa $r1 a[0x84] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0xf)
> ipa $r0 a[0x80] $r0 0x0 0x1
> tex nodep $r0 $r0 0x0 0x0 t2d 0xf
> depbar le 0x5 0x0 0x0
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1 wt 0x3) (st 0x1) (st 0x1)
> fmul ftz $r3 $r3 $r4
> fmul ftz $r2 $r2 $r4
> fmul ftz $r1 $r1 $r4
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1) (st 0xf) (st 0x0)
> fmul ftz $r0 $r0 $r4
> exit
> #endif
> diff --git a/src/shader/exacmnv110.fpc b/src/shader/exacmnv110.fpc
> index 9d62c1a..60352a8 100644
> --- a/src/shader/exacmnv110.fpc
> +++ b/src/shader/exacmnv110.fpc
> @@ -1,36 +1,36 @@
> -0xfc0007e0,
> -0x001f8000,
> +0xe1a0070f,
> +0x003c3c01,
> 0xcff7ff00,
> 0xe003ff87,
> 0x00470000,
> 0x50800000,
> 0x4007ff03,
> 0xe043ff89,
> -0xfc0007e0,
> -0x001f8000,
> +0xe1e0072f,
> +0x001cbc03,
> 0x0007ff02,
> 0xe043ff89,
> 0x2ff70204,
> 0xc03a0014,
> 0x4007ff01,
> 0xe043ff88,
> -0xfc0007e0,
> -0x001f8000,
> +0xe5e0172f,
> +0x001fbc02,
> 0x0007ff00,
> 0xe043ff88,
> 0xaff70000,
> 0xc03a0007,
> 0x34070000,
> 0xf0f00000,
> -0xfc0007e0,
> -0x001f8000,
> +0xfc201fe1,
> +0x001f8400,
> 0x00470303,
> 0x5c681000,
> 0x00470202,
> 0x5c681000,
> 0x00470101,
> 0x5c681000,
> -0xfc0007e0,
> +0xfde007e1,
> 0x001f8000,
> 0x00470000,
> 0x5c681000,
> diff --git a/src/shader/exas8nv110.fp b/src/shader/exas8nv110.fp
> index 4fe2e19..a555beb 100644
> --- a/src/shader/exas8nv110.fp
> +++ b/src/shader/exas8nv110.fp
> @@ -25,15 +25,15 @@ NV110FP_Source_A8[] = {
> };
> #else
>
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
> ipa pass $r0 a[0x7c] 0x0 0x0 0x1
> mufu rcp $r0 $r0
> ipa $r1 a[0x84] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf)
> ipa $r0 a[0x80] $r0 0x0 0x1
> tex nodep $r0 $r0 0x0 0x0 t2d 0x8
> depbar le 0x5 0x0 0x0
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1 wt 0x1) (st 0x1) (st 0x1)
> mov $r3 $r0 0xf
> mov $r2 $r0 0xf
> mov $r1 $r0 0xf
> diff --git a/src/shader/exas8nv110.fpc b/src/shader/exas8nv110.fpc
> index 1181c41..e58d168 100644
> --- a/src/shader/exas8nv110.fpc
> +++ b/src/shader/exas8nv110.fpc
> @@ -1,21 +1,21 @@
> -0xfc0007e0,
> -0x001f8000,
> +0xe1a0070f,
> +0x003c3c01,
> 0xcff7ff00,
> 0xe003ff87,
> 0x00470000,
> 0x50800000,
> 0x4007ff01,
> 0xe043ff88,
> -0xfc0007e0,
> -0x001f8000,
> +0xe1e0072f,
> +0x001fbc03,
> 0x0007ff00,
> 0xe043ff88,
> 0x2ff70000,
> 0xc03a0004,
> 0x34070000,
> 0xf0f00000,
> -0xfc0007e0,
> -0x001f8000,
> +0xfc200fe1,
> +0x001f8400,
> 0x00070003,
> 0x5c980780,
> 0x00070002,
> diff --git a/src/shader/exasanv110.fp b/src/shader/exasanv110.fp
> index 61374a6..ee818cd 100644
> --- a/src/shader/exasanv110.fp
> +++ b/src/shader/exasanv110.fp
> @@ -25,23 +25,23 @@ NV110FP_CACompositeSrcAlpha[] = {
> };
> #else
>
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
> ipa pass $r0 a[0x7c] 0x0 0x0 0x1
> mufu rcp $r0 $r0
> ipa $r3 a[0x84] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1)
> ipa $r2 a[0x80] $r0 0x0 0x1
> tex nodep $r4 $r2 0x0 0x0 t2d 0x8
> ipa $r1 a[0x94] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0xf)
> ipa $r0 a[0x90] $r0 0x0 0x1
> tex nodep $r0 $r0 0x0 0x1 t2d 0xf
> depbar le 0x5 0x0 0x0
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1 wt 0x3) (st 0x1) (st 0x1)
> fmul ftz $r3 $r3 $r4
> fmul ftz $r2 $r2 $r4
> fmul ftz $r1 $r1 $r4
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1) (st 0xf) (st 0x0)
> fmul ftz $r0 $r0 $r4
> exit
> #endif
> diff --git a/src/shader/exasanv110.fpc b/src/shader/exasanv110.fpc
> index 5516a03..604bf9a 100644
> --- a/src/shader/exasanv110.fpc
> +++ b/src/shader/exasanv110.fpc
> @@ -1,36 +1,36 @@
> -0xfc0007e0,
> -0x001f8000,
> +0xe1a0070f,
> +0x003c3c01,
> 0xcff7ff00,
> 0xe003ff87,
> 0x00470000,
> 0x50800000,
> 0x4007ff03,
> 0xe043ff88,
> -0xfc0007e0,
> -0x001f8000,
> +0xe1e0072f,
> +0x001cbc03,
> 0x0007ff02,
> 0xe043ff88,
> 0x2ff70204,
> 0xc03a0004,
> 0x4007ff01,
> 0xe043ff89,
> -0xfc0007e0,
> -0x001f8000,
> +0xe5e0172f,
> +0x001fbc02,
> 0x0007ff00,
> 0xe043ff89,
> 0xaff70000,
> 0xc03a0017,
> 0x34070000,
> 0xf0f00000,
> -0xfc0007e0,
> -0x001f8000,
> +0xfc201fe1,
> +0x001f8400,
> 0x00470303,
> 0x5c681000,
> 0x00470202,
> 0x5c681000,
> 0x00470101,
> 0x5c681000,
> -0xfc0007e0,
> +0xfde007e1,
> 0x001f8000,
> 0x00470000,
> 0x5c681000,
> diff --git a/src/shader/exascnv110.fp b/src/shader/exascnv110.fp
> index 90bbb55..86e14e8 100644
> --- a/src/shader/exascnv110.fp
> +++ b/src/shader/exascnv110.fp
> @@ -25,14 +25,14 @@ NV110FP_Source[] = {
> };
> #else
>
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
> ipa pass $r0 a[0x7c] 0x0 0x0 0x1
> mufu rcp $r0 $r0
> ipa $r1 a[0x84] $r0 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1) (st 0xf wt 0x3) (st 0xf)
> ipa $r0 a[0x80] $r0 0x0 0x1
> tex nodep $r0 $r0 0x0 0x0 t2d 0xf
> depbar le 0x5 0x0 0x0
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf) (st 0x0) (st 0x0)
> exit
> #endif
> diff --git a/src/shader/exascnv110.fpc b/src/shader/exascnv110.fpc
> index 2dba15d..1fef5d2 100644
> --- a/src/shader/exascnv110.fpc
> +++ b/src/shader/exascnv110.fpc
> @@ -1,20 +1,20 @@
> -0xfc0007e0,
> -0x001f8000,
> +0xe1a0070f,
> +0x003c3c01,
> 0xcff7ff00,
> 0xe003ff87,
> 0x00470000,
> 0x50800000,
> 0x4007ff01,
> 0xe043ff88,
> -0xfc0007e0,
> -0x001f8000,
> +0xfde0072f,
> +0x001fbc03,
> 0x0007ff00,
> 0xe043ff88,
> 0xaff70000,
> 0xc03a0007,
> 0x34070000,
> 0xf0f00000,
> -0xfc0007e0,
> +0xfc0007ef,
> 0x001f8000,
> 0x0007000f,
> 0xe3000000,
> diff --git a/src/shader/videonv110.fp b/src/shader/videonv110.fp
> index 2728311..773aad5 100644
> --- a/src/shader/videonv110.fp
> +++ b/src/shader/videonv110.fp
> @@ -25,30 +25,30 @@ NV110FP_NV12[] = {
> };
> #else
>
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
> ipa pass $r2 a[0x7c] 0x0 0x0 0x1
> mufu rcp $r2 $r2
> ipa $r0 a[0x80] $r2 0x0 0x1
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x0 wt 0x1)
> ipa $r1 a[0x84] $r2 0x0 0x1
> tex nodep $r4 $r0 0x0 0x0 t2d 0x8
> tex nodep $r0 $r0 0x0 0x1 t2d 0xc
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf) (st 0x6) (st 0x1)
> depbar le 0x5 0x1 0x1
> fmul ftz $r5 $r4 c0[0x0]
> fadd ftz $r3 $r5 c0[0x4]
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x6) (st 0x6) (st 0xf)
> fadd ftz $r4 $r5 c0[0x8]
> fadd ftz $r5 $r5 c0[0xc]
> depbar le 0x5 0x0 0x0
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x6 wt 0x1) (st 0x1) (st 0x1)
> ffma ftz $r3 $r0 c0[0x10] $r3
> ffma ftz $r4 $r0 c0[0x14] $r4
> ffma ftz $r5 $r0 c0[0x18] $r5
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0x1) (st 0x1) (st 0x6)
> ffma ftz $r0 $r1 c0[0x1c] $r3
> ffma ftz $r2 $r1 c0[0x24] $r5
> ffma ftz $r1 $r1 c0[0x20] $r4
> -sched (st 0x0) (st 0x0) (st 0x0)
> +sched (st 0xf) (st 0x0) (st 0x0)
> exit
> #endif
> diff --git a/src/shader/videonv110.fpc b/src/shader/videonv110.fpc
> index 31d745a..8e7bedf 100644
> --- a/src/shader/videonv110.fpc
> +++ b/src/shader/videonv110.fpc
> @@ -1,52 +1,52 @@
> -0xfc0007e0,
> -0x001f8000,
> +0xe1a0070f,
> +0x003c3c01,
> 0xcff7ff02,
> 0xe003ff87,
> 0x00470202,
> 0x50800000,
> 0x0027ff00,
> 0xe043ff88,
> -0xfc0007e0,
> -0x001f8000,
> +0xe1e0072f,
> +0x003c3c03,
> 0x4027ff01,
> 0xe043ff88,
> 0x2ff70004,
> 0xc03a0004,
> 0x2ff70000,
> 0xc03a0016,
> -0xfc0007e0,
> -0x001f8000,
> +0xfcc007ef,
> +0x001f8400,
> 0x34170001,
> 0xf0f00000,
> 0x00070405,
> 0x4c681000,
> 0x00170503,
> 0x4c581000,
> -0xfc0007e0,
> -0x001f8000,
> +0xfcc007e6,
> +0x001fbc00,
> 0x00270504,
> 0x4c581000,
> 0x00370505,
> 0x4c581000,
> 0x34070000,
> 0xf0f00000,
> -0xfc0007e0,
> -0x001f8000,
> +0xfc200fe6,
> +0x001f8400,
> 0x00470003,
> 0x49a00180,
> 0x00570004,
> 0x49a00200,
> 0x00670005,
> 0x49a00280,
> -0xfc0007e0,
> -0x001f8000,
> +0xfc2007e1,
> +0x001f9800,
> 0x00770100,
> 0x49a00180,
> 0x00970102,
> 0x49a00280,
> 0x00870101,
> 0x49a00200,
> -0xfc0007e0,
> +0xfc0007ef,
> 0x001f8000,
> 0x0007000f,
> 0xe3000000,
>
_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/nouveau
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v4] nv110/exa: update sched codes
[not found] ` <7794eca8-72cb-30d0-fbeb-79571d387f20-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-07-01 15:32 ` Aaryaman Vasishta
0 siblings, 0 replies; 6+ messages in thread
From: Aaryaman Vasishta @ 2017-07-01 15:32 UTC (permalink / raw)
To: Samuel Pitoiset; +Cc: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
[-- Attachment #1.1: Type: text/plain, Size: 15848 bytes --]
Hi,
On Fri, Jun 30, 2017 at 6:26 AM, Samuel Pitoiset <samuel.pitoiset-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
wrote:
> Do you still have some glitches or does it work correctly now?
No visible glitches on my machine so far (Pascal 1080, Debian stretch GNOME
desktop).
I used "rendercheck -f a8r8g8b8" to make sure there are no differences
between the test results before and after the patch. The tests have helped
me in my debugging to an extent.
> Did you also remove the spurious wait dep bars between v3 and v4?
There's a redundant read dep-bar on "sched (st 0xf wr 0x1) (st 0xf wr 0x0
rd 0x1 wt 0x3) (st 0xf wr 0x1 wt 0x2)" which I've removed in v5 of this
patch. I'll be sending it to the ML now.
Cheers,
Aaryaman
>
> On 06/27/2017 05:16 PM, Aaryaman Vasishta wrote:
>
>> v4: Updated the wait dependency bars based on tex component masks.
>>
>> This patch adds proper delays to maxwell exa shaders. Tested with
>> rendercheck -f a8r8g8b8.
>>
>> I am still wondering whether the rd's are required. We could
>> still wait on the write bars instead. eg. see
>> "sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0xf)" in
>> exacmnv110.fp
>>
>> Trello:
>> https://trello.com/c/6LPB2EIS/174-update-maxwell-shaders-with-proper-delays
>>
>> Signed-off-by: Aaryaman Vasishta <jem456.vasishta-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
>> ---
>> src/shader/exac8nv110.fp | 10 +++++-----
>> src/shader/exac8nv110.fpc | 18 +++++++++---------
>> src/shader/exacanv110.fp | 10 +++++-----
>> src/shader/exacanv110.fpc | 18 +++++++++---------
>> src/shader/exacmnv110.fp | 10 +++++-----
>> src/shader/exacmnv110.fpc | 18 +++++++++---------
>> src/shader/exas8nv110.fp | 6 +++---
>> src/shader/exas8nv110.fpc | 12 ++++++------
>> src/shader/exasanv110.fp | 10 +++++-----
>> src/shader/exasanv110.fpc | 18 +++++++++---------
>> src/shader/exascnv110.fp | 6 +++---
>> src/shader/exascnv110.fpc | 10 +++++-----
>> src/shader/videonv110.fp | 14 +++++++-------
>> src/shader/videonv110.fpc | 26 +++++++++++++-------------
>> 14 files changed, 93 insertions(+), 93 deletions(-)
>>
>> diff --git a/src/shader/exac8nv110.fp b/src/shader/exac8nv110.fp
>> index ce78036..101b67f 100644
>> --- a/src/shader/exac8nv110.fp
>> +++ b/src/shader/exac8nv110.fp
>> @@ -25,23 +25,23 @@ NV110FP_Composite_A8[] = {
>> };
>> #else
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
>> ipa pass $r0 a[0x7c] 0x0 0x0 0x1
>> mufu rcp $r0 $r0
>> ipa $r3 a[0x94] $r0 0x0 0x1
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0xf wr 0x1) (st 0xf wr 0x0 rd 0x1 wt 0x3) (st 0xf wr 0x1 wt 0x2)
>> ipa $r2 a[0x90] $r0 0x0 0x1
>> tex nodep $r1 $r2 0x0 0x1 t2d 0x8
>> ipa $r3 a[0x84] $r0 0x0 0x1
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0xf wr 0x2) (st 0xf wr 0x1 wt 0x6) (st 0xf)
>> ipa $r2 a[0x80] $r0 0x0 0x1
>> tex nodep $r0 $r2 0x0 0x0 t2d 0x8
>> depbar le 0x5 0x0 0x0
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0x6 wt 0x3) (st 0x1) (st 0x1)
>> fmul ftz $r3 $r0 $r1
>> mov $r2 $r3 0xf
>> mov $r1 $r3 0xf
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0x1) (st 0xf) (st 0x0)
>> mov $r0 $r3 0xf
>> exit
>> #endif
>> diff --git a/src/shader/exac8nv110.fpc b/src/shader/exac8nv110.fpc
>> index 4aa1368..1f7d649 100644
>> --- a/src/shader/exac8nv110.fpc
>> +++ b/src/shader/exac8nv110.fpc
>> @@ -1,36 +1,36 @@
>> -0xfc0007e0,
>> -0x001f8000,
>> +0xe1a0070f,
>> +0x003c3c01,
>> 0xcff7ff00,
>> 0xe003ff87,
>> 0x00470000,
>> 0x50800000,
>> 0x4007ff03,
>> 0xe043ff89,
>> -0xfc0007e0,
>> -0x001f8000,
>> +0x21e0072f,
>> +0x005cbc03,
>> 0x0007ff02,
>> 0xe043ff89,
>> 0x2ff70201,
>> 0xc03a0014,
>> 0x4007ff03,
>> 0xe043ff88,
>> -0xfc0007e0,
>> -0x001f8000,
>> +0xe5e0074f,
>> +0x001fbc06,
>> 0x0007ff02,
>> 0xe043ff88,
>> 0x2ff70200,
>> 0xc03a0004,
>> 0x34070000,
>> 0xf0f00000,
>> -0xfc0007e0,
>> -0x001f8000,
>> +0xfc201fe6,
>> +0x001f8400,
>> 0x00170003,
>> 0x5c681000,
>> 0x00370002,
>> 0x5c980780,
>> 0x00370001,
>> 0x5c980780,
>> -0xfc0007e0,
>> +0xfde007e1,
>> 0x001f8000,
>> 0x00370000,
>> 0x5c980780,
>> diff --git a/src/shader/exacanv110.fp b/src/shader/exacanv110.fp
>> index a70d5c5..fe55fcd 100644
>> --- a/src/shader/exacanv110.fp
>> +++ b/src/shader/exacanv110.fp
>> @@ -25,23 +25,23 @@ NV110FP_CAComposite[] = {
>> };
>> #else
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
>> ipa pass $r0 a[0x7c] 0x0 0x0 0x1
>> mufu rcp $r0 $r0
>> ipa $r3 a[0x94] $r0 0x0 0x1
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1)
>> ipa $r2 a[0x90] $r0 0x0 0x1
>> tex nodep $r4 $r2 0x0 0x1 t2d 0xf
>> ipa $r1 a[0x84] $r0 0x0 0x1
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0xf)
>> ipa $r0 a[0x80] $r0 0x0 0x1
>> tex nodep $r0 $r0 0x0 0x0 t2d 0xf
>> depbar le 0x5 0x0 0x0
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0x1 wt 0x3) (st 0x1) (st 0x1)
>> fmul ftz $r3 $r3 $r7
>> fmul ftz $r2 $r2 $r6
>> fmul ftz $r1 $r1 $r5
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0x1) (st 0xf) (st 0x0)
>> fmul ftz $r0 $r0 $r4
>> exit
>> #endif
>> diff --git a/src/shader/exacanv110.fpc b/src/shader/exacanv110.fpc
>> index 7c0ca5e..7c8ebbd 100644
>> --- a/src/shader/exacanv110.fpc
>> +++ b/src/shader/exacanv110.fpc
>> @@ -1,36 +1,36 @@
>> -0xfc0007e0,
>> -0x001f8000,
>> +0xe1a0070f,
>> +0x003c3c01,
>> 0xcff7ff00,
>> 0xe003ff87,
>> 0x00470000,
>> 0x50800000,
>> 0x4007ff03,
>> 0xe043ff89,
>> -0xfc0007e0,
>> -0x001f8000,
>> +0xe1e0072f,
>> +0x001cbc03,
>> 0x0007ff02,
>> 0xe043ff89,
>> 0xaff70204,
>> 0xc03a0017,
>> 0x4007ff01,
>> 0xe043ff88,
>> -0xfc0007e0,
>> -0x001f8000,
>> +0xe5e0172f,
>> +0x001fbc02,
>> 0x0007ff00,
>> 0xe043ff88,
>> 0xaff70000,
>> 0xc03a0007,
>> 0x34070000,
>> 0xf0f00000,
>> -0xfc0007e0,
>> -0x001f8000,
>> +0xfc201fe1,
>> +0x001f8400,
>> 0x00770303,
>> 0x5c681000,
>> 0x00670202,
>> 0x5c681000,
>> 0x00570101,
>> 0x5c681000,
>> -0xfc0007e0,
>> +0xfde007e1,
>> 0x001f8000,
>> 0x00470000,
>> 0x5c681000,
>> diff --git a/src/shader/exacmnv110.fp b/src/shader/exacmnv110.fp
>> index fe5c294..7113ab3 100644
>> --- a/src/shader/exacmnv110.fp
>> +++ b/src/shader/exacmnv110.fp
>> @@ -25,23 +25,23 @@ NV110FP_Composite[] = {
>> };
>> #else
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
>> ipa pass $r0 a[0x7c] 0x0 0x0 0x1
>> mufu rcp $r0 $r0
>> ipa $r3 a[0x94] $r0 0x0 0x1
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1)
>> ipa $r2 a[0x90] $r0 0x0 0x1
>> tex nodep $r4 $r2 0x0 0x1 t2d 0x8
>> ipa $r1 a[0x84] $r0 0x0 0x1
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0xf)
>> ipa $r0 a[0x80] $r0 0x0 0x1
>> tex nodep $r0 $r0 0x0 0x0 t2d 0xf
>> depbar le 0x5 0x0 0x0
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0x1 wt 0x3) (st 0x1) (st 0x1)
>> fmul ftz $r3 $r3 $r4
>> fmul ftz $r2 $r2 $r4
>> fmul ftz $r1 $r1 $r4
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0x1) (st 0xf) (st 0x0)
>> fmul ftz $r0 $r0 $r4
>> exit
>> #endif
>> diff --git a/src/shader/exacmnv110.fpc b/src/shader/exacmnv110.fpc
>> index 9d62c1a..60352a8 100644
>> --- a/src/shader/exacmnv110.fpc
>> +++ b/src/shader/exacmnv110.fpc
>> @@ -1,36 +1,36 @@
>> -0xfc0007e0,
>> -0x001f8000,
>> +0xe1a0070f,
>> +0x003c3c01,
>> 0xcff7ff00,
>> 0xe003ff87,
>> 0x00470000,
>> 0x50800000,
>> 0x4007ff03,
>> 0xe043ff89,
>> -0xfc0007e0,
>> -0x001f8000,
>> +0xe1e0072f,
>> +0x001cbc03,
>> 0x0007ff02,
>> 0xe043ff89,
>> 0x2ff70204,
>> 0xc03a0014,
>> 0x4007ff01,
>> 0xe043ff88,
>> -0xfc0007e0,
>> -0x001f8000,
>> +0xe5e0172f,
>> +0x001fbc02,
>> 0x0007ff00,
>> 0xe043ff88,
>> 0xaff70000,
>> 0xc03a0007,
>> 0x34070000,
>> 0xf0f00000,
>> -0xfc0007e0,
>> -0x001f8000,
>> +0xfc201fe1,
>> +0x001f8400,
>> 0x00470303,
>> 0x5c681000,
>> 0x00470202,
>> 0x5c681000,
>> 0x00470101,
>> 0x5c681000,
>> -0xfc0007e0,
>> +0xfde007e1,
>> 0x001f8000,
>> 0x00470000,
>> 0x5c681000,
>> diff --git a/src/shader/exas8nv110.fp b/src/shader/exas8nv110.fp
>> index 4fe2e19..a555beb 100644
>> --- a/src/shader/exas8nv110.fp
>> +++ b/src/shader/exas8nv110.fp
>> @@ -25,15 +25,15 @@ NV110FP_Source_A8[] = {
>> };
>> #else
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
>> ipa pass $r0 a[0x7c] 0x0 0x0 0x1
>> mufu rcp $r0 $r0
>> ipa $r1 a[0x84] $r0 0x0 0x1
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf)
>> ipa $r0 a[0x80] $r0 0x0 0x1
>> tex nodep $r0 $r0 0x0 0x0 t2d 0x8
>> depbar le 0x5 0x0 0x0
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0x1 wt 0x1) (st 0x1) (st 0x1)
>> mov $r3 $r0 0xf
>> mov $r2 $r0 0xf
>> mov $r1 $r0 0xf
>> diff --git a/src/shader/exas8nv110.fpc b/src/shader/exas8nv110.fpc
>> index 1181c41..e58d168 100644
>> --- a/src/shader/exas8nv110.fpc
>> +++ b/src/shader/exas8nv110.fpc
>> @@ -1,21 +1,21 @@
>> -0xfc0007e0,
>> -0x001f8000,
>> +0xe1a0070f,
>> +0x003c3c01,
>> 0xcff7ff00,
>> 0xe003ff87,
>> 0x00470000,
>> 0x50800000,
>> 0x4007ff01,
>> 0xe043ff88,
>> -0xfc0007e0,
>> -0x001f8000,
>> +0xe1e0072f,
>> +0x001fbc03,
>> 0x0007ff00,
>> 0xe043ff88,
>> 0x2ff70000,
>> 0xc03a0004,
>> 0x34070000,
>> 0xf0f00000,
>> -0xfc0007e0,
>> -0x001f8000,
>> +0xfc200fe1,
>> +0x001f8400,
>> 0x00070003,
>> 0x5c980780,
>> 0x00070002,
>> diff --git a/src/shader/exasanv110.fp b/src/shader/exasanv110.fp
>> index 61374a6..ee818cd 100644
>> --- a/src/shader/exasanv110.fp
>> +++ b/src/shader/exasanv110.fp
>> @@ -25,23 +25,23 @@ NV110FP_CACompositeSrcAlpha[] = {
>> };
>> #else
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
>> ipa pass $r0 a[0x7c] 0x0 0x0 0x1
>> mufu rcp $r0 $r0
>> ipa $r3 a[0x84] $r0 0x0 0x1
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x1)
>> ipa $r2 a[0x80] $r0 0x0 0x1
>> tex nodep $r4 $r2 0x0 0x0 t2d 0x8
>> ipa $r1 a[0x94] $r0 0x0 0x1
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0xf wr 0x1 wt 0x2) (st 0xf wr 0x1 wt 0x2) (st 0xf)
>> ipa $r0 a[0x90] $r0 0x0 0x1
>> tex nodep $r0 $r0 0x0 0x1 t2d 0xf
>> depbar le 0x5 0x0 0x0
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0x1 wt 0x3) (st 0x1) (st 0x1)
>> fmul ftz $r3 $r3 $r4
>> fmul ftz $r2 $r2 $r4
>> fmul ftz $r1 $r1 $r4
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0x1) (st 0xf) (st 0x0)
>> fmul ftz $r0 $r0 $r4
>> exit
>> #endif
>> diff --git a/src/shader/exasanv110.fpc b/src/shader/exasanv110.fpc
>> index 5516a03..604bf9a 100644
>> --- a/src/shader/exasanv110.fpc
>> +++ b/src/shader/exasanv110.fpc
>> @@ -1,36 +1,36 @@
>> -0xfc0007e0,
>> -0x001f8000,
>> +0xe1a0070f,
>> +0x003c3c01,
>> 0xcff7ff00,
>> 0xe003ff87,
>> 0x00470000,
>> 0x50800000,
>> 0x4007ff03,
>> 0xe043ff88,
>> -0xfc0007e0,
>> -0x001f8000,
>> +0xe1e0072f,
>> +0x001cbc03,
>> 0x0007ff02,
>> 0xe043ff88,
>> 0x2ff70204,
>> 0xc03a0004,
>> 0x4007ff01,
>> 0xe043ff89,
>> -0xfc0007e0,
>> -0x001f8000,
>> +0xe5e0172f,
>> +0x001fbc02,
>> 0x0007ff00,
>> 0xe043ff89,
>> 0xaff70000,
>> 0xc03a0017,
>> 0x34070000,
>> 0xf0f00000,
>> -0xfc0007e0,
>> -0x001f8000,
>> +0xfc201fe1,
>> +0x001f8400,
>> 0x00470303,
>> 0x5c681000,
>> 0x00470202,
>> 0x5c681000,
>> 0x00470101,
>> 0x5c681000,
>> -0xfc0007e0,
>> +0xfde007e1,
>> 0x001f8000,
>> 0x00470000,
>> 0x5c681000,
>> diff --git a/src/shader/exascnv110.fp b/src/shader/exascnv110.fp
>> index 90bbb55..86e14e8 100644
>> --- a/src/shader/exascnv110.fp
>> +++ b/src/shader/exascnv110.fp
>> @@ -25,14 +25,14 @@ NV110FP_Source[] = {
>> };
>> #else
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
>> ipa pass $r0 a[0x7c] 0x0 0x0 0x1
>> mufu rcp $r0 $r0
>> ipa $r1 a[0x84] $r0 0x0 0x1
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0xf wr 0x1) (st 0xf wt 0x3) (st 0xf)
>> ipa $r0 a[0x80] $r0 0x0 0x1
>> tex nodep $r0 $r0 0x0 0x0 t2d 0xf
>> depbar le 0x5 0x0 0x0
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0xf) (st 0x0) (st 0x0)
>> exit
>> #endif
>> diff --git a/src/shader/exascnv110.fpc b/src/shader/exascnv110.fpc
>> index 2dba15d..1fef5d2 100644
>> --- a/src/shader/exascnv110.fpc
>> +++ b/src/shader/exascnv110.fpc
>> @@ -1,20 +1,20 @@
>> -0xfc0007e0,
>> -0x001f8000,
>> +0xe1a0070f,
>> +0x003c3c01,
>> 0xcff7ff00,
>> 0xe003ff87,
>> 0x00470000,
>> 0x50800000,
>> 0x4007ff01,
>> 0xe043ff88,
>> -0xfc0007e0,
>> -0x001f8000,
>> +0xfde0072f,
>> +0x001fbc03,
>> 0x0007ff00,
>> 0xe043ff88,
>> 0xaff70000,
>> 0xc03a0007,
>> 0x34070000,
>> 0xf0f00000,
>> -0xfc0007e0,
>> +0xfc0007ef,
>> 0x001f8000,
>> 0x0007000f,
>> 0xe3000000,
>> diff --git a/src/shader/videonv110.fp b/src/shader/videonv110.fp
>> index 2728311..773aad5 100644
>> --- a/src/shader/videonv110.fp
>> +++ b/src/shader/videonv110.fp
>> @@ -25,30 +25,30 @@ NV110FP_NV12[] = {
>> };
>> #else
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0xf wr 0x0) (st 0xd wr 0x0 wt 0x1) (st 0xf wr 0x0 wt 0x1)
>> ipa pass $r2 a[0x7c] 0x0 0x0 0x1
>> mufu rcp $r2 $r2
>> ipa $r0 a[0x80] $r2 0x0 0x1
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0xf wr 0x1) (st 0xf wr 0x0 wt 0x3) (st 0xf wr 0x0 wt 0x1)
>> ipa $r1 a[0x84] $r2 0x0 0x1
>> tex nodep $r4 $r0 0x0 0x0 t2d 0x8
>> tex nodep $r0 $r0 0x0 0x1 t2d 0xc
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0xf) (st 0x6) (st 0x1)
>> depbar le 0x5 0x1 0x1
>> fmul ftz $r5 $r4 c0[0x0]
>> fadd ftz $r3 $r5 c0[0x4]
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0x6) (st 0x6) (st 0xf)
>> fadd ftz $r4 $r5 c0[0x8]
>> fadd ftz $r5 $r5 c0[0xc]
>> depbar le 0x5 0x0 0x0
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0x6 wt 0x1) (st 0x1) (st 0x1)
>> ffma ftz $r3 $r0 c0[0x10] $r3
>> ffma ftz $r4 $r0 c0[0x14] $r4
>> ffma ftz $r5 $r0 c0[0x18] $r5
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0x1) (st 0x1) (st 0x6)
>> ffma ftz $r0 $r1 c0[0x1c] $r3
>> ffma ftz $r2 $r1 c0[0x24] $r5
>> ffma ftz $r1 $r1 c0[0x20] $r4
>> -sched (st 0x0) (st 0x0) (st 0x0)
>> +sched (st 0xf) (st 0x0) (st 0x0)
>> exit
>> #endif
>> diff --git a/src/shader/videonv110.fpc b/src/shader/videonv110.fpc
>> index 31d745a..8e7bedf 100644
>> --- a/src/shader/videonv110.fpc
>> +++ b/src/shader/videonv110.fpc
>> @@ -1,52 +1,52 @@
>> -0xfc0007e0,
>> -0x001f8000,
>> +0xe1a0070f,
>> +0x003c3c01,
>> 0xcff7ff02,
>> 0xe003ff87,
>> 0x00470202,
>> 0x50800000,
>> 0x0027ff00,
>> 0xe043ff88,
>> -0xfc0007e0,
>> -0x001f8000,
>> +0xe1e0072f,
>> +0x003c3c03,
>> 0x4027ff01,
>> 0xe043ff88,
>> 0x2ff70004,
>> 0xc03a0004,
>> 0x2ff70000,
>> 0xc03a0016,
>> -0xfc0007e0,
>> -0x001f8000,
>> +0xfcc007ef,
>> +0x001f8400,
>> 0x34170001,
>> 0xf0f00000,
>> 0x00070405,
>> 0x4c681000,
>> 0x00170503,
>> 0x4c581000,
>> -0xfc0007e0,
>> -0x001f8000,
>> +0xfcc007e6,
>> +0x001fbc00,
>> 0x00270504,
>> 0x4c581000,
>> 0x00370505,
>> 0x4c581000,
>> 0x34070000,
>> 0xf0f00000,
>> -0xfc0007e0,
>> -0x001f8000,
>> +0xfc200fe6,
>> +0x001f8400,
>> 0x00470003,
>> 0x49a00180,
>> 0x00570004,
>> 0x49a00200,
>> 0x00670005,
>> 0x49a00280,
>> -0xfc0007e0,
>> -0x001f8000,
>> +0xfc2007e1,
>> +0x001f9800,
>> 0x00770100,
>> 0x49a00180,
>> 0x00970102,
>> 0x49a00280,
>> 0x00870101,
>> 0x49a00200,
>> -0xfc0007e0,
>> +0xfc0007ef,
>> 0x001f8000,
>> 0x0007000f,
>> 0xe3000000,
>>
>>
[-- Attachment #1.2: Type: text/html, Size: 18543 bytes --]
[-- Attachment #2: Type: text/plain, Size: 154 bytes --]
_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/nouveau
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2017-07-01 15:32 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-06-27 15:16 [PATCH v4] nv110/exa: update sched codes Aaryaman Vasishta
[not found] ` <20170627151603.2090-1-jem456.vasishta-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-06-28 3:53 ` Ilia Mirkin
[not found] ` <CAKb7UvgcwqEE2C6hFoSHp8n21UO6Oa2T7WaavNtw9iTTx0m_yw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-06-28 9:05 ` Aaryaman Vasishta
[not found] ` <CABVHfRs238NmtjpM12gzaQFjUP3MGewD0iMTkA0GcYtgjzzKxA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-06-29 21:24 ` Samuel Pitoiset
2017-06-29 21:26 ` Samuel Pitoiset
[not found] ` <7794eca8-72cb-30d0-fbeb-79571d387f20-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-07-01 15:32 ` Aaryaman Vasishta
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.