intel-gfx.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] i965: Fix the VS thread limits for GT1, and clarify the WM limits on both.
@ 2011-03-29 22:17 Eric Anholt
  2011-03-30  0:22 ` Kenneth Graunke
  2011-03-30  1:49 ` Zou, Nanhai
  0 siblings, 2 replies; 3+ messages in thread
From: Eric Anholt @ 2011-03-29 22:17 UTC (permalink / raw)
  To: intel-gfx

---

I don't have GT1 to test with.  Does this fix VS regressions for
people with that hardware?

 src/mesa/drivers/dri/i965/brw_context.c    |   13 +++++++++++--
 src/mesa/drivers/dri/i965/gen6_vs_state.c  |    2 +-
 src/mesa/drivers/dri/i965/gen6_wm_state.c  |    2 +-
 src/mesa/drivers/dri/intel/intel_chipset.h |   10 ++++++----
 4 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 9483ec6..a74ba5c 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -183,8 +183,17 @@ GLboolean brwCreateContext( int api,
    /* WM maximum threads is number of EUs times number of threads per EU. */
    if (intel->gen >= 6) {
       brw->urb.size = 1024;
-      brw->vs_max_threads = 60;
-      brw->wm_max_threads = 80;
+      if (IS_GT2(intel->intelScreen->deviceID)) {
+	 /* This could possibly be 80, but is supposed to require
+	  * disabling of WIZ hashing (bit 6 of GT_MODE, 0x20d0) and a
+	  * GPU reset to change.
+	  */
+	 brw->wm_max_threads = 40;
+	 brw->vs_max_threads = 60;
+      } else {
+	 brw->wm_max_threads = 40;
+	 brw->vs_max_threads = 24;
+      }
    } else if (intel->gen == 5) {
       brw->urb.size = 1024;
       brw->vs_max_threads = 72;
diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index ce0b8ea..a10cec3 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -137,7 +137,7 @@ upload_vs_state(struct brw_context *brw)
 	     (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
 	     (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
 
-   OUT_BATCH(((60 - 1) << GEN6_VS_MAX_THREADS_SHIFT) | /* max 60 threads for gen6 */
+   OUT_BATCH(((brw->vs_max_threads - 1) << GEN6_VS_MAX_THREADS_SHIFT) |
 	     GEN6_VS_STATISTICS_ENABLE |
 	     GEN6_VS_ENABLE);
    ADVANCE_BATCH();
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index 78901ec..f4f0475 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -144,7 +144,7 @@ upload_wm_state(struct brw_context *brw)
    dw4 |= (brw->wm.prog_data->first_curbe_grf <<
 	   GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
 
-   dw5 |= (40 - 1) << GEN6_WM_MAX_THREADS_SHIFT;
+   dw5 |= (brw->wm_max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
 
    /* CACHE_NEW_WM_PROG */
    if (brw->wm.prog_data->dispatch_width == 8)
diff --git a/src/mesa/drivers/dri/intel/intel_chipset.h b/src/mesa/drivers/dri/intel/intel_chipset.h
index 4ff9140..f7dcf47 100644
--- a/src/mesa/drivers/dri/intel/intel_chipset.h
+++ b/src/mesa/drivers/dri/intel/intel_chipset.h
@@ -125,12 +125,14 @@
 /* Compat macro for intel_decode.c */
 #define IS_IRONLAKE(devid)	IS_GEN5(devid)
 
-#define IS_GEN6(devid)		(devid == PCI_CHIP_SANDYBRIDGE_GT1 || \
-				 devid == PCI_CHIP_SANDYBRIDGE_GT2 || \
+#define IS_GT2(devid)		(devid == PCI_CHIP_SANDYBRIDGE_GT2 || \
 				 devid == PCI_CHIP_SANDYBRIDGE_GT2_PLUS	|| \
-				 devid == PCI_CHIP_SANDYBRIDGE_M_GT1 || \
 				 devid == PCI_CHIP_SANDYBRIDGE_M_GT2 || \
-				 devid == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS || \
+				 devid == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS)
+
+#define IS_GEN6(devid)		(IS_GT2(devid) || \
+				 devid == PCI_CHIP_SANDYBRIDGE_GT1 || \
+				 devid == PCI_CHIP_SANDYBRIDGE_M_GT1 || \
 				 devid == PCI_CHIP_SANDYBRIDGE_S)
 
 #define IS_GT1(devid)		(devid == PCI_CHIP_SANDYBRIDGE_GT1 || \
-- 
1.7.4.1

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] i965: Fix the VS thread limits for GT1, and clarify the WM limits on both.
  2011-03-29 22:17 [PATCH] i965: Fix the VS thread limits for GT1, and clarify the WM limits on both Eric Anholt
@ 2011-03-30  0:22 ` Kenneth Graunke
  2011-03-30  1:49 ` Zou, Nanhai
  1 sibling, 0 replies; 3+ messages in thread
From: Kenneth Graunke @ 2011-03-30  0:22 UTC (permalink / raw)
  To: intel-gfx

On 03/29/2011 03:17 PM, Eric Anholt wrote:
> ---
>
> I don't have GT1 to test with.  Does this fix VS regressions for
> people with that hardware?

Me neither, but it sure looks correct.

[snip]
> diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
> index 9483ec6..a74ba5c 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.c
> +++ b/src/mesa/drivers/dri/i965/brw_context.c
> @@ -183,8 +183,17 @@ GLboolean brwCreateContext( int api,
>      /* WM maximum threads is number of EUs times number of threads per EU. */
>      if (intel->gen>= 6) {
>         brw->urb.size = 1024;
> -      brw->vs_max_threads = 60;
> -      brw->wm_max_threads = 80;
> +      if (IS_GT2(intel->intelScreen->deviceID)) {
> +	 /* This could possibly be 80, but is supposed to require
> +	  * disabling of WIZ hashing (bit 6 of GT_MODE, 0x20d0) and a
> +	  * GPU reset to change.
> +	  */
> +	 brw->wm_max_threads = 40;
> +	 brw->vs_max_threads = 60;
> +      } else {
> +	 brw->wm_max_threads = 40;
> +	 brw->vs_max_threads = 24;
> +      }

Presumably 60 = 12 EUs * 5 threads/EU (for GT2), while 24 = 6 EUs * 4 
threads/EU (for GT1).  If so, a comment to that effect would be nice...

I still have no idea where 40/80 come from.

[snip]
> diff --git a/src/mesa/drivers/dri/intel/intel_chipset.h b/src/mesa/drivers/dri/intel/intel_chipset.h
> index 4ff9140..f7dcf47 100644
> --- a/src/mesa/drivers/dri/intel/intel_chipset.h
> +++ b/src/mesa/drivers/dri/intel/intel_chipset.h

Drop the intel_chipset.h changes, I just committed an equivalent patch.

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] i965: Fix the VS thread limits for GT1, and clarify the WM limits on both.
  2011-03-29 22:17 [PATCH] i965: Fix the VS thread limits for GT1, and clarify the WM limits on both Eric Anholt
  2011-03-30  0:22 ` Kenneth Graunke
@ 2011-03-30  1:49 ` Zou, Nanhai
  1 sibling, 0 replies; 3+ messages in thread
From: Zou, Nanhai @ 2011-03-30  1:49 UTC (permalink / raw)
  To: Eric Anholt, intel-gfx

I ran some simple tests on that machine.
It fails with the VS thread count set to 3, but passes with it set to 1 or 2.
So I believe there is some hidden bug.
One limitation when running multiple VS threads is that the kernel must
not use the scratch space, since that would introduce a race condition.
But I don't think our VS kernel uses scratch space.

Thanks
Zou Nanhai

-----Original Message-----
From: intel-gfx-bounces+nanhai.zou=intel.com@lists.freedesktop.org [mailto:intel-gfx-bounces+nanhai.zou=intel.com@lists.freedesktop.org] On Behalf Of Eric Anholt
Sent: 2011年3月30日 6:17
To: intel-gfx@lists.freedesktop.org
Subject: [Intel-gfx] [PATCH] i965: Fix the VS thread limits for GT1, and clarify the WM limits on both.

---

I don't have GT1 to test with.  Does this fix VS regressions for
people with that hardware?

 src/mesa/drivers/dri/i965/brw_context.c    |   13 +++++++++++--
 src/mesa/drivers/dri/i965/gen6_vs_state.c  |    2 +-
 src/mesa/drivers/dri/i965/gen6_wm_state.c  |    2 +-
 src/mesa/drivers/dri/intel/intel_chipset.h |   10 ++++++----
 4 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 9483ec6..a74ba5c 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -183,8 +183,17 @@ GLboolean brwCreateContext( int api,
    /* WM maximum threads is number of EUs times number of threads per EU. */
    if (intel->gen >= 6) {
       brw->urb.size = 1024;
-      brw->vs_max_threads = 60;
-      brw->wm_max_threads = 80;
+      if (IS_GT2(intel->intelScreen->deviceID)) {
+	 /* This could possibly be 80, but is supposed to require
+	  * disabling of WIZ hashing (bit 6 of GT_MODE, 0x20d0) and a
+	  * GPU reset to change.
+	  */
+	 brw->wm_max_threads = 40;
+	 brw->vs_max_threads = 60;
+      } else {
+	 brw->wm_max_threads = 40;
+	 brw->vs_max_threads = 24;
+      }
    } else if (intel->gen == 5) {
       brw->urb.size = 1024;
       brw->vs_max_threads = 72;
diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index ce0b8ea..a10cec3 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -137,7 +137,7 @@ upload_vs_state(struct brw_context *brw)
 	     (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
 	     (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
 
-   OUT_BATCH(((60 - 1) << GEN6_VS_MAX_THREADS_SHIFT) | /* max 60 threads for gen6 */
+   OUT_BATCH(((brw->vs_max_threads - 1) << GEN6_VS_MAX_THREADS_SHIFT) |
 	     GEN6_VS_STATISTICS_ENABLE |
 	     GEN6_VS_ENABLE);
    ADVANCE_BATCH();
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index 78901ec..f4f0475 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -144,7 +144,7 @@ upload_wm_state(struct brw_context *brw)
    dw4 |= (brw->wm.prog_data->first_curbe_grf <<
 	   GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
 
-   dw5 |= (40 - 1) << GEN6_WM_MAX_THREADS_SHIFT;
+   dw5 |= (brw->wm_max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
 
    /* CACHE_NEW_WM_PROG */
    if (brw->wm.prog_data->dispatch_width == 8)
diff --git a/src/mesa/drivers/dri/intel/intel_chipset.h b/src/mesa/drivers/dri/intel/intel_chipset.h
index 4ff9140..f7dcf47 100644
--- a/src/mesa/drivers/dri/intel/intel_chipset.h
+++ b/src/mesa/drivers/dri/intel/intel_chipset.h
@@ -125,12 +125,14 @@
 /* Compat macro for intel_decode.c */
 #define IS_IRONLAKE(devid)	IS_GEN5(devid)
 
-#define IS_GEN6(devid)		(devid == PCI_CHIP_SANDYBRIDGE_GT1 || \
-				 devid == PCI_CHIP_SANDYBRIDGE_GT2 || \
+#define IS_GT2(devid)		(devid == PCI_CHIP_SANDYBRIDGE_GT2 || \
 				 devid == PCI_CHIP_SANDYBRIDGE_GT2_PLUS	|| \
-				 devid == PCI_CHIP_SANDYBRIDGE_M_GT1 || \
 				 devid == PCI_CHIP_SANDYBRIDGE_M_GT2 || \
-				 devid == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS || \
+				 devid == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS)
+
+#define IS_GEN6(devid)		(IS_GT2(devid) || \
+				 devid == PCI_CHIP_SANDYBRIDGE_GT1 || \
+				 devid == PCI_CHIP_SANDYBRIDGE_M_GT1 || \
 				 devid == PCI_CHIP_SANDYBRIDGE_S)
 
 #define IS_GT1(devid)		(devid == PCI_CHIP_SANDYBRIDGE_GT1 || \
-- 
1.7.4.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2011-03-30  1:50 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-03-29 22:17 [PATCH] i965: Fix the VS thread limits for GT1, and clarify the WM limits on both Eric Anholt
2011-03-30  0:22 ` Kenneth Graunke
2011-03-30  1:49 ` Zou, Nanhai

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).