intel-gfx.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 00/10] UXA Render acceleration for Ivybridge
@ 2011-07-14 21:21 Kenneth Graunke
  2011-07-14 21:21 ` [PATCH 01/10] render: New Ivybridge assembly programs for render acceleration Kenneth Graunke
                   ` (9 more replies)
  0 siblings, 10 replies; 11+ messages in thread
From: Kenneth Graunke @ 2011-07-14 21:21 UTC (permalink / raw)
  To: intel-gfx

Hi,

This patch series adds 2D render acceleration for Ivybridge, using UXA.
It also refactors some (some!) of the duplicated code between Xv and render.

I've done very minimal testing, but it seems to work.

--Kenneth

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 01/10] render: New Ivybridge assembly programs for render acceleration.
  2011-07-14 21:21 [PATCH 00/10] UXA Render acceleration for Ivybridge Kenneth Graunke
@ 2011-07-14 21:21 ` Kenneth Graunke
  2011-07-14 21:21 ` [PATCH 02/10] render: Update SURFACE_STATE for Ivybridge Kenneth Graunke
                   ` (8 subsequent siblings)
  9 siblings, 0 replies; 11+ messages in thread
From: Kenneth Graunke @ 2011-07-14 21:21 UTC (permalink / raw)
  To: intel-gfx

These are exactly the same as the ones for Sandybridge, but with message
registers translated (hopefully) in the same way as Haihao's new
programs (m1 == g65).

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/i965_render.c                              |   95 +++++++++++++++++++++++-
 src/render_program/exa_wm_mask_affine.g7a      |   41 ++++++++++
 src/render_program/exa_wm_mask_affine.g7b      |    4 +
 src/render_program/exa_wm_mask_projective.g7a  |   63 ++++++++++++++++
 src/render_program/exa_wm_mask_projective.g7b  |   12 +++
 src/render_program/exa_wm_mask_sample_a.g7a    |   49 ++++++++++++
 src/render_program/exa_wm_mask_sample_a.g7b    |    3 +
 src/render_program/exa_wm_mask_sample_argb.g7a |   49 ++++++++++++
 src/render_program/exa_wm_mask_sample_argb.g7b |    3 +
 src/render_program/exa_wm_src_projective.g7a   |   63 ++++++++++++++++
 src/render_program/exa_wm_src_projective.g7b   |   12 +++
 src/render_program/exa_wm_src_sample_a.g7a     |   48 ++++++++++++
 src/render_program/exa_wm_src_sample_a.g7b     |    3 +
 13 files changed, 442 insertions(+), 3 deletions(-)
 create mode 100644 src/render_program/exa_wm_mask_affine.g7a
 create mode 100644 src/render_program/exa_wm_mask_affine.g7b
 create mode 100644 src/render_program/exa_wm_mask_projective.g7a
 create mode 100644 src/render_program/exa_wm_mask_projective.g7b
 create mode 100644 src/render_program/exa_wm_mask_sample_a.g7a
 create mode 100644 src/render_program/exa_wm_mask_sample_a.g7b
 create mode 100644 src/render_program/exa_wm_mask_sample_argb.g7a
 create mode 100644 src/render_program/exa_wm_mask_sample_argb.g7b
 create mode 100644 src/render_program/exa_wm_src_projective.g7a
 create mode 100644 src/render_program/exa_wm_src_projective.g7b
 create mode 100644 src/render_program/exa_wm_src_sample_a.g7a
 create mode 100644 src/render_program/exa_wm_src_sample_a.g7b

diff --git a/src/i965_render.c b/src/i965_render.c
index b76107d..5ab53c4 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -538,6 +538,74 @@ static const uint32_t ps_kernel_masknoca_projective_static_gen6[][4] = {
 #include "exa_wm_write.g6b"
 };
 
+/* programs for GEN7 */
+static const uint32_t ps_kernel_nomask_affine_static_gen7[][4] = {
+#include "exa_wm_src_affine.g7b"
+#include "exa_wm_src_sample_argb.g7b"
+#include "exa_wm_write.g7b"
+};
+
+static const uint32_t ps_kernel_nomask_projective_static_gen7[][4] = {
+#include "exa_wm_src_projective.g7b"
+#include "exa_wm_src_sample_argb.g7b"
+#include "exa_wm_write.g7b"
+};
+
+static const uint32_t ps_kernel_maskca_affine_static_gen7[][4] = {
+#include "exa_wm_src_affine.g7b"
+#include "exa_wm_src_sample_argb.g7b"
+#include "exa_wm_mask_affine.g7b"
+#include "exa_wm_mask_sample_argb.g7b"
+#include "exa_wm_ca.g6b"
+#include "exa_wm_write.g7b"
+};
+
+static const uint32_t ps_kernel_maskca_projective_static_gen7[][4] = {
+#include "exa_wm_src_projective.g7b"
+#include "exa_wm_src_sample_argb.g7b"
+#include "exa_wm_mask_projective.g7b"
+#include "exa_wm_mask_sample_argb.g7b"
+#include "exa_wm_ca.g4b.gen5"
+#include "exa_wm_write.g7b"
+};
+
+static const uint32_t ps_kernel_maskca_srcalpha_affine_static_gen7[][4] = {
+#include "exa_wm_src_affine.g7b"
+#include "exa_wm_src_sample_a.g7b"
+#include "exa_wm_mask_affine.g7b"
+#include "exa_wm_mask_sample_argb.g7b"
+#include "exa_wm_ca_srcalpha.g6b"
+#include "exa_wm_write.g7b"
+};
+
+static const uint32_t ps_kernel_maskca_srcalpha_projective_static_gen7[][4] = {
+#include "exa_wm_src_projective.g7b"
+#include "exa_wm_src_sample_a.g7b"
+#include "exa_wm_mask_projective.g7b"
+#include "exa_wm_mask_sample_argb.g7b"
+#include "exa_wm_ca_srcalpha.g6b"
+#include "exa_wm_write.g7b"
+};
+
+static const uint32_t ps_kernel_masknoca_affine_static_gen7[][4] = {
+#include "exa_wm_src_affine.g7b"
+#include "exa_wm_src_sample_argb.g7b"
+#include "exa_wm_mask_affine.g7b"
+#include "exa_wm_mask_sample_a.g7b"
+#include "exa_wm_noca.g6b"
+#include "exa_wm_write.g7b"
+};
+
+static const uint32_t ps_kernel_masknoca_projective_static_gen7[][4] = {
+#include "exa_wm_src_projective.g7b"
+#include "exa_wm_src_sample_argb.g7b"
+#include "exa_wm_mask_projective.g7b"
+#include "exa_wm_mask_sample_a.g7b"
+#include "exa_wm_noca.g6b"
+#include "exa_wm_write.g7b"
+};
+
+
 typedef enum {
 	SAMPLER_STATE_FILTER_NEAREST,
 	SAMPLER_STATE_FILTER_BILINEAR,
@@ -629,6 +697,25 @@ static const struct wm_kernel_info wm_kernels_gen6[] = {
 	       ps_kernel_masknoca_projective_static_gen6, TRUE),
 };
 
+static const struct wm_kernel_info wm_kernels_gen7[] = {
+	KERNEL(WM_KERNEL_NOMASK_AFFINE,
+	       ps_kernel_nomask_affine_static_gen7, FALSE),
+	KERNEL(WM_KERNEL_NOMASK_PROJECTIVE,
+	       ps_kernel_nomask_projective_static_gen7, FALSE),
+	KERNEL(WM_KERNEL_MASKCA_AFFINE,
+	       ps_kernel_maskca_affine_static_gen7, TRUE),
+	KERNEL(WM_KERNEL_MASKCA_PROJECTIVE,
+	       ps_kernel_maskca_projective_static_gen7, TRUE),
+	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
+	       ps_kernel_maskca_srcalpha_affine_static_gen7, TRUE),
+	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
+	       ps_kernel_maskca_srcalpha_projective_static_gen7, TRUE),
+	KERNEL(WM_KERNEL_MASKNOCA_AFFINE,
+	       ps_kernel_masknoca_affine_static_gen7, TRUE),
+	KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE,
+	       ps_kernel_masknoca_projective_static_gen7, TRUE),
+};
+
 #undef KERNEL
 
 typedef struct _brw_cc_unit_state_padded {
@@ -2665,6 +2752,7 @@ gen6_render_state_init(ScrnInfoPtr scrn)
 	struct gen4_render_state *render;
 	int i, j, k, l, m;
 	drm_intel_bo *border_color_bo;
+	const struct wm_kernel_info *wm_kernels;
 
 	render= intel->gen4_render_state;
 	render->composite_op.vertex_id = -1;
@@ -2675,12 +2763,13 @@ gen6_render_state_init(ScrnInfoPtr scrn)
 	intel->gen6_render_state.kernel = NULL;
 	intel->gen6_render_state.drawrect = -1;
 
+	wm_kernels = IS_GEN7(intel) ? wm_kernels_gen7 : wm_kernels_gen6;
 	for (m = 0; m < KERNEL_COUNT; m++) {
 		render->wm_kernel_bo[m] =
 			intel_bo_alloc_for_data(intel,
-					wm_kernels_gen6[m].data,
-					wm_kernels_gen6[m].size,
-					"WM kernel gen6");
+					wm_kernels[m].data,
+					wm_kernels[m].size,
+					"WM kernel gen6/7");
 	}
 
 	border_color_bo = sampler_border_color_create(intel);
diff --git a/src/render_program/exa_wm_mask_affine.g7a b/src/render_program/exa_wm_mask_affine.g7a
new file mode 100644
index 0000000..4277080
--- /dev/null
+++ b/src/render_program/exa_wm_mask_affine.g7a
@@ -0,0 +1,41 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Fragment to compute src u/v values
+ */
+include(`exa_wm.g4i')
+
+define(`ul',    `g72')
+define(`uh',    `g73')
+define(`vl',    `g74')
+define(`vh',    `g75')
+
+define(`bl',    `g2.0<8,8,1>F')
+define(`bh',    `g4.0<8,8,1>F')
+
+define(`a0_a_x',`g8.0<0,1,0>F')
+define(`a0_a_y',`g8.16<0,1,0>F')
+
+include(`exa_wm_affine.g6i')
diff --git a/src/render_program/exa_wm_mask_affine.g7b b/src/render_program/exa_wm_mask_affine.g7b
new file mode 100644
index 0000000..8d72599
--- /dev/null
+++ b/src/render_program/exa_wm_mask_affine.g7b
@@ -0,0 +1,4 @@
+   { 0x0060005a, 0x290077bd, 0x00000100, 0x008d0040 },
+   { 0x0060005a, 0x292077bd, 0x00000100, 0x008d0080 },
+   { 0x0060005a, 0x294077bd, 0x00000110, 0x008d0040 },
+   { 0x0060005a, 0x296077bd, 0x00000110, 0x008d0080 },
diff --git a/src/render_program/exa_wm_mask_projective.g7a b/src/render_program/exa_wm_mask_projective.g7a
new file mode 100644
index 0000000..ba4158f
--- /dev/null
+++ b/src/render_program/exa_wm_mask_projective.g7a
@@ -0,0 +1,63 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Fragment to compute src u/v values
+ */
+include(`exa_wm.g4i')
+
+define(`u',     `g72')
+define(`ul',    `g72')
+define(`uh',    `g73')
+define(`v',     `g74')
+define(`vl',    `g74')
+define(`vh',    `g75')
+define(`w',     `mask_w')
+define(`wl',    `mask_w_0')
+define(`wh',    `mask_w_1')
+
+define(`bl',    `g2.0<8,8,1>F')
+define(`bh',    `g4.0<8,8,1>F')
+
+define(`a0_a_x',`g8.0<0,1,0>F')
+define(`a0_a_y',`g8.16<0,1,0>F')
+define(`a0_a_z',`g9.0<0,1,0>F')
+
+/* W */
+pln (8) temp_x_0<1>F a0_a_z bl { align1 }; /* pixel 0-7 */
+pln (8) temp_x_1<1>F a0_a_z bh { align1 }; /* pixel 8-15 */
+math (8) wl<1>F temp_x_0<8,8,1>F null inv { align1 };
+math (8) wh<1>F temp_x_1<8,8,1>F null inv { align1 };
+
+/* U */
+pln (8) temp_x_0<1>F a0_a_x bl { align1 }; /* pixel 0-7 */
+pln (8) temp_x_1<1>F a0_a_x bh { align1 }; /* pixel 8-15 */
+mul (8) ul<1>F temp_x_0<8,8,1>F wl<8,8,1>F { align1 };
+mul (8) uh<1>F temp_x_1<8,8,1>F wh<8,8,1>F { align1 };
+
+/* V */
+pln (8) temp_x_0<1>F a0_a_y bl { align1 }; /* pixel 0-7 */
+pln (8) temp_x_1<1>F a0_a_y bh { align1 }; /* pixel 8-15 */
+mul (8) vl<1>F temp_x_0<8,8,1>F wl<8,8,1>F { align1 };
+mul (8) vh<1>F temp_x_1<8,8,1>F wh<8,8,1>F { align1 };
diff --git a/src/render_program/exa_wm_mask_projective.g7b b/src/render_program/exa_wm_mask_projective.g7b
new file mode 100644
index 0000000..a2e9267
--- /dev/null
+++ b/src/render_program/exa_wm_mask_projective.g7b
@@ -0,0 +1,12 @@
+   { 0x0060005a, 0x23c077bd, 0x00000120, 0x008d0040 },
+   { 0x0060005a, 0x23e077bd, 0x00000120, 0x008d0080 },
+   { 0x01600038, 0x218003bd, 0x008d03c0, 0x00000000 },
+   { 0x01600038, 0x21a003bd, 0x008d03e0, 0x00000000 },
+   { 0x0060005a, 0x23c077bd, 0x00000100, 0x008d0040 },
+   { 0x0060005a, 0x23e077bd, 0x00000100, 0x008d0080 },
+   { 0x00600041, 0x290077bd, 0x008d03c0, 0x008d0180 },
+   { 0x00600041, 0x292077bd, 0x008d03e0, 0x008d01a0 },
+   { 0x0060005a, 0x23c077bd, 0x00000110, 0x008d0040 },
+   { 0x0060005a, 0x23e077bd, 0x00000110, 0x008d0080 },
+   { 0x00600041, 0x294077bd, 0x008d03c0, 0x008d0180 },
+   { 0x00600041, 0x296077bd, 0x008d03e0, 0x008d01a0 },
diff --git a/src/render_program/exa_wm_mask_sample_a.g7a b/src/render_program/exa_wm_mask_sample_a.g7a
new file mode 100644
index 0000000..a0d38e1
--- /dev/null
+++ b/src/render_program/exa_wm_mask_sample_a.g7a
@@ -0,0 +1,49 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Wang Zhenyu <zhenyu.z.wang@intel.com>
+ *    Keith Packard <keithp@keithp.com>
+ */
+
+/* Sample the mask surface */
+
+include(`exa_wm.g4i')
+
+/* prepare sampler read back gX register, which would be written back to output */
+
+/* use simd16 sampler, param 0 is u, param 1 is v. */
+/* 'payload' loading, assuming tex coord start from g4 */
+
+/* load only alpha */
+mov (1) g0.8<1>UD	0x00007000UD { align1 mask_disable };
+mov (8) g71<1>UD g0<8,8,1>UD { align1 }; /* copy to msg start reg*/
+
+/* g71 will be copied with g0, as it contains send desc */
+/* emit sampler 'send' cmd */
+send (16) 71		/* msg reg index */
+	mask_sample_a_01<1>UW 	/* readback */
+	null
+	sampler (2,1,F)		/* sampler message description, (binding_table,sampler_index,datatype)
+				/* here(src->dst) we should use src_sampler and src_surface */
+	mlen 5 rlen 2 { align1 };   /* required message len 5, readback len 8 */
+
diff --git a/src/render_program/exa_wm_mask_sample_a.g7b b/src/render_program/exa_wm_mask_sample_a.g7b
new file mode 100644
index 0000000..fa36a59
--- /dev/null
+++ b/src/render_program/exa_wm_mask_sample_a.g7b
@@ -0,0 +1,3 @@
+   { 0x00000201, 0x20080061, 0x00000000, 0x00007000 },
+   { 0x00600001, 0x28e00021, 0x008d0000, 0x00000000 },
+   { 0x02800031, 0x23801ca9, 0x000008e0, 0x0a2c0102 },
diff --git a/src/render_program/exa_wm_mask_sample_argb.g7a b/src/render_program/exa_wm_mask_sample_argb.g7a
new file mode 100644
index 0000000..984b622
--- /dev/null
+++ b/src/render_program/exa_wm_mask_sample_argb.g7a
@@ -0,0 +1,49 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Wang Zhenyu <zhenyu.z.wang@intel.com>
+ *    Keith Packard <keithp@keithp.com>
+ */
+
+/* Sample the mask surface */
+
+include(`exa_wm.g4i')
+
+/* prepare sampler read back gX register, which would be written back to output */
+
+/* use simd16 sampler, param 0 is u, param 1 is v. */
+/* 'payload' loading, assuming tex coord start from g4 */
+
+/* load argb */
+mov (1) g0.8<1>UD	0x00000000UD { align1 mask_disable };
+mov (8) g71<1>UD g0<8,8,1>UD { align1 }; /* copy to msg start reg*/
+
+/* g71 will be copied with g0, as it contains send desc */
+/* emit sampler 'send' cmd */
+send (16) 71		/* msg reg index */
+	mask_sample_base<1>UW 	/* readback */
+	null
+	sampler (2,1,F)		/* sampler message description, (binding_table,sampler_index,datatype)
+				/* here(src->dst) we should use src_sampler and src_surface */
+	mlen 5 rlen 8 { align1 };   /* required message len 5, readback len 8 */
+
diff --git a/src/render_program/exa_wm_mask_sample_argb.g7b b/src/render_program/exa_wm_mask_sample_argb.g7b
new file mode 100644
index 0000000..01edf7d
--- /dev/null
+++ b/src/render_program/exa_wm_mask_sample_argb.g7b
@@ -0,0 +1,3 @@
+   { 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28e00021, 0x008d0000, 0x00000000 },
+   { 0x02800031, 0x22c01ca9, 0x000008e0, 0x0a8c0102 },
diff --git a/src/render_program/exa_wm_src_projective.g7a b/src/render_program/exa_wm_src_projective.g7a
new file mode 100644
index 0000000..9fd495c
--- /dev/null
+++ b/src/render_program/exa_wm_src_projective.g7a
@@ -0,0 +1,63 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Fragment to compute src u/v values
+ */
+include(`exa_wm.g4i')
+
+define(`u',     `g66')
+define(`ul',    `g66')
+define(`uh',    `g67')
+define(`v',     `src_v')
+define(`vl',    `src_v')
+define(`vh',    `g69')
+define(`w',     `src_w')
+define(`wl',    `src_w_0')
+define(`wh',    `src_w_1')
+
+define(`bl',    `g2.0<8,8,1>F')
+define(`bh',    `g4.0<8,8,1>F')
+
+define(`a0_a_x',`g6.0<0,1,0>F')
+define(`a0_a_y',`g6.16<0,1,0>F')
+define(`a0_a_z',`g7.0<0,1,0>F')
+
+/* W */
+pln (8) temp_x_0<1>F a0_a_z bl { align1 }; /* pixel 0-7 */
+pln (8) temp_x_1<1>F a0_a_z bh { align1 }; /* pixel 8-15 */
+math (8) wl<1>F temp_x_0<8,8,1>F null inv { align1 };
+math (8) wh<1>F temp_x_1<8,8,1>F null inv { align1 };
+
+/* U */
+pln (8) temp_x_0<1>F a0_a_x bl { align1 }; /* pixel 0-7 */
+pln (8) temp_x_1<1>F a0_a_x bh { align1 }; /* pixel 8-15 */
+mul (8) ul<1>F temp_x_0<8,8,1>F wl<8,8,1>F { align1 };
+mul (8) uh<1>F temp_x_1<8,8,1>F wh<8,8,1>F { align1 };
+
+/* V */
+pln (8) temp_x_0<1>F a0_a_y bl { align1 }; /* pixel 0-7 */
+pln (8) temp_x_1<1>F a0_a_y bh { align1 }; /* pixel 8-15 */
+mul (8) vl<1>F temp_x_0<8,8,1>F wl<8,8,1>F { align1 };
+mul (8) vh<1>F temp_x_1<8,8,1>F wh<8,8,1>F { align1 };
diff --git a/src/render_program/exa_wm_src_projective.g7b b/src/render_program/exa_wm_src_projective.g7b
new file mode 100644
index 0000000..73727ff
--- /dev/null
+++ b/src/render_program/exa_wm_src_projective.g7b
@@ -0,0 +1,12 @@
+   { 0x0060005a, 0x23c077bd, 0x000000e0, 0x008d0040 },
+   { 0x0060005a, 0x23e077bd, 0x000000e0, 0x008d0080 },
+   { 0x01600038, 0x218003bd, 0x008d03c0, 0x00000000 },
+   { 0x01600038, 0x21a003bd, 0x008d03e0, 0x00000000 },
+   { 0x0060005a, 0x23c077bd, 0x000000c0, 0x008d0040 },
+   { 0x0060005a, 0x23e077bd, 0x000000c0, 0x008d0080 },
+   { 0x00600041, 0x284077bd, 0x008d03c0, 0x008d0180 },
+   { 0x00600041, 0x286077bd, 0x008d03e0, 0x008d01a0 },
+   { 0x0060005a, 0x23c077bd, 0x000000d0, 0x008d0040 },
+   { 0x0060005a, 0x23e077bd, 0x000000d0, 0x008d0080 },
+   { 0x00600041, 0x208077be, 0x008d03c0, 0x008d0180 },
+   { 0x00600041, 0x28a077bd, 0x008d03e0, 0x008d01a0 },
diff --git a/src/render_program/exa_wm_src_sample_a.g7a b/src/render_program/exa_wm_src_sample_a.g7a
new file mode 100644
index 0000000..d5d24fd
--- /dev/null
+++ b/src/render_program/exa_wm_src_sample_a.g7a
@@ -0,0 +1,48 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Wang Zhenyu <zhenyu.z.wang@intel.com>
+ *    Keith Packard <keithp@keithp.com>
+ */
+
+/* Sample the src surface */
+
+include(`exa_wm.g4i')
+
+/* prepare sampler read back gX register, which would be written back to output */
+
+/* use simd16 sampler, param 0 is u, param 1 is v. */
+/* 'payload' loading, assuming tex coord start from g4 */
+
+/* load alpha */
+mov (1) g0.8<1>UD	0x00007000UD { align1 mask_disable };
+mov (8) g65<1>UD g0<8,8,1>UD { align1 }; /* copy to msg start reg*/
+
+/* g65 will be copied with g0, as it contains send desc */
+/* emit sampler 'send' cmd */
+send (16) 65		/* msg reg index */
+	src_sample_a_01<1>UW 	/* readback */
+	null
+	sampler (1,0,F)		/* sampler message description, (binding_table,sampler_index,datatype)
+				/* here(src->dst) we should use src_sampler and src_surface */
+	mlen 5 rlen 2 { align1 };   /* required message len 5, readback len 8 */
diff --git a/src/render_program/exa_wm_src_sample_a.g7b b/src/render_program/exa_wm_src_sample_a.g7b
new file mode 100644
index 0000000..73912b7
--- /dev/null
+++ b/src/render_program/exa_wm_src_sample_a.g7b
@@ -0,0 +1,3 @@
+   { 0x00000201, 0x20080061, 0x00000000, 0x00007000 },
+   { 0x00600001, 0x28200021, 0x008d0000, 0x00000000 },
+   { 0x02800031, 0x22801ca9, 0x00000820, 0x0a2c0001 },
-- 
1.7.4.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 02/10] render: Update SURFACE_STATE for Ivybridge.
  2011-07-14 21:21 [PATCH 00/10] UXA Render acceleration for Ivybridge Kenneth Graunke
  2011-07-14 21:21 ` [PATCH 01/10] render: New Ivybridge assembly programs for render acceleration Kenneth Graunke
@ 2011-07-14 21:21 ` Kenneth Graunke
  2011-07-14 21:21 ` [PATCH 03/10] render: Update SAMPLER_STATE " Kenneth Graunke
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 11+ messages in thread
From: Kenneth Graunke @ 2011-07-14 21:21 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/i965_render.c |   91 +++++++++++++++++++++++++++++++++++++++++++++++-----
 1 files changed, 82 insertions(+), 9 deletions(-)

diff --git a/src/i965_render.c b/src/i965_render.c
index 5ab53c4..bb3c2b7 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -723,10 +723,10 @@ typedef struct _brw_cc_unit_state_padded {
 	char pad[64 - sizeof(struct brw_cc_unit_state)];
 } brw_cc_unit_state_padded;
 
-typedef struct brw_surface_state_padded {
-	struct brw_surface_state state;
-	char pad[32 - sizeof(struct brw_surface_state)];
-} brw_surface_state_padded;
+#ifndef MAX
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#endif
+#define SURFACE_STATE_PADDED_SIZE ALIGN(MAX(sizeof(struct brw_surface_state), sizeof(struct gen7_surface_state)), 32)
 
 struct gen4_cc_unit_state {
 	/* Index by [src_blend][dst_blend] */
@@ -1161,7 +1161,7 @@ static sampler_state_extend_t sampler_state_extend_from_picture(int repeat_type)
  * picture in the given surface state buffer.
  */
 static int
-i965_set_picture_surface_state(intel_screen_private *intel,
+gen4_set_picture_surface_state(intel_screen_private *intel,
 			       PicturePtr picture, PixmapPtr pixmap,
 			       Bool is_dst)
 {
@@ -1215,7 +1215,70 @@ i965_set_picture_surface_state(intel_screen_private *intel,
 			  priv->bo);
 
 	offset = intel->surface_used;
-	intel->surface_used += sizeof(struct brw_surface_state_padded);
+	intel->surface_used += SURFACE_STATE_PADDED_SIZE;
+
+	if (is_dst)
+		priv->dst_bound = offset;
+	else
+		priv->src_bound = offset;
+
+	return offset;
+}
+
+static int
+gen7_set_picture_surface_state(intel_screen_private *intel,
+			       PicturePtr picture, PixmapPtr pixmap,
+			       Bool is_dst)
+{
+	struct intel_pixmap *priv = intel_get_pixmap_private(pixmap);
+	struct gen7_surface_state *ss;
+	uint32_t write_domain, read_domains;
+	int offset;
+
+	if (is_dst) {
+		write_domain = I915_GEM_DOMAIN_RENDER;
+		read_domains = I915_GEM_DOMAIN_RENDER;
+	} else {
+		write_domain = 0;
+		read_domains = I915_GEM_DOMAIN_SAMPLER;
+	}
+	intel_batch_mark_pixmap_domains(intel, priv,
+					read_domains, write_domain);
+	if (is_dst) {
+		if (priv->dst_bound)
+			return priv->dst_bound;
+	} else {
+		if (priv->src_bound)
+			return priv->src_bound;
+	}
+
+	ss = (struct gen7_surface_state *)
+		(intel->surface_data + intel->surface_used);
+
+	memset(ss, 0, sizeof(*ss));
+	ss->ss0.surface_type = BRW_SURFACE_2D;
+	if (is_dst)
+		ss->ss0.surface_format = i965_get_dest_format(picture);
+	else
+		ss->ss0.surface_format = i965_get_card_format(picture);
+
+	ss->ss0.tile_walk = 0;	/* Tiled X */
+	ss->ss0.tiled_surface = intel_pixmap_tiled(pixmap) ? 1 : 0;
+	ss->ss1.base_addr = priv->bo->offset;
+
+	ss->ss2.height = pixmap->drawable.height - 1;
+	ss->ss2.width = pixmap->drawable.width - 1;
+	ss->ss3.pitch = intel_pixmap_pitch(pixmap) - 1;
+
+	dri_bo_emit_reloc(intel->surface_bo,
+			  read_domains, write_domain,
+			  0,
+			  intel->surface_used +
+			  offsetof(struct gen7_surface_state, ss1),
+			  priv->bo);
+
+	offset = intel->surface_used;
+	intel->surface_used += SURFACE_STATE_PADDED_SIZE;
 
 	if (is_dst)
 		priv->dst_bound = offset;
@@ -1225,6 +1288,16 @@ i965_set_picture_surface_state(intel_screen_private *intel,
 	return offset;
 }
 
+static inline int
+i965_set_picture_surface_state(intel_screen_private *intel,
+			       PicturePtr picture, PixmapPtr pixmap,
+			       Bool is_dst)
+{
+    if (INTEL_INFO(intel)->gen < 70)
+        return gen4_set_picture_surface_state(intel, picture, pixmap, is_dst);
+    return gen7_set_picture_surface_state(intel, picture, pixmap, is_dst);
+}
+
 static void gen4_composite_vertex_elements(struct intel_screen_private *intel)
 {
 	struct gen4_render_state *render_state = intel->gen4_render_state;
@@ -1968,7 +2041,7 @@ i965_prepare_composite(int op, PicturePtr source_picture,
 	}
 
 	if (sizeof(intel->surface_data) - intel->surface_used <
-	    4 * sizeof(struct brw_surface_state_padded))
+	    4 * SURFACE_STATE_PADDED_SIZE)
 		i965_surface_flush(intel);
 
 	intel->needs_render_state_emit = TRUE;
@@ -2014,11 +2087,11 @@ static void i965_bind_surfaces(struct intel_screen_private *intel)
 {
 	uint32_t *binding_table;
 
-	assert(intel->surface_used + 4 * sizeof(struct brw_surface_state_padded) <= sizeof(intel->surface_data));
+	assert(intel->surface_used + 4 * SURFACE_STATE_PADDED_SIZE <= sizeof(intel->surface_data));
 
 	binding_table = (uint32_t*) (intel->surface_data + intel->surface_used);
 	intel->surface_table = intel->surface_used;
-	intel->surface_used += sizeof(struct brw_surface_state_padded);
+	intel->surface_used += SURFACE_STATE_PADDED_SIZE;
 
 	binding_table[0] =
 		i965_set_picture_surface_state(intel,
-- 
1.7.4.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 03/10] render: Update SAMPLER_STATE for Ivybridge.
  2011-07-14 21:21 [PATCH 00/10] UXA Render acceleration for Ivybridge Kenneth Graunke
  2011-07-14 21:21 ` [PATCH 01/10] render: New Ivybridge assembly programs for render acceleration Kenneth Graunke
  2011-07-14 21:21 ` [PATCH 02/10] render: Update SURFACE_STATE for Ivybridge Kenneth Graunke
@ 2011-07-14 21:21 ` Kenneth Graunke
  2011-07-14 21:21 ` [PATCH 04/10] render: Set Address Modify Enable in 3DSTATE_VERTEX_BUFFERS on Gen7 Kenneth Graunke
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 11+ messages in thread
From: Kenneth Graunke @ 2011-07-14 21:21 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/i965_render.c |  133 +++++++++++++++++++++++++++++++++++++++++++++++++----
 1 files changed, 124 insertions(+), 9 deletions(-)

diff --git a/src/i965_render.c b/src/i965_render.c
index bb3c2b7..1dfdde4 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -842,7 +842,7 @@ static drm_intel_bo *sampler_border_color_create(intel_screen_private *intel)
 }
 
 static void
-sampler_state_init(drm_intel_bo * sampler_state_bo,
+gen4_sampler_state_init(drm_intel_bo * sampler_state_bo,
 		   struct brw_sampler_state *sampler_state,
 		   sampler_state_filter_t filter,
 		   sampler_state_extend_t extend,
@@ -907,6 +907,74 @@ sampler_state_init(drm_intel_bo * sampler_state_bo,
 	sampler_state->ss3.chroma_key_enable = 0;	/* disable chromakey */
 }
 
+static void
+gen7_sampler_state_init(drm_intel_bo * sampler_state_bo,
+		   struct gen7_sampler_state *sampler_state,
+		   sampler_state_filter_t filter,
+		   sampler_state_extend_t extend,
+		   drm_intel_bo * border_color_bo)
+{
+	uint32_t sampler_state_offset;
+
+	sampler_state_offset = (char *)sampler_state -
+	    (char *)sampler_state_bo->virtual;
+
+	/* PS kernel use this sampler */
+	memset(sampler_state, 0, sizeof(*sampler_state));
+
+	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */
+
+	/* We use the legacy mode to get the semantics specified by
+	 * the Render extension. */
+	sampler_state->ss0.default_color_mode = BRW_BORDER_COLOR_MODE_LEGACY;
+
+	switch (filter) {
+	default:
+	case SAMPLER_STATE_FILTER_NEAREST:
+		sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST;
+		sampler_state->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
+		break;
+	case SAMPLER_STATE_FILTER_BILINEAR:
+		sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR;
+		sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
+		break;
+	}
+
+	switch (extend) {
+	default:
+	case SAMPLER_STATE_EXTEND_NONE:
+		sampler_state->ss3.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
+		sampler_state->ss3.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
+		sampler_state->ss3.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
+		break;
+	case SAMPLER_STATE_EXTEND_REPEAT:
+		sampler_state->ss3.r_wrap_mode = BRW_TEXCOORDMODE_WRAP;
+		sampler_state->ss3.s_wrap_mode = BRW_TEXCOORDMODE_WRAP;
+		sampler_state->ss3.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
+		break;
+	case SAMPLER_STATE_EXTEND_PAD:
+		sampler_state->ss3.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+		sampler_state->ss3.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+		sampler_state->ss3.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+		break;
+	case SAMPLER_STATE_EXTEND_REFLECT:
+		sampler_state->ss3.r_wrap_mode = BRW_TEXCOORDMODE_MIRROR;
+		sampler_state->ss3.s_wrap_mode = BRW_TEXCOORDMODE_MIRROR;
+		sampler_state->ss3.t_wrap_mode = BRW_TEXCOORDMODE_MIRROR;
+		break;
+	}
+
+	sampler_state->ss2.default_color_pointer =
+	    intel_emit_reloc(sampler_state_bo, sampler_state_offset +
+			     offsetof(struct gen7_sampler_state, ss2),
+			     border_color_bo, 0,
+			     I915_GEM_DOMAIN_SAMPLER, 0) >> 5;
+
+	sampler_state->ss3.chroma_key_enable = 0;	/* disable chromakey */
+}
+
+
+
 static drm_intel_bo *gen4_create_sampler_state(intel_screen_private *intel,
 					       sampler_state_filter_t src_filter,
 					       sampler_state_extend_t src_extend,
@@ -923,18 +991,65 @@ static drm_intel_bo *gen4_create_sampler_state(intel_screen_private *intel,
 	drm_intel_bo_map(sampler_state_bo, TRUE);
 	sampler_state = sampler_state_bo->virtual;
 
-	sampler_state_init(sampler_state_bo,
-			   &sampler_state[0],
-			   src_filter, src_extend, border_color_bo);
-	sampler_state_init(sampler_state_bo,
-			   &sampler_state[1],
-			   mask_filter, mask_extend, border_color_bo);
+	gen4_sampler_state_init(sampler_state_bo,
+				&sampler_state[0],
+				src_filter, src_extend, border_color_bo);
+	gen4_sampler_state_init(sampler_state_bo,
+				&sampler_state[1],
+				mask_filter, mask_extend, border_color_bo);
+
+	drm_intel_bo_unmap(sampler_state_bo);
+
+	return sampler_state_bo;
+}
+
+static drm_intel_bo *
+gen7_create_sampler_state(intel_screen_private *intel,
+			  sampler_state_filter_t src_filter,
+			  sampler_state_extend_t src_extend,
+			  sampler_state_filter_t mask_filter,
+			  sampler_state_extend_t mask_extend,
+			  drm_intel_bo * border_color_bo)
+{
+	drm_intel_bo *sampler_state_bo;
+	struct gen7_sampler_state *sampler_state;
+
+	sampler_state_bo =
+	    drm_intel_bo_alloc(intel->bufmgr, "gen7 sampler state",
+			       sizeof(struct gen7_sampler_state) * 2, 4096);
+	drm_intel_bo_map(sampler_state_bo, TRUE);
+	sampler_state = sampler_state_bo->virtual;
+
+	gen7_sampler_state_init(sampler_state_bo,
+				&sampler_state[0],
+				src_filter, src_extend, border_color_bo);
+	gen7_sampler_state_init(sampler_state_bo,
+				&sampler_state[1],
+				mask_filter, mask_extend, border_color_bo);
 
 	drm_intel_bo_unmap(sampler_state_bo);
 
 	return sampler_state_bo;
 }
 
+static inline drm_intel_bo *
+i965_create_sampler_state(intel_screen_private *intel,
+			  sampler_state_filter_t src_filter,
+			  sampler_state_extend_t src_extend,
+			  sampler_state_filter_t mask_filter,
+			  sampler_state_extend_t mask_extend,
+			  drm_intel_bo * border_color_bo)
+{
+	if (INTEL_INFO(intel)->gen < 70)
+		return gen4_create_sampler_state(intel, src_filter, src_extend,
+						 mask_filter, mask_extend,
+						 border_color_bo);
+	return gen7_create_sampler_state(intel, src_filter, src_extend,
+					 mask_filter, mask_extend,
+					 border_color_bo);
+}
+
+
 static void
 cc_state_init(drm_intel_bo * cc_state_bo,
 	      uint32_t cc_state_offset,
@@ -2267,7 +2382,7 @@ void gen4_render_state_init(ScrnInfoPtr scrn)
 					drm_intel_bo *sampler_state_bo;
 
 					sampler_state_bo =
-					    gen4_create_sampler_state(intel,
+					    i965_create_sampler_state(intel,
 								      i, j,
 								      k, l,
 								      border_color_bo);
@@ -2852,7 +2967,7 @@ gen6_render_state_init(ScrnInfoPtr scrn)
 			for (k = 0; k < FILTER_COUNT; k++) {
 				for (l = 0; l < EXTEND_COUNT; l++) {
 					render->ps_sampler_state_bo[i][j][k][l] =
-						gen4_create_sampler_state(intel,
+						i965_create_sampler_state(intel,
 								i, j,
 								k, l,
 								border_color_bo);
-- 
1.7.4.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 04/10] render: Set Address Modify Enable in 3DSTATE_VERTEX_BUFFERS on Gen7.
  2011-07-14 21:21 [PATCH 00/10] UXA Render acceleration for Ivybridge Kenneth Graunke
                   ` (2 preceding siblings ...)
  2011-07-14 21:21 ` [PATCH 03/10] render: Update SAMPLER_STATE " Kenneth Graunke
@ 2011-07-14 21:21 ` Kenneth Graunke
  2011-07-14 21:21 ` [PATCH 05/10] render: Update 3DPRIMITIVE for Ivybridge Kenneth Graunke
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 11+ messages in thread
From: Kenneth Graunke @ 2011-07-14 21:21 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/i965_render.c |    5 +++++
 1 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/src/i965_render.c b/src/i965_render.c
index 1dfdde4..ec10392 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -2167,10 +2167,14 @@ i965_prepare_composite(int op, PicturePtr source_picture,
 static void i965_select_vertex_buffer(struct intel_screen_private *intel)
 {
 	int id = intel->gen4_render_state->composite_op.vertex_id;
+	int modifyenable = 0;
 
 	if (intel->vertex_id & (1 << id))
 		return;
 
+	if (INTEL_INFO(intel)->gen >= 70)
+		modifyenable = GEN7_VB0_ADDRESS_MODIFYENABLE;
+
 	/* Set up the pointer to our (single) vertex buffer */
 	OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | 3);
 
@@ -2180,6 +2184,7 @@ static void i965_select_vertex_buffer(struct intel_screen_private *intel)
 	if (INTEL_INFO(intel)->gen >= 60) {
 		OUT_BATCH((id << GEN6_VB0_BUFFER_INDEX_SHIFT) |
 			  GEN6_VB0_VERTEXDATA |
+			  modifyenable |
 			  (4*intel->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
 	} else {
 		OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) |
-- 
1.7.4.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 05/10] render: Update 3DPRIMITIVE for Ivybridge.
  2011-07-14 21:21 [PATCH 00/10] UXA Render acceleration for Ivybridge Kenneth Graunke
                   ` (3 preceding siblings ...)
  2011-07-14 21:21 ` [PATCH 04/10] render: Set Address Modify Enable in 3DSTATE_VERTEX_BUFFERS on Gen7 Kenneth Graunke
@ 2011-07-14 21:21 ` Kenneth Graunke
  2011-07-14 21:21 ` [PATCH 06/10] Xv: Refactor out pipeline setup functions for future reuse in render Kenneth Graunke
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 11+ messages in thread
From: Kenneth Graunke @ 2011-07-14 21:21 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/i965_render.c |   16 +++++++++++-----
 1 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/i965_render.c b/src/i965_render.c
index ec10392..c9b3c7a 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -2262,11 +2262,17 @@ i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
 	i965_select_vertex_buffer(intel);
 
 	if (intel->vertex_offset == 0) {
-		OUT_BATCH(BRW_3DPRIMITIVE |
-			  BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL |
-			  (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) |
-			  (0 << 9) |
-			  4);
+		if (INTEL_INFO(intel)->gen >= 70) {
+			OUT_BATCH(BRW_3DPRIMITIVE | (7 - 2));
+			OUT_BATCH(BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL |
+				  _3DPRIM_RECTLIST);
+		} else {
+			OUT_BATCH(BRW_3DPRIMITIVE |
+				  BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL |
+				  (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) |
+				  (0 << 9) |
+				  4);
+		}
 		intel->vertex_offset = intel->batch_used;
 		OUT_BATCH(0);	/* vertex count, to be filled in later */
 		OUT_BATCH(intel->vertex_index);
-- 
1.7.4.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 06/10] Xv: Refactor out pipeline setup functions for future reuse in render.
  2011-07-14 21:21 [PATCH 00/10] UXA Render acceleration for Ivybridge Kenneth Graunke
                   ` (4 preceding siblings ...)
  2011-07-14 21:21 ` [PATCH 05/10] render: Update 3DPRIMITIVE for Ivybridge Kenneth Graunke
@ 2011-07-14 21:21 ` Kenneth Graunke
  2011-07-14 21:21 ` [PATCH 07/10] render: Refactor to use newly shared pipeline setup code in i965_3d.c Kenneth Graunke
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 11+ messages in thread
From: Kenneth Graunke @ 2011-07-14 21:21 UTC (permalink / raw)
  To: intel-gfx

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=UTF-8, Size: 30929 bytes --]

While we're at it, make the functions simply take an intel_screen_private
pointer directly instead of having to fetch it from ScrnInfoPtr.

Also coalesce some gen6/gen7 functions that were 98% identical.

Cc: Xiang, Haihao <haihao.xiang@intel.com>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/Makefile.am  |    1 +
 src/i965_3d.c    |  430 ++++++++++++++++++++++++++++++++++++++++++++++
 src/i965_video.c |  500 +++---------------------------------------------------
 src/intel.h      |   29 +++
 4 files changed, 482 insertions(+), 478 deletions(-)
 create mode 100644 src/i965_3d.c

diff --git a/src/Makefile.am b/src/Makefile.am
index a7f219c..cd1bb36 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -67,6 +67,7 @@ intel_drv_la_SOURCES = \
 	 i915_render.c \
 	 i915_video.c \
 	 i965_reg.h \
+	 i965_3d.c \
 	 i965_video.c \
 	 i965_render.c \
 	 $(NULL)
diff --git a/src/i965_3d.c b/src/i965_3d.c
new file mode 100644
index 0000000..19ddee7
--- /dev/null
+++ b/src/i965_3d.c
@@ -0,0 +1,430 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <string.h>
+
+#include "intel.h"
+#include "i965_reg.h"
+#include "brw_defines.h"
+#include "brw_structs.h"
+
+void
+gen6_upload_invariant_states(intel_screen_private *intel)
+{
+	Bool ivb = INTEL_INFO(intel)->gen >= 70;
+
+	OUT_BATCH(BRW_PIPE_CONTROL | (4 - 2));
+	OUT_BATCH(BRW_PIPE_CONTROL_IS_FLUSH |
+		BRW_PIPE_CONTROL_WC_FLUSH |
+		BRW_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+		BRW_PIPE_CONTROL_NOWRITE);
+	OUT_BATCH(0); /* write address */
+	OUT_BATCH(0); /* write data */
+
+	OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
+
+	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | ((ivb ? 4 : 3) - 2));
+	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
+		GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
+	OUT_BATCH(0);
+	if (ivb)
+		OUT_BATCH(0);
+
+	OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
+	OUT_BATCH(1);
+
+	/* Set system instruction pointer */
+	OUT_BATCH(BRW_STATE_SIP | 0);
+	OUT_BATCH(0);
+}
+
+void
+gen6_upload_viewport_state_pointers(intel_screen_private *intel,
+				    drm_intel_bo *cc_vp_bo)
+{
+	OUT_BATCH(GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
+		GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
+		(4 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_RELOC(cc_vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+}
+
+void
+gen7_upload_viewport_state_pointers(intel_screen_private *intel,
+				    drm_intel_bo *cc_vp_bo)
+{
+	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
+	OUT_RELOC(cc_vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+
+	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
+	OUT_BATCH(0);
+}
+
+void
+gen6_upload_urb(intel_screen_private *intel)
+{
+	OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2));
+	OUT_BATCH(((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
+		(24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
+	OUT_BATCH((0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
+		(0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
+}
+
+/*
+ * URB layout on GEN7
+ * ----------------------------------------
+ * | PS Push Constants (8KB) | VS entries |
+ * ----------------------------------------
+ */
+void
+gen7_upload_urb(intel_screen_private *intel)
+{
+	OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
+	OUT_BATCH(8); /* in 1KBs */
+
+	OUT_BATCH(GEN7_3DSTATE_URB_VS | (2 - 2));
+	OUT_BATCH(
+		(32 << GEN7_URB_ENTRY_NUMBER_SHIFT) | /* at least 32 */
+		(2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
+		(1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
+
+	OUT_BATCH(GEN7_3DSTATE_URB_GS | (2 - 2));
+	OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
+		(1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
+
+	OUT_BATCH(GEN7_3DSTATE_URB_HS | (2 - 2));
+	OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
+		(2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
+
+	OUT_BATCH(GEN7_3DSTATE_URB_DS | (2 - 2));
+	OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
+		(2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
+}
+
+void
+gen6_upload_cc_state_pointers(intel_screen_private *intel,
+			      drm_intel_bo *blend_bo,
+			      drm_intel_bo *cc_bo,
+			      drm_intel_bo *depth_stencil_bo)
+{
+	OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
+	if (blend_bo)
+		OUT_RELOC(blend_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+	else
+		OUT_BATCH(0);
+
+	if (depth_stencil_bo)
+		OUT_RELOC(depth_stencil_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+	else
+		OUT_BATCH(0);
+
+	if (cc_bo)
+		OUT_RELOC(cc_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+	else
+		OUT_BATCH(0);
+}
+
+void
+gen7_upload_cc_state_pointers(intel_screen_private *intel,
+			      drm_intel_bo *blend_bo,
+			      drm_intel_bo *cc_bo,
+			      drm_intel_bo *depth_stencil_bo)
+{
+	OUT_BATCH(GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
+	if (blend_bo)
+		OUT_RELOC(blend_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+	else
+		OUT_BATCH(0);
+
+	OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
+	if (cc_bo)
+		OUT_RELOC(cc_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+	else
+		OUT_BATCH(0);
+
+	OUT_BATCH(GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
+	if (depth_stencil_bo)
+		OUT_RELOC(depth_stencil_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+	else
+		OUT_BATCH(0);
+}
+
+void
+gen6_upload_sampler_state_pointers(intel_screen_private *intel,
+				   drm_intel_bo *sampler_bo)
+{
+	OUT_BATCH(GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
+		GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
+		(4 - 2));
+	OUT_BATCH(0); /* VS */
+	OUT_BATCH(0); /* GS */
+	OUT_RELOC(sampler_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+}
+
+void
+gen7_upload_sampler_state_pointers(intel_screen_private *intel,
+				   drm_intel_bo *sampler_bo)
+{
+	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
+	OUT_RELOC(sampler_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+}
+
+void
+gen7_upload_bypass_states(intel_screen_private *intel)
+{
+	/* bypass GS */
+	OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (7 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2));
+	OUT_BATCH(0); /* without GS kernel */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0); /* pass-through */
+
+	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
+	OUT_BATCH(0);
+
+	/* disable HS */
+	OUT_BATCH(GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	OUT_BATCH(GEN7_3DSTATE_HS | (7 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
+	OUT_BATCH(0);
+
+	/* Disable TE */
+	OUT_BATCH(GEN7_3DSTATE_TE | (4 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	/* Disable DS */
+	OUT_BATCH(GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	OUT_BATCH(GEN7_3DSTATE_DS | (6 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
+	OUT_BATCH(0);
+
+	/* Disable STREAMOUT */
+	OUT_BATCH(GEN7_3DSTATE_STREAMOUT | (3 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+}
+
+void
+gen6_upload_vs_state(intel_screen_private *intel)
+{
+	Bool ivb = INTEL_INFO(intel)->gen >= 70;
+	/* disable VS constant buffer */
+	OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | ((ivb ? 7 : 5) - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	if (ivb) {
+		OUT_BATCH(0);
+		OUT_BATCH(0);
+	}
+
+	OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2));
+	OUT_BATCH(0); /* without VS kernel */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0); /* pass-through */
+}
+
+void
+gen6_upload_gs_state(intel_screen_private *intel)
+{
+	/* disable GS constant buffer */
+	OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2));
+	OUT_BATCH(0); /* without GS kernel */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0); /* pass-through */
+}
+
+void
+gen6_upload_clip_state(intel_screen_private *intel)
+{
+	OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0); /* pass-through */
+	OUT_BATCH(0);
+}
+
+void
+gen6_upload_sf_state(intel_screen_private *intel)
+{
+	OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2));
+	OUT_BATCH((1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
+		(1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
+		(0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
+	OUT_BATCH(0);
+	OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE);
+	OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0); /* DW9 */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0); /* DW14 */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0); /* DW19 */
+}
+
+void
+gen7_upload_sf_state(intel_screen_private *intel)
+{
+	OUT_BATCH(GEN7_3DSTATE_SBE | (14 - 2));
+	OUT_BATCH((1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
+		(1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
+		(0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0); /* DW4 */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0); /* DW9 */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	OUT_BATCH(GEN6_3DSTATE_SF | (7 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE);
+	OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+}
+
+void
+gen6_upload_binding_table(intel_screen_private *intel,
+			  uint32_t ps_binding_table_offset)
+{
+	/* Binding table pointers */
+	OUT_BATCH(BRW_3DSTATE_BINDING_TABLE_POINTERS |
+		  GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS |
+		  (4 - 2));
+	OUT_BATCH(0); /* VS */
+	OUT_BATCH(0); /* GS */
+	/* Only the PS uses the binding table */
+	OUT_BATCH(ps_binding_table_offset);
+}
+
+void
+gen7_upload_binding_table(intel_screen_private *intel,
+			  uint32_t ps_binding_table_offset)
+{
+	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
+	OUT_BATCH(ps_binding_table_offset);
+}
+
+void
+gen6_upload_depth_buffer_state(intel_screen_private *intel)
+{
+	OUT_BATCH(BRW_3DSTATE_DEPTH_BUFFER | (7 - 2));
+	OUT_BATCH((BRW_SURFACE_NULL << BRW_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT) |
+		  (BRW_DEPTHFORMAT_D32_FLOAT << BRW_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	OUT_BATCH(BRW_3DSTATE_CLEAR_PARAMS | (2 - 2));
+	OUT_BATCH(0);
+}
+
+void
+gen7_upload_depth_buffer_state(intel_screen_private *intel)
+{
+	OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
+	OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) | (BRW_SURFACE_NULL << 29));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+}
diff --git a/src/i965_video.c b/src/i965_video.c
index 1054914..7d7ac79 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -1530,34 +1530,6 @@ gen6_create_vidoe_objects(ScrnInfoPtr scrn)
 }
 
 static void
-gen6_upload_invarient_states(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	OUT_BATCH(BRW_PIPE_CONTROL | (4 - 2));
-	OUT_BATCH(BRW_PIPE_CONTROL_IS_FLUSH |
-		BRW_PIPE_CONTROL_WC_FLUSH |
-		BRW_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
-		BRW_PIPE_CONTROL_NOWRITE);
-	OUT_BATCH(0); /* write address */
-	OUT_BATCH(0); /* write data */
-
-	OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
-
-	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
-	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
-		GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
-	OUT_BATCH(0);
-
-	OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
-	OUT_BATCH(1);
-
-	/* Set system instruction pointer */
-	OUT_BATCH(BRW_STATE_SIP | 0);
-	OUT_BATCH(0);
-}
-
-static void
 gen6_upload_state_base_address(ScrnInfoPtr scrn, drm_intel_bo *surface_state_binding_table_bo)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
@@ -1575,88 +1547,6 @@ gen6_upload_state_base_address(ScrnInfoPtr scrn, drm_intel_bo *surface_state_bin
 }
 
 static void
-gen6_upload_viewport_state_pointers(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	OUT_BATCH(GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
-		GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
-		(4 - 2));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_RELOC(intel->video.gen4_cc_vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
-}
-
-static void
-gen6_upload_urb(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2));
-	OUT_BATCH(((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
-		(24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
-	OUT_BATCH((0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
-		(0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
-}
-
-static void
-gen6_upload_cc_state_pointers(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
-	OUT_RELOC(intel->video.gen6_blend_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
-	OUT_RELOC(intel->video.gen6_depth_stencil_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
-	OUT_RELOC(intel->video.gen4_cc_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
-}
-
-static void
-gen6_upload_sampler_state_pointers(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	OUT_BATCH(GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
-		GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
-		(4 - 2));
-	OUT_BATCH(0); /* VS */
-	OUT_BATCH(0); /* GS */
-	OUT_RELOC(intel->video.gen4_sampler_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
-}
-
-static void
-gen6_upload_binding_table(ScrnInfoPtr scrn, uint32_t ps_binding_table_offset)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	/* Binding table pointers */
-	OUT_BATCH(BRW_3DSTATE_BINDING_TABLE_POINTERS |
-		GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS |
-		(4 - 2));
-	OUT_BATCH(0);		/* vs */
-	OUT_BATCH(0);		/* gs */
-	/* Only the PS uses the binding table */
-	OUT_BATCH(ps_binding_table_offset);
-}
-
-static void
-gen6_upload_depth_buffer_state(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	OUT_BATCH(BRW_3DSTATE_DEPTH_BUFFER | (7 - 2));
-	OUT_BATCH((BRW_SURFACE_NULL << BRW_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT) |
-		(BRW_DEPTHFORMAT_D32_FLOAT << BRW_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	OUT_BATCH(BRW_3DSTATE_CLEAR_PARAMS | (2 - 2));
-	OUT_BATCH(0);
-}
-
-static void
 gen6_upload_drawing_rectangle(ScrnInfoPtr scrn, PixmapPtr pixmap)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
@@ -1668,87 +1558,6 @@ gen6_upload_drawing_rectangle(ScrnInfoPtr scrn, PixmapPtr pixmap)
 }
 
 static void 
-gen6_upload_vs_state(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	/* disable VS constant buffer */
-	OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	
-	OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2));
-	OUT_BATCH(0); /* without VS kernel */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0); /* pass-through */
-}
-
-static void 
-gen6_upload_gs_state(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	/* disable GS constant buffer */
-	OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	
-	OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2));
-	OUT_BATCH(0); /* without GS kernel */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0); /* pass-through */
-}
-
-static void 
-gen6_upload_clip_state(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2));
-	OUT_BATCH(0);
-	OUT_BATCH(0); /* pass-through */
-	OUT_BATCH(0);
-}
-
-static void 
-gen6_upload_sf_state(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2));
-	OUT_BATCH((1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
-		(1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
-		(0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
-	OUT_BATCH(0);
-	OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE);
-	OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0); /* DW9 */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0); /* DW14 */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0); /* DW19 */
-}
-
-static void 
 gen6_upload_wm_state(ScrnInfoPtr scrn, Bool is_packed)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
@@ -1857,262 +1666,25 @@ gen6_emit_video_setup(ScrnInfoPtr scrn,
 	IntelEmitInvarientState(scrn);
 	intel->last_3d = LAST_3D_VIDEO;
 
-	gen6_upload_invarient_states(scrn);
+	gen6_upload_invariant_states(intel);
 	gen6_upload_state_base_address(scrn, surface_state_binding_table_bo);
-	gen6_upload_viewport_state_pointers(scrn);
-	gen6_upload_urb(scrn);
-	gen6_upload_cc_state_pointers(scrn);
-	gen6_upload_sampler_state_pointers(scrn);
-	gen6_upload_vs_state(scrn);
-	gen6_upload_gs_state(scrn);
-	gen6_upload_clip_state(scrn);
-	gen6_upload_sf_state(scrn);
+	gen6_upload_viewport_state_pointers(intel, intel->video.gen4_cc_vp_bo);
+	gen6_upload_urb(intel);
+	gen6_upload_cc_state_pointers(intel, intel->video.gen6_blend_bo, intel->video.gen4_cc_bo, intel->video.gen6_depth_stencil_bo);
+	gen6_upload_sampler_state_pointers(intel, intel->video.gen4_sampler_bo);
+	gen6_upload_vs_state(intel);
+	gen6_upload_gs_state(intel);
+	gen6_upload_clip_state(intel);
+	gen6_upload_sf_state(intel);
 	gen6_upload_wm_state(scrn, n_src_surf == 1 ? TRUE : FALSE);
-	gen6_upload_binding_table(scrn, (n_src_surf + 1) * SURFACE_STATE_PADDED_SIZE);
-	gen6_upload_depth_buffer_state(scrn);
+	gen6_upload_binding_table(intel, (n_src_surf + 1) * SURFACE_STATE_PADDED_SIZE);
+	gen6_upload_depth_buffer_state(intel);
 	gen6_upload_drawing_rectangle(scrn, pixmap);
 	gen6_upload_vertex_element_state(scrn);
 	gen6_upload_vertex_buffer(scrn, vertex_bo, end_address_offset);
 	gen6_upload_primitive(scrn);
 }
 
-static void
-gen7_upload_invarient_states(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	OUT_BATCH(BRW_PIPE_CONTROL | (4 - 2));
-	OUT_BATCH(BRW_PIPE_CONTROL_IS_FLUSH |
-		BRW_PIPE_CONTROL_WC_FLUSH |
-		BRW_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
-		BRW_PIPE_CONTROL_NOWRITE);
-	OUT_BATCH(0); /* write address */
-	OUT_BATCH(0); /* write data */
-
-	OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
-
-	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | (4 - 2));
-	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
-		GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
-	OUT_BATCH(1);
-
-	/* Set system instruction pointer */
-	OUT_BATCH(BRW_STATE_SIP | 0);
-	OUT_BATCH(0);
-}
-
-static void
-gen7_upload_viewport_state_pointers(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
-	OUT_RELOC(intel->video.gen4_cc_vp_bo, 
-		I915_GEM_DOMAIN_INSTRUCTION, 0,
-		0);
-
-	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
-	OUT_BATCH(0);
-}
-
-/*
- * URB layout for Xv on GEN7 
- * ----------------------------------------
- * | PS Push Constants (8KB) | VS entries |
- * ----------------------------------------
- */
-static void
-gen7_upload_urb(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
-	OUT_BATCH(8); /* in 1KBs */
-
-	OUT_BATCH(GEN7_3DSTATE_URB_VS | (2 - 2));
-	OUT_BATCH(
-		(32 << GEN7_URB_ENTRY_NUMBER_SHIFT) | /* at least 32 */
-		(2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
-		(1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
-
-	OUT_BATCH(GEN7_3DSTATE_URB_GS | (2 - 2));
-	OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
-		(1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
-
-	OUT_BATCH(GEN7_3DSTATE_URB_HS | (2 - 2));
-	OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
-		(2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
-
-	OUT_BATCH(GEN7_3DSTATE_URB_DS | (2 - 2));
-	OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
-		(2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
-}
-
-static void
-gen7_upload_cc_state_pointers(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
-	OUT_RELOC(intel->video.gen4_cc_bo,
-		I915_GEM_DOMAIN_INSTRUCTION, 0,
-		1);
-
-	OUT_BATCH(GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
-	OUT_RELOC(intel->video.gen6_blend_bo,
-		I915_GEM_DOMAIN_INSTRUCTION, 0,
-		1);
-
-	OUT_BATCH(GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
-	OUT_RELOC(intel->video.gen6_depth_stencil_bo,
-		I915_GEM_DOMAIN_INSTRUCTION, 0, 
-		1);
-}
-
-static void
-gen7_upload_sampler_state_pointers(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
-	OUT_RELOC(intel->video.gen4_sampler_bo,
-		I915_GEM_DOMAIN_INSTRUCTION, 0,
-		0);
-}
-
-static void 
-gen7_upload_bypass_states(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	/* bypass GS */
-	OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (7 - 2));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2));
-	OUT_BATCH(0); /* without GS kernel */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0); /* pass-through */
-
-	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
-	OUT_BATCH(0);
-
-	/* disable HS */
-	OUT_BATCH(GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	OUT_BATCH(GEN7_3DSTATE_HS | (7 - 2));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
-	OUT_BATCH(0);
-
-	/* Disable TE */
-	OUT_BATCH(GEN7_3DSTATE_TE | (4 - 2));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	/* Disable DS */
-	OUT_BATCH(GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	OUT_BATCH(GEN7_3DSTATE_DS | (6 - 2));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
-	OUT_BATCH(0);
-
-	/* Disable STREAMOUT */
-	OUT_BATCH(GEN7_3DSTATE_STREAMOUT | (3 - 2));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-}
-
-static void 
-gen7_upload_vs_state(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	/* disable VS constant buffer */
-	OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | (7 - 2));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	
-	OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2));
-	OUT_BATCH(0); /* without VS kernel */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0); /* pass-through */
-}
-
-static void 
-gen7_upload_sf_state(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	OUT_BATCH(GEN7_3DSTATE_SBE | (14 - 2));
-	OUT_BATCH((1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
-		(1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
-		(0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0); /* DW4 */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0); /* DW9 */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	OUT_BATCH(GEN6_3DSTATE_SF | (7 - 2));
-	OUT_BATCH(0);
-	OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE);
-	OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-}
-
 static void 
 gen7_upload_wm_state(ScrnInfoPtr scrn, Bool is_packed)
 {
@@ -2159,34 +1731,6 @@ gen7_upload_wm_state(ScrnInfoPtr scrn, Bool is_packed)
 	OUT_BATCH(0); /* kernel 2 pointer */
 }
 
-static void
-gen7_upload_binding_table(ScrnInfoPtr scrn, uint32_t ps_binding_table_offset)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
-	OUT_BATCH(ps_binding_table_offset);
-}
-
-static void
-gen7_upload_depth_buffer_state(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-
-	OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
-	OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
-		(BRW_SURFACE_NULL << 29));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-}
-
 static void 
 gen7_upload_vertex_buffer(ScrnInfoPtr scrn, drm_intel_bo *vertex_bo, uint32_t end_address_offset)
 {
@@ -2231,19 +1775,19 @@ gen7_emit_video_setup(ScrnInfoPtr scrn,
 	IntelEmitInvarientState(scrn);
 	intel->last_3d = LAST_3D_VIDEO;
 
-	gen7_upload_invarient_states(scrn);
+	gen6_upload_invariant_states(intel);
 	gen6_upload_state_base_address(scrn, surface_state_binding_table_bo);
-	gen7_upload_viewport_state_pointers(scrn);
-	gen7_upload_urb(scrn);
-	gen7_upload_cc_state_pointers(scrn);
-	gen7_upload_sampler_state_pointers(scrn);
-	gen7_upload_bypass_states(scrn);
-	gen7_upload_vs_state(scrn);
-	gen6_upload_clip_state(scrn);
-	gen7_upload_sf_state(scrn);
+	gen7_upload_viewport_state_pointers(intel, intel->video.gen4_cc_vp_bo);
+	gen7_upload_urb(intel);
+	gen7_upload_cc_state_pointers(intel, intel->video.gen6_blend_bo, intel->video.gen4_cc_bo, intel->video.gen6_depth_stencil_bo);
+	gen7_upload_sampler_state_pointers(intel, intel->video.gen4_sampler_bo);
+	gen7_upload_bypass_states(intel);
+	gen6_upload_vs_state(intel);
+	gen6_upload_clip_state(intel);
+	gen7_upload_sf_state(intel);
 	gen7_upload_wm_state(scrn, n_src_surf == 1 ? TRUE : FALSE);
-	gen7_upload_binding_table(scrn, (n_src_surf + 1) * SURFACE_STATE_PADDED_SIZE);
-	gen7_upload_depth_buffer_state(scrn);
+	gen7_upload_binding_table(intel, (n_src_surf + 1) * SURFACE_STATE_PADDED_SIZE);
+	gen7_upload_depth_buffer_state(intel);
 	gen6_upload_drawing_rectangle(scrn, pixmap);
 	gen6_upload_vertex_element_state(scrn);
 	gen7_upload_vertex_buffer(scrn, vertex_bo, end_address_offset);
diff --git a/src/intel.h b/src/intel.h
index 6135349..3f48dd4 100644
--- a/src/intel.h
+++ b/src/intel.h
@@ -597,6 +597,35 @@ void i965_vertex_flush(intel_screen_private *intel);
 void i965_batch_flush(intel_screen_private *intel);
 void i965_batch_commit_notify(intel_screen_private *intel);
 
+/* i965_3d.c */
+void gen6_upload_invariant_states(intel_screen_private *intel);
+void gen6_upload_viewport_state_pointers(intel_screen_private *intel,
+					 drm_intel_bo *cc_vp_bo);
+void gen7_upload_viewport_state_pointers(intel_screen_private *intel,
+					 drm_intel_bo *cc_vp_bo);
+void gen6_upload_urb(intel_screen_private *intel);
+void gen7_upload_urb(intel_screen_private *intel);
+void gen6_upload_cc_state_pointers(intel_screen_private *intel,
+				   drm_intel_bo *blend_bo, drm_intel_bo *cc_bo,
+				   drm_intel_bo *depth_stencil_bo);
+void gen7_upload_cc_state_pointers(intel_screen_private *intel,
+				   drm_intel_bo *blend_bo, drm_intel_bo *cc_bo,
+				   drm_intel_bo *depth_stencil_bo);
+void gen6_upload_sampler_state_pointers(intel_screen_private *intel,
+					drm_intel_bo *sampler_bo);
+void gen7_upload_sampler_state_pointers(intel_screen_private *intel,
+					drm_intel_bo *sampler_bo);
+void gen7_upload_bypass_states(intel_screen_private *intel);
+void gen6_upload_gs_state(intel_screen_private *intel);
+void gen6_upload_vs_state(intel_screen_private *intel);
+void gen6_upload_clip_state(intel_screen_private *intel);
+void gen6_upload_sf_state(intel_screen_private *intel);
+void gen7_upload_sf_state(intel_screen_private *intel);
+void gen6_upload_binding_table(intel_screen_private *intel, uint32_t ps_binding_table_offset);
+void gen7_upload_binding_table(intel_screen_private *intel, uint32_t ps_binding_table_offset);
+void gen6_upload_depth_buffer_state(intel_screen_private *intel);
+void gen7_upload_depth_buffer_state(intel_screen_private *intel);
+
 Bool intel_transform_is_affine(PictTransformPtr t);
 Bool
 intel_get_transformed_coordinates(int x, int y, PictTransformPtr transform,
-- 
1.7.4.4


[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 07/10] render: Refactor to use newly shared pipeline setup code in i965_3d.c.
  2011-07-14 21:21 [PATCH 00/10] UXA Render acceleration for Ivybridge Kenneth Graunke
                   ` (5 preceding siblings ...)
  2011-07-14 21:21 ` [PATCH 06/10] Xv: Refactor out pipeline setup functions for future reuse in render Kenneth Graunke
@ 2011-07-14 21:21 ` Kenneth Graunke
  2011-07-14 21:21 ` [PATCH 08/10] render: Use Ivybridge variants for 3D pipeline setup Kenneth Graunke
                   ` (2 subsequent siblings)
  9 siblings, 0 replies; 11+ messages in thread
From: Kenneth Graunke @ 2011-07-14 21:21 UTC (permalink / raw)
  To: intel-gfx

Slightly generalize the shared SF and CC code to accomodate both.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/i965_3d.c     |   28 +++++---
 src/i965_render.c |  183 +++++------------------------------------------------
 src/i965_video.c  |    8 +-
 src/intel.h       |   10 ++-
 4 files changed, 44 insertions(+), 185 deletions(-)

diff --git a/src/i965_3d.c b/src/i965_3d.c
index 19ddee7..d4d38e5 100644
--- a/src/i965_3d.c
+++ b/src/i965_3d.c
@@ -130,11 +130,13 @@ void
 gen6_upload_cc_state_pointers(intel_screen_private *intel,
 			      drm_intel_bo *blend_bo,
 			      drm_intel_bo *cc_bo,
-			      drm_intel_bo *depth_stencil_bo)
+			      drm_intel_bo *depth_stencil_bo,
+			      uint32_t blend_offset)
 {
 	OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
 	if (blend_bo)
-		OUT_RELOC(blend_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+		OUT_RELOC(blend_bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+			  blend_offset | 1);
 	else
 		OUT_BATCH(0);
 
@@ -153,11 +155,13 @@ void
 gen7_upload_cc_state_pointers(intel_screen_private *intel,
 			      drm_intel_bo *blend_bo,
 			      drm_intel_bo *cc_bo,
-			      drm_intel_bo *depth_stencil_bo)
+			      drm_intel_bo *depth_stencil_bo,
+			      uint32_t blend_offset)
 {
 	OUT_BATCH(GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
 	if (blend_bo)
-		OUT_RELOC(blend_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+		OUT_RELOC(blend_bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+			  blend_offset | 1);
 	else
 		OUT_BATCH(0);
 
@@ -320,12 +324,14 @@ gen6_upload_clip_state(intel_screen_private *intel)
 }
 
 void
-gen6_upload_sf_state(intel_screen_private *intel)
+gen6_upload_sf_state(intel_screen_private *intel,
+		     int num_sf_outputs,
+		     int read_offset)
 {
 	OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2));
-	OUT_BATCH((1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
+	OUT_BATCH((num_sf_outputs << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
 		(1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
-		(0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
+		(read_offset << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
 	OUT_BATCH(0);
 	OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE);
 	OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
@@ -347,12 +353,14 @@ gen6_upload_sf_state(intel_screen_private *intel)
 }
 
 void
-gen7_upload_sf_state(intel_screen_private *intel)
+gen7_upload_sf_state(intel_screen_private *intel,
+		     int num_sf_outputs,
+		     int read_offset)
 {
 	OUT_BATCH(GEN7_3DSTATE_SBE | (14 - 2));
-	OUT_BATCH((1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
+	OUT_BATCH((num_sf_outputs << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
 		(1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
-		(0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
+		(read_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
 	OUT_BATCH(0);
 	OUT_BATCH(0);
 	OUT_BATCH(0); /* DW4 */
diff --git a/src/i965_render.c b/src/i965_render.c
index c9b3c7a..17e35c9 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -2538,24 +2538,6 @@ gen6_composite_create_depth_stencil_state(intel_screen_private *intel)
 }
 
 static void
-gen6_composite_invariant_states(intel_screen_private *intel)
-{
-	OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
-
-	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
-	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
-		  GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
-	OUT_BATCH(0);
-
-	OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
-	OUT_BATCH(1);
-
-	/* Set system instruction pointer */
-	OUT_BATCH(BRW_STATE_SIP | 0);
-	OUT_BATCH(0);
-}
-
-static void
 gen6_composite_state_base_address(intel_screen_private *intel)
 {
 	OUT_BATCH(BRW_STATE_BASE_ADDRESS | (10 - 2));
@@ -2573,52 +2555,21 @@ gen6_composite_state_base_address(intel_screen_private *intel)
 }
 
 static void
-gen6_composite_viewport_state_pointers(intel_screen_private *intel,
-				       drm_intel_bo *cc_vp_bo)
-{
-
-	OUT_BATCH(GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
-		  GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
-		  (4 - 2));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_RELOC(cc_vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
-}
-
-static void
-gen6_composite_urb(intel_screen_private *intel)
-{
-	OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2));
-	OUT_BATCH(((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
-		  (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
-	OUT_BATCH((0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
-		(0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
-}
-
-static void
 gen6_composite_cc_state_pointers(intel_screen_private *intel,
 				 uint32_t blend_offset)
 {
 	struct gen4_render_state *render_state = intel->gen4_render_state;
+	drm_intel_bo *cc_bo = NULL;
+	drm_intel_bo *depth_stencil_bo = NULL;
 
 	if (intel->gen6_render_state.blend == blend_offset)
 		return;
 
-	OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
-	OUT_RELOC(render_state->gen6_blend_bo,
-		  I915_GEM_DOMAIN_INSTRUCTION, 0,
-		  blend_offset | 1);
 	if (intel->gen6_render_state.blend == -1) {
-		OUT_RELOC(render_state->gen6_depth_stencil_bo,
-			  I915_GEM_DOMAIN_INSTRUCTION, 0,
-			  1);
-		OUT_RELOC(render_state->cc_state_bo,
-			  I915_GEM_DOMAIN_INSTRUCTION, 0,
-			  1);
-	} else {
-		OUT_BATCH(0);
-		OUT_BATCH(0);
+		cc_bo = render_state->cc_state_bo;
+		depth_stencil_bo = render_state->gen6_depth_stencil_bo;
 	}
+	gen6_upload_cc_state_pointers(intel, render_state->gen6_blend_bo, cc_bo, depth_stencil_bo, blend_offset);
 
 	intel->gen6_render_state.blend = blend_offset;
 }
@@ -2632,49 +2583,7 @@ gen6_composite_sampler_state_pointers(intel_screen_private *intel,
 
 	intel->gen6_render_state.samplers = bo;
 
-	OUT_BATCH(GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
-		  GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
-		  (4 - 2));
-	OUT_BATCH(0); /* VS */
-	OUT_BATCH(0); /* GS */
-	OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
-}
-
-static void
-gen6_composite_vs_state(intel_screen_private *intel)
-{
-	/* disable VS constant buffer */
-	OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2));
-	OUT_BATCH(0); /* without VS kernel */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0); /* pass-through */
-}
-
-static void
-gen6_composite_gs_state(intel_screen_private *intel)
-{
-	/* disable GS constant buffer */
-	OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2));
-	OUT_BATCH(0); /* without GS kernel */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0); /* pass-through */
+	gen6_upload_sampler_state_pointers(intel, bo);
 }
 
 static void
@@ -2689,15 +2598,6 @@ gen6_composite_wm_constants(intel_screen_private *intel)
 }
 
 static void
-gen6_composite_clip_state(intel_screen_private *intel)
-{
-	OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2));
-	OUT_BATCH(0);
-	OUT_BATCH(0); /* pass-through */
-	OUT_BATCH(0);
-}
-
-static void
 gen6_composite_sf_state(intel_screen_private *intel,
 			Bool has_mask)
 {
@@ -2708,28 +2608,7 @@ gen6_composite_sf_state(intel_screen_private *intel,
 
 	intel->gen6_render_state.num_sf_outputs = num_sf_outputs;
 
-	OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2));
-	OUT_BATCH((num_sf_outputs << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
-		  (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
-		  (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
-	OUT_BATCH(0);
-	OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE);
-	OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0); /* DW9 */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0); /* DW14 */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0); /* DW19 */
+	gen6_upload_sf_state(intel, num_sf_outputs, 1);
 }
 
 static void
@@ -2763,35 +2642,6 @@ gen6_composite_wm_state(intel_screen_private *intel,
 }
 
 static void
-gen6_composite_binding_table_pointers(intel_screen_private *intel)
-{
-	/* Binding table pointers */
-	OUT_BATCH(BRW_3DSTATE_BINDING_TABLE_POINTERS |
-		  GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS |
-		  (4 - 2));
-	OUT_BATCH(0);		/* vs */
-	OUT_BATCH(0);		/* gs */
-	/* Only the PS uses the binding table */
-	OUT_BATCH(intel->surface_table);
-}
-
-static void
-gen6_composite_depth_buffer_state(intel_screen_private *intel)
-{
-	OUT_BATCH(BRW_3DSTATE_DEPTH_BUFFER | (7 - 2));
-	OUT_BATCH((BRW_SURFACE_NULL << BRW_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT) |
-		  (BRW_DEPTHFORMAT_D32_FLOAT << BRW_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT));
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	OUT_BATCH(BRW_3DSTATE_CLEAR_PARAMS | (2 - 2));
-	OUT_BATCH(0);
-}
-
-static void
 gen6_composite_drawing_rectangle(intel_screen_private *intel,
 				 PixmapPtr dest)
 {
@@ -2909,16 +2759,15 @@ gen6_emit_composite_state(struct intel_screen_private *intel)
 
 	intel->needs_render_state_emit = FALSE;
 	if (intel->needs_3d_invariant) {
-		gen6_composite_invariant_states(intel);
-		gen6_composite_viewport_state_pointers(intel,
-						       render->cc_vp_bo);
-		gen6_composite_urb(intel);
-
-		gen6_composite_vs_state(intel);
-		gen6_composite_gs_state(intel);
-		gen6_composite_clip_state(intel);
+		gen6_upload_invariant_states(intel);
+		gen6_upload_viewport_state_pointers(intel, render->cc_vp_bo);
+		gen6_upload_urb(intel);
+
+		gen6_upload_vs_state(intel);
+		gen6_upload_gs_state(intel);
+		gen6_upload_clip_state(intel);
 		gen6_composite_wm_constants(intel);
-		gen6_composite_depth_buffer_state(intel);
+		gen6_upload_depth_buffer_state(intel);
 
 		intel->needs_3d_invariant = FALSE;
 	}
@@ -2938,7 +2787,7 @@ gen6_emit_composite_state(struct intel_screen_private *intel)
 	gen6_composite_wm_state(intel,
 				has_mask,
 				render->wm_kernel_bo[composite_op->wm_kernel]);
-	gen6_composite_binding_table_pointers(intel);
+	gen6_upload_binding_table(intel, intel->surface_table);
 
 	gen6_composite_drawing_rectangle(intel, intel->render_dest);
 	gen6_composite_vertex_element_state(intel, has_mask, is_affine);
diff --git a/src/i965_video.c b/src/i965_video.c
index 7d7ac79..eb5ff14 100644
--- a/src/i965_video.c
+++ b/src/i965_video.c
@@ -1670,12 +1670,12 @@ gen6_emit_video_setup(ScrnInfoPtr scrn,
 	gen6_upload_state_base_address(scrn, surface_state_binding_table_bo);
 	gen6_upload_viewport_state_pointers(intel, intel->video.gen4_cc_vp_bo);
 	gen6_upload_urb(intel);
-	gen6_upload_cc_state_pointers(intel, intel->video.gen6_blend_bo, intel->video.gen4_cc_bo, intel->video.gen6_depth_stencil_bo);
+	gen6_upload_cc_state_pointers(intel, intel->video.gen6_blend_bo, intel->video.gen4_cc_bo, intel->video.gen6_depth_stencil_bo, 0);
 	gen6_upload_sampler_state_pointers(intel, intel->video.gen4_sampler_bo);
 	gen6_upload_vs_state(intel);
 	gen6_upload_gs_state(intel);
 	gen6_upload_clip_state(intel);
-	gen6_upload_sf_state(intel);
+	gen6_upload_sf_state(intel, 1, 0);
 	gen6_upload_wm_state(scrn, n_src_surf == 1 ? TRUE : FALSE);
 	gen6_upload_binding_table(intel, (n_src_surf + 1) * SURFACE_STATE_PADDED_SIZE);
 	gen6_upload_depth_buffer_state(intel);
@@ -1779,12 +1779,12 @@ gen7_emit_video_setup(ScrnInfoPtr scrn,
 	gen6_upload_state_base_address(scrn, surface_state_binding_table_bo);
 	gen7_upload_viewport_state_pointers(intel, intel->video.gen4_cc_vp_bo);
 	gen7_upload_urb(intel);
-	gen7_upload_cc_state_pointers(intel, intel->video.gen6_blend_bo, intel->video.gen4_cc_bo, intel->video.gen6_depth_stencil_bo);
+	gen7_upload_cc_state_pointers(intel, intel->video.gen6_blend_bo, intel->video.gen4_cc_bo, intel->video.gen6_depth_stencil_bo, 0);
 	gen7_upload_sampler_state_pointers(intel, intel->video.gen4_sampler_bo);
 	gen7_upload_bypass_states(intel);
 	gen6_upload_vs_state(intel);
 	gen6_upload_clip_state(intel);
-	gen7_upload_sf_state(intel);
+	gen7_upload_sf_state(intel, 1, 0);
 	gen7_upload_wm_state(scrn, n_src_surf == 1 ? TRUE : FALSE);
 	gen7_upload_binding_table(intel, (n_src_surf + 1) * SURFACE_STATE_PADDED_SIZE);
 	gen7_upload_depth_buffer_state(intel);
diff --git a/src/intel.h b/src/intel.h
index 3f48dd4..42afaf4 100644
--- a/src/intel.h
+++ b/src/intel.h
@@ -607,10 +607,12 @@ void gen6_upload_urb(intel_screen_private *intel);
 void gen7_upload_urb(intel_screen_private *intel);
 void gen6_upload_cc_state_pointers(intel_screen_private *intel,
 				   drm_intel_bo *blend_bo, drm_intel_bo *cc_bo,
-				   drm_intel_bo *depth_stencil_bo);
+				   drm_intel_bo *depth_stencil_bo,
+				   uint32_t blend_offset);
 void gen7_upload_cc_state_pointers(intel_screen_private *intel,
 				   drm_intel_bo *blend_bo, drm_intel_bo *cc_bo,
-				   drm_intel_bo *depth_stencil_bo);
+				   drm_intel_bo *depth_stencil_bo,
+				   uint32_t blend_offset);
 void gen6_upload_sampler_state_pointers(intel_screen_private *intel,
 					drm_intel_bo *sampler_bo);
 void gen7_upload_sampler_state_pointers(intel_screen_private *intel,
@@ -619,8 +621,8 @@ void gen7_upload_bypass_states(intel_screen_private *intel);
 void gen6_upload_gs_state(intel_screen_private *intel);
 void gen6_upload_vs_state(intel_screen_private *intel);
 void gen6_upload_clip_state(intel_screen_private *intel);
-void gen6_upload_sf_state(intel_screen_private *intel);
-void gen7_upload_sf_state(intel_screen_private *intel);
+void gen6_upload_sf_state(intel_screen_private *intel, int num_sf_outputs, int read_offset);
+void gen7_upload_sf_state(intel_screen_private *intel, int num_sf_outputs, int read_offset);
 void gen6_upload_binding_table(intel_screen_private *intel, uint32_t ps_binding_table_offset);
 void gen7_upload_binding_table(intel_screen_private *intel, uint32_t ps_binding_table_offset);
 void gen6_upload_depth_buffer_state(intel_screen_private *intel);
-- 
1.7.4.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 08/10] render: Use Ivybridge variants for 3D pipeline setup.
  2011-07-14 21:21 [PATCH 00/10] UXA Render acceleration for Ivybridge Kenneth Graunke
                   ` (6 preceding siblings ...)
  2011-07-14 21:21 ` [PATCH 07/10] render: Refactor to use newly shared pipeline setup code in i965_3d.c Kenneth Graunke
@ 2011-07-14 21:21 ` Kenneth Graunke
  2011-07-14 21:21 ` [PATCH 09/10] render: Update pixel shader state for Ivybridge Kenneth Graunke
  2011-07-14 21:21 ` [PATCH 10/10] render: Enable RENDER acceleration on Ivybridge Kenneth Graunke
  9 siblings, 0 replies; 11+ messages in thread
From: Kenneth Graunke @ 2011-07-14 21:21 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/i965_render.c |   50 +++++++++++++++++++++++++++++++++++++++-----------
 1 files changed, 39 insertions(+), 11 deletions(-)

diff --git a/src/i965_render.c b/src/i965_render.c
index 17e35c9..5222d1c 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -2569,7 +2569,11 @@ gen6_composite_cc_state_pointers(intel_screen_private *intel,
 		cc_bo = render_state->cc_state_bo;
 		depth_stencil_bo = render_state->gen6_depth_stencil_bo;
 	}
-	gen6_upload_cc_state_pointers(intel, render_state->gen6_blend_bo, cc_bo, depth_stencil_bo, blend_offset);
+	if (INTEL_INFO(intel)->gen >= 70) {
+		gen7_upload_cc_state_pointers(intel, render_state->gen6_blend_bo, cc_bo, depth_stencil_bo, blend_offset);
+	} else {
+		gen6_upload_cc_state_pointers(intel, render_state->gen6_blend_bo, cc_bo, depth_stencil_bo, blend_offset);
+	}
 
 	intel->gen6_render_state.blend = blend_offset;
 }
@@ -2583,18 +2587,26 @@ gen6_composite_sampler_state_pointers(intel_screen_private *intel,
 
 	intel->gen6_render_state.samplers = bo;
 
-	gen6_upload_sampler_state_pointers(intel, bo);
+	if (INTEL_INFO(intel)->gen >= 70)
+		gen7_upload_sampler_state_pointers(intel, bo);
+	else
+		gen6_upload_sampler_state_pointers(intel, bo);
 }
 
 static void
 gen6_composite_wm_constants(intel_screen_private *intel)
 {
+	Bool ivb = INTEL_INFO(intel)->gen >= 70;
 	/* disable WM constant buffer */
-	OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (5 - 2));
+	OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | ((ivb ? 7 : 5) - 2));
 	OUT_BATCH(0);
 	OUT_BATCH(0);
 	OUT_BATCH(0);
 	OUT_BATCH(0);
+	if (ivb) {
+		OUT_BATCH(0);
+		OUT_BATCH(0);
+	}
 }
 
 static void
@@ -2608,7 +2620,10 @@ gen6_composite_sf_state(intel_screen_private *intel,
 
 	intel->gen6_render_state.num_sf_outputs = num_sf_outputs;
 
-	gen6_upload_sf_state(intel, num_sf_outputs, 1);
+	if (INTEL_INFO(intel)->gen >= 70)
+		gen7_upload_sf_state(intel, num_sf_outputs, 1);
+	else
+		gen6_upload_sf_state(intel, num_sf_outputs, 1);
 }
 
 static void
@@ -2754,20 +2769,30 @@ gen6_emit_composite_state(struct intel_screen_private *intel)
 	sampler_state_extend_t mask_extend = composite_op->mask_extend;
 	Bool is_affine = composite_op->is_affine;
 	Bool has_mask = intel->render_mask != NULL;
+	Bool ivb = INTEL_INFO(intel)->gen >= 70;
 	uint32_t src, dst;
 	drm_intel_bo *ps_sampler_state_bo = render->ps_sampler_state_bo[src_filter][src_extend][mask_filter][mask_extend];
 
 	intel->needs_render_state_emit = FALSE;
 	if (intel->needs_3d_invariant) {
 		gen6_upload_invariant_states(intel);
-		gen6_upload_viewport_state_pointers(intel, render->cc_vp_bo);
-		gen6_upload_urb(intel);
 
+		if (ivb) {
+			gen7_upload_viewport_state_pointers(intel, render->cc_vp_bo);
+			gen7_upload_urb(intel);
+			gen7_upload_bypass_states(intel);
+			gen7_upload_depth_buffer_state(intel);
+		} else {
+			gen6_upload_invariant_states(intel);
+			gen6_upload_viewport_state_pointers(intel, render->cc_vp_bo);
+			gen6_upload_urb(intel);
+
+			gen6_upload_gs_state(intel);
+			gen6_upload_depth_buffer_state(intel);
+		}
+		gen6_composite_wm_constants(intel);
 		gen6_upload_vs_state(intel);
-		gen6_upload_gs_state(intel);
 		gen6_upload_clip_state(intel);
-		gen6_composite_wm_constants(intel);
-		gen6_upload_depth_buffer_state(intel);
 
 		intel->needs_3d_invariant = FALSE;
 	}
@@ -2787,8 +2812,11 @@ gen6_emit_composite_state(struct intel_screen_private *intel)
 	gen6_composite_wm_state(intel,
 				has_mask,
 				render->wm_kernel_bo[composite_op->wm_kernel]);
-	gen6_upload_binding_table(intel, intel->surface_table);
-
+	if (ivb) {
+		gen7_upload_binding_table(intel, intel->surface_table);
+	} else {
+		gen6_upload_binding_table(intel, intel->surface_table);
+	}
 	gen6_composite_drawing_rectangle(intel, intel->render_dest);
 	gen6_composite_vertex_element_state(intel, has_mask, is_affine);
 }
-- 
1.7.4.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 09/10] render: Update pixel shader state for Ivybridge.
  2011-07-14 21:21 [PATCH 00/10] UXA Render acceleration for Ivybridge Kenneth Graunke
                   ` (7 preceding siblings ...)
  2011-07-14 21:21 ` [PATCH 08/10] render: Use Ivybridge variants for 3D pipeline setup Kenneth Graunke
@ 2011-07-14 21:21 ` Kenneth Graunke
  2011-07-14 21:21 ` [PATCH 10/10] render: Enable RENDER acceleration on Ivybridge Kenneth Graunke
  9 siblings, 0 replies; 11+ messages in thread
From: Kenneth Graunke @ 2011-07-14 21:21 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/i965_render.c |   38 +++++++++++++++++++++++++++++++++++---
 1 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/src/i965_render.c b/src/i965_render.c
index 5222d1c..596d070 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -2657,6 +2657,37 @@ gen6_composite_wm_state(intel_screen_private *intel,
 }
 
 static void
+gen7_composite_wm_state(intel_screen_private *intel,
+			Bool has_mask,
+			drm_intel_bo *bo)
+{
+	int num_surfaces = has_mask ? 3 : 2;
+
+	if (intel->gen6_render_state.kernel == bo)
+		return;
+
+	intel->gen6_render_state.kernel = bo;
+
+	OUT_BATCH(GEN6_3DSTATE_WM | (3 - 2));
+	OUT_BATCH(GEN7_WM_DISPATCH_ENABLE |
+		  GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
+	OUT_BATCH(0);
+
+	OUT_BATCH(GEN7_3DSTATE_PS | (8 - 2));
+	OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+	OUT_BATCH((1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
+		  (num_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+	OUT_BATCH(0); /* scratch space base offset */
+	OUT_BATCH(((86 - 1) << GEN7_PS_MAX_THREADS_SHIFT) |
+		  GEN7_PS_ATTRIBUTE_ENABLE |
+		  GEN7_PS_16_DISPATCH_ENABLE);
+	OUT_BATCH((6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
+	OUT_BATCH(0); /* kernel 1 pointer */
+	OUT_BATCH(0); /* kernel 2 pointer */
+}
+
+
+static void
 gen6_composite_drawing_rectangle(intel_screen_private *intel,
 				 PixmapPtr dest)
 {
@@ -2809,12 +2840,13 @@ gen6_emit_composite_state(struct intel_screen_private *intel)
 					(src * BRW_BLENDFACTOR_COUNT + dst) * GEN6_BLEND_STATE_PADDED_SIZE);
 	gen6_composite_sampler_state_pointers(intel, ps_sampler_state_bo);
 	gen6_composite_sf_state(intel, has_mask);
-	gen6_composite_wm_state(intel,
-				has_mask,
-				render->wm_kernel_bo[composite_op->wm_kernel]);
 	if (ivb) {
+		gen7_composite_wm_state(intel, has_mask,
+					render->wm_kernel_bo[composite_op->wm_kernel]);
 		gen7_upload_binding_table(intel, intel->surface_table);
 	} else {
+		gen6_composite_wm_state(intel, has_mask,
+					render->wm_kernel_bo[composite_op->wm_kernel]);
 		gen6_upload_binding_table(intel, intel->surface_table);
 	}
 	gen6_composite_drawing_rectangle(intel, intel->render_dest);
-- 
1.7.4.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 10/10] render: Enable RENDER acceleration on Ivybridge.
  2011-07-14 21:21 [PATCH 00/10] UXA Render acceleration for Ivybridge Kenneth Graunke
                   ` (8 preceding siblings ...)
  2011-07-14 21:21 ` [PATCH 09/10] render: Update pixel shader state for Ivybridge Kenneth Graunke
@ 2011-07-14 21:21 ` Kenneth Graunke
  9 siblings, 0 replies; 11+ messages in thread
From: Kenneth Graunke @ 2011-07-14 21:21 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/i965_render.c |    3 ---
 1 files changed, 0 insertions(+), 3 deletions(-)

diff --git a/src/i965_render.c b/src/i965_render.c
index 596d070..7e1da5b 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -184,9 +184,6 @@ i965_check_composite(int op,
 	ScrnInfoPtr scrn = xf86Screens[dest_picture->pDrawable->pScreen->myNum];
 	intel_screen_private *intel = intel_get_screen_private(scrn);
 
-	if (IS_GEN7(intel))
-		return FALSE;
-
 	/* Check for unsupported compositing operations. */
 	if (op >= sizeof(i965_blend_op) / sizeof(i965_blend_op[0])) {
 		intel_debug_fallback(scrn,
-- 
1.7.4.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2011-07-14 21:25 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-07-14 21:21 [PATCH 00/10] UXA Render acceleration for Ivybridge Kenneth Graunke
2011-07-14 21:21 ` [PATCH 01/10] render: New Ivybridge assembly programs for render acceleration Kenneth Graunke
2011-07-14 21:21 ` [PATCH 02/10] render: Update SURFACE_STATE for Ivybridge Kenneth Graunke
2011-07-14 21:21 ` [PATCH 03/10] render: Update SAMPLER_STATE " Kenneth Graunke
2011-07-14 21:21 ` [PATCH 04/10] render: Set Address Modify Enable in 3DSTATE_VERTEX_BUFFERS on Gen7 Kenneth Graunke
2011-07-14 21:21 ` [PATCH 05/10] render: Update 3DPRIMITIVE for Ivybridge Kenneth Graunke
2011-07-14 21:21 ` [PATCH 06/10] Xv: Refactor out pipeline setup functions for future reuse in render Kenneth Graunke
2011-07-14 21:21 ` [PATCH 07/10] render: Refactor to use newly shared pipeline setup code in i965_3d.c Kenneth Graunke
2011-07-14 21:21 ` [PATCH 08/10] render: Use Ivybridge variants for 3D pipeline setup Kenneth Graunke
2011-07-14 21:21 ` [PATCH 09/10] render: Update pixel shader state for Ivybridge Kenneth Graunke
2011-07-14 21:21 ` [PATCH 10/10] render: Enable RENDER acceleration on Ivybridge Kenneth Graunke

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).