All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ben Widawsky <ben@bwidawsk.net>
To: intel-gfx@lists.freedesktop.org
Cc: Ben Widawsky <ben@bwidawsk.net>
Subject: [PATCH 10/10] debugging: shader debugging
Date: Wed, 13 Jul 2011 13:51:52 -0700	[thread overview]
Message-ID: <1310590312-21669-11-git-send-email-ben@bwidawsk.net> (raw)
In-Reply-To: <1310590312-21669-1-git-send-email-ben@bwidawsk.net>

high level summary of the files:
  * debug_rdata - get current state from debug registers. Helpful
    when developing the debugger, and could serve some purpose in the
    future.
  * eudb - the debugger itself
  * eviction_macro - generate the proper macro to flush the EU render
    cache until I get control flow working
  * pre_cpp - an expression-evaluating, C-preprocessor-like tool, to be run
    before cpp
  * sr - the system routine, exception handler which runs on the EU
  * test - a very basic test system routine
  * debug.h

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 configure.ac                             |    1 +
 debugger/Makefile.am                     |   12 +
 debugger/debug_rdata.c                   |  141 +++++++
 debugger/eudb.c                          |  590 ++++++++++++++++++++++++++++++
 debugger/system_routine/Makefile         |   84 +++++
 debugger/system_routine/eviction_macro.c |   48 +++
 debugger/system_routine/pre_cpp.py       |  123 +++++++
 debugger/system_routine/sr.g4a           |  277 ++++++++++++++
 debugger/system_routine/test.g4a         |   64 ++++
 lib/debug.h                              |   92 +++++
 10 files changed, 1432 insertions(+), 0 deletions(-)

diff --git a/configure.ac b/configure.ac
index 4eb1c48..d2b30dc 100644
--- a/configure.ac
+++ b/configure.ac
@@ -98,5 +98,6 @@ AC_CONFIG_FILES([
 	scripts/Makefile
 	tests/Makefile
 	tools/Makefile
+	debugger/Makefile
 ])
 AC_OUTPUT
diff --git a/debugger/Makefile.am b/debugger/Makefile.am
new file mode 100644
index 0000000..eff4e2d
--- /dev/null
+++ b/debugger/Makefile.am
@@ -0,0 +1,12 @@
+SUBDIRS=system_routine
+
+bin_PROGRAMS = \
+	eudb \
+	debug_rdata \
+	$(NULL)
+
+LDADD = ../lib/libintel_tools.la $(DRM_LIBS) $(PCIACCESS_LIBS)
+
+AM_CFLAGS = $(DRM_CFLAGS) $(PCIACCESS_CFLAGS) $(WARN_CFLAGS) \
+	-I$(srcdir)/.. \
+	-I$(srcdir)/../lib
diff --git a/debugger/debug_rdata.c b/debugger/debug_rdata.c
new file mode 100644
index 0000000..f7dc424
--- /dev/null
+++ b/debugger/debug_rdata.c
@@ -0,0 +1,141 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ben Widawsky <ben@bwidawsk.net>
+ *
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "intel_gpu_tools.h"
+
+struct eu_rdata { /* the three 32-bit debug words collect_rdata() reads per EU thread */
+	union {
+		struct { /* uint32_t base type so all fields pack into one dword */
+			uint32_t sendc_dep : 1;
+			uint32_t swh_dep : 1;
+			uint32_t pwc_dep : 1;
+			uint32_t n2_dep : 1;
+			uint32_t n1_dep : 1;
+			uint32_t n0_dep : 1;
+			uint32_t flag1_dep : 1;
+			uint32_t flag0_dep : 1;
+			uint32_t indx_dep : 1;
+			uint32_t mrf_dep : 1;
+			uint32_t dst_dep : 1;
+			uint32_t src2_dep : 1;
+			uint32_t src1_dep : 1;
+			uint32_t src0_dep : 1;
+			uint32_t mp_dep_pin : 1;
+			uint32_t sp_dep_pin : 1;
+			uint32_t fftid : 8;
+			uint32_t ffid : 4;
+			uint32_t instruction_valid : 1;
+			uint32_t thread_status : 3; /* indexes the thread_status[] name table */
+		};
+		uint32_t dword; /* raw register value */
+	} ud0;
+
+	union {
+		struct { /* 4+7+7+7+7 = 32 bits; fields straddle byte boundaries, so a uint8_t base type would not pack */
+			uint32_t mrf_addr : 4;
+			uint32_t dst_addr : 7;
+			uint32_t src2_addr : 7;
+			uint32_t src1_addr : 7;
+			uint32_t src0_addr : 7;
+		};
+		uint32_t dword; /* raw register value */
+	} ud1;
+
+	union {
+		struct { /* 12+7+8+1+4 = 32 bits; same packing concern as ud1 */
+			uint32_t exip : 12;
+			uint32_t opcode : 7;
+			uint32_t pwc : 8;
+			uint32_t instruction_valid : 1;
+			uint32_t mbz : 4;
+		};
+		uint32_t dword; /* raw register value */
+	} ud2;
+};
+
+const char *thread_status[] = 
+	{"INVALID", "invalid/no thread", "standby (dependency)", "INVALID", "Executing",
+	 "INVALID" , "INVALID" , "INVALID"};
+
+static struct eu_rdata
+collect_rdata(int eu, int tid) {
+	struct eu_rdata rdata;
+
+	intel_register_write(0x7800, eu << 16 | (3 * tid) << 8);
+	rdata.ud0.dword = intel_register_read(0x7840);
+
+	intel_register_write(0x7800, eu << 16 | (3 * tid + 1) << 8);
+	rdata.ud1.dword = intel_register_read(0x7840);
+
+	intel_register_write(0x7800, eu << 16 | (3 * tid + 2) << 8);
+	rdata.ud2.dword = intel_register_read(0x7840);
+
+	return rdata;
+}
+static void
+print_rdata(struct eu_rdata rdata) {
+	printf("\t%s\n", thread_status[rdata.ud0.thread_status]);
+	printf("\tn1_dep: %d\n", rdata.ud0.n1_dep);
+	printf("\tpwc_dep: %d\n", rdata.ud0.pwc_dep);
+	printf("\tswh_dep: %d\n", rdata.ud0.swh_dep);
+	printf("\tsource 0 %x\n", rdata.ud1.src0_addr);
+	printf("\tsource 1 %x\n", rdata.ud1.src1_addr);
+	printf("\tsource 2 %x\n", rdata.ud1.src2_addr);
+	printf("\tdest  %x\n", rdata.ud1.dst_addr);
+	printf("\tmrf  %x\n", rdata.ud1.mrf_addr);
+	printf("\tIP: %x\n", rdata.ud2.exip);
+	printf("\topcode: %x\n", rdata.ud2.opcode);
+}
+
+static void
+find_stuck_threads(void)
+{
+	int i, j;
+	for (i = 0; i < 15; i++)
+		for (j = 0; j < 5; j++) {
+			struct eu_rdata rdata;
+			rdata = collect_rdata(i, j);
+			if (rdata.ud0.thread_status == 2 ||
+			    rdata.ud0.thread_status == 4) {
+				printf("%d %d:\n", i, j);
+				print_rdata(rdata);
+			}
+	}
+}
+
+int main(int argc, char *argv[]) {
+	struct pci_device *pci_dev;
+	pci_dev = intel_get_pci_device();
+
+	intel_register_access_init(pci_dev, 1);
+	find_stuck_threads();
+//	collect_rdata(atoi(argv[1]), atoi(argv[2]));
+	return 0;
+}
diff --git a/debugger/eudb.c b/debugger/eudb.c
new file mode 100644
index 0000000..d912afc
--- /dev/null
+++ b/debugger/eudb.c
@@ -0,0 +1,590 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ben Widawsky <ben@bwidawsk.net>
+ *
+ * Notes:
+ *
+ */
+
+#include <signal.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+#include <strings.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <sys/un.h>
+#include <sys/socket.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_chipset.h"
+#include "intel_bufmgr.h"
+#include "intel_gpu_tools.h"
+#include "intel_batchbuffer.h"
+#include "intel_debug.h"
+#include "debug.h"
+
+#define EU_ATT		0x7810
+#define EU_ATT_CLR	0x7830
+
+#define RSVD_EU -1
+#define RSVD_THREAD -1
+#define RSVD_ID EUID(-1, -1, -1)
+
+enum {
+	EBAD_SHMEM,
+	EBAD_PROTOCOL,
+	EBAD_MAGIC,
+	EBAD_WRITE
+};
+
+struct debuggee {
+	int euid;
+	int tid;
+	int fd;
+	int clr;
+	uint32_t reg;
+};
+
+struct debugger {
+	struct debuggee *debuggees;
+	int num_threads;
+	int real_num_threads;
+	int threads_per_eu;
+} *eu_info;
+
+drm_intel_bufmgr *bufmgr;
+struct intel_batchbuffer *batch;
+drm_intel_bo *scratch_bo;
+
+int handle;
+int drm_fd;
+int debug_fd = 0;
+const char *debug_file = "dump_debug.bin";
+int debug;
+int clear_waits;
+int shutting_down = 0;
+struct debug_handshake dh;
+int force_clear = 0;
+
+/*
+ * The docs are wrong about the attention clear bits. The clear bits are
+ * provided as part of the structure in case they change in future generations.
+ */
+#define EUID(eu, td, clear) \
+	{ .euid = eu, .tid = td, .reg = EU_ATT, .fd = -1, .clr = clear }
+#define EUID2(eu, td, clear) \
+	{ .euid = eu, .tid = td, .reg = EU_ATT + 4, .fd = -1, .clr = clear }
+struct debuggee gt1_debug_ids[] = {
+	RSVD_ID, RSVD_ID,
+	RSVD_ID, EUID(6, 3, 28), EUID(6, 2, 27), EUID(6, 1, 26), EUID(6, 0, 25),
+	RSVD_ID, EUID(5, 3, 23), EUID(5, 2, 22), EUID(5, 1, 21), EUID(5, 0, 20),
+	RSVD_ID, EUID(4, 3, 18), EUID(4, 2, 17), EUID(4, 1, 16), EUID(4, 0, 15),
+	RSVD_ID, EUID(2, 3, 13), EUID(2, 2, 12), EUID(2, 1, 11), EUID(2, 0, 10),
+	RSVD_ID, EUID(1, 3, 8), EUID(1, 2, 7), EUID(1, 1, 6), EUID(1, 0, 5),
+	RSVD_ID, EUID(0, 3, 3), EUID(0, 2, 2), EUID(0, 1, 1), EUID(0, 0, 0)
+};
+
+struct debuggee gt2_debug_ids[] = {
+	EUID(8, 1, 31), EUID(8, 0, 30),
+	EUID(6, 4, 29), EUID(6, 3, 28), EUID(6, 2, 27), EUID(6, 1, 26), EUID(6, 0, 25),
+	EUID(5, 4, 24), EUID(5, 3, 23), EUID(5, 2, 22), EUID(5, 1, 21), EUID(5, 0, 20),
+	EUID(4, 4, 19), EUID(4, 3, 18), EUID(4, 2, 17), EUID(4, 1, 16), EUID(4, 0, 15),
+	EUID(2, 4, 14), EUID(2, 3, 13), EUID(2, 2, 12), EUID(2, 1, 11), EUID(2, 0, 10),
+	EUID(1, 4, 9), EUID(1, 3, 8), EUID(1, 2, 7), EUID(1, 1, 6), EUID(1, 0, 5),
+	EUID(0, 4, 4), EUID(0, 3, 3), EUID(0, 2, 2), EUID(0, 1, 1), EUID(0, 0, 0),
+	RSVD_ID, RSVD_ID, RSVD_ID, RSVD_ID,
+	EUID2(14, 4, 27), EUID2(14, 3, 26), EUID2(14, 2, 25), EUID2(14, 1, 24), EUID2(14, 0, 23),
+	EUID2(13, 4, 22), EUID2(13, 3, 21), EUID2(13, 2, 20), EUID2(13, 1, 19), EUID2(13, 0, 18),
+	EUID2(12, 4, 17), EUID2(12, 3, 16), EUID2(12, 2, 15), EUID2(12, 1, 14), EUID2(12, 0, 13),
+	EUID2(10, 4, 12), EUID2(10, 3, 11), EUID2(10, 2, 10), EUID2(10, 1, 9), EUID2(10, 0, 8),
+	EUID2(9, 4, 7), EUID2(9, 3, 6), EUID2(9, 2, 5), EUID2(9, 1, 4), EUID2(9, 0, 3),
+	EUID2(8, 4, 2), EUID2(8, 3, 1), EUID2(8, 2, 0)
+};
+
+struct debugger gt1 = {
+	.debuggees = gt1_debug_ids,
+	.num_threads = 32,
+	.real_num_threads = 24,
+	.threads_per_eu = 4
+};
+
+struct debugger gt2 = {
+	.debuggees = gt2_debug_ids,
+	.num_threads = 64,
+	.real_num_threads = 60,
+	.threads_per_eu = 5
+};
+
+static void
+dump_debug(void *buf, size_t count) {
+	if (!debug_fd)
+		debug_fd = open(debug_file, O_CREAT | O_WRONLY | O_TRUNC, S_IRWXO);
+
+	write(debug_fd, buf, count);
+}
+
+static volatile void *
+map_debug_buffer(void) {
+	int ret;
+
+	ret = drm_intel_bo_map(scratch_bo, 0);
+	assert(ret == 0);
+	return scratch_bo->virtual;
+}
+
+static void
+unmap_debug_buffer(void) {
+	drm_intel_bo_unmap(scratch_bo);
+}
+
+static int /* returns the number of attention bits stored in out_bits */
+wait_for_attn(int timeout, int *out_bits) { /* timeout counts poll passes; <= 0 polls until attention or shutdown */
+	int step = 1;
+	int eus_waiting = 0;
+	int i,j;
+
+	if (timeout <= 0) {
+		timeout = 1;
+		step = 0; /* i never advances, so the loop only ends via the break below */
+	}
+
+	for (i = 0; i < timeout; i += step) {
+		for (j = 0; j < 8; j += 4) { /* j is the byte offset of the two attention registers */
+			uint32_t attn = intel_register_read(EU_ATT + j);
+			if (attn) {
+				int bit = 0;
+				while( (bit = ffs(attn)) != 0) {
+					bit--; // ffs is 1 based
+					assert(bit >= 0);
+					out_bits[eus_waiting] = bit + (j * 8); /* second register maps to bits 32..63 */
+					attn &= ~(1u << bit); /* unsigned: 1 << 31 overflows a signed int */
+					eus_waiting++;
+				}
+			}
+		}
+
+		if (intel_register_read(EU_ATT + 8) ||
+		    intel_register_read(EU_ATT + 0xc)) {
+			fprintf(stderr, "Unknown attention bits\n");
+		}
+
+		if (eus_waiting || shutting_down)
+			break;
+	}
+
+	return eus_waiting;
+}
+
+#define eu_fd(bit) eu_info->debuggees[bit].fd
+#define eu_id(bit) eu_info->debuggees[bit].euid
+#define eu_tid(bit) eu_info->debuggees[bit].tid
+static struct eu_state *
+find_eu_shmem(int bit, volatile uint8_t *buf) {
+	struct per_thread_data {
+		uint8_t ____[dh.per_thread_scratch];
+	}__attribute__((packed)) *data;
+	struct eu_state *eu;
+	int mem_tid, mem_euid, i;
+
+	data = (struct per_thread_data *)buf;
+	for(i = 0; i < eu_info->num_threads; i++) {
+		eu = (struct eu_state *)&data[i];
+		mem_tid = eu->sr0 & 0x7;
+		mem_euid = (eu->sr0 >> 8) & 0xf;
+		if (mem_tid == eu_tid(bit) && mem_euid == eu_id(bit))
+			break;
+		eu = NULL;
+	}
+
+	return eu;
+}
+
+#define GRF_CMP(a, b) memcmp(a, b, sizeof(grf))
+#define GRF_CPY(a, b) memcpy(a, b, sizeof(grf))
+static int
+verify(struct eu_state *eu) {
+	if (GRF_CMP(eu->version, protocol_version)) {
+		if (debug) {
+			printf("Bad EU protocol version %x %x\n",
+				((uint32_t *)&eu->version)[0],
+				DEBUG_PROTOCOL_VERSION);
+			dump_debug((void *)eu, sizeof(*eu));
+		}
+		return -EBAD_PROTOCOL;
+	}
+
+	if (GRF_CMP(eu->state_magic, eu_msg)) {
+		if (debug) {
+			printf("Bad EU state magic %x %x\n",
+				((uint32_t *)&eu->state_magic)[0],
+				((uint32_t *)&eu->state_magic)[1]);
+			dump_debug((void *)eu, sizeof(*eu));
+		}
+		return -EBAD_MAGIC;
+	} else {
+		GRF_CPY(eu->state_magic, cpu_ack);
+	}
+
+	eu->sr0 = RSVD_EU << 8 | RSVD_THREAD;
+	return 0;
+}
+
+static int /* 0 on success, non-zero (negative) on any failure */
+collect_data(int bit, volatile uint8_t *buf) { /* bit: attention bit index; buf: mapped scratch buffer */
+	struct eu_state *eu;
+	ssize_t num;
+	int ret;
+
+	assert(eu_id(bit) != RSVD_EU);
+
+	if (eu_fd(bit) == -1) { /* first event for this thread: create its dump file */
+		char name[128];
+		snprintf(name, sizeof(name), "dump_eu_%02d_%d.bin", eu_id(bit), eu_tid(bit));
+		eu_fd(bit) = open(name, O_CREAT | O_WRONLY | O_TRUNC, S_IRWXO);
+		if (eu_fd(bit) == -1)
+			return -1;
+	}
+
+	eu = find_eu_shmem(bit, buf);
+
+	if (eu == NULL) {
+		if (debug)
+			printf("Bad offset %d %d\n", eu_id(bit), eu_tid(bit));
+		return -EBAD_SHMEM;
+	}
+
+	ret = verify(eu);
+	if (ret)
+		return ret;
+
+	num = write(eu_fd(bit), (void *)eu, sizeof(*eu)); /* append this thread's state record */
+	if (num != sizeof(*eu)) {
+		perror("unhandled write failure");
+		return -EBAD_WRITE; /* negative, like the other error returns */
+	}
+
+
+	return 0;
+}
+
+static void
+clear_attn(int bit) { /* bit: index into eu_info->debuggees, as produced by wait_for_attn() */
+#if 0
+/*
+ * This works but doesn't allow for easily changed clearing bits
+ */
+static void
+clear_attn_old(int bit) {
+	int bit_to_clear = bit % 32;
+	bit_to_clear = 31 - bit_to_clear;
+	intel_register_write(0x7830 + (bit/32) * 4, 0);
+	intel_register_write(0x7830 + (bit/32) * 4, 1 << bit_to_clear);
+}
+#else
+	if (!force_clear) {
+		int bit_to_clear;
+		bit_to_clear = eu_info->debuggees[bit].clr; /* per-thread clear bit from the debuggee table */
+		intel_register_write(EU_ATT_CLR + (bit/32) * 4, 0);
+		intel_register_write(EU_ATT_CLR + (bit/32) * 4, 1u << bit_to_clear); /* unsigned: clr may be 31 */
+	} else { /* force_clear: write every bit of both clear registers */
+		intel_register_write(EU_ATT_CLR + 0, 0);
+		intel_register_write(EU_ATT_CLR + 4, 0);
+		intel_register_write(EU_ATT_CLR + 0, 0xffffffff);
+		intel_register_write(EU_ATT_CLR + 4, 0xffffffff);
+	}
+#endif
+}
+
+static void
+db_shutdown(int sig) {
+	shutting_down = 1;
+	printf("Shutting down...\n");
+}
+
+static void
+die(int reason) { /* release everything this tool opened, then exit(reason) */
+	int i = 0;
+
+	intel_register_write(EU_ATT_CLR, 0);
+	intel_register_write(EU_ATT_CLR + 4, 0);
+
+	if (debug_fd)
+		close(debug_fd);
+
+	for (i = 0; i < eu_info->num_threads; i++) { /* close the per-thread dump files */
+		if (eu_info->debuggees[i].fd != -1)
+			close(eu_info->debuggees[i].fd);
+	}
+	if (scratch_bo) /* stays NULL on the -c path, which never creates or maps the BO */
+		unmap_debug_buffer();
+
+	intel_register_access_fini();
+	exit(reason);
+}
+
+static int
+identify_device(int devid) {
+	switch(devid) {
+	case PCI_CHIP_SANDYBRIDGE_GT1:
+	case PCI_CHIP_SANDYBRIDGE_M_GT1:
+	case PCI_CHIP_SANDYBRIDGE_S:
+		eu_info = &gt1;
+		break;
+	case PCI_CHIP_SANDYBRIDGE_GT2:
+	case PCI_CHIP_SANDYBRIDGE_GT2_PLUS:
+	case PCI_CHIP_SANDYBRIDGE_M_GT2:
+	case PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS:
+		eu_info = &gt2;
+		break;
+	default:
+		return 1;
+	}
+
+	return 0;
+}
+
+static void
+parse_data(const char *file_name) { /* print one value per eu_state record stored in file_name */
+	struct eu_state *eu_state = NULL;
+	struct stat st;
+	int fd = -1;
+	int ret, i, elements;
+
+	fd = open(file_name, O_RDONLY);
+	if (fd == -1) {
+		perror("open");
+		goto out;
+	}
+
+	ret = fstat(fd, &st);
+	if (ret == -1) {
+		perror("fstat");
+		goto out;
+	}
+
+	elements = st.st_size / sizeof(struct eu_state); /* whole records only; a trailing partial one is ignored */
+	if (elements == 0) {
+		fprintf(stderr, "File not big enough for 1 entry\n");
+		goto out;
+	}
+
+	eu_state = mmap(0, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
+	if (eu_state == MAP_FAILED) {
+		perror("mmap");
+		goto out;
+	}
+
+	for(i = 0; i < elements; i++) {
+		printf("AIP: ");
+			printf("%x\n", ((uint32_t *)eu_state[i].cr0)[2]); /* third dword of the saved cr0 */
+	}
+out:
+	if (eu_state && eu_state != MAP_FAILED) /* MAP_FAILED is non-NULL and must not be unmapped */
+		munmap(eu_state, st.st_size);
+	if (fd != -1)
+		close(fd);
+}
+
+static int /* returns the flink handle from the handshake, or -1 on any failure */
+wait_for_scratch_bo(void) {
+	struct sockaddr_un addr;
+	uint8_t version;
+	int fd, ret, handle = -1;
+
+	fd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (fd == -1)
+		return -1;
+
+	/* Clean up previous runs */
+	remove(SHADER_DEBUG_SOCKET);
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = AF_UNIX;
+	strncpy(addr.sun_path, SHADER_DEBUG_SOCKET, sizeof(addr.sun_path) - 1); /* addr was zeroed, so this stays NUL-terminated */
+
+	ret = bind(fd, (const struct sockaddr *)&addr, sizeof(addr));
+	if (ret == -1) {
+		perror("bind");
+		goto done; /* close the socket instead of leaking it */
+	}
+
+	ret = listen(fd, 1);
+	if (ret == -1) {
+		perror("listen");
+		goto done;
+	}
+
+	while(1) { /* runs once: every path below ends in the break after loop_out */
+		int client_fd;
+		size_t count;
+		char ack[] = DEBUG_HANDSHAKE_ACK;
+
+		client_fd = accept(fd, NULL, NULL);
+		if (client_fd == -1) {
+			perror("accept");
+			goto done;
+		}
+
+		count = read(client_fd, &version, sizeof(version)); /* first byte on the wire is the version */
+		if (count != sizeof(version)) {
+			perror("read version");
+			goto loop_out;
+		}
+
+		if (version != DEBUG_HANDSHAKE_VERSION) {
+			fprintf(stderr, "Bad debug handshake\n");
+			goto loop_out;
+		}
+
+		count = read(client_fd, ((char *)&dh) + 1, sizeof(dh) - 1); /* rest of the handshake, after the version byte */
+		if (count != sizeof(dh) - 1) {
+			perror("read handshake");
+			goto loop_out;
+		}
+
+		count = write(client_fd, ack, sizeof(ack));
+		if (count != sizeof(ack)) {
+			perror("write ack");
+			goto loop_out;
+		}
+		handle = dh.flink_handle; /* only set once the whole exchange succeeded */
+		if (debug > 0) {
+			printf("Handshake completed successfully\n"
+				"\tprotocol version = %d\n"
+				"\tflink handle = %d\n"
+				"\tper thread scratch = %x\n", version,
+				dh.flink_handle, dh.per_thread_scratch);
+		}
+
+	loop_out:
+		close(client_fd);
+		break;
+	}
+
+done:
+	close(fd);
+	return handle;
+}
+
+int main(int argc, char* argv[]) { /* options: -c clear waits, -d debug, -r FILE parse dump, -p ID device id, -f force clear */
+	struct pci_device *pci_dev;
+	volatile uint8_t *scratch = NULL;
+	int bits[64];
+	int devid = -1, opt;
+
+	while ((opt = getopt(argc, argv, "cdr:p:f?h")) != -1) { /* 'p' needs ':' or optarg is NULL */
+		switch (opt) {
+		case 'c':
+			clear_waits = 1;
+			break;
+		case 'd':
+			debug = 1;
+			break;
+		case 'r':
+			parse_data(optarg);
+			exit(0);
+			break;
+		case 'p':
+			devid = atoi(optarg);
+			break;
+		case 'f':
+			force_clear  = 1;
+			break;
+		case '?':
+		case 'h':
+		default:
+			exit(0);
+		}
+	}
+
+	pci_dev = intel_get_pci_device();
+	if (devid == -1) /* no -p override given: use the probed device id */
+		devid = pci_dev->device_id;
+	if (identify_device(devid)) {
+		abort();
+	}
+
+	if (intel_register_access_init(pci_dev, 1) != 0) /* kept out of assert() so it still runs under NDEBUG */
+		abort();
+	memset(bits, -1, sizeof(bits));
+	/*
+	 * These events have to occur before the SR runs, or we need
+	 * non-blocking versions of the functions.
+	 */
+	if (!clear_waits) {
+		int handle;
+		drm_fd = drm_open_any();
+		bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
+
+		/* We are probably root, make files world friendly */
+		umask(0);
+		handle = wait_for_scratch_bo();
+		if (handle == -1) {
+			printf("No handle from mesa, please enter manually: ");
+			if (fscanf(stdin, "%d", &handle) != 1) /* accept multi-digit handles; also catches EOF */
+				exit(1);
+		}
+		scratch_bo = intel_bo_gem_create_from_name(bufmgr, "scratch", handle);
+		if (scratch_bo == NULL) {
+			fprintf(stderr, "Couldn't flink buffer\n");
+			abort();
+		}
+		signal(SIGINT, db_shutdown);
+		printf("Press Ctrl-C to stop\n");
+	} else { /* -c: only clear pending attention bits, then exit */
+		int time = force_clear ? 0 : 20000;
+		while (wait_for_attn(time, bits)) {
+			clear_attn(bits[0]);
+			memset(bits, -1, sizeof(bits));
+		}
+		die(0);
+	}
+
+	scratch = map_debug_buffer();
+	while (shutting_down == 0) {
+		int num_events, i;
+
+		memset(bits, -1, sizeof(bits));
+		num_events = wait_for_attn(-1, bits); /* polls until attention or Ctrl-C */
+		if (num_events == 0)
+			break;
+
+		for (i = 0; i < num_events; i++) {
+			assert(bits[i] < 64 && bits[i] >= 0);
+			if (collect_data(bits[i], scratch)) { /* on failure the attention bit is left set */
+				bits[i] = -1;
+				continue;
+			}
+			clear_attn(bits[i]);
+		}
+	}
+
+	die(0);
+	return 0;
+}
diff --git a/debugger/system_routine/Makefile b/debugger/system_routine/Makefile
new file mode 100644
index 0000000..1c10ce5
--- /dev/null
+++ b/debugger/system_routine/Makefile
@@ -0,0 +1,84 @@
+# Copyright © 2011 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+# Authors:
+#    Ben Widawsky <ben@bwidawsk.net>
+
+OBJCOPY?=objcopy
+
+PRECPP=./pre_cpp.py
+CPP_FLAGS=-x assembler-with-cpp -P
+
+GEN_AS?=~/intel-gfx/intel-gen4asm/src/intel-gen4asm
+GEN_AS_FLAGS?=-g6 -a -b
+
+TEMP:=$(shell mktemp)
+TEMP2:=$(shell mktemp)
+
+GPU?=SANDYBRIDGE
+DEFINES+=-DGEN_ASM -D$(GPU) -I. -I../..//lib
+
+all: sr
+
+sr.c: sr.g4a eviction_macro evict.h
+	$(PRECPP) $^ > $(TEMP)
+	$(CPP) $(CPP_FLAGS) $(DEFINES) -o $(TEMP2) $(TEMP)
+	$(GEN_AS) $(GEN_AS_FLAGS) $(TEMP2) -o $@
+
+sr : sr.o
+	$(OBJCOPY) -O binary -K gen_eu_bytes $^ $@
+
+# Test files are useful for development. The system routine kernel is very
+# simple and doesn't depend on any external communication to run.
+test.c: test.g4a
+	$(PRECPP) $^ > $(TEMP)
+	$(CPP) $(CPP_FLAGS) $(DEFINES) -o $(TEMP2) $(TEMP)
+	$(GEN_AS) $(GEN_AS_FLAGS) $(TEMP2) -o $@
+
+test : test.o
+	$(OBJCOPY) -O binary -K gen_eu_bytes $^ $@
+
+helper: sr.g4a eviction_macro
+	$(PRECPP) $^ > help
+	$(CPP) $(CPP_FLAGS) $(DEFINES) -o help2 help
+	$(GEN_AS) $(GEN_AS_FLAGS) help2 -o $@
+
+eviction_macro :
+
+evict.h : eviction_macro
+	$(shell ./eviction_macro >| evict.h)
+
+.PHONY : clean
+clean :
+	$(RM) *.o sr.c sr test.c test evict.h eviction_macro
+
+.PHONY : distclean
+distclean: clean # also removes the help* files written by the helper target
+	$(RM) help*
+
+maintainer-clean: clean
+
+EMPTY_AUTOMAKE_TARGETS = install install-data install-exec uninstall \
+			 install-dvi install-html install-info install-ps \
+			 install-pdf installdirs check installcheck \
+			 mostlyclean dvi pdf ps info html tags ctags
+.PHONY: $(EMPTY_AUTOMAKE_TARGETS)
+$(EMPTY_AUTOMAKE_TARGETS):
diff --git a/debugger/system_routine/eviction_macro.c b/debugger/system_routine/eviction_macro.c
new file mode 100644
index 0000000..1da2233
--- /dev/null
+++ b/debugger/system_routine/eviction_macro.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ben Widawsky <ben@bwidawsk.net>
+ *
+ */
+
+#include <stdio.h>
+
+#define START 0x100
+#define END ((128 << 10) / 4)
+
+int main(int argc, char *argv[]) {
+	int i;
+	printf("#ifdef SANDYBRIDGE\n");
+	printf("#define EVICT_CACHE \\\n");
+	printf("\tmov (1) m0.5:ud g0.5:ud FLAGS; \\\n");
+	for (i = START; i < END - 8; i+=0x8) {
+		printf("\tmov (1) m0.2:ud 0x%04x:ud FLAGS; \\\n", i);
+		printf("\tWRITE_SCRATCH4(m0); \\\n");
+	}
+
+	printf("\tmov (1) m0.2:ud 0x%04x:ud FLAGS; \\\n", i);
+	printf("\tWRITE_SCRATCH4(m0)\n");
+	printf("#else\n");
+	printf("#define EVICT_CACHE\n");
+	printf("#endif\n");
+}
diff --git a/debugger/system_routine/pre_cpp.py b/debugger/system_routine/pre_cpp.py
new file mode 100755
index 0000000..effea0e
--- /dev/null
+++ b/debugger/system_routine/pre_cpp.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python3
+
+# Copyright © 2011 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+# Authors:
+#    Ben Widawsky <ben@bwidawsk.net>
+
+#very limited C-like preprocessor
+
+#limitations:
+# no macro substitutions
+# no multiline definitions
+# divide operator is //
+
+import sys,re
+
+file = open(sys.argv[1], "r")
+
+lines = file.readlines()
+len(lines)
+out = dict()
+defines = dict()
+
+count = 0
+#create a dict for our output
+for line in lines:
+    out[count] = line
+    count = count + 1
+
+#done is considered #define <name> <number>
+def is_done(string):
+    m = re.match("#define\s+(\w+?)\s+([a-fA-F0-9\-]+?)\s*$", string)
+    return m
+
+#skip macros, the real cpp will handle it
+def skip(string):
+    #macro
+    m = re.match("#define\s+\w+\(.+", string)
+    return m != None
+
+#put constants which are done being evaluated into the dictionary
+def easy_constants():
+    ret = 0
+    for lineno, string in out.items():
+        if skip(string):
+            continue
+        m = is_done(string)
+        if m != None:
+            key = m.group(1)
+            value = m.group(2)
+            if not key in defines:
+                    defines[key] = int(eval(value))
+                    ret = 1
+    return ret
+
+#replace names with dictionary values
+def simple_replace():
+    ret = 0
+    for lineno, string in out.items():
+        if skip(string):
+            continue
+        for key, value in defines.items():
+            if is_done(string):
+                continue
+            s = re.subn(key, repr(value), string)
+            if s[1] > 0:
+                out[lineno] = s[0]
+                ret = s[1]
+    return ret
+
+#evaluate expressions to try to simplify them
+def collapse_constants():
+    ret = 0
+    for lineno, string in out.items():
+        if skip(string):
+            continue
+        if is_done(string):
+            continue
+        m = re.match("#define\s+(.+?)\s+(.+)$", string)
+        if m != None:
+            try:
+                out[lineno] = "#define " + m.group(1) + " " + repr(eval(m.group(2)))
+                ret = 1
+            except NameError as ne:
+                #this happens before a variable is resolved in simple_replace
+                continue
+            except SyntaxError:
+                #this happens with something like #define foo bar, which the
+                #regular cpp can handle
+                continue
+            except:
+                raise KeyboardInterrupt
+    return ret;
+
+while True:
+    ret = 0
+    ret += easy_constants()
+    ret += simple_replace()
+    ret += collapse_constants()
+    if ret == 0:
+        break;
+
+for lineno, string in out.items():
+    print(string.rstrip())
diff --git a/debugger/system_routine/sr.g4a b/debugger/system_routine/sr.g4a
new file mode 100644
index 0000000..a70e771
--- /dev/null
+++ b/debugger/system_routine/sr.g4a
@@ -0,0 +1,277 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ben Widawsky <ben@bwidawsk.net>
+ *
+ */
+
+#include "debug.h"
+#include "evict.h"
+
+#define CR0_0_ME_STATE_CTRL (1 << 31)
+#define CR0_0_BP_SUPPRESS (1 << 15)
+#define CR0_0_SPF_EN (1 << 2)
+#define CR0_0_ACC_DIS (1 << 1)
+#define CR0_1_BES_CTRL (1 << 31)
+#define CR0_1_HALT_CTRL (1 << 30)
+#define CR0_1_SOFT_EXCEPTION_CTRL (1 << 29)
+#define CR0_1_ILLGL_OP_STS (1 << 28)
+#define CR0_1_STACK_OVRFLW_STS (1 << 27)
+
+#define CR0_0_ENTRY_UNMASK (CR0_0_SPF_EN | CR0_0_ACC_DIS)
+// TODO: Need to fix this for non breakpoint case
+#define CR0_1_ENTRY_UNMASK ~(CR0_1_BES_CTRL)
+#define CR0_0_RETURN_MASK ~(CR0_0_ME_STATE_CTRL | CR0_0_SPF_EN | CR0_0_ACC_DIS)
+
+// TODO: not sure how to make this not hardcoded
+#define PER_THREAD_SCRATCH_SIZE (1 << 20)
+#define PER_THREAD_QWORDS (PER_THREAD_SCRATCH_SIZE >> 4)
+
+/* Should get this from brw_defines.h */
+#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS     		2
+#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS     		3
+#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS     		4
+#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE	8
+#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ	0
+
+/* desc field, ie. dword3 6.3.66.2 and 2.11.2.1.4 */
+#define SEND_MLEN_5		(5<<25)
+#define SEND_MLEN_3		(3<<25)
+#define SEND_MLEN_2		(2<<25)
+#define SEND_MLEN_1		(1<<25)
+#define SEND_RLEN_1		(1<<20)
+#define SEND_RLEN_0		(0<<20)
+#define SEND_HEADER_PRESENT	(1<<19)
+#define SEND_WRITE_COMMIT	(1<<17)
+#define SEND_TYPE_WRITE	(GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE<<13)
+#define SEND_TYPE_READ	(BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ<<13)
+#define SEND_BLOCK_SIZE1	(BRW_DATAPORT_OWORD_BLOCK_2_OWORDS<<8)
+#define SEND_BLOCK_SIZE2	(BRW_DATAPORT_OWORD_BLOCK_4_OWORDS<<8)
+#define SEND_BLOCK_SIZE4	(BRW_DATAPORT_OWORD_BLOCK_8_OWORDS<<8)
+#define SEND_BINDING_TABLE	(255<<0)
+// No write commit
+#define WRITE_DESC1_XXX SEND_BINDING_TABLE | SEND_BLOCK_SIZE1 | SEND_TYPE_WRITE | SEND_HEADER_PRESENT | SEND_MLEN_2
+#define WRITE_DESC1_WC SEND_BINDING_TABLE | SEND_BLOCK_SIZE1 | SEND_TYPE_WRITE | SEND_HEADER_PRESENT | SEND_MLEN_2 | SEND_WRITE_COMMIT
+#define WRITE_DESC2 SEND_BINDING_TABLE | SEND_BLOCK_SIZE2 | SEND_TYPE_WRITE | SEND_HEADER_PRESENT | SEND_MLEN_3
+#define WRITE_DESC4 SEND_BINDING_TABLE | SEND_BLOCK_SIZE4 | SEND_TYPE_WRITE | SEND_HEADER_PRESENT | SEND_MLEN_5
+#define RECV_DESC1 SEND_BINDING_TABLE | SEND_BLOCK_SIZE1 | SEND_TYPE_READ | SEND_HEADER_PRESENT | SEND_MLEN_1 | SEND_RLEN_1
+//#define SEND_DESC1 0x40902FF
+#define SEND_DESC1_WC 0x40b02FF
+
+/* ex_desc field 6.3.66.2 */
+#define SEND_DP_RENDER_CACHE	(5<<0)
+#define SEND_EOT		(1<<5)
+#define SEND_EX_DESC SEND_DP_RENDER_CACHE
+
+/**
+ * WRITE_SCRATCH1 - Write 2 owords.
+ * cdst.2 - offset
+ * cdst.5 - per thread scratch base, relative to gsba??
+ * cdst+1 - data to be written.
+ */
+#define WRITE_SCRATCH1(cdst) \
+	send (16) null cdst SEND_EX_DESC WRITE_DESC1_XXX FLAGS
+#define WRITE_SCRATCH1_WC(cdst) \
+	send (16) g1 cdst SEND_EX_DESC WRITE_DESC1_WC FLAGS
+#define WRITE_SCRATCH2(cdst) \
+	send (16) null cdst SEND_EX_DESC WRITE_DESC2 FLAGS
+#define WRITE_SCRATCH4(cdst) \
+	send (16) null cdst SEND_EX_DESC WRITE_DESC4 FLAGS
+
+/**
+ * READ_SCRATCH1 - Read 2 owords.
+ * cdst.2 - offset
+ * cdst.5 - per thread scratch base, relative to gsba??
+ * grf - register where read data is populated.
+ */
+#define READ_SCRATCH1(grf, cdst) \
+	send (16) grf:ud cdst SEND_EX_DESC RECV_DESC1 FLAGS
+
+/**
+ * SET_OFFSET - setup mrf for the given offset prior to a send instruction.
+ * mrf - message register to be used as the header.
+ * offset - offset.
+ *
+ * If a WRITE_SCRATCH follows, mrf+1 -> mrf+1+n should contain the data to be
+ * written.
+ */
+#define SET_OFFSET(mrf, offset) \
+	mov (1) mrf.5:ud g0.5:ud FLAGS; \
+	mov (1) mrf.2:ud offset:ud FLAGS
+
+/**
+ * SAVE_CRF - save the control register
+ * clobbers: m0.2, m0.5
+ */
+#define CR_OFFSET 0x40
+#define SAVE_CRF \
+	SET_OFFSET(m0, CR_OFFSET); \
+	mov (8) m1:ud 0xdeadbeef:ud FLAGS; \
+	mov (1) m1.0:ud cr0.0 FLAGS; \
+	mov (1) m1.1:ud cr0.1 FLAGS; \
+	mov (1) m1.2:ud cr0.2 FLAGS; \
+	mov (1) m1.3:ud sr0:ud FLAGS; \
+	WRITE_SCRATCH1(m0)
+
+/*
+ * clobbers: m0.2, m0.5
+ */
+#define STORE_GRF(grf, offset) \
+	SET_OFFSET(m0, offset); \
+	mov (8) m1:ud grf:ud FLAGS; \
+	WRITE_SCRATCH1(m0)
+
+/*
+ * clobbers: m0.2, m0.5
+ */
+#define LOAD_GRF(grf, offset) \
+	SET_OFFSET(m0, offset); \
+	READ_SCRATCH1(grf, m0)
+
+/*
+ * STORE_MRF - write a message register to scratch.
+ * mrf - used as the message header; the data written comes from mrf+1
+ *       (see WRITE_SCRATCH1), not from mrf itself.
+ * offset - scratch offset placed in the header by SET_OFFSET.
+ * clobbers: mrf.2 mrf.5
+ */
+#define STORE_MRF(mrf, offset) \
+	SET_OFFSET(mrf, offset); \
+	WRITE_SCRATCH1(mrf)
+
+/*
+ * LOAD_MRF - read scratch at offset into g1, then copy g1 into mrf.
+ * non-quirky semantics, unlike STORE_MRF: the register named here is the
+ * one that receives the data.
+ * clobbers: g1 (and m0.2, m0.5 through LOAD_GRF)
+ */
+#define LOAD_MRF(mrf, offset) \
+	LOAD_GRF(g1, offset); \
+	mov (8) mrf:ud g1:ud FLAGS
+
+/*
+ * SAVE_ALL_MRF - store m2..m15 to scratch offsets 0x2..0x1c.
+ * Each STORE_MRF(mN, off) uses mN as the header and writes mN+1, so the
+ * register named on each line is one lower than the register being saved.
+ * The header register has its .2 and .5 fields overwritten, which is why it
+ * must already have been saved by the preceding line.
+ */
+#define SAVE_ALL_MRF \
+	/* m1 is saved already (by STORE_MRF(m0, 0) in Enter) */ \
+	STORE_MRF(m1, 0x2); \
+	STORE_MRF(m2, 0x4); \
+	STORE_MRF(m3, 0x6); \
+	STORE_MRF(m4, 0x8); \
+	STORE_MRF(m5, 0xa); \
+	STORE_MRF(m6, 0xc); \
+	STORE_MRF(m7, 0xe); \
+	STORE_MRF(m8, 0x10); \
+	STORE_MRF(m9, 0x12); \
+	STORE_MRF(m10, 0x14); \
+	STORE_MRF(m11, 0x16); \
+	STORE_MRF(m12, 0x18); \
+	STORE_MRF(m13, 0x1a); \
+	STORE_MRF(m14, 0x1c)
+
+/*
+ * RESTORE_ALL_MRF - reload m15..m1 from scratch, highest register first.
+ * The offsets mirror the stores: mN is read back from the slot that
+ * STORE_MRF(mN-1, ...) wrote.
+ * clobbers: g1, m0.2, m0.5 (through LOAD_MRF / LOAD_GRF)
+ */
+#define RESTORE_ALL_MRF \
+	LOAD_MRF(m15, 0x1c); \
+	LOAD_MRF(m14, 0x1a); \
+	LOAD_MRF(m13, 0x18); \
+	LOAD_MRF(m12, 0x16); \
+	LOAD_MRF(m11, 0x14); \
+	LOAD_MRF(m10, 0x12); \
+	LOAD_MRF(m9, 0x10); \
+	LOAD_MRF(m8, 0xe); \
+	LOAD_MRF(m7, 0xc); \
+	LOAD_MRF(m6, 0xa); \
+	LOAD_MRF(m5, 0x8); \
+	LOAD_MRF(m4, 0x6); \
+	LOAD_MRF(m3, 0x4); \
+	LOAD_MRF(m2, 0x2); \
+	LOAD_MRF(m1, 0x0)
+
+#ifndef SANDYBRIDGE
+	#error Only SandyBridge is supported
+#endif
+
+/* Default flags for an instruction */
+#define FLAGS { ALIGN1, SWITCH, MASK_DISABLE, ACCWRCTRL}
+
+/*
+ * We can clobber m0 and g0.4; everything else must be saved.
+ */
+Enter:
+	nop;
+
+	or (1) cr0.0 cr0.0 CR0_0_ENTRY_UNMASK:ud FLAGS;
+
+	/*
+	 * g0.5 has the per thread scratch space when running in FS or VS.
+	 * If we don't have a valid g0.5, we can calculate a per thread scratch offset
+	 * using the system registers. The problem is we do not have a good way to know
+	 * the offset from GSBA. The system routine will have to be hardcoded or
+	 * dynamically patched with the correct offset.
+	 * TID is in sr0.0[2:0]
+	 * EUID is in sr0.0[11:8]
+	 */
+
+#ifdef GPGPU
+	mov (1) g0.4:ud 0:ud				FLAGS;
+#if 0
+	/* This should work according to the docs, but the add blows up */
+	shr (1) g0.8:uw sr0.0:uw 5			FLAGS;
+	add (1) g0.16:ub gr0.16:ub sr0.0:ub		FLAGS;
+#else
+	shr (1) g0.8:uw sr0.0:uw 5			FLAGS;
+	mov (1) g0.9:uw sr0.0:uw			FLAGS;
+	and (1) g0.9:uw g0.9:uw 0x7:uw			FLAGS;
+	add (1) g0.8:uw g0.8:uw g0.9:uw			FLAGS;
+	mov (1) g0.9:uw 0:uw				FLAGS;
+	mul (1) g0.4:ud g0.4:ud PER_THREAD_QWORDS	FLAGS;
+#endif
+#endif
+
+	mov (8) m0:ud 0:ud FLAGS;
+
+	/* Saves must occur in order so as not to clobber the next register */
+	STORE_MRF(m0, 0);
+	STORE_GRF(g0, 0x20);
+	STORE_GRF(g1, 0x22);
+	SAVE_ALL_MRF;
+
+	mov (8) g1:ud STATE_EU_MSG:ud FLAGS;
+	STORE_GRF(g1, STATE_QWORD);
+
+	mov (8) g1:ud DEBUG_PROTOCOL_VERSION:ud FLAGS;
+	STORE_GRF(g1, COMMUNICATION_QWORD);
+
+	SAVE_CRF;
+
+	EVICT_CACHE;
+	wait n1:ud;
+	EVICT_CACHE;
+
+	/* Using this to try to keep coherency */
+	LOAD_GRF(g1, CR_OFFSET);
+	LOAD_GRF(g1, COMMUNICATION_QWORD);
+	LOAD_GRF(g1, STATE_QWORD);
+
+	RESTORE_ALL_MRF;
+	LOAD_GRF(g1, 0x22);
+	LOAD_GRF(g0, 0x20);
+
+	/* Clear breakpoint status */
+	and (1) cr0.1 cr0.1 CR0_1_ENTRY_UNMASK:ud FLAGS;
+
+	/* Set breakpoint suppress; this should be conditional on BES */
+	or  (1) cr0.0 cr0.0 CR0_0_BP_SUPPRESS:ud FLAGS;
+
+	and (1) cr0.0 cr0.0 CR0_0_RETURN_MASK:ud FLAGS;
+	nop;
diff --git a/debugger/system_routine/test.g4a b/debugger/system_routine/test.g4a
new file mode 100644
index 0000000..e4296e0
--- /dev/null
+++ b/debugger/system_routine/test.g4a
@@ -0,0 +1,64 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ben Widawsky <ben@bwidawsk.net>
+ *
+ */
+
+#include "debug.h"
+
+#define CR0_0_ME_STATE_CTRL (1 << 31)
+#define CR0_0_BP_SUPPRESS (1 << 15)
+#define CR0_0_SPF_EN (1 << 2)
+#define CR0_0_ACC_DIS (1 << 1)
+#define CR0_1_BES_CTRL (1 << 31)
+#define CR0_1_HALT_CTRL (1 << 30)
+#define CR0_1_SOFT_EXCEPTION_CTRL (1 << 29)
+#define CR0_1_ILLGL_OP_STS (1 << 28)
+#define CR0_1_STACK_OVRFLW_STS (1 << 27)
+
+#define CR0_0_ENTRY_UNMASK (CR0_0_SPF_EN | CR0_0_ACC_DIS)
+// TODO: Need to fix this for non breakpoint case
+#define CR0_1_ENTRY_UNMASK ~(CR0_1_BES_CTRL)
+#define CR0_0_RETURN_MASK ~(CR0_0_ME_STATE_CTRL | CR0_0_SPF_EN | CR0_0_ACC_DIS)
+
+#ifndef SANDYBRIDGE
+	#error Only SandyBridge is supported
+#endif
+
+/* Default flags for an instruction */
+#define FLAGS { ALIGN1, SWITCH, MASK_DISABLE, ACCWRCTRL}
+
+Enter:
+	nop;
+
+	or (1) cr0.0 cr0.0 CR0_0_ENTRY_UNMASK:ud FLAGS;
+
+	/* Clear breakpoint status */
+	and (1) cr0.1 cr0.1 CR0_1_ENTRY_UNMASK:ud FLAGS;
+
+	/* Set breakpoint suppress; this should be conditional on BES */
+	or  (1) cr0.0 cr0.0 CR0_0_BP_SUPPRESS:ud FLAGS;
+
+	and (1) cr0.0 cr0.0 CR0_0_RETURN_MASK:ud FLAGS;
+	nop;
diff --git a/lib/debug.h b/lib/debug.h
new file mode 100644
index 0000000..af9cf39
--- /dev/null
+++ b/lib/debug.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ben Widawsky <ben@bwidawsk.net>
+ *
+ */
+
+#ifndef _DEBUG_H_
+#define _DEBUG_H_
+
+#define DEBUG_PROTOCOL_VERSION 1
+#define COMMUNICATION_OFFSET 0xc00
+#define COMMUNICATION_QWORD 0xc0
+
+#define STATE_EU_MSG 0x47534d65 /* eMSG */
+#define STATE_CPU_ACK 0x4b434163 /* cACK */
+#define STATE_OFFSET 0xc20
+#define STATE_QWORD 0xc2
+
+#define TX_OFFSET 0xc40
+#define TX_QWORD 0xc4
+#define RX_OFFSET 0xc60
+#define RX_QWORD 0xc6
+
+#ifndef GEN_ASM
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+/* Host-side register images: grf and mrf are 8 dwords (32 bytes) each. */
+typedef uint32_t grf[8];
+typedef uint32_t mrf[8];
+/* cr is kept as 12 raw bytes; sr is a single dword. */
+typedef uint8_t cr[12];
+typedef uint32_t sr;
+
+/* Initializer that replicates x into all eight dwords of a grf/mrf. */
+#define DWORD8(x) {(x), (x), (x), (x), (x), (x), (x), (x)}
+
+/* Register-sized copies of the protocol constants, one value per dword. */
+static const grf protocol_version = DWORD8(DEBUG_PROTOCOL_VERSION);
+static const grf eu_msg = DWORD8(STATE_EU_MSG);
+static const grf cpu_ack = DWORD8(STATE_CPU_ACK);
+
+/*
+ * Packed, 4096-byte record. The byte offsets noted below follow from the
+ * typedef sizes (grf/mrf are 32 bytes, cr is 12, sr is 4) and the packed
+ * attribute.
+ * NOTE(review): presumably overlays the per-thread scratch area written by
+ * the system routine (sr.g4a) -- confirm against the users of this struct.
+ */
+struct eu_state {
+	/* 0x000: 15 message registers (480 bytes) */
+	mrf m_regs[15];
+	/*
+	 * 0x1e0: 16 general registers (512 bytes)
+	 * NOTE(review): sr.g4a stores g0 with offset 0x20; if those offsets are
+	 * in 16-byte units (as the *_QWORD / *_OFFSET pairs suggest) that is
+	 * byte 0x200, i.e. g_regs[1] here rather than g_regs[0] -- confirm.
+	 */
+	grf g_regs[16];
+	/* 0x3e0: one register of padding, bringing the next field to 0x400 */
+	grf pad;
+
+/* 0x400 */
+	cr cr0;
+	/* 0x40c */
+	sr sr0;
+	/* 0x410 */
+	uint32_t beef_pad[4];
+	/* 0x420: padding up to COMMUNICATION_OFFSET */
+	uint8_t pad2[992 + 1024];
+
+/* 0xc00 COMMUNICATION_OFFSET */
+	grf version;
+	/* 0xc20 STATE_OFFSET */
+	grf state_magic;
+	/* 0xc40 TX_OFFSET */
+	grf eu_tx;
+	/* 0xc60 RX_OFFSET */
+	grf eu_rx;
+
+	/* 0xc80: padding up to 0x1000 */
+	uint8_t pad3[896];
+} __attribute__((packed));
+
+/*
+ * Print a 32-byte register image as eight dwords, highest dword first, each
+ * as 8 hex digits separated by spaces. No trailing newline is printed.
+ *
+ * reg - pointer to at least 32 readable bytes.
+ */
+static inline void
+print_reg(const uint8_t reg[32]) {
+	uint32_t dwords[8];
+
+	/*
+	 * Copy instead of casting reg to uint32_t *: the source is a byte
+	 * buffer, so reading it through a dword pointer is not guaranteed to be
+	 * aligned or alias-safe.
+	 */
+	memcpy(dwords, reg, sizeof(dwords));
+	printf("%08x %08x %08x %08x %08x %08x %08x %08x",
+		dwords[7], dwords[6], dwords[5], dwords[4],
+		dwords[3], dwords[2], dwords[1], dwords[0]);
+}
+
+/*
+ * Print a 12-byte control register image as three dwords, highest dword
+ * first, each as 8 hex digits separated by spaces. No trailing newline is
+ * printed.
+ *
+ * reg - pointer to at least 12 readable bytes.
+ */
+static inline void
+print_creg(const uint8_t reg[12]) {
+	uint32_t dwords[3];
+
+	/* Copy instead of casting: reg is a byte array with no dword alignment. */
+	memcpy(dwords, reg, sizeof(dwords));
+	printf("%08x %08x %08x", dwords[2], dwords[1], dwords[0]);
+}
+#endif
+
+#endif
-- 
1.7.6

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2011-07-13 20:52 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-07-13 20:51 [PATCH 0/10] fs shader debugging Ben Widawsky
2011-07-13 20:51 ` [PATCH 01/10] intel: shared header for " Ben Widawsky
2011-07-13 20:56   ` Chris Wilson
2011-07-19 21:06   ` [Intel-gfx] " Julien Cristau
2011-07-21 13:54     ` Ben Widawsky
2011-07-21 21:22       ` [Intel-gfx] " Julien Cristau
2011-07-27 15:08         ` Ben Widawsky
2011-07-27 15:16           ` Julien Cristau
2011-07-27 15:28             ` Alan Cox
2011-07-27 15:40             ` Ben Widawsky
2011-07-13 20:51 ` [PATCH 02/10] i965: copy in system routine, reserve extra scratch Ben Widawsky
2011-07-18 18:13   ` Eric Anholt
2011-07-13 20:51 ` [PATCH 03/10] i965: Reserve scratch space for debugger communication Ben Widawsky
2011-07-13 20:51 ` [PATCH 04/10] i965: setup system routine Ben Widawsky
2011-07-13 21:04   ` [Mesa-dev] " Chris Wilson
2011-07-13 20:51 ` [PATCH 05/10] i965: emit breakpoints Ben Widawsky
2011-07-13 20:51 ` [PATCH 06/10] i965: attach to a listening debugger Ben Widawsky
2011-07-13 20:51 ` [PATCH 07/10] intel-gpu-tools: register range handling for forcewake hooks Ben Widawsky
2011-07-13 21:15   ` Chris Wilson
2011-07-13 20:51 ` [PATCH 08/10] intel-gpu-tools/forcewaked: simple forcewake app Ben Widawsky
2011-07-13 21:18   ` Chris Wilson
2011-07-13 20:51 ` [PATCH 09/10] debugging: add important debug regs Ben Widawsky
2011-07-13 21:20   ` Chris Wilson
2011-07-13 20:51 ` Ben Widawsky [this message]
2011-07-13 22:06 ` [PATCH 0/10] fs shader debugging Ben Widawsky
2011-07-17 23:25 [PATCH 00/10] fs debugging: incorporated Chris' feedback Ben Widawsky
2011-07-17 23:25 ` [PATCH 10/10] debugging: shader debugging Ben Widawsky

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1310590312-21669-11-git-send-email-ben@bwidawsk.net \
    --to=ben@bwidawsk.net \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.