All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Alex Bennée" <alex.bennee@linaro.org>
To: mttcg@listserver.greensocs.com, mark.burton@greensocs.com,
	fred.konrad@greensocs.com, a.rigo@virtualopensystems.com
Cc: peter.maydell@linaro.org, drjones@redhat.com,
	a.spyridakis@virtualopensystems.com, claudio.fontana@huawei.com,
	qemu-devel@nongnu.org, will.deacon@arm.com,
	crosthwaitepeter@gmail.com, pbonzini@redhat.com,
	"Alex Bennée" <alex.bennee@linaro.org>,
	aurelien@aurel32.net, rth@twiddle.net
Subject: [Qemu-devel] [RFC 11/11] arm/tcg-test: some basic TCG exercising tests
Date: Fri, 26 Feb 2016 13:15:33 +0000	[thread overview]
Message-ID: <1456492533-17171-12-git-send-email-alex.bennee@linaro.org> (raw)
In-Reply-To: <1456492533-17171-1-git-send-email-alex.bennee@linaro.org>

These tests are not really aimed at KVM at all but exist to stretch
QEMU's TCG code generator. In particular these exercise the ability of
the TCG to:

  * Chain TranslationBlocks together (tight)
  * Handle heavy usage of the tb_jump_cache (paged)
  * Pathological case of computed local jumps (computed)

In addition the tests can be varied by adding IPI IRQs or SMC sequences
into the mix to stress the tcg_exit and invalidation mechanisms.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
---
 arm/tcg-test-asm.S           | 170 +++++++++++++++++++++++++++++
 arm/tcg-test.c               | 248 +++++++++++++++++++++++++++++++++++++++++++
 arm/unittests.cfg            |  90 ++++++++++++++++
 config/config-arm-common.mak |   2 +
 4 files changed, 510 insertions(+)
 create mode 100644 arm/tcg-test-asm.S
 create mode 100644 arm/tcg-test.c

diff --git a/arm/tcg-test-asm.S b/arm/tcg-test-asm.S
new file mode 100644
index 0000000..6e823b7
--- /dev/null
+++ b/arm/tcg-test-asm.S
@@ -0,0 +1,170 @@
+/*
+ * TCG Test assembler functions for armv7 tests.
+ *
+ * Copyright (C) 2016, Linaro Ltd, Alex Bennée <alex.bennee@linaro.org>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.
+ *
+ * These helper functions are written in pure asm to control the size
+ * of the basic blocks and ensure they fit neatly into page
+ * aligned chunks. The pattern of branches they follow is determined by
+ * the 32 bit seed they are passed. It should be the same for each set.
+ *
+ * Calling convention
+ *  - r0, iterations
+ *  - r1, jump pattern
+ *  - r2-r3, scratch
+ *
+ * Returns r0
+ */
+
+.arm
+
+.section .text
+
+/* Tight - all blocks should quickly be patched and should run
+ * very fast unless irqs or smc gets in the way
+ */
+
+.global tight_start
+tight_start:
+        subs    r0, r0, #1
+        beq     tight_end
+
+        ror     r1, r1, #1
+        tst     r1, #1
+        beq     tightA
+        b       tight_start
+
+tightA:
+        subs    r0, r0, #1
+        beq     tight_end
+
+        ror     r1, r1, #1
+        tst     r1, #1
+        beq     tightB
+        b       tight_start
+
+tightB:
+        subs    r0, r0, #1
+        beq     tight_end
+
+        ror     r1, r1, #1
+        tst     r1, #1
+        beq     tight_start
+        b       tightA
+
+.global tight_end
+tight_end:
+        mov     pc, lr
+
+/*
+ * Computed jumps cannot be hardwired into the basic blocks so each one
+ * will cause an exit for the main execution loop to look up the next block.
+ *
+ * There is some caching which should ameliorate the cost a little.
+ */
+
+        /* Align << 13 == 4096 byte alignment */
+        .align 13
+        .global computed_start
+computed_start:
+        subs    r0, r0, #1
+        beq     computed_end
+
+        /* Jump table */
+        ror     r1, r1, #1
+        and     r2, r1, #1
+        adr     r3, computed_jump_table
+        ldr     r2, [r3, r2, lsl #2]
+        mov     pc, r2
+
+        b       computed_err
+
+computed_jump_table:
+        .word   computed_start
+        .word   computedA
+
+computedA:
+        subs    r0, r0, #1
+        beq     computed_end
+
+        /* Jump into code */
+        ror     r1, r1, #1
+        and     r2, r1, #1
+        adr     r3, 1f
+        add	r3, r2, lsl #2
+        mov     pc, r3
+1:      b       computed_start
+        b       computedB
+
+        b       computed_err
+
+
+computedB:
+        subs    r0, r0, #1
+        beq     computed_end
+        ror     r1, r1, #1
+
+        /* Conditional register load */
+        adr     r3, computedA
+        tst     r1, #1
+        adreq   r3, computed_start
+        mov     pc, r3
+
+        b       computed_err
+
+computed_err:
+        mov     r0, #1
+        .global computed_end
+computed_end:
+        mov     pc, lr
+
+
+/*
+ * Page hoping
+ *
+ * Each block is in a different page, hence the blocks never get joined
+ */
+        /* Align << 13 == 4096 byte alignment */
+        .align 13
+        .global paged_start
+paged_start:
+        subs    r0, r0, #1
+        beq     paged_end
+
+        ror     r1, r1, #1
+        tst     r1, #1
+        beq     pagedA
+        b       paged_start
+
+        /* Align << 13 == 4096 byte alignment */
+        .align 13
+pagedA:
+        subs    r0, r0, #1
+        beq     paged_end
+
+        ror     r1, r1, #1
+        tst     r1, #1
+        beq     pagedB
+        b       paged_start
+
+        /* Align << 13 == 4096 byte alignment */
+        .align 13
+pagedB:
+        subs    r0, r0, #1
+        beq     paged_end
+
+        ror     r1, r1, #1
+        tst     r1, #1
+        beq     paged_start
+        b       pagedA
+
+        /* Align << 13 == 4096 byte alignment */
+        .align 13
+.global paged_end
+paged_end:
+        mov     pc, lr
+
+.global test_code_end
+test_code_end:
diff --git a/arm/tcg-test.c b/arm/tcg-test.c
new file mode 100644
index 0000000..6fa61ba
--- /dev/null
+++ b/arm/tcg-test.c
@@ -0,0 +1,248 @@
+/*
+ * ARM TCG Tests
+ *
+ * These tests are explicitly aimed at stretching the QEMU TCG engine.
+ */
+
+#include <libcflat.h>
+#include <asm/smp.h>
+#include <asm/cpumask.h>
+#include <asm/barrier.h>
+#include <asm/mmu.h>
+#include <asm/gic.h>
+
+#include <prng.h>
+
+#define MAX_CPUS 8
+
+/* These entry points are in the assembly code */
+extern int tight_start(uint32_t count, uint32_t pattern);
+extern int computed_start(uint32_t count, uint32_t pattern);
+extern int paged_start(uint32_t count, uint32_t pattern);
+extern uint32_t tight_end;
+extern uint32_t computed_end;
+extern uint32_t paged_end;
+extern unsigned long test_code_end;
+
+typedef int (*test_fn)(uint32_t count, uint32_t pattern);
+
+typedef struct {
+	const char *test_name;
+	bool       should_pass;
+	test_fn    start_fn;
+	uint32_t   *code_end;
+} test_descr_t;
+
+/* Test array */
+static test_descr_t tests[] = {
+       /*
+	* Tight chain.
+	*
+	* These are a bunch of basic blocks that have fixed branches in
+	* a page aligned space. The branches taken are decided by a
+	* psuedo-random bitmap for each CPU.
+	*
+	* Once the basic blocks have been chained together by the TCG they
+	* should run until they reach their block count. This will be the
+	* most efficient mode in which generated code is run. The only other
+	* exits will be caused by interrupts or TB invalidation.
+	*/
+	{ "tight", true, tight_start, &tight_end },
+	/*
+	 * Computed jumps.
+	 *
+	 * A bunch of basic blocks which just do computed jumps so the basic
+	 * block is never chained but they are all within a page (maybe not
+	 * required). This will exercise the cache lookup but not the new
+	 * generation.
+	 */
+	{ "computed", true, computed_start, &computed_end },
+        /*
+	 * Page ping pong.
+	 *
+	 * Have the blocks are separated by PAGE_SIZE so they can never
+	 * be chained together.
+	 *
+	 */
+	{ "paged", true, paged_start, &paged_end}
+};
+
+static test_descr_t *test = NULL;
+
+static int iterations = 100000;
+static int rounds = 1000;
+static int mod_freq = 5;
+static uint32_t pattern[MAX_CPUS];
+
+static int smc = 0;
+static int irq = 0;
+static int irq_cnt[MAX_CPUS];
+static int errors[MAX_CPUS];
+
+static cpumask_t smp_test_complete;
+
+
+/* This triggers TCGs SMC detection by writing values to the executing
+ * code pages. We are not actually modifying the instructions and the
+ * underlying code will remain unchanged. However this should trigger
+ * invalidation of the Translation Blocks
+ */
+
+void trigger_smc_detection(uint32_t *start, uint32_t *end)
+{
+	volatile uint32_t *ptr = start;
+	while (ptr < end) {
+		uint32_t inst = *ptr;
+		*ptr++ = inst;
+	}
+}
+
+/* Handler for receiving IRQs */
+
+static void irq_handler(struct pt_regs *regs __unused)
+{
+	int cpu = smp_processor_id();
+	irq_cnt[cpu]++;
+	gic_irq_ack();
+}
+
+/* This triggers cross-CPU IRQs. Each IRQ should cause the basic block
+ * execution to finish the main run-loop get entered again.
+ */
+int send_cross_cpu_irqs(int this_cpu)
+{
+	int cpu, sent = 0;
+
+	for_each_present_cpu(cpu) {
+		if (cpu != this_cpu) {
+			gic_send_sgi(cpu, 1);
+			sent++;
+		}
+	}
+
+	return sent;
+}
+
+
+void do_test(void)
+{
+	int cpu = smp_processor_id();
+	int i;
+	int sent_irqs = 0;
+
+	printf("CPU%d: online and setting up with pattern 0x%x\n", cpu, pattern[cpu]);
+
+	if (irq) {
+		gic_enable();
+#ifdef __arm__
+		install_exception_handler(EXCPTN_IRQ, irq_handler);
+#else
+		install_irq_handler(EL1H_IRQ, irq_handler);
+#endif
+		local_irq_enable();
+	}
+
+	for (i=0; i<rounds; i++)
+	{
+		/* Enter the blocks */
+		errors[cpu] += test->start_fn(iterations, pattern[cpu]);
+
+		if ((i + cpu) % mod_freq == 0)
+		{
+			if (smc) {
+				trigger_smc_detection((uint32_t *) test->start_fn,
+						test->code_end);
+			}
+			if (irq) {
+				sent_irqs += send_cross_cpu_irqs(cpu);
+			}
+		}
+	}
+
+	if (irq) {
+		printf("CPU%d: Done with %d irqs sent and %d received\n", cpu, sent_irqs, irq_cnt[cpu]);
+	} else {
+		printf("CPU%d: Done with %d errors\n", cpu, errors[cpu]);
+	}
+
+	cpumask_set_cpu(cpu, &smp_test_complete);
+	if (cpu != 0)
+		halt();
+}
+
+
+void setup_and_run_tcg_test(void)
+{
+	static const unsigned char seed[] = "tcg-test";
+	struct isaac_ctx prng_context;
+	int cpu;
+
+	isaac_init(&prng_context, &seed[0], sizeof(seed));
+
+	if (irq) {
+		gic_enable();
+	}
+
+	/* boot other CPUs */
+	for_each_present_cpu(cpu) {
+		pattern[cpu] = isaac_next_uint32(&prng_context);
+
+		if (cpu == 0)
+			continue;
+
+		smp_boot_secondary(cpu, do_test);
+	}
+
+	do_test();
+
+	while (!cpumask_full(&smp_test_complete))
+		cpu_relax();
+
+	/* how do we detect errors other than not crashing? */
+	report("passed", true);
+}
+
+int main(int argc, char **argv)
+{
+	int i;
+	unsigned int j;
+
+	for (i=0; i<argc; i++) {
+		char *arg = argv[i];
+
+		for (j = 0; j < ARRAY_SIZE(tests); j++) {
+			if (strcmp(arg, tests[j].test_name) == 0)
+				test = & tests[j];
+		}
+
+		/* Test modifiers */
+		if (strstr(arg, "mod=") != NULL) {
+			char *p = strstr(arg, "=");
+			mod_freq = atol(p+1);
+		}
+
+		if (strcmp(arg, "smc") == 0) {
+			unsigned long test_start = (unsigned long) &tight_start;
+			unsigned long test_end = (unsigned long) &test_code_end;
+
+			smc = 1;
+			mmu_set_range_ptes(mmu_idmap, test_start, test_start, test_end,
+					__pgprot(PTE_WBWA));
+
+			report_prefix_push("smc");
+		}
+
+		if (strcmp(arg, "irq") == 0) {
+			irq = 1;
+			report_prefix_push("irq");
+		}
+	}
+
+	if (test) {
+		setup_and_run_tcg_test();
+	} else {
+		report("Unknown test", false);
+	}
+
+	return report_summary();
+}
diff --git a/arm/unittests.cfg b/arm/unittests.cfg
index 3f05cfa..c00be15 100644
--- a/arm/unittests.cfg
+++ b/arm/unittests.cfg
@@ -39,6 +39,11 @@ smp = $MAX_SMP
 extra_params = -append 'smp'
 groups = selftest
 
+[irq-ipi]
+file = ipi-test.flat
+smp = 2
+group = irq
+
 # TLB Torture Tests
 [tlbflush::all_other]
 file = tlbflush-test.flat
@@ -118,3 +123,88 @@ smp = 2
 extra_params = -append 'mp_acqrel'
 groups = barrier
 accel = tcg
+
+# TCG Tests
+[tcg::tight]
+file = tcg-test.flat
+smp = $MAX_SMP
+extra_params = -append 'tight'
+groups = tcg
+accel = tcg
+
+[tcg::tight-smc]
+file = tcg-test.flat
+smp = $MAX_SMP
+extra_params = -append 'tight smc'
+groups = tcg
+accel = tcg
+
+[tcg::tight-irq]
+file = tcg-test.flat
+smp = $MAX_SMP
+extra_params = -append 'tight irq'
+groups = tcg
+accel = tcg
+
+[tcg::tight-smc-irq]
+file = tcg-test.flat
+smp = $MAX_SMP
+extra_params = -append 'tight smc irq'
+groups = tcg
+accel = tcg
+
+[tcg::computed]
+file = tcg-test.flat
+smp = $MAX_SMP
+extra_params = -append 'computed'
+groups = tcg
+accel = tcg
+
+[tcg::computed-smc]
+file = tcg-test.flat
+smp = $MAX_SMP
+extra_params = -append 'computed smc'
+groups = tcg
+accel = tcg
+
+[tcg::computed-irq]
+file = tcg-test.flat
+smp = $MAX_SMP
+extra_params = -append 'computed irq'
+groups = tcg
+accel = tcg
+
+[tcg::computed-smc-irq]
+file = tcg-test.flat
+smp = $MAX_SMP
+extra_params = -append 'computed smc irq'
+groups = tcg
+accel = tcg
+
+[tcg::paged]
+file = tcg-test.flat
+smp = $MAX_SMP
+extra_params = -append 'paged'
+groups = tcg
+accel = tcg
+
+[tcg::paged-smc]
+file = tcg-test.flat
+smp = $MAX_SMP
+extra_params = -append 'paged smc'
+groups = tcg
+accel = tcg
+
+[tcg::paged-irq]
+file = tcg-test.flat
+smp = $MAX_SMP
+extra_params = -append 'paged irq'
+groups = tcg
+accel = tcg
+
+[tcg::paged-smc-irq]
+file = tcg-test.flat
+smp = $MAX_SMP
+extra_params = -append 'paged smc irq'
+groups = tcg
+accel = tcg
diff --git a/config/config-arm-common.mak b/config/config-arm-common.mak
index 023a0c3..23208b0 100644
--- a/config/config-arm-common.mak
+++ b/config/config-arm-common.mak
@@ -15,6 +15,7 @@ tests-common += $(TEST_DIR)/ipi-test.flat
 tests-common += $(TEST_DIR)/tlbflush-test.flat
 tests-common += $(TEST_DIR)/locking-test.flat
 tests-common += $(TEST_DIR)/barrier-litmus-test.flat
+tests-common += $(TEST_DIR)/tcg-test.flat
 
 all: test_cases
 
@@ -78,3 +79,4 @@ $(TEST_DIR)/ipi-test.elf: $(cstart.o) $(TEST_DIR)/ipi-test.o
 $(TEST_DIR)/tlbflush-test.elf: $(cstart.o) $(TEST_DIR)/tlbflush-test.o
 $(TEST_DIR)/locking-test.elf: $(cstart.o) $(TEST_DIR)/locking-test.o
 $(TEST_DIR)/barrier-litmus-test.elf: $(cstart.o) $(TEST_DIR)/barrier-litmus-test.o
+$(TEST_DIR)/tcg-test.elf: $(cstart.o) $(TEST_DIR)/tcg-test.o $(TEST_DIR)/tcg-test-asm.o
-- 
2.7.1

  parent reply	other threads:[~2016-02-26 13:16 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-02-26 13:15 [Qemu-devel] [RFC 00/11] Current MTTCG kvm-unit-test patches Alex Bennée
2016-02-26 13:15 ` [Qemu-devel] [RFC 01/11] config/config-arm-common: build-up tests-common target Alex Bennée
2016-02-26 14:04   ` Andrew Jones
2016-02-26 14:16     ` Alex Bennée
2016-02-26 13:15 ` [Qemu-devel] [RFC 02/11] arm/arm64: irq enable/disable Alex Bennée
2016-02-26 13:15 ` [Qemu-devel] [RFC 03/11] arm/arm64: Add initial gic support Alex Bennée
2016-02-26 13:15 ` [Qemu-devel] [RFC 04/11] arm/arm64: Add IPI test Alex Bennée
2016-02-26 13:15 ` [Qemu-devel] [RFC 05/11] lib: add isaac prng library from CCAN Alex Bennée
2016-02-26 13:15 ` [Qemu-devel] [RFC 06/11] arm/run: set indentation defaults for emacs Alex Bennée
2016-02-26 13:15 ` [Qemu-devel] [RFC 07/11] arm/run: allow aarch64 to start arm binaries Alex Bennée
2016-02-26 14:02   ` Andrew Jones
2016-02-26 14:19     ` Alex Bennée
2016-02-26 13:15 ` [Qemu-devel] [RFC 08/11] arm/tlbflush-test: Add TLB torture test Alex Bennée
2016-02-26 13:15 ` [Qemu-devel] [RFC 09/11] arm/locking-tests: add comprehensive locking test Alex Bennée
2016-02-26 13:15 ` [Qemu-devel] [RFC 10/11] arm/barrier-litmus-tests: add some litmus tests Alex Bennée
2016-02-26 13:15 ` Alex Bennée [this message]
2016-02-26 14:12 ` [Qemu-devel] [RFC 00/11] Current MTTCG kvm-unit-test patches Andrew Jones
2016-02-26 14:54   ` Alex Bennée
2016-02-26 15:22     ` Andrew Jones
2016-02-26 16:03       ` Alex Bennée
2016-04-27 15:09   ` Alex Bennée
2016-04-27 15:26     ` Andrew Jones
2016-04-28 18:44       ` Andrew Jones

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1456492533-17171-12-git-send-email-alex.bennee@linaro.org \
    --to=alex.bennee@linaro.org \
    --cc=a.rigo@virtualopensystems.com \
    --cc=a.spyridakis@virtualopensystems.com \
    --cc=aurelien@aurel32.net \
    --cc=claudio.fontana@huawei.com \
    --cc=crosthwaitepeter@gmail.com \
    --cc=drjones@redhat.com \
    --cc=fred.konrad@greensocs.com \
    --cc=mark.burton@greensocs.com \
    --cc=mttcg@listserver.greensocs.com \
    --cc=pbonzini@redhat.com \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=rth@twiddle.net \
    --cc=will.deacon@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.