From: Mark Brown <broonie@kernel.org> To: Catalin Marinas <catalin.marinas@arm.com>, Will Deacon <will@kernel.org>, Shuah Khan <skhan@linuxfoundation.org>, Shuah Khan <shuah@kernel.org> Cc: Alan Hayward <alan.hayward@arm.com>, Luis Machado <luis.machado@arm.com>, Salil Akerkar <Salil.Akerkar@arm.com>, Basant Kumar Dwivedi <Basant.KumarDwivedi@arm.com>, Szabolcs Nagy <szabolcs.nagy@arm.com>, linux-arm-kernel@lists.infradead.org, linux-kselftest@vger.kernel.org, Mark Brown <broonie@kernel.org> Subject: [PATCH v1 35/38] kselftest/arm64: Add stress test for SME ZA context switching Date: Thu, 30 Sep 2021 19:11:41 +0100 [thread overview] Message-ID: <20210930181144.10029-36-broonie@kernel.org> (raw) In-Reply-To: <20210930181144.10029-1-broonie@kernel.org> Add a stress test for context switching of the ZA register state based on the similar tests Dave Martin wrote for FPSIMD and SVE registers. The test loops indefinitely writing a data pattern to ZA then reading it back and verifying that it's what was expected. Unlike the other tests we manually assemble the SME instructions since at present no released toolchain has SME support integrated. 
Signed-off-by: Mark Brown <broonie@kernel.org> --- tools/testing/selftests/arm64/fp/.gitignore | 1 + tools/testing/selftests/arm64/fp/Makefile | 3 + tools/testing/selftests/arm64/fp/za-stress | 59 +++ tools/testing/selftests/arm64/fp/za-test.S | 545 ++++++++++++++++++++ 4 files changed, 608 insertions(+) create mode 100644 tools/testing/selftests/arm64/fp/za-stress create mode 100644 tools/testing/selftests/arm64/fp/za-test.S diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore index 73c600e1ab81..1178fecc7aa1 100644 --- a/tools/testing/selftests/arm64/fp/.gitignore +++ b/tools/testing/selftests/arm64/fp/.gitignore @@ -7,3 +7,4 @@ sve-test ssve-test vec-syscfg vlset +za-test diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile index 11dbe05c5070..4f32cb1041a0 100644 --- a/tools/testing/selftests/arm64/fp/Makefile +++ b/tools/testing/selftests/arm64/fp/Makefile @@ -6,6 +6,7 @@ TEST_PROGS_EXTENDED := fpsimd-test fpsimd-stress \ rdvl-sme rdvl-sve \ sve-test sve-stress \ ssve-test ssve-stress \ + za-test za-stress \ vlset all: $(TEST_GEN_PROGS) $(TEST_PROGS_EXTENDED) @@ -22,5 +23,7 @@ ssve-test: sve-test.S $(CC) -DSSVE -nostdlib $^ -o $@ vec-syscfg: vec-syscfg.o rdvl.o vlset: vlset.o +za-test: za-test.o + $(CC) -nostdlib $^ -o $@ include ../../lib.mk diff --git a/tools/testing/selftests/arm64/fp/za-stress b/tools/testing/selftests/arm64/fp/za-stress new file mode 100644 index 000000000000..5ac386b55b95 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/za-stress @@ -0,0 +1,59 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) 2015-2019 ARM Limited. 
+# Original author: Dave Martin <Dave.Martin@arm.com> + +set -ue + +NR_CPUS=`nproc` + +pids= +logs= + +cleanup () { + trap - INT TERM CHLD + set +e + + if [ -n "$pids" ]; then + kill $pids + wait $pids + pids= + fi + + if [ -n "$logs" ]; then + cat $logs + rm $logs + logs= + fi +} + +interrupt () { + cleanup + exit 0 +} + +child_died () { + cleanup + exit 1 +} + +trap interrupt INT TERM EXIT + +for x in `seq 0 $((NR_CPUS * 4))`; do + log=`mktemp` + logs=$logs\ $log + ./za-test >$log & + pids=$pids\ $! +done + +# Wait for all child processes to be created: +sleep 10 + +while :; do + kill -USR1 $pids +done & +pids=$pids\ $! + +wait + +exit 1 diff --git a/tools/testing/selftests/arm64/fp/za-test.S b/tools/testing/selftests/arm64/fp/za-test.S new file mode 100644 index 000000000000..b9e0e2e07dad --- /dev/null +++ b/tools/testing/selftests/arm64/fp/za-test.S @@ -0,0 +1,545 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2021 ARM Limited. +// Original author: Mark Brown <broonie@kernel.org> +// +// Scalable Matrix Extension ZA context switch test +// Repeatedly writes unique test patterns into each ZA tile +// and reads them back to verify integrity. +// +// for x in `seq 1 NR_CPUS`; do za-test & pids=$pids\ $! ; done +// (leave it running for as long as you want...) 
+// kill $pids + +#include <asm/unistd.h> +#include "assembler.h" +#include "asm-offsets.h" + +.arch_extension sve + +#define MAXVL 2048 +#define MAXVL_B (MAXVL / 8) + +/* + * LDR (vector to ZA array): + * LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL] + */ +.macro _ldr_za nw, nxbase, offset=0 + .inst 0xe1000000 \ + | (((\nw) & 3) << 13) \ + | ((\nxbase) << 5) \ + | ((\offset) & 7) +.endm + +/* + * STR (vector from ZA array): + * STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL] + */ +.macro _str_za nw, nxbase, offset=0 + .inst 0xe1200000 \ + | (((\nw) & 3) << 13) \ + | ((\nxbase) << 5) \ + | ((\offset) & 7) +.endm + +// Print a single character x0 to stdout +// Clobbers x0-x2,x8 +function putc + str x0, [sp, #-16]! + + mov x0, #1 // STDOUT_FILENO + mov x1, sp + mov x2, #1 + mov x8, #__NR_write + svc #0 + + add sp, sp, #16 + ret +endfunction + +// Print a NUL-terminated string starting at address x0 to stdout +// Clobbers x0-x3,x8 +function puts + mov x1, x0 + + mov x2, #0 +0: ldrb w3, [x0], #1 + cbz w3, 1f + add x2, x2, #1 + b 0b + +1: mov w0, #1 // STDOUT_FILENO + mov x8, #__NR_write + svc #0 + + ret +endfunction + +// Utility macro to print a literal string +// Clobbers x0-x4,x8 +.macro puts string + .pushsection .rodata.str1.1, "aMS", 1 +.L__puts_literal\@: .string "\string" + .popsection + + ldr x0, =.L__puts_literal\@ + bl puts +.endm + +// Print an unsigned decimal number x0 to stdout +// Clobbers x0-x4,x8 +function putdec + mov x1, sp + str x30, [sp, #-32]! // Result can't be > 20 digits + + mov x2, #0 + strb w2, [x1, #-1]! // Write the NUL terminator + + mov x2, #10 +0: udiv x3, x0, x2 // div-mod loop to generate the digits + msub x0, x3, x2, x0 + add w0, w0, #'0' + strb w0, [x1, #-1]! + mov x0, x3 + cbnz x3, 0b + + ldrb w0, [x1] + cbnz w0, 1f + mov w0, #'0' // Print "0" for 0, not "" + strb w0, [x1, #-1]! 
+ +1: mov x0, x1 + bl puts + + ldr x30, [sp], #32 + ret +endfunction + +// Print an unsigned decimal number x0 to stdout, followed by a newline +// Clobbers x0-x5,x8 +function putdecn + mov x5, x30 + + bl putdec + mov x0, #'\n' + bl putc + + ret x5 +endfunction + +// Clobbers x0-x3,x8 +function puthexb + str x30, [sp, #-0x10]! + + mov w3, w0 + lsr w0, w0, #4 + bl puthexnibble + mov w0, w3 + + ldr x30, [sp], #0x10 + // fall through to puthexnibble +endfunction +// Clobbers x0-x2,x8 +function puthexnibble + and w0, w0, #0xf + cmp w0, #10 + blo 1f + add w0, w0, #'a' - ('9' + 1) +1: add w0, w0, #'0' + b putc +endfunction + +// x0=data in, x1=size in, clobbers x0-x5,x8 +function dumphex + str x30, [sp, #-0x10]! + + mov x4, x0 + mov x5, x1 + +0: subs x5, x5, #1 + b.lo 1f + ldrb w0, [x4], #1 + bl puthexb + b 0b + +1: ldr x30, [sp], #0x10 + ret +endfunction + +// Declare some storage space to shadow ZA register contents and a +// scratch buffer for a vector. +.pushsection .text +.data +.align 4 +zaref: + .space MAXVL_B * MAXVL_B +scratch: + .space MAXVL_B +.popsection + +// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0. 
+// Clobbers x0-x3 +function memcpy + cmp x2, #0 + b.eq 1f +0: ldrb w3, [x1], #1 + strb w3, [x0], #1 + subs x2, x2, #1 + b.ne 0b +1: ret +endfunction + +// Generate a test pattern for storage in ZA +// x0: pid (low 13 bits used) +// x1: row in ZA (8 bits) +// x2: generation (4 bits) + +// These values are used to construct a 32-bit pattern that is repeated in the +// scratch buffer as many times as will fit: +// bits 31:28 generation number (increments once per test_loop) +// bits 27:22 32-bit lane index +// bits 20:13 row number +// bits 12: 0 pid + +function pattern + and x0, x0, #0x1fff + orr w1, w0, w1, lsl #13 + orr w2, w1, w2, lsl #28 + + ldr x0, =scratch + mov w1, #MAXVL_B / 4 + +0: str w2, [x0], #4 + add w2, w2, #(1 << 22) + subs w1, w1, #1 + bne 0b + + ret +endfunction + +// Get the address of shadow data for ZA horizontal vector xn +.macro _adrza xd, xn, nrtmp + ldr \xd, =zaref + rdvl x\nrtmp, #1 + madd \xd, x\nrtmp, \xn, \xd +.endm + +// Set up test pattern in a ZA horizontal vector +// x0: pid +// x1: row number +// x2: generation +function setup_za + mov x4, x30 + mov x12, x1 // Use x12 for vector select + + bl pattern // Get pattern in scratch buffer + _adrza x0, x12, 2 // Shadow buffer pointer to x0 and x5 + mov x5, x0 + ldr x1, =scratch + bl memcpy // length (x2) set up by _adrza + + _ldr_za 12, 5 // load vector w12 from pointer x5 + + ret x4 +endfunction + +// Fill x1 bytes starting at x0 with 0xae (for canary purposes) +// Clobbers x1, x2. +function memfill_ae + mov w2, #0xae + b memfill +endfunction + +// Fill x1 bytes starting at x0 with 0. +// Clobbers x1, x2. +function memclr + mov w2, #0 + b memfill +endfunction + +// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2 +// Clobbers x1 +function memfill + cmp x1, #0 + b.eq 1f + +0: strb w2, [x0], #1 + subs x1, x1, #1 + b.ne 0b + +1: ret +endfunction + +// Trivial memory compare: compare x2 bytes starting at address x0 with +// bytes starting at address x1. 
+// Returns only if all bytes match; otherwise, the program is aborted. +// Clobbers x0-x5. +function memcmp + cbz x2, 2f + + stp x0, x1, [sp, #-0x20]! + str x2, [sp, #0x10] + + mov x5, #0 +0: ldrb w3, [x0, x5] + ldrb w4, [x1, x5] + add x5, x5, #1 + cmp w3, w4 + b.ne 1f + subs x2, x2, #1 + b.ne 0b + +1: ldr x2, [sp, #0x10] + ldp x0, x1, [sp], #0x20 + b.ne barf + +2: ret +endfunction + +// Verify that a ZA vector matches its shadow in memory, else abort +// x0: vector number +// Clobbers x0-x7 and x12. +function check_za + mov x3, x30 + + mov x12, x0 + _adrza x5, x0, 6 // pointer to expected value in x5 + mov x4, x0 + ldr x7, =scratch // x7 is scratch + + mov x0, x7 + mov x1, x6 + bl memfill_ae + + _str_za 12, 7 // save vector w12 to pointer x7 + + mov x0, x5 + mov x1, x7 + mov x2, x6 + mov x30, x3 + b memcmp +endfunction + +// Any SME register modified here can cause corruption in the main +// thread -- but *only* the locations modified here. +function irritator_handler + // Increment the irritation signal count (x23): + ldr x0, [x2, #ucontext_regs + 8 * 23] + add x0, x0, #1 + str x0, [x2, #ucontext_regs + 8 * 23] + + // Corrupt some random ZA data +#if 0 + adr x0, .text + (irritator_handler - .text) / 16 * 16 + movi v0.8b, #1 + movi v9.16b, #2 + movi v31.8b, #3 +#endif + + ret +endfunction + +function smstart + // Set SVCR.SM to 3, equivalent to SMSTART but doesn't need a + // SME capable toolchain. + mov x0, #3 + msr S3_3_C4_C2_2, x0 + + ret +endfunction + +function terminate_handler + mov w21, w0 + mov x20, x2 + + puts "Terminated by signal " + mov w0, w21 + bl putdec + puts ", no error, iterations=" + ldr x0, [x20, #ucontext_regs + 8 * 22] + bl putdec + puts ", signals=" + ldr x0, [x20, #ucontext_regs + 8 * 23] + bl putdecn + + mov x0, #0 + mov x8, #__NR_exit + svc #0 +endfunction + +// w0: signal number +// x1: sa_action +// w2: sa_flags +// Clobbers x0-x6,x8 +function setsignal + str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]! 
+ + mov w4, w0 + mov x5, x1 + mov w6, w2 + + add x0, sp, #16 + mov x1, #sa_sz + bl memclr + + mov w0, w4 + add x1, sp, #16 + str w6, [x1, #sa_flags] + str x5, [x1, #sa_handler] + mov x2, #0 + mov x3, #sa_mask_sz + mov x8, #__NR_rt_sigaction + svc #0 + + cbz w0, 1f + + puts "sigaction failure\n" + b .Labort + +1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16) + ret +endfunction + +// Main program entry point +.globl _start +function _start +_start: + puts "Streaming mode " + bl smstart + + // Sanity-check and report the vector length + + rdvl x19, #8 + cmp x19, #128 + b.lo 1f + cmp x19, #2048 + b.hi 1f + tst x19, #(8 - 1) + b.eq 2f + +1: puts "bad vector length: " + mov x0, x19 + bl putdecn + b .Labort + +2: puts "vector length:\t" + mov x0, x19 + bl putdec + puts " bits\n" + + // Obtain our PID, to ensure test pattern uniqueness between processes + mov x8, #__NR_getpid + svc #0 + mov x20, x0 + + puts "PID:\t" + mov x0, x20 + bl putdecn + + mov x23, #0 // Irritation signal count + + mov w0, #SIGINT + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGTERM + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGUSR1 + adr x1, irritator_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + + bl smstart // printing and signals dropped out of SM + mov x22, #0 // generation number, increments per iteration +.Ltest_loop: + rdvl x0, #8 + cmp x0, x19 + b.ne vl_barf + + rdvl x21, #1 // Set up ZA & shadow with test pattern +0: mov x0, x20 + sub x1, x21, #1 + and x2, x22, #0xf + bl setup_za + subs x21, x21, #1 + bne 0b + + mov x8, #__NR_sched_yield // Encourage preemption + svc #0 + bl smstart // syscall dropped out of SM + + rdvl x21, #1 // Verify each ZA row against its shadow +0: sub x0, x21, #1 + bl check_za + subs x21, x21, #1 + bne 0b + + add x22, x22, #1 // Everything still working + b .Ltest_loop + +.Labort: + mov x0, #0 + mov x1, #SIGABRT + mov x8, #__NR_kill + svc #0 +endfunction + +function barf 
+// fpsimd.c activity log dump hack +// ldr w0, =0xdeadc0de +// mov w8, #__NR_exit +// svc #0 +// end hack + mov x10, x0 // expected data + mov x11, x1 // actual data + mov x12, x2 // data size + + puts "Mismatch: PID=" + mov x0, x20 + bl putdec + puts ", iteration=" + mov x0, x22 + bl putdec + puts ", row=" + mov x0, x21 + bl putdecn + puts "\tExpected [" + mov x0, x10 + mov x1, x12 + bl dumphex + puts "]\n\tGot [" + mov x0, x11 + mov x1, x12 + bl dumphex + puts "]\n" + + mov x8, #__NR_getpid + svc #0 +// fpsimd.c activity log dump hack +// ldr w0, =0xdeadc0de +// mov w8, #__NR_exit +// svc #0 +// ^ end of hack + mov x1, #SIGABRT + mov x8, #__NR_kill + svc #0 +// mov x8, #__NR_exit +// mov x1, #1 +// svc #0 +endfunction + +function vl_barf + mov x10, x0 + + puts "Bad active VL: " + mov x0, x10 + bl putdecn + + mov x8, #__NR_exit + mov x1, #1 + svc #0 +endfunction -- 2.20.1
WARNING: multiple messages have this Message-ID (diff)
From: Mark Brown <broonie@kernel.org> To: Catalin Marinas <catalin.marinas@arm.com>, Will Deacon <will@kernel.org>, Shuah Khan <skhan@linuxfoundation.org>, Shuah Khan <shuah@kernel.org> Cc: Alan Hayward <alan.hayward@arm.com>, Luis Machado <luis.machado@arm.com>, Salil Akerkar <Salil.Akerkar@arm.com>, Basant Kumar Dwivedi <Basant.KumarDwivedi@arm.com>, Szabolcs Nagy <szabolcs.nagy@arm.com>, linux-arm-kernel@lists.infradead.org, linux-kselftest@vger.kernel.org, Mark Brown <broonie@kernel.org> Subject: [PATCH v1 35/38] kselftest/arm64: Add stress test for SME ZA context switching Date: Thu, 30 Sep 2021 19:11:41 +0100 [thread overview] Message-ID: <20210930181144.10029-36-broonie@kernel.org> (raw) In-Reply-To: <20210930181144.10029-1-broonie@kernel.org> Add a stress test for context switching of the ZA register state based on the similar tests Dave Martin wrote for FPSIMD and SVE registers. The test loops indefinitely writing a data pattern to ZA then reading it back and verifying that it's what was expected. Unlike the other tests we manually assemble the SME instructions since at present no released toolchain has SME support integrated. 
Signed-off-by: Mark Brown <broonie@kernel.org> --- tools/testing/selftests/arm64/fp/.gitignore | 1 + tools/testing/selftests/arm64/fp/Makefile | 3 + tools/testing/selftests/arm64/fp/za-stress | 59 +++ tools/testing/selftests/arm64/fp/za-test.S | 545 ++++++++++++++++++++ 4 files changed, 608 insertions(+) create mode 100644 tools/testing/selftests/arm64/fp/za-stress create mode 100644 tools/testing/selftests/arm64/fp/za-test.S diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore index 73c600e1ab81..1178fecc7aa1 100644 --- a/tools/testing/selftests/arm64/fp/.gitignore +++ b/tools/testing/selftests/arm64/fp/.gitignore @@ -7,3 +7,4 @@ sve-test ssve-test vec-syscfg vlset +za-test diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile index 11dbe05c5070..4f32cb1041a0 100644 --- a/tools/testing/selftests/arm64/fp/Makefile +++ b/tools/testing/selftests/arm64/fp/Makefile @@ -6,6 +6,7 @@ TEST_PROGS_EXTENDED := fpsimd-test fpsimd-stress \ rdvl-sme rdvl-sve \ sve-test sve-stress \ ssve-test ssve-stress \ + za-test za-stress \ vlset all: $(TEST_GEN_PROGS) $(TEST_PROGS_EXTENDED) @@ -22,5 +23,7 @@ ssve-test: sve-test.S $(CC) -DSSVE -nostdlib $^ -o $@ vec-syscfg: vec-syscfg.o rdvl.o vlset: vlset.o +za-test: za-test.o + $(CC) -nostdlib $^ -o $@ include ../../lib.mk diff --git a/tools/testing/selftests/arm64/fp/za-stress b/tools/testing/selftests/arm64/fp/za-stress new file mode 100644 index 000000000000..5ac386b55b95 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/za-stress @@ -0,0 +1,59 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) 2015-2019 ARM Limited. 
+# Original author: Dave Martin <Dave.Martin@arm.com> + +set -ue + +NR_CPUS=`nproc` + +pids= +logs= + +cleanup () { + trap - INT TERM CHLD + set +e + + if [ -n "$pids" ]; then + kill $pids + wait $pids + pids= + fi + + if [ -n "$logs" ]; then + cat $logs + rm $logs + logs= + fi +} + +interrupt () { + cleanup + exit 0 +} + +child_died () { + cleanup + exit 1 +} + +trap interrupt INT TERM EXIT + +for x in `seq 0 $((NR_CPUS * 4))`; do + log=`mktemp` + logs=$logs\ $log + ./za-test >$log & + pids=$pids\ $! +done + +# Wait for all child processes to be created: +sleep 10 + +while :; do + kill -USR1 $pids +done & +pids=$pids\ $! + +wait + +exit 1 diff --git a/tools/testing/selftests/arm64/fp/za-test.S b/tools/testing/selftests/arm64/fp/za-test.S new file mode 100644 index 000000000000..b9e0e2e07dad --- /dev/null +++ b/tools/testing/selftests/arm64/fp/za-test.S @@ -0,0 +1,545 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2021 ARM Limited. +// Original author: Mark Brown <broonie@kernel.org> +// +// Scalable Matrix Extension ZA context switch test +// Repeatedly writes unique test patterns into each ZA tile +// and reads them back to verify integrity. +// +// for x in `seq 1 NR_CPUS`; do za-test & pids=$pids\ $! ; done +// (leave it running for as long as you want...) 
+// kill $pids + +#include <asm/unistd.h> +#include "assembler.h" +#include "asm-offsets.h" + +.arch_extension sve + +#define MAXVL 2048 +#define MAXVL_B (MAXVL / 8) + +/* + * LDR (vector to ZA array): + * LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL] + */ +.macro _ldr_za nw, nxbase, offset=0 + .inst 0xe1000000 \ + | (((\nw) & 3) << 13) \ + | ((\nxbase) << 5) \ + | ((\offset) & 7) +.endm + +/* + * STR (vector from ZA array): + * STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL] + */ +.macro _str_za nw, nxbase, offset=0 + .inst 0xe1200000 \ + | (((\nw) & 3) << 13) \ + | ((\nxbase) << 5) \ + | ((\offset) & 7) +.endm + +// Print a single character x0 to stdout +// Clobbers x0-x2,x8 +function putc + str x0, [sp, #-16]! + + mov x0, #1 // STDOUT_FILENO + mov x1, sp + mov x2, #1 + mov x8, #__NR_write + svc #0 + + add sp, sp, #16 + ret +endfunction + +// Print a NUL-terminated string starting at address x0 to stdout +// Clobbers x0-x3,x8 +function puts + mov x1, x0 + + mov x2, #0 +0: ldrb w3, [x0], #1 + cbz w3, 1f + add x2, x2, #1 + b 0b + +1: mov w0, #1 // STDOUT_FILENO + mov x8, #__NR_write + svc #0 + + ret +endfunction + +// Utility macro to print a literal string +// Clobbers x0-x4,x8 +.macro puts string + .pushsection .rodata.str1.1, "aMS", 1 +.L__puts_literal\@: .string "\string" + .popsection + + ldr x0, =.L__puts_literal\@ + bl puts +.endm + +// Print an unsigned decimal number x0 to stdout +// Clobbers x0-x4,x8 +function putdec + mov x1, sp + str x30, [sp, #-32]! // Result can't be > 20 digits + + mov x2, #0 + strb w2, [x1, #-1]! // Write the NUL terminator + + mov x2, #10 +0: udiv x3, x0, x2 // div-mod loop to generate the digits + msub x0, x3, x2, x0 + add w0, w0, #'0' + strb w0, [x1, #-1]! + mov x0, x3 + cbnz x3, 0b + + ldrb w0, [x1] + cbnz w0, 1f + mov w0, #'0' // Print "0" for 0, not "" + strb w0, [x1, #-1]! 
+ +1: mov x0, x1 + bl puts + + ldr x30, [sp], #32 + ret +endfunction + +// Print an unsigned decimal number x0 to stdout, followed by a newline +// Clobbers x0-x5,x8 +function putdecn + mov x5, x30 + + bl putdec + mov x0, #'\n' + bl putc + + ret x5 +endfunction + +// Clobbers x0-x3,x8 +function puthexb + str x30, [sp, #-0x10]! + + mov w3, w0 + lsr w0, w0, #4 + bl puthexnibble + mov w0, w3 + + ldr x30, [sp], #0x10 + // fall through to puthexnibble +endfunction +// Clobbers x0-x2,x8 +function puthexnibble + and w0, w0, #0xf + cmp w0, #10 + blo 1f + add w0, w0, #'a' - ('9' + 1) +1: add w0, w0, #'0' + b putc +endfunction + +// x0=data in, x1=size in, clobbers x0-x5,x8 +function dumphex + str x30, [sp, #-0x10]! + + mov x4, x0 + mov x5, x1 + +0: subs x5, x5, #1 + b.lo 1f + ldrb w0, [x4], #1 + bl puthexb + b 0b + +1: ldr x30, [sp], #0x10 + ret +endfunction + +// Declare some storage space to shadow ZA register contents and a +// scratch buffer for a vector. +.pushsection .text +.data +.align 4 +zaref: + .space MAXVL_B * MAXVL_B +scratch: + .space MAXVL_B +.popsection + +// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0. 
+// Clobbers x0-x3 +function memcpy + cmp x2, #0 + b.eq 1f +0: ldrb w3, [x1], #1 + strb w3, [x0], #1 + subs x2, x2, #1 + b.ne 0b +1: ret +endfunction + +// Generate a test pattern for storage in ZA +// x0: pid (low 13 bits used) +// x1: row in ZA (8 bits) +// x2: generation (4 bits) + +// These values are used to construct a 32-bit pattern that is repeated in the +// scratch buffer as many times as will fit: +// bits 31:28 generation number (increments once per test_loop) +// bits 27:22 32-bit lane index +// bits 20:13 row number +// bits 12: 0 pid + +function pattern + and x0, x0, #0x1fff + orr w1, w0, w1, lsl #13 + orr w2, w1, w2, lsl #28 + + ldr x0, =scratch + mov w1, #MAXVL_B / 4 + +0: str w2, [x0], #4 + add w2, w2, #(1 << 22) + subs w1, w1, #1 + bne 0b + + ret +endfunction + +// Get the address of shadow data for ZA horizontal vector xn +.macro _adrza xd, xn, nrtmp + ldr \xd, =zaref + rdvl x\nrtmp, #1 + madd \xd, x\nrtmp, \xn, \xd +.endm + +// Set up test pattern in a ZA horizontal vector +// x0: pid +// x1: row number +// x2: generation +function setup_za + mov x4, x30 + mov x12, x1 // Use x12 for vector select + + bl pattern // Get pattern in scratch buffer + _adrza x0, x12, 2 // Shadow buffer pointer to x0 and x5 + mov x5, x0 + ldr x1, =scratch + bl memcpy // length (x2) set up by _adrza + + _ldr_za 12, 5 // load vector w12 from pointer x5 + + ret x4 +endfunction + +// Fill x1 bytes starting at x0 with 0xae (for canary purposes) +// Clobbers x1, x2. +function memfill_ae + mov w2, #0xae + b memfill +endfunction + +// Fill x1 bytes starting at x0 with 0. +// Clobbers x1, x2. +function memclr + mov w2, #0 + b memfill +endfunction + +// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2 +// Clobbers x1 +function memfill + cmp x1, #0 + b.eq 1f + +0: strb w2, [x0], #1 + subs x1, x1, #1 + b.ne 0b + +1: ret +endfunction + +// Trivial memory compare: compare x2 bytes starting at address x0 with +// bytes starting at address x1. 
+// Returns only if all bytes match; otherwise, the program is aborted. +// Clobbers x0-x5. +function memcmp + cbz x2, 2f + + stp x0, x1, [sp, #-0x20]! + str x2, [sp, #0x10] + + mov x5, #0 +0: ldrb w3, [x0, x5] + ldrb w4, [x1, x5] + add x5, x5, #1 + cmp w3, w4 + b.ne 1f + subs x2, x2, #1 + b.ne 0b + +1: ldr x2, [sp, #0x10] + ldp x0, x1, [sp], #0x20 + b.ne barf + +2: ret +endfunction + +// Verify that a ZA vector matches its shadow in memory, else abort +// x0: vector number +// Clobbers x0-x7 and x12. +function check_za + mov x3, x30 + + mov x12, x0 + _adrza x5, x0, 6 // pointer to expected value in x5 + mov x4, x0 + ldr x7, =scratch // x7 is scratch + + mov x0, x7 + mov x1, x6 + bl memfill_ae + + _str_za 12, 7 // save vector w12 to pointer x7 + + mov x0, x5 + mov x1, x7 + mov x2, x6 + mov x30, x3 + b memcmp +endfunction + +// Any SME register modified here can cause corruption in the main +// thread -- but *only* the locations modified here. +function irritator_handler + // Increment the irritation signal count (x23): + ldr x0, [x2, #ucontext_regs + 8 * 23] + add x0, x0, #1 + str x0, [x2, #ucontext_regs + 8 * 23] + + // Corrupt some random ZA data +#if 0 + adr x0, .text + (irritator_handler - .text) / 16 * 16 + movi v0.8b, #1 + movi v9.16b, #2 + movi v31.8b, #3 +#endif + + ret +endfunction + +function smstart + // Set SVCR.SM to 3, equivalent to SMSTART but doesn't need a + // SME capable toolchain. + mov x0, #3 + msr S3_3_C4_C2_2, x0 + + ret +endfunction + +function terminate_handler + mov w21, w0 + mov x20, x2 + + puts "Terminated by signal " + mov w0, w21 + bl putdec + puts ", no error, iterations=" + ldr x0, [x20, #ucontext_regs + 8 * 22] + bl putdec + puts ", signals=" + ldr x0, [x20, #ucontext_regs + 8 * 23] + bl putdecn + + mov x0, #0 + mov x8, #__NR_exit + svc #0 +endfunction + +// w0: signal number +// x1: sa_action +// w2: sa_flags +// Clobbers x0-x6,x8 +function setsignal + str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]! 
+ + mov w4, w0 + mov x5, x1 + mov w6, w2 + + add x0, sp, #16 + mov x1, #sa_sz + bl memclr + + mov w0, w4 + add x1, sp, #16 + str w6, [x1, #sa_flags] + str x5, [x1, #sa_handler] + mov x2, #0 + mov x3, #sa_mask_sz + mov x8, #__NR_rt_sigaction + svc #0 + + cbz w0, 1f + + puts "sigaction failure\n" + b .Labort + +1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16) + ret +endfunction + +// Main program entry point +.globl _start +function _start +_start: + puts "Streaming mode " + bl smstart + + // Sanity-check and report the vector length + + rdvl x19, #8 + cmp x19, #128 + b.lo 1f + cmp x19, #2048 + b.hi 1f + tst x19, #(8 - 1) + b.eq 2f + +1: puts "bad vector length: " + mov x0, x19 + bl putdecn + b .Labort + +2: puts "vector length:\t" + mov x0, x19 + bl putdec + puts " bits\n" + + // Obtain our PID, to ensure test pattern uniqueness between processes + mov x8, #__NR_getpid + svc #0 + mov x20, x0 + + puts "PID:\t" + mov x0, x20 + bl putdecn + + mov x23, #0 // Irritation signal count + + mov w0, #SIGINT + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGTERM + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGUSR1 + adr x1, irritator_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + + bl smstart // printing and signals dropped out of SM + mov x22, #0 // generation number, increments per iteration +.Ltest_loop: + rdvl x0, #8 + cmp x0, x19 + b.ne vl_barf + + rdvl x21, #1 // Set up ZA & shadow with test pattern +0: mov x0, x20 + sub x1, x21, #1 + and x2, x22, #0xf + bl setup_za + subs x21, x21, #1 + bne 0b + + mov x8, #__NR_sched_yield // Encourage preemption + svc #0 + bl smstart // syscall dropped out of SM + + rdvl x21, #1 // Verify each ZA row against its shadow +0: sub x0, x21, #1 + bl check_za + subs x21, x21, #1 + bne 0b + + add x22, x22, #1 // Everything still working + b .Ltest_loop + +.Labort: + mov x0, #0 + mov x1, #SIGABRT + mov x8, #__NR_kill + svc #0 +endfunction + +function barf 
+// fpsimd.c activity log dump hack +// ldr w0, =0xdeadc0de +// mov w8, #__NR_exit +// svc #0 +// end hack + mov x10, x0 // expected data + mov x11, x1 // actual data + mov x12, x2 // data size + + puts "Mismatch: PID=" + mov x0, x20 + bl putdec + puts ", iteration=" + mov x0, x22 + bl putdec + puts ", row=" + mov x0, x21 + bl putdecn + puts "\tExpected [" + mov x0, x10 + mov x1, x12 + bl dumphex + puts "]\n\tGot [" + mov x0, x11 + mov x1, x12 + bl dumphex + puts "]\n" + + mov x8, #__NR_getpid + svc #0 +// fpsimd.c activity log dump hack +// ldr w0, =0xdeadc0de +// mov w8, #__NR_exit +// svc #0 +// ^ end of hack + mov x1, #SIGABRT + mov x8, #__NR_kill + svc #0 +// mov x8, #__NR_exit +// mov x1, #1 +// svc #0 +endfunction + +function vl_barf + mov x10, x0 + + puts "Bad active VL: " + mov x0, x10 + bl putdecn + + mov x8, #__NR_exit + mov x1, #1 + svc #0 +endfunction -- 2.20.1 _______________________________________________ linux-arm-kernel mailing list linux-arm-kernel@lists.infradead.org http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
next prev parent reply other threads:[~2021-09-30 18:17 UTC|newest] Thread overview: 143+ messages / expand[flat|nested] mbox.gz Atom feed top 2021-09-30 18:11 [PATCH v1 00/38] arm64/sme: Initial support for the Scalable Matrix Extension Mark Brown 2021-09-30 18:11 ` Mark Brown 2021-09-30 18:11 ` [PATCH v1 01/38] arm64/fp: Reindent fpsimd_save() Mark Brown 2021-09-30 18:11 ` Mark Brown 2021-10-11 9:39 ` Jonathan Cameron 2021-10-11 9:39 ` Jonathan Cameron 2021-10-11 13:02 ` Mark Brown 2021-10-11 13:02 ` Mark Brown 2021-09-30 18:11 ` [PATCH v1 02/38] arm64/sve: Remove sve_load_from_fpsimd_state() Mark Brown 2021-09-30 18:11 ` Mark Brown 2021-09-30 18:11 ` [PATCH v1 03/38] arm64/sve: Make access to FFR optional Mark Brown 2021-09-30 18:11 ` Mark Brown 2021-09-30 18:11 ` [PATCH v1 04/38] arm64/sve: Rename find_supported_vector_length() Mark Brown 2021-09-30 18:11 ` Mark Brown 2021-09-30 18:11 ` [PATCH v1 05/38] arm64/sve: Use accessor functions for vector lengths in thread_struct Mark Brown 2021-09-30 18:11 ` Mark Brown 2021-09-30 18:11 ` [PATCH v1 06/38] arm64/sve: Put system wide vector length information into structs Mark Brown 2021-09-30 18:11 ` Mark Brown 2021-10-01 3:13 ` kernel test robot 2021-10-01 3:13 ` kernel test robot 2021-10-01 3:13 ` kernel test robot 2021-09-30 18:11 ` [PATCH v1 07/38] arm64/sve: Explicitly load vector length when restoring SVE state Mark Brown 2021-09-30 18:11 ` Mark Brown 2021-09-30 18:11 ` [PATCH v1 08/38] arm64/sve: Track vector lengths for tasks in an array Mark Brown 2021-09-30 18:11 ` Mark Brown 2021-10-11 10:20 ` Jonathan Cameron 2021-10-11 10:20 ` Jonathan Cameron 2021-10-11 13:14 ` Mark Brown 2021-10-11 13:14 ` Mark Brown 2021-10-11 13:18 ` Jonathan Cameron 2021-10-11 13:18 ` Jonathan Cameron 2021-09-30 18:11 ` [PATCH v1 09/38] arm64/sve: Make sysctl interface for SVE reusable by SME Mark Brown 2021-09-30 18:11 ` Mark Brown 2021-09-30 18:11 ` [PATCH v1 10/38] arm64/sve: Generalise vector length configuration prctl() for SME 
Mark Brown 2021-09-30 18:11 ` Mark Brown 2021-10-11 10:27 ` Jonathan Cameron 2021-10-11 10:27 ` Jonathan Cameron 2021-10-11 13:18 ` Mark Brown 2021-10-11 13:18 ` Mark Brown 2021-09-30 18:11 ` [PATCH v1 11/38] selftests: arm64: Parameterise ptrace vector length information Mark Brown 2021-09-30 18:11 ` Mark Brown 2021-09-30 18:11 ` [PATCH v1 12/38] arm64/sme: Provide ABI documentation for SME Mark Brown 2021-09-30 18:11 ` Mark Brown 2021-10-08 14:11 ` Alan Hayward 2021-10-08 14:11 ` Alan Hayward 2021-10-08 15:28 ` Mark Brown 2021-10-08 15:28 ` Mark Brown 2021-10-08 16:45 ` Alan Hayward 2021-10-08 16:45 ` Alan Hayward 2021-10-08 17:04 ` Mark Brown 2021-10-08 17:04 ` Mark Brown 2021-10-11 11:15 ` Alan Hayward 2021-10-11 11:15 ` Alan Hayward 2021-10-11 11:48 ` Mark Brown 2021-10-11 11:48 ` Mark Brown 2021-10-11 11:05 ` Jonathan Cameron 2021-10-11 11:05 ` Jonathan Cameron 2021-10-11 13:20 ` Mark Brown 2021-10-11 13:20 ` Mark Brown 2021-10-11 13:17 ` Szabolcs Nagy 2021-10-11 13:17 ` Szabolcs Nagy 2021-10-11 13:23 ` Mark Brown 2021-10-11 13:23 ` Mark Brown 2021-10-11 14:19 ` Szabolcs Nagy 2021-10-11 14:19 ` Szabolcs Nagy 2021-10-11 20:10 ` Mark Brown 2021-10-11 20:10 ` Mark Brown 2021-10-12 8:23 ` Szabolcs Nagy 2021-10-12 8:23 ` Szabolcs Nagy 2021-10-13 18:37 ` Mark Brown 2021-10-13 18:37 ` Mark Brown 2021-10-14 9:57 ` Szabolcs Nagy 2021-10-14 9:57 ` Szabolcs Nagy 2021-09-30 18:11 ` [PATCH v1 13/38] arm64/sme: System register and exception syndrome definitions Mark Brown 2021-09-30 18:11 ` Mark Brown 2021-09-30 18:11 ` [PATCH v1 14/38] arm64/sme: Define macros for manually encoding SME instructions Mark Brown 2021-09-30 18:11 ` Mark Brown 2021-09-30 18:11 ` [PATCH v1 15/38] arm64/sme: Early CPU setup for SME Mark Brown 2021-09-30 18:11 ` Mark Brown 2021-09-30 18:11 ` [PATCH v1 16/38] arm64/sme: Basic enumeration support Mark Brown 2021-09-30 18:11 ` Mark Brown 2021-09-30 18:11 ` [PATCH v1 17/38] arm64/sme: Identify supported SME vector lengths at boot Mark Brown 
2021-09-30 18:11 ` Mark Brown 2021-09-30 18:11 ` [PATCH v1 18/38] arm64/sme: Implement sysctl to set the default vector length Mark Brown 2021-09-30 18:11 ` Mark Brown 2021-09-30 18:11 ` [PATCH v1 19/38] arm64/sme: Implement vector length configuration prctl()s Mark Brown 2021-09-30 18:11 ` Mark Brown 2021-10-01 5:20 ` kernel test robot 2021-10-01 5:20 ` kernel test robot 2021-10-01 5:20 ` kernel test robot 2021-10-01 12:40 ` Mark Brown 2021-10-01 12:40 ` Mark Brown 2021-10-01 12:40 ` Mark Brown 2021-10-08 1:32 ` [kbuild-all] " Chen, Rong A 2021-10-08 1:32 ` Chen, Rong A 2021-10-08 1:32 ` [kbuild-all] " Chen, Rong A 2021-10-01 16:38 ` kernel test robot 2021-10-01 16:38 ` kernel test robot 2021-10-01 16:38 ` kernel test robot 2021-09-30 18:11 ` [PATCH v1 20/38] arm64/sme: Implement support for TPIDR2 Mark Brown 2021-09-30 18:11 ` Mark Brown 2021-09-30 18:11 ` [PATCH v1 21/38] arm64/sme: Implement SVCR context switching Mark Brown 2021-09-30 18:11 ` Mark Brown 2021-10-11 12:15 ` Jonathan Cameron 2021-10-11 12:15 ` Jonathan Cameron 2021-09-30 18:11 ` [PATCH v1 22/38] arm64/sme: Implement streaming SVE " Mark Brown 2021-09-30 18:11 ` Mark Brown 2021-09-30 18:11 ` [PATCH v1 23/38] arm64/sme: Implement ZA " Mark Brown 2021-09-30 18:11 ` Mark Brown 2021-10-11 12:27 ` Jonathan Cameron 2021-10-11 12:27 ` Jonathan Cameron 2021-09-30 18:11 ` [PATCH v1 24/38] arm64/sme: Implement traps and syscall handling for SME Mark Brown 2021-09-30 18:11 ` Mark Brown 2021-10-11 12:37 ` Jonathan Cameron 2021-10-11 12:37 ` Jonathan Cameron 2021-09-30 18:11 ` [PATCH v1 25/38] arm64/sme: Implement streaming SVE signal handling Mark Brown 2021-09-30 18:11 ` Mark Brown 2021-09-30 18:11 ` [PATCH v1 26/38] arm64/sme: Implement ZA " Mark Brown 2021-09-30 18:11 ` Mark Brown 2021-09-30 18:11 ` [PATCH v1 27/38] arm64/sme: Implement ptrace support for streaming mode SVE registers Mark Brown 2021-09-30 18:11 ` Mark Brown 2021-09-30 18:11 ` [PATCH v1 28/38] arm64/sme: Add ptrace support for ZA Mark Brown 
2021-09-30 18:11 ` Mark Brown 2021-09-30 18:11 ` [PATCH v1 29/38] arm64/sme: Disable streaming mode and ZA when flushing CPU state Mark Brown 2021-09-30 18:11 ` Mark Brown 2021-09-30 18:11 ` [PATCH v1 30/38] arm64/sme: Save and restore streaming mode over EFI runtime calls Mark Brown 2021-09-30 18:11 ` Mark Brown 2021-09-30 18:11 ` [PATCH v1 31/38] arm64/sme: Provide Kconfig for SME Mark Brown 2021-09-30 18:11 ` Mark Brown 2021-09-30 18:11 ` [PATCH v1 32/38] kselftest/arm64: Add tests for TPIDR2 Mark Brown 2021-09-30 18:11 ` Mark Brown 2021-09-30 18:11 ` [PATCH v1 33/38] kselftest/arm64: Extend vector configuration API tests to cover SME Mark Brown 2021-09-30 18:11 ` Mark Brown 2021-09-30 18:11 ` [PATCH v1 34/38] kselftest/arm64: sme: Provide streaming mode SVE stress test Mark Brown 2021-09-30 18:11 ` Mark Brown 2021-09-30 18:11 ` Mark Brown [this message] 2021-09-30 18:11 ` [PATCH v1 35/38] kselftest/arm64: Add stress test for SME ZA context switching Mark Brown 2021-09-30 18:11 ` [PATCH v1 36/38] kselftest/arm64: signal: Add SME signal handling tests Mark Brown 2021-09-30 18:11 ` Mark Brown 2021-09-30 18:11 ` [PATCH v1 37/38] selftests: arm64: Add streaming SVE to SVE ptrace tests Mark Brown 2021-09-30 18:11 ` Mark Brown 2021-09-30 18:11 ` [PATCH v1 38/38] selftests: arm64: Add coverage for the ZA ptrace interface Mark Brown 2021-09-30 18:11 ` Mark Brown
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20210930181144.10029-36-broonie@kernel.org \ --to=broonie@kernel.org \ --cc=Basant.KumarDwivedi@arm.com \ --cc=Salil.Akerkar@arm.com \ --cc=alan.hayward@arm.com \ --cc=catalin.marinas@arm.com \ --cc=linux-arm-kernel@lists.infradead.org \ --cc=linux-kselftest@vger.kernel.org \ --cc=luis.machado@arm.com \ --cc=shuah@kernel.org \ --cc=skhan@linuxfoundation.org \ --cc=szabolcs.nagy@arm.com \ --cc=will@kernel.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.