Yay for tests! I have a few minor nits, and one more major one (rc == 2 below). > +/* > + * Copyright 2015, Cyril Bur, IBM Corp. > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; either version > + * 2 of the License, or (at your option) any later version. > + */ I realise this is well past a lost cause by now, but isn't the idea to be version 2, not version 2 or later? > + > +#include "../basic_asm.h" > +#include "../vsx_asm.h" > + Some of your other functions start with a comment. That would be super helpful here - I'm still not super comfortable I understand the calling convention. > +FUNC_START(check_vsx) > + PUSH_BASIC_STACK(32) > + std r3,STACK_FRAME_PARAM(0)(sp) > + addi r3, r3, 16 * 12 #Second half of array > + bl store_vsx > + ld r3,STACK_FRAME_PARAM(0)(sp) > + bl vsx_memcmp > + POP_BASIC_STACK(32) > + blr > +FUNC_END(check_vsx) > + > +long vsx_memcmp(vector int *a) { > + vector int zero = {0,0,0,0}; > + int i; > + > + FAIL_IF(a != varray); > + > + for(i = 0; i < 12; i++) { > + if (memcmp(&a[i + 12], &zero, 16) == 0) { > + fprintf(stderr, "Detected zero from the VSX reg %d\n", i + 12); > + return 1; > + } > + } > + > + if (memcmp(a, &a[12], 12 * 16)) { I'm somewhat confused as to how this comparison works. You're comparing the new saved ones to the old saved ones, yes? > + long *p = (long *)a; > + fprintf(stderr, "VSX mismatch\n"); > + for (i = 0; i < 24; i=i+2) > + fprintf(stderr, "%d: 0x%08lx%08lx | 0x%08lx%08lx\n", > + i/2 + i%2 + 20, p[i], p[i + 1], p[i + 24], p[i + 25]); > + return 1; > + } > + return 0; > +} > + > +void *preempt_vsx_c(void *p) > +{ > + int i, j; > + long rc; > + srand(pthread_self()); > + for (i = 0; i < 12; i++) > + for (j = 0; j < 4; j++) { > + varray[i][j] = rand(); > + /* Don't want zero because it hides kernel problems */ > + if (varray[i][j] == 0) > + j--; > + } > + rc = preempt_vsx(varray, &threads_starting, &running); > + if (rc == 2) How would rc == 2? AIUI, preempt_vsx returns the value of check_vsx, which in turn returns the value of vsx_memcmp, which returns 1 or 0. > + fprintf(stderr, "Caught zeros in VSX compares\n"); Isn't it zeros or a mismatched value? > + return (void *)rc; > +} > + > +int test_preempt_vsx(void) > +{ > + int i, rc, threads; > + pthread_t *tids; > + > + threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR; > + tids = malloc(threads * sizeof(pthread_t)); > + FAIL_IF(!tids); > + > + running = true; > + threads_starting = threads; > + for (i = 0; i < threads; i++) { > + rc = pthread_create(&tids[i], NULL, preempt_vsx_c, NULL); > + FAIL_IF(rc); > + } > + > + setbuf(stdout, NULL); > + /* Not really nessesary but nice to wait for every thread to start */ > + printf("\tWaiting for %d workers to start...", threads_starting); > + while(threads_starting) > + asm volatile("": : :"memory"); I think __sync_synchronise() might be ... more idiomatic or something? Not super fussy. > + printf("done\n"); > + > + printf("\tWaiting for %d seconds to let some workers get preempted...", PREEMPT_TIME); > + sleep(PREEMPT_TIME); > + printf("done\n"); > + > + printf("\tStopping workers..."); > + /* > + * Working are checking this value every loop. In preempt_vsx 'cmpwi r5,0; bne 2b'. > + * r5 will have loaded the value of running. > + */ > + running = 0; Do you need some sort of synchronisation here? You're assuming it eventually gets to the threads, which is of course true, but maybe it would be a good idea to synchronise it more explicitly? Again, not super fussy. > + for (i = 0; i < threads; i++) { > + void *rc_p; > + pthread_join(tids[i], &rc_p); > + > + /* > + * Harness will say the fail was here, look at why preempt_vsx > + * returned > + */ > + if ((long) rc_p) > + printf("oops\n"); > + FAIL_IF((long) rc_p); > + } > + printf("done\n"); > + > + return 0; > +} > + Regards, Daniel