[PATCH v2 11/13] fuzz/x86_emulate: Add --rerun option to try to track down instability

From: George Dunlap <george.dunlap@citrix.com>
To: xen-devel@lists.xenproject.org
Cc: Ian Jackson <ian.jackson@citrix.com>,
	Wei Liu <wei.liu2@citrix.com>,
	George Dunlap <george.dunlap@citrix.com>,
	Jan Beulich <jbeulich@suse.com>,
	Andrew Cooper <andrew.cooper3@citrix.com>
Subject: [PATCH v2 11/13] fuzz/x86_emulate: Add --rerun option to try to track down instability
Date: Mon, 25 Sep 2017 15:26:46 +0100	[thread overview]
Message-ID: <20170925142648.25959-11-george.dunlap@citrix.com> (raw)
In-Reply-To: <20170925142648.25959-1-george.dunlap@citrix.com>

Current stability numbers are not 100%.  In order to help track this
down, add a --rerun option which will run the same input twice,
resetting the state in between each run, and comparing the state
afterwards.  If the state differs, call abort().

This allows AFL to help the process of tracking down what state is not
being reset properly between runs by proving testcases that
demonstrate the behavior.

To do this:

- Move ctxt into struct fuzz-state to simplify handling

- Rather than copying the data into input, treat the data handed as
  immutable and point each "copy" to it

- Factor out various steps (setting up fuzz state, running an
  individual test) so that they can be efficiently run either once or
  twice (as necessary)

- Compare the states afterwards, printing what's different and calling
  abort() if anything is found.

Signed-off-by: George Dunlap <george.dunlap@citrix.com>
---
v2:
- Fix some coding style issues
- Port over previous changes

CC: Ian Jackson <ian.jackson@citrix.com>
CC: Wei Liu <wei.liu2@citrix.com>
CC: Andrew Cooper <andrew.cooper3@citrix.com>
CC: Jan Beulich <jbeulich@suse.com>
---
 tools/fuzz/x86_instruction_emulator/afl-harness.c |   9 +-
 tools/fuzz/x86_instruction_emulator/fuzz-emul.c   | 188 ++++++++++++++++++----
 2 files changed, 165 insertions(+), 32 deletions(-)

diff --git a/tools/fuzz/x86_instruction_emulator/afl-harness.c b/tools/fuzz/x86_instruction_emulator/afl-harness.c
index 806f54d606..6b0f66f923 100644
--- a/tools/fuzz/x86_instruction_emulator/afl-harness.c
+++ b/tools/fuzz/x86_instruction_emulator/afl-harness.c
@@ -14,6 +14,7 @@ extern unsigned int fuzz_minimal_input_size(void);
 static uint8_t input[INPUT_SIZE];
 
 extern bool opt_compact;
+extern bool opt_rerun;
 
 int main(int argc, char **argv)
 {
@@ -32,10 +33,12 @@ int main(int argc, char **argv)
         enum {
             OPT_MIN_SIZE,
             OPT_COMPACT,
+            OPT_RERUN,
         };
         static const struct option lopts[] = {
             { "min-input-size", no_argument, NULL, OPT_MIN_SIZE },
             { "compact", required_argument, NULL, OPT_COMPACT },
+            { "rerun", no_argument, NULL, OPT_RERUN },
             { 0, 0, 0, 0 }
         };
         int c = getopt_long_only(argc, argv, "", lopts, NULL);
@@ -54,8 +57,12 @@ int main(int argc, char **argv)
             opt_compact = atoi(optarg);
             break;
             
+        case OPT_RERUN:
+            opt_rerun = true;
+            break;
+
         case '?':
-            printf("Usage: %s [--compact=0|1] $FILE [$FILE...] | [--min-input-size]\n", argv[0]);
+            printf("Usage: %s [--compact=0|1] [--rerun] $FILE [$FILE...] | [--min-input-size]\n", argv[0]);
             exit(-1);
             break;
 
diff --git a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
index d99a50d12c..21d00b7416 100644
--- a/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
+++ b/tools/fuzz/x86_instruction_emulator/fuzz-emul.c
@@ -40,7 +40,7 @@ struct fuzz_state
     struct cpu_user_regs regs;
 
     /* Fuzzer's input data. */
-    struct fuzz_corpus *corpus;
+    const struct fuzz_corpus *corpus;
 
     /* Real amount of data backing corpus->data[]. */
     size_t data_num;
@@ -50,6 +50,7 @@ struct fuzz_state
 
     /* Emulation ops, some of which are disabled based on corpus->options. */
     struct x86_emulate_ops ops;
+    struct x86_emulate_ctxt ctxt;
 };
 #define DATA_OFFSET offsetof(struct fuzz_state, corpus)
 
@@ -496,6 +497,12 @@ static int fuzz_read_msr(
     const struct fuzz_state *s = ctxt->data;
     unsigned int idx;
 
+    /* 
+     * NB at the moment dump_state() relies on the fact that this
+     * cannot fail.  If we add in fuzzed failures we'll have to handle
+     * that differently.
+     */
+    
     switch ( reg )
     {
     case MSR_TSC_AUX:
@@ -616,6 +623,7 @@ static void dump_state(struct x86_emulate_ctxt *ctxt)
 
     printf(" rip: %"PRIx64"\n", regs->rip);
 
+    /* read_msr() never fails at the moment */
     fuzz_read_msr(MSR_EFER, &val, ctxt);
     printf("EFER: %"PRIx64"\n", val);
 }
@@ -660,7 +668,10 @@ static void setup_state(struct x86_emulate_ctxt *ctxt)
     {
         /* Fuzz all of the state in one go */
         if (!input_read(s, s, DATA_OFFSET))
+        {
+            printf("Input size too small\n");
             exit(-1);
+        }
         return;
     }
 
@@ -789,9 +800,8 @@ enum {
         printf("Disabling hook "#h"\n");               \
     }
 
-static void disable_hooks(struct x86_emulate_ctxt *ctxt)
+static void disable_hooks(struct fuzz_state *s)
 {
-    struct fuzz_state *s = ctxt->data;
     unsigned long bitmap = s->options;
 
     /* See also sanitize_input, some hooks can't be disabled. */
@@ -839,7 +849,7 @@ static void disable_hooks(struct x86_emulate_ctxt *ctxt)
  *  - ...bases to below 1Mb, 16-byte aligned
  *  - ...selectors to (base >> 4)
  */
-static void sanitize_input(struct x86_emulate_ctxt *ctxt)
+static void sanitize_state(struct x86_emulate_ctxt *ctxt)
 {
     struct fuzz_state *s = ctxt->data;
     struct cpu_user_regs *regs = ctxt->regs;
@@ -886,21 +896,138 @@ int LLVMFuzzerInitialize(int *argc, char ***argv)
     return 0;
 }
 
-int LLVMFuzzerTestOneInput(const uint8_t *data_p, size_t size)
+void setup_fuzz_state(struct fuzz_state *state, const uint8_t *data_p, size_t size)
 {
-    struct fuzz_state state = {
-        .ops = all_fuzzer_ops,
-    };
-    struct x86_emulate_ctxt ctxt = {
-        .data = &state,
-        .regs = &state.regs,
-        .addr_size = 8 * sizeof(void *),
-        .sp_size = 8 * sizeof(void *),
-    };
+    memset(state, 0, sizeof(*state));
+    state->corpus = (struct fuzz_corpus *)data_p;
+    state->data_num = size;
+}
+
+int runtest(struct fuzz_state *state) {
     int rc;
 
-    /* Reset all global state variables */
-    memset(&input, 0, sizeof(input));
+    struct x86_emulate_ctxt *ctxt = &state->ctxt;
+    
+    state->ops = all_fuzzer_ops;
+
+    ctxt->data = state;
+    ctxt->regs = &state->regs;
+    ctxt->addr_size = ctxt->sp_size = 8 * sizeof(void *);
+
+    setup_state(ctxt);
+
+    sanitize_state(ctxt);
+
+    disable_hooks(state);
+
+    do {
+        /* FIXME: Until we actually implement SIGFPE handling properly */
+        setup_fpu_exception_handler();
+
+        set_sizes(ctxt);
+        dump_state(ctxt);
+
+        rc = x86_emulate(ctxt, &state->ops);
+        printf("Emulation result: %d\n", rc);
+    } while ( rc == X86EMUL_OKAY );
+
+    return 0;
+}
+
+void compare_states(struct fuzz_state state[2])
+{
+    // First zero any "internal" pointers
+    state[0].corpus = state[1].corpus = NULL;
+    state[0].ctxt.data = state[1].ctxt.data = NULL;
+    state[0].ctxt.regs = state[1].ctxt.regs = NULL;
+
+    
+    if ( memcmp(&state[0], &state[1], sizeof(struct fuzz_state)) )
+    {
+        int i;
+
+        printf("State mismatch\n");
+
+        for ( i=0; i<5; i++)
+            if ( state[0].cr[i] != state[1].cr[i] )
+                printf("cr[%d]: %lx != %lx\n",
+                       i, state[0].cr[i], state[1].cr[i]);
+        
+        for ( i=0; i<MSR_INDEX_MAX; i++)
+            if ( state[0].msr[i] != state[1].msr[i] )
+                printf("msr[%d]: %lx != %lx\n",
+                       i, state[0].msr[i], state[1].msr[i]);
+        
+        for ( i=0; i<SEG_NUM; i++)
+            if ( memcmp(&state[0].segments[i], &state[1].segments[i],
+                        sizeof(state[0].segments[0])) )
+                printf("segments[%d] differ!\n", i);
+
+        if ( state[0].data_num != state[1].data_num )
+            printf("data_num: %lx !=  %lx\n", state[0].data_num,
+                   state[1].data_num);
+        if ( state[0].data_index != state[1].data_index )
+            printf("data_index: %lx !=  %lx\n", state[0].data_index,
+                   state[1].data_index);
+
+        if ( memcmp(&state[0].regs, &state[1].regs, sizeof(state[0].regs)) )
+        {
+            printf("registers differ!\n");
+            /* Print If Not Equal */
+#define PINE(elem)\
+            if ( state[0].elem != state[1].elem ) \
+                printf(#elem " differ: %lx != %lx\n", \
+                       (unsigned long)state[0].elem, \
+                       (unsigned long)state[1].elem)
+            PINE(regs.r15);
+            PINE(regs.r14);
+            PINE(regs.r13);
+            PINE(regs.r12);
+            PINE(regs.rbp);
+            PINE(regs.rbx);
+            PINE(regs.r10);
+            PINE(regs.r11);
+            PINE(regs.r9);
+            PINE(regs.r8);
+            PINE(regs.rax);
+            PINE(regs.rcx);
+            PINE(regs.rdx);
+            PINE(regs.rsi);
+            PINE(regs.rdi);
+
+            for ( i = offsetof(struct cpu_user_regs, error_code) / sizeof(unsigned);
+                  i < sizeof(state[1].regs)/sizeof(unsigned); i++ )
+            {
+                printf("[%04lu] %08x %08x\n",
+                        i * sizeof(unsigned), ((unsigned *)&state[0].regs)[i],
+                       ((unsigned *)&state[1].regs)[i]);
+            }
+        }
+
+        if ( memcmp(&state[0].ops, &state[1].ops, sizeof(state[0].ops)) )
+            printf("ops differ!\n");
+
+        if ( memcmp(&state[0].ctxt, &state[1].ctxt, sizeof(state[0].ctxt)) )
+        {
+            printf("ctxt differs!\n");
+            for ( i = 0;  i < sizeof(state[0].ctxt)/sizeof(unsigned); i++ )
+            {
+                printf("[%04lu] %08x %08x\n",
+                        i * sizeof(unsigned), ((unsigned *)&state[0].ctxt)[i],
+                       ((unsigned *)&state[1].ctxt)[i]);
+            }
+            
+        }
+
+        abort();
+    }
+}
+
+bool opt_rerun = false;
+
+int LLVMFuzzerTestOneInput(const uint8_t *data_p, size_t size)
+{
+    struct fuzz_state state[2];
 
     if ( size < fuzz_minimal_input_size() )
     {
@@ -908,7 +1035,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data_p, size_t size)
         return 1;
     }
 
-    if ( size > sizeof(input) )
+    if ( size > sizeof(struct fuzz_corpus) )
     {
         printf("Input too large\n");
         return 1;
@@ -916,25 +1043,24 @@ int LLVMFuzzerTestOneInput(const uint8_t *data_p, size_t size)
 
     memcpy(&input, data_p, size);
 
-    state.corpus = &input;
-    state.data_num = size;
-
-    setup_state(&ctxt);
+    setup_fuzz_state(&state[0], data_p, size);
+    
+    if ( opt_rerun )
+        printf("||| INITIAL RUN |||\n");
+    
+    runtest(&state[0]);
 
-    sanitize_input(&ctxt);
+    if ( !opt_rerun )
+        return 0;
 
-    disable_hooks(&ctxt);
+    /* Reset all global state variables again */
+    setup_fuzz_state(&state[1], data_p, size);
 
-    do {
-        /* FIXME: Until we actually implement SIGFPE handling properly */
-        setup_fpu_exception_handler();
+    printf("||| SECOND RUN |||\n");
 
-        set_sizes(&ctxt);
-        dump_state(&ctxt);
+    runtest(&state[1]);
 
-        rc = x86_emulate(&ctxt, &state.ops);
-        printf("Emulation result: %d\n", rc);
-    } while ( rc == X86EMUL_OKAY );
+    compare_states(state);
 
     return 0;
 }
-- 
2.14.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel