All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0 of 8] xenalyze: Optimizations
@ 2012-01-26 17:20 George Dunlap
  2012-01-26 17:20 ` [PATCH 1 of 8] xenalyze: Improve record-sorting algorithm George Dunlap
                   ` (7 more replies)
  0 siblings, 8 replies; 9+ messages in thread
From: George Dunlap @ 2012-01-26 17:20 UTC (permalink / raw)
  To: xen-devel; +Cc: george.dunlap

This is a series of optimizations to xenalyze.  The optimizations in
this series can speed up the summary-mode analysis of my 700MiB test
file from over 100 seconds down to around 10 seconds.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 1 of 8] xenalyze: Improve record-sorting algorithm
  2012-01-26 17:20 [PATCH 0 of 8] xenalyze: Optimizations George Dunlap
@ 2012-01-26 17:20 ` George Dunlap
  2012-01-26 17:20 ` [PATCH 2 of 8] xenalyze: Remove spurious dump_header construction George Dunlap
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: George Dunlap @ 2012-01-26 17:20 UTC (permalink / raw)
  To: xen-devel; +Cc: george.dunlap

The old method of finding the next record was to search through
all active pcpus to find the one with the lowest tsc value.  This
was simple but incredibly wasteful, especially as frequently the
same cpu had several records to process in a row, and only a small
subset of pcpus was active at any one time.

This patch introduces a sorted list, s.t. the top of the list is
always the next record to process.  After that record is processed,
the next record is found, and the record is then "bubbled down" to
its appropriate place on the list.

Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com>

diff -r d8c1c4df4535 -r f8c3c44be309 xenalyze.c
--- a/xenalyze.c	Thu Jan 12 14:27:56 2012 +0100
+++ b/xenalyze.c	Thu Jan 26 17:11:34 2012 +0000
@@ -6522,7 +6522,7 @@ void shadow_process(struct pcpu_info *p)
         if(sevt.minor <= PF_XEN_LAST_FAULT) {
             shadow_fault_generic_process(ri, h);
         } else {
-            fprintf(warn, "Warning: processing shadow as generic\n");
+            warn_once("Warning: processing shadow as generic\n");
             process_generic(ri);
         }
         break;
@@ -8558,6 +8558,10 @@ void base_process(struct pcpu_info *p) {
 
 
 /* Non-compat only */
+void record_order_insert(struct pcpu_info *new);
+void record_order_remove(struct pcpu_info *rem);
+void record_order_bubble(struct pcpu_info *last);
+
 struct cpu_change_data {
     int cpu;
     unsigned window_size;
@@ -8624,6 +8628,8 @@ loff_t scan_for_new_pcpu(loff_t offset) 
         p->file_offset = offset;
         p->next_cpu_change_offset = offset;
 
+        record_order_insert(p);
+
         offset += r + cd->window_size;
 
         sched_default_vcpu_activate(p);
@@ -8675,6 +8681,9 @@ void deactivate_pcpu(struct pcpu_info *p
         lose_vcpu(p->current, p->last_tsc);
     }
     p->active = 0;
+
+    record_order_remove(p);
+
     if ( p->pid == P.max_active_pcpu )
     {
         int i, max_active_pcpu = -1;
@@ -8840,11 +8849,6 @@ void process_cpu_change(struct pcpu_info
         if(r->cpu > P.max_active_pcpu)
             P.max_active_pcpu = r->cpu;
 
-        fprintf(warn, "%s: Activating pcpu %d at offset %lld\n",
-                __func__, r->cpu, (unsigned long long)p->file_offset);
-        
-        sched_default_vcpu_activate(p2);
-
         /* Taking this record as the first record should make everything
          * run swimmingly. */
         p2->ri = *ri;
@@ -8852,6 +8856,13 @@ void process_cpu_change(struct pcpu_info
         p2->ri.d = p2->ri.rec.u.notsc.data;
         p2->file_offset = p->file_offset;
         p2->next_cpu_change_offset = p->file_offset;
+
+        fprintf(warn, "%s: Activating pcpu %d at offset %lld\n",
+                __func__, r->cpu, (unsigned long long)p->file_offset);
+        
+        record_order_insert(p2);
+
+        sched_default_vcpu_activate(p2);
     }
 
     p->last_cpu_change_pid = r->cpu;
@@ -8979,6 +8990,7 @@ int toplevel_assert_check(int toplevel, 
         if ( ! (v->data_type == VCPU_DATA_NONE
                 || v->data_type == mask.vcpu_data_mode) )
         {
+            /* This may happen for track_dirty_vram, which causes a SHADOW_WRMAP_BF trace f/ dom0 */
             fprintf(warn, "WARNING: Unexpected vcpu data type for d%dv%d on proc %d! Expected %d got %d. Not processing\n",
                     v->d->did, v->vid, p->pid,
                     mask.vcpu_data_mode,
@@ -9317,31 +9329,102 @@ char * pcpu_string(int pcpu)
     return s;
 }
 
+/* Null terminated */
+struct pcpu_info *record_order[MAX_CPUS+1] = { 0 };
+
+/* In the case of identical tsc values, the old algorithm would favor the
+ * pcpu with the lowest number.  By default the new algorithm favors the
+ * pcpu which has been processed most recently.
+ *
+ * I think the second way is better; but it's good to be able to use the
+ * old ordering, at very lest to verify that there are no (other) ordering
+ * differences.  Enabling the below flag will cause the insertion / bubble
+ * routines to order by pcpu id as well as tsc, preserving the old order. */
+//#define PRESERVE_PCPU_ORDERING
+
+/* Steady state:
+ * + Entire list is in order, except (potentially) for the first entry
+ * + last is pointing to the first entry.
+ */
+void record_order_bubble(struct pcpu_info *last)
+{
+    int i;
+
+    /* Find the pcpu to "bubble".  This is usually the
+     * first one, but if other pcpus have been activated, it may
+     * not be. */
+    for(i=0; record_order[i] && record_order[i]!=last; i++);
+
+    assert(record_order[i]);
+
+    /* Now bubble it down */
+    for( ;
+        record_order[i+1]
+             && ( record_order[i+1]->order_tsc < last->order_tsc
+#ifdef PRESERVE_PCPU_ORDERING
+                  || ( record_order[i+1]->order_tsc == last->order_tsc
+                       && record_order[i+1]->pid < last->pid )
+#endif
+                 ) ;
+        i++)
+        record_order[i]=record_order[i+1];
+    record_order[i]=last;
+}
+
+void record_order_insert(struct pcpu_info *new)
+{
+    int i;
+    struct pcpu_info *p=NULL, *t=NULL;
+
+    /* Sanity check: Make sure it's not already in there */
+    for(i=0; record_order[i]; i++)
+        assert(record_order[i]!=new);
+
+    /* Find where to insert it */
+    for(i=0;
+        record_order[i]
+             && ( record_order[i]->order_tsc < new->order_tsc
+#ifdef PRESERVE_PCPU_ORDERING
+                  || ( record_order[i]->order_tsc == new->order_tsc
+                       && record_order[i]->pid < new->pid )
+#endif
+                 ) ;
+        i++)
+        ;
+
+    /* And insert it */
+    for( p=new; p ; i++)
+    {
+        t=record_order[i];
+        record_order[i]=p;
+        p=t;
+    }
+}
+
+void record_order_remove(struct pcpu_info *rem)
+{
+    int i;
+
+    /* Find where the record is */
+    for(i=0; record_order[i] && record_order[i]!=rem; i++)
+        ;
+
+    /* Sanity check: Make sure it's actually there! */
+    assert(record_order[i]);
+
+    /* And move everyone forward */
+    for(; (record_order[i]=record_order[i+1]); i++) 
+        ;
+}
+
 struct pcpu_info * choose_next_record(void)
 {
-    int i;
-    struct pcpu_info * p, *min_p=NULL;
-    loff_t min_offset = 0;
-
-    /* Need to:
-     * - find the pcpu with the lowest order_tsc
-     * - Find the lowest file offset
-     */
-    for(i=0; i<=P.max_active_pcpu; i++)
-    {
-        p = P.pcpu+i;
-        if(!p->active)
-            continue;
-
-        if(!min_p || p->order_tsc < min_p->order_tsc)
-            min_p = p;
-
-        if(!min_offset || p->file_offset < min_offset)
-            min_offset = p->file_offset;
-    }
-
-    if(opt.progress && min_offset >= G.progress.update_offset)
-        progress_update(min_offset);
+    struct pcpu_info *min_p=NULL;
+
+    min_p=record_order[0];
+
+    if(opt.progress && min_p && min_p->file_offset >= G.progress.update_offset)
+        progress_update(min_p->file_offset);
 
     /* If there are active pcpus, make sure we chose one */
     assert(min_p || (P.max_active_pcpu==-1));
@@ -9372,6 +9455,9 @@ void process_records(void) {
         else
             read_record(G.fd, p);
 
+        /* Update this pcpu in the processing order */
+        if ( p->active )
+            record_order_bubble(p);
     }
 }

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 2 of 8] xenalyze: Remove spurious dump_header construction
  2012-01-26 17:20 [PATCH 0 of 8] xenalyze: Optimizations George Dunlap
  2012-01-26 17:20 ` [PATCH 1 of 8] xenalyze: Improve record-sorting algorithm George Dunlap
@ 2012-01-26 17:20 ` George Dunlap
  2012-01-26 17:21 ` [PATCH 3 of 8] xenalyze: Remove --dump-cooked George Dunlap
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: George Dunlap @ 2012-01-26 17:20 UTC (permalink / raw)
  To: xen-devel; +Cc: george.dunlap

The h->dump_header information was being filled out on summary
runs, even though it's not used.  Use it only if opt.dump_all is
enabled.

Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com>

diff -r f8c3c44be309 -r 2ab3da778828 xenalyze.c
--- a/xenalyze.c	Thu Jan 26 17:11:34 2012 +0000
+++ b/xenalyze.c	Thu Jan 26 17:16:32 2012 +0000
@@ -5327,7 +5327,7 @@ void hvm_vmexit_process(struct record_in
     h->post_process = hvm_generic_postprocess;
     h->inflight.generic.event = 0;
   
-    {
+    if ( opt.dump_all) {
         struct time_struct t;
         char * c;
         int len, r;

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 3 of 8] xenalyze: Remove --dump-cooked
  2012-01-26 17:20 [PATCH 0 of 8] xenalyze: Optimizations George Dunlap
  2012-01-26 17:20 ` [PATCH 1 of 8] xenalyze: Improve record-sorting algorithm George Dunlap
  2012-01-26 17:20 ` [PATCH 2 of 8] xenalyze: Remove spurious dump_header construction George Dunlap
@ 2012-01-26 17:21 ` George Dunlap
  2012-01-26 17:21 ` [PATCH 4 of 8] xenalyze: Enable -O2 optimization level George Dunlap
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: George Dunlap @ 2012-01-26 17:21 UTC (permalink / raw)
  To: xen-devel; +Cc: george.dunlap

Remove a vestigal option that hasn't been used or maintained in years.

Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com>

diff -r 2ab3da778828 -r 6772e9e46ab2 xenalyze.c
--- a/xenalyze.c	Thu Jan 26 17:16:32 2012 +0000
+++ b/xenalyze.c	Thu Jan 26 17:16:35 2012 +0000
@@ -156,7 +156,6 @@ struct {
         scatterplot_irq:1,
         histogram_interrupt_eip:1,
         interval_mode:1,
-        dump_cooked:1,
         dump_all:1,
         dump_raw_process:1,
         dump_raw_reads:1,
@@ -231,7 +230,6 @@ struct {
     .scatterplot_rdtsc=0,
     .scatterplot_irq=0,
     .histogram_interrupt_eip=0,
-    .dump_cooked = 0,
     .dump_all = 0,
     .dump_raw_process = 0,
     .dump_raw_reads = 0,
@@ -3425,30 +3423,6 @@ void hvm_pf_xen_postprocess(struct hvm_d
         /* Set summary handler */
         hvm_set_summary_handler(h, hvm_pf_xen_summary, NULL);
     }
-
-    if(opt.dump_cooked)
-    {
-        switch(e->pf_case)
-        {
-        case PF_XEN_EMULATE:
-            printf(" %s pf_xen:emulate va %llx ec %x eip %llx%s lvl %d corr %llx\n",
-                   h->dump_header, e->va, e->error_code,
-                   h->rip, find_symbol(h->rip),
-                   e->pt_level, e->corresponding_va);
-            break;
-        case PF_XEN_MMIO:
-            printf(" %s pf_xen:mmio va %llx ec %x eip %llx%s data %x\n",
-                   h->dump_header, e->va, e->error_code,
-                   h->rip, find_symbol(h->rip),
-                   e->data);
-            break;
-        default:
-            printf(" %s pf_xen va %llx ec %x eip %llx%s\n",
-                   h->dump_header, e->va, e->error_code,
-                   h->rip, find_symbol(h->rip));
-            break;
-        }
-    }
 }
 
 void hvm_pf_xen_process(struct record_info *ri, struct hvm_data *h) {
@@ -3610,7 +3584,7 @@ void hvm_vlapic_icr_handler(struct hvm_d
 
         o->count++;
 
-        if((opt.dump_all || opt.dump_cooked)
+        if((opt.dump_all)
 #if 0
            && (ov->runstate.state != RUNSTATE_RUNNING
                || ov->hvm.vmexit_valid) 
@@ -3624,7 +3598,7 @@ void hvm_vlapic_icr_handler(struct hvm_d
     }
 
     if(m->is_write) {
-        if(opt.dump_all || opt.dump_cooked) {
+        if(opt.dump_all) {
             printf("              [vla] d%dv%d icr vec %d %s\n",
                    h->v->d->did, h->v->vid,
                    icr.vec,
@@ -3655,7 +3629,7 @@ void hvm_vlapic_icr_handler(struct hvm_d
         }
     } else {
         /* Read */
-        if(opt.dump_all || opt.dump_cooked) {
+        if(opt.dump_all) {
             printf("              [vla] d%dv%d icr status %s\n",
                    h->v->d->did, h->v->vid,
                    icr.delivery_status?"pending":"idle");
@@ -3683,7 +3657,7 @@ void hvm_vlapic_inject(struct vcpu_data 
 }
 
 void hvm_vlapic_eoi_handler(struct hvm_data *h) {
-    if(opt.dump_all || opt.dump_cooked)
+    if(opt.dump_all)
         printf("              [vla] d%dv%d eoi\n",
                h->v->d->did, h->v->vid);
 }
@@ -3834,7 +3808,7 @@ void hvm_inj_virq_process(struct record_
         int vector, fake;
     } *r = (typeof(r))h->d;
 
-    if(opt.dump_cooked | opt.dump_all) {
+    if(opt.dump_all) {
         printf(" %s inj_virq vec %u  %s\n",
                ri->dump_header,
                r->vector, r->fake?"fake":"real");
@@ -3956,24 +3930,12 @@ void hvm_io_address_summary(struct io_ad
 
 void hvm_io_write_postprocess(struct hvm_data *h)
 {
-    if(opt.dump_cooked)
-    {
-        printf(" %s io_write port %x val %x\n",
-               h->dump_header, h->inflight.io.port,
-               h->inflight.io.val);
-    }
     if(opt.with_pio_enumeration)
         update_io_address(&h->summary.io.pio, h->inflight.io.port, 1, h->arc_cycles, 0);
 }
 
 void hvm_io_read_postprocess(struct hvm_data *h)
 {
-    if(opt.dump_cooked)
-    {
-        printf(" %s io_read port %x val %x\n",
-               h->dump_header, h->inflight.io.port,
-               h->inflight.io.val);
-    }
     if(opt.with_pio_enumeration)
         update_io_address(&h->summary.io.pio, h->inflight.io.port, 0, h->arc_cycles, 0);
     if(opt.scatterplot_io && h->inflight.io.port == opt.scatterplot_io_port)
@@ -4275,24 +4237,6 @@ void hvm_cr_write_postprocess(struct hvm
                 }
                 flush=1;
             }
-
-            if(opt.dump_cooked)
-            {
-                printf(" %s cr_write cr3 val %llx oval %llx (%d resyncs) %s\n",
-                       h->dump_header, 
-                       new_val,
-                       oval,
-                       h->resyncs,
-                       flush?"flush":"");
-            }
-        } else {
-            if(opt.dump_cooked)
-            {
-                printf(" %s cr_write cr3 val %llx (%d resyncs)\n",
-                       h->dump_header, 
-                       h->inflight.cr_write.val,
-                       h->resyncs);
-            }
         }
 
         if(opt.summary_info) {
@@ -4313,14 +4257,6 @@ void hvm_cr_write_postprocess(struct hvm
         if(!flush)
             cr3_switch(new_val, h);
     } else {
-        if(opt.dump_cooked)
-        {
-            printf(" %s cr_write cr%d val %llx\n",
-                   h->dump_header, 
-                   h->inflight.cr_write.cr,
-                   h->inflight.cr_write.val);
-        }
-
         if(opt.summary_info)
         {
             if(h->inflight.cr_write.cr < CR_MAX)
@@ -4420,14 +4356,6 @@ void hvm_msr_write_summary(struct hvm_da
 
 void hvm_msr_write_postprocess(struct hvm_data *h)
 {
-    if(opt.dump_cooked)
-    {
-        printf(" %s msr_write msr %d val %llx\n",
-               h->dump_header, 
-               h->inflight.msr.addr,
-               h->inflight.msr.val);
-    }
-
     if(opt.summary_info) {
     }
 
@@ -4466,14 +4394,6 @@ void hvm_msr_read_summary(struct hvm_dat
 
 void hvm_msr_read_postprocess(struct hvm_data *h)
 {
-    if(opt.dump_cooked)
-    {
-        printf(" %s msr_read msr %d val %llx\n",
-               h->dump_header, 
-               h->inflight.msr.addr,
-               h->inflight.msr.val);
-    }
-
     if(opt.summary_info) {
     }
 
@@ -4564,7 +4484,7 @@ void hvm_inj_exc_process(struct record_i
         unsigned vec, ec;
     } *r = (typeof(r))h->d;
 
-    if ( opt.dump_cooked || opt.dump_all )
+    if ( opt.dump_all )
     {
         if(r->vec < HVM_TRAP_MAX)
             printf(" %3u.%09u %s inj_exc trap %s ec %x\n",
@@ -4620,7 +4540,7 @@ void hvm_intr_process(struct hvm_data *h
 
     h->inflight.intr.vec = vec;
 
-    if ( opt.dump_cooked || opt.dump_all )
+    if ( opt.dump_all )
     {
         if ( vec < EXTERNAL_INTERRUPT_MAX &&
              hvm_extint_vector_name[vec] )
@@ -4742,7 +4662,7 @@ void hvm_pf_inject_process(struct record
         ec = r->x32.ec;
     }
 
-    if ( opt.dump_cooked || opt.dump_all )
+    if ( opt.dump_all )
     {
             printf(" %3u.%09u %s pf_inject%s guest_cr2 %llx  guest_ec %x\n",
                    ri->t.s, ri->t.ns, pcpu_string(ri->cpu),
@@ -4938,7 +4858,7 @@ void hvm_handler_process(struct record_i
         hvm_pf_inject_process(ri, h);
         break;
     case TRC_HVM_REINJ_VIRQ:
-        if ( opt.dump_cooked || opt.dump_all )
+        if ( opt.dump_all )
         {
             printf(" %3u.%09u %s inj_virq vec %u\n",
                    ri->t.s, ri->t.ns, pcpu_string(ri->cpu),
@@ -4964,7 +4884,7 @@ void hvm_handler_process(struct record_i
         } else if(opt.with_cr3_enumeration) {
             fprintf(warn, "Warning: destroy_proc: don't know current cr3\n");
         }
-        if ( opt.dump_cooked || opt.dump_all )
+        if ( opt.dump_all )
         {
             printf(" %3u.%09u %s destroy_proc cur_cr3 %llx\n",
                    ri->t.s, ri->t.ns, pcpu_string(ri->cpu), h->v->cr3.val);
@@ -5681,18 +5601,6 @@ void shadow_emulate_postprocess(struct h
         hvm_update_short_summary(h, HVM_SHORT_SUMMARY_EMULATE);
     }
 
-    if ( opt.dump_cooked )
-    {
-        printf(" %s emulate va %llx eip %llx%s lvl %d/%d corr %llx wval %llx flags %s\n",
-               h->dump_header, e->va,
-               h->rip, find_symbol(h->rip),
-               e->pt_level, h->v->guest_paging_levels,
-               e->corresponding_va,
-               e->wval,
-               flag_string(e));
-    }
-
-
     if(opt.scatterplot_unpin_promote) {
         if(e->flag_early_unshadow)
             scatterplot_vs_time(h->exit_tsc, -10);
@@ -5906,14 +5814,6 @@ void shadow_unsync_postprocess(struct hv
             update_summary(&h->summary.pf_xen_unsync[h->resyncs],
                            h->arc_cycles);
     }
-
-    if(opt.dump_cooked)
-    {
-        printf(" %s unsync gfn %llx %s (%d resyncs)\n",
-               h->dump_header, e->gfn,
-               h->resyncs?"(resync)":"",
-               h->resyncs);
-    }
 }
 
 
@@ -6013,17 +5913,6 @@ void shadow_fixup_postprocess(struct hvm
             hvm_update_short_summary(h, HVM_SHORT_SUMMARY_FIXUP);
     }
 
-    if ( opt.dump_cooked )
-    {
-        printf(" %s fixup%s va %llx eip %llx%s gl1e %llx flags %s\n",
-               h->dump_header,
-               e->flag_unsync?":unsync":"",
-               e->va,
-               h->rip, find_symbol(h->rip),
-               e->gl1e,
-               flag_string(e));
-    }
-
     if(opt.scatterplot_unpin_promote) {
         if(h->prealloc_unpin)
             scatterplot_vs_time(h->exit_tsc, 0);
@@ -6133,8 +6022,6 @@ void shadow_fixup_process(struct record_
 void shadow_mmio_postprocess(struct hvm_data *h)
 {
     struct pf_xen_extra *e = &h->inflight.pf_xen;
-    struct mmio_info *m = &h->inflight.mmio;
-
     if ( opt.summary_info )
     {
         if(e->pf_case)
@@ -6148,28 +6035,6 @@ void shadow_mmio_postprocess(struct hvm_
 
     if(opt.with_mmio_enumeration)
         enumerate_mmio(h);
-
-    if ( opt.dump_cooked )
-    {
-        if(m->data_valid)
-            printf(" %s %smmio %s va %llx eip %llx%s gpa %llx data %x\n",
-                   h->dump_header,
-                   (e->pf_case==PF_XEN_FAST_MMIO)?"fast ":"",
-                   m->is_write?"write":"read",
-                   e->va,
-                   h->rip, find_symbol(h->rip),
-                   m->gpa,
-                   m->data);
-        else
-            printf(" %s %smmio %s va %llx eip %llx%s gpa %llx (no data)\n",
-                   h->dump_header,
-                   (e->pf_case==PF_XEN_FAST_MMIO)?"fast ":"",
-                   m->is_write?"write":"read",
-                   m->va,
-                   h->rip, find_symbol(h->rip),
-                   m->gpa);
-    }
-
 }
 
 void shadow_mmio_process(struct record_info *ri, struct hvm_data *h)
@@ -6403,7 +6268,7 @@ void shadow_resync_process(struct record
         unsigned long long gfn;
     } *r = (typeof(r))ri->d;
 
-    if(opt.dump_all || opt.dump_cooked)
+    if(opt.dump_all)
         printf(" %s oos resync %s gfn %llx\n",
                ri->dump_header,
                (ri->event == TRC_SHADOW_RESYNC_FULL)?"full":"only",
@@ -6417,7 +6282,7 @@ void shadow_prealloc_unpin_process(struc
         unsigned long long gfn;
     } *r = (typeof(r))ri->d;
 
-    if(opt.dump_all || opt.dump_cooked)
+    if(opt.dump_all)
         printf(" %s prealloc-unpin gfn %llx\n",
                ri->dump_header, r->gfn);
 
@@ -6435,7 +6300,7 @@ void shadow_wrmap_bf_process(struct reco
         unsigned long long gfn;
     } *r = (typeof(r))ri->d;
 
-    if(opt.dump_all || opt.dump_cooked)
+    if(opt.dump_all)
         printf(" %s wrmap-bf gfn %llx\n",
                ri->dump_header, r->gfn);
 
@@ -6569,7 +6434,7 @@ void pv_hypercall_process(struct record_
             pv->hypercall_count[eax]++;
     }
 
-    if(opt.dump_cooked || opt.dump_all) {
+    if(opt.dump_all) {
         if(eax < HYPERCALL_MAX)
             printf(" %s hypercall %2x (%s) eip %llx\n",
                    ri->dump_header, eax,
@@ -6616,7 +6481,7 @@ void pv_trap_process(struct record_info 
             pv->trap_count[trapnr]++;
     }
 
-    if(opt.dump_cooked || opt.dump_all) {
+    if(opt.dump_all) {
         printf(" %s trap %x eip %llx",
                ri->dump_header, trapnr, eip);
         if(use_ec)
@@ -6673,7 +6538,7 @@ void pv_ptwr_emulation_process(struct re
         return;
     }
 
-    if ( opt.dump_cooked || opt.dump_all )
+    if ( opt.dump_all )
     {
         printf(" %s ptwr l1e %llx eip %llx addr %llx\n",
                ri->dump_header,
@@ -6711,7 +6576,7 @@ void pv_update_va_mapping_process(struct
         e.flags = r->x32.flags;
     }
 
-    if ( opt.dump_cooked || opt.dump_all )
+    if ( opt.dump_all )
     {
         printf(" %s update_va_mapping l1e %llx va %llx flags %llx\n",
                ri->dump_header,
@@ -6721,7 +6586,7 @@ void pv_update_va_mapping_process(struct
 
 void pv_generic_process(struct record_info *ri, struct pv_data *pv) {
     union pv_event pevt = { .event = ri->event };
-    if ( opt.dump_cooked || opt.dump_all ) {
+    if ( opt.dump_all ) {
         if(pevt.minor < PV_MAX && pv_name[pevt.minor])
             printf(" %s %s",
                    ri->dump_header,
@@ -7145,7 +7010,7 @@ void sched_runstate_process(struct pcpu_
 
     perfctrs = (ri->extra_words == 5);
 
-    if(opt.dump_cooked || opt.dump_all) {
+    if(opt.dump_all) {
         if( perfctrs ) {
             printf(" %s %s {%lld,%lld} d%uv%u %s->%s\n",
                    ri->dump_header,
@@ -7199,9 +7064,6 @@ void sched_runstate_process(struct pcpu_
     if(sevt.new_runstate == RUNSTATE_RUNNABLE
        && v->data_type == VCPU_DATA_HVM
        && v->hvm.vmexit_valid) {
-        if(opt.dump_cooked)
-            printf("%s: d%dv%d changing to state runnable, closing vmexit\n",
-                   __func__, r->dom, r->vcpu);
         hvm_close_vmexit(&v->hvm, ri->tsc);
     }
      
@@ -7408,7 +7270,7 @@ update:
 
             cpi = ((double)run_cycles) / run_instr;
 
-            if(opt.dump_cooked || opt.dump_all) {
+            if(opt.dump_all) {
                 printf("   cpi: %2.2lf ( %lld / %lld )\n",
                        cpi, run_cycles, run_instr);
             }
@@ -7485,7 +7347,7 @@ void sched_switch_process(struct pcpu_in
         unsigned int prev_dom, prev_vcpu, next_dom, next_vcpu;
     } * r = (typeof(r))ri->d;
 
-    if(opt.dump_cooked || opt.dump_all)
+    if(opt.dump_all)
         printf("%s sched_switch prev d%uv%u next d%uv%u\n",
                ri->dump_header,
                r->prev_dom, r->prev_vcpu,
@@ -7786,7 +7648,7 @@ void mem_process(struct pcpu_info *p) {
         mem_pod_superpage_splinter_process(p);
         break;
     default:
-        if(opt.dump_all || opt.dump_cooked) {
+        if(opt.dump_all) {
             dump_generic(stdout, ri);
         }
 
@@ -7848,7 +7710,7 @@ void pm_process(struct pcpu_info *p) {
         pcpu_string_draw(p);
         break;
     default:
-        if(opt.dump_all || opt.dump_cooked) {
+        if(opt.dump_all) {
             dump_generic(stdout, ri);
         }
         break;
@@ -8209,7 +8071,7 @@ void irq_process(struct pcpu_info *p) {
     case TRC_HW_IRQ_CLEAR_VECTOR:
     case TRC_HW_IRQ_MOVE_FINISH :
     default:
-        if(opt.dump_all || opt.dump_cooked) {
+        if(opt.dump_all) {
             dump_generic(stdout, ri);
         }
         break;
@@ -8311,7 +8173,7 @@ void process_generic(struct record_info 
 
     error(ERR_STRICT, ri);
 
-    if(opt.dump_cooked || opt.dump_all) {
+    if(opt.dump_all) {
         dump_generic(stdout, ri);
     }
 }
@@ -8384,7 +8246,7 @@ void process_lost_records(struct pcpu_in
 
     first_tsc = r->first_tsc;
 
-    if(opt.dump_cooked || opt.dump_all)
+    if(opt.dump_all)
     {
         if(p->current)
             printf(" %s lost_records count %d d%uv%u (cur d%dv%d) first_tsc %lld\n",
@@ -8490,7 +8352,7 @@ void process_lost_records_end(struct pcp
      * Update the information. */
     if(ri->tsc > p->lost_record.tsc)
     {
-        if(opt.dump_cooked || opt.dump_all)
+        if(opt.dump_all)
             printf("               %s lost_records end ---\n",
                    pcpu_string(p->pid));
 
@@ -8500,7 +8362,7 @@ void process_lost_records_end(struct pcp
             int did = p->lost_record.did,
                 vid = p->lost_record.vid;
 
-            if(opt.dump_cooked || opt.dump_all)
+            if(opt.dump_all)
                 printf("               %s lost_records end d%dv%d---\n",
                        pcpu_string(p->pid),
                        did, vid);
@@ -8523,7 +8385,7 @@ void process_lost_records_end(struct pcp
             p->lost_record.seen_valid_schedule=0; /* Let next vcpu_next_update know that
                                                      this one was inferred */
         } else {
-            if(opt.dump_cooked || opt.dump_all)
+            if(opt.dump_all)
                 printf("               %s lost_records end (domain invalid)---\n",
                        pcpu_string(p->pid));
         }
@@ -9026,7 +8888,7 @@ void process_record(struct pcpu_info *p)
 
     process_record_tsc(p->order_tsc, ri);
 
-    if(opt.dump_cooked || opt.dump_all) 
+    if(opt.dump_all) 
         create_dump_header(ri, p);
 
 
@@ -9592,7 +9454,6 @@ void init_pcpus(void) {
 enum {
     OPT_NULL=0,
     /* Dumping info */
-    OPT_DUMP_COOKED,
     OPT_DUMP_RAW_READS,
     OPT_DUMP_RAW_PROCESS,
     OPT_DUMP_NO_PROCESSING,
@@ -9765,10 +9626,6 @@ error_t cmd_parser(int key, char *arg, s
     switch (key)
     {
         /* Dump group */
-    case OPT_DUMP_COOKED:
-        opt.dump_cooked = 1;
-        G.output_defined = 1;
-        break;
     case OPT_DUMP_ALL:
         opt.dump_all = 1;
         G.output_defined = 1;
@@ -10157,11 +10014,6 @@ error_t cmd_parser(int key, char *arg, s
 
 const struct argp_option cmd_opts[] =  {
     /* Dump group */
-    { .name = "dump-cooked",
-      .key = OPT_DUMP_COOKED,
-      .group = OPT_GROUP_DUMP,
-      .doc = "Dump a sanitized summary of vmexit/vmentry.", },
-
     { .name = "dump-all",
       .key = OPT_DUMP_ALL,
       .group = OPT_GROUP_DUMP,
@@ -10453,7 +10305,7 @@ int main(int argc, char *argv[]) {
     if (G.symbol_file != NULL)
         parse_symbol_file(G.symbol_file);
 
-    if(opt.dump_cooked || opt.dump_all)
+    if(opt.dump_all)
         warn = stdout;
         
     init_pcpus();

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 4 of 8] xenalyze: Enable -O2 optimization level
  2012-01-26 17:20 [PATCH 0 of 8] xenalyze: Optimizations George Dunlap
                   ` (2 preceding siblings ...)
  2012-01-26 17:21 ` [PATCH 3 of 8] xenalyze: Remove --dump-cooked George Dunlap
@ 2012-01-26 17:21 ` George Dunlap
  2012-01-26 17:21 ` [PATCH 5 of 8] xenalyze: Rework math to remove two 64-bit divisions George Dunlap
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: George Dunlap @ 2012-01-26 17:21 UTC (permalink / raw)
  To: xen-devel; +Cc: george.dunlap

-O3 is marginally better, but the automatic inlining
obscures where the guest is spending its time.

Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com>

diff -r 6772e9e46ab2 -r 0663e3e8f69d Makefile
--- a/Makefile	Thu Jan 26 17:16:35 2012 +0000
+++ b/Makefile	Thu Jan 26 17:16:59 2012 +0000
@@ -1,6 +1,6 @@
 CC = gcc
 
-CFLAGS += -g
+CFLAGS += -g -O2
 CFLAGS += -fno-strict-aliasing
 CFLAGS += -std=gnu99
 CFLAGS += -Wall -Wstrict-prototypes

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 5 of 8] xenalyze: Rework math to remove two 64-bit divisions
  2012-01-26 17:20 [PATCH 0 of 8] xenalyze: Optimizations George Dunlap
                   ` (3 preceding siblings ...)
  2012-01-26 17:21 ` [PATCH 4 of 8] xenalyze: Enable -O2 optimization level George Dunlap
@ 2012-01-26 17:21 ` George Dunlap
  2012-01-26 17:21 ` [PATCH 6 of 8] xenalyze: Eliminate unnecessary cycles_to_time calculation George Dunlap
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: George Dunlap @ 2012-01-26 17:21 UTC (permalink / raw)
  To: xen-devel; +Cc: george.dunlap

abs_cycles_to_time is called on every record for dump mode; it has
four 64-bit divisions (well, 3 divisions and 1 mod), which map to
library functions on a 32-bit platform.  A simple rework of the math
can eliminate two of those.

Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com>

diff -r 0663e3e8f69d -r 4b3639bd3255 xenalyze.c
--- a/xenalyze.c	Thu Jan 26 17:16:59 2012 +0000
+++ b/xenalyze.c	Thu Jan 26 17:17:19 2012 +0000
@@ -1976,16 +1976,19 @@ void cpumask_union(cpu_mask_t *d, const 
 /* -- Time code -- */
 
 void cycles_to_time(unsigned long long c, struct time_struct *t) {
-    t->time = ((c) * 1000) / (opt.cpu_hz / 1000000 );    
-    t->s = t->time / 1000000000;                        
-    t->ns = t->time % 1000000000;                       
+    t->time = ((c - P.f.first_tsc) * 1000 * 1000000) / opt.cpu_hz;
+    t->s = t->time / 1000000000;
+    t->ns = t->time - (t->s * 1000000000);
 }
 
 void abs_cycles_to_time(unsigned long long ac, struct time_struct *t) {
     if(ac > P.f.first_tsc) {
-        t->time = ((ac - P.f.first_tsc) * 1000) / (opt.cpu_hz / 1000000 );    
-        t->s = t->time / 1000000000;                        
-        t->ns = t->time % 1000000000;
+        /* t->time = ((ac - P.f.first_tsc) * 1000) / (opt.cpu_hz / 1000000 );     */
+        /* t->s = t->time / 1000000000;                         */
+        /* t->ns = t->time % 1000000000; */
+        t->time = ((ac - P.f.first_tsc) * 1000 * 1000000) / opt.cpu_hz;
+        t->s = t->time / 1000000000;
+        t->ns = t->time - (t->s * 1000000000);
     } else {
         t->time = t->s = t->ns = 0;
     }

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 6 of 8] xenalyze: Eliminate unnecessary cycles_to_time calculation
  2012-01-26 17:20 [PATCH 0 of 8] xenalyze: Optimizations George Dunlap
                   ` (4 preceding siblings ...)
  2012-01-26 17:21 ` [PATCH 5 of 8] xenalyze: Rework math to remove two 64-bit divisions George Dunlap
@ 2012-01-26 17:21 ` George Dunlap
  2012-01-26 17:21 ` [PATCH 7 of 8] xenalyze: Introduce more efficient read mechanism George Dunlap
  2012-01-26 17:21 ` [PATCH 8 of 8] xenalyze: Get rid of redundant hvm dump_header George Dunlap
  7 siblings, 0 replies; 9+ messages in thread
From: George Dunlap @ 2012-01-26 17:21 UTC (permalink / raw)
  To: xen-devel; +Cc: george.dunlap

The time elements are only really accessed in dump mode, but they
cost a significant amount to calculate per record.  Only calculate
them if we're in dump mode.

Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com>

diff -r 4b3639bd3255 -r 108d02354403 xenalyze.c
--- a/xenalyze.c	Thu Jan 26 17:17:19 2012 +0000
+++ b/xenalyze.c	Thu Jan 26 17:17:53 2012 +0000
@@ -8601,9 +8601,6 @@ void process_record_tsc(tsc_t order_tsc,
 
         P.now = tsc;
     }
-    
-    /* Convert to s/ns once for convenience */
-    abs_cycles_to_time(ri->tsc, &ri->t);
 }
 
 /* Standardized part of dump output */
@@ -8615,6 +8612,8 @@ void create_dump_header(struct record_in
     len = DUMP_HEADER_MAX;
     c = ri->dump_header;
 
+    abs_cycles_to_time(ri->tsc, &ri->t);
+
     if ( ri->t.time )
     {
         r=snprintf(c, len, "%3u.%09u", ri->t.s, ri->t.ns);

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 7 of 8] xenalyze: Introduce more efficient read mechanism
  2012-01-26 17:20 [PATCH 0 of 8] xenalyze: Optimizations George Dunlap
                   ` (5 preceding siblings ...)
  2012-01-26 17:21 ` [PATCH 6 of 8] xenalyze: Eliminate unnecessary cycles_to_time calculation George Dunlap
@ 2012-01-26 17:21 ` George Dunlap
  2012-01-26 17:21 ` [PATCH 8 of 8] xenalyze: Get rid of redundant hvm dump_header George Dunlap
  7 siblings, 0 replies; 9+ messages in thread
From: George Dunlap @ 2012-01-26 17:21 UTC (permalink / raw)
  To: xen-devel; +Cc: george.dunlap

The pread functionality was convenient for reading at arbitrary
offsets within a file, but it was incredibly inefficient; after
recent optimizations, a summary analysis was spending 30% of its
time doing system calls, and indications were that a big chunk
of the rest of the overhead was due to cache misses coming out of
that path.

This patch introduces a read using mmap.  Because the file may
be too big to map on 32-bit systems, I use a "mapcache" of 8
different 2MiB buffers.  If an offset is in what's already mapped,
I just copy it; if not, I unmap one of the buffers and map it instead.

This optimization alone took the summary processing time for my test
trace from 34s down to 10s.

Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com>

diff -r 108d02354403 -r f86ebfe66384 Makefile
--- a/Makefile	Thu Jan 26 17:17:53 2012 +0000
+++ b/Makefile	Thu Jan 26 17:18:13 2012 +0000
@@ -13,7 +13,7 @@ CFLAGS  += -Werror
 
 BIN      = xenalyze dump-raw
 
-HDRS = trace.h analyze.h
+HDRS = trace.h analyze.h mread.h
 
 all: $(BIN)
 
@@ -23,3 +23,6 @@ clean:
 
 %: %.c $(HDRS) Makefile
 	$(CC) $(CFLAGS) -o $@ $<
+
+xenalyze: xenalyze.o mread.o
+	$(CC) $(CFLAGS) -o $@ $^
diff -r 108d02354403 -r f86ebfe66384 mread.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mread.c	Thu Jan 26 17:18:13 2012 +0000
@@ -0,0 +1,160 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <errno.h>
+#include "mread.h"
+
+mread_handle_t mread_init(int fd)
+{
+    struct stat64 s;
+    mread_handle_t h;
+    
+    h=malloc(sizeof(struct mread_ctrl));
+
+    if (!h)
+    {
+        perror("malloc");
+        exit(1);
+    }
+
+    bzero(h, sizeof(struct mread_ctrl));
+
+    h->fd = fd;
+
+    fstat64(fd, &s);
+    h->file_size = s.st_size;
+
+    return h;
+}
+
+ssize_t mread64(mread_handle_t h, void *rec, ssize_t len, loff_t offset)
+{
+    /* Idea: have a "cache" of N mmaped regions.  If the offset is
+     * in one of the regions, just copy it.  If not, evict one of the
+     * regions and map the appropriate range.
+     *
+     * Basic algorithm:
+     *  - See if the offset is in one of the regions
+     *    - If not, map it
+     *       - evict an old region
+     *       - map the new region
+     *  - Copy
+     */
+    char * b=NULL;
+    int bind=-1;
+    loff_t boffset=0;
+    ssize_t bsize;
+
+#define dprintf(x...)
+//#define dprintf fprintf
+
+    dprintf(warn, "%s: offset %llx len %d\n", __func__,
+            offset, len);
+    if ( offset > h->file_size )
+    {
+        dprintf(warn, " offset > file size %llx, returning 0\n",
+                h->file_size);
+        return 0;
+    }
+    if ( offset + len > h->file_size )
+    {
+        dprintf(warn, " offset+len > file size %llx, truncating\n",
+                h->file_size);
+        len = h->file_size - offset;
+    }
+
+    /* Try to find the offset in our range */
+    dprintf(warn, " Trying last, %d\n", last);
+    if ( h->map[h->last].buffer
+         && (offset & MREAD_BUF_MASK) == h->map[h->last].start_offset )
+    {
+        bind=h->last;
+        goto copy;
+    }
+
+    /* Scan to see if it's anywhere else */
+    dprintf(warn, " Scanning\n");
+    for(bind=0; bind<MREAD_MAPS; bind++)
+        if ( h->map[bind].buffer
+             && (offset & MREAD_BUF_MASK) == h->map[bind].start_offset )
+        {
+            dprintf(warn, "  Found, index %d\n", bind);
+            break;
+        }
+
+    /* If we didn't find it, evict someone and map it */
+    if ( bind == MREAD_MAPS )
+    {
+        dprintf(warn, " Clock\n");
+        while(1)
+        {
+            h->clock++;
+            if(h->clock >= MREAD_MAPS)
+                h->clock=0;
+            dprintf(warn, "  %d\n", h->clock);
+            if(h->map[h->clock].buffer == NULL)
+            {
+                dprintf(warn, "  Buffer null, using\n");
+                break;
+            }
+            if(!h->map[h->clock].accessed)
+            {
+                dprintf(warn, "  Not accessed, using\n");
+                break;
+            }
+            h->map[h->clock].accessed=0;
+        }
+        if(h->map[h->clock].buffer)
+        {
+            dprintf(warn, "  Unmapping\n");
+            munmap(h->map[h->clock].buffer, MREAD_BUF_SIZE);
+        }
+        /* FIXME: Try MAP_HUGETLB? */
+        /* FIXME: Make sure this works on large files... */
+        h->map[h->clock].start_offset = offset & MREAD_BUF_MASK;
+        dprintf(warn, "  Mapping %llx from offset %llx\n",
+                MREAD_BUF_SIZE, h->map[h->clock].start_offset);
+        h->map[h->clock].buffer = mmap(NULL, MREAD_BUF_SIZE, PROT_READ,
+                                  MAP_SHARED,
+                                  h->fd,
+                                  h->map[h->clock].start_offset);
+        dprintf(warn, "   mmap returned %p\n", h->map[h->clock].buffer);
+        if ( h->map[h->clock].buffer == MAP_FAILED )
+        {
+            h->map[h->clock].buffer = NULL;
+            perror("mmap");
+            exit(1);
+        }
+        bind = h->clock;
+    }
+
+    h->last=bind;
+copy:
+    h->map[bind].accessed=1;
+    b=h->map[bind].buffer;
+    boffset=offset - h->map[bind].start_offset;
+    if ( boffset + len > MREAD_BUF_SIZE )
+        bsize = MREAD_BUF_SIZE - boffset;
+    else
+        bsize = len;
+    dprintf(warn, " Using index %d, buffer at %p, buffer offset %llx len %d\n",
+            bind, b, boffset, bsize);
+
+    bcopy(b+boffset, rec, len);
+
+    /* Handle the boundary case; make sure this is after doing anything
+     * with the static variables*/
+    if ( len > bsize )
+    {
+        dprintf(warn, "  Finishing up by reading l %d o %llx\n",
+                len-bsize, offset+bsize);
+        mread64(h, rec+bsize, len-bsize, offset+bsize);
+    }
+
+    /* FIXME: ?? */
+    return len;
+#undef dprintf
+}
diff -r 108d02354403 -r f86ebfe66384 mread.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mread.h	Thu Jan 26 17:18:13 2012 +0000
@@ -0,0 +1,18 @@
+#define MREAD_MAPS 8
+#define MREAD_BUF_SHIFT 9
+#define PAGE_SHIFT 12
+#define MREAD_BUF_SIZE (1ULL<<(PAGE_SHIFT+MREAD_BUF_SHIFT))
+#define MREAD_BUF_MASK (~(MREAD_BUF_SIZE-1))
+typedef struct mread_ctrl {
+    int fd;
+    loff_t file_size;
+    struct mread_buffer {
+        char * buffer;
+        loff_t start_offset;
+        int accessed;
+    } map[MREAD_MAPS];
+    int clock, last;
+} *mread_handle_t;
+
+mread_handle_t mread_init(int fd);
+ssize_t mread64(mread_handle_t h, void *dst, ssize_t len, loff_t offset);
diff -r 108d02354403 -r f86ebfe66384 xenalyze.c
--- a/xenalyze.c	Thu Jan 26 17:17:53 2012 +0000
+++ b/xenalyze.c	Thu Jan 26 17:18:13 2012 +0000
@@ -31,11 +31,15 @@
 #include <unistd.h>
 #include "trace.h"
 #include "analyze.h"
+#include "mread.h"
 #include <errno.h>
 #include <strings.h>
 #include <string.h>
 #include <assert.h>
 
+struct mread_ctrl;
+
+
 typedef unsigned long long tsc_t;
 
 #define DEFAULT_CPU_HZ 2400000000LL
@@ -60,6 +64,7 @@ struct array_struct {
 /* -- Global variables -- */
 struct {
     int fd;
+    struct mread_ctrl *mh;
     struct symbol_struct * symbols;
     char * symbol_file;
     char * trace_file;
@@ -257,18 +262,23 @@ struct {
 
 /* -- on-disk trace buffer definitions -- */
 struct trace_record {
-    unsigned event:28,
-        extra_words:3,
-        cycle_flag:1;
     union {
         struct {
-            uint32_t tsc_lo, tsc_hi;
-            uint32_t data[7];
-        } tsc;
-        struct {
-            uint32_t data[7];
-        } notsc;
-    } u;
+            unsigned event:28,
+                extra_words:3,
+                cycle_flag:1;
+            union {
+                struct {
+                    uint32_t tsc_lo, tsc_hi;
+                    uint32_t data[7];
+                } tsc;
+                struct {
+                    uint32_t data[7];
+                } notsc;
+            } u;
+        };
+        uint32_t raw[8];
+    };
 };
 
 FILE *warn = NULL;
@@ -1943,7 +1953,7 @@ char * pcpu_string(int pcpu);
 void pcpu_string_draw(struct pcpu_info *p);
 void process_generic(struct record_info *ri);
 void dump_generic(FILE *f, struct record_info *ri);
-ssize_t __read_record(int fd, struct trace_record *rec, loff_t offset);
+ssize_t __read_record(struct trace_record *rec, loff_t offset);
 void error(enum error_level l, struct record_info *ri);
 void update_io_address(struct io_address ** list, unsigned int pa, int dir,
                        tsc_t arc_cycles, unsigned int va);
@@ -8123,17 +8133,26 @@ void dump_raw(char * s, struct record_in
     int i;
 
     if(ri->rec.cycle_flag)
-        printf("%s %x %d %lld [",
+        printf("%s %7x %d %14lld [",
                s, ri->event, ri->extra_words, ri->tsc);
     else
-        printf("%s %x %d - [",
-               s, ri->event, ri->extra_words);
-
-    for(i=0; i<ri->extra_words; i++) {
-        printf(" %x", ri->d[i]);
+        printf("%s %7x %d %14s [",
+               s, ri->event, ri->extra_words, "-");
+
+    for(i=0; i<7; i++) {
+        if ( i < ri->extra_words )
+            printf(" %8x", ri->d[i]);
+        else
+            printf("         ");
     }
         
-    printf(" ]\n");
+    printf(" ] | ");
+
+    for (i=0; i<8; i++) {
+        printf(" %08x", ri->rec.raw[i]);
+    }
+
+    printf(" |\n");
 }
 
 void error(enum error_level l, struct record_info *ri)
@@ -8455,7 +8474,7 @@ loff_t scan_for_new_pcpu(loff_t offset) 
     struct trace_record rec;
     struct cpu_change_data *cd;
     
-    r=__read_record(G.fd, &rec, offset);
+    r=__read_record(&rec, offset);
 
     if(r==0)
         return 0;
@@ -9028,11 +9047,11 @@ void progress_finish(void) {
     }
 }
 
-ssize_t __read_record(int fd, struct trace_record *rec, loff_t offset)
+ssize_t __read_record(struct trace_record *rec, loff_t offset)
 {
     ssize_t r, rsize;
 
-    r=pread64(G.fd, rec, sizeof(*rec), offset);
+    r=mread64(G.mh, rec, sizeof(*rec), offset);
 
     if(r < 0) {
         /* Read error */
@@ -9110,14 +9129,14 @@ void __fill_in_record_info(struct pcpu_i
     ri->cpu = p->pid;
 }
 
-ssize_t read_record(int fd, struct pcpu_info * p) {
+ssize_t read_record(struct pcpu_info * p) {
     loff_t * offset;
     struct record_info *ri;
 
     offset = &p->file_offset;
     ri = &p->ri;
 
-    ri->size = __read_record(fd, &ri->rec, *offset);
+    ri->size = __read_record(&ri->rec, *offset);
     if(ri->size)
     {
         __fill_in_record_info(p);
@@ -9317,7 +9336,7 @@ void process_records(void) {
             }
         }
         else
-            read_record(G.fd, p);
+            read_record(p);
 
         /* Update this pcpu in the processing order */
         if ( p->active )
@@ -10304,6 +10323,10 @@ int main(int argc, char *argv[]) {
         fstat64(G.fd, &s);
         G.file_size = s.st_size;
     }
+
+    if ( (G.mh = mread_init(G.fd)) == NULL )
+        perror("mread");
+
     if (G.symbol_file != NULL)
         parse_symbol_file(G.symbol_file);

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 8 of 8] xenalyze: Get rid of redundant hvm dump_header
  2012-01-26 17:20 [PATCH 0 of 8] xenalyze: Optimizations George Dunlap
                   ` (6 preceding siblings ...)
  2012-01-26 17:21 ` [PATCH 7 of 8] xenalyze: Introduce more efficient read mechanism George Dunlap
@ 2012-01-26 17:21 ` George Dunlap
  7 siblings, 0 replies; 9+ messages in thread
From: George Dunlap @ 2012-01-26 17:21 UTC (permalink / raw)
  To: xen-devel; +Cc: george.dunlap

No need to have a second dump_header.

Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com>

diff -r f86ebfe66384 -r 84d9c7ac3cbf xenalyze.c
--- a/xenalyze.c	Thu Jan 26 17:18:13 2012 +0000
+++ b/xenalyze.c	Thu Jan 26 17:18:43 2012 +0000
@@ -1435,7 +1435,6 @@ struct hvm_data {
     tsc_t exit_tsc, arc_cycles, entry_tsc;
     unsigned long long rip;
     unsigned exit_reason, event_handler;
-    char dump_header[256];
     int short_summary_done:1, prealloc_unpin:1, wrmap_bf:1;
 
     /* Immediate processing */
@@ -3800,11 +3799,11 @@ void hvm_mmio_assist_process(struct reco
     {
         if(m->data_valid)
             printf("]%s mmio_assist %c gpa %llx data %x\n",
-                   h->dump_header,
+                   ri->dump_header,
                    mevt.write?'w':'r',
                    m->gpa, m->data);
         else
-            printf("]%s mmio_assist %c gpa %llx (no data)\n", h->dump_header,
+            printf("]%s mmio_assist %c gpa %llx (no data)\n", ri->dump_header,
                    mevt.write?'w':'r', m->gpa);
     }
 
@@ -3995,7 +3994,7 @@ void hvm_io_assist_process(struct record
     if(opt.dump_all)
     {
         printf(" %s io %s port %x val %x\n",
-               h->dump_header,
+               ri->dump_header,
                mevt.write?"write":"read",
                r->x32.port,
                r->x32.data);
@@ -4348,13 +4347,13 @@ void hvm_cr_write_process(struct record_
     {
         if(cr == 3 && h->v->cr3.val) {
             printf("]%s cr_write cr3 val %llx oval %llx %s\n",
-                   h->dump_header, 
+                   ri->dump_header, 
                    val,
                    h->v->cr3.val,
                    (h->v->cr3.val == val)?"flush":"switch");
         } else {
             printf(" %s cr_write cr%d val %llx\n",
-                   h->dump_header, 
+                   ri->dump_header, 
                    cr, val);
 
         }
@@ -4392,7 +4391,7 @@ void hvm_msr_write_process(struct record
     if(opt.dump_all)
     {
         printf(" %s msr_write addr %x val %llx\n",
-               h->dump_header,
+               ri->dump_header,
                r->addr, r->val);
     }
 
@@ -4430,7 +4429,7 @@ void hvm_msr_read_process(struct record_
     if(opt.dump_all)
     {
         printf(" %s msr_read addr %x val %llx\n",
-               h->dump_header,
+               ri->dump_header,
                r->addr, r->val);
     }
 
@@ -4476,12 +4475,12 @@ void hvm_vmcall_process(struct record_in
     if(opt.dump_all) {
         if(r->eax < HYPERCALL_MAX)
             printf(" %s vmcall %2x (%s)\n",
-                   h->dump_header,
+                   ri->dump_header,
                    r->eax,
                    hypercall_name[r->eax]);
         else
             printf(" %s vmcall %2x\n",
-                   h->dump_header,
+                   ri->dump_header,
                    r->eax);
     }
 
@@ -4534,7 +4533,7 @@ void hvm_intr_summary(struct hvm_data *h
 }
 
 
-void hvm_intr_process(struct hvm_data *h)
+void hvm_intr_process(struct record_info *ri, struct hvm_data *h)
 {
     unsigned vec = *(unsigned *)h->d;
 
@@ -4558,12 +4557,12 @@ void hvm_intr_process(struct hvm_data *h
         if ( vec < EXTERNAL_INTERRUPT_MAX &&
              hvm_extint_vector_name[vec] )
             printf(" %s intr vec %s(%x)\n",
-                   h->dump_header,
+                   ri->dump_header,
                    hvm_extint_vector_name[vec],
                    vec);
         else
             printf(" %s intr vec %x\n",
-                   h->dump_header, vec);
+                   ri->dump_header, vec);
     }
 
     if(opt.scatterplot_interrupt_eip
@@ -4695,7 +4694,7 @@ void hvm_npf_process(struct record_info 
 
     if ( opt.dump_all )
         printf(" %s npf gpa %llx q %x mfn %llx t %d\n",
-               h->dump_header,
+               ri->dump_header,
                (unsigned long long)r->gpa, r->qualification,
                (unsigned long long)r->mfn, r->p2mt);
 }
@@ -4708,7 +4707,7 @@ void hvm_rdtsc_process(struct record_inf
 
     if ( opt.dump_all )
         printf(" %s rdtsc %llx %lld %s\n",
-               h->dump_header,
+               ri->dump_header,
                (unsigned long long)r->tsc,
                (unsigned long long)r->tsc,
                h->last_rdtsc > r->tsc ? "BACKWARDS" : "");
@@ -4922,7 +4921,7 @@ needs_vmexit:
     switch(ri->event) {
         /* Records adding to the vmexit reason */
     case TRC_HVM_INTR:
-        hvm_intr_process(h);
+        hvm_intr_process(ri, h);
         break;
     case TRC_HVM_PF_XEN:
     case TRC_HVM_PF_XEN64:
@@ -5259,50 +5258,6 @@ void hvm_vmexit_process(struct record_in
 
     h->post_process = hvm_generic_postprocess;
     h->inflight.generic.event = 0;
-  
-    if ( opt.dump_all) {
-        struct time_struct t;
-        char * c;
-        int len, r;
-
-        abs_cycles_to_time(h->exit_tsc, &t);
-
-        len = DUMP_HEADER_MAX;
-        c = h->dump_header;
-
-        if ( t.time )
-        {
-            r=snprintf(c, len, "%3u.%09u", t.s, t.ns);
-            c+=r;
-            len-=r;
-        }
-        else
-        {
-            r=snprintf(c,
-                       len,
-                       "              ");
-            c+=r;
-            len-=r;
-        }
-        
-        r = snprintf(c, len, " %s", pcpu_string(ri->cpu));
-        c+=r;
-        len-=r;
-        
-        if ( v )
-        {
-            r = snprintf(c, len, " d%dv%d", v->d->did, v->vid);
-            c+=r;
-            len-=r;
-        }
-        else
-        {
-            r = snprintf(c, len, " d?v?");
-            c+=r;
-            len-=r;
-        }
-        
-    }
 }
 
 void hvm_close_vmexit(struct hvm_data *h, tsc_t tsc) {
@@ -5844,7 +5799,7 @@ void shadow_unsync_process(struct record
 
     if(opt.dump_all)
         printf("]%s shadow unsync gmfn %llx va %llx pt_level %d corr %llx\n",
-               h->dump_header,
+               ri->dump_header,
                e->gmfn,
                e->va,
                e->pt_level,
@@ -5873,7 +5828,7 @@ void shadow_emulate_other_process(struct
 
     if(opt.dump_all)
         printf("]%s shadow %s gfn %llx va %llx\n",
-               h->dump_header,
+               ri->dump_header,
                pf_xen_name[sevt.minor],
                e->gfn,
                e->va);
@@ -6016,14 +5971,14 @@ void shadow_fixup_process(struct record_
     {
         if ( e->flag_unsync )
             printf("]%s fixup:unsync va %llx gl1e %llx corr %llx flags (%x)%s\n",
-                   h->dump_header,
+                   ri->dump_header,
                    e->va, e->gl1e,
                    e->corresponding_va,
                    e->flags,
                    flag_string(e));
         else
             printf("]%s fixup va %llx gl1e %llx flags (%x)%s\n",
-                   h->dump_header,
+                   ri->dump_header,
                    e->va, e->gl1e, e->flags,
                    flag_string(e));
     }
@@ -6103,7 +6058,7 @@ void shadow_mmio_process(struct record_i
 
     if(opt.dump_all)
         printf("]%s %smmio va %llx\n",
-                h->dump_header,
+                ri->dump_header,
                 (e->pf_case==PF_XEN_FAST_MMIO)?"fast ":"",
                 e->va);
 
@@ -6200,7 +6155,7 @@ void shadow_propagate_process(struct rec
 
     if(opt.dump_all)
         printf("]%s propagate va %llx gl1e %llx flags (%x)%s\n",
-               h->dump_header,
+               ri->dump_header,
                e->va, e->gl1e, e->flags,
                flag_string(e));

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2012-01-26 17:21 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-01-26 17:20 [PATCH 0 of 8] xenalyze: Optimizations George Dunlap
2012-01-26 17:20 ` [PATCH 1 of 8] xenalyze: Improve record-sorting algorithm George Dunlap
2012-01-26 17:20 ` [PATCH 2 of 8] xenalyze: Remove spurious dump_header construction George Dunlap
2012-01-26 17:21 ` [PATCH 3 of 8] xenalyze: Remove --dump-cooked George Dunlap
2012-01-26 17:21 ` [PATCH 4 of 8] xenalyze: Enable -O2 optimization level George Dunlap
2012-01-26 17:21 ` [PATCH 5 of 8] xenalyze: Rework math to remove two 64-bit divisions George Dunlap
2012-01-26 17:21 ` [PATCH 6 of 8] xenalyze: Eliminate unnecessary cycles_to_time calculation George Dunlap
2012-01-26 17:21 ` [PATCH 7 of 8] xenalyze: Introduce more efficient read mechanism George Dunlap
2012-01-26 17:21 ` [PATCH 8 of 8] xenalyze: Get rid of redundant hvm dump_header George Dunlap

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.