* [PATCH 0 of 5] xentrace: non-contiguous allocation of per-cpu buffer
@ 2011-05-06 18:25 Olaf Hering
  2011-05-06 18:25 ` [PATCH 1 of 5] Move the global variable t_info_first_offset into calculate_tbuf_size() Olaf Hering
                   ` (5 more replies)
  0 siblings, 6 replies; 10+ messages in thread
From: Olaf Hering @ 2011-05-06 18:25 UTC
  To: xen-devel; +Cc: George Dunlap

This series implements non-contiguous trace buffers.
Please review.

For some reason it's not possible to allocate more than 128MB with repeated
calls to alloc_xenheap_pages(). Any ideas how to reach the theoretical limit of
256MB per cpu?
Also the error path in alloc_trace_bufs() needs a fix; I always run into the
ASSERT there.
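
For reference, the heart of the series is replacing the single
high-order allocation per cpu with one allocation per page, recording
each mfn in t_info. A condensed before/after sketch (taken from the
patch 5 hunks):

    /* before: one contiguous block per cpu */
    rawbuf = alloc_xenheap_pages(order, MEMF_bits(32 + PAGE_SHIFT));

    /* after: one page at a time, each mfn recorded for xentrace */
    for ( i = 0; i < pages; i++ )
    {
        p = alloc_xenheap_pages(0, MEMF_bits(32 + PAGE_SHIFT));
        if ( !p )
            goto out_dealloc;
        t_info_mfn_list[offset++] = virt_to_mfn(p);
    }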

Olaf


* [PATCH 1 of 5] Move the global variable t_info_first_offset into calculate_tbuf_size()
  2011-05-06 18:25 [PATCH 0 of 5] xentrace: non-contiguous allocation of per-cpu buffer Olaf Hering
@ 2011-05-06 18:25 ` Olaf Hering
  2011-05-06 18:25 ` [PATCH 2 of 5] Mark data_size __read_mostly because it's only written once Olaf Hering
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 10+ messages in thread
From: Olaf Hering @ 2011-05-06 18:25 UTC
  To: xen-devel; +Cc: George Dunlap

# HG changeset patch
# User Olaf Hering <olaf@aepfle.de>
# Date 1304697395 -7200
# Node ID a19b5f66ce46efd6f8f697583f9bdbc2b567fdbd
# Parent  39f2942fe56bda90d3285b9f2d4e214f0712375f
Move the global variable t_info_first_offset into calculate_tbuf_size()
because it is only used there. Change the type from u32 to uint32_t to
match the type used elsewhere.

Signed-off-by: Olaf Hering <olaf@aepfle.de>

diff -r 39f2942fe56b -r a19b5f66ce46 xen/common/trace.c
--- a/xen/common/trace.c	Wed May 04 14:46:32 2011 +0100
+++ b/xen/common/trace.c	Fri May 06 17:56:35 2011 +0200
@@ -55,7 +55,6 @@ static DEFINE_PER_CPU_READ_MOSTLY(struct
 static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
 static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
 static u32 data_size;
-static u32 t_info_first_offset __read_mostly;
 
 /* High water mark for trace buffers; */
 /* Send virtual interrupt when buffer level reaches this point */
@@ -94,10 +93,10 @@ static struct notifier_block cpu_nfb = {
     .notifier_call = cpu_callback
 };
 
-static void calc_tinfo_first_offset(void)
+static uint32_t calc_tinfo_first_offset(void)
 {
     int offset_in_bytes = offsetof(struct t_info, mfn_offset[NR_CPUS]);
-    t_info_first_offset = fit_to_type(uint32_t, offset_in_bytes);
+    return fit_to_type(uint32_t, offset_in_bytes);
 }
 
 /**
@@ -107,7 +106,7 @@ static void calc_tinfo_first_offset(void
  * The t_info layout is fixed and cant be changed without breaking xentrace.
  * Initialize t_info_pages based on number of trace pages.
  */
-static int calculate_tbuf_size(unsigned int pages)
+static int calculate_tbuf_size(unsigned int pages, uint32_t t_info_first_offset)
 {
     struct t_buf dummy_size;
     typeof(dummy_size.prod) max_size;
@@ -156,6 +155,7 @@ static int alloc_trace_bufs(unsigned int
     int i, cpu, order;
     /* Start after a fixed-size array of NR_CPUS */
     uint32_t *t_info_mfn_list;
+    uint32_t t_info_first_offset;
     int offset;
 
     if ( t_info )
@@ -165,9 +165,9 @@ static int alloc_trace_bufs(unsigned int
         return -EINVAL;
 
     /* Calculate offset in u32 of first mfn */
-    calc_tinfo_first_offset();
+    t_info_first_offset = calc_tinfo_first_offset();
 
-    pages = calculate_tbuf_size(pages);
+    pages = calculate_tbuf_size(pages, t_info_first_offset);
     order = get_order_from_pages(pages);
 
     t_info = alloc_xenheap_pages(get_order_from_pages(t_info_pages), 0);


* [PATCH 2 of 5] Mark data_size __read_mostly because it's only written once
  2011-05-06 18:25 [PATCH 0 of 5] xentrace: non-contiguous allocation of per-cpu buffer Olaf Hering
  2011-05-06 18:25 ` [PATCH 1 of 5] Move the global variable t_info_first_offset into calculate_tbuf_size() Olaf Hering
@ 2011-05-06 18:25 ` Olaf Hering
  2011-05-06 18:25 ` [PATCH 3 of 5] Remove unneeded cast when assigning pointer value to dst Olaf Hering
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 10+ messages in thread
From: Olaf Hering @ 2011-05-06 18:25 UTC
  To: xen-devel; +Cc: George Dunlap

# HG changeset patch
# User Olaf Hering <olaf@aepfle.de>
# Date 1304697395 -7200
# Node ID 91c40bb4c01a331a41ab6a14a2b5ec7d12e86a76
# Parent  a19b5f66ce46efd6f8f697583f9bdbc2b567fdbd
Mark data_size __read_mostly because it's only written once.

Signed-off-by: Olaf Hering <olaf@aepfle.de>

diff -r a19b5f66ce46 -r 91c40bb4c01a xen/common/trace.c
--- a/xen/common/trace.c	Fri May 06 17:56:35 2011 +0200
+++ b/xen/common/trace.c	Fri May 06 17:56:35 2011 +0200
@@ -54,7 +54,7 @@ static unsigned int t_info_pages;
 static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs);
 static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
 static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
-static u32 data_size;
+static u32 data_size __read_mostly;
 
 /* High water mark for trace buffers; */
 /* Send virtual interrupt when buffer level reaches this point */


* [PATCH 3 of 5] Remove unneeded cast when assigning pointer value to dst
  2011-05-06 18:25 [PATCH 0 of 5] xentrace: non-contiguous allocation of per-cpu buffer Olaf Hering
  2011-05-06 18:25 ` [PATCH 1 of 5] Move the global variable t_info_first_offset into calculate_tbuf_size() Olaf Hering
  2011-05-06 18:25 ` [PATCH 2 of 5] Mark data_size __read_mostly because it's only written once Olaf Hering
@ 2011-05-06 18:25 ` Olaf Hering
  2011-05-06 18:25 ` [PATCH 4 of 5] Update __insert_record() to copy the trace record to individual mfns Olaf Hering
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 10+ messages in thread
From: Olaf Hering @ 2011-05-06 18:25 UTC
  To: xen-devel; +Cc: George Dunlap

# HG changeset patch
# User Olaf Hering <olaf@aepfle.de>
# Date 1304697395 -7200
# Node ID 1631b61acaa8e88437d0f1861409ab1824de2721
# Parent  91c40bb4c01a331a41ab6a14a2b5ec7d12e86a76
Remove unneeded cast when assigning pointer value to dst.
Both arrays are uint32_t and memcpy takes a void pointer.

Signed-off-by: Olaf Hering <olaf@aepfle.de>

diff -r 91c40bb4c01a -r 1631b61acaa8 xen/common/trace.c
--- a/xen/common/trace.c	Fri May 06 17:56:35 2011 +0200
+++ b/xen/common/trace.c	Fri May 06 17:56:35 2011 +0200
@@ -483,7 +483,7 @@ static inline void __insert_record(struc
                                    const void *extra_data)
 {
     struct t_rec *rec;
-    unsigned char *dst;
+    uint32_t *dst;
     unsigned int extra_word = extra / sizeof(u32);
     unsigned int local_rec_size = calc_rec_size(cycles, extra);
     uint32_t next;
@@ -508,13 +508,13 @@ static inline void __insert_record(struc
 
     rec->event = event;
     rec->extra_u32 = extra_word;
-    dst = (unsigned char *)rec->u.nocycles.extra_u32;
+    dst = rec->u.nocycles.extra_u32;
     if ( (rec->cycles_included = cycles) != 0 )
     {
         u64 tsc = (u64)get_cycles();
         rec->u.cycles.cycles_lo = (uint32_t)tsc;
         rec->u.cycles.cycles_hi = (uint32_t)(tsc >> 32);
-        dst = (unsigned char *)rec->u.cycles.extra_u32;
+        dst = rec->u.cycles.extra_u32;
     } 
 
     if ( extra_data && extra )


* [PATCH 4 of 5] Update __insert_record() to copy the trace record to individual mfns
  2011-05-06 18:25 [PATCH 0 of 5] xentrace: non-contiguous allocation of per-cpu buffer Olaf Hering
                   ` (2 preceding siblings ...)
  2011-05-06 18:25 ` [PATCH 3 of 5] Remove unneeded cast when assigning pointer value to dst Olaf Hering
@ 2011-05-06 18:25 ` Olaf Hering
  2011-05-09  9:03   ` Keir Fraser
  2011-05-09 11:24   ` George Dunlap
  2011-05-06 18:25 ` [PATCH 5 of 5] Allocate non-contiguous per-cpu trace buffers Olaf Hering
  2011-05-08 15:07 ` [PATCH 0 of 5] xentrace: non-contiguous allocation of per-cpu buffer Olaf Hering
  5 siblings, 2 replies; 10+ messages in thread
From: Olaf Hering @ 2011-05-06 18:25 UTC
  To: xen-devel; +Cc: George Dunlap

# HG changeset patch
# User Olaf Hering <olaf@aepfle.de>
# Date 1304700881 -7200
# Node ID 1c5da4d9e33c821b9e3276d7aefe7ee16ce7b162
# Parent  1631b61acaa8e88437d0f1861409ab1824de2721
Update __insert_record() to copy the trace record to individual mfns.
This is a prerequisite for changing the per-cpu allocation from
contiguous to non-contiguous.
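
In essence (condensed from the hunks below): a record that would
straddle a page boundary is staged in a local struct first, then
copied out in two parts:

    remaining = PAGE_SIZE - offset;
    if ( rec_size > remaining )          /* straddles two pages */
        rec = &split_rec;                /* stage on the stack */
    else
        rec = (struct t_rec *)(this_page + offset);
    /* ... fill in *rec as before ... */
    if ( rec_size > remaining )
    {
        memcpy(this_page + offset, rec, remaining);
        memcpy(next_page, (char *)rec + remaining, rec_size - remaining);
    }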

Signed-off-by: Olaf Hering <olaf@aepfle.de>

diff -r 1631b61acaa8 -r 1c5da4d9e33c xen/common/trace.c
--- a/xen/common/trace.c	Fri May 06 17:56:35 2011 +0200
+++ b/xen/common/trace.c	Fri May 06 18:54:41 2011 +0200
@@ -52,7 +52,6 @@ static struct t_info *t_info;
 static unsigned int t_info_pages;
 
 static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs);
-static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
 static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
 static u32 data_size __read_mostly;
 
@@ -193,7 +192,6 @@ static int alloc_trace_bufs(unsigned int
 
         per_cpu(t_bufs, cpu) = buf = rawbuf;
         buf->cons = buf->prod = 0;
-        per_cpu(t_data, cpu) = (unsigned char *)(buf + 1);
     }
 
     offset = t_info_first_offset;
@@ -457,10 +455,16 @@ static inline u32 calc_bytes_avail(const
     return data_size - calc_unconsumed_bytes(buf);
 }
 
-static inline struct t_rec *next_record(const struct t_buf *buf,
-                                        uint32_t *next)
+static unsigned char *next_record(const struct t_buf *buf, uint32_t *next,
+                                 unsigned char **next_page,
+                                 uint32_t *offset_in_page)
 {
     u32 x = buf->prod, cons = buf->cons;
+    uint32_t per_cpu_mfn_offset;
+    uint32_t per_cpu_mfn_nr;
+    uint32_t *mfn_list;
+    uint32_t mfn;
+    unsigned char *this_page;
 
     barrier(); /* must read buf->prod and buf->cons only once */
     *next = x;
@@ -472,7 +476,27 @@ static inline struct t_rec *next_record(
 
     ASSERT(x < data_size);
 
-    return (struct t_rec *)&this_cpu(t_data)[x];
+    /* add leading header to get total offset of next record */
+    x += sizeof(struct t_buf);
+    *offset_in_page = x % PAGE_SIZE;
+
+    /* offset into array of mfns */
+    per_cpu_mfn_nr = x / PAGE_SIZE;
+    per_cpu_mfn_offset = t_info->mfn_offset[smp_processor_id()];
+    mfn_list = (uint32_t *)t_info;
+    mfn = mfn_list[per_cpu_mfn_offset + per_cpu_mfn_nr];
+    this_page = mfn_to_virt(mfn);
+    if (per_cpu_mfn_nr + 1 >= opt_tbuf_size)
+    {
+        /* reached end of buffer? */
+        *next_page = NULL;
+    }
+    else
+    {
+        mfn = mfn_list[per_cpu_mfn_offset + per_cpu_mfn_nr + 1];
+        *next_page = mfn_to_virt(mfn);
+    }
+    return this_page;
 }
 
 static inline void __insert_record(struct t_buf *buf,
@@ -482,28 +506,37 @@ static inline void __insert_record(struc
                                    unsigned int rec_size,
                                    const void *extra_data)
 {
-    struct t_rec *rec;
+    struct t_rec split_rec, *rec;
     uint32_t *dst;
+    unsigned char *this_page, *next_page;
     unsigned int extra_word = extra / sizeof(u32);
     unsigned int local_rec_size = calc_rec_size(cycles, extra);
     uint32_t next;
+    uint32_t offset;
+    uint32_t remaining;
 
     BUG_ON(local_rec_size != rec_size);
     BUG_ON(extra & 3);
 
-    rec = next_record(buf, &next);
-    if ( !rec )
+    this_page = next_record(buf, &next, &next_page, &offset);
+    if ( !this_page )
         return;
-    /* Double-check once more that we have enough space.
-     * Don't bugcheck here, in case the userland tool is doing
-     * something stupid. */
-    if ( (unsigned char *)rec + rec_size > this_cpu(t_data) + data_size )
+
+    remaining = PAGE_SIZE - offset;
+
+    if ( unlikely(rec_size > remaining) )
     {
-        if ( printk_ratelimit() )
+        if ( next_page == NULL )
+        {
+            /* access beyond end of buffer */
             printk(XENLOG_WARNING
-                   "%s: size=%08x prod=%08x cons=%08x rec=%u\n",
-                   __func__, data_size, next, buf->cons, rec_size);
-        return;
+                   "%s: size=%08x prod=%08x cons=%08x rec=%u remaining=%u\n",
+                   __func__, data_size, next, buf->cons, rec_size, remaining);
+            return;
+        }
+        rec = &split_rec;
+    } else {
+        rec = (struct t_rec*)(this_page + offset);
     }
 
     rec->event = event;
@@ -520,6 +553,12 @@ static inline void __insert_record(struc
     if ( extra_data && extra )
         memcpy(dst, extra_data, extra);
 
+    if ( unlikely(rec_size > remaining) )
+    {
+        memcpy(this_page + offset, rec, remaining);
+        memcpy(next_page, (char *)rec + remaining, rec_size - remaining);
+    }
+
     wmb();
 
     next += rec_size;


* [PATCH 5 of 5] Allocate non-contiguous per-cpu trace buffers
  2011-05-06 18:25 [PATCH 0 of 5] xentrace: non-contiguous allocation of per-cpu buffer Olaf Hering
                   ` (3 preceding siblings ...)
  2011-05-06 18:25 ` [PATCH 4 of 5] Update __insert_record() to copy the trace record to individual mfns Olaf Hering
@ 2011-05-06 18:25 ` Olaf Hering
  2011-05-08 15:07 ` [PATCH 0 of 5] xentrace: non-contiguous allocation of per-cpu buffer Olaf Hering
  5 siblings, 0 replies; 10+ messages in thread
From: Olaf Hering @ 2011-05-06 18:25 UTC
  To: xen-devel; +Cc: George Dunlap

# HG changeset patch
# User Olaf Hering <olaf@aepfle.de>
# Date 1304706230 -7200
# Node ID bcd0b17bf8a3ab08760b8dcc1ca276defab1ed71
# Parent  1c5da4d9e33c821b9e3276d7aefe7ee16ce7b162
Allocate non-contiguous per-cpu trace buffers.

Signed-off-by: Olaf Hering <olaf@aepfle.de>

diff -r 1c5da4d9e33c -r bcd0b17bf8a3 xen/common/trace.c
--- a/xen/common/trace.c	Fri May 06 18:54:41 2011 +0200
+++ b/xen/common/trace.c	Fri May 06 20:23:50 2011 +0200
@@ -151,7 +151,7 @@ static int calculate_tbuf_size(unsigned 
  */
 static int alloc_trace_bufs(unsigned int pages)
 {
-    int i, cpu, order;
+    int i, cpu;
     /* Start after a fixed-size array of NR_CPUS */
     uint32_t *t_info_mfn_list;
     uint32_t t_info_first_offset;
@@ -167,32 +167,10 @@ static int alloc_trace_bufs(unsigned int
     t_info_first_offset = calc_tinfo_first_offset();
 
     pages = calculate_tbuf_size(pages, t_info_first_offset);
-    order = get_order_from_pages(pages);
 
     t_info = alloc_xenheap_pages(get_order_from_pages(t_info_pages), 0);
     if ( t_info == NULL )
-        goto out_dealloc;
-
-    /*
-     * First, allocate buffers for all of the cpus.  If any
-     * fails, deallocate what you have so far and exit. 
-     */
-    for_each_online_cpu(cpu)
-    {
-        void *rawbuf;
-        struct t_buf *buf;
-
-        if ( (rawbuf = alloc_xenheap_pages(
-                order, MEMF_bits(32 + PAGE_SHIFT))) == NULL )
-        {
-            printk(XENLOG_INFO "xentrace: memory allocation failed "
-                   "on cpu %d\n", cpu);
-            goto out_dealloc;
-        }
-
-        per_cpu(t_bufs, cpu) = buf = rawbuf;
-        buf->cons = buf->prod = 0;
-    }
+        goto out_dealloc_t_info;
 
     offset = t_info_first_offset;
     t_info_mfn_list = (uint32_t *)t_info;
@@ -204,27 +182,50 @@ static int alloc_trace_bufs(unsigned int
     t_info->tbuf_size = pages;
 
     /*
-     * Now share the pages so xentrace can map them, and write them in
-     * the global t_info structure.
+     * Allocate buffers for all of the cpus.
+     * If any fails, deallocate what you have so far and exit.
      */
     for_each_online_cpu(cpu)
     {
-        void *rawbuf = per_cpu(t_bufs, cpu);
-        struct page_info *p = virt_to_page(rawbuf);
-        uint32_t mfn = virt_to_mfn(rawbuf);
-
-        for ( i = 0; i < pages; i++ )
-        {
-            share_xen_page_with_privileged_guests(p + i, XENSHARE_writable);
-
-            t_info_mfn_list[offset + i]=mfn + i;
-        }
-        t_info->mfn_offset[cpu]=offset;
-        printk(XENLOG_INFO "xentrace: p%d mfn %"PRIx32" offset %d\n",
-               cpu, mfn, offset);
-        offset+=i;
+        void *p;
+        struct t_buf *buf;
+        struct page_info *pg;
 
         spin_lock_init(&per_cpu(t_lock, cpu));
+        /* first allocate the first page, it contains the per-cpu metadata */
+        p = alloc_xenheap_pages(0, MEMF_bits(32 + PAGE_SHIFT));
+        if ( !p )
+        {
+            printk(XENLOG_INFO "xentrace: memory allocation failed "
+                   "on cpu %d after %d pages\n", cpu, 0);
+            goto out_dealloc;
+        }
+        per_cpu(t_bufs, cpu) = buf = p;
+        buf->cons = buf->prod = 0;
+
+        t_info->mfn_offset[cpu] = offset;
+        t_info_mfn_list[offset] = virt_to_mfn(p);
+        pg = virt_to_page(p);
+        share_xen_page_with_privileged_guests(pg, XENSHARE_writable);
+
+        printk(XENLOG_INFO "xentrace: p%d mfn %lx offset %d\n",
+               cpu, virt_to_mfn(p), offset);
+
+        /* now the remaining trace pages */
+        offset++;
+        for ( i = 1; i < pages; i++ )
+        {
+            p = alloc_xenheap_pages(0, MEMF_bits(32 + PAGE_SHIFT));
+            if ( !p )
+            {
+                printk(XENLOG_INFO "xentrace: memory allocation failed "
+                       "on cpu %d after %d pages\n", cpu, i);
+                goto out_dealloc;
+            }
+            t_info_mfn_list[offset++] = virt_to_mfn(p);
+            pg = virt_to_page(p);
+            share_xen_page_with_privileged_guests(pg, XENSHARE_writable);
+        }
     }
 
     data_size  = (pages * PAGE_SIZE - sizeof(struct t_buf));
@@ -240,14 +241,18 @@ static int alloc_trace_bufs(unsigned int
 out_dealloc:
     for_each_online_cpu(cpu)
     {
-        void *rawbuf = per_cpu(t_bufs, cpu);
         per_cpu(t_bufs, cpu) = NULL;
-        if ( rawbuf )
+        offset = t_info->mfn_offset[cpu];
+        for ( i = 0; i < pages; i++ )
         {
-            ASSERT(!(virt_to_page(rawbuf)->count_info & PGC_allocated));
-            free_xenheap_pages(rawbuf, order);
+            uint32_t mfn = t_info_mfn_list[offset + i];
+            if ( !mfn )
+                break;
+            ASSERT(!(mfn_to_page(mfn)->count_info & PGC_allocated));
+            free_xenheap_pages(mfn_to_virt(mfn), 0);
         }
     }
+out_dealloc_t_info:
     free_xenheap_pages(t_info, get_order_from_pages(t_info_pages));
     t_info = NULL;
     printk(XENLOG_WARNING "xentrace: allocation failed! Tracing disabled.\n");


* Re: [PATCH 0 of 5] xentrace: non-contiguous allocation of per-cpu buffer
  2011-05-06 18:25 [PATCH 0 of 5] xentrace: non-contiguous allocation of per-cpu buffer Olaf Hering
                   ` (4 preceding siblings ...)
  2011-05-06 18:25 ` [PATCH 5 of 5] Allocate non-contiguous per-cpu trace buffers Olaf Hering
@ 2011-05-08 15:07 ` Olaf Hering
  5 siblings, 0 replies; 10+ messages in thread
From: Olaf Hering @ 2011-05-08 15:07 UTC
  To: xen-devel; +Cc: George Dunlap

On Fri, May 06, Olaf Hering wrote:

> For some reason it's not possible to allocate more than 128MB with repeated
> calls to alloc_xenheap_pages(). Any ideas how to reach the theoretical limit of
> 256MB per cpu?

Dom0 needs to be ballooned down to make room for the trace buffers.
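For example, something like "xm mem-set Domain-0 512" (or booting with
dom0_mem= on the Xen command line) should leave enough heap free for
large per-cpu buffers.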

> Also the error path in alloc_trace_bufs() needs a fix; I always run into the
> ASSERT there.

Besides this issue, the checks in the bogus() function trigger with a
256MB per-cpu buffer.

I will revisit the series and post a new version once I have fixes for
these issues.

Olaf


* Re: [PATCH 4 of 5] Update __insert_record() to copy the trace record to individual mfns
  2011-05-06 18:25 ` [PATCH 4 of 5] Update __insert_record() to copy the trace record to individual mfns Olaf Hering
@ 2011-05-09  9:03   ` Keir Fraser
  2011-05-09  9:31     ` Olaf Hering
  2011-05-09 11:24   ` George Dunlap
  1 sibling, 1 reply; 10+ messages in thread
From: Keir Fraser @ 2011-05-09  9:03 UTC
  To: Olaf Hering, xen-devel; +Cc: George Dunlap

On 06/05/2011 19:25, "Olaf Hering" <olaf@aepfle.de> wrote:

> # HG changeset patch
> # User Olaf Hering <olaf@aepfle.de>
> # Date 1304700881 -7200
> # Node ID 1c5da4d9e33c821b9e3276d7aefe7ee16ce7b162
> # Parent  1631b61acaa8e88437d0f1861409ab1824de2721
> Update __insert_record() to copy the trace record to individual mfns.
> This is a prerequisite for changing the per-cpu allocation from
> contiguous to non-contiguous.

I applied the trivial patches 1-3. I'll wait for Acks from George for
patches 4-5.

 -- Keir

> Signed-off-by: Olaf Hering <olaf@aepfle.de>
> 
> diff -r 1631b61acaa8 -r 1c5da4d9e33c xen/common/trace.c
> --- a/xen/common/trace.c Fri May 06 17:56:35 2011 +0200
> +++ b/xen/common/trace.c Fri May 06 18:54:41 2011 +0200
> @@ -52,7 +52,6 @@ static struct t_info *t_info;
>  static unsigned int t_info_pages;
>  
>  static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs);
> -static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
>  static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
>  static u32 data_size __read_mostly;
>  
> @@ -193,7 +192,6 @@ static int alloc_trace_bufs(unsigned int
>  
>          per_cpu(t_bufs, cpu) = buf = rawbuf;
>          buf->cons = buf->prod = 0;
> -        per_cpu(t_data, cpu) = (unsigned char *)(buf + 1);
>      }
>  
>      offset = t_info_first_offset;
> @@ -457,10 +455,16 @@ static inline u32 calc_bytes_avail(const
>      return data_size - calc_unconsumed_bytes(buf);
>  }
>  
> -static inline struct t_rec *next_record(const struct t_buf *buf,
> -                                        uint32_t *next)
> +static unsigned char *next_record(const struct t_buf *buf, uint32_t *next,
> +                                 unsigned char **next_page,
> +                                 uint32_t *offset_in_page)
>  {
>      u32 x = buf->prod, cons = buf->cons;
> +    uint32_t per_cpu_mfn_offset;
> +    uint32_t per_cpu_mfn_nr;
> +    uint32_t *mfn_list;
> +    uint32_t mfn;
> +    unsigned char *this_page;
>  
>      barrier(); /* must read buf->prod and buf->cons only once */
>      *next = x;
> @@ -472,7 +476,27 @@ static inline struct t_rec *next_record(
>  
>      ASSERT(x < data_size);
>  
> -    return (struct t_rec *)&this_cpu(t_data)[x];
> +    /* add leading header to get total offset of next record */
> +    x += sizeof(struct t_buf);
> +    *offset_in_page = x % PAGE_SIZE;
> +
> +    /* offset into array of mfns */
> +    per_cpu_mfn_nr = x / PAGE_SIZE;
> +    per_cpu_mfn_offset = t_info->mfn_offset[smp_processor_id()];
> +    mfn_list = (uint32_t *)t_info;
> +    mfn = mfn_list[per_cpu_mfn_offset + per_cpu_mfn_nr];
> +    this_page = mfn_to_virt(mfn);
> +    if (per_cpu_mfn_nr + 1 >= opt_tbuf_size)
> +    {
> +        /* reached end of buffer? */
> +        *next_page = NULL;
> +    }
> +    else
> +    {
> +        mfn = mfn_list[per_cpu_mfn_offset + per_cpu_mfn_nr + 1];
> +        *next_page = mfn_to_virt(mfn);
> +    }
> +    return this_page;
>  }
>  
>  static inline void __insert_record(struct t_buf *buf,
> @@ -482,28 +506,37 @@ static inline void __insert_record(struc
>                                     unsigned int rec_size,
>                                     const void *extra_data)
>  {
> -    struct t_rec *rec;
> +    struct t_rec split_rec, *rec;
>      uint32_t *dst;
> +    unsigned char *this_page, *next_page;
>      unsigned int extra_word = extra / sizeof(u32);
>      unsigned int local_rec_size = calc_rec_size(cycles, extra);
>      uint32_t next;
> +    uint32_t offset;
> +    uint32_t remaining;
>  
>      BUG_ON(local_rec_size != rec_size);
>      BUG_ON(extra & 3);
>  
> -    rec = next_record(buf, &next);
> -    if ( !rec )
> +    this_page = next_record(buf, &next, &next_page, &offset);
> +    if ( !this_page )
>          return;
> -    /* Double-check once more that we have enough space.
> -     * Don't bugcheck here, in case the userland tool is doing
> -     * something stupid. */
> -    if ( (unsigned char *)rec + rec_size > this_cpu(t_data) + data_size )
> +
> +    remaining = PAGE_SIZE - offset;
> +
> +    if ( unlikely(rec_size > remaining) )
>      {
> -        if ( printk_ratelimit() )
> +        if ( next_page == NULL )
> +        {
> +            /* access beyond end of buffer */
>              printk(XENLOG_WARNING
> -                   "%s: size=%08x prod=%08x cons=%08x rec=%u\n",
> -                   __func__, data_size, next, buf->cons, rec_size);
> -        return;
> +                   "%s: size=%08x prod=%08x cons=%08x rec=%u remaining=%u\n",
> +                   __func__, data_size, next, buf->cons, rec_size, remaining);
> +            return;
> +        }
> +        rec = &split_rec;
> +    } else {
> +        rec = (struct t_rec*)(this_page + offset);
>      }
>  
>      rec->event = event;
> @@ -520,6 +553,12 @@ static inline void __insert_record(struc
>      if ( extra_data && extra )
>          memcpy(dst, extra_data, extra);
>  
> +    if ( unlikely(rec_size > remaining) )
> +    {
> +        memcpy(this_page + offset, rec, remaining);
> +        memcpy(next_page, (char *)rec + remaining, rec_size - remaining);
> +    }
> +
>      wmb();
>  
>      next += rec_size;
> 


* Re: [PATCH 4 of 5] Update __insert_record() to copy the trace record to individual mfns
  2011-05-09  9:03   ` Keir Fraser
@ 2011-05-09  9:31     ` Olaf Hering
  0 siblings, 0 replies; 10+ messages in thread
From: Olaf Hering @ 2011-05-09  9:31 UTC
  To: Keir Fraser; +Cc: xen-devel, George Dunlap

On Mon, May 09, Keir Fraser wrote:

> On 06/05/2011 19:25, "Olaf Hering" <olaf@aepfle.de> wrote:
> 
> > # HG changeset patch
> > # User Olaf Hering <olaf@aepfle.de>
> > # Date 1304700881 -7200
> > # Node ID 1c5da4d9e33c821b9e3276d7aefe7ee16ce7b162
> > # Parent  1631b61acaa8e88437d0f1861409ab1824de2721
> > Update __insert_record() to copy the trace record to individual mfns.
> > This is a prerequisite for changing the per-cpu allocation from
> > contiguous to non-contiguous.
> 
> I applied the trivial patches 1-3. I'll wait for Acks from George for
> patches 4-5.

Thanks Keir.

There are still issues with large buffers; I will post a new series.

Olaf


* Re: [PATCH 4 of 5] Update __insert_record() to copy the trace record to individual mfns
  2011-05-06 18:25 ` [PATCH 4 of 5] Update __insert_record() to copy the trace record to individual mfns Olaf Hering
  2011-05-09  9:03   ` Keir Fraser
@ 2011-05-09 11:24   ` George Dunlap
  1 sibling, 0 replies; 10+ messages in thread
From: George Dunlap @ 2011-05-09 11:24 UTC
  To: Olaf Hering; +Cc: George Dunlap, xen-devel

On Fri, 2011-05-06 at 19:25 +0100, Olaf Hering wrote:
> # HG changeset patch
> # User Olaf Hering <olaf@aepfle.de>
> # Date 1304700881 -7200
> # Node ID 1c5da4d9e33c821b9e3276d7aefe7ee16ce7b162
> # Parent  1631b61acaa8e88437d0f1861409ab1824de2721
> Update __insert_record() to copy the trace record to individual mfns.
> This is a prerequisite for changing the per-cpu allocation from
> contiguous to non-contiguous.
> 
> Signed-off-by: Olaf Hering <olaf@aepfle.de>
> 
> diff -r 1631b61acaa8 -r 1c5da4d9e33c xen/common/trace.c
> --- a/xen/common/trace.c	Fri May 06 17:56:35 2011 +0200
> +++ b/xen/common/trace.c	Fri May 06 18:54:41 2011 +0200
> @@ -52,7 +52,6 @@ static struct t_info *t_info;
>  static unsigned int t_info_pages;
>  
>  static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs);
> -static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
>  static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
>  static u32 data_size __read_mostly;
>  
> @@ -193,7 +192,6 @@ static int alloc_trace_bufs(unsigned int
>  
>          per_cpu(t_bufs, cpu) = buf = rawbuf;
>          buf->cons = buf->prod = 0;
> -        per_cpu(t_data, cpu) = (unsigned char *)(buf + 1);
>      }
>  
>      offset = t_info_first_offset;
> @@ -457,10 +455,16 @@ static inline u32 calc_bytes_avail(const
>      return data_size - calc_unconsumed_bytes(buf);
>  }
>  
> -static inline struct t_rec *next_record(const struct t_buf *buf,
> -                                        uint32_t *next)
> +static unsigned char *next_record(const struct t_buf *buf, uint32_t *next,
> +                                 unsigned char **next_page,
> +                                 uint32_t *offset_in_page)
>  {
>      u32 x = buf->prod, cons = buf->cons;
> +    uint32_t per_cpu_mfn_offset;
> +    uint32_t per_cpu_mfn_nr;
> +    uint32_t *mfn_list;
> +    uint32_t mfn;
> +    unsigned char *this_page;
>  
>      barrier(); /* must read buf->prod and buf->cons only once */
>      *next = x;
> @@ -472,7 +476,27 @@ static inline struct t_rec *next_record(
>  
>      ASSERT(x < data_size);
>  
> -    return (struct t_rec *)&this_cpu(t_data)[x];
> +    /* add leading header to get total offset of next record */
> +    x += sizeof(struct t_buf);
> +    *offset_in_page = x % PAGE_SIZE;
> +
> +    /* offset into array of mfns */
> +    per_cpu_mfn_nr = x / PAGE_SIZE;
> +    per_cpu_mfn_offset = t_info->mfn_offset[smp_processor_id()];
> +    mfn_list = (uint32_t *)t_info;
> +    mfn = mfn_list[per_cpu_mfn_offset + per_cpu_mfn_nr];
> +    this_page = mfn_to_virt(mfn);
> +    if (per_cpu_mfn_nr + 1 >= opt_tbuf_size)
> +    {
> +        /* reached end of buffer? */
> +        *next_page = NULL;
> +    }
> +    else
> +    {
> +        mfn = mfn_list[per_cpu_mfn_offset + per_cpu_mfn_nr + 1];
> +        *next_page = mfn_to_virt(mfn);
> +    }
> +    return this_page;
>  }

The general approach here looks good, but I'm wondering if there's a way to
reduce the math.  The amount of work done for each trace record posted is
getting pretty big.

I guess first of all the '%' and '/' should be '& (PAGE_SIZE-1)' and
'>> PAGE_SHIFT', respectively.
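
In next_record() that would be something like (equivalent because
PAGE_SIZE is a power of two):

    *offset_in_page = x & (PAGE_SIZE - 1);
    per_cpu_mfn_nr  = x >> PAGE_SHIFT;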

Would it make sense to pre-cache the virtual address of the various MFNs
(i.e., keep a per-cpu virtual address list) rather than doing the
calculation each time?  That might reduce the number of instructions to
find the appropriate virtual addresses.
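
Hypothetically, something along these lines (t_pages invented for
illustration):

    /* cache each trace page's virtual address at allocation time */
    static DEFINE_PER_CPU_READ_MOSTLY(unsigned char **, t_pages);

    /* the lookups in next_record() then collapse to: */
    this_page = this_cpu(t_pages)[per_cpu_mfn_nr];
    *next_page = ( per_cpu_mfn_nr + 1 < opt_tbuf_size )
                 ? this_cpu(t_pages)[per_cpu_mfn_nr + 1] : NULL;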

 -George

>  
>  static inline void __insert_record(struct t_buf *buf,
> @@ -482,28 +506,37 @@ static inline void __insert_record(struc
>                                     unsigned int rec_size,
>                                     const void *extra_data)
>  {
> -    struct t_rec *rec;
> +    struct t_rec split_rec, *rec;
>      uint32_t *dst;
> +    unsigned char *this_page, *next_page;
>      unsigned int extra_word = extra / sizeof(u32);
>      unsigned int local_rec_size = calc_rec_size(cycles, extra);
>      uint32_t next;
> +    uint32_t offset;
> +    uint32_t remaining;
>  
>      BUG_ON(local_rec_size != rec_size);
>      BUG_ON(extra & 3);
>  
> -    rec = next_record(buf, &next);
> -    if ( !rec )
> +    this_page = next_record(buf, &next, &next_page, &offset);
> +    if ( !this_page )
>          return;
> -    /* Double-check once more that we have enough space.
> -     * Don't bugcheck here, in case the userland tool is doing
> -     * something stupid. */
> -    if ( (unsigned char *)rec + rec_size > this_cpu(t_data) + data_size )
> +
> +    remaining = PAGE_SIZE - offset;
> +
> +    if ( unlikely(rec_size > remaining) )
>      {
> -        if ( printk_ratelimit() )
> +        if ( next_page == NULL )
> +        {
> +            /* access beyond end of buffer */
>              printk(XENLOG_WARNING
> -                   "%s: size=%08x prod=%08x cons=%08x rec=%u\n",
> -                   __func__, data_size, next, buf->cons, rec_size);
> -        return;
> +                   "%s: size=%08x prod=%08x cons=%08x rec=%u remaining=%u\n",
> +                   __func__, data_size, next, buf->cons, rec_size, remaining);
> +            return;
> +        }
> +        rec = &split_rec;
> +    } else {
> +        rec = (struct t_rec*)(this_page + offset);
>      }
>  
>      rec->event = event;
> @@ -520,6 +553,12 @@ static inline void __insert_record(struc
>      if ( extra_data && extra )
>          memcpy(dst, extra_data, extra);
>  
> +    if ( unlikely(rec_size > remaining) )
> +    {
> +        memcpy(this_page + offset, rec, remaining);
> +        memcpy(next_page, (char *)rec + remaining, rec_size - remaining);
> +    }
> +
>      wmb();
>  
>      next += rec_size;

