qemu-devel.nongnu.org archive mirror
* [Qemu-devel] [PATCH 1/2] trace: include CPU index in trace_memory_region_ops_*()
@ 2016-02-17 21:29 Hollis Blanchard
  2016-02-17 21:29 ` [Qemu-devel] [PATCH 2/2] trace: separate MMIO tracepoints from TB-access tracepoints Hollis Blanchard
  2016-02-24 14:12 ` [Qemu-devel] [PATCH 1/2] trace: include CPU index in trace_memory_region_ops_*() Stefan Hajnoczi
  0 siblings, 2 replies; 10+ messages in thread
From: Hollis Blanchard @ 2016-02-17 21:29 UTC (permalink / raw)
  To: stefanha; +Cc: pbonzini, Hollis Blanchard, qemu-devel

Knowing which CPU performed an action is essential for understanding SMP guest
behavior.

However, cpu_physical_memory_rw() may be executed by a machine init function,
before any VCPUs are running, when there is no CPU running ('current_cpu' is
NULL). In this case, store -1 in the trace record as the CPU index. Trace
analysis tools may need to be aware of this special case.

Signed-off-by: Hollis Blanchard <hollis_blanchard@mentor.com>
---
 memory.c     | 48 ++++++++++++++++++++++++++++++++++++------------
 trace-events |  8 ++++----
 2 files changed, 40 insertions(+), 16 deletions(-)

diff --git a/memory.c b/memory.c
index 2d87c21..6ae7bae 100644
--- a/memory.c
+++ b/memory.c
@@ -395,13 +395,17 @@ static MemTxResult memory_region_oldmmio_read_accessor(MemoryRegion *mr,
                                                        MemTxAttrs attrs)
 {
     uint64_t tmp;
+    int cpu_index = -1;
+
+    if (current_cpu)
+        cpu_index = current_cpu->cpu_index;
 
     tmp = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr);
     if (mr->subpage) {
-        trace_memory_region_subpage_read(mr, addr, tmp, size);
+        trace_memory_region_subpage_read(cpu_index, mr, addr, tmp, size);
     } else if (TRACE_MEMORY_REGION_OPS_READ_ENABLED) {
         hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr);
-        trace_memory_region_ops_read(mr, abs_addr, tmp, size);
+        trace_memory_region_ops_read(cpu_index, mr, abs_addr, tmp, size);
     }
     *value |= (tmp & mask) << shift;
     return MEMTX_OK;
@@ -416,13 +420,17 @@ static MemTxResult  memory_region_read_accessor(MemoryRegion *mr,
                                                 MemTxAttrs attrs)
 {
     uint64_t tmp;
+    int cpu_index = -1;
+
+    if (current_cpu)
+        cpu_index = current_cpu->cpu_index;
 
     tmp = mr->ops->read(mr->opaque, addr, size);
     if (mr->subpage) {
-        trace_memory_region_subpage_read(mr, addr, tmp, size);
+        trace_memory_region_subpage_read(cpu_index, mr, addr, tmp, size);
     } else if (TRACE_MEMORY_REGION_OPS_READ_ENABLED) {
         hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr);
-        trace_memory_region_ops_read(mr, abs_addr, tmp, size);
+        trace_memory_region_ops_read(cpu_index, mr, abs_addr, tmp, size);
     }
     *value |= (tmp & mask) << shift;
     return MEMTX_OK;
@@ -438,13 +446,17 @@ static MemTxResult memory_region_read_with_attrs_accessor(MemoryRegion *mr,
 {
     uint64_t tmp = 0;
     MemTxResult r;
+    int cpu_index = -1;
+
+    if (current_cpu)
+        cpu_index = current_cpu->cpu_index;
 
     r = mr->ops->read_with_attrs(mr->opaque, addr, &tmp, size, attrs);
     if (mr->subpage) {
-        trace_memory_region_subpage_read(mr, addr, tmp, size);
+        trace_memory_region_subpage_read(cpu_index, mr, addr, tmp, size);
     } else if (TRACE_MEMORY_REGION_OPS_READ_ENABLED) {
         hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr);
-        trace_memory_region_ops_read(mr, abs_addr, tmp, size);
+        trace_memory_region_ops_read(cpu_index, mr, abs_addr, tmp, size);
     }
     *value |= (tmp & mask) << shift;
     return r;
@@ -459,13 +471,17 @@ static MemTxResult memory_region_oldmmio_write_accessor(MemoryRegion *mr,
                                                         MemTxAttrs attrs)
 {
     uint64_t tmp;
+    int cpu_index = -1;
+
+    if (current_cpu)
+        cpu_index = current_cpu->cpu_index;
 
     tmp = (*value >> shift) & mask;
     if (mr->subpage) {
-        trace_memory_region_subpage_write(mr, addr, tmp, size);
+        trace_memory_region_subpage_write(cpu_index, mr, addr, tmp, size);
     } else if (TRACE_MEMORY_REGION_OPS_WRITE_ENABLED) {
         hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr);
-        trace_memory_region_ops_write(mr, abs_addr, tmp, size);
+        trace_memory_region_ops_write(cpu_index, mr, abs_addr, tmp, size);
     }
     mr->ops->old_mmio.write[ctz32(size)](mr->opaque, addr, tmp);
     return MEMTX_OK;
@@ -480,13 +496,17 @@ static MemTxResult memory_region_write_accessor(MemoryRegion *mr,
                                                 MemTxAttrs attrs)
 {
     uint64_t tmp;
+    int cpu_index = -1;
+
+    if (current_cpu)
+        cpu_index = current_cpu->cpu_index;
 
     tmp = (*value >> shift) & mask;
     if (mr->subpage) {
-        trace_memory_region_subpage_write(mr, addr, tmp, size);
+        trace_memory_region_subpage_write(cpu_index, mr, addr, tmp, size);
     } else if (TRACE_MEMORY_REGION_OPS_WRITE_ENABLED) {
         hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr);
-        trace_memory_region_ops_write(mr, abs_addr, tmp, size);
+        trace_memory_region_ops_write(cpu_index, mr, abs_addr, tmp, size);
     }
     mr->ops->write(mr->opaque, addr, tmp, size);
     return MEMTX_OK;
@@ -501,13 +521,17 @@ static MemTxResult memory_region_write_with_attrs_accessor(MemoryRegion *mr,
                                                            MemTxAttrs attrs)
 {
     uint64_t tmp;
+    int cpu_index = -1;
+
+    if (current_cpu)
+        cpu_index = current_cpu->cpu_index;
 
     tmp = (*value >> shift) & mask;
     if (mr->subpage) {
-        trace_memory_region_subpage_write(mr, addr, tmp, size);
+        trace_memory_region_subpage_write(cpu_index, mr, addr, tmp, size);
     } else if (TRACE_MEMORY_REGION_OPS_WRITE_ENABLED) {
         hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr);
-        trace_memory_region_ops_write(mr, abs_addr, tmp, size);
+        trace_memory_region_ops_write(cpu_index, mr, abs_addr, tmp, size);
     }
     return mr->ops->write_with_attrs(mr->opaque, addr, tmp, size, attrs);
 }
diff --git a/trace-events b/trace-events
index a80dc1c..756ce86 100644
--- a/trace-events
+++ b/trace-events
@@ -1626,10 +1626,10 @@ disable exec_tb_exit(void *next_tb, unsigned int flags) "tb:%p flags=%x"
 translate_block(void *tb, uintptr_t pc, uint8_t *tb_code) "tb:%p, pc:0x%"PRIxPTR", tb_code:%p"
 
 # memory.c
-memory_region_ops_read(void *mr, uint64_t addr, uint64_t value, unsigned size) "mr %p addr %#"PRIx64" value %#"PRIx64" size %u"
-memory_region_ops_write(void *mr, uint64_t addr, uint64_t value, unsigned size) "mr %p addr %#"PRIx64" value %#"PRIx64" size %u"
-memory_region_subpage_read(void *mr, uint64_t offset, uint64_t value, unsigned size) "mr %p offset %#"PRIx64" value %#"PRIx64" size %u"
-memory_region_subpage_write(void *mr, uint64_t offset, uint64_t value, unsigned size) "mr %p offset %#"PRIx64" value %#"PRIx64" size %u"
+memory_region_ops_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr %#"PRIx64" value %#"PRIx64" size %u"
+memory_region_ops_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr %#"PRIx64" value %#"PRIx64" size %u"
+memory_region_subpage_read(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset %#"PRIx64" value %#"PRIx64" size %u"
+memory_region_subpage_write(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset %#"PRIx64" value %#"PRIx64" size %u"
 
 # qom/object.c
 object_dynamic_cast_assert(const char *type, const char *target, const char *file, int line, const char *func) "%s->%s (%s:%d:%s)"
-- 
1.9.1
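
As an illustration (made-up values, not output from the patch itself), the
updated format strings would render trace records roughly like this, with
"cpu -1" marking accesses performed before any VCPU exists:

  memory_region_ops_write cpu -1 mr 0x55d0c0ffee00 addr 0xfec00000 value 0x10 size 4
  memory_region_ops_write cpu 1 mr 0x55d0c0ffee00 addr 0xfec00000 value 0x10 size 4

The surrounding framing (timestamp, pid, separators) depends on which trace
backend QEMU was built with.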


* [Qemu-devel] [PATCH 2/2] trace: separate MMIO tracepoints from TB-access tracepoints
  2016-02-17 21:29 [Qemu-devel] [PATCH 1/2] trace: include CPU index in trace_memory_region_ops_*() Hollis Blanchard
@ 2016-02-17 21:29 ` Hollis Blanchard
  2016-02-18 18:37   ` Hollis Blanchard
                     ` (2 more replies)
  2016-02-24 14:12 ` [Qemu-devel] [PATCH 1/2] trace: include CPU index in trace_memory_region_ops_*() Stefan Hajnoczi
  1 sibling, 3 replies; 10+ messages in thread
From: Hollis Blanchard @ 2016-02-17 21:29 UTC (permalink / raw)
  To: stefanha; +Cc: pbonzini, Hollis Blanchard, qemu-devel

Memory accesses to code which has previously been translated into a TB show up
in the MMIO path, so that they may invalidate the TB. It's extremely confusing
to mix those in with device MMIOs, so split them into their own tracepoint.

Signed-off-by: Hollis Blanchard <hollis_blanchard@mentor.com>

---
It took many hours to figure out why some RAM accesses were coming through the
MMIO path instead of being handled inline in the TBs.

On IRC, Paolo expressed some concern about performance, but ultimately agreed
that adding one conditional to an already heavy codepath wouldn't have much
impact.
---
 memory.c     | 25 +++++++++++++++++++++++++
 trace-events |  2 ++
 2 files changed, 27 insertions(+)

diff --git a/memory.c b/memory.c
index 6ae7bae..3d125c9 100644
--- a/memory.c
+++ b/memory.c
@@ -403,6 +403,11 @@ static MemTxResult memory_region_oldmmio_read_accessor(MemoryRegion *mr,
     tmp = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr);
     if (mr->subpage) {
         trace_memory_region_subpage_read(cpu_index, mr, addr, tmp, size);
+    } else if (mr == &io_mem_notdirty) {
+        /* Accesses to code which has previously been translated into a TB show
+         * up in the MMIO path, as accesses to the io_mem_notdirty
+         * MemoryRegion. */
+        trace_memory_region_ops_tb_read(cpu_index, addr, tmp, size);
     } else if (TRACE_MEMORY_REGION_OPS_READ_ENABLED) {
         hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr);
         trace_memory_region_ops_read(cpu_index, mr, abs_addr, tmp, size);
@@ -428,6 +433,11 @@ static MemTxResult  memory_region_read_accessor(MemoryRegion *mr,
     tmp = mr->ops->read(mr->opaque, addr, size);
     if (mr->subpage) {
         trace_memory_region_subpage_read(cpu_index, mr, addr, tmp, size);
+    } else if (mr == &io_mem_notdirty) {
+        /* Accesses to code which has previously been translated into a TB show
+         * up in the MMIO path, as accesses to the io_mem_notdirty
+         * MemoryRegion. */
+        trace_memory_region_ops_tb_read(cpu_index, addr, tmp, size);
     } else if (TRACE_MEMORY_REGION_OPS_READ_ENABLED) {
         hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr);
         trace_memory_region_ops_read(cpu_index, mr, abs_addr, tmp, size);
@@ -454,6 +464,11 @@ static MemTxResult memory_region_read_with_attrs_accessor(MemoryRegion *mr,
     r = mr->ops->read_with_attrs(mr->opaque, addr, &tmp, size, attrs);
     if (mr->subpage) {
         trace_memory_region_subpage_read(cpu_index, mr, addr, tmp, size);
+    } else if (mr == &io_mem_notdirty) {
+        /* Accesses to code which has previously been translated into a TB show
+         * up in the MMIO path, as accesses to the io_mem_notdirty
+         * MemoryRegion. */
+        trace_memory_region_ops_tb_read(cpu_index, addr, tmp, size);
     } else if (TRACE_MEMORY_REGION_OPS_READ_ENABLED) {
         hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr);
         trace_memory_region_ops_read(cpu_index, mr, abs_addr, tmp, size);
@@ -479,6 +494,11 @@ static MemTxResult memory_region_oldmmio_write_accessor(MemoryRegion *mr,
     tmp = (*value >> shift) & mask;
     if (mr->subpage) {
         trace_memory_region_subpage_write(cpu_index, mr, addr, tmp, size);
+    } else if (mr == &io_mem_notdirty) {
+        /* Accesses to code which has previously been translated into a TB show
+         * up in the MMIO path, as accesses to the io_mem_notdirty
+         * MemoryRegion. */
+        trace_memory_region_ops_tb_write(cpu_index, addr, tmp, size);
     } else if (TRACE_MEMORY_REGION_OPS_WRITE_ENABLED) {
         hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr);
         trace_memory_region_ops_write(cpu_index, mr, abs_addr, tmp, size);
@@ -504,6 +524,11 @@ static MemTxResult memory_region_write_accessor(MemoryRegion *mr,
     tmp = (*value >> shift) & mask;
     if (mr->subpage) {
         trace_memory_region_subpage_write(cpu_index, mr, addr, tmp, size);
+    } else if (mr == &io_mem_notdirty) {
+        /* Accesses to code which has previously been translated into a TB show
+         * up in the MMIO path, as accesses to the io_mem_notdirty
+         * MemoryRegion. */
+        trace_memory_region_ops_tb_write(cpu_index, addr, tmp, size);
     } else if (TRACE_MEMORY_REGION_OPS_WRITE_ENABLED) {
         hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr);
         trace_memory_region_ops_write(cpu_index, mr, abs_addr, tmp, size);
diff --git a/trace-events b/trace-events
index 756ce86..7994420 100644
--- a/trace-events
+++ b/trace-events
@@ -1630,6 +1630,8 @@ memory_region_ops_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, u
 memory_region_ops_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr %#"PRIx64" value %#"PRIx64" size %u"
 memory_region_subpage_read(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset %#"PRIx64" value %#"PRIx64" size %u"
 memory_region_subpage_write(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset %#"PRIx64" value %#"PRIx64" size %u"
+memory_region_ops_tb_read(int cpu_index, uint64_t addr, uint64_t value, unsigned size) "cpu %d addr %#"PRIx64" value %#"PRIx64" size %u"
+memory_region_ops_tb_write(int cpu_index, uint64_t addr, uint64_t value, unsigned size) "cpu %d addr %#"PRIx64" value %#"PRIx64" size %u"
 
 # qom/object.c
 object_dynamic_cast_assert(const char *type, const char *target, const char *file, int line, const char *func) "%s->%s (%s:%d:%s)"
-- 
1.9.1
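
As a usage sketch (not from the thread; option syntax may differ between QEMU
versions): with a tracing-enabled build, the new events can be listed in an
events file passed at startup, or toggled from the monitor at runtime:

  $ cat my-events
  memory_region_ops_tb_read
  memory_region_ops_tb_write
  $ qemu-system-x86_64 -trace events=my-events ...

  (qemu) trace-event memory_region_ops_tb_write on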


* Re: [Qemu-devel] [PATCH 2/2] trace: separate MMIO tracepoints from TB-access tracepoints
  2016-02-17 21:29 ` [Qemu-devel] [PATCH 2/2] trace: separate MMIO tracepoints from TB-access tracepoints Hollis Blanchard
@ 2016-02-18 18:37   ` Hollis Blanchard
  2016-02-24 14:13   ` Stefan Hajnoczi
  2016-03-23 16:47   ` Hollis Blanchard
  2 siblings, 0 replies; 10+ messages in thread
From: Hollis Blanchard @ 2016-02-18 18:37 UTC (permalink / raw)
  To: stefanha; +Cc: pbonzini, qemu-devel

On 02/17/2016 01:29 PM, Hollis Blanchard wrote:
> diff --git a/trace-events b/trace-events
> index 756ce86..7994420 100644
> --- a/trace-events
> +++ b/trace-events
> @@ -1630,6 +1630,8 @@ memory_region_ops_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, u
>   memory_region_ops_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr %#"PRIx64" value %#"PRIx64" size %u"
>   memory_region_subpage_read(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset %#"PRIx64" value %#"PRIx64" size %u"
>   memory_region_subpage_write(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset %#"PRIx64" value %#"PRIx64" size %u"
> +memory_region_ops_tb_read(int cpu_index, uint64_t addr, uint64_t value, unsigned size) "cpu %d addr %#"PRIx64" value %#"PRIx64" size %u"
> +memory_region_ops_tb_write(int cpu_index, uint64_t addr, uint64_t value, unsigned size) "cpu %d addr %#"PRIx64" value %#"PRIx64" size %u"

Actually, I'd like to rename these new tracepoints to 
"memory_region_tb_read/write", for consistency with 
"memory_region_subpage_read/write". I'll wait for other comments before 
re-submitting though.

Hollis Blanchard
Mentor Graphics Emulation Division


* Re: [Qemu-devel] [PATCH 1/2] trace: include CPU index in trace_memory_region_ops_*()
  2016-02-17 21:29 [Qemu-devel] [PATCH 1/2] trace: include CPU index in trace_memory_region_ops_*() Hollis Blanchard
  2016-02-17 21:29 ` [Qemu-devel] [PATCH 2/2] trace: separate MMIO tracepoints from TB-access tracepoints Hollis Blanchard
@ 2016-02-24 14:12 ` Stefan Hajnoczi
  1 sibling, 0 replies; 10+ messages in thread
From: Stefan Hajnoczi @ 2016-02-24 14:12 UTC (permalink / raw)
  To: Hollis Blanchard; +Cc: pbonzini, qemu-devel

On Wed, Feb 17, 2016 at 01:29:14PM -0800, Hollis Blanchard wrote:
> Knowing which CPU performed an action is essential for understanding SMP guest
> behavior.
> 
> However, cpu_physical_memory_rw() may be executed by a machine init function,
> before any VCPUs are running, when there is no CPU running ('current_cpu' is
> NULL). In this case, store -1 in the trace record as the CPU index. Trace
> analysis tools may need to be aware of this special case.
> 
> Signed-off-by: Hollis Blanchard <hollis_blanchard@mentor.com>
> ---
>  memory.c     | 48 ++++++++++++++++++++++++++++++++++++------------
>  trace-events |  8 ++++----
>  2 files changed, 40 insertions(+), 16 deletions(-)
> 
> diff --git a/memory.c b/memory.c
> index 2d87c21..6ae7bae 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -395,13 +395,17 @@ static MemTxResult memory_region_oldmmio_read_accessor(MemoryRegion *mr,
>                                                         MemTxAttrs attrs)
>  {
>      uint64_t tmp;
> +    int cpu_index = -1;
> +
> +    if (current_cpu)
> +        cpu_index = current_cpu->cpu_index;

QEMU coding style always uses curly braces, even when the if statement
body only has one line.

Cases like these should be caught by scripts/checkpatch.pl.  I use a git
hook to run it automatically on commit:
http://blog.vmsplice.net/2011/03/how-to-automatically-run-checkpatchpl.html
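
A minimal pre-commit hook along those lines might look like this (a sketch
only; the linked post has a complete version, and it assumes checkpatch.pl
accepts a diff on stdin):

  #!/bin/sh
  # .git/hooks/pre-commit (sketch): refuse the commit if checkpatch complains
  git diff --cached | ./scripts/checkpatch.pl --no-signoff -q - || exit 1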


A helper function would avoid the code duplication throughout this patch:

static int get_cpu_index(void)
{
    if (current_cpu) {
        return current_cpu->cpu_index;
    }
    return -1;
}
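
With such a helper, each accessor's prologue collapses to one line; for
example, the first hunk of the patch would become something like this (a
sketch of the suggestion, not what was merged):

  uint64_t tmp;
  int cpu_index = get_cpu_index();

  tmp = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr);
  if (mr->subpage) {
      trace_memory_region_subpage_read(cpu_index, mr, addr, tmp, size);
  } else if (TRACE_MEMORY_REGION_OPS_READ_ENABLED) {
      hwaddr abs_addr = memory_region_to_absolute_addr(mr, addr);
      trace_memory_region_ops_read(cpu_index, mr, abs_addr, tmp, size);
  }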


* Re: [Qemu-devel] [PATCH 2/2] trace: separate MMIO tracepoints from TB-access tracepoints
  2016-02-17 21:29 ` [Qemu-devel] [PATCH 2/2] trace: separate MMIO tracepoints from TB-access tracepoints Hollis Blanchard
  2016-02-18 18:37   ` Hollis Blanchard
@ 2016-02-24 14:13   ` Stefan Hajnoczi
  2016-03-23 16:47   ` Hollis Blanchard
  2 siblings, 0 replies; 10+ messages in thread
From: Stefan Hajnoczi @ 2016-02-24 14:13 UTC (permalink / raw)
  To: Hollis Blanchard; +Cc: pbonzini, qemu-devel

On Wed, Feb 17, 2016 at 01:29:15PM -0800, Hollis Blanchard wrote:
> Memory accesses to code which has previously been translated into a TB show up
> in the MMIO path, so that they may invalidate the TB. It's extremely confusing
> to mix those in with device MMIOs, so split them into their own tracepoint.
> 
> Signed-off-by: Hollis Blanchard <hollis_blanchard@mentor.com>
> 
> ---
> It took many hours to figure out why some RAM accesses were coming through the
> MMIO path instead of being handled inline in the TBs.
> 
> On IRC, Paolo expressed some concern about performance, but ultimately agreed
> that adding one conditional to an already heavy codepath wouldn't have much
> impact.
> ---
>  memory.c     | 25 +++++++++++++++++++++++++
>  trace-events |  2 ++
>  2 files changed, 27 insertions(+)

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>


* Re: [Qemu-devel] [PATCH 2/2] trace: separate MMIO tracepoints from TB-access tracepoints
  2016-02-17 21:29 ` [Qemu-devel] [PATCH 2/2] trace: separate MMIO tracepoints from TB-access tracepoints Hollis Blanchard
  2016-02-18 18:37   ` Hollis Blanchard
  2016-02-24 14:13   ` Stefan Hajnoczi
@ 2016-03-23 16:47   ` Hollis Blanchard
  2016-03-23 16:53     ` Paolo Bonzini
  2 siblings, 1 reply; 10+ messages in thread
From: Hollis Blanchard @ 2016-03-23 16:47 UTC (permalink / raw)
  To: pbonzini; +Cc: qemu-devel, stefanha

Paolo, is it true that only TB-invalidating writes go through the
io_mem_notdirty path? I'm looking at the live migration code now, and
it seems like every memory write will go through that path when global
dirty memory logging is enabled.

-- 
Hollis Blanchard <hollis_blanchard@mentor.com>
Mentor Graphics Emulation Division

* Re: [Qemu-devel] [PATCH 2/2] trace: separate MMIO tracepoints from TB-access tracepoints
  2016-03-23 16:47   ` Hollis Blanchard
@ 2016-03-23 16:53     ` Paolo Bonzini
  2016-03-23 17:16       ` Hollis Blanchard
  2016-03-24 19:30       ` [Qemu-devel] io_mem_notdirty and live migration Hollis Blanchard
  0 siblings, 2 replies; 10+ messages in thread
From: Paolo Bonzini @ 2016-03-23 16:53 UTC (permalink / raw)
  To: Hollis Blanchard; +Cc: qemu-devel, stefanha



On 23/03/2016 17:47, Hollis Blanchard wrote:
> Paolo, is it true that only TB-invalidating writes go through the
> io_mem_notdirty path? I'm looking at the live migration code now, and it
> seems like every memory write will go through that path when global
> dirty memory logging is enabled.

When live migration is enabled, writes to clean memory (almost all of
them) will go through that path indeed.  Some writes to the framebuffer
will go through that path too.

It depends on

      cpu_physical_memory_is_clean(
                        memory_region_get_ram_addr(section->mr) + xlat))

in tlb_set_page_with_attrs.

Paolo
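
For context, the check Paolo quotes sits in the write-protection logic of
tlb_set_page_with_attrs(); in outline it looks roughly like this (a simplified
sketch, and field spellings vary between QEMU versions):

  /* tlb_set_page_with_attrs(), simplified: writable mappings of clean RAM
   * get TLB_NOTDIRTY, so guest writes take the io_mem_notdirty slow path,
   * where dirty bits are set and overlapping TBs can be invalidated. */
  if (memory_region_is_ram(section->mr)
      && cpu_physical_memory_is_clean(memory_region_get_ram_addr(section->mr)
                                      + xlat)) {
      te->addr_write = address | TLB_NOTDIRTY;
  } else {
      te->addr_write = address;
  }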


* Re: [Qemu-devel] [PATCH 2/2] trace: separate MMIO tracepoints from TB-access tracepoints
  2016-03-23 16:53     ` Paolo Bonzini
@ 2016-03-23 17:16       ` Hollis Blanchard
  2016-03-24 19:30       ` [Qemu-devel] io_mem_notdirty and live migration Hollis Blanchard
  1 sibling, 0 replies; 10+ messages in thread
From: Hollis Blanchard @ 2016-03-23 17:16 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: qemu-devel, stefanha

On Wed, 2016-03-23 at 17:53 +0100, Paolo Bonzini wrote:
> 
> On 23/03/2016 17:47, Hollis Blanchard wrote:
> > 
> > Paolo, is it true that only TB-invalidating writes go through the
> > io_mem_notdirty path? I'm looking at the live migration code now,
> > and it
> > seems like every memory write will go through that path when global
> > dirty memory logging is enabled.
> When live migration is enabled, writes to clean memory (almost all of
> them) will go through that path indeed.  Some writes to the
> framebuffer
> will go through that path too.
> 
> It depends on
> 
>       cpu_physical_memory_is_clean(
>                         memory_region_get_ram_addr(section->mr) +
> xlat))
> 
> in tlb_set_page_with_attrs.

Would "memory_region_notdirty_read/write" be a better tracepoint name
than "memory_region_tb_read/write"?

-- 
Hollis Blanchard <hollis_blanchard@mentor.com>
Mentor Graphics Emulation Division


* Re: [Qemu-devel] io_mem_notdirty and live migration
  2016-03-23 16:53     ` Paolo Bonzini
  2016-03-23 17:16       ` Hollis Blanchard
@ 2016-03-24 19:30       ` Hollis Blanchard
  2016-03-24 22:01         ` Paolo Bonzini
  1 sibling, 1 reply; 10+ messages in thread
From: Hollis Blanchard @ 2016-03-24 19:30 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: qemu-devel

On 03/23/2016 09:53 AM, Paolo Bonzini wrote:
> On 23/03/2016 17:47, Hollis Blanchard wrote:
>> Paolo, is it true that only TB-invalidating writes go through the
>> io_mem_notdirty path? I'm looking at the live migration code now, and it
>> seems like every memory write will go through that path when global
>> dirty memory logging is enabled.
> When live migration is enabled, writes to clean memory (almost all of
> them) will go through that path indeed.  Some writes to the framebuffer
> will go through that path too.
>
> It depends on
>
>        cpu_physical_memory_is_clean(
>                          memory_region_get_ram_addr(section->mr) + xlat))
>
> in tlb_set_page_with_attrs.

I'm guessing that when live migration starts (ram_save_setup), the TLB 
must be flushed so that new entries can be created with the TLB_NOTDIRTY 
flag. Otherwise, pre-migration entries without TLB_NOTDIRTY flag could 
live on, allowing the TBs to directly modify guest RAM without tracking, 
right?

I can't find anything underneath ram_save_setup() that does this, 
though. Am I just missing it?

-- 
Hollis Blanchard <hollis_blanchard@mentor.com>
Mentor Graphics Emulation Division


* Re: [Qemu-devel] io_mem_notdirty and live migration
  2016-03-24 19:30       ` [Qemu-devel] io_mem_notdirty and live migration Hollis Blanchard
@ 2016-03-24 22:01         ` Paolo Bonzini
  0 siblings, 0 replies; 10+ messages in thread
From: Paolo Bonzini @ 2016-03-24 22:01 UTC (permalink / raw)
  To: Hollis Blanchard; +Cc: qemu-devel



----- Original Message -----
> From: "Hollis Blanchard" <hollis_blanchard@mentor.com>
> To: "Paolo Bonzini" <pbonzini@redhat.com>
> Cc: qemu-devel@nongnu.org
> Sent: Thursday, March 24, 2016 8:30:01 PM
> Subject: Re: io_mem_notdirty and live migration
> 
> On 03/23/2016 09:53 AM, Paolo Bonzini wrote:
> > On 23/03/2016 17:47, Hollis Blanchard wrote:
> >> Paolo, is it true that only TB-invalidating writes go through the
> >> io_mem_notdirty path? I'm looking at the live migration code now, and it
> >> seems like every memory write will go through that path when global
> >> dirty memory logging is enabled.
> > When live migration is enabled, writes to clean memory (almost all of
> > them) will go through that path indeed.  Some writes to the framebuffer
> > will go through that path too.
> >
> > It depends on
> >
> >        cpu_physical_memory_is_clean(
> >                          memory_region_get_ram_addr(section->mr) + xlat))
> >
> > in tlb_set_page_with_attrs.
> 
> I'm guessing that when live migration starts (ram_save_setup), the TLB
> must be flushed so that new entries can be created with the TLB_NOTDIRTY
> flag. Otherwise, pre-migration entries without TLB_NOTDIRTY flag could
> live on, allowing the TBs to directly modify guest RAM without tracking,
> right?
> 
> I can't find anything underneath ram_save_setup() that does this,
> though. Am I just missing it?

It's done (in a pretty roundabout way) by tcg_commit, which is called
by memory_global_dirty_log_start's call to memory_region_transaction_commit.

Paolo
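
In outline, the chain Paolo describes looks roughly like this (a rough sketch,
not verbatim QEMU code):

  void memory_global_dirty_log_start(void)
  {
      global_dirty_log = true;
      MEMORY_LISTENER_CALL_GLOBAL(log_global_start, Forward);
      memory_region_transaction_begin();
      memory_region_update_pending = true;
      memory_region_transaction_commit();   /* runs each listener's commit hook */
  }

  static void tcg_commit(MemoryListener *listener)
  {
      CPUAddressSpace *cpuas = container_of(listener, CPUAddressSpace,
                                            tcg_as_listener);
      /* ...rebuild the CPU's memory dispatch table... */
      tlb_flush(cpuas->cpu, 1);   /* drop stale entries; refills re-check dirty state */
  }

So the TLB flush happens as a side effect of the memory transaction commit
triggered by memory_global_dirty_log_start(), and subsequent TLB fills pick up
TLB_NOTDIRTY for clean pages.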

