* [PATCH v4 0/2] plugins/cache: multicore cache modelling
@ 2021-08-02 13:31 Mahmoud Mandour
  2021-08-02 13:31 ` [PATCH v4 1/2] plugins/cache: supported " Mahmoud Mandour
  2021-08-02 13:31 ` [PATCH v4 2/2] docs/devel/tcg-plugins: added cores arg to cache plugin Mahmoud Mandour
  0 siblings, 2 replies; 4+ messages in thread
From: Mahmoud Mandour @ 2021-08-02 13:31 UTC (permalink / raw)
  To: qemu-devel; +Cc: Mahmoud Mandour, cota, alex.bennee

Hello,

This series introduces multicore cache modelling in contrib/plugins/cache.c.

Multi-core cache modelling is handled as follows: for full-system
emulation, a private L1 cache is maintained for each core available to
the system. For multi-threaded userspace emulation, a static number of
cores is assumed for the overall system, and every memory access goes
through one of these caches, even if the number of spawned threads
exceeds that number.
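
Conceptually, each guest thread/vcpu simply indexes into an array of
modelled caches. A minimal sketch of the mapping the series implements
(core_cache() is an illustrative helper, not a function in the patches;
'cores' is the plugin's global core count):

    /* Map the accessing vcpu/thread onto one modelled core's cache. */
    static Cache *core_cache(Cache **caches, unsigned int vcpu_index)
    {
        return caches[vcpu_index % cores];
    }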

The patches themselves are identical to the previous version of the
series; the only changes are:

v3 -> v4:
    1. Rebased the patches on top of the current master.
    2. Dropped the patches that were already merged.

Mahmoud Mandour (2):
  plugins/cache: supported multicore cache modelling
  docs/devel/tcg-plugins: added cores arg to cache plugin

 contrib/plugins/cache.c    | 154 ++++++++++++++++++++++++++++---------
 docs/devel/tcg-plugins.rst |  13 ++--
 2 files changed, 127 insertions(+), 40 deletions(-)

-- 
2.25.1


* [PATCH v4 1/2] plugins/cache: supported multicore cache modelling
  2021-08-02 13:31 [PATCH v4 0/2] plugins/cache: multicore cache modelling Mahmoud Mandour
@ 2021-08-02 13:31 ` Mahmoud Mandour
  2021-08-02 17:11   ` Alex Bennée
  2021-08-02 13:31 ` [PATCH v4 2/2] docs/devel/tcg-plugins: added cores arg to cache plugin Mahmoud Mandour
  1 sibling, 1 reply; 4+ messages in thread
From: Mahmoud Mandour @ 2021-08-02 13:31 UTC (permalink / raw)
  To: qemu-devel; +Cc: Alexandre Iooss, Mahmoud Mandour, cota, alex.bennee

Multicore L1 cache modelling is introduced and supported for both
full-system emulation and linux-user.

For full-system emulation, L1 icache and dcache are maintained for each
available core, since this information is exposed to the plugin through
`qemu_plugin_n_vcpus()`.

For linux-user, a static number of cores is assumed (default: 1 core;
this can be overridden with the plugin argument `cores=N`). Every memory
access goes through one of these caches. This approach is taken as it's
somewhat akin to what happens on a real setup, where a program that
dispatches more threads than there are available cores will have those
threads thrash each other's cache contents.

Signed-off-by: Mahmoud Mandour <ma.mandourr@gmail.com>
---
 contrib/plugins/cache.c | 154 +++++++++++++++++++++++++++++++---------
 1 file changed, 119 insertions(+), 35 deletions(-)

diff --git a/contrib/plugins/cache.c b/contrib/plugins/cache.c
index 066ea6d8ec..971569cc9d 100644
--- a/contrib/plugins/cache.c
+++ b/contrib/plugins/cache.c
@@ -23,12 +23,6 @@ static GRand *rng;
 static int limit;
 static bool sys;
 
-static uint64_t dmem_accesses;
-static uint64_t dmisses;
-
-static uint64_t imem_accesses;
-static uint64_t imisses;
-
 enum EvictionPolicy {
     LRU,
     FIFO,
@@ -90,13 +84,22 @@ typedef struct {
     uint64_t imisses;
 } InsnData;
 
+typedef struct {
+    uint64_t dmem_accesses;
+    uint64_t dmisses;
+    uint64_t imem_accesses;
+    uint64_t imisses;
+} CoreStats;
+
 void (*update_hit)(Cache *cache, int set, int blk);
 void (*update_miss)(Cache *cache, int set, int blk);
 
 void (*metadata_init)(Cache *cache);
 void (*metadata_destroy)(Cache *cache);
 
-Cache *dcache, *icache;
+static int cores;
+CoreStats *stats;
+Cache **dcaches, **icaches;
 
 static int pow_of_two(int num)
 {
@@ -233,14 +236,16 @@ static bool bad_cache_params(int blksize, int assoc, int cachesize)
 
 static Cache *cache_init(int blksize, int assoc, int cachesize)
 {
-    if (bad_cache_params(blksize, assoc, cachesize)) {
-        return NULL;
-    }
-
     Cache *cache;
     int i;
     uint64_t blk_mask;
 
+    /*
+     * This function shall not be called directly, and hence expects suitable
+     * parameters.
+     */
+    g_assert(!bad_cache_params(blksize, assoc, cachesize));
+
     cache = g_new(Cache, 1);
     cache->assoc = assoc;
     cache->cachesize = cachesize;
@@ -263,6 +268,24 @@ static Cache *cache_init(int blksize, int assoc, int cachesize)
     return cache;
 }
 
+static Cache **caches_init(int blksize, int assoc, int cachesize)
+{
+    Cache **caches;
+    int i;
+
+    if (bad_cache_params(blksize, assoc, cachesize)) {
+        return NULL;
+    }
+
+    caches = g_new(Cache *, cores);
+
+    for (i = 0; i < cores; i++) {
+        caches[i] = cache_init(blksize, assoc, cachesize);
+    }
+
+    return caches;
+}
+
 static int get_invalid_block(Cache *cache, uint64_t set)
 {
     int i;
@@ -353,6 +376,7 @@ static void vcpu_mem_access(unsigned int vcpu_index, qemu_plugin_meminfo_t info,
 {
     uint64_t effective_addr;
     struct qemu_plugin_hwaddr *hwaddr;
+    int cache_idx;
     InsnData *insn;
 
     hwaddr = qemu_plugin_get_hwaddr(info, vaddr);
@@ -361,14 +385,15 @@ static void vcpu_mem_access(unsigned int vcpu_index, qemu_plugin_meminfo_t info,
     }
 
     effective_addr = hwaddr ? qemu_plugin_hwaddr_phys_addr(hwaddr) : vaddr;
+    cache_idx = vcpu_index % cores;
 
     g_mutex_lock(&mtx);
-    if (!access_cache(dcache, effective_addr)) {
+    if (!access_cache(dcaches[cache_idx], effective_addr)) {
         insn = (InsnData *) userdata;
         insn->dmisses++;
-        dmisses++;
+        stats[cache_idx].dmisses++;
     }
-    dmem_accesses++;
+    stats[cache_idx].dmem_accesses++;
     g_mutex_unlock(&mtx);
 }
 
@@ -376,16 +401,18 @@ static void vcpu_insn_exec(unsigned int vcpu_index, void *userdata)
 {
     uint64_t insn_addr;
     InsnData *insn;
+    int cache_idx;
 
     g_mutex_lock(&mtx);
     insn_addr = ((InsnData *) userdata)->addr;
+    cache_idx = vcpu_index % cores;
 
-    if (!access_cache(icache, insn_addr)) {
+    if (!access_cache(icaches[cache_idx], insn_addr)) {
         insn = (InsnData *) userdata;
         insn->imisses++;
-        imisses++;
+        stats[cache_idx].imisses++;
     }
-    imem_accesses++;
+    stats[cache_idx].imem_accesses++;
     g_mutex_unlock(&mtx);
 }
 
@@ -453,6 +480,15 @@ static void cache_free(Cache *cache)
     g_free(cache);
 }
 
+static void caches_free(Cache **caches)
+{
+    int i;
+
+    for (i = 0; i < cores; i++) {
+        cache_free(caches[i]);
+    }
+}
+
 static int dcmp(gconstpointer a, gconstpointer b)
 {
     InsnData *insn_a = (InsnData *) a;
@@ -461,6 +497,36 @@ static int dcmp(gconstpointer a, gconstpointer b)
     return insn_a->dmisses < insn_b->dmisses ? 1 : -1;
 }
 
+static void append_stats_line(GString *line, CoreStats cs)
+{
+    double dmiss_rate, imiss_rate;
+
+    dmiss_rate = ((double) cs.dmisses) / (cs.dmem_accesses) * 100.0;
+    imiss_rate = ((double) cs.imisses) / (cs.imem_accesses) * 100.0;
+
+    g_string_append_printf(line, "%-14lu %-12lu %9.4lf%%  %-14lu %-12lu"
+                           " %9.4lf%%\n",
+                           cs.dmem_accesses,
+                           cs.dmisses,
+                           cs.dmem_accesses ? dmiss_rate : 0.0,
+                           cs.imem_accesses,
+                           cs.imisses,
+                           cs.imem_accesses ? imiss_rate : 0.0);
+}
+
+static void sum_stats(void)
+{
+    int i;
+
+    g_assert(cores > 1);
+    for (i = 0; i < cores; i++) {
+        stats[cores].imisses += stats[i].imisses;
+        stats[cores].dmisses += stats[i].dmisses;
+        stats[cores].dmem_accesses += stats[i].dmem_accesses;
+        stats[cores].imem_accesses += stats[i].imem_accesses;
+    }
+}
+
 static int icmp(gconstpointer a, gconstpointer b)
 {
     InsnData *insn_a = (InsnData *) a;
@@ -471,19 +537,25 @@ static int icmp(gconstpointer a, gconstpointer b)
 
 static void log_stats(void)
 {
-    g_autoptr(GString) rep = g_string_new("");
-    g_string_append_printf(rep,
-        "Data accesses: %lu, Misses: %lu\nMiss rate: %lf%%\n\n",
-        dmem_accesses,
-        dmisses,
-        ((double) dmisses / (double) dmem_accesses) * 100.0);
-
-    g_string_append_printf(rep,
-        "Instruction accesses: %lu, Misses: %lu\nMiss rate: %lf%%\n\n",
-        imem_accesses,
-        imisses,
-        ((double) imisses / (double) imem_accesses) * 100.0);
+    int i, iters;
+
+    g_autoptr(GString) rep = g_string_new("core #, data accesses, data misses,"
+                                          " dmiss rate, insn accesses,"
+                                          " insn misses, imiss rate\n");
+
+    /* Only iterate and print a sum row if cores > 1 */
+    iters = cores == 1 ? 1 : cores + 1;
+    for (i = 0; i < iters; i++) {
+        if (i == cores) {
+            g_string_append_printf(rep, "%-8s", "sum");
+            sum_stats();
+        } else {
+            g_string_append_printf(rep, "%-8d", i);
+        }
+        append_stats_line(rep, stats[i]);
+    }
 
+    g_string_append(rep, "\n");
     qemu_plugin_outs(rep->str);
 }
 
@@ -530,10 +602,12 @@ static void plugin_exit(qemu_plugin_id_t id, void *p)
     log_stats();
     log_top_insns();
 
-    cache_free(dcache);
-    cache_free(icache);
+    caches_free(dcaches);
+    caches_free(icaches);
 
     g_hash_table_destroy(miss_ht);
+
+    g_free(stats);
 }
 
 static void policy_init(void)
@@ -579,6 +653,8 @@ int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
 
     policy = LRU;
 
+    cores = sys ? qemu_plugin_n_vcpus() : 1;
+
     for (i = 0; i < argc; i++) {
         char *opt = argv[i];
         if (g_str_has_prefix(opt, "iblksize=")) {
@@ -595,6 +671,8 @@ int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
             dcachesize = g_ascii_strtoll(opt + 11, NULL, 10);
         } else if (g_str_has_prefix(opt, "limit=")) {
             limit = g_ascii_strtoll(opt + 6, NULL, 10);
+        } else if (g_str_has_prefix(opt, "cores=")) {
+            cores = g_ascii_strtoll(opt + 6, NULL, 10);
         } else if (g_str_has_prefix(opt, "evict=")) {
             gchar *p = opt + 6;
             if (g_strcmp0(p, "rand") == 0) {
@@ -615,22 +693,28 @@ int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
 
     policy_init();
 
-    dcache = cache_init(dblksize, dassoc, dcachesize);
-    if (!dcache) {
+    dcaches = caches_init(dblksize, dassoc, dcachesize);
+    if (!dcaches) {
         const char *err = cache_config_error(dblksize, dassoc, dcachesize);
         fprintf(stderr, "dcache cannot be constructed from given parameters\n");
         fprintf(stderr, "%s\n", err);
         return -1;
     }
 
-    icache = cache_init(iblksize, iassoc, icachesize);
-    if (!icache) {
+    icaches = caches_init(iblksize, iassoc, icachesize);
+    if (!icaches) {
         const char *err = cache_config_error(iblksize, iassoc, icachesize);
         fprintf(stderr, "icache cannot be constructed from given parameters\n");
         fprintf(stderr, "%s\n", err);
         return -1;
     }
 
+    /*
+     * plus one to save the sum in. If only one core is used then no need to
+     * get an auxiliary struct.
+     */
+    stats = g_new0(CoreStats, cores == 1 ? 1 : cores + 1);
+
     qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
     qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
 
-- 
2.25.1


* [PATCH v4 2/2] docs/devel/tcg-plugins: added cores arg to cache plugin
  2021-08-02 13:31 [PATCH v4 0/2] plugins/cache: multicore cache modelling Mahmoud Mandour
  2021-08-02 13:31 ` [PATCH v4 1/2] plugins/cache: supported " Mahmoud Mandour
@ 2021-08-02 13:31 ` Mahmoud Mandour
  1 sibling, 0 replies; 4+ messages in thread
From: Mahmoud Mandour @ 2021-08-02 13:31 UTC (permalink / raw)
  To: qemu-devel; +Cc: Alexandre Iooss, Mahmoud Mandour, cota, alex.bennee

Signed-off-by: Mahmoud Mandour <ma.mandourr@gmail.com>
---
 docs/devel/tcg-plugins.rst | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/docs/devel/tcg-plugins.rst b/docs/devel/tcg-plugins.rst
index 7e54f12837..863828809d 100644
--- a/docs/devel/tcg-plugins.rst
+++ b/docs/devel/tcg-plugins.rst
@@ -355,11 +355,8 @@ configuration when a given working set is run::
 
 will report the following::
 
-    Data accesses: 996479, Misses: 507
-    Miss rate: 0.050879%
-
-    Instruction accesses: 2641737, Misses: 18617
-    Miss rate: 0.704726%
+    core #, data accesses, data misses, dmiss rate, insn accesses, insn misses, imiss rate
+    0       996695         508             0.0510%  2642799        18617           0.7044%
 
     address, data misses, instruction
     0x424f1e (_int_malloc), 109, movq %rax, 8(%rcx)
@@ -403,3 +400,9 @@ The plugin has a number of arguments, all of them are optional:
   Sets the eviction policy to POLICY. Available policies are: :code:`lru`,
   :code:`fifo`, and :code:`rand`. The plugin will use the specified policy for
   both instruction and data caches. (default: POLICY = :code:`lru`)
+
+  * arg="cores=N"
+
+  Sets the number of cores for which we maintain separate icache and dcache.
+  (default: for linux-user, N = 1, for full system emulation: N = cores
+  available to guest)
-- 
2.25.1
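
For reference, a hypothetical invocation that passes the new argument
(the arg="..." form matches the plugin-argument syntax used elsewhere in
these docs; ./a.out is a placeholder binary):

    qemu-x86_64 -plugin ./contrib/plugins/libcache.so,arg="cores=4" \
        -d plugin ./a.out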


* Re: [PATCH v4 1/2] plugins/cache: supported multicore cache modelling
  2021-08-02 13:31 ` [PATCH v4 1/2] plugins/cache: supported " Mahmoud Mandour
@ 2021-08-02 17:11   ` Alex Bennée
  0 siblings, 0 replies; 4+ messages in thread
From: Alex Bennée @ 2021-08-02 17:11 UTC (permalink / raw)
  To: Mahmoud Mandour; +Cc: Alexandre Iooss, cota, qemu-devel


Mahmoud Mandour <ma.mandourr@gmail.com> writes:

> Multicore L1 cache modelling is introduced and supported for both
> full-system emulation and linux-user.
>
> For full-system emulation, L1 icache and dcache are maintained for each
> available core, since this information is exposed to the plugin through
> `qemu_plugin_n_vcpus()`.
>
> For linux-user, a static number of cores is assumed (default: 1 core;
> this can be overridden with the plugin argument `cores=N`). Every memory
> access goes through one of these caches. This approach is taken as it's
> somewhat akin to what happens on a real setup, where a program that
> dispatches more threads than there are available cores will have those
> threads thrash each other's cache contents.
>
> Signed-off-by: Mahmoud Mandour <ma.mandourr@gmail.com>
> ---
>  contrib/plugins/cache.c | 154 +++++++++++++++++++++++++++++++---------
>  1 file changed, 119 insertions(+), 35 deletions(-)
>
> diff --git a/contrib/plugins/cache.c b/contrib/plugins/cache.c
> index 066ea6d8ec..971569cc9d 100644
> --- a/contrib/plugins/cache.c
> +++ b/contrib/plugins/cache.c
> @@ -23,12 +23,6 @@ static GRand *rng;
>  static int limit;
>  static bool sys;
>  
> -static uint64_t dmem_accesses;
> -static uint64_t dmisses;
> -
> -static uint64_t imem_accesses;
> -static uint64_t imisses;
> -
>  enum EvictionPolicy {
>      LRU,
>      FIFO,
> @@ -90,13 +84,22 @@ typedef struct {
>      uint64_t imisses;
>  } InsnData;
>  
> +typedef struct {
> +    uint64_t dmem_accesses;
> +    uint64_t dmisses;
> +    uint64_t imem_accesses;
> +    uint64_t imisses;
> +} CoreStats;
> +
>  void (*update_hit)(Cache *cache, int set, int blk);
>  void (*update_miss)(Cache *cache, int set, int blk);
>  
>  void (*metadata_init)(Cache *cache);
>  void (*metadata_destroy)(Cache *cache);
>  
> -Cache *dcache, *icache;
> +static int cores;
> +CoreStats *stats;
> +Cache **dcaches, **icaches;
>  
>  static int pow_of_two(int num)
>  {
> @@ -233,14 +236,16 @@ static bool bad_cache_params(int blksize, int assoc, int cachesize)
>  
>  static Cache *cache_init(int blksize, int assoc, int cachesize)
>  {
> -    if (bad_cache_params(blksize, assoc, cachesize)) {
> -        return NULL;
> -    }
> -
>      Cache *cache;
>      int i;
>      uint64_t blk_mask;
>  
> +    /*
> +     * This function shall not be called directly, and hence expects suitable
> +     * parameters.
> +     */
> +    g_assert(!bad_cache_params(blksize, assoc, cachesize));
> +
>      cache = g_new(Cache, 1);
>      cache->assoc = assoc;
>      cache->cachesize = cachesize;
> @@ -263,6 +268,24 @@ static Cache *cache_init(int blksize, int assoc, int cachesize)
>      return cache;
>  }
>  
> +static Cache **caches_init(int blksize, int assoc, int cachesize)
> +{
> +    Cache **caches;
> +    int i;
> +
> +    if (bad_cache_params(blksize, assoc, cachesize)) {
> +        return NULL;
> +    }
> +
> +    caches = g_new(Cache *, cores);
> +
> +    for (i = 0; i < cores; i++) {
> +        caches[i] = cache_init(blksize, assoc, cachesize);
> +    }
> +
> +    return caches;
> +}
> +
>  static int get_invalid_block(Cache *cache, uint64_t set)
>  {
>      int i;
> @@ -353,6 +376,7 @@ static void vcpu_mem_access(unsigned int vcpu_index, qemu_plugin_meminfo_t info,
>  {
>      uint64_t effective_addr;
>      struct qemu_plugin_hwaddr *hwaddr;
> +    int cache_idx;
>      InsnData *insn;
>  
>      hwaddr = qemu_plugin_get_hwaddr(info, vaddr);
> @@ -361,14 +385,15 @@ static void vcpu_mem_access(unsigned int vcpu_index, qemu_plugin_meminfo_t info,
>      }
>  
>      effective_addr = hwaddr ? qemu_plugin_hwaddr_phys_addr(hwaddr) : vaddr;
> +    cache_idx = vcpu_index % cores;
>  
>      g_mutex_lock(&mtx);
> -    if (!access_cache(dcache, effective_addr)) {
> +    if (!access_cache(dcaches[cache_idx], effective_addr)) {
>          insn = (InsnData *) userdata;
>          insn->dmisses++;
> -        dmisses++;
> +        stats[cache_idx].dmisses++;
>      }
> -    dmem_accesses++;
> +    stats[cache_idx].dmem_accesses++;
>      g_mutex_unlock(&mtx);
>  }
>  
> @@ -376,16 +401,18 @@ static void vcpu_insn_exec(unsigned int vcpu_index, void *userdata)
>  {
>      uint64_t insn_addr;
>      InsnData *insn;
> +    int cache_idx;
>  
>      g_mutex_lock(&mtx);

I've been running some experiments and unsurprisingly I'm seeing fairly
massive lock contention here:

  21.31%  qemu-system-aar  [kernel.kallsyms]        [k] syscall_exit_to_user_mode
  10.15%  qemu-system-aar  [kernel.kallsyms]        [k] syscall_return_via_sysret
   7.63%  qemu-system-aar  [kernel.kallsyms]        [k] entry_SYSCALL_64
   4.68%  qemu-system-aar  libcache.so              [.] vcpu_insn_exec
   3.09%  qemu-system-aar  libcache.so              [.] in_cache
   2.92%  qemu-system-aar  libglib-2.0.so.0.6600.8  [.] g_mutex_lock_slowpath
   2.72%  qemu-system-aar  [kernel.kallsyms]        [k] futex_wait_setup
   2.46%  qemu-system-aar  libglib-2.0.so.0.6600.8  [.] g_mutex_unlock
   2.35%  qemu-system-aar  libglib-2.0.so.0.6600.8  [.] g_mutex_lock
   2.26%  qemu-system-aar  [kernel.kallsyms]        [k] futex_wake
   2.26%  qemu-system-aar  [kernel.kallsyms]        [k] entry_SYSCALL_64_after_hwframe
   2.12%  qemu-system-aar  libc-2.31.so             [.] syscall
   1.93%  qemu-system-aar  [kernel.kallsyms]        [k] _raw_spin_lock
   1.77%  qemu-system-aar  [kernel.kallsyms]        [k] native_queued_spin_lock_slowpath
   1.74%  qemu-system-aar  libcache.so              [.] vcpu_mem_access    

While we need locking because we are multi-core, I think we need to
split this up a bit. You probably want to keep the shared mutex for
inserting instruction data into the hash table and looking it up on
generation. However, the individual cache structures should have their
own mutexes, which in the system case should ensure we always use the
fast path.
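
Something like this (untested sketch; it assumes each Cache gains its
own GMutex 'lock' field):

    /* Fast path: only the accessed core's cache lock is taken, so
     * vcpus no longer serialise against one global mutex. */
    static bool locked_access(Cache *cache, uint64_t addr)
    {
        bool hit;

        g_mutex_lock(&cache->lock);
        hit = access_cache(cache, addr);
        g_mutex_unlock(&cache->lock);

        return hit;
    }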

>      insn_addr = ((InsnData *) userdata)->addr;
> +    cache_idx = vcpu_index % cores;
>  
> -    if (!access_cache(icache, insn_addr)) {
> +    if (!access_cache(icaches[cache_idx], insn_addr)) {
>          insn = (InsnData *) userdata;
>          insn->imisses++;
> -        imisses++;

I don't know if it's worth having a mutex per instruction or just
going for an atomic inc operation. I suspect for x86 it will probably do
a fairly decent job when the same instruction isn't contended.
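
For example, with the GCC/Clang builtins (a sketch; it assumes the
global mutex no longer covers this increment):

    /* Relaxed atomic increment: lock-free, worst case is a cache-line
     * bounce when two vcpus miss on the same instruction. */
    __atomic_fetch_add(&insn->imisses, 1, __ATOMIC_RELAXED);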

> +        stats[cache_idx].imisses++;

If we wrap the stats into the cache structures themselves they can
share the same mutex.
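
Layout-wise, something like (illustrative, not from the patch):

    typedef struct {
        /* ... existing geometry and metadata fields ... */
        GMutex lock;     /* taken on every access to this cache */
        CoreStats stats; /* per-core counters, guarded by 'lock' */
    } Cache;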

>      }
> -    imem_accesses++;
> +    stats[cache_idx].imem_accesses++;
>      g_mutex_unlock(&mtx);
>  }
>  
> @@ -453,6 +480,15 @@ static void cache_free(Cache *cache)
>      g_free(cache);
>  }
>  
> +static void caches_free(Cache **caches)
> +{
> +    int i;
> +
> +    for (i = 0; i < cores; i++) {
> +        cache_free(caches[i]);
> +    }
> +}
> +
>  static int dcmp(gconstpointer a, gconstpointer b)
>  {
>      InsnData *insn_a = (InsnData *) a;
> @@ -461,6 +497,36 @@ static int dcmp(gconstpointer a, gconstpointer b)
>      return insn_a->dmisses < insn_b->dmisses ? 1 : -1;
>  }
>  
> +static void append_stats_line(GString *line, CoreStats cs)
> +{
> +    double dmiss_rate, imiss_rate;
> +
> +    dmiss_rate = ((double) cs.dmisses) / (cs.dmem_accesses) * 100.0;
> +    imiss_rate = ((double) cs.imisses) / (cs.imem_accesses) * 100.0;
> +
> +    g_string_append_printf(line, "%-14lu %-12lu %9.4lf%%  %-14lu %-12lu"
> +                           " %9.4lf%%\n",
> +                           cs.dmem_accesses,
> +                           cs.dmisses,
> +                           cs.dmem_accesses ? dmiss_rate : 0.0,
> +                           cs.imem_accesses,
> +                           cs.imisses,
> +                           cs.imem_accesses ? imiss_rate : 0.0);
> +}
> +
> +static void sum_stats(void)
> +{
> +    int i;
> +
> +    g_assert(cores > 1);
> +    for (i = 0; i < cores; i++) {
> +        stats[cores].imisses += stats[i].imisses;
> +        stats[cores].dmisses += stats[i].dmisses;
> +        stats[cores].dmem_accesses += stats[i].dmem_accesses;
> +        stats[cores].imem_accesses += stats[i].imem_accesses;
> +    }
> +}
> +
>  static int icmp(gconstpointer a, gconstpointer b)
>  {
>      InsnData *insn_a = (InsnData *) a;
> @@ -471,19 +537,25 @@ static int icmp(gconstpointer a, gconstpointer b)
>  
>  static void log_stats(void)
>  {
> -    g_autoptr(GString) rep = g_string_new("");
> -    g_string_append_printf(rep,
> -        "Data accesses: %lu, Misses: %lu\nMiss rate: %lf%%\n\n",
> -        dmem_accesses,
> -        dmisses,
> -        ((double) dmisses / (double) dmem_accesses) * 100.0);
> -
> -    g_string_append_printf(rep,
> -        "Instruction accesses: %lu, Misses: %lu\nMiss rate: %lf%%\n\n",
> -        imem_accesses,
> -        imisses,
> -        ((double) imisses / (double) imem_accesses) * 100.0);
> +    int i, iters;
> +
> +    g_autoptr(GString) rep = g_string_new("core #, data accesses, data misses,"
> +                                          " dmiss rate, insn accesses,"
> +                                          " insn misses, imiss rate\n");
> +
> +    /* Only iterate and print a sum row if cores > 1 */
> +    iters = cores == 1 ? 1 : cores + 1;
> +    for (i = 0; i < iters; i++) {
> +        if (i == cores) {
> +            g_string_append_printf(rep, "%-8s", "sum");
> +            sum_stats();
> +        } else {
> +            g_string_append_printf(rep, "%-8d", i);
> +        }
> +        append_stats_line(rep, stats[i]);
> +    }
>  
> +    g_string_append(rep, "\n");
>      qemu_plugin_outs(rep->str);
>  }
>  
> @@ -530,10 +602,12 @@ static void plugin_exit(qemu_plugin_id_t id, void *p)
>      log_stats();
>      log_top_insns();
>  
> -    cache_free(dcache);
> -    cache_free(icache);
> +    caches_free(dcaches);
> +    caches_free(icaches);
>  
>      g_hash_table_destroy(miss_ht);
> +
> +    g_free(stats);
>  }
>  
>  static void policy_init(void)
> @@ -579,6 +653,8 @@ int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
>  
>      policy = LRU;
>  
> +    cores = sys ? qemu_plugin_n_vcpus() : 1;
> +
>      for (i = 0; i < argc; i++) {
>          char *opt = argv[i];
>          if (g_str_has_prefix(opt, "iblksize=")) {
> @@ -595,6 +671,8 @@ int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
>              dcachesize = g_ascii_strtoll(opt + 11, NULL, 10);
>          } else if (g_str_has_prefix(opt, "limit=")) {
>              limit = g_ascii_strtoll(opt + 6, NULL, 10);
> +        } else if (g_str_has_prefix(opt, "cores=")) {
> +            cores = g_ascii_strtoll(opt + 6, NULL, 10);
>          } else if (g_str_has_prefix(opt, "evict=")) {
>              gchar *p = opt + 6;
>              if (g_strcmp0(p, "rand") == 0) {
> @@ -615,22 +693,28 @@ int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
>  
>      policy_init();
>  
> -    dcache = cache_init(dblksize, dassoc, dcachesize);
> -    if (!dcache) {
> +    dcaches = caches_init(dblksize, dassoc, dcachesize);
> +    if (!dcaches) {
>          const char *err = cache_config_error(dblksize, dassoc, dcachesize);
>          fprintf(stderr, "dcache cannot be constructed from given parameters\n");
>          fprintf(stderr, "%s\n", err);
>          return -1;
>      }
>  
> -    icache = cache_init(iblksize, iassoc, icachesize);
> -    if (!icache) {
> +    icaches = caches_init(iblksize, iassoc, icachesize);
> +    if (!icaches) {
>          const char *err = cache_config_error(iblksize, iassoc, icachesize);
>          fprintf(stderr, "icache cannot be constructed from given parameters\n");
>          fprintf(stderr, "%s\n", err);
>          return -1;
>      }
>  
> +    /*
> +     * plus one to save the sum in. If only one core is used then no need to
> +     * get an auxiliary struct.
> +     */
> +    stats = g_new0(CoreStats, cores == 1 ? 1 : cores + 1);
> +
>      qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
>      qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);


-- 
Alex Bennée

