All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH 0/3] Cache modelling TCG plugin
@ 2021-05-29 15:22 Mahmoud Mandour
  2021-05-29 15:22 ` [RFC PATCH 1/3] plugins: Added a new cache modelling plugin Mahmoud Mandour
                   ` (3 more replies)
  0 siblings, 4 replies; 8+ messages in thread
From: Mahmoud Mandour @ 2021-05-29 15:22 UTC (permalink / raw)
  To: qemu-devel; +Cc: Mahmoud Mandour

In this RFC patch series, I propose an initial cache modelling TCG
plugin. As of now, it models separate L1 data cache and L1 instruction
cache. It supports three eviction policies: LRU, random, and FIFO. Once
a policy is chosen, it's used for both instruction and data caches.

Mahmoud Mandour (3):
  plugins: Added a new cache modelling plugin
  plugins: cache: Enabled parameterization and added trace printing
  plugins: cache: Added FIFO and LRU eviction policies.

 contrib/plugins/Makefile |   1 +
 contrib/plugins/cache.c  | 595 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 596 insertions(+)
 create mode 100644 contrib/plugins/cache.c

-- 
2.25.1



^ permalink raw reply	[flat|nested] 8+ messages in thread

* [RFC PATCH 1/3] plugins: Added a new cache modelling plugin
  2021-05-29 15:22 [RFC PATCH 0/3] Cache modelling TCG plugin Mahmoud Mandour
@ 2021-05-29 15:22 ` Mahmoud Mandour
  2021-06-01  9:53   ` Alex Bennée
  2021-05-29 15:22 ` [RFC PATCH 2/3] plugins: cache: Enabled parameterization and added trace printing Mahmoud Mandour
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 8+ messages in thread
From: Mahmoud Mandour @ 2021-05-29 15:22 UTC (permalink / raw)
  To: qemu-devel; +Cc: Mahmoud Mandour, Alex Bennée

Added a cache modelling plugin that uses a static configuration used in
many of the commercial microprocessors and uses random eviction policy.

Signed-off-by: Mahmoud Mandour <ma.mandourr@gmail.com>
---
 contrib/plugins/Makefile |   1 +
 contrib/plugins/cache.c  | 398 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 399 insertions(+)
 create mode 100644 contrib/plugins/cache.c

diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
index b9d7935e5e..2237b47f8b 100644
--- a/contrib/plugins/Makefile
+++ b/contrib/plugins/Makefile
@@ -18,6 +18,7 @@ NAMES += hotpages
 NAMES += howvec
 NAMES += lockstep
 NAMES += hwprofile
+NAMES += cache
 
 SONAMES := $(addsuffix .so,$(addprefix lib,$(NAMES)))
 
diff --git a/contrib/plugins/cache.c b/contrib/plugins/cache.c
new file mode 100644
index 0000000000..f8c15ebed2
--- /dev/null
+++ b/contrib/plugins/cache.c
@@ -0,0 +1,398 @@
+/*
+ * Copyright (C) 2021, Mahmoud Mandour <ma.mandourr@gmail.com>
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ */
+
+#include <inttypes.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <glib.h>
+
+#include <qemu-plugin.h>
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
+
+static GRand *rng;
+static GHashTable *dmiss_ht;
+static GHashTable *imiss_ht;
+
+static GMutex dmtx, imtx;
+
+static int limit;
+static bool sys;
+
+static uint64_t dmem_accesses;
+static uint64_t dmisses;
+
+static uint64_t imem_accesses;
+static uint64_t imisses;
+
+static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW;
+
+enum AccessResult {
+    HIT = 0,
+    MISS = 1
+};
+
+struct InsnData {
+    char *disas_str;
+    uint64_t addr;
+    uint64_t misses;
+};
+
+struct CacheBlock {
+    uint64_t tag;
+    bool valid;
+};
+
+struct CacheSet {
+    struct CacheBlock *blocks;
+};
+
+struct Cache {
+    struct CacheSet *sets;
+    int num_sets;
+
+    int cachesize;
+    int blksize;
+    int assoc;
+
+    uint64_t blk_mask;
+    uint64_t set_mask;
+    uint64_t tag_mask;
+};
+
+struct Cache *dcache, *icache;
+
+static int pow_of_two(int num)
+{
+    g_assert((num & (num - 1)) == 0);
+    int ret = 0;
+    while (num /= 2) {
+        ret++;
+    }
+    return ret;
+}
+
+static inline uint64_t extract_tag(struct Cache *cache, uint64_t addr)
+{
+    return (addr & cache->tag_mask) >>
+        (pow_of_two(cache->num_sets) + pow_of_two(cache->blksize));
+}
+
+static inline uint64_t extract_set(struct Cache *cache, uint64_t addr)
+{
+    return (addr & cache->set_mask) >> (pow_of_two(cache->blksize));
+}
+
+static struct Cache *cache_init(int blksize, int assoc, int cachesize)
+{
+    struct Cache *cache;
+    int i;
+
+    cache = g_new(struct Cache, 1);
+    cache->blksize = blksize;
+    cache->assoc = assoc;
+    cache->cachesize = cachesize;
+    cache->num_sets = cachesize / (blksize * assoc);
+    cache->sets = g_new(struct CacheSet, cache->num_sets);
+
+    for (i = 0; i < cache->num_sets; i++) {
+        cache->sets[i].blocks = g_new0(struct CacheBlock, assoc);
+    }
+
+    cache->blk_mask = blksize - 1;
+    cache->set_mask = ((cache->num_sets - 1) << (pow_of_two(cache->blksize)));
+    cache->tag_mask = ~(cache->set_mask | cache->blk_mask);
+
+    return cache;
+}
+
+static int get_invalid_block(struct Cache *cache, uint64_t set)
+{
+    int i;
+
+    for (i = 0; i < cache->assoc; i++) {
+        if (!cache->sets[set].blocks[i].valid) {
+            /* found an empty way: this access is a cold (compulsory) miss */
+            return i;
+        }
+    }
+
+    /* set is full: conflict/capacity miss, caller must evict a block */
+    return -1;
+}
+
+static int get_replaced_block(struct Cache *cache)
+{
+    return g_rand_int_range(rng, 0, cache->assoc);
+}
+
+static bool in_cache(struct Cache *cache, uint64_t addr)
+{
+    int i;
+    uint64_t tag, set;
+
+    tag = extract_tag(cache, addr);
+    set = extract_set(cache, addr);
+
+    for (i = 0; i < cache->assoc; i++) {
+        if (cache->sets[set].blocks[i].tag == tag &&
+                cache->sets[set].blocks[i].valid) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+static enum AccessResult access_cache(struct Cache *cache, uint64_t addr)
+{
+    uint64_t tag, set;
+    int replaced_blk;
+
+    if (in_cache(cache, addr)) {
+        return HIT;
+    }
+
+    tag = extract_tag(cache, addr);
+    set = extract_set(cache, addr);
+
+    replaced_blk = get_invalid_block(cache, set);
+
+    if (replaced_blk == -1) {
+        replaced_blk = get_replaced_block(cache);
+    }
+
+    cache->sets[set].blocks[replaced_blk].tag = tag;
+    cache->sets[set].blocks[replaced_blk].valid = true;
+
+    return MISS;
+}
+
+struct InsnData *get_or_create(GHashTable *ht, struct InsnData *insn_data,
+                               uint64_t addr)
+{
+    struct InsnData *insn = g_hash_table_lookup(ht, GUINT_TO_POINTER(addr));
+    if (!insn) {
+        g_hash_table_insert(ht, GUINT_TO_POINTER(addr), (gpointer) insn_data);
+        insn = insn_data;
+    }
+
+    return insn;
+}
+
+static void vcpu_mem_access(unsigned int cpu_index, qemu_plugin_meminfo_t info,
+                            uint64_t vaddr, void *userdata)
+{
+    uint64_t insn_addr;
+    uint64_t effective_addr;
+    struct qemu_plugin_hwaddr *hwaddr;
+
+    g_mutex_lock(&dmtx);
+    hwaddr = qemu_plugin_get_hwaddr(info, vaddr);
+    if (hwaddr && qemu_plugin_hwaddr_is_io(hwaddr)) {
+        g_free(userdata);
+        return;
+    }
+
+    insn_addr = ((struct InsnData *) userdata)->addr;
+    effective_addr = hwaddr ? qemu_plugin_hwaddr_phys_addr(hwaddr) : vaddr;
+
+    if (access_cache(dcache, effective_addr) == MISS) {
+        struct InsnData *insn = get_or_create(dmiss_ht, userdata, insn_addr);
+        insn->misses++;
+        dmisses++;
+    }
+    dmem_accesses++;
+    g_mutex_unlock(&dmtx);
+}
+
+static void vcpu_insn_exec(unsigned int vcpu_index, void *userdata)
+{
+    uint64_t addr;
+
+    g_mutex_lock(&imtx);
+    addr = ((struct InsnData *) userdata)->addr;
+
+    if (access_cache(icache, addr) == MISS) {
+        struct InsnData *insn = get_or_create(imiss_ht, userdata, addr);
+        insn->misses++;
+        imisses++;
+    }
+    imem_accesses++;
+    g_mutex_unlock(&imtx);
+}
+
+static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
+{
+    size_t n_insns;
+    size_t i;
+
+    n_insns = qemu_plugin_tb_n_insns(tb);
+    for (i = 0; i < n_insns; i++) {
+        struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
+        uint64_t effective_addr;
+
+        if (sys) {
+            effective_addr = (uint64_t) qemu_plugin_insn_haddr(insn);
+        } else {
+            effective_addr = (uint64_t) qemu_plugin_insn_vaddr(insn);
+        }
+
+        struct InsnData *ddata = g_new(struct InsnData, 1);
+        struct InsnData *idata = g_new(struct InsnData, 1);
+
+        ddata->disas_str = qemu_plugin_insn_disas(insn);
+        ddata->misses = 0;
+        ddata->addr = effective_addr;
+
+        idata->disas_str = g_strdup(ddata->disas_str);
+        idata->misses = 0;
+        idata->addr = effective_addr;
+
+        qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem_access,
+                                         QEMU_PLUGIN_CB_NO_REGS,
+                                         rw, ddata);
+
+        qemu_plugin_register_vcpu_insn_exec_cb(insn, vcpu_insn_exec,
+                                               QEMU_PLUGIN_CB_NO_REGS, idata);
+    }
+}
+
+static void print_entry(gpointer data)
+{
+    struct InsnData *insn = (struct InsnData *) data;
+    g_autoptr(GString) xx = g_string_new("");
+    g_string_append_printf(xx, "0x%" PRIx64 ": %s - misses: %lu\n",
+            insn->addr, insn->disas_str, insn->misses);
+    qemu_plugin_outs(xx->str);
+}
+
+static void free_insn(gpointer data)
+{
+    struct InsnData *insn = (struct InsnData *) data;
+    g_free(insn->disas_str);
+    g_free(insn);
+}
+
+static void free_cache(struct Cache *cache)
+{
+    for (int i = 0; i < cache->num_sets; i++) {
+        g_free(cache->sets[i].blocks);
+    }
+
+    g_free(cache->sets);
+}
+
+static int cmp(gconstpointer a, gconstpointer b)
+{
+    struct InsnData *insn_a = (struct InsnData *) a;
+    struct InsnData *insn_b = (struct InsnData *) b;
+
+    return insn_a->misses < insn_b->misses ? 1 : -1;
+}
+
+static void print_stats()
+{
+    g_autoptr(GString) rep = g_string_new("");
+    g_string_append_printf(rep,
+            "Data accesses: %lu, Misses: %lu\nMiss rate: %lf%%\n\n",
+            dmem_accesses,
+            dmisses,
+            ((double)dmisses / dmem_accesses) * 100.0);
+
+    g_string_append_printf(rep,
+            "Instruction accesses: %lu, Misses: %lu\nMiss rate: %lf%%\n\n",
+            imem_accesses,
+            imisses,
+            ((double)imisses / imem_accesses) * 100.0);
+
+    qemu_plugin_outs(rep->str);
+}
+
+static void plugin_exit()
+{
+    GList *curr;
+    int i;
+
+    g_mutex_lock(&imtx);
+    g_mutex_lock(&dmtx);
+    GList *dmiss_insns = g_hash_table_get_values(dmiss_ht);
+    GList *imiss_insns = g_hash_table_get_values(imiss_ht);
+    dmiss_insns = g_list_sort(dmiss_insns, cmp);
+    imiss_insns = g_list_sort(imiss_insns, cmp);
+
+    print_stats();
+
+    qemu_plugin_outs("Most data-missing instructions\n");
+    for (curr = dmiss_insns, i = 0; curr && i < limit; i++, curr = curr->next) {
+        print_entry(curr->data);
+    }
+
+    qemu_plugin_outs("\nMost fetch-missing instructions\n");
+    for (curr = imiss_insns, i = 0; curr && i < limit; i++, curr = curr->next) {
+        print_entry(curr->data);
+    }
+
+    free_cache(dcache);
+    free_cache(icache);
+
+    g_list_free(dmiss_insns);
+    g_list_free(imiss_insns);
+
+    g_hash_table_destroy(dmiss_ht);
+    g_hash_table_destroy(imiss_ht);
+
+    g_mutex_unlock(&dmtx);
+    g_mutex_unlock(&imtx);
+}
+
+QEMU_PLUGIN_EXPORT
+int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
+                        int argc, char **argv)
+{
+    int i;
+    int iassoc, iblksize, icachesize;
+    int dassoc, dblksize, dcachesize;
+
+    limit = 32;
+    sys = info->system_emulation;
+
+    dassoc = 8;
+    dblksize = 64;
+    dcachesize = dblksize * dassoc * 32;
+
+    iassoc = 8;
+    iblksize = 64;
+    icachesize = iblksize * iassoc * 32;
+
+    rng = g_rand_new();
+
+    for (i = 0; i < argc; i++) {
+        char *opt = argv[i];
+        if (g_str_has_prefix(opt, "limit=")) {
+            limit = g_ascii_strtoull(opt + 6, NULL, 10);
+        } else {
+            fprintf(stderr, "option parsing failed: %s\n", opt);
+            return -1;
+        }
+    }
+
+    dcache = cache_init(dblksize, dassoc, dcachesize);
+    icache = cache_init(iblksize, iassoc, icachesize);
+
+    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
+    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
+
+    dmiss_ht = g_hash_table_new_full(NULL, g_direct_equal, NULL, free_insn);
+    imiss_ht = g_hash_table_new_full(NULL, g_direct_equal, NULL, free_insn);
+
+    return 0;
+}
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [RFC PATCH 2/3] plugins: cache: Enabled parameterization and added trace printing
  2021-05-29 15:22 [RFC PATCH 0/3] Cache modelling TCG plugin Mahmoud Mandour
  2021-05-29 15:22 ` [RFC PATCH 1/3] plugins: Added a new cache modelling plugin Mahmoud Mandour
@ 2021-05-29 15:22 ` Mahmoud Mandour
  2021-05-29 15:22 ` [RFC PATCH 3/3] plugins: cache: Added FIFO and LRU eviction policies Mahmoud Mandour
  2021-05-30  6:36 ` [RFC PATCH 0/3] Cache modelling TCG plugin Philippe Mathieu-Daudé
  3 siblings, 0 replies; 8+ messages in thread
From: Mahmoud Mandour @ 2021-05-29 15:22 UTC (permalink / raw)
  To: qemu-devel; +Cc: Mahmoud Mandour, Alex Bennée

Made both icache and dcache configurable through plugin arguments
and added memory trace printing in a separate file.

Signed-off-by: Mahmoud Mandour <ma.mandourr@gmail.com>
---
 contrib/plugins/cache.c | 68 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 66 insertions(+), 2 deletions(-)

diff --git a/contrib/plugins/cache.c b/contrib/plugins/cache.c
index f8c15ebed2..7d1d185480 100644
--- a/contrib/plugins/cache.c
+++ b/contrib/plugins/cache.c
@@ -22,7 +22,7 @@ static GRand *rng;
 static GHashTable *dmiss_ht;
 static GHashTable *imiss_ht;
 
-static GMutex dmtx, imtx;
+static GMutex dmtx, imtx, fmtx;
 
 static int limit;
 static bool sys;
@@ -33,6 +33,8 @@ static uint64_t dmisses;
 static uint64_t imem_accesses;
 static uint64_t imisses;
 
+FILE *tracefile;
+
 static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW;
 
 enum AccessResult {
@@ -205,6 +207,16 @@ static void vcpu_mem_access(unsigned int cpu_index, qemu_plugin_meminfo_t info,
     insn_addr = ((struct InsnData *) userdata)->addr;
     effective_addr = hwaddr ? qemu_plugin_hwaddr_phys_addr(hwaddr) : vaddr;
 
+    if (tracefile) {
+        g_mutex_lock(&fmtx);
+        g_autoptr(GString) rep = g_string_new("");
+        bool is_store = qemu_plugin_mem_is_store(info);
+        g_string_append_printf(rep, "%c: 0x%" PRIx64,
+                is_store ? 'S' : 'L', effective_addr);
+        fprintf(tracefile, "%s\n", rep->str);
+        g_mutex_unlock(&fmtx);
+    }
+
     if (access_cache(dcache, effective_addr) == MISS) {
         struct InsnData *insn = get_or_create(dmiss_ht, userdata, insn_addr);
         insn->misses++;
@@ -221,11 +233,20 @@ static void vcpu_insn_exec(unsigned int vcpu_index, void *userdata)
     g_mutex_lock(&imtx);
     addr = ((struct InsnData *) userdata)->addr;
 
+    if (tracefile) {
+        g_mutex_lock(&fmtx);
+        g_autoptr(GString) rep = g_string_new("");
+        g_string_append_printf(rep, "I: 0x%" PRIx64, addr);
+        fprintf(tracefile, "%s\n", rep->str);
+        g_mutex_unlock(&fmtx);
+    }
+
     if (access_cache(icache, addr) == MISS) {
         struct InsnData *insn = get_or_create(imiss_ht, userdata, addr);
         insn->misses++;
         imisses++;
     }
+
     imem_accesses++;
     g_mutex_unlock(&imtx);
 }
@@ -352,6 +373,15 @@ static void plugin_exit()
 
     g_mutex_unlock(&dmtx);
     g_mutex_unlock(&imtx);
+
+    if (tracefile) {
+        fclose(tracefile);
+    }
+}
+
+static bool bad_cache_params(int blksize, int assoc, int cachesize)
+{
+    return (cachesize % blksize) != 0 || (cachesize % (blksize * assoc) != 0);
 }
 
 QEMU_PLUGIN_EXPORT
@@ -377,14 +407,48 @@ int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
 
     for (i = 0; i < argc; i++) {
         char *opt = argv[i];
-        if (g_str_has_prefix(opt, "limit=")) {
+        if (g_str_has_prefix(opt, "I=")) {
+            gchar **toks = g_strsplit(opt + 2, " ", -1);
+            if (g_strv_length(toks) != 3) {
+                fprintf(stderr, "option parsing failed: %s\n", opt);
+                return -1;
+            }
+            icachesize = g_ascii_strtoull(toks[0], NULL, 10);
+            iassoc = g_ascii_strtoull(toks[1], NULL, 10);
+            iblksize = g_ascii_strtoull(toks[2], NULL, 10);
+        } else if (g_str_has_prefix(opt, "D=")) {
+            gchar **toks = g_strsplit(opt + 2, " ", -1);
+            if (g_strv_length(toks) != 3) {
+                fprintf(stderr, "option parsing failed: %s\n", opt);
+                return -1;
+            }
+            dcachesize = g_ascii_strtoull(toks[0], NULL, 10);
+            dassoc = g_ascii_strtoull(toks[1], NULL, 10);
+            dblksize = g_ascii_strtoull(toks[2], NULL, 10);
+        } else if (g_str_has_prefix(opt, "limit=")) {
             limit = g_ascii_strtoull(opt + 6, NULL, 10);
+        } else if (g_str_has_prefix(opt, "tracefile=")) {
+            char *file_name = opt + 10;
+            tracefile = fopen(file_name, "w");
+            if (!tracefile) {
+                fprintf(stderr, "could not open: %s for writing\n", file_name);
+            }
         } else {
             fprintf(stderr, "option parsing failed: %s\n", opt);
             return -1;
         }
     }
 
+    if (bad_cache_params(iblksize, iassoc, icachesize)) {
+        fprintf(stderr, "icache cannot be constructed from given parameters\n");
+        return -1;
+    }
+
+    if (bad_cache_params(dblksize, dassoc, dcachesize)) {
+        fprintf(stderr, "dcache cannot be constructed from given parameters\n");
+        return -1;
+    }
+
     dcache = cache_init(dblksize, dassoc, dcachesize);
     icache = cache_init(iblksize, iassoc, icachesize);
 
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [RFC PATCH 3/3] plugins: cache: Added FIFO and LRU eviction policies.
  2021-05-29 15:22 [RFC PATCH 0/3] Cache modelling TCG plugin Mahmoud Mandour
  2021-05-29 15:22 ` [RFC PATCH 1/3] plugins: Added a new cache modelling plugin Mahmoud Mandour
  2021-05-29 15:22 ` [RFC PATCH 2/3] plugins: cache: Enabled parameterization and added trace printing Mahmoud Mandour
@ 2021-05-29 15:22 ` Mahmoud Mandour
  2021-05-30  6:36 ` [RFC PATCH 0/3] Cache modelling TCG plugin Philippe Mathieu-Daudé
  3 siblings, 0 replies; 8+ messages in thread
From: Mahmoud Mandour @ 2021-05-29 15:22 UTC (permalink / raw)
  To: qemu-devel; +Cc: Mahmoud Mandour, Alex Bennée

Now one of the three eviction policies can be chosen as an argument. On
not specifying an argument, LRU is used by default.

Signed-off-by: Mahmoud Mandour <ma.mandourr@gmail.com>
---
 contrib/plugins/cache.c | 159 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 146 insertions(+), 13 deletions(-)

diff --git a/contrib/plugins/cache.c b/contrib/plugins/cache.c
index 7d1d185480..341cd64e41 100644
--- a/contrib/plugins/cache.c
+++ b/contrib/plugins/cache.c
@@ -18,6 +18,8 @@
 
 QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
 
+static bool fifo, lru, rnd;
+
 static GRand *rng;
 static GHashTable *dmiss_ht;
 static GHashTable *imiss_ht;
@@ -55,6 +57,8 @@ struct CacheBlock {
 
 struct CacheSet {
     struct CacheBlock *blocks;
+    uint16_t *priorities;
+    GQueue *evict_queue;
 };
 
 struct Cache {
@@ -93,6 +97,84 @@ static inline uint64_t extract_set(struct Cache *cache, uint64_t addr)
     return (addr & cache->set_mask) >> (pow_of_two(cache->blksize));
 }
 
+static void lru_priorities_init(struct Cache *cache)
+{
+    int i, j;
+
+    for (i = 0; i < cache->num_sets; i++) {
+        cache->sets[i].priorities = g_new(uint16_t, cache->assoc);
+        for (j = 0; j < cache->assoc; j++) {
+            cache->sets[i].priorities[j] = cache->assoc - j - 1;
+        }
+    }
+}
+
+static void lru_update_on_miss(struct Cache *cache,
+                                      int set_idx,
+                                      int blk_idx)
+{
+    int i;
+
+    for (i = 0; i < cache->assoc; i++) {
+        cache->sets[set_idx].priorities[i]++;
+    }
+
+    cache->sets[set_idx].priorities[blk_idx] = 0;
+}
+
+static void lru_update_on_hit(struct Cache *cache,
+                                         int set_idx,
+                                         int blk_idx)
+{
+    uint16_t blk_priority;
+    int i;
+
+    blk_priority = cache->sets[set_idx].priorities[blk_idx];
+    for (i = 0; i < cache->assoc; i++) {
+        if (cache->sets[set_idx].priorities[i] < blk_priority) {
+            cache->sets[set_idx].priorities[i]++;
+        }
+    }
+    cache->sets[set_idx].priorities[blk_idx] = 0;
+}
+
+static int lru_get_lru_block(struct Cache *cache, int set_idx)
+{
+    int i;
+
+    for (i = 0; i < cache->assoc; i++) {
+        if (cache->sets[set_idx].priorities[i] == cache->assoc - 1) {
+            return i;
+        }
+    }
+
+    g_assert_not_reached();
+}
+
+static void fifo_init(struct Cache *cache)
+{
+    int i;
+
+    for (i = 0; i < cache->num_sets; i++) {
+        cache->sets[i].evict_queue = g_queue_new();
+    }
+}
+
+static int fifo_get_first_in_block(struct Cache *cache, int set)
+{
+    GQueue *q = cache->sets[set].evict_queue;
+    return GPOINTER_TO_INT(g_queue_pop_tail(q));
+}
+
+static void fifo_update_on_miss(struct Cache *cache,
+                                int set,
+                                int blk_idx)
+{
+    GQueue *q = cache->sets[set].evict_queue;
+    g_queue_push_head(q, GINT_TO_POINTER(blk_idx));
+}
+
+
 static struct Cache *cache_init(int blksize, int assoc, int cachesize)
 {
     struct Cache *cache;
@@ -113,6 +195,12 @@ static struct Cache *cache_init(int blksize, int assoc, int cachesize)
     cache->set_mask = ((cache->num_sets - 1) << (pow_of_two(cache->blksize)));
     cache->tag_mask = ~(cache->set_mask | cache->blk_mask);
 
+    if (lru) {
+        lru_priorities_init(cache);
+    } else if (fifo) {
+        fifo_init(cache);
+    }
+
     return cache;
 }
 
@@ -131,12 +219,20 @@ static int get_invalid_block(struct Cache *cache, uint64_t set)
     return -1;
 }
 
-static int get_replaced_block(struct Cache *cache)
+static int get_replaced_block(struct Cache *cache, int set)
 {
-    return g_rand_int_range(rng, 0, cache->assoc);
+    if (rnd) {
+        return g_rand_int_range(rng, 0, cache->assoc);
+    } else if (lru) {
+        return lru_get_lru_block(cache, set);
+    } else if (fifo) {
+        return fifo_get_first_in_block(cache, set);
+    }
+
+    g_assert_not_reached();
 }
 
-static bool in_cache(struct Cache *cache, uint64_t addr)
+static int in_cache(struct Cache *cache, uint64_t addr)
 {
     int i;
     uint64_t tag, set;
@@ -147,29 +243,39 @@ static bool in_cache(struct Cache *cache, uint64_t addr)
     for (i = 0; i < cache->assoc; i++) {
         if (cache->sets[set].blocks[i].tag == tag &&
                 cache->sets[set].blocks[i].valid) {
-            return true;
+            return i;
         }
     }
 
-    return false;
+    return -1;
 }
 
 static enum AccessResult access_cache(struct Cache *cache, uint64_t addr)
 {
     uint64_t tag, set;
-    int replaced_blk;
-
-    if (in_cache(cache, addr)) {
-        return HIT;
-    }
+    int hit_blk, replaced_blk;
 
     tag = extract_tag(cache, addr);
     set = extract_set(cache, addr);
+    hit_blk = in_cache(cache, addr);
+
+    if (hit_blk != -1) {
+        if (lru) {
+            lru_update_on_hit(cache, set, hit_blk);
+        }
+        return HIT;
+    }
 
     replaced_blk = get_invalid_block(cache, set);
 
     if (replaced_blk == -1) {
-        replaced_blk = get_replaced_block(cache);
+        replaced_blk = get_replaced_block(cache, set);
+    }
+
+    if (lru) {
+        lru_update_on_miss(cache, set, replaced_blk);
+    } else if (fifo) {
+        fifo_update_on_miss(cache, set, replaced_blk);
     }
 
     cache->sets[set].blocks[replaced_blk].tag = tag;
@@ -307,6 +413,11 @@ static void free_cache(struct Cache *cache)
 {
     for (int i = 0; i < cache->num_sets; i++) {
         g_free(cache->sets[i].blocks);
+        if (lru) {
+            g_free(cache->sets[i].priorities);
+        } else if (fifo) {
+            g_queue_free(cache->sets[i].evict_queue);
+        }
     }
 
     g_free(cache->sets);
@@ -403,8 +514,6 @@ int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
     iblksize = 64;
     icachesize = iblksize * iassoc * 32;
 
-    rng = g_rand_new();
-
     for (i = 0; i < argc; i++) {
         char *opt = argv[i];
         if (g_str_has_prefix(opt, "I=")) {
@@ -433,6 +542,22 @@ int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
             if (!tracefile) {
                 fprintf(stderr, "could not open: %s for writing\n", file_name);
             }
+        } else if (g_str_has_prefix(opt, "evict=")) {
+            if (lru || rnd || fifo) {
+                fprintf(stderr, "eviction policy specified more than once\n");
+                return -1;
+            }
+            gchar *policy = opt + 6;
+            if (g_strcmp0(policy, "rand") == 0) {
+                rnd = true;
+            } else if (g_strcmp0(policy, "lru") == 0) {
+                lru = true;
+            } else if (g_strcmp0(policy, "fifo") == 0) {
+                fifo = true;
+            } else {
+                fprintf(stderr, "invalid eviction policy: %s\n", opt);
+                return -1;
+            }
         } else {
             fprintf(stderr, "option parsing failed: %s\n", opt);
             return -1;
@@ -449,6 +574,14 @@ int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
         return -1;
     }
 
+    if (!rnd && !lru && !fifo) {
+        lru = true;
+    }
+
+    if (rnd) {
+        rng = g_rand_new();
+    }
+
     dcache = cache_init(dblksize, dassoc, dcachesize);
     icache = cache_init(iblksize, iassoc, icachesize);
 
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [RFC PATCH 0/3] Cache modelling TCG plugin
  2021-05-29 15:22 [RFC PATCH 0/3] Cache modelling TCG plugin Mahmoud Mandour
                   ` (2 preceding siblings ...)
  2021-05-29 15:22 ` [RFC PATCH 3/3] plugins: cache: Added FIFO and LRU eviction policies Mahmoud Mandour
@ 2021-05-30  6:36 ` Philippe Mathieu-Daudé
  2021-05-30  6:41   ` Mahmoud Mandour
  3 siblings, 1 reply; 8+ messages in thread
From: Philippe Mathieu-Daudé @ 2021-05-30  6:36 UTC (permalink / raw)
  To: Mahmoud Mandour, qemu-devel; +Cc: Emilio G. Cota

Cc'ing Emilio too.

On 5/29/21 5:22 PM, Mahmoud Mandour wrote:
> In this RFC patch series, I propose an initial cache modelling TCG
> plugin. As of now, it models separate L1 data cache and L1 instruction
> cache. It supports three eviction policies: LRU, random, and FIFO. Once
> a policy is chosen, it's used for both instruction and data caches.
> 
> Mahmoud Mandour (3):
>   plugins: Added a new cache modelling plugin
>   plugins: cache: Enabled parameterization and added trace printing
>   plugins: cache: Added FIFO and LRU eviction policies.
> 
>  contrib/plugins/Makefile |   1 +
>  contrib/plugins/cache.c  | 595 +++++++++++++++++++++++++++++++++++++++
>  2 files changed, 596 insertions(+)
>  create mode 100644 contrib/plugins/cache.c
> 



^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC PATCH 0/3] Cache modelling TCG plugin
  2021-05-30  6:36 ` [RFC PATCH 0/3] Cache modelling TCG plugin Philippe Mathieu-Daudé
@ 2021-05-30  6:41   ` Mahmoud Mandour
  0 siblings, 0 replies; 8+ messages in thread
From: Mahmoud Mandour @ 2021-05-30  6:41 UTC (permalink / raw)
  To: Philippe Mathieu-Daudé; +Cc: Emilio G. Cota, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 952 bytes --]

Thank you, I'll also CC Emilio in the v2 of this series.

On Sun, May 30, 2021 at 8:36 AM Philippe Mathieu-Daudé <f4bug@amsat.org>
wrote:

> Cc'ing Emilio too.
>
> On 5/29/21 5:22 PM, Mahmoud Mandour wrote:
> > In this RFC patch series, I propose an initial cache modelling TCG
> > plugin. As of now, it models separate L1 data cache and L1 instruction
> > cache. It supports three eviction policies: LRU, random, and FIFO. Once
> > a policy is chosen, it's used for both instruction and data caches.
> >
> > Mahmoud Mandour (3):
> >   plugins: Added a new cache modelling plugin
> >   plugins: cache: Enabled parameterization and added trace printing
> >   plugins: cache: Added FIFO and LRU eviction policies.
> >
> >  contrib/plugins/Makefile |   1 +
> >  contrib/plugins/cache.c  | 595 +++++++++++++++++++++++++++++++++++++++
> >  2 files changed, 596 insertions(+)
> >  create mode 100644 contrib/plugins/cache.c
> >
>
>

[-- Attachment #2: Type: text/html, Size: 1333 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC PATCH 1/3] plugins: Added a new cache modelling plugin
  2021-05-29 15:22 ` [RFC PATCH 1/3] plugins: Added a new cache modelling plugin Mahmoud Mandour
@ 2021-06-01  9:53   ` Alex Bennée
  2021-06-02  3:51     ` Mahmoud Mandour
  0 siblings, 1 reply; 8+ messages in thread
From: Alex Bennée @ 2021-06-01  9:53 UTC (permalink / raw)
  To: Mahmoud Mandour; +Cc: qemu-devel


Mahmoud Mandour <ma.mandourr@gmail.com> writes:

> Added a cache modelling plugin that uses a static configuration used in
> many of the commercial microprocessors and uses random eviction policy.
>
> Signed-off-by: Mahmoud Mandour <ma.mandourr@gmail.com>
> ---
>  contrib/plugins/Makefile |   1 +
>  contrib/plugins/cache.c  | 398 +++++++++++++++++++++++++++++++++++++++
>  2 files changed, 399 insertions(+)
>  create mode 100644 contrib/plugins/cache.c
>
> diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
> index b9d7935e5e..2237b47f8b 100644
> --- a/contrib/plugins/Makefile
> +++ b/contrib/plugins/Makefile
> @@ -18,6 +18,7 @@ NAMES += hotpages
>  NAMES += howvec
>  NAMES += lockstep
>  NAMES += hwprofile
> +NAMES += cache
>  
>  SONAMES := $(addsuffix .so,$(addprefix lib,$(NAMES)))
>  
> diff --git a/contrib/plugins/cache.c b/contrib/plugins/cache.c
> new file mode 100644
> index 0000000000..f8c15ebed2
> --- /dev/null
> +++ b/contrib/plugins/cache.c
> @@ -0,0 +1,398 @@
> +/*
> + * Copyright (C) 2021, Mahmoud Mandour <ma.mandourr@gmail.com>
> + *
> + * License: GNU GPL, version 2 or later.
> + *   See the COPYING file in the top-level directory.
> + */
> +
> +#include <inttypes.h>
> +#include <assert.h>
> +#include <stdlib.h>
> +#include <inttypes.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <stdio.h>
> +#include <glib.h>
> +
> +#include <qemu-plugin.h>
> +
> +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
> +
> +static GRand *rng;
> +static GHashTable *dmiss_ht;
> +static GHashTable *imiss_ht;
> +
> +static GMutex dmtx, imtx;
> +
> +static int limit;
> +static bool sys;
> +
> +static uint64_t dmem_accesses;
> +static uint64_t dmisses;
> +
> +static uint64_t imem_accesses;
> +static uint64_t imisses;
> +
> +static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW;
> +
> +enum AccessResult {
> +    HIT = 0,
> +    MISS = 1
> +};
> +
> +struct InsnData {
> +    char *disas_str;
> +    uint64_t addr;
> +    uint64_t misses;
> +};

A little commentary to the relationship between CacheSet and CacheBlock
would be useful here for those trying to follow the code. Maybe a little
ascii art if you're up to it?

> +
> +struct CacheBlock {
> +    uint64_t tag;
> +    bool valid;
> +};
> +
> +struct CacheSet {
> +    struct CacheBlock *blocks;
> +};
> +
> +struct Cache {
> +    struct CacheSet *sets;
> +    int num_sets;
> +
> +    int cachesize;
> +    int blksize;
> +    int assoc;
> +
> +    uint64_t blk_mask;
> +    uint64_t set_mask;
> +    uint64_t tag_mask;
> +};
> +
> +struct Cache *dcache, *icache;
> +
> +static int pow_of_two(int num)
> +{
> +    g_assert((num & (num - 1)) == 0);
> +    int ret = 0;
> +    while (num /= 2) {
> +        ret++;
> +    }
> +    return ret;
> +}

You could probably eliminate this by:

  a) pre-calculating masks and shifts at start-up
  b) expressing cache-size as a power of 2 (are caches ever not?)

Currently it is by far the biggest hit on the CPU:

  46.42%  qemu-aarch64  libcache.so              [.] pow_of_two
  16.71%  qemu-aarch64  libcache.so              [.] lru_update_on_hit
  14.12%  qemu-aarch64  libcache.so              [.] in_cache
   6.73%  qemu-aarch64  libcache.so              [.] extract_tag
   4.52%  qemu-aarch64  libcache.so              [.] extract_set
   4.48%  qemu-aarch64  libcache.so              [.] access_cache
   2.34%  qemu-aarch64  libcache.so              [.] vcpu_insn_exec
   1.63%  qemu-aarch64  libcache.so              [.] vcpu_mem_access
   0.72%  qemu-aarch64  libglib-2.0.so.0.5800.3  [.] g_mutex_lock

> +static inline uint64_t extract_tag(struct Cache *cache, uint64_t addr)
> +{
> +    return (addr & cache->tag_mask) >>
> +        (pow_of_two(cache->num_sets) + pow_of_two(cache->blksize));
> +}
> +
> +static inline uint64_t extract_set(struct Cache *cache, uint64_t addr)
> +{
> +    return (addr & cache->set_mask) >> (pow_of_two(cache->blksize));
> +}

It would make sense to enforce pow_of_two for num_sets and blksize on
initialisation to avoid doing it for every tag. Maybe rename them to
set_shift and blksize_shift to better indicate their usage.

> +
> +static struct Cache *cache_init(int blksize, int assoc, int cachesize)
> +{
> +    struct Cache *cache;
> +    int i;
> +
> +    cache = g_new(struct Cache, 1);
> +    cache->blksize = blksize;
> +    cache->assoc = assoc;
> +    cache->cachesize = cachesize;
> +    cache->num_sets = cachesize / (blksize * assoc);
> +    cache->sets = g_new(struct CacheSet, cache->num_sets);
> +
> +    for (i = 0; i < cache->num_sets; i++) {
> +        cache->sets[i].blocks = g_new0(struct CacheBlock, assoc);
> +    }
> +
> +    cache->blk_mask = blksize - 1;
> +    cache->set_mask = ((cache->num_sets - 1) << (pow_of_two(cache->blksize)));
> +    cache->tag_mask = ~(cache->set_mask | cache->blk_mask);
> +
> +    return cache;
> +}
> +
> +static int get_invalid_block(struct Cache *cache, uint64_t set)
> +{
> +    int i;
> +
> +    for (i = 0; i < cache->assoc; i++) {
> +        if (!cache->sets[set].blocks[i].valid) {
> +            /* conflict miss */
> +            return i;
> +        }
> +    }
> +
> +    /* compulsary miss */
> +    return -1;
> +}
> +
> +static int get_replaced_block(struct Cache *cache)
> +{
> +    return g_rand_int_range(rng, 0, cache->assoc);
> +}
> +
> +static bool in_cache(struct Cache *cache, uint64_t addr)
> +{
> +    int i;
> +    uint64_t tag, set;
> +
> +    tag = extract_tag(cache, addr);
> +    set = extract_set(cache, addr);
> +
> +    for (i = 0; i < cache->assoc; i++) {
> +        if (cache->sets[set].blocks[i].tag == tag &&
> +                cache->sets[set].blocks[i].valid) {
> +            return true;
> +        }
> +    }
> +
> +    return false;
> +}
> +
> +static enum AccessResult access_cache(struct Cache *cache, uint64_t
> addr)

Does the enum really make things easier compared to a straight bool? You
could just explain things with a comment:

  /**
   * access_cache() - simulate a cache access
   * @cache: reference to the cache being used
   * @addr: address of cached entity
   *
   * Returns true if the cache hit, false otherwise and the cache is
   * updated for next time.
   */
  static bool access_cache(struct Cache *cache, uint64_t addr)
  {

And then have:

  if (!access_cache(icache, addr)) {
      struct InsnData *insn = get_or_create(imiss_ht, userdata, addr);
      insn->misses++;
      imisses++;
  }


> +{
> +    uint64_t tag, set;
> +    int replaced_blk;
> +
> +    if (in_cache(cache, addr)) {
> +        return HIT;
> +    }
> +
> +    tag = extract_tag(cache, addr);
> +    set = extract_set(cache, addr);
> +
> +    replaced_blk = get_invalid_block(cache, set);
> +
> +    if (replaced_blk == -1) {
> +        replaced_blk = get_replaced_block(cache);
> +    }
> +
> +    cache->sets[set].blocks[replaced_blk].tag = tag;
> +    cache->sets[set].blocks[replaced_blk].valid = true;
> +
> +    return MISS;
> +}
> +
> +struct InsnData *get_or_create(GHashTable *ht, struct InsnData *insn_data,
> +                               uint64_t addr)
> +{
> +    struct InsnData *insn = g_hash_table_lookup(ht, GUINT_TO_POINTER(addr));
> +    if (!insn) {
> +        g_hash_table_insert(ht, GUINT_TO_POINTER(addr), (gpointer) insn_data);
> +        insn = insn_data;
> +    }
> +
> +    return insn;
> +}
> +
> +static void vcpu_mem_access(unsigned int cpu_index, qemu_plugin_meminfo_t info,
> +                            uint64_t vaddr, void *userdata)
> +{
> +    uint64_t insn_addr;
> +    uint64_t effective_addr;
> +    struct qemu_plugin_hwaddr *hwaddr;
> +
> +    g_mutex_lock(&dmtx);
> +    hwaddr = qemu_plugin_get_hwaddr(info, vaddr);
> +    if (hwaddr && qemu_plugin_hwaddr_is_io(hwaddr)) {
> +        g_free(userdata);
> +        return;
> +    }
> +
> +    insn_addr = ((struct InsnData *) userdata)->addr;
> +    effective_addr = hwaddr ? qemu_plugin_hwaddr_phys_addr(hwaddr) : vaddr;
> +
> +    if (access_cache(dcache, effective_addr) == MISS) {
> +        struct InsnData *insn = get_or_create(dmiss_ht, userdata, insn_addr);
> +        insn->misses++;
> +        dmisses++;
> +    }
> +    dmem_accesses++;
> +    g_mutex_unlock(&dmtx);
> +}
> +
> +static void vcpu_insn_exec(unsigned int vcpu_index, void *userdata)
> +{
> +    uint64_t addr;
> +
> +    g_mutex_lock(&imtx);
> +    addr = ((struct InsnData *) userdata)->addr;
> +
> +    if (access_cache(icache, addr) == MISS) {
> +        struct InsnData *insn = get_or_create(imiss_ht, userdata, addr);
> +        insn->misses++;
> +        imisses++;
> +    }
> +    imem_accesses++;
> +    g_mutex_unlock(&imtx);
> +}
> +
> +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
> +{
> +    size_t n_insns;
> +    size_t i;
> +
> +    n_insns = qemu_plugin_tb_n_insns(tb);
> +    for (i = 0; i < n_insns; i++) {
> +        struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
> +        uint64_t effective_addr;
> +
> +        if (sys) {
> +            effective_addr = (uint64_t) qemu_plugin_insn_haddr(insn);
> +        } else {
> +            effective_addr = (uint64_t) qemu_plugin_insn_vaddr(insn);
> +        }
> +
> +        struct InsnData *ddata = g_new(struct InsnData, 1);
> +        struct InsnData *idata = g_new(struct InsnData, 1);

OK I think I see what you were saying on the sync up earlier. You need
to take into account any given instruction may get translated multiple
times so I think for any given instruction you are tracking you want to
get or create an entry here.

> +
> +        ddata->disas_str = qemu_plugin_insn_disas(insn);
> +        ddata->misses = 0;
> +        ddata->addr = effective_addr;
> +
> +        idata->disas_str = g_strdup(ddata->disas_str);
> +        idata->misses = 0;
> +        idata->addr = effective_addr;

And you might as well combine the InsnData to track both data and icache
misses in one structure to avoid the duplication of records and strings.

> +
> +        qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem_access,
> +                                         QEMU_PLUGIN_CB_NO_REGS,
> +                                         rw, ddata);
> +
> +        qemu_plugin_register_vcpu_insn_exec_cb(insn, vcpu_insn_exec,
> +                                               QEMU_PLUGIN_CB_NO_REGS, idata);
> +    }
> +}
> +
> +static void print_entry(gpointer data)
> +{
> +    struct InsnData *insn = (struct InsnData *) data;
> +    g_autoptr(GString) xx = g_string_new("");
> +    g_string_append_printf(xx, "0x%" PRIx64 ": %s - misses: %lu\n",
> +            insn->addr, insn->disas_str, insn->misses);

As you are likely going to want to post-process this data I would
suggest a slightly more machine readable format:

  address, misses, instruction
  0x419298, 2, mov x0, x21
  0x41aa40, 2, add x0, x0, #0x17
  0x419640, 2,  add x5, x4, #0x218
  0x41aa10, 2,  adrp x1, #0x48b000
  0x4002d0, 2,  adrp x16, #0x48b000

> +    qemu_plugin_outs(xx->str);
> +}
> +
> +static void free_insn(gpointer data)
> +{
> +    struct InsnData *insn = (struct InsnData *) data;
> +    g_free(insn->disas_str);
> +    g_free(insn);
> +}
> +
> +static void free_cache(struct Cache *cache)
> +{
> +    for (int i = 0; i < cache->num_sets; i++) {
> +        g_free(cache->sets[i].blocks);
> +    }
> +
> +    g_free(cache->sets);
> +}
> +
> +static int cmp(gconstpointer a, gconstpointer b)

This will likely need renaming if you ever want to sort by different
things.

> +{
> +    struct InsnData *insn_a = (struct InsnData *) a;
> +    struct InsnData *insn_b = (struct InsnData *) b;
> +
> +    return insn_a->misses < insn_b->misses ? 1 : -1;
> +}
> +
> +static void print_stats()
> +{
> +    g_autoptr(GString) rep = g_string_new("");
> +    g_string_append_printf(rep,
> +            "Data accesses: %lu, Misses: %lu\nMiss rate: %lf%%\n\n",
> +            dmem_accesses,
> +            dmisses,
> +            ((double)dmisses / dmem_accesses) * 100.0);
> +
> +    g_string_append_printf(rep,
> +            "Instruction accesses: %lu, Misses: %lu\nMiss rate: %lf%%\n\n",
> +            imem_accesses,
> +            imisses,
> +            ((double)imisses / imem_accesses) * 100.0);
> +
> +    qemu_plugin_outs(rep->str);
> +}
> +
> +static void plugin_exit()
> +{
> +    GList *curr;
> +    int i;
> +
> +    g_mutex_lock(&imtx);
> +    g_mutex_lock(&dmtx);
> +    GList *dmiss_insns = g_hash_table_get_values(dmiss_ht);
> +    GList *imiss_insns = g_hash_table_get_values(imiss_ht);
> +    dmiss_insns = g_list_sort(dmiss_insns, cmp);
> +    imiss_insns = g_list_sort(imiss_insns, cmp);
> +
> +    print_stats();
> +
> +    qemu_plugin_outs("Most data-missing instructions\n");
> +    for (curr = dmiss_insns, i = 0; curr && i < limit; i++, curr = curr->next) {
> +        print_entry(curr->data);
> +    }
> +
> +    qemu_plugin_outs("\nMost fetch-missing instructions\n");
> +    for (curr = imiss_insns, i = 0; curr && i < limit; i++, curr = curr->next) {
> +        print_entry(curr->data);
> +    }
> +
> +    free_cache(dcache);
> +    free_cache(icache);
> +
> +    g_list_free(dmiss_insns);
> +    g_list_free(imiss_insns);
> +
> +    g_hash_table_destroy(dmiss_ht);
> +    g_hash_table_destroy(imiss_ht);
> +
> +    g_mutex_unlock(&dmtx);
> +    g_mutex_unlock(&imtx);
> +}
> +
> +QEMU_PLUGIN_EXPORT
> +int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
> +                        int argc, char **argv)
> +{
> +    int i;
> +    int iassoc, iblksize, icachesize;
> +    int dassoc, dblksize, dcachesize;
> +
> +    limit = 32;
> +    sys = info->system_emulation;
> +
> +    dassoc = 8;
> +    dblksize = 64;
> +    dcachesize = dblksize * dassoc * 32;
> +
> +    iassoc = 8;
> +    iblksize = 64;
> +    icachesize = iblksize * iassoc * 32;
> +
> +    rng = g_rand_new();
> +
> +    for (i = 0; i < argc; i++) {
> +        char *opt = argv[i];
> +        if (g_str_has_prefix(opt, "limit=")) {
> +            limit = g_ascii_strtoull(opt + 6, NULL, 10);
> +        } else {
> +            fprintf(stderr, "option parsing failed: %s\n", opt);
> +            return -1;
> +        }
> +    }
> +
> +    dcache = cache_init(dblksize, dassoc, dcachesize);
> +    icache = cache_init(iblksize, iassoc, icachesize);
> +
> +    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
> +    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
> +
> +    dmiss_ht = g_hash_table_new_full(NULL, g_direct_equal, NULL, free_insn);
> +    imiss_ht = g_hash_table_new_full(NULL, g_direct_equal, NULL, free_insn);
> +
> +    return 0;
> +}


-- 
Alex Bennée


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC PATCH 1/3] plugins: Added a new cache modelling plugin
  2021-06-01  9:53   ` Alex Bennée
@ 2021-06-02  3:51     ` Mahmoud Mandour
  0 siblings, 0 replies; 8+ messages in thread
From: Mahmoud Mandour @ 2021-06-02  3:51 UTC (permalink / raw)
  To: Alex Bennée; +Cc: qemu-devel

[-- Attachment #1: Type: text/plain, Size: 17339 bytes --]

On Tue, Jun 1, 2021 at 1:12 PM Alex Bennée <alex.bennee@linaro.org> wrote:

>
> Mahmoud Mandour <ma.mandourr@gmail.com> writes:
>
> > Added a cache modelling plugin that uses a static configuration used in
> > many of the commercial microprocessors and uses random eviction policy.
> >
> > Signed-off-by: Mahmoud Mandour <ma.mandourr@gmail.com>
> > ---
> >  contrib/plugins/Makefile |   1 +
> >  contrib/plugins/cache.c  | 398 +++++++++++++++++++++++++++++++++++++++
> >  2 files changed, 399 insertions(+)
> >  create mode 100644 contrib/plugins/cache.c
> >
> > diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
> > index b9d7935e5e..2237b47f8b 100644
> > --- a/contrib/plugins/Makefile
> > +++ b/contrib/plugins/Makefile
> > @@ -18,6 +18,7 @@ NAMES += hotpages
> >  NAMES += howvec
> >  NAMES += lockstep
> >  NAMES += hwprofile
> > +NAMES += cache
> >
> >  SONAMES := $(addsuffix .so,$(addprefix lib,$(NAMES)))
> >
> > diff --git a/contrib/plugins/cache.c b/contrib/plugins/cache.c
> > new file mode 100644
> > index 0000000000..f8c15ebed2
> > --- /dev/null
> > +++ b/contrib/plugins/cache.c
> > @@ -0,0 +1,398 @@
> > +/*
> > + * Copyright (C) 2021, Mahmoud Mandour <ma.mandourr@gmail.com>
> > + *
> > + * License: GNU GPL, version 2 or later.
> > + *   See the COPYING file in the top-level directory.
> > + */
> > +
> > +#include <inttypes.h>
> > +#include <assert.h>
> > +#include <stdlib.h>
> > +#include <inttypes.h>
> > +#include <string.h>
> > +#include <unistd.h>
> > +#include <stdio.h>
> > +#include <glib.h>
> > +
> > +#include <qemu-plugin.h>
> > +
> > +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
> > +
> > +static GRand *rng;
> > +static GHashTable *dmiss_ht;
> > +static GHashTable *imiss_ht;
> > +
> > +static GMutex dmtx, imtx;
> > +
> > +static int limit;
> > +static bool sys;
> > +
> > +static uint64_t dmem_accesses;
> > +static uint64_t dmisses;
> > +
> > +static uint64_t imem_accesses;
> > +static uint64_t imisses;
> > +
> > +static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW;
> > +
> > +enum AccessResult {
> > +    HIT = 0,
> > +    MISS = 1
> > +};
> > +
> > +struct InsnData {
> > +    char *disas_str;
> > +    uint64_t addr;
> > +    uint64_t misses;
> > +};
>
> A little commentary to the relationship between CacheSet and CacheBlock
> would be useful here for those trying to follow the code. Maybe a little
> ascii art if you're up to it?
>
> > +
> > +struct CacheBlock {
> > +    uint64_t tag;
> > +    bool valid;
> > +};
> > +
> > +struct CacheSet {
> > +    struct CacheBlock *blocks;
> > +};
> > +
> > +struct Cache {
> > +    struct CacheSet *sets;
> > +    int num_sets;
> > +
> > +    int cachesize;
> > +    int blksize;
> > +    int assoc;
> > +
> > +    uint64_t blk_mask;
> > +    uint64_t set_mask;
> > +    uint64_t tag_mask;
> > +};
> > +
> > +struct Cache *dcache, *icache;
> > +
> > +static int pow_of_two(int num)
> > +{
> > +    g_assert((num & (num - 1)) == 0);
> > +    int ret = 0;
> > +    while (num /= 2) {
> > +        ret++;
> > +    }
> > +    return ret;
> > +}
>
> You could probably eliminate this by:
>
>   a) pre-calculating masks and shifts at start-up
>   b) expressing cache-size as a power of 2 (are caches ever not?)
>
> Currently it is by far the biggest hit on the CPU:
>
>   46.42%  qemu-aarch64  libcache.so              [.] pow_of_two
>   16.71%  qemu-aarch64  libcache.so              [.] lru_update_on_hit
>   14.12%  qemu-aarch64  libcache.so              [.] in_cache
>    6.73%  qemu-aarch64  libcache.so              [.] extract_tag
>    4.52%  qemu-aarch64  libcache.so              [.] extract_set
>    4.48%  qemu-aarch64  libcache.so              [.] access_cache
>    2.34%  qemu-aarch64  libcache.so              [.] vcpu_insn_exec
>    1.63%  qemu-aarch64  libcache.so              [.] vcpu_mem_access
>    0.72%  qemu-aarch64  libglib-2.0.so.0.5800.3  [.] g_mutex_lock
>
> > +static inline uint64_t extract_tag(struct Cache *cache, uint64_t addr)
> > +{
> > +    return (addr & cache->tag_mask) >>
> > +        (pow_of_two(cache->num_sets) + pow_of_two(cache->blksize));
> > +}
> > +
> > +static inline uint64_t extract_set(struct Cache *cache, uint64_t addr)
> > +{
> > +    return (addr & cache->set_mask) >> (pow_of_two(cache->blksize));
> > +}
>
> It would make sense to enforce pow_of_two for num_sets and blksize on
> initialisation to avoid doing it for every tag. Maybe rename them to
> set_shift and blksize_shift to better indicate their usage.
>
> That's of course problematic, I guess I need to precompute it once
for each cache and not for each tag extraction. However, I  think that I
got this
wrong since I do not even need to shift down the tag. I can extract the tag
as
the high-order bits by only masking for every access and also store that in
the
cache, which works fine. pow_of_two would still propose a  problem since I
need to compute it for extract_set, so I can store that once in the cache,
yes.

Also, can you please tell me what you use to access plugin profiling
data? To my
knowledge, gprof is not able to work out profiling data for loaded shared
libraries.
I tried sprof but I'm getting an error caused by a seemingly-known bug
since 2009
and not yet solved :D

I guess callgrind works but I think its output is not as clean as the
output you posted.


> > +
> > +static struct Cache *cache_init(int blksize, int assoc, int cachesize)
> > +{
> > +    struct Cache *cache;
> > +    int i;
> > +
> > +    cache = g_new(struct Cache, 1);
> > +    cache->blksize = blksize;
> > +    cache->assoc = assoc;
> > +    cache->cachesize = cachesize;
> > +    cache->num_sets = cachesize / (blksize * assoc);
> > +    cache->sets = g_new(struct CacheSet, cache->num_sets);
> > +
> > +    for (i = 0; i < cache->num_sets; i++) {
> > +        cache->sets[i].blocks = g_new0(struct CacheBlock, assoc);
> > +    }
> > +
> > +    cache->blk_mask = blksize - 1;
> > +    cache->set_mask = ((cache->num_sets - 1) <<
> (pow_of_two(cache->blksize)));
> > +    cache->tag_mask = ~(cache->set_mask | cache->blk_mask);
> > +
> > +    return cache;
> > +}
> > +
> > +static int get_invalid_block(struct Cache *cache, uint64_t set)
> > +{
> > +    int i;
> > +
> > +    for (i = 0; i < cache->assoc; i++) {
> > +        if (!cache->sets[set].blocks[i].valid) {
> > +            /* conflict miss */
> > +            return i;
> > +        }
> > +    }
> > +
> > +    /* compulsary miss */
> > +    return -1;
> > +}
> > +
> > +static int get_replaced_block(struct Cache *cache)
> > +{
> > +    return g_rand_int_range(rng, 0, cache->assoc);
> > +}
> > +
> > +static bool in_cache(struct Cache *cache, uint64_t addr)
> > +{
> > +    int i;
> > +    uint64_t tag, set;
> > +
> > +    tag = extract_tag(cache, addr);
> > +    set = extract_set(cache, addr);
> > +
> > +    for (i = 0; i < cache->assoc; i++) {
> > +        if (cache->sets[set].blocks[i].tag == tag &&
> > +                cache->sets[set].blocks[i].valid) {
> > +            return true;
> > +        }
> > +    }
> > +
> > +    return false;
> > +}
> > +
> > +static enum AccessResult access_cache(struct Cache *cache, uint64_t
> > addr)
>
> Does the enum really make things easier compared to a straight bool? You
> could just explain things with a comment:
>
>   /**
>    * access_cache() - simulate a cache access
>    * @cache: reference to the cache being used
>    * @addr: address of cached entity
>    *
>    * Returns true if the cache hit, false otherwise and the cache is
>    * updated for next time.
>    */
>   static bool access_cache(struct Cache *cache, uint64_t addr)
>   {
>
> And then have:
>
>   if (!access_cache(icache, addr)) {
>       struct InsnData *insn = get_or_create(imiss_ht, userdata, addr);
>       insn->misses++;
>       imisses++;
>   }
>
I applied that, probably better. Thanks.


> > +{
> > +    uint64_t tag, set;
> > +    int replaced_blk;
> > +
> > +    if (in_cache(cache, addr)) {
> > +        return HIT;
> > +    }
> > +
> > +    tag = extract_tag(cache, addr);
> > +    set = extract_set(cache, addr);
> > +
> > +    replaced_blk = get_invalid_block(cache, set);
> > +
> > +    if (replaced_blk == -1) {
> > +        replaced_blk = get_replaced_block(cache);
> > +    }
> > +
> > +    cache->sets[set].blocks[replaced_blk].tag = tag;
> > +    cache->sets[set].blocks[replaced_blk].valid = true;
> > +
> > +    return MISS;
> > +}
> > +
> > +struct InsnData *get_or_create(GHashTable *ht, struct InsnData
> *insn_data,
> > +                               uint64_t addr)
> > +{
> > +    struct InsnData *insn = g_hash_table_lookup(ht,
> GUINT_TO_POINTER(addr));
> > +    if (!insn) {
> > +        g_hash_table_insert(ht, GUINT_TO_POINTER(addr), (gpointer)
> insn_data);
> > +        insn = insn_data;
> > +    }
> > +
> > +    return insn;
> > +}
> > +
> > +static void vcpu_mem_access(unsigned int cpu_index,
> qemu_plugin_meminfo_t info,
> > +                            uint64_t vaddr, void *userdata)
> > +{
> > +    uint64_t insn_addr;
> > +    uint64_t effective_addr;
> > +    struct qemu_plugin_hwaddr *hwaddr;
> > +
> > +    g_mutex_lock(&dmtx);
> > +    hwaddr = qemu_plugin_get_hwaddr(info, vaddr);
> > +    if (hwaddr && qemu_plugin_hwaddr_is_io(hwaddr)) {
> > +        g_free(userdata);
> > +        return;
> > +    }
> > +
> > +    insn_addr = ((struct InsnData *) userdata)->addr;
> > +    effective_addr = hwaddr ? qemu_plugin_hwaddr_phys_addr(hwaddr) :
> vaddr;
> > +
> > +    if (access_cache(dcache, effective_addr) == MISS) {
> > +        struct InsnData *insn = get_or_create(dmiss_ht, userdata,
> insn_addr);
> > +        insn->misses++;
> > +        dmisses++;
> > +    }
> > +    dmem_accesses++;
> > +    g_mutex_unlock(&dmtx);
> > +}
> > +
> > +static void vcpu_insn_exec(unsigned int vcpu_index, void *userdata)
> > +{
> > +    uint64_t addr;
> > +
> > +    g_mutex_lock(&imtx);
> > +    addr = ((struct InsnData *) userdata)->addr;
> > +
> > +    if (access_cache(icache, addr) == MISS) {
> > +        struct InsnData *insn = get_or_create(imiss_ht, userdata, addr);
> > +        insn->misses++;
> > +        imisses++;
> > +    }
> > +    imem_accesses++;
> > +    g_mutex_unlock(&imtx);
> > +}
> > +
> > +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb
> *tb)
> > +{
> > +    size_t n_insns;
> > +    size_t i;
> > +
> > +    n_insns = qemu_plugin_tb_n_insns(tb);
> > +    for (i = 0; i < n_insns; i++) {
> > +        struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
> > +        uint64_t effective_addr;
> > +
> > +        if (sys) {
> > +            effective_addr = (uint64_t) qemu_plugin_insn_haddr(insn);
> > +        } else {
> > +            effective_addr = (uint64_t) qemu_plugin_insn_vaddr(insn);
> > +        }
> > +
> > +        struct InsnData *ddata = g_new(struct InsnData, 1);
> > +        struct InsnData *idata = g_new(struct InsnData, 1);
>
> OK I think I see what you were saying on the sync up earlier. You need
> to take into account any given instruction may get translated multiple
> times so I think for any given instruction you are tracking you want to
> get or create an entry here.
>
> Thanks for explaining that an instruction can get translated multiple
times, I did not know that. Applied, thanks.

> > +
> > +        ddata->disas_str = qemu_plugin_insn_disas(insn);
> > +        ddata->misses = 0;
> > +        ddata->addr = effective_addr;
> > +
> > +        idata->disas_str = g_strdup(ddata->disas_str);
> > +        idata->misses = 0;
> > +        idata->addr = effective_addr;
>
> And you might as well combine the InsnData to track both data and icache
> misses in one structure to avoid the duplication of records and strings.
>
> Applied, thanks.


> > +
> > +        qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem_access,
> > +                                         QEMU_PLUGIN_CB_NO_REGS,
> > +                                         rw, ddata);
> > +
> > +        qemu_plugin_register_vcpu_insn_exec_cb(insn, vcpu_insn_exec,
> > +                                               QEMU_PLUGIN_CB_NO_REGS,
> idata);
> > +    }
> > +}
> > +
> > +static void print_entry(gpointer data)
> > +{
> > +    struct InsnData *insn = (struct InsnData *) data;
> > +    g_autoptr(GString) xx = g_string_new("");
> > +    g_string_append_printf(xx, "0x%" PRIx64 ": %s - misses: %lu\n",
> > +            insn->addr, insn->disas_str, insn->misses);
>
> As you are likely going to want to post-process this data I would
> suggest a slightly more machine readable format:
>
>   address, misses, instruction
>   0x419298, 2, mov x0, x21
>   0x41aa40, 2, add x0, x0, #0x17
>   0x419640, 2,  add x5, x4, #0x218
>   0x41aa10, 2,  adrp x1, #0x48b000
>   0x4002d0, 2,  adrp x16, #0x48b000
>
> Applied, thanks.


> > +    qemu_plugin_outs(xx->str);
> > +}
> > +
> > +static void free_insn(gpointer data)
> > +{
> > +    struct InsnData *insn = (struct InsnData *) data;
> > +    g_free(insn->disas_str);
> > +    g_free(insn);
> > +}
> > +
> > +static void free_cache(struct Cache *cache)
> > +{
> > +    for (int i = 0; i < cache->num_sets; i++) {
> > +        g_free(cache->sets[i].blocks);
> > +    }
> > +
> > +    g_free(cache->sets);
> > +}
> > +
> > +static int cmp(gconstpointer a, gconstpointer b)
>
> This will likely need renaming if you ever want to sort by different
> things.
>
Yes, I will have two comparator functions, one for data misses and one
for instruction misses.

>
> > +{
> > +    struct InsnData *insn_a = (struct InsnData *) a;
> > +    struct InsnData *insn_b = (struct InsnData *) b;
> > +
> > +    return insn_a->misses < insn_b->misses ? 1 : -1;
> > +}
> > +
> > +static void print_stats()
> > +{
> > +    g_autoptr(GString) rep = g_string_new("");
> > +    g_string_append_printf(rep,
> > +            "Data accesses: %lu, Misses: %lu\nMiss rate: %lf%%\n\n",
> > +            dmem_accesses,
> > +            dmisses,
> > +            ((double)dmisses / dmem_accesses) * 100.0);
> > +
> > +    g_string_append_printf(rep,
> > +            "Instruction accesses: %lu, Misses: %lu\nMiss rate:
> %lf%%\n\n",
> > +            imem_accesses,
> > +            imisses,
> > +            ((double)imisses / imem_accesses) * 100.0);
> > +
> > +    qemu_plugin_outs(rep->str);
> > +}
> > +
> > +static void plugin_exit()
> > +{
> > +    GList *curr;
> > +    int i;
> > +
> > +    g_mutex_lock(&imtx);
> > +    g_mutex_lock(&dmtx);
> > +    GList *dmiss_insns = g_hash_table_get_values(dmiss_ht);
> > +    GList *imiss_insns = g_hash_table_get_values(imiss_ht);
> > +    dmiss_insns = g_list_sort(dmiss_insns, cmp);
> > +    imiss_insns = g_list_sort(imiss_insns, cmp);
> > +
> > +    print_stats();
> > +
> > +    qemu_plugin_outs("Most data-missing instructions\n");
> > +    for (curr = dmiss_insns, i = 0; curr && i < limit; i++, curr =
> curr->next) {
> > +        print_entry(curr->data);
> > +    }
> > +
> > +    qemu_plugin_outs("\nMost fetch-missing instructions\n");
> > +    for (curr = imiss_insns, i = 0; curr && i < limit; i++, curr =
> curr->next) {
> > +        print_entry(curr->data);
> > +    }
> > +
> > +    free_cache(dcache);
> > +    free_cache(icache);
> > +
> > +    g_list_free(dmiss_insns);
> > +    g_list_free(imiss_insns);
> > +
> > +    g_hash_table_destroy(dmiss_ht);
> > +    g_hash_table_destroy(imiss_ht);
> > +
> > +    g_mutex_unlock(&dmtx);
> > +    g_mutex_unlock(&imtx);
> > +}
> > +
> > +QEMU_PLUGIN_EXPORT
> > +int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
> > +                        int argc, char **argv)
> > +{
> > +    int i;
> > +    int iassoc, iblksize, icachesize;
> > +    int dassoc, dblksize, dcachesize;
> > +
> > +    limit = 32;
> > +    sys = info->system_emulation;
> > +
> > +    dassoc = 8;
> > +    dblksize = 64;
> > +    dcachesize = dblksize * dassoc * 32;
> > +
> > +    iassoc = 8;
> > +    iblksize = 64;
> > +    icachesize = iblksize * iassoc * 32;
> > +
> > +    rng = g_rand_new();
> > +
> > +    for (i = 0; i < argc; i++) {
> > +        char *opt = argv[i];
> > +        if (g_str_has_prefix(opt, "limit=")) {
> > +            limit = g_ascii_strtoull(opt + 6, NULL, 10);
> > +        } else {
> > +            fprintf(stderr, "option parsing failed: %s\n", opt);
> > +            return -1;
> > +        }
> > +    }
> > +
> > +    dcache = cache_init(dblksize, dassoc, dcachesize);
> > +    icache = cache_init(iblksize, iassoc, icachesize);
> > +
> > +    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
> > +    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
> > +
> > +    dmiss_ht = g_hash_table_new_full(NULL, g_direct_equal, NULL,
> free_insn);
> > +    imiss_ht = g_hash_table_new_full(NULL, g_direct_equal, NULL,
> free_insn);
> > +
> > +    return 0;
> > +}
>
>
> --
> Alex Bennée
>

[-- Attachment #2: Type: text/html, Size: 22420 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2021-06-02  3:54 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-29 15:22 [RFC PATCH 0/3] Cache modelling TCG plugin Mahmoud Mandour
2021-05-29 15:22 ` [RFC PATCH 1/3] plugins: Added a new cache modelling plugin Mahmoud Mandour
2021-06-01  9:53   ` Alex Bennée
2021-06-02  3:51     ` Mahmoud Mandour
2021-05-29 15:22 ` [RFC PATCH 2/3] plugins: cache: Enabled parameterization and added trace printing Mahmoud Mandour
2021-05-29 15:22 ` [RFC PATCH 3/3] plugins: cache: Added FIFO and LRU eviction policies Mahmoud Mandour
2021-05-30  6:36 ` [RFC PATCH 0/3] Cache modelling TCG plugin Philippe Mathieu-Daudé
2021-05-30  6:41   ` Mahmoud Mandour

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.