All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH v2 0/3] Cache modelling TCG plugin
@ 2021-05-30  6:37 Mahmoud Mandour
  2021-05-30  6:37 ` [RFC PATCH v2 1/3] plugins: Added a new cache modelling plugin Mahmoud Mandour
                   ` (4 more replies)
  0 siblings, 5 replies; 15+ messages in thread
From: Mahmoud Mandour @ 2021-05-30  6:37 UTC (permalink / raw)
  To: qemu-devel; +Cc: Mahmoud Mandour

In this RFC patch series, I propose an initial cache modelling TCG
plugin. As of now, it models separate L1 data cache and L1 instruction
cache. It supports three eviction policies: LRU, random, and FIFO. Once
a policy is chosen, it's used for both instruction and data caches.

v1 -> v2: Unlocked dmtx on early return in vcpu_mem_access & removed a
          (probably?) bad InsnData free.
          This is probably still problematic since it does not free the
          ``idata`` allocated for the vcpu_mem_access callback even
          once, but if it's placed, it would double-free it.
          How do I mitigate this? I need to free the InsnData passed to
          vcpu_mem_access only once if we find out that it's an IO
          access since we do not need it anymore and it will early
          return every time.

Mahmoud Mandour (3):
  plugins: Added a new cache modelling plugin
  plugins: cache: Enabled parameterization and added trace printing
  plugins: cache: Added FIFO and LRU eviction policies.

 contrib/plugins/Makefile |   1 +
 contrib/plugins/cache.c  | 595 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 596 insertions(+)
 create mode 100644 contrib/plugins/cache.c

-- 
2.25.1



^ permalink raw reply	[flat|nested] 15+ messages in thread

* [RFC PATCH v2 1/3] plugins: Added a new cache modelling plugin
  2021-05-30  6:37 [RFC PATCH v2 0/3] Cache modelling TCG plugin Mahmoud Mandour
@ 2021-05-30  6:37 ` Mahmoud Mandour
  2021-06-02  3:14   ` Mahmoud Mandour
  2021-05-30  6:37 ` [RFC PATCH v2 2/3] plugins: cache: Enabled parameterization and added trace printing Mahmoud Mandour
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 15+ messages in thread
From: Mahmoud Mandour @ 2021-05-30  6:37 UTC (permalink / raw)
  To: qemu-devel; +Cc: Mahmoud Mandour, Alex Bennée

Added a cache modelling plugin that uses a static configuration used in
many of the commercial microprocessors and uses random eviction policy.

Signed-off-by: Mahmoud Mandour <ma.mandourr@gmail.com>
---
 contrib/plugins/Makefile |   1 +
 contrib/plugins/cache.c  | 398 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 399 insertions(+)
 create mode 100644 contrib/plugins/cache.c

diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
index b9d7935e5e..2237b47f8b 100644
--- a/contrib/plugins/Makefile
+++ b/contrib/plugins/Makefile
@@ -18,6 +18,7 @@ NAMES += hotpages
 NAMES += howvec
 NAMES += lockstep
 NAMES += hwprofile
+NAMES += cache
 
 SONAMES := $(addsuffix .so,$(addprefix lib,$(NAMES)))
 
diff --git a/contrib/plugins/cache.c b/contrib/plugins/cache.c
new file mode 100644
index 0000000000..8c9d1dd538
--- /dev/null
+++ b/contrib/plugins/cache.c
@@ -0,0 +1,398 @@
+/*
+ * Copyright (C) 2021, Mahmoud Mandour <ma.mandourr@gmail.com>
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ */
+
+#include <inttypes.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <glib.h>
+
+#include <qemu-plugin.h>
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
+
+static GRand *rng;
+static GHashTable *dmiss_ht;
+static GHashTable *imiss_ht;
+
+static GMutex dmtx, imtx;
+
+static int limit;
+static bool sys;
+
+static uint64_t dmem_accesses;
+static uint64_t dmisses;
+
+static uint64_t imem_accesses;
+static uint64_t imisses;
+
+static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW;
+
+enum AccessResult {
+    HIT = 0,
+    MISS = 1
+};
+
+struct InsnData {
+    char *disas_str;
+    uint64_t addr;
+    uint64_t misses;
+};
+
+struct CacheBlock {
+    uint64_t tag;
+    bool valid;
+};
+
+struct CacheSet {
+    struct CacheBlock *blocks;
+};
+
+struct Cache {
+    struct CacheSet *sets;
+    int num_sets;
+
+    int cachesize;
+    int blksize;
+    int assoc;
+
+    uint64_t blk_mask;
+    uint64_t set_mask;
+    uint64_t tag_mask;
+};
+
+struct Cache *dcache, *icache;
+
+static int pow_of_two(int num)
+{
+    g_assert((num & (num - 1)) == 0);
+    int ret = 0;
+    while (num /= 2) {
+        ret++;
+    }
+    return ret;
+}
+
+static inline uint64_t extract_tag(struct Cache *cache, uint64_t addr)
+{
+    return (addr & cache->tag_mask) >>
+        (pow_of_two(cache->num_sets) + pow_of_two(cache->blksize));
+}
+
+static inline uint64_t extract_set(struct Cache *cache, uint64_t addr)
+{
+    return (addr & cache->set_mask) >> (pow_of_two(cache->blksize));
+}
+
+static struct Cache *cache_init(int blksize, int assoc, int cachesize)
+{
+    struct Cache *cache;
+    int i;
+
+    cache = g_new(struct Cache, 1);
+    cache->blksize = blksize;
+    cache->assoc = assoc;
+    cache->cachesize = cachesize;
+    cache->num_sets = cachesize / (blksize * assoc);
+    cache->sets = g_new(struct CacheSet, cache->num_sets);
+
+    for (i = 0; i < cache->num_sets; i++) {
+        cache->sets[i].blocks = g_new0(struct CacheBlock, assoc);
+    }
+
+    cache->blk_mask = blksize - 1;
+    cache->set_mask = ((cache->num_sets - 1) << (pow_of_two(cache->blksize)));
+    cache->tag_mask = ~(cache->set_mask | cache->blk_mask);
+
+    return cache;
+}
+
+static int get_invalid_block(struct Cache *cache, uint64_t set)
+{
+    int i;
+
+    for (i = 0; i < cache->assoc; i++) {
+        if (!cache->sets[set].blocks[i].valid) {
+            /* conflict miss */
+            return i;
+        }
+    }
+
+    /* compulsary miss */
+    return -1;
+}
+
+static int get_replaced_block(struct Cache *cache)
+{
+    return g_rand_int_range(rng, 0, cache->assoc);
+}
+
+static bool in_cache(struct Cache *cache, uint64_t addr)
+{
+    int i;
+    uint64_t tag, set;
+
+    tag = extract_tag(cache, addr);
+    set = extract_set(cache, addr);
+
+    for (i = 0; i < cache->assoc; i++) {
+        if (cache->sets[set].blocks[i].tag == tag &&
+                cache->sets[set].blocks[i].valid) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+static enum AccessResult access_cache(struct Cache *cache, uint64_t addr)
+{
+    uint64_t tag, set;
+    int replaced_blk;
+
+    if (in_cache(cache, addr)) {
+        return HIT;
+    }
+
+    tag = extract_tag(cache, addr);
+    set = extract_set(cache, addr);
+
+    replaced_blk = get_invalid_block(cache, set);
+
+    if (replaced_blk == -1) {
+        replaced_blk = get_replaced_block(cache);
+    }
+
+    cache->sets[set].blocks[replaced_blk].tag = tag;
+    cache->sets[set].blocks[replaced_blk].valid = true;
+
+    return MISS;
+}
+
+struct InsnData *get_or_create(GHashTable *ht, struct InsnData *insn_data,
+                               uint64_t addr)
+{
+    struct InsnData *insn = g_hash_table_lookup(ht, GUINT_TO_POINTER(addr));
+    if (!insn) {
+        g_hash_table_insert(ht, GUINT_TO_POINTER(addr), (gpointer) insn_data);
+        insn = insn_data;
+    }
+
+    return insn;
+}
+
+static void vcpu_mem_access(unsigned int cpu_index, qemu_plugin_meminfo_t info,
+                            uint64_t vaddr, void *userdata)
+{
+    uint64_t insn_addr;
+    uint64_t effective_addr;
+    struct qemu_plugin_hwaddr *hwaddr;
+
+    g_mutex_lock(&dmtx);
+    hwaddr = qemu_plugin_get_hwaddr(info, vaddr);
+    if (hwaddr && qemu_plugin_hwaddr_is_io(hwaddr)) {
+        g_mutex_unlock(&dmtx);
+        return;
+    }
+
+    insn_addr = ((struct InsnData *) userdata)->addr;
+    effective_addr = hwaddr ? qemu_plugin_hwaddr_phys_addr(hwaddr) : vaddr;
+
+    if (access_cache(dcache, effective_addr) == MISS) {
+        struct InsnData *insn = get_or_create(dmiss_ht, userdata, insn_addr);
+        insn->misses++;
+        dmisses++;
+    }
+    dmem_accesses++;
+    g_mutex_unlock(&dmtx);
+}
+
+static void vcpu_insn_exec(unsigned int vcpu_index, void *userdata)
+{
+    uint64_t addr;
+
+    g_mutex_lock(&imtx);
+    addr = ((struct InsnData *) userdata)->addr;
+
+    if (access_cache(icache, addr) == MISS) {
+        struct InsnData *insn = get_or_create(imiss_ht, userdata, addr);
+        insn->misses++;
+        imisses++;
+    }
+    imem_accesses++;
+    g_mutex_unlock(&imtx);
+}
+
+static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
+{
+    size_t n_insns;
+    size_t i;
+
+    n_insns = qemu_plugin_tb_n_insns(tb);
+    for (i = 0; i < n_insns; i++) {
+        struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
+        uint64_t effective_addr;
+
+        if (sys) {
+            effective_addr = (uint64_t) qemu_plugin_insn_haddr(insn);
+        } else {
+            effective_addr = (uint64_t) qemu_plugin_insn_vaddr(insn);
+        }
+
+        struct InsnData *ddata = g_new(struct InsnData, 1);
+        struct InsnData *idata = g_new(struct InsnData, 1);
+
+        ddata->disas_str = qemu_plugin_insn_disas(insn);
+        ddata->misses = 0;
+        ddata->addr = effective_addr;
+
+        idata->disas_str = g_strdup(ddata->disas_str);
+        idata->misses = 0;
+        idata->addr = effective_addr;
+
+        qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem_access,
+                                         QEMU_PLUGIN_CB_NO_REGS,
+                                         rw, ddata);
+
+        qemu_plugin_register_vcpu_insn_exec_cb(insn, vcpu_insn_exec,
+                                               QEMU_PLUGIN_CB_NO_REGS, idata);
+    }
+}
+
+static void print_entry(gpointer data)
+{
+    struct InsnData *insn = (struct InsnData *) data;
+    g_autoptr(GString) xx = g_string_new("");
+    g_string_append_printf(xx, "0x%" PRIx64 ": %s - misses: %lu\n",
+            insn->addr, insn->disas_str, insn->misses);
+    qemu_plugin_outs(xx->str);
+}
+
+static void free_insn(gpointer data)
+{
+    struct InsnData *insn = (struct InsnData *) data;
+    g_free(insn->disas_str);
+    g_free(insn);
+}
+
+static void free_cache(struct Cache *cache)
+{
+    for (int i = 0; i < cache->num_sets; i++) {
+        g_free(cache->sets[i].blocks);
+    }
+
+    g_free(cache->sets);
+}
+
+static int cmp(gconstpointer a, gconstpointer b)
+{
+    struct InsnData *insn_a = (struct InsnData *) a;
+    struct InsnData *insn_b = (struct InsnData *) b;
+
+    return insn_a->misses < insn_b->misses ? 1 : -1;
+}
+
+static void print_stats()
+{
+    g_autoptr(GString) rep = g_string_new("");
+    g_string_append_printf(rep,
+            "Data accesses: %lu, Misses: %lu\nMiss rate: %lf%%\n\n",
+            dmem_accesses,
+            dmisses,
+            ((double)dmisses / dmem_accesses) * 100.0);
+
+    g_string_append_printf(rep,
+            "Instruction accesses: %lu, Misses: %lu\nMiss rate: %lf%%\n\n",
+            imem_accesses,
+            imisses,
+            ((double)imisses / imem_accesses) * 100.0);
+
+    qemu_plugin_outs(rep->str);
+}
+
+static void plugin_exit()
+{
+    GList *curr;
+    int i;
+
+    g_mutex_lock(&imtx);
+    g_mutex_lock(&dmtx);
+    GList *dmiss_insns = g_hash_table_get_values(dmiss_ht);
+    GList *imiss_insns = g_hash_table_get_values(imiss_ht);
+    dmiss_insns = g_list_sort(dmiss_insns, cmp);
+    imiss_insns = g_list_sort(imiss_insns, cmp);
+
+    print_stats();
+
+    qemu_plugin_outs("Most data-missing instructions\n");
+    for (curr = dmiss_insns, i = 0; curr && i < limit; i++, curr = curr->next) {
+        print_entry(curr->data);
+    }
+
+    qemu_plugin_outs("\nMost fetch-missing instructions\n");
+    for (curr = imiss_insns, i = 0; curr && i < limit; i++, curr = curr->next) {
+        print_entry(curr->data);
+    }
+
+    free_cache(dcache);
+    free_cache(icache);
+
+    g_list_free(dmiss_insns);
+    g_list_free(imiss_insns);
+
+    g_hash_table_destroy(dmiss_ht);
+    g_hash_table_destroy(imiss_ht);
+
+    g_mutex_unlock(&dmtx);
+    g_mutex_unlock(&imtx);
+}
+
+QEMU_PLUGIN_EXPORT
+int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
+                        int argc, char **argv)
+{
+    int i;
+    int iassoc, iblksize, icachesize;
+    int dassoc, dblksize, dcachesize;
+
+    limit = 32;
+    sys = info->system_emulation;
+
+    dassoc = 8;
+    dblksize = 64;
+    dcachesize = dblksize * dassoc * 32;
+
+    iassoc = 8;
+    iblksize = 64;
+    icachesize = iblksize * iassoc * 32;
+
+    rng = g_rand_new();
+
+    for (i = 0; i < argc; i++) {
+        char *opt = argv[i];
+        if (g_str_has_prefix(opt, "limit=")) {
+            limit = g_ascii_strtoull(opt + 6, NULL, 10);
+        } else {
+            fprintf(stderr, "option parsing failed: %s\n", opt);
+            return -1;
+        }
+    }
+
+    dcache = cache_init(dblksize, dassoc, dcachesize);
+    icache = cache_init(iblksize, iassoc, icachesize);
+
+    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
+    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
+
+    dmiss_ht = g_hash_table_new_full(NULL, g_direct_equal, NULL, free_insn);
+    imiss_ht = g_hash_table_new_full(NULL, g_direct_equal, NULL, free_insn);
+
+    return 0;
+}
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [RFC PATCH v2 2/3] plugins: cache: Enabled parameterization and added trace printing
  2021-05-30  6:37 [RFC PATCH v2 0/3] Cache modelling TCG plugin Mahmoud Mandour
  2021-05-30  6:37 ` [RFC PATCH v2 1/3] plugins: Added a new cache modelling plugin Mahmoud Mandour
@ 2021-05-30  6:37 ` Mahmoud Mandour
  2021-06-01 11:18   ` Alex Bennée
  2021-06-02  3:15   ` Mahmoud Mandour
  2021-05-30  6:37 ` [RFC PATCH v2 3/3] plugins: cache: Added FIFO and LRU eviction policies Mahmoud Mandour
                   ` (2 subsequent siblings)
  4 siblings, 2 replies; 15+ messages in thread
From: Mahmoud Mandour @ 2021-05-30  6:37 UTC (permalink / raw)
  To: qemu-devel; +Cc: Mahmoud Mandour, Alex Bennée

Made both icache and dcache configurable through plugin arguments
and added memory trace printing in a separate file.

Signed-off-by: Mahmoud Mandour <ma.mandourr@gmail.com>
---
 contrib/plugins/cache.c | 68 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 66 insertions(+), 2 deletions(-)

diff --git a/contrib/plugins/cache.c b/contrib/plugins/cache.c
index 8c9d1dd538..fa0bf1dd40 100644
--- a/contrib/plugins/cache.c
+++ b/contrib/plugins/cache.c
@@ -22,7 +22,7 @@ static GRand *rng;
 static GHashTable *dmiss_ht;
 static GHashTable *imiss_ht;
 
-static GMutex dmtx, imtx;
+static GMutex dmtx, imtx, fmtx;
 
 static int limit;
 static bool sys;
@@ -33,6 +33,8 @@ static uint64_t dmisses;
 static uint64_t imem_accesses;
 static uint64_t imisses;
 
+FILE *tracefile;
+
 static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW;
 
 enum AccessResult {
@@ -205,6 +207,16 @@ static void vcpu_mem_access(unsigned int cpu_index, qemu_plugin_meminfo_t info,
     insn_addr = ((struct InsnData *) userdata)->addr;
     effective_addr = hwaddr ? qemu_plugin_hwaddr_phys_addr(hwaddr) : vaddr;
 
+    if (tracefile) {
+        g_mutex_lock(&fmtx);
+        g_autoptr(GString) rep = g_string_new("");
+        bool is_store = qemu_plugin_mem_is_store(info);
+        g_string_append_printf(rep, "%c: 0x%" PRIx64,
+                is_store ? 'S' : 'L', effective_addr);
+        fprintf(tracefile, "%s\n", rep->str);
+        g_mutex_unlock(&fmtx);
+    }
+
     if (access_cache(dcache, effective_addr) == MISS) {
         struct InsnData *insn = get_or_create(dmiss_ht, userdata, insn_addr);
         insn->misses++;
@@ -221,11 +233,20 @@ static void vcpu_insn_exec(unsigned int vcpu_index, void *userdata)
     g_mutex_lock(&imtx);
     addr = ((struct InsnData *) userdata)->addr;
 
+    if (tracefile) {
+        g_mutex_lock(&fmtx);
+        g_autoptr(GString) rep = g_string_new("");
+        g_string_append_printf(rep, "I: 0x%" PRIx64, addr);
+        fprintf(tracefile, "%s\n", rep->str);
+        g_mutex_unlock(&fmtx);
+    }
+
     if (access_cache(icache, addr) == MISS) {
         struct InsnData *insn = get_or_create(imiss_ht, userdata, addr);
         insn->misses++;
         imisses++;
     }
+
     imem_accesses++;
     g_mutex_unlock(&imtx);
 }
@@ -352,6 +373,15 @@ static void plugin_exit()
 
     g_mutex_unlock(&dmtx);
     g_mutex_unlock(&imtx);
+
+    if (tracefile) {
+        fclose(tracefile);
+    }
+}
+
+static bool bad_cache_params(int blksize, int assoc, int cachesize)
+{
+    return (cachesize % blksize) != 0 || (cachesize % (blksize * assoc) != 0);
 }
 
 QEMU_PLUGIN_EXPORT
@@ -377,14 +407,48 @@ int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
 
     for (i = 0; i < argc; i++) {
         char *opt = argv[i];
-        if (g_str_has_prefix(opt, "limit=")) {
+        if (g_str_has_prefix(opt, "I=")) {
+            gchar **toks = g_strsplit(opt + 2, " ", -1);
+            if (g_strv_length(toks) != 3) {
+                fprintf(stderr, "option parsing failed: %s\n", opt);
+                return -1;
+            }
+            icachesize = g_ascii_strtoull(toks[0], NULL, 10);
+            iassoc = g_ascii_strtoull(toks[1], NULL, 10);
+            iblksize = g_ascii_strtoull(toks[2], NULL, 10);
+        } else if (g_str_has_prefix(opt, "D=")) {
+            gchar **toks = g_strsplit(opt + 2, " ", -1);
+            if (g_strv_length(toks) != 3) {
+                fprintf(stderr, "option parsing failed: %s\n", opt);
+                return -1;
+            }
+            dcachesize = g_ascii_strtoull(toks[0], NULL, 10);
+            dassoc = g_ascii_strtoull(toks[1], NULL, 10);
+            dblksize = g_ascii_strtoull(toks[2], NULL, 10);
+        } else if (g_str_has_prefix(opt, "limit=")) {
             limit = g_ascii_strtoull(opt + 6, NULL, 10);
+        } else if (g_str_has_prefix(opt, "tracefile=")) {
+            char *file_name = opt + 10;
+            tracefile = fopen(file_name, "w");
+            if (!tracefile) {
+                fprintf(stderr, "could not open: %s for writing\n", file_name);
+            }
         } else {
             fprintf(stderr, "option parsing failed: %s\n", opt);
             return -1;
         }
     }
 
+    if (bad_cache_params(iblksize, iassoc, icachesize)) {
+        fprintf(stderr, "icache cannot be constructed from given parameters\n");
+        return -1;
+    }
+
+    if (bad_cache_params(dblksize, dassoc, dcachesize)) {
+        fprintf(stderr, "dcache cannot be constructed from given parameters\n");
+        return -1;
+    }
+
     dcache = cache_init(dblksize, dassoc, dcachesize);
     icache = cache_init(iblksize, iassoc, icachesize);
 
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [RFC PATCH v2 3/3] plugins: cache: Added FIFO and LRU eviction policies.
  2021-05-30  6:37 [RFC PATCH v2 0/3] Cache modelling TCG plugin Mahmoud Mandour
  2021-05-30  6:37 ` [RFC PATCH v2 1/3] plugins: Added a new cache modelling plugin Mahmoud Mandour
  2021-05-30  6:37 ` [RFC PATCH v2 2/3] plugins: cache: Enabled parameterization and added trace printing Mahmoud Mandour
@ 2021-05-30  6:37 ` Mahmoud Mandour
  2021-06-01 12:43   ` Alex Bennée
  2021-06-02  3:16   ` Mahmoud Mandour
  2021-05-30  6:44 ` [RFC PATCH v2 0/3] Cache modelling TCG plugin Mahmoud Mandour
  2021-06-01 13:30 ` Alex Bennée
  4 siblings, 2 replies; 15+ messages in thread
From: Mahmoud Mandour @ 2021-05-30  6:37 UTC (permalink / raw)
  To: qemu-devel; +Cc: Mahmoud Mandour, Alex Bennée

Now one of the three eviction policies can be chosen as an argument. On
not specifying an argument, LRU is used by default.

Signed-off-by: Mahmoud Mandour <ma.mandourr@gmail.com>
---
 contrib/plugins/cache.c | 159 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 146 insertions(+), 13 deletions(-)

diff --git a/contrib/plugins/cache.c b/contrib/plugins/cache.c
index fa0bf1dd40..1e323494bf 100644
--- a/contrib/plugins/cache.c
+++ b/contrib/plugins/cache.c
@@ -18,6 +18,8 @@
 
 QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
 
+static bool fifo, lru, rnd;
+
 static GRand *rng;
 static GHashTable *dmiss_ht;
 static GHashTable *imiss_ht;
@@ -55,6 +57,8 @@ struct CacheBlock {
 
 struct CacheSet {
     struct CacheBlock *blocks;
+    uint16_t *priorities;
+    GQueue *evict_queue;
 };
 
 struct Cache {
@@ -93,6 +97,84 @@ static inline uint64_t extract_set(struct Cache *cache, uint64_t addr)
     return (addr & cache->set_mask) >> (pow_of_two(cache->blksize));
 }
 
+static void lru_priorities_init(struct Cache *cache)
+{
+    int i, j;
+
+    for (i = 0; i < cache->num_sets; i++) {
+        cache->sets[i].priorities = g_new(uint16_t, cache->assoc);
+        for (j = 0; j < cache->assoc; j++) {
+            cache->sets[i].priorities[j] = cache->assoc - j - 1;
+        }
+    }
+}
+
+static void lru_update_on_miss(struct Cache *cache,
+                                      int set_idx,
+                                      int blk_idx)
+{
+    int i;
+
+    for (i = 0; i < cache->assoc; i++) {
+        cache->sets[set_idx].priorities[i]++;
+    }
+
+    cache->sets[set_idx].priorities[blk_idx] = 0;
+}
+
+static void lru_update_on_hit(struct Cache *cache,
+                                         int set_idx,
+                                         int blk_idx)
+{
+    uint16_t blk_priority;
+    int i;
+
+    blk_priority = cache->sets[set_idx].priorities[blk_idx];
+    for (i = 0; i < cache->assoc; i++) {
+        if (cache->sets[set_idx].priorities[i] < blk_priority) {
+            cache->sets[set_idx].priorities[i]++;
+        }
+    }
+    cache->sets[set_idx].priorities[blk_idx] = 0;
+}
+
+static int lru_get_lru_block(struct Cache *cache, int set_idx)
+{
+    int i;
+
+    for (i = 0; i < cache->assoc; i++) {
+        if (cache->sets[set_idx].priorities[i] == cache->assoc - 1) {
+            return i;
+        }
+    }
+
+    g_assert_not_reached();
+}
+
+static void fifo_init(struct Cache *cache)
+{
+    int i;
+
+    for (i = 0; i < cache->num_sets; i++) {
+        cache->sets[i].evict_queue = g_queue_new();
+    }
+}
+
+static int fifo_get_first_in_block(struct Cache *cache, int set)
+{
+    GQueue *q = cache->sets[set].evict_queue;
+    return GPOINTER_TO_INT(g_queue_pop_tail(q));
+}
+
+static void fifo_update_on_miss(struct Cache *cache,
+                                int set,
+                                int blk_idx)
+{
+    GQueue *q = cache->sets[set].evict_queue;
+    g_queue_push_head(q, GINT_TO_POINTER(blk_idx));
+}
+
+
 static struct Cache *cache_init(int blksize, int assoc, int cachesize)
 {
     struct Cache *cache;
@@ -113,6 +195,12 @@ static struct Cache *cache_init(int blksize, int assoc, int cachesize)
     cache->set_mask = ((cache->num_sets - 1) << (pow_of_two(cache->blksize)));
     cache->tag_mask = ~(cache->set_mask | cache->blk_mask);
 
+    if (lru) {
+        lru_priorities_init(cache);
+    } else if (fifo) {
+        fifo_init(cache);
+    }
+
     return cache;
 }
 
@@ -131,12 +219,20 @@ static int get_invalid_block(struct Cache *cache, uint64_t set)
     return -1;
 }
 
-static int get_replaced_block(struct Cache *cache)
+static int get_replaced_block(struct Cache *cache, int set)
 {
-    return g_rand_int_range(rng, 0, cache->assoc);
+    if (rnd) {
+        return g_rand_int_range(rng, 0, cache->assoc);
+    } else if (lru) {
+        return lru_get_lru_block(cache, set);
+    } else if (fifo) {
+        return fifo_get_first_in_block(cache, set);
+    }
+
+    g_assert_not_reached();
 }
 
-static bool in_cache(struct Cache *cache, uint64_t addr)
+static int in_cache(struct Cache *cache, uint64_t addr)
 {
     int i;
     uint64_t tag, set;
@@ -147,29 +243,39 @@ static bool in_cache(struct Cache *cache, uint64_t addr)
     for (i = 0; i < cache->assoc; i++) {
         if (cache->sets[set].blocks[i].tag == tag &&
                 cache->sets[set].blocks[i].valid) {
-            return true;
+            return i;
         }
     }
 
-    return false;
+    return -1;
 }
 
 static enum AccessResult access_cache(struct Cache *cache, uint64_t addr)
 {
     uint64_t tag, set;
-    int replaced_blk;
-
-    if (in_cache(cache, addr)) {
-        return HIT;
-    }
+    int hit_blk, replaced_blk;
 
     tag = extract_tag(cache, addr);
     set = extract_set(cache, addr);
+    hit_blk = in_cache(cache, addr);
+
+    if (hit_blk != -1) {
+        if (lru) {
+            lru_update_on_hit(cache, set, hit_blk);
+        }
+        return HIT;
+    }
 
     replaced_blk = get_invalid_block(cache, set);
 
     if (replaced_blk == -1) {
-        replaced_blk = get_replaced_block(cache);
+        replaced_blk = get_replaced_block(cache, set);
+    }
+
+    if (lru) {
+        lru_update_on_miss(cache, set, replaced_blk);
+    } else if (fifo) {
+        fifo_update_on_miss(cache, set, replaced_blk);
     }
 
     cache->sets[set].blocks[replaced_blk].tag = tag;
@@ -307,6 +413,11 @@ static void free_cache(struct Cache *cache)
 {
     for (int i = 0; i < cache->num_sets; i++) {
         g_free(cache->sets[i].blocks);
+        if (lru) {
+            g_free(cache->sets[i].priorities);
+        } else if (fifo) {
+            g_queue_free(cache->sets[i].evict_queue);
+        }
     }
 
     g_free(cache->sets);
@@ -403,8 +514,6 @@ int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
     iblksize = 64;
     icachesize = iblksize * iassoc * 32;
 
-    rng = g_rand_new();
-
     for (i = 0; i < argc; i++) {
         char *opt = argv[i];
         if (g_str_has_prefix(opt, "I=")) {
@@ -433,6 +542,22 @@ int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
             if (!tracefile) {
                 fprintf(stderr, "could not open: %s for writing\n", file_name);
             }
+        } else if (g_str_has_prefix(opt, "evict=")) {
+            if (lru || rnd || fifo) {
+                fprintf(stderr, "eviction policy specified more than once\n");
+                return -1;
+            }
+            gchar *policy = opt + 6;
+            if (g_strcmp0(policy, "rand") == 0) {
+                rnd = true;
+            } else if (g_strcmp0(policy, "lru") == 0) {
+                lru = true;
+            } else if (g_strcmp0(policy, "fifo") == 0) {
+                fifo = true;
+            } else {
+                fprintf(stderr, "invalid eviction policy: %s\n", opt);
+                return -1;
+            }
         } else {
             fprintf(stderr, "option parsing failed: %s\n", opt);
             return -1;
@@ -449,6 +574,14 @@ int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
         return -1;
     }
 
+    if (!rnd && !lru && !fifo) {
+        lru = true;
+    }
+
+    if (rnd) {
+        rng = g_rand_new();
+    }
+
     dcache = cache_init(dblksize, dassoc, dcachesize);
     icache = cache_init(iblksize, iassoc, icachesize);
 
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [RFC PATCH v2 0/3] Cache modelling TCG plugin
  2021-05-30  6:37 [RFC PATCH v2 0/3] Cache modelling TCG plugin Mahmoud Mandour
                   ` (2 preceding siblings ...)
  2021-05-30  6:37 ` [RFC PATCH v2 3/3] plugins: cache: Added FIFO and LRU eviction policies Mahmoud Mandour
@ 2021-05-30  6:44 ` Mahmoud Mandour
  2021-06-01 13:30 ` Alex Bennée
  4 siblings, 0 replies; 15+ messages in thread
From: Mahmoud Mandour @ 2021-05-30  6:44 UTC (permalink / raw)
  To: qemu-devel; +Cc: Emilio G. Cota

[-- Attachment #1: Type: text/plain, Size: 1351 bytes --]

On Sun, May 30, 2021 at 8:37 AM Mahmoud Mandour <ma.mandourr@gmail.com>
wrote:

> In this RFC patch series, I propose an initial cache modelling TCG
> plugin. As of now, it models separate L1 data cache and L1 instruction
> cache. It supports three eviction policies: LRU, random, and FIFO. Once
> a policy is chosen, it's used for both instruction and data caches.
>
> v1 -> v2: Unlocked dmtx on early return in vcpu_mem_access & removed a
>           (probably?) bad InsnData free.
>           This is probably still problematic since it does not free the
>           ``idata`` allocated for the vcpu_mem_access callback even
>           once, but if it's placed, it would double-free it.
>           How do I mitigate this? I need to free the InsnData passed to
>           vcpu_mem_access only once if we find out that it's an IO
>           access since we do not need it anymore and it will early
>           return every time.
>
> Mahmoud Mandour (3):
>   plugins: Added a new cache modelling plugin
>   plugins: cache: Enabled parameterization and added trace printing
>   plugins: cache: Added FIFO and LRU eviction policies.
>
>  contrib/plugins/Makefile |   1 +
>  contrib/plugins/cache.c  | 595 +++++++++++++++++++++++++++++++++++++++
>  2 files changed, 596 insertions(+)
>  create mode 100644 contrib/plugins/cache.c
>
> --
> 2.25.1
>
>

[-- Attachment #2: Type: text/html, Size: 1795 bytes --]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC PATCH v2 2/3] plugins: cache: Enabled parameterization and added trace printing
  2021-05-30  6:37 ` [RFC PATCH v2 2/3] plugins: cache: Enabled parameterization and added trace printing Mahmoud Mandour
@ 2021-06-01 11:18   ` Alex Bennée
  2021-06-02  4:29     ` Mahmoud Mandour
  2021-06-03  9:39     ` Stefan Hajnoczi
  2021-06-02  3:15   ` Mahmoud Mandour
  1 sibling, 2 replies; 15+ messages in thread
From: Alex Bennée @ 2021-06-01 11:18 UTC (permalink / raw)
  To: Mahmoud Mandour; +Cc: qemu-devel, Stefan Hajnoczi


(Stefan CC'ed for tracing discussion)

Mahmoud Mandour <ma.mandourr@gmail.com> writes:

> Made both icache and dcache configurable through plugin arguments
> and added memory trace printing in a separate file.

Please keep the commits discreet and single topic. The memory trace is
an extra feature so should be in it's own commit.

>
> Signed-off-by: Mahmoud Mandour <ma.mandourr@gmail.com>
> ---
>  contrib/plugins/cache.c | 68 +++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 66 insertions(+), 2 deletions(-)
>
> diff --git a/contrib/plugins/cache.c b/contrib/plugins/cache.c
> index 8c9d1dd538..fa0bf1dd40 100644
> --- a/contrib/plugins/cache.c
> +++ b/contrib/plugins/cache.c
> @@ -22,7 +22,7 @@ static GRand *rng;
>  static GHashTable *dmiss_ht;
>  static GHashTable *imiss_ht;
>  
> -static GMutex dmtx, imtx;
> +static GMutex dmtx, imtx, fmtx;
>  
>  static int limit;
>  static bool sys;
> @@ -33,6 +33,8 @@ static uint64_t dmisses;
>  static uint64_t imem_accesses;
>  static uint64_t imisses;
>  
> +FILE *tracefile;
> +
>  static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW;
>  
>  enum AccessResult {
> @@ -205,6 +207,16 @@ static void vcpu_mem_access(unsigned int cpu_index, qemu_plugin_meminfo_t info,
>      insn_addr = ((struct InsnData *) userdata)->addr;
>      effective_addr = hwaddr ? qemu_plugin_hwaddr_phys_addr(hwaddr) : vaddr;
>  
> +    if (tracefile) {
> +        g_mutex_lock(&fmtx);
> +        g_autoptr(GString) rep = g_string_new("");
> +        bool is_store = qemu_plugin_mem_is_store(info);
> +        g_string_append_printf(rep, "%c: 0x%" PRIx64,
> +                is_store ? 'S' : 'L', effective_addr);
> +        fprintf(tracefile, "%s\n", rep->str);
> +        g_mutex_unlock(&fmtx);
> +    }

I can see this would be useful for debugging but I'm wary of adding
ad-hoc tracing formats when QEMU already has support for a wide range of
tracing formats. We discussed this a bit in:

  Subject: trace_FOO_tcg bit-rotted?
  Date: Tue, 06 Apr 2021 17:00:20 +0100
  Message-ID: <87eefnwd0l.fsf@linaro.org>

However I don't know how easy it would be to leverage the existing
tracing infrastructure from inside a plugin. As I understand it QEMU
currently builds a static list of trace points during the build so maybe
we would need additional infrastructure for a plugin to register a trace
point and for the final output to be use-able. For example the binary
trace output I think still needs to reference the source trace-events
file?

So that's not a NACK but maybe we could spend a little time working out
if we can come up with a cleaner solution?

Stefan, any thoughts?

>      if (access_cache(dcache, effective_addr) == MISS) {
>          struct InsnData *insn = get_or_create(dmiss_ht, userdata, insn_addr);
>          insn->misses++;
> @@ -221,11 +233,20 @@ static void vcpu_insn_exec(unsigned int vcpu_index, void *userdata)
>      g_mutex_lock(&imtx);
>      addr = ((struct InsnData *) userdata)->addr;
>  
> +    if (tracefile) {
> +        g_mutex_lock(&fmtx);
> +        g_autoptr(GString) rep = g_string_new("");
> +        g_string_append_printf(rep, "I: 0x%" PRIx64, addr);
> +        fprintf(tracefile, "%s\n", rep->str);
> +        g_mutex_unlock(&fmtx);
> +    }
> +
>      if (access_cache(icache, addr) == MISS) {
>          struct InsnData *insn = get_or_create(imiss_ht, userdata, addr);
>          insn->misses++;
>          imisses++;
>      }
> +
>      imem_accesses++;
>      g_mutex_unlock(&imtx);
>  }
> @@ -352,6 +373,15 @@ static void plugin_exit()
>  
>      g_mutex_unlock(&dmtx);
>      g_mutex_unlock(&imtx);
> +
> +    if (tracefile) {
> +        fclose(tracefile);
> +    }
> +}
> +
> +static bool bad_cache_params(int blksize, int assoc, int cachesize)
> +{
> +    return (cachesize % blksize) != 0 || (cachesize % (blksize * assoc) != 0);
>  }
>  
>  QEMU_PLUGIN_EXPORT
> @@ -377,14 +407,48 @@ int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
>  
>      for (i = 0; i < argc; i++) {
>          char *opt = argv[i];
> -        if (g_str_has_prefix(opt, "limit=")) {
> +        if (g_str_has_prefix(opt, "I=")) {
> +            gchar **toks = g_strsplit(opt + 2, " ", -1);
> +            if (g_strv_length(toks) != 3) {
> +                fprintf(stderr, "option parsing failed: %s\n", opt);
> +                return -1;
> +            }
> +            icachesize = g_ascii_strtoull(toks[0], NULL, 10);
> +            iassoc = g_ascii_strtoull(toks[1], NULL, 10);
> +            iblksize = g_ascii_strtoull(toks[2], NULL, 10);
> +        } else if (g_str_has_prefix(opt, "D=")) {
> +            gchar **toks = g_strsplit(opt + 2, " ", -1);
> +            if (g_strv_length(toks) != 3) {
> +                fprintf(stderr, "option parsing failed: %s\n", opt);
> +                return -1;
> +            }
> +            dcachesize = g_ascii_strtoull(toks[0], NULL, 10);
> +            dassoc = g_ascii_strtoull(toks[1], NULL, 10);
> +            dblksize = g_ascii_strtoull(toks[2], NULL, 10);
> +        } else if (g_str_has_prefix(opt, "limit=")) {
>              limit = g_ascii_strtoull(opt + 6, NULL, 10);
> +        } else if (g_str_has_prefix(opt, "tracefile=")) {
> +            char *file_name = opt + 10;
> +            tracefile = fopen(file_name, "w");
> +            if (!tracefile) {
> +                fprintf(stderr, "could not open: %s for writing\n", file_name);
> +            }
>          } else {
>              fprintf(stderr, "option parsing failed: %s\n", opt);
>              return -1;
>          }
>      }
>  
> +    if (bad_cache_params(iblksize, iassoc, icachesize)) {
> +        fprintf(stderr, "icache cannot be constructed from given parameters\n");
> +        return -1;
> +    }
> +
> +    if (bad_cache_params(dblksize, dassoc, dcachesize)) {
> +        fprintf(stderr, "dcache cannot be constructed from given parameters\n");
> +        return -1;
> +    }
> +

Perhaps roll bad_cache_params into cache_init and return NULL if it
fails, so:

  dcache = cache_init(dblksize, dassoc, dcachesize);
  if (!dcache) {
    fprintf(stderr, "dcache cannot be constructed from given parameters\n");
    return -1;
  }

>      dcache = cache_init(dblksize, dassoc, dcachesize);
>      icache = cache_init(iblksize, iassoc, icachesize);


-- 
Alex Bennée


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC PATCH v2 3/3] plugins: cache: Added FIFO and LRU eviction policies.
  2021-05-30  6:37 ` [RFC PATCH v2 3/3] plugins: cache: Added FIFO and LRU eviction policies Mahmoud Mandour
@ 2021-06-01 12:43   ` Alex Bennée
  2021-06-02  5:23     ` Mahmoud Mandour
  2021-06-02  3:16   ` Mahmoud Mandour
  1 sibling, 1 reply; 15+ messages in thread
From: Alex Bennée @ 2021-06-01 12:43 UTC (permalink / raw)
  To: Mahmoud Mandour; +Cc: qemu-devel


Mahmoud Mandour <ma.mandourr@gmail.com> writes:

> Now one of the three eviction policies can be chosen as an argument. On
> not specifying an argument, LRU is used by default.
>
> Signed-off-by: Mahmoud Mandour <ma.mandourr@gmail.com>
> ---
>  contrib/plugins/cache.c | 159 ++++++++++++++++++++++++++++++++++++----
>  1 file changed, 146 insertions(+), 13 deletions(-)
>
> diff --git a/contrib/plugins/cache.c b/contrib/plugins/cache.c
> index fa0bf1dd40..1e323494bf 100644
> --- a/contrib/plugins/cache.c
> +++ b/contrib/plugins/cache.c
> @@ -18,6 +18,8 @@
>  
>  QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
>  
> +static bool fifo, lru, rnd;
> +

Ironically this would be a good use for a single variable with an enum,
or alternatively a function pointer which can be set on initialisation.

>  static GRand *rng;
>  static GHashTable *dmiss_ht;
>  static GHashTable *imiss_ht;
> @@ -55,6 +57,8 @@ struct CacheBlock {
>  
>  struct CacheSet {
>      struct CacheBlock *blocks;
> +    uint16_t *priorities;
> +    GQueue *evict_queue;
>  };
>  
>  struct Cache {
> @@ -93,6 +97,84 @@ static inline uint64_t extract_set(struct Cache *cache, uint64_t addr)
>      return (addr & cache->set_mask) >> (pow_of_two(cache->blksize));
>  }

I think it would be useful to summarise the LRU behaviour here in a
comment and explain how the priorities are meant to change as the cache
is used.

>  
> +static void lru_priorities_init(struct Cache *cache)
> +{
> +    int i, j;
> +
> +    for (i = 0; i < cache->num_sets; i++) {
> +        cache->sets[i].priorities = g_new(uint16_t, cache->assoc);
> +        for (j = 0; j < cache->assoc; j++) {
> +            cache->sets[i].priorities[j] = cache->assoc - j - 1;
> +        }
> +    }
> +}
> +
> +static void lru_update_on_miss(struct Cache *cache,
> +                                      int set_idx,
> +                                      int blk_idx)
> +{
> +    int i;
> +
> +    for (i = 0; i < cache->assoc; i++) {
> +        cache->sets[set_idx].priorities[i]++;
> +    }
> +
> +    cache->sets[set_idx].priorities[blk_idx] = 0;

So we increment priority for all non-hit blocks and reset it for the
entry just used? This isn't totally clear to follow however see bellow:

> +}
> +
> +static void lru_update_on_hit(struct Cache *cache,
> +                                         int set_idx,
> +                                         int blk_idx)
> +{
> +    uint16_t blk_priority;
> +    int i;
> +
> +    blk_priority = cache->sets[set_idx].priorities[blk_idx];
> +    for (i = 0; i < cache->assoc; i++) {
> +        if (cache->sets[set_idx].priorities[i] < blk_priority) {
> +            cache->sets[set_idx].priorities[i]++;
> +        }
> +    }
> +    cache->sets[set_idx].priorities[blk_idx] = 0;

This seems pretty expensive depending on the number of blocks. Another
approach would be to have a generation number that is incremented on
each access and stored in the appropriate set. Then...

> +}
> +
> +static int lru_get_lru_block(struct Cache *cache, int set_idx)
> +{
> +    int i;
> +
> +    for (i = 0; i < cache->assoc; i++) {
> +        if (cache->sets[set_idx].priorities[i] == cache->assoc - 1) {
> +            return i;
> +        }
> +    }

when you get to search for a "stale" block you just look for the lowest
generation number. The eviction logic should be being called less than
the update logic right?

> +
> +    g_assert_not_reached();
> +}
> +
> +static void fifo_init(struct Cache *cache)
> +{
> +    int i;
> +
> +    for (i = 0; i < cache->num_sets; i++) {
> +        cache->sets[i].evict_queue = g_queue_new();
> +    }
> +}
> +
> +static int fifo_get_first_in_block(struct Cache *cache, int set)
> +{
> +    GQueue *q = cache->sets[set].evict_queue;
> +    return GPOINTER_TO_INT(g_queue_pop_tail(q));
> +}
> +
> +static void fifo_update_on_miss(struct Cache *cache,
> +                                int set,
> +                                int blk_idx)
> +{
> +    GQueue *q = cache->sets[set].evict_queue;
> +    g_queue_push_head(q, GINT_TO_POINTER(blk_idx));
> +}

Again some commentary would be helpful around above the fifo functions.

> +
> +
>  static struct Cache *cache_init(int blksize, int assoc, int cachesize)
>  {
>      struct Cache *cache;
> @@ -113,6 +195,12 @@ static struct Cache *cache_init(int blksize, int assoc, int cachesize)
>      cache->set_mask = ((cache->num_sets - 1) << (pow_of_two(cache->blksize)));
>      cache->tag_mask = ~(cache->set_mask | cache->blk_mask);
>  
> +    if (lru) {
> +        lru_priorities_init(cache);
> +    } else if (fifo) {
> +        fifo_init(cache);
> +    }
> +
>      return cache;
>  }
>  
> @@ -131,12 +219,20 @@ static int get_invalid_block(struct Cache *cache, uint64_t set)
>      return -1;
>  }
>  
> -static int get_replaced_block(struct Cache *cache)
> +static int get_replaced_block(struct Cache *cache, int set)
>  {
> -    return g_rand_int_range(rng, 0, cache->assoc);
> +    if (rnd) {
> +        return g_rand_int_range(rng, 0, cache->assoc);
> +    } else if (lru) {
> +        return lru_get_lru_block(cache, set);
> +    } else if (fifo) {
> +        return fifo_get_first_in_block(cache, set);
> +    }
> +
> +    g_assert_not_reached();
>  }
>  
> -static bool in_cache(struct Cache *cache, uint64_t addr)
> +static int in_cache(struct Cache *cache, uint64_t addr)
>  {
>      int i;
>      uint64_t tag, set;
> @@ -147,29 +243,39 @@ static bool in_cache(struct Cache *cache, uint64_t addr)
>      for (i = 0; i < cache->assoc; i++) {
>          if (cache->sets[set].blocks[i].tag == tag &&
>                  cache->sets[set].blocks[i].valid) {
> -            return true;
> +            return i;
>          }
>      }
>  
> -    return false;
> +    return -1;
>  }
>  
>  static enum AccessResult access_cache(struct Cache *cache, uint64_t addr)
>  {
>      uint64_t tag, set;
> -    int replaced_blk;
> -
> -    if (in_cache(cache, addr)) {
> -        return HIT;
> -    }
> +    int hit_blk, replaced_blk;
>  
>      tag = extract_tag(cache, addr);
>      set = extract_set(cache, addr);
> +    hit_blk = in_cache(cache, addr);
> +
> +    if (hit_blk != -1) {
> +        if (lru) {
> +            lru_update_on_hit(cache, set, hit_blk);
> +        }
> +        return HIT;
> +    }
>  
>      replaced_blk = get_invalid_block(cache, set);
>  
>      if (replaced_blk == -1) {
> -        replaced_blk = get_replaced_block(cache);
> +        replaced_blk = get_replaced_block(cache, set);
> +    }
> +
> +    if (lru) {
> +        lru_update_on_miss(cache, set, replaced_blk);
> +    } else if (fifo) {
> +        fifo_update_on_miss(cache, set, replaced_blk);
>      }

I wonder if just having a update_hit and update_miss function pointer
would keep things cleaner?

  if (update_hit) {
       update_hit(cache, set, hit, block)
  }

etc...

>  
>      cache->sets[set].blocks[replaced_blk].tag = tag;
> @@ -307,6 +413,11 @@ static void free_cache(struct Cache *cache)
>  {
>      for (int i = 0; i < cache->num_sets; i++) {
>          g_free(cache->sets[i].blocks);
> +        if (lru) {
> +            g_free(cache->sets[i].priorities);

Hmm I've obviously missed something about how priorities are meant to be sued.

> +        } else if (fifo) {
> +            g_queue_free(cache->sets[i].evict_queue);
> +        }
>      }
>  
>      g_free(cache->sets);
> @@ -403,8 +514,6 @@ int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
>      iblksize = 64;
>      icachesize = iblksize * iassoc * 32;
>  
> -    rng = g_rand_new();
> -
>      for (i = 0; i < argc; i++) {
>          char *opt = argv[i];
>          if (g_str_has_prefix(opt, "I=")) {
> @@ -433,6 +542,22 @@ int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
>              if (!tracefile) {
>                  fprintf(stderr, "could not open: %s for writing\n", file_name);
>              }
> +        } else if (g_str_has_prefix(opt, "evict=")) {
> +            if (lru || rnd || fifo) {
> +                fprintf(stderr, "eviction policy specified more than once\n");
> +                return -1;
> +            }

This is one argument for the separate bools although generally QEMU
operates on the basis of last argument wins ;-) 

> +            gchar *policy = opt + 6;
> +            if (g_strcmp0(policy, "rand") == 0) {
> +                rnd = true;
> +            } else if (g_strcmp0(policy, "lru") == 0) {
> +                lru = true;
> +            } else if (g_strcmp0(policy, "fifo") == 0) {
> +                fifo = true;
> +            } else {
> +                fprintf(stderr, "invalid eviction policy: %s\n", opt);
> +                return -1;
> +            }
>          } else {
>              fprintf(stderr, "option parsing failed: %s\n", opt);
>              return -1;
> @@ -449,6 +574,14 @@ int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
>          return -1;
>      }
>  
> +    if (!rnd && !lru && !fifo) {
> +        lru = true;
> +    }
> +
> +    if (rnd) {
> +        rng = g_rand_new();
> +    }
> +
>      dcache = cache_init(dblksize, dassoc, dcachesize);
>      icache = cache_init(iblksize, iassoc, icachesize);


-- 
Alex Bennée


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC PATCH v2 0/3] Cache modelling TCG plugin
  2021-05-30  6:37 [RFC PATCH v2 0/3] Cache modelling TCG plugin Mahmoud Mandour
                   ` (3 preceding siblings ...)
  2021-05-30  6:44 ` [RFC PATCH v2 0/3] Cache modelling TCG plugin Mahmoud Mandour
@ 2021-06-01 13:30 ` Alex Bennée
  2021-06-02  6:22   ` Mahmoud Mandour
  4 siblings, 1 reply; 15+ messages in thread
From: Alex Bennée @ 2021-06-01 13:30 UTC (permalink / raw)
  To: Mahmoud Mandour; +Cc: qemu-devel


Mahmoud Mandour <ma.mandourr@gmail.com> writes:

> In this RFC patch series, I propose an initial cache modelling TCG
> plugin. As of now, it models separate L1 data cache and L1 instruction
> cache. It supports three eviction policies: LRU, random, and FIFO. Once
> a policy is chosen, it's used for both instruction and data caches.
>
> v1 -> v2: Unlocked dmtx on early return in vcpu_mem_access & removed a
>           (probably?) bad InsnData free.
>           This is probably still problematic since it does not free the
>           ``idata`` allocated for the vcpu_mem_access callback even
>           once, but if it's placed, it would double-free it.
>           How do I mitigate this? I need to free the InsnData passed to
>           vcpu_mem_access only once if we find out that it's an IO
>           access since we do not need it anymore and it will early
>           return every time.

OK I've done my review, sorry I reviewed 1/3 from the previous series.
I've made some comments inline in those patches but I think this is an
excellent start to the project. I think we can get the core plugin
up-streamed fairly quickly and then spend some time examining better
integration and possibly enhancing the plugin API.

-- 
Alex Bennée


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC PATCH v2 1/3] plugins: Added a new cache modelling plugin
  2021-05-30  6:37 ` [RFC PATCH v2 1/3] plugins: Added a new cache modelling plugin Mahmoud Mandour
@ 2021-06-02  3:14   ` Mahmoud Mandour
  0 siblings, 0 replies; 15+ messages in thread
From: Mahmoud Mandour @ 2021-06-02  3:14 UTC (permalink / raw)
  To: qemu-devel; +Cc: Emilio G. Cota, Alex Bennée

[-- Attachment #1: Type: text/plain, Size: 12055 bytes --]

On Sun, May 30, 2021 at 8:37 AM Mahmoud Mandour <ma.mandourr@gmail.com>
wrote:

> Added a cache modelling plugin that uses a static configuration used in
> many of the commercial microprocessors and uses random eviction policy.
>
> Signed-off-by: Mahmoud Mandour <ma.mandourr@gmail.com>
> ---
>  contrib/plugins/Makefile |   1 +
>  contrib/plugins/cache.c  | 398 +++++++++++++++++++++++++++++++++++++++
>  2 files changed, 399 insertions(+)
>  create mode 100644 contrib/plugins/cache.c
>
> diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
> index b9d7935e5e..2237b47f8b 100644
> --- a/contrib/plugins/Makefile
> +++ b/contrib/plugins/Makefile
> @@ -18,6 +18,7 @@ NAMES += hotpages
>  NAMES += howvec
>  NAMES += lockstep
>  NAMES += hwprofile
> +NAMES += cache
>
>  SONAMES := $(addsuffix .so,$(addprefix lib,$(NAMES)))
>
> diff --git a/contrib/plugins/cache.c b/contrib/plugins/cache.c
> new file mode 100644
> index 0000000000..8c9d1dd538
> --- /dev/null
> +++ b/contrib/plugins/cache.c
> @@ -0,0 +1,398 @@
> +/*
> + * Copyright (C) 2021, Mahmoud Mandour <ma.mandourr@gmail.com>
> + *
> + * License: GNU GPL, version 2 or later.
> + *   See the COPYING file in the top-level directory.
> + */
> +
> +#include <inttypes.h>
> +#include <assert.h>
> +#include <stdlib.h>
> +#include <inttypes.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <stdio.h>
> +#include <glib.h>
> +
> +#include <qemu-plugin.h>
> +
> +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
> +
> +static GRand *rng;
> +static GHashTable *dmiss_ht;
> +static GHashTable *imiss_ht;
> +
> +static GMutex dmtx, imtx;
> +
> +static int limit;
> +static bool sys;
> +
> +static uint64_t dmem_accesses;
> +static uint64_t dmisses;
> +
> +static uint64_t imem_accesses;
> +static uint64_t imisses;
> +
> +static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW;
> +
> +enum AccessResult {
> +    HIT = 0,
> +    MISS = 1
> +};
> +
> +struct InsnData {
> +    char *disas_str;
> +    uint64_t addr;
> +    uint64_t misses;
> +};
> +
> +struct CacheBlock {
> +    uint64_t tag;
> +    bool valid;
> +};
> +
> +struct CacheSet {
> +    struct CacheBlock *blocks;
> +};
> +
> +struct Cache {
> +    struct CacheSet *sets;
> +    int num_sets;
> +
> +    int cachesize;
> +    int blksize;
> +    int assoc;
> +
> +    uint64_t blk_mask;
> +    uint64_t set_mask;
> +    uint64_t tag_mask;
> +};
> +
> +struct Cache *dcache, *icache;
> +
> +static int pow_of_two(int num)
> +{
> +    g_assert((num & (num - 1)) == 0);
> +    int ret = 0;
> +    while (num /= 2) {
> +        ret++;
> +    }
> +    return ret;
> +}
> +
> +static inline uint64_t extract_tag(struct Cache *cache, uint64_t addr)
> +{
> +    return (addr & cache->tag_mask) >>
> +        (pow_of_two(cache->num_sets) + pow_of_two(cache->blksize));
> +}
> +
> +static inline uint64_t extract_set(struct Cache *cache, uint64_t addr)
> +{
> +    return (addr & cache->set_mask) >> (pow_of_two(cache->blksize));
> +}
> +
> +static struct Cache *cache_init(int blksize, int assoc, int cachesize)
> +{
> +    struct Cache *cache;
> +    int i;
> +
> +    cache = g_new(struct Cache, 1);
> +    cache->blksize = blksize;
> +    cache->assoc = assoc;
> +    cache->cachesize = cachesize;
> +    cache->num_sets = cachesize / (blksize * assoc);
> +    cache->sets = g_new(struct CacheSet, cache->num_sets);
> +
> +    for (i = 0; i < cache->num_sets; i++) {
> +        cache->sets[i].blocks = g_new0(struct CacheBlock, assoc);
> +    }
> +
> +    cache->blk_mask = blksize - 1;
> +    cache->set_mask = ((cache->num_sets - 1) <<
> (pow_of_two(cache->blksize)));
> +    cache->tag_mask = ~(cache->set_mask | cache->blk_mask);
> +
> +    return cache;
> +}
> +
> +static int get_invalid_block(struct Cache *cache, uint64_t set)
> +{
> +    int i;
> +
> +    for (i = 0; i < cache->assoc; i++) {
> +        if (!cache->sets[set].blocks[i].valid) {
> +            /* conflict miss */
> +            return i;
> +        }
> +    }
> +
> +    /* compulsary miss */
> +    return -1;
> +}
> +
> +static int get_replaced_block(struct Cache *cache)
> +{
> +    return g_rand_int_range(rng, 0, cache->assoc);
> +}
> +
> +static bool in_cache(struct Cache *cache, uint64_t addr)
> +{
> +    int i;
> +    uint64_t tag, set;
> +
> +    tag = extract_tag(cache, addr);
> +    set = extract_set(cache, addr);
> +
> +    for (i = 0; i < cache->assoc; i++) {
> +        if (cache->sets[set].blocks[i].tag == tag &&
> +                cache->sets[set].blocks[i].valid) {
> +            return true;
> +        }
> +    }
> +
> +    return false;
> +}
> +
> +static enum AccessResult access_cache(struct Cache *cache, uint64_t addr)
> +{
> +    uint64_t tag, set;
> +    int replaced_blk;
> +
> +    if (in_cache(cache, addr)) {
> +        return HIT;
> +    }
> +
> +    tag = extract_tag(cache, addr);
> +    set = extract_set(cache, addr);
> +
> +    replaced_blk = get_invalid_block(cache, set);
> +
> +    if (replaced_blk == -1) {
> +        replaced_blk = get_replaced_block(cache);
> +    }
> +
> +    cache->sets[set].blocks[replaced_blk].tag = tag;
> +    cache->sets[set].blocks[replaced_blk].valid = true;
> +
> +    return MISS;
> +}
> +
> +struct InsnData *get_or_create(GHashTable *ht, struct InsnData *insn_data,
> +                               uint64_t addr)
> +{
> +    struct InsnData *insn = g_hash_table_lookup(ht,
> GUINT_TO_POINTER(addr));
> +    if (!insn) {
> +        g_hash_table_insert(ht, GUINT_TO_POINTER(addr), (gpointer)
> insn_data);
> +        insn = insn_data;
> +    }
> +
> +    return insn;
> +}
> +
> +static void vcpu_mem_access(unsigned int cpu_index, qemu_plugin_meminfo_t
> info,
> +                            uint64_t vaddr, void *userdata)
> +{
> +    uint64_t insn_addr;
> +    uint64_t effective_addr;
> +    struct qemu_plugin_hwaddr *hwaddr;
> +
> +    g_mutex_lock(&dmtx);
> +    hwaddr = qemu_plugin_get_hwaddr(info, vaddr);
> +    if (hwaddr && qemu_plugin_hwaddr_is_io(hwaddr)) {
> +        g_mutex_unlock(&dmtx);
> +        return;
> +    }
> +
> +    insn_addr = ((struct InsnData *) userdata)->addr;
> +    effective_addr = hwaddr ? qemu_plugin_hwaddr_phys_addr(hwaddr) :
> vaddr;
> +
> +    if (access_cache(dcache, effective_addr) == MISS) {
> +        struct InsnData *insn = get_or_create(dmiss_ht, userdata,
> insn_addr);
> +        insn->misses++;
> +        dmisses++;
> +    }
> +    dmem_accesses++;
> +    g_mutex_unlock(&dmtx);
> +}
> +
> +static void vcpu_insn_exec(unsigned int vcpu_index, void *userdata)
> +{
> +    uint64_t addr;
> +
> +    g_mutex_lock(&imtx);
> +    addr = ((struct InsnData *) userdata)->addr;
> +
> +    if (access_cache(icache, addr) == MISS) {
> +        struct InsnData *insn = get_or_create(imiss_ht, userdata, addr);
> +        insn->misses++;
> +        imisses++;
> +    }
> +    imem_accesses++;
> +    g_mutex_unlock(&imtx);
> +}
> +
> +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
> +{
> +    size_t n_insns;
> +    size_t i;
> +
> +    n_insns = qemu_plugin_tb_n_insns(tb);
> +    for (i = 0; i < n_insns; i++) {
> +        struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
> +        uint64_t effective_addr;
> +
> +        if (sys) {
> +            effective_addr = (uint64_t) qemu_plugin_insn_haddr(insn);
> +        } else {
> +            effective_addr = (uint64_t) qemu_plugin_insn_vaddr(insn);
> +        }
> +
> +        struct InsnData *ddata = g_new(struct InsnData, 1);
> +        struct InsnData *idata = g_new(struct InsnData, 1);
> +
> +        ddata->disas_str = qemu_plugin_insn_disas(insn);
> +        ddata->misses = 0;
> +        ddata->addr = effective_addr;
> +
> +        idata->disas_str = g_strdup(ddata->disas_str);
> +        idata->misses = 0;
> +        idata->addr = effective_addr;
> +
> +        qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem_access,
> +                                         QEMU_PLUGIN_CB_NO_REGS,
> +                                         rw, ddata);
> +
> +        qemu_plugin_register_vcpu_insn_exec_cb(insn, vcpu_insn_exec,
> +                                               QEMU_PLUGIN_CB_NO_REGS,
> idata);
> +    }
> +}
> +
> +static void print_entry(gpointer data)
> +{
> +    struct InsnData *insn = (struct InsnData *) data;
> +    g_autoptr(GString) xx = g_string_new("");
> +    g_string_append_printf(xx, "0x%" PRIx64 ": %s - misses: %lu\n",
> +            insn->addr, insn->disas_str, insn->misses);
> +    qemu_plugin_outs(xx->str);
> +}
> +
> +static void free_insn(gpointer data)
> +{
> +    struct InsnData *insn = (struct InsnData *) data;
> +    g_free(insn->disas_str);
> +    g_free(insn);
> +}
> +
> +static void free_cache(struct Cache *cache)
> +{
> +    for (int i = 0; i < cache->num_sets; i++) {
> +        g_free(cache->sets[i].blocks);
> +    }
> +
> +    g_free(cache->sets);
> +}
> +
> +static int cmp(gconstpointer a, gconstpointer b)
> +{
> +    struct InsnData *insn_a = (struct InsnData *) a;
> +    struct InsnData *insn_b = (struct InsnData *) b;
> +
> +    return insn_a->misses < insn_b->misses ? 1 : -1;
> +}
> +
> +static void print_stats()
> +{
> +    g_autoptr(GString) rep = g_string_new("");
> +    g_string_append_printf(rep,
> +            "Data accesses: %lu, Misses: %lu\nMiss rate: %lf%%\n\n",
> +            dmem_accesses,
> +            dmisses,
> +            ((double)dmisses / dmem_accesses) * 100.0);
> +
> +    g_string_append_printf(rep,
> +            "Instruction accesses: %lu, Misses: %lu\nMiss rate:
> %lf%%\n\n",
> +            imem_accesses,
> +            imisses,
> +            ((double)imisses / imem_accesses) * 100.0);
> +
> +    qemu_plugin_outs(rep->str);
> +}
> +
> +static void plugin_exit()
> +{
> +    GList *curr;
> +    int i;
> +
> +    g_mutex_lock(&imtx);
> +    g_mutex_lock(&dmtx);
> +    GList *dmiss_insns = g_hash_table_get_values(dmiss_ht);
> +    GList *imiss_insns = g_hash_table_get_values(imiss_ht);
> +    dmiss_insns = g_list_sort(dmiss_insns, cmp);
> +    imiss_insns = g_list_sort(imiss_insns, cmp);
> +
> +    print_stats();
> +
> +    qemu_plugin_outs("Most data-missing instructions\n");
> +    for (curr = dmiss_insns, i = 0; curr && i < limit; i++, curr =
> curr->next) {
> +        print_entry(curr->data);
> +    }
> +
> +    qemu_plugin_outs("\nMost fetch-missing instructions\n");
> +    for (curr = imiss_insns, i = 0; curr && i < limit; i++, curr =
> curr->next) {
> +        print_entry(curr->data);
> +    }
> +
> +    free_cache(dcache);
> +    free_cache(icache);
> +
> +    g_list_free(dmiss_insns);
> +    g_list_free(imiss_insns);
> +
> +    g_hash_table_destroy(dmiss_ht);
> +    g_hash_table_destroy(imiss_ht);
> +
> +    g_mutex_unlock(&dmtx);
> +    g_mutex_unlock(&imtx);
> +}
> +
> +QEMU_PLUGIN_EXPORT
> +int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
> +                        int argc, char **argv)
> +{
> +    int i;
> +    int iassoc, iblksize, icachesize;
> +    int dassoc, dblksize, dcachesize;
> +
> +    limit = 32;
> +    sys = info->system_emulation;
> +
> +    dassoc = 8;
> +    dblksize = 64;
> +    dcachesize = dblksize * dassoc * 32;
> +
> +    iassoc = 8;
> +    iblksize = 64;
> +    icachesize = iblksize * iassoc * 32;
> +
> +    rng = g_rand_new();
> +
> +    for (i = 0; i < argc; i++) {
> +        char *opt = argv[i];
> +        if (g_str_has_prefix(opt, "limit=")) {
> +            limit = g_ascii_strtoull(opt + 6, NULL, 10);
> +        } else {
> +            fprintf(stderr, "option parsing failed: %s\n", opt);
> +            return -1;
> +        }
> +    }
> +
> +    dcache = cache_init(dblksize, dassoc, dcachesize);
> +    icache = cache_init(iblksize, iassoc, icachesize);
> +
> +    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
> +    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
> +
> +    dmiss_ht = g_hash_table_new_full(NULL, g_direct_equal, NULL,
> free_insn);
> +    imiss_ht = g_hash_table_new_full(NULL, g_direct_equal, NULL,
> free_insn);
> +
> +    return 0;
> +}
> --
> 2.25.1
>
>
CC'ing Emilio

[-- Attachment #2: Type: text/html, Size: 14903 bytes --]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC PATCH v2 2/3] plugins: cache: Enabled parameterization and added trace printing
  2021-05-30  6:37 ` [RFC PATCH v2 2/3] plugins: cache: Enabled parameterization and added trace printing Mahmoud Mandour
  2021-06-01 11:18   ` Alex Bennée
@ 2021-06-02  3:15   ` Mahmoud Mandour
  1 sibling, 0 replies; 15+ messages in thread
From: Mahmoud Mandour @ 2021-06-02  3:15 UTC (permalink / raw)
  To: qemu-devel, Emilio G. Cota, Alex Bennée

[-- Attachment #1: Type: text/plain, Size: 5056 bytes --]

CC'ing Emilio

On Sun, May 30, 2021 at 8:37 AM Mahmoud Mandour <ma.mandourr@gmail.com>
wrote:

> Made both icache and dcache configurable through plugin arguments
> and added memory trace printing in a separate file.
>
> Signed-off-by: Mahmoud Mandour <ma.mandourr@gmail.com>
> ---
>  contrib/plugins/cache.c | 68 +++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 66 insertions(+), 2 deletions(-)
>
> diff --git a/contrib/plugins/cache.c b/contrib/plugins/cache.c
> index 8c9d1dd538..fa0bf1dd40 100644
> --- a/contrib/plugins/cache.c
> +++ b/contrib/plugins/cache.c
> @@ -22,7 +22,7 @@ static GRand *rng;
>  static GHashTable *dmiss_ht;
>  static GHashTable *imiss_ht;
>
> -static GMutex dmtx, imtx;
> +static GMutex dmtx, imtx, fmtx;
>
>  static int limit;
>  static bool sys;
> @@ -33,6 +33,8 @@ static uint64_t dmisses;
>  static uint64_t imem_accesses;
>  static uint64_t imisses;
>
> +FILE *tracefile;
> +
>  static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW;
>
>  enum AccessResult {
> @@ -205,6 +207,16 @@ static void vcpu_mem_access(unsigned int cpu_index,
> qemu_plugin_meminfo_t info,
>      insn_addr = ((struct InsnData *) userdata)->addr;
>      effective_addr = hwaddr ? qemu_plugin_hwaddr_phys_addr(hwaddr) :
> vaddr;
>
> +    if (tracefile) {
> +        g_mutex_lock(&fmtx);
> +        g_autoptr(GString) rep = g_string_new("");
> +        bool is_store = qemu_plugin_mem_is_store(info);
> +        g_string_append_printf(rep, "%c: 0x%" PRIx64,
> +                is_store ? 'S' : 'L', effective_addr);
> +        fprintf(tracefile, "%s\n", rep->str);
> +        g_mutex_unlock(&fmtx);
> +    }
> +
>      if (access_cache(dcache, effective_addr) == MISS) {
>          struct InsnData *insn = get_or_create(dmiss_ht, userdata,
> insn_addr);
>          insn->misses++;
> @@ -221,11 +233,20 @@ static void vcpu_insn_exec(unsigned int vcpu_index,
> void *userdata)
>      g_mutex_lock(&imtx);
>      addr = ((struct InsnData *) userdata)->addr;
>
> +    if (tracefile) {
> +        g_mutex_lock(&fmtx);
> +        g_autoptr(GString) rep = g_string_new("");
> +        g_string_append_printf(rep, "I: 0x%" PRIx64, addr);
> +        fprintf(tracefile, "%s\n", rep->str);
> +        g_mutex_unlock(&fmtx);
> +    }
> +
>      if (access_cache(icache, addr) == MISS) {
>          struct InsnData *insn = get_or_create(imiss_ht, userdata, addr);
>          insn->misses++;
>          imisses++;
>      }
> +
>      imem_accesses++;
>      g_mutex_unlock(&imtx);
>  }
> @@ -352,6 +373,15 @@ static void plugin_exit()
>
>      g_mutex_unlock(&dmtx);
>      g_mutex_unlock(&imtx);
> +
> +    if (tracefile) {
> +        fclose(tracefile);
> +    }
> +}
> +
> +static bool bad_cache_params(int blksize, int assoc, int cachesize)
> +{
> +    return (cachesize % blksize) != 0 || (cachesize % (blksize * assoc)
> != 0);
>  }
>
>  QEMU_PLUGIN_EXPORT
> @@ -377,14 +407,48 @@ int qemu_plugin_install(qemu_plugin_id_t id, const
> qemu_info_t *info,
>
>      for (i = 0; i < argc; i++) {
>          char *opt = argv[i];
> -        if (g_str_has_prefix(opt, "limit=")) {
> +        if (g_str_has_prefix(opt, "I=")) {
> +            gchar **toks = g_strsplit(opt + 2, " ", -1);
> +            if (g_strv_length(toks) != 3) {
> +                fprintf(stderr, "option parsing failed: %s\n", opt);
> +                return -1;
> +            }
> +            icachesize = g_ascii_strtoull(toks[0], NULL, 10);
> +            iassoc = g_ascii_strtoull(toks[1], NULL, 10);
> +            iblksize = g_ascii_strtoull(toks[2], NULL, 10);
> +        } else if (g_str_has_prefix(opt, "D=")) {
> +            gchar **toks = g_strsplit(opt + 2, " ", -1);
> +            if (g_strv_length(toks) != 3) {
> +                fprintf(stderr, "option parsing failed: %s\n", opt);
> +                return -1;
> +            }
> +            dcachesize = g_ascii_strtoull(toks[0], NULL, 10);
> +            dassoc = g_ascii_strtoull(toks[1], NULL, 10);
> +            dblksize = g_ascii_strtoull(toks[2], NULL, 10);
> +        } else if (g_str_has_prefix(opt, "limit=")) {
>              limit = g_ascii_strtoull(opt + 6, NULL, 10);
> +        } else if (g_str_has_prefix(opt, "tracefile=")) {
> +            char *file_name = opt + 10;
> +            tracefile = fopen(file_name, "w");
> +            if (!tracefile) {
> +                fprintf(stderr, "could not open: %s for writing\n",
> file_name);
> +            }
>          } else {
>              fprintf(stderr, "option parsing failed: %s\n", opt);
>              return -1;
>          }
>      }
>
> +    if (bad_cache_params(iblksize, iassoc, icachesize)) {
> +        fprintf(stderr, "icache cannot be constructed from given
> parameters\n");
> +        return -1;
> +    }
> +
> +    if (bad_cache_params(dblksize, dassoc, dcachesize)) {
> +        fprintf(stderr, "dcache cannot be constructed from given
> parameters\n");
> +        return -1;
> +    }
> +
>      dcache = cache_init(dblksize, dassoc, dcachesize);
>      icache = cache_init(iblksize, iassoc, icachesize);
>
> --
> 2.25.1
>
>

[-- Attachment #2: Type: text/html, Size: 6450 bytes --]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC PATCH v2 3/3] plugins: cache: Added FIFO and LRU eviction policies.
  2021-05-30  6:37 ` [RFC PATCH v2 3/3] plugins: cache: Added FIFO and LRU eviction policies Mahmoud Mandour
  2021-06-01 12:43   ` Alex Bennée
@ 2021-06-02  3:16   ` Mahmoud Mandour
  1 sibling, 0 replies; 15+ messages in thread
From: Mahmoud Mandour @ 2021-06-02  3:16 UTC (permalink / raw)
  To: qemu-devel; +Cc: Emilio G. Cota, Alex Bennée

[-- Attachment #1: Type: text/plain, Size: 7985 bytes --]

On Sun, May 30, 2021 at 8:37 AM Mahmoud Mandour <ma.mandourr@gmail.com>
wrote:

> Now one of the three eviction policies can be chosen as an argument. On
> not specifying an argument, LRU is used by default.
>
> Signed-off-by: Mahmoud Mandour <ma.mandourr@gmail.com>
> ---
>  contrib/plugins/cache.c | 159 ++++++++++++++++++++++++++++++++++++----
>  1 file changed, 146 insertions(+), 13 deletions(-)
>
> diff --git a/contrib/plugins/cache.c b/contrib/plugins/cache.c
> index fa0bf1dd40..1e323494bf 100644
> --- a/contrib/plugins/cache.c
> +++ b/contrib/plugins/cache.c
> @@ -18,6 +18,8 @@
>
>  QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
>
> +static bool fifo, lru, rnd;
> +
>  static GRand *rng;
>  static GHashTable *dmiss_ht;
>  static GHashTable *imiss_ht;
> @@ -55,6 +57,8 @@ struct CacheBlock {
>
>  struct CacheSet {
>      struct CacheBlock *blocks;
> +    uint16_t *priorities;
> +    GQueue *evict_queue;
>  };
>
>  struct Cache {
> @@ -93,6 +97,84 @@ static inline uint64_t extract_set(struct Cache *cache,
> uint64_t addr)
>      return (addr & cache->set_mask) >> (pow_of_two(cache->blksize));
>  }
>
> +static void lru_priorities_init(struct Cache *cache)
> +{
> +    int i, j;
> +
> +    for (i = 0; i < cache->num_sets; i++) {
> +        cache->sets[i].priorities = g_new(uint16_t, cache->assoc);
> +        for (j = 0; j < cache->assoc; j++) {
> +            cache->sets[i].priorities[j] = cache->assoc - j - 1;
> +        }
> +    }
> +}
> +
> +static void lru_update_on_miss(struct Cache *cache,
> +                                      int set_idx,
> +                                      int blk_idx)
> +{
> +    int i;
> +
> +    for (i = 0; i < cache->assoc; i++) {
> +        cache->sets[set_idx].priorities[i]++;
> +    }
> +
> +    cache->sets[set_idx].priorities[blk_idx] = 0;
> +}
> +
> +static void lru_update_on_hit(struct Cache *cache,
> +                                         int set_idx,
> +                                         int blk_idx)
> +{
> +    uint16_t blk_priority;
> +    int i;
> +
> +    blk_priority = cache->sets[set_idx].priorities[blk_idx];
> +    for (i = 0; i < cache->assoc; i++) {
> +        if (cache->sets[set_idx].priorities[i] < blk_priority) {
> +            cache->sets[set_idx].priorities[i]++;
> +        }
> +    }
> +    cache->sets[set_idx].priorities[blk_idx] = 0;
> +}
> +
> +static int lru_get_lru_block(struct Cache *cache, int set_idx)
> +{
> +    int i;
> +
> +    for (i = 0; i < cache->assoc; i++) {
> +        if (cache->sets[set_idx].priorities[i] == cache->assoc - 1) {
> +            return i;
> +        }
> +    }
> +
> +    g_assert_not_reached();
> +}
> +
> +static void fifo_init(struct Cache *cache)
> +{
> +    int i;
> +
> +    for (i = 0; i < cache->num_sets; i++) {
> +        cache->sets[i].evict_queue = g_queue_new();
> +    }
> +}
> +
> +static int fifo_get_first_in_block(struct Cache *cache, int set)
> +{
> +    GQueue *q = cache->sets[set].evict_queue;
> +    return GPOINTER_TO_INT(g_queue_pop_tail(q));
> +}
> +
> +static void fifo_update_on_miss(struct Cache *cache,
> +                                int set,
> +                                int blk_idx)
> +{
> +    GQueue *q = cache->sets[set].evict_queue;
> +    g_queue_push_head(q, GINT_TO_POINTER(blk_idx));
> +}
> +
> +
>  static struct Cache *cache_init(int blksize, int assoc, int cachesize)
>  {
>      struct Cache *cache;
> @@ -113,6 +195,12 @@ static struct Cache *cache_init(int blksize, int
> assoc, int cachesize)
>      cache->set_mask = ((cache->num_sets - 1) <<
> (pow_of_two(cache->blksize)));
>      cache->tag_mask = ~(cache->set_mask | cache->blk_mask);
>
> +    if (lru) {
> +        lru_priorities_init(cache);
> +    } else if (fifo) {
> +        fifo_init(cache);
> +    }
> +
>      return cache;
>  }
>
> @@ -131,12 +219,20 @@ static int get_invalid_block(struct Cache *cache,
> uint64_t set)
>      return -1;
>  }
>
> -static int get_replaced_block(struct Cache *cache)
> +static int get_replaced_block(struct Cache *cache, int set)
>  {
> -    return g_rand_int_range(rng, 0, cache->assoc);
> +    if (rnd) {
> +        return g_rand_int_range(rng, 0, cache->assoc);
> +    } else if (lru) {
> +        return lru_get_lru_block(cache, set);
> +    } else if (fifo) {
> +        return fifo_get_first_in_block(cache, set);
> +    }
> +
> +    g_assert_not_reached();
>  }
>
> -static bool in_cache(struct Cache *cache, uint64_t addr)
> +static int in_cache(struct Cache *cache, uint64_t addr)
>  {
>      int i;
>      uint64_t tag, set;
> @@ -147,29 +243,39 @@ static bool in_cache(struct Cache *cache, uint64_t
> addr)
>      for (i = 0; i < cache->assoc; i++) {
>          if (cache->sets[set].blocks[i].tag == tag &&
>                  cache->sets[set].blocks[i].valid) {
> -            return true;
> +            return i;
>          }
>      }
>
> -    return false;
> +    return -1;
>  }
>
>  static enum AccessResult access_cache(struct Cache *cache, uint64_t addr)
>  {
>      uint64_t tag, set;
> -    int replaced_blk;
> -
> -    if (in_cache(cache, addr)) {
> -        return HIT;
> -    }
> +    int hit_blk, replaced_blk;
>
>      tag = extract_tag(cache, addr);
>      set = extract_set(cache, addr);
> +    hit_blk = in_cache(cache, addr);
> +
> +    if (hit_blk != -1) {
> +        if (lru) {
> +            lru_update_on_hit(cache, set, hit_blk);
> +        }
> +        return HIT;
> +    }
>
>      replaced_blk = get_invalid_block(cache, set);
>
>      if (replaced_blk == -1) {
> -        replaced_blk = get_replaced_block(cache);
> +        replaced_blk = get_replaced_block(cache, set);
> +    }
> +
> +    if (lru) {
> +        lru_update_on_miss(cache, set, replaced_blk);
> +    } else if (fifo) {
> +        fifo_update_on_miss(cache, set, replaced_blk);
>      }
>
>      cache->sets[set].blocks[replaced_blk].tag = tag;
> @@ -307,6 +413,11 @@ static void free_cache(struct Cache *cache)
>  {
>      for (int i = 0; i < cache->num_sets; i++) {
>          g_free(cache->sets[i].blocks);
> +        if (lru) {
> +            g_free(cache->sets[i].priorities);
> +        } else if (fifo) {
> +            g_queue_free(cache->sets[i].evict_queue);
> +        }
>      }
>
>      g_free(cache->sets);
> @@ -403,8 +514,6 @@ int qemu_plugin_install(qemu_plugin_id_t id, const
> qemu_info_t *info,
>      iblksize = 64;
>      icachesize = iblksize * iassoc * 32;
>
> -    rng = g_rand_new();
> -
>      for (i = 0; i < argc; i++) {
>          char *opt = argv[i];
>          if (g_str_has_prefix(opt, "I=")) {
> @@ -433,6 +542,22 @@ int qemu_plugin_install(qemu_plugin_id_t id, const
> qemu_info_t *info,
>              if (!tracefile) {
>                  fprintf(stderr, "could not open: %s for writing\n",
> file_name);
>              }
> +        } else if (g_str_has_prefix(opt, "evict=")) {
> +            if (lru || rnd || fifo) {
> +                fprintf(stderr, "eviction policy specified more than
> once\n");
> +                return -1;
> +            }
> +            gchar *policy = opt + 6;
> +            if (g_strcmp0(policy, "rand") == 0) {
> +                rnd = true;
> +            } else if (g_strcmp0(policy, "lru") == 0) {
> +                lru = true;
> +            } else if (g_strcmp0(policy, "fifo") == 0) {
> +                fifo = true;
> +            } else {
> +                fprintf(stderr, "invalid eviction policy: %s\n", opt);
> +                return -1;
> +            }
>          } else {
>              fprintf(stderr, "option parsing failed: %s\n", opt);
>              return -1;
> @@ -449,6 +574,14 @@ int qemu_plugin_install(qemu_plugin_id_t id, const
> qemu_info_t *info,
>          return -1;
>      }
>
> +    if (!rnd && !lru && !fifo) {
> +        lru = true;
> +    }
> +
> +    if (rnd) {
> +        rng = g_rand_new();
> +    }
> +
>      dcache = cache_init(dblksize, dassoc, dcachesize);
>      icache = cache_init(iblksize, iassoc, icachesize);
>
> --
> 2.25.1
>
>
CC'ing Emilio

[-- Attachment #2: Type: text/html, Size: 10113 bytes --]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC PATCH v2 2/3] plugins: cache: Enabled parameterization and added trace printing
  2021-06-01 11:18   ` Alex Bennée
@ 2021-06-02  4:29     ` Mahmoud Mandour
  2021-06-03  9:39     ` Stefan Hajnoczi
  1 sibling, 0 replies; 15+ messages in thread
From: Mahmoud Mandour @ 2021-06-02  4:29 UTC (permalink / raw)
  To: Alex Bennée; +Cc: Emilio G. Cota, qemu-devel, Stefan Hajnoczi

[-- Attachment #1: Type: text/plain, Size: 7031 bytes --]

On Tue, Jun 1, 2021 at 1:29 PM Alex Bennée <alex.bennee@linaro.org> wrote:

>
> (Stefan CC'ed for tracing discussion)
>
> Mahmoud Mandour <ma.mandourr@gmail.com> writes:
>
> > Made both icache and dcache configurable through plugin arguments
> > and added memory trace printing in a separate file.
>
> Please keep the commits discreet and single topic. The memory trace is
> an extra feature so should be in it's own commit.
>
> >
> > Signed-off-by: Mahmoud Mandour <ma.mandourr@gmail.com>
> > ---
> >  contrib/plugins/cache.c | 68 +++++++++++++++++++++++++++++++++++++++--
> >  1 file changed, 66 insertions(+), 2 deletions(-)
> >
> > diff --git a/contrib/plugins/cache.c b/contrib/plugins/cache.c
> > index 8c9d1dd538..fa0bf1dd40 100644
> > --- a/contrib/plugins/cache.c
> > +++ b/contrib/plugins/cache.c
> > @@ -22,7 +22,7 @@ static GRand *rng;
> >  static GHashTable *dmiss_ht;
> >  static GHashTable *imiss_ht;
> >
> > -static GMutex dmtx, imtx;
> > +static GMutex dmtx, imtx, fmtx;
> >
> >  static int limit;
> >  static bool sys;
> > @@ -33,6 +33,8 @@ static uint64_t dmisses;
> >  static uint64_t imem_accesses;
> >  static uint64_t imisses;
> >
> > +FILE *tracefile;
> > +
> >  static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW;
> >
> >  enum AccessResult {
> > @@ -205,6 +207,16 @@ static void vcpu_mem_access(unsigned int cpu_index,
> qemu_plugin_meminfo_t info,
> >      insn_addr = ((struct InsnData *) userdata)->addr;
> >      effective_addr = hwaddr ? qemu_plugin_hwaddr_phys_addr(hwaddr) :
> vaddr;
> >
> > +    if (tracefile) {
> > +        g_mutex_lock(&fmtx);
> > +        g_autoptr(GString) rep = g_string_new("");
> > +        bool is_store = qemu_plugin_mem_is_store(info);
> > +        g_string_append_printf(rep, "%c: 0x%" PRIx64,
> > +                is_store ? 'S' : 'L', effective_addr);
> > +        fprintf(tracefile, "%s\n", rep->str);
> > +        g_mutex_unlock(&fmtx);
> > +    }
>
> I can see this would be useful for debugging but I'm wary of adding
> ad-hoc tracing formats when QEMU already has support for a wide range of
> tracing formats. We discussed this a bit in:
>
>   Subject: trace_FOO_tcg bit-rotted?
>   Date: Tue, 06 Apr 2021 17:00:20 +0100
>   Message-ID: <87eefnwd0l.fsf@linaro.org>
>
> However I don't know how easy it would be to leverage the existing
> tracing infrastructure from inside a plugin. As I understand it QEMU
> currently builds a static list of trace points during the build so maybe
> we would need additional infrastructure for a plugin to register a trace
> point and for the final output to be use-able. For example the binary
> trace output I think still needs to reference the source trace-events
> file?
>
> So that's not a NACK but maybe we could spend a little time working out
> if we can come up with a cleaner solution?

Alright then, I will have it removed for now and maybe add it if there's a
better
solution for it.

>


> Stefan, any thoughts?
>
> >      if (access_cache(dcache, effective_addr) == MISS) {
> >          struct InsnData *insn = get_or_create(dmiss_ht, userdata,
> insn_addr);
> >          insn->misses++;
> > @@ -221,11 +233,20 @@ static void vcpu_insn_exec(unsigned int
> vcpu_index, void *userdata)
> >      g_mutex_lock(&imtx);
> >      addr = ((struct InsnData *) userdata)->addr;
> >
> > +    if (tracefile) {
> > +        g_mutex_lock(&fmtx);
> > +        g_autoptr(GString) rep = g_string_new("");
> > +        g_string_append_printf(rep, "I: 0x%" PRIx64, addr);
> > +        fprintf(tracefile, "%s\n", rep->str);
> > +        g_mutex_unlock(&fmtx);
> > +    }
> > +
> >      if (access_cache(icache, addr) == MISS) {
> >          struct InsnData *insn = get_or_create(imiss_ht, userdata, addr);
> >          insn->misses++;
> >          imisses++;
> >      }
> > +
> >      imem_accesses++;
> >      g_mutex_unlock(&imtx);
> >  }
> > @@ -352,6 +373,15 @@ static void plugin_exit()
> >
> >      g_mutex_unlock(&dmtx);
> >      g_mutex_unlock(&imtx);
> > +
> > +    if (tracefile) {
> > +        fclose(tracefile);
> > +    }
> > +}
> > +
> > +static bool bad_cache_params(int blksize, int assoc, int cachesize)
> > +{
> > +    return (cachesize % blksize) != 0 || (cachesize % (blksize * assoc)
> != 0);
> >  }
> >
> >  QEMU_PLUGIN_EXPORT
> > @@ -377,14 +407,48 @@ int qemu_plugin_install(qemu_plugin_id_t id, const
> qemu_info_t *info,
> >
> >      for (i = 0; i < argc; i++) {
> >          char *opt = argv[i];
> > -        if (g_str_has_prefix(opt, "limit=")) {
> > +        if (g_str_has_prefix(opt, "I=")) {
> > +            gchar **toks = g_strsplit(opt + 2, " ", -1);
> > +            if (g_strv_length(toks) != 3) {
> > +                fprintf(stderr, "option parsing failed: %s\n", opt);
> > +                return -1;
> > +            }
> > +            icachesize = g_ascii_strtoull(toks[0], NULL, 10);
> > +            iassoc = g_ascii_strtoull(toks[1], NULL, 10);
> > +            iblksize = g_ascii_strtoull(toks[2], NULL, 10);
> > +        } else if (g_str_has_prefix(opt, "D=")) {
> > +            gchar **toks = g_strsplit(opt + 2, " ", -1);
> > +            if (g_strv_length(toks) != 3) {
> > +                fprintf(stderr, "option parsing failed: %s\n", opt);
> > +                return -1;
> > +            }
> > +            dcachesize = g_ascii_strtoull(toks[0], NULL, 10);
> > +            dassoc = g_ascii_strtoull(toks[1], NULL, 10);
> > +            dblksize = g_ascii_strtoull(toks[2], NULL, 10);
> > +        } else if (g_str_has_prefix(opt, "limit=")) {
> >              limit = g_ascii_strtoull(opt + 6, NULL, 10);
> > +        } else if (g_str_has_prefix(opt, "tracefile=")) {
> > +            char *file_name = opt + 10;
> > +            tracefile = fopen(file_name, "w");
> > +            if (!tracefile) {
> > +                fprintf(stderr, "could not open: %s for writing\n",
> file_name);
> > +            }
> >          } else {
> >              fprintf(stderr, "option parsing failed: %s\n", opt);
> >              return -1;
> >          }
> >      }
> >
> > +    if (bad_cache_params(iblksize, iassoc, icachesize)) {
> > +        fprintf(stderr, "icache cannot be constructed from given
> parameters\n");
> > +        return -1;
> > +    }
> > +
> > +    if (bad_cache_params(dblksize, dassoc, dcachesize)) {
> > +        fprintf(stderr, "dcache cannot be constructed from given
> parameters\n");
> > +        return -1;
> > +    }
> > +
>
> Perhaps roll bad_cache_params into cache_init and return NULL if it
> fails, so:
>
>   dcache = cache_init(dblksize, dassoc, dcachesize);
>   if (!dcache) {
>     fprintf(stderr, "dcache cannot be constructed from given
> parameters\n");
>     return -1;
>   }

Applied, thanks

>
> >      dcache = cache_init(dblksize, dassoc, dcachesize);
> >      icache = cache_init(iblksize, iassoc, icachesize);
>
>
> --
> Alex Bennée
>

[-- Attachment #2: Type: text/html, Size: 9395 bytes --]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC PATCH v2 3/3] plugins: cache: Added FIFO and LRU eviction policies.
  2021-06-01 12:43   ` Alex Bennée
@ 2021-06-02  5:23     ` Mahmoud Mandour
  0 siblings, 0 replies; 15+ messages in thread
From: Mahmoud Mandour @ 2021-06-02  5:23 UTC (permalink / raw)
  To: Alex Bennée; +Cc: Emilio G. Cota, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 12778 bytes --]

On Tue, Jun 1, 2021 at 3:27 PM Alex Bennée <alex.bennee@linaro.org> wrote:

>
> Mahmoud Mandour <ma.mandourr@gmail.com> writes:
>
> > Now one of the three eviction policies can be chosen as an argument. On
> > not specifying an argument, LRU is used by default.
> >
> > Signed-off-by: Mahmoud Mandour <ma.mandourr@gmail.com>
> > ---
> >  contrib/plugins/cache.c | 159 ++++++++++++++++++++++++++++++++++++----
> >  1 file changed, 146 insertions(+), 13 deletions(-)
> >
> > diff --git a/contrib/plugins/cache.c b/contrib/plugins/cache.c
> > index fa0bf1dd40..1e323494bf 100644
> > --- a/contrib/plugins/cache.c
> > +++ b/contrib/plugins/cache.c
> > @@ -18,6 +18,8 @@
> >
> >  QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
> >
> > +static bool fifo, lru, rnd;
> > +
>
> Ironically this would be a good use for a single variable with an enum,
> or alternatively a function pointer which can be set on initialisation.
>
> I think that I will implement the function pointers thing because as you
mention bellow it will somewhat reduce the clutter of checking the current
eviction policy each time and decide on which function to call.

>  static GRand *rng;
> >  static GHashTable *dmiss_ht;
> >  static GHashTable *imiss_ht;
> > @@ -55,6 +57,8 @@ struct CacheBlock {
> >
> >  struct CacheSet {
> >      struct CacheBlock *blocks;
> > +    uint16_t *priorities;
> > +    GQueue *evict_queue;
> >  };
> >
> >  struct Cache {
> > @@ -93,6 +97,84 @@ static inline uint64_t extract_set(struct Cache
> *cache, uint64_t addr)
> >      return (addr & cache->set_mask) >> (pow_of_two(cache->blksize));
> >  }
>
> I think it would be useful to summarise the LRU behaviour here in a
> comment and explain how the priorities are meant to change as the cache
> is used.


> >
> > +static void lru_priorities_init(struct Cache *cache)
> > +{
> > +    int i, j;
> > +
> > +    for (i = 0; i < cache->num_sets; i++) {
> > +        cache->sets[i].priorities = g_new(uint16_t, cache->assoc);
> > +        for (j = 0; j < cache->assoc; j++) {
> > +            cache->sets[i].priorities[j] = cache->assoc - j - 1;
> > +        }
> > +    }
> > +}
> > +
> > +static void lru_update_on_miss(struct Cache *cache,
> > +                                      int set_idx,
> > +                                      int blk_idx)
> > +{
> > +    int i;
> > +
> > +    for (i = 0; i < cache->assoc; i++) {
> > +        cache->sets[set_idx].priorities[i]++;
> > +    }
> > +
> > +    cache->sets[set_idx].priorities[blk_idx] = 0;
>
> So we increment priority for all non-hit blocks and reset it for the
> entry just used? This isn't totally clear to follow however see bellow:


> > +}
> > +
> > +static void lru_update_on_hit(struct Cache *cache,
> > +                                         int set_idx,
> > +                                         int blk_idx)
> > +{
> > +    uint16_t blk_priority;
> > +    int i;
> > +
> > +    blk_priority = cache->sets[set_idx].priorities[blk_idx];
> > +    for (i = 0; i < cache->assoc; i++) {
> > +        if (cache->sets[set_idx].priorities[i] < blk_priority) {
> > +            cache->sets[set_idx].priorities[i]++;
> > +        }
> > +    }
> > +    cache->sets[set_idx].priorities[blk_idx] = 0;
>
> This seems pretty expensive depending on the number of blocks. Another
> approach would be to have a generation number that is incremented on
> each access and stored in the appropriate set. Then...
>
> > +}
> > +
> > +static int lru_get_lru_block(struct Cache *cache, int set_idx)
> > +{
> > +    int i;
> > +
> > +    for (i = 0; i < cache->assoc; i++) {
> > +        if (cache->sets[set_idx].priorities[i] == cache->assoc - 1) {
> > +            return i;
> > +        }
> > +    }
>
> when you get to search for a "stale" block you just look for the lowest
> generation number. The eviction logic should be being called less than
> the update logic right?


Sorry for the confusion about LRU, so my idea is that if we have ASSOC
blocks in each set, the invariant is that I always have the least recently
used
block with a priority of ASSOC - 1. the second least recently used block
with
a priority of ASSOC - 2, etc. By incrementing every priority on miss, I
demote
other blocks and set the newly-cached block to priority 0, so we maintain
the
recency of usage sequence.

To be clear, your solution is on hit: increase the hit-block priority, and
on miss,
remove the element with the least priority (probably the new block here
will get
the highest priority which can be stored in the set itself so we don't
search for it)

If I understood something incorrectly please correct me.

The problem with that is we won't have the usage sequence anymore.
Consider a set with ASSOC = 4, and that set initially has no valid blocks
and the program generates the following accesses to the set:

MISS and replace block 3 (will increment block 3's priority to 1)
MISS and replace block 2 (will increment block 2's priority to 1)
MISS and replace block 1 (will increment block 1's priority to 1)
MISS and replace block 0 (will increment block 0's priority to 1)

LRU specifies that if we want to evict something, we'll need to evict block
3 since
the first block that was accessed and hence it's the LRU block.

But if we only choose the block with the least priority, we can evict block
0
which is in fact the most recently used block and the one with supposedly
the highest priority.

This would be an approximation for LRU and I guess that I can also
implement it
as an optional eviction algorithm since to my knowledge, numerous processors
implement an approximation for LRU. That's because it's expensive to
implement
exact LRU in hardware.


>
> > +
> > +    g_assert_not_reached();
> > +}
> > +
> > +static void fifo_init(struct Cache *cache)
> > +{
> > +    int i;
> > +
> > +    for (i = 0; i < cache->num_sets; i++) {
> > +        cache->sets[i].evict_queue = g_queue_new();
> > +    }
> > +}
> > +
> > +static int fifo_get_first_in_block(struct Cache *cache, int set)
> > +{
> > +    GQueue *q = cache->sets[set].evict_queue;
> > +    return GPOINTER_TO_INT(g_queue_pop_tail(q));
> > +}
> > +
> > +static void fifo_update_on_miss(struct Cache *cache,
> > +                                int set,
> > +                                int blk_idx)
> > +{
> > +    GQueue *q = cache->sets[set].evict_queue;
> > +    g_queue_push_head(q, GINT_TO_POINTER(blk_idx));
> > +}
>
> Again some commentary would be helpful around above the fifo functions.
>
> > +
> > +
> >  static struct Cache *cache_init(int blksize, int assoc, int cachesize)
> >  {
> >      struct Cache *cache;
> > @@ -113,6 +195,12 @@ static struct Cache *cache_init(int blksize, int
> assoc, int cachesize)
> >      cache->set_mask = ((cache->num_sets - 1) <<
> (pow_of_two(cache->blksize)));
> >      cache->tag_mask = ~(cache->set_mask | cache->blk_mask);
> >
> > +    if (lru) {
> > +        lru_priorities_init(cache);
> > +    } else if (fifo) {
> > +        fifo_init(cache);
> > +    }
> > +
> >      return cache;
> >  }
> >
> > @@ -131,12 +219,20 @@ static int get_invalid_block(struct Cache *cache,
> uint64_t set)
> >      return -1;
> >  }
> >
> > -static int get_replaced_block(struct Cache *cache)
> > +static int get_replaced_block(struct Cache *cache, int set)
> >  {
> > -    return g_rand_int_range(rng, 0, cache->assoc);
> > +    if (rnd) {
> > +        return g_rand_int_range(rng, 0, cache->assoc);
> > +    } else if (lru) {
> > +        return lru_get_lru_block(cache, set);
> > +    } else if (fifo) {
> > +        return fifo_get_first_in_block(cache, set);
> > +    }
> > +
> > +    g_assert_not_reached();
> >  }
> >
> > -static bool in_cache(struct Cache *cache, uint64_t addr)
> > +static int in_cache(struct Cache *cache, uint64_t addr)
> >  {
> >      int i;
> >      uint64_t tag, set;
> > @@ -147,29 +243,39 @@ static bool in_cache(struct Cache *cache, uint64_t
> addr)
> >      for (i = 0; i < cache->assoc; i++) {
> >          if (cache->sets[set].blocks[i].tag == tag &&
> >                  cache->sets[set].blocks[i].valid) {
> > -            return true;
> > +            return i;
> >          }
> >      }
> >
> > -    return false;
> > +    return -1;
> >  }
> >
> >  static enum AccessResult access_cache(struct Cache *cache, uint64_t
> addr)
> >  {
> >      uint64_t tag, set;
> > -    int replaced_blk;
> > -
> > -    if (in_cache(cache, addr)) {
> > -        return HIT;
> > -    }
> > +    int hit_blk, replaced_blk;
> >
> >      tag = extract_tag(cache, addr);
> >      set = extract_set(cache, addr);
> > +    hit_blk = in_cache(cache, addr);
> > +
> > +    if (hit_blk != -1) {
> > +        if (lru) {
> > +            lru_update_on_hit(cache, set, hit_blk);
> > +        }
> > +        return HIT;
> > +    }
> >
> >      replaced_blk = get_invalid_block(cache, set);
> >
> >      if (replaced_blk == -1) {
> > -        replaced_blk = get_replaced_block(cache);
> > +        replaced_blk = get_replaced_block(cache, set);
> > +    }
> > +
> > +    if (lru) {
> > +        lru_update_on_miss(cache, set, replaced_blk);
> > +    } else if (fifo) {
> > +        fifo_update_on_miss(cache, set, replaced_blk);
> >      }
>
> I wonder if just having a update_hit and update_miss function pointer
> would keep things cleaner?
>
>   if (update_hit) {
>        update_hit(cache, set, hit, block)
>   }
>
> etc...
>
> >
> >      cache->sets[set].blocks[replaced_blk].tag = tag;
> > @@ -307,6 +413,11 @@ static void free_cache(struct Cache *cache)
> >  {
> >      for (int i = 0; i < cache->num_sets; i++) {
> >          g_free(cache->sets[i].blocks);
> > +        if (lru) {
> > +            g_free(cache->sets[i].priorities);
>
> Hmm I've obviously missed something about how priorities are meant to be
> sued.
>

Can you please explain what's wrong with this? I only allocate an array of
priorities
when LRU is the chosen policy, so I only free it if `lru` is true. Am I
missing something?

>
> > +        } else if (fifo) {
> > +            g_queue_free(cache->sets[i].evict_queue);
> > +        }
> >      }
> >
> >      g_free(cache->sets);
> > @@ -403,8 +514,6 @@ int qemu_plugin_install(qemu_plugin_id_t id, const
> qemu_info_t *info,
> >      iblksize = 64;
> >      icachesize = iblksize * iassoc * 32;
> >
> > -    rng = g_rand_new();
> > -
> >      for (i = 0; i < argc; i++) {
> >          char *opt = argv[i];
> >          if (g_str_has_prefix(opt, "I=")) {
> > @@ -433,6 +542,22 @@ int qemu_plugin_install(qemu_plugin_id_t id, const
> qemu_info_t *info,
> >              if (!tracefile) {
> >                  fprintf(stderr, "could not open: %s for writing\n",
> file_name);
> >              }
> > +        } else if (g_str_has_prefix(opt, "evict=")) {
> > +            if (lru || rnd || fifo) {
> > +                fprintf(stderr, "eviction policy specified more than
> once\n");
> > +                return -1;
> > +            }
>
> This is one argument for the separate bools although generally QEMU
> operates on the basis of last argument wins ;-)


I Will do that, thanks. This seemed logical since I observed that from the
behavior of other plugins as well. It introduced some clutter since I'll
have
to turn off the other two booleans in order to only leave one policy turned
on.
But that is now not a problem since we agreed on removing the `lru`, `rnd`,
and `fifo` booleans already.

>
>
> > +            gchar *policy = opt + 6;
> > +            if (g_strcmp0(policy, "rand") == 0) {
> > +                rnd = true;
> > +            } else if (g_strcmp0(policy, "lru") == 0) {
> > +                lru = true;
> > +            } else if (g_strcmp0(policy, "fifo") == 0) {
> > +                fifo = true;
> > +            } else {
> > +                fprintf(stderr, "invalid eviction policy: %s\n", opt);
> > +                return -1;
> > +            }
> >          } else {
> >              fprintf(stderr, "option parsing failed: %s\n", opt);
> >              return -1;
> > @@ -449,6 +574,14 @@ int qemu_plugin_install(qemu_plugin_id_t id, const
> qemu_info_t *info,
> >          return -1;
> >      }
> >
> > +    if (!rnd && !lru && !fifo) {
> > +        lru = true;
> > +    }
> > +
> > +    if (rnd) {
> > +        rng = g_rand_new();
> > +    }
> > +
> >      dcache = cache_init(dblksize, dassoc, dcachesize);
> >      icache = cache_init(iblksize, iassoc, icachesize);
>
>
> --
> Alex Bennée
>

[-- Attachment #2: Type: text/html, Size: 16926 bytes --]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC PATCH v2 0/3] Cache modelling TCG plugin
  2021-06-01 13:30 ` Alex Bennée
@ 2021-06-02  6:22   ` Mahmoud Mandour
  0 siblings, 0 replies; 15+ messages in thread
From: Mahmoud Mandour @ 2021-06-02  6:22 UTC (permalink / raw)
  To: Alex Bennée; +Cc: qemu-devel

[-- Attachment #1: Type: text/plain, Size: 1694 bytes --]

On Tue, Jun 1, 2021 at 3:32 PM Alex Bennée <alex.bennee@linaro.org> wrote:

>
> Mahmoud Mandour <ma.mandourr@gmail.com> writes:
>
> > In this RFC patch series, I propose an initial cache modelling TCG
> > plugin. As of now, it models separate L1 data cache and L1 instruction
> > cache. It supports three eviction policies: LRU, random, and FIFO. Once
> > a policy is chosen, it's used for both instruction and data caches.
> >
> > v1 -> v2: Unlocked dmtx on early return in vcpu_mem_access & removed a
> >           (probably?) bad InsnData free.
> >           This is probably still problematic since it does not free the
> >           ``idata`` allocated for the vcpu_mem_access callback even
> >           once, but if it's placed, it would double-free it.
> >           How do I mitigate this? I need to free the InsnData passed to
> >           vcpu_mem_access only once if we find out that it's an IO
> >           access since we do not need it anymore and it will early
> >           return every time.
>
> OK I've done my review, sorry I reviewed 1/3 from the previous series.
> I've made some comments inline in those patches but I think this is an
> excellent start to the project. I think we can get the core plugin
> up-streamed fairly quickly and then spend some time examining better
> integration and possibly enhancing the plugin API.
>
> --
> Alex Bennée
>

Thanks so much for taking the time to thoroughly and also quickly
review the series. The feedback is greatly appreciated and I've
implemented most of it, I want your approval about the LRU discussion
and I think that I will be able to repost a cleaner version.

Thanks,
Mahmoud

[-- Attachment #2: Type: text/html, Size: 2307 bytes --]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC PATCH v2 2/3] plugins: cache: Enabled parameterization and added trace printing
  2021-06-01 11:18   ` Alex Bennée
  2021-06-02  4:29     ` Mahmoud Mandour
@ 2021-06-03  9:39     ` Stefan Hajnoczi
  1 sibling, 0 replies; 15+ messages in thread
From: Stefan Hajnoczi @ 2021-06-03  9:39 UTC (permalink / raw)
  To: Alex Bennée; +Cc: Mahmoud Mandour, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 1706 bytes --]

On Tue, Jun 01, 2021 at 12:18:58PM +0100, Alex Bennée wrote:
> > +    if (tracefile) {
> > +        g_mutex_lock(&fmtx);
> > +        g_autoptr(GString) rep = g_string_new("");
> > +        bool is_store = qemu_plugin_mem_is_store(info);
> > +        g_string_append_printf(rep, "%c: 0x%" PRIx64,
> > +                is_store ? 'S' : 'L', effective_addr);
> > +        fprintf(tracefile, "%s\n", rep->str);
> > +        g_mutex_unlock(&fmtx);
> > +    }
> 
> I can see this would be useful for debugging but I'm wary of adding
> ad-hoc tracing formats when QEMU already has support for a wide range of
> tracing formats. We discussed this a bit in:
> 
>   Subject: trace_FOO_tcg bit-rotted?
>   Date: Tue, 06 Apr 2021 17:00:20 +0100
>   Message-ID: <87eefnwd0l.fsf@linaro.org>
> 
> However I don't know how easy it would be to leverage the existing
> tracing infrastructure from inside a plugin. As I understand it QEMU
> currently builds a static list of trace points during the build so maybe
> we would need additional infrastructure for a plugin to register a trace
> point and for the final output to be use-able. For example the binary
> trace output I think still needs to reference the source trace-events
> file?
> 
> So that's not a NACK but maybe we could spend a little time working out
> if we can come up with a cleaner solution?
> 
> Stefan, any thoughts?

QEMU's tracing system requires knowledge of all trace events at QEMU
compilation time. If the plugins are built together with QEMU then it
should be possible to give them their own trace-events files.

If not then I'm afraid it would be tricky to integrate into QEMU's
tracing system.

Stefan

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2021-06-03  9:40 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-30  6:37 [RFC PATCH v2 0/3] Cache modelling TCG plugin Mahmoud Mandour
2021-05-30  6:37 ` [RFC PATCH v2 1/3] plugins: Added a new cache modelling plugin Mahmoud Mandour
2021-06-02  3:14   ` Mahmoud Mandour
2021-05-30  6:37 ` [RFC PATCH v2 2/3] plugins: cache: Enabled parameterization and added trace printing Mahmoud Mandour
2021-06-01 11:18   ` Alex Bennée
2021-06-02  4:29     ` Mahmoud Mandour
2021-06-03  9:39     ` Stefan Hajnoczi
2021-06-02  3:15   ` Mahmoud Mandour
2021-05-30  6:37 ` [RFC PATCH v2 3/3] plugins: cache: Added FIFO and LRU eviction policies Mahmoud Mandour
2021-06-01 12:43   ` Alex Bennée
2021-06-02  5:23     ` Mahmoud Mandour
2021-06-02  3:16   ` Mahmoud Mandour
2021-05-30  6:44 ` [RFC PATCH v2 0/3] Cache modelling TCG plugin Mahmoud Mandour
2021-06-01 13:30 ` Alex Bennée
2021-06-02  6:22   ` Mahmoud Mandour

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.