All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v7 0/2] Update AVX512 support for xbzrle and CI failure
@ 2022-11-16 15:29 ling xu
  2022-11-16 15:29 ` [PATCH v7 1/2] AVX512 support for xbzrle_encode_buffer ling xu
  2022-11-16 15:29 ` [PATCH v7 2/2] Update bench-code for addressing CI problem ling xu
  0 siblings, 2 replies; 8+ messages in thread
From: ling xu @ 2022-11-16 15:29 UTC (permalink / raw)
  To: qemu-devel; +Cc: quintela, dgilbert, ling xu, Zhou Zhao, Jun Jin

This patch updates code of avx512 support for xbzrle_encode_buffer function. 
We mainly modified code in xbzrle-bench.c for addressing CI failure.

Signed-off-by: ling xu <ling1.xu@intel.com>
Co-authored-by: Zhou Zhao <zhou.zhao@intel.com>
Co-authored-by: Jun Jin <jun.i.jin@intel.com>

ling xu (2):
  Update AVX512 support for xbzrle_encode_buffer
  Unit test code and benchmark code

 meson.build                |  16 ++
 meson_options.txt          |   2 +
 migration/ram.c            |  34 ++-
 migration/xbzrle.c         | 124 ++++++++++
 migration/xbzrle.h         |   4 +
 tests/bench/meson.build    |   4 +
 tests/bench/xbzrle-bench.c | 469 +++++++++++++++++++++++++++++++++++++
 tests/unit/test-xbzrle.c   |  39 ++-
 8 files changed, 684 insertions(+), 8 deletions(-)
 create mode 100644 tests/bench/xbzrle-bench.c

-- 
2.25.1



^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v7 1/2] AVX512 support for xbzrle_encode_buffer
  2022-11-16 15:29 [PATCH v7 0/2] Update AVX512 support for xbzrle and CI failure ling xu
@ 2022-11-16 15:29 ` ling xu
  2023-02-09 19:30   ` Juan Quintela
  2022-11-16 15:29 ` [PATCH v7 2/2] Update bench-code for addressing CI problem ling xu
  1 sibling, 1 reply; 8+ messages in thread
From: ling xu @ 2022-11-16 15:29 UTC (permalink / raw)
  To: qemu-devel; +Cc: quintela, dgilbert, ling xu, Zhou Zhao, Jun Jin

This commit is the same with [PATCH v6 1/2], and provides avx512 support for xbzrle_encode_buffer
function to accelerate xbzrle encoding speed. Runtime check of avx512
support and benchmark for this feature are added. Compared with C
version of xbzrle_encode_buffer function, avx512 version can achieve
50%-70% performance improvement on benchmarking. In addition, if dirty
data is randomly located in 4K page, the avx512 version can achieve
almost 140% performance gain.

Signed-off-by: ling xu <ling1.xu@intel.com>
Co-authored-by: Zhou Zhao <zhou.zhao@intel.com>
Co-authored-by: Jun Jin <jun.i.jin@intel.com>
---
 meson.build        |  16 ++++++
 meson_options.txt  |   2 +
 migration/ram.c    |  34 +++++++++++--
 migration/xbzrle.c | 124 +++++++++++++++++++++++++++++++++++++++++++++
 migration/xbzrle.h |   4 ++
 5 files changed, 177 insertions(+), 3 deletions(-)

diff --git a/meson.build b/meson.build
index cf3e517e56..d0d28f5c9e 100644
--- a/meson.build
+++ b/meson.build
@@ -2344,6 +2344,22 @@ config_host_data.set('CONFIG_AVX512F_OPT', get_option('avx512f') \
     int main(int argc, char *argv[]) { return bar(argv[argc - 1]); }
   '''), error_message: 'AVX512F not available').allowed())
 
+config_host_data.set('CONFIG_AVX512BW_OPT', get_option('avx512bw') \
+  .require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable AVX512BW') \
+  .require(cc.links('''
+    #pragma GCC push_options
+    #pragma GCC target("avx512bw")
+    #include <cpuid.h>
+    #include <immintrin.h>
+    static int bar(void *a) {
+
+      __m512i *x = a;
+      __m512i res= _mm512_abs_epi8(*x);
+      return res[1];
+    }
+    int main(int argc, char *argv[]) { return bar(argv[0]); }
+  '''), error_message: 'AVX512BW not available').allowed())
+
 have_pvrdma = get_option('pvrdma') \
   .require(rdma.found(), error_message: 'PVRDMA requires OpenFabrics libraries') \
   .require(cc.compiles(gnu_source_prefix + '''
diff --git a/meson_options.txt b/meson_options.txt
index 66128178bf..96814dd211 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -104,6 +104,8 @@ option('avx2', type: 'feature', value: 'auto',
        description: 'AVX2 optimizations')
 option('avx512f', type: 'feature', value: 'disabled',
        description: 'AVX512F optimizations')
+option('avx512bw', type: 'feature', value: 'auto',
+       description: 'AVX512BW optimizations')
 option('keyring', type: 'feature', value: 'auto',
        description: 'Linux keyring support')
 
diff --git a/migration/ram.c b/migration/ram.c
index dc1de9ddbc..ff4c15c9c3 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -83,6 +83,34 @@
 /* 0x80 is reserved in migration.h start with 0x100 next */
 #define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100
 
+int (*xbzrle_encode_buffer_func)(uint8_t *, uint8_t *, int,
+     uint8_t *, int) = xbzrle_encode_buffer;
+#if defined(CONFIG_AVX512BW_OPT)
+#include "qemu/cpuid.h"
+static void __attribute__((constructor)) init_cpu_flag(void)
+{
+    unsigned max = __get_cpuid_max(0, NULL);
+    int a, b, c, d;
+    if (max >= 1) {
+        __cpuid(1, a, b, c, d);
+         /* We must check that AVX is not just available, but usable.  */
+        if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
+            int bv;
+            __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0));
+            __cpuid_count(7, 0, a, b, c, d);
+           /* 0xe6:
+            *  XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
+            *                    and ZMM16-ZMM31 state are enabled by OS)
+            *  XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
+            */
+            if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) {
+                xbzrle_encode_buffer_func = xbzrle_encode_buffer_avx512;
+            }
+        }
+    }
+}
+#endif
+
 XBZRLECacheStats xbzrle_counters;
 
 /* struct contains XBZRLE cache and a static page
@@ -802,9 +830,9 @@ static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
     memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
 
     /* XBZRLE encoding (if there is no overflow) */
-    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
-                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
-                                       TARGET_PAGE_SIZE);
+    encoded_len = xbzrle_encode_buffer_func(prev_cached_page, XBZRLE.current_buf,
+                                            TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
+                                            TARGET_PAGE_SIZE);
 
     /*
      * Update the cache contents, so that it corresponds to the data
diff --git a/migration/xbzrle.c b/migration/xbzrle.c
index 1ba482ded9..05366e86c0 100644
--- a/migration/xbzrle.c
+++ b/migration/xbzrle.c
@@ -174,3 +174,127 @@ int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen)
 
     return d;
 }
+
+#if defined(CONFIG_AVX512BW_OPT)
+#pragma GCC push_options
+#pragma GCC target("avx512bw")
+#include <immintrin.h>
+int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
+                             uint8_t *dst, int dlen)
+{
+    uint32_t zrun_len = 0, nzrun_len = 0;
+    int d = 0, i = 0, num = 0;
+    uint8_t *nzrun_start = NULL;
+    /* add 1 to include residual part in main loop */
+    uint32_t count512s = (slen >> 6) + 1;
+    /* countResidual is tail of data, i.e., countResidual = slen % 64 */
+    uint32_t count_residual = slen & 0b111111;
+    bool never_same = true;
+    uint64_t mask_residual = 1;
+    mask_residual <<= count_residual;
+    mask_residual -= 1;
+    __m512i r = _mm512_set1_epi32(0);
+
+    while (count512s) {
+        if (d + 2 > dlen) {
+            return -1;
+        }
+
+        int bytes_to_check = 64;
+        uint64_t mask = 0xffffffffffffffff;
+        if (count512s == 1) {
+            bytes_to_check = count_residual;
+            mask = mask_residual;
+        }
+        __m512i old_data = _mm512_mask_loadu_epi8(r,
+                                                  mask, old_buf + i);
+        __m512i new_data = _mm512_mask_loadu_epi8(r,
+                                                  mask, new_buf + i);
+        uint64_t comp = _mm512_cmpeq_epi8_mask(old_data, new_data);
+        count512s--;
+
+        bool is_same = (comp & 0x1);
+        while (bytes_to_check) {
+            if (is_same) {
+                if (nzrun_len) {
+                    d += uleb128_encode_small(dst + d, nzrun_len);
+                    if (d + nzrun_len > dlen) {
+                        return -1;
+                    }
+                    nzrun_start = new_buf + i - nzrun_len;
+                    memcpy(dst + d, nzrun_start, nzrun_len);
+                    d += nzrun_len;
+                    nzrun_len = 0;
+                }
+                /* 64 data at a time for speed */
+                if (count512s && (comp == 0xffffffffffffffff)) {
+                    i += 64;
+                    zrun_len += 64;
+                    break;
+                }
+                never_same = false;
+                num = __builtin_ctzll(~comp);
+                num = (num < bytes_to_check) ? num : bytes_to_check;
+                zrun_len += num;
+                bytes_to_check -= num;
+                comp >>= num;
+                i += num;
+                if (bytes_to_check) {
+                    /* still has different data after same data */
+                    d += uleb128_encode_small(dst + d, zrun_len);
+                    zrun_len = 0;
+                } else {
+                    break;
+                }
+            }
+            if (never_same || zrun_len) {
+                /*
+                 * never_same only acts if
+                 * data begins with diff in first count512s
+                 */
+                d += uleb128_encode_small(dst + d, zrun_len);
+                zrun_len = 0;
+                never_same = false;
+            }
+            /* has diff, 64 data at a time for speed */
+            if ((bytes_to_check == 64) && (comp == 0x0)) {
+                i += 64;
+                nzrun_len += 64;
+                break;
+            }
+            num = __builtin_ctzll(comp);
+            num = (num < bytes_to_check) ? num : bytes_to_check;
+            nzrun_len += num;
+            bytes_to_check -= num;
+            comp >>= num;
+            i += num;
+            if (bytes_to_check) {
+                /* mask like 111000 */
+                d += uleb128_encode_small(dst + d, nzrun_len);
+                /* overflow */
+                if (d + nzrun_len > dlen) {
+                    return -1;
+                }
+                nzrun_start = new_buf + i - nzrun_len;
+                memcpy(dst + d, nzrun_start, nzrun_len);
+                d += nzrun_len;
+                nzrun_len = 0;
+                is_same = true;
+            }
+        }
+    }
+
+    if (nzrun_len != 0) {
+        d += uleb128_encode_small(dst + d, nzrun_len);
+        /* overflow */
+        if (d + nzrun_len > dlen) {
+            return -1;
+        }
+        nzrun_start = new_buf + i - nzrun_len;
+        memcpy(dst + d, nzrun_start, nzrun_len);
+        d += nzrun_len;
+    }
+    return d;
+}
+#pragma GCC pop_options
+#endif
diff --git a/migration/xbzrle.h b/migration/xbzrle.h
index a0db507b9c..6feb49160a 100644
--- a/migration/xbzrle.h
+++ b/migration/xbzrle.h
@@ -18,4 +18,8 @@ int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
                          uint8_t *dst, int dlen);
 
 int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen);
+#if defined(CONFIG_AVX512BW_OPT)
+int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
+                                uint8_t *dst, int dlen);
+#endif
 #endif
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v7 2/2] Update bench-code for addressing CI problem
  2022-11-16 15:29 [PATCH v7 0/2] Update AVX512 support for xbzrle and CI failure ling xu
  2022-11-16 15:29 ` [PATCH v7 1/2] AVX512 support for xbzrle_encode_buffer ling xu
@ 2022-11-16 15:29 ` ling xu
  2023-02-09 19:31   ` Juan Quintela
  2023-02-09 22:46   ` Philippe Mathieu-Daudé
  1 sibling, 2 replies; 8+ messages in thread
From: ling xu @ 2022-11-16 15:29 UTC (permalink / raw)
  To: qemu-devel; +Cc: quintela, dgilbert, ling xu, Zhou Zhao, Jun Jin

Unit test code is in test-xbzrle.c, and benchmark code is in xbzrle-bench.c
for performance benchmarking. we have modified xbzrle-bench.c to address
CI problem.

Signed-off-by: ling xu <ling1.xu@intel.com>
Co-authored-by: Zhou Zhao <zhou.zhao@intel.com>
Co-authored-by: Jun Jin <jun.i.jin@intel.com>
---
 tests/bench/meson.build    |   4 +
 tests/bench/xbzrle-bench.c | 469 +++++++++++++++++++++++++++++++++++++
 tests/unit/test-xbzrle.c   |  39 ++-
 3 files changed, 507 insertions(+), 5 deletions(-)
 create mode 100644 tests/bench/xbzrle-bench.c

diff --git a/tests/bench/meson.build b/tests/bench/meson.build
index 279a8fcc33..daefead58d 100644
--- a/tests/bench/meson.build
+++ b/tests/bench/meson.build
@@ -3,6 +3,10 @@ qht_bench = executable('qht-bench',
                        sources: 'qht-bench.c',
                        dependencies: [qemuutil])
 
+xbzrle_bench = executable('xbzrle-bench',
+                       sources: 'xbzrle-bench.c',
+                       dependencies: [qemuutil,migration])
+
 executable('atomic_add-bench',
            sources: files('atomic_add-bench.c'),
            dependencies: [qemuutil],
diff --git a/tests/bench/xbzrle-bench.c b/tests/bench/xbzrle-bench.c
new file mode 100644
index 0000000000..8848a3a32d
--- /dev/null
+++ b/tests/bench/xbzrle-bench.c
@@ -0,0 +1,469 @@
+/*
+ * Xor Based Zero Run Length Encoding unit tests.
+ *
+ * Copyright 2013 Red Hat, Inc. and/or its affiliates
+ *
+ * Authors:
+ *  Orit Wasserman  <owasserm@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include "../migration/xbzrle.h"
+
+#if defined(CONFIG_AVX512BW_OPT)
+#define XBZRLE_PAGE_SIZE 4096
+static bool is_cpu_support_avx512bw;
+#include "qemu/cpuid.h"
+static void __attribute__((constructor)) init_cpu_flag(void)
+{
+    unsigned max = __get_cpuid_max(0, NULL);
+    int a, b, c, d;
+    is_cpu_support_avx512bw = false;
+    if (max >= 1) {
+        __cpuid(1, a, b, c, d);
+         /* We must check that AVX is not just available, but usable.  */
+        if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
+            int bv;
+            __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0));
+            __cpuid_count(7, 0, a, b, c, d);
+           /* 0xe6:
+            *  XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
+            *                    and ZMM16-ZMM31 state are enabled by OS)
+            *  XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
+            */
+            if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) {
+                is_cpu_support_avx512bw = true;
+            }
+        }
+    }
+    return ;
+}
+
+struct ResTime {
+    float t_raw;
+    float t_512;
+};
+
+
+/* Function prototypes
+int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
+                                uint8_t *dst, int dlen);
+*/
+static void encode_decode_zero(struct ResTime *res)
+{
+    uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    int i = 0;
+    int dlen = 0, dlen512 = 0;
+    int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1006);
+
+    for (i = diff_len; i > 0; i--) {
+        buffer[1000 + i] = i;
+        buffer512[1000 + i] = i;
+    }
+
+    buffer[1000 + diff_len + 3] = 103;
+    buffer[1000 + diff_len + 5] = 105;
+
+    buffer512[1000 + diff_len + 3] = 103;
+    buffer512[1000 + diff_len + 5] = 105;
+
+    /* encode zero page */
+    time_t t_start, t_end, t_start512, t_end512;
+    t_start = clock();
+    dlen = xbzrle_encode_buffer(buffer, buffer, XBZRLE_PAGE_SIZE, compressed,
+                       XBZRLE_PAGE_SIZE);
+    t_end = clock();
+    float time_val = difftime(t_end, t_start);
+    g_assert(dlen == 0);
+
+    t_start512 = clock();
+    dlen512 = xbzrle_encode_buffer_avx512(buffer512, buffer512, XBZRLE_PAGE_SIZE,
+                                       compressed512, XBZRLE_PAGE_SIZE);
+    t_end512 = clock();
+    float time_val512 = difftime(t_end512, t_start512);
+    g_assert(dlen512 == 0);
+
+    res->t_raw = time_val;
+    res->t_512 = time_val512;
+
+    g_free(buffer);
+    g_free(compressed);
+    g_free(buffer512);
+    g_free(compressed512);
+
+}
+
+static void test_encode_decode_zero_avx512(void)
+{
+    int i;
+    float time_raw = 0.0, time_512 = 0.0;
+    struct ResTime res;
+    for (i = 0; i < 10000; i++) {
+        encode_decode_zero(&res);
+        time_raw += res.t_raw;
+        time_512 += res.t_512;
+    }
+    printf("Zero test:\n");
+    printf("Raw xbzrle_encode time is %f ms\n", time_raw);
+    printf("512 xbzrle_encode time is %f ms\n", time_512);
+}
+
+static void encode_decode_unchanged(struct ResTime *res)
+{
+    uint8_t *compressed = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    int i = 0;
+    int dlen = 0, dlen512 = 0;
+    int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1006);
+
+    for (i = diff_len; i > 0; i--) {
+        test[1000 + i] = i + 4;
+        test512[1000 + i] = i + 4;
+    }
+
+    test[1000 + diff_len + 3] = 107;
+    test[1000 + diff_len + 5] = 109;
+
+    test512[1000 + diff_len + 3] = 107;
+    test512[1000 + diff_len + 5] = 109;
+
+    /* test unchanged buffer */
+    time_t t_start, t_end, t_start512, t_end512;
+    t_start = clock();
+    dlen = xbzrle_encode_buffer(test, test, XBZRLE_PAGE_SIZE, compressed,
+                                XBZRLE_PAGE_SIZE);
+    t_end = clock();
+    float time_val = difftime(t_end, t_start);
+    g_assert(dlen == 0);
+
+    t_start512 = clock();
+    dlen512 = xbzrle_encode_buffer_avx512(test512, test512, XBZRLE_PAGE_SIZE,
+                                       compressed512, XBZRLE_PAGE_SIZE);
+    t_end512 = clock();
+    float time_val512 = difftime(t_end512, t_start512);
+    g_assert(dlen512 == 0);
+
+    res->t_raw = time_val;
+    res->t_512 = time_val512;
+
+    g_free(test);
+    g_free(compressed);
+    g_free(test512);
+    g_free(compressed512);
+
+}
+
+static void test_encode_decode_unchanged_avx512(void)
+{
+    int i;
+    float time_raw = 0.0, time_512 = 0.0;
+    struct ResTime res;
+    for (i = 0; i < 10000; i++) {
+        encode_decode_unchanged(&res);
+        time_raw += res.t_raw;
+        time_512 += res.t_512;
+    }
+    printf("Unchanged test:\n");
+    printf("Raw xbzrle_encode time is %f ms\n", time_raw);
+    printf("512 xbzrle_encode time is %f ms\n", time_512);
+}
+
+static void encode_decode_1_byte(struct ResTime *res)
+{
+    uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed = g_malloc(XBZRLE_PAGE_SIZE);
+    uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed512 = g_malloc(XBZRLE_PAGE_SIZE);
+    int dlen = 0, rc = 0, dlen512 = 0, rc512 = 0;
+    uint8_t buf[2];
+    uint8_t buf512[2];
+
+    test[XBZRLE_PAGE_SIZE - 1] = 1;
+    test512[XBZRLE_PAGE_SIZE - 1] = 1;
+
+    time_t t_start, t_end, t_start512, t_end512;
+    t_start = clock();
+    dlen = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE, compressed,
+                       XBZRLE_PAGE_SIZE);
+    t_end = clock();
+    float time_val = difftime(t_end, t_start);
+    g_assert(dlen == (uleb128_encode_small(&buf[0], 4095) + 2));
+
+    rc = xbzrle_decode_buffer(compressed, dlen, buffer, XBZRLE_PAGE_SIZE);
+    g_assert(rc == XBZRLE_PAGE_SIZE);
+    g_assert(memcmp(test, buffer, XBZRLE_PAGE_SIZE) == 0);
+
+    t_start512 = clock();
+    dlen512 = xbzrle_encode_buffer_avx512(buffer512, test512, XBZRLE_PAGE_SIZE,
+                                       compressed512, XBZRLE_PAGE_SIZE);
+    t_end512 = clock();
+    float time_val512 = difftime(t_end512, t_start512);
+    g_assert(dlen512 == (uleb128_encode_small(&buf512[0], 4095) + 2));
+
+    rc512 = xbzrle_decode_buffer(compressed512, dlen512, buffer512,
+                                 XBZRLE_PAGE_SIZE);
+    g_assert(rc512 == XBZRLE_PAGE_SIZE);
+    g_assert(memcmp(test512, buffer512, XBZRLE_PAGE_SIZE) == 0);
+
+    res->t_raw = time_val;
+    res->t_512 = time_val512;
+
+    g_free(buffer);
+    g_free(compressed);
+    g_free(test);
+    g_free(buffer512);
+    g_free(compressed512);
+    g_free(test512);
+
+}
+
+static void test_encode_decode_1_byte_avx512(void)
+{
+    int i;
+    float time_raw = 0.0, time_512 = 0.0;
+    struct ResTime res;
+    for (i = 0; i < 10000; i++) {
+        encode_decode_1_byte(&res);
+        time_raw += res.t_raw;
+        time_512 += res.t_512;
+    }
+    printf("1 byte test:\n");
+    printf("Raw xbzrle_encode time is %f ms\n", time_raw);
+    printf("512 xbzrle_encode time is %f ms\n", time_512);
+}
+
+static void encode_decode_overflow(struct ResTime *res)
+{
+    uint8_t *compressed = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    int i = 0, rc = 0, rc512 = 0;
+
+    for (i = 0; i < XBZRLE_PAGE_SIZE / 2 - 1; i++) {
+        test[i * 2] = 1;
+        test512[i * 2] = 1;
+    }
+
+    /* encode overflow */
+    time_t t_start, t_end, t_start512, t_end512;
+    t_start = clock();
+    rc = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE, compressed,
+                              XBZRLE_PAGE_SIZE);
+    t_end = clock();
+    float time_val = difftime(t_end, t_start);
+    g_assert(rc == -1);
+
+    t_start512 = clock();
+    rc512 = xbzrle_encode_buffer_avx512(buffer512, test512, XBZRLE_PAGE_SIZE,
+                                     compressed512, XBZRLE_PAGE_SIZE);
+    t_end512 = clock();
+    float time_val512 = difftime(t_end512, t_start512);
+    g_assert(rc512 == -1);
+
+    res->t_raw = time_val;
+    res->t_512 = time_val512;
+
+    g_free(buffer);
+    g_free(compressed);
+    g_free(test);
+    g_free(buffer512);
+    g_free(compressed512);
+    g_free(test512);
+
+}
+
+static void test_encode_decode_overflow_avx512(void)
+{
+    int i;
+    float time_raw = 0.0, time_512 = 0.0;
+    struct ResTime res;
+    for (i = 0; i < 10000; i++) {
+        encode_decode_overflow(&res);
+        time_raw += res.t_raw;
+        time_512 += res.t_512;
+    }
+    printf("Overflow test:\n");
+    printf("Raw xbzrle_encode time is %f ms\n", time_raw);
+    printf("512 xbzrle_encode time is %f ms\n", time_512);
+}
+
+static void encode_decode_range_avx512(struct ResTime *res)
+{
+    uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed = g_malloc(XBZRLE_PAGE_SIZE);
+    uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed512 = g_malloc(XBZRLE_PAGE_SIZE);
+    uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    int i = 0, rc = 0, rc512 = 0;
+    int dlen = 0, dlen512 = 0;
+
+    int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1006);
+
+    for (i = diff_len; i > 0; i--) {
+        buffer[1000 + i] = i;
+        test[1000 + i] = i + 4;
+        buffer512[1000 + i] = i;
+        test512[1000 + i] = i + 4;
+    }
+
+    buffer[1000 + diff_len + 3] = 103;
+    test[1000 + diff_len + 3] = 107;
+
+    buffer[1000 + diff_len + 5] = 105;
+    test[1000 + diff_len + 5] = 109;
+
+    buffer512[1000 + diff_len + 3] = 103;
+    test512[1000 + diff_len + 3] = 107;
+
+    buffer512[1000 + diff_len + 5] = 105;
+    test512[1000 + diff_len + 5] = 109;
+
+    /* test encode/decode */
+    time_t t_start, t_end, t_start512, t_end512;
+    t_start = clock();
+    dlen = xbzrle_encode_buffer(test, buffer, XBZRLE_PAGE_SIZE, compressed,
+                                XBZRLE_PAGE_SIZE);
+    t_end = clock();
+    float time_val = difftime(t_end, t_start);
+    rc = xbzrle_decode_buffer(compressed, dlen, test, XBZRLE_PAGE_SIZE);
+    g_assert(rc < XBZRLE_PAGE_SIZE);
+    g_assert(memcmp(test, buffer, XBZRLE_PAGE_SIZE) == 0);
+
+    t_start512 = clock();
+    dlen512 = xbzrle_encode_buffer_avx512(test512, buffer512, XBZRLE_PAGE_SIZE,
+                                       compressed512, XBZRLE_PAGE_SIZE);
+    t_end512 = clock();
+    float time_val512 = difftime(t_end512, t_start512);
+    rc512 = xbzrle_decode_buffer(compressed512, dlen512, test512, XBZRLE_PAGE_SIZE);
+    g_assert(rc512 < XBZRLE_PAGE_SIZE);
+    g_assert(memcmp(test512, buffer512, XBZRLE_PAGE_SIZE) == 0);
+
+    res->t_raw = time_val;
+    res->t_512 = time_val512;
+
+    g_free(buffer);
+    g_free(compressed);
+    g_free(test);
+    g_free(buffer512);
+    g_free(compressed512);
+    g_free(test512);
+
+}
+
+static void test_encode_decode_avx512(void)
+{
+    int i;
+    float time_raw = 0.0, time_512 = 0.0;
+    struct ResTime res;
+    for (i = 0; i < 10000; i++) {
+        encode_decode_range_avx512(&res);
+        time_raw += res.t_raw;
+        time_512 += res.t_512;
+    }
+    printf("Encode decode test:\n");
+    printf("Raw xbzrle_encode time is %f ms\n", time_raw);
+    printf("512 xbzrle_encode time is %f ms\n", time_512);
+}
+
+static void encode_decode_random(struct ResTime *res)
+{
+    uint8_t *buffer = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed = g_malloc(XBZRLE_PAGE_SIZE);
+    uint8_t *test = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *buffer512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    uint8_t *compressed512 = g_malloc(XBZRLE_PAGE_SIZE);
+    uint8_t *test512 = g_malloc0(XBZRLE_PAGE_SIZE);
+    int i = 0, rc = 0, rc512 = 0;
+    int dlen = 0, dlen512 = 0;
+
+    int diff_len = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1);
+    /* store the index of diff */
+    int dirty_index[diff_len];
+    for (int j = 0; j < diff_len; j++) {
+        dirty_index[j] = g_test_rand_int_range(0, XBZRLE_PAGE_SIZE - 1);
+    }
+    for (i = diff_len - 1; i >= 0; i--) {
+        buffer[dirty_index[i]] = i;
+        test[dirty_index[i]] = i + 4;
+        buffer512[dirty_index[i]] = i;
+        test512[dirty_index[i]] = i + 4;
+    }
+
+    time_t t_start, t_end, t_start512, t_end512;
+    t_start = clock();
+    dlen = xbzrle_encode_buffer(test, buffer, XBZRLE_PAGE_SIZE, compressed,
+                                XBZRLE_PAGE_SIZE);
+    t_end = clock();
+    float time_val = difftime(t_end, t_start);
+    rc = xbzrle_decode_buffer(compressed, dlen, test, XBZRLE_PAGE_SIZE);
+    g_assert(rc < XBZRLE_PAGE_SIZE);
+
+    t_start512 = clock();
+    dlen512 = xbzrle_encode_buffer_avx512(test512, buffer512, XBZRLE_PAGE_SIZE,
+                                       compressed512, XBZRLE_PAGE_SIZE);
+    t_end512 = clock();
+    float time_val512 = difftime(t_end512, t_start512);
+    rc512 = xbzrle_decode_buffer(compressed512, dlen512, test512, XBZRLE_PAGE_SIZE);
+    g_assert(rc512 < XBZRLE_PAGE_SIZE);
+
+    res->t_raw = time_val;
+    res->t_512 = time_val512;
+
+    g_free(buffer);
+    g_free(compressed);
+    g_free(test);
+    g_free(buffer512);
+    g_free(compressed512);
+    g_free(test512);
+
+}
+
+static void test_encode_decode_random_avx512(void)
+{
+    int i;
+    float time_raw = 0.0, time_512 = 0.0;
+    struct ResTime res;
+    for (i = 0; i < 10000; i++) {
+        encode_decode_random(&res);
+        time_raw += res.t_raw;
+        time_512 += res.t_512;
+    }
+    printf("Random test:\n");
+    printf("Raw xbzrle_encode time is %f ms\n", time_raw);
+    printf("512 xbzrle_encode time is %f ms\n", time_512);
+}
+#endif
+
+int main(int argc, char **argv)
+{
+    g_test_init(&argc, &argv, NULL);
+    g_test_rand_int();
+    #if defined(CONFIG_AVX512BW_OPT)
+    if (likely(is_cpu_support_avx512bw)) {
+        g_test_add_func("/xbzrle/encode_decode_zero", test_encode_decode_zero_avx512);
+        g_test_add_func("/xbzrle/encode_decode_unchanged",
+                        test_encode_decode_unchanged_avx512);
+        g_test_add_func("/xbzrle/encode_decode_1_byte", test_encode_decode_1_byte_avx512);
+        g_test_add_func("/xbzrle/encode_decode_overflow",
+                        test_encode_decode_overflow_avx512);
+        g_test_add_func("/xbzrle/encode_decode", test_encode_decode_avx512);
+        g_test_add_func("/xbzrle/encode_decode_random", test_encode_decode_random_avx512);
+    }
+    #endif
+    return g_test_run();
+}
diff --git a/tests/unit/test-xbzrle.c b/tests/unit/test-xbzrle.c
index ef951b6e54..547046d093 100644
--- a/tests/unit/test-xbzrle.c
+++ b/tests/unit/test-xbzrle.c
@@ -16,6 +16,35 @@
 
 #define XBZRLE_PAGE_SIZE 4096
 
+int (*xbzrle_encode_buffer_func)(uint8_t *, uint8_t *, int,
+     uint8_t *, int) = xbzrle_encode_buffer;
+#if defined(CONFIG_AVX512BW_OPT)
+#include "qemu/cpuid.h"
+static void __attribute__((constructor)) init_cpu_flag(void)
+{
+    unsigned max = __get_cpuid_max(0, NULL);
+    int a, b, c, d;
+    if (max >= 1) {
+        __cpuid(1, a, b, c, d);
+         /* We must check that AVX is not just available, but usable.  */
+        if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
+            int bv;
+            __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0));
+            __cpuid_count(7, 0, a, b, c, d);
+           /* 0xe6:
+            *  XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
+            *                    and ZMM16-ZMM31 state are enabled by OS)
+            *  XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
+            */
+            if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) {
+                xbzrle_encode_buffer_func = xbzrle_encode_buffer_avx512;
+            }
+        }
+    }
+    return ;
+}
+#endif
+
 static void test_uleb(void)
 {
     uint32_t i, val;
@@ -54,7 +83,7 @@ static void test_encode_decode_zero(void)
     buffer[1000 + diff_len + 5] = 105;
 
     /* encode zero page */
-    dlen = xbzrle_encode_buffer(buffer, buffer, XBZRLE_PAGE_SIZE, compressed,
+    dlen = xbzrle_encode_buffer_func(buffer, buffer, XBZRLE_PAGE_SIZE, compressed,
                        XBZRLE_PAGE_SIZE);
     g_assert(dlen == 0);
 
@@ -78,7 +107,7 @@ static void test_encode_decode_unchanged(void)
     test[1000 + diff_len + 5] = 109;
 
     /* test unchanged buffer */
-    dlen = xbzrle_encode_buffer(test, test, XBZRLE_PAGE_SIZE, compressed,
+    dlen = xbzrle_encode_buffer_func(test, test, XBZRLE_PAGE_SIZE, compressed,
                                 XBZRLE_PAGE_SIZE);
     g_assert(dlen == 0);
 
@@ -96,7 +125,7 @@ static void test_encode_decode_1_byte(void)
 
     test[XBZRLE_PAGE_SIZE - 1] = 1;
 
-    dlen = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE, compressed,
+    dlen = xbzrle_encode_buffer_func(buffer, test, XBZRLE_PAGE_SIZE, compressed,
                        XBZRLE_PAGE_SIZE);
     g_assert(dlen == (uleb128_encode_small(&buf[0], 4095) + 2));
 
@@ -121,7 +150,7 @@ static void test_encode_decode_overflow(void)
     }
 
     /* encode overflow */
-    rc = xbzrle_encode_buffer(buffer, test, XBZRLE_PAGE_SIZE, compressed,
+    rc = xbzrle_encode_buffer_func(buffer, test, XBZRLE_PAGE_SIZE, compressed,
                               XBZRLE_PAGE_SIZE);
     g_assert(rc == -1);
 
@@ -152,7 +181,7 @@ static void encode_decode_range(void)
     test[1000 + diff_len + 5] = 109;
 
     /* test encode/decode */
-    dlen = xbzrle_encode_buffer(test, buffer, XBZRLE_PAGE_SIZE, compressed,
+    dlen = xbzrle_encode_buffer_func(test, buffer, XBZRLE_PAGE_SIZE, compressed,
                                 XBZRLE_PAGE_SIZE);
 
     rc = xbzrle_decode_buffer(compressed, dlen, test, XBZRLE_PAGE_SIZE);
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v7 1/2] AVX512 support for xbzrle_encode_buffer
  2022-11-16 15:29 ` [PATCH v7 1/2] AVX512 support for xbzrle_encode_buffer ling xu
@ 2023-02-09 19:30   ` Juan Quintela
  0 siblings, 0 replies; 8+ messages in thread
From: Juan Quintela @ 2023-02-09 19:30 UTC (permalink / raw)
  To: ling xu; +Cc: qemu-devel, dgilbert, Zhou Zhao, Jun Jin

ling xu <ling1.xu@intel.com> wrote:
> This commit is the same with [PATCH v6 1/2], and provides avx512 support for xbzrle_encode_buffer
> function to accelerate xbzrle encoding speed. Runtime check of avx512
> support and benchmark for this feature are added. Compared with C
> version of xbzrle_encode_buffer function, avx512 version can achieve
> 50%-70% performance improvement on benchmarking. In addition, if dirty
> data is randomly located in 4K page, the avx512 version can achieve
> almost 140% performance gain.
>
> Signed-off-by: ling xu <ling1.xu@intel.com>
> Co-authored-by: Zhou Zhao <zhou.zhao@intel.com>
> Co-authored-by: Jun Jin <jun.i.jin@intel.com>

Reviewed-by: Juan Quintela <quintela@redhat.com>

But there were a lot of "but's":

> diff --git a/meson.build b/meson.build
> index cf3e517e56..d0d28f5c9e 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -2344,6 +2344,22 @@ config_host_data.set('CONFIG_AVX512F_OPT', get_option('avx512f') \
>      int main(int argc, char *argv[]) { return bar(argv[argc - 1]); }
>    '''), error_message: 'AVX512F not available').allowed())
>  
> +config_host_data.set('CONFIG_AVX512BW_OPT', get_option('avx512bw') \
> +  .require(have_cpuid_h, error_message: 'cpuid.h not available, cannot enable AVX512BW') \
> +  .require(cc.links('''
> +    #pragma GCC push_options
> +    #pragma GCC target("avx512bw")
> +    #include <cpuid.h>
> +    #include <immintrin.h>
> +    static int bar(void *a) {
> +
> +      __m512i *x = a;
> +      __m512i res= _mm512_abs_epi8(*x);
> +      return res[1];
> +    }
> +    int main(int argc, char *argv[]) { return bar(argv[0]); }
> +  '''), error_message: 'AVX512BW not available').allowed())
> +
>  have_pvrdma = get_option('pvrdma') \
>    .require(rdma.found(), error_message: 'PVRDMA requires OpenFabrics libraries') \
>    .require(cc.compiles(gnu_source_prefix + '''

This file misses:

@@ -3783,6 +3799,7 @@ summary_info += {'debug stack usage': get_option('debug_stack_usage')}
 summary_info += {'mutex debugging':   get_option('debug_mutex')}
 summary_info += {'memory allocator':  get_option('malloc')}
 summary_info += {'avx2 optimization': config_host_data.get('CONFIG_AVX2_OPT')}
+summary_info += {'avx512bw optimization': config_host_data.get('CONFIG_AVX512BW_OPT')}
 summary_info += {'avx512f optimization': config_host_data.get('CONFIG_AVX512F_OPT')}
 summary_info += {'gprof enabled':     get_option('gprof')}
 summary_info += {'gcov':              get_option('b_coverage')}
diff --git a/meson_options.txt b/meson_options.txt
index 559a571b6b..e5f199119e 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -104,6 +104,8 @@ option('avx2', type: 'feature', value: 'auto',
        description: 'AVX2 optimizations')
 option('avx512f', type: 'feature', value: 'disabled',
        description: 'AVX512F optimizations')
+option('avx512bw', type: 'feature', value: 'auto',
+       description: 'AVX512BW optimizations')
 option('keyring', type: 'feature', value: 'auto',
        description: 'Linux keyring support')

And you are missing:

diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index 0f71e92dcb..c2982ea087 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -70,6 +70,7 @@ meson_options_help() {
   printf "%s\n" '  attr            attr/xattr support'
   printf "%s\n" '  auth-pam        PAM access control'
   printf "%s\n" '  avx2            AVX2 optimizations'
+  printf "%s\n" '  avx512bw        AVX512BW optimizations'
   printf "%s\n" '  avx512f         AVX512F optimizations'
   printf "%s\n" '  blkio           libblkio block device driver'
   printf "%s\n" '  bochs           bochs image format support'
@@ -198,6 +199,8 @@ _meson_option_parse() {
     --disable-auth-pam) printf "%s" -Dauth_pam=disabled ;;
     --enable-avx2) printf "%s" -Davx2=enabled ;;
     --disable-avx2) printf "%s" -Davx2=disabled ;;
+    --enable-avx512bw) printf "%s" -Davx512bw=enabled ;;
+    --disable-avx512bw) printf "%s" -Davx512bw=disabled ;;
     --enable-avx512f) printf "%s" -Davx512f=enabled ;;
     --disable-avx512f) printf "%s" -Davx512f=disabled ;;
     --enable-gcov) printf "%s" -Db_coverage=true ;;



^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v7 2/2] Update bench-code for addressing CI problem
  2022-11-16 15:29 ` [PATCH v7 2/2] Update bench-code for addressing CI problem ling xu
@ 2023-02-09 19:31   ` Juan Quintela
  2023-02-09 22:46   ` Philippe Mathieu-Daudé
  1 sibling, 0 replies; 8+ messages in thread
From: Juan Quintela @ 2023-02-09 19:31 UTC (permalink / raw)
  To: ling xu; +Cc: qemu-devel, dgilbert, Zhou Zhao, Jun Jin

ling xu <ling1.xu@intel.com> wrote:
> Unit test code is in test-xbzrle.c, and benchmark code is in xbzrle-bench.c
> for performance benchmarking. we have modified xbzrle-bench.c to address
> CI problem.
>
> Signed-off-by: ling xu <ling1.xu@intel.com>
> Co-authored-by: Zhou Zhao <zhou.zhao@intel.com>
> Co-authored-by: Jun Jin <jun.i.jin@intel.com>

Reviewed-by: Juan Quintela <quintela@redhat.com>



^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v7 2/2] Update bench-code for addressing CI problem
  2022-11-16 15:29 ` [PATCH v7 2/2] Update bench-code for addressing CI problem ling xu
  2023-02-09 19:31   ` Juan Quintela
@ 2023-02-09 22:46   ` Philippe Mathieu-Daudé
  2023-02-09 23:50     ` Juan Quintela
  1 sibling, 1 reply; 8+ messages in thread
From: Philippe Mathieu-Daudé @ 2023-02-09 22:46 UTC (permalink / raw)
  To: ling xu, qemu-devel
  Cc: quintela, dgilbert, Zhou Zhao, Jun Jin, Thomas Huth, Markus Armbruster

On 16/11/22 16:29, ling xu wrote:
> Unit test code is in test-xbzrle.c, and benchmark code is in xbzrle-bench.c
> for performance benchmarking. we have modified xbzrle-bench.c to address
> CI problem.
> 
> Signed-off-by: ling xu <ling1.xu@intel.com>
> Co-authored-by: Zhou Zhao <zhou.zhao@intel.com>
> Co-authored-by: Jun Jin <jun.i.jin@intel.com>
> ---
>   tests/bench/meson.build    |   4 +
>   tests/bench/xbzrle-bench.c | 469 +++++++++++++++++++++++++++++++++++++
>   tests/unit/test-xbzrle.c   |  39 ++-
>   3 files changed, 507 insertions(+), 5 deletions(-)
>   create mode 100644 tests/bench/xbzrle-bench.c


> diff --git a/tests/bench/xbzrle-bench.c b/tests/bench/xbzrle-bench.c
> new file mode 100644
> index 0000000000..8848a3a32d
> --- /dev/null
> +++ b/tests/bench/xbzrle-bench.c
> @@ -0,0 +1,469 @@
> +/*
> + * Xor Based Zero Run Length Encoding unit tests.
> + *
> + * Copyright 2013 Red Hat, Inc. and/or its affiliates
> + *
> + * Authors:
> + *  Orit Wasserman  <owasserm@redhat.com>

Is Orit the real author? Or is it based on migration/xbzrle.c?

> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +#include "qemu/osdep.h"
> +#include "qemu/cutils.h"
> +#include "../migration/xbzrle.h"

Interesting migration include path. Similarly:

$ git grep -F '#include "../' tests | egrep -v 
'(../libqtest.h|tests/tcg/mips|../multiarch)'
tests/qtest/netdev-socket.c:12:#include "../unit/socket-helpers.h"
tests/unit/test-qgraph.c:20:#include "../qtest/libqos/qgraph.h"
tests/unit/test-qgraph.c:21:#include "../qtest/libqos/qgraph_internal.h"

tests/migration/aarch64/a-b-kernel.S:14:#include "../migration-test.h"
tests/unit/test-vmstate.c:27:#include "../migration/migration.h"
tests/unit/test-vmstate.c:30:#include "../migration/qemu-file.h"
tests/unit/test-vmstate.c:31:#include "../migration/savevm.h"
tests/unit/test-xbzrle.c:15:#include "../migration/xbzrle.h"

$ ls -1 migration/*.h
migration/block.h
migration/channel-block.h
migration/channel.h
migration/dirtyrate.h
migration/exec.h
migration/fd.h
migration/migration.h      [*]
migration/multifd.h
migration/page_cache.h
migration/postcopy-ram.h
migration/qemu-file.h      [*]
migration/ram.h
migration/rdma.h
migration/savevm.h         [*]
migration/socket.h
migration/threadinfo.h
migration/tls.h
migration/trace.h
migration/xbzrle.h         [*]
migration/yank_functions.h

$ ls -1 include/migration/*.h
include/migration/blocker.h
include/migration/colo.h
include/migration/cpu.h
include/migration/failover.h
include/migration/global_state.h
include/migration/misc.h
include/migration/qemu-file-types.h
include/migration/register.h
include/migration/snapshot.h
include/migration/vmstate.h

Do the 4 files marked [*] belong to include/migration/?


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v7 2/2] Update bench-code for addressing CI problem
  2023-02-09 22:46   ` Philippe Mathieu-Daudé
@ 2023-02-09 23:50     ` Juan Quintela
  2023-02-10  7:04       ` Philippe Mathieu-Daudé
  0 siblings, 1 reply; 8+ messages in thread
From: Juan Quintela @ 2023-02-09 23:50 UTC (permalink / raw)
  To: Philippe Mathieu-Daudé
  Cc: ling xu, qemu-devel, dgilbert, Zhou Zhao, Jun Jin, Thomas Huth,
	Markus Armbruster

Philippe Mathieu-Daudé <philmd@linaro.org> wrote:
> On 16/11/22 16:29, ling xu wrote:
>> Unit test code is in test-xbzrle.c, and benchmark code is in xbzrle-bench.c
>> for performance benchmarking. we have modified xbzrle-bench.c to address
>> CI problem.
>> Signed-off-by: ling xu <ling1.xu@intel.com>
>> Co-authored-by: Zhou Zhao <zhou.zhao@intel.com>
>> Co-authored-by: Jun Jin <jun.i.jin@intel.com>
>> ---
>>   tests/bench/meson.build    |   4 +
>>   tests/bench/xbzrle-bench.c | 469 +++++++++++++++++++++++++++++++++++++
>>   tests/unit/test-xbzrle.c   |  39 ++-
>>   3 files changed, 507 insertions(+), 5 deletions(-)
>>   create mode 100644 tests/bench/xbzrle-bench.c
>
>
>> diff --git a/tests/bench/xbzrle-bench.c b/tests/bench/xbzrle-bench.c
>> new file mode 100644
>> index 0000000000..8848a3a32d
>> --- /dev/null
>> +++ b/tests/bench/xbzrle-bench.c
>> @@ -0,0 +1,469 @@
>> +/*
>> + * Xor Based Zero Run Length Encoding unit tests.
>> + *
>> + * Copyright 2013 Red Hat, Inc. and/or its affiliates
>> + *
>> + * Authors:
>> + *  Orit Wasserman  <owasserm@redhat.com>
>
> Is Orit the real author? Or is it based on migration/xbzrle.c?

Based on as far as I can se.

>> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>> + * See the COPYING file in the top-level directory.
>> + *
>> + */
>> +#include "qemu/osdep.h"
>> +#include "qemu/cutils.h"
>> +#include "../migration/xbzrle.h"
>
> Interesting migration include path. Similarly:

xbzrle.h is only exported for migration.  Nothing else can use them.
So we can't put that on include/migration/*

> $ git grep -F '#include "../' tests | egrep -v
> '(../libqtest.h|tests/tcg/mips|../multiarch)'
> tests/qtest/netdev-socket.c:12:#include "../unit/socket-helpers.h"
> tests/unit/test-qgraph.c:20:#include "../qtest/libqos/qgraph.h"
> tests/unit/test-qgraph.c:21:#include "../qtest/libqos/qgraph_internal.h"
>
> tests/migration/aarch64/a-b-kernel.S:14:#include "../migration-test.h"
> tests/unit/test-vmstate.c:27:#include "../migration/migration.h"
> tests/unit/test-vmstate.c:30:#include "../migration/qemu-file.h"
> tests/unit/test-vmstate.c:31:#include "../migration/savevm.h"
> tests/unit/test-xbzrle.c:15:#include "../migration/xbzrle.h"
>
> $ ls -1 migration/*.h
> migration/block.h
> migration/channel-block.h
> migration/channel.h
> migration/dirtyrate.h
> migration/exec.h
> migration/fd.h
> migration/migration.h      [*]
> migration/multifd.h
> migration/page_cache.h
> migration/postcopy-ram.h
> migration/qemu-file.h      [*]
> migration/ram.h
> migration/rdma.h
> migration/savevm.h         [*]
> migration/socket.h
> migration/threadinfo.h
> migration/tls.h
> migration/trace.h
> migration/xbzrle.h         [*]
> migration/yank_functions.h
>
> $ ls -1 include/migration/*.h
> include/migration/blocker.h
> include/migration/colo.h
> include/migration/cpu.h
> include/migration/failover.h
> include/migration/global_state.h
> include/migration/misc.h
> include/migration/qemu-file-types.h
> include/migration/register.h
> include/migration/snapshot.h
> include/migration/vmstate.h
>
> Do the 4 files marked [*] belong to include/migration/?

The split is:
include/migration/* <- exported for everybody to use
migration/*.h       <- Only for migration

Now, doing tests for migration makes this difference complicated,
because some tests really need things that are not exported.

This is the way that it is normally used in the tree, no?

Later, Juan.



^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v7 2/2] Update bench-code for addressing CI problem
  2023-02-09 23:50     ` Juan Quintela
@ 2023-02-10  7:04       ` Philippe Mathieu-Daudé
  0 siblings, 0 replies; 8+ messages in thread
From: Philippe Mathieu-Daudé @ 2023-02-10  7:04 UTC (permalink / raw)
  To: quintela
  Cc: ling xu, qemu-devel, dgilbert, Zhou Zhao, Jun Jin, Thomas Huth,
	Markus Armbruster

On 10/2/23 00:50, Juan Quintela wrote:
> Philippe Mathieu-Daudé <philmd@linaro.org> wrote:
>> On 16/11/22 16:29, ling xu wrote:
>>> Unit test code is in test-xbzrle.c, and benchmark code is in xbzrle-bench.c
>>> for performance benchmarking. we have modified xbzrle-bench.c to address
>>> CI problem.
>>> Signed-off-by: ling xu <ling1.xu@intel.com>
>>> Co-authored-by: Zhou Zhao <zhou.zhao@intel.com>
>>> Co-authored-by: Jun Jin <jun.i.jin@intel.com>
>>> ---
>>>    tests/bench/meson.build    |   4 +
>>>    tests/bench/xbzrle-bench.c | 469 +++++++++++++++++++++++++++++++++++++
>>>    tests/unit/test-xbzrle.c   |  39 ++-
>>>    3 files changed, 507 insertions(+), 5 deletions(-)
>>>    create mode 100644 tests/bench/xbzrle-bench.c
>>
>>
>>> diff --git a/tests/bench/xbzrle-bench.c b/tests/bench/xbzrle-bench.c
>>> new file mode 100644
>>> index 0000000000..8848a3a32d
>>> --- /dev/null
>>> +++ b/tests/bench/xbzrle-bench.c
>>> @@ -0,0 +1,469 @@
>>> +/*
>>> + * Xor Based Zero Run Length Encoding unit tests.
>>> + *
>>> + * Copyright 2013 Red Hat, Inc. and/or its affiliates
>>> + *
>>> + * Authors:
>>> + *  Orit Wasserman  <owasserm@redhat.com>
>>
>> Is Orit the real author? Or is it based on migration/xbzrle.c?
> 
> Based on as far as I can se.
> 
>>> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>>> + * See the COPYING file in the top-level directory.
>>> + *
>>> + */
>>> +#include "qemu/osdep.h"
>>> +#include "qemu/cutils.h"
>>> +#include "../migration/xbzrle.h"
>>
>> Interesting migration include path. Similarly:
> 
> xbzrle.h is only exported for migration.  Nothing else can use them.
> So we can't put that on include/migration/*
> 
>> $ git grep -F '#include "../' tests | egrep -v
>> '(../libqtest.h|tests/tcg/mips|../multiarch)'
>> tests/qtest/netdev-socket.c:12:#include "../unit/socket-helpers.h"
>> tests/unit/test-qgraph.c:20:#include "../qtest/libqos/qgraph.h"
>> tests/unit/test-qgraph.c:21:#include "../qtest/libqos/qgraph_internal.h"
>>
>> tests/migration/aarch64/a-b-kernel.S:14:#include "../migration-test.h"
>> tests/unit/test-vmstate.c:27:#include "../migration/migration.h"
>> tests/unit/test-vmstate.c:30:#include "../migration/qemu-file.h"
>> tests/unit/test-vmstate.c:31:#include "../migration/savevm.h"
>> tests/unit/test-xbzrle.c:15:#include "../migration/xbzrle.h"
>>
>> $ ls -1 migration/*.h
>> migration/block.h
>> migration/channel-block.h
>> migration/channel.h
>> migration/dirtyrate.h
>> migration/exec.h
>> migration/fd.h
>> migration/migration.h      [*]
>> migration/multifd.h
>> migration/page_cache.h
>> migration/postcopy-ram.h
>> migration/qemu-file.h      [*]
>> migration/ram.h
>> migration/rdma.h
>> migration/savevm.h         [*]
>> migration/socket.h
>> migration/threadinfo.h
>> migration/tls.h
>> migration/trace.h
>> migration/xbzrle.h         [*]
>> migration/yank_functions.h
>>
>> $ ls -1 include/migration/*.h
>> include/migration/blocker.h
>> include/migration/colo.h
>> include/migration/cpu.h
>> include/migration/failover.h
>> include/migration/global_state.h
>> include/migration/misc.h
>> include/migration/qemu-file-types.h
>> include/migration/register.h
>> include/migration/snapshot.h
>> include/migration/vmstate.h
>>
>> Do the 4 files marked [*] belong to include/migration/?
> 
> The split is:
> include/migration/* <- exported for everybody to use
> migration/*.h       <- Only for migration
> 
> Now, doing tests for migration makes this difference complicated,
> because some tests really need things that are not exported.
> 
> This is the way that it is normally used in the tree, no?

Yes you are correct. Thanks for clarifying!

Phil.



^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2023-02-10  7:05 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-11-16 15:29 [PATCH v7 0/2] Update AVX512 support for xbzrle and CI failure ling xu
2022-11-16 15:29 ` [PATCH v7 1/2] AVX512 support for xbzrle_encode_buffer ling xu
2023-02-09 19:30   ` Juan Quintela
2022-11-16 15:29 ` [PATCH v7 2/2] Update bench-code for addressing CI problem ling xu
2023-02-09 19:31   ` Juan Quintela
2023-02-09 22:46   ` Philippe Mathieu-Daudé
2023-02-09 23:50     ` Juan Quintela
2023-02-10  7:04       ` Philippe Mathieu-Daudé

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.