All of lore.kernel.org
 help / color / mirror / Atom feed
* [MODERATED] Re: Some micro-perf tests
@ 2019-02-27 19:09 Stewart, David C
  2019-02-27 19:44 ` Andrew Cooper
  0 siblings, 1 reply; 3+ messages in thread
From: Stewart, David C @ 2019-02-27 19:09 UTC (permalink / raw)
  To: speck

On Sat, Feb 23, 2019 at 10:27 AM speck for Andrew Cooper
<speck@linutronix.de> wrote:
>
>
> Pre microcode:
> * VERW of NUL   => 65-69 cycles
> * VERW of %ds   => 33-37 cycles
>
> Post microcode:
> * VERW of NUL   => 512-520 cycles
> * VERW of %ds   => 520-540 cycles

Andrew – can you please send me your code? We need to root-cause why the behavior you are seeing is different than our guidance.

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [MODERATED] Re: Some micro-perf tests
  2019-02-27 19:09 [MODERATED] Re: Some micro-perf tests Stewart, David C
@ 2019-02-27 19:44 ` Andrew Cooper
  0 siblings, 0 replies; 3+ messages in thread
From: Andrew Cooper @ 2019-02-27 19:44 UTC (permalink / raw)
  To: speck


[-- Attachment #1.1: Type: text/plain, Size: 2418 bytes --]

On 27/02/2019 19:09, speck for Stewart, David C wrote:
> On Sat, Feb 23, 2019 at 10:27 AM speck for Andrew Cooper
> <speck@linutronix.de> wrote:
>>
>>
>> Pre microcode:
>> * VERW of NUL   => 65-69 cycles
>> * VERW of %ds   => 33-37 cycles
>>
>> Post microcode:
>> * VERW of NUL   => 512-520 cycles
>> * VERW of %ds   => 520-540 cycles
> 
> Andrew – can you please send me your code? We need to root-cause why the behavior you are seeing is different than our guidance.

Hello,

The exact code is unlikely to be of direct interest, as it is specific
to my Xen Test Framework.

The interesting subset, converted to work in regular userspace is:

andrewcoop@andrewcoop:/tmp/verw$ cat verw.c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Compiler-level barrier: an empty asm statement with a "memory" clobber.
 * The asm template is empty, so no instruction is emitted by it; the clobber
 * tells the compiler not to cache or reorder memory accesses across it.
 */
#define barrier() asm volatile ("" ::: "memory")

/*
 * Read the current %ds segment selector.
 *
 * Returns the 16-bit selector value, zero-extended to unsigned int.
 */
static unsigned int read_ds(void)
{
    unsigned int sel;

    /*
     * The output must be a register.  A segment-register store to memory
     * writes only 16 bits, so allowing an "m" alternative could leave the
     * upper half of the 32-bit 'sel' uninitialised.
     *
     * Spelled __asm__/__volatile__ so the function also builds in strict
     * ISO C modes, where the plain 'asm' keyword is unavailable.
     */
    __asm__ __volatile__ ("mov %%ds, %0" : "=r" (sel));

    /* A selector is 16 bits wide; never hand back anything above that. */
    return sel & 0xffffu;
}

/*
 * Sample the timestamp counter with RDTSCP.
 *
 * Returns the two halves delivered in %eax/%edx combined into one 64-bit
 * value.
 */
static uint64_t read_time(void)
{
    unsigned long tsc_lo, tsc_hi;
    uint64_t tsc;

    /* %ecx is declared clobbered; the value placed there is not used. */
    asm volatile ("rdtscp"
                  : "=a" (tsc_lo), "=d" (tsc_hi)
                  :
                  : "ecx");

    tsc = tsc_hi;
    tsc <<= 32;
    tsc |= tsc_lo;

    return tsc;
}

/*
 * Time a single VERW of the given selector.
 *
 * sel: selector value, passed to VERW as a memory operand.
 *
 * Returns the difference between the read_time() samples taken immediately
 * after and immediately before the VERW.
 */
static uint64_t time_sel(unsigned int sel)
{
    uint64_t start, stop;

    /* Compiler barriers bracket the whole measured region. */
    barrier();
    start = read_time();

    asm volatile ("verw %0"
                  :
                  : "m" (sel));

    stop = read_time();
    barrier();

    return stop - start;
}

/*
 * Collect 50 VERW timings for the NUL selector and 50 for the current %ds
 * selector, then print them side by side.
 */
int main(void)
{
    static uint64_t times[2][50];
    const unsigned int nr = sizeof(times[0]) / sizeof(times[0][0]);
    unsigned int run;

    /* Row 0: VERW of selector 0. */
    for ( run = 0; run < nr; ++run )
        times[0][run] = time_sel(0);

    /* Row 1: VERW of %ds, which is re-read on every iteration. */
    for ( run = 0; run < nr; ++run )
        times[1][run] = time_sel(read_ds());

    /* Column header: the two selector values that were measured. */
    printf("     0 \t%#4x\n", read_ds());

    for ( run = 0; run < nr; ++run )
        printf("[%02u] %"PRIu64"\t%"PRIu64"\n",
               run, times[0][run], times[1][run]);

    return 0;
}

To compile,
$gcc -m32 -O3 verw.c -o verw

For 64bit, you need to hardcode a %ds other than 0.  0x2b looks to be
the going candidate.

The exact CPU in question is:

[root@idol ~]# head /proc/cpuinfo
processor	: 0
vendor_id	: GenuineIntel
cpu family	: 6
model		: 158
model name	: Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz
stepping	: 10
microcode	: 0x109a
cpu MHz		: 3200.000
cache size	: 12288 KB
physical id	: 0

which is a CoffeeLake part using the alpha ucode drop.

Thanks,

~Andrew

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1.2: 0001-time-verw.patch --]
[-- Type: text/x-patch; name="0001-time-verw.patch", Size: 3306 bytes --]

From 15872ecbac1df9fba04b8a1e052b888f4b65eaf2 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Thu, 1 Nov 2018 20:21:36 +0000
Subject: [PATCH 1/1] time verw

---
 docs/all-tests.dox  |   2 +
 tests/verw/Makefile |   9 +++++
 tests/verw/main.c   | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 116 insertions(+)
 create mode 100644 tests/verw/Makefile
 create mode 100644 tests/verw/main.c

diff --git a/docs/all-tests.dox b/docs/all-tests.dox
index 732d44c..8aa54ab 100644
--- a/docs/all-tests.dox
+++ b/docs/all-tests.dox
@@ -146,3 +146,5 @@ enable BTS.
 
 @subpage test-nested-vmx - Nested VT-x tests.
 */
+# Placeholder: Merge into the appropriate location above
+@subpage test-verw - @todo title
diff --git a/tests/verw/Makefile b/tests/verw/Makefile
new file mode 100644
index 0000000..b6260b2
--- /dev/null
+++ b/tests/verw/Makefile
@@ -0,0 +1,9 @@
+include $(ROOT)/build/common.mk
+
+NAME      := verw
+CATEGORY  := utility
+TEST-ENVS := pv64 hvm64
+
+obj-perenv += main.o
+
+include $(ROOT)/build/gen.mk
diff --git a/tests/verw/main.c b/tests/verw/main.c
new file mode 100644
index 0000000..630f881
--- /dev/null
+++ b/tests/verw/main.c
@@ -0,0 +1,105 @@
+/**
+ * @file tests/verw/main.c
+ * @ref test-verw
+ *
+ * @page test-verw verw
+ *
+ * @todo Docs for test-verw
+ *
+ * @see tests/verw/main.c
+ */
+#include <xtf.h>
+
+const char test_title[] = "Test verw";
+
+static uint64_t read_time(void)
+{
+    unsigned long low, high;
+
+    asm volatile ("rdtscp"
+                  : "=a" (low), "=d" (high) :: "ecx");
+
+    return ((uint64_t)high << 32) | low;
+}
+
+static uint64_t time_sel(unsigned int sel)
+{
+    uint64_t t1, t2;
+
+    barrier();
+    t1 = read_time();
+
+    asm volatile ("verw %0" :: "m" (sel));
+
+    t2 = read_time();
+    barrier();
+
+    return t2 - t1;
+}
+
+static uint64_t time_flushcmd(void)
+{
+    uint64_t t1, t2;
+
+    barrier();
+    t1 = read_time();
+
+    wrmsr(0x10b, 1);
+
+    t2 = read_time();
+    barrier();
+
+    return t2 - t1;
+}
+
+void test_main(void)
+{
+    static uint64_t times[2][20];
+    unsigned int i;
+
+    for ( i = 0; i < 20; ++i )
+    {
+        unsigned int sel = 0;
+
+        //asm volatile ("clflush %1" : "+r" (sel) : "m"(*gdt));
+
+        times[0][i] = time_sel(sel);
+    }
+
+    for ( i = 0; i < 20; ++i )
+    {
+        unsigned int sel = read_ds();
+
+        //asm volatile ("clflush %1" : "+r" (sel) : "m"(*gdt));
+
+        times[1][i] = time_sel(sel);
+    }
+
+    printk("     0 \t%#x\n", read_ds());
+    for ( i = 0; i < 20; ++i )
+        printk("[%02u] %"PRIu64"\t%"PRIu64"\n",
+               i, times[0][i], times[1][i]);
+
+    if ( IS_DEFINED(CONFIG_HVM) )
+    {
+        for ( i = 0; i < 20; ++i )
+            times[0][i] = time_flushcmd();
+
+        printk("MSR_FLUSH_CMD\n");
+        for ( i = 0; i < 20; ++i )
+            printk("[%02u] %"PRIu64"\n",
+                   i, times[0][i]);
+    }
+
+    xtf_success(NULL);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [MODERATED] Re: Some micro-perf tests
@ 2019-02-27 17:53 Stewart, David C
  0 siblings, 0 replies; 3+ messages in thread
From: Stewart, David C @ 2019-02-27 17:53 UTC (permalink / raw)
  To: speck

On Sat, Feb 23, 2019 at 10:27 AM speck for Andrew Cooper
<speck@linutronix.de> wrote:
>
>
> Pre microcode:
> * VERW of NUL   => 65-69 cycles
> * VERW of %ds   => 33-37 cycles
>
> Post microcode:
> * VERW of NUL   => 512-520 cycles
> * VERW of %ds   => 520-540 cycles

Andrew – can you please send me your code? We need to root-cause why the behavior you are seeing is different than our guidance.

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2019-02-27 19:44 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-02-27 19:09 [MODERATED] Re: Some micro-perf tests Stewart, David C
2019-02-27 19:44 ` Andrew Cooper
  -- strict thread matches above, loose matches on Subject: below --
2019-02-27 17:53 Stewart, David C

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.