All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrew Cooper <andrew.cooper3@citrix.com>
To: Xen-devel <xen-devel@lists.xenproject.org>
Cc: "Igor Druzhinin" <igor.druzhinin@citrix.com>,
	"Edwin Torok" <edvin.torok@citrix.com>,
	"Andrew Cooper" <andrew.cooper3@citrix.com>,
	"Jan Beulich" <JBeulich@suse.com>,
	"Roger Pau Monné" <roger.pau@citrix.com>, "Wei Liu" <wl@xen.org>
Subject: [PATCH 5/5] tests: Introduce a TSX test
Date: Fri, 11 Jun 2021 17:36:27 +0100	[thread overview]
Message-ID: <20210611163627.4878-6-andrew.cooper3@citrix.com> (raw)
In-Reply-To: <20210611163627.4878-1-andrew.cooper3@citrix.com>

See the comment at the top of test-tsx.c for details.

This covers various complexities encountered while trying to address the
recent TSX deprecation on client parts.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Wei Liu <wl@xen.org>
---
 tools/tests/Makefile       |   1 +
 tools/tests/tsx/.gitignore |   1 +
 tools/tests/tsx/Makefile   |  43 ++++
 tools/tests/tsx/test-tsx.c | 474 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 519 insertions(+)
 create mode 100644 tools/tests/tsx/.gitignore
 create mode 100644 tools/tests/tsx/Makefile
 create mode 100644 tools/tests/tsx/test-tsx.c

diff --git a/tools/tests/Makefile b/tools/tests/Makefile
index 8746aabe6b..25531a984a 100644
--- a/tools/tests/Makefile
+++ b/tools/tests/Makefile
@@ -5,6 +5,7 @@ SUBDIRS-y :=
 SUBDIRS-y += resource
 SUBDIRS-$(CONFIG_X86) += cpu-policy
 SUBDIRS-$(CONFIG_X86) += mce-test
+SUBDIRS-$(CONFIG_X86) += tsx
 ifneq ($(clang),y)
 SUBDIRS-$(CONFIG_X86) += x86_emulator
 endif
diff --git a/tools/tests/tsx/.gitignore b/tools/tests/tsx/.gitignore
new file mode 100644
index 0000000000..97ec4db7ff
--- /dev/null
+++ b/tools/tests/tsx/.gitignore
@@ -0,0 +1 @@
+test-tsx
diff --git a/tools/tests/tsx/Makefile b/tools/tests/tsx/Makefile
new file mode 100644
index 0000000000..7381a4f5a4
--- /dev/null
+++ b/tools/tests/tsx/Makefile
@@ -0,0 +1,43 @@
+XEN_ROOT = $(CURDIR)/../../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+TARGET := test-tsx
+
+.PHONY: all
+all: $(TARGET)
+
+.PHONY: run
+run: $(TARGET)
+	./$(TARGET)
+
+.PHONY: clean
+clean:
+	$(RM) -f -- *.o $(TARGET) $(DEPS_RM)
+
+.PHONY: distclean
+distclean: clean
+	$(RM) -f -- *~
+
+.PHONY: install
+install: all
+
+.PHONY: uninstall
+uninstall:
+
+CFLAGS += -Werror -std=gnu11
+CFLAGS += $(CFLAGS_xeninclude)
+CFLAGS += $(CFLAGS_libxenctrl)
+CFLAGS += $(CFLAGS_libxenguest)
+CFLAGS += -I$(XEN_ROOT)/tools/libs/ctrl -I$(XEN_ROOT)/tools/libs/guest
+CFLAGS += $(APPEND_CFLAGS)
+
+LDFLAGS += $(LDLIBS_libxenctrl)
+LDFLAGS += $(LDLIBS_libxenguest)
+LDFLAGS += $(APPEND_LDFLAGS)
+
+test-tsx.o: Makefile
+
+test-tsx: test-tsx.o
+	$(CC) -o $@ $< $(LDFLAGS)
+
+-include $(DEPS_INCLUDE)
diff --git a/tools/tests/tsx/test-tsx.c b/tools/tests/tsx/test-tsx.c
new file mode 100644
index 0000000000..2bf22cea81
--- /dev/null
+++ b/tools/tests/tsx/test-tsx.c
@@ -0,0 +1,474 @@
+/*
+ * TSX settings and consistency tests
+ *
+ * This tests various behaviours and invariants with regards to TSX.  It
+ * ideally wants running for several microcode versions, and all applicable
+ * tsx= commandline settings, on a single CPU, including after an S3
+ * suspend/resume event.
+ *
+ * It tests specifically:
+ *  - The consistency of MSR_TSX_CTRL/MSR_TSX_FORCE_ABORT values across the
+ *    system, and their accessibility WRT data in the host CPU policy.
+ *  - The actual behaviour of RTM on the system.
+ *
+ *  - 
+ */
+
+#define _GNU_SOURCE
+
+#include <err.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/ucontext.h>
+
+#include <xenctrl.h>
+#include <xenguest.h>
+#include <xen-tools/libs.h>
+
+#include "xg_private.h"
+
+enum {
+#define XEN_CPUFEATURE(name, value) X86_FEATURE_##name = value,
+#include <xen/arch-x86/cpufeatureset.h>
+};
+#define bitmaskof(idx)      (1u << ((idx) & 31))
+
+#define MSR_ARCH_CAPABILITIES               0x0000010a
+#define  ARCH_CAPS_TSX_CTRL                 (1 <<  7)
+#define MSR_TSX_FORCE_ABORT                 0x0000010f
+#define MSR_TSX_CTRL                        0x00000122
+
+static unsigned int nr_failures;
+#define fail(fmt, ...)                          \
+({                                              \
+    nr_failures++;                              \
+    (void)printf(fmt, ##__VA_ARGS__);           \
+})
+
+static xc_interface *xch;
+
+/*
+ * Policies, arranged as an array for easy collection of all of them.  We
+ * don't care about the raw policy (index 0) so reuse that for the guest
+ * policy.
+ */
+static struct xc_cpu_policy policies[6];
+#define guest_policy policies[0]
+#define host         policies[XEN_SYSCTL_cpu_policy_host]
+#define pv_max       policies[XEN_SYSCTL_cpu_policy_pv_max]
+#define hvm_max      policies[XEN_SYSCTL_cpu_policy_hvm_max]
+#define pv_default   policies[XEN_SYSCTL_cpu_policy_pv_default]
+#define hvm_default  policies[XEN_SYSCTL_cpu_policy_hvm_default]
+
+static bool xen_has_pv = true, xen_has_hvm = true;
+
+static unsigned int nr_cpus;
+static enum rtm_behaviour {
+    RTM_UD,
+    RTM_OK,
+    RTM_ABORT,
+} rtm_behaviour;
+
+/*
+ * Test a specific TSX MSR for consistency across the system, taking into
+ * account whether it ought to be accessable or not.
+ *
+ * We can't query offline CPUs, so skip those if encountered.  We don't care
+ * particularly for the exact MSR value, but we do care that it is the same
+ * everywhere.
+ */
+static void test_tsx_msr_consistency(unsigned int msr, bool accessable)
+{
+    uint64_t cpu0_val = ~0;
+
+    for ( unsigned int cpu = 0; cpu < nr_cpus; ++cpu )
+    {
+        xc_resource_entry_t ent = {
+            .u.cmd = XEN_RESOURCE_OP_MSR_READ,
+            .idx = msr,
+        };
+        xc_resource_op_t op = {
+            .cpu = cpu,
+            .entries = &ent,
+            .nr_entries = 1,
+        };
+        int rc = xc_resource_op(xch, 1, &op);
+
+        if ( rc < 0 )
+        {
+            /* Don't emit a message for offline CPUs */
+            if ( errno != ENODEV )
+                fail("  xc_resource_op() for CPU%u failed: rc %d, errno %d - %s\n",
+                     cpu, rc, errno, strerror(errno));
+            continue;
+        }
+
+        if ( accessable )
+        {
+            if ( rc != 1 )
+            {
+                fail("  Expected 1 result, got %u\n", rc);
+                continue;
+            }
+            if ( ent.u.ret != 0 )
+            {
+                fail("  Expected ok, got %d\n", ent.u.ret);
+                continue;
+            }
+        }
+        else
+        {
+            if ( rc != 0 )
+                fail("  Expected 0 results, got %u\n", rc);
+            else if ( ent.u.ret != -EPERM )
+                fail("  Expected -EPERM, got %d\n", ent.u.ret);
+            continue;
+        }
+
+        if ( cpu == 0 )
+        {
+            cpu0_val = ent.val;
+            printf("  CPU0 val %#"PRIx64"\n", cpu0_val);
+        }
+        else if ( ent.val != cpu0_val )
+            fail("  CPU%u val %#"PRIx64" differes from CPU0 %#"PRIx64"\n",
+                 cpu, ent.val, cpu0_val);
+    }
+}
+
+/*
+ * Check all TSX MSRs, and in particular that their accessibility matches what
+ * is expressed in the host CPU policy.
+ */
+static void test_tsx_msrs(void)
+{
+    printf("Testing MSR_TSX_FORCE_ABORT consistency\n");
+    test_tsx_msr_consistency(
+        MSR_TSX_FORCE_ABORT, host.cpuid.feat.tsx_force_abort);
+
+    printf("Testing MSR_TSX_CTRL consistency\n");
+    test_tsx_msr_consistency(
+        MSR_TSX_CTRL, host.msr.arch_caps.tsx_ctrl);
+}
+
+/*
+ * Probe for how RTM behaves, deliberately not inspecting CPUID.
+ * Distinguishes between "no support at all" (i.e. XBEGIN suffers #UD),
+ * working ok, and appearing to always abort.
+ */
+static enum rtm_behaviour probe_rtm_behaviour(void)
+{
+    for ( int i = 0; i < 1000; ++i )
+    {
+        /*
+         * Opencoding the RTM infrastructure from immintrin.h, because we
+         * still support older versions of GCC.  ALso so we can include #UD
+         * detection logic.
+         */
+#define XBEGIN_STARTED -1
+#define XBEGIN_UD      -2
+        unsigned int status = XBEGIN_STARTED;
+
+        asm volatile (".Lxbegin: .byte 0xc7,0xf8,0,0,0,0" /* XBEGIN 1f; 1: */
+                      : "+a" (status) :: "memory");
+        if ( status == XBEGIN_STARTED )
+        {
+            asm volatile (".byte 0x0f,0x01,0xd5" ::: "memory"); /* XEND */
+            return RTM_OK;
+        }
+        else if ( status == XBEGIN_UD )
+            return RTM_UD;
+    }
+
+    return RTM_ABORT;
+}
+
+static struct sigaction old_sigill;
+
+static void sigill_handler(int signo, siginfo_t *info, void *extra)
+{
+    extern char xbegin_label[] asm(".Lxbegin");
+
+    if ( info->si_addr == xbegin_label ||
+         memcmp(info->si_addr, "\xc7\xf8\x00\x00\x00\x00", 6) == 0 )
+    {
+        ucontext_t *context = extra;
+
+        /*
+         * Found the XBEGIN instruction.  Step over it, and update `status` to
+         * signal #UD.
+         */
+#ifdef __x86_64__
+        context->uc_mcontext.gregs[REG_RIP] += 6;
+        context->uc_mcontext.gregs[REG_RAX] = XBEGIN_UD;
+#else
+        context->uc_mcontext.gregs[REG_EIP] += 6;
+        context->uc_mcontext.gregs[REG_EAX] = XBEGIN_UD;
+#endif
+    }
+    else
+    {
+        /*
+         * Not the SIGILL we're looking for...  Restore the old handler and
+         * try again.  Will likely coredump as a result.
+         */
+        sigaction(SIGILL, &old_sigill, NULL);
+    }
+}
+
+static void test_rtm_behaviour(void)
+{
+    struct sigaction new_sigill = {
+        .sa_flags = SA_SIGINFO,
+        .sa_sigaction = sigill_handler,
+    };
+    const char *str;
+
+    printf("Testing RTM behaviour\n");
+
+    /*
+     * Install a custom SIGILL handler while probing for RTM behaviour, as the
+     * XBEGIN instruction might suffer #UD.
+     */
+    sigaction(SIGILL, &new_sigill, &old_sigill);
+    rtm_behaviour = probe_rtm_behaviour();
+    sigaction(SIGILL, &old_sigill, NULL);
+
+    switch ( rtm_behaviour )
+    {
+    case RTM_UD:    str = "#UD";   break;
+    case RTM_OK:    str = "OK";    break;
+    case RTM_ABORT: str = "Abort"; break;
+    default:        str = NULL;    break;
+    }
+
+    if ( str )
+        printf("  Got %s\n", str);
+    else
+        return fail("  Got unexpected behaviour %d\n", rtm_behaviour);
+
+    if ( host.cpuid.feat.rtm )
+    {
+        if ( rtm_behaviour == RTM_UD )
+            fail("  Host reports RTM, but appears unavailable\n");
+    }
+    else
+    {
+        if ( rtm_behaviour != RTM_UD )
+            fail("  Host reports no RTM, but appears available\n");
+    }
+}
+
+static void dump_tsx_details(const struct xc_cpu_policy *p, const char *pref)
+{
+    printf("  %s RTM %u, HLE %u, TSX_FORCE_ABORT %u, RTM_ALWAYS_ABORT %u, TSX_CTRL %u\n",
+           pref,
+           p->cpuid.feat.rtm,
+           p->cpuid.feat.hle,
+           p->cpuid.feat.tsx_force_abort,
+           p->cpuid.feat.rtm_always_abort,
+           p->msr.arch_caps.tsx_ctrl
+        );
+}
+
+/*
+ * Sanity test various invariants we expect in the default/max policies.
+ */
+static void test_guest_policies(const struct xc_cpu_policy *max,
+                                const struct xc_cpu_policy *def)
+{
+    const struct cpuid_policy *cm = &max->cpuid;
+    const struct cpuid_policy *cd = &def->cpuid;
+    const struct msr_policy *mm = &max->msr;
+    const struct msr_policy *md = &def->msr;
+
+    dump_tsx_details(max, "Max:");
+    dump_tsx_details(def, "Def:");
+
+    if ( ((cm->feat.raw[0].d | cd->feat.raw[0].d) &
+          (bitmaskof(X86_FEATURE_TSX_FORCE_ABORT) |
+           bitmaskof(X86_FEATURE_RTM_ALWAYS_ABORT))) ||
+         ((mm->arch_caps.raw | md->arch_caps.raw) & ARCH_CAPS_TSX_CTRL) )
+        fail("  Xen-only TSX controls offered to guest\n");
+
+    switch ( rtm_behaviour )
+    {
+    case RTM_UD:
+        if ( (cm->feat.raw[0].b | cd->feat.raw[0].b) &
+             (bitmaskof(X86_FEATURE_HLE) | bitmaskof(X86_FEATURE_RTM)) )
+             fail("  HLE/RTM offered to guests despite not being available\n");
+        break;
+
+    case RTM_ABORT:
+        if ( cd->feat.raw[0].b &
+             (bitmaskof(X86_FEATURE_HLE) | bitmaskof(X86_FEATURE_RTM)) )
+             fail("  HLE/RTM offered to guests by default despite not being usable\n");
+        break;
+
+    case RTM_OK:
+        if ( !cm->feat.rtm || !cd->feat.rtm )
+             fail("  RTM not offered to guests despite being available\n");
+        break;
+    }
+
+    if ( cd->feat.hle )
+        fail("  Fail: HLE offered in default policy\n");
+}
+
+static void test_def_max_policies(void)
+{
+    if ( xen_has_pv )
+    {
+        printf("Testing PV default/max policies\n");
+        test_guest_policies(&pv_max, &pv_default);
+    }
+
+    if ( xen_has_hvm )
+    {
+        printf("Testing HVM default/max policies\n");
+        test_guest_policies(&hvm_max, &hvm_default);
+    }
+}
+
+static void test_guest(struct xen_domctl_createdomain *c)
+{
+    uint32_t domid = 0;
+    int rc;
+
+    rc = xc_domain_create(xch, &domid, c);
+    if ( rc )
+        return fail("  Domain create failure: %d - %s\n",
+                    errno, strerror(errno));
+
+    printf("  Created d%u\n", domid);
+
+    rc = xc_cpu_policy_get_domain(xch, domid, &guest_policy);
+    if ( rc )
+    {
+        fail("  Failed to obtain domain policy: %d - %s\n",
+             errno, strerror(errno));
+        goto out;
+    }
+
+    dump_tsx_details(&guest_policy, "Cur:");
+
+    /*
+     * Check defaults given to the guest.
+     */
+    if ( guest_policy.cpuid.feat.rtm != (rtm_behaviour == RTM_OK) )
+        fail("  RTM %u in guest, despite rtm behaviour\n",
+             guest_policy.cpuid.feat.rtm);
+
+    if ( guest_policy.cpuid.feat.hle ||
+         guest_policy.cpuid.feat.tsx_force_abort ||
+         guest_policy.cpuid.feat.rtm_always_abort ||
+         guest_policy.msr.arch_caps.tsx_ctrl )
+        fail("  Unexpected features advertised\n");
+
+ out:
+    rc = xc_domain_destroy(xch, domid);
+    if ( rc )
+        fail("  Failed to destroy domain: %d - %s\n",
+             errno, strerror(errno));
+}
+
+static void test_guests(void)
+{
+    if ( xen_has_pv )
+    {
+        struct xen_domctl_createdomain c = {
+            .max_vcpus = 1,
+            .max_grant_frames = 1,
+        };
+
+        printf("Testing PV guest\n");
+        test_guest(&c);
+    }
+
+    if ( xen_has_hvm )
+    {
+        struct xen_domctl_createdomain c = {
+            .flags = XEN_DOMCTL_CDF_hvm | XEN_DOMCTL_CDF_hap,
+            .max_vcpus = 1,
+            .max_grant_frames = 1,
+            .arch = {
+                .emulation_flags = XEN_X86_EMU_LAPIC,
+            },
+        };
+
+        printf("Testing HVM guest\n");
+        test_guest(&c);
+    }
+}
+
+/* Obtain some general data, then run the tests. */
+static void test_tsx(void)
+{
+    int rc;
+    xc_physinfo_t info = {};
+
+    /* Read all policies except raw. */
+    for ( int i = XEN_SYSCTL_cpu_policy_host;
+          i <= XEN_SYSCTL_cpu_policy_hvm_default; ++i )
+    {
+        rc = xc_cpu_policy_get_system(xch, i, &policies[i]);
+
+        if ( rc == -1 && errno == EOPNOTSUPP )
+        {
+            /*
+             * Use EOPNOTSUPP to spot Xen missing CONFIG_{PV,HVM}, and adjust
+             * later testing accordingly.
+             */
+            switch ( i )
+            {
+            case XEN_SYSCTL_cpu_policy_pv_max:
+            case XEN_SYSCTL_cpu_policy_pv_default:
+                if ( xen_has_pv )
+                    printf("  Xen doesn't support PV\n");
+                xen_has_pv = false;
+                continue;
+
+            case XEN_SYSCTL_cpu_policy_hvm_max:
+            case XEN_SYSCTL_cpu_policy_hvm_default:
+                if ( xen_has_hvm )
+                    printf("  Xen doesn't support HVM\n");
+                xen_has_hvm = false;
+                continue;
+            }
+        }
+        if ( rc )
+            return fail("Failed to obtain policy[%u]: %d - %s\n",
+                        i, errno, strerror(errno));
+    }
+
+    rc = xc_physinfo(xch, &info);
+    if ( rc )
+        return fail("Failed to obtain physinfo: %d - %s\n",
+                    errno, strerror(errno));
+
+    nr_cpus = info.max_cpu_id + 1;
+    printf("  Got %u CPUs\n", nr_cpus);
+
+    test_tsx_msrs();
+    test_rtm_behaviour();
+    test_def_max_policies();
+    test_guests();
+}
+
+int main(int argc, char **argv)
+{
+    printf("TSX tests\n");
+
+    xch = xc_interface_open(NULL, NULL, 0);
+
+    if ( !xch )
+        err(1, "xc_interface_open");
+
+    test_tsx();
+
+    return !!nr_failures;
+}
-- 
2.11.0



  parent reply	other threads:[~2021-06-11 16:37 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-11 16:36 [PATCH 0/5] x86/tsx: Consistency and settings test Andrew Cooper
2021-06-11 16:36 ` [PATCH 1/5] x86/platform: Improve MSR permission handling for XENPF_resource_op Andrew Cooper
2021-06-14 12:45   ` Jan Beulich
2021-06-11 16:36 ` [PATCH 2/5] x86/platform: Permit reading the TSX control MSRs via XENPF_resource_op Andrew Cooper
2021-06-14 12:46   ` Jan Beulich
2021-06-11 16:36 ` [PATCH 3/5] x86/msr: Expose MSR_ARCH_CAPS in the raw and host policies Andrew Cooper
2021-06-14 12:57   ` Jan Beulich
2021-06-14 14:10     ` Andrew Cooper
2021-06-14 14:54       ` Jan Beulich
2021-06-11 16:36 ` [PATCH 4/5] libs/guest: Move struct xc_cpu_policy into xg_private.h Andrew Cooper
2021-06-14 13:00   ` Jan Beulich
2021-06-14 13:49     ` Ian Jackson
2021-06-14 13:56       ` Jan Beulich
2021-06-11 16:36 ` Andrew Cooper [this message]
2021-06-14 10:47   ` [PATCH v1.1 5/5] tests: Introduce a TSX test Andrew Cooper
2021-06-14 13:31     ` Jan Beulich
2021-06-14 14:50       ` Andrew Cooper
2021-06-14 14:59         ` Jan Beulich
2021-06-14 15:55     ` Edwin Torok
2021-06-14 16:32       ` Andrew Cooper
2021-06-14 16:13   ` [PATCH v2 " Andrew Cooper
2021-06-14 17:21     ` Andrew Cooper
2021-06-15 13:49     ` Jan Beulich

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210611163627.4878-6-andrew.cooper3@citrix.com \
    --to=andrew.cooper3@citrix.com \
    --cc=JBeulich@suse.com \
    --cc=edvin.torok@citrix.com \
    --cc=igor.druzhinin@citrix.com \
    --cc=roger.pau@citrix.com \
    --cc=wl@xen.org \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.