xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Andrew Cooper <andrew.cooper3@citrix.com>
To: Xen-devel <xen-devel@lists.xen.org>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>,
	Jan Beulich <JBeulich@suse.com>
Subject: [PATCH v4 08/26] xen/x86: Generate deep dependencies of features
Date: Wed, 23 Mar 2016 16:36:11 +0000	[thread overview]
Message-ID: <1458750989-28967-9-git-send-email-andrew.cooper3@citrix.com> (raw)
In-Reply-To: <1458750989-28967-1-git-send-email-andrew.cooper3@citrix.com>

Some features depend on other features.  Working out and maintaining the exact
dependency tree is complicated, so it is expressed in the automatic generation
script.

At runtime, Xen needs to be disable all features which are dependent on a
feature being disabled.  Because of the flattening performed at compile time,
runtime can use a single mask to disable all eventual features.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>

v2:
 * New.
v3:
 * Vastly more reserch and comments.
v4:
 * Expand commit message.
 * More tweaks to the dependency tree.
 * Avoid for_each_set_bit() walking off the end of disabled_features[].
   Expanding disabled_features[] turns out to be far more simple than
   attempting to opencode for_each_set_bit()
---
 xen/arch/x86/cpuid.c        |  56 +++++++++++++++++
 xen/include/asm-x86/cpuid.h |   2 +
 xen/tools/gen-cpuid.py      | 143 +++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 200 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c
index 41439f8..e1e0e44 100644
--- a/xen/arch/x86/cpuid.c
+++ b/xen/arch/x86/cpuid.c
@@ -11,6 +11,7 @@ const uint32_t special_features[] = INIT_SPECIAL_FEATURES;
 static const uint32_t __initconst pv_featuremask[] = INIT_PV_FEATURES;
 static const uint32_t __initconst hvm_shadow_featuremask[] = INIT_HVM_SHADOW_FEATURES;
 static const uint32_t __initconst hvm_hap_featuremask[] = INIT_HVM_HAP_FEATURES;
+static const uint32_t __initconst deep_features[] = INIT_DEEP_FEATURES;
 
 uint32_t __read_mostly raw_featureset[FSCAPINTS];
 uint32_t __read_mostly pv_featureset[FSCAPINTS];
@@ -18,12 +19,36 @@ uint32_t __read_mostly hvm_featureset[FSCAPINTS];
 
 static void __init sanitise_featureset(uint32_t *fs)
 {
+    /* for_each_set_bit() uses unsigned longs.  Extend with zeroes. */
+    uint32_t disabled_features[
+        ROUNDUP(FSCAPINTS, sizeof(unsigned long)/sizeof(uint32_t))] = {};
     unsigned int i;
 
     for ( i = 0; i < FSCAPINTS; ++i )
     {
         /* Clamp to known mask. */
         fs[i] &= known_features[i];
+
+        /*
+         * Identify which features with deep dependencies have been
+         * disabled.
+         */
+        disabled_features[i] = ~fs[i] & deep_features[i];
+    }
+
+    for_each_set_bit(i, (void *)disabled_features,
+                     sizeof(disabled_features) * 8)
+    {
+        const uint32_t *dfs = lookup_deep_deps(i);
+        unsigned int j;
+
+        ASSERT(dfs); /* deep_features[] should guarentee this. */
+
+        for ( j = 0; j < FSCAPINTS; ++j )
+        {
+            fs[j] &= ~dfs[j];
+            disabled_features[j] &= ~dfs[j];
+        }
     }
 
     /*
@@ -164,6 +189,36 @@ void __init calculate_featuresets(void)
     calculate_hvm_featureset();
 }
 
+const uint32_t * __init lookup_deep_deps(uint32_t feature)
+{
+    static const struct {
+        uint32_t feature;
+        uint32_t fs[FSCAPINTS];
+    } deep_deps[] __initconst = INIT_DEEP_DEPS;
+    unsigned int start = 0, end = ARRAY_SIZE(deep_deps);
+
+    BUILD_BUG_ON(ARRAY_SIZE(deep_deps) != NR_DEEP_DEPS);
+
+    /* Fast early exit. */
+    if ( !test_bit(feature, deep_features) )
+        return NULL;
+
+    /* deep_deps[] is sorted.  Perform a binary search. */
+    while ( start < end )
+    {
+        unsigned int mid = start + ((end - start) / 2);
+
+        if ( deep_deps[mid].feature > feature )
+            end = mid;
+        else if ( deep_deps[mid].feature < feature )
+            start = mid + 1;
+        else
+            return deep_deps[mid].fs;
+    }
+
+    return NULL;
+}
+
 static void __init __maybe_unused build_assertions(void)
 {
     BUILD_BUG_ON(ARRAY_SIZE(known_features) != FSCAPINTS);
@@ -171,6 +226,7 @@ static void __init __maybe_unused build_assertions(void)
     BUILD_BUG_ON(ARRAY_SIZE(pv_featuremask) != FSCAPINTS);
     BUILD_BUG_ON(ARRAY_SIZE(hvm_shadow_featuremask) != FSCAPINTS);
     BUILD_BUG_ON(ARRAY_SIZE(hvm_hap_featuremask) != FSCAPINTS);
+    BUILD_BUG_ON(ARRAY_SIZE(deep_features) != FSCAPINTS);
 }
 
 /*
diff --git a/xen/include/asm-x86/cpuid.h b/xen/include/asm-x86/cpuid.h
index 5041bcd..4725672 100644
--- a/xen/include/asm-x86/cpuid.h
+++ b/xen/include/asm-x86/cpuid.h
@@ -29,6 +29,8 @@ extern uint32_t hvm_featureset[FSCAPINTS];
 
 void calculate_featuresets(void);
 
+const uint32_t *lookup_deep_deps(uint32_t feature);
+
 #endif /* __ASSEMBLY__ */
 #endif /* !__X86_CPUID_H__ */
 
diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py
index 4fd603d..1cec5d8 100755
--- a/xen/tools/gen-cpuid.py
+++ b/xen/tools/gen-cpuid.py
@@ -144,6 +144,131 @@ def crunch_numbers(state):
     state.hvm_shadow = featureset_to_uint32s(state.raw_hvm_shadow, nr_entries)
     state.hvm_hap = featureset_to_uint32s(state.raw_hvm_hap, nr_entries)
 
+    #
+    # Feature dependency information.
+    #
+    # !!! WARNING !!!
+    #
+    # A lot of this information is derived from the written text of vendors
+    # software manuals, rather than directly from a statement.  As such, it
+    # does contain guesswork and assumptions, and may not accurately match
+    # hardware implementations.
+    #
+    # It is however designed to create an end result for a guest which does
+    # plausibly match real hardware.
+    #
+    # !!! WARNING !!!
+    #
+    # The format of this dictionary is that the feature in the key is a direct
+    # prerequisite of each feature in the value.
+    #
+    # The first consideration is about which functionality is physically built
+    # on top of other features.  The second consideration, which is more
+    # subjective, is whether real hardware would ever be found supporting
+    # feature X but not Y.
+    #
+    deps = {
+        # FPU is taken to mean support for the x87 regisers as well as the
+        # instructions.  MMX is documented to alias the %MM registers over the
+        # x87 %ST registers in hardware.
+        FPU: [MMX],
+
+        # The PSE36 feature indicates that reserved bits in a PSE superpage
+        # may be used as extra physical address bits.
+        PSE: [PSE36],
+
+        # Entering Long Mode requires that %CR4.PAE is set.  The NX and PKU
+        # pagetable bits are only representable in the 64bit PTE format
+        # offered by PAE.
+        PAE: [LM, NX, PKU],
+
+        # APIC is special, but X2APIC does depend on APIC being available in
+        # the first place.
+        APIC: [X2APIC],
+
+        # AMD built MMXExtentions and 3DNow as extentions to MMX.
+        MMX: [MMXEXT, _3DNOW],
+
+        # The FXSAVE/FXRSTOR instructions were introduced into hardware before
+        # SSE, which is why they behave differently based on %CR4.OSFXSAVE and
+        # have their own feature bit.  AMD however introduce the Fast FXSR
+        # feature as an optimisation.
+        FXSR: [FFXSR, SSE],
+
+        # SSE is taken to mean support for the %XMM registers as well as the
+        # instructions.
+        SSE: [SSE2],
+
+        # SSE2 was also re-specified as core for 64bit.  The AESNI and SHA
+        # instruction groups are documented to require checking for SSE2
+        # support as a prerequisite.
+        SSE2: [SSE3, LM, AESNI, SHA],
+
+        # AMD K10 processors has SSE3 and SSE4A.  Bobcat/Barcelona processors
+        # subsequently included SSSE3, and Bulldozer subsequently included
+        # SSE4_1.  Intel have never shipped SSE4A.
+        SSE3: [SSSE3, SSE4_1, SSE4_2, SSE4A],
+
+        # The INVPCID instruction depends on PCID infrastructure being
+        # available.
+        PCID: [INVPCID],
+
+        # XSAVE is an extra set of instructions for state management, but
+        # doesn't constitue new state itself.  Some of the dependent features
+        # are instructions built on top of base XSAVE, while others are new
+        # instruction groups which are specified to require XSAVE for state
+        # management.
+        XSAVE: [XSAVEOPT, XSAVEC, XGETBV1, XSAVES,
+                AVX, MPX, PKU, LWP],
+
+        # AVX is taken to mean hardware support for VEX encoded instructions,
+        # 256bit registers, and the instructions themselves.  Each of these
+        # subsequent instruction groups may only be VEX encoded.
+        AVX: [FMA, FMA4, F16C, AVX2, XOP],
+
+        # CX16 is only encodable in Long Mode.  LAHF_LM indicates that the
+        # SAHF/LAHF instructions are reintroduced in Long Mode.  1GB
+        # superpages and PCID are only available in 4 level paging.
+        LM: [CX16, PCID, LAHF_LM, PAGE1GB],
+
+        # AMD K6-2+ and K6-III processors shipped with 3DNow+, beyond the
+        # standard 3DNow in the earlier K6 processors.
+        _3DNOW: [_3DNOWEXT],
+    }
+
+    deep_features = tuple(sorted(deps.keys()))
+    state.deep_deps = {}
+
+    for feat in deep_features:
+
+        seen = [feat]
+        to_process = list(deps[feat])
+
+        while len(to_process):
+
+            # To debug, uncomment the following lines:
+            # def repl(l):
+            #     return "[" + ", ".join((state.names[x] for x in l)) + "]"
+            # print >>sys.stderr, "Feature %s, seen %s, to_process %s " % \
+            #     (state.names[feat], repl(seen), repl(to_process))
+
+            f = to_process.pop(0)
+
+            if f in seen:
+                raise Fail("ERROR: Cycle found with %s when processing %s"
+                           % (state.names[f], state.names[feat]))
+
+            seen.append(f)
+            to_process = list(set(to_process + deps.get(f, [])))
+
+        state.deep_deps[feat] = seen[1:]
+
+    state.deep_features = featureset_to_uint32s(deps.keys(), nr_entries)
+    state.nr_deep_deps = len(state.deep_deps.keys())
+
+    for k, v in state.deep_deps.iteritems():
+        state.deep_deps[k] = featureset_to_uint32s(v, nr_entries)
+
 
 def write_results(state):
     state.output.write(
@@ -170,6 +295,12 @@ def write_results(state):
 #define INIT_HVM_SHADOW_FEATURES { \\\n%s\n}
 
 #define INIT_HVM_HAP_FEATURES { \\\n%s\n}
+
+#define NR_DEEP_DEPS %sU
+
+#define INIT_DEEP_FEATURES { \\\n%s\n}
+
+#define INIT_DEEP_DEPS { \\
 """ % (state.nr_entries,
        state.common_1d,
        format_uint32s(state.known, 4),
@@ -177,10 +308,20 @@ def write_results(state):
        format_uint32s(state.pv, 4),
        format_uint32s(state.hvm_shadow, 4),
        format_uint32s(state.hvm_hap, 4),
+       state.nr_deep_deps,
+       format_uint32s(state.deep_features, 4),
        ))
 
+    for dep in sorted(state.deep_deps.keys()):
+        state.output.write(
+            "    { %#xU, /* %s */ { \\\n%s\n    }, }, \\\n"
+            % (dep, state.names[dep],
+               format_uint32s(state.deep_deps[dep], 8)
+           ))
+
     state.output.write(
-"""
+"""}
+
 #endif /* __XEN_X86__FEATURESET_DATA__ */
 """)
 
-- 
2.1.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

  parent reply	other threads:[~2016-03-23 16:36 UTC|newest]

Thread overview: 72+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-03-23 16:36 [PATCH v4 00/26] x86: Improvements to cpuid handling for guests Andrew Cooper
2016-03-23 16:36 ` [PATCH v4 01/26] xen/public: Export cpu featureset information in the public API Andrew Cooper
2016-03-24 14:08   ` Jan Beulich
2016-03-24 14:12     ` Andrew Cooper
2016-03-24 14:16       ` Jan Beulich
2016-03-23 16:36 ` [PATCH v4 02/26] xen/x86: Script to automatically process featureset information Andrew Cooper
2016-03-23 16:36 ` [PATCH v4 03/26] xen/x86: Collect more cpuid feature leaves Andrew Cooper
2016-03-23 16:36 ` [PATCH v4 04/26] xen/x86: Mask out unknown features from Xen's capabilities Andrew Cooper
2016-03-23 16:36 ` [PATCH v4 05/26] xen/x86: Annotate special features Andrew Cooper
2016-03-23 16:36 ` [PATCH v4 06/26] xen/x86: Annotate VM applicability in featureset Andrew Cooper
2016-03-23 16:36 ` [PATCH v4 07/26] xen/x86: Calculate maximum host and guest featuresets Andrew Cooper
2016-03-29  8:57   ` Jan Beulich
2016-03-23 16:36 ` Andrew Cooper [this message]
2016-03-24 16:16   ` [PATCH v4 08/26] xen/x86: Generate deep dependencies of features Jan Beulich
2016-03-23 16:36 ` [PATCH v4 09/26] xen/x86: Clear dependent features when clearing a cpu cap Andrew Cooper
2016-03-23 16:36 ` [PATCH v4 10/26] xen/x86: Improve disabling of features which have dependencies Andrew Cooper
2016-03-28 15:18   ` Konrad Rzeszutek Wilk
2016-03-23 16:36 ` [PATCH v4 11/26] xen/x86: Improvements to in-hypervisor cpuid sanity checks Andrew Cooper
2016-03-24 15:38   ` Andrew Cooper
2016-03-24 16:47   ` Jan Beulich
2016-03-24 17:01     ` Andrew Cooper
2016-03-24 17:11       ` Jan Beulich
2016-03-24 17:12         ` Andrew Cooper
2016-03-28 15:29   ` Konrad Rzeszutek Wilk
2016-04-05 15:25     ` Andrew Cooper
2016-03-23 16:36 ` [PATCH v4 12/26] x86/cpu: Move set_cpumask() calls into c_early_init() Andrew Cooper
2016-03-28 15:55   ` Konrad Rzeszutek Wilk
2016-04-05 16:19     ` Andrew Cooper
2016-03-23 16:36 ` [PATCH v4 13/26] x86/cpu: Sysctl and common infrastructure for levelling context switching Andrew Cooper
2016-03-24 16:58   ` Jan Beulich
2016-03-28 16:12   ` Konrad Rzeszutek Wilk
2016-04-05 16:33     ` Andrew Cooper
2016-03-28 17:37   ` Konrad Rzeszutek Wilk
2016-03-23 16:36 ` [PATCH v4 14/26] x86/cpu: Rework AMD masking MSR setup Andrew Cooper
2016-03-28 18:55   ` Konrad Rzeszutek Wilk
2016-04-05 16:44     ` Andrew Cooper
2016-03-23 16:36 ` [PATCH v4 15/26] x86/cpu: Rework Intel masking/faulting setup Andrew Cooper
2016-03-28 19:14   ` Konrad Rzeszutek Wilk
2016-04-05 16:45     ` Andrew Cooper
2016-03-23 16:36 ` [PATCH v4 16/26] x86/cpu: Context switch cpuid masks and faulting state in context_switch() Andrew Cooper
2016-03-28 19:27   ` Konrad Rzeszutek Wilk
2016-04-05 18:34     ` Andrew Cooper
2016-03-23 16:36 ` [PATCH v4 17/26] x86/pv: Provide custom cpumasks for PV domains Andrew Cooper
2016-03-28 19:40   ` Konrad Rzeszutek Wilk
2016-04-05 16:55     ` Andrew Cooper
2016-03-23 16:36 ` [PATCH v4 18/26] x86/domctl: Update PV domain cpumasks when setting cpuid policy Andrew Cooper
2016-03-24 17:04   ` Jan Beulich
2016-03-24 17:05     ` Andrew Cooper
2016-03-28 19:51   ` Konrad Rzeszutek Wilk
2016-03-23 16:36 ` [PATCH v4 19/26] xen+tools: Export maximum host and guest cpu featuresets via SYSCTL Andrew Cooper
2016-03-28 19:59   ` Konrad Rzeszutek Wilk
2016-03-23 16:36 ` [PATCH v4 20/26] tools/libxc: Modify bitmap operations to take void pointers Andrew Cooper
2016-03-28 20:05   ` Konrad Rzeszutek Wilk
2016-03-23 16:36 ` [PATCH v4 21/26] tools/libxc: Use public/featureset.h for cpuid policy generation Andrew Cooper
2016-03-28 20:07   ` Konrad Rzeszutek Wilk
2016-03-23 16:36 ` [PATCH v4 22/26] tools/libxc: Expose the automatically generated cpu featuremask information Andrew Cooper
2016-03-28 20:08   ` Konrad Rzeszutek Wilk
2016-03-23 16:36 ` [PATCH v4 23/26] tools: Utility for dealing with featuresets Andrew Cooper
2016-03-28 20:26   ` Konrad Rzeszutek Wilk
2016-03-23 16:36 ` [PATCH v4 24/26] tools/libxc: Wire a featureset through to cpuid policy logic Andrew Cooper
2016-03-28 20:39   ` Konrad Rzeszutek Wilk
2016-03-23 16:36 ` [PATCH v4 25/26] tools/libxc: Use featuresets rather than guesswork Andrew Cooper
2016-03-23 16:36 ` [PATCH v4 26/26] tools/libxc: Calculate xstate cpuid leaf from guest information Andrew Cooper
2016-03-24 17:20   ` Wei Liu
2016-03-31  7:48   ` Jan Beulich
2016-04-05 17:48     ` Andrew Cooper
2016-04-07  0:16       ` Jan Beulich
2016-04-07  0:40         ` Andrew Cooper
2016-04-07  0:56           ` Jan Beulich
2016-04-07 11:34             ` Andrew Cooper
2016-03-24 10:27 ` [PATCH v4 00/26] x86: Improvements to cpuid handling for guests Jan Beulich
2016-03-24 10:28   ` Andrew Cooper

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1458750989-28967-9-git-send-email-andrew.cooper3@citrix.com \
    --to=andrew.cooper3@citrix.com \
    --cc=JBeulich@suse.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).