xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Konrad Rzeszutek Wilk <konrad@kernel.org>
To: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: Jan Beulich <JBeulich@suse.com>, Xen-devel <xen-devel@lists.xen.org>
Subject: Re: [PATCH v3 10/28] xen/x86: Generate deep dependencies of features
Date: Thu, 17 Mar 2016 15:45:40 -0400	[thread overview]
Message-ID: <20160317194539.GB32590@localhost.localdomain> (raw)
In-Reply-To: <1458056124-8024-11-git-send-email-andrew.cooper3@citrix.com>

On Tue, Mar 15, 2016 at 03:35:06PM +0000, Andrew Cooper wrote:
> Some features depend on other features.  Working out and maintaining the exact
> dependency tree is complicated, so it is expressed in the automatic generation
> script, and flattened for faster runtime use.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> ---
> CC: Jan Beulich <JBeulich@suse.com>
> 
> v3:
>  * Vastly more reserch and comments
> v2:
>  * New
> ---
>  xen/arch/x86/cpuid.c        |  54 +++++++++++++++++
>  xen/include/asm-x86/cpuid.h |   2 +
>  xen/tools/gen-cpuid.py      | 139 +++++++++++++++++++++++++++++++++++++++++++-
>  3 files changed, 194 insertions(+), 1 deletion(-)
> 
> diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c
> index 174cfa0..17487f5 100644
> --- a/xen/arch/x86/cpuid.c
> +++ b/xen/arch/x86/cpuid.c
> @@ -11,6 +11,7 @@ const uint32_t special_features[] = INIT_SPECIAL_FEATURES;
>  static const uint32_t __initconst pv_featuremask[] = INIT_PV_FEATURES;
>  static const uint32_t __initconst hvm_shadow_featuremask[] = INIT_HVM_SHADOW_FEATURES;
>  static const uint32_t __initconst hvm_hap_featuremask[] = INIT_HVM_HAP_FEATURES;
> +static const uint32_t __initconst deep_features[] = INIT_DEEP_FEATURES;
>  
>  uint32_t __read_mostly raw_featureset[FSCAPINTS];
>  uint32_t __read_mostly pv_featureset[FSCAPINTS];
> @@ -18,12 +19,34 @@ uint32_t __read_mostly hvm_featureset[FSCAPINTS];
>  
>  static void __init sanitise_featureset(uint32_t *fs)
>  {
> +    uint32_t disabled_features[FSCAPINTS];
>      unsigned int i;
>  
>      for ( i = 0; i < FSCAPINTS; ++i )
>      {
>          /* Clamp to known mask. */
>          fs[i] &= known_features[i];
> +
> +        /*
> +         * Identify which features with deep dependencies have been
> +         * disabled.
> +         */
> +        disabled_features[i] = ~fs[i] & deep_features[i];
> +    }
> +
> +    for_each_set_bit(i, (void *)disabled_features,
> +                     sizeof(disabled_features) * 8)
> +    {
> +        const uint32_t *dfs = lookup_deep_deps(i);
> +        unsigned int j;
> +
> +        ASSERT(dfs); /* deep_features[] should guarentee this. */
> +
> +        for ( j = 0; j < FSCAPINTS; ++j )
> +        {
> +            fs[j] &= ~dfs[j];
> +            disabled_features[j] &= ~dfs[j];

What if this clears the entries you have already skipped over?

Say you are at word 2 (i == 2) and the disabled_features[j] update cleared
a bit in word 1. The loop (for_each_set_bit) won't notice this.

And furthermore, let's assume that the clearing of a bit in
word 1 should have triggered another dependency check which would clear
something in word 7. You would miss that?

I think?

> +        }
>      }
>  
>      /*
> @@ -162,6 +185,36 @@ void __init calculate_featuresets(void)
>      calculate_hvm_featureset();
>  }
>  
> +const uint32_t * __init lookup_deep_deps(uint32_t feature)
> +{
> +    static const struct {
> +        uint32_t feature;
> +        uint32_t fs[FSCAPINTS];
> +    } deep_deps[] __initconst = INIT_DEEP_DEPS;
> +    unsigned int start = 0, end = ARRAY_SIZE(deep_deps);
> +
> +    BUILD_BUG_ON(ARRAY_SIZE(deep_deps) != NR_DEEP_DEPS);
> +
> +    /* Fast early exit. */
> +    if ( !test_bit(feature, deep_features) )
> +        return NULL;
> +
> +    /* deep_deps[] is sorted.  Perform a binary search. */
> +    while ( start < end )
> +    {
> +        unsigned int mid = start + ((end - start) / 2);
> +
> +        if ( deep_deps[mid].feature > feature )
> +            end = mid;
> +        else if ( deep_deps[mid].feature < feature )
> +            start = mid + 1;
> +        else
> +            return deep_deps[mid].fs;
> +    }
> +
> +    return NULL;
> +}
> +
>  static void __init __maybe_unused build_assertions(void)
>  {
>      BUILD_BUG_ON(ARRAY_SIZE(known_features) != FSCAPINTS);
> @@ -169,6 +222,7 @@ static void __init __maybe_unused build_assertions(void)
>      BUILD_BUG_ON(ARRAY_SIZE(pv_featuremask) != FSCAPINTS);
>      BUILD_BUG_ON(ARRAY_SIZE(hvm_shadow_featuremask) != FSCAPINTS);
>      BUILD_BUG_ON(ARRAY_SIZE(hvm_hap_featuremask) != FSCAPINTS);
> +    BUILD_BUG_ON(ARRAY_SIZE(deep_features) != FSCAPINTS);
>  }
>  
>  /*
> diff --git a/xen/include/asm-x86/cpuid.h b/xen/include/asm-x86/cpuid.h
> index 5041bcd..4725672 100644
> --- a/xen/include/asm-x86/cpuid.h
> +++ b/xen/include/asm-x86/cpuid.h
> @@ -29,6 +29,8 @@ extern uint32_t hvm_featureset[FSCAPINTS];
>  
>  void calculate_featuresets(void);
>  
> +const uint32_t *lookup_deep_deps(uint32_t feature);
> +
>  #endif /* __ASSEMBLY__ */
>  #endif /* !__X86_CPUID_H__ */
>  
> diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py
> index 452c26a..ed368cb 100755
> --- a/xen/tools/gen-cpuid.py
> +++ b/xen/tools/gen-cpuid.py
> @@ -144,6 +144,127 @@ def crunch_numbers(state):
>      state.hvm_shadow = featureset_to_uint32s(state.raw_hvm_shadow, nr_entries)
>      state.hvm_hap = featureset_to_uint32s(state.raw_hvm_hap, nr_entries)
>  
> +    #
> +    # Feature dependency information.
> +    #
> +    # !!! WARNING !!!
> +    #
> +    # A lot of this information is derived from the written text of vendors
> +    # software manuals, rather than directly from a statement.  As such, it
> +    # does contain guesswork and assumptions, and may not accurately match
> +    # hardware implementations.
> +    #
> +    # It is however designed to create an end result for a guest which does
> +    # plausibly match real hardware.
> +    #
> +    # !!! WARNING !!!
> +    #
> +    # The format of this dictionary is that the feature in the key is a direct
> +    # prerequisite of each feature in the value.

s/of/for/ ?
s/in the/enumerated/

> +    #
> +    # The first consideration is about which functionality is physically built

s/about//
> +    # on top of other features.  The second consideration, which is more
> +    # subjective, is whether real hardware would ever be found supporting
> +    # feature X but not Y.
> +    #
> +    deps = {
> +        # FPU is taken to mean support for the x87 regisers as well as the

s/regisers/registers/
> +        # instructions.  MMX is documented to alias the %MM registers over the
> +        # x87 %ST registers in hardware.
> +        FPU: [MMX],
> +
> +        # The PSE36 feature indicates that reserved bits in a PSE superpage
> +        # may be used as extra physical address bits.
> +        PSE: [PSE36],
> +
> +        # Entering Long Mode requires that %CR4.PAE is set.  The NX pagetable
> +        # bit is only representable in the 64bit PTE format offered by PAE.
> +        PAE: [LM, NX],
> +
> +        # APIC is special, but X2APIC does depend on APIC being available in
> +        # the first place.
> +        APIC: [X2APIC],
> +
> +        # AMD built MMXExtentions and 3DNow as extentions to MMX.
> +        MMX: [MMXEXT, _3DNOW],
Why the underscore?
> +
> +        # The FXSAVE/FXRSTOR instructions were introduced into hardware before
> +        # SSE, which is why they behave differently based on %CR4.OSFXSAVE and
> +        # have their own feature bit.  AMD however introduce the Fast FXSR

s/introduce/introduced/
> +        # feature as an optimisation.
> +        FXSR: [FFXSR],
> +
> +        # SSE is taken to mean support for the %XMM registers as well as the
> +        # instructions.  The SSE extentions were re-specified as core for
> +        # 64bit support.
> +        SSE: [SSE2, LM],
> +
> +        # SSE2 was also re-specified as core for 64bit.  The AESNI and SHA
> +        # instruction groups are documented to require checking for SSE2
> +        # support as a prerequisite.
> +        SSE2: [SSE3, LM, AESNI, SHA],
> +
> +        # AMD K10 processors has SSE3 and SSE4A.  Bobcat/Barcelona processors
> +        # subsequently included SSSE3, and Bulldozer subsequently included
> +        # SSE4_1.  Intel have never shipped SSE4A.
> +        SSE3: [SSSE3, SSE4_1, SSE4A],
> +        SSE4_1: [SSE4_2],
> +
> +        # XSAVE is an extra set of instructions for state management, but
> +        # doesn't constitue new state itself.  Some of the dependent features
> +        # are instructions built on top of base XSAVE, while others are new
> +        # instruction groups which are specified to require XSAVE for state
> +        # management.
> +        XSAVE: [XSAVEOPT, XSAVEC, XGETBV1, XSAVES, AVX, MPX],
> +
> +        # AVX is taken to mean hardware support for VEX encoded instructions,
> +        # 256bit registers, and the instructions themselves.  Each of these
> +        # subsequent instruction groups may only be VEX encoded.
> +        AVX: [FMA, FMA4, F16C, AVX2, XOP],
> +
> +        # CX16 is only encodable in Long Mode.  LAHF_LM indicates that the
> +        # SAHF/LAHF instructions are reintroduced in Long Mode.  1GB
> +        # superpages are only available in 4 level paging.
> +        LM: [CX16, LAHF_LM, PAGE1GB],
> +
> +        # AMD K6-2+ and K6-III processors shipped with 3DNow+, beyond the
> +        # standard 3DNow in the earlier K6 processors.
> +        _3DNOW: [_3DNOWEXT],
> +    }
> +
> +    deep_features = tuple(sorted(deps.keys()))
> +    state.deep_deps = {}
> +
> +    for feat in deep_features:
> +
> +        seen = [feat]
> +        to_process = list(deps[feat])
> +
> +        while len(to_process):
> +
> +            # To debug, uncomment the following lines:
> +            # def repl(l):
> +            #     return "[" + ", ".join((state.names[x] for x in l)) + "]"
> +            # print >>sys.stderr, "Feature %s, seen %s, to_process %s " % \
> +            #     (state.names[feat], repl(seen), repl(to_process))
> +
> +            f = to_process.pop(0)
> +
> +            if f in seen:
> +                raise Fail("ERROR: Cycle found with %s when processing %s"
> +                           % (state.names[f], state.names[feat]))
> +
> +            seen.append(f)
> +            to_process = list(set(to_process + deps.get(f, [])))
> +
> +        state.deep_deps[feat] = seen[1:]
> +
> +    state.deep_features = featureset_to_uint32s(deps.keys(), nr_entries)
> +    state.nr_deep_deps = len(state.deep_deps.keys())
> +
> +    for k, v in state.deep_deps.iteritems():
> +        state.deep_deps[k] = featureset_to_uint32s(v, nr_entries)
> +
>  
>  def write_results(state):
>      state.output.write(
> @@ -170,6 +291,12 @@ def write_results(state):
>  #define INIT_HVM_SHADOW_FEATURES { \\\n%s\n}
>  
>  #define INIT_HVM_HAP_FEATURES { \\\n%s\n}
> +
> +#define NR_DEEP_DEPS %sU
> +
> +#define INIT_DEEP_FEATURES { \\\n%s\n}
> +
> +#define INIT_DEEP_DEPS { \\
>  """ % (state.nr_entries,
>         state.common_1d,
>         format_uint32s(state.known, 4),
> @@ -177,10 +304,20 @@ def write_results(state):
>         format_uint32s(state.pv, 4),
>         format_uint32s(state.hvm_shadow, 4),
>         format_uint32s(state.hvm_hap, 4),
> +       state.nr_deep_deps,
> +       format_uint32s(state.deep_features, 4),
>         ))
>  
> +    for dep in sorted(state.deep_deps.keys()):
> +        state.output.write(
> +            "    { %#xU, /* %s */ { \\\n%s\n    }, }, \\\n"
> +            % (dep, state.names[dep],
> +               format_uint32s(state.deep_deps[dep], 8)
> +           ))
> +
>      state.output.write(
> -"""
> +"""}
> +
>  #endif /* __XEN_X86__FEATURESET_DATA__ */
>  """)
>  
> -- 
> 2.1.4
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

  reply	other threads:[~2016-03-17 19:45 UTC|newest]

Thread overview: 88+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-03-15 15:34 [PATCH RFC v3 00/28] x86: Improvements to cpuid handling for guests Andrew Cooper
2016-03-15 15:34 ` [PATCH v3 01/28] xen/x86: Drop unused and non-useful feature definitions Andrew Cooper
2016-03-16  7:53   ` Konrad Rzeszutek Wilk
2016-03-16  9:49     ` Andrew Cooper
2016-03-22 14:06   ` Doug Goldstein
2016-03-22 14:38     ` Jan Beulich
2016-03-15 15:34 ` [PATCH v3 02/28] xen/x86: Rename features to be closer to the vendor definitions Andrew Cooper
2016-03-16  8:01   ` Konrad Rzeszutek Wilk
2016-03-17 19:46     ` Andrew Cooper
2016-03-15 15:34 ` [PATCH v3 03/28] xen/public: Export cpu featureset information in the public API Andrew Cooper
2016-03-16  8:32   ` Konrad Rzeszutek Wilk
2016-03-22 10:39     ` Andrew Cooper
2016-03-18 15:52   ` Jan Beulich
2016-03-15 15:35 ` [PATCH v3 04/28] xen/x86: Script to automatically process featureset information Andrew Cooper
2016-03-16  8:41   ` Konrad Rzeszutek Wilk
2016-03-15 15:35 ` [PATCH v3 05/28] xen/x86: Collect more cpuid feature leaves Andrew Cooper
2016-03-16  8:50   ` Konrad Rzeszutek Wilk
2016-03-15 15:35 ` [PATCH v3 06/28] xen/x86: Mask out unknown features from Xen's capabilities Andrew Cooper
2016-03-16 18:01   ` Konrad Rzeszutek Wilk
2016-03-15 15:35 ` [PATCH v3 07/28] xen/x86: Annotate special features Andrew Cooper
2016-03-16 18:04   ` Konrad Rzeszutek Wilk
2016-03-18 16:29   ` Jan Beulich
2016-03-15 15:35 ` [PATCH v3 08/28] xen/x86: Annotate VM applicability in featureset Andrew Cooper
2016-03-16 18:15   ` Konrad Rzeszutek Wilk
2016-03-18 16:57   ` Jan Beulich
2016-03-18 18:56     ` Andrew Cooper
2016-03-21 11:53       ` Jan Beulich
2016-03-21 13:39         ` Andrew Cooper
2016-03-15 15:35 ` [PATCH v3 09/28] xen/x86: Calculate maximum host and guest featuresets Andrew Cooper
2016-03-16 18:24   ` Konrad Rzeszutek Wilk
2016-03-18 17:09   ` Jan Beulich
2016-03-22 11:23     ` Andrew Cooper
2016-03-22 12:39       ` Jan Beulich
2016-03-22 14:37         ` Andrew Cooper
2016-03-22 14:52           ` Jan Beulich
2016-03-22 15:01             ` Andrew Cooper
2016-03-22 16:10               ` Jan Beulich
2016-03-15 15:35 ` [PATCH v3 10/28] xen/x86: Generate deep dependencies of features Andrew Cooper
2016-03-17 19:45   ` Konrad Rzeszutek Wilk [this message]
2016-03-17 20:14     ` Andrew Cooper
2016-03-17 20:32       ` Konrad Rzeszutek Wilk
2016-03-21 15:41   ` Jan Beulich
2016-03-15 15:35 ` [PATCH v3 11/28] xen/x86: Clear dependent features when clearing a cpu cap Andrew Cooper
2016-03-17 19:51   ` Konrad Rzeszutek Wilk
2016-03-17 19:56     ` Andrew Cooper
2016-03-28 15:02       ` Konrad Rzeszutek Wilk
2016-03-21 15:45   ` Jan Beulich
2016-03-15 15:35 ` [PATCH v3 12/28] xen/x86: Improve disabling of features which have dependencies Andrew Cooper
2016-03-15 15:35 ` [PATCH v3 13/28] xen/x86: Improvements to in-hypervisor cpuid sanity checks Andrew Cooper
2016-03-21 16:11   ` Jan Beulich
2016-03-22 15:30     ` Andrew Cooper
2016-03-15 15:35 ` [PATCH v3 14/28] x86/cpu: Move set_cpumask() calls into c_early_init() Andrew Cooper
2016-03-21 16:16   ` Jan Beulich
2016-03-15 15:35 ` [PATCH v3 15/28] x86/cpu: Sysctl and common infrastructure for levelling context switching Andrew Cooper
2016-03-15 17:35   ` Joao Martins
2016-03-15 19:29     ` Andrew Cooper
2016-03-15 19:34       ` Joao Martins
2016-03-21 16:23   ` Jan Beulich
2016-03-22 15:57     ` Andrew Cooper
2016-03-22 16:16       ` Jan Beulich
2016-03-15 15:35 ` [PATCH v3 16/28] x86/cpu: Rework AMD masking MSR setup Andrew Cooper
2016-03-21 16:51   ` Jan Beulich
2016-03-21 16:55     ` Andrew Cooper
2016-03-15 15:35 ` [PATCH v3 17/28] x86/cpu: Rework Intel masking/faulting setup Andrew Cooper
2016-03-21 16:44   ` Jan Beulich
2016-03-15 15:35 ` [PATCH v3 18/28] x86/cpu: Context switch cpuid masks and faulting state in context_switch() Andrew Cooper
2016-03-15 15:35 ` [PATCH v3 19/28] x86/pv: Provide custom cpumasks for PV domains Andrew Cooper
2016-03-21 16:53   ` Jan Beulich
2016-03-15 15:35 ` [PATCH v3 20/28] x86/domctl: Update PV domain cpumasks when setting cpuid policy Andrew Cooper
2016-03-21 17:06   ` Jan Beulich
2016-03-22 16:37     ` Andrew Cooper
2016-03-22 16:51       ` Jan Beulich
2016-03-15 15:35 ` [PATCH v3 21/28] xen+tools: Export maximum host and guest cpu featuresets via SYSCTL Andrew Cooper
2016-03-16 18:23   ` Wei Liu
2016-03-16 20:38     ` David Scott
2016-03-22  8:43   ` Jan Beulich
2016-03-22 20:39     ` Andrew Cooper
2016-03-15 15:35 ` [PATCH v3 22/28] tools/libxc: Modify bitmap operations to take void pointers Andrew Cooper
2016-03-16 15:24   ` Andrew Cooper
2016-03-17 12:17   ` Wei Liu
2016-03-15 15:35 ` [PATCH v3 23/28] tools/libxc: Use public/featureset.h for cpuid policy generation Andrew Cooper
2016-03-15 15:35 ` [PATCH v3 24/28] tools/libxc: Expose the automatically generated cpu featuremask information Andrew Cooper
2016-03-15 15:35 ` [PATCH v3 25/28] tools: Utility for dealing with featuresets Andrew Cooper
2016-03-16 18:23   ` Wei Liu
2016-03-15 15:35 ` [PATCH v3 26/28] tools/libxc: Wire a featureset through to cpuid policy logic Andrew Cooper
2016-03-15 15:35 ` [PATCH v3 27/28] tools/libxc: Use featuresets rather than guesswork Andrew Cooper
2016-03-15 15:35 ` [PATCH v3 28/28] tools/libxc: Calculate xstate cpuid leaf from guest information Andrew Cooper
2016-03-16 18:23   ` Wei Liu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160317194539.GB32590@localhost.localdomain \
    --to=konrad@kernel.org \
    --cc=JBeulich@suse.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).