* Re: [OE-core] [PATCH] glibc: Bring back L1 icache line size
       [not found] <16654B509E6B5B9A.19946@lists.openembedded.org>
@ 2021-03-09 20:15 ` Andrei Gherzan
From: Andrei Gherzan @ 2021-03-09 20:15 UTC (permalink / raw)
  To: openembedded


Superseded by https://lists.openembedded.org/g/openembedded-core/message/149192
---
Andrei

On Fri, 19 Feb 2021, at 23:53, Andrei Gherzan wrote:
> From: Andrei Gherzan <andrei.gherzan@huawei.com>
> 
> It was observed that with glibc 2.33, sysconf reports an unsupported
> option (-1) for _SC_LEVEL1_ICACHE_LINESIZE.
> 
> This can be reproduced with the getconf tool:
> 
> ```
> └─❯  docker run -ti --rm archlinux:base-20210214.0.15477 getconf -a |
> grep "GNU_LIBC_VERSION\|LEVEL1_ICACHE_LINESIZE"
> GNU_LIBC_VERSION                   glibc 2.33
> LEVEL1_ICACHE_LINESIZE
> └─❯  docker run -ti --rm archlinux:base-20210131.0.14634 getconf -a |
> grep "GNU_LIBC_VERSION\|LEVEL1_ICACHE_LINESIZE"
> GNU_LIBC_VERSION                   glibc 2.32
> LEVEL1_ICACHE_LINESIZE             64
> ```
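> 
> The same regression is visible from C through sysconf(3). A minimal
> sketch for checking on a target (the message text is illustrative):
> 
> ```
> #include <stdio.h>
> #include <unistd.h>
> 
> int main (void)
> {
>   /* With glibc 2.33 on x86 this returns -1 instead of the line size.  */
>   long linesize = sysconf (_SC_LEVEL1_ICACHE_LINESIZE);
>   printf ("_SC_LEVEL1_ICACHE_LINESIZE: %ld\n", linesize);
>   return 0;
> }
> ```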
> 
> The offending patch in glibc is:
> 
> commit 2d651eb9265d1366d7b9e881bfddd46db9c1ecc4
> Author: H.J. Lu <hjl.tools@gmail.com>
> Date:   Fri Sep 18 07:55:14 2020 -0700
>     x86: Move x86 processor cache info to cpu_features
> 
> This patch reverts the above-mentioned glibc change. It was tested on
> qemux86.
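> 
> One way to verify on qemux86 (a sketch assuming a standard Poky setup;
> core-image-minimal is just an example image):
> 
> ```
> bitbake core-image-minimal
> runqemu qemux86
> # then, inside the qemu guest:
> getconf LEVEL1_ICACHE_LINESIZE   # should print the line size (e.g. 64), not an empty value
> ```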
> 
> Extra small cosmetic tweaks brought to you by devtool (a superfluous
> newline and whitespace).
> 
> Signed-off-by: Andrei Gherzan <andrei.gherzan@huawei.com>
> ---
> ...x86-processor-cache-info-to-cpu_feat.patch | 1074 +++++++++++++++++
> meta/recipes-core/glibc/glibc_2.33.bb         |    4 +-
> 2 files changed, 1076 insertions(+), 2 deletions(-)
> create mode 100644 meta/recipes-core/glibc/glibc/0032-Revert-x86-Move-x86-processor-cache-info-to-cpu_feat.patch
> 
> diff --git a/meta/recipes-core/glibc/glibc/0032-Revert-x86-Move-x86-processor-cache-info-to-cpu_feat.patch b/meta/recipes-core/glibc/glibc/0032-Revert-x86-Move-x86-processor-cache-info-to-cpu_feat.patch
> new file mode 100644
> index 0000000000..0ff1eba82b
> --- /dev/null
> +++ b/meta/recipes-core/glibc/glibc/0032-Revert-x86-Move-x86-processor-cache-info-to-cpu_feat.patch
> @@ -0,0 +1,1074 @@
> +From 961d681e38d30a4de06c980de0a96464fa3b4d74 Mon Sep 17 00:00:00 2001
> +From: Andrei Gherzan <andrei@gherzan.com>
> +Date: Fri, 19 Feb 2021 23:06:50 +0000
> +Subject: [PATCH] Revert "x86: Move x86 processor cache info to cpu_features"
> +
> +This reverts commit 2d651eb9265d1366d7b9e881bfddd46db9c1ecc4.
> +
> +Upstream-Status: Pending
> +Signed-off-by: Andrei Gherzan <andrei.gherzan@huawei.com>
> +---
> + sysdeps/x86/cacheinfo.c            |  46 +--
> + sysdeps/x86/cacheinfo.h            | 400 +++++++++++++++++++++++--
> + sysdeps/x86/cpu-features.c         |  35 ++-
> + sysdeps/x86/dl-cacheinfo.h         | 460 -----------------------------
> + sysdeps/x86/include/cpu-features.h |  22 --
> + 5 files changed, 412 insertions(+), 551 deletions(-)
> +
> +diff --git a/sysdeps/x86/cacheinfo.c b/sysdeps/x86/cacheinfo.c
> +index 7b8df45e3b..948dbea3db 100644
> +--- a/sysdeps/x86/cacheinfo.c
> ++++ b/sysdeps/x86/cacheinfo.c
> +@@ -18,8 +18,11 @@
> + 
> + #if IS_IN (libc)
> + 
> ++#include <assert.h>
> + #include <unistd.h>
> ++#include <cpuid.h>
> + #include <ldsodefs.h>
> ++#include <dl-cacheinfo.h>
> + 
> + /* Get the value of the system variable NAME.  */
> + long int
> +@@ -27,45 +30,20 @@ attribute_hidden
> + __cache_sysconf (int name)
> + {
> +   const struct cpu_features *cpu_features = __get_cpu_features ();
> +-  switch (name)
> +-    {
> +-    case _SC_LEVEL1_ICACHE_SIZE:
> +-      return cpu_features->level1_icache_size;
> + 
> +-    case _SC_LEVEL1_DCACHE_SIZE:
> +-      return cpu_features->level1_dcache_size;
> ++  if (cpu_features->basic.kind == arch_kind_intel)
> ++    return handle_intel (name, cpu_features);
> + 
> +-    case _SC_LEVEL1_DCACHE_ASSOC:
> +-      return cpu_features->level1_dcache_assoc;
> ++  if (cpu_features->basic.kind == arch_kind_amd)
> ++    return handle_amd (name);
> + 
> +-    case _SC_LEVEL1_DCACHE_LINESIZE:
> +-      return cpu_features->level1_dcache_linesize;
> ++  if (cpu_features->basic.kind == arch_kind_zhaoxin)
> ++    return handle_zhaoxin (name);
> + 
> +-    case _SC_LEVEL2_CACHE_SIZE:
> +-      return cpu_features->level2_cache_size;
> ++  // XXX Fill in more vendors.
> + 
> +-    case _SC_LEVEL2_CACHE_ASSOC:
> +-      return cpu_features->level2_cache_assoc;
> +-
> +-    case _SC_LEVEL2_CACHE_LINESIZE:
> +-      return cpu_features->level2_cache_linesize;
> +-
> +-    case _SC_LEVEL3_CACHE_SIZE:
> +-      return cpu_features->level3_cache_size;
> +-
> +-    case _SC_LEVEL3_CACHE_ASSOC:
> +-      return cpu_features->level3_cache_assoc;
> +-
> +-    case _SC_LEVEL3_CACHE_LINESIZE:
> +-      return cpu_features->level3_cache_linesize;
> +-
> +-    case _SC_LEVEL4_CACHE_SIZE:
> +-      return cpu_features->level4_cache_size;
> +-
> +-    default:
> +-      break;
> +-    }
> +-  return -1;
> ++  /* CPU not known, we have no information.  */
> ++  return 0;
> + }
> + 
> + # ifdef SHARED
> +diff --git a/sysdeps/x86/cacheinfo.h b/sysdeps/x86/cacheinfo.h
> +index 68c253542f..736189f7f2 100644
> +--- a/sysdeps/x86/cacheinfo.h
> ++++ b/sysdeps/x86/cacheinfo.h
> +@@ -18,16 +18,7 @@
> + 
> + #include <assert.h>
> + #include <unistd.h>
> +-#include <cpuid.h>
> +-#include <cpu-features.h>
> + 
> +-#if HAVE_TUNABLES
> +-# define TUNABLE_NAMESPACE cpu
> +-# include <unistd.h> /* Get STDOUT_FILENO for _dl_printf.  */
> +-# include <elf/dl-tunables.h>
> +-#endif
> +-
> +-#if IS_IN (libc)
> + /* Data cache size for use in memory and string routines, typically
> +    L1 size, rounded to multiple of 256 bytes.  */
> + long int __x86_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
> +@@ -54,30 +45,385 @@ long int __x86_rep_movsb_threshold attribute_hidden = 2048;
> + /* Threshold to use Enhanced REP STOSB.  */
> + long int __x86_rep_stosb_threshold attribute_hidden = 2048;
> + 
> ++static void
> ++get_common_cache_info (long int *shared_ptr, unsigned int *threads_ptr,
> ++        long int core)
> ++{
> ++  unsigned int eax;
> ++  unsigned int ebx;
> ++  unsigned int ecx;
> ++  unsigned int edx;
> ++
> ++  /* Number of logical processors sharing L2 cache.  */
> ++  int threads_l2;
> ++
> ++  /* Number of logical processors sharing L3 cache.  */
> ++  int threads_l3;
> ++
> ++  const struct cpu_features *cpu_features = __get_cpu_features ();
> ++  int max_cpuid = cpu_features->basic.max_cpuid;
> ++  unsigned int family = cpu_features->basic.family;
> ++  unsigned int model = cpu_features->basic.model;
> ++  long int shared = *shared_ptr;
> ++  unsigned int threads = *threads_ptr;
> ++  bool inclusive_cache = true;
> ++  bool support_count_mask = true;
> ++
> ++  /* Try L3 first.  */
> ++  unsigned int level = 3;
> ++
> ++  if (cpu_features->basic.kind == arch_kind_zhaoxin && family == 6)
> ++    support_count_mask = false;
> ++
> ++  if (shared <= 0)
> ++    {
> ++      /* Try L2 otherwise.  */
> ++      level  = 2;
> ++      shared = core;
> ++      threads_l2 = 0;
> ++      threads_l3 = -1;
> ++    }
> ++  else
> ++    {
> ++      threads_l2 = 0;
> ++      threads_l3 = 0;
> ++    }
> ++
> ++  /* A value of 0 for the HTT bit indicates there is only a single
> ++     logical processor.  */
> ++  if (HAS_CPU_FEATURE (HTT))
> ++    {
> ++      /* Figure out the number of logical threads that share the
> ++         highest cache level.  */
> ++      if (max_cpuid >= 4)
> ++        {
> ++          int i = 0;
> ++
> ++          /* Query until cache level 2 and 3 are enumerated.  */
> ++          int check = 0x1 | (threads_l3 == 0) << 1;
> ++          do
> ++            {
> ++              __cpuid_count (4, i++, eax, ebx, ecx, edx);
> ++
> ++              /* There seems to be a bug in at least some Pentium Ds
> ++                 which sometimes fail to iterate all cache parameters.
> ++                 Do not loop indefinitely here, stop in this case and
> ++                 assume there is no such information.  */
> ++              if (cpu_features->basic.kind == arch_kind_intel
> ++                  && (eax & 0x1f) == 0 )
> ++                goto intel_bug_no_cache_info;
> ++
> ++              switch ((eax >> 5) & 0x7)
> ++                {
> ++                  default:
> ++                    break;
> ++                  case 2:
> ++                    if ((check & 0x1))
> ++                      {
> ++                        /* Get maximum number of logical processors
> ++                           sharing L2 cache.  */
> ++                        threads_l2 = (eax >> 14) & 0x3ff;
> ++                        check &= ~0x1;
> ++                      }
> ++                    break;
> ++                  case 3:
> ++                    if ((check & (0x1 << 1)))
> ++                      {
> ++                        /* Get maximum number of logical processors
> ++                           sharing L3 cache.  */
> ++                        threads_l3 = (eax >> 14) & 0x3ff;
> ++
> ++                        /* Check if L2 and L3 caches are inclusive.  */
> ++                        inclusive_cache = (edx & 0x2) != 0;
> ++                        check &= ~(0x1 << 1);
> ++                      }
> ++                    break;
> ++                }
> ++            }
> ++          while (check);
> ++
> ++          /* If max_cpuid >= 11, THREADS_L2/THREADS_L3 are the maximum
> ++             numbers of addressable IDs for logical processors sharing
> ++             the cache, instead of the maximum number of threads
> ++             sharing the cache.  */
> ++          if (max_cpuid >= 11 && support_count_mask)
> ++            {
> ++              /* Find the number of logical processors shipped in
> ++                 one core and apply count mask.  */
> ++              i = 0;
> ++
> ++              /* Count SMT only if there is L3 cache.  Always count
> ++                 core if there is no L3 cache.  */
> ++              int count = ((threads_l2 > 0 && level == 3)
> ++                           | ((threads_l3 > 0
> ++                               || (threads_l2 > 0 && level == 2)) << 1));
> ++
> ++              while (count)
> ++                {
> ++                  __cpuid_count (11, i++, eax, ebx, ecx, edx);
> ++
> ++                  int shipped = ebx & 0xff;
> ++                  int type = ecx & 0xff00;
> ++                  if (shipped == 0 || type == 0)
> ++                    break;
> ++                  else if (type == 0x100)
> ++                    {
> ++                      /* Count SMT.  */
> ++                      if ((count & 0x1))
> ++                        {
> ++                          int count_mask;
> ++
> ++                          /* Compute count mask.  */
> ++                          asm ("bsr %1, %0"
> ++                               : "=r" (count_mask) : "g" (threads_l2));
> ++                          count_mask = ~(-1 << (count_mask + 1));
> ++                          threads_l2 = (shipped - 1) & count_mask;
> ++                          count &= ~0x1;
> ++                        }
> ++                    }
> ++                  else if (type == 0x200)
> ++                    {
> ++                      /* Count core.  */
> ++                      if ((count & (0x1 << 1)))
> ++                        {
> ++                          int count_mask;
> ++                          int threads_core
> ++                            = (level == 2 ? threads_l2 : threads_l3);
> ++
> ++                          /* Compute count mask.  */
> ++                          asm ("bsr %1, %0"
> ++                               : "=r" (count_mask) : "g" (threads_core));
> ++                          count_mask = ~(-1 << (count_mask + 1));
> ++                          threads_core = (shipped - 1) & count_mask;
> ++                          if (level == 2)
> ++                            threads_l2 = threads_core;
> ++                          else
> ++                            threads_l3 = threads_core;
> ++                          count &= ~(0x1 << 1);
> ++                        }
> ++                    }
> ++                }
> ++            }
> ++          if (threads_l2 > 0)
> ++            threads_l2 += 1;
> ++          if (threads_l3 > 0)
> ++            threads_l3 += 1;
> ++          if (level == 2)
> ++            {
> ++              if (threads_l2)
> ++                {
> ++                  threads = threads_l2;
> ++                  if (cpu_features->basic.kind == arch_kind_intel
> ++                      && threads > 2
> ++                      && family == 6)
> ++                    switch (model)
> ++                      {
> ++                        case 0x37:
> ++                        case 0x4a:
> ++                        case 0x4d:
> ++                        case 0x5a:
> ++                        case 0x5d:
> ++                          /* Silvermont has L2 cache shared by 2 cores.  */
> ++                          threads = 2;
> ++                          break;
> ++                        default:
> ++                          break;
> ++                      }
> ++                }
> ++            }
> ++          else if (threads_l3)
> ++            threads = threads_l3;
> ++        }
> ++      else
> ++        {
> ++intel_bug_no_cache_info:
> ++          /* Assume that all logical threads share the highest cache
> ++             level.  */
> ++          threads
> ++     = ((cpu_features->features[CPUID_INDEX_1].cpuid.ebx >> 16)
> ++ & 0xff);
> ++        }
> ++
> ++        /* Cap usage of highest cache level to the number of supported
> ++           threads.  */
> ++        if (shared > 0 && threads > 0)
> ++          shared /= threads;
> ++    }
> ++
> ++  /* Account for non-inclusive L2 and L3 caches.  */
> ++  if (!inclusive_cache)
> ++    {
> ++      if (threads_l2 > 0)
> ++        core /= threads_l2;
> ++      shared += core;
> ++    }
> ++
> ++  *shared_ptr = shared;
> ++  *threads_ptr = threads;
> ++}
> ++
> + static void
> + init_cacheinfo (void)
> + {
> ++  /* Find out what brand of processor.  */
> ++  unsigned int ebx;
> ++  unsigned int ecx;
> ++  unsigned int edx;
> ++  int max_cpuid_ex;
> ++  long int data = -1;
> ++  long int shared = -1;
> ++  long int core;
> ++  unsigned int threads = 0;
> +   const struct cpu_features *cpu_features = __get_cpu_features ();
> +-  long int data = cpu_features->data_cache_size;
> +-  __x86_raw_data_cache_size_half = data / 2;
> +-  __x86_raw_data_cache_size = data;
> +-  /* Round data cache size to multiple of 256 bytes.  */
> +-  data = data & ~255L;
> +-  __x86_data_cache_size_half = data / 2;
> +-  __x86_data_cache_size = data;
> +-
> +-  long int shared = cpu_features->shared_cache_size;
> +-  __x86_raw_shared_cache_size_half = shared / 2;
> +-  __x86_raw_shared_cache_size = shared;
> +-  /* Round shared cache size to multiple of 256 bytes.  */
> +-  shared = shared & ~255L;
> +-  __x86_shared_cache_size_half = shared / 2;
> +-  __x86_shared_cache_size = shared;
> + 
> ++  /* NB: In libc.so, cpu_features is defined in ld.so and is initialized
> ++     by DL_PLATFORM_INIT or IFUNC relocation before init_cacheinfo is
> ++     called by IFUNC relocation.  In libc.a, init_cacheinfo is called
> ++     from init_cpu_features by ARCH_INIT_CPU_FEATURES.  */
> ++  assert (cpu_features->basic.kind != arch_kind_unknown);
> ++
> ++  if (cpu_features->basic.kind == arch_kind_intel)
> ++    {
> ++      data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, cpu_features);
> ++      core = handle_intel (_SC_LEVEL2_CACHE_SIZE, cpu_features);
> ++      shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, cpu_features);
> ++
> ++      get_common_cache_info (&shared, &threads, core);
> ++    }
> ++  else if (cpu_features->basic.kind == arch_kind_zhaoxin)
> ++    {
> ++      data = handle_zhaoxin (_SC_LEVEL1_DCACHE_SIZE);
> ++      core = handle_zhaoxin (_SC_LEVEL2_CACHE_SIZE);
> ++      shared = handle_zhaoxin (_SC_LEVEL3_CACHE_SIZE);
> ++
> ++      get_common_cache_info (&shared, &threads, core);
> ++    }
> ++  else if (cpu_features->basic.kind == arch_kind_amd)
> ++    {
> ++      data   = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
> ++      long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
> ++      shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
> ++
> ++      /* Get maximum extended function. */
> ++      __cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx);
> ++
> ++      if (shared <= 0)
> ++ /* No shared L3 cache.  All we have is the L2 cache.  */
> ++ shared = core;
> ++      else
> ++ {
> ++   /* Figure out the number of logical threads that share L3.  */
> ++   if (max_cpuid_ex >= 0x80000008)
> ++     {
> ++       /* Get width of APIC ID.  */
> ++       __cpuid (0x80000008, max_cpuid_ex, ebx, ecx, edx);
> ++       threads = 1 << ((ecx >> 12) & 0x0f);
> ++     }
> ++
> ++   if (threads == 0 || cpu_features->basic.family >= 0x17)
> ++     {
> ++       /* If APIC ID width is not available, use logical
> ++ processor count.  */
> ++       __cpuid (0x00000001, max_cpuid_ex, ebx, ecx, edx);
> ++
> ++       if ((edx & (1 << 28)) != 0)
> ++ threads = (ebx >> 16) & 0xff;
> ++     }
> ++
> ++   /* Cap usage of highest cache level to the number of
> ++      supported threads.  */
> ++   if (threads > 0)
> ++     shared /= threads;
> ++
> ++   /* Get shared cache per ccx for Zen architectures.  */
> ++   if (cpu_features->basic.family >= 0x17)
> ++     {
> ++       unsigned int eax;
> ++
> ++       /* Get number of threads share the L3 cache in CCX.  */
> ++       __cpuid_count (0x8000001D, 0x3, eax, ebx, ecx, edx);
> ++
> ++       unsigned int threads_per_ccx = ((eax >> 14) & 0xfff) + 1;
> ++       shared *= threads_per_ccx;
> ++     }
> ++   else
> ++     {
> ++       /* Account for exclusive L2 and L3 caches.  */
> ++       shared += core;
> ++            }
> ++      }
> ++    }
> ++
> ++  /* Prefer cache size configure via tuning.  */
> ++  if (cpu_features->data_cache_size != 0)
> ++    data = cpu_features->data_cache_size;
> ++
> ++  if (data > 0)
> ++    {
> ++      __x86_raw_data_cache_size_half = data / 2;
> ++      __x86_raw_data_cache_size = data;
> ++      /* Round data cache size to multiple of 256 bytes.  */
> ++      data = data & ~255L;
> ++      __x86_data_cache_size_half = data / 2;
> ++      __x86_data_cache_size = data;
> ++    }
> ++
> ++  /* Prefer cache size configure via tuning.  */
> ++  if (cpu_features->shared_cache_size != 0)
> ++    shared = cpu_features->shared_cache_size;
> ++
> ++  if (shared > 0)
> ++    {
> ++      __x86_raw_shared_cache_size_half = shared / 2;
> ++      __x86_raw_shared_cache_size = shared;
> ++      /* Round shared cache size to multiple of 256 bytes.  */
> ++      shared = shared & ~255L;
> ++      __x86_shared_cache_size_half = shared / 2;
> ++      __x86_shared_cache_size = shared;
> ++    }
> ++
> ++  /* The default setting for the non_temporal threshold is 3/4 of one
> ++     thread's share of the chip's cache. For most Intel and AMD processors
> ++     with an initial release date between 2017 and 2020, a thread's typical
> ++     share of the cache is from 500 KBytes to 2 MBytes. Using the 3/4
> ++     threshold leaves 125 KBytes to 500 KBytes of the thread's data
> ++     in cache after a maximum temporal copy, which will maintain
> ++     in cache a reasonable portion of the thread's stack and other
> ++     active data. If the threshold is set higher than one thread's
> ++     share of the cache, it has a substantial risk of negatively
> ++     impacting the performance of other threads running on the chip. */
> +   __x86_shared_non_temporal_threshold
> +-    = cpu_features->non_temporal_threshold;
> ++    = (cpu_features->non_temporal_threshold != 0
> ++       ? cpu_features->non_temporal_threshold
> ++       : __x86_shared_cache_size * 3 / 4);
> ++
> ++  /* NB: The REP MOVSB threshold must be greater than VEC_SIZE * 8.  */
> ++  unsigned int minimum_rep_movsb_threshold;
> ++  /* NB: The default REP MOVSB threshold is 2048 * (VEC_SIZE / 16).  */
> ++  unsigned int rep_movsb_threshold;
> ++  if (CPU_FEATURE_USABLE_P (cpu_features, AVX512F)
> ++      && !CPU_FEATURE_PREFERRED_P (cpu_features, Prefer_No_AVX512))
> ++    {
> ++      rep_movsb_threshold = 2048 * (64 / 16);
> ++      minimum_rep_movsb_threshold = 64 * 8;
> ++    }
> ++  else if (CPU_FEATURE_PREFERRED_P (cpu_features,
> ++     AVX_Fast_Unaligned_Load))
> ++    {
> ++      rep_movsb_threshold = 2048 * (32 / 16);
> ++      minimum_rep_movsb_threshold = 32 * 8;
> ++    }
> ++  else
> ++    {
> ++      rep_movsb_threshold = 2048 * (16 / 16);
> ++      minimum_rep_movsb_threshold = 16 * 8;
> ++    }
> ++  if (cpu_features->rep_movsb_threshold > minimum_rep_movsb_threshold)
> ++    __x86_rep_movsb_threshold = cpu_features->rep_movsb_threshold;
> ++  else
> ++    __x86_rep_movsb_threshold = rep_movsb_threshold;
> + 
> +-  __x86_rep_movsb_threshold = cpu_features->rep_movsb_threshold;
> ++# if HAVE_TUNABLES
> +   __x86_rep_stosb_threshold = cpu_features->rep_stosb_threshold;
> ++# endif
> + }
> +-#endif
> +diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
> +index 73b0a4dc9a..c9e51b5e5a 100644
> +--- a/sysdeps/x86/cpu-features.c
> ++++ b/sysdeps/x86/cpu-features.c
> +@@ -16,13 +16,22 @@
> +    License along with the GNU C Library; if not, see
> +    <https://www.gnu.org/licenses/>.  */
> + 
> ++#include <cpuid.h>
> + #include <dl-hwcap.h>
> + #include <libc-pointer-arith.h>
> + #include <get-isa-level.h>
> +-#include <cacheinfo.h>
> +-#include <dl-cacheinfo.h>
> ++#if IS_IN (libc) && !defined SHARED
> ++# include <assert.h>
> ++# include <unistd.h>
> ++# include <dl-cacheinfo.h>
> ++# include <cacheinfo.h>
> ++#endif
> + 
> + #if HAVE_TUNABLES
> ++# define TUNABLE_NAMESPACE cpu
> ++# include <unistd.h> /* Get STDOUT_FILENO for _dl_printf.  */
> ++# include <elf/dl-tunables.h>
> ++
> + extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *)
> +   attribute_hidden;
> + 
> +@@ -639,14 +648,24 @@ no_cpuid:
> +   cpu_features->basic.model = model;
> +   cpu_features->basic.stepping = stepping;
> + 
> +-  dl_init_cacheinfo (cpu_features);
> +-
> + #if HAVE_TUNABLES
> +   TUNABLE_GET (hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));
> +-#elif defined SHARED
> +-  /* Reuse dl_platform, dl_hwcap and dl_hwcap_mask for x86.  The
> +-     glibc.cpu.hwcap_mask tunable is initialized already, so no
> +-     need to do this.  */
> ++  cpu_features->non_temporal_threshold
> ++    = TUNABLE_GET (x86_non_temporal_threshold, long int, NULL);
> ++  cpu_features->rep_movsb_threshold
> ++    = TUNABLE_GET (x86_rep_movsb_threshold, long int, NULL);
> ++  cpu_features->rep_stosb_threshold
> ++    = TUNABLE_GET (x86_rep_stosb_threshold, long int, NULL);
> ++  cpu_features->data_cache_size
> ++    = TUNABLE_GET (x86_data_cache_size, long int, NULL);
> ++  cpu_features->shared_cache_size
> ++    = TUNABLE_GET (x86_shared_cache_size, long int, NULL);
> ++#endif
> ++
> ++  /* Reuse dl_platform, dl_hwcap and dl_hwcap_mask for x86.  */
> ++#if !HAVE_TUNABLES && defined SHARED
> ++  /* The glibc.cpu.hwcap_mask tunable is initialized already, so no need to do
> ++     this.  */
> +   GLRO(dl_hwcap_mask) = HWCAP_IMPORTANT;
> + #endif
> + 
> +diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
> +index a31fa0783a..6adce4147c 100644
> +--- a/sysdeps/x86/dl-cacheinfo.h
> ++++ b/sysdeps/x86/dl-cacheinfo.h
> +@@ -476,463 +476,3 @@ handle_zhaoxin (int name)
> +   /* Nothing found.  */
> +   return 0;
> + }
> +-
> +-static void
> +-get_common_cache_info (long int *shared_ptr, unsigned int *threads_ptr,
> +-                long int core)
> +-{
> +-  unsigned int eax;
> +-  unsigned int ebx;
> +-  unsigned int ecx;
> +-  unsigned int edx;
> +-
> +-  /* Number of logical processors sharing L2 cache.  */
> +-  int threads_l2;
> +-
> +-  /* Number of logical processors sharing L3 cache.  */
> +-  int threads_l3;
> +-
> +-  const struct cpu_features *cpu_features = __get_cpu_features ();
> +-  int max_cpuid = cpu_features->basic.max_cpuid;
> +-  unsigned int family = cpu_features->basic.family;
> +-  unsigned int model = cpu_features->basic.model;
> +-  long int shared = *shared_ptr;
> +-  unsigned int threads = *threads_ptr;
> +-  bool inclusive_cache = true;
> +-  bool support_count_mask = true;
> +-
> +-  /* Try L3 first.  */
> +-  unsigned int level = 3;
> +-
> +-  if (cpu_features->basic.kind == arch_kind_zhaoxin && family == 6)
> +-    support_count_mask = false;
> +-
> +-  if (shared <= 0)
> +-    {
> +-      /* Try L2 otherwise.  */
> +-      level  = 2;
> +-      shared = core;
> +-      threads_l2 = 0;
> +-      threads_l3 = -1;
> +-    }
> +-  else
> +-    {
> +-      threads_l2 = 0;
> +-      threads_l3 = 0;
> +-    }
> +-
> +-  /* A value of 0 for the HTT bit indicates there is only a single
> +-     logical processor.  */
> +-  if (HAS_CPU_FEATURE (HTT))
> +-    {
> +-      /* Figure out the number of logical threads that share the
> +-         highest cache level.  */
> +-      if (max_cpuid >= 4)
> +-        {
> +-          int i = 0;
> +-
> +-          /* Query until cache level 2 and 3 are enumerated.  */
> +-          int check = 0x1 | (threads_l3 == 0) << 1;
> +-          do
> +-            {
> +-              __cpuid_count (4, i++, eax, ebx, ecx, edx);
> +-
> +-              /* There seems to be a bug in at least some Pentium Ds
> +-                 which sometimes fail to iterate all cache parameters.
> +-                 Do not loop indefinitely here, stop in this case and
> +-                 assume there is no such information.  */
> +-              if (cpu_features->basic.kind == arch_kind_intel
> +-                  && (eax & 0x1f) == 0 )
> +-                goto intel_bug_no_cache_info;
> +-
> +-              switch ((eax >> 5) & 0x7)
> +-                {
> +-                  default:
> +-                    break;
> +-                  case 2:
> +-                    if ((check & 0x1))
> +-                      {
> +-                        /* Get maximum number of logical processors
> +-                           sharing L2 cache.  */
> +-                        threads_l2 = (eax >> 14) & 0x3ff;
> +-                        check &= ~0x1;
> +-                      }
> +-                    break;
> +-                  case 3:
> +-                    if ((check & (0x1 << 1)))
> +-                      {
> +-                        /* Get maximum number of logical processors
> +-                           sharing L3 cache.  */
> +-                        threads_l3 = (eax >> 14) & 0x3ff;
> +-
> +-                        /* Check if L2 and L3 caches are inclusive.  */
> +-                        inclusive_cache = (edx & 0x2) != 0;
> +-                        check &= ~(0x1 << 1);
> +-                      }
> +-                    break;
> +-                }
> +-            }
> +-          while (check);
> +-
> +-          /* If max_cpuid >= 11, THREADS_L2/THREADS_L3 are the maximum
> +-             numbers of addressable IDs for logical processors sharing
> +-             the cache, instead of the maximum number of threads
> +-             sharing the cache.  */
> +-          if (max_cpuid >= 11 && support_count_mask)
> +-            {
> +-              /* Find the number of logical processors shipped in
> +-                 one core and apply count mask.  */
> +-              i = 0;
> +-
> +-              /* Count SMT only if there is L3 cache.  Always count
> +-                 core if there is no L3 cache.  */
> +-              int count = ((threads_l2 > 0 && level == 3)
> +-                           | ((threads_l3 > 0
> +-                               || (threads_l2 > 0 && level == 2)) << 1));
> +-
> +-              while (count)
> +-                {
> +-                  __cpuid_count (11, i++, eax, ebx, ecx, edx);
> +-
> +-                  int shipped = ebx & 0xff;
> +-                  int type = ecx & 0xff00;
> +-                  if (shipped == 0 || type == 0)
> +-                    break;
> +-                  else if (type == 0x100)
> +-                    {
> +-                      /* Count SMT.  */
> +-                      if ((count & 0x1))
> +-                        {
> +-                          int count_mask;
> +-
> +-                          /* Compute count mask.  */
> +-                          asm ("bsr %1, %0"
> +-                               : "=r" (count_mask) : "g" (threads_l2));
> +-                          count_mask = ~(-1 << (count_mask + 1));
> +-                          threads_l2 = (shipped - 1) & count_mask;
> +-                          count &= ~0x1;
> +-                        }
> +-                    }
> +-                  else if (type == 0x200)
> +-                    {
> +-                      /* Count core.  */
> +-                      if ((count & (0x1 << 1)))
> +-                        {
> +-                          int count_mask;
> +-                          int threads_core
> +-                            = (level == 2 ? threads_l2 : threads_l3);
> +-
> +-                          /* Compute count mask.  */
> +-                          asm ("bsr %1, %0"
> +-                               : "=r" (count_mask) : "g" (threads_core));
> +-                          count_mask = ~(-1 << (count_mask + 1));
> +-                          threads_core = (shipped - 1) & count_mask;
> +-                          if (level == 2)
> +-                            threads_l2 = threads_core;
> +-                          else
> +-                            threads_l3 = threads_core;
> +-                          count &= ~(0x1 << 1);
> +-                        }
> +-                    }
> +-                }
> +-            }
> +-          if (threads_l2 > 0)
> +-            threads_l2 += 1;
> +-          if (threads_l3 > 0)
> +-            threads_l3 += 1;
> +-          if (level == 2)
> +-            {
> +-              if (threads_l2)
> +-                {
> +-                  threads = threads_l2;
> +-                  if (cpu_features->basic.kind == arch_kind_intel
> +-                      && threads > 2
> +-                      && family == 6)
> +-                    switch (model)
> +-                      {
> +-                        case 0x37:
> +-                        case 0x4a:
> +-                        case 0x4d:
> +-                        case 0x5a:
> +-                        case 0x5d:
> +-                          /* Silvermont has L2 cache shared by 2 cores.  */
> +-                          threads = 2;
> +-                          break;
> +-                        default:
> +-                          break;
> +-                      }
> +-                }
> +-            }
> +-          else if (threads_l3)
> +-            threads = threads_l3;
> +-        }
> +-      else
> +-        {
> +-intel_bug_no_cache_info:
> +-          /* Assume that all logical threads share the highest cache
> +-             level.  */
> +-          threads
> +-            = ((cpu_features->features[CPUID_INDEX_1].cpuid.ebx >> 16)
> +-        & 0xff);
> +-        }
> +-
> +-        /* Cap usage of highest cache level to the number of supported
> +-           threads.  */
> +-        if (shared > 0 && threads > 0)
> +-          shared /= threads;
> +-    }
> +-
> +-  /* Account for non-inclusive L2 and L3 caches.  */
> +-  if (!inclusive_cache)
> +-    {
> +-      if (threads_l2 > 0)
> +-        core /= threads_l2;
> +-      shared += core;
> +-    }
> +-
> +-  *shared_ptr = shared;
> +-  *threads_ptr = threads;
> +-}
> +-
> +-static void
> +-dl_init_cacheinfo (struct cpu_features *cpu_features)
> +-{
> +-  /* Find out what brand of processor.  */
> +-  unsigned int ebx;
> +-  unsigned int ecx;
> +-  unsigned int edx;
> +-  int max_cpuid_ex;
> +-  long int data = -1;
> +-  long int shared = -1;
> +-  long int core;
> +-  unsigned int threads = 0;
> +-  unsigned long int level1_icache_size = -1;
> +-  unsigned long int level1_dcache_size = -1;
> +-  unsigned long int level1_dcache_assoc = -1;
> +-  unsigned long int level1_dcache_linesize = -1;
> +-  unsigned long int level2_cache_size = -1;
> +-  unsigned long int level2_cache_assoc = -1;
> +-  unsigned long int level2_cache_linesize = -1;
> +-  unsigned long int level3_cache_size = -1;
> +-  unsigned long int level3_cache_assoc = -1;
> +-  unsigned long int level3_cache_linesize = -1;
> +-  unsigned long int level4_cache_size = -1;
> +-
> +-  if (cpu_features->basic.kind == arch_kind_intel)
> +-    {
> +-      data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, cpu_features);
> +-      core = handle_intel (_SC_LEVEL2_CACHE_SIZE, cpu_features);
> +-      shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, cpu_features);
> +-
> +-      level1_icache_size
> +- = handle_intel (_SC_LEVEL1_ICACHE_SIZE, cpu_features);
> +-      level1_dcache_size = data;
> +-      level1_dcache_assoc
> +- = handle_intel (_SC_LEVEL1_DCACHE_ASSOC, cpu_features);
> +-      level1_dcache_linesize
> +- = handle_intel (_SC_LEVEL1_DCACHE_LINESIZE, cpu_features);
> +-      level2_cache_size = core;
> +-      level2_cache_assoc
> +- = handle_intel (_SC_LEVEL2_CACHE_ASSOC, cpu_features);
> +-      level2_cache_linesize
> +- = handle_intel (_SC_LEVEL2_CACHE_LINESIZE, cpu_features);
> +-      level3_cache_size = shared;
> +-      level3_cache_assoc
> +- = handle_intel (_SC_LEVEL3_CACHE_ASSOC, cpu_features);
> +-      level3_cache_linesize
> +- = handle_intel (_SC_LEVEL3_CACHE_LINESIZE, cpu_features);
> +-      level4_cache_size
> +- = handle_intel (_SC_LEVEL4_CACHE_SIZE, cpu_features);
> +-
> +-      get_common_cache_info (&shared, &threads, core);
> +-    }
> +-  else if (cpu_features->basic.kind == arch_kind_zhaoxin)
> +-    {
> +-      data = handle_zhaoxin (_SC_LEVEL1_DCACHE_SIZE);
> +-      core = handle_zhaoxin (_SC_LEVEL2_CACHE_SIZE);
> +-      shared = handle_zhaoxin (_SC_LEVEL3_CACHE_SIZE);
> +-
> +-      level1_icache_size = handle_zhaoxin (_SC_LEVEL1_ICACHE_SIZE);
> +-      level1_dcache_size = data;
> +-      level1_dcache_assoc = handle_zhaoxin (_SC_LEVEL1_DCACHE_ASSOC);
> +-      level1_dcache_linesize = handle_zhaoxin (_SC_LEVEL1_DCACHE_LINESIZE);
> +-      level2_cache_size = core;
> +-      level2_cache_assoc = handle_zhaoxin (_SC_LEVEL2_CACHE_ASSOC);
> +-      level2_cache_linesize = handle_zhaoxin (_SC_LEVEL2_CACHE_LINESIZE);
> +-      level3_cache_size = shared;
> +-      level3_cache_assoc = handle_zhaoxin (_SC_LEVEL3_CACHE_ASSOC);
> +-      level3_cache_linesize = handle_zhaoxin (_SC_LEVEL3_CACHE_LINESIZE);
> +-
> +-      get_common_cache_info (&shared, &threads, core);
> +-    }
> +-  else if (cpu_features->basic.kind == arch_kind_amd)
> +-    {
> +-      data  = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
> +-      core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
> +-      shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
> +-
> +-      level1_icache_size = handle_amd (_SC_LEVEL1_ICACHE_SIZE);
> +-      level1_dcache_size = data;
> +-      level1_dcache_assoc = handle_amd (_SC_LEVEL1_DCACHE_ASSOC);
> +-      level1_dcache_linesize = handle_amd (_SC_LEVEL1_DCACHE_LINESIZE);
> +-      level2_cache_size = core;
> +-      level2_cache_assoc = handle_amd (_SC_LEVEL2_CACHE_ASSOC);
> +-      level2_cache_linesize = handle_amd (_SC_LEVEL2_CACHE_LINESIZE);
> +-      level3_cache_size = shared;
> +-      level3_cache_assoc = handle_amd (_SC_LEVEL3_CACHE_ASSOC);
> +-      level3_cache_linesize = handle_amd (_SC_LEVEL3_CACHE_LINESIZE);
> +-
> +-      /* Get maximum extended function. */
> +-      __cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx);
> +-
> +-      if (shared <= 0)
> +- /* No shared L3 cache.  All we have is the L2 cache.  */
> +- shared = core;
> +-      else
> +- {
> +-   /* Figure out the number of logical threads that share L3.  */
> +-   if (max_cpuid_ex >= 0x80000008)
> +-     {
> +-       /* Get width of APIC ID.  */
> +-       __cpuid (0x80000008, max_cpuid_ex, ebx, ecx, edx);
> +-       threads = 1 << ((ecx >> 12) & 0x0f);
> +-     }
> +-
> +-   if (threads == 0 || cpu_features->basic.family >= 0x17)
> +-     {
> +-       /* If APIC ID width is not available, use logical
> +- processor count.  */
> +-       __cpuid (0x00000001, max_cpuid_ex, ebx, ecx, edx);
> +-
> +-       if ((edx & (1 << 28)) != 0)
> +- threads = (ebx >> 16) & 0xff;
> +-     }
> +-
> +-   /* Cap usage of highest cache level to the number of
> +-      supported threads.  */
> +-   if (threads > 0)
> +-     shared /= threads;
> +-
> +-   /* Get shared cache per ccx for Zen architectures.  */
> +-   if (cpu_features->basic.family >= 0x17)
> +-     {
> +-       unsigned int eax;
> +-
> +-       /* Get number of threads share the L3 cache in CCX.  */
> +-       __cpuid_count (0x8000001D, 0x3, eax, ebx, ecx, edx);
> +-
> +-       unsigned int threads_per_ccx = ((eax >> 14) & 0xfff) + 1;
> +-       shared *= threads_per_ccx;
> +-     }
> +-   else
> +-     {
> +-       /* Account for exclusive L2 and L3 caches.  */
> +-       shared += core;
> +-            }
> +- }
> +-    }
> +-
> +-  cpu_features->level1_icache_size = level1_icache_size;
> +-  cpu_features->level1_dcache_size = level1_dcache_size;
> +-  cpu_features->level1_dcache_assoc = level1_dcache_assoc;
> +-  cpu_features->level1_dcache_linesize = level1_dcache_linesize;
> +-  cpu_features->level2_cache_size = level2_cache_size;
> +-  cpu_features->level2_cache_assoc = level2_cache_assoc;
> +-  cpu_features->level2_cache_linesize = level2_cache_linesize;
> +-  cpu_features->level3_cache_size = level3_cache_size;
> +-  cpu_features->level3_cache_assoc = level3_cache_assoc;
> +-  cpu_features->level3_cache_linesize = level3_cache_linesize;
> +-  cpu_features->level4_cache_size = level4_cache_size;
> +-
> +-  /* The default setting for the non_temporal threshold is 3/4 of one
> +-     thread's share of the chip's cache. For most Intel and AMD processors
> +-     with an initial release date between 2017 and 2020, a thread's typical
> +-     share of the cache is from 500 KBytes to 2 MBytes. Using the 3/4
> +-     threshold leaves 125 KBytes to 500 KBytes of the thread's data
> +-     in cache after a maximum temporal copy, which will maintain
> +-     in cache a reasonable portion of the thread's stack and other
> +-     active data. If the threshold is set higher than one thread's
> +-     share of the cache, it has a substantial risk of negatively
> +-     impacting the performance of other threads running on the chip. */
> +-  unsigned long int non_temporal_threshold = shared * 3 / 4;
> +-
> +-#if HAVE_TUNABLES
> +-  /* NB: The REP MOVSB threshold must be greater than VEC_SIZE * 8.  */
> +-  unsigned int minimum_rep_movsb_threshold;
> +-#endif
> +-  /* NB: The default REP MOVSB threshold is 2048 * (VEC_SIZE / 16).  */
> +-  unsigned int rep_movsb_threshold;
> +-  if (CPU_FEATURE_USABLE_P (cpu_features, AVX512F)
> +-      && !CPU_FEATURE_PREFERRED_P (cpu_features, Prefer_No_AVX512))
> +-    {
> +-      rep_movsb_threshold = 2048 * (64 / 16);
> +-#if HAVE_TUNABLES
> +-      minimum_rep_movsb_threshold = 64 * 8;
> +-#endif
> +-    }
> +-  else if (CPU_FEATURE_PREFERRED_P (cpu_features,
> +-     AVX_Fast_Unaligned_Load))
> +-    {
> +-      rep_movsb_threshold = 2048 * (32 / 16);
> +-#if HAVE_TUNABLES
> +-      minimum_rep_movsb_threshold = 32 * 8;
> +-#endif
> +-    }
> +-  else
> +-    {
> +-      rep_movsb_threshold = 2048 * (16 / 16);
> +-#if HAVE_TUNABLES
> +-      minimum_rep_movsb_threshold = 16 * 8;
> +-#endif
> +-    }
> +-
> +-  /* The default threshold to use Enhanced REP STOSB.  */
> +-  unsigned long int rep_stosb_threshold = 2048;
> +-
> +-#if HAVE_TUNABLES
> +-  long int tunable_size;
> +-
> +-  tunable_size = TUNABLE_GET (x86_data_cache_size, long int, NULL);
> +-  /* NB: Ignore the default value 0.  */
> +-  if (tunable_size != 0)
> +-    data = tunable_size;
> +-
> +-  tunable_size = TUNABLE_GET (x86_shared_cache_size, long int, NULL);
> +-  /* NB: Ignore the default value 0.  */
> +-  if (tunable_size != 0)
> +-    shared = tunable_size;
> +-
> +-  tunable_size = TUNABLE_GET (x86_non_temporal_threshold, long int, NULL);
> +-  /* NB: Ignore the default value 0.  */
> +-  if (tunable_size != 0)
> +-    non_temporal_threshold = tunable_size;
> +-
> +-  tunable_size = TUNABLE_GET (x86_rep_movsb_threshold, long int, NULL);
> +-  if (tunable_size > minimum_rep_movsb_threshold)
> +-    rep_movsb_threshold = tunable_size;
> +-
> +-  /* NB: The default value of the x86_rep_stosb_threshold tunable is the
> +-     same as the default value of __x86_rep_stosb_threshold and the
> +-     minimum value is fixed.  */
> +-  rep_stosb_threshold = TUNABLE_GET (x86_rep_stosb_threshold,
> +-      long int, NULL);
> +-
> +-  TUNABLE_SET_WITH_BOUNDS (x86_data_cache_size, long int, data,
> +-    0, (long int) -1);
> +-  TUNABLE_SET_WITH_BOUNDS (x86_shared_cache_size, long int, shared,
> +-    0, (long int) -1);
> +-  TUNABLE_SET_WITH_BOUNDS (x86_non_temporal_threshold, long int,
> +-    non_temporal_threshold, 0, (long int) -1);
> +-  TUNABLE_SET_WITH_BOUNDS (x86_rep_movsb_threshold, long int,
> +-    rep_movsb_threshold,
> +-    minimum_rep_movsb_threshold, (long int) -1);
> +-  TUNABLE_SET_WITH_BOUNDS (x86_rep_stosb_threshold, long int,
> +-    rep_stosb_threshold, 1, (long int) -1);
> +-#endif
> +-
> +-  cpu_features->data_cache_size = data;
> +-  cpu_features->shared_cache_size = shared;
> +-  cpu_features->non_temporal_threshold = non_temporal_threshold;
> +-  cpu_features->rep_movsb_threshold = rep_movsb_threshold;
> +-  cpu_features->rep_stosb_threshold = rep_stosb_threshold;
> +-}
> +diff --git a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h
> +index 624736b40e..fb02f0607b 100644
> +--- a/sysdeps/x86/include/cpu-features.h
> ++++ b/sysdeps/x86/include/cpu-features.h
> +@@ -872,28 +872,6 @@ struct cpu_features
> +   unsigned long int rep_movsb_threshold;
> +   /* Threshold to use "rep stosb".  */
> +   unsigned long int rep_stosb_threshold;
> +-  /* _SC_LEVEL1_ICACHE_SIZE.  */
> +-  unsigned long int level1_icache_size;
> +-  /* _SC_LEVEL1_DCACHE_SIZE.  */
> +-  unsigned long int level1_dcache_size;
> +-  /* _SC_LEVEL1_DCACHE_ASSOC.  */
> +-  unsigned long int level1_dcache_assoc;
> +-  /* _SC_LEVEL1_DCACHE_LINESIZE.  */
> +-  unsigned long int level1_dcache_linesize;
> +-  /* _SC_LEVEL2_CACHE_ASSOC.  */
> +-  unsigned long int level2_cache_size;
> +-  /* _SC_LEVEL2_DCACHE_ASSOC.  */
> +-  unsigned long int level2_cache_assoc;
> +-  /* _SC_LEVEL2_CACHE_LINESIZE.  */
> +-  unsigned long int level2_cache_linesize;
> +-  /* /_SC_LEVEL3_CACHE_SIZE.  */
> +-  unsigned long int level3_cache_size;
> +-  /* _SC_LEVEL3_CACHE_ASSOC.  */
> +-  unsigned long int level3_cache_assoc;
> +-  /* _SC_LEVEL3_CACHE_LINESIZE.  */
> +-  unsigned long int level3_cache_linesize;
> +-  /* /_SC_LEVEL4_CACHE_SIZE.  */
> +-  unsigned long int level4_cache_size;
> + };
> + 
> + /* Get a pointer to the CPU features structure.  */
> diff --git a/meta/recipes-core/glibc/glibc_2.33.bb b/meta/recipes-core/glibc/glibc_2.33.bb
> index e0002e6046..dd4087f80b 100644
> --- a/meta/recipes-core/glibc/glibc_2.33.bb
> +++ b/meta/recipes-core/glibc/glibc_2.33.bb
> @@ -15,11 +15,10 @@ NATIVESDKFIXES_class-nativesdk = "\
>             file://faccessat2-perm.patch \
> "
>
> -SRC_URI =  "${GLIBC_GIT_URI};branch=${SRCBRANCH};name=glibc \
> +SRC_URI = "${GLIBC_GIT_URI};branch=${SRCBRANCH};name=glibc \
>             file://etc/ld.so.conf \
>             file://generate-supported.mk \
>             file://makedbs.sh \
> -           \
>             ${NATIVESDKFIXES} \
>             file://0008-fsl-e500-e5500-e6500-603e-fsqrt-implementation.patch \
>             file://0009-ppc-sqrt-Fix-undefined-reference-to-__sqrt_finite.patch \
> @@ -44,6 +43,7 @@ SRC_URI =  "${GLIBC_GIT_URI};branch=${SRCBRANCH};name=glibc \
>             file://0029-wordsize.h-Unify-the-header-between-arm-and-aarch64.patch \
>             file://0030-powerpc-Do-not-ask-compiler-for-finding-arch.patch \
>             file://0031-x86-Require-full-ISA-support-for-x86-64-level-marker.patch \
> +           file://0032-Revert-x86-Move-x86-processor-cache-info-to-cpu_feat.patch \
>             "
> S = "${WORKDIR}/git"
> B = "${WORKDIR}/build-${TARGET_SYS}"
> -- 
> 2.30.1