xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: "Jan Beulich" <JBeulich@suse.com>
To: "xen-devel" <xen-devel@lists.xenproject.org>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>,
	Wei Liu <wei.liu2@citrix.com>,
	Ian Jackson <Ian.Jackson@eu.citrix.com>,
	Roger Pau Monne <roger.pau@citrix.com>
Subject: [Xen-devel] [PATCH 1/5] x86/cpuidle: switch to uniform meaning of "max_cstate="
Date: Thu, 23 May 2019 06:16:54 -0600	[thread overview]
Message-ID: <5CE68F360200007800231B25@prv1-mh.provo.novell.com> (raw)
Message-ID: <20190523121654.YpFqIMhDcUq1t9DK52M9bauthjcH7u4iIRlxLtK1GFc@z> (raw)
In-Reply-To: <5CE68CD30200007800231B01@prv1-mh.provo.novell.com>

While the MWAIT idle driver already takes it to mean an actual C state,
the ACPI idle driver so far used it as a list index. The list index,
however, is an implementation detail of Xen and affected by firmware
settings (i.e. not necessarily uniform for a particular system).

While touching this code also avoid invoking menu_get_trace_data()
when tracing is not active. For consistency do this also for the
MWAIT driver.

Note that I'm intentionally not adding any sorting logic to set_cx():
Before and after this patch we assume entries to arrive in order, so
this would be an orthogonal change.

Take the opportunity and add minimal documentation for the command line
option.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
TBD: I wonder if we really need struct acpi_processor_cx's idx field
     anymore. It's used in a number of (questionable) places ...

--- a/docs/misc/xen-command-line.pandoc
+++ b/docs/misc/xen-command-line.pandoc
@@ -1371,6 +1371,8 @@ This option is ignored in **pv-shim** mo
 ### max_cstate (x86)
 > `= <integer>`
 
+Specify the deepest C-state CPUs are permitted to be placed in.
+
 ### max_gsi_irqs (x86)
 > `= <integer>`
 
--- a/tools/misc/xenpm.c
+++ b/tools/misc/xenpm.c
@@ -64,7 +64,7 @@ void show_help(void)
             " set-sched-smt           enable|disable enable/disable scheduler smt power saving\n"
             " set-vcpu-migration-delay      <num> set scheduler vcpu migration delay in us\n"
             " get-vcpu-migration-delay            get scheduler vcpu migration delay\n"
-            " set-max-cstate        <num>         set the C-State limitation (<num> >= 0)\n"
+            " set-max-cstate        <num>|'unlimited' set the C-State limitation (<num> >= 0)\n"
             " start [seconds]                     start collect Cx/Px statistics,\n"
             "                                     output after CTRL-C or SIGINT or several seconds.\n"
             " enable-turbo-mode     [cpuid]       enable Turbo Mode for processors that support it.\n"
@@ -194,7 +194,11 @@ static int show_max_cstate(xc_interface
     if ( (ret = xc_get_cpuidle_max_cstate(xc_handle, &value)) )
         return ret;
 
-    printf("Max possible C-state: C%d\n\n", value);
+    if ( value < XEN_SYSCTL_CX_UNLIMITED )
+        printf("Max possible C-state: C%"PRIu32"\n\n", value);
+    else
+        printf("All C-states allowed\n\n");
+
     return 0;
 }
 
@@ -1117,18 +1121,24 @@ void get_vcpu_migration_delay_func(int a
 void set_max_cstate_func(int argc, char *argv[])
 {
     int value;
+    char buf[12];
 
-    if ( argc != 1 || sscanf(argv[0], "%d", &value) != 1 || value < 0 )
+    if ( argc != 1 ||
+         (sscanf(argv[0], "%d", &value) == 1
+          ? value < 0
+          : (value = XEN_SYSCTL_CX_UNLIMITED, strcmp(argv[0], "unlimited"))) )
     {
-        fprintf(stderr, "Missing or invalid argument(s)\n");
+        fprintf(stderr, "Missing, excess, or invalid argument(s)\n");
         exit(EINVAL);
     }
 
+    snprintf(buf, ARRAY_SIZE(buf), "C%d", value);
+
     if ( !xc_set_cpuidle_max_cstate(xc_handle, (uint32_t)value) )
-        printf("set max_cstate to C%d succeeded\n", value);
+        printf("set max C-state to %s succeeded\n", value >= 0 ? buf : argv[0]);
     else
-        fprintf(stderr, "set max_cstate to C%d failed (%d - %s)\n",
-                value, errno, strerror(errno));
+        fprintf(stderr, "set max C-state to %s failed (%d - %s)\n",
+                value >= 0 ? buf : argv[0], errno, strerror(errno));
 }
 
 void enable_turbo_mode(int argc, char *argv[])
--- a/xen/arch/x86/acpi/cpu_idle.c
+++ b/xen/arch/x86/acpi/cpu_idle.c
@@ -103,7 +103,7 @@ bool lapic_timer_init(void)
 }
 
 void (*__read_mostly pm_idle_save)(void);
-unsigned int max_cstate __read_mostly = ACPI_PROCESSOR_MAX_POWER - 1;
+unsigned int max_cstate __read_mostly = UINT_MAX;
 integer_param("max_cstate", max_cstate);
 static bool __read_mostly local_apic_timer_c2_ok;
 boolean_param("lapic_timer_c2_ok", local_apic_timer_c2_ok);
@@ -344,7 +344,8 @@ static void dump_cx(unsigned char key)
     unsigned int cpu;
 
     printk("'%c' pressed -> printing ACPI Cx structures\n", key);
-    printk("max cstate: C%u\n", max_cstate);
+    if ( max_cstate < UINT_MAX )
+        printk("max state: C%u\n", max_cstate);
     for_each_present_cpu ( cpu )
     {
         struct acpi_processor_power *power = processor_powers[cpu];
@@ -582,13 +583,19 @@ static void acpi_processor_idle(void)
     if ( max_cstate > 0 && power && !sched_has_urgent_vcpu() &&
          (next_state = cpuidle_current_governor->select(power)) > 0 )
     {
-        cx = &power->states[next_state];
-        if ( cx->type == ACPI_STATE_C3 && power->flags.bm_check &&
-             acpi_idle_bm_check() )
-            cx = power->safe_state;
-        if ( cx->idx > max_cstate )
-            cx = &power->states[max_cstate];
-        menu_get_trace_data(&exp, &pred);
+        do {
+            cx = &power->states[next_state];
+        } while ( cx->type > max_cstate && --next_state );
+        if ( next_state )
+        {
+            if ( cx->type == ACPI_STATE_C3 && power->flags.bm_check &&
+                 acpi_idle_bm_check() )
+                cx = power->safe_state;
+            if ( tb_init_done )
+                menu_get_trace_data(&exp, &pred);
+        }
+        else
+            cx = NULL;
     }
     if ( !cx )
     {
@@ -1395,12 +1402,12 @@ int pmstat_reset_cx_stat(uint32_t cpuid)
 
 void cpuidle_disable_deep_cstate(void)
 {
-    if ( max_cstate > 1 )
+    if ( max_cstate > ACPI_STATE_C1 )
     {
         if ( local_apic_timer_c2_ok )
-            max_cstate = 2;
+            max_cstate = ACPI_STATE_C2;
         else
-            max_cstate = 1;
+            max_cstate = ACPI_STATE_C1;
     }
 
     hpet_disable_legacy_broadcast();
@@ -1408,7 +1415,8 @@ void cpuidle_disable_deep_cstate(void)
 
 bool cpuidle_using_deep_cstate(void)
 {
-    return xen_cpuidle && max_cstate > (local_apic_timer_c2_ok ? 2 : 1);
+    return xen_cpuidle && max_cstate > (local_apic_timer_c2_ok ? ACPI_STATE_C2
+                                                               : ACPI_STATE_C1);
 }
 
 static int cpu_callback(
--- a/xen/arch/x86/cpu/mwait-idle.c
+++ b/xen/arch/x86/cpu/mwait-idle.c
@@ -731,7 +731,8 @@ static void mwait_idle(void)
 		} while (cx->type > max_cstate && --next_state);
 		if (!next_state)
 			cx = NULL;
-		menu_get_trace_data(&exp, &pred);
+		else if (tb_init_done)
+			menu_get_trace_data(&exp, &pred);
 	}
 	if (!cx) {
 		if (pm_idle_save)
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -2268,7 +2268,7 @@ static void dump_softtsc(unsigned char k
     else if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC ) )
     {
         printk("TSC has constant rate, ");
-        if (max_cstate <= 2 && tsc_max_warp == 0)
+        if ( max_cstate <= ACPI_STATE_C2 && tsc_max_warp == 0 )
             printk("no deep Cstates, passed warp test, deemed reliable, ");
         else
             printk("deep Cstates possible, so not reliable, ");
--- a/xen/drivers/acpi/pmstat.c
+++ b/xen/drivers/acpi/pmstat.c
@@ -451,6 +451,7 @@ int do_pm_op(struct xen_sysctl_pm_op *op
 
     case XEN_SYSCTL_pm_op_get_max_cstate:
     {
+        BUILD_BUG_ON(XEN_SYSCTL_CX_UNLIMITED != UINT_MAX);
         op->u.get_max_cstate = acpi_get_cstate_limit();
         break;
     }
--- a/xen/include/public/sysctl.h
+++ b/xen/include/public/sysctl.h
@@ -370,6 +370,7 @@ struct xen_sysctl_pm_op {
         struct xen_set_cpufreq_para set_para;
         uint64_aligned_t get_avgfreq;
         uint32_t                    set_sched_opt_smt;
+#define XEN_SYSCTL_CX_UNLIMITED 0xffffffff
         uint32_t                    get_max_cstate;
         uint32_t                    set_max_cstate;
     } u;




_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

  parent reply	other threads:[~2019-05-23 12:17 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-05-23 12:06 [PATCH 0/5] x86: CPU idle management adjustments Jan Beulich
2019-05-23 12:06 ` [Xen-devel] " Jan Beulich
2019-05-23 12:16 ` Jan Beulich [this message]
2019-05-23 12:16   ` [Xen-devel] [PATCH 1/5] x86/cpuidle: switch to uniform meaning of "max_cstate=" Jan Beulich
2019-06-10 15:48   ` Andrew Cooper
2019-06-11 12:13     ` Jan Beulich
2019-05-23 12:17 ` [PATCH 2/5] x86/cpuidle: really use C1 for "urgent" CPUs Jan Beulich
2019-05-23 12:17   ` [Xen-devel] " Jan Beulich
2019-06-10 15:50   ` Andrew Cooper
2019-05-23 12:18 ` [PATCH 3/5] x86/AMD: make C-state handling independent of Dom0 Jan Beulich
2019-05-23 12:18   ` [Xen-devel] " Jan Beulich
2019-06-10 16:28   ` Andrew Cooper
2019-06-11 12:42     ` Jan Beulich
2019-06-18 17:22       ` Woods, Brian
2019-06-19  6:20         ` Jan Beulich
2019-06-19 15:54           ` Woods, Brian
2019-06-21  6:37             ` Jan Beulich
2019-06-21 14:29               ` Woods, Brian
2019-06-21 14:56                 ` Jan Beulich
2019-06-21 15:26                   ` Woods, Brian
2019-05-23 12:18 ` [PATCH 4/5] x86: allow limiting the max C-state sub-state Jan Beulich
2019-05-23 12:18   ` [Xen-devel] " Jan Beulich
2019-06-10 16:43   ` Andrew Cooper
2019-06-11 12:46     ` Jan Beulich
2019-05-23 12:19 ` [PATCH 5/5] tools/libxc: allow controlling " Jan Beulich
2019-05-23 12:19   ` [Xen-devel] " Jan Beulich
2019-05-24 14:00   ` Wei Liu
2019-05-24 14:00     ` [Xen-devel] " Wei Liu
2019-06-10 16:54   ` Andrew Cooper
2019-06-11 12:50     ` Jan Beulich

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5CE68F360200007800231B25@prv1-mh.provo.novell.com \
    --to=jbeulich@suse.com \
    --cc=Ian.Jackson@eu.citrix.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=roger.pau@citrix.com \
    --cc=wei.liu2@citrix.com \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).