From: Frederic Weisbecker <frederic@kernel.org>
To: Marcelo Tosatti <mtosatti@redhat.com>
Cc: atomlin@atomlin.com, cl@linux.com, tglx@linutronix.de,
mingo@kernel.org, peterz@infradead.org, pauld@redhat.com,
neelx@redhat.com, oleksandr@natalenko.name,
linux-kernel@vger.kernel.org, linux-mm@kvack.org
Subject: Re: [PATCH v11 3/6] mm/vmstat: manage per-CPU stats from CPU context when NOHZ full
Date: Fri, 23 Dec 2022 15:41:50 +0100 [thread overview]
Message-ID: <20221223144150.GA79369@lothringen> (raw)
In-Reply-To: <20221221170436.330627967@redhat.com>
On Wed, Dec 21, 2022 at 01:58:04PM -0300, Marcelo Tosatti wrote:
> @@ -194,21 +195,50 @@ void fold_vm_numa_events(void)
> #endif
>
> #ifdef CONFIG_SMP
> -static DEFINE_PER_CPU_ALIGNED(bool, vmstat_dirty);
> +
> +struct vmstat_dirty {
> + bool dirty;
> + bool cpuhotplug;
Maybe call it "online" for clarity. Also, should it depend on CONFIG_FLUSH_WORK_ON_RESUME_USER?
> +};
> +
> +static DEFINE_PER_CPU_ALIGNED(struct vmstat_dirty, vmstat_dirty_pcpu);
> +static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
> +int sysctl_stat_interval __read_mostly = HZ;
>
> static inline void vmstat_mark_dirty(void)
> {
> - this_cpu_write(vmstat_dirty, true);
> + struct vmstat_dirty *vms = this_cpu_ptr(&vmstat_dirty_pcpu);
> +
> +#ifdef CONFIG_FLUSH_WORK_ON_RESUME_USER
Please avoid ifdeffery in the middle of a function when possible.
This block could be in a different function or use IS_ENABLED()
for example.
> + int cpu = smp_processor_id();
> +
> + if (tick_nohz_full_cpu(cpu) && !vms->dirty) {
> + struct delayed_work *dw;
> +
> + dw = this_cpu_ptr(&vmstat_work);
> + if (!delayed_work_pending(dw) && !vms->cpuhotplug) {
> + unsigned long delay;
> +
> + delay = round_jiffies_relative(sysctl_stat_interval);
> + queue_delayed_work_on(cpu, mm_percpu_wq, dw, delay);
> + }
> + }
> +#endif
> + vms->dirty = true;
> }
>
> static inline void vmstat_clear_dirty(void)
> {
> - this_cpu_write(vmstat_dirty, false);
> + struct vmstat_dirty *vms = this_cpu_ptr(&vmstat_dirty_pcpu);
> +
> + vms->dirty = false;
You could keep this_cpu_write(vmstat_dirty_pcpu.dirty, false).
> }
>
> static inline bool is_vmstat_dirty(void)
> {
> - return this_cpu_read(vmstat_dirty);
> + struct vmstat_dirty *vms = this_cpu_ptr(&vmstat_dirty_pcpu);
> +
> + return vms->dirty;
Ditto with this_cpu_read()?
> }
>
> int calculate_pressure_threshold(struct zone *zone)
> @@ -1981,13 +2008,18 @@ void quiet_vmstat(void)
> if (!is_vmstat_dirty())
> return;
>
> + refresh_cpu_vm_stats(false);
> +
> +#ifdef CONFIG_FLUSH_WORK_ON_RESUME_USER
This can use IS_ENABLED()
> + if (!user)
> + return;
> /*
> - * Just refresh counters and do not care about the pending delayed
> - * vmstat_update. It doesn't fire that often to matter and canceling
> - * it would be too expensive from this path.
> - * vmstat_shepherd will take care about that for us.
> + * If the tick is stopped, cancel any delayed work to avoid
> + * interruptions to this CPU in the future.
> */
> - refresh_cpu_vm_stats(false);
> + if (delayed_work_pending(this_cpu_ptr(&vmstat_work)))
> + cancel_delayed_work(this_cpu_ptr(&vmstat_work));
> +#endif
> }
>
> /*
> @@ -2008,8 +2040,15 @@ static void vmstat_shepherd(struct work_
> /* Check processors whose vmstat worker threads have been disabled */
> for_each_online_cpu(cpu) {
> struct delayed_work *dw = &per_cpu(vmstat_work, cpu);
> + struct vmstat_dirty *vms = per_cpu_ptr(&vmstat_dirty_pcpu, cpu);
>
> - if (!delayed_work_pending(dw) && per_cpu(vmstat_dirty, cpu))
> +#ifdef CONFIG_FLUSH_WORK_ON_RESUME_USER
Same here.
> + /* NOHZ full CPUs manage their own vmstat flushing */
> + if (tick_nohz_full_cpu(cpu))
> + continue;
> +#endif
> +
> + if (!delayed_work_pending(dw) && vms->dirty)
> queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0);
>
> cond_resched();
> @@ -2053,8 +2111,15 @@ static int vmstat_cpu_online(unsigned in
> return 0;
> }
>
> +/*
> + * ONLINE: The callbacks are invoked on the hotplugged CPU from the per CPU
> + * hotplug thread with interrupts and preemption enabled.
This is the OFFLINE callback, not ONLINE, and the reason behind that comment is confusing.
> + */
> static int vmstat_cpu_down_prep(unsigned int cpu)
> {
> + struct vmstat_dirty *vms = per_cpu_ptr(&vmstat_dirty_pcpu, cpu);
> +
> + vms->cpuhotplug = true;
this_cpu_write() ?
> cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
> return 0;
> }
> +config FLUSH_WORK_ON_RESUME_USER
> + bool "Flush per-CPU vmstats on user return (for nohz full CPUs)"
> + depends on NO_HZ_FULL
> + default y
> +
> + help
> + By default, nohz full CPUs flush per-CPU vm statistics on return
> + to userspace (to avoid additional interferences when executing
> + userspace code). This has a small but measurable impact on
> + system call performance. You can disable this to improve system call
> + performance, at the expense of potential interferences to userspace
> + execution.
Can you move that below config CPU_ISOLATION?
Thanks!
> +
> # multi-gen LRU {
> config LRU_GEN
> bool "Multi-Gen LRU"
>
>
next prev parent reply other threads:[~2022-12-23 14:42 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-12-21 16:58 [PATCH v11 0/6] Ensure quiet_vmstat() is called when returning to userpace and when idle tick is stopped Marcelo Tosatti
2022-12-21 16:58 ` [PATCH v11 1/6] mm/vmstat: Add CPU-specific variable to track a vmstat discrepancy Marcelo Tosatti
2022-12-21 16:58 ` [PATCH v11 2/6] mm/vmstat: Use vmstat_dirty to track CPU-specific vmstat discrepancies Marcelo Tosatti
2022-12-21 16:58 ` [PATCH v11 3/6] mm/vmstat: manage per-CPU stats from CPU context when NOHZ full Marcelo Tosatti
2022-12-23 14:41 ` Frederic Weisbecker [this message]
2022-12-21 16:58 ` [PATCH v11 4/6] tick/nohz_full: Ensure quiet_vmstat() is called on exit to user-mode when the idle tick is stopped Marcelo Tosatti
2022-12-21 16:58 ` [PATCH v11 5/6] tick/sched: Ensure quiet_vmstat() is called when the idle tick was stopped too Marcelo Tosatti
2022-12-21 16:58 ` [PATCH v11 6/6] mm/vmstat: avoid queueing work item if cpu stats are clean Marcelo Tosatti
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20221223144150.GA79369@lothringen \
--to=frederic@kernel.org \
--cc=atomlin@atomlin.com \
--cc=cl@linux.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mingo@kernel.org \
--cc=mtosatti@redhat.com \
--cc=neelx@redhat.com \
--cc=oleksandr@natalenko.name \
--cc=pauld@redhat.com \
--cc=peterz@infradead.org \
--cc=tglx@linutronix.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).