All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yonghong Song <yhs@meta.com>
To: Tero Kristo <tero.kristo@linux.intel.com>,
	shuah@kernel.org, tglx@linutronix.de, x86@kernel.org,
	bp@alien8.de, dave.hansen@linux.intel.com, mingo@redhat.com
Cc: ast@kernel.org, linux-kselftest@vger.kernel.org,
	linux-kernel@vger.kernel.org, andrii@kernel.org,
	daniel@iogearbox.net, bpf@vger.kernel.org
Subject: Re: [PATCH 1/2] x86/tsc: Add new BPF helper call bpf_rdtsc
Date: Mon, 3 Jul 2023 21:49:41 -0700	[thread overview]
Message-ID: <0ec3be00-1be1-540b-7598-a693c5a92c8a@meta.com> (raw)
In-Reply-To: <20230703105745.1314475-2-tero.kristo@linux.intel.com>



On 7/3/23 3:57 AM, Tero Kristo wrote:
> Currently the raw TSC counter can be read within kernel via rdtsc_ordered()
> and friends, and additionally even userspace has access to it via the
> RDTSC assembly instruction. BPF programs on the other hand don't have
> direct access to the TSC counter, but alternatively must go through the
> performance subsystem (bpf_perf_event_read), which only provides relative
> value compared to the start point of the program, and is also much slower
> than the direct read. Add a new BPF helper definition for bpf_rdtsc() which
> can be used for any accurate profiling needs.
> 
> A use-case for the new API is for example wakeup latency tracing via
> eBPF on Intel architecture, where it is extremely beneficial to be able
> to get raw TSC timestamps and compare these directly to the value
> programmed to the MSR_IA32_TSC_DEADLINE register. This way a direct
> latency value from the hardware interrupt to the execution of the
> interrupt handler can be calculated. Having the functionality within
> eBPF also has added benefits of allowing to filter any other relevant
> data like C-state residency values, and also to drop any irrelevant
> data points directly in the kernel context, without passing all the
> data to userspace for post-processing.
> 
> Signed-off-by: Tero Kristo <tero.kristo@linux.intel.com>
> ---
>   arch/x86/include/asm/msr.h |  1 +
>   arch/x86/kernel/tsc.c      | 23 +++++++++++++++++++++++
>   2 files changed, 24 insertions(+)
> 
> diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
> index 65ec1965cd28..3dde673cb563 100644
> --- a/arch/x86/include/asm/msr.h
> +++ b/arch/x86/include/asm/msr.h
> @@ -309,6 +309,7 @@ struct msr *msrs_alloc(void);
>   void msrs_free(struct msr *msrs);
>   int msr_set_bit(u32 msr, u8 bit);
>   int msr_clear_bit(u32 msr, u8 bit);
> +u64 bpf_rdtsc(void);
>   
>   #ifdef CONFIG_SMP
>   int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
> diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
> index 344698852146..ded857abef81 100644
> --- a/arch/x86/kernel/tsc.c
> +++ b/arch/x86/kernel/tsc.c
> @@ -15,6 +15,8 @@
>   #include <linux/timex.h>
>   #include <linux/static_key.h>
>   #include <linux/static_call.h>
> +#include <linux/btf.h>
> +#include <linux/btf_ids.h>
>   
>   #include <asm/hpet.h>
>   #include <asm/timer.h>
> @@ -29,6 +31,7 @@
>   #include <asm/intel-family.h>
>   #include <asm/i8259.h>
>   #include <asm/uv/uv.h>
> +#include <asm/tlbflush.h>
>   
>   unsigned int __read_mostly cpu_khz;	/* TSC clocks / usec, not used here */
>   EXPORT_SYMBOL(cpu_khz);
> @@ -1551,6 +1554,24 @@ void __init tsc_early_init(void)
>   	tsc_enable_sched_clock();
>   }
>   
> +u64 bpf_rdtsc(void)

Please see kernel/bpf/helpers.c. For a kfunc definition, we should have

__diag_push();
__diag_ignore_all("-Wmissing-prototypes",
                   "Global functions as their definitions will be in vmlinux BTF");

__bpf_kfunc u64 bpf_rdtsc(void)
{
	...
}

__diag_pop();


> +{
> +	/* Check if Time Stamp is enabled only in ring 0 */
> +	if (cr4_read_shadow() & X86_CR4_TSD)
> +		return 0;
> +
> +	return rdtsc_ordered();
> +}
> +
> +BTF_SET8_START(tsc_bpf_kfunc_ids)
> +BTF_ID_FLAGS(func, bpf_rdtsc)
> +BTF_SET8_END(tsc_bpf_kfunc_ids)
> +
> +static const struct btf_kfunc_id_set tsc_bpf_kfunc_set = {
> +	.owner		= THIS_MODULE,
> +	.set		= &tsc_bpf_kfunc_ids,
> +};
> +
>   void __init tsc_init(void)
>   {
>   	if (!cpu_feature_enabled(X86_FEATURE_TSC)) {
> @@ -1594,6 +1615,8 @@ void __init tsc_init(void)
>   
>   	clocksource_register_khz(&clocksource_tsc_early, tsc_khz);
>   	detect_art();
> +
> +	register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &tsc_bpf_kfunc_set);

register_btf_kfunc_id_set() could fail; maybe you at least want to
have a warning so bpf prog users are aware that the kfunc bpf_rdtsc()
is not really available to bpf programs?

>   }
>   
>   #ifdef CONFIG_SMP

  reply	other threads:[~2023-07-04  4:50 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-07-03 10:57 [PATCH 0/2] x86/BPF: Add new BPF helper call bpf_rdtsc Tero Kristo
2023-07-03 10:57 ` [PATCH 1/2] x86/tsc: " Tero Kristo
2023-07-04  4:49   ` Yonghong Song [this message]
2023-07-06 12:00     ` Tero Kristo
2023-07-06  3:02   ` Alexei Starovoitov
2023-07-06  5:16     ` John Fastabend
2023-07-06 11:59       ` Tero Kristo
2023-07-06 19:51         ` Alexei Starovoitov
2023-07-07  5:41           ` John Fastabend
2023-07-07  8:27             ` Tero Kristo
2023-07-07 14:42               ` Alexei Starovoitov
2023-08-09 11:31                 ` Tero Kristo
2023-07-03 10:57 ` [PATCH 2/2] selftests/bpf: Add test for bpf_rdtsc Tero Kristo
2023-07-03 22:00   ` John Fastabend
2023-07-04  8:55     ` Tero Kristo
2023-07-06  4:57       ` John Fastabend
2023-07-03 21:55 ` [PATCH 0/2] x86/BPF: Add new BPF helper call bpf_rdtsc John Fastabend

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=0ec3be00-1be1-540b-7598-a693c5a92c8a@meta.com \
    --to=yhs@meta.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bp@alien8.de \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=dave.hansen@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=shuah@kernel.org \
    --cc=tero.kristo@linux.intel.com \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.