From: Andrii Nakryiko <andrii.nakryiko@gmail.com>
To: Lorenz Bauer <lmb@cloudflare.com>
Cc: bpf <bpf@vger.kernel.org>, Andrii Nakryiko <andrii@kernel.org>,
Florent Revest <revest@chromium.org>
Subject: Re: Portability of bpf_tracing.h
Date: Wed, 26 May 2021 11:34:04 -0700 [thread overview]
Message-ID: <CAEf4BzZftL2q9qAoeXsO87-Wx9AbF8A1mLnBAtBrGo=XSx996g@mail.gmail.com> (raw)
In-Reply-To: <CACAyw99QydcWBeE3T_4g5QzuDyfb_MEpR1V0EzEwbY=R-s202w@mail.gmail.com>
On Wed, May 26, 2021 at 2:13 AM Lorenz Bauer <lmb@cloudflare.com> wrote:
>
> On Mon, 24 May 2021 at 18:48, Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
> >
> > If there are enums/types/fields that we can use to reliably detect the
> > platform, then yes, we can have a new set of helpers that would do
> > this with CO-RE. Someone will need to investigate how to do that for
> > all the platforms we have. It's all about finding something that's
> > already in the kernel and can server as a reliably indicator of a
> > target architecture.
>
> Can you explain a bit more how this would work? Seems like leg work I could do.
>
So I did a bit of investigation and gathered struct pt_regs
definitions from all the "supported" architectures in bpf_tracing.h.
I'll leave it here for further reference.
i386
====
struct pt_regs {
unsigned long bx;
unsigned long cx;
unsigned long dx;
unsigned long si;
unsigned long di;
unsigned long bp;
unsigned long ax;
unsigned short ds;
unsigned short __dsh;
unsigned short es;
unsigned short __esh;
unsigned short fs;
unsigned short __fsh;
unsigned short gs;
unsigned short __gsh;
unsigned long orig_ax;
unsigned long ip;
unsigned short cs;
unsigned short __csh;
unsigned long flags;
unsigned long sp;
unsigned short ss;
unsigned short __ssh;
};
x86-64
======
struct pt_regs {
unsigned long r15;
unsigned long r14;
unsigned long r13;
unsigned long r12;
unsigned long bp;
unsigned long bx;
unsigned long r11;
unsigned long r10;
unsigned long r9;
unsigned long r8;
unsigned long ax;
unsigned long cx;
unsigned long dx;
unsigned long si;
unsigned long di;
unsigned long orig_ax;
unsigned long ip;
unsigned long cs;
unsigned long flags;
unsigned long sp;
unsigned long ss;
};
s390
====
struct pt_regs
{
union {
user_pt_regs user_regs;
struct {
unsigned long args[1];
psw_t psw;
unsigned long gprs[NUM_GPRS];
};
};
unsigned long orig_gpr2;
unsigned int int_code;
unsigned int int_parm;
unsigned long int_parm_long;
unsigned long flags;
unsigned long cr1;
};
arm
===
struct pt_regs {
unsigned long uregs[18];
};
arm64
=====
struct pt_regs {
union {
struct user_pt_regs user_regs;
struct {
u64 regs[31];
u64 sp;
u64 pc;
u64 pstate;
};
};
u64 orig_x0;
#ifdef __AARCH64EB__
u32 unused2;
s32 syscallno;
#else
s32 syscallno;
u32 unused2;
#endif
u64 sdei_ttbr1;
u64 pmr_save;
u64 stackframe[2];
u64 lockdep_hardirqs;
u64 exit_rcu;
};
mips
====
struct pt_regs {
#ifdef CONFIG_32BIT
unsigned long pad0[8];
#endif
unsigned long regs[32];
unsigned long cp0_status;
unsigned long hi;
unsigned long lo;
#ifdef CONFIG_CPU_HAS_SMARTMIPS
unsigned long acx;
#endif
unsigned long cp0_badvaddr;
unsigned long cp0_cause;
unsigned long cp0_epc;
#ifdef CONFIG_CPU_CAVIUM_OCTEON
unsigned long long mpl[6]; /* MTM{0-5} */
unsigned long long mtp[6]; /* MTP{0-5} */
#endif
unsigned long __last[0];
} __aligned(8);
powerpc
=======
struct pt_regs
{
union {
struct user_pt_regs user_regs;
struct {
unsigned long gpr[32];
unsigned long nip;
unsigned long msr;
unsigned long orig_gpr3;
unsigned long ctr;
unsigned long link;
unsigned long xer;
unsigned long ccr;
#ifdef CONFIG_PPC64
unsigned long softe;
#else
unsigned long mq;
#endif
unsigned long trap;
unsigned long dar;
unsigned long dsisr;
unsigned long result;
};
};
union {
struct {
#ifdef CONFIG_PPC64
unsigned long ppr;
#endif
union {
#ifdef CONFIG_PPC_KUAP
unsigned long kuap;
#endif
#ifdef CONFIG_PPC_PKEY
unsigned long amr;
#endif
};
#ifdef CONFIG_PPC_PKEY
unsigned long iamr;
#endif
};
unsigned long __pad[4]; /* Maintain 16 byte interrupt
stack alignment */
};
};
sparc
=====
struct pt_regs {
unsigned long u_regs[16]; /* globals and ins */
unsigned long tstate;
unsigned long tpc;
unsigned long tnpc;
unsigned int y;
/* We encode a magic number, PT_REGS_MAGIC, along
* with the %tt (trap type) register value at trap
* entry time. The magic number allows us to identify
* accurately a trap stack frame in the stack
* unwinder, and the %tt value allows us to test
* things like "in a system call" etc. for an arbitray
* process.
*
* The PT_REGS_MAGIC is chosen such that it can be
* loaded completely using just a sethi instruction.
*/
unsigned int magic;
};
Now, note how each architecture has some uniquely named fields.
Assuming we pick something that is not going to get renamed easily, we
should be able to do something like this:
struct pt_regs___x86 {
unsigned long di;
} __attribute__((preserve_access_index));
struct pt_regs___s390 {
unsigned long gprs[NUM_GPRS];
} __attribute__((preserve_access_index));
struct pt_regs___powerpc {
unsigned long gpr[32]
} __attribute__((preserve_access_index));
/* and so on for all arches */
Then PT_REGS_PARM1 CO-RE equivalent would be implemented like this:
#define ___arch_is_x86 (bpf_core_field_exists(((struct pt_regs___x86 *)0)->di))
#define ___arch_is_s390 (bpf_core_field_exists(((struct pt_regs___s390
*)0)->gprs))
#define ___arch_is_powerpc (bpf_core_field_exists(((struct
pt_regs___powerpc *)0)->gpr))
static unsigned long bpf_pt_regs_parm1(const void *regs)
{
if (___arch_is_x86)
return ((struct pt_regs___x86 *)regs)->di;
else if (___arch_is_s390)
return ((struct pt_regs___s390 *)regs)->gprs[2];
else if (___arch_is_powerpc)
return ((struct pt_regs___powerpc *)regs)->gpr[3];
else
while(1); /* need some better way to force BPF verification failure */
}
And so on for other architectures and other helpers, you should get
the idea from the above.
As a shameless plug, if you'd like to see some more examples of using
CO-RE for detecting kernel features, see [0]
[0] https://nakryiko.com/posts/bpf-tips-printk/
> > Well, obviously I'm not a fan of even more magic #defines. But I think
> > we can achieve a similar effect with a more "lazy" approach. I.e., if
> > user tries to use PT_REGS_xxx macros but doesn't specify the platform
> > -- only then it gets compilation errors. There is stuff in
> > bpf_tracing.h that doesn't need pt_regs, so we can't just outright do
> > #error unconditinally. But we can do something like this:
> >
> > #else /* !bpf_target_defined */
> >
> > #define PT_REGS_PARM1(x) _Pragma("GCC error \"blah blah something
> > user-facing\"")
> >
> > ... and so on for all macros
> >
> > #endif
> >
> > Thoughts?
>
> That would work for me, but it would change the behaviour for current
> users of the header, no? That's why I added the magic define in the
> first place.
How so? If someone is using PT_REGS_PARM1 without setting target arch
they should get compilation error about undefined macro. Here it will
be the same thing, only if someone tries to use PT_REGS_PARM1() will
they reach that _Pragma.
Or am I missing something?
>
> --
> Lorenz Bauer | Systems Engineer
> 6th Floor, County Hall/The Riverside Building, SE1 7PB, UK
>
> www.cloudflare.com
next prev parent reply other threads:[~2021-05-26 18:34 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-05-24 15:05 Portability of bpf_tracing.h Lorenz Bauer
2021-05-24 17:47 ` Andrii Nakryiko
2021-05-24 19:30 ` John Fastabend
2021-05-25 0:13 ` Andrii Nakryiko
2021-05-26 9:13 ` Lorenz Bauer
2021-05-26 18:34 ` Andrii Nakryiko [this message]
2021-05-28 8:29 ` Lorenz Bauer
2021-05-30 0:51 ` Andrii Nakryiko
2021-06-10 14:09 ` Lorenz Bauer
2021-06-10 18:14 ` Alexei Starovoitov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='CAEf4BzZftL2q9qAoeXsO87-Wx9AbF8A1mLnBAtBrGo=XSx996g@mail.gmail.com' \
--to=andrii.nakryiko@gmail.com \
--cc=andrii@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=lmb@cloudflare.com \
--cc=revest@chromium.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).