All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrii Nakryiko <andrii.nakryiko@gmail.com>
To: Lorenz Bauer <lmb@cloudflare.com>
Cc: bpf <bpf@vger.kernel.org>, Andrii Nakryiko <andrii@kernel.org>,
	Florent Revest <revest@chromium.org>
Subject: Re: Portability of bpf_tracing.h
Date: Wed, 26 May 2021 11:34:04 -0700	[thread overview]
Message-ID: <CAEf4BzZftL2q9qAoeXsO87-Wx9AbF8A1mLnBAtBrGo=XSx996g@mail.gmail.com> (raw)
In-Reply-To: <CACAyw99QydcWBeE3T_4g5QzuDyfb_MEpR1V0EzEwbY=R-s202w@mail.gmail.com>

On Wed, May 26, 2021 at 2:13 AM Lorenz Bauer <lmb@cloudflare.com> wrote:
>
> On Mon, 24 May 2021 at 18:48, Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
> >
> > If there are enums/types/fields that we can use to reliably detect the
> > platform, then yes, we can have a new set of helpers that would do
> > this with CO-RE. Someone will need to investigate how to do that for
> > all the platforms we have. It's all about finding something that's
> > already in the kernel and can server as a reliably indicator of a
> > target architecture.
>
> Can you explain a bit more how this would work? Seems like leg work I could do.
>

So I did a bit of investigation and gathered struct pt_regs
definitions from all the "supported" architectures in bpf_tracing.h.
I'll leave it here for further reference.

i386
====

struct pt_regs {
        unsigned long bx;
        unsigned long cx;
        unsigned long dx;
        unsigned long si;
        unsigned long di;
        unsigned long bp;
        unsigned long ax;
        unsigned short ds;
        unsigned short __dsh;
        unsigned short es;
        unsigned short __esh;
        unsigned short fs;
        unsigned short __fsh;
        unsigned short gs;
        unsigned short __gsh;
        unsigned long orig_ax;
        unsigned long ip;
        unsigned short cs;
        unsigned short __csh;
        unsigned long flags;
        unsigned long sp;
        unsigned short ss;
        unsigned short __ssh;
};

x86-64
======

struct pt_regs {
        unsigned long r15;
        unsigned long r14;
        unsigned long r13;
        unsigned long r12;
        unsigned long bp;
        unsigned long bx;
        unsigned long r11;
        unsigned long r10;
        unsigned long r9;
        unsigned long r8;
        unsigned long ax;
        unsigned long cx;
        unsigned long dx;
        unsigned long si;
        unsigned long di;
        unsigned long orig_ax;
        unsigned long ip;
        unsigned long cs;
        unsigned long flags;
        unsigned long sp;
        unsigned long ss;
};

s390
====

struct pt_regs
{
        union {
                user_pt_regs user_regs;
                struct {
                        unsigned long args[1];
                        psw_t psw;
                        unsigned long gprs[NUM_GPRS];
                };
        };
        unsigned long orig_gpr2;
        unsigned int int_code;
        unsigned int int_parm;
        unsigned long int_parm_long;
        unsigned long flags;
        unsigned long cr1;
};

arm
===

struct pt_regs {
        unsigned long uregs[18];
};

arm64
=====

struct pt_regs {
        union {
                struct user_pt_regs user_regs;
                struct {
                        u64 regs[31];
                        u64 sp;
                        u64 pc;
                        u64 pstate;
                };
        };
        u64 orig_x0;
#ifdef __AARCH64EB__
        u32 unused2;
        s32 syscallno;
#else
        s32 syscallno;
        u32 unused2;
#endif
        u64 sdei_ttbr1;
        u64 pmr_save;
        u64 stackframe[2];
        u64 lockdep_hardirqs;
        u64 exit_rcu;
};

mips
====

struct pt_regs {
#ifdef CONFIG_32BIT
        unsigned long pad0[8];
#endif
        unsigned long regs[32];
        unsigned long cp0_status;
        unsigned long hi;
        unsigned long lo;
#ifdef CONFIG_CPU_HAS_SMARTMIPS
        unsigned long acx;
#endif
        unsigned long cp0_badvaddr;
        unsigned long cp0_cause;
        unsigned long cp0_epc;
#ifdef CONFIG_CPU_CAVIUM_OCTEON
        unsigned long long mpl[6];        /* MTM{0-5} */
        unsigned long long mtp[6];        /* MTP{0-5} */
#endif
        unsigned long __last[0];
} __aligned(8);


powerpc
=======

struct pt_regs
{
        union {
                struct user_pt_regs user_regs;
                struct {
                        unsigned long gpr[32];
                        unsigned long nip;
                        unsigned long msr;
                        unsigned long orig_gpr3;
                        unsigned long ctr;
                        unsigned long link;
                        unsigned long xer;
                        unsigned long ccr;
#ifdef CONFIG_PPC64
                        unsigned long softe;
#else
                        unsigned long mq;
#endif
                        unsigned long trap;
                        unsigned long dar;
                        unsigned long dsisr;
                        unsigned long result;
                };
        };
        union {
                struct {
#ifdef CONFIG_PPC64
                        unsigned long ppr;
#endif
                        union {
#ifdef CONFIG_PPC_KUAP
                                unsigned long kuap;
#endif
#ifdef CONFIG_PPC_PKEY
                                unsigned long amr;
#endif
                        };
#ifdef CONFIG_PPC_PKEY
                        unsigned long iamr;
#endif
                };
                unsigned long __pad[4]; /* Maintain 16 byte interrupt
stack alignment */
        };
};


sparc
=====

struct pt_regs {
        unsigned long u_regs[16]; /* globals and ins */
        unsigned long tstate;
        unsigned long tpc;
        unsigned long tnpc;
        unsigned int y;

        /* We encode a magic number, PT_REGS_MAGIC, along
         * with the %tt (trap type) register value at trap
         * entry time.  The magic number allows us to identify
         * accurately a trap stack frame in the stack
         * unwinder, and the %tt value allows us to test
         * things like "in a system call" etc. for an arbitray
         * process.
         *
         * The PT_REGS_MAGIC is chosen such that it can be
         * loaded completely using just a sethi instruction.
         */
        unsigned int magic;
};


Now, note how each architecture has some uniquely named fields.
Assuming we pick something that is not going to get renamed easily, we
should be able to do something like this:

struct pt_regs___x86 {
    unsigned long di;
} __attribute__((preserve_access_index));

struct pt_regs___s390 {
    unsigned long gprs[NUM_GPRS];
} __attribute__((preserve_access_index));

struct pt_regs___powerpc {
    unsigned long gpr[32]
} __attribute__((preserve_access_index));

/* and so on for all arches */

Then PT_REGS_PARM1 CO-RE equivalent would be implemented like this:

#define ___arch_is_x86 (bpf_core_field_exists(((struct pt_regs___x86 *)0)->di))
#define ___arch_is_s390 (bpf_core_field_exists(((struct pt_regs___s390
*)0)->gprs))
#define ___arch_is_powerpc (bpf_core_field_exists(((struct
pt_regs___powerpc *)0)->gpr))

static unsigned long bpf_pt_regs_parm1(const void *regs)
{
    if (___arch_is_x86)
        return ((struct pt_regs___x86 *)regs)->di;
    else if (___arch_is_s390)
        return ((struct pt_regs___s390 *)regs)->gprs[2];
    else if (___arch_is_powerpc)
        return ((struct pt_regs___powerpc *)regs)->gpr[3];
    else
        while(1); /* need some better way to force BPF verification failure */
}

And so on for other architectures and other helpers, you should get
the idea from the above.

As a shameless plug, if you'd like to see some more examples of using
CO-RE for detecting kernel features, see [0]

  [0] https://nakryiko.com/posts/bpf-tips-printk/

> > Well, obviously I'm not a fan of even more magic #defines. But I think
> > we can achieve a similar effect with a more "lazy" approach. I.e., if
> > user tries to use PT_REGS_xxx macros but doesn't specify the platform
> > -- only then it gets compilation errors. There is stuff in
> > bpf_tracing.h that doesn't need pt_regs, so we can't just outright do
> > #error unconditinally. But we can do something like this:
> >
> > #else /* !bpf_target_defined */
> >
> > #define PT_REGS_PARM1(x) _Pragma("GCC error \"blah blah something
> > user-facing\"")
> >
> > ... and so on for all macros
> >
> > #endif
> >
> > Thoughts?
>
> That would work for me, but it would change the behaviour for current
> users of the header, no? That's why I added the magic define in the
> first place.

How so? If someone is using PT_REGS_PARM1 without setting target arch
they should get compilation error about undefined macro. Here it will
be the same thing, only if someone tries to use PT_REGS_PARM1() will
they reach that _Pragma.

Or am I missing something?

>
> --
> Lorenz Bauer  |  Systems Engineer
> 6th Floor, County Hall/The Riverside Building, SE1 7PB, UK
>
> www.cloudflare.com

  reply	other threads:[~2021-05-26 18:34 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-05-24 15:05 Portability of bpf_tracing.h Lorenz Bauer
2021-05-24 17:47 ` Andrii Nakryiko
2021-05-24 19:30   ` John Fastabend
2021-05-25  0:13     ` Andrii Nakryiko
2021-05-26  9:13   ` Lorenz Bauer
2021-05-26 18:34     ` Andrii Nakryiko [this message]
2021-05-28  8:29       ` Lorenz Bauer
2021-05-30  0:51         ` Andrii Nakryiko
2021-06-10 14:09           ` Lorenz Bauer
2021-06-10 18:14             ` Alexei Starovoitov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CAEf4BzZftL2q9qAoeXsO87-Wx9AbF8A1mLnBAtBrGo=XSx996g@mail.gmail.com' \
    --to=andrii.nakryiko@gmail.com \
    --cc=andrii@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=lmb@cloudflare.com \
    --cc=revest@chromium.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.