Re: [PATCH bpf-next v3 1/6] bpf: Factorize bpf_trace_printk and bpf_seq_printf

From: Andrii Nakryiko <andrii.nakryiko@gmail.com>
To: Florent Revest <revest@chromium.org>
Cc: bpf <bpf@vger.kernel.org>, Alexei Starovoitov <ast@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Andrii Nakryiko <andrii@kernel.org>, Yonghong Song <yhs@fb.com>,
	KP Singh <kpsingh@kernel.org>,
	Brendan Jackman <jackmanb@chromium.org>,
	open list <linux-kernel@vger.kernel.org>
Subject: Re: [PATCH bpf-next v3 1/6] bpf: Factorize bpf_trace_printk and bpf_seq_printf
Date: Tue, 13 Apr 2021 16:01:33 -0700	[thread overview]
Message-ID: <CAEf4BzaUeE7EPObUuS=NPw9qmssxJ=i6+M1v6A3=wvLVGOKkXg@mail.gmail.com> (raw)
In-Reply-To: <20210412153754.235500-2-revest@chromium.org>

On Mon, Apr 12, 2021 at 8:38 AM Florent Revest <revest@chromium.org> wrote:
>
> Two helpers (trace_printk and seq_printf) have very similar
> implementations of format string parsing and a third one is coming
> (snprintf). To avoid code duplication and make the code easier to
> maintain, this moves the operations associated with format string
> parsing (validation and argument sanitization) into one generic
> function.
>
> The implementation of the two existing helpers already drifted quite a
> bit so unifying them entailed a lot of changes:
>
> - bpf_trace_printk always expected fmt[fmt_size] to be the terminating
>   NULL character, this is no longer true, the first 0 is terminating.
> - bpf_trace_printk now supports %% (which produces the percentage char).
> - bpf_trace_printk now skips width formating fields.
> - bpf_trace_printk now supports the X modifier (capital hexadecimal).
> - bpf_trace_printk now supports %pK, %px, %pB, %pi4, %pI4, %pi6 and %pI6
> - argument casting on 32 bit has been simplified into one macro and
>   using an enum instead of obscure int increments.
>
> - bpf_seq_printf now uses bpf_trace_copy_string instead of
>   strncpy_from_kernel_nofault and handles the %pks %pus specifiers.
> - bpf_seq_printf now prints longs correctly on 32 bit architectures.
>
> - both were changed to use a global per-cpu tmp buffer instead of one
>   stack buffer for trace_printk and 6 small buffers for seq_printf.
> - to avoid per-cpu buffer usage conflict, these helpers disable
>   preemption while the per-cpu buffer is in use.
> - both helpers now support the %ps and %pS specifiers to print symbols.
>
> Signed-off-by: Florent Revest <revest@chromium.org>
> ---
>  kernel/trace/bpf_trace.c | 529 ++++++++++++++++++---------------------
>  1 file changed, 248 insertions(+), 281 deletions(-)
>

[...]

> +/* Per-cpu temp buffers which can be used by printf-like helpers for %s or %p
> + */
> +#define MAX_PRINTF_BUF_LEN     512
> +
> +struct bpf_printf_buf {
> +       char tmp_buf[MAX_PRINTF_BUF_LEN];
> +};
> +static DEFINE_PER_CPU(struct bpf_printf_buf, bpf_printf_buf);
> +static DEFINE_PER_CPU(int, bpf_printf_buf_used);
> +
> +static int try_get_fmt_tmp_buf(char **tmp_buf)
>  {
> -       static char buf[BPF_TRACE_PRINTK_SIZE];
> -       unsigned long flags;
> -       va_list ap;
> -       int ret;
> +       struct bpf_printf_buf *bufs = this_cpu_ptr(&bpf_printf_buf);

why doing this_cpu_ptr() if below (if *tmp_buf case), you will not use
it. just a waste of CPU, no?

> +       int used;
>
> -       raw_spin_lock_irqsave(&trace_printk_lock, flags);
> -       va_start(ap, fmt);
> -       ret = vsnprintf(buf, sizeof(buf), fmt, ap);
> -       va_end(ap);
> -       /* vsnprintf() will not append null for zero-length strings */
> -       if (ret == 0)
> -               buf[0] = '\0';
> -       trace_bpf_trace_printk(buf);
> -       raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
> +       if (*tmp_buf)
> +               return 0;
>
> -       return ret;
> +       preempt_disable();
> +       used = this_cpu_inc_return(bpf_printf_buf_used);
> +       if (WARN_ON_ONCE(used > 1)) {
> +               this_cpu_dec(bpf_printf_buf_used);
> +               return -EBUSY;
> +       }

get bufs pointer here instead?

> +       *tmp_buf = bufs->tmp_buf;
> +
> +       return 0;
> +}
> +
> +static void put_fmt_tmp_buf(void)
> +{
> +       if (this_cpu_read(bpf_printf_buf_used)) {
> +               this_cpu_dec(bpf_printf_buf_used);
> +               preempt_enable();
> +       }
>  }
>
>  /*
> - * Only limited trace_printk() conversion specifiers allowed:
> - * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %pB %pks %pus %s
> + * bpf_parse_fmt_str - Generic pass on format strings for printf-like helpers
> + *
> + * Returns a negative value if fmt is an invalid format string or 0 otherwise.
> + *
> + * This can be used in two ways:
> + * - Format string verification only: when final_args and mod are NULL
> + * - Arguments preparation: in addition to the above verification, it writes in
> + *   final_args a copy of raw_args where pointers from BPF have been sanitized
> + *   into pointers safe to use by snprintf. This also writes in the mod array
> + *   the size requirement of each argument, usable by BPF_CAST_FMT_ARG for ex.
> + *
> + * In argument preparation mode, if 0 is returned, safe temporary buffers are
> + * allocated and put_fmt_tmp_buf should be called to free them after use.
>   */
> -BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
> -          u64, arg2, u64, arg3)
> -{
> -       int i, mod[3] = {}, fmt_cnt = 0;
> -       char buf[64], fmt_ptype;
> -       void *unsafe_ptr = NULL;
> -       bool str_seen = false;
> +int bpf_printf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
> +                       u64 *final_args, enum bpf_printf_mod_type *mod,
> +                       u32 num_args)
> +{
> +       int err, i, curr_specifier = 0, copy_size;
> +       char *unsafe_ptr = NULL, *tmp_buf = NULL;
> +       size_t tmp_buf_len = MAX_PRINTF_BUF_LEN;
> +       enum bpf_printf_mod_type current_mod;
> +       u64 current_arg;

naming consistency: current_arg vs curr_specifier? maybe just cur_arg
and cur_spec?

> +       char fmt_ptype;
> +
> +       if ((final_args && !mod) || (mod && !final_args))

nit: same check:

if (!!final_args != !!mod)

> +               return -EINVAL;
>
> -       /*
> -        * bpf_check()->check_func_arg()->check_stack_boundary()
> -        * guarantees that fmt points to bpf program stack,
> -        * fmt_size bytes of it were initialized and fmt_size > 0
> -        */
> -       if (fmt[--fmt_size] != 0)
> +       fmt_size = (strnchr(fmt, fmt_size, 0) - fmt);

extra ()

> +       if (!fmt_size)

hm... strnchr() will return NULL if the character is not found, so
fmt_size will be some non-zero value (due to - fmt), how is this
supposed to work?

some negative tests are clearly missing, it seems, if you didn't catch this

>                 return -EINVAL;
>
> -       /* check format string for allowed specifiers */
>         for (i = 0; i < fmt_size; i++) {
> -               if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i]))
> -                       return -EINVAL;
> +               if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
> +                       err = -EINVAL;
> +                       goto out;
> +               }
>
>                 if (fmt[i] != '%')
>                         continue;
>
> -               if (fmt_cnt >= 3)
> -                       return -EINVAL;
> +               if (fmt[i + 1] == '%') {
> +                       i++;
> +                       continue;
> +               }
> +
> +               if (curr_specifier >= num_args) {
> +                       err = -EINVAL;
> +                       goto out;
> +               }
>
>                 /* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */

a bit outdated comment, last doesn't exist anymore. I think the
comment is trying to say that fmt[i + 1] can be read because in the
worst case it will be a final zero terminator (which we checked
above).

>                 i++;
> -               if (fmt[i] == 'l') {
> -                       mod[fmt_cnt]++;
> +

[...]

> +       err = 0;
> +out:
> +       put_fmt_tmp_buf();

so you are putting tmp_buf unconditionally, even when there was no
error. That seems wrong? Should this be:

if (err)
    put_fmt_tmp_buf()

?

> +       return err;
> +}
> +

[...]