bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH bpf-next v1] bpf, iter: clean up bpf_seq_read().
@ 2022-08-01 20:50 Hao Luo
  2022-08-02 11:14 ` Jiri Olsa
  2022-08-03 11:53 ` Jiri Olsa
  0 siblings, 2 replies; 5+ messages in thread
From: Hao Luo @ 2022-08-01 20:50 UTC (permalink / raw)
  To: linux-kernel, bpf
  Cc: Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann,
	Martin KaFai Lau, Song Liu, Yonghong Song, KP Singh,
	John Fastabend, Stanislav Fomichev, Jiri Olsa, Hao Luo

Refactor bpf_seq_read() by extracting some common logic into helper
functions. I hope this makes bpf_seq_read() more readable. This is
a refactoring patch, so no behavior change is expected.

Signed-off-by: Hao Luo <haoluo@google.com>
---
 kernel/bpf/bpf_iter.c | 156 +++++++++++++++++++++++++-----------------
 1 file changed, 93 insertions(+), 63 deletions(-)

diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 7e8fd49406f6..39b5b647fdb7 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -77,6 +77,83 @@ static bool bpf_iter_support_resched(struct seq_file *seq)
 	return iter_priv->tinfo->reg_info->feature & BPF_ITER_RESCHED;
 }
 
+/* do_copy_to_user, copies seq->buf at offset seq->from to userspace and
+ * updates corresponding fields in seq. It returns -EFAULT if any error
+ * happens. The actual number of bytes copied is returned via the argument
+ * 'copied'.
+ */
+static int do_copy_to_user(struct seq_file *seq, char __user *buf, size_t size,
+			   size_t *copied)
+{
+	size_t n;
+
+	n = min(seq->count, size);
+	if (copy_to_user(buf, seq->buf + seq->from, n))
+		return -EFAULT;
+
+	seq->count -= n;
+	seq->from += n;
+	*copied = n;
+	return 0;
+}
+
+/* do_seq_show, shows the given object 'p'. If 'p' is skipped or
+ * error happens, resets seq->count to 'offs'.
+ *
+ * Returns err > 0, indicating show() skips this object.
+ * Returns err = 0, indicating show() succeeds.
+ * Returns err < 0, indicating show() fails or overflow happened.
+ */
+static int do_seq_show(struct seq_file *seq, void *p, size_t offs)
+{
+	int err;
+
+	WARN_ON(IS_ERR_OR_NULL(p));
+
+	err = seq->op->show(seq, p);
+	if (err > 0) {
+		/* object is skipped, decrease seq_num, so next
+		 * valid object can reuse the same seq_num.
+		 */
+		bpf_iter_dec_seq_num(seq);
+		seq->count = offs;
+		return err;
+	}
+
+	if (err < 0 || seq_has_overflowed(seq)) {
+		seq->count = offs;
+		return err ? err : -E2BIG;
+	}
+
+	/* err == 0 and no overflow */
+	return 0;
+}
+
+/* do_seq_stop, stops at the given object 'p'. 'p' could be an ERR or NULL. If
+ * 'p' is an ERR or there was an overflow, reset seq->count to 'offs' and
+ * returns error. Returns 0 otherwise.
+ */
+static int do_seq_stop(struct seq_file *seq, void *p, size_t offs)
+{
+	if (IS_ERR(p)) {
+		seq->op->stop(seq, NULL);
+		seq->count = offs;
+		return PTR_ERR(p);
+	}
+
+	seq->op->stop(seq, p);
+	if (!p) {
+		if (!seq_has_overflowed(seq)) {
+			bpf_iter_done_stop(seq);
+		} else {
+			seq->count = offs;
+			if (offs == 0)
+				return -E2BIG;
+		}
+	}
+	return 0;
+}
+
 /* maximum visited objects before bailing out */
 #define MAX_ITER_OBJECTS	1000000
 
@@ -91,7 +168,7 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
 			    loff_t *ppos)
 {
 	struct seq_file *seq = file->private_data;
-	size_t n, offs, copied = 0;
+	size_t offs, copied = 0;
 	int err = 0, num_objs = 0;
 	bool can_resched;
 	void *p;
@@ -108,40 +185,18 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
 	}
 
 	if (seq->count) {
-		n = min(seq->count, size);
-		err = copy_to_user(buf, seq->buf + seq->from, n);
-		if (err) {
-			err = -EFAULT;
-			goto done;
-		}
-		seq->count -= n;
-		seq->from += n;
-		copied = n;
+		err = do_copy_to_user(seq, buf, size, &copied);
 		goto done;
 	}
 
 	seq->from = 0;
 	p = seq->op->start(seq, &seq->index);
-	if (!p)
+	if (IS_ERR_OR_NULL(p))
 		goto stop;
-	if (IS_ERR(p)) {
-		err = PTR_ERR(p);
-		seq->op->stop(seq, p);
-		seq->count = 0;
-		goto done;
-	}
 
-	err = seq->op->show(seq, p);
-	if (err > 0) {
-		/* object is skipped, decrease seq_num, so next
-		 * valid object can reuse the same seq_num.
-		 */
-		bpf_iter_dec_seq_num(seq);
-		seq->count = 0;
-	} else if (err < 0 || seq_has_overflowed(seq)) {
-		if (!err)
-			err = -E2BIG;
-		seq->op->stop(seq, p);
+	err = do_seq_show(seq, p, 0);
+	if (err < 0) {
+		do_seq_stop(seq, p, 0);
 		seq->count = 0;
 		goto done;
 	}
@@ -153,7 +208,7 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
 		num_objs++;
 		offs = seq->count;
 		p = seq->op->next(seq, p, &seq->index);
-		if (pos == seq->index) {
+		if (unlikely(pos == seq->index)) {
 			pr_info_ratelimited("buggy seq_file .next function %ps "
 				"did not updated position index\n",
 				seq->op->next);
@@ -161,7 +216,7 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
 		}
 
 		if (IS_ERR_OR_NULL(p))
-			break;
+			goto stop;
 
 		/* got a valid next object, increase seq_num */
 		bpf_iter_inc_seq_num(seq);
@@ -172,22 +227,16 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
 		if (num_objs >= MAX_ITER_OBJECTS) {
 			if (offs == 0) {
 				err = -EAGAIN;
-				seq->op->stop(seq, p);
+				do_seq_stop(seq, p, seq->count);
 				goto done;
 			}
 			break;
 		}
 
-		err = seq->op->show(seq, p);
-		if (err > 0) {
-			bpf_iter_dec_seq_num(seq);
-			seq->count = offs;
-		} else if (err < 0 || seq_has_overflowed(seq)) {
-			seq->count = offs;
+		err = do_seq_show(seq, p, offs);
+		if (err < 0) {
 			if (offs == 0) {
-				if (!err)
-					err = -E2BIG;
-				seq->op->stop(seq, p);
+				do_seq_stop(seq, p, seq->count);
 				goto done;
 			}
 			break;
@@ -197,30 +246,11 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
 			cond_resched();
 	}
 stop:
-	offs = seq->count;
-	/* bpf program called if !p */
-	seq->op->stop(seq, p);
-	if (!p) {
-		if (!seq_has_overflowed(seq)) {
-			bpf_iter_done_stop(seq);
-		} else {
-			seq->count = offs;
-			if (offs == 0) {
-				err = -E2BIG;
-				goto done;
-			}
-		}
-	}
-
-	n = min(seq->count, size);
-	err = copy_to_user(buf, seq->buf, n);
-	if (err) {
-		err = -EFAULT;
+	err = do_seq_stop(seq, p, seq->count);
+	if (err)
 		goto done;
-	}
-	copied = n;
-	seq->count -= n;
-	seq->from = n;
+
+	err = do_copy_to_user(seq, buf, size, &copied);
 done:
 	if (!copied)
 		copied = err;
-- 
2.37.1.455.g008518b4e5-goog


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH bpf-next v1] bpf, iter: clean up bpf_seq_read().
  2022-08-01 20:50 [PATCH bpf-next v1] bpf, iter: clean up bpf_seq_read() Hao Luo
@ 2022-08-02 11:14 ` Jiri Olsa
  2022-08-03  0:15   ` Hao Luo
  2022-08-03 11:53 ` Jiri Olsa
  1 sibling, 1 reply; 5+ messages in thread
From: Jiri Olsa @ 2022-08-02 11:14 UTC (permalink / raw)
  To: Hao Luo
  Cc: linux-kernel, bpf, Alexei Starovoitov, Andrii Nakryiko,
	Daniel Borkmann, Martin KaFai Lau, Song Liu, Yonghong Song,
	KP Singh, John Fastabend, Stanislav Fomichev

On Mon, Aug 01, 2022 at 01:50:39PM -0700, Hao Luo wrote:

SNIP

> +static int do_seq_show(struct seq_file *seq, void *p, size_t offs)
> +{
> +	int err;
> +
> +	WARN_ON(IS_ERR_OR_NULL(p));
> +
> +	err = seq->op->show(seq, p);
> +	if (err > 0) {
> +		/* object is skipped, decrease seq_num, so next
> +		 * valid object can reuse the same seq_num.
> +		 */
> +		bpf_iter_dec_seq_num(seq);
> +		seq->count = offs;
> +		return err;
> +	}
> +
> +	if (err < 0 || seq_has_overflowed(seq)) {
> +		seq->count = offs;
> +		return err ? err : -E2BIG;
> +	}
> +
> +	/* err == 0 and no overflow */
> +	return 0;
> +}
> +
> +/* do_seq_stop, stops at the given object 'p'. 'p' could be an ERR or NULL. If
> + * 'p' is an ERR or there was an overflow, reset seq->count to 'offs' and
> + * returns error. Returns 0 otherwise.
> + */
> +static int do_seq_stop(struct seq_file *seq, void *p, size_t offs)
> +{
> +	if (IS_ERR(p)) {
> +		seq->op->stop(seq, NULL);
> +		seq->count = offs;

should we set seq->count to 0 in case of error?

jirka

> +		return PTR_ERR(p);
> +	}
> +
> +	seq->op->stop(seq, p);
> +	if (!p) {
> +		if (!seq_has_overflowed(seq)) {
> +			bpf_iter_done_stop(seq);
> +		} else {
> +			seq->count = offs;
> +			if (offs == 0)
> +				return -E2BIG;
> +		}
> +	}
> +	return 0;
> +}
> +
>  /* maximum visited objects before bailing out */
>  #define MAX_ITER_OBJECTS	1000000
>  

SNIP

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH bpf-next v1] bpf, iter: clean up bpf_seq_read().
  2022-08-02 11:14 ` Jiri Olsa
@ 2022-08-03  0:15   ` Hao Luo
  2022-08-03 11:50     ` Jiri Olsa
  0 siblings, 1 reply; 5+ messages in thread
From: Hao Luo @ 2022-08-03  0:15 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: linux-kernel, bpf, Alexei Starovoitov, Andrii Nakryiko,
	Daniel Borkmann, Martin KaFai Lau, Song Liu, Yonghong Song,
	KP Singh, John Fastabend, Stanislav Fomichev

On Tue, Aug 2, 2022 at 4:14 AM Jiri Olsa <olsajiri@gmail.com> wrote:
>
> On Mon, Aug 01, 2022 at 01:50:39PM -0700, Hao Luo wrote:
>
> SNIP
>
> > +static int do_seq_show(struct seq_file *seq, void *p, size_t offs)
> > +{
> > +     int err;
> > +
> > +     WARN_ON(IS_ERR_OR_NULL(p));
> > +
> > +     err = seq->op->show(seq, p);
> > +     if (err > 0) {
> > +             /* object is skipped, decrease seq_num, so next
> > +              * valid object can reuse the same seq_num.
> > +              */
> > +             bpf_iter_dec_seq_num(seq);
> > +             seq->count = offs;
> > +             return err;
> > +     }
> > +
> > +     if (err < 0 || seq_has_overflowed(seq)) {
> > +             seq->count = offs;
> > +             return err ? err : -E2BIG;
> > +     }
> > +
> > +     /* err == 0 and no overflow */
> > +     return 0;
> > +}
> > +
> > +/* do_seq_stop, stops at the given object 'p'. 'p' could be an ERR or NULL. If
> > + * 'p' is an ERR or there was an overflow, reset seq->count to 'offs' and
> > + * returns error. Returns 0 otherwise.
> > + */
> > +static int do_seq_stop(struct seq_file *seq, void *p, size_t offs)
> > +{
> > +     if (IS_ERR(p)) {
> > +             seq->op->stop(seq, NULL);
> > +             seq->count = offs;
>
> should we set seq->count to 0 in case of error?
>

Thanks Jiri. To be honest, I don't know. There are two paths that may
lead to an error "p".

First, seq->op->start() could return ERR, in that case, '"offs'" is
zero and we set it to zero already. This is fine.

The other one, seq->op->next() could return ERR. This is a case where
bpf_seq_read() fails to handle right now, so I am not sure what to do.

Based on my understanding reading the code, if seq->count isn't
zeroed, the current read() will not copy data, but the next read()
will copy data (see the "if (seq->count)" at the beginning of
bpf_seq_read). If seq->count is zeroed, the data in buffer will be
discarded. I don't know what is right.

> jirka
>
> > +             return PTR_ERR(p);
> > +     }
> > +
> > +     seq->op->stop(seq, p);
> > +     if (!p) {
> > +             if (!seq_has_overflowed(seq)) {
> > +                     bpf_iter_done_stop(seq);
> > +             } else {
> > +                     seq->count = offs;
> > +                     if (offs == 0)
> > +                             return -E2BIG;
> > +             }
> > +     }
> > +     return 0;
> > +}
> > +
> >  /* maximum visited objects before bailing out */
> >  #define MAX_ITER_OBJECTS     1000000
> >
>
> SNIP

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH bpf-next v1] bpf, iter: clean up bpf_seq_read().
  2022-08-03  0:15   ` Hao Luo
@ 2022-08-03 11:50     ` Jiri Olsa
  0 siblings, 0 replies; 5+ messages in thread
From: Jiri Olsa @ 2022-08-03 11:50 UTC (permalink / raw)
  To: Hao Luo
  Cc: Jiri Olsa, linux-kernel, bpf, Alexei Starovoitov,
	Andrii Nakryiko, Daniel Borkmann, Martin KaFai Lau, Song Liu,
	Yonghong Song, KP Singh, John Fastabend, Stanislav Fomichev

On Tue, Aug 02, 2022 at 05:15:50PM -0700, Hao Luo wrote:
> On Tue, Aug 2, 2022 at 4:14 AM Jiri Olsa <olsajiri@gmail.com> wrote:
> >
> > On Mon, Aug 01, 2022 at 01:50:39PM -0700, Hao Luo wrote:
> >
> > SNIP
> >
> > > +static int do_seq_show(struct seq_file *seq, void *p, size_t offs)
> > > +{
> > > +     int err;
> > > +
> > > +     WARN_ON(IS_ERR_OR_NULL(p));
> > > +
> > > +     err = seq->op->show(seq, p);
> > > +     if (err > 0) {
> > > +             /* object is skipped, decrease seq_num, so next
> > > +              * valid object can reuse the same seq_num.
> > > +              */
> > > +             bpf_iter_dec_seq_num(seq);
> > > +             seq->count = offs;
> > > +             return err;
> > > +     }
> > > +
> > > +     if (err < 0 || seq_has_overflowed(seq)) {
> > > +             seq->count = offs;
> > > +             return err ? err : -E2BIG;
> > > +     }
> > > +
> > > +     /* err == 0 and no overflow */
> > > +     return 0;
> > > +}
> > > +
> > > +/* do_seq_stop, stops at the given object 'p'. 'p' could be an ERR or NULL. If
> > > + * 'p' is an ERR or there was an overflow, reset seq->count to 'offs' and
> > > + * returns error. Returns 0 otherwise.
> > > + */
> > > +static int do_seq_stop(struct seq_file *seq, void *p, size_t offs)
> > > +{
> > > +     if (IS_ERR(p)) {
> > > +             seq->op->stop(seq, NULL);
> > > +             seq->count = offs;
> >
> > should we set seq->count to 0 in case of error?
> >
> 
> Thanks Jiri. To be honest, I don't know. There are two paths that may
> lead to an error "p".
> 
> First, seq->op->start() could return ERR, in that case, '"offs'" is
> zero and we set it to zero already. This is fine.

ah right, offs is zero at that time, looks good then

> 
> The other one, seq->op->next() could return ERR. This is a case where
> bpf_seq_read() fails to handle right now, so I am not sure what to do.

but maybe we don't need to set seq->count in here, like:

	static int do_seq_stop(struct seq_file *seq, void *p, size_t offs)
	{
		if (IS_ERR(p)) {
			seq->op->stop(seq, NULL);
			return PTR_ERR(p);
		}

because it's already set by error path of do_seq_show

> 
> Based on my understanding reading the code, if seq->count isn't
> zeroed, the current read() will not copy data, but the next read()
> will copy data (see the "if (seq->count)" at the beginning of
> bpf_seq_read). If seq->count is zeroed, the data in buffer will be
> discarded. I don't know what is right.

I think we should return the buffer up to the point there's an error,
that's why we set seq->count to previous offs value after failed
show callback

jirka

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH bpf-next v1] bpf, iter: clean up bpf_seq_read().
  2022-08-01 20:50 [PATCH bpf-next v1] bpf, iter: clean up bpf_seq_read() Hao Luo
  2022-08-02 11:14 ` Jiri Olsa
@ 2022-08-03 11:53 ` Jiri Olsa
  1 sibling, 0 replies; 5+ messages in thread
From: Jiri Olsa @ 2022-08-03 11:53 UTC (permalink / raw)
  To: Hao Luo
  Cc: linux-kernel, bpf, Alexei Starovoitov, Andrii Nakryiko,
	Daniel Borkmann, Martin KaFai Lau, Song Liu, Yonghong Song,
	KP Singh, John Fastabend, Stanislav Fomichev

On Mon, Aug 01, 2022 at 01:50:39PM -0700, Hao Luo wrote:

SNIP

> -	err = seq->op->show(seq, p);
> -	if (err > 0) {
> -		/* object is skipped, decrease seq_num, so next
> -		 * valid object can reuse the same seq_num.
> -		 */
> -		bpf_iter_dec_seq_num(seq);
> -		seq->count = 0;
> -	} else if (err < 0 || seq_has_overflowed(seq)) {
> -		if (!err)
> -			err = -E2BIG;
> -		seq->op->stop(seq, p);
> +	err = do_seq_show(seq, p, 0);
> +	if (err < 0) {
> +		do_seq_stop(seq, p, 0);
>  		seq->count = 0;
>  		goto done;
>  	}
> @@ -153,7 +208,7 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
>  		num_objs++;
>  		offs = seq->count;
>  		p = seq->op->next(seq, p, &seq->index);
> -		if (pos == seq->index) {
> +		if (unlikely(pos == seq->index)) {
>  			pr_info_ratelimited("buggy seq_file .next function %ps "
>  				"did not updated position index\n",
>  				seq->op->next);
> @@ -161,7 +216,7 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
>  		}
>  
>  		if (IS_ERR_OR_NULL(p))
> -			break;
> +			goto stop;

we could still keep the break here

>  
>  		/* got a valid next object, increase seq_num */
>  		bpf_iter_inc_seq_num(seq);
> @@ -172,22 +227,16 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
>  		if (num_objs >= MAX_ITER_OBJECTS) {
>  			if (offs == 0) {
>  				err = -EAGAIN;
> -				seq->op->stop(seq, p);
> +				do_seq_stop(seq, p, seq->count);
>  				goto done;
>  			}
>  			break;
>  		}
>  
> -		err = seq->op->show(seq, p);
> -		if (err > 0) {
> -			bpf_iter_dec_seq_num(seq);
> -			seq->count = offs;
> -		} else if (err < 0 || seq_has_overflowed(seq)) {
> -			seq->count = offs;
> +		err = do_seq_show(seq, p, offs);
> +		if (err < 0) {
>  			if (offs == 0) {
> -				if (!err)
> -					err = -E2BIG;
> -				seq->op->stop(seq, p);
> +				do_seq_stop(seq, p, seq->count);
>  				goto done;
>  			}
>  			break;
> @@ -197,30 +246,11 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
>  			cond_resched();
>  	}
>  stop:
> -	offs = seq->count;
> -	/* bpf program called if !p */
> -	seq->op->stop(seq, p);
> -	if (!p) {
> -		if (!seq_has_overflowed(seq)) {
> -			bpf_iter_done_stop(seq);
> -		} else {
> -			seq->count = offs;
> -			if (offs == 0) {
> -				err = -E2BIG;
> -				goto done;
> -			}
> -		}
> -	}
> -
> -	n = min(seq->count, size);
> -	err = copy_to_user(buf, seq->buf, n);
> -	if (err) {
> -		err = -EFAULT;
> +	err = do_seq_stop(seq, p, seq->count);
> +	if (err)
>  		goto done;

looks like we tried to copy the data before when stop failed,
now it's skipped

jirka

> -	}
> -	copied = n;
> -	seq->count -= n;
> -	seq->from = n;
> +
> +	err = do_copy_to_user(seq, buf, size, &copied);
>  done:
>  	if (!copied)
>  		copied = err;
> -- 
> 2.37.1.455.g008518b4e5-goog
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2022-08-03 11:53 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-08-01 20:50 [PATCH bpf-next v1] bpf, iter: clean up bpf_seq_read() Hao Luo
2022-08-02 11:14 ` Jiri Olsa
2022-08-03  0:15   ` Hao Luo
2022-08-03 11:50     ` Jiri Olsa
2022-08-03 11:53 ` Jiri Olsa

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).