All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH bpf] samples: bpf: fix summary per-sec stats in xdp_sample_user
@ 2021-11-11 21:57 Alexander Lobakin
  2021-11-12  2:01 ` Kumar Kartikeya Dwivedi
  2021-11-12 20:50 ` patchwork-bot+netdevbpf
  0 siblings, 2 replies; 3+ messages in thread
From: Alexander Lobakin @ 2021-11-11 21:57 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann
  Cc: Alexander Lobakin, Jesse Brandeburg, Maciej Fijalkowski,
	Michal Swiatkowski, David S. Miller, Jakub Kicinski,
	Jesper Dangaard Brouer, John Fastabend, Andrii Nakryiko,
	Martin KaFai Lau, Song Liu, Yonghong Song, KP Singh,
	Kumar Kartikeya Dwivedi, netdev, bpf, linux-kernel

sample_summary_print() uses accumulated period to calculate and
display per-sec averages. This period gets incremented by sampling
interval each time a new sample is formed, and thus equals to the
number of samples collected multiplied by this interval.
However, the totals are being calculated differently, they receive
current sample statistics already divided by the interval gotten as
a difference between sample timestamps for better precision -- in
other words, they are being incremented by the per-sec values each
sample.
This leads to the excessive division of summary per-secs when
interval != 1 sec. It is obvious pps couldn't become two times
lower just from picking a different sampling interval value:

$ samples/bpf/xdp_redirect_cpu -p xdp_prognum_n1_inverse_qnum -c all
  -s -d 6 -i 1
< snip >
  Packets received    : 2,197,230,321
  Average packets/s   : 22,887,816
  Packets redirected  : 2,197,230,472
  Average redir/s     : 22,887,817
$ samples/bpf/xdp_redirect_cpu -p xdp_prognum_n1_inverse_qnum -c all
  -s -d 6 -i 2
< snip >
  Packets received    : 159,566,498
  Average packets/s   : 11,397,607
  Packets redirected  : 159,566,995
  Average redir/s     : 11,397,642

This can be easily fixed by treating the divisor not as a period,
but rather as a total number of samples, and thus incrementing it
by 1 instead of interval. As a nice side effect, we can now remove
so-named argument from a couple of functions. Let us also create
an "alias" for sample_output::rx_cnt::pps named 'num' using a union
since this field is used to store this number (period previously)
as well, and the resulting counter-intuitive code might've been
a reason for this bug.

Fixes: 156f886cf697 ("samples: bpf: Add basic infrastructure for XDP samples")
Signed-off-by: Alexander Lobakin <alexandr.lobakin@intel.com>
Reviewed-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
---
 samples/bpf/xdp_sample_user.c | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/samples/bpf/xdp_sample_user.c b/samples/bpf/xdp_sample_user.c
index b32d82178199..8740838e7767 100644
--- a/samples/bpf/xdp_sample_user.c
+++ b/samples/bpf/xdp_sample_user.c
@@ -120,7 +120,10 @@ struct sample_output {
 		__u64 xmit;
 	} totals;
 	struct {
-		__u64 pps;
+		union {
+			__u64 pps;
+			__u64 num;
+		};
 		__u64 drop;
 		__u64 err;
 	} rx_cnt;
@@ -1322,7 +1325,7 @@ int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic,
 
 static void sample_summary_print(void)
 {
-	double period = sample_out.rx_cnt.pps;
+	double num = sample_out.rx_cnt.num;
 
 	if (sample_out.totals.rx) {
 		double pkts = sample_out.totals.rx;
@@ -1330,7 +1333,7 @@ static void sample_summary_print(void)
 		print_always("  Packets received    : %'-10llu\n",
 			     sample_out.totals.rx);
 		print_always("  Average packets/s   : %'-10.0f\n",
-			     sample_round(pkts / period));
+			     sample_round(pkts / num));
 	}
 	if (sample_out.totals.redir) {
 		double pkts = sample_out.totals.redir;
@@ -1338,7 +1341,7 @@ static void sample_summary_print(void)
 		print_always("  Packets redirected  : %'-10llu\n",
 			     sample_out.totals.redir);
 		print_always("  Average redir/s     : %'-10.0f\n",
-			     sample_round(pkts / period));
+			     sample_round(pkts / num));
 	}
 	if (sample_out.totals.drop)
 		print_always("  Rx dropped          : %'-10llu\n",
@@ -1355,7 +1358,7 @@ static void sample_summary_print(void)
 		print_always("  Packets transmitted : %'-10llu\n",
 			     sample_out.totals.xmit);
 		print_always("  Average transmit/s  : %'-10.0f\n",
-			     sample_round(pkts / period));
+			     sample_round(pkts / num));
 	}
 }
 
@@ -1422,7 +1425,7 @@ static int sample_stats_collect(struct stats_record *rec)
 	return 0;
 }
 
-static void sample_summary_update(struct sample_output *out, int interval)
+static void sample_summary_update(struct sample_output *out)
 {
 	sample_out.totals.rx += out->totals.rx;
 	sample_out.totals.redir += out->totals.redir;
@@ -1430,12 +1433,11 @@ static void sample_summary_update(struct sample_output *out, int interval)
 	sample_out.totals.drop_xmit += out->totals.drop_xmit;
 	sample_out.totals.err += out->totals.err;
 	sample_out.totals.xmit += out->totals.xmit;
-	sample_out.rx_cnt.pps += interval;
+	sample_out.rx_cnt.num++;
 }
 
 static void sample_stats_print(int mask, struct stats_record *cur,
-			       struct stats_record *prev, char *prog_name,
-			       int interval)
+			       struct stats_record *prev, char *prog_name)
 {
 	struct sample_output out = {};
 
@@ -1452,7 +1454,7 @@ static void sample_stats_print(int mask, struct stats_record *cur,
 	else if (mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI)
 		stats_get_devmap_xmit_multi(cur, prev, 0, &out,
 					    mask & SAMPLE_DEVMAP_XMIT_CNT);
-	sample_summary_update(&out, interval);
+	sample_summary_update(&out);
 
 	stats_print(prog_name, mask, cur, prev, &out);
 }
@@ -1495,7 +1497,7 @@ static void swap(struct stats_record **a, struct stats_record **b)
 }
 
 static int sample_timer_cb(int timerfd, struct stats_record **rec,
-			   struct stats_record **prev, int interval)
+			   struct stats_record **prev)
 {
 	char line[64] = "Summary";
 	int ret;
@@ -1524,7 +1526,7 @@ static int sample_timer_cb(int timerfd, struct stats_record **rec,
 		snprintf(line, sizeof(line), "%s->%s", f ?: "?", t ?: "?");
 	}
 
-	sample_stats_print(sample_mask, *rec, *prev, line, interval);
+	sample_stats_print(sample_mask, *rec, *prev, line);
 	return 0;
 }
 
@@ -1579,7 +1581,7 @@ int sample_run(int interval, void (*post_cb)(void *), void *ctx)
 		if (pfd[0].revents & POLLIN)
 			ret = sample_signal_cb();
 		else if (pfd[1].revents & POLLIN)
-			ret = sample_timer_cb(timerfd, &rec, &prev, interval);
+			ret = sample_timer_cb(timerfd, &rec, &prev);
 
 		if (ret)
 			break;
-- 
2.33.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH bpf] samples: bpf: fix summary per-sec stats in xdp_sample_user
  2021-11-11 21:57 [PATCH bpf] samples: bpf: fix summary per-sec stats in xdp_sample_user Alexander Lobakin
@ 2021-11-12  2:01 ` Kumar Kartikeya Dwivedi
  2021-11-12 20:50 ` patchwork-bot+netdevbpf
  1 sibling, 0 replies; 3+ messages in thread
From: Kumar Kartikeya Dwivedi @ 2021-11-12  2:01 UTC (permalink / raw)
  To: Alexander Lobakin
  Cc: Alexei Starovoitov, Daniel Borkmann, Jesse Brandeburg,
	Maciej Fijalkowski, Michal Swiatkowski, David S. Miller,
	Jakub Kicinski, Jesper Dangaard Brouer, John Fastabend,
	Andrii Nakryiko, Martin KaFai Lau, Song Liu, Yonghong Song,
	KP Singh, netdev, bpf, linux-kernel

On Fri, Nov 12, 2021 at 03:27:03AM IST, Alexander Lobakin wrote:
> sample_summary_print() uses accumulated period to calculate and
> display per-sec averages. This period gets incremented by sampling
> interval each time a new sample is formed, and thus equals to the
> number of samples collected multiplied by this interval.
> However, the totals are being calculated differently, they receive
> current sample statistics already divided by the interval gotten as
> a difference between sample timestamps for better precision -- in
> other words, they are being incremented by the per-sec values each
> sample.
> This leads to the excessive division of summary per-secs when
> interval != 1 sec. It is obvious pps couldn't become two times
> lower just from picking a different sampling interval value:
>
> $ samples/bpf/xdp_redirect_cpu -p xdp_prognum_n1_inverse_qnum -c all
>   -s -d 6 -i 1
> < snip >
>   Packets received    : 2,197,230,321
>   Average packets/s   : 22,887,816
>   Packets redirected  : 2,197,230,472
>   Average redir/s     : 22,887,817
> $ samples/bpf/xdp_redirect_cpu -p xdp_prognum_n1_inverse_qnum -c all
>   -s -d 6 -i 2
> < snip >
>   Packets received    : 159,566,498
>   Average packets/s   : 11,397,607
>   Packets redirected  : 159,566,995
>   Average redir/s     : 11,397,642
>
> This can be easily fixed by treating the divisor not as a period,
> but rather as a total number of samples, and thus incrementing it
> by 1 instead of interval. As a nice side effect, we can now remove
> so-named argument from a couple of functions. Let us also create
> an "alias" for sample_output::rx_cnt::pps named 'num' using a union
> since this field is used to store this number (period previously)
> as well, and the resulting counter-intuitive code might've been
> a reason for this bug.
>
> Fixes: 156f886cf697 ("samples: bpf: Add basic infrastructure for XDP samples")
> Signed-off-by: Alexander Lobakin <alexandr.lobakin@intel.com>
> Reviewed-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
> ---

Ouch. Thank you for the fix.

Reviewed-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>

> [...]

--
Kartikeya

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH bpf] samples: bpf: fix summary per-sec stats in xdp_sample_user
  2021-11-11 21:57 [PATCH bpf] samples: bpf: fix summary per-sec stats in xdp_sample_user Alexander Lobakin
  2021-11-12  2:01 ` Kumar Kartikeya Dwivedi
@ 2021-11-12 20:50 ` patchwork-bot+netdevbpf
  1 sibling, 0 replies; 3+ messages in thread
From: patchwork-bot+netdevbpf @ 2021-11-12 20:50 UTC (permalink / raw)
  To: Alexander Lobakin
  Cc: ast, daniel, jesse.brandeburg, maciej.fijalkowski,
	michal.swiatkowski, davem, kuba, hawk, john.fastabend, andrii,
	kafai, songliubraving, yhs, kpsingh, memxor, netdev, bpf,
	linux-kernel

Hello:

This patch was applied to bpf/bpf.git (master)
by Daniel Borkmann <daniel@iogearbox.net>:

On Thu, 11 Nov 2021 22:57:03 +0100 you wrote:
> sample_summary_print() uses accumulated period to calculate and
> display per-sec averages. This period gets incremented by sampling
> interval each time a new sample is formed, and thus equals to the
> number of samples collected multiplied by this interval.
> However, the totals are being calculated differently, they receive
> current sample statistics already divided by the interval gotten as
> a difference between sample timestamps for better precision -- in
> other words, they are being incremented by the per-sec values each
> sample.
> This leads to the excessive division of summary per-secs when
> interval != 1 sec. It is obvious pps couldn't become two times
> lower just from picking a different sampling interval value:
> 
> [...]

Here is the summary with links:
  - [bpf] samples: bpf: fix summary per-sec stats in xdp_sample_user
    https://git.kernel.org/bpf/bpf/c/b51a6682d432

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-11-12 20:51 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-11 21:57 [PATCH bpf] samples: bpf: fix summary per-sec stats in xdp_sample_user Alexander Lobakin
2021-11-12  2:01 ` Kumar Kartikeya Dwivedi
2021-11-12 20:50 ` patchwork-bot+netdevbpf

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.