All of lore.kernel.org
 help / color / mirror / Atom feed
* [Patch net-next] net_sched: add performance counters for basic filter
@ 2019-01-18  1:14 Cong Wang
  2019-01-20  0:06 ` David Miller
  2019-02-03 18:35 ` Eric Dumazet
  0 siblings, 2 replies; 5+ messages in thread
From: Cong Wang @ 2019-01-18  1:14 UTC (permalink / raw)
  To: netdev; +Cc: Cong Wang, Jamal Hadi Salim, Jiri Pirko

Similar to u32 filter, it is useful to know how many times
we reach each basic filter and how many times we pass the
ematch attached to it.

Sample output:

filter protocol arp pref 49152 basic chain 0
filter protocol arp pref 49152 basic chain 0 handle 0x1  (rule hit 3 success 3)
	action order 1: gact action pass
	 random type none pass val 0
	 index 1 ref 1 bind 1 installed 81 sec used 4 sec
	Action statistics:
	Sent 126 bytes 3 pkt (dropped 0, overlimits 0 requeues 0)
	backlog 0b 0p requeues 0

Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
---
 include/uapi/linux/pkt_cls.h |  7 +++++++
 net/sched/cls_basic.c        | 25 +++++++++++++++++++++++++
 2 files changed, 32 insertions(+)

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 32a3416b51c3..02ac251be8c4 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -333,12 +333,19 @@ enum {
 
 /* Basic filter */
 
+struct tc_basic_pcnt {
+	__u64 rcnt;
+	__u64 rhit;
+};
+
 enum {
 	TCA_BASIC_UNSPEC,
 	TCA_BASIC_CLASSID,
 	TCA_BASIC_EMATCHES,
 	TCA_BASIC_ACT,
 	TCA_BASIC_POLICE,
+	TCA_BASIC_PCNT,
+	TCA_BASIC_PAD,
 	__TCA_BASIC_MAX
 };
 
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 6a5dce8baf19..4a57fec6f306 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -18,6 +18,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/skbuff.h>
 #include <linux/idr.h>
+#include <linux/percpu.h>
 #include <net/netlink.h>
 #include <net/act_api.h>
 #include <net/pkt_cls.h>
@@ -35,6 +36,7 @@ struct basic_filter {
 	struct tcf_result	res;
 	struct tcf_proto	*tp;
 	struct list_head	link;
+	struct tc_basic_pcnt __percpu *pf;
 	struct rcu_work		rwork;
 };
 
@@ -46,8 +48,10 @@ static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 	struct basic_filter *f;
 
 	list_for_each_entry_rcu(f, &head->flist, link) {
+		__this_cpu_inc(f->pf->rcnt);
 		if (!tcf_em_tree_match(skb, &f->ematches, NULL))
 			continue;
+		__this_cpu_inc(f->pf->rhit);
 		*res = f->res;
 		r = tcf_exts_exec(skb, &f->exts, res);
 		if (r < 0)
@@ -89,6 +93,7 @@ static void __basic_delete_filter(struct basic_filter *f)
 	tcf_exts_destroy(&f->exts);
 	tcf_em_tree_destroy(&f->ematches);
 	tcf_exts_put_net(&f->exts);
+	free_percpu(f->pf);
 	kfree(f);
 }
 
@@ -208,6 +213,11 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
 	if (err)
 		goto errout;
 	fnew->handle = handle;
+	fnew->pf = alloc_percpu(struct tc_basic_pcnt);
+	if (!fnew->pf) {
+		err = -ENOMEM;
+		goto errout;
+	}
 
 	err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr,
 			      extack);
@@ -231,6 +241,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
 
 	return 0;
 errout:
+	free_percpu(fnew->pf);
 	tcf_exts_destroy(&fnew->exts);
 	kfree(fnew);
 	return err;
@@ -265,8 +276,10 @@ static void basic_bind_class(void *fh, u32 classid, unsigned long cl)
 static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh,
 		      struct sk_buff *skb, struct tcmsg *t)
 {
+	struct tc_basic_pcnt gpf = {};
 	struct basic_filter *f = fh;
 	struct nlattr *nest;
+	int cpu;
 
 	if (f == NULL)
 		return skb->len;
@@ -281,6 +294,18 @@ static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh,
 	    nla_put_u32(skb, TCA_BASIC_CLASSID, f->res.classid))
 		goto nla_put_failure;
 
+	for_each_possible_cpu(cpu) {
+		struct tc_basic_pcnt *pf = per_cpu_ptr(f->pf, cpu);
+
+		gpf.rcnt += pf->rcnt;
+		gpf.rhit += pf->rhit;
+	}
+
+	if (nla_put_64bit(skb, TCA_BASIC_PCNT,
+			  sizeof(struct tc_basic_pcnt),
+			  &gpf, TCA_BASIC_PAD))
+		goto nla_put_failure;
+
 	if (tcf_exts_dump(skb, &f->exts) < 0 ||
 	    tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0)
 		goto nla_put_failure;
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [Patch net-next] net_sched: add performance counters for basic filter
  2019-01-18  1:14 [Patch net-next] net_sched: add performance counters for basic filter Cong Wang
@ 2019-01-20  0:06 ` David Miller
  2019-02-03 18:35 ` Eric Dumazet
  1 sibling, 0 replies; 5+ messages in thread
From: David Miller @ 2019-01-20  0:06 UTC (permalink / raw)
  To: xiyou.wangcong; +Cc: netdev, jhs, jiri

From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Thu, 17 Jan 2019 17:14:01 -0800

> Similar to u32 filter, it is useful to know how many times
> we reach each basic filter and how many times we pass the
> ematch attached to it.
> 
> Sample output:
> 
> filter protocol arp pref 49152 basic chain 0
> filter protocol arp pref 49152 basic chain 0 handle 0x1  (rule hit 3 success 3)
> 	action order 1: gact action pass
> 	 random type none pass val 0
> 	 index 1 ref 1 bind 1 installed 81 sec used 4 sec
> 	Action statistics:
> 	Sent 126 bytes 3 pkt (dropped 0, overlimits 0 requeues 0)
> 	backlog 0b 0p requeues 0
> 
> Cc: Jamal Hadi Salim <jhs@mojatatu.com>
> Cc: Jiri Pirko <jiri@resnulli.us>
> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>

Applied.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [Patch net-next] net_sched: add performance counters for basic filter
  2019-01-18  1:14 [Patch net-next] net_sched: add performance counters for basic filter Cong Wang
  2019-01-20  0:06 ` David Miller
@ 2019-02-03 18:35 ` Eric Dumazet
  2019-02-03 19:35   ` Cong Wang
  1 sibling, 1 reply; 5+ messages in thread
From: Eric Dumazet @ 2019-02-03 18:35 UTC (permalink / raw)
  To: Cong Wang, netdev; +Cc: Jamal Hadi Salim, Jiri Pirko



On 01/17/2019 05:14 PM, Cong Wang wrote:
> Similar to u32 filter, it is useful to know how many times
> we reach each basic filter and how many times we pass the
> ematch attached to it.
> 
> Sample output:
> 
> filter protocol arp pref 49152 basic chain 0
> filter protocol arp pref 49152 basic chain 0 handle 0x1  (rule hit 3 success 3)
> 	action order 1: gact action pass
> 	 random type none pass val 0
> 	 index 1 ref 1 bind 1 installed 81 sec used 4 sec
> 	Action statistics:
> 	Sent 126 bytes 3 pkt (dropped 0, overlimits 0 requeues 0)
> 	backlog 0b 0p requeues 0

...

> +	for_each_possible_cpu(cpu) {
> +		struct tc_basic_pcnt *pf = per_cpu_ptr(f->pf, cpu);
> +
> +		gpf.rcnt += pf->rcnt;
> +		gpf.rhit += pf->rhit;
> +	}
> 
This looks missing some synchronization of some sort for 32bit kernels ?

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [Patch net-next] net_sched: add performance counters for basic filter
  2019-02-03 18:35 ` Eric Dumazet
@ 2019-02-03 19:35   ` Cong Wang
  2019-02-04  3:27     ` Eric Dumazet
  0 siblings, 1 reply; 5+ messages in thread
From: Cong Wang @ 2019-02-03 19:35 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Linux Kernel Network Developers, Jamal Hadi Salim, Jiri Pirko

On Sun, Feb 3, 2019 at 10:35 AM Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > +     for_each_possible_cpu(cpu) {
> > +             struct tc_basic_pcnt *pf = per_cpu_ptr(f->pf, cpu);
> > +
> > +             gpf.rcnt += pf->rcnt;
> > +             gpf.rhit += pf->rhit;
> > +     }
> >
> This looks missing some synchronization of some sort for 32bit kernels ?

I don't know, u32 filter has literally same code:

                for_each_possible_cpu(cpu) {
                        int i;
                        struct tc_u32_pcnt *pf = per_cpu_ptr(n->pf, cpu);

                        gpf->rcnt += pf->rcnt;
                        gpf->rhit += pf->rhit;
                        for (i = 0; i < n->sel.nkeys; i++)
                                gpf->kcnts[i] += pf->kcnts[i];
                }

which has been there for years...

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [Patch net-next] net_sched: add performance counters for basic filter
  2019-02-03 19:35   ` Cong Wang
@ 2019-02-04  3:27     ` Eric Dumazet
  0 siblings, 0 replies; 5+ messages in thread
From: Eric Dumazet @ 2019-02-04  3:27 UTC (permalink / raw)
  To: Cong Wang; +Cc: Linux Kernel Network Developers, Jamal Hadi Salim, Jiri Pirko



On 02/03/2019 11:35 AM, Cong Wang wrote:
> On Sun, Feb 3, 2019 at 10:35 AM Eric Dumazet <eric.dumazet@gmail.com> wrote:
>>> +     for_each_possible_cpu(cpu) {
>>> +             struct tc_basic_pcnt *pf = per_cpu_ptr(f->pf, cpu);
>>> +
>>> +             gpf.rcnt += pf->rcnt;
>>> +             gpf.rhit += pf->rhit;
>>> +     }
>>>
>> This looks missing some synchronization of some sort for 32bit kernels ?
> 
> I don't know, u32 filter has literally same code:
> 
>                 for_each_possible_cpu(cpu) {
>                         int i;
>                         struct tc_u32_pcnt *pf = per_cpu_ptr(n->pf, cpu);
> 
>                         gpf->rcnt += pf->rcnt;
>                         gpf->rhit += pf->rhit;
>                         for (i = 0; i < n->sel.nkeys; i++)
>                                 gpf->kcnts[i] += pf->kcnts[i];
>                 }
> 
> which has been there for years...
> 

Then u32 (or at least CONFIG_CLS_U32_PERF) was also broken.

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2019-02-04  3:27 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-01-18  1:14 [Patch net-next] net_sched: add performance counters for basic filter Cong Wang
2019-01-20  0:06 ` David Miller
2019-02-03 18:35 ` Eric Dumazet
2019-02-03 19:35   ` Cong Wang
2019-02-04  3:27     ` Eric Dumazet

This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.