All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
@ 2017-04-19 11:57 Jamal Hadi Salim
  2017-04-19 11:57 ` [PATCH net-next v4 2/2] net sched actions: add time filter for action dumping Jamal Hadi Salim
  2017-04-19 12:36 ` [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch Jiri Pirko
  0 siblings, 2 replies; 37+ messages in thread
From: Jamal Hadi Salim @ 2017-04-19 11:57 UTC (permalink / raw)
  To: davem; +Cc: netdev, eric.dumazet, jiri, xiyou.wangcong, Jamal Hadi Salim

From: Jamal Hadi Salim <jhs@mojatatu.com>

When you dump hundreds of thousands of actions, getting only 32 per
dump batch even when the socket buffer and memory allocations allow
is inefficient.

With this change, the user will get as many as possibly fitting
within the given constraints available to the kernel.

A new top level TLV space is introduced. An attribute
TCAA_ACT_FLAGS is used to carry the flags indicating the user
is capable of processing these large dumps. Older user space which
doesnt set this flag doesnt get the large (than 32) batches.
The kernel uses the TCAA_ACT_COUNT attribute to tell the user how many
actions are put in a single batch. As such user space app knows how long
to iterate (independent of the type of action being dumped)
instead of hardcoded maximum of 32.

Some results dumping 1.5M actions, first unpatched tc which the
kernel doesnt help:

prompt$ time -p tc actions ls action gact | grep index | wc -l
1500000
real 1388.43
user 2.07
sys 1386.79

Now lets see a patched tc which sets the correct flags when requesting
a dump:

prompt$ time -p updatedtc actions ls action gact | grep index | wc -l
1500000
real 178.13
user 2.02
sys 176.96

Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
---
 include/uapi/linux/rtnetlink.h | 21 +++++++++++++++++++--
 net/sched/act_api.c            | 43 ++++++++++++++++++++++++++++++++----------
 3 files changed, 53 insertions(+), 12 deletions(-)

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index cce0613..c7080ec 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -674,10 +674,27 @@ struct tcamsg {
 	unsigned char	tca__pad1;
 	unsigned short	tca__pad2;
 };
+
+enum {
+	TCAA_UNSPEC,
+	TCAA_ACT_TAB,
+	TCAA_ACT_FLAGS,
+	TCAA_ACT_COUNT,
+	__TCAA_MAX
+};
+
+#define TCAA_MAX (__TCAA_MAX - 1)
 #define TA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg))))
 #define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg))
-#define TCA_ACT_TAB 1 /* attr type must be >=1 */	
-#define TCAA_MAX 1
+#define TCA_ACT_TAB TCAA_ACT_TAB
+/* tcamsg flags stored in attribute TCAA_ACT_FLAGS
+ *
+ * ACT_LARGE_DUMP_ON user->kernel to request for larger than TCA_ACT_MAX_PRIO
+ * actions in a dump. All dump responses will contain the number of actions
+ * being dumped stored in for user app's consumption in TCAA_ACT_COUNT
+ *
+ */
+#define ACT_LARGE_DUMP_ON		(1 << 0)
 
 /* New extended info filters for IFLA_EXT_MASK */
 #define RTEXT_FILTER_VF		(1 << 0)
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 82b1d48..45e1cf7 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -83,6 +83,7 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
 			   struct netlink_callback *cb)
 {
 	int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
+	unsigned short act_flags = cb->args[2];
 	struct nlattr *nest;
 
 	spin_lock_bh(&hinfo->lock);
@@ -111,14 +112,18 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
 			}
 			nla_nest_end(skb, nest);
 			n_i++;
-			if (n_i >= TCA_ACT_MAX_PRIO)
+			if (!(act_flags & ACT_LARGE_DUMP_ON) &&
+			    n_i >= TCA_ACT_MAX_PRIO)
 				goto done;
 		}
 	}
 done:
 	spin_unlock_bh(&hinfo->lock);
-	if (n_i)
+	if (n_i) {
 		cb->args[0] += n_i;
+		if (act_flags & ACT_LARGE_DUMP_ON)
+			cb->args[1] = n_i;
+	}
 	return n_i;
 
 nla_put_failure:
@@ -993,11 +998,15 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
 	return tcf_add_notify(net, n, &actions, portid);
 }
 
+static const struct nla_policy tcaa_policy[TCAA_MAX + 1] = {
+	[TCAA_ACT_FLAGS]      = { .type = NLA_U32 },
+};
+
 static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
 			 struct netlink_ext_ack *extack)
 {
 	struct net *net = sock_net(skb->sk);
-	struct nlattr *tca[TCA_ACT_MAX + 1];
+	struct nlattr *tca[TCAA_MAX + 1];
 	u32 portid = skb ? NETLINK_CB(skb).portid : 0;
 	int ret = 0, ovr = 0;
 
@@ -1005,7 +1014,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
 	    !netlink_capable(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
-	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL,
+	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCAA_MAX, tcaa_policy,
 			  extack);
 	if (ret < 0)
 		return ret;
@@ -1046,16 +1055,12 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
 	return ret;
 }
 
-static struct nlattr *find_dump_kind(const struct nlmsghdr *n)
+static struct nlattr *find_dump_kind(struct nlattr **nla)
 {
 	struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1];
 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
-	struct nlattr *nla[TCAA_MAX + 1];
 	struct nlattr *kind;
 
-	if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX,
-			NULL, NULL) < 0)
-		return NULL;
 	tb1 = nla[TCA_ACT_TAB];
 	if (tb1 == NULL)
 		return NULL;
@@ -1081,9 +1086,18 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	struct nlattr *nest;
 	struct tc_action_ops *a_o;
 	int ret = 0;
+	struct nlattr *tcaa[TCAA_MAX + 1];
 	struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh);
-	struct nlattr *kind = find_dump_kind(cb->nlh);
+	struct nlattr *kind = NULL;
+	u32 act_flags = 0;
+
+	ret = nlmsg_parse(cb->nlh, sizeof(struct tcamsg), tcaa, TCAA_MAX,
+			  tcaa_policy, NULL);
+	if (ret < 0)
+		return ret;
+
 
+	kind = find_dump_kind(tcaa);
 	if (kind == NULL) {
 		pr_info("tc_dump_action: action bad kind\n");
 		return 0;
@@ -1093,10 +1107,16 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	if (a_o == NULL)
 		return 0;
 
+	if (tcaa[TCAA_ACT_FLAGS])
+		act_flags = nla_get_u32(tcaa[TCAA_ACT_FLAGS]);
+
 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
 			cb->nlh->nlmsg_type, sizeof(*t), 0);
 	if (!nlh)
 		goto out_module_put;
+
+	cb->args[2] = act_flags;
+
 	t = nlmsg_data(nlh);
 	t->tca_family = AF_UNSPEC;
 	t->tca__pad1 = 0;
@@ -1113,6 +1133,9 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	if (ret > 0) {
 		nla_nest_end(skb, nest);
 		ret = skb->len;
+		if (nla_put_u32(skb, TCAA_ACT_COUNT, cb->args[1]))
+			goto out_module_put;
+		cb->args[1] = 0;
 	} else
 		nlmsg_trim(skb, b);
 
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH net-next v4 2/2] net sched actions: add time filter for action dumping
  2017-04-19 11:57 [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch Jamal Hadi Salim
@ 2017-04-19 11:57 ` Jamal Hadi Salim
  2017-04-19 12:53   ` Jiri Pirko
  2017-04-19 12:36 ` [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch Jiri Pirko
  1 sibling, 1 reply; 37+ messages in thread
From: Jamal Hadi Salim @ 2017-04-19 11:57 UTC (permalink / raw)
  To: davem; +Cc: netdev, eric.dumazet, jiri, xiyou.wangcong, Jamal Hadi Salim

From: Jamal Hadi Salim <jhs@mojatatu.com>

This adds support for filtering based on time since last used.
When we are dumping a large number of actions it is useful to
have the option of filtering based on when the action was last
used to reduce the amount of data crossing to user space.

With this patch the user space app sets the TCAA_ACT_TIME_FILTER
attribute with the value in milliseconds with "time of interest
since now".  The kernel converts this to jiffies and does the
filtering comparison matching entries that have seen activity
since then and returns them to user space.
Old kernels and old tc continue to work in legacy mode since
they dont specify this attribute.

Some example (we have 400 actions bound to 400 filters); at installation
time using  hacked tc which sets the time of interest to 120 seconds:

prompt$ hackedtc actions ls action gact | grep index | wc -l
400

go get some coffee and  wait for > 120 seconds and try again:

prompt$ hackedtc actions ls action gact | grep index | wc -l
0

Lets see a filter bound to one of these actions:
..
filter pref 10 u32
filter pref 10 u32 fh 800: ht divisor 1
filter pref 10 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:10  (rule hit 2 success 1)
  match 7f000002/ffffffff at 12 (success 1 )
    action order 1: gact action pass
     random type none pass val 0
     index 23 ref 2 bind 1 installed 1145 sec used 802 sec
    Action statistics:
    Sent 84 bytes 1 pkt (dropped 0, overlimits 0 requeues 0)
    backlog 0b 0p requeues 0
....

that coffee took long, no? It was good.

Now lets ping -c 1 127.0.0.2, then run the actions again:

prompt$ hackedtc actions ls action gact | grep index | wc -l
1

More details please:

prompt$ hackedtc -s actions ls action gact

    action order 0: gact action pass
     random type none pass val 0
     index 23 ref 2 bind 1 installed 1270 sec used 30 sec
    Action statistics:
    Sent 168 bytes 2 pkt (dropped 0, overlimits 0 requeues 0)
    backlog 0b 0p requeues 0

And the filter?

filter pref 10 u32
filter pref 10 u32 fh 800: ht divisor 1
filter pref 10 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:10  (rule hit 4 success 2)
  match 7f000002/ffffffff at 12 (success 2 )
    action order 1: gact action pass
     random type none pass val 0
     index 23 ref 2 bind 1 installed 1324 sec used 84 sec
    Action statistics:
    Sent 168 bytes 2 pkt (dropped 0, overlimits 0 requeues 0)
    backlog 0b 0p requeues 0

Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
---
 include/uapi/linux/rtnetlink.h |  1 +
 net/sched/act_api.c            | 25 +++++++++++++++++++++++--
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index c7080ec..1b36cc0 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -680,6 +680,7 @@ enum {
 	TCAA_ACT_TAB,
 	TCAA_ACT_FLAGS,
 	TCAA_ACT_COUNT,
+	TCAA_ACT_TIME_FILTER,
 	__TCAA_MAX
 };
 
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 45e1cf7..b03863a 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -84,11 +84,12 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
 {
 	int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
 	unsigned short act_flags = cb->args[2];
+	unsigned long jiffy_filter = cb->args[3];
 	struct nlattr *nest;
 
 	spin_lock_bh(&hinfo->lock);
 
-	s_i = cb->args[0];
+	s_i = cb->args[4];
 
 	for (i = 0; i < (hinfo->hmask + 1); i++) {
 		struct hlist_head *head;
@@ -101,6 +102,11 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
 			if (index < s_i)
 				continue;
 
+			if (jiffy_filter &&
+			    time_after(jiffy_filter,
+				       (unsigned long)p->tcfa_tm.lastuse))
+				continue;
+
 			nest = nla_nest_start(skb, n_i);
 			if (nest == NULL)
 				goto nla_put_failure;
@@ -118,6 +124,9 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
 		}
 	}
 done:
+	if (index > 0)
+		cb->args[4] = index + 1;
+
 	spin_unlock_bh(&hinfo->lock);
 	if (n_i) {
 		cb->args[0] += n_i;
@@ -1000,6 +1009,7 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
 
 static const struct nla_policy tcaa_policy[TCAA_MAX + 1] = {
 	[TCAA_ACT_FLAGS]      = { .type = NLA_U32 },
+	[TCAA_ACT_TIME_FILTER]      = { .type = NLA_U32 },
 };
 
 static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
@@ -1090,13 +1100,14 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh);
 	struct nlattr *kind = NULL;
 	u32 act_flags = 0;
+	u32 msecs_filter = 0;
+	unsigned long jiffy_wanted = 0;
 
 	ret = nlmsg_parse(cb->nlh, sizeof(struct tcamsg), tcaa, TCAA_MAX,
 			  tcaa_policy, NULL);
 	if (ret < 0)
 		return ret;
 
-
 	kind = find_dump_kind(tcaa);
 	if (kind == NULL) {
 		pr_info("tc_dump_action: action bad kind\n");
@@ -1110,12 +1121,22 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	if (tcaa[TCAA_ACT_FLAGS])
 		act_flags = nla_get_u32(tcaa[TCAA_ACT_FLAGS]);
 
+	if (tcaa[TCAA_ACT_TIME_FILTER])
+		msecs_filter = nla_get_u32(tcaa[TCAA_ACT_TIME_FILTER]);
+
 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
 			cb->nlh->nlmsg_type, sizeof(*t), 0);
 	if (!nlh)
 		goto out_module_put;
 
+	if (msecs_filter) {
+		unsigned long jiffy_msecs = msecs_to_jiffies(msecs_filter);
+
+		jiffy_wanted = jiffies - jiffy_msecs;
+	}
+
 	cb->args[2] = act_flags;
+	cb->args[3] = jiffy_wanted;
 
 	t = nlmsg_data(nlh);
 	t->tca_family = AF_UNSPEC;
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
  2017-04-19 11:57 [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch Jamal Hadi Salim
  2017-04-19 11:57 ` [PATCH net-next v4 2/2] net sched actions: add time filter for action dumping Jamal Hadi Salim
@ 2017-04-19 12:36 ` Jiri Pirko
  2017-04-19 12:55   ` Roman Mashak
  2017-04-19 13:03   ` Jamal Hadi Salim
  1 sibling, 2 replies; 37+ messages in thread
From: Jiri Pirko @ 2017-04-19 12:36 UTC (permalink / raw)
  To: Jamal Hadi Salim; +Cc: davem, netdev, eric.dumazet, xiyou.wangcong

Wed, Apr 19, 2017 at 01:57:29PM CEST, jhs@mojatatu.com wrote:
>From: Jamal Hadi Salim <jhs@mojatatu.com>
>
>When you dump hundreds of thousands of actions, getting only 32 per
>dump batch even when the socket buffer and memory allocations allow
>is inefficient.
>
>With this change, the user will get as many as possibly fitting
>within the given constraints available to the kernel.
>
>A new top level TLV space is introduced. An attribute
>TCAA_ACT_FLAGS is used to carry the flags indicating the user
>is capable of processing these large dumps. Older user space which
>doesnt set this flag doesnt get the large (than 32) batches.
>The kernel uses the TCAA_ACT_COUNT attribute to tell the user how many
>actions are put in a single batch. As such user space app knows how long
>to iterate (independent of the type of action being dumped)
>instead of hardcoded maximum of 32.
>
>Some results dumping 1.5M actions, first unpatched tc which the
>kernel doesnt help:
>
>prompt$ time -p tc actions ls action gact | grep index | wc -l
>1500000
>real 1388.43
>user 2.07
>sys 1386.79
>
>Now lets see a patched tc which sets the correct flags when requesting
>a dump:
>
>prompt$ time -p updatedtc actions ls action gact | grep index | wc -l
>1500000
>real 178.13
>user 2.02
>sys 176.96
>
>Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
>---
> include/uapi/linux/rtnetlink.h | 21 +++++++++++++++++++--
> net/sched/act_api.c            | 43 ++++++++++++++++++++++++++++++++----------
> 3 files changed, 53 insertions(+), 12 deletions(-)
>
>diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
>index cce0613..c7080ec 100644
>--- a/include/uapi/linux/rtnetlink.h
>+++ b/include/uapi/linux/rtnetlink.h
>@@ -674,10 +674,27 @@ struct tcamsg {
> 	unsigned char	tca__pad1;
> 	unsigned short	tca__pad2;
> };
>+
>+enum {
>+	TCAA_UNSPEC,
>+	TCAA_ACT_TAB,
>+	TCAA_ACT_FLAGS,
>+	TCAA_ACT_COUNT,
>+	__TCAA_MAX
>+};
>+
>+#define TCAA_MAX (__TCAA_MAX - 1)
> #define TA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg))))
> #define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg))
>-#define TCA_ACT_TAB 1 /* attr type must be >=1 */	
>-#define TCAA_MAX 1
>+#define TCA_ACT_TAB TCAA_ACT_TAB

This is mess. What does "TCAA" stand for?
I suggest some more meaningful naming of the enum items and define
TCA_ACT_TAB and TCAA_MAX to the new values in order to maintain UAPI
Also, please put X_MAX = __X_MAX - 1 into enum


>+/* tcamsg flags stored in attribute TCAA_ACT_FLAGS
>+ *
>+ * ACT_LARGE_DUMP_ON user->kernel to request for larger than TCA_ACT_MAX_PRIO
>+ * actions in a dump. All dump responses will contain the number of actions
>+ * being dumped stored in for user app's consumption in TCAA_ACT_COUNT
>+ *
>+ */
>+#define ACT_LARGE_DUMP_ON		(1 << 0) 

Use "BIT(0)"

Also use the same prefix as for the enum.

+ you can have each potential flag as a separate u8 attribute. That is the
clearest approach and easily extendable. That's how we do it in devlink
for example.


> 
> /* New extended info filters for IFLA_EXT_MASK */
> #define RTEXT_FILTER_VF		(1 << 0)
>diff --git a/net/sched/act_api.c b/net/sched/act_api.c
>index 82b1d48..45e1cf7 100644
>--- a/net/sched/act_api.c
>+++ b/net/sched/act_api.c
>@@ -83,6 +83,7 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
> 			   struct netlink_callback *cb)
> {
> 	int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
>+	unsigned short act_flags = cb->args[2];
> 	struct nlattr *nest;
> 
> 	spin_lock_bh(&hinfo->lock);
>@@ -111,14 +112,18 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
> 			}
> 			nla_nest_end(skb, nest);
> 			n_i++;
>-			if (n_i >= TCA_ACT_MAX_PRIO)
>+			if (!(act_flags & ACT_LARGE_DUMP_ON) &&
>+			    n_i >= TCA_ACT_MAX_PRIO)
> 				goto done;
> 		}
> 	}
> done:
> 	spin_unlock_bh(&hinfo->lock);
>-	if (n_i)
>+	if (n_i) {
> 		cb->args[0] += n_i;
>+		if (act_flags & ACT_LARGE_DUMP_ON)
>+			cb->args[1] = n_i;
>+	}
> 	return n_i;
> 
> nla_put_failure:
>@@ -993,11 +998,15 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
> 	return tcf_add_notify(net, n, &actions, portid);
> }
> 
>+static const struct nla_policy tcaa_policy[TCAA_MAX + 1] = {
>+	[TCAA_ACT_FLAGS]      = { .type = NLA_U32 },
>+};
>+
> static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
> 			 struct netlink_ext_ack *extack)
> {
> 	struct net *net = sock_net(skb->sk);
>-	struct nlattr *tca[TCA_ACT_MAX + 1];
>+	struct nlattr *tca[TCAA_MAX + 1];

This is certainly wrong.


> 	u32 portid = skb ? NETLINK_CB(skb).portid : 0;
> 	int ret = 0, ovr = 0;
> 
>@@ -1005,7 +1014,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
> 	    !netlink_capable(skb, CAP_NET_ADMIN))
> 		return -EPERM;
> 
>-	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL,
>+	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCAA_MAX, tcaa_policy,

This is certainly wrong.


> 			  extack);
> 	if (ret < 0)
> 		return ret;
>@@ -1046,16 +1055,12 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
> 	return ret;
> }
> 
>-static struct nlattr *find_dump_kind(const struct nlmsghdr *n)
>+static struct nlattr *find_dump_kind(struct nlattr **nla)
> {
> 	struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1];
> 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
>-	struct nlattr *nla[TCAA_MAX + 1];
> 	struct nlattr *kind;
> 
>-	if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX,
>-			NULL, NULL) < 0)
>-		return NULL;
> 	tb1 = nla[TCA_ACT_TAB];
> 	if (tb1 == NULL)
> 		return NULL;
>@@ -1081,9 +1086,18 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
> 	struct nlattr *nest;
> 	struct tc_action_ops *a_o;
> 	int ret = 0;
>+	struct nlattr *tcaa[TCAA_MAX + 1];
> 	struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh);
>-	struct nlattr *kind = find_dump_kind(cb->nlh);
>+	struct nlattr *kind = NULL;
>+	u32 act_flags = 0;
>+
>+	ret = nlmsg_parse(cb->nlh, sizeof(struct tcamsg), tcaa, TCAA_MAX,
>+			  tcaa_policy, NULL);
>+	if (ret < 0)
>+		return ret;
>+
> 
>+	kind = find_dump_kind(tcaa);
> 	if (kind == NULL) {
> 		pr_info("tc_dump_action: action bad kind\n");
> 		return 0;
>@@ -1093,10 +1107,16 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
> 	if (a_o == NULL)
> 		return 0;
> 
>+	if (tcaa[TCAA_ACT_FLAGS])
>+		act_flags = nla_get_u32(tcaa[TCAA_ACT_FLAGS]);
>+
> 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
> 			cb->nlh->nlmsg_type, sizeof(*t), 0);
> 	if (!nlh)
> 		goto out_module_put;
>+
>+	cb->args[2] = act_flags;
>+
> 	t = nlmsg_data(nlh);
> 	t->tca_family = AF_UNSPEC;
> 	t->tca__pad1 = 0;
>@@ -1113,6 +1133,9 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
> 	if (ret > 0) {
> 		nla_nest_end(skb, nest);
> 		ret = skb->len;
>+		if (nla_put_u32(skb, TCAA_ACT_COUNT, cb->args[1]))
>+			goto out_module_put;
>+		cb->args[1] = 0;

Why you need to zero this?


> 	} else
> 		nlmsg_trim(skb, b);
> 
>-- 
>1.9.1
>

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH net-next v4 2/2] net sched actions: add time filter for action dumping
  2017-04-19 11:57 ` [PATCH net-next v4 2/2] net sched actions: add time filter for action dumping Jamal Hadi Salim
@ 2017-04-19 12:53   ` Jiri Pirko
  2017-04-19 13:11     ` Jamal Hadi Salim
  0 siblings, 1 reply; 37+ messages in thread
From: Jiri Pirko @ 2017-04-19 12:53 UTC (permalink / raw)
  To: Jamal Hadi Salim; +Cc: davem, netdev, eric.dumazet, xiyou.wangcong

Wed, Apr 19, 2017 at 01:57:30PM CEST, jhs@mojatatu.com wrote:
>From: Jamal Hadi Salim <jhs@mojatatu.com>
>
>This adds support for filtering based on time since last used.
>When we are dumping a large number of actions it is useful to
>have the option of filtering based on when the action was last
>used to reduce the amount of data crossing to user space.
>
>With this patch the user space app sets the TCAA_ACT_TIME_FILTER
>attribute with the value in milliseconds with "time of interest
>since now".  The kernel converts this to jiffies and does the
>filtering comparison matching entries that have seen activity
>since then and returns them to user space.
>Old kernels and old tc continue to work in legacy mode since
>they dont specify this attribute.
>
>Some example (we have 400 actions bound to 400 filters); at installation
>time using  hacked tc which sets the time of interest to 120 seconds:
>
>prompt$ hackedtc actions ls action gact | grep index | wc -l
>400
>
>go get some coffee and  wait for > 120 seconds and try again:
>
>prompt$ hackedtc actions ls action gact | grep index | wc -l
>0
>
>Lets see a filter bound to one of these actions:
>..
>filter pref 10 u32
>filter pref 10 u32 fh 800: ht divisor 1
>filter pref 10 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:10  (rule hit 2 success 1)
>  match 7f000002/ffffffff at 12 (success 1 )
>    action order 1: gact action pass
>     random type none pass val 0
>     index 23 ref 2 bind 1 installed 1145 sec used 802 sec
>    Action statistics:
>    Sent 84 bytes 1 pkt (dropped 0, overlimits 0 requeues 0)
>    backlog 0b 0p requeues 0
>....
>
>that coffee took long, no? It was good.
>
>Now lets ping -c 1 127.0.0.2, then run the actions again:
>
>prompt$ hackedtc actions ls action gact | grep index | wc -l
>1
>
>More details please:
>
>prompt$ hackedtc -s actions ls action gact

I don't see where you pass the time.


>
>    action order 0: gact action pass
>     random type none pass val 0
>     index 23 ref 2 bind 1 installed 1270 sec used 30 sec
>    Action statistics:
>    Sent 168 bytes 2 pkt (dropped 0, overlimits 0 requeues 0)
>    backlog 0b 0p requeues 0
>
>And the filter?
>
>filter pref 10 u32
>filter pref 10 u32 fh 800: ht divisor 1
>filter pref 10 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:10  (rule hit 4 success 2)
>  match 7f000002/ffffffff at 12 (success 2 )
>    action order 1: gact action pass
>     random type none pass val 0
>     index 23 ref 2 bind 1 installed 1324 sec used 84 sec
>    Action statistics:
>    Sent 168 bytes 2 pkt (dropped 0, overlimits 0 requeues 0)
>    backlog 0b 0p requeues 0
>
>Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
>---
> include/uapi/linux/rtnetlink.h |  1 +
> net/sched/act_api.c            | 25 +++++++++++++++++++++++--
> 2 files changed, 24 insertions(+), 2 deletions(-)
>
>diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
>index c7080ec..1b36cc0 100644
>--- a/include/uapi/linux/rtnetlink.h
>+++ b/include/uapi/linux/rtnetlink.h
>@@ -680,6 +680,7 @@ enum {
> 	TCAA_ACT_TAB,
> 	TCAA_ACT_FLAGS,
> 	TCAA_ACT_COUNT,
>+	TCAA_ACT_TIME_FILTER,

Another use of word "filter" for something else. I believe that we need
to reduce confusion, not to make it worse.


> 	__TCAA_MAX
> };
> 
>diff --git a/net/sched/act_api.c b/net/sched/act_api.c
>index 45e1cf7..b03863a 100644
>--- a/net/sched/act_api.c
>+++ b/net/sched/act_api.c
>@@ -84,11 +84,12 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
> {
> 	int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
> 	unsigned short act_flags = cb->args[2];
>+	unsigned long jiffy_filter = cb->args[3];

call this "jiffy_since" for example


> 	struct nlattr *nest;
> 
> 	spin_lock_bh(&hinfo->lock);
> 
>-	s_i = cb->args[0];
>+	s_i = cb->args[4];
> 
> 	for (i = 0; i < (hinfo->hmask + 1); i++) {
> 		struct hlist_head *head;
>@@ -101,6 +102,11 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
> 			if (index < s_i)
> 				continue;
> 
>+			if (jiffy_filter &&
>+			    time_after(jiffy_filter,
>+				       (unsigned long)p->tcfa_tm.lastuse))
>+				continue;
>+
> 			nest = nla_nest_start(skb, n_i);
> 			if (nest == NULL)
> 				goto nla_put_failure;
>@@ -118,6 +124,9 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
> 		}
> 	}
> done:
>+	if (index > 0)
>+		cb->args[4] = index + 1;
>+
> 	spin_unlock_bh(&hinfo->lock);
> 	if (n_i) {
> 		cb->args[0] += n_i;

You don't use "cb->args[0]" anymore. Why do you need this?
Just use cb->args[0] instead of 4


>@@ -1000,6 +1009,7 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
> 
> static const struct nla_policy tcaa_policy[TCAA_MAX + 1] = {
> 	[TCAA_ACT_FLAGS]      = { .type = NLA_U32 },
>+	[TCAA_ACT_TIME_FILTER]      = { .type = NLA_U32 },
> };
> 
> static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
>@@ -1090,13 +1100,14 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
> 	struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh);
> 	struct nlattr *kind = NULL;
> 	u32 act_flags = 0;
>+	u32 msecs_filter = 0;
>+	unsigned long jiffy_wanted = 0;

Also, "jiffy_since". Same variable, same name please.

> 
> 	ret = nlmsg_parse(cb->nlh, sizeof(struct tcamsg), tcaa, TCAA_MAX,
> 			  tcaa_policy, NULL);
> 	if (ret < 0)
> 		return ret;
> 
>-

This should not be part of this patch.


> 	kind = find_dump_kind(tcaa);
> 	if (kind == NULL) {
> 		pr_info("tc_dump_action: action bad kind\n");
>@@ -1110,12 +1121,22 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
> 	if (tcaa[TCAA_ACT_FLAGS])
> 		act_flags = nla_get_u32(tcaa[TCAA_ACT_FLAGS]);
> 
>+	if (tcaa[TCAA_ACT_TIME_FILTER])
>+		msecs_filter = nla_get_u32(tcaa[TCAA_ACT_TIME_FILTER]);
>+
> 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
> 			cb->nlh->nlmsg_type, sizeof(*t), 0);
> 	if (!nlh)
> 		goto out_module_put;
> 
>+	if (msecs_filter) {
>+		unsigned long jiffy_msecs = msecs_to_jiffies(msecs_filter);
>+
>+		jiffy_wanted = jiffies - jiffy_msecs;

you can do just:
		jiffy_wanted = jiffies - msecs_to_jiffies(msecs_filter);

Also, you can put this under "if (tcaa[TCAA_ACT_TIME_FILTER])" so it's
all in one place.


>+	}
>+
> 	cb->args[2] = act_flags;
>+	cb->args[3] = jiffy_wanted;
> 
> 	t = nlmsg_data(nlh);
> 	t->tca_family = AF_UNSPEC;
>-- 
>1.9.1
>

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
  2017-04-19 12:36 ` [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch Jiri Pirko
@ 2017-04-19 12:55   ` Roman Mashak
  2017-04-19 13:05     ` Jiri Pirko
  2017-04-19 13:03   ` Jamal Hadi Salim
  1 sibling, 1 reply; 37+ messages in thread
From: Roman Mashak @ 2017-04-19 12:55 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: Jamal Hadi Salim, davem, netdev, eric.dumazet, xiyou.wangcong

Jiri Pirko <jiri@resnulli.us> writes:


[...]

>>+enum {
>>+	TCAA_UNSPEC,
>>+	TCAA_ACT_TAB,
>>+	TCAA_ACT_FLAGS,
>>+	TCAA_ACT_COUNT,
>>+	__TCAA_MAX
>>+};
>>+
>>+#define TCAA_MAX (__TCAA_MAX - 1)
>> #define TA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg))))
>> #define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg))
>>-#define TCA_ACT_TAB 1 /* attr type must be >=1 */	
>>-#define TCAA_MAX 1
>>+#define TCA_ACT_TAB TCAA_ACT_TAB
>
> This is mess. What does "TCAA" stand for?
> I suggest some more meaningful naming of the enum items and define
> TCA_ACT_TAB and TCAA_MAX to the new values in order to maintain UAPI
> Also, please put X_MAX = __X_MAX - 1 into enum
>

Notation observed in tc and unfortunately not consistently maintained is
to have enum with TCA* attributes for instance, followed by define,
outside of the enum, with __X_MAX -1

[...]

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
  2017-04-19 12:36 ` [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch Jiri Pirko
  2017-04-19 12:55   ` Roman Mashak
@ 2017-04-19 13:03   ` Jamal Hadi Salim
  2017-04-19 13:13     ` Jiri Pirko
  1 sibling, 1 reply; 37+ messages in thread
From: Jamal Hadi Salim @ 2017-04-19 13:03 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: davem, netdev, eric.dumazet, xiyou.wangcong

On 17-04-19 08:36 AM, Jiri Pirko wrote:
> Wed, Apr 19, 2017 at 01:57:29PM CEST, jhs@mojatatu.com wrote:
>> From: Jamal Hadi Salim <jhs@mojatatu.com>

>> include/uapi/linux/rtnetlink.h | 21 +++++++++++++++++++--
>> net/sched/act_api.c            | 43 ++++++++++++++++++++++++++++++++----------
>> 3 files changed, 53 insertions(+), 12 deletions(-)
>>
>> diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
>> index cce0613..c7080ec 100644
>> --- a/include/uapi/linux/rtnetlink.h
>> +++ b/include/uapi/linux/rtnetlink.h
>> @@ -674,10 +674,27 @@ struct tcamsg {
>> 	unsigned char	tca__pad1;
>> 	unsigned short	tca__pad2;
>> };
>> +
>> +enum {
>> +	TCAA_UNSPEC,
>> +	TCAA_ACT_TAB,
>> +	TCAA_ACT_FLAGS,
>> +	TCAA_ACT_COUNT,
>> +	__TCAA_MAX
>> +};
>> +
>> +#define TCAA_MAX (__TCAA_MAX - 1)
>> #define TA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg))))
>> #define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg))
>> -#define TCA_ACT_TAB 1 /* attr type must be >=1 */	
>> -#define TCAA_MAX 1
>> +#define TCA_ACT_TAB TCAA_ACT_TAB
>
> This is mess. What does "TCAA" stand for?

TC Actions Attributes.  What would you call it? I could have
called it TCA_ROOT etc. But maybe a comment to just call it
TC Actions Attributes would be enough?

> I suggest some more meaningful naming of the enum items and define
> TCA_ACT_TAB and TCAA_MAX to the new values in order to maintain UAPI


Thats what the above does (for UAPI) maintainance, no?

> Also, please put X_MAX = __X_MAX - 1 into enum

That is diverting from the norm which defines it outside
of the enum. A good reason could be: You, Jiri, plan to go and
cleanup all the netlink stuff which uses this style.
Or you think we should start a trend which leads us
to a new clean style.

>
>> +/* tcamsg flags stored in attribute TCAA_ACT_FLAGS
>> + *
>> + * ACT_LARGE_DUMP_ON user->kernel to request for larger than TCA_ACT_MAX_PRIO
>> + * actions in a dump. All dump responses will contain the number of actions
>> + * being dumped stored in for user app's consumption in TCAA_ACT_COUNT
>> + *
>> + */
>> +#define ACT_LARGE_DUMP_ON		(1 << 0)
>
> Use "BIT(0)"
>

Same question as before.
Are you planning to cleanup the rest of the code which
follows the same style? example, look at this:
         TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0),


> Also use the same prefix as for the enum.
>
> + you can have each potential flag as a separate u8 attribute. That is the
> clearest approach and easily extendable. That's how we do it in devlink
> for example.
>

So you are using 8 bits for one flag which requires one bit?
+ the TLV header? Sounds like overkill.
Note: We dont need more than 1 or 2 bits for this case.
Even 32 bits is overkill for what I am doing.
When do i need to extend a single bit representation?

>
>> 	struct net *net = sock_net(skb->sk);
>> -	struct nlattr *tca[TCA_ACT_MAX + 1];
>> +	struct nlattr *tca[TCAA_MAX + 1];
>
> This is certainly wrong.
>

Why is it wrong?

>
>> 	u32 portid = skb ? NETLINK_CB(skb).portid : 0;
>> 	int ret = 0, ovr = 0;
>>
>> @@ -1005,7 +1014,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
>> 	    !netlink_capable(skb, CAP_NET_ADMIN))
>> 		return -EPERM;
>>
>> -	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL,
>> +	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCAA_MAX, tcaa_policy,
>
> This is certainly wrong.
>

Same question as above.


>> +		if (nla_put_u32(skb, TCAA_ACT_COUNT, cb->args[1]))
>> +			goto out_module_put;
>> +		cb->args[1] = 0;
>
> Why you need to zero this?
>
>

The count is per submitted message - every time we succesfuly send a msg
to user, we start the recount.

cheers,
jamal

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
  2017-04-19 12:55   ` Roman Mashak
@ 2017-04-19 13:05     ` Jiri Pirko
  0 siblings, 0 replies; 37+ messages in thread
From: Jiri Pirko @ 2017-04-19 13:05 UTC (permalink / raw)
  To: Roman Mashak
  Cc: Jamal Hadi Salim, davem, netdev, eric.dumazet, xiyou.wangcong

Wed, Apr 19, 2017 at 02:55:57PM CEST, mrv@mojatatu.com wrote:
>Jiri Pirko <jiri@resnulli.us> writes:
>
>
>[...]
>
>>>+enum {
>>>+	TCAA_UNSPEC,
>>>+	TCAA_ACT_TAB,
>>>+	TCAA_ACT_FLAGS,
>>>+	TCAA_ACT_COUNT,
>>>+	__TCAA_MAX
>>>+};
>>>+
>>>+#define TCAA_MAX (__TCAA_MAX - 1)
>>> #define TA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg))))
>>> #define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg))
>>>-#define TCA_ACT_TAB 1 /* attr type must be >=1 */	
>>>-#define TCAA_MAX 1
>>>+#define TCA_ACT_TAB TCAA_ACT_TAB
>>
>> This is mess. What does "TCAA" stand for?
>> I suggest some more meaningful naming of the enum items and define
>> TCA_ACT_TAB and TCAA_MAX to the new values in order to maintain UAPI
>> Also, please put X_MAX = __X_MAX - 1 into enum
>>
>
>Notation observed in tc and unfortunately not consistently maintained is
>to have enum with TCA* attributes for instance, followed by define,
>outside of the enum, with __X_MAX -1

I don't have strong opinion on define or in-enum. I like in-enum better.
The rest could be converted later on.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH net-next v4 2/2] net sched actions: add time filter for action dumping
  2017-04-19 12:53   ` Jiri Pirko
@ 2017-04-19 13:11     ` Jamal Hadi Salim
  2017-04-19 14:14       ` Jiri Pirko
  0 siblings, 1 reply; 37+ messages in thread
From: Jamal Hadi Salim @ 2017-04-19 13:11 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: davem, netdev, eric.dumazet, xiyou.wangcong

On 17-04-19 08:53 AM, Jiri Pirko wrote:
> Wed, Apr 19, 2017 at 01:57:30PM CEST, jhs@mojatatu.com wrote:
>> From: Jamal Hadi Salim <jhs@mojatatu.com>
>>
>> This adds support for filtering based on time since last used.
>> When we are dumping a large number of actions it is useful to
>> have the option of filtering based on when the action was last
>> used to reduce the amount of data crossing to user space.
>>
>> With this patch the user space app sets the TCAA_ACT_TIME_FILTER
>> attribute with the value in milliseconds with "time of interest
>> since now".  The kernel converts this to jiffies and does the
>> filtering comparison matching entries that have seen activity
>> since then and returns them to user space.
>> Old kernels and old tc continue to work in legacy mode since
>> they dont specify this attribute.
>>
>> Some example (we have 400 actions bound to 400 filters); at installation
>> time using  hacked tc which sets the time of interest to 120 seconds:
>>
>> prompt$ hackedtc actions ls action gact | grep index | wc -l
>> 400
>>
>> go get some coffee and  wait for > 120 seconds and try again:
>>
>> prompt$ hackedtc actions ls action gact | grep index | wc -l
>> 0
>>
>> Lets see a filter bound to one of these actions:
>> ..
>> filter pref 10 u32
>> filter pref 10 u32 fh 800: ht divisor 1
>> filter pref 10 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:10  (rule hit 2 success 1)
>>  match 7f000002/ffffffff at 12 (success 1 )
>>    action order 1: gact action pass
>>     random type none pass val 0
>>     index 23 ref 2 bind 1 installed 1145 sec used 802 sec
>>    Action statistics:
>>    Sent 84 bytes 1 pkt (dropped 0, overlimits 0 requeues 0)
>>    backlog 0b 0p requeues 0
>> ....
>>
>> that coffee took long, no? It was good.
>>
>> Now lets ping -c 1 127.0.0.2, then run the actions again:
>>
>> prompt$ hackedtc actions ls action gact | grep index | wc -l
>> 1
>>
>> More details please:
>>
>> prompt$ hackedtc -s actions ls action gact
>
> I don't see where you pass the time.
>

User space sets the TCAA_ACT_TIME_FILTER value.
Jiri - this is described in the commit log ;->
I have some tc changes which set this value.
In the example below it was 120 seconds.


>
>>
>>    action order 0: gact action pass
>>     random type none pass val 0
>>     index 23 ref 2 bind 1 installed 1270 sec used 30 sec
>>    Action statistics:
>>    Sent 168 bytes 2 pkt (dropped 0, overlimits 0 requeues 0)
>>    backlog 0b 0p requeues 0
>>
>> And the filter?
>>
>> filter pref 10 u32
>> filter pref 10 u32 fh 800: ht divisor 1
>> filter pref 10 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:10  (rule hit 4 success 2)
>>  match 7f000002/ffffffff at 12 (success 2 )
>>    action order 1: gact action pass
>>     random type none pass val 0
>>     index 23 ref 2 bind 1 installed 1324 sec used 84 sec
>>    Action statistics:
>>    Sent 168 bytes 2 pkt (dropped 0, overlimits 0 requeues 0)
>>    backlog 0b 0p requeues 0
>>
>> Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
>> ---
>> include/uapi/linux/rtnetlink.h |  1 +
>> net/sched/act_api.c            | 25 +++++++++++++++++++++++--
>> 2 files changed, 24 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
>> index c7080ec..1b36cc0 100644
>> --- a/include/uapi/linux/rtnetlink.h
>> +++ b/include/uapi/linux/rtnetlink.h
>> @@ -680,6 +680,7 @@ enum {
>> 	TCAA_ACT_TAB,
>> 	TCAA_ACT_FLAGS,
>> 	TCAA_ACT_COUNT,
>> +	TCAA_ACT_TIME_FILTER,
>
> Another use of word "filter" for something else. I believe that we need
> to reduce confusion, not to make it worse.
>

It is a time filter. What do you want to call it?


>
>> 	__TCAA_MAX
>> };
>>
>> diff --git a/net/sched/act_api.c b/net/sched/act_api.c
>> index 45e1cf7..b03863a 100644
>> --- a/net/sched/act_api.c
>> +++ b/net/sched/act_api.c
>> @@ -84,11 +84,12 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
>> {
>> 	int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
>> 	unsigned short act_flags = cb->args[2];
>> +	unsigned long jiffy_filter = cb->args[3];
>
> call this "jiffy_since" for example
>

Sure.


>
>> 	struct nlattr *nest;
>>
>> 	spin_lock_bh(&hinfo->lock);
>>
>> -	s_i = cb->args[0];
>> +	s_i = cb->args[4];
>>
>> 	for (i = 0; i < (hinfo->hmask + 1); i++) {
>> 		struct hlist_head *head;
>> @@ -101,6 +102,11 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
>> 			if (index < s_i)
>> 				continue;
>>
>> +			if (jiffy_filter &&
>> +			    time_after(jiffy_filter,
>> +				       (unsigned long)p->tcfa_tm.lastuse))
>> +				continue;
>> +
>> 			nest = nla_nest_start(skb, n_i);
>> 			if (nest == NULL)
>> 				goto nla_put_failure;
>> @@ -118,6 +124,9 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
>> 		}
>> 	}
>> done:
>> +	if (index > 0)
>> +		cb->args[4] = index + 1;
>> +
>> 	spin_unlock_bh(&hinfo->lock);
>> 	if (n_i) {
>> 		cb->args[0] += n_i;
>
> You don't use "cb->args[0]" anymore. Why do you need this?

You are right - will remove this.

> Just use cb->args[0] instead of 4
>

I can do that too.


>
>> @@ -1000,6 +1009,7 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
>>
>> static const struct nla_policy tcaa_policy[TCAA_MAX + 1] = {
>> 	[TCAA_ACT_FLAGS]      = { .type = NLA_U32 },
>> +	[TCAA_ACT_TIME_FILTER]      = { .type = NLA_U32 },
>> };
>>
>> static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
>> @@ -1090,13 +1100,14 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
>> 	struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh);
>> 	struct nlattr *kind = NULL;
>> 	u32 act_flags = 0;
>> +	u32 msecs_filter = 0;
>> +	unsigned long jiffy_wanted = 0;
>
> Also, "jiffy_since". Same variable, same name please.
>

np.

>>
>> 	ret = nlmsg_parse(cb->nlh, sizeof(struct tcamsg), tcaa, TCAA_MAX,
>> 			  tcaa_policy, NULL);
>> 	if (ret < 0)
>> 		return ret;
>>
>> -
>
> This should not be part of this patch.
>

Will remove.

>
>> 	kind = find_dump_kind(tcaa);
>> 	if (kind == NULL) {
>> 		pr_info("tc_dump_action: action bad kind\n");
>> @@ -1110,12 +1121,22 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
>> 	if (tcaa[TCAA_ACT_FLAGS])
>> 		act_flags = nla_get_u32(tcaa[TCAA_ACT_FLAGS]);
>>
>> +	if (tcaa[TCAA_ACT_TIME_FILTER])
>> +		msecs_filter = nla_get_u32(tcaa[TCAA_ACT_TIME_FILTER]);
>> +
>> 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
>> 			cb->nlh->nlmsg_type, sizeof(*t), 0);
>> 	if (!nlh)
>> 		goto out_module_put;
>>
>> +	if (msecs_filter) {
>> +		unsigned long jiffy_msecs = msecs_to_jiffies(msecs_filter);
>> +
>> +		jiffy_wanted = jiffies - jiffy_msecs;
>
> you can do just:
> 		jiffy_wanted = jiffies - msecs_to_jiffies(msecs_filter);
>
> Also, you can put this under "if (tcaa[TCAA_ACT_TIME_FILTER])" so it's
> all in one place.
>

Will do - next cycle run.


cheers,
jamal

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
  2017-04-19 13:03   ` Jamal Hadi Salim
@ 2017-04-19 13:13     ` Jiri Pirko
  2017-04-19 15:37       ` Jamal Hadi Salim
  0 siblings, 1 reply; 37+ messages in thread
From: Jiri Pirko @ 2017-04-19 13:13 UTC (permalink / raw)
  To: Jamal Hadi Salim; +Cc: davem, netdev, eric.dumazet, xiyou.wangcong

Wed, Apr 19, 2017 at 03:03:59PM CEST, jhs@mojatatu.com wrote:
>On 17-04-19 08:36 AM, Jiri Pirko wrote:
>> Wed, Apr 19, 2017 at 01:57:29PM CEST, jhs@mojatatu.com wrote:
>> > From: Jamal Hadi Salim <jhs@mojatatu.com>
>
>> > include/uapi/linux/rtnetlink.h | 21 +++++++++++++++++++--
>> > net/sched/act_api.c            | 43 ++++++++++++++++++++++++++++++++----------
>> > 3 files changed, 53 insertions(+), 12 deletions(-)
>> > 
>> > diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
>> > index cce0613..c7080ec 100644
>> > --- a/include/uapi/linux/rtnetlink.h
>> > +++ b/include/uapi/linux/rtnetlink.h
>> > @@ -674,10 +674,27 @@ struct tcamsg {
>> > 	unsigned char	tca__pad1;
>> > 	unsigned short	tca__pad2;
>> > };
>> > +
>> > +enum {
>> > +	TCAA_UNSPEC,
>> > +	TCAA_ACT_TAB,
>> > +	TCAA_ACT_FLAGS,
>> > +	TCAA_ACT_COUNT,
>> > +	__TCAA_MAX
>> > +};
>> > +
>> > +#define TCAA_MAX (__TCAA_MAX - 1)
>> > #define TA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg))))
>> > #define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg))
>> > -#define TCA_ACT_TAB 1 /* attr type must be >=1 */	
>> > -#define TCAA_MAX 1
>> > +#define TCA_ACT_TAB TCAA_ACT_TAB
>> 
>> This is mess. What does "TCAA" stand for?
>
>TC Actions Attributes.  What would you call it? I could have
>called it TCA_ROOT etc. But maybe a comment to just call it
>TC Actions Attributes would be enough?

TCA_DUMP_X

it is only for dumping. Naming it "attribute" seems weird. Same as if
you have: int variable_something;


>
>> I suggest some more meaningful naming of the enum items and define
>> TCA_ACT_TAB and TCAA_MAX to the new values in order to maintain UAPI
>
>
>Thats what the above does (for UAPI) maintainance, no?

It does it for TCA_ACT_TAB. We need to do it for both TCA_ACT_TAB and TCAA_MAX


>
>> Also, please put X_MAX = __X_MAX - 1 into enum
>
>That is diverting from the norm which defines it outside
>of the enum. A good reason could be: You, Jiri, plan to go and
>cleanup all the netlink stuff which uses this style.
>Or you think we should start a trend which leads us
>to a new clean style.

I would start now. I can take of the follow-up patch to change the rest.



>
>> 
>> > +/* tcamsg flags stored in attribute TCAA_ACT_FLAGS
>> > + *
>> > + * ACT_LARGE_DUMP_ON user->kernel to request for larger than TCA_ACT_MAX_PRIO
>> > + * actions in a dump. All dump responses will contain the number of actions
>> > + * being dumped stored in for user app's consumption in TCAA_ACT_COUNT
>> > + *
>> > + */
>> > +#define ACT_LARGE_DUMP_ON		(1 << 0)
>> 
>> Use "BIT(0)"
>> 
>
>Same question as before.

Same answer :)


>Are you planning to cleanup the rest of the code which
>follows the same style? example, look at this:
>        TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0),
>
>
>> Also use the same prefix as for the enum.
>> 
>> + you can have each potential flag as a separate u8 attribute. That is the
>> clearest approach and easily extendable. That's how we do it in devlink
>> for example.
>> 
>
>So you are using 8 bits for one flag which requires one bit?
>+ the TLV header? Sounds like overkill.
>Note: We dont need more than 1 or 2 bits for this case.
>Even 32 bits is overkill for what I am doing.
>When do i need to extend a single bit representation?

I don't see any problem adding couple of bytes if it increases cleannes
and easy extendability.


>
>> 
>> > 	struct net *net = sock_net(skb->sk);
>> > -	struct nlattr *tca[TCA_ACT_MAX + 1];
>> > +	struct nlattr *tca[TCAA_MAX + 1];
>> 
>> This is certainly wrong.
>> 
>
>Why is it wrong?

Because you use existing TCA_ACT_ attr enum.


>
>> 
>> > 	u32 portid = skb ? NETLINK_CB(skb).portid : 0;
>> > 	int ret = 0, ovr = 0;
>> > 
>> > @@ -1005,7 +1014,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
>> > 	    !netlink_capable(skb, CAP_NET_ADMIN))
>> > 		return -EPERM;
>> > 
>> > -	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL,
>> > +	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCAA_MAX, tcaa_policy,
>> 
>> This is certainly wrong.
>> 
>
>Same question as above.

Same answer.


>
>
>> > +		if (nla_put_u32(skb, TCAA_ACT_COUNT, cb->args[1]))
>> > +			goto out_module_put;
>> > +		cb->args[1] = 0;
>> 
>> Why you need to zero this?
>> 
>> 
>
>The count is per submitted message - every time we succesfuly send a msg
>to user, we start the recount.

ok


>
>cheers,
>jamal
>

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH net-next v4 2/2] net sched actions: add time filter for action dumping
  2017-04-19 13:11     ` Jamal Hadi Salim
@ 2017-04-19 14:14       ` Jiri Pirko
  2017-04-19 15:47         ` Jamal Hadi Salim
  0 siblings, 1 reply; 37+ messages in thread
From: Jiri Pirko @ 2017-04-19 14:14 UTC (permalink / raw)
  To: Jamal Hadi Salim; +Cc: davem, netdev, eric.dumazet, xiyou.wangcong

Wed, Apr 19, 2017 at 03:11:26PM CEST, jhs@mojatatu.com wrote:
>On 17-04-19 08:53 AM, Jiri Pirko wrote:
>> Wed, Apr 19, 2017 at 01:57:30PM CEST, jhs@mojatatu.com wrote:
>> > From: Jamal Hadi Salim <jhs@mojatatu.com>
>> > 
>> > This adds support for filtering based on time since last used.
>> > When we are dumping a large number of actions it is useful to
>> > have the option of filtering based on when the action was last
>> > used to reduce the amount of data crossing to user space.
>> > 
>> > With this patch the user space app sets the TCAA_ACT_TIME_FILTER
>> > attribute with the value in milliseconds with "time of interest
>> > since now".  The kernel converts this to jiffies and does the
>> > filtering comparison matching entries that have seen activity
>> > since then and returns them to user space.
>> > Old kernels and old tc continue to work in legacy mode since
>> > they dont specify this attribute.
>> > 
>> > Some example (we have 400 actions bound to 400 filters); at installation
>> > time using  hacked tc which sets the time of interest to 120 seconds:
>> > 
>> > prompt$ hackedtc actions ls action gact | grep index | wc -l
>> > 400
>> > 
>> > go get some coffee and  wait for > 120 seconds and try again:
>> > 
>> > prompt$ hackedtc actions ls action gact | grep index | wc -l
>> > 0
>> > 
>> > Lets see a filter bound to one of these actions:
>> > ..
>> > filter pref 10 u32
>> > filter pref 10 u32 fh 800: ht divisor 1
>> > filter pref 10 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:10  (rule hit 2 success 1)
>> >  match 7f000002/ffffffff at 12 (success 1 )
>> >    action order 1: gact action pass
>> >     random type none pass val 0
>> >     index 23 ref 2 bind 1 installed 1145 sec used 802 sec
>> >    Action statistics:
>> >    Sent 84 bytes 1 pkt (dropped 0, overlimits 0 requeues 0)
>> >    backlog 0b 0p requeues 0
>> > ....
>> > 
>> > that coffee took long, no? It was good.
>> > 
>> > Now lets ping -c 1 127.0.0.2, then run the actions again:
>> > 
>> > prompt$ hackedtc actions ls action gact | grep index | wc -l
>> > 1
>> > 
>> > More details please:
>> > 
>> > prompt$ hackedtc -s actions ls action gact
>> 
>> I don't see where you pass the time.
>> 
>
>User space sets the TCAA_ACT_TIME_FILTER value.
>Jiri - this is described in the commit log ;->
>I have some tc changes which set this value.
>In the example below it was 120 seconds.

So it is hard coded. That confused me. I believe it would be good to
provide actual tc patch that does the proper changes and include the
example of that usage in the description - instead of "hackedtc"


>
>
>> 
>> > 
>> >    action order 0: gact action pass
>> >     random type none pass val 0
>> >     index 23 ref 2 bind 1 installed 1270 sec used 30 sec
>> >    Action statistics:
>> >    Sent 168 bytes 2 pkt (dropped 0, overlimits 0 requeues 0)
>> >    backlog 0b 0p requeues 0
>> > 
>> > And the filter?
>> > 
>> > filter pref 10 u32
>> > filter pref 10 u32 fh 800: ht divisor 1
>> > filter pref 10 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:10  (rule hit 4 success 2)
>> >  match 7f000002/ffffffff at 12 (success 2 )
>> >    action order 1: gact action pass
>> >     random type none pass val 0
>> >     index 23 ref 2 bind 1 installed 1324 sec used 84 sec
>> >    Action statistics:
>> >    Sent 168 bytes 2 pkt (dropped 0, overlimits 0 requeues 0)
>> >    backlog 0b 0p requeues 0
>> > 
>> > Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
>> > ---
>> > include/uapi/linux/rtnetlink.h |  1 +
>> > net/sched/act_api.c            | 25 +++++++++++++++++++++++--
>> > 2 files changed, 24 insertions(+), 2 deletions(-)
>> > 
>> > diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
>> > index c7080ec..1b36cc0 100644
>> > --- a/include/uapi/linux/rtnetlink.h
>> > +++ b/include/uapi/linux/rtnetlink.h
>> > @@ -680,6 +680,7 @@ enum {
>> > 	TCAA_ACT_TAB,
>> > 	TCAA_ACT_FLAGS,
>> > 	TCAA_ACT_COUNT,
>> > +	TCAA_ACT_TIME_FILTER,
>> 
>> Another use of word "filter" for something else. I believe that we need
>> to reduce confusion, not to make it worse.
>> 
>
>It is a time filter. What do you want to call it?

TIME_DELTA?


>
>
>> 
>> > 	__TCAA_MAX
>> > };
>> > 
>> > diff --git a/net/sched/act_api.c b/net/sched/act_api.c
>> > index 45e1cf7..b03863a 100644
>> > --- a/net/sched/act_api.c
>> > +++ b/net/sched/act_api.c
>> > @@ -84,11 +84,12 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
>> > {
>> > 	int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
>> > 	unsigned short act_flags = cb->args[2];
>> > +	unsigned long jiffy_filter = cb->args[3];
>> 
>> call this "jiffy_since" for example
>> 
>
>Sure.
>
>
>> 
>> > 	struct nlattr *nest;
>> > 
>> > 	spin_lock_bh(&hinfo->lock);
>> > 
>> > -	s_i = cb->args[0];
>> > +	s_i = cb->args[4];
>> > 
>> > 	for (i = 0; i < (hinfo->hmask + 1); i++) {
>> > 		struct hlist_head *head;
>> > @@ -101,6 +102,11 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
>> > 			if (index < s_i)
>> > 				continue;
>> > 
>> > +			if (jiffy_filter &&
>> > +			    time_after(jiffy_filter,
>> > +				       (unsigned long)p->tcfa_tm.lastuse))
>> > +				continue;
>> > +
>> > 			nest = nla_nest_start(skb, n_i);
>> > 			if (nest == NULL)
>> > 				goto nla_put_failure;
>> > @@ -118,6 +124,9 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
>> > 		}
>> > 	}
>> > done:
>> > +	if (index > 0)
>> > +		cb->args[4] = index + 1;
>> > +
>> > 	spin_unlock_bh(&hinfo->lock);
>> > 	if (n_i) {
>> > 		cb->args[0] += n_i;
>> 
>> You don't use "cb->args[0]" anymore. Why do you need this?
>
>You are right - will remove this.
>
>> Just use cb->args[0] instead of 4
>> 
>
>I can do that too.
>
>
>> 
>> > @@ -1000,6 +1009,7 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
>> > 
>> > static const struct nla_policy tcaa_policy[TCAA_MAX + 1] = {
>> > 	[TCAA_ACT_FLAGS]      = { .type = NLA_U32 },
>> > +	[TCAA_ACT_TIME_FILTER]      = { .type = NLA_U32 },
>> > };
>> > 
>> > static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
>> > @@ -1090,13 +1100,14 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
>> > 	struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh);
>> > 	struct nlattr *kind = NULL;
>> > 	u32 act_flags = 0;
>> > +	u32 msecs_filter = 0;
>> > +	unsigned long jiffy_wanted = 0;
>> 
>> Also, "jiffy_since". Same variable, same name please.
>> 
>
>np.
>
>> > 
>> > 	ret = nlmsg_parse(cb->nlh, sizeof(struct tcamsg), tcaa, TCAA_MAX,
>> > 			  tcaa_policy, NULL);
>> > 	if (ret < 0)
>> > 		return ret;
>> > 
>> > -
>> 
>> This should not be part of this patch.
>> 
>
>Will remove.
>
>> 
>> > 	kind = find_dump_kind(tcaa);
>> > 	if (kind == NULL) {
>> > 		pr_info("tc_dump_action: action bad kind\n");
>> > @@ -1110,12 +1121,22 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
>> > 	if (tcaa[TCAA_ACT_FLAGS])
>> > 		act_flags = nla_get_u32(tcaa[TCAA_ACT_FLAGS]);
>> > 
>> > +	if (tcaa[TCAA_ACT_TIME_FILTER])
>> > +		msecs_filter = nla_get_u32(tcaa[TCAA_ACT_TIME_FILTER]);
>> > +
>> > 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
>> > 			cb->nlh->nlmsg_type, sizeof(*t), 0);
>> > 	if (!nlh)
>> > 		goto out_module_put;
>> > 
>> > +	if (msecs_filter) {
>> > +		unsigned long jiffy_msecs = msecs_to_jiffies(msecs_filter);
>> > +
>> > +		jiffy_wanted = jiffies - jiffy_msecs;
>> 
>> you can do just:
>> 		jiffy_wanted = jiffies - msecs_to_jiffies(msecs_filter);
>> 
>> Also, you can put this under "if (tcaa[TCAA_ACT_TIME_FILTER])" so it's
>> all in one place.
>> 
>
>Will do - next cycle run.


Thanks.


>
>
>cheers,
>jamal

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
  2017-04-19 13:13     ` Jiri Pirko
@ 2017-04-19 15:37       ` Jamal Hadi Salim
  2017-04-19 15:54         ` Jiri Pirko
  0 siblings, 1 reply; 37+ messages in thread
From: Jamal Hadi Salim @ 2017-04-19 15:37 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: davem, netdev, eric.dumazet, xiyou.wangcong

On 17-04-19 09:13 AM, Jiri Pirko wrote:
> Wed, Apr 19, 2017 at 03:03:59PM CEST, jhs@mojatatu.com wrote:
>> On 17-04-19 08:36 AM, Jiri Pirko wrote:
>>> Wed, Apr 19, 2017 at 01:57:29PM CEST, jhs@mojatatu.com wrote:
>>>> From: Jamal Hadi Salim <jhs@mojatatu.com>
>>
>>>> include/uapi/linux/rtnetlink.h | 21 +++++++++++++++++++--
>>>> net/sched/act_api.c            | 43 ++++++++++++++++++++++++++++++++----------
>>>> 3 files changed, 53 insertions(+), 12 deletions(-)

>>>> +#define TCAA_MAX (__TCAA_MAX - 1)
>>>> #define TA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg))))
>>>> #define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg))
>>>> -#define TCA_ACT_TAB 1 /* attr type must be >=1 */	
>>>> -#define TCAA_MAX 1
>>>> +#define TCA_ACT_TAB TCAA_ACT_TAB
>>>
>>> This is mess. What does "TCAA" stand for?
>>
>> TC Actions Attributes.  What would you call it? I could have
>> called it TCA_ROOT etc. But maybe a comment to just call it
>> TC Actions Attributes would be enough?
>
> TCA_DUMP_X
>
> it is only for dumping. Naming it "attribute" seems weird. Same as if
> you have: int variable_something;
>

Jiri, this is not just for dumping. We are describing high level
attributes for tc actions.

>
>>
>>> I suggest some more meaningful naming of the enum items and define
>>> TCA_ACT_TAB and TCAA_MAX to the new values in order to maintain UAPI
>>
>>
>> Thats what the above does (for UAPI) maintainance, no?
>
> It does it for TCA_ACT_TAB. We need to do it for both TCA_ACT_TAB and TCAA_MAX
>

TCAA_XXX is the namespace selected. You dont like that name and
adding DUMP doesnt make sense to me. How about TCA_ACT_ROOT?

>>
>>> Also, please put X_MAX = __X_MAX - 1 into enum
>>
>> That is diverting from the norm which defines it outside
>> of the enum. A good reason could be: You, Jiri, plan to go and
>> cleanup all the netlink stuff which uses this style.
>> Or you think we should start a trend which leads us
>> to a new clean style.
>
> I would start now. I can take of the follow-up patch to change the rest.
>

It is a _lot_ of code to change! Note:
This is all the UAPI visible code (the same coding style for 20 years).
I am worried about this part.

>>>
>>>> +/* tcamsg flags stored in attribute TCAA_ACT_FLAGS
>>>> + *
>>>> + * ACT_LARGE_DUMP_ON user->kernel to request for larger than TCA_ACT_MAX_PRIO
>>>> + * actions in a dump. All dump responses will contain the number of actions
>>>> + * being dumped stored in for user app's consumption in TCAA_ACT_COUNT
>>>> + *
>>>> + */
>>>> +#define ACT_LARGE_DUMP_ON		(1 << 0)
>>>
>>> Use "BIT(0)"
>>>
>>
>> Same question as before.
>
> Same answer :)
>

I will change this one - it is a lot simpler coding style
wide/wise than the other one.

>> Are you planning to cleanup the rest of the code which

>> So you are using 8 bits for one flag which requires one bit?
>> + the TLV header? Sounds like overkill.
>> Note: We dont need more than 1 or 2 bits for this case.
>> Even 32 bits is overkill for what I am doing.
>> When do i need to extend a single bit representation?
>
> I don't see any problem adding couple of bytes if it increases cleannes
> and easy extendability.
>

How do you extend one bit? Seriously. If i want to add another bit I
will add one more to existing bit map not 64 (T + L + 8bits + pad).
If i ran out of space i will add a new TLV.

>>>
>>>> 	struct net *net = sock_net(skb->sk);
>>>> -	struct nlattr *tca[TCA_ACT_MAX + 1];
>>>> +	struct nlattr *tca[TCAA_MAX + 1];
>>>
>>> This is certainly wrong.
>>>
>>
>> Why is it wrong?
>
> Because you use existing TCA_ACT_ attr enum.
>

Is there a programming mistake or you  just dont like the name?
AFAIK, and based on my testing that code is correct.

>>>> -	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL,
>>>> +	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCAA_MAX, tcaa_policy,
>>>
>>> This is certainly wrong.
>>>
>>
>> Same question as above.
>
> Same answer.
>

And same question still.

cheers,
jamal

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH net-next v4 2/2] net sched actions: add time filter for action dumping
  2017-04-19 14:14       ` Jiri Pirko
@ 2017-04-19 15:47         ` Jamal Hadi Salim
  2017-04-19 15:55           ` Jiri Pirko
  0 siblings, 1 reply; 37+ messages in thread
From: Jamal Hadi Salim @ 2017-04-19 15:47 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: davem, netdev, eric.dumazet, xiyou.wangcong

On 17-04-19 10:14 AM, Jiri Pirko wrote:
> Wed, Apr 19, 2017 at 03:11:26PM CEST, jhs@mojatatu.com wrote:

> So it is hard coded. That confused me. I believe it would be good to
> provide actual tc patch that does the proper changes and include the
> example of that usage in the description - instead of "hackedtc"
>

Yes, the user sets it. I can make the diff available next time.
I didnt want to go on a discussion how to pass the command line
parameter in tc.

> TIME_DELTA?
>

sure.

Let me wait more comments then I will push the new update
by tommorow.

cheers,
jamal

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
  2017-04-19 15:37       ` Jamal Hadi Salim
@ 2017-04-19 15:54         ` Jiri Pirko
  2017-04-19 16:07           ` Jamal Hadi Salim
  0 siblings, 1 reply; 37+ messages in thread
From: Jiri Pirko @ 2017-04-19 15:54 UTC (permalink / raw)
  To: Jamal Hadi Salim; +Cc: davem, netdev, eric.dumazet, xiyou.wangcong

Wed, Apr 19, 2017 at 05:37:15PM CEST, jhs@mojatatu.com wrote:
>On 17-04-19 09:13 AM, Jiri Pirko wrote:
>> Wed, Apr 19, 2017 at 03:03:59PM CEST, jhs@mojatatu.com wrote:
>> > On 17-04-19 08:36 AM, Jiri Pirko wrote:
>> > > Wed, Apr 19, 2017 at 01:57:29PM CEST, jhs@mojatatu.com wrote:
>> > > > From: Jamal Hadi Salim <jhs@mojatatu.com>
>> > 
>> > > > include/uapi/linux/rtnetlink.h | 21 +++++++++++++++++++--
>> > > > net/sched/act_api.c            | 43 ++++++++++++++++++++++++++++++++----------
>> > > > 3 files changed, 53 insertions(+), 12 deletions(-)
>
>> > > > +#define TCAA_MAX (__TCAA_MAX - 1)
>> > > > #define TA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg))))
>> > > > #define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg))
>> > > > -#define TCA_ACT_TAB 1 /* attr type must be >=1 */	
>> > > > -#define TCAA_MAX 1
>> > > > +#define TCA_ACT_TAB TCAA_ACT_TAB
>> > > 
>> > > This is mess. What does "TCAA" stand for?
>> > 
>> > TC Actions Attributes.  What would you call it? I could have
>> > called it TCA_ROOT etc. But maybe a comment to just call it
>> > TC Actions Attributes would be enough?
>> 
>> TCA_DUMP_X
>> 
>> it is only for dumping. Naming it "attribute" seems weird. Same as if
>> you have: int variable_something;
>> 
>
>Jiri, this is not just for dumping. We are describing high level
>attributes for tc actions.

This is already present:
enum {
        TCA_ACT_UNSPEC,
        TCA_ACT_KIND,
        TCA_ACT_OPTIONS,
        TCA_ACT_INDEX,
        TCA_ACT_STATS,
        TCA_ACT_PAD,
        TCA_ACT_COOKIE,
        __TCA_ACT_MAX
};

This is nested inside the dump message (TCAA_MAX, TCA_ACT_TAB)

Looks like you are mixing these 2.



>
>> 
>> > 
>> > > I suggest some more meaningful naming of the enum items and define
>> > > TCA_ACT_TAB and TCAA_MAX to the new values in order to maintain UAPI
>> > 
>> > 
>> > Thats what the above does (for UAPI) maintainance, no?
>> 
>> It does it for TCA_ACT_TAB. We need to do it for both TCA_ACT_TAB and TCAA_MAX
>> 
>
>TCAA_XXX is the namespace selected. You dont like that name and
>adding DUMP doesnt make sense to me. How about TCA_ACT_ROOT?
>
>> > 
>> > > Also, please put X_MAX = __X_MAX - 1 into enum
>> > 
>> > That is diverting from the norm which defines it outside
>> > of the enum. A good reason could be: You, Jiri, plan to go and
>> > cleanup all the netlink stuff which uses this style.
>> > Or you think we should start a trend which leads us
>> > to a new clean style.
>> 
>> I would start now. I can take of the follow-up patch to change the rest.
>> 
>
>It is a _lot_ of code to change! Note:
>This is all the UAPI visible code (the same coding style for 20 years).
>I am worried about this part.

We'll see. Lets do it in a sensitive way, in steps. But for new things,
I think it is good not to stick with old and outlived habits.


>
>> > > 
>> > > > +/* tcamsg flags stored in attribute TCAA_ACT_FLAGS
>> > > > + *
>> > > > + * ACT_LARGE_DUMP_ON user->kernel to request for larger than TCA_ACT_MAX_PRIO
>> > > > + * actions in a dump. All dump responses will contain the number of actions
>> > > > + * being dumped stored in for user app's consumption in TCAA_ACT_COUNT
>> > > > + *
>> > > > + */
>> > > > +#define ACT_LARGE_DUMP_ON		(1 << 0)
>> > > 
>> > > Use "BIT(0)"
>> > > 
>> > 
>> > Same question as before.
>> 
>> Same answer :)
>> 
>
>I will change this one - it is a lot simpler coding style
>wide/wise than the other one.
>
>> > Are you planning to cleanup the rest of the code which
>
>> > So you are using 8 bits for one flag which requires one bit?
>> > + the TLV header? Sounds like overkill.
>> > Note: We dont need more than 1 or 2 bits for this case.
>> > Even 32 bits is overkill for what I am doing.
>> > When do i need to extend a single bit representation?
>> 
>> I don't see any problem adding couple of bytes if it increases cleannes
>> and easy extendability.
>> 
>
>How do you extend one bit? Seriously. If i want to add another bit I
>will add one more to existing bit map not 64 (T + L + 8bits + pad).
>If i ran out of space i will add a new TLV.

Netlink is TLV, should be used as TLV. I don't see how you can run out
any space. You tend to use Netlink in some weird hybrid mode, with only
argument being space. I think that couple of bytes wasted is not
a problem at all...


>
>> > > 
>> > > > 	struct net *net = sock_net(skb->sk);
>> > > > -	struct nlattr *tca[TCA_ACT_MAX + 1];
>> > > > +	struct nlattr *tca[TCAA_MAX + 1];
>> > > 
>> > > This is certainly wrong.
>> > > 
>> > 
>> > Why is it wrong?
>> 
>> Because you use existing TCA_ACT_ attr enum.
>> 
>
>Is there a programming mistake or you  just dont like the name?
>AFAIK, and based on my testing that code is correct.

See my first comment. I think that you mix 2 things together.


>
>> > > > -	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL,
>> > > > +	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCAA_MAX, tcaa_policy,
>> > > 
>> > > This is certainly wrong.
>> > > 
>> > 
>> > Same question as above.
>> 
>> Same answer.
>> 
>
>And same question still.
>
>cheers,
>jamal

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH net-next v4 2/2] net sched actions: add time filter for action dumping
  2017-04-19 15:47         ` Jamal Hadi Salim
@ 2017-04-19 15:55           ` Jiri Pirko
  0 siblings, 0 replies; 37+ messages in thread
From: Jiri Pirko @ 2017-04-19 15:55 UTC (permalink / raw)
  To: Jamal Hadi Salim; +Cc: davem, netdev, eric.dumazet, xiyou.wangcong

Wed, Apr 19, 2017 at 05:47:21PM CEST, jhs@mojatatu.com wrote:
>On 17-04-19 10:14 AM, Jiri Pirko wrote:
>> Wed, Apr 19, 2017 at 03:11:26PM CEST, jhs@mojatatu.com wrote:
>
>> So it is hard coded. That confused me. I believe it would be good to
>> provide actual tc patch that does the proper changes and include the
>> example of that usage in the description - instead of "hackedtc"
>> 
>
>Yes, the user sets it. I can make the diff available next time.
>I didnt want to go on a discussion how to pass the command line
>parameter in tc.

I think it is very good to send kernel and userspace part together.
Makes people to easily figure out what is going on and helps with
review.


>
>> TIME_DELTA?
>> 
>
>sure.
>
>Let me wait more comments then I will push the new update
>by tommorow.
>
>cheers,
>jamal

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
  2017-04-19 15:54         ` Jiri Pirko
@ 2017-04-19 16:07           ` Jamal Hadi Salim
  2017-04-19 16:17             ` Jiri Pirko
  0 siblings, 1 reply; 37+ messages in thread
From: Jamal Hadi Salim @ 2017-04-19 16:07 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: davem, netdev, eric.dumazet, xiyou.wangcong

On 17-04-19 11:54 AM, Jiri Pirko wrote:
> Wed, Apr 19, 2017 at 05:37:15PM CEST, jhs@mojatatu.com wrote:
>> On 17-04-19 09:13 AM, Jiri Pirko wrote:
>>> Wed, Apr 19, 2017 at 03:03:59PM CEST, jhs@mojatatu.com wrote:
>>>> On 17-04-19 08:36 AM, Jiri Pirko wrote:
>>>>> Wed, Apr 19, 2017 at 01:57:29PM CEST, jhs@mojatatu.com wrote:
>>>>>> From: Jamal Hadi Salim <jhs@mojatatu.com>
>>>>
>>>>>> include/uapi/linux/rtnetlink.h | 21 +++++++++++++++++++--
>>>>>> net/sched/act_api.c            | 43 ++++++++++++++++++++++++++++++++----------
>>>>>> 3 files changed, 53 insertions(+), 12 deletions(-)
>>
>>>>>> +#define TCAA_MAX (__TCAA_MAX - 1)
>>>>>> #define TA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg))))
>>>>>> #define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg))
>>>>>> -#define TCA_ACT_TAB 1 /* attr type must be >=1 */	
>>>>>> -#define TCAA_MAX 1
>>>>>> +#define TCA_ACT_TAB TCAA_ACT_TAB
>>>>>
>>>>> This is mess. What does "TCAA" stand for?
>>>>
>>>> TC Actions Attributes.  What would you call it? I could have
>>>> called it TCA_ROOT etc. But maybe a comment to just call it
>>>> TC Actions Attributes would be enough?
>>>
>>> TCA_DUMP_X
>>>
>>> it is only for dumping. Naming it "attribute" seems weird. Same as if
>>> you have: int variable_something;
>>>
>>
>> Jiri, this is not just for dumping. We are describing high level
>> attributes for tc actions.
>
> This is already present:
> enum {
>         TCA_ACT_UNSPEC,
>         TCA_ACT_KIND,
>         TCA_ACT_OPTIONS,
>         TCA_ACT_INDEX,
>         TCA_ACT_STATS,
>         TCA_ACT_PAD,
>         TCA_ACT_COOKIE,
>         __TCA_ACT_MAX
> };
>
> This is nested inside the dump message (TCAA_MAX, TCA_ACT_TAB)
>
> Looks like you are mixing these 2.
>

No - That space is per action. The space i am defining is
above that in the the hierarchy. There used to be only
one attribute there (TCA_ACT_TAB) and now we are making
it more generic.

>
>

>> It is a _lot_ of code to change! Note:
>> This is all the UAPI visible code (the same coding style for 20 years).
>> I am worried about this part.
>
> We'll see. Lets do it in a sensitive way, in steps. But for new things,
> I think it is good not to stick with old and outlived habits.
>
>

I know you have the muscle to get it done - so fine, i will start
with this one change.

>
> Netlink is TLV, should be used as TLV. I don't see how you can run out
> any space. You tend to use Netlink in some weird hybrid mode, with only
> argument being space. I think that couple of bytes wasted is not
> a problem at all...
>

You are not making sense to me still.
What you describe as "a couple of bytes" adds up when you have
a large number of objects. I am trying to cut down on data back
and forth from user/kernel and a bitmap is a well understood entity.

Even if i did use a TLV - when i am representing flags which require one
bit each - it makes sense to use a bitmask encapsulated in a TLV. Not
to waste a TLV per bit.

cheers,
jamal

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
  2017-04-19 16:07           ` Jamal Hadi Salim
@ 2017-04-19 16:17             ` Jiri Pirko
  2017-04-20 10:42               ` Jamal Hadi Salim
  0 siblings, 1 reply; 37+ messages in thread
From: Jiri Pirko @ 2017-04-19 16:17 UTC (permalink / raw)
  To: Jamal Hadi Salim; +Cc: davem, netdev, eric.dumazet, xiyou.wangcong

Wed, Apr 19, 2017 at 06:07:48PM CEST, jhs@mojatatu.com wrote:
>On 17-04-19 11:54 AM, Jiri Pirko wrote:
>> Wed, Apr 19, 2017 at 05:37:15PM CEST, jhs@mojatatu.com wrote:
>> > On 17-04-19 09:13 AM, Jiri Pirko wrote:
>> > > Wed, Apr 19, 2017 at 03:03:59PM CEST, jhs@mojatatu.com wrote:
>> > > > On 17-04-19 08:36 AM, Jiri Pirko wrote:
>> > > > > Wed, Apr 19, 2017 at 01:57:29PM CEST, jhs@mojatatu.com wrote:
>> > > > > > From: Jamal Hadi Salim <jhs@mojatatu.com>
>> > > > 
>> > > > > > include/uapi/linux/rtnetlink.h | 21 +++++++++++++++++++--
>> > > > > > net/sched/act_api.c            | 43 ++++++++++++++++++++++++++++++++----------
>> > > > > > 3 files changed, 53 insertions(+), 12 deletions(-)
>> > 
>> > > > > > +#define TCAA_MAX (__TCAA_MAX - 1)
>> > > > > > #define TA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg))))
>> > > > > > #define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg))
>> > > > > > -#define TCA_ACT_TAB 1 /* attr type must be >=1 */	
>> > > > > > -#define TCAA_MAX 1
>> > > > > > +#define TCA_ACT_TAB TCAA_ACT_TAB
>> > > > > 
>> > > > > This is mess. What does "TCAA" stand for?
>> > > > 
>> > > > TC Actions Attributes.  What would you call it? I could have
>> > > > called it TCA_ROOT etc. But maybe a comment to just call it
>> > > > TC Actions Attributes would be enough?
>> > > 
>> > > TCA_DUMP_X
>> > > 
>> > > it is only for dumping. Naming it "attribute" seems weird. Same as if
>> > > you have: int variable_something;
>> > > 
>> > 
>> > Jiri, this is not just for dumping. We are describing high level
>> > attributes for tc actions.
>> 
>> This is already present:
>> enum {
>>         TCA_ACT_UNSPEC,
>>         TCA_ACT_KIND,
>>         TCA_ACT_OPTIONS,
>>         TCA_ACT_INDEX,
>>         TCA_ACT_STATS,
>>         TCA_ACT_PAD,
>>         TCA_ACT_COOKIE,
>>         __TCA_ACT_MAX
>> };
>> 
>> This is nested inside the dump message (TCAA_MAX, TCA_ACT_TAB)
>> 
>> Looks like you are mixing these 2.
>> 
>
>No - That space is per action. The space i am defining is
>above that in the the hierarchy. There used to be only
>one attribute there (TCA_ACT_TAB) and now we are making
>it more generic.

Right. That's what I say. And that higher level is used only for
dumping. That's why I suggested TCA_ACT_DUMP prefix.


>
>> 
>> 
>
>> > It is a _lot_ of code to change! Note:
>> > This is all the UAPI visible code (the same coding style for 20 years).
>> > I am worried about this part.
>> 
>> We'll see. Lets do it in a sensitive way, in steps. But for new things,
>> I think it is good not to stick with old and outlived habits.
>> 
>> 
>
>I know you have the muscle to get it done - so fine, i will start
>with this one change.
>
>> 
>> Netlink is TLV, should be used as TLV. I don't see how you can run out
>> any space. You tend to use Netlink in some weird hybrid mode, with only
>> argument being space. I think that couple of bytes wasted is not
>> a problem at all...
>> 
>
>You are not making sense to me still.
>What you describe as "a couple of bytes" adds up when you have
>a large number of objects. I am trying to cut down on data back
>and forth from user/kernel and a bitmap is a well understood entity.

The attributes in question are per-message, not per-object


>
>Even if i did use a TLV - when i am representing flags which require one
>bit each - it makes sense to use a bitmask encapsulated in a TLV. Not
>to waste a TLV per bit.

That is the only correct way. For example, in future the kernel may
report in extended ack that it does not support some specific attribute
user passed. If you pack it all in one attr, that would not be possible.
Also, what prevents user from passing random data on bit flag positions
that you are not using? Current kernel would ignore it. Next kernel will
do something different according to the flag bit. That is UAPI break.
Essentialy the same thing what DaveM said about the struct. You have to
define this completely, at the beginning, not possible to use the unused
bits for other things in the future.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
  2017-04-19 16:17             ` Jiri Pirko
@ 2017-04-20 10:42               ` Jamal Hadi Salim
  2017-04-20 11:35                 ` Jiri Pirko
  2017-04-20 12:18                 ` Eric Dumazet
  0 siblings, 2 replies; 37+ messages in thread
From: Jamal Hadi Salim @ 2017-04-20 10:42 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: davem, netdev, eric.dumazet, xiyou.wangcong

On 17-04-19 12:17 PM, Jiri Pirko wrote:
> Wed, Apr 19, 2017 at 06:07:48PM CEST, jhs@mojatatu.com wrote:
>> On 17-04-19 11:54 AM, Jiri Pirko wrote:
>>> Wed, Apr 19, 2017 at 05:37:15PM CEST, jhs@mojatatu.com wrote:
>>>> On 17-04-19 09:13 AM, Jiri Pirko wrote:
>>>>> Wed, Apr 19, 2017 at 03:03:59PM CEST, jhs@mojatatu.com wrote:
>>>>>> On 17-04-19 08:36 AM, Jiri Pirko wrote:
>>>>>>> Wed, Apr 19, 2017 at 01:57:29PM CEST, jhs@mojatatu.com wrote:
>>>>>>>> From: Jamal Hadi Salim <jhs@mojatatu.com>
>>>>>>
>>>>>>>> include/uapi/linux/rtnetlink.h | 21 +++++++++++++++++++--
>>>>>>>> net/sched/act_api.c            | 43 ++++++++++++++++++++++++++++++++----------
>>>>>>>> 3 files changed, 53 insertions(+), 12 deletions(-)
>>>>
>>>>>>>> +#define TCAA_MAX (__TCAA_MAX - 1)
>>>>>>>> #define TA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg))))
>>>>>>>> #define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg))
>>>>>>>> -#define TCA_ACT_TAB 1 /* attr type must be >=1 */	
>>>>>>>> -#define TCAA_MAX 1
>>>>>>>> +#define TCA_ACT_TAB TCAA_ACT_TAB
>>>>>>>
>>>>>>> This is mess. What does "TCAA" stand for?
>>>>>>
>>>>>> TC Actions Attributes.  What would you call it? I could have
>>>>>> called it TCA_ROOT etc. But maybe a comment to just call it
>>>>>> TC Actions Attributes would be enough?
>>>>>
>>>>> TCA_DUMP_X
>>>>>
>>>>> it is only for dumping. Naming it "attribute" seems weird. Same as if
>>>>> you have: int variable_something;
>>>>>
>>>>
>>>> Jiri, this is not just for dumping. We are describing high level
>>>> attributes for tc actions.
>>>
>>> This is already present:
>>> enum {
>>>         TCA_ACT_UNSPEC,
>>>         TCA_ACT_KIND,
>>>         TCA_ACT_OPTIONS,
>>>         TCA_ACT_INDEX,
>>>         TCA_ACT_STATS,
>>>         TCA_ACT_PAD,
>>>         TCA_ACT_COOKIE,
>>>         __TCA_ACT_MAX
>>> };
>>>
>>> This is nested inside the dump message (TCAA_MAX, TCA_ACT_TAB)
>>>
>>> Looks like you are mixing these 2.
>>>
>>
>> No - That space is per action. The space i am defining is
>> above that in the the hierarchy. There used to be only
>> one attribute there (TCA_ACT_TAB) and now we are making
>> it more generic.
>
> Right. That's what I say. And that higher level is used only for
> dumping. That's why I suggested TCA_ACT_DUMP prefix.
>

DUMP is not right. _TAB is used for SET/DEL as well.
why dont we leave this alone so we can progress?
You can submit changes later if it bothers you still.

>
>>
>>>
>>>
>>
>>>> It is a _lot_ of code to change! Note:
>>>> This is all the UAPI visible code (the same coding style for 20 years).
>>>> I am worried about this part.
>>>
>>> We'll see. Lets do it in a sensitive way, in steps. But for new things,
>>> I think it is good not to stick with old and outlived habits.
>>>
>>>
>>
>> I know you have the muscle to get it done - so fine, i will start
>> with this one change.
>>
>>>
>>> Netlink is TLV, should be used as TLV. I don't see how you can run out
>>> any space. You tend to use Netlink in some weird hybrid mode, with only
>>> argument being space. I think that couple of bytes wasted is not
>>> a problem at all...
>>>
>>
>> You are not making sense to me still.
>> What you describe as "a couple of bytes" adds up when you have
>> a large number of objects. I am trying to cut down on data back
>> and forth from user/kernel and a bitmap is a well understood entity.
>
> The attributes in question are per-message, not per-object
>
>
>>
>> Even if i did use a TLV - when i am representing flags which require one
>> bit each - it makes sense to use a bitmask encapsulated in a TLV. Not
>> to waste a TLV per bit.
>
> That is the only correct way. For example, in future the kernel may
> report in extended ack that it does not support some specific attribute
> user passed. If you pack it all in one attr, that would not be possible.
> Also, what prevents user from passing random data on bit flag positions
> that you are not using? Current kernel would ignore it. Next kernel will
> do something different according to the flag bit. That is UAPI break.
> Essentialy the same thing what DaveM said about the struct. You have to
> define this completely, at the beginning, not possible to use the unused
> bits for other things in the future.
>


They are not the same issue Jiri. We have used bitmasks fine on netlink
message for a millenia. Nobody sets garbage on a bitmask they are not
supposed to touch. The struct padding thing is a shame the way it
turned out - now netlink can no longer have a claim to be a (good)
wire protocol.
Other thing: I know you feel strongly about this but I dont agree that
everything has to be a TLV and in no way, as a matter of principle, you 
are going to convince me  (even when the cows come home) that I have to
use 64 bits to carry a single bit just so I can use TLVs.

cheers,
jamal

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
  2017-04-20 10:42               ` Jamal Hadi Salim
@ 2017-04-20 11:35                 ` Jiri Pirko
  2017-04-20 14:03                   ` Jamal Hadi Salim
  2017-04-20 12:18                 ` Eric Dumazet
  1 sibling, 1 reply; 37+ messages in thread
From: Jiri Pirko @ 2017-04-20 11:35 UTC (permalink / raw)
  To: Jamal Hadi Salim; +Cc: davem, netdev, eric.dumazet, xiyou.wangcong

Thu, Apr 20, 2017 at 12:42:55PM CEST, jhs@mojatatu.com wrote:
>On 17-04-19 12:17 PM, Jiri Pirko wrote:
>> Wed, Apr 19, 2017 at 06:07:48PM CEST, jhs@mojatatu.com wrote:
>> > On 17-04-19 11:54 AM, Jiri Pirko wrote:
>> > > Wed, Apr 19, 2017 at 05:37:15PM CEST, jhs@mojatatu.com wrote:
>> > > > On 17-04-19 09:13 AM, Jiri Pirko wrote:
>> > > > > Wed, Apr 19, 2017 at 03:03:59PM CEST, jhs@mojatatu.com wrote:
>> > > > > > On 17-04-19 08:36 AM, Jiri Pirko wrote:
>> > > > > > > Wed, Apr 19, 2017 at 01:57:29PM CEST, jhs@mojatatu.com wrote:
>> > > > > > > > From: Jamal Hadi Salim <jhs@mojatatu.com>
>> > > > > > 
>> > > > > > > > include/uapi/linux/rtnetlink.h | 21 +++++++++++++++++++--
>> > > > > > > > net/sched/act_api.c            | 43 ++++++++++++++++++++++++++++++++----------
>> > > > > > > > 3 files changed, 53 insertions(+), 12 deletions(-)
>> > > > 
>> > > > > > > > +#define TCAA_MAX (__TCAA_MAX - 1)
>> > > > > > > > #define TA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg))))
>> > > > > > > > #define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg))
>> > > > > > > > -#define TCA_ACT_TAB 1 /* attr type must be >=1 */	
>> > > > > > > > -#define TCAA_MAX 1
>> > > > > > > > +#define TCA_ACT_TAB TCAA_ACT_TAB
>> > > > > > > 
>> > > > > > > This is mess. What does "TCAA" stand for?
>> > > > > > 
>> > > > > > TC Actions Attributes.  What would you call it? I could have
>> > > > > > called it TCA_ROOT etc. But maybe a comment to just call it
>> > > > > > TC Actions Attributes would be enough?
>> > > > > 
>> > > > > TCA_DUMP_X
>> > > > > 
>> > > > > it is only for dumping. Naming it "attribute" seems weird. Same as if
>> > > > > you have: int variable_something;
>> > > > > 
>> > > > 
>> > > > Jiri, this is not just for dumping. We are describing high level
>> > > > attributes for tc actions.
>> > > 
>> > > This is already present:
>> > > enum {
>> > >         TCA_ACT_UNSPEC,
>> > >         TCA_ACT_KIND,
>> > >         TCA_ACT_OPTIONS,
>> > >         TCA_ACT_INDEX,
>> > >         TCA_ACT_STATS,
>> > >         TCA_ACT_PAD,
>> > >         TCA_ACT_COOKIE,
>> > >         __TCA_ACT_MAX
>> > > };
>> > > 
>> > > This is nested inside the dump message (TCAA_MAX, TCA_ACT_TAB)
>> > > 
>> > > Looks like you are mixing these 2.
>> > > 
>> > 
>> > No - That space is per action. The space i am defining is
>> > above that in the the hierarchy. There used to be only
>> > one attribute there (TCA_ACT_TAB) and now we are making
>> > it more generic.
>> 
>> Right. That's what I say. And that higher level is used only for
>> dumping. That's why I suggested TCA_ACT_DUMP prefix.
>> 
>
>DUMP is not right. _TAB is used for SET/DEL as well.
>why dont we leave this alone so we can progress?
>You can submit changes later if it bothers you still.

Ha. So the current code is wrong, I see it now. Following is needed:

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 82b1d48..c432b22 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -1005,7 +1005,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
 	    !netlink_capable(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
-	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL,
+	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCAA_MAX, NULL,
 			  extack);
 	if (ret < 0)
 		return ret;

You confused me squashing the change to this patch.

Ok, so the name could be:
TCA_ACTS_*
?
I believe it is crucial to figure this out right now. TC UAPI is in deep
mess already, no need to push it even more.


>
>> 
>> > 
>> > > 
>> > > 
>> > 
>> > > > It is a _lot_ of code to change! Note:
>> > > > This is all the UAPI visible code (the same coding style for 20 years).
>> > > > I am worried about this part.
>> > > 
>> > > We'll see. Lets do it in a sensitive way, in steps. But for new things,
>> > > I think it is good not to stick with old and outlived habits.
>> > > 
>> > > 
>> > 
>> > I know you have the muscle to get it done - so fine, i will start
>> > with this one change.
>> > 
>> > > 
>> > > Netlink is TLV, should be used as TLV. I don't see how you can run out
>> > > any space. You tend to use Netlink in some weird hybrid mode, with only
>> > > argument being space. I think that couple of bytes wasted is not
>> > > a problem at all...
>> > > 
>> > 
>> > You are not making sense to me still.
>> > What you describe as "a couple of bytes" adds up when you have
>> > a large number of objects. I am trying to cut down on data back
>> > and forth from user/kernel and a bitmap is a well understood entity.
>> 
>> The attributes in question are per-message, not per-object
>> 
>> 
>> > 
>> > Even if i did use a TLV - when i am representing flags which require one
>> > bit each - it makes sense to use a bitmask encapsulated in a TLV. Not
>> > to waste a TLV per bit.
>> 
>> That is the only correct way. For example, in future the kernel may
>> report in extended ack that it does not support some specific attribute
>> user passed. If you pack it all in one attr, that would not be possible.
>> Also, what prevents user from passing random data on bit flag positions
>> that you are not using? Current kernel would ignore it. Next kernel will
>> do something different according to the flag bit. That is UAPI break.
>> Essentialy the same thing what DaveM said about the struct. You have to
>> define this completely, at the beginning, not possible to use the unused
>> bits for other things in the future.
>> 
>
>
>They are not the same issue Jiri. We have used bitmasks fine on netlink

Howcome they are not the same? I'm pretty certain they are.


>message for a millenia. Nobody sets garbage on a bitmask they are not
>supposed to touch. The struct padding thing is a shame the way it
>turned out - now netlink can no longer have a claim to be a (good)
>wire protocol.
>Other thing: I know you feel strongly about this but I dont agree that
>everything has to be a TLV and in no way, as a matter of principle, you are
>going to convince me  (even when the cows come home) that I have to
>use 64 bits to carry a single bit just so I can use TLVs.

Then I guess we have to agree to disagree. I believe that your approach
is wrong and will break users in future when you add even a single flag.
Argument that "we are doing this for ages-therefore it is correct" has
simply 0 value.

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
  2017-04-20 10:42               ` Jamal Hadi Salim
  2017-04-20 11:35                 ` Jiri Pirko
@ 2017-04-20 12:18                 ` Eric Dumazet
  2017-04-20 13:27                   ` Jamal Hadi Salim
  2017-04-20 15:23                   ` David Miller
  1 sibling, 2 replies; 37+ messages in thread
From: Eric Dumazet @ 2017-04-20 12:18 UTC (permalink / raw)
  To: Jamal Hadi Salim; +Cc: Jiri Pirko, davem, netdev, xiyou.wangcong

On Thu, 2017-04-20 at 06:42 -0400, Jamal Hadi Salim wrote:

> 
> They are not the same issue Jiri. We have used bitmasks fine on netlink
> message for a millenia. Nobody sets garbage on a bitmask they are not
> supposed to touch. The struct padding thing is a shame the way it
> turned out - now netlink can no longer have a claim to be a (good)
> wire protocol.

Except that users wrote programs, and these programs work today.

By changing the kernel and recognizing new flags in existing padding,
you might break the programs.

This is not acceptable. Period.

Had we checked the padding being 0 in old kernels, this change would
have been possible today.

But because old kernels did not care of the padding contents, then there
is no way new kernel can suddenly trust them at all.

Please Jamal, you have to forget this nonsense.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
  2017-04-20 12:18                 ` Eric Dumazet
@ 2017-04-20 13:27                   ` Jamal Hadi Salim
  2017-04-20 15:50                     ` David Miller
  2017-04-20 15:23                   ` David Miller
  1 sibling, 1 reply; 37+ messages in thread
From: Jamal Hadi Salim @ 2017-04-20 13:27 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Jiri Pirko, davem, netdev, xiyou.wangcong

On 17-04-20 08:18 AM, Eric Dumazet wrote:
> On Thu, 2017-04-20 at 06:42 -0400, Jamal Hadi Salim wrote:
>
>>
>> They are not the same issue Jiri. We have used bitmasks fine on netlink
>> message for a millenia. Nobody sets garbage on a bitmask they are not
>> supposed to touch. The struct padding thing is a shame the way it
>> turned out - now netlink can no longer have a claim to be a (good)
>> wire protocol.
>
> Except that users wrote programs, and these programs work today.
>
> By changing the kernel and recognizing new flags in existing padding,
> you might break the programs.
>
> This is not acceptable. Period.
>
> Had we checked the padding being 0 in old kernels, this change would
> have been possible today.
>
> But because old kernels did not care of the padding contents, then there
> is no way new kernel can suddenly trust them at all.
>
> Please Jamal, you have to forget this nonsense.

That is fine. We can rule out netlink ever being able to work
across machines. That was the dream in the past. Lets close that
discussion.

The issue Jiri is bringing up is unrelated. He is talking about
a bitmap and conflating it with a data structure. They are not
the same issue.

cheers,
jamal

>

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
  2017-04-20 11:35                 ` Jiri Pirko
@ 2017-04-20 14:03                   ` Jamal Hadi Salim
  0 siblings, 0 replies; 37+ messages in thread
From: Jamal Hadi Salim @ 2017-04-20 14:03 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: davem, netdev, eric.dumazet, xiyou.wangcong

On 17-04-20 07:35 AM, Jiri Pirko wrote:
> Thu, Apr 20, 2017 at 12:42:55PM CEST, jhs@mojatatu.com wrote:
>> On 17-04-19 12:17 PM, Jiri Pirko wrote:
>
> Ha. So the current code is wrong, I see it now. Following is needed:
>
> diff --git a/net/sched/act_api.c b/net/sched/act_api.c
> index 82b1d48..c432b22 100644
> --- a/net/sched/act_api.c
> +++ b/net/sched/act_api.c
> @@ -1005,7 +1005,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
>  	    !netlink_capable(skb, CAP_NET_ADMIN))
>  		return -EPERM;
>
> -	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL,
> +	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCAA_MAX, NULL,
>  			  extack);
>  	if (ret < 0)
>  		return ret;
>
> You confused me squashing the change to this patch.
>
> Ok, so the name could be:
> TCA_ACTS_*
> ?
> I believe it is crucial to figure this out right now. TC UAPI is in deep
> mess already, no need to push it even more.
>

I could change that name. Look at the last series i sent, add any extra
comments to that  and i will get to it by tomorrow.

>> They are not the same issue Jiri. We have used bitmasks fine on netlink
>
> Howcome they are not the same? I'm pretty certain they are.
>

They are not Jiri. I challenge you to point me to one netlink bitmask
representation used that has some bits that were not being used that
someone or some compiler is going to set some random values on.
Typically flags are u64/32/16

>
>> message for a millenia. Nobody sets garbage on a bitmask they are not
>> supposed to touch. The struct padding thing is a shame the way it
>> turned out - now netlink can no longer have a claim to be a (good)
>> wire protocol.
>> Other thing: I know you feel strongly about this but I dont agree that
>> everything has to be a TLV and in no way, as a matter of principle, you are
>> going to convince me  (even when the cows come home) that I have to
>> use 64 bits to carry a single bit just so I can use TLVs.
>
> Then I guess we have to agree to disagree. I believe that your approach
> is wrong and will break users in future when you add even a single flag.
> Argument that "we are doing this for ages-therefore it is correct" has
> simply 0 value.
>

"doing these for 80 years" is not the main driving point.
I appreciate that data structures - even when fields are
annotated as "pads" there is no guarantee when i allocate one
the pads will be zeroes.

Bitmasks are atomic in nature and therefore used in whole - not complex
like structs. Thats the difference. I dont define this u32 bitmap that
has 8 bits that are "pads" because that concept doesnt make sense for
atomic fields.
This implies that 6 months down the road I dont regret and say "shit, I
need 2 more bits for flagbits but some idiot or compiler was setting
those fields so i cant use them now because the kernel wont tell the
difference between what s/he is doing and real values".
Allocations of data structures I can understand, unless we enforce
always reset to zeroes all the struct members as part of creation.

But lets agree to disagree if you are still not convinced.

On your "everything must be a TLV". Caveat is: there is a balance
between  performance and flexibility. I should be able to pack aligned
complex data structures in a TLV for performance reasons.
One lesson is: in the future we can make these extra service header like
tcamsg very small and always use all their fields instead of defining
pads.

cheers,
jamal

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
  2017-04-20 12:18                 ` Eric Dumazet
  2017-04-20 13:27                   ` Jamal Hadi Salim
@ 2017-04-20 15:23                   ` David Miller
  1 sibling, 0 replies; 37+ messages in thread
From: David Miller @ 2017-04-20 15:23 UTC (permalink / raw)
  To: eric.dumazet; +Cc: jhs, jiri, netdev, xiyou.wangcong

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 20 Apr 2017 05:18:14 -0700

> By changing the kernel and recognizing new flags in existing padding,
> you might break the programs.
> 
> This is not acceptable. Period.
> 
> Had we checked the padding being 0 in old kernels, this change would
> have been possible today.

I completely agree.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
  2017-04-20 13:27                   ` Jamal Hadi Salim
@ 2017-04-20 15:50                     ` David Miller
  2017-04-20 17:38                       ` Jamal Hadi Salim
  0 siblings, 1 reply; 37+ messages in thread
From: David Miller @ 2017-04-20 15:50 UTC (permalink / raw)
  To: jhs; +Cc: eric.dumazet, jiri, netdev, xiyou.wangcong

From: Jamal Hadi Salim <jhs@mojatatu.com>
Date: Thu, 20 Apr 2017 09:27:00 -0400

> The issue Jiri is bringing up is unrelated. He is talking about
> a bitmap and conflating it with a data structure. They are not
> the same issue.

Bitmaps can have the same exact problem as padding if we didn't code
it correctly.

The issue is _purely_, "did we check unused 'fields' and enforce them
to be a certain value"

If not, we lose, and can't use those "fields" in the future.

This rule applies whether you are speaking about padding or a bitmask.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
  2017-04-20 15:50                     ` David Miller
@ 2017-04-20 17:38                       ` Jamal Hadi Salim
  2017-04-20 17:58                         ` David Miller
  0 siblings, 1 reply; 37+ messages in thread
From: Jamal Hadi Salim @ 2017-04-20 17:38 UTC (permalink / raw)
  To: David Miller; +Cc: eric.dumazet, jiri, netdev, xiyou.wangcong

On 17-04-20 11:50 AM, David Miller wrote:
> From: Jamal Hadi Salim <jhs@mojatatu.com>
> Date: Thu, 20 Apr 2017 09:27:00 -0400
>
>> The issue Jiri is bringing up is unrelated. He is talking about
>> a bitmap and conflating it with a data structure. They are not
>> the same issue.
>
> Bitmaps can have the same exact problem as padding if we didn't code
> it correctly.
>
> The issue is _purely_, "did we check unused 'fields' and enforce them
> to be a certain value"
>
> If not, we lose, and can't use those "fields" in the future.
>
> This rule applies whether you are speaking about padding or a bitmask.
>

There are no examples of such issues with bitmasks encapsulated in TLVs
that exist.
I grep iproute2 code and there are tons of example of bitmask flags
being sent in TLVs. They all start with:

u64/32/16 mybitflags = 0;

if i want foo then
        mybitflags |= BRIDGE_FLAGS_SELF;
if i want bar then
        mybitflags |= xxxx

addattr16/32/64(&req.n, sizeof(req), ATTR_XXX, mybitflags);

It does not make much sense to have a TLV for each of these
bits when i can fit a bunch of them in u16/32/64.

cheers,
jamal

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
  2017-04-20 17:38                       ` Jamal Hadi Salim
@ 2017-04-20 17:58                         ` David Miller
  2017-04-21 10:36                           ` Jamal Hadi Salim
  0 siblings, 1 reply; 37+ messages in thread
From: David Miller @ 2017-04-20 17:58 UTC (permalink / raw)
  To: jhs; +Cc: eric.dumazet, jiri, netdev, xiyou.wangcong

From: Jamal Hadi Salim <jhs@mojatatu.com>
Date: Thu, 20 Apr 2017 13:38:14 -0400

> On 17-04-20 11:50 AM, David Miller wrote:
>> From: Jamal Hadi Salim <jhs@mojatatu.com>
>> Date: Thu, 20 Apr 2017 09:27:00 -0400
>>
>>> The issue Jiri is bringing up is unrelated. He is talking about
>>> a bitmap and conflating it with a data structure. They are not
>>> the same issue.
>>
>> Bitmaps can have the same exact problem as padding if we didn't code
>> it correctly.
>>
>> The issue is _purely_, "did we check unused 'fields' and enforce them
>> to be a certain value"
>>
>> If not, we lose, and can't use those "fields" in the future.
>>
>> This rule applies whether you are speaking about padding or a bitmask.
>>
> 
> There are no examples of such issues with bitmasks encapsulated in
> TLVs
> that exist.
> I grep iproute2 code and there are tons of example of bitmask flags
> being sent in TLVs. They all start with:
> 
> u64/32/16 mybitflags = 0;
> 
> if i want foo then
>        mybitflags |= BRIDGE_FLAGS_SELF;
> if i want bar then
>        mybitflags |= xxxx
> 
> addattr16/32/64(&req.n, sizeof(req), ATTR_XXX, mybitflags);
> 
> It does not make much sense to have a TLV for each of these
> bits when i can fit a bunch of them in u16/32/64.

I have not ruled out bitmasks.  I'm only saying that the kernel must
properly reject bits it doesn't recognize when they are set.

Each bit must have a strict semantic, even unused ones, otherwise
unused ones may never safely be used in the future.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
  2017-04-20 17:58                         ` David Miller
@ 2017-04-21 10:36                           ` Jamal Hadi Salim
  2017-04-21 14:51                             ` David Miller
  0 siblings, 1 reply; 37+ messages in thread
From: Jamal Hadi Salim @ 2017-04-21 10:36 UTC (permalink / raw)
  To: David Miller; +Cc: eric.dumazet, jiri, netdev, xiyou.wangcong

On 17-04-20 01:58 PM, David Miller wrote:
> From: Jamal Hadi Salim <jhs@mojatatu.com>
> Date: Thu, 20 Apr 2017 13:38:14 -0400
>

>> There are no examples of such issues with bitmasks encapsulated in
>> TLVs

>> It does not make much sense to have a TLV for each of these
>> bits when i can fit a bunch of them in u16/32/64.
>
> I have not ruled out bitmasks.  I'm only saying that the kernel must
> properly reject bits it doesn't recognize when they are set.
>

It is the other way round from what i see: It ignores them.
This allows new bits to be added over time.
Note: It is a bug - which must be fixed - if user space sets
something the kernel doesnt want it to set. Even then, the only good
use case i can think of for something like this is the kernel
is exposing something to user space for read-only and user space
is being silly and setting read-only bits on requests to the kernel.
But even that is not a catastrophic issue; kernel should just ignore it.

> Each bit must have a strict semantic, even unused ones, otherwise
> unused ones may never safely be used in the future.
>

I think we are pretty good at this.
It would be interesting to have a fuzzer which sets random bits on a
TLV bitmask and see what bugs show up.

cheers,
jamal

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
  2017-04-21 10:36                           ` Jamal Hadi Salim
@ 2017-04-21 14:51                             ` David Miller
  2017-04-21 15:29                               ` Jamal Hadi Salim
  0 siblings, 1 reply; 37+ messages in thread
From: David Miller @ 2017-04-21 14:51 UTC (permalink / raw)
  To: jhs; +Cc: eric.dumazet, jiri, netdev, xiyou.wangcong

From: Jamal Hadi Salim <jhs@mojatatu.com>
Date: Fri, 21 Apr 2017 06:36:19 -0400

> On 17-04-20 01:58 PM, David Miller wrote:
>> From: Jamal Hadi Salim <jhs@mojatatu.com>
>> Date: Thu, 20 Apr 2017 13:38:14 -0400
>>
> 
>>> There are no examples of such issues with bitmasks encapsulated in
>>> TLVs
> 
>>> It does not make much sense to have a TLV for each of these
>>> bits when i can fit a bunch of them in u16/32/64.
>>
>> I have not ruled out bitmasks.  I'm only saying that the kernel must
>> properly reject bits it doesn't recognize when they are set.
>>
> 
> It is the other way round from what i see: It ignores them.

Which means we can never use them for anything else reliably,
there could be random crap in there.

> This allows new bits to be added over time.

No, ignoring them actually means we cannot add new bits.

> Note: It is a bug - which must be fixed - if user space sets
> something the kernel doesnt want it to set. Even then, the only good
> use case i can think of for something like this is the kernel
> is exposing something to user space for read-only and user space
> is being silly and setting read-only bits on requests to the kernel.
> But even that is not a catastrophic issue; kernel should just ignore
> it.

But since we didn't check and enforce, we can't use the bits for
settings however we like.

That's the entire point.

We can _never_ go back later and say "oops, add the checks now, it's
all good" because that doesn't work at all.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
  2017-04-21 14:51                             ` David Miller
@ 2017-04-21 15:29                               ` Jamal Hadi Salim
  2017-04-21 15:38                                 ` David Miller
  0 siblings, 1 reply; 37+ messages in thread
From: Jamal Hadi Salim @ 2017-04-21 15:29 UTC (permalink / raw)
  To: David Miller; +Cc: eric.dumazet, jiri, netdev, xiyou.wangcong

On 17-04-21 10:51 AM, David Miller wrote:
> From: Jamal Hadi Salim <jhs@mojatatu.com>
> Date: Fri, 21 Apr 2017 06:36:19 -0400
>
>> On 17-04-20 01:58 PM, David Miller wrote:
>>> From: Jamal Hadi Salim <jhs@mojatatu.com>
>>> Date: Thu, 20 Apr 2017 13:38:14 -0400
>>>
>>

>
> Which means we can never use them for anything else reliably,
> there could be random crap in there.
>

Today: User space set them to zero. Receivers in the kernel
only look at what they are interested in. I stopped checking after a
while - but everything i looked at in iproute2 worked
like this.

>> This allows new bits to be added over time.
>
> No, ignoring them actually means we cannot add new bits.
>

Old kernels ignore them. New kernels look at the new ones.
We'll be in a lot of trouble if this was not the case
for things today;-> People add bits all the time in TLVs
and in netlink headers that are labeled as flags.

>> Note: It is a bug - which must be fixed - if user space sets
>> something the kernel doesnt want it to set. Even then, the only good
>> use case i can think of for something like this is the kernel
>> is exposing something to user space for read-only and user space
>> is being silly and setting read-only bits on requests to the kernel.
>> But even that is not a catastrophic issue; kernel should just ignore
>> it.
>
> But since we didn't check and enforce, we can't use the bits for
> settings however we like.
>
> That's the entire point.
>
> We can _never_ go back later and say "oops, add the checks now, it's
> all good" because that doesn't work at all.
>

Dave, I dont think you are suggesting we should use a TLV for every bit
we want to  send to the kernel (as Jiri is), are you?

I think you as suggesting we should from now on enforce a rule that
in the kernel we start checking that bits in a bitmap received for
things we are not interested in. So if a bit i dont understand shows
up in the kernel what should i do?
Rejecting the transaction because i received something i dont
understand is not conducive to forward compatibility. Maybe logging
it would be useful.
If i dont get a bit i am expecting (old user space), then for sure
rejecting sounds reasonable.

cheers,
jamal

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
  2017-04-21 15:29                               ` Jamal Hadi Salim
@ 2017-04-21 15:38                                 ` David Miller
  2017-04-21 15:55                                   ` Jamal Hadi Salim
  0 siblings, 1 reply; 37+ messages in thread
From: David Miller @ 2017-04-21 15:38 UTC (permalink / raw)
  To: jhs; +Cc: eric.dumazet, jiri, netdev, xiyou.wangcong

From: Jamal Hadi Salim <jhs@mojatatu.com>
Date: Fri, 21 Apr 2017 11:29:19 -0400

> On 17-04-21 10:51 AM, David Miller wrote:
>> From: Jamal Hadi Salim <jhs@mojatatu.com>
>> Date: Fri, 21 Apr 2017 06:36:19 -0400
>>
>>> On 17-04-20 01:58 PM, David Miller wrote:
>>>> From: Jamal Hadi Salim <jhs@mojatatu.com>
>>>> Date: Thu, 20 Apr 2017 13:38:14 -0400
>>>>
>>>
> 
>>
>> Which means we can never use them for anything else reliably,
>> there could be random crap in there.
>>
> 
> Today: User space set them to zero.

You don't know this because the kernel has never verified it.
Jamal, you cannot walk past this important point, nothing can
be argued further because of it.

> Old kernels ignore them. New kernels look at the new ones.
> We'll be in a lot of trouble if this was not the case
> for things today;-> People add bits all the time in TLVs
> and in netlink headers that are labeled as flags.

And when we do things that way it's broken, and why we have such
crappy behavior.

We made a very bad decision a long time ago to ignore unrecognized
things in netlink and it was a grave error which we must start
correcting now.

If a user says "enable X" and it just gets simply ignored by older
kernels, that can't work properly.  What if "enable X" is something
like "turn on encryption"?  Are you OK with the user getting no
feedback that their stuff is not going to be encrypted?

Even something as benign as "give melarger action dumps" _must_ still
have the same behavior because the user has no alternative action plan
possible if it cannot tell if the kernel supports the facility or not.

> Dave, I dont think you are suggesting we should use a TLV for every
> bit
> we want to  send to the kernel (as Jiri is), are you?

Jiri is not suggesting this, he is instead saying if you want to
support more bits in the future then you must check that the unused
bits are zero _now_ so that we can prove that userland clears them
properly.

And if you don't have any direct plans for more bits in the future,
use just a single attribute with the smallest integer type possible.

> I think you as suggesting we should from now on enforce a rule that
> in the kernel we start checking that bits in a bitmap received for
> things we are not interested in. So if a bit i dont understand shows
> up in the kernel what should i do?

Reject it.

> Rejecting the transaction because i received something i dont
> understand is not conducive to forward compatibility.

Not rejecting it breaks everything and gives the user no feedback
or way whatsoever to know whether the kernel supports something or
not.

I'm not letting us continue to do things so stupidly any more.

I want future applications to know if they are running on an older
kernel and that a specific netlink feature is not supported.

Ignoring not-understood bits prevents that and is the single most
fundamental mistake we've made in netlink.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
  2017-04-21 15:38                                 ` David Miller
@ 2017-04-21 15:55                                   ` Jamal Hadi Salim
  2017-04-21 16:01                                     ` Jamal Hadi Salim
  2017-04-21 16:11                                     ` David Miller
  0 siblings, 2 replies; 37+ messages in thread
From: Jamal Hadi Salim @ 2017-04-21 15:55 UTC (permalink / raw)
  To: David Miller; +Cc: eric.dumazet, jiri, netdev, xiyou.wangcong

On 17-04-21 11:38 AM, David Miller wrote:
> From: Jamal Hadi Salim <jhs@mojatatu.com>
> Date: Fri, 21 Apr 2017 11:29:19 -0400
>

>
> You don't know this because the kernel has never verified it.
> Jamal, you cannot walk past this important point, nothing can
> be argued further because of it.
>
>> Old kernels ignore them. New kernels look at the new ones.
>> We'll be in a lot of trouble if this was not the case
>> for things today;-> People add bits all the time in TLVs
>> and in netlink headers that are labeled as flags.
>
> And when we do things that way it's broken, and why we have such
> crappy behavior.
>
> We made a very bad decision a long time ago to ignore unrecognized
> things in netlink and it was a grave error which we must start
> correcting now.
>

Dave, that is a different argument which i can appreciate.

> If a user says "enable X" and it just gets simply ignored by older
> kernels, that can't work properly.  What if "enable X" is something
> like "turn on encryption"?  Are you OK with the user getting no
> feedback that their stuff is not going to be encrypted?
>

For this specific use case:
Dont they need a newer kernel which supports "enable encryption"?

> Even something as benign as "give melarger action dumps" _must_ still
> have the same behavior because the user has no alternative action plan
> possible if it cannot tell if the kernel supports the facility or not.
>

So you are seeing this as feature discovery as well then.

>> Dave, I dont think you are suggesting we should use a TLV for every
>> bit
>> we want to  send to the kernel (as Jiri is), are you?
>
> Jiri is not suggesting this, he is instead saying if you want to
> support more bits in the future then you must check that the unused
> bits are zero _now_ so that we can prove that userland clears them
> properly.
>
> And if you don't have any direct plans for more bits in the future,
> use just a single attribute with the smallest integer type possible.
>

Ok, lets move with that premise then in our discussion.

>> I think you as suggesting we should from now on enforce a rule that
>> in the kernel we start checking that bits in a bitmap received for
>> things we are not interested in. So if a bit i dont understand shows
>> up in the kernel what should i do?
>
> Reject it.
>

It may be case by case basis, no?

I can understand rejecting for something that will break operations.
Example "i want you to encrypt this".
But i think there are other cases like "please give me a large
dump" which require less harsh reaction in particular because
I have alternative means in the kernel to achieve the dump.
Would logging or no reaction be fine then?

cheers,
jamal

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
  2017-04-21 15:55                                   ` Jamal Hadi Salim
@ 2017-04-21 16:01                                     ` Jamal Hadi Salim
  2017-04-21 16:12                                       ` David Miller
  2017-04-21 16:11                                     ` David Miller
  1 sibling, 1 reply; 37+ messages in thread
From: Jamal Hadi Salim @ 2017-04-21 16:01 UTC (permalink / raw)
  To: David Miller; +Cc: eric.dumazet, jiri, netdev, xiyou.wangcong

On 17-04-21 11:55 AM, Jamal Hadi Salim wrote:
> On 17-04-21 11:38 AM, David Miller wrote:

>> If a user says "enable X" and it just gets simply ignored by older
>> kernels, that can't work properly.  What if "enable X" is something
>> like "turn on encryption"?  Are you OK with the user getting no
>> feedback that their stuff is not going to be encrypted?
>>
>
> For this specific use case:
> Dont they need a newer kernel which supports "enable encryption"?

Also: What happens to this user space app when it encounters an
older kernel?

Using rejection as a capability discovery wont work I think.

cheers,
jamal

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
  2017-04-21 15:55                                   ` Jamal Hadi Salim
  2017-04-21 16:01                                     ` Jamal Hadi Salim
@ 2017-04-21 16:11                                     ` David Miller
  1 sibling, 0 replies; 37+ messages in thread
From: David Miller @ 2017-04-21 16:11 UTC (permalink / raw)
  To: jhs; +Cc: eric.dumazet, jiri, netdev, xiyou.wangcong

From: Jamal Hadi Salim <jhs@mojatatu.com>
Date: Fri, 21 Apr 2017 11:55:40 -0400

> On 17-04-21 11:38 AM, David Miller wrote:
>> From: Jamal Hadi Salim <jhs@mojatatu.com>
>> Date: Fri, 21 Apr 2017 11:29:19 -0400
>>
>> Even something as benign as "give melarger action dumps" _must_ still
>> have the same behavior because the user has no alternative action plan
>> possible if it cannot tell if the kernel supports the facility or not.
>>

   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

I am pretty sure I was clear in my position above.  And then you say:

> But i think there are other cases like "please give me a large
> dump" which require less harsh reaction in particular because
> I have alternative means in the kernel to achieve the dump.
> Would logging or no reaction be fine then?

I clearly said that the large dump should be handled the exactly the
same way as other kinds of attributes and requests.  And I told you
why, and it's because the user cannot act upon the situation if it
wants to.

You give the user no way to perform alternative actions.

Any feature whatsoever, even "give me large dumps" may require the
user to take alternative actions.  You give the user no option
whatsoever by silently ignoring stuff, and that is simply
unacceptable.

Please get out of the mindset of "oh, ignoring this and silently
proceeding in situation X is OK".

Thanks.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
  2017-04-21 16:01                                     ` Jamal Hadi Salim
@ 2017-04-21 16:12                                       ` David Miller
  2017-04-21 18:11                                         ` Jamal Hadi Salim
  0 siblings, 1 reply; 37+ messages in thread
From: David Miller @ 2017-04-21 16:12 UTC (permalink / raw)
  To: jhs; +Cc: eric.dumazet, jiri, netdev, xiyou.wangcong

From: Jamal Hadi Salim <jhs@mojatatu.com>
Date: Fri, 21 Apr 2017 12:01:07 -0400

> On 17-04-21 11:55 AM, Jamal Hadi Salim wrote:
>> On 17-04-21 11:38 AM, David Miller wrote:
> 
>>> If a user says "enable X" and it just gets simply ignored by older
>>> kernels, that can't work properly.  What if "enable X" is something
>>> like "turn on encryption"?  Are you OK with the user getting no
>>> feedback that their stuff is not going to be encrypted?
>>>
>>
>> For this specific use case:
>> Dont they need a newer kernel which supports "enable encryption"?
> 
> Also: What happens to this user space app when it encounters an
> older kernel?
> 
> Using rejection as a capability discovery wont work I think.

Yes for existing attributes we are stuck in the mud because of how
we've handled things in the past.  I'm not saying we should change
behavior for existing attributes.

I'm talking about any newly added attribute from here on out, and
that we need to require checks for them.

Thanks.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
  2017-04-21 16:12                                       ` David Miller
@ 2017-04-21 18:11                                         ` Jamal Hadi Salim
  2017-04-22 11:30                                           ` Jamal Hadi Salim
  2017-04-24  9:27                                           ` Simon Horman
  0 siblings, 2 replies; 37+ messages in thread
From: Jamal Hadi Salim @ 2017-04-21 18:11 UTC (permalink / raw)
  To: David Miller; +Cc: eric.dumazet, jiri, netdev, xiyou.wangcong

On 17-04-21 12:12 PM, David Miller wrote:

> Yes for existing attributes we are stuck in the mud because of how
> we've handled things in the past.  I'm not saying we should change
> behavior for existing attributes.
>
> I'm talking about any newly added attribute from here on out, and
> that we need to require checks for them.
>

Please bear with me. I want to make sure to get this right.

Lets say I updated the kernel today to reject transactions with
bits it didnt understand. Lets call this "old kernel". A tc that
understands/sets these bits and nothing else. Call it "old tc".
3 months later:
I add one more bit setting to introduce a new feature in a new
kernel version. Lets call this new "kernel". I update to
understand new bits. Call it "new tc".

The possibilities:
a) old tc + old kernel combo. No problem
b) new tc + new kernel combo. No problem.
c) old tc + new kernel combo. No problem.
d) new tc + old kernel. Rejection.

For #d if i have a smart tc it would retry with a new combination
which restores its behavior to old tc level. Of course this means
apps would have to be rewritten going forward to understand these
mechanics.
Alternative is to request for capabilities first then doing a
lowest common denominator request.
But even that is a lot more code and crossing user/kernel twice.

There is a simpler approach that would work going forward.
How about letting the user choose their fate? Set something maybe
in the netlink header to tell the kernel "if you dont understand
something I am asking for - please ignore it and do what you can".
This would maintain current behavior but would force the user to
explicitly state so.

cheers,
jamal

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
  2017-04-21 18:11                                         ` Jamal Hadi Salim
@ 2017-04-22 11:30                                           ` Jamal Hadi Salim
  2017-04-24  9:27                                           ` Simon Horman
  1 sibling, 0 replies; 37+ messages in thread
From: Jamal Hadi Salim @ 2017-04-22 11:30 UTC (permalink / raw)
  To: David Miller; +Cc: eric.dumazet, jiri, netdev, xiyou.wangcong

[-- Attachment #1: Type: text/plain, Size: 1502 bytes --]

On 17-04-21 02:11 PM, Jamal Hadi Salim wrote:

> Please bear with me. I want to make sure to get this right.
>
> Lets say I updated the kernel today to reject transactions with
> bits it didnt understand. Lets call this "old kernel". A tc that
> understands/sets these bits and nothing else. Call it "old tc".
> 3 months later:
> I add one more bit setting to introduce a new feature in a new
> kernel version. Lets call this new "kernel". I update to
> understand new bits. Call it "new tc".
>
> The possibilities:
> a) old tc + old kernel combo. No problem
> b) new tc + new kernel combo. No problem.
> c) old tc + new kernel combo. No problem.
> d) new tc + old kernel. Rejection.
>
> For #d if i have a smart tc it would retry with a new combination
> which restores its behavior to old tc level. Of course this means
> apps would have to be rewritten going forward to understand these
> mechanics.
> Alternative is to request for capabilities first then doing a
> lowest common denominator request.
> But even that is a lot more code and crossing user/kernel twice.
>
> There is a simpler approach that would work going forward.
> How about letting the user choose their fate? Set something maybe
> in the netlink header to tell the kernel "if you dont understand
> something I am asking for - please ignore it and do what you can".
> This would maintain current behavior but would force the user to
> explicitly state so.
>


Tested patch that demonstrates this idea is attached.

cheers,
jamal


[-- Attachment #2: patch-act-check-flags --]
[-- Type: text/plain, Size: 5985 bytes --]

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index cce0613..48d3acb 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -674,10 +674,28 @@ struct tcamsg {
 	unsigned char	tca__pad1;
 	unsigned short	tca__pad2;
 };
+
+enum {
+	TCA_ROOT_UNSPEC,
+	TCA_ROOT_TAB,
+#define TCA_ACT_TAB TCA_ROOT_TAB
+	TCA_ROOT_FLAGS,
+	TCA_ROOT_COUNT,
+	__TCA_ROOT_MAX,
+#define	TCA_ROOT_MAX (__TCA_ROOT_MAX - 1)
+};
+
 #define TA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg))))
 #define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg))
-#define TCA_ACT_TAB 1 /* attr type must be >=1 */	
-#define TCAA_MAX 1
+/* tcamsg flags stored in attribute TCA_ROOT_FLAGS
+ *
+ * TCA_FLAG_LARGE_DUMP_ON user->kernel to request for larger than TCA_ACT_MAX_PRIO
+ * actions in a dump. All dump responses will contain the number of actions
+ * being dumped stored in for user app's consumption in TCA_ROOT_COUNT
+ *
+ */
+#define TCA_FLAG_LARGE_DUMP_ON		(1 << 0)
+#define TCA_FLAG_LIBERAL_CHECK_ON	(1 << 1)
 
 /* New extended info filters for IFLA_EXT_MASK */
 #define RTEXT_FILTER_VF		(1 << 0)
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 9ce22b7..fbe96ae 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -83,6 +83,7 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
 			   struct netlink_callback *cb)
 {
 	int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
+	u32 act_flags = cb->args[2];
 	struct nlattr *nest;
 
 	spin_lock_bh(&hinfo->lock);
@@ -111,14 +112,18 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
 			}
 			nla_nest_end(skb, nest);
 			n_i++;
-			if (n_i >= TCA_ACT_MAX_PRIO)
+			if (!(act_flags & TCA_FLAG_LARGE_DUMP_ON) &&
+			    n_i >= TCA_ACT_MAX_PRIO)
 				goto done;
 		}
 	}
 done:
 	spin_unlock_bh(&hinfo->lock);
-	if (n_i)
+	if (n_i) {
 		cb->args[0] += n_i;
+		if (act_flags & TCA_FLAG_LARGE_DUMP_ON)
+			cb->args[1] = n_i;
+	}
 	return n_i;
 
 nla_put_failure:
@@ -993,11 +998,15 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
 	return tcf_add_notify(net, n, &actions, portid);
 }
 
+static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = {
+	[TCA_ROOT_FLAGS]      = { .type = NLA_U32 },
+};
+
 static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
 			 struct netlink_ext_ack *extack)
 {
 	struct net *net = sock_net(skb->sk);
-	struct nlattr *tca[TCAA_MAX + 1];
+	struct nlattr *tca[TCA_ROOT_MAX + 1];
 	u32 portid = skb ? NETLINK_CB(skb).portid : 0;
 	int ret = 0, ovr = 0;
 
@@ -1005,7 +1014,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
 	    !netlink_capable(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
-	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCAA_MAX, NULL,
+	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ROOT_MAX, NULL,
 			  extack);
 	if (ret < 0)
 		return ret;
@@ -1046,16 +1055,12 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
 	return ret;
 }
 
-static struct nlattr *find_dump_kind(const struct nlmsghdr *n)
+static struct nlattr *find_dump_kind(struct nlattr **nla)
 {
 	struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1];
 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
-	struct nlattr *nla[TCAA_MAX + 1];
 	struct nlattr *kind;
 
-	if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX,
-			NULL, NULL) < 0)
-		return NULL;
 	tb1 = nla[TCA_ACT_TAB];
 	if (tb1 == NULL)
 		return NULL;
@@ -1073,6 +1078,24 @@ static struct nlattr *find_dump_kind(const struct nlmsghdr *n)
 	return kind;
 }
 
+#define valid_tca_flags (TCA_FLAG_LARGE_DUMP_ON | TCA_FLAG_LIBERAL_CHECK_ON)
+/* If user has set TCA_FLAG_LIBERAL_CHECK_ON we let the kernel continue
+ * its checking for valid bits only, otherwise we check for any unknown
+ * bits set and reject them
+ */
+static inline bool tca_flags_valid(u32 act_flags)
+{
+	u32 invalid_flags_mask  = ~valid_tca_flags;
+	printk("act flags 0x%x\n", act_flags);
+	if (act_flags & TCA_FLAG_LIBERAL_CHECK_ON)
+		return true;
+
+	if (act_flags & invalid_flags_mask)
+		return false;
+
+	return true;
+}
+
 static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct net *net = sock_net(skb->sk);
@@ -1082,8 +1105,18 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	struct tc_action_ops *a_o;
 	int ret = 0;
 	struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh);
-	struct nlattr *kind = find_dump_kind(cb->nlh);
+	struct nlattr *count_attr = NULL;
+	struct nlattr *tb[TCA_ROOT_MAX + 1];
+	struct nlattr *kind = NULL;
+	u32 act_flags = 0;
+	u32 act_count = 0;
+
+	ret = nlmsg_parse(cb->nlh, sizeof(struct tcamsg), tb, TCA_ROOT_MAX,
+			  tcaa_policy, NULL);
+	if (ret < 0)
+		return ret;
 
+	kind = find_dump_kind(tb);
 	if (kind == NULL) {
 		pr_info("tc_dump_action: action bad kind\n");
 		return 0;
@@ -1093,14 +1126,28 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	if (a_o == NULL)
 		return 0;
 
+	if (tb[TCA_ROOT_FLAGS])
+		act_flags = nla_get_u32(tb[TCA_ROOT_FLAGS]);
+
+	if (act_flags && !tca_flags_valid(act_flags)) {
+		printk("invalid flags 0x%x. Ask for liberal behavior\n",
+		       act_flags);
+		return -EINVAL;
+	}
+
 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
 			cb->nlh->nlmsg_type, sizeof(*t), 0);
 	if (!nlh)
 		goto out_module_put;
+
+	cb->args[2] = act_flags;
 	t = nlmsg_data(nlh);
 	t->tca_family = AF_UNSPEC;
 	t->tca__pad1 = 0;
 	t->tca__pad2 = 0;
+	count_attr = nla_reserve(skb, TCA_ROOT_COUNT, sizeof(u32));
+	if (!count_attr)
+		goto out_module_put;
 
 	nest = nla_nest_start(skb, TCA_ACT_TAB);
 	if (nest == NULL)
@@ -1113,6 +1160,9 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	if (ret > 0) {
 		nla_nest_end(skb, nest);
 		ret = skb->len;
+		act_count = cb->args[1];
+		memcpy(nla_data(count_attr), &act_count, sizeof(u32));
+		cb->args[1] = 0;
 	} else
 		nlmsg_trim(skb, b);
 

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
  2017-04-21 18:11                                         ` Jamal Hadi Salim
  2017-04-22 11:30                                           ` Jamal Hadi Salim
@ 2017-04-24  9:27                                           ` Simon Horman
  2017-04-24 12:54                                             ` Jamal Hadi Salim
  1 sibling, 1 reply; 37+ messages in thread
From: Simon Horman @ 2017-04-24  9:27 UTC (permalink / raw)
  To: Jamal Hadi Salim; +Cc: David Miller, eric.dumazet, jiri, netdev, xiyou.wangcong

On Fri, Apr 21, 2017 at 02:11:00PM -0400, Jamal Hadi Salim wrote:
> On 17-04-21 12:12 PM, David Miller wrote:
> 
> >Yes for existing attributes we are stuck in the mud because of how
> >we've handled things in the past.  I'm not saying we should change
> >behavior for existing attributes.
> >
> >I'm talking about any newly added attribute from here on out, and
> >that we need to require checks for them.
> >
> 
> Please bear with me. I want to make sure to get this right.
> 
> Lets say I updated the kernel today to reject transactions with
> bits it didnt understand. Lets call this "old kernel". A tc that
> understands/sets these bits and nothing else. Call it "old tc".
> 3 months later:
> I add one more bit setting to introduce a new feature in a new
> kernel version. Lets call this new "kernel". I update to
> understand new bits. Call it "new tc".
> 
> The possibilities:
> a) old tc + old kernel combo. No problem
> b) new tc + new kernel combo. No problem.
> c) old tc + new kernel combo. No problem.
> d) new tc + old kernel. Rejection.
> 
> For #d if i have a smart tc it would retry with a new combination
> which restores its behavior to old tc level. Of course this means
> apps would have to be rewritten going forward to understand these
> mechanics.
> Alternative is to request for capabilities first then doing a
> lowest common denominator request.
> But even that is a lot more code and crossing user/kernel twice.

>From my PoV, for #d user-space has to either get smart or fail.

Creating new tc involved work in order to support the new feature.
Part of that work would be a decision weather or not to provide
compatibility for old kernel or to bail out gracefully.

> There is a simpler approach that would work going forward.
> How about letting the user choose their fate? Set something maybe
> in the netlink header to tell the kernel "if you dont understand
> something I am asking for - please ignore it and do what you can".
> This would maintain current behavior but would force the user to
> explicitly state so.
> 
> cheers,
> jamal
> 

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
  2017-04-24  9:27                                           ` Simon Horman
@ 2017-04-24 12:54                                             ` Jamal Hadi Salim
  0 siblings, 0 replies; 37+ messages in thread
From: Jamal Hadi Salim @ 2017-04-24 12:54 UTC (permalink / raw)
  To: Simon Horman; +Cc: David Miller, eric.dumazet, jiri, netdev, xiyou.wangcong

On 17-04-24 05:27 AM, Simon Horman wrote:
> On Fri, Apr 21, 2017 at 02:11:00PM -0400, Jamal Hadi Salim wrote:
>> On 17-04-21 12:12 PM, David Miller wrote:

>
> From my PoV, for #d user-space has to either get smart or fail.
>
> Creating new tc involved work in order to support the new feature.
> Part of that work would be a decision weather or not to provide
> compatibility for old kernel or to bail out gracefully.
>

But not that much work as is being ascribed now.
This is a big change to user space code. Are we planning to
change all netlink apps (everything in iproute2) to now discover
by testing whether their flags are accepted or not? One extra
crossing to the kernel just to test the waters.

I think there's a middle ground and see which idea takes off.
Refer to the last patch I sent which implements the idea below.

cheers,
jamal

>> There is a simpler approach that would work going forward.
>> How about letting the user choose their fate? Set something maybe
>> in the netlink header to tell the kernel "if you dont understand
>> something I am asking for - please ignore it and do what you can".
>> This would maintain current behavior but would force the user to
>> explicitly state so.
>>
>> cheers,
>> jamal
>>

^ permalink raw reply	[flat|nested] 37+ messages in thread

end of thread, other threads:[~2017-04-24 12:54 UTC | newest]

Thread overview: 37+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-04-19 11:57 [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch Jamal Hadi Salim
2017-04-19 11:57 ` [PATCH net-next v4 2/2] net sched actions: add time filter for action dumping Jamal Hadi Salim
2017-04-19 12:53   ` Jiri Pirko
2017-04-19 13:11     ` Jamal Hadi Salim
2017-04-19 14:14       ` Jiri Pirko
2017-04-19 15:47         ` Jamal Hadi Salim
2017-04-19 15:55           ` Jiri Pirko
2017-04-19 12:36 ` [PATCH net-next v4 1/2] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch Jiri Pirko
2017-04-19 12:55   ` Roman Mashak
2017-04-19 13:05     ` Jiri Pirko
2017-04-19 13:03   ` Jamal Hadi Salim
2017-04-19 13:13     ` Jiri Pirko
2017-04-19 15:37       ` Jamal Hadi Salim
2017-04-19 15:54         ` Jiri Pirko
2017-04-19 16:07           ` Jamal Hadi Salim
2017-04-19 16:17             ` Jiri Pirko
2017-04-20 10:42               ` Jamal Hadi Salim
2017-04-20 11:35                 ` Jiri Pirko
2017-04-20 14:03                   ` Jamal Hadi Salim
2017-04-20 12:18                 ` Eric Dumazet
2017-04-20 13:27                   ` Jamal Hadi Salim
2017-04-20 15:50                     ` David Miller
2017-04-20 17:38                       ` Jamal Hadi Salim
2017-04-20 17:58                         ` David Miller
2017-04-21 10:36                           ` Jamal Hadi Salim
2017-04-21 14:51                             ` David Miller
2017-04-21 15:29                               ` Jamal Hadi Salim
2017-04-21 15:38                                 ` David Miller
2017-04-21 15:55                                   ` Jamal Hadi Salim
2017-04-21 16:01                                     ` Jamal Hadi Salim
2017-04-21 16:12                                       ` David Miller
2017-04-21 18:11                                         ` Jamal Hadi Salim
2017-04-22 11:30                                           ` Jamal Hadi Salim
2017-04-24  9:27                                           ` Simon Horman
2017-04-24 12:54                                             ` Jamal Hadi Salim
2017-04-21 16:11                                     ` David Miller
2017-04-20 15:23                   ` David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.