* [PATCH net-next v9 1/3] net sched actions: Use proper root attribute table for actions
2017-04-26 12:42 [PATCH net-next v9 0/3] net sched actions: improve dump performance Jamal Hadi Salim
@ 2017-04-26 12:42 ` Jamal Hadi Salim
2017-04-26 12:42 ` [PATCH net-next v9 2/3] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch Jamal Hadi Salim
2017-04-26 12:42 ` [PATCH net-next v9 3/3] net sched actions: add time filter for action dumping Jamal Hadi Salim
2 siblings, 0 replies; 9+ messages in thread
From: Jamal Hadi Salim @ 2017-04-26 12:42 UTC (permalink / raw)
To: davem
Cc: jiri, xiyou.wangcong, eric.dumazet, simon.horman, netdev,
Jamal Hadi Salim
From: Jamal Hadi Salim <jhs@mojatatu.com>
Bug fix for an issue which has been around for about a decade.
We got away with it because the enumeration was larger than needed.
Fixes: 7ba699c604ab ("[NET_SCHED]: Convert actions from rtnetlink to new netlink API")
Suggested-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
---
net/sched/act_api.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 7f2cd70..9d0f007 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -1031,7 +1031,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
- struct nlattr *tca[TCA_ACT_MAX + 1];
+ struct nlattr *tca[TCAA_MAX + 1];
u32 portid = skb ? NETLINK_CB(skb).portid : 0;
int ret = 0, ovr = 0;
@@ -1039,7 +1039,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
- ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL,
+ ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCAA_MAX, NULL,
extack);
if (ret < 0)
return ret;
--
1.9.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH net-next v9 2/3] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
2017-04-26 12:42 [PATCH net-next v9 0/3] net sched actions: improve dump performance Jamal Hadi Salim
2017-04-26 12:42 ` [PATCH net-next v9 1/3] net sched actions: Use proper root attribute table for actions Jamal Hadi Salim
@ 2017-04-26 12:42 ` Jamal Hadi Salim
2017-04-26 13:08 ` Jiri Pirko
2017-04-26 12:42 ` [PATCH net-next v9 3/3] net sched actions: add time filter for action dumping Jamal Hadi Salim
2 siblings, 1 reply; 9+ messages in thread
From: Jamal Hadi Salim @ 2017-04-26 12:42 UTC (permalink / raw)
To: davem
Cc: jiri, xiyou.wangcong, eric.dumazet, simon.horman, netdev,
Jamal Hadi Salim
From: Jamal Hadi Salim <jhs@mojatatu.com>
When you dump hundreds of thousands of actions, getting only 32 per
dump batch even when the socket buffer and memory allocations allow
is inefficient.
With this change, the user will get as many as possibly fitting
within the given constraints available to the kernel.
The top level action TLV space is extended. An attribute
TCA_ROOT_FLAGS is used to carry flags; flag TCA_FLAG_LARGE_DUMP_ON
is set by the user indicating the user is capable of processing
these large dumps. Older user space which doesnt set this flag
doesnt get the large (than 32) batches.
The kernel uses the TCA_ROOT_COUNT attribute to tell the user how many
actions are put in a single batch. As such user space app knows how long
to iterate (independent of the type of action being dumped)
instead of hardcoded maximum of 32.
Some results dumping 1.5M actions, first unpatched tc which the
kernel doesnt help:
prompt$ time -p tc actions ls action gact | grep index | wc -l
1500000
real 1388.43
user 2.07
sys 1386.79
Now lets see a patched tc which sets the correct flags when requesting
a dump:
prompt$ time -p updatedtc actions ls action gact | grep index | wc -l
1500000
real 178.13
user 2.02
sys 176.96
That is about 8x performance improvement for tc which sets its
receive buffer to about 32K.
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
---
include/uapi/linux/rtnetlink.h | 21 +++++++++++++--
net/sched/act_api.c | 60 +++++++++++++++++++++++++++++++++++-------
2 files changed, 69 insertions(+), 12 deletions(-)
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index cce0613..20c86c0 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -674,10 +674,27 @@ struct tcamsg {
unsigned char tca__pad1;
unsigned short tca__pad2;
};
+
+enum {
+ TCA_ROOT_UNSPEC,
+ TCA_ROOT_TAB,
+#define TCA_ACT_TAB TCA_ROOT_TAB
+ TCA_ROOT_FLAGS,
+ TCA_ROOT_COUNT,
+ __TCA_ROOT_MAX,
+#define TCA_ROOT_MAX (__TCA_ROOT_MAX - 1)
+};
+
#define TA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg))))
#define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg))
-#define TCA_ACT_TAB 1 /* attr type must be >=1 */
-#define TCAA_MAX 1
+/* tcamsg flags stored in attribute TCA_ROOT_FLAGS
+ *
+ * TCA_FLAG_LARGE_DUMP_ON user->kernel to request for larger than TCA_ACT_MAX_PRIO
+ * actions in a dump. All dump responses will contain the number of actions
+ * being dumped stored in for user app's consumption in TCA_ROOT_COUNT
+ *
+ */
+#define TCA_FLAG_LARGE_DUMP_ON BIT(0)
/* New extended info filters for IFLA_EXT_MASK */
#define RTEXT_FILTER_VF (1 << 0)
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 9d0f007..ccc333e 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -83,6 +83,7 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
struct netlink_callback *cb)
{
int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
+ u32 act_flags = cb->args[2];
struct nlattr *nest;
spin_lock_bh(&hinfo->lock);
@@ -111,14 +112,18 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
}
nla_nest_end(skb, nest);
n_i++;
- if (n_i >= TCA_ACT_MAX_PRIO)
+ if (!(act_flags & TCA_FLAG_LARGE_DUMP_ON) &&
+ n_i >= TCA_ACT_MAX_PRIO)
goto done;
}
}
done:
spin_unlock_bh(&hinfo->lock);
- if (n_i)
+ if (n_i) {
cb->args[0] += n_i;
+ if (act_flags & TCA_FLAG_LARGE_DUMP_ON)
+ cb->args[1] = n_i;
+ }
return n_i;
nla_put_failure:
@@ -1027,11 +1032,15 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
return tcf_add_notify(net, n, &actions, portid);
}
+static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = {
+ [TCA_ROOT_FLAGS] = { .type = NLA_U32 },
+};
+
static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
- struct nlattr *tca[TCAA_MAX + 1];
+ struct nlattr *tca[TCA_ROOT_MAX + 1];
u32 portid = skb ? NETLINK_CB(skb).portid : 0;
int ret = 0, ovr = 0;
@@ -1039,7 +1048,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
- ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCAA_MAX, NULL,
+ ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ROOT_MAX, NULL,
extack);
if (ret < 0)
return ret;
@@ -1080,16 +1089,12 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
return ret;
}
-static struct nlattr *find_dump_kind(const struct nlmsghdr *n)
+static struct nlattr *find_dump_kind(struct nlattr **nla)
{
struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1];
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
- struct nlattr *nla[TCAA_MAX + 1];
struct nlattr *kind;
- if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX,
- NULL, NULL) < 0)
- return NULL;
tb1 = nla[TCA_ACT_TAB];
if (tb1 == NULL)
return NULL;
@@ -1107,6 +1112,17 @@ static struct nlattr *find_dump_kind(const struct nlmsghdr *n)
return kind;
}
+#define VALID_TCA_ROOT_FLAGS TCA_FLAG_LARGE_DUMP_ON
+static inline bool tca_flags_valid(u32 act_flags)
+{
+ u32 invalid_flags_mask = ~VALID_TCA_ROOT_FLAGS;
+
+ if (act_flags & invalid_flags_mask)
+ return false;
+
+ return true;
+}
+
static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
@@ -1116,8 +1132,18 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
struct tc_action_ops *a_o;
int ret = 0;
struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh);
- struct nlattr *kind = find_dump_kind(cb->nlh);
+ struct nlattr *count_attr = NULL;
+ struct nlattr *tb[TCA_ROOT_MAX + 1];
+ struct nlattr *kind = NULL;
+ u32 act_flags = 0;
+ u32 act_count = 0;
+
+ ret = nlmsg_parse(cb->nlh, sizeof(struct tcamsg), tb, TCA_ROOT_MAX,
+ tcaa_policy, NULL);
+ if (ret < 0)
+ return ret;
+ kind = find_dump_kind(tb);
if (kind == NULL) {
pr_info("tc_dump_action: action bad kind\n");
return 0;
@@ -1127,14 +1153,25 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
if (a_o == NULL)
return 0;
+ if (tb[TCA_ROOT_FLAGS])
+ act_flags = nla_get_u32(tb[TCA_ROOT_FLAGS]);
+
+ if (act_flags && !tca_flags_valid(act_flags))
+ return -EINVAL;
+
nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
cb->nlh->nlmsg_type, sizeof(*t), 0);
if (!nlh)
goto out_module_put;
+
+ cb->args[2] = act_flags;
t = nlmsg_data(nlh);
t->tca_family = AF_UNSPEC;
t->tca__pad1 = 0;
t->tca__pad2 = 0;
+ count_attr = nla_reserve(skb, TCA_ROOT_COUNT, sizeof(u32));
+ if (!count_attr)
+ goto out_module_put;
nest = nla_nest_start(skb, TCA_ACT_TAB);
if (nest == NULL)
@@ -1147,6 +1184,9 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
if (ret > 0) {
nla_nest_end(skb, nest);
ret = skb->len;
+ act_count = cb->args[1];
+ memcpy(nla_data(count_attr), &act_count, sizeof(u32));
+ cb->args[1] = 0;
} else
nlmsg_trim(skb, b);
--
1.9.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH net-next v9 3/3] net sched actions: add time filter for action dumping
2017-04-26 12:42 [PATCH net-next v9 0/3] net sched actions: improve dump performance Jamal Hadi Salim
2017-04-26 12:42 ` [PATCH net-next v9 1/3] net sched actions: Use proper root attribute table for actions Jamal Hadi Salim
2017-04-26 12:42 ` [PATCH net-next v9 2/3] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch Jamal Hadi Salim
@ 2017-04-26 12:42 ` Jamal Hadi Salim
2 siblings, 0 replies; 9+ messages in thread
From: Jamal Hadi Salim @ 2017-04-26 12:42 UTC (permalink / raw)
To: davem
Cc: jiri, xiyou.wangcong, eric.dumazet, simon.horman, netdev,
Jamal Hadi Salim
From: Jamal Hadi Salim <jhs@mojatatu.com>
This adds support for filtering based on time since last used.
When we are dumping a large number of actions it is useful to
have the option of filtering based on when the action was last
used to reduce the amount of data crossing to user space.
With this patch the user space app sets the TCA_ROOT_TIME_DELTA
attribute with the value in milliseconds with "time of interest
since now". The kernel converts this to jiffies and does the
filtering comparison matching entries that have seen activity
since then and returns them to user space.
Old kernels and old tc continue to work in legacy mode since
they dont specify this attribute.
Some example (we have 400 actions bound to 400 filters); at
installation time using hacked tc which sets the time of
interest to 120 seconds:
prompt$ hackedtc actions ls action gact | grep index | wc -l
400
go get some coffee and wait for > 120 seconds and try again:
prompt$ hackedtc actions ls action gact | grep index | wc -l
0
Lets see a filter bound to one of these actions:
..
filter pref 10 u32
filter pref 10 u32 fh 800: ht divisor 1
filter pref 10 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:10 (rule hit 2 success 1)
match 7f000002/ffffffff at 12 (success 1 )
action order 1: gact action pass
random type none pass val 0
index 23 ref 2 bind 1 installed 1145 sec used 802 sec
Action statistics:
Sent 84 bytes 1 pkt (dropped 0, overlimits 0 requeues 0)
backlog 0b 0p requeues 0
....
that coffee took long, no? It was good.
Now lets ping -c 1 127.0.0.2, then run the actions again:
prompt$ hackedtc actions ls action gact | grep index | wc -l
1
More details please:
prompt$ hackedtc -s actions ls action gact
action order 0: gact action pass
random type none pass val 0
index 23 ref 2 bind 1 installed 1270 sec used 30 sec
Action statistics:
Sent 168 bytes 2 pkt (dropped 0, overlimits 0 requeues 0)
backlog 0b 0p requeues 0
And the filter?
filter pref 10 u32
filter pref 10 u32 fh 800: ht divisor 1
filter pref 10 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:10 (rule hit 4 success 2)
match 7f000002/ffffffff at 12 (success 2 )
action order 1: gact action pass
random type none pass val 0
index 23 ref 2 bind 1 installed 1324 sec used 84 sec
Action statistics:
Sent 168 bytes 2 pkt (dropped 0, overlimits 0 requeues 0)
backlog 0b 0p requeues 0
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
---
include/uapi/linux/rtnetlink.h | 1 +
net/sched/act_api.c | 22 ++++++++++++++++++++--
2 files changed, 21 insertions(+), 2 deletions(-)
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 20c86c0..d89b358 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -681,6 +681,7 @@ enum {
#define TCA_ACT_TAB TCA_ROOT_TAB
TCA_ROOT_FLAGS,
TCA_ROOT_COUNT,
+ TCA_ROOT_TIME_DELTA, /* in msecs */
__TCA_ROOT_MAX,
#define TCA_ROOT_MAX (__TCA_ROOT_MAX - 1)
};
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index ccc333e..7eb4f7a 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -84,6 +84,7 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
{
int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
u32 act_flags = cb->args[2];
+ unsigned long jiffy_since = cb->args[3];
struct nlattr *nest;
spin_lock_bh(&hinfo->lock);
@@ -101,6 +102,11 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
if (index < s_i)
continue;
+ if (jiffy_since &&
+ time_after(jiffy_since,
+ (unsigned long)p->tcfa_tm.lastuse))
+ continue;
+
nest = nla_nest_start(skb, n_i);
if (nest == NULL)
goto nla_put_failure;
@@ -118,9 +124,11 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
}
}
done:
+ if (index > 0)
+ cb->args[0] = index + 1;
+
spin_unlock_bh(&hinfo->lock);
if (n_i) {
- cb->args[0] += n_i;
if (act_flags & TCA_FLAG_LARGE_DUMP_ON)
cb->args[1] = n_i;
}
@@ -1033,7 +1041,8 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
}
static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = {
- [TCA_ROOT_FLAGS] = { .type = NLA_U32 },
+ [TCA_ROOT_FLAGS] = { .type = NLA_U32 },
+ [TCA_ROOT_TIME_DELTA] = { .type = NLA_U32 },
};
static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
@@ -1134,7 +1143,9 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh);
struct nlattr *count_attr = NULL;
struct nlattr *tb[TCA_ROOT_MAX + 1];
+ unsigned long jiffy_since = 0;
struct nlattr *kind = NULL;
+ u32 msecs_since = 0;
u32 act_flags = 0;
u32 act_count = 0;
@@ -1159,12 +1170,19 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
if (act_flags && !tca_flags_valid(act_flags))
return -EINVAL;
+ if (tb[TCA_ROOT_TIME_DELTA])
+ msecs_since = nla_get_u32(tb[TCA_ROOT_TIME_DELTA]);
+
nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
cb->nlh->nlmsg_type, sizeof(*t), 0);
if (!nlh)
goto out_module_put;
+ if (msecs_since)
+ jiffy_since = jiffies - msecs_to_jiffies(msecs_since);
+
cb->args[2] = act_flags;
+ cb->args[3] = jiffy_since;
t = nlmsg_data(nlh);
t->tca_family = AF_UNSPEC;
t->tca__pad1 = 0;
--
1.9.1
^ permalink raw reply related [flat|nested] 9+ messages in thread