* [PATCH net-next] net_sched: sch_fq: add horizon attribute
@ 2020-05-01 5:51 Eric Dumazet
2020-05-01 10:43 ` kbuild test robot
0 siblings, 1 reply; 3+ messages in thread
From: Eric Dumazet @ 2020-05-01 5:51 UTC (permalink / raw)
To: David S . Miller; +Cc: netdev, Eric Dumazet, Eric Dumazet, Willem de Bruijn
QUIC servers would like to use SO_TXTIME, without having CAP_NET_ADMIN,
to efficiently pace UDP packets.
As far as sch_fq is concerned, we need to add safety checks, so
that a buggy application does not fill the qdisc with packets
having delivery time far in the future.
This patch adds a configurable horizon (default: 10 seconds),
and a configurable policy when a packet is beyond the horizon
at enqueue() time:
- either drop the packet (default policy)
- or cap its delivery time to the horizon.
$ tc -s -d qd sh dev eth0
qdisc fq 8022: root refcnt 257 limit 10000p flow_limit 100p buckets 1024
orphan_mask 1023 quantum 10Kb initial_quantum 51160b low_rate_threshold 550Kbit
refill_delay 40.0ms timer_slack 10.000us horizon 10.000s
Sent 1234215879 bytes 837099 pkt (dropped 0, overlimits 0 requeues 6)
backlog 0b 0p requeues 6
flows 1191 (inactive 1177 throttled 0)
gc 0 highprio 0 throttled 692 latency 11.480us
pkts_too_long 0 alloc_errors 0 horizon_drops 21 horizon_caps 0
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Willem de Bruijn <willemb@google.com>
---
include/uapi/linux/pkt_sched.h | 6 ++++
net/sched/sch_fq.c | 59 +++++++++++++++++++++++++++++++---
2 files changed, 60 insertions(+), 5 deletions(-)
diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 0c02737c8f47921b807e52d6482ca9ff84e89268..b1acdd246cf90c9865b62eeb8a0f6735e66cb8e7 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -913,6 +913,10 @@ enum {
TCA_FQ_TIMER_SLACK, /* timer slack */
+ TCA_FQ_HORIZON, /* time horizon in us */
+
+ TCA_FQ_HORIZON_DROP, /* drop packets beyond horizon, or cap their EDT */
+
__TCA_FQ_MAX
};
@@ -932,6 +936,8 @@ struct tc_fq_qd_stats {
__u32 throttled_flows;
__u32 unthrottle_latency_ns;
__u64 ce_mark; /* packets above ce_threshold */
+ __u64 horizon_drops;
+ __u64 horizon_caps;
};
/* Heavy-Hitter Filter */
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 4c060134c7362dcf0d049404cc65066d2c95cb90..7653b65598c48ced6127853b171b802046a317b2 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -95,6 +95,7 @@ struct fq_sched_data {
struct rb_root delayed; /* for rate limited flows */
u64 time_next_delayed_flow;
+ u64 ktime_cache; /* copy of last ktime_get_ns() */
unsigned long unthrottle_latency_ns;
struct fq_flow internal; /* for non classified or high prio packets */
@@ -104,12 +105,13 @@ struct fq_sched_data {
u32 flow_plimit; /* max packets per flow */
unsigned long flow_max_rate; /* optional max rate per flow */
u64 ce_threshold;
+ u64 horizon; /* horizon in ns */
u32 orphan_mask; /* mask for orphaned skb */
u32 low_rate_threshold;
struct rb_root *fq_root;
u8 rate_enable;
u8 fq_trees_log;
-
+ u8 horizon_drop;
u32 flows;
u32 inactive_flows;
u32 throttled_flows;
@@ -118,6 +120,8 @@ struct fq_sched_data {
u64 stat_internal_packets;
u64 stat_throttled;
u64 stat_ce_mark;
+ u64 stat_horizon_drops;
+ u64 stat_horizon_caps;
u64 stat_flows_plimit;
u64 stat_pkts_too_long;
u64 stat_allocation_errors;
@@ -390,8 +394,6 @@ static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb)
struct rb_node **p, *parent;
struct sk_buff *head, *aux;
- fq_skb_cb(skb)->time_to_send = skb->tstamp ?: ktime_get_ns();
-
head = flow->head;
if (!head ||
fq_skb_cb(skb)->time_to_send >= fq_skb_cb(flow->tail)->time_to_send) {
@@ -419,6 +421,12 @@ static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb)
rb_insert_color(&skb->rbnode, &flow->t_root);
}
+static bool fq_packet_beyond_horizon(const struct sk_buff *skb,
+ const struct fq_sched_data *q)
+{
+ return unlikely((s64)skb->tstamp > (s64)(q->ktime_cache + q->horizon));
+}
+
static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
@@ -428,6 +436,28 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (unlikely(sch->q.qlen >= sch->limit))
return qdisc_drop(skb, sch, to_free);
+ if (!skb->tstamp) {
+ fq_skb_cb(skb)->time_to_send = q->ktime_cache = ktime_get_ns();
+ } else {
+ /* Check if packet timestamp is too far in the future.
+ * Try first with our cached value, to avoid ktime_get_ns()
+ * cost in most cases.
+ */
+ if (fq_packet_beyond_horizon(skb, q)) {
+ /* Refresh our cache and check another time */
+ q->ktime_cache = ktime_get_ns();
+ if (fq_packet_beyond_horizon(skb, q)) {
+ if (q->horizon_drop) {
+ q->stat_horizon_drops++;
+ return qdisc_drop(skb, sch, to_free);
+ }
+ q->stat_horizon_caps++;
+ skb->tstamp = q->ktime_cache + q->horizon;
+ }
+ }
+ fq_skb_cb(skb)->time_to_send = skb->tstamp;
+ }
+
f = fq_classify(skb, q);
if (unlikely(f->qlen >= q->flow_plimit && f != &q->internal)) {
q->stat_flows_plimit++;
@@ -498,7 +528,7 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
if (skb)
goto out;
- now = ktime_get_ns();
+ q->ktime_cache = now = ktime_get_ns();
fq_check_throttled(q, now);
begin:
head = &q->new_flows;
@@ -753,6 +783,8 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
[TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NLA_U32 },
[TCA_FQ_CE_THRESHOLD] = { .type = NLA_U32 },
[TCA_FQ_TIMER_SLACK] = { .type = NLA_U32 },
+ [TCA_FQ_HORIZON] = { .type = NLA_U32 },
+ [TCA_FQ_HORIZON_DROP] = { .type = NLA_U8 },
};
static int fq_change(struct Qdisc *sch, struct nlattr *opt,
@@ -842,7 +874,15 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
if (tb[TCA_FQ_TIMER_SLACK])
q->timer_slack = nla_get_u32(tb[TCA_FQ_TIMER_SLACK]);
+ if (tb[TCA_FQ_HORIZON])
+ q->horizon = (u64)NSEC_PER_USEC *
+ nla_get_u32(tb[TCA_FQ_HORIZON]);
+
+ if (tb[TCA_FQ_HORIZON_DROP])
+ q->horizon_drop = nla_get_u8(tb[TCA_FQ_HORIZON_DROP]);
+
if (!err) {
+
sch_tree_unlock(sch);
err = fq_resize(sch, fq_log);
sch_tree_lock(sch);
@@ -895,6 +935,9 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt,
q->timer_slack = 10 * NSEC_PER_USEC; /* 10 usec of hrtimer slack */
+ q->horizon = 10ULL * NSEC_PER_SEC; /* 10 seconds; ULL avoids overflow where long is 32-bit */
+ q->horizon_drop = 1; /* by default, drop packets beyond horizon */
+
/* Default ce_threshold of 4294 seconds */
q->ce_threshold = (u64)NSEC_PER_USEC * ~0U;
@@ -912,6 +955,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct fq_sched_data *q = qdisc_priv(sch);
u64 ce_threshold = q->ce_threshold;
+ u64 horizon = q->horizon;
struct nlattr *opts;
opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
@@ -921,6 +965,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
/* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore */
do_div(ce_threshold, NSEC_PER_USEC);
+ do_div(horizon, NSEC_PER_USEC);
if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) ||
nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) ||
@@ -936,7 +981,9 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
q->low_rate_threshold) ||
nla_put_u32(skb, TCA_FQ_CE_THRESHOLD, (u32)ce_threshold) ||
nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log) ||
- nla_put_u32(skb, TCA_FQ_TIMER_SLACK, q->timer_slack))
+ nla_put_u32(skb, TCA_FQ_TIMER_SLACK, q->timer_slack) ||
+ nla_put_u32(skb, TCA_FQ_HORIZON, (u32)horizon) ||
+ nla_put_u8(skb, TCA_FQ_HORIZON_DROP, q->horizon_drop))
goto nla_put_failure;
return nla_nest_end(skb, opts);
@@ -967,6 +1014,8 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
st.unthrottle_latency_ns = min_t(unsigned long,
q->unthrottle_latency_ns, ~0U);
st.ce_mark = q->stat_ce_mark;
+ st.horizon_drops = q->stat_horizon_drops;
+ st.horizon_caps = q->stat_horizon_caps;
sch_tree_unlock(sch);
return gnet_stats_copy_app(d, &st, sizeof(st));
--
2.26.2.526.g744177e7f7-goog
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH net-next] net_sched: sch_fq: add horizon attribute
2020-05-01 5:51 [PATCH net-next] net_sched: sch_fq: add horizon attribute Eric Dumazet
@ 2020-05-01 10:43 ` kbuild test robot
2020-05-01 13:57 ` Eric Dumazet
0 siblings, 1 reply; 3+ messages in thread
From: kbuild test robot @ 2020-05-01 10:43 UTC (permalink / raw)
To: Eric Dumazet, David S . Miller, netdev, Eric Dumazet, Willem de Bruijn
Cc: kbuild-all, netdev, Eric Dumazet
[-- Attachment #1: Type: text/plain, Size: 2717 bytes --]
Hi Eric,
I love your patch! Perhaps something to improve:
[auto build test WARNING on net-next/master]
[also build test WARNING on net/master linus/master v5.7-rc3 next-20200430]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]
url: https://github.com/0day-ci/linux/commits/Eric-Dumazet/net_sched-sch_fq-add-horizon-attribute/20200501-135537
base: https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git 37ecb5b8b8cd3156e739fd1c56a8e3842b72ebad
config: i386-allmodconfig (attached as .config)
compiler: gcc-7 (Ubuntu 7.5.0-6ubuntu2) 7.5.0
reproduce:
# save the attached .config to linux build tree
make ARCH=i386
If you fix the issue, kindly add following tag as appropriate
Reported-by: kbuild test robot <lkp@intel.com>
All warnings (new ones prefixed by >>):
net/sched/sch_fq.c: In function 'fq_init':
>> net/sched/sch_fq.c:938:18: warning: integer overflow in expression [-Woverflow]
q->horizon = 10 * NSEC_PER_SEC; /* 10 seconds */
^
vim +938 net/sched/sch_fq.c
913
914 static int fq_init(struct Qdisc *sch, struct nlattr *opt,
915 struct netlink_ext_ack *extack)
916 {
917 struct fq_sched_data *q = qdisc_priv(sch);
918 int err;
919
920 sch->limit = 10000;
921 q->flow_plimit = 100;
922 q->quantum = 2 * psched_mtu(qdisc_dev(sch));
923 q->initial_quantum = 10 * psched_mtu(qdisc_dev(sch));
924 q->flow_refill_delay = msecs_to_jiffies(40);
925 q->flow_max_rate = ~0UL;
926 q->time_next_delayed_flow = ~0ULL;
927 q->rate_enable = 1;
928 q->new_flows.first = NULL;
929 q->old_flows.first = NULL;
930 q->delayed = RB_ROOT;
931 q->fq_root = NULL;
932 q->fq_trees_log = ilog2(1024);
933 q->orphan_mask = 1024 - 1;
934 q->low_rate_threshold = 550000 / 8;
935
936 q->timer_slack = 10 * NSEC_PER_USEC; /* 10 usec of hrtimer slack */
937
> 938 q->horizon = 10 * NSEC_PER_SEC; /* 10 seconds */
939 q->horizon_drop = 1; /* by default, drop packets beyond horizon */
940
941 /* Default ce_threshold of 4294 seconds */
942 q->ce_threshold = (u64)NSEC_PER_USEC * ~0U;
943
944 qdisc_watchdog_init_clockid(&q->watchdog, sch, CLOCK_MONOTONIC);
945
946 if (opt)
947 err = fq_change(sch, opt, extack);
948 else
949 err = fq_resize(sch, q->fq_trees_log);
950
951 return err;
952 }
953
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 73127 bytes --]
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH net-next] net_sched: sch_fq: add horizon attribute
2020-05-01 10:43 ` kbuild test robot
@ 2020-05-01 13:57 ` Eric Dumazet
0 siblings, 0 replies; 3+ messages in thread
From: Eric Dumazet @ 2020-05-01 13:57 UTC (permalink / raw)
To: kbuild test robot
Cc: David S . Miller, netdev, Eric Dumazet, Willem de Bruijn, kbuild-all
On Fri, May 1, 2020 at 3:43 AM kbuild test robot <lkp@intel.com> wrote:
>
> Hi Eric,
>
> I love your patch! Perhaps something to improve:
>
> [auto build test WARNING on net-next/master]
> [also build test WARNING on net/master linus/master v5.7-rc3 next-20200430]
> [if your patch is applied to the wrong git tree, please drop us a note to help
> improve the system. BTW, we also suggest to use '--base' option to specify the
> base tree in git format-patch, please see https://stackoverflow.com/a/37406982]
>
> url: https://github.com/0day-ci/linux/commits/Eric-Dumazet/net_sched-sch_fq-add-horizon-attribute/20200501-135537
> base: https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git 37ecb5b8b8cd3156e739fd1c56a8e3842b72ebad
> config: i386-allmodconfig (attached as .config)
> compiler: gcc-7 (Ubuntu 7.5.0-6ubuntu2) 7.5.0
> reproduce:
> # save the attached .config to linux build tree
> make ARCH=i386
>
> If you fix the issue, kindly add following tag as appropriate
> Reported-by: kbuild test robot <lkp@intel.com>
>
> All warnings (new ones prefixed by >>):
>
> net/sched/sch_fq.c: In function 'fq_init':
> >> net/sched/sch_fq.c:938:18: warning: integer overflow in expression [-Woverflow]
> q->horizon = 10 * NSEC_PER_SEC; /* 10 seconds */
Thanks, I will use 10ULL in v2
> ^
>
> vim +938 net/sched/sch_fq.c
>
> 913
> 914 static int fq_init(struct Qdisc *sch, struct nlattr *opt,
> 915 struct netlink_ext_ack *extack)
> 916 {
> 917 struct fq_sched_data *q = qdisc_priv(sch);
> 918 int err;
> 919
> 920 sch->limit = 10000;
> 921 q->flow_plimit = 100;
> 922 q->quantum = 2 * psched_mtu(qdisc_dev(sch));
> 923 q->initial_quantum = 10 * psched_mtu(qdisc_dev(sch));
> 924 q->flow_refill_delay = msecs_to_jiffies(40);
> 925 q->flow_max_rate = ~0UL;
> 926 q->time_next_delayed_flow = ~0ULL;
> 927 q->rate_enable = 1;
> 928 q->new_flows.first = NULL;
> 929 q->old_flows.first = NULL;
> 930 q->delayed = RB_ROOT;
> 931 q->fq_root = NULL;
> 932 q->fq_trees_log = ilog2(1024);
> 933 q->orphan_mask = 1024 - 1;
> 934 q->low_rate_threshold = 550000 / 8;
> 935
> 936 q->timer_slack = 10 * NSEC_PER_USEC; /* 10 usec of hrtimer slack */
> 937
> > 938 q->horizon = 10 * NSEC_PER_SEC; /* 10 seconds */
> 939 q->horizon_drop = 1; /* by default, drop packets beyond horizon */
> 940
> 941 /* Default ce_threshold of 4294 seconds */
> 942 q->ce_threshold = (u64)NSEC_PER_USEC * ~0U;
> 943
> 944 qdisc_watchdog_init_clockid(&q->watchdog, sch, CLOCK_MONOTONIC);
> 945
> 946 if (opt)
> 947 err = fq_change(sch, opt, extack);
> 948 else
> 949 err = fq_resize(sch, q->fq_trees_log);
> 950
> 951 return err;
> 952 }
> 953
>
> ---
> 0-DAY CI Kernel Test Service, Intel Corporation
> https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2020-05-01 13:57 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-05-01 5:51 [PATCH net-next] net_sched: sch_fq: add horizon attribute Eric Dumazet
2020-05-01 10:43 ` kbuild test robot
2020-05-01 13:57 ` Eric Dumazet
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).