* [PATCH v2 net-next] net/sched: add skbprio scheduler
@ 2018-06-23 20:47 Nishanth Devarajan
  2018-06-23 21:43 ` Cong Wang
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Nishanth Devarajan @ 2018-06-23 20:47 UTC (permalink / raw)
  To: xiyou.wangcong, jhs, jiri, davem; +Cc: netdev, doucette, michel

net/sched: add skbprio scheduler

Skbprio (SKB Priority Queue) is a queueing discipline that prioritizes packets
according to their skb->priority field. Although Skbprio can be employed in any
scenario in which a higher skb->priority field means a higher priority packet,
Skbprio was conceived as a solution for denial-of-service defenses that need to
route packets with different priorities.
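
For illustration, applications typically select a band under this qdisc by
setting the socket's SO_PRIORITY option, which the stack copies into
skb->priority; a minimal userspace sketch (error handling omitted):

	#include <sys/socket.h>

	/* Send further packets on this socket with priority 10; skbprio
	 * maps this to band 10 (higher value = higher priority). Values
	 * above 6 require CAP_NET_ADMIN.
	 */
	int set_prio(int sockfd)
	{
		int prio = 10;

		return setsockopt(sockfd, SOL_SOCKET, SO_PRIORITY,
				  &prio, sizeof(prio));
	}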

v2:
*Use skb->priority field rather than DS field. Rename queueing discipline as
SKB Priority Queue (previously Gatekeeper Priority Queue).

*Queueing discipline is made classful to expose Skbprio's internal priority
queues.

Signed-off-by: Nishanth Devarajan <ndev2021@gmail.com>
Reviewed-by: Sachin Paryani <sachin.paryani@gmail.com>
Reviewed-by: Cody Doucette <doucette@bu.edu>
Reviewed-by: Michel Machado <michel@digirati.com.br>
---
 include/uapi/linux/pkt_sched.h |  15 ++
 net/sched/Kconfig              |  13 ++
 net/sched/Makefile             |   1 +
 net/sched/sch_skbprio.c        | 347 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 376 insertions(+)
 create mode 100644 net/sched/sch_skbprio.c

diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 37b5096..6fd07e8 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -124,6 +124,21 @@ struct tc_fifo_qopt {
 	__u32	limit;	/* Queue length: bytes for bfifo, packets for pfifo */
 };
 
+/* SKBPRIO section */
+
+/*
+ * Priorities go from zero to (SKBPRIO_MAX_PRIORITY - 1).
+ * SKBPRIO_MAX_PRIORITY should be at least 64 so that skbprio can map the DS
+ * field of IPv4 and IPv6 headers one to one.
+ * Memory allocation grows linearly with SKBPRIO_MAX_PRIORITY.
+ */
+
+#define SKBPRIO_MAX_PRIORITY 64
+
+struct tc_skbprio_qopt {
+	__u32	limit;		/* Queue length in packets. */
+};
+
 /* PRIO section */
 
 #define TCQ_PRIO_BANDS	16
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index a01169f..9ac4b53 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -240,6 +240,19 @@ config NET_SCH_MQPRIO
 
 	  If unsure, say N.
 
+config NET_SCH_SKBPRIO
+	tristate "SKB priority queue scheduler (SKBPRIO)"
+	help
+	  Say Y here if you want to use the SKB priority queue
+	  scheduler. This schedules packets according to skb->priority,
+	  which is useful for request packets in DoS mitigation systems such
+	  as Gatekeeper.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called sch_skbprio.
+
+	  If unsure, say N.
+
 config NET_SCH_CHOKE
 	tristate "CHOose and Keep responsive flow scheduler (CHOKE)"
 	help
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 8811d38..a4d8893 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -46,6 +46,7 @@ obj-$(CONFIG_NET_SCH_NETEM)	+= sch_netem.o
 obj-$(CONFIG_NET_SCH_DRR)	+= sch_drr.o
 obj-$(CONFIG_NET_SCH_PLUG)	+= sch_plug.o
 obj-$(CONFIG_NET_SCH_MQPRIO)	+= sch_mqprio.o
+obj-$(CONFIG_NET_SCH_SKBPRIO)	+= sch_skbprio.o
 obj-$(CONFIG_NET_SCH_CHOKE)	+= sch_choke.o
 obj-$(CONFIG_NET_SCH_QFQ)	+= sch_qfq.o
 obj-$(CONFIG_NET_SCH_CODEL)	+= sch_codel.o
diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c
new file mode 100644
index 0000000..5e89446
--- /dev/null
+++ b/net/sched/sch_skbprio.c
@@ -0,0 +1,347 @@
+/*
+ * net/sched/sch_skbprio.c  SKB Priority Queue.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Nishanth Devarajan, <ndev2021@gmail.com>
+ *		Cody Doucette, <doucette@bu.edu>
+ *	        original idea by Michel Machado, Cody Doucette, and Qiaobin Fu
+ */
+
+#include <linux/string.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/pkt_sched.h>
+#include <net/sch_generic.h>
+#include <net/inet_ecn.h>
+
+
+/*	  SKB Priority Queue
+ *	=================================
+ *
+ * This qdisc schedules a packet according to skb->priority, where a higher
+ * value places the packet closer to the exit of the queue. When the queue is
+ * full, the lowest priority packet in the queue is dropped to make room for
+ * the packet to be added if it has higher priority. If the packet to be added
+ * has lower priority than all packets in the queue, it is dropped.
+ *
+ * Without the SKB priority queue, queue length limits must be imposed
+ * for individual queues, and there is no easy way to enforce a global queue
+ * length limit across all priorities. With the SKBprio queue, a global
+ * queue length limit can be enforced while not restricting the queue lengths
+ * of individual priorities.
+ *
+ * This is especially useful for a denial-of-service defense system like
+ * Gatekeeper, which prioritizes packets in flows that demonstrate expected
+ * behavior of legitimate users. The queue is flexible to allow any number
+ * of packets of any priority up to the global limit of the scheduler
+ * without risking resource overconsumption by a flood of low priority packets.
+ *
+ * The Gatekeeper codebase is found here:
+ *
+ *		https://github.com/AltraMayor/gatekeeper
+ */
+
+struct skbprio_sched_data {
+	/* Parameters. */
+	u32 max_limit;
+
+	/* Queue state. */
+	struct sk_buff_head qdiscs[SKBPRIO_MAX_PRIORITY];
+	struct gnet_stats_queue qstats[SKBPRIO_MAX_PRIORITY];
+	u16 highest_prio;
+	u16 lowest_prio;
+};
+
+static u16 calc_new_high_prio(const struct skbprio_sched_data *q)
+{
+	int prio;
+
+	for (prio = q->highest_prio - 1; prio >= q->lowest_prio; prio--) {
+		if (!skb_queue_empty(&q->qdiscs[prio]))
+			return prio;
+	}
+
+	/* SKB queue is empty, return 0 (default highest priority setting). */
+	return 0;
+}
+
+static u16 calc_new_low_prio(const struct skbprio_sched_data *q)
+{
+	int prio;
+
+	for (prio = q->lowest_prio + 1; prio <= q->highest_prio; prio++) {
+		if (!skb_queue_empty(&q->qdiscs[prio]))
+			return prio;
+	}
+
+	/* SKB queue is empty, return SKBPRIO_MAX_PRIORITY - 1
+	 * (default lowest priority setting).
+	 */
+	return SKBPRIO_MAX_PRIORITY - 1;
+}
+
+static int skbprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+			  struct sk_buff **to_free)
+{
+	const unsigned int max_priority = SKBPRIO_MAX_PRIORITY - 1;
+	struct skbprio_sched_data *q = qdisc_priv(sch);
+	struct sk_buff_head *qdisc;
+	struct sk_buff_head *lp_qdisc;
+	struct sk_buff *to_drop;
+	u16 prio, lp;
+
+	/* Obtain the priority of @skb. */
+	prio = min(skb->priority, max_priority);
+
+	qdisc = &q->qdiscs[prio];
+	if (sch->q.qlen < q->max_limit) {
+		__skb_queue_tail(qdisc, skb);
+		qdisc_qstats_backlog_inc(sch, skb);
+		q->qstats[prio].backlog += qdisc_pkt_len(skb);
+
+		/* Check to update highest and lowest priorities. */
+		if (prio > q->highest_prio)
+			q->highest_prio = prio;
+
+		if (prio < q->lowest_prio)
+			q->lowest_prio = prio;
+
+		sch->q.qlen++;
+		return NET_XMIT_SUCCESS;
+	}
+
+	/* If this packet has the lowest priority, drop it. */
+	lp = q->lowest_prio;
+	if (prio <= lp) {
+		q->qstats[prio].drops++;
+		return qdisc_drop(skb, sch, to_free);
+	}
+
+	/* Drop the packet at the tail of the lowest priority qdisc. */
+	lp_qdisc = &q->qdiscs[lp];
+	to_drop = __skb_dequeue_tail(lp_qdisc);
+	BUG_ON(!to_drop);
+	qdisc_qstats_backlog_dec(sch, to_drop);
+	qdisc_drop(to_drop, sch, to_free);
+
+	q->qstats[lp].backlog -= qdisc_pkt_len(to_drop);
+	q->qstats[lp].drops++;
+
+	__skb_queue_tail(qdisc, skb);
+	qdisc_qstats_backlog_inc(sch, skb);
+	q->qstats[prio].backlog += qdisc_pkt_len(skb);
+
+	/* Check to update highest and lowest priorities. */
+	if (skb_queue_empty(lp_qdisc)) {
+		if (q->lowest_prio == q->highest_prio) {
+			BUG_ON(sch->q.qlen);
+			q->lowest_prio = prio;
+			q->highest_prio = prio;
+		} else {
+			q->lowest_prio = calc_new_low_prio(q);
+		}
+	}
+
+	if (prio > q->highest_prio)
+		q->highest_prio = prio;
+
+	return NET_XMIT_CN;
+}
+
+static struct sk_buff *skbprio_dequeue(struct Qdisc *sch)
+{
+	struct skbprio_sched_data *q = qdisc_priv(sch);
+	struct sk_buff_head *hpq = &q->qdiscs[q->highest_prio];
+	struct sk_buff *skb = __skb_dequeue(hpq);
+
+	if (unlikely(!skb))
+		return NULL;
+
+	sch->q.qlen--;
+	qdisc_qstats_backlog_dec(sch, skb);
+	qdisc_bstats_update(sch, skb);
+
+	q->qstats[q->highest_prio].backlog -= qdisc_pkt_len(skb);
+
+	/* Update highest priority field. */
+	if (skb_queue_empty(hpq)) {
+		if (q->lowest_prio == q->highest_prio) {
+			BUG_ON(sch->q.qlen);
+			q->highest_prio = 0;
+			q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1;
+		} else {
+			q->highest_prio = calc_new_high_prio(q);
+		}
+	}
+	return skb;
+}
+
+static int skbprio_change(struct Qdisc *sch, struct nlattr *opt,
+			struct netlink_ext_ack *extack)
+{
+	struct skbprio_sched_data *q = qdisc_priv(sch);
+	struct tc_skbprio_qopt *ctl = nla_data(opt);
+	const unsigned int min_limit = 1;
+
+	if (ctl->limit == (typeof(ctl->limit))-1)
+		q->max_limit = max(qdisc_dev(sch)->tx_queue_len, min_limit);
+	else if (ctl->limit < min_limit ||
+		ctl->limit > qdisc_dev(sch)->tx_queue_len)
+		return -EINVAL;
+	else
+		q->max_limit = ctl->limit;
+
+	return 0;
+}
+
+static int skbprio_init(struct Qdisc *sch, struct nlattr *opt,
+			struct netlink_ext_ack *extack)
+{
+	struct skbprio_sched_data *q = qdisc_priv(sch);
+	const unsigned int min_limit = 1;
+	int prio;
+
+	/* Initialise all queues, one for each possible priority. */
+	for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++)
+		__skb_queue_head_init(&q->qdiscs[prio]);
+
+	memset(&q->qstats, 0, sizeof(q->qstats));
+	q->highest_prio = 0;
+	q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1;
+	if (!opt) {
+		q->max_limit = max(qdisc_dev(sch)->tx_queue_len, min_limit);
+		return 0;
+	}
+	return skbprio_change(sch, opt, extack);
+}
+
+static int skbprio_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct skbprio_sched_data *q = qdisc_priv(sch);
+	struct tc_skbprio_qopt opt;
+
+	opt.limit = q->max_limit;
+
+	if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
+		return -1;
+
+	return skb->len;
+}
+
+static void skbprio_reset(struct Qdisc *sch)
+{
+	struct skbprio_sched_data *q = qdisc_priv(sch);
+	int prio;
+
+	sch->qstats.backlog = 0;
+	sch->q.qlen = 0;
+
+	for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++)
+		__skb_queue_purge(&q->qdiscs[prio]);
+
+	memset(&q->qstats, 0, sizeof(q->qstats));
+	q->highest_prio = 0;
+	q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1;
+}
+
+static void skbprio_destroy(struct Qdisc *sch)
+{
+	struct skbprio_sched_data *q = qdisc_priv(sch);
+	int prio;
+
+	for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++)
+		__skb_queue_purge(&q->qdiscs[prio]);
+}
+
+static struct Qdisc *skbprio_leaf(struct Qdisc *sch, unsigned long arg)
+{
+	return NULL;
+}
+
+static unsigned long skbprio_find(struct Qdisc *sch, u32 classid)
+{
+	return 0;
+}
+
+static int skbprio_dump_class(struct Qdisc *sch, unsigned long cl,
+			     struct sk_buff *skb, struct tcmsg *tcm)
+{
+	tcm->tcm_handle |= TC_H_MIN(cl);
+	return 0;
+}
+
+static int skbprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+				   struct gnet_dump *d)
+{
+	struct skbprio_sched_data *q = qdisc_priv(sch);
+	if (gnet_stats_copy_queue(d, NULL, &q->qstats[cl - 1],
+		q->qstats[cl - 1].qlen) < 0)
+		return -1;
+	return 0;
+}
+
+static void skbprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+	unsigned int i;
+
+	if (arg->stop)
+		return;
+
+	for (i = 0; i < SKBPRIO_MAX_PRIORITY; i++) {
+		if (arg->count < arg->skip) {
+			arg->count++;
+			continue;
+		}
+		if (arg->fn(sch, i + 1, arg) < 0) {
+			arg->stop = 1;
+			break;
+		}
+		arg->count++;
+	}
+}
+
+static const struct Qdisc_class_ops skbprio_class_ops = {
+	.leaf		=	skbprio_leaf,
+	.find		=	skbprio_find,
+	.dump		=	skbprio_dump_class,
+	.dump_stats	=	skbprio_dump_class_stats,
+	.walk		=	skbprio_walk,
+};
+
+static struct Qdisc_ops skbprio_qdisc_ops __read_mostly = {
+	.cl_ops		=	&skbprio_class_ops,
+	.id		=	"skbprio",
+	.priv_size	=	sizeof(struct skbprio_sched_data),
+	.enqueue	=	skbprio_enqueue,
+	.dequeue	=	skbprio_dequeue,
+	.peek		=	qdisc_peek_dequeued,
+	.init		=	skbprio_init,
+	.reset		=	skbprio_reset,
+	.change		=	skbprio_change,
+	.dump		=	skbprio_dump,
+	.destroy	=	skbprio_destroy,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init skbprio_module_init(void)
+{
+	return register_qdisc(&skbprio_qdisc_ops);
+}
+
+static void __exit skbprio_module_exit(void)
+{
+	unregister_qdisc(&skbprio_qdisc_ops);
+}
+
+module_init(skbprio_module_init)
+module_exit(skbprio_module_exit)
+
+MODULE_LICENSE("GPL");
-- 
1.9.1


* Re: [PATCH v2 net-next] net/sched: add skbprio scheduler
  2018-06-23 20:47 [PATCH v2 net-next] net/sched: add skbprio scheduler Nishanth Devarajan
@ 2018-06-23 21:43 ` Cong Wang
  2018-06-25 23:27   ` Nishanth Devarajan
  2018-06-23 22:10 ` Alexander Duyck
  2018-06-24 15:43 ` Jamal Hadi Salim
  2 siblings, 1 reply; 8+ messages in thread
From: Cong Wang @ 2018-06-23 21:43 UTC (permalink / raw)
  To: Nishanth Devarajan
  Cc: Jamal Hadi Salim, Jiri Pirko, David Miller,
	Linux Kernel Network Developers, Cody Doucette, Michel Machado

On Sat, Jun 23, 2018 at 1:47 PM, Nishanth Devarajan <ndev2021@gmail.com> wrote:
> diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
> index 37b5096..6fd07e8 100644
> --- a/include/uapi/linux/pkt_sched.h
> +++ b/include/uapi/linux/pkt_sched.h
...
> +#define SKBPRIO_MAX_PRIORITY 64
> +
> +struct tc_skbprio_qopt {
> +       __u32   limit;          /* Queue length in packets. */
> +};


Since this is just an integer, you can just make it NLA_U32 instead
of a struct?
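
For illustration, an attribute-based interface might look like the sketch
below (the TCA_SKBPRIO_* names and the policy are hypothetical, not an
existing API):

	enum {
		TCA_SKBPRIO_UNSPEC,
		TCA_SKBPRIO_LIMIT,	/* u32, queue length in packets */
		__TCA_SKBPRIO_MAX,
	};
	#define TCA_SKBPRIO_MAX (__TCA_SKBPRIO_MAX - 1)

	static const struct nla_policy skbprio_policy[TCA_SKBPRIO_MAX + 1] = {
		[TCA_SKBPRIO_LIMIT] = { .type = NLA_U32 },
	};

	/* In skbprio_change(): */
	struct nlattr *tb[TCA_SKBPRIO_MAX + 1];
	int err;

	err = nla_parse_nested(tb, TCA_SKBPRIO_MAX, opt, skbprio_policy,
			       extack);
	if (err < 0)
		return err;
	if (tb[TCA_SKBPRIO_LIMIT])
		limit = nla_get_u32(tb[TCA_SKBPRIO_LIMIT]);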


> +static int skbprio_change(struct Qdisc *sch, struct nlattr *opt,
> +                       struct netlink_ext_ack *extack)
> +{
> +       struct skbprio_sched_data *q = qdisc_priv(sch);
> +       struct tc_skbprio_qopt *ctl = nla_data(opt);
> +       const unsigned int min_limit = 1;
> +
> +       if (ctl->limit == (typeof(ctl->limit))-1)
> +               q->max_limit = max(qdisc_dev(sch)->tx_queue_len, min_limit);
> +       else if (ctl->limit < min_limit ||
> +               ctl->limit > qdisc_dev(sch)->tx_queue_len)
> +               return -EINVAL;
> +       else
> +               q->max_limit = ctl->limit;
> +
> +       return 0;
> +}

Isn't q->max_limit the same as sch->limit?

Also, please avoid dev->tx_queue_len here, it may change
independently of your qdisc change, unless you want to implement
ops->change_tx_queue_len().
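
For reference, struct Qdisc_ops already has a change_tx_queue_len callback;
a minimal sketch of wiring it up for this qdisc (illustrative, not part of
the posted patch):

	static int skbprio_change_tx_queue_len(struct Qdisc *sch,
					       unsigned int new_len)
	{
		struct skbprio_sched_data *q = qdisc_priv(sch);

		/* Track the device's new tx_queue_len, keeping room for
		 * at least one packet.
		 */
		q->max_limit = max_t(u32, new_len, 1);
		return 0;
	}

	/* ...and in skbprio_qdisc_ops:
	 *	.change_tx_queue_len	=	skbprio_change_tx_queue_len,
	 */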


* Re: [PATCH v2 net-next] net/sched: add skbprio scheduler
  2018-06-23 20:47 [PATCH v2 net-next] net/sched: add skbprio scheduler Nishanth Devarajan
  2018-06-23 21:43 ` Cong Wang
@ 2018-06-23 22:10 ` Alexander Duyck
  2018-06-26  0:11   ` Nishanth Devarajan
  2018-06-24 15:43 ` Jamal Hadi Salim
  2 siblings, 1 reply; 8+ messages in thread
From: Alexander Duyck @ 2018-06-23 22:10 UTC (permalink / raw)
  To: Nishanth Devarajan
  Cc: Cong Wang, Jamal Hadi Salim, Jiri Pirko, David Miller, Netdev,
	doucette, michel

On Sat, Jun 23, 2018 at 1:47 PM, Nishanth Devarajan <ndev2021@gmail.com> wrote:
> net/sched: add skbprio scheduler
>
> Skbprio (SKB Priority Queue) is a queueing discipline that prioritizes packets
> according to their skb->priority field. Although Skbprio can be employed in any
> scenario in which a higher skb->priority field means a higher priority packet,
> Skbprio was conceived as a solution for denial-of-service defenses that need to
> route packets with different priorities.

Really this description is not very good. Reading it I was thinking to
myself "why do we need this, prio already does this". It wasn't until
I read through the code that I figured out that you are basically
adding dropping of lower priority frames.

> [...]
> + * This is especially useful for a denial-of-service defense system like
> + * Gatekeeper, which prioritizes packets in flows that demonstrate expected
> + * behavior of legitimate users. The queue is flexible to allow any number
> + * of packets of any priority up to the global limit of the scheduler
> + * without risking resource overconsumption by a flood of low priority packets.
> + *
> + * The Gatekeeper codebase is found here:
> + *
> + *             https://github.com/AltraMayor/gatekeeper

Do we really need to be linking to third party projects in the code? I
am not even really sure it is related here since I cannot find
anything that references the queuing discipline.

Also this seems like it should be in the Documentation/networking
folder rather than being stored in the code itself.

> [...]
> +       /* Drop the packet at the tail of the lowest priority qdisc. */
> +       lp_qdisc = &q->qdiscs[lp];
> +       to_drop = __skb_dequeue_tail(lp_qdisc);

Is there a specific reason for dropping tail instead of head? I would
think you might see better latency numbers with this if you were to
focus on dropping older packets from the lowest priority instead of
the newest ones. Then things like TCP would likely be able to respond
more quickly to the congestion effects and would be able to more
accurately determine actual round trip time versus buffer delay.
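
For comparison, a head-drop variant would be a one-line change in the posted
code, evicting the oldest packet of the lowest band instead of the newest:

	/* Drop the packet at the head (oldest) of the lowest priority
	 * band rather than at the tail.
	 */
	to_drop = __skb_dequeue(lp_qdisc);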

> +       BUG_ON(!to_drop);
> +       qdisc_qstats_backlog_dec(sch, to_drop);
> +       qdisc_drop(to_drop, sch, to_free);
> +
> +       q->qstats[lp].backlog -= qdisc_pkt_len(to_drop);
> +       q->qstats[lp].drops++;
> +
> +       __skb_queue_tail(qdisc, skb);
> +       qdisc_qstats_backlog_inc(sch, skb);
> +       q->qstats[prio].backlog += qdisc_pkt_len(skb);
> +

You could probably just take care of enqueueing first, and then take
care of the dequeue. That way you can drop the references to skb
earlier from the stack or free up whatever register is storing it.
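
That reordering might look roughly like this (a sketch based on the posted
code, not tested):

	/* Enqueue the new packet first, so the skb reference is no
	 * longer needed...
	 */
	__skb_queue_tail(qdisc, skb);
	qdisc_qstats_backlog_inc(sch, skb);
	q->qstats[prio].backlog += qdisc_pkt_len(skb);

	/* ...then evict from the tail of the lowest priority band. */
	to_drop = __skb_dequeue_tail(lp_qdisc);
	BUG_ON(!to_drop);
	qdisc_qstats_backlog_dec(sch, to_drop);
	q->qstats[lp].backlog -= qdisc_pkt_len(to_drop);
	q->qstats[lp].drops++;
	qdisc_drop(to_drop, sch, to_free);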

> [...]


* Re: [PATCH v2 net-next] net/sched: add skbprio scheduler
  2018-06-23 20:47 [PATCH v2 net-next] net/sched: add skbprio scheduler Nishanth Devarajan
  2018-06-23 21:43 ` Cong Wang
  2018-06-23 22:10 ` Alexander Duyck
@ 2018-06-24 15:43 ` Jamal Hadi Salim
  2018-06-26  0:34   ` Nishanth Devarajan
  2 siblings, 1 reply; 8+ messages in thread
From: Jamal Hadi Salim @ 2018-06-24 15:43 UTC (permalink / raw)
  To: Nishanth Devarajan, xiyou.wangcong, jiri, davem
  Cc: netdev, doucette, michel, alexander.duyck

On 23/06/18 04:47 PM, Nishanth Devarajan wrote:
[..]

> +	/* Drop the packet at the tail of the lowest priority qdisc. */
> +	lp_qdisc = &q->qdiscs[lp];
> +	to_drop = __skb_dequeue_tail(lp_qdisc);
> +	BUG_ON(!to_drop);
> +	qdisc_qstats_backlog_dec(sch, to_drop);
> +	qdisc_drop(to_drop, sch, to_free);
> +

Maybe also increase overlimit stat here? It will keep track
of low prio things dropped because you were congested.
Such a stat helps when debugging or collecting analytics.
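
In the posted code that could be the existing one-line helper next to the
drop accounting (sketch):

	qdisc_drop(to_drop, sch, to_free);
	/* Count drops forced by congestion in the overlimit stat. */
	qdisc_qstats_overlimit(sch);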

Per Alex's comment, how about:

-----------
Skbprio (SKB Priority Queue) is a queueing discipline that
prioritizes packets according to their skb->priority field.
Under congestion, already-enqueued lower priority packets
will be dropped to make space available for higher priority
packets. Skbprio was conceived as a solution for
denial-of-service defenses that need to route packets with
different priorities as a means to overcome DoS attacks
as described in paper xxxx...


cheers,
jamal

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 net-next] net/sched: add skbprio scheduler
  2018-06-23 21:43 ` Cong Wang
@ 2018-06-25 23:27   ` Nishanth Devarajan
  0 siblings, 0 replies; 8+ messages in thread
From: Nishanth Devarajan @ 2018-06-25 23:27 UTC (permalink / raw)
  To: Cong Wang
  Cc: Jamal Hadi Salim, Jiri Pirko, David Miller, netdev, doucette, michel

On Sat, Jun 23, 2018 at 02:43:16PM -0700, Cong Wang wrote:
> On Sat, Jun 23, 2018 at 1:47 PM, Nishanth Devarajan <ndev2021@gmail.com> wrote:
> > diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
> > index 37b5096..6fd07e8 100644
> > --- a/include/uapi/linux/pkt_sched.h
> > +++ b/include/uapi/linux/pkt_sched.h
> ...
> > +#define SKBPRIO_MAX_PRIORITY 64
> > +
> > +struct tc_skbprio_qopt {
> > +       __u32   limit;          /* Queue length in packets. */
> > +};
> 
> 
> Since this is just an integer, you can just make it NLA_U32 instead
> of a struct?
> 
>

Making it NLA_U32, wouldn't that require adding an nla_policy struct to the
code? I also feel uneasy that we'd be straying from the convention of having a
tc qopt struct to pass in essential parameters from userspace.

> > +static int skbprio_change(struct Qdisc *sch, struct nlattr *opt,
> > +                       struct netlink_ext_ack *extack)
> > +{
> > +       struct skbprio_sched_data *q = qdisc_priv(sch);
> > +       struct tc_skbprio_qopt *ctl = nla_data(opt);
> > +       const unsigned int min_limit = 1;
> > +
> > +       if (ctl->limit == (typeof(ctl->limit))-1)
> > +               q->max_limit = max(qdisc_dev(sch)->tx_queue_len, min_limit);
> > +       else if (ctl->limit < min_limit ||
> > +               ctl->limit > qdisc_dev(sch)->tx_queue_len)
> > +               return -EINVAL;
> > +       else
> > +               q->max_limit = ctl->limit;
> > +
> > +       return 0;
> > +}
> 
> Isn't q->max_limit the same as sch->limit?
>

q->max_limit was intended to represent the maximum limit that Skbprio could
accommodate, i.e. the tx queue len of the device attached to the qdisc, used to
validate the limit parameter passed from userspace. I'll correct this in v3.
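
A sketch of that correction, assuming v3 stores the cap in the generic
sch->limit field as most qdiscs do:

	static int skbprio_change(struct Qdisc *sch, struct nlattr *opt,
				  struct netlink_ext_ack *extack)
	{
		struct tc_skbprio_qopt *ctl = nla_data(opt);

		sch->limit = ctl->limit;
		return 0;
	}

	/* ...and skbprio_enqueue() would then test sch->q.qlen against
	 * sch->limit instead of q->max_limit.
	 */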
 
> Also, please avoid dev->tx_queue_len here, it may change
> independently of your qdisc change, unless you want to implement
> ops->change_tx_queue_len().

OK, will make this change.


* Re: [PATCH v2 net-next] net/sched: add skbprio scheduler
  2018-06-23 22:10 ` Alexander Duyck
@ 2018-06-26  0:11   ` Nishanth Devarajan
  0 siblings, 0 replies; 8+ messages in thread
From: Nishanth Devarajan @ 2018-06-26  0:11 UTC (permalink / raw)
  To: Alexander Duyck
  Cc: Cong Wang, Jamal Hadi Salim, Jiri Pirko, David Miller, netdev,
	doucette, michel

On Sat, Jun 23, 2018 at 03:10:32PM -0700, Alexander Duyck wrote:
> On Sat, Jun 23, 2018 at 1:47 PM, Nishanth Devarajan <ndev2021@gmail.com> wrote:
> > net/sched: add skbprio scheduler
> >
> > Skbprio (SKB Priority Queue) is a queueing discipline that prioritizes packets
> > according to their skb->priority field. Although Skbprio can be employed in any
> > scenario in which a higher skb->priority field means a higher priority packet,
> > Skbprio was conceived as a solution for denial-of-service defenses that need to
> > route packets with different priorities.
> 
> Really this description is not very good. Reading it I was thinking to
> myself "why do we need this, prio already does this". It wasn't until
> I read through the code that I figured out that you are basically
> adding dropping of lower priority frames.
> 

OK, I'll take Jamal's suggestion on this and write up a new description.

> > [...]
> > + * This is especially useful for a denial-of-service defense system like
> > + * Gatekeeper, which prioritizes packets in flows that demonstrate expected
> > + * behavior of legitimate users. The queue is flexible to allow any number
> > + * of packets of any priority up to the global limit of the scheduler
> > + * without risking resource overconsumption by a flood of low priority packets.
> > + *
> > + * The Gatekeeper codebase is found here:
> > + *
> > + *             https://github.com/AltraMayor/gatekeeper
> 
> Do we really need to be linking to third party projects in the code? I
> am not even really sure it is related here since I cannot find
> anything that references the queuing discipline.
> 
> Also this seems like it should be in the Documentation/networking
> folder rather than being stored in the code itself.
>

The Skbprio qdisc was designed around Gatekeeper's functionality, and we intended
the link to reflect Gatekeeper's deployment in production. But I understand
that this should be in Documentation; I will change this in v3.
 
> > [...]
> > +       /* Drop the packet at the tail of the lowest priority qdisc. */
> > +       lp_qdisc = &q->qdiscs[lp];
> > +       to_drop = __skb_dequeue_tail(lp_qdisc);
> 
> Is there a specific reason for dropping tail instead of head? I would
> think you might see better latency numbers with this if you were to
> focus on dropping older packets from the lowest priority instead of
> the newest ones. Then things like TCP would likely be able to respond
> more quickly to the congestion effects and would be able to more
> accurately determine actual round trip time versus buffer delay.
> 

You make a good point that TCP would respond better to congestion by dropping
at the head. But the idea of dropping at the tail was to relieve immediate
congestion (from a denial-of-service standpoint) by prioritizing older packets
in the queue over ingress attack packets. And since Skbprio is designed as a
denial-of-service defense, wouldn't this be the better solution?

> > +       BUG_ON(!to_drop);
> > +       qdisc_qstats_backlog_dec(sch, to_drop);
> > +       qdisc_drop(to_drop, sch, to_free);
> > +
> > +       q->qstats[lp].backlog -= qdisc_pkt_len(to_drop);
> > +       q->qstats[lp].drops++;
> > +
> > +       __skb_queue_tail(qdisc, skb);
> > +       qdisc_qstats_backlog_inc(sch, skb);
> > +       q->qstats[prio].backlog += qdisc_pkt_len(skb);
> > +
> 
> You could probably just take care of enqueueing first, and then take
> care of the dequeue. That way you can drop the references to skb
> earlier from the stack or free up whatever register is storing it.
>

OK, will change this in v3.
 
> > [...]


* Re: [PATCH v2 net-next] net/sched: add skbprio scheduler
  2018-06-24 15:43 ` Jamal Hadi Salim
@ 2018-06-26  0:34   ` Nishanth Devarajan
  0 siblings, 0 replies; 8+ messages in thread
From: Nishanth Devarajan @ 2018-06-26  0:34 UTC (permalink / raw)
  To: Jamal Hadi Salim
  Cc: Cong Wang, Jiri Pirko, David Miller, netdev, doucette, michel,
	Alexander Duyck

On Sun, Jun 24, 2018 at 11:43:07AM -0400, Jamal Hadi Salim wrote:
> On 23/06/18 04:47 PM, Nishanth Devarajan wrote:
> [..]
> 
> >+	/* Drop the packet at the tail of the lowest priority qdisc. */
> >+	lp_qdisc = &q->qdiscs[lp];
> >+	to_drop = __skb_dequeue_tail(lp_qdisc);
> >+	BUG_ON(!to_drop);
> >+	qdisc_qstats_backlog_dec(sch, to_drop);
> >+	qdisc_drop(to_drop, sch, to_free);
> >+
> 
> Maybe also increase overlimit stat here? It will keep track
> of low prio things dropped because you were congested.
> Such a stat helps when debugging or collecting analytics.
> 
> Per Alex's comment, how about:
> 
> -----------
> Skbprio (SKB Priority Queue) is a queueing discipline that
> prioritizes packets according to their skb->priority field.
> Under congestion, already-enqueued lower priority packets
> will be dropped to make space available for higher priority
> packets. Skbprio was conceived as a solution for
> denial-of-service defenses that need to route packets with
> different priorities as a means to overcome DoS attacks
> as described in paper xxxx...
> 
> 
> cheers,
> jamal

Sounds good, will make some changes in v3.

Thanks,
Nishanth


* [PATCH v2 net-next] net/sched: add skbprio scheduler
@ 2018-06-06 15:40 Nishanth Devarajan
  0 siblings, 0 replies; 8+ messages in thread
From: Nishanth Devarajan @ 2018-06-06 15:40 UTC (permalink / raw)
  To: xiyou.wangcong, jhs, jiri, davem; +Cc: netdev, doucette, michel

net/sched: add skbprio scheduler

Skbprio (SKB Priority Queue) is a queueing discipline that prioritizes packets
according to their skb->priority field. Although Skbprio can be employed in any
scenario in which a higher skb->priority value means a higher priority packet,
Skbprio was conceived as a solution for denial-of-service defenses that need to
route packets with different priorities.
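
For example, with a limit of three packets and packets of priorities
{1, 1, 5} already queued, an arriving priority-4 packet evicts the tail
packet of the priority-1 queue, while an arriving priority-0 packet is
itself dropped, since it ranks below every packet already queued.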

v2:
*Use skb->priority field rather than DS field. Rename queueing discipline as
SKB Priority Queue (previously Gatekeeper Priority Queue).

*Queueing discipline is made classful to expose Skbprio's internal priority
queues.

Signed-off-by: Nishanth Devarajan <ndev2021@gmail.com>
Reviewed-by: Sachin Paryani <sachin.paryani@gmail.com>
Reviewed-by: Cody Doucette <doucette@bu.edu>
Reviewed-by: Michel Machado <michel@digirati.com.br>
---
 include/uapi/linux/pkt_sched.h |  15 ++
 net/sched/Kconfig              |  13 ++
 net/sched/Makefile             |   1 +
 net/sched/sch_skbprio.c        | 347 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 376 insertions(+)
 create mode 100644 net/sched/sch_skbprio.c

diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 37b5096..6fd07e8 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -124,6 +124,21 @@ struct tc_fifo_qopt {
 	__u32	limit;	/* Queue length: bytes for bfifo, packets for pfifo */
 };
 
+/* SKBPRIO section */
+
+/*
+ * Priorities go from zero to (SKBPRIO_MAX_PRIORITY - 1).
+ * SKBPRIO_MAX_PRIORITY should be at least 64 in order for skbprio to be able
+ * to map one to one the DS field of IPV4 and IPV6 headers.
+ * Memory allocation grows linearly with SKBPRIO_MAX_PRIORITY.
+ */
+
+#define SKBPRIO_MAX_PRIORITY 64
+
+struct tc_skbprio_qopt {
+	__u32	limit; 	    	/* Queue length in packets. */
+};
+
 /* PRIO section */
 
 #define TCQ_PRIO_BANDS	16
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index a01169f..9ac4b53 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -240,6 +240,19 @@ config NET_SCH_MQPRIO
 
 	  If unsure, say N.
 
+config NET_SCH_SKBPRIO
+	tristate "SKB priority queue scheduler (SKBPRIO)"
+	help
+	  Say Y here if you want to use the SKB priority queue
+	  scheduler. This schedules packets according to skb->priority,
+	  which is useful for request packets in DoS mitigation systems such
+	  as Gatekeeper.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called sch_skbprio.
+
+	  If unsure, say N.
+
 config NET_SCH_CHOKE
 	tristate "CHOose and Keep responsive flow scheduler (CHOKE)"
 	help
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 8811d38..a4d8893 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -46,6 +46,7 @@ obj-$(CONFIG_NET_SCH_NETEM)	+= sch_netem.o
 obj-$(CONFIG_NET_SCH_DRR)	+= sch_drr.o
 obj-$(CONFIG_NET_SCH_PLUG)	+= sch_plug.o
 obj-$(CONFIG_NET_SCH_MQPRIO)	+= sch_mqprio.o
+obj-$(CONFIG_NET_SCH_SKBPRIO)	+= sch_skbprio.o
 obj-$(CONFIG_NET_SCH_CHOKE)	+= sch_choke.o
 obj-$(CONFIG_NET_SCH_QFQ)	+= sch_qfq.o
 obj-$(CONFIG_NET_SCH_CODEL)	+= sch_codel.o
diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c
new file mode 100644
index 0000000..f73ad62
--- /dev/null
+++ b/net/sched/sch_skbprio.c
@@ -0,0 +1,347 @@
+/*
+ * net/sched/sch_skbprio.c  SKB Priority Queue.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Nishanth Devarajan, <ndev2021@gmail.com>
+ *		Cody Doucette, <doucette@bu.edu>
+ *	        original idea by Michel Machado, Cody Doucette, and Qiaobin Fu
+ */
+
+#include <linux/string.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/pkt_sched.h>
+#include <net/sch_generic.h>
+#include <net/inet_ecn.h>
+
+
+/*	  SKB Priority Queue
+ *	=================================
+ *
+ * This qdisc schedules a packet according to skb->priority, where a higher
+ * value places the packet closer to the exit of the queue. When the queue is
+ * full, the lowest priority packet in the queue is dropped to make room for
+ * the packet to be added if it has higher priority. If the packet to be added
+ * has lower priority than all packets in the queue, it is dropped.
+ *
+ * Without the SKB priority queue, queue length limits must be imposed
+ * for individual queues, and there is no easy way to enforce a global queue
+ * length limit across all priorities. With the Skbprio queue, a global
+ * queue length limit can be enforced while not restricting the queue lengths
+ * of individual priorities.
+ *
+ * This is especially useful for a denial-of-service defense system like
+ * Gatekeeper, which prioritizes packets in flows that demonstrate expected
+ * behavior of legitimate users. The queue is flexible to allow any number
+ * of packets of any priority up to the global limit of the scheduler
+ * without risking resource overconsumption by a flood of low priority packets.
+ *
+ * The Gatekeeper codebase is found here:
+ *
+ *		https://github.com/AltraMayor/gatekeeper
+ */
+
+struct skbprio_sched_data {
+	/* Parameters. */
+	u32 max_limit;
+
+	/* Queue state. */
+	struct sk_buff_head qdiscs[SKBPRIO_MAX_PRIORITY];
+	struct gnet_stats_queue qstats[SKBPRIO_MAX_PRIORITY];
+	u16 highest_prio;
+	u16 lowest_prio;
+};
+
+static u16 calc_new_high_prio(const struct skbprio_sched_data *q)
+{
+	int prio;
+
+	for (prio = q->highest_prio - 1; prio >= q->lowest_prio; prio--) {
+		if (!skb_queue_empty(&q->qdiscs[prio]))
+			return prio;
+	}
+
+	/* SKB queue is empty, return 0 (default highest priority setting). */
+	return 0;
+}
+
+static u16 calc_new_low_prio(const struct skbprio_sched_data *q)
+{
+	int prio;
+
+	for (prio = q->lowest_prio + 1; prio <= q->highest_prio; prio++) {
+		if (!skb_queue_empty(&q->qdiscs[prio]))
+			return prio;
+	}
+
+	/* SKB queue is empty, return SKBPRIO_MAX_PRIORITY - 1
+	 * (default lowest priority setting).
+	 */
+	return SKBPRIO_MAX_PRIORITY - 1;
+}
+
+static int skbprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+			  struct sk_buff **to_free)
+{
+	struct skbprio_sched_data *q = qdisc_priv(sch);
+	struct sk_buff_head *qdisc;
+	struct sk_buff_head *lp_qdisc;
+	struct sk_buff *to_drop;
+	const unsigned int max_priority = SKBPRIO_MAX_PRIORITY - 1;
+	u16 prio, lp;
+
+	/* Obtain the priority of @skb. */
+	prio = min(skb->priority, max_priority);
+
+	qdisc = &q->qdiscs[prio];
+	if (sch->q.qlen < q->max_limit) {
+		__skb_queue_tail(qdisc, skb);
+		qdisc_qstats_backlog_inc(sch, skb);
+		q->qstats[prio].backlog += qdisc_pkt_len(skb);
+
+		/* Check to update highest and lowest priorities. */
+		if (prio > q->highest_prio)
+			q->highest_prio = prio;
+
+		if (prio < q->lowest_prio)
+			q->lowest_prio = prio;
+
+		sch->q.qlen++;
+		return NET_XMIT_SUCCESS;
+	}
+
+	/* If this packet has the lowest priority, drop it. */
+	lp = q->lowest_prio;
+	if (prio <= lp) {
+		q->qstats[prio].drops++;
+		return qdisc_drop(skb, sch, to_free);
+	}
+
+	/* Drop the packet at the tail of the lowest priority qdisc. */
+	lp_qdisc = &q->qdiscs[lp];
+	to_drop = __skb_dequeue_tail(lp_qdisc);
+	BUG_ON(!to_drop);
+	qdisc_qstats_backlog_dec(sch, to_drop);
+	qdisc_drop(to_drop, sch, to_free);
+
+	q->qstats[lp].backlog -= qdisc_pkt_len(to_drop);
+	q->qstats[lp].drops++;
+
+	__skb_queue_tail(qdisc, skb);
+	qdisc_qstats_backlog_inc(sch, skb);
+	q->qstats[prio].backlog += qdisc_pkt_len(skb);
+
+	/* Check to update highest and lowest priorities. */
+	if (skb_queue_empty(lp_qdisc)) {
+		if (q->lowest_prio == q->highest_prio) {
+			BUG_ON(sch->q.qlen != 1);
+			q->lowest_prio = prio;
+			q->highest_prio = prio;
+		} else {
+			q->lowest_prio = calc_new_low_prio(q);
+		}
+	}
+
+	if (prio > q->highest_prio)
+		q->highest_prio = prio;
+
+	return NET_XMIT_CN;
+}
+
+static struct sk_buff *skbprio_dequeue(struct Qdisc *sch)
+{
+	struct skbprio_sched_data *q = qdisc_priv(sch);
+	struct sk_buff_head *hpq = &q->qdiscs[q->highest_prio];
+	struct sk_buff *skb = __skb_dequeue(hpq);
+
+	if (unlikely(!skb))
+		return NULL;
+
+	sch->q.qlen--;
+	qdisc_qstats_backlog_dec(sch, skb);
+	qdisc_bstats_update(sch, skb);
+
+	q->qstats[q->highest_prio].backlog -= qdisc_pkt_len(skb);
+
+	/* Update highest priority field. */
+	if (skb_queue_empty(hpq)) {
+		if (q->lowest_prio == q->highest_prio) {
+			BUG_ON(sch->q.qlen);
+			q->highest_prio = 0;
+			q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1;
+		} else {
+			q->highest_prio = calc_new_high_prio(q);
+		}
+	}
+	return skb;
+}
+
+static int skbprio_change(struct Qdisc *sch, struct nlattr *opt,
+			struct netlink_ext_ack *extack)
+{
+	struct skbprio_sched_data *q = qdisc_priv(sch);
+	struct tc_skbprio_qopt *ctl = nla_data(opt);
+	const unsigned int min_limit = 1;
+
+	if (ctl->limit == (typeof(ctl->limit))-1)
+		q->max_limit = max(qdisc_dev(sch)->tx_queue_len, min_limit);
+	else if (ctl->limit < min_limit ||
+		ctl->limit > qdisc_dev(sch)->tx_queue_len)
+		return -EINVAL;
+	else
+		q->max_limit = ctl->limit;
+
+	return 0;
+}
+
+static int skbprio_init(struct Qdisc *sch, struct nlattr *opt,
+			struct netlink_ext_ack *extack)
+{
+	struct skbprio_sched_data *q = qdisc_priv(sch);
+	int prio;
+	const unsigned int min_limit = 1;
+
+	/* Initialise all queues, one for each possible priority. */
+	for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++)
+		__skb_queue_head_init(&q->qdiscs[prio]);
+
+	memset(&q->qstats, 0, sizeof(q->qstats));
+	q->highest_prio = 0;
+	q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1;
+	if (!opt) {
+		q->max_limit = max(qdisc_dev(sch)->tx_queue_len, min_limit);
+		return 0;
+	}
+	return skbprio_change(sch, opt, extack);
+}
+
+static int skbprio_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct skbprio_sched_data *q = qdisc_priv(sch);
+	struct tc_skbprio_qopt opt;
+
+	opt.limit = q->max_limit;
+
+	if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
+		return -1;
+
+	return skb->len;
+}
+
+static void skbprio_reset(struct Qdisc *sch)
+{
+	struct skbprio_sched_data *q = qdisc_priv(sch);
+	int prio;
+
+	sch->qstats.backlog = 0;
+	sch->q.qlen = 0;
+
+	for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++)
+		__skb_queue_purge(&q->qdiscs[prio]);
+
+	memset(&q->qstats, 0, sizeof(q->qstats));
+	q->highest_prio = 0;
+	q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1;
+}
+
+static void skbprio_destroy(struct Qdisc *sch)
+{
+	struct skbprio_sched_data *q = qdisc_priv(sch);
+	int prio;
+
+	for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++)
+		__skb_queue_purge(&q->qdiscs[prio]);
+}
+
+static struct Qdisc *skbprio_leaf(struct Qdisc *sch, unsigned long arg)
+{
+	return NULL;
+}
+
+static unsigned long skbprio_find(struct Qdisc *sch, u32 classid)
+{
+	return 0;
+}
+
+static int skbprio_dump_class(struct Qdisc *sch, unsigned long cl,
+			     struct sk_buff *skb, struct tcmsg *tcm)
+{
+	tcm->tcm_handle |= TC_H_MIN(cl);
+	return 0;
+}
+
+static int skbprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+				   struct gnet_dump *d)
+{
+	struct skbprio_sched_data *q = qdisc_priv(sch);
+	if (gnet_stats_copy_queue(d, NULL, &q->qstats[cl - 1],
+		q->qstats[cl - 1].qlen) < 0)
+		return -1;
+	return 0;
+}
+
+static void skbprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+	unsigned int i;
+
+	if (arg->stop)
+		return;
+
+	for (i = 0; i < SKBPRIO_MAX_PRIORITY; i++) {
+		if (arg->count < arg->skip) {
+			arg->count++;
+			continue;
+		}
+		if (arg->fn(sch, i + 1, arg) < 0) {
+			arg->stop = 1;
+			break;
+		}
+		arg->count++;
+	}
+}
+
+static const struct Qdisc_class_ops skbprio_class_ops = {
+	.leaf		=	skbprio_leaf,
+	.find		=	skbprio_find,
+	.dump		=	skbprio_dump_class,
+	.dump_stats	=	skbprio_dump_class_stats,
+	.walk		=	skbprio_walk,
+};
+
+static struct Qdisc_ops skbprio_qdisc_ops __read_mostly = {
+	.cl_ops		=	&skbprio_class_ops,
+	.id		=	"skbprio",
+	.priv_size	=	sizeof(struct skbprio_sched_data),
+	.enqueue	=	skbprio_enqueue,
+	.dequeue	=	skbprio_dequeue,
+	.peek		=	qdisc_peek_dequeued,
+	.init		=	skbprio_init,
+	.reset		=	skbprio_reset,
+	.change		=	skbprio_change,
+	.dump		=	skbprio_dump,
+	.destroy	=	skbprio_destroy,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init skbprio_module_init(void)
+{
+	return register_qdisc(&skbprio_qdisc_ops);
+}
+
+static void __exit skbprio_module_exit(void)
+{
+	unregister_qdisc(&skbprio_qdisc_ops);
+}
+
+module_init(skbprio_module_init)
+module_exit(skbprio_module_exit)
+
+MODULE_LICENSE("GPL");
-- 
1.9.1
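
The priorities this qdisc schedules on are whatever lands in
skb->priority; for locally generated traffic the usual knob is the
SO_PRIORITY socket option, and skbprio_enqueue() clamps anything at or
above SKBPRIO_MAX_PRIORITY into the last band. A minimal userspace
sketch (the helper name is illustrative):

	#include <sys/socket.h>

	/* Tag all traffic sent on @fd for skbprio band 10. */
	static int set_tx_priority(int fd)
	{
		int prio = 10;	/* bands run 0..SKBPRIO_MAX_PRIORITY - 1 */

		/* SO_PRIORITY sets sk->sk_priority, which the stack copies
		 * into skb->priority on transmit; values above 6 require
		 * CAP_NET_ADMIN.
		 */
		return setsockopt(fd, SOL_SOCKET, SO_PRIORITY,
				  &prio, sizeof(prio));
	}

On the tc side, assuming an iproute2 build with matching skbprio
support (not part of this kernel patch), attaching the qdisc would look
like "tc qdisc add dev eth0 root skbprio limit 1000"; per
skbprio_change(), a limit of (u32)-1 selects the device's tx_queue_len,
and anything outside [1, tx_queue_len] is rejected with -EINVAL.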


Thread overview: 8+ messages
2018-06-23 20:47 [PATCH v2 net-next] net/sched: add skbprio scheduler Nishanth Devarajan
2018-06-23 21:43 ` Cong Wang
2018-06-25 23:27   ` Nishanth Devarajan
2018-06-23 22:10 ` Alexander Duyck
2018-06-26  0:11   ` Nishanth Devarajan
2018-06-24 15:43 ` Jamal Hadi Salim
2018-06-26  0:34   ` Nishanth Devarajan
  -- strict thread matches above, loose matches on Subject: below --
2018-06-06 15:40 Nishanth Devarajan
