All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] HTB O(1) class lookup
@ 2007-02-01  5:18 ` Simon Lodal
  0 siblings, 0 replies; 21+ messages in thread
From: Simon Lodal @ 2007-02-01  5:18 UTC (permalink / raw)
  To: netdev; +Cc: lartc

[-- Attachment #1: Type: text/plain, Size: 761 bytes --]


This patch changes HTB's class storage from hash+lists to a two-level linear 
array, so it can do constant time (O(1)) class lookup by classid. It improves 
scalability for large number of classes.

Without the patch, ~14k htb classes can starve a Xeon-3.2 at only 15kpps, 
using most of its cycles traversing lists in htb_find(). The patch 
eliminates this problem, and has a measurable impact even with a few hundred 
classes.

Previously, scalability could be improved by increasing HTB_HSIZE, modifying the 
hash function, and recompiling, but this patch works for everyone without 
recompiling and scales better too.

The patch is for 2.6.20-rc6, I have older ones for 2.6.18 and 2.6.19 if anyone 
is interested.

Signed-off-by: Simon Lodal <simonl@parknet.dk>

[-- Attachment #2: htb_O(1)_class_lookup.diff --]
[-- Type: text/x-diff, Size: 11212 bytes --]

--- linux-2.6.20-rc6.base/net/sched/sch_htb.c	2007-01-25 03:19:28.000000000 +0100
+++ linux-2.6.20-rc6/net/sched/sch_htb.c	2007-02-01 05:44:36.000000000 +0100
@@ -68,16 +68,37 @@
     one less than their parent.
 */
 
-#define HTB_HSIZE 16		/* classid hash size */
-#define HTB_EWMAC 2		/* rate average over HTB_EWMAC*HTB_HSIZE sec */
-#define HTB_RATECM 1		/* whether to use rate computer */
+#define HTB_MAX_CLS		(TC_H_MIN(-1)+1)
+#define HTB_CLS_ARRAY_SIZE	PAGE_SIZE
+#define HTB_CLS_PER_ARRAY	(HTB_CLS_ARRAY_SIZE / sizeof(void*))
+#define HTB_CLS_ARRAYS		(HTB_MAX_CLS / HTB_CLS_PER_ARRAY)
+#define HTB_CLS_ARRAY(CID)	(CID / HTB_CLS_PER_ARRAY)
+#define HTB_CLS_INDEX(CID)	(CID & (HTB_CLS_PER_ARRAY-1))
+
+
 #define HTB_HYSTERESIS 1	/* whether to use mode hysteresis for speedup */
-#define HTB_VER 0x30011		/* major must be matched with number suplied by TC as version */
+#define HTB_VER 0x30012		/* major must be matched with number suplied by TC as version */
 
 #if HTB_VER >> 16 != TC_HTB_PROTOVER
 #error "Mismatched sch_htb.c and pkt_sch.h"
 #endif
 
+/* Whether to use rate computer. This is only used for statistics output to
+   userspace (tc -s class show dev ...); if you do not care about that and
+   want the last bit of performance, disable this. */
+#define HTB_RATECM
+#ifdef HTB_RATECM
+/* Time between rate computation updates, in seconds, for each class. */
+#define HTB_RATECM_UPDATE_INTERVAL	16
+/* How many HTB_RATECM_UPDATE_INTERVAL intervals to average over when
+   computing the rate. If set to 1, the computed rate will be exactly the
+   observed rate of the last interval. If set to higher values, the computed
+   rate will converge slower, but also vary less with small, temporary changes
+   in traffic.
+*/
+#define HTB_RATECM_AVERAGE_INTERVALS	2
+#endif
+
 /* used internaly to keep status of single class */
 enum htb_cmode {
 	HTB_CANT_SEND,		/* class can't send and can't borrow */
@@ -104,7 +125,7 @@
 	/* topology */
 	int level;		/* our level (see above) */
 	struct htb_class *parent;	/* parent class */
-	struct hlist_node hlist;	/* classid hash list item */
+	struct list_head clist;		/* classid list item */
 	struct list_head sibling;	/* sibling list item */
 	struct list_head children;	/* children list */
 
@@ -165,9 +186,13 @@
 	return rate->data[slot];
 }
 
+typedef struct htb_class* htb_cls_array[HTB_CLS_PER_ARRAY];
+
 struct htb_sched {
-	struct list_head root;	/* root classes list */
-	struct hlist_head hash[HTB_HSIZE];	/* hashed by classid */
+	struct list_head root;		/* root classes list */
+	struct list_head clist;		/* all classes list */
+	/* all classes arrays for fast lookup */
+	htb_cls_array* classes[HTB_CLS_ARRAYS];
 	struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
 
 	/* self list - roots of self generating tree */
@@ -198,8 +223,10 @@
 	psched_time_t now;	/* cached dequeue time */
 	struct timer_list timer;	/* send delay timer */
 #ifdef HTB_RATECM
-	struct timer_list rttim;	/* rate computer timer */
-	int recmp_bucket;	/* which hash bucket to recompute next */
+	struct timer_list rttim;/* rate computer timer */
+	int clscnt;		/* total number of classes */
+	struct list_head *rtcur;/* current class to update rate timer for */
+	int rtiter;		/* current iteration (1..UPDATE_INTERVAL) */
 #endif
 
 	/* non shaped skbs; let them go directly thru */
@@ -209,32 +236,22 @@
 	long direct_pkts;
 };
 
-/* compute hash of size HTB_HSIZE for given handle */
-static inline int htb_hash(u32 h)
-{
-#if HTB_HSIZE != 16
-#error "Declare new hash for your HTB_HSIZE"
-#endif
-	h ^= h >> 8;		/* stolen from cbq_hash */
-	h ^= h >> 4;
-	return h & 0xf;
-}
-
-/* find class in global hash table using given handle */
+/* find class in class arrays using given handle */
 static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
 {
 	struct htb_sched *q = qdisc_priv(sch);
-	struct hlist_node *p;
-	struct htb_class *cl;
+	htb_cls_array* a;
+	int minorid;
 
 	if (TC_H_MAJ(handle) != sch->handle)
 		return NULL;
 
-	hlist_for_each_entry(cl, p, q->hash + htb_hash(handle), hlist) {
-		if (cl->classid == handle)
-			return cl;
-	}
-	return NULL;
+	minorid = TC_H_MIN(handle);
+	a = q->classes[HTB_CLS_ARRAY(minorid)];
+	if (a == NULL)
+		return NULL;
+
+	return (*a)[HTB_CLS_INDEX(minorid)];
 }
 
 /**
@@ -689,13 +706,14 @@
 }
 
 #ifdef HTB_RATECM
-#define RT_GEN(D,R) R+=D-(R/HTB_EWMAC);D=0
+#define RT_GEN(R,S) R+=S-(R/HTB_RATECM_AVERAGE_INTERVALS);S=0
+/* recompute rate for approximately clscnt/UPDATE_INTERVAL classes */
 static void htb_rate_timer(unsigned long arg)
 {
 	struct Qdisc *sch = (struct Qdisc *)arg;
 	struct htb_sched *q = qdisc_priv(sch);
-	struct hlist_node *p;
 	struct htb_class *cl;
+	int cnt,done;
 
 
 	/* lock queue so that we can muck with it */
@@ -704,14 +722,35 @@
 	q->rttim.expires = jiffies + HZ;
 	add_timer(&q->rttim);
 
-	/* scan and recompute one bucket at time */
-	if (++q->recmp_bucket >= HTB_HSIZE)
-		q->recmp_bucket = 0;
-
-	hlist_for_each_entry(cl,p, q->hash + q->recmp_bucket, hlist) {
-		RT_GEN(cl->sum_bytes, cl->rate_bytes);
-		RT_GEN(cl->sum_packets, cl->rate_packets);
+	if (q->clscnt == 0)
+		goto done;
+	if (q->rtiter == HTB_RATECM_UPDATE_INTERVAL)
+		q->rtiter = 0;
+	if (q->rtiter == 0)
+		q->rtcur = q->clist.next;
+	q->rtiter++;
+	cnt = ((q->clscnt * q->rtiter) / HTB_RATECM_UPDATE_INTERVAL) -
+	      ((q->clscnt * (q->rtiter-1)) / HTB_RATECM_UPDATE_INTERVAL);
+	done = 0;
+	for (;;) {
+		/* end of class list */
+		if (q->rtcur == NULL)
+			break;
+		/* last iteration must continue until end */
+		if ((done == cnt) &&
+		    (q->rtiter < HTB_RATECM_UPDATE_INTERVAL)) {
+			break;
+		}
+		cl = list_entry(q->rtcur, struct htb_class, clist);
+		RT_GEN(cl->rate_bytes, cl->sum_bytes);
+		RT_GEN(cl->rate_packets, cl->sum_packets);
+		done++;
+		q->rtcur = q->rtcur->next;
+		if (q->rtcur == &q->clist)
+			q->rtcur = NULL;
 	}
+
+done:
 	spin_unlock_bh(&sch->dev->queue_lock);
 }
 #endif
@@ -1058,23 +1097,19 @@
 {
 	struct htb_sched *q = qdisc_priv(sch);
 	int i;
+	struct list_head *p;
 
-	for (i = 0; i < HTB_HSIZE; i++) {
-		struct hlist_node *p;
-		struct htb_class *cl;
-
-		hlist_for_each_entry(cl, p, q->hash + i, hlist) {
-			if (cl->level)
-				memset(&cl->un.inner, 0, sizeof(cl->un.inner));
-			else {
-				if (cl->un.leaf.q)
-					qdisc_reset(cl->un.leaf.q);
-				INIT_LIST_HEAD(&cl->un.leaf.drop_list);
-			}
-			cl->prio_activity = 0;
-			cl->cmode = HTB_CAN_SEND;
-
+	list_for_each(p, &q->clist) {
+		struct htb_class *cl = list_entry(p, struct htb_class, clist);
+		if (cl->level)
+			memset(&cl->un.inner, 0, sizeof(cl->un.inner));
+		else {
+			if (cl->un.leaf.q) 
+				qdisc_reset(cl->un.leaf.q);
+			INIT_LIST_HEAD(&cl->un.leaf.drop_list);
 		}
+		cl->prio_activity = 0;
+		cl->cmode = HTB_CAN_SEND;
 	}
 	sch->flags &= ~TCQ_F_THROTTLED;
 	del_timer(&q->timer);
@@ -1109,8 +1144,7 @@
 	}
 
 	INIT_LIST_HEAD(&q->root);
-	for (i = 0; i < HTB_HSIZE; i++)
-		INIT_HLIST_HEAD(q->hash + i);
+	INIT_LIST_HEAD(&q->clist);
 	for (i = 0; i < TC_HTB_NUMPRIO; i++)
 		INIT_LIST_HEAD(q->drops + i);
 
@@ -1204,8 +1238,10 @@
 	struct htb_class *cl = (struct htb_class *)arg;
 
 #ifdef HTB_RATECM
-	cl->rate_est.bps = cl->rate_bytes / (HTB_EWMAC * HTB_HSIZE);
-	cl->rate_est.pps = cl->rate_packets / (HTB_EWMAC * HTB_HSIZE);
+	cl->rate_est.bps = cl->rate_bytes /
+		(HTB_RATECM_UPDATE_INTERVAL * HTB_RATECM_AVERAGE_INTERVALS);
+	cl->rate_est.pps = cl->rate_packets /
+		(HTB_RATECM_UPDATE_INTERVAL * HTB_RATECM_AVERAGE_INTERVALS);
 #endif
 
 	if (!cl->level && cl->un.leaf.q)
@@ -1310,6 +1346,7 @@
 static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
 {
 	struct htb_sched *q = qdisc_priv(sch);
+	int minorid = TC_H_MIN(cl->classid);
 
 	if (!cl->level) {
 		BUG_TRAP(cl->un.leaf.q);
@@ -1325,7 +1362,7 @@
 						  struct htb_class, sibling));
 
 	/* note: this delete may happen twice (see htb_delete) */
-	hlist_del_init(&cl->hlist);
+	list_del(&cl->clist);
 	list_del(&cl->sibling);
 
 	if (cl->prio_activity)
@@ -1334,6 +1371,7 @@
 	if (cl->cmode != HTB_CAN_SEND)
 		htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
 
+	(*q->classes[HTB_CLS_ARRAY(minorid)])[HTB_CLS_INDEX(minorid)] = NULL;
 	kfree(cl);
 }
 
@@ -1341,6 +1379,7 @@
 static void htb_destroy(struct Qdisc *sch)
 {
 	struct htb_sched *q = qdisc_priv(sch);
+	int i;
 
 	del_timer_sync(&q->timer);
 #ifdef HTB_RATECM
@@ -1355,6 +1394,8 @@
 	while (!list_empty(&q->root))
 		htb_destroy_class(sch, list_entry(q->root.next,
 						  struct htb_class, sibling));
+	for (i=0; i<HTB_CLS_ARRAYS; i++)
+		kfree(q->classes[i]);
 
 	__skb_queue_purge(&q->direct_queue);
 }
@@ -1381,8 +1422,15 @@
 
 	sch_tree_lock(sch);
 
-	/* delete from hash and active; remainder in destroy_class */
-	hlist_del_init(&cl->hlist);
+	/* delete from class list and active; remainder in destroy_class */
+	if (q->rtcur == &cl->clist) {
+		q->rtcur = q->rtcur->next;
+		if (q->rtcur == &q->clist)
+			q->rtcur = NULL;
+	}
+	if (--q->clscnt == 0)
+		q->rtiter = 0;
+	list_del_init(&cl->clist);
 
 	if (!cl->level) {
 		qlen = cl->un.leaf.q->q.qlen;
@@ -1422,6 +1470,7 @@
 	struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
 	struct rtattr *tb[TCA_HTB_RTAB];
 	struct tc_htb_opt *hopt;
+	int minorid = TC_H_MIN(classid);
 
 	/* extract all subattrs from opt attr */
 	if (!opt || rtattr_parse_nested(tb, TCA_HTB_RTAB, opt) ||
@@ -1453,12 +1502,18 @@
 			goto failure;
 		}
 		err = -ENOBUFS;
+		if (q->classes[HTB_CLS_ARRAY(minorid)] == NULL) {
+			if ((q->classes[HTB_CLS_ARRAY(minorid)] = 
+			     kzalloc(sizeof(htb_cls_array), GFP_KERNEL))
+			    == NULL)
+				goto failure;
+		}
 		if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
 			goto failure;
 
 		cl->refcnt = 1;
+		INIT_LIST_HEAD(&cl->clist);
 		INIT_LIST_HEAD(&cl->sibling);
-		INIT_HLIST_NODE(&cl->hlist);
 		INIT_LIST_HEAD(&cl->children);
 		INIT_LIST_HEAD(&cl->un.leaf.drop_list);
 		RB_CLEAR_NODE(&cl->pq_node);
@@ -1503,10 +1558,12 @@
 		PSCHED_GET_TIME(cl->t_c);
 		cl->cmode = HTB_CAN_SEND;
 
-		/* attach to the hash list and parent's family */
-		hlist_add_head(&cl->hlist, q->hash + htb_hash(classid));
+		/* attach to the classes list+array and parent's family */
+		list_add_tail(&cl->clist, &q->clist);
 		list_add_tail(&cl->sibling,
 			      parent ? &parent->children : &q->root);
+		(*q->classes[HTB_CLS_ARRAY(minorid)])[HTB_CLS_INDEX(minorid)] = cl;
+		q->clscnt++;
 	} else
 		sch_tree_lock(sch);
 
@@ -1602,26 +1659,22 @@
 static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 {
 	struct htb_sched *q = qdisc_priv(sch);
-	int i;
+	struct list_head *p;
 
 	if (arg->stop)
 		return;
 
-	for (i = 0; i < HTB_HSIZE; i++) {
-		struct hlist_node *p;
-		struct htb_class *cl;
-
-		hlist_for_each_entry(cl, p, q->hash + i, hlist) {
-			if (arg->count < arg->skip) {
-				arg->count++;
-				continue;
-			}
-			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
-				arg->stop = 1;
-				return;
-			}
+	list_for_each(p, &q->clist) {
+		struct htb_class *cl = list_entry(p, struct htb_class, clist);
+		if (arg->count < arg->skip) {
 			arg->count++;
+			continue;
+		}
+		if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
+			arg->stop = 1;
+			return;
 		}
+		arg->count++;
 	}
 }
 

[-- Attachment #3: Type: text/plain, Size: 143 bytes --]

_______________________________________________
LARTC mailing list
LARTC@mailman.ds9a.nl
http://mailman.ds9a.nl/cgi-bin/mailman/listinfo/lartc

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [LARTC] [PATCH] HTB O(1) class lookup
@ 2007-02-01  5:18 ` Simon Lodal
  0 siblings, 0 replies; 21+ messages in thread
From: Simon Lodal @ 2007-02-01  5:18 UTC (permalink / raw)
  To: netdev; +Cc: lartc

[-- Attachment #1: Type: text/plain, Size: 761 bytes --]


This patch changes HTB's class storage from hash+lists to a two-level linear 
array, so it can do constant time (O(1)) class lookup by classid. It improves 
scalability for large number of classes.

Without the patch, ~14k htb classes can starve a Xeon-3.2 at only 15kpps, 
using most of its cycles traversing lists in htb_find(). The patch 
eliminates this problem, and has a measurable impact even with a few hundred 
classes.

Previously, scalability could be improved by increasing HTB_HSIZE, modifying the 
hash function, and recompiling, but this patch works for everyone without 
recompiling and scales better too.

The patch is for 2.6.20-rc6, I have older ones for 2.6.18 and 2.6.19 if anyone 
is interested.

Signed-off-by: Simon Lodal <simonl@parknet.dk>

[-- Attachment #2: htb_O(1)_class_lookup.diff --]
[-- Type: text/x-diff, Size: 11212 bytes --]

--- linux-2.6.20-rc6.base/net/sched/sch_htb.c	2007-01-25 03:19:28.000000000 +0100
+++ linux-2.6.20-rc6/net/sched/sch_htb.c	2007-02-01 05:44:36.000000000 +0100
@@ -68,16 +68,37 @@
     one less than their parent.
 */
 
-#define HTB_HSIZE 16		/* classid hash size */
-#define HTB_EWMAC 2		/* rate average over HTB_EWMAC*HTB_HSIZE sec */
-#define HTB_RATECM 1		/* whether to use rate computer */
+#define HTB_MAX_CLS		(TC_H_MIN(-1)+1)
+#define HTB_CLS_ARRAY_SIZE	PAGE_SIZE
+#define HTB_CLS_PER_ARRAY	(HTB_CLS_ARRAY_SIZE / sizeof(void*))
+#define HTB_CLS_ARRAYS		(HTB_MAX_CLS / HTB_CLS_PER_ARRAY)
+#define HTB_CLS_ARRAY(CID)	(CID / HTB_CLS_PER_ARRAY)
+#define HTB_CLS_INDEX(CID)	(CID & (HTB_CLS_PER_ARRAY-1))
+
+
 #define HTB_HYSTERESIS 1	/* whether to use mode hysteresis for speedup */
-#define HTB_VER 0x30011		/* major must be matched with number suplied by TC as version */
+#define HTB_VER 0x30012		/* major must be matched with number suplied by TC as version */
 
 #if HTB_VER >> 16 != TC_HTB_PROTOVER
 #error "Mismatched sch_htb.c and pkt_sch.h"
 #endif
 
+/* Whether to use rate computer. This is only used for statistics output to
+   userspace (tc -s class show dev ...); if you do not care about that and
+   want the last bit of performance, disable this. */
+#define HTB_RATECM
+#ifdef HTB_RATECM
+/* Time between rate computation updates, in seconds, for each class. */
+#define HTB_RATECM_UPDATE_INTERVAL	16
+/* How many HTB_RATECM_UPDATE_INTERVAL intervals to average over when
+   computing the rate. If set to 1, the computed rate will be exactly the
+   observed rate of the last interval. If set to higher values, the computed
+   rate will converge slower, but also vary less with small, temporary changes
+   in traffic.
+*/
+#define HTB_RATECM_AVERAGE_INTERVALS	2
+#endif
+
 /* used internaly to keep status of single class */
 enum htb_cmode {
 	HTB_CANT_SEND,		/* class can't send and can't borrow */
@@ -104,7 +125,7 @@
 	/* topology */
 	int level;		/* our level (see above) */
 	struct htb_class *parent;	/* parent class */
-	struct hlist_node hlist;	/* classid hash list item */
+	struct list_head clist;		/* classid list item */
 	struct list_head sibling;	/* sibling list item */
 	struct list_head children;	/* children list */
 
@@ -165,9 +186,13 @@
 	return rate->data[slot];
 }
 
+typedef struct htb_class* htb_cls_array[HTB_CLS_PER_ARRAY];
+
 struct htb_sched {
-	struct list_head root;	/* root classes list */
-	struct hlist_head hash[HTB_HSIZE];	/* hashed by classid */
+	struct list_head root;		/* root classes list */
+	struct list_head clist;		/* all classes list */
+	/* all classes arrays for fast lookup */
+	htb_cls_array* classes[HTB_CLS_ARRAYS];
 	struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
 
 	/* self list - roots of self generating tree */
@@ -198,8 +223,10 @@
 	psched_time_t now;	/* cached dequeue time */
 	struct timer_list timer;	/* send delay timer */
 #ifdef HTB_RATECM
-	struct timer_list rttim;	/* rate computer timer */
-	int recmp_bucket;	/* which hash bucket to recompute next */
+	struct timer_list rttim;/* rate computer timer */
+	int clscnt;		/* total number of classes */
+	struct list_head *rtcur;/* current class to update rate timer for */
+	int rtiter;		/* current iteration (1..UPDATE_INTERVAL) */
 #endif
 
 	/* non shaped skbs; let them go directly thru */
@@ -209,32 +236,22 @@
 	long direct_pkts;
 };
 
-/* compute hash of size HTB_HSIZE for given handle */
-static inline int htb_hash(u32 h)
-{
-#if HTB_HSIZE != 16
-#error "Declare new hash for your HTB_HSIZE"
-#endif
-	h ^= h >> 8;		/* stolen from cbq_hash */
-	h ^= h >> 4;
-	return h & 0xf;
-}
-
-/* find class in global hash table using given handle */
+/* find class in class arrays using given handle */
 static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
 {
 	struct htb_sched *q = qdisc_priv(sch);
-	struct hlist_node *p;
-	struct htb_class *cl;
+	htb_cls_array* a;
+	int minorid;
 
 	if (TC_H_MAJ(handle) != sch->handle)
 		return NULL;
 
-	hlist_for_each_entry(cl, p, q->hash + htb_hash(handle), hlist) {
-		if (cl->classid == handle)
-			return cl;
-	}
-	return NULL;
+	minorid = TC_H_MIN(handle);
+	a = q->classes[HTB_CLS_ARRAY(minorid)];
+	if (a == NULL)
+		return NULL;
+
+	return (*a)[HTB_CLS_INDEX(minorid)];
 }
 
 /**
@@ -689,13 +706,14 @@
 }
 
 #ifdef HTB_RATECM
-#define RT_GEN(D,R) R+=D-(R/HTB_EWMAC);D=0
+#define RT_GEN(R,S) R+=S-(R/HTB_RATECM_AVERAGE_INTERVALS);S=0
+/* recompute rate for approximately clscnt/UPDATE_INTERVAL classes */
 static void htb_rate_timer(unsigned long arg)
 {
 	struct Qdisc *sch = (struct Qdisc *)arg;
 	struct htb_sched *q = qdisc_priv(sch);
-	struct hlist_node *p;
 	struct htb_class *cl;
+	int cnt,done;
 
 
 	/* lock queue so that we can muck with it */
@@ -704,14 +722,35 @@
 	q->rttim.expires = jiffies + HZ;
 	add_timer(&q->rttim);
 
-	/* scan and recompute one bucket at time */
-	if (++q->recmp_bucket >= HTB_HSIZE)
-		q->recmp_bucket = 0;
-
-	hlist_for_each_entry(cl,p, q->hash + q->recmp_bucket, hlist) {
-		RT_GEN(cl->sum_bytes, cl->rate_bytes);
-		RT_GEN(cl->sum_packets, cl->rate_packets);
+	if (q->clscnt == 0)
+		goto done;
+	if (q->rtiter == HTB_RATECM_UPDATE_INTERVAL)
+		q->rtiter = 0;
+	if (q->rtiter == 0)
+		q->rtcur = q->clist.next;
+	q->rtiter++;
+	cnt = ((q->clscnt * q->rtiter) / HTB_RATECM_UPDATE_INTERVAL) -
+	      ((q->clscnt * (q->rtiter-1)) / HTB_RATECM_UPDATE_INTERVAL);
+	done = 0;
+	for (;;) {
+		/* end of class list */
+		if (q->rtcur == NULL)
+			break;
+		/* last iteration must continue until end */
+		if ((done == cnt) &&
+		    (q->rtiter < HTB_RATECM_UPDATE_INTERVAL)) {
+			break;
+		}
+		cl = list_entry(q->rtcur, struct htb_class, clist);
+		RT_GEN(cl->rate_bytes, cl->sum_bytes);
+		RT_GEN(cl->rate_packets, cl->sum_packets);
+		done++;
+		q->rtcur = q->rtcur->next;
+		if (q->rtcur == &q->clist)
+			q->rtcur = NULL;
 	}
+
+done:
 	spin_unlock_bh(&sch->dev->queue_lock);
 }
 #endif
@@ -1058,23 +1097,19 @@
 {
 	struct htb_sched *q = qdisc_priv(sch);
 	int i;
+	struct list_head *p;
 
-	for (i = 0; i < HTB_HSIZE; i++) {
-		struct hlist_node *p;
-		struct htb_class *cl;
-
-		hlist_for_each_entry(cl, p, q->hash + i, hlist) {
-			if (cl->level)
-				memset(&cl->un.inner, 0, sizeof(cl->un.inner));
-			else {
-				if (cl->un.leaf.q)
-					qdisc_reset(cl->un.leaf.q);
-				INIT_LIST_HEAD(&cl->un.leaf.drop_list);
-			}
-			cl->prio_activity = 0;
-			cl->cmode = HTB_CAN_SEND;
-
+	list_for_each(p, &q->clist) {
+		struct htb_class *cl = list_entry(p, struct htb_class, clist);
+		if (cl->level)
+			memset(&cl->un.inner, 0, sizeof(cl->un.inner));
+		else {
+			if (cl->un.leaf.q) 
+				qdisc_reset(cl->un.leaf.q);
+			INIT_LIST_HEAD(&cl->un.leaf.drop_list);
 		}
+		cl->prio_activity = 0;
+		cl->cmode = HTB_CAN_SEND;
 	}
 	sch->flags &= ~TCQ_F_THROTTLED;
 	del_timer(&q->timer);
@@ -1109,8 +1144,7 @@
 	}
 
 	INIT_LIST_HEAD(&q->root);
-	for (i = 0; i < HTB_HSIZE; i++)
-		INIT_HLIST_HEAD(q->hash + i);
+	INIT_LIST_HEAD(&q->clist);
 	for (i = 0; i < TC_HTB_NUMPRIO; i++)
 		INIT_LIST_HEAD(q->drops + i);
 
@@ -1204,8 +1238,10 @@
 	struct htb_class *cl = (struct htb_class *)arg;
 
 #ifdef HTB_RATECM
-	cl->rate_est.bps = cl->rate_bytes / (HTB_EWMAC * HTB_HSIZE);
-	cl->rate_est.pps = cl->rate_packets / (HTB_EWMAC * HTB_HSIZE);
+	cl->rate_est.bps = cl->rate_bytes /
+		(HTB_RATECM_UPDATE_INTERVAL * HTB_RATECM_AVERAGE_INTERVALS);
+	cl->rate_est.pps = cl->rate_packets /
+		(HTB_RATECM_UPDATE_INTERVAL * HTB_RATECM_AVERAGE_INTERVALS);
 #endif
 
 	if (!cl->level && cl->un.leaf.q)
@@ -1310,6 +1346,7 @@
 static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
 {
 	struct htb_sched *q = qdisc_priv(sch);
+	int minorid = TC_H_MIN(cl->classid);
 
 	if (!cl->level) {
 		BUG_TRAP(cl->un.leaf.q);
@@ -1325,7 +1362,7 @@
 						  struct htb_class, sibling));
 
 	/* note: this delete may happen twice (see htb_delete) */
-	hlist_del_init(&cl->hlist);
+	list_del(&cl->clist);
 	list_del(&cl->sibling);
 
 	if (cl->prio_activity)
@@ -1334,6 +1371,7 @@
 	if (cl->cmode != HTB_CAN_SEND)
 		htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
 
+	(*q->classes[HTB_CLS_ARRAY(minorid)])[HTB_CLS_INDEX(minorid)] = NULL;
 	kfree(cl);
 }
 
@@ -1341,6 +1379,7 @@
 static void htb_destroy(struct Qdisc *sch)
 {
 	struct htb_sched *q = qdisc_priv(sch);
+	int i;
 
 	del_timer_sync(&q->timer);
 #ifdef HTB_RATECM
@@ -1355,6 +1394,8 @@
 	while (!list_empty(&q->root))
 		htb_destroy_class(sch, list_entry(q->root.next,
 						  struct htb_class, sibling));
+	for (i=0; i<HTB_CLS_ARRAYS; i++)
+		kfree(q->classes[i]);
 
 	__skb_queue_purge(&q->direct_queue);
 }
@@ -1381,8 +1422,15 @@
 
 	sch_tree_lock(sch);
 
-	/* delete from hash and active; remainder in destroy_class */
-	hlist_del_init(&cl->hlist);
+	/* delete from class list and active; remainder in destroy_class */
+	if (q->rtcur == &cl->clist) {
+		q->rtcur = q->rtcur->next;
+		if (q->rtcur == &q->clist)
+			q->rtcur = NULL;
+	}
+	if (--q->clscnt == 0)
+		q->rtiter = 0;
+	list_del_init(&cl->clist);
 
 	if (!cl->level) {
 		qlen = cl->un.leaf.q->q.qlen;
@@ -1422,6 +1470,7 @@
 	struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
 	struct rtattr *tb[TCA_HTB_RTAB];
 	struct tc_htb_opt *hopt;
+	int minorid = TC_H_MIN(classid);
 
 	/* extract all subattrs from opt attr */
 	if (!opt || rtattr_parse_nested(tb, TCA_HTB_RTAB, opt) ||
@@ -1453,12 +1502,18 @@
 			goto failure;
 		}
 		err = -ENOBUFS;
+		if (q->classes[HTB_CLS_ARRAY(minorid)] == NULL) {
+			if ((q->classes[HTB_CLS_ARRAY(minorid)] = 
+			     kzalloc(sizeof(htb_cls_array), GFP_KERNEL))
+			    == NULL)
+				goto failure;
+		}
 		if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
 			goto failure;
 
 		cl->refcnt = 1;
+		INIT_LIST_HEAD(&cl->clist);
 		INIT_LIST_HEAD(&cl->sibling);
-		INIT_HLIST_NODE(&cl->hlist);
 		INIT_LIST_HEAD(&cl->children);
 		INIT_LIST_HEAD(&cl->un.leaf.drop_list);
 		RB_CLEAR_NODE(&cl->pq_node);
@@ -1503,10 +1558,12 @@
 		PSCHED_GET_TIME(cl->t_c);
 		cl->cmode = HTB_CAN_SEND;
 
-		/* attach to the hash list and parent's family */
-		hlist_add_head(&cl->hlist, q->hash + htb_hash(classid));
+		/* attach to the classes list+array and parent's family */
+		list_add_tail(&cl->clist, &q->clist);
 		list_add_tail(&cl->sibling,
 			      parent ? &parent->children : &q->root);
+		(*q->classes[HTB_CLS_ARRAY(minorid)])[HTB_CLS_INDEX(minorid)] = cl;
+		q->clscnt++;
 	} else
 		sch_tree_lock(sch);
 
@@ -1602,26 +1659,22 @@
 static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 {
 	struct htb_sched *q = qdisc_priv(sch);
-	int i;
+	struct list_head *p;
 
 	if (arg->stop)
 		return;
 
-	for (i = 0; i < HTB_HSIZE; i++) {
-		struct hlist_node *p;
-		struct htb_class *cl;
-
-		hlist_for_each_entry(cl, p, q->hash + i, hlist) {
-			if (arg->count < arg->skip) {
-				arg->count++;
-				continue;
-			}
-			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
-				arg->stop = 1;
-				return;
-			}
+	list_for_each(p, &q->clist) {
+		struct htb_class *cl = list_entry(p, struct htb_class, clist);
+		if (arg->count < arg->skip) {
 			arg->count++;
+			continue;
+		}
+		if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
+			arg->stop = 1;
+			return;
 		}
+		arg->count++;
 	}
 }
 

[-- Attachment #3: Type: text/plain, Size: 143 bytes --]

_______________________________________________
LARTC mailing list
LARTC@mailman.ds9a.nl
http://mailman.ds9a.nl/cgi-bin/mailman/listinfo/lartc

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] HTB O(1) class lookup
  2007-02-01  5:18 ` [LARTC] " Simon Lodal
@ 2007-02-01  6:08   ` Patrick McHardy
  -1 siblings, 0 replies; 21+ messages in thread
From: Patrick McHardy @ 2007-02-01  6:08 UTC (permalink / raw)
  To: Simon Lodal; +Cc: netdev, lartc

Simon Lodal wrote:
> This patch changes HTB's class storage from hash+lists to a two-level linear 
> array, so it can do constant time (O(1)) class lookup by classid. It improves 
> scalability for large number of classes.
> 
> Without the patch, ~14k htb classes can starve a Xeon-3.2 at only 15kpps, 
> using most of it's cycles traversing lists in htb_find(). The patch 
> eliminates this problem, and has a measurable impact even with a few hundred 
> classes.
> 
> Previously, scalability could be improved by increasing HTB_HSIZE, modify the 
> hash function, and recompile, but this patch works for everyone without 
> recompile and scales better too.

I agree that the current fixed sized hashes (additionally quite
small by default) are a big problem with many classes, for all of
HTB/HFSC/CBQ. But I think your approach is a bit wasteful, with
unfortunately chosen classids 128 classes are enough to reach the
maximum memory usage of ~512kb (with 4k pages and 8 byte pointers).

I have a patch for HFSC which introduces dynamic resizing of the
class hash. I have planned to generalize it (similar to tcf_hashinfo)
and convert HTB and CBQ as well, which as a nice side effect will
allow to get rid of some duplicated code, like hash walking.

If you give me a few days I'll try to finish and post it.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [LARTC] Re: [PATCH] HTB O(1) class lookup
@ 2007-02-01  6:08   ` Patrick McHardy
  0 siblings, 0 replies; 21+ messages in thread
From: Patrick McHardy @ 2007-02-01  6:08 UTC (permalink / raw)
  To: Simon Lodal; +Cc: netdev, lartc

Simon Lodal wrote:
> This patch changes HTB's class storage from hash+lists to a two-level linear 
> array, so it can do constant time (O(1)) class lookup by classid. It improves 
> scalability for large number of classes.
> 
> Without the patch, ~14k htb classes can starve a Xeon-3.2 at only 15kpps, 
> using most of it's cycles traversing lists in htb_find(). The patch 
> eliminates this problem, and has a measurable impact even with a few hundred 
> classes.
> 
> Previously, scalability could be improved by increasing HTB_HSIZE, modify the 
> hash function, and recompile, but this patch works for everyone without 
> recompile and scales better too.

I agree that the current fixed sized hashes (additionally quite
small by default) are a big problem with many classes, for all of
HTB/HFSC/CBQ. But I think your approach is a bit wasteful, with
unfortunately chosen classids 128 classes are enough to reach the
maximum memory usage of ~512kb (with 4k pages and 8 byte pointers).

I have a patch for HFSC which introduces dynamic resizing of the
class hash. I have planned to generalize it (similar to tcf_hashinfo)
and convert HTB and CBQ as well, which as a nice side effect will
allow to get rid of some duplicated code, like hash walking.

If you give me a few days I'll try to finish and post it.
_______________________________________________
LARTC mailing list
LARTC@mailman.ds9a.nl
http://mailman.ds9a.nl/cgi-bin/mailman/listinfo/lartc

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] HTB O(1) class lookup
  2007-02-01  6:08   ` [LARTC] " Patrick McHardy
@ 2007-02-01  7:08     ` Simon Lodal
  -1 siblings, 0 replies; 21+ messages in thread
From: Simon Lodal @ 2007-02-01  7:08 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: netdev, lartc

On Thursday 01 February 2007 07:08, Patrick McHardy wrote:
> Simon Lodal wrote:
> > This patch changes HTB's class storage from hash+lists to a two-level
> > linear array, so it can do constant time (O(1)) class lookup by classid.
> > It improves scalability for large number of classes.
> >
> > Without the patch, ~14k htb classes can starve a Xeon-3.2 at only 15kpps,
> > using most of it's cycles traversing lists in htb_find(). The patch
> > eliminates this problem, and has a measurable impact even with a few
> > hundred classes.
> >
> > Previously, scalability could be improved by increasing HTB_HSIZE, modify
> > the hash function, and recompile, but this patch works for everyone
> > without recompile and scales better too.
>
> I agree that the current fixed sized hashes (additionally quite
> small by default) are a big problem with many classes, for all of
> HTB/HFSC/CBQ. But I think your approach is a bit wasteful, with
> unfortunately chosen classids 128 classes are enough to reach the
> maximum memory usage of ~512kb (with 4k pages and 8 byte pointers).

I think it is a non-issue since it does not happen in practice.

Generally there are two ways to assign classids:

1) Manually, used when you have few classes. People usually use 100, 101, 102, 
200, 201 etc (probably unaware that they are hex). With 4k pages and 32bit 
pointers, everything below classid 400 is within the first page, which covers 
most "few classes" examples you can find lying around.

2) Script-generated; in practice this is required if you have many classes. 
The classids will then usually be consecutive, at least in large chunks, 
which means minimal memory waste, and an optimal case for plain linear 
lookup; hashing them can only be wasteful.


> I have a patch for HFSC which introduces dynamic resizing of the
> class hash. I have planned to generalize it (similar to tcf_hashinfo)
> and convert HTB and CBQ as well, which as a nice side effect will
> allow to get rid of some duplicated code, like hash walking.

I have not been looking into HFSC and CBQ, was not aware that they have 
similar issues.


> If you give me a few days I'll try to finish and post it.

Memory is generally not an issue, but CPU is, and you can not beat the CPU 
efficiency of plain array lookup (always faster, and constant time).

If anything, I would find it more relevant to use array lookup with dynamic 
adjustment of the array size (HTB_CLS_ARRAY_SIZE in my patch); start out 
small to waste less memory, increase up to PAGE_SIZE as needed.

But then, it is probably too much effort for the gain (a few kb's in machines 
that should have plenty of RAM anyway), and requires more code => more 
complexity, bugs, maintenance.


Regards
Simon

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [LARTC] Re: [PATCH] HTB O(1) class lookup
@ 2007-02-01  7:08     ` Simon Lodal
  0 siblings, 0 replies; 21+ messages in thread
From: Simon Lodal @ 2007-02-01  7:08 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: netdev, lartc

On Thursday 01 February 2007 07:08, Patrick McHardy wrote:
> Simon Lodal wrote:
> > This patch changes HTB's class storage from hash+lists to a two-level
> > linear array, so it can do constant time (O(1)) class lookup by classid.
> > It improves scalability for large number of classes.
> >
> > Without the patch, ~14k htb classes can starve a Xeon-3.2 at only 15kpps,
> > using most of it's cycles traversing lists in htb_find(). The patch
> > eliminates this problem, and has a measurable impact even with a few
> > hundred classes.
> >
> > Previously, scalability could be improved by increasing HTB_HSIZE, modify
> > the hash function, and recompile, but this patch works for everyone
> > without recompile and scales better too.
>
> I agree that the current fixed sized hashes (additionally quite
> small by default) are a big problem with many classes, for all of
> HTB/HFSC/CBQ. But I think your approach is a bit wasteful, with
> unfortunately chosen classids 128 classes are enough to reach the
> maximum memory usage of ~512kb (with 4k pages and 8 byte pointers).

I think it is a non-issue since it does not happen in practice.

Generally there are two ways to assign classids:

1) Manually, used when you have few classes. People usually use 100, 101, 102, 
200, 201 etc (probably unaware that they are hex). With 4k pages and 32bit 
pointers, everything below classid 400 is within the first page, which covers 
most "few classes" examples you can find lying around.

2) Script generated, in practice this is required if you have many classes. 
The classid's will then usually be consecutive, at least in large chunks, 
which means minimal memory waste, and an optimal case for plain linear 
lookup; hashing them can only be wasteful.


> I have a patch for HFSC which introduces dynamic resizing of the
> class hash. I have planned to generalize it (similar to tcf_hashinfo)
> and convert HTB and CBQ as well, which as a nice side effect will
> allow to get rid of some duplicated code, like hash walking.

I have not been looking into HFSC and CBQ, was not aware that they have 
similar issues.


> If you give me a few days I'll try to finish and post it.

Memory is generally not an issue, but CPU is, and you can not beat the CPU 
efficiency of plain array lookup (always faster, and constant time).

If anything, I would find it more relevant to use array lookup with dynamic 
adjustment of the array size (HTB_CLS_ARRAY_SIZE in my patch); start out 
small to waste less memory, increase up to PAGE_SIZE as needed.

But then, it is probably too much effort for the gain (a few kb's in machines 
that should have plenty of RAM anyway), and requires more code => more 
complexity, bugs, maintenance.


Regards
Simon
_______________________________________________
LARTC mailing list
LARTC@mailman.ds9a.nl
http://mailman.ds9a.nl/cgi-bin/mailman/listinfo/lartc

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] HTB O(1) class lookup
  2007-02-01  7:08     ` [LARTC] " Simon Lodal
  (?)
@ 2007-02-01 11:30     ` Andi Kleen
  2007-02-05 10:16         ` [LARTC] " Jarek Poplawski
  2007-02-05 18:21         ` [LARTC] " Simon Lodal
  -1 siblings, 2 replies; 21+ messages in thread
From: Andi Kleen @ 2007-02-01 11:30 UTC (permalink / raw)
  To: Simon Lodal; +Cc: Patrick McHardy, netdev, lartc

Simon Lodal <simonl@parknet.dk> writes:
> 
> Memory is generally not an issue, but CPU is, and you can not beat the CPU 
> efficiency of plain array lookup (always faster, and constant time).

Actually that's not true when the array doesn't fit in cache.

The cost of going out to memory over caches is so large (factor 100 and more) 
that often algorithms with smaller cache footprint can easily beat
algorithms that execute much less instructions if it has less cache misses. 
That is because not all instructions have the same cost; anything
in core is very fast but going out to memory is very slow.

That said I think I agree with your analysis that a two level
array is probably the right data structure for this and likely
not less cache efficient than the hash table.

And the worst memory consumption case considered by Patrick should
be relatively unlikely.

-Andi

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] HTB O(1) class lookup
  2007-02-01  6:08   ` [LARTC] " Patrick McHardy
  (?)
  (?)
@ 2007-02-01 13:06   ` jamal
  -1 siblings, 0 replies; 21+ messages in thread
From: jamal @ 2007-02-01 13:06 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: Simon Lodal, netdev, lartc

On Thu, 2007-01-02 at 07:08 +0100, Patrick McHardy wrote:

> 
> I have a patch for HFSC which introduces dynamic resizing of the
> class hash. 

One thing that has bitten me recently was tests to try and see how far i
can go insert xfrm SAD/SPDs - the resizing of the hashes kept allocing
more and more space until i ran out of memory, then swap took over and
hell broke loose. It would be nice in your approach to keep a
configurable upper bound on how much mem a hash table can chew.

> I have planned to generalize it (similar to tcf_hashinfo)
> and convert HTB and CBQ as well, which as a nice side effect will
> allow to get rid of some duplicated code, like hash walking.
> 

You know what would be really nice is a generic piece of code that would
apply for all sorts of netcode that uses hashes (theres a huge amount of
such code) and then converting over slowly all users to it: All
attributes to such hashes are known, max-size, hash() etc. The
tcf_hashinfo is a good start template for such an effort.

cheers,
jamal


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [LARTC] [PATCH] HTB O(1) class lookup
  2007-02-01  5:18 ` [LARTC] " Simon Lodal
  (?)
  (?)
@ 2007-02-01 22:44 ` Konrad Cempura
  -1 siblings, 0 replies; 21+ messages in thread
From: Konrad Cempura @ 2007-02-01 22:44 UTC (permalink / raw)
  To: lartc

Simon Lodal napisał(a):
> The patch is for 2.6.20-rc6, I have older ones for 2.6.18 and 2.6.19 if anyone 
> is interested.

It's working also on 2.6.20-rc7.

I'm testing it and I'm impressed. Good work :)

_______________________________________________
LARTC mailing list
LARTC@mailman.ds9a.nl
http://mailman.ds9a.nl/cgi-bin/mailman/listinfo/lartc

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] HTB O(1) class lookup
  2007-02-01 11:30     ` Andi Kleen
@ 2007-02-05 10:16         ` Jarek Poplawski
  2007-02-05 18:21         ` [LARTC] " Simon Lodal
  1 sibling, 0 replies; 21+ messages in thread
From: Jarek Poplawski @ 2007-02-05 10:16 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Simon Lodal, Patrick McHardy, netdev, lartc

On 01-02-2007 12:30, Andi Kleen wrote:
> Simon Lodal <simonl@parknet.dk> writes:
>> Memory is generally not an issue, but CPU is, and you can not beat the CPU 
>> efficiency of plain array lookup (always faster, and constant time).

Probably for some old (or embedded) lean boxes used for
small network routers, with memory hungry iptables -
memory could be an issue.

> Actually that's not true when the array doesn't fit in cache.
> 
> The cost of going out to memory over caches is so large (factor 100 and more) 
> that often algorithms with smaller cache footprint can easily beat
> algorithms that execute much less instructions if it has less cache misses. 
> That is because not all instructions have the same cost; anything
> in core is very fast but going out to memory is very slow.
> 
> That said I think I agree with your analysis that a two level
> array is probably the right data structure for this and likely
> not less cache efficient than the hash table.

Strange - it seems you gave only arguments against this
analysis...

> And the worst memory consumption case considered by Patrick should
> be relatively unlikely.

Anyway, such approach, that most users do something
this (reasonable) way, doesn't look like good
programming practice.

I wonder, why not try, at least for a while, to do this
a compile (menuconfig) option with a comment:
recommended for a large number of classes. After hash
optimization and some testing, final decisions could be
made.

Regards,
Jarek P.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [LARTC] Re: [PATCH] HTB O(1) class lookup
@ 2007-02-05 10:16         ` Jarek Poplawski
  0 siblings, 0 replies; 21+ messages in thread
From: Jarek Poplawski @ 2007-02-05 10:16 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Simon Lodal, Patrick McHardy, netdev, lartc

On 01-02-2007 12:30, Andi Kleen wrote:
> Simon Lodal <simonl@parknet.dk> writes:
>> Memory is generally not an issue, but CPU is, and you can not beat the CPU 
>> efficiency of plain array lookup (always faster, and constant time).

Probably for some old (or embedded) lean boxes used for
small network routers, with memory hungry iptables -
memory could be an issue.

> Actually that's not true when the array doesn't fit in cache.
> 
> The cost of going out to memory over caches is so large (factor 100 and more) 
> that often algorithms with smaller cache footprint can easily beat
> algorithms that execute much less instructions if it has less cache misses. 
> That is because not all instructions have the same cost; anything
> in core is very fast but going out to memory is very slow.
> 
> That said I think I agree with your analysis that a two level
> array is probably the right data structure for this and likely
> not less cache efficient than the hash table.

Strange - it seems you gave only arguments against this
analysis...

> And the worst memory consumption case considered by Patrick should
> be relatively unlikely.

Anyway, such approach, that most users do something
this (reasonable) way, doesn't look like good
programming practice.

I wonder, why not try, at least for a while, to do this
a compile (menuconfig) option with a comment:
recommended for a large number of classes. After hash
optimization and some testing, final decisions could be
made.

Regards,
Jarek P.
_______________________________________________
LARTC mailing list
LARTC@mailman.ds9a.nl
http://mailman.ds9a.nl/cgi-bin/mailman/listinfo/lartc

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] HTB O(1) class lookup
  2007-02-05 10:16         ` [LARTC] " Jarek Poplawski
  (?)
@ 2007-02-05 11:24         ` Andi Kleen
  2007-02-05 12:45           ` Ingo Oeser
  -1 siblings, 1 reply; 21+ messages in thread
From: Andi Kleen @ 2007-02-05 11:24 UTC (permalink / raw)
  To: Jarek Poplawski; +Cc: Simon Lodal, Patrick McHardy, netdev, lartc

On Monday 05 February 2007 11:16, Jarek Poplawski wrote:

> 
> Strange - it seems you gave only arguments against this
> analysis...

For a naturally clustered key space (as is common in this case) the two 
level structure is likely more cache efficient than a generic hash function. 
That is because the hash will likely spread out the natural clusters and then require
more cache lines to access them because there will be less sharing.

Ok in theory a very tuned for this case hash function might have similar 
properties, but normally people don't put that much care into 
designing hashes and just use some generic one.

> > And the worst memory consumption case considered by Patrick should
> > be relatively unlikely.
> 
> Anyway, such approach, that most users do something
> this (reasonable) way, doesn't look like good
> programming practice.

In the unlikely worst case they will get half a MB of tables. Hardly a 
show stopper. 

> I wonder, why not try, at least for a while, to do this
> a compile (menuconfig) option with a comment:
> recommended for a large number of classes. After hash
> optimization and some testing, final decisions could be
> made.

There are already far too many obscure CONFIGs. Don't add more.

-Andi
 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] HTB O(1) class lookup
  2007-02-05 11:24         ` Andi Kleen
@ 2007-02-05 12:45           ` Ingo Oeser
  0 siblings, 0 replies; 21+ messages in thread
From: Ingo Oeser @ 2007-02-05 12:45 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Jarek Poplawski, Simon Lodal, Patrick McHardy, netdev, lartc

Andi Kleen schrieb:
> On Monday 05 February 2007 11:16, Jarek Poplawski wrote:
> > I wonder, why not try, at least for a while, to do this
> > a compile (menuconfig) option with a comment:
> > recommended for a large number of classes. After hash
> > optimization and some testing, final decisions could be
> > made.
> 
> There are already far too many obscure CONFIGs. Don't add more.

And for people concerned about memory usage: There is always CONFIG_SMALL
for such decisions. Maybe one can limit worst case and average memory usage 
based on this config. The algorithm should stay the same.

Regards

Ingo Oeser

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] HTB O(1) class lookup
  2007-02-05 10:16         ` [LARTC] " Jarek Poplawski
@ 2007-02-05 17:14           ` Simon Lodal
  -1 siblings, 0 replies; 21+ messages in thread
From: Simon Lodal @ 2007-02-05 17:14 UTC (permalink / raw)
  To: Jarek Poplawski; +Cc: Andi Kleen, Patrick McHardy, netdev, lartc, Ingo Oeser

On Monday 05 February 2007 11:16, Jarek Poplawski wrote:
> On 01-02-2007 12:30, Andi Kleen wrote:
> > Simon Lodal <simonl@parknet.dk> writes:
> >> Memory is generally not an issue, but CPU is, and you can not beat the
> >> CPU efficiency of plain array lookup (always faster, and constant time).
>
> Probably for some old (or embedded) lean boxes used for
> small network routers, with memory hungry iptables -
> memory could be an issue.

Sure, but if they are that constrained they probably do not run HTB in the 
first place.

We are talking about 4k initially, up to 256k worst case (or 512k if your 
router is 64bit, unlikely if "small" is a priority).

> > And the worst memory consumption case considered by Patrick should
> > be relatively unlikely.
>
> Anyway, such approach, that most users do something
> this (reasonable) way, doesn't look like good
> programming practice.

The current hash algorithm also assumes certain usage patterns, namely that 
you choose classids that generate different hash keys (= distribute uniformly 
across the buckets), or scalability will suffer very quickly. Even at 64 
classes you would probably see htb_find() near the top of a profiling 
analysis.

But I would say that it is just as unlikely as choosing 64 classids that cause 
my patch to allocate all 256k.

In these unlikely cases, my patch only wastes passive memory, while the 
current htb wastes cpu to a point where it can severely limit routing 
performance.


> I wonder, why not try, at least for a while, to do this
> a compile (menuconfig) option with a comment:
> recommended for a large number of classes. After hash
> optimization and some testing, final decisions could be
> made.

I decided not to do it because it would mean too many ifdefs 
(ugly+unmaintainable code).


Regards
Simon

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [LARTC] Re: [PATCH] HTB O(1) class lookup
@ 2007-02-05 17:14           ` Simon Lodal
  0 siblings, 0 replies; 21+ messages in thread
From: Simon Lodal @ 2007-02-05 17:14 UTC (permalink / raw)
  To: Jarek Poplawski; +Cc: Andi Kleen, Patrick McHardy, netdev, lartc, Ingo Oeser

On Monday 05 February 2007 11:16, Jarek Poplawski wrote:
> On 01-02-2007 12:30, Andi Kleen wrote:
> > Simon Lodal <simonl@parknet.dk> writes:
> >> Memory is generally not an issue, but CPU is, and you can not beat the
> >> CPU efficiency of plain array lookup (always faster, and constant time).
>
> Probably for some old (or embedded) lean boxes used for
> small network routers, with memory hungry iptables -
> memory could be an issue.

Sure, but if they are that constrained they probably do not run HTB in the 
first place.

We are talking about 4k initially, up to 256k worst case (or 512k if your 
router is 64bit, unlikely if "small" is a priority).

> > And the worst memory consumption case considered by Patrick should
> > be relatively unlikely.
>
> Anyway, such approach, that most users do something
> this (reasonable) way, doesn't look like good
> programming practice.

The current hash algorithm also assumes certain usage patterns, namely that 
you choose classids that generate different hash keys (= distribute uniformly 
across the buckets), or scalability will suffer very quickly. Even at 64 
classes you would probably see htb_find() near the top of a profiling 
analysis.

But I would say that it is just as unlikely as choosing 64 classids that cause 
my patch to allocate all 256k.

In these unlikely cases, my patch only wastes passive memory, while the 
current htb wastes cpu to a point where it can severely limit routing 
performance.


> I wonder, why not try, at least for a while, to do this
> a compile (menuconfig) option with a comment:
> recommended for a large number of classes. After hash
> optimization and some testing, final decisions could be
> made.

I decided not to do it because it would mean too many ifdefs 
(ugly+unmaintainable code).


Regards
Simon
_______________________________________________
LARTC mailing list
LARTC@mailman.ds9a.nl
http://mailman.ds9a.nl/cgi-bin/mailman/listinfo/lartc

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] HTB O(1) class lookup
  2007-02-01 11:30     ` Andi Kleen
@ 2007-02-05 18:21         ` Simon Lodal
  2007-02-05 18:21         ` [LARTC] " Simon Lodal
  1 sibling, 0 replies; 21+ messages in thread
From: Simon Lodal @ 2007-02-05 18:21 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Patrick McHardy, netdev, lartc

On Thursday 01 February 2007 12:30, Andi Kleen wrote:
> Simon Lodal <simonl@parknet.dk> writes:
> > Memory is generally not an issue, but CPU is, and you can not beat the
> > CPU efficiency of plain array lookup (always faster, and constant time).
>
> Actually that's not true when the array doesn't fit in cache.
>
> The cost of going out to memory over caches is so large (factor 100 and
> more) that often algorithms with smaller cache footprint can easily beat
> algorithms that execute much less instructions if it has less cache misses.
> That is because not all instructions have the same cost; anything
> in core is very fast but going out to memory is very slow.
>
> That said I think I agree with your analysis that a two level
> array is probably the right data structure for this and likely
> not less cache efficient than the hash table.

Good point.

The 2-level lookup generates 3 memory accesses (including getting at the 
htb_class struct). List traversal will generate many more memory accesses, 
unless the lists have 3 or fewer entries (currently that only holds true for 
up to 48 classes, uniformly distributed).

It is difficult to judge if the tables will be in cache or not. The tables are 
clearly extra baggage for the cachelines, compared to only having the 
htb_class structs (they are going to be fetched anyway).

On the other hand, if you have 10k classes, they are usually (real world) 
allocated for individual users, of which at most half are active at any time. 
With hashing, all 10k classes are fetched into cachelines all the time, only 
in order to traverse lists. That is >150k wasted cache (5000 x 32 bytes); 
plenty for 10k pointers in lookup tables.


Regards
Simon

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [LARTC] Re: [PATCH] HTB O(1) class lookup
@ 2007-02-05 18:21         ` Simon Lodal
  0 siblings, 0 replies; 21+ messages in thread
From: Simon Lodal @ 2007-02-05 18:21 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Patrick McHardy, netdev, lartc

On Thursday 01 February 2007 12:30, Andi Kleen wrote:
> Simon Lodal <simonl@parknet.dk> writes:
> > Memory is generally not an issue, but CPU is, and you can not beat the
> > CPU efficiency of plain array lookup (always faster, and constant time).
>
> Actually that's not true when the array doesn't fit in cache.
>
> The cost of going out to memory over caches is so large (factor 100 and
> more) that often algorithms with smaller cache footprint can easily beat
> algorithms that execute much less instructions if it has less cache misses.
> That is because not all instructions have the same cost; anything
> in core is very fast but going out to memory is very slow.
>
> That said I think I agree with your analysis that a two level
> array is probably the right data structure for this and likely
> not less cache efficient than the hash table.

Good point.

The 2-level lookup generates 3 memory accesses (including getting at the 
htb_class struct). List traversal will generate many more memory accesses, 
unless the lists have 3 or fewer entries (currently that only holds true for 
up to 48 classes, uniformly distributed).

It is difficult to judge if the tables will be in cache or not. The tables are 
clearly extra baggage for the cachelines, compared to only having the 
htb_class structs (they are going to be fetched anyway).

On the other hand, if you have 10k classes, they are usually (real world) 
allocated for individual users, of which at most half are active at any time. 
With hashing, all 10k classes are fetched into cachelines all the time, only 
in order to traverse lists. That is >150k wasted cache (5000 x 32 bytes); 
plenty for 10k pointers in lookup tables.


Regards
Simon
_______________________________________________
LARTC mailing list
LARTC@mailman.ds9a.nl
http://mailman.ds9a.nl/cgi-bin/mailman/listinfo/lartc

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] HTB O(1) class lookup
  2007-02-05 17:14           ` [LARTC] " Simon Lodal
@ 2007-02-06  8:08             ` Jarek Poplawski
  -1 siblings, 0 replies; 21+ messages in thread
From: Jarek Poplawski @ 2007-02-06  8:08 UTC (permalink / raw)
  To: Simon Lodal; +Cc: Andi Kleen, Patrick McHardy, netdev, lartc, Ingo Oeser

On Mon, Feb 05, 2007 at 06:14:13PM +0100, Simon Lodal wrote:
> On Monday 05 February 2007 11:16, Jarek Poplawski wrote:
> > On 01-02-2007 12:30, Andi Kleen wrote:
> > > Simon Lodal <simonl@parknet.dk> writes:
> > >> Memory is generally not an issue, but CPU is, and you can not beat the
> > >> CPU efficiency of plain array lookup (always faster, and constant time).
> >
> > Probably for some old (or embedded) lean boxes used for
> > small network routers, with memory hungry iptables -
> > memory could be an issue.
> 
> Sure, but if they are that constrained they probably do not run HTB in the 
> first place.
> 
> We are talking about 4k initially, up to 256k worst case (or 512k if your 
> router is 64bit, unlikely if "small" is a priority).
> 
> > > And the worst memory consumption case considered by Patrick should
> > > be relatively unlikely.
> >
> > Anyway, such approach, that most users do something
> > this (reasonable) way, doesn't look like good
> > programming practice.
> 
> The current hash algorithm also assumes certain usage patterns, namely that 
> you choose classids that generate different hash keys (= distribute uniformly 
> across the buckets), or scalability will suffer very quickly. Even at 64 
> classes you would probably see htb_find() near the top of a profiling 
> analysis.
> 
> But I would say that it is just as unlikely as choosing 64 classids that cause 
> my patch to allocate all 256k.
> 
> In these unlikely cases, my patch only wastes passive memory, while the 
> current htb wastes cpu to a point where it can severely limit routing 
> performance.
> 
> 
> > I wonder, why not try, at least for a while, to do this
> > a compile (menuconfig) option with a comment:
> > recommended for a large number of classes. After hash
> > optimization and some testing, final decisions could be
> > made.
> 
> I decided not to do it because it would mean too many ifdefs 
> (ugly+unmaintanable code).

As a matter of fact Andi's recommendation is enough
for me. In his first message he wrote "probably the
right data structure for this", so I thought: why
not test and make sure. It should be easier without
removing current solution. But his second message
convinced me.

Generally I think 512k (or even 256k) should matter
and don't agree HTB is not for constrained ones. It
could be dangerous attitude if every module in the
kernel were so "generous". And it could be contagious:
others don't care - why should I?

Some time ago low memory requirements and possibility
to run on older boxes were strong arguments for linux.
Did we give it up to BSDs?

So I only wanted to make sure there would be a real
gain, because, for consistency, probably the same
model should be used with others (CBQ, HFSC).

Cheers,
Jarek P.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [LARTC] Re: [PATCH] HTB O(1) class lookup
@ 2007-02-06  8:08             ` Jarek Poplawski
  0 siblings, 0 replies; 21+ messages in thread
From: Jarek Poplawski @ 2007-02-06  8:08 UTC (permalink / raw)
  To: Simon Lodal; +Cc: Andi Kleen, Patrick McHardy, netdev, lartc, Ingo Oeser

On Mon, Feb 05, 2007 at 06:14:13PM +0100, Simon Lodal wrote:
> On Monday 05 February 2007 11:16, Jarek Poplawski wrote:
> > On 01-02-2007 12:30, Andi Kleen wrote:
> > > Simon Lodal <simonl@parknet.dk> writes:
> > >> Memory is generally not an issue, but CPU is, and you can not beat the
> > >> CPU efficiency of plain array lookup (always faster, and constant time).
> >
> > Probably for some old (or embedded) lean boxes used for
> > small network routers, with memory hungry iptables -
> > memory could be an issue.
> 
> Sure, but if they are that constrained they probably do not run HTB in the 
> first place.
> 
> We are talking about 4k initially, up to 256k worst case (or 512k if your 
> router is 64bit, unlikely if "small" is a priority).
> 
> > > And the worst memory consumption case considered by Patrick should
> > > be relatively unlikely.
> >
> > Anyway, such approach, that most users do something
> > this (reasonable) way, doesn't look like good
> > programming practice.
> 
> The current hash algorithm also assumes certain usage patterns, namely that 
> you choose classids that generate different hash keys (= distribute uniformly 
> across the buckets), or scalability will suffer very quickly. Even at 64 
> classes you would probably see htb_find() near the top of a profiling 
> analysis.
> 
> But I would say that it is just as unlikely as choosing 64 classids that cause 
> my patch to allocate all 256k.
> 
> In these unlikely cases, my patch only wastes passive memory, while the 
> current htb wastes cpu to a point where it can severely limit routing 
> performance.
> 
> 
> > I wonder, why not try, at least for a while, to do this
> > a compile (menuconfig) option with a comment:
> > recommended for a large number of classes. After hash
> > optimization and some testing, final decisions could be
> > made.
> 
> I decided not to do it because it would mean too many ifdefs 
> (ugly+unmaintanable code).

As a matter of fact Andi's recommendation is enough
for me. In his first message he wrote "probably the
right data structure for this", so I thought: why
not test and make sure. It should be easier without
removing current solution. But his second message
convinced me.

Generally I think 512k (or even 256k) should matter
and don't agree HTB is not for constrained ones. It
could be dangerous attitude if every module in the
kernel were so "generous". And it could be contagious:
others don't care - why should I?

Some time ago low memory requirements and possibility
to run on older boxes were strong arguments for linux.
Did we give it up to BSDs?

So I only wanted to make sure there would be a real
gain, because, for consistency, probably the same
model should be used with others (CBQ, HFSC).

Cheers,
Jarek P.
_______________________________________________
LARTC mailing list
LARTC@mailman.ds9a.nl
http://mailman.ds9a.nl/cgi-bin/mailman/listinfo/lartc

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] HTB O(1) class lookup
  2007-02-05 17:14           ` [LARTC] " Simon Lodal
@ 2007-02-08  7:36             ` Jarek Poplawski
  -1 siblings, 0 replies; 21+ messages in thread
From: Jarek Poplawski @ 2007-02-08  7:36 UTC (permalink / raw)
  To: Simon Lodal; +Cc: Andi Kleen, Patrick McHardy, netdev, lartc, Ingo Oeser

On Mon, Feb 05, 2007 at 06:14:13PM +0100, Simon Lodal wrote:
...
> Regards
...

It seems decisions makers need more time, so I'd add 
2 cents more:

1c: an indentation could be improved (spaces around
operators), like in these places:

>+#define HTB_MAX_CLS		(TC_H_MIN(-1)+1)
...
>+	htb_cls_array* a;
...
>+	int cnt,done;

etc.

2c: it is a question of taste, but here:

> 		err = -ENOBUFS;
>+		if (q->classes[HTB_CLS_ARRAY(minorid)] == NULL) {
>+			if ((q->classes[HTB_CLS_ARRAY(minorid)] = 
>+			     kzalloc(sizeof(htb_cls_array), GFP_KERNEL))
>+			    == NULL)
>+				goto failure;
>+		}
> 		if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
> 			goto failure;

it would be probably more readable and a bit merciful
to the stressed system to free this htb_cls_array after
the last error (I know it's not a leak).

Regards,
Jarek P.

PS: 1c extra - it's easier to read a diff if you use -p option. 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [LARTC] Re: [PATCH] HTB O(1) class lookup
@ 2007-02-08  7:36             ` Jarek Poplawski
  0 siblings, 0 replies; 21+ messages in thread
From: Jarek Poplawski @ 2007-02-08  7:36 UTC (permalink / raw)
  To: Simon Lodal; +Cc: Andi Kleen, Patrick McHardy, netdev, lartc, Ingo Oeser

On Mon, Feb 05, 2007 at 06:14:13PM +0100, Simon Lodal wrote:
...
> Regards
...

It seems decision makers need more time, so I'd add
2 cents more:

1c: an indentation could be improved (spaces around
operators), like in these places:

>+#define HTB_MAX_CLS		(TC_H_MIN(-1)+1)
...
>+	htb_cls_array* a;
...
>+	int cnt,done;

etc.

2c: it is a question of taste, but here:

> 		err = -ENOBUFS;
>+		if (q->classes[HTB_CLS_ARRAY(minorid)] == NULL) {
>+			if ((q->classes[HTB_CLS_ARRAY(minorid)] = 
>+			     kzalloc(sizeof(htb_cls_array), GFP_KERNEL))
>+			    == NULL)
>+				goto failure;
>+		}
> 		if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
> 			goto failure;

it would be probably more readable and a bit merciful
to the stressed system to free this htb_cls_array after
the last error (I know it's not a leak).

Regards,
Jarek P.

PS: 1c extra - it's easier to read a diff if you use -p option. 
_______________________________________________
LARTC mailing list
LARTC@mailman.ds9a.nl
http://mailman.ds9a.nl/cgi-bin/mailman/listinfo/lartc

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2007-02-08  7:36 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-02-01  5:18 [PATCH] HTB O(1) class lookup Simon Lodal
2007-02-01  5:18 ` [LARTC] " Simon Lodal
2007-02-01  6:08 ` Patrick McHardy
2007-02-01  6:08   ` [LARTC] " Patrick McHardy
2007-02-01  7:08   ` Simon Lodal
2007-02-01  7:08     ` [LARTC] " Simon Lodal
2007-02-01 11:30     ` Andi Kleen
2007-02-05 10:16       ` Jarek Poplawski
2007-02-05 10:16         ` [LARTC] " Jarek Poplawski
2007-02-05 11:24         ` Andi Kleen
2007-02-05 12:45           ` Ingo Oeser
2007-02-05 17:14         ` Simon Lodal
2007-02-05 17:14           ` [LARTC] " Simon Lodal
2007-02-06  8:08           ` Jarek Poplawski
2007-02-06  8:08             ` [LARTC] " Jarek Poplawski
2007-02-08  7:36           ` Jarek Poplawski
2007-02-08  7:36             ` [LARTC] " Jarek Poplawski
2007-02-05 18:21       ` Simon Lodal
2007-02-05 18:21         ` [LARTC] " Simon Lodal
2007-02-01 13:06   ` jamal
2007-02-01 22:44 ` [LARTC] " Konrad Cempura

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.