All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 net-next 00/14] net_sched: first series for RTNL-less qdisc dumps
@ 2024-04-18  7:32 Eric Dumazet
  2024-04-18  7:32 ` [PATCH v2 net-next 01/14] net_sched: sch_fq: implement lockless fq_dump() Eric Dumazet
                   ` (15 more replies)
  0 siblings, 16 replies; 29+ messages in thread
From: Eric Dumazet @ 2024-04-18  7:32 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Jamal Hadi Salim, Simon Horman, Toke Høiland-Jørgensen,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet, Eric Dumazet

Medium term goal is to implement "tc qdisc show" without needing
to acquire RTNL.

This first series makes the requested changes in 14 qdiscs.

Notes :

 - RTNL is still held in "tc qdisc show", more changes are needed.

 - Qdiscs returning many attributes might want/need to provide
   a consistent set of attributes. If that is the case, their
   dump() method could acquire the qdisc spinlock, to pair with the
   spinlock acquisition in their change() method.

V2: Addressed Simon feedback (Thanks a lot Simon)

Eric Dumazet (14):
  net_sched: sch_fq: implement lockless fq_dump()
  net_sched: cake: implement lockless cake_dump()
  net_sched: sch_cbs: implement lockless cbs_dump()
  net_sched: sch_choke: implement lockless choke_dump()
  net_sched: sch_codel: implement lockless codel_dump()
  net_sched: sch_etf: implement lockless etf_dump()
  net_sched: sch_ets: implement lockless ets_dump()
  net_sched: sch_fifo: implement lockless __fifo_dump()
  net_sched: sch_fq_codel: implement lockless fq_codel_dump()
  net_sched: sch_fq_pie: implement lockless fq_pie_dump()
  net_sched: sch_hfsc: implement lockless accesses to q->defcls
  net_sched: sch_hhf: implement lockless hhf_dump()
  net_sched: sch_pie: implement lockless pie_dump()
  net_sched: sch_skbprio: implement lockless skbprio_dump()

 include/net/red.h        |  12 ++---
 net/sched/sch_cake.c     | 110 ++++++++++++++++++++++-----------------
 net/sched/sch_cbs.c      |  20 +++----
 net/sched/sch_choke.c    |  21 ++++----
 net/sched/sch_codel.c    |  29 +++++++----
 net/sched/sch_etf.c      |  10 ++--
 net/sched/sch_ets.c      |  25 +++++----
 net/sched/sch_fifo.c     |  13 ++---
 net/sched/sch_fq.c       | 108 ++++++++++++++++++++++++--------------
 net/sched/sch_fq_codel.c |  57 ++++++++++++--------
 net/sched/sch_fq_pie.c   |  61 ++++++++++++----------
 net/sched/sch_hfsc.c     |   9 ++--
 net/sched/sch_hhf.c      |  35 ++++++++-----
 net/sched/sch_pie.c      |  39 +++++++-------
 net/sched/sch_skbprio.c  |   8 +--
 15 files changed, 323 insertions(+), 234 deletions(-)

-- 
2.44.0.683.g7961c838ac-goog


^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH v2 net-next 01/14] net_sched: sch_fq: implement lockless fq_dump()
  2024-04-18  7:32 [PATCH v2 net-next 00/14] net_sched: first series for RTNL-less qdisc dumps Eric Dumazet
@ 2024-04-18  7:32 ` Eric Dumazet
  2024-04-18 13:55   ` Simon Horman
  2024-04-18  7:32 ` [PATCH v2 net-next 02/14] net_sched: cake: implement lockless cake_dump() Eric Dumazet
                   ` (14 subsequent siblings)
  15 siblings, 1 reply; 29+ messages in thread
From: Eric Dumazet @ 2024-04-18  7:32 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Jamal Hadi Salim, Simon Horman, Toke Høiland-Jørgensen,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet, Eric Dumazet

Instead of relying on RTNL, fq_dump() can use READ_ONCE()
annotations, paired with WRITE_ONCE() in fq_change()

v2: Addressed Simon feedback in V1: https://lore.kernel.org/netdev/20240416181915.GT2320920@kernel.org/

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/sched/sch_fq.c | 108 +++++++++++++++++++++++++++++----------------
 1 file changed, 69 insertions(+), 39 deletions(-)

diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index cdf23ff16f40bf244bb822e76016fde44e0c439b..238974725679327b0a0d483c011e15fc94ab0878 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -106,6 +106,8 @@ struct fq_perband_flows {
 	int		    quantum; /* based on band nr : 576KB, 192KB, 64KB */
 };
 
+#define FQ_PRIO2BAND_CRUMB_SIZE ((TC_PRIO_MAX + 1) >> 2)
+
 struct fq_sched_data {
 /* Read mostly cache line */
 
@@ -122,7 +124,7 @@ struct fq_sched_data {
 	u8		rate_enable;
 	u8		fq_trees_log;
 	u8		horizon_drop;
-	u8		prio2band[(TC_PRIO_MAX + 1) >> 2];
+	u8		prio2band[FQ_PRIO2BAND_CRUMB_SIZE];
 	u32		timer_slack; /* hrtimer slack in ns */
 
 /* Read/Write fields. */
@@ -159,7 +161,7 @@ struct fq_sched_data {
 /* return the i-th 2-bit value ("crumb") */
 static u8 fq_prio2band(const u8 *prio2band, unsigned int prio)
 {
-	return (prio2band[prio / 4] >> (2 * (prio & 0x3))) & 0x3;
+	return (READ_ONCE(prio2band[prio / 4]) >> (2 * (prio & 0x3))) & 0x3;
 }
 
 /*
@@ -888,7 +890,7 @@ static int fq_resize(struct Qdisc *sch, u32 log)
 		fq_rehash(q, old_fq_root, q->fq_trees_log, array, log);
 
 	q->fq_root = array;
-	q->fq_trees_log = log;
+	WRITE_ONCE(q->fq_trees_log, log);
 
 	sch_tree_unlock(sch);
 
@@ -927,11 +929,15 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
 static void fq_prio2band_compress_crumb(const u8 *in, u8 *out)
 {
 	const int num_elems = TC_PRIO_MAX + 1;
+	u8 tmp[FQ_PRIO2BAND_CRUMB_SIZE];
 	int i;
 
-	memset(out, 0, num_elems / 4);
+	memset(tmp, 0, sizeof(tmp));
 	for (i = 0; i < num_elems; i++)
-		out[i / 4] |= in[i] << (2 * (i & 0x3));
+		tmp[i / 4] |= in[i] << (2 * (i & 0x3));
+
+	for (i = 0; i < FQ_PRIO2BAND_CRUMB_SIZE; i++)
+		WRITE_ONCE(out[i], tmp[i]);
 }
 
 static void fq_prio2band_decompress_crumb(const u8 *in, u8 *out)
@@ -958,7 +964,7 @@ static int fq_load_weights(struct fq_sched_data *q,
 		}
 	}
 	for (i = 0; i < FQ_BANDS; i++)
-		q->band_flows[i].quantum = weights[i];
+		WRITE_ONCE(q->band_flows[i].quantum, weights[i]);
 	return 0;
 }
 
@@ -1011,16 +1017,18 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
 			err = -EINVAL;
 	}
 	if (tb[TCA_FQ_PLIMIT])
-		sch->limit = nla_get_u32(tb[TCA_FQ_PLIMIT]);
+		WRITE_ONCE(sch->limit,
+			   nla_get_u32(tb[TCA_FQ_PLIMIT]));
 
 	if (tb[TCA_FQ_FLOW_PLIMIT])
-		q->flow_plimit = nla_get_u32(tb[TCA_FQ_FLOW_PLIMIT]);
+		WRITE_ONCE(q->flow_plimit,
+			   nla_get_u32(tb[TCA_FQ_FLOW_PLIMIT]));
 
 	if (tb[TCA_FQ_QUANTUM]) {
 		u32 quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]);
 
 		if (quantum > 0 && quantum <= (1 << 20)) {
-			q->quantum = quantum;
+			WRITE_ONCE(q->quantum, quantum);
 		} else {
 			NL_SET_ERR_MSG_MOD(extack, "invalid quantum");
 			err = -EINVAL;
@@ -1028,7 +1036,8 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
 	}
 
 	if (tb[TCA_FQ_INITIAL_QUANTUM])
-		q->initial_quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]);
+		WRITE_ONCE(q->initial_quantum,
+			   nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]));
 
 	if (tb[TCA_FQ_FLOW_DEFAULT_RATE])
 		pr_warn_ratelimited("sch_fq: defrate %u ignored.\n",
@@ -1037,17 +1046,19 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
 	if (tb[TCA_FQ_FLOW_MAX_RATE]) {
 		u32 rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]);
 
-		q->flow_max_rate = (rate == ~0U) ? ~0UL : rate;
+		WRITE_ONCE(q->flow_max_rate,
+			   (rate == ~0U) ? ~0UL : rate);
 	}
 	if (tb[TCA_FQ_LOW_RATE_THRESHOLD])
-		q->low_rate_threshold =
-			nla_get_u32(tb[TCA_FQ_LOW_RATE_THRESHOLD]);
+		WRITE_ONCE(q->low_rate_threshold,
+			   nla_get_u32(tb[TCA_FQ_LOW_RATE_THRESHOLD]));
 
 	if (tb[TCA_FQ_RATE_ENABLE]) {
 		u32 enable = nla_get_u32(tb[TCA_FQ_RATE_ENABLE]);
 
 		if (enable <= 1)
-			q->rate_enable = enable;
+			WRITE_ONCE(q->rate_enable,
+				   enable);
 		else
 			err = -EINVAL;
 	}
@@ -1055,7 +1066,8 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
 	if (tb[TCA_FQ_FLOW_REFILL_DELAY]) {
 		u32 usecs_delay = nla_get_u32(tb[TCA_FQ_FLOW_REFILL_DELAY]) ;
 
-		q->flow_refill_delay = usecs_to_jiffies(usecs_delay);
+		WRITE_ONCE(q->flow_refill_delay,
+			   usecs_to_jiffies(usecs_delay));
 	}
 
 	if (!err && tb[TCA_FQ_PRIOMAP])
@@ -1065,21 +1077,26 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
 		err = fq_load_weights(q, tb[TCA_FQ_WEIGHTS], extack);
 
 	if (tb[TCA_FQ_ORPHAN_MASK])
-		q->orphan_mask = nla_get_u32(tb[TCA_FQ_ORPHAN_MASK]);
+		WRITE_ONCE(q->orphan_mask,
+			   nla_get_u32(tb[TCA_FQ_ORPHAN_MASK]));
 
 	if (tb[TCA_FQ_CE_THRESHOLD])
-		q->ce_threshold = (u64)NSEC_PER_USEC *
-				  nla_get_u32(tb[TCA_FQ_CE_THRESHOLD]);
+		WRITE_ONCE(q->ce_threshold,
+			   (u64)NSEC_PER_USEC *
+			   nla_get_u32(tb[TCA_FQ_CE_THRESHOLD]));
 
 	if (tb[TCA_FQ_TIMER_SLACK])
-		q->timer_slack = nla_get_u32(tb[TCA_FQ_TIMER_SLACK]);
+		WRITE_ONCE(q->timer_slack,
+			   nla_get_u32(tb[TCA_FQ_TIMER_SLACK]));
 
 	if (tb[TCA_FQ_HORIZON])
-		q->horizon = (u64)NSEC_PER_USEC *
-				  nla_get_u32(tb[TCA_FQ_HORIZON]);
+		WRITE_ONCE(q->horizon,
+			   (u64)NSEC_PER_USEC *
+			   nla_get_u32(tb[TCA_FQ_HORIZON]));
 
 	if (tb[TCA_FQ_HORIZON_DROP])
-		q->horizon_drop = nla_get_u8(tb[TCA_FQ_HORIZON_DROP]);
+		WRITE_ONCE(q->horizon_drop,
+			   nla_get_u8(tb[TCA_FQ_HORIZON_DROP]));
 
 	if (!err) {
 
@@ -1160,13 +1177,13 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt,
 static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct fq_sched_data *q = qdisc_priv(sch);
-	u64 ce_threshold = q->ce_threshold;
 	struct tc_prio_qopt prio = {
 		.bands = FQ_BANDS,
 	};
-	u64 horizon = q->horizon;
 	struct nlattr *opts;
+	u64 ce_threshold;
 	s32 weights[3];
+	u64 horizon;
 
 	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (opts == NULL)
@@ -1174,35 +1191,48 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
 
 	/* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore */
 
+	ce_threshold = READ_ONCE(q->ce_threshold);
 	do_div(ce_threshold, NSEC_PER_USEC);
+
+	horizon = READ_ONCE(q->horizon);
 	do_div(horizon, NSEC_PER_USEC);
 
-	if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) ||
-	    nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) ||
-	    nla_put_u32(skb, TCA_FQ_QUANTUM, q->quantum) ||
-	    nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM, q->initial_quantum) ||
-	    nla_put_u32(skb, TCA_FQ_RATE_ENABLE, q->rate_enable) ||
+	if (nla_put_u32(skb, TCA_FQ_PLIMIT,
+			READ_ONCE(sch->limit)) ||
+	    nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT,
+			READ_ONCE(q->flow_plimit)) ||
+	    nla_put_u32(skb, TCA_FQ_QUANTUM,
+			READ_ONCE(q->quantum)) ||
+	    nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM,
+			READ_ONCE(q->initial_quantum)) ||
+	    nla_put_u32(skb, TCA_FQ_RATE_ENABLE,
+			READ_ONCE(q->rate_enable)) ||
 	    nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE,
-			min_t(unsigned long, q->flow_max_rate, ~0U)) ||
+			min_t(unsigned long,
+			      READ_ONCE(q->flow_max_rate), ~0U)) ||
 	    nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY,
-			jiffies_to_usecs(q->flow_refill_delay)) ||
-	    nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) ||
+			jiffies_to_usecs(READ_ONCE(q->flow_refill_delay))) ||
+	    nla_put_u32(skb, TCA_FQ_ORPHAN_MASK,
+			READ_ONCE(q->orphan_mask)) ||
 	    nla_put_u32(skb, TCA_FQ_LOW_RATE_THRESHOLD,
-			q->low_rate_threshold) ||
+			READ_ONCE(q->low_rate_threshold)) ||
 	    nla_put_u32(skb, TCA_FQ_CE_THRESHOLD, (u32)ce_threshold) ||
-	    nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log) ||
-	    nla_put_u32(skb, TCA_FQ_TIMER_SLACK, q->timer_slack) ||
+	    nla_put_u32(skb, TCA_FQ_BUCKETS_LOG,
+			READ_ONCE(q->fq_trees_log)) ||
+	    nla_put_u32(skb, TCA_FQ_TIMER_SLACK,
+			READ_ONCE(q->timer_slack)) ||
 	    nla_put_u32(skb, TCA_FQ_HORIZON, (u32)horizon) ||
-	    nla_put_u8(skb, TCA_FQ_HORIZON_DROP, q->horizon_drop))
+	    nla_put_u8(skb, TCA_FQ_HORIZON_DROP,
+		       READ_ONCE(q->horizon_drop)))
 		goto nla_put_failure;
 
 	fq_prio2band_decompress_crumb(q->prio2band, prio.priomap);
 	if (nla_put(skb, TCA_FQ_PRIOMAP, sizeof(prio), &prio))
 		goto nla_put_failure;
 
-	weights[0] = q->band_flows[0].quantum;
-	weights[1] = q->band_flows[1].quantum;
-	weights[2] = q->band_flows[2].quantum;
+	weights[0] = READ_ONCE(q->band_flows[0].quantum);
+	weights[1] = READ_ONCE(q->band_flows[1].quantum);
+	weights[2] = READ_ONCE(q->band_flows[2].quantum);
 	if (nla_put(skb, TCA_FQ_WEIGHTS, sizeof(weights), &weights))
 		goto nla_put_failure;
 
-- 
2.44.0.683.g7961c838ac-goog


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2 net-next 02/14] net_sched: cake: implement lockless cake_dump()
  2024-04-18  7:32 [PATCH v2 net-next 00/14] net_sched: first series for RTNL-less qdisc dumps Eric Dumazet
  2024-04-18  7:32 ` [PATCH v2 net-next 01/14] net_sched: sch_fq: implement lockless fq_dump() Eric Dumazet
@ 2024-04-18  7:32 ` Eric Dumazet
  2024-04-18 13:57   ` Simon Horman
  2024-04-18 15:20   ` Toke Høiland-Jørgensen
  2024-04-18  7:32 ` [PATCH v2 net-next 03/14] net_sched: sch_cbs: implement lockless cbs_dump() Eric Dumazet
                   ` (13 subsequent siblings)
  15 siblings, 2 replies; 29+ messages in thread
From: Eric Dumazet @ 2024-04-18  7:32 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Jamal Hadi Salim, Simon Horman, Toke Høiland-Jørgensen,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet, Eric Dumazet,
	Toke Høiland-Jørgensen

Instead of relying on RTNL, cake_dump() can use READ_ONCE()
annotations, paired with WRITE_ONCE() ones in cake_change().

v2: addressed Simon feedback in V1: https://lore.kernel.org/netdev/20240417083549.GA3846178@kernel.org/

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Toke Høiland-Jørgensen <toke@toke.dk>
---
 net/sched/sch_cake.c | 110 +++++++++++++++++++++++++------------------
 1 file changed, 63 insertions(+), 47 deletions(-)

diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 2eabc4dc5b79a83ce423f73c9cccec86f14be7cf..9602dafe32e61d38dc00b0a35e1ee3f530989610 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -2572,6 +2572,8 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
 {
 	struct cake_sched_data *q = qdisc_priv(sch);
 	struct nlattr *tb[TCA_CAKE_MAX + 1];
+	u16 rate_flags;
+	u8 flow_mode;
 	int err;
 
 	err = nla_parse_nested_deprecated(tb, TCA_CAKE_MAX, opt, cake_policy,
@@ -2579,10 +2581,11 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
 	if (err < 0)
 		return err;
 
+	flow_mode = q->flow_mode;
 	if (tb[TCA_CAKE_NAT]) {
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
-		q->flow_mode &= ~CAKE_FLOW_NAT_FLAG;
-		q->flow_mode |= CAKE_FLOW_NAT_FLAG *
+		flow_mode &= ~CAKE_FLOW_NAT_FLAG;
+		flow_mode |= CAKE_FLOW_NAT_FLAG *
 			!!nla_get_u32(tb[TCA_CAKE_NAT]);
 #else
 		NL_SET_ERR_MSG_ATTR(extack, tb[TCA_CAKE_NAT],
@@ -2592,29 +2595,34 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
 	}
 
 	if (tb[TCA_CAKE_BASE_RATE64])
-		q->rate_bps = nla_get_u64(tb[TCA_CAKE_BASE_RATE64]);
+		WRITE_ONCE(q->rate_bps,
+			   nla_get_u64(tb[TCA_CAKE_BASE_RATE64]));
 
 	if (tb[TCA_CAKE_DIFFSERV_MODE])
-		q->tin_mode = nla_get_u32(tb[TCA_CAKE_DIFFSERV_MODE]);
+		WRITE_ONCE(q->tin_mode,
+			   nla_get_u32(tb[TCA_CAKE_DIFFSERV_MODE]));
 
+	rate_flags = q->rate_flags;
 	if (tb[TCA_CAKE_WASH]) {
 		if (!!nla_get_u32(tb[TCA_CAKE_WASH]))
-			q->rate_flags |= CAKE_FLAG_WASH;
+			rate_flags |= CAKE_FLAG_WASH;
 		else
-			q->rate_flags &= ~CAKE_FLAG_WASH;
+			rate_flags &= ~CAKE_FLAG_WASH;
 	}
 
 	if (tb[TCA_CAKE_FLOW_MODE])
-		q->flow_mode = ((q->flow_mode & CAKE_FLOW_NAT_FLAG) |
+		flow_mode = ((flow_mode & CAKE_FLOW_NAT_FLAG) |
 				(nla_get_u32(tb[TCA_CAKE_FLOW_MODE]) &
 					CAKE_FLOW_MASK));
 
 	if (tb[TCA_CAKE_ATM])
-		q->atm_mode = nla_get_u32(tb[TCA_CAKE_ATM]);
+		WRITE_ONCE(q->atm_mode,
+			   nla_get_u32(tb[TCA_CAKE_ATM]));
 
 	if (tb[TCA_CAKE_OVERHEAD]) {
-		q->rate_overhead = nla_get_s32(tb[TCA_CAKE_OVERHEAD]);
-		q->rate_flags |= CAKE_FLAG_OVERHEAD;
+		WRITE_ONCE(q->rate_overhead,
+			   nla_get_s32(tb[TCA_CAKE_OVERHEAD]));
+		rate_flags |= CAKE_FLAG_OVERHEAD;
 
 		q->max_netlen = 0;
 		q->max_adjlen = 0;
@@ -2623,7 +2631,7 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
 	}
 
 	if (tb[TCA_CAKE_RAW]) {
-		q->rate_flags &= ~CAKE_FLAG_OVERHEAD;
+		rate_flags &= ~CAKE_FLAG_OVERHEAD;
 
 		q->max_netlen = 0;
 		q->max_adjlen = 0;
@@ -2632,54 +2640,58 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
 	}
 
 	if (tb[TCA_CAKE_MPU])
-		q->rate_mpu = nla_get_u32(tb[TCA_CAKE_MPU]);
+		WRITE_ONCE(q->rate_mpu,
+			   nla_get_u32(tb[TCA_CAKE_MPU]));
 
 	if (tb[TCA_CAKE_RTT]) {
-		q->interval = nla_get_u32(tb[TCA_CAKE_RTT]);
+		u32 interval = nla_get_u32(tb[TCA_CAKE_RTT]);
 
-		if (!q->interval)
-			q->interval = 1;
+		WRITE_ONCE(q->interval, max(interval, 1U));
 	}
 
 	if (tb[TCA_CAKE_TARGET]) {
-		q->target = nla_get_u32(tb[TCA_CAKE_TARGET]);
+		u32 target = nla_get_u32(tb[TCA_CAKE_TARGET]);
 
-		if (!q->target)
-			q->target = 1;
+		WRITE_ONCE(q->target, max(target, 1U));
 	}
 
 	if (tb[TCA_CAKE_AUTORATE]) {
 		if (!!nla_get_u32(tb[TCA_CAKE_AUTORATE]))
-			q->rate_flags |= CAKE_FLAG_AUTORATE_INGRESS;
+			rate_flags |= CAKE_FLAG_AUTORATE_INGRESS;
 		else
-			q->rate_flags &= ~CAKE_FLAG_AUTORATE_INGRESS;
+			rate_flags &= ~CAKE_FLAG_AUTORATE_INGRESS;
 	}
 
 	if (tb[TCA_CAKE_INGRESS]) {
 		if (!!nla_get_u32(tb[TCA_CAKE_INGRESS]))
-			q->rate_flags |= CAKE_FLAG_INGRESS;
+			rate_flags |= CAKE_FLAG_INGRESS;
 		else
-			q->rate_flags &= ~CAKE_FLAG_INGRESS;
+			rate_flags &= ~CAKE_FLAG_INGRESS;
 	}
 
 	if (tb[TCA_CAKE_ACK_FILTER])
-		q->ack_filter = nla_get_u32(tb[TCA_CAKE_ACK_FILTER]);
+		WRITE_ONCE(q->ack_filter,
+			   nla_get_u32(tb[TCA_CAKE_ACK_FILTER]));
 
 	if (tb[TCA_CAKE_MEMORY])
-		q->buffer_config_limit = nla_get_u32(tb[TCA_CAKE_MEMORY]);
+		WRITE_ONCE(q->buffer_config_limit,
+			   nla_get_u32(tb[TCA_CAKE_MEMORY]));
 
 	if (tb[TCA_CAKE_SPLIT_GSO]) {
 		if (!!nla_get_u32(tb[TCA_CAKE_SPLIT_GSO]))
-			q->rate_flags |= CAKE_FLAG_SPLIT_GSO;
+			rate_flags |= CAKE_FLAG_SPLIT_GSO;
 		else
-			q->rate_flags &= ~CAKE_FLAG_SPLIT_GSO;
+			rate_flags &= ~CAKE_FLAG_SPLIT_GSO;
 	}
 
 	if (tb[TCA_CAKE_FWMARK]) {
-		q->fwmark_mask = nla_get_u32(tb[TCA_CAKE_FWMARK]);
-		q->fwmark_shft = q->fwmark_mask ? __ffs(q->fwmark_mask) : 0;
+		WRITE_ONCE(q->fwmark_mask, nla_get_u32(tb[TCA_CAKE_FWMARK]));
+		WRITE_ONCE(q->fwmark_shft,
+			   q->fwmark_mask ? __ffs(q->fwmark_mask) : 0);
 	}
 
+	WRITE_ONCE(q->rate_flags, rate_flags);
+	WRITE_ONCE(q->flow_mode, flow_mode);
 	if (q->tins) {
 		sch_tree_lock(sch);
 		cake_reconfigure(sch);
@@ -2774,68 +2786,72 @@ static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct cake_sched_data *q = qdisc_priv(sch);
 	struct nlattr *opts;
+	u16 rate_flags;
+	u8 flow_mode;
 
 	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (!opts)
 		goto nla_put_failure;
 
-	if (nla_put_u64_64bit(skb, TCA_CAKE_BASE_RATE64, q->rate_bps,
-			      TCA_CAKE_PAD))
+	if (nla_put_u64_64bit(skb, TCA_CAKE_BASE_RATE64,
+			      READ_ONCE(q->rate_bps), TCA_CAKE_PAD))
 		goto nla_put_failure;
 
-	if (nla_put_u32(skb, TCA_CAKE_FLOW_MODE,
-			q->flow_mode & CAKE_FLOW_MASK))
+	flow_mode = READ_ONCE(q->flow_mode);
+	if (nla_put_u32(skb, TCA_CAKE_FLOW_MODE, flow_mode & CAKE_FLOW_MASK))
 		goto nla_put_failure;
 
-	if (nla_put_u32(skb, TCA_CAKE_RTT, q->interval))
+	if (nla_put_u32(skb, TCA_CAKE_RTT, READ_ONCE(q->interval)))
 		goto nla_put_failure;
 
-	if (nla_put_u32(skb, TCA_CAKE_TARGET, q->target))
+	if (nla_put_u32(skb, TCA_CAKE_TARGET, READ_ONCE(q->target)))
 		goto nla_put_failure;
 
-	if (nla_put_u32(skb, TCA_CAKE_MEMORY, q->buffer_config_limit))
+	if (nla_put_u32(skb, TCA_CAKE_MEMORY,
+			READ_ONCE(q->buffer_config_limit)))
 		goto nla_put_failure;
 
+	rate_flags = READ_ONCE(q->rate_flags);
 	if (nla_put_u32(skb, TCA_CAKE_AUTORATE,
-			!!(q->rate_flags & CAKE_FLAG_AUTORATE_INGRESS)))
+			!!(rate_flags & CAKE_FLAG_AUTORATE_INGRESS)))
 		goto nla_put_failure;
 
 	if (nla_put_u32(skb, TCA_CAKE_INGRESS,
-			!!(q->rate_flags & CAKE_FLAG_INGRESS)))
+			!!(rate_flags & CAKE_FLAG_INGRESS)))
 		goto nla_put_failure;
 
-	if (nla_put_u32(skb, TCA_CAKE_ACK_FILTER, q->ack_filter))
+	if (nla_put_u32(skb, TCA_CAKE_ACK_FILTER, READ_ONCE(q->ack_filter)))
 		goto nla_put_failure;
 
 	if (nla_put_u32(skb, TCA_CAKE_NAT,
-			!!(q->flow_mode & CAKE_FLOW_NAT_FLAG)))
+			!!(flow_mode & CAKE_FLOW_NAT_FLAG)))
 		goto nla_put_failure;
 
-	if (nla_put_u32(skb, TCA_CAKE_DIFFSERV_MODE, q->tin_mode))
+	if (nla_put_u32(skb, TCA_CAKE_DIFFSERV_MODE, READ_ONCE(q->tin_mode)))
 		goto nla_put_failure;
 
 	if (nla_put_u32(skb, TCA_CAKE_WASH,
-			!!(q->rate_flags & CAKE_FLAG_WASH)))
+			!!(rate_flags & CAKE_FLAG_WASH)))
 		goto nla_put_failure;
 
-	if (nla_put_u32(skb, TCA_CAKE_OVERHEAD, q->rate_overhead))
+	if (nla_put_u32(skb, TCA_CAKE_OVERHEAD, READ_ONCE(q->rate_overhead)))
 		goto nla_put_failure;
 
-	if (!(q->rate_flags & CAKE_FLAG_OVERHEAD))
+	if (!(rate_flags & CAKE_FLAG_OVERHEAD))
 		if (nla_put_u32(skb, TCA_CAKE_RAW, 0))
 			goto nla_put_failure;
 
-	if (nla_put_u32(skb, TCA_CAKE_ATM, q->atm_mode))
+	if (nla_put_u32(skb, TCA_CAKE_ATM, READ_ONCE(q->atm_mode)))
 		goto nla_put_failure;
 
-	if (nla_put_u32(skb, TCA_CAKE_MPU, q->rate_mpu))
+	if (nla_put_u32(skb, TCA_CAKE_MPU, READ_ONCE(q->rate_mpu)))
 		goto nla_put_failure;
 
 	if (nla_put_u32(skb, TCA_CAKE_SPLIT_GSO,
-			!!(q->rate_flags & CAKE_FLAG_SPLIT_GSO)))
+			!!(rate_flags & CAKE_FLAG_SPLIT_GSO)))
 		goto nla_put_failure;
 
-	if (nla_put_u32(skb, TCA_CAKE_FWMARK, q->fwmark_mask))
+	if (nla_put_u32(skb, TCA_CAKE_FWMARK, READ_ONCE(q->fwmark_mask)))
 		goto nla_put_failure;
 
 	return nla_nest_end(skb, opts);
-- 
2.44.0.683.g7961c838ac-goog


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2 net-next 03/14] net_sched: sch_cbs: implement lockless cbs_dump()
  2024-04-18  7:32 [PATCH v2 net-next 00/14] net_sched: first series for RTNL-less qdisc dumps Eric Dumazet
  2024-04-18  7:32 ` [PATCH v2 net-next 01/14] net_sched: sch_fq: implement lockless fq_dump() Eric Dumazet
  2024-04-18  7:32 ` [PATCH v2 net-next 02/14] net_sched: cake: implement lockless cake_dump() Eric Dumazet
@ 2024-04-18  7:32 ` Eric Dumazet
  2024-04-18  7:32 ` [PATCH v2 net-next 04/14] net_sched: sch_choke: implement lockless choke_dump() Eric Dumazet
                   ` (12 subsequent siblings)
  15 siblings, 0 replies; 29+ messages in thread
From: Eric Dumazet @ 2024-04-18  7:32 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Jamal Hadi Salim, Simon Horman, Toke Høiland-Jørgensen,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet, Eric Dumazet

Instead of relying on RTNL, cbs_dump() can use READ_ONCE()
annotations, paired with WRITE_ONCE() ones in cbs_change().

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Simon Horman <horms@kernel.org>
---
 net/sched/sch_cbs.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index 69001eff0315584df23a24f81ae3b4cf7bf5fd79..939425da18955bc8a3f3d2d5b852b2585e9bcaee 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -389,11 +389,11 @@ static int cbs_change(struct Qdisc *sch, struct nlattr *opt,
 	}
 
 	/* Everything went OK, save the parameters used. */
-	q->hicredit = qopt->hicredit;
-	q->locredit = qopt->locredit;
-	q->idleslope = qopt->idleslope * BYTES_PER_KBIT;
-	q->sendslope = qopt->sendslope * BYTES_PER_KBIT;
-	q->offload = qopt->offload;
+	WRITE_ONCE(q->hicredit, qopt->hicredit);
+	WRITE_ONCE(q->locredit, qopt->locredit);
+	WRITE_ONCE(q->idleslope, qopt->idleslope * BYTES_PER_KBIT);
+	WRITE_ONCE(q->sendslope, qopt->sendslope * BYTES_PER_KBIT);
+	WRITE_ONCE(q->offload, qopt->offload);
 
 	return 0;
 }
@@ -459,11 +459,11 @@ static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (!nest)
 		goto nla_put_failure;
 
-	opt.hicredit = q->hicredit;
-	opt.locredit = q->locredit;
-	opt.sendslope = div64_s64(q->sendslope, BYTES_PER_KBIT);
-	opt.idleslope = div64_s64(q->idleslope, BYTES_PER_KBIT);
-	opt.offload = q->offload;
+	opt.hicredit = READ_ONCE(q->hicredit);
+	opt.locredit = READ_ONCE(q->locredit);
+	opt.sendslope = div64_s64(READ_ONCE(q->sendslope), BYTES_PER_KBIT);
+	opt.idleslope = div64_s64(READ_ONCE(q->idleslope), BYTES_PER_KBIT);
+	opt.offload = READ_ONCE(q->offload);
 
 	if (nla_put(skb, TCA_CBS_PARMS, sizeof(opt), &opt))
 		goto nla_put_failure;
-- 
2.44.0.683.g7961c838ac-goog


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2 net-next 04/14] net_sched: sch_choke: implement lockless choke_dump()
  2024-04-18  7:32 [PATCH v2 net-next 00/14] net_sched: first series for RTNL-less qdisc dumps Eric Dumazet
                   ` (2 preceding siblings ...)
  2024-04-18  7:32 ` [PATCH v2 net-next 03/14] net_sched: sch_cbs: implement lockless cbs_dump() Eric Dumazet
@ 2024-04-18  7:32 ` Eric Dumazet
  2024-04-18 13:58   ` Simon Horman
  2024-04-18  7:32 ` [PATCH v2 net-next 05/14] net_sched: sch_codel: implement lockless codel_dump() Eric Dumazet
                   ` (11 subsequent siblings)
  15 siblings, 1 reply; 29+ messages in thread
From: Eric Dumazet @ 2024-04-18  7:32 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Jamal Hadi Salim, Simon Horman, Toke Høiland-Jørgensen,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet, Eric Dumazet

Instead of relying on RTNL, choke_dump() can use READ_ONCE()
annotations, paired with WRITE_ONCE() ones in choke_change().

v2: added a WRITE_ONCE(p->Scell_log, Scell_log)
    per Simon feedback in V1
    Removed the READ_ONCE(q->limit) in choke_enqueue()

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/net/red.h     | 12 ++++++------
 net/sched/sch_choke.c | 21 +++++++++++----------
 2 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/include/net/red.h b/include/net/red.h
index 425364de0df7913cdd6361a0eb8d18b418372787..802287d52c9e37e76ba9154539f511629e4b9780 100644
--- a/include/net/red.h
+++ b/include/net/red.h
@@ -233,10 +233,10 @@ static inline void red_set_parms(struct red_parms *p,
 	int delta = qth_max - qth_min;
 	u32 max_p_delta;
 
-	p->qth_min	= qth_min << Wlog;
-	p->qth_max	= qth_max << Wlog;
-	p->Wlog		= Wlog;
-	p->Plog		= Plog;
+	WRITE_ONCE(p->qth_min, qth_min << Wlog);
+	WRITE_ONCE(p->qth_max, qth_max << Wlog);
+	WRITE_ONCE(p->Wlog, Wlog);
+	WRITE_ONCE(p->Plog, Plog);
 	if (delta <= 0)
 		delta = 1;
 	p->qth_delta	= delta;
@@ -244,7 +244,7 @@ static inline void red_set_parms(struct red_parms *p,
 		max_P = red_maxp(Plog);
 		max_P *= delta; /* max_P = (qth_max - qth_min)/2^Plog */
 	}
-	p->max_P = max_P;
+	WRITE_ONCE(p->max_P, max_P);
 	max_p_delta = max_P / delta;
 	max_p_delta = max(max_p_delta, 1U);
 	p->max_P_reciprocal  = reciprocal_value(max_p_delta);
@@ -257,7 +257,7 @@ static inline void red_set_parms(struct red_parms *p,
 	p->target_min = qth_min + 2*delta;
 	p->target_max = qth_min + 3*delta;
 
-	p->Scell_log	= Scell_log;
+	WRITE_ONCE(p->Scell_log, Scell_log);
 	p->Scell_max	= (255 << Scell_log);
 
 	if (stab)
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index ea108030c6b410418f0b58ba7ea51e1b524d4df9..91072010923d1825e811adbb8ea2b3035dda57b3 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -405,8 +405,8 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt,
 	} else
 		sch_tree_lock(sch);
 
-	q->flags = ctl->flags;
-	q->limit = ctl->limit;
+	WRITE_ONCE(q->flags, ctl->flags);
+	WRITE_ONCE(q->limit, ctl->limit);
 
 	red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
 		      ctl->Plog, ctl->Scell_log,
@@ -431,15 +431,16 @@ static int choke_init(struct Qdisc *sch, struct nlattr *opt,
 static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct choke_sched_data *q = qdisc_priv(sch);
+	u8 Wlog = READ_ONCE(q->parms.Wlog);
 	struct nlattr *opts = NULL;
 	struct tc_red_qopt opt = {
-		.limit		= q->limit,
-		.flags		= q->flags,
-		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
-		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
-		.Wlog		= q->parms.Wlog,
-		.Plog		= q->parms.Plog,
-		.Scell_log	= q->parms.Scell_log,
+		.limit		= READ_ONCE(q->limit),
+		.flags		= READ_ONCE(q->flags),
+		.qth_min	= READ_ONCE(q->parms.qth_min) >> Wlog,
+		.qth_max	= READ_ONCE(q->parms.qth_max) >> Wlog,
+		.Wlog		= Wlog,
+		.Plog		= READ_ONCE(q->parms.Plog),
+		.Scell_log	= READ_ONCE(q->parms.Scell_log),
 	};
 
 	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
@@ -447,7 +448,7 @@ static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
 		goto nla_put_failure;
 
 	if (nla_put(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt) ||
-	    nla_put_u32(skb, TCA_CHOKE_MAX_P, q->parms.max_P))
+	    nla_put_u32(skb, TCA_CHOKE_MAX_P, READ_ONCE(q->parms.max_P)))
 		goto nla_put_failure;
 	return nla_nest_end(skb, opts);
 
-- 
2.44.0.683.g7961c838ac-goog


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2 net-next 05/14] net_sched: sch_codel: implement lockless codel_dump()
  2024-04-18  7:32 [PATCH v2 net-next 00/14] net_sched: first series for RTNL-less qdisc dumps Eric Dumazet
                   ` (3 preceding siblings ...)
  2024-04-18  7:32 ` [PATCH v2 net-next 04/14] net_sched: sch_choke: implement lockless choke_dump() Eric Dumazet
@ 2024-04-18  7:32 ` Eric Dumazet
  2024-04-18  7:32 ` [PATCH v2 net-next 06/14] net_sched: sch_etf: implement lockless etf_dump() Eric Dumazet
                   ` (10 subsequent siblings)
  15 siblings, 0 replies; 29+ messages in thread
From: Eric Dumazet @ 2024-04-18  7:32 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Jamal Hadi Salim, Simon Horman, Toke Høiland-Jørgensen,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet, Eric Dumazet

Instead of relying on RTNL, codel_dump() can use READ_ONCE()
annotations, paired with WRITE_ONCE() ones in codel_change().

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Simon Horman <horms@kernel.org>
---
 net/sched/sch_codel.c | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index ecb3f164bb25b33bd662c8ee07dc1b5945fd882d..3e8d4fe4d91e3ef2b7715640f6675aa5e8e2a326 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -118,26 +118,31 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt,
 	if (tb[TCA_CODEL_TARGET]) {
 		u32 target = nla_get_u32(tb[TCA_CODEL_TARGET]);
 
-		q->params.target = ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT;
+		WRITE_ONCE(q->params.target,
+			   ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT);
 	}
 
 	if (tb[TCA_CODEL_CE_THRESHOLD]) {
 		u64 val = nla_get_u32(tb[TCA_CODEL_CE_THRESHOLD]);
 
-		q->params.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT;
+		WRITE_ONCE(q->params.ce_threshold,
+			   (val * NSEC_PER_USEC) >> CODEL_SHIFT);
 	}
 
 	if (tb[TCA_CODEL_INTERVAL]) {
 		u32 interval = nla_get_u32(tb[TCA_CODEL_INTERVAL]);
 
-		q->params.interval = ((u64)interval * NSEC_PER_USEC) >> CODEL_SHIFT;
+		WRITE_ONCE(q->params.interval,
+			   ((u64)interval * NSEC_PER_USEC) >> CODEL_SHIFT);
 	}
 
 	if (tb[TCA_CODEL_LIMIT])
-		sch->limit = nla_get_u32(tb[TCA_CODEL_LIMIT]);
+		WRITE_ONCE(sch->limit,
+			   nla_get_u32(tb[TCA_CODEL_LIMIT]));
 
 	if (tb[TCA_CODEL_ECN])
-		q->params.ecn = !!nla_get_u32(tb[TCA_CODEL_ECN]);
+		WRITE_ONCE(q->params.ecn,
+			   !!nla_get_u32(tb[TCA_CODEL_ECN]));
 
 	qlen = sch->q.qlen;
 	while (sch->q.qlen > sch->limit) {
@@ -183,6 +188,7 @@ static int codel_init(struct Qdisc *sch, struct nlattr *opt,
 static int codel_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct codel_sched_data *q = qdisc_priv(sch);
+	codel_time_t ce_threshold;
 	struct nlattr *opts;
 
 	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
@@ -190,17 +196,18 @@ static int codel_dump(struct Qdisc *sch, struct sk_buff *skb)
 		goto nla_put_failure;
 
 	if (nla_put_u32(skb, TCA_CODEL_TARGET,
-			codel_time_to_us(q->params.target)) ||
+			codel_time_to_us(READ_ONCE(q->params.target))) ||
 	    nla_put_u32(skb, TCA_CODEL_LIMIT,
-			sch->limit) ||
+			READ_ONCE(sch->limit)) ||
 	    nla_put_u32(skb, TCA_CODEL_INTERVAL,
-			codel_time_to_us(q->params.interval)) ||
+			codel_time_to_us(READ_ONCE(q->params.interval))) ||
 	    nla_put_u32(skb, TCA_CODEL_ECN,
-			q->params.ecn))
+			READ_ONCE(q->params.ecn)))
 		goto nla_put_failure;
-	if (q->params.ce_threshold != CODEL_DISABLED_THRESHOLD &&
+	ce_threshold = READ_ONCE(q->params.ce_threshold);
+	if (ce_threshold != CODEL_DISABLED_THRESHOLD &&
 	    nla_put_u32(skb, TCA_CODEL_CE_THRESHOLD,
-			codel_time_to_us(q->params.ce_threshold)))
+			codel_time_to_us(ce_threshold)))
 		goto nla_put_failure;
 	return nla_nest_end(skb, opts);
 
-- 
2.44.0.683.g7961c838ac-goog


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2 net-next 06/14] net_sched: sch_etf: implement lockless etf_dump()
  2024-04-18  7:32 [PATCH v2 net-next 00/14] net_sched: first series for RTNL-less qdisc dumps Eric Dumazet
                   ` (4 preceding siblings ...)
  2024-04-18  7:32 ` [PATCH v2 net-next 05/14] net_sched: sch_codel: implement lockless codel_dump() Eric Dumazet
@ 2024-04-18  7:32 ` Eric Dumazet
  2024-04-18  7:32 ` [PATCH v2 net-next 07/14] net_sched: sch_ets: implement lockless ets_dump() Eric Dumazet
                   ` (9 subsequent siblings)
  15 siblings, 0 replies; 29+ messages in thread
From: Eric Dumazet @ 2024-04-18  7:32 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Jamal Hadi Salim, Simon Horman, Toke Høiland-Jørgensen,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet, Eric Dumazet

Instead of relying on RTNL, etf_dump() can use READ_ONCE()
annotations.

There is no etf_change() yet, this patch simply aligns
this qdisc with others.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Simon Horman <horms@kernel.org>
---
 net/sched/sch_etf.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c
index 2e4bef713b6abc4aad836bc9248796c20a22e476..c74d778c32a1eda639650df4d1d103c5338f14e6 100644
--- a/net/sched/sch_etf.c
+++ b/net/sched/sch_etf.c
@@ -467,15 +467,15 @@ static int etf_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (!nest)
 		goto nla_put_failure;
 
-	opt.delta = q->delta;
-	opt.clockid = q->clockid;
-	if (q->offload)
+	opt.delta = READ_ONCE(q->delta);
+	opt.clockid = READ_ONCE(q->clockid);
+	if (READ_ONCE(q->offload))
 		opt.flags |= TC_ETF_OFFLOAD_ON;
 
-	if (q->deadline_mode)
+	if (READ_ONCE(q->deadline_mode))
 		opt.flags |= TC_ETF_DEADLINE_MODE_ON;
 
-	if (q->skip_sock_check)
+	if (READ_ONCE(q->skip_sock_check))
 		opt.flags |= TC_ETF_SKIP_SOCK_CHECK;
 
 	if (nla_put(skb, TCA_ETF_PARMS, sizeof(opt), &opt))
-- 
2.44.0.683.g7961c838ac-goog


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2 net-next 07/14] net_sched: sch_ets: implement lockless ets_dump()
  2024-04-18  7:32 [PATCH v2 net-next 00/14] net_sched: first series for RTNL-less qdisc dumps Eric Dumazet
                   ` (5 preceding siblings ...)
  2024-04-18  7:32 ` [PATCH v2 net-next 06/14] net_sched: sch_etf: implement lockless etf_dump() Eric Dumazet
@ 2024-04-18  7:32 ` Eric Dumazet
  2024-04-18  7:32 ` [PATCH v2 net-next 08/14] net_sched: sch_fifo: implement lockless __fifo_dump() Eric Dumazet
                   ` (8 subsequent siblings)
  15 siblings, 0 replies; 29+ messages in thread
From: Eric Dumazet @ 2024-04-18  7:32 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Jamal Hadi Salim, Simon Horman, Toke Høiland-Jørgensen,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet, Eric Dumazet

Instead of relying on RTNL, ets_dump() can use READ_ONCE()
annotations, paired with WRITE_ONCE() ones in ets_change().

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Simon Horman <horms@kernel.org>
---
 net/sched/sch_ets.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index 835b4460b44854a803d3054e744702022b7551f4..f80bc05d4c5a5050226e6cfd30fa951c0b61029f 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -646,7 +646,7 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
 
 	sch_tree_lock(sch);
 
-	q->nbands = nbands;
+	WRITE_ONCE(q->nbands, nbands);
 	for (i = nstrict; i < q->nstrict; i++) {
 		if (q->classes[i].qdisc->q.qlen) {
 			list_add_tail(&q->classes[i].alist, &q->active);
@@ -658,11 +658,11 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
 			list_del(&q->classes[i].alist);
 		qdisc_tree_flush_backlog(q->classes[i].qdisc);
 	}
-	q->nstrict = nstrict;
+	WRITE_ONCE(q->nstrict, nstrict);
 	memcpy(q->prio2band, priomap, sizeof(priomap));
 
 	for (i = 0; i < q->nbands; i++)
-		q->classes[i].quantum = quanta[i];
+		WRITE_ONCE(q->classes[i].quantum, quanta[i]);
 
 	for (i = oldbands; i < q->nbands; i++) {
 		q->classes[i].qdisc = queues[i];
@@ -676,7 +676,7 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
 	for (i = q->nbands; i < oldbands; i++) {
 		qdisc_put(q->classes[i].qdisc);
 		q->classes[i].qdisc = NULL;
-		q->classes[i].quantum = 0;
+		WRITE_ONCE(q->classes[i].quantum, 0);
 		q->classes[i].deficit = 0;
 		gnet_stats_basic_sync_init(&q->classes[i].bstats);
 		memset(&q->classes[i].qstats, 0, sizeof(q->classes[i].qstats));
@@ -733,6 +733,7 @@ static int ets_qdisc_dump(struct Qdisc *sch, struct sk_buff *skb)
 	struct ets_sched *q = qdisc_priv(sch);
 	struct nlattr *opts;
 	struct nlattr *nest;
+	u8 nbands, nstrict;
 	int band;
 	int prio;
 	int err;
@@ -745,21 +746,22 @@ static int ets_qdisc_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (!opts)
 		goto nla_err;
 
-	if (nla_put_u8(skb, TCA_ETS_NBANDS, q->nbands))
+	nbands = READ_ONCE(q->nbands);
+	if (nla_put_u8(skb, TCA_ETS_NBANDS, nbands))
 		goto nla_err;
 
-	if (q->nstrict &&
-	    nla_put_u8(skb, TCA_ETS_NSTRICT, q->nstrict))
+	nstrict = READ_ONCE(q->nstrict);
+	if (nstrict && nla_put_u8(skb, TCA_ETS_NSTRICT, nstrict))
 		goto nla_err;
 
-	if (q->nbands > q->nstrict) {
+	if (nbands > nstrict) {
 		nest = nla_nest_start(skb, TCA_ETS_QUANTA);
 		if (!nest)
 			goto nla_err;
 
-		for (band = q->nstrict; band < q->nbands; band++) {
+		for (band = nstrict; band < nbands; band++) {
 			if (nla_put_u32(skb, TCA_ETS_QUANTA_BAND,
-					q->classes[band].quantum))
+					READ_ONCE(q->classes[band].quantum)))
 				goto nla_err;
 		}
 
@@ -771,7 +773,8 @@ static int ets_qdisc_dump(struct Qdisc *sch, struct sk_buff *skb)
 		goto nla_err;
 
 	for (prio = 0; prio <= TC_PRIO_MAX; prio++) {
-		if (nla_put_u8(skb, TCA_ETS_PRIOMAP_BAND, q->prio2band[prio]))
+		if (nla_put_u8(skb, TCA_ETS_PRIOMAP_BAND,
+			       READ_ONCE(q->prio2band[prio])))
 			goto nla_err;
 	}
 
-- 
2.44.0.683.g7961c838ac-goog


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2 net-next 08/14] net_sched: sch_fifo: implement lockless __fifo_dump()
  2024-04-18  7:32 [PATCH v2 net-next 00/14] net_sched: first series for RTNL-less qdisc dumps Eric Dumazet
                   ` (6 preceding siblings ...)
  2024-04-18  7:32 ` [PATCH v2 net-next 07/14] net_sched: sch_ets: implement lockless ets_dump() Eric Dumazet
@ 2024-04-18  7:32 ` Eric Dumazet
  2024-04-18 15:04   ` Simon Horman
  2024-04-18  7:32 ` [PATCH v2 net-next 09/14] net_sched: sch_fq_codel: implement lockless fq_codel_dump() Eric Dumazet
                   ` (7 subsequent siblings)
  15 siblings, 1 reply; 29+ messages in thread
From: Eric Dumazet @ 2024-04-18  7:32 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Jamal Hadi Salim, Simon Horman, Toke Høiland-Jørgensen,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet, Eric Dumazet

Instead of relying on RTNL, __fifo_dump() can use READ_ONCE()
annotations, paired with WRITE_ONCE() ones in __fifo_init().

Also add missing READ_ONCE(sch->limit) in bfifo_enqueue(),
pfifo_enqueue() and pfifo_tail_enqueue().

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/sched/sch_fifo.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 450f5c67ac4956e21b544dfd81f886714171eced..b50b2c2cc09bc6ee5b23d9d5d3abea4423ff75b9 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -19,7 +19,8 @@
 static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 			 struct sk_buff **to_free)
 {
-	if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= sch->limit))
+	if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <=
+		   READ_ONCE(sch->limit)))
 		return qdisc_enqueue_tail(skb, sch);
 
 	return qdisc_drop(skb, sch, to_free);
@@ -28,7 +29,7 @@ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 			 struct sk_buff **to_free)
 {
-	if (likely(sch->q.qlen < sch->limit))
+	if (likely(sch->q.qlen < READ_ONCE(sch->limit)))
 		return qdisc_enqueue_tail(skb, sch);
 
 	return qdisc_drop(skb, sch, to_free);
@@ -39,7 +40,7 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 {
 	unsigned int prev_backlog;
 
-	if (likely(sch->q.qlen < sch->limit))
+	if (likely(sch->q.qlen < READ_ONCE(sch->limit)))
 		return qdisc_enqueue_tail(skb, sch);
 
 	prev_backlog = sch->qstats.backlog;
@@ -105,14 +106,14 @@ static int __fifo_init(struct Qdisc *sch, struct nlattr *opt,
 		if (is_bfifo)
 			limit *= psched_mtu(qdisc_dev(sch));
 
-		sch->limit = limit;
+		WRITE_ONCE(sch->limit, limit);
 	} else {
 		struct tc_fifo_qopt *ctl = nla_data(opt);
 
 		if (nla_len(opt) < sizeof(*ctl))
 			return -EINVAL;
 
-		sch->limit = ctl->limit;
+		WRITE_ONCE(sch->limit, ctl->limit);
 	}
 
 	if (is_bfifo)
@@ -154,7 +155,7 @@ static void fifo_destroy(struct Qdisc *sch)
 
 static int __fifo_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
-	struct tc_fifo_qopt opt = { .limit = sch->limit };
+	struct tc_fifo_qopt opt = { .limit = READ_ONCE(sch->limit) };
 
 	if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
 		goto nla_put_failure;
-- 
2.44.0.683.g7961c838ac-goog


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2 net-next 09/14] net_sched: sch_fq_codel: implement lockless fq_codel_dump()
  2024-04-18  7:32 [PATCH v2 net-next 00/14] net_sched: first series for RTNL-less qdisc dumps Eric Dumazet
                   ` (7 preceding siblings ...)
  2024-04-18  7:32 ` [PATCH v2 net-next 08/14] net_sched: sch_fifo: implement lockless __fifo_dump() Eric Dumazet
@ 2024-04-18  7:32 ` Eric Dumazet
  2024-04-18  7:32 ` [PATCH v2 net-next 10/14] net_sched: sch_fq_pie: implement lockless fq_pie_dump() Eric Dumazet
                   ` (6 subsequent siblings)
  15 siblings, 0 replies; 29+ messages in thread
From: Eric Dumazet @ 2024-04-18  7:32 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Jamal Hadi Salim, Simon Horman, Toke Høiland-Jørgensen,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet, Eric Dumazet

Instead of relying on RTNL, fq_codel_dump() can use READ_ONCE()
annotations, paired with WRITE_ONCE() ones in fq_codel_change().

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Simon Horman <horms@kernel.org>
---
 net/sched/sch_fq_codel.c | 57 ++++++++++++++++++++++++----------------
 1 file changed, 35 insertions(+), 22 deletions(-)

diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 79f9d6de6c852268fa293f8067d029d385b54976..4f908c11ba9528f8f9f3af6752ff10005d6f6511 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -396,40 +396,49 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt,
 	if (tb[TCA_FQ_CODEL_TARGET]) {
 		u64 target = nla_get_u32(tb[TCA_FQ_CODEL_TARGET]);
 
-		q->cparams.target = (target * NSEC_PER_USEC) >> CODEL_SHIFT;
+		WRITE_ONCE(q->cparams.target,
+			   (target * NSEC_PER_USEC) >> CODEL_SHIFT);
 	}
 
 	if (tb[TCA_FQ_CODEL_CE_THRESHOLD]) {
 		u64 val = nla_get_u32(tb[TCA_FQ_CODEL_CE_THRESHOLD]);
 
-		q->cparams.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT;
+		WRITE_ONCE(q->cparams.ce_threshold,
+			   (val * NSEC_PER_USEC) >> CODEL_SHIFT);
 	}
 
 	if (tb[TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR])
-		q->cparams.ce_threshold_selector = nla_get_u8(tb[TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR]);
+		WRITE_ONCE(q->cparams.ce_threshold_selector,
+			   nla_get_u8(tb[TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR]));
 	if (tb[TCA_FQ_CODEL_CE_THRESHOLD_MASK])
-		q->cparams.ce_threshold_mask = nla_get_u8(tb[TCA_FQ_CODEL_CE_THRESHOLD_MASK]);
+		WRITE_ONCE(q->cparams.ce_threshold_mask,
+			   nla_get_u8(tb[TCA_FQ_CODEL_CE_THRESHOLD_MASK]));
 
 	if (tb[TCA_FQ_CODEL_INTERVAL]) {
 		u64 interval = nla_get_u32(tb[TCA_FQ_CODEL_INTERVAL]);
 
-		q->cparams.interval = (interval * NSEC_PER_USEC) >> CODEL_SHIFT;
+		WRITE_ONCE(q->cparams.interval,
+			   (interval * NSEC_PER_USEC) >> CODEL_SHIFT);
 	}
 
 	if (tb[TCA_FQ_CODEL_LIMIT])
-		sch->limit = nla_get_u32(tb[TCA_FQ_CODEL_LIMIT]);
+		WRITE_ONCE(sch->limit,
+			   nla_get_u32(tb[TCA_FQ_CODEL_LIMIT]));
 
 	if (tb[TCA_FQ_CODEL_ECN])
-		q->cparams.ecn = !!nla_get_u32(tb[TCA_FQ_CODEL_ECN]);
+		WRITE_ONCE(q->cparams.ecn,
+			   !!nla_get_u32(tb[TCA_FQ_CODEL_ECN]));
 
 	if (quantum)
-		q->quantum = quantum;
+		WRITE_ONCE(q->quantum, quantum);
 
 	if (tb[TCA_FQ_CODEL_DROP_BATCH_SIZE])
-		q->drop_batch_size = max(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE]));
+		WRITE_ONCE(q->drop_batch_size,
+			   max(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE])));
 
 	if (tb[TCA_FQ_CODEL_MEMORY_LIMIT])
-		q->memory_limit = min(1U << 31, nla_get_u32(tb[TCA_FQ_CODEL_MEMORY_LIMIT]));
+		WRITE_ONCE(q->memory_limit,
+			   min(1U << 31, nla_get_u32(tb[TCA_FQ_CODEL_MEMORY_LIMIT])));
 
 	while (sch->q.qlen > sch->limit ||
 	       q->memory_usage > q->memory_limit) {
@@ -522,6 +531,7 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt,
 static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct fq_codel_sched_data *q = qdisc_priv(sch);
+	codel_time_t ce_threshold;
 	struct nlattr *opts;
 
 	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
@@ -529,30 +539,33 @@ static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb)
 		goto nla_put_failure;
 
 	if (nla_put_u32(skb, TCA_FQ_CODEL_TARGET,
-			codel_time_to_us(q->cparams.target)) ||
+			codel_time_to_us(READ_ONCE(q->cparams.target))) ||
 	    nla_put_u32(skb, TCA_FQ_CODEL_LIMIT,
-			sch->limit) ||
+			READ_ONCE(sch->limit)) ||
 	    nla_put_u32(skb, TCA_FQ_CODEL_INTERVAL,
-			codel_time_to_us(q->cparams.interval)) ||
+			codel_time_to_us(READ_ONCE(q->cparams.interval))) ||
 	    nla_put_u32(skb, TCA_FQ_CODEL_ECN,
-			q->cparams.ecn) ||
+			READ_ONCE(q->cparams.ecn)) ||
 	    nla_put_u32(skb, TCA_FQ_CODEL_QUANTUM,
-			q->quantum) ||
+			READ_ONCE(q->quantum)) ||
 	    nla_put_u32(skb, TCA_FQ_CODEL_DROP_BATCH_SIZE,
-			q->drop_batch_size) ||
+			READ_ONCE(q->drop_batch_size)) ||
 	    nla_put_u32(skb, TCA_FQ_CODEL_MEMORY_LIMIT,
-			q->memory_limit) ||
+			READ_ONCE(q->memory_limit)) ||
 	    nla_put_u32(skb, TCA_FQ_CODEL_FLOWS,
-			q->flows_cnt))
+			READ_ONCE(q->flows_cnt)))
 		goto nla_put_failure;
 
-	if (q->cparams.ce_threshold != CODEL_DISABLED_THRESHOLD) {
+	ce_threshold = READ_ONCE(q->cparams.ce_threshold);
+	if (ce_threshold != CODEL_DISABLED_THRESHOLD) {
 		if (nla_put_u32(skb, TCA_FQ_CODEL_CE_THRESHOLD,
-				codel_time_to_us(q->cparams.ce_threshold)))
+				codel_time_to_us(ce_threshold)))
 			goto nla_put_failure;
-		if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR, q->cparams.ce_threshold_selector))
+		if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR,
+			       READ_ONCE(q->cparams.ce_threshold_selector)))
 			goto nla_put_failure;
-		if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_MASK, q->cparams.ce_threshold_mask))
+		if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_MASK,
+			       READ_ONCE(q->cparams.ce_threshold_mask)))
 			goto nla_put_failure;
 	}
 
-- 
2.44.0.683.g7961c838ac-goog


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2 net-next 10/14] net_sched: sch_fq_pie: implement lockless fq_pie_dump()
  2024-04-18  7:32 [PATCH v2 net-next 00/14] net_sched: first series for RTNL-less qdisc dumps Eric Dumazet
                   ` (8 preceding siblings ...)
  2024-04-18  7:32 ` [PATCH v2 net-next 09/14] net_sched: sch_fq_codel: implement lockless fq_codel_dump() Eric Dumazet
@ 2024-04-18  7:32 ` Eric Dumazet
  2024-04-18  7:32 ` [PATCH v2 net-next 11/14] net_sched: sch_hfsc: implement lockless accesses to q->defcls Eric Dumazet
                   ` (5 subsequent siblings)
  15 siblings, 0 replies; 29+ messages in thread
From: Eric Dumazet @ 2024-04-18  7:32 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Jamal Hadi Salim, Simon Horman, Toke Høiland-Jørgensen,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet, Eric Dumazet

Instead of relying on RTNL, fq_pie_dump() can use READ_ONCE()
annotations, paired with WRITE_ONCE() ones in fq_pie_change().

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Simon Horman <horms@kernel.org>
---
 net/sched/sch_fq_pie.c | 61 +++++++++++++++++++++++-------------------
 1 file changed, 34 insertions(+), 27 deletions(-)

diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
index 358cf304f4c91203749bac10f6e9154eda0a3778..c38f33ff80bde74cfe33de7558f66e5962ffe56b 100644
--- a/net/sched/sch_fq_pie.c
+++ b/net/sched/sch_fq_pie.c
@@ -299,8 +299,8 @@ static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt,
 	if (tb[TCA_FQ_PIE_LIMIT]) {
 		u32 limit = nla_get_u32(tb[TCA_FQ_PIE_LIMIT]);
 
-		q->p_params.limit = limit;
-		sch->limit = limit;
+		WRITE_ONCE(q->p_params.limit, limit);
+		WRITE_ONCE(sch->limit, limit);
 	}
 	if (tb[TCA_FQ_PIE_FLOWS]) {
 		if (q->flows) {
@@ -322,39 +322,45 @@ static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt,
 		u32 target = nla_get_u32(tb[TCA_FQ_PIE_TARGET]);
 
 		/* convert to pschedtime */
-		q->p_params.target =
-			PSCHED_NS2TICKS((u64)target * NSEC_PER_USEC);
+		WRITE_ONCE(q->p_params.target,
+			   PSCHED_NS2TICKS((u64)target * NSEC_PER_USEC));
 	}
 
 	/* tupdate is in jiffies */
 	if (tb[TCA_FQ_PIE_TUPDATE])
-		q->p_params.tupdate =
-			usecs_to_jiffies(nla_get_u32(tb[TCA_FQ_PIE_TUPDATE]));
+		WRITE_ONCE(q->p_params.tupdate,
+			usecs_to_jiffies(nla_get_u32(tb[TCA_FQ_PIE_TUPDATE])));
 
 	if (tb[TCA_FQ_PIE_ALPHA])
-		q->p_params.alpha = nla_get_u32(tb[TCA_FQ_PIE_ALPHA]);
+		WRITE_ONCE(q->p_params.alpha,
+			   nla_get_u32(tb[TCA_FQ_PIE_ALPHA]));
 
 	if (tb[TCA_FQ_PIE_BETA])
-		q->p_params.beta = nla_get_u32(tb[TCA_FQ_PIE_BETA]);
+		WRITE_ONCE(q->p_params.beta,
+			   nla_get_u32(tb[TCA_FQ_PIE_BETA]));
 
 	if (tb[TCA_FQ_PIE_QUANTUM])
-		q->quantum = nla_get_u32(tb[TCA_FQ_PIE_QUANTUM]);
+		WRITE_ONCE(q->quantum, nla_get_u32(tb[TCA_FQ_PIE_QUANTUM]));
 
 	if (tb[TCA_FQ_PIE_MEMORY_LIMIT])
-		q->memory_limit = nla_get_u32(tb[TCA_FQ_PIE_MEMORY_LIMIT]);
+		WRITE_ONCE(q->memory_limit,
+			   nla_get_u32(tb[TCA_FQ_PIE_MEMORY_LIMIT]));
 
 	if (tb[TCA_FQ_PIE_ECN_PROB])
-		q->ecn_prob = nla_get_u32(tb[TCA_FQ_PIE_ECN_PROB]);
+		WRITE_ONCE(q->ecn_prob,
+			   nla_get_u32(tb[TCA_FQ_PIE_ECN_PROB]));
 
 	if (tb[TCA_FQ_PIE_ECN])
-		q->p_params.ecn = nla_get_u32(tb[TCA_FQ_PIE_ECN]);
+		WRITE_ONCE(q->p_params.ecn,
+			   nla_get_u32(tb[TCA_FQ_PIE_ECN]));
 
 	if (tb[TCA_FQ_PIE_BYTEMODE])
-		q->p_params.bytemode = nla_get_u32(tb[TCA_FQ_PIE_BYTEMODE]);
+		WRITE_ONCE(q->p_params.bytemode,
+			   nla_get_u32(tb[TCA_FQ_PIE_BYTEMODE]));
 
 	if (tb[TCA_FQ_PIE_DQ_RATE_ESTIMATOR])
-		q->p_params.dq_rate_estimator =
-			nla_get_u32(tb[TCA_FQ_PIE_DQ_RATE_ESTIMATOR]);
+		WRITE_ONCE(q->p_params.dq_rate_estimator,
+			   nla_get_u32(tb[TCA_FQ_PIE_DQ_RATE_ESTIMATOR]));
 
 	/* Drop excess packets if new limit is lower */
 	while (sch->q.qlen > sch->limit) {
@@ -471,22 +477,23 @@ static int fq_pie_dump(struct Qdisc *sch, struct sk_buff *skb)
 		return -EMSGSIZE;
 
 	/* convert target from pschedtime to us */
-	if (nla_put_u32(skb, TCA_FQ_PIE_LIMIT, sch->limit) ||
-	    nla_put_u32(skb, TCA_FQ_PIE_FLOWS, q->flows_cnt) ||
+	if (nla_put_u32(skb, TCA_FQ_PIE_LIMIT, READ_ONCE(sch->limit)) ||
+	    nla_put_u32(skb, TCA_FQ_PIE_FLOWS, READ_ONCE(q->flows_cnt)) ||
 	    nla_put_u32(skb, TCA_FQ_PIE_TARGET,
-			((u32)PSCHED_TICKS2NS(q->p_params.target)) /
+			((u32)PSCHED_TICKS2NS(READ_ONCE(q->p_params.target))) /
 			NSEC_PER_USEC) ||
 	    nla_put_u32(skb, TCA_FQ_PIE_TUPDATE,
-			jiffies_to_usecs(q->p_params.tupdate)) ||
-	    nla_put_u32(skb, TCA_FQ_PIE_ALPHA, q->p_params.alpha) ||
-	    nla_put_u32(skb, TCA_FQ_PIE_BETA, q->p_params.beta) ||
-	    nla_put_u32(skb, TCA_FQ_PIE_QUANTUM, q->quantum) ||
-	    nla_put_u32(skb, TCA_FQ_PIE_MEMORY_LIMIT, q->memory_limit) ||
-	    nla_put_u32(skb, TCA_FQ_PIE_ECN_PROB, q->ecn_prob) ||
-	    nla_put_u32(skb, TCA_FQ_PIE_ECN, q->p_params.ecn) ||
-	    nla_put_u32(skb, TCA_FQ_PIE_BYTEMODE, q->p_params.bytemode) ||
+			jiffies_to_usecs(READ_ONCE(q->p_params.tupdate))) ||
+	    nla_put_u32(skb, TCA_FQ_PIE_ALPHA, READ_ONCE(q->p_params.alpha)) ||
+	    nla_put_u32(skb, TCA_FQ_PIE_BETA, READ_ONCE(q->p_params.beta)) ||
+	    nla_put_u32(skb, TCA_FQ_PIE_QUANTUM, READ_ONCE(q->quantum)) ||
+	    nla_put_u32(skb, TCA_FQ_PIE_MEMORY_LIMIT,
+			READ_ONCE(q->memory_limit)) ||
+	    nla_put_u32(skb, TCA_FQ_PIE_ECN_PROB, READ_ONCE(q->ecn_prob)) ||
+	    nla_put_u32(skb, TCA_FQ_PIE_ECN, READ_ONCE(q->p_params.ecn)) ||
+	    nla_put_u32(skb, TCA_FQ_PIE_BYTEMODE, READ_ONCE(q->p_params.bytemode)) ||
 	    nla_put_u32(skb, TCA_FQ_PIE_DQ_RATE_ESTIMATOR,
-			q->p_params.dq_rate_estimator))
+			READ_ONCE(q->p_params.dq_rate_estimator)))
 		goto nla_put_failure;
 
 	return nla_nest_end(skb, opts);
-- 
2.44.0.683.g7961c838ac-goog


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2 net-next 11/14] net_sched: sch_hfsc: implement lockless accesses to q->defcls
  2024-04-18  7:32 [PATCH v2 net-next 00/14] net_sched: first series for RTNL-less qdisc dumps Eric Dumazet
                   ` (9 preceding siblings ...)
  2024-04-18  7:32 ` [PATCH v2 net-next 10/14] net_sched: sch_fq_pie: implement lockless fq_pie_dump() Eric Dumazet
@ 2024-04-18  7:32 ` Eric Dumazet
  2024-04-18 15:04   ` Simon Horman
  2024-04-18  7:32 ` [PATCH v2 net-next 12/14] net_sched: sch_hhf: implement lockless hhf_dump() Eric Dumazet
                   ` (4 subsequent siblings)
  15 siblings, 1 reply; 29+ messages in thread
From: Eric Dumazet @ 2024-04-18  7:32 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Jamal Hadi Salim, Simon Horman, Toke Høiland-Jørgensen,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet, Eric Dumazet

Instead of relying on RTNL, hfsc_dump_qdisc() can use READ_ONCE()
annotation, paired with WRITE_ONCE() one in hfsc_change_qdisc().

Use READ_ONCE(q->defcls) in hfsc_classify() to
no longer acquire qdisc lock from hfsc_change_qdisc().

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/sched/sch_hfsc.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 4e626df742d7a937c219ae9755816f099b6f0680..c287bf8423b47b7ca022fc2e6ca19b77f3ec13a0 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1174,7 +1174,8 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 	}
 
 	/* classification failed, try default class */
-	cl = hfsc_find_class(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
+	cl = hfsc_find_class(TC_H_MAKE(TC_H_MAJ(sch->handle),
+				       READ_ONCE(q->defcls)), sch);
 	if (cl == NULL || cl->level > 0)
 		return NULL;
 
@@ -1443,9 +1444,7 @@ hfsc_change_qdisc(struct Qdisc *sch, struct nlattr *opt,
 		return -EINVAL;
 	qopt = nla_data(opt);
 
-	sch_tree_lock(sch);
-	q->defcls = qopt->defcls;
-	sch_tree_unlock(sch);
+	WRITE_ONCE(q->defcls, qopt->defcls);
 
 	return 0;
 }
@@ -1525,7 +1524,7 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb)
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_hfsc_qopt qopt;
 
-	qopt.defcls = q->defcls;
+	qopt.defcls = READ_ONCE(q->defcls);
 	if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt))
 		goto nla_put_failure;
 	return skb->len;
-- 
2.44.0.683.g7961c838ac-goog


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2 net-next 12/14] net_sched: sch_hhf: implement lockless hhf_dump()
  2024-04-18  7:32 [PATCH v2 net-next 00/14] net_sched: first series for RTNL-less qdisc dumps Eric Dumazet
                   ` (10 preceding siblings ...)
  2024-04-18  7:32 ` [PATCH v2 net-next 11/14] net_sched: sch_hfsc: implement lockless accesses to q->defcls Eric Dumazet
@ 2024-04-18  7:32 ` Eric Dumazet
  2024-04-18  7:32 ` [PATCH v2 net-next 13/14] net_sched: sch_pie: implement lockless pie_dump() Eric Dumazet
                   ` (3 subsequent siblings)
  15 siblings, 0 replies; 29+ messages in thread
From: Eric Dumazet @ 2024-04-18  7:32 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Jamal Hadi Salim, Simon Horman, Toke Høiland-Jørgensen,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet, Eric Dumazet

Instead of relying on RTNL, hhf_dump() can use READ_ONCE()
annotations, paired with WRITE_ONCE() ones in hhf_change().

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Simon Horman <horms@kernel.org>
---
 net/sched/sch_hhf.c | 35 +++++++++++++++++++++--------------
 1 file changed, 21 insertions(+), 14 deletions(-)

diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 3f906df1435b2edea4286ba56bab68066be238b1..44d9efe1a96a89bdb44a6d48071b3eed90fd5554 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -534,27 +534,31 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt,
 	sch_tree_lock(sch);
 
 	if (tb[TCA_HHF_BACKLOG_LIMIT])
-		sch->limit = nla_get_u32(tb[TCA_HHF_BACKLOG_LIMIT]);
+		WRITE_ONCE(sch->limit, nla_get_u32(tb[TCA_HHF_BACKLOG_LIMIT]));
 
-	q->quantum = new_quantum;
-	q->hhf_non_hh_weight = new_hhf_non_hh_weight;
+	WRITE_ONCE(q->quantum, new_quantum);
+	WRITE_ONCE(q->hhf_non_hh_weight, new_hhf_non_hh_weight);
 
 	if (tb[TCA_HHF_HH_FLOWS_LIMIT])
-		q->hh_flows_limit = nla_get_u32(tb[TCA_HHF_HH_FLOWS_LIMIT]);
+		WRITE_ONCE(q->hh_flows_limit,
+			   nla_get_u32(tb[TCA_HHF_HH_FLOWS_LIMIT]));
 
 	if (tb[TCA_HHF_RESET_TIMEOUT]) {
 		u32 us = nla_get_u32(tb[TCA_HHF_RESET_TIMEOUT]);
 
-		q->hhf_reset_timeout = usecs_to_jiffies(us);
+		WRITE_ONCE(q->hhf_reset_timeout,
+			   usecs_to_jiffies(us));
 	}
 
 	if (tb[TCA_HHF_ADMIT_BYTES])
-		q->hhf_admit_bytes = nla_get_u32(tb[TCA_HHF_ADMIT_BYTES]);
+		WRITE_ONCE(q->hhf_admit_bytes,
+			   nla_get_u32(tb[TCA_HHF_ADMIT_BYTES]));
 
 	if (tb[TCA_HHF_EVICT_TIMEOUT]) {
 		u32 us = nla_get_u32(tb[TCA_HHF_EVICT_TIMEOUT]);
 
-		q->hhf_evict_timeout = usecs_to_jiffies(us);
+		WRITE_ONCE(q->hhf_evict_timeout,
+			   usecs_to_jiffies(us));
 	}
 
 	qlen = sch->q.qlen;
@@ -657,15 +661,18 @@ static int hhf_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (opts == NULL)
 		goto nla_put_failure;
 
-	if (nla_put_u32(skb, TCA_HHF_BACKLOG_LIMIT, sch->limit) ||
-	    nla_put_u32(skb, TCA_HHF_QUANTUM, q->quantum) ||
-	    nla_put_u32(skb, TCA_HHF_HH_FLOWS_LIMIT, q->hh_flows_limit) ||
+	if (nla_put_u32(skb, TCA_HHF_BACKLOG_LIMIT, READ_ONCE(sch->limit)) ||
+	    nla_put_u32(skb, TCA_HHF_QUANTUM, READ_ONCE(q->quantum)) ||
+	    nla_put_u32(skb, TCA_HHF_HH_FLOWS_LIMIT,
+			READ_ONCE(q->hh_flows_limit)) ||
 	    nla_put_u32(skb, TCA_HHF_RESET_TIMEOUT,
-			jiffies_to_usecs(q->hhf_reset_timeout)) ||
-	    nla_put_u32(skb, TCA_HHF_ADMIT_BYTES, q->hhf_admit_bytes) ||
+			jiffies_to_usecs(READ_ONCE(q->hhf_reset_timeout))) ||
+	    nla_put_u32(skb, TCA_HHF_ADMIT_BYTES,
+			READ_ONCE(q->hhf_admit_bytes)) ||
 	    nla_put_u32(skb, TCA_HHF_EVICT_TIMEOUT,
-			jiffies_to_usecs(q->hhf_evict_timeout)) ||
-	    nla_put_u32(skb, TCA_HHF_NON_HH_WEIGHT, q->hhf_non_hh_weight))
+			jiffies_to_usecs(READ_ONCE(q->hhf_evict_timeout))) ||
+	    nla_put_u32(skb, TCA_HHF_NON_HH_WEIGHT,
+			READ_ONCE(q->hhf_non_hh_weight)))
 		goto nla_put_failure;
 
 	return nla_nest_end(skb, opts);
-- 
2.44.0.683.g7961c838ac-goog


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2 net-next 13/14] net_sched: sch_pie: implement lockless pie_dump()
  2024-04-18  7:32 [PATCH v2 net-next 00/14] net_sched: first series for RTNL-less qdisc dumps Eric Dumazet
                   ` (11 preceding siblings ...)
  2024-04-18  7:32 ` [PATCH v2 net-next 12/14] net_sched: sch_hhf: implement lockless hhf_dump() Eric Dumazet
@ 2024-04-18  7:32 ` Eric Dumazet
  2024-04-18  7:32 ` [PATCH v2 net-next 14/14] net_sched: sch_skbprio: implement lockless skbprio_dump() Eric Dumazet
                   ` (2 subsequent siblings)
  15 siblings, 0 replies; 29+ messages in thread
From: Eric Dumazet @ 2024-04-18  7:32 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Jamal Hadi Salim, Simon Horman, Toke Høiland-Jørgensen,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet, Eric Dumazet

Instead of relying on RTNL, pie_dump() can use READ_ONCE()
annotations, paired with WRITE_ONCE() ones in pie_change().

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Simon Horman <horms@kernel.org>
---
 net/sched/sch_pie.c | 39 +++++++++++++++++++++------------------
 1 file changed, 21 insertions(+), 18 deletions(-)

diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index 1764059b063536f619fb4a32b53adfe8c639a8e5..b3dcb845b32759357f4db1980a7cb4db531bfad5 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -156,36 +156,38 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt,
 		u32 target = nla_get_u32(tb[TCA_PIE_TARGET]);
 
 		/* convert to pschedtime */
-		q->params.target = PSCHED_NS2TICKS((u64)target * NSEC_PER_USEC);
+		WRITE_ONCE(q->params.target,
+			   PSCHED_NS2TICKS((u64)target * NSEC_PER_USEC));
 	}
 
 	/* tupdate is in jiffies */
 	if (tb[TCA_PIE_TUPDATE])
-		q->params.tupdate =
-			usecs_to_jiffies(nla_get_u32(tb[TCA_PIE_TUPDATE]));
+		WRITE_ONCE(q->params.tupdate,
+			   usecs_to_jiffies(nla_get_u32(tb[TCA_PIE_TUPDATE])));
 
 	if (tb[TCA_PIE_LIMIT]) {
 		u32 limit = nla_get_u32(tb[TCA_PIE_LIMIT]);
 
-		q->params.limit = limit;
-		sch->limit = limit;
+		WRITE_ONCE(q->params.limit, limit);
+		WRITE_ONCE(sch->limit, limit);
 	}
 
 	if (tb[TCA_PIE_ALPHA])
-		q->params.alpha = nla_get_u32(tb[TCA_PIE_ALPHA]);
+		WRITE_ONCE(q->params.alpha, nla_get_u32(tb[TCA_PIE_ALPHA]));
 
 	if (tb[TCA_PIE_BETA])
-		q->params.beta = nla_get_u32(tb[TCA_PIE_BETA]);
+		WRITE_ONCE(q->params.beta, nla_get_u32(tb[TCA_PIE_BETA]));
 
 	if (tb[TCA_PIE_ECN])
-		q->params.ecn = nla_get_u32(tb[TCA_PIE_ECN]);
+		WRITE_ONCE(q->params.ecn, nla_get_u32(tb[TCA_PIE_ECN]));
 
 	if (tb[TCA_PIE_BYTEMODE])
-		q->params.bytemode = nla_get_u32(tb[TCA_PIE_BYTEMODE]);
+		WRITE_ONCE(q->params.bytemode,
+			   nla_get_u32(tb[TCA_PIE_BYTEMODE]));
 
 	if (tb[TCA_PIE_DQ_RATE_ESTIMATOR])
-		q->params.dq_rate_estimator =
-				nla_get_u32(tb[TCA_PIE_DQ_RATE_ESTIMATOR]);
+		WRITE_ONCE(q->params.dq_rate_estimator,
+			   nla_get_u32(tb[TCA_PIE_DQ_RATE_ESTIMATOR]));
 
 	/* Drop excess packets if new limit is lower */
 	qlen = sch->q.qlen;
@@ -469,17 +471,18 @@ static int pie_dump(struct Qdisc *sch, struct sk_buff *skb)
 
 	/* convert target from pschedtime to us */
 	if (nla_put_u32(skb, TCA_PIE_TARGET,
-			((u32)PSCHED_TICKS2NS(q->params.target)) /
+			((u32)PSCHED_TICKS2NS(READ_ONCE(q->params.target))) /
 			NSEC_PER_USEC) ||
-	    nla_put_u32(skb, TCA_PIE_LIMIT, sch->limit) ||
+	    nla_put_u32(skb, TCA_PIE_LIMIT, READ_ONCE(sch->limit)) ||
 	    nla_put_u32(skb, TCA_PIE_TUPDATE,
-			jiffies_to_usecs(q->params.tupdate)) ||
-	    nla_put_u32(skb, TCA_PIE_ALPHA, q->params.alpha) ||
-	    nla_put_u32(skb, TCA_PIE_BETA, q->params.beta) ||
+			jiffies_to_usecs(READ_ONCE(q->params.tupdate))) ||
+	    nla_put_u32(skb, TCA_PIE_ALPHA, READ_ONCE(q->params.alpha)) ||
+	    nla_put_u32(skb, TCA_PIE_BETA, READ_ONCE(q->params.beta)) ||
 	    nla_put_u32(skb, TCA_PIE_ECN, q->params.ecn) ||
-	    nla_put_u32(skb, TCA_PIE_BYTEMODE, q->params.bytemode) ||
+	    nla_put_u32(skb, TCA_PIE_BYTEMODE,
+			READ_ONCE(q->params.bytemode)) ||
 	    nla_put_u32(skb, TCA_PIE_DQ_RATE_ESTIMATOR,
-			q->params.dq_rate_estimator))
+			READ_ONCE(q->params.dq_rate_estimator)))
 		goto nla_put_failure;
 
 	return nla_nest_end(skb, opts);
-- 
2.44.0.683.g7961c838ac-goog


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v2 net-next 14/14] net_sched: sch_skbprio: implement lockless skbprio_dump()
  2024-04-18  7:32 [PATCH v2 net-next 00/14] net_sched: first series for RTNL-less qdisc dumps Eric Dumazet
                   ` (12 preceding siblings ...)
  2024-04-18  7:32 ` [PATCH v2 net-next 13/14] net_sched: sch_pie: implement lockless pie_dump() Eric Dumazet
@ 2024-04-18  7:32 ` Eric Dumazet
  2024-04-18 15:04   ` Simon Horman
  2024-04-18 10:23 ` [PATCH v2 net-next 00/14] net_sched: first series for RTNL-less qdisc dumps Jamal Hadi Salim
  2024-04-19 10:40 ` [PATCH v2 net-next 00/14] net_sched: first series for RTNL-less " patchwork-bot+netdevbpf
  15 siblings, 1 reply; 29+ messages in thread
From: Eric Dumazet @ 2024-04-18  7:32 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Jamal Hadi Salim, Simon Horman, Toke Høiland-Jørgensen,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet, Eric Dumazet

Instead of relying on RTNL, skbprio_dump() can use READ_ONCE()
annotation, paired with WRITE_ONCE() one in skbprio_change().

Also add a READ_ONCE(sch->limit) in skbprio_enqueue().

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/sched/sch_skbprio.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c
index b4dd626c309c36725e6030a338d21d1fabcb6704..20ff7386b74bd89c00b50a8f0def91b6c5cce7f4 100644
--- a/net/sched/sch_skbprio.c
+++ b/net/sched/sch_skbprio.c
@@ -79,7 +79,9 @@ static int skbprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	prio = min(skb->priority, max_priority);
 
 	qdisc = &q->qdiscs[prio];
-	if (sch->q.qlen < sch->limit) {
+
+	/* sch->limit can change under us from skbprio_change() */
+	if (sch->q.qlen < READ_ONCE(sch->limit)) {
 		__skb_queue_tail(qdisc, skb);
 		qdisc_qstats_backlog_inc(sch, skb);
 		q->qstats[prio].backlog += qdisc_pkt_len(skb);
@@ -172,7 +174,7 @@ static int skbprio_change(struct Qdisc *sch, struct nlattr *opt,
 	if (opt->nla_len != nla_attr_size(sizeof(*ctl)))
 		return -EINVAL;
 
-	sch->limit = ctl->limit;
+	WRITE_ONCE(sch->limit, ctl->limit);
 	return 0;
 }
 
@@ -200,7 +202,7 @@ static int skbprio_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct tc_skbprio_qopt opt;
 
-	opt.limit = sch->limit;
+	opt.limit = READ_ONCE(sch->limit);
 
 	if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
 		return -1;
-- 
2.44.0.683.g7961c838ac-goog


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 net-next 00/14] net_sched: first series for RTNL-less qdisc dumps
  2024-04-18  7:32 [PATCH v2 net-next 00/14] net_sched: first series for RTNL-less qdisc dumps Eric Dumazet
                   ` (13 preceding siblings ...)
  2024-04-18  7:32 ` [PATCH v2 net-next 14/14] net_sched: sch_skbprio: implement lockless skbprio_dump() Eric Dumazet
@ 2024-04-18 10:23 ` Jamal Hadi Salim
  2024-04-18 15:08   ` tdc [Was: Re: [PATCH v2 net-next 00/14] net_sched: first series for RTNL-less] " Simon Horman
  2024-04-19 10:40 ` [PATCH v2 net-next 00/14] net_sched: first series for RTNL-less " patchwork-bot+netdevbpf
  15 siblings, 1 reply; 29+ messages in thread
From: Jamal Hadi Salim @ 2024-04-18 10:23 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, Simon Horman,
	Toke Høiland-Jørgensen, Cong Wang, Jiri Pirko, netdev,
	eric.dumazet

On Thu, Apr 18, 2024 at 3:32 AM Eric Dumazet <edumazet@google.com> wrote:
>
> Medium term goal is to implement "tc qdisc show" without needing
> to acquire RTNL.
>
> This first series makes the requested changes in 14 qdisc.
>
> Notes :
>
>  - RTNL is still held in "tc qdisc show", more changes are needed.
>
>  - Qdisc returning many attributes might want/need to provide
>    a consistent set of attributes. If that is the case, their
>    dump() method could acquire the qdisc spinlock, to pair the
>    spinlock acquision in their change() method.
>

For the series:
Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>

Not a show-stopper, we'll run the tdc tests after (and use this as an
opportunity to add more tests if needed).
For your next series we'll try to do that after you post.

cheers,
jamal
> V2: Addressed Simon feedback (Thanks a lot Simon)
>
> Eric Dumazet (14):
>   net_sched: sch_fq: implement lockless fq_dump()
>   net_sched: cake: implement lockless cake_dump()
>   net_sched: sch_cbs: implement lockless cbs_dump()
>   net_sched: sch_choke: implement lockless choke_dump()
>   net_sched: sch_codel: implement lockless codel_dump()
>   net_sched: sch_tfs: implement lockless etf_dump()
>   net_sched: sch_ets: implement lockless ets_dump()
>   net_sched: sch_fifo: implement lockless __fifo_dump()
>   net_sched: sch_fq_codel: implement lockless fq_codel_dump()
>   net_sched: sch_fq_pie: implement lockless fq_pie_dump()
>   net_sched: sch_hfsc: implement lockless accesses to q->defcls
>   net_sched: sch_hhf: implement lockless hhf_dump()
>   net_sched: sch_pie: implement lockless pie_dump()
>   net_sched: sch_skbprio: implement lockless skbprio_dump()
>
>  include/net/red.h        |  12 ++---
>  net/sched/sch_cake.c     | 110 ++++++++++++++++++++++-----------------
>  net/sched/sch_cbs.c      |  20 +++----
>  net/sched/sch_choke.c    |  21 ++++----
>  net/sched/sch_codel.c    |  29 +++++++----
>  net/sched/sch_etf.c      |  10 ++--
>  net/sched/sch_ets.c      |  25 +++++----
>  net/sched/sch_fifo.c     |  13 ++---
>  net/sched/sch_fq.c       | 108 ++++++++++++++++++++++++--------------
>  net/sched/sch_fq_codel.c |  57 ++++++++++++--------
>  net/sched/sch_fq_pie.c   |  61 ++++++++++++----------
>  net/sched/sch_hfsc.c     |   9 ++--
>  net/sched/sch_hhf.c      |  35 ++++++++-----
>  net/sched/sch_pie.c      |  39 +++++++-------
>  net/sched/sch_skbprio.c  |   8 +--
>  15 files changed, 323 insertions(+), 234 deletions(-)
>
> --
> 2.44.0.683.g7961c838ac-goog
>

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 net-next 01/14] net_sched: sch_fq: implement lockless fq_dump()
  2024-04-18  7:32 ` [PATCH v2 net-next 01/14] net_sched: sch_fq: implement lockless fq_dump() Eric Dumazet
@ 2024-04-18 13:55   ` Simon Horman
  0 siblings, 0 replies; 29+ messages in thread
From: Simon Horman @ 2024-04-18 13:55 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, Jamal Hadi Salim,
	Toke Høiland-Jørgensen, Cong Wang, Jiri Pirko, netdev,
	eric.dumazet

On Thu, Apr 18, 2024 at 07:32:35AM +0000, Eric Dumazet wrote:
> Instead of relying on RTNL, fq_dump() can use READ_ONCE()
> annotations, paired with WRITE_ONCE() in fq_change()
> 
> v2: Addressed Simon feedback in V1: https://lore.kernel.org/netdev/20240416181915.GT2320920@kernel.org/
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>

Reviewed-by: Simon Horman <horms@kernel.org>


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 net-next 02/14] net_sched: cake: implement lockless cake_dump()
  2024-04-18  7:32 ` [PATCH v2 net-next 02/14] net_sched: cake: implement lockless cake_dump() Eric Dumazet
@ 2024-04-18 13:57   ` Simon Horman
  2024-04-18 15:20   ` Toke Høiland-Jørgensen
  1 sibling, 0 replies; 29+ messages in thread
From: Simon Horman @ 2024-04-18 13:57 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, Jamal Hadi Salim,
	Toke Høiland-Jørgensen, Cong Wang, Jiri Pirko, netdev,
	eric.dumazet, Toke Høiland-Jørgensen

On Thu, Apr 18, 2024 at 07:32:36AM +0000, Eric Dumazet wrote:
> Instead of relying on RTNL, cake_dump() can use READ_ONCE()
> annotations, paired with WRITE_ONCE() ones in cake_change().
> 
> v2: addressed Simon feedback in V1: https://lore.kernel.org/netdev/20240417083549.GA3846178@kernel.org/
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Toke Høiland-Jørgensen <toke@toke.dk>

Reviewed-by: Simon Horman <horms@kernel.org>


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 net-next 04/14] net_sched: sch_choke: implement lockless choke_dump()
  2024-04-18  7:32 ` [PATCH v2 net-next 04/14] net_sched: sch_choke: implement lockless choke_dump() Eric Dumazet
@ 2024-04-18 13:58   ` Simon Horman
  0 siblings, 0 replies; 29+ messages in thread
From: Simon Horman @ 2024-04-18 13:58 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, Jamal Hadi Salim,
	Toke Høiland-Jørgensen, Cong Wang, Jiri Pirko, netdev,
	eric.dumazet

On Thu, Apr 18, 2024 at 07:32:38AM +0000, Eric Dumazet wrote:
> Instead of relying on RTNL, choke_dump() can use READ_ONCE()
> annotations, paired with WRITE_ONCE() ones in choke_change().
> 
> v2: added a WRITE_ONCE(p->Scell_log, Scell_log)
>     per Simon feedback in V1
>     Removed the READ_ONCE(q->limit) in choke_enqueue()
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>

Reviewed-by: Simon Horman <horms@kernel.org>


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 net-next 08/14] net_sched: sch_fifo: implement lockless __fifo_dump()
  2024-04-18  7:32 ` [PATCH v2 net-next 08/14] net_sched: sch_fifo: implement lockless __fifo_dump() Eric Dumazet
@ 2024-04-18 15:04   ` Simon Horman
  0 siblings, 0 replies; 29+ messages in thread
From: Simon Horman @ 2024-04-18 15:04 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, Jamal Hadi Salim,
	Toke Høiland-Jørgensen, Cong Wang, Jiri Pirko, netdev,
	eric.dumazet

On Thu, Apr 18, 2024 at 07:32:42AM +0000, Eric Dumazet wrote:
> Instead of relying on RTNL, __fifo_dump() can use READ_ONCE()
> annotations, paired with WRITE_ONCE() ones in __fifo_init().
> 
> Also add missing READ_ONCE(sch->limit) in bfifo_enqueue(),
> pfifo_enqueue() and pfifo_tail_enqueue().
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>

Reviewed-by: Simon Horman <horms@kernel.org>


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 net-next 14/14] net_sched: sch_skbprio: implement lockless skbprio_dump()
  2024-04-18  7:32 ` [PATCH v2 net-next 14/14] net_sched: sch_skbprio: implement lockless skbprio_dump() Eric Dumazet
@ 2024-04-18 15:04   ` Simon Horman
  0 siblings, 0 replies; 29+ messages in thread
From: Simon Horman @ 2024-04-18 15:04 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, Jamal Hadi Salim,
	Toke Høiland-Jørgensen, Cong Wang, Jiri Pirko, netdev,
	eric.dumazet

On Thu, Apr 18, 2024 at 07:32:48AM +0000, Eric Dumazet wrote:
> Instead of relying on RTNL, skbprio_dump() can use READ_ONCE()
> annotation, paired with WRITE_ONCE() one in skbprio_change().
> 
> Also add a READ_ONCE(sch->limit) in skbprio_enqueue().
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>

Reviewed-by: Simon Horman <horms@kernel.org>


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 net-next 11/14] net_sched: sch_hfsc: implement lockless accesses to q->defcls
  2024-04-18  7:32 ` [PATCH v2 net-next 11/14] net_sched: sch_hfsc: implement lockless accesses to q->defcls Eric Dumazet
@ 2024-04-18 15:04   ` Simon Horman
  0 siblings, 0 replies; 29+ messages in thread
From: Simon Horman @ 2024-04-18 15:04 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, Jamal Hadi Salim,
	Toke Høiland-Jørgensen, Cong Wang, Jiri Pirko, netdev,
	eric.dumazet

On Thu, Apr 18, 2024 at 07:32:45AM +0000, Eric Dumazet wrote:
> Instead of relying on RTNL, hfsc_dump_qdisc() can use READ_ONCE()
> annotation, paired with WRITE_ONCE() one in hfsc_change_qdisc().
> 
> Use READ_ONCE(q->defcls) in hfsc_classify() to
> no longer acquire qdisc lock from hfsc_change_qdisc().
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>

Reviewed-by: Simon Horman <horms@kernel.org>


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: tdc [Was: Re: [PATCH v2 net-next 00/14] net_sched: first series for RTNL-less] qdisc dumps
  2024-04-18 10:23 ` [PATCH v2 net-next 00/14] net_sched: first series for RTNL-less qdisc dumps Jamal Hadi Salim
@ 2024-04-18 15:08   ` Simon Horman
  2024-04-18 23:05     ` Jamal Hadi Salim
  0 siblings, 1 reply; 29+ messages in thread
From: Simon Horman @ 2024-04-18 15:08 UTC (permalink / raw)
  To: Jamal Hadi Salim
  Cc: Eric Dumazet, David S . Miller, Jakub Kicinski, Paolo Abeni,
	Toke Høiland-Jørgensen, Cong Wang, Jiri Pirko, netdev,
	eric.dumazet

On Thu, Apr 18, 2024 at 06:23:27AM -0400, Jamal Hadi Salim wrote:
> On Thu, Apr 18, 2024 at 3:32 AM Eric Dumazet <edumazet@google.com> wrote:
> >
> > Medium term goal is to implement "tc qdisc show" without needing
> > to acquire RTNL.
> >
> > This first series makes the requested changes in 14 qdisc.
> >
> > Notes :
> >
> >  - RTNL is still held in "tc qdisc show", more changes are needed.
> >
> >  - Qdisc returning many attributes might want/need to provide
> >    a consistent set of attributes. If that is the case, their
> >    dump() method could acquire the qdisc spinlock, to pair the
> >    spinlock acquision in their change() method.
> >
> 
> For the series:
> Reviewed-by: Jamal Hadi Salim<jhs@mojatatu.com>
> 
> Not a show-stopper, we'll run the tdc tests after (and use this as an
> opportunity to add more tests if needed).
> For your next series we'll try to do that after you post.

Hi Jamal,

On the topic of tdc, I noticed the following both
with and without this series applied. Is this something
you are aware of?

not ok 990 ce7d - Add mq Qdisc to multi-queue device (4 queues)

I'm not sure if it is valid, but I tried running tdc like this:

$ vng --build --config tools/testing/selftests/tc-testing/config
$ vng -v --run . --user root --cpus 4 -- \
	"cd ./tools/testing/selftests/tc-testing; ./tdc.py;"

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 net-next 02/14] net_sched: cake: implement lockless cake_dump()
  2024-04-18  7:32 ` [PATCH v2 net-next 02/14] net_sched: cake: implement lockless cake_dump() Eric Dumazet
  2024-04-18 13:57   ` Simon Horman
@ 2024-04-18 15:20   ` Toke Høiland-Jørgensen
  1 sibling, 0 replies; 29+ messages in thread
From: Toke Høiland-Jørgensen @ 2024-04-18 15:20 UTC (permalink / raw)
  To: Eric Dumazet, David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Jamal Hadi Salim, Simon Horman, Cong Wang, Jiri Pirko, netdev,
	eric.dumazet, Eric Dumazet

Eric Dumazet <edumazet@google.com> writes:

> Instead of relying on RTNL, cake_dump() can use READ_ONCE()
> annotations, paired with WRITE_ONCE() ones in cake_change().
>
> v2: addressed Simon feedback in V1: https://lore.kernel.org/netdev/20240417083549.GA3846178@kernel.org/
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Toke Høiland-Jørgensen <toke@toke.dk>

Acked-by: Toke Høiland-Jørgensen <toke@toke.dk>

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: tdc [Was: Re: [PATCH v2 net-next 00/14] net_sched: first series for RTNL-less] qdisc dumps
  2024-04-18 15:08   ` tdc [Was: Re: [PATCH v2 net-next 00/14] net_sched: first series for RTNL-less] " Simon Horman
@ 2024-04-18 23:05     ` Jamal Hadi Salim
  2024-04-19  7:18       ` Simon Horman
  0 siblings, 1 reply; 29+ messages in thread
From: Jamal Hadi Salim @ 2024-04-18 23:05 UTC (permalink / raw)
  To: Simon Horman
  Cc: Eric Dumazet, David S . Miller, Jakub Kicinski, Paolo Abeni,
	Toke Høiland-Jørgensen, Cong Wang, Jiri Pirko, netdev,
	eric.dumazet

On Thu, Apr 18, 2024 at 11:08 AM Simon Horman <horms@kernel.org> wrote:
>
> On Thu, Apr 18, 2024 at 06:23:27AM -0400, Jamal Hadi Salim wrote:
> > On Thu, Apr 18, 2024 at 3:32 AM Eric Dumazet <edumazet@google.com> wrote:
> > >
> > > Medium term goal is to implement "tc qdisc show" without needing
> > > to acquire RTNL.
> > >
> > > This first series makes the requested changes in 14 qdisc.
> > >
> > > Notes :
> > >
> > >  - RTNL is still held in "tc qdisc show", more changes are needed.
> > >
> > >  - Qdisc returning many attributes might want/need to provide
> > >    a consistent set of attributes. If that is the case, their
> > >    dump() method could acquire the qdisc spinlock, to pair the
> > >    spinlock acquision in their change() method.
> > >
> >
> > For the series:
> > Reviewed-by: Jamal Hadi Salim<jhs@mojatatu.com>
> >
> > Not a show-stopper, we'll run the tdc tests after (and use this as an
> > opportunity to add more tests if needed).
> > For your next series we'll try to do that after you post.
>
> Hi Jamal,
>
> On the topic of tdc, I noticed the following both
> with and without this series applied. Is this something
> you are aware of?
>
> not ok 990 ce7d - Add mq Qdisc to multi-queue device (4 queues)
>

Since you said it also happens before Eric's patch, I took a look at
the test and nothing seems to stand out. Which iproute2 version are
you using?
We are running tdc in tandem with net-next (and iproute2-next) via
nipa for a while now and didn't see this problem pop up. So I am
guessing something in your setup?


> I'm not sure if it is valid, but I tried running tdc like this:
>
> $ ng --build --config tools/testing/selftests/tc-testing/config
> $ vng -v --run . --user root --cpus 4 -- \
>         "cd ./tools/testing/selftests/tc-testing; ./tdc.py;"

This looks reasonable...

cheers,
jamal

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: tdc [Was: Re: [PATCH v2 net-next 00/14] net_sched: first series for RTNL-less] qdisc dumps
  2024-04-18 23:05     ` Jamal Hadi Salim
@ 2024-04-19  7:18       ` Simon Horman
  2024-04-19 14:24         ` Jamal Hadi Salim
  0 siblings, 1 reply; 29+ messages in thread
From: Simon Horman @ 2024-04-19  7:18 UTC (permalink / raw)
  To: Jamal Hadi Salim
  Cc: Eric Dumazet, David S . Miller, Jakub Kicinski, Paolo Abeni,
	Toke Høiland-Jørgensen, Cong Wang, Jiri Pirko, netdev,
	eric.dumazet

On Thu, Apr 18, 2024 at 07:05:08PM -0400, Jamal Hadi Salim wrote:
> On Thu, Apr 18, 2024 at 11:08 AM Simon Horman <horms@kernel.org> wrote:
> >
> > On Thu, Apr 18, 2024 at 06:23:27AM -0400, Jamal Hadi Salim wrote:
> > > On Thu, Apr 18, 2024 at 3:32 AM Eric Dumazet <edumazet@google.com> wrote:
> > > >
> > > > Medium term goal is to implement "tc qdisc show" without needing
> > > > to acquire RTNL.
> > > >
> > > > This first series makes the requested changes in 14 qdisc.
> > > >
> > > > Notes :
> > > >
> > > >  - RTNL is still held in "tc qdisc show", more changes are needed.
> > > >
> > > >  - Qdisc returning many attributes might want/need to provide
> > > >    a consistent set of attributes. If that is the case, their
> > > >    dump() method could acquire the qdisc spinlock, to pair the
> > > >    spinlock acquision in their change() method.
> > > >
> > >
> > > For the series:
> > > Reviewed-by: Jamal Hadi Salim<jhs@mojatatu.com>
> > >
> > > Not a show-stopper, we'll run the tdc tests after (and use this as an
> > > opportunity to add more tests if needed).
> > > For your next series we'll try to do that after you post.
> >
> > Hi Jamal,
> >
> > On the topic of tdc, I noticed the following both
> > with and without this series applied. Is this something
> > you are aware of?
> >
> > not ok 990 ce7d - Add mq Qdisc to multi-queue device (4 queues)
> >
> 
> Since you said it also happens before Eric's patch, I took a look in
> the test and nothing seems to stand out. Which iproute2 version are
> you using?
> We are running tdc in tandem with net-next (and iproute2-next) via
> nipa for a while now and didn't see this problem pop up. So I am
> guessing something in your setup?

Thanks Jamal,

I appreciate you checking this.
I agree it seems likely that it relates to my environment.
And I'll try out iproute2-next.

For the record I'm using the Fedora 39 packaged iproute2,
iproute-6.4.0-2.fc39.x86_64.

For the kernel, I was using net-next from within the past few days.

> > I'm not sure if it is valid, but I tried running tdc like this:
> >
> > $ ng --build --config tools/testing/selftests/tc-testing/config
> > $ vng -v --run . --user root --cpus 4 -- \
> >         "cd ./tools/testing/selftests/tc-testing; ./tdc.py;"
> 
> This looks reasonable...

Thanks, that was my main question.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2 net-next 00/14] net_sched: first series for RTNL-less qdisc dumps
  2024-04-18  7:32 [PATCH v2 net-next 00/14] net_sched: first series for RTNL-less qdisc dumps Eric Dumazet
                   ` (14 preceding siblings ...)
  2024-04-18 10:23 ` [PATCH v2 net-next 00/14] net_sched: first series for RTNL-less qdisc dumps Jamal Hadi Salim
@ 2024-04-19 10:40 ` patchwork-bot+netdevbpf
  15 siblings, 0 replies; 29+ messages in thread
From: patchwork-bot+netdevbpf @ 2024-04-19 10:40 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: davem, kuba, pabeni, jhs, horms, toke, xiyou.wangcong, jiri,
	netdev, eric.dumazet

Hello:

This series was applied to netdev/net-next.git (main)
by David S. Miller <davem@davemloft.net>:

On Thu, 18 Apr 2024 07:32:34 +0000 you wrote:
> Medium term goal is to implement "tc qdisc show" without needing
> to acquire RTNL.
> 
> This first series makes the requested changes in 14 qdisc.
> 
> Notes :
> 
> [...]

Here is the summary with links:
  - [v2,net-next,01/14] net_sched: sch_fq: implement lockless fq_dump()
    https://git.kernel.org/netdev/net-next/c/24bcc3076790
  - [v2,net-next,02/14] net_sched: cake: implement lockless cake_dump()
    https://git.kernel.org/netdev/net-next/c/9263650102bb
  - [v2,net-next,03/14] net_sched: sch_cbs: implement lockless cbs_dump()
    https://git.kernel.org/netdev/net-next/c/8eb54a421a62
  - [v2,net-next,04/14] net_sched: sch_choke: implement lockless choke_dump()
    https://git.kernel.org/netdev/net-next/c/7253c1d1e7a5
  - [v2,net-next,05/14] net_sched: sch_codel: implement lockless codel_dump()
    https://git.kernel.org/netdev/net-next/c/c45bd26c829e
  - [v2,net-next,06/14] net_sched: sch_tfs: implement lockless etf_dump()
    https://git.kernel.org/netdev/net-next/c/a1ac3a7c3d1e
  - [v2,net-next,07/14] net_sched: sch_ets: implement lockless ets_dump()
    https://git.kernel.org/netdev/net-next/c/c5f1dde7f731
  - [v2,net-next,08/14] net_sched: sch_fifo: implement lockless __fifo_dump()
    https://git.kernel.org/netdev/net-next/c/01daf66b791e
  - [v2,net-next,09/14] net_sched: sch_fq_codel: implement lockless fq_codel_dump()
    https://git.kernel.org/netdev/net-next/c/396a0038508a
  - [v2,net-next,10/14] net_sched: sch_fq_pie: implement lockless fq_pie_dump()
    https://git.kernel.org/netdev/net-next/c/13a9965de324
  - [v2,net-next,11/14] net_sched: sch_hfsc: implement lockless accesses to q->defcls
    https://git.kernel.org/netdev/net-next/c/49e8ae537002
  - [v2,net-next,12/14] net_sched: sch_hhf: implement lockless hhf_dump()
    https://git.kernel.org/netdev/net-next/c/293c7e2b3e2f
  - [v2,net-next,13/14] net_sched: sch_pie: implement lockless pie_dump()
    https://git.kernel.org/netdev/net-next/c/6c00dc4fdb40
  - [v2,net-next,14/14] net_sched: sch_skbprio: implement lockless skbprio_dump()
    https://git.kernel.org/netdev/net-next/c/c85cedb38f41

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: tdc [Was: Re: [PATCH v2 net-next 00/14] net_sched: first series for RTNL-less] qdisc dumps
  2024-04-19  7:18       ` Simon Horman
@ 2024-04-19 14:24         ` Jamal Hadi Salim
  2024-04-22 18:34           ` Simon Horman
  0 siblings, 1 reply; 29+ messages in thread
From: Jamal Hadi Salim @ 2024-04-19 14:24 UTC (permalink / raw)
  To: Simon Horman
  Cc: Eric Dumazet, David S . Miller, Jakub Kicinski, Paolo Abeni,
	Toke Høiland-Jørgensen, Cong Wang, Jiri Pirko, netdev,
	eric.dumazet

On Fri, Apr 19, 2024 at 3:18 AM Simon Horman <horms@kernel.org> wrote:
>
> On Thu, Apr 18, 2024 at 07:05:08PM -0400, Jamal Hadi Salim wrote:
> > On Thu, Apr 18, 2024 at 11:08 AM Simon Horman <horms@kernel.org> wrote:
> > >
> > > On Thu, Apr 18, 2024 at 06:23:27AM -0400, Jamal Hadi Salim wrote:
> > > > On Thu, Apr 18, 2024 at 3:32 AM Eric Dumazet <edumazet@google.com> wrote:
> > > > >
> > > > > Medium term goal is to implement "tc qdisc show" without needing
> > > > > to acquire RTNL.
> > > > >
> > > > > This first series makes the requested changes in 14 qdisc.
> > > > >
> > > > > Notes :
> > > > >
> > > > >  - RTNL is still held in "tc qdisc show", more changes are needed.
> > > > >
> > > > >  - Qdisc returning many attributes might want/need to provide
> > > > >    a consistent set of attributes. If that is the case, their
> > > > >    dump() method could acquire the qdisc spinlock, to pair the
> > > > >    spinlock acquision in their change() method.
> > > > >
> > > >
> > > > For the series:
> > > > Reviewed-by: Jamal Hadi Salim<jhs@mojatatu.com>
> > > >
> > > > Not a show-stopper, we'll run the tdc tests after (and use this as an
> > > > opportunity to add more tests if needed).
> > > > For your next series we'll try to do that after you post.
> > >
> > > Hi Jamal,
> > >
> > > On the topic of tdc, I noticed the following both
> > > with and without this series applied. Is this something
> > > you are aware of?
> > >
> > > not ok 990 ce7d - Add mq Qdisc to multi-queue device (4 queues)
> > >
> >
> > Since you said it also happens before Eric's patch, I took a look in
> > the test and nothing seems to stand out. Which iproute2 version are
> > you using?
> > We are running tdc in tandem with net-next (and iproute2-next) via
> > nipa for a while now and didn't see this problem pop up. So I am
> > guessing something in your setup?
>
> Thanks Jamal,
>
> I appreciate you checking this.
> I agree it seems likely that it relates to my environment.
> And I'll try out iproute2-next.
>

Yeah, that would work although I think what you showed earlier should
have worked with just iproute2. Actually one thing comes to mind
noticing you are using tdc.py - that test uses netdevsim. You may have
to modprobe netdevsim. If you run it via tdc.sh it would probe and
load it for you

> For the record I'm using the Fedora 39 packaged iproute2,
> iproute-6.4.0-2.fc39.x86_64.
>

We use debian and ubuntu mostly.

> For the kernel, I was using net-next from within the past few days.
>

Haven't tested the latest/greatest net-next, but say 3-4 days old net-next.

> > > I'm not sure if it is valid, but I tried running tdc like this:
> > >
> > > $ ng --build --config tools/testing/selftests/tc-testing/config
> > > $ vng -v --run . --user root --cpus 4 -- \
> > >         "cd ./tools/testing/selftests/tc-testing; ./tdc.py;"
> >
> > This looks reasonable...
>
> Thanks, that was my main question.

Just what i said above.

cheers,
jamal

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: tdc [Was: Re: [PATCH v2 net-next 00/14] net_sched: first series for RTNL-less] qdisc dumps
  2024-04-19 14:24         ` Jamal Hadi Salim
@ 2024-04-22 18:34           ` Simon Horman
  0 siblings, 0 replies; 29+ messages in thread
From: Simon Horman @ 2024-04-22 18:34 UTC (permalink / raw)
  To: Jamal Hadi Salim
  Cc: Eric Dumazet, David S. Miller, Jakub Kicinski, Paolo Abeni,
	Toke Høiland-Jørgensen, Cong Wang, Jiri Pirko, netdev,
	eric.dumazet

On Fri, Apr 19, 2024 at 10:24:52AM -0400, Jamal Hadi Salim wrote:
> On Fri, Apr 19, 2024 at 3:18 AM Simon Horman <horms@kernel.org> wrote:
> >
> > On Thu, Apr 18, 2024 at 07:05:08PM -0400, Jamal Hadi Salim wrote:
> > > On Thu, Apr 18, 2024 at 11:08 AM Simon Horman <horms@kernel.org> wrote:
> > > >
> > > > On Thu, Apr 18, 2024 at 06:23:27AM -0400, Jamal Hadi Salim wrote:
> > > > > On Thu, Apr 18, 2024 at 3:32 AM Eric Dumazet <edumazet@google.com> wrote:
> > > > > >
> > > > > > Medium term goal is to implement "tc qdisc show" without needing
> > > > > > to acquire RTNL.
> > > > > >
> > > > > > This first series makes the requested changes in 14 qdisc.
> > > > > >
> > > > > > Notes :
> > > > > >
> > > > > >  - RTNL is still held in "tc qdisc show", more changes are needed.
> > > > > >
> > > > > >  - Qdisc returning many attributes might want/need to provide
> > > > > >    a consistent set of attributes. If that is the case, their
> > > > > >    dump() method could acquire the qdisc spinlock, to pair the
> > > > > >    spinlock acquisition in their change() method.
> > > > > >
> > > > >
> > > > > For the series:
> > > > > Reviewed-by: Jamal Hadi Salim<jhs@mojatatu.com>
> > > > >
> > > > > Not a show-stopper, we'll run the tdc tests after (and use this as an
> > > > > opportunity to add more tests if needed).
> > > > > For your next series we'll try to do that after you post.
> > > >
> > > > Hi Jamal,
> > > >
> > > > On the topic of tdc, I noticed the following both
> > > > with and without this series applied. Is this something
> > > > you are aware of?
> > > >
> > > > not ok 990 ce7d - Add mq Qdisc to multi-queue device (4 queues)
> > > >
> > >
> > > Since you said it also happens before Eric's patch, I took a look in
> > > the test and nothing seems to stand out. Which iproute2 version are
> > > you using?
> > > We are running tdc in tandem with net-next (and iproute2-next) via
> > > nipa for a while now and didn't see this problem pop up. So I am
> > > guessing something in your setup?
> >
> > Thanks Jamal,
> >
> > I appreciate you checking this.
> > I agree it seems likely that it relates to my environment.
> > And I'll try out iproute2-next.
> >
> 
> Yeah, that would work, although I think what you showed earlier should
> have worked with just iproute2. Actually, one thing comes to mind
> noticing you are using tdc.py - that test uses netdevsim. You may have
> to modprobe netdevsim. If you run it via tdc.sh it would probe and
> load it for you.

Thanks, tdc.sh seems to work better,
as does invoking TDC via make (which calls tdc.sh).

...


^ permalink raw reply	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2024-04-22 18:34 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-18  7:32 [PATCH v2 net-next 00/14] net_sched: first series for RTNL-less qdisc dumps Eric Dumazet
2024-04-18  7:32 ` [PATCH v2 net-next 01/14] net_sched: sch_fq: implement lockless fq_dump() Eric Dumazet
2024-04-18 13:55   ` Simon Horman
2024-04-18  7:32 ` [PATCH v2 net-next 02/14] net_sched: cake: implement lockless cake_dump() Eric Dumazet
2024-04-18 13:57   ` Simon Horman
2024-04-18 15:20   ` Toke Høiland-Jørgensen
2024-04-18  7:32 ` [PATCH v2 net-next 03/14] net_sched: sch_cbs: implement lockless cbs_dump() Eric Dumazet
2024-04-18  7:32 ` [PATCH v2 net-next 04/14] net_sched: sch_choke: implement lockless choke_dump() Eric Dumazet
2024-04-18 13:58   ` Simon Horman
2024-04-18  7:32 ` [PATCH v2 net-next 05/14] net_sched: sch_codel: implement lockless codel_dump() Eric Dumazet
2024-04-18  7:32 ` [PATCH v2 net-next 06/14] net_sched: sch_tfs: implement lockless etf_dump() Eric Dumazet
2024-04-18  7:32 ` [PATCH v2 net-next 07/14] net_sched: sch_ets: implement lockless ets_dump() Eric Dumazet
2024-04-18  7:32 ` [PATCH v2 net-next 08/14] net_sched: sch_fifo: implement lockless __fifo_dump() Eric Dumazet
2024-04-18 15:04   ` Simon Horman
2024-04-18  7:32 ` [PATCH v2 net-next 09/14] net_sched: sch_fq_codel: implement lockless fq_codel_dump() Eric Dumazet
2024-04-18  7:32 ` [PATCH v2 net-next 10/14] net_sched: sch_fq_pie: implement lockless fq_pie_dump() Eric Dumazet
2024-04-18  7:32 ` [PATCH v2 net-next 11/14] net_sched: sch_hfsc: implement lockless accesses to q->defcls Eric Dumazet
2024-04-18 15:04   ` Simon Horman
2024-04-18  7:32 ` [PATCH v2 net-next 12/14] net_sched: sch_hhf: implement lockless hhf_dump() Eric Dumazet
2024-04-18  7:32 ` [PATCH v2 net-next 13/14] net_sched: sch_pie: implement lockless pie_dump() Eric Dumazet
2024-04-18  7:32 ` [PATCH v2 net-next 14/14] net_sched: sch_skbprio: implement lockless skbprio_dump() Eric Dumazet
2024-04-18 15:04   ` Simon Horman
2024-04-18 10:23 ` [PATCH v2 net-next 00/14] net_sched: first series for RTNL-less qdisc dumps Jamal Hadi Salim
2024-04-18 15:08   ` tdc [Was: Re: [PATCH v2 net-next 00/14] net_sched: first series for RTNL-less] " Simon Horman
2024-04-18 23:05     ` Jamal Hadi Salim
2024-04-19  7:18       ` Simon Horman
2024-04-19 14:24         ` Jamal Hadi Salim
2024-04-22 18:34           ` Simon Horman
2024-04-19 10:40 ` [PATCH v2 net-next 00/14] net_sched: first series for RTNL-less " patchwork-bot+netdevbpf

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.