* [PATCH net-next 00/14] net_sched: first series for RTNL-less qdisc dumps
@ 2024-04-15 13:20 Eric Dumazet
  2024-04-15 13:20 ` [PATCH net-next 01/14] net_sched: sch_fq: implement lockless fq_dump() Eric Dumazet
                   ` (13 more replies)
  0 siblings, 14 replies; 44+ messages in thread
From: Eric Dumazet @ 2024-04-15 13:20 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Jamal Hadi Salim, Cong Wang, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet

The medium-term goal is to implement "tc qdisc show" without needing
to acquire RTNL.

This first series makes the needed changes in 14 qdiscs.
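
For reference, here is a minimal sketch of the pattern used throughout
the series (hypothetical "foo" qdisc and TCA_FOO_LIMIT attribute, not
code from any of the patches): the change() path still runs under RTNL
but annotates its stores with WRITE_ONCE(), so that a dump() running
without RTNL can pair them with READ_ONCE() and never observe a torn
value.

	/* writer side, still called under RTNL from foo_change() */
	if (tb[TCA_FOO_LIMIT])
		WRITE_ONCE(q->limit, nla_get_u32(tb[TCA_FOO_LIMIT]));

	/* reader side, foo_dump() running without RTNL */
	if (nla_put_u32(skb, TCA_FOO_LIMIT, READ_ONCE(q->limit)))
		goto nla_put_failure;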

Eric Dumazet (14):
  net_sched: sch_fq: implement lockless fq_dump()
  net_sched: cake: implement lockless cake_dump()
  net_sched: sch_cbs: implement lockless cbs_dump()
  net_sched: sch_choke: implement lockless choke_dump()
  net_sched: sch_codel: implement lockless codel_dump()
  net_sched: sch_etf: implement lockless etf_dump()
  net_sched: sch_ets: implement lockless ets_dump()
  net_sched: sch_fifo: implement lockless __fifo_dump()
  net_sched: sch_fq_codel: implement lockless fq_codel_dump()
  net_sched: sch_fq_pie: implement lockless fq_pie_dump()
  net_sched: sch_hfsc: implement lockless accesses to q->defcls
  net_sched: sch_hhf: implement lockless hhf_dump()
  net_sched: sch_pie: implement lockless pie_dump()
  net_sched: sch_skbprio: implement lockless skbprio_dump()

 include/net/red.h        | 10 ++--
 net/sched/sch_cake.c     | 98 ++++++++++++++++++++++------------------
 net/sched/sch_cbs.c      | 20 ++++----
 net/sched/sch_choke.c    | 23 +++++-----
 net/sched/sch_codel.c    | 29 +++++++-----
 net/sched/sch_etf.c      | 10 ++--
 net/sched/sch_ets.c      | 25 +++++-----
 net/sched/sch_fifo.c     | 13 +++---
 net/sched/sch_fq.c       | 96 ++++++++++++++++++++++++---------------
 net/sched/sch_fq_codel.c | 57 ++++++++++++++---------
 net/sched/sch_fq_pie.c   | 61 ++++++++++++++-----------
 net/sched/sch_hfsc.c     |  9 ++--
 net/sched/sch_hhf.c      | 35 ++++++++------
 net/sched/sch_pie.c      | 39 ++++++++--------
 net/sched/sch_skbprio.c  |  8 ++--
 15 files changed, 306 insertions(+), 227 deletions(-)

-- 
2.44.0.683.g7961c838ac-goog


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [PATCH net-next 01/14] net_sched: sch_fq: implement lockless fq_dump()
  2024-04-15 13:20 [PATCH net-next 00/14] net_sched: first series for RTNL-less qdisc dumps Eric Dumazet
@ 2024-04-15 13:20 ` Eric Dumazet
  2024-04-16 18:19   ` Simon Horman
  2024-04-15 13:20 ` [PATCH net-next 02/14] net_sched: cake: implement lockless cake_dump() Eric Dumazet
                   ` (12 subsequent siblings)
  13 siblings, 1 reply; 44+ messages in thread
From: Eric Dumazet @ 2024-04-15 13:20 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Jamal Hadi Salim, Cong Wang, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet

Instead of relying on RTNL, fq_dump() can use READ_ONCE()
annotations, paired with WRITE_ONCE() ones in fq_change().

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/sched/sch_fq.c | 96 +++++++++++++++++++++++++++++-----------------
 1 file changed, 60 insertions(+), 36 deletions(-)

diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index cdf23ff16f40bf244bb822e76016fde44e0c439b..934c220b3f4336dc2f70af74d7758218492b675d 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -888,7 +888,7 @@ static int fq_resize(struct Qdisc *sch, u32 log)
 		fq_rehash(q, old_fq_root, q->fq_trees_log, array, log);
 
 	q->fq_root = array;
-	q->fq_trees_log = log;
+	WRITE_ONCE(q->fq_trees_log, log);
 
 	sch_tree_unlock(sch);
 
@@ -931,7 +931,7 @@ static void fq_prio2band_compress_crumb(const u8 *in, u8 *out)
 
 	memset(out, 0, num_elems / 4);
 	for (i = 0; i < num_elems; i++)
-		out[i / 4] |= in[i] << (2 * (i & 0x3));
+		out[i / 4] |= READ_ONCE(in[i]) << (2 * (i & 0x3));
 }
 
 static void fq_prio2band_decompress_crumb(const u8 *in, u8 *out)
@@ -958,7 +958,7 @@ static int fq_load_weights(struct fq_sched_data *q,
 		}
 	}
 	for (i = 0; i < FQ_BANDS; i++)
-		q->band_flows[i].quantum = weights[i];
+		WRITE_ONCE(q->band_flows[i].quantum, weights[i]);
 	return 0;
 }
 
@@ -1011,16 +1011,18 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
 			err = -EINVAL;
 	}
 	if (tb[TCA_FQ_PLIMIT])
-		sch->limit = nla_get_u32(tb[TCA_FQ_PLIMIT]);
+		WRITE_ONCE(sch->limit,
+			   nla_get_u32(tb[TCA_FQ_PLIMIT]));
 
 	if (tb[TCA_FQ_FLOW_PLIMIT])
-		q->flow_plimit = nla_get_u32(tb[TCA_FQ_FLOW_PLIMIT]);
+		WRITE_ONCE(q->flow_plimit,
+			   nla_get_u32(tb[TCA_FQ_FLOW_PLIMIT]));
 
 	if (tb[TCA_FQ_QUANTUM]) {
 		u32 quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]);
 
 		if (quantum > 0 && quantum <= (1 << 20)) {
-			q->quantum = quantum;
+			WRITE_ONCE(q->quantum, quantum);
 		} else {
 			NL_SET_ERR_MSG_MOD(extack, "invalid quantum");
 			err = -EINVAL;
@@ -1028,7 +1030,8 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
 	}
 
 	if (tb[TCA_FQ_INITIAL_QUANTUM])
-		q->initial_quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]);
+		WRITE_ONCE(q->initial_quantum,
+			   nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]));
 
 	if (tb[TCA_FQ_FLOW_DEFAULT_RATE])
 		pr_warn_ratelimited("sch_fq: defrate %u ignored.\n",
@@ -1037,17 +1040,19 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
 	if (tb[TCA_FQ_FLOW_MAX_RATE]) {
 		u32 rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]);
 
-		q->flow_max_rate = (rate == ~0U) ? ~0UL : rate;
+		WRITE_ONCE(q->flow_max_rate,
+			   (rate == ~0U) ? ~0UL : rate);
 	}
 	if (tb[TCA_FQ_LOW_RATE_THRESHOLD])
-		q->low_rate_threshold =
-			nla_get_u32(tb[TCA_FQ_LOW_RATE_THRESHOLD]);
+		WRITE_ONCE(q->low_rate_threshold,
+			   nla_get_u32(tb[TCA_FQ_LOW_RATE_THRESHOLD]));
 
 	if (tb[TCA_FQ_RATE_ENABLE]) {
 		u32 enable = nla_get_u32(tb[TCA_FQ_RATE_ENABLE]);
 
 		if (enable <= 1)
-			q->rate_enable = enable;
+			WRITE_ONCE(q->rate_enable,
+				   enable);
 		else
 			err = -EINVAL;
 	}
@@ -1055,7 +1060,8 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
 	if (tb[TCA_FQ_FLOW_REFILL_DELAY]) {
 		u32 usecs_delay = nla_get_u32(tb[TCA_FQ_FLOW_REFILL_DELAY]) ;
 
-		q->flow_refill_delay = usecs_to_jiffies(usecs_delay);
+		WRITE_ONCE(q->flow_refill_delay,
+			   usecs_to_jiffies(usecs_delay));
 	}
 
 	if (!err && tb[TCA_FQ_PRIOMAP])
@@ -1065,21 +1071,26 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
 		err = fq_load_weights(q, tb[TCA_FQ_WEIGHTS], extack);
 
 	if (tb[TCA_FQ_ORPHAN_MASK])
-		q->orphan_mask = nla_get_u32(tb[TCA_FQ_ORPHAN_MASK]);
+		WRITE_ONCE(q->orphan_mask,
+			   nla_get_u32(tb[TCA_FQ_ORPHAN_MASK]));
 
 	if (tb[TCA_FQ_CE_THRESHOLD])
-		q->ce_threshold = (u64)NSEC_PER_USEC *
-				  nla_get_u32(tb[TCA_FQ_CE_THRESHOLD]);
+		WRITE_ONCE(q->ce_threshold,
+			   (u64)NSEC_PER_USEC *
+			   nla_get_u32(tb[TCA_FQ_CE_THRESHOLD]));
 
 	if (tb[TCA_FQ_TIMER_SLACK])
-		q->timer_slack = nla_get_u32(tb[TCA_FQ_TIMER_SLACK]);
+		WRITE_ONCE(q->timer_slack,
+			   nla_get_u32(tb[TCA_FQ_TIMER_SLACK]));
 
 	if (tb[TCA_FQ_HORIZON])
-		q->horizon = (u64)NSEC_PER_USEC *
-				  nla_get_u32(tb[TCA_FQ_HORIZON]);
+		WRITE_ONCE(q->horizon,
+			   (u64)NSEC_PER_USEC *
+			   nla_get_u32(tb[TCA_FQ_HORIZON]));
 
 	if (tb[TCA_FQ_HORIZON_DROP])
-		q->horizon_drop = nla_get_u8(tb[TCA_FQ_HORIZON_DROP]);
+		WRITE_ONCE(q->horizon_drop,
+			   nla_get_u8(tb[TCA_FQ_HORIZON_DROP]));
 
 	if (!err) {
 
@@ -1160,13 +1171,13 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt,
 static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct fq_sched_data *q = qdisc_priv(sch);
-	u64 ce_threshold = q->ce_threshold;
 	struct tc_prio_qopt prio = {
 		.bands = FQ_BANDS,
 	};
-	u64 horizon = q->horizon;
 	struct nlattr *opts;
+	u64 ce_threshold;
 	s32 weights[3];
+	u64 horizon;
 
 	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (opts == NULL)
@@ -1174,35 +1185,48 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
 
 	/* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore */
 
+	ce_threshold = READ_ONCE(q->ce_threshold);
 	do_div(ce_threshold, NSEC_PER_USEC);
+
+	horizon = READ_ONCE(q->horizon);
 	do_div(horizon, NSEC_PER_USEC);
 
-	if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) ||
-	    nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) ||
-	    nla_put_u32(skb, TCA_FQ_QUANTUM, q->quantum) ||
-	    nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM, q->initial_quantum) ||
-	    nla_put_u32(skb, TCA_FQ_RATE_ENABLE, q->rate_enable) ||
+	if (nla_put_u32(skb, TCA_FQ_PLIMIT,
+			READ_ONCE(sch->limit)) ||
+	    nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT,
+			READ_ONCE(q->flow_plimit)) ||
+	    nla_put_u32(skb, TCA_FQ_QUANTUM,
+			READ_ONCE(q->quantum)) ||
+	    nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM,
+			READ_ONCE(q->initial_quantum)) ||
+	    nla_put_u32(skb, TCA_FQ_RATE_ENABLE,
+			READ_ONCE(q->rate_enable)) ||
 	    nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE,
-			min_t(unsigned long, q->flow_max_rate, ~0U)) ||
+			min_t(unsigned long,
+			      READ_ONCE(q->flow_max_rate), ~0U)) ||
 	    nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY,
-			jiffies_to_usecs(q->flow_refill_delay)) ||
-	    nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) ||
+			jiffies_to_usecs(READ_ONCE(q->flow_refill_delay))) ||
+	    nla_put_u32(skb, TCA_FQ_ORPHAN_MASK,
+			READ_ONCE(q->orphan_mask)) ||
 	    nla_put_u32(skb, TCA_FQ_LOW_RATE_THRESHOLD,
-			q->low_rate_threshold) ||
+			READ_ONCE(q->low_rate_threshold)) ||
 	    nla_put_u32(skb, TCA_FQ_CE_THRESHOLD, (u32)ce_threshold) ||
-	    nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log) ||
-	    nla_put_u32(skb, TCA_FQ_TIMER_SLACK, q->timer_slack) ||
+	    nla_put_u32(skb, TCA_FQ_BUCKETS_LOG,
+			READ_ONCE(q->fq_trees_log)) ||
+	    nla_put_u32(skb, TCA_FQ_TIMER_SLACK,
+			READ_ONCE(q->timer_slack)) ||
 	    nla_put_u32(skb, TCA_FQ_HORIZON, (u32)horizon) ||
-	    nla_put_u8(skb, TCA_FQ_HORIZON_DROP, q->horizon_drop))
+	    nla_put_u8(skb, TCA_FQ_HORIZON_DROP,
+		       READ_ONCE(q->horizon_drop)))
 		goto nla_put_failure;
 
 	fq_prio2band_decompress_crumb(q->prio2band, prio.priomap);
 	if (nla_put(skb, TCA_FQ_PRIOMAP, sizeof(prio), &prio))
 		goto nla_put_failure;
 
-	weights[0] = q->band_flows[0].quantum;
-	weights[1] = q->band_flows[1].quantum;
-	weights[2] = q->band_flows[2].quantum;
+	weights[0] = READ_ONCE(q->band_flows[0].quantum);
+	weights[1] = READ_ONCE(q->band_flows[1].quantum);
+	weights[2] = READ_ONCE(q->band_flows[2].quantum);
 	if (nla_put(skb, TCA_FQ_WEIGHTS, sizeof(weights), &weights))
 		goto nla_put_failure;
 
-- 
2.44.0.683.g7961c838ac-goog


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH net-next 02/14] net_sched: cake: implement lockless cake_dump()
  2024-04-15 13:20 [PATCH net-next 00/14] net_sched: first series for RTNL-less qdisc dumps Eric Dumazet
  2024-04-15 13:20 ` [PATCH net-next 01/14] net_sched: sch_fq: implement lockless fq_dump() Eric Dumazet
@ 2024-04-15 13:20 ` Eric Dumazet
  2024-04-17  8:35   ` Simon Horman
  2024-04-15 13:20 ` [PATCH net-next 03/14] net_sched: sch_cbs: implement lockless cbs_dump() Eric Dumazet
                   ` (11 subsequent siblings)
  13 siblings, 1 reply; 44+ messages in thread
From: Eric Dumazet @ 2024-04-15 13:20 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Jamal Hadi Salim, Cong Wang, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet

Instead of relying on RTNL, cake_dump() can use READ_ONCE()
annotations, paired with WRITE_ONCE() ones in cake_change().

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/sched/sch_cake.c | 98 +++++++++++++++++++++++++-------------------
 1 file changed, 55 insertions(+), 43 deletions(-)

diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 2eabc4dc5b79a83ce423f73c9cccec86f14be7cf..bb37a0dedcc1e4b3418f6681d87108aad7ea066f 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -2572,6 +2572,7 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
 {
 	struct cake_sched_data *q = qdisc_priv(sch);
 	struct nlattr *tb[TCA_CAKE_MAX + 1];
+	u16 rate_flags;
 	int err;
 
 	err = nla_parse_nested_deprecated(tb, TCA_CAKE_MAX, opt, cake_policy,
@@ -2592,16 +2593,19 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
 	}
 
 	if (tb[TCA_CAKE_BASE_RATE64])
-		q->rate_bps = nla_get_u64(tb[TCA_CAKE_BASE_RATE64]);
+		WRITE_ONCE(q->rate_bps,
+			   nla_get_u64(tb[TCA_CAKE_BASE_RATE64]));
 
 	if (tb[TCA_CAKE_DIFFSERV_MODE])
-		q->tin_mode = nla_get_u32(tb[TCA_CAKE_DIFFSERV_MODE]);
+		WRITE_ONCE(q->tin_mode,
+			   nla_get_u32(tb[TCA_CAKE_DIFFSERV_MODE]));
 
+	rate_flags = q->rate_flags;
 	if (tb[TCA_CAKE_WASH]) {
 		if (!!nla_get_u32(tb[TCA_CAKE_WASH]))
-			q->rate_flags |= CAKE_FLAG_WASH;
+			rate_flags |= CAKE_FLAG_WASH;
 		else
-			q->rate_flags &= ~CAKE_FLAG_WASH;
+			rate_flags &= ~CAKE_FLAG_WASH;
 	}
 
 	if (tb[TCA_CAKE_FLOW_MODE])
@@ -2610,11 +2614,13 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
 					CAKE_FLOW_MASK));
 
 	if (tb[TCA_CAKE_ATM])
-		q->atm_mode = nla_get_u32(tb[TCA_CAKE_ATM]);
+		WRITE_ONCE(q->atm_mode,
+			   nla_get_u32(tb[TCA_CAKE_ATM]));
 
 	if (tb[TCA_CAKE_OVERHEAD]) {
-		q->rate_overhead = nla_get_s32(tb[TCA_CAKE_OVERHEAD]);
-		q->rate_flags |= CAKE_FLAG_OVERHEAD;
+		WRITE_ONCE(q->rate_overhead,
+			   nla_get_s32(tb[TCA_CAKE_OVERHEAD]));
+		rate_flags |= CAKE_FLAG_OVERHEAD;
 
 		q->max_netlen = 0;
 		q->max_adjlen = 0;
@@ -2623,7 +2629,7 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
 	}
 
 	if (tb[TCA_CAKE_RAW]) {
-		q->rate_flags &= ~CAKE_FLAG_OVERHEAD;
+		rate_flags &= ~CAKE_FLAG_OVERHEAD;
 
 		q->max_netlen = 0;
 		q->max_adjlen = 0;
@@ -2632,54 +2638,57 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
 	}
 
 	if (tb[TCA_CAKE_MPU])
-		q->rate_mpu = nla_get_u32(tb[TCA_CAKE_MPU]);
+		WRITE_ONCE(q->rate_mpu,
+			   nla_get_u32(tb[TCA_CAKE_MPU]));
 
 	if (tb[TCA_CAKE_RTT]) {
-		q->interval = nla_get_u32(tb[TCA_CAKE_RTT]);
+		u32 interval = nla_get_u32(tb[TCA_CAKE_RTT]);
 
-		if (!q->interval)
-			q->interval = 1;
+		WRITE_ONCE(q->interval, max(interval, 1U));
 	}
 
 	if (tb[TCA_CAKE_TARGET]) {
-		q->target = nla_get_u32(tb[TCA_CAKE_TARGET]);
+		u32 target = nla_get_u32(tb[TCA_CAKE_TARGET]);
 
-		if (!q->target)
-			q->target = 1;
+		WRITE_ONCE(q->target, max(target, 1U));
 	}
 
 	if (tb[TCA_CAKE_AUTORATE]) {
 		if (!!nla_get_u32(tb[TCA_CAKE_AUTORATE]))
-			q->rate_flags |= CAKE_FLAG_AUTORATE_INGRESS;
+			rate_flags |= CAKE_FLAG_AUTORATE_INGRESS;
 		else
-			q->rate_flags &= ~CAKE_FLAG_AUTORATE_INGRESS;
+			rate_flags &= ~CAKE_FLAG_AUTORATE_INGRESS;
 	}
 
 	if (tb[TCA_CAKE_INGRESS]) {
 		if (!!nla_get_u32(tb[TCA_CAKE_INGRESS]))
-			q->rate_flags |= CAKE_FLAG_INGRESS;
+			rate_flags |= CAKE_FLAG_INGRESS;
 		else
-			q->rate_flags &= ~CAKE_FLAG_INGRESS;
+			rate_flags &= ~CAKE_FLAG_INGRESS;
 	}
 
 	if (tb[TCA_CAKE_ACK_FILTER])
-		q->ack_filter = nla_get_u32(tb[TCA_CAKE_ACK_FILTER]);
+		WRITE_ONCE(q->ack_filter,
+			   nla_get_u32(tb[TCA_CAKE_ACK_FILTER]));
 
 	if (tb[TCA_CAKE_MEMORY])
-		q->buffer_config_limit = nla_get_u32(tb[TCA_CAKE_MEMORY]);
+		WRITE_ONCE(q->buffer_config_limit,
+			   nla_get_u32(tb[TCA_CAKE_MEMORY]));
 
 	if (tb[TCA_CAKE_SPLIT_GSO]) {
 		if (!!nla_get_u32(tb[TCA_CAKE_SPLIT_GSO]))
-			q->rate_flags |= CAKE_FLAG_SPLIT_GSO;
+			rate_flags |= CAKE_FLAG_SPLIT_GSO;
 		else
-			q->rate_flags &= ~CAKE_FLAG_SPLIT_GSO;
+			rate_flags &= ~CAKE_FLAG_SPLIT_GSO;
 	}
 
 	if (tb[TCA_CAKE_FWMARK]) {
-		q->fwmark_mask = nla_get_u32(tb[TCA_CAKE_FWMARK]);
-		q->fwmark_shft = q->fwmark_mask ? __ffs(q->fwmark_mask) : 0;
+		WRITE_ONCE(q->fwmark_mask, nla_get_u32(tb[TCA_CAKE_FWMARK]));
+		WRITE_ONCE(q->fwmark_shft,
+			   q->fwmark_mask ? __ffs(q->fwmark_mask) : 0);
 	}
 
+	WRITE_ONCE(q->rate_flags, rate_flags);
 	if (q->tins) {
 		sch_tree_lock(sch);
 		cake_reconfigure(sch);
@@ -2774,68 +2783,71 @@ static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct cake_sched_data *q = qdisc_priv(sch);
 	struct nlattr *opts;
+	u16 rate_flags;
 
 	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
 	if (!opts)
 		goto nla_put_failure;
 
-	if (nla_put_u64_64bit(skb, TCA_CAKE_BASE_RATE64, q->rate_bps,
-			      TCA_CAKE_PAD))
+	if (nla_put_u64_64bit(skb, TCA_CAKE_BASE_RATE64,
+			      READ_ONCE(q->rate_bps), TCA_CAKE_PAD))
 		goto nla_put_failure;
 
 	if (nla_put_u32(skb, TCA_CAKE_FLOW_MODE,
-			q->flow_mode & CAKE_FLOW_MASK))
+			READ_ONCE(q->flow_mode) & CAKE_FLOW_MASK))
 		goto nla_put_failure;
 
-	if (nla_put_u32(skb, TCA_CAKE_RTT, q->interval))
+	if (nla_put_u32(skb, TCA_CAKE_RTT, READ_ONCE(q->interval)))
 		goto nla_put_failure;
 
-	if (nla_put_u32(skb, TCA_CAKE_TARGET, q->target))
+	if (nla_put_u32(skb, TCA_CAKE_TARGET, READ_ONCE(q->target)))
 		goto nla_put_failure;
 
-	if (nla_put_u32(skb, TCA_CAKE_MEMORY, q->buffer_config_limit))
+	if (nla_put_u32(skb, TCA_CAKE_MEMORY,
+			READ_ONCE(q->buffer_config_limit)))
 		goto nla_put_failure;
 
+	rate_flags = READ_ONCE(q->rate_flags);
 	if (nla_put_u32(skb, TCA_CAKE_AUTORATE,
-			!!(q->rate_flags & CAKE_FLAG_AUTORATE_INGRESS)))
+			!!(rate_flags & CAKE_FLAG_AUTORATE_INGRESS)))
 		goto nla_put_failure;
 
 	if (nla_put_u32(skb, TCA_CAKE_INGRESS,
-			!!(q->rate_flags & CAKE_FLAG_INGRESS)))
+			!!(rate_flags & CAKE_FLAG_INGRESS)))
 		goto nla_put_failure;
 
-	if (nla_put_u32(skb, TCA_CAKE_ACK_FILTER, q->ack_filter))
+	if (nla_put_u32(skb, TCA_CAKE_ACK_FILTER, READ_ONCE(q->ack_filter)))
 		goto nla_put_failure;
 
 	if (nla_put_u32(skb, TCA_CAKE_NAT,
-			!!(q->flow_mode & CAKE_FLOW_NAT_FLAG)))
+			!!(READ_ONCE(q->flow_mode) & CAKE_FLOW_NAT_FLAG)))
 		goto nla_put_failure;
 
-	if (nla_put_u32(skb, TCA_CAKE_DIFFSERV_MODE, q->tin_mode))
+	if (nla_put_u32(skb, TCA_CAKE_DIFFSERV_MODE, READ_ONCE(q->tin_mode)))
 		goto nla_put_failure;
 
 	if (nla_put_u32(skb, TCA_CAKE_WASH,
-			!!(q->rate_flags & CAKE_FLAG_WASH)))
+			!!(rate_flags & CAKE_FLAG_WASH)))
 		goto nla_put_failure;
 
-	if (nla_put_u32(skb, TCA_CAKE_OVERHEAD, q->rate_overhead))
+	if (nla_put_u32(skb, TCA_CAKE_OVERHEAD, READ_ONCE(q->rate_overhead)))
 		goto nla_put_failure;
 
-	if (!(q->rate_flags & CAKE_FLAG_OVERHEAD))
+	if (!(rate_flags & CAKE_FLAG_OVERHEAD))
 		if (nla_put_u32(skb, TCA_CAKE_RAW, 0))
 			goto nla_put_failure;
 
-	if (nla_put_u32(skb, TCA_CAKE_ATM, q->atm_mode))
+	if (nla_put_u32(skb, TCA_CAKE_ATM, READ_ONCE(q->atm_mode)))
 		goto nla_put_failure;
 
-	if (nla_put_u32(skb, TCA_CAKE_MPU, q->rate_mpu))
+	if (nla_put_u32(skb, TCA_CAKE_MPU, READ_ONCE(q->rate_mpu)))
 		goto nla_put_failure;
 
 	if (nla_put_u32(skb, TCA_CAKE_SPLIT_GSO,
-			!!(q->rate_flags & CAKE_FLAG_SPLIT_GSO)))
+			!!(rate_flags & CAKE_FLAG_SPLIT_GSO)))
 		goto nla_put_failure;
 
-	if (nla_put_u32(skb, TCA_CAKE_FWMARK, q->fwmark_mask))
+	if (nla_put_u32(skb, TCA_CAKE_FWMARK, READ_ONCE(q->fwmark_mask)))
 		goto nla_put_failure;
 
 	return nla_nest_end(skb, opts);
-- 
2.44.0.683.g7961c838ac-goog


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH net-next 03/14] net_sched: sch_cbs: implement lockless cbs_dump()
  2024-04-15 13:20 [PATCH net-next 00/14] net_sched: first series for RTNL-less qdisc dumps Eric Dumazet
  2024-04-15 13:20 ` [PATCH net-next 01/14] net_sched: sch_fq: implement lockless fq_dump() Eric Dumazet
  2024-04-15 13:20 ` [PATCH net-next 02/14] net_sched: cake: implement lockless cake_dump() Eric Dumazet
@ 2024-04-15 13:20 ` Eric Dumazet
  2024-04-17  9:27   ` Simon Horman
  2024-04-15 13:20 ` [PATCH net-next 04/14] net_sched: sch_choke: implement lockless choke_dump() Eric Dumazet
                   ` (10 subsequent siblings)
  13 siblings, 1 reply; 44+ messages in thread
From: Eric Dumazet @ 2024-04-15 13:20 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Jamal Hadi Salim, Cong Wang, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet

Instead of relying on RTNL, cbs_dump() can use READ_ONCE()
annotations, paired with WRITE_ONCE() ones in cbs_change().

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/sched/sch_cbs.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index 69001eff0315584df23a24f81ae3b4cf7bf5fd79..939425da18955bc8a3f3d2d5b852b2585e9bcaee 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -389,11 +389,11 @@ static int cbs_change(struct Qdisc *sch, struct nlattr *opt,
 	}
 
 	/* Everything went OK, save the parameters used. */
-	q->hicredit = qopt->hicredit;
-	q->locredit = qopt->locredit;
-	q->idleslope = qopt->idleslope * BYTES_PER_KBIT;
-	q->sendslope = qopt->sendslope * BYTES_PER_KBIT;
-	q->offload = qopt->offload;
+	WRITE_ONCE(q->hicredit, qopt->hicredit);
+	WRITE_ONCE(q->locredit, qopt->locredit);
+	WRITE_ONCE(q->idleslope, qopt->idleslope * BYTES_PER_KBIT);
+	WRITE_ONCE(q->sendslope, qopt->sendslope * BYTES_PER_KBIT);
+	WRITE_ONCE(q->offload, qopt->offload);
 
 	return 0;
 }
@@ -459,11 +459,11 @@ static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (!nest)
 		goto nla_put_failure;
 
-	opt.hicredit = q->hicredit;
-	opt.locredit = q->locredit;
-	opt.sendslope = div64_s64(q->sendslope, BYTES_PER_KBIT);
-	opt.idleslope = div64_s64(q->idleslope, BYTES_PER_KBIT);
-	opt.offload = q->offload;
+	opt.hicredit = READ_ONCE(q->hicredit);
+	opt.locredit = READ_ONCE(q->locredit);
+	opt.sendslope = div64_s64(READ_ONCE(q->sendslope), BYTES_PER_KBIT);
+	opt.idleslope = div64_s64(READ_ONCE(q->idleslope), BYTES_PER_KBIT);
+	opt.offload = READ_ONCE(q->offload);
 
 	if (nla_put(skb, TCA_CBS_PARMS, sizeof(opt), &opt))
 		goto nla_put_failure;
-- 
2.44.0.683.g7961c838ac-goog


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH net-next 04/14] net_sched: sch_choke: implement lockless choke_dump()
  2024-04-15 13:20 [PATCH net-next 00/14] net_sched: first series for RTNL-less qdisc dumps Eric Dumazet
                   ` (2 preceding siblings ...)
  2024-04-15 13:20 ` [PATCH net-next 03/14] net_sched: sch_cbs: implement lockless cbs_dump() Eric Dumazet
@ 2024-04-15 13:20 ` Eric Dumazet
  2024-04-17 13:14   ` Simon Horman
  2024-04-15 13:20 ` [PATCH net-next 05/14] net_sched: sch_codel: implement lockless codel_dump() Eric Dumazet
                   ` (9 subsequent siblings)
  13 siblings, 1 reply; 44+ messages in thread
From: Eric Dumazet @ 2024-04-15 13:20 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Jamal Hadi Salim, Cong Wang, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet

Instead of relying on RTNL, choke_dump() can use READ_ONCE()
annotations, paired with WRITE_ONCE() ones in choke_change().

Also use READ_ONCE(q->limit) in choke_enqueue(), as the value
can be changed by choke_change().

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/net/red.h     | 10 +++++-----
 net/sched/sch_choke.c | 23 ++++++++++++-----------
 2 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/include/net/red.h b/include/net/red.h
index 425364de0df7913cdd6361a0eb8d18b418372787..7daf7cf6130aeccf3d81a77600f4445759f174b7 100644
--- a/include/net/red.h
+++ b/include/net/red.h
@@ -233,10 +233,10 @@ static inline void red_set_parms(struct red_parms *p,
 	int delta = qth_max - qth_min;
 	u32 max_p_delta;
 
-	p->qth_min	= qth_min << Wlog;
-	p->qth_max	= qth_max << Wlog;
-	p->Wlog		= Wlog;
-	p->Plog		= Plog;
+	WRITE_ONCE(p->qth_min, qth_min << Wlog);
+	WRITE_ONCE(p->qth_max, qth_max << Wlog);
+	WRITE_ONCE(p->Wlog, Wlog);
+	WRITE_ONCE(p->Plog, Plog);
 	if (delta <= 0)
 		delta = 1;
 	p->qth_delta	= delta;
@@ -244,7 +244,7 @@ static inline void red_set_parms(struct red_parms *p,
 		max_P = red_maxp(Plog);
 		max_P *= delta; /* max_P = (qth_max - qth_min)/2^Plog */
 	}
-	p->max_P = max_P;
+	WRITE_ONCE(p->max_P, max_P);
 	max_p_delta = max_P / delta;
 	max_p_delta = max(max_p_delta, 1U);
 	p->max_P_reciprocal  = reciprocal_value(max_p_delta);
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index ea108030c6b410418f0b58ba7ea51e1b524d4df9..51e68c9661727e64862392a76880bed5fa0c27a2 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -264,7 +264,7 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	}
 
 	/* Admit new packet */
-	if (sch->q.qlen < q->limit) {
+	if (sch->q.qlen < READ_ONCE(q->limit)) {
 		q->tab[q->tail] = skb;
 		q->tail = (q->tail + 1) & q->tab_mask;
 		++sch->q.qlen;
@@ -405,8 +405,8 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt,
 	} else
 		sch_tree_lock(sch);
 
-	q->flags = ctl->flags;
-	q->limit = ctl->limit;
+	WRITE_ONCE(q->flags, ctl->flags);
+	WRITE_ONCE(q->limit, ctl->limit);
 
 	red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
 		      ctl->Plog, ctl->Scell_log,
@@ -431,15 +431,16 @@ static int choke_init(struct Qdisc *sch, struct nlattr *opt,
 static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct choke_sched_data *q = qdisc_priv(sch);
+	u8 Wlog = READ_ONCE(q->parms.Wlog);
 	struct nlattr *opts = NULL;
 	struct tc_red_qopt opt = {
-		.limit		= q->limit,
-		.flags		= q->flags,
-		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
-		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
-		.Wlog		= q->parms.Wlog,
-		.Plog		= q->parms.Plog,
-		.Scell_log	= q->parms.Scell_log,
+		.limit		= READ_ONCE(q->limit),
+		.flags		= READ_ONCE(q->flags),
+		.qth_min	= READ_ONCE(q->parms.qth_min) >> Wlog,
+		.qth_max	= READ_ONCE(q->parms.qth_max) >> Wlog,
+		.Wlog		= Wlog,
+		.Plog		= READ_ONCE(q->parms.Plog),
+		.Scell_log	= READ_ONCE(q->parms.Scell_log),
 	};
 
 	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
@@ -447,7 +448,7 @@ static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
 		goto nla_put_failure;
 
 	if (nla_put(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt) ||
-	    nla_put_u32(skb, TCA_CHOKE_MAX_P, q->parms.max_P))
+	    nla_put_u32(skb, TCA_CHOKE_MAX_P, READ_ONCE(q->parms.max_P)))
 		goto nla_put_failure;
 	return nla_nest_end(skb, opts);
 
-- 
2.44.0.683.g7961c838ac-goog


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH net-next 05/14] net_sched: sch_codel: implement lockless codel_dump()
  2024-04-15 13:20 [PATCH net-next 00/14] net_sched: first series for RTNL-less qdisc dumps Eric Dumazet
                   ` (3 preceding siblings ...)
  2024-04-15 13:20 ` [PATCH net-next 04/14] net_sched: sch_choke: implement lockless choke_dump() Eric Dumazet
@ 2024-04-15 13:20 ` Eric Dumazet
  2024-04-17 15:59   ` Simon Horman
  2024-04-15 13:20 ` [PATCH net-next 06/14] net_sched: sch_etf: implement lockless etf_dump() Eric Dumazet
                   ` (8 subsequent siblings)
  13 siblings, 1 reply; 44+ messages in thread
From: Eric Dumazet @ 2024-04-15 13:20 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Jamal Hadi Salim, Cong Wang, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet

Instead of relying on RTNL, codel_dump() can use READ_ONCE()
annotations, paired with WRITE_ONCE() ones in codel_change().

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/sched/sch_codel.c | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index ecb3f164bb25b33bd662c8ee07dc1b5945fd882d..3e8d4fe4d91e3ef2b7715640f6675aa5e8e2a326 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -118,26 +118,31 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt,
 	if (tb[TCA_CODEL_TARGET]) {
 		u32 target = nla_get_u32(tb[TCA_CODEL_TARGET]);
 
-		q->params.target = ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT;
+		WRITE_ONCE(q->params.target,
+			   ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT);
 	}
 
 	if (tb[TCA_CODEL_CE_THRESHOLD]) {
 		u64 val = nla_get_u32(tb[TCA_CODEL_CE_THRESHOLD]);
 
-		q->params.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT;
+		WRITE_ONCE(q->params.ce_threshold,
+			   (val * NSEC_PER_USEC) >> CODEL_SHIFT);
 	}
 
 	if (tb[TCA_CODEL_INTERVAL]) {
 		u32 interval = nla_get_u32(tb[TCA_CODEL_INTERVAL]);
 
-		q->params.interval = ((u64)interval * NSEC_PER_USEC) >> CODEL_SHIFT;
+		WRITE_ONCE(q->params.interval,
+			   ((u64)interval * NSEC_PER_USEC) >> CODEL_SHIFT);
 	}
 
 	if (tb[TCA_CODEL_LIMIT])
-		sch->limit = nla_get_u32(tb[TCA_CODEL_LIMIT]);
+		WRITE_ONCE(sch->limit,
+			   nla_get_u32(tb[TCA_CODEL_LIMIT]));
 
 	if (tb[TCA_CODEL_ECN])
-		q->params.ecn = !!nla_get_u32(tb[TCA_CODEL_ECN]);
+		WRITE_ONCE(q->params.ecn,
+			   !!nla_get_u32(tb[TCA_CODEL_ECN]));
 
 	qlen = sch->q.qlen;
 	while (sch->q.qlen > sch->limit) {
@@ -183,6 +188,7 @@ static int codel_init(struct Qdisc *sch, struct nlattr *opt,
 static int codel_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct codel_sched_data *q = qdisc_priv(sch);
+	codel_time_t ce_threshold;
 	struct nlattr *opts;
 
 	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
@@ -190,17 +196,18 @@ static int codel_dump(struct Qdisc *sch, struct sk_buff *skb)
 		goto nla_put_failure;
 
 	if (nla_put_u32(skb, TCA_CODEL_TARGET,
-			codel_time_to_us(q->params.target)) ||
+			codel_time_to_us(READ_ONCE(q->params.target))) ||
 	    nla_put_u32(skb, TCA_CODEL_LIMIT,
-			sch->limit) ||
+			READ_ONCE(sch->limit)) ||
 	    nla_put_u32(skb, TCA_CODEL_INTERVAL,
-			codel_time_to_us(q->params.interval)) ||
+			codel_time_to_us(READ_ONCE(q->params.interval))) ||
 	    nla_put_u32(skb, TCA_CODEL_ECN,
-			q->params.ecn))
+			READ_ONCE(q->params.ecn)))
 		goto nla_put_failure;
-	if (q->params.ce_threshold != CODEL_DISABLED_THRESHOLD &&
+	ce_threshold = READ_ONCE(q->params.ce_threshold);
+	if (ce_threshold != CODEL_DISABLED_THRESHOLD &&
 	    nla_put_u32(skb, TCA_CODEL_CE_THRESHOLD,
-			codel_time_to_us(q->params.ce_threshold)))
+			codel_time_to_us(ce_threshold)))
 		goto nla_put_failure;
 	return nla_nest_end(skb, opts);
 
-- 
2.44.0.683.g7961c838ac-goog


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH net-next 06/14] net_sched: sch_etf: implement lockless etf_dump()
  2024-04-15 13:20 [PATCH net-next 00/14] net_sched: first series for RTNL-less qdisc dumps Eric Dumazet
                   ` (4 preceding siblings ...)
  2024-04-15 13:20 ` [PATCH net-next 05/14] net_sched: sch_codel: implement lockless codel_dump() Eric Dumazet
@ 2024-04-15 13:20 ` Eric Dumazet
  2024-04-17 16:27   ` Simon Horman
  2024-04-15 13:20 ` [PATCH net-next 07/14] net_sched: sch_ets: implement lockless ets_dump() Eric Dumazet
                   ` (7 subsequent siblings)
  13 siblings, 1 reply; 44+ messages in thread
From: Eric Dumazet @ 2024-04-15 13:20 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Jamal Hadi Salim, Cong Wang, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet

Instead of relying on RTNL, etf_dump() can use READ_ONCE()
annotations.

There is no etf_change() yet; this patch simply aligns
this qdisc with the others.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/sched/sch_etf.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c
index 2e4bef713b6abc4aad836bc9248796c20a22e476..c74d778c32a1eda639650df4d1d103c5338f14e6 100644
--- a/net/sched/sch_etf.c
+++ b/net/sched/sch_etf.c
@@ -467,15 +467,15 @@ static int etf_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (!nest)
 		goto nla_put_failure;
 
-	opt.delta = q->delta;
-	opt.clockid = q->clockid;
-	if (q->offload)
+	opt.delta = READ_ONCE(q->delta);
+	opt.clockid = READ_ONCE(q->clockid);
+	if (READ_ONCE(q->offload))
 		opt.flags |= TC_ETF_OFFLOAD_ON;
 
-	if (q->deadline_mode)
+	if (READ_ONCE(q->deadline_mode))
 		opt.flags |= TC_ETF_DEADLINE_MODE_ON;
 
-	if (q->skip_sock_check)
+	if (READ_ONCE(q->skip_sock_check))
 		opt.flags |= TC_ETF_SKIP_SOCK_CHECK;
 
 	if (nla_put(skb, TCA_ETF_PARMS, sizeof(opt), &opt))
-- 
2.44.0.683.g7961c838ac-goog


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH net-next 07/14] net_sched: sch_ets: implement lockless ets_dump()
  2024-04-15 13:20 [PATCH net-next 00/14] net_sched: first series for RTNL-less qdisc dumps Eric Dumazet
                   ` (5 preceding siblings ...)
  2024-04-15 13:20 ` [PATCH net-next 06/14] net_sched: sch_etf: implement lockless etf_dump() Eric Dumazet
@ 2024-04-15 13:20 ` Eric Dumazet
  2024-04-17 16:54   ` Simon Horman
  2024-04-15 13:20 ` [PATCH net-next 08/14] net_sched: sch_fifo: implement lockless __fifo_dump() Eric Dumazet
                   ` (6 subsequent siblings)
  13 siblings, 1 reply; 44+ messages in thread
From: Eric Dumazet @ 2024-04-15 13:20 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Jamal Hadi Salim, Cong Wang, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet

Instead of relying on RTNL, ets_dump() can use READ_ONCE()
annotations, paired with WRITE_ONCE() ones in ets_change().

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/sched/sch_ets.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index 835b4460b44854a803d3054e744702022b7551f4..f80bc05d4c5a5050226e6cfd30fa951c0b61029f 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -646,7 +646,7 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
 
 	sch_tree_lock(sch);
 
-	q->nbands = nbands;
+	WRITE_ONCE(q->nbands, nbands);
 	for (i = nstrict; i < q->nstrict; i++) {
 		if (q->classes[i].qdisc->q.qlen) {
 			list_add_tail(&q->classes[i].alist, &q->active);
@@ -658,11 +658,11 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
 			list_del(&q->classes[i].alist);
 		qdisc_tree_flush_backlog(q->classes[i].qdisc);
 	}
-	q->nstrict = nstrict;
+	WRITE_ONCE(q->nstrict, nstrict);
 	memcpy(q->prio2band, priomap, sizeof(priomap));
 
 	for (i = 0; i < q->nbands; i++)
-		q->classes[i].quantum = quanta[i];
+		WRITE_ONCE(q->classes[i].quantum, quanta[i]);
 
 	for (i = oldbands; i < q->nbands; i++) {
 		q->classes[i].qdisc = queues[i];
@@ -676,7 +676,7 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
 	for (i = q->nbands; i < oldbands; i++) {
 		qdisc_put(q->classes[i].qdisc);
 		q->classes[i].qdisc = NULL;
-		q->classes[i].quantum = 0;
+		WRITE_ONCE(q->classes[i].quantum, 0);
 		q->classes[i].deficit = 0;
 		gnet_stats_basic_sync_init(&q->classes[i].bstats);
 		memset(&q->classes[i].qstats, 0, sizeof(q->classes[i].qstats));
@@ -733,6 +733,7 @@ static int ets_qdisc_dump(struct Qdisc *sch, struct sk_buff *skb)
 	struct ets_sched *q = qdisc_priv(sch);
 	struct nlattr *opts;
 	struct nlattr *nest;
+	u8 nbands, nstrict;
 	int band;
 	int prio;
 	int err;
@@ -745,21 +746,22 @@ static int ets_qdisc_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (!opts)
 		goto nla_err;
 
-	if (nla_put_u8(skb, TCA_ETS_NBANDS, q->nbands))
+	nbands = READ_ONCE(q->nbands);
+	if (nla_put_u8(skb, TCA_ETS_NBANDS, nbands))
 		goto nla_err;
 
-	if (q->nstrict &&
-	    nla_put_u8(skb, TCA_ETS_NSTRICT, q->nstrict))
+	nstrict = READ_ONCE(q->nstrict);
+	if (nstrict && nla_put_u8(skb, TCA_ETS_NSTRICT, nstrict))
 		goto nla_err;
 
-	if (q->nbands > q->nstrict) {
+	if (nbands > nstrict) {
 		nest = nla_nest_start(skb, TCA_ETS_QUANTA);
 		if (!nest)
 			goto nla_err;
 
-		for (band = q->nstrict; band < q->nbands; band++) {
+		for (band = nstrict; band < nbands; band++) {
 			if (nla_put_u32(skb, TCA_ETS_QUANTA_BAND,
-					q->classes[band].quantum))
+					READ_ONCE(q->classes[band].quantum)))
 				goto nla_err;
 		}
 
@@ -771,7 +773,8 @@ static int ets_qdisc_dump(struct Qdisc *sch, struct sk_buff *skb)
 		goto nla_err;
 
 	for (prio = 0; prio <= TC_PRIO_MAX; prio++) {
-		if (nla_put_u8(skb, TCA_ETS_PRIOMAP_BAND, q->prio2band[prio]))
+		if (nla_put_u8(skb, TCA_ETS_PRIOMAP_BAND,
+			       READ_ONCE(q->prio2band[prio])))
 			goto nla_err;
 	}
 
-- 
2.44.0.683.g7961c838ac-goog


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH net-next 08/14] net_sched: sch_fifo: implement lockless __fifo_dump()
  2024-04-15 13:20 [PATCH net-next 00/14] net_sched: first series for RTNL-less qdisc dumps Eric Dumazet
                   ` (6 preceding siblings ...)
  2024-04-15 13:20 ` [PATCH net-next 07/14] net_sched: sch_ets: implement lockless ets_dump() Eric Dumazet
@ 2024-04-15 13:20 ` Eric Dumazet
  2024-04-15 13:20 ` [PATCH net-next 09/14] net_sched: sch_fq_codel: implement lockless fq_codel_dump() Eric Dumazet
                   ` (5 subsequent siblings)
  13 siblings, 0 replies; 44+ messages in thread
From: Eric Dumazet @ 2024-04-15 13:20 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Jamal Hadi Salim, Cong Wang, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet

Instead of relying on RTNL, __fifo_dump() can use READ_ONCE()
annotations, paired with WRITE_ONCE() ones in __fifo_init().

Also add missing READ_ONCE(sch->limit) in bfifo_enqueue(),
pfifo_enqueue() and pfifo_tail_enqueue().

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/sched/sch_fifo.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 450f5c67ac4956e21b544dfd81f886714171eced..b50b2c2cc09bc6ee5b23d9d5d3abea4423ff75b9 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -19,7 +19,8 @@
 static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 			 struct sk_buff **to_free)
 {
-	if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= sch->limit))
+	if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <=
+		   READ_ONCE(sch->limit)))
 		return qdisc_enqueue_tail(skb, sch);
 
 	return qdisc_drop(skb, sch, to_free);
@@ -28,7 +29,7 @@ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 			 struct sk_buff **to_free)
 {
-	if (likely(sch->q.qlen < sch->limit))
+	if (likely(sch->q.qlen < READ_ONCE(sch->limit)))
 		return qdisc_enqueue_tail(skb, sch);
 
 	return qdisc_drop(skb, sch, to_free);
@@ -39,7 +40,7 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 {
 	unsigned int prev_backlog;
 
-	if (likely(sch->q.qlen < sch->limit))
+	if (likely(sch->q.qlen < READ_ONCE(sch->limit)))
 		return qdisc_enqueue_tail(skb, sch);
 
 	prev_backlog = sch->qstats.backlog;
@@ -105,14 +106,14 @@ static int __fifo_init(struct Qdisc *sch, struct nlattr *opt,
 		if (is_bfifo)
 			limit *= psched_mtu(qdisc_dev(sch));
 
-		sch->limit = limit;
+		WRITE_ONCE(sch->limit, limit);
 	} else {
 		struct tc_fifo_qopt *ctl = nla_data(opt);
 
 		if (nla_len(opt) < sizeof(*ctl))
 			return -EINVAL;
 
-		sch->limit = ctl->limit;
+		WRITE_ONCE(sch->limit, ctl->limit);
 	}
 
 	if (is_bfifo)
@@ -154,7 +155,7 @@ static void fifo_destroy(struct Qdisc *sch)
 
 static int __fifo_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
-	struct tc_fifo_qopt opt = { .limit = sch->limit };
+	struct tc_fifo_qopt opt = { .limit = READ_ONCE(sch->limit) };
 
 	if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
 		goto nla_put_failure;
-- 
2.44.0.683.g7961c838ac-goog


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH net-next 09/14] net_sched: sch_fq_codel: implement lockless fq_codel_dump()
  2024-04-15 13:20 [PATCH net-next 00/14] net_sched: first series for RTNL-less qdisc dumps Eric Dumazet
                   ` (7 preceding siblings ...)
  2024-04-15 13:20 ` [PATCH net-next 08/14] net_sched: sch_fifo: implement lockless __fifo_dump() Eric Dumazet
@ 2024-04-15 13:20 ` Eric Dumazet
  2024-04-17 17:07   ` Simon Horman
  2024-04-15 13:20 ` [PATCH net-next 10/14] net_sched: sch_fq_pie: implement lockless fq_pie_dump() Eric Dumazet
                   ` (4 subsequent siblings)
  13 siblings, 1 reply; 44+ messages in thread
From: Eric Dumazet @ 2024-04-15 13:20 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Jamal Hadi Salim, Cong Wang, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet

Instead of relying on RTNL, fq_codel_dump() can use READ_ONCE()
annotations, paired with WRITE_ONCE() ones in fq_codel_change().

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/sched/sch_fq_codel.c | 57 ++++++++++++++++++++++++----------------
 1 file changed, 35 insertions(+), 22 deletions(-)

diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 79f9d6de6c852268fa293f8067d029d385b54976..4f908c11ba9528f8f9f3af6752ff10005d6f6511 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -396,40 +396,49 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt,
 	if (tb[TCA_FQ_CODEL_TARGET]) {
 		u64 target = nla_get_u32(tb[TCA_FQ_CODEL_TARGET]);
 
-		q->cparams.target = (target * NSEC_PER_USEC) >> CODEL_SHIFT;
+		WRITE_ONCE(q->cparams.target,
+			   (target * NSEC_PER_USEC) >> CODEL_SHIFT);
 	}
 
 	if (tb[TCA_FQ_CODEL_CE_THRESHOLD]) {
 		u64 val = nla_get_u32(tb[TCA_FQ_CODEL_CE_THRESHOLD]);
 
-		q->cparams.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT;
+		WRITE_ONCE(q->cparams.ce_threshold,
+			   (val * NSEC_PER_USEC) >> CODEL_SHIFT);
 	}
 
 	if (tb[TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR])
-		q->cparams.ce_threshold_selector = nla_get_u8(tb[TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR]);
+		WRITE_ONCE(q->cparams.ce_threshold_selector,
+			   nla_get_u8(tb[TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR]));
 	if (tb[TCA_FQ_CODEL_CE_THRESHOLD_MASK])
-		q->cparams.ce_threshold_mask = nla_get_u8(tb[TCA_FQ_CODEL_CE_THRESHOLD_MASK]);
+		WRITE_ONCE(q->cparams.ce_threshold_mask,
+			   nla_get_u8(tb[TCA_FQ_CODEL_CE_THRESHOLD_MASK]));
 
 	if (tb[TCA_FQ_CODEL_INTERVAL]) {
 		u64 interval = nla_get_u32(tb[TCA_FQ_CODEL_INTERVAL]);
 
-		q->cparams.interval = (interval * NSEC_PER_USEC) >> CODEL_SHIFT;
+		WRITE_ONCE(q->cparams.interval,
+			   (interval * NSEC_PER_USEC) >> CODEL_SHIFT);
 	}
 
 	if (tb[TCA_FQ_CODEL_LIMIT])
-		sch->limit = nla_get_u32(tb[TCA_FQ_CODEL_LIMIT]);
+		WRITE_ONCE(sch->limit,
+			   nla_get_u32(tb[TCA_FQ_CODEL_LIMIT]));
 
 	if (tb[TCA_FQ_CODEL_ECN])
-		q->cparams.ecn = !!nla_get_u32(tb[TCA_FQ_CODEL_ECN]);
+		WRITE_ONCE(q->cparams.ecn,
+			   !!nla_get_u32(tb[TCA_FQ_CODEL_ECN]));
 
 	if (quantum)
-		q->quantum = quantum;
+		WRITE_ONCE(q->quantum, quantum);
 
 	if (tb[TCA_FQ_CODEL_DROP_BATCH_SIZE])
-		q->drop_batch_size = max(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE]));
+		WRITE_ONCE(q->drop_batch_size,
+			   max(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE])));
 
 	if (tb[TCA_FQ_CODEL_MEMORY_LIMIT])
-		q->memory_limit = min(1U << 31, nla_get_u32(tb[TCA_FQ_CODEL_MEMORY_LIMIT]));
+		WRITE_ONCE(q->memory_limit,
+			   min(1U << 31, nla_get_u32(tb[TCA_FQ_CODEL_MEMORY_LIMIT])));
 
 	while (sch->q.qlen > sch->limit ||
 	       q->memory_usage > q->memory_limit) {
@@ -522,6 +531,7 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt,
 static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct fq_codel_sched_data *q = qdisc_priv(sch);
+	codel_time_t ce_threshold;
 	struct nlattr *opts;
 
 	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
@@ -529,30 +539,33 @@ static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb)
 		goto nla_put_failure;
 
 	if (nla_put_u32(skb, TCA_FQ_CODEL_TARGET,
-			codel_time_to_us(q->cparams.target)) ||
+			codel_time_to_us(READ_ONCE(q->cparams.target))) ||
 	    nla_put_u32(skb, TCA_FQ_CODEL_LIMIT,
-			sch->limit) ||
+			READ_ONCE(sch->limit)) ||
 	    nla_put_u32(skb, TCA_FQ_CODEL_INTERVAL,
-			codel_time_to_us(q->cparams.interval)) ||
+			codel_time_to_us(READ_ONCE(q->cparams.interval))) ||
 	    nla_put_u32(skb, TCA_FQ_CODEL_ECN,
-			q->cparams.ecn) ||
+			READ_ONCE(q->cparams.ecn)) ||
 	    nla_put_u32(skb, TCA_FQ_CODEL_QUANTUM,
-			q->quantum) ||
+			READ_ONCE(q->quantum)) ||
 	    nla_put_u32(skb, TCA_FQ_CODEL_DROP_BATCH_SIZE,
-			q->drop_batch_size) ||
+			READ_ONCE(q->drop_batch_size)) ||
 	    nla_put_u32(skb, TCA_FQ_CODEL_MEMORY_LIMIT,
-			q->memory_limit) ||
+			READ_ONCE(q->memory_limit)) ||
 	    nla_put_u32(skb, TCA_FQ_CODEL_FLOWS,
-			q->flows_cnt))
+			READ_ONCE(q->flows_cnt)))
 		goto nla_put_failure;
 
-	if (q->cparams.ce_threshold != CODEL_DISABLED_THRESHOLD) {
+	ce_threshold = READ_ONCE(q->cparams.ce_threshold);
+	if (ce_threshold != CODEL_DISABLED_THRESHOLD) {
 		if (nla_put_u32(skb, TCA_FQ_CODEL_CE_THRESHOLD,
-				codel_time_to_us(q->cparams.ce_threshold)))
+				codel_time_to_us(ce_threshold)))
 			goto nla_put_failure;
-		if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR, q->cparams.ce_threshold_selector))
+		if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR,
+			       READ_ONCE(q->cparams.ce_threshold_selector)))
 			goto nla_put_failure;
-		if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_MASK, q->cparams.ce_threshold_mask))
+		if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_MASK,
+			       READ_ONCE(q->cparams.ce_threshold_mask)))
 			goto nla_put_failure;
 	}
 
-- 
2.44.0.683.g7961c838ac-goog


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH net-next 10/14] net_sched: sch_fq_pie: implement lockless fq_pie_dump()
  2024-04-15 13:20 [PATCH net-next 00/14] net_sched: first series for RTNL-less qdisc dumps Eric Dumazet
                   ` (8 preceding siblings ...)
  2024-04-15 13:20 ` [PATCH net-next 09/14] net_sched: sch_fq_codel: implement lockless fq_codel_dump() Eric Dumazet
@ 2024-04-15 13:20 ` Eric Dumazet
  2024-04-17 17:13   ` Simon Horman
  2024-04-15 13:20 ` [PATCH net-next 11/14] net_sched: sch_hfsc: implement lockless accesses to q->defcls Eric Dumazet
                   ` (3 subsequent siblings)
  13 siblings, 1 reply; 44+ messages in thread
From: Eric Dumazet @ 2024-04-15 13:20 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Jamal Hadi Salim, Cong Wang, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet

Instead of relying on RTNL, fq_pie_dump() can use READ_ONCE()
annotations, paired with WRITE_ONCE() ones in fq_pie_change().

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/sched/sch_fq_pie.c | 61 +++++++++++++++++++++++-------------------
 1 file changed, 34 insertions(+), 27 deletions(-)

diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
index 358cf304f4c91203749bac10f6e9154eda0a3778..c38f33ff80bde74cfe33de7558f66e5962ffe56b 100644
--- a/net/sched/sch_fq_pie.c
+++ b/net/sched/sch_fq_pie.c
@@ -299,8 +299,8 @@ static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt,
 	if (tb[TCA_FQ_PIE_LIMIT]) {
 		u32 limit = nla_get_u32(tb[TCA_FQ_PIE_LIMIT]);
 
-		q->p_params.limit = limit;
-		sch->limit = limit;
+		WRITE_ONCE(q->p_params.limit, limit);
+		WRITE_ONCE(sch->limit, limit);
 	}
 	if (tb[TCA_FQ_PIE_FLOWS]) {
 		if (q->flows) {
@@ -322,39 +322,45 @@ static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt,
 		u32 target = nla_get_u32(tb[TCA_FQ_PIE_TARGET]);
 
 		/* convert to pschedtime */
-		q->p_params.target =
-			PSCHED_NS2TICKS((u64)target * NSEC_PER_USEC);
+		WRITE_ONCE(q->p_params.target,
+			   PSCHED_NS2TICKS((u64)target * NSEC_PER_USEC));
 	}
 
 	/* tupdate is in jiffies */
 	if (tb[TCA_FQ_PIE_TUPDATE])
-		q->p_params.tupdate =
-			usecs_to_jiffies(nla_get_u32(tb[TCA_FQ_PIE_TUPDATE]));
+		WRITE_ONCE(q->p_params.tupdate,
+			usecs_to_jiffies(nla_get_u32(tb[TCA_FQ_PIE_TUPDATE])));
 
 	if (tb[TCA_FQ_PIE_ALPHA])
-		q->p_params.alpha = nla_get_u32(tb[TCA_FQ_PIE_ALPHA]);
+		WRITE_ONCE(q->p_params.alpha,
+			   nla_get_u32(tb[TCA_FQ_PIE_ALPHA]));
 
 	if (tb[TCA_FQ_PIE_BETA])
-		q->p_params.beta = nla_get_u32(tb[TCA_FQ_PIE_BETA]);
+		WRITE_ONCE(q->p_params.beta,
+			   nla_get_u32(tb[TCA_FQ_PIE_BETA]));
 
 	if (tb[TCA_FQ_PIE_QUANTUM])
-		q->quantum = nla_get_u32(tb[TCA_FQ_PIE_QUANTUM]);
+		WRITE_ONCE(q->quantum, nla_get_u32(tb[TCA_FQ_PIE_QUANTUM]));
 
 	if (tb[TCA_FQ_PIE_MEMORY_LIMIT])
-		q->memory_limit = nla_get_u32(tb[TCA_FQ_PIE_MEMORY_LIMIT]);
+		WRITE_ONCE(q->memory_limit,
+			   nla_get_u32(tb[TCA_FQ_PIE_MEMORY_LIMIT]));
 
 	if (tb[TCA_FQ_PIE_ECN_PROB])
-		q->ecn_prob = nla_get_u32(tb[TCA_FQ_PIE_ECN_PROB]);
+		WRITE_ONCE(q->ecn_prob,
+			   nla_get_u32(tb[TCA_FQ_PIE_ECN_PROB]));
 
 	if (tb[TCA_FQ_PIE_ECN])
-		q->p_params.ecn = nla_get_u32(tb[TCA_FQ_PIE_ECN]);
+		WRITE_ONCE(q->p_params.ecn,
+			   nla_get_u32(tb[TCA_FQ_PIE_ECN]));
 
 	if (tb[TCA_FQ_PIE_BYTEMODE])
-		q->p_params.bytemode = nla_get_u32(tb[TCA_FQ_PIE_BYTEMODE]);
+		WRITE_ONCE(q->p_params.bytemode,
+			   nla_get_u32(tb[TCA_FQ_PIE_BYTEMODE]));
 
 	if (tb[TCA_FQ_PIE_DQ_RATE_ESTIMATOR])
-		q->p_params.dq_rate_estimator =
-			nla_get_u32(tb[TCA_FQ_PIE_DQ_RATE_ESTIMATOR]);
+		WRITE_ONCE(q->p_params.dq_rate_estimator,
+			   nla_get_u32(tb[TCA_FQ_PIE_DQ_RATE_ESTIMATOR]));
 
 	/* Drop excess packets if new limit is lower */
 	while (sch->q.qlen > sch->limit) {
@@ -471,22 +477,23 @@ static int fq_pie_dump(struct Qdisc *sch, struct sk_buff *skb)
 		return -EMSGSIZE;
 
 	/* convert target from pschedtime to us */
-	if (nla_put_u32(skb, TCA_FQ_PIE_LIMIT, sch->limit) ||
-	    nla_put_u32(skb, TCA_FQ_PIE_FLOWS, q->flows_cnt) ||
+	if (nla_put_u32(skb, TCA_FQ_PIE_LIMIT, READ_ONCE(sch->limit)) ||
+	    nla_put_u32(skb, TCA_FQ_PIE_FLOWS, READ_ONCE(q->flows_cnt)) ||
 	    nla_put_u32(skb, TCA_FQ_PIE_TARGET,
-			((u32)PSCHED_TICKS2NS(q->p_params.target)) /
+			((u32)PSCHED_TICKS2NS(READ_ONCE(q->p_params.target))) /
 			NSEC_PER_USEC) ||
 	    nla_put_u32(skb, TCA_FQ_PIE_TUPDATE,
-			jiffies_to_usecs(q->p_params.tupdate)) ||
-	    nla_put_u32(skb, TCA_FQ_PIE_ALPHA, q->p_params.alpha) ||
-	    nla_put_u32(skb, TCA_FQ_PIE_BETA, q->p_params.beta) ||
-	    nla_put_u32(skb, TCA_FQ_PIE_QUANTUM, q->quantum) ||
-	    nla_put_u32(skb, TCA_FQ_PIE_MEMORY_LIMIT, q->memory_limit) ||
-	    nla_put_u32(skb, TCA_FQ_PIE_ECN_PROB, q->ecn_prob) ||
-	    nla_put_u32(skb, TCA_FQ_PIE_ECN, q->p_params.ecn) ||
-	    nla_put_u32(skb, TCA_FQ_PIE_BYTEMODE, q->p_params.bytemode) ||
+			jiffies_to_usecs(READ_ONCE(q->p_params.tupdate))) ||
+	    nla_put_u32(skb, TCA_FQ_PIE_ALPHA, READ_ONCE(q->p_params.alpha)) ||
+	    nla_put_u32(skb, TCA_FQ_PIE_BETA, READ_ONCE(q->p_params.beta)) ||
+	    nla_put_u32(skb, TCA_FQ_PIE_QUANTUM, READ_ONCE(q->quantum)) ||
+	    nla_put_u32(skb, TCA_FQ_PIE_MEMORY_LIMIT,
+			READ_ONCE(q->memory_limit)) ||
+	    nla_put_u32(skb, TCA_FQ_PIE_ECN_PROB, READ_ONCE(q->ecn_prob)) ||
+	    nla_put_u32(skb, TCA_FQ_PIE_ECN, READ_ONCE(q->p_params.ecn)) ||
+	    nla_put_u32(skb, TCA_FQ_PIE_BYTEMODE, READ_ONCE(q->p_params.bytemode)) ||
 	    nla_put_u32(skb, TCA_FQ_PIE_DQ_RATE_ESTIMATOR,
-			q->p_params.dq_rate_estimator))
+			READ_ONCE(q->p_params.dq_rate_estimator)))
 		goto nla_put_failure;
 
 	return nla_nest_end(skb, opts);
-- 
2.44.0.683.g7961c838ac-goog


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH net-next 11/14] net_sched: sch_hfsc: implement lockless accesses to q->defcls
  2024-04-15 13:20 [PATCH net-next 00/14] net_sched: first series for RTNL-less qdisc dumps Eric Dumazet
                   ` (9 preceding siblings ...)
  2024-04-15 13:20 ` [PATCH net-next 10/14] net_sched: sch_fq_pie: implement lockless fq_pie_dump() Eric Dumazet
@ 2024-04-15 13:20 ` Eric Dumazet
  2024-04-15 13:20 ` [PATCH net-next 12/14] net_sched: sch_hhf: implement lockless hhf_dump() Eric Dumazet
                   ` (2 subsequent siblings)
  13 siblings, 0 replies; 44+ messages in thread
From: Eric Dumazet @ 2024-04-15 13:20 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Jamal Hadi Salim, Cong Wang, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet

Instead of relying on RTNL, hfsc_dump_qdisc() can use a READ_ONCE()
annotation, paired with a WRITE_ONCE() one in hfsc_change_qdisc().

Also use READ_ONCE(q->defcls) in hfsc_classify(), so that
hfsc_change_qdisc() no longer needs to acquire the qdisc lock.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/sched/sch_hfsc.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 4e626df742d7a937c219ae9755816f099b6f0680..c287bf8423b47b7ca022fc2e6ca19b77f3ec13a0 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1174,7 +1174,8 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 	}
 
 	/* classification failed, try default class */
-	cl = hfsc_find_class(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
+	cl = hfsc_find_class(TC_H_MAKE(TC_H_MAJ(sch->handle),
+				       READ_ONCE(q->defcls)), sch);
 	if (cl == NULL || cl->level > 0)
 		return NULL;
 
@@ -1443,9 +1444,7 @@ hfsc_change_qdisc(struct Qdisc *sch, struct nlattr *opt,
 		return -EINVAL;
 	qopt = nla_data(opt);
 
-	sch_tree_lock(sch);
-	q->defcls = qopt->defcls;
-	sch_tree_unlock(sch);
+	WRITE_ONCE(q->defcls, qopt->defcls);
 
 	return 0;
 }
@@ -1525,7 +1524,7 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb)
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_hfsc_qopt qopt;
 
-	qopt.defcls = q->defcls;
+	qopt.defcls = READ_ONCE(q->defcls);
 	if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt))
 		goto nla_put_failure;
 	return skb->len;
-- 
2.44.0.683.g7961c838ac-goog


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH net-next 12/14] net_sched: sch_hhf: implement lockless hhf_dump()
  2024-04-15 13:20 [PATCH net-next 00/14] net_sched: first series for RTNL-less qdisc dumps Eric Dumazet
                   ` (10 preceding siblings ...)
  2024-04-15 13:20 ` [PATCH net-next 11/14] net_sched: sch_hfsc: implement lockless accesses to q->defcls Eric Dumazet
@ 2024-04-15 13:20 ` Eric Dumazet
  2024-04-17 17:26   ` Simon Horman
  2024-04-15 13:20 ` [PATCH net-next 13/14] net_sched: sch_pie: implement lockless pie_dump() Eric Dumazet
  2024-04-15 13:20 ` [PATCH net-next 14/14] net_sched: sch_skbprio: implement lockless skbprio_dump() Eric Dumazet
  13 siblings, 1 reply; 44+ messages in thread
From: Eric Dumazet @ 2024-04-15 13:20 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Jamal Hadi Salim, Cong Wang, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet

Instead of relying on RTNL, hhf_dump() can use READ_ONCE()
annotations, paired with WRITE_ONCE() ones in hhf_change().

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/sched/sch_hhf.c | 35 +++++++++++++++++++++--------------
 1 file changed, 21 insertions(+), 14 deletions(-)

diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 3f906df1435b2edea4286ba56bab68066be238b1..44d9efe1a96a89bdb44a6d48071b3eed90fd5554 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -534,27 +534,31 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt,
 	sch_tree_lock(sch);
 
 	if (tb[TCA_HHF_BACKLOG_LIMIT])
-		sch->limit = nla_get_u32(tb[TCA_HHF_BACKLOG_LIMIT]);
+		WRITE_ONCE(sch->limit, nla_get_u32(tb[TCA_HHF_BACKLOG_LIMIT]));
 
-	q->quantum = new_quantum;
-	q->hhf_non_hh_weight = new_hhf_non_hh_weight;
+	WRITE_ONCE(q->quantum, new_quantum);
+	WRITE_ONCE(q->hhf_non_hh_weight, new_hhf_non_hh_weight);
 
 	if (tb[TCA_HHF_HH_FLOWS_LIMIT])
-		q->hh_flows_limit = nla_get_u32(tb[TCA_HHF_HH_FLOWS_LIMIT]);
+		WRITE_ONCE(q->hh_flows_limit,
+			   nla_get_u32(tb[TCA_HHF_HH_FLOWS_LIMIT]));
 
 	if (tb[TCA_HHF_RESET_TIMEOUT]) {
 		u32 us = nla_get_u32(tb[TCA_HHF_RESET_TIMEOUT]);
 
-		q->hhf_reset_timeout = usecs_to_jiffies(us);
+		WRITE_ONCE(q->hhf_reset_timeout,
+			   usecs_to_jiffies(us));
 	}
 
 	if (tb[TCA_HHF_ADMIT_BYTES])
-		q->hhf_admit_bytes = nla_get_u32(tb[TCA_HHF_ADMIT_BYTES]);
+		WRITE_ONCE(q->hhf_admit_bytes,
+			   nla_get_u32(tb[TCA_HHF_ADMIT_BYTES]));
 
 	if (tb[TCA_HHF_EVICT_TIMEOUT]) {
 		u32 us = nla_get_u32(tb[TCA_HHF_EVICT_TIMEOUT]);
 
-		q->hhf_evict_timeout = usecs_to_jiffies(us);
+		WRITE_ONCE(q->hhf_evict_timeout,
+			   usecs_to_jiffies(us));
 	}
 
 	qlen = sch->q.qlen;
@@ -657,15 +661,18 @@ static int hhf_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (opts == NULL)
 		goto nla_put_failure;
 
-	if (nla_put_u32(skb, TCA_HHF_BACKLOG_LIMIT, sch->limit) ||
-	    nla_put_u32(skb, TCA_HHF_QUANTUM, q->quantum) ||
-	    nla_put_u32(skb, TCA_HHF_HH_FLOWS_LIMIT, q->hh_flows_limit) ||
+	if (nla_put_u32(skb, TCA_HHF_BACKLOG_LIMIT, READ_ONCE(sch->limit)) ||
+	    nla_put_u32(skb, TCA_HHF_QUANTUM, READ_ONCE(q->quantum)) ||
+	    nla_put_u32(skb, TCA_HHF_HH_FLOWS_LIMIT,
+			READ_ONCE(q->hh_flows_limit)) ||
 	    nla_put_u32(skb, TCA_HHF_RESET_TIMEOUT,
-			jiffies_to_usecs(q->hhf_reset_timeout)) ||
-	    nla_put_u32(skb, TCA_HHF_ADMIT_BYTES, q->hhf_admit_bytes) ||
+			jiffies_to_usecs(READ_ONCE(q->hhf_reset_timeout))) ||
+	    nla_put_u32(skb, TCA_HHF_ADMIT_BYTES,
+			READ_ONCE(q->hhf_admit_bytes)) ||
 	    nla_put_u32(skb, TCA_HHF_EVICT_TIMEOUT,
-			jiffies_to_usecs(q->hhf_evict_timeout)) ||
-	    nla_put_u32(skb, TCA_HHF_NON_HH_WEIGHT, q->hhf_non_hh_weight))
+			jiffies_to_usecs(READ_ONCE(q->hhf_evict_timeout))) ||
+	    nla_put_u32(skb, TCA_HHF_NON_HH_WEIGHT,
+			READ_ONCE(q->hhf_non_hh_weight)))
 		goto nla_put_failure;
 
 	return nla_nest_end(skb, opts);
-- 
2.44.0.683.g7961c838ac-goog


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH net-next 13/14] net_sched: sch_pie: implement lockless pie_dump()
  2024-04-15 13:20 [PATCH net-next 00/14] net_sched: first series for RTNL-less qdisc dumps Eric Dumazet
                   ` (11 preceding siblings ...)
  2024-04-15 13:20 ` [PATCH net-next 12/14] net_sched: sch_hhf: implement lockless hhf_dump() Eric Dumazet
@ 2024-04-15 13:20 ` Eric Dumazet
  2024-04-17 17:28   ` Simon Horman
  2024-04-15 13:20 ` [PATCH net-next 14/14] net_sched: sch_skbprio: implement lockless skbprio_dump() Eric Dumazet
  13 siblings, 1 reply; 44+ messages in thread
From: Eric Dumazet @ 2024-04-15 13:20 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Jamal Hadi Salim, Cong Wang, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet

Instead of relying on RTNL, pie_dump() can use READ_ONCE()
annotations, paired with WRITE_ONCE() ones in pie_change().

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/sched/sch_pie.c | 39 +++++++++++++++++++++------------------
 1 file changed, 21 insertions(+), 18 deletions(-)

diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index 1764059b063536f619fb4a32b53adfe8c639a8e5..b3dcb845b32759357f4db1980a7cb4db531bfad5 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -156,36 +156,38 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt,
 		u32 target = nla_get_u32(tb[TCA_PIE_TARGET]);
 
 		/* convert to pschedtime */
-		q->params.target = PSCHED_NS2TICKS((u64)target * NSEC_PER_USEC);
+		WRITE_ONCE(q->params.target,
+			   PSCHED_NS2TICKS((u64)target * NSEC_PER_USEC));
 	}
 
 	/* tupdate is in jiffies */
 	if (tb[TCA_PIE_TUPDATE])
-		q->params.tupdate =
-			usecs_to_jiffies(nla_get_u32(tb[TCA_PIE_TUPDATE]));
+		WRITE_ONCE(q->params.tupdate,
+			   usecs_to_jiffies(nla_get_u32(tb[TCA_PIE_TUPDATE])));
 
 	if (tb[TCA_PIE_LIMIT]) {
 		u32 limit = nla_get_u32(tb[TCA_PIE_LIMIT]);
 
-		q->params.limit = limit;
-		sch->limit = limit;
+		WRITE_ONCE(q->params.limit, limit);
+		WRITE_ONCE(sch->limit, limit);
 	}
 
 	if (tb[TCA_PIE_ALPHA])
-		q->params.alpha = nla_get_u32(tb[TCA_PIE_ALPHA]);
+		WRITE_ONCE(q->params.alpha, nla_get_u32(tb[TCA_PIE_ALPHA]));
 
 	if (tb[TCA_PIE_BETA])
-		q->params.beta = nla_get_u32(tb[TCA_PIE_BETA]);
+		WRITE_ONCE(q->params.beta, nla_get_u32(tb[TCA_PIE_BETA]));
 
 	if (tb[TCA_PIE_ECN])
-		q->params.ecn = nla_get_u32(tb[TCA_PIE_ECN]);
+		WRITE_ONCE(q->params.ecn, nla_get_u32(tb[TCA_PIE_ECN]));
 
 	if (tb[TCA_PIE_BYTEMODE])
-		q->params.bytemode = nla_get_u32(tb[TCA_PIE_BYTEMODE]);
+		WRITE_ONCE(q->params.bytemode,
+			   nla_get_u32(tb[TCA_PIE_BYTEMODE]));
 
 	if (tb[TCA_PIE_DQ_RATE_ESTIMATOR])
-		q->params.dq_rate_estimator =
-				nla_get_u32(tb[TCA_PIE_DQ_RATE_ESTIMATOR]);
+		WRITE_ONCE(q->params.dq_rate_estimator,
+			   nla_get_u32(tb[TCA_PIE_DQ_RATE_ESTIMATOR]));
 
 	/* Drop excess packets if new limit is lower */
 	qlen = sch->q.qlen;
@@ -469,17 +471,18 @@ static int pie_dump(struct Qdisc *sch, struct sk_buff *skb)
 
 	/* convert target from pschedtime to us */
 	if (nla_put_u32(skb, TCA_PIE_TARGET,
-			((u32)PSCHED_TICKS2NS(q->params.target)) /
+			((u32)PSCHED_TICKS2NS(READ_ONCE(q->params.target))) /
 			NSEC_PER_USEC) ||
-	    nla_put_u32(skb, TCA_PIE_LIMIT, sch->limit) ||
+	    nla_put_u32(skb, TCA_PIE_LIMIT, READ_ONCE(sch->limit)) ||
 	    nla_put_u32(skb, TCA_PIE_TUPDATE,
-			jiffies_to_usecs(q->params.tupdate)) ||
-	    nla_put_u32(skb, TCA_PIE_ALPHA, q->params.alpha) ||
-	    nla_put_u32(skb, TCA_PIE_BETA, q->params.beta) ||
+			jiffies_to_usecs(READ_ONCE(q->params.tupdate))) ||
+	    nla_put_u32(skb, TCA_PIE_ALPHA, READ_ONCE(q->params.alpha)) ||
+	    nla_put_u32(skb, TCA_PIE_BETA, READ_ONCE(q->params.beta)) ||
 	    nla_put_u32(skb, TCA_PIE_ECN, q->params.ecn) ||
-	    nla_put_u32(skb, TCA_PIE_BYTEMODE, q->params.bytemode) ||
+	    nla_put_u32(skb, TCA_PIE_BYTEMODE,
+			READ_ONCE(q->params.bytemode)) ||
 	    nla_put_u32(skb, TCA_PIE_DQ_RATE_ESTIMATOR,
-			q->params.dq_rate_estimator))
+			READ_ONCE(q->params.dq_rate_estimator)))
 		goto nla_put_failure;
 
 	return nla_nest_end(skb, opts);
-- 
2.44.0.683.g7961c838ac-goog


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH net-next 14/14] net_sched: sch_skbprio: implement lockless skbprio_dump()
  2024-04-15 13:20 [PATCH net-next 00/14] net_sched: first series for RTNL-less qdisc dumps Eric Dumazet
                   ` (12 preceding siblings ...)
  2024-04-15 13:20 ` [PATCH net-next 13/14] net_sched: sch_pie: implement lockless pie_dump() Eric Dumazet
@ 2024-04-15 13:20 ` Eric Dumazet
  13 siblings, 0 replies; 44+ messages in thread
From: Eric Dumazet @ 2024-04-15 13:20 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: Jamal Hadi Salim, Cong Wang, Jiri Pirko, netdev, eric.dumazet,
	Eric Dumazet

Instead of relying on RTNL, skbprio_dump() can use READ_ONCE()
annotation, paired with WRITE_ONCE() one in skbprio_change().

Also add a READ_ONCE(sch->limit) in skbprio_enqueue().

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/sched/sch_skbprio.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c
index b4dd626c309c36725e6030a338d21d1fabcb6704..20ff7386b74bd89c00b50a8f0def91b6c5cce7f4 100644
--- a/net/sched/sch_skbprio.c
+++ b/net/sched/sch_skbprio.c
@@ -79,7 +79,9 @@ static int skbprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	prio = min(skb->priority, max_priority);
 
 	qdisc = &q->qdiscs[prio];
-	if (sch->q.qlen < sch->limit) {
+
+	/* sch->limit can change under us from skbprio_change() */
+	if (sch->q.qlen < READ_ONCE(sch->limit)) {
 		__skb_queue_tail(qdisc, skb);
 		qdisc_qstats_backlog_inc(sch, skb);
 		q->qstats[prio].backlog += qdisc_pkt_len(skb);
@@ -172,7 +174,7 @@ static int skbprio_change(struct Qdisc *sch, struct nlattr *opt,
 	if (opt->nla_len != nla_attr_size(sizeof(*ctl)))
 		return -EINVAL;
 
-	sch->limit = ctl->limit;
+	WRITE_ONCE(sch->limit, ctl->limit);
 	return 0;
 }
 
@@ -200,7 +202,7 @@ static int skbprio_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct tc_skbprio_qopt opt;
 
-	opt.limit = sch->limit;
+	opt.limit = READ_ONCE(sch->limit);
 
 	if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
 		return -1;
-- 
2.44.0.683.g7961c838ac-goog


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* Re: [PATCH net-next 01/14] net_sched: sch_fq: implement lockless fq_dump()
  2024-04-15 13:20 ` [PATCH net-next 01/14] net_sched: sch_fq: implement lockless fq_dump() Eric Dumazet
@ 2024-04-16 18:19   ` Simon Horman
  2024-04-16 18:33     ` Eric Dumazet
  0 siblings, 1 reply; 44+ messages in thread
From: Simon Horman @ 2024-04-16 18:19 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, Jamal Hadi Salim,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet

On Mon, Apr 15, 2024 at 01:20:41PM +0000, Eric Dumazet wrote:
> Instead of relying on RTNL, fq_dump() can use READ_ONCE()
> annotations, paired with WRITE_ONCE() in fq_change()
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---
>  net/sched/sch_fq.c | 96 +++++++++++++++++++++++++++++-----------------
>  1 file changed, 60 insertions(+), 36 deletions(-)
> 
> diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
> index cdf23ff16f40bf244bb822e76016fde44e0c439b..934c220b3f4336dc2f70af74d7758218492b675d 100644
> --- a/net/sched/sch_fq.c
> +++ b/net/sched/sch_fq.c
> @@ -888,7 +888,7 @@ static int fq_resize(struct Qdisc *sch, u32 log)
>  		fq_rehash(q, old_fq_root, q->fq_trees_log, array, log);
>  
>  	q->fq_root = array;
> -	q->fq_trees_log = log;
> +	WRITE_ONCE(q->fq_trees_log, log);
>  
>  	sch_tree_unlock(sch);
>  
> @@ -931,7 +931,7 @@ static void fq_prio2band_compress_crumb(const u8 *in, u8 *out)
>  
>  	memset(out, 0, num_elems / 4);
>  	for (i = 0; i < num_elems; i++)
> -		out[i / 4] |= in[i] << (2 * (i & 0x3));
> +		out[i / 4] |= READ_ONCE(in[i]) << (2 * (i & 0x3));
>  }
>  

Hi Eric,

I am a little unsure about the handling of q->prio2band in this patch.

It seems to me that fq_prio2band_compress_crumb() is used
to store values in q->prio2band, and is called (indirectly)
from fq_change() (and directly from fq_init()).

While fq_prio2band_decompress_crumb() is used to read values
from q->prio2band, and is called from fq_dump().

So I am wondering if fq_prio2band_compress_crumb() should use WRITE_ONCE()
when storing elements of out, and fq_prio2band_decompress_crumb() should
use READ_ONCE() when reading elements of in.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH net-next 01/14] net_sched: sch_fq: implement lockless fq_dump()
  2024-04-16 18:19   ` Simon Horman
@ 2024-04-16 18:33     ` Eric Dumazet
  2024-04-17  8:45       ` Eric Dumazet
  0 siblings, 1 reply; 44+ messages in thread
From: Eric Dumazet @ 2024-04-16 18:33 UTC (permalink / raw)
  To: Simon Horman
  Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, Jamal Hadi Salim,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet

On Tue, Apr 16, 2024 at 8:19 PM Simon Horman <horms@kernel.org> wrote:
>
> On Mon, Apr 15, 2024 at 01:20:41PM +0000, Eric Dumazet wrote:
> > Instead of relying on RTNL, fq_dump() can use READ_ONCE()
> > annotations, paired with WRITE_ONCE() in fq_change()
> >
> > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > ---
> >  net/sched/sch_fq.c | 96 +++++++++++++++++++++++++++++-----------------
> >  1 file changed, 60 insertions(+), 36 deletions(-)
> >
> > diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
> > index cdf23ff16f40bf244bb822e76016fde44e0c439b..934c220b3f4336dc2f70af74d7758218492b675d 100644
> > --- a/net/sched/sch_fq.c
> > +++ b/net/sched/sch_fq.c
> > @@ -888,7 +888,7 @@ static int fq_resize(struct Qdisc *sch, u32 log)
> >               fq_rehash(q, old_fq_root, q->fq_trees_log, array, log);
> >
> >       q->fq_root = array;
> > -     q->fq_trees_log = log;
> > +     WRITE_ONCE(q->fq_trees_log, log);
> >
> >       sch_tree_unlock(sch);
> >
> > @@ -931,7 +931,7 @@ static void fq_prio2band_compress_crumb(const u8 *in, u8 *out)
> >
> >       memset(out, 0, num_elems / 4);
> >       for (i = 0; i < num_elems; i++)
> > -             out[i / 4] |= in[i] << (2 * (i & 0x3));
> > +             out[i / 4] |= READ_ONCE(in[i]) << (2 * (i & 0x3));
> >  }
> >
>
> Hi Eric,
>
> I am a little unsure about the handling of q->prio2band in this patch.
>
> It seems to me that fq_prio2band_compress_crumb() is used to
> to store values in q->prio2band, and is called (indirectly)
> from fq_change() (and directly from fq_init()).
>
> While fq_prio2band_decompress_crumb() is used to read values
> from q->prio2band, and is called from fq_dump().
>
> So I am wondering if should use WRITE_ONCE() when storing elements
> of out. And fq_prio2band_decompress_crumb should use READ_ONCE when
> reading elements of in.

Yeah, you are probably right, I recall being a bit lazy on this part,
thanks !

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH net-next 02/14] net_sched: cake: implement lockless cake_dump()
  2024-04-15 13:20 ` [PATCH net-next 02/14] net_sched: cake: implement lockless cake_dump() Eric Dumazet
@ 2024-04-17  8:35   ` Simon Horman
  2024-04-17  8:54     ` Eric Dumazet
  2024-04-17 12:25     ` Toke Høiland-Jørgensen
  0 siblings, 2 replies; 44+ messages in thread
From: Simon Horman @ 2024-04-17  8:35 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, Jamal Hadi Salim,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet,
	Toke Høiland-Jørgensen, cake

+ Toke Høiland-Jørgensen <toke@toke.dk>
  cake@lists.bufferbloat.net

On Mon, Apr 15, 2024 at 01:20:42PM +0000, Eric Dumazet wrote:
> Instead of relying on RTNL, cake_dump() can use READ_ONCE()
> annotations, paired with WRITE_ONCE() ones in cake_change().
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>

...

> @@ -2774,68 +2783,71 @@ static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
>  {
>  	struct cake_sched_data *q = qdisc_priv(sch);
>  	struct nlattr *opts;
> +	u16 rate_flags;
>  
>  	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
>  	if (!opts)
>  		goto nla_put_failure;
>  
> -	if (nla_put_u64_64bit(skb, TCA_CAKE_BASE_RATE64, q->rate_bps,
> -			      TCA_CAKE_PAD))
> +	if (nla_put_u64_64bit(skb, TCA_CAKE_BASE_RATE64,
> +			      READ_ONCE(q->rate_bps), TCA_CAKE_PAD))
>  		goto nla_put_failure;
>  
>  	if (nla_put_u32(skb, TCA_CAKE_FLOW_MODE,
> -			q->flow_mode & CAKE_FLOW_MASK))
> +			READ_ONCE(q->flow_mode) & CAKE_FLOW_MASK))
>  		goto nla_put_failure;

Hi Eric,

q->flow_mode is read twice in this function. Once here...

>  
> -	if (nla_put_u32(skb, TCA_CAKE_RTT, q->interval))
> +	if (nla_put_u32(skb, TCA_CAKE_RTT, READ_ONCE(q->interval)))
>  		goto nla_put_failure;
>  
> -	if (nla_put_u32(skb, TCA_CAKE_TARGET, q->target))
> +	if (nla_put_u32(skb, TCA_CAKE_TARGET, READ_ONCE(q->target)))
>  		goto nla_put_failure;
>  
> -	if (nla_put_u32(skb, TCA_CAKE_MEMORY, q->buffer_config_limit))
> +	if (nla_put_u32(skb, TCA_CAKE_MEMORY,
> +			READ_ONCE(q->buffer_config_limit)))
>  		goto nla_put_failure;
>  
> +	rate_flags = READ_ONCE(q->rate_flags);
>  	if (nla_put_u32(skb, TCA_CAKE_AUTORATE,
> -			!!(q->rate_flags & CAKE_FLAG_AUTORATE_INGRESS)))
> +			!!(rate_flags & CAKE_FLAG_AUTORATE_INGRESS)))
>  		goto nla_put_failure;
>  
>  	if (nla_put_u32(skb, TCA_CAKE_INGRESS,
> -			!!(q->rate_flags & CAKE_FLAG_INGRESS)))
> +			!!(rate_flags & CAKE_FLAG_INGRESS)))
>  		goto nla_put_failure;
>  
> -	if (nla_put_u32(skb, TCA_CAKE_ACK_FILTER, q->ack_filter))
> +	if (nla_put_u32(skb, TCA_CAKE_ACK_FILTER, READ_ONCE(q->ack_filter)))
>  		goto nla_put_failure;
>  
>  	if (nla_put_u32(skb, TCA_CAKE_NAT,
> -			!!(q->flow_mode & CAKE_FLOW_NAT_FLAG)))
> +			!!(READ_ONCE(q->flow_mode) & CAKE_FLOW_NAT_FLAG)))
>  		goto nla_put_failure;

... and once here.

I am assuming that it isn't a big deal, but perhaps it is better to save
q->flow_mode into a local variable.

Also, more importantly, q->flow_mode does not seem to be handled
using WRITE_ONCE() in cake_change(). It's a non-trivial case,
which I guess is well served by a mechanism built around a local variable.

...

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH net-next 01/14] net_sched: sch_fq: implement lockless fq_dump()
  2024-04-16 18:33     ` Eric Dumazet
@ 2024-04-17  8:45       ` Eric Dumazet
  2024-04-17  9:00         ` Simon Horman
  0 siblings, 1 reply; 44+ messages in thread
From: Eric Dumazet @ 2024-04-17  8:45 UTC (permalink / raw)
  To: Simon Horman
  Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, Jamal Hadi Salim,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet

On Tue, Apr 16, 2024 at 8:33 PM Eric Dumazet <edumazet@google.com> wrote:
>
> On Tue, Apr 16, 2024 at 8:19 PM Simon Horman <horms@kernel.org> wrote:
> >
> > On Mon, Apr 15, 2024 at 01:20:41PM +0000, Eric Dumazet wrote:
> > > Instead of relying on RTNL, fq_dump() can use READ_ONCE()
> > > annotations, paired with WRITE_ONCE() in fq_change()
> > >
> > > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > > ---
> > >  net/sched/sch_fq.c | 96 +++++++++++++++++++++++++++++-----------------
> > >  1 file changed, 60 insertions(+), 36 deletions(-)
> > >
> > > diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
> > > index cdf23ff16f40bf244bb822e76016fde44e0c439b..934c220b3f4336dc2f70af74d7758218492b675d 100644
> > > --- a/net/sched/sch_fq.c
> > > +++ b/net/sched/sch_fq.c
> > > @@ -888,7 +888,7 @@ static int fq_resize(struct Qdisc *sch, u32 log)
> > >               fq_rehash(q, old_fq_root, q->fq_trees_log, array, log);
> > >
> > >       q->fq_root = array;
> > > -     q->fq_trees_log = log;
> > > +     WRITE_ONCE(q->fq_trees_log, log);
> > >
> > >       sch_tree_unlock(sch);
> > >
> > > @@ -931,7 +931,7 @@ static void fq_prio2band_compress_crumb(const u8 *in, u8 *out)
> > >
> > >       memset(out, 0, num_elems / 4);
> > >       for (i = 0; i < num_elems; i++)
> > > -             out[i / 4] |= in[i] << (2 * (i & 0x3));
> > > +             out[i / 4] |= READ_ONCE(in[i]) << (2 * (i & 0x3));
> > >  }
> > >
> >
> > Hi Eric,
> >
> > I am a little unsure about the handling of q->prio2band in this patch.
> >
> > It seems to me that fq_prio2band_compress_crumb() is used to
> > to store values in q->prio2band, and is called (indirectly)
> > from fq_change() (and directly from fq_init()).
> >
> > While fq_prio2band_decompress_crumb() is used to read values
> > from q->prio2band, and is called from fq_dump().
> >
> > So I am wondering if should use WRITE_ONCE() when storing elements
> > of out. And fq_prio2band_decompress_crumb should use READ_ONCE when
> > reading elements of in.
>
> Yeah, you are probably right, I recall being a bit lazy on this part,
> thanks !

I will squash this part in V2:

diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 934c220b3f4336dc2f70af74d7758218492b675d..238974725679327b0a0d483c011e15fc94ab0878 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -106,6 +106,8 @@ struct fq_perband_flows {
        int                 quantum; /* based on band nr : 576KB, 192KB, 64KB */
 };

+#define FQ_PRIO2BAND_CRUMB_SIZE ((TC_PRIO_MAX + 1) >> 2)
+
 struct fq_sched_data {
 /* Read mostly cache line */

@@ -122,7 +124,7 @@ struct fq_sched_data {
        u8              rate_enable;
        u8              fq_trees_log;
        u8              horizon_drop;
-       u8              prio2band[(TC_PRIO_MAX + 1) >> 2];
+       u8              prio2band[FQ_PRIO2BAND_CRUMB_SIZE];
        u32             timer_slack; /* hrtimer slack in ns */

 /* Read/Write fields. */
@@ -159,7 +161,7 @@ struct fq_sched_data {
 /* return the i-th 2-bit value ("crumb") */
 static u8 fq_prio2band(const u8 *prio2band, unsigned int prio)
 {
-       return (prio2band[prio / 4] >> (2 * (prio & 0x3))) & 0x3;
+       return (READ_ONCE(prio2band[prio / 4]) >> (2 * (prio & 0x3))) & 0x3;
 }

 /*
@@ -927,11 +929,15 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
 static void fq_prio2band_compress_crumb(const u8 *in, u8 *out)
 {
        const int num_elems = TC_PRIO_MAX + 1;
+       u8 tmp[FQ_PRIO2BAND_CRUMB_SIZE];
        int i;

-       memset(out, 0, num_elems / 4);
+       memset(tmp, 0, sizeof(tmp));
        for (i = 0; i < num_elems; i++)
-               out[i / 4] |= READ_ONCE(in[i]) << (2 * (i & 0x3));
+               tmp[i / 4] |= in[i] << (2 * (i & 0x3));
+
+       for (i = 0; i < FQ_PRIO2BAND_CRUMB_SIZE; i++)
+               WRITE_ONCE(out[i], tmp[i]);
 }

 static void fq_prio2band_decompress_crumb(const u8 *in, u8 *out)

^ permalink raw reply	[flat|nested] 44+ messages in thread
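
As a side note for readers following the crumb discussion outside the
kernel tree: the publish pattern in the squash above (build the crumbs in
a temporary, then store them one byte at a time) can be exercised in plain
userspace C. The sketch below is an illustration only, not kernel code;
READ_ONCE()/WRITE_ONCE() are approximated with volatile accesses, which is
what the kernel macros reduce to for aligned byte-sized scalars. Build
with a plain "gcc -O2".

#include <stdio.h>
#include <string.h>

#define TC_PRIO_MAX 15
#define CRUMB_SIZE ((TC_PRIO_MAX + 1) >> 2)

#define WRITE_ONCE(x, val) (*(volatile __typeof__(x) *)&(x) = (val))
#define READ_ONCE(x)       (*(volatile __typeof__(x) *)&(x))

static unsigned char prio2band[CRUMB_SIZE];

/* writer side: build into a temporary, then publish one byte at a time,
 * mirroring the reworked fq_prio2band_compress_crumb() above
 */
static void compress_crumb(const unsigned char *in)
{
	unsigned char tmp[CRUMB_SIZE];
	int i;

	memset(tmp, 0, sizeof(tmp));
	for (i = 0; i <= TC_PRIO_MAX; i++)
		tmp[i / 4] |= (in[i] & 0x3) << (2 * (i & 0x3));

	for (i = 0; i < CRUMB_SIZE; i++)
		WRITE_ONCE(prio2band[i], tmp[i]);
}

/* reader side: one READ_ONCE() per byte, as in fq_prio2band() */
static unsigned char get_band(unsigned int prio)
{
	return (READ_ONCE(prio2band[prio / 4]) >> (2 * (prio & 0x3))) & 0x3;
}

int main(void)
{
	unsigned char bands[TC_PRIO_MAX + 1];

	memset(bands, 1, sizeof(bands));
	bands[0] = 2;
	compress_crumb(bands);
	printf("prio 0 -> band %u, prio 7 -> band %u\n",
	       (unsigned int)get_band(0), (unsigned int)get_band(7));
	return 0;
}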

* Re: [PATCH net-next 02/14] net_sched: cake: implement lockless cake_dump()
  2024-04-17  8:35   ` Simon Horman
@ 2024-04-17  8:54     ` Eric Dumazet
  2024-04-17  9:24       ` Simon Horman
  2024-04-17 12:25     ` Toke Høiland-Jørgensen
  1 sibling, 1 reply; 44+ messages in thread
From: Eric Dumazet @ 2024-04-17  8:54 UTC (permalink / raw)
  To: Simon Horman
  Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, Jamal Hadi Salim,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet,
	Toke Høiland-Jørgensen, cake

On Wed, Apr 17, 2024 at 10:35 AM Simon Horman <horms@kernel.org> wrote:
>
> + Toke Høiland-Jørgensen <toke@toke.dk>
>   cake@lists.bufferbloat.net
>
> On Mon, Apr 15, 2024 at 01:20:42PM +0000, Eric Dumazet wrote:
> > Instead of relying on RTNL, cake_dump() can use READ_ONCE()
> > annotations, paired with WRITE_ONCE() ones in cake_change().
> >
> > Signed-off-by: Eric Dumazet <edumazet@google.com>
>
> ...
>
> > @@ -2774,68 +2783,71 @@ static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
> >  {
> >       struct cake_sched_data *q = qdisc_priv(sch);
> >       struct nlattr *opts;
> > +     u16 rate_flags;
> >
> >       opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
> >       if (!opts)
> >               goto nla_put_failure;
> >
> > -     if (nla_put_u64_64bit(skb, TCA_CAKE_BASE_RATE64, q->rate_bps,
> > -                           TCA_CAKE_PAD))
> > +     if (nla_put_u64_64bit(skb, TCA_CAKE_BASE_RATE64,
> > +                           READ_ONCE(q->rate_bps), TCA_CAKE_PAD))
> >               goto nla_put_failure;
> >
> >       if (nla_put_u32(skb, TCA_CAKE_FLOW_MODE,
> > -                     q->flow_mode & CAKE_FLOW_MASK))
> > +                     READ_ONCE(q->flow_mode) & CAKE_FLOW_MASK))
> >               goto nla_put_failure;
>
> Hi Eric,
>
> q->flow_mode is read twice in this function. Once here...
>
> >
> > -     if (nla_put_u32(skb, TCA_CAKE_RTT, q->interval))
> > +     if (nla_put_u32(skb, TCA_CAKE_RTT, READ_ONCE(q->interval)))
> >               goto nla_put_failure;
> >
> > -     if (nla_put_u32(skb, TCA_CAKE_TARGET, q->target))
> > +     if (nla_put_u32(skb, TCA_CAKE_TARGET, READ_ONCE(q->target)))
> >               goto nla_put_failure;
> >
> > -     if (nla_put_u32(skb, TCA_CAKE_MEMORY, q->buffer_config_limit))
> > +     if (nla_put_u32(skb, TCA_CAKE_MEMORY,
> > +                     READ_ONCE(q->buffer_config_limit)))
> >               goto nla_put_failure;
> >
> > +     rate_flags = READ_ONCE(q->rate_flags);
> >       if (nla_put_u32(skb, TCA_CAKE_AUTORATE,
> > -                     !!(q->rate_flags & CAKE_FLAG_AUTORATE_INGRESS)))
> > +                     !!(rate_flags & CAKE_FLAG_AUTORATE_INGRESS)))
> >               goto nla_put_failure;
> >
> >       if (nla_put_u32(skb, TCA_CAKE_INGRESS,
> > -                     !!(q->rate_flags & CAKE_FLAG_INGRESS)))
> > +                     !!(rate_flags & CAKE_FLAG_INGRESS)))
> >               goto nla_put_failure;
> >
> > -     if (nla_put_u32(skb, TCA_CAKE_ACK_FILTER, q->ack_filter))
> > +     if (nla_put_u32(skb, TCA_CAKE_ACK_FILTER, READ_ONCE(q->ack_filter)))
> >               goto nla_put_failure;
> >
> >       if (nla_put_u32(skb, TCA_CAKE_NAT,
> > -                     !!(q->flow_mode & CAKE_FLOW_NAT_FLAG)))
> > +                     !!(READ_ONCE(q->flow_mode) & CAKE_FLOW_NAT_FLAG)))
> >               goto nla_put_failure;
>
> ... and once here.
>
> I am assuming that it isn't a big deal, but perhaps it is better to save
> q->flow_mode into a local variable.
>
> Also, more importantly, q->flow_mode does not seem to be handled
> using WRITE_ONCE() in cake_change(). It's a non-trivial case,
> which I guess is well served by a mechanism built around a local variable.

Thanks !

I will squash in v2:

diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index bb37a0dedcc1e4b3418f6681d87108aad7ea066f..9602dafe32e61d38dc00b0a35e1ee3f530989610 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -2573,6 +2573,7 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
        struct cake_sched_data *q = qdisc_priv(sch);
        struct nlattr *tb[TCA_CAKE_MAX + 1];
        u16 rate_flags;
+       u8 flow_mode;
        int err;

        err = nla_parse_nested_deprecated(tb, TCA_CAKE_MAX, opt, cake_policy,
@@ -2580,10 +2581,11 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
        if (err < 0)
                return err;

+       flow_mode = q->flow_mode;
        if (tb[TCA_CAKE_NAT]) {
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
-               q->flow_mode &= ~CAKE_FLOW_NAT_FLAG;
-               q->flow_mode |= CAKE_FLOW_NAT_FLAG *
+               flow_mode &= ~CAKE_FLOW_NAT_FLAG;
+               flow_mode |= CAKE_FLOW_NAT_FLAG *
                        !!nla_get_u32(tb[TCA_CAKE_NAT]);
 #else
                NL_SET_ERR_MSG_ATTR(extack, tb[TCA_CAKE_NAT],
@@ -2609,7 +2611,7 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
        }

        if (tb[TCA_CAKE_FLOW_MODE])
-               q->flow_mode = ((q->flow_mode & CAKE_FLOW_NAT_FLAG) |
+               flow_mode = ((flow_mode & CAKE_FLOW_NAT_FLAG) |
                                (nla_get_u32(tb[TCA_CAKE_FLOW_MODE]) &
                                        CAKE_FLOW_MASK));

@@ -2689,6 +2691,7 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
        }

        WRITE_ONCE(q->rate_flags, rate_flags);
+       WRITE_ONCE(q->flow_mode, flow_mode);
        if (q->tins) {
                sch_tree_lock(sch);
                cake_reconfigure(sch);
@@ -2784,6 +2787,7 @@ static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
        struct cake_sched_data *q = qdisc_priv(sch);
        struct nlattr *opts;
        u16 rate_flags;
+       u8 flow_mode;

        opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
        if (!opts)
@@ -2793,8 +2797,8 @@ static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
                              READ_ONCE(q->rate_bps), TCA_CAKE_PAD))
                goto nla_put_failure;

-       if (nla_put_u32(skb, TCA_CAKE_FLOW_MODE,
-                       READ_ONCE(q->flow_mode) & CAKE_FLOW_MASK))
+       flow_mode = READ_ONCE(q->flow_mode);
+       if (nla_put_u32(skb, TCA_CAKE_FLOW_MODE, flow_mode & CAKE_FLOW_MASK))
                goto nla_put_failure;

        if (nla_put_u32(skb, TCA_CAKE_RTT, READ_ONCE(q->interval)))
@@ -2820,7 +2824,7 @@ static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
                goto nla_put_failure;

        if (nla_put_u32(skb, TCA_CAKE_NAT,
-                       !!(READ_ONCE(q->flow_mode) & CAKE_FLOW_NAT_FLAG)))
+                       !!(flow_mode & CAKE_FLOW_NAT_FLAG)))
                goto nla_put_failure;

        if (nla_put_u32(skb, TCA_CAKE_DIFFSERV_MODE, READ_ONCE(q->tin_mode)))

^ permalink raw reply	[flat|nested] 44+ messages in thread
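
One way to read the squash above: the read-modify-write of the NAT flag
and flow mode is now done on a local variable and published with a single
WRITE_ONCE(), so a lockless cake_dump() can never observe the intermediate
state in which CAKE_FLOW_NAT_FLAG has been cleared but the new value has
not yet been or'ed back in. A minimal sketch of that pattern (plain C; the
names and flag values are illustrative only, not the kernel's):

#define WRITE_ONCE(x, val) (*(volatile __typeof__(x) *)&(x) = (val))
#define READ_ONCE(x)       (*(volatile __typeof__(x) *)&(x))

#define NAT_FLAG  0x40		/* illustrative values only */
#define FLOW_MASK 0x3f

static unsigned char flow_mode;

/* change path: modify a local copy, publish once */
static void set_nat(int enable)
{
	unsigned char fm = flow_mode;	/* only the change path writes it */

	fm &= ~NAT_FLAG;
	fm |= enable ? NAT_FLAG : 0;
	WRITE_ONCE(flow_mode, fm);	/* readers see old or new,
					 * never a half-updated value */
}

/* dump path: one READ_ONCE(), reused for both attributes */
static void dump_flow_mode(unsigned int *mode, unsigned int *nat)
{
	unsigned char fm = READ_ONCE(flow_mode);

	*mode = fm & FLOW_MASK;
	*nat = !!(fm & NAT_FLAG);
}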

* Re: [PATCH net-next 01/14] net_sched: sch_fq: implement lockless fq_dump()
  2024-04-17  8:45       ` Eric Dumazet
@ 2024-04-17  9:00         ` Simon Horman
  2024-04-17  9:02           ` Eric Dumazet
  0 siblings, 1 reply; 44+ messages in thread
From: Simon Horman @ 2024-04-17  9:00 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, Jamal Hadi Salim,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet

On Wed, Apr 17, 2024 at 10:45:09AM +0200, Eric Dumazet wrote:
> On Tue, Apr 16, 2024 at 8:33 PM Eric Dumazet <edumazet@google.com> wrote:
> >
> > On Tue, Apr 16, 2024 at 8:19 PM Simon Horman <horms@kernel.org> wrote:
> > >
> > > On Mon, Apr 15, 2024 at 01:20:41PM +0000, Eric Dumazet wrote:
> > > > Instead of relying on RTNL, fq_dump() can use READ_ONCE()
> > > > annotations, paired with WRITE_ONCE() in fq_change()
> > > >
> > > > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > > > ---
> > > >  net/sched/sch_fq.c | 96 +++++++++++++++++++++++++++++-----------------
> > > >  1 file changed, 60 insertions(+), 36 deletions(-)
> > > >
> > > > diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
> > > > index cdf23ff16f40bf244bb822e76016fde44e0c439b..934c220b3f4336dc2f70af74d7758218492b675d 100644
> > > > --- a/net/sched/sch_fq.c
> > > > +++ b/net/sched/sch_fq.c
> > > > @@ -888,7 +888,7 @@ static int fq_resize(struct Qdisc *sch, u32 log)
> > > >               fq_rehash(q, old_fq_root, q->fq_trees_log, array, log);
> > > >
> > > >       q->fq_root = array;
> > > > -     q->fq_trees_log = log;
> > > > +     WRITE_ONCE(q->fq_trees_log, log);
> > > >
> > > >       sch_tree_unlock(sch);
> > > >
> > > > @@ -931,7 +931,7 @@ static void fq_prio2band_compress_crumb(const u8 *in, u8 *out)
> > > >
> > > >       memset(out, 0, num_elems / 4);
> > > >       for (i = 0; i < num_elems; i++)
> > > > -             out[i / 4] |= in[i] << (2 * (i & 0x3));
> > > > +             out[i / 4] |= READ_ONCE(in[i]) << (2 * (i & 0x3));
> > > >  }
> > > >
> > >
> > > Hi Eric,
> > >
> > > I am a little unsure about the handling of q->prio2band in this patch.
> > >
> > > It seems to me that fq_prio2band_compress_crumb() is used to
> > > to store values in q->prio2band, and is called (indirectly)
> > > from fq_change() (and directly from fq_init()).
> > >
> > > While fq_prio2band_decompress_crumb() is used to read values
> > > from q->prio2band, and is called from fq_dump().
> > >
> > > So I am wondering if should use WRITE_ONCE() when storing elements
> > > of out. And fq_prio2band_decompress_crumb should use READ_ONCE when
> > > reading elements of in.
> >
> > Yeah, you are probably right, I recall being a bit lazy on this part,
> > thanks !
> 
> I will squash in V2 this part :
> 
> diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
> index 934c220b3f4336dc2f70af74d7758218492b675d..238974725679327b0a0d483c011e15fc94ab0878
> 100644
> --- a/net/sched/sch_fq.c
> +++ b/net/sched/sch_fq.c
> @@ -106,6 +106,8 @@ struct fq_perband_flows {
>         int                 quantum; /* based on band nr : 576KB, 192KB, 64KB */
>  };
> 
> +#define FQ_PRIO2BAND_CRUMB_SIZE ((TC_PRIO_MAX + 1) >> 2)
> +
>  struct fq_sched_data {
>  /* Read mostly cache line */
> 
> @@ -122,7 +124,7 @@ struct fq_sched_data {
>         u8              rate_enable;
>         u8              fq_trees_log;
>         u8              horizon_drop;
> -       u8              prio2band[(TC_PRIO_MAX + 1) >> 2];
> +       u8              prio2band[FQ_PRIO2BAND_CRUMB_SIZE];
>         u32             timer_slack; /* hrtimer slack in ns */
> 
>  /* Read/Write fields. */
> @@ -159,7 +161,7 @@ struct fq_sched_data {
>  /* return the i-th 2-bit value ("crumb") */
>  static u8 fq_prio2band(const u8 *prio2band, unsigned int prio)
>  {
> -       return (prio2band[prio / 4] >> (2 * (prio & 0x3))) & 0x3;
> +       return (READ_ONCE(prio2band[prio / 4]) >> (2 * (prio & 0x3))) & 0x3;
>  }

Thanks Eric,

assuming that it is ok for this version of fq_prio2band() to run
from fq_enqueue(), this update looks good to me.

> 
>  /*
> @@ -927,11 +929,15 @@ static const struct nla_policy
> fq_policy[TCA_FQ_MAX + 1] = {
>  static void fq_prio2band_compress_crumb(const u8 *in, u8 *out)
>  {
>         const int num_elems = TC_PRIO_MAX + 1;
> +       u8 tmp[FQ_PRIO2BAND_CRUMB_SIZE];
>         int i;
> 
> -       memset(out, 0, num_elems / 4);
> +       memset(tmp, 0, sizeof(tmp));
>         for (i = 0; i < num_elems; i++)
> -               out[i / 4] |= READ_ONCE(in[i]) << (2 * (i & 0x3));
> +               tmp[i / 4] |= in[i] << (2 * (i & 0x3));
> +
> +       for (i = 0; i < FQ_PRIO2BAND_CRUMB_SIZE; i++)
> +               WRITE_ONCE(out[i], tmp[i]);
>  }
> 
>  static void fq_prio2band_decompress_crumb(const u8 *in, u8 *out)
> 

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH net-next 01/14] net_sched: sch_fq: implement lockless fq_dump()
  2024-04-17  9:00         ` Simon Horman
@ 2024-04-17  9:02           ` Eric Dumazet
  2024-04-17  9:23             ` Simon Horman
  0 siblings, 1 reply; 44+ messages in thread
From: Eric Dumazet @ 2024-04-17  9:02 UTC (permalink / raw)
  To: Simon Horman
  Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, Jamal Hadi Salim,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet

On Wed, Apr 17, 2024 at 11:00 AM Simon Horman <horms@kernel.org> wrote:
>
> On Wed, Apr 17, 2024 at 10:45:09AM +0200, Eric Dumazet wrote:
> > On Tue, Apr 16, 2024 at 8:33 PM Eric Dumazet <edumazet@google.com> wrote:
> > >
> > > On Tue, Apr 16, 2024 at 8:19 PM Simon Horman <horms@kernel.org> wrote:
> > > >
> > > > On Mon, Apr 15, 2024 at 01:20:41PM +0000, Eric Dumazet wrote:
> > > > > Instead of relying on RTNL, fq_dump() can use READ_ONCE()
> > > > > annotations, paired with WRITE_ONCE() in fq_change()
> > > > >
> > > > > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > > > > ---
> > > > >  net/sched/sch_fq.c | 96 +++++++++++++++++++++++++++++-----------------
> > > > >  1 file changed, 60 insertions(+), 36 deletions(-)
> > > > >
> > > > > diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
> > > > > index cdf23ff16f40bf244bb822e76016fde44e0c439b..934c220b3f4336dc2f70af74d7758218492b675d 100644
> > > > > --- a/net/sched/sch_fq.c
> > > > > +++ b/net/sched/sch_fq.c
> > > > > @@ -888,7 +888,7 @@ static int fq_resize(struct Qdisc *sch, u32 log)
> > > > >               fq_rehash(q, old_fq_root, q->fq_trees_log, array, log);
> > > > >
> > > > >       q->fq_root = array;
> > > > > -     q->fq_trees_log = log;
> > > > > +     WRITE_ONCE(q->fq_trees_log, log);
> > > > >
> > > > >       sch_tree_unlock(sch);
> > > > >
> > > > > @@ -931,7 +931,7 @@ static void fq_prio2band_compress_crumb(const u8 *in, u8 *out)
> > > > >
> > > > >       memset(out, 0, num_elems / 4);
> > > > >       for (i = 0; i < num_elems; i++)
> > > > > -             out[i / 4] |= in[i] << (2 * (i & 0x3));
> > > > > +             out[i / 4] |= READ_ONCE(in[i]) << (2 * (i & 0x3));
> > > > >  }
> > > > >
> > > >
> > > > Hi Eric,
> > > >
> > > > I am a little unsure about the handling of q->prio2band in this patch.
> > > >
> > > > It seems to me that fq_prio2band_compress_crumb() is used to
> > > > to store values in q->prio2band, and is called (indirectly)
> > > > from fq_change() (and directly from fq_init()).
> > > >
> > > > While fq_prio2band_decompress_crumb() is used to read values
> > > > from q->prio2band, and is called from fq_dump().
> > > >
> > > > So I am wondering if should use WRITE_ONCE() when storing elements
> > > > of out. And fq_prio2band_decompress_crumb should use READ_ONCE when
> > > > reading elements of in.
> > >
> > > Yeah, you are probably right, I recall being a bit lazy on this part,
> > > thanks !
> >
> > I will squash in V2 this part :
> >
> > diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
> > index 934c220b3f4336dc2f70af74d7758218492b675d..238974725679327b0a0d483c011e15fc94ab0878
> > 100644
> > --- a/net/sched/sch_fq.c
> > +++ b/net/sched/sch_fq.c
> > @@ -106,6 +106,8 @@ struct fq_perband_flows {
> >         int                 quantum; /* based on band nr : 576KB, 192KB, 64KB */
> >  };
> >
> > +#define FQ_PRIO2BAND_CRUMB_SIZE ((TC_PRIO_MAX + 1) >> 2)
> > +
> >  struct fq_sched_data {
> >  /* Read mostly cache line */
> >
> > @@ -122,7 +124,7 @@ struct fq_sched_data {
> >         u8              rate_enable;
> >         u8              fq_trees_log;
> >         u8              horizon_drop;
> > -       u8              prio2band[(TC_PRIO_MAX + 1) >> 2];
> > +       u8              prio2band[FQ_PRIO2BAND_CRUMB_SIZE];
> >         u32             timer_slack; /* hrtimer slack in ns */
> >
> >  /* Read/Write fields. */
> > @@ -159,7 +161,7 @@ struct fq_sched_data {
> >  /* return the i-th 2-bit value ("crumb") */
> >  static u8 fq_prio2band(const u8 *prio2band, unsigned int prio)
> >  {
> > -       return (prio2band[prio / 4] >> (2 * (prio & 0x3))) & 0x3;
> > +       return (READ_ONCE(prio2band[prio / 4]) >> (2 * (prio & 0x3))) & 0x3;
> >  }
>
> Thanks Eric,
>
> assuming that it is ok for this version of fq_prio2band() to run
> from fq_enqueue(), this update looks good to me.

It is ok, a READ_ONCE() here is not adding any constraint on compiler output.

^ permalink raw reply	[flat|nested] 44+ messages in thread
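
For context on Eric's point about compiler output: for a simple aligned
scalar, READ_ONCE() is essentially a volatile load, so the generated code
for fq_prio2band() is still a single byte load; the annotation only stops
the compiler from caching, re-reading or tearing the access. A simplified
sketch of the macro (the real one in include/asm-generic/rwonce.h
additionally checks that the access is to a scalar type):

#define READ_ONCE(x)	(*(const volatile typeof(x) *)&(x))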

* Re: [PATCH net-next 01/14] net_sched: sch_fq: implement lockless fq_dump()
  2024-04-17  9:02           ` Eric Dumazet
@ 2024-04-17  9:23             ` Simon Horman
  0 siblings, 0 replies; 44+ messages in thread
From: Simon Horman @ 2024-04-17  9:23 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, Jamal Hadi Salim,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet

On Wed, Apr 17, 2024 at 11:02:55AM +0200, Eric Dumazet wrote:
> On Wed, Apr 17, 2024 at 11:00 AM Simon Horman <horms@kernel.org> wrote:
> >
> > On Wed, Apr 17, 2024 at 10:45:09AM +0200, Eric Dumazet wrote:
> > > On Tue, Apr 16, 2024 at 8:33 PM Eric Dumazet <edumazet@google.com> wrote:
> > > >
> > > > On Tue, Apr 16, 2024 at 8:19 PM Simon Horman <horms@kernel.org> wrote:
> > > > >
> > > > > On Mon, Apr 15, 2024 at 01:20:41PM +0000, Eric Dumazet wrote:
> > > > > > Instead of relying on RTNL, fq_dump() can use READ_ONCE()
> > > > > > annotations, paired with WRITE_ONCE() in fq_change()
> > > > > >
> > > > > > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > > > > > ---
> > > > > >  net/sched/sch_fq.c | 96 +++++++++++++++++++++++++++++-----------------
> > > > > >  1 file changed, 60 insertions(+), 36 deletions(-)
> > > > > >
> > > > > > diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
> > > > > > index cdf23ff16f40bf244bb822e76016fde44e0c439b..934c220b3f4336dc2f70af74d7758218492b675d 100644
> > > > > > --- a/net/sched/sch_fq.c
> > > > > > +++ b/net/sched/sch_fq.c
> > > > > > @@ -888,7 +888,7 @@ static int fq_resize(struct Qdisc *sch, u32 log)
> > > > > >               fq_rehash(q, old_fq_root, q->fq_trees_log, array, log);
> > > > > >
> > > > > >       q->fq_root = array;
> > > > > > -     q->fq_trees_log = log;
> > > > > > +     WRITE_ONCE(q->fq_trees_log, log);
> > > > > >
> > > > > >       sch_tree_unlock(sch);
> > > > > >
> > > > > > @@ -931,7 +931,7 @@ static void fq_prio2band_compress_crumb(const u8 *in, u8 *out)
> > > > > >
> > > > > >       memset(out, 0, num_elems / 4);
> > > > > >       for (i = 0; i < num_elems; i++)
> > > > > > -             out[i / 4] |= in[i] << (2 * (i & 0x3));
> > > > > > +             out[i / 4] |= READ_ONCE(in[i]) << (2 * (i & 0x3));
> > > > > >  }
> > > > > >
> > > > >
> > > > > Hi Eric,
> > > > >
> > > > > I am a little unsure about the handling of q->prio2band in this patch.
> > > > >
> > > > > It seems to me that fq_prio2band_compress_crumb() is used to
> > > > > to store values in q->prio2band, and is called (indirectly)
> > > > > from fq_change() (and directly from fq_init()).
> > > > >
> > > > > While fq_prio2band_decompress_crumb() is used to read values
> > > > > from q->prio2band, and is called from fq_dump().
> > > > >
> > > > > So I am wondering if should use WRITE_ONCE() when storing elements
> > > > > of out. And fq_prio2band_decompress_crumb should use READ_ONCE when
> > > > > reading elements of in.
> > > >
> > > > Yeah, you are probably right, I recall being a bit lazy on this part,
> > > > thanks !
> > >
> > > I will squash in V2 this part :
> > >
> > > diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
> > > index 934c220b3f4336dc2f70af74d7758218492b675d..238974725679327b0a0d483c011e15fc94ab0878
> > > 100644
> > > --- a/net/sched/sch_fq.c
> > > +++ b/net/sched/sch_fq.c
> > > @@ -106,6 +106,8 @@ struct fq_perband_flows {
> > >         int                 quantum; /* based on band nr : 576KB, 192KB, 64KB */
> > >  };
> > >
> > > +#define FQ_PRIO2BAND_CRUMB_SIZE ((TC_PRIO_MAX + 1) >> 2)
> > > +
> > >  struct fq_sched_data {
> > >  /* Read mostly cache line */
> > >
> > > @@ -122,7 +124,7 @@ struct fq_sched_data {
> > >         u8              rate_enable;
> > >         u8              fq_trees_log;
> > >         u8              horizon_drop;
> > > -       u8              prio2band[(TC_PRIO_MAX + 1) >> 2];
> > > +       u8              prio2band[FQ_PRIO2BAND_CRUMB_SIZE];
> > >         u32             timer_slack; /* hrtimer slack in ns */
> > >
> > >  /* Read/Write fields. */
> > > @@ -159,7 +161,7 @@ struct fq_sched_data {
> > >  /* return the i-th 2-bit value ("crumb") */
> > >  static u8 fq_prio2band(const u8 *prio2band, unsigned int prio)
> > >  {
> > > -       return (prio2band[prio / 4] >> (2 * (prio & 0x3))) & 0x3;
> > > +       return (READ_ONCE(prio2band[prio / 4]) >> (2 * (prio & 0x3))) & 0x3;
> > >  }
> >
> > Thanks Eric,
> >
> > assuming that it is ok for this version of fq_prio2band() to run
> > from fq_enqueue(), this update looks good to me.
> 
> It is ok, a READ_ONCE() here is not adding any constraint on compiler output.

Thanks for confirming.
This looks good to me.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH net-next 02/14] net_sched: cake: implement lockless cake_dump()
  2024-04-17  8:54     ` Eric Dumazet
@ 2024-04-17  9:24       ` Simon Horman
  0 siblings, 0 replies; 44+ messages in thread
From: Simon Horman @ 2024-04-17  9:24 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, Jamal Hadi Salim,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet,
	Toke Høiland-Jørgensen, cake

On Wed, Apr 17, 2024 at 10:54:30AM +0200, Eric Dumazet wrote:
> On Wed, Apr 17, 2024 at 10:35 AM Simon Horman <horms@kernel.org> wrote:
> >
> > + Toke Høiland-Jørgensen <toke@toke.dk>
> >   cake@lists.bufferbloat.net
> >
> > On Mon, Apr 15, 2024 at 01:20:42PM +0000, Eric Dumazet wrote:
> > > Instead of relying on RTNL, cake_dump() can use READ_ONCE()
> > > annotations, paired with WRITE_ONCE() ones in cake_change().
> > >
> > > Signed-off-by: Eric Dumazet <edumazet@google.com>
> >
> > ...
> >
> > > @@ -2774,68 +2783,71 @@ static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
> > >  {
> > >       struct cake_sched_data *q = qdisc_priv(sch);
> > >       struct nlattr *opts;
> > > +     u16 rate_flags;
> > >
> > >       opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
> > >       if (!opts)
> > >               goto nla_put_failure;
> > >
> > > -     if (nla_put_u64_64bit(skb, TCA_CAKE_BASE_RATE64, q->rate_bps,
> > > -                           TCA_CAKE_PAD))
> > > +     if (nla_put_u64_64bit(skb, TCA_CAKE_BASE_RATE64,
> > > +                           READ_ONCE(q->rate_bps), TCA_CAKE_PAD))
> > >               goto nla_put_failure;
> > >
> > >       if (nla_put_u32(skb, TCA_CAKE_FLOW_MODE,
> > > -                     q->flow_mode & CAKE_FLOW_MASK))
> > > +                     READ_ONCE(q->flow_mode) & CAKE_FLOW_MASK))
> > >               goto nla_put_failure;
> >
> > Hi Eric,
> >
> > q->flow_mode is read twice in this function. Once here...
> >
> > >
> > > -     if (nla_put_u32(skb, TCA_CAKE_RTT, q->interval))
> > > +     if (nla_put_u32(skb, TCA_CAKE_RTT, READ_ONCE(q->interval)))
> > >               goto nla_put_failure;
> > >
> > > -     if (nla_put_u32(skb, TCA_CAKE_TARGET, q->target))
> > > +     if (nla_put_u32(skb, TCA_CAKE_TARGET, READ_ONCE(q->target)))
> > >               goto nla_put_failure;
> > >
> > > -     if (nla_put_u32(skb, TCA_CAKE_MEMORY, q->buffer_config_limit))
> > > +     if (nla_put_u32(skb, TCA_CAKE_MEMORY,
> > > +                     READ_ONCE(q->buffer_config_limit)))
> > >               goto nla_put_failure;
> > >
> > > +     rate_flags = READ_ONCE(q->rate_flags);
> > >       if (nla_put_u32(skb, TCA_CAKE_AUTORATE,
> > > -                     !!(q->rate_flags & CAKE_FLAG_AUTORATE_INGRESS)))
> > > +                     !!(rate_flags & CAKE_FLAG_AUTORATE_INGRESS)))
> > >               goto nla_put_failure;
> > >
> > >       if (nla_put_u32(skb, TCA_CAKE_INGRESS,
> > > -                     !!(q->rate_flags & CAKE_FLAG_INGRESS)))
> > > +                     !!(rate_flags & CAKE_FLAG_INGRESS)))
> > >               goto nla_put_failure;
> > >
> > > -     if (nla_put_u32(skb, TCA_CAKE_ACK_FILTER, q->ack_filter))
> > > +     if (nla_put_u32(skb, TCA_CAKE_ACK_FILTER, READ_ONCE(q->ack_filter)))
> > >               goto nla_put_failure;
> > >
> > >       if (nla_put_u32(skb, TCA_CAKE_NAT,
> > > -                     !!(q->flow_mode & CAKE_FLOW_NAT_FLAG)))
> > > +                     !!(READ_ONCE(q->flow_mode) & CAKE_FLOW_NAT_FLAG)))
> > >               goto nla_put_failure;
> >
> > ... and once here.
> >
> > I am assuming that it isn't a big deal, but perhaps it is better to save
> > q->flow_mode into a local variable.
> >
> > Also, more importantly, q->flow_mode does not seem to be handled
> > using WRITE_ONCE() in cake_change(). It's a non-trivial case,
> > which I guess is well served by a mechanism built around a local variable.
> 
> Thanks !
> 
> I will squash in v2:

Likewise, thanks.
This looks good to me.

...

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH net-next 03/14] net_sched: sch_cbs: implement lockless cbs_dump()
  2024-04-15 13:20 ` [PATCH net-next 03/14] net_sched: sch_cbs: implement lockless cbs_dump() Eric Dumazet
@ 2024-04-17  9:27   ` Simon Horman
  0 siblings, 0 replies; 44+ messages in thread
From: Simon Horman @ 2024-04-17  9:27 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, Jamal Hadi Salim,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet,
	Vinicius Costa Gomes

+ Vinicius Costa Gomes

On Mon, Apr 15, 2024 at 01:20:43PM +0000, Eric Dumazet wrote:
> Instead of relying on RTNL, cbs_dump() can use READ_ONCE()
> annotations, paired with WRITE_ONCE() ones in cbs_change().
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>

Reviewed-by: Simon Horman <horms@kernel.org>

(Eric, I need to step away from my desk for a while.
 So there will be a bit of a delay in reviewing the rest
 of the series.)

> ---
>  net/sched/sch_cbs.c | 20 ++++++++++----------
>  1 file changed, 10 insertions(+), 10 deletions(-)
> 
> diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
> index 69001eff0315584df23a24f81ae3b4cf7bf5fd79..939425da18955bc8a3f3d2d5b852b2585e9bcaee 100644
> --- a/net/sched/sch_cbs.c
> +++ b/net/sched/sch_cbs.c
> @@ -389,11 +389,11 @@ static int cbs_change(struct Qdisc *sch, struct nlattr *opt,
>  	}
>  
>  	/* Everything went OK, save the parameters used. */
> -	q->hicredit = qopt->hicredit;
> -	q->locredit = qopt->locredit;
> -	q->idleslope = qopt->idleslope * BYTES_PER_KBIT;
> -	q->sendslope = qopt->sendslope * BYTES_PER_KBIT;
> -	q->offload = qopt->offload;
> +	WRITE_ONCE(q->hicredit, qopt->hicredit);
> +	WRITE_ONCE(q->locredit, qopt->locredit);
> +	WRITE_ONCE(q->idleslope, qopt->idleslope * BYTES_PER_KBIT);
> +	WRITE_ONCE(q->sendslope, qopt->sendslope * BYTES_PER_KBIT);
> +	WRITE_ONCE(q->offload, qopt->offload);
>  
>  	return 0;
>  }
> @@ -459,11 +459,11 @@ static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb)
>  	if (!nest)
>  		goto nla_put_failure;
>  
> -	opt.hicredit = q->hicredit;
> -	opt.locredit = q->locredit;
> -	opt.sendslope = div64_s64(q->sendslope, BYTES_PER_KBIT);
> -	opt.idleslope = div64_s64(q->idleslope, BYTES_PER_KBIT);
> -	opt.offload = q->offload;
> +	opt.hicredit = READ_ONCE(q->hicredit);
> +	opt.locredit = READ_ONCE(q->locredit);
> +	opt.sendslope = div64_s64(READ_ONCE(q->sendslope), BYTES_PER_KBIT);
> +	opt.idleslope = div64_s64(READ_ONCE(q->idleslope), BYTES_PER_KBIT);
> +	opt.offload = READ_ONCE(q->offload);
>  
>  	if (nla_put(skb, TCA_CBS_PARMS, sizeof(opt), &opt))
>  		goto nla_put_failure;
> -- 
> 2.44.0.683.g7961c838ac-goog
> 
> 

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH net-next 02/14] net_sched: cake: implement lockless cake_dump()
  2024-04-17  8:35   ` Simon Horman
  2024-04-17  8:54     ` Eric Dumazet
@ 2024-04-17 12:25     ` Toke Høiland-Jørgensen
  1 sibling, 0 replies; 44+ messages in thread
From: Toke Høiland-Jørgensen @ 2024-04-17 12:25 UTC (permalink / raw)
  To: Simon Horman, Eric Dumazet
  Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, Jamal Hadi Salim,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet, cake

Simon Horman <horms@kernel.org> writes:

> + Toke Høiland-Jørgensen <toke@toke.dk>
>   cake@lists.bufferbloat.net

Thanks!

> On Mon, Apr 15, 2024 at 01:20:42PM +0000, Eric Dumazet wrote:
>> Instead of relying on RTNL, cake_dump() can use READ_ONCE()
>> annotations, paired with WRITE_ONCE() ones in cake_change().
>> 
>> Signed-off-by: Eric Dumazet <edumazet@google.com>

Just to be sure I understand this correctly: the idea is that with
READ/WRITE_ONCE annotations, we can dump the qdisc options without
taking the RTNL lock. This means that a dump may not be consistent across a
concurrent reconfig that changes multiple parameters, but each parameter
will be either the new or the old value. Right?

-Toke


^ permalink raw reply	[flat|nested] 44+ messages in thread
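
The consistency model Toke describes can be demonstrated with a small
standalone program (plain C plus pthreads, not kernel code, built with
"gcc -O2 -pthread"; READ_ONCE()/WRITE_ONCE() are again approximated with
volatile accesses): a lockless reader pairing READ_ONCE() with the
writer's WRITE_ONCE() may observe a mix of old and new parameters within
one snapshot, but each individual parameter is read as a single, untorn
value.

#include <pthread.h>
#include <stdio.h>

#define WRITE_ONCE(x, val) (*(volatile __typeof__(x) *)&(x) = (val))
#define READ_ONCE(x)       (*(volatile __typeof__(x) *)&(x))

static unsigned int target = 1, interval = 1;

/* plays the role of *_change(): updates both parameters, each with
 * WRITE_ONCE(), without any lock shared with the reader
 */
static void *change(void *arg)
{
	unsigned int v;

	(void)arg;
	for (v = 2; v < 1000000; v++) {
		WRITE_ONCE(target, v);
		WRITE_ONCE(interval, v);
	}
	return NULL;
}

/* plays the role of a lockless *_dump(): snapshots both parameters */
int main(void)
{
	unsigned int mixed = 0;
	pthread_t thr;
	int i;

	pthread_create(&thr, NULL, change, NULL);
	for (i = 0; i < 1000000; i++) {
		unsigned int t = READ_ONCE(target);
		unsigned int in = READ_ONCE(interval);

		if (t != in)	/* an old/new mix across parameters is allowed */
			mixed++;
	}
	pthread_join(thr, NULL);
	printf("observed %u mixed snapshots; each value was still untorn\n",
	       mixed);
	return 0;
}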

* Re: [PATCH net-next 04/14] net_sched: sch_choke: implement lockless choke_dump()
  2024-04-15 13:20 ` [PATCH net-next 04/14] net_sched: sch_choke: implement lockless choke_dump() Eric Dumazet
@ 2024-04-17 13:14   ` Simon Horman
  2024-04-17 13:41     ` Eric Dumazet
  0 siblings, 1 reply; 44+ messages in thread
From: Simon Horman @ 2024-04-17 13:14 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, Jamal Hadi Salim,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet

On Mon, Apr 15, 2024 at 01:20:44PM +0000, Eric Dumazet wrote:
> Instead of relying on RTNL, choke_dump() can use READ_ONCE()
> annotations, paired with WRITE_ONCE() ones in choke_change().
> 
> Also use READ_ONCE(q->limit) in choke_enqueue() as the value
> could change from choke_change().

Hi Eric,

I'm wondering if you could expand on why q->limit needs this treatment
but not other fields, f.e. q->parms.qth_min (aka p->qth_min).

> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---
>  include/net/red.h     | 10 +++++-----
>  net/sched/sch_choke.c | 23 ++++++++++++-----------
>  2 files changed, 17 insertions(+), 16 deletions(-)
> 
> diff --git a/include/net/red.h b/include/net/red.h

...

> @@ -244,7 +244,7 @@ static inline void red_set_parms(struct red_parms *p,
>  		max_P = red_maxp(Plog);
>  		max_P *= delta; /* max_P = (qth_max - qth_min)/2^Plog */
>  	}
> -	p->max_P = max_P;
> +	WRITE_ONCE(p->max_P, max_P);
>  	max_p_delta = max_P / delta;
>  	max_p_delta = max(max_p_delta, 1U);
>  	p->max_P_reciprocal  = reciprocal_value(max_p_delta);

A little further down in this function p->Scell_log is set.
I think it also needs the WRITE_ONCE() treatment as it
is read in choke_dump().

> diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c

...

> @@ -431,15 +431,16 @@ static int choke_init(struct Qdisc *sch, struct nlattr *opt,
>  static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
>  {
>  	struct choke_sched_data *q = qdisc_priv(sch);
> +	u8 Wlog = READ_ONCE(q->parms.Wlog);
>  	struct nlattr *opts = NULL;
>  	struct tc_red_qopt opt = {
> -		.limit		= q->limit,
> -		.flags		= q->flags,
> -		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
> -		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
> -		.Wlog		= q->parms.Wlog,
> -		.Plog		= q->parms.Plog,
> -		.Scell_log	= q->parms.Scell_log,
> +		.limit		= READ_ONCE(q->limit),
> +		.flags		= READ_ONCE(q->flags),
> +		.qth_min	= READ_ONCE(q->parms.qth_min) >> Wlog,
> +		.qth_max	= READ_ONCE(q->parms.qth_max) >> Wlog,
> +		.Wlog		= Wlog,
> +		.Plog		= READ_ONCE(q->parms.Plog),
> +		.Scell_log	= READ_ONCE(q->parms.Scell_log),
>  	};
>  
>  	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH net-next 04/14] net_sched: sch_choke: implement lockless choke_dump()
  2024-04-17 13:14   ` Simon Horman
@ 2024-04-17 13:41     ` Eric Dumazet
  2024-04-17 14:44       ` Simon Horman
  0 siblings, 1 reply; 44+ messages in thread
From: Eric Dumazet @ 2024-04-17 13:41 UTC (permalink / raw)
  To: Simon Horman
  Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, Jamal Hadi Salim,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet

On Wed, Apr 17, 2024 at 3:14 PM Simon Horman <horms@kernel.org> wrote:
>
> On Mon, Apr 15, 2024 at 01:20:44PM +0000, Eric Dumazet wrote:
> > Instead of relying on RTNL, choke_dump() can use READ_ONCE()
> > annotations, paired with WRITE_ONCE() ones in choke_change().
> >
> > Also use READ_ONCE(q->limit) in choke_enqueue() as the value
> > could change from choke_change().
>
> Hi Eric,
>
> I'm wondering if you could expand on why q->limit needs this treatment
> but not other fields, f.e. q->parms.qth_min (aka p->qth_min).

Other fields got their WRITE_ONCE() in red_set_parms()

+       WRITE_ONCE(p->qth_min, qth_min << Wlog);
+       WRITE_ONCE(p->qth_max, qth_max << Wlog);
+       WRITE_ONCE(p->Wlog, Wlog);
+       WRITE_ONCE(p->Plog, Plog);


>
> > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > ---
> >  include/net/red.h     | 10 +++++-----
> >  net/sched/sch_choke.c | 23 ++++++++++++-----------
> >  2 files changed, 17 insertions(+), 16 deletions(-)
> >
> > diff --git a/include/net/red.h b/include/net/red.h
>
> ...
>
> > @@ -244,7 +244,7 @@ static inline void red_set_parms(struct red_parms *p,
> >               max_P = red_maxp(Plog);
> >               max_P *= delta; /* max_P = (qth_max - qth_min)/2^Plog */
> >       }
> > -     p->max_P = max_P;
> > +     WRITE_ONCE(p->max_P, max_P);
> >       max_p_delta = max_P / delta;
> >       max_p_delta = max(max_p_delta, 1U);
> >       p->max_P_reciprocal  = reciprocal_value(max_p_delta);
>
> A little further down in this function p->Scell_log is set.
> I think it also needs the WRITE_ONCE() treatment as it
> is read in choke_dump().
>

I will squash the missing WRITE_ONCE() in v2, thanks!

diff --git a/include/net/red.h b/include/net/red.h
index 7daf7cf6130aeccf3d81a77600f4445759f174b7..802287d52c9e37e76ba9154539f511629e4b9780 100644
--- a/include/net/red.h
+++ b/include/net/red.h
@@ -257,7 +257,7 @@ static inline void red_set_parms(struct red_parms *p,
        p->target_min = qth_min + 2*delta;
        p->target_max = qth_min + 3*delta;

-       p->Scell_log    = Scell_log;
+       WRITE_ONCE(p->Scell_log, Scell_log);
        p->Scell_max    = (255 << Scell_log);

        if (stab)

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH net-next 04/14] net_sched: sch_choke: implement lockless choke_dump()
  2024-04-17 13:41     ` Eric Dumazet
@ 2024-04-17 14:44       ` Simon Horman
  0 siblings, 0 replies; 44+ messages in thread
From: Simon Horman @ 2024-04-17 14:44 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, Jamal Hadi Salim,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet

On Wed, Apr 17, 2024 at 03:41:36PM +0200, Eric Dumazet wrote:
> On Wed, Apr 17, 2024 at 3:14 PM Simon Horman <horms@kernel.org> wrote:
> >
> > On Mon, Apr 15, 2024 at 01:20:44PM +0000, Eric Dumazet wrote:
> > > Instead of relying on RTNL, choke_dump() can use READ_ONCE()
> > > annotations, paired with WRITE_ONCE() ones in choke_change().
> > >
> > > Also use READ_ONCE(q->limit) in choke_enqueue() as the value
> > > could change from choke_change().
> >
> > Hi Eric,
> >
> > I'm wondering if you could expand on why q->limit needs this treatment
> > but not other fields, f.e. q->parms.qth_min (aka p->qth_min).
> 
> Other fields got their WRITE_ONCE() in red_set_parms()
> 
> +       WRITE_ONCE(p->qth_min, qth_min << Wlog);
> +       WRITE_ONCE(p->qth_max, qth_max << Wlog);
> +       WRITE_ONCE(p->Wlog, Wlog);
> +       WRITE_ONCE(p->Plog, Plog);

Thanks, understood.

I do wonder if other schedulers may need READ_ONCE() in
their enqueue() functions. But I have not analysed this yet.

> > > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > > ---
> > >  include/net/red.h     | 10 +++++-----
> > >  net/sched/sch_choke.c | 23 ++++++++++++-----------
> > >  2 files changed, 17 insertions(+), 16 deletions(-)
> > >
> > > diff --git a/include/net/red.h b/include/net/red.h
> >
> > ...
> >
> > > @@ -244,7 +244,7 @@ static inline void red_set_parms(struct red_parms *p,
> > >               max_P = red_maxp(Plog);
> > >               max_P *= delta; /* max_P = (qth_max - qth_min)/2^Plog */
> > >       }
> > > -     p->max_P = max_P;
> > > +     WRITE_ONCE(p->max_P, max_P);
> > >       max_p_delta = max_P / delta;
> > >       max_p_delta = max(max_p_delta, 1U);
> > >       p->max_P_reciprocal  = reciprocal_value(max_p_delta);
> >
> > A little further down in this function p->Scell_log is set.
> > I think it also needs the WRITE_ONCE() treatment as it
> > is read in choke_dump().
> >
> 
> I will squash in v2 the missing WRITE_ONCE(), thanks !

Likewise, thanks.
Your proposed update looks good to me.

> diff --git a/include/net/red.h b/include/net/red.h
> index 7daf7cf6130aeccf3d81a77600f4445759f174b7..802287d52c9e37e76ba9154539f511629e4b9780
> 100644
> --- a/include/net/red.h
> +++ b/include/net/red.h
> @@ -257,7 +257,7 @@ static inline void red_set_parms(struct red_parms *p,
>         p->target_min = qth_min + 2*delta;
>         p->target_max = qth_min + 3*delta;
> 
> -       p->Scell_log    = Scell_log;
> +       WRITE_ONCE(p->Scell_log, Scell_log);
>         p->Scell_max    = (255 << Scell_log);
> 
>         if (stab)
> 

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH net-next 05/14] net_sched: sch_codel: implement lockless codel_dump()
  2024-04-15 13:20 ` [PATCH net-next 05/14] net_sched: sch_codel: implement lockless codel_dump() Eric Dumazet
@ 2024-04-17 15:59   ` Simon Horman
  2024-04-17 16:05     ` Eric Dumazet
  0 siblings, 1 reply; 44+ messages in thread
From: Simon Horman @ 2024-04-17 15:59 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, Jamal Hadi Salim,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet

On Mon, Apr 15, 2024 at 01:20:45PM +0000, Eric Dumazet wrote:
> Instead of relying on RTNL, codel_dump() can use READ_ONCE()
> annotations, paired with WRITE_ONCE() ones in codel_change().
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---
>  net/sched/sch_codel.c | 29 ++++++++++++++++++-----------
>  1 file changed, 18 insertions(+), 11 deletions(-)
> 
> diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
> index ecb3f164bb25b33bd662c8ee07dc1b5945fd882d..3e8d4fe4d91e3ef2b7715640f6675aa5e8e2a326 100644
> --- a/net/sched/sch_codel.c
> +++ b/net/sched/sch_codel.c
> @@ -118,26 +118,31 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt,
>  	if (tb[TCA_CODEL_TARGET]) {
>  		u32 target = nla_get_u32(tb[TCA_CODEL_TARGET]);
>  
> -		q->params.target = ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT;
> +		WRITE_ONCE(q->params.target,
> +			   ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT);
>  	}
>  
>  	if (tb[TCA_CODEL_CE_THRESHOLD]) {
>  		u64 val = nla_get_u32(tb[TCA_CODEL_CE_THRESHOLD]);
>  
> -		q->params.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT;
> +		WRITE_ONCE(q->params.ce_threshold,
> +			   (val * NSEC_PER_USEC) >> CODEL_SHIFT);
>  	}
>  
>  	if (tb[TCA_CODEL_INTERVAL]) {
>  		u32 interval = nla_get_u32(tb[TCA_CODEL_INTERVAL]);
>  
> -		q->params.interval = ((u64)interval * NSEC_PER_USEC) >> CODEL_SHIFT;
> +		WRITE_ONCE(q->params.interval,
> +			   ((u64)interval * NSEC_PER_USEC) >> CODEL_SHIFT);
>  	}
>  
>  	if (tb[TCA_CODEL_LIMIT])
> -		sch->limit = nla_get_u32(tb[TCA_CODEL_LIMIT]);
> +		WRITE_ONCE(sch->limit,
> +			   nla_get_u32(tb[TCA_CODEL_LIMIT]));
>  

Hi Eric,

Sorry to be so bothersome.

As a follow-up to our discussion of patch 4/14 (net_choke),
I'm wondering if reading sch->limit in codel_qdisc_enqueue()
should be updated to use READ_ONCE().

>  	if (tb[TCA_CODEL_ECN])
> -		q->params.ecn = !!nla_get_u32(tb[TCA_CODEL_ECN]);
> +		WRITE_ONCE(q->params.ecn,
> +			   !!nla_get_u32(tb[TCA_CODEL_ECN]));
>  
>  	qlen = sch->q.qlen;
>  	while (sch->q.qlen > sch->limit) {

...

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH net-next 05/14] net_sched: sch_codel: implement lockless codel_dump()
  2024-04-17 15:59   ` Simon Horman
@ 2024-04-17 16:05     ` Eric Dumazet
  2024-04-17 16:21       ` Simon Horman
  0 siblings, 1 reply; 44+ messages in thread
From: Eric Dumazet @ 2024-04-17 16:05 UTC (permalink / raw)
  To: Simon Horman
  Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, Jamal Hadi Salim,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet

On Wed, Apr 17, 2024 at 5:59 PM Simon Horman <horms@kernel.org> wrote:
>
> On Mon, Apr 15, 2024 at 01:20:45PM +0000, Eric Dumazet wrote:
> > Instead of relying on RTNL, codel_dump() can use READ_ONCE()
> > annotations, paired with WRITE_ONCE() ones in codel_change().
> >
> > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > ---
> >  net/sched/sch_codel.c | 29 ++++++++++++++++++-----------
> >  1 file changed, 18 insertions(+), 11 deletions(-)
> >
> > diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
> > index ecb3f164bb25b33bd662c8ee07dc1b5945fd882d..3e8d4fe4d91e3ef2b7715640f6675aa5e8e2a326 100644
> > --- a/net/sched/sch_codel.c
> > +++ b/net/sched/sch_codel.c
> > @@ -118,26 +118,31 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt,
> >       if (tb[TCA_CODEL_TARGET]) {
> >               u32 target = nla_get_u32(tb[TCA_CODEL_TARGET]);
> >
> > -             q->params.target = ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT;
> > +             WRITE_ONCE(q->params.target,
> > +                        ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT);
> >       }
> >
> >       if (tb[TCA_CODEL_CE_THRESHOLD]) {
> >               u64 val = nla_get_u32(tb[TCA_CODEL_CE_THRESHOLD]);
> >
> > -             q->params.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT;
> > +             WRITE_ONCE(q->params.ce_threshold,
> > +                        (val * NSEC_PER_USEC) >> CODEL_SHIFT);
> >       }
> >
> >       if (tb[TCA_CODEL_INTERVAL]) {
> >               u32 interval = nla_get_u32(tb[TCA_CODEL_INTERVAL]);
> >
> > -             q->params.interval = ((u64)interval * NSEC_PER_USEC) >> CODEL_SHIFT;
> > +             WRITE_ONCE(q->params.interval,
> > +                        ((u64)interval * NSEC_PER_USEC) >> CODEL_SHIFT);
> >       }
> >
> >       if (tb[TCA_CODEL_LIMIT])
> > -             sch->limit = nla_get_u32(tb[TCA_CODEL_LIMIT]);
> > +             WRITE_ONCE(sch->limit,
> > +                        nla_get_u32(tb[TCA_CODEL_LIMIT]));
> >
>
> Hi Eric,
>
> Sorry to be so bothersome.
>
> As a follow-up to our discussion of patch 4/14 (net_choke),
> I'm wondering if reading sch->limit in codel_qdisc_enqueue()
> should be updated to use READ_ONCE().

No worries !

A READ_ONCE() in codel_qdisc_enqueue() is not needed
because codel_change() writes all the fields under the protection of
the qdisc spinlock.

sch_tree_lock() / ... / sch_tree_unlock()

Note that I removed the READ_ONCE() in choke_enqueue() in V2,
for the same reason.

>
> >       if (tb[TCA_CODEL_ECN])
> > -             q->params.ecn = !!nla_get_u32(tb[TCA_CODEL_ECN]);
> > +             WRITE_ONCE(q->params.ecn,
> > +                        !!nla_get_u32(tb[TCA_CODEL_ECN]));
> >
> >       qlen = sch->q.qlen;
> >       while (sch->q.qlen > sch->limit) {
>
> ...

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH net-next 05/14] net_sched: sch_codel: implement lockless codel_dump()
  2024-04-17 16:05     ` Eric Dumazet
@ 2024-04-17 16:21       ` Simon Horman
  0 siblings, 0 replies; 44+ messages in thread
From: Simon Horman @ 2024-04-17 16:21 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, Jamal Hadi Salim,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet

On Wed, Apr 17, 2024 at 06:05:46PM +0200, Eric Dumazet wrote:
> On Wed, Apr 17, 2024 at 5:59 PM Simon Horman <horms@kernel.org> wrote:
> >
> > On Mon, Apr 15, 2024 at 01:20:45PM +0000, Eric Dumazet wrote:
> > > Instead of relying on RTNL, codel_dump() can use READ_ONCE()
> > > annotations, paired with WRITE_ONCE() ones in codel_change().
> > >
> > > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > > ---
> > >  net/sched/sch_codel.c | 29 ++++++++++++++++++-----------
> > >  1 file changed, 18 insertions(+), 11 deletions(-)
> > >
> > > diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
> > > index ecb3f164bb25b33bd662c8ee07dc1b5945fd882d..3e8d4fe4d91e3ef2b7715640f6675aa5e8e2a326 100644
> > > --- a/net/sched/sch_codel.c
> > > +++ b/net/sched/sch_codel.c
> > > @@ -118,26 +118,31 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt,
> > >       if (tb[TCA_CODEL_TARGET]) {
> > >               u32 target = nla_get_u32(tb[TCA_CODEL_TARGET]);
> > >
> > > -             q->params.target = ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT;
> > > +             WRITE_ONCE(q->params.target,
> > > +                        ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT);
> > >       }
> > >
> > >       if (tb[TCA_CODEL_CE_THRESHOLD]) {
> > >               u64 val = nla_get_u32(tb[TCA_CODEL_CE_THRESHOLD]);
> > >
> > > -             q->params.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT;
> > > +             WRITE_ONCE(q->params.ce_threshold,
> > > +                        (val * NSEC_PER_USEC) >> CODEL_SHIFT);
> > >       }
> > >
> > >       if (tb[TCA_CODEL_INTERVAL]) {
> > >               u32 interval = nla_get_u32(tb[TCA_CODEL_INTERVAL]);
> > >
> > > -             q->params.interval = ((u64)interval * NSEC_PER_USEC) >> CODEL_SHIFT;
> > > +             WRITE_ONCE(q->params.interval,
> > > +                        ((u64)interval * NSEC_PER_USEC) >> CODEL_SHIFT);
> > >       }
> > >
> > >       if (tb[TCA_CODEL_LIMIT])
> > > -             sch->limit = nla_get_u32(tb[TCA_CODEL_LIMIT]);
> > > +             WRITE_ONCE(sch->limit,
> > > +                        nla_get_u32(tb[TCA_CODEL_LIMIT]));
> > >
> >
> > Hi Eric,
> >
> > Sorry to be so bothersome.
> >
> > As a follow-up to our discussion of patch 4/14 (net_choke),
> > I'm wondering if reading sch->limit in codel_qdisc_enqueue()
> > should be updated to use READ_ONCE().
> 
> No worries !
> 
> A READ_ONCE() in codel_qdisc_enqueue() is not needed
> because codel_change() writes  all the fields  under the protection of
> qdisc spinlock.
> 
> sch_tree_lock() / ... / sch_tree_unlock()
> 
> Note that I removed the READ_ONCE() in choke enqueue() in V2,
> for the same reason.

Perfect, now I understand.

With that cleared up, I'm happy with (my understanding of) this patch.

Reviewed-by: Simon Horman <horms@kernel.org>


^ permalink raw reply	[flat|nested] 44+ messages in thread
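
A minimal sketch of the locking argument above, illustrative rather than
a verbatim excerpt of net/sched/sch_codel.c: change() and enqueue() are
both serialized by the qdisc spinlock, so enqueue() keeps plain loads,
while the dump, which is meant to eventually run without RTNL or the
qdisc lock, pairs READ_ONCE() with the WRITE_ONCE() in change().

#include <net/netlink.h>
#include <net/pkt_sched.h>

static void sketch_codel_change_limit(struct Qdisc *sch, u32 new_limit)
{
	sch_tree_lock(sch);
	/* paired with READ_ONCE() in the (lockless) dump below */
	WRITE_ONCE(sch->limit, new_limit);
	sch_tree_unlock(sch);
}

static bool sketch_codel_enqueue_would_drop(struct Qdisc *sch)
{
	/* runs under the qdisc spinlock, serialized with change(): plain load */
	return unlikely(sch->q.qlen >= sch->limit);
}

static int sketch_codel_dump_limit(struct Qdisc *sch, struct sk_buff *skb)
{
	/* may run without RTNL or the qdisc lock */
	return nla_put_u32(skb, TCA_CODEL_LIMIT, READ_ONCE(sch->limit));
}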

* Re: [PATCH net-next 06/14] net_sched: sch_tfs: implement lockless etf_dump()
  2024-04-15 13:20 ` [PATCH net-next 06/14] net_sched: sch_tfs: implement lockless etf_dump() Eric Dumazet
@ 2024-04-17 16:27   ` Simon Horman
  0 siblings, 0 replies; 44+ messages in thread
From: Simon Horman @ 2024-04-17 16:27 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, Jamal Hadi Salim,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet

On Mon, Apr 15, 2024 at 01:20:46PM +0000, Eric Dumazet wrote:
> Instead of relying on RTNL, etf_dump() can use READ_ONCE()
> annotations.
> 
> There is no etf_change() yet, this patch simply aligns
> this qdisc with others.
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>

Reviewed-by: Simon Horman <horms@kernel.org>


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH net-next 07/14] net_sched: sch_ets: implement lockless ets_dump()
  2024-04-15 13:20 ` [PATCH net-next 07/14] net_sched: sch_ets: implement lockless ets_dump() Eric Dumazet
@ 2024-04-17 16:54   ` Simon Horman
  2024-04-17 17:08     ` Eric Dumazet
  0 siblings, 1 reply; 44+ messages in thread
From: Simon Horman @ 2024-04-17 16:54 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, Jamal Hadi Salim,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet

On Mon, Apr 15, 2024 at 01:20:47PM +0000, Eric Dumazet wrote:
> Instead of relying on RTNL, ets_dump() can use READ_ONCE()
> annotations, paired with WRITE_ONCE() ones in ets_change().
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---
>  net/sched/sch_ets.c | 25 ++++++++++++++-----------
>  1 file changed, 14 insertions(+), 11 deletions(-)
> 
> diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
> index 835b4460b44854a803d3054e744702022b7551f4..f80bc05d4c5a5050226e6cfd30fa951c0b61029f 100644
> --- a/net/sched/sch_ets.c
> +++ b/net/sched/sch_ets.c

...

> @@ -658,11 +658,11 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
>  			list_del(&q->classes[i].alist);
>  		qdisc_tree_flush_backlog(q->classes[i].qdisc);
>  	}
> -	q->nstrict = nstrict;
> +	WRITE_ONCE(q->nstrict, nstrict);
>  	memcpy(q->prio2band, priomap, sizeof(priomap));

Hi Eric,

I think that writing elements of q->prio2band needs WRITE_ONCE() treatment too.

>  	for (i = 0; i < q->nbands; i++)
> -		q->classes[i].quantum = quanta[i];
> +		WRITE_ONCE(q->classes[i].quantum, quanta[i]);
>  
>  	for (i = oldbands; i < q->nbands; i++) {
>  		q->classes[i].qdisc = queues[i];

...

> @@ -733,6 +733,7 @@ static int ets_qdisc_dump(struct Qdisc *sch, struct sk_buff *skb)
>  	struct ets_sched *q = qdisc_priv(sch);
>  	struct nlattr *opts;
>  	struct nlattr *nest;
> +	u8 nbands, nstrict;
>  	int band;
>  	int prio;
>  	int err;

The next few lines of this function are:

	err = ets_offload_dump(sch);
	if (err)
		return err;

Where ets_offload_dump may indirectly call ndo_setup_tc().
And I am concerned that ndo_setup_tc() expects RTNL to be held,
although perhaps that assumption is out of date.

...

> @@ -771,7 +773,8 @@ static int ets_qdisc_dump(struct Qdisc *sch, struct sk_buff *skb)
>  		goto nla_err;
>  
>  	for (prio = 0; prio <= TC_PRIO_MAX; prio++) {
> -		if (nla_put_u8(skb, TCA_ETS_PRIOMAP_BAND, q->prio2band[prio]))
> +		if (nla_put_u8(skb, TCA_ETS_PRIOMAP_BAND,
> +			       READ_ONCE(q->prio2band[prio])))
>  			goto nla_err;
>  	}

...

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH net-next 09/14] net_sched: sch_fq_codel: implement lockless fq_codel_dump()
  2024-04-15 13:20 ` [PATCH net-next 09/14] net_sched: sch_fq_codel: implement lockless fq_codel_dump() Eric Dumazet
@ 2024-04-17 17:07   ` Simon Horman
  2024-04-17 17:14     ` Eric Dumazet
  0 siblings, 1 reply; 44+ messages in thread
From: Simon Horman @ 2024-04-17 17:07 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, Jamal Hadi Salim,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet

On Mon, Apr 15, 2024 at 01:20:49PM +0000, Eric Dumazet wrote:
> Instead of relying on RTNL, fq_codel_dump() can use READ_ONCE()
> annotations, paired with WRITE_ONCE() ones in fq_codel_change().
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---
>  net/sched/sch_fq_codel.c | 57 ++++++++++++++++++++++++----------------
>  1 file changed, 35 insertions(+), 22 deletions(-)
> 
> diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c

...

> @@ -529,30 +539,33 @@ static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb)
>  		goto nla_put_failure;
>  
>  	if (nla_put_u32(skb, TCA_FQ_CODEL_TARGET,
> -			codel_time_to_us(q->cparams.target)) ||
> +			codel_time_to_us(READ_ONCE(q->cparams.target))) ||
>  	    nla_put_u32(skb, TCA_FQ_CODEL_LIMIT,
> -			sch->limit) ||
> +			READ_ONCE(sch->limit)) ||
>  	    nla_put_u32(skb, TCA_FQ_CODEL_INTERVAL,
> -			codel_time_to_us(q->cparams.interval)) ||
> +			codel_time_to_us(READ_ONCE(q->cparams.interval))) ||
>  	    nla_put_u32(skb, TCA_FQ_CODEL_ECN,
> -			q->cparams.ecn) ||
> +			READ_ONCE(q->cparams.ecn)) ||
>  	    nla_put_u32(skb, TCA_FQ_CODEL_QUANTUM,
> -			q->quantum) ||
> +			READ_ONCE(q->quantum)) ||
>  	    nla_put_u32(skb, TCA_FQ_CODEL_DROP_BATCH_SIZE,
> -			q->drop_batch_size) ||
> +			READ_ONCE(q->drop_batch_size)) ||
>  	    nla_put_u32(skb, TCA_FQ_CODEL_MEMORY_LIMIT,
> -			q->memory_limit) ||
> +			READ_ONCE(q->memory_limit)) ||
>  	    nla_put_u32(skb, TCA_FQ_CODEL_FLOWS,
> -			q->flows_cnt))
> +			READ_ONCE(q->flows_cnt)))

Hi Eric,

I think you missed the corresponding update for q->flows_cnt
in fq_codel_change().

>  		goto nla_put_failure;
>  
> -	if (q->cparams.ce_threshold != CODEL_DISABLED_THRESHOLD) {
> +	ce_threshold = READ_ONCE(q->cparams.ce_threshold);
> +	if (ce_threshold != CODEL_DISABLED_THRESHOLD) {
>  		if (nla_put_u32(skb, TCA_FQ_CODEL_CE_THRESHOLD,
> -				codel_time_to_us(q->cparams.ce_threshold)))
> +				codel_time_to_us(ce_threshold)))
>  			goto nla_put_failure;
> -		if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR, q->cparams.ce_threshold_selector))
> +		if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_SELECTOR,
> +			       READ_ONCE(q->cparams.ce_threshold_selector)))
>  			goto nla_put_failure;
> -		if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_MASK, q->cparams.ce_threshold_mask))
> +		if (nla_put_u8(skb, TCA_FQ_CODEL_CE_THRESHOLD_MASK,
> +			       READ_ONCE(q->cparams.ce_threshold_mask)))
>  			goto nla_put_failure;
>  	}
>  
> -- 
> 2.44.0.683.g7961c838ac-goog
> 
> 

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH net-next 07/14] net_sched: sch_ets: implement lockless ets_dump()
  2024-04-17 16:54   ` Simon Horman
@ 2024-04-17 17:08     ` Eric Dumazet
  2024-04-17 17:17       ` Simon Horman
  0 siblings, 1 reply; 44+ messages in thread
From: Eric Dumazet @ 2024-04-17 17:08 UTC (permalink / raw)
  To: Simon Horman
  Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, Jamal Hadi Salim,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet

On Wed, Apr 17, 2024 at 6:54 PM Simon Horman <horms@kernel.org> wrote:
>
> On Mon, Apr 15, 2024 at 01:20:47PM +0000, Eric Dumazet wrote:
> > Instead of relying on RTNL, ets_dump() can use READ_ONCE()
> > annotations, paired with WRITE_ONCE() ones in ets_change().
> >
> > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > ---
> >  net/sched/sch_ets.c | 25 ++++++++++++++-----------
> >  1 file changed, 14 insertions(+), 11 deletions(-)
> >
> > diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
> > index 835b4460b44854a803d3054e744702022b7551f4..f80bc05d4c5a5050226e6cfd30fa951c0b61029f 100644
> > --- a/net/sched/sch_ets.c
> > +++ b/net/sched/sch_ets.c
>
> ...
>
> > @@ -658,11 +658,11 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
> >                       list_del(&q->classes[i].alist);
> >               qdisc_tree_flush_backlog(q->classes[i].qdisc);
> >       }
> > -     q->nstrict = nstrict;
> > +     WRITE_ONCE(q->nstrict, nstrict);
> >       memcpy(q->prio2band, priomap, sizeof(priomap));
>
> Hi Eric,
>
> I think that writing elements of q->prio2band needs WRITE_ONCE() treatment too.

Not really: these are bytes, and a CPU will not write a byte one bit at a time.

I could add WRITE_ONCE(), but this is overkill IMO.

>
> >       for (i = 0; i < q->nbands; i++)
> > -             q->classes[i].quantum = quanta[i];
> > +             WRITE_ONCE(q->classes[i].quantum, quanta[i]);
> >
> >       for (i = oldbands; i < q->nbands; i++) {
> >               q->classes[i].qdisc = queues[i];
>
> ...
>
> > @@ -733,6 +733,7 @@ static int ets_qdisc_dump(struct Qdisc *sch, struct sk_buff *skb)
> >       struct ets_sched *q = qdisc_priv(sch);
> >       struct nlattr *opts;
> >       struct nlattr *nest;
> > +     u8 nbands, nstrict;
> >       int band;
> >       int prio;
> >       int err;
>
> The next few lines of this function are:
>
>         err = ets_offload_dump(sch);
>         if (err)
>                 return err;
>
> Where ets_offload_dump may indirectly call ndo_setup_tc().
> And I am concerned that ndo_setup_tc() expects RTNL to be held,
> although perhaps that assumption is out of date.

Thanks, we will later add rtnl locking only in the helper,
or make sure it can run under RCU.

Note the patch series does not yet remove RTNL locking.

Clearly, masking and setting TCQ_F_OFFLOADED in sch->flags in a dump
operation is not very nice IMO.


>
> ...
>
> > @@ -771,7 +773,8 @@ static int ets_qdisc_dump(struct Qdisc *sch, struct sk_buff *skb)
> >               goto nla_err;
> >
> >       for (prio = 0; prio <= TC_PRIO_MAX; prio++) {
> > -             if (nla_put_u8(skb, TCA_ETS_PRIOMAP_BAND, q->prio2band[prio]))
> > +             if (nla_put_u8(skb, TCA_ETS_PRIOMAP_BAND,
> > +                            READ_ONCE(q->prio2band[prio])))
> >                       goto nla_err;
> >       }
>
> ...

^ permalink raw reply	[flat|nested] 44+ messages in thread
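
A short sketch of the prio2band point above, with a made-up struct
rather than the real ets_sched from net/sched/sch_ets.c: the priomap
entries are single bytes, so the plain memcpy() on the write side cannot
be observed half-written, while the dump side still annotates its loads,
which costs nothing.

#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>

struct priomap_sketch {
	u8 prio2band[TC_PRIO_MAX + 1];
};

static void sketch_ets_change_priomap(struct priomap_sketch *q,
				      const u8 *priomap)
{
	/* byte-sized stores: a CPU will not tear them */
	memcpy(q->prio2band, priomap, sizeof(q->prio2band));
}

static int sketch_ets_dump_priomap(struct priomap_sketch *q,
				   struct sk_buff *skb)
{
	int prio;

	for (prio = 0; prio <= TC_PRIO_MAX; prio++) {
		if (nla_put_u8(skb, TCA_ETS_PRIOMAP_BAND,
			       READ_ONCE(q->prio2band[prio])))
			return -EMSGSIZE;
	}
	return 0;
}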

* Re: [PATCH net-next 10/14] net_sched: sch_fq_pie: implement lockless fq_pie_dump()
  2024-04-15 13:20 ` [PATCH net-next 10/14] net_sched: sch_fq_pie: implement lockless fq_pie_dump() Eric Dumazet
@ 2024-04-17 17:13   ` Simon Horman
  2024-04-17 17:15     ` Eric Dumazet
  0 siblings, 1 reply; 44+ messages in thread
From: Simon Horman @ 2024-04-17 17:13 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, Jamal Hadi Salim,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet

On Mon, Apr 15, 2024 at 01:20:50PM +0000, Eric Dumazet wrote:
> Instead of relying on RTNL, fq_pie_dump() can use READ_ONCE()
> annotations, paired with WRITE_ONCE() ones in fq_pie_change().
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---
>  net/sched/sch_fq_pie.c | 61 +++++++++++++++++++++++-------------------
>  1 file changed, 34 insertions(+), 27 deletions(-)
> 
> diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c

...

> @@ -471,22 +477,23 @@ static int fq_pie_dump(struct Qdisc *sch, struct sk_buff *skb)
>  		return -EMSGSIZE;
>  
>  	/* convert target from pschedtime to us */
> -	if (nla_put_u32(skb, TCA_FQ_PIE_LIMIT, sch->limit) ||
> -	    nla_put_u32(skb, TCA_FQ_PIE_FLOWS, q->flows_cnt) ||

Hi Eric,

I think you missed the corresponding change for q->flows_cnt
in fq_pie_change().

...

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH net-next 09/14] net_sched: sch_fq_codel: implement lockless fq_codel_dump()
  2024-04-17 17:07   ` Simon Horman
@ 2024-04-17 17:14     ` Eric Dumazet
  2024-04-17 17:22       ` Simon Horman
  0 siblings, 1 reply; 44+ messages in thread
From: Eric Dumazet @ 2024-04-17 17:14 UTC (permalink / raw)
  To: Simon Horman
  Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, Jamal Hadi Salim,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet

On Wed, Apr 17, 2024 at 7:07 PM Simon Horman <horms@kernel.org> wrote:
>
> On Mon, Apr 15, 2024 at 01:20:49PM +0000, Eric Dumazet wrote:
> > Instead of relying on RTNL, fq_codel_dump() can use READ_ONCE()
> > annotations, paired with WRITE_ONCE() ones in fq_codel_change().
> >
> > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > ---
> >  net/sched/sch_fq_codel.c | 57 ++++++++++++++++++++++++----------------
> >  1 file changed, 35 insertions(+), 22 deletions(-)
> >
> > diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
>
> ...
>
> > @@ -529,30 +539,33 @@ static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb)
> >               goto nla_put_failure;
> >
> >       if (nla_put_u32(skb, TCA_FQ_CODEL_TARGET,
> > -                     codel_time_to_us(q->cparams.target)) ||
> > +                     codel_time_to_us(READ_ONCE(q->cparams.target))) ||
> >           nla_put_u32(skb, TCA_FQ_CODEL_LIMIT,
> > -                     sch->limit) ||
> > +                     READ_ONCE(sch->limit)) ||
> >           nla_put_u32(skb, TCA_FQ_CODEL_INTERVAL,
> > -                     codel_time_to_us(q->cparams.interval)) ||
> > +                     codel_time_to_us(READ_ONCE(q->cparams.interval))) ||
> >           nla_put_u32(skb, TCA_FQ_CODEL_ECN,
> > -                     q->cparams.ecn) ||
> > +                     READ_ONCE(q->cparams.ecn)) ||
> >           nla_put_u32(skb, TCA_FQ_CODEL_QUANTUM,
> > -                     q->quantum) ||
> > +                     READ_ONCE(q->quantum)) ||
> >           nla_put_u32(skb, TCA_FQ_CODEL_DROP_BATCH_SIZE,
> > -                     q->drop_batch_size) ||
> > +                     READ_ONCE(q->drop_batch_size)) ||
> >           nla_put_u32(skb, TCA_FQ_CODEL_MEMORY_LIMIT,
> > -                     q->memory_limit) ||
> > +                     READ_ONCE(q->memory_limit)) ||
> >           nla_put_u32(skb, TCA_FQ_CODEL_FLOWS,
> > -                     q->flows_cnt))
> > +                     READ_ONCE(q->flows_cnt)))
>
> Hi Eric,
>
> I think you missed the corresponding update for q->flows_cnt
> in fq_codel_change().

q->flows_cnt is set at init time only; it cannot change yet.

Blindly using READ_ONCE() in a dump seems like good hygiene;
it is not needed yet, but it does no harm.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH net-next 10/14] net_sched: sch_fq_pie: implement lockless fq_pie_dump()
  2024-04-17 17:13   ` Simon Horman
@ 2024-04-17 17:15     ` Eric Dumazet
  2024-04-17 17:23       ` Simon Horman
  0 siblings, 1 reply; 44+ messages in thread
From: Eric Dumazet @ 2024-04-17 17:15 UTC (permalink / raw)
  To: Simon Horman
  Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, Jamal Hadi Salim,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet

On Wed, Apr 17, 2024 at 7:13 PM Simon Horman <horms@kernel.org> wrote:
>
> On Mon, Apr 15, 2024 at 01:20:50PM +0000, Eric Dumazet wrote:
> > Instead of relying on RTNL, fq_pie_dump() can use READ_ONCE()
> > annotations, paired with WRITE_ONCE() ones in fq_pie_change().
> >
> > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > ---
> >  net/sched/sch_fq_pie.c | 61 +++++++++++++++++++++++-------------------
> >  1 file changed, 34 insertions(+), 27 deletions(-)
> >
> > diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
>
> ...
>
> > @@ -471,22 +477,23 @@ static int fq_pie_dump(struct Qdisc *sch, struct sk_buff *skb)
> >               return -EMSGSIZE;
> >
> >       /* convert target from pschedtime to us */
> > -     if (nla_put_u32(skb, TCA_FQ_PIE_LIMIT, sch->limit) ||
> > -         nla_put_u32(skb, TCA_FQ_PIE_FLOWS, q->flows_cnt) ||
>
> Hi Eric,
>
> I think you missed the corresponding change for q->flows_cnt
> in fq_pie_change().
>

net/sched/sch_fq_pie.c copied the code I wrote in fq_codel;
q->flows_cnt is set once.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH net-next 07/14] net_sched: sch_ets: implement lockless ets_dump()
  2024-04-17 17:08     ` Eric Dumazet
@ 2024-04-17 17:17       ` Simon Horman
  0 siblings, 0 replies; 44+ messages in thread
From: Simon Horman @ 2024-04-17 17:17 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, Jamal Hadi Salim,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet

On Wed, Apr 17, 2024 at 07:08:17PM +0200, Eric Dumazet wrote:
> On Wed, Apr 17, 2024 at 6:54 PM Simon Horman <horms@kernel.org> wrote:
> >
> > On Mon, Apr 15, 2024 at 01:20:47PM +0000, Eric Dumazet wrote:
> > > Instead of relying on RTNL, ets_dump() can use READ_ONCE()
> > > annotations, paired with WRITE_ONCE() ones in ets_change().
> > >
> > > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > > ---
> > >  net/sched/sch_ets.c | 25 ++++++++++++++-----------
> > >  1 file changed, 14 insertions(+), 11 deletions(-)
> > >
> > > diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
> > > index 835b4460b44854a803d3054e744702022b7551f4..f80bc05d4c5a5050226e6cfd30fa951c0b61029f 100644
> > > --- a/net/sched/sch_ets.c
> > > +++ b/net/sched/sch_ets.c
> >
> > ...
> >
> > > @@ -658,11 +658,11 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
> > >                       list_del(&q->classes[i].alist);
> > >               qdisc_tree_flush_backlog(q->classes[i].qdisc);
> > >       }
> > > -     q->nstrict = nstrict;
> > > +     WRITE_ONCE(q->nstrict, nstrict);
> > >       memcpy(q->prio2band, priomap, sizeof(priomap));
> >
> > Hi Eric,
> >
> > I think that writing elements of q->prio2band needs WRITE_ONCE() treatment too.
> 
> Not really, these are bytes, a cpu will not write over bytes one bit at a time.
> 
> I could add WRITE_ONCE(), but this is overkill IMO.

Thanks, armed with that understanding I'm now happy with this patch.

Reviewed-by: Simon Horman <horms@kernel.org>

> > >       for (i = 0; i < q->nbands; i++)
> > > -             q->classes[i].quantum = quanta[i];
> > > +             WRITE_ONCE(q->classes[i].quantum, quanta[i]);
> > >
> > >       for (i = oldbands; i < q->nbands; i++) {
> > >               q->classes[i].qdisc = queues[i];
> >
> > ...
> >
> > > @@ -733,6 +733,7 @@ static int ets_qdisc_dump(struct Qdisc *sch, struct sk_buff *skb)
> > >       struct ets_sched *q = qdisc_priv(sch);
> > >       struct nlattr *opts;
> > >       struct nlattr *nest;
> > > +     u8 nbands, nstrict;
> > >       int band;
> > >       int prio;
> > >       int err;
> >
> > The next few lines of this function are:
> >
> >         err = ets_offload_dump(sch);
> >         if (err)
> >                 return err;
> >
> > Where ets_offload_dump may indirectly call ndo_setup_tc().
> > And I am concerned that ndo_setup_tc() expects RTNL to be held,
> > although perhaps that assumption is out of date.
> 
> Thanks, we will add rtnl locking later only in the helper,
> or make sure it can run under RCU.
> 
> Note the patch series does not yet remove RTNL locking.

Yes, I understand that.
I was more flagging this as something that needs to be addressed.
Sorry for not being clearer.

> Clearly, masking and setting TCQ_F_OFFLOADED in sch->flags in a dump
> operation is not very nice IMO.

Yes, I noticed that too.

...

^ permalink raw reply	[flat|nested] 44+ messages in thread
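
A heavily simplified sketch of the offload concern raised in this
exchange, hypothetical rather than the actual sch_ets.c / sch_api.c
code: the dump path queries the driver through ndo_setup_tc() and then
rewrites sch->flags, both of which sit uneasily in a dump that should
one day run without RTNL.

#include <linux/netdevice.h>
#include <net/pkt_sched.h>

static int sketch_offload_dump(struct Qdisc *sch, void *type_data)
{
	struct net_device *dev = qdisc_dev(sch);
	int err;

	if (!dev->netdev_ops->ndo_setup_tc)
		return 0;

	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETS, type_data);

	/* a write to sch->flags from inside a dump operation */
	if (err)
		sch->flags &= ~TCQ_F_OFFLOADED;
	else
		sch->flags |= TCQ_F_OFFLOADED;
	return err;
}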

* Re: [PATCH net-next 09/14] net_sched: sch_fq_codel: implement lockless fq_codel_dump()
  2024-04-17 17:14     ` Eric Dumazet
@ 2024-04-17 17:22       ` Simon Horman
  0 siblings, 0 replies; 44+ messages in thread
From: Simon Horman @ 2024-04-17 17:22 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, Jamal Hadi Salim,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet

On Wed, Apr 17, 2024 at 07:14:10PM +0200, Eric Dumazet wrote:
> On Wed, Apr 17, 2024 at 7:07 PM Simon Horman <horms@kernel.org> wrote:
> >
> > On Mon, Apr 15, 2024 at 01:20:49PM +0000, Eric Dumazet wrote:
> > > Instead of relying on RTNL, fq_codel_dump() can use READ_ONCE()
> > > annotations, paired with WRITE_ONCE() ones in fq_codel_change().
> > >
> > > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > > ---
> > >  net/sched/sch_fq_codel.c | 57 ++++++++++++++++++++++++----------------
> > >  1 file changed, 35 insertions(+), 22 deletions(-)
> > >
> > > diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
> >
> > ...
> >
> > > @@ -529,30 +539,33 @@ static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb)
> > >               goto nla_put_failure;
> > >
> > >       if (nla_put_u32(skb, TCA_FQ_CODEL_TARGET,
> > > -                     codel_time_to_us(q->cparams.target)) ||
> > > +                     codel_time_to_us(READ_ONCE(q->cparams.target))) ||
> > >           nla_put_u32(skb, TCA_FQ_CODEL_LIMIT,
> > > -                     sch->limit) ||
> > > +                     READ_ONCE(sch->limit)) ||
> > >           nla_put_u32(skb, TCA_FQ_CODEL_INTERVAL,
> > > -                     codel_time_to_us(q->cparams.interval)) ||
> > > +                     codel_time_to_us(READ_ONCE(q->cparams.interval))) ||
> > >           nla_put_u32(skb, TCA_FQ_CODEL_ECN,
> > > -                     q->cparams.ecn) ||
> > > +                     READ_ONCE(q->cparams.ecn)) ||
> > >           nla_put_u32(skb, TCA_FQ_CODEL_QUANTUM,
> > > -                     q->quantum) ||
> > > +                     READ_ONCE(q->quantum)) ||
> > >           nla_put_u32(skb, TCA_FQ_CODEL_DROP_BATCH_SIZE,
> > > -                     q->drop_batch_size) ||
> > > +                     READ_ONCE(q->drop_batch_size)) ||
> > >           nla_put_u32(skb, TCA_FQ_CODEL_MEMORY_LIMIT,
> > > -                     q->memory_limit) ||
> > > +                     READ_ONCE(q->memory_limit)) ||
> > >           nla_put_u32(skb, TCA_FQ_CODEL_FLOWS,
> > > -                     q->flows_cnt))
> > > +                     READ_ONCE(q->flows_cnt)))
> >
> > Hi Eric,
> >
> > I think you missed the corresponding update for q->flows_cnt
> > in fq_codel_change().
> 
> q->flows_cnt is set at init time only, it can not change yet.

Sorry, I missed that important detail.

> Blindly using READ_ONCE() in a dump seems good hygiene,
> it is not needed yet, but does no harm.

Thanks, understood.

Reviewed-by: Simon Horman <horms@kernel.org>


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH net-next 10/14] net_sched: sch_fq_pie: implement lockless fq_pie_dump()
  2024-04-17 17:15     ` Eric Dumazet
@ 2024-04-17 17:23       ` Simon Horman
  0 siblings, 0 replies; 44+ messages in thread
From: Simon Horman @ 2024-04-17 17:23 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, Jamal Hadi Salim,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet

On Wed, Apr 17, 2024 at 07:15:40PM +0200, Eric Dumazet wrote:
> On Wed, Apr 17, 2024 at 7:13 PM Simon Horman <horms@kernel.org> wrote:
> >
> > On Mon, Apr 15, 2024 at 01:20:50PM +0000, Eric Dumazet wrote:
> > > Instead of relying on RTNL, fq_pie_dump() can use READ_ONCE()
> > > annotations, paired with WRITE_ONCE() ones in fq_pie_change().
> > >
> > > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > > ---
> > >  net/sched/sch_fq_pie.c | 61 +++++++++++++++++++++++-------------------
> > >  1 file changed, 34 insertions(+), 27 deletions(-)
> > >
> > > diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
> >
> > ...
> >
> > > @@ -471,22 +477,23 @@ static int fq_pie_dump(struct Qdisc *sch, struct sk_buff *skb)
> > >               return -EMSGSIZE;
> > >
> > >       /* convert target from pschedtime to us */
> > > -     if (nla_put_u32(skb, TCA_FQ_PIE_LIMIT, sch->limit) ||
> > > -         nla_put_u32(skb, TCA_FQ_PIE_FLOWS, q->flows_cnt) ||
> >
> > Hi Eric,
> >
> > I think you missed the corresponding change for q->flows_cnt
> > in fq_pie_change().
> >
> 
> net/sched/sch_fq_pie.c copied the code I wrote in fq_codel,
> q->flows_cnt is set once.

Thanks, I see it now.

Reviewed-by: Simon Horman <horms@kernel.org>


^ permalink raw reply	[flat|nested] 44+ messages in thread
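
A small sketch of the q->flows_cnt point from the fq_codel and fq_pie
exchanges, with the struct layout assumed from net/sched/sch_fq_codel.c:
the field is written once in ->init(), before the qdisc is visible to
any dump, and never touched by ->change(), so there is no WRITE_ONCE()
counterpart and the READ_ONCE() in the dump is only hygiene.

#include <net/netlink.h>
#include <net/pkt_sched.h>

static int sketch_fq_codel_init(struct Qdisc *sch)
{
	struct fq_codel_sched_data *q = qdisc_priv(sch);

	/* written once, before concurrent dumps can see this qdisc */
	q->flows_cnt = 1024;
	return 0;
}

static int sketch_fq_codel_dump_flows(struct Qdisc *sch, struct sk_buff *skb)
{
	struct fq_codel_sched_data *q = qdisc_priv(sch);

	/* not strictly required for an init-only field, but harmless */
	return nla_put_u32(skb, TCA_FQ_CODEL_FLOWS, READ_ONCE(q->flows_cnt));
}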

* Re: [PATCH net-next 12/14] net_sched: sch_hhf: implement lockless hhf_dump()
  2024-04-15 13:20 ` [PATCH net-next 12/14] net_sched: sch_hhf: implement lockless hhf_dump() Eric Dumazet
@ 2024-04-17 17:26   ` Simon Horman
  0 siblings, 0 replies; 44+ messages in thread
From: Simon Horman @ 2024-04-17 17:26 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, Jamal Hadi Salim,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet

On Mon, Apr 15, 2024 at 01:20:52PM +0000, Eric Dumazet wrote:
> Instead of relying on RTNL, hhf_dump() can use READ_ONCE()
> annotations, paired with WRITE_ONCE() ones in hhf_change().
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>

Reviewed-by: Simon Horman <horms@kernel.org>


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH net-next 13/14] net_sched: sch_pie: implement lockless pie_dump()
  2024-04-15 13:20 ` [PATCH net-next 13/14] net_sched: sch_pie: implement lockless pie_dump() Eric Dumazet
@ 2024-04-17 17:28   ` Simon Horman
  0 siblings, 0 replies; 44+ messages in thread
From: Simon Horman @ 2024-04-17 17:28 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David S . Miller, Jakub Kicinski, Paolo Abeni, Jamal Hadi Salim,
	Cong Wang, Jiri Pirko, netdev, eric.dumazet

On Mon, Apr 15, 2024 at 01:20:53PM +0000, Eric Dumazet wrote:
> Instead of relying on RTNL, pie_dump() can use READ_ONCE()
> annotations, paired with WRITE_ONCE() ones in pie_change().
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>

Reviewed-by: Simon Horman <horms@kernel.org>


^ permalink raw reply	[flat|nested] 44+ messages in thread

end of thread

Thread overview: 44+ messages
2024-04-15 13:20 [PATCH net-next 00/14] net_sched: first series for RTNL-less qdisc dumps Eric Dumazet
2024-04-15 13:20 ` [PATCH net-next 01/14] net_sched: sch_fq: implement lockless fq_dump() Eric Dumazet
2024-04-16 18:19   ` Simon Horman
2024-04-16 18:33     ` Eric Dumazet
2024-04-17  8:45       ` Eric Dumazet
2024-04-17  9:00         ` Simon Horman
2024-04-17  9:02           ` Eric Dumazet
2024-04-17  9:23             ` Simon Horman
2024-04-15 13:20 ` [PATCH net-next 02/14] net_sched: cake: implement lockless cake_dump() Eric Dumazet
2024-04-17  8:35   ` Simon Horman
2024-04-17  8:54     ` Eric Dumazet
2024-04-17  9:24       ` Simon Horman
2024-04-17 12:25     ` Toke Høiland-Jørgensen
2024-04-15 13:20 ` [PATCH net-next 03/14] net_sched: sch_cbs: implement lockless cbs_dump() Eric Dumazet
2024-04-17  9:27   ` Simon Horman
2024-04-15 13:20 ` [PATCH net-next 04/14] net_sched: sch_choke: implement lockless choke_dump() Eric Dumazet
2024-04-17 13:14   ` Simon Horman
2024-04-17 13:41     ` Eric Dumazet
2024-04-17 14:44       ` Simon Horman
2024-04-15 13:20 ` [PATCH net-next 05/14] net_sched: sch_codel: implement lockless codel_dump() Eric Dumazet
2024-04-17 15:59   ` Simon Horman
2024-04-17 16:05     ` Eric Dumazet
2024-04-17 16:21       ` Simon Horman
2024-04-15 13:20 ` [PATCH net-next 06/14] net_sched: sch_tfs: implement lockless etf_dump() Eric Dumazet
2024-04-17 16:27   ` Simon Horman
2024-04-15 13:20 ` [PATCH net-next 07/14] net_sched: sch_ets: implement lockless ets_dump() Eric Dumazet
2024-04-17 16:54   ` Simon Horman
2024-04-17 17:08     ` Eric Dumazet
2024-04-17 17:17       ` Simon Horman
2024-04-15 13:20 ` [PATCH net-next 08/14] net_sched: sch_fifo: implement lockless __fifo_dump() Eric Dumazet
2024-04-15 13:20 ` [PATCH net-next 09/14] net_sched: sch_fq_codel: implement lockless fq_codel_dump() Eric Dumazet
2024-04-17 17:07   ` Simon Horman
2024-04-17 17:14     ` Eric Dumazet
2024-04-17 17:22       ` Simon Horman
2024-04-15 13:20 ` [PATCH net-next 10/14] net_sched: sch_fq_pie: implement lockless fq_pie_dump() Eric Dumazet
2024-04-17 17:13   ` Simon Horman
2024-04-17 17:15     ` Eric Dumazet
2024-04-17 17:23       ` Simon Horman
2024-04-15 13:20 ` [PATCH net-next 11/14] net_sched: sch_hfsc: implement lockless accesses to q->defcls Eric Dumazet
2024-04-15 13:20 ` [PATCH net-next 12/14] net_sched: sch_hhf: implement lockless hhf_dump() Eric Dumazet
2024-04-17 17:26   ` Simon Horman
2024-04-15 13:20 ` [PATCH net-next 13/14] net_sched: sch_pie: implement lockless pie_dump() Eric Dumazet
2024-04-17 17:28   ` Simon Horman
2024-04-15 13:20 ` [PATCH net-next 14/14] net_sched: sch_skbprio: implement lockless skbprio_dump() Eric Dumazet
