linux-can.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC PATCH] can: gw: fix RCU/BH usage in cgw_create_job()
@ 2023-12-21 12:37 Oliver Hartkopp
  2024-01-11 12:14 ` Sebastian Andrzej Siewior
  0 siblings, 1 reply; 5+ messages in thread
From: Oliver Hartkopp @ 2023-12-21 12:37 UTC (permalink / raw)
  To: linux-can; +Cc: Oliver Hartkopp, Sebastian Andrzej Siewior

As reported by Sebastian Andrzej Siewior the use of local_bh_disable()
is only feasible in uniprocessor systems to update the modification rules.
The usual use-case to update the modification rules is to update the data
of the modifications but not the modification types (AND/OR/XOR/SET) or
the checksum functions itself.

To omit additional memory allocations to maintain fast modification
switching times, the modification description space is doubled at gw-job
creation time so that only the reference to the active modification
description is changed under rcu protection.

Link: https://lore.kernel.org/linux-can/20231031112349.y0aLoBrz@linutronix.de/
Fixes: dd895d7f21b2 ("can: cangw: introduce optional uid to reference created routing jobs")
Reported-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
---
 net/can/gw.c | 99 ++++++++++++++++++++++++++++------------------------
 1 file changed, 53 insertions(+), 46 deletions(-)

diff --git a/net/can/gw.c b/net/can/gw.c
index 37528826935e..6411f1c60f48 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -128,11 +128,13 @@ struct cgw_job {
 	struct hlist_node list;
 	struct rcu_head rcu;
 	u32 handled_frames;
 	u32 dropped_frames;
 	u32 deleted_frames;
-	struct cf_mod mod;
+	struct cf_mod *mod;
+	struct cf_mod mod1;
+	struct cf_mod mod2;
 	union {
 		/* CAN frame data source */
 		struct net_device *dev;
 	} src;
 	union {
@@ -504,11 +506,11 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data)
 	/* clone the given skb, which has not been done in can_rcv()
 	 *
 	 * When there is at least one modification function activated,
 	 * we need to copy the skb as we want to modify skb->data.
 	 */
-	if (gwj->mod.modfunc[0])
+	if (gwj->mod->modfunc[0])
 		nskb = skb_copy(skb, GFP_ATOMIC);
 	else
 		nskb = skb_clone(skb, GFP_ATOMIC);
 
 	if (!nskb) {
@@ -527,12 +529,12 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data)
 
 	/* pointer to modifiable CAN frame */
 	cf = (struct canfd_frame *)nskb->data;
 
 	/* perform preprocessed modification functions if there are any */
-	while (modidx < MAX_MODFUNCTIONS && gwj->mod.modfunc[modidx])
-		(*gwj->mod.modfunc[modidx++])(cf, &gwj->mod);
+	while (modidx < MAX_MODFUNCTIONS && gwj->mod->modfunc[modidx])
+		(*gwj->mod->modfunc[modidx++])(cf, gwj->mod);
 
 	/* Has the CAN frame been modified? */
 	if (modidx) {
 		/* get available space for the processed CAN frame type */
 		int max_len = nskb->len - offsetof(struct canfd_frame, data);
@@ -544,15 +546,15 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data)
 			kfree_skb(nskb);
 			return;
 		}
 
 		/* check for checksum updates */
-		if (gwj->mod.csumfunc.crc8)
-			(*gwj->mod.csumfunc.crc8)(cf, &gwj->mod.csum.crc8);
+		if (gwj->mod->csumfunc.crc8)
+			(*gwj->mod->csumfunc.crc8)(cf, &gwj->mod->csum.crc8);
 
-		if (gwj->mod.csumfunc.xor)
-			(*gwj->mod.csumfunc.xor)(cf, &gwj->mod.csum.xor);
+		if (gwj->mod->csumfunc.xor)
+			(*gwj->mod->csumfunc.xor)(cf, &gwj->mod->csum.xor);
 	}
 
 	/* clear the skb timestamp if not configured the other way */
 	if (!(gwj->flags & CGW_FLAGS_CAN_SRC_TSTAMP))
 		nskb->tstamp = 0;
@@ -651,83 +653,83 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type,
 	}
 
 	if (gwj->flags & CGW_FLAGS_CAN_FD) {
 		struct cgw_fdframe_mod mb;
 
-		if (gwj->mod.modtype.and) {
-			memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf));
-			mb.modtype = gwj->mod.modtype.and;
+		if (gwj->mod->modtype.and) {
+			memcpy(&mb.cf, &gwj->mod->modframe.and, sizeof(mb.cf));
+			mb.modtype = gwj->mod->modtype.and;
 			if (nla_put(skb, CGW_FDMOD_AND, sizeof(mb), &mb) < 0)
 				goto cancel;
 		}
 
-		if (gwj->mod.modtype.or) {
-			memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf));
-			mb.modtype = gwj->mod.modtype.or;
+		if (gwj->mod->modtype.or) {
+			memcpy(&mb.cf, &gwj->mod->modframe.or, sizeof(mb.cf));
+			mb.modtype = gwj->mod->modtype.or;
 			if (nla_put(skb, CGW_FDMOD_OR, sizeof(mb), &mb) < 0)
 				goto cancel;
 		}
 
-		if (gwj->mod.modtype.xor) {
-			memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf));
-			mb.modtype = gwj->mod.modtype.xor;
+		if (gwj->mod->modtype.xor) {
+			memcpy(&mb.cf, &gwj->mod->modframe.xor, sizeof(mb.cf));
+			mb.modtype = gwj->mod->modtype.xor;
 			if (nla_put(skb, CGW_FDMOD_XOR, sizeof(mb), &mb) < 0)
 				goto cancel;
 		}
 
-		if (gwj->mod.modtype.set) {
-			memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf));
-			mb.modtype = gwj->mod.modtype.set;
+		if (gwj->mod->modtype.set) {
+			memcpy(&mb.cf, &gwj->mod->modframe.set, sizeof(mb.cf));
+			mb.modtype = gwj->mod->modtype.set;
 			if (nla_put(skb, CGW_FDMOD_SET, sizeof(mb), &mb) < 0)
 				goto cancel;
 		}
 	} else {
 		struct cgw_frame_mod mb;
 
-		if (gwj->mod.modtype.and) {
-			memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf));
-			mb.modtype = gwj->mod.modtype.and;
+		if (gwj->mod->modtype.and) {
+			memcpy(&mb.cf, &gwj->mod->modframe.and, sizeof(mb.cf));
+			mb.modtype = gwj->mod->modtype.and;
 			if (nla_put(skb, CGW_MOD_AND, sizeof(mb), &mb) < 0)
 				goto cancel;
 		}
 
-		if (gwj->mod.modtype.or) {
-			memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf));
-			mb.modtype = gwj->mod.modtype.or;
+		if (gwj->mod->modtype.or) {
+			memcpy(&mb.cf, &gwj->mod->modframe.or, sizeof(mb.cf));
+			mb.modtype = gwj->mod->modtype.or;
 			if (nla_put(skb, CGW_MOD_OR, sizeof(mb), &mb) < 0)
 				goto cancel;
 		}
 
-		if (gwj->mod.modtype.xor) {
-			memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf));
-			mb.modtype = gwj->mod.modtype.xor;
+		if (gwj->mod->modtype.xor) {
+			memcpy(&mb.cf, &gwj->mod->modframe.xor, sizeof(mb.cf));
+			mb.modtype = gwj->mod->modtype.xor;
 			if (nla_put(skb, CGW_MOD_XOR, sizeof(mb), &mb) < 0)
 				goto cancel;
 		}
 
-		if (gwj->mod.modtype.set) {
-			memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf));
-			mb.modtype = gwj->mod.modtype.set;
+		if (gwj->mod->modtype.set) {
+			memcpy(&mb.cf, &gwj->mod->modframe.set, sizeof(mb.cf));
+			mb.modtype = gwj->mod->modtype.set;
 			if (nla_put(skb, CGW_MOD_SET, sizeof(mb), &mb) < 0)
 				goto cancel;
 		}
 	}
 
-	if (gwj->mod.uid) {
-		if (nla_put_u32(skb, CGW_MOD_UID, gwj->mod.uid) < 0)
+	if (gwj->mod->uid) {
+		if (nla_put_u32(skb, CGW_MOD_UID, gwj->mod->uid) < 0)
 			goto cancel;
 	}
 
-	if (gwj->mod.csumfunc.crc8) {
+	if (gwj->mod->csumfunc.crc8) {
 		if (nla_put(skb, CGW_CS_CRC8, CGW_CS_CRC8_LEN,
-			    &gwj->mod.csum.crc8) < 0)
+			    &gwj->mod->csum.crc8) < 0)
 			goto cancel;
 	}
 
-	if (gwj->mod.csumfunc.xor) {
+	if (gwj->mod->csumfunc.xor) {
 		if (nla_put(skb, CGW_CS_XOR, CGW_CS_XOR_LEN,
-			    &gwj->mod.csum.xor) < 0)
+			    &gwj->mod->csum.xor) < 0)
 			goto cancel;
 	}
 
 	if (gwj->gwtype == CGW_TYPE_CAN_CAN) {
 		if (gwj->ccgw.filter.can_id || gwj->ccgw.filter.can_mask) {
@@ -1085,21 +1087,25 @@ static int cgw_create_job(struct sk_buff *skb,  struct nlmsghdr *nlh,
 	if (mod.uid) {
 		ASSERT_RTNL();
 
 		/* check for updating an existing job with identical uid */
 		hlist_for_each_entry(gwj, &net->can.cgw_list, list) {
-			if (gwj->mod.uid != mod.uid)
+			if (gwj->mod->uid != mod.uid)
 				continue;
 
 			/* interfaces & filters must be identical */
 			if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw)))
 				return -EINVAL;
 
 			/* update modifications with disabled softirq & quit */
-			local_bh_disable();
-			memcpy(&gwj->mod, &mod, sizeof(mod));
-			local_bh_enable();
+			if (gwj->mod == &gwj->mod1) {
+				memcpy(&gwj->mod2, &mod, sizeof(mod));
+				rcu_replace_pointer(gwj->mod, &gwj->mod2, true);
+			} else {
+				memcpy(&gwj->mod1, &mod, sizeof(mod));
+				rcu_replace_pointer(gwj->mod, &gwj->mod1, true);
+			}
 			return 0;
 		}
 	}
 
 	/* ifindex == 0 is not allowed for job creation */
@@ -1114,13 +1120,14 @@ static int cgw_create_job(struct sk_buff *skb,  struct nlmsghdr *nlh,
 	gwj->dropped_frames = 0;
 	gwj->deleted_frames = 0;
 	gwj->flags = r->flags;
 	gwj->gwtype = r->gwtype;
 	gwj->limit_hops = limhops;
+	gwj->mod = &gwj->mod1;
 
 	/* insert already parsed information */
-	memcpy(&gwj->mod, &mod, sizeof(mod));
+	memcpy(gwj->mod, &mod, sizeof(mod));
 	memcpy(&gwj->ccgw, &ccgw, sizeof(ccgw));
 
 	err = -ENODEV;
 
 	gwj->src.dev = __dev_get_by_index(net, gwj->ccgw.src_idx);
@@ -1219,16 +1226,16 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 		if (gwj->limit_hops != limhops)
 			continue;
 
 		/* we have a match when uid is enabled and identical */
-		if (gwj->mod.uid || mod.uid) {
-			if (gwj->mod.uid != mod.uid)
+		if (gwj->mod->uid || mod.uid) {
+			if (gwj->mod->uid != mod.uid)
 				continue;
 		} else {
 			/* no uid => check for identical modifications */
-			if (memcmp(&gwj->mod, &mod, sizeof(mod)))
+			if (memcmp(gwj->mod, &mod, sizeof(mod)))
 				continue;
 		}
 
 		/* if (r->gwtype == CGW_TYPE_CAN_CAN) - is made sure here */
 		if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw)))
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [RFC PATCH] can: gw: fix RCU/BH usage in cgw_create_job()
  2023-12-21 12:37 [RFC PATCH] can: gw: fix RCU/BH usage in cgw_create_job() Oliver Hartkopp
@ 2024-01-11 12:14 ` Sebastian Andrzej Siewior
  2024-01-21 10:07   ` Oliver Hartkopp
  0 siblings, 1 reply; 5+ messages in thread
From: Sebastian Andrzej Siewior @ 2024-01-11 12:14 UTC (permalink / raw)
  To: Oliver Hartkopp; +Cc: linux-can, tglx

On 2023-12-21 13:37:03 [+0100], Oliver Hartkopp wrote:
> --- a/net/can/gw.c
> +++ b/net/can/gw.c
> @@ -128,11 +128,13 @@ struct cgw_job {
>  	struct hlist_node list;
>  	struct rcu_head rcu;
>  	u32 handled_frames;
>  	u32 dropped_frames;
>  	u32 deleted_frames;
> -	struct cf_mod mod;
> +	struct cf_mod *mod;
> +	struct cf_mod mod1;
> +	struct cf_mod mod2;

I wouldn't put here mod1/2. That cf_mod struct has 736 bytes in my
allmod build so it gains some weight. It also introduces a run-time
problem, more on that later.
Also *mod requires __rcu annotation. 

> @@ -504,11 +506,11 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data)
>  	/* clone the given skb, which has not been done in can_rcv()
>  	 *
>  	 * When there is at least one modification function activated,
>  	 * we need to copy the skb as we want to modify skb->data.
>  	 */
> -	if (gwj->mod.modfunc[0])
> +	if (gwj->mod->modfunc[0])

Almost. You need
	struct cf_mod *mod;
	…

	rcu_read_lock();
	mod = rcu_dereference(gwj->mod);
	if (mod->modfunc[0])
	…

	rcu_read_unlock();
	/* no longer touching mod but I am leaving can_can_gw_rcv()
	 * anyway */

>  		nskb = skb_copy(skb, GFP_ATOMIC);
>  	else
>  		nskb = skb_clone(skb, GFP_ATOMIC);
>  
>  	if (!nskb) {
> @@ -1085,21 +1087,25 @@ static int cgw_create_job(struct sk_buff *skb,  struct nlmsghdr *nlh,
>  	if (mod.uid) {
>  		ASSERT_RTNL();
>  
>  		/* check for updating an existing job with identical uid */
>  		hlist_for_each_entry(gwj, &net->can.cgw_list, list) {
> -			if (gwj->mod.uid != mod.uid)
> +			if (gwj->mod->uid != mod.uid)
>  				continue;
>  
>  			/* interfaces & filters must be identical */
>  			if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw)))
>  				return -EINVAL;
>  
>  			/* update modifications with disabled softirq & quit */
> -			local_bh_disable();
> -			memcpy(&gwj->mod, &mod, sizeof(mod));
> -			local_bh_enable();
> +			if (gwj->mod == &gwj->mod1) {
> +				memcpy(&gwj->mod2, &mod, sizeof(mod));
> +				rcu_replace_pointer(gwj->mod, &gwj->mod2, true);
> +			} else {
> +				memcpy(&gwj->mod1, &mod, sizeof(mod));
> +				rcu_replace_pointer(gwj->mod, &gwj->mod1, true);
> +			}

Why are you afraid of doing
	mod = kmalloc(sizeof(*mod), GFP_KERNEL);

before invoking cgw_parse_attr()?

Let me try to sell this to you:
- Your stack usage shrinks by 736 bytes (as per previous estimation)
- If this is a new one, you attach the pointer to the struct cgw_job so
  it is not lost.
- If this is an update, you avoid memcpy() and use rcu_replace_pointer()
  as it is intended.

The construct of yours has the problem that it does not wait until the
RCU grace period completes. Meaning on first update mod1 is used, you
switch over to mod2. On the second update mod2 is used and you switch
back to mod1. There is no guarantee that all current users of mod ->
mod1 are gone by the time you switch from mod2 back to mod1. This
of course requires two updates (one after the other).

If you allocate the struct on entry (assuming the salesmen me was
successful) you could use 

	old_mod = rcu_replace_pointer(gwj->mod, new_mod, lockdep_rtnl_is_held());

to grab the about to be replaced cf_mod and once the replacement is
over and at the end, kfree_rcu() on it or
	kfree_rcu_mightsleep(old_mod);

to save an RCU head.

>  			return 0;
>  		}
>  	}
>  
>  	/* ifindex == 0 is not allowed for job creation */
> @@ -1219,16 +1226,16 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh,
>  
>  		if (gwj->limit_hops != limhops)
>  			continue;
>  
>  		/* we have a match when uid is enabled and identical */
> -		if (gwj->mod.uid || mod.uid) {
> -			if (gwj->mod.uid != mod.uid)
> +		if (gwj->mod->uid || mod.uid) {
> +			if (gwj->mod->uid != mod.uid)
>  				continue;

Also here you must use rcu_dereference() for gwj->mod. Since all
add/remove jobs happen under the RTNL lock you could use
	rcu_dereference_protected(, lockdep_rtnl_is_held());

Sebastian

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RFC PATCH] can: gw: fix RCU/BH usage in cgw_create_job()
  2024-01-11 12:14 ` Sebastian Andrzej Siewior
@ 2024-01-21 10:07   ` Oliver Hartkopp
  2024-01-22 10:10     ` Sebastian Andrzej Siewior
  0 siblings, 1 reply; 5+ messages in thread
From: Oliver Hartkopp @ 2024-01-21 10:07 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior; +Cc: linux-can, tglx

On 2024-01-11 13:14, Sebastian Andrzej Siewior wrote:

> Why are you afraid of doing
> 	mod = kmalloc(sizeof(*mod), GFP_KERNEL);
> 
> before invoking cgw_parse_attr()?

The update of the modification content should be performed instantly and 
without any potential scheduling from kmalloc().

As you pointed out one of the problems may arise from changing the 
modification functions but not from changing the modification content.

So what about the below patch then?

Would a spin_lock() or spin_lock_bh() be an alternative to lock this 
update against the modification execution in can_can_gw_rcv()?

Best regards,
Oliver

diff --git a/net/can/gw.c b/net/can/gw.c
index 37528826935e..79aeb380b66a 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -1085,21 +1085,27 @@ static int cgw_create_job(struct sk_buff *skb, 
struct nlmsghdr *nlh,
  	if (mod.uid) {
  		ASSERT_RTNL();

  		/* check for updating an existing job with identical uid */
  		hlist_for_each_entry(gwj, &net->can.cgw_list, list) {
+			int mi;
+
  			if (gwj->mod.uid != mod.uid)
  				continue;

  			/* interfaces & filters must be identical */
  			if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw)))
  				return -EINVAL;

-			/* update modifications with disabled softirq & quit */
-			local_bh_disable();
+			/* modification functions must be identical */
+			for (mi = 0; mi < MAX_MODFUNCTIONS; mi++) {
+				if (gwj->mod.modfunc[mi] != mod.modfunc[mi])
+					return -EINVAL;
+			}
+
+			/* update only the modification content & quit */
  			memcpy(&gwj->mod, &mod, sizeof(mod));
-			local_bh_enable();
  			return 0;
  		}
  	}

  	/* ifindex == 0 is not allowed for job creation */


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [RFC PATCH] can: gw: fix RCU/BH usage in cgw_create_job()
  2024-01-21 10:07   ` Oliver Hartkopp
@ 2024-01-22 10:10     ` Sebastian Andrzej Siewior
  2024-04-29  9:47       ` Sebastian Andrzej Siewior
  0 siblings, 1 reply; 5+ messages in thread
From: Sebastian Andrzej Siewior @ 2024-01-22 10:10 UTC (permalink / raw)
  To: Oliver Hartkopp; +Cc: linux-can, tglx

On 2024-01-21 11:07:34 [+0100], Oliver Hartkopp wrote:
> On 2024-01-11 13:14, Sebastian Andrzej Siewior wrote:
> 
> > Why are you afraid of doing
> > 	mod = kmalloc(sizeof(*mod), GFP_KERNEL);
> > 
> > before invoking cgw_parse_attr()?
> 
> The update of the modification content should be performed instantly and
> without any potential scheduling from kmalloc().

In most cases, the memory would be acquired without a delay. But if the
memory is tight it will get delayed over returning a NULL pointer and
(depending on the size) the NULL pointer can still be returned.
This code path is preemptible so the scheduler _can_ preempt the code in
favour of another task.

> As you pointed out one of the problems may arise from changing the
> modification functions but not from changing the modification content.
> 
> So what about the below patch then?

Well, you do change parts of the data structure while the other side can
read it so I would say no. Also, in general there is no guarantee if the
memcpy() implementation copies the data from front or the end. 

> Would a spin_lock() or spin_lock_bh() be an alternative to lock this update
> against the modification execution in can_can_gw_rcv()?

Yes, that is what locks are for. You would have to put the lock outside
of ccgw and acquire it before reading or writing ccgw.

> Best regards,
> Oliver

Sebastian

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RFC PATCH] can: gw: fix RCU/BH usage in cgw_create_job()
  2024-01-22 10:10     ` Sebastian Andrzej Siewior
@ 2024-04-29  9:47       ` Sebastian Andrzej Siewior
  0 siblings, 0 replies; 5+ messages in thread
From: Sebastian Andrzej Siewior @ 2024-04-29  9:47 UTC (permalink / raw)
  To: Oliver Hartkopp; +Cc: linux-can, tglx

On 2024-01-22 11:10:38 [+0100], To Oliver Hartkopp wrote:
> > Would a spin_lock() or spin_lock_bh() be an alternative to lock this update
> > against the modification execution in can_can_gw_rcv()?
> 
> Yes, that is what locks are for. You would have to put the lock outside
> of ccgw and acquire it before reading or writting ccgw.

Hi :)

> > Best regards,
> > Oliver

Sebastian

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2024-04-29  9:47 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-12-21 12:37 [RFC PATCH] can: gw: fix RCU/BH usage in cgw_create_job() Oliver Hartkopp
2024-01-11 12:14 ` Sebastian Andrzej Siewior
2024-01-21 10:07   ` Oliver Hartkopp
2024-01-22 10:10     ` Sebastian Andrzej Siewior
2024-04-29  9:47       ` Sebastian Andrzej Siewior

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).