linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Re: linux-2.6.12-rc1-mm4-RT-V0.7.42-08
       [not found] <1112433149.13131.8.camel@twins>
@ 2005-04-03  5:59 ` Peter Zijlstra
  2005-04-04 17:40   ` linux-2.6.12-rc1-mm4-RT-V0.7.42-08 Ingo Molnar
  0 siblings, 1 reply; 3+ messages in thread
From: Peter Zijlstra @ 2005-04-03  5:59 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: LKML

[-- Attachment #1: Type: text/plain, Size: 2344 bytes --]

Hi Ingo,

I need the following two patches to keep my system alive and avoid
the BUGs in the log send to you earlier (private mail).

------------------- example BUG ------------------------

BUG: using smp_processor_id() in preemptible [00000001] code: java/16460
caller is icmp_send+0x9b/0x3d0
 [<c020b1db>] smp_processor_id+0x9b/0xb0 (8)
 [<c032246b>] icmp_send+0x9b/0x3d0 (28)
 [<c013ae67>] check_wakeup_timing+0x47/0x160 (8)
 [<c034269d>] _raw_spin_unlock+0xd/0x30 (40)
 [<c014d159>] kmem_cache_free+0x39/0x120 (8)
 [<c034269d>] _raw_spin_unlock+0xd/0x30 (12)
 [<c014d159>] kmem_cache_free+0x39/0x120 (8)
 [<c034269d>] _raw_spin_unlock+0xd/0x30 (24)
 [<c034269d>] _raw_spin_unlock+0xd/0x30 (28)
 [<c0276066>] rtl8139_start_xmit+0x86/0x1d0 (8)
 [<c034269d>] _raw_spin_unlock+0xd/0x30 (40)
 [<c02f3259>] qdisc_restart+0x1b9/0x230 (8)
 [<c02f3259>] qdisc_restart+0x1b9/0x230 (36)
 [<c034269d>] _raw_spin_unlock+0xd/0x30 (8)
 [<c02760ae>] rtl8139_start_xmit+0xce/0x1d0 (16)
 [<c034269d>] _raw_spin_unlock+0xd/0x30 (32)
 [<c02ffe33>] ip_fragment+0x663/0x7f0 (60)
 [<c013138f>] rcu_read_unlock+0x5f/0x70 (32)
 [<c02f9d2a>] __ip_route_output_key+0xba/0xe0 (8)
 [<c02fe950>] ip_finish_output+0x0/0x2f0 (16)
 [<c02ff383>] ip_queue_xmit+0x263/0x590 (24)
 [<c02ff383>] ip_queue_xmit+0x263/0x590 (20)
 [<c03102eb>] tcp_transmit_skb+0x38b/0x750 (196)
 [<c031113e>] tcp_write_xmit+0x10e/0x2f0 (56)
 [<c03054ab>] tcp_sendmsg+0xffb/0x1050 (52)
 [<c034269d>] _raw_spin_unlock+0xd/0x30 (12)
 [<c013b9c5>] unqueue_me+0x25/0x130 (8)
 [<c0325f4a>] inet_sendmsg+0x4a/0x70 (92)
 [<c02dd795>] sock_aio_write+0x135/0x150 (24)
 [<c034269d>] _raw_spin_unlock+0xd/0x30 (88)
 [<c0163c69>] do_sync_write+0xb9/0x110 (20)
 [<c034269d>] _raw_spin_unlock+0xd/0x30 (44)
 [<c0164d52>] fget_light+0x62/0x90 (8)
 [<c034269d>] _raw_spin_unlock+0xd/0x30 (44)
 [<c0134c50>] autoremove_wake_function+0x0/0x50 (8)
 [<c0163d9b>] vfs_write+0xdb/0x170 (52)
 [<c0163ee1>] sys_write+0x41/0x70 (24)
 [<c0102f0b>] sysenter_past_esp+0x54/0x75 (28)
---------------------------
| preempt count: 00000002 ]
| 2-level deep critical section nesting:
----------------------------------------
.. [<c03422e6>] .... _raw_spin_lock+0x16/0x90
.....[<00000000>] ..   ( <= 0x0)
.. [<c03422e6>] .... _raw_spin_lock+0x16/0x90
.....[<00000000>] ..   ( <= 0x0)


-- 
Peter Zijlstra <a.p.zijlstra@chello.nl>

[-- Attachment #2: 2.6.12-rc1-icmp.patch --]
[-- Type: text/x-patch, Size: 903 bytes --]



Since there is no lock held yet we are still preemptable; and since 
icmp_socket is a per cpu variable switching cpus gives weird results.


Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>


--- /usr/src/linux-2.6.12-rc1-mm4-RT-V0.7.42-08/net/ipv4/icmp.c~	2005-03-25 10:14:32.000000000 +0100
+++ /usr/src/linux-2.6.12-rc1-mm4-RT-V0.7.42-08/net/ipv4/icmp.c	2005-04-02 18:58:19.000000000 +0200
@@ -376,8 +376,8 @@
 
 static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 {
-	struct sock *sk = icmp_socket->sk;
-	struct inet_sock *inet = inet_sk(sk);
+	struct sock *sk;
+	struct inet_sock *inet;
 	struct ipcm_cookie ipc;
 	struct rtable *rt = (struct rtable *)skb->dst;
 	u32 daddr;
@@ -388,6 +388,9 @@
 	if (icmp_xmit_lock())
 		return;
 
+	sk = icmp_socket->sk;
+	inet = inet_sk(sk);
+
 	icmp_param->data.icmph.checksum = 0;
 	icmp_out_count(icmp_param->data.icmph.type);
 

[-- Attachment #3: 2.6.12-rc1-icmp-rt.patch --]
[-- Type: text/x-patch, Size: 4296 bytes --]


Since on PREEMPT_RT spinlocks allow preemption, it's possible to
change cpu at (almost) any point in time. Make sure we stick to
the per cpu variable we started with.


Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>


--- /usr/src/linux-2.6.12-rc1-mm4-RT-V0.7.42-08/net/ipv4/icmp_orig.c	2005-04-03 00:09:08.000000000 +0200
+++ /usr/src/linux-2.6.12-rc1-mm4-RT-V0.7.42-08/net/ipv4/icmp.c	2005-04-03 00:20:22.000000000 +0200
@@ -228,27 +228,32 @@
  *
  *	On SMP we have one ICMP socket per-cpu.
  */
-static DEFINE_PER_CPU(struct socket *, __icmp_socket) = NULL;
-#define icmp_socket	__get_cpu_var(__icmp_socket)
+static DEFINE_PER_CPU_LOCKED(struct socket *, __icmp_socket) = NULL;
 
 static __inline__ struct socket * icmp_xmit_lock(int cpu)
 {
+	struct socket *icmp_socket;
+
 	local_bh_disable();
+	icmp_socket = get_cpu_var_locked(__icmp_socket, cpu);
 
 	if (unlikely(!spin_trylock(&icmp_socket->sk->sk_lock.slock))) {
 		/* This can happen if the output path signals a
 		 * dst_link_failure() for an outgoing ICMP packet.
 		 */
+		put_cpu_var_locked(__icmp_socket, cpu);
 		local_bh_enable();
-		return 1;
+		return NULL;
 	}
 
-	return 0;
+	return icmp_socket;
 }
 
 static void icmp_xmit_unlock(int cpu)
 {
+	struct socket *icmp_socket = __get_cpu_var_locked(__icmp_socket, cpu);
 	spin_unlock_bh(&icmp_socket->sk->sk_lock.slock);
+	put_cpu_var_locked(__icmp_socket, cpu);
 }
 
 /*
@@ -345,7 +350,8 @@
 }
 
 static void icmp_push_reply(struct icmp_bxm *icmp_param,
-			    struct ipcm_cookie *ipc, struct rtable *rt)
+			    struct ipcm_cookie *ipc, struct rtable *rt,
+				struct socket *icmp_socket)
 {
 	struct sk_buff *skb;
 
@@ -377,16 +383,19 @@
 
 static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 {
+	struct socket *icmp_socket;
 	struct sock *sk;
 	struct inet_sock *inet;
 	struct ipcm_cookie ipc;
 	struct rtable *rt = (struct rtable *)skb->dst;
 	u32 daddr;
+	int cpu;
 
 	if (ip_options_echo(&icmp_param->replyopts, skb))
 		goto out;
 
-	if (icmp_xmit_lock())
+	cpu = _smp_processor_id();
+	if (!(icmp_socket = icmp_xmit_lock(cpu)))
 		return;
 
 	sk = icmp_socket->sk;
@@ -414,10 +423,10 @@
 	}
 	if (icmpv4_xrlim_allow(rt, icmp_param->data.icmph.type,
 			       icmp_param->data.icmph.code))
-		icmp_push_reply(icmp_param, &ipc, rt);
+		icmp_push_reply(icmp_param, &ipc, rt, icmp_socket);
 	ip_rt_put(rt);
 out_unlock:
-	icmp_xmit_unlock();
+	icmp_xmit_unlock(cpu);
 out:;
 }
 
@@ -435,8 +444,9 @@
 
 void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info)
 {
+	struct socket *icmp_socket;
 	struct iphdr *iph;
-	int room;
+	int room, cpu;
 	struct icmp_bxm icmp_param;
 	struct rtable *rt = (struct rtable *)skb_in->dst;
 	struct ipcm_cookie ipc;
@@ -507,7 +517,8 @@
 		}
 	}
 
-	if (icmp_xmit_lock())
+	cpu = _smp_processor_id();
+	if (!(icmp_socket = icmp_xmit_lock(cpu)))
 		return;
 
 	/*
@@ -580,11 +591,11 @@
 		icmp_param.data_len = room;
 	icmp_param.head_len = sizeof(struct icmphdr);
 
-	icmp_push_reply(&icmp_param, &ipc, rt);
+	icmp_push_reply(&icmp_param, &ipc, rt, icmp_socket);
 ende:
 	ip_rt_put(rt);
 out_unlock:
-	icmp_xmit_unlock();
+	icmp_xmit_unlock(cpu);
 out:;
 }
 
@@ -1116,20 +1127,20 @@
 			continue;
 
 		err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP,
-				       &per_cpu(__icmp_socket, i));
+				       &__get_cpu_var_locked(__icmp_socket, i));
 
 		if (err < 0)
 			panic("Failed to create the ICMP control socket.\n");
 
-		per_cpu(__icmp_socket, i)->sk->sk_allocation = GFP_ATOMIC;
+		__get_cpu_var_locked(__icmp_socket, i)->sk->sk_allocation = GFP_ATOMIC;
 
 		/* Enough space for 2 64K ICMP packets, including
 		 * sk_buff struct overhead.
 		 */
-		per_cpu(__icmp_socket, i)->sk->sk_sndbuf =
+		__get_cpu_var_locked(__icmp_socket, i)->sk->sk_sndbuf =
 			(2 * ((64 * 1024) + sizeof(struct sk_buff)));
 
-		inet = inet_sk(per_cpu(__icmp_socket, i)->sk);
+		inet = inet_sk(__get_cpu_var_locked(__icmp_socket, i)->sk);
 		inet->uc_ttl = -1;
 		inet->pmtudisc = IP_PMTUDISC_DONT;
 
@@ -1137,7 +1148,7 @@
 		 * see it, we do not wish this socket to see incoming
 		 * packets.
 		 */
-		per_cpu(__icmp_socket, i)->sk->sk_prot->unhash(per_cpu(__icmp_socket, i)->sk);
+		__get_cpu_var_locked(__icmp_socket, i)->sk->sk_prot->unhash(__get_cpu_var_locked(__icmp_socket, i)->sk);
 	}
 }
 

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: linux-2.6.12-rc1-mm4-RT-V0.7.42-08
  2005-04-03  5:59 ` linux-2.6.12-rc1-mm4-RT-V0.7.42-08 Peter Zijlstra
@ 2005-04-04 17:40   ` Ingo Molnar
  2005-04-04 19:21     ` linux-2.6.12-rc1-mm4-RT-V0.7.42-08 Peter Zijlstra
  0 siblings, 1 reply; 3+ messages in thread
From: Ingo Molnar @ 2005-04-04 17:40 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: LKML


* Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:

> Hi Ingo,
> 
> I need the following two patches to keep my system alive and avoid
> the BUGs in the log send to you earlier (private mail).

hm, the second patch does not apply (and the merge didnt look trivial) - 
maybe it depends on some patch in -mm that is not yet upstream?

	Ingo

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: linux-2.6.12-rc1-mm4-RT-V0.7.42-08
  2005-04-04 17:40   ` linux-2.6.12-rc1-mm4-RT-V0.7.42-08 Ingo Molnar
@ 2005-04-04 19:21     ` Peter Zijlstra
  0 siblings, 0 replies; 3+ messages in thread
From: Peter Zijlstra @ 2005-04-04 19:21 UTC (permalink / raw)
  To: LKML

[-- Attachment #1: Type: text/plain, Size: 1144 bytes --]

On Mon, 2005-04-04 at 19:40 +0200, Ingo Molnar wrote:
> * Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> 
> > Hi Ingo,
> > 
> > I need the following two patches to keep my system alive and avoid
> > the BUGs in the log send to you earlier (private mail).
> 
> hm, the second patch does not apply (and the merge didnt look trivial) - 
> maybe it depends on some patch in -mm that is not yet upstream?
> 

I messed it up; apparently I type :wa to often and forgot to keep a
decent base file (yes I'm a vim user). I also seem to have gotten the
patch base dir wrong.

The two patches again; now hopefully with correct patch level and proper
base. They apply to my tree in order 2.6.12-rc1-icmp{,-rt}.patch.

Not ran nor compile tested on vanilla; just made then apply cleanly.

Could you please comment on the patches since I'm learning this stuff as
I go; and validation of my ideas would be most welcome. For instance;
was I right to convert the per cpu var __icmp_socket to a _LOCKED?
I think so since without the extra lock one could have race conditions
within the spin_trylock arguments double dereference.

Kind regards,

Peter Zijlstra


[-- Attachment #2: 2.6.12-rc1-icmp.patch --]
[-- Type: text/x-patch, Size: 881 bytes --]



Since there is no lock held yet we are still preemptable; and since 
icmp_socket is a per cpu variable switching cpus gives weird results.


Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>


--- linux-2.6.12-rc1-RT-V0.7.42-08/net/ipv4/icmp_base.c	2005-04-04 20:26:23.000000000 +0200
+++ linux-2.6.12-rc1-RT-V0.7.42-08/net/ipv4/icmp.c	2005-04-04 20:26:38.000000000 +0200
@@ -376,8 +376,8 @@
 
 static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 {
-	struct sock *sk = icmp_socket->sk;
-	struct inet_sock *inet = inet_sk(sk);
+	struct sock *sk;
+	struct inet_sock *inet;
 	struct ipcm_cookie ipc;
 	struct rtable *rt = (struct rtable *)skb->dst;
 	u32 daddr;
@@ -388,6 +388,9 @@
 	if (icmp_xmit_lock())
 		return;
 
+	sk = icmp_socket->sk;
+	inet = inet_sk(sk);
+
 	icmp_param->data.icmph.checksum = 0;
 	icmp_out_count(icmp_param->data.icmph.type);
 

[-- Attachment #3: 2.6.12-rc1-icmp-rt.patch --]
[-- Type: text/x-patch, Size: 4348 bytes --]


Since on PREEMPT_RT spinlocks allow preemption, it's possible to
change cpu at (almost) any point in time. Make sure we stick to
the per cpu variable we started with.


Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>


--- linux-2.6.12-rc1-RT-V0.7.42-08/net/ipv4/icmp_p1.c	2005-04-04 20:26:38.000000000 +0200
+++ linux-2.6.12-rc1-RT-V0.7.42-08/net/ipv4/icmp.c	2005-04-04 20:27:54.000000000 +0200
@@ -228,26 +228,32 @@
  *
  *	On SMP we have one ICMP socket per-cpu.
  */
-static DEFINE_PER_CPU(struct socket *, __icmp_socket) = NULL;
-#define icmp_socket	__get_cpu_var(__icmp_socket)
+static DEFINE_PER_CPU_LOCKED(struct socket *, __icmp_socket) = NULL;
 
-static __inline__ int icmp_xmit_lock(void)
+static __inline__ struct socket * icmp_xmit_lock(int cpu)
 {
+	struct socket *icmp_socket;
+
 	local_bh_disable();
+	icmp_socket = get_cpu_var_locked(__icmp_socket, cpu);
 
 	if (unlikely(!spin_trylock(&icmp_socket->sk->sk_lock.slock))) {
 		/* This can happen if the output path signals a
 		 * dst_link_failure() for an outgoing ICMP packet.
 		 */
+		put_cpu_var_locked(__icmp_socket, cpu);
 		local_bh_enable();
-		return 1;
+		return NULL;
 	}
-	return 0;
+
+	return icmp_socket;
 }
 
-static void icmp_xmit_unlock(void)
+static void icmp_xmit_unlock(int cpu)
 {
+	struct socket *icmp_socket = __get_cpu_var_locked(__icmp_socket, cpu);
 	spin_unlock_bh(&icmp_socket->sk->sk_lock.slock);
+	put_cpu_var_locked(__icmp_socket, cpu);
 }
 
 /*
@@ -344,7 +350,8 @@
 }
 
 static void icmp_push_reply(struct icmp_bxm *icmp_param,
-			    struct ipcm_cookie *ipc, struct rtable *rt)
+			    struct ipcm_cookie *ipc, struct rtable *rt,
+				struct socket *icmp_socket)
 {
 	struct sk_buff *skb;
 
@@ -376,16 +383,19 @@
 
 static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 {
+	struct socket *icmp_socket;
 	struct sock *sk;
 	struct inet_sock *inet;
 	struct ipcm_cookie ipc;
 	struct rtable *rt = (struct rtable *)skb->dst;
 	u32 daddr;
+	int cpu;
 
 	if (ip_options_echo(&icmp_param->replyopts, skb))
 		goto out;
 
-	if (icmp_xmit_lock())
+	cpu = _smp_processor_id();
+	if (!(icmp_socket = icmp_xmit_lock(cpu)))
 		return;
 
 	sk = icmp_socket->sk;
@@ -413,10 +423,10 @@
 	}
 	if (icmpv4_xrlim_allow(rt, icmp_param->data.icmph.type,
 			       icmp_param->data.icmph.code))
-		icmp_push_reply(icmp_param, &ipc, rt);
+		icmp_push_reply(icmp_param, &ipc, rt, icmp_socket);
 	ip_rt_put(rt);
 out_unlock:
-	icmp_xmit_unlock();
+	icmp_xmit_unlock(cpu);
 out:;
 }
 
@@ -434,8 +444,9 @@
 
 void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info)
 {
+	struct socket *icmp_socket;
 	struct iphdr *iph;
-	int room;
+	int room, cpu;
 	struct icmp_bxm icmp_param;
 	struct rtable *rt = (struct rtable *)skb_in->dst;
 	struct ipcm_cookie ipc;
@@ -506,7 +517,8 @@
 		}
 	}
 
-	if (icmp_xmit_lock())
+	cpu = _smp_processor_id();
+	if (!(icmp_socket = icmp_xmit_lock(cpu)))
 		return;
 
 	/*
@@ -579,11 +591,11 @@
 		icmp_param.data_len = room;
 	icmp_param.head_len = sizeof(struct icmphdr);
 
-	icmp_push_reply(&icmp_param, &ipc, rt);
+	icmp_push_reply(&icmp_param, &ipc, rt, icmp_socket);
 ende:
 	ip_rt_put(rt);
 out_unlock:
-	icmp_xmit_unlock();
+	icmp_xmit_unlock(cpu);
 out:;
 }
 
@@ -1115,20 +1127,20 @@
 			continue;
 
 		err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP,
-				       &per_cpu(__icmp_socket, i));
+				       &__get_cpu_var_locked(__icmp_socket, i));
 
 		if (err < 0)
 			panic("Failed to create the ICMP control socket.\n");
 
-		per_cpu(__icmp_socket, i)->sk->sk_allocation = GFP_ATOMIC;
+		__get_cpu_var_locked(__icmp_socket, i)->sk->sk_allocation = GFP_ATOMIC;
 
 		/* Enough space for 2 64K ICMP packets, including
 		 * sk_buff struct overhead.
 		 */
-		per_cpu(__icmp_socket, i)->sk->sk_sndbuf =
+		__get_cpu_var_locked(__icmp_socket, i)->sk->sk_sndbuf =
 			(2 * ((64 * 1024) + sizeof(struct sk_buff)));
 
-		inet = inet_sk(per_cpu(__icmp_socket, i)->sk);
+		inet = inet_sk(__get_cpu_var_locked(__icmp_socket, i)->sk);
 		inet->uc_ttl = -1;
 		inet->pmtudisc = IP_PMTUDISC_DONT;
 
@@ -1136,7 +1148,7 @@
 		 * see it, we do not wish this socket to see incoming
 		 * packets.
 		 */
-		per_cpu(__icmp_socket, i)->sk->sk_prot->unhash(per_cpu(__icmp_socket, i)->sk);
+		__get_cpu_var_locked(__icmp_socket, i)->sk->sk_prot->unhash(__get_cpu_var_locked(__icmp_socket, i)->sk);
 	}
 }
 

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2005-04-04 19:23 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <1112433149.13131.8.camel@twins>
2005-04-03  5:59 ` linux-2.6.12-rc1-mm4-RT-V0.7.42-08 Peter Zijlstra
2005-04-04 17:40   ` linux-2.6.12-rc1-mm4-RT-V0.7.42-08 Ingo Molnar
2005-04-04 19:21     ` linux-2.6.12-rc1-mm4-RT-V0.7.42-08 Peter Zijlstra

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).