All of lore.kernel.org
 help / color / mirror / Atom feed
* Oops in drivers\base\firmware_class
@ 2009-09-16 18:44 Lars Ericsson
  2009-09-16 20:57 ` Frederik Deweerdt
  0 siblings, 1 reply; 7+ messages in thread
From: Lars Ericsson @ 2009-09-16 18:44 UTC (permalink / raw)
  To: David.Woodhouse, sachinp; +Cc: linux-kernel

[-- Attachment #1: Type: text/plain, Size: 674 bytes --]

Hi,

I have discovered a Oops in the firmware_loading_store function. 
At first it looks like a timing issue but after adding a BUG_ON test,
it fails every time. 

drivers\base\firmware_class:
------------------------------
 541 01c0 F6463401 	testb $1,52(%esi)
 542 01c4 0F843FFF 	je .L38
 542      FFFF
 543              	.loc 1 174 0
 544 01ca 8B4630   	movl 48(%esi),%eax
 545 01cd 8B4004   	movl 4(%eax),%eax	<---- Oops
 546 01d0 E8FCFFFF 	call vfree
 546      FF

The code that fails was introduced in commit
6e03a201bbe8137487f340d26aa662110e324b20 

Attached you will find the:
- Oops with the vanilla 2.6.31
- The BUG_ON patch
- Oops with the patched 2.6.31

/Lars

[-- Attachment #2: Vanilla_Opps.txt --]
[-- Type: text/plain, Size: 2407 bytes --]

[    7.249453] rt61pci 0000:00:09.0: firmware: requesting rt2561s.bin
[    7.314512] BUG: unable to handle kernel NULL pointer dereference at 00000004
[    7.315639] IP: [<d04d51cd>] firmware_loading_store+0xed/0x160 [firmware_class]
[    7.315639] *pde = 00000000
[    7.315639] Oops: 0000 [#1] PREEMPT
[    7.315639] last sysfs file: /sys/devices/pci0000:00/0000:00:09.0/firmware/0000:00:09.0/loading
[    7.315639] Modules linked in: arc4 ecb cryptomgr crypto_blkcipher aead pcompress crypto_hash crypto_algapi rt61pci rt2x00pci rt2x00lib firmware_class mac80211 input_polldev crc_itu_t eeprom_93cx6 cfg80211 ndccanram ndccan ndcser(P) ndcscan(P) eu16550(P) ndccon(P)
[    7.315639]
[    7.315639] Pid: 1004, comm: echo Tainted: P           (2.6.31 #1)
[    7.315639] EIP: 0060:[<d04d51cd>] EFLAGS: 00010202 CPU: 0
[    7.315639] EIP is at firmware_loading_store+0xed/0x160 [firmware_class]
[    7.315639] EAX: 00000000 EBX: d04d50e0 ECX: 0000000a EDX: 00000028
[    7.315639] ESI: cf65ca80 EDI: cea29c00 EBP: cea29c08 ESP: cf777f2c
[    7.315639]  DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068
[    7.315639] Process echo (pid: 1004, ti=cf776000 task=cf755aa0 task.ti=cf776000)
[    7.315639] Stack:
[    7.315639]  d04d50e0 cea29c00 00000002 c028051d 00000002 c0414c40 ce9c0420 c01c9c98
[    7.315639] <0> 00000002 c0414c40 00000002 b7f15000 ce9c1e80 cf44c000 c01c9ce6 ce9c1e94
[    7.315639] <0> cf44c000 b7f15000 00000002 c01c9cb0 c018d362 cf777f9c cf44c000 fffffff7
[    7.315639] Call Trace:
[    7.315639]  [<d04d50e0>] ? firmware_loading_store+0x0/0x160 [firmware_class]
[    7.315639]  [<c028051d>] ? dev_attr_store+0x1d/0x20
[    7.315639]  [<c01c9c98>] ? flush_write_buffer+0x38/0x50
[    7.315639]  [<c01c9ce6>] ? sysfs_write_file+0x36/0x60
[    7.315639]  [<c01c9cb0>] ? sysfs_write_file+0x0/0x60
[    7.315639]  [<c018d362>] ? vfs_write+0x82/0xf0
[    7.315639]  [<c018d47c>] ? sys_write+0x3c/0x70
[    7.315639]  [<c0102ba1>] ? syscall_call+0x7/0xb
[    7.315639] Code: 91 c9 ef 3b 5e 3c 7c ed eb b5 8d 74 26 00 83 f8 ff 0f 85 50 ff ff ff e9 6a ff ff ff 89 f6 f6 46 34 01 0f 84 3f ff ff ff 8b 46 30 <8b> 40 04 e8 5b 05 cb ef 31 c9 8b 56 3c 8b 46 38 8b 5e 30 68 61
[    7.315639] EIP: [<d04d51cd>] firmware_loading_store+0xed/0x160 [firmware_class] SS:ESP 0068:cf777f2c
[    7.315639] CR2: 0000000000000004
[    7.540848] ---[ end trace 6ebe83c102d3b046 ]---

[-- Attachment #3: BUG_ON_firmware_class.c.patch --]
[-- Type: application/octet-stream, Size: 664 bytes --]

diff -Nurp 1/linux-2.6.31/drivers/base/firmware_class.c linux/linux-2.6.31/drivers/base/firmware_class.c
--- ./drivers/base/firmware_class.c	2009-09-16 14:17:45.000000000 +0200
+++ ./drivers/base/firmware_class.c	2009-09-16 14:20:46.000000000 +0200
@@ -171,6 +172,8 @@ static ssize_t firmware_loading_store(st
 		break;
 	case 0:
 		if (test_bit(FW_STATUS_LOADING, &fw_priv->status)) {
+			/* http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=6e03a201bbe8137487f340d26aa662110e324b20 */
+			BUG_ON(fw_priv->fw == NULL);
 			vfree(fw_priv->fw->data);
 			fw_priv->fw->data = vmap(fw_priv->pages,
 						 fw_priv->nr_pages,

[-- Attachment #4: BUG_ON_Oops.txt --]
[-- Type: text/plain, Size: 2237 bytes --]

[    7.242449] ------------[ cut here ]------------
[    7.243706] Kernel BUG at d04d523e [verbose debug info unavailable]
[    7.243706] invalid opcode: 0000 [#1] PREEMPT
[    7.243706] last sysfs file: /sys/devices/pci0000:00/0000:00:09.0/firmware/0000:00:09.0/loading
[    7.243706] Modules linked in: arc4 ecb cryptomgr crypto_blkcipher aead pcompress crypto_hash crypto_algapi rt61pci rt2x00pci rt2x00lib firmware_class mac80211 input_polldev crc_itu_t eeprom_93cx6 cfg80211 ndccanram ndccan ndcser(P) ndcscan(P) eu16550(P) ndccon(P)
[    7.243706]
[    7.243706] Pid: 1004, comm: echo Tainted: P           (2.6.31 #1)
[    7.243706] EIP: 0060:[<d04d523e>] EFLAGS: 00010246 CPU: 0
[    7.243706] EIP is at firmware_loading_store+0x15e/0x170 [firmware_class]
[    7.243706] EAX: 00000000 EBX: d04d50e0 ECX: 0000000a EDX: 00000028
[    7.243706] ESI: cf4f2a20 EDI: cf4ffc00 EBP: cf4ffc08 ESP: cea37f2c
[    7.243706]  DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068
[    7.243706] Process echo (pid: 1004, ti=cea36000 task=cf6c2380 task.ti=cea36000)
[    7.243706] Stack:
[    7.243706]  d04d50e0 cf4ffc00 00000002 c028051d 00000002 c0414c40 cf4fd420 c01c9c98
[    7.243706] <0> 00000002 c0414c40 00000002 b7fc8000 cf636e80 cf471980 c01c9ce6 cf636e94
[    7.243706] <0> cf471980 b7fc8000 00000002 c01c9cb0 c018d362 cea37f9c cf471980 fffffff7
[    7.243706] Call Trace:
[    7.243706]  [<d04d50e0>] ? firmware_loading_store+0x0/0x170 [firmware_class]
[    7.243706]  [<c028051d>] ? dev_attr_store+0x1d/0x20
[    7.243706]  [<c01c9c98>] ? flush_write_buffer+0x38/0x50
[    7.243706]  [<c01c9ce6>] ? sysfs_write_file+0x36/0x60
[    7.243706]  [<c01c9cb0>] ? sysfs_write_file+0x0/0x60
[    7.243706]  [<c018d362>] ? vfs_write+0x82/0xf0
[    7.243706]  [<c018d47c>] ? sys_write+0x3c/0x70
[    7.243706]  [<c0102ba1>] ? syscall_call+0x7/0xb
[    7.243706] Code: 66 34 fe e9 14 ff ff ff 8b 47 08 68 1c 5f 4d d0 50 89 f8 e8 65 b2 da ef 50 68 c3 5f 4d d0 e8 da 3c c5 ef 83 c4 10 e9 ea fe ff ff <0f> 0b eb fe 8d b4 26 00 00 00 00 8d bc 27 00 00 00 00 55 89 cd
[    7.243706] EIP: [<d04d523e>] firmware_loading_store+0x15e/0x170 [firmware_class] SS:ESP 0068:cea37f2c
[    7.458557] ---[ end trace 604593037057054f ]---

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: Oops in drivers\base\firmware_class
  2009-09-16 18:44 Oops in drivers\base\firmware_class Lars Ericsson
@ 2009-09-16 20:57 ` Frederik Deweerdt
  2009-09-18 17:53   ` Lars Ericsson
  0 siblings, 1 reply; 7+ messages in thread
From: Frederik Deweerdt @ 2009-09-16 20:57 UTC (permalink / raw)
  To: Lars Ericsson; +Cc: David.Woodhouse, sachinp, linux-kernel

Hello Lars,

On Wed, Sep 16, 2009 at 08:44:43PM +0200, Lars Ericsson wrote:
> Hi,
> 
> I have discovered a Oops in the firmware_loading_store function. 
> At first it looks like a timing issue but after adding a BUG_ON test,
> it fails every time. 
> 
> drivers\base\firmware_class:
> ------------------------------
>  541 01c0 F6463401 	testb $1,52(%esi)
>  542 01c4 0F843FFF 	je .L38
>  542      FFFF
>  543              	.loc 1 174 0
>  544 01ca 8B4630   	movl 48(%esi),%eax
>  545 01cd 8B4004   	movl 4(%eax),%eax	<---- Oops
>  546 01d0 E8FCFFFF 	call vfree
>  546      FF
> 
> The code that fails was introduced in commit
> 6e03a201bbe8137487f340d26aa662110e324b20 
Could you try the following patch?

It seems that we're missing the locking around the accesses to
fw_priv->fw.

Thanks,
Frederik

Signed-off-by: Frederik Deweerdt <frederik.deweerdt@xprog.eu>

diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index 8a267c4..49105ed 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -171,12 +171,18 @@ static ssize_t firmware_loading_store(struct device *dev,
 		break;
 	case 0:
 		if (test_bit(FW_STATUS_LOADING, &fw_priv->status)) {
+			mutex_lock(&fw_lock);
+			if (!fw_priv->fw) {
+				mutex_unlock(&fw_lock);
+				break;
+			}
 			vfree(fw_priv->fw->data);
 			fw_priv->fw->data = vmap(fw_priv->pages,
 						 fw_priv->nr_pages,
 						 0, PAGE_KERNEL_RO);
 			if (!fw_priv->fw->data) {
 				dev_err(dev, "%s: vmap() failed\n", __func__);
+				mutex_unlock(&fw_lock);
 				goto err;
 			}
 			/* Pages will be freed by vfree() */
@@ -185,6 +191,7 @@ static ssize_t firmware_loading_store(struct device *dev,
 			fw_priv->nr_pages = 0;
 			complete(&fw_priv->completion);
 			clear_bit(FW_STATUS_LOADING, &fw_priv->status);
+			mutex_unlock(&fw_lock);
 			break;
 		}
 		/* fallthrough */
> 
> Attached you will find the:
> - Oops with the vanilla 2.6.31
> - The BUG_ON patch
> - Oops with the patched 2.6.31
> 
> /Lars

> [    7.249453] rt61pci 0000:00:09.0: firmware: requesting rt2561s.bin
> [    7.314512] BUG: unable to handle kernel NULL pointer dereference at 00000004
> [    7.315639] IP: [<d04d51cd>] firmware_loading_store+0xed/0x160 [firmware_class]
> [    7.315639] *pde = 00000000
> [    7.315639] Oops: 0000 [#1] PREEMPT
> [    7.315639] last sysfs file: /sys/devices/pci0000:00/0000:00:09.0/firmware/0000:00:09.0/loading
> [    7.315639] Modules linked in: arc4 ecb cryptomgr crypto_blkcipher aead pcompress crypto_hash crypto_algapi rt61pci rt2x00pci rt2x00lib firmware_class mac80211 input_polldev crc_itu_t eeprom_93cx6 cfg80211 ndccanram ndccan ndcser(P) ndcscan(P) eu16550(P) ndccon(P)
> [    7.315639]
> [    7.315639] Pid: 1004, comm: echo Tainted: P           (2.6.31 #1)
> [    7.315639] EIP: 0060:[<d04d51cd>] EFLAGS: 00010202 CPU: 0
> [    7.315639] EIP is at firmware_loading_store+0xed/0x160 [firmware_class]
> [    7.315639] EAX: 00000000 EBX: d04d50e0 ECX: 0000000a EDX: 00000028
> [    7.315639] ESI: cf65ca80 EDI: cea29c00 EBP: cea29c08 ESP: cf777f2c
> [    7.315639]  DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068
> [    7.315639] Process echo (pid: 1004, ti=cf776000 task=cf755aa0 task.ti=cf776000)
> [    7.315639] Stack:
> [    7.315639]  d04d50e0 cea29c00 00000002 c028051d 00000002 c0414c40 ce9c0420 c01c9c98
> [    7.315639] <0> 00000002 c0414c40 00000002 b7f15000 ce9c1e80 cf44c000 c01c9ce6 ce9c1e94
> [    7.315639] <0> cf44c000 b7f15000 00000002 c01c9cb0 c018d362 cf777f9c cf44c000 fffffff7
> [    7.315639] Call Trace:
> [    7.315639]  [<d04d50e0>] ? firmware_loading_store+0x0/0x160 [firmware_class]
> [    7.315639]  [<c028051d>] ? dev_attr_store+0x1d/0x20
> [    7.315639]  [<c01c9c98>] ? flush_write_buffer+0x38/0x50
> [    7.315639]  [<c01c9ce6>] ? sysfs_write_file+0x36/0x60
> [    7.315639]  [<c01c9cb0>] ? sysfs_write_file+0x0/0x60
> [    7.315639]  [<c018d362>] ? vfs_write+0x82/0xf0
> [    7.315639]  [<c018d47c>] ? sys_write+0x3c/0x70
> [    7.315639]  [<c0102ba1>] ? syscall_call+0x7/0xb
> [    7.315639] Code: 91 c9 ef 3b 5e 3c 7c ed eb b5 8d 74 26 00 83 f8 ff 0f 85 50 ff ff ff e9 6a ff ff ff 89 f6 f6 46 34 01 0f 84 3f ff ff ff 8b 46 30 <8b> 40 04 e8 5b 05 cb ef 31 c9 8b 56 3c 8b 46 38 8b 5e 30 68 61
> [    7.315639] EIP: [<d04d51cd>] firmware_loading_store+0xed/0x160 [firmware_class] SS:ESP 0068:cf777f2c
> [    7.315639] CR2: 0000000000000004
> [    7.540848] ---[ end trace 6ebe83c102d3b046 ]---


> [    7.242449] ------------[ cut here ]------------
> [    7.243706] Kernel BUG at d04d523e [verbose debug info unavailable]
> [    7.243706] invalid opcode: 0000 [#1] PREEMPT
> [    7.243706] last sysfs file: /sys/devices/pci0000:00/0000:00:09.0/firmware/0000:00:09.0/loading
> [    7.243706] Modules linked in: arc4 ecb cryptomgr crypto_blkcipher aead pcompress crypto_hash crypto_algapi rt61pci rt2x00pci rt2x00lib firmware_class mac80211 input_polldev crc_itu_t eeprom_93cx6 cfg80211 ndccanram ndccan ndcser(P) ndcscan(P) eu16550(P) ndccon(P)
> [    7.243706]
> [    7.243706] Pid: 1004, comm: echo Tainted: P           (2.6.31 #1)
> [    7.243706] EIP: 0060:[<d04d523e>] EFLAGS: 00010246 CPU: 0
> [    7.243706] EIP is at firmware_loading_store+0x15e/0x170 [firmware_class]
> [    7.243706] EAX: 00000000 EBX: d04d50e0 ECX: 0000000a EDX: 00000028
> [    7.243706] ESI: cf4f2a20 EDI: cf4ffc00 EBP: cf4ffc08 ESP: cea37f2c
> [    7.243706]  DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068
> [    7.243706] Process echo (pid: 1004, ti=cea36000 task=cf6c2380 task.ti=cea36000)
> [    7.243706] Stack:
> [    7.243706]  d04d50e0 cf4ffc00 00000002 c028051d 00000002 c0414c40 cf4fd420 c01c9c98
> [    7.243706] <0> 00000002 c0414c40 00000002 b7fc8000 cf636e80 cf471980 c01c9ce6 cf636e94
> [    7.243706] <0> cf471980 b7fc8000 00000002 c01c9cb0 c018d362 cea37f9c cf471980 fffffff7
> [    7.243706] Call Trace:
> [    7.243706]  [<d04d50e0>] ? firmware_loading_store+0x0/0x170 [firmware_class]
> [    7.243706]  [<c028051d>] ? dev_attr_store+0x1d/0x20
> [    7.243706]  [<c01c9c98>] ? flush_write_buffer+0x38/0x50
> [    7.243706]  [<c01c9ce6>] ? sysfs_write_file+0x36/0x60
> [    7.243706]  [<c01c9cb0>] ? sysfs_write_file+0x0/0x60
> [    7.243706]  [<c018d362>] ? vfs_write+0x82/0xf0
> [    7.243706]  [<c018d47c>] ? sys_write+0x3c/0x70
> [    7.243706]  [<c0102ba1>] ? syscall_call+0x7/0xb
> [    7.243706] Code: 66 34 fe e9 14 ff ff ff 8b 47 08 68 1c 5f 4d d0 50 89 f8 e8 65 b2 da ef 50 68 c3 5f 4d d0 e8 da 3c c5 ef 83 c4 10 e9 ea fe ff ff <0f> 0b eb fe 8d b4 26 00 00 00 00 8d bc 27 00 00 00 00 55 89 cd
> [    7.243706] EIP: [<d04d523e>] firmware_loading_store+0x15e/0x170 [firmware_class] SS:ESP 0068:cea37f2c
> [    7.458557] ---[ end trace 604593037057054f ]---


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* RE: Oops in drivers\base\firmware_class
  2009-09-16 20:57 ` Frederik Deweerdt
@ 2009-09-18 17:53   ` Lars Ericsson
  2009-09-21 13:32     ` [patch -stable] firware_class oops: fix firmware_loading_store locking Frederik Deweerdt
  0 siblings, 1 reply; 7+ messages in thread
From: Lars Ericsson @ 2009-09-18 17:53 UTC (permalink / raw)
  To: 'Frederik Deweerdt'
  Cc: David.Woodhouse, sachinp, linux-kernel, 'Ivo van Doorn'

> On Wed, Sep 16, 2009 at 08:44:43PM +0200, Lars Ericsson wrote:
> > Hi,
> > 
> > I have discovered a Oops in the firmware_loading_store function. 
> > At first it looks like a timing issue but after adding a 
> BUG_ON test,
> > it fails every time. 
> > 
> > drivers\base\firmware_class:
> > ------------------------------
> >  541 01c0 F6463401 	testb $1,52(%esi)
> >  542 01c4 0F843FFF 	je .L38
> >  542      FFFF
> >  543              	.loc 1 174 0
> >  544 01ca 8B4630   	movl 48(%esi),%eax
> >  545 01cd 8B4004   	movl 4(%eax),%eax	<---- Oops
> >  546 01d0 E8FCFFFF 	call vfree
> >  546      FF
> > 
> > The code that fails was introduced in commit
> > 6e03a201bbe8137487f340d26aa662110e324b20 
> Could you try the following patch?

No, problem.
I have tried the patch and it works for me. I will continue my 
tests and report any problems. 

Should I close the http://bugzilla.kernel.org/show_bug.cgi?id=14185 ,
which I have created before I noticed you reply ?

Thanks for the help !

/Lars

> It seems that we're missing the locking around the accesses to
> fw_priv->fw.
> 
> Signed-off-by: Frederik Deweerdt <frederik.deweerdt@xprog.eu>
> 
> diff --git a/drivers/base/firmware_class.c 
> b/drivers/base/firmware_class.c
> index 8a267c4..49105ed 100644
> --- a/drivers/base/firmware_class.c
> +++ b/drivers/base/firmware_class.c
> @@ -171,12 +171,18 @@ static ssize_t 
> firmware_loading_store(struct device *dev,
>  		break;
>  	case 0:
>  		if (test_bit(FW_STATUS_LOADING, &fw_priv->status)) {
> +			mutex_lock(&fw_lock);
> +			if (!fw_priv->fw) {
> +				mutex_unlock(&fw_lock);
> +				break;
> +			}
>  			vfree(fw_priv->fw->data);
>  			fw_priv->fw->data = vmap(fw_priv->pages,
>  						 fw_priv->nr_pages,
>  						 0, PAGE_KERNEL_RO);
>  			if (!fw_priv->fw->data) {
>  				dev_err(dev, "%s: vmap() 
> failed\n", __func__);
> +				mutex_unlock(&fw_lock);
>  				goto err;
>  			}
>  			/* Pages will be freed by vfree() */
> @@ -185,6 +191,7 @@ static ssize_t 
> firmware_loading_store(struct device *dev,
>  			fw_priv->nr_pages = 0;
>  			complete(&fw_priv->completion);
>  			clear_bit(FW_STATUS_LOADING, &fw_priv->status);
> +			mutex_unlock(&fw_lock);
>  			break;
>  		}
>  		/* fallthrough */
> > 
> > Attached you will find the:
> > - Oops with the vanilla 2.6.31
> > - The BUG_ON patch
> > - Oops with the patched 2.6.31
> > 
> > /Lars
> 
> > [    7.249453] rt61pci 0000:00:09.0: firmware: requesting 
> rt2561s.bin
> > [    7.314512] BUG: unable to handle kernel NULL pointer 
> dereference at 00000004
> > [    7.315639] IP: [<d04d51cd>] 
> firmware_loading_store+0xed/0x160 [firmware_class]
> > [    7.315639] *pde = 00000000
> > [    7.315639] Oops: 0000 [#1] PREEMPT
> > [    7.315639] last sysfs file: 
> /sys/devices/pci0000:00/0000:00:09.0/firmware/0000:00:09.0/loading
> > [    7.315639] Modules linked in: arc4 ecb cryptomgr 
> crypto_blkcipher aead pcompress crypto_hash crypto_algapi 
> rt61pci rt2x00pci rt2x00lib firmware_class mac80211 
> input_polldev crc_itu_t eeprom_93cx6 cfg80211 ndccanram 
> ndccan ndcser(P) ndcscan(P) eu16550(P) ndccon(P)
> > [    7.315639]
> > [    7.315639] Pid: 1004, comm: echo Tainted: P           
> (2.6.31 #1)
> > [    7.315639] EIP: 0060:[<d04d51cd>] EFLAGS: 00010202 CPU: 0
> > [    7.315639] EIP is at firmware_loading_store+0xed/0x160 
> [firmware_class]
> > [    7.315639] EAX: 00000000 EBX: d04d50e0 ECX: 0000000a 
> EDX: 00000028
> > [    7.315639] ESI: cf65ca80 EDI: cea29c00 EBP: cea29c08 
> ESP: cf777f2c
> > [    7.315639]  DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068
> > [    7.315639] Process echo (pid: 1004, ti=cf776000 
> task=cf755aa0 task.ti=cf776000)
> > [    7.315639] Stack:
> > [    7.315639]  d04d50e0 cea29c00 00000002 c028051d 
> 00000002 c0414c40 ce9c0420 c01c9c98
> > [    7.315639] <0> 00000002 c0414c40 00000002 b7f15000 
> ce9c1e80 cf44c000 c01c9ce6 ce9c1e94
> > [    7.315639] <0> cf44c000 b7f15000 00000002 c01c9cb0 
> c018d362 cf777f9c cf44c000 fffffff7
> > [    7.315639] Call Trace:
> > [    7.315639]  [<d04d50e0>] ? 
> firmware_loading_store+0x0/0x160 [firmware_class]
> > [    7.315639]  [<c028051d>] ? dev_attr_store+0x1d/0x20
> > [    7.315639]  [<c01c9c98>] ? flush_write_buffer+0x38/0x50
> > [    7.315639]  [<c01c9ce6>] ? sysfs_write_file+0x36/0x60
> > [    7.315639]  [<c01c9cb0>] ? sysfs_write_file+0x0/0x60
> > [    7.315639]  [<c018d362>] ? vfs_write+0x82/0xf0
> > [    7.315639]  [<c018d47c>] ? sys_write+0x3c/0x70
> > [    7.315639]  [<c0102ba1>] ? syscall_call+0x7/0xb
> > [    7.315639] Code: 91 c9 ef 3b 5e 3c 7c ed eb b5 8d 74 26 
> 00 83 f8 ff 0f 85 50 ff ff ff e9 6a ff ff ff 89 f6 f6 46 34 
> 01 0f 84 3f ff ff ff 8b 46 30 <8b> 40 04 e8 5b 05 cb ef 31 c9 
> 8b 56 3c 8b 46 38 8b 5e 30 68 61
> > [    7.315639] EIP: [<d04d51cd>] 
> firmware_loading_store+0xed/0x160 [firmware_class] SS:ESP 
> 0068:cf777f2c
> > [    7.315639] CR2: 0000000000000004
> > [    7.540848] ---[ end trace 6ebe83c102d3b046 ]---
> 
> 
> > [    7.242449] ------------[ cut here ]------------
> > [    7.243706] Kernel BUG at d04d523e [verbose debug info 
> unavailable]
> > [    7.243706] invalid opcode: 0000 [#1] PREEMPT
> > [    7.243706] last sysfs file: 
> /sys/devices/pci0000:00/0000:00:09.0/firmware/0000:00:09.0/loading
> > [    7.243706] Modules linked in: arc4 ecb cryptomgr 
> crypto_blkcipher aead pcompress crypto_hash crypto_algapi 
> rt61pci rt2x00pci rt2x00lib firmware_class mac80211 
> input_polldev crc_itu_t eeprom_93cx6 cfg80211 ndccanram 
> ndccan ndcser(P) ndcscan(P) eu16550(P) ndccon(P)
> > [    7.243706]
> > [    7.243706] Pid: 1004, comm: echo Tainted: P           
> (2.6.31 #1)
> > [    7.243706] EIP: 0060:[<d04d523e>] EFLAGS: 00010246 CPU: 0
> > [    7.243706] EIP is at firmware_loading_store+0x15e/0x170 
> [firmware_class]
> > [    7.243706] EAX: 00000000 EBX: d04d50e0 ECX: 0000000a 
> EDX: 00000028
> > [    7.243706] ESI: cf4f2a20 EDI: cf4ffc00 EBP: cf4ffc08 
> ESP: cea37f2c
> > [    7.243706]  DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068
> > [    7.243706] Process echo (pid: 1004, ti=cea36000 
> task=cf6c2380 task.ti=cea36000)
> > [    7.243706] Stack:
> > [    7.243706]  d04d50e0 cf4ffc00 00000002 c028051d 
> 00000002 c0414c40 cf4fd420 c01c9c98
> > [    7.243706] <0> 00000002 c0414c40 00000002 b7fc8000 
> cf636e80 cf471980 c01c9ce6 cf636e94
> > [    7.243706] <0> cf471980 b7fc8000 00000002 c01c9cb0 
> c018d362 cea37f9c cf471980 fffffff7
> > [    7.243706] Call Trace:
> > [    7.243706]  [<d04d50e0>] ? 
> firmware_loading_store+0x0/0x170 [firmware_class]
> > [    7.243706]  [<c028051d>] ? dev_attr_store+0x1d/0x20
> > [    7.243706]  [<c01c9c98>] ? flush_write_buffer+0x38/0x50
> > [    7.243706]  [<c01c9ce6>] ? sysfs_write_file+0x36/0x60
> > [    7.243706]  [<c01c9cb0>] ? sysfs_write_file+0x0/0x60
> > [    7.243706]  [<c018d362>] ? vfs_write+0x82/0xf0
> > [    7.243706]  [<c018d47c>] ? sys_write+0x3c/0x70
> > [    7.243706]  [<c0102ba1>] ? syscall_call+0x7/0xb
> > [    7.243706] Code: 66 34 fe e9 14 ff ff ff 8b 47 08 68 1c 
> 5f 4d d0 50 89 f8 e8 65 b2 da ef 50 68 c3 5f 4d d0 e8 da 3c 
> c5 ef 83 c4 10 e9 ea fe ff ff <0f> 0b eb fe 8d b4 26 00 00 00 
> 00 8d bc 27 00 00 00 00 55 89 cd
> > [    7.243706] EIP: [<d04d523e>] 
> firmware_loading_store+0x15e/0x170 [firmware_class] SS:ESP 
> 0068:cea37f2c
> > [    7.458557] ---[ end trace 604593037057054f ]---
> 
> --
> To unsubscribe from this list: send the line "unsubscribe 
> linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [patch -stable] firware_class oops: fix firmware_loading_store locking
  2009-09-18 17:53   ` Lars Ericsson
@ 2009-09-21 13:32     ` Frederik Deweerdt
  2009-09-23 16:42       ` Linus Torvalds
  0 siblings, 1 reply; 7+ messages in thread
From: Frederik Deweerdt @ 2009-09-21 13:32 UTC (permalink / raw)
  To: greg, torvalds, Lars Ericsson
  Cc: 'Frederik Deweerdt',
	David.Woodhouse, sachinp, linux-kernel, 'Ivo van Doorn'

Hi all,
On Fri, Sep 18, 2009 at 07:53:40PM +0200, Lars Ericsson wrote:
> > On Wed, Sep 16, 2009 at 08:44:43PM +0200, Lars Ericsson wrote:
> > > Hi,
> > > 
> > > I have discovered a Oops in the firmware_loading_store function. 
> > > At first it looks like a timing issue but after adding a 
> > BUG_ON test,
> > > it fails every time. 
> > > 
> > > drivers\base\firmware_class:
> > > ------------------------------
> > >  541 01c0 F6463401 	testb $1,52(%esi)
> > >  542 01c4 0F843FFF 	je .L38
> > >  542      FFFF
> > >  543              	.loc 1 174 0
> > >  544 01ca 8B4630   	movl 48(%esi),%eax
> > >  545 01cd 8B4004   	movl 4(%eax),%eax	<---- Oops
> > >  546 01d0 E8FCFFFF 	call vfree
> > >  546      FF
> > > 
> > > The code that fails was introduced in commit
> > > 6e03a201bbe8137487f340d26aa662110e324b20 
> > Could you try the following patch?
> 
> No, problem.
> I have tried the patch and it works for me. I will continue my 
> tests and report any problems. 
> 
> Should I close the http://bugzilla.kernel.org/show_bug.cgi?id=14185 ,
> which I have created before I noticed you reply ?
> 
I'd rather wait someone picks it up for mainline inclusion. I've added
your {Reported,Tested}-by tags.

The bug its vanilla 2.6.31, and should be considered for -stable inclusion.

Regards,
Frederik

----

The code introduced by commit 6e03a201bbe8137487f340d26aa662110e324b20 leads
to a potential null deref. The following patch adds the proper locking
around the accesses to fw_priv->fw.
See http://bugzilla.kernel.org/show_bug.cgi?id=14185 for a full bug report.


Reported-by: Lars Ericsson <Lars_Ericsson@telia.com>
Signed-off-by: Frederik Deweerdt <frederik.deweerdt@xprog.eu>
Tested-by: Lars Ericsson <Lars_Ericsson@telia.com>


diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index 8a267c4..49105ed 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -171,12 +171,18 @@ static ssize_t firmware_loading_store(struct device *dev,
 		break;
 	case 0:
 		if (test_bit(FW_STATUS_LOADING, &fw_priv->status)) {
+			mutex_lock(&fw_lock);
+			if (!fw_priv->fw) {
+				mutex_unlock(&fw_lock);
+				break;
+			}
 			vfree(fw_priv->fw->data);
 			fw_priv->fw->data = vmap(fw_priv->pages,
 						 fw_priv->nr_pages,
 						 0, PAGE_KERNEL_RO);
 			if (!fw_priv->fw->data) {
 				dev_err(dev, "%s: vmap() failed\n", __func__);
+				mutex_unlock(&fw_lock);
 				goto err;
 			}
 			/* Pages will be freed by vfree() */
@@ -185,6 +191,7 @@ static ssize_t firmware_loading_store(struct device *dev,
 			fw_priv->nr_pages = 0;
 			complete(&fw_priv->completion);
 			clear_bit(FW_STATUS_LOADING, &fw_priv->status);
+			mutex_unlock(&fw_lock);
 			break;
 		}
 		/* fallthrough */


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [patch -stable] firware_class oops: fix firmware_loading_store locking
  2009-09-21 13:32     ` [patch -stable] firware_class oops: fix firmware_loading_store locking Frederik Deweerdt
@ 2009-09-23 16:42       ` Linus Torvalds
  2009-09-24 15:13         ` Sachin Sant
  2009-09-24 15:26         ` Frederik Deweerdt
  0 siblings, 2 replies; 7+ messages in thread
From: Linus Torvalds @ 2009-09-23 16:42 UTC (permalink / raw)
  To: Frederik Deweerdt
  Cc: greg, Lars Ericsson, David.Woodhouse, sachinp, linux-kernel,
	'Ivo van Doorn'



On Mon, 21 Sep 2009, Frederik Deweerdt wrote:
<
> I'd rather wait someone picks it up for mainline inclusion. I've added
> your {Reported,Tested}-by tags.
> 
> The bug its vanilla 2.6.31, and should be considered for -stable inclusion.
> 
> Regards,
> Frederik
> 
> ----
> 
> The code introduced by commit 6e03a201bbe8137487f340d26aa662110e324b20 leads
> to a potential null deref. The following patch adds the proper locking
> around the accesses to fw_priv->fw.
> See http://bugzilla.kernel.org/show_bug.cgi?id=14185 for a full bug report.

I don't think this is correct.

I think you should protect the FW_STATUS_LOADING bit too, shouldn't you?

As it is, it does this:

	if (test_bit(FW_STATUS_LOADING, &fw_priv->status)) {
		mutex_lock(&fw_lock);
		...
		clear_bit(FW_STATUS_LOADING, &fw_priv->status);
		mutex_unlock(&fw_lock);
		break;
	}

and if this code can race (which it obviously can, since your addition of 
fw_lock mutex matters), then I think it can race on that FW_STATUS_LOADING 
bit too. No?

So my gut feel is that the whole damn function should be protected by the 
mutex_lock thing. IOW, the patch would be something like the appended.

UNTESTED. Somebody needs to test this, verify, and send it back to me.

Am I missing something?

		Linus

---
 drivers/base/firmware_class.c |    9 ++++-----
 1 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index 7376367..1b803df 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -150,13 +150,12 @@ static ssize_t firmware_loading_store(struct device *dev,
 	int loading = simple_strtol(buf, NULL, 10);
 	int i;
 
+	mutex_lock(&fw_lock);
 	switch (loading) {
 	case 1:
-		mutex_lock(&fw_lock);
-		if (!fw_priv->fw) {
-			mutex_unlock(&fw_lock);
+		if (!fw_priv->fw)
 			break;
-		}
+
 		vfree(fw_priv->fw->data);
 		fw_priv->fw->data = NULL;
 		for (i = 0; i < fw_priv->nr_pages; i++)
@@ -167,7 +166,6 @@ static ssize_t firmware_loading_store(struct device *dev,
 		fw_priv->nr_pages = 0;
 		fw_priv->fw->size = 0;
 		set_bit(FW_STATUS_LOADING, &fw_priv->status);
-		mutex_unlock(&fw_lock);
 		break;
 	case 0:
 		if (test_bit(FW_STATUS_LOADING, &fw_priv->status)) {
@@ -195,6 +193,7 @@ static ssize_t firmware_loading_store(struct device *dev,
 		fw_load_abort(fw_priv);
 		break;
 	}
+	mutex_unlock(&fw_lock);
 
 	return count;
 }

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [patch -stable] firware_class oops: fix firmware_loading_store locking
  2009-09-23 16:42       ` Linus Torvalds
@ 2009-09-24 15:13         ` Sachin Sant
  2009-09-24 15:26         ` Frederik Deweerdt
  1 sibling, 0 replies; 7+ messages in thread
From: Sachin Sant @ 2009-09-24 15:13 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Frederik Deweerdt, greg, Lars Ericsson, David.Woodhouse,
	linux-kernel, 'Ivo van Doorn'

Linus Torvalds wrote:
> I don't think this is correct.
>
> I think you should protect the FW_STATUS_LOADING bit too, shouldn't you?
>
> As it is, it does this:
>
> 	if (test_bit(FW_STATUS_LOADING, &fw_priv->status)) {
> 		mutex_lock(&fw_lock);
> 		...
> 		clear_bit(FW_STATUS_LOADING, &fw_priv->status);
> 		mutex_unlock(&fw_lock);
> 		break;
> 	}
>
> and if this code can race (which it obviously can, since your addition of 
> fw_lock mutex matters), then I think it can race on that FW_STATUS_LOADING 
> bit too. No?
>
> So my gut feel is that the whole damn function should be protected by the 
> mutex_lock thing. IOW, the patch would be something like the appended.
>
> UNTESTED. Somebody needs to test this, verify, and send it back to me.
>   
I did a quick boot test with this patch and didn't find any issues.

But that said i haven't been able to recreate the problem reported by Lars,
so not sure how relevant would be the test results from me.

Thanks
-Sachin


-- 

---------------------------------
Sachin Sant
IBM Linux Technology Center
India Systems and Technology Labs
Bangalore, India
---------------------------------


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch -stable] firware_class oops: fix firmware_loading_store locking
  2009-09-23 16:42       ` Linus Torvalds
  2009-09-24 15:13         ` Sachin Sant
@ 2009-09-24 15:26         ` Frederik Deweerdt
  1 sibling, 0 replies; 7+ messages in thread
From: Frederik Deweerdt @ 2009-09-24 15:26 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Frederik Deweerdt, greg, Lars Ericsson, David.Woodhouse, sachinp,
	linux-kernel, 'Ivo van Doorn'

On Wed, Sep 23, 2009 at 09:42:41AM -0700, Linus Torvalds wrote:
> 
> 
> On Mon, 21 Sep 2009, Frederik Deweerdt wrote:
> <
> > I'd rather wait someone picks it up for mainline inclusion. I've added
> > your {Reported,Tested}-by tags.
> > 
> > The bug its vanilla 2.6.31, and should be considered for -stable inclusion.
> > 
> > Regards,
> > Frederik
> > 
> > ----
> > 
> > The code introduced by commit 6e03a201bbe8137487f340d26aa662110e324b20 leads
> > to a potential null deref. The following patch adds the proper locking
> > around the accesses to fw_priv->fw.
> > See http://bugzilla.kernel.org/show_bug.cgi?id=14185 for a full bug report.
> 
> I don't think this is correct.
> 
> I think you should protect the FW_STATUS_LOADING bit too, shouldn't you?
> 
> As it is, it does this:
> 
> 	if (test_bit(FW_STATUS_LOADING, &fw_priv->status)) {
> 		mutex_lock(&fw_lock);
> 		...
> 		clear_bit(FW_STATUS_LOADING, &fw_priv->status);
> 		mutex_unlock(&fw_lock);
> 		break;
> 	}
> 
> and if this code can race (which it obviously can, since your addition of 
> fw_lock mutex matters), then I think it can race on that FW_STATUS_LOADING 
> bit too. No?
> 
> So my gut feel is that the whole damn function should be protected by the 
> mutex_lock thing. IOW, the patch would be something like the appended.
> 
> UNTESTED. Somebody needs to test this, verify, and send it back to me.
> 
> Am I missing something?
You're right, the status must be protected too, but we would want to
keep the check on fw_priv->fw not being NULL (patch follows).

I cannot reproduce the bug here, Lars could you please have a look ?

Regards,
Frederik

diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index 7376367..21ac040 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -150,13 +150,15 @@ static ssize_t firmware_loading_store(struct device *dev,
 	int loading = simple_strtol(buf, NULL, 10);
 	int i;
 
+
+	mutex_lock(&fw_lock);
+	if (!fw_priv->fw) {
+		mutex_unlock(&fw_lock);
+		return -ENODEV;
+	}
+
 	switch (loading) {
 	case 1:
-		mutex_lock(&fw_lock);
-		if (!fw_priv->fw) {
-			mutex_unlock(&fw_lock);
-			break;
-		}
 		vfree(fw_priv->fw->data);
 		fw_priv->fw->data = NULL;
 		for (i = 0; i < fw_priv->nr_pages; i++)
@@ -167,7 +169,6 @@ static ssize_t firmware_loading_store(struct device *dev,
 		fw_priv->nr_pages = 0;
 		fw_priv->fw->size = 0;
 		set_bit(FW_STATUS_LOADING, &fw_priv->status);
-		mutex_unlock(&fw_lock);
 		break;
 	case 0:
 		if (test_bit(FW_STATUS_LOADING, &fw_priv->status)) {
@@ -195,6 +196,7 @@ static ssize_t firmware_loading_store(struct device *dev,
 		fw_load_abort(fw_priv);
 		break;
 	}
+	mutex_unlock(&fw_lock);
 
 	return count;
 }

^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2009-09-24 15:26 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-09-16 18:44 Oops in drivers\base\firmware_class Lars Ericsson
2009-09-16 20:57 ` Frederik Deweerdt
2009-09-18 17:53   ` Lars Ericsson
2009-09-21 13:32     ` [patch -stable] firware_class oops: fix firmware_loading_store locking Frederik Deweerdt
2009-09-23 16:42       ` Linus Torvalds
2009-09-24 15:13         ` Sachin Sant
2009-09-24 15:26         ` Frederik Deweerdt

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.