linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* ohci1394 broke 2.6.19 -> 2.6.20-rc1
@ 2007-02-05 23:21 Robert Crocombe
  2007-02-06  0:03 ` Stefan Richter
  0 siblings, 1 reply; 6+ messages in thread
From: Robert Crocombe @ 2007-02-05 23:21 UTC (permalink / raw)
  To: linux1394-devel, linux-kernel

Prior to testing a patch for bugzilla bug 7569 (hosts lost on bus
reset), I wanted to reproduce the behavior.  I can under the noted
2.6.16-blah kernels, but moving to anything more recent than 2.6.19
means ohci1394 is non-functional (no 1394 hosts are detected) and the
module cannot be removed.

I have narrowed it down to 2.6.19 works, 2.6.20-rc1 doesn't.  Lots of detail at:

http://bugzilla.kernel.org/show_bug.cgi?id=7942

-- 
Robert Crocombe

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: ohci1394 broke 2.6.19 -> 2.6.20-rc1
  2007-02-05 23:21 ohci1394 broke 2.6.19 -> 2.6.20-rc1 Robert Crocombe
@ 2007-02-06  0:03 ` Stefan Richter
  2007-02-06  0:39   ` Stefan Richter
  0 siblings, 1 reply; 6+ messages in thread
From: Stefan Richter @ 2007-02-06  0:03 UTC (permalink / raw)
  To: Robert Crocombe; +Cc: linux1394-devel, linux-kernel

Robert Crocombe wrote:
> Prior to testing a patch for bugzilla bug 7569 (hosts lost on bus
> reset),

(Well, I think a lot more has to be done for that bug than in the patch
I posted so far.)

> I wanted to reproduce the behavior.  I can under the noted
> 2.6.16-blah kernels, but moving to anything more recent than 2.6.19
> means ohci1394 is non-functional (no 1394 hosts are detected) and the
> module cannot be removed.
> 
> I have narrowed it down to 2.6.19 works, 2.6.20-rc1 doesn't.  Lots of detail at:
> 
> http://bugzilla.kernel.org/show_bug.cgi?id=7942

I get a spinlock lockup on 2.6.20-rc6 + some 1394 updates with a similar
trace to what Robert posted at bugzilla --- *if* I use ieee1394's option
disable_nodemgr=1. (Never used it before.) The key appears to be how
hpsb_alloc_host interacts with the driver core. I don't know if hosts.c
is at fault or some change in the driver core.

There is one notable change in hpsb_alloc_host after 2.6.19: I moved the
device registering out of a mutex. But I don't think that's relevant.
The lockup which I got was with only one FireWire controller.

$ git diff v2.6.19..v2.6.20-rc1 drivers/ieee1394/hosts.c
diff --git a/drivers/ieee1394/hosts.c b/drivers/ieee1394/hosts.c
index d90a3a1..ee82a53 100644
--- a/drivers/ieee1394/hosts.c
+++ b/drivers/ieee1394/hosts.c
@@ -31,9 +31,10 @@
 #include "config_roms.h"
 
 
-static void delayed_reset_bus(void * __reset_info)
+static void delayed_reset_bus(struct work_struct *work)
 {
-	struct hpsb_host *host = (struct hpsb_host*)__reset_info;
+	struct hpsb_host *host =
+		container_of(work, struct hpsb_host, delayed_reset.work);
 	int generation = host->csr.generation + 1;
 
 	/* The generation field rolls over to 2 rather than 0 per IEEE
@@ -43,9 +44,10 @@ static void delayed_reset_bus(void * __reset_info)
 
 	CSR_SET_BUS_INFO_GENERATION(host->csr.rom, generation);
 	if (csr1212_generate_csr_image(host->csr.rom) != CSR1212_SUCCESS) {
-		/* CSR image creation failed, reset generation field and do not
-		 * issue a bus reset. */
-		CSR_SET_BUS_INFO_GENERATION(host->csr.rom, host->csr.generation);
+		/* CSR image creation failed.
+		 * Reset generation field and do not issue a bus reset. */
+		CSR_SET_BUS_INFO_GENERATION(host->csr.rom,
+					    host->csr.generation);
 		return;
 	}
 
@@ -53,7 +55,8 @@ static void delayed_reset_bus(void * __reset_info)
 
 	host->update_config_rom = 0;
 	if (host->driver->set_hw_config_rom)
-		host->driver->set_hw_config_rom(host, host->csr.rom->bus_info_data);
+		host->driver->set_hw_config_rom(host,
+						host->csr.rom->bus_info_data);
 
 	host->csr.gen_timestamp[host->csr.generation] = jiffies;
 	hpsb_reset_bus(host, SHORT_RESET);
@@ -69,7 +72,8 @@ static int dummy_devctl(struct hpsb_host *h, enum devctl_cmd c, int arg)
 	return -1;
 }
 
-static int dummy_isoctl(struct hpsb_iso *iso, enum isoctl_cmd command, unsigned long arg)
+static int dummy_isoctl(struct hpsb_iso *iso, enum isoctl_cmd command,
+			unsigned long arg)
 {
 	return -1;
 }
@@ -122,15 +126,13 @@ struct hpsb_host *hpsb_alloc_host(struct hpsb_host_driver *drv, size_t extra,
 	int i;
 	int hostnum = 0;
 
-	h = kzalloc(sizeof(*h) + extra, SLAB_KERNEL);
+	h = kzalloc(sizeof(*h) + extra, GFP_KERNEL);
 	if (!h)
 		return NULL;
 
 	h->csr.rom = csr1212_create_csr(&csr_bus_ops, CSR_BUS_INFO_SIZE, h);
-	if (!h->csr.rom) {
-		kfree(h);
-		return NULL;
-	}
+	if (!h->csr.rom)
+		goto fail;
 
 	h->hostdata = h + 1;
 	h->driver = drv;
@@ -145,21 +147,20 @@ struct hpsb_host *hpsb_alloc_host(struct hpsb_host_driver *drv, size_t extra,
 
 	atomic_set(&h->generation, 0);
 
-	INIT_WORK(&h->delayed_reset, delayed_reset_bus, h);
+	INIT_DELAYED_WORK(&h->delayed_reset, delayed_reset_bus);
 	
 	init_timer(&h->timeout);
 	h->timeout.data = (unsigned long) h;
 	h->timeout.function = abort_timedouts;
-	h->timeout_interval = HZ / 20; // 50ms by default
+	h->timeout_interval = HZ / 20; /* 50ms, half of minimum SPLIT_TIMEOUT */
 
 	h->topology_map = h->csr.topology_map + 3;
 	h->speed_map = (u8 *)(h->csr.speed_map + 2);
 
 	mutex_lock(&host_num_alloc);
-
 	while (nodemgr_for_each_host(&hostnum, alloc_hostnum_cb))
 		hostnum++;
-
+	mutex_unlock(&host_num_alloc);
 	h->id = hostnum;
 
 	memcpy(&h->device, &nodemgr_dev_template_host, sizeof(h->device));
@@ -170,13 +171,19 @@ struct hpsb_host *hpsb_alloc_host(struct hpsb_host_driver *drv, size_t extra,
 	h->class_dev.class = &hpsb_host_class;
 	snprintf(h->class_dev.class_id, BUS_ID_SIZE, "fw-host%d", h->id);
 
-	device_register(&h->device);
-	class_device_register(&h->class_dev);
+	if (device_register(&h->device))
+		goto fail;
+	if (class_device_register(&h->class_dev)) {
+		device_unregister(&h->device);
+		goto fail;
+	}
 	get_device(&h->device);
 
-	mutex_unlock(&host_num_alloc);
-
 	return h;
+
+fail:
+	kfree(h);
+	return NULL;
 }
 
 int hpsb_add_host(struct hpsb_host *host)
@@ -228,13 +235,14 @@ int hpsb_update_config_rom_image(struct hpsb_host *host)
 	if (time_before(jiffies, host->csr.gen_timestamp[next_gen] + 60 * HZ))
 		/* Wait 60 seconds from the last time this generation number was
 		 * used. */
-		reset_delay = (60 * HZ) + host->csr.gen_timestamp[next_gen] - jiffies;
+		reset_delay =
+			(60 * HZ) + host->csr.gen_timestamp[next_gen] - jiffies;
 	else
 		/* Wait 1 second in case some other code wants to change the
 		 * Config ROM in the near future. */
 		reset_delay = HZ;
 
-	PREPARE_WORK(&host->delayed_reset, delayed_reset_bus, host);
+	PREPARE_DELAYED_WORK(&host->delayed_reset, delayed_reset_bus);
 	schedule_delayed_work(&host->delayed_reset, reset_delay);
 
 	return 0;



-- 
Stefan Richter
-=====-=-=== --=- --==-
http://arcgraph.de/sr/


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: ohci1394 broke 2.6.19 -> 2.6.20-rc1
  2007-02-06  0:03 ` Stefan Richter
@ 2007-02-06  0:39   ` Stefan Richter
  2007-02-06  0:48     ` Stefan Richter
  2007-02-06  1:04     ` ohci1394 broke 2.6.19 -> 2.6.20-rc1 Robert Crocombe
  0 siblings, 2 replies; 6+ messages in thread
From: Stefan Richter @ 2007-02-06  0:39 UTC (permalink / raw)
  To: Robert Crocombe; +Cc: linux1394-devel, linux-kernel

On  6 Feb, Stefan Richter wrote:
> Robert Crocombe wrote:
>> moving to anything more recent than 2.6.19
>> means ohci1394 is non-functional (no 1394 hosts are detected) and the
>> module cannot be removed.

Not using the option disable_nodemgr=1 should avoid the bug.

>> I have narrowed it down to 2.6.19 works, 2.6.20-rc1 doesn't.  Lots of detail at:
>> 
>> http://bugzilla.kernel.org/show_bug.cgi?id=7942
> 
> I get a spinlock lockup on 2.6.20-rc6 + some 1394 updates with a similar
> trace to what Robert posted at bugzilla --- *if* I use ieee1394's option
> disable_nodemgr=1. (Never used it before.) The key appears to be how
> hpsb_alloc_host interacts with the driver core. I don't know if hosts.c
> is at fault or some change in the driver core.

It's my oversight, see patch.


From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Subject: ieee1394: fix host device registering when nodemgr disabled

Since my commit 8252bbb1363b7fe963a3eb6f8a36da619a6f5a65 in 2.6.20-rc1,
host devices have a dummy driver attached.  Alas the driver was not
registered before use if ieee1394 was loaded with disable_nodemgr=1.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---
 drivers/ieee1394/ieee1394_core.c |    1 +
 drivers/ieee1394/nodemgr.c       |    3 +--
 drivers/ieee1394/nodemgr.h       |    4 +---
 3 files changed, 3 insertions(+), 5 deletions(-)

Index: linux-2.6.20-rc6/drivers/ieee1394/ieee1394_core.c
===================================================================
--- linux-2.6.20-rc6.orig/drivers/ieee1394/ieee1394_core.c	2007-02-03 18:04:50.000000000 +0100
+++ linux-2.6.20-rc6/drivers/ieee1394/ieee1394_core.c	2007-02-06 01:28:04.000000000 +0100
@@ -1132,6 +1132,7 @@ static int __init ieee1394_init(void)
 		}
 	}
 
+	ret = driver_register(&nodemgr_mid_layer_driver);
 	ret = class_register(&hpsb_host_class);
 	if (ret < 0)
 		goto release_all_bus;
Index: linux-2.6.20-rc6/drivers/ieee1394/nodemgr.c
===================================================================
--- linux-2.6.20-rc6.orig/drivers/ieee1394/nodemgr.c	2007-01-27 14:07:00.000000000 +0100
+++ linux-2.6.20-rc6/drivers/ieee1394/nodemgr.c	2007-02-06 01:29:21.000000000 +0100
@@ -249,7 +249,7 @@ static struct device nodemgr_dev_templat
  * useful drivers for them yet, and there would be a deadlock possible if the
  * driver core scans the host device while the host's low-level driver (i.e.
  * the host's parent device) is being removed. */
-static struct device_driver nodemgr_mid_layer_driver = {
+struct device_driver nodemgr_mid_layer_driver = {
 	.bus		= &ieee1394_bus_type,
 	.name		= "nodemgr",
 	.owner		= THIS_MODULE,
@@ -1857,7 +1857,6 @@ int init_ieee1394_nodemgr(void)
 		class_unregister(&nodemgr_ne_class);
 		return error;
 	}
-	error = driver_register(&nodemgr_mid_layer_driver);
 	hpsb_register_highlevel(&nodemgr_highlevel);
 	return 0;
 }
Index: linux-2.6.20-rc6/drivers/ieee1394/nodemgr.h
===================================================================
--- linux-2.6.20-rc6.orig/drivers/ieee1394/nodemgr.h	2007-01-27 14:07:00.000000000 +0100
+++ linux-2.6.20-rc6/drivers/ieee1394/nodemgr.h	2007-02-06 01:30:27.000000000 +0100
@@ -181,10 +181,8 @@ int nodemgr_for_each_host(void *__data, 
 int init_ieee1394_nodemgr(void);
 void cleanup_ieee1394_nodemgr(void);
 
-/* The template for a host device */
+extern struct device_driver nodemgr_mid_layer_driver;
 extern struct device nodemgr_dev_template_host;
-
-/* Bus attributes we export */
 extern struct bus_attribute *const fw_bus_attrs[];
 
 #endif /* _IEEE1394_NODEMGR_H */



-- 
Stefan Richter
-=====-=-=== --=- --==-
http://arcgraph.de/sr/



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: ohci1394 broke 2.6.19 -> 2.6.20-rc1
  2007-02-06  0:39   ` Stefan Richter
@ 2007-02-06  0:48     ` Stefan Richter
  2007-02-06  1:20       ` [PATCH update] ieee1394: fix host device registering when nodemgr disabled Stefan Richter
  2007-02-06  1:04     ` ohci1394 broke 2.6.19 -> 2.6.20-rc1 Robert Crocombe
  1 sibling, 1 reply; 6+ messages in thread
From: Stefan Richter @ 2007-02-06  0:48 UTC (permalink / raw)
  To: Robert Crocombe; +Cc: linux1394-devel, linux-kernel

I wrote:
> @@ -1132,6 +1132,7 @@ static int __init ieee1394_init(void)
>  		}
>  	}
>  
> +	ret = driver_register(&nodemgr_mid_layer_driver);
>  	ret = class_register(&hpsb_host_class);
>  	if (ret < 0)
>  		goto release_all_bus;

I think I make that a
	WARN_ON(driver_register(&nodemgr_mid_layer_driver));
-- 
Stefan Richter
-=====-=-=== --=- --==-
http://arcgraph.de/sr/



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: ohci1394 broke 2.6.19 -> 2.6.20-rc1
  2007-02-06  0:39   ` Stefan Richter
  2007-02-06  0:48     ` Stefan Richter
@ 2007-02-06  1:04     ` Robert Crocombe
  1 sibling, 0 replies; 6+ messages in thread
From: Robert Crocombe @ 2007-02-06  1:04 UTC (permalink / raw)
  To: Stefan Richter; +Cc: linux1394-devel, linux-kernel

On 2/5/07, Stefan Richter <stefanr@s5r6.in-berlin.de> wrote:
> It's my oversight, see patch.

Yes, this fixes things.  Thanks!

-- 
Robert Crocombe

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH update] ieee1394: fix host device registering when nodemgr disabled
  2007-02-06  0:48     ` Stefan Richter
@ 2007-02-06  1:20       ` Stefan Richter
  0 siblings, 0 replies; 6+ messages in thread
From: Stefan Richter @ 2007-02-06  1:20 UTC (permalink / raw)
  To: Robert Crocombe; +Cc: linux1394-devel, linux-kernel

Since my commit 8252bbb1363b7fe963a3eb6f8a36da619a6f5a65 in 2.6.20-rc1,
host devices have a dummy driver attached.  Alas the driver was not
registered before use if ieee1394 was loaded with disable_nodemgr=1.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
---

Update:  - work without extern variables,
         - better error checks,
	 - add missing driver_unregister.

 drivers/ieee1394/nodemgr.c |   26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

Index: linux-2.6.20-rc5/drivers/ieee1394/nodemgr.c
===================================================================
--- linux-2.6.20-rc5.orig/drivers/ieee1394/nodemgr.c
+++ linux-2.6.20-rc5/drivers/ieee1394/nodemgr.c
@@ -258,7 +258,6 @@ static struct device_driver nodemgr_mid_
 struct device nodemgr_dev_template_host = {
 	.bus		= &ieee1394_bus_type,
 	.release	= nodemgr_release_host,
-	.driver		= &nodemgr_mid_layer_driver,
 };
 
 
@@ -1850,22 +1849,33 @@ int init_ieee1394_nodemgr(void)
 
 	error = class_register(&nodemgr_ne_class);
 	if (error)
-		return error;
-
+		goto fail_ne;
 	error = class_register(&nodemgr_ud_class);
-	if (error) {
-		class_unregister(&nodemgr_ne_class);
-		return error;
-	}
+	if (error)
+		goto fail_ud;
+
+	/* don't show this driver if nodemgr is off (disable_nodmgr=1) */
+	nodemgr_dev_template_host.driver = &nodemgr_mid_layer_driver;
+
 	error = driver_register(&nodemgr_mid_layer_driver);
+	if (error)
+		goto fail_ml;
+
 	hpsb_register_highlevel(&nodemgr_highlevel);
 	return 0;
+
+fail_ml:
+	class_unregister(&nodemgr_ud_class);
+fail_ud:
+	class_unregister(&nodemgr_ne_class);
+fail_ne:
+	return error;
 }
 
 void cleanup_ieee1394_nodemgr(void)
 {
 	hpsb_unregister_highlevel(&nodemgr_highlevel);
-
+	driver_unregister(&nodemgr_mid_layer_driver);
 	class_unregister(&nodemgr_ud_class);
 	class_unregister(&nodemgr_ne_class);
 }


-- 
Stefan Richter
-=====-=-=== --=- --==-
http://arcgraph.de/sr/


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2007-02-06  1:20 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-02-05 23:21 ohci1394 broke 2.6.19 -> 2.6.20-rc1 Robert Crocombe
2007-02-06  0:03 ` Stefan Richter
2007-02-06  0:39   ` Stefan Richter
2007-02-06  0:48     ` Stefan Richter
2007-02-06  1:20       ` [PATCH update] ieee1394: fix host device registering when nodemgr disabled Stefan Richter
2007-02-06  1:04     ` ohci1394 broke 2.6.19 -> 2.6.20-rc1 Robert Crocombe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).