All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 2.6.10-rc1 0/5] driver-model: misc bug fixes
@ 2004-11-04  7:01 Tejun Heo
  2004-11-04  7:02 ` [PATCH 2.6.10-rc1 1/5] driver-model: comment fix in bus.c Tejun Heo
                   ` (4 more replies)
  0 siblings, 5 replies; 20+ messages in thread
From: Tejun Heo @ 2004-11-04  7:01 UTC (permalink / raw)
  To: mochel, greg; +Cc: linux-kernel

 Hello,

 The following 5 patches are misc bugfixes spanning kobject, sysfs and
bus.  All patches are independent.

-- 
tejun


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2.6.10-rc1 1/5] driver-model: comment fix in bus.c
  2004-11-04  7:01 [PATCH 2.6.10-rc1 0/5] driver-model: misc bug fixes Tejun Heo
@ 2004-11-04  7:02 ` Tejun Heo
  2004-11-04 18:35   ` Greg KH
  2004-11-04  7:02 ` [PATCH 2.6.10-rc1 2/5] driver-model: bus_recan_devices() locking fix Tejun Heo
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 20+ messages in thread
From: Tejun Heo @ 2004-11-04  7:02 UTC (permalink / raw)
  To: mochel, greg; +Cc: linux-kernel

 df_01_driver_attach_comment_fix.patch

 bus_match() was renamed to driver_probe_device() but the comment for
device_attach() wasn't updated.  This patch updates it.


Signed-off-by: Tejun Heo <tj@home-tj.org>


Index: linux-export/drivers/base/bus.c
===================================================================
--- linux-export.orig/drivers/base/bus.c	2004-11-04 11:04:13.000000000 +0900
+++ linux-export/drivers/base/bus.c	2004-11-04 11:04:13.000000000 +0900
@@ -335,10 +335,10 @@ int device_attach(struct device * dev)
  *	driver_attach - try to bind driver to devices.
  *	@drv:	driver.
  *
- *	Walk the list of devices that the bus has on it and try to match
- *	the driver with each one.
- *	If bus_match() returns 0 and the @dev->driver is set, we've found
- *	a compatible pair.
+ *	Walk the list of devices that the bus has on it and try to
+ *	match the driver with each one.  If driver_probe_device()
+ *	returns 0 and the @dev->driver is set, we've found a
+ *	compatible pair.
  *
  *	Note that we ignore the -ENODEV error from driver_probe_device(),
  *	since it's perfectly valid for a driver not to bind to any devices.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2.6.10-rc1 2/5] driver-model: bus_recan_devices() locking fix
  2004-11-04  7:01 [PATCH 2.6.10-rc1 0/5] driver-model: misc bug fixes Tejun Heo
  2004-11-04  7:02 ` [PATCH 2.6.10-rc1 1/5] driver-model: comment fix in bus.c Tejun Heo
@ 2004-11-04  7:02 ` Tejun Heo
  2004-11-04 18:58   ` Greg KH
  2004-11-04  7:03 ` [PATCH 2.6.10-rc1 3/5] driver-model: sysfs_release() dangling pointer reference fix Tejun Heo
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 20+ messages in thread
From: Tejun Heo @ 2004-11-04  7:02 UTC (permalink / raw)
  To: mochel, greg; +Cc: linux-kernel

 df_02_bus_rescan_devcies_fix.patch

 bus_rescan_devices() eventually calls device_attach() and thus
requires write locking the corresponding bus.  The original code just
called bus_for_each_dev() which only read locks the bus.  This patch
separates __bus_for_each_dev() and __bus_for_each_drv(), which don't
do locking themselves, out from the original functions and call them
with read lock in the original functions and with write lock in
bus_rescan_devices().


Signed-off-by: Tejun Heo <tj@home-tj.org>


Index: linux-export/drivers/base/bus.c
===================================================================
--- linux-export.orig/drivers/base/bus.c	2004-11-04 11:04:13.000000000 +0900
+++ linux-export/drivers/base/bus.c	2004-11-04 11:04:13.000000000 +0900
@@ -135,6 +135,54 @@ static struct kobj_type ktype_bus = {
 
 decl_subsys(bus, &ktype_bus, NULL);
 
+static int
+__bus_for_each_dev(struct bus_type * bus, struct device * start,
+		   void * data, int (*fn)(struct device *, void *))
+{
+	struct device *dev;
+	struct list_head * head;
+	int error = 0;
+
+	if (!(bus = get_bus(bus)))
+		return -EINVAL;
+
+	head = &bus->devices.list;
+	dev = list_prepare_entry(start, head, bus_list);
+	list_for_each_entry_continue(dev, head, bus_list) {
+		get_device(dev);
+		error = fn(dev, data);
+		put_device(dev);
+		if (error)
+			break;
+	}
+	put_bus(bus);
+	return error;
+}
+
+static int
+__bus_for_each_drv(struct bus_type * bus, struct device_driver * start,
+		   void * data, int (*fn)(struct device_driver *, void *))
+{
+	struct list_head * head;
+	struct device_driver *drv;
+	int error = 0;
+
+	if(!(bus = get_bus(bus)))
+		return -EINVAL;
+
+	head = &bus->drivers.list;
+	drv = list_prepare_entry(start, head, kobj.entry);
+	list_for_each_entry_continue(drv, head, kobj.entry) {
+		get_driver(drv);
+		error = fn(drv, data);
+		put_driver(drv);
+		if(error)
+			break;
+	}
+	put_bus(bus);
+	return error;
+}
+
 /**
  *	bus_for_each_dev - device iterator.
  *	@bus:	bus type.
@@ -154,30 +202,15 @@ decl_subsys(bus, &ktype_bus, NULL);
  *	to retain this data, it should do, and increment the reference
  *	count in the supplied callback.
  */
+
 int bus_for_each_dev(struct bus_type * bus, struct device * start,
 		     void * data, int (*fn)(struct device *, void *))
 {
-	struct device *dev;
-	struct list_head * head;
-	int error = 0;
-
-	if (!(bus = get_bus(bus)))
-		return -EINVAL;
-
-	head = &bus->devices.list;
-	dev = list_prepare_entry(start, head, bus_list);
-
+	int ret;
 	down_read(&bus->subsys.rwsem);
-	list_for_each_entry_continue(dev, head, bus_list) {
-		get_device(dev);
-		error = fn(dev, data);
-		put_device(dev);
-		if (error)
-			break;
-	}
+	ret = __bus_for_each_dev(bus, start, data, fn);
 	up_read(&bus->subsys.rwsem);
-	put_bus(bus);
-	return error;
+	return ret;
 }
 
 /**
@@ -203,27 +236,11 @@ int bus_for_each_dev(struct bus_type * b
 int bus_for_each_drv(struct bus_type * bus, struct device_driver * start,
 		     void * data, int (*fn)(struct device_driver *, void *))
 {
-	struct list_head * head;
-	struct device_driver *drv;
-	int error = 0;
-
-	if(!(bus = get_bus(bus)))
-		return -EINVAL;
-
-	head = &bus->drivers.list;
-	drv = list_prepare_entry(start, head, kobj.entry);
-
+	int ret;
 	down_read(&bus->subsys.rwsem);
-	list_for_each_entry_continue(drv, head, kobj.entry) {
-		get_driver(drv);
-		error = fn(drv, data);
-		put_driver(drv);
-		if(error)
-			break;
-	}
+	ret = __bus_for_each_drv(bus, start, data, fn);
 	up_read(&bus->subsys.rwsem);
-	put_bus(bus);
-	return error;
+	return ret;
 }
 
 /**
@@ -601,7 +618,9 @@ int bus_rescan_devices(struct bus_type *
 {
 	int count = 0;
 
-	bus_for_each_dev(bus, NULL, &count, bus_rescan_devices_helper);
+	down_write(&bus->subsys.rwsem);
+	__bus_for_each_dev(bus, NULL, &count, bus_rescan_devices_helper);
+	up_write(&bus->subsys.rwsem);
 
 	return count;
 }

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2.6.10-rc1 3/5] driver-model: sysfs_release() dangling pointer reference fix
  2004-11-04  7:01 [PATCH 2.6.10-rc1 0/5] driver-model: misc bug fixes Tejun Heo
  2004-11-04  7:02 ` [PATCH 2.6.10-rc1 1/5] driver-model: comment fix in bus.c Tejun Heo
  2004-11-04  7:02 ` [PATCH 2.6.10-rc1 2/5] driver-model: bus_recan_devices() locking fix Tejun Heo
@ 2004-11-04  7:03 ` Tejun Heo
  2004-11-04 18:59   ` Greg KH
  2004-11-04  7:04 ` [PATCH 2.6.10-rc1 4/5] driver-model: kobject_add() error path reference counting fix Tejun Heo
  2004-11-04  7:05 ` [PATCH 2.6.10-rc1 5/5] driver-model: device_add() " Tejun Heo
  4 siblings, 1 reply; 20+ messages in thread
From: Tejun Heo @ 2004-11-04  7:03 UTC (permalink / raw)
  To: mochel, greg; +Cc: linux-kernel

 df_03_sysfs_release_fix.patch

 Some attributes are allocated dynamically (e.g. module and device
parameters) and are usually deallocated when the assoicated kobject is
released.  So, it's not safe to access attr after putting the kobject.


Signed-off-by: Tejun Heo <tj@home-tj.org>


Index: linux-export/fs/sysfs/file.c
===================================================================
--- linux-export.orig/fs/sysfs/file.c	2004-11-04 10:25:58.000000000 +0900
+++ linux-export/fs/sysfs/file.c	2004-11-04 11:04:14.000000000 +0900
@@ -330,11 +330,13 @@ static int sysfs_release(struct inode * 
 {
 	struct kobject * kobj = to_kobj(filp->f_dentry->d_parent);
 	struct attribute * attr = to_attr(filp->f_dentry);
+	struct module * owner = attr->owner;
 	struct sysfs_buffer * buffer = filp->private_data;
 
 	if (kobj) 
 		kobject_put(kobj);
-	module_put(attr->owner);
+	/* After this point, attr should not be accessed. */
+	module_put(owner);
 
 	if (buffer) {
 		if (buffer->page)

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2.6.10-rc1 4/5] driver-model: kobject_add() error path reference counting fix
  2004-11-04  7:01 [PATCH 2.6.10-rc1 0/5] driver-model: misc bug fixes Tejun Heo
                   ` (2 preceding siblings ...)
  2004-11-04  7:03 ` [PATCH 2.6.10-rc1 3/5] driver-model: sysfs_release() dangling pointer reference fix Tejun Heo
@ 2004-11-04  7:04 ` Tejun Heo
  2004-11-04 19:00   ` Greg KH
  2004-11-04  7:05 ` [PATCH 2.6.10-rc1 5/5] driver-model: device_add() " Tejun Heo
  4 siblings, 1 reply; 20+ messages in thread
From: Tejun Heo @ 2004-11-04  7:04 UTC (permalink / raw)
  To: mochel, greg; +Cc: linux-kernel

 df_04_kobject_add_ref_fix.patch

 In kobject_add(), @kobj wasn't put'd properly on error path.  This
patch fixes it.


Signed-off-by: Tejun Heo <tj@home-tj.org>


Index: linux-export/lib/kobject.c
===================================================================
--- linux-export.orig/lib/kobject.c	2004-11-04 10:25:58.000000000 +0900
+++ linux-export/lib/kobject.c	2004-11-04 11:04:14.000000000 +0900
@@ -183,6 +183,7 @@ int kobject_add(struct kobject * kobj)
 		unlink(kobj);
 		if (parent)
 			kobject_put(parent);
+		kobject_put(kobj);
 	} else {
 		kobject_hotplug(kobj, KOBJ_ADD);
 	}

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2.6.10-rc1 5/5] driver-model: device_add() error path reference counting fix
  2004-11-04  7:01 [PATCH 2.6.10-rc1 0/5] driver-model: misc bug fixes Tejun Heo
                   ` (3 preceding siblings ...)
  2004-11-04  7:04 ` [PATCH 2.6.10-rc1 4/5] driver-model: kobject_add() error path reference counting fix Tejun Heo
@ 2004-11-04  7:05 ` Tejun Heo
  2004-11-04 20:07   ` Greg KH
  4 siblings, 1 reply; 20+ messages in thread
From: Tejun Heo @ 2004-11-04  7:05 UTC (permalink / raw)
  To: mochel, greg; +Cc: linux-kernel

 df_05_device_add_ref_fix.patch

 In device_add(), @dev wan't put'd properly when it has zero length
bus_id (error path).  Fixed.


Signed-off-by: Tejun Heo <tj@home-tj.org>


Index: linux-export/drivers/base/core.c
===================================================================
--- linux-export.orig/drivers/base/core.c	2004-11-04 10:25:58.000000000 +0900
+++ linux-export/drivers/base/core.c	2004-11-04 11:04:14.000000000 +0900
@@ -209,12 +209,13 @@ void device_initialize(struct device *de
  */
 int device_add(struct device *dev)
 {
-	struct device * parent;
+	struct device * parent = NULL;
 	int error;
 
+	error = -EINVAL;
 	dev = get_device(dev);
 	if (!dev || !strlen(dev->bus_id))
-		return -EINVAL;
+		goto Error;
 
 	parent = get_device(dev->parent);
 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2.6.10-rc1 1/5] driver-model: comment fix in bus.c
  2004-11-04  7:02 ` [PATCH 2.6.10-rc1 1/5] driver-model: comment fix in bus.c Tejun Heo
@ 2004-11-04 18:35   ` Greg KH
  0 siblings, 0 replies; 20+ messages in thread
From: Greg KH @ 2004-11-04 18:35 UTC (permalink / raw)
  To: Tejun Heo; +Cc: mochel, linux-kernel

On Thu, Nov 04, 2004 at 04:02:16PM +0900, Tejun Heo wrote:
>  df_01_driver_attach_comment_fix.patch
> 
>  bus_match() was renamed to driver_probe_device() but the comment for
> device_attach() wasn't updated.  This patch updates it.
> 
> 
> Signed-off-by: Tejun Heo <tj@home-tj.org>

Applied, thanks.

Oh, Pat's email address is not at osdl.org anymore, it's
<mochel@digitalimplant.org> now.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2.6.10-rc1 2/5] driver-model: bus_recan_devices() locking fix
  2004-11-04  7:02 ` [PATCH 2.6.10-rc1 2/5] driver-model: bus_recan_devices() locking fix Tejun Heo
@ 2004-11-04 18:58   ` Greg KH
  2004-11-04 19:12     ` Dmitry Torokhov
  0 siblings, 1 reply; 20+ messages in thread
From: Greg KH @ 2004-11-04 18:58 UTC (permalink / raw)
  To: Tejun Heo; +Cc: mochel, linux-kernel

On Thu, Nov 04, 2004 at 04:02:58PM +0900, Tejun Heo wrote:
>  df_02_bus_rescan_devcies_fix.patch
> 
>  bus_rescan_devices() eventually calls device_attach() and thus
> requires write locking the corresponding bus.  The original code just
> called bus_for_each_dev() which only read locks the bus.  This patch
> separates __bus_for_each_dev() and __bus_for_each_drv(), which don't
> do locking themselves, out from the original functions and call them
> with read lock in the original functions and with write lock in
> bus_rescan_devices().
> 
> 
> Signed-off-by: Tejun Heo <tj@home-tj.org>

Thanks, I cleaned up the formatting a bit in this patch and applied it.

greg k-h

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2.6.10-rc1 3/5] driver-model: sysfs_release() dangling pointer reference fix
  2004-11-04  7:03 ` [PATCH 2.6.10-rc1 3/5] driver-model: sysfs_release() dangling pointer reference fix Tejun Heo
@ 2004-11-04 18:59   ` Greg KH
  0 siblings, 0 replies; 20+ messages in thread
From: Greg KH @ 2004-11-04 18:59 UTC (permalink / raw)
  To: Tejun Heo; +Cc: mochel, linux-kernel

On Thu, Nov 04, 2004 at 04:03:37PM +0900, Tejun Heo wrote:
>  df_03_sysfs_release_fix.patch
> 
>  Some attributes are allocated dynamically (e.g. module and device
> parameters) and are usually deallocated when the assoicated kobject is
> released.  So, it's not safe to access attr after putting the kobject.
> 
> 
> Signed-off-by: Tejun Heo <tj@home-tj.org>

Applied, thanks.

greg k-h

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2.6.10-rc1 4/5] driver-model: kobject_add() error path reference counting fix
  2004-11-04  7:04 ` [PATCH 2.6.10-rc1 4/5] driver-model: kobject_add() error path reference counting fix Tejun Heo
@ 2004-11-04 19:00   ` Greg KH
  0 siblings, 0 replies; 20+ messages in thread
From: Greg KH @ 2004-11-04 19:00 UTC (permalink / raw)
  To: Tejun Heo; +Cc: mochel, linux-kernel

On Thu, Nov 04, 2004 at 04:04:32PM +0900, Tejun Heo wrote:
>  df_04_kobject_add_ref_fix.patch
> 
>  In kobject_add(), @kobj wasn't put'd properly on error path.  This
> patch fixes it.
> 
> 
> Signed-off-by: Tejun Heo <tj@home-tj.org>

Applied, thanks.

greg k-h

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2.6.10-rc1 2/5] driver-model: bus_recan_devices() locking fix
  2004-11-04 18:58   ` Greg KH
@ 2004-11-04 19:12     ` Dmitry Torokhov
  2004-11-05  6:14       ` Tejun Heo
  0 siblings, 1 reply; 20+ messages in thread
From: Dmitry Torokhov @ 2004-11-04 19:12 UTC (permalink / raw)
  To: Greg KH; +Cc: Tejun Heo, LKML

Greg KH <greg@kroah.com> wrote:
> On Thu, Nov 04, 2004 at 04:02:58PM +0900, Tejun Heo wrote:
> >  df_02_bus_rescan_devcies_fix.patch
> > 
> >  bus_rescan_devices() eventually calls device_attach() and thus
> > requires write locking the corresponding bus.  The original code just
> > called bus_for_each_dev() which only read locks the bus.  This patch
> > separates __bus_for_each_dev() and __bus_for_each_drv(), which don't
> > do locking themselves, out from the original functions and call them
> > with read lock in the original functions and with write lock in
> > bus_rescan_devices().
> > 
> > 
> > Signed-off-by: Tejun Heo <tj@home-tj.org>
> 
> Thanks, I cleaned up the formatting a bit in this patch and applied it.
> 

Hmm, I do not like that the patch now fiddles with bus's rwsem before
incrementing bus's refcount.

I think just iterating through device list right the bus_rescan_devices
will be good enough. I sent the patch together with other 3, did it get
lost? 

-- 
Dmitry


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2.6.10-rc1 5/5] driver-model: device_add() error path reference counting fix
  2004-11-04  7:05 ` [PATCH 2.6.10-rc1 5/5] driver-model: device_add() " Tejun Heo
@ 2004-11-04 20:07   ` Greg KH
  0 siblings, 0 replies; 20+ messages in thread
From: Greg KH @ 2004-11-04 20:07 UTC (permalink / raw)
  To: Tejun Heo; +Cc: mochel, linux-kernel

On Thu, Nov 04, 2004 at 04:05:02PM +0900, Tejun Heo wrote:
>  df_05_device_add_ref_fix.patch
> 
>  In device_add(), @dev wan't put'd properly when it has zero length
> bus_id (error path).  Fixed.

Applied, thanks.

greg k-h

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2.6.10-rc1 2/5] driver-model: bus_recan_devices() locking fix
  2004-11-04 19:12     ` Dmitry Torokhov
@ 2004-11-05  6:14       ` Tejun Heo
  2004-11-05 16:33         ` Dmitry Torokhov
  0 siblings, 1 reply; 20+ messages in thread
From: Tejun Heo @ 2004-11-05  6:14 UTC (permalink / raw)
  To: Dmitry Torokhov; +Cc: Greg KH, LKML

 Hello Dmitry,

On Thu, Nov 04, 2004 at 11:12:58AM -0800, Dmitry Torokhov wrote:
> Hmm, I do not like that the patch now fiddles with bus's rwsem before
> incrementing bus's refcount.

 Yes, I agree with you.

> I think just iterating through device list right the bus_rescan_devices
> will be good enough. I sent the patch together with other 3, did it get
> lost?

 Or maybe just adding a @write_lock argument to bus_for_each_*()
functions or create a new function named __bus_for_each_*() which
takes additional @write_lock argument.

 However, I don't really think the get_bus/put_bus() calls are
necessary there.  Unless the caller already has a reference to the
bus, we're in trouble anyway.  get_bus() cannot synchronize with the
last put_bus(), we need outer synchronization to protect that
(individual bus drivers are expected to perform the synchronization).

 One thing that bothers me is all the "if (kobject_get(x))" codes.
Currently, it doesn't seem to do anything other than checking if x is
NULL or not and incrementing the reference count.  i.e. the return
value only represents if x is NULL or not.  I find the codes
misleading.  It seems like above statement can synchronize get() and
the last put() which isn't true.  I don't how how the using xchg in
kref went, but with preemtion turned on, I don't see how it's gonna
change the situation.

 Well, I should write the following in another mail but I'll just
continue here.

  Another problem I've encountered regarding kobject refcounting is
that currently each sysfs attr carries its owner and gets the owning
module when the attr is accessed.  However, there are attributes which
are provided not by the module which implements whatever the kobject
represents but by upper-level module or the kernel builtins.  So, it's
possible to unload a module while it's kobject is still alive by
holding on to such attributes, and, when the attribute is closed,
panic occurs, of course (release callback is unloaded already).

 Modules which device_driver kobjects are okay because unload_sem is
used to prevent the module from unloading before it is released;
however, bus_type, class and class_device kobjects have this problem.
I found detach_state attr and all common USB attributes.  Also, all
class_device attributes, including simple_class dev attribute, which
are defined by the class it belongs to suffer this problem (allocated
by individual drivers but owned by the class).

 We can do one of

 1. add unload_sem to all driver-model structures.
	-> I believe this defeats the purpose of try_module_get()
	stuff.  We wouldn't know when the module will unload.

 2. get the owner field correct for all attributes.
	-> This will be a chore.  We'll need to allocate separate
	attr structures for each kobject for all the generic attrs.

 3. remove the owner field from the attribute structure and add an
    owner to kobject and make sysfs ops to hold the owner of the
    respective kobject.
	-> I think this is the best and most consistent solution.  As
	we already assume that module which implements a child kobject
	should depend upon the module which implements its parent, all
	attr module reference counting will be correct by using the
	kobject's owner.

 So, I'm currently working on solution #3.

 IMHO, we really need some kind of documentation describing the
synchronization, reference counting and behavior assumptions in the
driver-model.  It just seems a bit too subtle and fragile as it stands
now.

 Thanks.

-- 
tejun


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2.6.10-rc1 2/5] driver-model: bus_recan_devices() locking fix
  2004-11-05  6:14       ` Tejun Heo
@ 2004-11-05 16:33         ` Dmitry Torokhov
  2004-11-09  1:27           ` Tejun Heo
  2004-11-10  0:40           ` Greg KH
  0 siblings, 2 replies; 20+ messages in thread
From: Dmitry Torokhov @ 2004-11-05 16:33 UTC (permalink / raw)
  To: Tejun Heo; +Cc: Greg KH, LKML

On Fri, 5 Nov 2004 15:14:39 +0900, Tejun Heo <tj@home-tj.org> wrote:
> Or maybe just adding a @write_lock argument to bus_for_each_*()
> functions or create a new function named __bus_for_each_*() which
> takes additional @write_lock argument.
> 
> However, I don't really think the get_bus/put_bus() calls are
> necessary there.  Unless the caller already has a reference to the
> bus, we're in trouble anyway.  get_bus() cannot synchronize with the
> last put_bus(), we need outer synchronization to protect that
> (individual bus drivers are expected to perform the synchronization).
> 

Yes I think you are right.

> One thing that bothers me is all the "if (kobject_get(x))" codes.
> Currently, it doesn't seem to do anything other than checking if x is
> NULL or not and incrementing the reference count.  i.e. the return
> value only represents if x is NULL or not.  I find the codes
> misleading.  It seems like above statement can synchronize get() and
> the last put() which isn't true.  I don't how how the using xchg in
> kref went, but with preemtion turned on, I don't see how it's gonna
> change the situation.
>

I agree it is somewhat misleading. And even if _get would synchronize
with _put taht wouldmean that the caller does not have a valid reference
and the kernel will crash there next time callers uses the object in
question.

I think we just have lay down the rules that one needs to get a reference
to an object in the following cases:
- object creation (first reference)
- linking some other object to the object in question. Every link to the
  object should increase reference count.
- before passing object to another thread of execution to guarantee that
  the object will live long enough for both threads.

This we can get rid of the litter in form:
if (get_bus(bus)) {
                sysfs_remove_file(&bus->subsys.kset.kobj, &attr->attr);
                put_bus(bus);
        }

sysfs_remove_file should do just fine by itself, if caller does not have bus
reference it's its fault.

Greg, what do you think?

> Well, I should write the following in another mail but I'll just
> continue here.
> 
>  Another problem I've encountered regarding kobject refcounting is
> that currently each sysfs attr carries its owner and gets the owning
> module when the attr is accessed.  However, there are attributes which
> are provided not by the module which implements whatever the kobject
> represents but by upper-level module or the kernel builtins.  So, it's
> possible to unload a module while it's kobject is still alive by
> holding on to such attributes, and, when the attribute is closed,
> panic occurs, of course (release callback is unloaded already).
> 

I actually spent quite few days thinking about this problem and here
is what I come up with:

You have bus core module that provides generic release function for
devices on the bus. When object is registered you bump up module
count for the core module.
You also have modues that provide devices. At device unregister time
they do all device-specific cleanup, but leave attributes (and memory)
handled by core modules intact. This allows to unload device module
safely at any time and then when reference to the last generic attribute
is dropped the generic cleanup finally removes last traces of the device.

<skip>
> 
> We can do one of
> 
> 1. add unload_sem to all driver-model structures.
>        -> I believe this defeats the purpose of try_module_get()
>        stuff.  We wouldn't know when the module will unload.
> 

As Al Viro pointed out "rmmod module < /sys/bus/devices/xxxx/attr"
will deadlock the kernel with this scheme.

> 2. get the owner field correct for all attributes.
>        -> This will be a chore.  We'll need to allocate separate
>        attr structures for each kobject for all the generic attrs.
> 
> 3. remove the owner field from the attribute structure and add an
>    owner to kobject and make sysfs ops to hold the owner of the
>    respective kobject.
>        -> I think this is the best and most consistent solution.  As
>        we already assume that module which implements a child kobject
>        should depend upon the module which implements its parent, all
>        attr module reference counting will be correct by using the
>        kobject's owner.
> 
> So, I'm currently working on solution #3.
> 

I think this is an overkill and will inflate every kobject out there,
unless you will find a hole in my scheme...

-- 
Dmitry

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2.6.10-rc1 2/5] driver-model: bus_recan_devices() locking fix
  2004-11-05 16:33         ` Dmitry Torokhov
@ 2004-11-09  1:27           ` Tejun Heo
  2004-11-09  1:43             ` Dmitry Torokhov
  2004-11-10  0:40           ` Greg KH
  1 sibling, 1 reply; 20+ messages in thread
From: Tejun Heo @ 2004-11-09  1:27 UTC (permalink / raw)
  To: dtor_core; +Cc: Greg KH, LKML

On Fri, Nov 05, 2004 at 11:33:37AM -0500, Dmitry Torokhov wrote:
> On Fri, 5 Nov 2004 15:14:39 +0900, Tejun Heo <tj@home-tj.org> wrote:
>
> >  Another problem I've encountered regarding kobject refcounting is
> > that currently each sysfs attr carries its owner and gets the owning
> > module when the attr is accessed.  However, there are attributes which
> > are provided not by the module which implements whatever the kobject
> > represents but by upper-level module or the kernel builtins.  So, it's
> > possible to unload a module while it's kobject is still alive by
> > holding on to such attributes, and, when the attribute is closed,
> > panic occurs, of course (release callback is unloaded already).
> > 
> 
> I actually spent quite few days thinking about this problem and here
> is what I come up with:
> 
> You have bus core module that provides generic release function for
> devices on the bus. When object is registered you bump up module
> count for the core module.
> You also have modues that provide devices. At device unregister time
> they do all device-specific cleanup, but leave attributes (and memory)
> handled by core modules intact. This allows to unload device module
> safely at any time and then when reference to the last generic attribute
> is dropped the generic cleanup finally removes last traces of the device.

 Are you suggesting splitting every bus implementation into two
separate modules?  Otherwise, we will have to move kobj field of every
bus-specific device structure to the head (to use the common release
function).  Either way, it can be done, but it just seems another
twist added to the already twisted refcounting.  Moreoever, not only
the devices are problematic, refcounting in class devices and block
device hierarchy seems broken too.

> <skip>
> > 
> > We can do one of
> > 
> > 1. add unload_sem to all driver-model structures.
> >        -> I believe this defeats the purpose of try_module_get()
> >        stuff.  We wouldn't know when the module will unload.
> > 
> 
> As Al Viro pointed out "rmmod module < /sys/bus/devices/xxxx/attr"
> will deadlock the kernel with this scheme.
> 
> > 2. get the owner field correct for all attributes.
> >        -> This will be a chore.  We'll need to allocate separate
> >        attr structures for each kobject for all the generic attrs.
> > 
> > 3. remove the owner field from the attribute structure and add an
> >    owner to kobject and make sysfs ops to hold the owner of the
> >    respective kobject.
> >        -> I think this is the best and most consistent solution.  As
> >        we already assume that module which implements a child kobject
> >        should depend upon the module which implements its parent, all
> >        attr module reference counting will be correct by using the
> >        kobject's owner.
> > 
> > So, I'm currently working on solution #3.
> > 
> 
> I think this is an overkill and will inflate every kobject out there,
> unless you will find a hole in my scheme...

 But all attrs will be deflated and it just seems the right thing to
do(tm).

-- 
tejun


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2.6.10-rc1 2/5] driver-model: bus_recan_devices() locking fix
  2004-11-09  1:27           ` Tejun Heo
@ 2004-11-09  1:43             ` Dmitry Torokhov
  2004-11-10  7:13               ` Tejun Heo
  0 siblings, 1 reply; 20+ messages in thread
From: Dmitry Torokhov @ 2004-11-09  1:43 UTC (permalink / raw)
  To: Tejun Heo; +Cc: Greg KH, LKML

On Monday 08 November 2004 08:27 pm, Tejun Heo wrote:
> On Fri, Nov 05, 2004 at 11:33:37AM -0500, Dmitry Torokhov wrote:
> > On Fri, 5 Nov 2004 15:14:39 +0900, Tejun Heo <tj@home-tj.org> wrote:
> >
> > >  Another problem I've encountered regarding kobject refcounting is
> > > that currently each sysfs attr carries its owner and gets the owning
> > > module when the attr is accessed.  However, there are attributes which
> > > are provided not by the module which implements whatever the kobject
> > > represents but by upper-level module or the kernel builtins.  So, it's
> > > possible to unload a module while it's kobject is still alive by
> > > holding on to such attributes, and, when the attribute is closed,
> > > panic occurs, of course (release callback is unloaded already).
> > > 
> > 
> > I actually spent quite few days thinking about this problem and here
> > is what I come up with:
> > 
> > You have bus core module that provides generic release function for
> > devices on the bus. When object is registered you bump up module
> > count for the core module.
> > You also have modues that provide devices. At device unregister time
> > they do all device-specific cleanup, but leave attributes (and memory)
> > handled by core modules intact. This allows to unload device module
> > safely at any time and then when reference to the last generic attribute
> > is dropped the generic cleanup finally removes last traces of the device.
> 
>  Are you suggesting splitting every bus implementation into two
> separate modules?  Otherwise, we will have to move kobj field of every
> bus-specific device structure to the head (to use the common release
> function).  Either way, it can be done, but it just seems another
> twist added to the already twisted refcounting.  Moreoever, not only
> the devices are problematic, refcounting in class devices and block
> device hierarchy seems broken too.
>

It is already defacto implementation standard - every subsystem has a core
modules (pci, usbcore, serio) plus modules (drivers) actually talking to
hardware and registering devices (ports, whatever) with the corresponding
core. It is just a matter of splitting cleanup functions into core and
device-specific parts.
 
> > <skip>
> > > 
> > > We can do one of
> > > 
> > > 1. add unload_sem to all driver-model structures.
> > >        -> I believe this defeats the purpose of try_module_get()
> > >        stuff.  We wouldn't know when the module will unload.
> > > 
> > 
> > As Al Viro pointed out "rmmod module < /sys/bus/devices/xxxx/attr"
> > will deadlock the kernel with this scheme.
> > 
> > > 2. get the owner field correct for all attributes.
> > >        -> This will be a chore.  We'll need to allocate separate
> > >        attr structures for each kobject for all the generic attrs.
> > > 
> > > 3. remove the owner field from the attribute structure and add an
> > >    owner to kobject and make sysfs ops to hold the owner of the
> > >    respective kobject.
> > >        -> I think this is the best and most consistent solution.  As
> > >        we already assume that module which implements a child kobject
> > >        should depend upon the module which implements its parent, all
> > >        attr module reference counting will be correct by using the
> > >        kobject's owner.
> > > 
> > > So, I'm currently working on solution #3.
> > > 
> > 
> > I think this is an overkill and will inflate every kobject out there,
> > unless you will find a hole in my scheme...
> 
>  But all attrs will be deflated and it just seems the right thing to
> do(tm).
> 

I think there are much more kobjects than attrs.

-- 
Dmitry

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2.6.10-rc1 2/5] driver-model: bus_recan_devices() locking fix
  2004-11-05 16:33         ` Dmitry Torokhov
  2004-11-09  1:27           ` Tejun Heo
@ 2004-11-10  0:40           ` Greg KH
  2004-11-10  4:17             ` Dmitry Torokhov
  1 sibling, 1 reply; 20+ messages in thread
From: Greg KH @ 2004-11-10  0:40 UTC (permalink / raw)
  To: dtor_core; +Cc: Tejun Heo, LKML

On Fri, Nov 05, 2004 at 11:33:37AM -0500, Dmitry Torokhov wrote:
> On Fri, 5 Nov 2004 15:14:39 +0900, Tejun Heo <tj@home-tj.org> wrote:
> > Or maybe just adding a @write_lock argument to bus_for_each_*()
> > functions or create a new function named __bus_for_each_*() which
> > takes additional @write_lock argument.
> > 
> > However, I don't really think the get_bus/put_bus() calls are
> > necessary there.  Unless the caller already has a reference to the
> > bus, we're in trouble anyway.  get_bus() cannot synchronize with the
> > last put_bus(), we need outer synchronization to protect that
> > (individual bus drivers are expected to perform the synchronization).
> > 
> 
> Yes I think you are right.
> 
> > One thing that bothers me is all the "if (kobject_get(x))" codes.
> > Currently, it doesn't seem to do anything other than checking if x is
> > NULL or not and incrementing the reference count.  i.e. the return
> > value only represents if x is NULL or not.  I find the codes
> > misleading.  It seems like above statement can synchronize get() and
> > the last put() which isn't true.  I don't how how the using xchg in
> > kref went, but with preemtion turned on, I don't see how it's gonna
> > change the situation.
> >
> 
> I agree it is somewhat misleading. And even if _get would synchronize
> with _put taht wouldmean that the caller does not have a valid reference
> and the kernel will crash there next time callers uses the object in
> question.
> 
> I think we just have lay down the rules that one needs to get a reference
> to an object in the following cases:
> - object creation (first reference)
> - linking some other object to the object in question. Every link to the
>   object should increase reference count.
> - before passing object to another thread of execution to guarantee that
>   the object will live long enough for both threads.

The rules are defined.  See my OLS 2004 paper on krefs for details.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2.6.10-rc1 2/5] driver-model: bus_recan_devices() locking fix
  2004-11-10  0:40           ` Greg KH
@ 2004-11-10  4:17             ` Dmitry Torokhov
  2004-11-16  5:52               ` Greg KH
  0 siblings, 1 reply; 20+ messages in thread
From: Dmitry Torokhov @ 2004-11-10  4:17 UTC (permalink / raw)
  To: Greg KH; +Cc: Tejun Heo, LKML

On Tuesday 09 November 2004 07:40 pm, Greg KH wrote:
> > I think we just have lay down the rules that one needs to get a reference
> > to an object in the following cases:
> > - object creation (first reference)
> > - linking some other object to the object in question. Every link to the
> > ? object should increase reference count.
> > - before passing object to another thread of execution to guarantee that
> > ? the object will live long enough for both threads.
> 
> The rules are defined. ?See my OLS 2004 paper on krefs for details.
> 

So that means you will patches like the one below, right?

-- 
Dmitry


===================================================================


ChangeSet@1.1964, 2004-11-09 23:14:31-05:00, dtor_core@ameritech.net
  Driver core: remove unnecessary get_driver/put_driver from driver.c
  
  Signed-off-by: Dmitry Torokhov <dtor@mail.ru>


 driver.c |   13 ++-----------
 1 files changed, 2 insertions(+), 11 deletions(-)


===================================================================



diff -Nru a/drivers/base/driver.c b/drivers/base/driver.c
--- a/drivers/base/driver.c	2004-11-09 23:16:43 -05:00
+++ b/drivers/base/driver.c	2004-11-09 23:16:43 -05:00
@@ -26,13 +26,7 @@
 
 int driver_create_file(struct device_driver * drv, struct driver_attribute * attr)
 {
-	int error;
-	if (get_driver(drv)) {
-		error = sysfs_create_file(&drv->kobj, &attr->attr);
-		put_driver(drv);
-	} else
-		error = -EINVAL;
-	return error;
+	return sysfs_create_file(&drv->kobj, &attr->attr);
 }
 
 
@@ -44,10 +38,7 @@
 
 void driver_remove_file(struct device_driver * drv, struct driver_attribute * attr)
 {
-	if (get_driver(drv)) {
-		sysfs_remove_file(&drv->kobj, &attr->attr);
-		put_driver(drv);
-	}
+	sysfs_remove_file(&drv->kobj, &attr->attr);
 }
 
 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2.6.10-rc1 2/5] driver-model: bus_recan_devices() locking fix
  2004-11-09  1:43             ` Dmitry Torokhov
@ 2004-11-10  7:13               ` Tejun Heo
  0 siblings, 0 replies; 20+ messages in thread
From: Tejun Heo @ 2004-11-10  7:13 UTC (permalink / raw)
  To: Dmitry Torokhov; +Cc: Greg KH, LKML

 Hello, Dmitry.

On Mon, Nov 08, 2004 at 08:43:21PM -0500, Dmitry Torokhov wrote:
> On Monday 08 November 2004 08:27 pm, Tejun Heo wrote:
> >  Are you suggesting splitting every bus implementation into two
> > separate modules?  Otherwise, we will have to move kobj field of every
> > bus-specific device structure to the head (to use the common release
> > function).  Either way, it can be done, but it just seems another
> > twist added to the already twisted refcounting.  Moreoever, not only
> > the devices are problematic, refcounting in class devices and block
> > device hierarchy seems broken too.
> >
> 
> It is already defacto implementation standard - every subsystem has a core
> modules (pci, usbcore, serio) plus modules (drivers) actually talking to
> hardware and registering devices (ports, whatever) with the corresponding
> core. It is just a matter of splitting cleanup functions into core and
> device-specific parts.

 AFAIK, PCI isn't compiled as modules and USB has ohci/uhci/ehci stuff
and serio has ports stuff.  But still, I cannot agree with you.

 1. It's too twisted.
	a. to prevent premature unload, we depend on
	   - attr -> kobj -> module_ref to bus core module.  By doing
	   this, we're making a chain of two different types of
	   reference counts.  IMHO, it's just a bit too subtle.
	b. to enable bus module unload, we depend on
	   - device creation/detach operations are done in a separate
	     module than device release.

 2. too restrictive.
	a. any bus implementation must be implemented as two separate
	   modules just to enable peaceful module unloading
	b. all other places which make use of kobject and any type of
	   generic attr must use only the release routine proper to
	   the core module.  No callbacks, no data dereferences....
	   (for example, scsi_host class uses scsi_host_template
	    structure which contain class device specific callbacks
	    when releaseing scsi_host class device.  This is
	    currently broken and won't be fixed by your method.)

 3. not intutive.  Maybe this is the most important.
	a. it's natural to expect the release code has access to
	   codes & data of whatever it's releasing.
	b. children bypassing dependency hierarchy and depending
	   directly on indirect ancestor is just difficult to
           track, memorize and do correctly.
    IMO, above a and b are the primary reasons for why kobject/attr
    refcountings are currently done incorrectly in many places.

> >  But all attrs will be deflated and it just seems the right thing to
> > do(tm).
> > 
> 
> I think there are much more kobjects than attrs.

 Yes, there are.  But many objects which contain kobjs keep separate
owner field anyway, which can be removed after kobj can handle module
refcounting, and even without that, I think 4 or 8 bytes addition to
struct kobject is worth it if we can get the refcounting straight.

 Thanks.

-- 
tejun


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2.6.10-rc1 2/5] driver-model: bus_recan_devices() locking fix
  2004-11-10  4:17             ` Dmitry Torokhov
@ 2004-11-16  5:52               ` Greg KH
  0 siblings, 0 replies; 20+ messages in thread
From: Greg KH @ 2004-11-16  5:52 UTC (permalink / raw)
  To: Dmitry Torokhov; +Cc: Tejun Heo, LKML

On Tue, Nov 09, 2004 at 11:17:31PM -0500, Dmitry Torokhov wrote:
> On Tuesday 09 November 2004 07:40 pm, Greg KH wrote:
> > > I think we just have lay down the rules that one needs to get a reference
> > > to an object in the following cases:
> > > - object creation (first reference)
> > > - linking some other object to the object in question. Every link to the
> > > ? object should increase reference count.
> > > - before passing object to another thread of execution to guarantee that
> > > ? the object will live long enough for both threads.
> > 
> > The rules are defined. ?See my OLS 2004 paper on krefs for details.
> > 
> 
> So that means you will patches like the one below, right?
> 
> diff -Nru a/drivers/base/driver.c b/drivers/base/driver.c
> --- a/drivers/base/driver.c	2004-11-09 23:16:43 -05:00
> +++ b/drivers/base/driver.c	2004-11-09 23:16:43 -05:00
> @@ -26,13 +26,7 @@
>  
>  int driver_create_file(struct device_driver * drv, struct driver_attribute * attr)
>  {
> -	int error;
> -	if (get_driver(drv)) {
> -		error = sysfs_create_file(&drv->kobj, &attr->attr);
> -		put_driver(drv);
> -	} else
> -		error = -EINVAL;
> -	return error;
> +	return sysfs_create_file(&drv->kobj, &attr->attr);
>  }
>  
>  
> @@ -44,10 +38,7 @@
>  
>  void driver_remove_file(struct device_driver * drv, struct driver_attribute * attr)
>  {
> -	if (get_driver(drv)) {
> -		sysfs_remove_file(&drv->kobj, &attr->attr);
> -		put_driver(drv);
> -	}
> +	sysfs_remove_file(&drv->kobj, &attr->attr);
>  }

While not totally necessary, the current code is safe :)

I'll ask Pat about this next time I see him.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2004-11-16  5:58 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-11-04  7:01 [PATCH 2.6.10-rc1 0/5] driver-model: misc bug fixes Tejun Heo
2004-11-04  7:02 ` [PATCH 2.6.10-rc1 1/5] driver-model: comment fix in bus.c Tejun Heo
2004-11-04 18:35   ` Greg KH
2004-11-04  7:02 ` [PATCH 2.6.10-rc1 2/5] driver-model: bus_recan_devices() locking fix Tejun Heo
2004-11-04 18:58   ` Greg KH
2004-11-04 19:12     ` Dmitry Torokhov
2004-11-05  6:14       ` Tejun Heo
2004-11-05 16:33         ` Dmitry Torokhov
2004-11-09  1:27           ` Tejun Heo
2004-11-09  1:43             ` Dmitry Torokhov
2004-11-10  7:13               ` Tejun Heo
2004-11-10  0:40           ` Greg KH
2004-11-10  4:17             ` Dmitry Torokhov
2004-11-16  5:52               ` Greg KH
2004-11-04  7:03 ` [PATCH 2.6.10-rc1 3/5] driver-model: sysfs_release() dangling pointer reference fix Tejun Heo
2004-11-04 18:59   ` Greg KH
2004-11-04  7:04 ` [PATCH 2.6.10-rc1 4/5] driver-model: kobject_add() error path reference counting fix Tejun Heo
2004-11-04 19:00   ` Greg KH
2004-11-04  7:05 ` [PATCH 2.6.10-rc1 5/5] driver-model: device_add() " Tejun Heo
2004-11-04 20:07   ` Greg KH

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.