All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
@ 2010-03-18 13:58 Milan Broz
  2010-03-18 13:58 ` [PATCH 2/3] Add genhd flag requesting notification of partition changes only Milan Broz
                   ` (3 more replies)
  0 siblings, 4 replies; 40+ messages in thread
From: Milan Broz @ 2010-03-18 13:58 UTC (permalink / raw)
  To: dm-devel, kay.sievers; +Cc: Milan Broz

Block layer sends ADD uevent when new device is initialised.

But the device-mapper block devices are more complex,
initialisation consists of allocating underlying device and
loading mapping table.

Because from the userspace all block devices should behave
the same, patch defines new flag indicating that ADD event
should be suppressed in block layer.

If the flag is set, caller then take full responsibility
for enabling and sending events later when device is ready
to use.

Signed-off-by: Milan Broz <mbroz@redhat.com>
---
 drivers/md/dm-ioctl.c |   29 +++++++++++++++++++++++------
 drivers/md/dm.c       |   24 +++++++++++++++++++++---
 drivers/md/dm.h       |    5 +++++
 fs/partitions/check.c |    4 ++++
 include/linux/genhd.h |    1 +
 5 files changed, 54 insertions(+), 9 deletions(-)

diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index d7500e1..b670922 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -61,6 +61,24 @@ static DECLARE_RWSEM(_hash_lock);
  */
 static DEFINE_MUTEX(dm_hash_cells_mutex);
 
+static void dm_ioctl_uevent(struct mapped_device *md,
+			    enum kobject_action action,
+			    unsigned cookie,
+			    uint32_t *dm_flags)
+{
+	/*
+	 * The first change event is translated to add event.
+	 */
+	if (!dm_initialised_md(md)) {
+		if (action == KOBJ_CHANGE)
+			action = KOBJ_ADD;
+		WARN_ON(action != KOBJ_ADD);
+	}
+
+	if (!dm_kobject_uevent(md, action, cookie))
+		*dm_flags |= DM_UEVENT_GENERATED_FLAG;
+}
+
 static void init_buckets(struct list_head *buckets)
 {
 	unsigned int i;
@@ -345,8 +363,7 @@ static int dm_hash_rename(uint32_t cookie, uint32_t *flags, const char *old,
 		dm_table_put(table);
 	}
 
-	if (!dm_kobject_uevent(hc->md, KOBJ_CHANGE, cookie))
-		*flags |= DM_UEVENT_GENERATED_FLAG;
+	dm_ioctl_uevent(hc->md, KOBJ_CHANGE, cookie, flags);
 
 	dm_put(hc->md);
 	up_write(&_hash_lock);
@@ -738,8 +755,7 @@ static int dev_remove(struct dm_ioctl *param, size_t param_size)
 	__hash_remove(hc);
 	up_write(&_hash_lock);
 
-	if (!dm_kobject_uevent(md, KOBJ_REMOVE, param->event_nr))
-		param->flags |= DM_UEVENT_GENERATED_FLAG;
+	dm_ioctl_uevent(md, KOBJ_REMOVE, param->event_nr, &param->flags);
 
 	dm_put(md);
 	return 0;
@@ -903,8 +919,9 @@ static int do_resume(struct dm_ioctl *param)
 
 	if (dm_suspended_md(md)) {
 		r = dm_resume(md);
-		if (!r && !dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr))
-			param->flags |= DM_UEVENT_GENERATED_FLAG;
+		if (!r)
+			dm_ioctl_uevent(md, KOBJ_CHANGE, param->event_nr,
+					&param->flags);
 	}
 
 	if (old_map)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index d21e128..7b364d7 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -109,6 +109,7 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
 #define DMF_DELETING 4
 #define DMF_NOFLUSH_SUSPENDING 5
 #define DMF_QUEUE_IO_TO_THREAD 6
+#define DMF_INITIALISED 7
 
 /*
  * Work processed by per-device workqueue.
@@ -1932,6 +1933,7 @@ static struct mapped_device *alloc_dev(int minor)
 	md->disk->queue = md->queue;
 	md->disk->private_data = md;
 	sprintf(md->disk->disk_name, "dm-%d", minor);
+	md->disk->flags |= GENHD_FL_SUPPRESS_ADD_EVENT;
 	add_disk(md->disk);
 	format_dev_t(md->name, MKDEV(_major, minor));
 
@@ -2619,19 +2621,30 @@ out:
  * Event notification.
  *---------------------------------------------------------------*/
 int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
-		       unsigned cookie)
+		      unsigned cookie)
 {
 	char udev_cookie[DM_COOKIE_LENGTH];
 	char *envp[] = { udev_cookie, NULL };
+	int r = 0;
+
+	if (action == KOBJ_ADD) {
+		set_bit(DMF_INITIALISED, &md->flags);
+		dev_set_uevent_suppress(disk_to_dev(md->disk), 0);
+	}
 
 	if (!cookie)
-		return kobject_uevent(&disk_to_dev(md->disk)->kobj, action);
+		r = kobject_uevent(&disk_to_dev(md->disk)->kobj, action);
 	else {
 		snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u",
 			 DM_COOKIE_ENV_VAR_NAME, cookie);
-		return kobject_uevent_env(&disk_to_dev(md->disk)->kobj,
+		r = kobject_uevent_env(&disk_to_dev(md->disk)->kobj,
 					  action, envp);
 	}
+
+	if (!r && dev_get_uevent_suppress(disk_to_dev(md->disk)))
+		r = -EINVAL;
+
+	return r;
 }
 
 uint32_t dm_next_uevent_seq(struct mapped_device *md)
@@ -2698,6 +2711,11 @@ int dm_suspended_md(struct mapped_device *md)
 	return test_bit(DMF_SUSPENDED, &md->flags);
 }
 
+int dm_initialised_md(struct mapped_device *md)
+{
+	return test_bit(DMF_INITIALISED, &md->flags);
+}
+
 int dm_suspended(struct dm_target *ti)
 {
 	return dm_suspended_md(dm_table_get_md(ti->table));
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index bad1724..1e9d7fc 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -99,6 +99,11 @@ int dm_deleting_md(struct mapped_device *md);
 int dm_suspended_md(struct mapped_device *md);
 
 /*
+ * Is the device initialised? (Add event was sent.)
+ */
+int dm_initialised_md(struct mapped_device *md);
+
+/*
  * The device-mapper can be driven through one of two interfaces;
  * ioctl or filesystem, depending which patch you have applied.
  */
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index e8865c1..916173b 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -526,6 +526,10 @@ void register_disk(struct gendisk *disk)
 	blkdev_put(bdev, FMODE_READ);
 
 exit:
+	/* caller will send ADD event later */
+	if (disk->flags & GENHD_FL_SUPPRESS_ADD_EVENT)
+		return;
+
 	/* announce disk after possible partitions are created */
 	dev_set_uevent_suppress(ddev, 0);
 	kobject_uevent(&ddev->kobj, KOBJ_ADD);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 56b5051..ade71ff 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -116,6 +116,7 @@ struct hd_struct {
 #define GENHD_FL_SUPPRESS_PARTITION_INFO	32
 #define GENHD_FL_EXT_DEVT			64 /* allow extended devt */
 #define GENHD_FL_NATIVE_CAPACITY		128
+#define GENHD_FL_SUPPRESS_ADD_EVENT		256
 
 #define BLK_SCSI_MAX_CMDS	(256)
 #define BLK_SCSI_CMD_PER_LONG	(BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
-- 
1.7.0

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 2/3] Add genhd flag requesting notification of partition changes only.
  2010-03-18 13:58 [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl Milan Broz
@ 2010-03-18 13:58 ` Milan Broz
  2010-03-18 13:58 ` [PATCH 3/3] Do not send multiple REMOVE events for kobjects Milan Broz
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 40+ messages in thread
From: Milan Broz @ 2010-03-18 13:58 UTC (permalink / raw)
  To: dm-devel, kay.sievers; +Cc: Milan Broz

This patch provides notification mechanism which allows handle partition
code in userspace.

If the BLKRRPART ioctl arrives and GENHD_FL_PARTITION_CHANGE_NOTIFY
is set, just send uevent and ignore in-kernel partitioning code.

This is useful e.g. for device-mapper devices, which can use kpartx
or similar tool in udev rules.

Signed-off-by: Milan Broz <mbroz@redhat.com>
---
 block/ioctl.c         |    3 ++-
 drivers/md/dm.c       |    1 +
 fs/partitions/check.c |    6 ++++++
 include/linux/genhd.h |    8 ++++++++
 4 files changed, 17 insertions(+), 1 deletions(-)

diff --git a/block/ioctl.c b/block/ioctl.c
index be48ea5..77faed3 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -101,7 +101,8 @@ static int blkdev_reread_part(struct block_device *bdev)
 	struct gendisk *disk = bdev->bd_disk;
 	int res;
 
-	if (!disk_partitionable(disk) || bdev != bdev->bd_contains)
+	if (!disk_userspace_partitions(disk) &&
+	    (!disk_partitionable(disk) || bdev != bdev->bd_contains))
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 7b364d7..57da1f7 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1934,6 +1934,7 @@ static struct mapped_device *alloc_dev(int minor)
 	md->disk->private_data = md;
 	sprintf(md->disk->disk_name, "dm-%d", minor);
 	md->disk->flags |= GENHD_FL_SUPPRESS_ADD_EVENT;
+	md->disk->flags |= GENHD_FL_PARTITION_CHANGE_NOTIFY;
 	add_disk(md->disk);
 	format_dev_t(md->name, MKDEV(_major, minor));
 
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 916173b..b66253f 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -548,6 +548,12 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
 	struct parsed_partitions *state;
 	int p, highest, res;
 
+	/* partitions handled in userspace, just send change event */
+	if (disk_userspace_partitions(disk)) {
+		kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
+		return 0;
+	}
+
 	if (bdev->bd_part_count)
 		return -EBUSY;
 	res = invalidate_partition(disk, 0);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index ade71ff..64b25f0 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -118,6 +118,9 @@ struct hd_struct {
 #define GENHD_FL_NATIVE_CAPACITY		128
 #define GENHD_FL_SUPPRESS_ADD_EVENT		256
 
+/* notify udev instead of use in-kernel partitioning */
+#define GENHD_FL_PARTITION_CHANGE_NOTIFY	512
+
 #define BLK_SCSI_MAX_CMDS	(256)
 #define BLK_SCSI_CMD_PER_LONG	(BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
 
@@ -182,6 +185,11 @@ static inline struct gendisk *part_to_disk(struct hd_struct *part)
 	return NULL;
 }
 
+static inline bool disk_userspace_partitions(struct gendisk *disk)
+{
+	return (disk->flags & GENHD_FL_PARTITION_CHANGE_NOTIFY) ? 1 : 0;
+}
+
 static inline int disk_max_parts(struct gendisk *disk)
 {
 	if (disk->flags & GENHD_FL_EXT_DEVT)
-- 
1.7.0

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 3/3] Do not send multiple REMOVE events for kobjects.
  2010-03-18 13:58 [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl Milan Broz
  2010-03-18 13:58 ` [PATCH 2/3] Add genhd flag requesting notification of partition changes only Milan Broz
@ 2010-03-18 13:58 ` Milan Broz
  2010-03-18 16:13 ` [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl Kay Sievers
  2010-03-19  9:42 ` Hannes Reinecke
  3 siblings, 0 replies; 40+ messages in thread
From: Milan Broz @ 2010-03-18 13:58 UTC (permalink / raw)
  To: dm-devel, kay.sievers; +Cc: Milan Broz

If the KOBJ_REMOVE was sent already there is no need
to send it again.

One example is when device-mapper device send explicit REMOVE
event with additional DM environment values (cookie) when mapping
table is being destroyed (device is unusable after that) and
underlying block device sends another REMOVE event unconditionally
in its destructor.

Signed-off-by: Milan Broz <mbroz@redhat.com>
---
 lib/kobject_uevent.c |    9 +++++++++
 1 files changed, 9 insertions(+), 0 deletions(-)

diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index c9d3a3e..c7881bd 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -125,6 +125,15 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
 				 kobject_name(kobj), kobj, __func__);
 		return 0;
 	}
+
+	/* skip remove event, if already sent*/
+	if (action == KOBJ_REMOVE && kobj->state_remove_uevent_sent) {
+		pr_debug("kobject: '%s' (%p): %s: ignoring "
+				 "already sent remove event!\n",
+				 kobject_name(kobj), kobj, __func__);
+		return 0;
+	}
+
 	/* skip the event, if the filter returns zero. */
 	if (uevent_ops && uevent_ops->filter)
 		if (!uevent_ops->filter(kset, kobj)) {
-- 
1.7.0

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-18 13:58 [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl Milan Broz
  2010-03-18 13:58 ` [PATCH 2/3] Add genhd flag requesting notification of partition changes only Milan Broz
  2010-03-18 13:58 ` [PATCH 3/3] Do not send multiple REMOVE events for kobjects Milan Broz
@ 2010-03-18 16:13 ` Kay Sievers
  2010-03-18 17:24   ` Alasdair G Kergon
  2010-03-18 21:35   ` Milan Broz
  2010-03-19  9:42 ` Hannes Reinecke
  3 siblings, 2 replies; 40+ messages in thread
From: Kay Sievers @ 2010-03-18 16:13 UTC (permalink / raw)
  To: Milan Broz; +Cc: dm-devel

On Thu, Mar 18, 2010 at 14:58, Milan Broz <mbroz@redhat.com> wrote:
> Block layer sends ADD uevent when new device is initialised.
>
> But the device-mapper block devices are more complex,
> initialisation consists of allocating underlying device and
> loading mapping table.
>
> Because from the userspace all block devices should behave
> the same, patch defines new flag indicating that ADD event
> should be suppressed in block layer.
>
> If the flag is set, caller then take full responsibility
> for enabling and sending events later when device is ready
> to use.

This will disconnect /sys, /dev and /proc from the flow of events. We
rather like to keep them in sync. Device enumeration will find devices
which have never been announced before. It will also find devices
where "remove" was sent, but the device is still there. I don't think
this disconnect is really acceptable from a driver core perspective
and its consumers. On systems without devtmpfs, there will be no
device node for the dm device while there is already the full sysfs
entry and udev would be idle (settle returns), but the state in /sys
not fully reflected in /dev, which is what we should avoid.

How about doing a two-stage instantiation instead of mangling events
to work around this setup model? Instead of creating the mentioned
event/sys inconsistencies, did you think about not registering the
"dead" blockdev until it is ready to be used as a blockdev? Like you
would allocate the dm instance in the kernel, but only register the
blockdev with the block subsystem when the device is resumed/the table
loaded. That way, only after the device is usable, it would get
registered and appear in /dev, /sys and proc.

Thanks,
Kay

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-18 16:13 ` [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl Kay Sievers
@ 2010-03-18 17:24   ` Alasdair G Kergon
  2010-03-18 21:35   ` Milan Broz
  1 sibling, 0 replies; 40+ messages in thread
From: Alasdair G Kergon @ 2010-03-18 17:24 UTC (permalink / raw)
  To: Kay Sievers; +Cc: dm-devel, Milan Broz

On Thu, Mar 18, 2010 at 05:13:22PM +0100, Kay Sievers wrote:
> How about doing a two-stage instantiation instead of mangling events
> to work around this setup model? Instead of creating the mentioned
> event/sys inconsistencies, did you think about not registering the
> "dead" blockdev until it is ready to be used as a blockdev? Like you
> would allocate the dm instance in the kernel, but only register the
> blockdev with the block subsystem when the device is resumed/the table
> loaded. That way, only after the device is usable, it would get
> registered and appear in /dev, /sys and proc.
 
Still going around in circles, I'm afraid.

That looked like even bigger block layer surgery last time we considered it.
By all means, look at it yet again...

We're creating trees of devices that reference each other (need to know
minor numbers) and have constraints about which parts of the code may
allocate new memory.

Alasdair

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-18 16:13 ` [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl Kay Sievers
  2010-03-18 17:24   ` Alasdair G Kergon
@ 2010-03-18 21:35   ` Milan Broz
  2010-03-19  8:27     ` Kay Sievers
  1 sibling, 1 reply; 40+ messages in thread
From: Milan Broz @ 2010-03-18 21:35 UTC (permalink / raw)
  To: device-mapper development

On 03/18/2010 05:13 PM, Kay Sievers wrote:
> On Thu, Mar 18, 2010 at 14:58, Milan Broz <mbroz@redhat.com> wrote:
>> Block layer sends ADD uevent when new device is initialised.
>>
>> But the device-mapper block devices are more complex,
>> initialisation consists of allocating underlying device and
>> loading mapping table.
>>
>> Because from the userspace all block devices should behave
>> the same, patch defines new flag indicating that ADD event
>> should be suppressed in block layer.
>>
>> If the flag is set, caller then take full responsibility
>> for enabling and sending events later when device is ready
>> to use.

Hi Kay,

First - the patch is intentionally simple, because it is was the goal.

Call it hack if you want - the goal was fix events to fit
the udev rules specification of events, not rewrite block layer
initialisation.
("Why the dm devices need special handling of ADD event" problem.)

Now you are saying, that it is not enough.
Well I am not sure if I understand these objections:

> This will disconnect /sys, /dev and /proc from the flow of events. We
> rather like to keep them in sync. Device enumeration will find devices
> which have never been announced before. It will also find devices
> where "remove" was sent, but the device is still there. I don't think
> this disconnect is really acceptable from a driver core perspective
> and its consumers. On systems without devtmpfs, there will be no
> device node for the dm device while there is already the full sysfs
> entry and udev would be idle (settle returns), but the state in /sys
> not fully reflected in /dev, which is what we should avoid.

If it is problem it is already there for ages.
The device node appears immediately when device is usable.

Device without mapping table is not usable.

The information about device is not yet in /partitions, it is present
in /sysfs, but the device have zero size
(size is defined by table which is not yet loaded)
This is _current_ state and I think it was this way since 2.6.0 kernel.
No change with this patch.

What's the real problem there?

And why device-mapper need separate allocation of major:minor pair
and then load table in two separate steps:

device-mapper devices can be more complex that one simple device,
it constructs devices by referencing other devices in the table.

An example are snapshots - you have the origin device, (several) COW(s)
and these are linked together and must be activated
(read: resumed) as a tree - together.

I understand that you can invent another model which fit better udev
and I am sure that if device-mapper is designed now, udev integration
is one of the prerequisite. But it pre-dates udev.
Of course the final state is that some functions will be integrated into
block layer and the special "device-mapper" add-on disappears together
with this problem... :-)

> How about doing a two-stage instantiation instead of mangling events
> to work around this setup model? Instead of creating the mentioned
> event/sys inconsistencies, did you think about not registering the
> "dead" blockdev until it is ready to be used as a blockdev? Like you
> would allocate the dm instance in the kernel, but only register the
> blockdev with the block subsystem when the device is resumed/the table
> loaded. That way, only after the device is usable, it would get
> registered and appear in /dev, /sys and proc.

See above - if you mean registering device = allocating major:minor pair,
it is not possible to move it to resume.
Not because I am ignorant but because it is basic design principle
of the device-mapper.

Probably we can achieve it by duplicating/rewriting a lot of block layer
code - but I am sure you discussed this with Alasdair several times.

Mangling events... why you think it is mangling events? It just move ADD
event to proper place, when the device becomes ready.
Sysfs entry always appears before it - just the time interval
is slightly different here (but resume is called immediately after create).

I am trying to use udev view of problem - that's why I pushed to
add sysfs entries for dm for example. And it worked.

But if your statement is "it is your broken activation model" as you said
in some discussion, I can do nothing, just disagree - it is different model,
not broken.
And we have to find compromise how to work with it together.

Thanks,
Milan
--
mbroz@redhat.com

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-18 21:35   ` Milan Broz
@ 2010-03-19  8:27     ` Kay Sievers
  2010-03-19  9:06       ` Lars Ellenberg
  2010-03-19  9:10       ` Milan Broz
  0 siblings, 2 replies; 40+ messages in thread
From: Kay Sievers @ 2010-03-19  8:27 UTC (permalink / raw)
  To: Milan Broz; +Cc: device-mapper development

On Thu, Mar 18, 2010 at 22:35, Milan Broz <mbroz@redhat.com> wrote:

> But if your statement is "it is your broken activation model" as you said
> in some discussion, I can do nothing, just disagree - it is different model,
> not broken.

Sure, if you are fine with that model, I'm fine with it too. You are
the one sending patches to mangle basic driver-core definitions to
paper-over some issues dm seem to have with it. I just object to such
core changes, not to the way dm is doing things.

I have no problem with dm creating "dead" devices, it's like this
since a long time, but please don't try to fake things in the driver
core to make it look different from what it is. We don't want /sys and
/dev and events to be out of sync, like non-"add"-ed devices which are
fully created in /sys, or "remove"-d devices which are still fully
populated in /sys.

/sys is the direct export of kernel objects, if you create objects,
they appear, and they get announced. If you don't want them to be
announced at that time, just don't register them at that time.

Don't get me wrong, I'm not asking you to change the current state how
dm is doing things, I just object to the patch which inconsistently
tries to fake events, which do not match the state in /sys.

Thanks,
Kay

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-19  8:27     ` Kay Sievers
@ 2010-03-19  9:06       ` Lars Ellenberg
  2010-03-19  9:24         ` Kay Sievers
  2010-03-19  9:10       ` Milan Broz
  1 sibling, 1 reply; 40+ messages in thread
From: Lars Ellenberg @ 2010-03-19  9:06 UTC (permalink / raw)
  To: dm-devel

On Fri, Mar 19, 2010 at 09:27:35AM +0100, Kay Sievers wrote:
> On Thu, Mar 18, 2010 at 22:35, Milan Broz <mbroz@redhat.com> wrote:
> 
> > But if your statement is "it is your broken activation model" as you said
> > in some discussion, I can do nothing, just disagree - it is different model,
> > not broken.
> 
> Sure, if you are fine with that model, I'm fine with it too. You are
> the one sending patches to mangle basic driver-core definitions to
> paper-over some issues dm seem to have with it. I just object to such
> core changes, not to the way dm is doing things.
> 
> I have no problem with dm creating "dead" devices, it's like this
> since a long time, but please don't try to fake things in the driver
> core to make it look different from what it is. We don't want /sys and
> /dev and events to be out of sync, like non-"add"-ed devices which are
> fully created in /sys, or "remove"-d devices which are still fully
> populated in /sys.
> 
> /sys is the direct export of kernel objects, if you create objects,
> they appear, and they get announced. If you don't want them to be
> announced at that time, just don't register them at that time.
> 
> Don't get me wrong, I'm not asking you to change the current state how
> dm is doing things, I just object to the patch which inconsistently
> tries to fake events, which do not match the state in /sys.

Would introducing a KOBJ_READY_TO_BE_CONSUMED_BY_UDEV help?

Or re-using KOBJ_ONLINE for that purpose,
even though it has different meaning elsewhere?

	Lars

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-19  8:27     ` Kay Sievers
  2010-03-19  9:06       ` Lars Ellenberg
@ 2010-03-19  9:10       ` Milan Broz
  2010-03-19  9:22         ` Kay Sievers
  1 sibling, 1 reply; 40+ messages in thread
From: Milan Broz @ 2010-03-19  9:10 UTC (permalink / raw)
  To: Kay Sievers; +Cc: device-mapper development

On 03/19/2010 09:27 AM, Kay Sievers wrote:
> On Thu, Mar 18, 2010 at 22:35, Milan Broz <mbroz@redhat.com> wrote:

> /sys is the direct export of kernel objects, if you create objects,
> they appear, and they get announced. If you don't want them to be
> announced at that time, just don't register them at that time.

Well, again, I used something what is already used (not on many places,
but it is there), just search for dev_set_uevent_suppress().

See

/* delay uevents, until we scanned partition table */
dev_set_uevent_suppress(ddev, 1);

... (part table scan etc. it reads disk, so there can be
significant delay if device retries read for example)

+ /* caller will send ADD event later */
+ if (disk->flags & GENHD_FL_SUPPRESS_ADD_EVENT)
+ return;
+
/* announce disk after possible partitions are created */
dev_set_uevent_suppress(ddev, 0);
kobject_uevent(&ddev->kobj, KOBJ_ADD);


So the comment says that base device is announced after all
partitions devices are created.

I thought it is exactly the same model I used - so there is for
some time unannounced fully created "dead" device in sysfs.

Probably I am still missing where the problem is - the ADD
event is sent later, so the problem in time interval?
Or the atomicity of the call?

Well, if it is not acceptable (is this what you want to say?),
what do you suggest?

Not use add_disk()/del_gendisk() in dm core and rewrite it?
This seems like change a lot of code. And after that someone
surely will say "why dm implements this differently".

But I think it always need a change in device core code.

Thanks,
Milan
--
mbroz@redhat.com

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-19  9:10       ` Milan Broz
@ 2010-03-19  9:22         ` Kay Sievers
  0 siblings, 0 replies; 40+ messages in thread
From: Kay Sievers @ 2010-03-19  9:22 UTC (permalink / raw)
  To: Milan Broz; +Cc: device-mapper development

On Fri, Mar 19, 2010 at 10:10, Milan Broz <mbroz@redhat.com> wrote:
> On 03/19/2010 09:27 AM, Kay Sievers wrote:
>> On Thu, Mar 18, 2010 at 22:35, Milan Broz <mbroz@redhat.com> wrote:
>
>> /sys is the direct export of kernel objects, if you create objects,
>> they appear, and they get announced. If you don't want them to be
>> announced at that time, just don't register them at that time.
>
> Well, again, I used something what is already used (not on many places,
> but it is there), just search for dev_set_uevent_suppress().
>
> See

Sure, I see. It's code I added myself.

> /* delay uevents, until we scanned partition table */
> dev_set_uevent_suppress(ddev, 1);
>
> ... (part table scan etc. it reads disk, so there can be
> significant delay if device retries read for example)

Come on. It's the same code path, and it's for proper sysfs timing,
not for some it-can-happen-any-time action triggered from userspace.

> So the comment says that base device is announced after all
> partitions devices are created.

Sure, but that's not in any sense what matches your patch, and
"remove" is still fully handled by the core.

> Well, if it is not acceptable (is this what you want to say?),
> what do you suggest?

Leave it as it is, or fix the driver core _interaction_, not the core itself.

> Not use add_disk()/del_gendisk() in dm core and rewrite it?
> This seems like change a lot of code. And after that someone
> surely will say "why dm implements this differently".

Why would anybody say that. You create the _always_visible_ objects
when they are supposed to be visible, that would be all you do. I'm
not saying you need to do that, but you better don't fake around in
driver core events like your patch tries to do.

> But I think it always need a change in device core code.

Sure, we can do all reasonable changes here, inconsistent /sys and
event state we try to avoid for many good reasons.

Kay

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-19  9:06       ` Lars Ellenberg
@ 2010-03-19  9:24         ` Kay Sievers
  2010-03-19  9:49           ` Milan Broz
  2010-03-19 13:24           ` Peter Rajnoha
  0 siblings, 2 replies; 40+ messages in thread
From: Kay Sievers @ 2010-03-19  9:24 UTC (permalink / raw)
  To: device-mapper development

On Fri, Mar 19, 2010 at 10:06, Lars Ellenberg <lars.ellenberg@linbit.com> wrote:
> Would introducing a KOBJ_READY_TO_BE_CONSUMED_BY_UDEV help?

No, that's what "change" is for, and we already have these "change"
events for dm. Udev does not care if the device is ready or not, it
synchronizes /sys and /dev, and that works just fine with "change"
events.

Kay

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-18 13:58 [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl Milan Broz
                   ` (2 preceding siblings ...)
  2010-03-18 16:13 ` [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl Kay Sievers
@ 2010-03-19  9:42 ` Hannes Reinecke
  2010-03-19 12:27   ` Alasdair G Kergon
  3 siblings, 1 reply; 40+ messages in thread
From: Hannes Reinecke @ 2010-03-19  9:42 UTC (permalink / raw)
  To: device-mapper development; +Cc: Milan Broz

Milan Broz wrote:
> Block layer sends ADD uevent when new device is initialised.
> 
> But the device-mapper block devices are more complex,
> initialisation consists of allocating underlying device and
> loading mapping table.
> 
> Because from the userspace all block devices should behave
> the same, patch defines new flag indicating that ADD event
> should be suppressed in block layer.
> 
> If the flag is set, caller then take full responsibility
> for enabling and sending events later when device is ready
> to use.
> 

Please. DONT.

driver core sends out 'ADD' uevents whenever a device
is added (ie made visible) in /sys.
There is _no_ guarantee that a device is usable at this
point, hence there is is 'CHANGE' event which those device
afflicted by this sent out to signal to userspace the
device is now ready for use.

You are not alone here, S/390 DASD and qeth driver
behave the same.

And udev (and multipath :-) are pretty much aware of
the fact. And so should other userspace tools.

No need to fiddle around with events here. It'll
just serve to confuse userspace.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		      zSeries & Storage
hare@suse.de			      +49 911 74053 688
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Markus Rex, HRB 16746 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-19  9:24         ` Kay Sievers
@ 2010-03-19  9:49           ` Milan Broz
  2010-03-19 10:16             ` Kay Sievers
  2010-03-19 13:24           ` Peter Rajnoha
  1 sibling, 1 reply; 40+ messages in thread
From: Milan Broz @ 2010-03-19  9:49 UTC (permalink / raw)
  To: device-mapper development

On 03/19/2010 10:24 AM, Kay Sievers wrote:
> On Fri, Mar 19, 2010 at 10:06, Lars Ellenberg <lars.ellenberg@linbit.com> wrote:
>> Would introducing a KOBJ_READY_TO_BE_CONSUMED_BY_UDEV help?
> 
> No, that's what "change" is for, and we already have these "change"
> events for dm. Udev does not care if the device is ready or not, it
> synchronizes /sys and /dev, and that works just fine with "change"
> events.

Udev (rules) do not not care if the device is ready?

That's really news for me. So you are basically saying that dm ADD event is ok,
and it is problem of udev rules that they react here on ADD event and run
various scan over not-yet-ready device because it should wait for CHANGE?

ok, then I am wasting time with fixing the dm ADD event. We have this already.

Mea culpa. Sorry for spam then:-)

Milan
--
mbroz@redhat.com

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-19  9:49           ` Milan Broz
@ 2010-03-19 10:16             ` Kay Sievers
  2010-03-19 11:14               ` Milan Broz
                                 ` (2 more replies)
  0 siblings, 3 replies; 40+ messages in thread
From: Kay Sievers @ 2010-03-19 10:16 UTC (permalink / raw)
  To: Milan Broz; +Cc: device-mapper development

On Fri, Mar 19, 2010 at 10:49, Milan Broz <mbroz@redhat.com> wrote:
> On 03/19/2010 10:24 AM, Kay Sievers wrote:
>> On Fri, Mar 19, 2010 at 10:06, Lars Ellenberg <lars.ellenberg@linbit.com> wrote:
>>> Would introducing a KOBJ_READY_TO_BE_CONSUMED_BY_UDEV help?
>>
>> No, that's what "change" is for, and we already have these "change"
>> events for dm. Udev does not care if the device is ready or not, it
>> synchronizes /sys and /dev, and that works just fine with "change"
>> events.
>
> Udev (rules) do not not care if the device is ready?
>
> That's really news for me. So you are basically saying that dm ADD event is ok,
> and it is problem of udev rules that they react here on ADD event and run
> various scan over not-yet-ready device because it should wait for CHANGE?

It will not wait, the tools will just fail to open the device, and
udev will only create the basic device node, but not possible
metadata-based symlinks or anything like that. That is expected
behavior, udev will handle just fine. The needed information will only
be readable with the next "change" event, and be fully updated with
every "change" event after that.

Usually, it does not make much sense to distinguish between "add" and
"change" in userspace/udev rules. The called udev rules are the same,
and should just check a subsystem-provided property if they should go
ahead or ignore the device.

Udev itself handles "add" and "change" exactly the same way. The only
exception is network device renaming, which only runs on "add".

There are several subsystems that depend on updating everything with
"change" events when device configurations change. There is nothing
inherently wrong with this approach, as long as subsystems send the
proper "change" events and don't try to hide anything they have
registered.

Kay

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-19 10:16             ` Kay Sievers
@ 2010-03-19 11:14               ` Milan Broz
  2010-03-19 11:44                 ` Kay Sievers
  2010-03-19 11:50                 ` Hannes Reinecke
  2010-03-19 12:00               ` Alasdair G Kergon
  2010-03-19 12:12               ` Alasdair G Kergon
  2 siblings, 2 replies; 40+ messages in thread
From: Milan Broz @ 2010-03-19 11:14 UTC (permalink / raw)
  To: Kay Sievers; +Cc: device-mapper development

On 03/19/2010 11:16 AM, Kay Sievers wrote:

> There are several subsystems that depend on updating everything with
> "change" events when device configurations change. There is nothing
> inherently wrong with this approach, as long as subsystems send the
> proper "change" events and don't try to hide anything they have
> registered.

ok, this is perfectly fine with dm devices, CHANGE announces all
changes. Just I am not sure if all consumers of events (and separate
rules authors) know about that, I saw so many problems with
failing something when wrongly reacting to ADD event...

Also it means that after ADD the by-uuid* and similar symlinks
cannot be yet trusted - if the UUID is read from device and device
is not yet ready.

Well. And what should happen if anyone generate
artificial CHANGE event before the real first CHANGE event comes from
subsystem? (yes, I am looking at you, OPTIONS+="watch" thing for example)

According to above, rules must be written such way that every ADD/CHANGE
event must expect that device is not ready, so it can create only
partial info in udev, is it correct?
(this is of course no problem if the rules are the same for both cases)

Milan
--
mbroz@redhat.com

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-19 11:14               ` Milan Broz
@ 2010-03-19 11:44                 ` Kay Sievers
  2010-03-19 12:08                   ` Milan Broz
  2010-03-19 11:50                 ` Hannes Reinecke
  1 sibling, 1 reply; 40+ messages in thread
From: Kay Sievers @ 2010-03-19 11:44 UTC (permalink / raw)
  To: Milan Broz; +Cc: device-mapper development

On Fri, Mar 19, 2010 at 12:14, Milan Broz <mbroz@redhat.com> wrote:
> On 03/19/2010 11:16 AM, Kay Sievers wrote:
>
>> There are several subsystems that depend on updating everything with
>> "change" events when device configurations change. There is nothing
>> inherently wrong with this approach, as long as subsystems send the
>> proper "change" events and don't try to hide anything they have
>> registered.
>
> ok, this is perfectly fine with dm devices, CHANGE announces all
> changes. Just I am not sure if all consumers of events (and separate
> rules authors) know about that, I saw so many problems with
> failing something when wrongly reacting to ADD event...
>
> Also it means that after ADD the by-uuid* and similar symlinks
> cannot be yet trusted - if the UUID is read from device and device
> is not yet ready.

Yes, such links will not be created, or even be removed, if we can
not/or no longer retrieve the needed information from the device.

> Well. And what should happen if anyone generate
> artificial CHANGE event before the real first CHANGE event comes from
> subsystem? (yes, I am looking at you, OPTIONS+="watch" thing for example)

We retrieve the device state and if it's not ready, we just exit. We
used to do that by calling dmsetup and checking the state of the
device. If the device is ready we create all the stuff, if not, we
just skip the rules. A later event from the subsystem might do the
same stuff again, udev preserves everything that does not get updated,
and this is usually cheap enough and such events do not happen at a
high frequency.

In the simplest case, which will not really apply here, rules could
just check if the size is not "0", and only go ahead if it found a
device which has readable stuff on it.

Ideally, we just gather the needed information from the subsystem
always in the same way, and react on the event the same way,
regardless if it is the first, the second, or any later event.

> According to above, rules must be written such way that every ADD/CHANGE
> event must expect that device is not ready, so it can create only
> partial info in udev, is it correct?
> (this is of course no problem if the rules are the same for both cases)

Yeah, usually it does not create any meta-information besides the
primary device node.

Kay

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-19 11:14               ` Milan Broz
  2010-03-19 11:44                 ` Kay Sievers
@ 2010-03-19 11:50                 ` Hannes Reinecke
  1 sibling, 0 replies; 40+ messages in thread
From: Hannes Reinecke @ 2010-03-19 11:50 UTC (permalink / raw)
  To: device-mapper development

Milan Broz wrote:
> On 03/19/2010 11:16 AM, Kay Sievers wrote:
> 
>> There are several subsystems that depend on updating everything with
>> "change" events when device configurations change. There is nothing
>> inherently wrong with this approach, as long as subsystems send the
>> proper "change" events and don't try to hide anything they have
>> registered.
> 
> ok, this is perfectly fine with dm devices, CHANGE announces all
> changes. Just I am not sure if all consumers of events (and separate
> rules authors) know about that, I saw so many problems with
> failing something when wrongly reacting to ADD event...
> 
> Also it means that after ADD the by-uuid* and similar symlinks
> cannot be yet trusted - if the UUID is read from device and device
> is not yet ready.
> 
Correct. We try to take care of that one by
a) not running something like 'vol_id' for 'ADD' events on dm devices
and
b) really making sure the dm device can be read from when receiving
   CHANGE event.

> Well. And what should happen if anyone generate
> artificial CHANGE event before the real first CHANGE event comes from
> subsystem? (yes, I am looking at you, OPTIONS+="watch" thing for example)
> 
You have to check the device anyway. 'CHANGE' literally just means that,
ie something has changed with the device state.
We used to have 'ONLINE' and 'OFFLINE' events for block devices, but
that got modified to the more generic 'CHANGE' event.

There is _no_ indication what has changed with the device state; any
program etc must check for itself if the device is in a usable state.
(Normally there won't even be any additional environment variables with
a 'CHANGE' event, so one has to look in sysfs anyway to find out what
has changed).

> According to above, rules must be written such way that every ADD/CHANGE
> event must expect that device is not ready, so it can create only
> partial info in udev, is it correct?
Correct.

> (this is of course no problem if the rules are the same for both cases)
> 
Well, yes and no.
We (read: SUSE) are trying to make sure to _not_ run any programs likely
to read data from the disk when receiving 'ADD' events from these devices.

You might get lucky here when the 'ADD' event is in fact a fake as it
was triggered externally, but this is not something I'd bank on.
(And if someone is faking 'ADD' events he can as well do it properly
and fake the corresponding 'CHANGE' events, too)

HTH.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		      zSeries & Storage
hare@suse.de			      +49 911 74053 688
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Markus Rex, HRB 16746 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-19 10:16             ` Kay Sievers
  2010-03-19 11:14               ` Milan Broz
@ 2010-03-19 12:00               ` Alasdair G Kergon
  2010-03-19 12:12               ` Alasdair G Kergon
  2 siblings, 0 replies; 40+ messages in thread
From: Alasdair G Kergon @ 2010-03-19 12:00 UTC (permalink / raw)
  To: Kay Sievers; +Cc: device-mapper development, Milan Broz

We've been over all this ground before, but here it is again FWIW

On Fri, Mar 19, 2010 at 11:16:15AM +0100, Kay Sievers wrote:
> It will not wait, the tools will just fail to open the device, and
> udev will only create the basic device node, but not possible
> metadata-based symlinks or anything like that. That is expected
> behavior, udev will handle just fine. The needed information will only
> be readable with the next "change" event, and be fully updated with
> every "change" event after that.
 
But while userspace code responding to the uevents is fiddling with the
new node deciding what to do, the userspace code (e.g. lvm2) that is
creating the device tree is continuing to manipulate it as these
are multi-step processes.  If code triggered by the uevents runs
alongside the lvm2 code, stuff fails.  (E.g. lvm2 wants exclusive access
which it can't get because something reacting to the uevent opened the
device temporarily - even though we could have told it not to bother as
it would find nothing of interest on it yet - a concept of 'ignored'
or 'private' devices).  So we require a synchronisation mechanism so
that the lvm2 code can wait for *everything* triggered by the uevents to
finish before proceeding.  (Or alternatively a mechanism to flag that
other code reacting to uevents should ignore these devices at this
stage.)  We attempted to add the synchronisation ourselves
(95-dm-notify.rules) but I think you pointed out there are still things
outside our control that can be triggered without a mechanism to wait
for them to finish.

Alasdair

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-19 11:44                 ` Kay Sievers
@ 2010-03-19 12:08                   ` Milan Broz
  2010-03-19 12:14                     ` Kay Sievers
  0 siblings, 1 reply; 40+ messages in thread
From: Milan Broz @ 2010-03-19 12:08 UTC (permalink / raw)
  To: Kay Sievers; +Cc: device-mapper development

On 03/19/2010 12:44 PM, Kay Sievers wrote:
> On Fri, Mar 19, 2010 at 12:14, Milan Broz <mbroz@redhat.com> wrote:
>> On 03/19/2010 11:16 AM, Kay Sievers wrote:
>> Well. And what should happen if anyone generate
>> artificial CHANGE event before the real first CHANGE event comes from
>> subsystem? (yes, I am looking at you, OPTIONS+="watch" thing for example)
> 
> We retrieve the device state and if it's not ready, we just exit.

How? By scanning/opening it?

Scan on uninitialised device can lock the device and break initialisation,
"-EBUSY" - isn't it race?

So all applications, which run some kind of configuration of device
should expect that device can be randomly locked by udev rules...

[Yes, it happens. I solved several problems in cryptsetup where various
udev scans open and locks keyslot device.
(Ignoring the fact that it contains key material, in this case it was
not security problem - the material is still obfuscated.)
These are now masked in udev rules, but I do not like this approach much.]

> Yeah, usually it does not create any meta-information besides the
> primary device node.

Unfortunately it is not the DM/LVM etc case - we had always symlinks
and long device names etc.

Anyway, this name/uuid information is in sysfs now.
But not the VG/LV symlinks info which is pure userspace abstraction...

Milan
--
mbroz@redhat.com

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-19 10:16             ` Kay Sievers
  2010-03-19 11:14               ` Milan Broz
  2010-03-19 12:00               ` Alasdair G Kergon
@ 2010-03-19 12:12               ` Alasdair G Kergon
  2010-03-19 12:16                 ` Kay Sievers
  2 siblings, 1 reply; 40+ messages in thread
From: Alasdair G Kergon @ 2010-03-19 12:12 UTC (permalink / raw)
  To: Kay Sievers; +Cc: device-mapper development, Milan Broz

On Fri, Mar 19, 2010 at 11:16:15AM +0100, Kay Sievers wrote:
> It will not wait, the tools will just fail to open the device, and

In some cases the tools are wasting their time even trying to open
the device - we know in advance that it is pointless and should have
a mechanism in place so they can be told not to bother.

E.g. Flagging a device as 'private' as discussed in an earlier mail.

Alasdair

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-19 12:08                   ` Milan Broz
@ 2010-03-19 12:14                     ` Kay Sievers
  0 siblings, 0 replies; 40+ messages in thread
From: Kay Sievers @ 2010-03-19 12:14 UTC (permalink / raw)
  To: Milan Broz; +Cc: device-mapper development

On Fri, Mar 19, 2010 at 13:08, Milan Broz <mbroz@redhat.com> wrote:
> On 03/19/2010 12:44 PM, Kay Sievers wrote:
>> On Fri, Mar 19, 2010 at 12:14, Milan Broz <mbroz@redhat.com> wrote:
>>> On 03/19/2010 11:16 AM, Kay Sievers wrote:
>>> Well. And what should happen if anyone generate
>>> artificial CHANGE event before the real first CHANGE event comes from
>>> subsystem? (yes, I am looking at you, OPTIONS+="watch" thing for example)
>>
>> We retrieve the device state and if it's not ready, we just exit.
>
> How? By scanning/opening it?
>
> Scan on uninitialised device can lock the device and break initialisation,
> "-EBUSY" - isn't it race?

Whatever is appropriate, be it a control device node, sysfs, or the
device itself. The simplest case would be the "size" check, MD checks
a sysfs attribute, others just try to open. For dm we used to call
dmsetup and rely on its returned information.

> So all applications, which run some kind of configuration of device
> should expect that device can be randomly locked by udev rules...
>
> [Yes, it happens. I solved several problems in cryptsetup where various
> udev scans open and locks keyslot device.
> (Ignoring the fact that it contains key material, in this case it was
> not security problem - the material is still obfuscated.)
> These are now masked in udev rules, but I do not like this approach much.]

Yes, some tools may need to synchronize with the eventually running
events. Plain udev does ignore all dm devices regarding the tools
which open the device. It's all in the special subsystem rules, which
might need to take care of synchronization.

>> Yeah, usually it does not create any meta-information besides the
>> primary device node.
>
> Unfortunately it is not the DM/LVM etc case - we had always symlinks
> and long device names etc.

But these are passed from an earlier event run. Usually they are just
retrieved the same way again.

Kay

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-19 12:12               ` Alasdair G Kergon
@ 2010-03-19 12:16                 ` Kay Sievers
  2010-03-19 13:17                   ` Peter Rajnoha
  0 siblings, 1 reply; 40+ messages in thread
From: Kay Sievers @ 2010-03-19 12:16 UTC (permalink / raw)
  To: Kay Sievers, Milan Broz, device-mapper development

On Fri, Mar 19, 2010 at 13:12, Alasdair G Kergon <agk@redhat.com> wrote:
> On Fri, Mar 19, 2010 at 11:16:15AM +0100, Kay Sievers wrote:
>> It will not wait, the tools will just fail to open the device, and
>
> In some cases the tools are wasting their time even trying to open
> the device - we know in advance that it is pointless and should have
> a mechanism in place so they can be told not to bother.
>
> E.g. Flagging a device as 'private' as discussed in an earlier mail.

Sure, that can all be done. Plain udev leaves dm and md devices
completely to the subsystem own rules. It might be other tools which
interfere here, and that might need to be synchronized to match on
some values in the udev properties or sysfs.

Kay

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-19  9:42 ` Hannes Reinecke
@ 2010-03-19 12:27   ` Alasdair G Kergon
  0 siblings, 0 replies; 40+ messages in thread
From: Alasdair G Kergon @ 2010-03-19 12:27 UTC (permalink / raw)
  To: Hannes Reinecke; +Cc: device-mapper development, Milan Broz

On Fri, Mar 19, 2010 at 10:42:58AM +0100, Hannes Reinecke wrote:
> No need to fiddle around with events here. It'll
> just serve to confuse userspace.
 
Userspace is already well-confused and doing things that look silly from
a high-level viewpoint and none of the potential solutions we've
proposed to the various problems has been acceptable yet.

So we widened the scope of our thinking, and suggested if we just break
this kobject/uevent connection in the kernel, it might give us easy
solutions to several of the seemingly-intractable
userspace-processing-of-uevent problems at once.

Alasdair

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-19 12:16                 ` Kay Sievers
@ 2010-03-19 13:17                   ` Peter Rajnoha
  0 siblings, 0 replies; 40+ messages in thread
From: Peter Rajnoha @ 2010-03-19 13:17 UTC (permalink / raw)
  To: Kay Sievers; +Cc: device-mapper development, Milan Broz

On 03/19/2010 01:16 PM, Kay Sievers wrote:
> On Fri, Mar 19, 2010 at 13:12, Alasdair G Kergon <agk@redhat.com> wrote:
>> E.g. Flagging a device as 'private' as discussed in an earlier mail.
> 
> Sure, that can all be done. Plain udev leaves dm and md devices
> completely to the subsystem own rules. It might be other tools which
> interfere here, and that might need to be synchronized to match on
> some values in the udev properties or sysfs.
> 
> Kay

Great. We have the flags in uevents, we have udev properties. We can't
put them directly in sysfs because of the reasons we already discussed
(it's also extended information related to exact device-mapper subsystem,
not easily accessible in the middle of processing from outer space).

Do we have these flags/properties available when processing the event
originated in the "watch rule" or "udevadm trigger". No. The information
is cleared from udev db and inaccessible.

That's what we need and what we tried to ask for (besides trying to solve
that problem with the ADD event).

Peter

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-19  9:24         ` Kay Sievers
  2010-03-19  9:49           ` Milan Broz
@ 2010-03-19 13:24           ` Peter Rajnoha
  2010-03-19 13:43             ` Kay Sievers
  2010-03-19 14:08             ` Hannes Reinecke
  1 sibling, 2 replies; 40+ messages in thread
From: Peter Rajnoha @ 2010-03-19 13:24 UTC (permalink / raw)
  To: Kay Sievers; +Cc: device-mapper development

On 03/19/2010 10:24 AM, Kay Sievers wrote:
> No, that's what "change" is for, and we already have these "change"
> events for dm. Udev does not care if the device is ready or not, it
> synchronizes /sys and /dev, and that works just fine with "change"
> events.

CHANGE events, not quite... We can't even rely on these.

Just to mention, there's also a CHANGE event generated when
read-only flag is set for a device (this is not managed by
device-mapper of course). This one is generated even before
the actual CHANGE event that is generated when DM device is
ready to be used.

Peter

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-19 13:24           ` Peter Rajnoha
@ 2010-03-19 13:43             ` Kay Sievers
  2010-03-19 13:47               ` Alasdair G Kergon
  2010-03-19 13:58               ` David Zeuthen
  2010-03-19 14:08             ` Hannes Reinecke
  1 sibling, 2 replies; 40+ messages in thread
From: Kay Sievers @ 2010-03-19 13:43 UTC (permalink / raw)
  To: Peter Rajnoha; +Cc: device-mapper development

On Fri, Mar 19, 2010 at 14:24, Peter Rajnoha <prajnoha@redhat.com> wrote:
> On 03/19/2010 10:24 AM, Kay Sievers wrote:
>> No, that's what "change" is for, and we already have these "change"
>> events for dm. Udev does not care if the device is ready or not, it
>> synchronizes /sys and /dev, and that works just fine with "change"
>> events.
>
> CHANGE events, not quite... We can't even rely on these.
>
> Just to mention, there's also a CHANGE event generated when
> read-only flag is set for a device (this is not managed by
> device-mapper of course). This one is generated even before
> the actual CHANGE event that is generated when DM device is
> ready to be used.

Sure, but as mentioned earlier, these events are just expected to
fail, and update the current udev state, if they can't retrieve the
needed information or find out that the device in not usable.

Kay

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-19 13:43             ` Kay Sievers
@ 2010-03-19 13:47               ` Alasdair G Kergon
  2010-03-19 13:58               ` David Zeuthen
  1 sibling, 0 replies; 40+ messages in thread
From: Alasdair G Kergon @ 2010-03-19 13:47 UTC (permalink / raw)
  To: Kay Sievers; +Cc: device-mapper development, Peter Rajnoha

On Fri, Mar 19, 2010 at 02:43:13PM +0100, Kay Sievers wrote:
> Sure, but as mentioned earlier, these events are just expected to
> fail, and update the current udev state, if they can't retrieve the
> needed information or find out that the device in not usable.
 
They have to 'fail' in a controlled way that does not interfere
with any other processing that may already be happening.

Alasdair

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-19 13:43             ` Kay Sievers
  2010-03-19 13:47               ` Alasdair G Kergon
@ 2010-03-19 13:58               ` David Zeuthen
  2010-03-19 14:34                 ` Alasdair G Kergon
  2010-03-19 15:55                 ` Mike Snitzer
  1 sibling, 2 replies; 40+ messages in thread
From: David Zeuthen @ 2010-03-19 13:58 UTC (permalink / raw)
  To: device-mapper development


[-- Attachment #1.1: Type: text/plain, Size: 1689 bytes --]

Hey Kay,

On Fri, Mar 19, 2010 at 9:43 AM, Kay Sievers <kay.sievers@vrfy.org> wrote:

> On Fri, Mar 19, 2010 at 14:24, Peter Rajnoha <prajnoha@redhat.com> wrote:
> > On 03/19/2010 10:24 AM, Kay Sievers wrote:
> >> No, that's what "change" is for, and we already have these "change"
> >> events for dm. Udev does not care if the device is ready or not, it
> >> synchronizes /sys and /dev, and that works just fine with "change"
> >> events.
> >
> > CHANGE events, not quite... We can't even rely on these.
> >
> > Just to mention, there's also a CHANGE event generated when
> > read-only flag is set for a device (this is not managed by
> > device-mapper of course). This one is generated even before
> > the actual CHANGE event that is generated when DM device is
> > ready to be used.
>
> Sure, but as mentioned earlier, these events are just expected to
> fail, and update the current udev state, if they can't retrieve the
> needed information or find out that the device in not usable.
>

I think the problem is the that fact that 3rd party user space
opens the device before it is ready (e.g. just after ADD but before
the first CHANGE) makes things fall over.

This short-coming is what needs to get fixed, I think - it's very
fragile this way and since any random user / package can add
rules to open the device on add events, said user / package can
make device-mapper fail. Which doesn't exactly strike me
as robust behavior.

While Alasdair may believe this is "silly" or because someone
is "confused", this can and will happen because our current
user-base love to fiddle around with udev rules to the extent
that various 3rd party packages will ship a lot of them.

     David

[-- Attachment #1.2: Type: text/html, Size: 2391 bytes --]

[-- Attachment #2: Type: text/plain, Size: 0 bytes --]



^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-19 13:24           ` Peter Rajnoha
  2010-03-19 13:43             ` Kay Sievers
@ 2010-03-19 14:08             ` Hannes Reinecke
  2010-03-19 15:05               ` Peter Rajnoha
  1 sibling, 1 reply; 40+ messages in thread
From: Hannes Reinecke @ 2010-03-19 14:08 UTC (permalink / raw)
  To: device-mapper development

Peter Rajnoha wrote:
> On 03/19/2010 10:24 AM, Kay Sievers wrote:
>> No, that's what "change" is for, and we already have these "change"
>> events for dm. Udev does not care if the device is ready or not, it
>> synchronizes /sys and /dev, and that works just fine with "change"
>> events.
> 
> CHANGE events, not quite... We can't even rely on these.
> 
> Just to mention, there's also a CHANGE event generated when
> read-only flag is set for a device (this is not managed by
> device-mapper of course). This one is generated even before
> the actual CHANGE event that is generated when DM device is
> ready to be used.
> 
As mentioned earlier, CHANGE events do not carry any
information about _what_ has changed.
The udev rules / programs are expected to check for
this themselves. So from that point of view yes, you
cannot simply wait for 'the' CHANGE event as you
might get more than one.

I guess most of this discussion could be solved if
the CHANGE events would be modify to attach some
enviroment variables indicating the nature of the change.

EG adding 'DM_STATE=LIVE' or whatever is appropriate
here.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		      zSeries & Storage
hare@suse.de			      +49 911 74053 688
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Markus Rex, HRB 16746 (AG Nürnberg)

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-19 13:58               ` David Zeuthen
@ 2010-03-19 14:34                 ` Alasdair G Kergon
  2010-03-19 14:59                   ` David Zeuthen
  2010-03-19 15:55                 ` Mike Snitzer
  1 sibling, 1 reply; 40+ messages in thread
From: Alasdair G Kergon @ 2010-03-19 14:34 UTC (permalink / raw)
  To: David Zeuthen; +Cc: device-mapper development

On Fri, Mar 19, 2010 at 09:58:00AM -0400, David Zeuthen wrote:
> I think the problem is the that fact that 3rd party user space
> opens the device before it is ready (e.g. just after ADD but before
> the first CHANGE) makes things fall over.
> This short-coming is what needs to get fixed, I think - it's very
> fragile this way and since any random user / package can add
> rules to open the device on add events, said user / package can
> make device-mapper fail. Which doesn't exactly strike me
> as robust behavior.

And we suggested two potential solutions:

  1 - change the kernel so the ADD event doesn't arrive until the device is
ready for use.     [plus equivalent change for REMOVE]

      Advantage: the dm device handling looks more like a real disk so we have
less 'special case' code.  /dev then only indexes "dm devices ready to be used"
rather than "dm devices registered in the kernel"

      Disadvantage: breaks the currently-simple kobject/sysfs/dev linkage (as
per Kay's earlier mail)
 

  2 - several changes to the way udev rules are handled so we can choose to
ignore events and make no changes to /dev, so we can override rules other
packages insert without requiring dm-specific checks adding to them all, and
probably some of the other things we've discussed on these various threads.

 
Alasdair

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-19 14:34                 ` Alasdair G Kergon
@ 2010-03-19 14:59                   ` David Zeuthen
  2010-03-19 15:24                     ` Alasdair G Kergon
  0 siblings, 1 reply; 40+ messages in thread
From: David Zeuthen @ 2010-03-19 14:59 UTC (permalink / raw)
  To: device-mapper development, Kay Sievers


[-- Attachment #1.1: Type: text/plain, Size: 3757 bytes --]

Hey,

On Fri, Mar 19, 2010 at 10:34 AM, Alasdair G Kergon <agk@redhat.com> wrote:

> On Fri, Mar 19, 2010 at 09:58:00AM -0400, David Zeuthen wrote:
> > I think the problem is the that fact that 3rd party user space
> > opens the device before it is ready (e.g. just after ADD but before
> > the first CHANGE) makes things fall over.
> > This short-coming is what needs to get fixed, I think - it's very
> > fragile this way and since any random user / package can add
> > rules to open the device on add events, said user / package can
> > make device-mapper fail. Which doesn't exactly strike me
> > as robust behavior.
>
> And we suggested two potential solutions:
>
>  1 - change the kernel so the ADD event doesn't arrive until the device is
> ready for use.     [plus equivalent change for REMOVE]
>
>      Advantage: the dm device handling looks more like a real disk so we
> have
> less 'special case' code.  /dev then only indexes "dm devices ready to be
> used"
> rather than "dm devices registered in the kernel"
>
>      Disadvantage: breaks the currently-simple kobject/sysfs/dev linkage
> (as
> per Kay's earlier mail)


>
>  2 - several changes to the way udev rules are handled so we can choose to
> ignore events and make no changes to /dev, so we can override rules other
> packages insert without requiring dm-specific checks adding to them all,
> and
> probably some of the other things we've discussed on these various threads.
>

I don't think it's realistic to assume that user space will read and honor
something like a DM_UDEV_DISABLE_DM_RULES_FLAG property - while
we can make the udev package and other "core" OS packages do this, rules
from users, sites and random 3rd party packages will open() the device
on "add" (and, if properly written, also on "change").

It's much better to make that operation somehow gracefully fail in the
window where the device isn't configured. My understanding is that
we are not doing this today - for example, suppose I have this udev
rule

  SUBSYSTEM=="block", ACTION=="add|change", \
  IMPORT{program}="foo-check-for-some-signature $tempnode"

where foo-check-for-some-signature is some program to check for
a fs signature (say, for a properietary fs on a portable music
player device) and, if, so, set the FOO_* properties with specifics
about the device (these will be used in the UI to control/manage
the portable music player).

Things like this should work for any block device, no matter what
state it's in. Sure, for device-mapper block devices open() or
read() on the device may fail in the window between "add" and
"change" but that's fine.

What can never happen though, is that this configuration of the
device-mapper device somehow randomly fails because the
program foo-check-for-some-signature tries to open every block
device.

(Sure, user space can be _clever_ and save the fork+exec+open
by checking DM_UDEV_DISABLE_DM_RULES_FLAG from an
udev rule - but that's optimization, not something required for
correctness.)

I haven't checked if this problem still exists with device-mapper
but I know, in the past, that it has - and IIRC it was the reason
that you introduced DM_UDEV_DISABLE_DM_RULES_FLAG
after the udev ignore_device directive was removed.

I'd like to reiterate that it's actually not a problem that the sequence is

 - "add" uevent
 - device is not usable, access to the device fails gracefully
 - "change" uevent
 - device usable, blkid on the device etc. works

the point really is that you have to accept that there will exist
user space programs that does things on the device between
"add" and the initial "change" uevent.

The other problem, the assumption that "change" uevents only
originates from libdevmapper, is separate from this problem.

     David

[-- Attachment #1.2: Type: text/html, Size: 5117 bytes --]

[-- Attachment #2: Type: text/plain, Size: 0 bytes --]



^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-19 14:08             ` Hannes Reinecke
@ 2010-03-19 15:05               ` Peter Rajnoha
  2010-03-19 15:14                 ` David Zeuthen
  0 siblings, 1 reply; 40+ messages in thread
From: Peter Rajnoha @ 2010-03-19 15:05 UTC (permalink / raw)
  To: Hannes Reinecke; +Cc: device-mapper development

On 03/19/2010 03:08 PM, Hannes Reinecke wrote:
> As mentioned earlier, CHANGE events do not carry any
> information about _what_ has changed.
> The udev rules / programs are expected to check for
> this themselves. So from that point of view yes, you
> cannot simply wait for 'the' CHANGE event as you
> might get more than one.
> 
> I guess most of this discussion could be solved if
> the CHANGE events would be modify to attach some
> enviroment variables indicating the nature of the change.
> 
> EG adding 'DM_STATE=LIVE' or whatever is appropriate
> here.

As for the variables, we already carry some important
information within uevents.

Generally, it carries hints for udev rules to instruct them
how they should be applied correctly, which parts should be
run based on the type of the device, it's real meaning with all
relations to other devices taken into account within that
DM subsystem used (e.g. LVM2's snapshots, mirrors...).

Most of this information is really not suitable to be stored
as a sysfs attribute since it deals with userspace notions,
an abstraction layer above device-mapper...

So the CHANGE event should carry all this information to
properly do all necessary actions and decisions while processing
the udev rules. This information is not easy to acquire by just
calling some program in userspace (from udev rules as most of
the others do). The device can be in the middle of processing
and that event could be just a part of the whole sequence
(also, it's not only that one device, we deal with all sorts
of relations among devices).

Now, the problem we're hitting are those artificial events
generated as a result of the "watch" rule and "udevadm trigger"
that could be triggered anytime. These are just plain events
that we can't even ignore. We have to apply all the rules for
them, but since we don't have the hints/flags, they are applied
incorrectly (creating nodes/symlinks where it is not appropriate,
calling programs from udev rules that are not appropriate etc.).

The proposal here was to keep such information in udev db
so it's still accessible while processing any of the "artificial"
events.

We can and already do provide some information in uevent's
environment, but the idea is broken by those spurious events.
As mentioned already, we made a proposal to make these bits of
information accessible even for the other events...

And the final solution needs cooperation from both sides.

Peter

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-19 15:05               ` Peter Rajnoha
@ 2010-03-19 15:14                 ` David Zeuthen
  2010-03-19 15:51                   ` Peter Rajnoha
  0 siblings, 1 reply; 40+ messages in thread
From: David Zeuthen @ 2010-03-19 15:14 UTC (permalink / raw)
  To: device-mapper development


[-- Attachment #1.1: Type: text/plain, Size: 946 bytes --]

On Fri, Mar 19, 2010 at 11:05 AM, Peter Rajnoha <prajnoha@redhat.com> wrote:

> As for the variables, we already carry some important
> information within uevents.
>
> Generally, it carries hints for udev rules to instruct them
> how they should be applied correctly, which parts should be
> run based on the type of the device, it's real meaning with all
> relations to other devices taken into account within that
> DM subsystem used (e.g. LVM2's snapshots, mirrors...).
>
> Most of this information is really not suitable to be stored
> as a sysfs attribute since it deals with userspace notions,
> an abstraction layer above device-mapper...
>

Presumably this information originates from user space when
setting up the device-mapper device, right? Why can't you
simply store it in, say, /var/run/device-mapper?

(Or, better, store it in /dev/.device-mapper/ to avoid hitting
the real disk - /dev is guaranteed to be on tmpfs)

Thanks,
David

[-- Attachment #1.2: Type: text/html, Size: 1364 bytes --]

[-- Attachment #2: Type: text/plain, Size: 0 bytes --]



^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-19 14:59                   ` David Zeuthen
@ 2010-03-19 15:24                     ` Alasdair G Kergon
  2010-03-19 16:01                       ` David Zeuthen
  0 siblings, 1 reply; 40+ messages in thread
From: Alasdair G Kergon @ 2010-03-19 15:24 UTC (permalink / raw)
  To: David Zeuthen; +Cc: device-mapper development

On Fri, Mar 19, 2010 at 10:59:12AM -0400, David Zeuthen wrote:
> I'd like to reiterate that it's actually not a problem that the sequence is
>  - "add" uevent
>  - device is not usable, access to the device fails gracefully
>  - "change" uevent
>  - device usable, blkid on the device etc. works
> the point really is that you have to accept that there will exist
> user space programs that does things on the device between
> "add" and the initial "change" uevent.
 
It is a problem if both:
    1) something attempts to open the devices in that window
AND 2) we have no mechanism to wait for it to finish

Also note that if it is permitted to run 'udevadm trigger' at
any time that also causes a synchonisation problem here: our
application code doesn't even know there is something conflicting
that it needs to wait for.  [Even though we want to ignore ADD,
we may still need to synchronise against it.]

So again, the 'don't issue ADD until device is usable' offers a simple
way of avoiding this class of problems.  The 'private device
attribute' we suggested offers another - all would be expected
to respect a 'private' sysfs attribute.

Alasdair

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-19 15:14                 ` David Zeuthen
@ 2010-03-19 15:51                   ` Peter Rajnoha
  0 siblings, 0 replies; 40+ messages in thread
From: Peter Rajnoha @ 2010-03-19 15:51 UTC (permalink / raw)
  To: David Zeuthen; +Cc: device-mapper development

On 03/19/2010 04:14 PM, David Zeuthen wrote:
>> Generally, it carries hints for udev rules to instruct them
>> how they should be applied correctly, which parts should be
>> run based on the type of the device, it's real meaning with all
>> relations to other devices taken into account within that
>> DM subsystem used (e.g. LVM2's snapshots, mirrors...).
>>
>> Most of this information is really not suitable to be stored
>> as a sysfs attribute since it deals with userspace notions,
>> an abstraction layer above device-mapper...
>>
> 
> Presumably this information originates from user space when
> setting up the device-mapper device, right? Why can't you
> simply store it in, say, /var/run/device-mapper?
> 
> (Or, better, store it in /dev/.device-mapper/ to avoid hitting
> the real disk - /dev is guaranteed to be on tmpfs)

Sure, this is a possibility...

But that would mean *duplicating* the (part) of udev db functionality
(so it could be considered as another workaround?) I just asked
myself if there would be more people who could benefit from such
feature in udev db directly. A more proper and official solution
others can use if needed as well.

(Maybe we can even write our own udev daemon, called dm-udevd :))


Peter

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-19 13:58               ` David Zeuthen
  2010-03-19 14:34                 ` Alasdair G Kergon
@ 2010-03-19 15:55                 ` Mike Snitzer
  2010-03-19 16:08                   ` David Zeuthen
  1 sibling, 1 reply; 40+ messages in thread
From: Mike Snitzer @ 2010-03-19 15:55 UTC (permalink / raw)
  To: device-mapper development

Hi David,

On Fri, Mar 19 2010 at  9:58am -0400,
David Zeuthen <zeuthen@gmail.com> wrote:

> Hey Kay,
> 
> On Fri, Mar 19, 2010 at 9:43 AM, Kay Sievers <kay.sievers@vrfy.org> wrote:
> 
> > On Fri, Mar 19, 2010 at 14:24, Peter Rajnoha <prajnoha@redhat.com> wrote:
> > > On 03/19/2010 10:24 AM, Kay Sievers wrote:
> > >> No, that's what "change" is for, and we already have these "change"
> > >> events for dm. Udev does not care if the device is ready or not, it
> > >> synchronizes /sys and /dev, and that works just fine with "change"
> > >> events.
> > >
> > > CHANGE events, not quite... We can't even rely on these.
> > >
> > > Just to mention, there's also a CHANGE event generated when
> > > read-only flag is set for a device (this is not managed by
> > > device-mapper of course). This one is generated even before
> > > the actual CHANGE event that is generated when DM device is
> > > ready to be used.
> >
> > Sure, but as mentioned earlier, these events are just expected to
> > fail, and update the current udev state, if they can't retrieve the
> > needed information or find out that the device in not usable.
> >
> 
> I think the problem is the that fact that 3rd party user space
> opens the device before it is ready (e.g. just after ADD but before
> the first CHANGE) makes things fall over.
> 
> This short-coming is what needs to get fixed, I think - it's very
> fragile this way and since any random user / package can add
> rules to open the device on add events, said user / package can
> make device-mapper fail. Which doesn't exactly strike me
> as robust behavior.

When I first read this response I thought we had a major break-through,
namely: udev allowing udev rules to race with the tool that is making
the device usable was not "robust behavior".

But your 2nd mail in this thread established that I had wishful thinking
on that so-called break-through.

At least we agree that these uevents are causing DM to race against
arbitrary udev rules; which leads to sporadic failures.

I think I understand udev's utopian intent to have all udev rules be
able to do as they wish with any device: said access should "fail
gracefully" on devices that aren't ready.

Thing is, this isn't scalable at all.  Having all these arbitrary rules
issuing IOs to devices that aren't usable is a complete waste of time.
On enterprise systems that have 100s (*shudder* 1000s) of LUNs, this
udev rules' freedom to access such unusable devices is really working
against us (if the goal is to activate devices as quickly and reliably
as possible).

We at least need a way to _reliably_ allow DM to do its work of managing
its devices.  What if udev were to offer a per device "udev rules lock"
(exposed via sysfs?) that allows subsystems (e.g. DM) to know they can't
yet proceed with exclussively accessing the device they are tasked with
managing?

This per device "udev rules lock" would at least allow DM to cope with
the racey nature of udev rules.  Not ideal as it still allows
inefficient (and unecessary) access to devices that shouldn't be touched
but it would at least be a means to an end (or so I'd think).

Mike

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-19 15:24                     ` Alasdair G Kergon
@ 2010-03-19 16:01                       ` David Zeuthen
  2010-03-19 16:36                         ` Alasdair G Kergon
  2010-03-22 10:11                         ` Milan Broz
  0 siblings, 2 replies; 40+ messages in thread
From: David Zeuthen @ 2010-03-19 16:01 UTC (permalink / raw)
  To: David Zeuthen, device-mapper development, Kay Sievers


[-- Attachment #1.1: Type: text/plain, Size: 2540 bytes --]

On Fri, Mar 19, 2010 at 11:24 AM, Alasdair G Kergon <agk@redhat.com> wrote:

> On Fri, Mar 19, 2010 at 10:59:12AM -0400, David Zeuthen wrote:
> > I'd like to reiterate that it's actually not a problem that the sequence
> is
> >  - "add" uevent
> >  - device is not usable, access to the device fails gracefully
> >  - "change" uevent
> >  - device usable, blkid on the device etc. works
> > the point really is that you have to accept that there will exist
> > user space programs that does things on the device between
> > "add" and the initial "change" uevent.
>
> It is a problem if both:
>    1) something attempts to open the devices in that window
> AND 2) we have no mechanism to wait for it to finish
>
> Also note that if it is permitted to run 'udevadm trigger' at
> any time that also causes a synchonisation problem here: our
> application code doesn't even know there is something conflicting
> that it needs to wait for.  [Even though we want to ignore ADD,
> we may still need to synchronise against it.]
>
> So again, the 'don't issue ADD until device is usable' offers a simple
> way of avoiding this class of problems.


But as Kay said it's a horrible horrible hack.

If you wanted to solve the problem kernel-side I'd expect you to
create a new subsytem for device-mapper (let's call it 'kdm') that
libdevmapper would talk to. Then you'd only present block device
objects exactly when the device-mapper side has been configured.

Then the flow of events could be something like this

 - add device kdm-0  (subsys 'kdm')
 - libdevmapper configures kdm-0
 - add device dm-0 (subsys 'block')

but I don't know if this is better.


> The 'private device
> attribute' we suggested offers another - all would be expected
> to respect a 'private' sysfs attribute.
>

Sorry, but this is an even worse hack. Mandating that all of user
space (that is: past, present and future) needs to read some
random 'private' attribute in sysfs because of weird life-cycle
issues in the device-mapper implementation... that's not really
workable.

Anyway, I really don't think you can expect user space to behave
sanely so it's not really worth trying.

(I don't think you ever could expect user space to behave sanely,
but I'll note that it's an even bigger problem now that we have
powerful frameworks (such as udev) allowing people to run code
at device discovery time.... I mean, device-mapper have probably
been suffering from these issues from day 1 - it just wasn't visible
earlier on because we didn't have uevents...)

Thanks,
David

[-- Attachment #1.2: Type: text/html, Size: 3597 bytes --]

[-- Attachment #2: Type: text/plain, Size: 0 bytes --]



^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-19 15:55                 ` Mike Snitzer
@ 2010-03-19 16:08                   ` David Zeuthen
  0 siblings, 0 replies; 40+ messages in thread
From: David Zeuthen @ 2010-03-19 16:08 UTC (permalink / raw)
  To: device-mapper development


[-- Attachment #1.1: Type: text/plain, Size: 1041 bytes --]

On Fri, Mar 19, 2010 at 11:55 AM, Mike Snitzer <snitzer@redhat.com> wrote:

> Thing is, this isn't scalable at all.  Having all these arbitrary rules
> issuing IOs to devices that aren't usable is a complete waste of time.
> On enterprise systems that have 100s (*shudder* 1000s) of LUNs, this
> udev rules' freedom to access such unusable devices is really working
> against us (if the goal is to activate devices as quickly and reliably
> as possible).
>

Please, Mike, whether to probe devices or not is a separate discussion
and it's not really helpful to reiterate that discussion.

(Please do see various mailing lists archives for that particular
discussion;
FWIW, you will notice that what udev is doing is actually desirable. If you
examine the kernel sources, you will find that the kernel itself submits
IO to the block devices - partition table probing for starters. If you
search
harder you will find *raw numbers* (instead of speculation like "100s
(*shudder* 1000s)" showing what udev is doing is not a problem...)

    David

[-- Attachment #1.2: Type: text/html, Size: 1454 bytes --]

[-- Attachment #2: Type: text/plain, Size: 0 bytes --]



^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-19 16:01                       ` David Zeuthen
@ 2010-03-19 16:36                         ` Alasdair G Kergon
  2010-03-22 10:11                         ` Milan Broz
  1 sibling, 0 replies; 40+ messages in thread
From: Alasdair G Kergon @ 2010-03-19 16:36 UTC (permalink / raw)
  To: David Zeuthen; +Cc: device-mapper development

On Fri, Mar 19, 2010 at 12:01:23PM -0400, David Zeuthen wrote:
> > So again, the 'don't issue ADD until device is usable' offers a simple
> > way of avoiding this class of problems.
> But as Kay said it's a horrible horrible hack.
 
I quite agree.  But every solution is a horrible hack in some way.

If we *really* want to solve this, we have to put all options on the table
and go back to the fundamentals, and aim to get agreement on definitions
of things like "block device visible to userspace" which we all believe
can be made to work.

> Sorry, but this is an even worse hack. Mandating that all of user
> space (that is: past, present and future) needs to read some
> random 'private' attribute in sysfs because of weird life-cycle
> issues in the device-mapper implementation... that's not really
> workable.
 
I think it's a simple and general concept.  Similar to an inactive multipath
path that must not have any I/O sent to it unless the first path fails.
Similar to a device in a shared storage cluster that has an exclusive
reservation on it from a different node.

Basically it's a marker to say: 
  Hands off! There's nothing here that would interest you.

Alasdair

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl.
  2010-03-19 16:01                       ` David Zeuthen
  2010-03-19 16:36                         ` Alasdair G Kergon
@ 2010-03-22 10:11                         ` Milan Broz
  1 sibling, 0 replies; 40+ messages in thread
From: Milan Broz @ 2010-03-22 10:11 UTC (permalink / raw)
  To: device-mapper development; +Cc: David Zeuthen

On 03/19/2010 05:01 PM, David Zeuthen wrote:

Hi,

> If you wanted to solve the problem kernel-side I'd expect you to
> create a new subsytem for device-mapper (let's call it 'kdm') that
> libdevmapper would talk to. Then you'd only present block device
> objects exactly when the device-mapper side has been configured.
> 
> Then the flow of events could be something like this
> 
>  - add device kdm-0  (subsys 'kdm')
>  - libdevmapper configures kdm-0
>  - add device dm-0 (subsys 'block')
> 
> but I don't know if this is better.

Nope. device-mapper constructs block devices into other block devices,
it just use some more complex trees (in comparison with MD) and
so you just see these device trees from userspace.

Visibility of all these devices is surely useful when you want to construct
something special without high-level abstraction (like lvm2)
(e.g. liveCD uses snapshots this way).

I want to say, with all the rules Kay just written in some previous mail,
these newly allocated block devices should be present
in sysfs and /dev in kobject creation time.

And btw there is no problem on kernel side using this definition -
ADD/CHANGE event is generated properly, all scans on "unconfigured"
(= no table) device fails.

The problem is in userspace synchronisation.

And device-mapper is nothing special here - what happens if someone
trigger change event before mkfs open the plain disk?
mkfs can fail because device can be locked by asynchronous scan.
(and udev settle is not always option)
  
 
>     The 'private device
>     attribute' we suggested offers another - all would be expected
>     to respect a 'private' sysfs attribute.
> 
> 
> Sorry, but this is an even worse hack. Mandating that all of user
> space (that is: past, present and future) needs to read some
> random 'private' attribute in sysfs because of weird life-cycle
> issues in the device-mapper implementation... that's not really
> workable.

If all you have is a hammer, everything looks like a nail.

Sure, we can scan everything what is in system and expect that
it responds "Error 418 I'm a teapot!" and we can fail gracefully :-)

You simply have to distinguish some differences in device types,
and you are already doing that. (See above why special subsystem
is not good idea IMHO.)

What is interesting, that the private attribute comes from udev
world (when discussing cryptsetup problem. NO other application must
read keyslot device, because it contains key related material).
(It is done this way, because it can this way use kernel crypto
for key obfuscation - and it was 3rd party design.)

See https://bugzilla.redhat.com/show_bug.cgi?id=528909#c8
(yes, it is your comment)

It is generic concept. Now it is wrong. Why?

> Anyway, I really don't think you can expect user space to behave
> sanely so it's not really worth trying.
> 
> (I don't think you ever could expect user space to behave sanely,
> but I'll note that it's an even bigger problem now that we have
> powerful frameworks (such as udev) allowing people to run code
> at device discovery time.... I mean, device-mapper have probably
> been suffering from these issues from day 1 - it just wasn't visible
> earlier on because we didn't have uevents...)

Sounds more like anarchy:-) Device-mapper is also very powerful
system (btw lvm2 uses quite small part of its abilities).
So we have several "powerful" systems and we want to work them together
(and keep the plane flying and not crash).
That means that during the flight some rules must apply. That's all.

Milan
--
mbroz@redhat.com

^ permalink raw reply	[flat|nested] 40+ messages in thread

end of thread, other threads:[~2010-03-22 10:11 UTC | newest]

Thread overview: 40+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-03-18 13:58 [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl Milan Broz
2010-03-18 13:58 ` [PATCH 2/3] Add genhd flag requesting notification of partition changes only Milan Broz
2010-03-18 13:58 ` [PATCH 3/3] Do not send multiple REMOVE events for kobjects Milan Broz
2010-03-18 16:13 ` [PATCH 1/3] Send KOBJ_ADD event after dm resume ioctl Kay Sievers
2010-03-18 17:24   ` Alasdair G Kergon
2010-03-18 21:35   ` Milan Broz
2010-03-19  8:27     ` Kay Sievers
2010-03-19  9:06       ` Lars Ellenberg
2010-03-19  9:24         ` Kay Sievers
2010-03-19  9:49           ` Milan Broz
2010-03-19 10:16             ` Kay Sievers
2010-03-19 11:14               ` Milan Broz
2010-03-19 11:44                 ` Kay Sievers
2010-03-19 12:08                   ` Milan Broz
2010-03-19 12:14                     ` Kay Sievers
2010-03-19 11:50                 ` Hannes Reinecke
2010-03-19 12:00               ` Alasdair G Kergon
2010-03-19 12:12               ` Alasdair G Kergon
2010-03-19 12:16                 ` Kay Sievers
2010-03-19 13:17                   ` Peter Rajnoha
2010-03-19 13:24           ` Peter Rajnoha
2010-03-19 13:43             ` Kay Sievers
2010-03-19 13:47               ` Alasdair G Kergon
2010-03-19 13:58               ` David Zeuthen
2010-03-19 14:34                 ` Alasdair G Kergon
2010-03-19 14:59                   ` David Zeuthen
2010-03-19 15:24                     ` Alasdair G Kergon
2010-03-19 16:01                       ` David Zeuthen
2010-03-19 16:36                         ` Alasdair G Kergon
2010-03-22 10:11                         ` Milan Broz
2010-03-19 15:55                 ` Mike Snitzer
2010-03-19 16:08                   ` David Zeuthen
2010-03-19 14:08             ` Hannes Reinecke
2010-03-19 15:05               ` Peter Rajnoha
2010-03-19 15:14                 ` David Zeuthen
2010-03-19 15:51                   ` Peter Rajnoha
2010-03-19  9:10       ` Milan Broz
2010-03-19  9:22         ` Kay Sievers
2010-03-19  9:42 ` Hannes Reinecke
2010-03-19 12:27   ` Alasdair G Kergon

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.