Linux-Fsdevel Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH -next 0/5] block: add a sequence number to disks
@ 2021-03-15 20:02 Matteo Croce
  2021-03-15 20:02 ` [PATCH -next 1/5] block: add disk sequence number Matteo Croce
                   ` (4 more replies)
  0 siblings, 5 replies; 20+ messages in thread
From: Matteo Croce @ 2021-03-15 20:02 UTC (permalink / raw)
  To: linux-block, linux-fsdevel
  Cc: linux-kernel, Lennart Poettering, Luca Boccassi, Jens Axboe,
	Alexander Viro, Damien Le Moal, Tejun Heo, Javier González,
	Niklas Cassel, Johannes Thumshirn, Hannes Reinecke

From: Matteo Croce <mcroce@microsoft.com>

With this series a monotonically increasing number is added to disks,
precisely in the genhd struct, and it's exported in sysfs and uevent.

This helps userspace correlate events for drivers that reuse the
same device, like loop.

The first patch is the core one, patches 2..4 expose the information in
different ways, while the last one increases the sequence number for
loop devices at every attach.

    # udevadm monitor -kp |grep -e ^DEVNAME -e ^DISKSEQ &
    [1] 523
    # losetup -fP 3part
    [ 3698.615848] loop0: detected capacity change from 16384 to 0
    DEVNAME=/dev/loop0
    DISKSEQ=13
    [ 3698.647189]  loop0: p1 p2 p3
    DEVNAME=/dev/loop0
    DISKSEQ=13
    DEVNAME=/dev/loop0p1
    DISKSEQ=13
    DEVNAME=/dev/loop0p2
    DISKSEQ=13
    DEVNAME=/dev/loop0p3
    DISKSEQ=13
    # losetup -fP 2part
    [ 3705.170766] loop1: detected capacity change from 40960 to 0
    DEVNAME=/dev/loop1
    DISKSEQ=14
    [ 3705.247280]  loop1: p1 p2
    DEVNAME=/dev/loop1
    DISKSEQ=14
    DEVNAME=/dev/loop1p1
    DISKSEQ=14
    DEVNAME=/dev/loop1p2
    DISKSEQ=14
    # ./getdiskseq /dev/loop*
    /dev/loop0:     13
    /dev/loop0p1:   13
    /dev/loop0p2:   13
    /dev/loop0p3:   13
    /dev/loop1:     14
    /dev/loop1p1:   14
    /dev/loop1p2:   14
    /dev/loop2:     5
    /dev/loop3:     6
    /dev/loop-control: Function not implemented
    # grep . /sys/class/block/*/diskseq
    /sys/class/block/loop0/diskseq:13
    /sys/class/block/loop1/diskseq:14
    /sys/class/block/loop2/diskseq:5
    /sys/class/block/loop3/diskseq:6
    /sys/class/block/ram0/diskseq:1
    /sys/class/block/ram1/diskseq:2
    /sys/class/block/vda/diskseq:7

If merged, this feature will immediately be used by userspace:
https://github.com/systemd/systemd/issues/17469#issuecomment-762919781

This is just a resend after the merge window and rebased on linux-block.
Also, added more people in CC from the get_maintainer.pl output
as well as -next in the subject.

Matteo Croce (5):
  block: add disk sequence number
  block: add ioctl to read the disk sequence number
  block: refactor sysfs code
  block: export diskseq in sysfs
  loop: increment sequence number

 Documentation/ABI/testing/sysfs-block | 12 ++++++++
 block/genhd.c                         | 43 ++++++++++++++++++++++++---
 block/ioctl.c                         |  2 ++
 drivers/block/loop.c                  |  3 ++
 include/linux/genhd.h                 |  2 ++
 include/uapi/linux/fs.h               |  1 +
 6 files changed, 59 insertions(+), 4 deletions(-)

-- 
2.30.2


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH -next 1/5] block: add disk sequence number
  2021-03-15 20:02 [PATCH -next 0/5] block: add a sequence number to disks Matteo Croce
@ 2021-03-15 20:02 ` Matteo Croce
  2021-03-15 20:18   ` Matthew Wilcox
  2021-03-16  1:44   ` JeffleXu
  2021-03-15 20:02 ` [PATCH -next 2/5] block: add ioctl to read the " Matteo Croce
                   ` (3 subsequent siblings)
  4 siblings, 2 replies; 20+ messages in thread
From: Matteo Croce @ 2021-03-15 20:02 UTC (permalink / raw)
  To: linux-block, linux-fsdevel
  Cc: linux-kernel, Lennart Poettering, Luca Boccassi, Jens Axboe,
	Alexander Viro, Damien Le Moal, Tejun Heo, Javier González,
	Niklas Cassel, Johannes Thumshirn, Hannes Reinecke

From: Matteo Croce <mcroce@microsoft.com>

Add a sequence number to the disk devices. This number is put in the
uevent so userspace can correlate events when a driver reuses a device,
like the loop one.

Signed-off-by: Matteo Croce <mcroce@microsoft.com>
---
 block/genhd.c         | 19 +++++++++++++++++++
 include/linux/genhd.h |  2 ++
 2 files changed, 21 insertions(+)

diff --git a/block/genhd.c b/block/genhd.c
index 8c8f543572e6..92debcb9e061 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1215,8 +1215,17 @@ static void disk_release(struct device *dev)
 		blk_put_queue(disk->queue);
 	kfree(disk);
 }
+
+static int block_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+
+	return add_uevent_var(env, "DISKSEQ=%llu", disk->diskseq);
+}
+
 struct class block_class = {
 	.name		= "block",
+	.dev_uevent	= block_uevent,
 };
 
 static char *block_devnode(struct device *dev, umode_t *mode,
@@ -1388,6 +1397,8 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
 	disk_to_dev(disk)->class = &block_class;
 	disk_to_dev(disk)->type = &disk_type;
 	device_initialize(disk_to_dev(disk));
+	inc_diskseq(disk);
+
 	return disk;
 
 out_destroy_part_tbl:
@@ -1938,3 +1949,11 @@ static void disk_release_events(struct gendisk *disk)
 	WARN_ON_ONCE(disk->ev && disk->ev->block != 1);
 	kfree(disk->ev);
 }
+
+void inc_diskseq(struct gendisk *disk)
+{
+	static atomic64_t diskseq;
+
+	disk->diskseq = atomic64_inc_return(&diskseq);
+}
+EXPORT_SYMBOL_GPL(inc_diskseq);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index f364619092cc..632141b360d2 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -167,6 +167,7 @@ struct gendisk {
 	int node_id;
 	struct badblocks *bb;
 	struct lockdep_map lockdep_map;
+	u64 diskseq;
 };
 
 /*
@@ -326,6 +327,7 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev,
 #endif /* CONFIG_SYSFS */
 
 extern struct rw_semaphore bdev_lookup_sem;
+extern void inc_diskseq(struct gendisk *disk);
 
 dev_t blk_lookup_devt(const char *name, int partno);
 void blk_request_module(dev_t devt);
-- 
2.30.2


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH -next 2/5] block: add ioctl to read the disk sequence number
  2021-03-15 20:02 [PATCH -next 0/5] block: add a sequence number to disks Matteo Croce
  2021-03-15 20:02 ` [PATCH -next 1/5] block: add disk sequence number Matteo Croce
@ 2021-03-15 20:02 ` Matteo Croce
  2021-03-15 20:13   ` Matthew Wilcox
  2021-03-15 20:02 ` [PATCH -next 3/5] block: refactor sysfs code Matteo Croce
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 20+ messages in thread
From: Matteo Croce @ 2021-03-15 20:02 UTC (permalink / raw)
  To: linux-block, linux-fsdevel
  Cc: linux-kernel, Lennart Poettering, Luca Boccassi, Jens Axboe,
	Alexander Viro, Damien Le Moal, Tejun Heo, Javier González,
	Niklas Cassel, Johannes Thumshirn, Hannes Reinecke

From: Matteo Croce <mcroce@microsoft.com>

Add a new BLKGETDISKSEQ ioctl which retrieves the disk sequence number
from the genhd structure.

Signed-off-by: Matteo Croce <mcroce@microsoft.com>
---
 block/ioctl.c           | 2 ++
 include/uapi/linux/fs.h | 1 +
 2 files changed, 3 insertions(+)

diff --git a/block/ioctl.c b/block/ioctl.c
index ff241e663c01..266315d00942 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -467,6 +467,8 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
 				BLKDEV_DISCARD_SECURE);
 	case BLKZEROOUT:
 		return blk_ioctl_zeroout(bdev, mode, arg);
+	case BLKGETDISKSEQ:
+		return put_u64(argp, bdev->bd_disk->diskseq);
 	case BLKREPORTZONE:
 		return blkdev_report_zones_ioctl(bdev, mode, cmd, arg);
 	case BLKRESETZONE:
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index f44eb0a04afd..5dc72bbdd9b7 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -184,6 +184,7 @@ struct fsxattr {
 #define BLKSECDISCARD _IO(0x12,125)
 #define BLKROTATIONAL _IO(0x12,126)
 #define BLKZEROOUT _IO(0x12,127)
+#define BLKGETDISKSEQ _IOR(0x12,128,__u64)
 /*
  * A jump here: 130-131 are reserved for zoned block devices
  * (see uapi/linux/blkzoned.h)
-- 
2.30.2


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH -next 3/5] block: refactor sysfs code
  2021-03-15 20:02 [PATCH -next 0/5] block: add a sequence number to disks Matteo Croce
  2021-03-15 20:02 ` [PATCH -next 1/5] block: add disk sequence number Matteo Croce
  2021-03-15 20:02 ` [PATCH -next 2/5] block: add ioctl to read the " Matteo Croce
@ 2021-03-15 20:02 ` Matteo Croce
  2021-03-15 20:02 ` [PATCH -next 4/5] block: export diskseq in sysfs Matteo Croce
  2021-03-15 20:02 ` [PATCH -next 5/5] loop: increment sequence number Matteo Croce
  4 siblings, 0 replies; 20+ messages in thread
From: Matteo Croce @ 2021-03-15 20:02 UTC (permalink / raw)
  To: linux-block, linux-fsdevel
  Cc: linux-kernel, Lennart Poettering, Luca Boccassi, Jens Axboe,
	Alexander Viro, Damien Le Moal, Tejun Heo, Javier González,
	Niklas Cassel, Johannes Thumshirn, Hannes Reinecke

From: Matteo Croce <mcroce@microsoft.com>

Move the sysfs register code from a function named disk_add_events() to
a new function named disk_add_sysfs(). Also, rename the attribute list
with a more generic name than disk_events_attrs.

Signed-off-by: Matteo Croce <mcroce@microsoft.com>
---
 block/genhd.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index 92debcb9e061..57d92ea7ae05 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -41,6 +41,7 @@ static void disk_alloc_events(struct gendisk *disk);
 static void disk_add_events(struct gendisk *disk);
 static void disk_del_events(struct gendisk *disk);
 static void disk_release_events(struct gendisk *disk);
+static void disk_add_sysfs(struct gendisk *disk);
 
 void set_capacity(struct gendisk *disk, sector_t sectors)
 {
@@ -628,6 +629,7 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
 	 */
 	WARN_ON_ONCE(!blk_get_queue(disk->queue));
 
+	disk_add_sysfs(disk);
 	disk_add_events(disk);
 	blk_integrity_add(disk);
 }
@@ -1838,7 +1840,7 @@ static const DEVICE_ATTR(events_poll_msecs, 0644,
 			 disk_events_poll_msecs_show,
 			 disk_events_poll_msecs_store);
 
-static const struct attribute *disk_events_attrs[] = {
+static const struct attribute *disk_sysfs_attrs[] = {
 	&dev_attr_events.attr,
 	&dev_attr_events_async.attr,
 	&dev_attr_events_poll_msecs.attr,
@@ -1909,13 +1911,16 @@ static void disk_alloc_events(struct gendisk *disk)
 	disk->ev = ev;
 }
 
-static void disk_add_events(struct gendisk *disk)
+static void disk_add_sysfs(struct gendisk *disk)
 {
 	/* FIXME: error handling */
-	if (sysfs_create_files(&disk_to_dev(disk)->kobj, disk_events_attrs) < 0)
+	if (sysfs_create_files(&disk_to_dev(disk)->kobj, disk_sysfs_attrs) < 0)
 		pr_warn("%s: failed to create sysfs files for events\n",
 			disk->disk_name);
+}
 
+static void disk_add_events(struct gendisk *disk)
+{
 	if (!disk->ev)
 		return;
 
@@ -1940,7 +1945,7 @@ static void disk_del_events(struct gendisk *disk)
 		mutex_unlock(&disk_events_mutex);
 	}
 
-	sysfs_remove_files(&disk_to_dev(disk)->kobj, disk_events_attrs);
+	sysfs_remove_files(&disk_to_dev(disk)->kobj, disk_sysfs_attrs);
 }
 
 static void disk_release_events(struct gendisk *disk)
-- 
2.30.2


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH -next 4/5] block: export diskseq in sysfs
  2021-03-15 20:02 [PATCH -next 0/5] block: add a sequence number to disks Matteo Croce
                   ` (2 preceding siblings ...)
  2021-03-15 20:02 ` [PATCH -next 3/5] block: refactor sysfs code Matteo Croce
@ 2021-03-15 20:02 ` Matteo Croce
  2021-03-15 20:02 ` [PATCH -next 5/5] loop: increment sequence number Matteo Croce
  4 siblings, 0 replies; 20+ messages in thread
From: Matteo Croce @ 2021-03-15 20:02 UTC (permalink / raw)
  To: linux-block, linux-fsdevel
  Cc: linux-kernel, Lennart Poettering, Luca Boccassi, Jens Axboe,
	Alexander Viro, Damien Le Moal, Tejun Heo, Javier González,
	Niklas Cassel, Johannes Thumshirn, Hannes Reinecke

From: Matteo Croce <mcroce@microsoft.com>

Add a new sysfs handle to export the new diskseq value.
Place it in <sysfs>/block/<disk>/diskseq and document it.

Signed-off-by: Matteo Croce <mcroce@microsoft.com>
---
 Documentation/ABI/testing/sysfs-block | 12 ++++++++++++
 block/genhd.c                         | 11 +++++++++++
 2 files changed, 23 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index e34cdeeeb9d4..a0ed87386639 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -28,6 +28,18 @@ Description:
 		For more details refer Documentation/admin-guide/iostats.rst
 
 
+What:		/sys/block/<disk>/diskseq
+Date:		February 2021
+Contact:	Matteo Croce <mcroce@microsoft.com>
+Description:
+		The /sys/block/<disk>/diskseq file reports the disk
+		sequence number, which is a monotonically increasing
+		number assigned to every drive.
+		Some devices, like the loop device, refresh such number
+		every time the backing file is changed.
+		The value type is 64 bit unsigned.
+
+
 What:		/sys/block/<disk>/<part>/stat
 Date:		February 2008
 Contact:	Jerome Marchand <jmarchan@redhat.com>
diff --git a/block/genhd.c b/block/genhd.c
index 57d92ea7ae05..6a7ed426def0 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1764,6 +1764,7 @@ static void disk_check_events(struct disk_events *ev,
  * events_async		: list of events which can be detected w/o polling
  *			  (always empty, only for backwards compatibility)
  * events_poll_msecs	: polling interval, 0: disable, -1: system default
+ * diskseq		: disk sequence number, since boot
  */
 static ssize_t __disk_events_show(unsigned int events, char *buf)
 {
@@ -1834,16 +1835,26 @@ static ssize_t disk_events_poll_msecs_store(struct device *dev,
 	return count;
 }
 
+static ssize_t diskseq_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+
+	return sprintf(buf, "%llu\n", disk->diskseq);
+}
+
 static const DEVICE_ATTR(events, 0444, disk_events_show, NULL);
 static const DEVICE_ATTR(events_async, 0444, disk_events_async_show, NULL);
 static const DEVICE_ATTR(events_poll_msecs, 0644,
 			 disk_events_poll_msecs_show,
 			 disk_events_poll_msecs_store);
+static const DEVICE_ATTR(diskseq, 0444, diskseq_show, NULL);
 
 static const struct attribute *disk_sysfs_attrs[] = {
 	&dev_attr_events.attr,
 	&dev_attr_events_async.attr,
 	&dev_attr_events_poll_msecs.attr,
+	&dev_attr_diskseq.attr,
 	NULL,
 };
 
-- 
2.30.2


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH -next 5/5] loop: increment sequence number
  2021-03-15 20:02 [PATCH -next 0/5] block: add a sequence number to disks Matteo Croce
                   ` (3 preceding siblings ...)
  2021-03-15 20:02 ` [PATCH -next 4/5] block: export diskseq in sysfs Matteo Croce
@ 2021-03-15 20:02 ` Matteo Croce
  4 siblings, 0 replies; 20+ messages in thread
From: Matteo Croce @ 2021-03-15 20:02 UTC (permalink / raw)
  To: linux-block, linux-fsdevel
  Cc: linux-kernel, Lennart Poettering, Luca Boccassi, Jens Axboe,
	Alexander Viro, Damien Le Moal, Tejun Heo, Javier González,
	Niklas Cassel, Johannes Thumshirn, Hannes Reinecke

From: Matteo Croce <mcroce@microsoft.com>

On a very loaded system, if there are many events queued up from multiple
attach/detach cycles, it's impossible to match them up with the
LOOP_CONFIGURE or LOOP_SET_FD call, since we don't know the position
of our own association in the queue[1].
Not even an empty uevent queue is a reliable indication that we already
received the uevent we were waiting for, since with multi-partition block
devices each partition's event is queued asynchronously and might be
delivered later.

Increment the disk sequence number when setting or changing the backing
file, so the userspace knows which backing file generated the event.

[1] https://github.com/systemd/systemd/issues/17469#issuecomment-762919781

Signed-off-by: Matteo Croce <mcroce@microsoft.com>
---
 drivers/block/loop.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index a370cde3ddd4..1541ccff81f9 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -734,6 +734,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 		goto out_err;
 
 	/* and ... switch */
+	inc_diskseq(lo->lo_disk);
 	blk_mq_freeze_queue(lo->lo_queue);
 	mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
 	lo->lo_backing_file = file;
@@ -1122,6 +1123,8 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 	if (error)
 		goto out_unlock;
 
+	inc_diskseq(lo->lo_disk);
+
 	if (!(file->f_mode & FMODE_WRITE) || !(mode & FMODE_WRITE) ||
 	    !file->f_op->write_iter)
 		lo->lo_flags |= LO_FLAGS_READ_ONLY;
-- 
2.30.2


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH -next 2/5] block: add ioctl to read the disk sequence number
  2021-03-15 20:02 ` [PATCH -next 2/5] block: add ioctl to read the " Matteo Croce
@ 2021-03-15 20:13   ` Matthew Wilcox
  2021-03-15 20:17     ` Damien Le Moal
  2021-03-15 20:34     ` Matteo Croce
  0 siblings, 2 replies; 20+ messages in thread
From: Matthew Wilcox @ 2021-03-15 20:13 UTC (permalink / raw)
  To: Matteo Croce
  Cc: linux-block, linux-fsdevel, linux-kernel, Lennart Poettering,
	Luca Boccassi, Jens Axboe, Alexander Viro, Damien Le Moal,
	Tejun Heo, Javier González, Niklas Cassel,
	Johannes Thumshirn, Hannes Reinecke, Ajay Joshi,
	Christoph Hellwig, Matias Bjorling, Hans Holmberg,
	Dmitry Fomichev, Keith Busch, Dmitry V. Levin

On Mon, Mar 15, 2021 at 09:02:39PM +0100, Matteo Croce wrote:
> +++ b/include/uapi/linux/fs.h
> @@ -184,6 +184,7 @@ struct fsxattr {
>  #define BLKSECDISCARD _IO(0x12,125)
>  #define BLKROTATIONAL _IO(0x12,126)
>  #define BLKZEROOUT _IO(0x12,127)
> +#define BLKGETDISKSEQ _IOR(0x12,128,__u64)
>  /*
>   * A jump here: 130-131 are reserved for zoned block devices
>   * (see uapi/linux/blkzoned.h)

Not your bug, but this is now 130-136.

+cc all the people who signed off on the commits that added those ioctl
numbers without updating this comment.  Perhaps one of them will figure
out how to stop this happening in future.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH -next 2/5] block: add ioctl to read the disk sequence number
  2021-03-15 20:13   ` Matthew Wilcox
@ 2021-03-15 20:17     ` Damien Le Moal
  2021-03-15 20:34     ` Matteo Croce
  1 sibling, 0 replies; 20+ messages in thread
From: Damien Le Moal @ 2021-03-15 20:17 UTC (permalink / raw)
  To: Matthew Wilcox, Matteo Croce
  Cc: linux-block, linux-fsdevel, linux-kernel, Lennart Poettering,
	Luca Boccassi, Jens Axboe, Alexander Viro, Tejun Heo,
	Javier González, Niklas Cassel, Johannes Thumshirn,
	Hannes Reinecke, Ajay Joshi, Christoph Hellwig, Matias Bjorling,
	Hans Holmberg, Dmitry Fomichev, Keith Busch, Dmitry V. Levin

On 2021/03/16 5:14, Matthew Wilcox wrote:
> On Mon, Mar 15, 2021 at 09:02:39PM +0100, Matteo Croce wrote:
>> +++ b/include/uapi/linux/fs.h
>> @@ -184,6 +184,7 @@ struct fsxattr {
>>  #define BLKSECDISCARD _IO(0x12,125)
>>  #define BLKROTATIONAL _IO(0x12,126)
>>  #define BLKZEROOUT _IO(0x12,127)
>> +#define BLKGETDISKSEQ _IOR(0x12,128,__u64)
>>  /*
>>   * A jump here: 130-131 are reserved for zoned block devices
>>   * (see uapi/linux/blkzoned.h)
> 
> Not your bug, but this is now 130-136.
> 
> +cc all the people who signed off on the commits that added those ioctl
> numbers without updating this comment.  Perhaps one of them will figure
> out how to stop this happening in future.
> 

Indeed. Will be more careful :)
And send a patch to fix this.

Thanks !

-- 
Damien Le Moal
Western Digital Research

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH -next 1/5] block: add disk sequence number
  2021-03-15 20:02 ` [PATCH -next 1/5] block: add disk sequence number Matteo Croce
@ 2021-03-15 20:18   ` Matthew Wilcox
  2021-03-15 21:04     ` Matthew Wilcox
                       ` (2 more replies)
  2021-03-16  1:44   ` JeffleXu
  1 sibling, 3 replies; 20+ messages in thread
From: Matthew Wilcox @ 2021-03-15 20:18 UTC (permalink / raw)
  To: Matteo Croce
  Cc: linux-block, linux-fsdevel, linux-kernel, Lennart Poettering,
	Luca Boccassi, Jens Axboe, Alexander Viro, Damien Le Moal,
	Tejun Heo, Javier González, Niklas Cassel,
	Johannes Thumshirn, Hannes Reinecke

On Mon, Mar 15, 2021 at 09:02:38PM +0100, Matteo Croce wrote:
> From: Matteo Croce <mcroce@microsoft.com>
> 
> Add a sequence number to the disk devices. This number is put in the
> uevent so userspace can correlate events when a driver reuses a device,
> like the loop one.

Should this be documented as monotonically increasing?  I think this
is actually a media identifier.  Consider (if you will) a floppy disc.
Back when such things were common, it was possible with personal computers
of the era to have multiple floppy discs "in play" and be prompted to
insert them as needed.  So shouldn't it be possible to support something
similar here -- you're really removing the media from the loop device.
With a monotonically increasing number, you're always destroying the
media when you remove it, but in principle, it should be possible to
reinsert the same media and have the same media identifier number.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH -next 2/5] block: add ioctl to read the disk sequence number
  2021-03-15 20:13   ` Matthew Wilcox
  2021-03-15 20:17     ` Damien Le Moal
@ 2021-03-15 20:34     ` Matteo Croce
  1 sibling, 0 replies; 20+ messages in thread
From: Matteo Croce @ 2021-03-15 20:34 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: linux-block, linux-fsdevel, linux-kernel, Lennart Poettering,
	Luca Boccassi, Jens Axboe, Alexander Viro, Damien Le Moal,
	Tejun Heo, Javier González, Niklas Cassel,
	Johannes Thumshirn, Hannes Reinecke, Ajay Joshi,
	Christoph Hellwig, Matias Bjorling, Hans Holmberg,
	Dmitry Fomichev, Keith Busch, Dmitry V. Levin

On Mon, Mar 15, 2021 at 9:13 PM Matthew Wilcox <willy@infradead.org> wrote:
>
> On Mon, Mar 15, 2021 at 09:02:39PM +0100, Matteo Croce wrote:
> > +++ b/include/uapi/linux/fs.h
> > @@ -184,6 +184,7 @@ struct fsxattr {
> >  #define BLKSECDISCARD _IO(0x12,125)
> >  #define BLKROTATIONAL _IO(0x12,126)
> >  #define BLKZEROOUT _IO(0x12,127)
> > +#define BLKGETDISKSEQ _IOR(0x12,128,__u64)
> >  /*
> >   * A jump here: 130-131 are reserved for zoned block devices
> >   * (see uapi/linux/blkzoned.h)
>
> Not your bug, but this is now 130-136.
>
> +cc all the people who signed off on the commits that added those ioctl
> numbers without updating this comment.  Perhaps one of them will figure
> out how to stop this happening in future.

Note taken, thanks!

-- 
per aspera ad upstream

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH -next 1/5] block: add disk sequence number
  2021-03-15 20:18   ` Matthew Wilcox
@ 2021-03-15 21:04     ` Matthew Wilcox
  2021-03-15 21:32       ` Lennart Poettering
                         ` (2 more replies)
  2021-03-16 14:13     ` Christoph Hellwig
  2021-03-25 20:52     ` Lennart Poettering
  2 siblings, 3 replies; 20+ messages in thread
From: Matthew Wilcox @ 2021-03-15 21:04 UTC (permalink / raw)
  To: Matteo Croce
  Cc: linux-block, linux-fsdevel, linux-kernel, Lennart Poettering,
	Luca Boccassi, Jens Axboe, Alexander Viro, Damien Le Moal,
	Tejun Heo, Javier González, Niklas Cassel,
	Johannes Thumshirn, Hannes Reinecke

On Mon, Mar 15, 2021 at 08:18:24PM +0000, Matthew Wilcox wrote:
> On Mon, Mar 15, 2021 at 09:02:38PM +0100, Matteo Croce wrote:
> > From: Matteo Croce <mcroce@microsoft.com>
> > 
> > Add a sequence number to the disk devices. This number is put in the
> > uevent so userspace can correlate events when a driver reuses a device,
> > like the loop one.
> 
> Should this be documented as monotonically increasing?  I think this
> is actually a media identifier.  Consider (if you will) a floppy disc.
> Back when such things were common, it was possible with personal computers
> of the era to have multiple floppy discs "in play" and be prompted to
> insert them as needed.  So shouldn't it be possible to support something
> similar here -- you're really removing the media from the loop device.
> With a monotonically increasing number, you're always destroying the
> media when you remove it, but in principle, it should be possible to
> reinsert the same media and have the same media identifier number.

So ... a lot of devices have UUIDs or similar.  eg:

$ cat /sys/block/nvme0n1/uuid 
e8238fa6-bf53-0001-001b-448b49cec94f

https://linux.die.net/man/8/scsi_id (for scsi)

how about making this way more generic; create an xattr on a file to
store the uuid (if one doesn't already exist) whenever it's used as the
base for a loop device.  then sysfs (or whatever) can report the contents
of that xattr as the unique id.

That can be mostly in userspace -- losetup can create it, and read it.
It can be passed in as the first two current-reserved __u64 entries in
loop_config.  The only kernel change should be creating the sysfs
entry /sys/block/loopN/uuid from those two array entries.


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH -next 1/5] block: add disk sequence number
  2021-03-15 21:04     ` Matthew Wilcox
@ 2021-03-15 21:32       ` Lennart Poettering
  2021-03-25 17:29       ` Matteo Croce
  2021-03-25 20:58       ` Lennart Poettering
  2 siblings, 0 replies; 20+ messages in thread
From: Lennart Poettering @ 2021-03-15 21:32 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Matteo Croce, linux-block, linux-fsdevel, linux-kernel,
	Luca Boccassi, Jens Axboe, Alexander Viro, Damien Le Moal,
	Tejun Heo, Javier González, Niklas Cassel,
	Johannes Thumshirn, Hannes Reinecke

On Mo, 15.03.21 21:04, Matthew Wilcox (willy@infradead.org) wrote:

> On Mon, Mar 15, 2021 at 08:18:24PM +0000, Matthew Wilcox wrote:
> > On Mon, Mar 15, 2021 at 09:02:38PM +0100, Matteo Croce wrote:
> > > From: Matteo Croce <mcroce@microsoft.com>
> > >
> > > Add a sequence number to the disk devices. This number is put in the
> > > uevent so userspace can correlate events when a driver reuses a device,
> > > like the loop one.
> >
> > Should this be documented as monotonically increasing?  I think this
> > is actually a media identifier.  Consider (if you will) a floppy disc.
> > Back when such things were common, it was possible with personal computers
> > of the era to have multiple floppy discs "in play" and be prompted to
> > insert them as needed.  So shouldn't it be possible to support something
> > similar here -- you're really removing the media from the loop device.
> > With a monotonically increasing number, you're always destroying the
> > media when you remove it, but in principle, it should be possible to
> > reinsert the same media and have the same media identifier number.
>
> So ... a lot of devices have UUIDs or similar.  eg:
>
> $ cat /sys/block/nvme0n1/uuid
> e8238fa6-bf53-0001-001b-448b49cec94f
>
> https://linux.die.net/man/8/scsi_id (for scsi)
>
> how about making this way more generic; create an xattr on a file to
> store the uuid (if one doesn't already exist) whenever it's used as the
> base for a loop device.  then sysfs (or whatever) can report the contents
> of that xattr as the unique id.
>
> That can be mostly in userspace -- losetup can create it, and read it.
> It can be passed in as the first two current-reserved __u64 entries in
> loop_config.  The only kernel change should be creating the sysfs
> entry /sys/block/loopN/uuid from those two array entries.

As a (part-time) maintainer of udev: as one major likely consumer of
this I'd *really* prefer some concept here that works without
`losetup` needing to be patched. i.e. we have plenty userspace that
calls LOOP_CONFIGURE or LOOP_SET_FD, not just losetup, and we'd have
to patch them all. In particular in a world of containers it's even
worse: people probably will continue to use old userspaces (mixed with
newer ones) for a very long time (decades!), and those old userspaces
won't fill in the fields for the ioctl hence.

Hence, for me it would be essential to have an identifier that is
assigned by the kernel, instead of requiring userspace to assign it,
because userspace won't for a long long time.

I'd be OK with a hybrid approach where userspace *can* fill
something in, but doesn't have to in which case the kernel would fill
it in.

That all said, I very much prefer if we'd use a kernel-enforced
"sequence number" or "generation counter" or so for this instead of a
uuid or random cookie or so. Why? because it allows userspace that
monitors things to derive ordering from these ids: when you watch
these events and see a uevent for a device seqno=4711 then you know
that it is from an earlier use than one you see for seqno=8878. UUIDs
can't give you that. That's in particular a nice property since
uevents/netlink are not a reliable transport: messages can get lost
when the socket buffers overrun, or when udev as the uevent broker
gets overloaded. Hence, for a userspace program it's kinda nice to
know whether it's worth waiting for a specific loop device use or if
it's clear that ship has sailed already: i.e. if my own use of a
specific loop device gets seqno 777 then I know it still makes sense
to wait for appropriate uevents as long as I see seqno <= 776. But if
I see seqno >= 778 then I know it's not worth waiting anymore and
one component in the uevent message chain has dropped my messages.

But of course, beggars can't be choosers. If a seqno/generation
counter concept is not in the cards, I'd be OK with a uuid/random
cookie approach too. And if an approach where the kernel assigns these
seqnos strictly monotonically is not in the cards, then I'd be OK with
an approach where userspace can pick the ids, too. I'll take what I
can get. My primary concern is that we get something to match up
uevents, partition devices and the main block device with, and all of
the suggested approaches could deliver that.

Lennart

--
Lennart Poettering, Berlin

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH -next 1/5] block: add disk sequence number
  2021-03-15 20:02 ` [PATCH -next 1/5] block: add disk sequence number Matteo Croce
  2021-03-15 20:18   ` Matthew Wilcox
@ 2021-03-16  1:44   ` JeffleXu
  2021-03-23 17:43     ` Matteo Croce
  1 sibling, 1 reply; 20+ messages in thread
From: JeffleXu @ 2021-03-16  1:44 UTC (permalink / raw)
  To: Matteo Croce, linux-block, linux-fsdevel
  Cc: linux-kernel, Lennart Poettering, Luca Boccassi, Jens Axboe,
	Alexander Viro, Damien Le Moal, Tejun Heo, Javier González,
	Niklas Cassel, Johannes Thumshirn, Hannes Reinecke



On 3/16/21 4:02 AM, Matteo Croce wrote:
> From: Matteo Croce <mcroce@microsoft.com>
> 
> Add a sequence number to the disk devices. This number is put in the
> uevent so userspace can correlate events when a driver reuses a device,
> like the loop one.
> 
> Signed-off-by: Matteo Croce <mcroce@microsoft.com>
> ---
>  block/genhd.c         | 19 +++++++++++++++++++
>  include/linux/genhd.h |  2 ++
>  2 files changed, 21 insertions(+)
> 
> diff --git a/block/genhd.c b/block/genhd.c
> index 8c8f543572e6..92debcb9e061 100644
> --- a/block/genhd.c
> +++ b/block/genhd.c
> @@ -1215,8 +1215,17 @@ static void disk_release(struct device *dev)
>  		blk_put_queue(disk->queue);
>  	kfree(disk);
>  }
> +
> +static int block_uevent(struct device *dev, struct kobj_uevent_env *env)
> +{
> +	struct gendisk *disk = dev_to_disk(dev);
> +
> +	return add_uevent_var(env, "DISKSEQ=%llu", disk->diskseq);
> +}
> +
>  struct class block_class = {
>  	.name		= "block",
> +	.dev_uevent	= block_uevent,
>  };
>  
>  static char *block_devnode(struct device *dev, umode_t *mode,
> @@ -1388,6 +1397,8 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
>  	disk_to_dev(disk)->class = &block_class;
>  	disk_to_dev(disk)->type = &disk_type;
>  	device_initialize(disk_to_dev(disk));
> +	inc_diskseq(disk);
> +
>  	return disk;
>  
>  out_destroy_part_tbl:
> @@ -1938,3 +1949,11 @@ static void disk_release_events(struct gendisk *disk)
>  	WARN_ON_ONCE(disk->ev && disk->ev->block != 1);
>  	kfree(disk->ev);
>  }
> +
> +void inc_diskseq(struct gendisk *disk)
> +{
> +	static atomic64_t diskseq;
> +
> +	disk->diskseq = atomic64_inc_return(&diskseq);
> +}
> +EXPORT_SYMBOL_GPL(inc_diskseq);

Hi, I'm quite interested in this 'seqnum'. Actually I'm also planning to
add support for some sort of 'seqnum' when supporting IO polling for dm
devices, so that every time dm device changes its dm table, the seqnum
will be increased.

As for your patch, @diskseq is declared as one static variable in
inc_diskseq(). Then I doubt if all callers of inc_diskseq() will share
*one* counting when inc_diskseq() is compiled as the separate call entry
rather than inlined.



> diff --git a/include/linux/genhd.h b/include/linux/genhd.h
> index f364619092cc..632141b360d2 100644
> --- a/include/linux/genhd.h
> +++ b/include/linux/genhd.h
> @@ -167,6 +167,7 @@ struct gendisk {
>  	int node_id;
>  	struct badblocks *bb;
>  	struct lockdep_map lockdep_map;
> +	u64 diskseq;
>  };
>  
>  /*
> @@ -326,6 +327,7 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev,
>  #endif /* CONFIG_SYSFS */
>  
>  extern struct rw_semaphore bdev_lookup_sem;
> +extern void inc_diskseq(struct gendisk *disk);
>  
>  dev_t blk_lookup_devt(const char *name, int partno);
>  void blk_request_module(dev_t devt);
> 

-- 
Thanks,
Jeffle

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH -next 1/5] block: add disk sequence number
  2021-03-15 20:18   ` Matthew Wilcox
  2021-03-15 21:04     ` Matthew Wilcox
@ 2021-03-16 14:13     ` Christoph Hellwig
  2021-04-20 20:12       ` Lennart Poettering
  2021-03-25 20:52     ` Lennart Poettering
  2 siblings, 1 reply; 20+ messages in thread
From: Christoph Hellwig @ 2021-03-16 14:13 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Matteo Croce, linux-block, linux-fsdevel, linux-kernel,
	Lennart Poettering, Luca Boccassi, Jens Axboe, Alexander Viro,
	Damien Le Moal, Tejun Heo, Javier González, Niklas Cassel,
	Johannes Thumshirn, Hannes Reinecke

On Mon, Mar 15, 2021 at 08:18:24PM +0000, Matthew Wilcox wrote:
> On Mon, Mar 15, 2021 at 09:02:38PM +0100, Matteo Croce wrote:
> > From: Matteo Croce <mcroce@microsoft.com>
> > 
> > Add a sequence number to the disk devices. This number is put in the
> > uevent so userspace can correlate events when a driver reuses a device,
> > like the loop one.
> 
> Should this be documented as monotonically increasing?  I think this
> is actually a media identifier.  Consider (if you will) a floppy disc.
> Back when such things were common, it was possible with personal computers
> of the era to have multiple floppy discs "in play" and be prompted to
> insert them as needed.  So shouldn't it be possible to support something
> similar here -- you're really removing the media from the loop device.
> With a monotonically increasing number, you're always destroying the
> media when you remove it, but in principle, it should be possible to
> reinsert the same media and have the same media identifier number.

And we have some decent infrastructure related to media changes,
grep for disk_events.  I think this needs to plug into that
infrastructure instead of duplicating it.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH -next 1/5] block: add disk sequence number
  2021-03-16  1:44   ` JeffleXu
@ 2021-03-23 17:43     ` Matteo Croce
  0 siblings, 0 replies; 20+ messages in thread
From: Matteo Croce @ 2021-03-23 17:43 UTC (permalink / raw)
  To: JeffleXu
  Cc: linux-block, linux-fsdevel, linux-kernel, Lennart Poettering,
	Luca Boccassi, Jens Axboe, Alexander Viro, Damien Le Moal,
	Tejun Heo, Javier González, Niklas Cassel,
	Johannes Thumshirn, Hannes Reinecke

On Tue, Mar 16, 2021 at 2:44 AM JeffleXu <jefflexu@linux.alibaba.com> wrote:
>
> On 3/16/21 4:02 AM, Matteo Croce wrote:
> > From: Matteo Croce <mcroce@microsoft.com>
> >
> > Add a sequence number to the disk devices. This number is put in the
> > uevent so userspace can correlate events when a driver reuses a device,
> > like the loop one.
>
> Hi, I'm quite interested in this 'seqnum'. Actually I'm also planing to
> add support for some sort of 'seqnum' when supporting IO polling for dm
> devices, so that every time dm device changes its dm table, the seqnum
> will be increased.
>

Interesting, thanks!

> As for your patch, @diskseq is declared as one static variable in
> inc_diskseq(). Then I doubt if all callers of inc_diskseq() will share
> *one* counting when inc_diskseq() is compiled as the separate call entry
> rather than inlined.
>

That would be true if the static declaration was in the .h, but being
in genhd.c it goes in vmlinux once.
Maybe you get confused by the inc_diskseq prototype in the header
file, but the function body is in the .c

Regards,



--
per aspera ad upstream

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH -next 1/5] block: add disk sequence number
  2021-03-15 21:04     ` Matthew Wilcox
  2021-03-15 21:32       ` Lennart Poettering
@ 2021-03-25 17:29       ` Matteo Croce
  2021-03-26  8:00         ` Hannes Reinecke
  2021-03-25 20:58       ` Lennart Poettering
  2 siblings, 1 reply; 20+ messages in thread
From: Matteo Croce @ 2021-03-25 17:29 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: linux-block, linux-fsdevel, linux-kernel, Lennart Poettering,
	Luca Boccassi, Jens Axboe, Alexander Viro, Damien Le Moal,
	Tejun Heo, Javier González, Niklas Cassel,
	Johannes Thumshirn, Hannes Reinecke

On Mon, Mar 15, 2021 at 10:05 PM Matthew Wilcox <willy@infradead.org> wrote:
>
> On Mon, Mar 15, 2021 at 08:18:24PM +0000, Matthew Wilcox wrote:
> > On Mon, Mar 15, 2021 at 09:02:38PM +0100, Matteo Croce wrote:
> > > From: Matteo Croce <mcroce@microsoft.com>
> > >
> > > Add a sequence number to the disk devices. This number is put in the
> > > uevent so userspace can correlate events when a driver reuses a device,
> > > like the loop one.
> >
> > Should this be documented as monotonically increasing?  I think this
> > is actually a media identifier.  Consider (if you will) a floppy disc.
> > Back when such things were common, it was possible with personal computers
> > of the era to have multiple floppy discs "in play" and be prompted to
> > insert them as needed.  So shouldn't it be possible to support something
> > similar here -- you're really removing the media from the loop device.
> > With a monotonically increasing number, you're always destroying the
> > media when you remove it, but in principle, it should be possible to
> > reinsert the same media and have the same media identifier number.
>
> So ... a lot of devices have UUIDs or similar.  eg:
>
> $ cat /sys/block/nvme0n1/uuid
> e8238fa6-bf53-0001-001b-448b49cec94f
>
> https://linux.die.net/man/8/scsi_id (for scsi)
>

Hi,

I don't have uuid anywhere:

matteo@saturno:~$ ll /dev/sd?
brw-rw---- 1 root disk 8,  0 feb 16 13:24 /dev/sda
brw-rw---- 1 root disk 8, 16 feb 16 13:24 /dev/sdb
brw-rw---- 1 root disk 8, 32 feb 16 13:24 /dev/sdc
brw-rw---- 1 root disk 8, 48 feb 16 13:24 /dev/sdd
brw-rw---- 1 root disk 8, 64 mar  4 06:26 /dev/sde
brw-rw---- 1 root disk 8, 80 feb 16 13:24 /dev/sdf
matteo@saturno:~$ ll /sys/block/*/uuid
ls: cannot access '/sys/block/*/uuid': No such file or directory

mcroce@t490s:~$ ll /dev/nvme0n1
brw-rw----. 1 root disk 259, 0 25 mar 14.22 /dev/nvme0n1
mcroce@t490s:~$ ll /sys/block/*/uuid
ls: cannot access '/sys/block/*/uuid': No such file or directory

I find it only on a mdraid array:

$ cat /sys/devices/virtual/block/md127/md/uuid
26117338-4f54-f14e-b5d4-93feb7fe825d

I'm using a vanilla 5.11 kernel.

Regards,
-- 
per aspera ad upstream

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH -next 1/5] block: add disk sequence number
  2021-03-15 20:18   ` Matthew Wilcox
  2021-03-15 21:04     ` Matthew Wilcox
  2021-03-16 14:13     ` Christoph Hellwig
@ 2021-03-25 20:52     ` Lennart Poettering
  2 siblings, 0 replies; 20+ messages in thread
From: Lennart Poettering @ 2021-03-25 20:52 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Matteo Croce, linux-block, linux-fsdevel, linux-kernel,
	Luca Boccassi, Jens Axboe, Alexander Viro, Damien Le Moal,
	Tejun Heo, Javier González, Niklas Cassel,
	Johannes Thumshirn, Hannes Reinecke

On Mo, 15.03.21 20:18, Matthew Wilcox (willy@infradead.org) wrote:

> On Mon, Mar 15, 2021 at 09:02:38PM +0100, Matteo Croce wrote:
> > From: Matteo Croce <mcroce@microsoft.com>
> >
> > Add a sequence number to the disk devices. This number is put in the
> > uevent so userspace can correlate events when a driver reuses a device,
> > like the loop one.
>
> Should this be documented as monotonically increasing?

I think this would be great. My usecase for this would be to match up
uevents with loopback block device attachments, because that's
basically impossible right now: you attach a loopback device to a
file, and then wait for the relevant uevents to happen, for all
partitions but you cannot do this safely right now, since loopback
block devices are heavily reused in many scenarios so you never know
if a uevent is from the attachment you created yourself or from a
previous one — or even already for the next.

If this would be documented as being monotonic this would be excellent
for this usecase: if you know that your own use of a specific loopback
device got seqno x then you know that if you see uevents for seqno < x
it makes sense to wait longer, but when you see seqno > x then you
know it's too late, somehow you lost uevents and should abort.

Hence: for my usecase having this strictly monotonic, and thus being
able to *order* attachments across all areas where the seqno appears
would be absolutely excellent and make this as robust as it possibly
could be.

> I think this is actually a media identifier.  Consider (if you will)
> a floppy disc.  Back when such things were common, it was possible
> with personal computers of the era to have multiple floppy discs "in
> play" and be prompted to insert them as needed.  So shouldn't it be
> possible to support something similar here -- you're really removing
> the media from the loop device.  With a monotonically increasing
> number, you're always destroying the media when you remove it, but
> in principle, it should be possible to reinsert the same media and
> have the same media identifier number.

This would be useless for my usecase, we don't really care for the
precise file being attached (which is queriable via sysfs anyway), but
we want to match up our use of the device with the uevents it
generates on itself and descendant partition block devices.

Hence: for my usecase I want something that recognizes *attachments*
and not media. If i attach the same media 3 times i want to be able to
discern the three times. And more importantly: if I attach it once and
someone else also once, then I don't want to get confused by that and
be able to distinguish both attachments.

Moreover, I am not even sure what media identifier would mean: if you
have one image and then copy it, is that still the same image? in your
model, should that have distinct ids? or the same, because it is from
the same common original version? and if i then modify one, what
happens then?

Finally, media usually comes with ids anyway. i.e. file systems have
uuids, GPT partition tables have media uuids. The infrastructure for
that already exists. What we need really is something that allows us
to track attachments, not media.

(That said, I think it would make sense to bump the IDs not only on
explicit user-induced reattachments, but also when media is replaced,
i.e. bump it more often than not)

Lennart

--
Lennart Poettering, Berlin

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH -next 1/5] block: add disk sequence number
  2021-03-15 21:04     ` Matthew Wilcox
  2021-03-15 21:32       ` Lennart Poettering
  2021-03-25 17:29       ` Matteo Croce
@ 2021-03-25 20:58       ` Lennart Poettering
  2 siblings, 0 replies; 20+ messages in thread
From: Lennart Poettering @ 2021-03-25 20:58 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Matteo Croce, linux-block, linux-fsdevel, linux-kernel,
	Luca Boccassi, Jens Axboe, Alexander Viro, Damien Le Moal,
	Tejun Heo, Javier González, Niklas Cassel,
	Johannes Thumshirn, Hannes Reinecke

On Mo, 15.03.21 21:04, Matthew Wilcox (willy@infradead.org) wrote:

> On Mon, Mar 15, 2021 at 08:18:24PM +0000, Matthew Wilcox wrote:
> > On Mon, Mar 15, 2021 at 09:02:38PM +0100, Matteo Croce wrote:
> > > From: Matteo Croce <mcroce@microsoft.com>
> > >
> > > Add a sequence number to the disk devices. This number is put in the
> > > uevent so userspace can correlate events when a driver reuses a device,
> > > like the loop one.
> >
> > Should this be documented as monotonically increasing?  I think this
> > is actually a media identifier.  Consider (if you will) a floppy disc.
> > Back when such things were common, it was possible with personal computers
> > of the era to have multiple floppy discs "in play" and be prompted to
> > insert them as needed.  So shouldn't it be possible to support something
> > similar here -- you're really removing the media from the loop device.
> > With a monotonically increasing number, you're always destroying the
> > media when you remove it, but in principle, it should be possible to
> > reinsert the same media and have the same media identifier number.
>
> So ... a lot of devices have UUIDs or similar.  eg:
>
> $ cat /sys/block/nvme0n1/uuid
> e8238fa6-bf53-0001-001b-448b49cec94f
>
> https://linux.die.net/man/8/scsi_id (for scsi)
>
> how about making this way more generic; create an xattr on a file to
> store the uuid (if one doesn't already exist) whenever it's used as the
> base for a loop device.  then sysfs (or whatever) can report the contents
> of that xattr as the unique id.
>
> That can be mostly in userspace -- losetup can create it, and read it.
> It can be passed in as the first two current-reserved __u64 entries in
> loop_config.  The only kernel change should be creating the sysfs
> entry /sys/block/loopN/uuid from those two array entries.

I prefer seqnos over uuids because we can order them when we see a
bunch of uevents for the same loopback device with their seqnos, as
mentioned in that other mail.

But beggars can't be choosers. If we could propagate some uuid from
the loopback setup ioctl into the device so that that appears via
sysfs that would work too for me, but not as robustly, since we lack
the ordering to detect whether it's worth waiting for more uevents or
if already somebody else took possession of the device.

TLDR: seqnos FTW! but uuids assigned at attachment time is better than
nothing.

Lennart

--
Lennart Poettering, Berlin

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH -next 1/5] block: add disk sequence number
  2021-03-25 17:29       ` Matteo Croce
@ 2021-03-26  8:00         ` Hannes Reinecke
  0 siblings, 0 replies; 20+ messages in thread
From: Hannes Reinecke @ 2021-03-26  8:00 UTC (permalink / raw)
  To: Matteo Croce, Matthew Wilcox
  Cc: linux-block, linux-fsdevel, linux-kernel, Lennart Poettering,
	Luca Boccassi, Jens Axboe, Alexander Viro, Damien Le Moal,
	Tejun Heo, Javier González, Niklas Cassel,
	Johannes Thumshirn

On 3/25/21 6:29 PM, Matteo Croce wrote:
> On Mon, Mar 15, 2021 at 10:05 PM Matthew Wilcox <willy@infradead.org> wrote:
>>
>> On Mon, Mar 15, 2021 at 08:18:24PM +0000, Matthew Wilcox wrote:
>>> On Mon, Mar 15, 2021 at 09:02:38PM +0100, Matteo Croce wrote:
>>>> From: Matteo Croce <mcroce@microsoft.com>
>>>>
>>>> Add a sequence number to the disk devices. This number is put in the
>>>> uevent so userspace can correlate events when a driver reuses a device,
>>>> like the loop one.
>>>
>>> Should this be documented as monotonically increasing?  I think this
>>> is actually a media identifier.  Consider (if you will) a floppy disc.
>>> Back when such things were common, it was possible with personal computers
>>> of the era to have multiple floppy discs "in play" and be prompted to
>>> insert them as needed.  So shouldn't it be possible to support something
>>> similar here -- you're really removing the media from the loop device.
>>> With a monotonically increasing number, you're always destroying the
>>> media when you remove it, but in principle, it should be possible to
>>> reinsert the same media and have the same media identifier number.
>>
>> So ... a lot of devices have UUIDs or similar.  eg:
>>
>> $ cat /sys/block/nvme0n1/uuid
>> e8238fa6-bf53-0001-001b-448b49cec94f
>>
>> https://linux.die.net/man/8/scsi_id (for scsi)
>>
> 
> Hi,
> 
> I don't have uuid anywhere:
> 
> matteo@saturno:~$ ll /dev/sd?
> brw-rw---- 1 root disk 8,  0 feb 16 13:24 /dev/sda
> brw-rw---- 1 root disk 8, 16 feb 16 13:24 /dev/sdb
> brw-rw---- 1 root disk 8, 32 feb 16 13:24 /dev/sdc
> brw-rw---- 1 root disk 8, 48 feb 16 13:24 /dev/sdd
> brw-rw---- 1 root disk 8, 64 mar  4 06:26 /dev/sde
> brw-rw---- 1 root disk 8, 80 feb 16 13:24 /dev/sdf
> matteo@saturno:~$ ll /sys/block/*/uuid
> ls: cannot access '/sys/block/*/uuid': No such file or directory
> 
> mcroce@t490s:~$ ll /dev/nvme0n1
> brw-rw----. 1 root disk 259, 0 25 mar 14.22 /dev/nvme0n1
> mcroce@t490s:~$ ll /sys/block/*/uuid
> ls: cannot access '/sys/block/*/uuid': No such file or directory
> 
> I find it only on a mdraid array:
> 
> $ cat /sys/devices/virtual/block/md127/md/uuid
> 26117338-4f54-f14e-b5d4-93feb7fe825d
> 
> I'm using a vanilla 5.11 kernel.
> 
The 'uuid' is optional for NVMe devices, and indeed not even present for 
other device types.
Use the 'wwid' attribute, which contains a unique identifier for all 
nvme devices:

# cat /sys/block/nvme*/wwid
nvme.8086-4356504436343735303034323430304e474e-564f303430304b45464a42-00000001
nvme.8086-4356504436343735303034363430304e474e-564f303430304b45464a42-00000001
uuid.3c6500ee-a775-4c89-b223-e9551f5a9f7a

and for SCSI the wwid is part of the SCSI device:
# cat /sys/block/sd*/device/wwid
naa.600508b1001ce2e648a35b6ec14a3996

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                Kernel Storage Architect
hare@suse.de                              +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH -next 1/5] block: add disk sequence number
  2021-03-16 14:13     ` Christoph Hellwig
@ 2021-04-20 20:12       ` Lennart Poettering
  0 siblings, 0 replies; 20+ messages in thread
From: Lennart Poettering @ 2021-04-20 20:12 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Matthew Wilcox, Matteo Croce, linux-block, linux-fsdevel,
	linux-kernel, Luca Boccassi, Jens Axboe, Alexander Viro,
	Damien Le Moal, Tejun Heo, Javier González, Niklas Cassel,
	Johannes Thumshirn, Hannes Reinecke

On Di, 16.03.21 14:13, Christoph Hellwig (hch@infradead.org) wrote:

> On Mon, Mar 15, 2021 at 08:18:24PM +0000, Matthew Wilcox wrote:
> > On Mon, Mar 15, 2021 at 09:02:38PM +0100, Matteo Croce wrote:
> > > From: Matteo Croce <mcroce@microsoft.com>
> > >
> > > Add a sequence number to the disk devices. This number is put in the
> > > uevent so userspace can correlate events when a driver reuses a device,
> > > like the loop one.
> >
> > Should this be documented as monotonically increasing?  I think this
> > is actually a media identifier.  Consider (if you will) a floppy disc.
> > Back when such things were common, it was possible with personal computers
> > of the era to have multiple floppy discs "in play" and be prompted to
> > insert them as needed.  So shouldn't it be possible to support something
> > similar here -- you're really removing the media from the loop device.
> > With a monotonically increasing number, you're always destroying the
> > media when you remove it, but in principle, it should be possible to
> > reinsert the same media and have the same media identifier number.
>
> And we have some decent infrastructure related to media changes,
> grep for disk_events.  I think this needs to plug into that
> infrastructure instead of duplicating it.

I'd argue this makes sense in one way only, i.e. that whenever the
media_change event is seen the seqnum is implicitly bumped.

I am pretty sure though that loopback devices shouldn't synthesize
media_change events themselves though. There's quite a difference I
would argue between a real media change event caused by external
effect (i.e. humans/hw buttons/sensors) to loop device reuse, which is
exclusively triggered by internal events (i.e. local code). Moreover I
think the loopback subsystem should manage the seqnum on its own,
since it ideally would return the assigned seqnum immediately from the
attachment ioctl, i.e. it shouldn't just be a side-effect of
attachment, but a part of it, if you follow what I mean.

Does that make sense?

Matteo, would it make sense to extend your patch set to bump the
seqnum implicitly on media_change for devices that implement that?

Lennart

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, back to index

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-15 20:02 [PATCH -next 0/5] block: add a sequence number to disks Matteo Croce
2021-03-15 20:02 ` [PATCH -next 1/5] block: add disk sequence number Matteo Croce
2021-03-15 20:18   ` Matthew Wilcox
2021-03-15 21:04     ` Matthew Wilcox
2021-03-15 21:32       ` Lennart Poettering
2021-03-25 17:29       ` Matteo Croce
2021-03-26  8:00         ` Hannes Reinecke
2021-03-25 20:58       ` Lennart Poettering
2021-03-16 14:13     ` Christoph Hellwig
2021-04-20 20:12       ` Lennart Poettering
2021-03-25 20:52     ` Lennart Poettering
2021-03-16  1:44   ` JeffleXu
2021-03-23 17:43     ` Matteo Croce
2021-03-15 20:02 ` [PATCH -next 2/5] block: add ioctl to read the " Matteo Croce
2021-03-15 20:13   ` Matthew Wilcox
2021-03-15 20:17     ` Damien Le Moal
2021-03-15 20:34     ` Matteo Croce
2021-03-15 20:02 ` [PATCH -next 3/5] block: refactor sysfs code Matteo Croce
2021-03-15 20:02 ` [PATCH -next 4/5] block: export diskseq in sysfs Matteo Croce
2021-03-15 20:02 ` [PATCH -next 5/5] loop: increment sequence number Matteo Croce

Linux-Fsdevel Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-fsdevel/0 linux-fsdevel/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-fsdevel linux-fsdevel/ https://lore.kernel.org/linux-fsdevel \
		linux-fsdevel@vger.kernel.org
	public-inbox-index linux-fsdevel

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-fsdevel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git