* [PATCH v4 1/5] block: add disk sequence number
2021-07-11 17:54 [PATCH v4 0/5] block: add a sequence number to disks Matteo Croce
@ 2021-07-11 17:54 ` Matteo Croce
2021-07-12 6:25 ` Christoph Hellwig
2021-07-11 17:54 ` [PATCH v4 2/5] block: export the diskseq in uevents Matteo Croce
` (3 subsequent siblings)
4 siblings, 1 reply; 14+ messages in thread
From: Matteo Croce @ 2021-07-11 17:54 UTC (permalink / raw)
To: linux-block, linux-fsdevel, Jens Axboe
Cc: linux-kernel, Lennart Poettering, Luca Boccassi, Alexander Viro,
Damien Le Moal, Tejun Heo, Javier González, Niklas Cassel,
Johannes Thumshirn, Hannes Reinecke, Matthew Wilcox,
Christoph Hellwig, JeffleXu
From: Matteo Croce <mcroce@microsoft.com>
Associating uevents with block devices in userspace is difficult and racy:
the uevent netlink socket is lossy, and on slow and overloaded systems
has a very high latency.
Block devices do not have exclusive owners in userspace, any process can
set one up (e.g. loop devices). Moreover, device names can be reused
(e.g. loop0 can be reused again and again). A userspace process setting
up a block device and watching for its events cannot thus reliably tell
whether an event relates to the device it just set up or another earlier
instance with the same name.
Being able to set a UUID on a loop device would solve the race conditions.
But it does not allow deriving orderings from uevents: if you see a
uevent with a UUID that does not match the device you are waiting for,
you cannot tell whether it's because the right uevent has not arrived yet,
or it was already sent and you missed it. So you cannot tell whether you
should wait for it or not.
Associating a unique, monotonically increasing sequential number to the
lifetime of each block device, which can be retrieved with an ioctl
immediately upon setting it up, allows solving the race conditions with
uevents, and also allows userspace processes to know whether they should
wait for the uevent they need or if it was dropped and thus they should
move on.
Additionally, increment the disk sequence number when the media change,
i.e. on DISK_EVENT_MEDIA_CHANGE event.
Signed-off-by: Matteo Croce <mcroce@microsoft.com>
---
block/disk-events.c | 3 +++
block/genhd.c | 25 +++++++++++++++++++++++++
include/linux/genhd.h | 2 ++
3 files changed, 30 insertions(+)
diff --git a/block/disk-events.c b/block/disk-events.c
index a75931ff5da4..04c52f3992ed 100644
--- a/block/disk-events.c
+++ b/block/disk-events.c
@@ -190,6 +190,9 @@ static void disk_check_events(struct disk_events *ev,
spin_unlock_irq(&ev->lock);
+ if (events & DISK_EVENT_MEDIA_CHANGE)
+ inc_diskseq(disk);
+
/*
* Tell userland about new events. Only the events listed in
* @disk->events are reported, and only if DISK_EVENT_FLAG_UEVENT
diff --git a/block/genhd.c b/block/genhd.c
index af4d2ab4a633..b7fca2102aa3 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -29,6 +29,23 @@
static struct kobject *block_depr;
+/*
+ * Unique, monotonically increasing sequential number associated with block
+ * devices instances (i.e. incremented each time a device is attached).
+ * Associating uevents with block devices in userspace is difficult and racy:
+ * the uevent netlink socket is lossy, and on slow and overloaded systems has
+ * a very high latency.
+ * Block devices do not have exclusive owners in userspace, any process can set
+ * one up (e.g. loop devices). Moreover, device names can be reused (e.g. loop0
+ * can be reused again and again).
+ * A userspace process setting up a block device and watching for its events
+ * cannot thus reliably tell whether an event relates to the device it just set
+ * up or another earlier instance with the same name.
+ * This sequential number allows userspace processes to solve this problem, and
+ * uniquely associate a uevent with the lifetime of a device.
+ */
+static atomic64_t diskseq;
+
/* for extended dynamic devt allocation, currently only one major is used */
#define NR_EXT_DEVT (1 << MINORBITS)
static DEFINE_IDA(ext_devt_ida);
@@ -1263,6 +1280,8 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
disk_to_dev(disk)->class = &block_class;
disk_to_dev(disk)->type = &disk_type;
device_initialize(disk_to_dev(disk));
+ inc_diskseq(disk);
+
return disk;
out_destroy_part_tbl:
@@ -1363,3 +1382,9 @@ int bdev_read_only(struct block_device *bdev)
return bdev->bd_read_only || get_disk_ro(bdev->bd_disk);
}
EXPORT_SYMBOL(bdev_read_only);
+
+void inc_diskseq(struct gendisk *disk)
+{
+ disk->diskseq = atomic64_inc_return(&diskseq);
+}
+EXPORT_SYMBOL_GPL(inc_diskseq);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 13b34177cc85..140c028845af 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -172,6 +172,7 @@ struct gendisk {
int node_id;
struct badblocks *bb;
struct lockdep_map lockdep_map;
+ u64 diskseq;
};
/*
@@ -332,6 +333,7 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev,
#endif /* CONFIG_SYSFS */
dev_t part_devt(struct gendisk *disk, u8 partno);
+void inc_diskseq(struct gendisk *disk);
dev_t blk_lookup_devt(const char *name, int partno);
void blk_request_module(dev_t devt);
#ifdef CONFIG_BLOCK
--
2.31.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH v4 1/5] block: add disk sequence number
2021-07-11 17:54 ` [PATCH v4 1/5] block: add disk sequence number Matteo Croce
@ 2021-07-12 6:25 ` Christoph Hellwig
0 siblings, 0 replies; 14+ messages in thread
From: Christoph Hellwig @ 2021-07-12 6:25 UTC (permalink / raw)
To: Matteo Croce
Cc: linux-block, linux-fsdevel, Jens Axboe, linux-kernel,
Lennart Poettering, Luca Boccassi, Alexander Viro,
Damien Le Moal, Tejun Heo, Javier González, Niklas Cassel,
Johannes Thumshirn, Hannes Reinecke, Matthew Wilcox,
Christoph Hellwig, JeffleXu
On Sun, Jul 11, 2021 at 07:54:11PM +0200, Matteo Croce wrote:
> +void inc_diskseq(struct gendisk *disk)
> +{
> + disk->diskseq = atomic64_inc_return(&diskseq);
> +}
> +EXPORT_SYMBOL_GPL(inc_diskseq);
No need to export inc_diskseq in the new world order.
Otherwise looks good:
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH v4 2/5] block: export the diskseq in uevents
2021-07-11 17:54 [PATCH v4 0/5] block: add a sequence number to disks Matteo Croce
2021-07-11 17:54 ` [PATCH v4 1/5] block: add disk sequence number Matteo Croce
@ 2021-07-11 17:54 ` Matteo Croce
2021-07-12 6:26 ` Christoph Hellwig
2021-07-11 17:54 ` [PATCH v4 3/5] block: add ioctl to read the disk sequence number Matteo Croce
` (2 subsequent siblings)
4 siblings, 1 reply; 14+ messages in thread
From: Matteo Croce @ 2021-07-11 17:54 UTC (permalink / raw)
To: linux-block, linux-fsdevel, Jens Axboe
Cc: linux-kernel, Lennart Poettering, Luca Boccassi, Alexander Viro,
Damien Le Moal, Tejun Heo, Javier González, Niklas Cassel,
Johannes Thumshirn, Hannes Reinecke, Matthew Wilcox,
Christoph Hellwig, JeffleXu
From: Matteo Croce <mcroce@microsoft.com>
Export the newly introduced diskseq in uevents:
$ udevadm info /sys/class/block/* |grep -e DEVNAME -e DISKSEQ
E: DEVNAME=/dev/loop0
E: DISKSEQ=1
E: DEVNAME=/dev/loop1
E: DISKSEQ=2
E: DEVNAME=/dev/loop2
E: DISKSEQ=3
E: DEVNAME=/dev/loop3
E: DISKSEQ=4
E: DEVNAME=/dev/loop4
E: DISKSEQ=5
E: DEVNAME=/dev/loop5
E: DISKSEQ=6
E: DEVNAME=/dev/loop6
E: DISKSEQ=7
E: DEVNAME=/dev/loop7
E: DISKSEQ=8
E: DEVNAME=/dev/nvme0n1
E: DISKSEQ=9
E: DEVNAME=/dev/nvme0n1p1
E: DISKSEQ=9
E: DEVNAME=/dev/nvme0n1p2
E: DISKSEQ=9
E: DEVNAME=/dev/nvme0n1p3
E: DISKSEQ=9
E: DEVNAME=/dev/nvme0n1p4
E: DISKSEQ=9
E: DEVNAME=/dev/nvme0n1p5
E: DISKSEQ=9
E: DEVNAME=/dev/sda
E: DISKSEQ=10
E: DEVNAME=/dev/sda1
E: DISKSEQ=10
E: DEVNAME=/dev/sda2
E: DISKSEQ=10
Signed-off-by: Matteo Croce <mcroce@microsoft.com>
---
block/genhd.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/block/genhd.c b/block/genhd.c
index b7fca2102aa3..3d9c9d189ff7 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1101,8 +1101,17 @@ static void disk_release(struct device *dev)
blk_put_queue(disk->queue);
kfree(disk);
}
+
+static int block_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+ struct gendisk *disk = dev_to_disk(dev);
+
+ return add_uevent_var(env, "DISKSEQ=%llu", disk->diskseq);
+}
+
struct class block_class = {
.name = "block",
+ .dev_uevent = block_uevent,
};
static char *block_devnode(struct device *dev, umode_t *mode,
--
2.31.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH v4 2/5] block: export the diskseq in uevents
2021-07-11 17:54 ` [PATCH v4 2/5] block: export the diskseq in uevents Matteo Croce
@ 2021-07-12 6:26 ` Christoph Hellwig
0 siblings, 0 replies; 14+ messages in thread
From: Christoph Hellwig @ 2021-07-12 6:26 UTC (permalink / raw)
To: Matteo Croce
Cc: linux-block, linux-fsdevel, Jens Axboe, linux-kernel,
Lennart Poettering, Luca Boccassi, Alexander Viro,
Damien Le Moal, Tejun Heo, Javier González, Niklas Cassel,
Johannes Thumshirn, Hannes Reinecke, Matthew Wilcox,
Christoph Hellwig, JeffleXu
Looks good,
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH v4 3/5] block: add ioctl to read the disk sequence number
2021-07-11 17:54 [PATCH v4 0/5] block: add a sequence number to disks Matteo Croce
2021-07-11 17:54 ` [PATCH v4 1/5] block: add disk sequence number Matteo Croce
2021-07-11 17:54 ` [PATCH v4 2/5] block: export the diskseq in uevents Matteo Croce
@ 2021-07-11 17:54 ` Matteo Croce
2021-07-12 6:28 ` Christoph Hellwig
2021-07-12 19:22 ` Elliott, Robert (Servers)
2021-07-11 17:54 ` [PATCH v4 4/5] block: export diskseq in sysfs Matteo Croce
2021-07-11 17:54 ` [PATCH v4 5/5] loop: raise media_change event Matteo Croce
4 siblings, 2 replies; 14+ messages in thread
From: Matteo Croce @ 2021-07-11 17:54 UTC (permalink / raw)
To: linux-block, linux-fsdevel, Jens Axboe
Cc: linux-kernel, Lennart Poettering, Luca Boccassi, Alexander Viro,
Damien Le Moal, Tejun Heo, Javier González, Niklas Cassel,
Johannes Thumshirn, Hannes Reinecke, Matthew Wilcox,
Christoph Hellwig, JeffleXu
From: Matteo Croce <mcroce@microsoft.com>
Add a new BLKGETDISKSEQ ioctl which retrieves the disk sequence number
from the genhd structure.
# ./getdiskseq /dev/loop*
/dev/loop0: 13
/dev/loop0p1: 13
/dev/loop0p2: 13
/dev/loop0p3: 13
/dev/loop1: 14
/dev/loop1p1: 14
/dev/loop1p2: 14
/dev/loop2: 5
/dev/loop3: 6
Signed-off-by: Matteo Croce <mcroce@microsoft.com>
---
block/ioctl.c | 2 ++
include/uapi/linux/fs.h | 1 +
2 files changed, 3 insertions(+)
diff --git a/block/ioctl.c b/block/ioctl.c
index 24beec9ca9c9..0c3a4a53fa11 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -469,6 +469,8 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
BLKDEV_DISCARD_SECURE);
case BLKZEROOUT:
return blk_ioctl_zeroout(bdev, mode, arg);
+ case BLKGETDISKSEQ:
+ return put_u64(argp, bdev->bd_disk->diskseq);
case BLKREPORTZONE:
return blkdev_report_zones_ioctl(bdev, mode, cmd, arg);
case BLKRESETZONE:
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 4c32e97dcdf0..bdf7b404b3e7 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -184,6 +184,7 @@ struct fsxattr {
#define BLKSECDISCARD _IO(0x12,125)
#define BLKROTATIONAL _IO(0x12,126)
#define BLKZEROOUT _IO(0x12,127)
+#define BLKGETDISKSEQ _IOR(0x12,128,__u64)
/*
* A jump here: 130-136 are reserved for zoned block devices
* (see uapi/linux/blkzoned.h)
--
2.31.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH v4 3/5] block: add ioctl to read the disk sequence number
2021-07-11 17:54 ` [PATCH v4 3/5] block: add ioctl to read the disk sequence number Matteo Croce
@ 2021-07-12 6:28 ` Christoph Hellwig
2021-07-12 19:22 ` Elliott, Robert (Servers)
1 sibling, 0 replies; 14+ messages in thread
From: Christoph Hellwig @ 2021-07-12 6:28 UTC (permalink / raw)
To: Matteo Croce
Cc: linux-block, linux-fsdevel, Jens Axboe, linux-kernel,
Lennart Poettering, Luca Boccassi, Alexander Viro,
Damien Le Moal, Tejun Heo, Javier González, Niklas Cassel,
Johannes Thumshirn, Hannes Reinecke, Matthew Wilcox,
Christoph Hellwig, JeffleXu
Looks good,
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 14+ messages in thread
* RE: [PATCH v4 3/5] block: add ioctl to read the disk sequence number
2021-07-11 17:54 ` [PATCH v4 3/5] block: add ioctl to read the disk sequence number Matteo Croce
2021-07-12 6:28 ` Christoph Hellwig
@ 2021-07-12 19:22 ` Elliott, Robert (Servers)
2021-07-12 23:25 ` Matteo Croce
2021-07-13 5:59 ` Christoph Hellwig
1 sibling, 2 replies; 14+ messages in thread
From: Elliott, Robert (Servers) @ 2021-07-12 19:22 UTC (permalink / raw)
To: 'Matteo Croce', linux-block, linux-fsdevel, Jens Axboe
Cc: linux-kernel, Lennart Poettering, Luca Boccassi, Alexander Viro,
Damien Le Moal, Tejun Heo, Javier González, Niklas Cassel,
Johannes Thumshirn, Hannes Reinecke, Matthew Wilcox,
Christoph Hellwig, JeffleXu
> -----Original Message-----
> From: Matteo Croce <mcroce@linux.microsoft.com>
> Sent: Sunday, July 11, 2021 12:54 PM
...
> Subject: [PATCH v4 3/5] block: add ioctl to read the disk sequence number
>
> From: Matteo Croce <mcroce@microsoft.com>
>
> Add a new BLKGETDISKSEQ ioctl which retrieves the disk sequence number
> from the genhd structure.
...
Given:
static int put_u64(u64 __user *argp, u64 val)
{
return put_user(val, argp);
}
> diff --git a/block/ioctl.c b/block/ioctl.c
> index 24beec9ca9c9..0c3a4a53fa11 100644
> --- a/block/ioctl.c
> +++ b/block/ioctl.c
> @@ -469,6 +469,8 @@ static int blkdev_common_ioctl(struct block_device
> *bdev, fmode_t mode,
> BLKDEV_DISCARD_SECURE);
...
> + case BLKGETDISKSEQ:
> + return put_u64(argp, bdev->bd_disk->diskseq);
How does that work on a system in which int is 32 bits?
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v4 3/5] block: add ioctl to read the disk sequence number
2021-07-12 19:22 ` Elliott, Robert (Servers)
@ 2021-07-12 23:25 ` Matteo Croce
2021-07-13 5:59 ` Christoph Hellwig
1 sibling, 0 replies; 14+ messages in thread
From: Matteo Croce @ 2021-07-12 23:25 UTC (permalink / raw)
To: Elliott, Robert (Servers)
Cc: linux-block, linux-fsdevel, Jens Axboe, linux-kernel,
Lennart Poettering, Luca Boccassi, Alexander Viro,
Damien Le Moal, Tejun Heo, Javier González, Niklas Cassel,
Johannes Thumshirn, Hannes Reinecke, Matthew Wilcox,
Christoph Hellwig, JeffleXu
On Mon, 12 Jul 2021 19:22:43 +0000
"Elliott, Robert (Servers)" <elliott@hpe.com> wrote:
>
>
> > -----Original Message-----
> > From: Matteo Croce <mcroce@linux.microsoft.com>
> > Sent: Sunday, July 11, 2021 12:54 PM
> ...
> > Subject: [PATCH v4 3/5] block: add ioctl to read the disk sequence
> > number
> >
> > From: Matteo Croce <mcroce@microsoft.com>
> >
> > Add a new BLKGETDISKSEQ ioctl which retrieves the disk sequence
> > number from the genhd structure.
> ...
>
>
> Given:
> static int put_u64(u64 __user *argp, u64 val)
> {
> return put_user(val, argp);
> }
>
> > diff --git a/block/ioctl.c b/block/ioctl.c
> > index 24beec9ca9c9..0c3a4a53fa11 100644
> > --- a/block/ioctl.c
> > +++ b/block/ioctl.c
> > @@ -469,6 +469,8 @@ static int blkdev_common_ioctl(struct
> > block_device *bdev, fmode_t mode,
> > BLKDEV_DISCARD_SECURE);
> ...
>
> > + case BLKGETDISKSEQ:
> > + return put_u64(argp, bdev->bd_disk->diskseq);
>
> How does that work on a system in which int is 32 bits?
>
>
Hi,
what's the difference with this?
case BLKGETSIZE64:
return put_u64(argp, i_size_read(bdev->bd_inode));
The returned int is an error code, not the diskseq value itself, e.g.
case BLKFRAGET:
if (!argp)
return -EINVAL;
Regards,
--
per aspera ad upstream
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v4 3/5] block: add ioctl to read the disk sequence number
2021-07-12 19:22 ` Elliott, Robert (Servers)
2021-07-12 23:25 ` Matteo Croce
@ 2021-07-13 5:59 ` Christoph Hellwig
1 sibling, 0 replies; 14+ messages in thread
From: Christoph Hellwig @ 2021-07-13 5:59 UTC (permalink / raw)
To: Elliott, Robert (Servers)
Cc: 'Matteo Croce',
linux-block, linux-fsdevel, Jens Axboe, linux-kernel,
Lennart Poettering, Luca Boccassi, Alexander Viro,
Damien Le Moal, Tejun Heo, Javier González, Niklas Cassel,
Johannes Thumshirn, Hannes Reinecke, Matthew Wilcox,
Christoph Hellwig, JeffleXu
On Mon, Jul 12, 2021 at 07:22:43PM +0000, Elliott, Robert (Servers) wrote:
> static int put_u64(u64 __user *argp, u64 val)
> {
> return put_user(val, argp);
> }
>
> > diff --git a/block/ioctl.c b/block/ioctl.c
> > index 24beec9ca9c9..0c3a4a53fa11 100644
> > --- a/block/ioctl.c
> > +++ b/block/ioctl.c
> > @@ -469,6 +469,8 @@ static int blkdev_common_ioctl(struct block_device
> > *bdev, fmode_t mode,
> > BLKDEV_DISCARD_SECURE);
> ...
>
> > + case BLKGETDISKSEQ:
> > + return put_u64(argp, bdev->bd_disk->diskseq);
>
> How does that work on a system in which int is 32 bits?
Why would it not work? put_user is a magic macro that works on all
scalar values.
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH v4 4/5] block: export diskseq in sysfs
2021-07-11 17:54 [PATCH v4 0/5] block: add a sequence number to disks Matteo Croce
` (2 preceding siblings ...)
2021-07-11 17:54 ` [PATCH v4 3/5] block: add ioctl to read the disk sequence number Matteo Croce
@ 2021-07-11 17:54 ` Matteo Croce
2021-07-12 6:29 ` Christoph Hellwig
2021-07-11 17:54 ` [PATCH v4 5/5] loop: raise media_change event Matteo Croce
4 siblings, 1 reply; 14+ messages in thread
From: Matteo Croce @ 2021-07-11 17:54 UTC (permalink / raw)
To: linux-block, linux-fsdevel, Jens Axboe
Cc: linux-kernel, Lennart Poettering, Luca Boccassi, Alexander Viro,
Damien Le Moal, Tejun Heo, Javier González, Niklas Cassel,
Johannes Thumshirn, Hannes Reinecke, Matthew Wilcox,
Christoph Hellwig, JeffleXu
From: Matteo Croce <mcroce@microsoft.com>
Add a new sysfs handle to export the new diskseq value.
Place it in <sysfs>/block/<disk>/diskseq and document it.
$ grep . /sys/class/block/*/diskseq
/sys/class/block/loop0/diskseq:13
/sys/class/block/loop1/diskseq:14
/sys/class/block/loop2/diskseq:5
/sys/class/block/loop3/diskseq:6
/sys/class/block/ram0/diskseq:1
/sys/class/block/ram1/diskseq:2
/sys/class/block/vda/diskseq:7
Signed-off-by: Matteo Croce <mcroce@microsoft.com>
---
Documentation/ABI/testing/sysfs-block | 12 ++++++++++++
block/genhd.c | 10 ++++++++++
2 files changed, 22 insertions(+)
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index e34cdeeeb9d4..a0ed87386639 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -28,6 +28,18 @@ Description:
For more details refer Documentation/admin-guide/iostats.rst
+What: /sys/block/<disk>/diskseq
+Date: February 2021
+Contact: Matteo Croce <mcroce@microsoft.com>
+Description:
+ The /sys/block/<disk>/diskseq file reports the disk
+ sequence number, which is a monotonically increasing
+ number assigned to every drive.
+ Some devices, like the loop device, refresh such number
+ every time the backing file is changed.
+ The value type is 64 bit unsigned.
+
+
What: /sys/block/<disk>/<part>/stat
Date: February 2008
Contact: Jerome Marchand <jmarchan@redhat.com>
diff --git a/block/genhd.c b/block/genhd.c
index 3d9c9d189ff7..422c0dbc6ce2 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -985,6 +985,14 @@ static ssize_t disk_discard_alignment_show(struct device *dev,
return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue));
}
+static ssize_t diskseq_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct gendisk *disk = dev_to_disk(dev);
+
+ return sprintf(buf, "%llu\n", disk->diskseq);
+}
+
static DEVICE_ATTR(range, 0444, disk_range_show, NULL);
static DEVICE_ATTR(ext_range, 0444, disk_ext_range_show, NULL);
static DEVICE_ATTR(removable, 0444, disk_removable_show, NULL);
@@ -997,6 +1005,7 @@ static DEVICE_ATTR(capability, 0444, disk_capability_show, NULL);
static DEVICE_ATTR(stat, 0444, part_stat_show, NULL);
static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store);
+static DEVICE_ATTR(diskseq, 0444, diskseq_show, NULL);
#ifdef CONFIG_FAIL_MAKE_REQUEST
ssize_t part_fail_show(struct device *dev,
@@ -1042,6 +1051,7 @@ static struct attribute *disk_attrs[] = {
&dev_attr_events.attr,
&dev_attr_events_async.attr,
&dev_attr_events_poll_msecs.attr,
+ &dev_attr_diskseq.attr,
#ifdef CONFIG_FAIL_MAKE_REQUEST
&dev_attr_fail.attr,
#endif
--
2.31.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH v4 4/5] block: export diskseq in sysfs
2021-07-11 17:54 ` [PATCH v4 4/5] block: export diskseq in sysfs Matteo Croce
@ 2021-07-12 6:29 ` Christoph Hellwig
0 siblings, 0 replies; 14+ messages in thread
From: Christoph Hellwig @ 2021-07-12 6:29 UTC (permalink / raw)
To: Matteo Croce
Cc: linux-block, linux-fsdevel, Jens Axboe, linux-kernel,
Lennart Poettering, Luca Boccassi, Alexander Viro,
Damien Le Moal, Tejun Heo, Javier González, Niklas Cassel,
Johannes Thumshirn, Hannes Reinecke, Matthew Wilcox,
Christoph Hellwig, JeffleXu
Looks good,
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH v4 5/5] loop: raise media_change event
2021-07-11 17:54 [PATCH v4 0/5] block: add a sequence number to disks Matteo Croce
` (3 preceding siblings ...)
2021-07-11 17:54 ` [PATCH v4 4/5] block: export diskseq in sysfs Matteo Croce
@ 2021-07-11 17:54 ` Matteo Croce
2021-07-12 6:54 ` Christoph Hellwig
4 siblings, 1 reply; 14+ messages in thread
From: Matteo Croce @ 2021-07-11 17:54 UTC (permalink / raw)
To: linux-block, linux-fsdevel, Jens Axboe
Cc: linux-kernel, Lennart Poettering, Luca Boccassi, Alexander Viro,
Damien Le Moal, Tejun Heo, Javier González, Niklas Cassel,
Johannes Thumshirn, Hannes Reinecke, Matthew Wilcox,
Christoph Hellwig, JeffleXu
From: Matteo Croce <mcroce@microsoft.com>
Make the loop device raise a DISK_MEDIA_CHANGE event on attach or detach.
# udevadm monitor -up |grep -e DISK_MEDIA_CHANGE -e DEVNAME &
# losetup -f zero
[ 7.454235] loop0: detected capacity change from 0 to 16384
DISK_MEDIA_CHANGE=1
DEVNAME=/dev/loop0
DEVNAME=/dev/loop0
DEVNAME=/dev/loop0
# losetup -f zero
[ 10.205245] loop1: detected capacity change from 0 to 16384
DISK_MEDIA_CHANGE=1
DEVNAME=/dev/loop1
DEVNAME=/dev/loop1
DEVNAME=/dev/loop1
# losetup -f zero2
[ 13.532368] loop2: detected capacity change from 0 to 40960
DISK_MEDIA_CHANGE=1
DEVNAME=/dev/loop2
DEVNAME=/dev/loop2
# losetup -D
DEVNAME=/dev/loop1
DISK_MEDIA_CHANGE=1
DEVNAME=/dev/loop1
DEVNAME=/dev/loop2
DISK_MEDIA_CHANGE=1
DEVNAME=/dev/loop2
DEVNAME=/dev/loop0
DISK_MEDIA_CHANGE=1
DEVNAME=/dev/loop0
Signed-off-by: Matteo Croce <mcroce@microsoft.com>
---
drivers/block/loop.c | 20 ++++++++++++++++++++
drivers/block/loop.h | 1 +
2 files changed, 21 insertions(+)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index f37b9e3d833c..c632f9bd33ba 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -731,6 +731,8 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
goto out_err;
/* and ... switch */
+ lo->changed = true;
+ bdev_check_media_change(bdev);
blk_mq_freeze_queue(lo->lo_queue);
mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
lo->lo_backing_file = file;
@@ -1205,6 +1207,9 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
goto out_unlock;
}
+ lo->changed = true;
+ bdev_check_media_change(bdev);
+
set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0);
INIT_WORK(&lo->rootcg_work, loop_rootcg_workfn);
@@ -1349,6 +1354,8 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
partscan = lo->lo_flags & LO_FLAGS_PARTSCAN && bdev;
lo_number = lo->lo_number;
+ lo->changed = true;
+ bdev_check_media_change(bdev);
out_unlock:
mutex_unlock(&lo->lo_mutex);
if (partscan) {
@@ -2016,11 +2023,22 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
mutex_unlock(&lo->lo_mutex);
}
+static unsigned int lo_check_events(struct gendisk *disk, unsigned int clearing)
+{
+ struct loop_device *lo = disk->private_data;
+ bool changed = lo->changed;
+
+ lo->changed = false;
+
+ return changed ? DISK_EVENT_MEDIA_CHANGE : 0;
+}
+
static const struct block_device_operations lo_fops = {
.owner = THIS_MODULE,
.open = lo_open,
.release = lo_release,
.ioctl = lo_ioctl,
+ .check_events = lo_check_events,
#ifdef CONFIG_COMPAT
.compat_ioctl = lo_compat_ioctl,
#endif
@@ -2325,6 +2343,8 @@ static int loop_add(int i)
disk->fops = &lo_fops;
disk->private_data = lo;
disk->queue = lo->lo_queue;
+ disk->events = DISK_EVENT_MEDIA_CHANGE;
+ disk->event_flags = DISK_EVENT_FLAG_UEVENT;
sprintf(disk->disk_name, "loop%d", i);
add_disk(disk);
mutex_unlock(&loop_ctl_mutex);
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 1988899db63a..a2fdfd27e6a7 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -63,6 +63,7 @@ struct loop_device {
struct timer_list timer;
bool use_dio;
bool sysfs_inited;
+ bool changed;
struct request_queue *lo_queue;
struct blk_mq_tag_set tag_set;
--
2.31.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH v4 5/5] loop: raise media_change event
2021-07-11 17:54 ` [PATCH v4 5/5] loop: raise media_change event Matteo Croce
@ 2021-07-12 6:54 ` Christoph Hellwig
0 siblings, 0 replies; 14+ messages in thread
From: Christoph Hellwig @ 2021-07-12 6:54 UTC (permalink / raw)
To: Matteo Croce
Cc: linux-block, linux-fsdevel, Jens Axboe, linux-kernel,
Lennart Poettering, Luca Boccassi, Alexander Viro,
Damien Le Moal, Tejun Heo, Javier González, Niklas Cassel,
Johannes Thumshirn, Hannes Reinecke, Matthew Wilcox,
Christoph Hellwig, JeffleXu
So when I said plug into the disk events I did not mean that you need
to use all the polling stuff - we can just do the parts required
which are the uevents and invalidation with a new little helper
like this (needs the actual prototype and kerneldoc of course):
diff --git a/block/disk-events.c b/block/disk-events.c
index a75931ff5da4..0023ab1559b6 100644
--- a/block/disk-events.c
+++ b/block/disk-events.c
@@ -163,15 +163,31 @@ void disk_flush_events(struct gendisk *disk, unsigned int mask)
spin_unlock_irq(&ev->lock);
}
+/*
+ * Tell userland about new events. Only the events listed in @disk->events are
+ * reported, and only if DISK_EVENT_FLAG_UEVENT is set. Otherwise, events are
+ * processed internally but never get reported to userland.
+ */
+static void disk_event_uevent(struct gendisk *disk, unsigned int events)
+{
+ char *envp[ARRAY_SIZE(disk_uevents) + 1] = { };
+ int nr_events = 0, i;
+
+ for (i = 0; i < ARRAY_SIZE(disk_uevents); i++)
+ if (events & disk->events & (1 << i))
+ envp[nr_events++] = disk_uevents[i];
+
+ if (nr_events)
+ kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
+}
+
static void disk_check_events(struct disk_events *ev,
unsigned int *clearing_ptr)
{
struct gendisk *disk = ev->disk;
- char *envp[ARRAY_SIZE(disk_uevents) + 1] = { };
unsigned int clearing = *clearing_ptr;
unsigned int events;
unsigned long intv;
- int nr_events = 0, i;
/* check events */
events = disk->fops->check_events(disk, clearing);
@@ -190,19 +206,8 @@ static void disk_check_events(struct disk_events *ev,
spin_unlock_irq(&ev->lock);
- /*
- * Tell userland about new events. Only the events listed in
- * @disk->events are reported, and only if DISK_EVENT_FLAG_UEVENT
- * is set. Otherwise, events are processed internally but never
- * get reported to userland.
- */
- for (i = 0; i < ARRAY_SIZE(disk_uevents); i++)
- if ((events & disk->events & (1 << i)) &&
- (disk->event_flags & DISK_EVENT_FLAG_UEVENT))
- envp[nr_events++] = disk_uevents[i];
-
- if (nr_events)
- kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
+ if (disk->event_flags & DISK_EVENT_FLAG_UEVENT)
+ disk_event_uevent(disk, events);
}
/**
@@ -281,6 +286,21 @@ bool bdev_check_media_change(struct block_device *bdev)
}
EXPORT_SYMBOL(bdev_check_media_change);
+bool disk_force_media_change(struct gendisk *disk, unsigned int events)
+{
+ disk_event_uevent(disk, events);
+
+ if (!(events & DISK_EVENT_MEDIA_CHANGE))
+ return false;
+
+ if (__invalidate_device(disk->part0, true))
+ pr_warn("VFS: busy inodes on changed media %s\n",
+ disk->disk_name);
+ set_bit(GD_NEED_PART_SCAN, &disk->state);
+ return true;
+}
+EXPORT_SYMBOL_GPL(disk_force_media_change);
+
/*
* Separate this part out so that a different pointer for clearing_ptr can be
* passed in for disk_clear_events.
^ permalink raw reply related [flat|nested] 14+ messages in thread