All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] [RFC] Add support for uevents on block device idle changes
@ 2009-11-17 14:37 ` Matthew Garrett
  0 siblings, 0 replies; 73+ messages in thread
From: Matthew Garrett @ 2009-11-17 14:37 UTC (permalink / raw)
  To: linux-kernel; +Cc: axboe, linux-hotplug, Matthew Garrett

Userspace may wish to know whether a given disk is active or idle, for
example to modify power management policy based on access patterns. This
patch adds a deferrable timer to the block layer which will fire if the
disk is idle for a user-definable period of time, generating a uevent. A
uevent will also be generated if an access is received while the disk is
classified as idle.

This patch seems to work as designed, but introduces a noticable amount of
userspace overhead in udevd. I'm guessing that this is because change events
on block devices are normally associated with disk removal/insertion, so
a large quantity of complex rules end up getting run in order to deal with
RAID setup or whatever. Is there a better way to deliver these events?
---
 Documentation/ABI/testing/sysfs-block |    9 +++++
 block/blk-core.c                      |    9 +++++
 block/genhd.c                         |   55 +++++++++++++++++++++++++++++++++
 fs/partitions/check.c                 |    3 ++
 include/linux/genhd.h                 |    6 +++
 5 files changed, 82 insertions(+), 0 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index 5f3beda..5519720 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -128,3 +128,12 @@ Description:
 		preferred request size for workloads where sustained
 		throughput is desired.  If no optimal I/O size is
 		reported this file contains 0.
+
+What:		/sys/block/<disk>/idle_hysteresis
+Date:		November 2009
+Contact:	Matthew Garrett <mjg@redhat.com>
+Description:
+		Contains the number of milliseconds to wait after an access
+		before declaring that a disk is idle. Any accesses during
+		this time will reset the timer. "0" (the default) indicates
+		that no events will be generated.
\ No newline at end of file
diff --git a/block/blk-core.c b/block/blk-core.c
index 71da511..f278817 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1452,6 +1452,15 @@ static inline void __generic_make_request(struct bio *bio)
 		if (should_fail_request(bio))
 			goto end_io;
 
+		if (bio->bi_bdev->bd_disk->hysteresis_time &&
+		    bio_has_data(bio) &&
+		    !mod_timer(&bio->bi_bdev->bd_disk->hysteresis_timer,
+			       jiffies+msecs_to_jiffies
+			       (bio->bi_bdev->bd_disk->hysteresis_time))) {
+			bio->bi_bdev->bd_disk->idle = 0;
+			schedule_work(&bio->bi_bdev->bd_disk->idle_notify);
+		}
+
 		/*
 		 * If this device has partitions, remap block n
 		 * of partition p to block n+start(p) of the disk.
diff --git a/block/genhd.c b/block/genhd.c
index 517e433..f59fbe0 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -504,6 +504,26 @@ static int exact_lock(dev_t devt, void *data)
 	return 0;
 }
 
+static void disk_idle(unsigned long data)
+{
+	struct gendisk *gd = (struct gendisk *)data;
+
+	gd->idle = 1;
+	schedule_work(&gd->idle_notify);
+}
+
+static void disk_idle_notify_thread(struct work_struct *work)
+{
+	struct gendisk *gd = container_of(work, struct gendisk, idle_notify);
+	char event[] = "IDLE=0";
+	char *envp[] = { event, NULL };
+
+	if (gd->idle)
+		event[5] = '1';
+
+	kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
+}
+
 /**
  * add_disk - add partitioning information to kernel list
  * @disk: per-device partitioning information
@@ -543,6 +563,10 @@ void add_disk(struct gendisk *disk)
 
 	blk_register_region(disk_devt(disk), disk->minors, NULL,
 			    exact_match, exact_lock, disk);
+
+	init_timer(&disk->hysteresis_timer);
+	setup_timer(&disk->hysteresis_timer, disk_idle, (unsigned long)disk);
+
 	register_disk(disk);
 	blk_register_queue(disk);
 
@@ -861,6 +885,32 @@ static ssize_t disk_alignment_offset_show(struct device *dev,
 	return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue));
 }
 
+static ssize_t disk_idle_hysteresis_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+
+	return sprintf(buf, "%d\n", disk->hysteresis_time);
+}
+
+static ssize_t disk_idle_hysteresis_store(struct device *dev,
+					  struct device_attribute *attr,
+					  const char *buf, size_t count)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+	unsigned long timeout;
+	int res;
+
+	res = strict_strtoul(buf, 10, &timeout);
+	if (res)
+		return -EINVAL;
+
+	disk->hysteresis_time = timeout;
+
+	return count;
+}
+
 static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
 static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
 static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
@@ -870,6 +920,8 @@ static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL);
 static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
 static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
 static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
+static DEVICE_ATTR(idle_hysteresis, 0644, disk_idle_hysteresis_show,
+		   disk_idle_hysteresis_store);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 static struct device_attribute dev_attr_fail =
 	__ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
@@ -890,6 +942,7 @@ static struct attribute *disk_attrs[] = {
 	&dev_attr_capability.attr,
 	&dev_attr_stat.attr,
 	&dev_attr_inflight.attr,
+	&dev_attr_idle_hysteresis.attr,
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 	&dev_attr_fail.attr,
 #endif
@@ -1183,6 +1236,8 @@ struct gendisk *alloc_disk_node(int minors, int node_id)
 		device_initialize(disk_to_dev(disk));
 		INIT_WORK(&disk->async_notify,
 			media_change_notify_thread);
+		INIT_WORK(&disk->idle_notify,
+			  disk_idle_notify_thread);
 	}
 	return disk;
 }
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 7b685e1..d55dd29 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -652,6 +652,9 @@ void del_gendisk(struct gendisk *disk)
 	struct disk_part_iter piter;
 	struct hd_struct *part;
 
+	cancel_work_sync(&disk->idle_notify);
+	del_timer_sync(&disk->hysteresis_timer);
+
 	/* invalidate stuff */
 	disk_part_iter_init(&piter, disk,
 			     DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 297df45..7e969a5 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -12,6 +12,7 @@
 #include <linux/types.h>
 #include <linux/kdev_t.h>
 #include <linux/rcupdate.h>
+#include <linux/timer.h>
 
 #ifdef CONFIG_BLOCK
 
@@ -163,10 +164,15 @@ struct gendisk {
 
 	atomic_t sync_io;		/* RAID */
 	struct work_struct async_notify;
+	struct work_struct idle_notify;
 #ifdef  CONFIG_BLK_DEV_INTEGRITY
 	struct blk_integrity *integrity;
 #endif
 	int node_id;
+
+	bool idle;
+	int hysteresis_time;
+	struct timer_list hysteresis_timer;
 };
 
 static inline struct gendisk *part_to_disk(struct hd_struct *part)
-- 
1.6.5.2


^ permalink raw reply related	[flat|nested] 73+ messages in thread

* [PATCH] [RFC] Add support for uevents on block device idle changes
@ 2009-11-17 14:37 ` Matthew Garrett
  0 siblings, 0 replies; 73+ messages in thread
From: Matthew Garrett @ 2009-11-17 14:37 UTC (permalink / raw)
  To: linux-kernel; +Cc: axboe, linux-hotplug, Matthew Garrett

Userspace may wish to know whether a given disk is active or idle, for
example to modify power management policy based on access patterns. This
patch adds a deferrable timer to the block layer which will fire if the
disk is idle for a user-definable period of time, generating a uevent. A
uevent will also be generated if an access is received while the disk is
classified as idle.

This patch seems to work as designed, but introduces a noticable amount of
userspace overhead in udevd. I'm guessing that this is because change events
on block devices are normally associated with disk removal/insertion, so
a large quantity of complex rules end up getting run in order to deal with
RAID setup or whatever. Is there a better way to deliver these events?
---
 Documentation/ABI/testing/sysfs-block |    9 +++++
 block/blk-core.c                      |    9 +++++
 block/genhd.c                         |   55 +++++++++++++++++++++++++++++++++
 fs/partitions/check.c                 |    3 ++
 include/linux/genhd.h                 |    6 +++
 5 files changed, 82 insertions(+), 0 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index 5f3beda..5519720 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -128,3 +128,12 @@ Description:
 		preferred request size for workloads where sustained
 		throughput is desired.  If no optimal I/O size is
 		reported this file contains 0.
+
+What:		/sys/block/<disk>/idle_hysteresis
+Date:		November 2009
+Contact:	Matthew Garrett <mjg@redhat.com>
+Description:
+		Contains the number of milliseconds to wait after an access
+		before declaring that a disk is idle. Any accesses during
+		this time will reset the timer. "0" (the default) indicates
+		that no events will be generated.
\ No newline at end of file
diff --git a/block/blk-core.c b/block/blk-core.c
index 71da511..f278817 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1452,6 +1452,15 @@ static inline void __generic_make_request(struct bio *bio)
 		if (should_fail_request(bio))
 			goto end_io;
 
+		if (bio->bi_bdev->bd_disk->hysteresis_time &&
+		    bio_has_data(bio) &&
+		    !mod_timer(&bio->bi_bdev->bd_disk->hysteresis_timer,
+			       jiffies+msecs_to_jiffies
+			       (bio->bi_bdev->bd_disk->hysteresis_time))) {
+			bio->bi_bdev->bd_disk->idle = 0;
+			schedule_work(&bio->bi_bdev->bd_disk->idle_notify);
+		}
+
 		/*
 		 * If this device has partitions, remap block n
 		 * of partition p to block n+start(p) of the disk.
diff --git a/block/genhd.c b/block/genhd.c
index 517e433..f59fbe0 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -504,6 +504,26 @@ static int exact_lock(dev_t devt, void *data)
 	return 0;
 }
 
+static void disk_idle(unsigned long data)
+{
+	struct gendisk *gd = (struct gendisk *)data;
+
+	gd->idle = 1;
+	schedule_work(&gd->idle_notify);
+}
+
+static void disk_idle_notify_thread(struct work_struct *work)
+{
+	struct gendisk *gd = container_of(work, struct gendisk, idle_notify);
+	char event[] = "IDLE=0";
+	char *envp[] = { event, NULL };
+
+	if (gd->idle)
+		event[5] = '1';
+
+	kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
+}
+
 /**
  * add_disk - add partitioning information to kernel list
  * @disk: per-device partitioning information
@@ -543,6 +563,10 @@ void add_disk(struct gendisk *disk)
 
 	blk_register_region(disk_devt(disk), disk->minors, NULL,
 			    exact_match, exact_lock, disk);
+
+	init_timer(&disk->hysteresis_timer);
+	setup_timer(&disk->hysteresis_timer, disk_idle, (unsigned long)disk);
+
 	register_disk(disk);
 	blk_register_queue(disk);
 
@@ -861,6 +885,32 @@ static ssize_t disk_alignment_offset_show(struct device *dev,
 	return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue));
 }
 
+static ssize_t disk_idle_hysteresis_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+
+	return sprintf(buf, "%d\n", disk->hysteresis_time);
+}
+
+static ssize_t disk_idle_hysteresis_store(struct device *dev,
+					  struct device_attribute *attr,
+					  const char *buf, size_t count)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+	unsigned long timeout;
+	int res;
+
+	res = strict_strtoul(buf, 10, &timeout);
+	if (res)
+		return -EINVAL;
+
+	disk->hysteresis_time = timeout;
+
+	return count;
+}
+
 static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
 static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
 static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
@@ -870,6 +920,8 @@ static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL);
 static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
 static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
 static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
+static DEVICE_ATTR(idle_hysteresis, 0644, disk_idle_hysteresis_show,
+		   disk_idle_hysteresis_store);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 static struct device_attribute dev_attr_fail  	__ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
@@ -890,6 +942,7 @@ static struct attribute *disk_attrs[] = {
 	&dev_attr_capability.attr,
 	&dev_attr_stat.attr,
 	&dev_attr_inflight.attr,
+	&dev_attr_idle_hysteresis.attr,
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 	&dev_attr_fail.attr,
 #endif
@@ -1183,6 +1236,8 @@ struct gendisk *alloc_disk_node(int minors, int node_id)
 		device_initialize(disk_to_dev(disk));
 		INIT_WORK(&disk->async_notify,
 			media_change_notify_thread);
+		INIT_WORK(&disk->idle_notify,
+			  disk_idle_notify_thread);
 	}
 	return disk;
 }
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 7b685e1..d55dd29 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -652,6 +652,9 @@ void del_gendisk(struct gendisk *disk)
 	struct disk_part_iter piter;
 	struct hd_struct *part;
 
+	cancel_work_sync(&disk->idle_notify);
+	del_timer_sync(&disk->hysteresis_timer);
+
 	/* invalidate stuff */
 	disk_part_iter_init(&piter, disk,
 			     DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 297df45..7e969a5 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -12,6 +12,7 @@
 #include <linux/types.h>
 #include <linux/kdev_t.h>
 #include <linux/rcupdate.h>
+#include <linux/timer.h>
 
 #ifdef CONFIG_BLOCK
 
@@ -163,10 +164,15 @@ struct gendisk {
 
 	atomic_t sync_io;		/* RAID */
 	struct work_struct async_notify;
+	struct work_struct idle_notify;
 #ifdef  CONFIG_BLK_DEV_INTEGRITY
 	struct blk_integrity *integrity;
 #endif
 	int node_id;
+
+	bool idle;
+	int hysteresis_time;
+	struct timer_list hysteresis_timer;
 };
 
 static inline struct gendisk *part_to_disk(struct hd_struct *part)
-- 
1.6.5.2


^ permalink raw reply related	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle  changes
  2009-11-17 14:37 ` Matthew Garrett
@ 2009-11-17 15:55   ` Kay Sievers
  -1 siblings, 0 replies; 73+ messages in thread
From: Kay Sievers @ 2009-11-17 15:55 UTC (permalink / raw)
  To: Matthew Garrett; +Cc: linux-kernel, axboe, linux-hotplug

On Tue, Nov 17, 2009 at 15:37, Matthew Garrett <mjg@redhat.com> wrote:
> Userspace may wish to know whether a given disk is active or idle, for
> example to modify power management policy based on access patterns. This
> patch adds a deferrable timer to the block layer which will fire if the
> disk is idle for a user-definable period of time, generating a uevent. A
> uevent will also be generated if an access is received while the disk is
> classified as idle.
>
> This patch seems to work as designed, but introduces a noticable amount of
> userspace overhead in udevd. I'm guessing that this is because change events
> on block devices are normally associated with disk removal/insertion, so
> a large quantity of complex rules end up getting run in order to deal with
> RAID setup or whatever. Is there a better way to deliver these events?

I guess, at the moment the disk tells it's idle, udev will open() the
disk and look for changed signatures, and end its idle state. :)

Uevents might not be the right interface, they are usually used if the
device needs to be (re-)examined, which will the idle thing into a
loop with the current setups, I guess.

Maybe we can use a sysfs file which can be open()'d and something can
watch with poll(), and gets woken up by the kernel, after the drive
changes its state? MD raid, as an example, has files like this in
sysfs to allow monitoring. That way, there is also no overhead, if the
requesting process goes away, which is usually the nicer interface,
than a global switch, which does not care about if the requesting
process still exists.

Thanks,
Kay

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle
@ 2009-11-17 15:55   ` Kay Sievers
  0 siblings, 0 replies; 73+ messages in thread
From: Kay Sievers @ 2009-11-17 15:55 UTC (permalink / raw)
  To: Matthew Garrett; +Cc: linux-kernel, axboe, linux-hotplug

On Tue, Nov 17, 2009 at 15:37, Matthew Garrett <mjg@redhat.com> wrote:
> Userspace may wish to know whether a given disk is active or idle, for
> example to modify power management policy based on access patterns. This
> patch adds a deferrable timer to the block layer which will fire if the
> disk is idle for a user-definable period of time, generating a uevent. A
> uevent will also be generated if an access is received while the disk is
> classified as idle.
>
> This patch seems to work as designed, but introduces a noticable amount of
> userspace overhead in udevd. I'm guessing that this is because change events
> on block devices are normally associated with disk removal/insertion, so
> a large quantity of complex rules end up getting run in order to deal with
> RAID setup or whatever. Is there a better way to deliver these events?

I guess, at the moment the disk tells it's idle, udev will open() the
disk and look for changed signatures, and end its idle state. :)

Uevents might not be the right interface, they are usually used if the
device needs to be (re-)examined, which will the idle thing into a
loop with the current setups, I guess.

Maybe we can use a sysfs file which can be open()'d and something can
watch with poll(), and gets woken up by the kernel, after the drive
changes its state? MD raid, as an example, has files like this in
sysfs to allow monitoring. That way, there is also no overhead, if the
requesting process goes away, which is usually the nicer interface,
than a global switch, which does not care about if the requesting
process still exists.

Thanks,
Kay

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle  changes
  2009-11-17 15:55   ` [PATCH] [RFC] Add support for uevents on block device idle Kay Sievers
@ 2009-11-17 16:09     ` David Zeuthen
  -1 siblings, 0 replies; 73+ messages in thread
From: David Zeuthen @ 2009-11-17 16:09 UTC (permalink / raw)
  To: Kay Sievers; +Cc: Matthew Garrett, linux-kernel, axboe, linux-hotplug

On Tue, 2009-11-17 at 16:55 +0100, Kay Sievers wrote:
> I guess, at the moment the disk tells it's idle, udev will open() the
> disk and look for changed signatures, and end its idle state. :)

Huh, this will probably cause interesting loops then ;-)

> Uevents might not be the right interface, they are usually used if the
> device needs to be (re-)examined, which will the idle thing into a
> loop with the current setups, I guess.

Yeah.

> Maybe we can use a sysfs file which can be open()'d and something can
> watch with poll(), and gets woken up by the kernel, after the drive
> changes its state? MD raid, as an example, has files like this in
> sysfs to allow monitoring. That way, there is also no overhead, if the
> requesting process goes away, which is usually the nicer interface,
> than a global switch, which does not care about if the requesting
> process still exists.

Can't we just use /sys/block/sdX/stat? I believe this file already has
the property that the contents of the file will stay constant exactly
when the device is idle. Being able to use poll() for change
notifications seems like a good interface.

Thanks,
David



^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle
@ 2009-11-17 16:09     ` David Zeuthen
  0 siblings, 0 replies; 73+ messages in thread
From: David Zeuthen @ 2009-11-17 16:09 UTC (permalink / raw)
  To: Kay Sievers; +Cc: Matthew Garrett, linux-kernel, axboe, linux-hotplug

On Tue, 2009-11-17 at 16:55 +0100, Kay Sievers wrote:
> I guess, at the moment the disk tells it's idle, udev will open() the
> disk and look for changed signatures, and end its idle state. :)

Huh, this will probably cause interesting loops then ;-)

> Uevents might not be the right interface, they are usually used if the
> device needs to be (re-)examined, which will the idle thing into a
> loop with the current setups, I guess.

Yeah.

> Maybe we can use a sysfs file which can be open()'d and something can
> watch with poll(), and gets woken up by the kernel, after the drive
> changes its state? MD raid, as an example, has files like this in
> sysfs to allow monitoring. That way, there is also no overhead, if the
> requesting process goes away, which is usually the nicer interface,
> than a global switch, which does not care about if the requesting
> process still exists.

Can't we just use /sys/block/sdX/stat? I believe this file already has
the property that the contents of the file will stay constant exactly
when the device is idle. Being able to use poll() for change
notifications seems like a good interface.

Thanks,
David



^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle changes
  2009-11-17 16:09     ` [PATCH] [RFC] Add support for uevents on block device idle David Zeuthen
@ 2009-11-17 18:57       ` Matthew Garrett
  -1 siblings, 0 replies; 73+ messages in thread
From: Matthew Garrett @ 2009-11-17 18:57 UTC (permalink / raw)
  To: David Zeuthen; +Cc: Kay Sievers, linux-kernel, axboe, linux-hotplug

Ok. How about something like this? It adds an extra field to the stat 
file and introduces David's suggestion of making it pollable.

commit ba6d4c7ab7940ae8dc11a884281d0a36b20455b9
Author: Matthew Garrett <mjg@redhat.com>
Date:   Mon Nov 16 17:44:03 2009 -0500

    [RFC] Add support for uevents on block device idle changes
    
    Userspace may wish to know whether a given disk is active or idle, for
    example to modify power management policy based on access patterns. This
    patch adds a deferrable timer to the block layer which will fire if the
    disk is idle for a user-definable period of time, generating a uevent. A
    uevent will also be generated if an access is received while the disk is
    classified as idle.

diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index 5f3beda..8747f42 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -3,7 +3,7 @@ Date:		February 2008
 Contact:	Jerome Marchand <jmarchan@redhat.com>
 Description:
 		The /sys/block/<disk>/stat files displays the I/O
-		statistics of disk <disk>. They contain 11 fields:
+		statistics of disk <disk>. They contain 12 fields:
 		 1 - reads completed succesfully
 		 2 - reads merged
 		 3 - sectors read
@@ -15,6 +15,7 @@ Description:
 		 9 - I/Os currently in progress
 		10 - time spent doing I/Os (ms)
 		11 - weighted time spent doing I/Os (ms)
+		12 - 1 if the disk is idle (determined by idle_hysteresis)
 		For more details refer Documentation/iostats.txt
 
 
@@ -128,3 +129,12 @@ Description:
 		preferred request size for workloads where sustained
 		throughput is desired.  If no optimal I/O size is
 		reported this file contains 0.
+
+What:		/sys/block/<disk>/idle_hysteresis
+Date:		November 2009
+Contact:	Matthew Garrett <mjg@redhat.com>
+Description:
+		Contains the number of milliseconds to wait after an access
+		before declaring that a disk is idle. Any accesses during
+		this time will reset the timer. "0" (the default) indicates
+		that no events will be generated.
\ No newline at end of file
diff --git a/block/blk-core.c b/block/blk-core.c
index 71da511..f278817 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1452,6 +1452,15 @@ static inline void __generic_make_request(struct bio *bio)
 		if (should_fail_request(bio))
 			goto end_io;
 
+		if (bio->bi_bdev->bd_disk->hysteresis_time &&
+		    bio_has_data(bio) &&
+		    !mod_timer(&bio->bi_bdev->bd_disk->hysteresis_timer,
+			       jiffies+msecs_to_jiffies
+			       (bio->bi_bdev->bd_disk->hysteresis_time))) {
+			bio->bi_bdev->bd_disk->idle = 0;
+			schedule_work(&bio->bi_bdev->bd_disk->idle_notify);
+		}
+
 		/*
 		 * If this device has partitions, remap block n
 		 * of partition p to block n+start(p) of the disk.
diff --git a/block/genhd.c b/block/genhd.c
index 517e433..ea37e48 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -504,6 +504,21 @@ static int exact_lock(dev_t devt, void *data)
 	return 0;
 }
 
+static void disk_idle(unsigned long data)
+{
+	struct gendisk *gd = (struct gendisk *)data;
+
+	gd->idle = 1;
+	schedule_work(&gd->idle_notify);
+}
+
+static void disk_idle_notify_thread(struct work_struct *work)
+{
+	struct gendisk *gd = container_of(work, struct gendisk, idle_notify);
+
+	sysfs_notify(&disk_to_dev(gd)->kobj, NULL, "stat");
+}
+
 /**
  * add_disk - add partitioning information to kernel list
  * @disk: per-device partitioning information
@@ -543,6 +558,10 @@ void add_disk(struct gendisk *disk)
 
 	blk_register_region(disk_devt(disk), disk->minors, NULL,
 			    exact_match, exact_lock, disk);
+
+	init_timer(&disk->hysteresis_timer);
+	setup_timer(&disk->hysteresis_timer, disk_idle, (unsigned long)disk);
+
 	register_disk(disk);
 	blk_register_queue(disk);
 
@@ -861,6 +880,32 @@ static ssize_t disk_alignment_offset_show(struct device *dev,
 	return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue));
 }
 
+static ssize_t disk_idle_hysteresis_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+
+	return sprintf(buf, "%d\n", disk->hysteresis_time);
+}
+
+static ssize_t disk_idle_hysteresis_store(struct device *dev,
+					  struct device_attribute *attr,
+					  const char *buf, size_t count)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+	unsigned long timeout;
+	int res;
+
+	res = strict_strtoul(buf, 10, &timeout);
+	if (res)
+		return -EINVAL;
+
+	disk->hysteresis_time = timeout;
+
+	return count;
+}
+
 static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
 static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
 static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
@@ -870,6 +915,8 @@ static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL);
 static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
 static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
 static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
+static DEVICE_ATTR(idle_hysteresis, 0644, disk_idle_hysteresis_show,
+		   disk_idle_hysteresis_store);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 static struct device_attribute dev_attr_fail =
 	__ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
@@ -890,6 +937,7 @@ static struct attribute *disk_attrs[] = {
 	&dev_attr_capability.attr,
 	&dev_attr_stat.attr,
 	&dev_attr_inflight.attr,
+	&dev_attr_idle_hysteresis.attr,
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 	&dev_attr_fail.attr,
 #endif
@@ -1183,6 +1231,8 @@ struct gendisk *alloc_disk_node(int minors, int node_id)
 		device_initialize(disk_to_dev(disk));
 		INIT_WORK(&disk->async_notify,
 			media_change_notify_thread);
+		INIT_WORK(&disk->idle_notify,
+			  disk_idle_notify_thread);
 	}
 	return disk;
 }
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 7b685e1..cccfb7d 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -230,6 +230,7 @@ ssize_t part_stat_show(struct device *dev,
 		       struct device_attribute *attr, char *buf)
 {
 	struct hd_struct *p = dev_to_part(dev);
+	struct gendisk *gd = dev_to_disk(dev);
 	int cpu;
 
 	cpu = part_stat_lock();
@@ -238,7 +239,7 @@ ssize_t part_stat_show(struct device *dev,
 	return sprintf(buf,
 		"%8lu %8lu %8llu %8u "
 		"%8lu %8lu %8llu %8u "
-		"%8u %8u %8u"
+		"%8u %8u %8u %1u"
 		"\n",
 		part_stat_read(p, ios[READ]),
 		part_stat_read(p, merges[READ]),
@@ -250,7 +251,8 @@ ssize_t part_stat_show(struct device *dev,
 		jiffies_to_msecs(part_stat_read(p, ticks[WRITE])),
 		part_in_flight(p),
 		jiffies_to_msecs(part_stat_read(p, io_ticks)),
-		jiffies_to_msecs(part_stat_read(p, time_in_queue)));
+		jiffies_to_msecs(part_stat_read(p, time_in_queue)),
+		gd->idle);
 }
 
 ssize_t part_inflight_show(struct device *dev,
@@ -652,6 +654,9 @@ void del_gendisk(struct gendisk *disk)
 	struct disk_part_iter piter;
 	struct hd_struct *part;
 
+	del_timer_sync(&disk->hysteresis_timer);
+	cancel_work_sync(&disk->idle_notify);
+
 	/* invalidate stuff */
 	disk_part_iter_init(&piter, disk,
 			     DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 297df45..7e969a5 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -12,6 +12,7 @@
 #include <linux/types.h>
 #include <linux/kdev_t.h>
 #include <linux/rcupdate.h>
+#include <linux/timer.h>
 
 #ifdef CONFIG_BLOCK
 
@@ -163,10 +164,15 @@ struct gendisk {
 
 	atomic_t sync_io;		/* RAID */
 	struct work_struct async_notify;
+	struct work_struct idle_notify;
 #ifdef  CONFIG_BLK_DEV_INTEGRITY
 	struct blk_integrity *integrity;
 #endif
 	int node_id;
+
+	bool idle;
+	int hysteresis_time;
+	struct timer_list hysteresis_timer;
 };
 
 static inline struct gendisk *part_to_disk(struct hd_struct *part)

-- 
Matthew Garrett | mjg59@srcf.ucam.org

^ permalink raw reply related	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle
@ 2009-11-17 18:57       ` Matthew Garrett
  0 siblings, 0 replies; 73+ messages in thread
From: Matthew Garrett @ 2009-11-17 18:57 UTC (permalink / raw)
  To: David Zeuthen; +Cc: Kay Sievers, linux-kernel, axboe, linux-hotplug

Ok. How about something like this? It adds an extra field to the stat 
file and introduces David's suggestion of making it pollable.

commit ba6d4c7ab7940ae8dc11a884281d0a36b20455b9
Author: Matthew Garrett <mjg@redhat.com>
Date:   Mon Nov 16 17:44:03 2009 -0500

    [RFC] Add support for uevents on block device idle changes
    
    Userspace may wish to know whether a given disk is active or idle, for
    example to modify power management policy based on access patterns. This
    patch adds a deferrable timer to the block layer which will fire if the
    disk is idle for a user-definable period of time, generating a uevent. A
    uevent will also be generated if an access is received while the disk is
    classified as idle.

diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index 5f3beda..8747f42 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -3,7 +3,7 @@ Date:		February 2008
 Contact:	Jerome Marchand <jmarchan@redhat.com>
 Description:
 		The /sys/block/<disk>/stat files displays the I/O
-		statistics of disk <disk>. They contain 11 fields:
+		statistics of disk <disk>. They contain 12 fields:
 		 1 - reads completed succesfully
 		 2 - reads merged
 		 3 - sectors read
@@ -15,6 +15,7 @@ Description:
 		 9 - I/Os currently in progress
 		10 - time spent doing I/Os (ms)
 		11 - weighted time spent doing I/Os (ms)
+		12 - 1 if the disk is idle (determined by idle_hysteresis)
 		For more details refer Documentation/iostats.txt
 
 
@@ -128,3 +129,12 @@ Description:
 		preferred request size for workloads where sustained
 		throughput is desired.  If no optimal I/O size is
 		reported this file contains 0.
+
+What:		/sys/block/<disk>/idle_hysteresis
+Date:		November 2009
+Contact:	Matthew Garrett <mjg@redhat.com>
+Description:
+		Contains the number of milliseconds to wait after an access
+		before declaring that a disk is idle. Any accesses during
+		this time will reset the timer. "0" (the default) indicates
+		that no events will be generated.
\ No newline at end of file
diff --git a/block/blk-core.c b/block/blk-core.c
index 71da511..f278817 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1452,6 +1452,15 @@ static inline void __generic_make_request(struct bio *bio)
 		if (should_fail_request(bio))
 			goto end_io;
 
+		if (bio->bi_bdev->bd_disk->hysteresis_time &&
+		    bio_has_data(bio) &&
+		    !mod_timer(&bio->bi_bdev->bd_disk->hysteresis_timer,
+			       jiffies+msecs_to_jiffies
+			       (bio->bi_bdev->bd_disk->hysteresis_time))) {
+			bio->bi_bdev->bd_disk->idle = 0;
+			schedule_work(&bio->bi_bdev->bd_disk->idle_notify);
+		}
+
 		/*
 		 * If this device has partitions, remap block n
 		 * of partition p to block n+start(p) of the disk.
diff --git a/block/genhd.c b/block/genhd.c
index 517e433..ea37e48 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -504,6 +504,21 @@ static int exact_lock(dev_t devt, void *data)
 	return 0;
 }
 
+static void disk_idle(unsigned long data)
+{
+	struct gendisk *gd = (struct gendisk *)data;
+
+	gd->idle = 1;
+	schedule_work(&gd->idle_notify);
+}
+
+static void disk_idle_notify_thread(struct work_struct *work)
+{
+	struct gendisk *gd = container_of(work, struct gendisk, idle_notify);
+
+	sysfs_notify(&disk_to_dev(gd)->kobj, NULL, "stat");
+}
+
 /**
  * add_disk - add partitioning information to kernel list
  * @disk: per-device partitioning information
@@ -543,6 +558,10 @@ void add_disk(struct gendisk *disk)
 
 	blk_register_region(disk_devt(disk), disk->minors, NULL,
 			    exact_match, exact_lock, disk);
+
+	init_timer(&disk->hysteresis_timer);
+	setup_timer(&disk->hysteresis_timer, disk_idle, (unsigned long)disk);
+
 	register_disk(disk);
 	blk_register_queue(disk);
 
@@ -861,6 +880,32 @@ static ssize_t disk_alignment_offset_show(struct device *dev,
 	return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue));
 }
 
+static ssize_t disk_idle_hysteresis_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+
+	return sprintf(buf, "%d\n", disk->hysteresis_time);
+}
+
+static ssize_t disk_idle_hysteresis_store(struct device *dev,
+					  struct device_attribute *attr,
+					  const char *buf, size_t count)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+	unsigned long timeout;
+	int res;
+
+	res = strict_strtoul(buf, 10, &timeout);
+	if (res)
+		return -EINVAL;
+
+	disk->hysteresis_time = timeout;
+
+	return count;
+}
+
 static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
 static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
 static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
@@ -870,6 +915,8 @@ static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL);
 static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
 static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
 static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
+static DEVICE_ATTR(idle_hysteresis, 0644, disk_idle_hysteresis_show,
+		   disk_idle_hysteresis_store);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 static struct device_attribute dev_attr_fail  	__ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
@@ -890,6 +937,7 @@ static struct attribute *disk_attrs[] = {
 	&dev_attr_capability.attr,
 	&dev_attr_stat.attr,
 	&dev_attr_inflight.attr,
+	&dev_attr_idle_hysteresis.attr,
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 	&dev_attr_fail.attr,
 #endif
@@ -1183,6 +1231,8 @@ struct gendisk *alloc_disk_node(int minors, int node_id)
 		device_initialize(disk_to_dev(disk));
 		INIT_WORK(&disk->async_notify,
 			media_change_notify_thread);
+		INIT_WORK(&disk->idle_notify,
+			  disk_idle_notify_thread);
 	}
 	return disk;
 }
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 7b685e1..cccfb7d 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -230,6 +230,7 @@ ssize_t part_stat_show(struct device *dev,
 		       struct device_attribute *attr, char *buf)
 {
 	struct hd_struct *p = dev_to_part(dev);
+	struct gendisk *gd = dev_to_disk(dev);
 	int cpu;
 
 	cpu = part_stat_lock();
@@ -238,7 +239,7 @@ ssize_t part_stat_show(struct device *dev,
 	return sprintf(buf,
 		"%8lu %8lu %8llu %8u "
 		"%8lu %8lu %8llu %8u "
-		"%8u %8u %8u"
+		"%8u %8u %8u %1u"
 		"\n",
 		part_stat_read(p, ios[READ]),
 		part_stat_read(p, merges[READ]),
@@ -250,7 +251,8 @@ ssize_t part_stat_show(struct device *dev,
 		jiffies_to_msecs(part_stat_read(p, ticks[WRITE])),
 		part_in_flight(p),
 		jiffies_to_msecs(part_stat_read(p, io_ticks)),
-		jiffies_to_msecs(part_stat_read(p, time_in_queue)));
+		jiffies_to_msecs(part_stat_read(p, time_in_queue)),
+		gd->idle);
 }
 
 ssize_t part_inflight_show(struct device *dev,
@@ -652,6 +654,9 @@ void del_gendisk(struct gendisk *disk)
 	struct disk_part_iter piter;
 	struct hd_struct *part;
 
+	del_timer_sync(&disk->hysteresis_timer);
+	cancel_work_sync(&disk->idle_notify);
+
 	/* invalidate stuff */
 	disk_part_iter_init(&piter, disk,
 			     DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 297df45..7e969a5 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -12,6 +12,7 @@
 #include <linux/types.h>
 #include <linux/kdev_t.h>
 #include <linux/rcupdate.h>
+#include <linux/timer.h>
 
 #ifdef CONFIG_BLOCK
 
@@ -163,10 +164,15 @@ struct gendisk {
 
 	atomic_t sync_io;		/* RAID */
 	struct work_struct async_notify;
+	struct work_struct idle_notify;
 #ifdef  CONFIG_BLK_DEV_INTEGRITY
 	struct blk_integrity *integrity;
 #endif
 	int node_id;
+
+	bool idle;
+	int hysteresis_time;
+	struct timer_list hysteresis_timer;
 };
 
 static inline struct gendisk *part_to_disk(struct hd_struct *part)

-- 
Matthew Garrett | mjg59@srcf.ucam.org

^ permalink raw reply related	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle  changes
  2009-11-17 18:57       ` [PATCH] [RFC] Add support for uevents on block device idle Matthew Garrett
@ 2009-11-18 19:30         ` Kay Sievers
  -1 siblings, 0 replies; 73+ messages in thread
From: Kay Sievers @ 2009-11-18 19:30 UTC (permalink / raw)
  To: Matthew Garrett; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Tue, Nov 17, 2009 at 19:57, Matthew Garrett <mjg59@srcf.ucam.org> wrote:
> Ok. How about something like this? It adds an extra field to the stat
> file and introduces David's suggestion of making it pollable.

Hmm, while thinking more about it, I'm still not sure if we should add
an interface which can work for a single user only, and add a timer to
the kernel which is only used by a userspace policy thing.

Wouldn't it be good enough, if we add a file "idle_since" which
contains the time of the actual disk idle time, and userspace can
schedule a re-examination of that value at the actual end of the idle
time it is looking for?

Thanks,
Kay

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle
@ 2009-11-18 19:30         ` Kay Sievers
  0 siblings, 0 replies; 73+ messages in thread
From: Kay Sievers @ 2009-11-18 19:30 UTC (permalink / raw)
  To: Matthew Garrett; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Tue, Nov 17, 2009 at 19:57, Matthew Garrett <mjg59@srcf.ucam.org> wrote:
> Ok. How about something like this? It adds an extra field to the stat
> file and introduces David's suggestion of making it pollable.

Hmm, while thinking more about it, I'm still not sure if we should add
an interface which can work for a single user only, and add a timer to
the kernel which is only used by a userspace policy thing.

Wouldn't it be good enough, if we add a file "idle_since" which
contains the time of the actual disk idle time, and userspace can
schedule a re-examination of that value at the actual end of the idle
time it is looking for?

Thanks,
Kay

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle changes
  2009-11-18 19:30         ` [PATCH] [RFC] Add support for uevents on block device idle Kay Sievers
@ 2009-11-18 19:40           ` Matthew Garrett
  -1 siblings, 0 replies; 73+ messages in thread
From: Matthew Garrett @ 2009-11-18 19:40 UTC (permalink / raw)
  To: Kay Sievers; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Wed, Nov 18, 2009 at 08:30:07PM +0100, Kay Sievers wrote:

> Wouldn't it be good enough, if we add a file "idle_since" which
> contains the time of the actual disk idle time, and userspace can
> schedule a re-examination of that value at the actual end of the idle
> time it is looking for?

That would require either polling or waking up a userspace application 
on every disk access. Doing it in-kernel involves only a single timer 
wakeup for every active/idle transition.

-- 
Matthew Garrett | mjg59@srcf.ucam.org

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle
@ 2009-11-18 19:40           ` Matthew Garrett
  0 siblings, 0 replies; 73+ messages in thread
From: Matthew Garrett @ 2009-11-18 19:40 UTC (permalink / raw)
  To: Kay Sievers; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Wed, Nov 18, 2009 at 08:30:07PM +0100, Kay Sievers wrote:

> Wouldn't it be good enough, if we add a file "idle_since" which
> contains the time of the actual disk idle time, and userspace can
> schedule a re-examination of that value at the actual end of the idle
> time it is looking for?

That would require either polling or waking up a userspace application 
on every disk access. Doing it in-kernel involves only a single timer 
wakeup for every active/idle transition.

-- 
Matthew Garrett | mjg59@srcf.ucam.org

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle  changes
  2009-11-18 19:40           ` [PATCH] [RFC] Add support for uevents on block device idle Matthew Garrett
@ 2009-11-18 19:47             ` Kay Sievers
  -1 siblings, 0 replies; 73+ messages in thread
From: Kay Sievers @ 2009-11-18 19:47 UTC (permalink / raw)
  To: Matthew Garrett; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Wed, Nov 18, 2009 at 20:40, Matthew Garrett <mjg@redhat.com> wrote:
> On Wed, Nov 18, 2009 at 08:30:07PM +0100, Kay Sievers wrote:
>
>> Wouldn't it be good enough, if we add a file "idle_since" which
>> contains the time of the actual disk idle time, and userspace can
>> schedule a re-examination of that value at the actual end of the idle
>> time it is looking for?
>
> That would require either polling or waking up a userspace application
> on every disk access. Doing it in-kernel involves only a single timer
> wakeup for every active/idle transition.

How would it? If you look for, like a 60 seconds timeout, and the file
contains 20, you schedule a wakeup in 40 seconds. If the file after
the 40 seconds contains 60, you reached your idle timeout exactly at
that moment, if it's less, then you re-calculate and start from the
beginning.

Kay

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle
@ 2009-11-18 19:47             ` Kay Sievers
  0 siblings, 0 replies; 73+ messages in thread
From: Kay Sievers @ 2009-11-18 19:47 UTC (permalink / raw)
  To: Matthew Garrett; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Wed, Nov 18, 2009 at 20:40, Matthew Garrett <mjg@redhat.com> wrote:
> On Wed, Nov 18, 2009 at 08:30:07PM +0100, Kay Sievers wrote:
>
>> Wouldn't it be good enough, if we add a file "idle_since" which
>> contains the time of the actual disk idle time, and userspace can
>> schedule a re-examination of that value at the actual end of the idle
>> time it is looking for?
>
> That would require either polling or waking up a userspace application
> on every disk access. Doing it in-kernel involves only a single timer
> wakeup for every active/idle transition.

How would it? If you look for, like a 60 seconds timeout, and the file
contains 20, you schedule a wakeup in 40 seconds. If the file after
the 40 seconds contains 60, you reached your idle timeout exactly at
that moment, if it's less, then you re-calculate and start from the
beginning.

Kay

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle changes
  2009-11-18 19:47             ` [PATCH] [RFC] Add support for uevents on block device idle Kay Sievers
@ 2009-11-18 19:53               ` Matthew Garrett
  -1 siblings, 0 replies; 73+ messages in thread
From: Matthew Garrett @ 2009-11-18 19:53 UTC (permalink / raw)
  To: Kay Sievers; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Wed, Nov 18, 2009 at 08:47:37PM +0100, Kay Sievers wrote:
> On Wed, Nov 18, 2009 at 20:40, Matthew Garrett <mjg@redhat.com> wrote:
> > On Wed, Nov 18, 2009 at 08:30:07PM +0100, Kay Sievers wrote:
> >
> >> Wouldn't it be good enough, if we add a file "idle_since" which
> >> contains the time of the actual disk idle time, and userspace can
> >> schedule a re-examination of that value at the actual end of the idle
> >> time it is looking for?
> >
> > That would require either polling or waking up a userspace application
> > on every disk access. Doing it in-kernel involves only a single timer
> > wakeup for every active/idle transition.
> 
> How would it? If you look for, like a 60 seconds timeout, and the file
> contains 20, you schedule a wakeup in 40 seconds. If the file after
> the 40 seconds contains 60, you reached your idle timeout exactly at
> that moment, if it's less, then you re-calculate and start from the
> beginning.

How is that not polling? You'll repeatedly read a file looking for a 
value that may never appear - imagine the case where you're waiting for 
60 seconds of idleness, but the disk always becomes active again after 
50.

-- 
Matthew Garrett | mjg59@srcf.ucam.org

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle
@ 2009-11-18 19:53               ` Matthew Garrett
  0 siblings, 0 replies; 73+ messages in thread
From: Matthew Garrett @ 2009-11-18 19:53 UTC (permalink / raw)
  To: Kay Sievers; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Wed, Nov 18, 2009 at 08:47:37PM +0100, Kay Sievers wrote:
> On Wed, Nov 18, 2009 at 20:40, Matthew Garrett <mjg@redhat.com> wrote:
> > On Wed, Nov 18, 2009 at 08:30:07PM +0100, Kay Sievers wrote:
> >
> >> Wouldn't it be good enough, if we add a file "idle_since" which
> >> contains the time of the actual disk idle time, and userspace can
> >> schedule a re-examination of that value at the actual end of the idle
> >> time it is looking for?
> >
> > That would require either polling or waking up a userspace application
> > on every disk access. Doing it in-kernel involves only a single timer
> > wakeup for every active/idle transition.
> 
> How would it? If you look for, like a 60 seconds timeout, and the file
> contains 20, you schedule a wakeup in 40 seconds. If the file after
> the 40 seconds contains 60, you reached your idle timeout exactly at
> that moment, if it's less, then you re-calculate and start from the
> beginning.

How is that not polling? You'll repeatedly read a file looking for a 
value that may never appear - imagine the case where you're waiting for 
60 seconds of idleness, but the disk always becomes active again after 
50.

-- 
Matthew Garrett | mjg59@srcf.ucam.org

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle  changes
  2009-11-18 19:53               ` [PATCH] [RFC] Add support for uevents on block device idle Matthew Garrett
@ 2009-11-18 20:03                 ` Kay Sievers
  -1 siblings, 0 replies; 73+ messages in thread
From: Kay Sievers @ 2009-11-18 20:03 UTC (permalink / raw)
  To: Matthew Garrett; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Wed, Nov 18, 2009 at 20:53, Matthew Garrett <mjg59@srcf.ucam.org> wrote:
> On Wed, Nov 18, 2009 at 08:47:37PM +0100, Kay Sievers wrote:
>> On Wed, Nov 18, 2009 at 20:40, Matthew Garrett <mjg@redhat.com> wrote:
>> > On Wed, Nov 18, 2009 at 08:30:07PM +0100, Kay Sievers wrote:
>> >
>> >> Wouldn't it be good enough, if we add a file "idle_since" which
>> >> contains the time of the actual disk idle time, and userspace can
>> >> schedule a re-examination of that value at the actual end of the idle
>> >> time it is looking for?
>> >
>> > That would require either polling or waking up a userspace application
>> > on every disk access. Doing it in-kernel involves only a single timer
>> > wakeup for every active/idle transition.
>>
>> How would it? If you look for, like a 60 seconds timeout, and the file
>> contains 20, you schedule a wakeup in 40 seconds. If the file after
>> the 40 seconds contains 60, you reached your idle timeout exactly at
>> that moment, if it's less, then you re-calculate and start from the
>> beginning.
>
> How is that not polling? You'll repeatedly read a file looking for a
> value that may never appear - imagine the case where you're waiting for
> 60 seconds of idleness, but the disk always becomes active again after
> 50.

Sure, but what's wrong with reading that file every 50 seconds? Almost
all boxes poll for media changes of optical drives and usb card
readers anyway, so it's not that we are not doing stuff like this
already.

Kay

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle
@ 2009-11-18 20:03                 ` Kay Sievers
  0 siblings, 0 replies; 73+ messages in thread
From: Kay Sievers @ 2009-11-18 20:03 UTC (permalink / raw)
  To: Matthew Garrett; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Wed, Nov 18, 2009 at 20:53, Matthew Garrett <mjg59@srcf.ucam.org> wrote:
> On Wed, Nov 18, 2009 at 08:47:37PM +0100, Kay Sievers wrote:
>> On Wed, Nov 18, 2009 at 20:40, Matthew Garrett <mjg@redhat.com> wrote:
>> > On Wed, Nov 18, 2009 at 08:30:07PM +0100, Kay Sievers wrote:
>> >
>> >> Wouldn't it be good enough, if we add a file "idle_since" which
>> >> contains the time of the actual disk idle time, and userspace can
>> >> schedule a re-examination of that value at the actual end of the idle
>> >> time it is looking for?
>> >
>> > That would require either polling or waking up a userspace application
>> > on every disk access. Doing it in-kernel involves only a single timer
>> > wakeup for every active/idle transition.
>>
>> How would it? If you look for, like a 60 seconds timeout, and the file
>> contains 20, you schedule a wakeup in 40 seconds. If the file after
>> the 40 seconds contains 60, you reached your idle timeout exactly at
>> that moment, if it's less, then you re-calculate and start from the
>> beginning.
>
> How is that not polling? You'll repeatedly read a file looking for a
> value that may never appear - imagine the case where you're waiting for
> 60 seconds of idleness, but the disk always becomes active again after
> 50.

Sure, but what's wrong with reading that file every 50 seconds? Almost
all boxes poll for media changes of optical drives and usb card
readers anyway, so it's not that we are not doing stuff like this
already.

Kay

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle changes
  2009-11-18 20:03                 ` [PATCH] [RFC] Add support for uevents on block device idle Kay Sievers
@ 2009-11-18 20:07                   ` Matthew Garrett
  -1 siblings, 0 replies; 73+ messages in thread
From: Matthew Garrett @ 2009-11-18 20:07 UTC (permalink / raw)
  To: Kay Sievers; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Wed, Nov 18, 2009 at 09:03:21PM +0100, Kay Sievers wrote:

> Sure, but what's wrong with reading that file every 50 seconds? Almost
> all boxes poll for media changes of optical drives and usb card
> readers anyway, so it's not that we are not doing stuff like this
> already.

We poll for media because there's no event-based way of avoiding it - in 
this case there is.
-- 
Matthew Garrett | mjg59@srcf.ucam.org

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle
@ 2009-11-18 20:07                   ` Matthew Garrett
  0 siblings, 0 replies; 73+ messages in thread
From: Matthew Garrett @ 2009-11-18 20:07 UTC (permalink / raw)
  To: Kay Sievers; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Wed, Nov 18, 2009 at 09:03:21PM +0100, Kay Sievers wrote:

> Sure, but what's wrong with reading that file every 50 seconds? Almost
> all boxes poll for media changes of optical drives and usb card
> readers anyway, so it's not that we are not doing stuff like this
> already.

We poll for media because there's no event-based way of avoiding it - in 
this case there is.
-- 
Matthew Garrett | mjg59@srcf.ucam.org

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle  changes
  2009-11-18 20:07                   ` [PATCH] [RFC] Add support for uevents on block device idle Matthew Garrett
@ 2009-11-18 21:06                     ` Kay Sievers
  -1 siblings, 0 replies; 73+ messages in thread
From: Kay Sievers @ 2009-11-18 21:06 UTC (permalink / raw)
  To: Matthew Garrett; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Wed, Nov 18, 2009 at 21:07, Matthew Garrett <mjg59@srcf.ucam.org> wrote:
> On Wed, Nov 18, 2009 at 09:03:21PM +0100, Kay Sievers wrote:
>
>> Sure, but what's wrong with reading that file every 50 seconds? Almost
>> all boxes poll for media changes of optical drives and usb card
>> readers anyway, so it's not that we are not doing stuff like this
>> already.
>
> We poll for media because there's no event-based way of avoiding it - in
> this case there is.

That's true, but I think there is a significant difference between
polling every one or two seconds for media changes, and usually one or
two minutes for a disk idle. It's not that we poll in a rather hight
frequency, in an arbitrary interval, and check if some condition is
met.

I still don't think that we should add new event interfaces which are
single-subscriber only, and use global values for a specific user.
What if there will be another independent user for this, which might
want a different timeout? They fight over the trigger value to set in
sysfs?

>From my perspective, the once-at-timeout wakeup is more acceptable
than an in-kernel policy setting for a single-subscriber event
interface.

Thanks,
Kay

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle
@ 2009-11-18 21:06                     ` Kay Sievers
  0 siblings, 0 replies; 73+ messages in thread
From: Kay Sievers @ 2009-11-18 21:06 UTC (permalink / raw)
  To: Matthew Garrett; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Wed, Nov 18, 2009 at 21:07, Matthew Garrett <mjg59@srcf.ucam.org> wrote:
> On Wed, Nov 18, 2009 at 09:03:21PM +0100, Kay Sievers wrote:
>
>> Sure, but what's wrong with reading that file every 50 seconds? Almost
>> all boxes poll for media changes of optical drives and usb card
>> readers anyway, so it's not that we are not doing stuff like this
>> already.
>
> We poll for media because there's no event-based way of avoiding it - in
> this case there is.

That's true, but I think there is a significant difference between
polling every one or two seconds for media changes, and usually one or
two minutes for a disk idle. It's not that we poll in a rather hight
frequency, in an arbitrary interval, and check if some condition is
met.

I still don't think that we should add new event interfaces which are
single-subscriber only, and use global values for a specific user.
What if there will be another independent user for this, which might
want a different timeout? They fight over the trigger value to set in
sysfs?

From my perspective, the once-at-timeout wakeup is more acceptable
than an in-kernel policy setting for a single-subscriber event
interface.

Thanks,
Kay

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle  changes
  2009-11-18 21:06                     ` [PATCH] [RFC] Add support for uevents on block device idle Kay Sievers
@ 2009-11-18 21:29                       ` Kay Sievers
  -1 siblings, 0 replies; 73+ messages in thread
From: Kay Sievers @ 2009-11-18 21:29 UTC (permalink / raw)
  To: Matthew Garrett; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Wed, Nov 18, 2009 at 22:06, Kay Sievers <kay.sievers@vrfy.org> wrote:
> On Wed, Nov 18, 2009 at 21:07, Matthew Garrett <mjg59@srcf.ucam.org> wrote:
>> On Wed, Nov 18, 2009 at 09:03:21PM +0100, Kay Sievers wrote:
>>
>>> Sure, but what's wrong with reading that file every 50 seconds? Almost
>>> all boxes poll for media changes of optical drives and usb card
>>> readers anyway, so it's not that we are not doing stuff like this
>>> already.
>>
>> We poll for media because there's no event-based way of avoiding it - in
>> this case there is.
>
> That's true, but I think there is a significant difference between
> polling every one or two seconds for media changes, and usually one or
> two minutes for a disk idle. It's not that we poll in a rather hight
> frequency, in an arbitrary interval, and check if some condition is
> met.
>
> I still don't think that we should add new event interfaces which are
> single-subscriber only, and use global values for a specific user.
> What if there will be another independent user for this, which might
> want a different timeout? They fight over the trigger value to set in
> sysfs?
>
> From my perspective, the once-at-timeout wakeup is more acceptable
> than an in-kernel policy setting for a single-subscriber event
> interface.

I guess, the "idle_since" file could be made poll()able, and throw an
event when the idle time is re-set to 0, so the value checking needs
only to happen as long we wait for the disk to become idle. As long as
it's busy anyway, the rare wakeups should not matter much. :)

Kay

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle
@ 2009-11-18 21:29                       ` Kay Sievers
  0 siblings, 0 replies; 73+ messages in thread
From: Kay Sievers @ 2009-11-18 21:29 UTC (permalink / raw)
  To: Matthew Garrett; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Wed, Nov 18, 2009 at 22:06, Kay Sievers <kay.sievers@vrfy.org> wrote:
> On Wed, Nov 18, 2009 at 21:07, Matthew Garrett <mjg59@srcf.ucam.org> wrote:
>> On Wed, Nov 18, 2009 at 09:03:21PM +0100, Kay Sievers wrote:
>>
>>> Sure, but what's wrong with reading that file every 50 seconds? Almost
>>> all boxes poll for media changes of optical drives and usb card
>>> readers anyway, so it's not that we are not doing stuff like this
>>> already.
>>
>> We poll for media because there's no event-based way of avoiding it - in
>> this case there is.
>
> That's true, but I think there is a significant difference between
> polling every one or two seconds for media changes, and usually one or
> two minutes for a disk idle. It's not that we poll in a rather hight
> frequency, in an arbitrary interval, and check if some condition is
> met.
>
> I still don't think that we should add new event interfaces which are
> single-subscriber only, and use global values for a specific user.
> What if there will be another independent user for this, which might
> want a different timeout? They fight over the trigger value to set in
> sysfs?
>
> From my perspective, the once-at-timeout wakeup is more acceptable
> than an in-kernel policy setting for a single-subscriber event
> interface.

I guess, the "idle_since" file could be made poll()able, and throw an
event when the idle time is re-set to 0, so the value checking needs
only to happen as long we wait for the disk to become idle. As long as
it's busy anyway, the rare wakeups should not matter much. :)

Kay

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle changes
  2009-11-18 21:06                     ` [PATCH] [RFC] Add support for uevents on block device idle Kay Sievers
@ 2009-11-18 21:33                       ` Matthew Garrett
  -1 siblings, 0 replies; 73+ messages in thread
From: Matthew Garrett @ 2009-11-18 21:33 UTC (permalink / raw)
  To: Kay Sievers; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Wed, Nov 18, 2009 at 10:06:33PM +0100, Kay Sievers wrote:

> That's true, but I think there is a significant difference between
> polling every one or two seconds for media changes, and usually one or
> two minutes for a disk idle. It's not that we poll in a rather hight
> frequency, in an arbitrary interval, and check if some condition is
> met.

My use cases are on the order of a second.

> I still don't think that we should add new event interfaces which are
> single-subscriber only, and use global values for a specific user.
> What if there will be another independent user for this, which might
> want a different timeout? They fight over the trigger value to set in
> sysfs?

You can trivially multiplex without any additional wakeups. Something 
like devkit-disks can simply trigger on the lowest requested time and 
then schedule wakeups for subscribers who want a different timeout.

> From my perspective, the once-at-timeout wakeup is more acceptable
> than an in-kernel policy setting for a single-subscriber event
> interface.

I'd be open to it being something for multiple subscribers, though that 
would add to the complexity in the block code and I'm not sure that's 
needed.

-- 
Matthew Garrett | mjg59@srcf.ucam.org

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle
@ 2009-11-18 21:33                       ` Matthew Garrett
  0 siblings, 0 replies; 73+ messages in thread
From: Matthew Garrett @ 2009-11-18 21:33 UTC (permalink / raw)
  To: Kay Sievers; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Wed, Nov 18, 2009 at 10:06:33PM +0100, Kay Sievers wrote:

> That's true, but I think there is a significant difference between
> polling every one or two seconds for media changes, and usually one or
> two minutes for a disk idle. It's not that we poll in a rather hight
> frequency, in an arbitrary interval, and check if some condition is
> met.

My use cases are on the order of a second.

> I still don't think that we should add new event interfaces which are
> single-subscriber only, and use global values for a specific user.
> What if there will be another independent user for this, which might
> want a different timeout? They fight over the trigger value to set in
> sysfs?

You can trivially multiplex without any additional wakeups. Something 
like devkit-disks can simply trigger on the lowest requested time and 
then schedule wakeups for subscribers who want a different timeout.

> From my perspective, the once-at-timeout wakeup is more acceptable
> than an in-kernel policy setting for a single-subscriber event
> interface.

I'd be open to it being something for multiple subscribers, though that 
would add to the complexity in the block code and I'm not sure that's 
needed.

-- 
Matthew Garrett | mjg59@srcf.ucam.org

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle changes
  2009-11-18 21:29                       ` [PATCH] [RFC] Add support for uevents on block device idle Kay Sievers
@ 2009-11-18 21:35                         ` Matthew Garrett
  -1 siblings, 0 replies; 73+ messages in thread
From: Matthew Garrett @ 2009-11-18 21:35 UTC (permalink / raw)
  To: Kay Sievers; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Wed, Nov 18, 2009 at 10:29:23PM +0100, Kay Sievers wrote:

> I guess, the "idle_since" file could be made poll()able, and throw an
> event when the idle time is re-set to 0, so the value checking needs
> only to happen as long we wait for the disk to become idle. As long as
> it's busy anyway, the rare wakeups should not matter much. :)

That'd be a userspace wakeup every time something gets submitted to the 
block device, which sounds far from ideal...

-- 
Matthew Garrett | mjg59@srcf.ucam.org

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle
@ 2009-11-18 21:35                         ` Matthew Garrett
  0 siblings, 0 replies; 73+ messages in thread
From: Matthew Garrett @ 2009-11-18 21:35 UTC (permalink / raw)
  To: Kay Sievers; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Wed, Nov 18, 2009 at 10:29:23PM +0100, Kay Sievers wrote:

> I guess, the "idle_since" file could be made poll()able, and throw an
> event when the idle time is re-set to 0, so the value checking needs
> only to happen as long we wait for the disk to become idle. As long as
> it's busy anyway, the rare wakeups should not matter much. :)

That'd be a userspace wakeup every time something gets submitted to the 
block device, which sounds far from ideal...

-- 
Matthew Garrett | mjg59@srcf.ucam.org

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle  changes
  2009-11-18 21:35                         ` [PATCH] [RFC] Add support for uevents on block device idle Matthew Garrett
@ 2009-11-18 21:39                           ` Kay Sievers
  -1 siblings, 0 replies; 73+ messages in thread
From: Kay Sievers @ 2009-11-18 21:39 UTC (permalink / raw)
  To: Matthew Garrett; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Wed, Nov 18, 2009 at 22:35, Matthew Garrett <mjg59@srcf.ucam.org> wrote:
> On Wed, Nov 18, 2009 at 10:29:23PM +0100, Kay Sievers wrote:
>
>> I guess, the "idle_since" file could be made poll()able, and throw an
>> event when the idle time is re-set to 0, so the value checking needs
>> only to happen as long we wait for the disk to become idle. As long as
>> it's busy anyway, the rare wakeups should not matter much. :)
>
> That'd be a userspace wakeup every time something gets submitted to the
> block device, which sounds far from ideal...

No, you would only poll() when you reached the timeout and the disk
entered the idle state. This can not happen more frequently than the
timeout itself.

Kay

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle
@ 2009-11-18 21:39                           ` Kay Sievers
  0 siblings, 0 replies; 73+ messages in thread
From: Kay Sievers @ 2009-11-18 21:39 UTC (permalink / raw)
  To: Matthew Garrett; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Wed, Nov 18, 2009 at 22:35, Matthew Garrett <mjg59@srcf.ucam.org> wrote:
> On Wed, Nov 18, 2009 at 10:29:23PM +0100, Kay Sievers wrote:
>
>> I guess, the "idle_since" file could be made poll()able, and throw an
>> event when the idle time is re-set to 0, so the value checking needs
>> only to happen as long we wait for the disk to become idle. As long as
>> it's busy anyway, the rare wakeups should not matter much. :)
>
> That'd be a userspace wakeup every time something gets submitted to the
> block device, which sounds far from ideal...

No, you would only poll() when you reached the timeout and the disk
entered the idle state. This can not happen more frequently than the
timeout itself.

Kay

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle  changes
  2009-11-18 21:33                       ` [PATCH] [RFC] Add support for uevents on block device idle Matthew Garrett
@ 2009-11-18 21:40                         ` Kay Sievers
  -1 siblings, 0 replies; 73+ messages in thread
From: Kay Sievers @ 2009-11-18 21:40 UTC (permalink / raw)
  To: Matthew Garrett; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Wed, Nov 18, 2009 at 22:33, Matthew Garrett <mjg59@srcf.ucam.org> wrote:
> On Wed, Nov 18, 2009 at 10:06:33PM +0100, Kay Sievers wrote:
>
>> That's true, but I think there is a significant difference between
>> polling every one or two seconds for media changes, and usually one or
>> two minutes for a disk idle. It's not that we poll in a rather hight
>> frequency, in an arbitrary interval, and check if some condition is
>> met.
>
> My use cases are on the order of a second.
>
>> I still don't think that we should add new event interfaces which are
>> single-subscriber only, and use global values for a specific user.
>> What if there will be another independent user for this, which might
>> want a different timeout? They fight over the trigger value to set in
>> sysfs?
>
> You can trivially multiplex without any additional wakeups. Something
> like devkit-disks can simply trigger on the lowest requested time and
> then schedule wakeups for subscribers who want a different timeout.

No, it can't do this race-free. And it's far from trivial. It can not
know when something changes the single global value.

Kay

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle
@ 2009-11-18 21:40                         ` Kay Sievers
  0 siblings, 0 replies; 73+ messages in thread
From: Kay Sievers @ 2009-11-18 21:40 UTC (permalink / raw)
  To: Matthew Garrett; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Wed, Nov 18, 2009 at 22:33, Matthew Garrett <mjg59@srcf.ucam.org> wrote:
> On Wed, Nov 18, 2009 at 10:06:33PM +0100, Kay Sievers wrote:
>
>> That's true, but I think there is a significant difference between
>> polling every one or two seconds for media changes, and usually one or
>> two minutes for a disk idle. It's not that we poll in a rather hight
>> frequency, in an arbitrary interval, and check if some condition is
>> met.
>
> My use cases are on the order of a second.
>
>> I still don't think that we should add new event interfaces which are
>> single-subscriber only, and use global values for a specific user.
>> What if there will be another independent user for this, which might
>> want a different timeout? They fight over the trigger value to set in
>> sysfs?
>
> You can trivially multiplex without any additional wakeups. Something
> like devkit-disks can simply trigger on the lowest requested time and
> then schedule wakeups for subscribers who want a different timeout.

No, it can't do this race-free. And it's far from trivial. It can not
know when something changes the single global value.

Kay

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle changes
  2009-11-18 21:39                           ` [PATCH] [RFC] Add support for uevents on block device idle Kay Sievers
@ 2009-11-18 21:45                             ` Matthew Garrett
  -1 siblings, 0 replies; 73+ messages in thread
From: Matthew Garrett @ 2009-11-18 21:45 UTC (permalink / raw)
  To: Kay Sievers; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Wed, Nov 18, 2009 at 10:39:26PM +0100, Kay Sievers wrote:
> On Wed, Nov 18, 2009 at 22:35, Matthew Garrett <mjg59@srcf.ucam.org> wrote:
> > On Wed, Nov 18, 2009 at 10:29:23PM +0100, Kay Sievers wrote:
> >
> >> I guess, the "idle_since" file could be made poll()able, and throw an
> >> event when the idle time is re-set to 0, so the value checking needs
> >> only to happen as long we wait for the disk to become idle. As long as
> >> it's busy anyway, the rare wakeups should not matter much. :)
> >
> > That'd be a userspace wakeup every time something gets submitted to the
> > block device, which sounds far from ideal...
> 
> No, you would only poll() when you reached the timeout and the disk
> entered the idle state. This can not happen more frequently than the
> timeout itself.

I don't understand. idle_since would be reset on every access to the 
block device. The alternative is to generate an event when the disk goes 
idle, but that goes back to requiring a timer in the kernel...

-- 
Matthew Garrett | mjg59@srcf.ucam.org

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle
@ 2009-11-18 21:45                             ` Matthew Garrett
  0 siblings, 0 replies; 73+ messages in thread
From: Matthew Garrett @ 2009-11-18 21:45 UTC (permalink / raw)
  To: Kay Sievers; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Wed, Nov 18, 2009 at 10:39:26PM +0100, Kay Sievers wrote:
> On Wed, Nov 18, 2009 at 22:35, Matthew Garrett <mjg59@srcf.ucam.org> wrote:
> > On Wed, Nov 18, 2009 at 10:29:23PM +0100, Kay Sievers wrote:
> >
> >> I guess, the "idle_since" file could be made poll()able, and throw an
> >> event when the idle time is re-set to 0, so the value checking needs
> >> only to happen as long we wait for the disk to become idle. As long as
> >> it's busy anyway, the rare wakeups should not matter much. :)
> >
> > That'd be a userspace wakeup every time something gets submitted to the
> > block device, which sounds far from ideal...
> 
> No, you would only poll() when you reached the timeout and the disk
> entered the idle state. This can not happen more frequently than the
> timeout itself.

I don't understand. idle_since would be reset on every access to the 
block device. The alternative is to generate an event when the disk goes 
idle, but that goes back to requiring a timer in the kernel...

-- 
Matthew Garrett | mjg59@srcf.ucam.org

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle changes
  2009-11-17 14:37 ` Matthew Garrett
@ 2009-11-18 22:10   ` Bartlomiej Zolnierkiewicz
  -1 siblings, 0 replies; 73+ messages in thread
From: Bartlomiej Zolnierkiewicz @ 2009-11-18 22:10 UTC (permalink / raw)
  To: Matthew Garrett; +Cc: linux-kernel, axboe, linux-hotplug

On Tuesday 17 November 2009 15:37:39 Matthew Garrett wrote:

> @@ -1452,6 +1452,15 @@ static inline void __generic_make_request(struct bio *bio)
>  		if (should_fail_request(bio))
>  			goto end_io;
>  
> +		if (bio->bi_bdev->bd_disk->hysteresis_time &&
> +		    bio_has_data(bio) &&
> +		    !mod_timer(&bio->bi_bdev->bd_disk->hysteresis_timer,
> +			       jiffies+msecs_to_jiffies
> +			       (bio->bi_bdev->bd_disk->hysteresis_time))) {
> +			bio->bi_bdev->bd_disk->idle = 0;
> +			schedule_work(&bio->bi_bdev->bd_disk->idle_notify);
> +		}
> +

Wouldn't it be more reliable to hook into places where block requests are
issued/completed instead of queued?  This way you will not miss special
requests (some of them are quite common in practice and may take a relatively
long time to execute, i.e. FLUSH CACHE).

-- 
Bartlomiej Zolnierkiewicz

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle changes
@ 2009-11-18 22:10   ` Bartlomiej Zolnierkiewicz
  0 siblings, 0 replies; 73+ messages in thread
From: Bartlomiej Zolnierkiewicz @ 2009-11-18 22:10 UTC (permalink / raw)
  To: Matthew Garrett; +Cc: linux-kernel, axboe, linux-hotplug

On Tuesday 17 November 2009 15:37:39 Matthew Garrett wrote:

> @@ -1452,6 +1452,15 @@ static inline void __generic_make_request(struct bio *bio)
>  		if (should_fail_request(bio))
>  			goto end_io;
>  
> +		if (bio->bi_bdev->bd_disk->hysteresis_time &&
> +		    bio_has_data(bio) &&
> +		    !mod_timer(&bio->bi_bdev->bd_disk->hysteresis_timer,
> +			       jiffies+msecs_to_jiffies
> +			       (bio->bi_bdev->bd_disk->hysteresis_time))) {
> +			bio->bi_bdev->bd_disk->idle = 0;
> +			schedule_work(&bio->bi_bdev->bd_disk->idle_notify);
> +		}
> +

Wouldn't it be more reliable to hook into places where block requests are
issued/completed instead of queued?  This way you will not miss special
requests (some of them are quite common in practice and may take a relatively
long time to execute, i.e. FLUSH CACHE).

-- 
Bartlomiej Zolnierkiewicz

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle  changes
  2009-11-18 21:33                       ` [PATCH] [RFC] Add support for uevents on block device idle Matthew Garrett
@ 2009-11-19 11:09                         ` Kay Sievers
  -1 siblings, 0 replies; 73+ messages in thread
From: Kay Sievers @ 2009-11-19 11:09 UTC (permalink / raw)
  To: Matthew Garrett; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Wed, Nov 18, 2009 at 22:33, Matthew Garrett <mjg59@srcf.ucam.org> wrote:
> On Wed, Nov 18, 2009 at 10:06:33PM +0100, Kay Sievers wrote:
>
>> That's true, but I think there is a significant difference between
>> polling every one or two seconds for media changes, and usually one or
>> two minutes for a disk idle. It's not that we poll in a rather hight
>> frequency, in an arbitrary interval, and check if some condition is
>> met.
>
> My use cases are on the order of a second.

Ok, what's the specific use case, which should be triggered after a
second? I thought you were thinking about disk spindown or similar.

Kay

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle
@ 2009-11-19 11:09                         ` Kay Sievers
  0 siblings, 0 replies; 73+ messages in thread
From: Kay Sievers @ 2009-11-19 11:09 UTC (permalink / raw)
  To: Matthew Garrett; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Wed, Nov 18, 2009 at 22:33, Matthew Garrett <mjg59@srcf.ucam.org> wrote:
> On Wed, Nov 18, 2009 at 10:06:33PM +0100, Kay Sievers wrote:
>
>> That's true, but I think there is a significant difference between
>> polling every one or two seconds for media changes, and usually one or
>> two minutes for a disk idle. It's not that we poll in a rather hight
>> frequency, in an arbitrary interval, and check if some condition is
>> met.
>
> My use cases are on the order of a second.

Ok, what's the specific use case, which should be triggered after a
second? I thought you were thinking about disk spindown or similar.

Kay

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle changes
  2009-11-19 11:09                         ` [PATCH] [RFC] Add support for uevents on block device idle Kay Sievers
@ 2009-11-19 13:01                           ` Matthew Garrett
  -1 siblings, 0 replies; 73+ messages in thread
From: Matthew Garrett @ 2009-11-19 13:01 UTC (permalink / raw)
  To: Kay Sievers; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Thu, Nov 19, 2009 at 12:09:30PM +0100, Kay Sievers wrote:
> On Wed, Nov 18, 2009 at 22:33, Matthew Garrett <mjg59@srcf.ucam.org> wrote:
> > My use cases are on the order of a second.
> 
> Ok, what's the specific use case, which should be triggered after a
> second? I thought you were thinking about disk spindown or similar.

The first is altering ALPM policy. ALPM will be initiated by the host if 
the number of queued requests hits zero - if there's no hysteresis 
implemented, then that can result in a significant performance hit. We 
don't need /much/ hysteresis, but it's the difference between a 50% 
performance hit and not having that.

The other I'm currently looking at is monitoring disk usage in order to 
be able to apply an adaptive policy to disk spindown. This doesn't need 
the information to be as accurate, but it would still be better at 
closer to the second granularity than the minute level.

-- 
Matthew Garrett | mjg59@srcf.ucam.org

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle
@ 2009-11-19 13:01                           ` Matthew Garrett
  0 siblings, 0 replies; 73+ messages in thread
From: Matthew Garrett @ 2009-11-19 13:01 UTC (permalink / raw)
  To: Kay Sievers; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Thu, Nov 19, 2009 at 12:09:30PM +0100, Kay Sievers wrote:
> On Wed, Nov 18, 2009 at 22:33, Matthew Garrett <mjg59@srcf.ucam.org> wrote:
> > My use cases are on the order of a second.
> 
> Ok, what's the specific use case, which should be triggered after a
> second? I thought you were thinking about disk spindown or similar.

The first is altering ALPM policy. ALPM will be initiated by the host if 
the number of queued requests hits zero - if there's no hysteresis 
implemented, then that can result in a significant performance hit. We 
don't need /much/ hysteresis, but it's the difference between a 50% 
performance hit and not having that.

The other I'm currently looking at is monitoring disk usage in order to 
be able to apply an adaptive policy to disk spindown. This doesn't need 
the information to be as accurate, but it would still be better at 
closer to the second granularity than the minute level.

-- 
Matthew Garrett | mjg59@srcf.ucam.org

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle  changes
  2009-11-19 13:01                           ` [PATCH] [RFC] Add support for uevents on block device idle Matthew Garrett
@ 2009-11-19 13:29                             ` Kay Sievers
  -1 siblings, 0 replies; 73+ messages in thread
From: Kay Sievers @ 2009-11-19 13:29 UTC (permalink / raw)
  To: Matthew Garrett; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Thu, Nov 19, 2009 at 14:01, Matthew Garrett <mjg@redhat.com> wrote:
> On Thu, Nov 19, 2009 at 12:09:30PM +0100, Kay Sievers wrote:
>> On Wed, Nov 18, 2009 at 22:33, Matthew Garrett <mjg59@srcf.ucam.org> wrote:
>> > My use cases are on the order of a second.
>>
>> Ok, what's the specific use case, which should be triggered after a
>> second? I thought you were thinking about disk spindown or similar.
>
> The first is altering ALPM policy. ALPM will be initiated by the host if
> the number of queued requests hits zero - if there's no hysteresis
> implemented, then that can result in a significant performance hit. We
> don't need /much/ hysteresis, but it's the difference between a 50%
> performance hit and not having that.

Can't that logic live entirely in the kernel, instead of being a
rather generic userspace event interface (with the current limitation
to a single user)?

Kay

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle
@ 2009-11-19 13:29                             ` Kay Sievers
  0 siblings, 0 replies; 73+ messages in thread
From: Kay Sievers @ 2009-11-19 13:29 UTC (permalink / raw)
  To: Matthew Garrett; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Thu, Nov 19, 2009 at 14:01, Matthew Garrett <mjg@redhat.com> wrote:
> On Thu, Nov 19, 2009 at 12:09:30PM +0100, Kay Sievers wrote:
>> On Wed, Nov 18, 2009 at 22:33, Matthew Garrett <mjg59@srcf.ucam.org> wrote:
>> > My use cases are on the order of a second.
>>
>> Ok, what's the specific use case, which should be triggered after a
>> second? I thought you were thinking about disk spindown or similar.
>
> The first is altering ALPM policy. ALPM will be initiated by the host if
> the number of queued requests hits zero - if there's no hysteresis
> implemented, then that can result in a significant performance hit. We
> don't need /much/ hysteresis, but it's the difference between a 50%
> performance hit and not having that.

Can't that logic live entirely in the kernel, instead of being a
rather generic userspace event interface (with the current limitation
to a single user)?

Kay

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle changes
  2009-11-19 13:29                             ` [PATCH] [RFC] Add support for uevents on block device idle Kay Sievers
@ 2009-11-19 14:16                               ` Matthew Garrett
  -1 siblings, 0 replies; 73+ messages in thread
From: Matthew Garrett @ 2009-11-19 14:16 UTC (permalink / raw)
  To: Kay Sievers; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Thu, Nov 19, 2009 at 02:29:29PM +0100, Kay Sievers wrote:
> On Thu, Nov 19, 2009 at 14:01, Matthew Garrett <mjg@redhat.com> wrote:
> > On Thu, Nov 19, 2009 at 12:09:30PM +0100, Kay Sievers wrote:
> >> On Wed, Nov 18, 2009 at 22:33, Matthew Garrett <mjg59@srcf.ucam.org> wrote:
> >> > My use cases are on the order of a second.
> >>
> >> Ok, what's the specific use case, which should be triggered after a
> >> second? I thought you were thinking about disk spindown or similar.
> >
> > The first is altering ALPM policy. ALPM will be initiated by the host if
> > the number of queued requests hits zero - if there's no hysteresis
> > implemented, then that can result in a significant performance hit. We
> > don't need /much/ hysteresis, but it's the difference between a 50%
> > performance hit and not having that.
> 
> Can't that logic live entirely in the kernel, instead of being a
> rather generic userspace event interface (with the current limitation
> to a single user)?

It could, but it seems a bit of a hack. It'd still also require the 
timer to be in the kernel, so we might as well expose that to userspace.

-- 
Matthew Garrett | mjg59@srcf.ucam.org

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle
@ 2009-11-19 14:16                               ` Matthew Garrett
  0 siblings, 0 replies; 73+ messages in thread
From: Matthew Garrett @ 2009-11-19 14:16 UTC (permalink / raw)
  To: Kay Sievers; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Thu, Nov 19, 2009 at 02:29:29PM +0100, Kay Sievers wrote:
> On Thu, Nov 19, 2009 at 14:01, Matthew Garrett <mjg@redhat.com> wrote:
> > On Thu, Nov 19, 2009 at 12:09:30PM +0100, Kay Sievers wrote:
> >> On Wed, Nov 18, 2009 at 22:33, Matthew Garrett <mjg59@srcf.ucam.org> wrote:
> >> > My use cases are on the order of a second.
> >>
> >> Ok, what's the specific use case, which should be triggered after a
> >> second? I thought you were thinking about disk spindown or similar.
> >
> > The first is altering ALPM policy. ALPM will be initiated by the host if
> > the number of queued requests hits zero - if there's no hysteresis
> > implemented, then that can result in a significant performance hit. We
> > don't need /much/ hysteresis, but it's the difference between a 50%
> > performance hit and not having that.
> 
> Can't that logic live entirely in the kernel, instead of being a
> rather generic userspace event interface (with the current limitation
> to a single user)?

It could, but it seems a bit of a hack. It'd still also require the 
timer to be in the kernel, so we might as well expose that to userspace.

-- 
Matthew Garrett | mjg59@srcf.ucam.org

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle  changes
  2009-11-19 14:16                               ` [PATCH] [RFC] Add support for uevents on block device idle Matthew Garrett
@ 2009-11-19 14:25                                 ` Kay Sievers
  -1 siblings, 0 replies; 73+ messages in thread
From: Kay Sievers @ 2009-11-19 14:25 UTC (permalink / raw)
  To: Matthew Garrett; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Thu, Nov 19, 2009 at 15:16, Matthew Garrett <mjg59@srcf.ucam.org> wrote:
> On Thu, Nov 19, 2009 at 02:29:29PM +0100, Kay Sievers wrote:
>> On Thu, Nov 19, 2009 at 14:01, Matthew Garrett <mjg@redhat.com> wrote:
>> > On Thu, Nov 19, 2009 at 12:09:30PM +0100, Kay Sievers wrote:
>> >> On Wed, Nov 18, 2009 at 22:33, Matthew Garrett <mjg59@srcf.ucam.org> wrote:
>> >> > My use cases are on the order of a second.
>> >>
>> >> Ok, what's the specific use case, which should be triggered after a
>> >> second? I thought you were thinking about disk spindown or similar.
>> >
>> > The first is altering ALPM policy. ALPM will be initiated by the host if
>> > the number of queued requests hits zero - if there's no hysteresis
>> > implemented, then that can result in a significant performance hit. We
>> > don't need /much/ hysteresis, but it's the difference between a 50%
>> > performance hit and not having that.
>>
>> Can't that logic live entirely in the kernel, instead of being a
>> rather generic userspace event interface (with the current limitation
>> to a single user)?
>
> It could, but it seems a bit of a hack. It'd still also require the
> timer to be in the kernel, so we might as well expose that to userspace.

Sure, but a userspace configurable policy for an in-kernel disk-idle
powermanagent sounds fine, compared to a single-subscriber
userspace-only disk-idle event interface. :)

Kay

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle
@ 2009-11-19 14:25                                 ` Kay Sievers
  0 siblings, 0 replies; 73+ messages in thread
From: Kay Sievers @ 2009-11-19 14:25 UTC (permalink / raw)
  To: Matthew Garrett; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Thu, Nov 19, 2009 at 15:16, Matthew Garrett <mjg59@srcf.ucam.org> wrote:
> On Thu, Nov 19, 2009 at 02:29:29PM +0100, Kay Sievers wrote:
>> On Thu, Nov 19, 2009 at 14:01, Matthew Garrett <mjg@redhat.com> wrote:
>> > On Thu, Nov 19, 2009 at 12:09:30PM +0100, Kay Sievers wrote:
>> >> On Wed, Nov 18, 2009 at 22:33, Matthew Garrett <mjg59@srcf.ucam.org> wrote:
>> >> > My use cases are on the order of a second.
>> >>
>> >> Ok, what's the specific use case, which should be triggered after a
>> >> second? I thought you were thinking about disk spindown or similar.
>> >
>> > The first is altering ALPM policy. ALPM will be initiated by the host if
>> > the number of queued requests hits zero - if there's no hysteresis
>> > implemented, then that can result in a significant performance hit. We
>> > don't need /much/ hysteresis, but it's the difference between a 50%
>> > performance hit and not having that.
>>
>> Can't that logic live entirely in the kernel, instead of being a
>> rather generic userspace event interface (with the current limitation
>> to a single user)?
>
> It could, but it seems a bit of a hack. It'd still also require the
> timer to be in the kernel, so we might as well expose that to userspace.

Sure, but a userspace configurable policy for an in-kernel disk-idle
powermanagent sounds fine, compared to a single-subscriber
userspace-only disk-idle event interface. :)

Kay

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle changes
  2009-11-19 14:25                                 ` [PATCH] [RFC] Add support for uevents on block device idle Kay Sievers
@ 2009-11-19 14:30                                   ` Matthew Garrett
  -1 siblings, 0 replies; 73+ messages in thread
From: Matthew Garrett @ 2009-11-19 14:30 UTC (permalink / raw)
  To: Kay Sievers; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Thu, Nov 19, 2009 at 03:25:59PM +0100, Kay Sievers wrote:
> On Thu, Nov 19, 2009 at 15:16, Matthew Garrett <mjg59@srcf.ucam.org> wrote:
> > It could, but it seems a bit of a hack. It'd still also require the
> > timer to be in the kernel, so we might as well expose that to userspace.
> 
> Sure, but a userspace configurable policy for an in-kernel disk-idle
> powermanagent sounds fine, compared to a single-subscriber
> userspace-only disk-idle event interface. :)

Well, we still need to expose this for the access pattern modifying. I 
really don't see the issue with the single subscriber being devkit-disks 
- none of the operations involved are atomic, so we're inherently racy 
here.

-- 
Matthew Garrett | mjg59@srcf.ucam.org

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle
@ 2009-11-19 14:30                                   ` Matthew Garrett
  0 siblings, 0 replies; 73+ messages in thread
From: Matthew Garrett @ 2009-11-19 14:30 UTC (permalink / raw)
  To: Kay Sievers; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Thu, Nov 19, 2009 at 03:25:59PM +0100, Kay Sievers wrote:
> On Thu, Nov 19, 2009 at 15:16, Matthew Garrett <mjg59@srcf.ucam.org> wrote:
> > It could, but it seems a bit of a hack. It'd still also require the
> > timer to be in the kernel, so we might as well expose that to userspace.
> 
> Sure, but a userspace configurable policy for an in-kernel disk-idle
> powermanagent sounds fine, compared to a single-subscriber
> userspace-only disk-idle event interface. :)

Well, we still need to expose this for the access pattern modifying. I 
really don't see the issue with the single subscriber being devkit-disks 
- none of the operations involved are atomic, so we're inherently racy 
here.

-- 
Matthew Garrett | mjg59@srcf.ucam.org

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle  changes
  2009-11-19 14:30                                   ` [PATCH] [RFC] Add support for uevents on block device idle Matthew Garrett
@ 2009-11-19 14:34                                     ` Kay Sievers
  -1 siblings, 0 replies; 73+ messages in thread
From: Kay Sievers @ 2009-11-19 14:34 UTC (permalink / raw)
  To: Matthew Garrett; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Thu, Nov 19, 2009 at 15:30, Matthew Garrett <mjg59@srcf.ucam.org> wrote:
> On Thu, Nov 19, 2009 at 03:25:59PM +0100, Kay Sievers wrote:
>> On Thu, Nov 19, 2009 at 15:16, Matthew Garrett <mjg59@srcf.ucam.org> wrote:
>> > It could, but it seems a bit of a hack. It'd still also require the
>> > timer to be in the kernel, so we might as well expose that to userspace.
>>
>> Sure, but a userspace configurable policy for an in-kernel disk-idle
>> powermanagent sounds fine, compared to a single-subscriber
>> userspace-only disk-idle event interface. :)
>
> Well, we still need to expose this for the access pattern modifying. I
> really don't see the issue with the single subscriber being devkit-disks
> - none of the operations involved are atomic, so we're inherently racy
> here.

Single-subscriber event interfaces are usually a no-go for generic
infrastructure like this. We still have the unmodified HAL running
until it is dead, and this works only because there are no such
awkward interfaces. In a few years we will probably have diskfoo
replacing dk-disks, and then ... :)

Kay

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle
@ 2009-11-19 14:34                                     ` Kay Sievers
  0 siblings, 0 replies; 73+ messages in thread
From: Kay Sievers @ 2009-11-19 14:34 UTC (permalink / raw)
  To: Matthew Garrett; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Thu, Nov 19, 2009 at 15:30, Matthew Garrett <mjg59@srcf.ucam.org> wrote:
> On Thu, Nov 19, 2009 at 03:25:59PM +0100, Kay Sievers wrote:
>> On Thu, Nov 19, 2009 at 15:16, Matthew Garrett <mjg59@srcf.ucam.org> wrote:
>> > It could, but it seems a bit of a hack. It'd still also require the
>> > timer to be in the kernel, so we might as well expose that to userspace.
>>
>> Sure, but a userspace configurable policy for an in-kernel disk-idle
>> powermanagent sounds fine, compared to a single-subscriber
>> userspace-only disk-idle event interface. :)
>
> Well, we still need to expose this for the access pattern modifying. I
> really don't see the issue with the single subscriber being devkit-disks
> - none of the operations involved are atomic, so we're inherently racy
> here.

Single-subscriber event interfaces are usually a no-go for generic
infrastructure like this. We still have the unmodified HAL running
until it is dead, and this works only because there are no such
awkward interfaces. In a few years we will probably have diskfoo
replacing dk-disks, and then ... :)

Kay

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle changes
  2009-11-19 14:34                                     ` [PATCH] [RFC] Add support for uevents on block device idle Kay Sievers
@ 2009-11-19 14:48                                       ` Matthew Garrett
  -1 siblings, 0 replies; 73+ messages in thread
From: Matthew Garrett @ 2009-11-19 14:48 UTC (permalink / raw)
  To: Kay Sievers; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Thu, Nov 19, 2009 at 03:34:53PM +0100, Kay Sievers wrote:

> Single-subscriber event interfaces are usually a no-go for generic
> infrastructure like this. We still have the unmodified HAL running
> until it is dead, and this works only because there are no such
> awkward interfaces. In a few years we will probably have diskfoo
> replacing dk-disks, and then ... :)

If you've got any ideas for what a multi-subscriber interface would look 
like, I'm happy look at it. I don't think there's an especially 
compelling use-case for one right now so I'm not enthusiastic about the 
additional complexity that'd be required, but as long as there's basic 
agreement that it's not practical to do this in userspace then we're at 
least on the same page.

-- 
Matthew Garrett | mjg59@srcf.ucam.org

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle
@ 2009-11-19 14:48                                       ` Matthew Garrett
  0 siblings, 0 replies; 73+ messages in thread
From: Matthew Garrett @ 2009-11-19 14:48 UTC (permalink / raw)
  To: Kay Sievers; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Thu, Nov 19, 2009 at 03:34:53PM +0100, Kay Sievers wrote:

> Single-subscriber event interfaces are usually a no-go for generic
> infrastructure like this. We still have the unmodified HAL running
> until it is dead, and this works only because there are no such
> awkward interfaces. In a few years we will probably have diskfoo
> replacing dk-disks, and then ... :)

If you've got any ideas for what a multi-subscriber interface would look 
like, I'm happy look at it. I don't think there's an especially 
compelling use-case for one right now so I'm not enthusiastic about the 
additional complexity that'd be required, but as long as there's basic 
agreement that it's not practical to do this in userspace then we're at 
least on the same page.

-- 
Matthew Garrett | mjg59@srcf.ucam.org

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle  changes
  2009-11-19 14:48                                       ` [PATCH] [RFC] Add support for uevents on block device idle Matthew Garrett
@ 2009-11-19 15:00                                         ` Kay Sievers
  -1 siblings, 0 replies; 73+ messages in thread
From: Kay Sievers @ 2009-11-19 15:00 UTC (permalink / raw)
  To: Matthew Garrett; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Thu, Nov 19, 2009 at 15:48, Matthew Garrett <mjg59@srcf.ucam.org> wrote:
> On Thu, Nov 19, 2009 at 03:34:53PM +0100, Kay Sievers wrote:
>
>> Single-subscriber event interfaces are usually a no-go for generic
>> infrastructure like this. We still have the unmodified HAL running
>> until it is dead, and this works only because there are no such
>> awkward interfaces. In a few years we will probably have diskfoo
>> replacing dk-disks, and then ... :)
>
> If you've got any ideas for what a multi-subscriber interface would look
> like, I'm happy look at it.

Yeah, it would not be as simple as your patch. It probably involves a
way to get a file descriptor per listener, to let the kernel know if
anybody is interested, and to auto-cleanup when the listener dies, and
to have per instance timers.

> I don't think there's an especially
> compelling use-case for one right now so I'm not enthusiastic about the
> additional complexity that'd be required,

Right, but we've been there, and it's a pain, if you can not subscribe
to an interface because something else is already using it/expecting
it is the only user ever. So there needs to be a good reason for
adding something like this as a new interface, which will very likely
hit us back some day.

> but as long as there's basic
> agreement that it's not practical to do this in userspace then we're at
> least on the same page.

I'm all for executing the policy inside the kernel and let userspace
only enable/configure it. It think there is not much to disagr4ee
about such an approach.

Thanks,
Kay

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle
@ 2009-11-19 15:00                                         ` Kay Sievers
  0 siblings, 0 replies; 73+ messages in thread
From: Kay Sievers @ 2009-11-19 15:00 UTC (permalink / raw)
  To: Matthew Garrett; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Thu, Nov 19, 2009 at 15:48, Matthew Garrett <mjg59@srcf.ucam.org> wrote:
> On Thu, Nov 19, 2009 at 03:34:53PM +0100, Kay Sievers wrote:
>
>> Single-subscriber event interfaces are usually a no-go for generic
>> infrastructure like this. We still have the unmodified HAL running
>> until it is dead, and this works only because there are no such
>> awkward interfaces. In a few years we will probably have diskfoo
>> replacing dk-disks, and then ... :)
>
> If you've got any ideas for what a multi-subscriber interface would look
> like, I'm happy look at it.

Yeah, it would not be as simple as your patch. It probably involves a
way to get a file descriptor per listener, to let the kernel know if
anybody is interested, and to auto-cleanup when the listener dies, and
to have per instance timers.

> I don't think there's an especially
> compelling use-case for one right now so I'm not enthusiastic about the
> additional complexity that'd be required,

Right, but we've been there, and it's a pain, if you can not subscribe
to an interface because something else is already using it/expecting
it is the only user ever. So there needs to be a good reason for
adding something like this as a new interface, which will very likely
hit us back some day.

> but as long as there's basic
> agreement that it's not practical to do this in userspace then we're at
> least on the same page.

I'm all for executing the policy inside the kernel and let userspace
only enable/configure it. It think there is not much to disagr4ee
about such an approach.

Thanks,
Kay

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle changes
  2009-11-19 15:00                                         ` [PATCH] [RFC] Add support for uevents on block device idle Kay Sievers
@ 2009-11-20 20:29                                           ` Matthew Garrett
  -1 siblings, 0 replies; 73+ messages in thread
From: Matthew Garrett @ 2009-11-20 20:29 UTC (permalink / raw)
  To: Kay Sievers; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Thu, Nov 19, 2009 at 04:00:46PM +0100, Kay Sievers wrote:

> Yeah, it would not be as simple as your patch. It probably involves a
> way to get a file descriptor per listener, to let the kernel know if
> anybody is interested, and to auto-cleanup when the listener dies, and
> to have per instance timers.

This would seem to involve a lot of extra locking in the block 
submission and completion code. I don't think it's ideal. How about 
this:

* idle_hysteresis contains a value. If it's greater than 0, attempting 
to increase it will give -EINVAL. It can be polled.

* On idle state transition, applications listening to the stat sysfs 
node will get woken. The stat output will include the number of msecs 
that the disk has been idle. If this is less than the application 
requested, it can set a timer to wake it up again in the future and 
recheck.

* When an application exits, if (and only if) it wrote a value to 
idle_hysteresis, it should set this back to 0. This will notify any 
other apps, which may then set their own wakeup time.

It's not beautiful but it satisfies the constraints. There's a minimum 
of extra wakeups, it doesn't complicate the block path any further and 
multiple applications can take advantage of it.

-- 
Matthew Garrett | mjg59@srcf.ucam.org

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle
@ 2009-11-20 20:29                                           ` Matthew Garrett
  0 siblings, 0 replies; 73+ messages in thread
From: Matthew Garrett @ 2009-11-20 20:29 UTC (permalink / raw)
  To: Kay Sievers; +Cc: David Zeuthen, linux-kernel, axboe, linux-hotplug

On Thu, Nov 19, 2009 at 04:00:46PM +0100, Kay Sievers wrote:

> Yeah, it would not be as simple as your patch. It probably involves a
> way to get a file descriptor per listener, to let the kernel know if
> anybody is interested, and to auto-cleanup when the listener dies, and
> to have per instance timers.

This would seem to involve a lot of extra locking in the block 
submission and completion code. I don't think it's ideal. How about 
this:

* idle_hysteresis contains a value. If it's greater than 0, attempting 
to increase it will give -EINVAL. It can be polled.

* On idle state transition, applications listening to the stat sysfs 
node will get woken. The stat output will include the number of msecs 
that the disk has been idle. If this is less than the application 
requested, it can set a timer to wake it up again in the future and 
recheck.

* When an application exits, if (and only if) it wrote a value to 
idle_hysteresis, it should set this back to 0. This will notify any 
other apps, which may then set their own wakeup time.

It's not beautiful but it satisfies the constraints. There's a minimum 
of extra wakeups, it doesn't complicate the block path any further and 
multiple applications can take advantage of it.

-- 
Matthew Garrett | mjg59@srcf.ucam.org

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle changes
  2009-11-18 20:07                   ` [PATCH] [RFC] Add support for uevents on block device idle Matthew Garrett
@ 2009-11-22 23:37                     ` Pavel Machek
  -1 siblings, 0 replies; 73+ messages in thread
From: Pavel Machek @ 2009-11-22 23:37 UTC (permalink / raw)
  To: Matthew Garrett
  Cc: Kay Sievers, David Zeuthen, linux-kernel, axboe, linux-hotplug

On Wed 2009-11-18 20:07:12, Matthew Garrett wrote:
> On Wed, Nov 18, 2009 at 09:03:21PM +0100, Kay Sievers wrote:
> 
> > Sure, but what's wrong with reading that file every 50 seconds? Almost
> > all boxes poll for media changes of optical drives and usb card
> > readers anyway, so it's not that we are not doing stuff like this
> > already.
> 
> We poll for media because there's no event-based way of avoiding it - in 
> this case there is.

...when you add overhead to every disk operation. I'd say that polling
once in 50 seconds is preferable to that.
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle
@ 2009-11-22 23:37                     ` Pavel Machek
  0 siblings, 0 replies; 73+ messages in thread
From: Pavel Machek @ 2009-11-22 23:37 UTC (permalink / raw)
  To: Matthew Garrett
  Cc: Kay Sievers, David Zeuthen, linux-kernel, axboe, linux-hotplug

On Wed 2009-11-18 20:07:12, Matthew Garrett wrote:
> On Wed, Nov 18, 2009 at 09:03:21PM +0100, Kay Sievers wrote:
> 
> > Sure, but what's wrong with reading that file every 50 seconds? Almost
> > all boxes poll for media changes of optical drives and usb card
> > readers anyway, so it's not that we are not doing stuff like this
> > already.
> 
> We poll for media because there's no event-based way of avoiding it - in 
> this case there is.

...when you add overhead to every disk operation. I'd say that polling
once in 50 seconds is preferable to that.
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle changes
  2009-11-22 23:37                     ` [PATCH] [RFC] Add support for uevents on block device idle Pavel Machek
@ 2009-11-23 14:12                       ` Matthew Garrett
  -1 siblings, 0 replies; 73+ messages in thread
From: Matthew Garrett @ 2009-11-23 14:12 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Kay Sievers, David Zeuthen, linux-kernel, axboe, linux-hotplug

On Mon, Nov 23, 2009 at 12:37:49AM +0100, Pavel Machek wrote:
> On Wed 2009-11-18 20:07:12, Matthew Garrett wrote:
> > On Wed, Nov 18, 2009 at 09:03:21PM +0100, Kay Sievers wrote:
> > 
> > > Sure, but what's wrong with reading that file every 50 seconds? Almost
> > > all boxes poll for media changes of optical drives and usb card
> > > readers anyway, so it's not that we are not doing stuff like this
> > > already.
> > 
> > We poll for media because there's no event-based way of avoiding it - in 
> > this case there is.
> 
> ...when you add overhead to every disk operation. I'd say that polling
> once in 50 seconds is preferable to that.

Yeah, but 50 seconds isn't the timescale we're talking about here.

-- 
Matthew Garrett | mjg59@srcf.ucam.org

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle
@ 2009-11-23 14:12                       ` Matthew Garrett
  0 siblings, 0 replies; 73+ messages in thread
From: Matthew Garrett @ 2009-11-23 14:12 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Kay Sievers, David Zeuthen, linux-kernel, axboe, linux-hotplug

On Mon, Nov 23, 2009 at 12:37:49AM +0100, Pavel Machek wrote:
> On Wed 2009-11-18 20:07:12, Matthew Garrett wrote:
> > On Wed, Nov 18, 2009 at 09:03:21PM +0100, Kay Sievers wrote:
> > 
> > > Sure, but what's wrong with reading that file every 50 seconds? Almost
> > > all boxes poll for media changes of optical drives and usb card
> > > readers anyway, so it's not that we are not doing stuff like this
> > > already.
> > 
> > We poll for media because there's no event-based way of avoiding it - in 
> > this case there is.
> 
> ...when you add overhead to every disk operation. I'd say that polling
> once in 50 seconds is preferable to that.

Yeah, but 50 seconds isn't the timescale we're talking about here.

-- 
Matthew Garrett | mjg59@srcf.ucam.org

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle changes
  2009-11-22 23:37                     ` [PATCH] [RFC] Add support for uevents on block device idle Pavel Machek
@ 2009-11-23 14:17                       ` Jens Axboe
  -1 siblings, 0 replies; 73+ messages in thread
From: Jens Axboe @ 2009-11-23 14:17 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Matthew Garrett, Kay Sievers, David Zeuthen, linux-kernel, linux-hotplug

On Mon, Nov 23 2009, Pavel Machek wrote:
> On Wed 2009-11-18 20:07:12, Matthew Garrett wrote:
> > On Wed, Nov 18, 2009 at 09:03:21PM +0100, Kay Sievers wrote:
> > 
> > > Sure, but what's wrong with reading that file every 50 seconds? Almost
> > > all boxes poll for media changes of optical drives and usb card
> > > readers anyway, so it's not that we are not doing stuff like this
> > > already.
> > 
> > We poll for media because there's no event-based way of avoiding it - in 
> > this case there is.
> 
> ...when you add overhead to every disk operation. I'd say that polling
> once in 50 seconds is preferable to that.

I have to agree, doing a mod_timer() on every single IO is going to suck
big time. I went to great lengths to avoid doing that even for timeout
detection. So that's pretty much a non-starter to begin with.

Additionally, as Bart also wrote, you are not doing this in the right
place. You want to do this post-merge, not for each incoming IO. Have
you looked at laptop mode? Looks like you are essentially re-inventing
that, but in a bad way.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle
@ 2009-11-23 14:17                       ` Jens Axboe
  0 siblings, 0 replies; 73+ messages in thread
From: Jens Axboe @ 2009-11-23 14:17 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Matthew Garrett, Kay Sievers, David Zeuthen, linux-kernel, linux-hotplug

On Mon, Nov 23 2009, Pavel Machek wrote:
> On Wed 2009-11-18 20:07:12, Matthew Garrett wrote:
> > On Wed, Nov 18, 2009 at 09:03:21PM +0100, Kay Sievers wrote:
> > 
> > > Sure, but what's wrong with reading that file every 50 seconds? Almost
> > > all boxes poll for media changes of optical drives and usb card
> > > readers anyway, so it's not that we are not doing stuff like this
> > > already.
> > 
> > We poll for media because there's no event-based way of avoiding it - in 
> > this case there is.
> 
> ...when you add overhead to every disk operation. I'd say that polling
> once in 50 seconds is preferable to that.

I have to agree, doing a mod_timer() on every single IO is going to suck
big time. I went to great lengths to avoid doing that even for timeout
detection. So that's pretty much a non-starter to begin with.

Additionally, as Bart also wrote, you are not doing this in the right
place. You want to do this post-merge, not for each incoming IO. Have
you looked at laptop mode? Looks like you are essentially re-inventing
that, but in a bad way.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle changes
  2009-11-23 14:17                       ` [PATCH] [RFC] Add support for uevents on block device idle Jens Axboe
@ 2009-11-23 14:25                         ` Matthew Garrett
  -1 siblings, 0 replies; 73+ messages in thread
From: Matthew Garrett @ 2009-11-23 14:25 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Pavel Machek, Kay Sievers, David Zeuthen, linux-kernel, linux-hotplug

On Mon, Nov 23, 2009 at 03:17:54PM +0100, Jens Axboe wrote:

> I have to agree, doing a mod_timer() on every single IO is going to suck
> big time. I went to great lengths to avoid doing that even for timeout
> detection. So that's pretty much a non-starter to begin with.

It's conditional on a (default off) setting, so it's not a hit unless 
the user requests it. But yeah, the performance hit is obviously a 
concern. It may be that polling is the least bad way of doing this.

> Additionally, as Bart also wrote, you are not doing this in the right
> place. You want to do this post-merge, not for each incoming IO. Have
> you looked at laptop mode? Looks like you are essentially re-inventing
> that, but in a bad way.

Right, that's mostly down to my having no familiarity with the block 
layer at all :) I can fix that up easily enough, but if a deferrable 
timer is going to be too expensive then it'll need some rethinking 
anyway.

-- 
Matthew Garrett | mjg59@srcf.ucam.org

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle
@ 2009-11-23 14:25                         ` Matthew Garrett
  0 siblings, 0 replies; 73+ messages in thread
From: Matthew Garrett @ 2009-11-23 14:25 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Pavel Machek, Kay Sievers, David Zeuthen, linux-kernel, linux-hotplug

On Mon, Nov 23, 2009 at 03:17:54PM +0100, Jens Axboe wrote:

> I have to agree, doing a mod_timer() on every single IO is going to suck
> big time. I went to great lengths to avoid doing that even for timeout
> detection. So that's pretty much a non-starter to begin with.

It's conditional on a (default off) setting, so it's not a hit unless 
the user requests it. But yeah, the performance hit is obviously a 
concern. It may be that polling is the least bad way of doing this.

> Additionally, as Bart also wrote, you are not doing this in the right
> place. You want to do this post-merge, not for each incoming IO. Have
> you looked at laptop mode? Looks like you are essentially re-inventing
> that, but in a bad way.

Right, that's mostly down to my having no familiarity with the block 
layer at all :) I can fix that up easily enough, but if a deferrable 
timer is going to be too expensive then it'll need some rethinking 
anyway.

-- 
Matthew Garrett | mjg59@srcf.ucam.org

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle changes
  2009-11-23 14:25                         ` [PATCH] [RFC] Add support for uevents on block device idle Matthew Garrett
@ 2009-11-23 14:31                           ` Jens Axboe
  -1 siblings, 0 replies; 73+ messages in thread
From: Jens Axboe @ 2009-11-23 14:31 UTC (permalink / raw)
  To: Matthew Garrett
  Cc: Pavel Machek, Kay Sievers, David Zeuthen, linux-kernel, linux-hotplug

On Mon, Nov 23 2009, Matthew Garrett wrote:
> On Mon, Nov 23, 2009 at 03:17:54PM +0100, Jens Axboe wrote:
> 
> > I have to agree, doing a mod_timer() on every single IO is going to suck
> > big time. I went to great lengths to avoid doing that even for timeout
> > detection. So that's pretty much a non-starter to begin with.
> 
> It's conditional on a (default off) setting, so it's not a hit unless 
> the user requests it. But yeah, the performance hit is obviously a 
> concern. It may be that polling is the least bad way of doing this.

Even if it's off by default, doesn't mean we shouldn't make the
implementation correct or fast :-)

> > Additionally, as Bart also wrote, you are not doing this in the right
> > place. You want to do this post-merge, not for each incoming IO. Have
> > you looked at laptop mode? Looks like you are essentially re-inventing
> > that, but in a bad way.
> 
> Right, that's mostly down to my having no familiarity with the block 
> layer at all :) I can fix that up easily enough, but if a deferrable 
> timer is going to be too expensive then it'll need some rethinking 
> anyway.

Well, take a look at laptop mode. A timer per-io is probably
unavoidable, but doing it at IO completion could mean a big decrease in
timer activity as opposed to doing it for each incoming IO. And since
you are looking at when the disk is idle, it makes a lot more sense to
me to do that when the we complete a request (and have no further
pending IO) rather than on incoming IO.

Your biggest performance issue here is going to be sync IO, since the
disk will go idle very briefly before being kicked into action again.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle
@ 2009-11-23 14:31                           ` Jens Axboe
  0 siblings, 0 replies; 73+ messages in thread
From: Jens Axboe @ 2009-11-23 14:31 UTC (permalink / raw)
  To: Matthew Garrett
  Cc: Pavel Machek, Kay Sievers, David Zeuthen, linux-kernel, linux-hotplug

On Mon, Nov 23 2009, Matthew Garrett wrote:
> On Mon, Nov 23, 2009 at 03:17:54PM +0100, Jens Axboe wrote:
> 
> > I have to agree, doing a mod_timer() on every single IO is going to suck
> > big time. I went to great lengths to avoid doing that even for timeout
> > detection. So that's pretty much a non-starter to begin with.
> 
> It's conditional on a (default off) setting, so it's not a hit unless 
> the user requests it. But yeah, the performance hit is obviously a 
> concern. It may be that polling is the least bad way of doing this.

Even if it's off by default, doesn't mean we shouldn't make the
implementation correct or fast :-)

> > Additionally, as Bart also wrote, you are not doing this in the right
> > place. You want to do this post-merge, not for each incoming IO. Have
> > you looked at laptop mode? Looks like you are essentially re-inventing
> > that, but in a bad way.
> 
> Right, that's mostly down to my having no familiarity with the block 
> layer at all :) I can fix that up easily enough, but if a deferrable 
> timer is going to be too expensive then it'll need some rethinking 
> anyway.

Well, take a look at laptop mode. A timer per-io is probably
unavoidable, but doing it at IO completion could mean a big decrease in
timer activity as opposed to doing it for each incoming IO. And since
you are looking at when the disk is idle, it makes a lot more sense to
me to do that when the we complete a request (and have no further
pending IO) rather than on incoming IO.

Your biggest performance issue here is going to be sync IO, since the
disk will go idle very briefly before being kicked into action again.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle changes
  2009-11-23 14:31                           ` [PATCH] [RFC] Add support for uevents on block device idle Jens Axboe
@ 2009-11-23 14:42                             ` Matthew Garrett
  -1 siblings, 0 replies; 73+ messages in thread
From: Matthew Garrett @ 2009-11-23 14:42 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Pavel Machek, Kay Sievers, David Zeuthen, linux-kernel, linux-hotplug

On Mon, Nov 23, 2009 at 03:31:40PM +0100, Jens Axboe wrote:

> Well, take a look at laptop mode. A timer per-io is probably
> unavoidable, but doing it at IO completion could mean a big decrease in
> timer activity as opposed to doing it for each incoming IO. And since
> you are looking at when the disk is idle, it makes a lot more sense to
> me to do that when the we complete a request (and have no further
> pending IO) rather than on incoming IO.

Right. The current implementation I have does a del_timer() at 
submission (which should be moved to post-merge) - that should be cheap 
in the common case of a new command being submitted when there's already 
commands outstanding. There's then a mod_timer() at completion time. 
That's still a certain amount of expense, but it should be less.

> Your biggest performance issue here is going to be sync IO, since the
> disk will go idle very briefly before being kicked into action again.

Ok, I'll try to benchmark that.

The alternative (polling) method would be something much like Kay 
suggested - either add an extra field to stat or an extra sysfs file, 
then invalidate that on submission and set to jiffies on completion. 
It's not ideal from a wakeups perspective, but it's pretty low impact on 
the kernel side.

-- 
Matthew Garrett | mjg59@srcf.ucam.org

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle
@ 2009-11-23 14:42                             ` Matthew Garrett
  0 siblings, 0 replies; 73+ messages in thread
From: Matthew Garrett @ 2009-11-23 14:42 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Pavel Machek, Kay Sievers, David Zeuthen, linux-kernel, linux-hotplug

On Mon, Nov 23, 2009 at 03:31:40PM +0100, Jens Axboe wrote:

> Well, take a look at laptop mode. A timer per-io is probably
> unavoidable, but doing it at IO completion could mean a big decrease in
> timer activity as opposed to doing it for each incoming IO. And since
> you are looking at when the disk is idle, it makes a lot more sense to
> me to do that when the we complete a request (and have no further
> pending IO) rather than on incoming IO.

Right. The current implementation I have does a del_timer() at 
submission (which should be moved to post-merge) - that should be cheap 
in the common case of a new command being submitted when there's already 
commands outstanding. There's then a mod_timer() at completion time. 
That's still a certain amount of expense, but it should be less.

> Your biggest performance issue here is going to be sync IO, since the
> disk will go idle very briefly before being kicked into action again.

Ok, I'll try to benchmark that.

The alternative (polling) method would be something much like Kay 
suggested - either add an extra field to stat or an extra sysfs file, 
then invalidate that on submission and set to jiffies on completion. 
It's not ideal from a wakeups perspective, but it's pretty low impact on 
the kernel side.

-- 
Matthew Garrett | mjg59@srcf.ucam.org

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle changes
  2009-11-23 14:42                             ` [PATCH] [RFC] Add support for uevents on block device idle Matthew Garrett
@ 2009-11-23 19:50                               ` Jens Axboe
  -1 siblings, 0 replies; 73+ messages in thread
From: Jens Axboe @ 2009-11-23 19:50 UTC (permalink / raw)
  To: Matthew Garrett
  Cc: Pavel Machek, Kay Sievers, David Zeuthen, linux-kernel, linux-hotplug

On Mon, Nov 23 2009, Matthew Garrett wrote:
> On Mon, Nov 23, 2009 at 03:31:40PM +0100, Jens Axboe wrote:
> 
> > Well, take a look at laptop mode. A timer per-io is probably
> > unavoidable, but doing it at IO completion could mean a big decrease in
> > timer activity as opposed to doing it for each incoming IO. And since
> > you are looking at when the disk is idle, it makes a lot more sense to
> > me to do that when the we complete a request (and have no further
> > pending IO) rather than on incoming IO.
> 
> Right. The current implementation I have does a del_timer() at 
> submission (which should be moved to post-merge) - that should be cheap 
> in the common case of a new command being submitted when there's already 
> commands outstanding. There's then a mod_timer() at completion time. 
> That's still a certain amount of expense, but it should be less.
> 
> > Your biggest performance issue here is going to be sync IO, since the
> > disk will go idle very briefly before being kicked into action again.
> 
> Ok, I'll try to benchmark that.
> 
> The alternative (polling) method would be something much like Kay 
> suggested - either add an extra field to stat or an extra sysfs file, 
> then invalidate that on submission and set to jiffies on completion. 
> It's not ideal from a wakeups perspective, but it's pretty low impact on 
> the kernel side.

If the polling works out, then yes that approach is certainly a lot
better from a performance impact pov.

What kind of time intervals are you targetting?

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle
@ 2009-11-23 19:50                               ` Jens Axboe
  0 siblings, 0 replies; 73+ messages in thread
From: Jens Axboe @ 2009-11-23 19:50 UTC (permalink / raw)
  To: Matthew Garrett
  Cc: Pavel Machek, Kay Sievers, David Zeuthen, linux-kernel, linux-hotplug

On Mon, Nov 23 2009, Matthew Garrett wrote:
> On Mon, Nov 23, 2009 at 03:31:40PM +0100, Jens Axboe wrote:
> 
> > Well, take a look at laptop mode. A timer per-io is probably
> > unavoidable, but doing it at IO completion could mean a big decrease in
> > timer activity as opposed to doing it for each incoming IO. And since
> > you are looking at when the disk is idle, it makes a lot more sense to
> > me to do that when the we complete a request (and have no further
> > pending IO) rather than on incoming IO.
> 
> Right. The current implementation I have does a del_timer() at 
> submission (which should be moved to post-merge) - that should be cheap 
> in the common case of a new command being submitted when there's already 
> commands outstanding. There's then a mod_timer() at completion time. 
> That's still a certain amount of expense, but it should be less.
> 
> > Your biggest performance issue here is going to be sync IO, since the
> > disk will go idle very briefly before being kicked into action again.
> 
> Ok, I'll try to benchmark that.
> 
> The alternative (polling) method would be something much like Kay 
> suggested - either add an extra field to stat or an extra sysfs file, 
> then invalidate that on submission and set to jiffies on completion. 
> It's not ideal from a wakeups perspective, but it's pretty low impact on 
> the kernel side.

If the polling works out, then yes that approach is certainly a lot
better from a performance impact pov.

What kind of time intervals are you targetting?

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle changes
  2009-11-23 19:50                               ` [PATCH] [RFC] Add support for uevents on block device idle Jens Axboe
@ 2009-11-23 19:54                                 ` Matthew Garrett
  -1 siblings, 0 replies; 73+ messages in thread
From: Matthew Garrett @ 2009-11-23 19:54 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Pavel Machek, Kay Sievers, David Zeuthen, linux-kernel, linux-hotplug

On Mon, Nov 23, 2009 at 08:50:00PM +0100, Jens Axboe wrote:

> If the polling works out, then yes that approach is certainly a lot
> better from a performance impact pov.
> 
> What kind of time intervals are you targetting?

On the order of a second. I'm doing benchmarking with my current 
implementation now, I'll let you know how it looks.

-- 
Matthew Garrett | mjg59@srcf.ucam.org

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH] [RFC] Add support for uevents on block device idle
@ 2009-11-23 19:54                                 ` Matthew Garrett
  0 siblings, 0 replies; 73+ messages in thread
From: Matthew Garrett @ 2009-11-23 19:54 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Pavel Machek, Kay Sievers, David Zeuthen, linux-kernel, linux-hotplug

On Mon, Nov 23, 2009 at 08:50:00PM +0100, Jens Axboe wrote:

> If the polling works out, then yes that approach is certainly a lot
> better from a performance impact pov.
> 
> What kind of time intervals are you targetting?

On the order of a second. I'm doing benchmarking with my current 
implementation now, I'll let you know how it looks.

-- 
Matthew Garrett | mjg59@srcf.ucam.org

^ permalink raw reply	[flat|nested] 73+ messages in thread

* [RFC] Add support for events on block device idle changes
  2009-11-23 19:50                               ` [PATCH] [RFC] Add support for uevents on block device idle Jens Axboe
  (?)
  (?)
@ 2009-12-11 21:20                               ` Matthew Garrett
  -1 siblings, 0 replies; 73+ messages in thread
From: Matthew Garrett @ 2009-12-11 21:20 UTC (permalink / raw)
  To: jens.axboe; +Cc: pavel, kay.sievers, david, linux-kernel, Matthew Garrett

I looked into polling and realised that it's not going to work - reducing
the performance hit requires the idle->active transition to be acknowleged
as quickly as possible, so there needs to be an event for that. And for
that to happen, we need to have an idea of whether the disk is idle or not.
That could be done by just performing a comparison of the last request time
against the current time, but it still ends up giving us several of the
same problems.

What I've done in this version is move the timer modification to the
completion, with request submission just performing the timer deletion. This
should make things less expensive than the previous versions of the patch.
I've also added updates to the idle_hysteresis file, which makes it possible
to implement a mechanism for multiple applications with different timeout
requirements. This is somewhat hacky, but avoids putting any more complexity
in fast path code. Thoughts?

--- 

Userspace may wish to know whether a given disk is active or idle, for
example to modify power management policy based on access patterns. This
patch adds a deferrable timer to the block layer which will fire if the
disk is idle for a user-definable period of time, generating a
notification event on the device's stat node. An event will also be
generated if an access is received while the disk is classified as idle.

 Documentation/ABI/testing/sysfs-block |   49 +++++++++++++++++++++++++++++-
 block/blk-core.c                      |    5 +++
 block/elevator.c                      |    7 ++++
 block/genhd.c                         |   54 +++++++++++++++++++++++++++++++++
 fs/partitions/check.c                 |    9 ++++-
 include/linux/genhd.h                 |    6 ++++
 6 files changed, 127 insertions(+), 3 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index 5f3beda..03b411b 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -3,7 +3,7 @@ Date:		February 2008
 Contact:	Jerome Marchand <jmarchan@redhat.com>
 Description:
 		The /sys/block/<disk>/stat files displays the I/O
-		statistics of disk <disk>. They contain 11 fields:
+		statistics of disk <disk>. They contain 12 fields:
 		 1 - reads completed succesfully
 		 2 - reads merged
 		 3 - sectors read
@@ -15,6 +15,14 @@ Description:
 		 9 - I/Os currently in progress
 		10 - time spent doing I/Os (ms)
 		11 - weighted time spent doing I/Os (ms)
+		12 - -1 if the disk is active, otherwise the length of time in
+		     milliseconds since the disk became idle (determined by
+		     the idle_hysteresis setting)
+
+		Applications that call poll() or select() on this attribute
+		will be woken when the block device undergoes a transition
+		between active and idle.
+
 		For more details refer Documentation/iostats.txt
 
 
@@ -128,3 +136,42 @@ Description:
 		preferred request size for workloads where sustained
 		throughput is desired.  If no optimal I/O size is
 		reported this file contains 0.
+
+What:		/sys/block/<disk>/idle_hysteresis
+Date:		November 2009
+Contact:	Matthew Garrett <mjg@redhat.com>
+Description:
+		Contains the number of milliseconds to wait after an
+		access before declaring that a disk is idle. Any
+		accesses to the block device during this time will
+		reset the timer. "0" (the default) indicates that no
+		events will be generated. If a value has already been
+		written to this file, then the only valid values are
+		either "0" (to disable notification) or a number
+		smaller than the current value. On write, a
+		notification will be sent to any userspace
+		applications poll()ing on this file.
+
+		The intended use of this interface is to allow
+		applications to change power management policy based
+		on disk activity patterns. The first application to
+		use this interface should write its timeout value and
+		continue monitoring this file along with
+		"stat". Notifications will be sent to the "stat" file
+		when the disk switches from active to idle or
+		vice-versa.
+
+		If more than one application uses this interface, the
+		second application should attempt to write its own
+		timeout. If this fails, it should read the current
+		timeout. It will then be woken on active to idle
+		transitions, at which point it should sleep for the
+		time difference between its desired timeout and the
+		programmed timeout. On waking, it should then re-read
+		the "state" file to determine if the disk has been
+		idle for long enough.
+
+		If the write is successful, then the first application
+		to use the interface will be woken instead. It should
+		then modify its wakeup code to match the above
+		description.
diff --git a/block/blk-core.c b/block/blk-core.c
index 10e305f..16b501a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2115,6 +2115,11 @@ static void blk_finish_request(struct request *req, int error)
 	if (unlikely(laptop_mode) && blk_fs_request(req))
 		laptop_io_completion(req);
 
+	if (blk_fs_request(req) && req->rq_disk->hysteresis_time)
+		mod_timer(&req->rq_disk->hysteresis_timer,
+			  jiffies+msecs_to_jiffies
+			  (req->rq_disk->hysteresis_time));
+
 	blk_delete_timer(req);
 
 	blk_account_io_done(req);
diff --git a/block/elevator.c b/block/elevator.c
index a847046..01f0bfb 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -683,6 +683,13 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
 		BUG();
 	}
 
+	if (blk_fs_request(rq) && rq->rq_disk->hysteresis_time &&
+	    rq->rq_disk->idle) {
+		rq->rq_disk->idle = 0;
+		del_timer(&rq->rq_disk->hysteresis_timer);
+		schedule_work(&rq->rq_disk->idle_notify);
+	}
+
 	if (unplug_it && blk_queue_plugged(q)) {
 		int nrq = q->rq.count[BLK_RW_SYNC] + q->rq.count[BLK_RW_ASYNC]
 				- queue_in_flight(q);
diff --git a/block/genhd.c b/block/genhd.c
index 517e433..802c142 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -504,6 +504,21 @@ static int exact_lock(dev_t devt, void *data)
 	return 0;
 }
 
+static void disk_idle(unsigned long data)
+{
+	struct gendisk *gd = (struct gendisk *)data;
+
+	gd->idle = jiffies;
+	schedule_work(&gd->idle_notify);
+}
+
+static void disk_idle_notify_thread(struct work_struct *work)
+{
+	struct gendisk *gd = container_of(work, struct gendisk, idle_notify);
+
+	sysfs_notify(&disk_to_dev(gd)->kobj, NULL, "stat");
+}
+
 /**
  * add_disk - add partitioning information to kernel list
  * @disk: per-device partitioning information
@@ -543,6 +558,10 @@ void add_disk(struct gendisk *disk)
 
 	blk_register_region(disk_devt(disk), disk->minors, NULL,
 			    exact_match, exact_lock, disk);
+
+	init_timer(&disk->hysteresis_timer);
+	setup_timer(&disk->hysteresis_timer, disk_idle, (unsigned long)disk);
+
 	register_disk(disk);
 	blk_register_queue(disk);
 
@@ -861,6 +880,36 @@ static ssize_t disk_alignment_offset_show(struct device *dev,
 	return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue));
 }
 
+static ssize_t disk_idle_hysteresis_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+
+	return sprintf(buf, "%d\n", disk->hysteresis_time);
+}
+
+static ssize_t disk_idle_hysteresis_store(struct device *dev,
+					  struct device_attribute *attr,
+					  const char *buf, size_t count)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+	unsigned long timeout;
+	int res;
+
+	res = strict_strtoul(buf, 10, &timeout);
+	if (res)
+		return -EINVAL;
+
+	if (disk->hysteresis_time && timeout > disk->hysteresis_time)
+		return -EINVAL;
+
+	disk->hysteresis_time = timeout;
+	sysfs_notify(&dev->kobj, NULL, "idle_hysteresis");
+
+	return count;
+}
+
 static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
 static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
 static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
@@ -870,6 +919,8 @@ static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL);
 static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
 static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
 static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
+static DEVICE_ATTR(idle_hysteresis, 0644, disk_idle_hysteresis_show,
+		   disk_idle_hysteresis_store);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 static struct device_attribute dev_attr_fail =
 	__ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
@@ -890,6 +941,7 @@ static struct attribute *disk_attrs[] = {
 	&dev_attr_capability.attr,
 	&dev_attr_stat.attr,
 	&dev_attr_inflight.attr,
+	&dev_attr_idle_hysteresis.attr,
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 	&dev_attr_fail.attr,
 #endif
@@ -1183,6 +1235,8 @@ struct gendisk *alloc_disk_node(int minors, int node_id)
 		device_initialize(disk_to_dev(disk));
 		INIT_WORK(&disk->async_notify,
 			media_change_notify_thread);
+		INIT_WORK(&disk->idle_notify,
+			  disk_idle_notify_thread);
 	}
 	return disk;
 }
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 7b685e1..954dc32 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -230,6 +230,7 @@ ssize_t part_stat_show(struct device *dev,
 		       struct device_attribute *attr, char *buf)
 {
 	struct hd_struct *p = dev_to_part(dev);
+	struct gendisk *gd = dev_to_disk(dev);
 	int cpu;
 
 	cpu = part_stat_lock();
@@ -238,7 +239,7 @@ ssize_t part_stat_show(struct device *dev,
 	return sprintf(buf,
 		"%8lu %8lu %8llu %8u "
 		"%8lu %8lu %8llu %8u "
-		"%8u %8u %8u"
+		"%8u %8u %8u %8d"
 		"\n",
 		part_stat_read(p, ios[READ]),
 		part_stat_read(p, merges[READ]),
@@ -250,7 +251,8 @@ ssize_t part_stat_show(struct device *dev,
 		jiffies_to_msecs(part_stat_read(p, ticks[WRITE])),
 		part_in_flight(p),
 		jiffies_to_msecs(part_stat_read(p, io_ticks)),
-		jiffies_to_msecs(part_stat_read(p, time_in_queue)));
+		jiffies_to_msecs(part_stat_read(p, time_in_queue)),
+		gd->idle ? jiffies_to_msecs(jiffies - gd->idle) : -1);
 }
 
 ssize_t part_inflight_show(struct device *dev,
@@ -652,6 +654,9 @@ void del_gendisk(struct gendisk *disk)
 	struct disk_part_iter piter;
 	struct hd_struct *part;
 
+	del_timer_sync(&disk->hysteresis_timer);
+	cancel_work_sync(&disk->idle_notify);
+
 	/* invalidate stuff */
 	disk_part_iter_init(&piter, disk,
 			     DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 297df45..b8e6158 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -12,6 +12,7 @@
 #include <linux/types.h>
 #include <linux/kdev_t.h>
 #include <linux/rcupdate.h>
+#include <linux/timer.h>
 
 #ifdef CONFIG_BLOCK
 
@@ -163,10 +164,15 @@ struct gendisk {
 
 	atomic_t sync_io;		/* RAID */
 	struct work_struct async_notify;
+	struct work_struct idle_notify;
 #ifdef  CONFIG_BLK_DEV_INTEGRITY
 	struct blk_integrity *integrity;
 #endif
 	int node_id;
+
+	unsigned long idle;
+	int hysteresis_time;
+	struct timer_list hysteresis_timer;
 };
 
 static inline struct gendisk *part_to_disk(struct hd_struct *part)
-- 
1.6.5.2


^ permalink raw reply related	[flat|nested] 73+ messages in thread

end of thread, other threads:[~2009-12-11 21:21 UTC | newest]

Thread overview: 73+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-11-17 14:37 [PATCH] [RFC] Add support for uevents on block device idle changes Matthew Garrett
2009-11-17 14:37 ` Matthew Garrett
2009-11-17 15:55 ` Kay Sievers
2009-11-17 15:55   ` [PATCH] [RFC] Add support for uevents on block device idle Kay Sievers
2009-11-17 16:09   ` [PATCH] [RFC] Add support for uevents on block device idle changes David Zeuthen
2009-11-17 16:09     ` [PATCH] [RFC] Add support for uevents on block device idle David Zeuthen
2009-11-17 18:57     ` [PATCH] [RFC] Add support for uevents on block device idle changes Matthew Garrett
2009-11-17 18:57       ` [PATCH] [RFC] Add support for uevents on block device idle Matthew Garrett
2009-11-18 19:30       ` [PATCH] [RFC] Add support for uevents on block device idle changes Kay Sievers
2009-11-18 19:30         ` [PATCH] [RFC] Add support for uevents on block device idle Kay Sievers
2009-11-18 19:40         ` [PATCH] [RFC] Add support for uevents on block device idle changes Matthew Garrett
2009-11-18 19:40           ` [PATCH] [RFC] Add support for uevents on block device idle Matthew Garrett
2009-11-18 19:47           ` [PATCH] [RFC] Add support for uevents on block device idle changes Kay Sievers
2009-11-18 19:47             ` [PATCH] [RFC] Add support for uevents on block device idle Kay Sievers
2009-11-18 19:53             ` [PATCH] [RFC] Add support for uevents on block device idle changes Matthew Garrett
2009-11-18 19:53               ` [PATCH] [RFC] Add support for uevents on block device idle Matthew Garrett
2009-11-18 20:03               ` [PATCH] [RFC] Add support for uevents on block device idle changes Kay Sievers
2009-11-18 20:03                 ` [PATCH] [RFC] Add support for uevents on block device idle Kay Sievers
2009-11-18 20:07                 ` [PATCH] [RFC] Add support for uevents on block device idle changes Matthew Garrett
2009-11-18 20:07                   ` [PATCH] [RFC] Add support for uevents on block device idle Matthew Garrett
2009-11-18 21:06                   ` [PATCH] [RFC] Add support for uevents on block device idle changes Kay Sievers
2009-11-18 21:06                     ` [PATCH] [RFC] Add support for uevents on block device idle Kay Sievers
2009-11-18 21:29                     ` [PATCH] [RFC] Add support for uevents on block device idle changes Kay Sievers
2009-11-18 21:29                       ` [PATCH] [RFC] Add support for uevents on block device idle Kay Sievers
2009-11-18 21:35                       ` [PATCH] [RFC] Add support for uevents on block device idle changes Matthew Garrett
2009-11-18 21:35                         ` [PATCH] [RFC] Add support for uevents on block device idle Matthew Garrett
2009-11-18 21:39                         ` [PATCH] [RFC] Add support for uevents on block device idle changes Kay Sievers
2009-11-18 21:39                           ` [PATCH] [RFC] Add support for uevents on block device idle Kay Sievers
2009-11-18 21:45                           ` [PATCH] [RFC] Add support for uevents on block device idle changes Matthew Garrett
2009-11-18 21:45                             ` [PATCH] [RFC] Add support for uevents on block device idle Matthew Garrett
2009-11-18 21:33                     ` [PATCH] [RFC] Add support for uevents on block device idle changes Matthew Garrett
2009-11-18 21:33                       ` [PATCH] [RFC] Add support for uevents on block device idle Matthew Garrett
2009-11-18 21:40                       ` [PATCH] [RFC] Add support for uevents on block device idle changes Kay Sievers
2009-11-18 21:40                         ` [PATCH] [RFC] Add support for uevents on block device idle Kay Sievers
2009-11-19 11:09                       ` [PATCH] [RFC] Add support for uevents on block device idle changes Kay Sievers
2009-11-19 11:09                         ` [PATCH] [RFC] Add support for uevents on block device idle Kay Sievers
2009-11-19 13:01                         ` [PATCH] [RFC] Add support for uevents on block device idle changes Matthew Garrett
2009-11-19 13:01                           ` [PATCH] [RFC] Add support for uevents on block device idle Matthew Garrett
2009-11-19 13:29                           ` [PATCH] [RFC] Add support for uevents on block device idle changes Kay Sievers
2009-11-19 13:29                             ` [PATCH] [RFC] Add support for uevents on block device idle Kay Sievers
2009-11-19 14:16                             ` [PATCH] [RFC] Add support for uevents on block device idle changes Matthew Garrett
2009-11-19 14:16                               ` [PATCH] [RFC] Add support for uevents on block device idle Matthew Garrett
2009-11-19 14:25                               ` [PATCH] [RFC] Add support for uevents on block device idle changes Kay Sievers
2009-11-19 14:25                                 ` [PATCH] [RFC] Add support for uevents on block device idle Kay Sievers
2009-11-19 14:30                                 ` [PATCH] [RFC] Add support for uevents on block device idle changes Matthew Garrett
2009-11-19 14:30                                   ` [PATCH] [RFC] Add support for uevents on block device idle Matthew Garrett
2009-11-19 14:34                                   ` [PATCH] [RFC] Add support for uevents on block device idle changes Kay Sievers
2009-11-19 14:34                                     ` [PATCH] [RFC] Add support for uevents on block device idle Kay Sievers
2009-11-19 14:48                                     ` [PATCH] [RFC] Add support for uevents on block device idle changes Matthew Garrett
2009-11-19 14:48                                       ` [PATCH] [RFC] Add support for uevents on block device idle Matthew Garrett
2009-11-19 15:00                                       ` [PATCH] [RFC] Add support for uevents on block device idle changes Kay Sievers
2009-11-19 15:00                                         ` [PATCH] [RFC] Add support for uevents on block device idle Kay Sievers
2009-11-20 20:29                                         ` [PATCH] [RFC] Add support for uevents on block device idle changes Matthew Garrett
2009-11-20 20:29                                           ` [PATCH] [RFC] Add support for uevents on block device idle Matthew Garrett
2009-11-22 23:37                   ` [PATCH] [RFC] Add support for uevents on block device idle changes Pavel Machek
2009-11-22 23:37                     ` [PATCH] [RFC] Add support for uevents on block device idle Pavel Machek
2009-11-23 14:12                     ` [PATCH] [RFC] Add support for uevents on block device idle changes Matthew Garrett
2009-11-23 14:12                       ` [PATCH] [RFC] Add support for uevents on block device idle Matthew Garrett
2009-11-23 14:17                     ` [PATCH] [RFC] Add support for uevents on block device idle changes Jens Axboe
2009-11-23 14:17                       ` [PATCH] [RFC] Add support for uevents on block device idle Jens Axboe
2009-11-23 14:25                       ` [PATCH] [RFC] Add support for uevents on block device idle changes Matthew Garrett
2009-11-23 14:25                         ` [PATCH] [RFC] Add support for uevents on block device idle Matthew Garrett
2009-11-23 14:31                         ` [PATCH] [RFC] Add support for uevents on block device idle changes Jens Axboe
2009-11-23 14:31                           ` [PATCH] [RFC] Add support for uevents on block device idle Jens Axboe
2009-11-23 14:42                           ` [PATCH] [RFC] Add support for uevents on block device idle changes Matthew Garrett
2009-11-23 14:42                             ` [PATCH] [RFC] Add support for uevents on block device idle Matthew Garrett
2009-11-23 19:50                             ` [PATCH] [RFC] Add support for uevents on block device idle changes Jens Axboe
2009-11-23 19:50                               ` [PATCH] [RFC] Add support for uevents on block device idle Jens Axboe
2009-11-23 19:54                               ` [PATCH] [RFC] Add support for uevents on block device idle changes Matthew Garrett
2009-11-23 19:54                                 ` [PATCH] [RFC] Add support for uevents on block device idle Matthew Garrett
2009-12-11 21:20                               ` [RFC] Add support for events on block device idle changes Matthew Garrett
2009-11-18 22:10 ` [PATCH] [RFC] Add support for uevents " Bartlomiej Zolnierkiewicz
2009-11-18 22:10   ` Bartlomiej Zolnierkiewicz

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.