linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC PATCH 0/3] block layer runtime pm
@ 2012-05-15  8:48 Lin Ming
  2012-05-15  8:48 ` [RFC PATCH 1/3] block: add a flag to identify PM request Lin Ming
                   ` (2 more replies)
  0 siblings, 3 replies; 23+ messages in thread
From: Lin Ming @ 2012-05-15  8:48 UTC (permalink / raw)
  To: Jens Axboe, Alan Stern; +Cc: linux-kernel, linux-pm, linux-scsi

Hi,

In August 2010, Jens and Alan discussed about "Runtime PM and the block
layer". http://marc.info/?t=128259108400001&r=1&w=2

Here are the very draft, compile tested only patches that try to
implement the ideas discussed.

I throw it out early to get feedbacks and see if I'm on the right
direction.

Lin Ming (3):
      block: add a flag to identify PM request
      block: add queue runtime pm callback
      block: add queue idle timer

 block/blk-core.c           |   25 +++++++++++++++++++++++
 block/blk-settings.c       |    6 +++++
 block/elevator.c           |   10 +++++++++
 drivers/scsi/scsi_lib.c    |   25 ++++++++++++++++++++--
 drivers/scsi/sd.c          |   47 +++++++++++++++++++++++++++++++++++++------
 include/linux/blk_types.h  |    2 +
 include/linux/blkdev.h     |    8 +++++++
 include/scsi/scsi_device.h |    4 +++
 8 files changed, 117 insertions(+), 10 deletions(-)

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [RFC PATCH 1/3] block: add a flag to identify PM request
  2012-05-15  8:48 [RFC PATCH 0/3] block layer runtime pm Lin Ming
@ 2012-05-15  8:48 ` Lin Ming
  2012-05-15 14:35   ` Alan Stern
  2012-05-15  8:48 ` [RFC PATCH 2/3] block: add queue runtime pm callback Lin Ming
  2012-05-15  8:48 ` [RFC PATCH 3/3] block: add queue idle timer Lin Ming
  2 siblings, 1 reply; 23+ messages in thread
From: Lin Ming @ 2012-05-15  8:48 UTC (permalink / raw)
  To: Jens Axboe, Alan Stern; +Cc: linux-kernel, linux-pm, linux-scsi

Add a flag REQ_PM to identify the request is PM related.
As an example, modify scsi code to use this flag.

Signed-off-by: Lin Ming <ming.m.lin@intel.com>
---
 drivers/scsi/scsi_lib.c    |   25 ++++++++++++++++++++++---
 drivers/scsi/sd.c          |   15 ++++++++-------
 include/linux/blk_types.h  |    2 ++
 include/scsi/scsi_device.h |    4 ++++
 4 files changed, 36 insertions(+), 10 deletions(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index ead6405..5a1eb5a 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -256,10 +256,10 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 EXPORT_SYMBOL(scsi_execute);
 
 
-int scsi_execute_req(struct scsi_device *sdev, const unsigned char *cmd,
+static int __scsi_execute_req(struct scsi_device *sdev, const unsigned char *cmd,
 		     int data_direction, void *buffer, unsigned bufflen,
 		     struct scsi_sense_hdr *sshdr, int timeout, int retries,
-		     int *resid)
+		     int *resid, int flags)
 {
 	char *sense = NULL;
 	int result;
@@ -270,15 +270,34 @@ int scsi_execute_req(struct scsi_device *sdev, const unsigned char *cmd,
 			return DRIVER_ERROR << 24;
 	}
 	result = scsi_execute(sdev, cmd, data_direction, buffer, bufflen,
-			      sense, timeout, retries, 0, resid);
+			      sense, timeout, retries, flags, resid);
 	if (sshdr)
 		scsi_normalize_sense(sense, SCSI_SENSE_BUFFERSIZE, sshdr);
 
 	kfree(sense);
 	return result;
 }
+
+int scsi_execute_req(struct scsi_device *sdev, const unsigned char *cmd,
+		     int data_direction, void *buffer, unsigned bufflen,
+		     struct scsi_sense_hdr *sshdr, int timeout, int retries,
+		     int *resid)
+{
+	return __scsi_execute_req(sdev, cmd, data_direction, buffer, bufflen,
+			      sshdr, timeout, retries, resid, 0);
+}
 EXPORT_SYMBOL(scsi_execute_req);
 
+int scsi_execute_req_flag(struct scsi_device *sdev, const unsigned char *cmd,
+		     int data_direction, void *buffer, unsigned bufflen,
+		     struct scsi_sense_hdr *sshdr, int timeout, int retries,
+		     int *resid, int flags)
+{
+	return __scsi_execute_req(sdev, cmd, data_direction, buffer, bufflen,
+			      sshdr, timeout, retries, resid, flags);
+}
+EXPORT_SYMBOL(scsi_execute_req_flag);
+
 /*
  * Function:    scsi_init_cmd_errh()
  *
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 5ba5c2a..e34bc07 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1251,7 +1251,7 @@ static unsigned int sd_check_events(struct gendisk *disk, unsigned int clearing)
 	return retval;
 }
 
-static int sd_sync_cache(struct scsi_disk *sdkp)
+static int sd_sync_cache(struct scsi_disk *sdkp, int flags)
 {
 	int retries, res;
 	struct scsi_device *sdp = sdkp->device;
@@ -1269,8 +1269,9 @@ static int sd_sync_cache(struct scsi_disk *sdkp)
 		 * Leave the rest of the command zero to indicate
 		 * flush everything.
 		 */
-		res = scsi_execute_req(sdp, cmd, DMA_NONE, NULL, 0, &sshdr,
-				       SD_FLUSH_TIMEOUT, SD_MAX_RETRIES, NULL);
+		res = scsi_execute_req_flag(sdp, cmd, DMA_NONE, NULL, 0, &sshdr,
+				       SD_FLUSH_TIMEOUT, SD_MAX_RETRIES, NULL,
+				       flags);
 		if (res == 0)
 			break;
 	}
@@ -2812,8 +2813,8 @@ static int sd_start_stop_device(struct scsi_disk *sdkp, int start)
 	if (!scsi_device_online(sdp))
 		return -ENODEV;
 
-	res = scsi_execute_req(sdp, cmd, DMA_NONE, NULL, 0, &sshdr,
-			       SD_TIMEOUT, SD_MAX_RETRIES, NULL);
+	res = scsi_execute_req_flag(sdp, cmd, DMA_NONE, NULL, 0, &sshdr,
+			       SD_TIMEOUT, SD_MAX_RETRIES, NULL, REQ_PM);
 	if (res) {
 		sd_printk(KERN_WARNING, sdkp, "START_STOP FAILED\n");
 		sd_print_result(sdkp, res);
@@ -2841,7 +2842,7 @@ static void sd_shutdown(struct device *dev)
 
 	if (sdkp->WCE) {
 		sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
-		sd_sync_cache(sdkp);
+		sd_sync_cache(sdkp, 0);
 	}
 
 	if (system_state != SYSTEM_RESTART && sdkp->device->manage_start_stop) {
@@ -2863,7 +2864,7 @@ static int sd_suspend(struct device *dev, pm_message_t mesg)
 
 	if (sdkp->WCE) {
 		sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
-		ret = sd_sync_cache(sdkp);
+		ret = sd_sync_cache(sdkp, REQ_PM);
 		if (ret)
 			goto done;
 	}
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 4053cbd..a75f854 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -150,6 +150,7 @@ enum rq_flag_bits {
 	__REQ_FLUSH_SEQ,	/* request for flush sequence */
 	__REQ_IO_STAT,		/* account I/O stat */
 	__REQ_MIXED_MERGE,	/* merge of different types, fail separately */
+	__REQ_PM,		/* runtime pm request */
 	__REQ_NR_BITS,		/* stops here */
 };
 
@@ -191,5 +192,6 @@ enum rq_flag_bits {
 #define REQ_IO_STAT		(1 << __REQ_IO_STAT)
 #define REQ_MIXED_MERGE		(1 << __REQ_MIXED_MERGE)
 #define REQ_SECURE		(1 << __REQ_SECURE)
+#define REQ_PM			(1 << __REQ_PM)
 
 #endif /* __LINUX_BLK_TYPES_H */
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 6efb2e1..7735b46 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -387,6 +387,10 @@ extern int scsi_execute_req(struct scsi_device *sdev, const unsigned char *cmd,
 			    int data_direction, void *buffer, unsigned bufflen,
 			    struct scsi_sense_hdr *, int timeout, int retries,
 			    int *resid);
+extern int scsi_execute_req_flag(struct scsi_device *sdev, const unsigned char *cmd,
+			    int data_direction, void *buffer, unsigned bufflen,
+			    struct scsi_sense_hdr *, int timeout, int retries,
+			    int *resid, int flags);
 
 #ifdef CONFIG_PM_RUNTIME
 extern int scsi_autopm_get_device(struct scsi_device *);
-- 
1.7.2.5


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [RFC PATCH 2/3] block: add queue runtime pm callback
  2012-05-15  8:48 [RFC PATCH 0/3] block layer runtime pm Lin Ming
  2012-05-15  8:48 ` [RFC PATCH 1/3] block: add a flag to identify PM request Lin Ming
@ 2012-05-15  8:48 ` Lin Ming
  2012-05-15 14:37   ` Alan Stern
  2012-05-15  8:48 ` [RFC PATCH 3/3] block: add queue idle timer Lin Ming
  2 siblings, 1 reply; 23+ messages in thread
From: Lin Ming @ 2012-05-15  8:48 UTC (permalink / raw)
  To: Jens Axboe, Alan Stern; +Cc: linux-kernel, linux-pm, linux-scsi

Add ->runtime_pm callback and ->pm_status to request queue.
As an example, implement a simple queue runtime_pm callback
in sd driver.

Signed-off-by: Lin Ming <ming.m.lin@intel.com>
---
 block/blk-settings.c   |    6 ++++++
 drivers/scsi/sd.c      |   32 ++++++++++++++++++++++++++++++++
 include/linux/blkdev.h |    5 +++++
 3 files changed, 43 insertions(+), 0 deletions(-)

diff --git a/block/blk-settings.c b/block/blk-settings.c
index d3234fc..51be5f9 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -99,6 +99,12 @@ void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn)
 }
 EXPORT_SYMBOL_GPL(blk_queue_lld_busy);
 
+void blk_queue_runtime_pm(struct request_queue *q, runtime_pm_fn *fn)
+{
+	q->runtime_pm = fn;
+}
+EXPORT_SYMBOL_GPL(blk_queue_runtime_pm);
+
 /**
  * blk_set_default_limits - reset limits to default values
  * @lim:  the queue_limits structure to reset
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index e34bc07..a4f049a 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -938,6 +938,22 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq)
 	return scsi_prep_return(q, rq, ret);
 }
 
+static int sd_runtime_pm_fn(struct request_queue *q, pm_message_t mesg)
+{
+	struct scsi_device *sdp = q->queuedata;
+
+	if (mesg.event == PM_EVENT_SUSPEND) {
+		q->pm_status = RPM_SUSPENDING;
+		pm_runtime_put(&sdp->sdev_gendev);
+	}
+	else if (mesg.event == PM_EVENT_RESUME) {
+		q->pm_status = RPM_RESUMING;
+		pm_runtime_get(&sdp->sdev_gendev);
+	}
+
+	return 0;
+}
+
 /**
  *	sd_open - open a scsi disk device
  *	@inode: only i_rdev member may be used
@@ -2616,6 +2632,7 @@ static void sd_probe_async(void *data, async_cookie_t cookie)
 
 	blk_queue_prep_rq(sdp->request_queue, sd_prep_fn);
 	blk_queue_unprep_rq(sdp->request_queue, sd_unprep_fn);
+	blk_queue_runtime_pm(sdp->request_queue, sd_runtime_pm_fn);
 
 	gd->driverfs_dev = &sdp->sdev_gendev;
 	gd->flags = GENHD_FL_EXT_DEVT;
@@ -2872,6 +2889,13 @@ static int sd_suspend(struct device *dev, pm_message_t mesg)
 	if ((mesg.event & PM_EVENT_SLEEP) && sdkp->device->manage_start_stop) {
 		sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n");
 		ret = sd_start_stop_device(sdkp, 0);
+		if (!ret) {
+			struct request_queue *q = sdkp->disk->queue;
+
+			spin_lock_irq(q->queue_lock);
+			q->pm_status = RPM_SUSPENDED;
+			spin_unlock_irq(q->queue_lock);
+		}
 	}
 
 done:
@@ -2889,6 +2913,14 @@ static int sd_resume(struct device *dev)
 
 	sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
 	ret = sd_start_stop_device(sdkp, 1);
+	if (!ret) {
+		struct request_queue *q = sdkp->disk->queue;
+
+		spin_lock_irq(q->queue_lock);
+		q->pm_status = RPM_ACTIVE;
+		__blk_run_queue(sdkp->disk->queue);
+		spin_unlock_irq(q->queue_lock);
+	}
 
 done:
 	scsi_disk_put(sdkp);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2aa2466..b5b212c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -216,6 +216,7 @@ typedef void (softirq_done_fn)(struct request *);
 typedef int (dma_drain_needed_fn)(struct request *);
 typedef int (lld_busy_fn) (struct request_queue *q);
 typedef int (bsg_job_fn) (struct bsg_job *);
+typedef int (runtime_pm_fn) (struct request_queue *q, pm_message_t mesg);
 
 enum blk_eh_timer_return {
 	BLK_EH_NOT_HANDLED,
@@ -289,6 +290,7 @@ struct request_queue {
 	rq_timed_out_fn		*rq_timed_out_fn;
 	dma_drain_needed_fn	*dma_drain_needed;
 	lld_busy_fn		*lld_busy_fn;
+	runtime_pm_fn		*runtime_pm;
 
 	/*
 	 * Dispatch queue sorting
@@ -346,6 +348,8 @@ struct request_queue {
 	unsigned int		nr_congestion_off;
 	unsigned int		nr_batching;
 
+	int			pm_status;
+
 	unsigned int		dma_drain_size;
 	void			*dma_drain_buffer;
 	unsigned int		dma_pad_mask;
@@ -855,6 +859,7 @@ extern void blk_queue_segment_boundary(struct request_queue *, unsigned long);
 extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn);
 extern void blk_queue_unprep_rq(struct request_queue *, unprep_rq_fn *ufn);
 extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *);
+extern void blk_queue_runtime_pm(struct request_queue *q, runtime_pm_fn *);
 extern void blk_queue_dma_alignment(struct request_queue *, int);
 extern void blk_queue_update_dma_alignment(struct request_queue *, int);
 extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
-- 
1.7.2.5


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [RFC PATCH 3/3] block: add queue idle timer
  2012-05-15  8:48 [RFC PATCH 0/3] block layer runtime pm Lin Ming
  2012-05-15  8:48 ` [RFC PATCH 1/3] block: add a flag to identify PM request Lin Ming
  2012-05-15  8:48 ` [RFC PATCH 2/3] block: add queue runtime pm callback Lin Ming
@ 2012-05-15  8:48 ` Lin Ming
  2012-05-15 14:39   ` Alan Stern
  2012-05-15 19:19   ` Jeff Moyer
  2 siblings, 2 replies; 23+ messages in thread
From: Lin Ming @ 2012-05-15  8:48 UTC (permalink / raw)
  To: Jens Axboe, Alan Stern; +Cc: linux-kernel, linux-pm, linux-scsi

Add an idle timer that is set to some suitable timeout and would be
added when the queue first goes empty. If nothing has happened during
the timeout interval, then the queue is suspended.

Queueing a new request could check the state and resume queue if it is
supended.

Signed-off-by: Lin Ming <ming.m.lin@intel.com>
---
 block/blk-core.c       |   25 +++++++++++++++++++++++++
 block/elevator.c       |   10 ++++++++++
 include/linux/blkdev.h |    3 +++
 3 files changed, 38 insertions(+), 0 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 1f61b74..01b4e14 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -473,6 +473,17 @@ struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(blk_alloc_queue);
 
+static void blk_rq_idle_timer(unsigned long data)
+{
+	struct request_queue *q = (struct request_queue *) data;
+	unsigned long flags;
+
+	spin_lock_irqsave(q->queue_lock, flags);
+	if (!q->nr_pending && q->runtime_pm)
+		q->runtime_pm(q, PMSG_SUSPEND);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
 struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 {
 	struct request_queue *q;
@@ -504,6 +515,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
 		    laptop_mode_timer_fn, (unsigned long) q);
 	setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
+	setup_timer(&q->idle, blk_rq_idle_timer, (unsigned long) q);
 	INIT_LIST_HEAD(&q->timeout_list);
 	INIT_LIST_HEAD(&q->icq_list);
 	INIT_LIST_HEAD(&q->flush_queue[0]);
@@ -1129,6 +1141,13 @@ void __blk_put_request(struct request_queue *q, struct request *req)
 	if (unlikely(--req->ref_count))
 		return;
 
+	/* PM request is not accounted */
+	if (!(req->cmd_flags & REQ_PM)) {
+		if (!(--q->nr_pending))
+			/* Hard code to 20secs, will move to sysfs */
+			mod_timer(&q->idle, jiffies + 20*HZ);
+	}
+
 	elv_completed_request(q, req);
 
 	/* this is a bio leak */
@@ -1917,6 +1936,12 @@ struct request *blk_peek_request(struct request_queue *q)
 	int ret;
 
 	while ((rq = __elv_next_request(q)) != NULL) {
+		/* Only PM request is allowed to go if the queue is suspended */
+		if (q->pm_status != RPM_ACTIVE && !(rq->cmd_flags & REQ_PM)) {
+			rq = NULL;
+			break;
+		}
+
 		if (!(rq->cmd_flags & REQ_STARTED)) {
 			/*
 			 * This is the first time the device driver
diff --git a/block/elevator.c b/block/elevator.c
index f016855..7e68994 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -546,6 +546,9 @@ void elv_requeue_request(struct request_queue *q, struct request *rq)
 
 	rq->cmd_flags &= ~REQ_STARTED;
 
+	/* __elv_add_request will increment the count */
+	if (!(rq->cmd_flags & REQ_PM))
+		q->nr_pending--;
 	__elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE);
 }
 
@@ -587,6 +590,13 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
 {
 	trace_block_rq_insert(q, rq);
 
+	if (!(rq->cmd_flags & REQ_PM)) {
+		if (!(q->pm_status != RPM_ACTIVE) &&
+				q->nr_pending++ == 0 && q->runtime_pm)
+			/* async resume */
+			q->runtime_pm(q, PMSG_RESUME);
+	}
+
 	rq->q = q;
 
 	if (rq->cmd_flags & REQ_SOFTBARRIER) {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b5b212c..f6bc96b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -347,6 +347,7 @@ struct request_queue {
 	unsigned int		nr_congestion_on;
 	unsigned int		nr_congestion_off;
 	unsigned int		nr_batching;
+	unsigned int		nr_pending;
 
 	int			pm_status;
 
@@ -365,6 +366,8 @@ struct request_queue {
 	struct timer_list	timeout;
 	struct list_head	timeout_list;
 
+	struct timer_list	idle;
+
 	struct list_head	icq_list;
 
 	struct queue_limits	limits;
-- 
1.7.2.5


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [RFC PATCH 1/3] block: add a flag to identify PM request
  2012-05-15  8:48 ` [RFC PATCH 1/3] block: add a flag to identify PM request Lin Ming
@ 2012-05-15 14:35   ` Alan Stern
  2012-05-15 15:33     ` Lin Ming
  0 siblings, 1 reply; 23+ messages in thread
From: Alan Stern @ 2012-05-15 14:35 UTC (permalink / raw)
  To: Lin Ming
  Cc: Jens Axboe, Kernel development list, Linux-pm mailing list,
	SCSI development list

On Tue, 15 May 2012, Lin Ming wrote:

> Add a flag REQ_PM to identify the request is PM related.
> As an example, modify scsi code to use this flag.

There already is such a flag; you don't need to add one.  In fact, 
there already are _two_ such flags, and it would be best to remove one 
of them.  In include/linux/blkdev.h:

	REQ_TYPE_PM_SUSPEND,		/* suspend request */
	REQ_TYPE_PM_RESUME,		/* resume request */

Apparently they had been used by the old ide driver, but they don't
seem to be used anywhere now.

Aside from that, this patch seems to be going in the right direction.

Alan Stern


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [RFC PATCH 2/3] block: add queue runtime pm callback
  2012-05-15  8:48 ` [RFC PATCH 2/3] block: add queue runtime pm callback Lin Ming
@ 2012-05-15 14:37   ` Alan Stern
  2012-05-15 15:36     ` Lin Ming
  0 siblings, 1 reply; 23+ messages in thread
From: Alan Stern @ 2012-05-15 14:37 UTC (permalink / raw)
  To: Lin Ming; +Cc: Jens Axboe, linux-kernel, linux-pm, linux-scsi

On Tue, 15 May 2012, Lin Ming wrote:

> Add ->runtime_pm callback and ->pm_status to request queue.
> As an example, implement a simple queue runtime_pm callback
> in sd driver.

> +static int sd_runtime_pm_fn(struct request_queue *q, pm_message_t mesg)
> +{
> +	struct scsi_device *sdp = q->queuedata;
> +
> +	if (mesg.event == PM_EVENT_SUSPEND) {
> +		q->pm_status = RPM_SUSPENDING;
> +		pm_runtime_put(&sdp->sdev_gendev);
> +	}
> +	else if (mesg.event == PM_EVENT_RESUME) {
> +		q->pm_status = RPM_RESUMING;
> +		pm_runtime_get(&sdp->sdev_gendev);
> +	}

Don't use pm_message_t arguments for runtime PM.  Implement separate 
routines for suspend and resume.

Alan Stern


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [RFC PATCH 3/3] block: add queue idle timer
  2012-05-15  8:48 ` [RFC PATCH 3/3] block: add queue idle timer Lin Ming
@ 2012-05-15 14:39   ` Alan Stern
  2012-05-15 15:49     ` Lin Ming
  2012-05-15 19:19   ` Jeff Moyer
  1 sibling, 1 reply; 23+ messages in thread
From: Alan Stern @ 2012-05-15 14:39 UTC (permalink / raw)
  To: Lin Ming; +Cc: Jens Axboe, linux-kernel, linux-pm, linux-scsi

On Tue, 15 May 2012, Lin Ming wrote:

> Add an idle timer that is set to some suitable timeout and would be
> added when the queue first goes empty. If nothing has happened during
> the timeout interval, then the queue is suspended.

You don't need to create an idle timer for runtime PM support; the 
runtime PM framework already has such a timer.  Read 
Documentation/power/runtime_pm.txt, especially the parts describing 
pm_runtime_autosuspend() and related functions.

Alan Stern


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [RFC PATCH 1/3] block: add a flag to identify PM request
  2012-05-15 14:35   ` Alan Stern
@ 2012-05-15 15:33     ` Lin Ming
  2012-05-15 15:37       ` Alan Cox
  2012-05-15 16:03       ` Alan Stern
  0 siblings, 2 replies; 23+ messages in thread
From: Lin Ming @ 2012-05-15 15:33 UTC (permalink / raw)
  To: Alan Stern
  Cc: Jens Axboe, Kernel development list, Linux-pm mailing list,
	SCSI development list

On Tue, 2012-05-15 at 10:35 -0400, Alan Stern wrote:
> On Tue, 15 May 2012, Lin Ming wrote:
> 
> > Add a flag REQ_PM to identify the request is PM related.
> > As an example, modify scsi code to use this flag.
> 
> There already is such a flag; you don't need to add one.  In fact, 
> there already are _two_ such flags, and it would be best to remove one 
> of them.  In include/linux/blkdev.h:
> 
> 	REQ_TYPE_PM_SUSPEND,		/* suspend request */
> 	REQ_TYPE_PM_RESUME,		/* resume request */
> 
> Apparently they had been used by the old ide driver, but they don't
> seem to be used anywhere now.

IDE code still uses both types and they are used for system
suspend/resume. See generic_ide_suspend and generic_ide_resume.

But we need a flag to check whether it is runtime suspend/resume
request.

> 
> Aside from that, this patch seems to be going in the right direction.
> 
> Alan Stern
> 



^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [RFC PATCH 2/3] block: add queue runtime pm callback
  2012-05-15 14:37   ` Alan Stern
@ 2012-05-15 15:36     ` Lin Ming
  0 siblings, 0 replies; 23+ messages in thread
From: Lin Ming @ 2012-05-15 15:36 UTC (permalink / raw)
  To: Alan Stern; +Cc: Jens Axboe, linux-kernel, linux-pm, linux-scsi

On Tue, 2012-05-15 at 10:37 -0400, Alan Stern wrote:
> On Tue, 15 May 2012, Lin Ming wrote:
> 
> > Add ->runtime_pm callback and ->pm_status to request queue.
> > As an example, implement a simple queue runtime_pm callback
> > in sd driver.
> 
> > +static int sd_runtime_pm_fn(struct request_queue *q, pm_message_t mesg)
> > +{
> > +	struct scsi_device *sdp = q->queuedata;
> > +
> > +	if (mesg.event == PM_EVENT_SUSPEND) {
> > +		q->pm_status = RPM_SUSPENDING;
> > +		pm_runtime_put(&sdp->sdev_gendev);
> > +	}
> > +	else if (mesg.event == PM_EVENT_RESUME) {
> > +		q->pm_status = RPM_RESUMING;
> > +		pm_runtime_get(&sdp->sdev_gendev);
> > +	}
> 
> Don't use pm_message_t arguments for runtime PM.  Implement separate 
> routines for suspend and resume.

OK, I'll add 2 callbacks to request_queue.

runtime_pm_fn runtime_suspend;
runtime_pm_fn runtime_resume;

> 
> Alan Stern
> 



^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [RFC PATCH 1/3] block: add a flag to identify PM request
  2012-05-15 15:33     ` Lin Ming
@ 2012-05-15 15:37       ` Alan Cox
  2012-05-15 15:58         ` Lin Ming
  2012-05-15 16:03       ` Alan Stern
  1 sibling, 1 reply; 23+ messages in thread
From: Alan Cox @ 2012-05-15 15:37 UTC (permalink / raw)
  To: Lin Ming
  Cc: Alan Stern, Jens Axboe, Kernel development list,
	Linux-pm mailing list, SCSI development list

On Tue, 15 May 2012 23:33:05 +0800
Lin Ming <ming.m.lin@intel.com> wrote:

> On Tue, 2012-05-15 at 10:35 -0400, Alan Stern wrote:
> > On Tue, 15 May 2012, Lin Ming wrote:
> > 
> > > Add a flag REQ_PM to identify the request is PM related.
> > > As an example, modify scsi code to use this flag.
> > 
> > There already is such a flag; you don't need to add one.  In fact, 
> > there already are _two_ such flags, and it would be best to remove one 
> > of them.  In include/linux/blkdev.h:
> > 
> > 	REQ_TYPE_PM_SUSPEND,		/* suspend request */
> > 	REQ_TYPE_PM_RESUME,		/* resume request */
> > 
> > Apparently they had been used by the old ide driver, but they don't
> > seem to be used anywhere now.
> 
> IDE code still uses both types and they are used for system
> suspend/resume. See generic_ide_suspend and generic_ide_resume.

drivers/ide is obsolete code though so we shouldn't be forcing its needs
on the future

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [RFC PATCH 3/3] block: add queue idle timer
  2012-05-15 14:39   ` Alan Stern
@ 2012-05-15 15:49     ` Lin Ming
  0 siblings, 0 replies; 23+ messages in thread
From: Lin Ming @ 2012-05-15 15:49 UTC (permalink / raw)
  To: Alan Stern; +Cc: Jens Axboe, linux-kernel, linux-pm, linux-scsi

On Tue, 2012-05-15 at 10:39 -0400, Alan Stern wrote:
> On Tue, 15 May 2012, Lin Ming wrote:
> 
> > Add an idle timer that is set to some suitable timeout and would be
> > added when the queue first goes empty. If nothing has happened during
> > the timeout interval, then the queue is suspended.
> 
> You don't need to create an idle timer for runtime PM support; the 
> runtime PM framework already has such a timer.  Read 
> Documentation/power/runtime_pm.txt, especially the parts describing 
> pm_runtime_autosuspend() and related functions.

OK, will use pm_runtime_autosuspend.

> 
> Alan Stern
> 



^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [RFC PATCH 1/3] block: add a flag to identify PM request
  2012-05-15 15:37       ` Alan Cox
@ 2012-05-15 15:58         ` Lin Ming
  2012-05-15 18:30           ` Alan Stern
  0 siblings, 1 reply; 23+ messages in thread
From: Lin Ming @ 2012-05-15 15:58 UTC (permalink / raw)
  To: Alan Cox
  Cc: Alan Stern, Jens Axboe, Kernel development list,
	Linux-pm mailing list, SCSI development list

On Tue, 2012-05-15 at 16:37 +0100, Alan Cox wrote:
> On Tue, 15 May 2012 23:33:05 +0800
> Lin Ming <ming.m.lin@intel.com> wrote:
> 
> > On Tue, 2012-05-15 at 10:35 -0400, Alan Stern wrote:
> > > On Tue, 15 May 2012, Lin Ming wrote:
> > > 
> > > > Add a flag REQ_PM to identify the request is PM related.
> > > > As an example, modify scsi code to use this flag.
> > > 
> > > There already is such a flag; you don't need to add one.  In fact, 
> > > there already are _two_ such flags, and it would be best to remove one 
> > > of them.  In include/linux/blkdev.h:
> > > 
> > > 	REQ_TYPE_PM_SUSPEND,		/* suspend request */
> > > 	REQ_TYPE_PM_RESUME,		/* resume request */
> > > 
> > > Apparently they had been used by the old ide driver, but they don't
> > > seem to be used anywhere now.
> > 
> > IDE code still uses both types and they are used for system
> > suspend/resume. See generic_ide_suspend and generic_ide_resume.
> 
> drivers/ide is obsolete code though so we shouldn't be forcing its needs
> on the future

Do you mean we don't care about breaking ide code?

struct request {
        ....
        unsigned int cmd_flags;
        enum rq_cmd_type_bits cmd_type;

Another reason I didn't reuse REQ_TYPE_PM_SUSPEND/REQ_TYPE_PM_RESUME is
because all scsi requests set the cmd_type to REQ_TYPE_BLOCK_PC.

So I think we should add a new cmd_flags(REQ_PM) to tell if it's a
runtime pm request.

Thanks.


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [RFC PATCH 1/3] block: add a flag to identify PM request
  2012-05-15 15:33     ` Lin Ming
  2012-05-15 15:37       ` Alan Cox
@ 2012-05-15 16:03       ` Alan Stern
  1 sibling, 0 replies; 23+ messages in thread
From: Alan Stern @ 2012-05-15 16:03 UTC (permalink / raw)
  To: Lin Ming
  Cc: Jens Axboe, Kernel development list, Linux-pm mailing list,
	SCSI development list

On Tue, 15 May 2012, Lin Ming wrote:

> On Tue, 2012-05-15 at 10:35 -0400, Alan Stern wrote:
> > On Tue, 15 May 2012, Lin Ming wrote:
> > 
> > > Add a flag REQ_PM to identify the request is PM related.
> > > As an example, modify scsi code to use this flag.
> > 
> > There already is such a flag; you don't need to add one.  In fact, 
> > there already are _two_ such flags, and it would be best to remove one 
> > of them.  In include/linux/blkdev.h:
> > 
> > 	REQ_TYPE_PM_SUSPEND,		/* suspend request */
> > 	REQ_TYPE_PM_RESUME,		/* resume request */
> > 
> > Apparently they had been used by the old ide driver, but they don't
> > seem to be used anywhere now.
> 
> IDE code still uses both types and they are used for system
> suspend/resume. See generic_ide_suspend and generic_ide_resume.

You're right, of course.  I searched for "BLK_TYPE_PM_" instead of 
"REQ_TYPE_PM_".  :-(

> But we need a flag to check whether it is runtime suspend/resume
> request.

It should be possible to fix up the IDE code so that it uses the same
flag for both suspend and resume.  After all, there's no real danger of
confusion -- you never get into a situation where you're not sure
whether the current PM action is a suspend or a resume.

Alan Stern


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [RFC PATCH 1/3] block: add a flag to identify PM request
  2012-05-15 15:58         ` Lin Ming
@ 2012-05-15 18:30           ` Alan Stern
  0 siblings, 0 replies; 23+ messages in thread
From: Alan Stern @ 2012-05-15 18:30 UTC (permalink / raw)
  To: Lin Ming
  Cc: Alan Cox, Jens Axboe, Kernel development list,
	Linux-pm mailing list, SCSI development list

On Tue, 15 May 2012, Lin Ming wrote:

> > > IDE code still uses both types and they are used for system
> > > suspend/resume. See generic_ide_suspend and generic_ide_resume.
> > 
> > drivers/ide is obsolete code though so we shouldn't be forcing its needs
> > on the future
> 
> Do you mean we don't care about breaking ide code?

As far as I can tell, Alan means that you shouldn't hesitate to change
the IDE code in order to make things easier for more up-to-date
systems.  But don't _break_ the IDE driver!

> struct request {
>         ....
>         unsigned int cmd_flags;
>         enum rq_cmd_type_bits cmd_type;
> 
> Another reason I didn't reuse REQ_TYPE_PM_SUSPEND/REQ_TYPE_PM_RESUME is
> because all scsi requests set the cmd_type to REQ_TYPE_BLOCK_PC.
> 
> So I think we should add a new cmd_flags(REQ_PM) to tell if it's a
> runtime pm request.

Yes, that does make more sense, even though it seems silly to have both
a flag and a type code for the same thing.  Maybe the IDE driver could
be changed to use the new flag.

Alan Stern


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [RFC PATCH 3/3] block: add queue idle timer
  2012-05-15  8:48 ` [RFC PATCH 3/3] block: add queue idle timer Lin Ming
  2012-05-15 14:39   ` Alan Stern
@ 2012-05-15 19:19   ` Jeff Moyer
  2012-05-16  1:27     ` Lin Ming
  1 sibling, 1 reply; 23+ messages in thread
From: Jeff Moyer @ 2012-05-15 19:19 UTC (permalink / raw)
  To: Lin Ming; +Cc: Jens Axboe, Alan Stern, linux-kernel, linux-pm, linux-scsi

Lin Ming <ming.m.lin@intel.com> writes:

> Add an idle timer that is set to some suitable timeout and would be
> added when the queue first goes empty. If nothing has happened during
> the timeout interval, then the queue is suspended.
>
> Queueing a new request could check the state and resume queue if it is
> supended.
>

[snip]

> @@ -1129,6 +1141,13 @@ void __blk_put_request(struct request_queue *q, struct request *req)
>  	if (unlikely(--req->ref_count))
>  		return;
>  
> +	/* PM request is not accounted */
> +	if (!(req->cmd_flags & REQ_PM)) {
> +		if (!(--q->nr_pending))
> +			/* Hard code to 20secs, will move to sysfs */
> +			mod_timer(&q->idle, jiffies + 20*HZ);
> +	}
> +

I'm pretty sure Jens wanted to avoid doing a mod_timer, here, given that
the queue can transition between empty and non-empty fairly rapidly for
dependent I/O.

> @@ -587,6 +590,13 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
>  {
>  	trace_block_rq_insert(q, rq);
>  
> +	if (!(rq->cmd_flags & REQ_PM)) {
> +		if (!(q->pm_status != RPM_ACTIVE) &&

Uhh, re-read that line.  Not not equal?

Cheers,
Jeff

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [RFC PATCH 3/3] block: add queue idle timer
  2012-05-15 19:19   ` Jeff Moyer
@ 2012-05-16  1:27     ` Lin Ming
  2012-05-16 13:04       ` Jeff Moyer
  0 siblings, 1 reply; 23+ messages in thread
From: Lin Ming @ 2012-05-16  1:27 UTC (permalink / raw)
  To: Jeff Moyer; +Cc: Jens Axboe, Alan Stern, linux-kernel, linux-pm, linux-scsi

On Tue, 2012-05-15 at 15:19 -0400, Jeff Moyer wrote:
> Lin Ming <ming.m.lin@intel.com> writes:
> 
> > Add an idle timer that is set to some suitable timeout and would be
> > added when the queue first goes empty. If nothing has happened during
> > the timeout interval, then the queue is suspended.
> >
> > Queueing a new request could check the state and resume queue if it is
> > supended.
> >
> 
> [snip]
> 
> > @@ -1129,6 +1141,13 @@ void __blk_put_request(struct request_queue *q, struct request *req)
> >  	if (unlikely(--req->ref_count))
> >  		return;
> >  
> > +	/* PM request is not accounted */
> > +	if (!(req->cmd_flags & REQ_PM)) {
> > +		if (!(--q->nr_pending))
> > +			/* Hard code to 20secs, will move to sysfs */
> > +			mod_timer(&q->idle, jiffies + 20*HZ);
> > +	}
> > +
> 
> I'm pretty sure Jens wanted to avoid doing a mod_timer, here, given that
> the queue can transition between empty and non-empty fairly rapidly for
> dependent I/O.

I'll remove this idle timer and use runtime pm core's timer.

> 
> > @@ -587,6 +590,13 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
> >  {
> >  	trace_block_rq_insert(q, rq);
> >  
> > +	if (!(rq->cmd_flags & REQ_PM)) {
> > +		if (!(q->pm_status != RPM_ACTIVE) &&
> 
> Uhh, re-read that line.  Not not equal?

Ah, it should be:

if (q->pm_status != RPM_ACTIVE &&

My mistake.

> 
> Cheers,
> Jeff



^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [RFC PATCH 3/3] block: add queue idle timer
  2012-05-16  1:27     ` Lin Ming
@ 2012-05-16 13:04       ` Jeff Moyer
  2012-05-16 13:53         ` Jens Axboe
  0 siblings, 1 reply; 23+ messages in thread
From: Jeff Moyer @ 2012-05-16 13:04 UTC (permalink / raw)
  To: Lin Ming; +Cc: Jens Axboe, Alan Stern, linux-kernel, linux-pm, linux-scsi

Lin Ming <ming.m.lin@intel.com> writes:

> On Tue, 2012-05-15 at 15:19 -0400, Jeff Moyer wrote:
>> Lin Ming <ming.m.lin@intel.com> writes:
>> 
>> > Add an idle timer that is set to some suitable timeout and would be
>> > added when the queue first goes empty. If nothing has happened during
>> > the timeout interval, then the queue is suspended.
>> >
>> > Queueing a new request could check the state and resume queue if it is
>> > supended.
>> >
>> 
>> [snip]
>> 
>> > @@ -1129,6 +1141,13 @@ void __blk_put_request(struct request_queue *q, struct request *req)
>> >  	if (unlikely(--req->ref_count))
>> >  		return;
>> >  
>> > +	/* PM request is not accounted */
>> > +	if (!(req->cmd_flags & REQ_PM)) {
>> > +		if (!(--q->nr_pending))
>> > +			/* Hard code to 20secs, will move to sysfs */
>> > +			mod_timer(&q->idle, jiffies + 20*HZ);
>> > +	}
>> > +
>> 
>> I'm pretty sure Jens wanted to avoid doing a mod_timer, here, given that
>> the queue can transition between empty and non-empty fairly rapidly for
>> dependent I/O.
>
> I'll remove this idle timer and use runtime pm core's timer.

This issues isn't which timer to use, it's when to modify it.  Since the
queue can cycle between empty and non-empty very quickly, you should try
to avoid calling mod_timer for every non-empty to empty transition.
Jens had described one way to do this in the thread you referenced in
your 0/3 email.

Cheers,
Jeff

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [RFC PATCH 3/3] block: add queue idle timer
  2012-05-16 13:04       ` Jeff Moyer
@ 2012-05-16 13:53         ` Jens Axboe
  2012-05-16 14:28           ` Alan Stern
  2012-05-16 15:40           ` Lin Ming
  0 siblings, 2 replies; 23+ messages in thread
From: Jens Axboe @ 2012-05-16 13:53 UTC (permalink / raw)
  To: Jeff Moyer; +Cc: Lin Ming, Alan Stern, linux-kernel, linux-pm, linux-scsi

On 05/16/2012 03:04 PM, Jeff Moyer wrote:
> Lin Ming <ming.m.lin@intel.com> writes:
> 
>> On Tue, 2012-05-15 at 15:19 -0400, Jeff Moyer wrote:
>>> Lin Ming <ming.m.lin@intel.com> writes:
>>>
>>>> Add an idle timer that is set to some suitable timeout and would be
>>>> added when the queue first goes empty. If nothing has happened during
>>>> the timeout interval, then the queue is suspended.
>>>>
>>>> Queueing a new request could check the state and resume queue if it is
>>>> supended.
>>>>
>>>
>>> [snip]
>>>
>>>> @@ -1129,6 +1141,13 @@ void __blk_put_request(struct request_queue *q, struct request *req)
>>>>  	if (unlikely(--req->ref_count))
>>>>  		return;
>>>>  
>>>> +	/* PM request is not accounted */
>>>> +	if (!(req->cmd_flags & REQ_PM)) {
>>>> +		if (!(--q->nr_pending))
>>>> +			/* Hard code to 20secs, will move to sysfs */
>>>> +			mod_timer(&q->idle, jiffies + 20*HZ);
>>>> +	}
>>>> +
>>>
>>> I'm pretty sure Jens wanted to avoid doing a mod_timer, here, given that
>>> the queue can transition between empty and non-empty fairly rapidly for
>>> dependent I/O.
>>
>> I'll remove this idle timer and use runtime pm core's timer.
> 
> This issues isn't which timer to use, it's when to modify it.  Since the
> queue can cycle between empty and non-empty very quickly, you should try
> to avoid calling mod_timer for every non-empty to empty transition.
> Jens had described one way to do this in the thread you referenced in
> your 0/3 email.

That's exactly right, thanks Jeff.

Lin, you should have more slack timer handling. Look at the blk-timeout
handling of request timeouts for inspiration, and/or the thread that
Jeff also references. Doing a timer add/del for each request put is a no
go.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [RFC PATCH 3/3] block: add queue idle timer
  2012-05-16 13:53         ` Jens Axboe
@ 2012-05-16 14:28           ` Alan Stern
  2012-05-16 15:40           ` Lin Ming
  1 sibling, 0 replies; 23+ messages in thread
From: Alan Stern @ 2012-05-16 14:28 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Jeff Moyer, Lin Ming, linux-kernel, linux-pm, linux-scsi

On Wed, 16 May 2012, Jens Axboe wrote:

> On 05/16/2012 03:04 PM, Jeff Moyer wrote:
> > Lin Ming <ming.m.lin@intel.com> writes:
> > 
> >> On Tue, 2012-05-15 at 15:19 -0400, Jeff Moyer wrote:
> >>> Lin Ming <ming.m.lin@intel.com> writes:
> >>>
> >>>> Add an idle timer that is set to some suitable timeout and would be
> >>>> added when the queue first goes empty. If nothing has happened during
> >>>> the timeout interval, then the queue is suspended.
> >>>>
> >>>> Queueing a new request could check the state and resume queue if it is
> >>>> supended.
> >>>>
> >>>
> >>> [snip]
> >>>
> >>>> @@ -1129,6 +1141,13 @@ void __blk_put_request(struct request_queue *q, struct request *req)
> >>>>  	if (unlikely(--req->ref_count))
> >>>>  		return;
> >>>>  
> >>>> +	/* PM request is not accounted */
> >>>> +	if (!(req->cmd_flags & REQ_PM)) {
> >>>> +		if (!(--q->nr_pending))
> >>>> +			/* Hard code to 20secs, will move to sysfs */
> >>>> +			mod_timer(&q->idle, jiffies + 20*HZ);
> >>>> +	}
> >>>> +
> >>>
> >>> I'm pretty sure Jens wanted to avoid doing a mod_timer, here, given that
> >>> the queue can transition between empty and non-empty fairly rapidly for
> >>> dependent I/O.
> >>
> >> I'll remove this idle timer and use runtime pm core's timer.
> > 
> > This issues isn't which timer to use, it's when to modify it.  Since the
> > queue can cycle between empty and non-empty very quickly, you should try
> > to avoid calling mod_timer for every non-empty to empty transition.
> > Jens had described one way to do this in the thread you referenced in
> > your 0/3 email.
> 
> That's exactly right, thanks Jeff.
> 
> Lin, you should have more slack timer handling. Look at the blk-timeout
> handling of request timeouts for inspiration, and/or the thread that
> Jeff also references. Doing a timer add/del for each request put is a no
> go.

Lin, note that if you use the API properly, the runtime PM timer does
not have to get updated every time an I/O event occurs.  Look up 
pm_runtime_mark_last_busy() in the runtime PM documentation.

Alan Stern


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [RFC PATCH 3/3] block: add queue idle timer
  2012-05-16 13:53         ` Jens Axboe
  2012-05-16 14:28           ` Alan Stern
@ 2012-05-16 15:40           ` Lin Ming
  2012-05-16 15:59             ` Alan Stern
  1 sibling, 1 reply; 23+ messages in thread
From: Lin Ming @ 2012-05-16 15:40 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Jeff Moyer, Alan Stern, linux-kernel, linux-pm, linux-scsi

On Wed, 2012-05-16 at 15:53 +0200, Jens Axboe wrote:
> On 05/16/2012 03:04 PM, Jeff Moyer wrote:
> > Lin Ming <ming.m.lin@intel.com> writes:
> > 
> >> On Tue, 2012-05-15 at 15:19 -0400, Jeff Moyer wrote:
> >>> Lin Ming <ming.m.lin@intel.com> writes:
> >>>
> >>>> Add an idle timer that is set to some suitable timeout and would be
> >>>> added when the queue first goes empty. If nothing has happened during
> >>>> the timeout interval, then the queue is suspended.
> >>>>
> >>>> Queueing a new request could check the state and resume queue if it is
> >>>> supended.
> >>>>
> >>>
> >>> [snip]
> >>>
> >>>> @@ -1129,6 +1141,13 @@ void __blk_put_request(struct request_queue *q, struct request *req)
> >>>>  	if (unlikely(--req->ref_count))
> >>>>  		return;
> >>>>  
> >>>> +	/* PM request is not accounted */
> >>>> +	if (!(req->cmd_flags & REQ_PM)) {
> >>>> +		if (!(--q->nr_pending))
> >>>> +			/* Hard code to 20secs, will move to sysfs */
> >>>> +			mod_timer(&q->idle, jiffies + 20*HZ);
> >>>> +	}
> >>>> +
> >>>
> >>> I'm pretty sure Jens wanted to avoid doing a mod_timer, here, given that
> >>> the queue can transition between empty and non-empty fairly rapidly for
> >>> dependent I/O.
> >>
> >> I'll remove this idle timer and use runtime pm core's timer.
> > 
> > This issues isn't which timer to use, it's when to modify it.  Since the
> > queue can cycle between empty and non-empty very quickly, you should try
> > to avoid calling mod_timer for every non-empty to empty transition.
> > Jens had described one way to do this in the thread you referenced in
> > your 0/3 email.
> 
> That's exactly right, thanks Jeff.
> 
> Lin, you should have more slack timer handling. Look at the blk-timeout
> handling of request timeouts for inspiration, and/or the thread that
> Jeff also references. Doing a timer add/del for each request put is a no
> go.

You mentioned how to detect queue idle in the referenced thread:
	
===
So we could probably add an idle timer that is set to some suitable
timeout for this and would be added when the queue first goes empty. If
new requests come in, just let it simmer and defer checking the state to
when it actually fires.
===

What do you mean of "the queue first goes empty"?


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [RFC PATCH 3/3] block: add queue idle timer
  2012-05-16 15:40           ` Lin Ming
@ 2012-05-16 15:59             ` Alan Stern
  2012-05-16 16:12               ` Lin Ming
  2012-05-16 18:29               ` Jens Axboe
  0 siblings, 2 replies; 23+ messages in thread
From: Alan Stern @ 2012-05-16 15:59 UTC (permalink / raw)
  To: Lin Ming; +Cc: Jens Axboe, Jeff Moyer, linux-kernel, linux-pm, linux-scsi

On Wed, 16 May 2012, Lin Ming wrote:

> > Lin, you should have more slack timer handling. Look at the blk-timeout
> > handling of request timeouts for inspiration, and/or the thread that
> > Jeff also references. Doing a timer add/del for each request put is a no
> > go.
> 
> You mentioned how to detect queue idle in the referenced thread:
> 	
> ===
> So we could probably add an idle timer that is set to some suitable
> timeout for this and would be added when the queue first goes empty. If
> new requests come in, just let it simmer and defer checking the state to
> when it actually fires.

That is basically how the runtime PM timer works, if you use it as I 
described earlier.

> ===
> 
> What do you mean of "the queue first goes empty"?

When the queue is first created, the timer is started.

Whenever the timer expires, the code checks to see if any requests have 
been processed since the previous expiration.  If they have, the timer 
is restarted.  If they haven't, you suspend the device.

Alan Stern


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [RFC PATCH 3/3] block: add queue idle timer
  2012-05-16 15:59             ` Alan Stern
@ 2012-05-16 16:12               ` Lin Ming
  2012-05-16 18:29               ` Jens Axboe
  1 sibling, 0 replies; 23+ messages in thread
From: Lin Ming @ 2012-05-16 16:12 UTC (permalink / raw)
  To: Alan Stern; +Cc: Jens Axboe, Jeff Moyer, linux-kernel, linux-pm, linux-scsi

On Wed, 2012-05-16 at 11:59 -0400, Alan Stern wrote:
> On Wed, 16 May 2012, Lin Ming wrote:
> 
> > > Lin, you should have more slack timer handling. Look at the blk-timeout
> > > handling of request timeouts for inspiration, and/or the thread that
> > > Jeff also references. Doing a timer add/del for each request put is a no
> > > go.
> > 
> > You mentioned how to detect queue idle in the referenced thread:
> > 	
> > ===
> > So we could probably add an idle timer that is set to some suitable
> > timeout for this and would be added when the queue first goes empty. If
> > new requests come in, just let it simmer and defer checking the state to
> > when it actually fires.
> 
> That is basically how the runtime PM timer works, if you use it as I 
> described earlier.
> 
> > ===
> > 
> > What do you mean of "the queue first goes empty"?
> 
> When the queue is first created, the timer is started.
> 
> Whenever the timer expires, the code checks to see if any requests have 
> been processed since the previous expiration.  If they have, the timer 
> is restarted.  If they haven't, you suspend the device.

And the timer is restarted when we resume the device, right?

> 
> Alan Stern
> 



^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [RFC PATCH 3/3] block: add queue idle timer
  2012-05-16 15:59             ` Alan Stern
  2012-05-16 16:12               ` Lin Ming
@ 2012-05-16 18:29               ` Jens Axboe
  1 sibling, 0 replies; 23+ messages in thread
From: Jens Axboe @ 2012-05-16 18:29 UTC (permalink / raw)
  To: Alan Stern; +Cc: Lin Ming, Jeff Moyer, linux-kernel, linux-pm, linux-scsi

On 2012-05-16 17:59, Alan Stern wrote:
> On Wed, 16 May 2012, Lin Ming wrote:
> 
>>> Lin, you should have more slack timer handling. Look at the blk-timeout
>>> handling of request timeouts for inspiration, and/or the thread that
>>> Jeff also references. Doing a timer add/del for each request put is a no
>>> go.
>>
>> You mentioned how to detect queue idle in the referenced thread:
>> 	
>> ===
>> So we could probably add an idle timer that is set to some suitable
>> timeout for this and would be added when the queue first goes empty. If
>> new requests come in, just let it simmer and defer checking the state to
>> when it actually fires.
> 
> That is basically how the runtime PM timer works, if you use it as I 
> described earlier.
> 
>> ===
>>
>> What do you mean of "the queue first goes empty"?
> 
> When the queue is first created, the timer is started.
> 
> Whenever the timer expires, the code checks to see if any requests have 
> been processed since the previous expiration.  If they have, the timer 
> is restarted.  If they haven't, you suspend the device.

That sounds ideal, since you don't have to manage it in the block layer
then.

Lin, you should just use that. My suggestion was that you add the timer
when the queue first goes idle, IFF it isn't already running. When it
expires, you check last activity and decide whether to suspend it or
reset the timer. This is a more optimal than continually adding and
deleting timers, or even just moving it forwards 20s all the time. But
if pm already has support for this type of functionality (and it does,
Alan says, which makes sense since this mechanism would be useful for
other edvices), then you should indeed just use that.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2012-05-16 18:29 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-05-15  8:48 [RFC PATCH 0/3] block layer runtime pm Lin Ming
2012-05-15  8:48 ` [RFC PATCH 1/3] block: add a flag to identify PM request Lin Ming
2012-05-15 14:35   ` Alan Stern
2012-05-15 15:33     ` Lin Ming
2012-05-15 15:37       ` Alan Cox
2012-05-15 15:58         ` Lin Ming
2012-05-15 18:30           ` Alan Stern
2012-05-15 16:03       ` Alan Stern
2012-05-15  8:48 ` [RFC PATCH 2/3] block: add queue runtime pm callback Lin Ming
2012-05-15 14:37   ` Alan Stern
2012-05-15 15:36     ` Lin Ming
2012-05-15  8:48 ` [RFC PATCH 3/3] block: add queue idle timer Lin Ming
2012-05-15 14:39   ` Alan Stern
2012-05-15 15:49     ` Lin Ming
2012-05-15 19:19   ` Jeff Moyer
2012-05-16  1:27     ` Lin Ming
2012-05-16 13:04       ` Jeff Moyer
2012-05-16 13:53         ` Jens Axboe
2012-05-16 14:28           ` Alan Stern
2012-05-16 15:40           ` Lin Ming
2012-05-16 15:59             ` Alan Stern
2012-05-16 16:12               ` Lin Ming
2012-05-16 18:29               ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).