* [PATCH] 2.5.x use list_head to handle scsi starved request queues
@ 2003-03-20  2:27 Patrick Mansfield
  2003-03-20 20:05 ` Luben Tuikov
  0 siblings, 1 reply; 14+ messages in thread
From: Patrick Mansfield @ 2003-03-20  2:27 UTC (permalink / raw)
  To: James Bottomley, linux-scsi

This patch (against 2.5 bk on march 18) fixes a few problems with the
linux scsi "starved" algorithm.

It uses a list_head per scsi_host to store a list of scsi request queues
that were "starved" (they were not able to send IO because of per host
limitations).
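
In short, the two halves of the change amount to this (an abbreviated
sketch only -- the host_blocked/host_self_blocked checks and the real
context are in the diff below):

	/* scsi_request_fn(): the host can't take another command right
	 * now, so remember this queue and stop pulling requests from it */
	if (shost->can_queue > 0 && shost->host_busy >= shost->can_queue) {
		list_add_tail(&sdev->starved_entry, &shost->starved_list);
		break;
	}

	/* scsi_queue_next_request(): a command completed, so feed the
	 * starved queues (FIFO) before running this device's own queue */
	while (!list_empty(&shost->starved_list) &&
	       !(shost->can_queue > 0 && shost->host_busy >= shost->can_queue)) {
		sdev2 = list_entry(shost->starved_list.next,
				   struct scsi_device, starved_entry);
		list_del_init(&sdev2->starved_entry);
		__blk_run_queue(sdev2->request_queue);
	}
	__blk_run_queue(q);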

diff -purN -X /home/patman/dontdiff 25-bk-base/drivers/scsi/hosts.c starve-25/drivers/scsi/hosts.c
--- 25-bk-base/drivers/scsi/hosts.c	Wed Mar 19 11:54:28 2003
+++ starve-25/drivers/scsi/hosts.c	Wed Mar 19 16:36:40 2003
@@ -383,6 +383,7 @@ struct Scsi_Host * scsi_register(Scsi_Ho
 	scsi_assign_lock(shost, &shost->default_lock);
 	INIT_LIST_HEAD(&shost->my_devices);
 	INIT_LIST_HEAD(&shost->eh_cmd_q);
+	INIT_LIST_HEAD(&shost->starved_list);
 
 	init_waitqueue_head(&shost->host_wait);
 	shost->dma_channel = 0xff;
diff -purN -X /home/patman/dontdiff 25-bk-base/drivers/scsi/hosts.h starve-25/drivers/scsi/hosts.h
--- 25-bk-base/drivers/scsi/hosts.h	Wed Mar 19 11:54:28 2003
+++ starve-25/drivers/scsi/hosts.h	Wed Mar 19 16:36:40 2003
@@ -380,6 +380,7 @@ struct Scsi_Host
     struct scsi_host_cmd_pool *cmd_pool;
     spinlock_t            free_list_lock;
     struct list_head      free_list;   /* backup store of cmd structs */
+    struct list_head      starved_list;	/* head of queue limited by can_queue */
 
     spinlock_t		  default_lock;
     spinlock_t		  *host_lock;
@@ -471,12 +472,6 @@ struct Scsi_Host
     unsigned reverse_ordering:1;
 
     /*
-     * Indicates that one or more devices on this host were starved, and
-     * when the device becomes less busy that we need to feed them.
-     */
-    unsigned some_device_starved:1;
-   
-    /*
      * Host has rejected a command because it was busy.
      */
     unsigned int host_blocked;
diff -purN -X /home/patman/dontdiff 25-bk-base/drivers/scsi/scsi.h starve-25/drivers/scsi/scsi.h
--- 25-bk-base/drivers/scsi/scsi.h	Wed Mar 19 11:54:28 2003
+++ starve-25/drivers/scsi/scsi.h	Wed Mar 19 16:36:40 2003
@@ -555,6 +555,7 @@ struct scsi_device {
 	volatile unsigned short device_busy;	/* commands actually active on low-level */
 	spinlock_t list_lock;
 	struct list_head cmd_list;	/* queue of in use SCSI Command structures */
+	struct list_head starved_entry;	/* next queue limited via can_queue */
         Scsi_Cmnd *current_cmnd;	/* currently active command */
 	unsigned short queue_depth;	/* How deep of a queue we want */
 	unsigned short last_queue_full_depth; /* These two are used by */
@@ -615,8 +616,6 @@ struct scsi_device {
 					 * because we did a bus reset. */
 	unsigned ten:1;		/* support ten byte read / write */
 	unsigned remap:1;	/* support remapping  */
-	unsigned starved:1;	/* unable to process commands because
-				   host busy */
 //	unsigned sync:1;	/* Sync transfer state, managed by host */
 //	unsigned wide:1;	/* WIDE transfer state, managed by host */
 
diff -purN -X /home/patman/dontdiff 25-bk-base/drivers/scsi/scsi_lib.c starve-25/drivers/scsi/scsi_lib.c
--- 25-bk-base/drivers/scsi/scsi_lib.c	Wed Mar 19 11:54:28 2003
+++ starve-25/drivers/scsi/scsi_lib.c	Wed Mar 19 16:36:40 2003
@@ -356,7 +356,6 @@ static void scsi_queue_next_request(requ
 	struct scsi_device *sdev, *sdev2;
 	struct Scsi_Host *shost;
 	unsigned long flags;
-	int all_clear;
 
 	ASSERT_LOCK(q->queue_lock, 0);
 
@@ -383,11 +382,6 @@ static void scsi_queue_next_request(requ
 		__elv_add_request(q, cmd->request, 0, 0);
 	}
 
-	/*
-	 * Just hit the requeue function for the queue.
-	 */
-	__blk_run_queue(q);
-
 	sdev = q->queuedata;
 	shost = sdev->host;
 
@@ -412,31 +406,24 @@ static void scsi_queue_next_request(requ
 		}
 	}
 
-	/*
-	 * Now see whether there are other devices on the bus which
-	 * might be starved.  If so, hit the request function.  If we
-	 * don't find any, then it is safe to reset the flag.  If we
-	 * find any device that it is starved, it isn't safe to reset the
-	 * flag as the queue function releases the lock and thus some
-	 * other device might have become starved along the way.
-	 */
-	all_clear = 1;
-	if (shost->some_device_starved) {
-		list_for_each_entry(sdev, &shost->my_devices, siblings) {
-			if (shost->can_queue > 0 &&
-			    shost->host_busy >= shost->can_queue)
-				break;
-			if (shost->host_blocked || shost->host_self_blocked)
-				break;
-			if (sdev->device_blocked || !sdev->starved)
-				continue;
-			__blk_run_queue(sdev->request_queue);
-			all_clear = 0;
-		}
-
-		if (sdev == NULL && all_clear)
-			shost->some_device_starved = 0;
+	while (!list_empty(&shost->starved_list) &&
+	       !shost->host_blocked && !shost->host_self_blocked &&
+		!((shost->can_queue > 0) &&
+		  (shost->host_busy >= shost->can_queue))) {
+		/*
+		 * As long as shost is accepting commands and we have
+		 * starved queues, call __blk_run_queue. scsi_request_fn
+		 * drops the queue_lock and can add us back to the
+		 * starved_list.
+		 */
+		sdev2 = list_entry(shost->starved_list.next,
+					  struct scsi_device, starved_entry);
+		list_del_init(&sdev2->starved_entry);
+		__blk_run_queue(sdev2->request_queue);
 	}
+
+	__blk_run_queue(q);
+
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
@@ -1115,23 +1102,16 @@ static void scsi_request_fn(request_queu
 		 */
 		if (sdev->device_blocked)
 			break;
+
+		if (!list_empty(&sdev->starved_entry))
+			break;
+
 		if ((shost->can_queue > 0 && shost->host_busy >= shost->can_queue) ||
 		    shost->host_blocked || shost->host_self_blocked) {
-			/*
-			 * If we are unable to process any commands at all for
-			 * this device, then we consider it to be starved.
-			 * What this means is that there are no outstanding
-			 * commands for this device and hence we need a
-			 * little help getting it started again
-			 * once the host isn't quite so busy.
-			 */
-			if (sdev->device_busy == 0) {
-				sdev->starved = 1;
-				shost->some_device_starved = 1;
-			}
+			list_add_tail(&sdev->starved_entry,
+				      &shost->starved_list);
 			break;
-		} else
-			sdev->starved = 0;
+		}
 
 		/*
 		 * If we couldn't find a request that could be queued, then we
diff -purN -X /home/patman/dontdiff 25-bk-base/drivers/scsi/scsi_scan.c starve-25/drivers/scsi/scsi_scan.c
--- 25-bk-base/drivers/scsi/scsi_scan.c	Wed Mar 19 11:54:28 2003
+++ starve-25/drivers/scsi/scsi_scan.c	Wed Mar 19 16:36:40 2003
@@ -400,6 +400,7 @@ static struct scsi_device *scsi_alloc_sd
 	INIT_LIST_HEAD(&sdev->siblings);
 	INIT_LIST_HEAD(&sdev->same_target_siblings);
 	INIT_LIST_HEAD(&sdev->cmd_list);
+	INIT_LIST_HEAD(&sdev->starved_entry);
 	spin_lock_init(&sdev->list_lock);
 
 	/*

-- Patrick Mansfield

* Re: [PATCH] 2.5.x use list_head to handle scsi starved request queues
  2003-03-20  2:27 [PATCH] 2.5.x use list_head to handle scsi starved request queues Patrick Mansfield
@ 2003-03-20 20:05 ` Luben Tuikov
  2003-03-21  4:39   ` Patrick Mansfield
  0 siblings, 1 reply; 14+ messages in thread
From: Luben Tuikov @ 2003-03-20 20:05 UTC (permalink / raw)
  To: Patrick Mansfield; +Cc: James Bottomley, linux-scsi

Patrick Mansfield wrote:
> This patch (against 2.5 bk on march 18) fixes a few problems with the
> linux scsi "starved" algorithm.

Patch is fine by me.  Comments inlined:

> It uses a list_head per scsi_host to store a list of scsi request queues
> that were "starved" (they were not able to send IO because of per host
> limitations).

Then this should probably be your comment for the list variable, inlined:

> 
> diff -purN -X /home/patman/dontdiff 25-bk-base/drivers/scsi/hosts.c starve-25/drivers/scsi/hosts.c
> --- 25-bk-base/drivers/scsi/hosts.c	Wed Mar 19 11:54:28 2003
> +++ starve-25/drivers/scsi/hosts.c	Wed Mar 19 16:36:40 2003
> @@ -383,6 +383,7 @@ struct Scsi_Host * scsi_register(Scsi_Ho
>  	scsi_assign_lock(shost, &shost->default_lock);
>  	INIT_LIST_HEAD(&shost->my_devices);
>  	INIT_LIST_HEAD(&shost->eh_cmd_q);
> +	INIT_LIST_HEAD(&shost->starved_list);
>  
>  	init_waitqueue_head(&shost->host_wait);
>  	shost->dma_channel = 0xff;
> diff -purN -X /home/patman/dontdiff 25-bk-base/drivers/scsi/hosts.h starve-25/drivers/scsi/hosts.h
> --- 25-bk-base/drivers/scsi/hosts.h	Wed Mar 19 11:54:28 2003
> +++ starve-25/drivers/scsi/hosts.h	Wed Mar 19 16:36:40 2003
> @@ -380,6 +380,7 @@ struct Scsi_Host
>      struct scsi_host_cmd_pool *cmd_pool;
>      spinlock_t            free_list_lock;
>      struct list_head      free_list;   /* backup store of cmd structs */
> +    struct list_head      starved_list;	/* head of queue limited by can_queue */

We know that it's a ``head'' and that it's a ``queue'', i.e. we know
``what it is'' -- this is obvious from the code itself.

What we *don't* know is what it *means*.  So your comment should read:
/* queue of starved device structs via can_queue */ -- short, succinct,
and to the point. (``via'' to be understood as ``by means of'')

Meaningful commenting is important to make the code more manageable
and readable for future developers, and even for yourself after
X amount of time.

>  
>      spinlock_t		  default_lock;
>      spinlock_t		  *host_lock;
> @@ -471,12 +472,6 @@ struct Scsi_Host
>      unsigned reverse_ordering:1;
>  
>      /*
> -     * Indicates that one or more devices on this host were starved, and
> -     * when the device becomes less busy that we need to feed them.
> -     */
> -    unsigned some_device_starved:1;
> -   
> -    /*
>       * Host has rejected a command because it was busy.
>       */
>      unsigned int host_blocked;
> diff -purN -X /home/patman/dontdiff 25-bk-base/drivers/scsi/scsi.h starve-25/drivers/scsi/scsi.h
> --- 25-bk-base/drivers/scsi/scsi.h	Wed Mar 19 11:54:28 2003
> +++ starve-25/drivers/scsi/scsi.h	Wed Mar 19 16:36:40 2003
> @@ -555,6 +555,7 @@ struct scsi_device {
>  	volatile unsigned short device_busy;	/* commands actually active on low-level */
>  	spinlock_t list_lock;
>  	struct list_head cmd_list;	/* queue of in use SCSI Command structures */
> +	struct list_head starved_entry;	/* next queue limited via can_queue */

The comment here should probably be:
/* if starved, part of the starved_list */

Please do not talk about ``queue limited by can_queue'' -- i.e. do
not talk about request queues/block queues, they have no place in the
comment here -- you can mention them in the core part of the implementation
of the starved algo below.

>          Scsi_Cmnd *current_cmnd;	/* currently active command */
>  	unsigned short queue_depth;	/* How deep of a queue we want */
>  	unsigned short last_queue_full_depth; /* These two are used by */
> @@ -615,8 +616,6 @@ struct scsi_device {
>  					 * because we did a bus reset. */
>  	unsigned ten:1;		/* support ten byte read / write */
>  	unsigned remap:1;	/* support remapping  */
> -	unsigned starved:1;	/* unable to process commands because
> -				   host busy */
>  //	unsigned sync:1;	/* Sync transfer state, managed by host */
>  //	unsigned wide:1;	/* WIDE transfer state, managed by host */
>  
> diff -purN -X /home/patman/dontdiff 25-bk-base/drivers/scsi/scsi_lib.c starve-25/drivers/scsi/scsi_lib.c
> --- 25-bk-base/drivers/scsi/scsi_lib.c	Wed Mar 19 11:54:28 2003
> +++ starve-25/drivers/scsi/scsi_lib.c	Wed Mar 19 16:36:40 2003
> @@ -356,7 +356,6 @@ static void scsi_queue_next_request(requ
>  	struct scsi_device *sdev, *sdev2;
>  	struct Scsi_Host *shost;
>  	unsigned long flags;
> -	int all_clear;
>  
>  	ASSERT_LOCK(q->queue_lock, 0);
>  
> @@ -383,11 +382,6 @@ static void scsi_queue_next_request(requ
>  		__elv_add_request(q, cmd->request, 0, 0);
>  	}
>  
> -	/*
> -	 * Just hit the requeue function for the queue.
> -	 */
> -	__blk_run_queue(q);
> -
>  	sdev = q->queuedata;
>  	shost = sdev->host;
>  
> @@ -412,31 +406,24 @@ static void scsi_queue_next_request(requ
>  		}
>  	}
>  
> -	/*
> -	 * Now see whether there are other devices on the bus which
> -	 * might be starved.  If so, hit the request function.  If we
> -	 * don't find any, then it is safe to reset the flag.  If we
> -	 * find any device that it is starved, it isn't safe to reset the
> -	 * flag as the queue function releases the lock and thus some
> -	 * other device might have become starved along the way.
> -	 */
> -	all_clear = 1;
> -	if (shost->some_device_starved) {
> -		list_for_each_entry(sdev, &shost->my_devices, siblings) {
> -			if (shost->can_queue > 0 &&
> -			    shost->host_busy >= shost->can_queue)
> -				break;
> -			if (shost->host_blocked || shost->host_self_blocked)
> -				break;
> -			if (sdev->device_blocked || !sdev->starved)
> -				continue;
> -			__blk_run_queue(sdev->request_queue);
> -			all_clear = 0;
> -		}
> -
> -		if (sdev == NULL && all_clear)
> -			shost->some_device_starved = 0;

Now here you can put a 3-4 line comment talking about queues and
what not.  I.e. in place of the older comment.

> +	while (!list_empty(&shost->starved_list) &&
> +	       !shost->host_blocked && !shost->host_self_blocked &&
> +		!((shost->can_queue > 0) &&
> +		  (shost->host_busy >= shost->can_queue))) {
> +		/*
> +		 * As long as shost is accepting commands and we have
> +		 * starved queues, call __blk_run_queue. scsi_request_fn
> +		 * drops the queue_lock and can add us back to the
> +		 * starved_list.
> +		 */
> +		sdev2 = list_entry(shost->starved_list.next,
> +					  struct scsi_device, starved_entry);
> +		list_del_init(&sdev2->starved_entry);
> +		__blk_run_queue(sdev2->request_queue);
>  	}
> +
> +	__blk_run_queue(q);
> +
>  	spin_unlock_irqrestore(q->queue_lock, flags);
>  }

Why don't you *first* hit the ``q'' queue and unlock it and then, i.e. afterwards,
go over starved_list.

Or you can do it the other way around, i.e. assume prioritization, which I strongly
advise *against* -- the _caller_ may have handled prioritization already.

>  
> @@ -1115,23 +1102,16 @@ static void scsi_request_fn(request_queu
>  		 */
>  		if (sdev->device_blocked)
>  			break;
> +
> +		if (!list_empty(&sdev->starved_entry))
> +			break;
> +
>  		if ((shost->can_queue > 0 && shost->host_busy >= shost->can_queue) ||
>  		    shost->host_blocked || shost->host_self_blocked) {
> -			/*
> -			 * If we are unable to process any commands at all for
> -			 * this device, then we consider it to be starved.
> -			 * What this means is that there are no outstanding
> -			 * commands for this device and hence we need a
> -			 * little help getting it started again
> -			 * once the host isn't quite so busy.
> -			 */
> -			if (sdev->device_busy == 0) {
> -				sdev->starved = 1;
> -				shost->some_device_starved = 1;
> -			}
> +			list_add_tail(&sdev->starved_entry,
> +				      &shost->starved_list);
>  			break;
> -		} else
> -			sdev->starved = 0;
> +		}
>  
>  		/*
>  		 * If we couldn't find a request that could be queued, then we
> diff -purN -X /home/patman/dontdiff 25-bk-base/drivers/scsi/scsi_scan.c starve-25/drivers/scsi/scsi_scan.c
> --- 25-bk-base/drivers/scsi/scsi_scan.c	Wed Mar 19 11:54:28 2003
> +++ starve-25/drivers/scsi/scsi_scan.c	Wed Mar 19 16:36:40 2003
> @@ -400,6 +400,7 @@ static struct scsi_device *scsi_alloc_sd
>  	INIT_LIST_HEAD(&sdev->siblings);
>  	INIT_LIST_HEAD(&sdev->same_target_siblings);
>  	INIT_LIST_HEAD(&sdev->cmd_list);
> +	INIT_LIST_HEAD(&sdev->starved_entry);
>  	spin_lock_init(&sdev->list_lock);
>  
>  	/*
> 
> -- Patrick Mansfield

-- 
Luben






* Re: [PATCH] 2.5.x use list_head to handle scsi starved request queues
  2003-03-20 20:05 ` Luben Tuikov
@ 2003-03-21  4:39   ` Patrick Mansfield
  2003-03-21 20:48     ` Luben Tuikov
  0 siblings, 1 reply; 14+ messages in thread
From: Patrick Mansfield @ 2003-03-21  4:39 UTC (permalink / raw)
  To: Luben Tuikov; +Cc: James Bottomley, linux-scsi

On Thu, Mar 20, 2003 at 03:05:09PM -0500, Luben Tuikov wrote:
> Patrick Mansfield wrote:
> > This patch (against 2.5 bk on march 18) fixes a few problems with the
> > linux scsi "starved" algorithm.
> 
> Patch is fine by me.  Comments inlined:
> 
> > It uses a list_head per scsi_host to store a list of scsi request queues
> > that were "starved" (they were not able to send IO because of per host
> > limitations).
> 
> Then this should probably be your comment for the list variable, inlined:

Okay, the comments are not clear, and don't match the code, I'll remove
them rather than try to get something terse and meaningful.

> > +	__blk_run_queue(q);
> > +
> >  	spin_unlock_irqrestore(q->queue_lock, flags);
> >  }
> 
> Why don't you *first* hit the ``q'' queue and unlock it and then, i.e. afterwards,
> go over starved_list.
> 
> Or you can do it the other way around, i.e. assume prioritization, which I strongly
> advise *against* -- the _caller_ may have handled prioritization already.

I'm trying to give priority to scsi_devices that have not been able to
send IO. If we call __blk_run_queue(q) first, one busy scsi_device could
starve all other scsi_devices on an adapter.

I don't see how the caller can have a priority (across scsi_devices), as
there is no prioritization across different request queues. The blk layer
should already have prioritized/sorted the requests (per request queue).

-- Patrick Mansfield

* Re: [PATCH] 2.5.x use list_head to handle scsi starved request queues
  2003-03-21  4:39   ` Patrick Mansfield
@ 2003-03-21 20:48     ` Luben Tuikov
  2003-03-22  0:50       ` Patrick Mansfield
  0 siblings, 1 reply; 14+ messages in thread
From: Luben Tuikov @ 2003-03-21 20:48 UTC (permalink / raw)
  To: Patrick Mansfield; +Cc: James Bottomley, linux-scsi

Patrick Mansfield wrote:
> 
> Okay, the comments are not clear, and don't match the code, I'll remove
> them rather than try to get something terse and meaningful.

No, please -- you don't have to go from one extreme to the other.

Comments are good, please put some, just make sure that they don't tell
things which can already be determined like int a; /* integer counter */,
but something like int a; /* number of customers served */, i.e. portray
the meaning.

>>Why don't you *first* hit the ``q'' queue and unlock it and then, i.e. afterwards,
>>go over starved_list.
>>
>>Or you can do it the other way around, i.e. assume prioritization, which I strongly
>>advise *against* -- the _caller_ may have handled prioritization already.
> 
> 
> I'm trying to give priority to scsi_devices that have not been able to
> send IO. If we call __blk_run_queue(q) first, one busy scsi_device could
> starve all other scsi_devices on an adapter.

Ok, then go over the list of starved scsi devices FIRST, hit their queues,
and THEN after this, get the lock for the queue and hit it.

I.e. keep the queue lock for the shortest amount of time.

> I don't see how the caller can have a priority (across scsi_devices), as
> there is no prioritization across different request queues. The blk layer
> should already have prioritized/sorted the requests (per request queue).

What I meant is: all this is happening inside scsi_queue_next_request().
Now, since this function takes a request queue as an argument, it is
possible, though unlikely, that the caller is calling this function for
a set of request queues in the order of some given criterion.

In this respect I said that it could have already been prioritized.

-- 
Luben



* Re: [PATCH] 2.5.x use list_head to handle scsi starved request queues
  2003-03-21 20:48     ` Luben Tuikov
@ 2003-03-22  0:50       ` Patrick Mansfield
  2003-03-24 17:12         ` Luben Tuikov
  0 siblings, 1 reply; 14+ messages in thread
From: Patrick Mansfield @ 2003-03-22  0:50 UTC (permalink / raw)
  To: Luben Tuikov; +Cc: James Bottomley, linux-scsi

On Fri, Mar 21, 2003 at 03:48:26PM -0500, Luben Tuikov wrote:
> Patrick Mansfield wrote:
> > 
> > Okay, the comments are not clear, and don't match the code, I'll remove
> > them rather than try to get something terse and meaningful.
> 
> No, please -- you don't have to go from one extreme to the other.

> Comments are good, please put some, just make sure that they don't tell
> things which can already be determined like int a; /* integer counter */,
> but something like int a; /* number of customers served */, i.e. portray
> the meaning.

The comments (just the two in hosts.h and scsi.h) are close to trivial,
and the code is not tricky, so I'll remove them.

> Ok, then go over the list of starved scsi devices FIRST, hit their queues,
> and THEN after this, get the lock for the queue and hit it.
> 
> I.e. keep the queue lock for the shortest amount of time.

The lock has to be held while checking shost->host_busy and then when
removing the starved entry, we can have multiple cpu's in the function for
the same adapter at the same time. Plus the lock has to be acquired prior
to any __blk_run_queue call.

I am still working on patches for the lock split-up in this area.

-- Patrick Mansfield

* Re: [PATCH] 2.5.x use list_head to handle scsi starved request queues
  2003-03-22  0:50       ` Patrick Mansfield
@ 2003-03-24 17:12         ` Luben Tuikov
  2003-03-24 19:29           ` Patrick Mansfield
  0 siblings, 1 reply; 14+ messages in thread
From: Luben Tuikov @ 2003-03-24 17:12 UTC (permalink / raw)
  To: Patrick Mansfield; +Cc: James Bottomley, linux-scsi

Patrick Mansfield wrote:
> 
> The lock has to be held while checking shost->host_busy and then when
> removing the starved entry, we can have multiple cpu's in the function for
> the same adapter at the same time. Plus the lock has to be acquired prior
> to any __blk_run_queue call.

If scsi_queue_next_request(q, cmd) is running on more than one CPU
and q != q1 then you have a problem with the starved devices list.

I.e. the lock for an object should NOT depend on the circumstances --
an object (or set of objects) should _always_ use the same lock, if
any.

For this reason, when you obtain the q->queue_lock, just use it
around your critical section and __blk_run_queue(), in a minimalistic
approach, then release it.

Then have a starved_list_lock, or use the host->lock (extreme and I do NOT
recommend it) to lock your starved_list.

So, obtain the starved_list_lock, go over the devices, get each request
queue's lock and call __blk_run_queue(sdev->request_queue), then release
the sdev->request_queue->queue_lock and loop again if needed; when done,
release the starved_list_lock.
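
In code, roughly (a sketch only -- starved_list_lock is a new lock I'm
suggesting here, it doesn't exist anywhere yet, and the interaction with
scsi_request_fn re-adding devices to the list would still need care):

	spin_lock_irqsave(&shost->starved_list_lock, flags);
	while (!list_empty(&shost->starved_list)) {
		sdev = list_entry(shost->starved_list.next,
				  struct scsi_device, starved_entry);
		list_del_init(&sdev->starved_entry);

		/* queue_lock is a pointer, possibly per-device one day */
		spin_lock(sdev->request_queue->queue_lock);
		__blk_run_queue(sdev->request_queue);
		spin_unlock(sdev->request_queue->queue_lock);
	}
	spin_unlock_irqrestore(&shost->starved_list_lock, flags);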

Remember that someone else might want to *rearrange* the order of
devices in the starved list for, say, _prioritization_, at some other
time when there is no queue in context! (Thus a starved_list_lock would
make the most sense.)

> I am still working on patches for the lock split-up in this area.

Good!

-- 
Luben



* Re: [PATCH] 2.5.x use list_head to handle scsi starved request queues
  2003-03-24 17:12         ` Luben Tuikov
@ 2003-03-24 19:29           ` Patrick Mansfield
  2003-03-24 20:20             ` Luben Tuikov
  0 siblings, 1 reply; 14+ messages in thread
From: Patrick Mansfield @ 2003-03-24 19:29 UTC (permalink / raw)
  To: Luben Tuikov; +Cc: James Bottomley, linux-scsi

On Mon, Mar 24, 2003 at 12:12:07PM -0500, Luben Tuikov wrote:
> Patrick Mansfield wrote:
> > 
> > The lock has to be held while checking shost->host_busy and then when
> > removing the starved entry, we can have multiple cpu's in the function for
> > the same adapter at the same time. Plus the lock has to be acquired prior
> > to any __blk_run_queue call.
> 
> If scsi_queue_next_request(q, cmd) is running on more than one CPU
> and q != q1 then you have a problem with the starved devices list.

Not with the current locking: we lock queue_lock (equal to host_lock)
at the start of scsi_queue_next_request, and unlock at the end of
scsi_queue_next_request.

I was not addressing any locking issues with this patch; the queue_lock
(or host_lock) is protecting the starved list.

-- Patrick Mansfield

* Re: [PATCH] 2.5.x use list_head to handle scsi starved request queues
  2003-03-24 19:29           ` Patrick Mansfield
@ 2003-03-24 20:20             ` Luben Tuikov
  2003-03-24 20:25               ` Jens Axboe
  0 siblings, 1 reply; 14+ messages in thread
From: Luben Tuikov @ 2003-03-24 20:20 UTC (permalink / raw)
  To: Patrick Mansfield; +Cc: James Bottomley, linux-scsi

Patrick Mansfield wrote:
> On Mon, Mar 24, 2003 at 12:12:07PM -0500, Luben Tuikov wrote:
> 
>>If scsi_queue_next_request(q, cmd) is running on more than one CPU
>>and q != q1 then you have a problem with the starved devices list.
> 
> 
> Not with the current locking: we lock queue_lock (equal to host_lock)
> at the start of scsi_queue_next_request, and unlock at the end of
> scsi_queue_next_request.

Are you sure that all devices' request queues will use
the same lock (the host lock) in the future?

My problem is that your patch code for scsi_queue_next_request()
behaves as if q->queue_lock is the same for all devices' request_queues
of the host.  And this is a _special case_, valid only at the present!!!!

I absolutely _detest_ the fact that the host_lock is used to lock LLDD's
entry points, SCSI Core's data, *and* the request queues for
all devices of the host... Talk about serialization!

But okay, if you want to leave it like it is, holding the q->lock
all over the place, _*PLEASE*_, put a comment saying that
the q->lock is actually the host_lock and locks all the
queues for all devices on the host... similar to my ``detest''
paragraph above.  And if this were to be ever changed then
the individual locks will have to be attained and released...
and lots of code _rewritten_.

This of course forbids you to touch the starved_list in ANY other
context than that of holding _a_ q->queue_lock, and this is _again_ wrong...
(because if the queues were to get their own locks, then you've
got the starved list mangled badly...) (BTDT)

Even if the infrastructure is wrong in principle, the code doesn't
have to be.  I'll try to mock up a patch for ``show and tell''. :-)

(Oh, well...)
-- 
Luben
P.S. Ideally, I'd rather have individual q->locks, no host lock (except for
LLDD's sake), and SCSI Core completely multithread safe.




* Re: [PATCH] 2.5.x use list_head to handle scsi starved request queues
  2003-03-24 20:20             ` Luben Tuikov
@ 2003-03-24 20:25               ` Jens Axboe
  2003-03-24 20:38                 ` Patrick Mansfield
  2003-03-24 21:30                 ` Luben Tuikov
  0 siblings, 2 replies; 14+ messages in thread
From: Jens Axboe @ 2003-03-24 20:25 UTC (permalink / raw)
  To: Luben Tuikov; +Cc: Patrick Mansfield, James Bottomley, linux-scsi

On Mon, Mar 24 2003, Luben Tuikov wrote:
> Patrick Mansfield wrote:
> >On Mon, Mar 24, 2003 at 12:12:07PM -0500, Luben Tuikov wrote:
> >
> >>If scsi_queue_next_request(q, cmd) is running on more than one CPU
> >>and q != q1 then you have a problem with the starved devices list.
> >
> >
> >Not with the current locking: we lock queue_lock (equal to host_lock)
> >at the start of scsi_queue_next_request, and unlock at the end of
> >scsi_queue_next_request.
> 
> Are you sure that all devices' request queues will use
> the same lock (the host lock) in the future?
> 
> My problem is that your patch code for scsi_queue_next_request()
> behaves as if q->queue_lock is the same for all devices' request_queues
> of the host.  And this is a _special case_, valid only at the present!!!!
> 
> I absolutely _detest_ the fact that the host_lock is used to lock LLDD's
> entry points, SCSI Core's data, *and* the request queues for
> all devices of the host... Talk about serialization!
> 
> But okay, if you want to leave it like it is, holding the q->lock
> all over the place, _*PLEASE*_, put a comment saying that
> the q->lock is actually the host_lock and locks all the
> queues for all devices on the host... similar to my ``detest''
> paragraph above.  And if this were to be ever changed then
> the individual locks will have to be attained and released...
> and lots of code _rewritten_.

Irk no, that's quite a bad idea.

I completely agree with you that making assumptions about q->queue_lock
== host lock is really really bad. Don't do that.

-- 
Jens Axboe


* Re: [PATCH] 2.5.x use list_head to handle scsi starved request queues
  2003-03-24 20:25               ` Jens Axboe
@ 2003-03-24 20:38                 ` Patrick Mansfield
  2003-03-24 21:25                   ` Luben Tuikov
  2003-03-24 21:30                 ` Luben Tuikov
  1 sibling, 1 reply; 14+ messages in thread
From: Patrick Mansfield @ 2003-03-24 20:38 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Luben Tuikov, James Bottomley, linux-scsi

On Mon, Mar 24, 2003 at 09:25:09PM +0100, Jens Axboe wrote:
> On Mon, Mar 24 2003, Luben Tuikov wrote:
> > Patrick Mansfield wrote:
> > >On Mon, Mar 24, 2003 at 12:12:07PM -0500, Luben Tuikov wrote:
> > >
> > >>If scsi_queue_next_request(q, cmd) is running on more than one CPU
> > >>and q != q1 then you have a problem with the starved devices list.
> > >
> > >
> > >Not with the current locking: we lock queue_lock (equal to host_lock)
> > >at the start of scsi_queue_next_request, and unlock at the end of
> > >scsi_queue_next_request.
> > 
> > Are you sure that all devices' request queues will use
> > the same lock (the host lock) in the future?

That is another patch.

> Irk no, that's quite a bad idea.
> 
> I completely agree with you that making assumptions about q->queue_lock
> == host lock is really really bad. Don't do that.
> 
> -- 
> Jens Axboe

Yes, it's bad (with or without the patch in question), I am working on
patches to make queue_lock per scsi_device (queue_lock != host_lock),
where I'm forced to make changes in this same area.

-- Patrick Mansfield

* Re: [PATCH] 2.5.x use list_head to handle scsi starved request queues
  2003-03-24 20:38                 ` Patrick Mansfield
@ 2003-03-24 21:25                   ` Luben Tuikov
  2003-03-24 21:56                     ` Patrick Mansfield
  0 siblings, 1 reply; 14+ messages in thread
From: Luben Tuikov @ 2003-03-24 21:25 UTC (permalink / raw)
  To: Patrick Mansfield; +Cc: Jens Axboe, James Bottomley, linux-scsi

Patrick Mansfield wrote:
> 
> That is another patch.

Then you should've sent it before this one.

But nevertheless, the code of the patch you posted
assumed that q->queue_lock is the same for all
devices' request queues.  This was my beef
since the very beginning of this thread.

> 
>>Irk no, that's quite a bad idea.
>>
>>I completely agree with you that making assumptions about q->queue_lock
>>== host lock is really really bad. Don't do that.
> 
> Yes, it's bad (with or without the patch in question), I am working on

No!  With the patch in question.

The rest of SCSI Core assumes that q->queue_lock has nothing to do
with any other q's lock.

> patches to make queue_lock per scsi_device (queue_lock != host_lock),
> where I'm forced to make changes in this same area.

-- 
Luben




* Re: [PATCH] 2.5.x use list_head to handle scsi starved request queues
  2003-03-24 20:25               ` Jens Axboe
  2003-03-24 20:38                 ` Patrick Mansfield
@ 2003-03-24 21:30                 ` Luben Tuikov
  1 sibling, 0 replies; 14+ messages in thread
From: Luben Tuikov @ 2003-03-24 21:30 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Patrick Mansfield, James Bottomley, linux-scsi

Jens Axboe wrote:
> 
> 
> Irk no, that's quite a bad idea.

Of course it's a bad idea -- that's what I've been saying in
all my emails in this thread, but no one listens, until now.

> I completely agree with you that making assumptions about q->queue_lock
> == host lock is really really bad. Don't do that.

Well, Jens, after 4 emails saying the same thing in different
words, but seeing no feedback, I _had to_ yield.

But I'm nevertheless learning how to participate in
mailing lists -- just keep saying the same thing
in different words until people get what you're trying
to say.

-- 
Luben



* Re: [PATCH] 2.5.x use list_head to handle scsi starved request queues
  2003-03-24 21:25                   ` Luben Tuikov
@ 2003-03-24 21:56                     ` Patrick Mansfield
  2003-03-24 22:15                       ` Luben Tuikov
  0 siblings, 1 reply; 14+ messages in thread
From: Patrick Mansfield @ 2003-03-24 21:56 UTC (permalink / raw)
  To: Luben Tuikov; +Cc: Jens Axboe, James Bottomley, linux-scsi

On Mon, Mar 24, 2003 at 04:25:42PM -0500, Luben Tuikov wrote:
> Patrick Mansfield wrote:
> > 
> > That is another patch.
> 
> Then you should've sent it before this one.

I would have if I were modifying the locking code.

> But nevertheless, the code of the patch you posted
> assumed that q->queue_lock is the same for all
> devices' request queues.  This was my beef
> since the very beginning of this thread.

Yes, and that is no different from the current code.

> >>Irk no, that's quite a bad idea.
> >>
> >>I completely agree with you that making assumptions about q->queue_lock
> >>== host lock is really really bad. Don't do that.
> > 
> > Yes, it's bad (with or without the patch in question), I am working on
> 
> No!  With the patch in question.
> 
> The rest of SCSI Core assumes that q->queue_lock has nothing to do
> with any other q's lock.

The current code in scsi_queue_next_request() assumes queue_lock ==
host_lock, as does code in scsi_error.c. We get a queue_lock (or
host_lock), and then iterate over different q's calling
__blk_run_queue(q).
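
I.e. the existing pattern is roughly (just a sketch of what is already
there today, with sdev2 standing in for some other device on the host):

	spin_lock_irqsave(q->queue_lock, flags);   /* today also the host lock */
	/* ... under that one lock, run another device's queue ... */
	__blk_run_queue(sdev2->request_queue);
	spin_unlock_irqrestore(q->queue_lock, flags);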

-- Patrick Mansfield

* Re: [PATCH] 2.5.x use list_head to handle scsi starved request queues
  2003-03-24 21:56                     ` Patrick Mansfield
@ 2003-03-24 22:15                       ` Luben Tuikov
  0 siblings, 0 replies; 14+ messages in thread
From: Luben Tuikov @ 2003-03-24 22:15 UTC (permalink / raw)
  To: Patrick Mansfield; +Cc: Jens Axboe, James Bottomley, linux-scsi

Patrick Mansfield wrote:
> 
> The current code in scsi_queue_next_request() assumes queue_lock ==
> host_lock, as does code in scsi_error.c. We get a queue_lock (or
> host_lock), and then iterate over different q's calling
> __blk_run_queue(q).

1. Please don't speak of queue_lock and host_lock as if they are
the same thing.  They are NOT, even though one points to the other.
This is an infrastructure bug and will have to go.

2. Yes I see this in scsi_error.c. How did this ever slip in!
(yes this is NOT a question, but a statement)

-- 
Luben


