linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Chris Mason <mason@suse.com>
To: Andrea Arcangeli <andrea@suse.de>
Cc: Jens Axboe <axboe@suse.de>,
	Marcelo Tosatti <marcelo@conectiva.com.br>,
	lkml <linux-kernel@vger.kernel.org>,
	"Stephen C. Tweedie" <sct@redhat.com>,
	Alan Cox <alan@lxorguk.ukuu.org.uk>,
	Jeff Garzik <jgarzik@pobox.com>, Andrew Morton <akpm@digeo.com>,
	Alexander Viro <viro@math.psu.edu>
Subject: Re: RFC on io-stalls patch
Date: 14 Jul 2003 17:24:59 -0400	[thread overview]
Message-ID: <1058217899.13317.338.camel@tiny.suse.com> (raw)
In-Reply-To: <20030714201924.GR16313@dualathlon.random>

On Mon, 2003-07-14 at 16:19, Andrea Arcangeli wrote:

> > Could I talk you into trying a form of this patch that honors
> > blk_oversized_queue for everything except the BH_sync requests?
> 
> not honoring blk_oversized_queue for BH_sync isn't safe either (still it
> would be an order of magnitude safer than not honoring it for all reads
> unconditonally)  there must be a secondary limit, eating all the
> requests with potentially 512k of ram queued into each requests is
> unsafe (one can generate many sync requests by forking some hundred
> thousand tasks, this isn't only x86).

Fair enough.  This patch allows sync requests to steal up to
q->batch_sectors of additional requests.  This is probably too big a
number but it had the benefit of being already calculated for me.

I need to replace that q->rq.count < 4 crud with a constant or just drop
it entirely.  I like that elevator-lowlatency is more concerned with
sectors in flight than free requests, it would probably be best to keep
it that way.

In other words, this patch is for discussion only.  It allows BH_Sync to
be set for write requests as well, since my original idea for it was to
give lower latencies to any request the rest of the box might be waiting
on (like a log commit).

-chris

--- 1.48/drivers/block/ll_rw_blk.c	Fri Jul 11 15:08:30 2003
+++ edited/drivers/block/ll_rw_blk.c	Mon Jul 14 17:15:35 2003
@@ -546,13 +546,20 @@
  * Get a free request. io_request_lock must be held and interrupts
  * disabled on the way in.  Returns NULL if there are no free requests.
  */
-static struct request *get_request(request_queue_t *q, int rw)
+static struct request *get_request(request_queue_t *q, int rw, int sync)
 {
 	struct request *rq = NULL;
-	struct request_list *rl;
+	struct request_list *rl = &q->rq;
 
-	rl = &q->rq;
-	if (!list_empty(&rl->free) && !blk_oversized_queue(q)) {
+	if (sync) {
+		if (blk_oversized_queue_sync(q))
+			return NULL;
+	} else {
+		if (blk_oversized_queue(q) || q->rq.count < 4)
+			return NULL;
+	}
+
+	if (!list_empty(&rl->free)) {
 		rq = blkdev_free_rq(&rl->free);
 		list_del(&rq->queue);
 		rl->count--;
@@ -608,7 +615,7 @@
  *   wakeups where there aren't any requests available yet.
  */
 
-static struct request *__get_request_wait(request_queue_t *q, int rw)
+static struct request *__get_request_wait(request_queue_t *q, int rw, int sync)
 {
 	register struct request *rq;
 	DECLARE_WAITQUEUE(wait, current);
@@ -618,13 +625,13 @@
 	do {
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		spin_lock_irq(&io_request_lock);
-		if (blk_oversized_queue(q) || q->rq.count == 0) {
+		if (blk_oversized_queue(q) || q->rq.count < 4) {
 			__generic_unplug_device(q);
 			spin_unlock_irq(&io_request_lock);
 			schedule();
 			spin_lock_irq(&io_request_lock);
 		}
-		rq = get_request(q, rw);
+		rq = get_request(q, rw, sync);
 		spin_unlock_irq(&io_request_lock);
 	} while (rq == NULL);
 	remove_wait_queue(&q->wait_for_requests, &wait);
@@ -947,7 +954,7 @@
 static int __make_request(request_queue_t * q, int rw,
 				  struct buffer_head * bh)
 {
-	unsigned int sector, count;
+	unsigned int sector, count, sync;
 	int max_segments = MAX_SEGMENTS;
 	struct request * req, *freereq = NULL;
 	int rw_ahead, max_sectors, el_ret;
@@ -958,6 +965,7 @@
 
 	count = bh->b_size >> 9;
 	sector = bh->b_rsector;
+	sync = test_and_clear_bit(BH_Sync, &bh->b_state);
 
 	rw_ahead = 0;	/* normal case; gets changed below for READA */
 	switch (rw) {
@@ -1084,14 +1092,14 @@
 				spin_unlock_irq(&io_request_lock);
 				goto end_io;
 			}
-			req = get_request(q, rw);
+			req = get_request(q, rw, sync);
 			if (req == NULL)
 				BUG();
 		} else {
-			req = get_request(q, rw);
+			req = get_request(q, rw, sync);
 			if (req == NULL) {
 				spin_unlock_irq(&io_request_lock);
-				freereq = __get_request_wait(q, rw);
+				freereq = __get_request_wait(q, rw, sync);
 				head = &q->queue_head;
 				spin_lock_irq(&io_request_lock);
 				should_wake = 1;
@@ -1122,6 +1130,8 @@
 out:
 	if (freereq)
 		blkdev_release_request(freereq);
+	if (sync)
+		__generic_unplug_device(q);
 	if (should_wake)
 		get_request_wait_wakeup(q, rw);
 	spin_unlock_irq(&io_request_lock);
--- 1.92/fs/buffer.c	Fri Jul 11 16:43:23 2003
+++ edited/fs/buffer.c	Mon Jul 14 16:23:24 2003
@@ -1144,6 +1144,7 @@
 	bh = getblk(dev, block, size);
 	if (buffer_uptodate(bh))
 		return bh;
+	set_bit(BH_Sync, &bh->b_state);
 	ll_rw_block(READ, 1, &bh);
 	wait_on_buffer(bh);
 	if (buffer_uptodate(bh))
--- 1.24/include/linux/blkdev.h	Fri Jul  4 13:18:12 2003
+++ edited/include/linux/blkdev.h	Mon Jul 14 16:46:56 2003
@@ -295,6 +295,13 @@
 	return q->rq.count == 0;
 }
 
+static inline int blk_oversized_queue_sync(request_queue_t * q)
+{
+	if (q->can_throttle)
+		return atomic_read(&q->nr_sectors) > q->max_queue_sectors + q->batch_sectors;
+	return q->rq.count == 0;
+}
+
 static inline int blk_oversized_queue_batch(request_queue_t * q)
 {
 	return atomic_read(&q->nr_sectors) > q->max_queue_sectors - q->batch_sectors;
--- 1.84/include/linux/fs.h	Fri Jul 11 15:13:13 2003
+++ edited/include/linux/fs.h	Mon Jul 14 16:23:24 2003
@@ -221,6 +221,7 @@
 	BH_Launder,	/* 1 if we can throttle on this buffer */
 	BH_Attached,	/* 1 if b_inode_buffers is linked into a list */
 	BH_JBD,		/* 1 if it has an attached journal_head */
+	BH_Sync,
 
 	BH_PrivateStart,/* not a state bit, but the first bit available
 			 * for private allocation by other entities




  reply	other threads:[~2003-07-14 21:15 UTC|newest]

Thread overview: 68+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2003-07-08 20:06 RFC on io-stalls patch Marcelo Tosatti
2003-07-10 13:57 ` Jens Axboe
2003-07-11 14:13   ` Chris Mason
2003-07-12  0:20     ` Nick Piggin
2003-07-12 18:37       ` Chris Mason
2003-07-12  7:37     ` Jens Axboe
2003-07-12  7:48       ` Jens Axboe
2003-07-12 18:32       ` Chris Mason
2003-07-13  0:33         ` Andrea Arcangeli
2003-07-13  9:01         ` Jens Axboe
2003-07-13 16:20           ` Chris Mason
2003-07-13 16:45             ` Jeff Garzik
2003-07-13 19:33               ` Andrea Arcangeli
2003-07-13 17:47             ` Jens Axboe
2003-07-13 19:35               ` Andrea Arcangeli
2003-07-14  0:36                 ` Chris Mason
2003-07-13 19:19           ` Andrea Arcangeli
2003-07-14  5:49             ` Jens Axboe
2003-07-14 12:23               ` Marcelo Tosatti
2003-07-14 13:12                 ` Jens Axboe
2003-07-14 19:51                   ` Jens Axboe
2003-07-14 20:09                     ` Chris Mason
2003-07-14 20:19                       ` Andrea Arcangeli
2003-07-14 21:24                         ` Chris Mason [this message]
2003-07-15  5:46                       ` Jens Axboe
2003-07-14 20:09                     ` Marcelo Tosatti
2003-07-14 20:24                       ` Andrea Arcangeli
2003-07-14 20:34                         ` Chris Mason
2003-07-15  5:35                           ` Jens Axboe
     [not found]                           ` <20030714224528.GU16313@dualathlon.random>
2003-07-15  5:40                             ` Jens Axboe
     [not found]                             ` <1058229360.13317.364.camel@tiny.suse.com>
2003-07-15  5:43                               ` Jens Axboe
     [not found]                               ` <20030714175238.3eaddd9a.akpm@osdl.org>
     [not found]                                 ` <20030715020706.GC16313@dualathlon.random>
2003-07-15  5:45                                   ` Jens Axboe
2003-07-15  6:01                                     ` Andrea Arcangeli
2003-07-15  6:08                                       ` Jens Axboe
2003-07-15  7:03                                         ` Andrea Arcangeli
2003-07-15  8:28                                           ` Jens Axboe
2003-07-15  9:12                                             ` Chris Mason
2003-07-15  9:17                                               ` Jens Axboe
2003-07-15  9:18                                                 ` Jens Axboe
2003-07-15  9:30                                                   ` Chris Mason
2003-07-15 10:03                                                   ` Andrea Arcangeli
2003-07-15 10:11                                                     ` Jens Axboe
2003-07-15 14:18                                                 ` Chris Mason
2003-07-15 14:29                                                   ` Jens Axboe
2003-07-16 17:06                                                   ` Chris Mason
2003-07-15  9:22                                               ` Chris Mason
2003-07-15  9:59                                               ` Andrea Arcangeli
2003-07-15  9:48                                             ` Andrea Arcangeli
2003-07-14 20:16                     ` Andrea Arcangeli
2003-07-14 20:17                       ` Marcelo Tosatti
2003-07-14 20:27                         ` Andrea Arcangeli
2003-07-15  5:26                       ` Jens Axboe
2003-07-15  5:48                         ` Andrea Arcangeli
2003-07-15  6:01                           ` Jens Axboe
2003-07-15  6:33                             ` Andrea Arcangeli
2003-07-15 11:22                         ` Alan Cox
2003-07-15 11:27                           ` Jens Axboe
2003-07-16 12:43                             ` Andrea Arcangeli
2003-07-16 12:46                               ` Jens Axboe
2003-07-16 12:59                                 ` Andrea Arcangeli
2003-07-16 13:04                                   ` Jens Axboe
2003-07-16 13:11                                     ` Andrea Arcangeli
2003-07-16 13:21                                       ` Jens Axboe
2003-07-16 13:44                                         ` Andrea Arcangeli
2003-07-16 14:00                                           ` Jens Axboe
2003-07-16 14:24                                             ` Andrea Arcangeli
2003-07-16 16:49                                     ` Andrew Morton
2003-07-15 18:47 Shane Shrybman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1058217899.13317.338.camel@tiny.suse.com \
    --to=mason@suse.com \
    --cc=akpm@digeo.com \
    --cc=alan@lxorguk.ukuu.org.uk \
    --cc=andrea@suse.de \
    --cc=axboe@suse.de \
    --cc=jgarzik@pobox.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=marcelo@conectiva.com.br \
    --cc=sct@redhat.com \
    --cc=viro@math.psu.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).