* RFC on io-stalls patch
@ 2003-07-08 20:06 Marcelo Tosatti
2003-07-10 13:57 ` Jens Axboe
0 siblings, 1 reply; 68+ messages in thread
From: Marcelo Tosatti @ 2003-07-08 20:06 UTC (permalink / raw)
To: lkml
Cc: Stephen C. Tweedie, Alan Cox, Jeff Garzik, Andrew Morton,
Andrea Arcangeli, Chris Mason, Alexander Viro
Hello people,
To get better IO interactivity and to fix potential SMP IO hangs (due to
missed wakeups) we (Chris Mason integrated Andrea's work) added the
"io-stalls-10" patch in 2.4.22-pre3.
The "low-latency" patch (which is part of io-stalls-10) seemed to be a
good approach to increase IO fairness. Some people (Alan, AFAIK) are a bit
concerned about that, though.
Could you guys, Stephen, Andrew and maybe Viro (if interested :)) who
haven't been part of the discussions around the IO stalls issue take a look
at the patch, please?
It seems safe and a good approach to me, but might not be. Or have small
"glitches".
Thanks in advance.
Here is the patch.
# This is a BitKeeper generated patch for the following project:
# Project Name: Linux kernel tree
# This patch format is intended for GNU patch command version 2.5 or higher.
# This patch includes the following deltas:
# ChangeSet 1.1023 -> 1.1024
# drivers/ide/ide-probe.c 1.17 -> 1.18
# include/linux/pagemap.h 1.19 -> 1.20
# kernel/ksyms.c 1.72 -> 1.73
# include/linux/elevator.h 1.5 -> 1.6
# drivers/block/ll_rw_blk.c 1.45 -> 1.46
# include/linux/blkdev.h 1.23 -> 1.24
# fs/reiserfs/inode.c 1.47 -> 1.48
# mm/filemap.c 1.81 -> 1.82
# drivers/scsi/scsi_lib.c 1.16 -> 1.17
# drivers/scsi/scsi.c 1.17 -> 1.18
# fs/buffer.c 1.86 -> 1.87
#
# The following is the BitKeeper ChangeSet Log
# --------------------------------------------
# 03/07/05 mason@suse.com 1.1024
# [PATCH] Fix potential IO hangs and increase interactiveness during heavy IO
#
# io-stalls-10:
#
#
# ===== drivers/block/ll_rw_blk.c 1.45 vs edited =====
# --------------------------------------------
#
diff -Nru a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
--- a/drivers/block/ll_rw_blk.c Tue Jul 8 17:03:41 2003
+++ b/drivers/block/ll_rw_blk.c Tue Jul 8 17:03:41 2003
@@ -176,11 +176,12 @@
{
int count = q->nr_requests;
- count -= __blk_cleanup_queue(&q->rq[READ]);
- count -= __blk_cleanup_queue(&q->rq[WRITE]);
+ count -= __blk_cleanup_queue(&q->rq);
if (count)
printk("blk_cleanup_queue: leaked requests (%d)\n", count);
+ if (atomic_read(&q->nr_sectors))
+ printk("blk_cleanup_queue: leaked sectors (%d)\n", atomic_read(&q->nr_sectors));
memset(q, 0, sizeof(*q));
}
@@ -215,6 +216,24 @@
}
/**
+ * blk_queue_throttle_sectors - indicates you will call sector throttling funcs
+ * @q: The queue which this applies to.
+ * @active: A flag indicating if you want sector throttling on
+ *
+ * Description:
+ * The sector throttling code allows us to put a limit on the number of
+ * sectors pending io to the disk at a given time, sending @active nonzero
+ * indicates you will call blk_started_sectors and blk_finished_sectors in
+ * addition to calling blk_started_io and blk_finished_io in order to
+ * keep track of the number of sectors in flight.
+ **/
+
+void blk_queue_throttle_sectors(request_queue_t * q, int active)
+{
+ q->can_throttle = active;
+}
+
+/**
* blk_queue_make_request - define an alternate make_request function for a device
* @q: the request queue for the device to be affected
* @mfn: the alternate make_request function
@@ -389,7 +408,7 @@
*
* Returns the (new) number of requests which the queue has available.
*/
-int blk_grow_request_list(request_queue_t *q, int nr_requests)
+int blk_grow_request_list(request_queue_t *q, int nr_requests, int max_queue_sectors)
{
unsigned long flags;
/* Several broken drivers assume that this function doesn't sleep,
@@ -399,21 +418,34 @@
spin_lock_irqsave(&io_request_lock, flags);
while (q->nr_requests < nr_requests) {
struct request *rq;
- int rw;
rq = kmem_cache_alloc(request_cachep, SLAB_ATOMIC);
if (rq == NULL)
break;
memset(rq, 0, sizeof(*rq));
rq->rq_status = RQ_INACTIVE;
- rw = q->nr_requests & 1;
- list_add(&rq->queue, &q->rq[rw].free);
- q->rq[rw].count++;
+ list_add(&rq->queue, &q->rq.free);
+ q->rq.count++;
+
q->nr_requests++;
}
+
+ /*
+ * Wakeup waiters after both one quarter of the
+ * max-in-flight queue and one quarter of the requests
+ * are available again.
+ */
+
q->batch_requests = q->nr_requests / 4;
if (q->batch_requests > 32)
q->batch_requests = 32;
+ q->batch_sectors = max_queue_sectors / 4;
+
+ q->max_queue_sectors = max_queue_sectors;
+
+ BUG_ON(!q->batch_sectors);
+ atomic_set(&q->nr_sectors, 0);
+
spin_unlock_irqrestore(&io_request_lock, flags);
return q->nr_requests;
}
@@ -422,23 +454,27 @@
{
struct sysinfo si;
int megs; /* Total memory, in megabytes */
- int nr_requests;
-
- INIT_LIST_HEAD(&q->rq[READ].free);
- INIT_LIST_HEAD(&q->rq[WRITE].free);
- q->rq[READ].count = 0;
- q->rq[WRITE].count = 0;
+ int nr_requests, max_queue_sectors = MAX_QUEUE_SECTORS;
+
+ INIT_LIST_HEAD(&q->rq.free);
+ q->rq.count = 0;
q->nr_requests = 0;
si_meminfo(&si);
megs = si.totalram >> (20 - PAGE_SHIFT);
- nr_requests = 128;
- if (megs < 32)
- nr_requests /= 2;
- blk_grow_request_list(q, nr_requests);
+ nr_requests = MAX_NR_REQUESTS;
+ if (megs < 30) {
+ nr_requests /= 2;
+ max_queue_sectors /= 2;
+ }
+ /* notice early if anybody screwed the defaults */
+ BUG_ON(!nr_requests);
+ BUG_ON(!max_queue_sectors);
+
+ blk_grow_request_list(q, nr_requests, max_queue_sectors);
+
+ init_waitqueue_head(&q->wait_for_requests);
- init_waitqueue_head(&q->wait_for_requests[0]);
- init_waitqueue_head(&q->wait_for_requests[1]);
spin_lock_init(&q->queue_lock);
}
@@ -491,6 +527,8 @@
q->plug_tq.routine = &generic_unplug_device;
q->plug_tq.data = q;
q->plugged = 0;
+ q->can_throttle = 0;
+
/*
* These booleans describe the queue properties. We set the
* default (and most common) values here. Other drivers can
@@ -511,9 +549,10 @@
static struct request *get_request(request_queue_t *q, int rw)
{
struct request *rq = NULL;
- struct request_list *rl = q->rq + rw;
+ struct request_list *rl;
- if (!list_empty(&rl->free)) {
+ rl = &q->rq;
+ if (!list_empty(&rl->free) && !blk_oversized_queue(q)) {
rq = blkdev_free_rq(&rl->free);
list_del(&rq->queue);
rl->count--;
@@ -522,34 +561,23 @@
rq->special = NULL;
rq->q = q;
}
-
return rq;
}
/*
- * Here's the request allocation design:
+ * Here's the request allocation design, low latency version:
*
* 1: Blocking on request exhaustion is a key part of I/O throttling.
*
* 2: We want to be `fair' to all requesters. We must avoid starvation, and
* attempt to ensure that all requesters sleep for a similar duration. Hence
* no stealing requests when there are other processes waiting.
- *
- * 3: We also wish to support `batching' of requests. So when a process is
- * woken, we want to allow it to allocate a decent number of requests
- * before it blocks again, so they can be nicely merged (this only really
- * matters if the process happens to be adding requests near the head of
- * the queue).
- *
- * 4: We want to avoid scheduling storms. This isn't really important, because
- * the system will be I/O bound anyway. But it's easy.
- *
- * There is tension between requirements 2 and 3. Once a task has woken,
- * we don't want to allow it to sleep as soon as it takes its second request.
- * But we don't want currently-running tasks to steal all the requests
- * from the sleepers. We handle this with wakeup hysteresis around
- * 0 .. batch_requests and with the assumption that request taking is much,
- * much faster than request freeing.
+ *
+ * There used to be more here, attempting to allow a process to send in a
+ * number of requests once it has woken up. But, there's no way to
+ * tell if a process has just been woken up, or if it is a new process
+ * coming in to steal requests from the waiters. So, we give up and force
+ * everyone to wait fairly.
*
* So here's what we do:
*
@@ -561,28 +589,23 @@
*
* When a process wants a new request:
*
- * b) If free_requests == 0, the requester sleeps in FIFO manner.
- *
- * b) If 0 < free_requests < batch_requests and there are waiters,
- * we still take a request non-blockingly. This provides batching.
- *
- * c) If free_requests >= batch_requests, the caller is immediately
- * granted a new request.
+ * b) If free_requests == 0, the requester sleeps in FIFO manner, and
+ * the queue full condition is set. The full condition is not
+ * cleared until there are no longer any waiters. Once the full
+ * condition is set, all new io must wait, hopefully for a very
+ * short period of time.
*
* When a request is released:
*
- * d) If free_requests < batch_requests, do nothing.
- *
- * f) If free_requests >= batch_requests, wake up a single waiter.
+ * c) If free_requests < batch_requests, do nothing.
*
- * The net effect is that when a process is woken at the batch_requests level,
- * it will be able to take approximately (batch_requests) requests before
- * blocking again (at the tail of the queue).
- *
- * This all assumes that the rate of taking requests is much, much higher
- * than the rate of releasing them. Which is very true.
+ * d) If free_requests >= batch_requests, wake up a single waiter.
*
- * -akpm, Feb 2002.
+ * As each waiter gets a request, he wakes another waiter. We do this
+ * to prevent a race where an unplug might get run before a request makes
+ * its way onto the queue. The result is a cascade of wakeups, so delaying
+ * the initial wakeup until we've got batch_requests available helps avoid
+ * wakeups where there aren't any requests available yet.
*/
static struct request *__get_request_wait(request_queue_t *q, int rw)
@@ -590,21 +613,37 @@
register struct request *rq;
DECLARE_WAITQUEUE(wait, current);
- add_wait_queue(&q->wait_for_requests[rw], &wait);
+ add_wait_queue_exclusive(&q->wait_for_requests, &wait);
+
do {
set_current_state(TASK_UNINTERRUPTIBLE);
- generic_unplug_device(q);
- if (q->rq[rw].count == 0)
- schedule();
spin_lock_irq(&io_request_lock);
+ if (blk_oversized_queue(q)) {
+ __generic_unplug_device(q);
+ spin_unlock_irq(&io_request_lock);
+ schedule();
+ spin_lock_irq(&io_request_lock);
+ }
rq = get_request(q, rw);
spin_unlock_irq(&io_request_lock);
} while (rq == NULL);
- remove_wait_queue(&q->wait_for_requests[rw], &wait);
+ remove_wait_queue(&q->wait_for_requests, &wait);
current->state = TASK_RUNNING;
+
return rq;
}
+static void get_request_wait_wakeup(request_queue_t *q, int rw)
+{
+ /*
+ * avoid losing an unplug if a second __get_request_wait did the
+ * generic_unplug_device while our __get_request_wait was running
+ * w/o the queue_lock held and w/ our request out of the queue.
+ */
+ if (waitqueue_active(&q->wait_for_requests))
+ wake_up(&q->wait_for_requests);
+}
+
/* RO fail safe mechanism */
static long ro_bits[MAX_BLKDEV][8];
@@ -818,7 +857,6 @@
void blkdev_release_request(struct request *req)
{
request_queue_t *q = req->q;
- int rw = req->cmd;
req->rq_status = RQ_INACTIVE;
req->q = NULL;
@@ -828,9 +866,17 @@
* assume it has free buffers and check waiters
*/
if (q) {
- list_add(&req->queue, &q->rq[rw].free);
- if (++q->rq[rw].count >= q->batch_requests)
- wake_up(&q->wait_for_requests[rw]);
+ int oversized_batch = 0;
+
+ if (q->can_throttle)
+ oversized_batch = blk_oversized_queue_batch(q);
+ q->rq.count++;
+ list_add(&req->queue, &q->rq.free);
+ if (q->rq.count >= q->batch_requests && !oversized_batch) {
+ smp_mb();
+ if (waitqueue_active(&q->wait_for_requests))
+ wake_up(&q->wait_for_requests);
+ }
}
}
@@ -908,6 +954,7 @@
struct list_head *head, *insert_here;
int latency;
elevator_t *elevator = &q->elevator;
+ int should_wake = 0;
count = bh->b_size >> 9;
sector = bh->b_rsector;
@@ -948,7 +995,6 @@
*/
max_sectors = get_max_sectors(bh->b_rdev);
-again:
req = NULL;
head = &q->queue_head;
/*
@@ -957,7 +1003,9 @@
*/
spin_lock_irq(&io_request_lock);
+again:
insert_here = head->prev;
+
if (list_empty(head)) {
q->plug_device_fn(q, bh->b_rdev); /* is atomic */
goto get_rq;
@@ -976,6 +1024,7 @@
req->bhtail = bh;
req->nr_sectors = req->hard_nr_sectors += count;
blk_started_io(count);
+ blk_started_sectors(req, count);
drive_stat_acct(req->rq_dev, req->cmd, count, 0);
req_new_io(req, 1, count);
attempt_back_merge(q, req, max_sectors, max_segments);
@@ -998,6 +1047,7 @@
req->sector = req->hard_sector = sector;
req->nr_sectors = req->hard_nr_sectors += count;
blk_started_io(count);
+ blk_started_sectors(req, count);
drive_stat_acct(req->rq_dev, req->cmd, count, 0);
req_new_io(req, 1, count);
attempt_front_merge(q, head, req, max_sectors, max_segments);
@@ -1030,7 +1080,7 @@
* See description above __get_request_wait()
*/
if (rw_ahead) {
- if (q->rq[rw].count < q->batch_requests) {
+ if (q->rq.count < q->batch_requests || blk_oversized_queue_batch(q)) {
spin_unlock_irq(&io_request_lock);
goto end_io;
}
@@ -1042,6 +1092,9 @@
if (req == NULL) {
spin_unlock_irq(&io_request_lock);
freereq = __get_request_wait(q, rw);
+ head = &q->queue_head;
+ spin_lock_irq(&io_request_lock);
+ should_wake = 1;
goto again;
}
}
@@ -1064,10 +1117,13 @@
req->start_time = jiffies;
req_new_io(req, 0, count);
blk_started_io(count);
+ blk_started_sectors(req, count);
add_request(q, req, insert_here);
out:
if (freereq)
blkdev_release_request(freereq);
+ if (should_wake)
+ get_request_wait_wakeup(q, rw);
spin_unlock_irq(&io_request_lock);
return 0;
end_io:
@@ -1196,8 +1252,15 @@
bh->b_rdev = bh->b_dev;
bh->b_rsector = bh->b_blocknr * count;
+ get_bh(bh);
generic_make_request(rw, bh);
+ /* fix race condition with wait_on_buffer() */
+ smp_mb(); /* spin_unlock may have inclusive semantics */
+ if (waitqueue_active(&bh->b_wait))
+ wake_up(&bh->b_wait);
+
+ put_bh(bh);
switch (rw) {
case WRITE:
kstat.pgpgout += count;
@@ -1350,6 +1413,7 @@
if ((bh = req->bh) != NULL) {
nsect = bh->b_size >> 9;
blk_finished_io(nsect);
+ blk_finished_sectors(req, nsect);
req->bh = bh->b_reqnext;
bh->b_reqnext = NULL;
bh->b_end_io(bh, uptodate);
@@ -1509,6 +1573,7 @@
EXPORT_SYMBOL(blk_get_queue);
EXPORT_SYMBOL(blk_cleanup_queue);
EXPORT_SYMBOL(blk_queue_headactive);
+EXPORT_SYMBOL(blk_queue_throttle_sectors);
EXPORT_SYMBOL(blk_queue_make_request);
EXPORT_SYMBOL(generic_make_request);
EXPORT_SYMBOL(blkdev_release_request);
diff -Nru a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
--- a/drivers/ide/ide-probe.c Tue Jul 8 17:03:41 2003
+++ b/drivers/ide/ide-probe.c Tue Jul 8 17:03:41 2003
@@ -971,6 +971,7 @@
q->queuedata = HWGROUP(drive);
blk_init_queue(q, do_ide_request);
+ blk_queue_throttle_sectors(q, 1);
}
#undef __IRQ_HELL_SPIN
diff -Nru a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
--- a/drivers/scsi/scsi.c Tue Jul 8 17:03:41 2003
+++ b/drivers/scsi/scsi.c Tue Jul 8 17:03:41 2003
@@ -197,6 +197,7 @@
blk_init_queue(q, scsi_request_fn);
blk_queue_headactive(q, 0);
+ blk_queue_throttle_sectors(q, 1);
q->queuedata = (void *) SDpnt;
}
diff -Nru a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
--- a/drivers/scsi/scsi_lib.c Tue Jul 8 17:03:41 2003
+++ b/drivers/scsi/scsi_lib.c Tue Jul 8 17:03:41 2003
@@ -378,6 +378,7 @@
if ((bh = req->bh) != NULL) {
nsect = bh->b_size >> 9;
blk_finished_io(nsect);
+ blk_finished_sectors(req, nsect);
req->bh = bh->b_reqnext;
bh->b_reqnext = NULL;
sectors -= nsect;
diff -Nru a/fs/buffer.c b/fs/buffer.c
--- a/fs/buffer.c Tue Jul 8 17:03:41 2003
+++ b/fs/buffer.c Tue Jul 8 17:03:41 2003
@@ -153,10 +153,23 @@
get_bh(bh);
add_wait_queue(&bh->b_wait, &wait);
do {
- run_task_queue(&tq_disk);
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
if (!buffer_locked(bh))
break;
+ /*
+ * We must read tq_disk in TQ_ACTIVE after the
+ * add_wait_queue effect is visible to other cpus.
+ * We could unplug some line above it wouldn't matter
+ * but we can't do that right after add_wait_queue
+ * without an smp_mb() in between because spin_unlock
+ * has inclusive semantics.
+ * Doing it here is the most efficient place so we
+ * don't do a spurious unplug if we get a racy
+ * wakeup that makes buffer_locked return 0, and
+ * doing it here avoids an explicit smp_mb(); we
+ * rely on the implicit one in set_task_state.
+ */
+ run_task_queue(&tq_disk);
schedule();
} while (buffer_locked(bh));
tsk->state = TASK_RUNNING;
@@ -1516,6 +1529,9 @@
/* Done - end_buffer_io_async will unlock */
SetPageUptodate(page);
+
+ wakeup_page_waiters(page);
+
return 0;
out:
@@ -1547,6 +1563,7 @@
} while (bh != head);
if (need_unlock)
UnlockPage(page);
+ wakeup_page_waiters(page);
return err;
}
@@ -1774,6 +1791,8 @@
else
submit_bh(READ, bh);
}
+
+ wakeup_page_waiters(page);
return 0;
}
@@ -2400,6 +2419,7 @@
submit_bh(rw, bh);
bh = next;
} while (bh != head);
+ wakeup_page_waiters(page);
return 0;
}
diff -Nru a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
--- a/fs/reiserfs/inode.c Tue Jul 8 17:03:41 2003
+++ b/fs/reiserfs/inode.c Tue Jul 8 17:03:41 2003
@@ -2080,6 +2080,7 @@
*/
if (nr) {
submit_bh_for_writepage(arr, nr) ;
+ wakeup_page_waiters(page);
} else {
UnlockPage(page) ;
}
diff -Nru a/include/linux/blkdev.h b/include/linux/blkdev.h
--- a/include/linux/blkdev.h Tue Jul 8 17:03:41 2003
+++ b/include/linux/blkdev.h Tue Jul 8 17:03:41 2003
@@ -64,12 +64,6 @@
typedef void (plug_device_fn) (request_queue_t *q, kdev_t device);
typedef void (unplug_device_fn) (void *q);
-/*
- * Default nr free requests per queue, ll_rw_blk will scale it down
- * according to available RAM at init time
- */
-#define QUEUE_NR_REQUESTS 8192
-
struct request_list {
unsigned int count;
struct list_head free;
@@ -80,7 +74,7 @@
/*
* the queue request freelist, one for reads and one for writes
*/
- struct request_list rq[2];
+ struct request_list rq;
/*
* The total number of requests on each queue
@@ -93,6 +87,21 @@
int batch_requests;
/*
+ * The total number of 512byte blocks on each queue
+ */
+ atomic_t nr_sectors;
+
+ /*
+ * Batching threshold for sleep/wakeup decisions
+ */
+ int batch_sectors;
+
+ /*
+ * The max number of 512byte blocks on each queue
+ */
+ int max_queue_sectors;
+
+ /*
* Together with queue_head for cacheline sharing
*/
struct list_head queue_head;
@@ -118,13 +127,21 @@
/*
* Boolean that indicates whether this queue is plugged or not.
*/
- char plugged;
+ int plugged:1;
/*
* Boolean that indicates whether current_request is active or
* not.
*/
- char head_active;
+ int head_active:1;
+
+ /*
+ * Boolean that indicates you will use blk_started_sectors
+ * and blk_finished_sectors in addition to blk_started_io
+ * and blk_finished_io. It enables the throttling code to
+ * help keep the sectors in flight to a reasonable value
+ */
+ int can_throttle:1;
unsigned long bounce_pfn;
@@ -137,7 +154,7 @@
/*
* Tasks wait here for free read and write requests
*/
- wait_queue_head_t wait_for_requests[2];
+ wait_queue_head_t wait_for_requests;
};
#define blk_queue_plugged(q) (q)->plugged
@@ -221,10 +238,11 @@
/*
* Access functions for manipulating queue properties
*/
-extern int blk_grow_request_list(request_queue_t *q, int nr_requests);
+extern int blk_grow_request_list(request_queue_t *q, int nr_requests, int max_queue_sectors);
extern void blk_init_queue(request_queue_t *, request_fn_proc *);
extern void blk_cleanup_queue(request_queue_t *);
extern void blk_queue_headactive(request_queue_t *, int);
+extern void blk_queue_throttle_sectors(request_queue_t *, int);
extern void blk_queue_make_request(request_queue_t *, make_request_fn *);
extern void generic_unplug_device(void *);
extern inline int blk_seg_merge_ok(struct buffer_head *, struct buffer_head *);
@@ -243,6 +261,8 @@
#define MAX_SEGMENTS 128
#define MAX_SECTORS 255
+#define MAX_QUEUE_SECTORS (4 << (20 - 9)) /* 4 mbytes when full sized */
+#define MAX_NR_REQUESTS 1024 /* 1024k when in 512 units, normally min is 1M in 1k units */
#define PageAlignSize(size) (((size) + PAGE_SIZE -1) & PAGE_MASK)
@@ -268,8 +288,50 @@
return retval;
}
+static inline int blk_oversized_queue(request_queue_t * q)
+{
+ if (q->can_throttle)
+ return atomic_read(&q->nr_sectors) > q->max_queue_sectors;
+ return q->rq.count == 0;
+}
+
+static inline int blk_oversized_queue_batch(request_queue_t * q)
+{
+ return atomic_read(&q->nr_sectors) > q->max_queue_sectors - q->batch_sectors;
+}
+
#define blk_finished_io(nsects) do { } while (0)
#define blk_started_io(nsects) do { } while (0)
+
+static inline void blk_started_sectors(struct request *rq, int count)
+{
+ request_queue_t *q = rq->q;
+ if (q && q->can_throttle) {
+ atomic_add(count, &q->nr_sectors);
+ if (atomic_read(&q->nr_sectors) < 0) {
+ printk("nr_sectors is %d\n", atomic_read(&q->nr_sectors));
+ BUG();
+ }
+ }
+}
+
+static inline void blk_finished_sectors(struct request *rq, int count)
+{
+ request_queue_t *q = rq->q;
+ if (q && q->can_throttle) {
+ atomic_sub(count, &q->nr_sectors);
+
+ smp_mb();
+ if (q->rq.count >= q->batch_requests && !blk_oversized_queue_batch(q)) {
+ if (waitqueue_active(&q->wait_for_requests))
+ wake_up(&q->wait_for_requests);
+ }
+ if (atomic_read(&q->nr_sectors) < 0) {
+ printk("nr_sectors is %d\n", atomic_read(&q->nr_sectors));
+ BUG();
+ }
+ }
+}
static inline unsigned int blksize_bits(unsigned int size)
{
diff -Nru a/include/linux/elevator.h b/include/linux/elevator.h
--- a/include/linux/elevator.h Tue Jul 8 17:03:41 2003
+++ b/include/linux/elevator.h Tue Jul 8 17:03:41 2003
@@ -80,7 +80,7 @@
return latency;
}
-#define ELV_LINUS_SEEK_COST 16
+#define ELV_LINUS_SEEK_COST 1
#define ELEVATOR_NOOP \
((elevator_t) { \
@@ -93,8 +93,8 @@
#define ELEVATOR_LINUS \
((elevator_t) { \
- 2048, /* read passovers */ \
- 8192, /* write passovers */ \
+ 128, /* read passovers */ \
+ 512, /* write passovers */ \
\
elevator_linus_merge, /* elevator_merge_fn */ \
elevator_linus_merge_req, /* elevator_merge_req_fn */ \
diff -Nru a/include/linux/pagemap.h b/include/linux/pagemap.h
--- a/include/linux/pagemap.h Tue Jul 8 17:03:41 2003
+++ b/include/linux/pagemap.h Tue Jul 8 17:03:41 2003
@@ -97,6 +97,8 @@
___wait_on_page(page);
}
+extern void FASTCALL(wakeup_page_waiters(struct page * page));
+
/*
* Returns locked page at given index in given cache, creating it if needed.
*/
diff -Nru a/kernel/ksyms.c b/kernel/ksyms.c
--- a/kernel/ksyms.c Tue Jul 8 17:03:41 2003
+++ b/kernel/ksyms.c Tue Jul 8 17:03:41 2003
@@ -296,6 +296,7 @@
EXPORT_SYMBOL(filemap_fdatawait);
EXPORT_SYMBOL(lock_page);
EXPORT_SYMBOL(unlock_page);
+EXPORT_SYMBOL(wakeup_page_waiters);
/* device registration */
EXPORT_SYMBOL(register_chrdev);
diff -Nru a/mm/filemap.c b/mm/filemap.c
--- a/mm/filemap.c Tue Jul 8 17:03:41 2003
+++ b/mm/filemap.c Tue Jul 8 17:03:41 2003
@@ -810,6 +810,20 @@
return &wait[hash];
}
+/*
+ * This must be called after every submit_bh with end_io
+ * callbacks that would result into the blkdev layer waking
+ * up the page after a queue unplug.
+ */
+void wakeup_page_waiters(struct page * page)
+{
+ wait_queue_head_t * head;
+
+ head = page_waitqueue(page);
+ if (waitqueue_active(head))
+ wake_up(head);
+}
+
/*
* Wait for a page to get unlocked.
*
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-08 20:06 RFC on io-stalls patch Marcelo Tosatti
@ 2003-07-10 13:57 ` Jens Axboe
2003-07-11 14:13 ` Chris Mason
0 siblings, 1 reply; 68+ messages in thread
From: Jens Axboe @ 2003-07-10 13:57 UTC (permalink / raw)
To: Marcelo Tosatti
Cc: lkml, Stephen C. Tweedie, Alan Cox, Jeff Garzik, Andrew Morton,
Andrea Arcangeli, Chris Mason, Alexander Viro
On Tue, Jul 08 2003, Marcelo Tosatti wrote:
>
> Hello people,
>
> To get better IO interactivity and to fix potential SMP IO hangs (due to
> missed wakeups) we, (Chris Mason integrated Andrea's work) added
> "io-stalls-10" patch in 2.4.22-pre3.
>
> The "low-latency" patch (which is part of io-stalls-10) seemed to be a
> good approach to increase IO fairness. Some people (Alan, AFAIK) are a bit
> concerned about that, though.
>
> Could you guys, Stephen, Andrew and maybe Viro (if interested :)) which
> havent been part of the discussions around the IO stalls issue take a look
> at the patch, please?
>
> It seems safe and a good approach to me, but might not be. Or have small
> "glitches".
Well, I have one naive question. What prevents writes from eating the
entire request pool now? In the 2.2 and earlier days, we reserved the
last 3rd of the requests to writes. 2.4.1 and later used a split request
list to make that same guarantee.
I only did a quick read of the patch so maybe I'm missing the new
mechanism for this. Are we simply relying on fair (FIFO) request
allocation and oversized queue to do its job alone?
--
Jens Axboe
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-10 13:57 ` Jens Axboe
@ 2003-07-11 14:13 ` Chris Mason
2003-07-12 0:20 ` Nick Piggin
2003-07-12 7:37 ` Jens Axboe
0 siblings, 2 replies; 68+ messages in thread
From: Chris Mason @ 2003-07-11 14:13 UTC (permalink / raw)
To: Jens Axboe
Cc: Marcelo Tosatti, lkml, Stephen C. Tweedie, Alan Cox, Jeff Garzik,
Andrew Morton, Andrea Arcangeli, Alexander Viro
On Thu, 2003-07-10 at 09:57, Jens Axboe wrote:
> On Tue, Jul 08 2003, Marcelo Tosatti wrote:
> >
> > Hello people,
> >
> > To get better IO interactivity and to fix potential SMP IO hangs (due to
> > missed wakeups) we, (Chris Mason integrated Andrea's work) added
> > "io-stalls-10" patch in 2.4.22-pre3.
> >
> > The "low-latency" patch (which is part of io-stalls-10) seemed to be a
> > good approach to increase IO fairness. Some people (Alan, AFAIK) are a bit
> > concerned about that, though.
> >
> > Could you guys, Stephen, Andrew and maybe Viro (if interested :)) which
> > havent been part of the discussions around the IO stalls issue take a look
> > at the patch, please?
> >
> > It seems safe and a good approach to me, but might not be. Or have small
> > "glitches".
>
> Well, I have one naive question. What prevents writes from eating the
> entire request pool now? In the 2.2 and earlier days, we reserved the
> last 3rd of the requests to writes. 2.4.1 and later used a split request
> list to make that same guarentee.
>
> I only did a quick read of the patch so maybe I'm missing the new
> mechanism for this. Are we simply relying on fair (FIFO) request
> allocation and oversized queue to do its job alone?
Seems that way. With the 2.4.21 code, a read might easily get a
request, but then spend forever waiting for a huge queue of merged
writes to get to disk.
I believe the new way provides better overall read performance in the
presence of lots of writes.
-chris
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-11 14:13 ` Chris Mason
@ 2003-07-12 0:20 ` Nick Piggin
2003-07-12 18:37 ` Chris Mason
2003-07-12 7:37 ` Jens Axboe
1 sibling, 1 reply; 68+ messages in thread
From: Nick Piggin @ 2003-07-12 0:20 UTC (permalink / raw)
To: Chris Mason
Cc: Jens Axboe, Marcelo Tosatti, lkml, Stephen C. Tweedie, Alan Cox,
Jeff Garzik, Andrew Morton, Andrea Arcangeli, Alexander Viro
Chris Mason wrote:
>On Thu, 2003-07-10 at 09:57, Jens Axboe wrote:
>
>>On Tue, Jul 08 2003, Marcelo Tosatti wrote:
>>
>>>Hello people,
>>>
>>>To get better IO interactivity and to fix potential SMP IO hangs (due to
>>>missed wakeups) we, (Chris Mason integrated Andrea's work) added
>>>"io-stalls-10" patch in 2.4.22-pre3.
>>>
>>>The "low-latency" patch (which is part of io-stalls-10) seemed to be a
>>>good approach to increase IO fairness. Some people (Alan, AFAIK) are a bit
>>>concerned about that, though.
>>>
>>>Could you guys, Stephen, Andrew and maybe Viro (if interested :)) which
>>>havent been part of the discussions around the IO stalls issue take a look
>>>at the patch, please?
>>>
>>>It seems safe and a good approach to me, but might not be. Or have small
>>>"glitches".
>>>
>>Well, I have one naive question. What prevents writes from eating the
>>entire request pool now? In the 2.2 and earlier days, we reserved the
>>last 3rd of the requests to writes. 2.4.1 and later used a split request
>>list to make that same guarentee.
>>
>>I only did a quick read of the patch so maybe I'm missing the new
>>mechanism for this. Are we simply relying on fair (FIFO) request
>>allocation and oversized queue to do its job alone?
>>
>
>Seems that way. With the 2.4.21 code, a read might easily get a
>request, but then spend forever waiting for a huge queue of merged
>writes to get to disk.
>
But it is the job of the io scheduler to prevent this, isn't it?
>
>I believe the new way provides better overall read performance in the
>presence of lots of writes.
>
>
I don't know how that can be, considering writers will consume
basically limitless requests. What am I missing?
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-12 0:20 ` Nick Piggin
@ 2003-07-12 18:37 ` Chris Mason
0 siblings, 0 replies; 68+ messages in thread
From: Chris Mason @ 2003-07-12 18:37 UTC (permalink / raw)
To: Nick Piggin
Cc: Jens Axboe, Marcelo Tosatti, lkml, Stephen C. Tweedie, Alan Cox,
Jeff Garzik, Andrew Morton, Andrea Arcangeli, Alexander Viro
On Fri, 2003-07-11 at 20:20, Nick Piggin wrote:
> >Seems that way. With the 2.4.21 code, a read might easily get a
> >request, but then spend forever waiting for a huge queue of merged
> >writes to get to disk.
> >
>
> But it is the job of the io scheduler to prevent this, isn't it?
>
Yes, but the 2.4.21 code doesn't do it.
> >
> >I believe the new way provides better overall read performance in the
> >presence of lots of writes.
> >
> >
>
> I don't know how that can be, considering writers will consume
> basically limitless requests. What am I missing?
There is a limit on the total amount of IO in flight (4MB by default,
reads/writes combined). We can make this a harder limit by disallowing
merges on any requests present at the time of an unplug. Perhaps I'm
not reading your question correctly?
-chris
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-11 14:13 ` Chris Mason
2003-07-12 0:20 ` Nick Piggin
@ 2003-07-12 7:37 ` Jens Axboe
2003-07-12 7:48 ` Jens Axboe
2003-07-12 18:32 ` Chris Mason
1 sibling, 2 replies; 68+ messages in thread
From: Jens Axboe @ 2003-07-12 7:37 UTC (permalink / raw)
To: Chris Mason
Cc: Marcelo Tosatti, lkml, Stephen C. Tweedie, Alan Cox, Jeff Garzik,
Andrew Morton, Andrea Arcangeli, Alexander Viro
On Fri, Jul 11 2003, Chris Mason wrote:
> On Thu, 2003-07-10 at 09:57, Jens Axboe wrote:
> > On Tue, Jul 08 2003, Marcelo Tosatti wrote:
> > >
> > > Hello people,
> > >
> > > To get better IO interactivity and to fix potential SMP IO hangs (due to
> > > missed wakeups) we, (Chris Mason integrated Andrea's work) added
> > > "io-stalls-10" patch in 2.4.22-pre3.
> > >
> > > The "low-latency" patch (which is part of io-stalls-10) seemed to be a
> > > good approach to increase IO fairness. Some people (Alan, AFAIK) are a bit
> > > concerned about that, though.
> > >
> > > Could you guys, Stephen, Andrew and maybe Viro (if interested :)) which
> > > havent been part of the discussions around the IO stalls issue take a look
> > > at the patch, please?
> > >
> > > It seems safe and a good approach to me, but might not be. Or have small
> > > "glitches".
> >
> > Well, I have one naive question. What prevents writes from eating the
> > entire request pool now? In the 2.2 and earlier days, we reserved the
> > last 3rd of the requests to writes. 2.4.1 and later used a split request
> > list to make that same guarentee.
> >
> > I only did a quick read of the patch so maybe I'm missing the new
> > mechanism for this. Are we simply relying on fair (FIFO) request
> > allocation and oversized queue to do its job alone?
>
> Seems that way. With the 2.4.21 code, a read might easily get a
> request, but then spend forever waiting for a huge queue of merged
> writes to get to disk.
Correct
> I believe the new way provides better overall read performance in the
> presence of lots of writes.
I fail to see the logic in that. Reads are now treated fairly wrt
writes, but it would be really easy to let writes consume the entire
capacity of the queue (be it all the requests, or just going oversized).
I think the oversized logic is flawed right now, and should only apply
to writes. Always let reads get through. And don't let writes consume
the last 1/8th of the requests, or something like that at least. I'll
try and do a patch for pre4.
--
Jens Axboe
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-12 7:37 ` Jens Axboe
@ 2003-07-12 7:48 ` Jens Axboe
2003-07-12 18:32 ` Chris Mason
1 sibling, 0 replies; 68+ messages in thread
From: Jens Axboe @ 2003-07-12 7:48 UTC (permalink / raw)
To: Chris Mason
Cc: Marcelo Tosatti, lkml, Stephen C. Tweedie, Alan Cox, Jeff Garzik,
Andrew Morton, Andrea Arcangeli, Alexander Viro
On Sat, Jul 12 2003, Jens Axboe wrote:
> On Fri, Jul 11 2003, Chris Mason wrote:
> > On Thu, 2003-07-10 at 09:57, Jens Axboe wrote:
> > > On Tue, Jul 08 2003, Marcelo Tosatti wrote:
> > > >
> > > > Hello people,
> > > >
> > > > To get better IO interactivity and to fix potential SMP IO hangs (due to
> > > > missed wakeups) we, (Chris Mason integrated Andrea's work) added
> > > > "io-stalls-10" patch in 2.4.22-pre3.
> > > >
> > > > The "low-latency" patch (which is part of io-stalls-10) seemed to be a
> > > > good approach to increase IO fairness. Some people (Alan, AFAIK) are a bit
> > > > concerned about that, though.
> > > >
> > > > Could you guys, Stephen, Andrew and maybe Viro (if interested :)) who
> > > > haven't been part of the discussions around the IO stalls issue take a look
> > > > at the patch, please?
> > > >
> > > > It seems safe and a good approach to me, but might not be. Or have small
> > > > "glitches".
> > >
> > > Well, I have one naive question. What prevents writes from eating the
> > > entire request pool now? In the 2.2 and earlier days, we reserved the
> > > last 3rd of the requests to writes. 2.4.1 and later used a split request
> > > list to make that same guarantee.
> > >
> > > I only did a quick read of the patch so maybe I'm missing the new
> > > mechanism for this. Are we simply relying on fair (FIFO) request
> > > allocation and oversized queue to do its job alone?
> >
> > Seems that way. With the 2.4.21 code, a read might easily get a
> > request, but then spend forever waiting for a huge queue of merged
> > writes to get to disk.
>
> Correct
>
> > I believe the new way provides better overall read performance in the
> > presence of lots of writes.
>
> I fail to see the logic in that. Reads are now treated fairly wrt
> writes, but it would be really easy to let writes consume the entire
> capacity of the queue (be it all the requests, or just going oversized).
>
> I think the oversized logic is flawed right now, and should only apply
> to writes. Always let reads get through. And don't let writes consume
> the last 1/8th of the requests, or something like that at least. I'll
> try and do a patch for pre4.
Something simple like this should really be added, imo. Untested.
===== drivers/block/ll_rw_blk.c 1.47 vs edited =====
--- 1.47/drivers/block/ll_rw_blk.c Fri Jul 11 10:30:54 2003
+++ edited/drivers/block/ll_rw_blk.c Sat Jul 12 09:47:32 2003
@@ -549,10 +549,18 @@
static struct request *get_request(request_queue_t *q, int rw)
{
struct request *rq = NULL;
- struct request_list *rl;
+ struct request_list *rl = &q->rq;
- rl = &q->rq;
- if (!list_empty(&rl->free) && !blk_oversized_queue(q)) {
+ /*
+ * only apply the oversized queue logic to writes. and only let
+ * writes consume 7/8ths of the queue, always leave room for some
+ * reads
+ */
+ if ((rw == WRITE) &&
+ blk_oversized_queue(q) || rl->count < q->nr_requests / 8)
+ return NULL;
+
+ if (!list_empty(&rl->free)) {
rq = blkdev_free_rq(&rl->free);
list_del(&rq->queue);
rl->count--;
--
Jens Axboe
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-12 7:37 ` Jens Axboe
2003-07-12 7:48 ` Jens Axboe
@ 2003-07-12 18:32 ` Chris Mason
2003-07-13 0:33 ` Andrea Arcangeli
2003-07-13 9:01 ` Jens Axboe
1 sibling, 2 replies; 68+ messages in thread
From: Chris Mason @ 2003-07-12 18:32 UTC (permalink / raw)
To: Jens Axboe
Cc: Marcelo Tosatti, lkml, Stephen C. Tweedie, Alan Cox, Jeff Garzik,
Andrew Morton, Andrea Arcangeli, Alexander Viro
On Sat, 2003-07-12 at 03:37, Jens Axboe wrote:
> > I believe the new way provides better overall read performance in the
> > presence of lots of writes.
>
> I fail to see the logic in that. Reads are now treated fairly wrt
> writes, but it would be really easy to let writes consume the entire
> capacity of the queue (be it all the requests, or just going oversized).
>
> I think the oversized logic is flawed right now, and should only apply
> to writes. Always let reads get through. And don't let writes consume
> the last 1/8th of the requests, or something like that at least. I'll
> try and do a patch for pre4.
If we don't apply oversized checks to reads, what keeps a big streaming
reader from starving out all the writes?
The current patch provides a relatively fixed amount of work to get a
request, and I don't think we should allow that to be bypassed. We
might want to add a special case for synchronous reads (like bread), via
a b_state bit that tells the block layer an immediate unplug is coming
soon. That way the block layer can ignore the oversized checks, grant a
request and unplug right away, hopefully lowering the total number of
unplugs the synchronous reader has to wait through.
Anyway, if you've got doubts about the current patch, I'd be happy to
run a specific benchmark you think will show the bad read
characteristics.
-chris
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-12 18:32 ` Chris Mason
@ 2003-07-13 0:33 ` Andrea Arcangeli
2003-07-13 9:01 ` Jens Axboe
1 sibling, 0 replies; 68+ messages in thread
From: Andrea Arcangeli @ 2003-07-13 0:33 UTC (permalink / raw)
To: Chris Mason
Cc: Jens Axboe, Marcelo Tosatti, lkml, Stephen C. Tweedie, Alan Cox,
Jeff Garzik, Andrew Morton, Alexander Viro
Hello,
On Sat, Jul 12, 2003 at 02:32:32PM -0400, Chris Mason wrote:
> Anyway, if you've got doubts about the current patch, I'd be happy to
> run a specific benchmark you think will show the bad read
> characteristics.
the main reason I dropped the two queues in elevator-lowlatency is that
the ability of calling schedule just once only for the I/O completion
with reads was a minor thing compared to having to wait a potential 32M
queue to be flushed to disk before being able to read a byte from disk.
So I didn't want to complicate the code with minor things, while fixing
what I considered the major issue (i.e. the overkill queue size with
contiguous writes, and too small queue size while seeking).
I already attacked that problem years ago with the max_bomb_segments
(the dead ioctl ;). You know, at that time I attacked the problem from
the wrong side: rather than limiting the mbytes in the queue like
elevator-lowlatency does, I enforced a max limit on the single request
size, because I didn't have an idea of how critical it is to get 512k
requests for each SG DMA operation in any actual storage (the same
mistake that the anticipatory scheduler developers did when they thought
anticipatory scheduler could in any way obviate the need of very
aggressive readahead). elevator-lowlatency is the max_bomb thing in a
way that doesn't hurt contiguous I/O throughput at all, with very similar
latency benefits. Furthermore elevator-lowlatency allowed me to grow a lot
the number of requests without killing down a box with gigabytes large
I/O queues, so now in presence of heavily-seeking 512bytes per-request
(the opposite of 512k per request with contiguous I/O) many more requests
can sit in the elevator in turn allowing a more aggressive reordering of
requests during seeking. That should result in a performance improvement
when seeking (besides the fairness improvement under a flood of
contiguous I/O).
Having two queues, could allow a reader to sleep just once, while this
way it also has to wait for batch_sectors before being able to queue. So
I think what it could do is basically only a cpu saving thing (one less
schedule) and I doubt it would be noticeable.
And in general I don't like too much assumptions that considers reads
different than writes, writes can be latency critical too with fsync
(especially with journaling). Infact if it was just the sync-reads that
we cared about I think read-latency2 from Andrew would already work well
and it's much less invasive, but I consider that a dirty hack compared
to the elevator-lowlatency that fixes stuff for sync writes too, as well
as sync reads, without assuming special workloads. read-latency2
basically makes a very hardcoded assumption that writes aren't latency
critical and it tries to hide the effect of the overkill size of the
queue, by simply putting all reads near the head of the queue, no matter
if the queue size is 1m or 10m or 100m. The whole point of
elevator-lowlatency is not to add read-hacks that assumes only reads are
latency critical. and of course an overkill size of the queue isn't good
for the VM system too, since that memory is unfreeable and it could
render much harder for the VM to be nice with apps allocating ram during
write throttling etc..
Overall I'm not against resurrecting the specialized read queue, after
all writes also gets a special queue (so one could claim that's an
optimization for sync-writes not sync-reads, it's very symmetric ;), but
conceptually I don't find it very worthwhile. So I would prefer to have
a benchmark as you suggested, before complicating things in mainline
(and as you can see now it's a bit more tricky to retain the two
queues ;).
Andrea
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-12 18:32 ` Chris Mason
2003-07-13 0:33 ` Andrea Arcangeli
@ 2003-07-13 9:01 ` Jens Axboe
2003-07-13 16:20 ` Chris Mason
2003-07-13 19:19 ` Andrea Arcangeli
1 sibling, 2 replies; 68+ messages in thread
From: Jens Axboe @ 2003-07-13 9:01 UTC (permalink / raw)
To: Chris Mason
Cc: Marcelo Tosatti, lkml, Stephen C. Tweedie, Alan Cox, Jeff Garzik,
Andrew Morton, Andrea Arcangeli, Alexander Viro
On Sat, Jul 12 2003, Chris Mason wrote:
> On Sat, 2003-07-12 at 03:37, Jens Axboe wrote:
>
> > > I believe the new way provides better overall read performance in the
> > > presence of lots of writes.
> >
> > I fail to see the logic in that. Reads are now treated fairly wrt
> > writes, but it would be really easy to let writes consume the entire
> > capacity of the queue (be it all the requests, or just going oversized).
> >
> > I think the oversized logic is flawed right now, and should only apply
> > to writes. Always let reads get through. And don't let writes consume
> > the last 1/8th of the requests, or something like that at least. I'll
> > try and do a patch for pre4.
>
> If we don't apply oversized checks to reads, what keeps a big streaming
> reader from starving out all the writes?
It's just so much easier to fill the queue with writes than with reads.
> The current patch provides a relatively fixed amount of work to get a
> request, and I don't think we should allow that to be bypassed. We
> might want to add a special case for synchronous reads (like bread), via
> a b_state bit that tells the block layer an immediate unplug is coming
> soon. That way the block layer can ignore the oversized checks, grant a
> request and unplug right away, hopefully lowering the total number of
> unplugs the synchronous reader has to wait through.
>
> Anyway, if you've got doubts about the current patch, I'd be happy to
> run a specific benchmark you think will show the bad read
> characteristics.
No I don't have anything specific, it just seems like a bad heuristic to
get rid of. I can try and do some testing tomorrow. I do feel strongly
that we should at least make sure to reserve a few requests for reads
exclusively, even if you don't agree with the oversized check. Anything
else really contradicts all the io testing we have done the past years
that shows how important it is to get a read in ASAP. And doing that in
the middle of 2.4.22-pre is a mistake imo, if you don't have numbers to
show that it doesn't matter for the quick service of reads.
--
Jens Axboe
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-13 9:01 ` Jens Axboe
@ 2003-07-13 16:20 ` Chris Mason
2003-07-13 16:45 ` Jeff Garzik
2003-07-13 17:47 ` Jens Axboe
2003-07-13 19:19 ` Andrea Arcangeli
1 sibling, 2 replies; 68+ messages in thread
From: Chris Mason @ 2003-07-13 16:20 UTC (permalink / raw)
To: Jens Axboe
Cc: Marcelo Tosatti, lkml, Stephen C. Tweedie, Alan Cox, Jeff Garzik,
Andrew Morton, Andrea Arcangeli, Alexander Viro
On Sun, 2003-07-13 at 05:01, Jens Axboe wrote:
> On Sat, Jul 12 2003, Chris Mason wrote:
> > On Sat, 2003-07-12 at 03:37, Jens Axboe wrote:
> >
> > > > I believe the new way provides better overall read performance in the
> > > > presence of lots of writes.
> > >
> > > I fail to see the logic in that. Reads are now treated fairly wrt
> > > writes, but it would be really easy to let writes consume the entire
> > > capacity of the queue (be it all the requests, or just going oversized).
> > >
> > > I think the oversized logic is flawed right now, and should only apply
> > > to writes. Always let reads get through. And don't let writes consume
> > > the last 1/8th of the requests, or something like that at least. I'll
> > > try and do a patch for pre4.
> >
> > If we don't apply oversized checks to reads, what keeps a big streaming
> > reader from starving out all the writes?
>
> It's just so much easier to fill the queue with writes than with reads.
>
Well, I'd say it's a more common problem to have lots of writes, but it
is pretty easy to fill the queue with reads.
> > The current patch provides a relatively fixed amount of work to get a
> > request, and I don't think we should allow that to be bypassed. We
> > might want to add a special case for synchronous reads (like bread), via
> > a b_state bit that tells the block layer an immediate unplug is coming
> > soon. That way the block layer can ignore the oversized checks, grant a
> > request and unplug right away, hopefully lowering the total number of
> > unplugs the synchronous reader has to wait through.
> >
> > Anyway, if you've got doubts about the current patch, I'd be happy to
> > run a specific benchmark you think will show the bad read
> > characteristics.
>
> No I don't have anything specific, it just seems like a bad heuristic to
> get rid of. I can try and do some testing tomorrow. I do feel strongly
> that we should at least make sure to reserve a few requests for reads
> exclusively, even if you don't agree with the oversized check. Anything
> else really contradicts all the io testing we have done the past years
> that shows how important it is to get a read in ASAP. And doing that in
> the middle of 2.4.22-pre is a mistake imo, if you don't have numbers to
> show that it doesn't matter for the quick service of reads.
I believe elevator-lowlatency tries to solve the 'get a read in ASAP'
from a different direction, by trying to limit both the time required to
get a request and the time for required for the unplug to run. Most of
my numbers so far have been timing reads in the face of lots of writes,
where elevator-lowlatency is a big win.
It should make sense to have a reserve of requests for reads, but I
think this should be limited to the synchronous reads. Still, I haven't
been playing with all of this for very long, so your ideas are much
appreciated.
-chris
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-13 16:20 ` Chris Mason
@ 2003-07-13 16:45 ` Jeff Garzik
2003-07-13 19:33 ` Andrea Arcangeli
2003-07-13 17:47 ` Jens Axboe
1 sibling, 1 reply; 68+ messages in thread
From: Jeff Garzik @ 2003-07-13 16:45 UTC (permalink / raw)
To: Chris Mason
Cc: Jens Axboe, Marcelo Tosatti, lkml, Stephen C. Tweedie, Alan Cox,
Andrew Morton, Andrea Arcangeli, Alexander Viro
Chris Mason wrote:
> Well, I'd say it's a more common problem to have lots of writes, but it
> is pretty easy to fill the queue with reads.
Well....
* you will usually have more reads than writes
* reads are more dependent than writes, on the whole
* writes are larger due to delays and merging
All this is obviously workload dependent, but it seems like the 60%
common case, at least...
Basically when I hear people talking about lots of writes, that seems to
be downplaying the fact that seeing 20 writes and 2 reads on a queue
does not take into account the userspace applications currently
blocking, waiting to do another read.
Jeff
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-13 16:45 ` Jeff Garzik
@ 2003-07-13 19:33 ` Andrea Arcangeli
0 siblings, 0 replies; 68+ messages in thread
From: Andrea Arcangeli @ 2003-07-13 19:33 UTC (permalink / raw)
To: Jeff Garzik
Cc: Chris Mason, Jens Axboe, Marcelo Tosatti, lkml,
Stephen C. Tweedie, Alan Cox, Andrew Morton, Alexander Viro
On Sun, Jul 13, 2003 at 12:45:58PM -0400, Jeff Garzik wrote:
> Chris Mason wrote:
> >Well, I'd say it's a more common problem to have lots of writes, but it
> >is pretty easy to fill the queue with reads.
>
> Well....
>
> * you will usually have more reads than writes
> * reads are more dependent than writes, on the whole
> * writes are larger due to delays and merging
>
> All this is obviously workload dependent, but it seems like the 60%
> common case, at least...
sync-reads may be as much as 99%. I'm not overlooking sync-reads, I'm
saying sync-writes are important too, and this design is meant to fix
sync-reads as well as sync-writes.
> Basically when I hear people talking about lots of writes, that seems to
> be downplaying the fact that seeing 20 writes and 2 reads on a queue
> does not take into account the userspace applications currently
> blocking, waiting to do another read.
this is not overlooked. The point is that those 2 reads tends to wait
for at least batch_sectors anyways, so it doesn't matter if they wait
for those batch_sectors inside the I/O queue, or in wait_for_request. I
mean, I don't see why being able to wait only in the I/O queue could
make a relevant latency improvement (of course it will save a schedule
but who cares about a schedule on such a sync-workload?)
What you've to care about is that this "read-sync" is the very next one
to get the I/O in wait_for_request, not the write. and this is fixed
now.
in the past (even in 2.4.21) the wait_for_request was broken, so hitting
that path would risk hanging reads forever (there was no ->full control
etc..). Now that's not the case anymore. If the read is still stalled
forever it means wait_for_request needs more fixes than what's already
in mainline in 22pre5.
In the past with broken wait_for_request things were completely
different so using old benchmarks as an argument doesn't sound
completely correct to me. More interesting would be the theories about
why those past benchmarks shown the relevant improvements (latency?
throughput?), so we can see if those theories still applies to the new
lowlatency-elevator fixed code in mainline.
Andrea
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-13 16:20 ` Chris Mason
2003-07-13 16:45 ` Jeff Garzik
@ 2003-07-13 17:47 ` Jens Axboe
2003-07-13 19:35 ` Andrea Arcangeli
1 sibling, 1 reply; 68+ messages in thread
From: Jens Axboe @ 2003-07-13 17:47 UTC (permalink / raw)
To: Chris Mason
Cc: Marcelo Tosatti, lkml, Stephen C. Tweedie, Alan Cox, Jeff Garzik,
Andrew Morton, Andrea Arcangeli, Alexander Viro
On Sun, Jul 13 2003, Chris Mason wrote:
> On Sun, 2003-07-13 at 05:01, Jens Axboe wrote:
> > On Sat, Jul 12 2003, Chris Mason wrote:
> > > On Sat, 2003-07-12 at 03:37, Jens Axboe wrote:
> > >
> > > > > I believe the new way provides better overall read performance in the
> > > > > presence of lots of writes.
> > > >
> > > > I fail to see the logic in that. Reads are now treated fairly wrt
> > > > writes, but it would be really easy to let writes consume the entire
> > > > capacity of the queue (be it all the requests, or just going oversized).
> > > >
> > > > I think the oversized logic is flawed right now, and should only apply
> > > > to writes. Always let reads get through. And don't let writes consume
> > > > the last 1/8th of the requests, or something like that at least. I'll
> > > > try and do a patch for pre4.
> > >
> > > If we don't apply oversized checks to reads, what keeps a big streaming
> > > reader from starving out all the writes?
> >
> > It's just so much easier to fill the queue with writes than with reads.
> >
>
> Well, I'd say it's a more common problem to have lots of writes, but it
> is pretty easy to fill the queue with reads.
s/easy/occurs often. Bad wording, yes of course it's trivial to make it
happen. But on a desktop machine, it rarely does. Can't say the same
thing about writes.
> > > The current patch provides a relatively fixed amount of work to get a
> > > request, and I don't think we should allow that to be bypassed. We
> > > might want to add a special case for synchronous reads (like bread), via
> > > a b_state bit that tells the block layer an immediate unplug is coming
> > > soon. That way the block layer can ignore the oversized checks, grant a
> > > request and unplug right away, hopefully lowering the total number of
> > > unplugs the synchronous reader has to wait through.
> > >
> > > Anyway, if you've got doubts about the current patch, I'd be happy to
> > > run a specific benchmark you think will show the bad read
> > > characteristics.
> >
> > No I don't have anything specific, it just seems like a bad heuristic to
> > get rid of. I can try and do some testing tomorrow. I do feel strongly
> > that we should at least make sure to reserve a few requests for reads
> > exclusively, even if you don't agree with the oversized check. Anything
> > else really contradicts all the io testing we have done the past years
> > that shows how important it is to get a read in ASAP. And doing that in
> > the middle of 2.4.22-pre is a mistake imo, if you don't have numbers to
> > show that it doesn't matter for the quick service of reads.
>
> I believe elevator-lowlatency tries to solve the 'get a read in ASAP'
> from a different direction, by trying to limit both the time required to
> get a request and the time for required for the unplug to run. Most of
> my numbers so far have been timing reads in the face of lots of writes,
> where elevator-lowlatency is a big win.
>
> It should make sense to have a reserve of requests for reads, but I
> think this should be limited to the synchronous reads. Still, I haven't
> been playing with all of this for very long, so your ideas are much
> appreciated.
Just catering to the sync reads is probably good enough, and I think
that passing in that extra bit of information is done the best through
just a b_state bit.
I'll try and bench a bit tomorrow with that patched in.
--
Jens Axboe
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-13 17:47 ` Jens Axboe
@ 2003-07-13 19:35 ` Andrea Arcangeli
2003-07-14 0:36 ` Chris Mason
0 siblings, 1 reply; 68+ messages in thread
From: Andrea Arcangeli @ 2003-07-13 19:35 UTC (permalink / raw)
To: Jens Axboe
Cc: Chris Mason, Marcelo Tosatti, lkml, Stephen C. Tweedie, Alan Cox,
Jeff Garzik, Andrew Morton, Alexander Viro
On Sun, Jul 13, 2003 at 07:47:09PM +0200, Jens Axboe wrote:
> Just catering to the sync reads is probably good enough, and I think
> that passing in that extra bit of information is done the best through
> just a b_state bit.
not sure if we should pass this bit of info, I guess if we add it we can
just treat all reads equally and give them the spare reserved
requests unconditionally, so the highlevel isn't complicated (of course
it would be optional, so nothing would break but it would be annoying
for benchmarking new fs having to patch stuff first to see the effect of
the spare reserved requests).
> I'll try and bench a bit tomorrow with that patched in.
Cool thanks.
Andrea
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-13 19:35 ` Andrea Arcangeli
@ 2003-07-14 0:36 ` Chris Mason
0 siblings, 0 replies; 68+ messages in thread
From: Chris Mason @ 2003-07-14 0:36 UTC (permalink / raw)
To: Andrea Arcangeli
Cc: Jens Axboe, Marcelo Tosatti, lkml, Stephen C. Tweedie, Alan Cox,
Jeff Garzik, Andrew Morton, Alexander Viro
On Sun, 2003-07-13 at 15:35, Andrea Arcangeli wrote:
> On Sun, Jul 13, 2003 at 07:47:09PM +0200, Jens Axboe wrote:
> > Just catering to the sync reads is probably good enough, and I think
> > that passing in that extra bit of information is done the best through
> > just a b_state bit.
>
> not sure if we should pass this bit of info, I guess if we add it we can
> just treat all reads equally and give them the spare reserved
> requests unconditionally, so the highlevel isn't complicated (of course
> it would be optional, so nothing would break but it would be annoying
> for benchmarking new fs having to patch stuff first to see the effect of
> the spare reserved requests).
>
By goal with the b_state bit was to change this:
bread
submit_bh
__get_request_wait (unplugs just to get a free rq)
wait_on_buffer (run tq_disk)
Into this:
bread
set BH_Sync
submit_bh
__make_request (get a reserved sync request, unplugs to start the read)
buffer up to date
It's wishful thinking, since the unplug doesn't mean we end up with an
unlocked buffer. But at the very least, taking a reserved sync request
and unplugging right away shouldn't be worse than the current method of
unplugging just to get a request.
And on boxes with lots of busy drives, avoiding tq_disk is a good thing,
even if we only manage to avoid it for a small percentage of the reads.
> > I'll try and bench a bit tomorrow with that patched in.
>
> Cool thanks.
Thanks Jens
-chris
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-13 9:01 ` Jens Axboe
2003-07-13 16:20 ` Chris Mason
@ 2003-07-13 19:19 ` Andrea Arcangeli
2003-07-14 5:49 ` Jens Axboe
1 sibling, 1 reply; 68+ messages in thread
From: Andrea Arcangeli @ 2003-07-13 19:19 UTC (permalink / raw)
To: Jens Axboe
Cc: Chris Mason, Marcelo Tosatti, lkml, Stephen C. Tweedie, Alan Cox,
Jeff Garzik, Andrew Morton, Alexander Viro
On Sun, Jul 13, 2003 at 11:01:16AM +0200, Jens Axboe wrote:
> No I don't have anything specific, it just seems like a bad heuristic to
> get rid of. I can try and do some testing tomorrow. I do feel strongly
well, it's not a heuristic, it's a simplification and it certainly
won't provide any benefit (besides saving some hundred kbytes of ram per
harddisk that is a minor benefit).
> that we should at least make sure to reserve a few requests for reads
> exclusively, even if you don't agree with the oversized check. Anything
> else really contradicts all the io testing we have done the past years
> that shows how important it is to get a read in ASAP. And doing that in
Important for latency or throughput? Do you know which is the benchmarks
that returned better results with the two queues, what's the theory
behind this?
> the middle of 2.4.22-pre is a mistake imo, if you don't have numbers to
> show that it doesn't matter for the quick service of reads.
Is it latency or throughput that you're mainly worried about? Latency
certainly isn't worse with this lowlatency improvement (no matter one or
two queues, that's a very minor issue w.r.t. to latency).
I could imagine readahead throughput could be less likely to be hurt
by writes with the two queues. But I doubt it's very noticeable.
However I'm not against re-adding it, clearly there's a slight benefit
for reads in never blocking in wait_for_request, I just didn't consider
it very worthwhile since they have to block later anyway for
batch_sectors, just like every write (in a congested I/O
subsystem - when the I/O isn't congested nothing blocks in the first
place).
Andrea
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-13 19:19 ` Andrea Arcangeli
@ 2003-07-14 5:49 ` Jens Axboe
2003-07-14 12:23 ` Marcelo Tosatti
0 siblings, 1 reply; 68+ messages in thread
From: Jens Axboe @ 2003-07-14 5:49 UTC (permalink / raw)
To: Andrea Arcangeli
Cc: Chris Mason, Marcelo Tosatti, lkml, Stephen C. Tweedie, Alan Cox,
Jeff Garzik, Andrew Morton, Alexander Viro
On Sun, Jul 13 2003, Andrea Arcangeli wrote:
> On Sun, Jul 13, 2003 at 11:01:16AM +0200, Jens Axboe wrote:
> > No I don't have anything specific, it just seems like a bad heuristic to
> > get rid of. I can try and do some testing tomorrow. I do feel strongly
>
> well, it's not an heuristic, it's a simplification and it will certainly
> won't provide any benefit (besides saving some hundred kbytes of ram per
> harddisk that is a minor benefit).
You are missing my point - I don't care about losing the extra request
list, I never said anything about that in this thread. I care about
losing the reserved requests for reads. And we can do that just fine
with just holding back a handful of requests.
> > that we should at least make sure to reserve a few requests for reads
> > exclusively, even if you don't agree with the oversized check. Anything
> > else really contradicts all the io testing we have done the past years
> > that shows how important it is to get a read in ASAP. And doing that in
>
> Important for latency or throughput? Do you know which is the benchmarks
> that returned better results with the two queues, what's the theory
> behind this?
Forget the two queues, no one has said anything about that. The reserved
reads are important for latency reasons, not throughput.
--
Jens Axboe
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-14 5:49 ` Jens Axboe
@ 2003-07-14 12:23 ` Marcelo Tosatti
2003-07-14 13:12 ` Jens Axboe
0 siblings, 1 reply; 68+ messages in thread
From: Marcelo Tosatti @ 2003-07-14 12:23 UTC (permalink / raw)
To: Jens Axboe
Cc: Andrea Arcangeli, Chris Mason, lkml, Stephen C. Tweedie,
Alan Cox, Jeff Garzik, Andrew Morton, Alexander Viro
On Mon, 14 Jul 2003, Jens Axboe wrote:
> On Sun, Jul 13 2003, Andrea Arcangeli wrote:
> > On Sun, Jul 13, 2003 at 11:01:16AM +0200, Jens Axboe wrote:
> > > No I don't have anything specific, it just seems like a bad heuristic to
> > > get rid of. I can try and do some testing tomorrow. I do feel strongly
> >
> > well, it's not an heuristic, it's a simplification and it will certainly
> > won't provide any benefit (besides saving some hundred kbytes of ram per
> > harddisk that is a minor benefit).
>
> You are missing my point - I don't care about losing the extra request
> list, I never said anything about that in this thread. I care about
> losing the reserved requests for reads. And we can do that just fine
> with just holding back a handful of requests.
>
> > > that we should at least make sure to reserve a few requests for reads
> > > exclusively, even if you don't agree with the oversized check. Anything
> > > else really contradicts all the io testing we have done the past years
> > > that shows how important it is to get a read in ASAP. And doing that in
> >
> > Important for latency or throughput? Do you know which is the benchmarks
> > that returned better results with the two queues, what's the theory
> > behind this?
>
> Forget the two queues, no one has said anything about that. The reserved
> reads are important for latency reasons, not throughput.
So Jens,
Please bench (as you said you would), and send us the results.
Its very important.
Thanks
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-14 12:23 ` Marcelo Tosatti
@ 2003-07-14 13:12 ` Jens Axboe
2003-07-14 19:51 ` Jens Axboe
0 siblings, 1 reply; 68+ messages in thread
From: Jens Axboe @ 2003-07-14 13:12 UTC (permalink / raw)
To: Marcelo Tosatti
Cc: Andrea Arcangeli, Chris Mason, lkml, Stephen C. Tweedie,
Alan Cox, Jeff Garzik, Andrew Morton, Alexander Viro
On Mon, Jul 14 2003, Marcelo Tosatti wrote:
>
>
> On Mon, 14 Jul 2003, Jens Axboe wrote:
>
> > On Sun, Jul 13 2003, Andrea Arcangeli wrote:
> > > On Sun, Jul 13, 2003 at 11:01:16AM +0200, Jens Axboe wrote:
> > > > No I don't have anything specific, it just seems like a bad heuristic to
> > > > get rid of. I can try and do some testing tomorrow. I do feel strongly
> > >
> > > well, it's not a heuristic, it's a simplification and it certainly
> > > won't provide any benefit (besides saving some hundred kbytes of ram per
> > > harddisk that is a minor benefit).
> >
> > You are missing my point - I don't care about losing the extra request
> > list, I never said anything about that in this thread. I care about
> > losing the reserved requests for reads. And we can do that just fine
> > with just holding back a handful of requests.
> >
> > > > that we should at least make sure to reserve a few requests for reads
> > > > exclusively, even if you don't agree with the oversized check. Anything
> > > > else really contradicts all the io testing we have done the past years
> > > > that shows how important it is to get a read in ASAP. And doing that in
> > >
> > > Important for latency or throughput? Do you know which is the benchmarks
> > > that returned better results with the two queues, what's the theory
> > > behind this?
> >
> > Forget the two queues, no one has said anything about that. The reserved
> > reads are important for latency reasons, not throughput.
>
> So Jens,
>
> Please bench (as you said you would), and send us the results.
>
> It's very important.
Yes of course I'll send the results, at the current rate (they are
running on the box) it probably won't be before tomorrow though.
--
Jens Axboe
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-14 13:12 ` Jens Axboe
@ 2003-07-14 19:51 ` Jens Axboe
2003-07-14 20:09 ` Chris Mason
` (2 more replies)
0 siblings, 3 replies; 68+ messages in thread
From: Jens Axboe @ 2003-07-14 19:51 UTC (permalink / raw)
To: Marcelo Tosatti
Cc: Andrea Arcangeli, Chris Mason, lkml, Stephen C. Tweedie,
Alan Cox, Jeff Garzik, Andrew Morton, Alexander Viro
On Mon, Jul 14 2003, Jens Axboe wrote:
> On Mon, Jul 14 2003, Marcelo Tosatti wrote:
> >
> >
> > On Mon, 14 Jul 2003, Jens Axboe wrote:
> >
> > > On Sun, Jul 13 2003, Andrea Arcangeli wrote:
> > > > On Sun, Jul 13, 2003 at 11:01:16AM +0200, Jens Axboe wrote:
> > > > > No I don't have anything specific, it just seems like a bad heuristic to
> > > > > get rid of. I can try and do some testing tomorrow. I do feel strongly
> > > >
> > > > well, it's not a heuristic, it's a simplification and it certainly
> > > > won't provide any benefit (besides saving some hundred kbytes of ram per
> > > > harddisk that is a minor benefit).
> > >
> > > You are missing my point - I don't care about losing the extra request
> > > list, I never said anything about that in this thread. I care about
> > > losing the reserved requests for reads. And we can do that just fine
> > > with just holding back a handful of requests.
> > >
> > > > > that we should at least make sure to reserve a few requests for reads
> > > > > exclusively, even if you don't agree with the oversized check. Anything
> > > > > else really contradicts all the io testing we have done the past years
> > > > > that shows how important it is to get a read in ASAP. And doing that in
> > > >
> > > > Important for latency or throughput? Do you know which is the benchmarks
> > > > that returned better results with the two queues, what's the theory
> > > > behind this?
> > >
> > > Forget the two queues, no one has said anything about that. The reserved
> > > reads are important for latency reasons, not throughput.
> >
> > So Jens,
> >
> > Please bench (as you said you would), and send us the results.
> >
> > It's very important.
>
> Yes of course I'll send the results, at the current rate (they are
> running on the box) it probably won't be before tomorrow though.
Some initial results with the attached patch, I'll try and do some more
fine grained tomorrow. Base kernel was 2.4.22-pre5 (obviously), drive
tested is a SCSI drive (on aic7xxx, tcq fixed at 4), fs is ext3. I would
have done ide testing actually, but the drive in that machine appears to
have gone dead. I'll pop in a new one tomorrow and test on that too.
no_load:
Kernel [runs] Time CPU% Loads LCPU% Ratio
2.4.22-pre5 2 134 196.3 0.0 0.0 1.00
2.4.22-pre5-axboe 3 133 196.2 0.0 0.0 1.00
ctar_load:
Kernel [runs] Time CPU% Loads LCPU% Ratio
2.4.22-pre5 3 235 114.0 25.0 22.1 1.75
2.4.22-pre5-axboe 3 194 138.1 19.7 20.6 1.46
xtar_load:
Kernel [runs] Time CPU% Loads LCPU% Ratio
2.4.22-pre5 3 309 86.4 15.0 14.9 2.31
2.4.22-pre5-axboe 3 249 107.2 11.3 14.1 1.87
io_load:
Kernel [runs] Time CPU% Loads LCPU% Ratio
2.4.22-pre5 3 637 42.5 120.2 18.5 4.75
2.4.22-pre5-axboe 3 540 50.0 103.0 18.1 4.06
io_other:
Kernel [runs] Time CPU% Loads LCPU% Ratio
2.4.22-pre5 3 576 47.2 107.7 19.8 4.30
2.4.22-pre5-axboe 3 452 59.7 85.3 19.5 3.40
read_load:
Kernel [runs] Time CPU% Loads LCPU% Ratio
2.4.22-pre5 3 150 181.3 8.1 9.3 1.12
2.4.22-pre5-axboe 3 152 178.9 8.2 9.9 1.14
===== drivers/block/ll_rw_blk.c 1.47 vs edited =====
--- 1.47/drivers/block/ll_rw_blk.c Fri Jul 11 10:30:54 2003
+++ edited/drivers/block/ll_rw_blk.c Mon Jul 14 20:42:36 2003
@@ -549,10 +549,12 @@
static struct request *get_request(request_queue_t *q, int rw)
{
struct request *rq = NULL;
- struct request_list *rl;
+ struct request_list *rl = &q->rq;
- rl = &q->rq;
- if (!list_empty(&rl->free) && !blk_oversized_queue(q)) {
+ if ((rw == WRITE) && (blk_oversized_queue(q) || (rl->count < 4)))
+ return NULL;
+
+ if (!list_empty(&rl->free)) {
rq = blkdev_free_rq(&rl->free);
list_del(&rq->queue);
rl->count--;
@@ -947,7 +949,7 @@
static int __make_request(request_queue_t * q, int rw,
struct buffer_head * bh)
{
- unsigned int sector, count;
+ unsigned int sector, count, sync;
int max_segments = MAX_SEGMENTS;
struct request * req, *freereq = NULL;
int rw_ahead, max_sectors, el_ret;
@@ -958,6 +960,7 @@
count = bh->b_size >> 9;
sector = bh->b_rsector;
+ sync = test_and_clear_bit(BH_Sync, &bh->b_state);
rw_ahead = 0; /* normal case; gets changed below for READA */
switch (rw) {
@@ -1124,6 +1127,8 @@
blkdev_release_request(freereq);
if (should_wake)
get_request_wait_wakeup(q, rw);
+ if (sync)
+ __generic_unplug_device(q);
spin_unlock_irq(&io_request_lock);
return 0;
end_io:
===== fs/buffer.c 1.89 vs edited =====
--- 1.89/fs/buffer.c Tue Jun 24 23:11:29 2003
+++ edited/fs/buffer.c Mon Jul 14 16:59:59 2003
@@ -1142,6 +1142,7 @@
bh = getblk(dev, block, size);
if (buffer_uptodate(bh))
return bh;
+ set_bit(BH_Sync, &bh->b_state);
ll_rw_block(READ, 1, &bh);
wait_on_buffer(bh);
if (buffer_uptodate(bh))
===== include/linux/fs.h 1.82 vs edited =====
--- 1.82/include/linux/fs.h Wed Jul 9 20:42:38 2003
+++ edited/include/linux/fs.h Mon Jul 14 16:59:47 2003
@@ -221,6 +221,7 @@
BH_Launder, /* 1 if we can throttle on this buffer */
BH_Attached, /* 1 if b_inode_buffers is linked into a list */
BH_JBD, /* 1 if it has an attached journal_head */
+ BH_Sync,
BH_PrivateStart,/* not a state bit, but the first bit available
* for private allocation by other entities
--
Jens Axboe
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-14 19:51 ` Jens Axboe
@ 2003-07-14 20:09 ` Chris Mason
2003-07-14 20:19 ` Andrea Arcangeli
2003-07-15 5:46 ` Jens Axboe
2003-07-14 20:09 ` Marcelo Tosatti
2003-07-14 20:16 ` Andrea Arcangeli
2 siblings, 2 replies; 68+ messages in thread
From: Chris Mason @ 2003-07-14 20:09 UTC (permalink / raw)
To: Jens Axboe
Cc: Marcelo Tosatti, Andrea Arcangeli, lkml, Stephen C. Tweedie,
Alan Cox, Jeff Garzik, Andrew Morton, Alexander Viro
On Mon, 2003-07-14 at 15:51, Jens Axboe wrote:
> Some initial results with the attached patch, I'll try and do some more
> fine grained tomorrow. Base kernel was 2.4.22-pre5 (obviously), drive
> tested is a SCSI drive (on aic7xxx, tcq fixed at 4), fs is ext3. I would
> have done ide testing actually, but the drive in that machine appears to
> have gone dead. I'll pop in a new one tomorrow and test on that too.
>
Thanks Jens, the results so far are very interesting (although I'm
curious to hear how 2.4.21 did).
> --- 1.47/drivers/block/ll_rw_blk.c Fri Jul 11 10:30:54 2003
> +++ edited/drivers/block/ll_rw_blk.c Mon Jul 14 20:42:36 2003
> @@ -549,10 +549,12 @@
> static struct request *get_request(request_queue_t *q, int rw)
> {
> struct request *rq = NULL;
> - struct request_list *rl;
> + struct request_list *rl = &q->rq;
>
> - rl = &q->rq;
> - if (!list_empty(&rl->free) && !blk_oversized_queue(q)) {
> + if ((rw == WRITE) && (blk_oversized_queue(q) || (rl->count < 4)))
> + return NULL;
> +
> + if (!list_empty(&rl->free)) {
> rq = blkdev_free_rq(&rl->free);
> list_del(&rq->queue);
> rl->count--;
> @@ -947,7 +949,7 @@
Could I talk you into trying a form of this patch that honors
blk_oversized_queue for everything except the BH_sync requests?
-chris
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-14 20:09 ` Chris Mason
@ 2003-07-14 20:19 ` Andrea Arcangeli
2003-07-14 21:24 ` Chris Mason
2003-07-15 5:46 ` Jens Axboe
1 sibling, 1 reply; 68+ messages in thread
From: Andrea Arcangeli @ 2003-07-14 20:19 UTC (permalink / raw)
To: Chris Mason
Cc: Jens Axboe, Marcelo Tosatti, lkml, Stephen C. Tweedie, Alan Cox,
Jeff Garzik, Andrew Morton, Alexander Viro
On Mon, Jul 14, 2003 at 04:09:08PM -0400, Chris Mason wrote:
> On Mon, 2003-07-14 at 15:51, Jens Axboe wrote:
> > --- 1.47/drivers/block/ll_rw_blk.c Fri Jul 11 10:30:54 2003
> > +++ edited/drivers/block/ll_rw_blk.c Mon Jul 14 20:42:36 2003
> > @@ -549,10 +549,12 @@
> > static struct request *get_request(request_queue_t *q, int rw)
> > {
> > struct request *rq = NULL;
> > - struct request_list *rl;
> > + struct request_list *rl = &q->rq;
> >
> > - rl = &q->rq;
> > - if (!list_empty(&rl->free) && !blk_oversized_queue(q)) {
> > + if ((rw == WRITE) && (blk_oversized_queue(q) || (rl->count < 4)))
> > + return NULL;
> > +
> > + if (!list_empty(&rl->free)) {
> > rq = blkdev_free_rq(&rl->free);
> > list_del(&rq->queue);
> > rl->count--;
> > @@ -947,7 +949,7 @@
>
> Could I talk you into trying a form of this patch that honors
> blk_oversized_queue for everything except the BH_sync requests?
not honoring blk_oversized_queue for BH_sync isn't safe either (still it
would be an order of magnitude safer than not honoring it for all reads
unconditionally) there must be a secondary limit, eating all the
requests with potentially 512k of ram queued into each requests is
unsafe (one can generate many sync requests by forking some hundred
thousand tasks, this isn't only x86).
Andrea
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-14 20:19 ` Andrea Arcangeli
@ 2003-07-14 21:24 ` Chris Mason
0 siblings, 0 replies; 68+ messages in thread
From: Chris Mason @ 2003-07-14 21:24 UTC (permalink / raw)
To: Andrea Arcangeli
Cc: Jens Axboe, Marcelo Tosatti, lkml, Stephen C. Tweedie, Alan Cox,
Jeff Garzik, Andrew Morton, Alexander Viro
On Mon, 2003-07-14 at 16:19, Andrea Arcangeli wrote:
> > Could I talk you into trying a form of this patch that honors
> > blk_oversized_queue for everything except the BH_sync requests?
>
> not honoring blk_oversized_queue for BH_sync isn't safe either (still it
> would be an order of magnitude safer than not honoring it for all reads
> unconditionally) there must be a secondary limit, eating all the
> requests with potentially 512k of ram queued into each requests is
> unsafe (one can generate many sync requests by forking some hundred
> thousand tasks, this isn't only x86).
Fair enough. This patch allows sync requests to steal up to
q->batch_sectors of additional requests. This is probably too big a
number but it had the benefit of being already calculated for me.
I need to replace that q->rq.count < 4 crud with a constant or just drop
it entirely. I like that elevator-lowlatency is more concerned with
sectors in flight than free requests, it would probably be best to keep
it that way.
In other words, this patch is for discussion only. It allows BH_Sync to
be set for write requests as well, since my original idea for it was to
give lower latencies to any request the rest of the box might be waiting
on (like a log commit).
-chris
--- 1.48/drivers/block/ll_rw_blk.c Fri Jul 11 15:08:30 2003
+++ edited/drivers/block/ll_rw_blk.c Mon Jul 14 17:15:35 2003
@@ -546,13 +546,20 @@
* Get a free request. io_request_lock must be held and interrupts
* disabled on the way in. Returns NULL if there are no free requests.
*/
-static struct request *get_request(request_queue_t *q, int rw)
+static struct request *get_request(request_queue_t *q, int rw, int sync)
{
struct request *rq = NULL;
- struct request_list *rl;
+ struct request_list *rl = &q->rq;
- rl = &q->rq;
- if (!list_empty(&rl->free) && !blk_oversized_queue(q)) {
+ if (sync) {
+ if (blk_oversized_queue_sync(q))
+ return NULL;
+ } else {
+ if (blk_oversized_queue(q) || q->rq.count < 4)
+ return NULL;
+ }
+
+ if (!list_empty(&rl->free)) {
rq = blkdev_free_rq(&rl->free);
list_del(&rq->queue);
rl->count--;
@@ -608,7 +615,7 @@
* wakeups where there aren't any requests available yet.
*/
-static struct request *__get_request_wait(request_queue_t *q, int rw)
+static struct request *__get_request_wait(request_queue_t *q, int rw, int sync)
{
register struct request *rq;
DECLARE_WAITQUEUE(wait, current);
@@ -618,13 +625,13 @@
do {
set_current_state(TASK_UNINTERRUPTIBLE);
spin_lock_irq(&io_request_lock);
- if (blk_oversized_queue(q) || q->rq.count == 0) {
+ if (blk_oversized_queue(q) || q->rq.count < 4) {
__generic_unplug_device(q);
spin_unlock_irq(&io_request_lock);
schedule();
spin_lock_irq(&io_request_lock);
}
- rq = get_request(q, rw);
+ rq = get_request(q, rw, sync);
spin_unlock_irq(&io_request_lock);
} while (rq == NULL);
remove_wait_queue(&q->wait_for_requests, &wait);
@@ -947,7 +954,7 @@
static int __make_request(request_queue_t * q, int rw,
struct buffer_head * bh)
{
- unsigned int sector, count;
+ unsigned int sector, count, sync;
int max_segments = MAX_SEGMENTS;
struct request * req, *freereq = NULL;
int rw_ahead, max_sectors, el_ret;
@@ -958,6 +965,7 @@
count = bh->b_size >> 9;
sector = bh->b_rsector;
+ sync = test_and_clear_bit(BH_Sync, &bh->b_state);
rw_ahead = 0; /* normal case; gets changed below for READA */
switch (rw) {
@@ -1084,14 +1092,14 @@
spin_unlock_irq(&io_request_lock);
goto end_io;
}
- req = get_request(q, rw);
+ req = get_request(q, rw, sync);
if (req == NULL)
BUG();
} else {
- req = get_request(q, rw);
+ req = get_request(q, rw, sync);
if (req == NULL) {
spin_unlock_irq(&io_request_lock);
- freereq = __get_request_wait(q, rw);
+ freereq = __get_request_wait(q, rw, sync);
head = &q->queue_head;
spin_lock_irq(&io_request_lock);
should_wake = 1;
@@ -1122,6 +1130,8 @@
out:
if (freereq)
blkdev_release_request(freereq);
+ if (sync)
+ __generic_unplug_device(q);
if (should_wake)
get_request_wait_wakeup(q, rw);
spin_unlock_irq(&io_request_lock);
--- 1.92/fs/buffer.c Fri Jul 11 16:43:23 2003
+++ edited/fs/buffer.c Mon Jul 14 16:23:24 2003
@@ -1144,6 +1144,7 @@
bh = getblk(dev, block, size);
if (buffer_uptodate(bh))
return bh;
+ set_bit(BH_Sync, &bh->b_state);
ll_rw_block(READ, 1, &bh);
wait_on_buffer(bh);
if (buffer_uptodate(bh))
--- 1.24/include/linux/blkdev.h Fri Jul 4 13:18:12 2003
+++ edited/include/linux/blkdev.h Mon Jul 14 16:46:56 2003
@@ -295,6 +295,13 @@
return q->rq.count == 0;
}
+static inline int blk_oversized_queue_sync(request_queue_t * q)
+{
+ if (q->can_throttle)
+ return atomic_read(&q->nr_sectors) > q->max_queue_sectors + q->batch_sectors;
+ return q->rq.count == 0;
+}
+
static inline int blk_oversized_queue_batch(request_queue_t * q)
{
return atomic_read(&q->nr_sectors) > q->max_queue_sectors - q->batch_sectors;
--- 1.84/include/linux/fs.h Fri Jul 11 15:13:13 2003
+++ edited/include/linux/fs.h Mon Jul 14 16:23:24 2003
@@ -221,6 +221,7 @@
BH_Launder, /* 1 if we can throttle on this buffer */
BH_Attached, /* 1 if b_inode_buffers is linked into a list */
BH_JBD, /* 1 if it has an attached journal_head */
+ BH_Sync,
BH_PrivateStart,/* not a state bit, but the first bit available
* for private allocation by other entities
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-14 20:09 ` Chris Mason
2003-07-14 20:19 ` Andrea Arcangeli
@ 2003-07-15 5:46 ` Jens Axboe
1 sibling, 0 replies; 68+ messages in thread
From: Jens Axboe @ 2003-07-15 5:46 UTC (permalink / raw)
To: Chris Mason
Cc: Marcelo Tosatti, Andrea Arcangeli, lkml, Stephen C. Tweedie,
Alan Cox, Jeff Garzik, Andrew Morton, Alexander Viro
On Mon, Jul 14 2003, Chris Mason wrote:
> On Mon, 2003-07-14 at 15:51, Jens Axboe wrote:
>
> > Some initial results with the attached patch, I'll try and do some more
> > fine grained tomorrow. Base kernel was 2.4.22-pre5 (obviously), drive
> > tested is a SCSI drive (on aic7xxx, tcq fixed at 4), fs is ext3. I would
> > have done ide testing actually, but the drive in that machine appears to
> > have gone dead. I'll pop in a new one tomorrow and test on that too.
> >
>
> Thanks Jens, the results so far are very interesting (although I'm
> curious to hear how 2.4.21 did).
Hmm good point, I'll make a run with that as well.
--
Jens Axboe
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-14 19:51 ` Jens Axboe
2003-07-14 20:09 ` Chris Mason
@ 2003-07-14 20:09 ` Marcelo Tosatti
2003-07-14 20:24 ` Andrea Arcangeli
2003-07-14 20:16 ` Andrea Arcangeli
2 siblings, 1 reply; 68+ messages in thread
From: Marcelo Tosatti @ 2003-07-14 20:09 UTC (permalink / raw)
To: Jens Axboe
Cc: Andrea Arcangeli, Chris Mason, lkml, Stephen C. Tweedie,
Alan Cox, Jeff Garzik, Andrew Morton, Alexander Viro
On Mon, 14 Jul 2003, Jens Axboe wrote:
> Some initial results with the attached patch, I'll try and do some more
> fine grained tomorrow. Base kernel was 2.4.22-pre5 (obviously), drive
> tested is a SCSI drive (on aic7xxx, tcq fixed at 4), fs is ext3. I would
> have done ide testing actually, but the drive in that machine appears to
> have gone dead. I'll pop in a new one tomorrow and test on that too.
>
> no_load:
> Kernel [runs] Time CPU% Loads LCPU% Ratio
> 2.4.22-pre5 2 134 196.3 0.0 0.0 1.00
> 2.4.22-pre5-axboe 3 133 196.2 0.0 0.0 1.00
> ctar_load:
> Kernel [runs] Time CPU% Loads LCPU% Ratio
> 2.4.22-pre5 3 235 114.0 25.0 22.1 1.75
> 2.4.22-pre5-axboe 3 194 138.1 19.7 20.6 1.46
> xtar_load:
> Kernel [runs] Time CPU% Loads LCPU% Ratio
> 2.4.22-pre5 3 309 86.4 15.0 14.9 2.31
> 2.4.22-pre5-axboe 3 249 107.2 11.3 14.1 1.87
> io_load:
> Kernel [runs] Time CPU% Loads LCPU% Ratio
> 2.4.22-pre5 3 637 42.5 120.2 18.5 4.75
> 2.4.22-pre5-axboe 3 540 50.0 103.0 18.1 4.06
> io_other:
> Kernel [runs] Time CPU% Loads LCPU% Ratio
> 2.4.22-pre5 3 576 47.2 107.7 19.8 4.30
> 2.4.22-pre5-axboe 3 452 59.7 85.3 19.5 3.40
> read_load:
> Kernel [runs] Time CPU% Loads LCPU% Ratio
> 2.4.22-pre5 3 150 181.3 8.1 9.3 1.12
> 2.4.22-pre5-axboe 3 152 178.9 8.2 9.9 1.14
Awesome. I'll add it in -pre6.
Thanks a lot Jens.
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-14 20:09 ` Marcelo Tosatti
@ 2003-07-14 20:24 ` Andrea Arcangeli
2003-07-14 20:34 ` Chris Mason
0 siblings, 1 reply; 68+ messages in thread
From: Andrea Arcangeli @ 2003-07-14 20:24 UTC (permalink / raw)
To: Marcelo Tosatti
Cc: Jens Axboe, Chris Mason, lkml, Stephen C. Tweedie, Alan Cox,
Jeff Garzik, Andrew Morton, Alexander Viro
On Mon, Jul 14, 2003 at 05:09:40PM -0300, Marcelo Tosatti wrote:
>
>
> On Mon, 14 Jul 2003, Jens Axboe wrote:
>
> > Some initial results with the attached patch, I'll try and do some more
> > fine grained tomorrow. Base kernel was 2.4.22-pre5 (obviously), drive
> > tested is a SCSI drive (on aic7xxx, tcq fixed at 4), fs is ext3. I would
> > have done ide testing actually, but the drive in that machine appears to
> > have gone dead. I'll pop in a new one tomorrow and test on that too.
> >
> > no_load:
> > Kernel [runs] Time CPU% Loads LCPU% Ratio
> > 2.4.22-pre5 2 134 196.3 0.0 0.0 1.00
> > 2.4.22-pre5-axboe 3 133 196.2 0.0 0.0 1.00
> > ctar_load:
> > Kernel [runs] Time CPU% Loads LCPU% Ratio
> > 2.4.22-pre5 3 235 114.0 25.0 22.1 1.75
> > 2.4.22-pre5-axboe 3 194 138.1 19.7 20.6 1.46
> > xtar_load:
> > Kernel [runs] Time CPU% Loads LCPU% Ratio
> > 2.4.22-pre5 3 309 86.4 15.0 14.9 2.31
> > 2.4.22-pre5-axboe 3 249 107.2 11.3 14.1 1.87
> > io_load:
> > Kernel [runs] Time CPU% Loads LCPU% Ratio
> > 2.4.22-pre5 3 637 42.5 120.2 18.5 4.75
> > 2.4.22-pre5-axboe 3 540 50.0 103.0 18.1 4.06
> > io_other:
> > Kernel [runs] Time CPU% Loads LCPU% Ratio
> > 2.4.22-pre5 3 576 47.2 107.7 19.8 4.30
> > 2.4.22-pre5-axboe 3 452 59.7 85.3 19.5 3.40
> > read_load:
> > Kernel [runs] Time CPU% Loads LCPU% Ratio
> > 2.4.22-pre5 3 150 181.3 8.1 9.3 1.12
> > 2.4.22-pre5-axboe 3 152 178.9 8.2 9.9 1.14
>
> Awesome. I'll add it in -pre6.
this isn't what we had in pre4, this is more equivalent to a hack in
pre4 where the requests aren't distributed anymore 50/50 but 10/90. Of
course an I/O queue filled mostly by parallel sync reads, will make the
writer go slower since it will very rarely find the queue not oversized
in presence of a flood of readers. So the writer won't be able to make
progress.
This is a "stop the writers and give unlimited requests to the reader"
hack, not a "reserve some request for the reader", of course then the
reader is faster. of course then, contest shows huge improvements for
the read loads.
But contest only benchmarks the reader, it has no clue how fast the
writer is AFAIK. I would love to hear the bandwidth the writer is writing
into the xtar_load. Maybe it's shown in some of the Loads/LCPU or
something, but I don't know the semantics of those fields and I only
look at time and ratio which I understand. So if any contest expert can
elaborate if the xtar_load bandwidth is taken into consideration
somewhere I would love to hear.
thanks,
Andrea
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-14 20:24 ` Andrea Arcangeli
@ 2003-07-14 20:34 ` Chris Mason
2003-07-15 5:35 ` Jens Axboe
[not found] ` <20030714224528.GU16313@dualathlon.random>
0 siblings, 2 replies; 68+ messages in thread
From: Chris Mason @ 2003-07-14 20:34 UTC (permalink / raw)
To: Andrea Arcangeli
Cc: Marcelo Tosatti, Jens Axboe, lkml, Stephen C. Tweedie, Alan Cox,
Jeff Garzik, Andrew Morton, Alexander Viro
On Mon, 2003-07-14 at 16:24, Andrea Arcangeli wrote:
>
> this isn't what we had in pre4, this is more equivalent to a hack in
> pre4 where the requests aren't distributed anymore 50/50 but 10/90. Of
> course an I/O queue filled mostly by parallel sync reads, will make the
> writer go slower since it will very rarely find the queue not oversized
> in presence of a flood of readers. So the writer won't be able to make
> progress.
>
> This is a "stop the writers and give unlimited requests to the reader"
> hack, not a "reserve some request for the reader", of course then the
> reader is faster. of course then, contest shows huge improvements for
> the read loads.
Which is why it's a good place to start. If we didn't see huge
improvements here, there's no use in experimenting with this tactic
further.
>
> But contest only benchmarks the reader, it has no clue how fast the
> writer is AFAIK. I would love to hear the bandwidth the writer is writing
> into the xtar_load. Maybe it's shown in some of the Loads/LCPU or
> something, but I don't know the semantics of those fields and I only
> look at time and ratio which I understand. So if any contest expert can
> elaborate if the xtar_load bandwidth is taken into consideration
> somewhere I would love to hear.
I had a much longer reply at first as well, but this is only day 1 of
Jens' benchmark, and he had plans to test other workloads. I'd like to
give him the chance to do that work before we think about merging the
patch. It's a good starting point for the question "can we do better
for reads?" (clearly the answer is yes).
-chris
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-14 20:34 ` Chris Mason
@ 2003-07-15 5:35 ` Jens Axboe
[not found] ` <20030714224528.GU16313@dualathlon.random>
1 sibling, 0 replies; 68+ messages in thread
From: Jens Axboe @ 2003-07-15 5:35 UTC (permalink / raw)
To: Chris Mason
Cc: Andrea Arcangeli, Marcelo Tosatti, lkml, Stephen C. Tweedie,
Alan Cox, Jeff Garzik, Andrew Morton, Alexander Viro
On Mon, Jul 14 2003, Chris Mason wrote:
> On Mon, 2003-07-14 at 16:24, Andrea Arcangeli wrote:
>
> >
> > this isn't what we had in pre4, this is more equivalent to a hack in
> > pre4 where the requests aren't distributed anymore 50/50 but 10/90. Of
> > course an I/O queue filled mostly by parallel sync reads, will make the
> > writer go slower since it will very rarely find the queue not oversized
> > in presence of a flood of readers. So the writer won't be able to make
> > progress.
> >
> > This is a "stop the writers and give unlimited requests to the reader"
> > hack, not a "reserve some request for the reader", of course then the
> > reader is faster. of course then, contest shows huge improvements for
> > the read loads.
>
> Which is why it's a good place to start. If we didn't see huge
> improvements here, there's no use in experimenting with this tactic
> further.
Exactly. The path I'm looking for is something ala 'at least allow one
read in, even if writes have oversized the queue'.
> > But contest only benchmarks the reader, it has no clue how fast the
> > writer is AFAIK. I would love to hear the bandwidth the writer is writing
> > into the xtar_load. Maybe it's shown in some of the Loads/LCPU or
> > something, but I don't know the semantics of those fields and I only
> > look at time and ratio which I understand. So if any contest expert can
> > elaborate if the xtar_load bandwidth is taken into consideration
> > somewhere I would love to hear.
>
> I had a much longer reply at first as well, but this is only day 1 of
> Jens' benchmark, and he had plans to test other workloads. I'd like to
> give him the chance to do that work before we think about merging the
> patch. It's a good starting point for the question "can we do better
> for reads?" (clearly the answer is yes).
Thanks Chris, this is exactly the point. BTW, I'd be glad to take hints
on other interesting work loads.
--
Jens Axboe
^ permalink raw reply [flat|nested] 68+ messages in thread
[parent not found: <20030714224528.GU16313@dualathlon.random>]
* Re: RFC on io-stalls patch
[not found] ` <20030714224528.GU16313@dualathlon.random>
@ 2003-07-15 5:40 ` Jens Axboe
[not found] ` <1058229360.13317.364.camel@tiny.suse.com>
1 sibling, 0 replies; 68+ messages in thread
From: Jens Axboe @ 2003-07-15 5:40 UTC (permalink / raw)
To: Andrea Arcangeli
Cc: Chris Mason, Marcelo Tosatti, lkml, Stephen C. Tweedie, Alan Cox,
Jeff Garzik, Andrew Morton, Alexander Viro
On Tue, Jul 15 2003, Andrea Arcangeli wrote:
> On Mon, Jul 14, 2003 at 04:34:41PM -0400, Chris Mason wrote:
> > patch. It's a good starting point for the question "can we do better
> > for reads?" (clearly the answer is yes).
>
> Jens's patch will block every writers until the parallel sync readers go
> away. if we add a 5 seconds delay for every write, sure readers will
> run faster too in contest, and in turn it's not obvious to me it's
Andrea, these comments are getting pretty tiresome and are not very
constructive. If you want to go off on a tirade, be my guest, but I'm
not listening then.
> necessarily a good starting point. I mean, it'd need to be tunable at
> least because otherwise readers can starve writers for a long time in a
> server workload (and it would overschedule big too since there's no
> separate waitqueue like in pre4, probably it doesn't deadlock only
> because of the redundant wakeup in get_request_wait_wakeup. I'd compare
> it to sending sigstop to the `tar x`, during the kernel compile to get a
> 1.0 ratio and peak contest performance.
Again, way overboard silly. You could see reads fill the entire queue
now (since it's sized ridiculously low, something like 35-40ms streamed
io on a modern _ATA_ disk). It's just not very likely, and I'd be very
surprised if it is happening in the contest run. You can see the
workloads making fine progress and iterations, while the score is better
too.
I agree that we don't _want_ to make this be a possibility in the 2.4.22
kernel, but (again) this is a _first version_ designed to show how far
we can take it.
--
Jens Axboe
^ permalink raw reply [flat|nested] 68+ messages in thread
[parent not found: <1058229360.13317.364.camel@tiny.suse.com>]
* Re: RFC on io-stalls patch
[not found] ` <1058229360.13317.364.camel@tiny.suse.com>
@ 2003-07-15 5:43 ` Jens Axboe
[not found] ` <20030714175238.3eaddd9a.akpm@osdl.org>
1 sibling, 0 replies; 68+ messages in thread
From: Jens Axboe @ 2003-07-15 5:43 UTC (permalink / raw)
To: Chris Mason
Cc: Andrea Arcangeli, Marcelo Tosatti, lkml, Stephen C. Tweedie,
Alan Cox, Jeff Garzik, Andrew Morton, Alexander Viro
On Mon, Jul 14 2003, Chris Mason wrote:
> On Mon, 2003-07-14 at 18:45, Andrea Arcangeli wrote:
> > On Mon, Jul 14, 2003 at 04:34:41PM -0400, Chris Mason wrote:
> > > patch. It's a good starting point for the question "can we do better
> > > for reads?" (clearly the answer is yes).
> >
> > Jens's patch will block every writers until the parallel sync readers go
> > away. if we add a 5 seconds delay for every write, sure readers will
> > run faster too in contest, and in turn it's not obvious to me it's
> > necessarily a good starting point.
>
> For real server workloads I agree the patch isn't a good idea. But
> Jens' workload had a small number of kernel compilers (was it one proc
> or make -j4 or so?), so clearly the writers could still make progress.
> This doesn't mean I want the patch but it does mean the numbers are
> worth thinking about ;-)
>
> If we go back to Jens' numbers:
>
> ctar_load:
> Kernel [runs] Time CPU% Loads LCPU% Ratio
> 2.4.22-pre5 3 235 114.0 25.0 22.1 1.75
> 2.4.22-pre5-axboe 3 194 138.1 19.7 20.6 1.46
> ^^^^^^
> The loads column is the number of times ctar_load managed to run during
> the kernel compile, and the patched kernel loses each time. This must
> partially be caused by the lower run time overall, but it is still
> important data. It would be better if contest gave us some kind of
> throughput numbers (avg load time or something).
Well, look at the ratio given the run times listed. You'll see that they
are extremely close (0.1064 for 2.4.22-pre5 vs 0.1015 for
2.4.22-pre5-axboe).
It just shows that we are not hitting the possible bad problems in these
work loads.
--
Jens Axboe
^ permalink raw reply [flat|nested] 68+ messages in thread
[parent not found: <20030714175238.3eaddd9a.akpm@osdl.org>]
* Re: RFC on io-stalls patch
2003-07-14 19:51 ` Jens Axboe
2003-07-14 20:09 ` Chris Mason
2003-07-14 20:09 ` Marcelo Tosatti
@ 2003-07-14 20:16 ` Andrea Arcangeli
2003-07-14 20:17 ` Marcelo Tosatti
2003-07-15 5:26 ` Jens Axboe
2 siblings, 2 replies; 68+ messages in thread
From: Andrea Arcangeli @ 2003-07-14 20:16 UTC (permalink / raw)
To: Jens Axboe
Cc: Marcelo Tosatti, Chris Mason, lkml, Stephen C. Tweedie, Alan Cox,
Jeff Garzik, Andrew Morton, Alexander Viro
On Mon, Jul 14, 2003 at 09:51:39PM +0200, Jens Axboe wrote:
> - rl = &q->rq;
> - if (!list_empty(&rl->free) && !blk_oversized_queue(q)) {
> + if ((rw == WRITE) && (blk_oversized_queue(q) || (rl->count < 4)))
did you disable the oversized queue check completely for reads? This
looks unsafe, you can end with loads of ram locked up this way, the
request queue cannot be limited in requests anymore. This isn't the
"request reservation", this is a "nearly unlimited amount of ram locked in
for reads".
Of course, the more reads can be in the queue, the less the background
write loads will hurt parallel apps like a kernel compile as shown in
xtar_load.
This is very different from the schedule advantage provided by the old
queue model. If you allow an unlimited I/O queue for reads, that means
the I/O queues will be filled by a huge amount of reads and a few
writes (no matter how fast the xtar_load is writing to disk).
In the past (2.4.22pre4) the I/O queue would have been at most 50/50, with
your patch it can be 90/10, hence it can generate a huge performance
difference, that can penalize tremendously the writers in server loads
using fsync plus it can hurt the VM badly if all ram is locked up by
parallel reads. Of course contest mostly cares about reads, not writes.
Overall I think your patch is unsafe and shouldn't be applied.
Still if you want to allow 50/50, go ahead, that logic in pre4 was an
order of magnitude more fair and generic than this patch.
Andrea
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-14 20:16 ` Andrea Arcangeli
@ 2003-07-14 20:17 ` Marcelo Tosatti
2003-07-14 20:27 ` Andrea Arcangeli
2003-07-15 5:26 ` Jens Axboe
1 sibling, 1 reply; 68+ messages in thread
From: Marcelo Tosatti @ 2003-07-14 20:17 UTC (permalink / raw)
To: Andrea Arcangeli
Cc: Jens Axboe, Chris Mason, lkml, Stephen C. Tweedie, Alan Cox,
Jeff Garzik, Andrew Morton, Alexander Viro
On Mon, 14 Jul 2003, Andrea Arcangeli wrote:
> On Mon, Jul 14, 2003 at 09:51:39PM +0200, Jens Axboe wrote:
> > - rl = &q->rq;
> > - if (!list_empty(&rl->free) && !blk_oversized_queue(q)) {
> > + if ((rw == WRITE) && (blk_oversized_queue(q) || (rl->count < 4)))
>
> did you disable the oversized queue check completely for reads? This
> looks unsafe, you can end with loads of ram locked up this way, the
> request queue cannot be limited in requests anymore. this isn't the
> "request reservation", this a "nearly unlimited amount of ram locked in
> for reads".
>
> Of course, the more reads can be in the queue, the less the background
> write loads will hurt parallel apps like a kernel compile as shown in
> xtar_load.
>
> This is very different from the schedule advantage provided by the old
> queue model. If you allow an unlimited I/O queue for reads, that means
> the I/O queues will be filled by an huge amount of reads and a few
> writes (no matter how fast the xtar_load is writing to disk).
>
> In the past (2.4.22pre4) the I/O queue would been at most 50/50, with
> your patch it can be 90/10, hence it can generate an huge performance
> difference, that can penealize tremendously the writers in server loads
> using fsync plus it can hurt the VM badly if all ram is locked up by
> parallel reads. Of course contest mostly cares about reads, not writes.
>
> Overall I think your patch is unsafe and shouldn't be applied.
>
> Still if you want to allow 50/50, go ahead, that logic in pre4 was an
> order of magnitude more fair and generic than this patch.
Well, I changed my mind and won't apply it as-is in -pre6.
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-14 20:17 ` Marcelo Tosatti
@ 2003-07-14 20:27 ` Andrea Arcangeli
0 siblings, 0 replies; 68+ messages in thread
From: Andrea Arcangeli @ 2003-07-14 20:27 UTC (permalink / raw)
To: Marcelo Tosatti
Cc: Jens Axboe, Chris Mason, lkml, Stephen C. Tweedie, Alan Cox,
Jeff Garzik, Andrew Morton, Alexander Viro
On Mon, Jul 14, 2003 at 05:17:16PM -0300, Marcelo Tosatti wrote:
>
>
> On Mon, 14 Jul 2003, Andrea Arcangeli wrote:
>
> > On Mon, Jul 14, 2003 at 09:51:39PM +0200, Jens Axboe wrote:
> > > - rl = &q->rq;
> > > - if (!list_empty(&rl->free) && !blk_oversized_queue(q)) {
> > > + if ((rw == WRITE) && (blk_oversized_queue(q) || (rl->count < 4)))
> >
> > did you disable the oversized queue check completely for reads? This
> > looks unsafe, you can end with loads of ram locked up this way, the
> > request queue cannot be limited in requests anymore. this isn't the
> > "request reservation", this a "nearly unlimited amount of ram locked in
> > for reads".
> >
> > Of course, the more reads can be in the queue, the less the background
> > write loads will hurt parallel apps like a kernel compile as shown in
> > xtar_load.
> >
> > This is very different from the schedule advantage provided by the old
> > queue model. If you allow an unlimited I/O queue for reads, that means
> > the I/O queues will be filled by an huge amount of reads and a few
> > writes (no matter how fast the xtar_load is writing to disk).
> >
> > In the past (2.4.22pre4) the I/O queue would been at most 50/50, with
> > your patch it can be 90/10, hence it can generate an huge performance
> > difference, that can penealize tremendously the writers in server loads
> > using fsync plus it can hurt the VM badly if all ram is locked up by
> > parallel reads. Of course contest mostly cares about reads, not writes.
> >
> > Overall I think your patch is unsafe and shouldn't be applied.
> >
> > Still if you want to allow 50/50, go ahead, that logic in pre4 was an
> > order of magnitude more fair and generic than this patch.
>
> Well, I change my mind and wont apply it as-is in -pre6.
I would at least wait for a clarification from Jens first. If I missed
something fundamental in my analysis of his patch, of course we could
apply it then, but at the moment it looks unsafe and a bit cheating ;)
thanks,
Andrea
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-14 20:16 ` Andrea Arcangeli
2003-07-14 20:17 ` Marcelo Tosatti
@ 2003-07-15 5:26 ` Jens Axboe
2003-07-15 5:48 ` Andrea Arcangeli
2003-07-15 11:22 ` Alan Cox
1 sibling, 2 replies; 68+ messages in thread
From: Jens Axboe @ 2003-07-15 5:26 UTC (permalink / raw)
To: Andrea Arcangeli
Cc: Marcelo Tosatti, Chris Mason, lkml, Stephen C. Tweedie, Alan Cox,
Jeff Garzik, Andrew Morton, Alexander Viro
On Mon, Jul 14 2003, Andrea Arcangeli wrote:
> On Mon, Jul 14, 2003 at 09:51:39PM +0200, Jens Axboe wrote:
> > - rl = &q->rq;
> > - if (!list_empty(&rl->free) && !blk_oversized_queue(q)) {
> > + if ((rw == WRITE) && (blk_oversized_queue(q) || (rl->count < 4)))
>
> did you disable the oversized queue check completely for reads? This
Yes
> looks unsafe, you can end with loads of ram locked up this way, the
> request queue cannot be limited in requests anymore. this isn't the
> "request reservation", this a "nearly unlimited amount of ram locked in
> for reads".
Sorry, but I think that is nonsense. This is the way we have always
worked. You just have to maintain a decent queue length still (like we
always have in 2.4) and there are no problems.
> Of course, the more reads can be in the queue, the less the background
> write loads will hurt parallel apps like a kernel compile as shown in
> xtar_load.
>
> This is very different from the schedule advantage provided by the old
> queue model. If you allow an unlimited I/O queue for reads, that means
> the I/O queues will be filled by an huge amount of reads and a few
> writes (no matter how fast the xtar_load is writing to disk).
>
> In the past (2.4.22pre4) the I/O queue would been at most 50/50, with
> your patch it can be 90/10, hence it can generate an huge performance
> difference, that can penealize tremendously the writers in server loads
> using fsync plus it can hurt the VM badly if all ram is locked up by
> parallel reads. Of course contest mostly cares about reads, not writes.
>
> Overall I think your patch is unsafe and shouldn't be applied.
It is _not_ unsafe, stop spewing nonsense like that. The patch should
not be applied, it's just the first few things I did to see if it would
make a difference like I described. And it had a big effect, so I posted
results and went to bed. Now we have grounds for further discussion,
and I'll bench the changes separately too as well. It's about getting
data points you can use, you have to try extremes as well.
> Still if you want to allow 50/50, go ahead, that logic in pre4 was an
> order of magnitude more fair and generic than this patch.
Sigh... No I don't want 90/10 distribution of course, that would be
silly.
--
Jens Axboe
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-15 5:26 ` Jens Axboe
@ 2003-07-15 5:48 ` Andrea Arcangeli
2003-07-15 6:01 ` Jens Axboe
2003-07-15 11:22 ` Alan Cox
1 sibling, 1 reply; 68+ messages in thread
From: Andrea Arcangeli @ 2003-07-15 5:48 UTC (permalink / raw)
To: Jens Axboe
Cc: Marcelo Tosatti, Chris Mason, lkml, Stephen C. Tweedie, Alan Cox,
Jeff Garzik, Andrew Morton, Alexander Viro
On Tue, Jul 15, 2003 at 07:26:40AM +0200, Jens Axboe wrote:
> On Mon, Jul 14 2003, Andrea Arcangeli wrote:
> > On Mon, Jul 14, 2003 at 09:51:39PM +0200, Jens Axboe wrote:
> > > - rl = &q->rq;
> > > - if (!list_empty(&rl->free) && !blk_oversized_queue(q)) {
> > > + if ((rw == WRITE) && (blk_oversized_queue(q) || (rl->count < 4)))
> >
> > did you disable the oversized queue check completely for reads? This
>
> Yes
>
> > looks unsafe, you can end with loads of ram locked up this way, the
> > request queue cannot be limited in requests anymore. this isn't the
> > "request reservation", this a "nearly unlimited amount of ram locked in
> > for reads".
>
> Sorry, but I think that is nonsense. This is the way we have always
> worked. You just have to maintain a decent queue length still (like we
But things don't work that way anymore. dropping the check now will lead
to an overkill amount of ram to be locked in.
I enlarged the queue further, since I could, there's no point in having
a few kbytes of queue during seeks, when the big queue helps most. Now
you can have mbytes (not kbytes) of queue during seeks. But you can't
keep it unlimited anymore or you'll generate troubles to the VM and
it'll generate a 90/10 distribution as well, if you start filling it
with many readers.
The reason things changed is that the "decent queue length" wasn't
decent either for contiguous I/O (it was way too permissive for contiguous
I/O) or for seeking I/O (it was way too restrictive for seeking I/O).
> It is _not_ unsafe, stop spewing nonsense like that. The patch should
it isn't only unsafe for the potentially full ram of the box going
locked (on lowmem boxes of course) but also because you can easily
starve writers completely, especially on my tree you will need only a
few readers to hang completely every write in a certain spindle. and the
more readers the more likely the writer will stall.
> make a difference like I described. And it had a big effect, so I posted
> results and went to bed. Know we have a grounds for further discussion,
> and I'll bench the changes seperately too as well. It's about getting
> data points you can use, you have to try extremese as well.
If you benchmarked with a 2-way or even better on an UP box, then likely
we can get still a relevant speedup even with the starvation fixed and w/o the
90/10 distribution (i.e. too many reads in the queue).
I thought contest was using a quite big -j, but it's ""only"" -j8 for a
2-way (HT cpus have to be included). So your results may still
apply, despite the patch wasn't safe and it could penalize writes to the
point of not allowing them to execute anymore for indefinite time (I
mean: a fixed patch that doesn't have those bugs could generate
similar good results [for reads] out of contest).
Andrea
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-15 5:48 ` Andrea Arcangeli
@ 2003-07-15 6:01 ` Jens Axboe
2003-07-15 6:33 ` Andrea Arcangeli
0 siblings, 1 reply; 68+ messages in thread
From: Jens Axboe @ 2003-07-15 6:01 UTC (permalink / raw)
To: Andrea Arcangeli
Cc: Marcelo Tosatti, Chris Mason, lkml, Stephen C. Tweedie, Alan Cox,
Jeff Garzik, Andrew Morton, Alexander Viro
On Tue, Jul 15 2003, Andrea Arcangeli wrote:
> On Tue, Jul 15, 2003 at 07:26:40AM +0200, Jens Axboe wrote:
> > On Mon, Jul 14 2003, Andrea Arcangeli wrote:
> > > On Mon, Jul 14, 2003 at 09:51:39PM +0200, Jens Axboe wrote:
> > > > - rl = &q->rq;
> > > > - if (!list_empty(&rl->free) && !blk_oversized_queue(q)) {
> > > > + if ((rw == WRITE) && (blk_oversized_queue(q) || (rl->count < 4)))
> > >
> > > did you disable the oversized queue check completely for reads? This
> >
> > Yes
> >
> > > looks unsafe, you can end with loads of ram locked up this way, the
> > > request queue cannot be limited in requests anymore. this isn't the
> > > "request reservation", this a "nearly unlimited amount of ram locked in
> > > for reads".
> >
> > Sorry, but I think that is nonsense. This is the way we have always
> > worked. You just have to maintain a decent queue length still (like we
>
> But things don't work that way anymore. dropping the check now will lead
> to an overkill amount of ram to be locked in.
>
> I enlarged the queue further, since I could, there's no point in having
> a few kbytes of queue during seeks, when the big queue helps most. Now
> you can have mbytes (not kbytes) of queue during seeks. But you can't
> keep it unlimited anymore or you'll generate troubles to the VM and
> it'll generate a 90/10 distribution as well, if you start filling it
> with many readers.
That you pushed MAX_NR_REQUESTS is a really bad idea in my opinion.
What is the point of having 4MB worth of seeks in the queue? Now you
can fit exactly 1024 seeks in there, with an average seek time of 10 ms
that's over 10 seconds of waiting. That logic is at least as warped as
having 128 64KiB streamed writes in the queue (the problem we had
before), if that is a streamed write it will only take a fraction of the
time the seeky work load will. Even with one seek in between each write,
it's still better...
> the reasons things changed is that the "decent queue length" wasn't
> decent nor for contigous I/O (it was way too permissive for contigous
> I/O) nor for seeking I/O (it was way too restrictive for seeking I/O).
On that we agree. I just think you took it to the other extreme now,
we'll see seek storms penalizing work loads now instead of write bombs.
So yes it did solve the write bomb, but introduced another problem.
Write bomb problem is easier hit of course, but hey you cannot leave the
other one open can you? That'd be like allowing reads to pin down all
memory :)
> > It is _not_ unsafe, stop spewing nonsense like that. The patch should
>
> it isn't only unsafe for the potentially full ram of the box going
> locked (on lowmem boxes of course) but also because you can easily
Correct. Speaking of low mem, using 4 times as many requests on a queue
isn't exactly peaches for low mem consumption on big disk machines
either.
> > make a difference like I described. And it had a big effect, so I posted
> > results and went to bed. Know we have a grounds for further discussion,
> > and I'll bench the changes seperately too as well. It's about getting
> > data points you can use, you have to try extremese as well.
>
> If you benchmarked with a 2-way or even better on an UP box, then likely
> we can get still a relevant speedup even with the starvation fixed and w/o the
> 90/10 distribution (i.e. too many reads in the queue).
I bench on a 2-way.
> I thought contest was using a quite big -j, but it's ""only"" -j8 for a
> 2-way (HT cpus have to be included). So your results may still
> apply, despite the patch wasn't safe and it could penalize writes to the
You only had to look at the numbers posted to see that that was the case.
> point of not allowing them to execute anymore for indefinite time (I
> mean: a fixed patch that doesn't have those bugs could generate
> similar good results [for reads] out of contest).
Completely agree, and I'll proceed to make such a patch today. I hope we
can agree not to waste more time discussing the merit (that is clear,
it's a win) and applicability of the patch I posted. I showed numbers,
and that was it.
--
Jens Axboe
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-15 6:01 ` Jens Axboe
@ 2003-07-15 6:33 ` Andrea Arcangeli
0 siblings, 0 replies; 68+ messages in thread
From: Andrea Arcangeli @ 2003-07-15 6:33 UTC (permalink / raw)
To: Jens Axboe
Cc: Marcelo Tosatti, Chris Mason, lkml, Stephen C. Tweedie, Alan Cox,
Jeff Garzik, Andrew Morton, Alexander Viro
On Tue, Jul 15, 2003 at 08:01:36AM +0200, Jens Axboe wrote:
> On Tue, Jul 15 2003, Andrea Arcangeli wrote:
> > On Tue, Jul 15, 2003 at 07:26:40AM +0200, Jens Axboe wrote:
> > > On Mon, Jul 14 2003, Andrea Arcangeli wrote:
> > > > On Mon, Jul 14, 2003 at 09:51:39PM +0200, Jens Axboe wrote:
> > > > > - rl = &q->rq;
> > > > > - if (!list_empty(&rl->free) && !blk_oversized_queue(q)) {
> > > > > + if ((rw == WRITE) && (blk_oversized_queue(q) || (rl->count < 4)))
> > > >
> > > > did you disable the oversized queue check completely for reads? This
> > >
> > > Yes
> > >
> > > > looks unsafe, you can end with loads of ram locked up this way, the
> > > > request queue cannot be limited in requests anymore. this isn't the
> > > > "request reservation", this a "nearly unlimited amount of ram locked in
> > > > for reads".
> > >
> > > Sorry, but I think that is nonsense. This is the way we have always
> > > worked. You just have to maintain a decent queue length still (like we
> >
> > But things don't work that way anymore. dropping the check now will lead
> > to an overkill amount of ram to be locked in.
> >
> > I enlarged the queue further, since I could, there's no point in having
> > a few kbytes of queue during seeks, when the big queue helps most. Now
> > you can have mbytes (not kbytes) of queue during seeks. But you can't
> > keep it unlimited anymore or you'll generate troubles to the VM and
> > it'll generate a 90/10 distribution as well, if you start filling it
> > with many readers.
>
> That you pushed MAX_NR_REQUESTS is a really bad idea in my oppinion.
> What is the point of having 4MB worth of seeks in the queue? Know you
> can fit exactly 1024 seeks in there, with a average seek time of 10 ms
> that's over 10 seconds of waiting. That logic is at least as warped as
> having 128 64KiB streamed writes in the queue (the problem we had
> before), if that is a streamed write it will only take a fraction of the
> time the seeky work load will. Even with one seek in between each write,
> it's still better...
they may be too many, feel free to shrink it, but I liked the idea of
having more than 128 requests to apply the elevator to an extensive
amount of requests. You know your 10msec mean seek time may go down if
we can pass all 1024 in order. Ordering in blocks of 128 is much worse
than in blocks of 1024.
> > the reasons things changed is that the "decent queue length" wasn't
> > decent nor for contigous I/O (it was way too permissive for contigous
> > I/O) nor for seeking I/O (it was way too restrictive for seeking I/O).
>
> On that we agree. I just think you took it to the other extreme know,
> we'll see seek storms penalizing work loads now instead of write bombs.
possible.
> So yes it did solve the write bomb, but introduced another problem.
> Write bomb problem is easier hit of course, but hey you cannot leave the
> other one open can you? That'd be like allowing reads to pin down all
> memory :)
That's different. This is only a fairness issue across different tasks.
It won't affect VM reliability in freeing ram, or anything like that,
just the user wait time for its I/O.
> > > It is _not_ unsafe, stop spewing nonsense like that. The patch should
> >
> > it isn't only unsafe for the potentially full ram of the box going
> > locked (on lowmem boxes of course) but also because you can easily
>
> Correct. Speaking of low mem, using 4 times as many requests on a queue
> isn't exactly peaches for low mem consumption on big disk machines
> either.
I did the math, IIRC it was some hundred k per spindle, it didn't look
too bad. If you shrink it for other reasons this will go down too.
But regardless I wouldn't rely on the number of requests anymore as a
limiting factor.
Of course the 4M thing is terribly ugly too as a fixed magic number, but
it's still an order of magnitude better than the fixed magic number of
requests of pre2.
> > If you benchmarked with a 2-way or even better on an UP box, then likely
> > we can get still a relevant speedup even with the starvation fixed and w/o the
> > 90/10 distribution (i.e. too many reads in the queue).
>
> I bench on a 2-way.
ok. If it was a UP I would have been very impressed but even on a 2-way is
very significant (I mean, not too likely to have the starvation and
90/10 distribution effect with only 8 tasks on few dozen kbytes small .c
files where readahead can't pump too many async reads into the queue).
> > I thought contest was using a quite big -j, but it's ""only"" -j8 for a
> > 2-way (HT cpus have to be included). So your results may still
> > apply, despite the patch wasn't safe and it could penalize writes to the
>
> You only had to look a the numbers posted to see that that was the case.
I wasn't sure how to measure the slowdown of the writer. Now I know it's
a 31% slowdown of the writer compared to a 2x% improvement for the
kernel compile. It's still not completely obvious that some "sigstop
write" effect is going on.
> > point of not allowing them to execute anymore for indefinite time (I
> > mean: a fixed patch that doesn't have those bugs could generate
> > similar good results [for reads] out of contest).
>
> Completely agree, and I'll proceed to make such a patch today. I hope we
> can agree not to waste more time discussion the merrit (that is clear,
> it's a win) and applicability of the patch I posted. I showed numbers,
> and that was it.
If you can reproduce them today on a patch that can't block the writer
(with indefinite starvation or 90/10 distribution) I will sure agree
it's an obvious goodness that would bring a further responsiveness
improvement for reads on top of the elevator-lowlatency.
thanks,
Andrea
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-15 5:26 ` Jens Axboe
2003-07-15 5:48 ` Andrea Arcangeli
@ 2003-07-15 11:22 ` Alan Cox
2003-07-15 11:27 ` Jens Axboe
1 sibling, 1 reply; 68+ messages in thread
From: Alan Cox @ 2003-07-15 11:22 UTC (permalink / raw)
To: Jens Axboe
Cc: Andrea Arcangeli, Marcelo Tosatti, Chris Mason, lkml,
Stephen C. Tweedie, Jeff Garzik, Andrew Morton, Alexander Viro
On Maw, 2003-07-15 at 06:26, Jens Axboe wrote:
> Sorry, but I think that is nonsense. This is the way we have always
> worked. You just have to maintain a decent queue length still (like we
> always have in 2.4) and there are no problems.
The memory pinning problem is still real - and always has been. It shows up
best not on IDE disks but large slow media like magneto opticals where you
can queue lots of I/O but you get 500K/second
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-15 11:22 ` Alan Cox
@ 2003-07-15 11:27 ` Jens Axboe
2003-07-16 12:43 ` Andrea Arcangeli
0 siblings, 1 reply; 68+ messages in thread
From: Jens Axboe @ 2003-07-15 11:27 UTC (permalink / raw)
To: Alan Cox
Cc: Andrea Arcangeli, Marcelo Tosatti, Chris Mason, lkml,
Stephen C. Tweedie, Jeff Garzik, Andrew Morton, Alexander Viro
On Tue, Jul 15 2003, Alan Cox wrote:
> On Maw, 2003-07-15 at 06:26, Jens Axboe wrote:
> > Sorry, but I think that is nonsense. This is the way we have always
> > worked. You just have to maintain a decent queue length still (like we
> > always have in 2.4) and there are no problems.
>
> The memory pinning problem is still real - and always has been. It shows up
> best not on IDE disks but large slow media like magneto opticals where you
> can queue lots of I/O but you get 500K/second
Not the same thing. On slow media, like dvd-ram, what causes the problem
is that you can dirty basically all of the RAM in the system. That has
nothing to do with memory pinned in the request queue.
And that is still writes, not reads. Reads are pinned on the queue, so
very different case.
--
Jens Axboe
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-15 11:27 ` Jens Axboe
@ 2003-07-16 12:43 ` Andrea Arcangeli
2003-07-16 12:46 ` Jens Axboe
0 siblings, 1 reply; 68+ messages in thread
From: Andrea Arcangeli @ 2003-07-16 12:43 UTC (permalink / raw)
To: Jens Axboe
Cc: Alan Cox, Marcelo Tosatti, Chris Mason, lkml, Stephen C. Tweedie,
Jeff Garzik, Andrew Morton, Alexander Viro
On Tue, Jul 15, 2003 at 01:27:37PM +0200, Jens Axboe wrote:
> On Tue, Jul 15 2003, Alan Cox wrote:
> > On Maw, 2003-07-15 at 06:26, Jens Axboe wrote:
> > > Sorry, but I think that is nonsense. This is the way we have always
> > > worked. You just have to maintain a decent queue length still (like we
> > > always have in 2.4) and there are no problems.
> >
> > The memory pinning problem is still real - and always has been. It shows up
> > best not on IDE disks but large slow media like magneto opticals where you
> > can queue lots of I/O but you get 500K/second
>
> Not the same thing. On slow media, like dvd-ram, what causes the problem
> is that you can dirty basically all of the RAM in the system. That has
> nothing to do with memory pinned in the request queue.
you can trivially bound the amount of dirty memory to nearly 0% with the
bdflush sysctl. And the overkill size of the queue until pre3 could be
a huge VM overhead compared to the dirty memory on lowmem boxes,
example a 32/64M machine. So I disagree it's only a mistake of write
throttling that gives problems on slow media.
Infact I tend to think the biggest problem for slow media in 2.4 is the
lack of per spindle pdflush.
Andrea
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-16 12:43 ` Andrea Arcangeli
@ 2003-07-16 12:46 ` Jens Axboe
2003-07-16 12:59 ` Andrea Arcangeli
0 siblings, 1 reply; 68+ messages in thread
From: Jens Axboe @ 2003-07-16 12:46 UTC (permalink / raw)
To: Andrea Arcangeli
Cc: Alan Cox, Marcelo Tosatti, Chris Mason, lkml, Stephen C. Tweedie,
Jeff Garzik, Andrew Morton, Alexander Viro
On Wed, Jul 16 2003, Andrea Arcangeli wrote:
> On Tue, Jul 15, 2003 at 01:27:37PM +0200, Jens Axboe wrote:
> > On Tue, Jul 15 2003, Alan Cox wrote:
> > > On Maw, 2003-07-15 at 06:26, Jens Axboe wrote:
> > > > Sorry, but I think that is nonsense. This is the way we have always
> > > > worked. You just have to maintain a decent queue length still (like we
> > > > always have in 2.4) and there are no problems.
> > >
> > > The memory pinning problem is still real - and always has been. It shows up
> > > best not on IDE disks but large slow media like magneto opticals where you
> > > can queue lots of I/O but you get 500K/second
> >
> > Not the same thing. On slow media, like dvd-ram, what causes the problem
> > is that you can dirty basically all of the RAM in the system. That has
> > nothing to do with memory pinned in the request queue.
>
> you can trivially bound the amount of dirty memory to nearly 0% with the
> bdflush sysctl. And the overkill size of the queue until pre3 could be
> an huge VM overhead compared to the dirty memory on lowmem boxes,
> example a 32/64M machine. So I disagree it's only a mistake of write
> throttling that gives problems on slow media.
>
> Infact I tend to think the biggest problem for slow media in 2.4 is the
> lack of per spindle pdflush.
Well it's a combined problem. Threshold too high on dirty memory,
someone doing a read will get stuck flushing out as well.
--
Jens Axboe
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-16 12:46 ` Jens Axboe
@ 2003-07-16 12:59 ` Andrea Arcangeli
2003-07-16 13:04 ` Jens Axboe
0 siblings, 1 reply; 68+ messages in thread
From: Andrea Arcangeli @ 2003-07-16 12:59 UTC (permalink / raw)
To: Jens Axboe
Cc: Alan Cox, Marcelo Tosatti, Chris Mason, lkml, Stephen C. Tweedie,
Jeff Garzik, Andrew Morton, Alexander Viro
On Wed, Jul 16, 2003 at 02:46:56PM +0200, Jens Axboe wrote:
> Well it's a combined problem. Threshold too high on dirty memory,
> someone doing a read well get stuck flushing out as well.
a pure read not. the write throttling should be per-process, then there
will be little risk.
Andrea
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-16 12:59 ` Andrea Arcangeli
@ 2003-07-16 13:04 ` Jens Axboe
2003-07-16 13:11 ` Andrea Arcangeli
2003-07-16 16:49 ` Andrew Morton
0 siblings, 2 replies; 68+ messages in thread
From: Jens Axboe @ 2003-07-16 13:04 UTC (permalink / raw)
To: Andrea Arcangeli
Cc: Alan Cox, Marcelo Tosatti, Chris Mason, lkml, Stephen C. Tweedie,
Jeff Garzik, Andrew Morton, Alexander Viro
On Wed, Jul 16 2003, Andrea Arcangeli wrote:
> On Wed, Jul 16, 2003 at 02:46:56PM +0200, Jens Axboe wrote:
> > Well it's a combined problem. Threshold too high on dirty memory,
> > someone doing a read well get stuck flushing out as well.
>
> a pure read not. the write throttling should be per-process, then there
> will be little risk.
A read from user space, dirtying data along the way.
--
Jens Axboe
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-16 13:04 ` Jens Axboe
@ 2003-07-16 13:11 ` Andrea Arcangeli
2003-07-16 13:21 ` Jens Axboe
2003-07-16 16:49 ` Andrew Morton
1 sibling, 1 reply; 68+ messages in thread
From: Andrea Arcangeli @ 2003-07-16 13:11 UTC (permalink / raw)
To: Jens Axboe
Cc: Alan Cox, Marcelo Tosatti, Chris Mason, lkml, Stephen C. Tweedie,
Jeff Garzik, Andrew Morton, Alexander Viro
On Wed, Jul 16, 2003 at 03:04:42PM +0200, Jens Axboe wrote:
> On Wed, Jul 16 2003, Andrea Arcangeli wrote:
> > On Wed, Jul 16, 2003 at 02:46:56PM +0200, Jens Axboe wrote:
> > > Well it's a combined problem. Threshold too high on dirty memory,
> > > someone doing a read well get stuck flushing out as well.
> >
> > a pure read not. the write throttling should be per-process, then there
> > will be little risk.
>
> A read from user space, dirtying data along the way.
it doesn't necessarily block on dirty memory. We even _free_ ram clean
if needed, exactly because of that. You can raise the amount of _free_
ram up to 99% of the whole ram in your box to be almost guaranteed to
never wait on dirty memory freeing. Of course the default tries to
optimize for writeback cache and there's a reasonable margin to avoid
writing dirty stuff. the sysctl is there for special usages where you
want to never block in a read from userspace regardless whatever the
state of the system.
Andrea
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-16 13:11 ` Andrea Arcangeli
@ 2003-07-16 13:21 ` Jens Axboe
2003-07-16 13:44 ` Andrea Arcangeli
0 siblings, 1 reply; 68+ messages in thread
From: Jens Axboe @ 2003-07-16 13:21 UTC (permalink / raw)
To: Andrea Arcangeli
Cc: Alan Cox, Marcelo Tosatti, Chris Mason, lkml, Stephen C. Tweedie,
Jeff Garzik, Andrew Morton, Alexander Viro
On Wed, Jul 16 2003, Andrea Arcangeli wrote:
> On Wed, Jul 16, 2003 at 03:04:42PM +0200, Jens Axboe wrote:
> > On Wed, Jul 16 2003, Andrea Arcangeli wrote:
> > > On Wed, Jul 16, 2003 at 02:46:56PM +0200, Jens Axboe wrote:
> > > > Well it's a combined problem. Threshold too high on dirty memory,
> > > > someone doing a read well get stuck flushing out as well.
> > >
> > > a pure read not. the write throttling should be per-process, then there
> > > will be little risk.
> >
> > A read from user space, dirtying data along the way.
>
> it doesn't necessairly block on dirty memory. We even _free_ ram clean
> if needed, exactly because of that. You can raise the amount of _free_
> ram up to 99% of the whole ram in your box to be almost guaranteed to
> never wait on dirty memory freeing. Of course the default tries to
> optimize for writeback cache and there's a reasonable margin to avoid
> writing dirty stuff. the sysctl is there for special usages where you
> want to never block in a read from userspace regardless whatever the
> state of the system.
That may be so, but no user will ever touch that sysctl. He just
experiences what Alan outlined, system grinds to a complete halt. Only
much later does it get going again.
--
Jens Axboe
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-16 13:21 ` Jens Axboe
@ 2003-07-16 13:44 ` Andrea Arcangeli
2003-07-16 14:00 ` Jens Axboe
0 siblings, 1 reply; 68+ messages in thread
From: Andrea Arcangeli @ 2003-07-16 13:44 UTC (permalink / raw)
To: Jens Axboe
Cc: Alan Cox, Marcelo Tosatti, Chris Mason, lkml, Stephen C. Tweedie,
Jeff Garzik, Andrew Morton, Alexander Viro
On Wed, Jul 16, 2003 at 03:21:39PM +0200, Jens Axboe wrote:
> On Wed, Jul 16 2003, Andrea Arcangeli wrote:
> > On Wed, Jul 16, 2003 at 03:04:42PM +0200, Jens Axboe wrote:
> > > On Wed, Jul 16 2003, Andrea Arcangeli wrote:
> > > > On Wed, Jul 16, 2003 at 02:46:56PM +0200, Jens Axboe wrote:
> > > > > Well it's a combined problem. Threshold too high on dirty memory,
> > > > > someone doing a read well get stuck flushing out as well.
> > > >
> > > > a pure read not. the write throttling should be per-process, then there
> > > > will be little risk.
> > >
> > > A read from user space, dirtying data along the way.
> >
> > it doesn't necessairly block on dirty memory. We even _free_ ram clean
> > if needed, exactly because of that. You can raise the amount of _free_
> > ram up to 99% of the whole ram in your box to be almost guaranteed to
> > never wait on dirty memory freeing. Of course the default tries to
> > optimize for writeback cache and there's a reasonable margin to avoid
> > writing dirty stuff. the sysctl is there for special usages where you
> > want to never block in a read from userspace regardless whatever the
> > state of the system.
>
> That may be so, but no user will ever touch that sysctl. He just
> experiences what Alan outlined, system grinds to a complete halt. Only
> much later does it get going again.
and on the small boxes that will happen much less now, since on the small
boxes the biggest vm overhead could have been generated by the uncontrolled
size of the I/O queue that previously could grow as big as 32M.
Andrea
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-16 13:44 ` Andrea Arcangeli
@ 2003-07-16 14:00 ` Jens Axboe
2003-07-16 14:24 ` Andrea Arcangeli
0 siblings, 1 reply; 68+ messages in thread
From: Jens Axboe @ 2003-07-16 14:00 UTC (permalink / raw)
To: Andrea Arcangeli
Cc: Alan Cox, Marcelo Tosatti, Chris Mason, lkml, Stephen C. Tweedie,
Jeff Garzik, Andrew Morton, Alexander Viro
On Wed, Jul 16 2003, Andrea Arcangeli wrote:
> On Wed, Jul 16, 2003 at 03:21:39PM +0200, Jens Axboe wrote:
> > On Wed, Jul 16 2003, Andrea Arcangeli wrote:
> > > On Wed, Jul 16, 2003 at 03:04:42PM +0200, Jens Axboe wrote:
> > > > On Wed, Jul 16 2003, Andrea Arcangeli wrote:
> > > > > On Wed, Jul 16, 2003 at 02:46:56PM +0200, Jens Axboe wrote:
> > > > > > Well it's a combined problem. Threshold too high on dirty memory,
> > > > > > someone doing a read well get stuck flushing out as well.
> > > > >
> > > > > a pure read not. the write throttling should be per-process, then there
> > > > > will be little risk.
> > > >
> > > > A read from user space, dirtying data along the way.
> > >
> > > it doesn't necessairly block on dirty memory. We even _free_ ram clean
> > > if needed, exactly because of that. You can raise the amount of _free_
> > > ram up to 99% of the whole ram in your box to be almost guaranteed to
> > > never wait on dirty memory freeing. Of course the default tries to
> > > optimize for writeback cache and there's a reasonable margin to avoid
> > > writing dirty stuff. the sysctl is there for special usages where you
> > > want to never block in a read from userspace regardless whatever the
> > > state of the system.
> >
> > That may be so, but no user will ever touch that sysctl. He just
> > experiences what Alan outlined, system grinds to a complete halt. Only
> > much later does it get going again.
>
> and on the small boxes that will happen much less now since on the small
> boxes the biggest vm overhead could been generated by the uncontrolled
> size of the I/O queue that previously could grow as big as 32M.
That is true, however no one runs 32MB boxes anymore :). So I doubt that
would be the case.
--
Jens Axboe
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-16 14:00 ` Jens Axboe
@ 2003-07-16 14:24 ` Andrea Arcangeli
0 siblings, 0 replies; 68+ messages in thread
From: Andrea Arcangeli @ 2003-07-16 14:24 UTC (permalink / raw)
To: Jens Axboe
Cc: Alan Cox, Marcelo Tosatti, Chris Mason, lkml, Stephen C. Tweedie,
Jeff Garzik, Andrew Morton, Alexander Viro
On Wed, Jul 16, 2003 at 04:00:02PM +0200, Jens Axboe wrote:
> On Wed, Jul 16 2003, Andrea Arcangeli wrote:
> > On Wed, Jul 16, 2003 at 03:21:39PM +0200, Jens Axboe wrote:
> > > On Wed, Jul 16 2003, Andrea Arcangeli wrote:
> > > > On Wed, Jul 16, 2003 at 03:04:42PM +0200, Jens Axboe wrote:
> > > > > On Wed, Jul 16 2003, Andrea Arcangeli wrote:
> > > > > > On Wed, Jul 16, 2003 at 02:46:56PM +0200, Jens Axboe wrote:
> > > > > > > Well it's a combined problem. Threshold too high on dirty memory,
> > > > > > > someone doing a read well get stuck flushing out as well.
> > > > > >
> > > > > > a pure read not. the write throttling should be per-process, then there
> > > > > > will be little risk.
> > > > >
> > > > > A read from user space, dirtying data along the way.
> > > >
> > > > it doesn't necessairly block on dirty memory. We even _free_ ram clean
> > > > if needed, exactly because of that. You can raise the amount of _free_
> > > > ram up to 99% of the whole ram in your box to be almost guaranteed to
> > > > never wait on dirty memory freeing. Of course the default tries to
> > > > optimize for writeback cache and there's a reasonable margin to avoid
> > > > writing dirty stuff. the sysctl is there for special usages where you
> > > > want to never block in a read from userspace regardless whatever the
> > > > state of the system.
> > >
> > > That may be so, but no user will ever touch that sysctl. He just
> > > experiences what Alan outlined, system grinds to a complete halt. Only
> > > much later does it get going again.
> >
> > and on the small boxes that will happen much less now since on the small
> > boxes the biggest vm overhead could been generated by the uncontrolled
> > size of the I/O queue that previously could grow as big as 32M.
>
> That is true, however noone runs 32MB boxes anymore :). So I doubt that
> would be the case.
I don't think it's an issue on 32M only; my point was that it's still a
relevant amount of ram on 64M and 128M boxes too, and it may be more than
what the VM allows to be dirty in those ram setups (even with the
default sysctl), especially during vm congestion, which is when you most
need to dominate the amount of that locked ram.
Andrea
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
2003-07-16 13:04 ` Jens Axboe
2003-07-16 13:11 ` Andrea Arcangeli
@ 2003-07-16 16:49 ` Andrew Morton
1 sibling, 0 replies; 68+ messages in thread
From: Andrew Morton @ 2003-07-16 16:49 UTC (permalink / raw)
To: Jens Axboe; +Cc: andrea, alan, marcelo, mason, linux-kernel, sct, jgarzik, viro
Jens Axboe <axboe@suse.de> wrote:
>
> On Wed, Jul 16 2003, Andrea Arcangeli wrote:
> > On Wed, Jul 16, 2003 at 02:46:56PM +0200, Jens Axboe wrote:
> > > Well it's a combined problem. Threshold too high on dirty memory,
> > > someone doing a read well get stuck flushing out as well.
> >
> > a pure read not. the write throttling should be per-process, then there
> > will be little risk.
>
> A read from user space, dirtying data along the way.
Actually it's a read from userspace which allocates a page which goes into
direct reclaim which discovers a locked buffer on the tail of the LRU and
then waits on it.
And if he's especially unlucky: while he waits, some other process
continues to pound more writes into the queue which get merged ahead of the
one he's waiting on, up to a point.
(I don't know if 2.6 does much better in this regard. It is supposed to.
Has anyone tested for it?)
^ permalink raw reply [flat|nested] 68+ messages in thread
* Re: RFC on io-stalls patch
@ 2003-07-15 18:47 Shane Shrybman
0 siblings, 0 replies; 68+ messages in thread
From: Shane Shrybman @ 2003-07-15 18:47 UTC (permalink / raw)
To: linux-kernel; +Cc: mason, Andrea Arcangeli, axboe
Hi,
(cc me please)
I am currently resurrecting my old K6-2, SCSI test box and I would be
happy to run some benchmarks on it to provide another data point for
your experimentations. If this would be of any help please let me know
what would be the most informative benchmarks to run, (which patches,
benchmark settings etc.)
I am currently compiling 2.4.21, 2.4.22-pre5 and I have just installed
contest 0.61.
The hardware:
scsi0 : Adaptec AIC7XXX EISA/VLB/PCI SCSI HBA DRIVER, Rev 6.2.8
aic7880: Ultra Wide Channel A, SCSI Id=7, 16/253 SCBs
Attached devices:
Host: scsi0 Channel: 00 Id: 02 Lun: 00
Vendor: SEAGATE Model: ST15230W SUN4.2G Rev: 0738
Type: Direct-Access ANSI SCSI revision: 02
Host: scsi0 Channel: 00 Id: 04 Lun: 00
Vendor: SEAGATE Model: ST32171W Rev: 0484
Type: Direct-Access ANSI SCSI revision: 02
processor : 0
vendor_id : AuthenticAMD
cpu family : 5
model : 6
model name : AMD-K6tm w/ multimedia extensions
stepping : 2
cpu MHz : 200.458
cache size : 64 KB
fdiv_bug : no
hlt_bug : no
f00f_bug : no
coma_bug : no
fpu : yes
fpu_exception : yes
cpuid level : 1
wp : yes
flags : fpu vme de pse tsc msr mce cx8 mmx
bogomips : 399.76
total used free shared buffers cached
Mem: 46460 44236 2224 0 1292 17516
-/+ buffers/cache: 25428 21032
Swap: 128480 188 128292
Regards,
shane
^ permalink raw reply [flat|nested] 68+ messages in thread
end of thread, other threads:[~2003-07-16 16:54 UTC | newest]
Thread overview: 68+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-07-08 20:06 RFC on io-stalls patch Marcelo Tosatti
2003-07-10 13:57 ` Jens Axboe
2003-07-11 14:13 ` Chris Mason
2003-07-12 0:20 ` Nick Piggin
2003-07-12 18:37 ` Chris Mason
2003-07-12 7:37 ` Jens Axboe
2003-07-12 7:48 ` Jens Axboe
2003-07-12 18:32 ` Chris Mason
2003-07-13 0:33 ` Andrea Arcangeli
2003-07-13 9:01 ` Jens Axboe
2003-07-13 16:20 ` Chris Mason
2003-07-13 16:45 ` Jeff Garzik
2003-07-13 19:33 ` Andrea Arcangeli
2003-07-13 17:47 ` Jens Axboe
2003-07-13 19:35 ` Andrea Arcangeli
2003-07-14 0:36 ` Chris Mason
2003-07-13 19:19 ` Andrea Arcangeli
2003-07-14 5:49 ` Jens Axboe
2003-07-14 12:23 ` Marcelo Tosatti
2003-07-14 13:12 ` Jens Axboe
2003-07-14 19:51 ` Jens Axboe
2003-07-14 20:09 ` Chris Mason
2003-07-14 20:19 ` Andrea Arcangeli
2003-07-14 21:24 ` Chris Mason
2003-07-15 5:46 ` Jens Axboe
2003-07-14 20:09 ` Marcelo Tosatti
2003-07-14 20:24 ` Andrea Arcangeli
2003-07-14 20:34 ` Chris Mason
2003-07-15 5:35 ` Jens Axboe
[not found] ` <20030714224528.GU16313@dualathlon.random>
2003-07-15 5:40 ` Jens Axboe
[not found] ` <1058229360.13317.364.camel@tiny.suse.com>
2003-07-15 5:43 ` Jens Axboe
[not found] ` <20030714175238.3eaddd9a.akpm@osdl.org>
[not found] ` <20030715020706.GC16313@dualathlon.random>
2003-07-15 5:45 ` Jens Axboe
2003-07-15 6:01 ` Andrea Arcangeli
2003-07-15 6:08 ` Jens Axboe
2003-07-15 7:03 ` Andrea Arcangeli
2003-07-15 8:28 ` Jens Axboe
2003-07-15 9:12 ` Chris Mason
2003-07-15 9:17 ` Jens Axboe
2003-07-15 9:18 ` Jens Axboe
2003-07-15 9:30 ` Chris Mason
2003-07-15 10:03 ` Andrea Arcangeli
2003-07-15 10:11 ` Jens Axboe
2003-07-15 14:18 ` Chris Mason
2003-07-15 14:29 ` Jens Axboe
2003-07-16 17:06 ` Chris Mason
2003-07-15 9:22 ` Chris Mason
2003-07-15 9:59 ` Andrea Arcangeli
2003-07-15 9:48 ` Andrea Arcangeli
2003-07-14 20:16 ` Andrea Arcangeli
2003-07-14 20:17 ` Marcelo Tosatti
2003-07-14 20:27 ` Andrea Arcangeli
2003-07-15 5:26 ` Jens Axboe
2003-07-15 5:48 ` Andrea Arcangeli
2003-07-15 6:01 ` Jens Axboe
2003-07-15 6:33 ` Andrea Arcangeli
2003-07-15 11:22 ` Alan Cox
2003-07-15 11:27 ` Jens Axboe
2003-07-16 12:43 ` Andrea Arcangeli
2003-07-16 12:46 ` Jens Axboe
2003-07-16 12:59 ` Andrea Arcangeli
2003-07-16 13:04 ` Jens Axboe
2003-07-16 13:11 ` Andrea Arcangeli
2003-07-16 13:21 ` Jens Axboe
2003-07-16 13:44 ` Andrea Arcangeli
2003-07-16 14:00 ` Jens Axboe
2003-07-16 14:24 ` Andrea Arcangeli
2003-07-16 16:49 ` Andrew Morton
2003-07-15 18:47 Shane Shrybman
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).