All of lore.kernel.org
 help / color / mirror / Atom feed
From: Shailabh Nagar <nagar@watson.ibm.com>
To: Shailabh Nagar <nagar@watson.ibm.com>
Cc: linux-kernel <linux-kernel@vger.kernel.org>,
	elsa-devel <elsa-devel@lists.sourceforge.net>,
	lse-tech@lists.sourceforge.net,
	ckrm-tech <ckrm-tech@lists.sourceforge.net>,
	Guillaume Thouvenin <guillaume.thouvenin@bull.net>,
	Jay Lan <jlan@sgi.com>, Jens Axboe <axboe@suse.de>,
	Suparna Bhattacharya <bsuparna@in.ibm.com>
Subject: [RFC][Patch 3/5] Per-task delay accounting: Sync block I/O delays
Date: Wed, 07 Dec 2005 22:23:10 +0000	[thread overview]
Message-ID: <439760CE.7050401@watson.ibm.com> (raw)
In-Reply-To: <43975D45.3080801@watson.ibm.com>

This patch attempts to record all the time spent by a task
waiting for completion of (user-initiated) block I/O. Ideally, it
would have been nice to be able to record the time spent by a task
waiting for I/O events that are related to async block I/O. While
that can be done now (by measuring time spent in wait_for_async_kiocb),
once (if?) network aio is implemented, AFAIK, it won't be possible
to distinguish async block and network aio events (and, I suspect, async
I/O to pipes too...), so async block I/O is ignored for now.

Suggestions on how async block I/O wait can be accounted accurately would
be welcome.




Changes since 11/14/05

- use nanosecond resolution, adjusted wall clock time for timestamps
  instead of sched_clock (akpm, andi, marcelo)
- collect stats only if delay accounting enabled (parag)
- stats collected for delays in all userspace-initiated block I/O,
  including fsync/fdatasync, but not counting waits for async block I/O events.

11/14/05: First post


delayacct-blkio.patch

Record time spent by a task waiting for completion of
userspace-initiated synchronous block I/O. This can help
determine the right I/O priority for the task.

Signed-off-by: Shailabh Nagar <nagar@watson.ibm.com>

 fs/buffer.c               |    6 ++++++
 fs/read_write.c           |   10 +++++++++-
 include/linux/delayacct.h |    4 ++++
 include/linux/sched.h     |    2 ++
 kernel/delayacct.c        |   31 +++++++++++++++++++++++++++++++
 mm/filemap.c              |   10 +++++++++-
 mm/memory.c               |   17 +++++++++++++++--
 7 files changed, 76 insertions(+), 4 deletions(-)

Index: linux-2.6.15-rc5/include/linux/sched.h
===================================================================
--- linux-2.6.15-rc5.orig/include/linux/sched.h
+++ linux-2.6.15-rc5/include/linux/sched.h
@@ -546,6 +546,8 @@ struct task_delay_info {
 	spinlock_t	lock;

 	/* Add stats in pairs: uint64_t delay, uint32_t count */
+	uint64_t blkio_delay;	/* wait for sync block io completion */
+	uint32_t blkio_count;
 };
 #endif

Index: linux-2.6.15-rc5/fs/read_write.c
===================================================================
--- linux-2.6.15-rc5.orig/fs/read_write.c
+++ linux-2.6.15-rc5/fs/read_write.c
@@ -14,6 +14,8 @@
 #include <linux/security.h>
 #include <linux/module.h>
 #include <linux/syscalls.h>
+#include <linux/time.h>
+#include <linux/delayacct.h>

 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -224,8 +226,14 @@ ssize_t do_sync_read(struct file *filp,
 		(ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos)))
 		wait_on_retry_sync_kiocb(&kiocb);

-	if (-EIOCBQUEUED == ret)
+	if (-EIOCBQUEUED == ret) {
+		__attribute__((unused)) struct timespec start, end;
+
+		getnstimestamp(&start);
 		ret = wait_on_sync_kiocb(&kiocb);
+		getnstimestamp(&end);
+		delayacct_blkio(&start, &end);
+	}
 	*ppos = kiocb.ki_pos;
 	return ret;
 }
Index: linux-2.6.15-rc5/mm/filemap.c
===================================================================
--- linux-2.6.15-rc5.orig/mm/filemap.c
+++ linux-2.6.15-rc5/mm/filemap.c
@@ -28,6 +28,8 @@
 #include <linux/blkdev.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
+#include <linux/time.h>
+#include <linux/delayacct.h>
 #include "filemap.h"
 /*
  * FIXME: remove all knowledge of the buffer layer from the core VM
@@ -1062,8 +1064,14 @@ generic_file_read(struct file *filp, cha

 	init_sync_kiocb(&kiocb, filp);
 	ret = __generic_file_aio_read(&kiocb, &local_iov, 1, ppos);
-	if (-EIOCBQUEUED == ret)
+	if (-EIOCBQUEUED == ret) {
+		__attribute__((unused)) struct timespec start, end;
+
+		getnstimestamp(&start);
 		ret = wait_on_sync_kiocb(&kiocb);
+		getnstimestamp(&end);
+		delayacct_blkio(&start, &end);
+	}
 	return ret;
 }

Index: linux-2.6.15-rc5/mm/memory.c
===================================================================
--- linux-2.6.15-rc5.orig/mm/memory.c
+++ linux-2.6.15-rc5/mm/memory.c
@@ -48,6 +48,8 @@
 #include <linux/rmap.h>
 #include <linux/module.h>
 #include <linux/init.h>
+#include <linux/time.h>
+#include <linux/delayacct.h>

 #include <asm/pgalloc.h>
 #include <asm/uaccess.h>
@@ -2200,11 +2202,22 @@ static inline int handle_pte_fault(struc
 	old_entry = entry = *pte;
 	if (!pte_present(entry)) {
 		if (pte_none(entry)) {
+			int ret;
+			__attribute__((unused)) struct timespec start, end;
+
 			if (!vma->vm_ops || !vma->vm_ops->nopage)
 				return do_anonymous_page(mm, vma, address,
 					pte, pmd, write_access);
-			return do_no_page(mm, vma, address,
-					pte, pmd, write_access);
+
+			if (vma->vm_file)
+				getnstimestamp(&start);
+			ret = do_no_page(mm, vma, address,
+					 pte, pmd, write_access);
+			if (vma->vm_file) {
+				getnstimestamp(&end);
+				delayacct_blkio(&start, &end);
+			}
+			return ret;
 		}
 		if (pte_file(entry))
 			return do_file_page(mm, vma, address,
Index: linux-2.6.15-rc5/fs/buffer.c
===================================================================
--- linux-2.6.15-rc5.orig/fs/buffer.c
+++ linux-2.6.15-rc5/fs/buffer.c
@@ -41,6 +41,8 @@
 #include <linux/bitops.h>
 #include <linux/mpage.h>
 #include <linux/bit_spinlock.h>
+#include <linux/time.h>
+#include <linux/delayacct.h>

 static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
 static void invalidate_bh_lrus(void);
@@ -337,6 +339,7 @@ static long do_fsync(unsigned int fd, in
 	struct file * file;
 	struct address_space *mapping;
 	int ret, err;
+	__attribute__((unused)) struct timespec start, end;

 	ret = -EBADF;
 	file = fget(fd);
@@ -349,6 +352,7 @@ static long do_fsync(unsigned int fd, in
 		goto out_putf;
 	}

+	getnstimestamp(&start);
 	mapping = file->f_mapping;

 	current->flags |= PF_SYNCWRITE;
@@ -371,6 +375,8 @@ static long do_fsync(unsigned int fd, in
 out_putf:
 	fput(file);
 out:
+	getnstimestamp(&end);
+	delayacct_blkio(&start, &end);
 	return ret;
 }

Index: linux-2.6.15-rc5/include/linux/delayacct.h
===================================================================
--- linux-2.6.15-rc5.orig/include/linux/delayacct.h
+++ linux-2.6.15-rc5/include/linux/delayacct.h
@@ -19,8 +19,12 @@
 #ifdef CONFIG_TASK_DELAY_ACCT
 extern int delayacct_on;	/* Delay accounting turned on/off */
 extern void delayacct_tsk_init(struct task_struct *tsk);
+extern void delayacct_blkio(struct timespec *start, struct timespec *end);
 #else
 static inline void delayacct_tsk_init(struct task_struct *tsk)
 {}
+static inline void delayacct_blkio(struct timespec *start, struct timespec *end)
+{}
+
 #endif /* CONFIG_TASK_DELAY_ACCT */
 #endif /* _LINUX_TASKDELAYS_H */
Index: linux-2.6.15-rc5/kernel/delayacct.c
===================================================================
--- linux-2.6.15-rc5.orig/kernel/delayacct.c
+++ linux-2.6.15-rc5/kernel/delayacct.c
@@ -12,6 +12,7 @@
  */

 #include <linux/sched.h>
+#include <linux/time.h>

 int delayacct_on;	/* Delay accounting turned on/off */

@@ -34,3 +35,33 @@ static int __init delayacct_init(void)
 	return 0;
 }
 core_initcall(delayacct_init);
+
+inline void delayacct_blkio(struct timespec *start, struct timespec *end)
+{
+	unsigned long long delay;
+
+	if (!delayacct_on)
+		return;
+
+	delay = timespec_nsdiff(start, end);
+
+	spin_lock(&current->delays.lock);
+	current->delays.blkio_delay += delay;
+	current->delays.blkio_count++;
+	spin_unlock(&current->delays.lock);
+}
+
+inline void delayacct_swapin(struct timespec *start, struct timespec *end)
+{
+	unsigned long long delay;
+
+	if (!delayacct_on)
+		return;
+
+	delay = timespec_nsdiff(start, end);
+
+	spin_lock(&current->delays.lock);
+	current->delays.swapin_delay += delay;
+	current->delays.swapin_count++;
+	spin_unlock(&current->delays.lock);
+}

  parent reply	other threads:[~2005-12-07 22:23 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-12-07 22:08 [RFC][Patch 0/5] Per-task delay accounting Shailabh Nagar
2005-12-07 22:13 ` [RFC][Patch 1/5] nanosecond timestamps and diffs Shailabh Nagar
2005-12-12 18:50   ` [Lse-tech] " Christoph Lameter
2005-12-12 19:31     ` Shailabh Nagar
2005-12-12 19:49       ` john stultz
2005-12-12 20:00         ` Shailabh Nagar
2005-12-12 20:07           ` john stultz
2005-12-13  0:54             ` George Anzinger
2005-12-13  3:48               ` Nish Aravamudan
2005-12-13 18:35         ` Jay Lan
2005-12-13 21:16           ` john stultz
2005-12-13 21:44           ` Shailabh Nagar
2005-12-13 22:13             ` George Anzinger
2005-12-13 23:05           ` [ckrm-tech] " Matt Helsley
2005-12-07 22:15 ` [RFC][Patch 2/5] Per-task delay accounting: Initialization, dynamic turn on/off Shailabh Nagar
2005-12-07 22:23 ` Shailabh Nagar [this message]
2005-12-07 22:33   ` [ckrm-tech] [RFC][Patch 3/5] Per-task delay accounting: Sync block I/O delays Dave Hansen
2005-12-07 23:06     ` Shailabh Nagar
2005-12-07 22:28 ` [RFC][Patch 4/5] Per-task delay accounting: Swap in delays Shailabh Nagar
2005-12-07 22:29 ` [RFC][Patch 5/5] Per-task delay accounting: procfs interface Shailabh Nagar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=439760CE.7050401@watson.ibm.com \
    --to=nagar@watson.ibm.com \
    --cc=axboe@suse.de \
    --cc=bsuparna@in.ibm.com \
    --cc=ckrm-tech@lists.sourceforge.net \
    --cc=elsa-devel@lists.sourceforge.net \
    --cc=guillaume.thouvenin@bull.net \
    --cc=jlan@sgi.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lse-tech@lists.sourceforge.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.