linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] Microstate accounting:  account for some more things
@ 2003-07-08  3:14 Peter Chubb
  0 siblings, 0 replies; only message in thread
From: Peter Chubb @ 2003-07-08  3:14 UTC (permalink / raw)
  To: linux-kernel

[-- Attachment #1: message body text --]
[-- Type: text/plain, Size: 476 bytes --]


Hi,
	The appended patch goes on top of the patch at
http://marc.theaimsgroup.com/?l=linux-kernel&m=105662514529349&w=2
to add accounting for time spent in poll/select, time spent sleeping
while handling major page faults, and time spent asleep on futexes.

While running with this patch, I found a problem with major fault
accounting in the kernel: some combinations of circumstances lead to a
major fault being reported even though no I/O is done (for example, if
the page has been prefaulted).


[-- Attachment #2: p1 --]
[-- Type: text/plain, Size: 10614 bytes --]

# This is a BitKeeper generated patch for the following project:
# Project Name: Linux kernel tree
# This patch format is intended for GNU patch command version 2.5 or higher.
# This patch includes the following deltas:
#	           ChangeSet	1.1402  -> 1.1403 
#	         fs/select.c	1.20    -> 1.21   
#	arch/i386/kernel/irq.c	1.39    -> 1.40   
#	      kernel/futex.c	1.28    -> 1.29   
#	        kernel/msa.c	1.1     -> 1.2    
#	arch/ia64/kernel/irq_ia64.c	1.14    -> 1.15   
#	         mm/memory.c	1.125   -> 1.126  
#	      fs/proc/base.c	1.52    -> 1.53   
#	 include/linux/msa.h	1.1     -> 1.2    
#
# The following is the BitKeeper ChangeSet Log
# --------------------------------------------
# 03/07/08	peterc@gelato.unsw.edu.au	1.1403
# Add new states: time sleeping for page I/O, in poll/select/epoll and in futex.
# --------------------------------------------
#
diff -Nru a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
--- a/arch/i386/kernel/irq.c	Tue Jul  8 13:13:30 2003
+++ b/arch/i386/kernel/irq.c	Tue Jul  8 13:13:30 2003
@@ -429,7 +429,7 @@
 	unsigned int status;
 
 	irq_enter();
-	msa_start_irq(cpu, irq);
+	msa_start_irq(irq);
 
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
 	/* Debugging check for stack overflow: is there less than 1KB free? */
@@ -509,7 +509,7 @@
 	spin_unlock(&desc->lock);
 
 	irq_exit();
-	msa_finish_irq(cpu, irq);
+	msa_finish_irq(irq);
 
 	return 1;
 }
diff -Nru a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
--- a/arch/ia64/kernel/irq_ia64.c	Tue Jul  8 13:13:30 2003
+++ b/arch/ia64/kernel/irq_ia64.c	Tue Jul  8 13:13:30 2003
@@ -122,7 +122,7 @@
 	saved_tpr = ia64_get_tpr();
 	ia64_srlz_d();
 	oldvector = vector;
-	msa_start_irq(cpu, local_vector_to_irq(vector));
+	msa_start_irq(local_vector_to_irq(vector));
 	while (vector != IA64_SPURIOUS_INT_VECTOR) {
 		if (!IS_RESCHEDULE(vector)) {
 			ia64_set_tpr(vector);
@@ -139,7 +139,7 @@
 		ia64_eoi();
 		oldvector = vector;
 		vector = ia64_get_ivr();		
-		msa_continue_irq(cpu, local_vector_to_irq(oldvector),
+		msa_continue_irq(local_vector_to_irq(oldvector),
 				 local_vector_to_irq(vector));
 	}
 	/*
@@ -150,7 +150,7 @@
 	if (local_softirq_pending())
 		do_softirq();
 
-	msa_finish_irq(cpu, local_vector_to_irq(vector));
+	msa_finish_irq(local_vector_to_irq(vector));
 }
 
 #ifdef CONFIG_SMP
diff -Nru a/fs/proc/base.c b/fs/proc/base.c
--- a/fs/proc/base.c	Tue Jul  8 13:13:30 2003
+++ b/fs/proc/base.c	Tue Jul  8 13:13:30 2003
@@ -302,9 +302,7 @@
 static int proc_pid_msa(struct task_struct *task, char *buffer) 
 {
 	struct microstates *msp = &task->microstates;
-  
 	static char *statenames[] = {
-		"Unknown",
 		"User",
 		"System",
 		"Interruptible",
@@ -313,32 +311,40 @@
 		"OnExpiredQueue",
 		"Zombie",
 		"Stopped",
+		"Paging",
+		"Futex",
+		"Poll",
 		"Interrupted",
 	};
+  
 	return sprintf(buffer, 
-		 "State:         %s\n"		\
-		 "ONCPU_USER     %15llu\n"	\
-		 "ONCPU_SYS      %15llu\n"	\
-		 "INTERRUPTIBLE  %15llu\n"	\
-		 "UNINTERRUPTIBLE%15llu\n"	\
-		 "INTERRUPTED    %15llu\n"	\
-		 "ACTIVEQUEUE    %15llu\n"	\
-		 "EXPIREDQUEUE   %15llu\n"	\
-		 "STOPPED        %15llu\n"	\
-		 "ZOMBIE         %15llu\n"	\
-		 "UNKNOWN        %15llu\n",
-		 msp->cur_state <= INTERRUPTED ?
+		       "State:         %s\n"		\
+		       "ONCPU_USER     %15llu\n"	\
+		       "ONCPU_SYS      %15llu\n"	\
+		       "INTERRUPTIBLE  %15llu\n"	\
+		       "UNINTERRUPTIBLE%15llu\n"	\
+		       "INTERRUPTED    %15llu\n"	\
+		       "ACTIVEQUEUE    %15llu\n"	\
+		       "EXPIREDQUEUE   %15llu\n"	\
+		       "STOPPED        %15llu\n"	\
+		       "ZOMBIE         %15llu\n"	\
+		       "SLP_POLL       %15llu\n"	\
+		       "SLP_PAGING     %15llu\n"	\
+		       "SLP_FUTEX      %15llu\n",	\
+		       msp->cur_state >= 0 && msp->cur_state < NR_MICRO_STATES ?
 		       statenames[msp->cur_state] : "Impossible",
-		 msp->timers[ONCPU_USER],
-		 msp->timers[ONCPU_SYS],
-		 msp->timers[INTERRUPTIBLE_SLEEP],
-		 msp->timers[UNINTERRUPTIBLE_SLEEP],
-		 msp->timers[INTERRUPTED],
-		 msp->timers[ONACTIVEQUEUE],
-		 msp->timers[ONEXPIREDQUEUE],
-		 msp->timers[STOPPED],
-		 msp->timers[ZOMBIE],
-		 msp->timers[UNKNOWN]);
+		       msp->timers[ONCPU_USER],
+		       msp->timers[ONCPU_SYS],
+		       msp->timers[INTERRUPTIBLE_SLEEP],
+		       msp->timers[UNINTERRUPTIBLE_SLEEP],
+		       msp->timers[INTERRUPTED],
+		       msp->timers[ONACTIVEQUEUE],
+		       msp->timers[ONEXPIREDQUEUE],
+		       msp->timers[STOPPED],
+		       msp->timers[ZOMBIE],
+		       msp->timers[POLL_SLEEP],
+		       msp->timers[PAGING_SLEEP],
+		       msp->timers[FUTEX_SLEEP]);
 }
 #endif /* CONFIG_MICROSTATE */
 
diff -Nru a/fs/select.c b/fs/select.c
--- a/fs/select.c	Tue Jul  8 13:13:30 2003
+++ b/fs/select.c	Tue Jul  8 13:13:30 2003
@@ -253,6 +253,7 @@
 			retval = table.error;
 			break;
 		}
+		msa_next_state(current, POLL_SLEEP);
 		__timeout = schedule_timeout(__timeout);
 	}
 	__set_current_state(TASK_RUNNING);
@@ -443,6 +444,7 @@
 		count = wait->error;
 		if (count)
 			break;
+		msa_next_state(current, POLL_SLEEP);
 		timeout = schedule_timeout(timeout);
 	}
 	__set_current_state(TASK_RUNNING);
diff -Nru a/include/linux/msa.h b/include/linux/msa.h
--- a/include/linux/msa.h	Tue Jul  8 13:13:30 2003
+++ b/include/linux/msa.h	Tue Jul  8 13:13:30 2003
@@ -18,7 +18,7 @@
  */
 
 enum thread_state {
-	UNKNOWN,
+	UNKNOWN = -1,
 	ONCPU_USER,
 	ONCPU_SYS,
 	INTERRUPTIBLE_SLEEP,
@@ -28,6 +28,10 @@
 	ZOMBIE,
 	STOPPED,
 	INTERRUPTED,
+	PAGING_SLEEP,
+	FUTEX_SLEEP,
+	POLL_SLEEP,
+	
 	NR_MICRO_STATES /* Must be last */
 };
 
@@ -76,9 +80,9 @@
 void msa_update_parent(struct task_struct *parent, struct task_struct *this);
 void msa_init(struct task_struct *p);
 void msa_set_timer(struct task_struct *p, int state);
-void msa_start_irq(int cpu, int irq);
-void msa_continue_irq(int cpu, int oldirq, int newirq);
-void msa_finish_irq(int cpu, int irq);
+void msa_start_irq(int irq);
+void msa_continue_irq(int oldirq, int newirq);
+void msa_finish_irq(int irq);
 clk_t msa_irq_time(int cpu, int irq);
 
 #ifdef TASK_STRUCT_DEFINED
diff -Nru a/kernel/futex.c b/kernel/futex.c
--- a/kernel/futex.c	Tue Jul  8 13:13:30 2003
+++ b/kernel/futex.c	Tue Jul  8 13:13:30 2003
@@ -34,6 +34,7 @@
 #include <linux/futex.h>
 #include <linux/vcache.h>
 #include <linux/mount.h>
+#include <linux/msa.h>
 
 #define FUTEX_HASHBITS 8
 
@@ -349,6 +350,7 @@
 	 * the waiter from the list.
 	 */
 	add_wait_queue(&q.waiters, &wait);
+	msa_next_state(current, FUTEX_SLEEP);
 	set_current_state(TASK_INTERRUPTIBLE);
 	if (!list_empty(&q.list)) {
 		unlock_futex_mm();
diff -Nru a/kernel/msa.c b/kernel/msa.c
--- a/kernel/msa.c	Tue Jul  8 13:13:30 2003
+++ b/kernel/msa.c	Tue Jul  8 13:13:30 2003
@@ -62,36 +62,78 @@
 	msnext->timers[msnext->cur_state] += now - msnext->last_change;
 	
 	/* Update states */
-	if (msprev->next_state != UNKNOWN) {
-		next_state = msprev->next_state;
-		msprev->next_state = UNKNOWN;
-	} else switch(prev->state) {
-	case TASK_INTERRUPTIBLE:
-		next_state = INTERRUPTIBLE_SLEEP;
-		break;
+	switch (msprev->next_state) {
+	case UNKNOWN: /*
+		       * Infer from actual state
+		       */
+		switch (prev->state) {
+		case TASK_INTERRUPTIBLE:
+			next_state = INTERRUPTIBLE_SLEEP;
+			break;
 		
-	case TASK_UNINTERRUPTIBLE:
-		next_state = UNINTERRUPTIBLE_SLEEP;
-		break;
-
-	case TASK_STOPPED:
-		next_state = STOPPED;
-		break;
-
-	case TASK_ZOMBIE:
-		next_state = ZOMBIE;
-		break;
-
-	case TASK_DEAD:
-		next_state = ZOMBIE;
+		case TASK_UNINTERRUPTIBLE:
+			next_state = UNINTERRUPTIBLE_SLEEP;
+			break;
+
+		case TASK_STOPPED:
+			next_state = STOPPED;
+			break;
+
+		case TASK_ZOMBIE:
+			next_state = ZOMBIE;
+			break;
+
+		case TASK_DEAD:
+			next_state = ZOMBIE;
+			break;
+
+		case TASK_RUNNING:		
+			next_state = ONACTIVEQUEUE;
+			break;
+
+		default:
+			next_state = UNKNOWN;
+			break;
+
+		} 
+		break;
+
+	case PAGING_SLEEP: /*
+			    * Sleep states are PAGING_SLEEP;
+			    * others inferred from task state
+			    */
+		switch(prev->state) {
+		case TASK_INTERRUPTIBLE: /* FALLTHROUGH */
+		case TASK_UNINTERRUPTIBLE:
+			next_state = PAGING_SLEEP;
+			break;
+
+		case TASK_STOPPED:
+			next_state = STOPPED;
+			break;
+
+		case TASK_ZOMBIE:
+			next_state = ZOMBIE;
+			break;
+
+		case TASK_DEAD:
+			next_state = ZOMBIE;
+			break;
+
+		case TASK_RUNNING:		
+			next_state = ONACTIVEQUEUE;
+			break;
+
+		default:
+			next_state = UNKNOWN;
+			break;
+		}
 		break;
 
-	case TASK_RUNNING:		
-		next_state = ONACTIVEQUEUE;
+	default: /* Explicitly set next state */
+		next_state = msprev->next_state;
+		msprev->next_state = UNKNOWN;
 		break;
-
-	default:
-		next_state = UNKNOWN;
 	}
 
 	msprev->cur_state = next_state;
@@ -159,11 +201,12 @@
 	}
 }
 
-void msa_start_irq(int cpu, int irq) 
+void msa_start_irq(int irq) 
 {
 	struct task_struct *p = current;
 	struct microstates *mp = &p->microstates;
 	clk_t now;
+	int cpu = smp_processor_id();
 
 	MSA_NOW(now);
 	mp->timers[mp->cur_state] += now - mp->last_change;
@@ -175,9 +218,10 @@
 	msa_irq_pids[cpu][irq] = current->pid;
 }
 
-void msa_continue_irq(int cpu, int oldirq, int newirq) 
+void msa_continue_irq(int oldirq, int newirq) 
 {
 	clk_t now;
+	int cpu = smp_processor_id();
 	MSA_NOW(now);
 
 	msa_irq_times[cpu][oldirq] += now - msa_irq_entered[cpu][oldirq];
@@ -186,11 +230,12 @@
 }
 
 
-void msa_finish_irq(int cpu, int irq) 
+void msa_finish_irq(int irq) 
 {
 	struct task_struct *p = current;
 	struct microstates *mp = &p->microstates;
 	clk_t now;
+	int cpu = smp_processor_id();
 
 	MSA_NOW(now);
 
@@ -249,7 +294,7 @@
 	}
 
 	tp = which == MSA_SELF ? msp->timers : msp->child_timers;
-	for (i = UNKNOWN; i < ntimers; i++) {
+	for (i = 0; i < ntimers; i++) {
 		clk_t x = MSA_TO_NSEC(*tp++);
 		if (copy_to_user(timers++, &x, sizeof x))
 			return -EFAULT;
diff -Nru a/mm/memory.c b/mm/memory.c
--- a/mm/memory.c	Tue Jul  8 13:13:30 2003
+++ b/mm/memory.c	Tue Jul  8 13:13:30 2003
@@ -1532,6 +1532,7 @@
 	if (is_vm_hugetlb_page(vma))
 		return VM_FAULT_SIGBUS;	/* mapping truncation does this. */
 
+	msa_next_state(current, PAGING_SLEEP);
 	/*
 	 * We need the page table lock to synchronize with kswapd
 	 * and the SMP-safe atomic PTE updates.
@@ -1541,10 +1542,14 @@
 
 	if (pmd) {
 		pte_t * pte = pte_alloc_map(mm, pmd, address);
-		if (pte)
-			return handle_pte_fault(mm, vma, address, write_access, pte, pmd);
+		if (pte) {
+			int ret = handle_pte_fault(mm, vma, address, write_access, pte, pmd);
+			msa_next_state(current, UNKNOWN);
+			return ret;
+		}
 	}
 	spin_unlock(&mm->page_table_lock);
+	msa_next_state(current, UNKNOWN);
 	return VM_FAULT_OOM;
 }
 

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2003-07-08  3:00 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-07-08  3:14 [PATCH] Microstate accounting: account for some more things Peter Chubb

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).