* [PATCH] Read-Copy Update 2.5.34
@ 2002-09-11 11:19 Dipankar Sarma
  2002-09-11 11:24 ` Ingo Molnar
From: Dipankar Sarma @ 2002-09-11 11:19 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Ingo Molnar, Rusty Russell, Andrea Arcangeli, Paul McKenney,
	linux-kernel

Hi,

Here is RCU for 2.5.34. It is just a rediff of the earlier BK version.

The current rcu_poll is essentially a per-CPU version of the
rcu_poll patch in the -aa series of kernels. Additionally, it supports
call_rcu_preempt(), which allows RCU to work transparently in
preemptive kernels.
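
For anyone new to the interface, a minimal usage sketch follows (not
part of the patch; struct foo, global_foo, free_foo() and update_foo_a()
are invented names, and locking/barrier details are simplified):

struct foo {
	int a;
	struct rcu_head rcu;
};

static struct foo *global_foo;

/*
 * Read side: rcu_read_lock() disables preemption on CONFIG_PREEMPT
 * kernels and is a no-op otherwise.  The reference to *p must not be
 * held past rcu_read_unlock().
 */
int read_foo_a(void)
{
	struct foo *p;
	int val;

	rcu_read_lock();
	p = global_foo;
	val = p->a;
	rcu_read_unlock();
	return val;
}

/* Runs once every CPU has passed through a quiescent state. */
static void free_foo(void *arg)
{
	kfree(arg);
}

/*
 * Update side: publish the new version, then defer freeing the old
 * one.  Concurrent updaters must be serialized by the caller, and a
 * write barrier is needed so the new object's contents are visible
 * before the new pointer is.
 */
void update_foo_a(struct foo *new, int a)
{
	struct foo *old = global_foo;

	new->a = a;
	smp_wmb();
	global_foo = new;
	call_rcu(&old->rcu, free_foo, old);
}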

Ingo, it would be nice to have your comments on this, especially since
it touches the scheduler.

Read-Copy Update is also available in bk for pulling -

http://lse.bkbits.net/linux-2.5-rcu

Thanks
-- 
Dipankar Sarma  <dipankar@in.ibm.com> http://lse.sourceforge.net
Linux Technology Center, IBM Software Lab, Bangalore, India.


diff -urN linux-2.5.34-base/include/linux/init_task.h linux-2.5.34-rcu_poll/include/linux/init_task.h
--- linux-2.5.34-base/include/linux/init_task.h	Mon Sep  9 23:05:02 2002
+++ linux-2.5.34-rcu_poll/include/linux/init_task.h	Wed Sep 11 11:45:03 2002
@@ -83,6 +83,7 @@
 	.alloc_lock	= SPIN_LOCK_UNLOCKED,				\
 	.switch_lock	= SPIN_LOCK_UNLOCKED,				\
 	.journal_info	= NULL,						\
+	.cpu_preempt_cntr = NULL,					\
 }
 
 
diff -urN linux-2.5.34-base/include/linux/rcupdate.h linux-2.5.34-rcu_poll/include/linux/rcupdate.h
--- linux-2.5.34-base/include/linux/rcupdate.h	Thu Jan  1 05:30:00 1970
+++ linux-2.5.34-rcu_poll/include/linux/rcupdate.h	Wed Sep 11 11:45:03 2002
@@ -0,0 +1,86 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (c) IBM Corporation, 2001
+ *
+ * Author: Dipankar Sarma <dipankar@in.ibm.com>
+ * 
+ * Based on the original work by Paul McKenney <paul.mckenney@us.ibm.com>
+ * and inputs from Andrea Arcangeli, Rusty Russell, Andi Kleen etc.
+ * Papers:
+ * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
+ * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ * 		http://lse.sourceforge.net/locking/rcupdate.html
+ *
+ */
+
+#ifndef _LINUX_RCUPDATE_H
+#define _LINUX_RCUPDATE_H
+
+#ifdef __KERNEL__
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+/**
+ * struct rcu_head - callback structure for use with RCU
+ * @list: list_head to queue the update requests
+ * @func: actual update function to call after the grace period.
+ * @arg: argument to be passed to the actual update function.
+ */
+struct rcu_head {
+	struct list_head list;
+	void (*func)(void *obj);
+	void *arg;
+};
+
+#define RCU_HEAD_INIT(head) \
+		{.list = LIST_HEAD_INIT((head).list), .func = NULL, .arg = NULL}
+#define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT(head)
+#define INIT_RCU_HEAD(ptr) do { \
+	INIT_LIST_HEAD(&(ptr)->list); (ptr)->func = NULL; (ptr)->arg = NULL; \
+} while (0)
+
+extern void FASTCALL(call_rcu(struct rcu_head *head, 
+				void (*func)(void *arg), void *arg));
+
+#ifdef CONFIG_PREEMPT
+
+#define rcu_read_lock()         preempt_disable()
+#define rcu_read_unlock()       preempt_enable()
+extern void FASTCALL(call_rcu_preempt(struct rcu_head *head,
+				void (*func)(void *arg), void *arg));
+
+#else
+
+#define rcu_read_lock()         do {} while(0)
+#define rcu_read_unlock()       do {} while(0)
+static inline void call_rcu_preempt(struct rcu_head *head, 
+				void (*func)(void *arg), void *arg)
+{
+	call_rcu(head, func, arg);
+}
+
+#endif
+
+extern void synchronize_kernel(void);
+extern void rcu_init(void);
+
+#endif /* __KERNEL__ */
+#endif /* _LINUX_RCUPDATE_H */
diff -urN linux-2.5.34-base/include/linux/sched.h linux-2.5.34-rcu_poll/include/linux/sched.h
--- linux-2.5.34-base/include/linux/sched.h	Mon Sep  9 23:05:02 2002
+++ linux-2.5.34-rcu_poll/include/linux/sched.h	Wed Sep 11 11:45:03 2002
@@ -27,6 +27,7 @@
 #include <linux/securebits.h>
 #include <linux/fs_struct.h>
 #include <linux/compiler.h>
+#include <linux/percpu.h>
 
 struct exec_domain;
 
@@ -164,6 +165,7 @@
 extern void flush_scheduled_tasks(void);
 extern int start_context_thread(void);
 extern int current_is_keventd(void);
+extern void force_cpu_reschedule(int cpu);
 
 struct namespace;
 
@@ -383,6 +385,7 @@
 /* journalling filesystem info */
 	void *journal_info;
 	struct dentry *proc_dentry;
+	atomic_t *cpu_preempt_cntr;
 };
 
 extern void __put_task_struct(struct task_struct *tsk);
@@ -459,6 +462,8 @@
 extern struct task_struct init_task;
 
 extern struct   mm_struct init_mm;
+extern struct task_struct *init_tasks[NR_CPUS];
+extern DEFINE_PER_CPU(long, cpu_quiescent);
 
 /* PID hashing. (shouldnt this be dynamic?) */
 #define PIDHASH_SZ 8192
@@ -962,6 +967,55 @@
 
 #endif /* CONFIG_SMP */
 
+#ifdef CONFIG_PREEMPT
+
+extern DEFINE_PER_CPU(atomic_t[2], rcu_preempt_cntr);
+extern DEFINE_PER_CPU(atomic_t, *curr_preempt_cntr);
+extern DEFINE_PER_CPU(atomic_t, *next_preempt_cntr);
+
+static inline void rcu_switch_preempt_cntr(int cpu)
+{
+	atomic_t *tmp;
+	tmp = per_cpu(curr_preempt_cntr, cpu);
+	per_cpu(curr_preempt_cntr, cpu) = per_cpu(next_preempt_cntr, cpu);
+	per_cpu(next_preempt_cntr, cpu) = tmp;
+
+}
+
+static inline void rcu_preempt_put(void)
+{
+	preempt_disable();
+	if (unlikely(current->cpu_preempt_cntr != NULL)) {
+		atomic_dec(current->cpu_preempt_cntr);
+		current->cpu_preempt_cntr = NULL;
+	}
+	preempt_enable();
+}
+
+/* Must not be preempted */
+static inline void rcu_preempt_get(void)
+{
+	if (likely(current->cpu_preempt_cntr == NULL)) {
+		current->cpu_preempt_cntr = 
+				per_cpu(next_preempt_cntr, smp_processor_id());
+		atomic_inc(current->cpu_preempt_cntr);
+	}
+}
+
+static inline int is_rcu_cpu_preempted(int cpu)
+{
+	return (atomic_read(per_cpu(curr_preempt_cntr, cpu)) != 0);
+}
+#else
+
+#define rcu_init_preempt_cntr(cpu) do { } while(0)
+#define rcu_switch_preempt_cntr(cpu) do { } while(0)
+#define rcu_preempt_put() do { } while(0)
+#define rcu_preempt_get() do { } while(0)
+#define is_rcu_cpu_preempted(cpu) (0)
+
+#endif
+
 #endif /* __KERNEL__ */
 
 #endif
diff -urN linux-2.5.34-base/init/main.c linux-2.5.34-rcu_poll/init/main.c
--- linux-2.5.34-base/init/main.c	Mon Sep  9 23:05:04 2002
+++ linux-2.5.34-rcu_poll/init/main.c	Wed Sep 11 11:45:03 2002
@@ -30,6 +30,7 @@
 #include <linux/percpu.h>
 #include <linux/kernel_stat.h>
 #include <linux/security.h>
+#include <linux/rcupdate.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -399,6 +400,7 @@
 	printk("Kernel command line: %s\n", saved_command_line);
 	parse_options(command_line);
 	trap_init();
+	rcu_init();
 	init_IRQ();
 	sched_init();
 	softirq_init();
diff -urN linux-2.5.34-base/kernel/Makefile linux-2.5.34-rcu_poll/kernel/Makefile
--- linux-2.5.34-base/kernel/Makefile	Mon Sep  9 23:05:05 2002
+++ linux-2.5.34-rcu_poll/kernel/Makefile	Wed Sep 11 11:45:03 2002
@@ -10,12 +10,12 @@
 O_TARGET := kernel.o
 
 export-objs = signal.o sys.o kmod.o context.o ksyms.o pm.o exec_domain.o \
-		printk.o platform.o suspend.o dma.o
+		printk.o platform.o suspend.o dma.o rcupdate.o
 
 obj-y     = sched.o fork.o exec_domain.o panic.o printk.o \
 	    module.o exit.o itimer.o time.o softirq.o resource.o \
 	    sysctl.o capability.o ptrace.o timer.o user.o \
-	    signal.o sys.o kmod.o context.o futex.o platform.o
+	    signal.o sys.o kmod.o context.o futex.o platform.o rcupdate.o
 
 obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
 obj-$(CONFIG_SMP) += cpu.o
diff -urN linux-2.5.34-base/kernel/exit.c linux-2.5.34-rcu_poll/kernel/exit.c
--- linux-2.5.34-base/kernel/exit.c	Mon Sep  9 23:05:07 2002
+++ linux-2.5.34-rcu_poll/kernel/exit.c	Wed Sep 11 11:45:03 2002
@@ -616,6 +616,7 @@
 	preempt_disable();
 	if (current->exit_signal == -1)
 		release_task(current);
+	rcu_preempt_put();
 	schedule();
 	BUG();
 /*
diff -urN linux-2.5.34-base/kernel/fork.c linux-2.5.34-rcu_poll/kernel/fork.c
--- linux-2.5.34-base/kernel/fork.c	Mon Sep  9 23:05:02 2002
+++ linux-2.5.34-rcu_poll/kernel/fork.c	Wed Sep 11 11:45:03 2002
@@ -155,6 +155,7 @@
 	tsk->thread_info = ti;
 	ti->task = tsk;
 	atomic_set(&tsk->usage,1);
+	tsk->cpu_preempt_cntr = NULL;
 
 	return tsk;
 }
diff -urN linux-2.5.34-base/kernel/rcupdate.c linux-2.5.34-rcu_poll/kernel/rcupdate.c
--- linux-2.5.34-base/kernel/rcupdate.c	Thu Jan  1 05:30:00 1970
+++ linux-2.5.34-rcu_poll/kernel/rcupdate.c	Wed Sep 11 13:20:52 2002
@@ -0,0 +1,294 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (c) IBM Corporation, 2001
+ * Copyright (C) Andrea Arcangeli <andrea@suse.de> SuSE, 2001
+ *
+ * Author: Dipankar Sarma <dipankar@in.ibm.com>,
+ *	   Andrea Arcangeli <andrea@suse.de>
+ * 
+ * Based on the original work by Paul McKenney <paul.mckenney@us.ibm.com>
+ * and inputs from Andrea Arcangeli, Rusty Russell, Andi Kleen etc.
+ * Papers:
+ * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
+ * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ * 		http://lse.sourceforge.net/locking/rcupdate.html
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/completion.h>
+#include <linux/percpu.h>
+#include <linux/rcupdate.h>
+
+struct rcu_data {
+	struct list_head nxtlist;
+	struct list_head curlist;
+	struct tasklet_struct tasklet;
+	unsigned long qsmask;
+	int polling;
+	long qcheckpt[NR_CPUS];
+} ____cacheline_aligned_in_smp;
+
+static struct rcu_data rcu_data[NR_CPUS] __cacheline_aligned;
+static void rcu_data_init(struct rcu_data *);
+
+#ifdef CONFIG_PREEMPT
+static spinlock_t rcu_lock_preempt = SPIN_LOCK_UNLOCKED;
+static struct rcu_data rcu_data_preempt __cacheline_aligned;
+static inline void rcu_preempt_lock(void) 
+{
+	spin_lock_bh(&rcu_lock_preempt);
+}
+static inline void rcu_preempt_unlock(void) 
+{
+	spin_unlock_bh(&rcu_lock_preempt);
+}
+static inline void rcu_data_preempt_init(void) 
+{
+	rcu_data_init(&rcu_data_preempt);
+}
+#define is_rcu_preempt_data(rdata) (rdata == &rcu_data_preempt)
+#else
+static inline void rcu_preempt_lock(void)	{}
+static inline void rcu_preempt_unlock(void) 	{}
+static inline void rcu_data_preempt_init(void)  {}
+#define is_rcu_preempt_data(rdata) (0)
+#endif
+
+#define RCU_quiescent(cpu) per_cpu(cpu_quiescent, cpu)
+
+/**
+ * call_rcu - Queue an RCU update request. 
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual update function to be invoked after the grace period
+ * @arg: argument to be passed to the update function
+ *
+ * The update function will be invoked as soon as all CPUs have performed a 
+ * context switch or been seen in the idle loop or in a user process. 
+ * It can be called only from process or BH context; however, it can
+ * be made to work from irq context too with minor code changes
+ * if necessary. Read-side critical sections that use
+ * call_rcu() for updates must be protected by
+ * rcu_read_lock()/rcu_read_unlock().
+ */
+void call_rcu(struct rcu_head *head, void (*func)(void *arg), void *arg)
+{
+	struct rcu_data *rdata;
+
+	head->func = func;
+	head->arg = arg;
+ 	rdata = &rcu_data[get_cpu()];
+	local_bh_disable();
+	list_add(&head->list, &rdata->nxtlist);
+	local_bh_enable();
+	tasklet_hi_schedule(&rdata->tasklet);
+	put_cpu();
+}
+
+#ifdef CONFIG_PREEMPT
+/**
+ * call_rcu_preempt - Queue an RCU update request for preempt-safe reads
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual update function to be invoked after the grace period
+ * @arg: argument to be passed to the update function
+ *
+ * The update function will be invoked as soon as all CPUs have performed a
+ * context switch or been seen in the idle loop or in a user process. 
+ * It can be called only from process or BH context; however, it can
+ * be made to work from irq context too with minor code changes if
+ * necessary. The read-side critical sections don't require any
+ * protection, but updates may see long grace periods in preemptive kernels.
+ * It should not be used for things like deferred kfree() unless such 
+ * use can be guaranteed to be extremely infrequent. Doing otherwise
+ * can result in all your memory being consumed while waiting for a
+ * low-priority preempted task to be rescheduled.
+ */
+void call_rcu_preempt(struct rcu_head *head, void (*func)(void *arg), void *arg)
+{
+	struct rcu_data *rdata = &rcu_data_preempt;
+
+	head->func = func;
+	head->arg = arg;
+	rcu_preempt_lock();
+	list_add(&head->list, &rdata->nxtlist);
+	rcu_preempt_unlock();
+	tasklet_hi_schedule(&rdata->tasklet);
+}
+#endif
+
+/*
+ * Set up grace period detection for one batch of RCUs.
+ */
+static int rcu_prepare_polling(struct rcu_data *rdata)
+{
+	int stop;
+	int i;
+
+	stop = 1;
+	if (!list_empty(&rdata->nxtlist)) {
+		list_splice(&rdata->nxtlist, &rdata->curlist);
+		INIT_LIST_HEAD(&rdata->nxtlist);
+
+		rdata->polling = 1;
+
+		for (i = 0; i < NR_CPUS; i++) {
+			if (!cpu_online(i))
+				continue;
+			rdata->qsmask |= 1UL << i;
+			rdata->qcheckpt[i] = RCU_quiescent(i);
+			if (is_rcu_preempt_data(rdata))
+				rcu_switch_preempt_cntr(i);
+			force_cpu_reschedule(i);
+		}
+		stop = 0;
+	}
+
+	return stop;
+}
+
+/*
+ * Invoke the completed RCU callbacks.
+ */
+static void rcu_invoke_callbacks(struct rcu_data *rdata)
+{
+	struct list_head *entry;
+	struct rcu_head *head;
+
+	entry = rdata->curlist.prev;
+	do {
+		head = list_entry(entry, struct rcu_head, list);
+		entry = entry->prev;
+		head->func(head->arg);
+	} while (entry != &rdata->curlist);
+	INIT_LIST_HEAD(&rdata->curlist);
+}
+
+static int rcu_completion(struct rcu_data *rdata)
+{
+	int stop;
+
+	rdata->polling = 0;
+	rcu_invoke_callbacks(rdata);
+	stop = rcu_prepare_polling(rdata);
+	return stop;
+}
+
+/*
+ * Poll for completion of grace period for this batch.
+ */
+static int rcu_polling(struct rcu_data *rdata)
+{
+	int i;
+	int stop;
+
+	for (i = 0; i < NR_CPUS; i++) {
+		if (!cpu_online(i) ||
+		    !(rdata->qsmask & (1UL << i)))
+			continue;
+
+		if ((rdata->qcheckpt[i] != RCU_quiescent(i)) &&
+		    (!is_rcu_preempt_data(rdata) || !is_rcu_cpu_preempted(i)))
+				rdata->qsmask &= ~(1UL << i);
+		else
+			break;
+	}
+	stop = 0;
+	if (!rdata->qsmask)
+		stop = rcu_completion(rdata);
+	return stop;
+}
+
+/*
+ * Look into the per-cpu callback information to see if there is
+ * any processing necessary - if so do it.
+ */
+static void rcu_process_callbacks(unsigned long data)
+{
+	int stop;
+	struct rcu_data *rdata = (struct rcu_data *)data;
+
+	if (is_rcu_preempt_data(rdata))
+		rcu_preempt_lock();
+	if (!rdata->polling)
+		stop = rcu_prepare_polling(rdata);
+	else
+		stop = rcu_polling(rdata);
+	if (is_rcu_preempt_data(rdata))
+		rcu_preempt_unlock();
+
+	if (!stop)
+		tasklet_hi_schedule(&rdata->tasklet);
+}
+
+/* Because of FASTCALL declaration of complete, we use this wrapper */
+static void wakeme_after_rcu(void *completion)
+{
+	complete(completion);
+}
+
+static void rcu_data_init(struct rcu_data *rdata)
+{
+	tasklet_init(&rdata->tasklet, rcu_process_callbacks, 
+			(unsigned long)rdata);
+	INIT_LIST_HEAD(&rdata->nxtlist);
+	INIT_LIST_HEAD(&rdata->curlist);
+}
+
+/*
+ * Initializes the RCU mechanism.  Assumed to be called early,
+ * that is, before the local timer (SMP) or jiffy timer (UP) is set up.
+ */
+void __init rcu_init(void)
+{
+	int i;
+
+	for (i = 0; i < NR_CPUS; i++) {
+		rcu_data_init(&rcu_data[i]);
+	}
+	rcu_data_preempt_init();
+}
+
+/**
+ * synchronize_kernel - wait until all the CPUs have gone
+ * through a "quiescent" state. It may sleep.
+ */
+void synchronize_kernel(void)
+{
+	struct rcu_head rcu;
+	DECLARE_COMPLETION(completion);
+
+	/* Will wake me after RCU finished */
+	call_rcu_preempt(&rcu, wakeme_after_rcu, &completion);
+
+	/* Wait for it */
+	wait_for_completion(&completion);
+}
+
+EXPORT_SYMBOL(call_rcu);
+EXPORT_SYMBOL(synchronize_kernel);
+#ifdef CONFIG_PREEMPT
+EXPORT_SYMBOL(call_rcu_preempt);
+#endif
diff -urN linux-2.5.34-base/kernel/sched.c linux-2.5.34-rcu_poll/kernel/sched.c
--- linux-2.5.34-base/kernel/sched.c	Mon Sep  9 23:05:05 2002
+++ linux-2.5.34-rcu_poll/kernel/sched.c	Wed Sep 11 11:45:03 2002
@@ -25,6 +25,7 @@
 #include <asm/mmu_context.h>
 #include <linux/interrupt.h>
 #include <linux/completion.h>
+#include <linux/percpu.h>
 #include <linux/kernel_stat.h>
 #include <linux/security.h>
 #include <linux/notifier.h>
@@ -158,12 +159,33 @@
 
 static struct runqueue runqueues[NR_CPUS] __cacheline_aligned;
 
+/* Fake initialization to work around compiler breakage */
+DEFINE_PER_CPU(long, cpu_quiescent) = 0L;
+
 #define cpu_rq(cpu)		(runqueues + (cpu))
 #define this_rq()		cpu_rq(smp_processor_id())
 #define task_rq(p)		cpu_rq(task_cpu(p))
 #define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
 #define rt_task(p)		((p)->prio < MAX_RT_PRIO)
 
+#ifdef CONFIG_PREEMPT
+/* Fake initialization to work around compiler breakage */
+DEFINE_PER_CPU(atomic_t[2], rcu_preempt_cntr) = 
+			{ATOMIC_INIT(0), ATOMIC_INIT(0)};
+DEFINE_PER_CPU(atomic_t, *curr_preempt_cntr) = NULL;
+DEFINE_PER_CPU(atomic_t, *next_preempt_cntr) = NULL;
+static inline void rcu_init_preempt_cntr(int cpu)
+{
+
+	atomic_set(&(per_cpu(rcu_preempt_cntr, cpu)[0]), 0);
+	atomic_set(&(per_cpu(rcu_preempt_cntr, cpu)[1]), 0);
+	per_cpu(curr_preempt_cntr, cpu) = 
+			&(per_cpu(rcu_preempt_cntr, cpu)[1]);
+	per_cpu(next_preempt_cntr, cpu) = 
+			&(per_cpu(rcu_preempt_cntr, cpu)[0]);
+}
+#endif
+
 /*
  * Default context-switch locking:
  */
@@ -858,6 +880,10 @@
 	runqueue_t *rq = this_rq();
 	task_t *p = current;
 
+	if (user_ticks ||
+            (idle_cpu(cpu) && !in_softirq() && hardirq_count() <= 1))
+		per_cpu(cpu_quiescent, cpu)++;
+
 	if (p == rq->idle) {
 		/* note: this timer irq context must be accounted for as well */
 		if (irq_count() - HARDIRQ_OFFSET >= SOFTIRQ_OFFSET)
@@ -961,6 +987,8 @@
 	 */
 	if (unlikely(preempt_count() & PREEMPT_ACTIVE))
 		goto pick_next_task;
+	else
+		rcu_preempt_put();
 
 	switch (prev->state) {
 	case TASK_INTERRUPTIBLE:
@@ -1003,6 +1031,7 @@
 switch_tasks:
 	prefetch(next);
 	clear_tsk_need_resched(prev);
+	per_cpu(cpu_quiescent, prev->thread_info->cpu)++;
 
 	if (likely(prev != next)) {
 		rq->nr_switches++;
@@ -1041,6 +1070,7 @@
 
 need_resched:
 	ti->preempt_count = PREEMPT_ACTIVE;
+	rcu_preempt_get();
 	schedule();
 	ti->preempt_count = 0;
 
@@ -1269,6 +1299,18 @@
 	task_rq_unlock(rq, &flags);
 }
 
+void force_cpu_reschedule(int cpu)
+{
+	struct runqueue *rq;
+
+	rq = cpu_rq(cpu);
+	/* Need to save flags if someday called from irq context */
+	spin_lock_irq(&rq->lock);
+	resched_task(rq->curr);
+	spin_unlock_irq(&rq->lock);
+}
+
+
 #ifndef __alpha__
 
 /*
@@ -2108,6 +2150,7 @@
 		rq->expired = rq->arrays + 1;
 		spin_lock_init(&rq->lock);
 		INIT_LIST_HEAD(&rq->migration_queue);
+		rcu_init_preempt_cntr(i);
 
 		for (j = 0; j < 2; j++) {
 			array = rq->arrays + j;



* Re: [PATCH] Read-Copy Update 2.5.34
  2002-09-11 11:19 [PATCH] Read-Copy Update 2.5.34 Dipankar Sarma
@ 2002-09-11 11:24 ` Ingo Molnar
  2002-09-11 12:20   ` Dipankar Sarma
From: Ingo Molnar @ 2002-09-11 11:24 UTC (permalink / raw)
  To: Dipankar Sarma
  Cc: Linus Torvalds, Rusty Russell, Andrea Arcangeli, Paul McKenney,
	linux-kernel


On Wed, 11 Sep 2002, Dipankar Sarma wrote:

> Ingo, it would be nice to have your comments on this, especially since
> it touches the scheduler.

I don't really understand why it has to change the scheduler. You want a
facility to force a reschedule on any given CPU, correct?

	Ingo



* Re: [PATCH] Read-Copy Update 2.5.34
  2002-09-11 11:24 ` Ingo Molnar
@ 2002-09-11 12:20   ` Dipankar Sarma
  2002-09-11 14:03     ` Robert Love
From: Dipankar Sarma @ 2002-09-11 12:20 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, Rusty Russell, Andrea Arcangeli, Paul McKenney,
	linux-kernel

On Wed, Sep 11, 2002 at 01:24:21PM +0200, Ingo Molnar wrote:
> 
> I don't really understand why it has to change the scheduler. You want a
> facility to force a reschedule on any given CPU, correct?
> 

Hi Ingo,

Yes, like force_cpu_reschedule().

Apart from that, RCU adds some minor things to the scheduler code -

1. Per-CPU context switch counter increment in the fast path
2. For preemptive kernels, a conditional branch in the voluntary
   context switch code path checking whether the current task may 
   have had an involuntary context switch earlier [rcu_preempt_put()].
3. Adding a field to the task structure - cpu_preempt_cntr.
4. An RCU checking hook in scheduler_tick(), but this is not
   in the fast path.

#2 and #3 are necessary to support call_rcu_preempt(), which allows
preemption-safe reads of RCU-protected data. A preempted task may still
hold references to RCU-protected data, so the RCU grace period needs to
be prolonged until all tasks preempted before the update have done a
voluntary context switch.
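
To make the distinction concrete, here is a rough sketch of the
preempt-safe variant (again not part of the patch; struct bar and the
helper names are invented). The read side needs no
rcu_read_lock()/rcu_read_unlock(), at the cost of potentially much
longer grace periods:

struct bar {
	int value;
	struct rcu_head rcu;
};

static struct bar *global_bar;

/*
 * Read side: no rcu_read_lock()/rcu_read_unlock() needed, so the
 * task may be preempted while it still references the old object;
 * the grace period is then extended until that task does a
 * voluntary context switch.
 */
int read_bar(void)
{
	return global_bar->value;
}

static void free_bar(void *arg)
{
	kfree(arg);
}

/*
 * Update side: same publish-then-defer pattern as call_rcu(), but
 * callbacks may be delayed for a long time by a preempted reader,
 * so this must not be used for frequent deferred kfree()s.
 */
void update_bar(struct bar *new)
{
	struct bar *old = global_bar;

	smp_wmb();		/* new's contents become visible first */
	global_bar = new;
	call_rcu_preempt(&old->rcu, free_bar, old);
}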

I did some reflex benchmarking to make sure that I didn't accidentally
introduce any false sharing in the scheduler fast path, and the results
look comparable -

(4CPU P3 Xeon with 1MB L2 + 1GB RAM)

			vanilla-2.5.34	rcu_poll-2.5.34
			--------------  ---------------
80 , 40 , 		1.593		1.569
112 , 40 , 		1.544		1.554
144 , 40 , 		1.595		1.552
176 , 40 , 		1.568		1.605
198 , 40 , 		1.562		1.577
230 , 40 , 		1.563		1.583
244 , 40 , 		1.671		1.638

Not sure how reliable these numbers are.

Thanks
-- 
Dipankar Sarma  <dipankar@in.ibm.com> http://lse.sourceforge.net
Linux Technology Center, IBM Software Lab, Bangalore, India.


* Re: [PATCH] Read-Copy Update 2.5.34
  2002-09-11 12:20   ` Dipankar Sarma
@ 2002-09-11 14:03     ` Robert Love
  2002-09-11 14:29       ` Dipankar Sarma
  2002-09-11 16:35       ` Dipankar Sarma
From: Robert Love @ 2002-09-11 14:03 UTC (permalink / raw)
  To: dipankar
  Cc: Ingo Molnar, Linus Torvalds, Rusty Russell, Andrea Arcangeli,
	Paul McKenney, linux-kernel

On Wed, 2002-09-11 at 08:20, Dipankar Sarma wrote:

> 			vanilla-2.5.34	rcu_poll-2.5.34
> 			--------------  ---------------
> 80 , 40 , 		1.593		1.569
> 112 , 40 , 		1.544		1.554
> 144 , 40 , 		1.595		1.552
> 176 , 40 , 		1.568		1.605
> 198 , 40 , 		1.562		1.577
> 230 , 40 , 		1.563		1.583
> 244 , 40 , 		1.671		1.638
> 
> Not sure how reliable these numbers are.

And how bad is the performance drop from 2.5.34-preempt to
2.5.34-preempt-rcu?

I am glad you guys support kernel preemption (not that you have a chance
at this point) but I hope it was not an afterthought.

	Robert Love



* Re: [PATCH] Read-Copy Update 2.5.34
  2002-09-11 14:03     ` Robert Love
@ 2002-09-11 14:29       ` Dipankar Sarma
  2002-09-11 18:52         ` Robert Love
  2002-09-11 16:35       ` Dipankar Sarma
From: Dipankar Sarma @ 2002-09-11 14:29 UTC (permalink / raw)
  To: Robert Love
  Cc: Ingo Molnar, Linus Torvalds, Rusty Russell, Andrea Arcangeli,
	Paul McKenney, linux-kernel

On Wed, Sep 11, 2002 at 10:03:28AM -0400, Robert Love wrote:
> On Wed, 2002-09-11 at 08:20, Dipankar Sarma wrote:
> 
> > 			vanilla-2.5.34	rcu_poll-2.5.34
> > 			--------------  ---------------
> > 80 , 40 , 		1.593		1.569
> > 112 , 40 , 		1.544		1.554
> > 144 , 40 , 		1.595		1.552
> > 176 , 40 , 		1.568		1.605
> > 198 , 40 , 		1.562		1.577
> > 230 , 40 , 		1.563		1.583
> > 244 , 40 , 		1.671		1.638
> > 
> > Not sure how reliable these numbers are.
> 
> And how bad is the performance drop from 2.5.34-preempt to
> 2.5.34-preempt-rcu?
> 
> I am glad you guys support kernel preemption (not that you have a chance
> at this point) but I hope it was not an afterthought.

Hi Robert,

Sorry, I should have been more careful labelling them - those are 2.5.34-preempt
vs 2.5.34-preempt-rcu numbers. I did them first because the rcu-poll-preempt
kernel has a conditional branch in the fast path and is hence more interesting.
I will publish the vanilla vs rcu_poll reflex numbers in a few minutes.

The preemption support has been in RCU for a very long time. IIRC, I added
it around 2.5.14.

See http://marc.theaimsgroup.com/?l=linux-kernel&m=102084967517192&w=2

Our OLS paper and presentation also deal with preemption -

http://www.rdrop.com/users/paulmck/rclock/rcu.2002.07.08.pdf
http://www.rdrop.com/users/paulmck/rclock/rclock.OLS.2002.07.08a.pdf

Thanks
-- 
Dipankar Sarma  <dipankar@in.ibm.com> http://lse.sourceforge.net
Linux Technology Center, IBM Software Lab, Bangalore, India.


* Re: [PATCH] Read-Copy Update 2.5.34
  2002-09-11 14:03     ` Robert Love
  2002-09-11 14:29       ` Dipankar Sarma
@ 2002-09-11 16:35       ` Dipankar Sarma
From: Dipankar Sarma @ 2002-09-11 16:35 UTC (permalink / raw)
  To: Robert Love
  Cc: Ingo Molnar, Linus Torvalds, Rusty Russell, Andrea Arcangeli,
	Paul McKenney, linux-kernel

On Wed, Sep 11, 2002 at 10:03:28AM -0400, Robert Love wrote:
> On Wed, 2002-09-11 at 08:20, Dipankar Sarma wrote:
> 
> > Not sure how reliable these numbers are.
> 
> And how bad is the performance drop from 2.5.34-preempt to
> 2.5.34-preempt-rcu?

Hi Robert,

Here are the detailed results from reflex benchmark all with
2.5.34 kernel and 4CPU p3 xeon with 1MB L2 cache and 1GB RAM.

 		vanilla-preempt	rcu_poll-preempt vanilla  rcu_poll
		--------------	---------------- -------  --------
80 , 40 , 		1.593	1.569		 1.545	  1.536
112 , 40 , 		1.544	1.554		 1.544	  1.535
144 , 40 , 		1.595	1.552		 1.545    1.586
176 , 40 , 		1.568	1.605		 1.615	  1.536
198 , 40 , 		1.562	1.577		 1.582	  1.651
230 , 40 , 		1.563	1.583		 1.581	  1.554
244 , 40 , 		1.671	1.638		 1.631	  1.571


> 
> I am glad you guys support kernel preemption (not that you have a chance
> at this point) but I hope it was not an afterthought.
> 

Forgot to mention: earlier implementations of RCU, like the one in K42, had
complete preemption support, so the Linux implementation is not really new.
We added it after preemption support went into Linus' tree.

Thanks
-- 
Dipankar Sarma  <dipankar@in.ibm.com> http://lse.sourceforge.net
Linux Technology Center, IBM Software Lab, Bangalore, India.


* Re: [PATCH] Read-Copy Update 2.5.34
  2002-09-11 14:29       ` Dipankar Sarma
@ 2002-09-11 18:52         ` Robert Love
From: Robert Love @ 2002-09-11 18:52 UTC (permalink / raw)
  To: dipankar
  Cc: Ingo Molnar, Linus Torvalds, Rusty Russell, Andrea Arcangeli,
	Paul McKenney, linux-kernel

On Wed, 2002-09-11 at 10:29, Dipankar Sarma wrote:

> Sorry, I should have been more careful labelling them - those are 2.5.34-preempt
> vs 2.5.34-preempt-rcu numbers. I did them first because rcu-poll-preempt
> kernel has a conditinal branch in fast path and hence more interesting. 
> I will publish the vanilla vs rcu_poll reflex numbers in a few minutes 
> from now.

Ahh OK.  Results are OK, then - thanks :-)


> Our OLS paper and presentation too deals with preemption -
> 
> http://www.rdrop.com/users/paulmck/rclock/rcu.2002.07.08.pdf
> http://www.rdrop.com/users/paulmck/rclock/rclock.OLS.2002.07.08a.pdf

Yep.  Was there, saw the talk, got the t-shirt.

	Robert Love


