* [PATCH] In-kernel module loader 3/7
From: Rusty Russell @ 2002-09-18  2:07 UTC
  To: linux-kernel; +Cc: mingo

Name: Bigrefs Implementation
Author: Rusty Russell
Status: Tested on 2.5.35
Depends: Misc/later.patch.gz

D: This is an implementation of cache-friendly reference counts.  The
D: refcounts work in two modes: normal mode just incs and decs a
D: cache-aligned per-cpu counter.  When someone is waiting for the
D: reference count to hit 0, a per-cpu flag is set and all further
D: incs and decs fall back to a single shared counter (which is slower).
D: 
D: This uses the wait_for_later() primitive, but it doesn't need to:
D: it could manually reschedule on each online CPU.
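D: 
D: To make the bias accounting concrete (a worked version of what
D: bigref_wait_for_zero() does, nothing more): slow_count starts at
D: BIGREF_BIAS, and slow-path incs/decs move it to BIGREF_BIAS + delta.
D: Once every CPU is in slow mode the per-cpu counters are frozen; call
D: their sum "total", so the live reference count is total + delta.
D: Subtracting BIGREF_BIAS - total - 1 leaves slow_count at
D: total + delta + 1: the live references plus one temporary reference
D: for the waiter, whose own __bigref_dec() is then "probably final".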

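For reference, a client of this interface might look like the
following (illustrative only: "my_obj" and its helpers are
hypothetical, not part of the patch):

#include <linux/bigref.h>
#include <linux/slab.h>

struct my_obj {
	struct bigref ref;
	/* ... payload ... */
};

static struct my_obj *my_obj_create(void)
{
	struct my_obj *obj = kmalloc(sizeof(*obj), GFP_KERNEL);
	if (obj)
		bigref_init(&obj->ref, 0);
	return obj;
}

/* Fast path: each of these touches only this CPU's cacheline. */
static void my_obj_get(struct my_obj *obj)
{
	bigref_inc(&obj->ref);
}

static void my_obj_put(struct my_obj *obj)
{
	bigref_dec(&obj->ref);
}

/* Teardown: the caller must already have made the object unfindable
   so no new references can be taken; this waits out the stragglers. */
static void my_obj_destroy(struct my_obj *obj)
{
	bigref_wait_for_zero(&obj->ref);
	kfree(obj);
}
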
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .11594-2.5.35-bigrefs.pre/include/linux/bigref.h .11594-2.5.35-bigrefs/include/linux/bigref.h
--- .11594-2.5.35-bigrefs.pre/include/linux/bigref.h	1970-01-01 10:00:00.000000000 +1000
+++ .11594-2.5.35-bigrefs/include/linux/bigref.h	2002-09-18 07:25:34.000000000 +1000
@@ -0,0 +1,73 @@
+#ifndef _LINUX_BIGREF_H
+#define _LINUX_BIGREF_H
+#include <linux/cache.h>
+#include <linux/smp.h>
+#include <linux/compiler.h>
+#include <linux/thread_info.h>
+#include <asm/atomic.h>
+
+/* Big reference counts for Linux.
+ *   (C) Copyright 2002 Paul Russell, IBM Corporation.
+ */
+
+struct bigref_percpu
+{
+	int counter;
+	int slow_mode;
+} ____cacheline_aligned;
+
+struct bigref
+{
+	struct bigref_percpu ref[NR_CPUS];
+
+	atomic_t slow_count;
+	struct task_struct *waiter;
+};
+
+static inline void bigref_init(struct bigref *ref, int value)
+{
+	unsigned int i;
+	for (i = 0; i < NR_CPUS; i++)
+		ref->ref[i].counter = ref->ref[i].slow_mode = 0; /* fast mode */
+	ref->ref[0].counter = value;
+	ref->waiter = NULL; /* To trap bugs */
+}
+
+extern void __bigref_inc(struct bigref *ref);
+extern void __bigref_dec(struct bigref *ref);
+
+/* Stopping interrupts is faster than atomics on many archs (and more
+   easily optimized if they're not) */
+static inline void bigref_inc(struct bigref *ref)
+{
+	unsigned long flags;
+	struct bigref_percpu *cpu;
+
+	local_irq_save(flags);
+	cpu = &ref->ref[smp_processor_id()];
+	if (likely(!cpu->slow_mode))
+		cpu->counter++;
+	else
+		__bigref_inc(ref);
+	local_irq_restore(flags);
+}
+
+static inline void bigref_dec(struct bigref *ref)
+{
+	unsigned long flags;
+	struct bigref_percpu *cpu;
+
+	local_irq_save(flags);
+	cpu = &ref->ref[smp_processor_id()];
+	if (likely(!cpu->slow_mode))
+		cpu->counter--;
+	else
+		__bigref_dec(ref);
+	local_irq_restore(flags);
+}
+
+/* Get the approximate value */
+extern int bigref_approx_val(struct bigref *ref);
+
+extern void bigref_wait_for_zero(struct bigref *ref);
+#endif /* _LINUX_BIGREF_H */ 
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .11594-2.5.35-bigrefs.pre/kernel/Makefile .11594-2.5.35-bigrefs/kernel/Makefile
--- .11594-2.5.35-bigrefs.pre/kernel/Makefile	2002-09-18 07:25:32.000000000 +1000
+++ .11594-2.5.35-bigrefs/kernel/Makefile	2002-09-18 07:25:34.000000000 +1000
@@ -10,12 +10,12 @@
 O_TARGET := kernel.o
 
 export-objs = signal.o sys.o kmod.o context.o ksyms.o pm.o exec_domain.o \
-		printk.o platform.o suspend.o dma.o
+		printk.o platform.o suspend.o dma.o bigref.o
 
 obj-y     = sched.o fork.o exec_domain.o panic.o printk.o \
 	    module.o exit.o itimer.o time.o softirq.o resource.o \
 	    sysctl.o capability.o ptrace.o timer.o user.o \
-	    signal.o sys.o kmod.o context.o futex.o platform.o
+	    signal.o sys.o kmod.o context.o futex.o platform.o bigref.o
 
 obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
 obj-$(CONFIG_SMP) += cpu.o
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .11594-2.5.35-bigrefs.pre/kernel/bigref.c .11594-2.5.35-bigrefs/kernel/bigref.c
--- .11594-2.5.35-bigrefs.pre/kernel/bigref.c	1970-01-01 10:00:00.000000000 +1000
+++ .11594-2.5.35-bigrefs/kernel/bigref.c	2002-09-18 07:25:34.000000000 +1000
@@ -0,0 +1,74 @@
+/* Big reference counts for Linux.
+ *   (C) Copyright 2002 Paul Russell, IBM Corporation.
+ */
+#include <linux/bigref.h>
+#include <linux/later.h>
+#include <linux/module.h>
+
+/* atomic_t is only 24 bits on sparc, so keep the bias at 1 << 23. */
+#define BIGREF_BIAS (1 << 23)
+
+void __bigref_inc(struct bigref *ref)
+{
+	/* The caller must have read slow_mode before touching slow_count;
+           some architectures won't order those reads without a barrier. */
+	rmb();
+	atomic_inc(&ref->slow_count);
+}
+
+void __bigref_dec(struct bigref *ref)
+{
+	/* The caller must have read slow_mode before touching slow_count;
+           some architectures won't order those reads without a barrier. */
+	rmb();
+	if (atomic_dec_and_test(&ref->slow_count))
+		wake_up_process(ref->waiter);
+}
+
+/* Get the approximate value */
+int bigref_approx_val(struct bigref *ref)
+{
+	unsigned int i;
+	int total = 0;
+
+	for (i = 0; i < NR_CPUS; i++)
+		total += ref->ref[i].counter;
+
+	return total;
+}
+
+void bigref_wait_for_zero(struct bigref *ref)
+{
+	unsigned int i;
+	int total;
+
+	atomic_set(&ref->slow_count, BIGREF_BIAS);
+	wmb();
+	for (i = 0; i < NR_CPUS; i++)
+		ref->ref[i].slow_mode = 1;
+	wmb();
+	/* Wait for that to sink in everywhere... */
+	wait_for_later();
+
+	/* Sum all the (now inactive) per-cpu counters */
+	for (i = 0, total = 0; i < NR_CPUS; i++)
+		total += ref->ref[i].counter;
+
+	/* Move those counters into the slow counter and remove the bias,
+           leaving one extra refcount for us (live references + 1). */
+	atomic_sub(BIGREF_BIAS - total - 1, &ref->slow_count);
+
+	/* Someone may dec to zero after the next step, so be ready. */
+	ref->waiter = current;
+	current->state = TASK_UNINTERRUPTIBLE;
+	wmb();
+
+	/* Drop (probably final) refcount */
+	__bigref_dec(ref);
+	schedule();
+}
+
+EXPORT_SYMBOL(__bigref_inc);
+EXPORT_SYMBOL(__bigref_dec);
+EXPORT_SYMBOL(bigref_approx_val);
+EXPORT_SYMBOL(bigref_wait_for_zero);

--
  Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

* Re: [PATCH] In-kernel module loader 3/7
From: Roman Zippel @ 2002-09-18 23:07 UTC
  To: Rusty Russell; +Cc: linux-kernel

Hi,

On Wed, 18 Sep 2002, Rusty Russell wrote:

> +/* Stopping interrupts is faster than atomics on many archs (and more
> +   easily optimized if they're not) */
> +static inline void bigref_inc(struct bigref *ref)
> +{
> +	unsigned long flags;
> +	struct bigref_percpu *cpu;
> +
> +	local_irq_save(flags);
> +	cpu = &ref->ref[smp_processor_id()];
> +	if (likely(!cpu->slow_mode))
> +		cpu->counter++;

Did you benchmark this? On most UP machines an inc/dec should be cheaper
than irq enable/disable.

bye, Roman


* Re: [PATCH] In-kernel module loader 3/7
From: Robert Love @ 2002-09-18 23:11 UTC
  To: Roman Zippel; +Cc: Rusty Russell, linux-kernel

On Wed, 2002-09-18 at 19:07, Roman Zippel wrote:

> On Wed, 18 Sep 2002, Rusty Russell wrote:
> 
> > +/* Stopping interrupts is faster than atomics on many archs (and more
> > +   easily optimized if they're not) */
> > +static inline void bigref_inc(struct bigref *ref)
> > +{
> > +	unsigned long flags;
> > +	struct bigref_percpu *cpu;
> > +
> > +	local_irq_save(flags);
> > +	cpu = &ref->ref[smp_processor_id()];
> > +	if (likely(!cpu->slow_mode))
> > +		cpu->counter++;
> 
> Did you benchmark this? On most UP machines an inc/dec should be cheaper
> than irq enable/disable.

Yah.  I would think that, due to pipeline effects, disabling interrupts
would never be faster than an atomic inc/dec, assuming the architecture
has normal locking.

	Robert Love


* Re: [PATCH] In-kernel module loader 3/7
From: Rusty Russell @ 2002-09-19  2:05 UTC
  To: Roman Zippel; +Cc: linux-kernel, anton

In message <Pine.LNX.4.44.0209190042370.8911-100000@serv> you write:
> Hi,
> 
> On Wed, 18 Sep 2002, Rusty Russell wrote:
> 
> > +/* Stopping interrupts is faster than atomics on many archs (and more
> > +   easily optimized if they're not) */
> > +static inline void bigref_inc(struct bigref *ref)
> > +{
> > +	unsigned long flags;
> > +	struct bigref_percpu *cpu;
> > +
> > +	local_irq_save(flags);
> > +	cpu = &ref->ref[smp_processor_id()];
> > +	if (likely(!cpu->slow_mode))
> > +		cpu->counter++;
> 
> Did you benchmark this? On most UP machines an inc/dec should be cheaper
> than irq enable/disable.

Oops, I forgot to test that: I had both implementations.

Doing a million-iteration loop (so there's loop overhead):

	350MHz dual Pentium II: atomic is almost twice as fast.
	500MHz PPC G4: atomic is 3.5 times as fast.
	375MHz 4-way Power3: atomic is 10% faster.
	1.3GHz 4-way Power4: atomic was 20% slower (depending on the
	       version of irq_restore).

I suspect a P4 might get similar pro-irq results (code below,
anyone?)

Ideally we'd have a "local_inc()" and "local_dec()".  Architectures
which do soft interrupt enabling should see a real win from the
save/restore version.
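
On x86 it might look something like this (a hypothetical sketch; no
such primitive exists in the tree today):

typedef struct { volatile long counter; } local_t;

/* A single inc/dec instruction on memory can't be split by an
   interrupt on this CPU, so this needs neither irq disabling nor
   the bus-locked cycle that atomic_inc() pays for on SMP. */
static inline void local_inc(local_t *v)
{
	__asm__ __volatile__("incl %0" : "=m" (v->counter) : "m" (v->counter));
}

static inline void local_dec(local_t *v)
{
	__asm__ __volatile__("decl %0" : "=m" (v->counter) : "m" (v->counter));
}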

Meanwhile, I'll revert to atomics, since they're sometimes dramatically
faster.

Thanks!
Rusty.
--
  Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

static void test(void)
{
	struct timeval start, end;
	atomic_t x;
	volatile unsigned int tmp;	/* don't let gcc hoist or drop the inc */
	unsigned int i, diff;
	unsigned long flags;

	/* Atomic test. */
	atomic_set(&x, 0);
	do_gettimeofday(&start);
	for (i = 0; i < 1000000; i++)
		atomic_dec(&x);
	do_gettimeofday(&end);

	diff = (end.tv_sec - start.tv_sec) * 1000000
		+ (end.tv_usec - start.tv_usec);
	
	printk("Atomic test: %u usec\n", diff);

	/* Interrupt test (interrupts enabled) */
	tmp = 0;
	do_gettimeofday(&start);
	for (i = 0; i < 1000000; i++) {
		local_irq_save(flags);
		tmp++;
		local_irq_restore(flags);
	}
	do_gettimeofday(&end);

	diff = (end.tv_sec - start.tv_sec) * 1000000
		+ (end.tv_usec - start.tv_usec);
	
	printk("Interrupt test: %u usec\n", diff);

	/* Interrupt test (interrupts disabled) */
	tmp = 0;
	local_irq_disable();
	do_gettimeofday(&start);
	for (i = 0; i < 1000000; i++) {
		local_irq_save(flags);
		tmp++;
		local_irq_restore(flags);
	}
	do_gettimeofday(&end);
	local_irq_enable();

	diff = (end.tv_sec - start.tv_sec) * 1000000
		+ (end.tv_usec - start.tv_usec);
	
	printk("Interrupt test (interrupts disabled): %u usec\n", diff);
}
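
For anyone who wants to run it elsewhere: wrap test() in a trivial
module, something like this (hypothetical harness, not part of the
patch):

#include <linux/module.h>
#include <linux/init.h>

static int __init bigref_bench_init(void)
{
	test();
	return 0;
}
module_init(bigref_bench_init);
MODULE_LICENSE("GPL");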
