linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Christoph Lameter <cl@linux.com>
To: Ingo Molnar <mingo@elte.hu>
Cc: Tejun Heo <tj@kernel.org>,
	Martin Schwidefsky <schwidefsky@de.ibm.com>,
	rusty@rustcorp.com.au, tglx@linutronix.de, x86@kernel.org,
	linux-kernel@vger.kernel.org, hpa@zytor.com,
	Paul Mundt <lethal@linux-sh.org>,
	rmk@arm.linux.org.uk, starvik@axis.com, ralf@linux-mips.org,
	davem@davemloft.net, cooloney@kernel.org, kyle@mcmartin.ca,
	matthew@wil.cx, grundler@parisc-linux.org, takata@linux-m32r.org,
	benh@kernel.crashing.org, rth@twiddle.net,
	ink@jurassic.park.msu.ru, heiko.carstens@de.ibm.com,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Nick Piggin <npiggin@suse.de>
Subject: Re: [PATCH UPDATED] percpu: use dynamic percpu allocator as the default percpu allocator
Date: Tue, 31 Mar 2009 12:14:20 -0400 (EDT)	[thread overview]
Message-ID: <alpine.DEB.1.10.0903311212360.17960@qirst.com> (raw)
In-Reply-To: <alpine.DEB.1.10.0903301049060.13333@qirst.com>

This needs additional feedback and review by Tejun, I would think. It has only
been compile tested so far. The removal of the rbtree also relaxes the locking
restrictions for pcpu_chunk_addr_search() (which is not really searching anymore).


Subject: dynamic percpu allocator: Remove rbtree

The rbtree is used to determine the chunk from the virtual address. However,
we can already determine the page struct from a virtual address, and several
fields of the page struct are unused for pages used by vmalloc. Use the index
field to store a pointer to the chunk. Then the rbtree is no longer needed.

Signed-off-by: Christoph Lameter <cl@linux.com>

---
 mm/percpu.c |  101 +++++++++++-------------------------------------------------
 1 file changed, 19 insertions(+), 82 deletions(-)

Index: linux-2.6/mm/percpu.c
===================================================================
--- linux-2.6.orig/mm/percpu.c	2009-03-31 11:02:01.000000000 -0500
+++ linux-2.6/mm/percpu.c	2009-03-31 11:04:04.000000000 -0500
@@ -23,7 +23,7 @@
  * Allocation is done in offset-size areas of single unit space.  Ie,
  * an area of 512 bytes at 6k in c1 occupies 512 bytes at 6k of c1:u0,
  * c1:u1, c1:u2 and c1:u3.  Percpu access can be done by configuring
- * percpu base registers UNIT_SIZE apart.
+ * percpu base registers pcpu_unit_size apart.
  *
  * There are usually many small percpu allocations many of them as
  * small as 4 bytes.  The allocator organizes chunks into lists
@@ -38,8 +38,8 @@
  * region and negative allocated.  Allocation inside a chunk is done
  * by scanning this map sequentially and serving the first matching
  * entry.  This is mostly copied from the percpu_modalloc() allocator.
- * Chunks are also linked into a rb tree to ease address to chunk
- * mapping during free.
+ * Chunks can be determined from the address using the index field
+ * in the page struct. The index field contains a pointer to the chunk.
  *
  * To use this allocator, arch code should do the followings.
  *
@@ -61,7 +61,6 @@
 #include <linux/mutex.h>
 #include <linux/percpu.h>
 #include <linux/pfn.h>
-#include <linux/rbtree.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/vmalloc.h>
@@ -88,7 +87,6 @@

 struct pcpu_chunk {
 	struct list_head	list;		/* linked to pcpu_slot lists */
-	struct rb_node		rb_node;	/* key is chunk->vm->addr */
 	int			free_size;	/* free bytes in the chunk */
 	int			contig_hint;	/* max contiguous size hint */
 	struct vm_struct	*vm;		/* mapped vmalloc region */
@@ -121,7 +119,7 @@ static int pcpu_reserved_chunk_limit;
  * There are two locks - pcpu_alloc_mutex and pcpu_lock.  The former
  * protects allocation/reclaim paths, chunks and chunk->page arrays.
  * The latter is a spinlock and protects the index data structures -
- * chunk slots, rbtree, chunks and area maps in chunks.
+ * chunk slots, chunks and area maps in chunks.
  *
  * During allocation, pcpu_alloc_mutex is kept locked all the time and
  * pcpu_lock is grabbed and released as necessary.  All actual memory
@@ -140,7 +138,6 @@ static DEFINE_MUTEX(pcpu_alloc_mutex);	/
 static DEFINE_SPINLOCK(pcpu_lock);	/* protects index data structures */

 static struct list_head *pcpu_slot; /* chunk list slots */
-static struct rb_root pcpu_addr_root = RB_ROOT;	/* chunks by address */

 /* reclaim work to release fully free chunks, scheduled from free path */
 static void pcpu_reclaim(struct work_struct *work);
@@ -257,49 +254,27 @@ static void pcpu_chunk_relocate(struct p
 	}
 }

-static struct rb_node **pcpu_chunk_rb_search(void *addr,
-					     struct rb_node **parentp)
+/* Set the pointer to a chunk in a page struct */
+static inline void set_chunk(struct page *page, struct pcpu_chunk *pcpu)
 {
-	struct rb_node **p = &pcpu_addr_root.rb_node;
-	struct rb_node *parent = NULL;
-	struct pcpu_chunk *chunk;
-
-	while (*p) {
-		parent = *p;
-		chunk = rb_entry(parent, struct pcpu_chunk, rb_node);
-
-		if (addr < chunk->vm->addr)
-			p = &(*p)->rb_left;
-		else if (addr > chunk->vm->addr)
-			p = &(*p)->rb_right;
-		else
-			break;
-	}
+	page->index = (unsigned long)pcpu;
+}

-	if (parentp)
-		*parentp = parent;
-	return p;
+/* Obtain pointer to a chunk from a page struct */
+static inline struct pcpu_chunk*get_chunk(struct page *page)
+{
+	return (struct pcpu_chunk *)page->index;
 }

 /**
- * pcpu_chunk_addr_search - search for chunk containing specified address
- * @addr: address to search for
- *
- * Look for chunk which might contain @addr.  More specifically, it
- * searchs for the chunk with the highest start address which isn't
- * beyond @addr.
- *
- * CONTEXT:
- * pcpu_lock.
+ * pcpu_chunk_addr_search - determine chunk containing specified address
+ * @addr: address for which the chunk needs to be determined.
  *
  * RETURNS:
  * The address of the found chunk.
  */
 static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
 {
-	struct rb_node *n, *parent;
-	struct pcpu_chunk *chunk;
-
 	/* is it in the reserved chunk? */
 	if (pcpu_reserved_chunk) {
 		void *start = pcpu_reserved_chunk->vm->addr;
@@ -308,42 +283,7 @@ static struct pcpu_chunk *pcpu_chunk_add
 			return pcpu_reserved_chunk;
 	}

-	/* nah... search the regular ones */
-	n = *pcpu_chunk_rb_search(addr, &parent);
-	if (!n) {
-		/* no exactly matching chunk, the parent is the closest */
-		n = parent;
-		BUG_ON(!n);
-	}
-	chunk = rb_entry(n, struct pcpu_chunk, rb_node);
-
-	if (addr < chunk->vm->addr) {
-		/* the parent was the next one, look for the previous one */
-		n = rb_prev(n);
-		BUG_ON(!n);
-		chunk = rb_entry(n, struct pcpu_chunk, rb_node);
-	}
-
-	return chunk;
-}
-
-/**
- * pcpu_chunk_addr_insert - insert chunk into address rb tree
- * @new: chunk to insert
- *
- * Insert @new into address rb tree.
- *
- * CONTEXT:
- * pcpu_lock.
- */
-static void pcpu_chunk_addr_insert(struct pcpu_chunk *new)
-{
-	struct rb_node **p, *parent;
-
-	p = pcpu_chunk_rb_search(new->vm->addr, &parent);
-	BUG_ON(*p);
-	rb_link_node(&new->rb_node, parent, p);
-	rb_insert_color(&new->rb_node, &pcpu_addr_root);
+	return get_chunk(vmalloc_to_page(addr));
 }

 /**
@@ -755,6 +695,7 @@ static int pcpu_populate_chunk(struct pc
 						  alloc_mask, 0);
 			if (!*pagep)
 				goto err;
+			set_chunk(*pagep, chunk);
 		}
 	}

@@ -879,7 +820,6 @@ restart:

 	spin_lock_irq(&pcpu_lock);
 	pcpu_chunk_relocate(chunk, -1);
-	pcpu_chunk_addr_insert(chunk);
 	goto restart;

 area_found:
@@ -968,7 +908,6 @@ static void pcpu_reclaim(struct work_str
 		if (chunk == list_first_entry(head, struct pcpu_chunk, list))
 			continue;

-		rb_erase(&chunk->rb_node, &pcpu_addr_root);
 		list_move(&chunk->list, &todo);
 	}

@@ -1202,6 +1141,7 @@ size_t __init pcpu_setup_first_chunk(pcp
 			if (!page)
 				break;
 			*pcpu_chunk_pagep(schunk, cpu, i) = page;
+			set_chunk(page, schunk);
 		}

 		BUG_ON(i < PFN_UP(static_size));
@@ -1226,13 +1166,10 @@ size_t __init pcpu_setup_first_chunk(pcp
 	}

 	/* link the first chunk in */
-	if (!dchunk) {
+	if (!dchunk)
 		pcpu_chunk_relocate(schunk, -1);
-		pcpu_chunk_addr_insert(schunk);
-	} else {
+	else
 		pcpu_chunk_relocate(dchunk, -1);
-		pcpu_chunk_addr_insert(dchunk);
-	}

 	/* we're done */
 	pcpu_base_addr = (void *)pcpu_chunk_addr(schunk, 0, 0);


  parent reply	other threads:[~2009-03-31 16:19 UTC|newest]

Thread overview: 95+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-03-10  7:53 [GIT RFC] percpu: use dynamic percpu allocator as the default percpu allocator Tejun Heo
2009-03-10  7:53 ` [PATCH 1/5] linker script: define __per_cpu_load on all SMP capable archs Tejun Heo
2009-03-10  7:53 ` [PATCH 2/5] percpu: make x86 addr <-> pcpu ptr conversion macros generic Tejun Heo
2009-03-10  7:53 ` [PATCH 3/5] percpu: more flexibility for @dyn_size of pcpu_setup_first_chunk() Tejun Heo
2009-03-10  7:53 ` [PATCH 4/5] percpu: generalize embedding first chunk setup helper Tejun Heo
2009-03-10  7:53 ` [PATCH 5/5] percpu: use dynamic percpu allocator as the default percpu allocator Tejun Heo
2009-03-10  7:57 ` test module to verify " Tejun Heo
2009-03-10 10:59 ` [GIT RFC] percpu: use dynamic percpu allocator as the default " David Miller
2009-03-11  6:03   ` Tejun Heo
2009-03-11  5:56 ` [GIT PULL] pull request for safe part Tejun Heo
2009-03-16 18:01 ` [GIT RFC] percpu: use dynamic percpu allocator as the default percpu allocator Martin Schwidefsky
2009-03-20  2:35   ` Tejun Heo
2009-03-24 15:22     ` Tejun Heo
2009-03-25 11:27       ` Martin Schwidefsky
2009-03-25 11:51         ` Tejun Heo
2009-03-25 12:22           ` Ingo Molnar
2009-03-25 12:27             ` Tejun Heo
2009-03-25 12:39               ` Ingo Molnar
2009-03-25 13:13               ` Martin Schwidefsky
2009-03-25 13:21                 ` Tejun Heo
2009-03-25 13:25                   ` Martin Schwidefsky
2009-03-25 13:34                     ` Tejun Heo
2009-03-31 16:54                       ` Martin Schwidefsky
2009-03-31 17:20                         ` Christoph Lameter
2009-03-31 20:18                           ` Martin Schwidefsky
2009-03-31 21:10                             ` Christoph Lameter
2009-04-01  8:01                               ` Martin Schwidefsky
2009-03-31 19:17                         ` Ivan Kokshaysky
2009-03-31 20:19                           ` Martin Schwidefsky
2009-03-31 20:29                             ` Ivan Kokshaysky
2009-04-01  0:07                         ` Tejun Heo
2009-04-01  8:10                           ` Martin Schwidefsky
2009-04-01  8:17                             ` Tejun Heo
2009-04-01  8:32                               ` Martin Schwidefsky
2009-04-01  8:37                                 ` David Miller
2009-04-01  8:47                                   ` Martin Schwidefsky
2009-04-01  8:50                                     ` Tejun Heo
2009-04-01  9:08                                       ` Martin Schwidefsky
2009-04-02  1:54                                         ` Tejun Heo
2009-04-01  8:53                                     ` David Miller
2009-04-01  8:53                                 ` Tejun Heo
2009-04-01 11:07                                   ` Martin Schwidefsky
2009-04-02  1:57                                     ` Tejun Heo
2009-04-02  7:24                                       ` Ivan Kokshaysky
2009-04-02 11:13                                         ` Martin Schwidefsky
2009-04-03  0:31                                           ` Tejun Heo
2009-04-07 16:09                                             ` Ivan Kokshaysky
2009-04-08 20:18                                               ` Tejun Heo
2009-04-09  9:47                                                 ` Ivan Kokshaysky
2009-04-09 11:53                                                   ` Tejun Heo
2009-04-11  1:38                                                     ` Rusty Russell
2009-04-11  1:52                                                       ` Tejun Heo
2009-04-02  0:20                               ` Rusty Russell
2009-03-25 14:00           ` Martin Schwidefsky
2009-03-25 14:14             ` Tejun Heo
2009-03-30 10:07               ` [PATCH UPDATED] " Tejun Heo
2009-03-30 10:42                 ` Martin Schwidefsky
2009-04-01  0:08                   ` Tejun Heo
2009-03-30 11:49                 ` Ingo Molnar
2009-03-30 14:50                   ` Christoph Lameter
2009-03-31 16:12                     ` Christoph Lameter
2009-04-01  0:15                       ` Tejun Heo
2009-04-01 13:49                         ` Christoph Lameter
2009-04-01 15:49                           ` Ingo Molnar
2009-04-01 18:06                             ` Christoph Lameter
2009-04-01 19:01                               ` Ingo Molnar
2009-04-01 19:39                                 ` Linus Torvalds
2009-04-01 20:12                                   ` Matthew Wilcox
2009-04-02  2:13                                     ` Ingo Molnar
2009-04-01 22:32                                   ` Ingo Molnar
2009-04-01 22:57                                     ` Matthew Wilcox
2009-04-02  2:10                                       ` Ingo Molnar
2009-04-02  2:21                                         ` Christoph Lameter
2009-04-02  3:25                                           ` Ingo Molnar
2009-04-02  3:28                                             ` Christoph Lameter
2009-04-02  2:30                                       ` Tejun Heo
2009-04-02  2:18                                     ` Christoph Lameter
2009-04-02  3:42                                       ` Ingo Molnar
2009-04-02 13:53                                         ` Christoph Lameter
2009-04-08 16:26                                           ` Ingo Molnar
2009-04-13 18:18                                             ` Christoph Lameter
2009-04-14 14:04                                               ` Ingo Molnar
2009-04-14 16:48                                                 ` Christoph Lameter
2009-04-14 17:12                                                   ` Ingo Molnar
2009-04-02  2:15                                 ` Christoph Lameter
2009-04-02  4:19                       ` [PATCH 1/2 x86#core/percpu] percpu: don't put the first chunk in reverse-map rbtree Tejun Heo
2009-04-02  4:21                         ` [PATCH 2/2 x86#core/percpu] percpu: remove rbtree and use page->index instead Tejun Heo
2009-04-08 17:03                           ` [tip:core/percpu] " Christoph Lameter
2009-04-08 17:03                         ` [tip:core/percpu] percpu: don't put the first chunk in reverse-map rbtree Tejun Heo
2009-03-31 16:14                     ` Christoph Lameter [this message]
2009-04-01  0:18                       ` [PATCH UPDATED] percpu: use dynamic percpu allocator as the default percpu allocator Tejun Heo
2009-03-31  1:34                   ` Rusty Russell
2009-03-31 22:57                     ` David Miller
2009-03-31 23:49                     ` Benjamin Herrenschmidt
2009-04-01  0:19                       ` Tejun Heo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=alpine.DEB.1.10.0903311212360.17960@qirst.com \
    --to=cl@linux.com \
    --cc=benh@kernel.crashing.org \
    --cc=cooloney@kernel.org \
    --cc=davem@davemloft.net \
    --cc=grundler@parisc-linux.org \
    --cc=heiko.carstens@de.ibm.com \
    --cc=hpa@zytor.com \
    --cc=ink@jurassic.park.msu.ru \
    --cc=kyle@mcmartin.ca \
    --cc=lethal@linux-sh.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=matthew@wil.cx \
    --cc=mingo@elte.hu \
    --cc=npiggin@suse.de \
    --cc=ralf@linux-mips.org \
    --cc=rmk@arm.linux.org.uk \
    --cc=rth@twiddle.net \
    --cc=rusty@rustcorp.com.au \
    --cc=schwidefsky@de.ibm.com \
    --cc=starvik@axis.com \
    --cc=takata@linux-m32r.org \
    --cc=tglx@linutronix.de \
    --cc=tj@kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).