From: Tejun Heo <tj@kernel.org>
To: linux-arch@vger.kernel.org, linux-kernel@vger.kernel.org,
mingo@redhat.com, benh@kernel.crashing.org, davem@davemloft.net,
dhowells@redhat.com, npiggin@suse.de, JBeulich@novell.com,
cl@linux-foundation.org, rusty@rustcorp.com.au, hpa@zytor.com,
tglx@linutronix.de, akpm@linux-foundation.org, x86@kernel.org,
andi@firstfloor.org
Cc: Tejun Heo <tj@kernel.org>
Subject: [PATCH 09/20] percpu: move pcpu_lpage_build_unit_map() and pcpul_lpage_dump_cfg() upward
Date: Tue, 21 Jul 2009 19:26:08 +0900 [thread overview]
Message-ID: <1248171979-29166-10-git-send-email-tj@kernel.org> (raw)
In-Reply-To: <1248171979-29166-1-git-send-email-tj@kernel.org>
Unit map handling will be generalized and extended and used for
embedding sparse first chunk and other purposes. Relocate two
unit_map related functions upward in preparation. This patch just
moves the code without any actual change.
Signed-off-by: Tejun Heo <tj@kernel.org>
---
include/linux/percpu.h | 14 +-
mm/percpu.c | 339 ++++++++++++++++++++++++------------------------
2 files changed, 180 insertions(+), 173 deletions(-)
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index d385dbc..570fb18 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -78,6 +78,14 @@ typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr);
typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to);
typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr);
+#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK
+extern int __init pcpu_lpage_build_unit_map(
+ size_t reserved_size, ssize_t *dyn_sizep,
+ size_t *unit_sizep, size_t lpage_size,
+ int *unit_map,
+ pcpu_fc_cpu_distance_fn_t cpu_distance_fn);
+#endif
+
extern size_t __init pcpu_setup_first_chunk(
size_t static_size, size_t reserved_size,
size_t dyn_size, size_t unit_size,
@@ -97,12 +105,6 @@ extern ssize_t __init pcpu_page_first_chunk(
#endif
#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK
-extern int __init pcpu_lpage_build_unit_map(
- size_t reserved_size, ssize_t *dyn_sizep,
- size_t *unit_sizep, size_t lpage_size,
- int *unit_map,
- pcpu_fc_cpu_distance_fn_t cpu_distance_fn);
-
extern ssize_t __init pcpu_lpage_first_chunk(
size_t reserved_size, size_t dyn_size,
size_t unit_size, size_t lpage_size,
diff --git a/mm/percpu.c b/mm/percpu.c
index cfdc03e..b3d0ca0 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1231,6 +1231,178 @@ void free_percpu(void *ptr)
}
EXPORT_SYMBOL_GPL(free_percpu);
+static inline size_t pcpu_calc_fc_sizes(size_t static_size,
+ size_t reserved_size,
+ ssize_t *dyn_sizep)
+{
+ size_t size_sum;
+
+ size_sum = PFN_ALIGN(static_size + reserved_size +
+ (*dyn_sizep >= 0 ? *dyn_sizep : 0));
+ if (*dyn_sizep != 0)
+ *dyn_sizep = size_sum - static_size - reserved_size;
+
+ return size_sum;
+}
+
+#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK
+/**
+ * pcpu_lpage_build_unit_map - build unit_map for large page remapping
+ * @reserved_size: the size of reserved percpu area in bytes
+ * @dyn_sizep: in/out parameter for dynamic size, -1 for auto
+ * @unit_sizep: out parameter for unit size
+ * @unit_map: unit_map to be filled
+ * @cpu_distance_fn: callback to determine distance between cpus
+ *
+ * This function builds cpu -> unit map and determine other parameters
+ * considering needed percpu size, large page size and distances
+ * between CPUs in NUMA.
+ *
+ * CPUs which are of LOCAL_DISTANCE both ways are grouped together and
+ * may share units in the same large page. The returned configuration
+ * is guaranteed to have CPUs on different nodes on different large
+ * pages and >=75% usage of allocated virtual address space.
+ *
+ * RETURNS:
+ * On success, fills in @unit_map, sets *@dyn_sizep, *@unit_sizep and
+ * returns the number of units to be allocated. -errno on failure.
+ */
+int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep,
+ size_t *unit_sizep, size_t lpage_size,
+ int *unit_map,
+ pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
+{
+ static int group_map[NR_CPUS] __initdata;
+ static int group_cnt[NR_CPUS] __initdata;
+ const size_t static_size = __per_cpu_end - __per_cpu_start;
+ int group_cnt_max = 0;
+ size_t size_sum, min_unit_size, alloc_size;
+ int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */
+ int last_allocs;
+ unsigned int cpu, tcpu;
+ int group, unit;
+
+ /*
+ * Determine min_unit_size, alloc_size and max_upa such that
+ * alloc_size is multiple of lpage_size and is the smallest
+ * which can accomodate 4k aligned segments which are equal to
+ * or larger than min_unit_size.
+ */
+ size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, dyn_sizep);
+ min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);
+
+ alloc_size = roundup(min_unit_size, lpage_size);
+ upa = alloc_size / min_unit_size;
+ while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
+ upa--;
+ max_upa = upa;
+
+ /* group cpus according to their proximity */
+ for_each_possible_cpu(cpu) {
+ group = 0;
+ next_group:
+ for_each_possible_cpu(tcpu) {
+ if (cpu == tcpu)
+ break;
+ if (group_map[tcpu] == group &&
+ (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE ||
+ cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) {
+ group++;
+ goto next_group;
+ }
+ }
+ group_map[cpu] = group;
+ group_cnt[group]++;
+ group_cnt_max = max(group_cnt_max, group_cnt[group]);
+ }
+
+ /*
+ * Expand unit size until address space usage goes over 75%
+ * and then as much as possible without using more address
+ * space.
+ */
+ last_allocs = INT_MAX;
+ for (upa = max_upa; upa; upa--) {
+ int allocs = 0, wasted = 0;
+
+ if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
+ continue;
+
+ for (group = 0; group_cnt[group]; group++) {
+ int this_allocs = DIV_ROUND_UP(group_cnt[group], upa);
+ allocs += this_allocs;
+ wasted += this_allocs * upa - group_cnt[group];
+ }
+
+ /*
+ * Don't accept if wastage is over 25%. The
+ * greater-than comparison ensures upa==1 always
+ * passes the following check.
+ */
+ if (wasted > num_possible_cpus() / 3)
+ continue;
+
+ /* and then don't consume more memory */
+ if (allocs > last_allocs)
+ break;
+ last_allocs = allocs;
+ best_upa = upa;
+ }
+ *unit_sizep = alloc_size / best_upa;
+
+ /* assign units to cpus accordingly */
+ unit = 0;
+ for (group = 0; group_cnt[group]; group++) {
+ for_each_possible_cpu(cpu)
+ if (group_map[cpu] == group)
+ unit_map[cpu] = unit++;
+ unit = roundup(unit, best_upa);
+ }
+
+ return unit; /* unit contains aligned number of units */
+}
+
+static bool __init pcpul_unit_to_cpu(int unit, const int *unit_map,
+ unsigned int *cpup);
+
+static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size,
+ size_t reserved_size, size_t dyn_size,
+ size_t unit_size, size_t lpage_size,
+ const int *unit_map, int nr_units)
+{
+ int width = 1, v = nr_units;
+ char empty_str[] = "--------";
+ int upl, lpl; /* units per lpage, lpage per line */
+ unsigned int cpu;
+ int lpage, unit;
+
+ while (v /= 10)
+ width++;
+ empty_str[min_t(int, width, sizeof(empty_str) - 1)] = '\0';
+
+ upl = max_t(int, lpage_size / unit_size, 1);
+ lpl = rounddown_pow_of_two(max_t(int, 60 / (upl * (width + 1) + 2), 1));
+
+ printk("%spcpu-lpage: sta/res/dyn=%zu/%zu/%zu unit=%zu lpage=%zu", lvl,
+ static_size, reserved_size, dyn_size, unit_size, lpage_size);
+
+ for (lpage = 0, unit = 0; unit < nr_units; unit++) {
+ if (!(unit % upl)) {
+ if (!(lpage++ % lpl)) {
+ printk("\n");
+ printk("%spcpu-lpage: ", lvl);
+ } else
+ printk("| ");
+ }
+ if (pcpul_unit_to_cpu(unit, unit_map, &cpu))
+ printk("%0*d ", width, cpu);
+ else
+ printk("%s ", empty_str);
+ }
+ printk("\n");
+}
+#endif
+
/**
* pcpu_setup_first_chunk - initialize the first percpu chunk
* @static_size: the size of static percpu area in bytes
@@ -1441,20 +1613,6 @@ static int __init percpu_alloc_setup(char *str)
}
early_param("percpu_alloc", percpu_alloc_setup);
-static inline size_t pcpu_calc_fc_sizes(size_t static_size,
- size_t reserved_size,
- ssize_t *dyn_sizep)
-{
- size_t size_sum;
-
- size_sum = PFN_ALIGN(static_size + reserved_size +
- (*dyn_sizep >= 0 ? *dyn_sizep : 0));
- if (*dyn_sizep != 0)
- *dyn_sizep = size_sum - static_size - reserved_size;
-
- return size_sum;
-}
-
#if defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \
!defined(CONFIG_HAVE_SETUP_PER_CPU_AREA)
/**
@@ -1637,122 +1795,6 @@ out_free_ar:
#endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */
#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK
-/**
- * pcpu_lpage_build_unit_map - build unit_map for large page remapping
- * @reserved_size: the size of reserved percpu area in bytes
- * @dyn_sizep: in/out parameter for dynamic size, -1 for auto
- * @unit_sizep: out parameter for unit size
- * @unit_map: unit_map to be filled
- * @cpu_distance_fn: callback to determine distance between cpus
- *
- * This function builds cpu -> unit map and determine other parameters
- * considering needed percpu size, large page size and distances
- * between CPUs in NUMA.
- *
- * CPUs which are of LOCAL_DISTANCE both ways are grouped together and
- * may share units in the same large page. The returned configuration
- * is guaranteed to have CPUs on different nodes on different large
- * pages and >=75% usage of allocated virtual address space.
- *
- * RETURNS:
- * On success, fills in @unit_map, sets *@dyn_sizep, *@unit_sizep and
- * returns the number of units to be allocated. -errno on failure.
- */
-int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep,
- size_t *unit_sizep, size_t lpage_size,
- int *unit_map,
- pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
-{
- static int group_map[NR_CPUS] __initdata;
- static int group_cnt[NR_CPUS] __initdata;
- const size_t static_size = __per_cpu_end - __per_cpu_start;
- int group_cnt_max = 0;
- size_t size_sum, min_unit_size, alloc_size;
- int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */
- int last_allocs;
- unsigned int cpu, tcpu;
- int group, unit;
-
- /*
- * Determine min_unit_size, alloc_size and max_upa such that
- * alloc_size is multiple of lpage_size and is the smallest
- * which can accomodate 4k aligned segments which are equal to
- * or larger than min_unit_size.
- */
- size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, dyn_sizep);
- min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);
-
- alloc_size = roundup(min_unit_size, lpage_size);
- upa = alloc_size / min_unit_size;
- while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
- upa--;
- max_upa = upa;
-
- /* group cpus according to their proximity */
- for_each_possible_cpu(cpu) {
- group = 0;
- next_group:
- for_each_possible_cpu(tcpu) {
- if (cpu == tcpu)
- break;
- if (group_map[tcpu] == group &&
- (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE ||
- cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) {
- group++;
- goto next_group;
- }
- }
- group_map[cpu] = group;
- group_cnt[group]++;
- group_cnt_max = max(group_cnt_max, group_cnt[group]);
- }
-
- /*
- * Expand unit size until address space usage goes over 75%
- * and then as much as possible without using more address
- * space.
- */
- last_allocs = INT_MAX;
- for (upa = max_upa; upa; upa--) {
- int allocs = 0, wasted = 0;
-
- if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
- continue;
-
- for (group = 0; group_cnt[group]; group++) {
- int this_allocs = DIV_ROUND_UP(group_cnt[group], upa);
- allocs += this_allocs;
- wasted += this_allocs * upa - group_cnt[group];
- }
-
- /*
- * Don't accept if wastage is over 25%. The
- * greater-than comparison ensures upa==1 always
- * passes the following check.
- */
- if (wasted > num_possible_cpus() / 3)
- continue;
-
- /* and then don't consume more memory */
- if (allocs > last_allocs)
- break;
- last_allocs = allocs;
- best_upa = upa;
- }
- *unit_sizep = alloc_size / best_upa;
-
- /* assign units to cpus accordingly */
- unit = 0;
- for (group = 0; group_cnt[group]; group++) {
- for_each_possible_cpu(cpu)
- if (group_map[cpu] == group)
- unit_map[cpu] = unit++;
- unit = roundup(unit, best_upa);
- }
-
- return unit; /* unit contains aligned number of units */
-}
-
struct pcpul_ent {
void *ptr;
void *map_addr;
@@ -1778,43 +1820,6 @@ static bool __init pcpul_unit_to_cpu(int unit, const int *unit_map,
return false;
}
-static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size,
- size_t reserved_size, size_t dyn_size,
- size_t unit_size, size_t lpage_size,
- const int *unit_map, int nr_units)
-{
- int width = 1, v = nr_units;
- char empty_str[] = "--------";
- int upl, lpl; /* units per lpage, lpage per line */
- unsigned int cpu;
- int lpage, unit;
-
- while (v /= 10)
- width++;
- empty_str[min_t(int, width, sizeof(empty_str) - 1)] = '\0';
-
- upl = max_t(int, lpage_size / unit_size, 1);
- lpl = rounddown_pow_of_two(max_t(int, 60 / (upl * (width + 1) + 2), 1));
-
- printk("%spcpu-lpage: sta/res/dyn=%zu/%zu/%zu unit=%zu lpage=%zu", lvl,
- static_size, reserved_size, dyn_size, unit_size, lpage_size);
-
- for (lpage = 0, unit = 0; unit < nr_units; unit++) {
- if (!(unit % upl)) {
- if (!(lpage++ % lpl)) {
- printk("\n");
- printk("%spcpu-lpage: ", lvl);
- } else
- printk("| ");
- }
- if (pcpul_unit_to_cpu(unit, unit_map, &cpu))
- printk("%0*d ", width, cpu);
- else
- printk("%s ", empty_str);
- }
- printk("\n");
-}
-
/**
* pcpu_lpage_first_chunk - remap the first percpu chunk using large page
* @reserved_size: the size of reserved percpu area in bytes
--
1.6.0.2
next prev parent reply other threads:[~2009-07-21 10:31 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-07-21 10:25 [PATCHSET percpu#for-next] implement and use sparse embedding first chunk allocator Tejun Heo
2009-07-21 10:26 ` [PATCH 01/20] percpu: fix pcpu_reclaim() locking Tejun Heo
2009-07-21 21:41 ` Christoph Lameter
2009-07-21 10:26 ` [PATCH 02/20] percpu: improve boot messages Tejun Heo
2009-07-21 21:43 ` Christoph Lameter
2009-07-21 10:26 ` [PATCH 03/20] percpu: rename 4k first chunk allocator to page Tejun Heo
2009-07-21 21:47 ` Christoph Lameter
2009-07-22 4:38 ` Tejun Heo
2009-07-21 10:26 ` [PATCH 04/20] percpu: build first chunk allocators selectively Tejun Heo
2009-07-21 10:26 ` [PATCH 05/20] percpu: generalize first chunk allocator selection Tejun Heo
2009-07-21 10:26 ` [PATCH 06/20] percpu: drop @static_size from first chunk allocators Tejun Heo
2009-07-21 10:26 ` [PATCH 07/20] percpu: make @dyn_size mandatory for pcpu_setup_first_chunk() Tejun Heo
2009-07-21 10:26 ` [PATCH 08/20] percpu: add @align to pcpu_fc_alloc_fn_t Tejun Heo
2009-07-21 10:26 ` Tejun Heo [this message]
2009-07-21 10:26 ` [PATCH 10/20] percpu: introduce pcpu_alloc_info and pcpu_group_info Tejun Heo
2009-07-21 10:26 ` [PATCH 11/20] percpu: add pcpu_unit_offsets[] Tejun Heo
2009-07-21 10:26 ` [PATCH 12/20] percpu: add chunk->base_addr Tejun Heo
2009-07-21 10:26 ` [PATCH 13/20] vmalloc: separate out insert_vmalloc_vm() Tejun Heo
2009-07-21 10:26 ` [PATCH 14/20] vmalloc: implement pcpu_get_vm_areas() Tejun Heo
2009-08-14 6:07 ` Tejun Heo
2009-07-21 10:26 ` [PATCH 15/20] percpu: use group information to allocate vmap areas sparsely Tejun Heo
2009-07-21 10:26 ` [PATCH 16/20] percpu: update embedding first chunk allocator to handle sparse units Tejun Heo
2009-07-21 10:26 ` [PATCH 17/20] x86,percpu: use embedding for 64bit NUMA and page for 32bit NUMA Tejun Heo
2009-07-21 10:26 ` [PATCH 18/20] percpu: kill lpage first chunk allocator Tejun Heo
2009-07-21 10:26 ` [PATCH 19/20] sparc64: use embedding percpu " Tejun Heo
2009-07-22 3:54 ` David Miller
2009-07-21 10:26 ` [PATCH 20/20] powerpc64: convert to dynamic percpu allocator Tejun Heo
2009-07-21 12:22 ` [RFC PATCH] percpu: kill legacy " Tejun Heo
2009-07-22 4:30 ` Rusty Russell
2009-08-14 6:09 ` [PATCHSET percpu#for-next] implement and use sparse embedding first chunk allocator Tejun Heo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1248171979-29166-10-git-send-email-tj@kernel.org \
--to=tj@kernel.org \
--cc=JBeulich@novell.com \
--cc=akpm@linux-foundation.org \
--cc=andi@firstfloor.org \
--cc=benh@kernel.crashing.org \
--cc=cl@linux-foundation.org \
--cc=davem@davemloft.net \
--cc=dhowells@redhat.com \
--cc=hpa@zytor.com \
--cc=linux-arch@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=npiggin@suse.de \
--cc=rusty@rustcorp.com.au \
--cc=tglx@linutronix.de \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).