From: Dennis Zhou <dennisz@fb.com>
To: Tejun Heo <tj@kernel.org>, Christoph Lameter <cl@linux.com>
Cc: kernel-team@fb.com, linux-kernel@vger.kernel.org,
	linux-mm@kvack.org, Dennis Zhou <dennisszhou@gmail.com>
Subject: [PATCH 06/10] percpu: modify base_addr to be region specific
Date: Sat, 15 Jul 2017 22:23:11 -0400
Message-ID: <20170716022315.19892-7-dennisz@fb.com>
In-Reply-To: <20170716022315.19892-1-dennisz@fb.com>

From: "Dennis Zhou (Facebook)" <dennisszhou@gmail.com>

Originally, the first chunk is served by up to three chunks, each
given a region of it to be responsible for.  Despite this, the
arithmetic was based off of base_addr, requiring either offsets or
overly inclusive address checks.  This patch changes the first chunk
check to consider only the dynamic region and the reserved chunk check
to consider only the reserved region.  There is no functional impact
here beyond making these checks a little more accurate.
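
Below is a minimal sketch of the new containment check (the helper
name is hypothetical; the patch open-codes this logic in
pcpu_addr_in_first_chunk() and pcpu_addr_in_reserved_chunk()):

	/* true iff @addr falls within the pages @chunk is responsible for */
	static bool pcpu_addr_in_chunk(struct pcpu_chunk *chunk, void *addr)
	{
		void *start = chunk->base_addr;

		return addr >= start &&
		       addr < start + chunk->nr_pages * PAGE_SIZE;
	}

The reserved chunk check additionally skips the first
pcpu_reserved_offset bytes, which overlap the end of the static region.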

This patch also lays the groundwork for increasing the minimum
allocation size to 4 bytes.  The new pcpu_chunk field nr_pages will be
used to keep track of the number of pages the bitmap serves.  The
arithmetic for identifying the first and reserved chunks reflects this
change.
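
For example, with PCPU_MIN_ALLOC_SHIFT of 2, PCPU_MIN_ALLOC_SIZE works
out to 4 bytes.  As a rough sketch of the sizing this enables (an
assumption about the bitmap allocator introduced later in this series,
not code in this patch), a chunk serving nr_pages pages needs one bit
per PCPU_MIN_ALLOC_SIZE bytes:

	/* hypothetical: bitmap bits needed to track a chunk's free space */
	int nr_bits = (chunk->nr_pages * PAGE_SIZE) >> PCPU_MIN_ALLOC_SHIFT;

so each 4KB page is covered by 1024 bits.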

Signed-off-by: Dennis Zhou <dennisszhou@gmail.com>
---
 include/linux/percpu.h |   4 ++
 mm/percpu-internal.h   |  12 +++--
 mm/percpu.c            | 127 ++++++++++++++++++++++++++++++++++---------------
 3 files changed, 100 insertions(+), 43 deletions(-)

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 98a371c..a5cedcd 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -21,6 +21,10 @@
 /* minimum unit size, also is the maximum supported allocation size */
 #define PCPU_MIN_UNIT_SIZE		PFN_ALIGN(32 << 10)
 
+/* minimum allocation size and shift in bytes */
+#define PCPU_MIN_ALLOC_SIZE		(1 << PCPU_MIN_ALLOC_SHIFT)
+#define PCPU_MIN_ALLOC_SHIFT		2
+
 /*
  * Percpu allocator can serve percpu allocations before slab is
  * initialized which allows slab to depend on the percpu allocator.
diff --git a/mm/percpu-internal.h b/mm/percpu-internal.h
index c9158a4..56e1aba 100644
--- a/mm/percpu-internal.h
+++ b/mm/percpu-internal.h
@@ -23,11 +23,12 @@ struct pcpu_chunk {
 	void			*data;		/* chunk data */
 	int			first_free;	/* no free below this */
 	bool			immutable;	/* no [de]population allowed */
-	bool			has_reserved;	/* Indicates if chunk has reserved space
-						   at the beginning. Reserved chunk will
-						   contain reservation for static chunk.
-						   Dynamic chunk will contain reservation
-						   for static and reserved chunks. */
+	bool			has_reserved;	/* indicates if the region this chunk
+						   is responsible for overlaps with
+						   the prior adjacent region */
+
+	int                     nr_pages;       /* # of PAGE_SIZE pages served
+						   by this chunk */
 	int			nr_populated;	/* # of populated pages */
 	unsigned long		populated[];	/* populated bitmap */
 };
@@ -40,6 +41,7 @@ extern int pcpu_nr_empty_pop_pages;
 
 extern struct pcpu_chunk *pcpu_first_chunk;
 extern struct pcpu_chunk *pcpu_reserved_chunk;
+extern unsigned long pcpu_reserved_offset;
 
 #ifdef CONFIG_PERCPU_STATS
 
diff --git a/mm/percpu.c b/mm/percpu.c
index 7704db9..c74ad68 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -144,14 +144,14 @@ static const size_t *pcpu_group_sizes __ro_after_init;
 struct pcpu_chunk *pcpu_first_chunk __ro_after_init;
 
 /*
- * Optional reserved chunk.  This chunk reserves part of the first
- * chunk and serves it for reserved allocations.  The amount of
- * reserved offset is in pcpu_reserved_chunk_limit.  When reserved
- * area doesn't exist, the following variables contain NULL and 0
- * respectively.
+ * Optional reserved chunk.  This is the part of the first chunk that
+ * serves reserved allocations.  pcpu_reserved_offset is the amount by
+ * which pcpu_reserved_chunk->base_addr is pushed back into the static
+ * region so that base_addr is page aligned.  When the reserved area
+ * doesn't exist, the following variables contain NULL and 0 respectively.
  */
 struct pcpu_chunk *pcpu_reserved_chunk __ro_after_init;
-static int pcpu_reserved_chunk_limit __ro_after_init;
+unsigned long pcpu_reserved_offset __ro_after_init;
 
 DEFINE_SPINLOCK(pcpu_lock);	/* all internal data structures */
 static DEFINE_MUTEX(pcpu_alloc_mutex);	/* chunk create/destroy, [de]pop, map ext */
@@ -184,19 +184,32 @@ static void pcpu_schedule_balance_work(void)
 		schedule_work(&pcpu_balance_work);
 }
 
+/*
+ * Static addresses should never be passed into the allocator.  They
+ * are accessed using the group_offsets and therefore do not rely on
+ * chunk->base_addr.
+ */
 static bool pcpu_addr_in_first_chunk(void *addr)
 {
 	void *first_start = pcpu_first_chunk->base_addr;
 
-	return addr >= first_start && addr < first_start + pcpu_unit_size;
+	return addr >= first_start &&
+	       addr < first_start +
+	       pcpu_first_chunk->nr_pages * PAGE_SIZE;
 }
 
 static bool pcpu_addr_in_reserved_chunk(void *addr)
 {
-	void *first_start = pcpu_first_chunk->base_addr;
+	void *first_start;
 
-	return addr >= first_start &&
-		addr < first_start + pcpu_reserved_chunk_limit;
+	if (!pcpu_reserved_chunk)
+		return false;
+
+	first_start = pcpu_reserved_chunk->base_addr;
+
+	return addr >= first_start + pcpu_reserved_offset &&
+	       addr < first_start +
+	       pcpu_reserved_chunk->nr_pages * PAGE_SIZE;
 }
 
 static int __pcpu_size_to_slot(int size)
@@ -237,11 +250,16 @@ static int __maybe_unused pcpu_page_idx(unsigned int cpu, int page_idx)
 	return pcpu_unit_map[cpu] * pcpu_unit_pages + page_idx;
 }
 
+static unsigned long pcpu_unit_page_offset(unsigned int cpu, int page_idx)
+{
+	return pcpu_unit_offsets[cpu] + (page_idx << PAGE_SHIFT);
+}
+
 static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk,
 				     unsigned int cpu, int page_idx)
 {
-	return (unsigned long)chunk->base_addr + pcpu_unit_offsets[cpu] +
-		(page_idx << PAGE_SHIFT);
+	return (unsigned long)chunk->base_addr +
+		pcpu_unit_page_offset(cpu, page_idx);
 }
 
 static void __maybe_unused pcpu_next_unpop(struct pcpu_chunk *chunk,
@@ -737,6 +755,8 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
 	chunk->free_size = pcpu_unit_size;
 	chunk->contig_hint = pcpu_unit_size;
 
+	chunk->nr_pages = pcpu_unit_pages;
+
 	return chunk;
 }
 
@@ -824,18 +844,20 @@ static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai);
  * pcpu_chunk_addr_search - determine chunk containing specified address
  * @addr: address for which the chunk needs to be determined.
  *
+ * This is an internal function that handles all but static allocations.
+ * Static percpu address values should never be passed into the allocator.
+ *
  * RETURNS:
  * The address of the found chunk.
  */
 static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
 {
 	/* is it in the first chunk? */
-	if (pcpu_addr_in_first_chunk(addr)) {
-		/* is it in the reserved area? */
-		if (pcpu_addr_in_reserved_chunk(addr))
-			return pcpu_reserved_chunk;
+	if (pcpu_addr_in_first_chunk(addr))
 		return pcpu_first_chunk;
-	}
+	/* is it in the reserved chunk? */
+	if (pcpu_addr_in_reserved_chunk(addr))
+		return pcpu_reserved_chunk;
 
 	/*
 	 * The address is relative to unit0 which might be unused and
@@ -1366,10 +1388,17 @@ phys_addr_t per_cpu_ptr_to_phys(void *addr)
 	 * The following test on unit_low/high isn't strictly
 	 * necessary but will speed up lookups of addresses which
 	 * aren't in the first chunk.
+	 *
+	 * The address check is coarse, checking against full chunk
+	 * sizes.  pcpu_base_addr points to the beginning of the first
+	 * chunk including the static region, which allows us to examine
+	 * all regions of the first chunk.  It assumes good intent, as the
+	 * first chunk may not be full (i.e. < pcpu_unit_pages in size).
 	 */
-	first_low = pcpu_chunk_addr(pcpu_first_chunk, pcpu_low_unit_cpu, 0);
-	first_high = pcpu_chunk_addr(pcpu_first_chunk, pcpu_high_unit_cpu,
-				     pcpu_unit_pages);
+	first_low = (unsigned long) pcpu_base_addr +
+		    pcpu_unit_page_offset(pcpu_low_unit_cpu, 0);
+	first_high = (unsigned long) pcpu_base_addr +
+		     pcpu_unit_page_offset(pcpu_high_unit_cpu, pcpu_unit_pages);
 	if ((unsigned long)addr >= first_low &&
 	    (unsigned long)addr < first_high) {
 		for_each_possible_cpu(cpu) {
@@ -1575,6 +1604,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 	unsigned int cpu;
 	int *unit_map;
 	int group, unit, i;
+	unsigned long tmp_addr, aligned_addr;
+	unsigned long map_size_bytes;
 
 #define PCPU_SETUP_BUG_ON(cond)	do {					\
 	if (unlikely(cond)) {						\
@@ -1678,46 +1709,66 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 		INIT_LIST_HEAD(&pcpu_slot[i]);
 
 	/*
-	 * Initialize static chunk.  If reserved_size is zero, the
-	 * static chunk covers static area + dynamic allocation area
-	 * in the first chunk.  If reserved_size is not zero, it
-	 * covers static area + reserved area (mostly used for module
-	 * static percpu allocation).
+	 * Initialize static chunk.
+	 * The static region is dropped as those addresses are already
+	 * allocated and do not rely on chunk->base_addr.
+	 * reserved_size == 0:
+	 *      the static chunk covers the dynamic area
+	 * reserved_size > 0:
+	 *      the static chunk covers the reserved area
+	 *
+	 * If the static area is not page aligned, the region adjacent
+	 * to the static area must have its base_addr offset into the
+	 * static area so that it is page aligned.  The overlap is then
+	 * allocated, preserving the alignment in the metadata for the
+	 * actual region.
 	 */
+	tmp_addr = (unsigned long)base_addr + ai->static_size;
+	aligned_addr = tmp_addr & PAGE_MASK;
+	pcpu_reserved_offset = tmp_addr - aligned_addr;
+
+	map_size_bytes = (ai->reserved_size ?: ai->dyn_size) +
+			 pcpu_reserved_offset;
+
+	/* schunk allocation */
 	schunk = memblock_virt_alloc(pcpu_chunk_struct_size, 0);
 	INIT_LIST_HEAD(&schunk->list);
 	INIT_LIST_HEAD(&schunk->map_extend_list);
-	schunk->base_addr = base_addr;
+	schunk->base_addr = (void *)aligned_addr;
 	schunk->map = smap;
 	schunk->map_alloc = ARRAY_SIZE(smap);
 	schunk->immutable = true;
 	bitmap_fill(schunk->populated, pcpu_unit_pages);
 	schunk->nr_populated = pcpu_unit_pages;
 
+	schunk->nr_pages = map_size_bytes >> PAGE_SHIFT;
+
 	if (ai->reserved_size) {
 		schunk->free_size = ai->reserved_size;
 		pcpu_reserved_chunk = schunk;
-		pcpu_reserved_chunk_limit = ai->static_size + ai->reserved_size;
 	} else {
 		schunk->free_size = dyn_size;
 		dyn_size = 0;			/* dynamic area covered */
 	}
 	schunk->contig_hint = schunk->free_size;
 
-	schunk->map[0] = 1;
-	schunk->map[1] = ai->static_size;
-	schunk->map_used = 1;
+	if (pcpu_reserved_offset) {
+		schunk->has_reserved = true;
+		schunk->map[0] = 1;
+		schunk->map[1] = pcpu_reserved_offset;
+		schunk->map_used = 1;
+	}
 	if (schunk->free_size)
-		schunk->map[++schunk->map_used] = ai->static_size + schunk->free_size;
+		schunk->map[++schunk->map_used] = map_size_bytes;
 	schunk->map[schunk->map_used] |= 1;
-	schunk->has_reserved = true;
 
 	/* init dynamic chunk if necessary */
 	if (dyn_size) {
 		dchunk = memblock_virt_alloc(pcpu_chunk_struct_size, 0);
 		INIT_LIST_HEAD(&dchunk->list);
 		INIT_LIST_HEAD(&dchunk->map_extend_list);
-		dchunk->base_addr = base_addr;
+		dchunk->base_addr = base_addr + ai->static_size +
+				    ai->reserved_size;
 		dchunk->map = dmap;
 		dchunk->map_alloc = ARRAY_SIZE(dmap);
 		dchunk->immutable = true;
@@ -1725,11 +1776,11 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 		dchunk->nr_populated = pcpu_unit_pages;
 
 		dchunk->contig_hint = dchunk->free_size = dyn_size;
-		dchunk->map[0] = 1;
-		dchunk->map[1] = pcpu_reserved_chunk_limit;
-		dchunk->map[2] = (pcpu_reserved_chunk_limit + dchunk->free_size) | 1;
-		dchunk->map_used = 2;
-		dchunk->has_reserved = true;
+		dchunk->map[0] = 0;
+		dchunk->map[1] = dchunk->free_size | 1;
+		dchunk->map_used = 1;
+
+		dchunk->nr_pages = dyn_size >> PAGE_SHIFT;
 	}
 
 	/* link the first chunk in */
-- 
2.9.3
