* [PATCH v2] mm: make minimum slab alignment a runtime property
@ 2022-04-21 21:15 ` Peter Collingbourne
  0 siblings, 0 replies; 10+ messages in thread
From: Peter Collingbourne @ 2022-04-21 21:15 UTC (permalink / raw)
  To: Andrey Konovalov, Hyeonggon Yoo, Andrew Morton, Catalin Marinas
  Cc: Peter Collingbourne, Linux ARM, Linux Memory Management List,
	Linux Kernel Mailing List, vbabka, penberg, roman.gushchin,
	iamjoonsoo.kim, rientjes, Herbert Xu, Andrey Ryabinin,
	Alexander Potapenko, Dmitry Vyukov, kasan-dev, Eric Biederman,
	Kees Cook

When CONFIG_KASAN_HW_TAGS is enabled we currently increase the minimum
slab alignment to 16. This happens even if MTE is not supported in
hardware or disabled via kasan=off, which creates an unnecessary
memory overhead in those cases. Eliminate this overhead by making
the minimum slab alignment a runtime property and only aligning to
16 if KASAN is enabled at runtime.
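
Condensed from the hunks below, the pattern is: a generic default that
returns the compile-time minimum, an arm64 override that consults the
KASAN runtime state, and the allocators taking the maximum at runtime:

	/* include/linux/slab.h: default when the arch does not override it */
	#ifndef arch_slab_minalign
	static inline size_t arch_slab_minalign(void)
	{
		return ARCH_SLAB_MIN_MINALIGN;
	}
	#endif

	/* arch/arm64/include/asm/cache.h, under CONFIG_KASAN_HW_TAGS */
	static inline size_t arch_slab_minalign(void)
	{
		return kasan_hw_tags_enabled() ? MTE_GRANULE_SIZE :
						 __alignof__(unsigned long long);
	}

	/* mm/slab_common.c: calculate_alignment() */
	align = max_t(size_t, align, arch_slab_minalign());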

On a DragonBoard 845c (non-MTE hardware) with a kernel built with
CONFIG_KASAN_HW_TAGS, waiting for quiescence after a full Android
boot I see the following Slab measurements in /proc/meminfo (median
of 3 reboots):

Before: 169020 kB
After:  167304 kB

Link: https://linux-review.googlesource.com/id/I752e725179b43b144153f4b6f584ceb646473ead
Signed-off-by: Peter Collingbourne <pcc@google.com>
---
v2:
- use max instead of max_t in flat_stack_align()

 arch/arc/include/asm/cache.h        |  4 ++--
 arch/arm/include/asm/cache.h        |  2 +-
 arch/arm64/include/asm/cache.h      | 19 +++++++++++++------
 arch/microblaze/include/asm/page.h  |  2 +-
 arch/riscv/include/asm/cache.h      |  2 +-
 arch/sparc/include/asm/cache.h      |  2 +-
 arch/xtensa/include/asm/processor.h |  2 +-
 fs/binfmt_flat.c                    |  9 ++++++---
 include/crypto/hash.h               |  2 +-
 include/linux/slab.h                | 22 +++++++++++++++++-----
 mm/slab.c                           |  7 +++----
 mm/slab_common.c                    |  3 +--
 mm/slob.c                           |  6 +++---
 13 files changed, 51 insertions(+), 31 deletions(-)

diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
index f0f1fc5d62b6..b6a7763fd5d6 100644
--- a/arch/arc/include/asm/cache.h
+++ b/arch/arc/include/asm/cache.h
@@ -55,11 +55,11 @@
  * Make sure slab-allocated buffers are 64-bit aligned when atomic64_t uses
  * ARCv2 64-bit atomics (LLOCKD/SCONDD). This guarantess runtime 64-bit
  * alignment for any atomic64_t embedded in buffer.
- * Default ARCH_SLAB_MINALIGN is __alignof__(long long) which has a relaxed
+ * Default ARCH_SLAB_MIN_MINALIGN is __alignof__(long long) which has a relaxed
  * value of 4 (and not 8) in ARC ABI.
  */
 #if defined(CONFIG_ARC_HAS_LL64) && defined(CONFIG_ARC_HAS_LLSC)
-#define ARCH_SLAB_MINALIGN	8
+#define ARCH_SLAB_MIN_MINALIGN	8
 #endif
 
 extern int ioc_enable;
diff --git a/arch/arm/include/asm/cache.h b/arch/arm/include/asm/cache.h
index e3ea34558ada..3e1018bb9805 100644
--- a/arch/arm/include/asm/cache.h
+++ b/arch/arm/include/asm/cache.h
@@ -21,7 +21,7 @@
  * With EABI on ARMv5 and above we must have 64-bit aligned slab pointers.
  */
 #if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5)
-#define ARCH_SLAB_MINALIGN 8
+#define ARCH_SLAB_MIN_MINALIGN 8
 #endif
 
 #define __read_mostly __section(".data..read_mostly")
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index a074459f8f2f..38f171591c3f 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -6,6 +6,7 @@
 #define __ASM_CACHE_H
 
 #include <asm/cputype.h>
+#include <asm/mte-def.h>
 
 #define CTR_L1IP_SHIFT		14
 #define CTR_L1IP_MASK		3
@@ -49,15 +50,21 @@
  */
 #define ARCH_DMA_MINALIGN	(128)
 
-#ifdef CONFIG_KASAN_SW_TAGS
-#define ARCH_SLAB_MINALIGN	(1ULL << KASAN_SHADOW_SCALE_SHIFT)
-#elif defined(CONFIG_KASAN_HW_TAGS)
-#define ARCH_SLAB_MINALIGN	MTE_GRANULE_SIZE
-#endif
-
 #ifndef __ASSEMBLY__
 
 #include <linux/bitops.h>
+#include <linux/kasan-enabled.h>
+
+#ifdef CONFIG_KASAN_SW_TAGS
+#define ARCH_SLAB_MIN_MINALIGN	(1ULL << KASAN_SHADOW_SCALE_SHIFT)
+#elif defined(CONFIG_KASAN_HW_TAGS)
+static inline size_t arch_slab_minalign(void)
+{
+	return kasan_hw_tags_enabled() ? MTE_GRANULE_SIZE :
+					 __alignof__(unsigned long long);
+}
+#define arch_slab_minalign() arch_slab_minalign()
+#endif
 
 #define ICACHEF_ALIASING	0
 #define ICACHEF_VPIPT		1
diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h
index 4b8b2fa78fc5..ccdbc1da3c3e 100644
--- a/arch/microblaze/include/asm/page.h
+++ b/arch/microblaze/include/asm/page.h
@@ -33,7 +33,7 @@
 /* MS be sure that SLAB allocates aligned objects */
 #define ARCH_DMA_MINALIGN	L1_CACHE_BYTES
 
-#define ARCH_SLAB_MINALIGN	L1_CACHE_BYTES
+#define ARCH_SLAB_MIN_MINALIGN	L1_CACHE_BYTES
 
 /*
  * PAGE_OFFSET -- the first address of the first page of memory. With MMU
diff --git a/arch/riscv/include/asm/cache.h b/arch/riscv/include/asm/cache.h
index 9b58b104559e..7beb3b5d27c7 100644
--- a/arch/riscv/include/asm/cache.h
+++ b/arch/riscv/include/asm/cache.h
@@ -16,7 +16,7 @@
  * the flat loader aligns it accordingly.
  */
 #ifndef CONFIG_MMU
-#define ARCH_SLAB_MINALIGN	16
+#define ARCH_SLAB_MIN_MINALIGN	16
 #endif
 
 #endif /* _ASM_RISCV_CACHE_H */
diff --git a/arch/sparc/include/asm/cache.h b/arch/sparc/include/asm/cache.h
index e62fd0e72606..9d8cb4687b7e 100644
--- a/arch/sparc/include/asm/cache.h
+++ b/arch/sparc/include/asm/cache.h
@@ -8,7 +8,7 @@
 #ifndef _SPARC_CACHE_H
 #define _SPARC_CACHE_H
 
-#define ARCH_SLAB_MINALIGN	__alignof__(unsigned long long)
+#define ARCH_SLAB_MIN_MINALIGN	__alignof__(unsigned long long)
 
 #define L1_CACHE_SHIFT 5
 #define L1_CACHE_BYTES 32
diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
index 4489a27d527a..e3ea278e3fcf 100644
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -18,7 +18,7 @@
 #include <asm/types.h>
 #include <asm/regs.h>
 
-#define ARCH_SLAB_MINALIGN XTENSA_STACK_ALIGNMENT
+#define ARCH_SLAB_MIN_MINALIGN XTENSA_STACK_ALIGNMENT
 
 /*
  * User space process size: 1 GB.
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 626898150011..23ce3439eafa 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -64,7 +64,10 @@
  * Here we can be a bit looser than the data sections since this
  * needs to only meet arch ABI requirements.
  */
-#define FLAT_STACK_ALIGN	max_t(unsigned long, sizeof(void *), ARCH_SLAB_MINALIGN)
+static size_t flat_stack_align(void)
+{
+	return max(sizeof(void *), arch_slab_minalign());
+}
 
 #define RELOC_FAILED 0xff00ff01		/* Relocation incorrect somewhere */
 #define UNLOADED_LIB 0x7ff000ff		/* Placeholder for unused library */
@@ -148,7 +151,7 @@ static int create_flat_tables(struct linux_binprm *bprm, unsigned long arg_start
 		sp -= 2; /* argvp + envp */
 	sp -= 1;  /* &argc */
 
-	current->mm->start_stack = (unsigned long)sp & -FLAT_STACK_ALIGN;
+	current->mm->start_stack = (unsigned long)sp & -flat_stack_align();
 	sp = (unsigned long __user *)current->mm->start_stack;
 
 	if (put_user(bprm->argc, sp++))
@@ -966,7 +969,7 @@ static int load_flat_binary(struct linux_binprm *bprm)
 #endif
 	stack_len += (bprm->argc + 1) * sizeof(char *);   /* the argv array */
 	stack_len += (bprm->envc + 1) * sizeof(char *);   /* the envp array */
-	stack_len = ALIGN(stack_len, FLAT_STACK_ALIGN);
+	stack_len = ALIGN(stack_len, flat_stack_align());
 
 	res = load_flat_file(bprm, &libinfo, 0, &stack_len);
 	if (res < 0)
diff --git a/include/crypto/hash.h b/include/crypto/hash.h
index f140e4643949..442c290f458c 100644
--- a/include/crypto/hash.h
+++ b/include/crypto/hash.h
@@ -149,7 +149,7 @@ struct ahash_alg {
 
 struct shash_desc {
 	struct crypto_shash *tfm;
-	void *__ctx[] __aligned(ARCH_SLAB_MINALIGN);
+	void *__ctx[] __aligned(ARCH_SLAB_MIN_MINALIGN);
 };
 
 #define HASH_MAX_DIGESTSIZE	 64
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 373b3ef99f4e..80e517593372 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -201,21 +201,33 @@ void kmem_dump_obj(void *object);
 #endif
 
 /*
- * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment.
+ * Setting ARCH_SLAB_MIN_MINALIGN in arch headers allows a different alignment.
  * Intended for arches that get misalignment faults even for 64 bit integer
  * aligned buffers.
  */
-#ifndef ARCH_SLAB_MINALIGN
-#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
+#ifndef ARCH_SLAB_MIN_MINALIGN
+#define ARCH_SLAB_MIN_MINALIGN __alignof__(unsigned long long)
+#endif
+
+/*
+ * Arches can define this function if they want to decide the minimum slab
+ * alignment at runtime. The value returned by the function must be
+ * >= ARCH_SLAB_MIN_MINALIGN.
+ */
+#ifndef arch_slab_minalign
+static inline size_t arch_slab_minalign(void)
+{
+	return ARCH_SLAB_MIN_MINALIGN;
+}
 #endif
 
 /*
  * kmalloc and friends return ARCH_KMALLOC_MINALIGN aligned
- * pointers. kmem_cache_alloc and friends return ARCH_SLAB_MINALIGN
+ * pointers. kmem_cache_alloc and friends return ARCH_SLAB_MIN_MINALIGN
  * aligned pointers.
  */
 #define __assume_kmalloc_alignment __assume_aligned(ARCH_KMALLOC_MINALIGN)
-#define __assume_slab_alignment __assume_aligned(ARCH_SLAB_MINALIGN)
+#define __assume_slab_alignment __assume_aligned(ARCH_SLAB_MIN_MINALIGN)
 #define __assume_page_alignment __assume_aligned(PAGE_SIZE)
 
 /*
diff --git a/mm/slab.c b/mm/slab.c
index 0edb474edef1..97b756976c8b 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3009,10 +3009,9 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
 	objp += obj_offset(cachep);
 	if (cachep->ctor && cachep->flags & SLAB_POISON)
 		cachep->ctor(objp);
-	if (ARCH_SLAB_MINALIGN &&
-	    ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1))) {
-		pr_err("0x%px: not aligned to ARCH_SLAB_MINALIGN=%d\n",
-		       objp, (int)ARCH_SLAB_MINALIGN);
+	if ((unsigned long)objp & (arch_slab_minalign() - 1)) {
+		pr_err("0x%px: not aligned to arch_slab_minalign()=%d\n", objp,
+		       (int)arch_slab_minalign());
 	}
 	return objp;
 }
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 2b3206a2c3b5..33cc49810a54 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -154,8 +154,7 @@ static unsigned int calculate_alignment(slab_flags_t flags,
 		align = max(align, ralign);
 	}
 
-	if (align < ARCH_SLAB_MINALIGN)
-		align = ARCH_SLAB_MINALIGN;
+	align = max_t(size_t, align, arch_slab_minalign());
 
 	return ALIGN(align, sizeof(void *));
 }
diff --git a/mm/slob.c b/mm/slob.c
index 40ea6e2d4ccd..3bd2669bd690 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -478,7 +478,7 @@ static __always_inline void *
 __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
 {
 	unsigned int *m;
-	int minalign = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
+	int minalign = max_t(size_t, ARCH_KMALLOC_MINALIGN, arch_slab_minalign());
 	void *ret;
 
 	gfp &= gfp_allowed_mask;
@@ -555,7 +555,7 @@ void kfree(const void *block)
 
 	sp = virt_to_folio(block);
 	if (folio_test_slab(sp)) {
-		int align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
+		int align = max_t(size_t, ARCH_KMALLOC_MINALIGN, arch_slab_minalign());
 		unsigned int *m = (unsigned int *)(block - align);
 		slob_free(m, *m + align);
 	} else {
@@ -584,7 +584,7 @@ size_t __ksize(const void *block)
 	if (unlikely(!folio_test_slab(folio)))
 		return folio_size(folio);
 
-	align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
+	align = max_t(size_t, ARCH_KMALLOC_MINALIGN, arch_slab_minalign());
 	m = (unsigned int *)(block - align);
 	return SLOB_UNITS(*m) * SLOB_UNIT;
 }
-- 
2.36.0.rc2.479.g8af0fa9b8e-goog


* Re: [PATCH v2] mm: make minimum slab alignment a runtime property
  2022-04-21 21:15 ` Peter Collingbourne
@ 2022-04-22 16:04   ` Andrey Konovalov
  -1 siblings, 0 replies; 10+ messages in thread
From: Andrey Konovalov @ 2022-04-22 16:04 UTC (permalink / raw)
  To: Peter Collingbourne
  Cc: Hyeonggon Yoo, Andrew Morton, Catalin Marinas, Linux ARM,
	Linux Memory Management List, Linux Kernel Mailing List,
	Vlastimil Babka, Pekka Enberg, roman.gushchin, Joonsoo Kim,
	David Rientjes, Herbert Xu, Andrey Ryabinin, Alexander Potapenko,
	Dmitry Vyukov, kasan-dev, Eric Biederman, Kees Cook

On Thu, Apr 21, 2022 at 11:16 PM Peter Collingbourne <pcc@google.com> wrote:
>
> When CONFIG_KASAN_HW_TAGS is enabled we currently increase the minimum
> slab alignment to 16. This happens even if MTE is not supported in
> hardware or disabled via kasan=off, which creates an unnecessary
> memory overhead in those cases. Eliminate this overhead by making
> the minimum slab alignment a runtime property and only aligning to
> 16 if KASAN is enabled at runtime.
>
> On a DragonBoard 845c (non-MTE hardware) with a kernel built with
> CONFIG_KASAN_HW_TAGS, waiting for quiescence after a full Android
> boot I see the following Slab measurements in /proc/meminfo (median
> of 3 reboots):
>
> Before: 169020 kB
> After:  167304 kB

Thanks for the improvement, Peter!

Overall, the patch looks good to me:

Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>

While looking at the code, I noticed a couple of issues in the already
existing comments. Not sure if they are worth fixing, but I'll point
them out just in case.

> Link: https://linux-review.googlesource.com/id/I752e725179b43b144153f4b6f584ceb646473ead
> Signed-off-by: Peter Collingbourne <pcc@google.com>
> ---
> v2:
> - use max instead of max_t in flat_stack_align()
>
>  arch/arc/include/asm/cache.h        |  4 ++--
>  arch/arm/include/asm/cache.h        |  2 +-
>  arch/arm64/include/asm/cache.h      | 19 +++++++++++++------
>  arch/microblaze/include/asm/page.h  |  2 +-
>  arch/riscv/include/asm/cache.h      |  2 +-
>  arch/sparc/include/asm/cache.h      |  2 +-
>  arch/xtensa/include/asm/processor.h |  2 +-
>  fs/binfmt_flat.c                    |  9 ++++++---
>  include/crypto/hash.h               |  2 +-
>  include/linux/slab.h                | 22 +++++++++++++++++-----
>  mm/slab.c                           |  7 +++----
>  mm/slab_common.c                    |  3 +--
>  mm/slob.c                           |  6 +++---
>  13 files changed, 51 insertions(+), 31 deletions(-)
>
> diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
> index f0f1fc5d62b6..b6a7763fd5d6 100644
> --- a/arch/arc/include/asm/cache.h
> +++ b/arch/arc/include/asm/cache.h
> @@ -55,11 +55,11 @@
>   * Make sure slab-allocated buffers are 64-bit aligned when atomic64_t uses
>   * ARCv2 64-bit atomics (LLOCKD/SCONDD). This guarantess runtime 64-bit
>   * alignment for any atomic64_t embedded in buffer.
> - * Default ARCH_SLAB_MINALIGN is __alignof__(long long) which has a relaxed
> + * Default ARCH_SLAB_MIN_MINALIGN is __alignof__(long long) which has a relaxed
>   * value of 4 (and not 8) in ARC ABI.
>   */
>  #if defined(CONFIG_ARC_HAS_LL64) && defined(CONFIG_ARC_HAS_LLSC)
> -#define ARCH_SLAB_MINALIGN     8
> +#define ARCH_SLAB_MIN_MINALIGN 8
>  #endif
>
>  extern int ioc_enable;
> diff --git a/arch/arm/include/asm/cache.h b/arch/arm/include/asm/cache.h
> index e3ea34558ada..3e1018bb9805 100644
> --- a/arch/arm/include/asm/cache.h
> +++ b/arch/arm/include/asm/cache.h
> @@ -21,7 +21,7 @@
>   * With EABI on ARMv5 and above we must have 64-bit aligned slab pointers.
>   */
>  #if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5)
> -#define ARCH_SLAB_MINALIGN 8
> +#define ARCH_SLAB_MIN_MINALIGN 8
>  #endif
>
>  #define __read_mostly __section(".data..read_mostly")
> diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
> index a074459f8f2f..38f171591c3f 100644
> --- a/arch/arm64/include/asm/cache.h
> +++ b/arch/arm64/include/asm/cache.h
> @@ -6,6 +6,7 @@
>  #define __ASM_CACHE_H
>
>  #include <asm/cputype.h>
> +#include <asm/mte-def.h>
>
>  #define CTR_L1IP_SHIFT         14
>  #define CTR_L1IP_MASK          3
> @@ -49,15 +50,21 @@
>   */
>  #define ARCH_DMA_MINALIGN      (128)
>
> -#ifdef CONFIG_KASAN_SW_TAGS
> -#define ARCH_SLAB_MINALIGN     (1ULL << KASAN_SHADOW_SCALE_SHIFT)
> -#elif defined(CONFIG_KASAN_HW_TAGS)
> -#define ARCH_SLAB_MINALIGN     MTE_GRANULE_SIZE
> -#endif
> -
>  #ifndef __ASSEMBLY__
>
>  #include <linux/bitops.h>
> +#include <linux/kasan-enabled.h>
> +
> +#ifdef CONFIG_KASAN_SW_TAGS
> +#define ARCH_SLAB_MIN_MINALIGN (1ULL << KASAN_SHADOW_SCALE_SHIFT)
> +#elif defined(CONFIG_KASAN_HW_TAGS)
> +static inline size_t arch_slab_minalign(void)
> +{
> +       return kasan_hw_tags_enabled() ? MTE_GRANULE_SIZE :
> +                                        __alignof__(unsigned long long);
> +}
> +#define arch_slab_minalign() arch_slab_minalign()
> +#endif
>
>  #define ICACHEF_ALIASING       0
>  #define ICACHEF_VPIPT          1
> diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h
> index 4b8b2fa78fc5..ccdbc1da3c3e 100644
> --- a/arch/microblaze/include/asm/page.h
> +++ b/arch/microblaze/include/asm/page.h
> @@ -33,7 +33,7 @@
>  /* MS be sure that SLAB allocates aligned objects */
>  #define ARCH_DMA_MINALIGN      L1_CACHE_BYTES
>
> -#define ARCH_SLAB_MINALIGN     L1_CACHE_BYTES
> +#define ARCH_SLAB_MIN_MINALIGN L1_CACHE_BYTES
>
>  /*
>   * PAGE_OFFSET -- the first address of the first page of memory. With MMU
> diff --git a/arch/riscv/include/asm/cache.h b/arch/riscv/include/asm/cache.h
> index 9b58b104559e..7beb3b5d27c7 100644
> --- a/arch/riscv/include/asm/cache.h
> +++ b/arch/riscv/include/asm/cache.h
> @@ -16,7 +16,7 @@
>   * the flat loader aligns it accordingly.
>   */
>  #ifndef CONFIG_MMU
> -#define ARCH_SLAB_MINALIGN     16
> +#define ARCH_SLAB_MIN_MINALIGN 16
>  #endif
>
>  #endif /* _ASM_RISCV_CACHE_H */
> diff --git a/arch/sparc/include/asm/cache.h b/arch/sparc/include/asm/cache.h
> index e62fd0e72606..9d8cb4687b7e 100644
> --- a/arch/sparc/include/asm/cache.h
> +++ b/arch/sparc/include/asm/cache.h
> @@ -8,7 +8,7 @@
>  #ifndef _SPARC_CACHE_H
>  #define _SPARC_CACHE_H
>
> -#define ARCH_SLAB_MINALIGN     __alignof__(unsigned long long)
> +#define ARCH_SLAB_MIN_MINALIGN __alignof__(unsigned long long)
>
>  #define L1_CACHE_SHIFT 5
>  #define L1_CACHE_BYTES 32
> diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
> index 4489a27d527a..e3ea278e3fcf 100644
> --- a/arch/xtensa/include/asm/processor.h
> +++ b/arch/xtensa/include/asm/processor.h
> @@ -18,7 +18,7 @@
>  #include <asm/types.h>
>  #include <asm/regs.h>
>
> -#define ARCH_SLAB_MINALIGN XTENSA_STACK_ALIGNMENT
> +#define ARCH_SLAB_MIN_MINALIGN XTENSA_STACK_ALIGNMENT
>
>  /*
>   * User space process size: 1 GB.
> diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
> index 626898150011..23ce3439eafa 100644
> --- a/fs/binfmt_flat.c
> +++ b/fs/binfmt_flat.c
> @@ -64,7 +64,10 @@
>   * Here we can be a bit looser than the data sections since this
>   * needs to only meet arch ABI requirements.
>   */
> -#define FLAT_STACK_ALIGN       max_t(unsigned long, sizeof(void *), ARCH_SLAB_MINALIGN)
> +static size_t flat_stack_align(void)
> +{
> +       return max(sizeof(void *), arch_slab_minalign());
> +}
>
>  #define RELOC_FAILED 0xff00ff01                /* Relocation incorrect somewhere */
>  #define UNLOADED_LIB 0x7ff000ff                /* Placeholder for unused library */
> @@ -148,7 +151,7 @@ static int create_flat_tables(struct linux_binprm *bprm, unsigned long arg_start
>                 sp -= 2; /* argvp + envp */
>         sp -= 1;  /* &argc */
>
> -       current->mm->start_stack = (unsigned long)sp & -FLAT_STACK_ALIGN;
> +       current->mm->start_stack = (unsigned long)sp & -flat_stack_align();
>         sp = (unsigned long __user *)current->mm->start_stack;
>
>         if (put_user(bprm->argc, sp++))
> @@ -966,7 +969,7 @@ static int load_flat_binary(struct linux_binprm *bprm)
>  #endif
>         stack_len += (bprm->argc + 1) * sizeof(char *);   /* the argv array */
>         stack_len += (bprm->envc + 1) * sizeof(char *);   /* the envp array */
> -       stack_len = ALIGN(stack_len, FLAT_STACK_ALIGN);
> +       stack_len = ALIGN(stack_len, flat_stack_align());
>
>         res = load_flat_file(bprm, &libinfo, 0, &stack_len);
>         if (res < 0)
> diff --git a/include/crypto/hash.h b/include/crypto/hash.h
> index f140e4643949..442c290f458c 100644
> --- a/include/crypto/hash.h
> +++ b/include/crypto/hash.h
> @@ -149,7 +149,7 @@ struct ahash_alg {
>
>  struct shash_desc {
>         struct crypto_shash *tfm;
> -       void *__ctx[] __aligned(ARCH_SLAB_MINALIGN);
> +       void *__ctx[] __aligned(ARCH_SLAB_MIN_MINALIGN);
>  };
>
>  #define HASH_MAX_DIGESTSIZE     64
> diff --git a/include/linux/slab.h b/include/linux/slab.h
> index 373b3ef99f4e..80e517593372 100644
> --- a/include/linux/slab.h
> +++ b/include/linux/slab.h
> @@ -201,21 +201,33 @@ void kmem_dump_obj(void *object);
>  #endif
>
>  /*
> - * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment.
> + * Setting ARCH_SLAB_MIN_MINALIGN in arch headers allows a different alignment.
>   * Intended for arches that get misalignment faults even for 64 bit integer
>   * aligned buffers.
>   */
> -#ifndef ARCH_SLAB_MINALIGN
> -#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
> +#ifndef ARCH_SLAB_MIN_MINALIGN
> +#define ARCH_SLAB_MIN_MINALIGN __alignof__(unsigned long long)
> +#endif
> +
> +/*
> + * Arches can define this function if they want to decide the minimum slab
> + * alignment at runtime. The value returned by the function must be
> + * >= ARCH_SLAB_MIN_MINALIGN.
> + */
> +#ifndef arch_slab_minalign
> +static inline size_t arch_slab_minalign(void)
> +{
> +       return ARCH_SLAB_MIN_MINALIGN;
> +}
>  #endif
>
>  /*
>   * kmalloc and friends return ARCH_KMALLOC_MINALIGN aligned
> - * pointers. kmem_cache_alloc and friends return ARCH_SLAB_MINALIGN
> + * pointers. kmem_cache_alloc and friends return ARCH_SLAB_MIN_MINALIGN
>   * aligned pointers.

This comment is not precise: kmalloc relies on kmem_cache_alloc, so
kmalloc technically returns pointers aligned to
max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MIN_MINALIGN). See
create_kmalloc_cache()->create_boot_cache()->calculate_alignment() for
SLAB and SLUB, and __do_kmalloc_node() for SLOB. This alignment is
stronger than the one specified for __assume_kmalloc_alignment
below, so the code should be fine. However, the comment is confusing.
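
To illustrate, the guarantee kmalloc actually provides could be written
roughly like this (just a sketch; kmalloc_effective_align() is a made-up
name, not something in the tree):

	/* Effective kmalloc alignment, per the call chain above. */
	static inline size_t kmalloc_effective_align(void)
	{
		return max_t(size_t, ARCH_KMALLOC_MINALIGN, arch_slab_minalign());
	}

__assume_kmalloc_alignment below only promises ARCH_KMALLOC_MINALIGN to
the compiler, which is weaker than the above but still correct.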

Also, the comment next to the ARCH_KMALLOC_MINALIGN definition says
"Setting ARCH_KMALLOC_MINALIGN in arch headers" while it should say
"Setting ARCH_DMA_MINALIGN in arch headers".

>   */
>  #define __assume_kmalloc_alignment __assume_aligned(ARCH_KMALLOC_MINALIGN)
> -#define __assume_slab_alignment __assume_aligned(ARCH_SLAB_MINALIGN)
> +#define __assume_slab_alignment __assume_aligned(ARCH_SLAB_MIN_MINALIGN)
>  #define __assume_page_alignment __assume_aligned(PAGE_SIZE)
>
>  /*
> diff --git a/mm/slab.c b/mm/slab.c
> index 0edb474edef1..97b756976c8b 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -3009,10 +3009,9 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
>         objp += obj_offset(cachep);
>         if (cachep->ctor && cachep->flags & SLAB_POISON)
>                 cachep->ctor(objp);
> -       if (ARCH_SLAB_MINALIGN &&
> -           ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1))) {
> -               pr_err("0x%px: not aligned to ARCH_SLAB_MINALIGN=%d\n",
> -                      objp, (int)ARCH_SLAB_MINALIGN);
> +       if ((unsigned long)objp & (arch_slab_minalign() - 1)) {
> +               pr_err("0x%px: not aligned to arch_slab_minalign()=%d\n", objp,
> +                      (int)arch_slab_minalign());
>         }
>         return objp;
>  }
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index 2b3206a2c3b5..33cc49810a54 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -154,8 +154,7 @@ static unsigned int calculate_alignment(slab_flags_t flags,
>                 align = max(align, ralign);
>         }
>
> -       if (align < ARCH_SLAB_MINALIGN)
> -               align = ARCH_SLAB_MINALIGN;
> +       align = max_t(size_t, align, arch_slab_minalign());
>
>         return ALIGN(align, sizeof(void *));
>  }
> diff --git a/mm/slob.c b/mm/slob.c
> index 40ea6e2d4ccd..3bd2669bd690 100644
> --- a/mm/slob.c
> +++ b/mm/slob.c
> @@ -478,7 +478,7 @@ static __always_inline void *
>  __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
>  {
>         unsigned int *m;
> -       int minalign = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
> +       int minalign = max_t(size_t, ARCH_KMALLOC_MINALIGN, arch_slab_minalign());
>         void *ret;
>
>         gfp &= gfp_allowed_mask;
> @@ -555,7 +555,7 @@ void kfree(const void *block)
>
>         sp = virt_to_folio(block);
>         if (folio_test_slab(sp)) {
> -               int align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
> +               int align = max_t(size_t, ARCH_KMALLOC_MINALIGN, arch_slab_minalign());
>                 unsigned int *m = (unsigned int *)(block - align);
>                 slob_free(m, *m + align);
>         } else {
> @@ -584,7 +584,7 @@ size_t __ksize(const void *block)
>         if (unlikely(!folio_test_slab(folio)))
>                 return folio_size(folio);
>
> -       align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
> +       align = max_t(size_t, ARCH_KMALLOC_MINALIGN, arch_slab_minalign());
>         m = (unsigned int *)(block - align);
>         return SLOB_UNITS(*m) * SLOB_UNIT;
>  }
> --
> 2.36.0.rc2.479.g8af0fa9b8e-goog
>

* Re: [PATCH v2] mm: make minimum slab alignment a runtime property
  2022-04-21 21:15 ` Peter Collingbourne
@ 2022-04-22 18:02   ` Catalin Marinas
  -1 siblings, 0 replies; 10+ messages in thread
From: Catalin Marinas @ 2022-04-22 18:02 UTC (permalink / raw)
  To: Peter Collingbourne
  Cc: Andrey Konovalov, Hyeonggon Yoo, Andrew Morton, Linux ARM,
	Linux Memory Management List, Linux Kernel Mailing List, vbabka,
	penberg, roman.gushchin, iamjoonsoo.kim, rientjes, Herbert Xu,
	Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov, kasan-dev,
	Eric Biederman, Kees Cook

On Thu, Apr 21, 2022 at 02:15:48PM -0700, Peter Collingbourne wrote:
> diff --git a/include/linux/slab.h b/include/linux/slab.h
> index 373b3ef99f4e..80e517593372 100644
> --- a/include/linux/slab.h
> +++ b/include/linux/slab.h
> @@ -201,21 +201,33 @@ void kmem_dump_obj(void *object);
>  #endif
>  
>  /*
> - * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment.
> + * Setting ARCH_SLAB_MIN_MINALIGN in arch headers allows a different alignment.
>   * Intended for arches that get misalignment faults even for 64 bit integer
>   * aligned buffers.
>   */
> -#ifndef ARCH_SLAB_MINALIGN
> -#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
> +#ifndef ARCH_SLAB_MIN_MINALIGN
> +#define ARCH_SLAB_MIN_MINALIGN __alignof__(unsigned long long)
> +#endif

Sorry, only a drive-by comment; I'll look at the arm64 parts next week.
I've seen it mentioned in the first version, but what's the point of MIN_MIN
and not just MIN?

-- 
Catalin

* Re: [PATCH v2] mm: make minimum slab alignment a runtime property
  2022-04-22 16:04   ` Andrey Konovalov
@ 2022-04-22 19:45     ` Peter Collingbourne
  -1 siblings, 0 replies; 10+ messages in thread
From: Peter Collingbourne @ 2022-04-22 19:45 UTC (permalink / raw)
  To: Andrey Konovalov
  Cc: Hyeonggon Yoo, Andrew Morton, Catalin Marinas, Linux ARM,
	Linux Memory Management List, Linux Kernel Mailing List,
	Vlastimil Babka, Pekka Enberg, roman.gushchin, Joonsoo Kim,
	David Rientjes, Herbert Xu, Andrey Ryabinin, Alexander Potapenko,
	Dmitry Vyukov, kasan-dev, Eric Biederman, Kees Cook

On Fri, Apr 22, 2022 at 9:04 AM Andrey Konovalov <andreyknvl@gmail.com> wrote:
>
> On Thu, Apr 21, 2022 at 11:16 PM Peter Collingbourne <pcc@google.com> wrote:
> >
> > When CONFIG_KASAN_HW_TAGS is enabled we currently increase the minimum
> > slab alignment to 16. This happens even if MTE is not supported in
> > hardware or disabled via kasan=off, which creates an unnecessary
> > memory overhead in those cases. Eliminate this overhead by making
> > the minimum slab alignment a runtime property and only aligning to
> > 16 if KASAN is enabled at runtime.
> >
> > On a DragonBoard 845c (non-MTE hardware) with a kernel built with
> > CONFIG_KASAN_HW_TAGS, waiting for quiescence after a full Android
> > boot I see the following Slab measurements in /proc/meminfo (median
> > of 3 reboots):
> >
> > Before: 169020 kB
> > After:  167304 kB
>
> Thanks for the improvement, Peter!
>
> Overall, the patch looks good to me:
>
> Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>

Thanks!

> While looking at the code, I noticed a couple of issues in the already
> existing comments. Not sure if they are worth fixing, but I'll point
> them out just in case.
>
> > Link: https://linux-review.googlesource.com/id/I752e725179b43b144153f4b6f584ceb646473ead
> > Signed-off-by: Peter Collingbourne <pcc@google.com>
> > ---
> > v2:
> > - use max instead of max_t in flat_stack_align()
> >
> >  arch/arc/include/asm/cache.h        |  4 ++--
> >  arch/arm/include/asm/cache.h        |  2 +-
> >  arch/arm64/include/asm/cache.h      | 19 +++++++++++++------
> >  arch/microblaze/include/asm/page.h  |  2 +-
> >  arch/riscv/include/asm/cache.h      |  2 +-
> >  arch/sparc/include/asm/cache.h      |  2 +-
> >  arch/xtensa/include/asm/processor.h |  2 +-
> >  fs/binfmt_flat.c                    |  9 ++++++---
> >  include/crypto/hash.h               |  2 +-
> >  include/linux/slab.h                | 22 +++++++++++++++++-----
> >  mm/slab.c                           |  7 +++----
> >  mm/slab_common.c                    |  3 +--
> >  mm/slob.c                           |  6 +++---
> >  13 files changed, 51 insertions(+), 31 deletions(-)
> >
> > diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
> > index f0f1fc5d62b6..b6a7763fd5d6 100644
> > --- a/arch/arc/include/asm/cache.h
> > +++ b/arch/arc/include/asm/cache.h
> > @@ -55,11 +55,11 @@
> >   * Make sure slab-allocated buffers are 64-bit aligned when atomic64_t uses
> >   * ARCv2 64-bit atomics (LLOCKD/SCONDD). This guarantess runtime 64-bit
> >   * alignment for any atomic64_t embedded in buffer.
> > - * Default ARCH_SLAB_MINALIGN is __alignof__(long long) which has a relaxed
> > + * Default ARCH_SLAB_MIN_MINALIGN is __alignof__(long long) which has a relaxed
> >   * value of 4 (and not 8) in ARC ABI.
> >   */
> >  #if defined(CONFIG_ARC_HAS_LL64) && defined(CONFIG_ARC_HAS_LLSC)
> > -#define ARCH_SLAB_MINALIGN     8
> > +#define ARCH_SLAB_MIN_MINALIGN 8
> >  #endif
> >
> >  extern int ioc_enable;
> > diff --git a/arch/arm/include/asm/cache.h b/arch/arm/include/asm/cache.h
> > index e3ea34558ada..3e1018bb9805 100644
> > --- a/arch/arm/include/asm/cache.h
> > +++ b/arch/arm/include/asm/cache.h
> > @@ -21,7 +21,7 @@
> >   * With EABI on ARMv5 and above we must have 64-bit aligned slab pointers.
> >   */
> >  #if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5)
> > -#define ARCH_SLAB_MINALIGN 8
> > +#define ARCH_SLAB_MIN_MINALIGN 8
> >  #endif
> >
> >  #define __read_mostly __section(".data..read_mostly")
> > diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
> > index a074459f8f2f..38f171591c3f 100644
> > --- a/arch/arm64/include/asm/cache.h
> > +++ b/arch/arm64/include/asm/cache.h
> > @@ -6,6 +6,7 @@
> >  #define __ASM_CACHE_H
> >
> >  #include <asm/cputype.h>
> > +#include <asm/mte-def.h>
> >
> >  #define CTR_L1IP_SHIFT         14
> >  #define CTR_L1IP_MASK          3
> > @@ -49,15 +50,21 @@
> >   */
> >  #define ARCH_DMA_MINALIGN      (128)
> >
> > -#ifdef CONFIG_KASAN_SW_TAGS
> > -#define ARCH_SLAB_MINALIGN     (1ULL << KASAN_SHADOW_SCALE_SHIFT)
> > -#elif defined(CONFIG_KASAN_HW_TAGS)
> > -#define ARCH_SLAB_MINALIGN     MTE_GRANULE_SIZE
> > -#endif
> > -
> >  #ifndef __ASSEMBLY__
> >
> >  #include <linux/bitops.h>
> > +#include <linux/kasan-enabled.h>
> > +
> > +#ifdef CONFIG_KASAN_SW_TAGS
> > +#define ARCH_SLAB_MIN_MINALIGN (1ULL << KASAN_SHADOW_SCALE_SHIFT)
> > +#elif defined(CONFIG_KASAN_HW_TAGS)
> > +static inline size_t arch_slab_minalign(void)
> > +{
> > +       return kasan_hw_tags_enabled() ? MTE_GRANULE_SIZE :
> > +                                        __alignof__(unsigned long long);
> > +}
> > +#define arch_slab_minalign() arch_slab_minalign()
> > +#endif
> >
> >  #define ICACHEF_ALIASING       0
> >  #define ICACHEF_VPIPT          1
> > diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h
> > index 4b8b2fa78fc5..ccdbc1da3c3e 100644
> > --- a/arch/microblaze/include/asm/page.h
> > +++ b/arch/microblaze/include/asm/page.h
> > @@ -33,7 +33,7 @@
> >  /* MS be sure that SLAB allocates aligned objects */
> >  #define ARCH_DMA_MINALIGN      L1_CACHE_BYTES
> >
> > -#define ARCH_SLAB_MINALIGN     L1_CACHE_BYTES
> > +#define ARCH_SLAB_MIN_MINALIGN L1_CACHE_BYTES
> >
> >  /*
> >   * PAGE_OFFSET -- the first address of the first page of memory. With MMU
> > diff --git a/arch/riscv/include/asm/cache.h b/arch/riscv/include/asm/cache.h
> > index 9b58b104559e..7beb3b5d27c7 100644
> > --- a/arch/riscv/include/asm/cache.h
> > +++ b/arch/riscv/include/asm/cache.h
> > @@ -16,7 +16,7 @@
> >   * the flat loader aligns it accordingly.
> >   */
> >  #ifndef CONFIG_MMU
> > -#define ARCH_SLAB_MINALIGN     16
> > +#define ARCH_SLAB_MIN_MINALIGN 16
> >  #endif
> >
> >  #endif /* _ASM_RISCV_CACHE_H */
> > diff --git a/arch/sparc/include/asm/cache.h b/arch/sparc/include/asm/cache.h
> > index e62fd0e72606..9d8cb4687b7e 100644
> > --- a/arch/sparc/include/asm/cache.h
> > +++ b/arch/sparc/include/asm/cache.h
> > @@ -8,7 +8,7 @@
> >  #ifndef _SPARC_CACHE_H
> >  #define _SPARC_CACHE_H
> >
> > -#define ARCH_SLAB_MINALIGN     __alignof__(unsigned long long)
> > +#define ARCH_SLAB_MIN_MINALIGN __alignof__(unsigned long long)
> >
> >  #define L1_CACHE_SHIFT 5
> >  #define L1_CACHE_BYTES 32
> > diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
> > index 4489a27d527a..e3ea278e3fcf 100644
> > --- a/arch/xtensa/include/asm/processor.h
> > +++ b/arch/xtensa/include/asm/processor.h
> > @@ -18,7 +18,7 @@
> >  #include <asm/types.h>
> >  #include <asm/regs.h>
> >
> > -#define ARCH_SLAB_MINALIGN XTENSA_STACK_ALIGNMENT
> > +#define ARCH_SLAB_MIN_MINALIGN XTENSA_STACK_ALIGNMENT
> >
> >  /*
> >   * User space process size: 1 GB.
> > diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
> > index 626898150011..23ce3439eafa 100644
> > --- a/fs/binfmt_flat.c
> > +++ b/fs/binfmt_flat.c
> > @@ -64,7 +64,10 @@
> >   * Here we can be a bit looser than the data sections since this
> >   * needs to only meet arch ABI requirements.
> >   */
> > -#define FLAT_STACK_ALIGN       max_t(unsigned long, sizeof(void *), ARCH_SLAB_MINALIGN)
> > +static size_t flat_stack_align(void)
> > +{
> > +       return max(sizeof(void *), arch_slab_minalign());
> > +}
> >
> >  #define RELOC_FAILED 0xff00ff01                /* Relocation incorrect somewhere */
> >  #define UNLOADED_LIB 0x7ff000ff                /* Placeholder for unused library */
> > @@ -148,7 +151,7 @@ static int create_flat_tables(struct linux_binprm *bprm, unsigned long arg_start
> >                 sp -= 2; /* argvp + envp */
> >         sp -= 1;  /* &argc */
> >
> > -       current->mm->start_stack = (unsigned long)sp & -FLAT_STACK_ALIGN;
> > +       current->mm->start_stack = (unsigned long)sp & -flat_stack_align();
> >         sp = (unsigned long __user *)current->mm->start_stack;
> >
> >         if (put_user(bprm->argc, sp++))
> > @@ -966,7 +969,7 @@ static int load_flat_binary(struct linux_binprm *bprm)
> >  #endif
> >         stack_len += (bprm->argc + 1) * sizeof(char *);   /* the argv array */
> >         stack_len += (bprm->envc + 1) * sizeof(char *);   /* the envp array */
> > -       stack_len = ALIGN(stack_len, FLAT_STACK_ALIGN);
> > +       stack_len = ALIGN(stack_len, flat_stack_align());
> >
> >         res = load_flat_file(bprm, &libinfo, 0, &stack_len);
> >         if (res < 0)
> > diff --git a/include/crypto/hash.h b/include/crypto/hash.h
> > index f140e4643949..442c290f458c 100644
> > --- a/include/crypto/hash.h
> > +++ b/include/crypto/hash.h
> > @@ -149,7 +149,7 @@ struct ahash_alg {
> >
> >  struct shash_desc {
> >         struct crypto_shash *tfm;
> > -       void *__ctx[] __aligned(ARCH_SLAB_MINALIGN);
> > +       void *__ctx[] __aligned(ARCH_SLAB_MIN_MINALIGN);
> >  };
> >
> >  #define HASH_MAX_DIGESTSIZE     64
> > diff --git a/include/linux/slab.h b/include/linux/slab.h
> > index 373b3ef99f4e..80e517593372 100644
> > --- a/include/linux/slab.h
> > +++ b/include/linux/slab.h
> > @@ -201,21 +201,33 @@ void kmem_dump_obj(void *object);
> >  #endif
> >
> >  /*
> > - * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment.
> > + * Setting ARCH_SLAB_MIN_MINALIGN in arch headers allows a different alignment.
> >   * Intended for arches that get misalignment faults even for 64 bit integer
> >   * aligned buffers.
> >   */
> > -#ifndef ARCH_SLAB_MINALIGN
> > -#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
> > +#ifndef ARCH_SLAB_MIN_MINALIGN
> > +#define ARCH_SLAB_MIN_MINALIGN __alignof__(unsigned long long)
> > +#endif
> > +
> > +/*
> > + * Arches can define this function if they want to decide the minimum slab
> > + * alignment at runtime. The value returned by the function must be
> > + * >= ARCH_SLAB_MIN_MINALIGN.
> > + */
> > +#ifndef arch_slab_minalign
> > +static inline size_t arch_slab_minalign(void)
> > +{
> > +       return ARCH_SLAB_MIN_MINALIGN;
> > +}
> >  #endif
> >
> >  /*
> >   * kmalloc and friends return ARCH_KMALLOC_MINALIGN aligned
> > - * pointers. kmem_cache_alloc and friends return ARCH_SLAB_MINALIGN
> > + * pointers. kmem_cache_alloc and friends return ARCH_SLAB_MIN_MINALIGN
> >   * aligned pointers.
>
> This comment is not precise: kmalloc relies on kmem_cache_alloc, so
> kmalloc technically returns pointers aligned to
> max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MIN_MINALIGN). See
> create_kmalloc_cache()->create_boot_cache()->calculate_alignment() for
> SLAB and SLUB, and __do_kmalloc_node() for SLOB. This alignment is
> stronger than the one specified for __assume_kmalloc_alignment
> below, so the code should be fine. However, the comment is confusing.
>
> Also, the comment next to the ARCH_KMALLOC_MINALIGN definition says
> "Setting ARCH_KMALLOC_MINALIGN in arch headers" while it should say
> "Setting ARCH_DMA_MINALIGN in arch headers".

Good catches. Let's fix these issues separately (especially since I
reverted the ARCH_SLAB_MIN_MINALIGN change, so the patches shouldn't
conflict). Would you like to send patches? If not, I can.

Peter
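
(For context, below is a minimal standalone sketch of the relationship Andrey
describes above, assuming calculate_alignment() clamps a kmalloc cache's
alignment to the runtime slab minimum as outlined; the constants and the
effective_kmalloc_align() helper are illustrative, not kernel code.)

/*
 * Standalone sketch (not kernel code): models how a kmalloc cache ends up
 * aligned to max(ARCH_KMALLOC_MINALIGN, arch_slab_minalign()), per the
 * calculate_alignment() path described above.
 */
#include <stdio.h>
#include <stddef.h>

#define ARCH_KMALLOC_MINALIGN	8	/* e.g. __alignof__(unsigned long long) */
#define ARCH_SLAB_MIN_MINALIGN	8

/* Stand-in for arch_slab_minalign(): 16 only when MTE/KASAN is live. */
static size_t slab_minalign(int kasan_hw_tags_enabled)
{
	return kasan_hw_tags_enabled ? 16 : ARCH_SLAB_MIN_MINALIGN;
}

/* The alignment a kmalloc cache effectively gets at creation time. */
static size_t effective_kmalloc_align(int kasan_hw_tags_enabled)
{
	size_t align = ARCH_KMALLOC_MINALIGN;
	size_t minalign = slab_minalign(kasan_hw_tags_enabled);

	return align > minalign ? align : minalign;
}

int main(void)
{
	printf("kasan off: %zu-byte kmalloc alignment\n",
	       effective_kmalloc_align(0));
	printf("kasan on:  %zu-byte kmalloc alignment\n",
	       effective_kmalloc_align(1));
	return 0;
}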

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] mm: make minimum slab alignment a runtime property
  2022-04-22 18:02   ` Catalin Marinas
@ 2022-04-22 20:08     ` Peter Collingbourne
  -1 siblings, 0 replies; 10+ messages in thread
From: Peter Collingbourne @ 2022-04-22 20:08 UTC (permalink / raw)
  To: Catalin Marinas
  Cc: Andrey Konovalov, Hyeonggon Yoo, Andrew Morton, Linux ARM,
	Linux Memory Management List, Linux Kernel Mailing List,
	Vlastimil Babka, Pekka Enberg, roman.gushchin, Joonsoo Kim,
	David Rientjes, Herbert Xu, Andrey Ryabinin, Alexander Potapenko,
	Dmitry Vyukov, kasan-dev, Eric Biederman, Kees Cook

On Fri, Apr 22, 2022 at 11:03 AM Catalin Marinas
<catalin.marinas@arm.com> wrote:
>
> On Thu, Apr 21, 2022 at 02:15:48PM -0700, Peter Collingbourne wrote:
> > diff --git a/include/linux/slab.h b/include/linux/slab.h
> > index 373b3ef99f4e..80e517593372 100644
> > --- a/include/linux/slab.h
> > +++ b/include/linux/slab.h
> > @@ -201,21 +201,33 @@ void kmem_dump_obj(void *object);
> >  #endif
> >
> >  /*
> > - * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment.
> > + * Setting ARCH_SLAB_MIN_MINALIGN in arch headers allows a different alignment.
> >   * Intended for arches that get misalignment faults even for 64 bit integer
> >   * aligned buffers.
> >   */
> > -#ifndef ARCH_SLAB_MINALIGN
> > -#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
> > +#ifndef ARCH_SLAB_MIN_MINALIGN
> > +#define ARCH_SLAB_MIN_MINALIGN __alignof__(unsigned long long)
> > +#endif
>
> Sorry, only a drive-by comment; I'll look at the arm64 parts next week.
> I've seen it mentioned in the first version, but what's the point of
> MIN_MIN and not just MIN?

I tried to explain it here:
https://lore.kernel.org/all/CAMn1gO5xHZvFSSsW5sTVaUBN_gS-cYYNMG3PnpgCmh7kk_Zx7Q@mail.gmail.com/

In the end I decided to go back to MIN, so this is moot.

Peter

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2022-04-22 21:14 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-21 21:15 [PATCH v2] mm: make minimum slab alignment a runtime property Peter Collingbourne
2022-04-21 21:15 ` Peter Collingbourne
2022-04-22 16:04 ` Andrey Konovalov
2022-04-22 16:04   ` Andrey Konovalov
2022-04-22 19:45   ` Peter Collingbourne
2022-04-22 19:45     ` Peter Collingbourne
2022-04-22 18:02 ` Catalin Marinas
2022-04-22 18:02   ` Catalin Marinas
2022-04-22 20:08   ` Peter Collingbourne
2022-04-22 20:08     ` Peter Collingbourne
