All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v4 1/4] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags
@ 2020-09-03  7:01 Alex Shi
  2020-09-03  7:01 ` [PATCH v4 2/4] mm/pageblock: remove false sharing in pageblock_flags Alex Shi
                   ` (3 more replies)
  0 siblings, 4 replies; 23+ messages in thread
From: Alex Shi @ 2020-09-03  7:01 UTC (permalink / raw)
  To: Anshuman Khandual, David Hildenbrand, Matthew Wilcox,
	Vlastimil Babka, Alexander Duyck
  Cc: Andrew Morton, Mel Gorman, linux-mm, linux-kernel

pageblock_flags is used as long, since every pageblock_flags is just 4
bits, 'long' size will include 8(32bit machine) or 16 pageblocks' flags,
that flag setting has to sync in cmpxchg with 7 or 15 other pageblock
flags. It would cause long waiting for sync.

If we could change the pageblock_flags variable as char, we could use
char size cmpxchg, which just sync up with 2 pageblock flags. it could
relief the false sharing in cmpxchg.

Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: linux-mm@kvack.org
Cc: linux-kernel@vger.kernel.org
---
 include/linux/mmzone.h          |  6 +++---
 include/linux/pageblock-flags.h |  2 +-
 mm/page_alloc.c                 | 38 +++++++++++++++++++-------------------
 3 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 8379432f4f2f..be676e659fb7 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -437,7 +437,7 @@ struct zone {
 	 * Flags for a pageblock_nr_pages block. See pageblock-flags.h.
 	 * In SPARSEMEM, this map is stored in struct mem_section
 	 */
-	unsigned long		*pageblock_flags;
+	unsigned char		*pageblock_flags;
 #endif /* CONFIG_SPARSEMEM */
 
 	/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
@@ -1159,7 +1159,7 @@ struct mem_section_usage {
 	DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
 #endif
 	/* See declaration of similar field in struct zone */
-	unsigned long pageblock_flags[0];
+	unsigned char	pageblock_flags[0];
 };
 
 void subsection_map_init(unsigned long pfn, unsigned long nr_pages);
@@ -1212,7 +1212,7 @@ struct mem_section {
 extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
 #endif
 
-static inline unsigned long *section_to_usemap(struct mem_section *ms)
+static inline unsigned char *section_to_usemap(struct mem_section *ms)
 {
 	return ms->usage->pageblock_flags;
 }
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index fff52ad370c1..d189441568eb 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -54,7 +54,7 @@ enum pageblock_bits {
 /* Forward declaration */
 struct page;
 
-unsigned long get_pfnblock_flags_mask(struct page *page,
+unsigned char get_pfnblock_flags_mask(struct page *page,
 				unsigned long pfn,
 				unsigned long mask);
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fab5e97dc9ca..81e96d4d9c42 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -445,7 +445,7 @@ static inline bool defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
 #endif
 
 /* Return a pointer to the bitmap storing bits affecting a block of pages */
-static inline unsigned long *get_pageblock_bitmap(struct page *page,
+static inline unsigned char *get_pageblock_bitmap(struct page *page,
 							unsigned long pfn)
 {
 #ifdef CONFIG_SPARSEMEM
@@ -474,24 +474,24 @@ static inline int pfn_to_bitidx(struct page *page, unsigned long pfn)
  * Return: pageblock_bits flags
  */
 static __always_inline
-unsigned long __get_pfnblock_flags_mask(struct page *page,
+unsigned char __get_pfnblock_flags_mask(struct page *page,
 					unsigned long pfn,
 					unsigned long mask)
 {
-	unsigned long *bitmap;
-	unsigned long bitidx, word_bitidx;
-	unsigned long word;
+	unsigned char *bitmap;
+	unsigned long bitidx, byte_bitidx;
+	unsigned char byte;
 
 	bitmap = get_pageblock_bitmap(page, pfn);
 	bitidx = pfn_to_bitidx(page, pfn);
-	word_bitidx = bitidx / BITS_PER_LONG;
-	bitidx &= (BITS_PER_LONG-1);
+	byte_bitidx = bitidx / BITS_PER_BYTE;
+	bitidx &= (BITS_PER_BYTE-1);
 
-	word = bitmap[word_bitidx];
-	return (word >> bitidx) & mask;
+	byte = bitmap[byte_bitidx];
+	return (byte >> bitidx) & mask;
 }
 
-unsigned long get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
+unsigned char get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
 					unsigned long mask)
 {
 	return __get_pfnblock_flags_mask(page, pfn, mask);
@@ -513,29 +513,29 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
 					unsigned long pfn,
 					unsigned long mask)
 {
-	unsigned long *bitmap;
-	unsigned long bitidx, word_bitidx;
-	unsigned long old_word, word;
+	unsigned char *bitmap;
+	unsigned long bitidx, byte_bitidx;
+	unsigned char old_byte, byte;
 
 	BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
 	BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits));
 
 	bitmap = get_pageblock_bitmap(page, pfn);
 	bitidx = pfn_to_bitidx(page, pfn);
-	word_bitidx = bitidx / BITS_PER_LONG;
-	bitidx &= (BITS_PER_LONG-1);
+	byte_bitidx = bitidx / BITS_PER_BYTE;
+	bitidx &= (BITS_PER_BYTE-1);
 
 	VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page);
 
 	mask <<= bitidx;
 	flags <<= bitidx;
 
-	word = READ_ONCE(bitmap[word_bitidx]);
+	byte = READ_ONCE(bitmap[byte_bitidx]);
 	for (;;) {
-		old_word = cmpxchg(&bitmap[word_bitidx], word, (word & ~mask) | flags);
-		if (word == old_word)
+		old_byte = cmpxchg(&bitmap[byte_bitidx], byte, (byte & ~mask) | flags);
+		if (byte == old_byte)
 			break;
-		word = old_word;
+		byte = old_byte;
 	}
 }
 
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v4 2/4] mm/pageblock: remove false sharing in pageblock_flags
  2020-09-03  7:01 [PATCH v4 1/4] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags Alex Shi
@ 2020-09-03  7:01 ` Alex Shi
  2020-09-03  7:01   ` Alex Shi
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 23+ messages in thread
From: Alex Shi @ 2020-09-03  7:01 UTC (permalink / raw)
  To: Anshuman Khandual, David Hildenbrand, Matthew Wilcox,
	Vlastimil Babka, Alexander Duyck
  Cc: Andrew Morton, Mel Gorman, linux-kernel, linux-mm

Current pageblock_flags is only 4 bits, so it has to share a char size
in cmpxchg when get set, the false sharing cause perf drop.

If we incrase the bits up to 8, false sharing would gone in cmpxchg. and
the only cost is half char per pageblock, which is half char per 128MB
on x86, 4 chars in 1 GB.

Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: linux-kernel@vger.kernel.org
Cc: linux-mm@kvack.org
---
 include/linux/page-isolation.h  |  2 +-
 include/linux/pageblock-flags.h |  2 +-
 mm/page_alloc.c                 | 10 +++-------
 3 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index 572458016331..baa2e1ecc134 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -35,7 +35,7 @@ static inline bool is_migrate_isolate(int migratetype)
 
 struct page *has_unmovable_pages(struct zone *zone, struct page *page,
 				 int migratetype, int flags);
-void set_pageblock_migratetype(struct page *page, int migratetype);
+void set_pageblock_migratetype(struct page *page, unsigned char migratetype);
 int move_freepages_block(struct zone *zone, struct page *page,
 				int migratetype, int *num_movable);
 
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index d189441568eb..f785c9d6d68c 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -25,7 +25,7 @@ enum pageblock_bits {
 	 * Assume the bits will always align on a word. If this assumption
 	 * changes then get/set pageblock needs updating.
 	 */
-	NR_PAGEBLOCK_BITS
+	NR_PAGEBLOCK_BITS = BITS_PER_BYTE
 };
 
 #ifdef CONFIG_HUGETLB_PAGE
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 81e96d4d9c42..3688e6b83318 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -517,19 +517,15 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
 	unsigned long bitidx, byte_bitidx;
 	unsigned char old_byte, byte;
 
-	BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
+	BUILD_BUG_ON(NR_PAGEBLOCK_BITS != BITS_PER_BYTE);
 	BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits));
 
 	bitmap = get_pageblock_bitmap(page, pfn);
 	bitidx = pfn_to_bitidx(page, pfn);
 	byte_bitidx = bitidx / BITS_PER_BYTE;
-	bitidx &= (BITS_PER_BYTE-1);
 
 	VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page);
 
-	mask <<= bitidx;
-	flags <<= bitidx;
-
 	byte = READ_ONCE(bitmap[byte_bitidx]);
 	for (;;) {
 		old_byte = cmpxchg(&bitmap[byte_bitidx], byte, (byte & ~mask) | flags);
@@ -539,13 +535,13 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
 	}
 }
 
-void set_pageblock_migratetype(struct page *page, int migratetype)
+void set_pageblock_migratetype(struct page *page, unsigned char migratetype)
 {
 	if (unlikely(page_group_by_mobility_disabled &&
 		     migratetype < MIGRATE_PCPTYPES))
 		migratetype = MIGRATE_UNMOVABLE;
 
-	set_pfnblock_flags_mask(page, (unsigned long)migratetype,
+	set_pfnblock_flags_mask(page, migratetype,
 				page_to_pfn(page), MIGRATETYPE_MASK);
 }
 
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v4 3/4] mm/pageblock: work around multiple arch's cmpxchg support issue
  2020-09-03  7:01 [PATCH v4 1/4] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags Alex Shi
  2020-09-03  7:01 ` [PATCH v4 2/4] mm/pageblock: remove false sharing in pageblock_flags Alex Shi
@ 2020-09-03  7:01   ` Alex Shi
  2020-09-03  7:24 ` [PATCH v4 1/4] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags Mel Gorman
  2020-09-03  8:19 ` David Hildenbrand
  3 siblings, 0 replies; 23+ messages in thread
From: Alex Shi @ 2020-09-03  7:01 UTC (permalink / raw)
  To: Anshuman Khandual, David Hildenbrand, Matthew Wilcox,
	Vlastimil Babka, Alexander Duyck
  Cc: Chris Zankel, Rich Felker, Yoshinori Sato, linux-sh,
	linux-xtensa, Russell King, linux-kernel, linux-mm, Max Filippov,
	Baolin Wang, sparclinux, Andrew Morton, David S. Miller,
	linux-arm-kernel

Armv6, sh2, sparc32 and xtensa can not do cmpxchg1, so we have to use
cmpxchg4 on it.

Here we mark above 4 arch's NO_CMPXCHG_BYTE, and would add more if we
found.

This is the first usages of cmpxchg flase sharing change. We'd better
check more cmpxchg usages in current kernel...

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: Rich Felker <dalias@libc.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Chris Zankel <chris@zankel.net>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-sh@vger.kernel.org
Cc: sparclinux@vger.kernel.org
Cc: linux-xtensa@linux-xtensa.org
Cc: linux-mm@kvack.org
---
 arch/Kconfig           |  3 +++
 arch/arm/Kconfig       |  1 +
 arch/sh/Kconfig        |  1 +
 arch/sparc/Kconfig     |  1 +
 arch/xtensa/Kconfig    |  1 +
 include/linux/mmzone.h | 15 ++++++++++++---
 mm/page_alloc.c        | 22 +++++++++++-----------
 7 files changed, 30 insertions(+), 14 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index af14a567b493..3514570c0f5f 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -431,6 +431,9 @@ config HAVE_CMPXCHG_LOCAL
 config HAVE_CMPXCHG_DOUBLE
 	bool
 
+config NO_CMPXCHG_BYTE
+	bool
+
 config ARCH_WEAK_RELEASE_ACQUIRE
 	bool
 
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index e00d94b16658..03a6c7fd999d 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -48,6 +48,7 @@ config ARM
 	select GENERIC_ALLOCATOR
 	select GENERIC_ARCH_TOPOLOGY if ARM_CPU_TOPOLOGY
 	select GENERIC_ATOMIC64 if CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI
+	select NO_CMPXCHG_BYTE if CPU_V6
 	select GENERIC_CLOCKEVENTS_BROADCAST if SMP
 	select GENERIC_CPU_AUTOPROBE
 	select GENERIC_EARLY_IOREMAP
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index d20927128fce..4c7f0ad5b93f 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -155,6 +155,7 @@ menu "System type"
 config CPU_SH2
 	bool
 	select SH_INTC
+	select NO_CMPXCHG_BYTE
 
 config CPU_SH2A
 	bool
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index efeff2c896a5..51ae5c8ede87 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -58,6 +58,7 @@ config SPARC32
 	select CLZ_TAB
 	select HAVE_UID16
 	select OLD_SIGACTION
+	select NO_CMPXCHG_BYTE
 
 config SPARC64
 	def_bool 64BIT
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index e997e0119c02..862b008ab09e 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -42,6 +42,7 @@ config XTENSA
 	select MODULES_USE_ELF_RELA
 	select PERF_USE_VMALLOC
 	select VIRT_TO_BUS
+	select NO_CMPXCHG_BYTE
 	help
 	  Xtensa processors are 32-bit RISC machines designed by Tensilica
 	  primarily for embedded systems.  These processors are both
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index be676e659fb7..0bc5ac0f8cd7 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -406,6 +406,15 @@ enum zone_type {
 
 #ifndef __GENERATING_BOUNDS_H
 
+/* cmpxchg only support 32-bits operands on ARMv6, SPARC32, sh2, XTENSA.*/
+#ifdef CONFIG_NO_CMPXCHG_BYTE
+#define BITS_PER_FLAGS	BITS_PER_LONG
+typedef unsigned long pageblockflags_t;
+#else
+#define BITS_PER_FLAGS	BITS_PER_BYTE
+typedef unsigned char pageblockflags_t;
+#endif
+
 struct zone {
 	/* Read-mostly fields */
 
@@ -437,7 +446,7 @@ struct zone {
 	 * Flags for a pageblock_nr_pages block. See pageblock-flags.h.
 	 * In SPARSEMEM, this map is stored in struct mem_section
 	 */
-	unsigned char		*pageblock_flags;
+	pageblockflags_t	*pageblock_flags;
 #endif /* CONFIG_SPARSEMEM */
 
 	/* zone_start_pfn = zone_start_paddr >> PAGE_SHIFT */
@@ -1159,7 +1168,7 @@ struct mem_section_usage {
 	DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
 #endif
 	/* See declaration of similar field in struct zone */
-	unsigned char	pageblock_flags[0];
+	pageblockflags_t	pageblock_flags[0];
 };
 
 void subsection_map_init(unsigned long pfn, unsigned long nr_pages);
@@ -1212,7 +1221,7 @@ struct mem_section {
 extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
 #endif
 
-static inline unsigned char *section_to_usemap(struct mem_section *ms)
+static inline pageblockflags_t *section_to_usemap(struct mem_section *ms)
 {
 	return ms->usage->pageblock_flags;
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3688e6b83318..8b65d83d8be6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -445,7 +445,7 @@ static inline bool defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
 #endif
 
 /* Return a pointer to the bitmap storing bits affecting a block of pages */
-static inline unsigned char *get_pageblock_bitmap(struct page *page,
+static inline pageblockflags_t *get_pageblock_bitmap(struct page *page,
 							unsigned long pfn)
 {
 #ifdef CONFIG_SPARSEMEM
@@ -474,24 +474,24 @@ static inline int pfn_to_bitidx(struct page *page, unsigned long pfn)
  * Return: pageblock_bits flags
  */
 static __always_inline
-unsigned char __get_pfnblock_flags_mask(struct page *page,
+pageblockflags_t __get_pfnblock_flags_mask(struct page *page,
 					unsigned long pfn,
 					unsigned long mask)
 {
-	unsigned char *bitmap;
+	pageblockflags_t *bitmap;
 	unsigned long bitidx, byte_bitidx;
-	unsigned char byte;
+	pageblockflags_t byte;
 
 	bitmap = get_pageblock_bitmap(page, pfn);
 	bitidx = pfn_to_bitidx(page, pfn);
-	byte_bitidx = bitidx / BITS_PER_BYTE;
-	bitidx &= (BITS_PER_BYTE-1);
+	byte_bitidx = bitidx / BITS_PER_FLAGS;
+	bitidx &= (BITS_PER_FLAGS - 1);
 
 	byte = bitmap[byte_bitidx];
 	return (byte >> bitidx) & mask;
 }
 
-unsigned char get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
+pageblockflags_t get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
 					unsigned long mask)
 {
 	return __get_pfnblock_flags_mask(page, pfn, mask);
@@ -513,16 +513,16 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
 					unsigned long pfn,
 					unsigned long mask)
 {
-	unsigned char *bitmap;
+	pageblockflags_t *bitmap;
 	unsigned long bitidx, byte_bitidx;
-	unsigned char old_byte, byte;
+	pageblockflags_t old_byte, byte;
 
-	BUILD_BUG_ON(NR_PAGEBLOCK_BITS != BITS_PER_BYTE);
+	BUILD_BUG_ON(NR_PAGEBLOCK_BITS != BITS_PER_FLAGS);
 	BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits));
 
 	bitmap = get_pageblock_bitmap(page, pfn);
 	bitidx = pfn_to_bitidx(page, pfn);
-	byte_bitidx = bitidx / BITS_PER_BYTE;
+	byte_bitidx = bitidx / BITS_PER_FLAGS;
 
 	VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page);
 
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v4 3/4] mm/pageblock: work around multiple arch's cmpxchg support issue
@ 2020-09-03  7:01   ` Alex Shi
  0 siblings, 0 replies; 23+ messages in thread
From: Alex Shi @ 2020-09-03  7:01 UTC (permalink / raw)
  To: Anshuman Khandual, David Hildenbrand, Matthew Wilcox,
	Vlastimil Babka, Alexander Duyck
  Cc: Andrew Morton, Baolin Wang, Russell King, Yoshinori Sato,
	Rich Felker, David S. Miller, Chris Zankel, Max Filippov,
	linux-kernel, linux-arm-kernel, linux-sh, sparclinux,
	linux-xtensa, linux-mm

Armv6, sh2, sparc32 and xtensa can not do cmpxchg1, so we have to use
cmpxchg4 on it.

Here we mark above 4 arch's NO_CMPXCHG_BYTE, and would add more if we
found.

This is the first usages of cmpxchg flase sharing change. We'd better
check more cmpxchg usages in current kernel...

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: Rich Felker <dalias@libc.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Chris Zankel <chris@zankel.net>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-sh@vger.kernel.org
Cc: sparclinux@vger.kernel.org
Cc: linux-xtensa@linux-xtensa.org
Cc: linux-mm@kvack.org
---
 arch/Kconfig           |  3 +++
 arch/arm/Kconfig       |  1 +
 arch/sh/Kconfig        |  1 +
 arch/sparc/Kconfig     |  1 +
 arch/xtensa/Kconfig    |  1 +
 include/linux/mmzone.h | 15 ++++++++++++---
 mm/page_alloc.c        | 22 +++++++++++-----------
 7 files changed, 30 insertions(+), 14 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index af14a567b493..3514570c0f5f 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -431,6 +431,9 @@ config HAVE_CMPXCHG_LOCAL
 config HAVE_CMPXCHG_DOUBLE
 	bool
 
+config NO_CMPXCHG_BYTE
+	bool
+
 config ARCH_WEAK_RELEASE_ACQUIRE
 	bool
 
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index e00d94b16658..03a6c7fd999d 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -48,6 +48,7 @@ config ARM
 	select GENERIC_ALLOCATOR
 	select GENERIC_ARCH_TOPOLOGY if ARM_CPU_TOPOLOGY
 	select GENERIC_ATOMIC64 if CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI
+	select NO_CMPXCHG_BYTE if CPU_V6
 	select GENERIC_CLOCKEVENTS_BROADCAST if SMP
 	select GENERIC_CPU_AUTOPROBE
 	select GENERIC_EARLY_IOREMAP
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index d20927128fce..4c7f0ad5b93f 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -155,6 +155,7 @@ menu "System type"
 config CPU_SH2
 	bool
 	select SH_INTC
+	select NO_CMPXCHG_BYTE
 
 config CPU_SH2A
 	bool
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index efeff2c896a5..51ae5c8ede87 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -58,6 +58,7 @@ config SPARC32
 	select CLZ_TAB
 	select HAVE_UID16
 	select OLD_SIGACTION
+	select NO_CMPXCHG_BYTE
 
 config SPARC64
 	def_bool 64BIT
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index e997e0119c02..862b008ab09e 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -42,6 +42,7 @@ config XTENSA
 	select MODULES_USE_ELF_RELA
 	select PERF_USE_VMALLOC
 	select VIRT_TO_BUS
+	select NO_CMPXCHG_BYTE
 	help
 	  Xtensa processors are 32-bit RISC machines designed by Tensilica
 	  primarily for embedded systems.  These processors are both
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index be676e659fb7..0bc5ac0f8cd7 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -406,6 +406,15 @@ enum zone_type {
 
 #ifndef __GENERATING_BOUNDS_H
 
+/* cmpxchg only support 32-bits operands on ARMv6, SPARC32, sh2, XTENSA.*/
+#ifdef CONFIG_NO_CMPXCHG_BYTE
+#define BITS_PER_FLAGS	BITS_PER_LONG
+typedef unsigned long pageblockflags_t;
+#else
+#define BITS_PER_FLAGS	BITS_PER_BYTE
+typedef unsigned char pageblockflags_t;
+#endif
+
 struct zone {
 	/* Read-mostly fields */
 
@@ -437,7 +446,7 @@ struct zone {
 	 * Flags for a pageblock_nr_pages block. See pageblock-flags.h.
 	 * In SPARSEMEM, this map is stored in struct mem_section
 	 */
-	unsigned char		*pageblock_flags;
+	pageblockflags_t	*pageblock_flags;
 #endif /* CONFIG_SPARSEMEM */
 
 	/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
@@ -1159,7 +1168,7 @@ struct mem_section_usage {
 	DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
 #endif
 	/* See declaration of similar field in struct zone */
-	unsigned char	pageblock_flags[0];
+	pageblockflags_t	pageblock_flags[0];
 };
 
 void subsection_map_init(unsigned long pfn, unsigned long nr_pages);
@@ -1212,7 +1221,7 @@ struct mem_section {
 extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
 #endif
 
-static inline unsigned char *section_to_usemap(struct mem_section *ms)
+static inline pageblockflags_t *section_to_usemap(struct mem_section *ms)
 {
 	return ms->usage->pageblock_flags;
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3688e6b83318..8b65d83d8be6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -445,7 +445,7 @@ static inline bool defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
 #endif
 
 /* Return a pointer to the bitmap storing bits affecting a block of pages */
-static inline unsigned char *get_pageblock_bitmap(struct page *page,
+static inline pageblockflags_t *get_pageblock_bitmap(struct page *page,
 							unsigned long pfn)
 {
 #ifdef CONFIG_SPARSEMEM
@@ -474,24 +474,24 @@ static inline int pfn_to_bitidx(struct page *page, unsigned long pfn)
  * Return: pageblock_bits flags
  */
 static __always_inline
-unsigned char __get_pfnblock_flags_mask(struct page *page,
+pageblockflags_t __get_pfnblock_flags_mask(struct page *page,
 					unsigned long pfn,
 					unsigned long mask)
 {
-	unsigned char *bitmap;
+	pageblockflags_t *bitmap;
 	unsigned long bitidx, byte_bitidx;
-	unsigned char byte;
+	pageblockflags_t byte;
 
 	bitmap = get_pageblock_bitmap(page, pfn);
 	bitidx = pfn_to_bitidx(page, pfn);
-	byte_bitidx = bitidx / BITS_PER_BYTE;
-	bitidx &= (BITS_PER_BYTE-1);
+	byte_bitidx = bitidx / BITS_PER_FLAGS;
+	bitidx &= (BITS_PER_FLAGS - 1);
 
 	byte = bitmap[byte_bitidx];
 	return (byte >> bitidx) & mask;
 }
 
-unsigned char get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
+pageblockflags_t get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
 					unsigned long mask)
 {
 	return __get_pfnblock_flags_mask(page, pfn, mask);
@@ -513,16 +513,16 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
 					unsigned long pfn,
 					unsigned long mask)
 {
-	unsigned char *bitmap;
+	pageblockflags_t *bitmap;
 	unsigned long bitidx, byte_bitidx;
-	unsigned char old_byte, byte;
+	pageblockflags_t old_byte, byte;
 
-	BUILD_BUG_ON(NR_PAGEBLOCK_BITS != BITS_PER_BYTE);
+	BUILD_BUG_ON(NR_PAGEBLOCK_BITS != BITS_PER_FLAGS);
 	BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits));
 
 	bitmap = get_pageblock_bitmap(page, pfn);
 	bitidx = pfn_to_bitidx(page, pfn);
-	byte_bitidx = bitidx / BITS_PER_BYTE;
+	byte_bitidx = bitidx / BITS_PER_FLAGS;
 
 	VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page);
 
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v4 3/4] mm/pageblock: work around multiple arch's cmpxchg support issue
@ 2020-09-03  7:01   ` Alex Shi
  0 siblings, 0 replies; 23+ messages in thread
From: Alex Shi @ 2020-09-03  7:01 UTC (permalink / raw)
  To: Anshuman Khandual, David Hildenbrand, Matthew Wilcox,
	Vlastimil Babka, Alexander Duyck
  Cc: Chris Zankel, Rich Felker, Yoshinori Sato, linux-sh,
	linux-xtensa, Russell King, linux-kernel, linux-mm, Max Filippov,
	Baolin Wang, sparclinux, Andrew Morton, David S. Miller,
	linux-arm-kernel

Armv6, sh2, sparc32 and xtensa can not do cmpxchg1, so we have to use
cmpxchg4 on it.

Here we mark above 4 arch's NO_CMPXCHG_BYTE, and would add more if we
found.

This is the first usages of cmpxchg flase sharing change. We'd better
check more cmpxchg usages in current kernel...

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: Rich Felker <dalias@libc.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Chris Zankel <chris@zankel.net>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-sh@vger.kernel.org
Cc: sparclinux@vger.kernel.org
Cc: linux-xtensa@linux-xtensa.org
Cc: linux-mm@kvack.org
---
 arch/Kconfig           |  3 +++
 arch/arm/Kconfig       |  1 +
 arch/sh/Kconfig        |  1 +
 arch/sparc/Kconfig     |  1 +
 arch/xtensa/Kconfig    |  1 +
 include/linux/mmzone.h | 15 ++++++++++++---
 mm/page_alloc.c        | 22 +++++++++++-----------
 7 files changed, 30 insertions(+), 14 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index af14a567b493..3514570c0f5f 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -431,6 +431,9 @@ config HAVE_CMPXCHG_LOCAL
 config HAVE_CMPXCHG_DOUBLE
 	bool
 
+config NO_CMPXCHG_BYTE
+	bool
+
 config ARCH_WEAK_RELEASE_ACQUIRE
 	bool
 
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index e00d94b16658..03a6c7fd999d 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -48,6 +48,7 @@ config ARM
 	select GENERIC_ALLOCATOR
 	select GENERIC_ARCH_TOPOLOGY if ARM_CPU_TOPOLOGY
 	select GENERIC_ATOMIC64 if CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI
+	select NO_CMPXCHG_BYTE if CPU_V6
 	select GENERIC_CLOCKEVENTS_BROADCAST if SMP
 	select GENERIC_CPU_AUTOPROBE
 	select GENERIC_EARLY_IOREMAP
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index d20927128fce..4c7f0ad5b93f 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -155,6 +155,7 @@ menu "System type"
 config CPU_SH2
 	bool
 	select SH_INTC
+	select NO_CMPXCHG_BYTE
 
 config CPU_SH2A
 	bool
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index efeff2c896a5..51ae5c8ede87 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -58,6 +58,7 @@ config SPARC32
 	select CLZ_TAB
 	select HAVE_UID16
 	select OLD_SIGACTION
+	select NO_CMPXCHG_BYTE
 
 config SPARC64
 	def_bool 64BIT
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index e997e0119c02..862b008ab09e 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -42,6 +42,7 @@ config XTENSA
 	select MODULES_USE_ELF_RELA
 	select PERF_USE_VMALLOC
 	select VIRT_TO_BUS
+	select NO_CMPXCHG_BYTE
 	help
 	  Xtensa processors are 32-bit RISC machines designed by Tensilica
 	  primarily for embedded systems.  These processors are both
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index be676e659fb7..0bc5ac0f8cd7 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -406,6 +406,15 @@ enum zone_type {
 
 #ifndef __GENERATING_BOUNDS_H
 
+/* cmpxchg only support 32-bits operands on ARMv6, SPARC32, sh2, XTENSA.*/
+#ifdef CONFIG_NO_CMPXCHG_BYTE
+#define BITS_PER_FLAGS	BITS_PER_LONG
+typedef unsigned long pageblockflags_t;
+#else
+#define BITS_PER_FLAGS	BITS_PER_BYTE
+typedef unsigned char pageblockflags_t;
+#endif
+
 struct zone {
 	/* Read-mostly fields */
 
@@ -437,7 +446,7 @@ struct zone {
 	 * Flags for a pageblock_nr_pages block. See pageblock-flags.h.
 	 * In SPARSEMEM, this map is stored in struct mem_section
 	 */
-	unsigned char		*pageblock_flags;
+	pageblockflags_t	*pageblock_flags;
 #endif /* CONFIG_SPARSEMEM */
 
 	/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
@@ -1159,7 +1168,7 @@ struct mem_section_usage {
 	DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
 #endif
 	/* See declaration of similar field in struct zone */
-	unsigned char	pageblock_flags[0];
+	pageblockflags_t	pageblock_flags[0];
 };
 
 void subsection_map_init(unsigned long pfn, unsigned long nr_pages);
@@ -1212,7 +1221,7 @@ struct mem_section {
 extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
 #endif
 
-static inline unsigned char *section_to_usemap(struct mem_section *ms)
+static inline pageblockflags_t *section_to_usemap(struct mem_section *ms)
 {
 	return ms->usage->pageblock_flags;
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3688e6b83318..8b65d83d8be6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -445,7 +445,7 @@ static inline bool defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
 #endif
 
 /* Return a pointer to the bitmap storing bits affecting a block of pages */
-static inline unsigned char *get_pageblock_bitmap(struct page *page,
+static inline pageblockflags_t *get_pageblock_bitmap(struct page *page,
 							unsigned long pfn)
 {
 #ifdef CONFIG_SPARSEMEM
@@ -474,24 +474,24 @@ static inline int pfn_to_bitidx(struct page *page, unsigned long pfn)
  * Return: pageblock_bits flags
  */
 static __always_inline
-unsigned char __get_pfnblock_flags_mask(struct page *page,
+pageblockflags_t __get_pfnblock_flags_mask(struct page *page,
 					unsigned long pfn,
 					unsigned long mask)
 {
-	unsigned char *bitmap;
+	pageblockflags_t *bitmap;
 	unsigned long bitidx, byte_bitidx;
-	unsigned char byte;
+	pageblockflags_t byte;
 
 	bitmap = get_pageblock_bitmap(page, pfn);
 	bitidx = pfn_to_bitidx(page, pfn);
-	byte_bitidx = bitidx / BITS_PER_BYTE;
-	bitidx &= (BITS_PER_BYTE-1);
+	byte_bitidx = bitidx / BITS_PER_FLAGS;
+	bitidx &= (BITS_PER_FLAGS - 1);
 
 	byte = bitmap[byte_bitidx];
 	return (byte >> bitidx) & mask;
 }
 
-unsigned char get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
+pageblockflags_t get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
 					unsigned long mask)
 {
 	return __get_pfnblock_flags_mask(page, pfn, mask);
@@ -513,16 +513,16 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
 					unsigned long pfn,
 					unsigned long mask)
 {
-	unsigned char *bitmap;
+	pageblockflags_t *bitmap;
 	unsigned long bitidx, byte_bitidx;
-	unsigned char old_byte, byte;
+	pageblockflags_t old_byte, byte;
 
-	BUILD_BUG_ON(NR_PAGEBLOCK_BITS != BITS_PER_BYTE);
+	BUILD_BUG_ON(NR_PAGEBLOCK_BITS != BITS_PER_FLAGS);
 	BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits));
 
 	bitmap = get_pageblock_bitmap(page, pfn);
 	bitidx = pfn_to_bitidx(page, pfn);
-	byte_bitidx = bitidx / BITS_PER_BYTE;
+	byte_bitidx = bitidx / BITS_PER_FLAGS;
 
 	VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page);
 
-- 
1.8.3.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH v4 1/4] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags
  2020-09-03  7:01 [PATCH v4 1/4] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags Alex Shi
  2020-09-03  7:01 ` [PATCH v4 2/4] mm/pageblock: remove false sharing in pageblock_flags Alex Shi
  2020-09-03  7:01   ` Alex Shi
@ 2020-09-03  7:24 ` Mel Gorman
  2020-09-03  8:32   ` Alex Shi
  2020-09-03  8:19 ` David Hildenbrand
  3 siblings, 1 reply; 23+ messages in thread
From: Mel Gorman @ 2020-09-03  7:24 UTC (permalink / raw)
  To: Alex Shi
  Cc: Anshuman Khandual, David Hildenbrand, Matthew Wilcox,
	Vlastimil Babka, Alexander Duyck, Andrew Morton, linux-mm,
	linux-kernel

On Thu, Sep 03, 2020 at 03:01:20PM +0800, Alex Shi wrote:
> pageblock_flags is used as long, since every pageblock_flags is just 4
> bits, 'long' size will include 8(32bit machine) or 16 pageblocks' flags,
> that flag setting has to sync in cmpxchg with 7 or 15 other pageblock
> flags. It would cause long waiting for sync.
> 
> If we could change the pageblock_flags variable as char, we could use
> char size cmpxchg, which just sync up with 2 pageblock flags. it could
> relief the false sharing in cmpxchg.
> 
> Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>

Page block types were not known to change at high frequency that would
cause a measurable performance drop. If anything, the performance hit
from pageblocks is the lookup paths which is a lot more frequent.

What was the workload you were running that altered pageblocks at a high
enough frequency for collisions to occur when updating adjacent
pageblocks?

-- 
Mel Gorman
SUSE Labs

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v4 3/4] mm/pageblock: work around multiple arch's cmpxchg support issue
  2020-09-03  7:01   ` Alex Shi
  (?)
  (?)
@ 2020-09-03  7:29     ` Max Filippov
  -1 siblings, 0 replies; 23+ messages in thread
From: Max Filippov @ 2020-09-03  7:29 UTC (permalink / raw)
  To: Alex Shi
  Cc: linux-arm-kernel, Chris Zankel, Rich Felker, Yoshinori Sato,
	David Hildenbrand, open list:SUPERH,
	open list:TENSILICA XTENSA PORT (xtensa),
	Anshuman Khandual, Russell King, Alexander Duyck, LKML,
	Linux Memory Management List, Matthew Wilcox, Baolin Wang,
	open list:SPARC + UltraSPAR...,
	Andrew Morton, David S. Miller, Vlastimil Babka

On Thu, Sep 3, 2020 at 12:01 AM Alex Shi <alex.shi@linux.alibaba.com> wrote:
>
> Armv6, sh2, sparc32 and xtensa can not do cmpxchg1, so we have to use
> cmpxchg4 on it.

[...]

> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig> index e00d94b16658..03a6c7fd999d 100644
> --- a/arch/arm/Kconfig
> +++ b/arch/arm/Kconfig
> @@ -48,6 +48,7 @@ config ARM
>         select GENERIC_ALLOCATOR
>         select GENERIC_ARCH_TOPOLOGY if ARM_CPU_TOPOLOGY
>         select GENERIC_ATOMIC64 if CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI
> +       select NO_CMPXCHG_BYTE if CPU_V6
>         select GENERIC_CLOCKEVENTS_BROADCAST if SMP
>         select GENERIC_CPU_AUTOPROBE
>         select GENERIC_EARLY_IOREMAP
> diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
> index d20927128fce..4c7f0ad5b93f 100644
> --- a/arch/sh/Kconfig
> +++ b/arch/sh/Kconfig
> @@ -155,6 +155,7 @@ menu "System type"
>  config CPU_SH2
>         bool
>         select SH_INTC
> +       select NO_CMPXCHG_BYTE
>
>  config CPU_SH2A
>         bool
> diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
> index efeff2c896a5..51ae5c8ede87 100644
> --- a/arch/sparc/Kconfig
> +++ b/arch/sparc/Kconfig
> @@ -58,6 +58,7 @@ config SPARC32
>         select CLZ_TAB
>         select HAVE_UID16
>         select OLD_SIGACTION
> +       select NO_CMPXCHG_BYTE
>
>  config SPARC64
>         def_bool 64BIT
> diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
> index e997e0119c02..862b008ab09e 100644
> --- a/arch/xtensa/Kconfig
> +++ b/arch/xtensa/Kconfig
> @@ -42,6 +42,7 @@ config XTENSA
>         select MODULES_USE_ELF_RELA
>         select PERF_USE_VMALLOC
>         select VIRT_TO_BUS
> +       select NO_CMPXCHG_BYTE

Please keep the lists of select statements in Kconfig files above
alphabetically sorted.

-- 
Thanks.
-- Max

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v4 3/4] mm/pageblock: work around multiple arch's cmpxchg support issue
@ 2020-09-03  7:29     ` Max Filippov
  0 siblings, 0 replies; 23+ messages in thread
From: Max Filippov @ 2020-09-03  7:29 UTC (permalink / raw)
  To: Alex Shi
  Cc: Anshuman Khandual, David Hildenbrand, Matthew Wilcox,
	Vlastimil Babka, Alexander Duyck, Andrew Morton, Baolin Wang,
	Russell King, Yoshinori Sato, Rich Felker, David S. Miller,
	Chris Zankel, LKML, linux-arm-kernel, open list:SUPERH,
	open list:SPARC + UltraSPAR...,
	open list:TENSILICA XTENSA PORT (xtensa),
	Linux Memory Management List

On Thu, Sep 3, 2020 at 12:01 AM Alex Shi <alex.shi@linux.alibaba.com> wrote:
>
> Armv6, sh2, sparc32 and xtensa can not do cmpxchg1, so we have to use
> cmpxchg4 on it.

[...]

> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig> index e00d94b16658..03a6c7fd999d 100644
> --- a/arch/arm/Kconfig
> +++ b/arch/arm/Kconfig
> @@ -48,6 +48,7 @@ config ARM
>         select GENERIC_ALLOCATOR
>         select GENERIC_ARCH_TOPOLOGY if ARM_CPU_TOPOLOGY
>         select GENERIC_ATOMIC64 if CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI
> +       select NO_CMPXCHG_BYTE if CPU_V6
>         select GENERIC_CLOCKEVENTS_BROADCAST if SMP
>         select GENERIC_CPU_AUTOPROBE
>         select GENERIC_EARLY_IOREMAP
> diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
> index d20927128fce..4c7f0ad5b93f 100644
> --- a/arch/sh/Kconfig
> +++ b/arch/sh/Kconfig
> @@ -155,6 +155,7 @@ menu "System type"
>  config CPU_SH2
>         bool
>         select SH_INTC
> +       select NO_CMPXCHG_BYTE
>
>  config CPU_SH2A
>         bool
> diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
> index efeff2c896a5..51ae5c8ede87 100644
> --- a/arch/sparc/Kconfig
> +++ b/arch/sparc/Kconfig
> @@ -58,6 +58,7 @@ config SPARC32
>         select CLZ_TAB
>         select HAVE_UID16
>         select OLD_SIGACTION
> +       select NO_CMPXCHG_BYTE
>
>  config SPARC64
>         def_bool 64BIT
> diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
> index e997e0119c02..862b008ab09e 100644
> --- a/arch/xtensa/Kconfig
> +++ b/arch/xtensa/Kconfig
> @@ -42,6 +42,7 @@ config XTENSA
>         select MODULES_USE_ELF_RELA
>         select PERF_USE_VMALLOC
>         select VIRT_TO_BUS
> +       select NO_CMPXCHG_BYTE

Please keep the lists of select statements in Kconfig files above
alphabetically sorted.

-- 
Thanks.
-- Max

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v4 3/4] mm/pageblock: work around multiple arch's cmpxchg support issue
@ 2020-09-03  7:29     ` Max Filippov
  0 siblings, 0 replies; 23+ messages in thread
From: Max Filippov @ 2020-09-03  7:29 UTC (permalink / raw)
  To: Alex Shi
  Cc: Anshuman Khandual, David Hildenbrand, Matthew Wilcox,
	Vlastimil Babka, Alexander Duyck, Andrew Morton, Baolin Wang,
	Russell King, Yoshinori Sato, Rich Felker, David S. Miller,
	Chris Zankel, LKML, linux-arm-kernel, open list:SUPERH,
	open list:SPARC + UltraSPAR...,
	open list:TENSILICA XTENSA PORT (xtensa),
	Linux Memory Management List

On Thu, Sep 3, 2020 at 12:01 AM Alex Shi <alex.shi@linux.alibaba.com> wrote:
>
> Armv6, sh2, sparc32 and xtensa can not do cmpxchg1, so we have to use
> cmpxchg4 on it.

[...]

> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig> index e00d94b16658..03a6c7fd999d 100644
> --- a/arch/arm/Kconfig
> +++ b/arch/arm/Kconfig
> @@ -48,6 +48,7 @@ config ARM
>         select GENERIC_ALLOCATOR
>         select GENERIC_ARCH_TOPOLOGY if ARM_CPU_TOPOLOGY
>         select GENERIC_ATOMIC64 if CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI
> +       select NO_CMPXCHG_BYTE if CPU_V6
>         select GENERIC_CLOCKEVENTS_BROADCAST if SMP
>         select GENERIC_CPU_AUTOPROBE
>         select GENERIC_EARLY_IOREMAP
> diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
> index d20927128fce..4c7f0ad5b93f 100644
> --- a/arch/sh/Kconfig
> +++ b/arch/sh/Kconfig
> @@ -155,6 +155,7 @@ menu "System type"
>  config CPU_SH2
>         bool
>         select SH_INTC
> +       select NO_CMPXCHG_BYTE
>
>  config CPU_SH2A
>         bool
> diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
> index efeff2c896a5..51ae5c8ede87 100644
> --- a/arch/sparc/Kconfig
> +++ b/arch/sparc/Kconfig
> @@ -58,6 +58,7 @@ config SPARC32
>         select CLZ_TAB
>         select HAVE_UID16
>         select OLD_SIGACTION
> +       select NO_CMPXCHG_BYTE
>
>  config SPARC64
>         def_bool 64BIT
> diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
> index e997e0119c02..862b008ab09e 100644
> --- a/arch/xtensa/Kconfig
> +++ b/arch/xtensa/Kconfig
> @@ -42,6 +42,7 @@ config XTENSA
>         select MODULES_USE_ELF_RELA
>         select PERF_USE_VMALLOC
>         select VIRT_TO_BUS
> +       select NO_CMPXCHG_BYTE

Please keep the lists of select statements in Kconfig files above
alphabetically sorted.

-- 
Thanks.
-- Max


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v4 3/4] mm/pageblock: work around multiple arch's cmpxchg support issue
@ 2020-09-03  7:29     ` Max Filippov
  0 siblings, 0 replies; 23+ messages in thread
From: Max Filippov @ 2020-09-03  7:29 UTC (permalink / raw)
  To: Alex Shi
  Cc: linux-arm-kernel, Chris Zankel, Rich Felker, Yoshinori Sato,
	David Hildenbrand, open list:SUPERH,
	open list:TENSILICA XTENSA PORT (xtensa),
	Anshuman Khandual, Russell King, Alexander Duyck, LKML,
	Linux Memory Management List, Matthew Wilcox, Baolin Wang,
	open list:SPARC + UltraSPAR...,
	Andrew Morton, David S. Miller, Vlastimil Babka

On Thu, Sep 3, 2020 at 12:01 AM Alex Shi <alex.shi@linux.alibaba.com> wrote:
>
> Armv6, sh2, sparc32 and xtensa can not do cmpxchg1, so we have to use
> cmpxchg4 on it.

[...]

> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig> index e00d94b16658..03a6c7fd999d 100644
> --- a/arch/arm/Kconfig
> +++ b/arch/arm/Kconfig
> @@ -48,6 +48,7 @@ config ARM
>         select GENERIC_ALLOCATOR
>         select GENERIC_ARCH_TOPOLOGY if ARM_CPU_TOPOLOGY
>         select GENERIC_ATOMIC64 if CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI
> +       select NO_CMPXCHG_BYTE if CPU_V6
>         select GENERIC_CLOCKEVENTS_BROADCAST if SMP
>         select GENERIC_CPU_AUTOPROBE
>         select GENERIC_EARLY_IOREMAP
> diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
> index d20927128fce..4c7f0ad5b93f 100644
> --- a/arch/sh/Kconfig
> +++ b/arch/sh/Kconfig
> @@ -155,6 +155,7 @@ menu "System type"
>  config CPU_SH2
>         bool
>         select SH_INTC
> +       select NO_CMPXCHG_BYTE
>
>  config CPU_SH2A
>         bool
> diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
> index efeff2c896a5..51ae5c8ede87 100644
> --- a/arch/sparc/Kconfig
> +++ b/arch/sparc/Kconfig
> @@ -58,6 +58,7 @@ config SPARC32
>         select CLZ_TAB
>         select HAVE_UID16
>         select OLD_SIGACTION
> +       select NO_CMPXCHG_BYTE
>
>  config SPARC64
>         def_bool 64BIT
> diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
> index e997e0119c02..862b008ab09e 100644
> --- a/arch/xtensa/Kconfig
> +++ b/arch/xtensa/Kconfig
> @@ -42,6 +42,7 @@ config XTENSA
>         select MODULES_USE_ELF_RELA
>         select PERF_USE_VMALLOC
>         select VIRT_TO_BUS
> +       select NO_CMPXCHG_BYTE

Please keep the lists of select statements in Kconfig files above
alphabetically sorted.

-- 
Thanks.
-- Max

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v4 1/4] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags
  2020-09-03  7:01 [PATCH v4 1/4] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags Alex Shi
                   ` (2 preceding siblings ...)
  2020-09-03  7:24 ` [PATCH v4 1/4] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags Mel Gorman
@ 2020-09-03  8:19 ` David Hildenbrand
  2020-09-03  9:14   ` Alex Shi
  3 siblings, 1 reply; 23+ messages in thread
From: David Hildenbrand @ 2020-09-03  8:19 UTC (permalink / raw)
  To: Alex Shi, Anshuman Khandual, Matthew Wilcox, Vlastimil Babka,
	Alexander Duyck
  Cc: Andrew Morton, Mel Gorman, linux-mm, linux-kernel

On 03.09.20 09:01, Alex Shi wrote:
> pageblock_flags is used as long, since every pageblock_flags is just 4
> bits, 'long' size will include 8(32bit machine) or 16 pageblocks' flags,
> that flag setting has to sync in cmpxchg with 7 or 15 other pageblock
> flags. It would cause long waiting for sync.
> 
> If we could change the pageblock_flags variable as char, we could use
> char size cmpxchg, which just sync up with 2 pageblock flags. it could
> relief the false sharing in cmpxchg.
> 
> Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Mel Gorman <mgorman@techsingularity.net>
> Cc: linux-mm@kvack.org
> Cc: linux-kernel@vger.kernel.org

Could you please

1. Send a cover letter and indicate the changees between versions. I
cannot find any in my mailbox or on -mm - is there any? (also, is there
a patch 4 ?)

2. Report proper performance numbers as requested, especially, over
multiple runs. This should go into patch 1/2. Are they buried somewhere?

3. Clarify what patch 3 does: Do we actually waste 8*sizeof(long) where
we only need 4 bits?

Also, breaking stuff in patch 1 and fixing it in patch 3 is not
acceptable. This breaks git bisect. Skimming over the patches I think
this is the case.

I am not convinced yet that we need and want this. As Alex mentioned, we
touch pageblock flags while holding the zone lock already in most cases
-  and as Mel mentiones, updates should be rare.

-- 
Thanks,

David / dhildenb


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v4 1/4] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags
  2020-09-03  7:24 ` [PATCH v4 1/4] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags Mel Gorman
@ 2020-09-03  8:32   ` Alex Shi
  2020-09-03  8:40     ` Alex Shi
  2020-09-03  9:31     ` Mel Gorman
  0 siblings, 2 replies; 23+ messages in thread
From: Alex Shi @ 2020-09-03  8:32 UTC (permalink / raw)
  To: Mel Gorman
  Cc: Anshuman Khandual, David Hildenbrand, Matthew Wilcox,
	Vlastimil Babka, Alexander Duyck, Andrew Morton, linux-mm,
	linux-kernel



在 2020/9/3 下午3:24, Mel Gorman 写道:
> On Thu, Sep 03, 2020 at 03:01:20PM +0800, Alex Shi wrote:
>> pageblock_flags is used as long, since every pageblock_flags is just 4
>> bits, 'long' size will include 8(32bit machine) or 16 pageblocks' flags,
>> that flag setting has to sync in cmpxchg with 7 or 15 other pageblock
>> flags. It would cause long waiting for sync.
>>
>> If we could change the pageblock_flags variable as char, we could use
>> char size cmpxchg, which just sync up with 2 pageblock flags. it could
>> relief the false sharing in cmpxchg.
>>
>> Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
> 
> Page block types were not known to change at high frequency that would
> cause a measurable performance drop. If anything, the performance hit
> from pageblocks is the lookup paths which is a lot more frequent.

Yes, it is not hot path. But it's still a meaningful points to reduce cmpxchg
level false sharing which isn't right on logical.


> 
> What was the workload you were running that altered pageblocks at a high
> enough frequency for collisions to occur when updating adjacent
> pageblocks?
> 

I have run thpscale with 'always' defrag setting of THP. The Amean stddev is much
larger than a very little average run time reducing.

But the left patch 4 could show the cmpxchg retry reduce from thousands to hundreds
or less.

Subject: [PATCH v4 4/4] add cmpxchg tracing

Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
---
 include/trace/events/pageblock.h | 30 ++++++++++++++++++++++++++++++
 mm/page_alloc.c                  |  4 ++++
 2 files changed, 34 insertions(+)
 create mode 100644 include/trace/events/pageblock.h

diff --git a/include/trace/events/pageblock.h b/include/trace/events/pageblock.h
new file mode 100644
index 000000000000..003c2d716f82
--- /dev/null
+++ b/include/trace/events/pageblock.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM pageblock
+
+#if !defined(_TRACE_PAGEBLOCK_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_PAGEBLOCK_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(hit_cmpxchg,
+
+	TP_PROTO(char byte),
+
+	TP_ARGS(byte),
+
+	TP_STRUCT__entry(
+		__field(char, byte)
+	),
+
+	TP_fast_assign(
+		__entry->byte = byte;
+	),
+
+	TP_printk("%d", __entry->byte)
+);
+
+#endif /* _TRACE_PAGE_ISOLATION_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8b65d83d8be6..a6d7159295bc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -509,6 +509,9 @@ static __always_inline int get_pfnblock_migratetype(struct page *page, unsigned
  * @pfn: The target page frame number
  * @mask: mask of bits that the caller is interested in
  */
+#define CREATE_TRACE_POINTS
+#include <trace/events/pageblock.h>
+
 void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
 					unsigned long pfn,
 					unsigned long mask)
@@ -532,6 +535,7 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
 		if (byte == old_byte)
 			break;
 		byte = old_byte;
+		trace_hit_cmpxchg(byte);
 	}
 }

--
1.8.3.1

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH v4 1/4] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags
  2020-09-03  8:32   ` Alex Shi
@ 2020-09-03  8:40     ` Alex Shi
  2020-09-03  9:00       ` Vlastimil Babka
  2020-09-03  9:31     ` Mel Gorman
  1 sibling, 1 reply; 23+ messages in thread
From: Alex Shi @ 2020-09-03  8:40 UTC (permalink / raw)
  To: Mel Gorman
  Cc: Anshuman Khandual, David Hildenbrand, Matthew Wilcox,
	Vlastimil Babka, Alexander Duyck, Andrew Morton, linux-mm,
	linux-kernel



在 2020/9/3 下午4:32, Alex Shi 写道:
>>
> I have run thpscale with 'always' defrag setting of THP. The Amean stddev is much
> larger than a very little average run time reducing.
> 
> But the left patch 4 could show the cmpxchg retry reduce from thousands to hundreds
> or less.
> 
> Subject: [PATCH v4 4/4] add cmpxchg tracing


It's a typical result with the patchset:

 Performance counter stats for './run-mmtests.sh -c configs/config-workload-thpscale pageblock-c':

             9,564      compaction:mm_compaction_isolate_migratepages
             6,430      compaction:mm_compaction_isolate_freepages
             5,287      compaction:mm_compaction_migratepages
            45,299      compaction:mm_compaction_begin
            45,299      compaction:mm_compaction_end
            30,557      compaction:mm_compaction_try_to_compact_pages
            95,540      compaction:mm_compaction_finished
           149,379      compaction:mm_compaction_suitable
                 0      compaction:mm_compaction_deferred
                 0      compaction:mm_compaction_defer_compaction
             3,949      compaction:mm_compaction_defer_reset
                 0      compaction:mm_compaction_kcompactd_sleep
                 0      compaction:mm_compaction_wakeup_kcompactd
                 0      compaction:mm_compaction_kcompactd_wake
                68      pageblock:hit_cmpxchg

     113.570974583 seconds time elapsed

      14.664451000 seconds user
      96.847116000 seconds sys

It's 5.9-rc2 base kernel result:

 Performance counter stats for './run-mmtests.sh -c configs/config-workload-thpscale rc2-e':

            15,920      compaction:mm_compaction_isolate_migratepages
            20,523      compaction:mm_compaction_isolate_freepages
             9,752      compaction:mm_compaction_migratepages
            27,773      compaction:mm_compaction_begin
            27,773      compaction:mm_compaction_end
            16,391      compaction:mm_compaction_try_to_compact_pages
            62,809      compaction:mm_compaction_finished
            69,821      compaction:mm_compaction_suitable
                 0      compaction:mm_compaction_deferred
                 0      compaction:mm_compaction_defer_compaction
             7,875      compaction:mm_compaction_defer_reset
                 0      compaction:mm_compaction_kcompactd_sleep
                 0      compaction:mm_compaction_wakeup_kcompactd
                 0      compaction:mm_compaction_kcompactd_wake
             1,208      pageblock:hit_cmpxchg

     116.440414591 seconds time elapsed

      15.326913000 seconds user
     103.752758000 seconds sys


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v4 3/4] mm/pageblock: work around multiple arch's cmpxchg support issue
  2020-09-03  7:29     ` Max Filippov
  (?)
  (?)
@ 2020-09-03  8:50       ` Alex Shi
  -1 siblings, 0 replies; 23+ messages in thread
From: Alex Shi @ 2020-09-03  8:50 UTC (permalink / raw)
  To: Max Filippov
  Cc: linux-arm-kernel, Chris Zankel, Rich Felker, Yoshinori Sato,
	David Hildenbrand, open list:SUPERH,
	open list:TENSILICA XTENSA PORT (xtensa),
	Anshuman Khandual, Russell King, Alexander Duyck, LKML,
	Linux Memory Management List, Matthew Wilcox, Baolin Wang,
	open list:SPARC + UltraSPAR...,
	Andrew Morton, David S. Miller, Vlastimil Babka



在 2020/9/3 下午3:29, Max Filippov 写道:
>> diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
>> index e997e0119c02..862b008ab09e 100644
>> --- a/arch/xtensa/Kconfig
>> +++ b/arch/xtensa/Kconfig
>> @@ -42,6 +42,7 @@ config XTENSA
>>         select MODULES_USE_ELF_RELA
>>         select PERF_USE_VMALLOC
>>         select VIRT_TO_BUS
>> +       select NO_CMPXCHG_BYTE
> Please keep the lists of select statements in Kconfig files above
> alphabetically sorted.

Hi Max,

Thanks for the reminder, Let's comments from Mel Gorman.

Thanks!

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v4 3/4] mm/pageblock: work around multiple arch's cmpxchg support issue
@ 2020-09-03  8:50       ` Alex Shi
  0 siblings, 0 replies; 23+ messages in thread
From: Alex Shi @ 2020-09-03  8:50 UTC (permalink / raw)
  To: Max Filippov
  Cc: Anshuman Khandual, David Hildenbrand, Matthew Wilcox,
	Vlastimil Babka, Alexander Duyck, Andrew Morton, Baolin Wang,
	Russell King, Yoshinori Sato, Rich Felker, David S. Miller,
	Chris Zankel, LKML, linux-arm-kernel, open list:SUPERH,
	open list:SPARC + UltraSPAR...,
	open list:TENSILICA XTENSA PORT (xtensa),
	Linux Memory Management List



在 2020/9/3 下午3:29, Max Filippov 写道:
>> diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
>> index e997e0119c02..862b008ab09e 100644
>> --- a/arch/xtensa/Kconfig
>> +++ b/arch/xtensa/Kconfig
>> @@ -42,6 +42,7 @@ config XTENSA
>>         select MODULES_USE_ELF_RELA
>>         select PERF_USE_VMALLOC
>>         select VIRT_TO_BUS
>> +       select NO_CMPXCHG_BYTE
> Please keep the lists of select statements in Kconfig files above
> alphabetically sorted.

Hi Max,

Thanks for the reminder, Let's comments from Mel Gorman.

Thanks!

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v4 3/4] mm/pageblock: work around multiple arch's cmpxchg support issue
@ 2020-09-03  8:50       ` Alex Shi
  0 siblings, 0 replies; 23+ messages in thread
From: Alex Shi @ 2020-09-03  8:50 UTC (permalink / raw)
  To: Max Filippov
  Cc: Anshuman Khandual, David Hildenbrand, Matthew Wilcox,
	Vlastimil Babka, Alexander Duyck, Andrew Morton, Baolin Wang,
	Russell King, Yoshinori Sato, Rich Felker, David S. Miller,
	Chris Zankel, LKML, linux-arm-kernel, open list:SUPERH,
	open list:SPARC + UltraSPAR...,
	open list:TENSILICA XTENSA PORT (xtensa),
	Linux Memory Management List



在 2020/9/3 下午3:29, Max Filippov 写道:
>> diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
>> index e997e0119c02..862b008ab09e 100644
>> --- a/arch/xtensa/Kconfig
>> +++ b/arch/xtensa/Kconfig
>> @@ -42,6 +42,7 @@ config XTENSA
>>         select MODULES_USE_ELF_RELA
>>         select PERF_USE_VMALLOC
>>         select VIRT_TO_BUS
>> +       select NO_CMPXCHG_BYTE
> Please keep the lists of select statements in Kconfig files above
> alphabetically sorted.

Hi Max,

Thanks for the reminder, Let's comments from Mel Gorman.

Thanks!


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v4 3/4] mm/pageblock: work around multiple arch's cmpxchg support issue
@ 2020-09-03  8:50       ` Alex Shi
  0 siblings, 0 replies; 23+ messages in thread
From: Alex Shi @ 2020-09-03  8:50 UTC (permalink / raw)
  To: Max Filippov
  Cc: linux-arm-kernel, Chris Zankel, Rich Felker, Yoshinori Sato,
	David Hildenbrand, open list:SUPERH,
	open list:TENSILICA XTENSA PORT (xtensa),
	Anshuman Khandual, Russell King, Alexander Duyck, LKML,
	Linux Memory Management List, Matthew Wilcox, Baolin Wang,
	open list:SPARC + UltraSPAR...,
	Andrew Morton, David S. Miller, Vlastimil Babka



在 2020/9/3 下午3:29, Max Filippov 写道:
>> diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
>> index e997e0119c02..862b008ab09e 100644
>> --- a/arch/xtensa/Kconfig
>> +++ b/arch/xtensa/Kconfig
>> @@ -42,6 +42,7 @@ config XTENSA
>>         select MODULES_USE_ELF_RELA
>>         select PERF_USE_VMALLOC
>>         select VIRT_TO_BUS
>> +       select NO_CMPXCHG_BYTE
> Please keep the lists of select statements in Kconfig files above
> alphabetically sorted.

Hi Max,

Thanks for the reminder, Let's comments from Mel Gorman.

Thanks!

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v4 1/4] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags
  2020-09-03  8:40     ` Alex Shi
@ 2020-09-03  9:00       ` Vlastimil Babka
  0 siblings, 0 replies; 23+ messages in thread
From: Vlastimil Babka @ 2020-09-03  9:00 UTC (permalink / raw)
  To: Alex Shi, Mel Gorman
  Cc: Anshuman Khandual, David Hildenbrand, Matthew Wilcox,
	Alexander Duyck, Andrew Morton, linux-mm, linux-kernel

On 9/3/20 10:40 AM, Alex Shi wrote:
> 
> 
> 在 2020/9/3 下午4:32, Alex Shi 写道:
>>>
>> I have run thpscale with 'always' defrag setting of THP. The Amean stddev is much
>> larger than a very little average run time reducing.
>> 
>> But the left patch 4 could show the cmpxchg retry reduce from thousands to hundreds
>> or less.
>> 
>> Subject: [PATCH v4 4/4] add cmpxchg tracing
> 
> 
> It's a typical result with the patchset:
> 
>  Performance counter stats for './run-mmtests.sh -c configs/config-workload-thpscale pageblock-c':
> 
>              9,564      compaction:mm_compaction_isolate_migratepages
>              6,430      compaction:mm_compaction_isolate_freepages
>              5,287      compaction:mm_compaction_migratepages
>             45,299      compaction:mm_compaction_begin
>             45,299      compaction:mm_compaction_end
>             30,557      compaction:mm_compaction_try_to_compact_pages
>             95,540      compaction:mm_compaction_finished
>            149,379      compaction:mm_compaction_suitable
>                  0      compaction:mm_compaction_deferred
>                  0      compaction:mm_compaction_defer_compaction
>              3,949      compaction:mm_compaction_defer_reset
>                  0      compaction:mm_compaction_kcompactd_sleep
>                  0      compaction:mm_compaction_wakeup_kcompactd
>                  0      compaction:mm_compaction_kcompactd_wake
>                 68      pageblock:hit_cmpxchg
> 
>      113.570974583 seconds time elapsed
> 
>       14.664451000 seconds user
>       96.847116000 seconds sys
> 
> It's 5.9-rc2 base kernel result:
> 
>  Performance counter stats for './run-mmtests.sh -c configs/config-workload-thpscale rc2-e':
> 
>             15,920      compaction:mm_compaction_isolate_migratepages
>             20,523      compaction:mm_compaction_isolate_freepages
>              9,752      compaction:mm_compaction_migratepages
>             27,773      compaction:mm_compaction_begin
>             27,773      compaction:mm_compaction_end
>             16,391      compaction:mm_compaction_try_to_compact_pages
>             62,809      compaction:mm_compaction_finished
>             69,821      compaction:mm_compaction_suitable
>                  0      compaction:mm_compaction_deferred
>                  0      compaction:mm_compaction_defer_compaction
>              7,875      compaction:mm_compaction_defer_reset
>                  0      compaction:mm_compaction_kcompactd_sleep
>                  0      compaction:mm_compaction_wakeup_kcompactd
>                  0      compaction:mm_compaction_kcompactd_wake
>              1,208      pageblock:hit_cmpxchg
> 
>      116.440414591 seconds time elapsed
> 
>       15.326913000 seconds user
>      103.752758000 seconds sys

The runs wildly differ in many of other stats, so I'm not sure they are really
comparable. I guess you could show the fraction of hit_cmpxchg to all cmpxchg.
But there's also danger of tracepoints widening the race window.

In the end what matters is how these 1208 retries contribute to runtime. I doubt
they could be really visible in a 100+ seconds run though.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v4 1/4] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags
  2020-09-03  8:19 ` David Hildenbrand
@ 2020-09-03  9:14   ` Alex Shi
  0 siblings, 0 replies; 23+ messages in thread
From: Alex Shi @ 2020-09-03  9:14 UTC (permalink / raw)
  To: David Hildenbrand, Anshuman Khandual, Matthew Wilcox,
	Vlastimil Babka, Alexander Duyck
  Cc: Andrew Morton, Mel Gorman, linux-mm, linux-kernel



在 2020/9/3 下午4:19, David Hildenbrand 写道:
> On 03.09.20 09:01, Alex Shi wrote:
>> pageblock_flags is used as long, since every pageblock_flags is just 4
>> bits, 'long' size will include 8(32bit machine) or 16 pageblocks' flags,
>> that flag setting has to sync in cmpxchg with 7 or 15 other pageblock
>> flags. It would cause long waiting for sync.
>>
>> If we could change the pageblock_flags variable as char, we could use
>> char size cmpxchg, which just sync up with 2 pageblock flags. it could
>> relief the false sharing in cmpxchg.
>>
>> Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
>> Cc: Andrew Morton <akpm@linux-foundation.org>
>> Cc: Mel Gorman <mgorman@techsingularity.net>
>> Cc: linux-mm@kvack.org
>> Cc: linux-kernel@vger.kernel.org
> 
> Could you please
> 
> 1. Send a cover letter and indicate the changees between versions. I
> cannot find any in my mailbox or on -mm - is there any? (also, is there
> a patch 4 ?)

Hi David,

Thanks for comments!
I thought a short patchset don't need a cover. Here it's:

cmpxchg is a lockless way to update data by check and compare the target
data after updated and retry if target data is changed during that action.

So we should just compare the exact size of target data. If the data is only
byte, but cmpxchg compare a long word, that leads to a cmpxchg level flase
sharing, cause more try which lock memory more times. That's a clearly
logical error and should be fixed.

v1, the initial version
v2, fix the armv6 cmpxchgb missing issue.
v3, fix more arch cmpxchg missing issue on armv6, sh2, sparc32, xtensa
v4, redo cmpxchgb fix by NO_CMPXCHG_BYTE into arch/Kconfig 

> 
> 2. Report proper performance numbers as requested, especially, over
> multiple runs. This should go into patch 1/2. Are they buried somewhere?

There are some result sent on
https://lkml.org/lkml/2020/8/30/95

> 
> 3. Clarify what patch 3 does: Do we actually waste 8*sizeof(long) where
> we only need 4 bits?

No, no waste, current kernel pack the 4 bits into long, that cause cmpxchg
compare 8 pageblock flags which 7 of them are not needed.

> 
> Also, breaking stuff in patch 1 and fixing it in patch 3 is not
> acceptable. This breaks git bisect. Skimming over the patches I think
> this is the case.

Got it, thanks!
> 
> I am not convinced yet that we need and want this. As Alex mentioned, we
> touch pageblock flags while holding the zone lock already in most cases
> -  and as Mel mentiones, updates should be rare.
> 

yes, not too much, but there are still a few. and cmpxchg retry will lock memory
 which I believe the less the better.

Thanks
Alex


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v4 1/4] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags
  2020-09-03  8:32   ` Alex Shi
  2020-09-03  8:40     ` Alex Shi
@ 2020-09-03  9:31     ` Mel Gorman
  1 sibling, 0 replies; 23+ messages in thread
From: Mel Gorman @ 2020-09-03  9:31 UTC (permalink / raw)
  To: Alex Shi
  Cc: Anshuman Khandual, David Hildenbrand, Matthew Wilcox,
	Vlastimil Babka, Alexander Duyck, Andrew Morton, linux-mm,
	linux-kernel

On Thu, Sep 03, 2020 at 04:32:54PM +0800, Alex Shi wrote:
> 
> 
> ??? 2020/9/3 ??????3:24, Mel Gorman ??????:
> > On Thu, Sep 03, 2020 at 03:01:20PM +0800, Alex Shi wrote:
> >> pageblock_flags is used as long, since every pageblock_flags is just 4
> >> bits, 'long' size will include 8(32bit machine) or 16 pageblocks' flags,
> >> that flag setting has to sync in cmpxchg with 7 or 15 other pageblock
> >> flags. It would cause long waiting for sync.
> >>
> >> If we could change the pageblock_flags variable as char, we could use
> >> char size cmpxchg, which just sync up with 2 pageblock flags. it could
> >> relief the false sharing in cmpxchg.
> >>
> >> Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
> > 
> > Page block types were not known to change at high frequency that would
> > cause a measurable performance drop. If anything, the performance hit
> > from pageblocks is the lookup paths which is a lot more frequent.
> 
> Yes, it is not hot path. But it's still a meaningful points to reduce cmpxchg
> level false sharing which isn't right on logical.
> 

Except there is no guarantee that false sharing was reduced. cmpxchg is
still used except using the byte as the comparison for the old value
and in some cases, that width will still be 32-bit for the exchange.
It would be up to each architecture to see if that can be translated to a
better instruction but it may not even matter.  As the instruction will
be prefixed with the lock instruction, the bus will be locked and bus
locking is probably on the cache line boundary so there is a collision
anyway while the atomic update takes place.

End result -- reducing false sharing in this case is not guaranteed to help
performance and may not be detectable when it's a low frequency operation
but the code will behave differently depending on the architecture and
CPU family.

Your justification path measured the number of times a cmpxchg was retried
but it did not track how many pageblock updates there were or how many
potentially collided. As the workload is uncontrolled with respect to
pageblock updates, you do not know if the difference in retries is due to
a real reduction in collisions or a difference in the number of pageblock
updates that potentially collide. Either way, because the frequency of
the operation was so low relative too your target load, any difference
in performance would be indistinguishable from noise.

I don't think it's worth the churn in this case for an impact that will
be very difficult to detect and variable across architectures and CPU
families.

-- 
Mel Gorman
SUSE Labs

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v4 3/4] mm/pageblock: work around multiple arch's cmpxchg support issue
  2020-09-03  7:01   ` Alex Shi
  (?)
@ 2020-09-10  5:51     ` Christoph Hellwig
  -1 siblings, 0 replies; 23+ messages in thread
From: Christoph Hellwig @ 2020-09-10  5:51 UTC (permalink / raw)
  To: Alex Shi
  Cc: linux-arm-kernel, Chris Zankel, Rich Felker, Yoshinori Sato,
	David Hildenbrand, linux-sh, linux-xtensa, Anshuman Khandual,
	Russell King, Alexander Duyck, linux-kernel, linux-mm,
	Max Filippov, Matthew Wilcox, Baolin Wang, sparclinux,
	Andrew Morton, David S. Miller, Vlastimil Babka

On Thu, Sep 03, 2020 at 03:01:22PM +0800, Alex Shi wrote:
> Armv6, sh2, sparc32 and xtensa can not do cmpxchg1, so we have to use
> cmpxchg4 on it.
> 
> Here we mark above 4 arch's NO_CMPXCHG_BYTE, and would add more if we
> found.
> 
> This is the first usages of cmpxchg flase sharing change. We'd better
> check more cmpxchg usages in current kernel...

I think a positive symbol, e.g. HAVE_CMPXCHG_BYTE is a lot easier to
understand, and also fool-proof.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v4 3/4] mm/pageblock: work around multiple arch's cmpxchg support issue
@ 2020-09-10  5:51     ` Christoph Hellwig
  0 siblings, 0 replies; 23+ messages in thread
From: Christoph Hellwig @ 2020-09-10  5:51 UTC (permalink / raw)
  To: Alex Shi
  Cc: Anshuman Khandual, David Hildenbrand, Matthew Wilcox,
	Vlastimil Babka, Alexander Duyck, Andrew Morton, Baolin Wang,
	Russell King, Yoshinori Sato, Rich Felker, David S. Miller,
	Chris Zankel, Max Filippov, linux-kernel, linux-arm-kernel,
	linux-sh, sparclinux, linux-xtensa, linux-mm

On Thu, Sep 03, 2020 at 03:01:22PM +0800, Alex Shi wrote:
> Armv6, sh2, sparc32 and xtensa can not do cmpxchg1, so we have to use
> cmpxchg4 on it.
> 
> Here we mark above 4 arch's NO_CMPXCHG_BYTE, and would add more if we
> found.
> 
> This is the first usages of cmpxchg flase sharing change. We'd better
> check more cmpxchg usages in current kernel...

I think a positive symbol, e.g. HAVE_CMPXCHG_BYTE is a lot easier to
understand, and also fool-proof.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v4 3/4] mm/pageblock: work around multiple arch's cmpxchg support issue
@ 2020-09-10  5:51     ` Christoph Hellwig
  0 siblings, 0 replies; 23+ messages in thread
From: Christoph Hellwig @ 2020-09-10  5:51 UTC (permalink / raw)
  To: Alex Shi
  Cc: linux-arm-kernel, Chris Zankel, Rich Felker, Yoshinori Sato,
	David Hildenbrand, linux-sh, linux-xtensa, Anshuman Khandual,
	Russell King, Alexander Duyck, linux-kernel, linux-mm,
	Max Filippov, Matthew Wilcox, Baolin Wang, sparclinux,
	Andrew Morton, David S. Miller, Vlastimil Babka

On Thu, Sep 03, 2020 at 03:01:22PM +0800, Alex Shi wrote:
> Armv6, sh2, sparc32 and xtensa can not do cmpxchg1, so we have to use
> cmpxchg4 on it.
> 
> Here we mark above 4 arch's NO_CMPXCHG_BYTE, and would add more if we
> found.
> 
> This is the first usages of cmpxchg flase sharing change. We'd better
> check more cmpxchg usages in current kernel...

I think a positive symbol, e.g. HAVE_CMPXCHG_BYTE is a lot easier to
understand, and also fool-proof.

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2020-09-10  5:54 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-03  7:01 [PATCH v4 1/4] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags Alex Shi
2020-09-03  7:01 ` [PATCH v4 2/4] mm/pageblock: remove false sharing in pageblock_flags Alex Shi
2020-09-03  7:01 ` [PATCH v4 3/4] mm/pageblock: work around multiple arch's cmpxchg support issue Alex Shi
2020-09-03  7:01   ` Alex Shi
2020-09-03  7:01   ` Alex Shi
2020-09-03  7:29   ` Max Filippov
2020-09-03  7:29     ` Max Filippov
2020-09-03  7:29     ` Max Filippov
2020-09-03  7:29     ` Max Filippov
2020-09-03  8:50     ` Alex Shi
2020-09-03  8:50       ` Alex Shi
2020-09-03  8:50       ` Alex Shi
2020-09-03  8:50       ` Alex Shi
2020-09-10  5:51   ` Christoph Hellwig
2020-09-10  5:51     ` Christoph Hellwig
2020-09-10  5:51     ` Christoph Hellwig
2020-09-03  7:24 ` [PATCH v4 1/4] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags Mel Gorman
2020-09-03  8:32   ` Alex Shi
2020-09-03  8:40     ` Alex Shi
2020-09-03  9:00       ` Vlastimil Babka
2020-09-03  9:31     ` Mel Gorman
2020-09-03  8:19 ` David Hildenbrand
2020-09-03  9:14   ` Alex Shi

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.