* [PATCH 1/2] mm/pageblock: mitigate cmpxchg false sharing in pageblock flags
@ 2020-08-16  3:47 ` Alex Shi
  0 siblings, 0 replies; 15+ messages in thread
From: Alex Shi @ 2020-08-16  3:47 UTC (permalink / raw)
  Cc: Andrew Morton, Hugh Dickins, Alexander Duyck, linux-mm, linux-kernel

pageblock_flags is accessed as an unsigned long, but each pageblock's
flags occupy just 4 bits, so one long holds the flags of 8 (on 32-bit
machines) or 16 pageblocks. Setting one pageblock's flags therefore has
to synchronize in cmpxchg with 7 or 15 unrelated pageblocks' flags,
which can cause long waits.

If we change pageblock_flags to unsigned char, we can use a byte-sized
cmpxchg that shares its byte with only one other pageblock's flags,
which relieves much of the false sharing in cmpxchg.
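
For illustration, a minimal userspace model of the retry loop this is
about; only a sketch with hypothetical names, using C11 atomics in place
of the kernel's cmpxchg(), and not part of the patch itself:

#include <stdatomic.h>

/* One byte covers the 4-bit flags of just 2 pageblocks; as an unsigned
 * long it would cover 8 or 16, so an update to any neighbouring
 * pageblock's flags forces the loop below to retry.
 */
static _Atomic unsigned char bitmap[1];

static void set_flags(unsigned int bitidx, unsigned char flags,
		      unsigned char mask)
{
	unsigned char byte = atomic_load(&bitmap[0]);

	mask <<= bitidx;
	flags <<= bitidx;
	/* Fails and reloads 'byte' if *any* bit in the byte has changed. */
	while (!atomic_compare_exchange_weak(&bitmap[0], &byte,
					     (byte & ~mask) | flags))
		;
}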

Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Cc: linux-mm@kvack.org
Cc: linux-kernel@vger.kernel.org
---
 include/linux/mmzone.h          |  6 +++---
 include/linux/pageblock-flags.h |  2 +-
 mm/page_alloc.c                 | 24 +++++++++++++-----------
 3 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 0ed520954843..c92d6d24527d 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -438,7 +438,7 @@ struct zone {
 	 * Flags for a pageblock_nr_pages block. See pageblock-flags.h.
 	 * In SPARSEMEM, this map is stored in struct mem_section
 	 */
-	unsigned long		*pageblock_flags;
+	unsigned char		*pageblock_flags;
 #endif /* CONFIG_SPARSEMEM */
 
 	/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
@@ -1159,7 +1159,7 @@ struct mem_section_usage {
 	DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
 #endif
 	/* See declaration of similar field in struct zone */
-	unsigned long pageblock_flags[0];
+	unsigned char	pageblock_flags[0];
 };
 
 void subsection_map_init(unsigned long pfn, unsigned long nr_pages);
@@ -1212,7 +1212,7 @@ struct mem_section {
 extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
 #endif
 
-static inline unsigned long *section_to_usemap(struct mem_section *ms)
+static inline unsigned char *section_to_usemap(struct mem_section *ms)
 {
 	return ms->usage->pageblock_flags;
 }
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index fff52ad370c1..d189441568eb 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -54,7 +54,7 @@ enum pageblock_bits {
 /* Forward declaration */
 struct page;
 
-unsigned long get_pfnblock_flags_mask(struct page *page,
+unsigned char get_pfnblock_flags_mask(struct page *page,
 				unsigned long pfn,
 				unsigned long mask);
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 66d45e9cc358..142803d1f49b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -447,7 +447,7 @@ static inline bool defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
 #endif
 
 /* Return a pointer to the bitmap storing bits affecting a block of pages */
-static inline unsigned long *get_pageblock_bitmap(struct page *page,
+static inline unsigned char *get_pageblock_bitmap(struct page *page,
 							unsigned long pfn)
 {
 #ifdef CONFIG_SPARSEMEM
@@ -467,6 +467,8 @@ static inline int pfn_to_bitidx(struct page *page, unsigned long pfn)
 	return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
 }
 
+#define BITS_PER_CHAR	8
+
 /**
  * get_pfnblock_flags_mask - Return the requested group of flags for the pageblock_nr_pages block of pages
  * @page: The page within the block of interest
@@ -476,24 +478,24 @@ static inline int pfn_to_bitidx(struct page *page, unsigned long pfn)
  * Return: pageblock_bits flags
  */
 static __always_inline
-unsigned long __get_pfnblock_flags_mask(struct page *page,
+unsigned char __get_pfnblock_flags_mask(struct page *page,
 					unsigned long pfn,
 					unsigned long mask)
 {
-	unsigned long *bitmap;
+	unsigned char *bitmap;
 	unsigned long bitidx, word_bitidx;
-	unsigned long word;
+	unsigned char word;
 
 	bitmap = get_pageblock_bitmap(page, pfn);
 	bitidx = pfn_to_bitidx(page, pfn);
-	word_bitidx = bitidx / BITS_PER_LONG;
-	bitidx &= (BITS_PER_LONG-1);
+	word_bitidx = bitidx / BITS_PER_CHAR;
+	bitidx &= (BITS_PER_CHAR-1);
 
 	word = bitmap[word_bitidx];
 	return (word >> bitidx) & mask;
 }
 
-unsigned long get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
+unsigned char get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
 					unsigned long mask)
 {
 	return __get_pfnblock_flags_mask(page, pfn, mask);
@@ -515,17 +517,17 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
 					unsigned long pfn,
 					unsigned long mask)
 {
-	unsigned long *bitmap;
+	unsigned char *bitmap;
 	unsigned long bitidx, word_bitidx;
-	unsigned long old_word, word;
+	unsigned char old_word, word;
 
 	BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
 	BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits));
 
 	bitmap = get_pageblock_bitmap(page, pfn);
 	bitidx = pfn_to_bitidx(page, pfn);
-	word_bitidx = bitidx / BITS_PER_LONG;
-	bitidx &= (BITS_PER_LONG-1);
+	word_bitidx = bitidx / BITS_PER_CHAR;
+	bitidx &= (BITS_PER_CHAR-1);
 
 	VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page);
 
-- 
1.8.3.1



* [PATCH 2/2] mm/pageblock: remove false sharing in pageblock_flags
  2020-08-16  3:47 ` Alex Shi
@ 2020-08-16  3:47   ` Alex Shi
  -1 siblings, 0 replies; 15+ messages in thread
From: Alex Shi @ 2020-08-16  3:47 UTC (permalink / raw)
  Cc: Andrew Morton, Hugh Dickins, Alexander Duyck, linux-kernel, linux-mm

Each pageblock's flags are currently only 4 bits, so two pageblocks have
to share a byte in cmpxchg whenever the flags are read or set; that
false sharing causes a performance drop.

If we increase the bits to 8, the false sharing in cmpxchg is gone, and
the only cost is half a byte per pageblock: half a byte per 128MB on
x86, or 4 bytes per 1GB.
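
To make the layout change concrete, a rough sketch (an illustrative
comment only; the bit names follow enum pageblock_bits, three
migratetype bits plus the skip bit):

/*
 * 4 bits per pageblock (before): two blocks share every byte, so even
 * a byte-wide cmpxchg on blk0 races with updates to blk1:
 *	byte = | blk1: skip, migrate[2:0] | blk0: skip, migrate[2:0] |
 *
 * 8 bits per pageblock (after): each block owns a whole byte, and a
 * byte-wide cmpxchg touches no neighbour's flags:
 *	byte = | blk0: 4 spare bits | skip | migrate[2:0] |
 */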

Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Cc: linux-kernel@vger.kernel.org
Cc: linux-mm@kvack.org
---
 include/linux/pageblock-flags.h | 2 +-
 mm/page_alloc.c                 | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index d189441568eb..556fc2c0b392 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -25,7 +25,7 @@ enum pageblock_bits {
 	 * Assume the bits will always align on a word. If this assumption
 	 * changes then get/set pageblock needs updating.
 	 */
-	NR_PAGEBLOCK_BITS
+	NR_PAGEBLOCK_BITS = 8
 };
 
 #ifdef CONFIG_HUGETLB_PAGE
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 142803d1f49b..01c3fb822732 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -521,7 +521,7 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
 	unsigned long bitidx, word_bitidx;
 	unsigned char old_word, word;
 
-	BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
+	BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 8);
 	BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits));
 
 	bitmap = get_pageblock_bitmap(page, pfn);
-- 
1.8.3.1



* Re: [PATCH 1/2] mm/pageblock: mitigate cmpxchg false sharing in pageblock flags
  2020-08-16  3:47 ` Alex Shi
@ 2020-08-16  4:09 ` Matthew Wilcox
  2020-08-16 13:53   ` Alex Shi
  -1 siblings, 1 reply; 15+ messages in thread
From: Matthew Wilcox @ 2020-08-16  4:09 UTC (permalink / raw)
  To: Alex Shi
  Cc: Andrew Morton, Hugh Dickins, Alexander Duyck, linux-mm, linux-kernel

On Sun, Aug 16, 2020 at 11:47:56AM +0800, Alex Shi wrote:
> +++ b/mm/page_alloc.c
> @@ -467,6 +467,8 @@ static inline int pfn_to_bitidx(struct page *page, unsigned long pfn)
>  	return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
>  }
>  
> +#define BITS_PER_CHAR	8

include/linux/bits.h:#define BITS_PER_BYTE              8

>  	bitmap = get_pageblock_bitmap(page, pfn);
>  	bitidx = pfn_to_bitidx(page, pfn);
> -	word_bitidx = bitidx / BITS_PER_LONG;
> -	bitidx &= (BITS_PER_LONG-1);
> +	word_bitidx = bitidx / BITS_PER_CHAR;
> +	bitidx &= (BITS_PER_CHAR-1);

It's not a word any more; it's a byte.



* Re: [PATCH 2/2] mm/pageblock: remove false sharing in pageblock_flags
  2020-08-16  3:47   ` Alex Shi
@ 2020-08-16  4:17   ` Matthew Wilcox
  2020-08-16 14:10     ` Alex Shi
  -1 siblings, 1 reply; 15+ messages in thread
From: Matthew Wilcox @ 2020-08-16  4:17 UTC (permalink / raw)
  To: Alex Shi
  Cc: Andrew Morton, Hugh Dickins, Alexander Duyck, linux-kernel, linux-mm

On Sun, Aug 16, 2020 at 11:47:57AM +0800, Alex Shi wrote:
> Each pageblock's flags are currently only 4 bits, so two pageblocks have
> to share a byte in cmpxchg whenever the flags are read or set; that
> false sharing causes a performance drop.
> 
> If we increase the bits to 8, the false sharing in cmpxchg is gone, and
> the only cost is half a byte per pageblock: half a byte per 128MB on
> x86, or 4 bytes per 1GB.

I don't believe this patch has that effect, mostly because it still does
cmpxchg() on words instead of bytes.

But which functions would benefit?  It seems to me this cmpxchg() is
only called from the set_pageblock_migratetype() morass of functions,
none of which are called in hot paths as far as I can make out.

So are you just reasoning by analogy with the previous patch where you
have measured a performance improvement, or did you send the wrong patch,
or did I overlook a hot path that calls one of the pageblock migration
functions?


* Re: [PATCH 1/2] mm/pageblock: mitigate cmpxchg false sharing in pageblock flags
  2020-08-16  3:47 ` Alex Shi
@ 2020-08-16 12:16 ` David Hildenbrand
  2020-08-16 14:14   ` Alex Shi
  -1 siblings, 1 reply; 15+ messages in thread
From: David Hildenbrand @ 2020-08-16 12:16 UTC (permalink / raw)
  To: Alex Shi
  Cc: Andrew Morton, Hugh Dickins, Alexander Duyck, linux-mm, linux-kernel

On 16.08.20 05:47, Alex Shi wrote:
> pageblock_flags is accessed as an unsigned long, but each pageblock's
> flags occupy just 4 bits, so one long holds the flags of 8 (on 32-bit
> machines) or 16 pageblocks. Setting one pageblock's flags therefore has
> to synchronize in cmpxchg with 7 or 15 unrelated pageblocks' flags,
> which can cause long waits.
> 
> If we change pageblock_flags to unsigned char, we can use a byte-sized
> cmpxchg that shares its byte with only one other pageblock's flags,
> which relieves much of the false sharing in cmpxchg.

Do you have any performance numbers to back your claims? IOW, do we care
at all?



-- 
Thanks,

David / dhildenb



* Re: [PATCH 1/2] mm/pageblock: mitigate cmpxchg false sharing in pageblock flags
  2020-08-16  4:09 ` [PATCH 1/2] mm/pageblock: mitigate cmpxchg false sharing in pageblock flags Matthew Wilcox
@ 2020-08-16 13:53   ` Alex Shi
  0 siblings, 0 replies; 15+ messages in thread
From: Alex Shi @ 2020-08-16 13:53 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Andrew Morton, Hugh Dickins, Alexander Duyck, linux-mm, linux-kernel



On 2020/8/16 12:09 PM, Matthew Wilcox wrote:
> On Sun, Aug 16, 2020 at 11:47:56AM +0800, Alex Shi wrote:
>> +++ b/mm/page_alloc.c
>> @@ -467,6 +467,8 @@ static inline int pfn_to_bitidx(struct page *page, unsigned long pfn)
>>  	return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
>>  }
>>  
>> +#define BITS_PER_CHAR	8
> 
> include/linux/bits.h:#define BITS_PER_BYTE              8

Thanks for the reminder!

> 
>>  	bitmap = get_pageblock_bitmap(page, pfn);
>>  	bitidx = pfn_to_bitidx(page, pfn);
>> -	word_bitidx = bitidx / BITS_PER_LONG;
>> -	bitidx &= (BITS_PER_LONG-1);
>> +	word_bitidx = bitidx / BITS_PER_CHAR;
>> +	bitidx &= (BITS_PER_CHAR-1);
> 
> It's not a word any more.  it's a byte.
> 

Yes, will change this.
Thanks!

From 963425639f56c1ba1c997653a4ff6885c81dc0ab Mon Sep 17 00:00:00 2001
From: Alex Shi <alex.shi@linux.alibaba.com>
Date: Sat, 15 Aug 2020 22:54:17 +0800
Subject: [PATCH 1/2] mm/pageblock: mitigate cmpxchg false sharing in
 pageblock flags

pageblock_flags is accessed as an unsigned long, but each pageblock's
flags occupy just 4 bits, so one long holds the flags of 8 (on 32-bit
machines) or 16 pageblocks. Setting one pageblock's flags therefore has
to synchronize in cmpxchg with 7 or 15 unrelated pageblocks' flags,
which can cause long waits.

If we change pageblock_flags to unsigned char, we can use a byte-sized
cmpxchg that shares its byte with only one other pageblock's flags,
which relieves much of the false sharing in cmpxchg.

Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Cc: linux-mm@kvack.org
Cc: linux-kernel@vger.kernel.org
---
 include/linux/mmzone.h          |  6 +++---
 include/linux/pageblock-flags.h |  2 +-
 mm/page_alloc.c                 | 38 +++++++++++++++++++-------------------
 3 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 0ed520954843..c92d6d24527d 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -438,7 +438,7 @@ struct zone {
 	 * Flags for a pageblock_nr_pages block. See pageblock-flags.h.
 	 * In SPARSEMEM, this map is stored in struct mem_section
 	 */
-	unsigned long		*pageblock_flags;
+	unsigned char		*pageblock_flags;
 #endif /* CONFIG_SPARSEMEM */
 
 	/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
@@ -1159,7 +1159,7 @@ struct mem_section_usage {
 	DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
 #endif
 	/* See declaration of similar field in struct zone */
-	unsigned long pageblock_flags[0];
+	unsigned char	pageblock_flags[0];
 };
 
 void subsection_map_init(unsigned long pfn, unsigned long nr_pages);
@@ -1212,7 +1212,7 @@ struct mem_section {
 extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
 #endif
 
-static inline unsigned long *section_to_usemap(struct mem_section *ms)
+static inline unsigned char *section_to_usemap(struct mem_section *ms)
 {
 	return ms->usage->pageblock_flags;
 }
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index fff52ad370c1..d189441568eb 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -54,7 +54,7 @@ enum pageblock_bits {
 /* Forward declaration */
 struct page;
 
-unsigned long get_pfnblock_flags_mask(struct page *page,
+unsigned char get_pfnblock_flags_mask(struct page *page,
 				unsigned long pfn,
 				unsigned long mask);
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 66d45e9cc358..eb5cca7e8683 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -447,7 +447,7 @@ static inline bool defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
 #endif
 
 /* Return a pointer to the bitmap storing bits affecting a block of pages */
-static inline unsigned long *get_pageblock_bitmap(struct page *page,
+static inline unsigned char *get_pageblock_bitmap(struct page *page,
 							unsigned long pfn)
 {
 #ifdef CONFIG_SPARSEMEM
@@ -476,24 +476,24 @@ static inline int pfn_to_bitidx(struct page *page, unsigned long pfn)
  * Return: pageblock_bits flags
  */
 static __always_inline
-unsigned long __get_pfnblock_flags_mask(struct page *page,
+unsigned char __get_pfnblock_flags_mask(struct page *page,
 					unsigned long pfn,
 					unsigned long mask)
 {
-	unsigned long *bitmap;
-	unsigned long bitidx, word_bitidx;
-	unsigned long word;
+	unsigned char *bitmap;
+	unsigned long bitidx, byte_bitidx;
+	unsigned char byte;
 
 	bitmap = get_pageblock_bitmap(page, pfn);
 	bitidx = pfn_to_bitidx(page, pfn);
-	word_bitidx = bitidx / BITS_PER_LONG;
-	bitidx &= (BITS_PER_LONG-1);
+	byte_bitidx = bitidx / BITS_PER_BYTE;
+	bitidx &= (BITS_PER_BYTE-1);
 
-	word = bitmap[word_bitidx];
-	return (word >> bitidx) & mask;
+	byte = bitmap[byte_bitidx];
+	return (byte >> bitidx) & mask;
 }
 
-unsigned long get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
+unsigned char get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
 					unsigned long mask)
 {
 	return __get_pfnblock_flags_mask(page, pfn, mask);
@@ -515,29 +515,29 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
 					unsigned long pfn,
 					unsigned long mask)
 {
-	unsigned long *bitmap;
-	unsigned long bitidx, word_bitidx;
-	unsigned long old_word, word;
+	unsigned char *bitmap;
+	unsigned long bitidx, byte_bitidx;
+	unsigned char old_byte, byte;
 
 	BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
 	BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits));
 
 	bitmap = get_pageblock_bitmap(page, pfn);
 	bitidx = pfn_to_bitidx(page, pfn);
-	word_bitidx = bitidx / BITS_PER_LONG;
-	bitidx &= (BITS_PER_LONG-1);
+	byte_bitidx = bitidx / BITS_PER_BYTE;
+	bitidx &= (BITS_PER_BYTE-1);
 
 	VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page);
 
 	mask <<= bitidx;
 	flags <<= bitidx;
 
-	word = READ_ONCE(bitmap[word_bitidx]);
+	byte = READ_ONCE(bitmap[byte_bitidx]);
 	for (;;) {
-		old_word = cmpxchg(&bitmap[word_bitidx], word, (word & ~mask) | flags);
-		if (word == old_word)
+		old_byte = cmpxchg(&bitmap[byte_bitidx], byte, (byte & ~mask) | flags);
+		if (byte == old_byte)
 			break;
-		word = old_word;
+		byte = old_byte;
 	}
 }
 
-- 
1.8.3.1



* Re: [PATCH 2/2] mm/pageblock: remove false sharing in pageblock_flags
  2020-08-16  4:17   ` Matthew Wilcox
@ 2020-08-16 14:10     ` Alex Shi
  2020-08-16 15:56         ` Alexander Duyck
  0 siblings, 1 reply; 15+ messages in thread
From: Alex Shi @ 2020-08-16 14:10 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Andrew Morton, Hugh Dickins, Alexander Duyck, linux-kernel, linux-mm



On 2020/8/16 12:17 PM, Matthew Wilcox wrote:
> On Sun, Aug 16, 2020 at 11:47:57AM +0800, Alex Shi wrote:
>> Each pageblock's flags are currently only 4 bits, so two pageblocks have
>> to share a byte in cmpxchg whenever the flags are read or set; that
>> false sharing causes a performance drop.
>>
>> If we increase the bits to 8, the false sharing in cmpxchg is gone, and
>> the only cost is half a byte per pageblock: half a byte per 128MB on
>> x86, or 4 bytes per 1GB.
> 
> I don't believe this patch has that effect, mostly because it still does
> cmpxchg() on words instead of bytes.

Hi Matthew,

Thanks a lot for the comments!

Sorry, I must have overlooked something. Would you point out why the cmpxchg
is still on words after patch 1 is applied?


> 
> But which functions would benefit?  It seems to me this cmpxchg() is
> only called from the set_pageblock_migratetype() morass of functions,
> none of which are called in hot paths as far as I can make out.
> 
> So are you just reasoning by analogy with the previous patch where you
> have measured a performance improvement, or did you send the wrong patch,
> or did I overlook a hot path that calls one of the pageblock migration
> functions?
> 

I was reading compaction.c and found that the following commit introduced
test_and_set_skip under a lock. It looks like the pageblock_flags setting
has false sharing in cmpxchg, but I have no valid data on this yet.

Thanks
Alex

e380bebe4771548  mm, compaction: keep migration source private to a single compaction instance

                if (!locked) {
                        locked = compact_trylock_irqsave(zone_lru_lock(zone),
                                                                &flags, cc);
-                       if (!locked)
+
+                       /* Allow future scanning if the lock is contended */
+                       if (!locked) {
+                               clear_pageblock_skip(page);
                                break;
+                       }
+
+                       /* Try get exclusive access under lock */
+                       if (!skip_updated) {
+                               skip_updated = true;
+                               if (test_and_set_skip(cc, page, low_pfn))
+                                       goto isolate_abort;
+                       }


* Re: [PATCH 1/2] mm/pageblock: mitigate cmpxchg false sharing in pageblock flags
  2020-08-16 12:16 ` David Hildenbrand
@ 2020-08-16 14:14   ` Alex Shi
  0 siblings, 0 replies; 15+ messages in thread
From: Alex Shi @ 2020-08-16 14:14 UTC (permalink / raw)
  To: David Hildenbrand
  Cc: Andrew Morton, Hugh Dickins, Alexander Duyck, linux-mm, linux-kernel



On 2020/8/16 8:16 PM, David Hildenbrand wrote:
> On 16.08.20 05:47, Alex Shi wrote:
>> pageblock_flags is accessed as an unsigned long, but each pageblock's
>> flags occupy just 4 bits, so one long holds the flags of 8 (on 32-bit
>> machines) or 16 pageblocks. Setting one pageblock's flags therefore has
>> to synchronize in cmpxchg with 7 or 15 unrelated pageblocks' flags,
>> which can cause long waits.
>>
>> If we change pageblock_flags to unsigned char, we can use a byte-sized
>> cmpxchg that shares its byte with only one other pageblock's flags,
>> which relieves much of the false sharing in cmpxchg.
> 
> Do you have any performance numbers to back your claims? IOW, do we care
> at all?
> 
> 

Hi David,

Thanks for the comments!

Not yet. I traced the following commit; with it, this hot path looks to be
resolved.

Thanks
Alex

e380bebe4771548  mm, compaction: keep migration source private to a single compaction instance
                if (!locked) {
                        locked = compact_trylock_irqsave(zone_lru_lock(zone),
                                                                &flags, cc);
-                       if (!locked)
+
+                       /* Allow future scanning if the lock is contended */
+                       if (!locked) {
+                               clear_pageblock_skip(page);
                                break;
+                       }
+
+                       /* Try get exclusive access under lock */
+                       if (!skip_updated) {
+                               skip_updated = true;
+                               if (test_and_set_skip(cc, page, low_pfn))
+                                       goto isolate_abort;
+                       }



* Re: [PATCH 2/2] mm/pageblock: remove false sharing in pageblock_flags
  2020-08-16 14:10     ` Alex Shi
@ 2020-08-16 15:56         ` Alexander Duyck
  0 siblings, 0 replies; 15+ messages in thread
From: Alexander Duyck @ 2020-08-16 15:56 UTC (permalink / raw)
  To: Alex Shi
  Cc: Matthew Wilcox, Andrew Morton, Hugh Dickins, Alexander Duyck,
	LKML, linux-mm

On Sun, Aug 16, 2020 at 7:11 AM Alex Shi <alex.shi@linux.alibaba.com> wrote:
>
>
>
> On 2020/8/16 12:17 PM, Matthew Wilcox wrote:
> > On Sun, Aug 16, 2020 at 11:47:57AM +0800, Alex Shi wrote:
> >> Each pageblock's flags are currently only 4 bits, so two pageblocks have
> >> to share a byte in cmpxchg whenever the flags are read or set; that
> >> false sharing causes a performance drop.
> >>
> >> If we increase the bits to 8, the false sharing in cmpxchg is gone, and
> >> the only cost is half a byte per pageblock: half a byte per 128MB on
> >> x86, or 4 bytes per 1GB.
> >
> > I don't believe this patch has that effect, mostly because it still does
> > cmpxchg() on words instead of bytes.
>
> Hi Matthew,
>
> Thanks a lot for the comments!
>
> Sorry, I must have overlooked something. Would you point out why the cmpxchg
> is still on words after patch 1 is applied?
>

I would take it one step further. You still have false sharing, as the
pageblock bits still occupy the same cacheline, so you are going to
see them cache bouncing regardless.

What it seems like you are attempting to address is the fact that
multiple threads could all be attempting to update the same long
value. As I pointed out for the migrate type it seems to be protected
by the zone lock, but for compaction the skip bit doesn't have the
same protection as there are some threads using the zone lock and
others using the LRU lock. I'm still not sure it makes much of a
difference though.
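
As a minimal sketch of that cacheline-level bouncing (my illustration,
assuming 64-byte lines, not from the patch): each thread below owns its
own byte, so no cmpxchg-level retry can ever happen, yet the stores
still fight over the one shared line.

#include <stdatomic.h>
#include <pthread.h>

/* 64 flag bytes aligned into exactly one cacheline. */
static _Alignas(64) _Atomic unsigned char flags[64];

static void *writer(void *arg)
{
	_Atomic unsigned char *byte = arg;
	long i;

	/* No byte is shared, but every RMW still bounces the cacheline. */
	for (i = 0; i < 1000000; i++)
		atomic_fetch_or(byte, 1);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, writer, &flags[0]);
	pthread_create(&b, NULL, writer, &flags[1]);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}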

> >
> > But which functions would benefit?  It seems to me this cmpxchg() is
> > only called from the set_pageblock_migratetype() morass of functions,
> > none of which are called in hot paths as far as I can make out.
> >
> > So are you just reasoning by analogy with the previous patch where you
> > have measured a performance improvement, or did you send the wrong patch,
> > or did I overlook a hot path that calls one of the pageblock migration
> > functions?
> >
>
> I was reading compaction.c and found that the following commit introduced
> test_and_set_skip under a lock. It looks like the pageblock_flags setting
> has false sharing in cmpxchg, but I have no valid data on this yet.
>
> Thanks
> Alex
>
> e380bebe4771548  mm, compaction: keep migration source private to a single compaction instance
>
>                 if (!locked) {
>                         locked = compact_trylock_irqsave(zone_lru_lock(zone),
>                                                                 &flags, cc);
> -                       if (!locked)
> +
> +                       /* Allow future scanning if the lock is contended */
> +                       if (!locked) {
> +                               clear_pageblock_skip(page);
>                                 break;
> +                       }
> +
> +                       /* Try get exclusive access under lock */
> +                       if (!skip_updated) {
> +                               skip_updated = true;
> +                               if (test_and_set_skip(cc, page, low_pfn))
> +                                       goto isolate_abort;
> +                       }
>

I'm not sure that is a good grounds for doubling the size of the
pageblock flags. If you look further down in the code there are bits
that are setting these bits without taking the lock. The assumption
here is that by taking the lock the test_and_set_skip will be
performed atomically since another thread cannot perform that while
the zone lock is held. If you look in the function itself it only does
anything if the skip bits are checked and if the page is the first
page in the pageblock.

I think you might be confusing some of my earlier comments. I still
believe the 3% regression you reported with my patch is not directly
related to the test_and_set_skip as the test you ran seems unlikely to
trigger compaction. However with that said one of the advantages of
using the locked section to perform these types of tests is that it
reduces the number of times the test is run since it will only be on
the first unlocked page in any batch of pages and the first page in
the pageblock is always going to be handled without the lock held
since it is the first page processed.

Until we can get a test up such as thpscale that does a good job of
stressing the compaction code I don't think we can rely on just
observations to say if this is an improvement or not.

Thanks.

- Alex


* Re: [PATCH 1/2] mm/pageblock: mitigate cmpxchg false sharing in pageblock flags
  2020-08-16  3:47 ` Alex Shi
@ 2020-08-17  9:58 ` Wei Yang
  2020-08-17 11:02   ` Alex Shi
  -1 siblings, 1 reply; 15+ messages in thread
From: Wei Yang @ 2020-08-17  9:58 UTC (permalink / raw)
  To: Alex Shi
  Cc: Andrew Morton, Hugh Dickins, Alexander Duyck, linux-mm, linux-kernel

On Sun, Aug 16, 2020 at 11:47:56AM +0800, Alex Shi wrote:
>pageblock_flags is accessed as an unsigned long, but each pageblock's
>flags occupy just 4 bits, so one long holds the flags of 8 (on 32-bit
>machines) or 16 pageblocks. Setting one pageblock's flags therefore has
>to synchronize in cmpxchg with 7 or 15 unrelated pageblocks' flags,
>which can cause long waits.
>
>If we change pageblock_flags to unsigned char, we can use a byte-sized
>cmpxchg that shares its byte with only one other pageblock's flags,
>which relieves much of the false sharing in cmpxchg.
>

If my understanding is correct, the CPU reads data at cacheline granularity.
Defining the variable as char or anything else doesn't help with false
sharing.

Correct me if not.

-- 
Wei Yang
Help you, Help me


* Re: [PATCH 1/2] mm/pageblock: mitigate cmpxchg false sharing in pageblock flags
  2020-08-17  9:58 ` Wei Yang
@ 2020-08-17 11:02   ` Alex Shi
  0 siblings, 0 replies; 15+ messages in thread
From: Alex Shi @ 2020-08-17 11:02 UTC (permalink / raw)
  To: Wei Yang
  Cc: Andrew Morton, Hugh Dickins, Alexander Duyck, linux-mm, linux-kernel



On 2020/8/17 5:58 PM, Wei Yang wrote:
> On Sun, Aug 16, 2020 at 11:47:56AM +0800, Alex Shi wrote:
>> pageblock_flags is accessed as an unsigned long, but each pageblock's
>> flags occupy just 4 bits, so one long holds the flags of 8 (on 32-bit
>> machines) or 16 pageblocks. Setting one pageblock's flags therefore has
>> to synchronize in cmpxchg with 7 or 15 unrelated pageblocks' flags,
>> which can cause long waits.
>>
>> If we change pageblock_flags to unsigned char, we can use a byte-sized
>> cmpxchg that shares its byte with only one other pageblock's flags,
>> which relieves much of the false sharing in cmpxchg.
>>
> 
> If my understanding is correct, the CPU reads data at cacheline granularity.
> Defining the variable as char or anything else doesn't help with false
> sharing.
> 
> Correct me if not.

Right and not right.

You are right about the cacheline false sharing, but beyond that, cmpxchg
still has to compare the pointed-to data. If the data is a long, it has to
compare the whole long word and make sure nothing in it has changed. If we
narrow the compared data to a byte, cmpxchg only synchronizes on that byte.
So there are two levels of false sharing here.

Thanks
Alex

        for (;;) {
-               old_word = cmpxchg(&bitmap[word_bitidx], word, (word & ~mask) | flags);
-               if (word == old_word)
+               old_byte = cmpxchg(&bitmap[byte_bitidx], byte, (byte & ~mask) | flags);
+               if (byte == old_byte)
                        break;
-               word = old_word;
+               byte = old_byte;
        }
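
For what it's worth, x86 does provide a byte-wide CMPXCHG instruction,
and the kernel's cmpxchg() macro picks its operand size from the pointer
type, so the compare really does narrow to 8 bits once the bitmap is
unsigned char. A standalone C11 sketch of the narrowed operation
(hypothetical name, not the kernel implementation), which typically
compiles to a lock cmpxchgb on x86-64:

#include <stdatomic.h>

/* Byte-wide compare-and-swap: only these 8 bits are compared. */
static int try_update_byte(_Atomic unsigned char *byte,
			   unsigned char old, unsigned char new)
{
	return atomic_compare_exchange_strong(byte, &old, new);
}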


* Re: [PATCH 2/2] mm/pageblock: remove false sharing in pageblock_flags
  2020-08-16 15:56         ` Alexander Duyck
@ 2020-08-18  7:15         ` Alex Shi
  -1 siblings, 0 replies; 15+ messages in thread
From: Alex Shi @ 2020-08-18  7:15 UTC (permalink / raw)
  To: Alexander Duyck
  Cc: Matthew Wilcox, Andrew Morton, Hugh Dickins, Alexander Duyck,
	LKML, linux-mm



On 2020/8/16 11:56 PM, Alexander Duyck wrote:
> On Sun, Aug 16, 2020 at 7:11 AM Alex Shi <alex.shi@linux.alibaba.com> wrote:
>>
>>
>>
>> On 2020/8/16 12:17 PM, Matthew Wilcox wrote:
>>> On Sun, Aug 16, 2020 at 11:47:57AM +0800, Alex Shi wrote:
>>>> Each pageblock's flags are currently only 4 bits, so two pageblocks have
>>>> to share a byte in cmpxchg whenever the flags are read or set; that
>>>> false sharing causes a performance drop.
>>>>
>>>> If we increase the bits to 8, the false sharing in cmpxchg is gone, and
>>>> the only cost is half a byte per pageblock: half a byte per 128MB on
>>>> x86, or 4 bytes per 1GB.
>>>
>>> I don't believe this patch has that effect, mostly because it still does
>>> cmpxchg() on words instead of bytes.
>>
>> Hi Matthew,
>>
>> Thanks a lot for the comments!
>>
>> Sorry, I must have overlooked something. Would you point out why the cmpxchg
>> is still on words after patch 1 is applied?
>>
> 
> I would take it one step further. You still have false sharing, as the
> pageblock bits still occupy the same cacheline, so you are going to
> see them cache bouncing regardless.

Right, there are two levels of false sharing here: the cacheline and the
cmpxchg comparison range. This patch fixes the cmpxchg level at a very
cheap price; the cacheline level is too big to resolve here.
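
A quick back-of-the-envelope check (my own arithmetic, not from the
thread) on why the cacheline level is not worth chasing:

	4 bits/pageblock: a 64-byte line covers 64 * 8 / 4 = 128 pageblocks
	8 bits/pageblock: a 64-byte line covers 64 * 8 / 8 =  64 pageblocks
	1 line/pageblock: 64 bytes vs. half a byte, i.e. 128x the memory

Padding every pageblock's flags out to its own cacheline would blow the
bitmap up by two orders of magnitude, while the byte-level change costs
almost nothing.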

> 
> What it seems like you are attempting to address is the fact that
> multiple threads could all be attempting to update the same long
> value. As I pointed out for the migrate type it seems to be protected
> by the zone lock, but for compaction the skip bit doesn't have the
> same protection as there are some threads using the zone lock and
> others using the LRU lock. I'm still not sure it makes much of a
> difference though.

It looks like, with this patch, locks are no longer needed on the flags.

> 
>>>
>>> But which functions would benefit?  It seems to me this cmpxchg() is
>>> only called from the set_pageblock_migratetype() morass of functions,
>>> none of which are called in hot paths as far as I can make out.
>>>
>>> So are you just reasoning by analogy with the previous patch where you
>>> have measured a performance improvement, or did you send the wrong patch,
>>> or did I overlook a hot path that calls one of the pageblock migration
>>> functions?
>>>
>>
>> I was reading compaction.c and found that the following commit introduced
>> test_and_set_skip under a lock. It looks like the pageblock_flags setting
>> has false sharing in cmpxchg, but I have no valid data on this yet.
>>
>> Thanks
>> Alex
>>
>> e380bebe4771548  mm, compaction: keep migration source private to a single compaction instance
>>
>>                 if (!locked) {
>>                         locked = compact_trylock_irqsave(zone_lru_lock(zone),
>>                                                                 &flags, cc);
>> -                       if (!locked)
>> +
>> +                       /* Allow future scanning if the lock is contended */
>> +                       if (!locked) {
>> +                               clear_pageblock_skip(page);
>>                                 break;
>> +                       }
>> +
>> +                       /* Try get exclusive access under lock */
>> +                       if (!skip_updated) {
>> +                               skip_updated = true;
>> +                               if (test_and_set_skip(cc, page, low_pfn))
>> +                                       goto isolate_abort;
>> +                       }
>>
> 
> I'm not sure that is a good grounds for doubling the size of the
> pageblock flags. If you look further down in the code there are bits
> that are setting these bits without taking the lock. The assumption
> here is that by taking the lock the test_and_set_skip will be
> performed atomically since another thread cannot perform that while
> the zone lock is held. If you look in the function itself it only does
> anything if the skip bits are checked and if the page is the first
> page in the pageblock.
> 
> I think you might be confusing some of my earlier comments. I still
> believe the 3% regression you reported with my patch is not directly
> related to the test_and_set_skip as the test you ran seems unlikely to
> trigger compaction. However with that said one of the advantages of
> using the locked section to perform these types of tests is that it
> reduces the number of times the test is run since it will only be on
> the first unlocked page in any batch of pages and the first page in
> the pageblock is always going to be handled without the lock held
> since it is the first page processed.
> 
> Until we can get a test up such as thpscale that does a good job of
> stressing the compaction code I don't think we can rely on just
> observations to say if this is an improvement or not.

I am still struggling to get a meaningful thpscale run. But if the patch is
clearly right in theory, do we have to wait on a benchmark result?

Thanks
Alex
