linux-mm.kvack.org archive mirror
* [PATCH v2 1/2] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags
@ 2020-08-19  5:47 Alex Shi
  2020-08-19  5:47 ` [PATCH v2 2/2] mm/pageblock: remove false sharing in pageblock_flags Alex Shi
  2020-08-19  7:55 ` [PATCH v2 1/2] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags Anshuman Khandual
  0 siblings, 2 replies; 17+ messages in thread
From: Alex Shi @ 2020-08-19  5:47 UTC (permalink / raw)
  To: Matthew Wilcox, David Hildenbrand
  Cc: Andrew Morton, Hugh Dickins, Alexander Duyck, linux-mm, linux-kernel

pageblock_flags is used as an array of longs. Since each pageblock's flags
are just 4 bits, one long holds the flags of 8 (on a 32-bit machine) or 16
pageblocks, so setting one pageblock's flags has to synchronize, via cmpxchg,
with 7 or 15 other pageblocks' flags. That can mean a long wait for the
cmpxchg to succeed.

If we change the pageblock_flags variable to char, we can use a char-sized
cmpxchg, which only synchronizes with one other pageblock's flags. That
relieves much of the false sharing in cmpxchg.

Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Cc: linux-mm@kvack.org
Cc: linux-kernel@vger.kernel.org
---
 include/linux/mmzone.h          |  6 +++---
 include/linux/pageblock-flags.h |  2 +-
 mm/page_alloc.c                 | 38 +++++++++++++++++++-------------------
 3 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 0ed520954843..c92d6d24527d 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -438,7 +438,7 @@ struct zone {
 	 * Flags for a pageblock_nr_pages block. See pageblock-flags.h.
 	 * In SPARSEMEM, this map is stored in struct mem_section
 	 */
-	unsigned long		*pageblock_flags;
+	unsigned char		*pageblock_flags;
 #endif /* CONFIG_SPARSEMEM */
 
 	/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
@@ -1159,7 +1159,7 @@ struct mem_section_usage {
 	DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
 #endif
 	/* See declaration of similar field in struct zone */
-	unsigned long pageblock_flags[0];
+	unsigned char	pageblock_flags[0];
 };
 
 void subsection_map_init(unsigned long pfn, unsigned long nr_pages);
@@ -1212,7 +1212,7 @@ struct mem_section {
 extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
 #endif
 
-static inline unsigned long *section_to_usemap(struct mem_section *ms)
+static inline unsigned char *section_to_usemap(struct mem_section *ms)
 {
 	return ms->usage->pageblock_flags;
 }
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index fff52ad370c1..d189441568eb 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -54,7 +54,7 @@ enum pageblock_bits {
 /* Forward declaration */
 struct page;
 
-unsigned long get_pfnblock_flags_mask(struct page *page,
+unsigned char get_pfnblock_flags_mask(struct page *page,
 				unsigned long pfn,
 				unsigned long mask);
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c0b0b32de50e..f60071e8a4e1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -445,7 +445,7 @@ static inline bool defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
 #endif
 
 /* Return a pointer to the bitmap storing bits affecting a block of pages */
-static inline unsigned long *get_pageblock_bitmap(struct page *page,
+static inline unsigned char *get_pageblock_bitmap(struct page *page,
 							unsigned long pfn)
 {
 #ifdef CONFIG_SPARSEMEM
@@ -474,24 +474,24 @@ static inline int pfn_to_bitidx(struct page *page, unsigned long pfn)
  * Return: pageblock_bits flags
  */
 static __always_inline
-unsigned long __get_pfnblock_flags_mask(struct page *page,
+unsigned char __get_pfnblock_flags_mask(struct page *page,
 					unsigned long pfn,
 					unsigned long mask)
 {
-	unsigned long *bitmap;
-	unsigned long bitidx, word_bitidx;
-	unsigned long word;
+	unsigned char *bitmap;
+	unsigned long bitidx, byte_bitidx;
+	unsigned char byte;
 
 	bitmap = get_pageblock_bitmap(page, pfn);
 	bitidx = pfn_to_bitidx(page, pfn);
-	word_bitidx = bitidx / BITS_PER_LONG;
-	bitidx &= (BITS_PER_LONG-1);
+	byte_bitidx = bitidx / BITS_PER_BYTE;
+	bitidx &= (BITS_PER_BYTE-1);
 
-	word = bitmap[word_bitidx];
-	return (word >> bitidx) & mask;
+	byte = bitmap[byte_bitidx];
+	return (byte >> bitidx) & mask;
 }
 
-unsigned long get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
+unsigned char get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
 					unsigned long mask)
 {
 	return __get_pfnblock_flags_mask(page, pfn, mask);
@@ -513,29 +513,29 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
 					unsigned long pfn,
 					unsigned long mask)
 {
-	unsigned long *bitmap;
-	unsigned long bitidx, word_bitidx;
-	unsigned long old_word, word;
+	unsigned char *bitmap;
+	unsigned long bitidx, byte_bitidx;
+	unsigned char old_byte, byte;
 
 	BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
 	BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits));
 
 	bitmap = get_pageblock_bitmap(page, pfn);
 	bitidx = pfn_to_bitidx(page, pfn);
-	word_bitidx = bitidx / BITS_PER_LONG;
-	bitidx &= (BITS_PER_LONG-1);
+	byte_bitidx = bitidx / BITS_PER_BYTE;
+	bitidx &= (BITS_PER_BYTE-1);
 
 	VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page);
 
 	mask <<= bitidx;
 	flags <<= bitidx;
 
-	word = READ_ONCE(bitmap[word_bitidx]);
+	byte = READ_ONCE(bitmap[byte_bitidx]);
 	for (;;) {
-		old_word = cmpxchg(&bitmap[word_bitidx], word, (word & ~mask) | flags);
-		if (word == old_word)
+		old_byte = cmpxchg(&bitmap[byte_bitidx], byte, (byte & ~mask) | flags);
+		if (byte == old_byte)
 			break;
-		word = old_word;
+		byte = old_byte;
 	}
 }
 
-- 
1.8.3.1
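
For illustration only (not part of the patch): below is a minimal user-space
sketch of the byte-granular compare-and-swap update that
set_pfnblock_flags_mask() performs above, with C11
atomic_compare_exchange_weak() standing in for the kernel's cmpxchg(). All
function and variable names here are hypothetical.

#include <stdatomic.h>
#include <stdio.h>

/* 4 flag bits per pageblock, two pageblocks per byte, so a byte-wide
 * CAS only contends with one neighbouring pageblock's flags. */
static void set_block_flags(_Atomic unsigned char *bitmap,
			    unsigned long bitidx,
			    unsigned char flags, unsigned char mask)
{
	unsigned long byte_idx = bitidx / 8;	/* BITS_PER_BYTE */
	unsigned int shift = bitidx & 7;
	unsigned char old = atomic_load(&bitmap[byte_idx]);

	mask <<= shift;
	flags <<= shift;
	/* Retry until no concurrent writer changed this byte under us;
	 * on failure, 'old' is refreshed with the current value. */
	while (!atomic_compare_exchange_weak(&bitmap[byte_idx], &old,
			(unsigned char)((old & ~mask) | flags)))
		;
}

int main(void)
{
	_Atomic unsigned char bitmap[4] = {0};

	set_block_flags(bitmap, 4, 0x2, 0x7);	/* 2nd block in byte 0 */
	printf("%#x\n", (unsigned)atomic_load(&bitmap[0]));	/* 0x20 */
	return 0;
}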




* [PATCH v2 2/2] mm/pageblock: remove false sharing in pageblock_flags
  2020-08-19  5:47 [PATCH v2 1/2] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags Alex Shi
@ 2020-08-19  5:47 ` Alex Shi
  2020-08-19  7:57   ` Anshuman Khandual
  2020-08-19  7:55 ` [PATCH v2 1/2] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags Anshuman Khandual
  1 sibling, 1 reply; 17+ messages in thread
From: Alex Shi @ 2020-08-19  5:47 UTC (permalink / raw)
  To: Matthew Wilcox, David Hildenbrand
  Cc: Andrew Morton, Hugh Dickins, Alexander Duyck, linux-kernel, linux-mm

pageblock_flags is currently only 4 bits per pageblock, so two pageblocks
have to share a char in cmpxchg when the flags get set; that false sharing
causes a perf drop.

If we increase the bits to 8, the false sharing in cmpxchg is gone, and the
only cost is half a char per pageblock, which is half a char per 128MB on
x86, or 4 chars per 1GB.

Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Cc: linux-kernel@vger.kernel.org
Cc: linux-mm@kvack.org
---
 include/linux/pageblock-flags.h | 2 +-
 mm/page_alloc.c                 | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index d189441568eb..f785c9d6d68c 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -25,7 +25,7 @@ enum pageblock_bits {
 	 * Assume the bits will always align on a word. If this assumption
 	 * changes then get/set pageblock needs updating.
 	 */
-	NR_PAGEBLOCK_BITS
+	NR_PAGEBLOCK_BITS = BITS_PER_BYTE
 };
 
 #ifdef CONFIG_HUGETLB_PAGE
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f60071e8a4e1..65f692c762d5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -517,7 +517,7 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
 	unsigned long bitidx, byte_bitidx;
 	unsigned char old_byte, byte;
 
-	BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
+	BUILD_BUG_ON(NR_PAGEBLOCK_BITS != BITS_PER_BYTE);
 	BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits));
 
 	bitmap = get_pageblock_bitmap(page, pfn);
-- 
1.8.3.1
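
To make the layout concrete, here is an illustrative sketch (mine, not from
the patch) of the bit packing before and after this change, based on the 3
migratetype bits plus the skip bit defined in pageblock-flags.h:

/* Before (NR_PAGEBLOCK_BITS = 4): two pageblocks per byte, so a byte-wide
 * cmpxchg still covers a neighbour's bits:
 *
 *	byte 0: [ blk1.skip blk1.mt2..mt0 | blk0.skip blk0.mt2..mt0 ]
 *
 * After (NR_PAGEBLOCK_BITS = BITS_PER_BYTE): one pageblock per byte, so a
 * byte-wide cmpxchg touches only that pageblock's own flags:
 *
 *	byte 0: [ unused x 4 | blk0.skip blk0.mt2..mt0 ]
 */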




* Re: [PATCH v2 1/2] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags
  2020-08-19  5:47 [PATCH v2 1/2] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags Alex Shi
  2020-08-19  5:47 ` [PATCH v2 2/2] mm/pageblock: remove false sharing in pageblock_flags Alex Shi
@ 2020-08-19  7:55 ` Anshuman Khandual
  2020-08-19  8:04   ` David Hildenbrand
  2020-08-30 10:14   ` Alex Shi
  1 sibling, 2 replies; 17+ messages in thread
From: Anshuman Khandual @ 2020-08-19  7:55 UTC (permalink / raw)
  To: Alex Shi, Matthew Wilcox, David Hildenbrand
  Cc: Andrew Morton, Hugh Dickins, Alexander Duyck, linux-mm, linux-kernel



On 08/19/2020 11:17 AM, Alex Shi wrote:
> pageblock_flags is used as an array of longs. Since each pageblock's flags
> are just 4 bits, one long holds the flags of 8 (on a 32-bit machine) or 16
> pageblocks, so setting one pageblock's flags has to synchronize, via cmpxchg,
> with 7 or 15 other pageblocks' flags. That can mean a long wait for the
> cmpxchg to succeed.
> 
> If we change the pageblock_flags variable to char, we can use a char-sized
> cmpxchg, which only synchronizes with one other pageblock's flags. That
> relieves much of the false sharing in cmpxchg.

Do you have numbers demonstrating claimed performance improvement
after this change ?



* Re: [PATCH v2 2/2] mm/pageblock: remove false sharing in pageblock_flags
  2020-08-19  5:47 ` [PATCH v2 2/2] mm/pageblock: remove false sharing in pageblock_flags Alex Shi
@ 2020-08-19  7:57   ` Anshuman Khandual
  2020-08-19  8:09     ` Alex Shi
  0 siblings, 1 reply; 17+ messages in thread
From: Anshuman Khandual @ 2020-08-19  7:57 UTC (permalink / raw)
  To: Alex Shi, Matthew Wilcox, David Hildenbrand
  Cc: Andrew Morton, Hugh Dickins, Alexander Duyck, linux-kernel, linux-mm



On 08/19/2020 11:17 AM, Alex Shi wrote:
> pageblock_flags is currently only 4 bits per pageblock, so two pageblocks
> have to share a char in cmpxchg when the flags get set; that false sharing
> causes a perf drop.
> 
> If we increase the bits to 8, the false sharing in cmpxchg is gone, and the
> only cost is half a char per pageblock, which is half a char per 128MB on
> x86, or 4 chars per 1GB.

Agreed that increase in memory utilization is negligible here but does
this really improve performance ?



* Re: [PATCH v2 1/2] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags
  2020-08-19  7:55 ` [PATCH v2 1/2] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags Anshuman Khandual
@ 2020-08-19  8:04   ` David Hildenbrand
  2020-08-30 10:08     ` Alex Shi
  2020-08-30 10:14   ` Alex Shi
  1 sibling, 1 reply; 17+ messages in thread
From: David Hildenbrand @ 2020-08-19  8:04 UTC (permalink / raw)
  To: Anshuman Khandual, Alex Shi, Matthew Wilcox
  Cc: Andrew Morton, Hugh Dickins, Alexander Duyck, linux-mm, linux-kernel

On 19.08.20 09:55, Anshuman Khandual wrote:
> 
> 
> On 08/19/2020 11:17 AM, Alex Shi wrote:
>> pageblock_flags is used as an array of longs. Since each pageblock's flags
>> are just 4 bits, one long holds the flags of 8 (on a 32-bit machine) or 16
>> pageblocks, so setting one pageblock's flags has to synchronize, via cmpxchg,
>> with 7 or 15 other pageblocks' flags. That can mean a long wait for the
>> cmpxchg to succeed.
>>
>> If we change the pageblock_flags variable to char, we can use a char-sized
>> cmpxchg, which only synchronizes with one other pageblock's flags. That
>> relieves much of the false sharing in cmpxchg.
> 
> Do you have numbers demonstrating claimed performance improvement
> after this change ?
> 

I asked for that in v1 and there are no performance numbers to justify
the change. IMHO, those will be required to consider this for inclusion;
otherwise it's just code churn resulting in (albeit minimal) additional
memory consumption.

-- 
Thanks,

David / dhildenb




* Re: [PATCH v2 2/2] mm/pageblock: remove false sharing in pageblock_flags
  2020-08-19  7:57   ` Anshuman Khandual
@ 2020-08-19  8:09     ` Alex Shi
  2020-08-19  8:13       ` David Hildenbrand
                         ` (2 more replies)
  0 siblings, 3 replies; 17+ messages in thread
From: Alex Shi @ 2020-08-19  8:09 UTC (permalink / raw)
  To: Anshuman Khandual, Matthew Wilcox, David Hildenbrand
  Cc: Andrew Morton, Hugh Dickins, Alexander Duyck, linux-kernel, linux-mm



On 2020/8/19 3:57 PM, Anshuman Khandual wrote:
> 
> 
> On 08/19/2020 11:17 AM, Alex Shi wrote:
>> pageblock_flags is currently only 4 bits per pageblock, so two pageblocks
>> have to share a char in cmpxchg when the flags get set; that false sharing
>> causes a perf drop.
>>
>> If we increase the bits to 8, the false sharing in cmpxchg is gone, and the
>> only cost is half a char per pageblock, which is half a char per 128MB on
>> x86, or 4 chars per 1GB.
> 
> Agreed that increase in memory utilization is negligible here but does
> this really improve performance ?
> 

There's no doubt in theory. And it has had a bad impact before, according to
commit e380bebe4771548 ("mm, compaction: keep migration source private to a
single compaction instance").

But I have some trouble running thpscale/mmtests myself; I'd like to see if
anyone could give it a try.

BTW, I naturally hate false sharing, even if it's only theoretical. Anyone
who doesn't? :)

Thanks
Alex 



* Re: [PATCH v2 2/2] mm/pageblock: remove false sharing in pageblock_flags
  2020-08-19  8:09     ` Alex Shi
@ 2020-08-19  8:13       ` David Hildenbrand
  2020-08-19  8:36         ` Alex Shi
  2020-08-19 16:50       ` Alexander Duyck
  2020-09-01 17:04       ` Vlastimil Babka
  2 siblings, 1 reply; 17+ messages in thread
From: David Hildenbrand @ 2020-08-19  8:13 UTC (permalink / raw)
  To: Alex Shi, Anshuman Khandual, Matthew Wilcox
  Cc: Andrew Morton, Hugh Dickins, Alexander Duyck, linux-kernel, linux-mm

On 19.08.20 10:09, Alex Shi wrote:
> 
> 
> On 2020/8/19 3:57 PM, Anshuman Khandual wrote:
>>
>>
>> On 08/19/2020 11:17 AM, Alex Shi wrote:
>>> pageblock_flags is currently only 4 bits per pageblock, so two pageblocks
>>> have to share a char in cmpxchg when the flags get set; that false sharing
>>> causes a perf drop.
>>>
>>> If we increase the bits to 8, the false sharing in cmpxchg is gone, and the
>>> only cost is half a char per pageblock, which is half a char per 128MB on
>>> x86, or 4 chars per 1GB.
>>
>> Agreed that increase in memory utilization is negligible here but does
>> this really improve performance ?
>>
> 
> There's no doubt in theory. And it has had a bad impact before, according
> to commit e380bebe4771548 ("mm, compaction: keep migration source private
> to a single compaction instance").
> 
> But I have some trouble running thpscale/mmtests myself; I'd like to see if
> anyone could give it a try.
> 
> BTW, I naturally hate false sharing, even if it's only theoretical. Anyone
> who doesn't? :)

I hate wasting memory, even if it's just a little bit. Anyone who
doesn't? ;)


-- 
Thanks,

David / dhildenb




* Re: [PATCH v2 2/2] mm/pageblock: remove false sharing in pageblock_flags
  2020-08-19  8:13       ` David Hildenbrand
@ 2020-08-19  8:36         ` Alex Shi
  0 siblings, 0 replies; 17+ messages in thread
From: Alex Shi @ 2020-08-19  8:36 UTC (permalink / raw)
  To: David Hildenbrand, Anshuman Khandual, Matthew Wilcox
  Cc: Andrew Morton, Hugh Dickins, Alexander Duyck, linux-kernel, linux-mm



On 2020/8/19 4:13 PM, David Hildenbrand wrote:
> On 19.08.20 10:09, Alex Shi wrote:
>>
>>
>> On 2020/8/19 3:57 PM, Anshuman Khandual wrote:
>>>
>>>
>>> On 08/19/2020 11:17 AM, Alex Shi wrote:
>>>> pageblock_flags is currently only 4 bits per pageblock, so two pageblocks
>>>> have to share a char in cmpxchg when the flags get set; that false sharing
>>>> causes a perf drop.
>>>>
>>>> If we increase the bits to 8, the false sharing in cmpxchg is gone, and the
>>>> only cost is half a char per pageblock, which is half a char per 128MB on
>>>> x86, or 4 chars per 1GB.
>>>
>>> Agreed that increase in memory utilization is negligible here but does
>>> this really improve performance ?
>>>
>>
>> There's no doubt in theory. And it has had a bad impact before, according
>> to commit e380bebe4771548 ("mm, compaction: keep migration source private
>> to a single compaction instance").

The above commit is good evidence: the false sharing caused a performance
issue, and a lock had to be used to protect cmpxchg, an instruction that was
designed for lockless use. The false sharing was trouble then and will be
trouble again sometime.

Thanks
Alex

>>
>> But I have some trouble running thpscale/mmtests myself; I'd like to see
>> if anyone could give it a try.
>>
>> BTW, I naturally hate false sharing, even if it's only theoretical. Anyone
>> who doesn't? :)
> 
> I hate wasting memory, even if it's just a little bit. Anyone who
> doesn't? ;)
> 
> 



* Re: [PATCH v2 2/2] mm/pageblock: remove false sharing in pageblock_flags
  2020-08-19  8:09     ` Alex Shi
  2020-08-19  8:13       ` David Hildenbrand
@ 2020-08-19 16:50       ` Alexander Duyck
  2020-08-30 10:00         ` Alex Shi
  2020-09-01 17:04       ` Vlastimil Babka
  2 siblings, 1 reply; 17+ messages in thread
From: Alexander Duyck @ 2020-08-19 16:50 UTC (permalink / raw)
  To: Alex Shi
  Cc: Anshuman Khandual, Matthew Wilcox, David Hildenbrand,
	Andrew Morton, Hugh Dickins, Alexander Duyck, LKML, linux-mm

On Wed, Aug 19, 2020 at 1:11 AM Alex Shi <alex.shi@linux.alibaba.com> wrote:
>
>
>
> On 2020/8/19 3:57 PM, Anshuman Khandual wrote:
> >
> >
> > On 08/19/2020 11:17 AM, Alex Shi wrote:
> >> pageblock_flags is currently only 4 bits per pageblock, so two pageblocks
> >> have to share a char in cmpxchg when the flags get set; that false sharing
> >> causes a perf drop.
> >>
> >> If we increase the bits to 8, the false sharing in cmpxchg is gone, and the
> >> only cost is half a char per pageblock, which is half a char per 128MB on
> >> x86, or 4 chars per 1GB.
> >
> > Agreed that increase in memory utilization is negligible here but does
> > this really improve performance ?
> >
>
> There's no doubt in theory. And it has had a bad impact before, according
> to commit e380bebe4771548 ("mm, compaction: keep migration source private
> to a single compaction instance").
>
> But I have some trouble running thpscale/mmtests myself; I'd like to see if
> anyone could give it a try.
>
> BTW, I naturally hate false sharing, even if it's only theoretical. Anyone
> who doesn't? :)

You keep bringing up false sharing but you don't fix the false sharing
by doing this. You are still allowing the flags for multiple
pageblocks per cacheline so you still have false sharing even after
this.

What I believe you are attempting to address is the fact that multiple
pageblocks share a single long value and that long is being used with
a cmpxchg so you end up with multiple threads potentially all banging
on the same value and watching it change. However the field currently
consists of only 4 bits, 3 of them for migratetype and 1 for the skip
bit. In the case of the 3 bit portion a cmpxchg makes sense and is
usually protected by the zone lock so you would only have one thread
accessing it in most cases with the possible exception of a section
that spans multiple zones.

For the case such as the skip bit and MIGRATE_UNMOVABLE (0x0) where we
would be clearing or setting the entire mask maybe it would make more
sense to simply use an atomic_or or atomic_and depending on if you are
setting or clearing the flag? It would allow you to avoid the spinning
or having to read the word before performing the operation since you
would just be directly applying an AND or OR via a mask value.
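
A minimal sketch of the suggestion above, assuming a byte-granular bitmap
and using the GCC/Clang __atomic builtins in place of the kernel's atomic
helpers (the function names are hypothetical):

/* For whole-mask updates (e.g. the skip bit, or MIGRATE_UNMOVABLE == 0),
 * a one-shot atomic OR/AND avoids the cmpxchg read-and-retry loop. */
static inline void block_flags_set(unsigned char *bitmap,
				   unsigned long byte_idx, unsigned char mask)
{
	__atomic_fetch_or(&bitmap[byte_idx], mask, __ATOMIC_RELAXED);
}

static inline void block_flags_clear(unsigned char *bitmap,
				     unsigned long byte_idx, unsigned char mask)
{
	__atomic_fetch_and(&bitmap[byte_idx], (unsigned char)~mask,
			   __ATOMIC_RELAXED);
}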



* Re: [PATCH v2 2/2] mm/pageblock: remove false sharing in pageblock_flags
  2020-08-19 16:50       ` Alexander Duyck
@ 2020-08-30 10:00         ` Alex Shi
  2020-08-30 20:32           ` Alexander Duyck
  0 siblings, 1 reply; 17+ messages in thread
From: Alex Shi @ 2020-08-30 10:00 UTC (permalink / raw)
  To: Alexander Duyck
  Cc: Anshuman Khandual, Matthew Wilcox, David Hildenbrand,
	Andrew Morton, Hugh Dickins, Alexander Duyck, LKML, linux-mm



On 2020/8/20 12:50 AM, Alexander Duyck wrote:
> On Wed, Aug 19, 2020 at 1:11 AM Alex Shi <alex.shi@linux.alibaba.com> wrote:
>>
>>
>>
>> On 2020/8/19 3:57 PM, Anshuman Khandual wrote:
>>>
>>>
>>> On 08/19/2020 11:17 AM, Alex Shi wrote:
>>>> pageblock_flags is currently only 4 bits per pageblock, so two pageblocks
>>>> have to share a char in cmpxchg when the flags get set; that false sharing
>>>> causes a perf drop.
>>>>
>>>> If we increase the bits to 8, the false sharing in cmpxchg is gone, and the
>>>> only cost is half a char per pageblock, which is half a char per 128MB on
>>>> x86, or 4 chars per 1GB.
>>>
>>> Agreed that increase in memory utilization is negligible here but does
>>> this really improve performance ?
>>>
>>
>> There's no doubt in theory. And it has had a bad impact before, according
>> to commit e380bebe4771548 ("mm, compaction: keep migration source private
>> to a single compaction instance").
>>
>> But I have some trouble running thpscale/mmtests myself; I'd like to see
>> if anyone could give it a try.
>>
>> BTW, I naturally hate false sharing, even if it's only theoretical. Anyone
>> who doesn't? :)
> 
> You keep bringing up false sharing but you don't fix the false sharing
> by doing this. You are still allowing the flags for multiple
> pageblocks per cacheline so you still have false sharing even after
> this.

Yes, the cacheline false sharing is still there. But as you pointed out, the
cmpxchg-level false sharing can be largely addressed by this patchset.


> 
> What I believe you are attempting to address is the fact that multiple
> pageblocks share a single long value and that long is being used with
> a cmpxchg so you end up with multiple threads potentially all banging
> on the same value and watching it change. However the field currently
> consists of only 4 bits, 3 of them for migratetype and 1 for the skip
> bit. In the case of the 3 bit portion a cmpxchg makes sense and is
> usually protected by the zone lock so you would only have one thread
> accessing it in most cases with the possible exception of a section
> that spans multiple zones.
> 
> For the case such as the skip bit and MIGRATE_UNMOVABLE (0x0) where we
> would be clearing or setting the entire mask maybe it would make more
> sense to simply use an atomic_or or atomic_and depending on if you are
> setting or clearing the flag? It would allow you to avoid the spinning
> or having to read the word before performing the operation since you
> would just be directly applying an AND or OR via a mask value.

Right, that's a different level at which to fix this problem, but narrowing
the cmpxchg comparison is still needed and helpful.

Thanks
Alex
> 



* Re: [PATCH v2 1/2] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags
  2020-08-19  8:04   ` David Hildenbrand
@ 2020-08-30 10:08     ` Alex Shi
  0 siblings, 0 replies; 17+ messages in thread
From: Alex Shi @ 2020-08-30 10:08 UTC (permalink / raw)
  To: David Hildenbrand, Anshuman Khandual, Matthew Wilcox
  Cc: Andrew Morton, Hugh Dickins, Alexander Duyck, linux-mm, linux-kernel



On 2020/8/19 4:04 PM, David Hildenbrand wrote:
> On 19.08.20 09:55, Anshuman Khandual wrote:
>>
>>
>> On 08/19/2020 11:17 AM, Alex Shi wrote:
>>> pageblock_flags is used as an array of longs. Since each pageblock's flags
>>> are just 4 bits, one long holds the flags of 8 (on a 32-bit machine) or 16
>>> pageblocks, so setting one pageblock's flags has to synchronize, via cmpxchg,
>>> with 7 or 15 other pageblocks' flags. That can mean a long wait for the
>>> cmpxchg to succeed.
>>>
>>> If we change the pageblock_flags variable to char, we can use a char-sized
>>> cmpxchg, which only synchronizes with one other pageblock's flags. That
>>> relieves much of the false sharing in cmpxchg.
>>
>> Do you have numbers demonstrating claimed performance improvement
>> after this change ?
>>
> 
> I asked for that in v1 and there are no performance numbers to justify
> the change. IMHO, that will be required to consider this for inclusion,
> otherwise it's just code churn resulting in an (although minimal)
> additional memory consumption.
> 

I just got some time to run thpscale on my 4-core HT machine; here is the
data. I ran each kernel 3 times: the 'pageblock' kernel is 5.9-rc2 with these
2 patches, 'plp1' is 5.9-rc2 with just the first patch, and 'rc2' is the
plain 5.9-rc2 kernel. The system and elapsed times are slightly lower than
with the original kernel.

                   pageblock   pageblock   pageblock        plp1        plp1        plp1         rc2         rc2         rc2   pageblock
                          16        16-2        16-3           1           2           3           a           b           c           a
Duration User          14.81       15.24       14.55       15.28       14.66       14.63       14.76       14.97       14.38       15.07
Duration System        84.44       88.38       90.64       92.65       94.01       90.58      100.43       89.15       88.89       84.04
Duration Elapsed       98.83       99.06       99.81       99.65      100.26       99.90      100.30       99.24       99.14       98.87

I also added tracing to measure the patchset's effect; it shows that the
number of failed cmpxchg attempts clearly drops.



 Performance counter stats for './run-mmtests.sh -c configs/config-workload-thpscale rc2-b':

             6,720      compaction:mm_compaction_isolate_migratepages
            13,526      compaction:mm_compaction_isolate_freepages
             4,052      compaction:mm_compaction_migratepages
            34,199      compaction:mm_compaction_begin
            34,199      compaction:mm_compaction_end
            21,784      compaction:mm_compaction_try_to_compact_pages
            71,606      compaction:mm_compaction_finished
           106,545      compaction:mm_compaction_suitable
                 0      compaction:mm_compaction_deferred
                 0      compaction:mm_compaction_defer_compaction
             2,977      compaction:mm_compaction_defer_reset
                 0      compaction:mm_compaction_kcompactd_sleep
                 0      compaction:mm_compaction_wakeup_kcompactd
                 0      compaction:mm_compaction_kcompactd_wake
             1,046      pageblock:hit_cmpxchg

     114.914303988 seconds time elapsed

      15.754797000 seconds user
      89.712251000 seconds sys





 Performance counter stats for './run-mmtests.sh -c configs/config-workload-thpscale pageblock-a':

               602      compaction:mm_compaction_isolate_migratepages
             3,710      compaction:mm_compaction_isolate_freepages
               402      compaction:mm_compaction_migratepages
            43,116      compaction:mm_compaction_begin
            43,116      compaction:mm_compaction_end
            24,810      compaction:mm_compaction_try_to_compact_pages
            86,527      compaction:mm_compaction_finished
           125,819      compaction:mm_compaction_suitable
                 2      compaction:mm_compaction_deferred
                 0      compaction:mm_compaction_defer_compaction
               271      compaction:mm_compaction_defer_reset
                 0      compaction:mm_compaction_kcompactd_sleep
                 0      compaction:mm_compaction_wakeup_kcompactd
                 0      compaction:mm_compaction_kcompactd_wake
               369      pageblock:hit_cmpxchg

     107.405499745 seconds time elapsed

      15.830967000 seconds user
      84.559767000 seconds sys


commit 36cea76895637c0c18ce8590c0f43a3e453fbf8f
Author: Alex Shi <alex.shi@linux.alibaba.com>
Date:   Wed Aug 19 17:26:26 2020 +0800

    add cmpxchg tracing

    Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>

diff --git a/include/trace/events/pageblock.h b/include/trace/events/pageblock.h
new file mode 100644
index 000000000000..003c2d716f82
--- /dev/null
+++ b/include/trace/events/pageblock.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM pageblock
+
+#if !defined(_TRACE_PAGEBLOCK_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_PAGEBLOCK_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(hit_cmpxchg,
+
+       TP_PROTO(char byte),
+
+       TP_ARGS(byte),
+
+       TP_STRUCT__entry(
+               __field(char, byte)
+       ),
+
+       TP_fast_assign(
+               __entry->byte = byte;
+       ),
+
+       TP_printk("%d", __entry->byte)
+);
+
+#endif /* _TRACE_PAGEBLOCK_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 60342e764090..2422dec00484 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -509,6 +509,9 @@ static __always_inline int get_pfnblock_migratetype(struct page *page, unsigned
  * @pfn: The target page frame number
  * @mask: mask of bits that the caller is interested in
  */
+#define CREATE_TRACE_POINTS
+#include <trace/events/pageblock.h>
+
 void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
                                        unsigned long pfn,
                                        unsigned long mask)
@@ -536,6 +539,7 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
                if (byte == old_byte)
                        break;
                byte = old_byte;
+               trace_hit_cmpxchg(byte);
        }
 }
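
For reference, counts like those above can be gathered with something along
these lines (a reconstruction, not the exact command from this mail):

	perf stat -e 'compaction:*' -e 'pageblock:hit_cmpxchg' -- \
		./run-mmtests.sh -c configs/config-workload-thpscale pageblock-a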




* Re: [PATCH v2 1/2] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags
  2020-08-19  7:55 ` [PATCH v2 1/2] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags Anshuman Khandual
  2020-08-19  8:04   ` David Hildenbrand
@ 2020-08-30 10:14   ` Alex Shi
  2020-08-30 10:18     ` Matthew Wilcox
  1 sibling, 1 reply; 17+ messages in thread
From: Alex Shi @ 2020-08-30 10:14 UTC (permalink / raw)
  To: Anshuman Khandual, Matthew Wilcox, David Hildenbrand
  Cc: Andrew Morton, Hugh Dickins, Alexander Duyck, linux-mm, linux-kernel



On 2020/8/19 3:55 PM, Anshuman Khandual wrote:
> 
> 
> On 08/19/2020 11:17 AM, Alex Shi wrote:
>> pageblock_flags is used as an array of longs. Since each pageblock's flags
>> are just 4 bits, one long holds the flags of 8 (on a 32-bit machine) or 16
>> pageblocks, so setting one pageblock's flags has to synchronize, via cmpxchg,
>> with 7 or 15 other pageblocks' flags. That can mean a long wait for the
>> cmpxchg to succeed.
>>
>> If we change the pageblock_flags variable to char, we can use a char-sized
>> cmpxchg, which only synchronizes with one other pageblock's flags. That
>> relieves much of the false sharing in cmpxchg.
> 
> Do you have numbers demonstrating claimed performance improvement
> after this change ?
> 

The performance data is shown in another email.

LKP reported that ARMv6 fails to build with this patchset, since it provides
no byte-sized cmpxchg, so maybe we should fall back to a long-sized cmpxchg
on it.

From db3d97ba8cc5e206b440bd40a92ef6955ad86bc0 Mon Sep 17 00:00:00 2001
From: Alex Shi <alex.shi@linux.alibaba.com>
Date: Tue, 18 Aug 2020 15:51:18 +0800
Subject: [PATCH v2 3/3] armv6: fix armv6 build issue

ARMv6 has no 1-byte cmpxchg implementation, so we have to use a 4-byte
cmpxchg on it.

   arm-linux-gnueabi-ld: mm/page_alloc.o: in function `set_pfnblock_flags_mask':
   (.text+0x32b4): undefined reference to `__bad_cmpxchg'
   arm-linux-gnueabi-ld: (.text+0x32e0): undefined reference to `__bad_cmpxchg'
   arm-linux-gnueabi-ld: drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.o: in function `hw_atl_b0_get_mac_temp':
   hw_atl_b0.c:(.text+0x30fc): undefined reference to `__bad_udelay'

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: linux-mm@kvack.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
---
 mm/page_alloc.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7da09d66233b..c09146a8946c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -517,7 +517,11 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
 {
 	unsigned char *bitmap;
 	unsigned long bitidx, byte_bitidx;
+#ifdef	CONFIG_CPU_V6
+	unsigned long old_byte, byte;
+#else
 	unsigned char old_byte, byte;
+#endif
 
 	BUILD_BUG_ON(NR_PAGEBLOCK_BITS != BITS_PER_BYTE);
 	BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits));
@@ -532,9 +536,18 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
 	mask <<= bitidx;
 	flags <<= bitidx;
 
+#ifdef	CONFIG_CPU_V6
+	byte = (unsigned long)READ_ONCE(bitmap[byte_bitidx]);
+#else
 	byte = READ_ONCE(bitmap[byte_bitidx]);
+#endif
 	for (;;) {
+#ifdef	CONFIG_CPU_V6
+		/* ARMv6 has no byte-sized cmpxchg, so we still false-share a long word */
+		old_byte = cmpxchg((unsigned long*)&bitmap[byte_bitidx], byte, (byte & ~mask) | flags);
+#else
 		old_byte = cmpxchg(&bitmap[byte_bitidx], byte, (byte & ~mask) | flags);
+#endif
 		if (byte == old_byte)
 			break;
 		byte = old_byte;
-- 
1.8.3.1




* Re: [PATCH v2 1/2] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags
  2020-08-30 10:14   ` Alex Shi
@ 2020-08-30 10:18     ` Matthew Wilcox
  2020-08-31  8:40       ` Alex Shi
  0 siblings, 1 reply; 17+ messages in thread
From: Matthew Wilcox @ 2020-08-30 10:18 UTC (permalink / raw)
  To: Alex Shi
  Cc: Anshuman Khandual, David Hildenbrand, Andrew Morton,
	Hugh Dickins, Alexander Duyck, linux-mm, linux-kernel

On Sun, Aug 30, 2020 at 06:14:33PM +0800, Alex Shi wrote:
> +++ b/mm/page_alloc.c
> @@ -532,9 +536,18 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
>  	mask <<= bitidx;
>  	flags <<= bitidx;
>  
> +#ifdef	CONFIG_CPU_V6
> +	byte = (unsigned long)READ_ONCE(bitmap[byte_bitidx]);
> +#else
>  	byte = READ_ONCE(bitmap[byte_bitidx]);
> +#endif
>  	for (;;) {
> +#ifdef	CONFIG_CPU_V6
> +		/* ARMv6 has no byte-sized cmpxchg, so we still false-share a long word */
> +		old_byte = cmpxchg((unsigned long*)&bitmap[byte_bitidx], byte, (byte & ~mask) | flags);
> +#else
>  		old_byte = cmpxchg(&bitmap[byte_bitidx], byte, (byte & ~mask) | flags);
> +#endif

Good grief, no.  Either come up with an appropriate abstraction or
abandon this patch.  We can't possibly put this kind of ifdef in the
memory allocator!




* Re: [PATCH v2 2/2] mm/pageblock: remove false sharing in pageblock_flags
  2020-08-30 10:00         ` Alex Shi
@ 2020-08-30 20:32           ` Alexander Duyck
  2020-09-03  5:35             ` Alex Shi
  0 siblings, 1 reply; 17+ messages in thread
From: Alexander Duyck @ 2020-08-30 20:32 UTC (permalink / raw)
  To: Alex Shi
  Cc: Anshuman Khandual, Matthew Wilcox, David Hildenbrand,
	Andrew Morton, Hugh Dickins, Alexander Duyck, LKML, linux-mm

On Sun, Aug 30, 2020 at 3:00 AM Alex Shi <alex.shi@linux.alibaba.com> wrote:
>
>
>
> On 2020/8/20 12:50 AM, Alexander Duyck wrote:
> > On Wed, Aug 19, 2020 at 1:11 AM Alex Shi <alex.shi@linux.alibaba.com> wrote:
> >>
> >>
> >>
> >> On 2020/8/19 3:57 PM, Anshuman Khandual wrote:
> >>>
> >>>
> >>> On 08/19/2020 11:17 AM, Alex Shi wrote:
> >>>> pageblock_flags is currently only 4 bits per pageblock, so two pageblocks
> >>>> have to share a char in cmpxchg when the flags get set; that false sharing
> >>>> causes a perf drop.
> >>>>
> >>>> If we increase the bits to 8, the false sharing in cmpxchg is gone, and the
> >>>> only cost is half a char per pageblock, which is half a char per 128MB on
> >>>> x86, or 4 chars per 1GB.
> >>>
> >>> Agreed that increase in memory utilization is negligible here but does
> >>> this really improve performance ?
> >>>
> >>
> >> There's no doubt in theory. And it has had a bad impact before, according
> >> to commit e380bebe4771548 ("mm, compaction: keep migration source private
> >> to a single compaction instance").
> >>
> >> But I have some trouble running thpscale/mmtests myself; I'd like to see
> >> if anyone could give it a try.
> >>
> >> BTW, I naturally hate false sharing, even if it's only theoretical. Anyone
> >> who doesn't? :)
> >
> > You keep bringing up false sharing but you don't fix the false sharing
> > by doing this. You are still allowing the flags for multiple
> > pageblocks per cacheline so you still have false sharing even after
> > this.
>
> Yes, the cacheline false sharing is still there. But as you pointed out, the
> cmpxchg-level false sharing can be largely addressed by this patchset.
>
>
> >
> > What I believe you are attempting to address is the fact that multiple
> > pageblocks share a single long value and that long is being used with
> > a cmpxchg so you end up with multiple threads potentially all banging
> > on the same value and watching it change. However the field currently
> > consists of only 4 bits, 3 of them for migratetype and 1 for the skip
> > bit. In the case of the 3 bit portion a cmpxchg makes sense and is
> > usually protected by the zone lock so you would only have one thread
> > accessing it in most cases with the possible exception of a section
> > that spans multiple zones.
> >
> > For the case such as the skip bit and MIGRATE_UNMOVABLE (0x0) where we
> > would be clearing or setting the entire mask maybe it would make more
> > sense to simply use an atomic_or or atomic_and depending on if you are
> > setting or clearing the flag? It would allow you to avoid the spinning
> > or having to read the word before performing the operation since you
> > would just be directly applying an AND or OR via a mask value.
>
> Right, that's a different level at which to fix this problem, but narrowing
> the cmpxchg comparison is still needed and helpful.

What I was getting at though is that I am not sure that is the case.
Normally I believe we are always holding the zone lock when updating
the migrate type. The skip flag is a one-off operation that could
easily be addressed by changing the logic to use atomic_and or
atomic_or for the cases where we are updating single bit flags and
setting the mask value to all 1's or all 0's. So adding this extra
complexity which only really applies to the skip bit may not provide
much value, especially as there are a number of possible paths that
don't use the skip bit anyway.



* Re: [PATCH v2 1/2] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags
  2020-08-30 10:18     ` Matthew Wilcox
@ 2020-08-31  8:40       ` Alex Shi
  0 siblings, 0 replies; 17+ messages in thread
From: Alex Shi @ 2020-08-31  8:40 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Anshuman Khandual, David Hildenbrand, Andrew Morton,
	Hugh Dickins, Alexander Duyck, linux-mm, linux-kernel



On 2020/8/30 6:18 PM, Matthew Wilcox wrote:
> On Sun, Aug 30, 2020 at 06:14:33PM +0800, Alex Shi wrote:
>> +++ b/mm/page_alloc.c
>> @@ -532,9 +536,18 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
>>  	mask <<= bitidx;
>>  	flags <<= bitidx;
>>  
>> +#ifdef	CONFIG_CPU_V6
>> +	byte = (unsigned long)READ_ONCE(bitmap[byte_bitidx]);
>> +#else
>>  	byte = READ_ONCE(bitmap[byte_bitidx]);
>> +#endif
>>  	for (;;) {
>> +#ifdef	CONFIG_CPU_V6
>> +		/* ARMv6 has no byte-sized cmpxchg, so we still false-share a long word */
>> +		old_byte = cmpxchg((unsigned long*)&bitmap[byte_bitidx], byte, (byte & ~mask) | flags);
>> +#else
>>  		old_byte = cmpxchg(&bitmap[byte_bitidx], byte, (byte & ~mask) | flags);
>> +#endif
> 
> Good grief, no.  Either come up with an appropriate abstraction or
> abandon this patch.  We can't possibly put this kind of ifdef in the
> memory allocator!
> 

Hi Matthew,

Thanks a lot for the comments! How about the following patch?

From 5f61b91351461084c5bb410025965a3b4d2f7206 Mon Sep 17 00:00:00 2001
From: Alex Shi <alex.shi@linux.alibaba.com>
Date: Mon, 31 Aug 2020 15:41:20 +0800
Subject: [PATCH 3/3] mm/armv6: work around armv6 cmpxchg support issue

ARMv6 has no 1-byte cmpxchg implementation, so we have to use a 4-byte
cmpxchg on it.

   arm-linux-gnueabi-ld: mm/page_alloc.o: in function `set_pfnblock_flags_mask':
   (.text+0x32b4): undefined reference to `__bad_cmpxchg'
   arm-linux-gnueabi-ld: (.text+0x32e0): undefined reference to `__bad_cmpxchg'
   arm-linux-gnueabi-ld: drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.o: in function `hw_atl_b0_get_mac_temp':
   hw_atl_b0.c:(.text+0x30fc): undefined reference to `__bad_udelay'

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Alex Shi <alex.shi@linux.alibaba.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: linux-mm@kvack.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
---
 include/linux/mmzone.h | 15 ++++++++++++---
 mm/page_alloc.c        | 24 ++++++++++++------------
 2 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index be676e659fb7..c1bb904bcad8 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -406,6 +406,15 @@ enum zone_type {
 
 #ifndef __GENERATING_BOUNDS_H
 
+#ifdef CONFIG_CPU_V6
+/* cmpxchg only supports 32-bit operands on ARMv6. */
+typedef unsigned long pageblockflags_t;
+#define BITS_PER_FLAGS	BITS_PER_LONG
+#else
+typedef unsigned char pageblockflags_t;
+#define BITS_PER_FLAGS	BITS_PER_BYTE
+#endif
+
 struct zone {
 	/* Read-mostly fields */
 
@@ -437,7 +446,7 @@ struct zone {
 	 * Flags for a pageblock_nr_pages block. See pageblock-flags.h.
 	 * In SPARSEMEM, this map is stored in struct mem_section
 	 */
-	unsigned char		*pageblock_flags;
+	pageblockflags_t	*pageblock_flags;
 #endif /* CONFIG_SPARSEMEM */
 
 	/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
@@ -1159,7 +1168,7 @@ struct mem_section_usage {
 	DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
 #endif
 	/* See declaration of similar field in struct zone */
-	unsigned char	pageblock_flags[0];
+	pageblockflags_t	pageblock_flags[0];
 };
 
 void subsection_map_init(unsigned long pfn, unsigned long nr_pages);
@@ -1212,7 +1221,7 @@ struct mem_section {
 extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
 #endif
 
-static inline unsigned char *section_to_usemap(struct mem_section *ms)
+static inline pageblockflags_t *section_to_usemap(struct mem_section *ms)
 {
 	return ms->usage->pageblock_flags;
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 60342e764090..9a41c5dc78eb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -445,7 +445,7 @@ static inline bool defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
 #endif
 
 /* Return a pointer to the bitmap storing bits affecting a block of pages */
-static inline unsigned char *get_pageblock_bitmap(struct page *page,
+static inline pageblockflags_t *get_pageblock_bitmap(struct page *page,
 							unsigned long pfn)
 {
 #ifdef CONFIG_SPARSEMEM
@@ -474,24 +474,24 @@ static inline int pfn_to_bitidx(struct page *page, unsigned long pfn)
  * Return: pageblock_bits flags
  */
 static __always_inline
-unsigned char __get_pfnblock_flags_mask(struct page *page,
+pageblockflags_t __get_pfnblock_flags_mask(struct page *page,
 					unsigned long pfn,
 					unsigned long mask)
 {
-	unsigned char *bitmap;
+	pageblockflags_t *bitmap;
 	unsigned long bitidx, byte_bitidx;
-	unsigned char byte;
+	pageblockflags_t byte;
 
 	bitmap = get_pageblock_bitmap(page, pfn);
 	bitidx = pfn_to_bitidx(page, pfn);
-	byte_bitidx = bitidx / BITS_PER_BYTE;
-	bitidx &= (BITS_PER_BYTE-1);
+	byte_bitidx = bitidx / BITS_PER_FLAGS;
+	bitidx &= (BITS_PER_FLAGS - 1);
 
 	byte = bitmap[byte_bitidx];
 	return (byte >> bitidx) & mask;
 }
 
-unsigned char get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
+pageblockflags_t get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
 					unsigned long mask)
 {
 	return __get_pfnblock_flags_mask(page, pfn, mask);
@@ -513,17 +513,17 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
 					unsigned long pfn,
 					unsigned long mask)
 {
-	unsigned char *bitmap;
+	pageblockflags_t *bitmap;
 	unsigned long bitidx, byte_bitidx;
-	unsigned char old_byte, byte;
+	pageblockflags_t old_byte, byte;
 
-	BUILD_BUG_ON(NR_PAGEBLOCK_BITS != BITS_PER_BYTE);
+	BUILD_BUG_ON(NR_PAGEBLOCK_BITS != BITS_PER_FLAGS);
 	BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits));
 
 	bitmap = get_pageblock_bitmap(page, pfn);
 	bitidx = pfn_to_bitidx(page, pfn);
-	byte_bitidx = bitidx / BITS_PER_BYTE;
-	bitidx &= (BITS_PER_BYTE-1);
+	byte_bitidx = bitidx / BITS_PER_FLAGS;
+	bitidx &= (BITS_PER_FLAGS - 1);
 
 	VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page);
 
-- 
1.8.3.1




* Re: [PATCH v2 2/2] mm/pageblock: remove false sharing in pageblock_flags
  2020-08-19  8:09     ` Alex Shi
  2020-08-19  8:13       ` David Hildenbrand
  2020-08-19 16:50       ` Alexander Duyck
@ 2020-09-01 17:04       ` Vlastimil Babka
  2 siblings, 0 replies; 17+ messages in thread
From: Vlastimil Babka @ 2020-09-01 17:04 UTC (permalink / raw)
  To: Alex Shi, Anshuman Khandual, Matthew Wilcox, David Hildenbrand
  Cc: Andrew Morton, Hugh Dickins, Alexander Duyck, linux-kernel,
	linux-mm, Mel Gorman

On 8/19/20 10:09 AM, Alex Shi wrote:
> 
> 
On 2020/8/19 3:57 PM, Anshuman Khandual wrote:
>> 
>> 
>> On 08/19/2020 11:17 AM, Alex Shi wrote:
>>> pageblock_flags is currently only 4 bits per pageblock, so two pageblocks
>>> have to share a char in cmpxchg when the flags get set; that false sharing
>>> causes a perf drop.
>>>
>>> If we increase the bits to 8, the false sharing in cmpxchg is gone, and the
>>> only cost is half a char per pageblock, which is half a char per 128MB on
>>> x86, or 4 chars per 1GB.
>> 
>> Agreed that increase in memory utilization is negligible here but does
>> this really improve performance ?
>> 
> 
> There's no doubt in theory. And it has had a bad impact before, according
> to commit e380bebe4771548 ("mm, compaction: keep migration source private
> to a single compaction instance").

I don't think that commit is doing the test_and_set_skip() under lock to avoid
false sharing. I think it's done simply to make the test and set protected
against races without relying on e.g. a truly atomic test_and_set_bit(). It's
still noted that it's just a hint, so it's not protected against others
calling set_pageblock_skip() from other contexts not under the lock.

> But I have some trouble running thpscale/mmtests myself; I'd like to see if
> anyone could give it a try.
> 
> BTW, I naturally hate false sharing, even if it's only theoretical. Anyone
> who doesn't? :)
> 
> Thanks
> Alex 
> 




* Re: [PATCH v2 2/2] mm/pageblock: remove false sharing in pageblock_flags
  2020-08-30 20:32           ` Alexander Duyck
@ 2020-09-03  5:35             ` Alex Shi
  0 siblings, 0 replies; 17+ messages in thread
From: Alex Shi @ 2020-09-03  5:35 UTC (permalink / raw)
  To: Alexander Duyck
  Cc: Anshuman Khandual, Matthew Wilcox, David Hildenbrand,
	Andrew Morton, Hugh Dickins, Alexander Duyck, LKML, linux-mm,
	Vlastimil Babka



On 2020/8/31 4:32 AM, Alexander Duyck wrote:
>> Right, that's a different level at which to fix this problem, but narrowing
>> the cmpxchg comparison is still needed and helpful.
> What I was getting at though is that I am not sure that is the case.
> Normally I believe we are always holding the zone lock when updating
> the migrate type. The skip flag is a one-off operation that could
> easily be addressed by changing the logic to use atomic_and or
> atomic_or for the cases where we are updating single bit flags and
> setting the mask value to all 1's or all 0's. So adding this extra
> complexity which only really applies to the skip bit may not provide
> much value, especially as there are a number of possible paths that
> don't use the skip bit anyway.


Using an atomic bit set only helps for the skip bit; the migratetype bits
still suffer heavy false sharing in the sync.

And actually, this cmpxchg issue is a cross-arch one that is better addressed
in the core kernel. cmpxchg is used widely in the kernel, but no one has
mentioned this shortcoming before. On that point, the problem should be
resolved in the kernel; see the sketch below.

Thanks
Alex
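
A hedged sketch of what such a generic kernel-side fallback could look like:
emulating a byte-sized cmpxchg with an aligned word-sized CAS, roughly what
an architecture like ARMv6 (word-sized cmpxchg only) would need. It assumes
little-endian byte order, uses the GCC/Clang __atomic builtins rather than
any real kernel API, and the function name is hypothetical.

/* Emulate cmpxchg on a single byte via a 32-bit CAS on the containing,
 * naturally aligned word. Returns the byte's previous value, like cmpxchg. */
static unsigned char cmpxchg_u8_emulated(unsigned char *ptr,
					 unsigned char old, unsigned char new)
{
	unsigned int *word = (unsigned int *)((unsigned long)ptr & ~3UL);
	unsigned int shift = ((unsigned long)ptr & 3UL) * 8; /* little-endian */
	unsigned int mask = 0xffU << shift;
	unsigned int w = __atomic_load_n(word, __ATOMIC_RELAXED);

	do {
		if (((w & mask) >> shift) != old)
			return (w & mask) >> shift; /* mismatch: report current */
	} while (!__atomic_compare_exchange_n(word, &w,
			(w & ~mask) | ((unsigned int)new << shift),
			1 /* weak */, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED));

	return old; /* success: byte matched 'old' and was replaced */
}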



end of thread (newest: 2020-09-03  5:35 UTC)

Thread overview: 17+ messages
2020-08-19  5:47 [PATCH v2 1/2] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags Alex Shi
2020-08-19  5:47 ` [PATCH v2 2/2] mm/pageblock: remove false sharing in pageblock_flags Alex Shi
2020-08-19  7:57   ` Anshuman Khandual
2020-08-19  8:09     ` Alex Shi
2020-08-19  8:13       ` David Hildenbrand
2020-08-19  8:36         ` Alex Shi
2020-08-19 16:50       ` Alexander Duyck
2020-08-30 10:00         ` Alex Shi
2020-08-30 20:32           ` Alexander Duyck
2020-09-03  5:35             ` Alex Shi
2020-09-01 17:04       ` Vlastimil Babka
2020-08-19  7:55 ` [PATCH v2 1/2] mm/pageblock: mitigation cmpxchg false sharing in pageblock flags Anshuman Khandual
2020-08-19  8:04   ` David Hildenbrand
2020-08-30 10:08     ` Alex Shi
2020-08-30 10:14   ` Alex Shi
2020-08-30 10:18     ` Matthew Wilcox
2020-08-31  8:40       ` Alex Shi
