* [RFC v2 01/10] mm: page_ref_add_unless() does not trace 'u' argument
2021-11-17 1:20 [RFC v2 00/10] Hardening page _refcount Pasha Tatashin
@ 2021-11-17 1:20 ` Pasha Tatashin
2021-11-17 1:20 ` [RFC v2 02/10] mm: add overflow and underflow checks for page->_refcount Pasha Tatashin
` (8 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Pasha Tatashin @ 2021-11-17 1:20 UTC (permalink / raw)
To: pasha.tatashin, linux-kernel, linux-mm, linux-m68k,
anshuman.khandual, willy, akpm, william.kucharski, mike.kravetz,
vbabka, geert, schmitzmic, rostedt, mingo, hannes, guro,
songmuchun, weixugc, gthelen, rientjes, pjt
In other page_ref_* functions all arguments and returns are traced, but
in page_ref_add_unless the 'u' argument which stands for unless boolean
is not traced. However, what is more confusing is that in the tracing
routine:
__page_ref_mod_unless(struct page *page, int v, int u);
The 'u' argument is present, but instead a return value is passed into
this argument.
Add a new template specific for page_ref_add_unless(), and trace all
arguments and the return value.
Fixes: 95813b8faa0c ("mm/page_ref: add tracepoint to track down page reference manipulation")
Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
---
include/linux/page_ref.h | 10 ++++----
include/trace/events/page_ref.h | 43 ++++++++++++++++++++++++++++++---
mm/debug_page_ref.c | 8 +++---
3 files changed, 49 insertions(+), 12 deletions(-)
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 2e677e6ad09f..1903af5fb087 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -11,7 +11,7 @@ DECLARE_TRACEPOINT(page_ref_set);
DECLARE_TRACEPOINT(page_ref_mod);
DECLARE_TRACEPOINT(page_ref_mod_and_test);
DECLARE_TRACEPOINT(page_ref_mod_and_return);
-DECLARE_TRACEPOINT(page_ref_mod_unless);
+DECLARE_TRACEPOINT(page_ref_add_unless);
DECLARE_TRACEPOINT(page_ref_freeze);
DECLARE_TRACEPOINT(page_ref_unfreeze);
@@ -30,7 +30,7 @@ extern void __page_ref_set(struct page *page, int v);
extern void __page_ref_mod(struct page *page, int v);
extern void __page_ref_mod_and_test(struct page *page, int v, int ret);
extern void __page_ref_mod_and_return(struct page *page, int v, int ret);
-extern void __page_ref_mod_unless(struct page *page, int v, int u);
+extern void __page_ref_add_unless(struct page *page, int v, int u, int ret);
extern void __page_ref_freeze(struct page *page, int v, int ret);
extern void __page_ref_unfreeze(struct page *page, int v);
@@ -50,7 +50,7 @@ static inline void __page_ref_mod_and_test(struct page *page, int v, int ret)
static inline void __page_ref_mod_and_return(struct page *page, int v, int ret)
{
}
-static inline void __page_ref_mod_unless(struct page *page, int v, int u)
+static inline void __page_ref_add_unless(struct page *page, int v, int u, int ret)
{
}
static inline void __page_ref_freeze(struct page *page, int v, int ret)
@@ -237,8 +237,8 @@ static inline bool page_ref_add_unless(struct page *page, int nr, int u)
{
bool ret = atomic_add_unless(&page->_refcount, nr, u);
- if (page_ref_tracepoint_active(page_ref_mod_unless))
- __page_ref_mod_unless(page, nr, ret);
+ if (page_ref_tracepoint_active(page_ref_add_unless))
+ __page_ref_add_unless(page, nr, u, ret);
return ret;
}
diff --git a/include/trace/events/page_ref.h b/include/trace/events/page_ref.h
index 8a99c1cd417b..c32d6d161cdb 100644
--- a/include/trace/events/page_ref.h
+++ b/include/trace/events/page_ref.h
@@ -94,6 +94,43 @@ DECLARE_EVENT_CLASS(page_ref_mod_and_test_template,
__entry->val, __entry->ret)
);
+DECLARE_EVENT_CLASS(page_ref_add_unless_template,
+
+ TP_PROTO(struct page *page, int v, int u, int ret),
+
+ TP_ARGS(page, v, u, ret),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, pfn)
+ __field(unsigned long, flags)
+ __field(int, count)
+ __field(int, mapcount)
+ __field(void *, mapping)
+ __field(int, mt)
+ __field(int, val)
+ __field(int, unless)
+ __field(int, ret)
+ ),
+
+ TP_fast_assign(
+ __entry->pfn = page_to_pfn(page);
+ __entry->flags = page->flags;
+ __entry->count = page_ref_count(page);
+ __entry->mapcount = page_mapcount(page);
+ __entry->mapping = page->mapping;
+ __entry->mt = get_pageblock_migratetype(page);
+ __entry->val = v;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("pfn=0x%lx flags=%s count=%d mapcount=%d mapping=%p mt=%d val=%d unless=%d ret=%d",
+ __entry->pfn,
+ show_page_flags(__entry->flags & PAGEFLAGS_MASK),
+ __entry->count,
+ __entry->mapcount, __entry->mapping, __entry->mt,
+ __entry->val, __entry->unless, __entry->ret)
+);
+
DEFINE_EVENT(page_ref_mod_and_test_template, page_ref_mod_and_test,
TP_PROTO(struct page *page, int v, int ret),
@@ -108,11 +145,11 @@ DEFINE_EVENT(page_ref_mod_and_test_template, page_ref_mod_and_return,
TP_ARGS(page, v, ret)
);
-DEFINE_EVENT(page_ref_mod_and_test_template, page_ref_mod_unless,
+DEFINE_EVENT(page_ref_add_unless_template, page_ref_add_unless,
- TP_PROTO(struct page *page, int v, int ret),
+ TP_PROTO(struct page *page, int v, int u, int ret),
- TP_ARGS(page, v, ret)
+ TP_ARGS(page, v, u, ret)
);
DEFINE_EVENT(page_ref_mod_and_test_template, page_ref_freeze,
diff --git a/mm/debug_page_ref.c b/mm/debug_page_ref.c
index f3b2c9d3ece2..1426d6887b01 100644
--- a/mm/debug_page_ref.c
+++ b/mm/debug_page_ref.c
@@ -33,12 +33,12 @@ void __page_ref_mod_and_return(struct page *page, int v, int ret)
EXPORT_SYMBOL(__page_ref_mod_and_return);
EXPORT_TRACEPOINT_SYMBOL(page_ref_mod_and_return);
-void __page_ref_mod_unless(struct page *page, int v, int u)
+void __page_ref_add_unless(struct page *page, int v, int u, int ret)
{
- trace_page_ref_mod_unless(page, v, u);
+ trace_page_ref_add_unless(page, v, u, ret);
}
-EXPORT_SYMBOL(__page_ref_mod_unless);
-EXPORT_TRACEPOINT_SYMBOL(page_ref_mod_unless);
+EXPORT_SYMBOL(__page_ref_add_unless);
+EXPORT_TRACEPOINT_SYMBOL(page_ref_add_unless);
void __page_ref_freeze(struct page *page, int v, int ret)
{
--
2.34.0.rc1.387.gb447b232ab-goog
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [RFC v2 02/10] mm: add overflow and underflow checks for page->_refcount
2021-11-17 1:20 [RFC v2 00/10] Hardening page _refcount Pasha Tatashin
2021-11-17 1:20 ` [RFC v2 01/10] mm: page_ref_add_unless() does not trace 'u' argument Pasha Tatashin
@ 2021-11-17 1:20 ` Pasha Tatashin
2021-11-17 1:20 ` [RFC v2 03/10] mm: Avoid using set_page_count() in set_page_refcounted() Pasha Tatashin
` (7 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Pasha Tatashin @ 2021-11-17 1:20 UTC (permalink / raw)
To: pasha.tatashin, linux-kernel, linux-mm, linux-m68k,
anshuman.khandual, willy, akpm, william.kucharski, mike.kravetz,
vbabka, geert, schmitzmic, rostedt, mingo, hannes, guro,
songmuchun, weixugc, gthelen, rientjes, pjt
The problems with page->_refcount are hard to debug, because usually
when they are detected, the damage has occurred a long time ago. Yet,
the problems with invalid page refcount may be catastrophic and lead to
memory corruptions.
Reduce the scope of when the _refcount problems manifest themselves by
adding checks for underflows and overflows into functions that modify
_refcount.
Use atomic_fetch_* functions to get the old value of the _refcount,
and use it to check for overflow/underflow.
Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
---
include/linux/page_ref.h | 59 +++++++++++++++++++++++++++++-----------
1 file changed, 43 insertions(+), 16 deletions(-)
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 1903af5fb087..f3c61dc6344a 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -117,7 +117,10 @@ static inline void init_page_count(struct page *page)
static inline void page_ref_add(struct page *page, int nr)
{
- atomic_add(nr, &page->_refcount);
+ int old_val = atomic_fetch_add(nr, &page->_refcount);
+ int new_val = old_val + nr;
+
+ VM_BUG_ON_PAGE((unsigned int)new_val < (unsigned int)old_val, page);
if (page_ref_tracepoint_active(page_ref_mod))
__page_ref_mod(page, nr);
}
@@ -129,7 +132,10 @@ static inline void folio_ref_add(struct folio *folio, int nr)
static inline void page_ref_sub(struct page *page, int nr)
{
- atomic_sub(nr, &page->_refcount);
+ int old_val = atomic_fetch_sub(nr, &page->_refcount);
+ int new_val = old_val - nr;
+
+ VM_BUG_ON_PAGE((unsigned int)new_val > (unsigned int)old_val, page);
if (page_ref_tracepoint_active(page_ref_mod))
__page_ref_mod(page, -nr);
}
@@ -141,11 +147,13 @@ static inline void folio_ref_sub(struct folio *folio, int nr)
static inline int page_ref_sub_return(struct page *page, int nr)
{
- int ret = atomic_sub_return(nr, &page->_refcount);
+ int old_val = atomic_fetch_sub(nr, &page->_refcount);
+ int new_val = old_val - nr;
+ VM_BUG_ON_PAGE((unsigned int)new_val > (unsigned int)old_val, page);
if (page_ref_tracepoint_active(page_ref_mod_and_return))
- __page_ref_mod_and_return(page, -nr, ret);
- return ret;
+ __page_ref_mod_and_return(page, -nr, new_val);
+ return new_val;
}
static inline int folio_ref_sub_return(struct folio *folio, int nr)
@@ -155,7 +163,10 @@ static inline int folio_ref_sub_return(struct folio *folio, int nr)
static inline void page_ref_inc(struct page *page)
{
- atomic_inc(&page->_refcount);
+ int old_val = atomic_fetch_inc(&page->_refcount);
+ int new_val = old_val + 1;
+
+ VM_BUG_ON_PAGE((unsigned int)new_val < (unsigned int)old_val, page);
if (page_ref_tracepoint_active(page_ref_mod))
__page_ref_mod(page, 1);
}
@@ -167,7 +178,10 @@ static inline void folio_ref_inc(struct folio *folio)
static inline void page_ref_dec(struct page *page)
{
- atomic_dec(&page->_refcount);
+ int old_val = atomic_fetch_dec(&page->_refcount);
+ int new_val = old_val - 1;
+
+ VM_BUG_ON_PAGE((unsigned int)new_val > (unsigned int)old_val, page);
if (page_ref_tracepoint_active(page_ref_mod))
__page_ref_mod(page, -1);
}
@@ -179,8 +193,11 @@ static inline void folio_ref_dec(struct folio *folio)
static inline int page_ref_sub_and_test(struct page *page, int nr)
{
- int ret = atomic_sub_and_test(nr, &page->_refcount);
+ int old_val = atomic_fetch_sub(nr, &page->_refcount);
+ int new_val = old_val - nr;
+ int ret = new_val == 0;
+ VM_BUG_ON_PAGE((unsigned int)new_val > (unsigned int)old_val, page);
if (page_ref_tracepoint_active(page_ref_mod_and_test))
__page_ref_mod_and_test(page, -nr, ret);
return ret;
@@ -193,11 +210,13 @@ static inline int folio_ref_sub_and_test(struct folio *folio, int nr)
static inline int page_ref_inc_return(struct page *page)
{
- int ret = atomic_inc_return(&page->_refcount);
+ int old_val = atomic_fetch_inc(&page->_refcount);
+ int new_val = old_val + 1;
+ VM_BUG_ON_PAGE((unsigned int)new_val < (unsigned int)old_val, page);
if (page_ref_tracepoint_active(page_ref_mod_and_return))
- __page_ref_mod_and_return(page, 1, ret);
- return ret;
+ __page_ref_mod_and_return(page, 1, new_val);
+ return new_val;
}
static inline int folio_ref_inc_return(struct folio *folio)
@@ -207,8 +226,11 @@ static inline int folio_ref_inc_return(struct folio *folio)
static inline int page_ref_dec_and_test(struct page *page)
{
- int ret = atomic_dec_and_test(&page->_refcount);
+ int old_val = atomic_fetch_dec(&page->_refcount);
+ int new_val = old_val - 1;
+ int ret = new_val == 0;
+ VM_BUG_ON_PAGE((unsigned int)new_val > (unsigned int)old_val, page);
if (page_ref_tracepoint_active(page_ref_mod_and_test))
__page_ref_mod_and_test(page, -1, ret);
return ret;
@@ -221,11 +243,13 @@ static inline int folio_ref_dec_and_test(struct folio *folio)
static inline int page_ref_dec_return(struct page *page)
{
- int ret = atomic_dec_return(&page->_refcount);
+ int old_val = atomic_fetch_dec(&page->_refcount);
+ int new_val = old_val - 1;
+ VM_BUG_ON_PAGE((unsigned int)new_val > (unsigned int)old_val, page);
if (page_ref_tracepoint_active(page_ref_mod_and_return))
- __page_ref_mod_and_return(page, -1, ret);
- return ret;
+ __page_ref_mod_and_return(page, -1, new_val);
+ return new_val;
}
static inline int folio_ref_dec_return(struct folio *folio)
@@ -235,8 +259,11 @@ static inline int folio_ref_dec_return(struct folio *folio)
static inline bool page_ref_add_unless(struct page *page, int nr, int u)
{
- bool ret = atomic_add_unless(&page->_refcount, nr, u);
+ int old_val = atomic_fetch_add_unless(&page->_refcount, nr, u);
+ int new_val = old_val + nr;
+ int ret = old_val != u;
+ VM_BUG_ON_PAGE(ret && (unsigned int)new_val < (unsigned int)old_val, page);
if (page_ref_tracepoint_active(page_ref_add_unless))
__page_ref_add_unless(page, nr, u, ret);
return ret;
--
2.34.0.rc1.387.gb447b232ab-goog
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [RFC v2 03/10] mm: Avoid using set_page_count() in set_page_refcounted()
2021-11-17 1:20 [RFC v2 00/10] Hardening page _refcount Pasha Tatashin
2021-11-17 1:20 ` [RFC v2 01/10] mm: page_ref_add_unless() does not trace 'u' argument Pasha Tatashin
2021-11-17 1:20 ` [RFC v2 02/10] mm: add overflow and underflow checks for page->_refcount Pasha Tatashin
@ 2021-11-17 1:20 ` Pasha Tatashin
2021-11-17 1:20 ` [RFC v2 04/10] mm: remove set_page_count() from page_frag_alloc_align Pasha Tatashin
` (6 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Pasha Tatashin @ 2021-11-17 1:20 UTC (permalink / raw)
To: pasha.tatashin, linux-kernel, linux-mm, linux-m68k,
anshuman.khandual, willy, akpm, william.kucharski, mike.kravetz,
vbabka, geert, schmitzmic, rostedt, mingo, hannes, guro,
songmuchun, weixugc, gthelen, rientjes, pjt
set_page_refcounted() converts a non-refcounted page that has
(page->_refcount == 0) into a refcounted page by setting _refcount to
1.
The current approach uses the following logic:
VM_BUG_ON_PAGE(page_ref_count(page), page);
set_page_count(page, 1);
However, if _refcount changes from 0 to 1 between the VM_BUG_ON_PAGE()
and set_page_count() we can break _refcount, which can cause other
problems such as memory corruptions.
Instead, use a safer method: increment _refcount first and verify
that at increment time it was indeed 1.
refcnt = page_ref_inc_return(page);
VM_BUG_ON_PAGE(refcnt != 1, page);
Use page_ref_inc_return() to avoid unconditionally overwriting
the _refcount value with set_page_count(), and check the return value.
Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
---
mm/internal.h | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/mm/internal.h b/mm/internal.h
index 3b79a5c9427a..f601575b7e5a 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -132,9 +132,11 @@ static inline bool page_evictable(struct page *page)
*/
static inline void set_page_refcounted(struct page *page)
{
+ int refcnt;
+
VM_BUG_ON_PAGE(PageTail(page), page);
- VM_BUG_ON_PAGE(page_ref_count(page), page);
- set_page_count(page, 1);
+ refcnt = page_ref_inc_return(page);
+ VM_BUG_ON_PAGE(refcnt != 1, page);
}
extern unsigned long highest_memmap_pfn;
--
2.34.0.rc1.387.gb447b232ab-goog
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [RFC v2 04/10] mm: remove set_page_count() from page_frag_alloc_align
2021-11-17 1:20 [RFC v2 00/10] Hardening page _refcount Pasha Tatashin
` (2 preceding siblings ...)
2021-11-17 1:20 ` [RFC v2 03/10] mm: Avoid using set_page_count() in set_page_refcounted() Pasha Tatashin
@ 2021-11-17 1:20 ` Pasha Tatashin
2021-11-17 1:20 ` [RFC v2 05/10] mm: avoid using set_page_count() when pages are freed into allocator Pasha Tatashin
` (5 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Pasha Tatashin @ 2021-11-17 1:20 UTC (permalink / raw)
To: pasha.tatashin, linux-kernel, linux-mm, linux-m68k,
anshuman.khandual, willy, akpm, william.kucharski, mike.kravetz,
vbabka, geert, schmitzmic, rostedt, mingo, hannes, guro,
songmuchun, weixugc, gthelen, rientjes, pjt
set_page_count() unconditionally resets the value of _refcount and that
is dangerous, as it is not programmatically verified. Instead we rely on
comments like: "OK, page count is 0, we can safely set it".
Add a new refcount function: page_ref_add_return() to return the new
refcount value after adding to it. Use the return value to verify that
the _refcount was indeed the expected one.
Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
---
include/linux/page_ref.h | 11 +++++++++++
mm/page_alloc.c | 6 ++++--
2 files changed, 15 insertions(+), 2 deletions(-)
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index f3c61dc6344a..27880aca2e2f 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -115,6 +115,17 @@ static inline void init_page_count(struct page *page)
set_page_count(page, 1);
}
+static inline int page_ref_add_return(struct page *page, int nr)
+{
+ int old_val = atomic_fetch_add(nr, &page->_refcount);
+ int new_val = old_val + nr;
+
+ VM_BUG_ON_PAGE((unsigned int)new_val < (unsigned int)old_val, page);
+ if (page_ref_tracepoint_active(page_ref_mod_and_return))
+ __page_ref_mod_and_return(page, nr, new_val);
+ return new_val;
+}
+
static inline void page_ref_add(struct page *page, int nr)
{
int old_val = atomic_fetch_add(nr, &page->_refcount);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c5952749ad40..e8e88111028a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5516,6 +5516,7 @@ void *page_frag_alloc_align(struct page_frag_cache *nc,
unsigned int size = PAGE_SIZE;
struct page *page;
int offset;
+ int refcnt;
if (unlikely(!nc->va)) {
refill:
@@ -5554,8 +5555,9 @@ void *page_frag_alloc_align(struct page_frag_cache *nc,
/* if size can vary use size else just use PAGE_SIZE */
size = nc->size;
#endif
- /* OK, page count is 0, we can safely set it */
- set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1);
+ /* page count is 0, set it to PAGE_FRAG_CACHE_MAX_SIZE + 1 */
+ refcnt = page_ref_add_return(page, PAGE_FRAG_CACHE_MAX_SIZE + 1);
+ VM_BUG_ON_PAGE(refcnt != PAGE_FRAG_CACHE_MAX_SIZE + 1, page);
/* reset page count bias and offset to start of new frag */
nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
--
2.34.0.rc1.387.gb447b232ab-goog
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [RFC v2 05/10] mm: avoid using set_page_count() when pages are freed into allocator
2021-11-17 1:20 [RFC v2 00/10] Hardening page _refcount Pasha Tatashin
` (3 preceding siblings ...)
2021-11-17 1:20 ` [RFC v2 04/10] mm: remove set_page_count() from page_frag_alloc_align Pasha Tatashin
@ 2021-11-17 1:20 ` Pasha Tatashin
2021-11-17 1:20 ` [RFC v2 06/10] mm: rename init_page_count() -> page_ref_init() Pasha Tatashin
` (4 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Pasha Tatashin @ 2021-11-17 1:20 UTC (permalink / raw)
To: pasha.tatashin, linux-kernel, linux-mm, linux-m68k,
anshuman.khandual, willy, akpm, william.kucharski, mike.kravetz,
vbabka, geert, schmitzmic, rostedt, mingo, hannes, guro,
songmuchun, weixugc, gthelen, rientjes, pjt
When struct pages are first initialized, the page->_refcount field is
set to 1. However, later when pages are freed into the allocator we set
_refcount to 0 via set_page_count(). Unconditionally resetting
_refcount is dangerous.
Instead use page_ref_dec_return(), and verify that the _refcount is
what is expected.
Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
---
mm/page_alloc.c | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e8e88111028a..217c0c9fa25b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1653,6 +1653,7 @@ void __free_pages_core(struct page *page, unsigned int order)
unsigned int nr_pages = 1 << order;
struct page *p = page;
unsigned int loop;
+ int refcnt;
/*
* When initializing the memmap, __init_single_page() sets the refcount
@@ -1663,10 +1664,12 @@ void __free_pages_core(struct page *page, unsigned int order)
for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
prefetchw(p + 1);
__ClearPageReserved(p);
- set_page_count(p, 0);
+ refcnt = page_ref_dec_return(p);
+ VM_BUG_ON_PAGE(refcnt, p);
}
__ClearPageReserved(p);
- set_page_count(p, 0);
+ refcnt = page_ref_dec_return(p);
+ VM_BUG_ON_PAGE(refcnt, p);
atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
@@ -2238,10 +2241,12 @@ void __init init_cma_reserved_pageblock(struct page *page)
{
unsigned i = pageblock_nr_pages;
struct page *p = page;
+ int refcnt;
do {
__ClearPageReserved(p);
- set_page_count(p, 0);
+ refcnt = page_ref_dec_return(p);
+ VM_BUG_ON_PAGE(refcnt, p);
} while (++p, --i);
set_pageblock_migratetype(page, MIGRATE_CMA);
--
2.34.0.rc1.387.gb447b232ab-goog
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [RFC v2 06/10] mm: rename init_page_count() -> page_ref_init()
2021-11-17 1:20 [RFC v2 00/10] Hardening page _refcount Pasha Tatashin
` (4 preceding siblings ...)
2021-11-17 1:20 ` [RFC v2 05/10] mm: avoid using set_page_count() when pages are freed into allocator Pasha Tatashin
@ 2021-11-17 1:20 ` Pasha Tatashin
2021-11-17 1:20 ` [RFC v2 07/10] mm: remove set_page_count() Pasha Tatashin
` (3 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Pasha Tatashin @ 2021-11-17 1:20 UTC (permalink / raw)
To: pasha.tatashin, linux-kernel, linux-mm, linux-m68k,
anshuman.khandual, willy, akpm, william.kucharski, mike.kravetz,
vbabka, geert, schmitzmic, rostedt, mingo, hannes, guro,
songmuchun, weixugc, gthelen, rientjes, pjt
Now that set_page_count() is not called from outside anymore and is about
to be removed, init_page_count() is the only function that is going to
be used to unconditionally set _refcount; however, it is restricted to
setting it only to 1.
Make init_page_count() aligned with the other page_ref_*
functions by renaming it.
Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
arch/m68k/mm/motorola.c | 2 +-
include/linux/mm.h | 2 +-
include/linux/page_ref.h | 10 +++++++---
mm/page_alloc.c | 2 +-
4 files changed, 10 insertions(+), 6 deletions(-)
diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
index 2b05bb2bac00..e81ecafedff3 100644
--- a/arch/m68k/mm/motorola.c
+++ b/arch/m68k/mm/motorola.c
@@ -133,7 +133,7 @@ void __init init_pointer_table(void *table, int type)
/* unreserve the page so it's possible to free that page */
__ClearPageReserved(PD_PAGE(dp));
- init_page_count(PD_PAGE(dp));
+ page_ref_init(PD_PAGE(dp));
return;
}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a7e4a9e7d807..736bf16e7104 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2506,7 +2506,7 @@ extern void reserve_bootmem_region(phys_addr_t start, phys_addr_t end);
static inline void free_reserved_page(struct page *page)
{
ClearPageReserved(page);
- init_page_count(page);
+ page_ref_init(page);
__free_page(page);
adjust_managed_page_count(page, 1);
}
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 27880aca2e2f..ff946d753df8 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -107,10 +107,14 @@ static inline void folio_set_count(struct folio *folio, int v)
}
/*
- * Setup the page count before being freed into the page allocator for
- * the first time (boot or memory hotplug)
+ * Setup the page refcount to one before being freed into the page allocator.
+ * The memory might not be initialized and therefore there cannot be any
+ * assumptions about the current value of page->_refcount. This call should be
+ * done during boot when memory is being initialized, during memory hotplug
+ * when new memory is added, or when previously reserved memory is
+ * unreserved; this is the first time the kernel takes control of the
+ * given memory.
*/
-static inline void init_page_count(struct page *page)
+static inline void page_ref_init(struct page *page)
{
set_page_count(page, 1);
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 217c0c9fa25b..fc828dfde4fc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1555,7 +1555,7 @@ static void __meminit __init_single_page(struct page *page, unsigned long pfn,
{
mm_zero_struct_page(page);
set_page_links(page, zone, nid, pfn);
- init_page_count(page);
+ page_ref_init(page);
page_mapcount_reset(page);
page_cpupid_reset_last(page);
page_kasan_tag_reset(page);
--
2.34.0.rc1.387.gb447b232ab-goog
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [RFC v2 07/10] mm: remove set_page_count()
2021-11-17 1:20 [RFC v2 00/10] Hardening page _refcount Pasha Tatashin
` (5 preceding siblings ...)
2021-11-17 1:20 ` [RFC v2 06/10] mm: rename init_page_count() -> page_ref_init() Pasha Tatashin
@ 2021-11-17 1:20 ` Pasha Tatashin
2021-11-17 1:20 ` [RFC v2 08/10] mm: simplify page_ref_* functions Pasha Tatashin
` (2 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Pasha Tatashin @ 2021-11-17 1:20 UTC (permalink / raw)
To: pasha.tatashin, linux-kernel, linux-mm, linux-m68k,
anshuman.khandual, willy, akpm, william.kucharski, mike.kravetz,
vbabka, geert, schmitzmic, rostedt, mingo, hannes, guro,
songmuchun, weixugc, gthelen, rientjes, pjt
set_page_count() is dangerous because it resets _refcount to an
arbitrary value. Instead we now initialize _refcount to 1 only once,
and the rest of the time we are using add/dec/cmpxchg to keep
continuous tracking of the counter.
Remove set_page_count() and add new tracing hooks to page_ref_init().
Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
---
include/linux/page_ref.h | 27 ++++++++-----------
include/trace/events/page_ref.h | 46 ++++++++++++++++++++++++++++-----
mm/debug_page_ref.c | 8 +++---
3 files changed, 54 insertions(+), 27 deletions(-)
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index ff946d753df8..c7033f506d68 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -7,7 +7,7 @@
#include <linux/page-flags.h>
#include <linux/tracepoint-defs.h>
-DECLARE_TRACEPOINT(page_ref_set);
+DECLARE_TRACEPOINT(page_ref_init);
DECLARE_TRACEPOINT(page_ref_mod);
DECLARE_TRACEPOINT(page_ref_mod_and_test);
DECLARE_TRACEPOINT(page_ref_mod_and_return);
@@ -26,7 +26,7 @@ DECLARE_TRACEPOINT(page_ref_unfreeze);
*/
#define page_ref_tracepoint_active(t) tracepoint_enabled(t)
-extern void __page_ref_set(struct page *page, int v);
+extern void __page_ref_init(struct page *page);
extern void __page_ref_mod(struct page *page, int v);
extern void __page_ref_mod_and_test(struct page *page, int v, int ret);
extern void __page_ref_mod_and_return(struct page *page, int v, int ret);
@@ -38,7 +38,7 @@ extern void __page_ref_unfreeze(struct page *page, int v);
#define page_ref_tracepoint_active(t) false
-static inline void __page_ref_set(struct page *page, int v)
+static inline void __page_ref_init(struct page *page)
{
}
static inline void __page_ref_mod(struct page *page, int v)
@@ -94,18 +94,6 @@ static inline int page_count(const struct page *page)
return folio_ref_count(page_folio(page));
}
-static inline void set_page_count(struct page *page, int v)
-{
- atomic_set(&page->_refcount, v);
- if (page_ref_tracepoint_active(page_ref_set))
- __page_ref_set(page, v);
-}
-
-static inline void folio_set_count(struct folio *folio, int v)
-{
- set_page_count(&folio->page, v);
-}
-
/*
* Setup the page refcount to one before being freed into the page allocator.
* The memory might not be initialized and therefore there cannot be any
@@ -116,7 +104,14 @@ static inline void folio_set_count(struct folio *folio, int v)
*/
static inline void page_ref_init(struct page *page)
{
- set_page_count(page, 1);
+ atomic_set(&page->_refcount, 1);
+ if (page_ref_tracepoint_active(page_ref_init))
+ __page_ref_init(page);
+}
+
+static inline void folio_ref_init(struct folio *folio)
+{
+ page_ref_init(&folio->page);
}
static inline int page_ref_add_return(struct page *page, int nr)
diff --git a/include/trace/events/page_ref.h b/include/trace/events/page_ref.h
index c32d6d161cdb..2b8e5a4df53b 100644
--- a/include/trace/events/page_ref.h
+++ b/include/trace/events/page_ref.h
@@ -10,6 +10,45 @@
#include <linux/tracepoint.h>
#include <trace/events/mmflags.h>
+DECLARE_EVENT_CLASS(page_ref_init_template,
+
+ TP_PROTO(struct page *page),
+
+ TP_ARGS(page),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, pfn)
+ __field(unsigned long, flags)
+ __field(int, count)
+ __field(int, mapcount)
+ __field(void *, mapping)
+ __field(int, mt)
+ __field(int, val)
+ ),
+
+ TP_fast_assign(
+ __entry->pfn = page_to_pfn(page);
+ __entry->flags = page->flags;
+ __entry->count = page_ref_count(page);
+ __entry->mapcount = page_mapcount(page);
+ __entry->mapping = page->mapping;
+ __entry->mt = get_pageblock_migratetype(page);
+ ),
+
+ TP_printk("pfn=0x%lx flags=%s count=%d mapcount=%d mapping=%p mt=%d",
+ __entry->pfn,
+ show_page_flags(__entry->flags & PAGEFLAGS_MASK),
+ __entry->count,
+ __entry->mapcount, __entry->mapping, __entry->mt)
+);
+
+DEFINE_EVENT(page_ref_init_template, page_ref_init,
+
+ TP_PROTO(struct page *page),
+
+ TP_ARGS(page)
+);
+
DECLARE_EVENT_CLASS(page_ref_mod_template,
TP_PROTO(struct page *page, int v),
@@ -44,13 +83,6 @@ DECLARE_EVENT_CLASS(page_ref_mod_template,
__entry->val)
);
-DEFINE_EVENT(page_ref_mod_template, page_ref_set,
-
- TP_PROTO(struct page *page, int v),
-
- TP_ARGS(page, v)
-);
-
DEFINE_EVENT(page_ref_mod_template, page_ref_mod,
TP_PROTO(struct page *page, int v),
diff --git a/mm/debug_page_ref.c b/mm/debug_page_ref.c
index 1426d6887b01..ad21abfec463 100644
--- a/mm/debug_page_ref.c
+++ b/mm/debug_page_ref.c
@@ -5,12 +5,12 @@
#define CREATE_TRACE_POINTS
#include <trace/events/page_ref.h>
-void __page_ref_set(struct page *page, int v)
+void __page_ref_init(struct page *page)
{
- trace_page_ref_set(page, v);
+ trace_page_ref_init(page);
}
-EXPORT_SYMBOL(__page_ref_set);
-EXPORT_TRACEPOINT_SYMBOL(page_ref_set);
+EXPORT_SYMBOL(__page_ref_init);
+EXPORT_TRACEPOINT_SYMBOL(page_ref_init);
void __page_ref_mod(struct page *page, int v)
{
--
2.34.0.rc1.387.gb447b232ab-goog
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [RFC v2 08/10] mm: simplify page_ref_* functions
2021-11-17 1:20 [RFC v2 00/10] Hardening page _refcount Pasha Tatashin
` (6 preceding siblings ...)
2021-11-17 1:20 ` [RFC v2 07/10] mm: remove set_page_count() Pasha Tatashin
@ 2021-11-17 1:20 ` Pasha Tatashin
2021-11-17 1:20 ` [RFC v2 09/10] mm: do not use atomic_set_release in page_ref_unfreeze() Pasha Tatashin
2021-11-17 1:20 ` [RFC v2 10/10] mm: use atomic_cmpxchg_acquire in page_ref_freeze() Pasha Tatashin
9 siblings, 0 replies; 11+ messages in thread
From: Pasha Tatashin @ 2021-11-17 1:20 UTC (permalink / raw)
To: pasha.tatashin, linux-kernel, linux-mm, linux-m68k,
anshuman.khandual, willy, akpm, william.kucharski, mike.kravetz,
vbabka, geert, schmitzmic, rostedt, mingo, hannes, guro,
songmuchun, weixugc, gthelen, rientjes, pjt
Now that we are using atomic_fetch* variants to add/sub/inc/dec the page
_refcount, it makes sense to combine the page_ref_* return and non-return
functions.
Also remove some extra trace points for the non-return variants. This
improves traceability by always recording the new _refcount value
after the modification has occurred.
Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
---
include/linux/page_ref.h | 102 +++++++++-----------------------
include/trace/events/page_ref.h | 24 ++------
mm/debug_page_ref.c | 14 -----
3 files changed, 34 insertions(+), 106 deletions(-)
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index c7033f506d68..8c76bf3bf7e1 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -8,8 +8,6 @@
#include <linux/tracepoint-defs.h>
DECLARE_TRACEPOINT(page_ref_init);
-DECLARE_TRACEPOINT(page_ref_mod);
-DECLARE_TRACEPOINT(page_ref_mod_and_test);
DECLARE_TRACEPOINT(page_ref_mod_and_return);
DECLARE_TRACEPOINT(page_ref_add_unless);
DECLARE_TRACEPOINT(page_ref_freeze);
@@ -27,8 +25,6 @@ DECLARE_TRACEPOINT(page_ref_unfreeze);
#define page_ref_tracepoint_active(t) tracepoint_enabled(t)
extern void __page_ref_init(struct page *page);
-extern void __page_ref_mod(struct page *page, int v);
-extern void __page_ref_mod_and_test(struct page *page, int v, int ret);
extern void __page_ref_mod_and_return(struct page *page, int v, int ret);
extern void __page_ref_add_unless(struct page *page, int v, int u, int ret);
extern void __page_ref_freeze(struct page *page, int v, int ret);
@@ -41,12 +37,6 @@ extern void __page_ref_unfreeze(struct page *page, int v);
static inline void __page_ref_init(struct page *page)
{
}
-static inline void __page_ref_mod(struct page *page, int v)
-{
-}
-static inline void __page_ref_mod_and_test(struct page *page, int v, int ret)
-{
-}
static inline void __page_ref_mod_and_return(struct page *page, int v, int ret)
{
}
@@ -127,12 +117,7 @@ static inline int page_ref_add_return(struct page *page, int nr)
static inline void page_ref_add(struct page *page, int nr)
{
- int old_val = atomic_fetch_add(nr, &page->_refcount);
- int new_val = old_val + nr;
-
- VM_BUG_ON_PAGE((unsigned int)new_val < (unsigned int)old_val, page);
- if (page_ref_tracepoint_active(page_ref_mod))
- __page_ref_mod(page, nr);
+ page_ref_add_return(page, nr);
}
static inline void folio_ref_add(struct folio *folio, int nr)
@@ -140,30 +125,25 @@ static inline void folio_ref_add(struct folio *folio, int nr)
page_ref_add(&folio->page, nr);
}
-static inline void page_ref_sub(struct page *page, int nr)
+static inline int page_ref_sub_return(struct page *page, int nr)
{
int old_val = atomic_fetch_sub(nr, &page->_refcount);
int new_val = old_val - nr;
VM_BUG_ON_PAGE((unsigned int)new_val > (unsigned int)old_val, page);
- if (page_ref_tracepoint_active(page_ref_mod))
- __page_ref_mod(page, -nr);
+ if (page_ref_tracepoint_active(page_ref_mod_and_return))
+ __page_ref_mod_and_return(page, -nr, new_val);
+ return new_val;
}
-static inline void folio_ref_sub(struct folio *folio, int nr)
+static inline void page_ref_sub(struct page *page, int nr)
{
- page_ref_sub(&folio->page, nr);
+ page_ref_sub_return(page, nr);
}
-static inline int page_ref_sub_return(struct page *page, int nr)
+static inline void folio_ref_sub(struct folio *folio, int nr)
{
- int old_val = atomic_fetch_sub(nr, &page->_refcount);
- int new_val = old_val - nr;
-
- VM_BUG_ON_PAGE((unsigned int)new_val > (unsigned int)old_val, page);
- if (page_ref_tracepoint_active(page_ref_mod_and_return))
- __page_ref_mod_and_return(page, -nr, new_val);
- return new_val;
+ page_ref_sub(&folio->page, nr);
}
static inline int folio_ref_sub_return(struct folio *folio, int nr)
@@ -171,14 +151,20 @@ static inline int folio_ref_sub_return(struct folio *folio, int nr)
return page_ref_sub_return(&folio->page, nr);
}
-static inline void page_ref_inc(struct page *page)
+static inline int page_ref_inc_return(struct page *page)
{
int old_val = atomic_fetch_inc(&page->_refcount);
int new_val = old_val + 1;
VM_BUG_ON_PAGE((unsigned int)new_val < (unsigned int)old_val, page);
- if (page_ref_tracepoint_active(page_ref_mod))
- __page_ref_mod(page, 1);
+ if (page_ref_tracepoint_active(page_ref_mod_and_return))
+ __page_ref_mod_and_return(page, 1, new_val);
+ return new_val;
+}
+
+static inline void page_ref_inc(struct page *page)
+{
+ page_ref_inc_return(page);
}
static inline void folio_ref_inc(struct folio *folio)
@@ -186,14 +172,20 @@ static inline void folio_ref_inc(struct folio *folio)
page_ref_inc(&folio->page);
}
-static inline void page_ref_dec(struct page *page)
+static inline int page_ref_dec_return(struct page *page)
{
int old_val = atomic_fetch_dec(&page->_refcount);
int new_val = old_val - 1;
VM_BUG_ON_PAGE((unsigned int)new_val > (unsigned int)old_val, page);
- if (page_ref_tracepoint_active(page_ref_mod))
- __page_ref_mod(page, -1);
+ if (page_ref_tracepoint_active(page_ref_mod_and_return))
+ __page_ref_mod_and_return(page, -1, new_val);
+ return new_val;
+}
+
+static inline void page_ref_dec(struct page *page)
+{
+ page_ref_dec_return(page);
}
static inline void folio_ref_dec(struct folio *folio)
@@ -203,14 +195,7 @@ static inline void folio_ref_dec(struct folio *folio)
static inline int page_ref_sub_and_test(struct page *page, int nr)
{
- int old_val = atomic_fetch_sub(nr, &page->_refcount);
- int new_val = old_val - nr;
- int ret = new_val == 0;
-
- VM_BUG_ON_PAGE((unsigned int)new_val > (unsigned int)old_val, page);
- if (page_ref_tracepoint_active(page_ref_mod_and_test))
- __page_ref_mod_and_test(page, -nr, ret);
- return ret;
+ return page_ref_sub_return(page, nr) == 0;
}
static inline int folio_ref_sub_and_test(struct folio *folio, int nr)
@@ -218,17 +203,6 @@ static inline int folio_ref_sub_and_test(struct folio *folio, int nr)
return page_ref_sub_and_test(&folio->page, nr);
}
-static inline int page_ref_inc_return(struct page *page)
-{
- int old_val = atomic_fetch_inc(&page->_refcount);
- int new_val = old_val + 1;
-
- VM_BUG_ON_PAGE((unsigned int)new_val < (unsigned int)old_val, page);
- if (page_ref_tracepoint_active(page_ref_mod_and_return))
- __page_ref_mod_and_return(page, 1, new_val);
- return new_val;
-}
-
static inline int folio_ref_inc_return(struct folio *folio)
{
return page_ref_inc_return(&folio->page);
@@ -236,14 +210,7 @@ static inline int folio_ref_inc_return(struct folio *folio)
static inline int page_ref_dec_and_test(struct page *page)
{
- int old_val = atomic_fetch_dec(&page->_refcount);
- int new_val = old_val - 1;
- int ret = new_val == 0;
-
- VM_BUG_ON_PAGE((unsigned int)new_val > (unsigned int)old_val, page);
- if (page_ref_tracepoint_active(page_ref_mod_and_test))
- __page_ref_mod_and_test(page, -1, ret);
- return ret;
+ return page_ref_dec_return(page) == 0;
}
static inline int folio_ref_dec_and_test(struct folio *folio)
@@ -251,17 +218,6 @@ static inline int folio_ref_dec_and_test(struct folio *folio)
return page_ref_dec_and_test(&folio->page);
}
-static inline int page_ref_dec_return(struct page *page)
-{
- int old_val = atomic_fetch_dec(&page->_refcount);
- int new_val = old_val - 1;
-
- VM_BUG_ON_PAGE((unsigned int)new_val > (unsigned int)old_val, page);
- if (page_ref_tracepoint_active(page_ref_mod_and_return))
- __page_ref_mod_and_return(page, -1, new_val);
- return new_val;
-}
-
static inline int folio_ref_dec_return(struct folio *folio)
{
return page_ref_dec_return(&folio->page);
diff --git a/include/trace/events/page_ref.h b/include/trace/events/page_ref.h
index 2b8e5a4df53b..600ea20c3e11 100644
--- a/include/trace/events/page_ref.h
+++ b/include/trace/events/page_ref.h
@@ -49,7 +49,7 @@ DEFINE_EVENT(page_ref_init_template, page_ref_init,
TP_ARGS(page)
);
-DECLARE_EVENT_CLASS(page_ref_mod_template,
+DECLARE_EVENT_CLASS(page_ref_unfreeze_template,
TP_PROTO(struct page *page, int v),
@@ -83,14 +83,7 @@ DECLARE_EVENT_CLASS(page_ref_mod_template,
__entry->val)
);
-DEFINE_EVENT(page_ref_mod_template, page_ref_mod,
-
- TP_PROTO(struct page *page, int v),
-
- TP_ARGS(page, v)
-);
-
-DECLARE_EVENT_CLASS(page_ref_mod_and_test_template,
+DECLARE_EVENT_CLASS(page_ref_mod_template,
TP_PROTO(struct page *page, int v, int ret),
@@ -163,14 +156,7 @@ DECLARE_EVENT_CLASS(page_ref_add_unless_template,
__entry->val, __entry->unless, __entry->ret)
);
-DEFINE_EVENT(page_ref_mod_and_test_template, page_ref_mod_and_test,
-
- TP_PROTO(struct page *page, int v, int ret),
-
- TP_ARGS(page, v, ret)
-);
-
-DEFINE_EVENT(page_ref_mod_and_test_template, page_ref_mod_and_return,
+DEFINE_EVENT(page_ref_mod_template, page_ref_mod_and_return,
TP_PROTO(struct page *page, int v, int ret),
@@ -184,14 +170,14 @@ DEFINE_EVENT(page_ref_add_unless_template, page_ref_add_unless,
TP_ARGS(page, v, u, ret)
);
-DEFINE_EVENT(page_ref_mod_and_test_template, page_ref_freeze,
+DEFINE_EVENT(page_ref_mod_template, page_ref_freeze,
TP_PROTO(struct page *page, int v, int ret),
TP_ARGS(page, v, ret)
);
-DEFINE_EVENT(page_ref_mod_template, page_ref_unfreeze,
+DEFINE_EVENT(page_ref_unfreeze_template, page_ref_unfreeze,
TP_PROTO(struct page *page, int v),
diff --git a/mm/debug_page_ref.c b/mm/debug_page_ref.c
index ad21abfec463..f5f39a77c6da 100644
--- a/mm/debug_page_ref.c
+++ b/mm/debug_page_ref.c
@@ -12,20 +12,6 @@ void __page_ref_init(struct page *page)
EXPORT_SYMBOL(__page_ref_init);
EXPORT_TRACEPOINT_SYMBOL(page_ref_init);
-void __page_ref_mod(struct page *page, int v)
-{
- trace_page_ref_mod(page, v);
-}
-EXPORT_SYMBOL(__page_ref_mod);
-EXPORT_TRACEPOINT_SYMBOL(page_ref_mod);
-
-void __page_ref_mod_and_test(struct page *page, int v, int ret)
-{
- trace_page_ref_mod_and_test(page, v, ret);
-}
-EXPORT_SYMBOL(__page_ref_mod_and_test);
-EXPORT_TRACEPOINT_SYMBOL(page_ref_mod_and_test);
-
void __page_ref_mod_and_return(struct page *page, int v, int ret)
{
trace_page_ref_mod_and_return(page, v, ret);
--
2.34.0.rc1.387.gb447b232ab-goog
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [RFC v2 09/10] mm: do not use atomic_set_release in page_ref_unfreeze()
2021-11-17 1:20 [RFC v2 00/10] Hardening page _refcount Pasha Tatashin
` (7 preceding siblings ...)
2021-11-17 1:20 ` [RFC v2 08/10] mm: simplify page_ref_* functions Pasha Tatashin
@ 2021-11-17 1:20 ` Pasha Tatashin
2021-11-17 1:20 ` [RFC v2 10/10] mm: use atomic_cmpxchg_acquire in page_ref_freeze() Pasha Tatashin
9 siblings, 0 replies; 11+ messages in thread
From: Pasha Tatashin @ 2021-11-17 1:20 UTC (permalink / raw)
To: pasha.tatashin, linux-kernel, linux-mm, linux-m68k,
anshuman.khandual, willy, akpm, william.kucharski, mike.kravetz,
vbabka, geert, schmitzmic, rostedt, mingo, hannes, guro,
songmuchun, weixugc, gthelen, rientjes, pjt
In page_ref_unfreeze() we set the new _refcount value after verifying that
the old value was indeed 0.
VM_BUG_ON_PAGE(page_count(page) != 0, page);
< the _refcount may change here>
atomic_set_release(&page->_refcount, count);
To avoid the small gap where _refcount may change, let's verify the value
of the _refcount at the time of the set operation.
Use atomic_xchg_release() and at the set time verify that the value
was 0.
Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
---
include/linux/page_ref.h | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 8c76bf3bf7e1..26676d3bcd58 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -322,10 +322,9 @@ static inline int folio_ref_freeze(struct folio *folio, int count)
static inline void page_ref_unfreeze(struct page *page, int count)
{
- VM_BUG_ON_PAGE(page_count(page) != 0, page);
- VM_BUG_ON(count == 0);
+ int old_val = atomic_xchg_release(&page->_refcount, count);
- atomic_set_release(&page->_refcount, count);
+ VM_BUG_ON_PAGE(count == 0 || old_val != 0, page);
if (page_ref_tracepoint_active(page_ref_unfreeze))
__page_ref_unfreeze(page, count);
}
--
2.34.0.rc1.387.gb447b232ab-goog
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [RFC v2 10/10] mm: use atomic_cmpxchg_acquire in page_ref_freeze().
2021-11-17 1:20 [RFC v2 00/10] Hardening page _refcount Pasha Tatashin
` (8 preceding siblings ...)
2021-11-17 1:20 ` [RFC v2 09/10] mm: do not use atomic_set_release in page_ref_unfreeze() Pasha Tatashin
@ 2021-11-17 1:20 ` Pasha Tatashin
9 siblings, 0 replies; 11+ messages in thread
From: Pasha Tatashin @ 2021-11-17 1:20 UTC (permalink / raw)
To: pasha.tatashin, linux-kernel, linux-mm, linux-m68k,
anshuman.khandual, willy, akpm, william.kucharski, mike.kravetz,
vbabka, geert, schmitzmic, rostedt, mingo, hannes, guro,
songmuchun, weixugc, gthelen, rientjes, pjt
page_ref_freeze and page_ref_unfreeze are designed to be used as a pair.
They protect critical sections where struct page can be modified.
page_ref_unfreeze() is protected by _release() atomic operation, but
page_ref_freeze() is not, as it is assumed that cmpxchg provides the full
barrier.
Instead, use the appropriate atomic_cmpxchg_acquire() to ensure that
memory model is explicitly followed.
Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
---
include/linux/page_ref.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 26676d3bcd58..ecd92d7f3eef 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -308,7 +308,8 @@ static inline bool folio_try_get_rcu(struct folio *folio)
static inline int page_ref_freeze(struct page *page, int count)
{
- int ret = likely(atomic_cmpxchg(&page->_refcount, count, 0) == count);
+ int old_val = atomic_cmpxchg_acquire(&page->_refcount, count, 0);
+ int ret = likely(old_val == count);
if (page_ref_tracepoint_active(page_ref_freeze))
__page_ref_freeze(page, count, ret);
--
2.34.0.rc1.387.gb447b232ab-goog
^ permalink raw reply related [flat|nested] 11+ messages in thread