From: Byungchul Park <byungchul@sk.com>
To: linux-kernel@vger.kernel.org
Cc: kernel_team@skhynix.com, torvalds@linux-foundation.org,
damien.lemoal@opensource.wdc.com, linux-ide@vger.kernel.org,
adilger.kernel@dilger.ca, linux-ext4@vger.kernel.org,
mingo@redhat.com, peterz@infradead.org, will@kernel.org,
tglx@linutronix.de, rostedt@goodmis.org, joel@joelfernandes.org,
sashal@kernel.org, daniel.vetter@ffwll.ch, duyuyang@gmail.com,
johannes.berg@intel.com, tj@kernel.org, tytso@mit.edu,
willy@infradead.org, david@fromorbit.com, amir73il@gmail.com,
gregkh@linuxfoundation.org, kernel-team@lge.com,
linux-mm@kvack.org, akpm@linux-foundation.org, mhocko@kernel.org,
minchan@kernel.org, hannes@cmpxchg.org, vdavydov.dev@gmail.com,
sj@kernel.org, jglisse@redhat.com, dennis@kernel.org,
cl@linux.com, penberg@kernel.org, rientjes@google.com,
vbabka@suse.cz, ngupta@vflare.org, linux-block@vger.kernel.org,
josef@toxicpanda.com, linux-fsdevel@vger.kernel.org,
jack@suse.cz, jlayton@kernel.org, dan.j.williams@intel.com,
hch@infradead.org, djwong@kernel.org,
dri-devel@lists.freedesktop.org, rodrigosiqueiramelo@gmail.com,
melissa.srw@gmail.com, hamohammed.sa@gmail.com,
42.hyeyoo@gmail.com, chris.p.wilson@intel.com,
gwan-gyeong.mun@intel.com, max.byungchul.park@gmail.com,
boqun.feng@gmail.com, longman@redhat.com, hdanton@sina.com,
her0gyugyu@gmail.com
Subject: [PATCH v14 23/28] dept: Track PG_locked with dept
Date: Wed, 8 May 2024 18:47:20 +0900 [thread overview]
Message-ID: <20240508094726.35754-24-byungchul@sk.com> (raw)
In-Reply-To: <20240508094726.35754-1-byungchul@sk.com>
Make Dept able to track PG_locked waits and events, which is going to be
useful in practice. See the following link, which shows Dept working with
PG_locked and detecting real issues:
https://lore.kernel.org/lkml/1674268856-31807-1-git-send-email-byungchul.park@lge.com/
Signed-off-by: Byungchul Park <byungchul@sk.com>
---
include/linux/mm_types.h | 2 +
include/linux/page-flags.h | 125 +++++++++++++++++++++++++++++++++----
include/linux/pagemap.h | 7 ++-
mm/filemap.c | 26 ++++++++
mm/mm_init.c | 2 +
5 files changed, 149 insertions(+), 13 deletions(-)
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 5240bd7bca33..d21b2e298cdd 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -19,6 +19,7 @@
#include <linux/workqueue.h>
#include <linux/seqlock.h>
#include <linux/percpu_counter.h>
+#include <linux/dept.h>
#include <asm/mmu.h>
@@ -203,6 +204,7 @@ struct page {
struct page *kmsan_shadow;
struct page *kmsan_origin;
#endif
+ struct dept_ext_wgen PG_locked_wgen;
} _struct_page_alignment;
/*
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 4bf1c25fd1dc..74cbbf694c18 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -197,6 +197,61 @@ enum pageflags {
#ifndef __GENERATING_BOUNDS_H
+#ifdef CONFIG_DEPT
+#include <linux/kernel.h>
+#include <linux/dept.h>
+
+extern struct dept_map PG_locked_map;
+
+/*
+ * Place each of the following annotations at its suitable point in code:
+ *
+ * Annotate dept_page_set_bit() around the set_bit*() that first sets the bit
+ * Annotate dept_page_clear_bit() around clear_bit*()
+ * Annotate dept_page_wait_on_bit() around wait_on_bit*()
+ */
+
+static inline void dept_page_set_bit(struct page *p, int bit_nr)
+{
+ if (bit_nr == PG_locked)
+ dept_request_event(&PG_locked_map, &p->PG_locked_wgen);
+}
+
+static inline void dept_page_clear_bit(struct page *p, int bit_nr)
+{
+ if (bit_nr == PG_locked)
+ dept_event(&PG_locked_map, 1UL, _RET_IP_, __func__, &p->PG_locked_wgen);
+}
+
+static inline void dept_page_wait_on_bit(struct page *p, int bit_nr)
+{
+ if (bit_nr == PG_locked)
+ dept_wait(&PG_locked_map, 1UL, _RET_IP_, __func__, 0, -1L);
+}
+
+static inline void dept_folio_set_bit(struct folio *f, int bit_nr)
+{
+ dept_page_set_bit(&f->page, bit_nr);
+}
+
+static inline void dept_folio_clear_bit(struct folio *f, int bit_nr)
+{
+ dept_page_clear_bit(&f->page, bit_nr);
+}
+
+static inline void dept_folio_wait_on_bit(struct folio *f, int bit_nr)
+{
+ dept_page_wait_on_bit(&f->page, bit_nr);
+}
+#else
+#define dept_page_set_bit(p, bit_nr) do { } while (0)
+#define dept_page_clear_bit(p, bit_nr) do { } while (0)
+#define dept_page_wait_on_bit(p, bit_nr) do { } while (0)
+#define dept_folio_set_bit(f, bit_nr) do { } while (0)
+#define dept_folio_clear_bit(f, bit_nr) do { } while (0)
+#define dept_folio_wait_on_bit(f, bit_nr) do { } while (0)
+#endif
+
#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
DECLARE_STATIC_KEY_FALSE(hugetlb_optimize_vmemmap_key);
@@ -381,27 +436,51 @@ static __always_inline bool folio_test_##name(const struct folio *folio) \
#define FOLIO_SET_FLAG(name, page) \
static __always_inline void folio_set_##name(struct folio *folio) \
-{ set_bit(PG_##name, folio_flags(folio, page)); }
+{ \
+ set_bit(PG_##name, folio_flags(folio, page)); \
+ dept_folio_set_bit(folio, PG_##name); \
+}
#define FOLIO_CLEAR_FLAG(name, page) \
static __always_inline void folio_clear_##name(struct folio *folio) \
-{ clear_bit(PG_##name, folio_flags(folio, page)); }
+{ \
+ clear_bit(PG_##name, folio_flags(folio, page)); \
+ dept_folio_clear_bit(folio, PG_##name); \
+}
#define __FOLIO_SET_FLAG(name, page) \
static __always_inline void __folio_set_##name(struct folio *folio) \
-{ __set_bit(PG_##name, folio_flags(folio, page)); }
+{ \
+ __set_bit(PG_##name, folio_flags(folio, page)); \
+ dept_folio_set_bit(folio, PG_##name); \
+}
#define __FOLIO_CLEAR_FLAG(name, page) \
static __always_inline void __folio_clear_##name(struct folio *folio) \
-{ __clear_bit(PG_##name, folio_flags(folio, page)); }
+{ \
+ __clear_bit(PG_##name, folio_flags(folio, page)); \
+ dept_folio_clear_bit(folio, PG_##name); \
+}
#define FOLIO_TEST_SET_FLAG(name, page) \
static __always_inline bool folio_test_set_##name(struct folio *folio) \
-{ return test_and_set_bit(PG_##name, folio_flags(folio, page)); }
+{ \
+ bool __ret = test_and_set_bit(PG_##name, folio_flags(folio, page)); \
+ \
+ if (!__ret) \
+ dept_folio_set_bit(folio, PG_##name); \
+ return __ret; \
+}
#define FOLIO_TEST_CLEAR_FLAG(name, page) \
static __always_inline bool folio_test_clear_##name(struct folio *folio) \
-{ return test_and_clear_bit(PG_##name, folio_flags(folio, page)); }
+{ \
+ bool __ret = test_and_clear_bit(PG_##name, folio_flags(folio, page)); \
+ \
+ if (__ret) \
+ dept_folio_clear_bit(folio, PG_##name); \
+ return __ret; \
+}
#define FOLIO_FLAG(name, page) \
FOLIO_TEST_FLAG(name, page) \
@@ -416,32 +495,54 @@ static __always_inline int Page##uname(const struct page *page) \
#define SETPAGEFLAG(uname, lname, policy) \
FOLIO_SET_FLAG(lname, FOLIO_##policy) \
static __always_inline void SetPage##uname(struct page *page) \
-{ set_bit(PG_##lname, &policy(page, 1)->flags); }
+{ \
+ set_bit(PG_##lname, &policy(page, 1)->flags); \
+ dept_page_set_bit(page, PG_##lname); \
+}
#define CLEARPAGEFLAG(uname, lname, policy) \
FOLIO_CLEAR_FLAG(lname, FOLIO_##policy) \
static __always_inline void ClearPage##uname(struct page *page) \
-{ clear_bit(PG_##lname, &policy(page, 1)->flags); }
+{ \
+ clear_bit(PG_##lname, &policy(page, 1)->flags); \
+ dept_page_clear_bit(page, PG_##lname); \
+}
#define __SETPAGEFLAG(uname, lname, policy) \
__FOLIO_SET_FLAG(lname, FOLIO_##policy) \
static __always_inline void __SetPage##uname(struct page *page) \
-{ __set_bit(PG_##lname, &policy(page, 1)->flags); }
+{ \
+ __set_bit(PG_##lname, &policy(page, 1)->flags); \
+ dept_page_set_bit(page, PG_##lname); \
+}
#define __CLEARPAGEFLAG(uname, lname, policy) \
__FOLIO_CLEAR_FLAG(lname, FOLIO_##policy) \
static __always_inline void __ClearPage##uname(struct page *page) \
-{ __clear_bit(PG_##lname, &policy(page, 1)->flags); }
+{ \
+ __clear_bit(PG_##lname, &policy(page, 1)->flags); \
+ dept_page_clear_bit(page, PG_##lname); \
+}
#define TESTSETFLAG(uname, lname, policy) \
FOLIO_TEST_SET_FLAG(lname, FOLIO_##policy) \
static __always_inline int TestSetPage##uname(struct page *page) \
-{ return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); }
+{ \
+ bool ret = test_and_set_bit(PG_##lname, &policy(page, 1)->flags);\
+ if (!ret) \
+ dept_page_set_bit(page, PG_##lname); \
+ return ret; \
+}
#define TESTCLEARFLAG(uname, lname, policy) \
FOLIO_TEST_CLEAR_FLAG(lname, FOLIO_##policy) \
static __always_inline int TestClearPage##uname(struct page *page) \
-{ return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); }
+{ \
+ bool ret = test_and_clear_bit(PG_##lname, &policy(page, 1)->flags);\
+ if (ret) \
+ dept_page_clear_bit(page, PG_##lname); \
+ return ret; \
+}
#define PAGEFLAG(uname, lname, policy) \
TESTPAGEFLAG(uname, lname, policy) \
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 2df35e65557d..a438d8f038de 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -1008,7 +1008,12 @@ void folio_unlock(struct folio *folio);
*/
static inline bool folio_trylock(struct folio *folio)
{
- return likely(!test_and_set_bit_lock(PG_locked, folio_flags(folio, 0)));
+ bool ret = !test_and_set_bit_lock(PG_locked, folio_flags(folio, 0));
+
+ if (ret)
+ dept_page_set_bit(&folio->page, PG_locked);
+
+ return likely(ret);
}
/*
diff --git a/mm/filemap.c b/mm/filemap.c
index 30de18c4fd28..ceb24a7ee0b1 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -46,6 +46,7 @@
#include <linux/pipe_fs_i.h>
#include <linux/splice.h>
#include <linux/rcupdate_wait.h>
+#include <linux/dept.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include "internal.h"
@@ -1108,6 +1109,7 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync,
if (flags & WQ_FLAG_CUSTOM) {
if (test_and_set_bit(key->bit_nr, &key->folio->flags))
return -1;
+ dept_page_set_bit(&key->folio->page, key->bit_nr);
flags |= WQ_FLAG_DONE;
}
}
@@ -1191,6 +1193,7 @@ static inline bool folio_trylock_flag(struct folio *folio, int bit_nr,
if (wait->flags & WQ_FLAG_EXCLUSIVE) {
if (test_and_set_bit(bit_nr, &folio->flags))
return false;
+ dept_page_set_bit(&folio->page, bit_nr);
} else if (test_bit(bit_nr, &folio->flags))
return false;
@@ -1201,6 +1204,9 @@ static inline bool folio_trylock_flag(struct folio *folio, int bit_nr,
/* How many times do we accept lock stealing from under a waiter? */
int sysctl_page_lock_unfairness = 5;
+struct dept_map __maybe_unused PG_locked_map = DEPT_MAP_INITIALIZER(PG_locked_map, NULL);
+EXPORT_SYMBOL(PG_locked_map);
+
static inline int folio_wait_bit_common(struct folio *folio, int bit_nr,
int state, enum behavior behavior)
{
@@ -1212,6 +1218,8 @@ static inline int folio_wait_bit_common(struct folio *folio, int bit_nr,
unsigned long pflags;
bool in_thrashing;
+ dept_page_wait_on_bit(&folio->page, bit_nr);
+
if (bit_nr == PG_locked &&
!folio_test_uptodate(folio) && folio_test_workingset(folio)) {
delayacct_thrashing_start(&in_thrashing);
@@ -1305,6 +1313,23 @@ static inline int folio_wait_bit_common(struct folio *folio, int bit_nr,
break;
}
+ /*
+ * dept_page_set_bit() might have been called already in
+ * folio_trylock_flag(), wake_page_function() or elsewhere.
+ * However, call it again to reset the wgen of dept to ensure
+ * dept_page_wait_on_bit() is called prior to
+ * dept_page_set_bit().
+ *
+ * Remember that dept considers all the waits between
+ * dept_page_set_bit() and dept_page_clear_bit() as potential
+ * event disturbers. Ensure the correct sequence so that dept
+ * can make correct decisions:
+ *
+ * wait -> acquire(set bit) -> release(clear bit)
+ */
+ if (wait->flags & WQ_FLAG_DONE)
+ dept_page_set_bit(&folio->page, bit_nr);
+
/*
* If a signal happened, this 'finish_wait()' may remove the last
* waiter from the wait-queues, but the folio waiters bit will remain
@@ -1481,6 +1506,7 @@ void folio_unlock(struct folio *folio)
BUILD_BUG_ON(PG_waiters != 7);
BUILD_BUG_ON(PG_locked > 7);
VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+ dept_page_clear_bit(&folio->page, PG_locked);
if (folio_xor_flags_has_waiters(folio, 1 << PG_locked))
folio_wake_bit(folio, PG_locked);
}
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 549e76af8f82..a0c9069d3740 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -27,6 +27,7 @@
#include <linux/swap.h>
#include <linux/cma.h>
#include <linux/crash_dump.h>
+#include <linux/dept.h>
#include "internal.h"
#include "slab.h"
#include "shuffle.h"
@@ -570,6 +571,7 @@ void __meminit __init_single_page(struct page *page, unsigned long pfn,
page_mapcount_reset(page);
page_cpupid_reset_last(page);
page_kasan_tag_reset(page);
+ dept_ext_wgen_init(&page->PG_locked_wgen);
INIT_LIST_HEAD(&page->lru);
#ifdef WANT_PAGE_VIRTUAL
--
2.17.1
next prev parent reply other threads:[~2024-05-08 10:03 UTC|newest]
Thread overview: 29+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-05-08 9:46 [PATCH v14 00/28] DEPT(Dependency Tracker) Byungchul Park
2024-05-08 9:46 ` [PATCH v14 01/28] llist: Move llist_{head,node} definition to types.h Byungchul Park
2024-05-08 9:46 ` [PATCH v14 02/28] dept: Implement Dept(Dependency Tracker) Byungchul Park
2024-05-08 9:47 ` [PATCH v14 03/28] dept: Add single event dependency tracker APIs Byungchul Park
2024-05-08 9:47 ` [PATCH v14 04/28] dept: Add lock " Byungchul Park
2024-05-08 9:47 ` [PATCH v14 05/28] dept: Tie to Lockdep and IRQ tracing Byungchul Park
2024-05-08 9:47 ` [PATCH v14 06/28] dept: Add proc knobs to show stats and dependency graph Byungchul Park
2024-05-08 9:47 ` [PATCH v14 07/28] dept: Distinguish each syscall context from another Byungchul Park
2024-05-08 9:47 ` [PATCH v14 08/28] dept: Distinguish each work " Byungchul Park
2024-05-08 9:47 ` [PATCH v14 09/28] dept: Add a mechanism to refill the internal memory pools on running out Byungchul Park
2024-05-08 9:47 ` [PATCH v14 10/28] dept: Record the latest one out of consecutive waits of the same class Byungchul Park
2024-05-08 9:47 ` [PATCH v14 11/28] dept: Apply sdt_might_sleep_{start,end}() to wait_for_completion()/complete() Byungchul Park
2024-05-08 9:47 ` [PATCH v14 12/28] dept: Apply sdt_might_sleep_{start,end}() to swait Byungchul Park
2024-05-08 9:47 ` [PATCH v14 13/28] dept: Apply sdt_might_sleep_{start,end}() to waitqueue wait Byungchul Park
2024-05-08 9:47 ` [PATCH v14 14/28] dept: Apply sdt_might_sleep_{start,end}() to hashed-waitqueue wait Byungchul Park
2024-05-08 9:47 ` [PATCH v14 15/28] dept: Apply sdt_might_sleep_{start,end}() to dma fence wait Byungchul Park
2024-05-08 9:47 ` [PATCH v14 16/28] dept: Track timeout waits separately with a new Kconfig Byungchul Park
2024-05-08 9:47 ` [PATCH v14 17/28] dept: Apply timeout consideration to wait_for_completion()/complete() Byungchul Park
2024-05-08 9:47 ` [PATCH v14 18/28] dept: Apply timeout consideration to swait Byungchul Park
2024-05-08 9:47 ` [PATCH v14 19/28] dept: Apply timeout consideration to waitqueue wait Byungchul Park
2024-05-08 9:47 ` [PATCH v14 20/28] dept: Apply timeout consideration to hashed-waitqueue wait Byungchul Park
2024-05-08 9:47 ` [PATCH v14 21/28] dept: Apply timeout consideration to dma fence wait Byungchul Park
2024-05-08 9:47 ` [PATCH v14 22/28] dept: Make Dept able to work with an external wgen Byungchul Park
2024-05-08 9:47 ` Byungchul Park [this message]
2024-05-08 9:47 ` [PATCH v14 24/28] dept: Print event context requestor's stacktrace on report Byungchul Park
2024-05-08 9:47 ` [PATCH v14 25/28] cpu/hotplug: Use a weaker annotation in AP thread Byungchul Park
2024-05-08 9:47 ` [PATCH v14 26/28] fs/jbd2: Use a weaker annotation in journal handling Byungchul Park
2024-05-08 9:47 ` [PATCH v14 27/28] dept: Add documentation for Dept Byungchul Park
2024-05-08 9:47 ` [PATCH v14 28/28] dept: Add documentation for Dept's APIs Byungchul Park
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240508094726.35754-24-byungchul@sk.com \
--to=byungchul@sk.com \
--cc=42.hyeyoo@gmail.com \
--cc=adilger.kernel@dilger.ca \
--cc=akpm@linux-foundation.org \
--cc=amir73il@gmail.com \
--cc=boqun.feng@gmail.com \
--cc=chris.p.wilson@intel.com \
--cc=cl@linux.com \
--cc=damien.lemoal@opensource.wdc.com \
--cc=dan.j.williams@intel.com \
--cc=daniel.vetter@ffwll.ch \
--cc=david@fromorbit.com \
--cc=dennis@kernel.org \
--cc=djwong@kernel.org \
--cc=dri-devel@lists.freedesktop.org \
--cc=duyuyang@gmail.com \
--cc=gregkh@linuxfoundation.org \
--cc=gwan-gyeong.mun@intel.com \
--cc=hamohammed.sa@gmail.com \
--cc=hannes@cmpxchg.org \
--cc=hch@infradead.org \
--cc=hdanton@sina.com \
--cc=her0gyugyu@gmail.com \
--cc=jack@suse.cz \
--cc=jglisse@redhat.com \
--cc=jlayton@kernel.org \
--cc=joel@joelfernandes.org \
--cc=johannes.berg@intel.com \
--cc=josef@toxicpanda.com \
--cc=kernel-team@lge.com \
--cc=kernel_team@skhynix.com \
--cc=linux-block@vger.kernel.org \
--cc=linux-ext4@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-ide@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=longman@redhat.com \
--cc=max.byungchul.park@gmail.com \
--cc=melissa.srw@gmail.com \
--cc=mhocko@kernel.org \
--cc=minchan@kernel.org \
--cc=mingo@redhat.com \
--cc=ngupta@vflare.org \
--cc=penberg@kernel.org \
--cc=peterz@infradead.org \
--cc=rientjes@google.com \
--cc=rodrigosiqueiramelo@gmail.com \
--cc=rostedt@goodmis.org \
--cc=sashal@kernel.org \
--cc=sj@kernel.org \
--cc=tglx@linutronix.de \
--cc=tj@kernel.org \
--cc=torvalds@linux-foundation.org \
--cc=tytso@mit.edu \
--cc=vbabka@suse.cz \
--cc=vdavydov.dev@gmail.com \
--cc=will@kernel.org \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).