linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Byungchul Park <byungchul@sk.com>
To: linux-kernel@vger.kernel.org
Cc: kernel_team@skhynix.com, torvalds@linux-foundation.org,
	damien.lemoal@opensource.wdc.com, linux-ide@vger.kernel.org,
	adilger.kernel@dilger.ca, linux-ext4@vger.kernel.org,
	mingo@redhat.com, peterz@infradead.org, will@kernel.org,
	tglx@linutronix.de, rostedt@goodmis.org, joel@joelfernandes.org,
	sashal@kernel.org, daniel.vetter@ffwll.ch, duyuyang@gmail.com,
	johannes.berg@intel.com, tj@kernel.org, tytso@mit.edu,
	willy@infradead.org, david@fromorbit.com, amir73il@gmail.com,
	gregkh@linuxfoundation.org, kernel-team@lge.com,
	linux-mm@kvack.org, akpm@linux-foundation.org, mhocko@kernel.org,
	minchan@kernel.org, hannes@cmpxchg.org, vdavydov.dev@gmail.com,
	sj@kernel.org, jglisse@redhat.com, dennis@kernel.org,
	cl@linux.com, penberg@kernel.org, rientjes@google.com,
	vbabka@suse.cz, ngupta@vflare.org, linux-block@vger.kernel.org,
	josef@toxicpanda.com, linux-fsdevel@vger.kernel.org,
	viro@zeniv.linux.org.uk, jack@suse.cz, jlayton@kernel.org,
	dan.j.williams@intel.com, hch@infradead.org, djwong@kernel.org,
	dri-devel@lists.freedesktop.org, rodrigosiqueiramelo@gmail.com,
	melissa.srw@gmail.com, hamohammed.sa@gmail.com,
	42.hyeyoo@gmail.com, chris.p.wilson@intel.com,
	gwan-gyeong.mun@intel.com, max.byungchul.park@gmail.com,
	boqun.feng@gmail.com, longman@redhat.com, hdanton@sina.com,
	her0gyugyu@gmail.com
Subject: [PATCH v10 rebased on v6.4 25/25] dept: Track the potential waits of PG_{locked,writeback}
Date: Mon,  3 Jul 2023 18:47:52 +0900	[thread overview]
Message-ID: <20230703094752.79269-26-byungchul@sk.com> (raw)
In-Reply-To: <20230703094752.79269-1-byungchul@sk.com>

Currently, Dept only tracks the real waits of PG_{locked,writeback} that
actually happened having gone through __schedule() to avoid false
positives. However, it ends in limited capacity for deadlock detection,
because anyway there might be still way more potential dependencies by
the waits that have yet to happen but may happen in the future so as to
cause a deadlock.

So let Dept assume that when PG_{locked,writeback} bit gets cleared,
there might be waits on the bit to be woken up.

Even though false positives may increase with the aggressive tracking,
it's worth doing it because it's going to be useful in practice. See the
following link for instance:

   https://lore.kernel.org/lkml/1674268856-31807-1-git-send-email-byungchul.park@lge.com/

Signed-off-by: Byungchul Park <byungchul@sk.com>
---
 include/linux/mm_types.h   |   3 +
 include/linux/page-flags.h | 112 +++++++++++++++++++++++++++++++++----
 include/linux/pagemap.h    |   7 ++-
 mm/filemap.c               |  11 +++-
 mm/mm_init.c               |   3 +
 5 files changed, 121 insertions(+), 15 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 306a3d1a0fa6..ac5048b66e5c 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -19,6 +19,7 @@
 #include <linux/workqueue.h>
 #include <linux/seqlock.h>
 #include <linux/percpu_counter.h>
+#include <linux/dept.h>
 
 #include <asm/mmu.h>
 
@@ -228,6 +229,8 @@ struct page {
 #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
 	int _last_cpupid;
 #endif
+	struct dept_ext_wgen PG_locked_wgen;
+	struct dept_ext_wgen PG_writeback_wgen;
 } _struct_page_alignment;
 
 /*
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 92a2063a0a23..d91e67ed194c 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -196,6 +196,50 @@ enum pageflags {
 
 #ifndef __GENERATING_BOUNDS_H
 
+#ifdef CONFIG_DEPT
+#include <linux/kernel.h>
+#include <linux/dept.h>
+
+extern struct dept_map PG_locked_map;
+extern struct dept_map PG_writeback_map;
+
+/*
+ * Place the following annotations in its suitable point in code:
+ *
+ *	Annotate dept_page_set_bit() around firstly set_bit*()
+ *	Annotate dept_page_clear_bit() around clear_bit*()
+ *	Annotate dept_page_wait_on_bit() around wait_on_bit*()
+ */
+
+static inline void dept_page_set_bit(struct page *p, int bit_nr)
+{
+	if (bit_nr == PG_locked)
+		dept_request_event(&PG_locked_map, &p->PG_locked_wgen);
+	else if (bit_nr == PG_writeback)
+		dept_request_event(&PG_writeback_map, &p->PG_writeback_wgen);
+}
+
+static inline void dept_page_clear_bit(struct page *p, int bit_nr)
+{
+	if (bit_nr == PG_locked)
+		dept_event(&PG_locked_map, 1UL, _RET_IP_, __func__, &p->PG_locked_wgen);
+	else if (bit_nr == PG_writeback)
+		dept_event(&PG_writeback_map, 1UL, _RET_IP_, __func__, &p->PG_writeback_wgen);
+}
+
+static inline void dept_page_wait_on_bit(struct page *p, int bit_nr)
+{
+	if (bit_nr == PG_locked)
+		dept_wait(&PG_locked_map, 1UL, _RET_IP_, __func__, 0, -1L);
+	else if (bit_nr == PG_writeback)
+		dept_wait(&PG_writeback_map, 1UL, _RET_IP_, __func__, 0, -1L);
+}
+#else
+#define dept_page_set_bit(p, bit_nr)		do { } while (0)
+#define dept_page_clear_bit(p, bit_nr)		do { } while (0)
+#define dept_page_wait_on_bit(p, bit_nr)	do { } while (0)
+#endif
+
 #ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
 DECLARE_STATIC_KEY_FALSE(hugetlb_optimize_vmemmap_key);
 
@@ -377,44 +421,88 @@ static __always_inline int Page##uname(struct page *page)		\
 #define SETPAGEFLAG(uname, lname, policy)				\
 static __always_inline							\
 void folio_set_##lname(struct folio *folio)				\
-{ set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); }		\
+{									\
+	set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy));	\
+	dept_page_set_bit(&folio->page, PG_##lname);			\
+}									\
 static __always_inline void SetPage##uname(struct page *page)		\
-{ set_bit(PG_##lname, &policy(page, 1)->flags); }
+{									\
+	set_bit(PG_##lname, &policy(page, 1)->flags);			\
+	dept_page_set_bit(page, PG_##lname);				\
+}
 
 #define CLEARPAGEFLAG(uname, lname, policy)				\
 static __always_inline							\
 void folio_clear_##lname(struct folio *folio)				\
-{ clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); }		\
+{									\
+	clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy));	\
+	dept_page_clear_bit(&folio->page, PG_##lname);			\
+}									\
 static __always_inline void ClearPage##uname(struct page *page)		\
-{ clear_bit(PG_##lname, &policy(page, 1)->flags); }
+{									\
+	clear_bit(PG_##lname, &policy(page, 1)->flags);			\
+	dept_page_clear_bit(page, PG_##lname);				\
+}
 
 #define __SETPAGEFLAG(uname, lname, policy)				\
 static __always_inline							\
 void __folio_set_##lname(struct folio *folio)				\
-{ __set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); }		\
+{									\
+	__set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy));	\
+	dept_page_set_bit(&folio->page, PG_##lname);			\
+}									\
 static __always_inline void __SetPage##uname(struct page *page)		\
-{ __set_bit(PG_##lname, &policy(page, 1)->flags); }
+{									\
+	__set_bit(PG_##lname, &policy(page, 1)->flags);			\
+	dept_page_set_bit(page, PG_##lname);				\
+}
 
 #define __CLEARPAGEFLAG(uname, lname, policy)				\
 static __always_inline							\
 void __folio_clear_##lname(struct folio *folio)				\
-{ __clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); }	\
+{									\
+	__clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy));	\
+	dept_page_clear_bit(&folio->page, PG_##lname);			\
+}									\
 static __always_inline void __ClearPage##uname(struct page *page)	\
-{ __clear_bit(PG_##lname, &policy(page, 1)->flags); }
+{									\
+	__clear_bit(PG_##lname, &policy(page, 1)->flags);		\
+	dept_page_clear_bit(page, PG_##lname);				\
+}
 
 #define TESTSETFLAG(uname, lname, policy)				\
 static __always_inline							\
 bool folio_test_set_##lname(struct folio *folio)			\
-{ return test_and_set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
+{									\
+	bool ret = test_and_set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy));\
+	if (!ret)							\
+		dept_page_set_bit(&folio->page, PG_##lname);		\
+	return ret;							\
+}									\
 static __always_inline int TestSetPage##uname(struct page *page)	\
-{ return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); }
+{									\
+	bool ret = test_and_set_bit(PG_##lname, &policy(page, 1)->flags);\
+	if (!ret)							\
+		dept_page_set_bit(page, PG_##lname);			\
+	return ret;							\
+}
 
 #define TESTCLEARFLAG(uname, lname, policy)				\
 static __always_inline							\
 bool folio_test_clear_##lname(struct folio *folio)			\
-{ return test_and_clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
+{									\
+	bool ret = test_and_clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy));\
+	if (ret)							\
+		dept_page_clear_bit(&folio->page, PG_##lname);		\
+	return ret;							\
+}									\
 static __always_inline int TestClearPage##uname(struct page *page)	\
-{ return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); }
+{									\
+	bool ret = test_and_clear_bit(PG_##lname, &policy(page, 1)->flags);\
+	if (ret)							\
+		dept_page_clear_bit(page, PG_##lname);			\
+	return ret;							\
+}
 
 #define PAGEFLAG(uname, lname, policy)					\
 	TESTPAGEFLAG(uname, lname, policy)				\
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index a56308a9d1a4..a88e2430f415 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -915,7 +915,12 @@ void folio_unlock(struct folio *folio);
  */
 static inline bool folio_trylock(struct folio *folio)
 {
-	return likely(!test_and_set_bit_lock(PG_locked, folio_flags(folio, 0)));
+	bool ret = !test_and_set_bit_lock(PG_locked, folio_flags(folio, 0));
+
+	if (ret)
+		dept_page_set_bit(&folio->page, PG_locked);
+
+	return likely(ret);
 }
 
 /*
diff --git a/mm/filemap.c b/mm/filemap.c
index eed64dc88e43..f05208bb50dc 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1101,6 +1101,7 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync,
 		if (flags & WQ_FLAG_CUSTOM) {
 			if (test_and_set_bit(key->bit_nr, &key->folio->flags))
 				return -1;
+			dept_page_set_bit(&key->folio->page, key->bit_nr);
 			flags |= WQ_FLAG_DONE;
 		}
 	}
@@ -1210,6 +1211,7 @@ static inline bool folio_trylock_flag(struct folio *folio, int bit_nr,
 	if (wait->flags & WQ_FLAG_EXCLUSIVE) {
 		if (test_and_set_bit(bit_nr, &folio->flags))
 			return false;
+		dept_page_set_bit(&folio->page, bit_nr);
 	} else if (test_bit(bit_nr, &folio->flags))
 		return false;
 
@@ -1220,8 +1222,10 @@ static inline bool folio_trylock_flag(struct folio *folio, int bit_nr,
 /* How many times do we accept lock stealing from under a waiter? */
 int sysctl_page_lock_unfairness = 5;
 
-static struct dept_map __maybe_unused PG_locked_map = DEPT_MAP_INITIALIZER(PG_locked_map, NULL);
-static struct dept_map __maybe_unused PG_writeback_map = DEPT_MAP_INITIALIZER(PG_writeback_map, NULL);
+struct dept_map __maybe_unused PG_locked_map = DEPT_MAP_INITIALIZER(PG_locked_map, NULL);
+struct dept_map __maybe_unused PG_writeback_map = DEPT_MAP_INITIALIZER(PG_writeback_map, NULL);
+EXPORT_SYMBOL(PG_locked_map);
+EXPORT_SYMBOL(PG_writeback_map);
 
 static inline int folio_wait_bit_common(struct folio *folio, int bit_nr,
 		int state, enum behavior behavior)
@@ -1234,6 +1238,7 @@ static inline int folio_wait_bit_common(struct folio *folio, int bit_nr,
 	unsigned long pflags;
 	bool in_thrashing;
 
+	dept_page_wait_on_bit(&folio->page, bit_nr);
 	if (bit_nr == PG_locked)
 		sdt_might_sleep_start(&PG_locked_map);
 	else if (bit_nr == PG_writeback)
@@ -1331,6 +1336,7 @@ static inline int folio_wait_bit_common(struct folio *folio, int bit_nr,
 		wait->flags |= WQ_FLAG_DONE;
 		break;
 	}
+	dept_page_set_bit(&folio->page, bit_nr);
 
 	/*
 	 * If a signal happened, this 'finish_wait()' may remove the last
@@ -1538,6 +1544,7 @@ void folio_unlock(struct folio *folio)
 	BUILD_BUG_ON(PG_waiters != 7);
 	BUILD_BUG_ON(PG_locked > 7);
 	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+	dept_page_clear_bit(&folio->page, PG_locked);
 	if (clear_bit_unlock_is_negative_byte(PG_locked, folio_flags(folio, 0)))
 		folio_wake_bit(folio, PG_locked);
 }
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 7f7f9c677854..a339f0cbe1b2 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -26,6 +26,7 @@
 #include <linux/pgtable.h>
 #include <linux/swap.h>
 #include <linux/cma.h>
+#include <linux/dept.h>
 #include "internal.h"
 #include "slab.h"
 #include "shuffle.h"
@@ -558,6 +559,8 @@ static void __meminit __init_single_page(struct page *page, unsigned long pfn,
 	page_mapcount_reset(page);
 	page_cpupid_reset_last(page);
 	page_kasan_tag_reset(page);
+	dept_ext_wgen_init(&page->PG_locked_wgen);
+	dept_ext_wgen_init(&page->PG_writeback_wgen);
 
 	INIT_LIST_HEAD(&page->lru);
 #ifdef WANT_PAGE_VIRTUAL
-- 
2.17.1


      parent reply	other threads:[~2023-07-03  9:50 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-07-03  9:47 [PATCH v10 rebased on v6.4 00/25] DEPT(Dependency Tracker) Byungchul Park
2023-07-03  9:47 ` [PATCH v10 rebased on v6.4 01/25] llist: Move llist_{head,node} definition to types.h Byungchul Park
2023-07-03  9:47 ` [PATCH v10 rebased on v6.4 02/25] dept: Implement Dept(Dependency Tracker) Byungchul Park
2023-07-03  9:47 ` [PATCH v10 rebased on v6.4 03/25] dept: Add single event dependency tracker APIs Byungchul Park
2023-07-03  9:47 ` [PATCH v10 rebased on v6.4 04/25] dept: Add lock " Byungchul Park
2023-07-03  9:47 ` [PATCH v10 rebased on v6.4 05/25] dept: Tie to Lockdep and IRQ tracing Byungchul Park
2023-07-03  9:47 ` [PATCH v10 rebased on v6.4 06/25] dept: Add proc knobs to show stats and dependency graph Byungchul Park
2023-07-03  9:47 ` [PATCH v10 rebased on v6.4 07/25] dept: Apply sdt_might_sleep_{start,end}() to wait_for_completion()/complete() Byungchul Park
2023-07-03  9:47 ` [PATCH v10 rebased on v6.4 08/25] dept: Apply sdt_might_sleep_{start,end}() to PG_{locked,writeback} wait Byungchul Park
2023-07-03  9:47 ` [PATCH v10 rebased on v6.4 09/25] dept: Apply sdt_might_sleep_{start,end}() to swait Byungchul Park
2023-07-03  9:47 ` [PATCH v10 rebased on v6.4 10/25] dept: Apply sdt_might_sleep_{start,end}() to waitqueue wait Byungchul Park
2023-07-03  9:47 ` [PATCH v10 rebased on v6.4 11/25] dept: Apply sdt_might_sleep_{start,end}() to hashed-waitqueue wait Byungchul Park
2023-07-03  9:47 ` [PATCH v10 rebased on v6.4 12/25] dept: Distinguish each syscall context from another Byungchul Park
2023-07-03  9:47 ` [PATCH v10 rebased on v6.4 13/25] dept: Distinguish each work " Byungchul Park
2023-07-03  9:47 ` [PATCH v10 rebased on v6.4 14/25] dept: Add a mechanism to refill the internal memory pools on running out Byungchul Park
2023-07-03  9:47 ` [PATCH v10 rebased on v6.4 15/25] locking/lockdep, cpu/hotplus: Use a weaker annotation in AP thread Byungchul Park
2023-07-03  9:47 ` [PATCH v10 rebased on v6.4 16/25] dept: Apply sdt_might_sleep_{start,end}() to dma fence wait Byungchul Park
2023-07-03  9:47 ` [PATCH v10 rebased on v6.4 17/25] dept: Track timeout waits separately with a new Kconfig Byungchul Park
2023-07-03  9:47 ` [PATCH v10 rebased on v6.4 18/25] dept: Apply timeout consideration to wait_for_completion()/complete() Byungchul Park
2023-07-03  9:47 ` [PATCH v10 rebased on v6.4 19/25] dept: Apply timeout consideration to swait Byungchul Park
2023-07-03  9:47 ` [PATCH v10 rebased on v6.4 20/25] dept: Apply timeout consideration to waitqueue wait Byungchul Park
2023-07-03  9:47 ` [PATCH v10 rebased on v6.4 21/25] dept: Apply timeout consideration to hashed-waitqueue wait Byungchul Park
2023-07-03  9:47 ` [PATCH v10 rebased on v6.4 22/25] dept: Apply timeout consideration to dma fence wait Byungchul Park
2023-07-03  9:47 ` [PATCH v10 rebased on v6.4 23/25] dept: Record the latest one out of consecutive waits of the same class Byungchul Park
2023-07-03  9:47 ` [PATCH v10 rebased on v6.4 24/25] dept: Make Dept able to work with an external wgen Byungchul Park
2023-07-03  9:47 ` Byungchul Park [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230703094752.79269-26-byungchul@sk.com \
    --to=byungchul@sk.com \
    --cc=42.hyeyoo@gmail.com \
    --cc=adilger.kernel@dilger.ca \
    --cc=akpm@linux-foundation.org \
    --cc=amir73il@gmail.com \
    --cc=boqun.feng@gmail.com \
    --cc=chris.p.wilson@intel.com \
    --cc=cl@linux.com \
    --cc=damien.lemoal@opensource.wdc.com \
    --cc=dan.j.williams@intel.com \
    --cc=daniel.vetter@ffwll.ch \
    --cc=david@fromorbit.com \
    --cc=dennis@kernel.org \
    --cc=djwong@kernel.org \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=duyuyang@gmail.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=gwan-gyeong.mun@intel.com \
    --cc=hamohammed.sa@gmail.com \
    --cc=hannes@cmpxchg.org \
    --cc=hch@infradead.org \
    --cc=hdanton@sina.com \
    --cc=her0gyugyu@gmail.com \
    --cc=jack@suse.cz \
    --cc=jglisse@redhat.com \
    --cc=jlayton@kernel.org \
    --cc=joel@joelfernandes.org \
    --cc=johannes.berg@intel.com \
    --cc=josef@toxicpanda.com \
    --cc=kernel-team@lge.com \
    --cc=kernel_team@skhynix.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-ide@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=longman@redhat.com \
    --cc=max.byungchul.park@gmail.com \
    --cc=melissa.srw@gmail.com \
    --cc=mhocko@kernel.org \
    --cc=minchan@kernel.org \
    --cc=mingo@redhat.com \
    --cc=ngupta@vflare.org \
    --cc=penberg@kernel.org \
    --cc=peterz@infradead.org \
    --cc=rientjes@google.com \
    --cc=rodrigosiqueiramelo@gmail.com \
    --cc=rostedt@goodmis.org \
    --cc=sashal@kernel.org \
    --cc=sj@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=tj@kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=tytso@mit.edu \
    --cc=vbabka@suse.cz \
    --cc=vdavydov.dev@gmail.com \
    --cc=viro@zeniv.linux.org.uk \
    --cc=will@kernel.org \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).