From: Aravinda Prasad <aravinda.prasad@intel.com>
To: damon@lists.linux.dev, linux-mm@kvack.org, sj@kernel.org,
	linux-kernel@vger.kernel.org
Cc: aravinda.prasad@intel.com, s2322819@ed.ac.uk,
	sandeep4.kumar@intel.com, ying.huang@intel.com,
	dave.hansen@intel.com, dan.j.williams@intel.com,
	sreenivas.subramoney@intel.com, antti.kervinen@intel.com,
	alexander.kanevskiy@intel.com
Subject: [PATCH v2 2/3] mm/damon: profiling enhancement
Date: Mon, 18 Mar 2024 18:58:47 +0530	[thread overview]
Message-ID: <20240318132848.82686-3-aravinda.prasad@intel.com> (raw)
In-Reply-To: <20240318132848.82686-1-aravinda.prasad@intel.com>

This patch adds a profiling enhancement for DAMON. Given the
sampling_addr and its region bounds, it picks the highest possible
page table tree level (P*D) such that the address range covered by
an entry at that level lies entirely within the region's bounds.
Once a level is picked, the accessed bit is set and checked at that
level. Because higher levels of the page table tree cover a larger
address range, a set accessed bit at such a level implies that one
or more pages in the given region were accessed. This helps quickly
identify hot regions when the region size is large (e.g., several
GBs), which is common for applications with a large memory footprint.
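
The selection essentially asks whether the naturally aligned block that a
candidate level maps around sampling_addr lies entirely inside the region.
Below is a minimal userspace sketch of that check (not the kernel code in
the diff): it assumes x86-64 4-level paging coverage sizes (4 KiB PTE,
2 MiB PMD, 1 GiB PUD, 512 GiB PGD) and hard-coded shifts in place of the
kernel's p?d_addr_start()/p?d_addr_end() helpers used further down; all
names in it are illustrative only.

    #include <stdint.h>
    #include <stdio.h>

    static const char *levels[] = { "PTE", "PMD", "PUD", "PGD" };
    static const unsigned shifts[] = { 12, 21, 30, 39 };

    /* Highest level whose naturally aligned block around addr fits in [start, end) */
    static int pick_level(uint64_t start, uint64_t end, uint64_t addr)
    {
            int level = 0;

            for (int i = 1; i < 4; i++) {
                    uint64_t size = 1ULL << shifts[i];
                    uint64_t block = addr & ~(size - 1);

                    if (block < start || block + size > end)
                            break;
                    level = i;
            }
            return level;
    }

    int main(void)
    {
            /* A 4 GiB region, with the sampling address well inside it */
            uint64_t start = 0x100000000ULL, end = 0x200000000ULL;
            uint64_t addr  = 0x180001000ULL;

            printf("picked level: %s\n", levels[pick_level(start, end, addr)]);
            return 0;       /* prints "picked level: PUD" */
    }

For this 4 GiB region the 1 GiB PUD block around the sampling address fits,
but the 512 GiB PGD block does not, so the accessed bit would be set and
checked at the PUD level.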

Signed-off-by: Alan Nair <alan.nair@intel.com>
Signed-off-by: Sandeep Kumar <sandeep4.kumar@intel.com>
Signed-off-by: Aravinda Prasad <aravinda.prasad@intel.com>
---
 mm/damon/vaddr.c | 233 ++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 221 insertions(+), 12 deletions(-)

diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index 381559e4a1fa..daa1a2aedab6 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -52,6 +52,53 @@ static struct mm_struct *damon_get_mm(struct damon_target *t)
 	return mm;
 }
 
+/* Pick the highest possible page table profiling level for addr
+ * in the region defined by start and end
+ */
+static int pick_profile_level(unsigned long start, unsigned long end,
+		unsigned long addr)
+{
+	/* Start with PTE and check if higher levels can be picked */
+	int level = 0;
+
+	if (!arch_has_hw_nonleaf_pmd_young())
+		return level;
+
+	/* Check if PMD or higher can be picked, else use PTE */
+	if (pmd_addr_start(addr, (start) - 1) < start
+			|| pmd_addr_end(addr, (end) + 1) > end)
+		return level;
+
+	level++;
+	/* Check if PUD or higher can be picked, else use PMD */
+	if (pud_addr_start(addr, (start) - 1) < start
+			|| pud_addr_end(addr, (end) + 1) > end)
+		return level;
+
+	if (pgtable_l5_enabled()) {
+		level++;
+		/* Check if P4D or higher can be picked, else use PUD */
+		if (p4d_addr_start(addr, (start) - 1) < start
+				|| p4d_addr_end(addr, (end) + 1) > end)
+			return level;
+	}
+
+	level++;
+	/* Check if PGD can be picked, else return PUD level */
+	if (pgd_addr_start(addr, (start) - 1) < start
+			|| pgd_addr_end(addr, (end) + 1) > end)
+		return level;
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	/* Do not pick PGD level if PTI is enabled */
+	if (static_cpu_has(X86_FEATURE_PTI))
+		return level;
+#endif
+
+	/* Return PGD level */
+	return ++level;
+}
+
 /*
  * Functions for the initial monitoring target regions construction
  */
@@ -387,16 +434,90 @@ static int damon_mkold_hugetlb_entry(pte_t *pte, unsigned long hmask,
 #define damon_mkold_hugetlb_entry NULL
 #endif /* CONFIG_HUGETLB_PAGE */
 
-static const struct mm_walk_ops damon_mkold_ops = {
-	.pmd_entry = damon_mkold_pmd_entry,
+
+#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG
+static int damon_mkold_pmd(pmd_t *pmd, unsigned long addr,
+	unsigned long next, struct mm_walk *walk)
+{
+	spinlock_t *ptl;
+
+	if (!pmd_present(*pmd))
+		return 0;
+
+	ptl = pmd_lock(walk->mm, pmd);
+	pmdp_clear_young_notify(walk->vma, addr, pmd);
+	spin_unlock(ptl);
+
+	return 0;
+}
+
+static int damon_mkold_pud(pud_t *pud, unsigned long addr,
+	unsigned long next, struct mm_walk *walk)
+{
+	spinlock_t *ptl;
+
+	if (!pud_present(*pud))
+		return 0;
+
+	ptl = pud_lock(walk->mm, pud);
+	pudp_clear_young_notify(walk->vma, addr, pud);
+	spin_unlock(ptl);
+
+	return 0;
+}
+
+static int damon_mkold_p4d(p4d_t *p4d, unsigned long addr,
+	unsigned long next, struct mm_walk *walk)
+{
+	struct mm_struct *mm = walk->mm;
+
+	if (!p4d_present(*p4d))
+		return 0;
+
+	spin_lock(&mm->page_table_lock);
+	p4dp_clear_young_notify(walk->vma, addr, p4d);
+	spin_unlock(&mm->page_table_lock);
+
+	return 0;
+}
+
+static int damon_mkold_pgd(pgd_t *pgd, unsigned long addr,
+	unsigned long next, struct mm_walk *walk)
+{
+	struct mm_struct *mm = walk->mm;
+
+	if (!pgd_present(*pgd))
+		return 0;
+
+	spin_lock(&mm->page_table_lock);
+	pgdp_clear_young_notify(walk->vma, addr, pgd);
+	spin_unlock(&mm->page_table_lock);
+
+	return 0;
+}
+#endif
+
+static const struct mm_walk_ops damon_mkold_ops[] = {
+	{.pmd_entry = damon_mkold_pmd_entry,
 	.hugetlb_entry = damon_mkold_hugetlb_entry,
-	.walk_lock = PGWALK_RDLOCK,
+	.walk_lock = PGWALK_RDLOCK},
+#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG
+	{.pmd_entry = damon_mkold_pmd},
+	{.pud_entry = damon_mkold_pud},
+	{.p4d_entry = damon_mkold_p4d},
+	{.pgd_entry = damon_mkold_pgd},
+#endif
 };
 
-static void damon_va_mkold(struct mm_struct *mm, unsigned long addr)
+static void damon_va_mkold(struct mm_struct *mm, struct damon_region *r)
 {
+	unsigned long addr = r->sampling_addr;
+	int profile_level;
+
+	profile_level = pick_profile_level(r->ar.start, r->ar.end, addr);
+
 	mmap_read_lock(mm);
-	walk_page_range(mm, addr, addr + 1, &damon_mkold_ops, NULL);
+	walk_page_range(mm, addr, addr + 1, &damon_mkold_ops[profile_level], NULL);
 	mmap_read_unlock(mm);
 }
 
@@ -409,7 +530,7 @@ static void __damon_va_prepare_access_check(struct mm_struct *mm,
 {
 	r->sampling_addr = damon_rand(r->ar.start, r->ar.end);
 
-	damon_va_mkold(mm, r->sampling_addr);
+	damon_va_mkold(mm, r);
 }
 
 static void damon_va_prepare_access_checks(struct damon_ctx *ctx)
@@ -531,22 +652,110 @@ static int damon_young_hugetlb_entry(pte_t *pte, unsigned long hmask,
 #define damon_young_hugetlb_entry NULL
 #endif /* CONFIG_HUGETLB_PAGE */
 
-static const struct mm_walk_ops damon_young_ops = {
-	.pmd_entry = damon_young_pmd_entry,
+
+#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG
+static int damon_young_pmd(pmd_t *pmd, unsigned long addr,
+		unsigned long next, struct mm_walk *walk)
+{
+	spinlock_t *ptl;
+	struct damon_young_walk_private *priv = walk->private;
+
+	if (!pmd_present(*pmd))
+		return 0;
+
+	ptl = pmd_lock(walk->mm, pmd);
+	if (pmd_young(*pmd) || mmu_notifier_test_young(walk->mm, addr))
+		priv->young = true;
+
+	*priv->folio_sz = (1UL << PMD_SHIFT);
+	spin_unlock(ptl);
+
+	return 0;
+}
+
+static int damon_young_pud(pud_t *pud, unsigned long addr,
+		unsigned long next, struct mm_walk *walk)
+{
+	spinlock_t *ptl;
+	struct damon_young_walk_private *priv = walk->private;
+
+	if (!pud_present(*pud))
+		return 0;
+
+	ptl = pud_lock(walk->mm, pud);
+	if (pud_young(*pud) || mmu_notifier_test_young(walk->mm, addr))
+		priv->young = true;
+
+	*priv->folio_sz = (1UL << PUD_SHIFT);
+	spin_unlock(ptl);
+
+	return 0;
+}
+
+static int damon_young_p4d(p4d_t *p4d, unsigned long addr,
+		unsigned long next, struct mm_walk *walk)
+{
+	struct mm_struct *mm = walk->mm;
+	struct damon_young_walk_private *priv = walk->private;
+
+	if (!p4d_present(*p4d))
+		return 0;
+
+	spin_lock(&mm->page_table_lock);
+	if (p4d_young(*p4d) || mmu_notifier_test_young(walk->mm, addr))
+		priv->young = true;
+
+	*priv->folio_sz = (1UL << P4D_SHIFT);
+	spin_unlock(&mm->page_table_lock);
+
+	return 0;
+}
+
+static int damon_young_pgd(pgd_t *pgd, unsigned long addr,
+		unsigned long next, struct mm_walk *walk)
+{
+	struct damon_young_walk_private *priv = walk->private;
+
+	if (!pgd_present(*pgd))
+		return 0;
+
+	spin_lock(&pgd_lock);
+	if (pgd_young(*pgd) || mmu_notifier_test_young(walk->mm, addr))
+		priv->young = true;
+
+	*priv->folio_sz = (1UL << PGDIR_SHIFT);
+	spin_unlock(&pgd_lock);
+
+	return 0;
+}
+#endif
+
+static const struct mm_walk_ops damon_young_ops[] = {
+	{.pmd_entry = damon_young_pmd_entry,
 	.hugetlb_entry = damon_young_hugetlb_entry,
-	.walk_lock = PGWALK_RDLOCK,
+	.walk_lock = PGWALK_RDLOCK},
+#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG
+	{.pmd_entry = damon_young_pmd},
+	{.pud_entry = damon_young_pud},
+	{.p4d_entry = damon_young_p4d},
+	{.pgd_entry = damon_young_pgd},
+#endif
 };
 
-static bool damon_va_young(struct mm_struct *mm, unsigned long addr,
+static bool damon_va_young(struct mm_struct *mm, struct damon_region *r,
 		unsigned long *folio_sz)
 {
+	unsigned long addr = r->sampling_addr;
+	int profile_level;
 	struct damon_young_walk_private arg = {
 		.folio_sz = folio_sz,
 		.young = false,
 	};
 
+	profile_level = pick_profile_level(r->ar.start, r->ar.end, addr);
+
 	mmap_read_lock(mm);
-	walk_page_range(mm, addr, addr + 1, &damon_young_ops, &arg);
+	walk_page_range(mm, addr, addr + 1, &damon_young_ops[profile_level], &arg);
 	mmap_read_unlock(mm);
 	return arg.young;
 }
@@ -577,7 +786,7 @@ static void __damon_va_check_access(struct mm_struct *mm,
 		return;
 	}
 
-	last_accessed = damon_va_young(mm, r->sampling_addr, &last_folio_sz);
+	last_accessed = damon_va_young(mm, r, &last_folio_sz);
 	damon_update_region_access_rate(r, last_accessed, attrs);
 
 	last_addr = r->sampling_addr;
-- 
2.21.3



Thread overview: 15+ messages
2024-03-18 13:28 [PATCH v2 0/3] mm/damon: Profiling enhancements for DAMON Aravinda Prasad
2024-03-18 13:28 ` [PATCH v2 1/3] mm/damon: mm infrastructure support Aravinda Prasad
2024-03-18 20:27   ` kernel test robot
2024-03-18 13:28 ` Aravinda Prasad [this message]
2024-03-18 18:23   ` [PATCH v2 2/3] mm/damon: profiling enhancement kernel test robot
2024-03-18 21:59   ` kernel test robot
2024-03-18 13:28 ` [PATCH v2 3/3] mm/damon: documentation updates Aravinda Prasad
2024-03-19  0:51 ` [PATCH v2 0/3] mm/damon: Profiling enhancements for DAMON Yu Zhao
2024-03-19  5:20 ` SeongJae Park
2024-03-19 10:56   ` Prasad, Aravinda
2024-03-20 12:31   ` Prasad, Aravinda
2024-03-21 23:10     ` SeongJae Park
2024-03-22 12:12       ` Prasad, Aravinda
2024-03-22 18:32         ` SeongJae Park
2024-03-25  7:50           ` Prasad, Aravinda
