LKML Archive on lore.kernel.org
 help / color / Atom feed
From: Song Liu <songliubraving@fb.com>
To: <linux-kernel@vger.kernel.org>, <linux-mm@kvack.org>
Cc: <kernel-team@fb.com>, Song Liu <songliubraving@fb.com>,
	<stable@vger.kernel.org>, Thomas Gleixner <tglx@linutronix.de>,
	Dave Hansen <dave.hansen@intel.com>,
	Peter Zijlstra <peterz@infradead.org>
Subject: [PATCH] x86/mm: Do not split_large_page() for set_kernel_text_rw()
Date: Thu, 22 Aug 2019 22:23:35 -0700
Message-ID: <20190823052335.572133-1-songliubraving@fb.com> (raw)

As 4k pages check was removed from cpa [1], set_kernel_text_rw() leads to
split_large_page() for all kernel text pages. This means a single kprobe
will put all kernel text in 4k pages:

  root@ ~# grep ffff81000000- /sys/kernel/debug/page_tables/kernel
  0xffffffff81000000-0xffffffff82400000     20M  ro    PSE      x  pmd

  root@ ~# echo ONE_KPROBE >> /sys/kernel/debug/tracing/kprobe_events
  root@ ~# echo 1 > /sys/kernel/debug/tracing/events/kprobes/enable

  root@ ~# grep ffff81000000- /sys/kernel/debug/page_tables/kernel
  0xffffffff81000000-0xffffffff82400000     20M  ro             x  pte

To fix this issue, introduce CPA_FLIP_TEXT_RW to bypass "Text RO" check
in static_protections().

Two helper functions set_text_rw() and set_text_ro() are added to flip
_PAGE_RW bit for kernel text.

[1] commit 585948f4f695 ("x86/mm/cpa: Avoid the 4k pages check completely")

Fixes: 585948f4f695 ("x86/mm/cpa: Avoid the 4k pages check completely")
Cc: stable@vger.kernel.org  # v4.20+
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Song Liu <songliubraving@fb.com>
---
 arch/x86/mm/init_64.c     |  4 ++--
 arch/x86/mm/mm_internal.h |  4 ++++
 arch/x86/mm/pageattr.c    | 34 +++++++++++++++++++++++++---------
 3 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index a6b5c653727b..5745fdcc429e 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1276,7 +1276,7 @@ void set_kernel_text_rw(void)
 	 * mapping will always be RO. Refer to the comment in
 	 * static_protections() in pageattr.c
 	 */
-	set_memory_rw(start, (end - start) >> PAGE_SHIFT);
+	set_text_rw(start, (end - start) >> PAGE_SHIFT);
 }
 
 void set_kernel_text_ro(void)
@@ -1293,7 +1293,7 @@ void set_kernel_text_ro(void)
 	/*
 	 * Set the kernel identity mapping for text RO.
 	 */
-	set_memory_ro(start, (end - start) >> PAGE_SHIFT);
+	set_text_ro(start, (end - start) >> PAGE_SHIFT);
 }
 
 void mark_rodata_ro(void)
diff --git a/arch/x86/mm/mm_internal.h b/arch/x86/mm/mm_internal.h
index eeae142062ed..65b84b471770 100644
--- a/arch/x86/mm/mm_internal.h
+++ b/arch/x86/mm/mm_internal.h
@@ -24,4 +24,8 @@ void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache);
 
 extern unsigned long tlb_single_page_flush_ceiling;
 
+int set_text_rw(unsigned long addr, int numpages);
+
+int set_text_ro(unsigned long addr, int numpages);
+
 #endif	/* __X86_MM_INTERNAL_H */
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 6a9a77a403c9..44a885df776d 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -66,6 +66,7 @@ static DEFINE_SPINLOCK(cpa_lock);
 #define CPA_ARRAY 2
 #define CPA_PAGES_ARRAY 4
 #define CPA_NO_CHECK_ALIAS 8 /* Do not search for aliases */
+#define CPA_FLIP_TEXT_RW 0x10 /* allow flip _PAGE_RW for kernel text */
 
 #ifdef CONFIG_PROC_FS
 static unsigned long direct_pages_count[PG_LEVEL_NUM];
@@ -516,7 +517,7 @@ static inline void check_conflict(int warnlvl, pgprot_t prot, pgprotval_t val,
  */
 static inline pgprot_t static_protections(pgprot_t prot, unsigned long start,
 					  unsigned long pfn, unsigned long npg,
-					  int warnlvl)
+					  int warnlvl, unsigned int cpa_flags)
 {
 	pgprotval_t forbidden, res;
 	unsigned long end;
@@ -535,9 +536,11 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long start,
 	check_conflict(warnlvl, prot, res, start, end, pfn, "Text NX");
 	forbidden = res;
 
-	res = protect_kernel_text_ro(start, end);
-	check_conflict(warnlvl, prot, res, start, end, pfn, "Text RO");
-	forbidden |= res;
+	if (!(cpa_flags & CPA_FLIP_TEXT_RW)) {
+		res = protect_kernel_text_ro(start, end);
+		check_conflict(warnlvl, prot, res, start, end, pfn, "Text RO");
+		forbidden |= res;
+	}
 
 	/* Check the PFN directly */
 	res = protect_pci_bios(pfn, pfn + npg - 1);
@@ -819,7 +822,7 @@ static int __should_split_large_page(pte_t *kpte, unsigned long address,
 	 * extra conditional required here.
 	 */
 	chk_prot = static_protections(old_prot, lpaddr, old_pfn, numpages,
-				      CPA_CONFLICT);
+				      CPA_CONFLICT, cpa->flags);
 
 	if (WARN_ON_ONCE(pgprot_val(chk_prot) != pgprot_val(old_prot))) {
 		/*
@@ -855,7 +858,7 @@ static int __should_split_large_page(pte_t *kpte, unsigned long address,
 	 * protection requirement in the large page.
 	 */
 	new_prot = static_protections(req_prot, lpaddr, old_pfn, numpages,
-				      CPA_DETECT);
+				      CPA_DETECT, cpa->flags);
 
 	/*
 	 * If there is a conflict, split the large page.
@@ -906,7 +909,7 @@ static void split_set_pte(struct cpa_data *cpa, pte_t *pte, unsigned long pfn,
 	if (!cpa->force_static_prot)
 		goto set;
 
-	prot = static_protections(ref_prot, address, pfn, npg, CPA_PROTECT);
+	prot = static_protections(ref_prot, address, pfn, npg, CPA_PROTECT, 0);
 
 	if (pgprot_val(prot) == pgprot_val(ref_prot))
 		goto set;
@@ -1504,7 +1507,7 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
 
 		cpa_inc_4k_install();
 		new_prot = static_protections(new_prot, address, pfn, 1,
-					      CPA_PROTECT);
+					      CPA_PROTECT, 0);
 
 		new_prot = pgprot_clear_protnone_bits(new_prot);
 
@@ -1707,7 +1710,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 	cpa.curpage = 0;
 	cpa.force_split = force_split;
 
-	if (in_flag & (CPA_ARRAY | CPA_PAGES_ARRAY))
+	if (in_flag & (CPA_ARRAY | CPA_PAGES_ARRAY | CPA_FLIP_TEXT_RW))
 		cpa.flags |= in_flag;
 
 	/* No alias checking for _NX bit modifications */
@@ -1983,11 +1986,24 @@ int set_memory_ro(unsigned long addr, int numpages)
 	return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0);
 }
 
+int set_text_ro(unsigned long addr, int numpages)
+{
+	return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
+					__pgprot(_PAGE_RW), 0, CPA_FLIP_TEXT_RW,
+					NULL);
+}
+
 int set_memory_rw(unsigned long addr, int numpages)
 {
 	return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0);
 }
 
+int set_text_rw(unsigned long addr, int numpages)
+{
+	return change_page_attr_set_clr(&addr, numpages, __pgprot(_PAGE_RW),
+					__pgprot(0), 0, CPA_FLIP_TEXT_RW, NULL);
+}
+
 int set_memory_np(unsigned long addr, int numpages)
 {
 	return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_PRESENT), 0);
-- 
2.17.1


             reply index

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-23  5:23 Song Liu [this message]
2019-08-23  9:36 ` Peter Zijlstra
2019-08-26  4:40   ` Song Liu
2019-08-26  9:23     ` Peter Zijlstra
2019-08-26 15:08       ` Song Liu
2019-08-26 20:50         ` Song Liu
2019-08-26 11:33   ` Steven Rostedt
2019-08-26 12:44     ` Peter Zijlstra
2019-08-26 15:41     ` Nadav Amit
2019-08-26 15:56       ` Peter Zijlstra
2019-08-26 15:56       ` Steven Rostedt
2019-08-26 16:09         ` Nadav Amit

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190823052335.572133-1-songliubraving@fb.com \
    --to=songliubraving@fb.com \
    --cc=dave.hansen@intel.com \
    --cc=kernel-team@fb.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=peterz@infradead.org \
    --cc=stable@vger.kernel.org \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git
	git clone --mirror https://lore.kernel.org/lkml/7 lkml/git/7.git
	git clone --mirror https://lore.kernel.org/lkml/8 lkml/git/8.git
	git clone --mirror https://lore.kernel.org/lkml/9 lkml/git/9.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org
	public-inbox-index lkml

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git