From: Igor Stoppa <igor.stoppa@gmail.com>
To: Andy Lutomirski <luto@amacapital.net>,
Matthew Wilcox <willy@infradead.org>,
Peter Zijlstra <peterz@infradead.org>,
Dave Hansen <dave.hansen@linux.intel.com>,
Mimi Zohar <zohar@linux.vnet.ibm.com>
Cc: igor.stoppa@huawei.com, Nadav Amit <nadav.amit@gmail.com>,
Kees Cook <keescook@chromium.org>,
linux-integrity@vger.kernel.org,
kernel-hardening@lists.openwall.com, linux-mm@kvack.org,
linux-kernel@vger.kernel.org
Subject: [PATCH 04/12] __wr_after_init: x86_64: __wr_op
Date: Wed, 19 Dec 2018 23:33:30 +0200 [thread overview]
Message-ID: <20181219213338.26619-5-igor.stoppa@huawei.com> (raw)
In-Reply-To: <20181219213338.26619-1-igor.stoppa@huawei.com>
Architecture-specific implementation of the core write rare
operation.
The implementation is based on code from Andy Lutomirski and Nadav Amit
for patching the text on x86 [here goes reference to commits, once merged]
The modification of write protected data is done through an alternate
mapping of the same pages, as writable.
This mapping is persistent, but active only for a core that is
performing a write rare operation. And only for the duration of said
operation.
Local interrupts are disabled, while the alternate mapping is active.
In theory, it could introduce a non-predictable delay, in a preemptible
system, however the amount of data to be altered is likely to be far
smaller than a page.
Signed-off-by: Igor Stoppa <igor.stoppa@huawei.com>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Nadav Amit <nadav.amit@gmail.com>
CC: Matthew Wilcox <willy@infradead.org>
CC: Peter Zijlstra <peterz@infradead.org>
CC: Kees Cook <keescook@chromium.org>
CC: Dave Hansen <dave.hansen@linux.intel.com>
CC: Mimi Zohar <zohar@linux.vnet.ibm.com>
CC: linux-integrity@vger.kernel.org
CC: kernel-hardening@lists.openwall.com
CC: linux-mm@kvack.org
CC: linux-kernel@vger.kernel.org
---
arch/x86/Kconfig | 1 +
arch/x86/mm/Makefile | 2 +
arch/x86/mm/prmem.c | 120 +++++++++++++++++++++++++++++++++++++++++++
3 files changed, 123 insertions(+)
create mode 100644 arch/x86/mm/prmem.c
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8689e794a43c..e5e4fc4fa5c2 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -32,6 +32,7 @@ config X86_64
select SWIOTLB
select X86_DEV_DMA_OPS
select ARCH_HAS_SYSCALL_WRAPPER
+ select ARCH_HAS_PRMEM
#
# Arch settings
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 4b101dd6e52f..66652de1e2c7 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -53,3 +53,5 @@ obj-$(CONFIG_PAGE_TABLE_ISOLATION) += pti.o
obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt.o
obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_identity.o
obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o
+
+obj-$(CONFIG_PRMEM) += prmem.o
diff --git a/arch/x86/mm/prmem.c b/arch/x86/mm/prmem.c
new file mode 100644
index 000000000000..fc367551e736
--- /dev/null
+++ b/arch/x86/mm/prmem.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * prmem.c: Memory Protection Library
+ *
+ * (C) Copyright 2017-2018 Huawei Technologies Co. Ltd.
+ * Author: Igor Stoppa <igor.stoppa@huawei.com>
+ */
+
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/compiler.h>
+#include <linux/slab.h>
+#include <linux/mmu_context.h>
+#include <linux/rcupdate.h>
+#include <linux/prmem.h>
+
+static __ro_after_init bool wr_ready;
+static __ro_after_init struct mm_struct *wr_poking_mm;
+static __ro_after_init unsigned long wr_poking_base;
+
+/*
+ * The following two variables are statically allocated by the linker
+ * script at the the boundaries of the memory region (rounded up to
+ * multiples of PAGE_SIZE) reserved for __wr_after_init.
+ */
+extern long __start_wr_after_init;
+extern long __end_wr_after_init;
+
+static inline bool is_wr_after_init(unsigned long ptr, __kernel_size_t size)
+{
+ unsigned long start = (unsigned long)&__start_wr_after_init;
+ unsigned long end = (unsigned long)&__end_wr_after_init;
+ unsigned long low = ptr;
+ unsigned long high = ptr + size;
+
+ return likely(start <= low && low <= high && high <= end);
+}
+
+void *__wr_op(unsigned long dst, unsigned long src, __kernel_size_t len,
+ enum wr_op_type op)
+{
+ temporary_mm_state_t prev;
+ unsigned long offset;
+ unsigned long wr_poking_addr;
+
+ /* Confirm that the writable mapping exists. */
+ if (WARN_ONCE(!wr_ready, "No writable mapping available"))
+ return (void *)dst;
+
+ if (WARN_ONCE(op >= WR_OPS_NUMBER, "Invalid WR operation.") ||
+ WARN_ONCE(!is_wr_after_init(dst, len), "Invalid WR range."))
+ return (void *)dst;
+
+ offset = dst - (unsigned long)&__start_wr_after_init;
+ wr_poking_addr = wr_poking_base + offset;
+ local_irq_disable();
+ prev = use_temporary_mm(wr_poking_mm);
+
+ if (op == WR_MEMCPY)
+ copy_to_user((void __user *)wr_poking_addr, (void *)src, len);
+ else if (op == WR_MEMSET)
+ memset_user((void __user *)wr_poking_addr, (u8)src, len);
+
+ unuse_temporary_mm(prev);
+ local_irq_enable();
+ return (void *)dst;
+}
+
+#define TB (1UL << 40)
+
+struct mm_struct *copy_init_mm(void);
+void __init wr_poking_init(void)
+{
+ unsigned long start = (unsigned long)&__start_wr_after_init;
+ unsigned long end = (unsigned long)&__end_wr_after_init;
+ unsigned long i;
+ unsigned long wr_range;
+
+ wr_poking_mm = copy_init_mm();
+ if (WARN_ONCE(!wr_poking_mm, "No alternate mapping available."))
+ return;
+
+ wr_range = round_up(end - start, PAGE_SIZE);
+
+ /* Randomize the poking address base*/
+ wr_poking_base = TASK_UNMAPPED_BASE +
+ (kaslr_get_random_long("Write Rare Poking") & PAGE_MASK) %
+ (TASK_SIZE - (TASK_UNMAPPED_BASE + wr_range));
+
+ /*
+ * Place 64TB of kernel address space within 128TB of user address
+ * space, at a random page aligned offset.
+ */
+ wr_poking_base = (((unsigned long)kaslr_get_random_long("WR Poke")) &
+ PAGE_MASK) % (64 * _BITUL(40));
+
+ /* Create alternate mapping for the entire wr_after_init range. */
+ for (i = start; i < end; i += PAGE_SIZE) {
+ struct page *page;
+ spinlock_t *ptl;
+ pte_t pte;
+ pte_t *ptep;
+ unsigned long wr_poking_addr;
+
+ page = virt_to_page(i);
+ if (WARN_ONCE(!page, "WR memory without physical page"))
+ return;
+ wr_poking_addr = i - start + wr_poking_base;
+
+ /* The lock is not needed, but avoids open-coding. */
+ ptep = get_locked_pte(wr_poking_mm, wr_poking_addr, &ptl);
+ if (WARN_ONCE(!ptep, "No pte for writable mapping"))
+ return;
+
+ pte = mk_pte(page, PAGE_KERNEL);
+ set_pte_at(wr_poking_mm, wr_poking_addr, ptep, pte);
+ spin_unlock(ptl);
+ }
+ wr_ready = true;
+}
--
2.19.1
next prev parent reply other threads:[~2018-12-19 21:35 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-12-19 21:33 [RFC v2 PATCH 0/12] hardening: statically allocated protected memory Igor Stoppa
2018-12-19 21:33 ` [PATCH 01/12] x86_64: memset_user() Igor Stoppa
2018-12-19 21:33 ` [PATCH 02/12] __wr_after_init: linker section and label Igor Stoppa
2018-12-19 21:33 ` [PATCH 03/12] __wr_after_init: generic header Igor Stoppa
2018-12-21 19:38 ` Nadav Amit
2018-12-21 19:45 ` Matthew Wilcox
2018-12-23 2:28 ` Igor Stoppa
2018-12-19 21:33 ` Igor Stoppa [this message]
2018-12-20 16:53 ` [PATCH 04/12] __wr_after_init: x86_64: __wr_op Igor Stoppa
2018-12-20 17:20 ` Thiago Jung Bauermann
2018-12-20 17:46 ` Igor Stoppa
2018-12-20 18:49 ` Matthew Wilcox
2018-12-20 19:19 ` Igor Stoppa
2018-12-20 19:27 ` Matthew Wilcox
2018-12-21 17:23 ` Andy Lutomirski
2018-12-21 17:42 ` Igor Stoppa
2018-12-19 21:33 ` [PATCH 05/12] __wr_after_init: x86_64: debug writes Igor Stoppa
2018-12-19 21:33 ` [PATCH 06/12] __wr_after_init: Documentation: self-protection Igor Stoppa
2018-12-19 21:33 ` [PATCH 07/12] __wr_after_init: lkdtm test Igor Stoppa
2018-12-19 21:33 ` [PATCH 08/12] rodata_test: refactor tests Igor Stoppa
2018-12-19 21:33 ` [PATCH 09/12] rodata_test: add verification for __wr_after_init Igor Stoppa
2018-12-19 21:33 ` [PATCH 10/12] __wr_after_init: test write rare functionality Igor Stoppa
2018-12-19 21:33 ` [PATCH 11/12] IMA: turn ima_policy_flags into __wr_after_init Igor Stoppa
2018-12-20 17:30 ` Thiago Jung Bauermann
2018-12-20 17:49 ` Igor Stoppa
2018-12-19 21:33 ` [PATCH 12/12] x86_64: __clear_user as case of __memset_user Igor Stoppa
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20181219213338.26619-5-igor.stoppa@huawei.com \
--to=igor.stoppa@gmail.com \
--cc=dave.hansen@linux.intel.com \
--cc=igor.stoppa@huawei.com \
--cc=keescook@chromium.org \
--cc=kernel-hardening@lists.openwall.com \
--cc=linux-integrity@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=luto@amacapital.net \
--cc=nadav.amit@gmail.com \
--cc=peterz@infradead.org \
--cc=willy@infradead.org \
--cc=zohar@linux.vnet.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).