Linux-mm Archive on lore.kernel.org
 help / color / Atom feed
From: Anthony Yznaga <anthony.yznaga@oracle.com>
To: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-mm@kvack.org, linux-arch@vger.kernel.org
Cc: mhocko@kernel.org, tglx@linutronix.de, mingo@redhat.com,
	bp@alien8.de, x86@kernel.org, hpa@zytor.com,
	viro@zeniv.linux.org.uk, akpm@linux-foundation.org,
	arnd@arndb.de, ebiederm@xmission.com, keescook@chromium.org,
	gerg@linux-m68k.org, ktkhai@virtuozzo.com,
	christian.brauner@ubuntu.com, peterz@infradead.org,
	esyr@redhat.com, jgg@ziepe.ca, christian@kellner.me,
	areber@redhat.com, cyphar@cyphar.com, steven.sistare@oracle.com
Subject: [RFC PATCH 3/5] mm: introduce VM_EXEC_KEEP
Date: Mon, 27 Jul 2020 10:11:25 -0700
Message-ID: <1595869887-23307-4-git-send-email-anthony.yznaga@oracle.com> (raw)
In-Reply-To: <1595869887-23307-1-git-send-email-anthony.yznaga@oracle.com>

A vma with the VM_EXEC_KEEP flag is preserved across exec.  The flag is
supported for anonymous vmas only.  For safety, overlap with fixed address
VMAs created in the new mm during exec (e.g. the stack and ELF load
segments) is not permitted and will cause the exec to fail.
(We are studying how to guarantee there are no conflicts. Comments welcome.)

Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
Signed-off-by: Anthony Yznaga <anthony.yznaga@oracle.com>
---
 arch/x86/Kconfig   |  1 +
 fs/exec.c          | 20 ++++++++++++++++++++
 include/linux/mm.h |  5 +++++
 kernel/fork.c      |  2 +-
 mm/mmap.c          | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 74 insertions(+), 1 deletion(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 883da0abf779..fc36eb2f45c0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -30,6 +30,7 @@ config X86_64
 	select MODULES_USE_ELF_RELA
 	select NEED_DMA_MAP_STATE
 	select SWIOTLB
+	select ARCH_USES_HIGH_VMA_FLAGS
 
 config FORCE_DYNAMIC_FTRACE
 	def_bool y
diff --git a/fs/exec.c b/fs/exec.c
index 262112e5f9f8..1de09c4eef00 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1069,6 +1069,20 @@ ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len)
 EXPORT_SYMBOL(read_code);
 #endif
 
+/* Duplicate every VM_EXEC_KEEP vma of @old_mm into @new_mm via vma_dup(). */
+static int vma_dup_some(struct mm_struct *old_mm, struct mm_struct *new_mm)
+{
+	struct vm_area_struct *cur;
+	int err = 0;
+
+	/* The first non-zero vma_dup() result ends the walk. */
+	for (cur = old_mm->mmap; cur && !err; cur = cur->vm_next) {
+		if (cur->vm_flags & VM_EXEC_KEEP)
+			err = vma_dup(cur, new_mm);
+	}
+	return err;
+}
+
 /*
  * Maps the mm_struct mm into the current task struct.
  * On success, this function returns with the mutex
@@ -1104,6 +1118,12 @@ static int exec_mmap(struct mm_struct *mm)
 			mutex_unlock(&tsk->signal->exec_update_mutex);
 			return -EINTR;
 		}
+		ret = vma_dup_some(old_mm, mm);
+		if (ret) {
+			mmap_read_unlock(old_mm);
+			mutex_unlock(&tsk->signal->exec_update_mutex);
+			return ret;
+		}
 	}
 
 	task_lock(tsk);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index dc7b87310c10..1c538ba77f33 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -295,11 +295,15 @@ int overcommit_kbytes_handler(struct ctl_table *, int, void *, size_t *,
 #define VM_HIGH_ARCH_BIT_2	34	/* bit only usable on 64-bit architectures */
 #define VM_HIGH_ARCH_BIT_3	35	/* bit only usable on 64-bit architectures */
 #define VM_HIGH_ARCH_BIT_4	36	/* bit only usable on 64-bit architectures */
+#define VM_HIGH_ARCH_BIT_5	37	/* bit only usable on 64-bit architectures */
 #define VM_HIGH_ARCH_0	BIT(VM_HIGH_ARCH_BIT_0)
 #define VM_HIGH_ARCH_1	BIT(VM_HIGH_ARCH_BIT_1)
 #define VM_HIGH_ARCH_2	BIT(VM_HIGH_ARCH_BIT_2)
 #define VM_HIGH_ARCH_3	BIT(VM_HIGH_ARCH_BIT_3)
 #define VM_HIGH_ARCH_4	BIT(VM_HIGH_ARCH_BIT_4)
+#define VM_EXEC_KEEP	BIT(VM_HIGH_ARCH_BIT_5)	/* preserve VMA across exec */
+#else
+#define VM_EXEC_KEEP	VM_NONE
 #endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */
 
 #ifdef CONFIG_ARCH_HAS_PKEYS
@@ -2534,6 +2538,7 @@ extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
 	unsigned long addr, unsigned long len, pgoff_t pgoff,
 	bool *need_rmap_locks);
 extern void exit_mmap(struct mm_struct *);
+extern int vma_dup(struct vm_area_struct *vma, struct mm_struct *mm);
 
 static inline int check_data_rlimit(unsigned long rlim,
 				    unsigned long new,
diff --git a/kernel/fork.c b/kernel/fork.c
index efc5493203ae..15ead613714f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -564,7 +564,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 			tmp->anon_vma = NULL;
 		} else if (anon_vma_fork(tmp, mpnt))
 			goto fail_nomem_anon_vma_fork;
-		tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT);
+		tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT | VM_EXEC_KEEP);
 		file = tmp->vm_file;
 		if (file) {
 			struct inode *inode = file_inode(file);
diff --git a/mm/mmap.c b/mm/mmap.c
index 59a4682ebf3f..be2ff53743c3 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3279,6 +3279,53 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 	return NULL;
 }
 
+/* Copy anonymous @old_vma and its page range into @mm; returns 0 or -errno. */
+int vma_dup(struct vm_area_struct *old_vma, struct mm_struct *mm)
+{
+	struct mm_struct *old_mm = old_vma->vm_mm;
+	struct vm_area_struct *vma;
+	int ret;
+
+	if (WARN_ON(old_vma->vm_file || old_vma->vm_ops))
+		return -EINVAL;
+
+	/* Refuse to overlap anything already present in the new mm. */
+	vma = find_vma(mm, old_vma->vm_start);
+	if (vma && vma->vm_start < old_vma->vm_end)
+		return -EEXIST;
+
+	vma = vm_area_dup(old_vma);
+	if (!vma)
+		return -ENOMEM;
+
+	ret = vma_dup_policy(old_vma, vma);
+	if (ret)
+		goto out_free_vma;
+
+	vma->vm_mm = mm;
+	ret = anon_vma_fork(vma, old_vma);
+	if (ret)
+		goto out_put_policy;
+
+	/* Like dup_mmap(), drop both mlock flags; also drop uffd and KEEP. */
+	vma->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT | VM_UFFD_MISSING |
+			   VM_UFFD_WP | VM_EXEC_KEEP);
+	vma->vm_next = vma->vm_prev = NULL;
+	vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
+	if (is_vm_hugetlb_page(vma))
+		reset_vma_resv_huge_pages(vma);
+	__insert_vm_struct(mm, vma);
+	/* Account the pages only once the vma is actually linked into @mm. */
+	mm->total_vm += vma_pages(old_vma);
+	return copy_page_range(mm, old_mm, old_vma);
+
+out_put_policy:
+	mpol_put(vma_policy(vma));
+out_free_vma:
+	vm_area_free(vma);
+	return ret;
+}
+
 /*
  * Return true if the calling process may expand its vm space by the passed
  * number of pages
-- 
1.8.3.1



  parent reply index

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-27 17:11 [RFC PATCH 0/5] madvise MADV_DOEXEC Anthony Yznaga
2020-07-27 17:07 ` Eric W. Biederman
2020-07-27 18:00   ` Steven Sistare
2020-07-28 13:40     ` Christian Brauner
2020-07-27 17:11 ` [RFC PATCH 1/5] elf: reintroduce using MAP_FIXED_NOREPLACE for elf executable mappings Anthony Yznaga
2020-07-27 17:11 ` [RFC PATCH 2/5] mm: do not assume only the stack vma exists in setup_arg_pages() Anthony Yznaga
2020-07-27 17:11 ` Anthony Yznaga [this message]
2020-07-28 13:38   ` [RFC PATCH 3/5] mm: introduce VM_EXEC_KEEP Eric W. Biederman
2020-07-28 17:44     ` Anthony Yznaga
2020-07-29 13:52   ` Kirill A. Shutemov
2020-07-29 23:20     ` Anthony Yznaga
2020-07-27 17:11 ` [RFC PATCH 4/5] exec, elf: require opt-in for accepting preserved mem Anthony Yznaga
2020-07-27 17:11 ` [RFC PATCH 5/5] mm: introduce MADV_DOEXEC Anthony Yznaga
2020-07-28 13:22   ` Kirill Tkhai
2020-07-28 14:06     ` Steven Sistare
2020-07-28 11:34 ` [RFC PATCH 0/5] madvise MADV_DOEXEC Kirill Tkhai
2020-07-28 17:28   ` Anthony Yznaga
2020-07-28 14:23 ` Andy Lutomirski
2020-07-28 14:30   ` Steven Sistare
2020-07-30 15:22 ` Matthew Wilcox
2020-07-30 15:27   ` Christian Brauner
2020-07-30 15:34     ` Matthew Wilcox
2020-07-30 15:54       ` Christian Brauner
2020-07-31  9:12     ` Stefan Hajnoczi
2020-07-30 15:59   ` Steven Sistare
2020-07-30 17:12     ` Matthew Wilcox
2020-07-30 17:35       ` Steven Sistare
2020-07-30 17:49         ` Matthew Wilcox
2020-07-30 18:27           ` Steven Sistare
2020-07-30 21:58             ` Eric W. Biederman
2020-07-31 14:57               ` Steven Sistare
2020-07-31 15:27                 ` Matthew Wilcox
2020-07-31 16:11                   ` Steven Sistare
2020-07-31 16:56                     ` Jason Gunthorpe
2020-07-31 17:15                       ` Steven Sistare
2020-07-31 17:48                         ` Jason Gunthorpe
2020-07-31 17:55                           ` Steven Sistare
2020-07-31 17:23                     ` Matthew Wilcox
2020-08-03 15:28                 ` Eric W. Biederman
2020-08-03 15:42                   ` James Bottomley
2020-08-03 20:03                     ` Steven Sistare
     [not found]                     ` <9371b8272fd84280ae40b409b260bab3@AcuMS.aculab.com>
2020-08-04 11:13                       ` Matthew Wilcox
2020-08-03 19:29                   ` Steven Sistare
2020-07-31 19:41 ` Steven Sistare

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1595869887-23307-4-git-send-email-anthony.yznaga@oracle.com \
    --to=anthony.yznaga@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=areber@redhat.com \
    --cc=arnd@arndb.de \
    --cc=bp@alien8.de \
    --cc=christian.brauner@ubuntu.com \
    --cc=christian@kellner.me \
    --cc=cyphar@cyphar.com \
    --cc=ebiederm@xmission.com \
    --cc=esyr@redhat.com \
    --cc=gerg@linux-m68k.org \
    --cc=hpa@zytor.com \
    --cc=jgg@ziepe.ca \
    --cc=keescook@chromium.org \
    --cc=ktkhai@virtuozzo.com \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@kernel.org \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=steven.sistare@oracle.com \
    --cc=tglx@linutronix.de \
    --cc=viro@zeniv.linux.org.uk \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-mm Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-mm/0 linux-mm/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-mm linux-mm/ https://lore.kernel.org/linux-mm \
		linux-mm@kvack.org
	public-inbox-index linux-mm

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kvack.linux-mm


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git