* [RFC PATCH] fs/exec: Add the support for ELF program's NUMA replication
@ 2021-09-06 16:16 Huang Shijie
2021-09-06 9:35 ` David Hildenbrand
` (2 more replies)
0 siblings, 3 replies; 7+ messages in thread
From: Huang Shijie @ 2021-09-06 16:16 UTC (permalink / raw)
To: viro
Cc: akpm, jlayton, bfields, torvalds, linux-fsdevel, linux-kernel,
linux-mm, song.bao.hua, patches, zwang, Huang Shijie
This patch adds AT_NUMA_REPLICATION for execveat().
If this flag is set, the kernel will trigger COW(copy on write)
on the mmapped ELF binary. So the program will have a copied-page
on its NUMA node, even if the original page in page cache is
on other NUMA nodes.
Signed-off-by: Huang Shijie <shijie@os.amperecomputing.com>
---
fs/binfmt_elf.c | 27 ++++++++++++++++++++++-----
fs/exec.c | 5 ++++-
include/linux/binfmts.h | 1 +
include/linux/mm.h | 2 ++
include/uapi/linux/fcntl.h | 2 ++
mm/mprotect.c | 2 +-
6 files changed, 32 insertions(+), 7 deletions(-)
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 439ed81e755a..fac8f4a4555a 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -362,13 +362,14 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
static unsigned long elf_map(struct file *filep, unsigned long addr,
const struct elf_phdr *eppnt, int prot, int type,
- unsigned long total_size)
+ unsigned long total_size, int numa_replication)
{
unsigned long map_addr;
unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
addr = ELF_PAGESTART(addr);
size = ELF_PAGEALIGN(size);
+ int ret;
/* mmap() will return -EINVAL if given a zero size, but a
* segment with zero filesize is perfectly valid */
@@ -385,11 +386,26 @@ static unsigned long elf_map(struct file *filep, unsigned long addr,
*/
if (total_size) {
total_size = ELF_PAGEALIGN(total_size);
- map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
+
+ if (numa_replication) {
+ /* Trigger the COW for this ELF code section */
+ map_addr = vm_mmap(filep, addr, total_size, prot | PROT_WRITE,
+ type | MAP_POPULATE, off);
+ if (!IS_ERR_VALUE(map_addr) && !(prot & PROT_WRITE)) {
+ /* Change back */
+ ret = do_mprotect_pkey(map_addr, total_size, prot, -1);
+ if (ret)
+ return ret;
+ }
+ } else {
+ map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
+ }
+
if (!BAD_ADDR(map_addr))
vm_munmap(map_addr+size, total_size-size);
- } else
+ } else {
map_addr = vm_mmap(filep, addr, size, prot, type, off);
+ }
if ((type & MAP_FIXED_NOREPLACE) &&
PTR_ERR((void *)map_addr) == -EEXIST)
@@ -635,7 +651,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
load_addr = -vaddr;
map_addr = elf_map(interpreter, load_addr + vaddr,
- eppnt, elf_prot, elf_type, total_size);
+ eppnt, elf_prot, elf_type, total_size, 0);
total_size = 0;
error = map_addr;
if (BAD_ADDR(map_addr))
@@ -1139,7 +1155,8 @@ static int load_elf_binary(struct linux_binprm *bprm)
}
error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
- elf_prot, elf_flags, total_size);
+ elf_prot, elf_flags, total_size,
+ bprm->support_numa_replication);
if (BAD_ADDR(error)) {
retval = IS_ERR((void *)error) ?
PTR_ERR((void*)error) : -EINVAL;
diff --git a/fs/exec.c b/fs/exec.c
index 38f63451b928..d27efa540641 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -900,7 +900,7 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags)
.lookup_flags = LOOKUP_FOLLOW,
};
- if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
+ if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH | AT_NUMA_REPLICATION)) != 0)
return ERR_PTR(-EINVAL);
if (flags & AT_SYMLINK_NOFOLLOW)
open_exec_flags.lookup_flags &= ~LOOKUP_FOLLOW;
@@ -1828,6 +1828,9 @@ static int bprm_execve(struct linux_binprm *bprm,
if (retval)
goto out;
+ /* Do we support NUMA replication for this program? */
+ bprm->support_numa_replication = flags & AT_NUMA_REPLICATION;
+
retval = exec_binprm(bprm);
if (retval < 0)
goto out;
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 049cf9421d83..1874e1732f20 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -64,6 +64,7 @@ struct linux_binprm {
struct rlimit rlim_stack; /* Saved RLIMIT_STACK used during exec. */
char buf[BINPRM_BUF_SIZE];
+ int support_numa_replication;
} __randomize_layout;
#define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7ca22e6e694a..76611381be2a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3244,6 +3244,8 @@ unsigned long wp_shared_mapping_range(struct address_space *mapping,
#endif
extern int sysctl_nr_trim_pages;
+int do_mprotect_pkey(unsigned long start, size_t len,
+ unsigned long prot, int pkey);
#ifdef CONFIG_PRINTK
void mem_dump_obj(void *object);
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index 2f86b2ad6d7e..de99c5ae8eca 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -111,4 +111,6 @@
#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
+#define AT_NUMA_REPLICATION 0x10000 /* Support NUMA replication for the ELF program */
+
#endif /* _UAPI_LINUX_FCNTL_H */
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 883e2cc85cad..d1f8cececfed 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -519,7 +519,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
/*
* pkey==-1 when doing a legacy mprotect()
*/
-static int do_mprotect_pkey(unsigned long start, size_t len,
+int do_mprotect_pkey(unsigned long start, size_t len,
unsigned long prot, int pkey)
{
unsigned long nstart, end, tmp, reqprot;
--
2.30.2
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [RFC PATCH] fs/exec: Add the support for ELF program's NUMA replication
2021-09-06 16:16 [RFC PATCH] fs/exec: Add the support for ELF program's NUMA replication Huang Shijie
@ 2021-09-06 9:35 ` David Hildenbrand
2021-09-09 9:46 ` Huang Shijie
2021-09-06 12:09 ` Matthew Wilcox
2021-09-09 1:43 ` kernel test robot
2 siblings, 1 reply; 7+ messages in thread
From: David Hildenbrand @ 2021-09-06 9:35 UTC (permalink / raw)
To: Huang Shijie, viro
Cc: akpm, jlayton, bfields, torvalds, linux-fsdevel, linux-kernel,
linux-mm, song.bao.hua, patches, zwang
On 06.09.21 18:16, Huang Shijie wrote:
> This patch adds AT_NUMA_REPLICATION for execveat().
>
> If this flag is set, the kernel will trigger COW(copy on write)
> on the mmapped ELF binary. So the program will have a copied-page
> on its NUMA node, even if the original page in page cache is
> on other NUMA nodes.
Am I missing something important or is this just absolutely not what we
want?
This means that for each and every invocation of the binary, we'll COW
the complete binary -- an awful lot of wasted main memory and swap.
This is not a NUMA replication, this is en executable ELF code replication.
>
> Signed-off-by: Huang Shijie <shijie@os.amperecomputing.com>
> ---
> fs/binfmt_elf.c | 27 ++++++++++++++++++++++-----
> fs/exec.c | 5 ++++-
> include/linux/binfmts.h | 1 +
> include/linux/mm.h | 2 ++
> include/uapi/linux/fcntl.h | 2 ++
> mm/mprotect.c | 2 +-
> 6 files changed, 32 insertions(+), 7 deletions(-)
>
> diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
> index 439ed81e755a..fac8f4a4555a 100644
> --- a/fs/binfmt_elf.c
> +++ b/fs/binfmt_elf.c
> @@ -362,13 +362,14 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
>
> static unsigned long elf_map(struct file *filep, unsigned long addr,
> const struct elf_phdr *eppnt, int prot, int type,
> - unsigned long total_size)
> + unsigned long total_size, int numa_replication)
> {
> unsigned long map_addr;
> unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
> unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
> addr = ELF_PAGESTART(addr);
> size = ELF_PAGEALIGN(size);
> + int ret;
>
> /* mmap() will return -EINVAL if given a zero size, but a
> * segment with zero filesize is perfectly valid */
> @@ -385,11 +386,26 @@ static unsigned long elf_map(struct file *filep, unsigned long addr,
> */
> if (total_size) {
> total_size = ELF_PAGEALIGN(total_size);
> - map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
> +
> + if (numa_replication) {
> + /* Trigger the COW for this ELF code section */
> + map_addr = vm_mmap(filep, addr, total_size, prot | PROT_WRITE,
> + type | MAP_POPULATE, off);
> + if (!IS_ERR_VALUE(map_addr) && !(prot & PROT_WRITE)) {
> + /* Change back */
> + ret = do_mprotect_pkey(map_addr, total_size, prot, -1);
> + if (ret)
> + return ret;
> + }
> + } else {
> + map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
> + }
> +
> if (!BAD_ADDR(map_addr))
> vm_munmap(map_addr+size, total_size-size);
> - } else
> + } else {
> map_addr = vm_mmap(filep, addr, size, prot, type, off);
> + }
>
> if ((type & MAP_FIXED_NOREPLACE) &&
> PTR_ERR((void *)map_addr) == -EEXIST)
> @@ -635,7 +651,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
> load_addr = -vaddr;
>
> map_addr = elf_map(interpreter, load_addr + vaddr,
> - eppnt, elf_prot, elf_type, total_size);
> + eppnt, elf_prot, elf_type, total_size, 0);
> total_size = 0;
> error = map_addr;
> if (BAD_ADDR(map_addr))
> @@ -1139,7 +1155,8 @@ static int load_elf_binary(struct linux_binprm *bprm)
> }
>
> error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
> - elf_prot, elf_flags, total_size);
> + elf_prot, elf_flags, total_size,
> + bprm->support_numa_replication);
> if (BAD_ADDR(error)) {
> retval = IS_ERR((void *)error) ?
> PTR_ERR((void*)error) : -EINVAL;
> diff --git a/fs/exec.c b/fs/exec.c
> index 38f63451b928..d27efa540641 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -900,7 +900,7 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags)
> .lookup_flags = LOOKUP_FOLLOW,
> };
>
> - if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
> + if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH | AT_NUMA_REPLICATION)) != 0)
> return ERR_PTR(-EINVAL);
> if (flags & AT_SYMLINK_NOFOLLOW)
> open_exec_flags.lookup_flags &= ~LOOKUP_FOLLOW;
> @@ -1828,6 +1828,9 @@ static int bprm_execve(struct linux_binprm *bprm,
> if (retval)
> goto out;
>
> + /* Do we support NUMA replication for this program? */
> + bprm->support_numa_replication = flags & AT_NUMA_REPLICATION;
> +
> retval = exec_binprm(bprm);
> if (retval < 0)
> goto out;
> diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
> index 049cf9421d83..1874e1732f20 100644
> --- a/include/linux/binfmts.h
> +++ b/include/linux/binfmts.h
> @@ -64,6 +64,7 @@ struct linux_binprm {
> struct rlimit rlim_stack; /* Saved RLIMIT_STACK used during exec. */
>
> char buf[BINPRM_BUF_SIZE];
> + int support_numa_replication;
> } __randomize_layout;
>
> #define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 7ca22e6e694a..76611381be2a 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3244,6 +3244,8 @@ unsigned long wp_shared_mapping_range(struct address_space *mapping,
> #endif
>
> extern int sysctl_nr_trim_pages;
> +int do_mprotect_pkey(unsigned long start, size_t len,
> + unsigned long prot, int pkey);
>
> #ifdef CONFIG_PRINTK
> void mem_dump_obj(void *object);
> diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
> index 2f86b2ad6d7e..de99c5ae8eca 100644
> --- a/include/uapi/linux/fcntl.h
> +++ b/include/uapi/linux/fcntl.h
> @@ -111,4 +111,6 @@
>
> #define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
>
> +#define AT_NUMA_REPLICATION 0x10000 /* Support NUMA replication for the ELF program */
> +
> #endif /* _UAPI_LINUX_FCNTL_H */
> diff --git a/mm/mprotect.c b/mm/mprotect.c
> index 883e2cc85cad..d1f8cececfed 100644
> --- a/mm/mprotect.c
> +++ b/mm/mprotect.c
> @@ -519,7 +519,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
> /*
> * pkey==-1 when doing a legacy mprotect()
> */
> -static int do_mprotect_pkey(unsigned long start, size_t len,
> +int do_mprotect_pkey(unsigned long start, size_t len,
> unsigned long prot, int pkey)
> {
> unsigned long nstart, end, tmp, reqprot;
>
--
Thanks,
David / dhildenb
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [RFC PATCH] fs/exec: Add the support for ELF program's NUMA replication
2021-09-06 9:35 ` David Hildenbrand
@ 2021-09-09 9:46 ` Huang Shijie
2021-09-09 8:58 ` David Hildenbrand
0 siblings, 1 reply; 7+ messages in thread
From: Huang Shijie @ 2021-09-09 9:46 UTC (permalink / raw)
To: David Hildenbrand
Cc: viro, akpm, jlayton, bfields, torvalds, linux-fsdevel,
linux-kernel, linux-mm, song.bao.hua, patches, zwang
On Mon, Sep 06, 2021 at 11:35:01AM +0200, David Hildenbrand wrote:
> On 06.09.21 18:16, Huang Shijie wrote:
> > This patch adds AT_NUMA_REPLICATION for execveat().
> >
> > If this flag is set, the kernel will trigger COW(copy on write)
> > on the mmapped ELF binary. So the program will have a copied-page
> > on its NUMA node, even if the original page in page cache is
> > on other NUMA nodes.
>
> Am I missing something important or is this just absolutely not what we
> want?
Please see the thread:
https://marc.info/?l=linux-kernel&m=163070220429222&w=2
Linus did not think it is a good choice to implement the "per-numa node page cache"
>
> This means that for each and every invocation of the binary, we'll COW the
> complete binary -- an awful lot of wasted main memory and swap.
Only the one who cares about the performance in NUMA will use this interface.
Most of the people never use it..
Thanks
Huang Shijie
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [RFC PATCH] fs/exec: Add the support for ELF program's NUMA replication
2021-09-09 9:46 ` Huang Shijie
@ 2021-09-09 8:58 ` David Hildenbrand
0 siblings, 0 replies; 7+ messages in thread
From: David Hildenbrand @ 2021-09-09 8:58 UTC (permalink / raw)
To: Huang Shijie
Cc: viro, akpm, jlayton, bfields, torvalds, linux-fsdevel,
linux-kernel, linux-mm, song.bao.hua, patches, zwang
On 09.09.21 11:46, Huang Shijie wrote:
> On Mon, Sep 06, 2021 at 11:35:01AM +0200, David Hildenbrand wrote:
>> On 06.09.21 18:16, Huang Shijie wrote:
>>> This patch adds AT_NUMA_REPLICATION for execveat().
>>>
>>> If this flag is set, the kernel will trigger COW(copy on write)
>>> on the mmapped ELF binary. So the program will have a copied-page
>>> on its NUMA node, even if the original page in page cache is
>>> on other NUMA nodes.
>>
>> Am I missing something important or is this just absolutely not what we
>> want?
>
> Please see the thread:
> https://marc.info/?l=linux-kernel&m=163070220429222&w=2
>
> Linus did not think it is a good choice to implement the "per-numa node page cache"
That doesn't make this approach any better.
I don't think we want this in the kernel. If user space wants to waste
memory, it can happily mmap() however it wants. The advisory is to not
do it.
--
Thanks,
David / dhildenb
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [RFC PATCH] fs/exec: Add the support for ELF program's NUMA replication
2021-09-06 16:16 [RFC PATCH] fs/exec: Add the support for ELF program's NUMA replication Huang Shijie
2021-09-06 9:35 ` David Hildenbrand
@ 2021-09-06 12:09 ` Matthew Wilcox
2021-09-09 9:48 ` Huang Shijie
2021-09-09 1:43 ` kernel test robot
2 siblings, 1 reply; 7+ messages in thread
From: Matthew Wilcox @ 2021-09-06 12:09 UTC (permalink / raw)
To: Huang Shijie
Cc: viro, akpm, jlayton, bfields, torvalds, linux-fsdevel,
linux-kernel, linux-mm, song.bao.hua, patches, zwang
On Mon, Sep 06, 2021 at 04:16:13PM +0000, Huang Shijie wrote:
> This patch adds AT_NUMA_REPLICATION for execveat().
>
> If this flag is set, the kernel will trigger COW(copy on write)
> on the mmapped ELF binary. So the program will have a copied-page
> on its NUMA node, even if the original page in page cache is
> on other NUMA nodes.
This does absolutely nothing for programs which run on multiple NUMA
nodes at the same time.
Also, I dislike the abuse of the term "COW" for this. It's COF --
Copy On Fault.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [RFC PATCH] fs/exec: Add the support for ELF program's NUMA replication
2021-09-06 12:09 ` Matthew Wilcox
@ 2021-09-09 9:48 ` Huang Shijie
0 siblings, 0 replies; 7+ messages in thread
From: Huang Shijie @ 2021-09-09 9:48 UTC (permalink / raw)
To: Matthew Wilcox
Cc: viro, akpm, jlayton, bfields, torvalds, linux-fsdevel,
linux-kernel, linux-mm, song.bao.hua, patches, zwang
On Mon, Sep 06, 2021 at 01:09:22PM +0100, Matthew Wilcox wrote:
> On Mon, Sep 06, 2021 at 04:16:13PM +0000, Huang Shijie wrote:
> > This patch adds AT_NUMA_REPLICATION for execveat().
> >
> > If this flag is set, the kernel will trigger COW(copy on write)
> > on the mmapped ELF binary. So the program will have a copied-page
> > on its NUMA node, even if the original page in page cache is
> > on other NUMA nodes.
>
> This does absolutely nothing for programs which run on multiple NUMA
> nodes at the same time.
Yes. Not suit for that case (Mysql database..).
>
> Also, I dislike the abuse of the term "COW" for this. It's COF --
> Copy On Fault.
Maybe it is better to add new flags "NUMA_REPLICATION" in mmap(). :)
Thanks
Huang Shijie
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [RFC PATCH] fs/exec: Add the support for ELF program's NUMA replication
2021-09-06 16:16 [RFC PATCH] fs/exec: Add the support for ELF program's NUMA replication Huang Shijie
2021-09-06 9:35 ` David Hildenbrand
2021-09-06 12:09 ` Matthew Wilcox
@ 2021-09-09 1:43 ` kernel test robot
2 siblings, 0 replies; 7+ messages in thread
From: kernel test robot @ 2021-09-09 1:43 UTC (permalink / raw)
To: kbuild-all
[-- Attachment #1: Type: text/plain, Size: 4202 bytes --]
Hi Huang,
[FYI, it's a private test report for your RFC patch.]
[auto build test WARNING on hnaz-linux-mm/master]
[also build test WARNING on linux/master v5.14]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Huang-Shijie/fs-exec-Add-the-support-for-ELF-program-s-NUMA-replication/20210906-162119
base: https://github.com/hnaz/linux-mm master
config: parisc-randconfig-s031-20210908 (attached as .config)
compiler: hppa-linux-gcc (GCC) 11.2.0
reproduce:
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# apt-get install sparse
# sparse version: v0.6.4-dirty
# https://github.com/0day-ci/linux/commit/b3c6ac3aa420536e234f841ab5196349b5624dcb
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Huang-Shijie/fs-exec-Add-the-support-for-ELF-program-s-NUMA-replication/20210906-162119
git checkout b3c6ac3aa420536e234f841ab5196349b5624dcb
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=parisc
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
All warnings (new ones prefixed by >>):
fs/binfmt_elf.c: In function 'elf_map':
>> fs/binfmt_elf.c:372:9: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement]
372 | int ret;
| ^~~
vim +372 fs/binfmt_elf.c
362
363 static unsigned long elf_map(struct file *filep, unsigned long addr,
364 const struct elf_phdr *eppnt, int prot, int type,
365 unsigned long total_size, int numa_replication)
366 {
367 unsigned long map_addr;
368 unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
369 unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
370 addr = ELF_PAGESTART(addr);
371 size = ELF_PAGEALIGN(size);
> 372 int ret;
373
374 /* mmap() will return -EINVAL if given a zero size, but a
375 * segment with zero filesize is perfectly valid */
376 if (!size)
377 return addr;
378
379 /*
380 * total_size is the size of the ELF (interpreter) image.
381 * The _first_ mmap needs to know the full size, otherwise
382 * randomization might put this image into an overlapping
383 * position with the ELF binary image. (since size < total_size)
384 * So we first map the 'big' image - and unmap the remainder at
385 * the end. (which unmap is needed for ELF images with holes.)
386 */
387 if (total_size) {
388 total_size = ELF_PAGEALIGN(total_size);
389
390 if (numa_replication) {
391 /* Trigger the COW for this ELF code section */
392 map_addr = vm_mmap(filep, addr, total_size, prot | PROT_WRITE,
393 type | MAP_POPULATE, off);
394 if (!IS_ERR_VALUE(map_addr) && !(prot & PROT_WRITE)) {
395 /* Change back */
396 ret = do_mprotect_pkey(map_addr, total_size, prot, -1);
397 if (ret)
398 return ret;
399 }
400 } else {
401 map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
402 }
403
404 if (!BAD_ADDR(map_addr))
405 vm_munmap(map_addr+size, total_size-size);
406 } else {
407 map_addr = vm_mmap(filep, addr, size, prot, type, off);
408 }
409
410 if ((type & MAP_FIXED_NOREPLACE) &&
411 PTR_ERR((void *)map_addr) == -EEXIST)
412 pr_info("%d (%s): Uhuuh, elf segment@%px requested but the memory is mapped already\n",
413 task_pid_nr(current), current->comm, (void *)addr);
414
415 return(map_addr);
416 }
417
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org
[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 28622 bytes --]
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2021-09-09 8:58 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-06 16:16 [RFC PATCH] fs/exec: Add the support for ELF program's NUMA replication Huang Shijie
2021-09-06 9:35 ` David Hildenbrand
2021-09-09 9:46 ` Huang Shijie
2021-09-09 8:58 ` David Hildenbrand
2021-09-06 12:09 ` Matthew Wilcox
2021-09-09 9:48 ` Huang Shijie
2021-09-09 1:43 ` kernel test robot
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.